1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2024-11-21 15:04:11 -05:00

perf(cache): single cache file for remote modules (#24983)

This changes the global cache to store the cache file for remote modules
in one file instead of two.
This commit is contained in:
David Sherret 2024-08-26 19:59:17 -04:00 committed by GitHub
parent e13230226f
commit c89a20b428
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 93 additions and 126 deletions

4
Cargo.lock generated
View file

@ -1346,9 +1346,9 @@ dependencies = [
[[package]]
name = "deno_cache_dir"
version = "0.10.2"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9fa4989e4c6b0409438e2a68a91e4e02858b0d8ba6e5bc6860af6b0d0f385e8"
checksum = "405790fa1fa5f05c2e7dca817f820dc1c950ea846f47212ed6d670a3023cb4fe"
dependencies = [
"deno_media_type",
"indexmap",

View file

@ -102,7 +102,7 @@ chrono = { version = "0.4", default-features = false, features = ["std", "serde"
console_static_text = "=0.8.1"
data-encoding = "2.3.3"
data-url = "=0.3.0"
deno_cache_dir = "=0.10.2"
deno_cache_dir = "=0.11.0"
deno_package_json = { version = "=0.1.1", default-features = false }
dlopen2 = "0.6.1"
ecb = "=0.1.2"

8
cli/cache/mod.rs vendored
View file

@ -62,12 +62,8 @@ pub const CACHE_PERM: u32 = 0o644;
pub struct RealDenoCacheEnv;
impl deno_cache_dir::DenoCacheEnv for RealDenoCacheEnv {
fn read_file_bytes(&self, path: &Path) -> std::io::Result<Option<Vec<u8>>> {
match std::fs::read(path) {
Ok(s) => Ok(Some(s)),
Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
Err(err) => Err(err),
}
fn read_file_bytes(&self, path: &Path) -> std::io::Result<Vec<u8>> {
std::fs::read(path)
}
fn atomic_write_file(

View file

@ -304,8 +304,11 @@ impl CliFactory {
let global_cache = self.global_http_cache()?.clone();
match self.cli_options()?.vendor_dir_path() {
Some(local_path) => {
let local_cache =
LocalHttpCache::new(local_path.clone(), global_cache);
let local_cache = LocalHttpCache::new(
local_path.clone(),
global_cache,
deno_cache_dir::GlobalToLocalCopy::Allow,
);
Ok(Arc::new(local_cache))
}
None => Ok(global_cache),

View file

@ -11,7 +11,6 @@ use crate::http_util::HttpClientProvider;
use crate::util::progress_bar::ProgressBar;
use deno_ast::MediaType;
use deno_core::anyhow::bail;
use deno_core::anyhow::Context;
use deno_core::error::custom_error;
use deno_core::error::generic_error;
@ -52,6 +51,25 @@ pub enum FileOrRedirect {
Redirect(ModuleSpecifier),
}
impl FileOrRedirect {
fn from_deno_cache_entry(
specifier: &ModuleSpecifier,
cache_entry: deno_cache_dir::CacheEntry,
) -> Result<Self, AnyError> {
if let Some(redirect_to) = cache_entry.metadata.headers.get("location") {
let redirect =
deno_core::resolve_import(redirect_to, specifier.as_str())?;
Ok(FileOrRedirect::Redirect(redirect))
} else {
Ok(FileOrRedirect::File(File {
specifier: specifier.clone(),
maybe_headers: Some(cache_entry.metadata.headers),
source: Arc::from(cache_entry.content),
}))
}
}
}
/// A structure representing a source file.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct File {
@ -238,45 +256,32 @@ impl FileFetcher {
);
let cache_key = self.http_cache.cache_item_key(specifier)?; // compute this once
let Some(headers) = self.http_cache.read_headers(&cache_key)? else {
return Ok(None);
};
if let Some(redirect_to) = headers.get("location") {
let redirect =
deno_core::resolve_import(redirect_to, specifier.as_str())?;
return Ok(Some(FileOrRedirect::Redirect(redirect)));
}
let result = self.http_cache.read_file_bytes(
let result = self.http_cache.get(
&cache_key,
maybe_checksum
.as_ref()
.map(|c| deno_cache_dir::Checksum::new(c.as_str())),
deno_cache_dir::GlobalToLocalCopy::Allow,
);
let bytes = match result {
Ok(Some(bytes)) => bytes,
Ok(None) => return Ok(None),
match result {
Ok(Some(cache_data)) => Ok(Some(FileOrRedirect::from_deno_cache_entry(
specifier, cache_data,
)?)),
Ok(None) => Ok(None),
Err(err) => match err {
deno_cache_dir::CacheReadFileError::Io(err) => return Err(err.into()),
deno_cache_dir::CacheReadFileError::Io(err) => Err(err.into()),
deno_cache_dir::CacheReadFileError::ChecksumIntegrity(err) => {
// convert to the equivalent deno_graph error so that it
// enhances it if this is passed to deno_graph
return Err(
Err(
deno_graph::source::ChecksumIntegrityError {
actual: err.actual,
expected: err.expected,
}
.into(),
);
)
}
},
};
Ok(Some(FileOrRedirect::File(File {
specifier: specifier.clone(),
maybe_headers: Some(headers),
source: Arc::from(bytes),
})))
}
}
/// Convert a data URL into a file, resulting in an error if the URL is
@ -363,12 +368,30 @@ impl FileFetcher {
);
}
let maybe_etag = self
let maybe_etag_cache_entry = self
.http_cache
.cache_item_key(specifier)
.ok()
.and_then(|key| self.http_cache.read_headers(&key).ok().flatten())
.and_then(|headers| headers.get("etag").cloned());
.and_then(|key| {
self
.http_cache
.get(
&key,
maybe_checksum
.as_ref()
.map(|c| deno_cache_dir::Checksum::new(c.as_str())),
)
.ok()
.flatten()
})
.and_then(|cache_entry| {
cache_entry
.metadata
.headers
.get("etag")
.cloned()
.map(|etag| (cache_entry, etag))
});
let maybe_auth_token = self.auth_tokens.get(specifier);
async fn handle_request_or_server_error(
@ -390,7 +413,6 @@ impl FileFetcher {
}
}
let mut maybe_etag = maybe_etag;
let mut retried = false; // retry intermittent failures
let result = loop {
let result = match self
@ -399,31 +421,17 @@ impl FileFetcher {
.fetch_no_follow(FetchOnceArgs {
url: specifier.clone(),
maybe_accept: maybe_accept.map(ToOwned::to_owned),
maybe_etag: maybe_etag.clone(),
maybe_etag: maybe_etag_cache_entry
.as_ref()
.map(|(_, etag)| etag.clone()),
maybe_auth_token: maybe_auth_token.clone(),
maybe_progress_guard: maybe_progress_guard.as_ref(),
})
.await?
{
FetchOnceResult::NotModified => {
let file_or_redirect =
self.fetch_cached_no_follow(specifier, maybe_checksum)?;
match file_or_redirect {
Some(file_or_redirect) => Ok(file_or_redirect),
None => {
// Someone may have deleted the body from the cache since
// it's currently stored in a separate file from the headers,
// so delete the etag and try again
if maybe_etag.is_some() {
debug!("Cache body not found. Trying again without etag.");
maybe_etag = None;
continue;
} else {
// should never happen
bail!("Your deno cache directory is in an unrecoverable state. Please delete it and try again.")
}
}
}
let (cache_entry, _) = maybe_etag_cache_entry.unwrap();
FileOrRedirect::from_deno_cache_entry(specifier, cache_entry)
}
FetchOnceResult::Redirect(redirect_url, headers) => {
self.http_cache.set(specifier, headers, &[])?;
@ -1480,13 +1488,10 @@ mod tests {
let cache_key = file_fetcher.http_cache.cache_item_key(url).unwrap();
let bytes = file_fetcher
.http_cache
.read_file_bytes(
&cache_key,
None,
deno_cache_dir::GlobalToLocalCopy::Allow,
)
.get(&cache_key, None)
.unwrap()
.unwrap();
.unwrap()
.content;
String::from_utf8(bytes).unwrap()
}

View file

@ -17,16 +17,6 @@ use std::path::Path;
use std::sync::Arc;
use std::time::SystemTime;
/// In the LSP, we disallow the cache from automatically copying from
/// the global cache to the local cache for technical reasons.
///
/// 1. We need to verify the checksums from the lockfile are correct when
/// moving from the global to the local cache.
/// 2. We need to verify the checksums for JSR https specifiers match what
/// is found in the package's manifest.
pub const LSP_DISALLOW_GLOBAL_TO_LOCAL_COPY: deno_cache_dir::GlobalToLocalCopy =
deno_cache_dir::GlobalToLocalCopy::Disallow;
pub fn calculate_fs_version(
cache: &LspCache,
specifier: &ModuleSpecifier,

View file

@ -2,7 +2,6 @@
use super::cache::calculate_fs_version;
use super::cache::LspCache;
use super::cache::LSP_DISALLOW_GLOBAL_TO_LOCAL_COPY;
use super::config::Config;
use super::resolver::LspResolver;
use super::testing::TestCollector;
@ -872,22 +871,19 @@ impl FileSystemDocuments {
} else {
let http_cache = cache.for_specifier(file_referrer);
let cache_key = http_cache.cache_item_key(specifier).ok()?;
let bytes = http_cache
.read_file_bytes(&cache_key, None, LSP_DISALLOW_GLOBAL_TO_LOCAL_COPY)
.ok()??;
let specifier_headers = http_cache.read_headers(&cache_key).ok()??;
let cached_file = http_cache.get(&cache_key, None).ok()??;
let (_, maybe_charset) =
deno_graph::source::resolve_media_type_and_charset_from_headers(
specifier,
Some(&specifier_headers),
Some(&cached_file.metadata.headers),
);
let content = deno_graph::source::decode_owned_source(
specifier,
bytes,
cached_file.content,
maybe_charset,
)
.ok()?;
let maybe_headers = Some(specifier_headers);
let maybe_headers = Some(cached_file.metadata.headers);
Document::new(
specifier.clone(),
content.into(),

View file

@ -258,12 +258,9 @@ fn read_cached_url(
cache: &Arc<dyn HttpCache>,
) -> Option<Vec<u8>> {
cache
.read_file_bytes(
&cache.cache_item_key(url).ok()?,
None,
deno_cache_dir::GlobalToLocalCopy::Disallow,
)
.get(&cache.cache_item_key(url).ok()?, None)
.ok()?
.map(|f| f.content)
}
#[derive(Debug)]

View file

@ -1,5 +1,7 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
use deno_cache_dir::HttpCache;
use deno_core::serde_json;
use deno_core::serde_json::json;
use deno_core::serde_json::Value;
use deno_lockfile::Lockfile;
@ -184,13 +186,24 @@ fn reload_info_not_found_cache_but_exists_remote() {
let specifier =
Url::parse(&format!("http://127.0.0.1:4250/{}/meta.json", package))
.unwrap();
let registry_json_path = deno_dir
.path()
.join("deps")
.join(deno_cache_dir::url_to_filename(&specifier).unwrap());
let mut registry_json = registry_json_path.read_json_value();
let cache = deno_cache_dir::GlobalHttpCache::new(
deno_dir.path().join("deps").to_path_buf(),
deno_cache_dir::TestRealDenoCacheEnv,
);
let entry = cache
.get(&cache.cache_item_key(&specifier).unwrap(), None)
.unwrap()
.unwrap();
let mut registry_json: serde_json::Value =
serde_json::from_slice(&entry.content).unwrap();
remove_version(&mut registry_json, version);
registry_json_path.write_json(&registry_json);
cache
.set(
&specifier,
entry.metadata.headers.clone(),
registry_json.to_string().as_bytes(),
)
.unwrap();
}
// This tests that when a local machine doesn't have a version

View file

@ -4972,39 +4972,6 @@ console.log(add(3, 4));
output.assert_matches_text("[WILDCARD]5\n7\n");
}
#[test]
fn run_etag_delete_source_cache() {
let test_context = TestContextBuilder::new()
.use_temp_cwd()
.use_http_server()
.build();
test_context
.temp_dir()
.write("main.ts", "import 'http://localhost:4545/etag_script.ts'");
test_context
.new_command()
.args("cache main.ts")
.run()
.skip_output_check();
// The cache is currently stored unideally in two files where one file has the headers
// and the other contains the body. An issue can happen with the etag header where the
// headers file exists, but the body was deleted. We need to get the cache to gracefully
// handle this scenario.
let deno_dir = test_context.deno_dir().path();
let etag_script_path = deno_dir.join("deps/http/localhost_PORT4545/26110db7d42c9bad32386735cbc05c301f83e4393963deb8da14fec3b4202a13");
assert!(etag_script_path.exists());
etag_script_path.remove_file();
test_context
.new_command()
.args("cache --reload --log-level=debug main.ts")
.run()
.assert_matches_text(
"[WILDCARD]Cache body not found. Trying again without etag.[WILDCARD]",
);
}
#[test]
fn code_cache_test() {
let test_context = TestContextBuilder::new().use_temp_cwd().build();