1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2024-11-25 15:29:32 -05:00

perf(cache): single cache file for typescript emit (#24994)

This commit is contained in:
David Sherret 2024-08-26 11:43:57 -04:00 committed by GitHub
parent d8dfe6dc97
commit a8ce02473a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

165
cli/cache/emit.rs vendored
View file

@ -5,33 +5,25 @@ use std::path::PathBuf;
use deno_ast::ModuleSpecifier; use deno_ast::ModuleSpecifier;
use deno_core::anyhow::anyhow; use deno_core::anyhow::anyhow;
use deno_core::error::AnyError; use deno_core::error::AnyError;
use deno_core::serde_json;
use deno_core::unsync::sync::AtomicFlag; use deno_core::unsync::sync::AtomicFlag;
use serde::Deserialize;
use serde::Serialize;
use super::DiskCache; use super::DiskCache;
use super::FastInsecureHasher;
#[derive(Debug, Deserialize, Serialize)]
struct EmitMetadata {
pub source_hash: u64,
pub emit_hash: u64,
}
/// The cache that stores previously emitted files. /// The cache that stores previously emitted files.
pub struct EmitCache { pub struct EmitCache {
disk_cache: DiskCache, disk_cache: DiskCache,
cli_version: &'static str,
emit_failed_flag: AtomicFlag, emit_failed_flag: AtomicFlag,
file_serializer: EmitFileSerializer,
} }
impl EmitCache { impl EmitCache {
pub fn new(disk_cache: DiskCache) -> Self { pub fn new(disk_cache: DiskCache) -> Self {
Self { Self {
disk_cache, disk_cache,
cli_version: crate::version::DENO_VERSION_INFO.deno,
emit_failed_flag: Default::default(), emit_failed_flag: Default::default(),
file_serializer: EmitFileSerializer {
cli_version: crate::version::DENO_VERSION_INFO.deno,
},
} }
} }
@ -48,37 +40,11 @@ impl EmitCache {
specifier: &ModuleSpecifier, specifier: &ModuleSpecifier,
expected_source_hash: u64, expected_source_hash: u64,
) -> Option<Vec<u8>> { ) -> Option<Vec<u8>> {
let meta_filename = self.get_meta_filename(specifier)?;
let emit_filename = self.get_emit_filename(specifier)?; let emit_filename = self.get_emit_filename(specifier)?;
let bytes = self.disk_cache.get(&emit_filename).ok()?;
// load and verify the meta data file is for this source and CLI version self
let bytes = self.disk_cache.get(&meta_filename).ok()?; .file_serializer
let meta: EmitMetadata = serde_json::from_slice(&bytes).ok()?; .deserialize(bytes, expected_source_hash)
if meta.source_hash != expected_source_hash {
return None;
}
// load and verify the emit is for the meta data
let emit_bytes = self.disk_cache.get(&emit_filename).ok()?;
if meta.emit_hash != compute_emit_hash(&emit_bytes, self.cli_version) {
return None;
}
// everything looks good, return it
Some(emit_bytes)
}
/// Gets the filepath which stores the emit.
pub fn get_emit_filepath(
&self,
specifier: &ModuleSpecifier,
) -> Option<PathBuf> {
Some(
self
.disk_cache
.location
.join(self.get_emit_filename(specifier)?),
)
} }
/// Sets the emit code in the cache. /// Sets the emit code in the cache.
@ -107,32 +73,26 @@ impl EmitCache {
return Ok(()); return Ok(());
} }
let meta_filename = self
.get_meta_filename(specifier)
.ok_or_else(|| anyhow!("Could not get meta filename."))?;
let emit_filename = self let emit_filename = self
.get_emit_filename(specifier) .get_emit_filename(specifier)
.ok_or_else(|| anyhow!("Could not get emit filename."))?; .ok_or_else(|| anyhow!("Could not get emit filename."))?;
let cache_data = self.file_serializer.serialize(code, source_hash);
// save the metadata self.disk_cache.set(&emit_filename, &cache_data)?;
let metadata = EmitMetadata {
source_hash,
emit_hash: compute_emit_hash(code, self.cli_version),
};
self
.disk_cache
.set(&meta_filename, &serde_json::to_vec(&metadata)?)?;
// save the emit source
self.disk_cache.set(&emit_filename, code)?;
Ok(()) Ok(())
} }
fn get_meta_filename(&self, specifier: &ModuleSpecifier) -> Option<PathBuf> { /// Gets the filepath which stores the emit.
self pub fn get_emit_filepath(
.disk_cache &self,
.get_cache_filename_with_extension(specifier, "meta") specifier: &ModuleSpecifier,
) -> Option<PathBuf> {
Some(
self
.disk_cache
.location
.join(self.get_emit_filename(specifier)?),
)
} }
fn get_emit_filename(&self, specifier: &ModuleSpecifier) -> Option<PathBuf> { fn get_emit_filename(&self, specifier: &ModuleSpecifier) -> Option<PathBuf> {
@ -142,15 +102,68 @@ impl EmitCache {
} }
} }
fn compute_emit_hash(bytes: &[u8], cli_version: &str) -> u64 { const LAST_LINE_PREFIX: &str = "\n// denoCacheMetadata=";
// it's ok to use an insecure hash here because
// if someone can change the emit source then they struct EmitFileSerializer {
// can also change the version hash cli_version: &'static str,
FastInsecureHasher::new_without_deno_version() // use cli_version param instead }
.write(bytes)
// emit should not be re-used between cli versions impl EmitFileSerializer {
.write_str(cli_version) pub fn deserialize(
.finish() &self,
mut bytes: Vec<u8>,
expected_source_hash: u64,
) -> Option<Vec<u8>> {
let last_newline_index = bytes.iter().rposition(|&b| b == b'\n')?;
let (content, last_line) = bytes.split_at(last_newline_index);
let hashes = last_line.strip_prefix(LAST_LINE_PREFIX.as_bytes())?;
let hashes = String::from_utf8_lossy(hashes);
let (source_hash, emit_hash) = hashes.split_once(',')?;
// verify the cached emit was generated from this exact source text
let source_hash = source_hash.parse::<u64>().ok()?;
if source_hash != expected_source_hash {
return None;
}
let emit_hash = emit_hash.parse::<u64>().ok()?;
// prevent using an emit from a different cli version or emits that were tampered with
if emit_hash != self.compute_emit_hash(content) {
return None;
}
// everything looks good, truncate and return it
bytes.truncate(content.len());
Some(bytes)
}
/// Builds the bytes stored in the single emit cache file.
///
/// The result is `code` followed by one trailing line made of
/// `LAST_LINE_PREFIX`, the decimal `source_hash`, a comma, and the decimal
/// emit hash of `code` as returned by `compute_emit_hash`.
pub fn serialize(&self, code: &[u8], source_hash: u64) -> Vec<u8> {
  // format the whole metadata trailer up front so its length is known
  let trailer = format!(
    "{}{},{}",
    LAST_LINE_PREFIX,
    source_hash,
    self.compute_emit_hash(code)
  );
  let expected_len = code.len() + trailer.len();
  // allocate once for the code plus the trailer
  let mut output = Vec::with_capacity(expected_len);
  output.extend_from_slice(code);
  output.extend_from_slice(trailer.as_bytes());
  debug_assert_eq!(output.len(), expected_len);
  output
}
/// Computes the hash that is stored next to an emit and later compared
/// against when the emit is read back.
///
/// The hash covers `bytes` (the emitted code) followed by `self.cli_version`,
/// so the same emit bytes hash differently under a different CLI version.
fn compute_emit_hash(&self, bytes: &[u8]) -> u64 {
// it's ok to use an insecure hash here because
// if someone can change the emit source then they
// can also change the version hash
crate::cache::FastInsecureHasher::new_without_deno_version() // use cli_version property instead
.write(bytes)
// emit should not be re-used between cli versions
.write_str(self.cli_version)
.finish()
}
} }
#[cfg(test)] #[cfg(test)]
@ -165,7 +178,9 @@ mod test {
let disk_cache = DiskCache::new(temp_dir.path().as_path()); let disk_cache = DiskCache::new(temp_dir.path().as_path());
let cache = EmitCache { let cache = EmitCache {
disk_cache: disk_cache.clone(), disk_cache: disk_cache.clone(),
cli_version: "1.0.0", file_serializer: EmitFileSerializer {
cli_version: "1.0.0",
},
emit_failed_flag: Default::default(), emit_failed_flag: Default::default(),
}; };
let to_string = let to_string =
@ -197,7 +212,9 @@ mod test {
// try changing the cli version (should not load previous ones) // try changing the cli version (should not load previous ones)
let cache = EmitCache { let cache = EmitCache {
disk_cache: disk_cache.clone(), disk_cache: disk_cache.clone(),
cli_version: "2.0.0", file_serializer: EmitFileSerializer {
cli_version: "2.0.0",
},
emit_failed_flag: Default::default(), emit_failed_flag: Default::default(),
}; };
assert_eq!(cache.get_emit_code(&specifier1, 10), None); assert_eq!(cache.get_emit_code(&specifier1, 10), None);
@ -206,7 +223,9 @@ mod test {
// recreating the cache should still load the data because the CLI version is the same // recreating the cache should still load the data because the CLI version is the same
let cache = EmitCache { let cache = EmitCache {
disk_cache, disk_cache,
cli_version: "2.0.0", file_serializer: EmitFileSerializer {
cli_version: "2.0.0",
},
emit_failed_flag: Default::default(), emit_failed_flag: Default::default(),
}; };
assert_eq!( assert_eq!(