// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. use std::collections::BTreeMap; use std::collections::HashMap; use std::io::BufReader; use std::io::BufWriter; use std::io::Read; use std::io::Write; use std::path::Path; use std::path::PathBuf; use std::sync::Arc; use deno_ast::ModuleSpecifier; use deno_core::anyhow::bail; use deno_core::error::AnyError; use deno_core::parking_lot::Mutex; use deno_core::unsync::sync::AtomicFlag; use deno_runtime::code_cache::CodeCache; use deno_runtime::code_cache::CodeCacheType; use crate::cache::FastInsecureHasher; use crate::util::path::get_atomic_file_path; use crate::worker::CliCodeCache; enum CodeCacheStrategy { FirstRun(FirstRunCodeCacheStrategy), SubsequentRun(SubsequentRunCodeCacheStrategy), } #[derive(Debug, Clone, PartialEq, Eq)] pub struct DenoCompileCodeCacheEntry { pub source_hash: u64, pub data: Vec, } pub struct DenoCompileCodeCache { strategy: CodeCacheStrategy, } impl DenoCompileCodeCache { pub fn new(file_path: PathBuf, cache_key: u64) -> Self { // attempt to deserialize the cache data match deserialize(&file_path, cache_key) { Ok(data) => { log::debug!( "Loaded {} code cache entries from {}", data.len(), file_path.display() ); Self { strategy: CodeCacheStrategy::SubsequentRun( SubsequentRunCodeCacheStrategy { is_finished: AtomicFlag::lowered(), data: Mutex::new(data), }, ), } } Err(err) => { log::debug!( "Failed to deserialize code cache from {}: {:#}", file_path.display(), err ); Self { strategy: CodeCacheStrategy::FirstRun(FirstRunCodeCacheStrategy { cache_key, file_path, is_finished: AtomicFlag::lowered(), data: Mutex::new(FirstRunCodeCacheData { cache: HashMap::new(), add_count: 0, }), }), } } } } } impl CodeCache for DenoCompileCodeCache { fn get_sync( &self, specifier: &ModuleSpecifier, code_cache_type: CodeCacheType, source_hash: u64, ) -> Option> { match &self.strategy { CodeCacheStrategy::FirstRun(strategy) => { if !strategy.is_finished.is_raised() { // we keep track of how many times the cache is requested // then serialize the cache when we get that number of // "set" calls strategy.data.lock().add_count += 1; } None } CodeCacheStrategy::SubsequentRun(strategy) => { if strategy.is_finished.is_raised() { return None; } strategy.take_from_cache(specifier, code_cache_type, source_hash) } } } fn set_sync( &self, specifier: ModuleSpecifier, code_cache_type: CodeCacheType, source_hash: u64, bytes: &[u8], ) { match &self.strategy { CodeCacheStrategy::FirstRun(strategy) => { if strategy.is_finished.is_raised() { return; } let data_to_serialize = { let mut data = strategy.data.lock(); data.cache.insert( (specifier.to_string(), code_cache_type), DenoCompileCodeCacheEntry { source_hash, data: bytes.to_vec(), }, ); if data.add_count != 0 { data.add_count -= 1; } if data.add_count == 0 { // don't allow using the cache anymore strategy.is_finished.raise(); if data.cache.is_empty() { None } else { Some(std::mem::take(&mut data.cache)) } } else { None } }; if let Some(cache_data) = &data_to_serialize { strategy.write_cache_data(cache_data); } } CodeCacheStrategy::SubsequentRun(_) => { // do nothing } } } } impl CliCodeCache for DenoCompileCodeCache { fn enabled(&self) -> bool { match &self.strategy { CodeCacheStrategy::FirstRun(strategy) => { !strategy.is_finished.is_raised() } CodeCacheStrategy::SubsequentRun(strategy) => { !strategy.is_finished.is_raised() } } } fn as_code_cache(self: Arc) -> Arc { self } } type CodeCacheKey = (String, CodeCacheType); struct FirstRunCodeCacheData { cache: HashMap, add_count: usize, } struct FirstRunCodeCacheStrategy { cache_key: u64, file_path: PathBuf, is_finished: AtomicFlag, data: Mutex, } impl FirstRunCodeCacheStrategy { fn write_cache_data( &self, cache_data: &HashMap, ) { let count = cache_data.len(); let temp_file = get_atomic_file_path(&self.file_path); match serialize(&temp_file, self.cache_key, cache_data) { Ok(()) => { if let Err(err) = std::fs::rename(&temp_file, &self.file_path) { log::debug!("Failed to rename code cache: {}", err); let _ = std::fs::remove_file(&temp_file); } else { log::debug!("Serialized {} code cache entries", count); } } Err(err) => { let _ = std::fs::remove_file(&temp_file); log::debug!("Failed to serialize code cache: {}", err); } } } } struct SubsequentRunCodeCacheStrategy { is_finished: AtomicFlag, data: Mutex>, } impl SubsequentRunCodeCacheStrategy { fn take_from_cache( &self, specifier: &ModuleSpecifier, code_cache_type: CodeCacheType, source_hash: u64, ) -> Option> { let mut data = self.data.lock(); // todo(dsherret): how to avoid the clone here? let entry = data.remove(&(specifier.to_string(), code_cache_type))?; if entry.source_hash != source_hash { return None; } if data.is_empty() { self.is_finished.raise(); } Some(entry.data) } } /// File format: /// -
/// - /// - /// - <[entry length]> - u64 * number of entries /// - <[entry]> /// - <[u8]: entry data> /// - /// - : code cache type /// - /// - /// - fn serialize( file_path: &Path, cache_key: u64, cache: &HashMap, ) -> Result<(), AnyError> { let cache_file = std::fs::OpenOptions::new() .create(true) .truncate(true) .write(true) .open(file_path)?; let mut writer = BufWriter::new(cache_file); serialize_with_writer(&mut writer, cache_key, cache) } fn serialize_with_writer( writer: &mut BufWriter, cache_key: u64, cache: &HashMap, ) -> Result<(), AnyError> { // header writer.write_all(&cache_key.to_le_bytes())?; writer.write_all(&(cache.len() as u32).to_le_bytes())?; // lengths of each entry for ((specifier, _), entry) in cache { let len: u64 = entry.data.len() as u64 + specifier.len() as u64 + 1 + 4 + 8 + 8; writer.write_all(&len.to_le_bytes())?; } // entries for ((specifier, code_cache_type), entry) in cache { writer.write_all(&entry.data)?; writer.write_all(&[match code_cache_type { CodeCacheType::EsModule => 0, CodeCacheType::Script => 1, }])?; writer.write_all(specifier.as_bytes())?; writer.write_all(&(specifier.len() as u32).to_le_bytes())?; writer.write_all(&entry.source_hash.to_le_bytes())?; let hash: u64 = FastInsecureHasher::new_without_deno_version() .write(&entry.data) .finish(); writer.write_all(&hash.to_le_bytes())?; } writer.flush()?; Ok(()) } fn deserialize( file_path: &Path, expected_cache_key: u64, ) -> Result, AnyError> { let cache_file = std::fs::File::open(file_path)?; let mut reader = BufReader::new(cache_file); deserialize_with_reader(&mut reader, expected_cache_key) } fn deserialize_with_reader( reader: &mut BufReader, expected_cache_key: u64, ) -> Result, AnyError> { // it's very important to use this below so that a corrupt cache file // doesn't cause a memory allocation error fn new_vec_sized( capacity: usize, default_value: T, ) -> Result, AnyError> { let mut vec = Vec::new(); vec.try_reserve(capacity)?; vec.resize(capacity, default_value); Ok(vec) } fn try_subtract(a: usize, b: usize) -> Result { if a < b { bail!("Integer underflow"); } Ok(a - b) } let mut header_bytes = vec![0; 8 + 4]; reader.read_exact(&mut header_bytes)?; let actual_cache_key = u64::from_le_bytes(header_bytes[..8].try_into()?); if actual_cache_key != expected_cache_key { // cache bust bail!("Cache key mismatch"); } let len = u32::from_le_bytes(header_bytes[8..].try_into()?) as usize; // read the lengths for each entry found in the file let entry_len_bytes_capacity = len * 8; let mut entry_len_bytes = new_vec_sized(entry_len_bytes_capacity, 0)?; reader.read_exact(&mut entry_len_bytes)?; let mut lengths = Vec::new(); lengths.try_reserve(len)?; for i in 0..len { let pos = i * 8; lengths.push( u64::from_le_bytes(entry_len_bytes[pos..pos + 8].try_into()?) as usize, ); } let mut map = HashMap::new(); map.try_reserve(len)?; for len in lengths { let mut buffer = new_vec_sized(len, 0)?; reader.read_exact(&mut buffer)?; let entry_data_hash_start_pos = try_subtract(buffer.len(), 8)?; let expected_entry_data_hash = u64::from_le_bytes(buffer[entry_data_hash_start_pos..].try_into()?); let source_hash_start_pos = try_subtract(entry_data_hash_start_pos, 8)?; let source_hash = u64::from_le_bytes( buffer[source_hash_start_pos..entry_data_hash_start_pos].try_into()?, ); let specifier_end_pos = try_subtract(source_hash_start_pos, 4)?; let specifier_len = u32::from_le_bytes( buffer[specifier_end_pos..source_hash_start_pos].try_into()?, ) as usize; let specifier_start_pos = try_subtract(specifier_end_pos, specifier_len)?; let specifier = String::from_utf8( buffer[specifier_start_pos..specifier_end_pos].to_vec(), )?; let code_cache_type_pos = try_subtract(specifier_start_pos, 1)?; let code_cache_type = match buffer[code_cache_type_pos] { 0 => CodeCacheType::EsModule, 1 => CodeCacheType::Script, _ => bail!("Invalid code cache type"), }; buffer.truncate(code_cache_type_pos); let actual_entry_data_hash: u64 = FastInsecureHasher::new_without_deno_version() .write(&buffer) .finish(); if expected_entry_data_hash != actual_entry_data_hash { bail!("Hash mismatch.") } map.insert( (specifier, code_cache_type), DenoCompileCodeCacheEntry { source_hash, data: buffer, }, ); } Ok(map) } #[cfg(test)] mod test { use test_util::TempDir; use super::*; use std::fs::File; #[test] fn serialize_deserialize() { let cache_key = 123456; let cache = { let mut cache = HashMap::new(); cache.insert( ("specifier1".to_string(), CodeCacheType::EsModule), DenoCompileCodeCacheEntry { source_hash: 1, data: vec![1, 2, 3], }, ); cache.insert( ("specifier2".to_string(), CodeCacheType::EsModule), DenoCompileCodeCacheEntry { source_hash: 2, data: vec![4, 5, 6], }, ); cache.insert( ("specifier2".to_string(), CodeCacheType::Script), DenoCompileCodeCacheEntry { source_hash: 2, data: vec![6, 5, 1], }, ); cache }; let mut buffer = Vec::new(); serialize_with_writer(&mut BufWriter::new(&mut buffer), cache_key, &cache) .unwrap(); let deserialized = deserialize_with_reader(&mut BufReader::new(&buffer[..]), cache_key) .unwrap(); assert_eq!(cache, deserialized); } #[test] fn serialize_deserialize_empty() { let cache_key = 1234; let cache = HashMap::new(); let mut buffer = Vec::new(); serialize_with_writer(&mut BufWriter::new(&mut buffer), cache_key, &cache) .unwrap(); let deserialized = deserialize_with_reader(&mut BufReader::new(&buffer[..]), cache_key) .unwrap(); assert_eq!(cache, deserialized); } #[test] fn serialize_deserialize_corrupt() { let buffer = "corrupttestingtestingtesting".as_bytes().to_vec(); let err = deserialize_with_reader(&mut BufReader::new(&buffer[..]), 1234) .unwrap_err(); assert_eq!(err.to_string(), "Cache key mismatch"); } #[test] fn code_cache() { let temp_dir = TempDir::new(); let file_path = temp_dir.path().join("cache.bin").to_path_buf(); let url1 = ModuleSpecifier::parse("https://deno.land/example1.js").unwrap(); let url2 = ModuleSpecifier::parse("https://deno.land/example2.js").unwrap(); // first run { let code_cache = DenoCompileCodeCache::new(file_path.clone(), 1234); assert!(code_cache .get_sync(&url1, CodeCacheType::EsModule, 0) .is_none()); assert!(code_cache .get_sync(&url2, CodeCacheType::EsModule, 1) .is_none()); assert!(code_cache.enabled()); code_cache.set_sync(url1.clone(), CodeCacheType::EsModule, 0, &[1, 2, 3]); assert!(code_cache.enabled()); assert!(!file_path.exists()); code_cache.set_sync(url2.clone(), CodeCacheType::EsModule, 1, &[2, 1, 3]); assert!(file_path.exists()); // now the new code cache exists assert!(!code_cache.enabled()); // no longer enabled } // second run { let code_cache = DenoCompileCodeCache::new(file_path.clone(), 1234); assert!(code_cache.enabled()); let result1 = code_cache .get_sync(&url1, CodeCacheType::EsModule, 0) .unwrap(); assert!(code_cache.enabled()); let result2 = code_cache .get_sync(&url2, CodeCacheType::EsModule, 1) .unwrap(); assert!(!code_cache.enabled()); // no longer enabled assert_eq!(result1, vec![1, 2, 3]); assert_eq!(result2, vec![2, 1, 3]); } // new cache key first run { let code_cache = DenoCompileCodeCache::new(file_path.clone(), 54321); assert!(code_cache .get_sync(&url1, CodeCacheType::EsModule, 0) .is_none()); assert!(code_cache .get_sync(&url2, CodeCacheType::EsModule, 1) .is_none()); code_cache.set_sync(url1.clone(), CodeCacheType::EsModule, 0, &[2, 2, 3]); code_cache.set_sync(url2.clone(), CodeCacheType::EsModule, 1, &[3, 2, 3]); } // new cache key second run { let code_cache = DenoCompileCodeCache::new(file_path.clone(), 54321); let result1 = code_cache .get_sync(&url1, CodeCacheType::EsModule, 0) .unwrap(); assert_eq!(result1, vec![2, 2, 3]); assert!(code_cache .get_sync(&url2, CodeCacheType::EsModule, 5) // different hash will cause none .is_none()); } } }