1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2025-01-01 03:54:06 -05:00
denoland-deno/cli/standalone/code_cache.rs
2024-11-21 18:16:40 -05:00

523 lines
15 KiB
Rust

// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::io::BufReader;
use std::io::BufWriter;
use std::io::Read;
use std::io::Write;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use deno_ast::ModuleSpecifier;
use deno_core::anyhow::bail;
use deno_core::error::AnyError;
use deno_core::parking_lot::Mutex;
use deno_core::unsync::sync::AtomicFlag;
use deno_runtime::code_cache::CodeCache;
use deno_runtime::code_cache::CodeCacheType;
use crate::cache::FastInsecureHasher;
use crate::util::path::get_atomic_file_path;
use crate::worker::CliCodeCache;
enum CodeCacheStrategy {
FirstRun(FirstRunCodeCacheStrategy),
SubsequentRun(SubsequentRunCodeCacheStrategy),
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DenoCompileCodeCacheEntry {
pub source_hash: u64,
pub data: Vec<u8>,
}
pub struct DenoCompileCodeCache {
strategy: CodeCacheStrategy,
}
impl DenoCompileCodeCache {
pub fn new(file_path: PathBuf, cache_key: u64) -> Self {
// attempt to deserialize the cache data
match deserialize(&file_path, cache_key) {
Ok(data) => {
log::debug!(
"Loaded {} code cache entries from {}",
data.len(),
file_path.display()
);
Self {
strategy: CodeCacheStrategy::SubsequentRun(
SubsequentRunCodeCacheStrategy {
is_finished: AtomicFlag::lowered(),
data: Mutex::new(data),
},
),
}
}
Err(err) => {
log::debug!(
"Failed to deserialize code cache from {}: {:#}",
file_path.display(),
err
);
Self {
strategy: CodeCacheStrategy::FirstRun(FirstRunCodeCacheStrategy {
cache_key,
file_path,
is_finished: AtomicFlag::lowered(),
data: Mutex::new(FirstRunCodeCacheData {
cache: HashMap::new(),
add_count: 0,
}),
}),
}
}
}
}
}
impl CodeCache for DenoCompileCodeCache {
fn get_sync(
&self,
specifier: &ModuleSpecifier,
code_cache_type: CodeCacheType,
source_hash: u64,
) -> Option<Vec<u8>> {
match &self.strategy {
CodeCacheStrategy::FirstRun(strategy) => {
if !strategy.is_finished.is_raised() {
// we keep track of how many times the cache is requested
// then serialize the cache when we get that number of
// "set" calls
strategy.data.lock().add_count += 1;
}
None
}
CodeCacheStrategy::SubsequentRun(strategy) => {
if strategy.is_finished.is_raised() {
return None;
}
strategy.take_from_cache(specifier, code_cache_type, source_hash)
}
}
}
fn set_sync(
&self,
specifier: ModuleSpecifier,
code_cache_type: CodeCacheType,
source_hash: u64,
bytes: &[u8],
) {
match &self.strategy {
CodeCacheStrategy::FirstRun(strategy) => {
if strategy.is_finished.is_raised() {
return;
}
let data_to_serialize = {
let mut data = strategy.data.lock();
data.cache.insert(
(specifier.to_string(), code_cache_type),
DenoCompileCodeCacheEntry {
source_hash,
data: bytes.to_vec(),
},
);
if data.add_count != 0 {
data.add_count -= 1;
}
if data.add_count == 0 {
// don't allow using the cache anymore
strategy.is_finished.raise();
if data.cache.is_empty() {
None
} else {
Some(std::mem::take(&mut data.cache))
}
} else {
None
}
};
if let Some(cache_data) = &data_to_serialize {
strategy.write_cache_data(cache_data);
}
}
CodeCacheStrategy::SubsequentRun(_) => {
// do nothing
}
}
}
}
impl CliCodeCache for DenoCompileCodeCache {
fn enabled(&self) -> bool {
match &self.strategy {
CodeCacheStrategy::FirstRun(strategy) => {
!strategy.is_finished.is_raised()
}
CodeCacheStrategy::SubsequentRun(strategy) => {
!strategy.is_finished.is_raised()
}
}
}
fn as_code_cache(self: Arc<Self>) -> Arc<dyn CodeCache> {
self
}
}
type CodeCacheKey = (String, CodeCacheType);
struct FirstRunCodeCacheData {
cache: HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>,
add_count: usize,
}
struct FirstRunCodeCacheStrategy {
cache_key: u64,
file_path: PathBuf,
is_finished: AtomicFlag,
data: Mutex<FirstRunCodeCacheData>,
}
impl FirstRunCodeCacheStrategy {
fn write_cache_data(
&self,
cache_data: &HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>,
) {
let count = cache_data.len();
let temp_file = get_atomic_file_path(&self.file_path);
match serialize(&temp_file, self.cache_key, cache_data) {
Ok(()) => {
if let Err(err) = std::fs::rename(&temp_file, &self.file_path) {
log::debug!("Failed to rename code cache: {}", err);
let _ = std::fs::remove_file(&temp_file);
} else {
log::debug!("Serialized {} code cache entries", count);
}
}
Err(err) => {
let _ = std::fs::remove_file(&temp_file);
log::debug!("Failed to serialize code cache: {}", err);
}
}
}
}
struct SubsequentRunCodeCacheStrategy {
is_finished: AtomicFlag,
data: Mutex<HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>>,
}
impl SubsequentRunCodeCacheStrategy {
fn take_from_cache(
&self,
specifier: &ModuleSpecifier,
code_cache_type: CodeCacheType,
source_hash: u64,
) -> Option<Vec<u8>> {
let mut data = self.data.lock();
// todo(dsherret): how to avoid the clone here?
let entry = data.remove(&(specifier.to_string(), code_cache_type))?;
if entry.source_hash != source_hash {
return None;
}
if data.is_empty() {
self.is_finished.raise();
}
Some(entry.data)
}
}
/// File format:
/// - <header>
/// - <cache key>
/// - <u32: number of entries>
/// - <[entry length]> - u64 * number of entries
/// - <[entry]>
/// - <[u8]: entry data>
/// - <String: specifier>
/// - <u8>: code cache type
/// - <u32: specifier length>
/// - <u64: source hash>
/// - <u64: entry data hash>
fn serialize(
file_path: &Path,
cache_key: u64,
cache: &HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>,
) -> Result<(), AnyError> {
let cache_file = std::fs::OpenOptions::new()
.create(true)
.truncate(true)
.write(true)
.open(file_path)?;
let mut writer = BufWriter::new(cache_file);
serialize_with_writer(&mut writer, cache_key, cache)
}
fn serialize_with_writer<T: Write>(
writer: &mut BufWriter<T>,
cache_key: u64,
cache: &HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>,
) -> Result<(), AnyError> {
// header
writer.write_all(&cache_key.to_le_bytes())?;
writer.write_all(&(cache.len() as u32).to_le_bytes())?;
// lengths of each entry
for ((specifier, _), entry) in cache {
let len: u64 =
entry.data.len() as u64 + specifier.len() as u64 + 1 + 4 + 8 + 8;
writer.write_all(&len.to_le_bytes())?;
}
// entries
for ((specifier, code_cache_type), entry) in cache {
writer.write_all(&entry.data)?;
writer.write_all(&[match code_cache_type {
CodeCacheType::EsModule => 0,
CodeCacheType::Script => 1,
}])?;
writer.write_all(specifier.as_bytes())?;
writer.write_all(&(specifier.len() as u32).to_le_bytes())?;
writer.write_all(&entry.source_hash.to_le_bytes())?;
let hash: u64 = FastInsecureHasher::new_without_deno_version()
.write(&entry.data)
.finish();
writer.write_all(&hash.to_le_bytes())?;
}
writer.flush()?;
Ok(())
}
fn deserialize(
file_path: &Path,
expected_cache_key: u64,
) -> Result<HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>, AnyError> {
let cache_file = std::fs::File::open(file_path)?;
let mut reader = BufReader::new(cache_file);
deserialize_with_reader(&mut reader, expected_cache_key)
}
fn deserialize_with_reader<T: Read>(
reader: &mut BufReader<T>,
expected_cache_key: u64,
) -> Result<HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>, AnyError> {
// it's very important to use this below so that a corrupt cache file
// doesn't cause a memory allocation error
fn new_vec_sized<T: Clone>(
capacity: usize,
default_value: T,
) -> Result<Vec<T>, AnyError> {
let mut vec = Vec::new();
vec.try_reserve(capacity)?;
vec.resize(capacity, default_value);
Ok(vec)
}
fn try_subtract(a: usize, b: usize) -> Result<usize, AnyError> {
if a < b {
bail!("Integer underflow");
}
Ok(a - b)
}
let mut header_bytes = vec![0; 8 + 4];
reader.read_exact(&mut header_bytes)?;
let actual_cache_key = u64::from_le_bytes(header_bytes[..8].try_into()?);
if actual_cache_key != expected_cache_key {
// cache bust
bail!("Cache key mismatch");
}
let len = u32::from_le_bytes(header_bytes[8..].try_into()?) as usize;
// read the lengths for each entry found in the file
let entry_len_bytes_capacity = len * 8;
let mut entry_len_bytes = new_vec_sized(entry_len_bytes_capacity, 0)?;
reader.read_exact(&mut entry_len_bytes)?;
let mut lengths = Vec::new();
lengths.try_reserve(len)?;
for i in 0..len {
let pos = i * 8;
lengths.push(
u64::from_le_bytes(entry_len_bytes[pos..pos + 8].try_into()?) as usize,
);
}
let mut map = HashMap::new();
map.try_reserve(len)?;
for len in lengths {
let mut buffer = new_vec_sized(len, 0)?;
reader.read_exact(&mut buffer)?;
let entry_data_hash_start_pos = try_subtract(buffer.len(), 8)?;
let expected_entry_data_hash =
u64::from_le_bytes(buffer[entry_data_hash_start_pos..].try_into()?);
let source_hash_start_pos = try_subtract(entry_data_hash_start_pos, 8)?;
let source_hash = u64::from_le_bytes(
buffer[source_hash_start_pos..entry_data_hash_start_pos].try_into()?,
);
let specifier_end_pos = try_subtract(source_hash_start_pos, 4)?;
let specifier_len = u32::from_le_bytes(
buffer[specifier_end_pos..source_hash_start_pos].try_into()?,
) as usize;
let specifier_start_pos = try_subtract(specifier_end_pos, specifier_len)?;
let specifier = String::from_utf8(
buffer[specifier_start_pos..specifier_end_pos].to_vec(),
)?;
let code_cache_type_pos = try_subtract(specifier_start_pos, 1)?;
let code_cache_type = match buffer[code_cache_type_pos] {
0 => CodeCacheType::EsModule,
1 => CodeCacheType::Script,
_ => bail!("Invalid code cache type"),
};
buffer.truncate(code_cache_type_pos);
let actual_entry_data_hash: u64 =
FastInsecureHasher::new_without_deno_version()
.write(&buffer)
.finish();
if expected_entry_data_hash != actual_entry_data_hash {
bail!("Hash mismatch.")
}
map.insert(
(specifier, code_cache_type),
DenoCompileCodeCacheEntry {
source_hash,
data: buffer,
},
);
}
Ok(map)
}
#[cfg(test)]
mod test {
use test_util::TempDir;
use super::*;
use std::fs::File;
#[test]
fn serialize_deserialize() {
let cache_key = 123456;
let cache = {
let mut cache = HashMap::new();
cache.insert(
("specifier1".to_string(), CodeCacheType::EsModule),
DenoCompileCodeCacheEntry {
source_hash: 1,
data: vec![1, 2, 3],
},
);
cache.insert(
("specifier2".to_string(), CodeCacheType::EsModule),
DenoCompileCodeCacheEntry {
source_hash: 2,
data: vec![4, 5, 6],
},
);
cache.insert(
("specifier2".to_string(), CodeCacheType::Script),
DenoCompileCodeCacheEntry {
source_hash: 2,
data: vec![6, 5, 1],
},
);
cache
};
let mut buffer = Vec::new();
serialize_with_writer(&mut BufWriter::new(&mut buffer), cache_key, &cache)
.unwrap();
let deserialized =
deserialize_with_reader(&mut BufReader::new(&buffer[..]), cache_key)
.unwrap();
assert_eq!(cache, deserialized);
}
#[test]
fn serialize_deserialize_empty() {
let cache_key = 1234;
let cache = HashMap::new();
let mut buffer = Vec::new();
serialize_with_writer(&mut BufWriter::new(&mut buffer), cache_key, &cache)
.unwrap();
let deserialized =
deserialize_with_reader(&mut BufReader::new(&buffer[..]), cache_key)
.unwrap();
assert_eq!(cache, deserialized);
}
#[test]
fn serialize_deserialize_corrupt() {
let buffer = "corrupttestingtestingtesting".as_bytes().to_vec();
let err = deserialize_with_reader(&mut BufReader::new(&buffer[..]), 1234)
.unwrap_err();
assert_eq!(err.to_string(), "Cache key mismatch");
}
#[test]
fn code_cache() {
let temp_dir = TempDir::new();
let file_path = temp_dir.path().join("cache.bin").to_path_buf();
let url1 = ModuleSpecifier::parse("https://deno.land/example1.js").unwrap();
let url2 = ModuleSpecifier::parse("https://deno.land/example2.js").unwrap();
// first run
{
let code_cache = DenoCompileCodeCache::new(file_path.clone(), 1234);
assert!(code_cache
.get_sync(&url1, CodeCacheType::EsModule, 0)
.is_none());
assert!(code_cache
.get_sync(&url2, CodeCacheType::EsModule, 1)
.is_none());
assert!(code_cache.enabled());
code_cache.set_sync(url1.clone(), CodeCacheType::EsModule, 0, &[1, 2, 3]);
assert!(code_cache.enabled());
assert!(!file_path.exists());
code_cache.set_sync(url2.clone(), CodeCacheType::EsModule, 1, &[2, 1, 3]);
assert!(file_path.exists()); // now the new code cache exists
assert!(!code_cache.enabled()); // no longer enabled
}
// second run
{
let code_cache = DenoCompileCodeCache::new(file_path.clone(), 1234);
assert!(code_cache.enabled());
let result1 = code_cache
.get_sync(&url1, CodeCacheType::EsModule, 0)
.unwrap();
assert!(code_cache.enabled());
let result2 = code_cache
.get_sync(&url2, CodeCacheType::EsModule, 1)
.unwrap();
assert!(!code_cache.enabled()); // no longer enabled
assert_eq!(result1, vec![1, 2, 3]);
assert_eq!(result2, vec![2, 1, 3]);
}
// new cache key first run
{
let code_cache = DenoCompileCodeCache::new(file_path.clone(), 54321);
assert!(code_cache
.get_sync(&url1, CodeCacheType::EsModule, 0)
.is_none());
assert!(code_cache
.get_sync(&url2, CodeCacheType::EsModule, 1)
.is_none());
code_cache.set_sync(url1.clone(), CodeCacheType::EsModule, 0, &[2, 2, 3]);
code_cache.set_sync(url2.clone(), CodeCacheType::EsModule, 1, &[3, 2, 3]);
}
// new cache key second run
{
let code_cache = DenoCompileCodeCache::new(file_path.clone(), 54321);
let result1 = code_cache
.get_sync(&url1, CodeCacheType::EsModule, 0)
.unwrap();
assert_eq!(result1, vec![2, 2, 3]);
assert!(code_cache
.get_sync(&url2, CodeCacheType::EsModule, 5) // different hash will cause none
.is_none());
}
}
}