diff --git a/Cargo.lock b/Cargo.lock index d9785975c7..de0095e967 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -335,6 +335,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" +[[package]] +name = "cache_control" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bf2a5fb3207c12b5d208ebc145f967fea5cac41a021c37417ccc31ba40f39ee" + [[package]] name = "cc" version = "1.0.71" @@ -628,6 +634,7 @@ version = "1.16.4" dependencies = [ "atty", "base64 0.13.0", + "cache_control", "chrono", "clap", "data-url", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index dd80d6dfb1..33ead8a5e9 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -48,6 +48,8 @@ deno_runtime = { version = "0.36.0", path = "../runtime" } atty = "=0.2.14" base64 = "=0.13.0" +cache_control = "=0.2.0" +chrono = "=0.4.19" clap = "=2.33.3" data-url = "=0.1.1" dissimilar = "=1.0.2" @@ -89,8 +91,6 @@ fwdansi = "=1.1.0" winapi = { version = "=0.3.9", features = ["knownfolders", "mswsock", "objbase", "shlobj", "tlhelp32", "winbase", "winerror", "winsock2"] } [dev-dependencies] -# Used in benchmark -chrono = "=0.4.19" flaky_test = "=0.1.0" os_pipe = "=0.9.2" pretty_assertions = "=0.7.2" diff --git a/cli/file_fetcher.rs b/cli/file_fetcher.rs index 924076872c..e8ad2ccb2d 100644 --- a/cli/file_fetcher.rs +++ b/cli/file_fetcher.rs @@ -4,10 +4,12 @@ use crate::auth_tokens::AuthTokens; use crate::colors; use crate::http_cache::HttpCache; use crate::http_util::fetch_once; +use crate::http_util::CacheSemantics; use crate::http_util::FetchOnceArgs; use crate::http_util::FetchOnceResult; use crate::text_encoding; use crate::version::get_user_agent; + use data_url::DataUrl; use deno_ast::MediaType; use deno_core::error::custom_error; @@ -34,6 +36,7 @@ use std::io::Read; use std::path::PathBuf; use std::pin::Pin; use std::sync::Arc; +use std::time::SystemTime; pub const 
SUPPORTED_SCHEMES: [&str; 5] = ["data", "blob", "file", "http", "https"]; @@ -89,6 +92,10 @@ pub enum CacheSetting { /// `--reload=https://deno.land/std` or /// `--reload=https://deno.land/std,https://deno.land/x/example`. ReloadSome(Vec), + /// The usability of a cached value is determined by analyzing the cached + /// headers and other metadata associated with a cached response, reloading + /// any "non-fresh" cached responses. + RespectHeaders, /// The cached source files should be used for local modules. This is the /// default behavior of the CLI. Use, @@ -96,10 +103,23 @@ pub enum CacheSetting { impl CacheSetting { /// Returns if the cache should be used for a given specifier. - pub fn should_use(&self, specifier: &ModuleSpecifier) -> bool { + pub fn should_use( + &self, + specifier: &ModuleSpecifier, + http_cache: &HttpCache, + ) -> bool { match self { CacheSetting::ReloadAll => false, CacheSetting::Use | CacheSetting::Only => true, + CacheSetting::RespectHeaders => { + if let Ok((_, headers, cache_time)) = http_cache.get(specifier) { + let cache_semantics = + CacheSemantics::new(headers, cache_time, SystemTime::now()); + cache_semantics.should_use() + } else { + false + } + } CacheSetting::ReloadSome(list) => { let mut url = specifier.clone(); url.set_fragment(None); @@ -312,7 +332,7 @@ impl FileFetcher { return Err(custom_error("Http", "Too many redirects.")); } - let (mut source_file, headers) = match self.http_cache.get(specifier) { + let (mut source_file, headers, _) = match self.http_cache.get(specifier) { Err(err) => { if let Some(err) = err.downcast_ref::() { if err.kind() == std::io::ErrorKind::NotFound { @@ -469,7 +489,7 @@ impl FileFetcher { return futures::future::err(err).boxed(); } - if self.cache_setting.should_use(specifier) { + if self.cache_setting.should_use(specifier, &self.http_cache) { match self.fetch_cached(specifier, redirect_limit) { Ok(Some(file)) => { return futures::future::ok(file).boxed(); @@ -495,7 +515,7 @@ impl 
FileFetcher { info!("{} {}", colors::green("Download"), specifier); let maybe_etag = match self.http_cache.get(specifier) { - Ok((_, headers)) => headers.get("etag").cloned(), + Ok((_, headers, _)) => headers.get("etag").cloned(), _ => None, }; let maybe_auth_token = self.auth_tokens.get(specifier); @@ -682,7 +702,7 @@ mod tests { .fetch_remote(specifier, &mut Permissions::allow_all(), 1) .await; assert!(result.is_ok()); - let (_, headers) = file_fetcher.http_cache.get(specifier).unwrap(); + let (_, headers, _) = file_fetcher.http_cache.get(specifier).unwrap(); (result.unwrap(), headers) } @@ -1065,7 +1085,7 @@ mod tests { // the value above. assert_eq!(file.media_type, MediaType::JavaScript); - let (_, headers) = file_fetcher_02.http_cache.get(&specifier).unwrap(); + let (_, headers, _) = file_fetcher_02.http_cache.get(&specifier).unwrap(); assert_eq!(headers.get("content-type").unwrap(), "text/javascript"); metadata.headers = HashMap::new(); metadata @@ -1194,7 +1214,7 @@ mod tests { "", "redirected files should have empty cached contents" ); - let (_, headers) = file_fetcher.http_cache.get(&specifier).unwrap(); + let (_, headers, _) = file_fetcher.http_cache.get(&specifier).unwrap(); assert_eq!( headers.get("location").unwrap(), "http://localhost:4545/subdir/redirects/redirect1.js" @@ -1204,7 +1224,7 @@ mod tests { fs::read_to_string(redirected_cached_filename).unwrap(), "export const redirect = 1;\n" ); - let (_, headers) = + let (_, headers, _) = file_fetcher.http_cache.get(&redirected_specifier).unwrap(); assert!(headers.get("location").is_none()); } @@ -1247,7 +1267,7 @@ mod tests { "", "redirected files should have empty cached contents" ); - let (_, headers) = file_fetcher.http_cache.get(&specifier).unwrap(); + let (_, headers, _) = file_fetcher.http_cache.get(&specifier).unwrap(); assert_eq!( headers.get("location").unwrap(), "http://localhost:4546/subdir/redirects/redirect1.js" @@ -1258,7 +1278,7 @@ mod tests { "", "redirected files should have empty 
cached contents" ); - let (_, headers) = file_fetcher + let (_, headers, _) = file_fetcher .http_cache .get(&redirected_01_specifier) .unwrap(); @@ -1271,7 +1291,7 @@ mod tests { fs::read_to_string(redirected_02_cached_filename).unwrap(), "export const redirect = 1;\n" ); - let (_, headers) = file_fetcher + let (_, headers, _) = file_fetcher .http_cache .get(&redirected_02_specifier) .unwrap(); @@ -1392,7 +1412,7 @@ mod tests { "", "redirected files should have empty cached contents" ); - let (_, headers) = file_fetcher.http_cache.get(&specifier).unwrap(); + let (_, headers, _) = file_fetcher.http_cache.get(&specifier).unwrap(); assert_eq!( headers.get("location").unwrap(), "/subdir/redirects/redirect1.js" @@ -1402,7 +1422,7 @@ mod tests { fs::read_to_string(redirected_cached_filename).unwrap(), "export const redirect = 1;\n" ); - let (_, headers) = + let (_, headers, _) = file_fetcher.http_cache.get(&redirected_specifier).unwrap(); assert!(headers.get("location").is_none()); } @@ -1499,6 +1519,60 @@ mod tests { assert_eq!(file.source.as_str(), r#"console.log("goodbye deno");"#); } + #[tokio::test] + async fn test_respect_cache_revalidates() { + let _g = test_util::http_server(); + let temp_dir = Rc::new(TempDir::new().unwrap()); + let (file_fetcher, _) = + setup(CacheSetting::RespectHeaders, Some(temp_dir.clone())); + let specifier = + ModuleSpecifier::parse("http://localhost:4545/dynamic").unwrap(); + let result = file_fetcher + .fetch(&specifier, &mut Permissions::allow_all()) + .await; + assert!(result.is_ok()); + let file = result.unwrap(); + let first = file.source.as_str(); + + let (file_fetcher, _) = + setup(CacheSetting::RespectHeaders, Some(temp_dir.clone())); + let result = file_fetcher + .fetch(&specifier, &mut Permissions::allow_all()) + .await; + assert!(result.is_ok()); + let file = result.unwrap(); + let second = file.source.as_str(); + + assert_ne!(first, second); + } + + #[tokio::test] + async fn test_respect_cache_still_fresh() { + let _g = 
test_util::http_server(); + let temp_dir = Rc::new(TempDir::new().unwrap()); + let (file_fetcher, _) = + setup(CacheSetting::RespectHeaders, Some(temp_dir.clone())); + let specifier = + ModuleSpecifier::parse("http://localhost:4545/dynamic_cache").unwrap(); + let result = file_fetcher + .fetch(&specifier, &mut Permissions::allow_all()) + .await; + assert!(result.is_ok()); + let file = result.unwrap(); + let first = file.source.as_str(); + + let (file_fetcher, _) = + setup(CacheSetting::RespectHeaders, Some(temp_dir.clone())); + let result = file_fetcher + .fetch(&specifier, &mut Permissions::allow_all()) + .await; + assert!(result.is_ok()); + let file = result.unwrap(); + let second = file.source.as_str(); + + assert_eq!(first, second); + } + #[tokio::test] async fn test_fetch_local_utf_16be() { let expected = String::from_utf8( diff --git a/cli/http_cache.rs b/cli/http_cache.rs index 9f76364dea..8a1995acef 100644 --- a/cli/http_cache.rs +++ b/cli/http_cache.rs @@ -17,6 +17,7 @@ use std::fs::File; use std::io; use std::path::Path; use std::path::PathBuf; +use std::time::SystemTime; pub const CACHE_PERM: u32 = 0o644; @@ -81,6 +82,8 @@ pub struct HttpCache { pub struct Metadata { pub headers: HeadersMap, pub url: String, + #[serde(default = "SystemTime::now")] + pub now: SystemTime, } impl Metadata { @@ -138,7 +141,10 @@ impl HttpCache { // TODO(bartlomieju): this method should check headers file // and validate against ETAG/Last-modified-as headers. // ETAG check is currently done in `cli/file_fetcher.rs`. 
- pub fn get(&self, url: &Url) -> Result<(File, HeadersMap), AnyError> { + pub fn get( + &self, + url: &Url, + ) -> Result<(File, HeadersMap, SystemTime), AnyError> { let cache_filename = self.location.join( url_to_filename(url) .ok_or_else(|| generic_error("Can't convert url to filename."))?, @@ -147,7 +153,7 @@ impl HttpCache { let file = File::open(cache_filename)?; let metadata = fs::read_to_string(metadata_filename)?; let metadata: Metadata = serde_json::from_str(&metadata)?; - Ok((file, metadata.headers)) + Ok((file, metadata.headers, metadata.now)) } pub fn set( @@ -169,6 +175,7 @@ impl HttpCache { fs_util::atomic_write_file(&cache_filename, content, CACHE_PERM)?; let metadata = Metadata { + now: SystemTime::now(), url: url.to_string(), headers: headers_map, }; @@ -227,7 +234,7 @@ mod tests { assert!(r.is_ok()); let r = cache.get(&url); assert!(r.is_ok()); - let (mut file, headers) = r.unwrap(); + let (mut file, headers, _) = r.unwrap(); let mut content = String::new(); file.read_to_string(&mut content).unwrap(); assert_eq!(content, "Hello world"); diff --git a/cli/http_util.rs b/cli/http_util.rs index 87ed7d598e..562cd06f27 100644 --- a/cli/http_util.rs +++ b/cli/http_util.rs @@ -1,6 +1,9 @@ // Copyright 2018-2021 the Deno authors. All rights reserved. MIT license. 
use crate::auth_tokens::AuthToken; +use cache_control::Cachability; +use cache_control::CacheControl; +use chrono::DateTime; use deno_core::error::custom_error; use deno_core::error::generic_error; use deno_core::error::AnyError; @@ -13,6 +16,8 @@ use deno_runtime::deno_fetch::reqwest::Client; use deno_runtime::deno_fetch::reqwest::StatusCode; use log::debug; use std::collections::HashMap; +use std::time::Duration; +use std::time::SystemTime; /// Construct the next uri based on base uri and location header fragment /// See @@ -46,6 +51,153 @@ fn resolve_url_from_location(base_url: &Url, location: &str) -> Url { // Vec<(String, String)> pub type HeadersMap = HashMap; +/// A structure used to determine if an entity in the http cache can be used. +/// +/// This is heavily influenced by +/// https://github.com/kornelski/rusty-http-cache-semantics which is BSD +/// 2-Clause Licensed and copyright Kornel Lesiński +pub(crate) struct CacheSemantics { + cache_control: CacheControl, + cached: SystemTime, + headers: HashMap, + now: SystemTime, +} + +impl CacheSemantics { + pub fn new( + headers: HashMap, + cached: SystemTime, + now: SystemTime, + ) -> Self { + let cache_control = headers + .get("cache-control") + .map(|v| CacheControl::from_value(v).unwrap_or_default()) + .unwrap_or_default(); + Self { + cache_control, + cached, + headers, + now, + } + } + + fn age(&self) -> Duration { + let mut age = self.age_header_value(); + + if let Ok(resident_time) = self.now.duration_since(self.cached) { + age += resident_time; + } + + age + } + + fn age_header_value(&self) -> Duration { + Duration::from_secs( + self + .headers + .get("age") + .and_then(|v| v.parse().ok()) + .unwrap_or(0), + ) + } + + fn is_stale(&self) -> bool { + self.max_age() <= self.age() + } + + fn max_age(&self) -> Duration { + if self.cache_control.cachability == Some(Cachability::NoCache) { + return Duration::from_secs(0); + } + + if self.headers.get("vary").map(|s| s.trim()) == Some("*") { + return 
Duration::from_secs(0); + } + + if let Some(max_age) = self.cache_control.max_age { + return max_age; + } + + let default_min_ttl = Duration::from_secs(0); + + let server_date = self.raw_server_date(); + if let Some(expires) = self.headers.get("expires") { + return match DateTime::parse_from_rfc2822(expires) { + Err(_) => Duration::from_secs(0), + Ok(expires) => { + let expires = SystemTime::UNIX_EPOCH + + Duration::from_secs(expires.timestamp().max(0) as _); + return default_min_ttl + .max(expires.duration_since(server_date).unwrap_or_default()); + } + }; + } + + if let Some(last_modified) = self.headers.get("last-modified") { + if let Ok(last_modified) = DateTime::parse_from_rfc2822(last_modified) { + let last_modified = SystemTime::UNIX_EPOCH + + Duration::from_secs(last_modified.timestamp().max(0) as _); + if let Ok(diff) = server_date.duration_since(last_modified) { + let secs_left = diff.as_secs() as f64 * 0.1; + return default_min_ttl.max(Duration::from_secs(secs_left as _)); + } + } + } + + default_min_ttl + } + + fn raw_server_date(&self) -> SystemTime { + self + .headers + .get("date") + .and_then(|d| DateTime::parse_from_rfc2822(d).ok()) + .and_then(|d| { + SystemTime::UNIX_EPOCH + .checked_add(Duration::from_secs(d.timestamp() as _)) + }) + .unwrap_or(self.cached) + } + + /// Returns true if the cached value is "fresh" respecting cached headers, + /// otherwise returns false. 
+ pub fn should_use(&self) -> bool { + if self.cache_control.cachability == Some(Cachability::NoCache) { + return false; + } + + if let Some(max_age) = self.cache_control.max_age { + if self.age() > max_age { + return false; + } + } + + if let Some(min_fresh) = self.cache_control.min_fresh { + if self.time_to_live() < min_fresh { + return false; + } + } + + if self.is_stale() { + let has_max_stale = self.cache_control.max_stale.is_some(); + let allows_stale = has_max_stale + && self + .cache_control + .max_stale + .map_or(true, |val| val > self.age() - self.max_age()); + if !allows_stale { + return false; + } + } + + true + } + + fn time_to_live(&self) -> Duration { + self.max_age().checked_sub(self.age()).unwrap_or_default() + } +} + #[derive(Debug, PartialEq)] pub enum FetchOnceResult { Code(Vec, HeadersMap), diff --git a/cli/lsp/registries.rs b/cli/lsp/registries.rs index fda8d52058..0703e468a2 100644 --- a/cli/lsp/registries.rs +++ b/cli/lsp/registries.rs @@ -282,7 +282,7 @@ impl Default for ModuleRegistry { let dir = deno_dir::DenoDir::new(None).unwrap(); let location = dir.root.join("registries"); let http_cache = HttpCache::new(&location); - let cache_setting = CacheSetting::Use; + let cache_setting = CacheSetting::RespectHeaders; let file_fetcher = FileFetcher::new( http_cache, cache_setting, @@ -305,7 +305,7 @@ impl ModuleRegistry { let http_cache = HttpCache::new(location); let file_fetcher = FileFetcher::new( http_cache, - CacheSetting::Use, + CacheSetting::RespectHeaders, true, None, BlobStore::default(), @@ -387,12 +387,17 @@ impl ModuleRegistry { .await; // if there is an error fetching, we will cache an empty file, so that // subsequent requests they are just an empty doc which will error without - // needing to connect to the remote URL + // needing to connect to the remote URL. We will cache it for 1 week. 
if fetch_result.is_err() { + let mut headers_map = HashMap::new(); + headers_map.insert( + "cache-control".to_string(), + "max-age=604800, immutable".to_string(), + ); self .file_fetcher .http_cache - .set(specifier, HashMap::default(), &[])?; + .set(specifier, headers_map, &[])?; } let file = fetch_result?; let config: RegistryConfigurationJson = serde_json::from_str(&file.source)?; diff --git a/test_util/src/lib.rs b/test_util/src/lib.rs index 3cae1d7e13..4ad6f1c3c2 100644 --- a/test_util/src/lib.rs +++ b/test_util/src/lib.rs @@ -893,6 +893,25 @@ async fn main_server( ); Ok(res) } + (_, "/dynamic") => { + let mut res = Response::new(Body::from( + serde_json::to_string_pretty(&std::time::SystemTime::now()).unwrap(), + )); + res + .headers_mut() + .insert("cache-control", HeaderValue::from_static("no-cache")); + Ok(res) + } + (_, "/dynamic_cache") => { + let mut res = Response::new(Body::from( + serde_json::to_string_pretty(&std::time::SystemTime::now()).unwrap(), + )); + res.headers_mut().insert( + "cache-control", + HeaderValue::from_static("public, max-age=604800, immutable"), + ); + Ok(res) + } _ => { let mut file_path = testdata_path(); file_path.push(&req.uri().path()[1..]);