1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2025-01-11 08:33:43 -05:00

fix(unstable): vendor cache should support adding files to hashed directories (#20070)

This changes the design of the manifest.json file to have a separate
"folders" map for mapping hashed directories. This allows, for example,
to add files in a folder like `http_localhost_8000/#testing_5de71/` and
have them be resolved automatically as long as their remaining
components are identity-mappable to the file system (not hashed). It
also saves space in the manifest.json file by only including the hashed
directory instead of each descendant file.

```
// manifest.json
{
  "folders": {
    "https://localhost/NOT_MAPPABLE/": "localhost/#not_mappable_5cefgh"
  },
  "modules": {
    "https://localhost/folder/file": {
      "headers": {
        "content-type": "application/javascript"
      }
    },
  }
}

// folder structure
localhost
  - folder
    - #file_2defn (note: I've made up the hashes in these examples)
  - #not_mappable_5cefgh
    - mod.ts
    - etc.ts
    - more_files.ts
```
This commit is contained in:
David Sherret 2023-08-06 12:25:48 -04:00 committed by GitHub
parent c1c8eb3d55
commit 7b5bc87f29
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 336 additions and 112 deletions

4
Cargo.lock generated
View file

@ -1240,9 +1240,9 @@ dependencies = [
[[package]]
name = "deno_media_type"
version = "0.1.0"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63772a60d740a41d97fbffb4788fc3779e6df47289e01892c12be38f4a5beded"
checksum = "001d69b833ed4d244b608bab9c07069bfb570f631b763b58e73f82a020bf84ef"
dependencies = [
"data-url",
"serde",

View file

@ -48,7 +48,7 @@ napi_sym = { version = "0.44.0", path = "./cli/napi/sym" }
deno_bench_util = { version = "0.108.0", path = "./bench_util" }
test_util = { path = "./test_util" }
deno_lockfile = "0.14.1"
deno_media_type = { version = "0.1.0", features = ["module_specifier"] }
deno_media_type = { version = "0.1.1", features = ["module_specifier"] }
deno_npm = "0.10.1"
deno_semver = "0.3.0"

View file

@ -49,17 +49,17 @@ impl LocalLspHttpCache {
}
pub fn get_file_url(&self, url: &Url) -> Option<Url> {
{
let sub_path = {
let data = self.cache.manifest.data.read();
if let Some(data) = data.get(url) {
if let Some(path) = &data.path {
return Url::from_file_path(self.cache.path.join(path)).ok();
}
}
}
match self.cache.get_cache_filepath(url, &Default::default()) {
Ok(path) if path.exists() => Url::from_file_path(path).ok(),
_ => None,
let maybe_content_type =
data.get(url).and_then(|d| d.content_type_header());
url_to_local_sub_path(url, maybe_content_type).ok()?
};
let path = sub_path.as_path_from_root(&self.cache.path);
if path.exists() {
Url::from_file_path(path).ok()
} else {
None
}
}
@ -67,18 +67,39 @@ impl LocalLspHttpCache {
let Ok(path) = path.strip_prefix(&self.cache.path) else {
return None; // not in this directory
};
let has_hashed_component = path
let components = path
.components()
.any(|p| p.as_os_str().to_string_lossy().starts_with('#'));
if has_hashed_component {
// check in the manifest
{
let data = self.cache.manifest.data.read();
if let Some(url) = data.get_reverse_mapping(path) {
return Some(url);
}
}
None
.map(|c| c.as_os_str().to_string_lossy())
.collect::<Vec<_>>();
if components
.last()
.map(|c| c.starts_with('#'))
.unwrap_or(false)
{
// the file itself will have an entry in the manifest
let data = self.cache.manifest.data.read();
data.get_reverse_mapping(path)
} else if let Some(last_index) =
components.iter().rposition(|c| c.starts_with('#'))
{
// get the mapping to the deepest hashed directory and
// then add the remaining path components to the url
let dir_path: PathBuf = components[..last_index + 1].iter().fold(
PathBuf::new(),
|mut path, c| {
path.push(c.as_ref());
path
},
);
let dir_url = self
.cache
.manifest
.data
.read()
.get_reverse_mapping(&dir_path)?;
let file_url =
dir_url.join(&components[last_index + 1..].join("/")).ok()?;
Some(file_url)
} else {
// we can work backwards from the path to the url
let mut parts = Vec::new();
@ -169,14 +190,6 @@ impl LocalHttpCache {
}
}
fn get_cache_filepath(
&self,
url: &Url,
headers: &HeadersMap,
) -> Result<PathBuf, AnyError> {
Ok(url_to_local_sub_path(url, headers)?.as_path_from_root(&self.path))
}
/// Copies the file from the global cache to the local cache returning
/// if the data was successfully copied to the local cache.
fn check_copy_global_to_local(&self, url: &Url) -> Result<bool, AnyError> {
@ -185,20 +198,22 @@ impl LocalHttpCache {
return Ok(false);
};
let local_path =
url_to_local_sub_path(url, headers_content_type(&metadata.headers))?;
if !metadata.is_redirect() {
let Some(cached_bytes) = self.global_cache.read_file_bytes(&global_key)? else {
return Ok(false);
};
let local_file_path = self.get_cache_filepath(url, &metadata.headers)?;
let local_file_path = local_path.as_path_from_root(&self.path);
// if we're here, then this will be set
atomic_write_file(&local_file_path, cached_bytes, CACHE_PERM)?;
}
self.manifest.insert_data(
url_to_local_sub_path(url, &metadata.headers)?,
url.clone(),
metadata.headers,
);
self
.manifest
.insert_data(local_path, url.clone(), metadata.headers);
Ok(true)
}
@ -254,13 +269,17 @@ impl HttpCache for LocalHttpCache {
content: &[u8],
) -> Result<(), AnyError> {
let is_redirect = headers.contains_key("location");
let sub_path = url_to_local_sub_path(url, headers_content_type(&headers))?;
if !is_redirect {
let cache_filepath = self.get_cache_filepath(url, &headers)?;
// Cache content
atomic_write_file(&cache_filepath, content, CACHE_PERM)?;
atomic_write_file(
&sub_path.as_path_from_root(&self.path),
content,
CACHE_PERM,
)?;
}
let sub_path = url_to_local_sub_path(url, &headers)?;
self.manifest.insert_data(sub_path, url.clone(), headers);
Ok(())
@ -281,9 +300,12 @@ impl HttpCache for LocalHttpCache {
Ok(Some(Vec::new()))
} else {
// if it's not a redirect, then it should have a file path
let cache_filepath =
self.get_cache_filepath(key.url, &data.headers)?;
Ok(read_file_bytes(&cache_filepath)?)
let cache_file_path = url_to_local_sub_path(
key.url,
headers_content_type(&data.headers),
)?
.as_path_from_root(&self.path);
Ok(read_file_bytes(&cache_file_path)?)
}
}
None => Ok(None),
@ -324,9 +346,13 @@ impl LocalCacheSubPath {
}
}
fn headers_content_type(headers: &HeadersMap) -> Option<&str> {
headers.get("content-type").map(|s| s.as_str())
}
fn url_to_local_sub_path(
url: &Url,
headers: &HeadersMap,
content_type: Option<&str>,
) -> Result<LocalCacheSubPath, UrlToFilenameConversionError> {
// https://stackoverflow.com/a/31976060/188246
static FORBIDDEN_CHARS: Lazy<HashSet<char>> = Lazy::new(|| {
@ -368,8 +394,9 @@ fn url_to_local_sub_path(
|| path.ends_with(".wasm")
}
fn get_extension(url: &Url, headers: &HeadersMap) -> &'static str {
MediaType::from_specifier_and_headers(url, Some(headers)).as_ts_extension()
fn get_extension(url: &Url, content_type: Option<&str>) -> &'static str {
MediaType::from_specifier_and_content_type(url, content_type)
.as_ts_extension()
}
fn short_hash(data: &str, last_ext: Option<&str>) -> String {
@ -439,11 +466,7 @@ fn url_to_local_sub_path(
}
// first, try to get the filename of the path
let path_segments = url
.path()
.strip_prefix('/')
.unwrap_or(url.path())
.split('/');
let path_segments = url_path_segments(url);
let mut parts = base_parts
.into_iter()
.chain(path_segments.map(|s| s.to_string()))
@ -464,7 +487,7 @@ fn url_to_local_sub_path(
.map(|(i, part)| {
let is_last = i == parts_len - 1;
let last_ext = if is_last {
Some(get_extension(url, headers))
Some(get_extension(url, content_type))
} else {
None
};
@ -542,16 +565,16 @@ impl LocalCacheManifest {
}
let mut data = self.data.write();
let is_empty = headers_subset.is_empty() && !sub_path.has_hash;
let has_changed = if is_empty {
let add_module_entry = headers_subset.is_empty()
&& !sub_path
.parts
.last()
.map(|s| s.starts_with('#'))
.unwrap_or(false);
let mut has_changed = if add_module_entry {
data.remove(&url, &sub_path)
} else {
let new_data = manifest::SerializedLocalCacheManifestDataModule {
path: if headers_subset.contains_key("location") {
None
} else {
Some(sub_path.parts.join("/"))
},
headers: headers_subset,
};
if data.get(&url) == Some(&new_data) {
@ -562,6 +585,29 @@ impl LocalCacheManifest {
}
};
if sub_path.has_hash {
let url_path_parts = url_path_segments(&url).collect::<Vec<_>>();
let base_url = {
let mut url = url.clone();
url.set_path("/");
url.set_query(None);
url.set_fragment(None);
url
};
for (i, local_part) in sub_path.parts[1..sub_path.parts.len() - 1]
.iter()
.enumerate()
{
if local_part.starts_with('#') {
let mut url = base_url.clone();
url.set_path(&format!("{}/", url_path_parts[..i + 1].join("/")));
if data.add_directory(url, sub_path.parts[..i + 2].join("/")) {
has_changed = true;
}
}
}
}
if has_changed {
// don't bother ensuring the directory here because it will
// eventually be created by files being added to the cache
@ -582,11 +628,13 @@ impl LocalCacheManifest {
.iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect::<HashMap<_, _>>();
let sub_path = match &module.path {
Some(sub_path) => {
Cow::Owned(self.file_path.parent().unwrap().join(sub_path))
}
None => Cow::Borrowed(&self.file_path),
let sub_path = if headers.contains_key("location") {
Cow::Borrowed(&self.file_path)
} else {
let sub_path =
url_to_local_sub_path(url, headers_content_type(&headers)).ok()?;
let folder_path = self.file_path.parent().unwrap();
Cow::Owned(sub_path.as_path_from_root(folder_path))
};
let Ok(metadata) = sub_path.metadata() else {
@ -601,9 +649,14 @@ impl LocalCacheManifest {
}
None => {
let folder_path = self.file_path.parent().unwrap();
let sub_path = url_to_local_sub_path(url, &Default::default()).ok()?;
if sub_path.has_hash {
// only paths without a hash are considered as in the cache
let sub_path = url_to_local_sub_path(url, None).ok()?;
if sub_path
.parts
.last()
.map(|s| s.starts_with('#'))
.unwrap_or(false)
{
// only filenames without a hash are considered as in the cache
// when they don't have a metadata entry
return None;
}
@ -635,12 +688,11 @@ mod manifest {
use serde::Deserialize;
use serde::Serialize;
use super::url_to_local_sub_path;
use super::LocalCacheSubPath;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SerializedLocalCacheManifestDataModule {
#[serde(skip_serializing_if = "Option::is_none")]
pub path: Option<String>,
#[serde(
default = "IndexMap::new",
skip_serializing_if = "IndexMap::is_empty"
@ -648,8 +700,23 @@ mod manifest {
pub headers: IndexMap<String, String>,
}
impl SerializedLocalCacheManifestDataModule {
pub fn content_type_header(&self) -> Option<&str> {
self.headers.get("content-type").map(|s| s.as_str())
}
}
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
struct SerializedLocalCacheManifestData {
#[serde(
default = "IndexMap::new",
skip_serializing_if = "IndexMap::is_empty"
)]
pub folders: IndexMap<Url, String>,
#[serde(
default = "IndexMap::new",
skip_serializing_if = "IndexMap::is_empty"
)]
pub modules: IndexMap<Url, SerializedLocalCacheManifestDataModule>,
}
@ -677,15 +744,28 @@ mod manifest {
.modules
.iter()
.filter_map(|(url, module)| {
module.path.as_ref().map(|path| {
let path = if cfg!(windows) {
PathBuf::from(path.split('/').collect::<Vec<_>>().join("\\"))
} else {
PathBuf::from(path)
};
(path, url.clone())
})
if module.headers.contains_key("location") {
return None;
}
url_to_local_sub_path(url, module.content_type_header())
.ok()
.map(|local_path| {
let path = if cfg!(windows) {
PathBuf::from(local_path.parts.join("\\"))
} else {
PathBuf::from(local_path.parts.join("/"))
};
(path, url.clone())
})
})
.chain(serialized.folders.iter().map(|(url, local_path)| {
let path = if cfg!(windows) {
PathBuf::from(local_path.replace('/', "\\"))
} else {
PathBuf::from(local_path)
};
(path, url.clone())
}))
.collect::<HashMap<_, _>>(),
)
} else {
@ -713,6 +793,28 @@ mod manifest {
.cloned()
}
pub fn add_directory(&mut self, url: Url, local_path: String) -> bool {
if let Some(current) = self.serialized.folders.get(&url) {
if *current == local_path {
return false;
}
}
if let Some(reverse_mapping) = &mut self.reverse_mapping {
reverse_mapping.insert(
if cfg!(windows) {
PathBuf::from(local_path.replace('/', "\\"))
} else {
PathBuf::from(&local_path)
},
url.clone(),
);
}
self.serialized.folders.insert(url, local_path);
true
}
pub fn insert(
&mut self,
url: Url,
@ -742,6 +844,14 @@ mod manifest {
}
}
fn url_path_segments(url: &Url) -> impl Iterator<Item = &str> {
url
.path()
.strip_prefix('/')
.unwrap_or(url.path())
.split('/')
}
#[cfg(test)]
mod test {
use super::*;
@ -849,7 +959,8 @@ mod test {
.iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect();
let result = url_to_local_sub_path(&url, &headers).unwrap();
let result =
url_to_local_sub_path(&url, headers_content_type(&headers)).unwrap();
let parts = result.parts.join("/");
assert_eq!(parts, expected);
assert_eq!(
@ -962,7 +1073,6 @@ mod test {
json!({
"modules": {
"https://deno.land/x/different_content_type.ts": {
"path": "deno.land/x/#different_content_ty_f15dc.js",
"headers": {
"content-type": "application/javascript"
}
@ -1035,7 +1145,6 @@ mod test {
json!({
"modules": {
"https://deno.land/x/my_file.ts": {
"path": "deno.land/x/my_file.ts",
"headers": {
"x-deno-warning": "Stop right now.",
"x-typescript-types": "./types.d.ts"
@ -1060,29 +1169,69 @@ mod test {
// try a file that can't be mapped to the file system
{
let url = Url::parse("https://deno.land/INVALID/Module.ts?dev").unwrap();
let content = "export const test = 5;";
global_cache
.set(&url, HashMap::new(), content.as_bytes())
.unwrap();
let key = local_cache.cache_item_key(&url).unwrap();
assert_eq!(
String::from_utf8(local_cache.read_file_bytes(&key).unwrap().unwrap())
{
let url =
Url::parse("https://deno.land/INVALID/Module.ts?dev").unwrap();
let content = "export const test = 5;";
global_cache
.set(&url, HashMap::new(), content.as_bytes())
.unwrap();
let key = local_cache.cache_item_key(&url).unwrap();
assert_eq!(
String::from_utf8(
local_cache.read_file_bytes(&key).unwrap().unwrap()
)
.unwrap(),
content
);
let metadata = local_cache.read_metadata(&key).unwrap().unwrap();
// won't have any headers because the content-type is derivable from the url
assert_eq!(metadata.headers, HashMap::new());
assert_eq!(metadata.url, url.to_string());
content
);
let metadata = local_cache.read_metadata(&key).unwrap().unwrap();
// won't have any headers because the content-type is derivable from the url
assert_eq!(metadata.headers, HashMap::new());
assert_eq!(metadata.url, url.to_string());
}
// now try a file in the same directory, but that maps to the local filesystem
{
let url = Url::parse("https://deno.land/INVALID/module2.ts").unwrap();
let content = "export const test = 4;";
global_cache
.set(&url, HashMap::new(), content.as_bytes())
.unwrap();
let key = local_cache.cache_item_key(&url).unwrap();
assert_eq!(
String::from_utf8(
local_cache.read_file_bytes(&key).unwrap().unwrap()
)
.unwrap(),
content
);
assert!(local_cache_path
.join("deno.land/#invalid_1ee01/module2.ts")
.exists());
// ensure we can still read this file with a new local cache
let local_cache = LocalHttpCache::new(
local_cache_path.to_path_buf(),
global_cache.clone(),
);
assert_eq!(
String::from_utf8(
local_cache.read_file_bytes(&key).unwrap().unwrap()
)
.unwrap(),
content
);
}
assert_eq!(
manifest_file.read_json_value(),
json!({
"modules": {
"https://deno.land/INVALID/Module.ts?dev": {
"path": "deno.land/#invalid_1ee01/#module_b8d2b.ts"
}
},
"folders": {
"https://deno.land/INVALID/": "deno.land/#invalid_1ee01",
}
})
);
@ -1206,25 +1355,71 @@ mod test {
assert_eq!(mapping.as_ref(), Some(&url));
}
// try an http specifier that can't be mapped to the file system
// try http specifiers that can't be mapped to the file system
{
let url = Url::parse("http://deno.land/INVALID/Module.ts?dev").unwrap();
let content = "export const test = 5;";
global_cache
.set(&url, HashMap::new(), content.as_bytes())
.unwrap();
let key = local_cache.cache_item_key(&url).unwrap();
assert_eq!(
String::from_utf8(local_cache.read_file_bytes(&key).unwrap().unwrap())
let urls = [
"http://deno.land/INVALID/Module.ts?dev",
"http://deno.land/INVALID/SubDir/Module.ts?dev",
];
for url in urls {
let url = Url::parse(url).unwrap();
let content = "export const test = 5;";
global_cache
.set(&url, HashMap::new(), content.as_bytes())
.unwrap();
let key = local_cache.cache_item_key(&url).unwrap();
assert_eq!(
String::from_utf8(
local_cache.read_file_bytes(&key).unwrap().unwrap()
)
.unwrap(),
content
);
content
);
let file_url = local_cache.get_file_url(&url).unwrap();
let path = file_url.to_file_path().unwrap();
assert!(path.exists());
let mapping = local_cache.get_remote_url(&path);
assert_eq!(mapping.as_ref(), Some(&url));
let file_url = local_cache.get_file_url(&url).unwrap();
let path = file_url.to_file_path().unwrap();
assert!(path.exists());
let mapping = local_cache.get_remote_url(&path);
assert_eq!(mapping.as_ref(), Some(&url));
}
// now try a files in the same and sub directories, that maps to the local filesystem
let urls = [
"http://deno.land/INVALID/module2.ts",
"http://deno.land/INVALID/SubDir/module3.ts",
"http://deno.land/INVALID/SubDir/sub_dir/module4.ts",
];
for url in urls {
let url = Url::parse(url).unwrap();
let content = "export const test = 4;";
global_cache
.set(&url, HashMap::new(), content.as_bytes())
.unwrap();
let key = local_cache.cache_item_key(&url).unwrap();
assert_eq!(
String::from_utf8(
local_cache.read_file_bytes(&key).unwrap().unwrap()
)
.unwrap(),
content
);
let file_url = local_cache.get_file_url(&url).unwrap();
let path = file_url.to_file_path().unwrap();
assert!(path.exists());
let mapping = local_cache.get_remote_url(&path);
assert_eq!(mapping.as_ref(), Some(&url));
// ensure we can still get this file with a new local cache
let local_cache = LocalLspHttpCache::new(
local_cache_path.to_path_buf(),
global_cache.clone(),
);
let file_url = local_cache.get_file_url(&url).unwrap();
let path = file_url.to_file_path().unwrap();
assert!(path.exists());
let mapping = local_cache.get_remote_url(&path);
assert_eq!(mapping.as_ref(), Some(&url));
}
}
}
}

View file

@ -1,5 +1,6 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
use deno_core::serde_json::json;
use deno_core::url;
use deno_runtime::deno_fetch::reqwest;
use std::io::Read;
@ -4553,4 +4554,29 @@ console.log(returnsHi());"#,
// now it should run
deno_run_cmd.run().assert_matches_text("bye bye bye\n");
assert!(lockfile.exists());
// ensure we can add and execute files in directories that have a hash in them
test_context
.new_command()
// http_localhost_4545/subdir/#capitals_c75d7/main.js
.args("cache http://localhost:4545/subdir/CAPITALS/main.js")
.run()
.skip_output_check();
assert_eq!(
deno_modules_dir.join("manifest.json").read_json_value(),
json!({
"folders": {
"http://localhost:4545/subdir/CAPITALS/": "http_localhost_4545/subdir/#capitals_c75d7"
}
})
);
deno_modules_dir
.join("http_localhost_4545/subdir/#capitals_c75d7/hello_there.ts")
.write("console.log('hello there');");
test_context
.new_command()
// todo(dsherret): seems wrong that we don't auto-discover the config file to get the vendor directory for this
.args("run --deno-modules-dir http://localhost:4545/subdir/CAPITALS/hello_there.ts")
.run()
.assert_matches_text("hello there\n");
}

View file

@ -0,0 +1,3 @@
export function returnsUppperHi() {
return "HI";
}