mirror of
https://github.com/denoland/deno.git
synced 2025-01-05 22:09:02 -05:00
2024c974b6
The same issue in two different places - doing blocking FS work in an async task, limiting the amount of work that happens concurrently.

- When setting up node_modules, where we try to set up entries concurrently but were blocking other tasks from actually running.
- When loading package info from the npm registry file cache, loading and deserializing is expensive and prevents concurrency. This was especially noticeable when loading an npm resolution snapshot from a lockfile (`snapshot_from_lockfile` in `deno_npm`).

Installing deps in `deno-docs`:

```
❯ hyperfine -i -p 'rm -rf node_modules/' '../d7/deno-main i' '../d7/target/release/deno i'
Benchmark 1: ../d7/deno-main i
  Time (mean ± σ): 2.193 s ± 0.027 s [User: 0.589 s, System: 1.033 s]
  Range (min … max): 2.151 s … 2.242 s 10 runs

Benchmark 2: ../d7/target/release/deno i
  Time (mean ± σ): 1.597 s ± 0.021 s [User: 0.977 s, System: 1.337 s]
  Range (min … max): 1.550 s … 1.627 s 10 runs

Summary
  ../d7/target/release/deno i ran 1.37 ± 0.02 times faster than ../d7/deno-main i
```

Caching `npm:@11ty/eleventy`:

```
❯ hyperfine -i -p 'rm -rf node_modules/' --warmup 5 '../../d7/deno-main cache npm:@11ty/eleventy' '../../d7/target/release/deno cache npm:@11ty/eleventy'
Benchmark 1: ../../d7/deno-main cache npm:@11ty/eleventy
  Time (mean ± σ): 129.9 ms ± 2.2 ms [User: 27.5 ms, System: 101.3 ms]
  Range (min … max): 127.5 ms … 135.8 ms 10 runs

Benchmark 2: ../../d7/target/release/deno cache npm:@11ty/eleventy
  Time (mean ± σ): 100.6 ms ± 1.3 ms [User: 38.8 ms, System: 233.8 ms]
  Range (min … max): 99.3 ms … 103.2 ms 10 runs

Summary
  ../../d7/target/release/deno cache npm:@11ty/eleventy ran 1.29 ± 0.03 times faster than ../../d7/deno-main cache npm:@11ty/eleventy
```

---------

Co-authored-by: David Sherret <dsherret@gmail.com>
375 lines
10 KiB
Rust
375 lines
10 KiB
Rust
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
|
|
|
|
use std::collections::HashMap;
|
|
use std::collections::HashSet;
|
|
use std::fs;
|
|
use std::io::ErrorKind;
|
|
use std::path::PathBuf;
|
|
use std::sync::Arc;
|
|
|
|
use async_trait::async_trait;
|
|
use deno_core::anyhow::anyhow;
|
|
use deno_core::anyhow::Context;
|
|
use deno_core::error::custom_error;
|
|
use deno_core::error::AnyError;
|
|
use deno_core::futures::future::BoxFuture;
|
|
use deno_core::futures::future::Shared;
|
|
use deno_core::futures::FutureExt;
|
|
use deno_core::parking_lot::Mutex;
|
|
use deno_core::serde_json;
|
|
use deno_core::url::Url;
|
|
use deno_npm::npm_rc::RegistryConfig;
|
|
use deno_npm::npm_rc::ResolvedNpmRc;
|
|
use deno_npm::registry::NpmPackageInfo;
|
|
use deno_npm::registry::NpmRegistryApi;
|
|
use deno_npm::registry::NpmRegistryPackageInfoLoadError;
|
|
|
|
use crate::args::CacheSetting;
|
|
use crate::cache::CACHE_PERM;
|
|
use crate::http_util::HttpClient;
|
|
use crate::npm::common::maybe_auth_header_for_npm_registry;
|
|
use crate::util::fs::atomic_write_file;
|
|
use crate::util::progress_bar::ProgressBar;
|
|
use crate::util::sync::AtomicFlag;
|
|
|
|
use super::cache::NpmCache;
|
|
|
|
#[derive(Debug)]
|
|
pub struct CliNpmRegistryApi(Option<Arc<CliNpmRegistryApiInner>>);
|
|
|
|
impl CliNpmRegistryApi {
|
|
pub fn new(
|
|
cache: Arc<NpmCache>,
|
|
http_client: Arc<HttpClient>,
|
|
npmrc: Arc<ResolvedNpmRc>,
|
|
progress_bar: ProgressBar,
|
|
) -> Self {
|
|
Self(Some(Arc::new(CliNpmRegistryApiInner {
|
|
cache,
|
|
force_reload_flag: Default::default(),
|
|
mem_cache: Default::default(),
|
|
previously_reloaded_packages: Default::default(),
|
|
npmrc,
|
|
http_client,
|
|
progress_bar,
|
|
})))
|
|
}
|
|
|
|
/// Clears the internal memory cache.
|
|
pub fn clear_memory_cache(&self) {
|
|
self.inner().clear_memory_cache();
|
|
}
|
|
|
|
pub fn get_cached_package_info(
|
|
&self,
|
|
name: &str,
|
|
) -> Option<Arc<NpmPackageInfo>> {
|
|
self.inner().get_cached_package_info(name)
|
|
}
|
|
|
|
fn inner(&self) -> &Arc<CliNpmRegistryApiInner> {
|
|
// this panicking indicates a bug in the code where this
|
|
// wasn't initialized
|
|
self.0.as_ref().unwrap()
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl NpmRegistryApi for CliNpmRegistryApi {
|
|
async fn package_info(
|
|
&self,
|
|
name: &str,
|
|
) -> Result<Arc<NpmPackageInfo>, NpmRegistryPackageInfoLoadError> {
|
|
match self.inner().maybe_package_info(name).await {
|
|
Ok(Some(info)) => Ok(info),
|
|
Ok(None) => Err(NpmRegistryPackageInfoLoadError::PackageNotExists {
|
|
package_name: name.to_string(),
|
|
}),
|
|
Err(err) => {
|
|
Err(NpmRegistryPackageInfoLoadError::LoadError(Arc::new(err)))
|
|
}
|
|
}
|
|
}
|
|
|
|
fn mark_force_reload(&self) -> bool {
|
|
// never force reload the registry information if reloading
|
|
// is disabled or if we're already reloading
|
|
if matches!(
|
|
self.inner().cache.cache_setting(),
|
|
CacheSetting::Only | CacheSetting::ReloadAll
|
|
) {
|
|
return false;
|
|
}
|
|
if self.inner().force_reload_flag.raise() {
|
|
self.clear_memory_cache(); // clear the cache to force reloading
|
|
true
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
}
|
|
|
|
type CacheItemPendingResult =
|
|
Result<Option<Arc<NpmPackageInfo>>, Arc<AnyError>>;
|
|
|
|
#[derive(Debug)]
|
|
enum CacheItem {
|
|
Pending(Shared<BoxFuture<'static, CacheItemPendingResult>>),
|
|
Resolved(Option<Arc<NpmPackageInfo>>),
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
struct CliNpmRegistryApiInner {
|
|
cache: Arc<NpmCache>,
|
|
force_reload_flag: AtomicFlag,
|
|
mem_cache: Mutex<HashMap<String, CacheItem>>,
|
|
previously_reloaded_packages: Mutex<HashSet<String>>,
|
|
http_client: Arc<HttpClient>,
|
|
npmrc: Arc<ResolvedNpmRc>,
|
|
progress_bar: ProgressBar,
|
|
}
|
|
|
|
impl CliNpmRegistryApiInner {
|
|
pub async fn maybe_package_info(
|
|
self: &Arc<Self>,
|
|
name: &str,
|
|
) -> Result<Option<Arc<NpmPackageInfo>>, AnyError> {
|
|
let (created, future) = {
|
|
let mut mem_cache = self.mem_cache.lock();
|
|
match mem_cache.get(name) {
|
|
Some(CacheItem::Resolved(maybe_info)) => {
|
|
return Ok(maybe_info.clone());
|
|
}
|
|
Some(CacheItem::Pending(future)) => (false, future.clone()),
|
|
None => {
|
|
let future = {
|
|
let api = self.clone();
|
|
let name = name.to_string();
|
|
async move {
|
|
if (api.cache.cache_setting().should_use_for_npm_package(&name) && !api.force_reload())
|
|
// if this has been previously reloaded, then try loading from the
|
|
// file system cache
|
|
|| !api.previously_reloaded_packages.lock().insert(name.to_string())
|
|
{
|
|
// attempt to load from the file cache
|
|
if let Some(info) = api.load_file_cached_package_info(&name).await {
|
|
let result = Some(Arc::new(info));
|
|
return Ok(result);
|
|
}
|
|
}
|
|
api
|
|
.load_package_info_from_registry(&name)
|
|
.await
|
|
.map(|info| info.map(Arc::new))
|
|
.map_err(Arc::new)
|
|
}
|
|
.boxed()
|
|
.shared()
|
|
};
|
|
mem_cache
|
|
.insert(name.to_string(), CacheItem::Pending(future.clone()));
|
|
(true, future)
|
|
}
|
|
}
|
|
};
|
|
|
|
if created {
|
|
match future.await {
|
|
Ok(maybe_info) => {
|
|
// replace the cache item to say it's resolved now
|
|
self
|
|
.mem_cache
|
|
.lock()
|
|
.insert(name.to_string(), CacheItem::Resolved(maybe_info.clone()));
|
|
Ok(maybe_info)
|
|
}
|
|
Err(err) => {
|
|
// purge the item from the cache so it loads next time
|
|
self.mem_cache.lock().remove(name);
|
|
Err(anyhow!("{:#}", err))
|
|
}
|
|
}
|
|
} else {
|
|
Ok(future.await.map_err(|err| anyhow!("{:#}", err))?)
|
|
}
|
|
}
|
|
|
|
fn force_reload(&self) -> bool {
|
|
self.force_reload_flag.is_raised()
|
|
}
|
|
|
|
async fn load_file_cached_package_info(
|
|
&self,
|
|
name: &str,
|
|
) -> Option<NpmPackageInfo> {
|
|
match self.load_file_cached_package_info_result(name).await {
|
|
Ok(value) => value,
|
|
Err(err) => {
|
|
if cfg!(debug_assertions) {
|
|
panic!("error loading cached npm package info for {name}: {err:#}");
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
async fn load_file_cached_package_info_result(
|
|
&self,
|
|
name: &str,
|
|
) -> Result<Option<NpmPackageInfo>, AnyError> {
|
|
let file_cache_path = self.get_package_file_cache_path(name);
|
|
let deserialization_result = deno_core::unsync::spawn_blocking(|| {
|
|
let file_text = match fs::read_to_string(file_cache_path) {
|
|
Ok(file_text) => file_text,
|
|
Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None),
|
|
Err(err) => return Err(err.into()),
|
|
};
|
|
serde_json::from_str(&file_text)
|
|
.map(Some)
|
|
.map_err(AnyError::from)
|
|
})
|
|
.await
|
|
.unwrap();
|
|
match deserialization_result {
|
|
Ok(maybe_package_info) => Ok(maybe_package_info),
|
|
Err(err) => {
|
|
// This scenario might mean we need to load more data from the
|
|
// npm registry than before. So, just debug log while in debug
|
|
// rather than panic.
|
|
log::debug!(
|
|
"error deserializing registry.json for '{}'. Reloading. {:?}",
|
|
name,
|
|
err
|
|
);
|
|
Ok(None)
|
|
}
|
|
}
|
|
}
|
|
|
|
fn save_package_info_to_file_cache(
|
|
&self,
|
|
name: &str,
|
|
package_info: &NpmPackageInfo,
|
|
) {
|
|
if let Err(err) =
|
|
self.save_package_info_to_file_cache_result(name, package_info)
|
|
{
|
|
if cfg!(debug_assertions) {
|
|
panic!("error saving cached npm package info for {name}: {err:#}");
|
|
}
|
|
}
|
|
}
|
|
|
|
fn save_package_info_to_file_cache_result(
|
|
&self,
|
|
name: &str,
|
|
package_info: &NpmPackageInfo,
|
|
) -> Result<(), AnyError> {
|
|
let file_cache_path = self.get_package_file_cache_path(name);
|
|
let file_text = serde_json::to_string(&package_info)?;
|
|
atomic_write_file(&file_cache_path, file_text, CACHE_PERM)?;
|
|
Ok(())
|
|
}
|
|
|
|
async fn load_package_info_from_registry(
|
|
&self,
|
|
name: &str,
|
|
) -> Result<Option<NpmPackageInfo>, AnyError> {
|
|
let registry_url = self.npmrc.get_registry_url(name);
|
|
let registry_config = self.npmrc.get_registry_config(name);
|
|
|
|
self
|
|
.load_package_info_from_registry_inner(
|
|
name,
|
|
registry_url,
|
|
registry_config,
|
|
)
|
|
.await
|
|
.with_context(|| {
|
|
format!(
|
|
"Error getting response at {} for package \"{}\"",
|
|
self.get_package_url(name, registry_url),
|
|
name
|
|
)
|
|
})
|
|
}
|
|
|
|
async fn load_package_info_from_registry_inner(
|
|
&self,
|
|
name: &str,
|
|
registry_url: &Url,
|
|
registry_config: &RegistryConfig,
|
|
) -> Result<Option<NpmPackageInfo>, AnyError> {
|
|
if *self.cache.cache_setting() == CacheSetting::Only {
|
|
return Err(custom_error(
|
|
"NotCached",
|
|
format!(
|
|
"An npm specifier not found in cache: \"{name}\", --cached-only is specified."
|
|
)
|
|
));
|
|
}
|
|
|
|
let package_url = self.get_package_url(name, registry_url);
|
|
let guard = self.progress_bar.update(package_url.as_str());
|
|
|
|
let maybe_auth_header = maybe_auth_header_for_npm_registry(registry_config);
|
|
|
|
let maybe_bytes = self
|
|
.http_client
|
|
.download_with_progress(package_url, maybe_auth_header, &guard)
|
|
.await?;
|
|
match maybe_bytes {
|
|
Some(bytes) => {
|
|
let package_info = deno_core::unsync::spawn_blocking(move || {
|
|
serde_json::from_slice(&bytes)
|
|
})
|
|
.await??;
|
|
self.save_package_info_to_file_cache(name, &package_info);
|
|
Ok(Some(package_info))
|
|
}
|
|
None => Ok(None),
|
|
}
|
|
}
|
|
|
|
fn get_package_url(&self, name: &str, registry_url: &Url) -> Url {
|
|
// list of all characters used in npm packages:
|
|
// !, ', (, ), *, -, ., /, [0-9], @, [A-Za-z], _, ~
|
|
const ASCII_SET: percent_encoding::AsciiSet =
|
|
percent_encoding::NON_ALPHANUMERIC
|
|
.remove(b'!')
|
|
.remove(b'\'')
|
|
.remove(b'(')
|
|
.remove(b')')
|
|
.remove(b'*')
|
|
.remove(b'-')
|
|
.remove(b'.')
|
|
.remove(b'/')
|
|
.remove(b'@')
|
|
.remove(b'_')
|
|
.remove(b'~');
|
|
let name = percent_encoding::utf8_percent_encode(name, &ASCII_SET);
|
|
registry_url.join(&name.to_string()).unwrap()
|
|
}
|
|
|
|
fn get_package_file_cache_path(&self, name: &str) -> PathBuf {
|
|
let name_folder_path = self.cache.package_name_folder(name);
|
|
name_folder_path.join("registry.json")
|
|
}
|
|
|
|
fn clear_memory_cache(&self) {
|
|
self.mem_cache.lock().clear();
|
|
}
|
|
|
|
pub fn get_cached_package_info(
|
|
&self,
|
|
name: &str,
|
|
) -> Option<Arc<NpmPackageInfo>> {
|
|
let mem_cache = self.mem_cache.lock();
|
|
if let Some(CacheItem::Resolved(maybe_info)) = mem_cache.get(name) {
|
|
maybe_info.clone()
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
}
|