From f6a9b49dfb57a2392ea37a64cfdee956a1c392ec Mon Sep 17 00:00:00 2001 From: David Sherret Date: Fri, 23 Sep 2022 17:35:48 -0400 Subject: [PATCH] perf: don't re-download package tarball to global cache if local node_modules folder exists for package (#16005) --- cli/npm/cache.rs | 12 ++++++ cli/npm/resolvers/common.rs | 52 ++++++++++++------------ cli/npm/resolvers/local.rs | 63 +++++++++++++++++++++--------- cli/tests/integration/npm_tests.rs | 33 ++++++++++++++++ 4 files changed, 115 insertions(+), 45 deletions(-) diff --git a/cli/npm/cache.rs b/cli/npm/cache.rs index a733eccebd..6b841501ac 100644 --- a/cli/npm/cache.rs +++ b/cli/npm/cache.rs @@ -198,6 +198,18 @@ impl NpmCache { id: &NpmPackageId, dist: &NpmPackageVersionDistInfo, registry_url: &Url, + ) -> Result<(), AnyError> { + self + .ensure_package_inner(id, dist, registry_url) + .await + .with_context(|| format!("Failed caching npm package '{}'.", id)) + } + + async fn ensure_package_inner( + &self, + id: &NpmPackageId, + dist: &NpmPackageVersionDistInfo, + registry_url: &Url, ) -> Result<(), AnyError> { let package_folder = self.readonly.package_folder(id, registry_url); if package_folder.exists() diff --git a/cli/npm/resolvers/common.rs b/cli/npm/resolvers/common.rs index cc590e2ad6..508b783c95 100644 --- a/cli/npm/resolvers/common.rs +++ b/cli/npm/resolvers/common.rs @@ -5,7 +5,6 @@ use std::path::Path; use std::path::PathBuf; use deno_ast::ModuleSpecifier; -use deno_core::anyhow::Context; use deno_core::error::AnyError; use deno_core::futures; use deno_core::futures::future::BoxFuture; @@ -48,40 +47,41 @@ pub async fn cache_packages( cache: &NpmCache, registry_url: &Url, ) -> Result<(), AnyError> { - if std::env::var("DENO_UNSTABLE_NPM_SYNC_DOWNLOAD") == Ok("1".to_string()) { - // for some of the tests, we want downloading of packages - // to be deterministic so that the output is always the same + let sync_download = should_sync_download(); + if sync_download { + // we're running the tests not with --quiet + // and we want the output to be deterministic packages.sort_by(|a, b| a.id.cmp(&b.id)); - for package in packages { + } + let mut handles = Vec::with_capacity(packages.len()); + for package in packages { + let cache = cache.clone(); + let registry_url = registry_url.clone(); + let handle = tokio::task::spawn(async move { cache - .ensure_package(&package.id, &package.dist, registry_url) + .ensure_package(&package.id, &package.dist, ®istry_url) .await - .with_context(|| { - format!("Failed caching npm package '{}'.", package.id) - })?; - } - } else { - let handles = packages.into_iter().map(|package| { - let cache = cache.clone(); - let registry_url = registry_url.clone(); - tokio::task::spawn(async move { - cache - .ensure_package(&package.id, &package.dist, ®istry_url) - .await - .with_context(|| { - format!("Failed caching npm package '{}'.", package.id) - }) - }) }); - let results = futures::future::join_all(handles).await; - for result in results { - // surface the first error - result??; + if sync_download { + handle.await??; + } else { + handles.push(handle); } } + let results = futures::future::join_all(handles).await; + for result in results { + // surface the first error + result??; + } Ok(()) } +/// For some of the tests, we want downloading of packages +/// to be deterministic so that the output is always the same +pub fn should_sync_download() -> bool { + std::env::var("DENO_UNSTABLE_NPM_SYNC_DOWNLOAD") == Ok("1".to_string()) +} + pub fn ensure_registry_read_permission( registry_path: &Path, path: &Path, diff --git a/cli/npm/resolvers/local.rs b/cli/npm/resolvers/local.rs index d92ffb84d9..35223f1aaf 100644 --- a/cli/npm/resolvers/local.rs +++ b/cli/npm/resolvers/local.rs @@ -16,16 +16,18 @@ use deno_core::error::AnyError; use deno_core::futures::future::BoxFuture; use deno_core::futures::FutureExt; use deno_core::url::Url; +use deno_runtime::deno_core::futures; +use tokio::task::JoinHandle; use crate::fs_util; use crate::npm::resolution::NpmResolution; use crate::npm::resolution::NpmResolutionSnapshot; +use crate::npm::resolvers::common::should_sync_download; use crate::npm::NpmCache; use crate::npm::NpmPackageId; use crate::npm::NpmPackageReq; use crate::npm::NpmRegistryApi; -use super::common::cache_packages; use super::common::ensure_registry_read_permission; use super::common::InnerNpmPackageResolver; @@ -161,19 +163,14 @@ impl InnerNpmPackageResolver for LocalNpmPackageResolver { let resolver = self.clone(); async move { resolver.resolution.add_package_reqs(packages).await?; - cache_packages( - resolver.resolution.all_packages(), - &resolver.cache, - &resolver.registry_url, - ) - .await?; sync_resolution_with_fs( &resolver.resolution.snapshot(), &resolver.cache, &resolver.registry_url, &resolver.root_node_modules_path, - )?; + ) + .await?; Ok(()) } @@ -186,7 +183,7 @@ impl InnerNpmPackageResolver for LocalNpmPackageResolver { } /// Creates a pnpm style folder structure. -fn sync_resolution_with_fs( +async fn sync_resolution_with_fs( snapshot: &NpmResolutionSnapshot, cache: &NpmCache, registry_url: &Url, @@ -205,24 +202,52 @@ fn sync_resolution_with_fs( // // Copy (hardlink in future) // to // node_modules/.deno//node_modules/ - let all_packages = snapshot.all_packages(); + let sync_download = should_sync_download(); + let mut all_packages = snapshot.all_packages(); + if sync_download { + // we're running the tests not with --quiet + // and we want the output to be deterministic + all_packages.sort_by(|a, b| a.id.cmp(&b.id)); + } + let mut handles: Vec>> = + Vec::with_capacity(all_packages.len()); for package in &all_packages { let folder_name = get_package_folder_name(&package.id); let folder_path = deno_local_registry_dir.join(&folder_name); let initialized_file = folder_path.join("deno_initialized"); if !initialized_file.exists() { - let sub_node_modules = folder_path.join("node_modules"); - let package_path = join_package_name(&sub_node_modules, &package.id.name); - fs::create_dir_all(&package_path) - .with_context(|| format!("Creating '{}'", folder_path.display()))?; - let cache_folder = cache.package_folder(&package.id, registry_url); - // for now copy, but in the future consider hard linking - fs_util::copy_dir_recursive(&cache_folder, &package_path)?; - // write out a file that indicates this folder has been initialized - fs::write(initialized_file, "")?; + let cache = cache.clone(); + let registry_url = registry_url.clone(); + let package = package.clone(); + let handle = tokio::task::spawn(async move { + cache + .ensure_package(&package.id, &package.dist, ®istry_url) + .await?; + let sub_node_modules = folder_path.join("node_modules"); + let package_path = + join_package_name(&sub_node_modules, &package.id.name); + fs::create_dir_all(&package_path) + .with_context(|| format!("Creating '{}'", folder_path.display()))?; + let cache_folder = cache.package_folder(&package.id, ®istry_url); + // for now copy, but in the future consider hard linking + fs_util::copy_dir_recursive(&cache_folder, &package_path)?; + // write out a file that indicates this folder has been initialized + fs::write(initialized_file, "")?; + Ok(()) + }); + if sync_download { + handle.await??; + } else { + handles.push(handle); + } } } + let results = futures::future::join_all(handles).await; + for result in results { + result??; // surface the first error + } + // 2. Symlink all the dependencies into the .deno directory. // // Symlink node_modules/.deno//node_modules/ to diff --git a/cli/tests/integration/npm_tests.rs b/cli/tests/integration/npm_tests.rs index 507bddb30c..6f77cda84c 100644 --- a/cli/tests/integration/npm_tests.rs +++ b/cli/tests/integration/npm_tests.rs @@ -578,6 +578,39 @@ fn node_modules_dir_cache() { ) .exists()); assert!(node_modules.join("@denotest/dual-cjs-esm").exists()); + + // now try deleting the folder with the package source in the npm cache dir + let package_global_cache_dir = deno_dir + .path() + .join("npm") + .join("localhost_4545") + .join("npm") + .join("registry") + .join("@denotest") + .join("dual-cjs-esm") + .join("1.0.0"); + assert!(package_global_cache_dir.exists()); + std::fs::remove_dir_all(&package_global_cache_dir).unwrap(); + + // run the output, and it shouldn't bother recreating the directory + // because it already has everything cached locally in the node_modules folder + let deno = util::deno_cmd_with_deno_dir(&deno_dir) + .current_dir(deno_dir.path()) + .arg("run") + .arg("--unstable") + .arg("--node-modules-dir") + .arg("--quiet") + .arg("-A") + .arg(util::testdata_path().join("npm/dual_cjs_esm/main.ts")) + .envs(env_vars()) + .spawn() + .unwrap(); + let output = deno.wait_with_output().unwrap(); + assert!(output.status.success()); + + // this won't exist, but actually the parent directory + // will because it still re-downloads the registry information + assert!(!package_global_cache_dir.exists()); } #[test]