From 8dc242f7891492886827a350b7736c11df7aa419 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Fri, 11 Nov 2022 11:33:57 -0500 Subject: [PATCH] perf: more efficient `deno cache` and npm package info usage (#16592) 1. There was a lot of cloning going on with `NpmPackageInfo`. This is now stored in an `Arc` and cloning only happens on the individual version. 2. The package cache is now cleared from memory after resolution. 3. This surfaced a bug in `deno cache` and I noticed it can be more efficient if we have multiple root specifiers if we provide all the specifiers as roots. --- cli/main.rs | 28 +++++------ cli/npm/registry.rs | 32 ++++++++---- cli/npm/resolution/graph.rs | 30 ++++++----- cli/npm/resolution/mod.rs | 50 ++++++++++++------- .../testdata/cache/037_fetch_multiple.out | 2 +- cli/tests/testdata/npm/deno_cache.out | 1 - 6 files changed, 84 insertions(+), 59 deletions(-) diff --git a/cli/main.rs b/cli/main.rs index cf1bb4ca54..31ac4b3314 100644 --- a/cli/main.rs +++ b/cli/main.rs @@ -284,23 +284,23 @@ async fn check_command( async fn load_and_type_check( ps: &ProcState, - files: &Vec, + files: &[String], ) -> Result<(), AnyError> { let lib = ps.options.ts_type_lib_window(); - for file in files { - let specifier = resolve_url_or_path(file)?; - - ps.prepare_module_load( - vec![specifier], - false, - lib, - Permissions::allow_all(), - Permissions::allow_all(), - false, - ) - .await?; - } + let specifiers = files + .iter() + .map(|file| resolve_url_or_path(file)) + .collect::, _>>()?; + ps.prepare_module_load( + specifiers, + false, + lib, + Permissions::allow_all(), + Permissions::allow_all(), + false, + ) + .await?; Ok(()) } diff --git a/cli/npm/registry.rs b/cli/npm/registry.rs index 2a89d4463b..daefec04ce 100644 --- a/cli/npm/registry.rs +++ b/cli/npm/registry.rs @@ -185,12 +185,12 @@ pub trait NpmRegistryApi: Clone + Sync + Send + 'static { fn maybe_package_info( &self, name: &str, - ) -> BoxFuture<'static, Result, AnyError>>; + ) -> BoxFuture<'static, Result>, AnyError>>; fn package_info( &self, name: &str, - ) -> BoxFuture<'static, Result> { + ) -> BoxFuture<'static, Result, AnyError>> { let api = self.clone(); let name = name.to_string(); async move { @@ -212,13 +212,14 @@ pub trait NpmRegistryApi: Clone + Sync + Send + 'static { let name = name.to_string(); let version = version.to_string(); async move { - // todo(dsherret): this could be optimized to not clone the - // entire package info in the case of the RealNpmRegistryApi - let mut package_info = api.package_info(&name).await?; - Ok(package_info.versions.remove(&version)) + let package_info = api.package_info(&name).await?; + Ok(package_info.versions.get(&version).cloned()) } .boxed() } + + /// Clears the internal memory cache. + fn clear_memory_cache(&self); } #[derive(Clone)] @@ -268,17 +269,21 @@ impl NpmRegistryApi for RealNpmRegistryApi { fn maybe_package_info( &self, name: &str, - ) -> BoxFuture<'static, Result, AnyError>> { + ) -> BoxFuture<'static, Result>, AnyError>> { let api = self.clone(); let name = name.to_string(); async move { api.0.maybe_package_info(&name).await }.boxed() } + + fn clear_memory_cache(&self) { + self.0.mem_cache.lock().clear(); + } } struct RealNpmRegistryApiInner { base_url: Url, cache: NpmCache, - mem_cache: Mutex>>, + mem_cache: Mutex>>>, cache_setting: CacheSetting, progress_bar: ProgressBar, } @@ -287,7 +292,7 @@ impl RealNpmRegistryApiInner { pub async fn maybe_package_info( &self, name: &str, - ) -> Result, AnyError> { + ) -> Result>, AnyError> { let maybe_info = self.mem_cache.lock().get(name).cloned(); if let Some(info) = maybe_info { Ok(info) @@ -306,6 +311,7 @@ impl RealNpmRegistryApiInner { format!("Error getting response at {}", self.get_package_url(name)) })?; } + let maybe_package_info = maybe_package_info.map(Arc::new); // Not worth the complexity to ensure multiple in-flight requests // for the same package only request once because with how this is @@ -548,8 +554,12 @@ impl NpmRegistryApi for TestNpmRegistryApi { fn maybe_package_info( &self, name: &str, - ) -> BoxFuture<'static, Result, AnyError>> { + ) -> BoxFuture<'static, Result>, AnyError>> { let result = self.package_infos.lock().get(name).cloned(); - Box::pin(deno_core::futures::future::ready(Ok(result))) + Box::pin(deno_core::futures::future::ready(Ok(result.map(Arc::new)))) + } + + fn clear_memory_cache(&self) { + // do nothing for the test api } } diff --git a/cli/npm/resolution/graph.rs b/cli/npm/resolution/graph.rs index 497067925b..b56df35ae9 100644 --- a/cli/npm/resolution/graph.rs +++ b/cli/npm/resolution/graph.rs @@ -415,7 +415,7 @@ impl<'a, TNpmRegistryApi: NpmRegistryApi> &self, name: &str, version_matcher: &impl NpmVersionMatcher, - package_info: NpmPackageInfo, + package_info: &NpmPackageInfo, ) -> Result { if let Some(version) = self.resolve_best_package_version(name, version_matcher) @@ -462,10 +462,14 @@ impl<'a, TNpmRegistryApi: NpmRegistryApi> maybe_best_version.cloned() } + pub fn has_package_req(&self, req: &NpmPackageReq) -> bool { + self.graph.has_package_req(req) + } + pub fn add_package_req( &mut self, package_req: &NpmPackageReq, - package_info: NpmPackageInfo, + package_info: &NpmPackageInfo, ) -> Result<(), AnyError> { let node = self.resolve_node_from_info( &package_req.name, @@ -484,7 +488,7 @@ impl<'a, TNpmRegistryApi: NpmRegistryApi> fn analyze_dependency( &mut self, entry: &NpmDependencyEntry, - package_info: NpmPackageInfo, + package_info: &NpmPackageInfo, parent_id: &NpmPackageId, visited_versions: &Arc, ) -> Result<(), AnyError> { @@ -536,7 +540,7 @@ impl<'a, TNpmRegistryApi: NpmRegistryApi> &mut self, name: &str, version_matcher: &impl NpmVersionMatcher, - package_info: NpmPackageInfo, + package_info: &NpmPackageInfo, ) -> Result>, AnyError> { let version_and_info = self.resolve_best_package_version_and_info( name, @@ -613,7 +617,7 @@ impl<'a, TNpmRegistryApi: NpmRegistryApi> NpmDependencyEntryKind::Dep => { self.analyze_dependency( dep, - package_info, + &package_info, &parent_id, &visited_versions, )?; @@ -624,7 +628,7 @@ impl<'a, TNpmRegistryApi: NpmRegistryApi> &dep.bare_specifier, &parent_id, dep, - package_info, + &package_info, &visited_versions, )?; if let Some(new_parent_id) = maybe_new_parent_id { @@ -647,7 +651,7 @@ impl<'a, TNpmRegistryApi: NpmRegistryApi> specifier: &str, parent_id: &NpmPackageId, peer_dep: &NpmDependencyEntry, - peer_package_info: NpmPackageInfo, + peer_package_info: &NpmPackageInfo, visited_ancestor_versions: &Arc, ) -> Result, AnyError> { fn find_matching_child<'a>( @@ -881,7 +885,7 @@ struct VersionAndInfo { fn get_resolved_package_version_and_info( pkg_name: &str, version_matcher: &impl NpmVersionMatcher, - info: NpmPackageInfo, + info: &NpmPackageInfo, parent: Option<&NpmPackageId>, ) -> Result { let mut maybe_best_version: Option = None; @@ -922,7 +926,7 @@ fn get_resolved_package_version_and_info( bail!("Could not find dist-tag '{}'.", tag) } } else { - for (_, version_info) in info.versions.into_iter() { + for version_info in info.versions.values() { let version = NpmVersion::parse(&version_info.version)?; if version_matcher.matches(&version) { let is_best_version = maybe_best_version @@ -932,7 +936,7 @@ fn get_resolved_package_version_and_info( if is_best_version { maybe_best_version = Some(VersionAndInfo { version, - info: version_info, + info: version_info.clone(), }); } } @@ -981,7 +985,7 @@ mod test { let result = get_resolved_package_version_and_info( "test", &package_ref.req, - NpmPackageInfo { + &NpmPackageInfo { name: "test".to_string(), versions: HashMap::new(), dist_tags: HashMap::from([( @@ -1001,7 +1005,7 @@ mod test { let result = get_resolved_package_version_and_info( "test", &package_ref.req, - NpmPackageInfo { + &NpmPackageInfo { name: "test".to_string(), versions: HashMap::from([ ("0.1.0".to_string(), NpmPackageVersionInfo::default()), @@ -2014,7 +2018,7 @@ mod test { for req in reqs { let req = NpmPackageReference::from_str(req).unwrap().req; resolver - .add_package_req(&req, api.package_info(&req.name).await.unwrap()) + .add_package_req(&req, &api.package_info(&req.name).await.unwrap()) .unwrap(); } diff --git a/cli/npm/resolution/mod.rs b/cli/npm/resolution/mod.rs index 934cfb59b8..381e350c9f 100644 --- a/cli/npm/resolution/mod.rs +++ b/cli/npm/resolution/mod.rs @@ -422,42 +422,54 @@ impl NpmResolution { // multiple packages are resolved in alphabetical order package_reqs.sort_by(|a, b| a.name.cmp(&b.name)); - // go over the top level packages first, then down the + // go over the top level package names first, then down the // tree one level at a time through all the branches let mut unresolved_tasks = Vec::with_capacity(package_reqs.len()); - for package_req in package_reqs { - if graph.has_package_req(&package_req) { + let mut resolving_package_names = + HashSet::with_capacity(package_reqs.len()); + for package_req in &package_reqs { + if graph.has_package_req(package_req) { // skip analyzing this package, as there's already a matching top level package continue; } + if !resolving_package_names.insert(package_req.name.clone()) { + continue; // already resolving + } - // no existing best version, so resolve the current packages - let api = self.api.clone(); - let maybe_info = if should_sync_download() { + // cache the package info up front in parallel + if should_sync_download() { // for deterministic test output - Some(api.package_info(&package_req.name).await) + self.api.package_info(&package_req.name).await?; } else { - None + let api = self.api.clone(); + let package_name = package_req.name.clone(); + unresolved_tasks.push(tokio::task::spawn(async move { + // This is ok to call because api will internally cache + // the package information in memory. + api.package_info(&package_name).await + })); }; - unresolved_tasks.push(tokio::task::spawn(async move { - let info = match maybe_info { - Some(info) => info?, - None => api.package_info(&package_req.name).await?, - }; - Result::<_, AnyError>::Ok((package_req, info)) - })); + } + + for result in futures::future::join_all(unresolved_tasks).await { + result??; // surface the first error } let mut resolver = GraphDependencyResolver::new(&mut graph, &self.api); - for result in futures::future::join_all(unresolved_tasks).await { - let (package_req, info) = result??; - resolver.add_package_req(&package_req, info)?; + for package_req in package_reqs { + // avoid loading the info if this is already in the graph + if !resolver.has_package_req(&package_req) { + let info = self.api.package_info(&package_req.name).await?; + resolver.add_package_req(&package_req, &info)?; + } } resolver.resolve_pending().await?; - graph.into_snapshot(&self.api).await + let result = graph.into_snapshot(&self.api).await; + self.api.clear_memory_cache(); + result } pub fn resolve_package_from_id( diff --git a/cli/tests/testdata/cache/037_fetch_multiple.out b/cli/tests/testdata/cache/037_fetch_multiple.out index 09c6c0f60f..f4c0c314bc 100644 --- a/cli/tests/testdata/cache/037_fetch_multiple.out +++ b/cli/tests/testdata/cache/037_fetch_multiple.out @@ -1,5 +1,5 @@ Download http://localhost:4545/subdir/mod2.ts +Download http://localhost:4545/subdir/mt_text_typescript.t1.ts Download http://localhost:4545/subdir/print_hello.ts Check [WILDCARD]/fetch/test.ts -Download http://localhost:4545/subdir/mt_text_typescript.t1.ts Check [WILDCARD]/fetch/other.ts diff --git a/cli/tests/testdata/npm/deno_cache.out b/cli/tests/testdata/npm/deno_cache.out index 957919df1c..e4f03e2f10 100644 --- a/cli/tests/testdata/npm/deno_cache.out +++ b/cli/tests/testdata/npm/deno_cache.out @@ -1,5 +1,4 @@ Download http://localhost:4545/npm/registry/chalk -Download http://localhost:4545/npm/registry/chalk/chalk-5.0.1.tgz Download http://localhost:4545/npm/registry/mkdirp Download http://localhost:4545/npm/registry/chalk/chalk-5.0.1.tgz Download http://localhost:4545/npm/registry/mkdirp/mkdirp-1.0.4.tgz