1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2024-12-20 14:24:48 -05:00
denoland-deno/cli/npm/mod.rs
Nathan Whitaker 6f506208f6
feat(unstable): support caching npm dependencies only as they're needed (#27300)
Currently deno eagerly caches all npm packages in the workspace's npm
resolution. So, for instance, running a file `foo.ts` that imports
`npm:chalk` will also install all dependencies listed in `package.json`
and all `npm` dependencies listed in the lockfile.

This PR refactors things to give more control over when and what npm
packages are automatically cached while building the module graph.

After this PR, by default the current behavior is unchanged _except_ for
`deno install --entrypoint`, which will only cache npm packages used by
the given entrypoint. For the other subcommands, this behavior can be
enabled with `--unstable-npm-lazy-caching`


Fixes #25782.

---------

Signed-off-by: Nathan Whitaker <17734409+nathanwhit@users.noreply.github.com>
Co-authored-by: Luca Casonato <hello@lcas.dev>
2024-12-10 18:24:23 -08:00

321 lines
9.3 KiB
Rust

// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
mod byonm;
mod managed;
use std::borrow::Cow;
use std::path::Path;
use std::sync::Arc;
use dashmap::DashMap;
use deno_core::error::AnyError;
use deno_core::serde_json;
use deno_core::url::Url;
use deno_npm::npm_rc::ResolvedNpmRc;
use deno_npm::registry::NpmPackageInfo;
use deno_resolver::npm::ByonmInNpmPackageChecker;
use deno_resolver::npm::ByonmNpmResolver;
use deno_resolver::npm::CliNpmReqResolver;
use deno_resolver::npm::ResolvePkgFolderFromDenoReqError;
use deno_runtime::deno_fs::FileSystem;
use deno_runtime::deno_node::NodePermissions;
use deno_runtime::ops::process::NpmProcessStateProvider;
use deno_semver::package::PackageNv;
use deno_semver::package::PackageReq;
use http::HeaderName;
use http::HeaderValue;
use managed::create_managed_in_npm_pkg_checker;
use node_resolver::InNpmPackageChecker;
use node_resolver::NpmPackageFolderResolver;
use crate::file_fetcher::FileFetcher;
use crate::http_util::HttpClientProvider;
use crate::util::fs::atomic_write_file_with_retries_and_fs;
use crate::util::fs::hard_link_dir_recursive;
use crate::util::fs::AtomicWriteFileFsAdapter;
use crate::util::progress_bar::ProgressBar;
pub use self::byonm::CliByonmNpmResolver;
pub use self::byonm::CliByonmNpmResolverCreateOptions;
pub use self::managed::CliManagedInNpmPkgCheckerCreateOptions;
pub use self::managed::CliManagedNpmResolverCreateOptions;
pub use self::managed::CliNpmResolverManagedSnapshotOption;
pub use self::managed::ManagedCliNpmResolver;
pub use self::managed::PackageCaching;
pub type CliNpmTarballCache = deno_npm_cache::TarballCache<CliNpmCacheEnv>;
pub type CliNpmCache = deno_npm_cache::NpmCache<CliNpmCacheEnv>;
pub type CliNpmRegistryInfoProvider =
deno_npm_cache::RegistryInfoProvider<CliNpmCacheEnv>;
#[derive(Debug)]
pub struct CliNpmCacheEnv {
fs: Arc<dyn FileSystem>,
http_client_provider: Arc<HttpClientProvider>,
progress_bar: ProgressBar,
}
impl CliNpmCacheEnv {
pub fn new(
fs: Arc<dyn FileSystem>,
http_client_provider: Arc<HttpClientProvider>,
progress_bar: ProgressBar,
) -> Self {
Self {
fs,
http_client_provider,
progress_bar,
}
}
}
#[async_trait::async_trait(?Send)]
impl deno_npm_cache::NpmCacheEnv for CliNpmCacheEnv {
fn exists(&self, path: &Path) -> bool {
self.fs.exists_sync(path)
}
fn hard_link_dir_recursive(
&self,
from: &Path,
to: &Path,
) -> Result<(), AnyError> {
// todo(dsherret): use self.fs here instead
hard_link_dir_recursive(from, to)
}
fn atomic_write_file_with_retries(
&self,
file_path: &Path,
data: &[u8],
) -> std::io::Result<()> {
atomic_write_file_with_retries_and_fs(
&AtomicWriteFileFsAdapter {
fs: self.fs.as_ref(),
write_mode: crate::cache::CACHE_PERM,
},
file_path,
data,
)
}
async fn download_with_retries_on_any_tokio_runtime(
&self,
url: Url,
maybe_auth_header: Option<(HeaderName, HeaderValue)>,
) -> Result<Option<Vec<u8>>, deno_npm_cache::DownloadError> {
let guard = self.progress_bar.update(url.as_str());
let client = self.http_client_provider.get_or_create().map_err(|err| {
deno_npm_cache::DownloadError {
status_code: None,
error: err,
}
})?;
client
.download_with_progress_and_retries(url, maybe_auth_header, &guard)
.await
.map_err(|err| {
use crate::http_util::DownloadError::*;
let status_code = match &err {
Fetch { .. }
| UrlParse { .. }
| HttpParse { .. }
| Json { .. }
| ToStr { .. }
| NoRedirectHeader { .. }
| TooManyRedirects => None,
BadResponse(bad_response_error) => {
Some(bad_response_error.status_code)
}
};
deno_npm_cache::DownloadError {
status_code,
error: err.into(),
}
})
}
}
pub enum CliNpmResolverCreateOptions {
Managed(CliManagedNpmResolverCreateOptions),
Byonm(CliByonmNpmResolverCreateOptions),
}
pub async fn create_cli_npm_resolver_for_lsp(
options: CliNpmResolverCreateOptions,
) -> Arc<dyn CliNpmResolver> {
use CliNpmResolverCreateOptions::*;
match options {
Managed(options) => {
managed::create_managed_npm_resolver_for_lsp(options).await
}
Byonm(options) => Arc::new(ByonmNpmResolver::new(options)),
}
}
pub async fn create_cli_npm_resolver(
options: CliNpmResolverCreateOptions,
) -> Result<Arc<dyn CliNpmResolver>, AnyError> {
use CliNpmResolverCreateOptions::*;
match options {
Managed(options) => managed::create_managed_npm_resolver(options).await,
Byonm(options) => Ok(Arc::new(ByonmNpmResolver::new(options))),
}
}
pub enum CreateInNpmPkgCheckerOptions<'a> {
Managed(CliManagedInNpmPkgCheckerCreateOptions<'a>),
Byonm,
}
pub fn create_in_npm_pkg_checker(
options: CreateInNpmPkgCheckerOptions,
) -> Arc<dyn InNpmPackageChecker> {
match options {
CreateInNpmPkgCheckerOptions::Managed(options) => {
create_managed_in_npm_pkg_checker(options)
}
CreateInNpmPkgCheckerOptions::Byonm => Arc::new(ByonmInNpmPackageChecker),
}
}
pub enum InnerCliNpmResolverRef<'a> {
Managed(&'a ManagedCliNpmResolver),
#[allow(dead_code)]
Byonm(&'a CliByonmNpmResolver),
}
pub trait CliNpmResolver: NpmPackageFolderResolver + CliNpmReqResolver {
fn into_npm_pkg_folder_resolver(
self: Arc<Self>,
) -> Arc<dyn NpmPackageFolderResolver>;
fn into_npm_req_resolver(self: Arc<Self>) -> Arc<dyn CliNpmReqResolver>;
fn into_process_state_provider(
self: Arc<Self>,
) -> Arc<dyn NpmProcessStateProvider>;
fn into_maybe_byonm(self: Arc<Self>) -> Option<Arc<CliByonmNpmResolver>> {
None
}
fn clone_snapshotted(&self) -> Arc<dyn CliNpmResolver>;
fn as_inner(&self) -> InnerCliNpmResolverRef;
fn as_managed(&self) -> Option<&ManagedCliNpmResolver> {
match self.as_inner() {
InnerCliNpmResolverRef::Managed(inner) => Some(inner),
InnerCliNpmResolverRef::Byonm(_) => None,
}
}
fn as_byonm(&self) -> Option<&CliByonmNpmResolver> {
match self.as_inner() {
InnerCliNpmResolverRef::Managed(_) => None,
InnerCliNpmResolverRef::Byonm(inner) => Some(inner),
}
}
fn root_node_modules_path(&self) -> Option<&Path>;
fn ensure_read_permission<'a>(
&self,
permissions: &mut dyn NodePermissions,
path: &'a Path,
) -> Result<Cow<'a, Path>, AnyError>;
/// Returns a hash returning the state of the npm resolver
/// or `None` if the state currently can't be determined.
fn check_state_hash(&self) -> Option<u64>;
}
#[derive(Debug)]
pub struct NpmFetchResolver {
nv_by_req: DashMap<PackageReq, Option<PackageNv>>,
info_by_name: DashMap<String, Option<Arc<NpmPackageInfo>>>,
file_fetcher: Arc<FileFetcher>,
npmrc: Arc<ResolvedNpmRc>,
}
impl NpmFetchResolver {
pub fn new(
file_fetcher: Arc<FileFetcher>,
npmrc: Arc<ResolvedNpmRc>,
) -> Self {
Self {
nv_by_req: Default::default(),
info_by_name: Default::default(),
file_fetcher,
npmrc,
}
}
pub async fn req_to_nv(&self, req: &PackageReq) -> Option<PackageNv> {
if let Some(nv) = self.nv_by_req.get(req) {
return nv.value().clone();
}
let maybe_get_nv = || async {
let name = req.name.clone();
let package_info = self.package_info(&name).await?;
if let Some(dist_tag) = req.version_req.tag() {
let version = package_info.dist_tags.get(dist_tag)?.clone();
return Some(PackageNv { name, version });
}
// Find the first matching version of the package.
let mut versions = package_info.versions.keys().collect::<Vec<_>>();
versions.sort();
let version = versions
.into_iter()
.rev()
.find(|v| req.version_req.tag().is_none() && req.version_req.matches(v))
.cloned()?;
Some(PackageNv { name, version })
};
let nv = maybe_get_nv().await;
self.nv_by_req.insert(req.clone(), nv.clone());
nv
}
pub async fn package_info(&self, name: &str) -> Option<Arc<NpmPackageInfo>> {
if let Some(info) = self.info_by_name.get(name) {
return info.value().clone();
}
// todo(#27198): use RegistryInfoProvider instead
let fetch_package_info = || async {
let info_url = deno_npm_cache::get_package_url(&self.npmrc, name);
let file_fetcher = self.file_fetcher.clone();
let registry_config = self.npmrc.get_registry_config(name);
// TODO(bartlomieju): this should error out, not use `.ok()`.
let maybe_auth_header =
deno_npm_cache::maybe_auth_header_for_npm_registry(registry_config)
.ok()?;
// spawn due to the lsp's `Send` requirement
let file = deno_core::unsync::spawn(async move {
file_fetcher
.fetch_bypass_permissions_with_maybe_auth(
&info_url,
maybe_auth_header,
)
.await
.ok()
})
.await
.ok()??;
serde_json::from_slice::<NpmPackageInfo>(&file.source).ok()
};
let info = fetch_package_info().await.map(Arc::new);
self.info_by_name.insert(name.to_string(), info.clone());
info
}
}
pub const NPM_CONFIG_USER_AGENT_ENV_VAR: &str = "npm_config_user_agent";
pub fn get_npm_config_user_agent() -> String {
format!(
"deno/{} npm/? deno/{} {} {}",
env!("CARGO_PKG_VERSION"),
env!("CARGO_PKG_VERSION"),
std::env::consts::OS,
std::env::consts::ARCH
)
}