1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2024-12-11 18:17:48 -05:00
denoland-deno/ext/node_resolver/analyze.rs
Nathan Whitaker f1ba266613
fix(node): Don't error out if we fail to statically analyze CJS re-export (#25748)
Fixes rsbuild running in deno.

You can look at the test to see what was failing, the gist is that we
were trying to statically analyze the re-exports of a CJS script, and if
we couldn't find the source for the re-exported file we would fail.

Instead, we should just treat these as if they were too dynamic to
analyze, and let it fail (or succeed) at runtime. This aligns with
node's behavior.
2024-09-19 18:37:36 -07:00

654 lines
17 KiB
Rust

// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
use std::borrow::Cow;
use std::collections::BTreeSet;
use std::collections::HashSet;
use std::path::Path;
use std::path::PathBuf;
use futures::future::LocalBoxFuture;
use futures::stream::FuturesUnordered;
use futures::FutureExt;
use futures::StreamExt;
use once_cell::sync::Lazy;
use anyhow::Context;
use anyhow::Error as AnyError;
use url::Url;
use crate::env::NodeResolverEnv;
use crate::package_json::load_pkg_json;
use crate::path::to_file_specifier;
use crate::resolution::NodeResolverRc;
use crate::NodeModuleKind;
use crate::NodeResolutionMode;
use crate::NpmResolverRc;
use crate::PathClean;
#[derive(Debug, Clone)]
pub enum CjsAnalysis {
/// File was found to be an ES module and the translator should
/// load the code as ESM.
Esm(String),
Cjs(CjsAnalysisExports),
}
#[derive(Debug, Clone)]
pub struct CjsAnalysisExports {
pub exports: Vec<String>,
pub reexports: Vec<String>,
}
/// Code analyzer for CJS and ESM files.
#[async_trait::async_trait(?Send)]
pub trait CjsCodeAnalyzer {
/// Analyzes CommonJs code for exports and reexports, which is
/// then used to determine the wrapper ESM module exports.
///
/// Note that the source is provided by the caller when the caller
/// already has it. If the source is needed by the implementation,
/// then it can use the provided source, or otherwise load it if
/// necessary.
async fn analyze_cjs(
&self,
specifier: &Url,
maybe_source: Option<String>,
) -> Result<CjsAnalysis, AnyError>;
}
pub struct NodeCodeTranslator<
TCjsCodeAnalyzer: CjsCodeAnalyzer,
TNodeResolverEnv: NodeResolverEnv,
> {
cjs_code_analyzer: TCjsCodeAnalyzer,
env: TNodeResolverEnv,
node_resolver: NodeResolverRc<TNodeResolverEnv>,
npm_resolver: NpmResolverRc,
}
impl<TCjsCodeAnalyzer: CjsCodeAnalyzer, TNodeResolverEnv: NodeResolverEnv>
NodeCodeTranslator<TCjsCodeAnalyzer, TNodeResolverEnv>
{
pub fn new(
cjs_code_analyzer: TCjsCodeAnalyzer,
env: TNodeResolverEnv,
node_resolver: NodeResolverRc<TNodeResolverEnv>,
npm_resolver: NpmResolverRc,
) -> Self {
Self {
cjs_code_analyzer,
env,
node_resolver,
npm_resolver,
}
}
/// Translates given CJS module into ESM. This function will perform static
/// analysis on the file to find defined exports and reexports.
///
/// For all discovered reexports the analysis will be performed recursively.
///
/// If successful a source code for equivalent ES module is returned.
pub async fn translate_cjs_to_esm(
&self,
entry_specifier: &Url,
source: Option<String>,
) -> Result<String, AnyError> {
let mut temp_var_count = 0;
let analysis = self
.cjs_code_analyzer
.analyze_cjs(entry_specifier, source)
.await?;
let analysis = match analysis {
CjsAnalysis::Esm(source) => return Ok(source),
CjsAnalysis::Cjs(analysis) => analysis,
};
let mut source = vec![
r#"import {createRequire as __internalCreateRequire} from "node:module";
const require = __internalCreateRequire(import.meta.url);"#
.to_string(),
];
// use a BTreeSet to make the output deterministic for v8's code cache
let mut all_exports = analysis.exports.into_iter().collect::<BTreeSet<_>>();
if !analysis.reexports.is_empty() {
let mut errors = Vec::new();
self
.analyze_reexports(
entry_specifier,
analysis.reexports,
&mut all_exports,
&mut errors,
)
.await;
// surface errors afterwards in a deterministic way
if !errors.is_empty() {
errors.sort_by_cached_key(|e| e.to_string());
return Err(errors.remove(0));
}
}
source.push(format!(
"const mod = require(\"{}\");",
entry_specifier
.to_file_path()
.unwrap()
.to_str()
.unwrap()
.replace('\\', "\\\\")
.replace('\'', "\\\'")
.replace('\"', "\\\"")
));
for export in &all_exports {
if export.as_str() != "default" {
add_export(
&mut source,
export,
&format!("mod[\"{}\"]", escape_for_double_quote_string(export)),
&mut temp_var_count,
);
}
}
source.push("export default mod;".to_string());
let translated_source = source.join("\n");
Ok(translated_source)
}
async fn analyze_reexports<'a>(
&'a self,
entry_specifier: &url::Url,
reexports: Vec<String>,
all_exports: &mut BTreeSet<String>,
// this goes through the modules concurrently, so collect
// the errors in order to be deterministic
errors: &mut Vec<anyhow::Error>,
) {
struct Analysis {
reexport_specifier: url::Url,
referrer: url::Url,
analysis: CjsAnalysis,
}
type AnalysisFuture<'a> = LocalBoxFuture<'a, Result<Analysis, AnyError>>;
let mut handled_reexports: HashSet<Url> = HashSet::default();
handled_reexports.insert(entry_specifier.clone());
let mut analyze_futures: FuturesUnordered<AnalysisFuture<'a>> =
FuturesUnordered::new();
let cjs_code_analyzer = &self.cjs_code_analyzer;
let mut handle_reexports =
|referrer: url::Url,
reexports: Vec<String>,
analyze_futures: &mut FuturesUnordered<AnalysisFuture<'a>>,
errors: &mut Vec<anyhow::Error>| {
// 1. Resolve the re-exports and start a future to analyze each one
for reexport in reexports {
let result = self.resolve(
&reexport,
&referrer,
// FIXME(bartlomieju): check if these conditions are okay, probably
// should be `deno-require`, because `deno` is already used in `esm_resolver.rs`
&["deno", "require", "default"],
NodeResolutionMode::Execution,
);
let reexport_specifier = match result {
Ok(Some(specifier)) => specifier,
Ok(None) => continue,
Err(err) => {
errors.push(err);
continue;
}
};
if !handled_reexports.insert(reexport_specifier.clone()) {
continue;
}
let referrer = referrer.clone();
let future = async move {
let analysis = cjs_code_analyzer
.analyze_cjs(&reexport_specifier, None)
.await
.with_context(|| {
format!(
"Could not load '{}' ({}) referenced from {}",
reexport, reexport_specifier, referrer
)
})?;
Ok(Analysis {
reexport_specifier,
referrer,
analysis,
})
}
.boxed_local();
analyze_futures.push(future);
}
};
handle_reexports(
entry_specifier.clone(),
reexports,
&mut analyze_futures,
errors,
);
while let Some(analysis_result) = analyze_futures.next().await {
// 2. Look at the analysis result and resolve its exports and re-exports
let Analysis {
reexport_specifier,
referrer,
analysis,
} = match analysis_result {
Ok(analysis) => analysis,
Err(err) => {
errors.push(err);
continue;
}
};
match analysis {
CjsAnalysis::Esm(_) => {
// todo(dsherret): support this once supporting requiring ES modules
errors.push(anyhow::anyhow!(
"Cannot require ES module '{}' from '{}'",
reexport_specifier,
referrer,
));
}
CjsAnalysis::Cjs(analysis) => {
if !analysis.reexports.is_empty() {
handle_reexports(
reexport_specifier.clone(),
analysis.reexports,
&mut analyze_futures,
errors,
);
}
all_exports.extend(
analysis
.exports
.into_iter()
.filter(|e| e.as_str() != "default"),
);
}
}
}
}
// todo(dsherret): what is going on here? Isn't this a bunch of duplicate code?
fn resolve(
&self,
specifier: &str,
referrer: &Url,
conditions: &[&str],
mode: NodeResolutionMode,
) -> Result<Option<Url>, AnyError> {
if specifier.starts_with('/') {
todo!();
}
let referrer_path = referrer.to_file_path().unwrap();
if specifier.starts_with("./") || specifier.starts_with("../") {
if let Some(parent) = referrer_path.parent() {
return Some(
self
.file_extension_probe(parent.join(specifier), &referrer_path)
.map(|p| to_file_specifier(&p)),
)
.transpose();
} else {
todo!();
}
}
// We've got a bare specifier or maybe bare_specifier/blah.js"
let (package_specifier, package_subpath) =
parse_specifier(specifier).unwrap();
let module_dir = match self
.npm_resolver
.resolve_package_folder_from_package(package_specifier.as_str(), referrer)
{
Err(err)
if matches!(
err.as_kind(),
crate::errors::PackageFolderResolveErrorKind::PackageNotFound(..)
) =>
{
return Ok(None);
}
other => other,
}?;
let package_json_path = module_dir.join("package.json");
let maybe_package_json =
load_pkg_json(self.env.pkg_json_fs(), &package_json_path)?;
if let Some(package_json) = maybe_package_json {
if let Some(exports) = &package_json.exports {
return Some(
self
.node_resolver
.package_exports_resolve(
&package_json_path,
&package_subpath,
exports,
Some(referrer),
NodeModuleKind::Esm,
conditions,
mode,
)
.map_err(AnyError::from),
)
.transpose();
}
// old school
if package_subpath != "." {
let d = module_dir.join(package_subpath);
if self.env.is_dir_sync(&d) {
// subdir might have a package.json that specifies the entrypoint
let package_json_path = d.join("package.json");
let maybe_package_json =
load_pkg_json(self.env.pkg_json_fs(), &package_json_path)?;
if let Some(package_json) = maybe_package_json {
if let Some(main) = package_json.main(NodeModuleKind::Cjs) {
return Ok(Some(to_file_specifier(&d.join(main).clean())));
}
}
return Ok(Some(to_file_specifier(&d.join("index.js").clean())));
}
return Some(
self
.file_extension_probe(d, &referrer_path)
.map(|p| to_file_specifier(&p)),
)
.transpose();
} else if let Some(main) = package_json.main(NodeModuleKind::Cjs) {
return Ok(Some(to_file_specifier(&module_dir.join(main).clean())));
} else {
return Ok(Some(to_file_specifier(
&module_dir.join("index.js").clean(),
)));
}
}
// as a fallback, attempt to resolve it via the ancestor directories
let mut last = referrer_path.as_path();
while let Some(parent) = last.parent() {
if !self.npm_resolver.in_npm_package_at_dir_path(parent) {
break;
}
let path = if parent.ends_with("node_modules") {
parent.join(specifier)
} else {
parent.join("node_modules").join(specifier)
};
if let Ok(path) = self.file_extension_probe(path, &referrer_path) {
return Ok(Some(to_file_specifier(&path)));
}
last = parent;
}
Err(not_found(specifier, &referrer_path))
}
fn file_extension_probe(
&self,
p: PathBuf,
referrer: &Path,
) -> Result<PathBuf, AnyError> {
let p = p.clean();
if self.env.exists_sync(&p) {
let file_name = p.file_name().unwrap();
let p_js =
p.with_file_name(format!("{}.js", file_name.to_str().unwrap()));
if self.env.is_file_sync(&p_js) {
return Ok(p_js);
} else if self.env.is_dir_sync(&p) {
return Ok(p.join("index.js"));
} else {
return Ok(p);
}
} else if let Some(file_name) = p.file_name() {
{
let p_js =
p.with_file_name(format!("{}.js", file_name.to_str().unwrap()));
if self.env.is_file_sync(&p_js) {
return Ok(p_js);
}
}
{
let p_json =
p.with_file_name(format!("{}.json", file_name.to_str().unwrap()));
if self.env.is_file_sync(&p_json) {
return Ok(p_json);
}
}
}
Err(not_found(&p.to_string_lossy(), referrer))
}
}
static RESERVED_WORDS: Lazy<HashSet<&str>> = Lazy::new(|| {
HashSet::from([
"abstract",
"arguments",
"async",
"await",
"boolean",
"break",
"byte",
"case",
"catch",
"char",
"class",
"const",
"continue",
"debugger",
"default",
"delete",
"do",
"double",
"else",
"enum",
"eval",
"export",
"extends",
"false",
"final",
"finally",
"float",
"for",
"function",
"get",
"goto",
"if",
"implements",
"import",
"in",
"instanceof",
"int",
"interface",
"let",
"long",
"mod",
"native",
"new",
"null",
"package",
"private",
"protected",
"public",
"return",
"set",
"short",
"static",
"super",
"switch",
"synchronized",
"this",
"throw",
"throws",
"transient",
"true",
"try",
"typeof",
"var",
"void",
"volatile",
"while",
"with",
"yield",
])
});
fn add_export(
source: &mut Vec<String>,
name: &str,
initializer: &str,
temp_var_count: &mut usize,
) {
fn is_valid_var_decl(name: &str) -> bool {
// it's ok to be super strict here
if name.is_empty() {
return false;
}
if let Some(first) = name.chars().next() {
if !first.is_ascii_alphabetic() && first != '_' && first != '$' {
return false;
}
}
name
.chars()
.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '$')
}
// TODO(bartlomieju): Node actually checks if a given export exists in `exports` object,
// but it might not be necessary here since our analysis is more detailed?
if RESERVED_WORDS.contains(name) || !is_valid_var_decl(name) {
*temp_var_count += 1;
// we can't create an identifier with a reserved word or invalid identifier name,
// so assign it to a temporary variable that won't have a conflict, then re-export
// it as a string
source.push(format!(
"const __deno_export_{temp_var_count}__ = {initializer};"
));
source.push(format!(
"export {{ __deno_export_{temp_var_count}__ as \"{}\" }};",
escape_for_double_quote_string(name)
));
} else {
source.push(format!("export const {name} = {initializer};"));
}
}
fn parse_specifier(specifier: &str) -> Option<(String, String)> {
let mut separator_index = specifier.find('/');
let mut valid_package_name = true;
// let mut is_scoped = false;
if specifier.is_empty() {
valid_package_name = false;
} else if specifier.starts_with('@') {
// is_scoped = true;
if let Some(index) = separator_index {
separator_index = specifier[index + 1..].find('/').map(|i| i + index + 1);
} else {
valid_package_name = false;
}
}
let package_name = if let Some(index) = separator_index {
specifier[0..index].to_string()
} else {
specifier.to_string()
};
// Package name cannot have leading . and cannot have percent-encoding or separators.
for ch in package_name.chars() {
if ch == '%' || ch == '\\' {
valid_package_name = false;
break;
}
}
if !valid_package_name {
return None;
}
let package_subpath = if let Some(index) = separator_index {
format!(".{}", specifier.chars().skip(index).collect::<String>())
} else {
".".to_string()
};
Some((package_name, package_subpath))
}
fn not_found(path: &str, referrer: &Path) -> AnyError {
let msg = format!(
"[ERR_MODULE_NOT_FOUND] Cannot find module \"{}\" imported from \"{}\"",
path,
referrer.to_string_lossy()
);
std::io::Error::new(std::io::ErrorKind::NotFound, msg).into()
}
fn escape_for_double_quote_string(text: &str) -> Cow<str> {
// this should be rare, so doing a scan first before allocating is ok
if text.chars().any(|c| matches!(c, '"' | '\\')) {
// don't bother making this more complex for perf because it's rare
Cow::Owned(text.replace('\\', "\\\\").replace('"', "\\\""))
} else {
Cow::Borrowed(text)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_add_export() {
let mut temp_var_count = 0;
let mut source = vec![];
let exports = vec!["static", "server", "app", "dashed-export", "3d"];
for export in exports {
add_export(&mut source, export, "init", &mut temp_var_count);
}
assert_eq!(
source,
vec![
"const __deno_export_1__ = init;".to_string(),
"export { __deno_export_1__ as \"static\" };".to_string(),
"export const server = init;".to_string(),
"export const app = init;".to_string(),
"const __deno_export_2__ = init;".to_string(),
"export { __deno_export_2__ as \"dashed-export\" };".to_string(),
"const __deno_export_3__ = init;".to_string(),
"export { __deno_export_3__ as \"3d\" };".to_string(),
]
)
}
#[test]
fn test_parse_specifier() {
assert_eq!(
parse_specifier("@some-package/core/actions"),
Some(("@some-package/core".to_string(), "./actions".to_string()))
);
}
}