From f161adf19e3ba0436ed18fcdbba81fbb360a6fb1 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Wed, 27 Nov 2024 21:28:41 -0500 Subject: [PATCH] perf(compile): read embedded files as static references when UTF-8 and reading as strings (#27033) --- cli/args/deno_json.rs | 2 ++ cli/cache/mod.rs | 2 ++ cli/module_loader.rs | 26 ++++++++------ cli/node.rs | 2 +- cli/resolver.rs | 16 ++++++--- cli/standalone/binary.rs | 7 ++-- cli/standalone/mod.rs | 5 +-- cli/standalone/virtual_fs.rs | 11 +++--- cli/util/text_encoding.rs | 9 +++++ ext/fs/in_memory_fs.rs | 7 ++-- ext/fs/interface.rs | 21 +++++++---- ext/fs/lib.rs | 1 + ext/fs/ops.rs | 68 +++++++++++++++++++++++++++++++----- ext/fs/std_fs.rs | 9 ++--- ext/io/fs.rs | 4 +-- ext/io/lib.rs | 8 ++--- ext/node/lib.rs | 9 ++++- ext/node/ops/require.rs | 6 ++-- resolvers/deno/fs.rs | 6 +++- 19 files changed, 158 insertions(+), 61 deletions(-) diff --git a/cli/args/deno_json.rs b/cli/args/deno_json.rs index a82289e67d..3e6eb617a6 100644 --- a/cli/args/deno_json.rs +++ b/cli/args/deno_json.rs @@ -22,6 +22,8 @@ impl<'a> deno_config::fs::DenoConfigFs for DenoConfigFsAdapter<'a> { self .0 .read_text_file_lossy_sync(path, None) + // todo(https://github.com/denoland/deno_config/pull/140): avoid clone + .map(|s| s.into_owned()) .map_err(|err| err.into_io_error()) } diff --git a/cli/cache/mod.rs b/cli/cache/mod.rs index 50fc135ddf..73a3895a10 100644 --- a/cli/cache/mod.rs +++ b/cli/cache/mod.rs @@ -116,6 +116,8 @@ impl<'a> deno_cache_dir::DenoCacheEnv for DenoCacheEnvFsAdapter<'a> { self .0 .read_file_sync(path, None) + // todo(https://github.com/denoland/deno_cache_dir/pull/66): avoid clone + .map(|bytes| bytes.into_owned()) .map_err(|err| err.into_io_error()) } diff --git a/cli/module_loader.rs b/cli/module_loader.rs index 3d2dfb2a66..447c85a9ac 100644 --- a/cli/module_loader.rs +++ b/cli/module_loader.rs @@ -1060,7 +1060,10 @@ impl NodeRequireLoader self.npm_resolver.ensure_read_permission(permissions, path) } - fn load_text_file_lossy(&self, path: &Path) -> Result { + fn load_text_file_lossy( + &self, + path: &Path, + ) -> Result, AnyError> { // todo(dsherret): use the preloaded module from the graph if available? let media_type = MediaType::from_path(path); let text = self.fs.read_text_file_lossy_sync(path, None)?; @@ -1075,15 +1078,18 @@ impl NodeRequireLoader .into(), ); } - self.emitter.emit_parsed_source_sync( - &specifier, - media_type, - // this is probably not super accurate due to require esm, but probably ok. - // If we find this causes a lot of churn in the emit cache then we should - // investigate how we can make this better - ModuleKind::Cjs, - &text.into(), - ) + self + .emitter + .emit_parsed_source_sync( + &specifier, + media_type, + // this is probably not super accurate due to require esm, but probably ok. + // If we find this causes a lot of churn in the emit cache then we should + // investigate how we can make this better + ModuleKind::Cjs, + &text.into(), + ) + .map(Cow::Owned) } else { Ok(text) } diff --git a/cli/node.rs b/cli/node.rs index bc39cdbde9..11959df6b9 100644 --- a/cli/node.rs +++ b/cli/node.rs @@ -160,7 +160,7 @@ impl CjsCodeAnalyzer for CliCjsCodeAnalyzer { if let Ok(source_from_file) = self.fs.read_text_file_lossy_async(path, None).await { - Cow::Owned(source_from_file) + source_from_file } else { return Ok(ExtNodeCjsAnalysis::Cjs(CjsAnalysisExports { exports: vec![], diff --git a/cli/resolver.rs b/cli/resolver.rs index 6f3351391f..15ca4aa2b6 100644 --- a/cli/resolver.rs +++ b/cli/resolver.rs @@ -37,7 +37,7 @@ use crate::node::CliNodeCodeTranslator; use crate::npm::CliNpmResolver; use crate::npm::InnerCliNpmResolverRef; use crate::util::sync::AtomicFlag; -use crate::util::text_encoding::from_utf8_lossy_owned; +use crate::util::text_encoding::from_utf8_lossy_cow; pub type CjsTracker = deno_resolver::cjs::CjsTracker; pub type IsCjsResolver = @@ -62,7 +62,10 @@ pub struct ModuleCodeStringSource { pub struct CliDenoResolverFs(pub Arc); impl deno_resolver::fs::DenoResolverFs for CliDenoResolverFs { - fn read_to_string_lossy(&self, path: &Path) -> std::io::Result { + fn read_to_string_lossy( + &self, + path: &Path, + ) -> std::io::Result> { self .0 .read_text_file_lossy_sync(path, None) @@ -182,18 +185,21 @@ impl NpmModuleLoader { let code = if self.cjs_tracker.is_maybe_cjs(specifier, media_type)? { // translate cjs to esm if it's cjs and inject node globals - let code = from_utf8_lossy_owned(code); + let code = from_utf8_lossy_cow(code); ModuleSourceCode::String( self .node_code_translator - .translate_cjs_to_esm(specifier, Some(Cow::Owned(code))) + .translate_cjs_to_esm(specifier, Some(code)) .await? .into_owned() .into(), ) } else { // esm and json code is untouched - ModuleSourceCode::Bytes(code.into_boxed_slice().into()) + ModuleSourceCode::Bytes(match code { + Cow::Owned(bytes) => bytes.into_boxed_slice().into(), + Cow::Borrowed(bytes) => bytes.into(), + }) }; Ok(ModuleCodeStringSource { diff --git a/cli/standalone/binary.rs b/cli/standalone/binary.rs index b0623807ae..632f27da6f 100644 --- a/cli/standalone/binary.rs +++ b/cli/standalone/binary.rs @@ -282,14 +282,13 @@ impl StandaloneModules { .vfs .read_file_all(entry, VfsFileSubDataKind::ModuleGraph)?, Err(err) if err.kind() == ErrorKind::NotFound => { - let bytes = match RealFs.read_file_sync(&path, None) { + match RealFs.read_file_sync(&path, None) { Ok(bytes) => bytes, Err(FsError::Io(err)) if err.kind() == ErrorKind::NotFound => { return Ok(None) } Err(err) => return Err(err.into()), - }; - Cow::Owned(bytes) + } } Err(err) => return Err(err.into()), }; @@ -694,7 +693,7 @@ impl<'a> DenoCompileBinaryWriter<'a> { &file_path, match maybe_source { Some(source) => source, - None => RealFs.read_file_sync(&file_path, None)?, + None => RealFs.read_file_sync(&file_path, None)?.into_owned(), }, VfsFileSubDataKind::ModuleGraph, ) diff --git a/cli/standalone/mod.rs b/cli/standalone/mod.rs index ed0ed762c9..53efab2964 100644 --- a/cli/standalone/mod.rs +++ b/cli/standalone/mod.rs @@ -91,6 +91,7 @@ use crate::resolver::CliNpmReqResolver; use crate::resolver::NpmModuleLoader; use crate::util::progress_bar::ProgressBar; use crate::util::progress_bar::ProgressBarStyle; +use crate::util::text_encoding::from_utf8_lossy_cow; use crate::util::v8::construct_v8_flags; use crate::worker::CliCodeCache; use crate::worker::CliMainWorkerFactory; @@ -516,13 +517,13 @@ impl NodeRequireLoader for EmbeddedModuleLoader { fn load_text_file_lossy( &self, path: &std::path::Path, - ) -> Result { + ) -> Result, AnyError> { let file_entry = self.shared.vfs.file_entry(path)?; let file_bytes = self .shared .vfs .read_file_all(file_entry, VfsFileSubDataKind::ModuleGraph)?; - Ok(String::from_utf8(file_bytes.into_owned())?) + Ok(from_utf8_lossy_cow(file_bytes)) } fn is_maybe_cjs( diff --git a/cli/standalone/virtual_fs.rs b/cli/standalone/virtual_fs.rs index b630f629c5..66fc835534 100644 --- a/cli/standalone/virtual_fs.rs +++ b/cli/standalone/virtual_fs.rs @@ -743,15 +743,12 @@ impl deno_io::fs::File for FileBackedVfsFile { Err(FsError::NotSupported) } - fn read_all_sync(self: Rc) -> FsResult> { - self.read_to_end().map(|bytes| bytes.into_owned()) + fn read_all_sync(self: Rc) -> FsResult> { + self.read_to_end() } - async fn read_all_async(self: Rc) -> FsResult> { + async fn read_all_async(self: Rc) -> FsResult> { let inner = (*self).clone(); - tokio::task::spawn_blocking(move || { - inner.read_to_end().map(|bytes| bytes.into_owned()) - }) - .await? + tokio::task::spawn_blocking(move || inner.read_to_end()).await? } fn chmod_sync(self: Rc, _pathmode: u32) -> FsResult<()> { diff --git a/cli/util/text_encoding.rs b/cli/util/text_encoding.rs index 8524e63ebb..06b311e150 100644 --- a/cli/util/text_encoding.rs +++ b/cli/util/text_encoding.rs @@ -11,6 +11,15 @@ use deno_core::ModuleSourceCode; static SOURCE_MAP_PREFIX: &[u8] = b"//# sourceMappingURL=data:application/json;base64,"; +#[inline(always)] +pub fn from_utf8_lossy_cow(bytes: Cow<[u8]>) -> Cow { + match bytes { + Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes), + Cow::Owned(bytes) => Cow::Owned(from_utf8_lossy_owned(bytes)), + } +} + +#[inline(always)] pub fn from_utf8_lossy_owned(bytes: Vec) -> String { match String::from_utf8_lossy(&bytes) { Cow::Owned(code) => code, diff --git a/ext/fs/in_memory_fs.rs b/ext/fs/in_memory_fs.rs index 34b77836d9..b79b0ae984 100644 --- a/ext/fs/in_memory_fs.rs +++ b/ext/fs/in_memory_fs.rs @@ -3,6 +3,7 @@ // Allow using Arc for this module. #![allow(clippy::disallowed_types)] +use std::borrow::Cow; use std::collections::hash_map::Entry; use std::collections::HashMap; use std::io::Error; @@ -457,11 +458,11 @@ impl FileSystem for InMemoryFs { &self, path: &Path, _access_check: Option, - ) -> FsResult> { + ) -> FsResult> { let entry = self.get_entry(path); match entry { Some(entry) => match &*entry { - PathEntry::File(data) => Ok(data.clone()), + PathEntry::File(data) => Ok(Cow::Owned(data.clone())), PathEntry::Dir => Err(FsError::Io(Error::new( ErrorKind::InvalidInput, "Is a directory", @@ -474,7 +475,7 @@ impl FileSystem for InMemoryFs { &'a self, path: PathBuf, access_check: Option>, - ) -> FsResult> { + ) -> FsResult> { self.read_file_sync(&path, access_check) } } diff --git a/ext/fs/interface.rs b/ext/fs/interface.rs index 73333b0fd1..28a49c5d9b 100644 --- a/ext/fs/interface.rs +++ b/ext/fs/interface.rs @@ -1,5 +1,6 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. +use core::str; use std::borrow::Cow; use std::path::Path; use std::path::PathBuf; @@ -288,7 +289,7 @@ pub trait FileSystem: std::fmt::Debug + MaybeSend + MaybeSync { &self, path: &Path, access_check: Option, - ) -> FsResult> { + ) -> FsResult> { let options = OpenOptions::read(); let file = self.open_sync(path, options, access_check)?; let buf = file.read_all_sync()?; @@ -298,7 +299,7 @@ pub trait FileSystem: std::fmt::Debug + MaybeSend + MaybeSync { &'a self, path: PathBuf, access_check: Option>, - ) -> FsResult> { + ) -> FsResult> { let options = OpenOptions::read(); let file = self.open_async(path, options, access_check).await?; let buf = file.read_all_async().await?; @@ -327,17 +328,25 @@ pub trait FileSystem: std::fmt::Debug + MaybeSend + MaybeSync { &self, path: &Path, access_check: Option, - ) -> FsResult { + ) -> FsResult> { let buf = self.read_file_sync(path, access_check)?; - Ok(string_from_utf8_lossy(buf)) + Ok(string_from_cow_utf8_lossy(buf)) } async fn read_text_file_lossy_async<'a>( &'a self, path: PathBuf, access_check: Option>, - ) -> FsResult { + ) -> FsResult> { let buf = self.read_file_async(path, access_check).await?; - Ok(string_from_utf8_lossy(buf)) + Ok(string_from_cow_utf8_lossy(buf)) + } +} + +#[inline(always)] +fn string_from_cow_utf8_lossy(buf: Cow<'static, [u8]>) -> Cow<'static, str> { + match buf { + Cow::Owned(buf) => Cow::Owned(string_from_utf8_lossy(buf)), + Cow::Borrowed(buf) => String::from_utf8_lossy(buf), } } diff --git a/ext/fs/lib.rs b/ext/fs/lib.rs index aed9a7085f..26fac1e79f 100644 --- a/ext/fs/lib.rs +++ b/ext/fs/lib.rs @@ -17,6 +17,7 @@ pub use crate::interface::OpenOptions; pub use crate::ops::FsOpsError; pub use crate::ops::FsOpsErrorKind; pub use crate::ops::OperationError; +pub use crate::ops::V8MaybeStaticStr; pub use crate::std_fs::RealFs; pub use crate::sync::MaybeSend; pub use crate::sync::MaybeSync; diff --git a/ext/fs/ops.rs b/ext/fs/ops.rs index 7a9778c485..5e64585e0c 100644 --- a/ext/fs/ops.rs +++ b/ext/fs/ops.rs @@ -1,5 +1,6 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. +use std::borrow::Cow; use std::cell::RefCell; use std::error::Error; use std::fmt::Formatter; @@ -18,12 +19,15 @@ use crate::FsPermissions; use crate::OpenOptions; use boxed_error::Boxed; use deno_core::op2; +use deno_core::v8; use deno_core::CancelFuture; use deno_core::CancelHandle; +use deno_core::FastString; use deno_core::JsBuffer; use deno_core::OpState; use deno_core::ResourceId; use deno_core::ToJsBuffer; +use deno_core::ToV8; use deno_io::fs::FileResource; use deno_io::fs::FsError; use deno_io::fs::FsStat; @@ -1333,7 +1337,8 @@ where .read_file_sync(&path, Some(&mut access_check)) .map_err(|error| map_permission_error("readfile", error, &path))?; - Ok(buf.into()) + // todo(https://github.com/denoland/deno/issues/27107): do not clone here + Ok(buf.into_owned().into_boxed_slice().into()) } #[op2(async, stack_trace)] @@ -1375,15 +1380,61 @@ where .map_err(|error| map_permission_error("readfile", error, &path))? }; - Ok(buf.into()) + // todo(https://github.com/denoland/deno/issues/27107): do not clone here + Ok(buf.into_owned().into_boxed_slice().into()) +} + +// todo(https://github.com/denoland/deno_core/pull/986): remove +// when upgrading deno_core +#[derive(Debug)] +pub struct FastStringV8AllocationError; + +impl std::error::Error for FastStringV8AllocationError {} + +impl std::fmt::Display for FastStringV8AllocationError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "failed to allocate string; buffer exceeds maximum length" + ) + } +} + +/// Maintains a static reference to the string if possible. +pub struct V8MaybeStaticStr(pub Cow<'static, str>); + +impl<'s> ToV8<'s> for V8MaybeStaticStr { + type Error = FastStringV8AllocationError; + + #[inline] + fn to_v8( + self, + scope: &mut v8::HandleScope<'s>, + ) -> Result, Self::Error> { + // todo(https://github.com/denoland/deno_core/pull/986): remove this check + // when upgrading deno_core + const MAX_V8_STRING_LENGTH: usize = 536870888; + if self.0.len() > MAX_V8_STRING_LENGTH { + return Err(FastStringV8AllocationError); + } + + Ok( + match self.0 { + Cow::Borrowed(text) => FastString::from_static(text), + Cow::Owned(value) => value.into(), + } + .v8_string(scope) + .into(), + ) + } } #[op2(stack_trace)] -#[string] +#[to_v8] pub fn op_fs_read_file_text_sync

( state: &mut OpState, #[string] path: String, -) -> Result +) -> Result where P: FsPermissions + 'static, { @@ -1395,17 +1446,16 @@ where let str = fs .read_text_file_lossy_sync(&path, Some(&mut access_check)) .map_err(|error| map_permission_error("readfile", error, &path))?; - - Ok(str) + Ok(V8MaybeStaticStr(str)) } #[op2(async, stack_trace)] -#[string] +#[to_v8] pub async fn op_fs_read_file_text_async

( state: Rc>, #[string] path: String, #[smi] cancel_rid: Option, -) -> Result +) -> Result where P: FsPermissions + 'static, { @@ -1439,7 +1489,7 @@ where .map_err(|error| map_permission_error("readfile", error, &path))? }; - Ok(str) + Ok(V8MaybeStaticStr(str)) } fn to_seek_from(offset: i64, whence: i32) -> Result { diff --git a/ext/fs/std_fs.rs b/ext/fs/std_fs.rs index 73439d9bab..86ad213160 100644 --- a/ext/fs/std_fs.rs +++ b/ext/fs/std_fs.rs @@ -2,6 +2,7 @@ #![allow(clippy::disallowed_methods)] +use std::borrow::Cow; use std::env::current_dir; use std::fs; use std::io; @@ -371,7 +372,7 @@ impl FileSystem for RealFs { &self, path: &Path, access_check: Option, - ) -> FsResult> { + ) -> FsResult> { let mut file = open_with_access_check( OpenOptions { read: true, @@ -382,13 +383,13 @@ impl FileSystem for RealFs { )?; let mut buf = Vec::new(); file.read_to_end(&mut buf)?; - Ok(buf) + Ok(Cow::Owned(buf)) } async fn read_file_async<'a>( &'a self, path: PathBuf, access_check: Option>, - ) -> FsResult> { + ) -> FsResult> { let mut file = open_with_access_check( OpenOptions { read: true, @@ -400,7 +401,7 @@ impl FileSystem for RealFs { spawn_blocking(move || { let mut buf = Vec::new(); file.read_to_end(&mut buf)?; - Ok::<_, FsError>(buf) + Ok::<_, FsError>(Cow::Owned(buf)) }) .await? .map_err(Into::into) diff --git a/ext/io/fs.rs b/ext/io/fs.rs index 7ef02315ba..bd5dfd0bb9 100644 --- a/ext/io/fs.rs +++ b/ext/io/fs.rs @@ -215,8 +215,8 @@ pub trait File { fn write_all_sync(self: Rc, buf: &[u8]) -> FsResult<()>; async fn write_all(self: Rc, buf: BufView) -> FsResult<()>; - fn read_all_sync(self: Rc) -> FsResult>; - async fn read_all_async(self: Rc) -> FsResult>; + fn read_all_sync(self: Rc) -> FsResult>; + async fn read_all_async(self: Rc) -> FsResult>; fn chmod_sync(self: Rc, pathmode: u32) -> FsResult<()>; async fn chmod_async(self: Rc, mode: u32) -> FsResult<()>; diff --git a/ext/io/lib.rs b/ext/io/lib.rs index 5d183aa464..873fccd7b8 100644 --- a/ext/io/lib.rs +++ b/ext/io/lib.rs @@ -789,26 +789,26 @@ impl crate::fs::File for StdFileResourceInner { } } - fn read_all_sync(self: Rc) -> FsResult> { + fn read_all_sync(self: Rc) -> FsResult> { match self.kind { StdFileResourceKind::File | StdFileResourceKind::Stdin(_) => { let mut buf = Vec::new(); self.with_sync(|file| Ok(file.read_to_end(&mut buf)?))?; - Ok(buf) + Ok(Cow::Owned(buf)) } StdFileResourceKind::Stdout | StdFileResourceKind::Stderr => { Err(FsError::NotSupported) } } } - async fn read_all_async(self: Rc) -> FsResult> { + async fn read_all_async(self: Rc) -> FsResult> { match self.kind { StdFileResourceKind::File | StdFileResourceKind::Stdin(_) => { self .with_inner_blocking_task(|file| { let mut buf = Vec::new(); file.read_to_end(&mut buf)?; - Ok(buf) + Ok(Cow::Owned(buf)) }) .await } diff --git a/ext/node/lib.rs b/ext/node/lib.rs index 63f5794b7d..9986b0f607 100644 --- a/ext/node/lib.rs +++ b/ext/node/lib.rs @@ -157,7 +157,10 @@ pub trait NodeRequireLoader { path: &'a Path, ) -> Result, AnyError>; - fn load_text_file_lossy(&self, path: &Path) -> Result; + fn load_text_file_lossy( + &self, + path: &Path, + ) -> Result, AnyError>; /// Get if the module kind is maybe CJS and loading should determine /// if its CJS or ESM. @@ -873,6 +876,8 @@ impl deno_package_json::fs::DenoPkgJsonFs for DenoFsNodeResolverEnv { self .fs .read_text_file_lossy_sync(path, None) + // todo(https://github.com/denoland/deno_package_json/pull/9): don't clone + .map(|text| text.into_owned()) .map_err(|err| err.into_io_error()) } } @@ -887,6 +892,8 @@ impl<'a> deno_package_json::fs::DenoPkgJsonFs for DenoPkgJsonFsAdapter<'a> { self .0 .read_text_file_lossy_sync(path, None) + // todo(https://github.com/denoland/deno_package_json/pull/9): don't clone + .map(|text| text.into_owned()) .map_err(|err| err.into_io_error()) } } diff --git a/ext/node/ops/require.rs b/ext/node/ops/require.rs index 64dc4423ae..1c204f54e8 100644 --- a/ext/node/ops/require.rs +++ b/ext/node/ops/require.rs @@ -8,6 +8,7 @@ use deno_core::v8; use deno_core::JsRuntimeInspector; use deno_core::OpState; use deno_fs::FileSystemRc; +use deno_fs::V8MaybeStaticStr; use deno_package_json::PackageJsonRc; use deno_path_util::normalize_path; use deno_path_util::url_from_file_path; @@ -477,11 +478,11 @@ where } #[op2(stack_trace)] -#[string] +#[to_v8] pub fn op_require_read_file

( state: &mut OpState, #[string] file_path: String, -) -> Result +) -> Result where P: NodePermissions + 'static, { @@ -492,6 +493,7 @@ where let loader = state.borrow::(); loader .load_text_file_lossy(&file_path) + .map(V8MaybeStaticStr) .map_err(|e| RequireErrorKind::ReadModule(e).into_box()) } diff --git a/resolvers/deno/fs.rs b/resolvers/deno/fs.rs index 4929f4508e..f2021a73a9 100644 --- a/resolvers/deno/fs.rs +++ b/resolvers/deno/fs.rs @@ -1,5 +1,6 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. +use std::borrow::Cow; use std::path::Path; use std::path::PathBuf; @@ -10,7 +11,10 @@ pub struct DirEntry { } pub trait DenoResolverFs { - fn read_to_string_lossy(&self, path: &Path) -> std::io::Result; + fn read_to_string_lossy( + &self, + path: &Path, + ) -> std::io::Result>; fn realpath_sync(&self, path: &Path) -> std::io::Result; fn exists_sync(&self, path: &Path) -> bool; fn is_dir_sync(&self, path: &Path) -> bool;