From c1f23c578881b85ae79b524a60160d8f4fb7151b Mon Sep 17 00:00:00 2001 From: Luca Casonato Date: Sat, 8 Jun 2024 18:36:13 +0200 Subject: [PATCH] fix(ext/node): lossy UTF-8 read node_modules files (#24140) Previously various reads of files in `node_modules` would error on invalid UTF-8. These were cases involving: - reading package.json from Rust - reading package.json from JS - reading CommonJS files from JS - reading CommonJS files from Rust (for ESM translation) - reading ESM files from Rust --- cli/node.rs | 2 +- cli/resolver.rs | 7 +++++- cli/util/gitignore.rs | 2 +- ext/fs/interface.rs | 25 +++++++++++++------ ext/fs/ops.rs | 25 ++++++------------- ext/node/ops/require.rs | 2 +- ext/node/package_json.rs | 2 +- .../lossy-utf8-module/1.0.0/index.js | 1 + .../lossy-utf8-module/1.0.0/package.json | 6 +++++ .../lossy-utf8-package-json/1.0.0/index.js | 1 + .../1.0.0/package.json | 7 ++++++ .../lossy-utf8-script/1.0.0/index.js | 1 + .../lossy-utf8-script/1.0.0/package.json | 6 +++++ .../npm/lossy_utf8_module/__test__.jsonc | 5 ++++ tests/specs/npm/lossy_utf8_module/main.mjs | 3 +++ tests/specs/npm/lossy_utf8_module/main.out | 3 +++ .../lossy_utf8_package_json/__test__.jsonc | 5 ++++ .../npm/lossy_utf8_package_json/main.mjs | 3 +++ .../npm/lossy_utf8_package_json/main.out | 3 +++ .../npm/lossy_utf8_script/__test__.jsonc | 5 ++++ tests/specs/npm/lossy_utf8_script/main.mjs | 3 +++ tests/specs/npm/lossy_utf8_script/main.out | 3 +++ .../lossy_utf8_script_from_cjs/__test__.jsonc | 6 +++++ .../npm/lossy_utf8_script_from_cjs/main.mjs | 10 ++++++++ .../npm/lossy_utf8_script_from_cjs/main.out | 4 +++ tests/util/server/src/npm.rs | 5 ++-- 26 files changed, 112 insertions(+), 33 deletions(-) create mode 100644 tests/registry/npm/@denotest/lossy-utf8-module/1.0.0/index.js create mode 100644 tests/registry/npm/@denotest/lossy-utf8-module/1.0.0/package.json create mode 100644 tests/registry/npm/@denotest/lossy-utf8-package-json/1.0.0/index.js create mode 100644 tests/registry/npm/@denotest/lossy-utf8-package-json/1.0.0/package.json create mode 100644 tests/registry/npm/@denotest/lossy-utf8-script/1.0.0/index.js create mode 100644 tests/registry/npm/@denotest/lossy-utf8-script/1.0.0/package.json create mode 100644 tests/specs/npm/lossy_utf8_module/__test__.jsonc create mode 100644 tests/specs/npm/lossy_utf8_module/main.mjs create mode 100644 tests/specs/npm/lossy_utf8_module/main.out create mode 100644 tests/specs/npm/lossy_utf8_package_json/__test__.jsonc create mode 100644 tests/specs/npm/lossy_utf8_package_json/main.mjs create mode 100644 tests/specs/npm/lossy_utf8_package_json/main.out create mode 100644 tests/specs/npm/lossy_utf8_script/__test__.jsonc create mode 100644 tests/specs/npm/lossy_utf8_script/main.mjs create mode 100644 tests/specs/npm/lossy_utf8_script/main.out create mode 100644 tests/specs/npm/lossy_utf8_script_from_cjs/__test__.jsonc create mode 100644 tests/specs/npm/lossy_utf8_script_from_cjs/main.mjs create mode 100644 tests/specs/npm/lossy_utf8_script_from_cjs/main.out diff --git a/cli/node.rs b/cli/node.rs index c696fcac93..5ecbacdc72 100644 --- a/cli/node.rs +++ b/cli/node.rs @@ -125,7 +125,7 @@ impl CjsCodeAnalyzer for CliCjsCodeAnalyzer { None => { self .fs - .read_text_file_async(specifier.to_file_path().unwrap(), None) + .read_text_file_lossy_async(specifier.to_file_path().unwrap(), None) .await? } }; diff --git a/cli/resolver.rs b/cli/resolver.rs index 301cd0666d..3edc6f4292 100644 --- a/cli/resolver.rs +++ b/cli/resolver.rs @@ -320,7 +320,12 @@ impl NpmModuleLoader { let code = if self.cjs_resolutions.contains(specifier) { // translate cjs to esm if it's cjs and inject node globals - let code = String::from_utf8(code)?; + let code = match String::from_utf8_lossy(&code) { + Cow::Owned(code) => code, + // SAFETY: `String::from_utf8_lossy` guarantees that the result is valid + // UTF-8 if `Cow::Borrowed` is returned. + Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(code) }, + }; ModuleSourceCode::String( self .node_code_translator diff --git a/cli/util/gitignore.rs b/cli/util/gitignore.rs index 12a450d64e..4538e0912f 100644 --- a/cli/util/gitignore.rs +++ b/cli/util/gitignore.rs @@ -105,7 +105,7 @@ impl GitIgnoreTree { }); let current = self .fs - .read_text_file_sync(&dir_path.join(".gitignore"), None) + .read_text_file_lossy_sync(&dir_path.join(".gitignore"), None) .ok() .and_then(|text| { let mut builder = ignore::gitignore::GitignoreBuilder::new(dir_path); diff --git a/ext/fs/interface.rs b/ext/fs/interface.rs index 70f9fdf636..5031dc1349 100644 --- a/ext/fs/interface.rs +++ b/ext/fs/interface.rs @@ -1,5 +1,6 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. +use std::borrow::Cow; use std::path::Path; use std::path::PathBuf; use std::rc::Rc; @@ -284,24 +285,32 @@ pub trait FileSystem: std::fmt::Debug + MaybeSend + MaybeSync { self.stat_sync(path).is_ok() } - fn read_text_file_sync( + fn read_text_file_lossy_sync( &self, path: &Path, access_check: Option, ) -> FsResult { let buf = self.read_file_sync(path, access_check)?; - String::from_utf8(buf).map_err(|err| { - std::io::Error::new(std::io::ErrorKind::InvalidData, err).into() - }) + Ok(string_from_utf8_lossy(buf)) } - async fn read_text_file_async<'a>( + async fn read_text_file_lossy_async<'a>( &'a self, path: PathBuf, access_check: Option>, ) -> FsResult { let buf = self.read_file_async(path, access_check).await?; - String::from_utf8(buf).map_err(|err| { - std::io::Error::new(std::io::ErrorKind::InvalidData, err).into() - }) + Ok(string_from_utf8_lossy(buf)) + } +} + +// Like String::from_utf8_lossy but operates on owned values +#[inline(always)] +fn string_from_utf8_lossy(buf: Vec) -> String { + match String::from_utf8_lossy(&buf) { + // buf contained non-utf8 chars than have been patched + Cow::Owned(s) => s, + // SAFETY: if Borrowed then the buf only contains utf8 chars, + // we do this instead of .into_owned() to avoid copying the input buf + Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(buf) }, } } diff --git a/ext/fs/ops.rs b/ext/fs/ops.rs index 8e715d8258..57b0e0b9e0 100644 --- a/ext/fs/ops.rs +++ b/ext/fs/ops.rs @@ -1,6 +1,5 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. -use std::borrow::Cow; use std::cell::RefCell; use std::io; use std::io::SeekFrom; @@ -1333,11 +1332,11 @@ where let fs = state.borrow::().clone(); let mut access_check = sync_permission_check::

(state.borrow_mut(), "Deno.readFileSync()"); - let buf = fs - .read_file_sync(&path, Some(&mut access_check)) + let str = fs + .read_text_file_lossy_sync(&path, Some(&mut access_check)) .map_err(|error| map_permission_error("readfile", error, &path))?; - Ok(string_from_utf8_lossy(buf)) + Ok(str) } #[op2(async)] @@ -1361,9 +1360,10 @@ where (state.borrow::().clone(), cancel_handle) }; - let fut = fs.read_file_async(path.clone(), Some(&mut access_check)); + let fut = + fs.read_text_file_lossy_async(path.clone(), Some(&mut access_check)); - let buf = if let Some(cancel_handle) = cancel_handle { + let str = if let Some(cancel_handle) = cancel_handle { let res = fut.or_cancel(cancel_handle).await; if let Some(cancel_rid) = cancel_rid { @@ -1379,18 +1379,7 @@ where .map_err(|error| map_permission_error("readfile", error, &path))? }; - Ok(string_from_utf8_lossy(buf)) -} - -// Like String::from_utf8_lossy but operates on owned values -fn string_from_utf8_lossy(buf: Vec) -> String { - match String::from_utf8_lossy(&buf) { - // buf contained non-utf8 chars than have been patched - Cow::Owned(s) => s, - // SAFETY: if Borrowed then the buf only contains utf8 chars, - // we do this instead of .into_owned() to avoid copying the input buf - Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(buf) }, - } + Ok(str) } fn to_seek_from(offset: i64, whence: i32) -> Result { diff --git a/ext/node/ops/require.rs b/ext/node/ops/require.rs index de26870013..3e1f1c6ae1 100644 --- a/ext/node/ops/require.rs +++ b/ext/node/ops/require.rs @@ -451,7 +451,7 @@ where let file_path = PathBuf::from(file_path); ensure_read_permission::

(state, &file_path)?; let fs = state.borrow::(); - Ok(fs.read_text_file_sync(&file_path, None)?) + Ok(fs.read_text_file_lossy_sync(&file_path, None)?) } #[op2] diff --git a/ext/node/package_json.rs b/ext/node/package_json.rs index adae7d6344..a19a2d64d8 100644 --- a/ext/node/package_json.rs +++ b/ext/node/package_json.rs @@ -82,7 +82,7 @@ impl PackageJson { return Ok(CACHE.with(|cache| cache.borrow()[&path].clone())); } - let source = match fs.read_text_file_sync(&path, None) { + let source = match fs.read_text_file_lossy_sync(&path, None) { Ok(source) => source, Err(err) if err.kind() == ErrorKind::NotFound => { return Ok(Rc::new(PackageJson::empty(path))); diff --git a/tests/registry/npm/@denotest/lossy-utf8-module/1.0.0/index.js b/tests/registry/npm/@denotest/lossy-utf8-module/1.0.0/index.js new file mode 100644 index 0000000000..411f3c372e --- /dev/null +++ b/tests/registry/npm/@denotest/lossy-utf8-module/1.0.0/index.js @@ -0,0 +1 @@ +export default 'þþÿÿ'; \ No newline at end of file diff --git a/tests/registry/npm/@denotest/lossy-utf8-module/1.0.0/package.json b/tests/registry/npm/@denotest/lossy-utf8-module/1.0.0/package.json new file mode 100644 index 0000000000..1260655fba --- /dev/null +++ b/tests/registry/npm/@denotest/lossy-utf8-module/1.0.0/package.json @@ -0,0 +1,6 @@ +{ + "name": "@denotest/lossy-utf8-script", + "version": "1.0.0", + "type": "module", + "dependencies": {} +} diff --git a/tests/registry/npm/@denotest/lossy-utf8-package-json/1.0.0/index.js b/tests/registry/npm/@denotest/lossy-utf8-package-json/1.0.0/index.js new file mode 100644 index 0000000000..34b58e6e12 --- /dev/null +++ b/tests/registry/npm/@denotest/lossy-utf8-package-json/1.0.0/index.js @@ -0,0 +1 @@ +export default "hello"; diff --git a/tests/registry/npm/@denotest/lossy-utf8-package-json/1.0.0/package.json b/tests/registry/npm/@denotest/lossy-utf8-package-json/1.0.0/package.json new file mode 100644 index 0000000000..5ca42d0d12 --- /dev/null +++ b/tests/registry/npm/@denotest/lossy-utf8-package-json/1.0.0/package.json @@ -0,0 +1,7 @@ +{ + "name": "@denotest/lossy-utf8-package-json", + "version": "1.0.0", + "type": "module", + "dependencies": {}, + "files": ["þþÿÿ"] +} \ No newline at end of file diff --git a/tests/registry/npm/@denotest/lossy-utf8-script/1.0.0/index.js b/tests/registry/npm/@denotest/lossy-utf8-script/1.0.0/index.js new file mode 100644 index 0000000000..adbfd382a7 --- /dev/null +++ b/tests/registry/npm/@denotest/lossy-utf8-script/1.0.0/index.js @@ -0,0 +1 @@ +module.exports = 'þþÿÿ'; \ No newline at end of file diff --git a/tests/registry/npm/@denotest/lossy-utf8-script/1.0.0/package.json b/tests/registry/npm/@denotest/lossy-utf8-script/1.0.0/package.json new file mode 100644 index 0000000000..e23605945c --- /dev/null +++ b/tests/registry/npm/@denotest/lossy-utf8-script/1.0.0/package.json @@ -0,0 +1,6 @@ +{ + "name": "@denotest/lossy-utf8-script", + "version": "1.0.0", + "type": "commonjs", + "dependencies": {} +} diff --git a/tests/specs/npm/lossy_utf8_module/__test__.jsonc b/tests/specs/npm/lossy_utf8_module/__test__.jsonc new file mode 100644 index 0000000000..1ee7d2d6ce --- /dev/null +++ b/tests/specs/npm/lossy_utf8_module/__test__.jsonc @@ -0,0 +1,5 @@ +{ + "args": "run main.mjs", + "output": "main.out", + "exitCode": 0 +} diff --git a/tests/specs/npm/lossy_utf8_module/main.mjs b/tests/specs/npm/lossy_utf8_module/main.mjs new file mode 100644 index 0000000000..54cfbb16ae --- /dev/null +++ b/tests/specs/npm/lossy_utf8_module/main.mjs @@ -0,0 +1,3 @@ +import mod from "npm:@denotest/lossy-utf8-module@1.0.0"; + +console.log(mod); diff --git a/tests/specs/npm/lossy_utf8_module/main.out b/tests/specs/npm/lossy_utf8_module/main.out new file mode 100644 index 0000000000..0e96f9ebb3 --- /dev/null +++ b/tests/specs/npm/lossy_utf8_module/main.out @@ -0,0 +1,3 @@ +Download http://localhost:4260/@denotest/lossy-utf8-module +Download http://localhost:4260/@denotest/lossy-utf8-module/1.0.0.tgz +���� diff --git a/tests/specs/npm/lossy_utf8_package_json/__test__.jsonc b/tests/specs/npm/lossy_utf8_package_json/__test__.jsonc new file mode 100644 index 0000000000..1ee7d2d6ce --- /dev/null +++ b/tests/specs/npm/lossy_utf8_package_json/__test__.jsonc @@ -0,0 +1,5 @@ +{ + "args": "run main.mjs", + "output": "main.out", + "exitCode": 0 +} diff --git a/tests/specs/npm/lossy_utf8_package_json/main.mjs b/tests/specs/npm/lossy_utf8_package_json/main.mjs new file mode 100644 index 0000000000..9a63eb604f --- /dev/null +++ b/tests/specs/npm/lossy_utf8_package_json/main.mjs @@ -0,0 +1,3 @@ +import mod from "npm:@denotest/lossy-utf8-package-json@1.0.0"; + +console.log(mod); diff --git a/tests/specs/npm/lossy_utf8_package_json/main.out b/tests/specs/npm/lossy_utf8_package_json/main.out new file mode 100644 index 0000000000..99aa5ab61f --- /dev/null +++ b/tests/specs/npm/lossy_utf8_package_json/main.out @@ -0,0 +1,3 @@ +Download http://localhost:4260/@denotest/lossy-utf8-package-json +Download http://localhost:4260/@denotest/lossy-utf8-package-json/1.0.0.tgz +hello diff --git a/tests/specs/npm/lossy_utf8_script/__test__.jsonc b/tests/specs/npm/lossy_utf8_script/__test__.jsonc new file mode 100644 index 0000000000..1ee7d2d6ce --- /dev/null +++ b/tests/specs/npm/lossy_utf8_script/__test__.jsonc @@ -0,0 +1,5 @@ +{ + "args": "run main.mjs", + "output": "main.out", + "exitCode": 0 +} diff --git a/tests/specs/npm/lossy_utf8_script/main.mjs b/tests/specs/npm/lossy_utf8_script/main.mjs new file mode 100644 index 0000000000..59fe555aae --- /dev/null +++ b/tests/specs/npm/lossy_utf8_script/main.mjs @@ -0,0 +1,3 @@ +import mod from "npm:@denotest/lossy-utf8-script@1.0.0"; + +console.log(mod); diff --git a/tests/specs/npm/lossy_utf8_script/main.out b/tests/specs/npm/lossy_utf8_script/main.out new file mode 100644 index 0000000000..180ecdf1c3 --- /dev/null +++ b/tests/specs/npm/lossy_utf8_script/main.out @@ -0,0 +1,3 @@ +Download http://localhost:4260/@denotest/lossy-utf8-script +Download http://localhost:4260/@denotest/lossy-utf8-script/1.0.0.tgz +���� diff --git a/tests/specs/npm/lossy_utf8_script_from_cjs/__test__.jsonc b/tests/specs/npm/lossy_utf8_script_from_cjs/__test__.jsonc new file mode 100644 index 0000000000..c8d353de00 --- /dev/null +++ b/tests/specs/npm/lossy_utf8_script_from_cjs/__test__.jsonc @@ -0,0 +1,6 @@ +{ + "args": "run --node-modules-dir --allow-read main.mjs", + "output": "main.out", + "exitCode": 0, + "tempDir": true +} diff --git a/tests/specs/npm/lossy_utf8_script_from_cjs/main.mjs b/tests/specs/npm/lossy_utf8_script_from_cjs/main.mjs new file mode 100644 index 0000000000..a9e70adfb8 --- /dev/null +++ b/tests/specs/npm/lossy_utf8_script_from_cjs/main.mjs @@ -0,0 +1,10 @@ +import { createRequire } from "node:module"; + +// Import this so that deno_graph knows to download this file. +if (false) import("npm:@denotest/lossy-utf8-script@1.0.0"); + +const require = createRequire(import.meta.url); + +const mod = require("@denotest/lossy-utf8-script"); + +console.log(mod); diff --git a/tests/specs/npm/lossy_utf8_script_from_cjs/main.out b/tests/specs/npm/lossy_utf8_script_from_cjs/main.out new file mode 100644 index 0000000000..4f062a2aef --- /dev/null +++ b/tests/specs/npm/lossy_utf8_script_from_cjs/main.out @@ -0,0 +1,4 @@ +Download http://localhost:4260/@denotest/lossy-utf8-script +Download http://localhost:4260/@denotest/lossy-utf8-script/1.0.0.tgz +Initialize @denotest/lossy-utf8-script@1.0.0 +���� diff --git a/tests/util/server/src/npm.rs b/tests/util/server/src/npm.rs index 66b7bddcd7..e7d8d96aba 100644 --- a/tests/util/server/src/npm.rs +++ b/tests/util/server/src/npm.rs @@ -226,10 +226,11 @@ fn get_npm_package( tarballs.insert(version.clone(), tarball_bytes); let package_json_path = version_folder.join("package.json"); - let package_json_text = fs::read_to_string(&package_json_path) - .with_context(|| { + let package_json_bytes = + fs::read(&package_json_path).with_context(|| { format!("Error reading package.json at {}", package_json_path) })?; + let package_json_text = String::from_utf8_lossy(&package_json_bytes); let mut version_info: serde_json::Map = serde_json::from_str(&package_json_text)?; version_info.insert("dist".to_string(), dist.into());