mirror of
https://github.com/denoland/deno.git
synced 2024-11-21 15:04:11 -05:00
fix(ext/node): lossy UTF-8 read node_modules files (#24140)
Previously, various reads of files in `node_modules` would error on invalid UTF-8. The affected cases were:
- reading package.json from Rust
- reading package.json from JS
- reading CommonJS files from JS
- reading CommonJS files from Rust (for ESM translation)
- reading ESM files from Rust
This commit is contained in:
parent
22d34f7012
commit
c1f23c5788
26 changed files with 112 additions and 33 deletions
|
@ -125,7 +125,7 @@ impl CjsCodeAnalyzer for CliCjsCodeAnalyzer {
|
||||||
None => {
|
None => {
|
||||||
self
|
self
|
||||||
.fs
|
.fs
|
||||||
.read_text_file_async(specifier.to_file_path().unwrap(), None)
|
.read_text_file_lossy_async(specifier.to_file_path().unwrap(), None)
|
||||||
.await?
|
.await?
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -320,7 +320,12 @@ impl NpmModuleLoader {
|
||||||
|
|
||||||
let code = if self.cjs_resolutions.contains(specifier) {
|
let code = if self.cjs_resolutions.contains(specifier) {
|
||||||
// translate cjs to esm if it's cjs and inject node globals
|
// translate cjs to esm if it's cjs and inject node globals
|
||||||
let code = String::from_utf8(code)?;
|
let code = match String::from_utf8_lossy(&code) {
|
||||||
|
Cow::Owned(code) => code,
|
||||||
|
// SAFETY: `String::from_utf8_lossy` guarantees that the result is valid
|
||||||
|
// UTF-8 if `Cow::Borrowed` is returned.
|
||||||
|
Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(code) },
|
||||||
|
};
|
||||||
ModuleSourceCode::String(
|
ModuleSourceCode::String(
|
||||||
self
|
self
|
||||||
.node_code_translator
|
.node_code_translator
|
||||||
|
|
|
@ -105,7 +105,7 @@ impl GitIgnoreTree {
|
||||||
});
|
});
|
||||||
let current = self
|
let current = self
|
||||||
.fs
|
.fs
|
||||||
.read_text_file_sync(&dir_path.join(".gitignore"), None)
|
.read_text_file_lossy_sync(&dir_path.join(".gitignore"), None)
|
||||||
.ok()
|
.ok()
|
||||||
.and_then(|text| {
|
.and_then(|text| {
|
||||||
let mut builder = ignore::gitignore::GitignoreBuilder::new(dir_path);
|
let mut builder = ignore::gitignore::GitignoreBuilder::new(dir_path);
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
|
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
|
||||||
|
|
||||||
|
use std::borrow::Cow;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
|
@ -284,24 +285,32 @@ pub trait FileSystem: std::fmt::Debug + MaybeSend + MaybeSync {
|
||||||
self.stat_sync(path).is_ok()
|
self.stat_sync(path).is_ok()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read_text_file_sync(
|
fn read_text_file_lossy_sync(
|
||||||
&self,
|
&self,
|
||||||
path: &Path,
|
path: &Path,
|
||||||
access_check: Option<AccessCheckCb>,
|
access_check: Option<AccessCheckCb>,
|
||||||
) -> FsResult<String> {
|
) -> FsResult<String> {
|
||||||
let buf = self.read_file_sync(path, access_check)?;
|
let buf = self.read_file_sync(path, access_check)?;
|
||||||
String::from_utf8(buf).map_err(|err| {
|
Ok(string_from_utf8_lossy(buf))
|
||||||
std::io::Error::new(std::io::ErrorKind::InvalidData, err).into()
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
async fn read_text_file_async<'a>(
|
async fn read_text_file_lossy_async<'a>(
|
||||||
&'a self,
|
&'a self,
|
||||||
path: PathBuf,
|
path: PathBuf,
|
||||||
access_check: Option<AccessCheckCb<'a>>,
|
access_check: Option<AccessCheckCb<'a>>,
|
||||||
) -> FsResult<String> {
|
) -> FsResult<String> {
|
||||||
let buf = self.read_file_async(path, access_check).await?;
|
let buf = self.read_file_async(path, access_check).await?;
|
||||||
String::from_utf8(buf).map_err(|err| {
|
Ok(string_from_utf8_lossy(buf))
|
||||||
std::io::Error::new(std::io::ErrorKind::InvalidData, err).into()
|
}
|
||||||
})
|
}
|
||||||
|
|
||||||
|
// Like String::from_utf8_lossy but operates on owned values
|
||||||
|
#[inline(always)]
|
||||||
|
fn string_from_utf8_lossy(buf: Vec<u8>) -> String {
|
||||||
|
match String::from_utf8_lossy(&buf) {
|
||||||
|
// buf contained non-utf8 chars that have been patched
|
||||||
|
Cow::Owned(s) => s,
|
||||||
|
// SAFETY: if Borrowed then the buf only contains utf8 chars,
|
||||||
|
// we do this instead of .into_owned() to avoid copying the input buf
|
||||||
|
Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(buf) },
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
|
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
|
||||||
|
|
||||||
use std::borrow::Cow;
|
|
||||||
use std::cell::RefCell;
|
use std::cell::RefCell;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::io::SeekFrom;
|
use std::io::SeekFrom;
|
||||||
|
@ -1333,11 +1332,11 @@ where
|
||||||
let fs = state.borrow::<FileSystemRc>().clone();
|
let fs = state.borrow::<FileSystemRc>().clone();
|
||||||
let mut access_check =
|
let mut access_check =
|
||||||
sync_permission_check::<P>(state.borrow_mut(), "Deno.readFileSync()");
|
sync_permission_check::<P>(state.borrow_mut(), "Deno.readFileSync()");
|
||||||
let buf = fs
|
let str = fs
|
||||||
.read_file_sync(&path, Some(&mut access_check))
|
.read_text_file_lossy_sync(&path, Some(&mut access_check))
|
||||||
.map_err(|error| map_permission_error("readfile", error, &path))?;
|
.map_err(|error| map_permission_error("readfile", error, &path))?;
|
||||||
|
|
||||||
Ok(string_from_utf8_lossy(buf))
|
Ok(str)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[op2(async)]
|
#[op2(async)]
|
||||||
|
@ -1361,9 +1360,10 @@ where
|
||||||
(state.borrow::<FileSystemRc>().clone(), cancel_handle)
|
(state.borrow::<FileSystemRc>().clone(), cancel_handle)
|
||||||
};
|
};
|
||||||
|
|
||||||
let fut = fs.read_file_async(path.clone(), Some(&mut access_check));
|
let fut =
|
||||||
|
fs.read_text_file_lossy_async(path.clone(), Some(&mut access_check));
|
||||||
|
|
||||||
let buf = if let Some(cancel_handle) = cancel_handle {
|
let str = if let Some(cancel_handle) = cancel_handle {
|
||||||
let res = fut.or_cancel(cancel_handle).await;
|
let res = fut.or_cancel(cancel_handle).await;
|
||||||
|
|
||||||
if let Some(cancel_rid) = cancel_rid {
|
if let Some(cancel_rid) = cancel_rid {
|
||||||
|
@ -1379,18 +1379,7 @@ where
|
||||||
.map_err(|error| map_permission_error("readfile", error, &path))?
|
.map_err(|error| map_permission_error("readfile", error, &path))?
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(string_from_utf8_lossy(buf))
|
Ok(str)
|
||||||
}
|
|
||||||
|
|
||||||
// Like String::from_utf8_lossy but operates on owned values
|
|
||||||
fn string_from_utf8_lossy(buf: Vec<u8>) -> String {
|
|
||||||
match String::from_utf8_lossy(&buf) {
|
|
||||||
// buf contained non-utf8 chars that have been patched
|
|
||||||
Cow::Owned(s) => s,
|
|
||||||
// SAFETY: if Borrowed then the buf only contains utf8 chars,
|
|
||||||
// we do this instead of .into_owned() to avoid copying the input buf
|
|
||||||
Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(buf) },
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn to_seek_from(offset: i64, whence: i32) -> Result<SeekFrom, AnyError> {
|
fn to_seek_from(offset: i64, whence: i32) -> Result<SeekFrom, AnyError> {
|
||||||
|
|
|
@ -451,7 +451,7 @@ where
|
||||||
let file_path = PathBuf::from(file_path);
|
let file_path = PathBuf::from(file_path);
|
||||||
ensure_read_permission::<P>(state, &file_path)?;
|
ensure_read_permission::<P>(state, &file_path)?;
|
||||||
let fs = state.borrow::<FileSystemRc>();
|
let fs = state.borrow::<FileSystemRc>();
|
||||||
Ok(fs.read_text_file_sync(&file_path, None)?)
|
Ok(fs.read_text_file_lossy_sync(&file_path, None)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[op2]
|
#[op2]
|
||||||
|
|
|
@ -82,7 +82,7 @@ impl PackageJson {
|
||||||
return Ok(CACHE.with(|cache| cache.borrow()[&path].clone()));
|
return Ok(CACHE.with(|cache| cache.borrow()[&path].clone()));
|
||||||
}
|
}
|
||||||
|
|
||||||
let source = match fs.read_text_file_sync(&path, None) {
|
let source = match fs.read_text_file_lossy_sync(&path, None) {
|
||||||
Ok(source) => source,
|
Ok(source) => source,
|
||||||
Err(err) if err.kind() == ErrorKind::NotFound => {
|
Err(err) if err.kind() == ErrorKind::NotFound => {
|
||||||
return Ok(Rc::new(PackageJson::empty(path)));
|
return Ok(Rc::new(PackageJson::empty(path)));
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
export default 'þþÿÿ';
|
|
@ -0,0 +1,6 @@
|
||||||
|
{
|
||||||
|
"name": "@denotest/lossy-utf8-script",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"type": "module",
|
||||||
|
"dependencies": {}
|
||||||
|
}
|
|
@ -0,0 +1 @@
|
||||||
|
export default "hello";
|
|
@ -0,0 +1,7 @@
|
||||||
|
{
|
||||||
|
"name": "@denotest/lossy-utf8-package-json",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"type": "module",
|
||||||
|
"dependencies": {},
|
||||||
|
"files": ["þþÿÿ"]
|
||||||
|
}
|
|
@ -0,0 +1 @@
|
||||||
|
module.exports = 'þþÿÿ';
|
|
@ -0,0 +1,6 @@
|
||||||
|
{
|
||||||
|
"name": "@denotest/lossy-utf8-script",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"type": "commonjs",
|
||||||
|
"dependencies": {}
|
||||||
|
}
|
5
tests/specs/npm/lossy_utf8_module/__test__.jsonc
Normal file
5
tests/specs/npm/lossy_utf8_module/__test__.jsonc
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
{
|
||||||
|
"args": "run main.mjs",
|
||||||
|
"output": "main.out",
|
||||||
|
"exitCode": 0
|
||||||
|
}
|
3
tests/specs/npm/lossy_utf8_module/main.mjs
Normal file
3
tests/specs/npm/lossy_utf8_module/main.mjs
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
import mod from "npm:@denotest/lossy-utf8-module@1.0.0";
|
||||||
|
|
||||||
|
console.log(mod);
|
3
tests/specs/npm/lossy_utf8_module/main.out
Normal file
3
tests/specs/npm/lossy_utf8_module/main.out
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
Download http://localhost:4260/@denotest/lossy-utf8-module
|
||||||
|
Download http://localhost:4260/@denotest/lossy-utf8-module/1.0.0.tgz
|
||||||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
5
tests/specs/npm/lossy_utf8_package_json/__test__.jsonc
Normal file
5
tests/specs/npm/lossy_utf8_package_json/__test__.jsonc
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
{
|
||||||
|
"args": "run main.mjs",
|
||||||
|
"output": "main.out",
|
||||||
|
"exitCode": 0
|
||||||
|
}
|
3
tests/specs/npm/lossy_utf8_package_json/main.mjs
Normal file
3
tests/specs/npm/lossy_utf8_package_json/main.mjs
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
import mod from "npm:@denotest/lossy-utf8-package-json@1.0.0";
|
||||||
|
|
||||||
|
console.log(mod);
|
3
tests/specs/npm/lossy_utf8_package_json/main.out
Normal file
3
tests/specs/npm/lossy_utf8_package_json/main.out
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
Download http://localhost:4260/@denotest/lossy-utf8-package-json
|
||||||
|
Download http://localhost:4260/@denotest/lossy-utf8-package-json/1.0.0.tgz
|
||||||
|
hello
|
5
tests/specs/npm/lossy_utf8_script/__test__.jsonc
Normal file
5
tests/specs/npm/lossy_utf8_script/__test__.jsonc
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
{
|
||||||
|
"args": "run main.mjs",
|
||||||
|
"output": "main.out",
|
||||||
|
"exitCode": 0
|
||||||
|
}
|
3
tests/specs/npm/lossy_utf8_script/main.mjs
Normal file
3
tests/specs/npm/lossy_utf8_script/main.mjs
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
import mod from "npm:@denotest/lossy-utf8-script@1.0.0";
|
||||||
|
|
||||||
|
console.log(mod);
|
3
tests/specs/npm/lossy_utf8_script/main.out
Normal file
3
tests/specs/npm/lossy_utf8_script/main.out
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
Download http://localhost:4260/@denotest/lossy-utf8-script
|
||||||
|
Download http://localhost:4260/@denotest/lossy-utf8-script/1.0.0.tgz
|
||||||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|
@ -0,0 +1,6 @@
|
||||||
|
{
|
||||||
|
"args": "run --node-modules-dir --allow-read main.mjs",
|
||||||
|
"output": "main.out",
|
||||||
|
"exitCode": 0,
|
||||||
|
"tempDir": true
|
||||||
|
}
|
10
tests/specs/npm/lossy_utf8_script_from_cjs/main.mjs
Normal file
10
tests/specs/npm/lossy_utf8_script_from_cjs/main.mjs
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
import { createRequire } from "node:module";
|
||||||
|
|
||||||
|
// Import this so that deno_graph knows to download this file.
|
||||||
|
if (false) import("npm:@denotest/lossy-utf8-script@1.0.0");
|
||||||
|
|
||||||
|
const require = createRequire(import.meta.url);
|
||||||
|
|
||||||
|
const mod = require("@denotest/lossy-utf8-script");
|
||||||
|
|
||||||
|
console.log(mod);
|
4
tests/specs/npm/lossy_utf8_script_from_cjs/main.out
Normal file
4
tests/specs/npm/lossy_utf8_script_from_cjs/main.out
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
Download http://localhost:4260/@denotest/lossy-utf8-script
|
||||||
|
Download http://localhost:4260/@denotest/lossy-utf8-script/1.0.0.tgz
|
||||||
|
Initialize @denotest/lossy-utf8-script@1.0.0
|
||||||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|
@ -226,10 +226,11 @@ fn get_npm_package(
|
||||||
|
|
||||||
tarballs.insert(version.clone(), tarball_bytes);
|
tarballs.insert(version.clone(), tarball_bytes);
|
||||||
let package_json_path = version_folder.join("package.json");
|
let package_json_path = version_folder.join("package.json");
|
||||||
let package_json_text = fs::read_to_string(&package_json_path)
|
let package_json_bytes =
|
||||||
.with_context(|| {
|
fs::read(&package_json_path).with_context(|| {
|
||||||
format!("Error reading package.json at {}", package_json_path)
|
format!("Error reading package.json at {}", package_json_path)
|
||||||
})?;
|
})?;
|
||||||
|
let package_json_text = String::from_utf8_lossy(&package_json_bytes);
|
||||||
let mut version_info: serde_json::Map<String, serde_json::Value> =
|
let mut version_info: serde_json::Map<String, serde_json::Value> =
|
||||||
serde_json::from_str(&package_json_text)?;
|
serde_json::from_str(&package_json_text)?;
|
||||||
version_info.insert("dist".to_string(), dist.into());
|
version_info.insert("dist".to_string(), dist.into());
|
||||||
|
|
Loading…
Reference in a new issue