From ad77ba0f7b40760e04b79d9789da16d7c49010b8 Mon Sep 17 00:00:00 2001 From: Matt Mastracci Date: Thu, 23 Mar 2023 16:00:46 -0600 Subject: [PATCH] fix(core): panic at build time if extension code contains anything other than 7-bit ASCII (#18372) This will improve diagnostics and catch any non-ASCII extension code early. This will use `debug_assert!` rather than `assert!` to avoid runtime costs, and ensures (in debug_assert mode only) that all extension source files are ASCII as we load them. --- cli/tsc/00_typescript.js | 8 ++--- core/extensions.rs | 44 +++++++++++++++++------- core/modules.rs | 10 +++--- core/runtime.rs | 4 +-- ext/node/polyfills/internal/cli_table.ts | 5 --- runtime/build.rs | 2 +- 6 files changed, 44 insertions(+), 29 deletions(-) diff --git a/cli/tsc/00_typescript.js b/cli/tsc/00_typescript.js index a54ab1e321..6bbd968a92 100644 --- a/cli/tsc/00_typescript.js +++ b/cli/tsc/00_typescript.js @@ -128952,7 +128952,7 @@ ${lanes.join("\n")} // Adding or removing imports from node could change the outcome of that guess, so could change the suggestions list. typeAcquisitionEnabled && consumesNodeCoreModules(oldSourceFile) !== consumesNodeCoreModules(newSourceFile) || // Module agumentation and ambient module changes can add or remove exports available to be auto-imported. // Changes elsewhere in the file can change the *type* of an export in a module augmentation, - // but type info is gathered in getCompletionEntryDetails, which doesn’t use the cache. + // but type info is gathered in getCompletionEntryDetails, which doesn't use the cache. !arrayIsEqualTo(oldSourceFile.moduleAugmentations, newSourceFile.moduleAugmentations) || !ambientModuleDeclarationsAreEqual(oldSourceFile, newSourceFile)) { cache.clear(); return true; @@ -137732,7 +137732,7 @@ ${lanes.join("\n")} function symbolReferenceIsAlsoMissingAwait(reference, diagnostics, sourceFile, checker) { const errorNode = isPropertyAccessExpression(reference.parent) ? reference.parent.name : isBinaryExpression(reference.parent) ? reference.parent : reference; const diagnostic = find(diagnostics, (diagnostic2) => diagnostic2.start === errorNode.getStart(sourceFile) && diagnostic2.start + diagnostic2.length === errorNode.getEnd()); - return diagnostic && contains(errorCodes3, diagnostic.code) || // A Promise is usually not correct in a binary expression (it’s not valid + return diagnostic && contains(errorCodes3, diagnostic.code) || // A Promise is usually not correct in a binary expression (it's not valid // in an arithmetic expression and an equality comparison seems unusual), // but if the other side of the binary expression has an error, the side // is typed `any` which will squash the error that would identify this @@ -153491,7 +153491,7 @@ ${lanes.join("\n")} */ this.markSeenReExportRHS = nodeSeenTracker(); this.symbolIdToReferences = []; - // Source file ID → symbol ID → Whether the symbol has been searched for in the source file. + // Source file ID -> symbol ID -> Whether the symbol has been searched for in the source file. this.sourceFileToSeenSymbols = []; } includesSourceFile(sourceFile) { @@ -162610,7 +162610,7 @@ ${newComment.split("\n").map((c) => ` * ${c}`).join("\n")} if (start !== end) { const textSpan = createTextSpanFromBounds(start, end); if (!selectionRange || // Skip ranges that are identical to the parent - !textSpansEqual(textSpan, selectionRange.textSpan) && // Skip ranges that don’t contain the original position + !textSpansEqual(textSpan, selectionRange.textSpan) && // Skip ranges that don't contain the original position textSpanIntersectsWithPosition(textSpan, pos)) { selectionRange = { textSpan, ...selectionRange && { parent: selectionRange } }; } diff --git a/core/extensions.rs b/core/extensions.rs index 94c4a2a794..9b4fb203a1 100644 --- a/core/extensions.rs +++ b/core/extensions.rs @@ -23,23 +23,43 @@ pub enum ExtensionFileSourceCode { LoadedFromFsDuringSnapshot(PathBuf), } -impl ExtensionFileSourceCode { - pub fn load(&self) -> Result { - match self { - ExtensionFileSourceCode::IncludedInBinary(code) => Ok((*code).into()), - ExtensionFileSourceCode::LoadedFromFsDuringSnapshot(path) => { - let msg = || format!("Failed to read \"{}\"", path.display()); - Ok(std::fs::read_to_string(path).with_context(msg)?.into()) - } - } - } -} - #[derive(Clone, Debug)] pub struct ExtensionFileSource { pub specifier: &'static str, pub code: ExtensionFileSourceCode, } + +impl ExtensionFileSource { + fn find_non_ascii(s: &str) -> String { + s.chars().filter(|c| !c.is_ascii()).collect::() + } + + pub fn load(&self) -> Result { + match &self.code { + ExtensionFileSourceCode::IncludedInBinary(code) => { + debug_assert!( + code.is_ascii(), + "Extension code must be 7-bit ASCII: {} (found {})", + self.specifier, + Self::find_non_ascii(code) + ); + Ok((*code).into()) + } + ExtensionFileSourceCode::LoadedFromFsDuringSnapshot(path) => { + let msg = || format!("Failed to read \"{}\"", path.display()); + let s = std::fs::read_to_string(path).with_context(msg)?; + debug_assert!( + s.is_ascii(), + "Extension code must be 7-bit ASCII: {} (found {})", + self.specifier, + Self::find_non_ascii(&s) + ); + Ok(s.into()) + } + } + } +} + pub type OpFnRef = v8::FunctionCallback; pub type OpMiddlewareFn = dyn Fn(OpDecl) -> OpDecl; pub type OpStateFn = dyn FnOnce(&mut OpState); diff --git a/core/modules.rs b/core/modules.rs index 78efdedfdf..cfd68d245e 100644 --- a/core/modules.rs +++ b/core/modules.rs @@ -309,7 +309,7 @@ impl From> for ModuleCode { impl From<&'static str> for ModuleCode { #[inline(always)] fn from(value: &'static str) -> Self { - assert!(value.is_ascii()); + debug_assert!(value.is_ascii()); ModuleCode::Static(value.as_bytes()) } } @@ -331,7 +331,7 @@ impl From> for ModuleCode { impl From<&'static [u8]> for ModuleCode { #[inline(always)] fn from(value: &'static [u8]) -> Self { - assert!(value.is_ascii()); + debug_assert!(value.is_ascii()); ModuleCode::Static(value) } } @@ -339,7 +339,7 @@ impl From<&'static [u8]> for ModuleCode { impl From<&'static [u8; N]> for ModuleCode { #[inline(always)] fn from(value: &'static [u8; N]) -> Self { - assert!(value.is_ascii()); + debug_assert!(value.is_ascii()); ModuleCode::Static(value) } } @@ -583,7 +583,7 @@ impl ModuleLoader for ExtModuleLoader { let result = if let Some(load_callback) = &self.maybe_load_callback { load_callback(file_source) } else { - match file_source.code.load() { + match file_source.load() { Ok(code) => Ok(code), Err(err) => return futures::future::err(err).boxed_local(), } @@ -1517,7 +1517,7 @@ impl ModuleMap { ) -> Option> { match name { ModuleName::Static(s) => { - assert!(s.is_ascii()); + debug_assert!(s.is_ascii()); v8::String::new_external_onebyte_static(scope, s.as_bytes()) } ModuleName::NotStatic(s) => v8::String::new(scope, s), diff --git a/core/runtime.rs b/core/runtime.rs index c5afbda536..787bac9727 100644 --- a/core/runtime.rs +++ b/core/runtime.rs @@ -751,7 +751,7 @@ impl JsRuntime { realm.execute_script( self.v8_isolate(), file_source.specifier, - file_source.code.load()?, + file_source.load()?, )?; } } @@ -2544,7 +2544,7 @@ impl JsRealm { let scope = &mut self.handle_scope(isolate); let source = Self::string_from_code(scope, &source_code).unwrap(); - assert!(name.is_ascii()); + debug_assert!(name.is_ascii()); let name = v8::String::new_external_onebyte_static(scope, name.as_bytes()).unwrap(); let origin = bindings::script_origin(scope, name); diff --git a/ext/node/polyfills/internal/cli_table.ts b/ext/node/polyfills/internal/cli_table.ts index a3740d8892..b3523fffa5 100644 --- a/ext/node/polyfills/internal/cli_table.ts +++ b/ext/node/polyfills/internal/cli_table.ts @@ -3,11 +3,6 @@ import { getStringWidth } from "ext:deno_node/internal/util/inspect.mjs"; -// The use of Unicode characters below is the only non-comment use of non-ASCII -// Unicode characters in Node.js built-in modules. If they are ever removed or -// rewritten with \u escapes, then a test will need to be (re-)added to Node.js -// core to verify that Unicode characters work in built-ins. -// Refs: https://github.com/nodejs/node/issues/10673 const tableChars = { middleMiddle: "\u2500", rowMiddle: "\u253c", diff --git a/runtime/build.rs b/runtime/build.rs index 5d0ba0cc7c..df20c54277 100644 --- a/runtime/build.rs +++ b/runtime/build.rs @@ -34,7 +34,7 @@ mod startup_snapshot { file_source.specifier ), }; - let code = file_source.code.load()?; + let code = file_source.load()?; if !should_transpile { return Ok(code);