diff --git a/.dprint.json b/.dprint.json index 91c827fc1a..a0f502b943 100644 --- a/.dprint.json +++ b/.dprint.json @@ -25,6 +25,7 @@ "cli/tests/testdata/inline_js_source_map*", "cli/tests/testdata/badly_formatted.md", "cli/tests/testdata/badly_formatted.json", + "cli/tests/testdata/byte_order_mark.ts", "cli/tsc/*typescript.js", "test_util/std", "test_util/wpt", diff --git a/cli/ast/mod.rs b/cli/ast/mod.rs index 15414ba8ec..232db13055 100644 --- a/cli/ast/mod.rs +++ b/cli/ast/mod.rs @@ -2,6 +2,7 @@ use crate::config_file; use crate::media_type::MediaType; +use crate::text_encoding::strip_bom; use deno_core::error::AnyError; use deno_core::resolve_url_or_path; @@ -392,10 +393,15 @@ pub fn parse( source: &str, media_type: &MediaType, ) -> Result { + let source = strip_bom(source); let info = SourceFileInfo::new(specifier, source); let input = StringInput::new(source, BytePos(0), BytePos(source.len() as u32)); - let (comments, module) = parse_string_input(&info, input, media_type)?; + let (comments, module) = + parse_string_input(input, media_type).map_err(|err| Diagnostic { + location: info.get_location(err.span().lo), + message: err.into_kind().msg().to_string(), + })?; Ok(ParsedModule { info: Arc::new(info), @@ -468,13 +474,17 @@ pub fn transpile_module( globals: &Globals, cm: Rc, ) -> Result<(Rc, Module), AnyError> { - let info = SourceFileInfo::new(specifier, source); + let source = strip_bom(source); let source_file = cm.new_source_file( FileName::Custom(specifier.to_string()), source.to_string(), ); let input = StringInput::from(&*source_file); - let (comments, module) = parse_string_input(&info, input, media_type)?; + let (comments, module) = + parse_string_input(input, media_type).map_err(|err| Diagnostic { + location: cm.lookup_char_pos(err.span().lo).into(), + message: err.into_kind().msg().to_string(), + })?; let jsx_pass = react::react( cm, @@ -511,19 +521,17 @@ pub fn transpile_module( } fn parse_string_input( - info: &SourceFileInfo, input: StringInput, media_type: &MediaType, -) -> Result<(SingleThreadedComments, Module), AnyError> { +) -> Result< + (SingleThreadedComments, Module), + swc_ecmascript::parser::error::Error, +> { let syntax = get_syntax(media_type); let comments = SingleThreadedComments::default(); let lexer = Lexer::new(syntax, TARGET, input, Some(&comments)); let mut parser = swc_ecmascript::parser::Parser::new_from(lexer); - - let module = parser.parse_module().map_err(|err| Diagnostic { - location: info.get_location(err.span().lo), - message: err.into_kind().msg().to_string(), - })?; + let module = parser.parse_module()?; Ok((comments, module)) } diff --git a/cli/tests/integration/run_tests.rs b/cli/tests/integration/run_tests.rs index f5ac82e9cf..c7e0325ecc 100644 --- a/cli/tests/integration/run_tests.rs +++ b/cli/tests/integration/run_tests.rs @@ -1788,3 +1788,8 @@ itest!(tls_connecttls { args: "run --quiet --reload --allow-net --allow-read --cert tls/RootCA.pem tls_connecttls.js", output: "tls.out", }); + +itest!(byte_order_mark { + args: "run --no-check byte_order_mark.ts", + output: "byte_order_mark.out", +}); diff --git a/cli/tests/testdata/byte_order_mark.out b/cli/tests/testdata/byte_order_mark.out new file mode 100644 index 0000000000..557db03de9 --- /dev/null +++ b/cli/tests/testdata/byte_order_mark.out @@ -0,0 +1 @@ +Hello World diff --git a/cli/tests/testdata/byte_order_mark.ts b/cli/tests/testdata/byte_order_mark.ts new file mode 100644 index 0000000000..40eb23c1d0 --- /dev/null +++ b/cli/tests/testdata/byte_order_mark.ts @@ -0,0 +1,4 @@ +import "./001_hello.js"; +// Note this file starts with special byte order mark +// it's important that this file is a .ts typescript file which is passed to +// deno through `--no-check` mode. diff --git a/cli/text_encoding.rs b/cli/text_encoding.rs index 8d316909c8..f61b877dc0 100644 --- a/cli/text_encoding.rs +++ b/cli/text_encoding.rs @@ -6,6 +6,8 @@ use std::{ io::{Error, ErrorKind}, }; +pub const BOM_CHAR: char = '\u{FEFF}'; + /// Attempts to detect the character encoding of the provided bytes. /// /// Supports UTF-8, UTF-16 Little Endian and UTF-16 Big Endian. @@ -43,6 +45,15 @@ pub fn convert_to_utf8<'a>( } } +/// Strips the byte order mark from the provided text if it exists. +pub fn strip_bom(text: &str) -> &str { + if text.starts_with(BOM_CHAR) { + &text[BOM_CHAR.len_utf8()..] + } else { + text + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/cli/tools/fmt.rs b/cli/tools/fmt.rs index a02b86b179..33c3599d7b 100644 --- a/cli/tools/fmt.rs +++ b/cli/tools/fmt.rs @@ -28,8 +28,6 @@ use std::path::PathBuf; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Mutex}; -const BOM_CHAR: char = '\u{FEFF}'; - /// Format JavaScript/TypeScript files. pub async fn format( args: Vec, @@ -350,12 +348,11 @@ fn read_file_contents(file_path: &Path) -> Result { let file_bytes = fs::read(&file_path)?; let charset = text_encoding::detect_charset(&file_bytes); let file_text = text_encoding::convert_to_utf8(&file_bytes, charset)?; - let had_bom = file_text.starts_with(BOM_CHAR); + let had_bom = file_text.starts_with(text_encoding::BOM_CHAR); let text = if had_bom { - // remove the BOM - String::from(&file_text[BOM_CHAR.len_utf8()..]) + text_encoding::strip_bom(&file_text).to_string() } else { - String::from(file_text) + file_text.to_string() }; Ok(FileContents { text, had_bom }) @@ -367,7 +364,7 @@ fn write_file_contents( ) -> Result<(), AnyError> { let file_text = if file_contents.had_bom { // add back the BOM - format!("{}{}", BOM_CHAR, file_contents.text) + format!("{}{}", text_encoding::BOM_CHAR, file_contents.text) } else { file_contents.text };