1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2025-01-14 10:01:51 -05:00

fix: align encoding APIs to spec using WPT (#9004)

This commit is contained in:
Luca Casonato 2021-01-05 19:50:40 +01:00 committed by GitHub
parent ee9c0c8373
commit 0d41e21b0e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 346 additions and 175 deletions

View file

@ -3,9 +3,10 @@
The WPT are test suites for Web platform specs, like Fetch, WHATWG Streams, or
console. Deno is able to run most `.any.js` and `.window.js` web platform tests.
This directory contains a `wpt.json` file that is used to configure our WPT test
runner. You can use this json file to set which WPT suites to run, and which
tests we expect to fail (due to bugs or because they are out of scope for Deno).
This directory contains a `wpt.jsonc` file that is used to configure our WPT
test runner. You can use this JSONC file to set which WPT suites to run, and
which tests we expect to fail (due to bugs or because they are out of scope for
Deno).
To include a new test file to run, add it to the array of test files for the
corresponding suite. For example we want to enable

View file

@ -4919,11 +4919,15 @@ fn standalone_runtime_flags() {
.contains("PermissionDenied: write access"));
}
fn concat_bundle(files: Vec<(PathBuf, String)>, bundle_path: &Path) -> String {
fn concat_bundle(
files: Vec<(PathBuf, String)>,
bundle_path: &Path,
init: String,
) -> String {
let bundle_url = url::Url::from_file_path(bundle_path).unwrap().to_string();
let mut bundle = String::new();
let mut bundle_line_count = 0;
let mut bundle = init.clone();
let mut bundle_line_count = init.lines().count() as u32;
let mut source_map = sourcemap::SourceMapBuilder::new(Some(&bundle_url));
for (path, text) in files {
@ -4963,6 +4967,35 @@ fn concat_bundle(files: Vec<(PathBuf, String)>, bundle_path: &Path) -> String {
bundle
}
// TODO(lucacasonato): DRY with tsc_config.rs
/// Recursively translate a `jsonc_parser::JsonValue` tree into the equivalent
/// `serde_json::Value` tree.
///
/// Arrays and objects are converted element by element; scalars map onto
/// their direct serde counterparts.
///
/// # Panics
///
/// Panics with "could not parse number" when the textual form of a number is
/// rejected by `serde_json::Number::from_str`.
fn jsonc_to_serde(j: jsonc_parser::JsonValue) -> serde_json::Value {
  use jsonc_parser::JsonValue;
  use serde_json::Value;
  use std::str::FromStr;

  match j {
    JsonValue::Null => Value::Null,
    JsonValue::Boolean(flag) => Value::Bool(flag),
    JsonValue::String(text) => Value::String(text),
    // Numbers arrive as their source text and are re-parsed by serde.
    JsonValue::Number(raw) => Value::Number(
      serde_json::Number::from_str(&raw).expect("could not parse number"),
    ),
    JsonValue::Array(items) => {
      Value::Array(items.into_iter().map(jsonc_to_serde).collect())
    }
    JsonValue::Object(members) => {
      let converted: serde_json::map::Map<String, Value> = members
        .into_iter()
        .map(|(key, member)| (key, jsonc_to_serde(member)))
        .collect();
      Value::Object(converted)
    }
  }
}
#[test]
fn web_platform_tests() {
use deno_core::serde::Deserialize;
@ -4979,9 +5012,10 @@ fn web_platform_tests() {
}
let text =
std::fs::read_to_string(util::tests_path().join("wpt.json")).unwrap();
std::fs::read_to_string(util::tests_path().join("wpt.jsonc")).unwrap();
let jsonc = jsonc_parser::parse_to_value(&text).unwrap().unwrap();
let config: std::collections::HashMap<String, Vec<WptConfig>> =
deno_core::serde_json::from_str(&text).unwrap();
deno_core::serde_json::from_value(jsonc_to_serde(jsonc)).unwrap();
for (suite_name, includes) in config.into_iter() {
let suite_path = util::wpt_path().join(suite_name);
@ -5037,10 +5071,8 @@ fn web_platform_tests() {
};
if s.starts_with('/') {
util::wpt_path().join(format!(".{}", s))
} else if s.starts_with('.') {
test_file_path.parent().unwrap().join(s)
} else {
PathBuf::from(s)
test_file_path.parent().unwrap().join(s)
}
})
.map(|path| {
@ -5049,40 +5081,56 @@ fn web_platform_tests() {
})
.collect();
let mut files = Vec::with_capacity(3 + imports.len());
files.push((testharness_path.clone(), testharness_text.clone()));
files.push((
testharnessreporter_path.clone(),
testharnessreporter_text.clone(),
));
files.extend(imports);
files.push((test_file_path.clone(), test_file_text));
let mut variants: Vec<&str> = test_file_text
.split('\n')
.into_iter()
.filter_map(|t| t.strip_prefix("// META: variant="))
.collect();
let mut file = tempfile::Builder::new()
.prefix("wpt-bundle-")
.suffix(".js")
.rand_bytes(5)
.tempfile()
.unwrap();
let bundle = concat_bundle(files, file.path());
file.write_all(bundle.as_bytes()).unwrap();
let child = util::deno_cmd()
.current_dir(test_file_path.parent().unwrap())
.arg("run")
.arg("-A")
.arg(file.path())
.arg(deno_core::serde_json::to_string(&expect_fail).unwrap())
.stdin(std::process::Stdio::piped())
.spawn()
.unwrap();
let output = child.wait_with_output().unwrap();
if !output.status.success() {
file.keep().unwrap();
if variants.is_empty() {
variants.push("");
}
for variant in variants {
let mut files = Vec::with_capacity(3 + imports.len());
files.push((testharness_path.clone(), testharness_text.clone()));
files.push((
testharnessreporter_path.clone(),
testharnessreporter_text.clone(),
));
files.extend(imports.clone());
files.push((test_file_path.clone(), test_file_text.clone()));
let mut file = tempfile::Builder::new()
.prefix("wpt-bundle-")
.suffix(".js")
.rand_bytes(5)
.tempfile()
.unwrap();
let bundle = concat_bundle(
files,
file.path(),
format!("window.location = {{search: \"{}\"}};\n", variant),
);
file.write_all(bundle.as_bytes()).unwrap();
let child = util::deno_cmd()
.current_dir(test_file_path.parent().unwrap())
.arg("run")
.arg("-A")
.arg(file.path())
.arg(deno_core::serde_json::to_string(&expect_fail).unwrap())
.stdin(std::process::Stdio::piped())
.spawn()
.unwrap();
let output = child.wait_with_output().unwrap();
if !output.status.success() {
file.keep().unwrap();
}
assert!(output.status.success());
}
assert!(output.status.success());
}
}
}

View file

@ -69,31 +69,7 @@ unitTest(function textDecoder2(): void {
assertEquals(decoder.decode(fixture), "𝓽𝓮𝔁𝓽");
});
unitTest(function textDecoderIgnoreBOM(): void {
// deno-fmt-ignore
const fixture = new Uint8Array([
0xef, 0xbb, 0xbf,
0xf0, 0x9d, 0x93, 0xbd,
0xf0, 0x9d, 0x93, 0xae,
0xf0, 0x9d, 0x94, 0x81,
0xf0, 0x9d, 0x93, 0xbd
]);
const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
assertEquals(decoder.decode(fixture), "𝓽𝓮𝔁𝓽");
});
unitTest(function textDecoderNotBOM(): void {
// deno-fmt-ignore
const fixture = new Uint8Array([
0xef, 0xbb, 0x89,
0xf0, 0x9d, 0x93, 0xbd,
0xf0, 0x9d, 0x93, 0xae,
0xf0, 0x9d, 0x94, 0x81,
0xf0, 0x9d, 0x93, 0xbd
]);
const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
assertEquals(decoder.decode(fixture), "ﻉ𝓽𝓮𝔁𝓽");
});
// ignoreBOM is tested through WPT
unitTest(function textDecoderASCII(): void {
const fixture = new Uint8Array([0x89, 0x95, 0x9f, 0xbf]);

View file

@ -1,12 +0,0 @@
{
"streams": [
{
"name": "readable-streams/general",
"expectFail": [
"ReadableStream can't be constructed with an invalid type",
"default ReadableStream getReader() should only accept mode:undefined"
]
},
"writable-streams/general"
]
}

138
cli/tests/wpt.jsonc Normal file
View file

@ -0,0 +1,138 @@
{
"streams": [
{
"name": "readable-streams/general",
"expectFail": [
"ReadableStream can't be constructed with an invalid type",
"default ReadableStream getReader() should only accept mode:undefined"
]
},
"writable-streams/general"
],
"encoding": [
{
"name": "api-basics",
"expectFail": [
// TODO(lucacasonato): enable when we support utf-16
"Decode sample: utf-16le",
"Decode sample: utf-16be",
"Decode sample: utf-16"
]
},
"api-invalid-label",
"api-replacement-encodings",
"api-surrogates-utf8",
// TODO(lucacasonato): enable encodeInto. It is buggy at the moment.
// "encodeInto",
// TODO(lucacasonato): enable when we support iso-2022-jp
// "iso-2022-jp-decoder",
// TODO(lucacasonato): uses XMLHttpRequest unnecessarily. should be fixed upstream before enabling
// "replacement-encodings",
{
"name": "textdecoder-byte-order-marks",
"expectFail": [
// TODO(lucacasonato): enable when we support utf-16
"Byte-order marks: utf-16le",
"Byte-order marks: utf-16be"
]
},
{
"name": "textdecoder-copy",
"expectFail": [
// TODO(lucacasonato): enable when we have stream support
"Modify buffer after passing it in (ArrayBuffer)",
"Modify buffer after passing it in (SharedArrayBuffer)"
]
},
"textdecoder-fatal-single-byte",
{
"name": "textdecoder-fatal.",
// TODO(lucacasonato): enable when we support utf-16
"expectFail": ["Fatal flag: utf-16le - truncated code unit"]
},
{
"name": "textdecoder-ignorebom",
"expectFail": [
// TODO(lucacasonato): enable when we support utf-16
"BOM is ignored if ignoreBOM option is specified: utf-16le",
"BOM is ignored if ignoreBOM option is specified: utf-16be"
]
},
{
"name": "textdecoder-labels",
"expectFail": [
"chinese => GBK",
"csgb2312 => GBK",
"csiso58gb231280 => GBK",
"gb2312 => GBK",
"gb_2312 => GBK",
"gb_2312-80 => GBK",
"gbk => GBK",
"iso-ir-58 => GBK",
"x-gbk => GBK",
"gb18030 => gb18030",
"big5 => Big5",
"big5-hkscs => Big5",
"cn-big5 => Big5",
"csbig5 => Big5",
"x-x-big5 => Big5",
"cseucpkdfmtjapanese => EUC-JP",
"euc-jp => EUC-JP",
"x-euc-jp => EUC-JP",
"csiso2022jp => ISO-2022-JP",
"iso-2022-jp => ISO-2022-JP",
"csshiftjis => Shift_JIS",
"ms932 => Shift_JIS",
"ms_kanji => Shift_JIS",
"shift-jis => Shift_JIS",
"shift_jis => Shift_JIS",
"sjis => Shift_JIS",
"windows-31j => Shift_JIS",
"x-sjis => Shift_JIS",
"cseuckr => EUC-KR",
"csksc56011987 => EUC-KR",
"euc-kr => EUC-KR",
"iso-ir-149 => EUC-KR",
"korean => EUC-KR",
"ks_c_5601-1987 => EUC-KR",
"ks_c_5601-1989 => EUC-KR",
"ksc5601 => EUC-KR",
"ksc_5601 => EUC-KR",
"windows-949 => EUC-KR",
"unicodefffe => UTF-16BE",
"utf-16be => UTF-16BE",
"csunicode => UTF-16LE",
"iso-10646-ucs-2 => UTF-16LE",
"ucs-2 => UTF-16LE",
"unicode => UTF-16LE",
"unicodefeff => UTF-16LE",
"utf-16 => UTF-16LE",
"utf-16le => UTF-16LE",
"x-user-defined => x-user-defined"
]
},
// TODO(lucacasonato): enable when we have stream support
// "textdecoder-streaming",
// TODO(lucacasonato): enable when we support utf-16
// "textdecoder-utf16-surrogates",
{
"name": "textencoder-constructor-non-utf",
"expectFail": [
"Encoding argument supported for decode: GBK",
"Encoding argument supported for decode: gb18030",
"Encoding argument supported for decode: Big5",
"Encoding argument supported for decode: EUC-JP",
"Encoding argument supported for decode: ISO-2022-JP",
"Encoding argument supported for decode: Shift_JIS",
"Encoding argument supported for decode: EUC-KR",
"Encoding argument supported for decode: UTF-16BE",
"Encoding argument supported for decode: UTF-16LE",
"Encoding argument supported for decode: x-user-defined"
]
},
// TODO(lucacasonato): enable when we support utf-16
// "textencoder-utf16-surrogates",
// TODO(lucacasonato): uses XMLHttpRequest unnecessarily. should be fixed upstream before enabling
// "unsupported-encodings",
]
}

View file

@ -29,9 +29,16 @@ export function yellow(str) {
const testResults = [];
const testsExpectFail = JSON.parse(Deno.args[0]);
// Reports whether the test called `name` is expected to fail: true when
// `name` equals, or begins with, any entry of `testsExpectFail`.
function shouldExpectFail(name) {
  // An exact match is also a prefix match, so one prefix scan covers both.
  return testsExpectFail.some((prefix) => name.startsWith(prefix));
}
window.add_result_callback(({ message, name, stack, status }) => {
const expectFail = testsExpectFail.includes(name);
const expectFail = shouldExpectFail(name);
let simpleMessage = `test ${name} ... `;
switch (status) {
case 0:

View file

@ -654,6 +654,14 @@ fn decode(
)
};
// Strip BOM
let buf =
if buf.len() >= 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf {
&buf[3..]
} else {
buf
};
// If `String::new_from_utf8()` returns `None`, this means that the
// length of the decoded string would be longer than what V8 can
// handle. In this case we return `RangeError`.

View file

@ -169,26 +169,14 @@
// The encodingMap is a hash of labels that are indexed by the canonical
// encoding.
const encodingMap = {
"windows-1252": [
"ansi_x3.4-1968",
"ascii",
"cp1252",
"cp819",
"csisolatin1",
"ibm819",
"iso-8859-1",
"iso-ir-100",
"iso8859-1",
"iso88591",
"iso_8859-1",
"iso_8859-1:1987",
"l1",
"latin1",
"us-ascii",
"windows-1252",
"x-cp1252",
"utf-8": [
"unicode-1-1-utf-8",
"unicode11utf8",
"unicode20utf8",
"utf-8",
"utf8",
"x-unicode20utf8",
],
"utf-8": ["unicode-1-1-utf-8", "utf-8", "utf8"],
ibm866: ["866", "cp866", "csibm866", "ibm866"],
"iso-8859-2": [
"csisolatin2",
@ -276,6 +264,11 @@
"iso_8859-8:1988",
"visual",
],
"iso-8859-8-i": [
"csiso88598i",
"iso-8859-8-i",
"logical",
],
"iso-8859-10": [
"csisolatin6",
"iso-8859-10",
@ -296,19 +289,6 @@
"l9",
],
"iso-8859-16": ["iso-8859-16"],
gbk: [
"chinese",
"csgb2312",
"csiso58gb231280",
"gb2312",
"gb_2312",
"gb_2312-80",
"gbk",
"iso-ir-58",
"x-gbk",
],
gb18030: ["gb18030"],
big5: ["big5", "big5-hkscs", "cn-big5", "csbig5", "x-x-big5"],
"koi8-r": ["cskoi8r", "koi", "koi8", "koi8-r", "koi8_r"],
"koi8-u": ["koi8-ru", "koi8-u"],
macintosh: ["csmacintosh", "mac", "macintosh", "x-mac-roman"],
@ -322,6 +302,25 @@
],
"windows-1250": ["cp1250", "windows-1250", "x-cp1250"],
"windows-1251": ["cp1251", "windows-1251", "x-cp1251"],
"windows-1252": [
"ansi_x3.4-1968",
"ascii",
"cp1252",
"cp819",
"csisolatin1",
"ibm819",
"iso-8859-1",
"iso-ir-100",
"iso8859-1",
"iso88591",
"iso_8859-1",
"iso_8859-1:1987",
"l1",
"latin1",
"us-ascii",
"windows-1252",
"x-cp1252",
],
"windows-1253": ["cp1253", "windows-1253", "x-cp1253"],
"windows-1254": [
"cp1254",
@ -342,6 +341,19 @@
"windows-1257": ["cp1257", "windows-1257", "x-cp1257"],
"windows-1258": ["cp1258", "windows-1258", "x-cp1258"],
"x-mac-cyrillic": ["x-mac-cyrillic", "x-mac-ukrainian"],
gbk: [
"chinese",
"csgb2312",
"csiso58gb231280",
"gb2312",
"gb_2312",
"gb_2312-80",
"gbk",
"iso-ir-58",
"x-gbk",
],
gb18030: ["gb18030"],
big5: ["big5", "big5-hkscs", "cn-big5", "csbig5", "x-x-big5"],
};
// We convert these into a Map where every label resolves to its canonical
// encoding type.
@ -539,6 +551,26 @@
1512, 1513, 1514, null, null, 8206, 8207, null,
]);
// deno-fmt-ignore
encodingIndexes.set("iso-8859-8-i", [
128, 129, 130, 131, 132, 133, 134, 135,
136, 137, 138, 139, 140, 141, 142, 143,
144, 145, 146, 147, 148, 149, 150, 151,
152, 153, 154, 155, 156, 157, 158, 159,
160, null, 162, 163, 164, 165, 166, 167,
168, 169, 215, 171, 172, 173, 174, 175,
176, 177, 178, 179, 180, 181, 182, 183,
184, 185, 247, 187, 188, 189, 190, null,
null, null, null, null, null, null, null, null,
null, null, null, null, null, null, null, null,
null, null, null, null, null, null, null, null,
null, null, null, null, null, null, null, 8215,
1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495,
1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503,
1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511,
1512, 1513, 1514, null, null, 8206, 8207, null,
]);
// deno-fmt-ignore
encodingIndexes.set("iso-8859-10", [
128, 129, 130, 131, 132, 133, 134, 135,
@ -957,6 +989,26 @@
);
}
// The characters treated as ASCII whitespace by trimAsciiWhitespace:
// space, tab, line feed, form feed and carriage return.
const whitespace = [" ", "\t", "\n", "\f", "\r"];

/**
 * Remove leading and trailing ASCII whitespace from `label`.
 *
 * Only the characters listed in `whitespace` are stripped; any other
 * character (including non-ASCII whitespace) is kept. A label consisting
 * solely of ASCII whitespace yields the empty string.
 *
 * @param {string} label the string to trim
 * @returns {string} `label` without leading/trailing ASCII whitespace
 */
function trimAsciiWhitespace(label) {
  // Advance past leading whitespace using numeric indices.
  let start = 0;
  while (start < label.length && whitespace.includes(label[start])) {
    start++;
  }
  // `end` is exclusive; never move it below `start` so an all-whitespace
  // label collapses to "" instead of being returned untouched.
  let end = label.length;
  while (end > start && whitespace.includes(label[end - 1])) {
    end--;
  }
  return label.substring(start, end);
}
class TextDecoder {
#encoding = "";
@ -973,7 +1025,7 @@
if (options.fatal) {
this.fatal = true;
}
const _label = String(label).trim().toLowerCase();
const _label = trimAsciiWhitespace(String(label)).toLowerCase();
const encoding = encodings.get(_label);
if (!encoding) {
throw new RangeError(
@ -1085,21 +1137,28 @@
const encoder = new UTF8Encoder();
const inputStream = new Stream(stringToCodePoints(input));
if (!(dest instanceof Uint8Array)) {
throw new TypeError(
"2nd argument to TextEncoder.encodeInto must be Uint8Array",
);
}
let written = 0;
let read = 0;
while (true) {
const result = encoder.handler(inputStream.read());
const item = inputStream.read();
const result = encoder.handler(item);
if (result === "finished") {
break;
}
if (dest.length - written >= result.length) {
read++;
dest.set(result, written);
written += result.length;
if (result.length > 3) {
if (item > 0xFFFF) {
// increment read a second time if greater than U+FFFF
read++;
}
dest.set(result, written);
written += result.length;
} else {
break;
}
@ -1151,7 +1210,7 @@
let type;
let i =
ignoreBOM && input[0] === 0xef && input[1] === 0xbb && input[2] === 0xbf
!ignoreBOM && input[0] === 0xef && input[1] === 0xbb && input[2] === 0xbf
? 3
: 0;

View file

@ -100,59 +100,7 @@ function textDecoder2() {
assert(decoder.decode(fixture) === "𝓽𝓮𝔁𝓽");
}
function textDecoderIgnoreBOM() {
// deno-fmt-ignore
const fixture = new Uint8Array([
0xef,
0xbb,
0xbf,
0xf0,
0x9d,
0x93,
0xbd,
0xf0,
0x9d,
0x93,
0xae,
0xf0,
0x9d,
0x94,
0x81,
0xf0,
0x9d,
0x93,
0xbd,
]);
const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
assert(decoder.decode(fixture) === "𝓽𝓮𝔁𝓽");
}
function textDecoderNotBOM() {
// deno-fmt-ignore
const fixture = new Uint8Array([
0xef,
0xbb,
0x89,
0xf0,
0x9d,
0x93,
0xbd,
0xf0,
0x9d,
0x93,
0xae,
0xf0,
0x9d,
0x94,
0x81,
0xf0,
0x9d,
0x93,
0xbd,
]);
const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
assert(decoder.decode(fixture) === "ﻉ𝓽𝓮𝔁𝓽");
}
// ignoreBOM is tested by WPT
function textDecoderASCII() {
const fixture = new Uint8Array([0x89, 0x95, 0x9f, 0xbf]);
@ -1252,8 +1200,6 @@ function main() {
atobThrows2();
btoaFailed();
textDecoder2();
textDecoderIgnoreBOM();
textDecoderNotBOM();
textDecoderASCII();
textDecoderErrorEncoding();
textDecoderHandlesUndefined();

@ -1 +1 @@
Subproject commit 077d53c8da8b47c1d5060893af96a29f27b10008
Subproject commit b6f4d8ca80c396728754871433e61340da413022