mirror of
https://github.com/denoland/deno.git
synced 2024-12-02 17:01:14 -05:00
49ccfdd696
In the `transform` function to `TextEncoderStream`'s internal `TransformStream`, if `chunk` is the empty string and `this.#pendingHighSurrogate` is null, then `lastCodeUnit` will be NaN. As it turns out, this does not cause a bug because the comparison to check for lone surrogates turns out to be false for NaN, but to rely on it makes the code brittle.
416 lines
11 KiB
JavaScript
416 lines
11 KiB
JavaScript
// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license.
|
|
|
|
// @ts-check
|
|
/// <reference path="../../core/lib.deno_core.d.ts" />
|
|
/// <reference path="../../core/internal.d.ts" />
|
|
/// <reference path="../webidl/internal.d.ts" />
|
|
/// <reference path="../fetch/lib.deno_fetch.d.ts" />
|
|
/// <reference path="../web/internal.d.ts" />
|
|
/// <reference path="../web/lib.deno_web.d.ts" />
|
|
/// <reference lib="esnext" />
|
|
|
|
"use strict";
|
|
|
|
((window) => {
|
|
const core = Deno.core;
|
|
const webidl = window.__bootstrap.webidl;
|
|
const {
|
|
ArrayBufferIsView,
|
|
PromiseReject,
|
|
PromiseResolve,
|
|
StringPrototypeCharCodeAt,
|
|
StringPrototypeSlice,
|
|
TypedArrayPrototypeSubarray,
|
|
TypedArrayPrototypeSlice,
|
|
Uint8Array,
|
|
} = window.__bootstrap.primordials;
|
|
|
|
class TextDecoder {
|
|
/** @type {string} */
|
|
#encoding;
|
|
/** @type {boolean} */
|
|
#fatal;
|
|
/** @type {boolean} */
|
|
#ignoreBOM;
|
|
|
|
/** @type {number | null} */
|
|
#rid = null;
|
|
|
|
/**
|
|
* @param {string} label
|
|
* @param {TextDecoderOptions} options
|
|
*/
|
|
constructor(label = "utf-8", options = {}) {
|
|
const prefix = "Failed to construct 'TextDecoder'";
|
|
label = webidl.converters.DOMString(label, {
|
|
prefix,
|
|
context: "Argument 1",
|
|
});
|
|
options = webidl.converters.TextDecoderOptions(options, {
|
|
prefix,
|
|
context: "Argument 2",
|
|
});
|
|
const encoding = core.opSync("op_encoding_normalize_label", label);
|
|
this.#encoding = encoding;
|
|
this.#fatal = options.fatal;
|
|
this.#ignoreBOM = options.ignoreBOM;
|
|
this[webidl.brand] = webidl.brand;
|
|
}
|
|
|
|
/** @returns {string} */
|
|
get encoding() {
|
|
webidl.assertBranded(this, TextDecoder);
|
|
return this.#encoding;
|
|
}
|
|
|
|
/** @returns {boolean} */
|
|
get fatal() {
|
|
webidl.assertBranded(this, TextDecoder);
|
|
return this.#fatal;
|
|
}
|
|
|
|
/** @returns {boolean} */
|
|
get ignoreBOM() {
|
|
webidl.assertBranded(this, TextDecoder);
|
|
return this.#ignoreBOM;
|
|
}
|
|
|
|
/**
|
|
* @param {BufferSource} [input]
|
|
* @param {TextDecodeOptions} options
|
|
*/
|
|
decode(input = new Uint8Array(), options = {}) {
|
|
webidl.assertBranded(this, TextDecoder);
|
|
const prefix = "Failed to execute 'decode' on 'TextDecoder'";
|
|
if (input !== undefined) {
|
|
input = webidl.converters.BufferSource(input, {
|
|
prefix,
|
|
context: "Argument 1",
|
|
allowShared: true,
|
|
});
|
|
}
|
|
options = webidl.converters.TextDecodeOptions(options, {
|
|
prefix,
|
|
context: "Argument 2",
|
|
});
|
|
|
|
// TODO(lucacasonato): add fast path for non-streaming decoder & decode
|
|
|
|
if (this.#rid === null) {
|
|
this.#rid = core.opSync("op_encoding_new_decoder", {
|
|
label: this.#encoding,
|
|
fatal: this.#fatal,
|
|
ignoreBom: this.#ignoreBOM,
|
|
});
|
|
}
|
|
|
|
try {
|
|
try {
|
|
if (ArrayBufferIsView(input)) {
|
|
input = new Uint8Array(
|
|
input.buffer,
|
|
input.byteOffset,
|
|
input.byteLength,
|
|
);
|
|
} else {
|
|
input = new Uint8Array(input);
|
|
}
|
|
} catch {
|
|
// If the buffer is detached, just create a new empty Uint8Array.
|
|
input = new Uint8Array();
|
|
}
|
|
if (input.buffer instanceof SharedArrayBuffer) {
|
|
// We clone the data into a non-shared ArrayBuffer so we can pass it
|
|
// to Rust.
|
|
// `input` is now a Uint8Array, and calling the TypedArray constructor
|
|
// with a TypedArray argument copies the data.
|
|
input = new Uint8Array(input);
|
|
}
|
|
return core.opSync("op_encoding_decode", input, {
|
|
rid: this.#rid,
|
|
stream: options.stream,
|
|
});
|
|
} finally {
|
|
if (!options.stream) {
|
|
core.close(this.#rid);
|
|
this.#rid = null;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
webidl.configurePrototype(TextDecoder);
|
|
|
|
class TextEncoder {
|
|
constructor() {
|
|
this[webidl.brand] = webidl.brand;
|
|
}
|
|
|
|
/** @returns {string} */
|
|
get encoding() {
|
|
webidl.assertBranded(this, TextEncoder);
|
|
return "utf-8";
|
|
}
|
|
|
|
/**
|
|
* @param {string} input
|
|
* @returns {Uint8Array}
|
|
*/
|
|
encode(input = "") {
|
|
webidl.assertBranded(this, TextEncoder);
|
|
const prefix = "Failed to execute 'encode' on 'TextEncoder'";
|
|
// The WebIDL type of `input` is `USVString`, but `core.encode` already
|
|
// converts lone surrogates to the replacement character.
|
|
input = webidl.converters.DOMString(input, {
|
|
prefix,
|
|
context: "Argument 1",
|
|
});
|
|
return core.encode(input);
|
|
}
|
|
|
|
/**
|
|
* @param {string} source
|
|
* @param {Uint8Array} destination
|
|
* @returns {TextEncoderEncodeIntoResult}
|
|
*/
|
|
encodeInto(source, destination) {
|
|
webidl.assertBranded(this, TextEncoder);
|
|
const prefix = "Failed to execute 'encodeInto' on 'TextEncoder'";
|
|
// The WebIDL type of `source` is `USVString`, but the ops bindings
|
|
// already convert lone surrogates to the replacement character.
|
|
source = webidl.converters.DOMString(source, {
|
|
prefix,
|
|
context: "Argument 1",
|
|
});
|
|
destination = webidl.converters.Uint8Array(destination, {
|
|
prefix,
|
|
context: "Argument 2",
|
|
allowShared: true,
|
|
});
|
|
return core.opSync("op_encoding_encode_into", source, destination);
|
|
}
|
|
}
|
|
|
|
webidl.configurePrototype(TextEncoder);
|
|
|
|
class TextDecoderStream {
|
|
/** @type {TextDecoder} */
|
|
#decoder;
|
|
/** @type {TransformStream<BufferSource, string>} */
|
|
#transform;
|
|
|
|
/**
|
|
* @param {string} label
|
|
* @param {TextDecoderOptions} options
|
|
*/
|
|
constructor(label = "utf-8", options = {}) {
|
|
const prefix = "Failed to construct 'TextDecoderStream'";
|
|
label = webidl.converters.DOMString(label, {
|
|
prefix,
|
|
context: "Argument 1",
|
|
});
|
|
options = webidl.converters.TextDecoderOptions(options, {
|
|
prefix,
|
|
context: "Argument 2",
|
|
});
|
|
this.#decoder = new TextDecoder(label, options);
|
|
this.#transform = new TransformStream({
|
|
// The transform and flush functions need access to TextDecoderStream's
|
|
// `this`, so they are defined as functions rather than methods.
|
|
transform: (chunk, controller) => {
|
|
try {
|
|
chunk = webidl.converters.BufferSource(chunk, {
|
|
allowShared: true,
|
|
});
|
|
const decoded = this.#decoder.decode(chunk, { stream: true });
|
|
if (decoded) {
|
|
controller.enqueue(decoded);
|
|
}
|
|
return PromiseResolve();
|
|
} catch (err) {
|
|
return PromiseReject(err);
|
|
}
|
|
},
|
|
flush: (controller) => {
|
|
try {
|
|
const final = this.#decoder.decode();
|
|
if (final) {
|
|
controller.enqueue(final);
|
|
}
|
|
return PromiseResolve();
|
|
} catch (err) {
|
|
return PromiseReject(err);
|
|
}
|
|
},
|
|
});
|
|
this[webidl.brand] = webidl.brand;
|
|
}
|
|
|
|
/** @returns {string} */
|
|
get encoding() {
|
|
webidl.assertBranded(this, TextDecoderStream);
|
|
return this.#decoder.encoding;
|
|
}
|
|
|
|
/** @returns {boolean} */
|
|
get fatal() {
|
|
webidl.assertBranded(this, TextDecoderStream);
|
|
return this.#decoder.fatal;
|
|
}
|
|
|
|
/** @returns {boolean} */
|
|
get ignoreBOM() {
|
|
webidl.assertBranded(this, TextDecoderStream);
|
|
return this.#decoder.ignoreBOM;
|
|
}
|
|
|
|
/** @returns {ReadableStream<string>} */
|
|
get readable() {
|
|
webidl.assertBranded(this, TextDecoderStream);
|
|
return this.#transform.readable;
|
|
}
|
|
|
|
/** @returns {WritableStream<BufferSource>} */
|
|
get writable() {
|
|
webidl.assertBranded(this, TextDecoderStream);
|
|
return this.#transform.writable;
|
|
}
|
|
}
|
|
|
|
webidl.configurePrototype(TextDecoderStream);
|
|
|
|
class TextEncoderStream {
|
|
/** @type {string | null} */
|
|
#pendingHighSurrogate = null;
|
|
/** @type {TransformStream<string, Uint8Array>} */
|
|
#transform;
|
|
|
|
constructor() {
|
|
this.#transform = new TransformStream({
|
|
// The transform and flush functions need access to TextEncoderStream's
|
|
// `this`, so they are defined as functions rather than methods.
|
|
transform: (chunk, controller) => {
|
|
try {
|
|
chunk = webidl.converters.DOMString(chunk);
|
|
if (chunk === "") {
|
|
return PromiseResolve();
|
|
}
|
|
if (this.#pendingHighSurrogate !== null) {
|
|
chunk = this.#pendingHighSurrogate + chunk;
|
|
}
|
|
const lastCodeUnit = StringPrototypeCharCodeAt(
|
|
chunk,
|
|
chunk.length - 1,
|
|
);
|
|
if (0xD800 <= lastCodeUnit && lastCodeUnit <= 0xDBFF) {
|
|
this.#pendingHighSurrogate = StringPrototypeSlice(chunk, -1);
|
|
chunk = StringPrototypeSlice(chunk, 0, -1);
|
|
} else {
|
|
this.#pendingHighSurrogate = null;
|
|
}
|
|
if (chunk) {
|
|
controller.enqueue(core.encode(chunk));
|
|
}
|
|
return PromiseResolve();
|
|
} catch (err) {
|
|
return PromiseReject(err);
|
|
}
|
|
},
|
|
flush: (controller) => {
|
|
try {
|
|
if (this.#pendingHighSurrogate !== null) {
|
|
controller.enqueue(new Uint8Array([0xEF, 0xBF, 0xBD]));
|
|
}
|
|
return PromiseResolve();
|
|
} catch (err) {
|
|
return PromiseReject(err);
|
|
}
|
|
},
|
|
});
|
|
this[webidl.brand] = webidl.brand;
|
|
}
|
|
|
|
/** @returns {string} */
|
|
get encoding() {
|
|
webidl.assertBranded(this, TextEncoderStream);
|
|
return "utf-8";
|
|
}
|
|
|
|
/** @returns {ReadableStream<Uint8Array>} */
|
|
get readable() {
|
|
webidl.assertBranded(this, TextEncoderStream);
|
|
return this.#transform.readable;
|
|
}
|
|
|
|
/** @returns {WritableStream<string>} */
|
|
get writable() {
|
|
webidl.assertBranded(this, TextEncoderStream);
|
|
return this.#transform.writable;
|
|
}
|
|
}
|
|
|
|
webidl.configurePrototype(TextEncoderStream);
|
|
|
|
webidl.converters.TextDecoderOptions = webidl.createDictionaryConverter(
|
|
"TextDecoderOptions",
|
|
[
|
|
{
|
|
key: "fatal",
|
|
converter: webidl.converters.boolean,
|
|
defaultValue: false,
|
|
},
|
|
{
|
|
key: "ignoreBOM",
|
|
converter: webidl.converters.boolean,
|
|
defaultValue: false,
|
|
},
|
|
],
|
|
);
|
|
webidl.converters.TextDecodeOptions = webidl.createDictionaryConverter(
|
|
"TextDecodeOptions",
|
|
[
|
|
{
|
|
key: "stream",
|
|
converter: webidl.converters.boolean,
|
|
defaultValue: false,
|
|
},
|
|
],
|
|
);
|
|
|
|
/**
|
|
* @param {Uint8Array} bytes
|
|
*/
|
|
function decode(bytes, encoding) {
|
|
const BOMEncoding = BOMSniff(bytes);
|
|
let start = 0;
|
|
if (BOMEncoding !== null) {
|
|
encoding = BOMEncoding;
|
|
if (BOMEncoding === "UTF-8") start = 3;
|
|
else start = 2;
|
|
}
|
|
return new TextDecoder(encoding).decode(
|
|
TypedArrayPrototypeSlice(bytes, start),
|
|
);
|
|
}
|
|
|
|
/**
|
|
* @param {Uint8Array} bytes
|
|
*/
|
|
function BOMSniff(bytes) {
|
|
const BOM = TypedArrayPrototypeSubarray(bytes, 0, 3);
|
|
if (BOM[0] === 0xEF && BOM[1] === 0xBB && BOM[2] === 0xBF) {
|
|
return "UTF-8";
|
|
}
|
|
if (BOM[0] === 0xFE && BOM[1] === 0xFF) return "UTF-16BE";
|
|
if (BOM[0] === 0xFF && BOM[1] === 0xFE) return "UTF-16LE";
|
|
return null;
|
|
}
|
|
|
|
window.__bootstrap.encoding = {
|
|
TextEncoder,
|
|
TextDecoder,
|
|
TextEncoderStream,
|
|
TextDecoderStream,
|
|
decode,
|
|
};
|
|
})(this);
|