mirror of
https://github.com/denoland/deno.git
synced 2024-11-25 15:29:32 -05:00
Use alternate TextEncoder/TextDecoder implementation (#1281)
This is faster and smaller.
This commit is contained in:
parent
60c008d23b
commit
6cc89b9e27
9 changed files with 366 additions and 42 deletions
|
@ -1,6 +1,7 @@
|
|||
// Copyright 2018 the Deno authors. All rights reserved. MIT license.
|
||||
import * as domTypes from "./dom_types";
|
||||
import { containsOnlyASCII } from "./util";
|
||||
import { TextEncoder } from "./text_encoding";
|
||||
|
||||
const bytesSymbol = Symbol("bytes");
|
||||
|
||||
|
|
|
@ -13,6 +13,8 @@ See the Apache Version 2.0 License for specific language governing permissions
|
|||
and limitations under the License.
|
||||
*******************************************************************************/
|
||||
|
||||
export type BufferSource = ArrayBufferView | ArrayBuffer;
|
||||
|
||||
export type HeadersInit =
|
||||
| Headers
|
||||
| Array<[string, string]>
|
||||
|
|
|
@ -4,7 +4,7 @@ import * as flatbuffers from "./flatbuffers";
|
|||
import { sendAsync } from "./dispatch";
|
||||
import * as msg from "gen/msg_generated";
|
||||
import * as domTypes from "./dom_types";
|
||||
import { TextDecoder } from "./text_encoding";
|
||||
import { TextDecoder, TextEncoder } from "./text_encoding";
|
||||
import { DenoBlob } from "./blob";
|
||||
import { Headers } from "./headers";
|
||||
import * as io from "./io";
|
||||
|
|
|
@ -29,8 +29,6 @@ import { libdeno } from "./libdeno";
|
|||
declare global {
|
||||
const console: consoleTypes.Console;
|
||||
const setTimeout: typeof timers.setTimeout;
|
||||
// tslint:disable-next-line:variable-name
|
||||
const TextEncoder: typeof textEncoding.TextEncoder;
|
||||
}
|
||||
|
||||
// A reference to the global object.
|
||||
|
@ -69,7 +67,7 @@ export type Headers = domTypes.Headers;
|
|||
window.FormData = formData.FormData as domTypes.FormDataConstructor;
|
||||
export type FormData = domTypes.FormData;
|
||||
|
||||
// While these are classes, they have their global instance types created in
|
||||
// other type definitions, therefore we do not have to include them here.
|
||||
window.TextEncoder = textEncoding.TextEncoder;
|
||||
export type TextEncoder = textEncoding.TextEncoder;
|
||||
window.TextDecoder = textEncoding.TextDecoder;
|
||||
export type TextDecoder = textEncoding.TextDecoder;
|
||||
|
|
|
@ -1,5 +1,29 @@
|
|||
// Copyright 2018 the Deno authors. All rights reserved. MIT license.
|
||||
// The following code is based off of text-encoding at:
|
||||
// https://github.com/inexorabletash/text-encoding
|
||||
//
|
||||
// Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
// distribute this software, either in source code form or as a compiled
|
||||
// binary, for any purpose, commercial or non-commercial, and by any
|
||||
// means.
|
||||
//
|
||||
// In jurisdictions that recognize copyright laws, the author or authors
|
||||
// of this software dedicate any and all copyright interest in the
|
||||
// software to the public domain. We make this dedication for the benefit
|
||||
// of the public at large and to the detriment of our heirs and
|
||||
// successors. We intend this dedication to be an overt act of
|
||||
// relinquishment in perpetuity of all present and future rights to this
|
||||
// software under copyright law.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
import * as base64 from "base64-js";
|
||||
import * as domTypes from "./dom_types";
|
||||
import { DenoError, ErrorKind } from "./errors";
|
||||
|
||||
/** Decodes a string of data which has been encoded using base-64. */
|
||||
|
@ -43,29 +67,299 @@ export function btoa(s: string): string {
|
|||
return result;
|
||||
}
|
||||
|
||||
// @types/text-encoding relies on lib.dom.d.ts for some interfaces. We do not
|
||||
// want to include lib.dom.d.ts (due to size) into deno's global type scope.
|
||||
// Therefore this hack: add a few of the missing interfaces in
|
||||
// @types/text-encoding to the global scope before importing.
|
||||
/** Contract for a decoder that is fed the input one byte at a time. */
interface Decoder {
|
||||
// Called with the source stream and the byte just read from it. The
// UTF8Decoder in this file returns a code point, CONTINUE (null) when it
// needs more bytes, or FINISHED at end of stream.
// NOTE(review): no decoder visible here returns number[] — confirm it is needed.
handler(stream: Stream, byte: number): number | number[] | null;
|
||||
}
|
||||
|
||||
declare global {
|
||||
type BufferSource = ArrayBufferView | ArrayBuffer;
|
||||
/** Contract for an encoder that is fed the input one code point at a time. */
interface Encoder {
|
||||
// Returns a single byte or a list of bytes for the given code point; the
// UTF8Encoder in this file returns FINISHED when passed END_OF_STREAM.
handler(codePoint: number): number | number[];
|
||||
}
|
||||
|
||||
interface TextDecodeOptions {
|
||||
stream?: boolean;
|
||||
// Returned by a decoder handler when it has consumed a byte but cannot emit a
// code point yet.
const CONTINUE = null;
|
||||
// Returned by `Stream.read()` once no tokens are left.
const END_OF_STREAM = -1;
|
||||
// Returned by a handler to signal that processing is complete. It shares the
// value -1 with END_OF_STREAM.
const FINISHED = -1;
|
||||
|
||||
/**
 * Builds a string from a list of Unicode code points.
 *
 * @param codePoints Code points in output order.
 * @returns The concatenation of the characters for each code point.
 */
function codePointsToString(codePoints: number[]): string {
  let text = "";
  for (let i = 0; i < codePoints.length; i++) {
    text = text + String.fromCodePoint(codePoints[i]);
  }
  return text;
}
|
||||
|
||||
/**
 * Reports a decoding error.
 *
 * @param fatal When true the error is raised instead of being replaced.
 * @returns U+FFFD (the replacement character) when `fatal` is false.
 * @throws TypeError when `fatal` is true.
 */
function decoderError(fatal: boolean): number | never {
  if (!fatal) {
    // Non-fatal mode: substitute the replacement character.
    return 0xfffd;
  }
  throw new TypeError("Decoder error.");
}
|
||||
|
||||
/**
 * Tests whether `a` lies in the closed interval [`min`, `max`].
 *
 * @returns true when `min <= a <= max`; false otherwise (including for NaN).
 */
function inRange(a: number, min: number, max: number) {
  const atLeastMin = a >= min;
  const atMostMax = a <= max;
  return atLeastMin && atMostMax;
}
|
||||
|
||||
/**
 * Converts a JavaScript string into a list of Unicode scalar values.
 *
 * Iterating the string yields well-formed surrogate pairs as one character,
 * so those become a single code point above 0xFFFF. An unpaired surrogate
 * has no valid UTF-8 encoding and is replaced with U+FFFD, so the encoder
 * never emits ill-formed byte sequences.
 *
 * @param input The string to convert.
 * @returns One entry per code point, in input order.
 */
function stringToCodePoints(input: string): number[] {
  const u: number[] = [];
  for (const c of input) {
    const codePoint = c.codePointAt(0)!;
    // A value in the surrogate range here can only be a lone surrogate.
    const isLoneSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    u.push(isLoneSurrogate ? 0xfffd : codePoint);
  }
  return u;
}
|
||||
|
||||
class Stream {
|
||||
private _tokens: number[];
|
||||
/**
 * Copies `tokens` into an internal array stored in reverse order, so the
 * next token to read is always the last element.
 */
constructor(tokens: number[] | Uint8Array) {
  const copy: number[] = Array.prototype.slice.call(tokens);
  copy.reverse();
  this._tokens = copy;
}
|
||||
|
||||
interface TextDecoderOptions {
|
||||
fatal?: boolean;
|
||||
ignoreBOM?: boolean;
|
||||
/** Returns true when no tokens remain to be read. */
endOfStream(): boolean {
  return this._tokens.length === 0;
}
|
||||
|
||||
interface TextDecoder {
|
||||
readonly encoding: string;
|
||||
readonly fatal: boolean;
|
||||
readonly ignoreBOM: boolean;
|
||||
decode(input?: BufferSource, options?: TextDecodeOptions): string;
|
||||
/**
 * Removes and returns the next token, or END_OF_STREAM when the stream is
 * empty.
 */
read(): number {
  if (this._tokens.length === 0) {
    return END_OF_STREAM;
  }
  return this._tokens.pop()!;
}
|
||||
|
||||
/**
 * Puts one or more tokens back at the front of the stream so they are the
 * next ones returned by `read()`.
 *
 * When given an array, its first element is read first. The array is
 * consumed: it is empty when this method returns.
 */
prepend(token: number | number[]): void {
  if (!Array.isArray(token)) {
    this._tokens.push(token);
    return;
  }
  while (token.length > 0) {
    const last = token.pop()!;
    this._tokens.push(last);
  }
}
|
||||
|
||||
/**
 * Appends one or more tokens to the end of the stream so they are read after
 * everything already queued.
 *
 * When given an array, its elements are read in array order. The array is
 * consumed: it is empty when this method returns.
 */
push(token: number | number[]): void {
  if (!Array.isArray(token)) {
    this._tokens.unshift(token);
    return;
  }
  while (token.length > 0) {
    const head = token.shift()!;
    this._tokens.unshift(head);
  }
}
|
||||
}
|
||||
|
||||
export { TextEncoder, TextDecoder } from "text-encoding";
|
||||
/**
 * Incremental UTF-8 decoder.
 *
 * `handler()` is fed one byte at a time and keeps the state of the sequence
 * being decoded between calls. Malformed input is reported through
 * `decoderError()`, which throws in fatal mode and otherwise yields U+FFFD.
 */
class UTF8Decoder implements Decoder {
  /** Bits of the code point accumulated so far. */
  private _codePoint = 0;
  /** Number of continuation bytes consumed for the current sequence. */
  private _bytesSeen = 0;
  /** Number of continuation bytes the current lead byte requires. */
  private _bytesNeeded = 0;
  private _fatal: boolean;
  /** Inclusive range allowed for the next continuation byte. */
  private _lowerBoundary = 0x80;
  private _upperBoundary = 0xbf;

  constructor(options = { fatal: false }) {
    this._fatal = options.fatal;
  }

  /**
   * Processes a single byte.
   *
   * @param stream Source stream; an offending byte is pushed back onto it so
   *   it gets re-examined as the start of a new sequence.
   * @param byte The byte just read, or END_OF_STREAM.
   * @returns A decoded code point, CONTINUE when more bytes are needed, or
   *   FINISHED at a clean end of stream.
   * @throws TypeError on malformed input when the decoder is fatal.
   */
  handler(stream: Stream, byte: number): number | null {
    if (byte === END_OF_STREAM && this._bytesNeeded !== 0) {
      // Input ended in the middle of a multi-byte sequence.
      this._bytesNeeded = 0;
      return decoderError(this._fatal);
    }

    if (byte === END_OF_STREAM) {
      return FINISHED;
    }

    if (this._bytesNeeded === 0) {
      if (inRange(byte, 0x00, 0x7f)) {
        // Single byte code point
        return byte;
      } else if (inRange(byte, 0xc2, 0xdf)) {
        // Two byte code point
        this._bytesNeeded = 1;
        this._codePoint = byte & 0x1f;
      } else if (inRange(byte, 0xe0, 0xef)) {
        // Three byte code point
        if (byte === 0xe0) {
          // Raise the lower bound to reject overlong encodings.
          this._lowerBoundary = 0xa0;
        } else if (byte === 0xed) {
          // Lower the upper bound to reject surrogate code points.
          this._upperBoundary = 0x9f;
        }
        this._bytesNeeded = 2;
        this._codePoint = byte & 0xf;
      } else if (inRange(byte, 0xf0, 0xf4)) {
        // Four byte code point
        if (byte === 0xf0) {
          // Raise the lower bound to reject overlong encodings.
          this._lowerBoundary = 0x90;
        } else if (byte === 0xf4) {
          // Lower the upper bound to reject values above U+10FFFF.
          this._upperBoundary = 0x8f;
        }
        this._bytesNeeded = 3;
        this._codePoint = byte & 0x7;
      } else {
        return decoderError(this._fatal);
      }
      return CONTINUE;
    }

    if (!inRange(byte, this._lowerBoundary, this._upperBoundary)) {
      // Byte out of range, so encoding error. Reset ALL sequence state,
      // including the continuation-byte boundaries: a range narrowed by the
      // failed lead byte (0xe0/0xed/0xf0/0xf4) must not leak into the next
      // sequence, or valid input following the error would be rejected.
      this._codePoint = 0;
      this._bytesNeeded = 0;
      this._bytesSeen = 0;
      this._lowerBoundary = 0x80;
      this._upperBoundary = 0xbf;
      stream.prepend(byte);
      return decoderError(this._fatal);
    }

    // Only the first continuation byte has a restricted range.
    this._lowerBoundary = 0x80;
    this._upperBoundary = 0xbf;

    this._codePoint = (this._codePoint << 6) | (byte & 0x3f);

    this._bytesSeen++;

    if (this._bytesSeen !== this._bytesNeeded) {
      return CONTINUE;
    }

    const codePoint = this._codePoint;

    this._codePoint = 0;
    this._bytesNeeded = 0;
    this._bytesSeen = 0;

    return codePoint;
  }
}
|
||||
|
||||
/** Stateless UTF-8 encoder: turns one code point into its byte sequence. */
class UTF8Encoder implements Encoder {
  /**
   * Encodes a single code point.
   *
   * @param codePoint The code point to encode, or END_OF_STREAM.
   * @returns FINISHED for END_OF_STREAM, the code point itself for ASCII,
   *   otherwise an array holding the lead byte followed by the continuation
   *   bytes.
   * @throws TypeError when the code point is outside 0..0x10FFFF.
   */
  handler(codePoint: number): number | number[] {
    if (codePoint === END_OF_STREAM) {
      return FINISHED;
    }
    if (inRange(codePoint, 0x00, 0x7f)) {
      return codePoint;
    }

    // Number of continuation bytes and the marker bits of the lead byte.
    let trailing: number;
    let leadMarker: number;
    if (inRange(codePoint, 0x0080, 0x07ff)) {
      trailing = 1;
      leadMarker = 0xc0;
    } else if (inRange(codePoint, 0x0800, 0xffff)) {
      trailing = 2;
      leadMarker = 0xe0;
    } else if (inRange(codePoint, 0x10000, 0x10ffff)) {
      trailing = 3;
      leadMarker = 0xf0;
    } else {
      throw new TypeError(
        `Code point out of range: \\x${codePoint.toString(16)}`
      );
    }

    const encoded = [(codePoint >> (6 * trailing)) + leadMarker];
    // Emit the remaining bits six at a time, most significant group first.
    for (let remaining = trailing; remaining > 0; remaining--) {
      const shifted = codePoint >> (6 * (remaining - 1));
      encoded.push(0x80 | (shifted & 0x3f));
    }
    return encoded;
  }
}
|
||||
|
||||
/** Per-call options accepted by `TextDecoder.decode()`. */
export interface TextDecodeOptions {
|
||||
// Typed as `false` only: `decode()` throws a TypeError when `stream` is
// truthy, because streaming is not supported.
stream?: false;
|
||||
}
|
||||
|
||||
/** Construction options accepted by `TextDecoder`. */
export interface TextDecoderOptions {
|
||||
// When true, the decoder's `fatal` flag is set.
fatal?: boolean;
|
||||
// Typed as `false` only: the constructor throws a TypeError when
// `ignoreBOM` is truthy.
ignoreBOM?: false;
|
||||
}
|
||||
|
||||
/**
 * UTF-8 only text decoder. Other labels, `ignoreBOM` and streaming decode
 * are rejected with a TypeError.
 */
export class TextDecoder {
  /** Returns encoding's name, lowercased. */
  readonly encoding = "utf-8";
  /** Returns `true` if error mode is "fatal", and `false` otherwise. */
  readonly fatal: boolean = false;
  /** Returns `true` if ignore BOM flag is set, and `false` otherwise. */
  readonly ignoreBOM = false;

  /**
   * @param label Encoding label; only "utf-8" is accepted.
   * @param options Decoder options; `ignoreBOM` must be falsy.
   * @throws TypeError for any other label or when `ignoreBOM` is set.
   */
  constructor(
    label: "utf-8" = "utf-8",
    options: TextDecoderOptions = { fatal: false }
  ) {
    if (label !== "utf-8") {
      throw new TypeError("Only UTF8 decoding supported.");
    }
    if (options.ignoreBOM) {
      throw new TypeError("Ignoring the BOM not supported.");
    }
    this.fatal = Boolean(options.fatal);
  }

  /**
   * Returns the result of running encoding's decoder.
   *
   * @param input Bytes to decode. An `ArrayBuffer`, or an object exposing an
   *   `ArrayBuffer` through `buffer`, is read; anything else decodes as empty.
   * @param options Per-call options; `stream` must be falsy.
   * @returns The decoded string, with a leading U+FEFF (BOM) removed.
   * @throws TypeError when `options.stream` is set, or on malformed input
   *   when the decoder is fatal.
   */
  decode(
    input?: domTypes.BufferSource,
    options: TextDecodeOptions = { stream: false }
  ): string {
    if (options.stream) {
      throw new TypeError("Stream not supported.");
    }

    // Normalise the accepted input shapes to a byte view.
    let bytes: Uint8Array;
    if (typeof input === "object" && input instanceof ArrayBuffer) {
      bytes = new Uint8Array(input);
    } else if (
      typeof input === "object" &&
      "buffer" in input &&
      input.buffer instanceof ArrayBuffer
    ) {
      // Respect the view's window into the underlying buffer.
      bytes = new Uint8Array(input.buffer, input.byteOffset, input.byteLength);
    } else {
      bytes = new Uint8Array(0);
    }

    const decoder = new UTF8Decoder({ fatal: this.fatal });
    const source = new Stream(bytes);
    const codePoints: number[] = [];

    // Feed the decoder byte by byte until it reports completion.
    let decoded = decoder.handler(source, source.read());
    while (decoded !== FINISHED) {
      if (decoded !== CONTINUE) {
        codePoints.push(decoded);
      }
      decoded = decoder.handler(source, source.read());
    }

    // Drop a leading byte order mark.
    if (codePoints[0] === 0xfeff) {
      codePoints.shift();
    }

    return codePointsToString(codePoints);
  }
}
|
||||
|
||||
/** UTF-8 text encoder. */
export class TextEncoder {
  /** Returns "utf-8". */
  readonly encoding = "utf-8";

  /**
   * Returns the result of running UTF-8's encoder.
   *
   * @param input The string to encode; defaults to the empty string.
   * @returns The UTF-8 bytes of `input`.
   */
  encode(input = ""): Uint8Array {
    const encoder = new UTF8Encoder();
    const source = new Stream(stringToCodePoints(input));
    const bytes: number[] = [];

    // The encoder yields a single byte for ASCII and a byte list otherwise.
    let chunk = encoder.handler(source.read());
    while (chunk !== FINISHED) {
      if (Array.isArray(chunk)) {
        bytes.push(...chunk);
      } else {
        bytes.push(chunk);
      }
      chunk = encoder.handler(source.read());
    }

    return new Uint8Array(bytes);
  }
}
|
||||
|
|
|
@ -24,3 +24,49 @@ test(function btoaFailed() {
|
|||
assert(!!err);
|
||||
assertEqual(err.name, "InvalidInput");
|
||||
});
|
||||
|
||||
// Decoding six UTF-8 encoded U+FFFD characters yields them unchanged.
test(function textDecoder() {
  // prettier-ignore
  const fixture = new Uint8Array([
    0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
    0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
    0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd
  ]);
  const decoder = new TextDecoder();
  // EF BF BD is U+FFFD; written as escapes so the expectation cannot be
  // corrupted by a lossy re-encoding of this file.
  assertEqual(
    decoder.decode(fixture),
    "\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd"
  );
});
|
||||
|
||||
// Decoding four-byte sequences yields the matching astral-plane characters.
test(function textDecoder2() {
  // prettier-ignore
  const utf8Bytes = new Uint8Array([
    0xf0, 0x9d, 0x93, 0xbd,
    0xf0, 0x9d, 0x93, 0xae,
    0xf0, 0x9d, 0x94, 0x81,
    0xf0, 0x9d, 0x93, 0xbd
  ]);
  const expected = "𝓽𝓮𝔁𝓽";
  const actual = new TextDecoder().decode(utf8Bytes);
  assertEqual(actual, expected);
});
|
||||
|
||||
// Encoding six U+FFFD characters yields six EF BF BD byte triples.
test(function textEncoder() {
  // Written as escapes so the fixture cannot be corrupted by a lossy
  // re-encoding of this file.
  const fixture = "\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd";
  const encoder = new TextEncoder();
  // prettier-ignore
  assertEqual(Array.from(encoder.encode(fixture)), [
    0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
    0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
    0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd
  ]);
});
|
||||
|
||||
// Encoding astral-plane characters yields four-byte UTF-8 sequences.
test(function textEncoder2() {
  const text = "𝓽𝓮𝔁𝓽";
  const actualBytes = Array.from(new TextEncoder().encode(text));
  // prettier-ignore
  const expectedBytes = [
    0xf0, 0x9d, 0x93, 0xbd,
    0xf0, 0x9d, 0x93, 0xae,
    0xf0, 0x9d, 0x94, 0x81,
    0xf0, 0x9d, 0x93, 0xbd
  ];
  assertEqual(actualBytes, expectedBytes);
});
|
||||
|
|
|
@ -20,7 +20,6 @@
|
|||
"rollup-plugin-typescript2": "^0.16.1",
|
||||
"rollup-pluginutils": "^2.3.0",
|
||||
"source-map-support": "^0.5.6",
|
||||
"text-encoding": "0.6.4",
|
||||
"ts-node": "^7.0.1",
|
||||
"ts-simple-ast": "17.1.0",
|
||||
"tslint": "^5.10.0",
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit d8123728834250395e859b10618ad2ca35f7a555
|
||||
Subproject commit e058979631fd3ecc55f8995a02eaa6ff8f35c321
|
|
@ -13,7 +13,6 @@ import {
|
|||
addInterfaceProperty,
|
||||
addSourceComment,
|
||||
addVariableDeclaration,
|
||||
appendSourceFile,
|
||||
checkDiagnostics,
|
||||
flattenNamespace,
|
||||
getSourceComment,
|
||||
|
@ -370,18 +369,13 @@ export function main({
|
|||
moduleResolution: ModuleResolutionKind.NodeJs,
|
||||
noLib: true,
|
||||
strict: true,
|
||||
target: ScriptTarget.ESNext,
|
||||
types: ["text-encoding"]
|
||||
target: ScriptTarget.ESNext
|
||||
},
|
||||
useVirtualFileSystem: true
|
||||
});
|
||||
|
||||
// There are files we need to load into memory, so that the project "compiles"
|
||||
loadDtsFiles(outputProject);
|
||||
// tslint:disable-next-line:max-line-length
|
||||
const textEncodingFilePath = `${buildPath}/node_modules/@types/text-encoding/index.d.ts`;
|
||||
loadFiles(outputProject, [textEncodingFilePath]);
|
||||
outputProject.addExistingSourceFileIfExists(textEncodingFilePath);
|
||||
|
||||
// libDts is the final output file we are looking to build and we are not
|
||||
// actually creating it, only in memory at this stage.
|
||||
|
@ -433,16 +427,6 @@ export function main({
|
|||
console.log(`Merged "globals" into global scope.`);
|
||||
}
|
||||
|
||||
// Since we flatten the namespaces, we don't attempt to import `text-encoding`
|
||||
// so we then need to concatenate that onto the `libDts` so it can stand on
|
||||
// its own.
|
||||
const textEncodingSourceFile = outputProject.getSourceFileOrThrow(
|
||||
textEncodingFilePath
|
||||
);
|
||||
appendSourceFile(textEncodingSourceFile, libDTs);
|
||||
// Removing it from the project so we know the libDTs can stand on its own.
|
||||
outputProject.removeSourceFile(textEncodingSourceFile);
|
||||
|
||||
// Add the preamble
|
||||
libDTs.insertStatements(0, libPreamble);
|
||||
|
||||
|
|
Loading…
Reference in a new issue