1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2024-11-29 16:30:56 -05:00

Implement ignoreBOM option of UTF8Decoder in text_encoding (#3040)

This commit is contained in:
Tomohito Nakayama 2019-10-02 09:08:51 +09:00 committed by Ryan Dahl
parent 75eeac03f3
commit a646c2a885
3 changed files with 60 additions and 5 deletions

View file

@ -2372,7 +2372,7 @@ declare namespace textEncoding {
} }
export interface TextDecoderOptions { export interface TextDecoderOptions {
fatal?: boolean; fatal?: boolean;
ignoreBOM?: false; ignoreBOM?: boolean;
} }
export class TextDecoder { export class TextDecoder {
private _encoding; private _encoding;

View file

@ -59,11 +59,13 @@ class UTF8Decoder implements Decoder {
private _bytesSeen = 0; private _bytesSeen = 0;
private _bytesNeeded = 0; private _bytesNeeded = 0;
private _fatal: boolean; private _fatal: boolean;
private _ignoreBOM: boolean;
private _lowerBoundary = 0x80; private _lowerBoundary = 0x80;
private _upperBoundary = 0xbf; private _upperBoundary = 0xbf;
constructor(options: DecoderOptions) { constructor(options: DecoderOptions) {
this._fatal = options.fatal || false; this._fatal = options.fatal || false;
this._ignoreBOM = options.ignoreBOM || false;
} }
handler(stream: Stream, byte: number): number | null { handler(stream: Stream, byte: number): number | null {
@ -76,6 +78,26 @@ class UTF8Decoder implements Decoder {
return FINISHED; return FINISHED;
} }
// BOM check: while the ignore-BOM flag is still set, compare the incoming
// byte against the UTF-8 byte order mark (EF BB BF), keyed on the current
// value of _bytesSeen.
// NOTE(review): the WHATWG Encoding Standard defines ignoreBOM = true as
// "do not strip the BOM"; this block appears to discard the BOM when the
// flag is set — confirm the intended semantics.
if (this._ignoreBOM) {
// A mismatch on the first or second BOM byte: clear the flag so this
// whole check is skipped for subsequent bytes.
if (
(this._bytesSeen === 0 && byte !== 0xef) ||
(this._bytesSeen === 1 && byte !== 0xbb)
) {
this._ignoreBOM = false;
}
// The third position settles the check either way, so the flag is
// cleared before looking at the byte.
if (this._bytesSeen === 2) {
this._ignoreBOM = false;
if (byte === 0xbf) {
// Ignore BOM: reset the partial code point state and return CONTINUE
// (presumably so nothing is emitted for these bytes — verify against
// the code that consumes handler()'s result).
this._codePoint = 0;
this._bytesNeeded = 0;
this._bytesSeen = 0;
return CONTINUE;
}
}
}
if (this._bytesNeeded === 0) { if (this._bytesNeeded === 0) {
if (isASCIIByte(byte)) { if (isASCIIByte(byte)) {
// Single byte code point // Single byte code point
@ -225,6 +247,7 @@ export function btoa(s: string): string {
interface DecoderOptions { interface DecoderOptions {
fatal?: boolean; fatal?: boolean;
ignoreBOM?: boolean;
} }
interface Decoder { interface Decoder {
@ -240,6 +263,9 @@ class SingleByteDecoder implements Decoder {
private _fatal: boolean; private _fatal: boolean;
constructor(index: number[], options: DecoderOptions) { constructor(index: number[], options: DecoderOptions) {
if (options.ignoreBOM) {
throw new TypeError("Ignoring the BOM is available only with utf-8.");
}
this._fatal = options.fatal || false; this._fatal = options.fatal || false;
this._index = index; this._index = index;
} }
@ -367,7 +393,7 @@ export interface TextDecodeOptions {
export interface TextDecoderOptions { export interface TextDecoderOptions {
fatal?: boolean; fatal?: boolean;
ignoreBOM?: false; ignoreBOM?: boolean;
} }
type EitherArrayBuffer = SharedArrayBuffer | ArrayBuffer; type EitherArrayBuffer = SharedArrayBuffer | ArrayBuffer;
@ -387,11 +413,11 @@ export class TextDecoder {
/** Returns `true` if error mode is "fatal", and `false` otherwise. */ /** Returns `true` if error mode is "fatal", and `false` otherwise. */
readonly fatal: boolean = false; readonly fatal: boolean = false;
/** Returns `true` if ignore BOM flag is set, and `false` otherwise. */ /** Returns `true` if ignore BOM flag is set, and `false` otherwise. */
readonly ignoreBOM = false; readonly ignoreBOM: boolean = false;
constructor(label = "utf-8", options: TextDecoderOptions = { fatal: false }) { constructor(label = "utf-8", options: TextDecoderOptions = { fatal: false }) {
if (options.ignoreBOM) { if (options.ignoreBOM) {
throw new TypeError("Ignoring the BOM not supported."); this.ignoreBOM = true;
} }
if (options.fatal) { if (options.fatal) {
this.fatal = true; this.fatal = true;
@ -435,7 +461,10 @@ export class TextDecoder {
bytes = new Uint8Array(0); bytes = new Uint8Array(0);
} }
const decoder = decoders.get(this._encoding)!({ fatal: this.fatal }); const decoder = decoders.get(this._encoding)!({
fatal: this.fatal,
ignoreBOM: this.ignoreBOM
});
const inputStream = new Stream(bytes); const inputStream = new Stream(bytes);
const output: number[] = []; const output: number[] = [];

View file

@ -74,6 +74,32 @@ test(function textDecoder2(): void {
assertEquals(decoder.decode(fixture), "𝓽𝓮𝔁𝓽"); assertEquals(decoder.decode(fixture), "𝓽𝓮𝔁𝓽");
}); });
test(function textDecoderIgnoreBOM(): void {
  // UTF-8 byte order mark (EF BB BF) placed in front of the encoded text.
  const bom = [0xef, 0xbb, 0xbf];
  // prettier-ignore
  const encodedText = [
    0xf0, 0x9d, 0x93, 0xbd,
    0xf0, 0x9d, 0x93, 0xae,
    0xf0, 0x9d, 0x94, 0x81,
    0xf0, 0x9d, 0x93, 0xbd
  ];
  const input = Uint8Array.from([...bom, ...encodedText]);
  const decoded = new TextDecoder("utf-8", { ignoreBOM: true }).decode(input);
  // With ignoreBOM set, the decoded string is expected to come back without
  // a leading mark.
  // NOTE(review): the WHATWG Encoding Standard describes ignoreBOM = true as
  // keeping the BOM in the output — confirm this expectation is intended.
  assertEquals(decoded, "𝓽𝓮𝔁𝓽");
});
test(function textDecoderNotBOM(): void {
  // Shares the first two bytes of the UTF-8 BOM (EF BB) but ends in 0x89
  // instead of 0xbf, so it is an ordinary three-byte sequence, not a BOM.
  const bomLookalike = [0xef, 0xbb, 0x89];
  // prettier-ignore
  const encodedText = [
    0xf0, 0x9d, 0x93, 0xbd,
    0xf0, 0x9d, 0x93, 0xae,
    0xf0, 0x9d, 0x94, 0x81,
    0xf0, 0x9d, 0x93, 0xbd
  ];
  const input = Uint8Array.from([...bomLookalike, ...encodedText]);
  const decoded = new TextDecoder("utf-8", { ignoreBOM: true }).decode(input);
  // The look-alike prefix must survive decoding as its own character even
  // though ignoreBOM is set.
  assertEquals(decoded, "ﻉ𝓽𝓮𝔁𝓽");
});
test(function textDecoderASCII(): void { test(function textDecoderASCII(): void {
const fixture = new Uint8Array([0x89, 0x95, 0x9f, 0xbf]); const fixture = new Uint8Array([0x89, 0x95, 0x9f, 0xbf]);
const decoder = new TextDecoder("ascii"); const decoder = new TextDecoder("ascii");