mirror of
https://github.com/denoland/deno.git
synced 2024-11-25 15:29:32 -05:00
Implement ignoreBOM option of UTF8Decoder in text_encoding (#3040)
This commit is contained in:
parent
75eeac03f3
commit
a646c2a885
3 changed files with 60 additions and 5 deletions
2
js/lib.deno_runtime.d.ts
vendored
2
js/lib.deno_runtime.d.ts
vendored
|
@@ -2372,7 +2372,7 @@ declare namespace textEncoding {
|
|||
}
|
||||
export interface TextDecoderOptions {
|
||||
fatal?: boolean;
|
||||
ignoreBOM?: false;
|
||||
ignoreBOM?: boolean;
|
||||
}
|
||||
export class TextDecoder {
|
||||
private _encoding;
|
||||
|
|
|
@@ -59,11 +59,13 @@ class UTF8Decoder implements Decoder {
|
|||
private _bytesSeen = 0;
|
||||
private _bytesNeeded = 0;
|
||||
private _fatal: boolean;
|
||||
private _ignoreBOM: boolean;
|
||||
private _lowerBoundary = 0x80;
|
||||
private _upperBoundary = 0xbf;
|
||||
|
||||
constructor(options: DecoderOptions) {
|
||||
this._fatal = options.fatal || false;
|
||||
this._ignoreBOM = options.ignoreBOM || false;
|
||||
}
|
||||
|
||||
handler(stream: Stream, byte: number): number | null {
|
||||
|
@@ -76,6 +78,26 @@ class UTF8Decoder implements Decoder {
|
|||
return FINISHED;
|
||||
}
|
||||
|
||||
if (this._ignoreBOM) {
|
||||
if (
|
||||
(this._bytesSeen === 0 && byte !== 0xef) ||
|
||||
(this._bytesSeen === 1 && byte !== 0xbb)
|
||||
) {
|
||||
this._ignoreBOM = false;
|
||||
}
|
||||
|
||||
if (this._bytesSeen === 2) {
|
||||
this._ignoreBOM = false;
|
||||
if (byte === 0xbf) {
|
||||
//Ignore BOM
|
||||
this._codePoint = 0;
|
||||
this._bytesNeeded = 0;
|
||||
this._bytesSeen = 0;
|
||||
return CONTINUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (this._bytesNeeded === 0) {
|
||||
if (isASCIIByte(byte)) {
|
||||
// Single byte code point
|
||||
|
@@ -225,6 +247,7 @@ export function btoa(s: string): string {
|
|||
|
||||
interface DecoderOptions {
|
||||
fatal?: boolean;
|
||||
ignoreBOM?: boolean;
|
||||
}
|
||||
|
||||
interface Decoder {
|
||||
|
@@ -240,6 +263,9 @@ class SingleByteDecoder implements Decoder {
|
|||
private _fatal: boolean;
|
||||
|
||||
constructor(index: number[], options: DecoderOptions) {
|
||||
if (options.ignoreBOM) {
|
||||
throw new TypeError("Ignoring the BOM is available only with utf-8.");
|
||||
}
|
||||
this._fatal = options.fatal || false;
|
||||
this._index = index;
|
||||
}
|
||||
|
@@ -367,7 +393,7 @@ export interface TextDecodeOptions {
|
|||
|
||||
export interface TextDecoderOptions {
|
||||
fatal?: boolean;
|
||||
ignoreBOM?: false;
|
||||
ignoreBOM?: boolean;
|
||||
}
|
||||
|
||||
type EitherArrayBuffer = SharedArrayBuffer | ArrayBuffer;
|
||||
|
@@ -387,11 +413,11 @@ export class TextDecoder {
|
|||
/** Returns `true` if error mode is "fatal", and `false` otherwise. */
|
||||
readonly fatal: boolean = false;
|
||||
/** Returns `true` if ignore BOM flag is set, and `false` otherwise. */
|
||||
readonly ignoreBOM = false;
|
||||
readonly ignoreBOM: boolean = false;
|
||||
|
||||
constructor(label = "utf-8", options: TextDecoderOptions = { fatal: false }) {
|
||||
if (options.ignoreBOM) {
|
||||
throw new TypeError("Ignoring the BOM not supported.");
|
||||
this.ignoreBOM = true;
|
||||
}
|
||||
if (options.fatal) {
|
||||
this.fatal = true;
|
||||
|
@@ -435,7 +461,10 @@ export class TextDecoder {
|
|||
bytes = new Uint8Array(0);
|
||||
}
|
||||
|
||||
const decoder = decoders.get(this._encoding)!({ fatal: this.fatal });
|
||||
const decoder = decoders.get(this._encoding)!({
|
||||
fatal: this.fatal,
|
||||
ignoreBOM: this.ignoreBOM
|
||||
});
|
||||
const inputStream = new Stream(bytes);
|
||||
const output: number[] = [];
|
||||
|
||||
|
|
|
@@ -74,6 +74,32 @@ test(function textDecoder2(): void {
|
|||
assertEquals(decoder.decode(fixture), "𝓽𝓮𝔁𝓽");
|
||||
});
|
||||
|
||||
test(function textDecoderIgnoreBOM(): void {
|
||||
// prettier-ignore
|
||||
const fixture = new Uint8Array([
|
||||
0xef, 0xbb, 0xbf,
|
||||
0xf0, 0x9d, 0x93, 0xbd,
|
||||
0xf0, 0x9d, 0x93, 0xae,
|
||||
0xf0, 0x9d, 0x94, 0x81,
|
||||
0xf0, 0x9d, 0x93, 0xbd
|
||||
]);
|
||||
const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
|
||||
assertEquals(decoder.decode(fixture), "𝓽𝓮𝔁𝓽");
|
||||
});
|
||||
|
||||
test(function textDecoderNotBOM(): void {
|
||||
// prettier-ignore
|
||||
const fixture = new Uint8Array([
|
||||
0xef, 0xbb, 0x89,
|
||||
0xf0, 0x9d, 0x93, 0xbd,
|
||||
0xf0, 0x9d, 0x93, 0xae,
|
||||
0xf0, 0x9d, 0x94, 0x81,
|
||||
0xf0, 0x9d, 0x93, 0xbd
|
||||
]);
|
||||
const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
|
||||
assertEquals(decoder.decode(fixture), "ﻉ𝓽𝓮𝔁𝓽");
|
||||
});
|
||||
|
||||
test(function textDecoderASCII(): void {
|
||||
const fixture = new Uint8Array([0x89, 0x95, 0x9f, 0xbf]);
|
||||
const decoder = new TextDecoder("ascii");
|
||||
|
|
Loading…
Reference in a new issue