mirror of
https://github.com/denoland/deno.git
synced 2025-01-11 08:33:43 -05:00
perf(web): optimize single pass utf8 decoding (#16593)
- [x] Avoid copying buffers. See https://encoding.spec.whatwg.org/#dom-textdecoder-decode:
  > Implementations are strongly encouraged to use an implementation strategy that avoids this copy. When doing so they will have to make sure that changes to input do not affect future calls to [decode()](https://encoding.spec.whatwg.org/#dom-textdecoder-decode).
- [x] Special op to avoid string label deserialization and parsing. (Ideally we should map labels to integers in JS.)
- [x] Avoid webidl `Object.assign` when options is undefined.
This commit is contained in:
parent
5b9620df7a
commit
38f0b41e7d
3 changed files with 74 additions and 28 deletions
|
@ -16,14 +16,14 @@
|
||||||
const ops = core.ops;
|
const ops = core.ops;
|
||||||
const webidl = window.__bootstrap.webidl;
|
const webidl = window.__bootstrap.webidl;
|
||||||
const {
|
const {
|
||||||
ArrayBufferIsView,
|
|
||||||
ObjectPrototypeIsPrototypeOf,
|
|
||||||
PromiseReject,
|
PromiseReject,
|
||||||
PromiseResolve,
|
PromiseResolve,
|
||||||
StringPrototypeCharCodeAt,
|
StringPrototypeCharCodeAt,
|
||||||
StringPrototypeSlice,
|
StringPrototypeSlice,
|
||||||
TypedArrayPrototypeSubarray,
|
TypedArrayPrototypeSubarray,
|
||||||
Uint8Array,
|
Uint8Array,
|
||||||
|
ObjectPrototypeIsPrototypeOf,
|
||||||
|
ArrayBufferIsView,
|
||||||
Uint32Array,
|
Uint32Array,
|
||||||
} = window.__bootstrap.primordials;
|
} = window.__bootstrap.primordials;
|
||||||
|
|
||||||
|
@ -34,6 +34,8 @@
|
||||||
#fatal;
|
#fatal;
|
||||||
/** @type {boolean} */
|
/** @type {boolean} */
|
||||||
#ignoreBOM;
|
#ignoreBOM;
|
||||||
|
/** @type {boolean} */
|
||||||
|
#utf8SinglePass;
|
||||||
|
|
||||||
/** @type {number | null} */
|
/** @type {number | null} */
|
||||||
#rid = null;
|
#rid = null;
|
||||||
|
@ -56,6 +58,7 @@
|
||||||
this.#encoding = encoding;
|
this.#encoding = encoding;
|
||||||
this.#fatal = options.fatal;
|
this.#fatal = options.fatal;
|
||||||
this.#ignoreBOM = options.ignoreBOM;
|
this.#ignoreBOM = options.ignoreBOM;
|
||||||
|
this.#utf8SinglePass = encoding === "utf-8" && !options.fatal;
|
||||||
this[webidl.brand] = webidl.brand;
|
this[webidl.brand] = webidl.brand;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -81,7 +84,7 @@
|
||||||
* @param {BufferSource} [input]
|
* @param {BufferSource} [input]
|
||||||
* @param {TextDecodeOptions} options
|
* @param {TextDecodeOptions} options
|
||||||
*/
|
*/
|
||||||
decode(input = new Uint8Array(), options = {}) {
|
decode(input = new Uint8Array(), options = undefined) {
|
||||||
webidl.assertBranded(this, TextDecoderPrototype);
|
webidl.assertBranded(this, TextDecoderPrototype);
|
||||||
const prefix = "Failed to execute 'decode' on 'TextDecoder'";
|
const prefix = "Failed to execute 'decode' on 'TextDecoder'";
|
||||||
if (input !== undefined) {
|
if (input !== undefined) {
|
||||||
|
@ -91,13 +94,28 @@
|
||||||
allowShared: true,
|
allowShared: true,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
let stream = false;
|
||||||
|
if (options !== undefined) {
|
||||||
options = webidl.converters.TextDecodeOptions(options, {
|
options = webidl.converters.TextDecodeOptions(options, {
|
||||||
prefix,
|
prefix,
|
||||||
context: "Argument 2",
|
context: "Argument 2",
|
||||||
});
|
});
|
||||||
|
stream = options.stream;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
try {
|
// Note from spec: implementations are strongly encouraged to use an implementation strategy that avoids this copy.
|
||||||
|
// When doing so they will have to make sure that changes to input do not affect future calls to decode().
|
||||||
|
if (
|
||||||
|
ObjectPrototypeIsPrototypeOf(
|
||||||
|
SharedArrayBuffer.prototype,
|
||||||
|
input || input.buffer,
|
||||||
|
)
|
||||||
|
) {
|
||||||
|
// We clone the data into a non-shared ArrayBuffer so we can pass it
|
||||||
|
// to Rust.
|
||||||
|
// `input` is now a Uint8Array, and calling the TypedArray constructor
|
||||||
|
// with a TypedArray argument copies the data.
|
||||||
if (ArrayBufferIsView(input)) {
|
if (ArrayBufferIsView(input)) {
|
||||||
input = new Uint8Array(
|
input = new Uint8Array(
|
||||||
input.buffer,
|
input.buffer,
|
||||||
|
@ -107,24 +125,15 @@
|
||||||
} else {
|
} else {
|
||||||
input = new Uint8Array(input);
|
input = new Uint8Array(input);
|
||||||
}
|
}
|
||||||
} catch {
|
|
||||||
// If the buffer is detached, just create a new empty Uint8Array.
|
|
||||||
input = new Uint8Array();
|
|
||||||
}
|
|
||||||
if (
|
|
||||||
ObjectPrototypeIsPrototypeOf(
|
|
||||||
SharedArrayBuffer.prototype,
|
|
||||||
input.buffer,
|
|
||||||
)
|
|
||||||
) {
|
|
||||||
// We clone the data into a non-shared ArrayBuffer so we can pass it
|
|
||||||
// to Rust.
|
|
||||||
// `input` is now a Uint8Array, and calling the TypedArray constructor
|
|
||||||
// with a TypedArray argument copies the data.
|
|
||||||
input = new Uint8Array(input);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!options.stream && this.#rid === null) {
|
// Fast path for single pass decoding.
|
||||||
|
if (!stream && this.#rid === null) {
|
||||||
|
// Fast path for utf8 single pass decoding.
|
||||||
|
if (this.#utf8SinglePass) {
|
||||||
|
return ops.op_encoding_decode_utf8(input, this.#ignoreBOM);
|
||||||
|
}
|
||||||
|
|
||||||
return ops.op_encoding_decode_single(
|
return ops.op_encoding_decode_single(
|
||||||
input,
|
input,
|
||||||
this.#encoding,
|
this.#encoding,
|
||||||
|
@ -140,9 +149,9 @@
|
||||||
this.#ignoreBOM,
|
this.#ignoreBOM,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return ops.op_encoding_decode(input, this.#rid, options.stream);
|
return ops.op_encoding_decode(input, this.#rid, stream);
|
||||||
} finally {
|
} finally {
|
||||||
if (!options.stream && this.#rid !== null) {
|
if (!stream && this.#rid !== null) {
|
||||||
core.close(this.#rid);
|
core.close(this.#rid);
|
||||||
this.#rid = null;
|
this.#rid = null;
|
||||||
}
|
}
|
||||||
|
|
|
@ -91,6 +91,7 @@ pub fn init<P: TimersPermission + 'static>(
|
||||||
op_base64_btoa::decl(),
|
op_base64_btoa::decl(),
|
||||||
op_encoding_normalize_label::decl(),
|
op_encoding_normalize_label::decl(),
|
||||||
op_encoding_decode_single::decl(),
|
op_encoding_decode_single::decl(),
|
||||||
|
op_encoding_decode_utf8::decl(),
|
||||||
op_encoding_new_decoder::decl(),
|
op_encoding_new_decoder::decl(),
|
||||||
op_encoding_decode::decl(),
|
op_encoding_decode::decl(),
|
||||||
op_encoding_encode_into::decl(),
|
op_encoding_encode_into::decl(),
|
||||||
|
@ -179,6 +180,39 @@ fn op_encoding_normalize_label(label: String) -> Result<String, AnyError> {
|
||||||
Ok(encoding.name().to_lowercase())
|
Ok(encoding.name().to_lowercase())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[op(v8)]
|
||||||
|
fn op_encoding_decode_utf8<'a>(
|
||||||
|
scope: &mut v8::HandleScope<'a>,
|
||||||
|
zero_copy: &[u8],
|
||||||
|
ignore_bom: bool,
|
||||||
|
) -> Result<serde_v8::Value<'a>, AnyError> {
|
||||||
|
let buf = &zero_copy;
|
||||||
|
|
||||||
|
let buf = if !ignore_bom
|
||||||
|
&& buf.len() >= 3
|
||||||
|
&& buf[0] == 0xef
|
||||||
|
&& buf[1] == 0xbb
|
||||||
|
&& buf[2] == 0xbf
|
||||||
|
{
|
||||||
|
&buf[3..]
|
||||||
|
} else {
|
||||||
|
buf
|
||||||
|
};
|
||||||
|
|
||||||
|
// If `String::new_from_utf8()` returns `None`, this means that the
|
||||||
|
// length of the decoded string would be longer than what V8 can
|
||||||
|
// handle. In this case we return `RangeError`.
|
||||||
|
//
|
||||||
|
// For more details see:
|
||||||
|
// - https://encoding.spec.whatwg.org/#dom-textdecoder-decode
|
||||||
|
// - https://github.com/denoland/deno/issues/6649
|
||||||
|
// - https://github.com/v8/v8/blob/d68fb4733e39525f9ff0a9222107c02c28096e2a/include/v8.h#L3277-L3278
|
||||||
|
match v8::String::new_from_utf8(scope, buf, v8::NewStringType::Normal) {
|
||||||
|
Some(text) => Ok(serde_v8::from_v8(scope, text.into())?),
|
||||||
|
None => Err(type_error("buffer exceeds maximum length")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[op]
|
#[op]
|
||||||
fn op_encoding_decode_single(
|
fn op_encoding_decode_single(
|
||||||
data: &[u8],
|
data: &[u8],
|
||||||
|
|
|
@ -449,13 +449,16 @@ fn codegen_u8_slice(core: &TokenStream2, idx: usize) -> TokenStream2 {
|
||||||
let value = args.get(#idx as i32);
|
let value = args.get(#idx as i32);
|
||||||
match #core::v8::Local::<#core::v8::ArrayBuffer>::try_from(value) {
|
match #core::v8::Local::<#core::v8::ArrayBuffer>::try_from(value) {
|
||||||
Ok(b) => {
|
Ok(b) => {
|
||||||
|
// Handles detached buffers.
|
||||||
|
let byte_length = b.byte_length();
|
||||||
let store = b.data() as *mut u8;
|
let store = b.data() as *mut u8;
|
||||||
// SAFETY: rust guarantees that lifetime of slice is no longer than the call.
|
// SAFETY: rust guarantees that lifetime of slice is no longer than the call.
|
||||||
unsafe { ::std::slice::from_raw_parts_mut(store, b.byte_length()) }
|
unsafe { ::std::slice::from_raw_parts_mut(store, byte_length) }
|
||||||
},
|
},
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
if let Ok(view) = #core::v8::Local::<#core::v8::ArrayBufferView>::try_from(value) {
|
if let Ok(view) = #core::v8::Local::<#core::v8::ArrayBufferView>::try_from(value) {
|
||||||
let (offset, len) = (view.byte_offset(), view.byte_length());
|
let len = view.byte_length();
|
||||||
|
let offset = view.byte_offset();
|
||||||
let buffer = match view.buffer(scope) {
|
let buffer = match view.buffer(scope) {
|
||||||
Some(v) => v,
|
Some(v) => v,
|
||||||
None => {
|
None => {
|
||||||
|
|
Loading…
Reference in a new issue