mirror of
https://github.com/denoland/deno.git
synced 2024-11-22 15:06:54 -05:00
perf: speed up TextEncoder.prototype.encodeInto() (#11219)
The current implementation of `op_encoding_encode_into` walks the input string one code point at a time and UTF-8-encodes each code point into the output buffer. However, by the time the op runs, the ops binding has already converted the input into a Rust `String`, which is itself UTF-8, so the bytes can simply be copied into the output buffer, up to the last code point boundary that fits. This should improve this API's performance.
This commit is contained in:
parent
4bc8fe71db
commit
7b0375fae7
1 changed file with 27 additions and 16 deletions
|
@ -298,23 +298,34 @@ fn op_encoding_encode_into(
|
||||||
input: String,
|
input: String,
|
||||||
mut buffer: ZeroCopyBuf,
|
mut buffer: ZeroCopyBuf,
|
||||||
) -> Result<EncodeIntoResult, AnyError> {
|
) -> Result<EncodeIntoResult, AnyError> {
|
||||||
let dst: &mut [u8] = &mut buffer;
|
// Since `input` is already UTF-8, we can simply find the last UTF-8 code
|
||||||
let mut read = 0;
|
// point boundary from input that fits in `buffer`, and copy the bytes up to
|
||||||
let mut written = 0;
|
// that point.
|
||||||
for char in input.chars() {
|
let boundary = if buffer.len() >= input.len() {
|
||||||
let len = char.len_utf8();
|
input.len()
|
||||||
if dst.len() < written + len {
|
} else {
|
||||||
break;
|
let mut boundary = buffer.len();
|
||||||
|
|
||||||
|
// The maximum length of a UTF-8 code point is 4 bytes.
|
||||||
|
for _ in 0..4 {
|
||||||
|
if input.is_char_boundary(boundary) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
debug_assert!(boundary > 0);
|
||||||
|
boundary -= 1;
|
||||||
}
|
}
|
||||||
char.encode_utf8(&mut dst[written..]);
|
|
||||||
written += len;
|
debug_assert!(input.is_char_boundary(boundary));
|
||||||
if char > '\u{FFFF}' {
|
boundary
|
||||||
read += 2
|
};
|
||||||
} else {
|
|
||||||
read += 1
|
buffer[..boundary].copy_from_slice(input[..boundary].as_bytes());
|
||||||
};
|
|
||||||
}
|
Ok(EncodeIntoResult {
|
||||||
Ok(EncodeIntoResult { read, written })
|
// The `read` output parameter is measured in UTF-16 code units.
|
||||||
|
read: input[..boundary].encode_utf16().count(),
|
||||||
|
written: boundary,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_declaration() -> PathBuf {
|
pub fn get_declaration() -> PathBuf {
|
||||||
|
|
Loading…
Reference in a new issue