mirror of
https://github.com/denoland/deno.git
synced 2024-12-31 11:34:15 -05:00
perf: speed up TextEncoder.prototype.encodeInto() (#11219)
The current implementation of `op_encoding_encode_into` UTF-8-encodes each code point of the input string individually into the output buffer. However, by the time the op runs, the ops binding has already converted the input into a Rust `String`, which is always valid UTF-8, so its bytes can simply be copied to the output (up to the last code point boundary that fits in the buffer). This should improve this API's performance.
This commit is contained in:
parent
4bc8fe71db
commit
7b0375fae7
1 changed files with 27 additions and 16 deletions
|
@ -298,23 +298,34 @@ fn op_encoding_encode_into(
|
|||
input: String,
|
||||
mut buffer: ZeroCopyBuf,
|
||||
) -> Result<EncodeIntoResult, AnyError> {
|
||||
let dst: &mut [u8] = &mut buffer;
|
||||
let mut read = 0;
|
||||
let mut written = 0;
|
||||
for char in input.chars() {
|
||||
let len = char.len_utf8();
|
||||
if dst.len() < written + len {
|
||||
break;
|
||||
// Since `input` is already UTF-8, we can simply find the last UTF-8 code
|
||||
// point boundary from input that fits in `buffer`, and copy the bytes up to
|
||||
// that point.
|
||||
let boundary = if buffer.len() >= input.len() {
|
||||
input.len()
|
||||
} else {
|
||||
let mut boundary = buffer.len();
|
||||
|
||||
// The maximum length of a UTF-8 code point is 4 bytes.
|
||||
for _ in 0..4 {
|
||||
if input.is_char_boundary(boundary) {
|
||||
break;
|
||||
}
|
||||
debug_assert!(boundary > 0);
|
||||
boundary -= 1;
|
||||
}
|
||||
char.encode_utf8(&mut dst[written..]);
|
||||
written += len;
|
||||
if char > '\u{FFFF}' {
|
||||
read += 2
|
||||
} else {
|
||||
read += 1
|
||||
};
|
||||
}
|
||||
Ok(EncodeIntoResult { read, written })
|
||||
|
||||
debug_assert!(input.is_char_boundary(boundary));
|
||||
boundary
|
||||
};
|
||||
|
||||
buffer[..boundary].copy_from_slice(input[..boundary].as_bytes());
|
||||
|
||||
Ok(EncodeIntoResult {
|
||||
// The `read` output parameter is measured in UTF-16 code units.
|
||||
read: input[..boundary].encode_utf16().count(),
|
||||
written: boundary,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn get_declaration() -> PathBuf {
|
||||
|
|
Loading…
Reference in a new issue