1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2024-11-25 15:29:32 -05:00

perf: speed up TextEncoder.prototype.encodeInto() (#11219)

The current implementation of op_encoding_encode_into UTF-8-encodes each
individual code point of the input string into the output buffer. However, after
the ops binding, the input is already a Rust String (i.e. valid UTF-8), so its
bytes can simply be copied to the output, truncated at the last code point
boundary that fits in the buffer. This should improve this API's performance.
This commit is contained in:
Andreu Botella 2021-07-02 12:11:20 +02:00 committed by GitHub
parent 4bc8fe71db
commit 7b0375fae7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -298,23 +298,34 @@ fn op_encoding_encode_into(
input: String,
mut buffer: ZeroCopyBuf,
) -> Result<EncodeIntoResult, AnyError> {
let dst: &mut [u8] = &mut buffer;
let mut read = 0;
let mut written = 0;
for char in input.chars() {
let len = char.len_utf8();
if dst.len() < written + len {
// Since `input` is already UTF-8, we can simply find the last UTF-8 code
// point boundary from input that fits in `buffer`, and copy the bytes up to
// that point.
let boundary = if buffer.len() >= input.len() {
input.len()
} else {
let mut boundary = buffer.len();
// The maximum length of a UTF-8 code point is 4 bytes.
for _ in 0..4 {
if input.is_char_boundary(boundary) {
break;
}
char.encode_utf8(&mut dst[written..]);
written += len;
if char > '\u{FFFF}' {
read += 2
} else {
read += 1
};
debug_assert!(boundary > 0);
boundary -= 1;
}
Ok(EncodeIntoResult { read, written })
debug_assert!(input.is_char_boundary(boundary));
boundary
};
buffer[..boundary].copy_from_slice(input[..boundary].as_bytes());
Ok(EncodeIntoResult {
// The `read` output parameter is measured in UTF-16 code units.
read: input[..boundary].encode_utf16().count(),
written: boundary,
})
}
pub fn get_declaration() -> PathBuf {