mirror of
https://github.com/denoland/deno.git
synced 2025-01-12 00:54:02 -05:00
9d6584c16f
Extracted from https://github.com/denoland/deno/pull/17815 Optimise Buffer's string operations, most significantly when dealing with ASCII and UTF-16. Base64 and HEX encodings are affected to much lesser degrees. ## Performance ### String length 15 With very small strings we're at break-even or sometimes even lose a tad bit of performance from creating a `DataView` that ends up not paying for itself. **This PR:** ``` benchmark time (avg) iter/s (min … max) p75 p99 p995 -------------------------------------------------------------------------------------------------------------- ----------------------------- Buffer.from ascii string 1.15 µs/iter 871,388.6 (728.78 ns … 1.56 µs) 1.23 µs 1.56 µs 1.56 µs Buffer.from base64 string 1.63 µs/iter 612,790.9 (1.31 µs … 1.96 µs) 1.77 µs 1.96 µs 1.96 µs Buffer.from utf16 string 1.41 µs/iter 707,396.3 (915.24 ns … 1.93 µs) 1.61 µs 1.93 µs 1.93 µs Buffer.from hex string 1.87 µs/iter 535,357.9 (1.56 µs … 2.19 µs) 2 µs 2.19 µs 2.19 µs Buffer.toString ascii string 154.58 ns/iter 6,469,162.8 (149.69 ns … 198 ns) 154.51 ns 182.89 ns 191.91 ns Buffer.toString base64 string 161.65 ns/iter 6,186,189.6 (150.91 ns … 181.15 ns) 165.18 ns 171.87 ns 174.94 ns Buffer.toString utf16 string 292.74 ns/iter 3,415,959.8 (285.43 ns … 312.47 ns) 295.25 ns 310.47 ns 312.47 ns Buffer.toString hex string 89.61 ns/iter 11,159,315.6 (81.09 ns … 123.77 ns) 91.09 ns 113.62 ns 119.28 ns ``` **Main:** ``` benchmark time (avg) iter/s (min … max) p75 p99 p995 -------------------------------------------------------------------------------------------------------------- ----------------------------- Buffer.from ascii string 1.26 µs/iter 794,875.8 (1.07 µs … 1.46 µs) 1.31 µs 1.46 µs 1.46 µs Buffer.from base64 string 1.65 µs/iter 607,853.3 (1.38 µs … 2.01 µs) 1.69 µs 2.01 µs 2.01 µs Buffer.from utf16 string 1.34 µs/iter 744,894.6 (1.09 µs … 1.55 µs) 1.45 µs 1.55 µs 1.55 µs Buffer.from hex string 2.01 µs/iter 496,345.8 (1.54 µs … 2.6 µs) 2.26 µs 2.6 µs 2.6 µs 
Buffer.toString ascii string 150.16 ns/iter 6,659,630.5 (144.99 ns … 166.68 ns) 152.4 ns 157.26 ns 159.14 ns Buffer.toString base64 string 164.73 ns/iter 6,070,692.0 (158.77 ns … 185.63 ns) 168.48 ns 175.74 ns 176.68 ns Buffer.toString utf16 string 150.61 ns/iter 6,639,864.0 (148.2 ns … 168.29 ns) 150.93 ns 157.21 ns 168.15 ns Buffer.toString hex string 94.21 ns/iter 10,614,972.9 (86.21 ns … 98.75 ns) 95.43 ns 97.99 ns 98.21 ns ``` ### String length 1500 With moderate lengths we already see great upsides for `Buffer.from()` with ASCII and UTF-16. **This PR:** ``` benchmark time (avg) iter/s (min … max) p75 p99 p995 -------------------------------------------------------------------------------------------------------------- ----------------------------- Buffer.from ascii string 5.79 µs/iter 172,562.6 (4.72 µs … 4.71 ms) 5.04 µs 10.3 µs 11.67 µs Buffer.from base64 string 5.08 µs/iter 196,678.9 (4.97 µs … 5.76 µs) 5.08 µs 5.76 µs 5.76 µs Buffer.from utf16 string 9.68 µs/iter 103,316.5 (7.14 µs … 3.44 ms) 10.32 µs 13.42 µs 15.21 µs Buffer.from hex string 53.7 µs/iter 18,620.2 (49.37 µs … 2.2 ms) 54.74 µs 72.2 µs 81.07 µs Buffer.toString ascii string 6.63 µs/iter 150,761.3 (5.59 µs … 1.11 ms) 6.08 µs 15.68 µs 24.77 µs Buffer.toString base64 string 460.57 ns/iter 2,171,224.4 (448.33 ns … 511.73 ns) 465.05 ns 495.54 ns 511.73 ns Buffer.toString utf16 string 6.52 µs/iter 153,287.0 (6.47 µs … 6.66 µs) 6.53 µs 6.66 µs 6.66 µs Buffer.toString hex string 3.68 µs/iter 271,965.4 (3.64 µs … 3.82 µs) 3.68 µs 3.82 µs 3.82 µs ``` **Main:** ``` benchmark time (avg) iter/s (min … max) p75 p99 p995 -------------------------------------------------------------------------------------------------------------- ----------------------------- Buffer.from ascii string 11.46 µs/iter 87,298.1 (8.53 µs … 834.1 µs) 9.61 µs 83.31 µs 87.3 µs Buffer.from base64 string 5.4 µs/iter 185,027.8 (5.07 µs … 7.49 µs) 5.44 µs 7.49 µs 7.49 µs Buffer.from utf16 string 20.3 µs/iter 49,270.8 (13.55 µs … 649.11 
µs) 18.8 µs 113.93 µs 125.17 µs Buffer.from hex string 52.03 µs/iter 19,218.9 (48.74 µs … 2.59 ms) 52.84 µs 67.05 µs 73.56 µs Buffer.toString ascii string 6.46 µs/iter 154,822.5 (6.32 µs … 6.69 µs) 6.52 µs 6.69 µs 6.69 µs Buffer.toString base64 string 440.19 ns/iter 2,271,764.6 (427 ns … 490.77 ns) 444.74 ns 484.64 ns 490.77 ns Buffer.toString utf16 string 6.89 µs/iter 145,106.7 (6.81 µs … 7.24 µs) 6.91 µs 7.24 µs 7.24 µs Buffer.toString hex string 3.66 µs/iter 273,456.5 (3.6 µs … 4.02 µs) 3.64 µs 4.02 µs 4.02 µs ``` ### String length 2^20 With massive lengths the difference in ASCII and UTF-16 parsing performance is enormous. **This PR:** ``` benchmark time (avg) iter/s (min … max) p75 p99 p995 -------------------------------------------------------------------------------------------------------------------- ----------------------------- Buffer.from ascii string 4.1 ms/iter 243.7 (2.64 ms … 6.74 ms) 4.43 ms 6.26 ms 6.74 ms Buffer.from base64 string 3.74 ms/iter 267.6 (2.91 ms … 4.92 ms) 3.96 ms 4.31 ms 4.92 ms Buffer.from utf16 string 7.72 ms/iter 129.5 (5.91 ms … 11.03 ms) 7.97 ms 11.03 ms 11.03 ms Buffer.from hex string 35.72 ms/iter 28.0 (34.71 ms … 38.42 ms) 35.93 ms 38.42 ms 38.42 ms Buffer.toString ascii string 78.92 ms/iter 12.7 (42.72 ms … 94.13 ms) 91.64 ms 94.13 ms 94.13 ms Buffer.toString base64 string 833.62 µs/iter 1,199.6 (638.05 µs … 5.97 ms) 826.86 µs 2.45 ms 2.48 ms Buffer.toString utf16 string 79.35 ms/iter 12.6 (69.72 ms … 88.9 ms) 86.66 ms 88.9 ms 88.9 ms Buffer.toString hex string 31.04 ms/iter 32.2 (4.3 ms … 46.9 ms) 37.21 ms 46.9 ms 46.9 ms ``` **Main:** ``` benchmark time (avg) iter/s (min … max) p75 p99 p995 -------------------------------------------------------------------------------------------------------------------- ----------------------------- Buffer.from ascii string 18.66 ms/iter 53.6 (15.61 ms … 23.26 ms) 20.62 ms 23.26 ms 23.26 ms Buffer.from base64 string 4.7 ms/iter 212.9 (2.94 ms … 9.07 ms) 4.65 ms 9.06 ms 9.07 ms 
Buffer.from utf16 string 33.49 ms/iter 29.9 (31.24 ms … 35.67 ms) 34.08 ms 35.67 ms 35.67 ms Buffer.from hex string 39.38 ms/iter 25.4 (38.66 ms … 42.36 ms) 39.58 ms 42.36 ms 42.36 ms Buffer.toString ascii string 77.68 ms/iter 12.9 (67.46 ms … 95.68 ms) 84.71 ms 95.68 ms 95.68 ms Buffer.toString base64 string 825.53 µs/iter 1,211.3 (655.38 µs … 6.69 ms) 816.62 µs 3.07 ms 3.13 ms Buffer.toString utf16 string 76.54 ms/iter 13.1 (66.9 ms … 85.26 ms) 83.63 ms 85.26 ms 85.26 ms Buffer.toString hex string 38.56 ms/iter 25.9 (33.83 ms … 46.56 ms) 45.33 ms 46.56 ms 46.56 ms ```
108 lines
3.5 KiB
TypeScript
108 lines
3.5 KiB
TypeScript
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
|
|
|
|
// TODO(petamoriken): enable prefer-primordials for node polyfills
|
|
// deno-lint-ignore-file prefer-primordials
|
|
|
|
import {
|
|
forgivingBase64Decode,
|
|
forgivingBase64UrlEncode,
|
|
} from "ext:deno_web/00_infra.js";
|
|
|
|
/**
 * Encodes a string as single-byte characters.
 *
 * Every UTF-16 code unit of `str` produces exactly one byte: its low eight
 * bits. The upper eight bits of each code unit are discarded.
 *
 * @param str The string to encode.
 * @returns A new `Uint8Array` holding one byte per code unit of `str`.
 */
export function asciiToBytes(str: string) {
  const out = new Uint8Array(str.length);
  let pos = out.length;
  // Fill order is irrelevant; every slot is written exactly once.
  while (pos-- > 0) {
    out[pos] = str.charCodeAt(pos) & 0xff;
  }
  return out;
}
|
|
|
|
/**
 * Decodes a Base64 string into bytes.
 *
 * The input is normalised with `base64clean`, URL-safe alphabet characters
 * (`-` and `_`) are mapped to their standard counterparts (`+` and `/`), and
 * the result is handed to `forgivingBase64Decode`.
 *
 * @param str The Base64 text to decode.
 * @returns Whatever `forgivingBase64Decode` returns for the normalised text.
 */
export function base64ToBytes(str: string) {
  const cleaned = base64clean(str);
  const standardAlphabet = cleaned.replaceAll("-", "+").replaceAll("_", "/");
  return forgivingBase64Decode(standardAlphabet);
}
|
|
|
|
// Matches every character that belongs to neither the standard nor the
// URL-safe Base64 alphabet.
const INVALID_BASE64_RE = /[^+/0-9A-Za-z-_]/g;

/**
 * Normalises Base64 text the way Node does before it is decoded.
 *
 * Steps, in order:
 *  1. Everything from the first `=` onwards is dropped (Node treats an equal
 *     sign as the end of the encoding); surrounding whitespace is trimmed.
 *  2. Characters outside the Base64 / Base64URL alphabets (such as `\n` or
 *     `\t`) are removed.
 *  3. Results shorter than two characters collapse to the empty string.
 *  4. The text is padded with `=` up to a multiple of four characters.
 *
 * @param str Raw Base64 or Base64URL text.
 * @returns The cleaned, `=`-padded text, or `""` when fewer than two valid
 *   characters remain.
 */
function base64clean(str: string) {
  // Node takes equal signs as end of the Base64 encoding
  const paddingStart = str.indexOf("=");
  const payload = paddingStart === -1
    ? str.trim()
    : str.substring(0, paddingStart).trimStart();

  // Node strips out invalid characters like \n and \t from the string
  const stripped = payload.replace(INVALID_BASE64_RE, "");

  // Node converts strings with length < 2 to ''
  if (stripped.length < 2) {
    return "";
  }

  // Node allows for non-padded base64 strings (missing trailing ===);
  // the table is indexed by `length % 4`.
  const padding = ["", "===", "==", "="][stripped.length % 4];
  if (padding === undefined) {
    throw new Error("Unexpected NaN value for string length");
  }
  return stripped + padding;
}
|
|
|
|
/**
 * Decodes a Base64URL string into bytes.
 *
 * The input is normalised with `base64clean`, which keeps characters from
 * both the standard and the URL-safe alphabet and pads the text with `=`.
 * URL-safe characters (`-`, `_`) are then mapped to the standard alphabet
 * (`+`, `/`) so the text can be decoded with `forgivingBase64Decode`.
 *
 * @param str The Base64URL text to decode.
 * @returns Whatever `forgivingBase64Decode` returns for the normalised text.
 */
export function base64UrlToBytes(str: string) {
  const cleaned = base64clean(str);
  // This function must *decode*. It previously passed the text to
  // `forgivingBase64UrlEncode`, which (going by its name) encodes instead.
  const standardAlphabet = cleaned.replaceAll("-", "+").replaceAll("_", "/");
  return forgivingBase64Decode(standardAlphabet);
}
|
|
|
|
/**
 * Decodes a hexadecimal string into bytes.
 *
 * Characters are consumed in pairs; a trailing unpaired character is
 * ignored. Decoding stops at the first pair in which either character is not
 * a hexadecimal digit, and only the bytes decoded up to that point are
 * returned. This matches Node, which truncates at the first non-hexadecimal
 * character.
 *
 * @param str The hexadecimal text to decode.
 * @returns The decoded bytes. When decoding stopped early this is a
 *   `subarray` view over a larger allocation.
 */
export function hexToBytes(str: string) {
  const length = str.length >>> 1;
  const byteArray = new Uint8Array(length);
  let i: number;
  for (i = 0; i < length; i++) {
    const high = Number.parseInt(str[i * 2], 16);
    const low = Number.parseInt(str[i * 2 + 1], 16);
    // Either nibble being invalid ends the decode. Testing with `&&` would
    // let a half-valid pair such as "g1" through, and the NaN nibble would
    // silently become 0 in the bitwise expression below.
    if (Number.isNaN(high) || Number.isNaN(low)) {
      break;
    }
    byteArray[i] = (high << 4) | low;
  }
  // Returning a buffer subarray is okay: This API's return value
  // is never exposed to users and is only ever used for its length
  // and the data within the subarray.
  return i === length ? byteArray : byteArray.subarray(0, i);
}
|
|
|
|
/**
 * Encodes a string as UTF-16LE bytes.
 *
 * Each UTF-16 code unit of `str` is written as two bytes, low byte first.
 *
 * @param str The string to encode.
 * @param units Optional upper bound, in bytes, for the output. It is rounded
 *   down to an even number so that only whole 16-bit code units are emitted.
 *   When omitted the whole string is encoded; an explicit `0` yields an
 *   empty result.
 * @returns A new `Uint8Array` containing the encoded bytes.
 */
export function utf16leToBytes(str: string, units?: number) {
  // Only a missing bound means "unbounded". A truthiness check here would
  // also treat an explicit bound of 0 as "no limit".
  const maxBytes = units == null ? Infinity : (units >>> 1) * 2;
  const length = Math.min(str.length * 2, maxBytes);
  const byteArray = new Uint8Array(length);
  const view = new DataView(byteArray.buffer);
  // `length` is always even, so the loop fills the array exactly.
  for (let i = 0; i * 2 < length; i++) {
    view.setUint16(i * 2, str.charCodeAt(i), true);
  }
  return byteArray;
}
|
|
|
|
/**
 * Decodes bytes into a 7-bit ASCII string.
 *
 * The high bit of every byte is cleared before it is turned into a
 * character, so each output character is in the range U+0000..U+007F.
 *
 * @param bytes The bytes to decode.
 * @returns A string with one character per input byte.
 */
export function bytesToAscii(bytes: Uint8Array) {
  const count = bytes.byteLength;
  let text = "";
  let index = 0;
  while (index < count) {
    text += String.fromCharCode(bytes[index] & 0x7f);
    index++;
  }
  return text;
}
|
|
|
|
/**
 * Decodes UTF-16LE bytes into a string.
 *
 * Bytes are read two at a time, low byte first, and each pair becomes one
 * UTF-16 code unit. A trailing unpaired byte is ignored.
 *
 * @param bytes The bytes to decode; the view's own offset and length within
 *   its backing buffer are respected.
 * @returns A string with one code unit per complete byte pair.
 */
export function bytesToUtf16le(bytes: Uint8Array) {
  const byteCount = bytes.byteLength;
  const reader = new DataView(bytes.buffer, bytes.byteOffset, byteCount);
  // Number of complete 16-bit units available.
  const unitCount = byteCount >>> 1;
  let text = "";
  for (let unit = 0; unit < unitCount; unit++) {
    text += String.fromCharCode(reader.getUint16(unit * 2, true));
  }
  return text;
}
|