mirror of
https://github.com/denoland/deno.git
synced 2025-01-12 00:54:02 -05:00
perf(ext/node): Optimise Buffer string operations (#20158)
Extracted from https://github.com/denoland/deno/pull/17815 Optimise Buffer's string operations, most significantly when dealing with ASCII and UTF-16. Base64 and HEX encodings are affected to much lesser degrees. ## Performance ### String length 15 With very small strings we're at break-even or sometimes even lose a tad bit of performance from creating a `DataView` that ends up not paying for itself. **This PR:** ``` benchmark time (avg) iter/s (min … max) p75 p99 p995 -------------------------------------------------------------------------------------------------------------- ----------------------------- Buffer.from ascii string 1.15 µs/iter 871,388.6 (728.78 ns … 1.56 µs) 1.23 µs 1.56 µs 1.56 µs Buffer.from base64 string 1.63 µs/iter 612,790.9 (1.31 µs … 1.96 µs) 1.77 µs 1.96 µs 1.96 µs Buffer.from utf16 string 1.41 µs/iter 707,396.3 (915.24 ns … 1.93 µs) 1.61 µs 1.93 µs 1.93 µs Buffer.from hex string 1.87 µs/iter 535,357.9 (1.56 µs … 2.19 µs) 2 µs 2.19 µs 2.19 µs Buffer.toString ascii string 154.58 ns/iter 6,469,162.8 (149.69 ns … 198 ns) 154.51 ns 182.89 ns 191.91 ns Buffer.toString base64 string 161.65 ns/iter 6,186,189.6 (150.91 ns … 181.15 ns) 165.18 ns 171.87 ns 174.94 ns Buffer.toString utf16 string 292.74 ns/iter 3,415,959.8 (285.43 ns … 312.47 ns) 295.25 ns 310.47 ns 312.47 ns Buffer.toString hex string 89.61 ns/iter 11,159,315.6 (81.09 ns … 123.77 ns) 91.09 ns 113.62 ns 119.28 ns ``` **Main:** ``` benchmark time (avg) iter/s (min … max) p75 p99 p995 -------------------------------------------------------------------------------------------------------------- ----------------------------- Buffer.from ascii string 1.26 µs/iter 794,875.8 (1.07 µs … 1.46 µs) 1.31 µs 1.46 µs 1.46 µs Buffer.from base64 string 1.65 µs/iter 607,853.3 (1.38 µs … 2.01 µs) 1.69 µs 2.01 µs 2.01 µs Buffer.from utf16 string 1.34 µs/iter 744,894.6 (1.09 µs … 1.55 µs) 1.45 µs 1.55 µs 1.55 µs Buffer.from hex string 2.01 µs/iter 496,345.8 (1.54 µs … 2.6 µs) 2.26 µs 2.6 µs 2.6 µs 
Buffer.toString ascii string 150.16 ns/iter 6,659,630.5 (144.99 ns … 166.68 ns) 152.4 ns 157.26 ns 159.14 ns Buffer.toString base64 string 164.73 ns/iter 6,070,692.0 (158.77 ns … 185.63 ns) 168.48 ns 175.74 ns 176.68 ns Buffer.toString utf16 string 150.61 ns/iter 6,639,864.0 (148.2 ns … 168.29 ns) 150.93 ns 157.21 ns 168.15 ns Buffer.toString hex string 94.21 ns/iter 10,614,972.9 (86.21 ns … 98.75 ns) 95.43 ns 97.99 ns 98.21 ns ``` ### String length 1500 With moderate lengths we already see great upsides for `Buffer.from()` with ASCII and UTF-16. **This PR:** ``` benchmark time (avg) iter/s (min … max) p75 p99 p995 -------------------------------------------------------------------------------------------------------------- ----------------------------- Buffer.from ascii string 5.79 µs/iter 172,562.6 (4.72 µs … 4.71 ms) 5.04 µs 10.3 µs 11.67 µs Buffer.from base64 string 5.08 µs/iter 196,678.9 (4.97 µs … 5.76 µs) 5.08 µs 5.76 µs 5.76 µs Buffer.from utf16 string 9.68 µs/iter 103,316.5 (7.14 µs … 3.44 ms) 10.32 µs 13.42 µs 15.21 µs Buffer.from hex string 53.7 µs/iter 18,620.2 (49.37 µs … 2.2 ms) 54.74 µs 72.2 µs 81.07 µs Buffer.toString ascii string 6.63 µs/iter 150,761.3 (5.59 µs … 1.11 ms) 6.08 µs 15.68 µs 24.77 µs Buffer.toString base64 string 460.57 ns/iter 2,171,224.4 (448.33 ns … 511.73 ns) 465.05 ns 495.54 ns 511.73 ns Buffer.toString utf16 string 6.52 µs/iter 153,287.0 (6.47 µs … 6.66 µs) 6.53 µs 6.66 µs 6.66 µs Buffer.toString hex string 3.68 µs/iter 271,965.4 (3.64 µs … 3.82 µs) 3.68 µs 3.82 µs 3.82 µs ``` **Main:** ``` benchmark time (avg) iter/s (min … max) p75 p99 p995 -------------------------------------------------------------------------------------------------------------- ----------------------------- Buffer.from ascii string 11.46 µs/iter 87,298.1 (8.53 µs … 834.1 µs) 9.61 µs 83.31 µs 87.3 µs Buffer.from base64 string 5.4 µs/iter 185,027.8 (5.07 µs … 7.49 µs) 5.44 µs 7.49 µs 7.49 µs Buffer.from utf16 string 20.3 µs/iter 49,270.8 (13.55 µs … 649.11 
µs) 18.8 µs 113.93 µs 125.17 µs Buffer.from hex string 52.03 µs/iter 19,218.9 (48.74 µs … 2.59 ms) 52.84 µs 67.05 µs 73.56 µs Buffer.toString ascii string 6.46 µs/iter 154,822.5 (6.32 µs … 6.69 µs) 6.52 µs 6.69 µs 6.69 µs Buffer.toString base64 string 440.19 ns/iter 2,271,764.6 (427 ns … 490.77 ns) 444.74 ns 484.64 ns 490.77 ns Buffer.toString utf16 string 6.89 µs/iter 145,106.7 (6.81 µs … 7.24 µs) 6.91 µs 7.24 µs 7.24 µs Buffer.toString hex string 3.66 µs/iter 273,456.5 (3.6 µs … 4.02 µs) 3.64 µs 4.02 µs 4.02 µs ``` ### String length 2^20 With massive lengths, the difference in ASCII and UTF-16 parsing performance is enormous. **This PR:** ``` benchmark time (avg) iter/s (min … max) p75 p99 p995 -------------------------------------------------------------------------------------------------------------------- ----------------------------- Buffer.from ascii string 4.1 ms/iter 243.7 (2.64 ms … 6.74 ms) 4.43 ms 6.26 ms 6.74 ms Buffer.from base64 string 3.74 ms/iter 267.6 (2.91 ms … 4.92 ms) 3.96 ms 4.31 ms 4.92 ms Buffer.from utf16 string 7.72 ms/iter 129.5 (5.91 ms … 11.03 ms) 7.97 ms 11.03 ms 11.03 ms Buffer.from hex string 35.72 ms/iter 28.0 (34.71 ms … 38.42 ms) 35.93 ms 38.42 ms 38.42 ms Buffer.toString ascii string 78.92 ms/iter 12.7 (42.72 ms … 94.13 ms) 91.64 ms 94.13 ms 94.13 ms Buffer.toString base64 string 833.62 µs/iter 1,199.6 (638.05 µs … 5.97 ms) 826.86 µs 2.45 ms 2.48 ms Buffer.toString utf16 string 79.35 ms/iter 12.6 (69.72 ms … 88.9 ms) 86.66 ms 88.9 ms 88.9 ms Buffer.toString hex string 31.04 ms/iter 32.2 (4.3 ms … 46.9 ms) 37.21 ms 46.9 ms 46.9 ms ``` **Main:** ``` benchmark time (avg) iter/s (min … max) p75 p99 p995 -------------------------------------------------------------------------------------------------------------------- ----------------------------- Buffer.from ascii string 18.66 ms/iter 53.6 (15.61 ms … 23.26 ms) 20.62 ms 23.26 ms 23.26 ms Buffer.from base64 string 4.7 ms/iter 212.9 (2.94 ms … 9.07 ms) 4.65 ms 9.06 ms 9.07 ms 
Buffer.from utf16 string 33.49 ms/iter 29.9 (31.24 ms … 35.67 ms) 34.08 ms 35.67 ms 35.67 ms Buffer.from hex string 39.38 ms/iter 25.4 (38.66 ms … 42.36 ms) 39.58 ms 42.36 ms 42.36 ms Buffer.toString ascii string 77.68 ms/iter 12.9 (67.46 ms … 95.68 ms) 84.71 ms 95.68 ms 95.68 ms Buffer.toString base64 string 825.53 µs/iter 1,211.3 (655.38 µs … 6.69 ms) 816.62 µs 3.07 ms 3.13 ms Buffer.toString utf16 string 76.54 ms/iter 13.1 (66.9 ms … 85.26 ms) 83.63 ms 85.26 ms 85.26 ms Buffer.toString hex string 38.56 ms/iter 25.9 (33.83 ms … 46.56 ms) 45.33 ms 46.56 ms 46.56 ms ```
This commit is contained in:
parent
29784df24e
commit
9d6584c16f
3 changed files with 76 additions and 45 deletions
|
@ -693,7 +693,7 @@ Buffer.prototype.base64urlWrite = function base64urlWrite(
|
|||
|
||||
Buffer.prototype.hexWrite = function hexWrite(string, offset, length) {
|
||||
return blitBuffer(
|
||||
hexToBytes(string, this.length - offset),
|
||||
hexToBytes(string),
|
||||
this,
|
||||
offset,
|
||||
length,
|
||||
|
@ -751,6 +751,9 @@ Buffer.prototype.utf8Write = function utf8Write(string, offset, length) {
|
|||
};
|
||||
|
||||
Buffer.prototype.write = function write(string, offset, length, encoding) {
|
||||
if (typeof string !== "string") {
|
||||
throw new codes.ERR_INVALID_ARG_TYPE("argument", "string");
|
||||
}
|
||||
// Buffer#write(string);
|
||||
if (offset === undefined) {
|
||||
return this.utf8Write(string, 0, this.length);
|
||||
|
@ -1756,16 +1759,26 @@ function utf8ToBytes(string, units) {
|
|||
return bytes;
|
||||
}
|
||||
|
||||
/**
 * Copies bytes from `src` into `dst`, starting at index `offset` of `dst`.
 *
 * The number of bytes copied is the smallest of `byteLength` (when given),
 * `src.length`, and the room left in `dst` after `offset`.
 *
 * @param src Source bytes. It must provide `subarray()`, i.e. be a typed
 *   array; the helpers visible alongside this function return `Uint8Array`s.
 * @param dst Destination typed array (must provide `set()`).
 * @param offset Index in `dst` at which writing starts.
 * @param byteLength Optional upper bound on the number of bytes to write.
 * @returns The number of bytes actually written (0 when nothing fits).
 */
function blitBuffer(src, dst, offset, byteLength = Infinity) {
  // Establish the number of bytes to be written.
  const bytesToWrite = Math.min(
    // If byte length is defined in the call, then it sets an upper bound,
    // otherwise it is Infinity and is never chosen.
    byteLength,
    // The length of the source sets an upper bound being the source of data.
    src.length,
    // The length of the destination minus any offset into it sets an upper bound.
    dst.length - offset,
  );
  // Nothing fits (offset at/after the end of dst, zero/negative bound, or a
  // NaN bound): report zero bytes instead of letting `set` throw a RangeError.
  if (!(bytesToWrite > 0)) {
    return 0;
  }
  if (bytesToWrite < src.length) {
    // Narrow the source view to exactly the bytes we are about to write.
    // This makes sure we only write what we were told to write and prevents
    // `Uint8Array#set` from throwing when the source is longer than the
    // remaining space in the target.
    src = src.subarray(0, bytesToWrite);
  }
  dst.set(src, offset);
  return bytesToWrite;
}
|
||||
|
||||
function isInstance(obj, type) {
|
||||
|
|
|
@ -9,11 +9,12 @@ import {
|
|||
} from "ext:deno_web/00_infra.js";
|
||||
|
||||
/**
 * Encodes a string as one byte per UTF-16 code unit, keeping only the low
 * 8 bits of each code unit.
 *
 * @param str String to encode.
 * @returns A new `Uint8Array` of length `str.length`.
 */
export function asciiToBytes(str: string): Uint8Array {
  const length = str.length;
  // Allocate the result once instead of growing a plain array and copying it.
  const byteArray = new Uint8Array(length);
  for (let i = 0; i < length; ++i) {
    byteArray[i] = str.charCodeAt(i) & 255;
  }
  return byteArray;
}
|
||||
|
||||
export function base64ToBytes(str: string) {
|
||||
|
@ -25,16 +26,26 @@ export function base64ToBytes(str: string) {
|
|||
const INVALID_BASE64_RE = /[^+/0-9A-Za-z-_]/g;

/**
 * Normalises a loosely formatted Base64 / Base64URL string into a padded one.
 *
 * @param str Raw input string.
 * @returns The cleaned string padded with `=` to a multiple of four
 *   characters, or `""` when fewer than two valid characters remain.
 */
function base64clean(str: string): string {
  // Node takes equal signs as end of the Base64 encoding
  const eqIndex = str.indexOf("=");
  if (eqIndex !== -1) {
    str = str.substring(0, eqIndex);
  }
  // Node strips out invalid characters like \n and \t from the string, std/base64 does not.
  // Whitespace is outside the allowed alphabet, so this also covers trimming.
  str = str.replace(INVALID_BASE64_RE, "");
  // Node converts strings with length < 2 to ''
  const length = str.length;
  if (length < 2) return "";
  // Node allows for non-padded base64 strings (missing trailing ===), std/base64 does not
  const missing = (4 - (length % 4)) % 4;
  return missing === 0 ? str : `${str}${"=".repeat(missing)}`;
}
|
||||
|
||||
export function base64UrlToBytes(str: string) {
|
||||
|
@ -44,9 +55,10 @@ export function base64UrlToBytes(str: string) {
|
|||
}
|
||||
|
||||
export function hexToBytes(str: string) {
|
||||
const byteArray = new Uint8Array(Math.floor((str || "").length / 2));
|
||||
let i;
|
||||
for (i = 0; i < byteArray.length; i++) {
|
||||
const length = str.length >>> 1;
|
||||
const byteArray = new Uint8Array(length);
|
||||
let i: number;
|
||||
for (i = 0; i < length; i++) {
|
||||
const a = Number.parseInt(str[i * 2], 16);
|
||||
const b = Number.parseInt(str[i * 2 + 1], 16);
|
||||
if (Number.isNaN(a) && Number.isNaN(b)) {
|
||||
|
@ -54,39 +66,43 @@ export function hexToBytes(str: string) {
|
|||
}
|
||||
byteArray[i] = (a << 4) | b;
|
||||
}
|
||||
return new Uint8Array(
|
||||
i === byteArray.length ? byteArray : byteArray.slice(0, i),
|
||||
);
|
||||
// Returning a buffer subarray is okay: This API's return value
|
||||
// is never exposed to users and is only ever used for its length
|
||||
// and the data within the subarray.
|
||||
return i === length ? byteArray : byteArray.subarray(0, i);
|
||||
}
|
||||
|
||||
/**
 * Encodes a string as UTF-16LE: two bytes per UTF-16 code unit, low byte
 * first.
 *
 * @param str String to encode.
 * @param units Optional byte budget. Only whole 16-bit code units are
 *   written, so an odd budget is rounded down to the next even value. A
 *   budget of zero or less yields an empty result. When omitted (or NaN) the
 *   whole string is encoded.
 * @returns A new `Uint8Array` holding the encoded bytes.
 */
export function utf16leToBytes(str: string, units?: number): Uint8Array {
  const fullLength = str.length * 2;
  // Compare against `undefined` explicitly: a truthiness check would treat a
  // budget of 0 as "no limit". Plain arithmetic is used instead of `>>>`,
  // which converts Infinity to 0 and wraps negative or very large values.
  const budget = units === undefined || Number.isNaN(units)
    ? fullLength
    : Math.max(0, Math.floor(units / 2) * 2);
  const length = Math.min(fullLength, budget);
  const byteArray = new Uint8Array(length);
  const view = new DataView(byteArray.buffer);
  for (let i = 0; i * 2 < length; i++) {
    // `true` selects little-endian byte order.
    view.setUint16(i * 2, str.charCodeAt(i), true);
  }
  return byteArray;
}
|
||||
|
||||
/**
 * Decodes bytes into a string, one character per byte, keeping only the low
 * 7 bits of each byte.
 *
 * @param bytes Bytes to decode.
 * @returns A string of `bytes.byteLength` characters, each in U+0000..U+007F.
 */
export function bytesToAscii(bytes: Uint8Array): string {
  let res = "";
  const length = bytes.byteLength;
  for (let i = 0; i < length; ++i) {
    // Mask off the high bit so every byte maps into the 7-bit range.
    res += String.fromCharCode(bytes[i] & 127);
  }
  return res;
}
|
||||
|
||||
/**
 * Decodes UTF-16LE bytes into a string, one UTF-16 code unit per byte pair.
 * A trailing unpaired byte is ignored.
 *
 * @param bytes Bytes to decode; may be a view into a larger buffer.
 * @returns The decoded string.
 */
export function bytesToUtf16le(bytes: Uint8Array): string {
  let res = "";
  const length = bytes.byteLength;
  // Honour byteOffset so that views into a larger buffer decode their own
  // window rather than the start of the backing ArrayBuffer.
  const view = new DataView(bytes.buffer, bytes.byteOffset, length);
  for (let i = 0; i < length - 1; i += 2) {
    // `true` selects little-endian byte order.
    res += String.fromCharCode(view.getUint16(i, true));
  }
  return res;
}
|
||||
|
|
2
ext/web/internal.d.ts
vendored
2
ext/web/internal.d.ts
vendored
|
@ -41,6 +41,8 @@ declare module "ext:deno_web/00_infra.js" {
|
|||
};
|
||||
function forgivingBase64Encode(data: Uint8Array): string;
|
||||
function forgivingBase64Decode(data: string): Uint8Array;
|
||||
function forgivingBase64UrlEncode(data: Uint8Array | string): string;
|
||||
function forgivingBase64UrlDecode(data: string): Uint8Array;
|
||||
function serializeJSValueToJSONString(value: unknown): string;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue