From dc6e0c3591709d6f8887bb672af1de54dfc8a974 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartek=20Iwa=C5=84czuk?= Date: Sun, 15 Mar 2020 15:31:55 +0100 Subject: [PATCH] feat: Deno.core.{encode,decode}; standalone UTF-8 encoding/decoding (#4349) This commit adds two new methods to the "Deno.core" namespace: "encode" and "decode". Those methods are bound in Rust to provide a) fast and b) generally available means of encoding and decoding UTF-8 strings. Both methods are now used in "cli/js/ops/dispatch_json.ts". --- cli/js/globals.ts | 3 ++ cli/js/ops/dispatch_json.ts | 5 ++- cli/tests/core_decode_perf.js | 37 +++++++++++++++++++ cli/tests/core_encode_perf.js | 32 +++++++++++++++++ core/bindings.rs | 68 +++++++++++++++++++++++++++++++++++ core/encode_decode_test.js | 40 +++++++++++++++++++++ core/isolate.rs | 14 ++++++++ tools/benchmark.py | 2 ++ 8 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 cli/tests/core_decode_perf.js create mode 100644 cli/tests/core_encode_perf.js create mode 100644 core/encode_decode_test.js diff --git a/cli/js/globals.ts b/cli/js/globals.ts index 8d122878f8..21ce7e6198 100644 --- a/cli/js/globals.ts +++ b/cli/js/globals.ts @@ -97,6 +97,9 @@ declare global { evalContext(code: string): [any, EvalErrorInfo | null]; formatError: (e: Error) => string; + + decode(bytes: Uint8Array): string; + encode(text: string): Uint8Array; } // Only `var` variables show up in the `globalThis` type when doing a global diff --git a/cli/js/ops/dispatch_json.ts b/cli/js/ops/dispatch_json.ts index 4aa1f6b8b1..9ff0f13f59 100644 --- a/cli/js/ops/dispatch_json.ts +++ b/cli/js/ops/dispatch_json.ts @@ -1,6 +1,5 @@ // Copyright 2018-2020 the Deno authors. All rights reserved. MIT license. 
import * as util from "../util.ts"; -import { TextEncoder, TextDecoder } from "../web/text_encoding.ts"; import { core } from "../core.ts"; import { OPS_CACHE } from "../runtime.ts"; import { ErrorKind, getErrorClass } from "../errors.ts"; @@ -30,13 +29,13 @@ function nextPromiseId(): number { } function decode(ui8: Uint8Array): JsonResponse { - const s = new TextDecoder().decode(ui8); + const s = core.decode(ui8); return JSON.parse(s) as JsonResponse; } function encode(args: object): Uint8Array { const s = JSON.stringify(args); - return new TextEncoder().encode(s); + return core.encode(s); } function unwrapResponse(res: JsonResponse): Ok { diff --git a/cli/tests/core_decode_perf.js b/cli/tests/core_decode_perf.js new file mode 100644 index 0000000000..fc00b89960 --- /dev/null +++ b/cli/tests/core_decode_perf.js @@ -0,0 +1,37 @@ +const mixed = new TextEncoder().encode("@Ā๐😀"); + +function generateRandom(bytes) { + const result = new Uint8Array(bytes); + let i = 0; + while (i < bytes) { + const toAdd = Math.floor(Math.random() * Math.min(4, bytes - i)); + switch (toAdd) { + case 0: + result[i] = mixed[0]; + i++; + break; + case 1: + result[i] = mixed[1]; + result[i + 1] = mixed[2]; + i += 2; + break; + case 2: + result[i] = mixed[3]; + result[i + 1] = mixed[4]; + result[i + 2] = mixed[5]; + i += 3; + break; + case 3: + result[i] = mixed[6]; + result[i + 1] = mixed[7]; + result[i + 2] = mixed[8]; + result[i + 3] = mixed[9]; + i += 4; + break; + } + } + return result; +} + +const randomData = generateRandom(1024); +for (let i = 0; i < 10_000; i++) Deno.core.decode(randomData); diff --git a/cli/tests/core_encode_perf.js b/cli/tests/core_encode_perf.js new file mode 100644 index 0000000000..5cde81c7a1 --- /dev/null +++ b/cli/tests/core_encode_perf.js @@ -0,0 +1,32 @@ +const mixed = "@Ā๐😀"; + +function generateRandom(bytes) { + let result = ""; + let i = 0; + while (i < bytes) { + const toAdd = Math.floor(Math.random() * Math.min(4, bytes - i)); + switch 
(toAdd) { + case 0: + result += mixed[0]; + i++; + break; + case 1: + result += mixed[1]; + i++; + break; + case 2: + result += mixed[2]; + i++; + break; + case 3: + result += mixed[3]; + result += mixed[4]; + i += 2; + break; + } + } + return result; +} + +const randomData = generateRandom(1024); +for (let i = 0; i < 10_000; i++) Deno.core.encode(randomData); diff --git a/core/bindings.rs b/core/bindings.rs index cb9c15ee1d..9df9a78a0c 100644 --- a/core/bindings.rs +++ b/core/bindings.rs @@ -35,6 +35,12 @@ lazy_static! { v8::ExternalReference { function: queue_microtask.map_fn_to() }, + v8::ExternalReference { + function: encode.map_fn_to() + }, + v8::ExternalReference { + function: decode.map_fn_to() + }, ]); } @@ -156,6 +162,22 @@ pub fn initialize_context<'s>( format_error_val.into(), ); + let mut encode_tmpl = v8::FunctionTemplate::new(scope, encode); + let encode_val = encode_tmpl.get_function(scope, context).unwrap(); + core_val.set( + context, + v8::String::new(scope, "encode").unwrap().into(), + encode_val.into(), + ); + + let mut decode_tmpl = v8::FunctionTemplate::new(scope, decode); + let decode_val = decode_tmpl.get_function(scope, context).unwrap(); + core_val.set( + context, + v8::String::new(scope, "decode").unwrap().into(), + decode_val.into(), + ); + core_val.set_accessor( context, v8::String::new(scope, "shared").unwrap().into(), @@ -551,6 +573,52 @@ fn format_error( rv.set(e.into()) } +fn encode( + scope: v8::FunctionCallbackScope, + args: v8::FunctionCallbackArguments, + mut rv: v8::ReturnValue, +) { + let text = match v8::Local::<v8::String>::try_from(args.get(0)) { + Ok(s) => s, + Err(_) => { + let msg = v8::String::new(scope, "Invalid argument").unwrap(); + let exception = v8::Exception::type_error(scope, msg); + scope.isolate().throw_exception(exception); + return; + } + }; + let text_str = text.to_rust_string_lossy(scope); + let text_bytes = text_str.as_bytes().to_vec().into_boxed_slice(); + let buf = boxed_slice_to_uint8array(scope, text_bytes); + 
rv.set(buf.into()) +} + +fn decode( + scope: v8::FunctionCallbackScope, + args: v8::FunctionCallbackArguments, + mut rv: v8::ReturnValue, +) { + let buf = match v8::Local::<v8::ArrayBufferView>::try_from(args.get(0)) { + Ok(view) => { + let byte_offset = view.byte_offset(); + let byte_length = view.byte_length(); + let backing_store = view.buffer().unwrap().get_backing_store(); + let buf = unsafe { &**backing_store.get() }; + &buf[byte_offset..byte_offset + byte_length] + } + Err(..) => { + let msg = v8::String::new(scope, "Invalid argument").unwrap(); + let exception = v8::Exception::type_error(scope, msg); + scope.isolate().throw_exception(exception); + return; + } + }; + + let text_str = + v8::String::new_from_utf8(scope, &buf, v8::NewStringType::Normal).unwrap(); + rv.set(text_str.into()) +} + fn queue_microtask( scope: v8::FunctionCallbackScope, args: v8::FunctionCallbackArguments, diff --git a/core/encode_decode_test.js b/core/encode_decode_test.js new file mode 100644 index 0000000000..8a366dd664 --- /dev/null +++ b/core/encode_decode_test.js @@ -0,0 +1,40 @@ +// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license. 
+ +function assertArrayEquals(a1, a2) { + if (a1.length !== a2.length) throw Error("assert"); + + for (const index in a1) { + if (a1[index] !== a2[index]) { + throw Error("assert"); + } + } +} + +function main() { + // prettier-ignore + const fixture1 = [ + 0xf0, 0x9d, 0x93, 0xbd, + 0xf0, 0x9d, 0x93, 0xae, + 0xf0, 0x9d, 0x94, 0x81, + 0xf0, 0x9d, 0x93, 0xbd + ]; + // prettier-ignore + const fixture2 = [ + 72, 101, 108, 108, + 111, 32, 239, 191, + 189, 239, 191, 189, + 32, 87, 111, 114, + 108, 100 + ]; + + assertArrayEquals(Array.from(Deno.core.encode("๐“ฝ๐“ฎ๐”๐“ฝ")), fixture1); + assertArrayEquals( + Array.from(Deno.core.encode("Hello \udc12\ud834 World")), + fixture2 + ); + + assert(Deno.core.decode(new Uint8Array(fixture1)) === "๐“ฝ๐“ฎ๐”๐“ฝ"); + assert(Deno.core.decode(new Uint8Array(fixture2)) === "Hello ๏ฟฝ๏ฟฝ World"); +} + +main(); diff --git a/core/isolate.rs b/core/isolate.rs index cb4bdaaf47..9efe86c0eb 100644 --- a/core/isolate.rs +++ b/core/isolate.rs @@ -1112,6 +1112,20 @@ pub mod tests { }); } + #[test] + fn test_encode_decode() { + run_in_task(|mut cx| { + let (mut isolate, _dispatch_count) = setup(Mode::Async); + js_check(isolate.execute( + "encode_decode_test.js", + include_str!("encode_decode_test.js"), + )); + if let Poll::Ready(Err(_)) = isolate.poll_unpin(&mut cx) { + unreachable!(); + } + }); + } + #[test] fn will_snapshot() { let snapshot = { diff --git a/tools/benchmark.py b/tools/benchmark.py index c29ca3e8ca..3891bc2078 100755 --- a/tools/benchmark.py +++ b/tools/benchmark.py @@ -28,7 +28,9 @@ exec_time_benchmarks = [ ("workers_startup", ["cli/tests/workers_startup_bench.ts"]), ("workers_round_robin", ["cli/tests/workers_round_robin_bench.ts"]), ("text_decoder", ["cli/tests/text_decoder_perf.js"]), + ("core_decode", ["cli/tests/core_decode_perf.js"]), ("text_encoder", ["cli/tests/text_encoder_perf.js"]), + ("core_encode", ["cli/tests/core_encode_perf.js"]), ]