From d55acb4c0ffbdc42c2383e917b2b2b18930e226a Mon Sep 17 00:00:00 2001 From: Aaron O'Mullan Date: Sat, 5 Mar 2022 20:12:30 +0100 Subject: [PATCH] perf(ext/web): optimize atob/btoa (#13841) Follow up to #13839, optimizing `base64_roundtrip` ~20x (~125ms => ~6.5ms) --- .github/workflows/ci.yml | 6 +-- ext/web/05_base64.js | 58 ++++++++++++---------------- ext/web/lib.rs | 83 +++++++++++++++++++++++++++------------- 3 files changed, 83 insertions(+), 64 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e25f33d1d3..db75c1baa7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -263,7 +263,7 @@ jobs: ~/.cargo/registry/index ~/.cargo/registry/cache ~/.cargo/git/db - key: 4-cargo-home-${{ matrix.os }}-${{ hashFiles('Cargo.lock') }} + key: 5-cargo-home-${{ matrix.os }}-${{ hashFiles('Cargo.lock') }} # In main branch, always creates fresh cache - name: Cache build output (main) @@ -279,7 +279,7 @@ jobs: !./target/*/*.zip !./target/*/*.tar.gz key: | - 4-cargo-target-${{ matrix.os }}-${{ matrix.profile }}-${{ github.sha }} + 5-cargo-target-${{ matrix.os }}-${{ matrix.profile }}-${{ github.sha }} # Restore cache from the latest 'main' branch build. - name: Cache build output (PR) @@ -295,7 +295,7 @@ jobs: !./target/*/*.tar.gz key: never_saved restore-keys: | - 4-cargo-target-${{ matrix.os }}-${{ matrix.profile }}- + 5-cargo-target-${{ matrix.os }}-${{ matrix.profile }}- # Don't save cache after building PRs or branches other than 'main'. 
- name: Skip save cache (PR) diff --git a/ext/web/05_base64.js b/ext/web/05_base64.js index 1244ecfd5c..8238831f86 100644 --- a/ext/web/05_base64.js +++ b/ext/web/05_base64.js @@ -9,21 +9,10 @@ "use strict"; ((window) => { + const core = Deno.core; const webidl = window.__bootstrap.webidl; - const { - forgivingBase64Encode, - forgivingBase64Decode, - } = window.__bootstrap.infra; const { DOMException } = window.__bootstrap.domException; - const { - ArrayPrototypeMap, - StringPrototypeCharCodeAt, - ArrayPrototypeJoin, - SafeArrayIterator, - StringFromCharCode, - TypedArrayFrom, - Uint8Array, - } = window.__bootstrap.primordials; + const { TypeError } = window.__bootstrap.primordials; /** * @param {string} data @@ -36,13 +25,17 @@ prefix, context: "Argument 1", }); - - const uint8Array = forgivingBase64Decode(data); - const result = ArrayPrototypeMap( - [...new SafeArrayIterator(uint8Array)], - (byte) => StringFromCharCode(byte), - ); - return ArrayPrototypeJoin(result, ""); + try { + return core.opSync("op_base64_atob", data); + } catch (e) { + if (e instanceof TypeError) { + throw new DOMException( + "Failed to decode base64: invalid character", + "InvalidCharacterError", + ); + } + throw e; + } } /** @@ -56,20 +49,17 @@ prefix, context: "Argument 1", }); - const byteArray = ArrayPrototypeMap( - [...new SafeArrayIterator(data)], - (char) => { - const charCode = StringPrototypeCharCodeAt(char, 0); - if (charCode > 0xff) { - throw new DOMException( - "The string to be encoded contains characters outside of the Latin1 range.", - "InvalidCharacterError", - ); - } - return charCode; - }, - ); - return forgivingBase64Encode(TypedArrayFrom(Uint8Array, byteArray)); + try { + return core.opSync("op_base64_btoa", data); + } catch (e) { + if (e instanceof TypeError) { + throw new DOMException( + "The string to be encoded contains characters outside of the Latin1 range.", + "InvalidCharacterError", + ); + } + throw e; + } } window.__bootstrap.base64 = { diff --git 
a/ext/web/lib.rs b/ext/web/lib.rs index b10cb972d1..b8f948159e 100644 --- a/ext/web/lib.rs +++ b/ext/web/lib.rs @@ -12,6 +12,7 @@ use deno_core::include_js_files; use deno_core::op_async; use deno_core::op_sync; use deno_core::url::Url; +use deno_core::ByteString; use deno_core::Extension; use deno_core::OpState; use deno_core::Resource; @@ -85,6 +86,8 @@ pub fn init( .ops(vec![ ("op_base64_decode", op_sync(op_base64_decode)), ("op_base64_encode", op_sync(op_base64_encode)), + ("op_base64_atob", op_sync(op_base64_atob)), + ("op_base64_btoa", op_sync(op_base64_btoa)), ( "op_encoding_normalize_label", op_sync(op_encoding_normalize_label), @@ -146,21 +149,42 @@ pub fn init( } fn op_base64_decode( - _state: &mut OpState, + _: &mut OpState, input: String, _: (), ) -> Result<ZeroCopyBuf, AnyError> { - let mut input: &str = &input.replace(|c| char::is_ascii_whitespace(&c), ""); + let mut input = input.into_bytes(); + input.retain(|c| !c.is_ascii_whitespace()); + Ok(b64_decode(&input)?.into()) +} + +fn op_base64_atob( + _: &mut OpState, + s: ByteString, + _: (), +) -> Result<ByteString, AnyError> { + let mut s = s.0; + s.retain(|c| !c.is_ascii_whitespace()); + + // If padding is expected, fail if not 4-byte aligned + if s.len() % 4 != 0 && (s.ends_with(b"==") || s.ends_with(b"=")) { + return Err( + DomExceptionInvalidCharacterError::new("Failed to decode base64.").into(), + ); + } + + Ok(ByteString(b64_decode(&s)?)) +} + +fn b64_decode(input: &[u8]) -> Result<Vec<u8>, AnyError> { // "If the length of input divides by 4 leaving no remainder, then: // if input ends with one or two U+003D EQUALS SIGN (=) characters, // remove them from input." 
- if input.len() % 4 == 0 { - if input.ends_with("==") { - input = &input[..input.len() - 2] - } else if input.ends_with('=') { - input = &input[..input.len() - 1] - } - } + let input = match input.len() % 4 == 0 { + true if input.ends_with(b"==") => &input[..input.len() - 2], + true if input.ends_with(b"=") => &input[..input.len() - 1], + _ => input, + }; // "If the length of input divides by 4 leaving a remainder of 1, // throw an InvalidCharacterError exception and abort these steps." @@ -170,38 +194,43 @@ fn op_base64_decode( ); } - if input - .chars() - .any(|c| c != '+' && c != '/' && !c.is_alphanumeric()) - { - return Err( + let cfg = base64::Config::new(base64::CharacterSet::Standard, true) + .decode_allow_trailing_bits(true); + let out = base64::decode_config(input, cfg).map_err(|err| match err { + base64::DecodeError::InvalidByte(_, _) => { DomExceptionInvalidCharacterError::new( "Failed to decode base64: invalid character", ) - .into(), - ); - } - - let cfg = base64::Config::new(base64::CharacterSet::Standard, true) - .decode_allow_trailing_bits(true); - let out = base64::decode_config(&input, cfg).map_err(|err| { - DomExceptionInvalidCharacterError::new(&format!( + } + _ => DomExceptionInvalidCharacterError::new(&format!( "Failed to decode base64: {:?}", err - )) + )), })?; - Ok(ZeroCopyBuf::from(out)) + + Ok(out) } fn op_base64_encode( - _state: &mut OpState, + _: &mut OpState, s: ZeroCopyBuf, _: (), ) -> Result<String, AnyError> { + Ok(b64_encode(&s)) +} + +fn op_base64_btoa( + _: &mut OpState, + s: ByteString, + _: (), +) -> Result<String, AnyError> { + Ok(b64_encode(&s)) +} + +fn b64_encode(s: impl AsRef<[u8]>) -> String { let cfg = base64::Config::new(base64::CharacterSet::Standard, true) .decode_allow_trailing_bits(true); - let out = base64::encode_config(&s, cfg); - Ok(out) + base64::encode_config(s.as_ref(), cfg) } #[derive(Deserialize)]