diff --git a/cli/bench/url_parse.js b/cli/bench/url_parse.js
new file mode 100644
index 0000000000..d19793f63b
--- /dev/null
+++ b/cli/bench/url_parse.js
@@ -0,0 +1,22 @@
+// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license.
+const queueMicrotask = globalThis.queueMicrotask || process.nextTick;
+let [total, count] = typeof Deno !== "undefined"
+  ? Deno.args
+  : [process.argv[2], process.argv[3]];
+
+total = total ? parseInt(total, 10) : 50;
+count = count ? parseInt(count, 10) : 10000000;
+
+function bench(fun) {
+  const start = Date.now();
+  for (let i = 0; i < count; i++) fun();
+  const elapsed = Date.now() - start;
+  const rate = Math.floor(count / (elapsed / 1000));
+  console.log(`time ${elapsed} ms rate ${rate}`);
+  if (--total) queueMicrotask(() => bench(fun));
+}
+
+bench(() => {
+  const url = new URL("http://example.com/");
+  url.pathname;
+});
diff --git a/core/Cargo.toml b/core/Cargo.toml
index 3e62663e38..f6628531af 100644
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -32,7 +32,7 @@ serde = { version = "1.0.136", features = ["derive"] }
 serde_json = { version = "1.0.79", features = ["preserve_order"] }
 serde_v8 = { version = "0.61.0", path = "../serde_v8" }
 sourcemap = "=6.0.1"
-url = { version = "2.3.1", features = ["serde"] }
+url = { version = "2.3.1", features = ["serde", "expose_internals"] }
 v8 = { version = "0.49.0", default-features = false }
 
 [[example]]
diff --git a/ext/url/00_url.js b/ext/url/00_url.js
index ab87e85e0f..659e6a37b5 100644
--- a/ext/url/00_url.js
+++ b/ext/url/00_url.js
@@ -19,9 +19,9 @@
     ArrayPrototypeSort,
     ArrayPrototypeSplice,
     ObjectKeys,
+    Uint32Array,
     SafeArrayIterator,
     StringPrototypeSlice,
-    StringPrototypeSplit,
     Symbol,
     SymbolFor,
     SymbolIterator,
@@ -44,41 +44,37 @@
 
   // Helper functions
   function opUrlReparse(href, setter, value) {
-    return _urlParts(
-      ops.op_url_reparse(href, [setter, value]),
-    );
-  }
-  function opUrlParse(href, maybeBase) {
-    return _urlParts(ops.op_url_parse(href, maybeBase));
-  }
-  function _urlParts(internalParts) {
-    // WARNING: must match UrlParts serialization rust's url_result()
-    const {
-      0: href,
-      1: hash,
-      2: host,
-      3: hostname,
-      4: origin,
-      5: password,
-      6: pathname,
-      7: port,
-      8: protocol,
-      9: search,
-      10: username,
-    } = StringPrototypeSplit(internalParts, "\n");
-    return {
+    const status = ops.op_url_reparse(
       href,
-      hash,
-      host,
-      hostname,
-      origin,
-      password,
-      pathname,
-      port,
-      protocol,
-      search,
-      username,
-    };
+      setter,
+      value,
+      componentsBuf.buffer,
+    );
+    return getSerialization(status, href);
+  }
+
+  function opUrlParse(href, maybeBase) {
+    let status;
+    if (maybeBase === undefined) {
+      status = ops.op_url_parse(href, componentsBuf.buffer);
+    } else {
+      status = core.ops.op_url_parse_with_base(
+        href,
+        maybeBase,
+        componentsBuf.buffer,
+      );
+    }
+    return getSerialization(status, href);
+  }
+
+  function getSerialization(status, href) {
+    if (status === 0) {
+      return href;
+    } else if (status === 1) {
+      return core.ops.op_url_get_serialization();
+    } else {
+      throw new TypeError("Invalid URL");
+    }
   }
 
   class URLSearchParams {
@@ -131,7 +127,7 @@
       if (url === null) {
         return;
       }
-      url[_url] = opUrlReparse(url.href, SET_SEARCH, this.toString());
+      url[_updateUrlSearch](this.toString());
     }
 
     /**
@@ -308,11 +304,37 @@
     URLSearchParamsPrototype,
   );
 
-  const _url = Symbol("url");
+  const _updateUrlSearch = Symbol("updateUrlSearch");
 
+  function trim(s) {
+    if (s.length === 1) return "";
+    return s;
+  }
+
+  // Represents a "no port" value. A port in URL cannot be greater than 2^16 − 1
+  const NO_PORT = 65536;
+
+  const componentsBuf = new Uint32Array(8);
   class URL {
-    [_url];
     #queryObject = null;
+    #serialization;
+    #schemeEnd;
+    #usernameEnd;
+    #hostStart;
+    #hostEnd;
+    #port;
+    #pathStart;
+    #queryStart;
+    #fragmentStart;
+
+    [_updateUrlSearch](value) {
+      this.#serialization = opUrlReparse(
+        this.#serialization,
+        SET_SEARCH,
+        value,
+      );
+      this.#updateComponents();
+    }
 
     /**
      * @param {string} url
@@ -328,7 +350,21 @@
         });
       }
       this[webidl.brand] = webidl.brand;
-      this[_url] = opUrlParse(url, base);
+      this.#serialization = opUrlParse(url, base);
+      this.#updateComponents();
+    }
+
+    #updateComponents() {
+      [
+        this.#schemeEnd,
+        this.#usernameEnd,
+        this.#hostStart,
+        this.#hostEnd,
+        this.#port,
+        this.#pathStart,
+        this.#queryStart,
+        this.#fragmentStart,
+      ] = componentsBuf;
     }
 
     [SymbolFor("Deno.privateCustomInspect")](inspect, inspectOptions) {
@@ -363,10 +399,18 @@
       }
     }
 
+    #hasAuthority() {
+      // https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/lib.rs#L824
+      return this.#serialization.slice(this.#schemeEnd).startsWith("://");
+    }
+
     /** @return {string} */
     get hash() {
       webidl.assertBranded(this, URLPrototype);
-      return this[_url].hash;
+      // https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/quirks.rs#L263
+      return this.#fragmentStart
+        ? trim(this.#serialization.slice(this.#fragmentStart))
+        : "";
     }
 
     /** @param {string} value */
@@ -379,7 +423,12 @@
         context: "Argument 1",
       });
       try {
-        this[_url] = opUrlReparse(this[_url].href, SET_HASH, value);
+        this.#serialization = opUrlReparse(
+          this.#serialization,
+          SET_HASH,
+          value,
+        );
+        this.#updateComponents();
       } catch {
         /* pass */
       }
@@ -388,7 +437,8 @@
     /** @return {string} */
     get host() {
       webidl.assertBranded(this, URLPrototype);
-      return this[_url].host;
+      // https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/quirks.rs#L101
+      return this.#serialization.slice(this.#hostStart, this.#pathStart);
     }
 
     /** @param {string} value */
@@ -401,7 +451,12 @@
         context: "Argument 1",
       });
       try {
-        this[_url] = opUrlReparse(this[_url].href, SET_HOST, value);
+        this.#serialization = opUrlReparse(
+          this.#serialization,
+          SET_HOST,
+          value,
+        );
+        this.#updateComponents();
       } catch {
         /* pass */
       }
@@ -410,7 +465,8 @@
     /** @return {string} */
     get hostname() {
       webidl.assertBranded(this, URLPrototype);
-      return this[_url].hostname;
+      // https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/lib.rs#L988
+      return this.#serialization.slice(this.#hostStart, this.#hostEnd);
     }
 
     /** @param {string} value */
@@ -423,7 +479,12 @@
         context: "Argument 1",
       });
       try {
-        this[_url] = opUrlReparse(this[_url].href, SET_HOSTNAME, value);
+        this.#serialization = opUrlReparse(
+          this.#serialization,
+          SET_HOSTNAME,
+          value,
+        );
+        this.#updateComponents();
       } catch {
         /* pass */
       }
@@ -432,7 +493,7 @@
     /** @return {string} */
     get href() {
       webidl.assertBranded(this, URLPrototype);
-      return this[_url].href;
+      return this.#serialization;
     }
 
     /** @param {string} value */
@@ -444,20 +505,50 @@
         prefix,
         context: "Argument 1",
       });
-      this[_url] = opUrlParse(value);
+      this.#serialization = opUrlParse(value);
+      this.#updateComponents();
       this.#updateSearchParams();
     }
 
     /** @return {string} */
     get origin() {
       webidl.assertBranded(this, URLPrototype);
-      return this[_url].origin;
+      // https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/origin.rs#L14
+      const scheme = this.#serialization.slice(0, this.#schemeEnd);
+      if (
+        scheme === "http" || scheme === "https" || scheme === "ftp" ||
+        scheme === "ws" || scheme === "wss"
+      ) {
+        return `${scheme}://${this.host}`;
+      }
+
+      if (scheme === "blob") {
+        // TODO(@littledivy): Fast path.
+        try {
+          return new URL(this.pathname).origin;
+        } catch {
+          return "null";
+        }
+      }
+
+      return "null";
     }
 
     /** @return {string} */
     get password() {
       webidl.assertBranded(this, URLPrototype);
-      return this[_url].password;
+      // https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/lib.rs#L914
+      if (
+        this.#hasAuthority() &&
+        this.#usernameEnd !== this.#serialization.length &&
+        this.#serialization[this.#usernameEnd] === ":"
+      ) {
+        return this.#serialization.slice(
+          this.#usernameEnd + 1,
+          this.#hostStart - 1,
+        );
+      }
+      return "";
     }
 
     /** @param {string} value */
@@ -470,7 +561,12 @@
         context: "Argument 1",
       });
       try {
-        this[_url] = opUrlReparse(this[_url].href, SET_PASSWORD, value);
+        this.#serialization = opUrlReparse(
+          this.#serialization,
+          SET_PASSWORD,
+          value,
+        );
+        this.#updateComponents();
       } catch {
         /* pass */
       }
@@ -479,7 +575,13 @@
     /** @return {string} */
     get pathname() {
       webidl.assertBranded(this, URLPrototype);
-      return this[_url].pathname;
+      // https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/lib.rs#L1203
+      if (!this.#queryStart && !this.#fragmentStart) {
+        return this.#serialization.slice(this.#pathStart);
+      }
+
+      const nextComponentStart = this.#queryStart || this.#fragmentStart;
+      return this.#serialization.slice(this.#pathStart, nextComponentStart);
     }
 
     /** @param {string} value */
@@ -492,7 +594,12 @@
         context: "Argument 1",
       });
       try {
-        this[_url] = opUrlReparse(this[_url].href, SET_PATHNAME, value);
+        this.#serialization = opUrlReparse(
+          this.#serialization,
+          SET_PATHNAME,
+          value,
+        );
+        this.#updateComponents();
       } catch {
         /* pass */
       }
@@ -501,7 +608,15 @@
     /** @return {string} */
     get port() {
       webidl.assertBranded(this, URLPrototype);
-      return this[_url].port;
+      // https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/quirks.rs#L196
+      if (this.#port === NO_PORT) {
+        return this.#serialization.slice(this.#hostEnd, this.#pathStart);
+      } else {
+        return this.#serialization.slice(
+          this.#hostEnd + 1, /* : */
+          this.#pathStart,
+        );
+      }
     }
 
     /** @param {string} value */
@@ -514,7 +629,12 @@
         context: "Argument 1",
       });
       try {
-        this[_url] = opUrlReparse(this[_url].href, SET_PORT, value);
+        this.#serialization = opUrlReparse(
+          this.#serialization,
+          SET_PORT,
+          value,
+        );
+        this.#updateComponents();
       } catch {
         /* pass */
       }
@@ -523,7 +643,8 @@
     /** @return {string} */
     get protocol() {
       webidl.assertBranded(this, URLPrototype);
-      return this[_url].protocol;
+      // https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/quirks.rs#L56
+      return this.#serialization.slice(0, this.#schemeEnd + 1 /* : */);
     }
 
     /** @param {string} value */
@@ -536,7 +657,12 @@
         context: "Argument 1",
       });
       try {
-        this[_url] = opUrlReparse(this[_url].href, SET_PROTOCOL, value);
+        this.#serialization = opUrlReparse(
+          this.#serialization,
+          SET_PROTOCOL,
+          value,
+        );
+        this.#updateComponents();
       } catch {
         /* pass */
       }
@@ -545,7 +671,11 @@
     /** @return {string} */
     get search() {
       webidl.assertBranded(this, URLPrototype);
-      return this[_url].search;
+      // https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/quirks.rs#L249
+      const afterPath = this.#queryStart || this.#fragmentStart ||
+        this.#serialization.length;
+      const afterQuery = this.#fragmentStart || this.#serialization.length;
+      return trim(this.#serialization.slice(afterPath, afterQuery));
     }
 
     /** @param {string} value */
@@ -558,7 +688,12 @@
         context: "Argument 1",
       });
       try {
-        this[_url] = opUrlReparse(this[_url].href, SET_SEARCH, value);
+        this.#serialization = opUrlReparse(
+          this.#serialization,
+          SET_SEARCH,
+          value,
+        );
+        this.#updateComponents();
         this.#updateSearchParams();
       } catch {
         /* pass */
@@ -568,7 +703,19 @@
     /** @return {string} */
     get username() {
       webidl.assertBranded(this, URLPrototype);
-      return this[_url].username;
+      // https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/lib.rs#L881
+      const schemeSeperatorLen = 3; /* :// */
+      if (
+        this.#hasAuthority() &&
+        this.#usernameEnd > this.#schemeEnd + schemeSeperatorLen
+      ) {
+        return this.#serialization.slice(
+          this.#schemeEnd + schemeSeperatorLen,
+          this.#usernameEnd,
+        );
+      } else {
+        return "";
+      }
     }
 
     /** @param {string} value */
@@ -581,7 +728,12 @@
         context: "Argument 1",
       });
       try {
-        this[_url] = opUrlReparse(this[_url].href, SET_USERNAME, value);
+        this.#serialization = opUrlReparse(
+          this.#serialization,
+          SET_USERNAME,
+          value,
+        );
+        this.#updateComponents();
       } catch {
         /* pass */
       }
@@ -599,13 +751,13 @@
     /** @return {string} */
     toString() {
       webidl.assertBranded(this, URLPrototype);
-      return this[_url].href;
+      return this.#serialization;
     }
 
     /** @return {string} */
     toJSON() {
       webidl.assertBranded(this, URLPrototype);
-      return this[_url].href;
+      return this.#serialization;
     }
   }
 
diff --git a/ext/url/lib.rs b/ext/url/lib.rs
index be229cec55..c6e91e1552 100644
--- a/ext/url/lib.rs
+++ b/ext/url/lib.rs
@@ -3,7 +3,6 @@
 mod urlpattern;
 
 use deno_core::error::type_error;
-use deno_core::error::uri_error;
 use deno_core::error::AnyError;
 use deno_core::include_js_files;
 use deno_core::op;
@@ -11,6 +10,7 @@ use deno_core::url::form_urlencoded;
 use deno_core::url::quirks;
 use deno_core::url::Url;
 use deno_core::Extension;
+use deno_core::OpState;
 use deno_core::ZeroCopyBuf;
 use std::path::PathBuf;
 
@@ -25,8 +25,10 @@ pub fn init() -> Extension {
       "01_urlpattern.js",
     ))
     .ops(vec![
-      op_url_parse::decl(),
       op_url_reparse::decl(),
+      op_url_parse::decl(),
+      op_url_get_serialization::decl(),
+      op_url_parse_with_base::decl(),
       op_url_parse_search_params::decl(),
       op_url_stringify_search_params::decl(),
       op_urlpattern_parse::decl(),
@@ -35,41 +37,96 @@ pub fn init() -> Extension {
     .build()
 }
 
-// UrlParts is a \n joined string of the following parts:
-// #[derive(Serialize)]
-// pub struct UrlParts {
-//   href: String,
-//   hash: String,
-//   host: String,
-//   hostname: String,
-//   origin: String,
-//   password: String,
-//   pathname: String,
-//   port: String,
-//   protocol: String,
-//   search: String,
-//   username: String,
-// }
-// TODO: implement cleaner & faster serialization
-type UrlParts = String;
-
-/// Parse `UrlParseArgs::href` with an optional `UrlParseArgs::base_href`, or an
-/// optional part to "set" after parsing. Return `UrlParts`.
+/// Parse `href` with a `base_href`. Fills the out `buf` with URL components.
 #[op]
-pub fn op_url_parse(
+pub fn op_url_parse_with_base(
+  state: &mut OpState,
   href: String,
-  base_href: Option<String>,
-) -> Result<UrlParts, AnyError> {
-  let base_url = base_href
-    .as_ref()
-    .map(|b| Url::parse(b).map_err(|_| type_error("Invalid base URL")))
-    .transpose()?;
-  let url = Url::options()
-    .base_url(base_url.as_ref())
-    .parse(&href)
-    .map_err(|_| type_error("Invalid URL"))?;
+  base_href: String,
+  buf: &mut [u8],
+) -> u32 {
+  let base_url = match Url::parse(&base_href) {
+    Ok(url) => url,
+    Err(_) => return ParseStatus::Err as u32,
+  };
+  parse_url(state, href, Some(&base_url), buf)
+}
 
-  Ok(url_parts(url))
+#[repr(u32)]
+pub enum ParseStatus {
+  Ok = 0,
+  OkSerialization = 1,
+  Err,
+}
+
+struct UrlSerialization(String);
+
+#[op]
+pub fn op_url_get_serialization(state: &mut OpState) -> String {
+  state.take::<UrlSerialization>().0
+}
+
+/// Parse `href` without a `base_url`. Fills the out `buf` with URL components.
+#[op]
+pub fn op_url_parse(state: &mut OpState, href: String, buf: &mut [u8]) -> u32 {
+  parse_url(state, href, None, buf)
+}
+
+/// `op_url_parse` and `op_url_parse_with_base` share the same implementation.
+///
+/// This function is used to parse the URL and fill the `buf` with internal
+/// offset values of the URL components.
+///
+/// If the serialized URL is the same as the input URL, then `UrlSerialization` is
+/// not set and returns `ParseStatus::Ok`.
+///
+/// If the serialized URL is different from the input URL, then `UrlSerialization` is
+/// set and returns `ParseStatus::OkSerialization`. JS side should check status and
+/// use `op_url_get_serialization` to get the serialized URL.
+///
+/// If the URL is invalid, then `UrlSerialization` is not set and returns `ParseStatus::Err`.
+///
+/// ```js
+/// const buf = new Uint32Array(8);
+/// const status = op_url_parse("http://example.com", buf.buffer);
+/// let serializedUrl = "";
+/// if (status === ParseStatus.Ok) {
+///   serializedUrl = "http://example.com";
+/// } else if (status === ParseStatus.OkSerialization) {
+///   serializedUrl = op_url_get_serialization();
+/// }
+/// ```
+#[inline]
+fn parse_url(
+  state: &mut OpState,
+  href: String,
+  base_href: Option<&Url>,
+  buf: &mut [u8],
+) -> u32 {
+  match Url::options().base_url(base_href).parse(&href) {
+    Ok(url) => {
+      let inner_url = quirks::internal_components(&url);
+
+      let buf: &mut [u32] = as_u32_slice(buf);
+      buf[0] = inner_url.scheme_end;
+      buf[1] = inner_url.username_end;
+      buf[2] = inner_url.host_start;
+      buf[3] = inner_url.host_end;
+      // Same NO_PORT sentinel as `op_url_reparse`; 0 is itself a valid port.
+      buf[4] = inner_url.port.map(|p| p as u32).unwrap_or(NO_PORT);
+      buf[5] = inner_url.path_start;
+      buf[6] = inner_url.query_start.unwrap_or(0);
+      buf[7] = inner_url.fragment_start.unwrap_or(0);
+      let serialization: String = url.into();
+      if serialization != href {
+        state.put(UrlSerialization(serialization));
+        ParseStatus::OkSerialization as u32
+      } else {
+        ParseStatus::Ok as u32
+      }
+    }
+    Err(_) => ParseStatus::Err as u32,
+  }
 }
 
 #[derive(PartialEq, Debug)]
@@ -86,58 +143,86 @@ pub enum UrlSetter {
   Username = 8,
 }
 
+const NO_PORT: u32 = 65536;
+
+fn as_u32_slice(slice: &mut [u8]) -> &mut [u32] {
+  assert_eq!(slice.len() % std::mem::size_of::<u32>(), 0);
+  // SAFETY: size is multiple of 4
+  unsafe {
+    std::slice::from_raw_parts_mut(
+      slice.as_mut_ptr() as *mut u32,
+      slice.len() / std::mem::size_of::<u32>(),
+    )
+  }
+}
+
 #[op]
 pub fn op_url_reparse(
+  state: &mut OpState,
   href: String,
-  setter_opts: (u8, String),
-) -> Result<UrlParts, AnyError> {
-  let mut url = Url::options()
-    .parse(&href)
-    .map_err(|_| type_error("Invalid URL"))?;
+  setter: u8,
+  setter_value: String,
+  buf: &mut [u8],
+) -> u32 {
+  let mut url = match Url::options().parse(&href) {
+    Ok(url) => url,
+    Err(_) => return ParseStatus::Err as u32,
+  };
 
-  let (setter, setter_value) = setter_opts;
   if setter > 8 {
-    return Err(type_error("Invalid URL setter"));
+    return ParseStatus::Err as u32;
   }
   // SAFETY: checked to be less than 9.
   let setter = unsafe { std::mem::transmute::<u8, UrlSetter>(setter) };
   let value = setter_value.as_ref();
-  match setter {
-    UrlSetter::Hash => quirks::set_hash(&mut url, value),
-    UrlSetter::Host => quirks::set_host(&mut url, value)
-      .map_err(|_| uri_error("Invalid host"))?,
-    UrlSetter::Hostname => quirks::set_hostname(&mut url, value)
-      .map_err(|_| uri_error("Invalid hostname"))?,
-    UrlSetter::Password => quirks::set_password(&mut url, value)
-      .map_err(|_| uri_error("Invalid password"))?,
-    UrlSetter::Pathname => quirks::set_pathname(&mut url, value),
-    UrlSetter::Port => quirks::set_port(&mut url, value)
-      .map_err(|_| uri_error("Invalid port"))?,
-    UrlSetter::Protocol => quirks::set_protocol(&mut url, value)
-      .map_err(|_| uri_error("Invalid protocol"))?,
-    UrlSetter::Search => quirks::set_search(&mut url, value),
-    UrlSetter::Username => quirks::set_username(&mut url, value)
-      .map_err(|_| uri_error("Invalid username"))?,
+  let e = match setter {
+    UrlSetter::Hash => {
+      quirks::set_hash(&mut url, value);
+      Ok(())
+    }
+    UrlSetter::Host => quirks::set_host(&mut url, value),
+
+    UrlSetter::Hostname => quirks::set_hostname(&mut url, value),
+
+    UrlSetter::Password => quirks::set_password(&mut url, value),
+
+    UrlSetter::Pathname => {
+      quirks::set_pathname(&mut url, value);
+      Ok(())
+    }
+    UrlSetter::Port => quirks::set_port(&mut url, value),
+
+    UrlSetter::Protocol => quirks::set_protocol(&mut url, value),
+    UrlSetter::Search => {
+      quirks::set_search(&mut url, value);
+      Ok(())
+    }
+    UrlSetter::Username => quirks::set_username(&mut url, value),
+  };
+
+  match e {
+    Ok(_) => {
+      let inner_url = quirks::internal_components(&url);
+
+      let buf: &mut [u32] = as_u32_slice(buf);
+      buf[0] = inner_url.scheme_end;
+      buf[1] = inner_url.username_end;
+      buf[2] = inner_url.host_start;
+      buf[3] = inner_url.host_end;
+      buf[4] = inner_url.port.map(|p| p as u32).unwrap_or(NO_PORT);
+      buf[5] = inner_url.path_start;
+      buf[6] = inner_url.query_start.unwrap_or(0);
+      buf[7] = inner_url.fragment_start.unwrap_or(0);
+      let serialization: String = url.into();
+      if serialization != href {
+        state.put(UrlSerialization(serialization));
+        ParseStatus::OkSerialization as u32
+      } else {
+        ParseStatus::Ok as u32
+      }
+    }
+    Err(_) => ParseStatus::Err as u32,
   }
-
-  Ok(url_parts(url))
-}
-
-fn url_parts(url: Url) -> UrlParts {
-  [
-    quirks::href(&url),
-    quirks::hash(&url),
-    quirks::host(&url),
-    quirks::hostname(&url),
-    &quirks::origin(&url),
-    quirks::password(&url),
-    quirks::pathname(&url),
-    quirks::port(&url),
-    quirks::protocol(&url),
-    quirks::search(&url),
-    quirks::username(&url),
-  ]
-  .join("\n")
 }
 
 #[op]