1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2024-11-28 16:20:57 -05:00

perf: optimize URL serialization (#15663)

This commit is contained in:
Divy Srivastava 2022-09-10 09:15:16 +05:30 committed by cjihrig
parent 347efd3fab
commit 008b9afc00
No known key found for this signature in database
GPG key ID: 7434390BDBE9B9C5
4 changed files with 397 additions and 139 deletions

22
cli/bench/url_parse.js Normal file
View file

@ -0,0 +1,22 @@
// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license.
// Use the platform's microtask scheduler when available; fall back to
// `process.nextTick` on runtimes that only expose the Node.js API.
const queueMicrotask = globalThis.queueMicrotask || process.nextTick;
// Optional positional arguments: <rounds> <iterations-per-round>.
let [total, count] = typeof Deno !== "undefined"
  ? Deno.args
  : [process.argv[2], process.argv[3]];
// Both values are parsed as base-10 integers; missing arguments fall back to
// 50 rounds of 10,000,000 iterations.
total = total ? parseInt(total, 10) : 50;
count = count ? parseInt(count, 10) : 10000000;
/**
 * Runs `fun` `count` times, logs the elapsed wall-clock time and the call
 * rate, then schedules the next round until `total` rounds have run.
 *
 * @param {() => void} fun function under measurement
 */
function bench(fun) {
  const start = Date.now();
  for (let i = 0; i < count; i++) fun();
  const elapsed = Date.now() - start;
  const rate = Math.floor(count / (elapsed / 1000));
  console.log(`time ${elapsed} ms rate ${rate}`);
  // Compare against zero instead of relying on truthiness: a round count that
  // starts at 0 or below would otherwise decrement to a negative (truthy)
  // value and reschedule forever.
  if (--total > 0) queueMicrotask(() => bench(fun));
}
// Measured workload: construct a URL and read one component from it.
bench(function parseAndReadPathname() {
  const parsed = new URL("http://example.com/");
  parsed.pathname;
});

View file

@ -32,7 +32,7 @@ serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.79", features = ["preserve_order"] }
serde_v8 = { version = "0.61.0", path = "../serde_v8" }
sourcemap = "=6.0.1"
url = { version = "2.3.1", features = ["serde"] }
url = { version = "2.3.1", features = ["serde", "expose_internals"] }
v8 = { version = "0.49.0", default-features = false }
[[example]]

View file

@ -19,9 +19,9 @@
ArrayPrototypeSort,
ArrayPrototypeSplice,
ObjectKeys,
Uint32Array,
SafeArrayIterator,
StringPrototypeSlice,
StringPrototypeSplit,
Symbol,
SymbolFor,
SymbolIterator,
@ -44,41 +44,37 @@
// Helper functions
function opUrlReparse(href, setter, value) {
return _urlParts(
ops.op_url_reparse(href, [setter, value]),
);
}
function opUrlParse(href, maybeBase) {
return _urlParts(ops.op_url_parse(href, maybeBase));
}
function _urlParts(internalParts) {
// WARNING: must match UrlParts serialization rust's url_result()
const {
0: href,
1: hash,
2: host,
3: hostname,
4: origin,
5: password,
6: pathname,
7: port,
8: protocol,
9: search,
10: username,
} = StringPrototypeSplit(internalParts, "\n");
return {
const status = ops.op_url_reparse(
href,
hash,
host,
hostname,
origin,
password,
pathname,
port,
protocol,
search,
username,
};
setter,
value,
componentsBuf.buffer,
);
return getSerialization(status, href);
}
/**
 * Parses `href`, optionally against `maybeBase`, letting the op write the
 * component offsets into the shared `componentsBuf`.
 *
 * @param {string} href
 * @param {string | undefined} maybeBase
 * @returns {string} the serialized URL (see `getSerialization`)
 */
function opUrlParse(href, maybeBase) {
  const out = componentsBuf.buffer;
  const status = maybeBase === undefined
    ? ops.op_url_parse(href, out)
    : core.ops.op_url_parse_with_base(href, maybeBase, out);
  return getSerialization(status, href);
}
/**
 * Maps a parse status code to the serialized URL.
 *
 * - `0`: the input was already in serialized form, so `href` is returned.
 * - `1`: the serialization differs and is fetched through
 *   `op_url_get_serialization`.
 * - anything else: the URL is invalid.
 *
 * @param {number} status
 * @param {string} href
 * @returns {string}
 * @throws {TypeError} when `status` is neither `0` nor `1`
 */
function getSerialization(status, href) {
  switch (status) {
    case 0:
      return href;
    case 1:
      return core.ops.op_url_get_serialization();
    default:
      throw new TypeError("Invalid URL");
  }
}
class URLSearchParams {
@ -131,7 +127,7 @@
if (url === null) {
return;
}
url[_url] = opUrlReparse(url.href, SET_SEARCH, this.toString());
url[_updateUrlSearch](this.toString());
}
/**
@ -308,11 +304,37 @@
URLSearchParamsPrototype,
);
const _url = Symbol("url");
const _updateUrlSearch = Symbol("updateUrlSearch");
/**
 * Returns the empty string for a one-character string and the input
 * unchanged otherwise. Used by the `hash` and `search` getters so that a lone
 * delimiter is reported as empty.
 *
 * @param {string} s
 * @returns {string}
 */
function trim(s) {
  return s.length === 1 ? "" : s;
}
// Represents a "no port" value. A port in a URL cannot be greater than 2^16 - 1.
const NO_PORT = 65536;
const componentsBuf = new Uint32Array(8);
class URL {
[_url];
#queryObject = null;
#serialization;
#schemeEnd;
#usernameEnd;
#hostStart;
#hostEnd;
#port;
#pathStart;
#queryStart;
#fragmentStart;
[_updateUrlSearch](value) {
this.#serialization = opUrlReparse(
this.#serialization,
SET_SEARCH,
value,
);
this.#updateComponents();
}
/**
* @param {string} url
@ -328,7 +350,21 @@
});
}
this[webidl.brand] = webidl.brand;
this[_url] = opUrlParse(url, base);
this.#serialization = opUrlParse(url, base);
this.#updateComponents();
}
#updateComponents() {
[
this.#schemeEnd,
this.#usernameEnd,
this.#hostStart,
this.#hostEnd,
this.#port,
this.#pathStart,
this.#queryStart,
this.#fragmentStart,
] = componentsBuf;
}
[SymbolFor("Deno.privateCustomInspect")](inspect, inspectOptions) {
@ -363,10 +399,18 @@
}
}
#hasAuthority() {
// https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/lib.rs#L824
return this.#serialization.slice(this.#schemeEnd).startsWith("://");
}
/** @return {string} */
get hash() {
webidl.assertBranded(this, URLPrototype);
return this[_url].hash;
// https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/quirks.rs#L263
return this.#fragmentStart
? trim(this.#serialization.slice(this.#fragmentStart))
: "";
}
/** @param {string} value */
@ -379,7 +423,12 @@
context: "Argument 1",
});
try {
this[_url] = opUrlReparse(this[_url].href, SET_HASH, value);
this.#serialization = opUrlReparse(
this.#serialization,
SET_HASH,
value,
);
this.#updateComponents();
} catch {
/* pass */
}
@ -388,7 +437,8 @@
/** @return {string} */
get host() {
webidl.assertBranded(this, URLPrototype);
return this[_url].host;
// https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/quirks.rs#L101
return this.#serialization.slice(this.#hostStart, this.#pathStart);
}
/** @param {string} value */
@ -401,7 +451,12 @@
context: "Argument 1",
});
try {
this[_url] = opUrlReparse(this[_url].href, SET_HOST, value);
this.#serialization = opUrlReparse(
this.#serialization,
SET_HOST,
value,
);
this.#updateComponents();
} catch {
/* pass */
}
@ -410,7 +465,8 @@
/** @return {string} */
get hostname() {
webidl.assertBranded(this, URLPrototype);
return this[_url].hostname;
// https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/lib.rs#L988
return this.#serialization.slice(this.#hostStart, this.#hostEnd);
}
/** @param {string} value */
@ -423,7 +479,12 @@
context: "Argument 1",
});
try {
this[_url] = opUrlReparse(this[_url].href, SET_HOSTNAME, value);
this.#serialization = opUrlReparse(
this.#serialization,
SET_HOSTNAME,
value,
);
this.#updateComponents();
} catch {
/* pass */
}
@ -432,7 +493,7 @@
/** @return {string} */
get href() {
webidl.assertBranded(this, URLPrototype);
return this[_url].href;
return this.#serialization;
}
/** @param {string} value */
@ -444,20 +505,50 @@
prefix,
context: "Argument 1",
});
this[_url] = opUrlParse(value);
this.#serialization = opUrlParse(value);
this.#updateComponents();
this.#updateSearchParams();
}
/** @return {string} */
get origin() {
webidl.assertBranded(this, URLPrototype);
return this[_url].origin;
// https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/origin.rs#L14
const scheme = this.#serialization.slice(0, this.#schemeEnd);
if (
scheme === "http" || scheme === "https" || scheme === "ftp" ||
scheme === "ws" || scheme === "wss"
) {
return `${scheme}://${this.host}`;
}
if (scheme === "blob") {
// TODO(@littledivy): Fast path.
try {
return new URL(this.pathname).origin;
} catch {
return "null";
}
}
return "null";
}
/** @return {string} */
get password() {
webidl.assertBranded(this, URLPrototype);
return this[_url].password;
// https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/lib.rs#L914
if (
this.#hasAuthority() &&
this.#usernameEnd !== this.#serialization.length &&
this.#serialization[this.#usernameEnd] === ":"
) {
return this.#serialization.slice(
this.#usernameEnd + 1,
this.#hostStart - 1,
);
}
return "";
}
/** @param {string} value */
@ -470,7 +561,12 @@
context: "Argument 1",
});
try {
this[_url] = opUrlReparse(this[_url].href, SET_PASSWORD, value);
this.#serialization = opUrlReparse(
this.#serialization,
SET_PASSWORD,
value,
);
this.#updateComponents();
} catch {
/* pass */
}
@ -479,7 +575,13 @@
/** @return {string} */
get pathname() {
webidl.assertBranded(this, URLPrototype);
return this[_url].pathname;
// https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/lib.rs#L1203
if (!this.#queryStart && !this.#fragmentStart) {
return this.#serialization.slice(this.#pathStart);
}
const nextComponentStart = this.#queryStart || this.#fragmentStart;
return this.#serialization.slice(this.#pathStart, nextComponentStart);
}
/** @param {string} value */
@ -492,7 +594,12 @@
context: "Argument 1",
});
try {
this[_url] = opUrlReparse(this[_url].href, SET_PATHNAME, value);
this.#serialization = opUrlReparse(
this.#serialization,
SET_PATHNAME,
value,
);
this.#updateComponents();
} catch {
/* pass */
}
@ -501,7 +608,15 @@
/** @return {string} */
get port() {
webidl.assertBranded(this, URLPrototype);
return this[_url].port;
// https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/quirks.rs#L196
if (this.#port === NO_PORT) {
return this.#serialization.slice(this.#hostEnd, this.#pathStart);
} else {
return this.#serialization.slice(
this.#hostEnd + 1, /* : */
this.#pathStart,
);
}
}
/** @param {string} value */
@ -514,7 +629,12 @@
context: "Argument 1",
});
try {
this[_url] = opUrlReparse(this[_url].href, SET_PORT, value);
this.#serialization = opUrlReparse(
this.#serialization,
SET_PORT,
value,
);
this.#updateComponents();
} catch {
/* pass */
}
@ -523,7 +643,8 @@
/** @return {string} */
get protocol() {
webidl.assertBranded(this, URLPrototype);
return this[_url].protocol;
// https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/quirks.rs#L56
return this.#serialization.slice(0, this.#schemeEnd + 1 /* : */);
}
/** @param {string} value */
@ -536,7 +657,12 @@
context: "Argument 1",
});
try {
this[_url] = opUrlReparse(this[_url].href, SET_PROTOCOL, value);
this.#serialization = opUrlReparse(
this.#serialization,
SET_PROTOCOL,
value,
);
this.#updateComponents();
} catch {
/* pass */
}
@ -545,7 +671,11 @@
/** @return {string} */
get search() {
webidl.assertBranded(this, URLPrototype);
return this[_url].search;
// https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/quirks.rs#L249
const afterPath = this.#queryStart || this.#fragmentStart ||
this.#serialization.length;
const afterQuery = this.#fragmentStart || this.#serialization.length;
return trim(this.#serialization.slice(afterPath, afterQuery));
}
/** @param {string} value */
@ -558,7 +688,12 @@
context: "Argument 1",
});
try {
this[_url] = opUrlReparse(this[_url].href, SET_SEARCH, value);
this.#serialization = opUrlReparse(
this.#serialization,
SET_SEARCH,
value,
);
this.#updateComponents();
this.#updateSearchParams();
} catch {
/* pass */
@ -568,7 +703,19 @@
/** @return {string} */
get username() {
webidl.assertBranded(this, URLPrototype);
return this[_url].username;
// https://github.com/servo/rust-url/blob/1d307ae51a28fecc630ecec03380788bfb03a643/url/src/lib.rs#L881
const schemeSeperatorLen = 3; /* :// */
if (
this.#hasAuthority() &&
this.#usernameEnd > this.#schemeEnd + schemeSeperatorLen
) {
return this.#serialization.slice(
this.#schemeEnd + schemeSeperatorLen,
this.#usernameEnd,
);
} else {
return "";
}
}
/** @param {string} value */
@ -581,7 +728,12 @@
context: "Argument 1",
});
try {
this[_url] = opUrlReparse(this[_url].href, SET_USERNAME, value);
this.#serialization = opUrlReparse(
this.#serialization,
SET_USERNAME,
value,
);
this.#updateComponents();
} catch {
/* pass */
}
@ -599,13 +751,13 @@
/** @return {string} */
toString() {
webidl.assertBranded(this, URLPrototype);
return this[_url].href;
return this.#serialization;
}
/** @return {string} */
toJSON() {
webidl.assertBranded(this, URLPrototype);
return this[_url].href;
return this.#serialization;
}
}

View file

@ -3,7 +3,6 @@
mod urlpattern;
use deno_core::error::type_error;
use deno_core::error::uri_error;
use deno_core::error::AnyError;
use deno_core::include_js_files;
use deno_core::op;
@ -11,6 +10,7 @@ use deno_core::url::form_urlencoded;
use deno_core::url::quirks;
use deno_core::url::Url;
use deno_core::Extension;
use deno_core::OpState;
use deno_core::ZeroCopyBuf;
use std::path::PathBuf;
@ -25,8 +25,10 @@ pub fn init() -> Extension {
"01_urlpattern.js",
))
.ops(vec![
op_url_parse::decl(),
op_url_reparse::decl(),
op_url_parse::decl(),
op_url_get_serialization::decl(),
op_url_parse_with_base::decl(),
op_url_parse_search_params::decl(),
op_url_stringify_search_params::decl(),
op_urlpattern_parse::decl(),
@ -35,41 +37,95 @@ pub fn init() -> Extension {
.build()
}
// UrlParts is a \n joined string of the following parts:
// #[derive(Serialize)]
// pub struct UrlParts {
// href: String,
// hash: String,
// host: String,
// hostname: String,
// origin: String,
// password: String,
// pathname: String,
// port: String,
// protocol: String,
// search: String,
// username: String,
// }
// TODO: implement cleaner & faster serialization
type UrlParts = String;
/// Parse `UrlParseArgs::href` with an optional `UrlParseArgs::base_href`, or an
/// optional part to "set" after parsing. Return `UrlParts`.
/// Parse `href` with a `base_href`. Fills the out `buf` with URL components.
#[op]
pub fn op_url_parse(
pub fn op_url_parse_with_base(
state: &mut OpState,
href: String,
base_href: Option<String>,
) -> Result<UrlParts, AnyError> {
let base_url = base_href
.as_ref()
.map(|b| Url::parse(b).map_err(|_| type_error("Invalid base URL")))
.transpose()?;
let url = Url::options()
.base_url(base_url.as_ref())
.parse(&href)
.map_err(|_| type_error("Invalid URL"))?;
base_href: String,
buf: &mut [u8],
) -> u32 {
let base_url = match Url::parse(&base_href) {
Ok(url) => url,
Err(_) => return ParseStatus::Err as u32,
};
parse_url(state, href, Some(&base_url), buf)
}
Ok(url_parts(url))
/// Status code returned to JavaScript by the URL parsing ops.
///
/// The numeric values are part of the contract with the JS side, which
/// compares the returned `u32` against `0` and `1`; every discriminant is
/// therefore spelled out explicitly.
#[repr(u32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseStatus {
  /// Parsing succeeded and the serialization equals the input string.
  Ok = 0,
  /// Parsing succeeded but the serialization differs from the input; it must
  /// be fetched separately.
  OkSerialization = 1,
  /// Parsing (or applying a setter) failed.
  Err = 2,
}
/// Serialized URL stashed in `OpState` when it differs from the string the
/// caller passed in.
struct UrlSerialization(String);

/// Removes the stashed [`UrlSerialization`] from `state` and returns its
/// string.
///
/// Intended to be called right after an op returned
/// `ParseStatus::OkSerialization`.
// NOTE(review): presumably `OpState::take` panics when no value is stored —
// confirm before calling this op in any other situation.
#[op]
pub fn op_url_get_serialization(state: &mut OpState) -> String {
  let UrlSerialization(serialization) = state.take::<UrlSerialization>();
  serialization
}
/// Parses `href` on its own, without a base URL, and writes the URL component
/// offsets into the out-parameter `buf`.
///
/// Returns a `ParseStatus` value as `u32`.
#[op]
pub fn op_url_parse(
  state: &mut OpState,
  href: String,
  buf: &mut [u8],
) -> u32 {
  parse_url(state, href, None, buf)
}
/// Shared implementation of `op_url_parse` and `op_url_parse_with_base`.
///
/// Parses `href` (relative to `base_href` when one is given) and fills `buf`,
/// viewed as eight `u32` words, with the internal offsets of the URL
/// components:
///
/// | index | value                                       |
/// |-------|---------------------------------------------|
/// | 0     | scheme end                                  |
/// | 1     | username end                                |
/// | 2     | host start                                  |
/// | 3     | host end                                    |
/// | 4     | port, or `65536` when the URL has no port   |
/// | 5     | path start                                  |
/// | 6     | query start, or `0` when absent             |
/// | 7     | fragment start, or `0` when absent          |
///
/// Return value:
///
/// - `ParseStatus::Ok` — the serialized URL equals `href`; no
///   `UrlSerialization` is stored.
/// - `ParseStatus::OkSerialization` — the serialized URL differs from `href`
///   and is stored in `state` as `UrlSerialization`. The JS side should check
///   the status and call `op_url_get_serialization` to obtain it.
/// - `ParseStatus::Err` — the URL is invalid; `buf` is left untouched and no
///   `UrlSerialization` is stored.
///
/// ```js
/// const buf = new Uint32Array(8);
/// const status = op_url_parse("http://example.com", buf.buffer);
/// let serializedUrl = "";
/// if (status === ParseStatus.Ok) {
///   serializedUrl = "http://example.com";
/// } else if (status === ParseStatus.OkSerialization) {
///   serializedUrl = op_url_get_serialization();
/// }
/// ```
///
/// # Panics
///
/// Panics if `buf` cannot be viewed as at least eight `u32` words.
#[inline]
fn parse_url(
  state: &mut OpState,
  href: String,
  base_href: Option<&Url>,
  buf: &mut [u8],
) -> u32 {
  // Sentinel for "no port". It has the same value (2^16) as the module-level
  // `NO_PORT` written by `op_url_reparse`, so the JS side sees one
  // representation regardless of which op filled the buffer. `0` cannot be
  // used because it is also a valid explicit port.
  const PORT_UNSET: u32 = u16::MAX as u32 + 1;

  let url = match Url::options().base_url(base_href).parse(&href) {
    Ok(url) => url,
    Err(_) => return ParseStatus::Err as u32,
  };

  let inner_url = quirks::internal_components(&url);
  let buf: &mut [u32] = as_u32_slice(buf);
  buf[0] = inner_url.scheme_end;
  buf[1] = inner_url.username_end;
  buf[2] = inner_url.host_start;
  buf[3] = inner_url.host_end;
  buf[4] = inner_url.port.map_or(PORT_UNSET, u32::from);
  buf[5] = inner_url.path_start;
  buf[6] = inner_url.query_start.unwrap_or(0);
  buf[7] = inner_url.fragment_start.unwrap_or(0);

  let serialization: String = url.into();
  if serialization == href {
    // The caller already holds the serialized form; nothing to hand back.
    return ParseStatus::Ok as u32;
  }
  state.put(UrlSerialization(serialization));
  ParseStatus::OkSerialization as u32
}
#[derive(PartialEq, Debug)]
@ -86,58 +142,86 @@ pub enum UrlSetter {
Username = 8,
}
const NO_PORT: u32 = 65536;
/// Reinterprets a mutable byte slice as a mutable slice of native-endian
/// `u32` words covering the same memory.
///
/// An empty input yields an empty output.
///
/// # Panics
///
/// Panics if the length of `slice` is not a multiple of four, or if a
/// non-empty `slice` does not start at an address aligned for `u32`.
fn as_u32_slice(slice: &mut [u8]) -> &mut [u32] {
  const WORD_SIZE: usize = std::mem::size_of::<u32>();
  const WORD_ALIGN: usize = std::mem::align_of::<u32>();

  assert_eq!(slice.len() % WORD_SIZE, 0);
  if slice.is_empty() {
    // An empty byte slice may carry a dangling pointer that is only aligned
    // for `u8`; never hand that to `from_raw_parts_mut`.
    return &mut [];
  }
  assert_eq!(
    slice.as_ptr() as usize % WORD_ALIGN,
    0,
    "byte buffer is not aligned for u32 access"
  );

  let words = slice.len() / WORD_SIZE;
  // SAFETY: the pointer is non-null and valid for `slice.len()` bytes because
  // it comes from a live `&mut [u8]`; the length is a multiple of 4 and the
  // address is aligned for `u32` (both asserted above); every bit pattern is
  // a valid `u32`; the returned slice reuses the exclusive borrow of `slice`.
  unsafe { std::slice::from_raw_parts_mut(slice.as_mut_ptr().cast::<u32>(), words) }
}
#[op]
pub fn op_url_reparse(
state: &mut OpState,
href: String,
setter_opts: (u8, String),
) -> Result<UrlParts, AnyError> {
let mut url = Url::options()
.parse(&href)
.map_err(|_| type_error("Invalid URL"))?;
setter: u8,
setter_value: String,
buf: &mut [u8],
) -> u32 {
let mut url = match Url::options().parse(&href) {
Ok(url) => url,
Err(_) => return ParseStatus::Err as u32,
};
let (setter, setter_value) = setter_opts;
if setter > 8 {
return Err(type_error("Invalid URL setter"));
return ParseStatus::Err as u32;
}
// SAFETY: checked to be less than 9.
let setter = unsafe { std::mem::transmute::<u8, UrlSetter>(setter) };
let value = setter_value.as_ref();
match setter {
UrlSetter::Hash => quirks::set_hash(&mut url, value),
UrlSetter::Host => quirks::set_host(&mut url, value)
.map_err(|_| uri_error("Invalid host"))?,
UrlSetter::Hostname => quirks::set_hostname(&mut url, value)
.map_err(|_| uri_error("Invalid hostname"))?,
UrlSetter::Password => quirks::set_password(&mut url, value)
.map_err(|_| uri_error("Invalid password"))?,
UrlSetter::Pathname => quirks::set_pathname(&mut url, value),
UrlSetter::Port => quirks::set_port(&mut url, value)
.map_err(|_| uri_error("Invalid port"))?,
UrlSetter::Protocol => quirks::set_protocol(&mut url, value)
.map_err(|_| uri_error("Invalid protocol"))?,
UrlSetter::Search => quirks::set_search(&mut url, value),
UrlSetter::Username => quirks::set_username(&mut url, value)
.map_err(|_| uri_error("Invalid username"))?,
let e = match setter {
UrlSetter::Hash => {
quirks::set_hash(&mut url, value);
Ok(())
}
UrlSetter::Host => quirks::set_host(&mut url, value),
UrlSetter::Hostname => quirks::set_hostname(&mut url, value),
UrlSetter::Password => quirks::set_password(&mut url, value),
UrlSetter::Pathname => {
quirks::set_pathname(&mut url, value);
Ok(())
}
UrlSetter::Port => quirks::set_port(&mut url, value),
UrlSetter::Protocol => quirks::set_protocol(&mut url, value),
UrlSetter::Search => {
quirks::set_search(&mut url, value);
Ok(())
}
UrlSetter::Username => quirks::set_username(&mut url, value),
};
match e {
Ok(_) => {
let inner_url = quirks::internal_components(&url);
let buf: &mut [u32] = as_u32_slice(buf);
buf[0] = inner_url.scheme_end;
buf[1] = inner_url.username_end;
buf[2] = inner_url.host_start;
buf[3] = inner_url.host_end;
buf[4] = inner_url.port.map(|p| p as u32).unwrap_or(NO_PORT);
buf[5] = inner_url.path_start;
buf[6] = inner_url.query_start.unwrap_or(0);
buf[7] = inner_url.fragment_start.unwrap_or(0);
let serialization: String = url.into();
if serialization != href {
state.put(UrlSerialization(serialization));
ParseStatus::OkSerialization as u32
} else {
ParseStatus::Ok as u32
}
}
Err(_) => ParseStatus::Err as u32,
}
Ok(url_parts(url))
}
/// Serializes the components of `url` into one string, one component per
/// line, in this order: href, hash, host, hostname, origin, password,
/// pathname, port, protocol, search, username.
fn url_parts(url: Url) -> UrlParts {
  // `quirks::origin` yields an owned value, so keep it alive while the other
  // components are borrowed from `url`.
  let origin = quirks::origin(&url);
  let parts: [&str; 11] = [
    quirks::href(&url),
    quirks::hash(&url),
    quirks::host(&url),
    quirks::hostname(&url),
    &origin,
    quirks::password(&url),
    quirks::pathname(&url),
    quirks::port(&url),
    quirks::protocol(&url),
    quirks::search(&url),
    quirks::username(&url),
  ];
  parts.join("\n")
}
#[op]