2024-01-01 14:58:21 -05:00
|
|
|
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
|
2021-09-08 05:14:29 -04:00
|
|
|
|
|
|
|
// @ts-check
|
|
|
|
/// <reference path="../../core/internal.d.ts" />
|
|
|
|
/// <reference path="../../core/lib.deno_core.d.ts" />
|
|
|
|
/// <reference path="../webidl/internal.d.ts" />
|
|
|
|
/// <reference path="./internal.d.ts" />
|
|
|
|
/// <reference path="./lib.deno_url.d.ts" />
|
|
|
|
|
2024-01-26 17:46:46 -05:00
|
|
|
import { primordials } from "ext:core/mod.js";
|
|
|
|
import {
|
2024-01-10 17:37:25 -05:00
|
|
|
op_urlpattern_parse,
|
|
|
|
op_urlpattern_process_match_input,
|
2024-01-26 17:46:46 -05:00
|
|
|
} from "ext:core/ops";
|
2023-02-07 14:22:46 -05:00
|
|
|
const {
|
2023-12-08 06:02:52 -05:00
|
|
|
ArrayPrototypePush,
|
|
|
|
MathRandom,
|
|
|
|
ObjectAssign,
|
|
|
|
ObjectCreate,
|
|
|
|
ObjectPrototypeIsPrototypeOf,
|
2023-02-07 14:22:46 -05:00
|
|
|
RegExpPrototypeExec,
|
|
|
|
RegExpPrototypeTest,
|
2023-12-08 06:02:52 -05:00
|
|
|
SafeMap,
|
2023-02-28 18:14:16 -05:00
|
|
|
SafeRegExp,
|
2023-02-07 14:22:46 -05:00
|
|
|
Symbol,
|
|
|
|
SymbolFor,
|
|
|
|
TypeError,
|
|
|
|
} = primordials;
|
|
|
|
|
2024-01-10 17:37:25 -05:00
|
|
|
import * as webidl from "ext:deno_webidl/00_webidl.js";
|
|
|
|
import { createFilteredInspectProxy } from "ext:deno_console/01_console.js";
|
|
|
|
|
2023-02-07 14:22:46 -05:00
|
|
|
// Symbol key under which each URLPattern instance stores its parsed
// components (assigned at the end of the URLPattern constructor).
const _components = Symbol("components");
|
2024-10-02 12:02:46 -04:00
|
|
|
// Mutable settings object exported from this module. When
// `groupStringFallback` is true, `URLPattern.prototype.exec` reports capture
// groups whose match entry is nullish as "" instead of leaving them undefined.
const urlPatternSettings = { groupStringFallback: false };
|
2023-02-07 14:22:46 -05:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @typedef Components
|
|
|
|
* @property {Component} protocol
|
|
|
|
* @property {Component} username
|
|
|
|
* @property {Component} password
|
|
|
|
* @property {Component} hostname
|
|
|
|
* @property {Component} port
|
|
|
|
* @property {Component} pathname
|
|
|
|
* @property {Component} search
|
|
|
|
* @property {Component} hash
|
|
|
|
*/
|
perf(ext/urlpattern): optimize URLPattern.exec (#20170)
This PR optimizes `URLPattern.exec`
- Use component keys from constructor instead of calling it on every
`.exec`. AFAIK keys should always be
`protocol`,`username`,`password`,`hostname`,`port`,`pathname`,`search`,`hash`.
Haven't looked much into it but I think it's safe to define these
outside the constructor as well.
- Add a fast path for `/^$/u` (default regexp) and empty input
- Replaced `ArrayPrototypeMap` & `ObjectFromEntries` with a `for` loop.
**this PR**
```
cpu: 13th Gen Intel(R) Core(TM) i9-13900H
runtime: deno 1.36.1 (x86_64-unknown-linux-gnu)
benchmark time (avg) iter/s (min … max) p75 p99 p995
--------------------------------------------------------------- -----------------------------
exec 1 2.17 µs/iter 461,022.8 (2.14 µs … 2.27 µs) 2.18 µs 2.27 µs 2.27 µs
exec 2 4.13 µs/iter 242,173.4 (4.08 µs … 4.27 µs) 4.15 µs 4.27 µs 4.27 µs
exec 3 2.55 µs/iter 391,508.1 (2.53 µs … 2.68 µs) 2.56 µs 2.68 µs 2.68 µs
```
**main**
```
cpu: 13th Gen Intel(R) Core(TM) i9-13900H
runtime: deno 1.36.1 (x86_64-unknown-linux-gnu)
benchmark time (avg) iter/s (min … max) p75 p99 p995
--------------------------------------------------------------- -----------------------------
exec 1 2.45 µs/iter 408,092.4 (2.41 µs … 2.55 µs) 2.46 µs 2.55 µs 2.55 µs
exec 2 4.41 µs/iter 226,706.0 (3.49 µs … 399.56 µs) 4.39 µs 5.49 µs 6.07 µs
exec 3 2.99 µs/iter 334,833.4 (2.94 µs … 3.21 µs) 2.99 µs 3.21 µs 3.21 µs
```
2023-08-16 06:58:03 -04:00
|
|
|
/**
 * Names of the URL components a pattern is made of, in the order in which the
 * constructor compiles them and `test`/`exec` match them.
 */
const COMPONENTS_KEYS = [
  "protocol",
  "username",
  "password",
  "hostname",
  "port",
  "pathname",
  "search",
  "hash",
];
|
2023-02-07 14:22:46 -05:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @typedef Component
|
|
|
|
* @property {string} patternString
|
|
|
|
* @property {RegExp} regexp
|
|
|
|
* @property {string[]} groupNameList
|
|
|
|
*/
|
|
|
|
|
2023-12-08 06:02:52 -05:00
|
|
|
/**
 * This implements a least-recently-used cache that has a pseudo-"young
 * generation" by using sampling. The idea is that we want to keep the most
 * recently used items in the cache, but we don't want to pay the cost of
 * updating the cache on every access. This relies on the fact that the data
 * we're caching is not uniformly distributed, and that the most recently used
 * items are more likely to be used again soon (long tail distribution).
 *
 * The LRU cache is implemented as a Map, with the key being the cache key and
 * the value being the cache value. When an item is accessed, it is moved to the
 * end of the Map. When an item is inserted, if the Map is at capacity, the
 * first item in the Map is deleted. Because maps iterate using insertion order,
 * this means that the oldest item is always the first.
 *
 * The sampling is implemented by using a random number generator to decide
 * whether to update the cache on each access. This means that the cache will
 * not be updated on every access, but will be updated on a random subset of
 * accesses. Note that this applies to insertions as well: a freshly created
 * value is only stored in the Map on a sampled access.
 *
 * Independently of the Map, the most recently requested key/value pair is
 * always remembered, so back-to-back lookups of the same key never touch the
 * Map or the factory.
 *
 * @template K
 * @template V
 */
class SampledLRUCache {
  /** @type {SafeMap<K, V>} */
  #map = new SafeMap();
  /** Maximum number of entries kept in the Map. */
  #capacity = 0;
  /** Probability that a single access refreshes / inserts its entry. */
  #sampleRate = 0.1;

  /** @type {K} */
  #lastUsedKey = undefined;
  /** @type {V} */
  #lastUsedValue = undefined;

  /** @param {number} capacity */
  constructor(capacity) {
    this.#capacity = capacity;
  }

  /**
   * Returns the cached value for `key`, or calls `factory(key)` to create it.
   *
   * A stored value of `undefined` is indistinguishable from a miss, so the
   * factory runs again for such keys.
   *
   * @param {K} key
   * @param {(key: K) => V} factory
   * @return {V}
   */
  getOrInsert(key, factory) {
    // Fast path: same key as the previous call.
    if (this.#lastUsedKey === key) return this.#lastUsedValue;

    const cached = this.#map.get(key);
    if (cached !== undefined) {
      if (MathRandom() < this.#sampleRate) {
        // Sampled hit: re-insert the entry so it moves to the end of the
        // Map's insertion order, i.e. becomes the most recently used.
        this.#map.delete(key);
        this.#map.set(key, cached);
      }
      this.#lastUsedKey = key;
      this.#lastUsedValue = cached;
      return cached;
    }

    // value doesn't exist yet, create
    const created = factory(key);
    if (MathRandom() < this.#sampleRate) {
      // If the map is at capacity, delete the oldest (first) element so the
      // insertion below never grows the map beyond `capacity` entries.
      if (this.#map.size >= this.#capacity) {
        // deno-lint-ignore prefer-primordials
        this.#map.delete(this.#map.keys().next().value);
      }
      // insert the new value
      this.#map.set(key, created);
    }
    this.#lastUsedKey = key;
    this.#lastUsedValue = created;
    return created;
  }
}
|
|
|
|
|
|
|
|
// Module-level cache of `op_urlpattern_process_match_input` results, keyed by
// the converted match input. `test` and `exec` only consult it when no base
// URL is supplied. 4096 is the capacity handed to SampledLRUCache.
const matchInputCache = new SampledLRUCache(4096);
|
|
|
|
|
2024-08-05 04:49:28 -04:00
|
|
|
// Symbol key for the per-instance value returned by the `hasRegExpGroups`
// getter; the URLPattern constructor fills it from the parse op's result.
const _hasRegExpGroups = Symbol("[[hasRegExpGroups]]");
|
|
|
|
|
2023-02-07 14:22:46 -05:00
|
|
|
/**
 * Implementation of the `URLPattern` web API.
 *
 * Pattern parsing and match-input processing are delegated to the
 * `op_urlpattern_parse` and `op_urlpattern_process_match_input` ops; this
 * class compiles one regular expression per URL component and runs them
 * against the processed input.
 */
class URLPattern {
  /** @type {Components} */
  [_components];
  /** Value of `hasRegexpGroups` as reported by `op_urlpattern_parse`. */
  [_hasRegExpGroups];

  /**
   * Scratch result object used by `exec`. It is filled in place while
   * matching and only handed to the caller (and this field cleared) when the
   * match succeeds, so a failed match leaves it available for the next call.
   */
  #reusedResult;

  /**
   * Supported call shapes:
   *  - `new URLPattern(input, baseURL, options?)` when the second argument is
   *    a string;
   *  - `new URLPattern(input?, options?)` otherwise.
   *
   * @param {URLPatternInput} input
   * @param {string | URLPatternOptions} [baseURLOrOptions]
   * @param {URLPatternOptions} [maybeOptions]
   * @throws {TypeError} if the regular expression generated for a component
   *   cannot be compiled.
   */
  constructor(
    input,
    baseURLOrOptions = undefined,
    maybeOptions = undefined,
  ) {
    this[webidl.brand] = webidl.brand;
    const prefix = "Failed to construct 'URLPattern'";

    let baseURL;
    let options;
    if (webidl.type(baseURLOrOptions) === "String") {
      // (input, baseURL, options?)
      webidl.requiredArguments(arguments.length, 1, prefix);
      input = webidl.converters.URLPatternInput(input, prefix, "Argument 1");
      baseURL = webidl.converters.USVString(
        baseURLOrOptions,
        prefix,
        "Argument 2",
      );
      options = webidl.converters.URLPatternOptions(
        // `__proto__: null` yields a prototype-less object, so the dictionary
        // converter cannot pick up members inherited from Object.prototype.
        maybeOptions !== undefined ? maybeOptions : { __proto__: null },
        prefix,
        "Argument 3",
      );
    } else {
      // (input?, options?)
      if (input !== undefined) {
        input = webidl.converters.URLPatternInput(input, prefix, "Argument 1");
      } else {
        input = { __proto__: null };
      }
      // In this call shape the options dictionary is the second argument.
      options = webidl.converters.URLPatternOptions(
        baseURLOrOptions,
        prefix,
        "Argument 2",
      );
    }

    const components = op_urlpattern_parse(input, baseURL, options);
    this[_hasRegExpGroups] = components.hasRegexpGroups;

    // Compile the regexp of every component up front so `test`/`exec` can
    // reuse it.
    for (let i = 0; i < COMPONENTS_KEYS.length; ++i) {
      const key = COMPONENTS_KEYS[i];
      try {
        components[key].regexp = new SafeRegExp(
          components[key].regexpString,
          options.ignoreCase ? "ui" : "u",
        );
      } catch (e) {
        throw new TypeError(`${prefix}: ${key} is invalid; ${e.message}`);
      }
    }
    this[_components] = components;
  }

  /** Pattern string of the protocol component. */
  get protocol() {
    webidl.assertBranded(this, URLPatternPrototype);
    return this[_components].protocol.patternString;
  }

  /** Pattern string of the username component. */
  get username() {
    webidl.assertBranded(this, URLPatternPrototype);
    return this[_components].username.patternString;
  }

  /** Pattern string of the password component. */
  get password() {
    webidl.assertBranded(this, URLPatternPrototype);
    return this[_components].password.patternString;
  }

  /** Pattern string of the hostname component. */
  get hostname() {
    webidl.assertBranded(this, URLPatternPrototype);
    return this[_components].hostname.patternString;
  }

  /** Pattern string of the port component. */
  get port() {
    webidl.assertBranded(this, URLPatternPrototype);
    return this[_components].port.patternString;
  }

  /** Pattern string of the pathname component. */
  get pathname() {
    webidl.assertBranded(this, URLPatternPrototype);
    return this[_components].pathname.patternString;
  }

  /** Pattern string of the search component. */
  get search() {
    webidl.assertBranded(this, URLPatternPrototype);
    return this[_components].search.patternString;
  }

  /** Pattern string of the hash component. */
  get hash() {
    webidl.assertBranded(this, URLPatternPrototype);
    return this[_components].hash.patternString;
  }

  /** The `hasRegexpGroups` value reported when the pattern was parsed. */
  get hasRegExpGroups() {
    webidl.assertBranded(this, URLPatternPrototype);
    return this[_hasRegExpGroups];
  }

  /**
   * Reports whether `input` (optionally resolved against `baseURL`) matches
   * every component of this pattern.
   *
   * @param {URLPatternInput} input
   * @param {string} [baseURL]
   * @returns {boolean} `false` as well when the input cannot be processed
   *   (the op returns `null`).
   */
  test(input, baseURL = undefined) {
    webidl.assertBranded(this, URLPatternPrototype);
    const prefix = "Failed to execute 'test' on 'URLPattern'";
    webidl.requiredArguments(arguments.length, 1, prefix);
    input = webidl.converters.URLPatternInput(input, prefix, "Argument 1");
    if (baseURL !== undefined) {
      baseURL = webidl.converters.USVString(baseURL, prefix, "Argument 2");
    }

    // Results are only cached when no base URL is involved, because the
    // cache is keyed by the input alone.
    const res = baseURL === undefined
      ? matchInputCache.getOrInsert(
        input,
        op_urlpattern_process_match_input,
      )
      : op_urlpattern_process_match_input(input, baseURL);
    if (res === null) return false;

    const values = res[0];
    const components = this[_components];

    for (let i = 0; i < COMPONENTS_KEYS.length; ++i) {
      const key = COMPONENTS_KEYS[i];
      const component = components[key];
      switch (component.regexpString) {
        case "^$":
          // Fast path: compare against the empty string directly instead of
          // running the regexp.
          if (values[key] !== "") return false;
          break;
        case "^(.*)$":
          // Fast path: wildcard component, treated as always matching.
          break;
        default: {
          if (!RegExpPrototypeTest(component.regexp, values[key])) return false;
        }
      }
    }

    return true;
  }

  /**
   * Matches `input` (optionally resolved against `baseURL`) against this
   * pattern and returns the per-component inputs and captured groups.
   *
   * @param {URLPatternInput} input
   * @param {string} [baseURL]
   * @returns {URLPatternResult | null} `null` when the input cannot be
   *   processed or any component fails to match.
   */
  exec(input, baseURL = undefined) {
    webidl.assertBranded(this, URLPatternPrototype);
    const prefix = "Failed to execute 'exec' on 'URLPattern'";
    webidl.requiredArguments(arguments.length, 1, prefix);
    input = webidl.converters.URLPatternInput(input, prefix, "Argument 1");
    if (baseURL !== undefined) {
      baseURL = webidl.converters.USVString(baseURL, prefix, "Argument 2");
    }

    // Results are only cached when no base URL is involved, because the
    // cache is keyed by the input alone.
    const res = baseURL === undefined
      ? matchInputCache.getOrInsert(
        input,
        op_urlpattern_process_match_input,
      )
      : op_urlpattern_process_match_input(input, baseURL);
    if (res === null) {
      return null;
    }

    const { 0: values, 1: inputs } = res;

    // Reuse the result object left over from a previous failed match, if any.
    this.#reusedResult ??= { inputs: [undefined] };
    /** @type {URLPatternResult} */
    const result = this.#reusedResult;
    // We don't construct the `inputs` until after the matching is done under
    // the assumption that most patterns do not match.

    const components = this[_components];

    for (let i = 0; i < COMPONENTS_KEYS.length; ++i) {
      const key = COMPONENTS_KEYS[i];
      /** @type {Component} */
      const component = components[key];

      const componentResult = result[key] ??= {
        input: values[key],
        groups: component.regexpString === "^(.*)$" ? { "0": values[key] } : {},
      };

      switch (component.regexpString) {
        case "^$":
          // Fast path: compare against the empty string directly instead of
          // running the regexp.
          if (values[key] !== "") return null;
          break;
        case "^(.*)$":
          // Fast path: wildcard component; the whole value is group "0".
          componentResult.groups["0"] = values[key];
          break;
        default: {
          const match = RegExpPrototypeExec(component.regexp, values[key]);
          if (match === null) return null;
          const groupList = component.groupNameList;
          const groups = componentResult.groups;
          for (let j = 0; j < groupList.length; ++j) {
            // TODO(lucacasonato): this is vulnerable to override mistake
            if (urlPatternSettings.groupStringFallback) {
              groups[groupList[j]] = match[j + 1] ?? "";
            } else {
              groups[groupList[j]] = match[j + 1];
            }
          }
          break;
        }
      }
      // Refresh the input as well: the entry may stem from an earlier call.
      componentResult.input = values[key];
    }

    // Now populate result.inputs
    result.inputs[0] = typeof inputs[0] === "string"
      ? inputs[0]
      : ObjectAssign(ObjectCreate(null), inputs[0]);
    if (inputs[1] !== null) ArrayPrototypePush(result.inputs, inputs[1]);

    // The object now belongs to the caller; never hand it out twice.
    this.#reusedResult = undefined;
    return result;
  }

  /**
   * Custom inspect hook: shows the component pattern strings and
   * `hasRegExpGroups` via a filtered inspect proxy.
   */
  [SymbolFor("Deno.privateCustomInspect")](inspect, inspectOptions) {
    return inspect(
      createFilteredInspectProxy({
        object: this,
        evaluate: ObjectPrototypeIsPrototypeOf(URLPatternPrototype, this),
        keys: [
          "protocol",
          "username",
          "password",
          "hostname",
          "port",
          "pathname",
          "search",
          "hash",
          "hasRegExpGroups",
        ],
      }),
      inspectOptions,
    );
  }
}
|
|
|
|
|
2023-10-09 23:01:01 -04:00
|
|
|
// Hands the class to the webidl helper module.
// NOTE(review): what `configureInterface` sets up is defined in
// ext:deno_webidl and not visible here — confirm before relying on it.
webidl.configureInterface(URLPattern);
|
2023-02-07 14:22:46 -05:00
|
|
|
// Reference to the prototype used by the `webidl.assertBranded` checks in
// URLPattern's accessors/methods and by its custom inspect hook.
const URLPatternPrototype = URLPattern.prototype;
|
|
|
|
|
|
|
|
// WebIDL dictionary converter for `URLPatternInit`: every listed member is
// converted with the USVString converter.
webidl.converters.URLPatternInit = webidl
  .createDictionaryConverter("URLPatternInit", [
    { key: "protocol", converter: webidl.converters.USVString },
    { key: "username", converter: webidl.converters.USVString },
    { key: "password", converter: webidl.converters.USVString },
    { key: "hostname", converter: webidl.converters.USVString },
    { key: "port", converter: webidl.converters.USVString },
    { key: "pathname", converter: webidl.converters.USVString },
    { key: "search", converter: webidl.converters.USVString },
    { key: "hash", converter: webidl.converters.USVString },
    { key: "baseURL", converter: webidl.converters.USVString },
  ]);
|
|
|
|
|
2023-05-01 06:47:13 -04:00
|
|
|
// WebIDL converter for `URLPatternInput`, the union
// (URLPatternInit or USVString). Values whose `typeof` is "object" (which
// includes `null`) go through the dictionary converter; everything else is
// converted to a USVString.
webidl.converters["URLPatternInput"] = (V, prefix, context, opts) => {
  if (typeof V === "object") {
    return webidl.converters.URLPatternInit(V, prefix, context, opts);
  }
  return webidl.converters.USVString(V, prefix, context, opts);
};
|
2021-09-08 05:14:29 -04:00
|
|
|
|
2024-08-05 04:49:28 -04:00
|
|
|
// WebIDL dictionary converter for `URLPatternOptions`. Its only member,
// `ignoreCase`, is a boolean defaulting to false; the URLPattern constructor
// uses it to decide whether component regexps get the "i" flag.
webidl.converters.URLPatternOptions = webidl
  .createDictionaryConverter("URLPatternOptions", [
    {
      key: "ignoreCase",
      converter: webidl.converters.boolean,
      defaultValue: false,
    },
  ]);
|
|
|
|
|
2024-10-02 12:02:46 -04:00
|
|
|
export { URLPattern, urlPatternSettings };
|