1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2024-11-22 15:06:54 -05:00
denoland-deno/ext/http/00_serve.ts

907 lines
24 KiB
TypeScript
Raw Normal View History

// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
import { core, internals, primordials } from "ext:core/mod.js";
const {
BadResourcePrototype,
InterruptedPrototype,
Interrupted,
internalRidSymbol,
} = core;
import {
op_http_cancel,
op_http_close,
op_http_close_after_finish,
op_http_get_request_headers,
op_http_get_request_method_and_url,
op_http_read_request_body,
op_http_serve,
op_http_serve_on,
op_http_set_promise_complete,
op_http_set_response_body_bytes,
op_http_set_response_body_resource,
op_http_set_response_body_text,
op_http_set_response_header,
op_http_set_response_headers,
op_http_set_response_trailers,
op_http_try_wait,
op_http_upgrade_raw,
op_http_upgrade_websocket_next,
op_http_wait,
} from "ext:core/ops";
const {
ArrayPrototypePush,
ObjectHasOwn,
ObjectPrototypeIsPrototypeOf,
PromisePrototypeCatch,
PromisePrototypeThen,
StringPrototypeIncludes,
Symbol,
TypeError,
TypedArrayPrototypeGetSymbolToStringTag,
Uint8Array,
Promise,
} = primordials;
import { InnerBody } from "ext:deno_fetch/22_body.js";
import { Event } from "ext:deno_web/02_event.js";
import {
fromInnerResponse,
newInnerResponse,
ResponsePrototype,
toInnerResponse,
} from "ext:deno_fetch/23_response.js";
perf(ext/http): recover memory for serve and optimize AbortController (#23559) Max rps without a signal is unchanged, however we can drastically reduce memory usage by not creating the signal until needed, and we can optimize the rps in the case where the signal is created. With a quick memory benchmark, it looks like this helps pretty drastically with # of GCs when benchmarking w/wrk: - 1.42.4: 1763 - canary: 1093 - this patch: 874 This branch: ``` Running 10s test @ http://localhost:8080/ 2 threads and 10 connections Thread Stats Avg Stdev Max +/- Stdev Latency 87.33us 439.95us 20.68ms 99.67% Req/Sec 66.70k 6.39k 74.11k 83.66% 1340255 requests in 10.10s, 191.73MB read Requests/sec: 132696.90 Transfer/sec: 18.98MB cpu: Apple M2 Pro runtime: deno 1.43.0 (aarch64-apple-darwin) file:///Users/matt/Documents/scripts/bench_request.js benchmark time (avg) iter/s (min … max) p75 p99 p995 ----------------------------------------------------------------------------------------------- ----------------------------- newRequest 986.5 ns/iter 1,013,682.6 (878.2 ns … 1.18 µs) 1.01 µs 1.18 µs 1.18 µs newAbortController 18 ns/iter 55,541,104.1 (15.6 ns … 42.62 ns) 17.71 ns 25.05 ns 26.27 ns newAbortControllerSignal 18.66 ns/iter 53,578,966.7 (16.49 ns … 32.16 ns) 18.71 ns 25.67 ns 26.39 ns newAbortControllerSignalOnAbort 106.49 ns/iter 9,390,164.9 (97.87 ns … 120.61 ns) 108.6 ns 114.24 ns 115.89 ns newAbortControllerSignalAddEventListener 86.92 ns/iter 11,504,880.2 (81.88 ns … 103.15 ns) 90 ns 98.28 ns 99.55 ns newAbortControllerSignalOnAbortNoListener 3.01 µs/iter 331,964.4 (2.97 µs … 3.1 µs) 3.06 µs 3.1 µs 3.1 µs newAbortControllerSignalOnAbortAbort 3.26 µs/iter 306,662.6 (3.22 µs … 3.36 µs) 3.27 µs 3.36 µs 3.36 µs ``` Latest canary: ``` Running 10s test @ http://localhost:8080/ 2 threads and 10 connections Thread Stats Avg Stdev Max +/- Stdev Latency 72.86us 71.23us 4.47ms 99.05% Req/Sec 64.66k 5.54k 72.48k 82.18% 1299015 requests in 10.10s, 185.83MB read Requests/sec: 128616.02 
Transfer/sec: 18.40MB cpu: Apple M2 Pro runtime: deno 1.43.0+bc4aa5f (aarch64-apple-darwin) file:///Users/matt/Documents/scripts/bench_request.js benchmark time (avg) iter/s (min … max) p75 p99 p995 ----------------------------------------------------------------------------------------------- ----------------------------- newRequest 1.25 µs/iter 800,005.2 (1.01 µs … 4.18 µs) 1.16 µs 4.18 µs 4.18 µs newAbortController 18.56 ns/iter 53,868,204.3 (16.04 ns … 38.73 ns) 18.38 ns 26.1 ns 26.63 ns newAbortControllerSignal 18.72 ns/iter 53,430,746.1 (16.13 ns … 36.71 ns) 18.71 ns 26.19 ns 26.98 ns newAbortControllerSignalOnAbort 193.91 ns/iter 5,156,992.4 (184.25 ns … 211.41 ns) 194.96 ns 207.87 ns 209.4 ns newAbortControllerSignalAddEventListener 171.45 ns/iter 5,832,569.2 (153 ns … 182.03 ns) 176.17 ns 180.75 ns 181.05 ns newAbortControllerSignalOnAbortNoListener 3.07 µs/iter 326,263.3 (2.98 µs … 3.17 µs) 3.08 µs 3.17 µs 3.17 µs newAbortControllerSignalOnAbortAbort 3.32 µs/iter 301,344.6 (3.29 µs … 3.4 µs) 3.33 µs 3.4 µs 3.4 µs ```
2024-04-25 14:52:24 -04:00
import {
abortRequest,
fromInnerRequest,
toInnerRequest,
} from "ext:deno_fetch/23_request.js";
import { AbortController } from "ext:deno_web/03_abort_signal.js";
import {
_eventLoop,
_idleTimeoutDuration,
_idleTimeoutTimeout,
_protocol,
_readyState,
_rid,
_role,
_server,
_serverHandleIdleTimeout,
SERVER,
WebSocket,
} from "ext:deno_websocket/01_websocket.js";
import {
Deferred,
getReadableStreamResourceBacking,
readableStreamForRid,
ReadableStreamPrototype,
resourceForReadableStream,
} from "ext:deno_web/06_streams.js";
import { listen, listenOptionApiName, TcpConn } from "ext:deno_net/01_net.js";
import { hasTlsKeyPairOptions, listenTls } from "ext:deno_net/02_tls.js";
import { SymbolAsyncDispose } from "ext:deno_web/00_infra.js";
// Symbol used as the key of the getter through which `InnerRequest` exposes
// its private upgrade state (`false`, or a callback once an upgrade has begun).
const _upgraded = Symbol("_upgraded");
/**
 * Creates a fresh `Response` with status 500 whose body is the plain text
 * "Internal Server Error", supplied as raw bytes.
 *
 * @returns {Response} a new response object on every call
 */
function internalServerError() {
  // Character codes spelling out "Internal Server Error", grouped by word.
  const messageBytes = new Uint8Array([
    73, 110, 116, 101, 114, 110, 97, 108, // Internal
    32,
    83, 101, 114, 118, 101, 114, // Server
    32,
    69, 114, 114, 111, 114, // Error
  ]);
  const init = { status: 500 };
  return new Response(messageBytes, init);
}
// Single shared response object returned as `response` by both upgrade paths
// of `InnerRequest._wantsUpgrade`. It is used to ensure that the user returns
// a valid response (this one, and not a different response) from handlers
// that are upgraded.
const UPGRADE_RESPONSE_SENTINEL = fromInnerResponse(
newInnerResponse(101),
"immutable",
);
/**
 * Requests a raw upgrade for `req` by delegating to the `_wantsUpgrade` hook
 * of its inner request.
 *
 * @param req the request to upgrade
 * @param conn forwarded unchanged to the `_wantsUpgrade` hook
 * @returns whatever the `_wantsUpgrade` hook returns
 * @throws {TypeError} if the inner request has no `_wantsUpgrade` hook
 */
function upgradeHttpRaw(req, conn) {
  const innerRequest = toInnerRequest(req);
  // Inner requests without the hook cannot be upgraded this way.
  if (!innerRequest._wantsUpgrade) {
    throw new TypeError("'upgradeHttpRaw' may only be used with Deno.serve");
  }
  return innerRequest._wantsUpgrade("upgradeHttpRaw", conn);
}
/**
 * Passes `headerList` to `op_http_set_response_trailers` for the `external`
 * handle of the inner response behind `resp`.
 *
 * @param resp the response whose inner `external` handle is used
 * @param headerList forwarded unchanged to the op
 */
function addTrailers(resp, headerList) {
  const { external } = toInnerResponse(resp);
  op_http_set_response_trailers(external, headerList);
}
/**
 * Lazily populated view of one incoming HTTP request, backed by the opaque
 * `external` handle that the `op_http_*` ops accept.
 *
 * Method/URL data, the body stream and the completion promise are only
 * materialized the first time they are asked for. After `close()` the handle
 * is dropped: accessors that still need it throw `TypeError("Request closed")`,
 * while values that were already cached (URL, method/URL tuple) stay readable.
 */
class InnerRequest {
  #external;
  #context;
  #methodAndUri;
  #streamRid;
  #body;
  #upgraded;
  #urlValue;
  #completed;
  // Assigned from outside this class; handed to `abortRequest` in `close()`.
  request;

  /**
   * @param external handle passed to the `op_http_*` ops
   * @param context object providing `scheme`, `fallbackHost` and `listener`
   */
  constructor(external, context) {
    this.#external = external;
    this.#context = context;
    this.#upgraded = false;
    this.#completed = undefined;
  }

  /**
   * Settles the completion promise (if one was ever requested), calls
   * `abortRequest` on the associated request and drops the external handle.
   *
   * @param {boolean} [success=true] `false` rejects the completion promise
   *   with `Interrupted` instead of resolving it
   */
  close(success = true) {
    // The completion signal fires only if someone cares
    if (this.#completed) {
      if (success) {
        this.#completed.resolve(undefined);
      } else {
        this.#completed.reject(
          new Interrupted("HTTP response was not sent successfully"),
        );
      }
    }
    abortRequest(this.request);
    this.#external = null;
  }

  /** `false` until an upgrade has started, afterwards the upgrade callback. */
  get [_upgraded]() {
    return this.#upgraded;
  }

  /**
   * Starts an upgrade of this request. Both supported upgrade types run
   * synchronously up to the point where the request is closed and the upgrade
   * op has been issued.
   *
   * @param {string} upgradeType "upgradeHttpRaw" or "upgradeWebSocket"
   * @param originalArgs for "upgradeHttpRaw": `[underlyingConn]`;
   *   for "upgradeWebSocket": `[response, ws]`
   * @returns `{ response, conn }` for a raw upgrade, `{ response, socket }`
   *   for a WebSocket upgrade, `undefined` for any other `upgradeType`
   * @throws {Deno.errors.Http} if the request was already upgraded or closed
   */
  _wantsUpgrade(upgradeType, ...originalArgs) {
    if (this.#upgraded) {
      throw new Deno.errors.Http("Already upgraded");
    }
    if (this.#external === null) {
      throw new Deno.errors.Http("Already closed");
    }

    // upgradeHttpRaw is sync
    if (upgradeType === "upgradeHttpRaw") {
      // Keep the handle: close() below nulls the private field.
      const external = this.#external;
      const underlyingConn = originalArgs[0];

      // Touch the URL while the handle is still valid; url() caches its
      // result so it stays readable after close().
      // NOTE(review): the headerList result is discarded and this class does
      // not cache it - confirm whether this read is still required.
      this.url();
      this.headerList;
      this.close();

      this.#upgraded = () => {};

      const upgradeRid = op_http_upgrade_raw(external);

      const conn = new TcpConn(
        upgradeRid,
        underlyingConn?.remoteAddr,
        underlyingConn?.localAddr,
      );

      return { response: UPGRADE_RESPONSE_SENTINEL, conn };
    }

    // upgradeWebSocket is sync
    if (upgradeType === "upgradeWebSocket") {
      const response = originalArgs[0];
      const ws = originalArgs[1];

      // Keep the handle: close() below nulls the private field.
      const external = this.#external;

      // Same pre-close reads as in the raw upgrade path above.
      this.url();
      this.headerList;
      this.close();

      // The socket is only opened once the upgrade callback has been invoked.
      const goAhead = new Deferred();
      this.#upgraded = () => {
        goAhead.resolve();
      };
      const wsPromise = op_http_upgrade_websocket_next(
        external,
        response.headerList,
      );

      // Start the upgrade in the background.
      (async () => {
        try {
          // Returns the upgraded websocket connection
          const wsRid = await wsPromise;

          // We have to wait for the go-ahead signal
          await goAhead.promise;

          ws[_rid] = wsRid;
          ws[_readyState] = WebSocket.OPEN;
          ws[_role] = SERVER;
          const event = new Event("open");
          ws.dispatchEvent(event);

          ws[_eventLoop]();
          if (ws[_idleTimeoutDuration]) {
            ws.addEventListener(
              "close",
              () => clearTimeout(ws[_idleTimeoutTimeout]),
            );
          }
          ws[_serverHandleIdleTimeout]();
        } catch (error) {
          // Failures of the background upgrade surface as an "error" event.
          const event = new ErrorEvent("error", { error });
          ws.dispatchEvent(event);
        }
      })();
      return { response: UPGRADE_RESPONSE_SENTINEL, socket: ws };
    }
  }

  /**
   * Returns the cached method/URL tuple, fetching it through the op on first
   * use. Indices read by this class: 0 = method, 1 = authority/host,
   * 2 = path, 3 = remote hostname, 4 = remote port.
   *
   * @throws {TypeError} if nothing is cached and the request is closed
   */
  #loadMethodAndUri() {
    if (this.#methodAndUri === undefined) {
      if (this.#external === null) {
        throw new TypeError("Request closed");
      }
      // TODO(mmastrac): This is quite slow as we're serializing a large number of values. We may want to consider
      // splitting this up into multiple ops.
      this.#methodAndUri = op_http_get_request_method_and_url(this.#external);
    }
    return this.#methodAndUri;
  }

  /**
   * Computes (once) and returns the request URL string.
   *
   * @returns {string}
   * @throws {TypeError} if the URL was never computed and the request is closed
   */
  url() {
    if (this.#urlValue !== undefined) {
      return this.#urlValue;
    }

    const methodAndUri = this.#loadMethodAndUri();
    const path = methodAndUri[2];

    // * is valid for OPTIONS
    if (path === "*") {
      return this.#urlValue = "*";
    }

    // If the path is empty, return the authority (valid for CONNECT)
    if (path === "") {
      return this.#urlValue = methodAndUri[1];
    }

    // CONNECT requires an authority
    if (methodAndUri[0] === "CONNECT") {
      return this.#urlValue = methodAndUri[1];
    }

    const hostname = methodAndUri[1];
    if (hostname) {
      // Construct a URL from the scheme, the hostname, and the path
      return this.#urlValue = this.#context.scheme + hostname + path;
    }

    // Construct a URL from the scheme, the fallback hostname, and the path
    return this.#urlValue = this.#context.scheme + this.#context.fallbackHost +
      path;
  }

  /**
   * Promise settled by `close()`: resolved on success, rejected with
   * `Interrupted` otherwise. Created lazily on first access so that requests
   * which never ask for it do not allocate it.
   */
  get completed() {
    if (!this.#completed) {
      // NOTE: this is faster than Promise.withResolvers()
      let resolve, reject;
      const promise = new Promise((r1, r2) => {
        resolve = r1;
        reject = r2;
      });
      this.#completed = { promise, resolve, reject };
    }
    return this.#completed.promise;
  }

  /**
   * Address of the remote peer. For unix/unixpacket listeners this is the
   * listener's own path; otherwise a TCP address taken from the method/URL
   * tuple.
   *
   * @throws {TypeError} if TCP data is needed, not cached, and the request is closed
   */
  get remoteAddr() {
    const transport = this.#context.listener?.addr.transport;
    if (transport === "unix" || transport === "unixpacket") {
      return {
        transport,
        path: this.#context.listener.addr.path,
      };
    }
    const methodAndUri = this.#loadMethodAndUri();
    return {
      transport: "tcp",
      hostname: methodAndUri[3],
      port: methodAndUri[4],
    };
  }

  /**
   * The request method.
   *
   * @throws {TypeError} if not cached and the request is closed
   */
  get method() {
    return this.#loadMethodAndUri()[0];
  }

  /**
   * The request body as an `InnerBody`, or `null` for GET/HEAD. The result is
   * cached, but reading it always requires the request to still be open.
   *
   * @throws {TypeError} if the request is closed
   */
  get body() {
    if (this.#external === null) {
      throw new TypeError("Request closed");
    }
    if (this.#body !== undefined) {
      return this.#body;
    }
    // If the method is GET or HEAD, we do not want to include a body here, even if the Rust
    // side of the code is willing to provide it to us.
    const method = this.method;
    if (method === "GET" || method === "HEAD") {
      this.#body = null;
      return null;
    }
    this.#streamRid = op_http_read_request_body(this.#external);
    this.#body = new InnerBody(readableStreamForRid(this.#streamRid, false));
    return this.#body;
  }

  /**
   * Request headers as a new array of `[name, value]` pairs, rebuilt on every
   * access from the flat `[name, value, name, value, ...]` list the op returns.
   *
   * @throws {TypeError} if the request is closed
   */
  get headerList() {
    if (this.#external === null) {
      throw new TypeError("Request closed");
    }
    const headers = [];
    const reqHeaders = op_http_get_request_headers(this.#external);
    for (let i = 0; i < reqHeaders.length; i += 2) {
      ArrayPrototypePush(headers, [reqHeaders[i], reqHeaders[i + 1]]);
    }
    return headers;
  }

  /** The external handle, or `null` once the request has been closed. */
  get external() {
    return this.#external;
  }
}
/**
 * Per-server state: the server resource id, the URL pieces used when
 * building request URLs (`scheme`, `fallbackHost`), the listener, and the
 * closed/closing flags.
 */
class CallbackContext {
  abortController;
  scheme;
  fallbackHost;
  serverRid;
  closed;
  /** @type {Promise<void> | undefined} */
  closing;
  listener;

  /**
   * @param signal optional abort signal; when it fires, the server is
   *   cancelled via `op_http_cancel(serverRid, false)`
   * @param args indexable triple `[serverRid, scheme, fallbackHost]`
   * @param listener stored as-is on the context
   */
  constructor(signal, args, listener) {
    if (signal !== undefined && signal !== null) {
      // The abort signal triggers a non-graceful shutdown
      const cancelServer = () => {
        op_http_cancel(this.serverRid, false);
      };
      signal.addEventListener("abort", cancelServer, { once: true });
    }

    this.abortController = new AbortController();
    this.serverRid = args[0];
    this.scheme = args[1];
    this.fallbackHost = args[2];
    this.closed = false;
    this.listener = listener;
  }

  /**
   * Marks the context closed and attempts to close the server resource.
   * Any error raised while doing so is deliberately ignored.
   */
  close() {
    try {
      this.closed = true;
      core.tryClose(this.serverRid);
    } catch {
      // Best effort: a failure to close is not reported.
    }
  }
}
perf(ext/http): use ServeHandlerInfo class instead of object literal (#20122) This PR improves performance of `Deno.Serve` when providing `info` argument by creating `ServeHandlerInfo` class instead of creating an object literal with a getter on every request. ```js Deno.serve((_req, info) => new Response(info.remoteAddr.transport) }); ``` ### Benchmarks ``` wrk -d 10s --latency http://127.0.0.1:4500 Running 10s test @ http://127.0.0.1:4500 2 threads and 10 connections Thread Stats Avg Stdev Max +/- Stdev Latency 42.34us 16.30us 1.66ms 95.88% Req/Sec 118.17k 2.95k 127.38k 76.73% Latency Distribution 50% 38.00us 75% 41.00us 90% 56.00us 99% 83.00us 2375298 requests in 10.10s, 319.40MB read Requests/sec: 235177.04 Transfer/sec: 31.62MB ``` **main** ``` wrk -d 10s --latency http://127.0.0.1:4500 Running 10s test @ http://127.0.0.1:4500 2 threads and 10 connections Thread Stats Avg Stdev Max +/- Stdev Latency 78.86us 211.06us 3.58ms 96.52% Req/Sec 105.90k 4.35k 117.41k 78.22% Latency Distribution 50% 41.00us 75% 53.00us 90% 62.00us 99% 1.18ms 2127534 requests in 10.10s, 286.09MB read Requests/sec: 210647.49 Transfer/sec: 28.33MB ``` ``` cpu: 13th Gen Intel(R) Core(TM) i9-13900H runtime: deno 1.36.0 (x86_64-unknown-linux-gnu) benchmark time (avg) iter/s (min … max) p75 p99 p995 -------------------------------------------------------------------------- ----------------------------- new ServeHandlerInfo 3.43 ns/iter 291,508,889.3 (3.07 ns … 12.21 ns) 3.42 ns 3.84 ns 3.87 ns {} with getter 133.84 ns/iter 7,471,528.9 (92.9 ns … 458.95 ns) 132.45 ns 364.96 ns 429.43 ns ``` ---- ### Drawbacks: `.remoteAddr` is now not enumerable ``` ServeHandlerInfo {} ``` vs ``` { remoteAddr: [Getter] } ``` It'll break any code trying to iterate through `info` keys (Doubt there's anyone doing it though) ```js Deno.serve((req, info) => { console.log(Object.keys(info).length === 0) // true; return new Response("yes"); });
2023-08-10 13:45:55 -04:00
/**
 * Thin read-only facade over an `InnerRequest`, exposing only its
 * `remoteAddr` and `completed` accessors. Both are prototype getters, so
 * instances have no own enumerable properties.
 */
class ServeHandlerInfo {
  #inner: InnerRequest;

  /**
   * @param inner the inner request every getter delegates to
   */
  constructor(inner: InnerRequest) {
    this.#inner = inner;
  }

  /** Forwards to `InnerRequest.remoteAddr` (evaluated on each access). */
  get remoteAddr() {
    return this.#inner.remoteAddr;
  }

  /** Forwards to `InnerRequest.completed` (evaluated on each access). */
  get completed() {
    return this.#inner.completed;
  }
}
/**
 * Hands a response body to the matching `op_http_*` op.
 *
 * - no body: the request is closed and only the status is reported;
 * - static `Uint8Array` or string body: the request is closed and the body is
 *   set in one call;
 * - stream body: the stream's resource is handed over, and the request is
 *   closed (with the reported success flag) once the op's promise fulfils.
 *
 * @param req handle passed to the response ops
 * @param respBody `null`/`undefined`, or an object with a `streamOrStatic` property
 * @param status passed through to the ops
 * @param innerRequest closed at the appropriate point when present
 * @throws {TypeError} "Invalid response" when the body is neither static
 *   bytes/text nor a ReadableStream
 */
function fastSyncResponseOrStream(
  req,
  respBody,
  status,
  innerRequest: InnerRequest,
) {
  if (respBody == null) {
    // Don't set the body
    innerRequest?.close();
    op_http_set_promise_complete(req, status);
    return;
  }

  const streamOrStatic = respBody.streamOrStatic;
  const staticBody = streamOrStatic.body;
  if (staticBody !== undefined) {
    // The caller of this function has already verified that the response
    // has not been consumed yet.
    streamOrStatic.consumed = true;
  }

  const isBytes =
    TypedArrayPrototypeGetSymbolToStringTag(staticBody) === "Uint8Array";
  if (isBytes) {
    innerRequest?.close();
    op_http_set_response_body_bytes(req, staticBody, status);
    return;
  }

  if (typeof staticBody === "string") {
    innerRequest?.close();
    op_http_set_response_body_text(req, staticBody, status);
    return;
  }

  // Anything left has to be a stream.
  const isStream = ObjectPrototypeIsPrototypeOf(
    ReadableStreamPrototype,
    streamOrStatic,
  );
  if (!isStream) {
    innerRequest?.close();
    throw new TypeError("Invalid response");
  }

  // Reuse the resource already backing the stream when there is one;
  // otherwise create a resource for it and let it be closed automatically.
  const backing = getReadableStreamResourceBacking(streamOrStatic);
  const rid = backing ? backing.rid : resourceForReadableStream(streamOrStatic);
  const autoClose = backing ? backing.autoClose : true;

  const sent = op_http_set_response_body_resource(req, rid, autoClose, status);
  PromisePrototypeThen(sent, (success) => {
    innerRequest?.close(success);
    op_http_close_after_finish(req);
  });
}
/**
* Maps the incoming request slab ID to a fully-fledged Request object, passes it to the user-provided
* callback, then extracts the response that was returned from that callback. The response is then pulled
* apart and handled on the Rust side.
*
* This function returns a promise that will only reject in the case of abnormal exit.
*/
function mapToCallback(context, callback, onError) {
return async function (req) {
// Get the response from the user-provided callback. If that fails, use onError. If that fails, return a fallback
// 500 error.
let innerRequest;
let response;
try {
perf(ext/http): recover memory for serve and optimize AbortController (#23559) Max rps without a signal is unchanged, however we can drastically reduce memory usage by not creating the signal until needed, and we can optimize the rps in the case where the signal is created. With a quick memory benchmark, it looks like this helps pretty drastically with # of GCs when benchmarking w/wrk: - 1.42.4: 1763 - canary: 1093 - this patch: 874 This branch: ``` Running 10s test @ http://localhost:8080/ 2 threads and 10 connections Thread Stats Avg Stdev Max +/- Stdev Latency 87.33us 439.95us 20.68ms 99.67% Req/Sec 66.70k 6.39k 74.11k 83.66% 1340255 requests in 10.10s, 191.73MB read Requests/sec: 132696.90 Transfer/sec: 18.98MB cpu: Apple M2 Pro runtime: deno 1.43.0 (aarch64-apple-darwin) file:///Users/matt/Documents/scripts/bench_request.js benchmark time (avg) iter/s (min … max) p75 p99 p995 ----------------------------------------------------------------------------------------------- ----------------------------- newRequest 986.5 ns/iter 1,013,682.6 (878.2 ns … 1.18 µs) 1.01 µs 1.18 µs 1.18 µs newAbortController 18 ns/iter 55,541,104.1 (15.6 ns … 42.62 ns) 17.71 ns 25.05 ns 26.27 ns newAbortControllerSignal 18.66 ns/iter 53,578,966.7 (16.49 ns … 32.16 ns) 18.71 ns 25.67 ns 26.39 ns newAbortControllerSignalOnAbort 106.49 ns/iter 9,390,164.9 (97.87 ns … 120.61 ns) 108.6 ns 114.24 ns 115.89 ns newAbortControllerSignalAddEventListener 86.92 ns/iter 11,504,880.2 (81.88 ns … 103.15 ns) 90 ns 98.28 ns 99.55 ns newAbortControllerSignalOnAbortNoListener 3.01 µs/iter 331,964.4 (2.97 µs … 3.1 µs) 3.06 µs 3.1 µs 3.1 µs newAbortControllerSignalOnAbortAbort 3.26 µs/iter 306,662.6 (3.22 µs … 3.36 µs) 3.27 µs 3.36 µs 3.36 µs ``` Latest canary: ``` Running 10s test @ http://localhost:8080/ 2 threads and 10 connections Thread Stats Avg Stdev Max +/- Stdev Latency 72.86us 71.23us 4.47ms 99.05% Req/Sec 64.66k 5.54k 72.48k 82.18% 1299015 requests in 10.10s, 185.83MB read Requests/sec: 128616.02 
Transfer/sec: 18.40MB cpu: Apple M2 Pro runtime: deno 1.43.0+bc4aa5f (aarch64-apple-darwin) file:///Users/matt/Documents/scripts/bench_request.js benchmark time (avg) iter/s (min … max) p75 p99 p995 ----------------------------------------------------------------------------------------------- ----------------------------- newRequest 1.25 µs/iter 800,005.2 (1.01 µs … 4.18 µs) 1.16 µs 4.18 µs 4.18 µs newAbortController 18.56 ns/iter 53,868,204.3 (16.04 ns … 38.73 ns) 18.38 ns 26.1 ns 26.63 ns newAbortControllerSignal 18.72 ns/iter 53,430,746.1 (16.13 ns … 36.71 ns) 18.71 ns 26.19 ns 26.98 ns newAbortControllerSignalOnAbort 193.91 ns/iter 5,156,992.4 (184.25 ns … 211.41 ns) 194.96 ns 207.87 ns 209.4 ns newAbortControllerSignalAddEventListener 171.45 ns/iter 5,832,569.2 (153 ns … 182.03 ns) 176.17 ns 180.75 ns 181.05 ns newAbortControllerSignalOnAbortNoListener 3.07 µs/iter 326,263.3 (2.98 µs … 3.17 µs) 3.08 µs 3.17 µs 3.17 µs newAbortControllerSignalOnAbortAbort 3.32 µs/iter 301,344.6 (3.29 µs … 3.4 µs) 3.33 µs 3.4 µs 3.4 µs ```
2024-04-25 14:52:24 -04:00
innerRequest = new InnerRequest(req, context);
const request = fromInnerRequest(innerRequest, "immutable");
innerRequest.request = request;
response = await callback(
perf(ext/http): recover memory for serve and optimize AbortController (#23559) Max rps without a signal is unchanged, however we can drastically reduce memory usage by not creating the signal until needed, and we can optimize the rps in the case where the signal is created. With a quick memory benchmark, it looks like this helps pretty drastically with # of GCs when benchmarking w/wrk: - 1.42.4: 1763 - canary: 1093 - this patch: 874 This branch: ``` Running 10s test @ http://localhost:8080/ 2 threads and 10 connections Thread Stats Avg Stdev Max +/- Stdev Latency 87.33us 439.95us 20.68ms 99.67% Req/Sec 66.70k 6.39k 74.11k 83.66% 1340255 requests in 10.10s, 191.73MB read Requests/sec: 132696.90 Transfer/sec: 18.98MB cpu: Apple M2 Pro runtime: deno 1.43.0 (aarch64-apple-darwin) file:///Users/matt/Documents/scripts/bench_request.js benchmark time (avg) iter/s (min … max) p75 p99 p995 ----------------------------------------------------------------------------------------------- ----------------------------- newRequest 986.5 ns/iter 1,013,682.6 (878.2 ns … 1.18 µs) 1.01 µs 1.18 µs 1.18 µs newAbortController 18 ns/iter 55,541,104.1 (15.6 ns … 42.62 ns) 17.71 ns 25.05 ns 26.27 ns newAbortControllerSignal 18.66 ns/iter 53,578,966.7 (16.49 ns … 32.16 ns) 18.71 ns 25.67 ns 26.39 ns newAbortControllerSignalOnAbort 106.49 ns/iter 9,390,164.9 (97.87 ns … 120.61 ns) 108.6 ns 114.24 ns 115.89 ns newAbortControllerSignalAddEventListener 86.92 ns/iter 11,504,880.2 (81.88 ns … 103.15 ns) 90 ns 98.28 ns 99.55 ns newAbortControllerSignalOnAbortNoListener 3.01 µs/iter 331,964.4 (2.97 µs … 3.1 µs) 3.06 µs 3.1 µs 3.1 µs newAbortControllerSignalOnAbortAbort 3.26 µs/iter 306,662.6 (3.22 µs … 3.36 µs) 3.27 µs 3.36 µs 3.36 µs ``` Latest canary: ``` Running 10s test @ http://localhost:8080/ 2 threads and 10 connections Thread Stats Avg Stdev Max +/- Stdev Latency 72.86us 71.23us 4.47ms 99.05% Req/Sec 64.66k 5.54k 72.48k 82.18% 1299015 requests in 10.10s, 185.83MB read Requests/sec: 128616.02 
Transfer/sec: 18.40MB cpu: Apple M2 Pro runtime: deno 1.43.0+bc4aa5f (aarch64-apple-darwin) file:///Users/matt/Documents/scripts/bench_request.js benchmark time (avg) iter/s (min … max) p75 p99 p995 ----------------------------------------------------------------------------------------------- ----------------------------- newRequest 1.25 µs/iter 800,005.2 (1.01 µs … 4.18 µs) 1.16 µs 4.18 µs 4.18 µs newAbortController 18.56 ns/iter 53,868,204.3 (16.04 ns … 38.73 ns) 18.38 ns 26.1 ns 26.63 ns newAbortControllerSignal 18.72 ns/iter 53,430,746.1 (16.13 ns … 36.71 ns) 18.71 ns 26.19 ns 26.98 ns newAbortControllerSignalOnAbort 193.91 ns/iter 5,156,992.4 (184.25 ns … 211.41 ns) 194.96 ns 207.87 ns 209.4 ns newAbortControllerSignalAddEventListener 171.45 ns/iter 5,832,569.2 (153 ns … 182.03 ns) 176.17 ns 180.75 ns 181.05 ns newAbortControllerSignalOnAbortNoListener 3.07 µs/iter 326,263.3 (2.98 µs … 3.17 µs) 3.08 µs 3.17 µs 3.17 µs newAbortControllerSignalOnAbortAbort 3.32 µs/iter 301,344.6 (3.29 µs … 3.4 µs) 3.33 µs 3.4 µs 3.4 µs ```
2024-04-25 14:52:24 -04:00
request,
new ServeHandlerInfo(innerRequest),
);
// Throwing Error if the handler return value is not a Response class
if (!ObjectPrototypeIsPrototypeOf(ResponsePrototype, response)) {
throw new TypeError(
"Return value from serve handler must be a response or a promise resolving to a response",
);
}
if (response.type === "error") {
throw new TypeError(
"Return value from serve handler must not be an error response (like Response.error())",
);
}
if (response.bodyUsed) {
throw new TypeError(
"The body of the Response returned from the serve handler has already been consumed",
);
}
} catch (error) {
try {
response = await onError(error);
if (!ObjectPrototypeIsPrototypeOf(ResponsePrototype, response)) {
throw new TypeError(
"Return value from onError handler must be a response or a promise resolving to a response",
);
}
} catch (error) {
// deno-lint-ignore no-console
console.error("Exception in onError while handling exception", error);
response = internalServerError();
}
}
const inner = toInnerResponse(response);
if (innerRequest?.[_upgraded]) {
// We're done here as the connection has been upgraded during the callback and no longer requires servicing.
if (response !== UPGRADE_RESPONSE_SENTINEL) {
// deno-lint-ignore no-console
console.error("Upgrade response was not returned from callback");
context.close();
}
innerRequest?.[_upgraded]();
return;
}
// Did everything shut down while we were waiting?
if (context.closed) {
// We're shutting down, so this status shouldn't make it back to the client but "Service Unavailable" seems appropriate
innerRequest?.close();
op_http_set_promise_complete(req, 503);
return;
}
const status = inner.status;
const headers = inner.headerList;
if (headers && headers.length > 0) {
if (headers.length == 1) {
op_http_set_response_header(req, headers[0][0], headers[0][1]);
} else {
op_http_set_response_headers(req, headers);
}
}
fastSyncResponseOrStream(req, inner.body, status, innerRequest);
};
}
/**
 * Signature of a request handler accepted by `serve`: it is given the
 * incoming `Request` plus a `ServeHandlerInfo` and produces a `Response`,
 * either directly or through a promise.
 */
type RawHandler = (
request: Request,
info: ServeHandlerInfo,
) => Response | Promise<Response>;
/**
 * Options bag accepted by `serve`.
 *
 * NOTE(review): `serve` also reads `path`, `certFile`, `keyFile` and
 * `alpnProtocols` from the options object, none of which are declared here —
 * confirm whether the type should list them.
 */
type RawServeOptions = {
// Port to listen on; `serve` falls back to 8000 when this is nullish.
port?: number;
// Hostname to bind; `serve` falls back to "0.0.0.0" when this is nullish.
hostname?: string;
// Passed through by `serve` to the server's `CallbackContext`.
signal?: AbortSignal;
// Forwarded to the listener options; `serve` falls back to `false`.
reusePort?: boolean;
// TLS key; `serve` requires both `key` and `cert` to enable HTTPS.
key?: string;
// TLS certificate; see `key`.
cert?: string;
// Produces the response for a failed request; when omitted `serve` installs
// a default that logs the error and returns `internalServerError()`.
onError?: (error: unknown) => Response | Promise<Response>;
// Invoked by `serve` with the listener's address once it is listening; when
// omitted a "Listening on ..." line is written with `console.error` instead.
onListen?: (params: { hostname: string; port: number }) => void;
// Request handler, used by `serve` when no handler function is passed
// positionally.
handler?: RawHandler;
};
feat(serve): Opt-in parallelism for `deno serve` (#24920) Adds a `parallel` flag to `deno serve`. When present, we spawn multiple workers to parallelize serving requests. ```bash deno serve --parallel main.ts ``` Currently on linux we use `SO_REUSEPORT` and rely on the fact that the kernel will distribute connections in a round-robin manner. On mac and windows, we sort of emulate this by cloning the underlying file descriptor and passing a handle to each worker. The connections will not be guaranteed to be fairly distributed (and in practice almost certainly won't be), but the distribution is still spread enough to provide a significant performance increase. --- (Run on an Macbook Pro with an M3 Max, serving `deno.com` baseline:: ``` ❯ wrk -d 30s -c 125 --latency http://127.0.0.1:8000 Running 30s test @ http://127.0.0.1:8000 2 threads and 125 connections Thread Stats Avg Stdev Max +/- Stdev Latency 239.78ms 13.56ms 330.54ms 79.12% Req/Sec 258.58 35.56 360.00 70.64% Latency Distribution 50% 236.72ms 75% 248.46ms 90% 256.84ms 99% 268.23ms 15458 requests in 30.02s, 2.47GB read Requests/sec: 514.89 Transfer/sec: 84.33MB ``` this PR (`with --parallel` flag) ``` ❯ wrk -d 30s -c 125 --latency http://127.0.0.1:8000 Running 30s test @ http://127.0.0.1:8000 2 threads and 125 connections Thread Stats Avg Stdev Max +/- Stdev Latency 117.40ms 142.84ms 590.45ms 79.07% Req/Sec 1.33k 175.19 1.77k 69.00% Latency Distribution 50% 22.34ms 75% 223.67ms 90% 357.32ms 99% 460.50ms 79636 requests in 30.07s, 12.74GB read Requests/sec: 2647.96 Transfer/sec: 433.71MB ```
2024-08-14 18:26:21 -04:00
// Symbol-keyed option: `serve` reads `options[kLoadBalanced]` and forwards it
// to the listener options as `loadBalanced` (defaulting to `false`);
// `registerDeclarativeServer` is the code in this file that sets it.
const kLoadBalanced = Symbol("kLoadBalanced");
/**
 * Formats a hostname for display in the "Listening on ..." message.
 *
 * @param hostname The hostname the listener is bound to.
 * @returns `"localhost"` for the wildcard addresses `0.0.0.0` / `::` when
 * running on Windows, the hostname wrapped in brackets when it contains a
 * `:` (IPv6 literal), and the hostname unchanged otherwise.
 */
function formatHostName(hostname: string): string {
  // If the hostname is "0.0.0.0", we display "localhost" in console
  // because browsers in Windows don't resolve "0.0.0.0".
  // See the discussion in https://github.com/denoland/deno_std/issues/1165
  // Strict equality: `hostname` is a string, so no coercion is wanted here.
  const isWildcard = hostname === "0.0.0.0" || hostname === "::";
  if (Deno.build.os === "windows" && isWildcard) {
    return "localhost";
  }

  // Add brackets around ipv6 hostname
  return StringPrototypeIncludes(hostname, ":") ? `[${hostname}]` : hostname;
}
/**
 * Starts an HTTP(S) server and returns the object produced by
 * `serveHttpOnListener`.
 *
 * Accepted call shapes:
 *  - `serve(handler)`
 *  - `serve(options, handler)`
 *  - `serve(options)` where `options.handler` is the handler
 *
 * When the options object has an own `path` property a unix-socket listener
 * is created; otherwise a TCP listener is created, using TLS when
 * `hasTlsKeyPairOptions(options)` is truthy.
 *
 * Throws `TypeError` when neither a handler nor options are given, when the
 * resolved handler is not a function, when the unsupported `certFile` /
 * `keyFile` / `alpnProtocols` options are present, or when HTTPS is requested
 * without both `cert` and `key`.
 */
function serve(arg1, arg2) {
  let options: RawServeOptions | undefined;
  let handler: RawHandler | undefined;

  // Work out which positional argument carries the handler and which the
  // options.
  if (typeof arg1 === "function") {
    handler = arg1;
  } else {
    options = arg1;
    if (typeof arg2 === "function") {
      handler = arg2;
    }
  }

  // No positional handler: it has to come from the options object.
  if (handler === undefined) {
    if (options === undefined) {
      throw new TypeError(
        "Cannot serve HTTP requests: either a `handler` or `options` must be specified",
      );
    }
    handler = options.handler;
  }
  if (typeof handler !== "function") {
    throw new TypeError(
      `Cannot serve HTTP requests: handler must be a function, received ${typeof handler}`,
    );
  }

  // Only `undefined` is replaced; any other value is used as given.
  if (options === undefined) {
    options = { __proto__: null };
  }

  const wantsHttps = hasTlsKeyPairOptions(options);
  const wantsUnix = ObjectHasOwn(options, "path");
  const signal = options.signal;

  // Fallback used when the caller supplies no `onError`: log and answer with
  // the generic internal-server-error response.
  const logAndFail = (error) => {
    // deno-lint-ignore no-console
    console.error(error);
    return internalServerError();
  };
  const onError = options.onError ?? logAndFail;

  if (wantsUnix) {
    const unixListener = listen({
      transport: "unix",
      path: options.path,
      [listenOptionApiName]: "Deno.serve",
    });
    const socketPath = unixListener.addr.path;
    const announceUnix = () => {
      if (!options.onListen) {
        // deno-lint-ignore no-console
        console.error(`Listening on ${socketPath}`);
        return;
      }
      options.onListen(unixListener.addr);
    };
    return serveHttpOnListener(
      unixListener,
      signal,
      handler,
      onError,
      announceUnix,
    );
  }

  const listenOpts = {
    hostname: options.hostname ?? "0.0.0.0",
    port: options.port ?? 8000,
    reusePort: options.reusePort ?? false,
    loadBalanced: options[kLoadBalanced] ?? false,
  };

  // Reject options that are explicitly unsupported.
  if (options.certFile || options.keyFile) {
    throw new TypeError(
      "Unsupported 'certFile' / 'keyFile' options provided: use 'cert' / 'key' instead.",
    );
  }
  if (options.alpnProtocols) {
    throw new TypeError(
      "Unsupported 'alpnProtocols' option provided. 'h2' and 'http/1.1' are automatically supported.",
    );
  }

  if (wantsHttps) {
    if (!(options.cert && options.key)) {
      throw new TypeError(
        "Both 'cert' and 'key' must be provided to enable HTTPS",
      );
    }
    listenOpts.cert = options.cert;
    listenOpts.key = options.key;
    listenOpts.alpnProtocols = ["h2", "http/1.1"];
  }

  const listener = wantsHttps ? listenTls(listenOpts) : listen(listenOpts);
  // Record the port the listener actually reports.
  listenOpts.port = listener.addr.port;

  const addr = listener.addr;
  const onListen = (scheme) => {
    if (options.onListen) {
      options.onListen(addr);
      return;
    }
    const host = formatHostName(addr.hostname);
    // deno-lint-ignore no-console
    console.error(`Listening on ${scheme}${host}:${addr.port}/`);
  };

  return serveHttpOnListener(listener, signal, handler, onError, onListen);
}
/**
 * Serve HTTP/1.1 and/or HTTP/2 on an arbitrary listener.
 *
 * Hands the listener's internal resource id to `op_http_serve`, wraps the
 * result in a `CallbackContext` together with `signal` and the listener,
 * calls `onListen` with the context's scheme, and then starts the serve loop
 * through `serveHttpOn`, whose return value is returned.
 */
function serveHttpOnListener(listener, signal, handler, onError, onListen) {
  const serveArgs = op_http_serve(listener[internalRidSymbol]);
  const context = new CallbackContext(signal, serveArgs, listener);
  const callback = mapToCallback(context, handler, onError);

  onListen(context.scheme);

  return serveHttpOn(context, listener.addr, callback);
}
/**
 * Serve HTTP/1.1 and/or HTTP/2 on an arbitrary connection.
 *
 * Hands the connection's internal resource id to `op_http_serve_on`, wraps
 * the result in a `CallbackContext` (with no listener), calls `onListen` with
 * the context's scheme, and then starts the serve loop through `serveHttpOn`
 * using the connection's local address.
 */
function serveHttpOnConnection(connection, signal, handler, onError, onListen) {
  const serveArgs = op_http_serve_on(connection[internalRidSymbol]);
  const context = new CallbackContext(signal, serveArgs, null);
  const callback = mapToCallback(context, handler, onError);

  onListen(context.scheme);

  return serveHttpOn(context, connection.localAddr, callback);
}
/**
 * Runs the request loop for an already-created server context.
 *
 * `context` supplies `serverRid` and the `closing` / `closed` / `close()`
 * state used below, `addr` is echoed back on the returned object, and
 * `callback` is invoked once per request pulled from the ops.
 *
 * Returns an object with `addr`, a `finished` promise for the loop,
 * `shutdown()`, `ref()` / `unref()`, and a `[SymbolAsyncDispose]` method that
 * delegates to `shutdown()`.
 */
function serveHttpOn(context, addr, callback) {
// Whether the pending `op_http_wait` promise should stay ref'd; toggled by
// the `ref()` / `unref()` methods on the returned object.
let ref = true;
// The in-flight `op_http_wait` promise, or null when the loop is not awaiting.
let currentPromise = null;
// Attached to every `callback(req)` promise: a rejection is logged and the
// context is closed.
const promiseErrorHandler = (error) => {
// Abnormal exit
// deno-lint-ignore no-console
console.error(
"Terminating Deno.serve loop due to unexpected error",
error,
);
context.close();
};
// Run the server
const finished = (async () => {
const rid = context.serverRid;
while (true) {
let req;
try {
// Attempt to pull as many requests out of the queue as possible before awaiting. This API is
// a synchronous, non-blocking API; the loop stops draining when it returns `null`.
// NOTE(review): this comment previously said the op returns u32::MAX on failure, but the
// code checks for `null` — confirm against the op's implementation.
while ((req = op_http_try_wait(rid)) !== null) {
PromisePrototypeCatch(callback(req), promiseErrorHandler);
}
// Nothing left to drain synchronously: wait asynchronously for the next request.
currentPromise = op_http_wait(rid);
if (!ref) {
core.unrefOpPromise(currentPromise);
}
req = await currentPromise;
currentPromise = null;
} catch (error) {
// BadResource and Interrupted errors end the loop quietly; anything else
// is rethrown wrapped in `Deno.errors.Http`.
if (ObjectPrototypeIsPrototypeOf(BadResourcePrototype, error)) {
break;
}
if (ObjectPrototypeIsPrototypeOf(InterruptedPrototype, error)) {
break;
}
throw new Deno.errors.Http(error);
}
// A `null` result from the awaited op also ends the loop.
if (req === null) {
break;
}
PromisePrototypeCatch(callback(req), promiseErrorHandler);
}
try {
if (!context.closing && !context.closed) {
// NOTE(review): this stores the awaited result rather than the pending
// promise (unlike `shutdown()` below), so `context.closing` stays unset
// while the close is in flight — confirm this is intended.
context.closing = await op_http_close(rid, false);
context.close();
}
await context.closing;
} catch (error) {
if (ObjectPrototypeIsPrototypeOf(InterruptedPrototype, error)) {
return;
}
if (ObjectPrototypeIsPrototypeOf(BadResourcePrototype, error)) {
return;
}
throw error;
} finally {
// Runs on every exit path, including the early returns above.
context.close();
context.closed = true;
}
})();
return {
addr,
finished,
// Closes the server via `op_http_close(serverRid, true)` unless a close is
// already recorded on the context, then waits for it. Interrupted and
// BadResource errors are swallowed; the context is always marked closed.
async shutdown() {
try {
if (!context.closing && !context.closed) {
// Shut this HTTP server down gracefully
context.closing = op_http_close(context.serverRid, true);
}
await context.closing;
} catch (error) {
// The server was interrupted
if (ObjectPrototypeIsPrototypeOf(InterruptedPrototype, error)) {
return;
}
if (ObjectPrototypeIsPrototypeOf(BadResourcePrototype, error)) {
return;
}
throw error;
} finally {
context.closed = true;
}
},
// Records the ref'd state and applies it to the pending wait promise, if any.
ref() {
ref = true;
if (currentPromise) {
core.refOpPromise(currentPromise);
}
},
// Records the unref'd state and applies it to the pending wait promise, if
// any; later waits are unref'd inside the loop.
unref() {
ref = false;
if (currentPromise) {
core.unrefOpPromise(currentPromise);
}
},
// Async disposal delegates to `shutdown()`.
[SymbolAsyncDispose]() {
return this.shutdown();
},
};
}
// Attach these helpers to the `internals` object imported from
// "ext:core/mod.js".
// NOTE(review): presumably so other runtime modules can reach them without
// importing this file directly — confirm against the consumers.
internals.addTrailers = addTrailers;
internals.upgradeHttpRaw = upgradeHttpRaw;
internals.serveHttpOnListener = serveHttpOnListener;
internals.serveHttpOnConnection = serveHttpOnConnection;
/**
 * Builds the startup function for a module that declares its server through
 * a `fetch` export.
 *
 * Returns `undefined` when `exports` has no own `fetch` property and throws
 * `TypeError` when `fetch` is present but not a function. Otherwise returns a
 * function taking `{ servePort, serveHost, serveIsMain, serveWorkerCount }`
 * that calls `Deno.serve` with a handler forwarding to `exports.fetch`.
 */
function registerDeclarativeServer(exports) {
  if (!ObjectHasOwn(exports, "fetch")) {
    return;
  }
  if (typeof exports.fetch !== "function") {
    throw new TypeError(
      "Invalid type for fetch: must be a function with a single or no parameter",
    );
  }

  return ({ servePort, serveHost, serveIsMain, serveWorkerCount }) => {
    const loadBalanced = (serveIsMain && serveWorkerCount > 1) ||
      (serveWorkerCount !== null);

    // Only the main instance prints the listening banner.
    const announce = ({ port, hostname }) => {
      if (!serveIsMain) {
        return;
      }
      let nThreads = "";
      if (serveWorkerCount > 1) {
        nThreads = ` with ${serveWorkerCount} threads`;
      }
      const host = formatHostName(hostname);
      // deno-lint-ignore no-console
      console.error(
        `%cdeno serve%c: Listening on %chttp://${host}:${port}/%c${nThreads}`,
        "color: green",
        "color: inherit",
        "color: yellow",
        "color: inherit",
      );
    };

    Deno.serve({
      port: servePort,
      hostname: serveHost,
      [kLoadBalanced]: loadBalanced,
      onListen: announce,
      // Looked up on `exports` at call time so `this` stays bound to it.
      handler: (req, connInfo) => exports.fetch(req, connInfo),
    });
  };
}
export {
addTrailers,
registerDeclarativeServer,
serve,
serveHttpOnConnection,
serveHttpOnListener,
upgradeHttpRaw,
};