1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2024-12-27 09:39:08 -05:00
denoland-deno/runtime/worker_bootstrap.rs

227 lines
5.9 KiB
Rust
Raw Normal View History

// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
use deno_core::v8;
use deno_core::ModuleSpecifier;
use serde::Serialize;
use std::cell::RefCell;
use std::thread;
use deno_terminal::colors;
/// The execution mode for this worker. Some modes may have implicit behaviour.
#[derive(Copy, Clone)]
pub enum WorkerExecutionMode {
/// No special behaviour.
None,
/// Running in a worker.
Worker,
/// `deno run`
Run,
/// `deno repl`
Repl,
/// `deno eval`
Eval,
/// `deno test`
Test,
/// `deno bench`
Bench,
/// `deno serve`
feat(serve): Opt-in parallelism for `deno serve` (#24920) Adds a `parallel` flag to `deno serve`. When present, we spawn multiple workers to parallelize serving requests. ```bash deno serve --parallel main.ts ``` Currently on linux we use `SO_REUSEPORT` and rely on the fact that the kernel will distribute connections in a round-robin manner. On mac and windows, we sort of emulate this by cloning the underlying file descriptor and passing a handle to each worker. The connections will not be guaranteed to be fairly distributed (and in practice almost certainly won't be), but the distribution is still spread enough to provide a significant performance increase. --- (Run on an Macbook Pro with an M3 Max, serving `deno.com` baseline:: ``` ❯ wrk -d 30s -c 125 --latency http://127.0.0.1:8000 Running 30s test @ http://127.0.0.1:8000 2 threads and 125 connections Thread Stats Avg Stdev Max +/- Stdev Latency 239.78ms 13.56ms 330.54ms 79.12% Req/Sec 258.58 35.56 360.00 70.64% Latency Distribution 50% 236.72ms 75% 248.46ms 90% 256.84ms 99% 268.23ms 15458 requests in 30.02s, 2.47GB read Requests/sec: 514.89 Transfer/sec: 84.33MB ``` this PR (`with --parallel` flag) ``` ❯ wrk -d 30s -c 125 --latency http://127.0.0.1:8000 Running 30s test @ http://127.0.0.1:8000 2 threads and 125 connections Thread Stats Avg Stdev Max +/- Stdev Latency 117.40ms 142.84ms 590.45ms 79.07% Req/Sec 1.33k 175.19 1.77k 69.00% Latency Distribution 50% 22.34ms 75% 223.67ms 90% 357.32ms 99% 460.50ms 79636 requests in 30.07s, 12.74GB read Requests/sec: 2647.96 Transfer/sec: 433.71MB ```
2024-08-14 18:26:21 -04:00
Serve {
is_main: bool,
worker_count: Option<usize>,
},
/// `deno jupyter`
Jupyter,
}
feat(serve): Opt-in parallelism for `deno serve` (#24920) Adds a `parallel` flag to `deno serve`. When present, we spawn multiple workers to parallelize serving requests. ```bash deno serve --parallel main.ts ``` Currently on linux we use `SO_REUSEPORT` and rely on the fact that the kernel will distribute connections in a round-robin manner. On mac and windows, we sort of emulate this by cloning the underlying file descriptor and passing a handle to each worker. The connections will not be guaranteed to be fairly distributed (and in practice almost certainly won't be), but the distribution is still spread enough to provide a significant performance increase. --- (Run on an Macbook Pro with an M3 Max, serving `deno.com` baseline:: ``` ❯ wrk -d 30s -c 125 --latency http://127.0.0.1:8000 Running 30s test @ http://127.0.0.1:8000 2 threads and 125 connections Thread Stats Avg Stdev Max +/- Stdev Latency 239.78ms 13.56ms 330.54ms 79.12% Req/Sec 258.58 35.56 360.00 70.64% Latency Distribution 50% 236.72ms 75% 248.46ms 90% 256.84ms 99% 268.23ms 15458 requests in 30.02s, 2.47GB read Requests/sec: 514.89 Transfer/sec: 84.33MB ``` this PR (`with --parallel` flag) ``` ❯ wrk -d 30s -c 125 --latency http://127.0.0.1:8000 Running 30s test @ http://127.0.0.1:8000 2 threads and 125 connections Thread Stats Avg Stdev Max +/- Stdev Latency 117.40ms 142.84ms 590.45ms 79.07% Req/Sec 1.33k 175.19 1.77k 69.00% Latency Distribution 50% 22.34ms 75% 223.67ms 90% 357.32ms 99% 460.50ms 79636 requests in 30.07s, 12.74GB read Requests/sec: 2647.96 Transfer/sec: 433.71MB ```
2024-08-14 18:26:21 -04:00
impl WorkerExecutionMode {
pub fn discriminant(&self) -> u8 {
match self {
WorkerExecutionMode::None => 0,
WorkerExecutionMode::Worker => 1,
WorkerExecutionMode::Run => 2,
WorkerExecutionMode::Repl => 3,
WorkerExecutionMode::Eval => 4,
WorkerExecutionMode::Test => 5,
WorkerExecutionMode::Bench => 6,
WorkerExecutionMode::Serve { .. } => 7,
WorkerExecutionMode::Jupyter => 8,
}
}
pub fn serve_info(&self) -> (Option<bool>, Option<usize>) {
match *self {
WorkerExecutionMode::Serve {
is_main,
worker_count,
} => (Some(is_main), worker_count),
_ => (None, None),
}
}
}
/// The log level to use when printing diagnostic log messages, warnings,
/// or errors in the worker.
///
/// Note: This is disconnected with the log crate's log level and the Rust code
/// in this crate will respect that value instead. To specify that, use
/// `log::set_max_level`.
#[derive(Debug, Default, Clone, Copy)]
pub enum WorkerLogLevel {
// WARNING: Ensure this is kept in sync with
// the JS values (search for LogLevel).
Error = 1,
Warn = 2,
#[default]
Info = 3,
Debug = 4,
}
impl From<log::Level> for WorkerLogLevel {
fn from(value: log::Level) -> Self {
match value {
log::Level::Error => WorkerLogLevel::Error,
log::Level::Warn => WorkerLogLevel::Warn,
log::Level::Info => WorkerLogLevel::Info,
log::Level::Debug => WorkerLogLevel::Debug,
log::Level::Trace => WorkerLogLevel::Debug,
}
}
}
/// Common bootstrap options for MainWorker & WebWorker
#[derive(Clone)]
pub struct BootstrapOptions {
pub deno_version: String,
/// Sets `Deno.args` in JS runtime.
pub args: Vec<String>,
pub cpu_count: usize,
pub log_level: WorkerLogLevel,
pub enable_op_summary_metrics: bool,
pub enable_testing_features: bool,
pub locale: String,
pub location: Option<ModuleSpecifier>,
/// Sets `Deno.noColor` in JS runtime.
pub no_color: bool,
pub is_stdout_tty: bool,
pub is_stderr_tty: bool,
pub color_level: deno_terminal::colors::ColorLevel,
// --unstable-* flags
pub unstable_features: Vec<i32>,
pub user_agent: String,
pub inspect: bool,
pub has_node_modules_dir: bool,
pub argv0: Option<String>,
pub node_debug: Option<String>,
pub node_ipc_fd: Option<i64>,
pub mode: WorkerExecutionMode,
// Used by `deno serve`
pub serve_port: Option<u16>,
pub serve_host: Option<String>,
}
impl Default for BootstrapOptions {
fn default() -> Self {
let cpu_count = thread::available_parallelism()
.map(|p| p.get())
.unwrap_or(1);
let runtime_version = env!("CARGO_PKG_VERSION");
let user_agent = format!("Deno/{runtime_version}");
Self {
deno_version: runtime_version.to_string(),
user_agent,
cpu_count,
no_color: !colors::use_color(),
is_stdout_tty: deno_terminal::is_stdout_tty(),
is_stderr_tty: deno_terminal::is_stderr_tty(),
color_level: colors::get_color_level(),
enable_op_summary_metrics: Default::default(),
enable_testing_features: Default::default(),
log_level: Default::default(),
locale: "en".to_string(),
location: Default::default(),
unstable_features: Default::default(),
inspect: Default::default(),
args: Default::default(),
has_node_modules_dir: Default::default(),
argv0: None,
node_debug: None,
node_ipc_fd: None,
mode: WorkerExecutionMode::None,
serve_port: Default::default(),
serve_host: Default::default(),
}
}
}
/// This is a struct that we use to serialize the contents of the `BootstrapOptions`
/// struct above to a V8 form. While `serde_v8` is not as fast as hand-coding this,
/// it's "fast enough" while serializing a large tuple like this that it doesn't appear
/// on flamegraphs.
///
/// Note that a few fields in here are derived from the process and environment and
/// are not sourced from the underlying `BootstrapOptions`.
///
/// Keep this in sync with `99_main.js`.
#[derive(Serialize)]
struct BootstrapV8<'a>(
// deno version
&'a str,
// location
Option<&'a str>,
// granular unstable flags
&'a [i32],
// inspect
bool,
// enable_testing_features
bool,
// has_node_modules_dir
bool,
// argv0
Option<&'a str>,
// node_debug
Option<&'a str>,
// mode
i32,
// serve port
u16,
// serve host
Option<&'a str>,
feat(serve): Opt-in parallelism for `deno serve` (#24920) Adds a `parallel` flag to `deno serve`. When present, we spawn multiple workers to parallelize serving requests. ```bash deno serve --parallel main.ts ``` Currently on linux we use `SO_REUSEPORT` and rely on the fact that the kernel will distribute connections in a round-robin manner. On mac and windows, we sort of emulate this by cloning the underlying file descriptor and passing a handle to each worker. The connections will not be guaranteed to be fairly distributed (and in practice almost certainly won't be), but the distribution is still spread enough to provide a significant performance increase. --- (Run on an Macbook Pro with an M3 Max, serving `deno.com` baseline:: ``` ❯ wrk -d 30s -c 125 --latency http://127.0.0.1:8000 Running 30s test @ http://127.0.0.1:8000 2 threads and 125 connections Thread Stats Avg Stdev Max +/- Stdev Latency 239.78ms 13.56ms 330.54ms 79.12% Req/Sec 258.58 35.56 360.00 70.64% Latency Distribution 50% 236.72ms 75% 248.46ms 90% 256.84ms 99% 268.23ms 15458 requests in 30.02s, 2.47GB read Requests/sec: 514.89 Transfer/sec: 84.33MB ``` this PR (`with --parallel` flag) ``` ❯ wrk -d 30s -c 125 --latency http://127.0.0.1:8000 Running 30s test @ http://127.0.0.1:8000 2 threads and 125 connections Thread Stats Avg Stdev Max +/- Stdev Latency 117.40ms 142.84ms 590.45ms 79.07% Req/Sec 1.33k 175.19 1.77k 69.00% Latency Distribution 50% 22.34ms 75% 223.67ms 90% 357.32ms 99% 460.50ms 79636 requests in 30.07s, 12.74GB read Requests/sec: 2647.96 Transfer/sec: 433.71MB ```
2024-08-14 18:26:21 -04:00
// serve is main
Option<bool>,
// serve worker count
Option<usize>,
);
impl BootstrapOptions {
/// Return the v8 equivalent of this structure.
pub fn as_v8<'s>(
&self,
scope: &mut v8::HandleScope<'s>,
) -> v8::Local<'s, v8::Value> {
let scope = RefCell::new(scope);
let ser = deno_core::serde_v8::Serializer::new(&scope);
feat(serve): Opt-in parallelism for `deno serve` (#24920) Adds a `parallel` flag to `deno serve`. When present, we spawn multiple workers to parallelize serving requests. ```bash deno serve --parallel main.ts ``` Currently on linux we use `SO_REUSEPORT` and rely on the fact that the kernel will distribute connections in a round-robin manner. On mac and windows, we sort of emulate this by cloning the underlying file descriptor and passing a handle to each worker. The connections will not be guaranteed to be fairly distributed (and in practice almost certainly won't be), but the distribution is still spread enough to provide a significant performance increase. --- (Run on an Macbook Pro with an M3 Max, serving `deno.com` baseline:: ``` ❯ wrk -d 30s -c 125 --latency http://127.0.0.1:8000 Running 30s test @ http://127.0.0.1:8000 2 threads and 125 connections Thread Stats Avg Stdev Max +/- Stdev Latency 239.78ms 13.56ms 330.54ms 79.12% Req/Sec 258.58 35.56 360.00 70.64% Latency Distribution 50% 236.72ms 75% 248.46ms 90% 256.84ms 99% 268.23ms 15458 requests in 30.02s, 2.47GB read Requests/sec: 514.89 Transfer/sec: 84.33MB ``` this PR (`with --parallel` flag) ``` ❯ wrk -d 30s -c 125 --latency http://127.0.0.1:8000 Running 30s test @ http://127.0.0.1:8000 2 threads and 125 connections Thread Stats Avg Stdev Max +/- Stdev Latency 117.40ms 142.84ms 590.45ms 79.07% Req/Sec 1.33k 175.19 1.77k 69.00% Latency Distribution 50% 22.34ms 75% 223.67ms 90% 357.32ms 99% 460.50ms 79636 requests in 30.07s, 12.74GB read Requests/sec: 2647.96 Transfer/sec: 433.71MB ```
2024-08-14 18:26:21 -04:00
let (serve_is_main, serve_worker_count) = self.mode.serve_info();
let bootstrap = BootstrapV8(
&self.deno_version,
self.location.as_ref().map(|l| l.as_str()),
self.unstable_features.as_ref(),
self.inspect,
self.enable_testing_features,
self.has_node_modules_dir,
self.argv0.as_deref(),
self.node_debug.as_deref(),
feat(serve): Opt-in parallelism for `deno serve` (#24920) Adds a `parallel` flag to `deno serve`. When present, we spawn multiple workers to parallelize serving requests. ```bash deno serve --parallel main.ts ``` Currently on linux we use `SO_REUSEPORT` and rely on the fact that the kernel will distribute connections in a round-robin manner. On mac and windows, we sort of emulate this by cloning the underlying file descriptor and passing a handle to each worker. The connections will not be guaranteed to be fairly distributed (and in practice almost certainly won't be), but the distribution is still spread enough to provide a significant performance increase. --- (Run on an Macbook Pro with an M3 Max, serving `deno.com` baseline:: ``` ❯ wrk -d 30s -c 125 --latency http://127.0.0.1:8000 Running 30s test @ http://127.0.0.1:8000 2 threads and 125 connections Thread Stats Avg Stdev Max +/- Stdev Latency 239.78ms 13.56ms 330.54ms 79.12% Req/Sec 258.58 35.56 360.00 70.64% Latency Distribution 50% 236.72ms 75% 248.46ms 90% 256.84ms 99% 268.23ms 15458 requests in 30.02s, 2.47GB read Requests/sec: 514.89 Transfer/sec: 84.33MB ``` this PR (`with --parallel` flag) ``` ❯ wrk -d 30s -c 125 --latency http://127.0.0.1:8000 Running 30s test @ http://127.0.0.1:8000 2 threads and 125 connections Thread Stats Avg Stdev Max +/- Stdev Latency 117.40ms 142.84ms 590.45ms 79.07% Req/Sec 1.33k 175.19 1.77k 69.00% Latency Distribution 50% 22.34ms 75% 223.67ms 90% 357.32ms 99% 460.50ms 79636 requests in 30.07s, 12.74GB read Requests/sec: 2647.96 Transfer/sec: 433.71MB ```
2024-08-14 18:26:21 -04:00
self.mode.discriminant() as _,
self.serve_port.unwrap_or_default(),
self.serve_host.as_deref(),
feat(serve): Opt-in parallelism for `deno serve` (#24920) Adds a `parallel` flag to `deno serve`. When present, we spawn multiple workers to parallelize serving requests. ```bash deno serve --parallel main.ts ``` Currently on linux we use `SO_REUSEPORT` and rely on the fact that the kernel will distribute connections in a round-robin manner. On mac and windows, we sort of emulate this by cloning the underlying file descriptor and passing a handle to each worker. The connections will not be guaranteed to be fairly distributed (and in practice almost certainly won't be), but the distribution is still spread enough to provide a significant performance increase. --- (Run on an Macbook Pro with an M3 Max, serving `deno.com` baseline:: ``` ❯ wrk -d 30s -c 125 --latency http://127.0.0.1:8000 Running 30s test @ http://127.0.0.1:8000 2 threads and 125 connections Thread Stats Avg Stdev Max +/- Stdev Latency 239.78ms 13.56ms 330.54ms 79.12% Req/Sec 258.58 35.56 360.00 70.64% Latency Distribution 50% 236.72ms 75% 248.46ms 90% 256.84ms 99% 268.23ms 15458 requests in 30.02s, 2.47GB read Requests/sec: 514.89 Transfer/sec: 84.33MB ``` this PR (`with --parallel` flag) ``` ❯ wrk -d 30s -c 125 --latency http://127.0.0.1:8000 Running 30s test @ http://127.0.0.1:8000 2 threads and 125 connections Thread Stats Avg Stdev Max +/- Stdev Latency 117.40ms 142.84ms 590.45ms 79.07% Req/Sec 1.33k 175.19 1.77k 69.00% Latency Distribution 50% 22.34ms 75% 223.67ms 90% 357.32ms 99% 460.50ms 79636 requests in 30.07s, 12.74GB read Requests/sec: 2647.96 Transfer/sec: 433.71MB ```
2024-08-14 18:26:21 -04:00
serve_is_main,
serve_worker_count,
);
bootstrap.serialize(ser).unwrap()
}
}