From 5c5f4ea1d6fe0e46ea535234bf9ae46b1a2a981b Mon Sep 17 00:00:00 2001 From: Andreu Botella Date: Wed, 22 Sep 2021 18:02:15 +0200 Subject: [PATCH] fix(workers): Don't panic when a worker's parent thread stops running (#12156) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This panic could happen in the following cases: - A non-fatal error being thrown from a worker, that doesn't terminate the worker's execution, but propagates to the main thread without being handled, and makes the main thread terminate. - A nested worker being alive while its parent worker gets terminated. - A race condition if the main event loop terminates the worker as part of its last task, but the worker doesn't fully terminate before the main event loop stops running. This panic happens because a worker's event loop should have pending ops as long as the worker isn't closed or terminated – but if an event loop finishes running while it has living workers, its associated `WorkerThread` structs will be dropped, closing the channels that keep those ops pending. This change adds a `Drop` implementation to `WorkerThread`, which terminates the worker without waiting for a response. This fixes the panic, and makes it so nested workers are automatically terminated once any of their ancestors is closed or terminated. This change also refactors a worker's termination code into a `WorkerThread::terminate()` method. Closes #11342. Co-authored-by: Bartek Iwańczuk --- cli/tests/integration/run_tests.rs | 6 +++ cli/tests/testdata/worker_drop_handle_race.js | 12 +++++ .../testdata/worker_drop_handle_race.js.out | 8 ++++ .../testdata/workers/drop_handle_race.js | 3 ++ runtime/ops/worker_host.rs | 48 +++++++++++++------ runtime/web_worker.rs | 6 +-- 6 files changed, 65 insertions(+), 18 deletions(-) create mode 100644 cli/tests/testdata/worker_drop_handle_race.js create mode 100644 cli/tests/testdata/worker_drop_handle_race.js.out create mode 100644 cli/tests/testdata/workers/drop_handle_race.js diff --git a/cli/tests/integration/run_tests.rs b/cli/tests/integration/run_tests.rs index df92ad4227..5116db2951 100644 --- a/cli/tests/integration/run_tests.rs +++ b/cli/tests/integration/run_tests.rs @@ -1188,6 +1188,12 @@ itest!(worker_close_race { output: "worker_close_race.js.out", }); +itest!(worker_drop_handle_race { + args: "run --quiet --reload --allow-read worker_drop_handle_race.js", + output: "worker_drop_handle_race.js.out", + exit_code: 1, +}); + itest!(worker_message_before_close { args: "run --quiet --reload --allow-read worker_message_before_close.js", output: "worker_message_before_close.js.out", diff --git a/cli/tests/testdata/worker_drop_handle_race.js b/cli/tests/testdata/worker_drop_handle_race.js new file mode 100644 index 0000000000..d637ac8c26 --- /dev/null +++ b/cli/tests/testdata/worker_drop_handle_race.js @@ -0,0 +1,12 @@ +// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license. + +// https://github.com/denoland/deno/issues/11342 +// Test for a panic that happens when the main thread's event loop finishes +// running while the worker's event loop is still spinning. + +// The exception thrown in the worker will not terminate the worker, but it will +// propagate to the main thread and cause it to exit. +new Worker( + new URL("./workers/drop_handle_race.js", import.meta.url).href, + { type: "module" }, +); diff --git a/cli/tests/testdata/worker_drop_handle_race.js.out b/cli/tests/testdata/worker_drop_handle_race.js.out new file mode 100644 index 0000000000..71dfde620b --- /dev/null +++ b/cli/tests/testdata/worker_drop_handle_race.js.out @@ -0,0 +1,8 @@ +error: Uncaught (in worker "") Error + throw new Error(); + ^ + at [WILDCARD]/workers/drop_handle_race.js:2:9 + at fire (deno:ext/timers/[WILDCARD]) + at handleTimerMacrotask (deno:ext/timers/[WILDCARD]) +error: Uncaught (in promise) Error: Unhandled error event reached main worker. + at Worker.#pollControl (deno:runtime/js/11_workers.js:[WILDCARD]) diff --git a/cli/tests/testdata/workers/drop_handle_race.js b/cli/tests/testdata/workers/drop_handle_race.js new file mode 100644 index 0000000000..30676a600a --- /dev/null +++ b/cli/tests/testdata/workers/drop_handle_race.js @@ -0,0 +1,3 @@ +setTimeout(() => { + throw new Error(); +}, 1000); diff --git a/runtime/ops/worker_host.rs b/runtime/ops/worker_host.rs index 829681ab69..f749e495c0 100644 --- a/runtime/ops/worker_host.rs +++ b/runtime/ops/worker_host.rs @@ -65,7 +65,8 @@ pub type CreateWebWorkerCb = dyn Fn(CreateWebWorkerArgs) -> (WebWorker, Sendable pub struct CreateWebWorkerCbHolder(Arc); pub struct WorkerThread { - join_handle: JoinHandle>, + // It's an Option so we can take the value before dropping the WorkerThread. + join_handle: Option>>, worker_handle: WebWorkerHandle, // A WorkerThread that hasn't been explicitly terminated can only be removed @@ -75,6 +76,34 @@ pub struct WorkerThread { message_closed: bool, } +impl WorkerThread { + fn terminate(mut self) { + self.worker_handle.clone().terminate(); + self + .join_handle + .take() + .unwrap() + .join() + .expect("Worker thread panicked") + .expect("Panic in worker event loop"); + + // Optimization so the Drop impl doesn't try to terminate the worker handle + // again. + self.ctrl_closed = true; + self.message_closed = true; + } +} + +impl Drop for WorkerThread { + fn drop(&mut self) { + // If either of the channels is closed, the worker thread has at least + // started closing, and its event loop won't start another run. + if !(self.ctrl_closed || self.message_closed) { + self.worker_handle.clone().terminate(); + } + } +} + pub type WorkersTable = HashMap; pub fn init(create_web_worker_cb: Arc) -> Extension { @@ -557,7 +586,7 @@ fn op_create_worker( let worker_handle = handle_receiver.recv().unwrap()?; let worker_thread = WorkerThread { - join_handle, + join_handle: Some(join_handle), worker_handle: worker_handle.into(), ctrl_closed: false, message_closed: false, @@ -578,12 +607,7 @@ fn op_host_terminate_worker( _: (), ) -> Result<(), AnyError> { if let Some(worker_thread) = state.borrow_mut::().remove(&id) { - worker_thread.worker_handle.terminate(); - worker_thread - .join_handle - .join() - .expect("Panic in worker thread") - .expect("Panic in worker event loop"); + worker_thread.terminate(); } else { debug!("tried to terminate non-existent worker {}", id); } @@ -625,13 +649,7 @@ fn close_channel( }; if terminate { - let worker_thread = entry.remove(); - worker_thread.worker_handle.terminate(); - worker_thread - .join_handle - .join() - .expect("Worker thread panicked") - .expect("Panic in worker event loop"); + entry.remove().terminate(); } } } diff --git a/runtime/web_worker.rs b/runtime/web_worker.rs index 975028b169..07f6facc5f 100644 --- a/runtime/web_worker.rs +++ b/runtime/web_worker.rs @@ -206,9 +206,9 @@ impl WebWorkerHandle { /// Terminate the worker /// This function will set terminated to true, terminate the isolate and close the message channel pub fn terminate(self) { - // This function can be called multiple times by whomever holds - // the handle. However only a single "termination" should occur so - // we need a guard here. + // A WebWorkerHandle can be terminated / dropped after `self.close()` has + // been called inside the worker, but only a single "termination" can occur, + // so we need a guard here. let already_terminated = self.terminated.swap(true, Ordering::SeqCst); if !already_terminated {