From 77d065e034db7ed21a0e110bbbfc5eb5287d009c Mon Sep 17 00:00:00 2001 From: Divy Srivastava Date: Tue, 12 Jul 2022 06:33:05 +0530 Subject: [PATCH] fix(ext/ffi): trampoline for fast calls (#15139) --- .dprint.json | 1 + .gitmodules | 3 + ext/ffi/README.md | 22 ++++ ext/ffi/build.rs | 63 ++++++++++++ ext/ffi/jit_trampoline.rs | 153 ++++++++++++++++++++++++++++ ext/ffi/lib.rs | 39 +++++-- ext/ffi/tcc.rs | 143 ++++++++++++++++++++++++++ ext/ffi/tinycc | 1 + test_ffi/tests/integration_tests.rs | 2 + test_ffi/tests/test.js | 17 +++- third_party | 2 +- 11 files changed, 435 insertions(+), 11 deletions(-) create mode 100644 ext/ffi/build.rs create mode 100644 ext/ffi/jit_trampoline.rs create mode 100644 ext/ffi/tcc.rs create mode 160000 ext/ffi/tinycc diff --git a/.dprint.json b/.dprint.json index 3eeed9a0ac..50a40555e6 100644 --- a/.dprint.json +++ b/.dprint.json @@ -38,6 +38,7 @@ "cli/tsc/*typescript.js", "gh-pages", "target", + "test_ffi/tests/test.js", "test_util/std", "test_util/wpt", "third_party", diff --git a/.gitmodules b/.gitmodules index 1967e6cfa5..a94ebe6689 100644 --- a/.gitmodules +++ b/.gitmodules @@ -9,3 +9,6 @@ [submodule "test_util/wpt"] path = test_util/wpt url = https://github.com/web-platform-tests/wpt.git +[submodule "ext/ffi/tinycc"] + path = ext/ffi/tinycc + url = https://github.com/TinyCC/tinycc diff --git a/ext/ffi/README.md b/ext/ffi/README.md index cc2d81cd2f..5f7f1cb9f4 100644 --- a/ext/ffi/README.md +++ b/ext/ffi/README.md @@ -1,3 +1,25 @@ # deno_ffi This crate implements dynamic library ffi. + +## Performance + +Deno FFI calls have extremely low overhead (~1ns on M1 16GB RAM) and perform on +par with native code. Deno leverages V8 fast api calls and JIT compiled bindings +to achieve these high speeds. + +`Deno.dlopen` generates an optimized and a fallback path. Optimized paths are +triggered when V8 decides to optimize the function, hence call through the Fast +API. 
Fallback paths handle types like function callbacks and implement proper +error handling for unexpected types, which are not supported in Fast calls. + +Optimized calls enter a JIT compiled function "trampoline" that translates Fast +API values directly for symbol calls. JIT compilation itself is super fast, +thanks to `tinycc`. Currently, the optimized path is only supported on Linux and +macOS. + +To run benchmarks: + +```bash +target/release/deno bench --allow-ffi --allow-read --unstable ./test_ffi/tests/bench.js +``` diff --git a/ext/ffi/build.rs b/ext/ffi/build.rs new file mode 100644 index 0000000000..fd6aea6089 --- /dev/null +++ b/ext/ffi/build.rs @@ -0,0 +1,63 @@ +// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license. + +use std::env; + +fn build_tcc() { + { + // TODO(@littledivy): Windows support for fast call. + // let tcc_path = root + // .parent() + // .unwrap() + // .to_path_buf() + // .parent() + // .unwrap() + // .to_path_buf() + // .join("third_party") + // .join("prebuilt") + // .join("win"); + // println!("cargo:rustc-link-search=native={}", tcc_path.display()); + } + #[cfg(not(target_os = "windows"))] + { + use std::path::PathBuf; + use std::process::exit; + use std::process::Command; + + let root = PathBuf::from(concat!(env!("CARGO_MANIFEST_DIR"))); + let tcc_src = root.join("tinycc"); + dbg!(&tcc_src); + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); + let mut configure = Command::new(tcc_src.join("configure")); + configure.current_dir(&out_dir); + configure.args(&["--enable-static", "--extra-cflags=-fPIC -O3 -g -static"]); + let status = configure.status().unwrap(); + if !status.success() { + eprintln!("Fail to configure: {:?}", status); + exit(1); + } + + let mut make = Command::new("make"); + make.current_dir(&out_dir).arg(format!( + "-j{}", + env::var("NUM_JOBS").unwrap_or_else(|_| String::from("1")) + )); + make.args(&["libtcc.a"]); + let status = make.status().unwrap(); + + if !status.success() { + 
eprintln!("Fail to make: {:?}", status); + exit(1); + } + println!("cargo:rustc-link-search=native={}", out_dir.display()); + println!("cargo:rerun-if-changed={}", tcc_src.display()); + } +} + +#[cfg(target_os = "windows")] +fn main() {} + +#[cfg(not(target_os = "windows"))] +fn main() { + build_tcc(); + println!("cargo:rustc-link-lib=static=tcc"); +} diff --git a/ext/ffi/jit_trampoline.rs b/ext/ffi/jit_trampoline.rs new file mode 100644 index 0000000000..40c14dfb0f --- /dev/null +++ b/ext/ffi/jit_trampoline.rs @@ -0,0 +1,153 @@ +// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license. + +use crate::NativeType; +use crate::{tcc::Compiler, Symbol}; +use std::ffi::c_void; +use std::ffi::CString; +use std::fmt::Write as _; + +pub(crate) struct Allocation { + pub addr: *mut c_void, + _ctx: Compiler, + _sym: Box, +} + +macro_rules! cstr { + ($st:expr) => { + &CString::new($st).unwrap() + }; +} + +fn native_arg_to_c(ty: &NativeType) -> &'static str { + match ty { + NativeType::U8 | NativeType::U16 | NativeType::U32 => "uint32_t", + NativeType::I8 | NativeType::I16 | NativeType::I32 => "int32_t", + NativeType::Void => "void", + NativeType::F32 => "float", + NativeType::F64 => "double", + _ => unimplemented!(), + } +} + +fn native_to_c(ty: &NativeType) -> &'static str { + match ty { + NativeType::U8 => "uint8_t", + NativeType::U16 => "uint16_t", + NativeType::U32 => "uint32_t", + NativeType::I8 => "int8_t", + NativeType::I16 => "int16_t", + NativeType::I32 => "int32_t", + NativeType::Void => "void", + NativeType::F32 => "float", + NativeType::F64 => "double", + _ => unimplemented!(), + } +} + +pub(crate) fn codegen(sym: &crate::Symbol) -> String { + let mut c = String::from("#include \n"); + let ret = native_to_c(&sym.result_type); + + // extern func( + c += "\nextern "; + c += ret; + c += " func("; + // p0, p1, ...); + for (i, ty) in sym.parameter_types.iter().enumerate() { + if i > 0 { + c += ", "; + } + c += native_to_c(ty); + let _ = write!(c, " 
p{i}"); + } + c += ");\n\n"; + + // void* recv, p0, p1, ...); + c += ret; + c += " func_trampoline("; + c += "void* recv"; + for (i, ty) in sym.parameter_types.iter().enumerate() { + c += ", "; + c += native_arg_to_c(ty); + let _ = write!(c, " p{i}"); + } + c += ") {\n"; + // return func(p0, p1, ...); + c += " return func("; + for (i, _) in sym.parameter_types.iter().enumerate() { + if i > 0 { + c += ", "; + } + let _ = write!(c, "p{i}"); + } + c += ");\n}\n\n"; + c +} + +pub(crate) fn gen_trampoline( + sym: Box, +) -> Result, ()> { + let mut ctx = Compiler::new()?; + ctx.set_options(cstr!("-nostdlib")); + // SAFETY: symbol satisfies ABI requirement. + unsafe { ctx.add_symbol(cstr!("func"), sym.ptr.0 as *const c_void) }; + let c = codegen(&sym); + + ctx.compile_string(cstr!(c))?; + let alloc = Allocation { + addr: ctx.relocate_and_get_symbol(cstr!("func_trampoline"))?, + _ctx: ctx, + _sym: sym, + }; + Ok(Box::new(alloc)) +} + +#[cfg(test)] +mod tests { + use super::*; + use libffi::middle::Type; + use std::ptr::null_mut; + + fn codegen(parameters: Vec, ret: NativeType) -> String { + let sym = Box::new(crate::Symbol { + cif: libffi::middle::Cif::new(vec![], Type::void()), + ptr: libffi::middle::CodePtr(null_mut()), + parameter_types: parameters, + result_type: ret, + can_callback: false, + }); + super::codegen(&sym) + } + + #[test] + fn test_gen_trampoline() { + assert_eq!( + codegen(vec![], NativeType::Void), + "#include \n\nextern void func();\n\nvoid func_trampoline(void* recv) {\n return func();\n}\n\n" + ); + assert_eq!( + codegen(vec![NativeType::U32, NativeType::U32], NativeType::U32), + "#include \n\nextern uint32_t func(uint32_t p0, uint32_t p1);\n\nuint32_t func_trampoline(void* recv, uint32_t p0, uint32_t p1) {\n return func(p0, p1);\n}\n\n" + ); + assert_eq!( + codegen(vec![NativeType::I32, NativeType::I32], NativeType::I32), + "#include \n\nextern int32_t func(int32_t p0, int32_t p1);\n\nint32_t func_trampoline(void* recv, int32_t p0, int32_t p1) {\n 
return func(p0, p1);\n}\n\n" + ); + assert_eq!( + codegen(vec![NativeType::F32, NativeType::F32], NativeType::F32), + "#include \n\nextern float func(float p0, float p1);\n\nfloat func_trampoline(void* recv, float p0, float p1) {\n return func(p0, p1);\n}\n\n" + ); + assert_eq!( + codegen(vec![NativeType::F64, NativeType::F64], NativeType::F64), + "#include \n\nextern double func(double p0, double p1);\n\ndouble func_trampoline(void* recv, double p0, double p1) {\n return func(p0, p1);\n}\n\n" + ); + } + + #[test] + fn test_gen_trampoline_implicit_cast() { + assert_eq!( + codegen(vec![NativeType::I8, NativeType::U8], NativeType::I8), + "#include \n\nextern int8_t func(int8_t p0, uint8_t p1);\n\nint8_t func_trampoline(void* recv, int32_t p0, uint32_t p1) {\n return func(p0, p1);\n}\n\n" + ) + } +} diff --git a/ext/ffi/lib.rs b/ext/ffi/lib.rs index a5a1567275..feb879aba7 100644 --- a/ext/ffi/lib.rs +++ b/ext/ffi/lib.rs @@ -39,6 +39,11 @@ use std::path::PathBuf; use std::ptr; use std::rc::Rc; +#[cfg(not(target_os = "windows"))] +mod jit_trampoline; +#[cfg(not(target_os = "windows"))] +mod tcc; + thread_local! 
{ static LOCAL_ISOLATE_POINTER: RefCell<*const v8::Isolate> = RefCell::new(ptr::null()); } @@ -72,6 +77,8 @@ struct Symbol { ptr: libffi::middle::CodePtr, parameter_types: Vec, result_type: NativeType, + // This is dead code only on Windows + #[allow(dead_code)] can_callback: bool, } @@ -678,6 +685,7 @@ impl From<&NativeType> for fast_api::Type { } } +#[cfg(not(target_os = "windows"))] fn is_fast_api(rv: NativeType) -> bool { !matches!( rv, @@ -696,25 +704,36 @@ fn make_sync_fn<'s>( scope: &mut v8::HandleScope<'s>, sym: Box, ) -> v8::Local<'s, v8::Function> { - let mut fast_ffi_templ = None; + #[cfg(not(target_os = "windows"))] + let mut fast_ffi_templ: Option = None; + #[cfg(target_os = "windows")] + let fast_ffi_templ: Option = None; + + #[cfg(not(target_os = "windows"))] + let mut fast_allocations: Option<*mut ()> = None; + #[cfg(not(target_os = "windows"))] if !sym.can_callback && !sym.parameter_types.iter().any(|t| !is_fast_api(*t)) && is_fast_api(sym.result_type) { + let ret = fast_api::Type::from(&sym.result_type); + let mut args = sym .parameter_types .iter() .map(|t| t.into()) .collect::>(); - if args.is_empty() { - args.push(fast_api::Type::V8Value); - } + // recv + args.insert(0, fast_api::Type::V8Value); + let symbol_trampoline = + jit_trampoline::gen_trampoline(sym.clone()).expect("gen_trampoline"); fast_ffi_templ = Some(FfiFastCallTemplate { args: args.into_boxed_slice(), - ret: (&fast_api::Type::from(&sym.result_type)).into(), - symbol_ptr: sym.ptr.as_ptr() as *const c_void, + ret: (&ret).into(), + symbol_ptr: symbol_trampoline.addr, }); + fast_allocations = Some(Box::into_raw(symbol_trampoline) as *mut ()); } let sym = Box::leak(sym); @@ -754,7 +773,13 @@ fn make_sync_fn<'s>( Box::new(move |_| { // SAFETY: This is never called twice. pointer obtained // from Box::into_raw, hence, satisfies memory layout requirements. 
- unsafe { Box::from_raw(sym) }; + unsafe { + Box::from_raw(sym); + #[cfg(not(target_os = "windows"))] + if let Some(fast_allocations) = fast_allocations { + Box::from_raw(fast_allocations as *mut jit_trampoline::Allocation); + } + } }), ); diff --git a/ext/ffi/tcc.rs b/ext/ffi/tcc.rs new file mode 100644 index 0000000000..edc30c8932 --- /dev/null +++ b/ext/ffi/tcc.rs @@ -0,0 +1,143 @@ +// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license. + +use std::{ + ffi::CStr, + marker::PhantomData, + os::raw::{c_char, c_int, c_void}, + ptr::null_mut, +}; + +#[repr(C)] +#[derive(Debug)] +pub struct TCCState { + _unused: [u8; 0], +} +pub const TCC_OUTPUT_MEMORY: i32 = 1; + +extern "C" { + pub fn tcc_new() -> *mut TCCState; + pub fn tcc_delete(s: *mut TCCState); + pub fn tcc_set_options(s: *mut TCCState, str: *const c_char); + pub fn tcc_compile_string(s: *mut TCCState, buf: *const c_char) -> c_int; + pub fn tcc_add_symbol( + s: *mut TCCState, + name: *const c_char, + val: *const c_void, + ) -> c_int; + pub fn tcc_set_output_type(s: *mut TCCState, output_type: c_int) -> c_int; + pub fn tcc_relocate(s1: *mut TCCState, ptr: *mut c_void) -> c_int; + pub fn tcc_get_symbol(s: *mut TCCState, name: *const c_char) -> *mut c_void; +} + +/// Compilation context. +pub struct Compiler { + inner: *mut TCCState, + _phantom: PhantomData, + pub bin: Option>, +} + +impl Compiler { + pub fn new() -> Result { + // SAFETY: There is one context per thread. + let inner = unsafe { tcc_new() }; + if inner.is_null() { + Err(()) + } else { + let ret = + // SAFETY: set output to memory. + unsafe { tcc_set_output_type(inner, TCC_OUTPUT_MEMORY as c_int) }; + assert_eq!(ret, 0); + Ok(Self { + inner, + _phantom: PhantomData, + bin: None, + }) + } + } + + pub fn set_options(&mut self, option: &CStr) -> &mut Self { + // SAFETY: option is a null-terminated C string. 
+ unsafe { + tcc_set_options(self.inner, option.as_ptr()); + } + self + } + + pub fn compile_string(&mut self, p: &CStr) -> Result<(), ()> { + // SAFETY: p is a null-terminated C string. + let ret = unsafe { tcc_compile_string(self.inner, p.as_ptr()) }; + if ret == 0 { + Ok(()) + } else { + Err(()) + } + } + + /// # Safety + /// Symbol need satisfy ABI requirement. + pub unsafe fn add_symbol(&mut self, sym: &CStr, val: *const c_void) { + // SAFETY: sym is a null-terminated C string. + let ret = tcc_add_symbol(self.inner, sym.as_ptr(), val); + assert_eq!(ret, 0); + } + + pub fn relocate_and_get_symbol( + &mut self, + sym: &CStr, + ) -> Result<*mut c_void, ()> { + // SAFETY: pass null ptr to get required length + let len = unsafe { tcc_relocate(self.inner, null_mut()) }; + if len == -1 { + return Err(()); + }; + let mut bin = Vec::with_capacity(len as usize); + let ret = + // SAFETY: bin is allocated up to len. + unsafe { tcc_relocate(self.inner, bin.as_mut_ptr() as *mut c_void) }; + if ret != 0 { + return Err(()); + } + // SAFETY: if ret == 0, bin is initialized. + unsafe { + bin.set_len(len as usize); + } + self.bin = Some(bin); + // SAFETY: sym is a null-terminated C string. 
+ let addr = unsafe { tcc_get_symbol(self.inner, sym.as_ptr()) }; + Ok(addr) + } +} + +impl Drop for Compiler { + fn drop(&mut self) { + // SAFETY: delete state from tcc_new() + unsafe { tcc_delete(self.inner) }; + } +} + +#[cfg(test)] +mod test { + use super::*; + use std::ffi::CString; + + #[test] + fn test_compiler_jit() { + let p = CString::new( + r#" + #include + int32_t add(int32_t a, int32_t b) { + return a + b; + } + "# + .as_bytes(), + ) + .unwrap(); + let sym = CString::new("add".as_bytes()).unwrap(); + + let mut ctx = Compiler::new().unwrap(); + let ops = CString::new("-nostdlib").unwrap(); + ctx.set_options(&ops); + assert!(ctx.compile_string(&p).is_ok()); + ctx.relocate_and_get_symbol(&sym).unwrap(); + } +} diff --git a/ext/ffi/tinycc b/ext/ffi/tinycc new file mode 160000 index 0000000000..afc136262e --- /dev/null +++ b/ext/ffi/tinycc @@ -0,0 +1 @@ +Subproject commit afc136262e93ae85fb3643005b36dbfc30d99c42 diff --git a/test_ffi/tests/integration_tests.rs b/test_ffi/tests/integration_tests.rs index 55b0f7a606..4982ffad59 100644 --- a/test_ffi/tests/integration_tests.rs +++ b/test_ffi/tests/integration_tests.rs @@ -30,6 +30,7 @@ fn basic() { .arg("--allow-read") .arg("--unstable") .arg("--quiet") + .arg(r#"--v8-flags=--allow-natives-syntax"#) .arg("tests/test.js") .env("NO_COLOR", "1") .output() @@ -62,6 +63,7 @@ fn basic() { true\n\ 579\n\ 579\n\ + 579\n\ 8589934590n\n\ -8589934590n\n\ 8589934590n\n\ diff --git a/test_ffi/tests/test.js b/test_ffi/tests/test.js index 94c2069c0f..4e05be3edc 100644 --- a/test_ffi/tests/test.js +++ b/test_ffi/tests/test.js @@ -1,6 +1,8 @@ // Copyright 2018-2022 the Deno authors. All rights reserved. MIT license. 
// deno-lint-ignore-file +// Run using cargo test or `--v8-flags=--allow-natives-syntax` + import { assertThrows } from "../../test_util/std/testing/asserts.ts"; const targetDir = Deno.execPath().replace(/[^\/\\]+$/, ""); @@ -182,8 +184,9 @@ const dylib = Deno.dlopen(libPath, { type: "pointer", }, }); +const { symbols } = dylib; -dylib.symbols.printSomething(); +symbols.printSomething(); const buffer = new Uint8Array([1, 2, 3, 4, 5, 6, 7, 8]); const buffer2 = new Uint8Array([9, 10]); dylib.symbols.print_buffer(buffer, buffer.length); @@ -238,7 +241,15 @@ const before = performance.now(); await sleepNonBlocking.call(100); console.log(performance.now() - before >= 100); -console.log(dylib.symbols.add_u32(123, 456)); +const { add_u32 } = symbols; +function addU32Fast(a, b) { + return add_u32(a, b); +}; + +%PrepareFunctionForOptimization(addU32Fast); +console.log(addU32Fast(123, 456)); +%OptimizeFunctionOnNextCall(addU32Fast); +console.log(addU32Fast(123, 456)); console.log(dylib.symbols.add_i32(123, 456)); console.log(dylib.symbols.add_u64(0xffffffffn, 0xffffffffn)); @@ -448,4 +459,4 @@ After: ${postStr}`, } console.log("Correct number of resources"); -})(); +})(); \ No newline at end of file diff --git a/third_party b/third_party index 4fd74a381b..9f314cefb5 160000 --- a/third_party +++ b/third_party @@ -1 +1 @@ -Subproject commit 4fd74a381b2a9f357ea7be80c12c24863596841f +Subproject commit 9f314cefb507e3b9de08edc6046353e4012279fc