0
0
Fork 0
mirror of https://github.com/denoland/rusty_v8.git synced 2025-01-13 01:22:42 -05:00

fix: Ensure that one-byte strings that are not ASCII go through write_utf8_uninit (#1261)

This commit is contained in:
Matt Mastracci 2023-06-30 09:46:29 -06:00 committed by GitHub
parent c4033caf23
commit d706291c5d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 36 additions and 18 deletions

View file

@ -422,13 +422,16 @@ impl String {
/// Creates a copy of a [`crate::String`] in a [`std::string::String`].
/// Convenience function not present in the original V8 API.
#[inline(always)]
pub fn to_rust_string_lossy(
&self,
scope: &mut Isolate,
) -> std::string::String {
if self.is_onebyte() {
let len_utf16 = self.length();
let len_utf8 = self.utf8_length(scope);
let len_utf16 = self.length();
// If len_utf8 == len_utf16 and the string is one-byte, we can take the fast memcpy path. This is true iff the
// string is 100% 7-bit ASCII.
if self.is_onebyte() && len_utf8 == len_utf16 {
unsafe {
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
// accidentally creating a slice of u8 which would be invalid.
@ -452,15 +455,14 @@ impl String {
}
}
let capacity = self.utf8_length(scope);
// SAFETY: This manually allocates a buffer with the default allocator, sized to the string's UTF-8 length (`len_utf8`).
// We have a large number of invariants to uphold, so please check changes to this code carefully.
unsafe {
// Create an uninitialized buffer of `len_utf8` bytes. We need to be careful here to avoid
// accidentally creating a slice of `u8` over uninitialized memory, which would be invalid.
let layout = std::alloc::Layout::from_size_align(capacity, 1).unwrap();
let layout = std::alloc::Layout::from_size_align(len_utf8, 1).unwrap();
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
let buffer = std::ptr::slice_from_raw_parts_mut(data, capacity);
let buffer = std::ptr::slice_from_raw_parts_mut(data, len_utf8);
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
let length = self.write_utf8_uninit(
@ -469,26 +471,28 @@ impl String {
None,
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
);
debug_assert!(length == capacity);
debug_assert!(length == len_utf8);
// Return an owned string from this guaranteed now-initialized data
let buffer = data as *mut u8;
std::string::String::from_raw_parts(buffer, length, capacity)
std::string::String::from_raw_parts(buffer, length, len_utf8)
}
}
/// Converts a [`crate::String`] to either an owned [`std::string::String`], or a borrowed [`str`], depending on whether it fits into the
/// provided buffer.
#[inline(always)]
pub fn to_rust_cow_lossy<'a, const N: usize>(
&self,
scope: &mut Isolate,
buffer: &'a mut [MaybeUninit<u8>; N],
) -> Cow<'a, str> {
// TODO(mmastrac): Ideally we should be able to access the string's internal representation
let len_utf8 = self.utf8_length(scope);
let len_utf16 = self.length();
if self.is_onebyte() {
// If len_utf8 == len_utf16 and the string is one-byte, we can take the fast memcpy path. This is true iff the
// string is 100% 7-bit ASCII.
if self.is_onebyte() && len_utf8 == len_utf16 {
if len_utf16 <= N {
let length = self.write_one_byte_uninit(
scope,
@ -532,8 +536,7 @@ impl String {
}
}
let capacity = self.utf8_length(scope);
if capacity <= N {
if len_utf8 <= N {
// No malloc path
let length = self.write_utf8_uninit(
scope,
@ -541,7 +544,7 @@ impl String {
None,
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
);
debug_assert!(length == capacity);
debug_assert!(length == len_utf8);
// SAFETY: We know that we wrote `length` UTF-8 bytes. See `slice_assume_init_mut` for additional guarantee information.
unsafe {
@ -559,9 +562,9 @@ impl String {
unsafe {
// Create an uninitialized buffer of `len_utf8` bytes. We need to be careful here to avoid
// accidentally creating a slice of `u8` over uninitialized memory, which would be invalid.
let layout = std::alloc::Layout::from_size_align(capacity, 1).unwrap();
let layout = std::alloc::Layout::from_size_align(len_utf8, 1).unwrap();
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
let buffer = std::ptr::slice_from_raw_parts_mut(data, capacity);
let buffer = std::ptr::slice_from_raw_parts_mut(data, len_utf8);
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
let length = self.write_utf8_uninit(
@ -570,12 +573,12 @@ impl String {
None,
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
);
debug_assert!(length == capacity);
debug_assert!(length == len_utf8);
// Return an owned string from this guaranteed now-initialized data
let buffer = data as *mut u8;
Cow::Owned(std::string::String::from_raw_parts(
buffer, length, capacity,
buffer, length, len_utf8,
))
}
}

View file

@ -241,6 +241,21 @@ fn global_handle_drop() {
fn test_string() {
let _setup_guard = setup::parallel_test();
let isolate = &mut v8::Isolate::new(Default::default());
{
// Ensure that a Latin-1 string correctly round-trips
let scope = &mut v8::HandleScope::new(isolate);
let reference = "\u{00a0}";
assert_eq!(2, reference.len());
let local = v8::String::new(scope, reference).unwrap();
assert_eq!(1, local.length());
assert_eq!(2, local.utf8_length(scope));
// Should round-trip to UTF-8
assert_eq!(2, local.to_rust_string_lossy(scope).len());
let mut buf = [MaybeUninit::uninit(); 0];
assert_eq!(2, local.to_rust_cow_lossy(scope, &mut buf).len());
let mut buf = [MaybeUninit::uninit(); 10];
assert_eq!(2, local.to_rust_cow_lossy(scope, &mut buf).len());
}
{
let scope = &mut v8::HandleScope::new(isolate);
let reference = "Hello 🦕 world!";