0
0
Fork 0
mirror of https://github.com/denoland/rusty_v8.git synced 2024-11-28 16:21:04 -05:00

feat: Use MaybeUninit for to_rust_string_lossy and add to_rust_cow_lossy (#1256)

Co-authored-by: Bartek Iwańczuk <biwanczuk@gmail.com>
This commit is contained in:
Matt Mastracci 2023-06-28 07:46:50 -06:00 committed by GitHub
parent 517f4d7032
commit ad0a65d0a5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 192 additions and 11 deletions

View file

@ -1,6 +1,6 @@
use std::borrow::Cow;
use std::convert::TryInto; use std::convert::TryInto;
use std::default::Default; use std::default::Default;
use std::mem::forget;
use std::mem::MaybeUninit; use std::mem::MaybeUninit;
use std::slice; use std::slice;
@ -420,22 +420,163 @@ impl String {
unsafe { v8__String__ContainsOnlyOneByte(self) } unsafe { v8__String__ContainsOnlyOneByte(self) }
} }
/// Creates a copy of a [`crate::String`] in a [`std::string::String`].
/// Convenience function not present in the original V8 API. /// Convenience function not present in the original V8 API.
#[inline(always)] #[inline(always)]
pub fn to_rust_string_lossy( pub fn to_rust_string_lossy(
&self, &self,
scope: &mut Isolate, scope: &mut Isolate,
) -> std::string::String { ) -> std::string::String {
if self.is_onebyte() {
let len_utf16 = self.length();
unsafe {
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
// accidentally creating a slice of u8 which would be invalid.
let layout = std::alloc::Layout::from_size_align(len_utf16, 1).unwrap();
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
let buffer = std::ptr::slice_from_raw_parts_mut(data, len_utf16);
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
let length = self.write_one_byte_uninit(
scope,
&mut *buffer,
0,
WriteOptions::NO_NULL_TERMINATION
| WriteOptions::REPLACE_INVALID_UTF8,
);
debug_assert!(length == len_utf16);
// Return an owned string from this guaranteed now-initialized data
let buffer = data as *mut u8;
return std::string::String::from_raw_parts(buffer, length, len_utf16);
}
}
let capacity = self.utf8_length(scope); let capacity = self.utf8_length(scope);
let mut string = std::string::String::with_capacity(capacity); // SAFETY: This allocates a buffer manually using the default allocator using the string's capacity.
let data = string.as_mut_ptr(); // We have a large number of invariants to uphold, so please check changes to this code carefully
forget(string); unsafe {
let length = self.write_utf8( // Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
scope, // accidentally creating a slice of u8 which would be invalid.
unsafe { slice::from_raw_parts_mut(data, capacity) }, let layout = std::alloc::Layout::from_size_align(capacity, 1).unwrap();
None, let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8, let buffer = std::ptr::slice_from_raw_parts_mut(data, capacity);
);
unsafe { std::string::String::from_raw_parts(data, length, capacity) } // Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
let length = self.write_utf8_uninit(
scope,
&mut *buffer,
None,
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
);
debug_assert!(length == capacity);
// Return an owned string from this guaranteed now-initialized data
let buffer = data as *mut u8;
std::string::String::from_raw_parts(buffer, length, capacity)
}
}
/// Converts a [`crate::String`] to either an owned [`std::string::String`], or a borrowed [`str`], depending on whether it fits into the
/// provided buffer.
#[inline(always)]
pub fn to_rust_cow_lossy<'a, const N: usize>(
&self,
scope: &mut Isolate,
buffer: &'a mut [MaybeUninit<u8>; N],
) -> Cow<'a, str> {
// TODO(mmastrac): Ideally we should be able to access the string's internal representation
let len_utf16 = self.length();
if self.is_onebyte() {
if len_utf16 <= N {
let length = self.write_one_byte_uninit(
scope,
buffer,
0,
WriteOptions::NO_NULL_TERMINATION,
);
debug_assert!(length == len_utf16);
unsafe {
// Get a slice of &[u8] of what we know is initialized now
let buffer = &mut buffer[..length];
let buffer = &mut *(buffer as *mut [_] as *mut [u8]);
// We know it's valid UTF-8, so make a string
return Cow::Borrowed(std::str::from_utf8_unchecked(buffer));
}
}
unsafe {
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
// accidentally creating a slice of u8 which would be invalid.
let layout = std::alloc::Layout::from_size_align(len_utf16, 1).unwrap();
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
let buffer = std::ptr::slice_from_raw_parts_mut(data, len_utf16);
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
let length = self.write_one_byte_uninit(
scope,
&mut *buffer,
0,
WriteOptions::NO_NULL_TERMINATION
| WriteOptions::REPLACE_INVALID_UTF8,
);
debug_assert!(length == len_utf16);
// Return an owned string from this guaranteed now-initialized data
let buffer = data as *mut u8;
return Cow::Owned(std::string::String::from_raw_parts(
buffer, length, len_utf16,
));
}
}
let capacity = self.utf8_length(scope);
if capacity <= N {
// No malloc path
let length = self.write_utf8_uninit(
scope,
buffer,
None,
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
);
debug_assert!(length == capacity);
// SAFETY: We know that we wrote `length` UTF-8 bytes. See `slice_assume_init_mut` for additional guarantee information.
unsafe {
// Get a slice of &[u8] of what we know is initialized now
let buffer = &mut buffer[..length];
let buffer = &mut *(buffer as *mut [_] as *mut [u8]);
// We know it's valid UTF-8, so make a string
return Cow::Borrowed(std::str::from_utf8_unchecked(buffer));
}
}
// SAFETY: This allocates a buffer manually using the default allocator using the string's capacity.
// We have a large number of invariants to uphold, so please check changes to this code carefully
unsafe {
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
// accidentally creating a slice of u8 which would be invalid.
let layout = std::alloc::Layout::from_size_align(capacity, 1).unwrap();
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
let buffer = std::ptr::slice_from_raw_parts_mut(data, capacity);
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
let length = self.write_utf8_uninit(
scope,
&mut *buffer,
None,
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
);
debug_assert!(length == capacity);
// Return an owned string from this guaranteed now-initialized data
let buffer = data as *mut u8;
Cow::Owned(std::string::String::from_raw_parts(
buffer, length, capacity,
))
}
} }
} }

View file

@ -1,6 +1,7 @@
// Copyright 2019-2021 the Deno authors. All rights reserved. MIT license. // Copyright 2019-2021 the Deno authors. All rights reserved. MIT license.
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use std::any::type_name; use std::any::type_name;
use std::borrow::Cow;
use std::cell::RefCell; use std::cell::RefCell;
use std::collections::hash_map::DefaultHasher; use std::collections::hash_map::DefaultHasher;
use std::collections::HashMap; use std::collections::HashMap;
@ -410,6 +411,45 @@ fn test_string() {
let invalid_4_octet_sequence = valid_6_octet_sequence.unwrap(); let invalid_4_octet_sequence = valid_6_octet_sequence.unwrap();
assert_eq!(invalid_4_octet_sequence.length(), 6); assert_eq!(invalid_4_octet_sequence.length(), 6);
} }
{
// Exercises `to_rust_cow_lossy` with a stack buffer that is too small and
// one that is large enough, first for a string created through
// `new_from_one_byte` and then for one created through `new_from_utf8`.
let scope = &mut v8::HandleScope::new(isolate);
// ASCII-only text, handed to V8 as raw one-byte data.
let s = "Lorem ipsum dolor sit amet. Qui inventore debitis et voluptas cupiditate qui recusandae molestias et ullam possimus";
let one_byte = v8::String::new_from_one_byte(
scope,
s.as_bytes(),
v8::NewStringType::Normal,
)
.unwrap();
// Does not fit: the 10-byte buffer is shorter than the text, so the result
// is expected to be an owned string with the same contents.
let mut buffer = [MaybeUninit::uninit(); 10];
let cow = one_byte.to_rust_cow_lossy(scope, &mut buffer);
assert!(matches!(cow, Cow::Owned(_)));
assert_eq!(s, cow);
// Fits: with a 1000-byte buffer the result is expected to borrow from the
// buffer instead of allocating.
let mut buffer = [MaybeUninit::uninit(); 1000];
let cow = one_byte.to_rust_cow_lossy(scope, &mut buffer);
assert!(matches!(cow, Cow::Borrowed(_)));
assert_eq!(s, cow);
// Same text prefixed with a non-ASCII character, created from UTF-8.
// NOTE(review): the name suggests V8 stores this in its two-byte
// representation; that is not asserted here.
let s = "🦕 Lorem ipsum dolor sit amet. Qui inventore debitis et voluptas cupiditate qui recusandae molestias et ullam possimus";
let two_bytes =
v8::String::new_from_utf8(scope, s.as_bytes(), v8::NewStringType::Normal)
.unwrap();
// Does not fit: expect an owned string that round-trips the original text.
let mut buffer = [MaybeUninit::uninit(); 10];
let cow = two_bytes.to_rust_cow_lossy(scope, &mut buffer);
assert!(matches!(cow, Cow::Owned(_)));
assert_eq!(s, cow);
// Fits: expect a borrowed string that round-trips the original text.
let mut buffer = [MaybeUninit::uninit(); 1000];
let cow = two_bytes.to_rust_cow_lossy(scope, &mut buffer);
assert!(matches!(cow, Cow::Borrowed(_)));
assert_eq!(s, cow);
}
} }
#[test] #[test]