mirror of
https://github.com/denoland/rusty_v8.git
synced 2024-11-24 15:19:31 -05:00
fix: Ensure that one-byte strings that are not ASCII go through write_utf8_uninit (#1261)
This commit is contained in:
parent
c4033caf23
commit
d706291c5d
2 changed files with 36 additions and 18 deletions
|
@ -422,13 +422,16 @@ impl String {
|
|||
|
||||
/// Creates a copy of a [`crate::String`] in a [`std::string::String`].
|
||||
/// Convenience function not present in the original V8 API.
|
||||
#[inline(always)]
|
||||
pub fn to_rust_string_lossy(
|
||||
&self,
|
||||
scope: &mut Isolate,
|
||||
) -> std::string::String {
|
||||
if self.is_onebyte() {
|
||||
let len_utf8 = self.utf8_length(scope);
|
||||
let len_utf16 = self.length();
|
||||
|
||||
// If len_utf8 == len_utf16 and the string is one-byte, we can take the fast memcpy path. This is true iff the
|
||||
// string is 100% 7-bit ASCII.
|
||||
if self.is_onebyte() && len_utf8 == len_utf16 {
|
||||
unsafe {
|
||||
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
|
||||
// accidentally creating a slice of u8 which would be invalid.
|
||||
|
@ -452,15 +455,14 @@ impl String {
|
|||
}
|
||||
}
|
||||
|
||||
let capacity = self.utf8_length(scope);
|
||||
// SAFETY: This manually allocates a buffer with the default allocator, sized to the string's UTF-8 length.
|
||||
// We have a large number of invariants to uphold, so please check changes to this code carefully.
|
||||
unsafe {
|
||||
// Create an uninitialized buffer sized to the string's UTF-8 length. We need to be careful here to avoid
|
||||
// accidentally creating a slice of u8 which would be invalid.
|
||||
let layout = std::alloc::Layout::from_size_align(capacity, 1).unwrap();
|
||||
let layout = std::alloc::Layout::from_size_align(len_utf8, 1).unwrap();
|
||||
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
|
||||
let buffer = std::ptr::slice_from_raw_parts_mut(data, capacity);
|
||||
let buffer = std::ptr::slice_from_raw_parts_mut(data, len_utf8);
|
||||
|
||||
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
|
||||
let length = self.write_utf8_uninit(
|
||||
|
@ -469,26 +471,28 @@ impl String {
|
|||
None,
|
||||
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
|
||||
);
|
||||
debug_assert!(length == capacity);
|
||||
debug_assert!(length == len_utf8);
|
||||
|
||||
// Return an owned string from this guaranteed now-initialized data
|
||||
let buffer = data as *mut u8;
|
||||
std::string::String::from_raw_parts(buffer, length, capacity)
|
||||
std::string::String::from_raw_parts(buffer, length, len_utf8)
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a [`crate::String`] to either an owned [`std::string::String`], or a borrowed [`str`], depending on whether it fits into the
|
||||
/// provided buffer.
|
||||
#[inline(always)]
|
||||
pub fn to_rust_cow_lossy<'a, const N: usize>(
|
||||
&self,
|
||||
scope: &mut Isolate,
|
||||
buffer: &'a mut [MaybeUninit<u8>; N],
|
||||
) -> Cow<'a, str> {
|
||||
// TODO(mmastrac): Ideally we should be able to access the string's internal representation
|
||||
|
||||
let len_utf8 = self.utf8_length(scope);
|
||||
let len_utf16 = self.length();
|
||||
if self.is_onebyte() {
|
||||
|
||||
// If len_utf8 == len_utf16 and the string is one-byte, we can take the fast memcpy path. This is true iff the
|
||||
// string is 100% 7-bit ASCII.
|
||||
if self.is_onebyte() && len_utf8 == len_utf16 {
|
||||
if len_utf16 <= N {
|
||||
let length = self.write_one_byte_uninit(
|
||||
scope,
|
||||
|
@ -532,8 +536,7 @@ impl String {
|
|||
}
|
||||
}
|
||||
|
||||
let capacity = self.utf8_length(scope);
|
||||
if capacity <= N {
|
||||
if len_utf8 <= N {
|
||||
// No malloc path
|
||||
let length = self.write_utf8_uninit(
|
||||
scope,
|
||||
|
@ -541,7 +544,7 @@ impl String {
|
|||
None,
|
||||
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
|
||||
);
|
||||
debug_assert!(length == capacity);
|
||||
debug_assert!(length == len_utf8);
|
||||
|
||||
// SAFETY: We know that we wrote `length` UTF-8 bytes. See `slice_assume_init_mut` for additional guarantee information.
|
||||
unsafe {
|
||||
|
@ -559,9 +562,9 @@ impl String {
|
|||
unsafe {
|
||||
// Create an uninitialized buffer sized to the string's UTF-8 length. We need to be careful here to avoid
|
||||
// accidentally creating a slice of u8 which would be invalid.
|
||||
let layout = std::alloc::Layout::from_size_align(capacity, 1).unwrap();
|
||||
let layout = std::alloc::Layout::from_size_align(len_utf8, 1).unwrap();
|
||||
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
|
||||
let buffer = std::ptr::slice_from_raw_parts_mut(data, capacity);
|
||||
let buffer = std::ptr::slice_from_raw_parts_mut(data, len_utf8);
|
||||
|
||||
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
|
||||
let length = self.write_utf8_uninit(
|
||||
|
@ -570,12 +573,12 @@ impl String {
|
|||
None,
|
||||
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
|
||||
);
|
||||
debug_assert!(length == capacity);
|
||||
debug_assert!(length == len_utf8);
|
||||
|
||||
// Return an owned string from this guaranteed now-initialized data
|
||||
let buffer = data as *mut u8;
|
||||
Cow::Owned(std::string::String::from_raw_parts(
|
||||
buffer, length, capacity,
|
||||
buffer, length, len_utf8,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -241,6 +241,21 @@ fn global_handle_drop() {
|
|||
fn test_string() {
|
||||
let _setup_guard = setup::parallel_test();
|
||||
let isolate = &mut v8::Isolate::new(Default::default());
|
||||
{
|
||||
// Ensure that a Latin-1 string correctly round-trips
|
||||
let scope = &mut v8::HandleScope::new(isolate);
|
||||
let reference = "\u{00a0}";
|
||||
assert_eq!(2, reference.len());
|
||||
let local = v8::String::new(scope, reference).unwrap();
|
||||
assert_eq!(1, local.length());
|
||||
assert_eq!(2, local.utf8_length(scope));
|
||||
// Should round-trip to UTF-8
|
||||
assert_eq!(2, local.to_rust_string_lossy(scope).len());
|
||||
let mut buf = [MaybeUninit::uninit(); 0];
|
||||
assert_eq!(2, local.to_rust_cow_lossy(scope, &mut buf).len());
|
||||
let mut buf = [MaybeUninit::uninit(); 10];
|
||||
assert_eq!(2, local.to_rust_cow_lossy(scope, &mut buf).len());
|
||||
}
|
||||
{
|
||||
let scope = &mut v8::HandleScope::new(isolate);
|
||||
let reference = "Hello 🦕 world!";
|
||||
|
|
Loading…
Reference in a new issue