mirror of
https://github.com/denoland/rusty_v8.git
synced 2024-12-25 16:49:29 -05:00
feat: Use MaybeUninit for to_rust_string_lossy and add to_rust_cow_lossy (#1256)
Co-authored-by: Bartek Iwańczuk <biwanczuk@gmail.com>
This commit is contained in:
parent
517f4d7032
commit
ad0a65d0a5
2 changed files with 192 additions and 11 deletions
163
src/string.rs
163
src/string.rs
|
@ -1,6 +1,6 @@
|
|||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use std::default::Default;
|
||||
use std::mem::forget;
|
||||
use std::mem::MaybeUninit;
|
||||
use std::slice;
|
||||
|
||||
|
@ -420,22 +420,163 @@ impl String {
|
|||
unsafe { v8__String__ContainsOnlyOneByte(self) }
|
||||
}
|
||||
|
||||
/// Creates a copy of a [`crate::String`] in a [`std::string::String`].
|
||||
/// Convenience function not present in the original V8 API.
|
||||
#[inline(always)]
|
||||
pub fn to_rust_string_lossy(
|
||||
&self,
|
||||
scope: &mut Isolate,
|
||||
) -> std::string::String {
|
||||
if self.is_onebyte() {
|
||||
let len_utf16 = self.length();
|
||||
unsafe {
|
||||
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
|
||||
// accidentally creating a slice of u8 which would be invalid.
|
||||
let layout = std::alloc::Layout::from_size_align(len_utf16, 1).unwrap();
|
||||
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
|
||||
let buffer = std::ptr::slice_from_raw_parts_mut(data, len_utf16);
|
||||
|
||||
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
|
||||
let length = self.write_one_byte_uninit(
|
||||
scope,
|
||||
&mut *buffer,
|
||||
0,
|
||||
WriteOptions::NO_NULL_TERMINATION
|
||||
| WriteOptions::REPLACE_INVALID_UTF8,
|
||||
);
|
||||
debug_assert!(length == len_utf16);
|
||||
|
||||
// Return an owned string from this guaranteed now-initialized data
|
||||
let buffer = data as *mut u8;
|
||||
return std::string::String::from_raw_parts(buffer, length, len_utf16);
|
||||
}
|
||||
}
|
||||
|
||||
let capacity = self.utf8_length(scope);
|
||||
let mut string = std::string::String::with_capacity(capacity);
|
||||
let data = string.as_mut_ptr();
|
||||
forget(string);
|
||||
let length = self.write_utf8(
|
||||
scope,
|
||||
unsafe { slice::from_raw_parts_mut(data, capacity) },
|
||||
None,
|
||||
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
|
||||
);
|
||||
unsafe { std::string::String::from_raw_parts(data, length, capacity) }
|
||||
// SAFETY: This allocates a buffer manually using the default allocator using the string's capacity.
|
||||
// We have a large number of invariants to uphold, so please check changes to this code carefully
|
||||
unsafe {
|
||||
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
|
||||
// accidentally creating a slice of u8 which would be invalid.
|
||||
let layout = std::alloc::Layout::from_size_align(capacity, 1).unwrap();
|
||||
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
|
||||
let buffer = std::ptr::slice_from_raw_parts_mut(data, capacity);
|
||||
|
||||
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
|
||||
let length = self.write_utf8_uninit(
|
||||
scope,
|
||||
&mut *buffer,
|
||||
None,
|
||||
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
|
||||
);
|
||||
debug_assert!(length == capacity);
|
||||
|
||||
// Return an owned string from this guaranteed now-initialized data
|
||||
let buffer = data as *mut u8;
|
||||
std::string::String::from_raw_parts(buffer, length, capacity)
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a [`crate::String`] to either an owned [`std::string::String`], or a borrowed [`str`], depending on whether it fits into the
|
||||
/// provided buffer.
|
||||
#[inline(always)]
|
||||
pub fn to_rust_cow_lossy<'a, const N: usize>(
|
||||
&self,
|
||||
scope: &mut Isolate,
|
||||
buffer: &'a mut [MaybeUninit<u8>; N],
|
||||
) -> Cow<'a, str> {
|
||||
// TODO(mmastrac): Ideally we should be able to access the string's internal representation
|
||||
|
||||
let len_utf16 = self.length();
|
||||
if self.is_onebyte() {
|
||||
if len_utf16 <= N {
|
||||
let length = self.write_one_byte_uninit(
|
||||
scope,
|
||||
buffer,
|
||||
0,
|
||||
WriteOptions::NO_NULL_TERMINATION,
|
||||
);
|
||||
debug_assert!(length == len_utf16);
|
||||
unsafe {
|
||||
// Get a slice of &[u8] of what we know is initialized now
|
||||
let buffer = &mut buffer[..length];
|
||||
let buffer = &mut *(buffer as *mut [_] as *mut [u8]);
|
||||
|
||||
// We know it's valid UTF-8, so make a string
|
||||
return Cow::Borrowed(std::str::from_utf8_unchecked(buffer));
|
||||
}
|
||||
}
|
||||
|
||||
unsafe {
|
||||
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
|
||||
// accidentally creating a slice of u8 which would be invalid.
|
||||
let layout = std::alloc::Layout::from_size_align(len_utf16, 1).unwrap();
|
||||
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
|
||||
let buffer = std::ptr::slice_from_raw_parts_mut(data, len_utf16);
|
||||
|
||||
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
|
||||
let length = self.write_one_byte_uninit(
|
||||
scope,
|
||||
&mut *buffer,
|
||||
0,
|
||||
WriteOptions::NO_NULL_TERMINATION
|
||||
| WriteOptions::REPLACE_INVALID_UTF8,
|
||||
);
|
||||
debug_assert!(length == len_utf16);
|
||||
|
||||
// Return an owned string from this guaranteed now-initialized data
|
||||
let buffer = data as *mut u8;
|
||||
return Cow::Owned(std::string::String::from_raw_parts(
|
||||
buffer, length, len_utf16,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
let capacity = self.utf8_length(scope);
|
||||
if capacity <= N {
|
||||
// No malloc path
|
||||
let length = self.write_utf8_uninit(
|
||||
scope,
|
||||
buffer,
|
||||
None,
|
||||
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
|
||||
);
|
||||
debug_assert!(length == capacity);
|
||||
|
||||
// SAFETY: We know that we wrote `length` UTF-8 bytes. See `slice_assume_init_mut` for additional guarantee information.
|
||||
unsafe {
|
||||
// Get a slice of &[u8] of what we know is initialized now
|
||||
let buffer = &mut buffer[..length];
|
||||
let buffer = &mut *(buffer as *mut [_] as *mut [u8]);
|
||||
|
||||
// We know it's valid UTF-8, so make a string
|
||||
return Cow::Borrowed(std::str::from_utf8_unchecked(buffer));
|
||||
}
|
||||
}
|
||||
|
||||
// SAFETY: This allocates a buffer manually using the default allocator using the string's capacity.
|
||||
// We have a large number of invariants to uphold, so please check changes to this code carefully
|
||||
unsafe {
|
||||
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
|
||||
// accidentally creating a slice of u8 which would be invalid.
|
||||
let layout = std::alloc::Layout::from_size_align(capacity, 1).unwrap();
|
||||
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
|
||||
let buffer = std::ptr::slice_from_raw_parts_mut(data, capacity);
|
||||
|
||||
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
|
||||
let length = self.write_utf8_uninit(
|
||||
scope,
|
||||
&mut *buffer,
|
||||
None,
|
||||
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
|
||||
);
|
||||
debug_assert!(length == capacity);
|
||||
|
||||
// Return an owned string from this guaranteed now-initialized data
|
||||
let buffer = data as *mut u8;
|
||||
Cow::Owned(std::string::String::from_raw_parts(
|
||||
buffer, length, capacity,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
// Copyright 2019-2021 the Deno authors. All rights reserved. MIT license.
|
||||
use once_cell::sync::Lazy;
|
||||
use std::any::type_name;
|
||||
use std::borrow::Cow;
|
||||
use std::cell::RefCell;
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::collections::HashMap;
|
||||
|
@ -410,6 +411,45 @@ fn test_string() {
|
|||
let invalid_4_octet_sequence = valid_6_octet_sequence.unwrap();
|
||||
assert_eq!(invalid_4_octet_sequence.length(), 6);
|
||||
}
|
||||
{
|
||||
let scope = &mut v8::HandleScope::new(isolate);
|
||||
let s = "Lorem ipsum dolor sit amet. Qui inventore debitis et voluptas cupiditate qui recusandae molestias et ullam possimus";
|
||||
let one_byte = v8::String::new_from_one_byte(
|
||||
scope,
|
||||
s.as_bytes(),
|
||||
v8::NewStringType::Normal,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Does not fit
|
||||
let mut buffer = [MaybeUninit::uninit(); 10];
|
||||
let cow = one_byte.to_rust_cow_lossy(scope, &mut buffer);
|
||||
assert!(matches!(cow, Cow::Owned(_)));
|
||||
assert_eq!(s, cow);
|
||||
|
||||
// Fits
|
||||
let mut buffer = [MaybeUninit::uninit(); 1000];
|
||||
let cow = one_byte.to_rust_cow_lossy(scope, &mut buffer);
|
||||
assert!(matches!(cow, Cow::Borrowed(_)));
|
||||
assert_eq!(s, cow);
|
||||
|
||||
let s = "🦕 Lorem ipsum dolor sit amet. Qui inventore debitis et voluptas cupiditate qui recusandae molestias et ullam possimus";
|
||||
let two_bytes =
|
||||
v8::String::new_from_utf8(scope, s.as_bytes(), v8::NewStringType::Normal)
|
||||
.unwrap();
|
||||
|
||||
// Does not fit
|
||||
let mut buffer = [MaybeUninit::uninit(); 10];
|
||||
let cow = two_bytes.to_rust_cow_lossy(scope, &mut buffer);
|
||||
assert!(matches!(cow, Cow::Owned(_)));
|
||||
assert_eq!(s, cow);
|
||||
|
||||
// Fits
|
||||
let mut buffer = [MaybeUninit::uninit(); 1000];
|
||||
let cow = two_bytes.to_rust_cow_lossy(scope, &mut buffer);
|
||||
assert!(matches!(cow, Cow::Borrowed(_)));
|
||||
assert_eq!(s, cow);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
Loading…
Reference in a new issue