mirror of
https://github.com/denoland/rusty_v8.git
synced 2024-11-28 16:21:04 -05:00
fix: Ensure that one-byte strings that are not ASCII go through write_utf8_uninit (#1261)
This commit is contained in:
parent
c4033caf23
commit
d706291c5d
2 changed files with 36 additions and 18 deletions
|
@ -422,13 +422,16 @@ impl String {
|
||||||
|
|
||||||
/// Creates a copy of a [`crate::String`] in a [`std::string::String`].
|
/// Creates a copy of a [`crate::String`] in a [`std::string::String`].
|
||||||
/// Convenience function not present in the original V8 API.
|
/// Convenience function not present in the original V8 API.
|
||||||
#[inline(always)]
|
|
||||||
pub fn to_rust_string_lossy(
|
pub fn to_rust_string_lossy(
|
||||||
&self,
|
&self,
|
||||||
scope: &mut Isolate,
|
scope: &mut Isolate,
|
||||||
) -> std::string::String {
|
) -> std::string::String {
|
||||||
if self.is_onebyte() {
|
let len_utf8 = self.utf8_length(scope);
|
||||||
let len_utf16 = self.length();
|
let len_utf16 = self.length();
|
||||||
|
|
||||||
|
// If len_utf8 == len_utf16 and the string is one-byte, we can take the fast memcpy path. This is true iff the
|
||||||
|
// string is 100% 7-bit ASCII.
|
||||||
|
if self.is_onebyte() && len_utf8 == len_utf16 {
|
||||||
unsafe {
|
unsafe {
|
||||||
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
|
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
|
||||||
// accidentally creating a slice of u8 which would be invalid.
|
// accidentally creating a slice of u8 which would be invalid.
|
||||||
|
@ -452,15 +455,14 @@ impl String {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let capacity = self.utf8_length(scope);
|
|
||||||
// SAFETY: This allocates a buffer manually using the default allocator using the string's capacity.
|
// SAFETY: This allocates a buffer manually using the default allocator using the string's capacity.
|
||||||
// We have a large number of invariants to uphold, so please check changes to this code carefully
|
// We have a large number of invariants to uphold, so please check changes to this code carefully
|
||||||
unsafe {
|
unsafe {
|
||||||
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
|
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
|
||||||
// accidentally creating a slice of u8 which would be invalid.
|
// accidentally creating a slice of u8 which would be invalid.
|
||||||
let layout = std::alloc::Layout::from_size_align(capacity, 1).unwrap();
|
let layout = std::alloc::Layout::from_size_align(len_utf8, 1).unwrap();
|
||||||
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
|
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
|
||||||
let buffer = std::ptr::slice_from_raw_parts_mut(data, capacity);
|
let buffer = std::ptr::slice_from_raw_parts_mut(data, len_utf8);
|
||||||
|
|
||||||
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
|
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
|
||||||
let length = self.write_utf8_uninit(
|
let length = self.write_utf8_uninit(
|
||||||
|
@ -469,26 +471,28 @@ impl String {
|
||||||
None,
|
None,
|
||||||
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
|
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
|
||||||
);
|
);
|
||||||
debug_assert!(length == capacity);
|
debug_assert!(length == len_utf8);
|
||||||
|
|
||||||
// Return an owned string from this guaranteed now-initialized data
|
// Return an owned string from this guaranteed now-initialized data
|
||||||
let buffer = data as *mut u8;
|
let buffer = data as *mut u8;
|
||||||
std::string::String::from_raw_parts(buffer, length, capacity)
|
std::string::String::from_raw_parts(buffer, length, len_utf8)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Converts a [`crate::String`] to either an owned [`std::string::String`], or a borrowed [`str`], depending on whether it fits into the
|
/// Converts a [`crate::String`] to either an owned [`std::string::String`], or a borrowed [`str`], depending on whether it fits into the
|
||||||
/// provided buffer.
|
/// provided buffer.
|
||||||
#[inline(always)]
|
|
||||||
pub fn to_rust_cow_lossy<'a, const N: usize>(
|
pub fn to_rust_cow_lossy<'a, const N: usize>(
|
||||||
&self,
|
&self,
|
||||||
scope: &mut Isolate,
|
scope: &mut Isolate,
|
||||||
buffer: &'a mut [MaybeUninit<u8>; N],
|
buffer: &'a mut [MaybeUninit<u8>; N],
|
||||||
) -> Cow<'a, str> {
|
) -> Cow<'a, str> {
|
||||||
// TODO(mmastrac): Ideally we should be able to access the string's internal representation
|
// TODO(mmastrac): Ideally we should be able to access the string's internal representation
|
||||||
|
let len_utf8 = self.utf8_length(scope);
|
||||||
let len_utf16 = self.length();
|
let len_utf16 = self.length();
|
||||||
if self.is_onebyte() {
|
|
||||||
|
// If len_utf8 == len_utf16 and the string is one-byte, we can take the fast memcpy path. This is true iff the
|
||||||
|
// string is 100% 7-bit ASCII.
|
||||||
|
if self.is_onebyte() && len_utf8 == len_utf16 {
|
||||||
if len_utf16 <= N {
|
if len_utf16 <= N {
|
||||||
let length = self.write_one_byte_uninit(
|
let length = self.write_one_byte_uninit(
|
||||||
scope,
|
scope,
|
||||||
|
@ -532,8 +536,7 @@ impl String {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let capacity = self.utf8_length(scope);
|
if len_utf8 <= N {
|
||||||
if capacity <= N {
|
|
||||||
// No malloc path
|
// No malloc path
|
||||||
let length = self.write_utf8_uninit(
|
let length = self.write_utf8_uninit(
|
||||||
scope,
|
scope,
|
||||||
|
@ -541,7 +544,7 @@ impl String {
|
||||||
None,
|
None,
|
||||||
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
|
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
|
||||||
);
|
);
|
||||||
debug_assert!(length == capacity);
|
debug_assert!(length == len_utf8);
|
||||||
|
|
||||||
// SAFETY: We know that we wrote `length` UTF-8 bytes. See `slice_assume_init_mut` for additional guarantee information.
|
// SAFETY: We know that we wrote `length` UTF-8 bytes. See `slice_assume_init_mut` for additional guarantee information.
|
||||||
unsafe {
|
unsafe {
|
||||||
|
@ -559,9 +562,9 @@ impl String {
|
||||||
unsafe {
|
unsafe {
|
||||||
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
|
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
|
||||||
// accidentally creating a slice of u8 which would be invalid.
|
// accidentally creating a slice of u8 which would be invalid.
|
||||||
let layout = std::alloc::Layout::from_size_align(capacity, 1).unwrap();
|
let layout = std::alloc::Layout::from_size_align(len_utf8, 1).unwrap();
|
||||||
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
|
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
|
||||||
let buffer = std::ptr::slice_from_raw_parts_mut(data, capacity);
|
let buffer = std::ptr::slice_from_raw_parts_mut(data, len_utf8);
|
||||||
|
|
||||||
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
|
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
|
||||||
let length = self.write_utf8_uninit(
|
let length = self.write_utf8_uninit(
|
||||||
|
@ -570,12 +573,12 @@ impl String {
|
||||||
None,
|
None,
|
||||||
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
|
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
|
||||||
);
|
);
|
||||||
debug_assert!(length == capacity);
|
debug_assert!(length == len_utf8);
|
||||||
|
|
||||||
// Return an owned string from this guaranteed now-initialized data
|
// Return an owned string from this guaranteed now-initialized data
|
||||||
let buffer = data as *mut u8;
|
let buffer = data as *mut u8;
|
||||||
Cow::Owned(std::string::String::from_raw_parts(
|
Cow::Owned(std::string::String::from_raw_parts(
|
||||||
buffer, length, capacity,
|
buffer, length, len_utf8,
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -241,6 +241,21 @@ fn global_handle_drop() {
|
||||||
fn test_string() {
|
fn test_string() {
|
||||||
let _setup_guard = setup::parallel_test();
|
let _setup_guard = setup::parallel_test();
|
||||||
let isolate = &mut v8::Isolate::new(Default::default());
|
let isolate = &mut v8::Isolate::new(Default::default());
|
||||||
|
{
|
||||||
|
// Ensure that a Latin-1 string correctly round-trips
|
||||||
|
let scope = &mut v8::HandleScope::new(isolate);
|
||||||
|
let reference = "\u{00a0}";
|
||||||
|
assert_eq!(2, reference.len());
|
||||||
|
let local = v8::String::new(scope, reference).unwrap();
|
||||||
|
assert_eq!(1, local.length());
|
||||||
|
assert_eq!(2, local.utf8_length(scope));
|
||||||
|
// Should round-trip to UTF-8
|
||||||
|
assert_eq!(2, local.to_rust_string_lossy(scope).len());
|
||||||
|
let mut buf = [MaybeUninit::uninit(); 0];
|
||||||
|
assert_eq!(2, local.to_rust_cow_lossy(scope, &mut buf).len());
|
||||||
|
let mut buf = [MaybeUninit::uninit(); 10];
|
||||||
|
assert_eq!(2, local.to_rust_cow_lossy(scope, &mut buf).len());
|
||||||
|
}
|
||||||
{
|
{
|
||||||
let scope = &mut v8::HandleScope::new(isolate);
|
let scope = &mut v8::HandleScope::new(isolate);
|
||||||
let reference = "Hello 🦕 world!";
|
let reference = "Hello 🦕 world!";
|
||||||
|
|
Loading…
Reference in a new issue