mirror of
https://github.com/denoland/rusty_v8.git
synced 2024-11-28 16:21:04 -05:00
feat: Use MaybeUninit for to_rust_string_lossy and add to_rust_cow_lossy (#1256)
Co-authored-by: Bartek Iwańczuk <biwanczuk@gmail.com>
This commit is contained in:
parent
517f4d7032
commit
ad0a65d0a5
2 changed files with 192 additions and 11 deletions
163
src/string.rs
163
src/string.rs
|
@ -1,6 +1,6 @@
|
||||||
|
use std::borrow::Cow;
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
use std::default::Default;
|
use std::default::Default;
|
||||||
use std::mem::forget;
|
|
||||||
use std::mem::MaybeUninit;
|
use std::mem::MaybeUninit;
|
||||||
use std::slice;
|
use std::slice;
|
||||||
|
|
||||||
|
@ -420,22 +420,163 @@ impl String {
|
||||||
unsafe { v8__String__ContainsOnlyOneByte(self) }
|
unsafe { v8__String__ContainsOnlyOneByte(self) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates a copy of a [`crate::String`] in a [`std::string::String`].
|
||||||
/// Convenience function not present in the original V8 API.
|
/// Convenience function not present in the original V8 API.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn to_rust_string_lossy(
|
pub fn to_rust_string_lossy(
|
||||||
&self,
|
&self,
|
||||||
scope: &mut Isolate,
|
scope: &mut Isolate,
|
||||||
) -> std::string::String {
|
) -> std::string::String {
|
||||||
|
if self.is_onebyte() {
|
||||||
|
let len_utf16 = self.length();
|
||||||
|
unsafe {
|
||||||
|
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
|
||||||
|
// accidentally creating a slice of u8 which would be invalid.
|
||||||
|
let layout = std::alloc::Layout::from_size_align(len_utf16, 1).unwrap();
|
||||||
|
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
|
||||||
|
let buffer = std::ptr::slice_from_raw_parts_mut(data, len_utf16);
|
||||||
|
|
||||||
|
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
|
||||||
|
let length = self.write_one_byte_uninit(
|
||||||
|
scope,
|
||||||
|
&mut *buffer,
|
||||||
|
0,
|
||||||
|
WriteOptions::NO_NULL_TERMINATION
|
||||||
|
| WriteOptions::REPLACE_INVALID_UTF8,
|
||||||
|
);
|
||||||
|
debug_assert!(length == len_utf16);
|
||||||
|
|
||||||
|
// Return an owned string from this guaranteed now-initialized data
|
||||||
|
let buffer = data as *mut u8;
|
||||||
|
return std::string::String::from_raw_parts(buffer, length, len_utf16);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let capacity = self.utf8_length(scope);
|
let capacity = self.utf8_length(scope);
|
||||||
let mut string = std::string::String::with_capacity(capacity);
|
// SAFETY: This allocates a buffer manually using the default allocator using the string's capacity.
|
||||||
let data = string.as_mut_ptr();
|
// We have a large number of invariants to uphold, so please check changes to this code carefully
|
||||||
forget(string);
|
unsafe {
|
||||||
let length = self.write_utf8(
|
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
|
||||||
scope,
|
// accidentally creating a slice of u8 which would be invalid.
|
||||||
unsafe { slice::from_raw_parts_mut(data, capacity) },
|
let layout = std::alloc::Layout::from_size_align(capacity, 1).unwrap();
|
||||||
None,
|
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
|
||||||
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
|
let buffer = std::ptr::slice_from_raw_parts_mut(data, capacity);
|
||||||
);
|
|
||||||
unsafe { std::string::String::from_raw_parts(data, length, capacity) }
|
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
|
||||||
|
let length = self.write_utf8_uninit(
|
||||||
|
scope,
|
||||||
|
&mut *buffer,
|
||||||
|
None,
|
||||||
|
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
|
||||||
|
);
|
||||||
|
debug_assert!(length == capacity);
|
||||||
|
|
||||||
|
// Return an owned string from this guaranteed now-initialized data
|
||||||
|
let buffer = data as *mut u8;
|
||||||
|
std::string::String::from_raw_parts(buffer, length, capacity)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts a [`crate::String`] to either an owned [`std::string::String`], or a borrowed [`str`], depending on whether it fits into the
|
||||||
|
/// provided buffer.
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn to_rust_cow_lossy<'a, const N: usize>(
|
||||||
|
&self,
|
||||||
|
scope: &mut Isolate,
|
||||||
|
buffer: &'a mut [MaybeUninit<u8>; N],
|
||||||
|
) -> Cow<'a, str> {
|
||||||
|
// TODO(mmastrac): Ideally we should be able to access the string's internal representation
|
||||||
|
|
||||||
|
let len_utf16 = self.length();
|
||||||
|
if self.is_onebyte() {
|
||||||
|
if len_utf16 <= N {
|
||||||
|
let length = self.write_one_byte_uninit(
|
||||||
|
scope,
|
||||||
|
buffer,
|
||||||
|
0,
|
||||||
|
WriteOptions::NO_NULL_TERMINATION,
|
||||||
|
);
|
||||||
|
debug_assert!(length == len_utf16);
|
||||||
|
unsafe {
|
||||||
|
// Get a slice of &[u8] of what we know is initialized now
|
||||||
|
let buffer = &mut buffer[..length];
|
||||||
|
let buffer = &mut *(buffer as *mut [_] as *mut [u8]);
|
||||||
|
|
||||||
|
// We know it's valid UTF-8, so make a string
|
||||||
|
return Cow::Borrowed(std::str::from_utf8_unchecked(buffer));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
|
||||||
|
// accidentally creating a slice of u8 which would be invalid.
|
||||||
|
let layout = std::alloc::Layout::from_size_align(len_utf16, 1).unwrap();
|
||||||
|
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
|
||||||
|
let buffer = std::ptr::slice_from_raw_parts_mut(data, len_utf16);
|
||||||
|
|
||||||
|
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
|
||||||
|
let length = self.write_one_byte_uninit(
|
||||||
|
scope,
|
||||||
|
&mut *buffer,
|
||||||
|
0,
|
||||||
|
WriteOptions::NO_NULL_TERMINATION
|
||||||
|
| WriteOptions::REPLACE_INVALID_UTF8,
|
||||||
|
);
|
||||||
|
debug_assert!(length == len_utf16);
|
||||||
|
|
||||||
|
// Return an owned string from this guaranteed now-initialized data
|
||||||
|
let buffer = data as *mut u8;
|
||||||
|
return Cow::Owned(std::string::String::from_raw_parts(
|
||||||
|
buffer, length, len_utf16,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let capacity = self.utf8_length(scope);
|
||||||
|
if capacity <= N {
|
||||||
|
// No malloc path
|
||||||
|
let length = self.write_utf8_uninit(
|
||||||
|
scope,
|
||||||
|
buffer,
|
||||||
|
None,
|
||||||
|
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
|
||||||
|
);
|
||||||
|
debug_assert!(length == capacity);
|
||||||
|
|
||||||
|
// SAFETY: We know that we wrote `length` UTF-8 bytes. See `slice_assume_init_mut` for additional guarantee information.
|
||||||
|
unsafe {
|
||||||
|
// Get a slice of &[u8] of what we know is initialized now
|
||||||
|
let buffer = &mut buffer[..length];
|
||||||
|
let buffer = &mut *(buffer as *mut [_] as *mut [u8]);
|
||||||
|
|
||||||
|
// We know it's valid UTF-8, so make a string
|
||||||
|
return Cow::Borrowed(std::str::from_utf8_unchecked(buffer));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SAFETY: This allocates a buffer manually using the default allocator using the string's capacity.
|
||||||
|
// We have a large number of invariants to uphold, so please check changes to this code carefully
|
||||||
|
unsafe {
|
||||||
|
// Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
|
||||||
|
// accidentally creating a slice of u8 which would be invalid.
|
||||||
|
let layout = std::alloc::Layout::from_size_align(capacity, 1).unwrap();
|
||||||
|
let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
|
||||||
|
let buffer = std::ptr::slice_from_raw_parts_mut(data, capacity);
|
||||||
|
|
||||||
|
// Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
|
||||||
|
let length = self.write_utf8_uninit(
|
||||||
|
scope,
|
||||||
|
&mut *buffer,
|
||||||
|
None,
|
||||||
|
WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
|
||||||
|
);
|
||||||
|
debug_assert!(length == capacity);
|
||||||
|
|
||||||
|
// Return an owned string from this guaranteed now-initialized data
|
||||||
|
let buffer = data as *mut u8;
|
||||||
|
Cow::Owned(std::string::String::from_raw_parts(
|
||||||
|
buffer, length, capacity,
|
||||||
|
))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// Copyright 2019-2021 the Deno authors. All rights reserved. MIT license.
|
// Copyright 2019-2021 the Deno authors. All rights reserved. MIT license.
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use std::any::type_name;
|
use std::any::type_name;
|
||||||
|
use std::borrow::Cow;
|
||||||
use std::cell::RefCell;
|
use std::cell::RefCell;
|
||||||
use std::collections::hash_map::DefaultHasher;
|
use std::collections::hash_map::DefaultHasher;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
@ -410,6 +411,45 @@ fn test_string() {
|
||||||
let invalid_4_octet_sequence = valid_6_octet_sequence.unwrap();
|
let invalid_4_octet_sequence = valid_6_octet_sequence.unwrap();
|
||||||
assert_eq!(invalid_4_octet_sequence.length(), 6);
|
assert_eq!(invalid_4_octet_sequence.length(), 6);
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
let scope = &mut v8::HandleScope::new(isolate);
|
||||||
|
let s = "Lorem ipsum dolor sit amet. Qui inventore debitis et voluptas cupiditate qui recusandae molestias et ullam possimus";
|
||||||
|
let one_byte = v8::String::new_from_one_byte(
|
||||||
|
scope,
|
||||||
|
s.as_bytes(),
|
||||||
|
v8::NewStringType::Normal,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Does not fit
|
||||||
|
let mut buffer = [MaybeUninit::uninit(); 10];
|
||||||
|
let cow = one_byte.to_rust_cow_lossy(scope, &mut buffer);
|
||||||
|
assert!(matches!(cow, Cow::Owned(_)));
|
||||||
|
assert_eq!(s, cow);
|
||||||
|
|
||||||
|
// Fits
|
||||||
|
let mut buffer = [MaybeUninit::uninit(); 1000];
|
||||||
|
let cow = one_byte.to_rust_cow_lossy(scope, &mut buffer);
|
||||||
|
assert!(matches!(cow, Cow::Borrowed(_)));
|
||||||
|
assert_eq!(s, cow);
|
||||||
|
|
||||||
|
let s = "🦕 Lorem ipsum dolor sit amet. Qui inventore debitis et voluptas cupiditate qui recusandae molestias et ullam possimus";
|
||||||
|
let two_bytes =
|
||||||
|
v8::String::new_from_utf8(scope, s.as_bytes(), v8::NewStringType::Normal)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Does not fit
|
||||||
|
let mut buffer = [MaybeUninit::uninit(); 10];
|
||||||
|
let cow = two_bytes.to_rust_cow_lossy(scope, &mut buffer);
|
||||||
|
assert!(matches!(cow, Cow::Owned(_)));
|
||||||
|
assert_eq!(s, cow);
|
||||||
|
|
||||||
|
// Fits
|
||||||
|
let mut buffer = [MaybeUninit::uninit(); 1000];
|
||||||
|
let cow = two_bytes.to_rust_cow_lossy(scope, &mut buffer);
|
||||||
|
assert!(matches!(cow, Cow::Borrowed(_)));
|
||||||
|
assert_eq!(s, cow);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Loading…
Reference in a new issue