diff --git a/src/string.rs b/src/string.rs
index 54c6f680..1cd60e6c 100644
--- a/src/string.rs
+++ b/src/string.rs
@@ -135,12 +135,16 @@ pub struct ExternalStringResourceBase(Opaque);
 #[repr(C)]
 /// An external, one-byte string resource.
 /// This corresponds with `v8::String::ExternalOneByteStringResource`.
+///
+/// Note: The data contained in a one-byte string resource is guaranteed to be
+/// Latin-1 data. It is not safe to assume that it is valid UTF-8, as Latin-1
+/// only has commonality with UTF-8 in the ASCII range and differs beyond that.
 pub struct ExternalOneByteStringResource(Opaque);
 
 impl ExternalOneByteStringResource {
   /// Returns a pointer to the data owned by this resource.
   /// This pointer is valid as long as the resource is alive.
-  /// The data is guaranteed to be ASCII.
+  /// The data is guaranteed to be Latin-1.
   pub fn data(&self) -> *const char {
     unsafe { v8__ExternalOneByteStringResource__data(self) }
   }
@@ -151,23 +155,19 @@ impl ExternalOneByteStringResource {
   }
 
   /// Returns the data owned by this resource as a string slice.
-  /// The data is guaranteed to be ASCII.
-  pub fn as_str(&self) -> &str {
+  /// The data is guaranteed to be Latin-1.
+  pub fn as_bytes(&self) -> &[u8] {
     let len = self.length();
     if len == 0 {
-      ""
+      &[]
     } else {
-      // SAFETY: We know this is ASCII and length > 0
-      unsafe {
-        std::str::from_utf8_unchecked(std::slice::from_raw_parts(
-          self.data().cast(),
-          len,
-        ))
-      }
+      // SAFETY: We know this is Latin-1
+      unsafe { std::slice::from_raw_parts(self.data().cast(), len) }
     }
   }
 }
 
+/// A static ASCII string resource for usage in V8, created at build time.
 #[repr(C)]
 #[derive(Copy, Clone, Debug)]
 pub struct OneByteConst {
@@ -551,6 +551,12 @@ impl String {
 
   /// Compile-time function to create an external string resource which
   /// skips the ASCII and length checks.
+  ///
+  /// ## Safety
+  ///
+  /// The passed in buffer must contain only ASCII data. Note that while V8
+  /// allows OneByte string resources to contain Latin-1 data, the OneByteConst
+  /// struct does not allow it.
   #[inline(always)]
   pub const unsafe fn create_external_onebyte_const_unchecked(
     buffer: &'static [u8],
@@ -563,7 +569,10 @@ impl String {
   }
 
   /// Creates a v8::String from a `&'static OneByteConst`
-  /// which is guaranteed to be Latin-1 or ASCII.
+  /// which is guaranteed to be ASCII.
+  ///
+  /// Note that OneByteConst guarantees ASCII even though V8 would allow
+  /// OneByte string resources to contain Latin-1.
   #[inline(always)]
   pub fn new_from_onebyte_const<'s>(
     scope: &mut HandleScope<'s, ()>,
diff --git a/tests/test_api.rs b/tests/test_api.rs
index a37dedd7..2917fd36 100644
--- a/tests/test_api.rs
+++ b/tests/test_api.rs
@@ -7839,22 +7839,23 @@ fn external_onebyte_string() {
   let isolate = &mut v8::Isolate::new(Default::default());
   let scope = &mut v8::HandleScope::new(isolate);
 
-  let input = "hello";
-  let s = v8::String::new_external_onebyte(
-    scope,
-    Box::<str>::from(input).into_boxed_bytes(),
-  )
-  .unwrap();
+  // "hello©"
+  // Note that we're specifically testing a byte array that is not ASCII nor
+  // UTF-8, but is valid Latin-1. V8's one-byte strings accept Latin-1 and we
+  // need to remember this detail: It is not safe to access one-byte strings as
+  // UTF-8 strings.
+  let input = Box::new([b'h', b'e', b'l', b'l', b'o', 0xA9]);
+  let s = v8::String::new_external_onebyte(scope, input).unwrap();
 
   assert!(s.is_external_onebyte());
-  assert_eq!(s.utf8_length(scope), 5);
+  assert_eq!(s.utf8_length(scope), 7);
 
   let one_byte =
     unsafe { &*s.get_external_onebyte_string_resource().unwrap().as_ptr() };
 
-  assert_eq!(one_byte.length(), 5);
+  assert_eq!(one_byte.length(), 6);
 
-  assert_eq!(one_byte.as_str(), "hello");
+  assert_eq!(one_byte.as_bytes(), [b'h', b'e', b'l', b'l', b'o', 0xA9]);
 }
 
 #[test]