1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2024-12-11 01:58:05 -05:00
denoland-deno/ext/node/ops/buffer.rs

119 lines
3.1 KiB
Rust

// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
use deno_core::anyhow::anyhow;
use deno_core::anyhow::Result;
use deno_core::op2;
#[op2(fast)]
pub fn op_is_ascii(#[buffer] buf: &[u8]) -> bool {
buf.is_ascii()
}
#[op2(fast)]
pub fn op_is_utf8(#[buffer] buf: &[u8]) -> bool {
std::str::from_utf8(buf).is_ok()
}
#[op2]
#[buffer]
pub fn op_transcode(
#[buffer] source: &[u8],
#[string] from_encoding: &str,
#[string] to_encoding: &str,
) -> Result<Vec<u8>> {
match (from_encoding, to_encoding) {
("utf8", "ascii") => Ok(utf8_to_ascii(source)),
("utf8", "latin1") => Ok(utf8_to_latin1(source)),
("utf8", "utf16le") => utf8_to_utf16le(source),
("utf16le", "utf8") => utf16le_to_utf8(source),
("latin1", "utf16le") | ("ascii", "utf16le") => {
Ok(latin1_ascii_to_utf16le(source))
}
(from, to) => Err(anyhow!("Unable to transcode Buffer {from}->{to}")),
}
}
fn latin1_ascii_to_utf16le(source: &[u8]) -> Vec<u8> {
let mut result = Vec::with_capacity(source.len() * 2);
for &byte in source {
result.push(byte);
result.push(0);
}
result
}
fn utf16le_to_utf8(source: &[u8]) -> Result<Vec<u8>> {
let ucs2_vec: Vec<u16> = source
.chunks(2)
.map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
.collect();
String::from_utf16(&ucs2_vec)
.map(|utf8_string| utf8_string.into_bytes())
.map_err(|e| anyhow!("Invalid UTF-16 sequence: {}", e))
}
fn utf8_to_utf16le(source: &[u8]) -> Result<Vec<u8>> {
let utf8_string = std::str::from_utf8(source)?;
let ucs2_vec: Vec<u16> = utf8_string.encode_utf16().collect();
let bytes: Vec<u8> = ucs2_vec.iter().flat_map(|&x| x.to_le_bytes()).collect();
Ok(bytes)
}
fn utf8_to_latin1(source: &[u8]) -> Vec<u8> {
let mut latin1_bytes = Vec::with_capacity(source.len());
let mut i = 0;
while i < source.len() {
match source[i] {
byte if byte <= 0x7F => {
// ASCII character
latin1_bytes.push(byte);
i += 1;
}
byte if (0xC2..=0xDF).contains(&byte) && i + 1 < source.len() => {
// 2-byte UTF-8 sequence
let codepoint =
((byte as u16 & 0x1F) << 6) | (source[i + 1] as u16 & 0x3F);
latin1_bytes.push(if codepoint <= 0xFF {
codepoint as u8
} else {
b'?'
});
i += 2;
}
_ => {
// 3-byte or 4-byte UTF-8 sequence, or invalid UTF-8
latin1_bytes.push(b'?');
// Skip to the next valid UTF-8 start byte
i += 1;
while i < source.len() && (source[i] & 0xC0) == 0x80 {
i += 1;
}
}
}
}
latin1_bytes
}
fn utf8_to_ascii(source: &[u8]) -> Vec<u8> {
let mut ascii_bytes = Vec::with_capacity(source.len());
let mut i = 0;
while i < source.len() {
match source[i] {
byte if byte <= 0x7F => {
// ASCII character
ascii_bytes.push(byte);
i += 1;
}
_ => {
// Non-ASCII character
ascii_bytes.push(b'?');
// Skip to the next valid UTF-8 start byte
i += 1;
while i < source.len() && (source[i] & 0xC0) == 0x80 {
i += 1;
}
}
}
}
ascii_bytes
}