mirror of
https://github.com/denoland/deno.git
synced 2024-11-21 15:04:11 -05:00
feat(ext/node): buffer.transcode() (#25972)
Closes https://github.com/denoland/deno/issues/25911
This commit is contained in:
parent
620e6b43a6
commit
32c1278736
7 changed files with 255 additions and 3 deletions
|
@ -167,6 +167,7 @@ deno_core::extension!(deno_node,
|
|||
|
||||
ops::buffer::op_is_ascii,
|
||||
ops::buffer::op_is_utf8,
|
||||
ops::buffer::op_transcode,
|
||||
ops::crypto::op_node_check_prime_async,
|
||||
ops::crypto::op_node_check_prime_bytes_async,
|
||||
ops::crypto::op_node_check_prime_bytes,
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
|
||||
|
||||
use deno_core::anyhow::anyhow;
|
||||
use deno_core::anyhow::Result;
|
||||
use deno_core::op2;
|
||||
|
||||
#[op2(fast)]
|
||||
|
@ -11,3 +13,107 @@ pub fn op_is_ascii(#[buffer] buf: &[u8]) -> bool {
|
|||
pub fn op_is_utf8(#[buffer] buf: &[u8]) -> bool {
|
||||
std::str::from_utf8(buf).is_ok()
|
||||
}
|
||||
|
||||
#[op2]
|
||||
#[buffer]
|
||||
pub fn op_transcode(
|
||||
#[buffer] source: &[u8],
|
||||
#[string] from_encoding: &str,
|
||||
#[string] to_encoding: &str,
|
||||
) -> Result<Vec<u8>> {
|
||||
match (from_encoding, to_encoding) {
|
||||
("utf8", "ascii") => Ok(utf8_to_ascii(source)),
|
||||
("utf8", "latin1") => Ok(utf8_to_latin1(source)),
|
||||
("utf8", "utf16le") => utf8_to_utf16le(source),
|
||||
("utf16le", "utf8") => utf16le_to_utf8(source),
|
||||
("latin1", "utf16le") | ("ascii", "utf16le") => {
|
||||
Ok(latin1_ascii_to_utf16le(source))
|
||||
}
|
||||
(from, to) => Err(anyhow!("Unable to transcode Buffer {from}->{to}")),
|
||||
}
|
||||
}
|
||||
|
||||
/// Widens every input byte into a little-endian UTF-16 code unit.
///
/// Each source byte is emitted unchanged, followed by a zero high byte, so the
/// output is exactly twice as long as the input.
fn latin1_ascii_to_utf16le(source: &[u8]) -> Vec<u8> {
  let mut widened = Vec::with_capacity(source.len() * 2);
  widened.extend(source.iter().flat_map(|&unit| [unit, 0u8]));
  widened
}
|
||||
|
||||
fn utf16le_to_utf8(source: &[u8]) -> Result<Vec<u8>> {
|
||||
let ucs2_vec: Vec<u16> = source
|
||||
.chunks(2)
|
||||
.map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
|
||||
.collect();
|
||||
String::from_utf16(&ucs2_vec)
|
||||
.map(|utf8_string| utf8_string.into_bytes())
|
||||
.map_err(|e| anyhow!("Invalid UTF-16 sequence: {}", e))
|
||||
}
|
||||
|
||||
fn utf8_to_utf16le(source: &[u8]) -> Result<Vec<u8>> {
|
||||
let utf8_string = std::str::from_utf8(source)?;
|
||||
let ucs2_vec: Vec<u16> = utf8_string.encode_utf16().collect();
|
||||
let bytes: Vec<u8> = ucs2_vec.iter().flat_map(|&x| x.to_le_bytes()).collect();
|
||||
Ok(bytes)
|
||||
}
|
||||
|
||||
/// Lossily converts UTF-8 bytes to Latin-1.
///
/// ASCII bytes are copied through, two-byte sequences that decode to a code
/// point of at most 0xFF become that single byte, and everything else (longer
/// sequences, code points above 0xFF, malformed input) is replaced by `?`.
///
/// A two-byte lead byte is only decoded when it is actually followed by a
/// continuation byte (`10xxxxxx`). Otherwise the lead byte alone is replaced by
/// `?` and the following byte is processed on its own, rather than being
/// folded into a bogus code point.
fn utf8_to_latin1(source: &[u8]) -> Vec<u8> {
  let mut latin1_bytes = Vec::with_capacity(source.len());
  let mut i = 0;
  while i < source.len() {
    let byte = source[i];
    if byte.is_ascii() {
      latin1_bytes.push(byte);
      i += 1;
      continue;
    }

    // The next byte, but only if it is a UTF-8 continuation byte.
    let continuation = source
      .get(i + 1)
      .copied()
      .filter(|&next| (next & 0xC0) == 0x80);

    match continuation {
      Some(next) if (0xC2..=0xDF).contains(&byte) => {
        // Well-formed 2-byte UTF-8 sequence.
        let codepoint = (u16::from(byte & 0x1F) << 6) | u16::from(next & 0x3F);
        latin1_bytes.push(if codepoint <= 0xFF {
          codepoint as u8
        } else {
          b'?'
        });
        i += 2;
      }
      _ => {
        // 3-byte or 4-byte UTF-8 sequence, or invalid UTF-8.
        latin1_bytes.push(b'?');
        // Skip to the next byte that can start a sequence.
        i += 1;
        while i < source.len() && (source[i] & 0xC0) == 0x80 {
          i += 1;
        }
      }
    }
  }
  latin1_bytes
}
|
||||
|
||||
/// Lossily converts UTF-8 bytes to ASCII.
///
/// ASCII bytes are copied through unchanged. Every non-ASCII byte, together
/// with the run of continuation bytes (`10xxxxxx`) that directly follows it,
/// is replaced by a single `?`.
fn utf8_to_ascii(source: &[u8]) -> Vec<u8> {
  let mut ascii_bytes = Vec::with_capacity(source.len());
  let mut remaining = source.iter().copied().peekable();
  while let Some(byte) = remaining.next() {
    if byte <= 0x7F {
      ascii_bytes.push(byte);
      continue;
    }
    ascii_bytes.push(b'?');
    // Consume the continuation bytes belonging to the replaced character.
    while remaining.next_if(|&unit| (unit & 0xC0) == 0x80).is_some() {}
  }
  ascii_bytes
}
|
||||
|
|
|
@ -13,4 +13,5 @@ export {
|
|||
kMaxLength,
|
||||
kStringMaxLength,
|
||||
SlowBuffer,
|
||||
transcode,
|
||||
} from "ext:deno_node/internal/buffer.mjs";
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
// deno-lint-ignore-file prefer-primordials
|
||||
|
||||
import { core } from "ext:core/mod.js";
|
||||
import { op_is_ascii, op_is_utf8 } from "ext:core/ops";
|
||||
import { op_is_ascii, op_is_utf8, op_transcode } from "ext:core/ops";
|
||||
|
||||
import { TextDecoder, TextEncoder } from "ext:deno_web/08_text_encoding.js";
|
||||
import { codes } from "ext:deno_node/internal/error_codes.ts";
|
||||
|
@ -32,7 +32,11 @@ import {
|
|||
import { normalizeEncoding } from "ext:deno_node/internal/util.mjs";
|
||||
import { validateBuffer } from "ext:deno_node/internal/validators.mjs";
|
||||
import { isUint8Array } from "ext:deno_node/internal/util/types.ts";
|
||||
import { ERR_INVALID_STATE, NodeError } from "ext:deno_node/internal/errors.ts";
|
||||
import {
|
||||
ERR_INVALID_STATE,
|
||||
genericNodeError,
|
||||
NodeError,
|
||||
} from "ext:deno_node/internal/errors.ts";
|
||||
import {
|
||||
forgivingBase64Encode,
|
||||
forgivingBase64UrlEncode,
|
||||
|
@ -2598,6 +2602,48 @@ export function isAscii(input) {
|
|||
], input);
|
||||
}
|
||||
|
||||
/**
 * Re-encodes `source` from one character encoding to another and returns the
 * result as a new Buffer.
 *
 * @param {Buffer | Uint8Array} source Bytes to convert.
 * @param {string} fromEnco Encoding `source` is currently in.
 * @param {string} toEnco Encoding to convert to.
 * @returns {Buffer} The converted bytes; an empty Buffer for empty input.
 * @throws ERR_INVALID_ARG_TYPE when `source` is not a Uint8Array, and a
 * `U_ILLEGAL_ARGUMENT_ERROR` error when either encoding is not recognized or
 * the pair cannot be transcoded.
 */
export function transcode(source, fromEnco, toEnco) {
  if (!isUint8Array(source)) {
    throw new codes.ERR_INVALID_ARG_TYPE(
      "source",
      ["Buffer", "Uint8Array"],
      source,
    );
  }
  if (source.length === 0) return Buffer.alloc(0);

  const code = "U_ILLEGAL_ARGUMENT_ERROR";
  const illegalArgumentError = genericNodeError(
    `Unable to transcode Buffer [${code}]`,
    { code: code, errno: 1 },
  );

  fromEnco = normalizeEncoding(fromEnco);
  toEnco = normalizeEncoding(toEnco);
  if (!(fromEnco && toEnco)) throw illegalArgumentError;

  // Identical encodings, and ASCII widened to UTF-8 or Latin-1, need no
  // conversion: hand back a copy of the input bytes.
  const copyOnly = fromEnco === toEnco ||
    (fromEnco === "ascii" && (toEnco === "utf8" || toEnco === "latin1"));
  if (copyOnly) return Buffer.from(source);

  try {
    const converted = op_transcode(new Uint8Array(source), fromEnco, toEnco);
    return Buffer.from(converted, toEnco);
  } catch (err) {
    // Map the op's "unsupported pair" failure onto the Node-style error;
    // anything else is rethrown untouched.
    if (!err.message.includes("Unable to transcode Buffer")) {
      throw err;
    }
    throw illegalArgumentError;
  }
}
|
||||
|
||||
export default {
|
||||
atob,
|
||||
btoa,
|
||||
|
@ -2610,4 +2656,5 @@ export default {
|
|||
kMaxLength,
|
||||
kStringMaxLength,
|
||||
SlowBuffer,
|
||||
transcode,
|
||||
};
|
||||
|
|
|
@ -406,6 +406,7 @@
|
|||
"test-http-outgoing-settimeout.js",
|
||||
"test-http-url.parse-https.request.js",
|
||||
"test-http-url.parse-only-support-http-https-protocol.js",
|
||||
"test-icu-transcode.js",
|
||||
"test-net-access-byteswritten.js",
|
||||
"test-net-better-error-messages-listen-path.js",
|
||||
"test-net-better-error-messages-path.js",
|
||||
|
|
|
@ -1632,7 +1632,6 @@ NOTE: This file should not be manually edited. Please edit `tests/node_compat/co
|
|||
- [parallel/test-icu-minimum-version.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-icu-minimum-version.js)
|
||||
- [parallel/test-icu-punycode.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-icu-punycode.js)
|
||||
- [parallel/test-icu-stringwidth.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-icu-stringwidth.js)
|
||||
- [parallel/test-icu-transcode.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-icu-transcode.js)
|
||||
- [parallel/test-inspect-address-in-use.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-inspect-address-in-use.js)
|
||||
- [parallel/test-inspect-async-hook-setup-at-inspect.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-inspect-async-hook-setup-at-inspect.js)
|
||||
- [parallel/test-inspect-publish-uid.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-inspect-publish-uid.js)
|
||||
|
|
97
tests/node_compat/test/parallel/test-icu-transcode.js
Normal file
97
tests/node_compat/test/parallel/test-icu-transcode.js
Normal file
|
@ -0,0 +1,97 @@
|
|||
// deno-fmt-ignore-file
|
||||
// deno-lint-ignore-file
|
||||
|
||||
// Copyright Joyent and Node contributors. All rights reserved. MIT license.
|
||||
// Taken from Node 18.12.1
|
||||
// This file is automatically generated by `tests/node_compat/runner/setup.ts`. Do not modify this file manually.
|
||||
|
||||
'use strict';
|
||||
|
||||
const common = require('../common');
|
||||
|
||||
if (!common.hasIntl)
|
||||
common.skip('missing Intl');
|
||||
|
||||
const buffer = require('buffer');
|
||||
const assert = require('assert');
|
||||
const orig = Buffer.from('těst ☕', 'utf8');
|
||||
|
||||
// Test Transcoding
|
||||
const tests = {
|
||||
'latin1': [0x74, 0x3f, 0x73, 0x74, 0x20, 0x3f],
|
||||
'ascii': [0x74, 0x3f, 0x73, 0x74, 0x20, 0x3f],
|
||||
'ucs2': [0x74, 0x00, 0x1b, 0x01, 0x73,
|
||||
0x00, 0x74, 0x00, 0x20, 0x00,
|
||||
0x15, 0x26]
|
||||
};
|
||||
|
||||
for (const test in tests) {
|
||||
const dest = buffer.transcode(orig, 'utf8', test);
|
||||
assert.strictEqual(dest.length, tests[test].length, `utf8->${test} length`);
|
||||
for (let n = 0; n < tests[test].length; n++)
|
||||
assert.strictEqual(dest[n], tests[test][n], `utf8->${test} char ${n}`);
|
||||
}
|
||||
|
||||
{
|
||||
const dest = buffer.transcode(Buffer.from(tests.ucs2), 'ucs2', 'utf8');
|
||||
assert.strictEqual(dest.toString(), orig.toString());
|
||||
}
|
||||
|
||||
{
|
||||
const utf8 = Buffer.from('€'.repeat(4000), 'utf8');
|
||||
const ucs2 = Buffer.from('€'.repeat(4000), 'ucs2');
|
||||
const utf8_to_ucs2 = buffer.transcode(utf8, 'utf8', 'ucs2');
|
||||
const ucs2_to_utf8 = buffer.transcode(ucs2, 'ucs2', 'utf8');
|
||||
assert.deepStrictEqual(utf8, ucs2_to_utf8);
|
||||
assert.deepStrictEqual(ucs2, utf8_to_ucs2);
|
||||
assert.strictEqual(ucs2_to_utf8.toString('utf8'),
|
||||
utf8_to_ucs2.toString('ucs2'));
|
||||
}
|
||||
|
||||
assert.throws(
|
||||
() => buffer.transcode(null, 'utf8', 'ascii'),
|
||||
{
|
||||
name: 'TypeError',
|
||||
code: 'ERR_INVALID_ARG_TYPE',
|
||||
message: 'The "source" argument must be an instance of Buffer ' +
|
||||
'or Uint8Array. Received null'
|
||||
}
|
||||
);
|
||||
|
||||
assert.throws(
|
||||
() => buffer.transcode(Buffer.from('a'), 'b', 'utf8'),
|
||||
/^Error: Unable to transcode Buffer \[U_ILLEGAL_ARGUMENT_ERROR\]/
|
||||
);
|
||||
|
||||
assert.throws(
|
||||
() => buffer.transcode(Buffer.from('a'), 'uf8', 'b'),
|
||||
/^Error: Unable to transcode Buffer \[U_ILLEGAL_ARGUMENT_ERROR\]$/
|
||||
);
|
||||
|
||||
assert.deepStrictEqual(
|
||||
buffer.transcode(Buffer.from('hi', 'ascii'), 'ascii', 'utf16le'),
|
||||
Buffer.from('hi', 'utf16le'));
|
||||
assert.deepStrictEqual(
|
||||
buffer.transcode(Buffer.from('hi', 'latin1'), 'latin1', 'utf16le'),
|
||||
Buffer.from('hi', 'utf16le'));
|
||||
assert.deepStrictEqual(
|
||||
buffer.transcode(Buffer.from('hä', 'latin1'), 'latin1', 'utf16le'),
|
||||
Buffer.from('hä', 'utf16le'));
|
||||
|
||||
// Test that Uint8Array arguments are okay.
|
||||
{
|
||||
const uint8array = new Uint8Array([...Buffer.from('hä', 'latin1')]);
|
||||
assert.deepStrictEqual(
|
||||
buffer.transcode(uint8array, 'latin1', 'utf16le'),
|
||||
Buffer.from('hä', 'utf16le'));
|
||||
}
|
||||
|
||||
{
|
||||
const dest = buffer.transcode(new Uint8Array(), 'utf8', 'latin1');
|
||||
assert.strictEqual(dest.length, 0);
|
||||
}
|
||||
|
||||
// Test that it doesn't crash
|
||||
{
|
||||
buffer.transcode(new buffer.SlowBuffer(1), 'utf16le', 'ucs2');
|
||||
}
|
Loading…
Reference in a new issue