1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2025-01-15 18:38:53 -05:00
denoland-deno/ext/node/polyfills/internal/idna.ts
Nathan Whitaker a77b2987bc
fix(ext/node): Match punycode module behavior to node (#22847)
Fixes #19214.

We were using the `idna` crate to implement our polyfill for
`punycode.toASCII` and `punycode.toUnicode`. The `idna` crate is
correct, and adheres to the IDNA2003/2008 spec, but it turns out
`node`'s implementations don't really follow any spec! Instead, node
splits the domain by `'.'` and punycode encodes/decodes each part. This
means that node's implementations will happily work on codepoints that
are disallowed by the IDNA specs, causing the error in #19214.

While fixing this, I went ahead and matched the node behavior on all of
the punycode functions and enabled node's punycode test in our
`node_compat` suite.
2024-03-11 15:49:43 -07:00

126 lines
4.6 KiB
TypeScript

// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
// Copyright Joyent, Inc. and other Node contributors.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to permit
// persons to whom the Software is furnished to do so, subject to the
// following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
// USE OR OTHER DEALINGS IN THE SOFTWARE.
// Copyright Mathias Bynens <https://mathiasbynens.be/>
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
// Adapted from https://github.com/mathiasbynens/punycode.js
// TODO(petamoriken): enable prefer-primordials for node polyfills
// deno-lint-ignore-file prefer-primordials
// TODO(cmorten): migrate punycode logic to "icu" internal binding and/or "url"
// internal module so there can be re-use within the "url" module etc.
"use strict";
import {
op_node_idna_domain_to_ascii,
op_node_idna_domain_to_unicode,
} from "ext:core/ops";
/**
* Creates an array containing the numeric code points of each Unicode
* character in the string. While JavaScript uses UCS-2 internally,
* this function will convert a pair of surrogate halves (each of which
* UCS-2 exposes as separate characters) into a single code point,
* matching UTF-16.
*
* @param str The Unicode input string (UCS-2).
* @return The new array of code points.
*/
function ucs2decode(str: string) {
const output = [];
let counter = 0;
const length = str.length;
while (counter < length) {
const value = str.charCodeAt(counter++);
if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
// It's a high surrogate, and there is a next character.
const extra = str.charCodeAt(counter++);
if ((extra & 0xFC00) == 0xDC00) { // Low surrogate.
output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
} else {
// It's an unmatched surrogate; only append this code unit, in case the
// next code unit is the high surrogate of a surrogate pair.
output.push(value);
counter--;
}
} else {
output.push(value);
}
}
return output;
}
/**
* Creates a string based on an array of numeric code points.
* @see `punycode.ucs2.decode`
* @memberOf punycode.ucs2
* @name encode
* @param codePoints The array of numeric code points.
* @returns The new Unicode string (UCS-2).
*/
function ucs2encode(array: number[]) {
return String.fromCodePoint(...array);
}
export const ucs2 = {
decode: ucs2decode,
encode: ucs2encode,
};
/**
* Converts a domain to ASCII as per the IDNA spec
*/
export function domainToASCII(domain: string) {
return op_node_idna_domain_to_ascii(domain);
}
/**
* Converts a domain to Unicode as per the IDNA spec
*/
export function domainToUnicode(domain: string) {
return op_node_idna_domain_to_unicode(domain);
}