mirror of
https://github.com/denoland/deno.git
synced 2024-11-24 15:19:26 -05:00
a77b2987bc
Fixes #19214. We were using the `idna` crate to implement our polyfill for `punycode.toASCII` and `punycode.toUnicode`. The `idna` crate is correct, and adheres to the IDNA2003/2008 spec, but it turns out `node`'s implementations don't really follow any spec! Instead, node splits the domain by `'.'` and punycode encodes/decodes each part. This means that node's implementations will happily work on codepoints that are disallowed by the IDNA specs, causing the error in #19214. While fixing this, I went ahead and matched the node behavior on all of the punycode functions and enabled node's punycode test in our `node_compat` suite.
126 lines
4.6 KiB
TypeScript
126 lines
4.6 KiB
TypeScript
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
|
|
// Copyright Joyent, Inc. and other Node contributors.
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a
|
|
// copy of this software and associated documentation files (the
|
|
// "Software"), to deal in the Software without restriction, including
|
|
// without limitation the rights to use, copy, modify, merge, publish,
|
|
// distribute, sublicense, and/or sell copies of the Software, and to permit
|
|
// persons to whom the Software is furnished to do so, subject to the
|
|
// following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be included
|
|
// in all copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
|
|
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
|
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
|
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
|
// USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
// Copyright Mathias Bynens <https://mathiasbynens.be/>
|
|
|
|
// Permission is hereby granted, free of charge, to any person obtaining
|
|
// a copy of this software and associated documentation files (the
|
|
// "Software"), to deal in the Software without restriction, including
|
|
// without limitation the rights to use, copy, modify, merge, publish,
|
|
// distribute, sublicense, and/or sell copies of the Software, and to
|
|
// permit persons to whom the Software is furnished to do so, subject to
|
|
// the following conditions:
|
|
|
|
// The above copyright notice and this permission notice shall be
|
|
// included in all copies or substantial portions of the Software.
|
|
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
// Adapted from https://github.com/mathiasbynens/punycode.js
|
|
|
|
// TODO(petamoriken): enable prefer-primordials for node polyfills
|
|
// deno-lint-ignore-file prefer-primordials
|
|
|
|
// TODO(cmorten): migrate punycode logic to "icu" internal binding and/or "url"
|
|
// internal module so there can be re-use within the "url" module etc.
|
|
|
|
"use strict";
|
|
|
|
import {
|
|
op_node_idna_domain_to_ascii,
|
|
op_node_idna_domain_to_unicode,
|
|
} from "ext:core/ops";
|
|
|
|
/**
|
|
* Creates an array containing the numeric code points of each Unicode
|
|
* character in the string. While JavaScript uses UCS-2 internally,
|
|
* this function will convert a pair of surrogate halves (each of which
|
|
* UCS-2 exposes as separate characters) into a single code point,
|
|
* matching UTF-16.
|
|
*
|
|
* @param str The Unicode input string (UCS-2).
|
|
* @return The new array of code points.
|
|
*/
|
|
function ucs2decode(str: string) {
|
|
const output = [];
|
|
let counter = 0;
|
|
const length = str.length;
|
|
|
|
while (counter < length) {
|
|
const value = str.charCodeAt(counter++);
|
|
|
|
if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
|
|
// It's a high surrogate, and there is a next character.
|
|
const extra = str.charCodeAt(counter++);
|
|
|
|
if ((extra & 0xFC00) == 0xDC00) { // Low surrogate.
|
|
output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
|
|
} else {
|
|
// It's an unmatched surrogate; only append this code unit, in case the
|
|
// next code unit is the high surrogate of a surrogate pair.
|
|
output.push(value);
|
|
counter--;
|
|
}
|
|
} else {
|
|
output.push(value);
|
|
}
|
|
}
|
|
|
|
return output;
|
|
}
|
|
|
|
/**
|
|
* Creates a string based on an array of numeric code points.
|
|
* @see `punycode.ucs2.decode`
|
|
* @memberOf punycode.ucs2
|
|
* @name encode
|
|
* @param codePoints The array of numeric code points.
|
|
* @returns The new Unicode string (UCS-2).
|
|
*/
|
|
function ucs2encode(array: number[]) {
|
|
return String.fromCodePoint(...array);
|
|
}
|
|
|
|
export const ucs2 = {
|
|
decode: ucs2decode,
|
|
encode: ucs2encode,
|
|
};
|
|
|
|
/**
|
|
* Converts a domain to ASCII as per the IDNA spec
|
|
*/
|
|
export function domainToASCII(domain: string) {
|
|
return op_node_idna_domain_to_ascii(domain);
|
|
}
|
|
|
|
/**
|
|
* Converts a domain to Unicode as per the IDNA spec
|
|
*/
|
|
export function domainToUnicode(domain: string) {
|
|
return op_node_idna_domain_to_unicode(domain);
|
|
}
|