diff options
Diffstat (limited to 'packages/taler-util/src/punycode.ts')
-rw-r--r-- | packages/taler-util/src/punycode.ts | 468 |
1 files changed, 468 insertions, 0 deletions
diff --git a/packages/taler-util/src/punycode.ts b/packages/taler-util/src/punycode.ts new file mode 100644 index 000000000..353e3bf25 --- /dev/null +++ b/packages/taler-util/src/punycode.ts @@ -0,0 +1,468 @@ +/* +Copyright Mathias Bynens <https://mathiasbynens.be/> +Copyright (c) 2022 Taler Systems S.A. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/** Highest positive signed 32-bit float value */ +const maxInt = 2147483647; // aka. 0x7FFFFFFF or 2^31-1 + +/** Bootstring parameters */ +const base = 36; +const tMin = 1; +const tMax = 26; +const skew = 38; +const damp = 700; +const initialBias = 72; +const initialN = 128; // 0x80 +const delimiter = "-"; // '\x2D' + +/** Regular expressions */ +const regexPunycode = /^xn--/; +const regexNonASCII = /[^\0-\x7E]/; // non-ASCII chars +const regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g; // RFC 3490 separators + +/** Error messages */ +const errors = { + overflow: "Overflow: input needs wider integers to process", + "not-basic": "Illegal input >= 0x80 (not a basic code point)", + "invalid-input": "Invalid input", +} as { [x: string]: string }; + +/** Convenience shortcuts */ +const baseMinusTMin = base - tMin; +const floor = Math.floor; +const stringFromCharCode = String.fromCharCode; + +/*--------------------------------------------------------------------------*/ + +/** + * A generic error utility function. + * @private + * @param {String} type The error type. + * @returns {Error} Throws a `RangeError` with the applicable error message. + */ +function error(type: string) { + throw new RangeError(errors[type]); +} + +/** + * A generic `Array#map` utility function. + * @private + * @param {Array} array The array to iterate over. + * @param {Function} callback The function that gets called for every array + * item. + * @returns {Array} A new array of values returned by the callback function. + */ +function map(array: any[], fn: (arg0: any) => any) { + const result = []; + let length = array.length; + while (length--) { + result[length] = fn(array[length]); + } + return result; +} + +/** + * A simple `Array#map`-like wrapper to work with domain name strings or email + * addresses. + * @private + * @param {String} domain The domain name or email address. + * @param {Function} callback The function that gets called for every + * character. + * @returns {Array} A new string of characters returned by the callback + * function. + */ +function mapDomain( + string: string, + fn: { (string: any): any; (string: any): any; (arg0: any): any }, +) { + const parts = string.split("@"); + let result = ""; + if (parts.length > 1) { + // In email addresses, only the domain name should be punycoded. Leave + // the local part (i.e. everything up to `@`) intact. + result = parts[0] + "@"; + string = parts[1]; + } + // Avoid `split(regex)` for IE8 compatibility. See #17. + string = string.replace(regexSeparators, "\x2E"); + const labels = string.split("."); + const encoded = map(labels, fn).join("."); + return result + encoded; +} + +/** + * Creates an array containing the numeric code points of each Unicode + * character in the string. While JavaScript uses UCS-2 internally, + * this function will convert a pair of surrogate halves (each of which + * UCS-2 exposes as separate characters) into a single code point, + * matching UTF-16. + * @see `punycode.ucs2.encode` + * @see <https://mathiasbynens.be/notes/javascript-encoding> + * @memberOf punycode.ucs2 + * @name decode + * @param {String} string The Unicode input string (UCS-2). + * @returns {Array} The new array of code points. + */ +function ucs2decode(string: string) { + const output = []; + let counter = 0; + const length = string.length; + while (counter < length) { + const value = string.charCodeAt(counter++); + if (value >= 0xd800 && value <= 0xdbff && counter < length) { + // It's a high surrogate, and there is a next character. + const extra = string.charCodeAt(counter++); + if ((extra & 0xfc00) == 0xdc00) { + // Low surrogate. + output.push(((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000); + } else { + // It's an unmatched surrogate; only append this code unit, in case the + // next code unit is the high surrogate of a surrogate pair. + output.push(value); + counter--; + } + } else { + output.push(value); + } + } + return output; +} + +/** + * Creates a string based on an array of numeric code points. + * @see `punycode.ucs2.decode` + * @memberOf punycode.ucs2 + * @name encode + * @param {Array} codePoints The array of numeric code points. + * @returns {String} The new Unicode string (UCS-2). + */ +const ucs2encode = (array: any): string => String.fromCodePoint(...array); + +/** + * Converts a basic code point into a digit/integer. + * @see `digitToBasic()` + * @private + * @param {Number} codePoint The basic numeric code point value. + * @returns {Number} The numeric value of a basic code point (for use in + * representing integers) in the range `0` to `base - 1`, or `base` if + * the code point does not represent a value. + */ +const basicToDigit = function (codePoint: number) { + if (codePoint - 0x30 < 0x0a) { + return codePoint - 0x16; + } + if (codePoint - 0x41 < 0x1a) { + return codePoint - 0x41; + } + if (codePoint - 0x61 < 0x1a) { + return codePoint - 0x61; + } + return base; +}; + +/** + * Converts a digit/integer into a basic code point. + * @see `basicToDigit()` + * @private + * @param {Number} digit The numeric value of a basic code point. + * @returns {Number} The basic code point whose value (when used for + * representing integers) is `digit`, which needs to be in the range + * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is + * used; else, the lowercase form is used. The behavior is undefined + * if `flag` is non-zero and `digit` has no uppercase form. + */ +const digitToBasic = function (digit: number, flag: number) { + // 0..25 map to ASCII a..z or A..Z + // 26..35 map to ASCII 0..9 + return digit + 22 + 75 * Number(digit < 26) - (Number(flag != 0) << 5); +}; + +/** + * Bias adaptation function as per section 3.4 of RFC 3492. + * https://tools.ietf.org/html/rfc3492#section-3.4 + * @private + */ +const adapt = function (delta: number, numPoints: number, firstTime: boolean) { + let k = 0; + delta = firstTime ? floor(delta / damp) : delta >> 1; + delta += floor(delta / numPoints); + for ( + ; + /* no initialization */ delta > (baseMinusTMin * tMax) >> 1; + k += base + ) { + delta = floor(delta / baseMinusTMin); + } + return floor(k + ((baseMinusTMin + 1) * delta) / (delta + skew)); +}; + +/** + * Converts a Punycode string of ASCII-only symbols to a string of Unicode + * symbols. + * @memberOf punycode + * @param {String} input The Punycode string of ASCII-only symbols. + * @returns {String} The resulting string of Unicode symbols. + */ +const decode = function (input: string) { + // Don't use UCS-2. + const output = []; + const inputLength = input.length; + let i = 0; + let n = initialN; + let bias = initialBias; + + // Handle the basic code points: let `basic` be the number of input code + // points before the last delimiter, or `0` if there is none, then copy + // the first basic code points to the output. + + let basic = input.lastIndexOf(delimiter); + if (basic < 0) { + basic = 0; + } + + for (let j = 0; j < basic; ++j) { + // if it's not a basic code point + if (input.charCodeAt(j) >= 0x80) { + error("not-basic"); + } + output.push(input.charCodeAt(j)); + } + + // Main decoding loop: start just after the last delimiter if any basic code + // points were copied; start at the beginning otherwise. + + for ( + let index = basic > 0 ? basic + 1 : 0; + index < inputLength /* no final expression */; + + ) { + // `index` is the index of the next character to be consumed. + // Decode a generalized variable-length integer into `delta`, + // which gets added to `i`. The overflow checking is easier + // if we increase `i` as we go, then subtract off its starting + // value at the end to obtain `delta`. + let oldi = i; + for (let w = 1, k = base /* no condition */; ; k += base) { + if (index >= inputLength) { + error("invalid-input"); + } + + const digit = basicToDigit(input.charCodeAt(index++)); + + if (digit >= base || digit > floor((maxInt - i) / w)) { + error("overflow"); + } + + i += digit * w; + const t = k <= bias ? tMin : k >= bias + tMax ? tMax : k - bias; + + if (digit < t) { + break; + } + + const baseMinusT = base - t; + if (w > floor(maxInt / baseMinusT)) { + error("overflow"); + } + + w *= baseMinusT; + } + + const out = output.length + 1; + bias = adapt(i - oldi, out, oldi == 0); + + // `i` was supposed to wrap around from `out` to `0`, + // incrementing `n` each time, so we'll fix that now: + if (floor(i / out) > maxInt - n) { + error("overflow"); + } + + n += floor(i / out); + i %= out; + + // Insert `n` at position `i` of the output. + output.splice(i++, 0, n); + } + + return String.fromCodePoint(...output); +}; + +/** + * Converts a string of Unicode symbols (e.g. a domain name label) to a + * Punycode string of ASCII-only symbols. + * @memberOf punycode + * @param {String} input The string of Unicode symbols. + * @returns {String} The resulting Punycode string of ASCII-only symbols. + */ +const encode = function (inputArg: string) { + const output = []; + + // Convert the input in UCS-2 to an array of Unicode code points. + let input = ucs2decode(inputArg); + + // Cache the length. + let inputLength = input.length; + + // Initialize the state. + let n = initialN; + let delta = 0; + let bias = initialBias; + + // Handle the basic code points. + for (const currentValue of input) { + if (currentValue < 0x80) { + output.push(stringFromCharCode(currentValue)); + } + } + + let basicLength = output.length; + let handledCPCount = basicLength; + + // `handledCPCount` is the number of code points that have been handled; + // `basicLength` is the number of basic code points. + + // Finish the basic string with a delimiter unless it's empty. + if (basicLength) { + output.push(delimiter); + } + + // Main encoding loop: + while (handledCPCount < inputLength) { + // All non-basic code points < n have been handled already. Find the next + // larger one: + let m = maxInt; + for (const currentValue of input) { + if (currentValue >= n && currentValue < m) { + m = currentValue; + } + } + + // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>, + // but guard against overflow. + const handledCPCountPlusOne = handledCPCount + 1; + if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) { + error("overflow"); + } + + delta += (m - n) * handledCPCountPlusOne; + n = m; + + for (const currentValue of input) { + if (currentValue < n && ++delta > maxInt) { + error("overflow"); + } + if (currentValue == n) { + // Represent delta as a generalized variable-length integer. + let q = delta; + for (let k = base /* no condition */; ; k += base) { + const t = k <= bias ? tMin : k >= bias + tMax ? tMax : k - bias; + if (q < t) { + break; + } + const qMinusT = q - t; + const baseMinusT = base - t; + output.push( + stringFromCharCode(digitToBasic(t + (qMinusT % baseMinusT), 0)), + ); + q = floor(qMinusT / baseMinusT); + } + + output.push(stringFromCharCode(digitToBasic(q, 0))); + bias = adapt( + delta, + handledCPCountPlusOne, + handledCPCount == basicLength, + ); + delta = 0; + ++handledCPCount; + } + } + + ++delta; + ++n; + } + return output.join(""); +}; + +/** + * Converts a Punycode string representing a domain name or an email address + * to Unicode. Only the Punycoded parts of the input will be converted, i.e. + * it doesn't matter if you call it on a string that has already been + * converted to Unicode. + * @memberOf punycode + * @param {String} input The Punycoded domain name or email address to + * convert to Unicode. + * @returns {String} The Unicode representation of the given Punycode + * string. + */ +const toUnicode = function (input: string) { + return mapDomain(input, function (string) { + return regexPunycode.test(string) + ? decode(string.slice(4).toLowerCase()) + : string; + }); +}; + +/** + * Converts a Unicode string representing a domain name or an email address to + * Punycode. Only the non-ASCII parts of the domain name will be converted, + * i.e. it doesn't matter if you call it with a domain that's already in + * ASCII. + * @memberOf punycode + * @param {String} input The domain name or email address to convert, as a + * Unicode string. + * @returns {String} The Punycode representation of the given domain name or + * email address. + */ +const toASCII = function (input: string) { + return mapDomain(input, function (string) { + return regexNonASCII.test(string) ? "xn--" + encode(string) : string; + }); +}; + +/*--------------------------------------------------------------------------*/ + +/** Define the public API */ +export const punycode = { + /** + * A string representing the current Punycode.js version number. + * @memberOf punycode + * @type String + */ + version: "2.1.0", + /** + * An object of methods to convert from JavaScript's internal character + * representation (UCS-2) to Unicode code points, and back. + * @see <https://mathiasbynens.be/notes/javascript-encoding> + * @memberOf punycode + * @type Object + */ + ucs2: { + decode: ucs2decode, + encode: ucs2encode, + }, + decode: decode, + encode: encode, + toASCII: toASCII, + toUnicode: toUnicode, +};
\ No newline at end of file |