diff options
author | Nik Nyby <nikolas@gnu.org> | 2015-01-17 17:12:36 -0500 |
---|---|---|
committer | Nik Nyby <nikolas@gnu.org> | 2015-01-17 17:12:36 -0500 |
commit | ada88090ead2c3b9d0804794c5f20f9b24d1c2b1 (patch) | |
tree | 2838a7eee6c5d74094216acebd86915e0ea1de42 /lib/url_handler/node_punycode.js | |
download | librejsxul-ada88090ead2c3b9d0804794c5f20f9b24d1c2b1.tar.lz librejsxul-ada88090ead2c3b9d0804794c5f20f9b24d1c2b1.tar.xz librejsxul-ada88090ead2c3b9d0804794c5f20f9b24d1c2b1.zip |
Import to new git repository
The old repository was using almost 100mb of space because of all
the unnecessary files in the history. So I've imported the code to a
new git repository. Unfortunately the history isn't viewable from this
repository anymore. To see what happened with LibreJS before 2015, see
the old Bazaar repo here: http://bzr.savannah.gnu.org/lh/librejs/
Diffstat (limited to 'lib/url_handler/node_punycode.js')
-rw-r--r-- | lib/url_handler/node_punycode.js | 510 |
1 files changed, 510 insertions, 0 deletions
diff --git a/lib/url_handler/node_punycode.js b/lib/url_handler/node_punycode.js new file mode 100644 index 0000000..163923c --- /dev/null +++ b/lib/url_handler/node_punycode.js @@ -0,0 +1,510 @@ +/*! http://mths.be/punycode v1.2.0 by @mathias */ +;(function(root) { + + /** + * The `punycode` object. + * @name punycode + * @type Object + */ + var punycode, + + /** Detect free variables `define`, `exports`, `module` and `require` */ + freeDefine = typeof define == 'function' && typeof define.amd == 'object' && + define.amd && define, + freeExports = typeof exports == 'object' && exports, + freeModule = typeof module == 'object' && module, + freeRequire = typeof require == 'function' && require, + + /** Highest positive signed 32-bit float value */ + maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1 + + /** Bootstring parameters */ + base = 36, + tMin = 1, + tMax = 26, + skew = 38, + damp = 700, + initialBias = 72, + initialN = 128, // 0x80 + delimiter = '-', // '\x2D' + + /** Regular expressions */ + regexPunycode = /^xn--/, + regexNonASCII = /[^ -~]/, // unprintable ASCII chars + non-ASCII chars + regexSeparators = /\x2E|\u3002|\uFF0E|\uFF61/g, // RFC 3490 separators + + /** Error messages */ + errors = { + 'overflow': 'Overflow: input needs wider integers to process', + 'not-basic': 'Illegal input >= 0x80 (not a basic code point)', + 'invalid-input': 'Invalid input' + }, + + /** Convenience shortcuts */ + baseMinusTMin = base - tMin, + floor = Math.floor, + stringFromCharCode = String.fromCharCode, + + /** Temporary variable */ + key; + + /*--------------------------------------------------------------------------*/ + + /** + * A generic error utility function. + * @private + * @param {String} type The error type. + * @returns {Error} Throws a `RangeError` with the applicable error message. + */ + function error(type) { + throw RangeError(errors[type]); + } + + /** + * A generic `Array#map` utility function. + * @private + * @param {Array} array The array to iterate over. + * @param {Function} callback The function that gets called for every array + * item. + * @returns {Array} A new array of values returned by the callback function. + */ + function map(array, fn) { + var length = array.length; + while (length--) { + array[length] = fn(array[length]); + } + return array; + } + + /** + * A simple `Array#map`-like wrapper to work with domain name strings. + * @private + * @param {String} domain The domain name. + * @param {Function} callback The function that gets called for every + * character. + * @returns {Array} A new string of characters returned by the callback + * function. + */ + function mapDomain(string, fn) { + return map(string.split(regexSeparators), fn).join('.'); + } + + /** + * Creates an array containing the decimal code points of each Unicode + * character in the string. While JavaScript uses UCS-2 internally, + * this function will convert a pair of surrogate halves (each of which + * UCS-2 exposes as separate characters) into a single code point, + * matching UTF-16. + * @see `punycode.ucs2.encode` + * @see <http://mathiasbynens.be/notes/javascript-encoding> + * @memberOf punycode.ucs2 + * @name decode + * @param {String} string The Unicode input string (UCS-2). + * @returns {Array} The new array of code points. + */ + function ucs2decode(string) { + var output = [], + counter = 0, + length = string.length, + value, + extra; + while (counter < length) { + value = string.charCodeAt(counter++); + if ((value & 0xF800) == 0xD800 && counter < length) { + // high surrogate, and there is a next character + extra = string.charCodeAt(counter++); + if ((extra & 0xFC00) == 0xDC00) { // low surrogate + output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000); + } else { + output.push(value, extra); + } + } else { + output.push(value); + } + } + return output; + } + + /** + * Creates a string based on an array of decimal code points. + * @see `punycode.ucs2.decode` + * @memberOf punycode.ucs2 + * @name encode + * @param {Array} codePoints The array of decimal code points. + * @returns {String} The new Unicode string (UCS-2). + */ + function ucs2encode(array) { + return map(array, function(value) { + var output = ''; + if (value > 0xFFFF) { + value -= 0x10000; + output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800); + value = 0xDC00 | value & 0x3FF; + } + output += stringFromCharCode(value); + return output; + }).join(''); + } + + /** + * Converts a basic code point into a digit/integer. + * @see `digitToBasic()` + * @private + * @param {Number} codePoint The basic (decimal) code point. + * @returns {Number} The numeric value of a basic code point (for use in + * representing integers) in the range `0` to `base - 1`, or `base` if + * the code point does not represent a value. + */ + function basicToDigit(codePoint) { + return codePoint - 48 < 10 + ? codePoint - 22 + : codePoint - 65 < 26 + ? codePoint - 65 + : codePoint - 97 < 26 + ? codePoint - 97 + : base; + } + + /** + * Converts a digit/integer into a basic code point. + * @see `basicToDigit()` + * @private + * @param {Number} digit The numeric value of a basic code point. + * @returns {Number} The basic code point whose value (when used for + * representing integers) is `digit`, which needs to be in the range + * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is + * used; else, the lowercase form is used. The behavior is undefined + * if flag is non-zero and `digit` has no uppercase form. + */ + function digitToBasic(digit, flag) { + // 0..25 map to ASCII a..z or A..Z + // 26..35 map to ASCII 0..9 + return digit + 22 + 75 * (digit < 26) - ((flag != 0) << 5); + } + + /** + * Bias adaptation function as per section 3.4 of RFC 3492. + * http://tools.ietf.org/html/rfc3492#section-3.4 + * @private + */ + function adapt(delta, numPoints, firstTime) { + var k = 0; + delta = firstTime ? floor(delta / damp) : delta >> 1; + delta += floor(delta / numPoints); + for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) { + delta = floor(delta / baseMinusTMin); + } + return floor(k + (baseMinusTMin + 1) * delta / (delta + skew)); + } + + /** + * Converts a basic code point to lowercase if `flag` is falsy, or to + * uppercase if `flag` is truthy. The code point is unchanged if it's + * caseless. The behavior is undefined if `codePoint` is not a basic code + * point. + * @private + * @param {Number} codePoint The numeric value of a basic code point. + * @returns {Number} The resulting basic code point. + */ + function encodeBasic(codePoint, flag) { + codePoint -= (codePoint - 97 < 26) << 5; + return codePoint + (!flag && codePoint - 65 < 26) << 5; + } + + /** + * Converts a Punycode string of ASCII code points to a string of Unicode + * code points. + * @memberOf punycode + * @param {String} input The Punycode string of ASCII code points. + * @returns {String} The resulting string of Unicode code points. + */ + function decode(input) { + // Don't use UCS-2 + var output = [], + inputLength = input.length, + out, + i = 0, + n = initialN, + bias = initialBias, + basic, + j, + index, + oldi, + w, + k, + digit, + t, + length, + /** Cached calculation results */ + baseMinusT; + + // Handle the basic code points: let `basic` be the number of input code + // points before the last delimiter, or `0` if there is none, then copy + // the first basic code points to the output. + + basic = input.lastIndexOf(delimiter); + if (basic < 0) { + basic = 0; + } + + for (j = 0; j < basic; ++j) { + // if it's not a basic code point + if (input.charCodeAt(j) >= 0x80) { + error('not-basic'); + } + output.push(input.charCodeAt(j)); + } + + // Main decoding loop: start just after the last delimiter if any basic code + // points were copied; start at the beginning otherwise. + + for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) { + + // `index` is the index of the next character to be consumed. + // Decode a generalized variable-length integer into `delta`, + // which gets added to `i`. The overflow checking is easier + // if we increase `i` as we go, then subtract off its starting + // value at the end to obtain `delta`. + for (oldi = i, w = 1, k = base; /* no condition */; k += base) { + + if (index >= inputLength) { + error('invalid-input'); + } + + digit = basicToDigit(input.charCodeAt(index++)); + + if (digit >= base || digit > floor((maxInt - i) / w)) { + error('overflow'); + } + + i += digit * w; + t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias); + + if (digit < t) { + break; + } + + baseMinusT = base - t; + if (w > floor(maxInt / baseMinusT)) { + error('overflow'); + } + + w *= baseMinusT; + + } + + out = output.length + 1; + bias = adapt(i - oldi, out, oldi == 0); + + // `i` was supposed to wrap around from `out` to `0`, + // incrementing `n` each time, so we'll fix that now: + if (floor(i / out) > maxInt - n) { + error('overflow'); + } + + n += floor(i / out); + i %= out; + + // Insert `n` at position `i` of the output + output.splice(i++, 0, n); + + } + + return ucs2encode(output); + } + + /** + * Converts a string of Unicode code points to a Punycode string of ASCII + * code points. + * @memberOf punycode + * @param {String} input The string of Unicode code points. + * @returns {String} The resulting Punycode string of ASCII code points. + */ + function encode(input) { + var n, + delta, + handledCPCount, + basicLength, + bias, + j, + m, + q, + k, + t, + currentValue, + output = [], + /** `inputLength` will hold the number of code points in `input`. */ + inputLength, + /** Cached calculation results */ + handledCPCountPlusOne, + baseMinusT, + qMinusT; + + // Convert the input in UCS-2 to Unicode + input = ucs2decode(input); + + // Cache the length + inputLength = input.length; + + // Initialize the state + n = initialN; + delta = 0; + bias = initialBias; + + // Handle the basic code points + for (j = 0; j < inputLength; ++j) { + currentValue = input[j]; + if (currentValue < 0x80) { + output.push(stringFromCharCode(currentValue)); + } + } + + handledCPCount = basicLength = output.length; + + // `handledCPCount` is the number of code points that have been handled; + // `basicLength` is the number of basic code points. + + // Finish the basic string - if it is not empty - with a delimiter + if (basicLength) { + output.push(delimiter); + } + + // Main encoding loop: + while (handledCPCount < inputLength) { + + // All non-basic code points < n have been handled already. Find the next + // larger one: + for (m = maxInt, j = 0; j < inputLength; ++j) { + currentValue = input[j]; + if (currentValue >= n && currentValue < m) { + m = currentValue; + } + } + + // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>, + // but guard against overflow + handledCPCountPlusOne = handledCPCount + 1; + if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) { + error('overflow'); + } + + delta += (m - n) * handledCPCountPlusOne; + n = m; + + for (j = 0; j < inputLength; ++j) { + currentValue = input[j]; + + if (currentValue < n && ++delta > maxInt) { + error('overflow'); + } + + if (currentValue == n) { + // Represent delta as a generalized variable-length integer + for (q = delta, k = base; /* no condition */; k += base) { + t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias); + if (q < t) { + break; + } + qMinusT = q - t; + baseMinusT = base - t; + output.push( + stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0)) + ); + q = floor(qMinusT / baseMinusT); + } + + output.push(stringFromCharCode(digitToBasic(q, 0))); + bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength); + delta = 0; + ++handledCPCount; + } + } + + ++delta; + ++n; + + } + return output.join(''); + } + + /** + * Converts a Punycode string representing a domain name to Unicode. Only the + * Punycoded parts of the domain name will be converted, i.e. it doesn't + * matter if you call it on a string that has already been converted to + * Unicode. + * @memberOf punycode + * @param {String} domain The Punycode domain name to convert to Unicode. + * @returns {String} The Unicode representation of the given Punycode + * string. + */ + function toUnicode(domain) { + return mapDomain(domain, function(string) { + return regexPunycode.test(string) + ? decode(string.slice(4).toLowerCase()) + : string; + }); + } + + /** + * Converts a Unicode string representing a domain name to Punycode. Only the + * non-ASCII parts of the domain name will be converted, i.e. it doesn't + * matter if you call it with a domain that's already in ASCII. + * @memberOf punycode + * @param {String} domain The domain name to convert, as a Unicode string. + * @returns {String} The Punycode representation of the given domain name. + */ + function toASCII(domain) { + return mapDomain(domain, function(string) { + return regexNonASCII.test(string) + ? 'xn--' + encode(string) + : string; + }); + } + + /*--------------------------------------------------------------------------*/ + + /** Define the public API */ + punycode = { + /** + * A string representing the current Punycode.js version number. + * @memberOf punycode + * @type String + */ + 'version': '1.2.0', + /** + * An object of methods to convert from JavaScript's internal character + * representation (UCS-2) to decimal Unicode code points, and back. + * @see <http://mathiasbynens.be/notes/javascript-encoding> + * @memberOf punycode + * @type Object + */ + 'ucs2': { + 'decode': ucs2decode, + 'encode': ucs2encode + }, + 'decode': decode, + 'encode': encode, + 'toASCII': toASCII, + 'toUnicode': toUnicode + }; + + /** Expose `punycode` */ + if (freeExports) { + if (freeModule && freeModule.exports == freeExports) { + // in Node.js or Ringo 0.8+ + freeModule.exports = punycode; + } else { + // in Narwhal or Ringo 0.7- + for (key in punycode) { + punycode.hasOwnProperty(key) && (freeExports[key] = punycode[key]); + } + } + } else if (freeDefine) { + // via curl.js or RequireJS + define('punycode', punycode); + } else { + // in a browser or Rhino + root.punycode = punycode; + } + +}(this)); |