diff options
author | Alessio Vanni <vannilla@firemail.cc> | 2019-02-19 21:06:09 +0100 |
---|---|---|
committer | Alessio Vanni <vannilla@firemail.cc> | 2019-02-19 21:06:09 +0100 |
commit | fe2f8acc8210c2ddead4621797b47106a9b38f5b (patch) | |
tree | 5fb103d45d7e4345f56fc068ce8173b82fa7051f /lib | |
download | ematrix-fe2f8acc8210c2ddead4621797b47106a9b38f5b.tar.lz ematrix-fe2f8acc8210c2ddead4621797b47106a9b38f5b.tar.xz ematrix-fe2f8acc8210c2ddead4621797b47106a9b38f5b.zip |
Fork uMatrix
Pretty much just changing the name and the copyright.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/publicsuffixlist.js | 369 | ||||
-rw-r--r-- | lib/punycode.js | 530 | ||||
-rw-r--r-- | lib/yamd5.js | 402 |
3 files changed, 1301 insertions, 0 deletions
diff --git a/lib/publicsuffixlist.js b/lib/publicsuffixlist.js new file mode 100644 index 0000000..a823188 --- /dev/null +++ b/lib/publicsuffixlist.js @@ -0,0 +1,369 @@ +/******************************************************************************* + + publicsuffixlist.js - an efficient javascript implementation to deal with + Mozilla Foundation's Public Suffix List <http://publicsuffix.org/list/> + Copyright (C) 2013 Raymond Hill + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see {http://www.gnu.org/licenses/}. + +*/ + +/*! Home: https://github.com/gorhill/publicsuffixlist.js */ + +/* + This code is mostly dumb: I consider this to be lower-level code, thus + in order to ensure efficiency, the caller is responsible for sanitizing + the inputs. +*/ + +/******************************************************************************/ + +// A single instance of PublicSuffixList is enough. + +;(function(root) { + +'use strict'; + +/******************************************************************************/ + +var exceptions = {}; +var rules = {}; +var selfieMagic = 'iscjsfsaolnm'; + +// This value dictate how the search will be performed: +// < this.cutoffLength = indexOf() +// >= this.cutoffLength = binary search +var cutoffLength = 256; +var mustPunycode = /[^\w.*-]/; + +var onChangedListeners = []; + +/******************************************************************************/ + +// In the context of this code, a domain is defined as: +// "{label}.{public suffix}". +// A single standalone label is a public suffix as per +// http://publicsuffix.org/list/: +// "If no rules match, the prevailing rule is '*' " +// This means 'localhost' is not deemed a domain by this +// code, since according to the definition above, it would be +// evaluated as a public suffix. The caller is therefore responsible to +// decide how to further interpret such public suffix. +// +// `hostname` must be a valid ascii-based hostname. + +function getDomain(hostname) { + // A hostname starting with a dot is not a valid hostname. + if ( !hostname || hostname.charAt(0) === '.' ) { + return ''; + } + hostname = hostname.toLowerCase(); + var suffix = getPublicSuffix(hostname); + if ( suffix === hostname ) { + return ''; + } + var pos = hostname.lastIndexOf('.', hostname.lastIndexOf('.', hostname.length - suffix.length) - 1); + if ( pos <= 0 ) { + return hostname; + } + return hostname.slice(pos + 1); +} + +/******************************************************************************/ + +// Return longest public suffix. +// +// `hostname` must be a valid ascii-based string which respect hostname naming. + +function getPublicSuffix(hostname) { + if ( !hostname ) { + return ''; + } + // Since we slice down the hostname with each pass, the first match + // is the longest, so no need to find all the matching rules. + var pos; + while ( true ) { + pos = hostname.indexOf('.'); + if ( pos < 0 ) { + return hostname; + } + if ( search(exceptions, hostname) ) { + return hostname.slice(pos + 1); + } + if ( search(rules, hostname) ) { + return hostname; + } + if ( search(rules, '*' + hostname.slice(pos)) ) { + return hostname; + } + hostname = hostname.slice(pos + 1); + } + // unreachable +} + +/******************************************************************************/ + +// Look up a specific hostname. + +function search(store, hostname) { + // Extract TLD + var pos = hostname.lastIndexOf('.'); + var tld, remainder; + if ( pos < 0 ) { + tld = hostname; + remainder = hostname; + } else { + tld = hostname.slice(pos + 1); + remainder = hostname.slice(0, pos); + } + var substore = store[tld]; + if ( !substore ) { + return false; + } + // If substore is a string, use indexOf() + if ( typeof substore === 'string' ) { + return substore.indexOf(' ' + remainder + ' ') >= 0; + } + // It is an array: use binary search. + var l = remainder.length; + var haystack = substore[l]; + if ( !haystack ) { + return false; + } + var left = 0; + var right = Math.floor(haystack.length / l + 0.5); + var i, needle; + while ( left < right ) { + i = left + right >> 1; + needle = haystack.substr( l * i, l ); + if ( remainder < needle ) { + right = i; + } else if ( remainder > needle ) { + left = i + 1; + } else { + return true; + } + } + return false; +} + +/******************************************************************************/ + +// Parse and set a UTF-8 text-based suffix list. Format is same as found at: +// http://publicsuffix.org/list/ +// +// `toAscii` is a converter from unicode to punycode. Required since the +// Public Suffix List contains unicode characters. +// Suggestion: use <https://github.com/bestiejs/punycode.js> it's quite good. + +function parse(text, toAscii) { + exceptions = {}; + rules = {}; + + var lineBeg = 0, lineEnd; + var textEnd = text.length; + var line, store, pos, tld; + + while ( lineBeg < textEnd ) { + lineEnd = text.indexOf('\n', lineBeg); + if ( lineEnd < 0 ) { + lineEnd = text.indexOf('\r', lineBeg); + if ( lineEnd < 0 ) { + lineEnd = textEnd; + } + } + line = text.slice(lineBeg, lineEnd).trim(); + lineBeg = lineEnd + 1; + + if ( line.length === 0 ) { + continue; + } + + // Ignore comments + pos = line.indexOf('//'); + if ( pos >= 0 ) { + line = line.slice(0, pos); + } + + // Ignore surrounding whitespaces + line = line.trim(); + if ( !line ) { + continue; + } + + // Is this an exception rule? + if ( line.charAt(0) === '!' ) { + store = exceptions; + line = line.slice(1); + } else { + store = rules; + } + + if ( mustPunycode.test(line) ) { + line = toAscii(line); + } + + // http://publicsuffix.org/list/: + // "... all rules must be canonicalized in the normal way + // for hostnames - lower-case, Punycode ..." + line = line.toLowerCase(); + + // Extract TLD + pos = line.lastIndexOf('.'); + if ( pos < 0 ) { + tld = line; + } else { + tld = line.slice(pos + 1); + line = line.slice(0, pos); + } + + // Store suffix using tld as key + if ( !store.hasOwnProperty(tld) ) { + store[tld] = []; + } + if ( line ) { + store[tld].push(line); + } + } + crystallize(exceptions); + crystallize(rules); + + callListeners(onChangedListeners); +} + +/******************************************************************************/ + +// Cristallize the storage of suffixes using optimal internal representation +// for future look up. + +function crystallize(store) { + var suffixes, suffix, i, l; + + for ( var tld in store ) { + if ( !store.hasOwnProperty(tld) ) { + continue; + } + suffixes = store[tld].join(' '); + // No suffix + if ( !suffixes ) { + store[tld] = ''; + continue; + } + // Concatenated list of suffixes less than cutoff length: + // Store as string, lookup using indexOf() + if ( suffixes.length < cutoffLength ) { + store[tld] = ' ' + suffixes + ' '; + continue; + } + // Concatenated list of suffixes greater or equal to cutoff length + // Store as array keyed on suffix length, lookup using binary search. + // I borrowed the idea to key on string length here: + // http://ejohn.org/blog/dictionary-lookups-in-javascript/#comment-392072 + + i = store[tld].length; + suffixes = []; + while ( i-- ) { + suffix = store[tld][i]; + l = suffix.length; + if ( !suffixes[l] ) { + suffixes[l] = []; + } + suffixes[l].push(suffix); + } + l = suffixes.length; + while ( l-- ) { + if ( suffixes[l] ) { + suffixes[l] = suffixes[l].sort().join(''); + } + } + store[tld] = suffixes; + } + return store; +} + +/******************************************************************************/ + +function toSelfie() { + return { + magic: selfieMagic, + rules: rules, + exceptions: exceptions + }; +} + +function fromSelfie(selfie) { + if ( typeof selfie !== 'object' || typeof selfie.magic !== 'string' || selfie.magic !== selfieMagic ) { + return false; + } + rules = selfie.rules; + exceptions = selfie.exceptions; + callListeners(onChangedListeners); + return true; +} + +/******************************************************************************/ + +var addListener = function(listeners, callback) { + if ( typeof callback !== 'function' ) { + return; + } + if ( listeners.indexOf(callback) === -1 ) { + listeners.push(callback); + } +}; + +var removeListener = function(listeners, callback) { + var pos = listeners.indexOf(callback); + if ( pos !== -1 ) { + listeners.splice(pos, 1); + } +}; + +var callListeners = function(listeners) { + for ( var i = 0; i < listeners.length; i++ ) { + listeners[i](); + } +}; + +/******************************************************************************/ + +var onChanged = { + addListener: function(callback) { + addListener(onChangedListeners, callback); + }, + removeListener: function(callback) { + removeListener(onChangedListeners, callback); + } +}; + +/******************************************************************************/ + +// Public API + +root = root || window; + +root.publicSuffixList = { + 'version': '1.0', + 'parse': parse, + 'getDomain': getDomain, + 'getPublicSuffix': getPublicSuffix, + 'toSelfie': toSelfie, + 'fromSelfie': fromSelfie, + 'onChanged': onChanged +}; + +/******************************************************************************/ + +})(this); + diff --git a/lib/punycode.js b/lib/punycode.js new file mode 100644 index 0000000..ac68597 --- /dev/null +++ b/lib/punycode.js @@ -0,0 +1,530 @@ +/*! https://mths.be/punycode v1.3.2 by @mathias */ +;(function(root) { + + /** Detect free variables */ + var freeExports = typeof exports == 'object' && exports && + !exports.nodeType && exports; + var freeModule = typeof module == 'object' && module && + !module.nodeType && module; + var freeGlobal = typeof global == 'object' && global; + if ( + freeGlobal.global === freeGlobal || + freeGlobal.window === freeGlobal || + freeGlobal.self === freeGlobal + ) { + root = freeGlobal; + } + + /** + * The `punycode` object. + * @name punycode + * @type Object + */ + var punycode, + + /** Highest positive signed 32-bit float value */ + maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1 + + /** Bootstring parameters */ + base = 36, + tMin = 1, + tMax = 26, + skew = 38, + damp = 700, + initialBias = 72, + initialN = 128, // 0x80 + delimiter = '-', // '\x2D' + + /** Regular expressions */ + regexPunycode = /^xn--/, + regexNonASCII = /[^\x20-\x7E]/, // unprintable ASCII chars + non-ASCII chars + regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g, // RFC 3490 separators + + /** Error messages */ + errors = { + 'overflow': 'Overflow: input needs wider integers to process', + 'not-basic': 'Illegal input >= 0x80 (not a basic code point)', + 'invalid-input': 'Invalid input' + }, + + /** Convenience shortcuts */ + baseMinusTMin = base - tMin, + floor = Math.floor, + stringFromCharCode = String.fromCharCode, + + /** Temporary variable */ + key; + + /*--------------------------------------------------------------------------*/ + + /** + * A generic error utility function. + * @private + * @param {String} type The error type. + * @returns {Error} Throws a `RangeError` with the applicable error message. + */ + function error(type) { + throw RangeError(errors[type]); + } + + /** + * A generic `Array#map` utility function. + * @private + * @param {Array} array The array to iterate over. + * @param {Function} callback The function that gets called for every array + * item. + * @returns {Array} A new array of values returned by the callback function. + */ + function map(array, fn) { + var length = array.length; + var result = []; + while (length--) { + result[length] = fn(array[length]); + } + return result; + } + + /** + * A simple `Array#map`-like wrapper to work with domain name strings or email + * addresses. + * @private + * @param {String} domain The domain name or email address. + * @param {Function} callback The function that gets called for every + * character. + * @returns {Array} A new string of characters returned by the callback + * function. + */ + function mapDomain(string, fn) { + var parts = string.split('@'); + var result = ''; + if (parts.length > 1) { + // In email addresses, only the domain name should be punycoded. Leave + // the local part (i.e. everything up to `@`) intact. + result = parts[0] + '@'; + string = parts[1]; + } + // Avoid `split(regex)` for IE8 compatibility. See #17. + string = string.replace(regexSeparators, '\x2E'); + var labels = string.split('.'); + var encoded = map(labels, fn).join('.'); + return result + encoded; + } + + /** + * Creates an array containing the numeric code points of each Unicode + * character in the string. While JavaScript uses UCS-2 internally, + * this function will convert a pair of surrogate halves (each of which + * UCS-2 exposes as separate characters) into a single code point, + * matching UTF-16. + * @see `punycode.ucs2.encode` + * @see <https://mathiasbynens.be/notes/javascript-encoding> + * @memberOf punycode.ucs2 + * @name decode + * @param {String} string The Unicode input string (UCS-2). + * @returns {Array} The new array of code points. + */ + function ucs2decode(string) { + var output = [], + counter = 0, + length = string.length, + value, + extra; + while (counter < length) { + value = string.charCodeAt(counter++); + if (value >= 0xD800 && value <= 0xDBFF && counter < length) { + // high surrogate, and there is a next character + extra = string.charCodeAt(counter++); + if ((extra & 0xFC00) == 0xDC00) { // low surrogate + output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000); + } else { + // unmatched surrogate; only append this code unit, in case the next + // code unit is the high surrogate of a surrogate pair + output.push(value); + counter--; + } + } else { + output.push(value); + } + } + return output; + } + + /** + * Creates a string based on an array of numeric code points. + * @see `punycode.ucs2.decode` + * @memberOf punycode.ucs2 + * @name encode + * @param {Array} codePoints The array of numeric code points. + * @returns {String} The new Unicode string (UCS-2). + */ + function ucs2encode(array) { + return map(array, function(value) { + var output = ''; + if (value > 0xFFFF) { + value -= 0x10000; + output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800); + value = 0xDC00 | value & 0x3FF; + } + output += stringFromCharCode(value); + return output; + }).join(''); + } + + /** + * Converts a basic code point into a digit/integer. + * @see `digitToBasic()` + * @private + * @param {Number} codePoint The basic numeric code point value. + * @returns {Number} The numeric value of a basic code point (for use in + * representing integers) in the range `0` to `base - 1`, or `base` if + * the code point does not represent a value. + */ + function basicToDigit(codePoint) { + if (codePoint - 48 < 10) { + return codePoint - 22; + } + if (codePoint - 65 < 26) { + return codePoint - 65; + } + if (codePoint - 97 < 26) { + return codePoint - 97; + } + return base; + } + + /** + * Converts a digit/integer into a basic code point. + * @see `basicToDigit()` + * @private + * @param {Number} digit The numeric value of a basic code point. + * @returns {Number} The basic code point whose value (when used for + * representing integers) is `digit`, which needs to be in the range + * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is + * used; else, the lowercase form is used. The behavior is undefined + * if `flag` is non-zero and `digit` has no uppercase form. + */ + function digitToBasic(digit, flag) { + // 0..25 map to ASCII a..z or A..Z + // 26..35 map to ASCII 0..9 + return digit + 22 + 75 * (digit < 26) - ((flag != 0) << 5); + } + + /** + * Bias adaptation function as per section 3.4 of RFC 3492. + * http://tools.ietf.org/html/rfc3492#section-3.4 + * @private + */ + function adapt(delta, numPoints, firstTime) { + var k = 0; + delta = firstTime ? floor(delta / damp) : delta >> 1; + delta += floor(delta / numPoints); + for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) { + delta = floor(delta / baseMinusTMin); + } + return floor(k + (baseMinusTMin + 1) * delta / (delta + skew)); + } + + /** + * Converts a Punycode string of ASCII-only symbols to a string of Unicode + * symbols. + * @memberOf punycode + * @param {String} input The Punycode string of ASCII-only symbols. + * @returns {String} The resulting string of Unicode symbols. + */ + function decode(input) { + // Don't use UCS-2 + var output = [], + inputLength = input.length, + out, + i = 0, + n = initialN, + bias = initialBias, + basic, + j, + index, + oldi, + w, + k, + digit, + t, + /** Cached calculation results */ + baseMinusT; + + // Handle the basic code points: let `basic` be the number of input code + // points before the last delimiter, or `0` if there is none, then copy + // the first basic code points to the output. + + basic = input.lastIndexOf(delimiter); + if (basic < 0) { + basic = 0; + } + + for (j = 0; j < basic; ++j) { + // if it's not a basic code point + if (input.charCodeAt(j) >= 0x80) { + error('not-basic'); + } + output.push(input.charCodeAt(j)); + } + + // Main decoding loop: start just after the last delimiter if any basic code + // points were copied; start at the beginning otherwise. + + for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) { + + // `index` is the index of the next character to be consumed. + // Decode a generalized variable-length integer into `delta`, + // which gets added to `i`. The overflow checking is easier + // if we increase `i` as we go, then subtract off its starting + // value at the end to obtain `delta`. + for (oldi = i, w = 1, k = base; /* no condition */; k += base) { + + if (index >= inputLength) { + error('invalid-input'); + } + + digit = basicToDigit(input.charCodeAt(index++)); + + if (digit >= base || digit > floor((maxInt - i) / w)) { + error('overflow'); + } + + i += digit * w; + t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias); + + if (digit < t) { + break; + } + + baseMinusT = base - t; + if (w > floor(maxInt / baseMinusT)) { + error('overflow'); + } + + w *= baseMinusT; + + } + + out = output.length + 1; + bias = adapt(i - oldi, out, oldi == 0); + + // `i` was supposed to wrap around from `out` to `0`, + // incrementing `n` each time, so we'll fix that now: + if (floor(i / out) > maxInt - n) { + error('overflow'); + } + + n += floor(i / out); + i %= out; + + // Insert `n` at position `i` of the output + output.splice(i++, 0, n); + + } + + return ucs2encode(output); + } + + /** + * Converts a string of Unicode symbols (e.g. a domain name label) to a + * Punycode string of ASCII-only symbols. + * @memberOf punycode + * @param {String} input The string of Unicode symbols. + * @returns {String} The resulting Punycode string of ASCII-only symbols. + */ + function encode(input) { + var n, + delta, + handledCPCount, + basicLength, + bias, + j, + m, + q, + k, + t, + currentValue, + output = [], + /** `inputLength` will hold the number of code points in `input`. */ + inputLength, + /** Cached calculation results */ + handledCPCountPlusOne, + baseMinusT, + qMinusT; + + // Convert the input in UCS-2 to Unicode + input = ucs2decode(input); + + // Cache the length + inputLength = input.length; + + // Initialize the state + n = initialN; + delta = 0; + bias = initialBias; + + // Handle the basic code points + for (j = 0; j < inputLength; ++j) { + currentValue = input[j]; + if (currentValue < 0x80) { + output.push(stringFromCharCode(currentValue)); + } + } + + handledCPCount = basicLength = output.length; + + // `handledCPCount` is the number of code points that have been handled; + // `basicLength` is the number of basic code points. + + // Finish the basic string - if it is not empty - with a delimiter + if (basicLength) { + output.push(delimiter); + } + + // Main encoding loop: + while (handledCPCount < inputLength) { + + // All non-basic code points < n have been handled already. Find the next + // larger one: + for (m = maxInt, j = 0; j < inputLength; ++j) { + currentValue = input[j]; + if (currentValue >= n && currentValue < m) { + m = currentValue; + } + } + + // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>, + // but guard against overflow + handledCPCountPlusOne = handledCPCount + 1; + if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) { + error('overflow'); + } + + delta += (m - n) * handledCPCountPlusOne; + n = m; + + for (j = 0; j < inputLength; ++j) { + currentValue = input[j]; + + if (currentValue < n && ++delta > maxInt) { + error('overflow'); + } + + if (currentValue == n) { + // Represent delta as a generalized variable-length integer + for (q = delta, k = base; /* no condition */; k += base) { + t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias); + if (q < t) { + break; + } + qMinusT = q - t; + baseMinusT = base - t; + output.push( + stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0)) + ); + q = floor(qMinusT / baseMinusT); + } + + output.push(stringFromCharCode(digitToBasic(q, 0))); + bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength); + delta = 0; + ++handledCPCount; + } + } + + ++delta; + ++n; + + } + return output.join(''); + } + + /** + * Converts a Punycode string representing a domain name or an email address + * to Unicode. Only the Punycoded parts of the input will be converted, i.e. + * it doesn't matter if you call it on a string that has already been + * converted to Unicode. + * @memberOf punycode + * @param {String} input The Punycoded domain name or email address to + * convert to Unicode. + * @returns {String} The Unicode representation of the given Punycode + * string. + */ + function toUnicode(input) { + return mapDomain(input, function(string) { + return regexPunycode.test(string) + ? decode(string.slice(4).toLowerCase()) + : string; + }); + } + + /** + * Converts a Unicode string representing a domain name or an email address to + * Punycode. Only the non-ASCII parts of the domain name will be converted, + * i.e. it doesn't matter if you call it with a domain that's already in + * ASCII. + * @memberOf punycode + * @param {String} input The domain name or email address to convert, as a + * Unicode string. + * @returns {String} The Punycode representation of the given domain name or + * email address. + */ + function toASCII(input) { + return mapDomain(input, function(string) { + return regexNonASCII.test(string) + ? 'xn--' + encode(string) + : string; + }); + } + + /*--------------------------------------------------------------------------*/ + + /** Define the public API */ + punycode = { + /** + * A string representing the current Punycode.js version number. + * @memberOf punycode + * @type String + */ + 'version': '1.3.2', + /** + * An object of methods to convert from JavaScript's internal character + * representation (UCS-2) to Unicode code points, and back. + * @see <https://mathiasbynens.be/notes/javascript-encoding> + * @memberOf punycode + * @type Object + */ + 'ucs2': { + 'decode': ucs2decode, + 'encode': ucs2encode + }, + 'decode': decode, + 'encode': encode, + 'toASCII': toASCII, + 'toUnicode': toUnicode + }; + + /** Expose `punycode` */ + // Some AMD build optimizers, like r.js, check for specific condition patterns + // like the following: + if ( + typeof define == 'function' && + typeof define.amd == 'object' && + define.amd + ) { + define('punycode', function() { + return punycode; + }); + } else if (freeExports && freeModule) { + if (module.exports == freeExports) { // in Node.js or RingoJS v0.8.0+ + freeModule.exports = punycode; + } else { // in Narwhal or RingoJS v0.7.0- + for (key in punycode) { + punycode.hasOwnProperty(key) && (freeExports[key] = punycode[key]); + } + } + } else { // in Rhino or a web browser + root.punycode = punycode; + } + +}(this)); diff --git a/lib/yamd5.js b/lib/yamd5.js new file mode 100644 index 0000000..582b576 --- /dev/null +++ b/lib/yamd5.js @@ -0,0 +1,402 @@ +/******************************************************************************* + +YaMD5 - Yet another MD5 hasher. +home: https://github.com/gorhill/yamd5.js + +I needed an MD5 hasher, and as usual I want small code base, and fast. + +Originally found md5-o-matic [1]. It was fast but did not work with Unicode +strings. Also, eventually realized it was really based on code from +Joseph Myers [2] with no proper credits given (not nice). + +Then I found SparkMD5 [3], which works with Unicode strings, but at a steep +cost to performance. Also, glancing at the code I saw avoidable redundancies +causing the code base to be much larger than needed. + +So from this point I set out to write my own version, YaMD5 (sorry, I am +not good with naming projects), of course heavily relying on the original +code from Joseph Myers [2], and bits from SparkMD5 -- I started to work from +SparkMD5 implementation, so there might be bits of code original to SparkMD5 +code left in a few places (like say, MD5.end()). + +Advantages of YaMD5: + +- Can handle Unicode strings +- Natively incremental +- Small code base +- Fastest MD5 hasher out there so far for large input [4] +- Even faster than versions supporting only simpler ascii strings + + + [1] https://github.com/trentmillar/md5-o-matic + [2] http://www.myersdaily.org/joseph/javascript/md5-text.html + [3] https://github.com/satazor/SparkMD5 + [4] http://jsperf.com/md5-shootout/75 + +So with that said, I don't know what license covers Joseph Myers' code (need +to find out). In any case, concerning whatever original code I contributed in +there: + +The MIT License (MIT) + +Copyright (C) 2014 Raymond Hill + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +**/ + +/* jshint bitwise: false */ + +(function(root) { + + 'use strict'; + + /* + * Fastest md5 implementation around (JKM md5) + * Credits: Joseph Myers + * + * @see http://www.myersdaily.org/joseph/javascript/md5-text.html + * @see http://jsperf.com/md5-shootout/7 + */ + + // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence + + var md5cycle = function(x, k) { + var a = x[0], + b = x[1], + c = x[2], + d = x[3]; + // ff() + a += (b & c | ~b & d) + k[0] - 680876936 | 0; + a = (a << 7 | a >>> 25) + b | 0; + d += (a & b | ~a & c) + k[1] - 389564586 | 0; + d = (d << 12 | d >>> 20) + a | 0; + c += (d & a | ~d & b) + k[2] + 606105819 | 0; + c = (c << 17 | c >>> 15) + d | 0; + b += (c & d | ~c & a) + k[3] - 1044525330 | 0; + b = (b << 22 | b >>> 10) + c | 0; + a += (b & c | ~b & d) + k[4] - 176418897 | 0; + a = (a << 7 | a >>> 25) + b | 0; + d += (a & b | ~a & c) + k[5] + 1200080426 | 0; + d = (d << 12 | d >>> 20) + a | 0; + c += (d & a | ~d & b) + k[6] - 1473231341 | 0; + c = (c << 17 | c >>> 15) + d | 0; + b += (c & d | ~c & a) + k[7] - 45705983 | 0; + b = (b << 22 | b >>> 10) + c | 0; + a += (b & c | ~b & d) + k[8] + 1770035416 | 0; + a = (a << 7 | a >>> 25) + b | 0; + d += (a & b | ~a & c) + k[9] - 1958414417 | 0; + d = (d << 12 | d >>> 20) + a | 0; + c += (d & a | ~d & b) + k[10] - 42063 | 0; + c = (c << 17 | c >>> 15) + d | 0; + b += (c & d | ~c & a) + k[11] - 1990404162 | 0; + b = (b << 22 | b >>> 10) + c | 0; + a += (b & c | ~b & d) + k[12] + 1804603682 | 0; + a = (a << 7 | a >>> 25) + b | 0; + d += (a & b | ~a & c) + k[13] - 40341101 | 0; + d = (d << 12 | d >>> 20) + a | 0; + c += (d & a | ~d & b) + k[14] - 1502002290 | 0; + c = (c << 17 | c >>> 15) + d | 0; + b += (c & d | ~c & a) + k[15] + 1236535329 | 0; + b = (b << 22 | b >>> 10) + c | 0; + // gg() + a += (b & d | c & ~d) + k[1] - 165796510 | 0; + a = (a << 5 | a >>> 27) + b | 0; + d += (a & c | b & ~c) + k[6] - 1069501632 | 0; + d = (d << 9 | d >>> 23) + a | 0; + c += (d & b | a & ~b) + k[11] + 643717713 | 0; + c = (c << 14 | c >>> 18) + d | 0; + b += (c & a | d & ~a) + k[0] - 373897302 | 0; + b = (b << 20 | b >>> 12) + c | 0; + a += (b & d | c & ~d) + k[5] - 701558691 | 0; + a = (a << 5 | a >>> 27) + b | 0; + d += (a & c | b & ~c) + k[10] + 38016083 | 0; + d = (d << 9 | d >>> 23) + a | 0; + c += (d & b | a & ~b) + k[15] - 660478335 | 0; + c = (c << 14 | c >>> 18) + d | 0; + b += (c & a | d & ~a) + k[4] - 405537848 | 0; + b = (b << 20 | b >>> 12) + c | 0; + a += (b & d | c & ~d) + k[9] + 568446438 | 0; + a = (a << 5 | a >>> 27) + b | 0; + d += (a & c | b & ~c) + k[14] - 1019803690 | 0; + d = (d << 9 | d >>> 23) + a | 0; + c += (d & b | a & ~b) + k[3] - 187363961 | 0; + c = (c << 14 | c >>> 18) + d | 0; + b += (c & a | d & ~a) + k[8] + 1163531501 | 0; + b = (b << 20 | b >>> 12) + c | 0; + a += (b & d | c & ~d) + k[13] - 1444681467 | 0; + a = (a << 5 | a >>> 27) + b | 0; + d += (a & c | b & ~c) + k[2] - 51403784 | 0; + d = (d << 9 | d >>> 23) + a | 0; + c += (d & b | a & ~b) + k[7] + 1735328473 | 0; + c = (c << 14 | c >>> 18) + d | 0; + b += (c & a | d & ~a) + k[12] - 1926607734 | 0; + b = (b << 20 | b >>> 12) + c | 0; + // hh() + a += (b ^ c ^ d) + k[5] - 378558 | 0; + a = (a << 4 | a >>> 28) + b | 0; + d += (a ^ b ^ c) + k[8] - 2022574463 | 0; + d = (d << 11 | d >>> 21) + a | 0; + c += (d ^ a ^ b) + k[11] + 1839030562 | 0; + c = (c << 16 | c >>> 16) + d | 0; + b += (c ^ d ^ a) + k[14] - 35309556 | 0; + b = (b << 23 | b >>> 9) + c | 0; + a += (b ^ c ^ d) + k[1] - 1530992060 | 0; + a = (a << 4 | a >>> 28) + b | 0; + d += (a ^ b ^ c) + k[4] + 1272893353 | 0; + d = (d << 11 | d >>> 21) + a | 0; + c += (d ^ a ^ b) + k[7] - 155497632 | 0; + c = (c << 16 | c >>> 16) + d | 0; + b += (c ^ d ^ a) + k[10] - 1094730640 | 0; + b = (b << 23 | b >>> 9) + c | 0; + a += (b ^ c ^ d) + k[13] + 681279174 | 0; + a = (a << 4 | a >>> 28) + b | 0; + d += (a ^ b ^ c) + k[0] - 358537222 | 0; + d = (d << 11 | d >>> 21) + a | 0; + c += (d ^ a ^ b) + k[3] - 722521979 | 0; + c = (c << 16 | c >>> 16) + d | 0; + b += (c ^ d ^ a) + k[6] + 76029189 | 0; + b = (b << 23 | b >>> 9) + c | 0; + a += (b ^ c ^ d) + k[9] - 640364487 | 0; + a = (a << 4 | a >>> 28) + b | 0; + d += (a ^ b ^ c) + k[12] - 421815835 | 0; + d = (d << 11 | d >>> 21) + a | 0; + c += (d ^ a ^ b) + k[15] + 530742520 | 0; + c = (c << 16 | c >>> 16) + d | 0; + b += (c ^ d ^ a) + k[2] - 995338651 | 0; + b = (b << 23 | b >>> 9) + c | 0; + // ii() + a += (c ^ (b | ~d)) + k[0] - 198630844 | 0; + a = (a << 6 | a >>> 26) + b | 0; + d += (b ^ (a | ~c)) + k[7] + 1126891415 | 0; + d = (d << 10 | d >>> 22) + a | 0; + c += (a ^ (d | ~b)) + k[14] - 1416354905 | 0; + c = (c << 15 | c >>> 17) + d | 0; + b += (d ^ (c | ~a)) + k[5] - 57434055 | 0; + b = (b << 21 |b >>> 11) + c | 0; + a += (c ^ (b | ~d)) + k[12] + 1700485571 | 0; + a = (a << 6 | a >>> 26) + b | 0; + d += (b ^ (a | ~c)) + k[3] - 1894986606 | 0; + d = (d << 10 | d >>> 22) + a | 0; + c += (a ^ (d | ~b)) + k[10] - 1051523 | 0; + c = (c << 15 | c >>> 17) + d | 0; + b += (d ^ (c | ~a)) + k[1] - 2054922799 | 0; + b = (b << 21 |b >>> 11) + c | 0; + a += (c ^ (b | ~d)) + k[8] + 1873313359 | 0; + a = (a << 6 | a >>> 26) + b | 0; + d += (b ^ (a | ~c)) + k[15] - 30611744 | 0; + d = (d << 10 | d >>> 22) + a | 0; + c += (a ^ (d | ~b)) + k[6] - 1560198380 | 0; + c = (c << 15 | c >>> 17) + d | 0; + b += (d ^ (c | ~a)) + k[13] + 1309151649 | 0; + b = (b << 21 |b >>> 11) + c | 0; + a += (c ^ (b | ~d)) + k[4] - 145523070 | 0; + a = (a << 6 | a >>> 26) + b | 0; + d += (b ^ (a | ~c)) + k[11] - 1120210379 | 0; + d = (d << 10 | d >>> 22) + a | 0; + c += (a ^ (d | ~b)) + k[2] + 718787259 | 0; + c = (c << 15 | c >>> 17) + d | 0; + b += (d ^ (c | ~a)) + k[9] - 343485551 | 0; + b = (b << 21 | b >>> 11) + c | 0; + + x[0] = a + x[0] | 0; + x[1] = b + x[1] | 0; + x[2] = c + x[2] | 0; + x[3] = d + x[3] | 0; + }; + + var hexChars = '0123456789abcdef'; + var hexOut = []; + + var hex = function(x) { + var hc = hexChars; + var ho = hexOut; + var n, offset, j; + for (var i = 0; i < 4; i++) { + offset = i * 8; + n = x[i]; + for ( j = 0; j < 8; j += 2 ) { + ho[offset+1+j] = hc.charAt(n & 0x0F); + n >>>= 4; + ho[offset+0+j] = hc.charAt(n & 0x0F); + n >>>= 4; + } + } + return ho.join(''); + }; + + var MD5 = function() { + this._dataLength = 0; + this._state = new Int32Array(4); + this._buffer = new ArrayBuffer(68); + this._bufferLength = 0; + this._buffer8 = new Uint8Array(this._buffer, 0, 68); + this._buffer32 = new Uint32Array(this._buffer, 0, 17); + this.start(); + }; + + var stateIdentity = new Int32Array([1732584193, -271733879, -1732584194, 271733878]); + var buffer32Identity = new Int32Array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + + // Char to code point to to array conversion: + // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/charCodeAt#Example.3A_Fixing_charCodeAt_to_handle_non-Basic-Multilingual-Plane_characters_if_their_presence_earlier_in_the_string_is_unknown + MD5.prototype.appendStr = function(str) { + var buf8 = this._buffer8; + var buf32 = this._buffer32; + var bufLen = this._bufferLength; + var code; + for ( var i = 0; i < str.length; i++ ) { + code = str.charCodeAt(i); + if ( code < 128 ) { + buf8[bufLen++] = code; + } else if ( code < 0x800 ) { + buf8[bufLen++] = (code >>> 6) + 0xC0; + buf8[bufLen++] = code & 0x3F | 0x80; + } else if ( code < 0xD800 || code > 0xDBFF ) { + buf8[bufLen++] = (code >>> 12) + 0xE0; + buf8[bufLen++] = (code >>> 6 & 0x3F) | 0x80; + buf8[bufLen++] = (code & 0x3F) | 0x80; + } else { + code = ((code - 0xD800) * 0x400) + (str.charCodeAt(++i) - 0xDC00) + 0x10000; + if ( code > 0x10FFFF ) { + throw 'Unicode standard supports code points up to U+10FFFF'; + } + buf8[bufLen++] = (code >>> 18) + 0xF0; + buf8[bufLen++] = (code >>> 12 & 0x3F) | 0x80; + buf8[bufLen++] = (code >>> 6 & 0x3F) | 0x80; + buf8[bufLen++] = (code & 0x3F) | 0x80; + } + if ( bufLen >= 64 ) { + this._dataLength += 64; + md5cycle(this._state, buf32); + bufLen -= 64; + buf32[0] = buf32[16]; + } + } + this._bufferLength = bufLen; + return this; + }; + + MD5.prototype.appendAsciiStr = function(str) { + var buf8 = this._buffer8; + var buf32 = this._buffer32; + var bufLen = this._bufferLength; + var i, j = 0; + for (;;) { + i = Math.min(str.length-j, 64-bufLen); + while ( i-- ) { + buf8[bufLen++] = str.charCodeAt(j++); + } + if ( bufLen < 64 ) { + break; + } + this._dataLength += 64; + md5cycle(this._state, buf32); + bufLen = 0; + } + this._bufferLength = bufLen; + return this; + }; + + MD5.prototype.appendByteArray = function(input) { + var buf8 = this._buffer8; + var buf32 = this._buffer32; + var bufLen = this._bufferLength; + var i, j = 0; + for (;;) { + i = Math.min(input.length-j, 64-bufLen); + while ( i-- ) { + buf8[bufLen++] = input[j++]; + } + if ( bufLen < 64 ) { + break; + } + this._dataLength += 64; + md5cycle(this._state, buf32); + bufLen = 0; + } + this._bufferLength = bufLen; + return this; + }; + + MD5.prototype.start = function() { + this._dataLength = 0; + this._bufferLength = 0; + this._state.set(stateIdentity); + return this; + }; + + MD5.prototype.end = function(raw) { + var bufLen = this._bufferLength; + this._dataLength += bufLen; + var buf8 = this._buffer8; + buf8[bufLen] = 0x80; + buf8[bufLen+1] = buf8[bufLen+2] = buf8[bufLen+3] = 0; + var buf32 = this._buffer32; + var i = (bufLen >> 2) + 1; + buf32.set(buffer32Identity.subarray(i), i); + if (bufLen > 55) { + md5cycle(this._state, buf32); + buf32.set(buffer32Identity); + } + // Do the final computation based on the tail and length + // Beware that the final length may not fit in 32 bits so we take care of that + var dataBitsLen = this._dataLength * 8; + if ( dataBitsLen <= 0xFFFFFFFF ) { + buf32[14] = dataBitsLen; + } else { + var matches = dataBitsLen.toString(16).match(/(.*?)(.{0,8})$/); + var lo = parseInt(matches[2], 16); + var hi = parseInt(matches[1], 16) || 0; + buf32[14] = lo; + buf32[15] = hi; + } + md5cycle(this._state, buf32); + + return !!raw ? this._state : hex(this._state); + }; + + // This permanent instance is to use for one-call hashing + var onePassHasher = new MD5(); + + MD5.hashStr = function(str, raw) { + return onePassHasher + .start() + .appendStr(str) + .end(raw); + }; + + MD5.hashAsciiStr = function(str, raw) { + return onePassHasher + .start() + .appendAsciiStr(str) + .end(raw); + }; + + // Self-test + // In some cases the fast add32 function cannot be used.. + if ( MD5.hashStr('hello') !== '5d41402abc4b2a76b9719d911017c592' ) { + console.error('YaMD5> this javascript engine does not support YaMD5. Sorry.'); + } + + if ( typeof root === 'object' ) { + root.YaMD5 = MD5; + } + return MD5; +})(this); |