diff options
Diffstat (limited to 'js/uritools.js')
-rw-r--r-- | js/uritools.js | 883 |
1 files changed, 441 insertions, 442 deletions
diff --git a/js/uritools.js b/js/uritools.js index 88d77de..d60af04 100644 --- a/js/uritools.js +++ b/js/uritools.js @@ -37,502 +37,501 @@ Naming convention from https://en.wikipedia.org/wiki/URI_scheme#Examples ηMatrix.URI = (function() { -/******************************************************************************/ + /******************************************************************************/ + + // Favorite regex tool: http://regex101.com/ + + // Ref: <http://tools.ietf.org/html/rfc3986#page-50> + // I removed redundant capture groups: capture less = peform faster. See + // <http://jsperf.com/old-uritools-vs-new-uritools> + // Performance improvements welcomed. + // jsperf: <http://jsperf.com/old-uritools-vs-new-uritools> + var reRFC3986 = /^([^:\/?#]+:)?(\/\/[^\/?#]*)?([^?#]*)(\?[^#]*)?(#.*)?/; + + // Derived + var reSchemeFromURI = /^[^:\/?#]+:/; + var reAuthorityFromURI = /^(?:[^:\/?#]+:)?(\/\/[^\/?#]+)/; + var reOriginFromURI = /^(?:[^:\/?#]+:)?(?:\/\/[^\/?#]+)/; + var reCommonHostnameFromURL = /^https?:\/\/([0-9a-z_][0-9a-z._-]*[0-9a-z])\//; + var rePathFromURI = /^(?:[^:\/?#]+:)?(?:\/\/[^\/?#]*)?([^?#]*)/; + var reMustNormalizeHostname = /[^0-9a-z._-]/; + + // These are to parse authority field, not parsed by above official regex + // IPv6 is seen as an exception: a non-compatible IPv6 is first tried, and + // if it fails, the IPv6 compatible regex istr used. This helps + // peformance by avoiding the use of a too complicated regex first. + + // https://github.com/gorhill/httpswitchboard/issues/211 + // "While a hostname may not contain other characters, such as the + // "underscore character (_), other DNS names may contain the underscore" + var reHostPortFromAuthority = /^(?:[^@]*@)?([^:]*)(:\d*)?$/; + var reIPv6PortFromAuthority = /^(?:[^@]*@)?(\[[0-9a-f:]*\])(:\d*)?$/i; + + var reHostFromNakedAuthority = /^[0-9a-z._-]+[0-9a-z]$/i; + var reHostFromAuthority = /^(?:[^@]*@)?([^:]+)(?::\d*)?$/; + var reIPv6FromAuthority = /^(?:[^@]*@)?(\[[0-9a-f:]+\])(?::\d*)?$/i; + + // Coarse (but fast) tests + var reValidHostname = /^([a-z\d]+(-*[a-z\d]+)*)(\.[a-z\d]+(-*[a-z\d])*)*$/; + var reIPAddressNaive = /^\d+\.\d+\.\d+\.\d+$|^\[[\da-zA-Z:]+\]$/; + + // Accurate tests + // Source.: http://stackoverflow.com/questions/5284147/validating-ipv4-addresses-with-regexp/5284410#5284410 + //var reIPv4 = /^((25[0-5]|2[0-4]\d|[01]?\d\d?)(\.|$)){4}/; + + // Source: http://forums.intermapper.com/viewtopic.php?p=1096#1096 + //var reIPv6 = /^\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?\s*$/; + + /******************************************************************************/ + + var reset = function(o) { + o.scheme = ''; + o.hostname = ''; + o._ipv4 = undefined; + o._ipv6 = undefined; + o.port = ''; + o.path = ''; + o.query = ''; + o.fragment = ''; + return o; + }; + + var resetAuthority = function(o) { + o.hostname = ''; + o._ipv4 = undefined; + o._ipv6 = undefined; + o.port = ''; + return o; + }; + + /******************************************************************************/ + + // This will be exported + + var URI = { + scheme: '', + authority: '', + hostname: '', + _ipv4: undefined, + _ipv6: undefined, + port: '', + domain: undefined, + path: '', + query: '', + fragment: '', + schemeBit: (1 << 0), + userBit: (1 << 1), + passwordBit: (1 << 2), + hostnameBit: (1 << 3), + portBit: (1 << 4), + pathBit: (1 << 5), + queryBit: (1 << 6), + fragmentBit: (1 << 7), + allBits: (0xFFFF) + }; + + URI.authorityBit = (URI.userBit | URI.passwordBit | URI.hostnameBit | URI.portBit); + URI.normalizeBits = (URI.schemeBit | URI.hostnameBit | URI.pathBit | URI.queryBit); + + /******************************************************************************/ + + // See: https://en.wikipedia.org/wiki/URI_scheme#Examples + // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + // + // foo://example.com:8042/over/there?name=ferret#nose + // \_/ \______________/\_________/ \_________/ \__/ + // | | | | | + // scheme authority path query fragment + // | _____________________|__ + // / \ / \ + // urn:example:animal:ferret:nose + + URI.set = function(uri) { + if ( uri === undefined ) { + return reset(URI); + } + var matches = reRFC3986.exec(uri); + if ( !matches ) { + return reset(URI); + } + this.scheme = matches[1] !== undefined ? matches[1].slice(0, -1) : ''; + this.authority = matches[2] !== undefined ? matches[2].slice(2).toLowerCase() : ''; + this.path = matches[3] !== undefined ? matches[3] : ''; + + // <http://tools.ietf.org/html/rfc3986#section-6.2.3> + // "In general, a URI that uses the generic syntax for authority + // "with an empty path should be normalized to a path of '/'." + if ( this.authority !== '' && this.path === '' ) { + this.path = '/'; + } + this.query = matches[4] !== undefined ? matches[4].slice(1) : ''; + this.fragment = matches[5] !== undefined ? matches[5].slice(1) : ''; + + // Assume very simple authority, i.e. just a hostname (highest likelihood + // case for ηMatrix) + if ( reHostFromNakedAuthority.test(this.authority) ) { + this.hostname = this.authority; + this.port = ''; + return this; + } + // Authority contains more than just a hostname + matches = reHostPortFromAuthority.exec(this.authority); + if ( !matches ) { + matches = reIPv6PortFromAuthority.exec(this.authority); + if ( !matches ) { + return resetAuthority(URI); + } + } + this.hostname = matches[1] !== undefined ? matches[1] : ''; + // http://en.wikipedia.org/wiki/FQDN + if ( this.hostname.slice(-1) === '.' ) { + this.hostname = this.hostname.slice(0, -1); + } + this.port = matches[2] !== undefined ? matches[2].slice(1) : ''; + return this; + }; + + /******************************************************************************/ + + // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + // + // foo://example.com:8042/over/there?name=ferret#nose + // \_/ \______________/\_________/ \_________/ \__/ + // | | | | | + // scheme authority path query fragment + // | _____________________|__ + // / \ / \ + // urn:example:animal:ferret:nose + + URI.assemble = function(bits) { + if ( bits === undefined ) { + bits = this.allBits; + } + var s = []; + if ( this.scheme && (bits & this.schemeBit) ) { + s.push(this.scheme, ':'); + } + if ( this.hostname && (bits & this.hostnameBit) ) { + s.push('//', this.hostname); + } + if ( this.port && (bits & this.portBit) ) { + s.push(':', this.port); + } + if ( this.path && (bits & this.pathBit) ) { + s.push(this.path); + } + if ( this.query && (bits & this.queryBit) ) { + s.push('?', this.query); + } + if ( this.fragment && (bits & this.fragmentBit) ) { + s.push('#', this.fragment); + } + return s.join(''); + }; -// Favorite regex tool: http://regex101.com/ - -// Ref: <http://tools.ietf.org/html/rfc3986#page-50> -// I removed redundant capture groups: capture less = peform faster. See -// <http://jsperf.com/old-uritools-vs-new-uritools> -// Performance improvements welcomed. -// jsperf: <http://jsperf.com/old-uritools-vs-new-uritools> -var reRFC3986 = /^([^:\/?#]+:)?(\/\/[^\/?#]*)?([^?#]*)(\?[^#]*)?(#.*)?/; - -// Derived -var reSchemeFromURI = /^[^:\/?#]+:/; -var reAuthorityFromURI = /^(?:[^:\/?#]+:)?(\/\/[^\/?#]+)/; -var reOriginFromURI = /^(?:[^:\/?#]+:)?(?:\/\/[^\/?#]+)/; -var reCommonHostnameFromURL = /^https?:\/\/([0-9a-z_][0-9a-z._-]*[0-9a-z])\//; -var rePathFromURI = /^(?:[^:\/?#]+:)?(?:\/\/[^\/?#]*)?([^?#]*)/; -var reMustNormalizeHostname = /[^0-9a-z._-]/; - -// These are to parse authority field, not parsed by above official regex -// IPv6 is seen as an exception: a non-compatible IPv6 is first tried, and -// if it fails, the IPv6 compatible regex istr used. This helps -// peformance by avoiding the use of a too complicated regex first. - -// https://github.com/gorhill/httpswitchboard/issues/211 -// "While a hostname may not contain other characters, such as the -// "underscore character (_), other DNS names may contain the underscore" -var reHostPortFromAuthority = /^(?:[^@]*@)?([^:]*)(:\d*)?$/; -var reIPv6PortFromAuthority = /^(?:[^@]*@)?(\[[0-9a-f:]*\])(:\d*)?$/i; - -var reHostFromNakedAuthority = /^[0-9a-z._-]+[0-9a-z]$/i; -var reHostFromAuthority = /^(?:[^@]*@)?([^:]+)(?::\d*)?$/; -var reIPv6FromAuthority = /^(?:[^@]*@)?(\[[0-9a-f:]+\])(?::\d*)?$/i; - -// Coarse (but fast) tests -var reValidHostname = /^([a-z\d]+(-*[a-z\d]+)*)(\.[a-z\d]+(-*[a-z\d])*)*$/; -var reIPAddressNaive = /^\d+\.\d+\.\d+\.\d+$|^\[[\da-zA-Z:]+\]$/; - -// Accurate tests -// Source.: http://stackoverflow.com/questions/5284147/validating-ipv4-addresses-with-regexp/5284410#5284410 -//var reIPv4 = /^((25[0-5]|2[0-4]\d|[01]?\d\d?)(\.|$)){4}/; - -// Source: http://forums.intermapper.com/viewtopic.php?p=1096#1096 -//var reIPv6 = /^\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?\s*$/; + /******************************************************************************/ -/******************************************************************************/ + URI.originFromURI = function(uri) { + var matches = reOriginFromURI.exec(uri); + return matches !== null ? matches[0].toLowerCase() : ''; + }; -var reset = function(o) { - o.scheme = ''; - o.hostname = ''; - o._ipv4 = undefined; - o._ipv6 = undefined; - o.port = ''; - o.path = ''; - o.query = ''; - o.fragment = ''; - return o; -}; - -var resetAuthority = function(o) { - o.hostname = ''; - o._ipv4 = undefined; - o._ipv6 = undefined; - o.port = ''; - return o; -}; + /******************************************************************************/ -/******************************************************************************/ + URI.schemeFromURI = function(uri) { + var matches = reSchemeFromURI.exec(uri); + if ( matches === null ) { + return ''; + } + return matches[0].slice(0, -1).toLowerCase(); + }; -// This will be exported - -var URI = { - scheme: '', - authority: '', - hostname: '', - _ipv4: undefined, - _ipv6: undefined, - port: '', - domain: undefined, - path: '', - query: '', - fragment: '', - schemeBit: (1 << 0), - userBit: (1 << 1), - passwordBit: (1 << 2), - hostnameBit: (1 << 3), - portBit: (1 << 4), - pathBit: (1 << 5), - queryBit: (1 << 6), - fragmentBit: (1 << 7), - allBits: (0xFFFF) -}; - -URI.authorityBit = (URI.userBit | URI.passwordBit | URI.hostnameBit | URI.portBit); -URI.normalizeBits = (URI.schemeBit | URI.hostnameBit | URI.pathBit | URI.queryBit); + /******************************************************************************/ -/******************************************************************************/ + URI.isNetworkScheme = function(scheme) { + return this.reNetworkScheme.test(scheme); + }; -// See: https://en.wikipedia.org/wiki/URI_scheme#Examples -// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] -// -// foo://example.com:8042/over/there?name=ferret#nose -// \_/ \______________/\_________/ \_________/ \__/ -// | | | | | -// scheme authority path query fragment -// | _____________________|__ -// / \ / \ -// urn:example:animal:ferret:nose - -URI.set = function(uri) { - if ( uri === undefined ) { - return reset(URI); - } - var matches = reRFC3986.exec(uri); - if ( !matches ) { - return reset(URI); - } - this.scheme = matches[1] !== undefined ? matches[1].slice(0, -1) : ''; - this.authority = matches[2] !== undefined ? matches[2].slice(2).toLowerCase() : ''; - this.path = matches[3] !== undefined ? matches[3] : ''; - - // <http://tools.ietf.org/html/rfc3986#section-6.2.3> - // "In general, a URI that uses the generic syntax for authority - // "with an empty path should be normalized to a path of '/'." - if ( this.authority !== '' && this.path === '' ) { - this.path = '/'; - } - this.query = matches[4] !== undefined ? matches[4].slice(1) : ''; - this.fragment = matches[5] !== undefined ? matches[5].slice(1) : ''; - - // Assume very simple authority, i.e. just a hostname (highest likelihood - // case for ηMatrix) - if ( reHostFromNakedAuthority.test(this.authority) ) { - this.hostname = this.authority; - this.port = ''; - return this; - } - // Authority contains more than just a hostname - matches = reHostPortFromAuthority.exec(this.authority); - if ( !matches ) { - matches = reIPv6PortFromAuthority.exec(this.authority); - if ( !matches ) { - return resetAuthority(URI); - } - } - this.hostname = matches[1] !== undefined ? matches[1] : ''; - // http://en.wikipedia.org/wiki/FQDN - if ( this.hostname.slice(-1) === '.' ) { - this.hostname = this.hostname.slice(0, -1); - } - this.port = matches[2] !== undefined ? matches[2].slice(1) : ''; - return this; -}; + URI.reNetworkScheme = /^(?:https?|wss?|ftps?)\b/; -/******************************************************************************/ + /******************************************************************************/ -// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] -// -// foo://example.com:8042/over/there?name=ferret#nose -// \_/ \______________/\_________/ \_________/ \__/ -// | | | | | -// scheme authority path query fragment -// | _____________________|__ -// / \ / \ -// urn:example:animal:ferret:nose - -URI.assemble = function(bits) { - if ( bits === undefined ) { - bits = this.allBits; - } - var s = []; - if ( this.scheme && (bits & this.schemeBit) ) { - s.push(this.scheme, ':'); - } - if ( this.hostname && (bits & this.hostnameBit) ) { - s.push('//', this.hostname); - } - if ( this.port && (bits & this.portBit) ) { - s.push(':', this.port); - } - if ( this.path && (bits & this.pathBit) ) { - s.push(this.path); - } - if ( this.query && (bits & this.queryBit) ) { - s.push('?', this.query); - } - if ( this.fragment && (bits & this.fragmentBit) ) { - s.push('#', this.fragment); - } - return s.join(''); -}; + URI.isSecureScheme = function(scheme) { + return this.reSecureScheme.test(scheme); + }; -/******************************************************************************/ + URI.reSecureScheme = /^(?:https|wss|ftps)\b/; -URI.originFromURI = function(uri) { - var matches = reOriginFromURI.exec(uri); - return matches !== null ? matches[0].toLowerCase() : ''; -}; + /******************************************************************************/ -/******************************************************************************/ + URI.authorityFromURI = function(uri) { + var matches = reAuthorityFromURI.exec(uri); + if ( !matches ) { + return ''; + } + return matches[1].slice(2).toLowerCase(); + }; -URI.schemeFromURI = function(uri) { - var matches = reSchemeFromURI.exec(uri); - if ( matches === null ) { - return ''; - } - return matches[0].slice(0, -1).toLowerCase(); -}; + /******************************************************************************/ -/******************************************************************************/ + // The most used function, so it better be fast. -URI.isNetworkScheme = function(scheme) { - return this.reNetworkScheme.test(scheme); -}; + // https://github.com/gorhill/uBlock/issues/1559 + // See http://en.wikipedia.org/wiki/FQDN + // https://bugzilla.mozilla.org/show_bug.cgi?id=1360285 + // Revisit punycode dependency when above issue is fixed in Firefox. -URI.reNetworkScheme = /^(?:https?|wss?|ftps?)\b/; + URI.hostnameFromURI = function(uri) { + var matches = reCommonHostnameFromURL.exec(uri); + if ( matches !== null ) { return matches[1]; } + matches = reAuthorityFromURI.exec(uri); + if ( matches === null ) { return ''; } + var authority = matches[1].slice(2); + // Assume very simple authority (most common case for ηBlock) + if ( reHostFromNakedAuthority.test(authority) ) { + return authority.toLowerCase(); + } + matches = reHostFromAuthority.exec(authority); + if ( matches === null ) { + matches = reIPv6FromAuthority.exec(authority); + if ( matches === null ) { return ''; } + } + var hostname = matches[1]; + while ( hostname.endsWith('.') ) { + hostname = hostname.slice(0, -1); + } + if ( reMustNormalizeHostname.test(hostname) ) { + hostname = punycode.toASCII(hostname.toLowerCase()); + } + return hostname; + }; -/******************************************************************************/ + /******************************************************************************/ -URI.isSecureScheme = function(scheme) { - return this.reSecureScheme.test(scheme); -}; + URI.domainFromHostname = function(hostname) { + // Try to skip looking up the PSL database + var entry = domainCache.get(hostname); + if ( entry !== undefined ) { + entry.tstamp = Date.now(); + return entry.domain; + } + // Meh.. will have to search it + if ( reIPAddressNaive.test(hostname) === false ) { + return domainCacheAdd(hostname, psl.getDomain(hostname)); + } + return domainCacheAdd(hostname, hostname); + }; -URI.reSecureScheme = /^(?:https|wss|ftps)\b/; + URI.domain = function() { + return this.domainFromHostname(this.hostname); + }; -/******************************************************************************/ + // It is expected that there is higher-scoped `publicSuffixList` lingering + // somewhere. Cache it. See <https://github.com/gorhill/publicsuffixlist.js>. + var psl = publicSuffixList; -URI.authorityFromURI = function(uri) { - var matches = reAuthorityFromURI.exec(uri); - if ( !matches ) { - return ''; - } - return matches[1].slice(2).toLowerCase(); -}; + /******************************************************************************/ -/******************************************************************************/ + URI.pathFromURI = function(uri) { + var matches = rePathFromURI.exec(uri); + return matches !== null ? matches[1] : ''; + }; -// The most used function, so it better be fast. - -// https://github.com/gorhill/uBlock/issues/1559 -// See http://en.wikipedia.org/wiki/FQDN -// https://bugzilla.mozilla.org/show_bug.cgi?id=1360285 -// Revisit punycode dependency when above issue is fixed in Firefox. - -URI.hostnameFromURI = function(uri) { - var matches = reCommonHostnameFromURL.exec(uri); - if ( matches !== null ) { return matches[1]; } - matches = reAuthorityFromURI.exec(uri); - if ( matches === null ) { return ''; } - var authority = matches[1].slice(2); - // Assume very simple authority (most common case for ηBlock) - if ( reHostFromNakedAuthority.test(authority) ) { - return authority.toLowerCase(); - } - matches = reHostFromAuthority.exec(authority); - if ( matches === null ) { - matches = reIPv6FromAuthority.exec(authority); - if ( matches === null ) { return ''; } - } - var hostname = matches[1]; - while ( hostname.endsWith('.') ) { - hostname = hostname.slice(0, -1); - } - if ( reMustNormalizeHostname.test(hostname) ) { - hostname = punycode.toASCII(hostname.toLowerCase()); - } - return hostname; -}; + /******************************************************************************/ -/******************************************************************************/ + // Trying to alleviate the worries of looking up too often the domain name from + // a hostname. With a cache, uBlock benefits given that it deals with a + // specific set of hostnames within a narrow time span -- in other words, I + // believe probability of cache hit are high in uBlock. -URI.domainFromHostname = function(hostname) { - // Try to skip looking up the PSL database - var entry = domainCache.get(hostname); - if ( entry !== undefined ) { - entry.tstamp = Date.now(); - return entry.domain; - } - // Meh.. will have to search it - if ( reIPAddressNaive.test(hostname) === false ) { - return domainCacheAdd(hostname, psl.getDomain(hostname)); - } - return domainCacheAdd(hostname, hostname); -}; - -URI.domain = function() { - return this.domainFromHostname(this.hostname); -}; - -// It is expected that there is higher-scoped `publicSuffixList` lingering -// somewhere. Cache it. See <https://github.com/gorhill/publicsuffixlist.js>. -var psl = publicSuffixList; + var domainCache = new Map(); + var domainCacheCountLowWaterMark = 75; + var domainCacheCountHighWaterMark = 100; + var domainCacheEntryJunkyard = []; + var domainCacheEntryJunkyardMax = domainCacheCountHighWaterMark - domainCacheCountLowWaterMark; -/******************************************************************************/ + var DomainCacheEntry = function(domain) { + this.init(domain); + }; -URI.pathFromURI = function(uri) { - var matches = rePathFromURI.exec(uri); - return matches !== null ? matches[1] : ''; -}; - -/******************************************************************************/ + DomainCacheEntry.prototype.init = function(domain) { + this.domain = domain; + this.tstamp = Date.now(); + return this; + }; - // Trying to alleviate the worries of looking up too often the domain name from -// a hostname. With a cache, uBlock benefits given that it deals with a -// specific set of hostnames within a narrow time span -- in other words, I -// believe probability of cache hit are high in uBlock. - -var domainCache = new Map(); -var domainCacheCountLowWaterMark = 75; -var domainCacheCountHighWaterMark = 100; -var domainCacheEntryJunkyard = []; -var domainCacheEntryJunkyardMax = domainCacheCountHighWaterMark - domainCacheCountLowWaterMark; - -var DomainCacheEntry = function(domain) { - this.init(domain); -}; - -DomainCacheEntry.prototype.init = function(domain) { - this.domain = domain; - this.tstamp = Date.now(); - return this; -}; - -DomainCacheEntry.prototype.dispose = function() { - this.domain = ''; - if ( domainCacheEntryJunkyard.length < domainCacheEntryJunkyardMax ) { - domainCacheEntryJunkyard.push(this); - } -}; - -var domainCacheEntryFactory = function(domain) { - var entry = domainCacheEntryJunkyard.pop(); - if ( entry ) { - return entry.init(domain); - } - return new DomainCacheEntry(domain); -}; - -var domainCacheAdd = function(hostname, domain) { - var entry = domainCache.get(hostname); - if ( entry !== undefined ) { - entry.tstamp = Date.now(); - } else { - domainCache.set(hostname, domainCacheEntryFactory(domain)); - if ( domainCache.size === domainCacheCountHighWaterMark ) { - domainCachePrune(); - } - } - return domain; -}; - -var domainCacheEntrySort = function(a, b) { - return domainCache.get(b).tstamp - domainCache.get(a).tstamp; -}; - -var domainCachePrune = function() { - var hostnames = Array.from(domainCache.keys()) - .sort(domainCacheEntrySort) - .slice(domainCacheCountLowWaterMark); - var i = hostnames.length; - var hostname; - while ( i-- ) { - hostname = hostnames[i]; - domainCache.get(hostname).dispose(); - domainCache.delete(hostname); - } -}; - -var domainCacheReset = function() { - domainCache.clear(); -}; - -psl.onChanged.addListener(domainCacheReset); + DomainCacheEntry.prototype.dispose = function() { + this.domain = ''; + if ( domainCacheEntryJunkyard.length < domainCacheEntryJunkyardMax ) { + domainCacheEntryJunkyard.push(this); + } + }; -/******************************************************************************/ + var domainCacheEntryFactory = function(domain) { + var entry = domainCacheEntryJunkyard.pop(); + if ( entry ) { + return entry.init(domain); + } + return new DomainCacheEntry(domain); + }; + + var domainCacheAdd = function(hostname, domain) { + var entry = domainCache.get(hostname); + if ( entry !== undefined ) { + entry.tstamp = Date.now(); + } else { + domainCache.set(hostname, domainCacheEntryFactory(domain)); + if ( domainCache.size === domainCacheCountHighWaterMark ) { + domainCachePrune(); + } + } + return domain; + }; + + var domainCacheEntrySort = function(a, b) { + return domainCache.get(b).tstamp - domainCache.get(a).tstamp; + }; + + var domainCachePrune = function() { + var hostnames = Array.from(domainCache.keys()) + .sort(domainCacheEntrySort) + .slice(domainCacheCountLowWaterMark); + var i = hostnames.length; + var hostname; + while ( i-- ) { + hostname = hostnames[i]; + domainCache.get(hostname).dispose(); + domainCache.delete(hostname); + } + }; -URI.domainFromURI = function(uri) { - if ( !uri ) { - return ''; - } - return this.domainFromHostname(this.hostnameFromURI(uri)); -}; + var domainCacheReset = function() { + domainCache.clear(); + }; -/******************************************************************************/ + psl.onChanged.addListener(domainCacheReset); -// Normalize the way ηMatrix expects it + /******************************************************************************/ -URI.normalizedURI = function() { - // Will be removed: - // - port - // - user id/password - // - fragment - return this.assemble(this.normalizeBits); -}; + URI.domainFromURI = function(uri) { + if ( !uri ) { + return ''; + } + return this.domainFromHostname(this.hostnameFromURI(uri)); + }; -/******************************************************************************/ + /******************************************************************************/ -URI.rootURL = function() { - if ( !this.hostname ) { - return ''; - } - return this.assemble(this.schemeBit | this.hostnameBit); -}; + // Normalize the way ηMatrix expects it -/******************************************************************************/ + URI.normalizedURI = function() { + // Will be removed: + // - port + // - user id/password + // - fragment + return this.assemble(this.normalizeBits); + }; -URI.isValidHostname = function(hostname) { - var r; - try { - r = reValidHostname.test(hostname); - } - catch (e) { - return false; - } - return r; -}; + /******************************************************************************/ -/******************************************************************************/ + URI.rootURL = function() { + if ( !this.hostname ) { + return ''; + } + return this.assemble(this.schemeBit | this.hostnameBit); + }; -// Return the parent domain. For IP address, there is no parent domain. + /******************************************************************************/ -URI.parentHostnameFromHostname = function(hostname) { - // `locahost` => `` - // `example.org` => `example.org` - // `www.example.org` => `example.org` - // `tomato.www.example.org` => `example.org` - var domain = this.domainFromHostname(hostname); + URI.isValidHostname = function(hostname) { + var r; + try { + r = reValidHostname.test(hostname); + } + catch (e) { + return false; + } + return r; + }; + + /******************************************************************************/ + + // Return the parent domain. For IP address, there is no parent domain. + + URI.parentHostnameFromHostname = function(hostname) { + // `locahost` => `` + // `example.org` => `example.org` + // `www.example.org` => `example.org` + // `tomato.www.example.org` => `example.org` + var domain = this.domainFromHostname(hostname); + + // `locahost` === `` => bye + // `example.org` === `example.org` => bye + // `www.example.org` !== `example.org` => stay + // `tomato.www.example.org` !== `example.org` => stay + if ( domain === '' || domain === hostname ) { + return undefined; + } - // `locahost` === `` => bye - // `example.org` === `example.org` => bye - // `www.example.org` !== `example.org` => stay - // `tomato.www.example.org` !== `example.org` => stay - if ( domain === '' || domain === hostname ) { - return undefined; - } + // Parent is hostname minus first label + return hostname.slice(hostname.indexOf('.') + 1); + }; - // Parent is hostname minus first label - return hostname.slice(hostname.indexOf('.') + 1); -}; + /******************************************************************************/ -/******************************************************************************/ + // Return all possible parent hostnames which can be derived from `hostname`, + // ordered from direct parent up to domain inclusively. -// Return all possible parent hostnames which can be derived from `hostname`, -// ordered from direct parent up to domain inclusively. - -URI.parentHostnamesFromHostname = function(hostname) { - // TODO: I should create an object which is optimized to receive - // the list of hostnames by making it reusable (junkyard etc.) and which - // has its own element counter property in order to avoid memory - // alloc/dealloc. - var domain = this.domainFromHostname(hostname); - if ( domain === '' || domain === hostname ) { - return []; - } - var nodes = []; - var pos; - for (;;) { - pos = hostname.indexOf('.'); - if ( pos < 0 ) { - break; - } - hostname = hostname.slice(pos + 1); - nodes.push(hostname); - if ( hostname === domain ) { - break; - } - } - return nodes; -}; + URI.parentHostnamesFromHostname = function(hostname) { + // TODO: I should create an object which is optimized to receive + // the list of hostnames by making it reusable (junkyard etc.) and which + // has its own element counter property in order to avoid memory + // alloc/dealloc. + var domain = this.domainFromHostname(hostname); + if ( domain === '' || domain === hostname ) { + return []; + } + var nodes = []; + var pos; + for (;;) { + pos = hostname.indexOf('.'); + if ( pos < 0 ) { + break; + } + hostname = hostname.slice(pos + 1); + nodes.push(hostname); + if ( hostname === domain ) { + break; + } + } + return nodes; + }; -/******************************************************************************/ + /******************************************************************************/ -// Return all possible hostnames which can be derived from `hostname`, -// ordered from self up to domain inclusively. + // Return all possible hostnames which can be derived from `hostname`, + // ordered from self up to domain inclusively. -URI.allHostnamesFromHostname = function(hostname) { - var nodes = this.parentHostnamesFromHostname(hostname); - nodes.unshift(hostname); - return nodes; -}; + URI.allHostnamesFromHostname = function(hostname) { + var nodes = this.parentHostnamesFromHostname(hostname); + nodes.unshift(hostname); + return nodes; + }; -/******************************************************************************/ + /******************************************************************************/ -URI.toString = function() { - return this.assemble(); -}; + URI.toString = function() { + return this.assemble(); + }; -/******************************************************************************/ + /******************************************************************************/ -// Export + // Export -return URI; + return URI; -/******************************************************************************/ + /******************************************************************************/ })(); /******************************************************************************/ - |