/*******************************************************************************

    ηMatrix - a browser extension to black/white list requests.
    Copyright (C) 2014-2019 Raymond Hill
    Copyright (C) 2019 Alessio Vanni

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see {http://www.gnu.org/licenses/}.

    Home: https://libregit.org/heckyel/ematrix
    uMatrix Home: https://github.com/gorhill/uMatrix
*/

'use strict';

Components.utils.import('chrome://ematrix/content/lib/Punycode.jsm');
Components.utils.import('chrome://ematrix/content/lib/PublicSuffixList.jsm');

var EXPORTED_SYMBOLS = ['UriTools'];

// Generic URI splitter: scheme, authority, path, query, fragment.
var reRFC3986 = /^([^:\/?#]+:)?(\/\/[^\/?#]*)?([^?#]*)(\?[^#]*)?(#.*)?/;

// Single-purpose extractors, used when only one component is needed.
var reSchemeFromURI = /^[^:\/?#]+:/;
var reAuthorityFromURI = /^(?:[^:\/?#]+:)?(\/\/[^\/?#]+)/;
var reOriginFromURI = /^(?:[^:\/?#]+:)?(?:\/\/[^\/?#]+)/;
var reCommonHostnameFromURL = /^https?:\/\/([0-9a-z_][0-9a-z._-]*[0-9a-z])\//;
var rePathFromURI = /^(?:[^:\/?#]+:)?(?:\/\/[^\/?#]*)?([^?#]*)/;
var reMustNormalizeHostname = /[^0-9a-z._-]/;

// These are to parse the authority field, which is not parsed by the
// official regex above.
// IPv6 is seen as an exception: a non-compatible IPv6 regex is tried first,
// and if it fails, the IPv6 compatible regex is used. This helps
// performance by avoiding the use of a too complicated regex first.

// https://github.com/gorhill/httpswitchboard/issues/211
// "While a hostname may not contain other characters, such as the
// "underscore character (_), other DNS names may contain the underscore"
var reHostPortFromAuthority = /^(?:[^@]*@)?([^:]*)(:\d*)?$/;
var reIPv6PortFromAuthority = /^(?:[^@]*@)?(\[[0-9a-f:]*\])(:\d*)?$/i;

var reHostFromNakedAuthority = /^[0-9a-z._-]+[0-9a-z]$/i;
var reHostFromAuthority = /^(?:[^@]*@)?([^:]+)(?::\d*)?$/;
var reIPv6FromAuthority = /^(?:[^@]*@)?(\[[0-9a-f:]+\])(?::\d*)?$/i;

// Coarse (but fast) tests
var reValidHostname = /^([a-z\d]+(-*[a-z\d]+)*)(\.[a-z\d]+(-*[a-z\d])*)*$/;
var reIPAddressNaive = /^\d+\.\d+\.\d+\.\d+$|^\[[\da-zA-Z:]+\]$/;
var reNetworkScheme = /^(?:https?|wss?|ftps?)\b/;
var reSecureScheme = /^(?:https|wss|ftps)\b/;

/**
 * Clear the parsed components of a URI holder.
 * Note: `authority` and `domain` are left untouched.
 *
 * @param {URI} o - the object to clear
 * @returns {URI} the same object, for chaining
 */
function reset(o) {
    o.scheme = '';
    o.hostname = '';
    o._ipv4 = undefined;
    o._ipv6 = undefined;
    o.port = '';
    o.path = '';
    o.query = '';
    o.fragment = '';
    return o;
}

/**
 * Clear only the fields derived from the authority (hostname, IP, port).
 *
 * @param {URI} o - the object to clear
 * @returns {URI} the same object, for chaining
 */
function resetAuthority(o) {
    o.hostname = '';
    o._ipv4 = undefined;
    o._ipv6 = undefined;
    o.port = '';
    return o;
}

/**
 * Holder for the components of a parsed URI, plus the bit flags used to
 * select which components `UriTools.assemble()` outputs.
 */
function URI() {
    this.scheme = '';
    this.authority = '';
    this.hostname = '';
    this._ipv4 = undefined;
    this._ipv6 = undefined;
    this.port = '';
    this.domain = undefined;
    this.path = '';
    this.query = '';
    this.fragment = '';

    // One bit per URI component.
    this.schemeBit = (1 << 0);
    this.userBit = (1 << 1);
    this.passwordBit = (1 << 2);
    this.hostnameBit = (1 << 3);
    this.portBit = (1 << 4);
    this.pathBit = (1 << 5);
    this.queryBit = (1 << 6);
    this.fragmentBit = (1 << 7);
    this.allBits = (0xFFFF);

    // Composite masks.
    this.authorityBit = (this.userBit
                         | this.passwordBit
                         | this.hostnameBit
                         | this.portBit);
    this.normalizeBits = (this.schemeBit
                          | this.hostnameBit
                          | this.pathBit
                          | this.queryBit);
}

// Single shared instance: every `UriTools.set()` call overwrites it.
var cached = new URI();

// hostname => DomainCacheEntry. When the cache reaches `cacheCountHigh`
// entries it is pruned down to the `cacheCountLow` most recently used ones.
var domainCache = new Map();
var cacheCountLow = 75;
var cacheCountHigh = 100;

// Pool of disposed entries, reused to avoid allocating new objects.
var cacheJunkyard = [];
var junkyardMax = cacheCountHigh - cacheCountLow;

/**
 * One cached hostname-to-domain resolution, with a last-use timestamp.
 *
 * @param {string} domain - the domain resolved for some hostname
 */
function DomainCacheEntry(domain) {
    this.init(domain);
}

/**
 * (Re)initialize the entry and stamp it with the current time.
 *
 * @param {string} domain
 * @returns {DomainCacheEntry} this entry
 */
DomainCacheEntry.prototype.init = function (domain) {
    this.domain = domain;
    this.tstamp = Date.now();
    return this;
};

/**
 * Release the entry; it is kept for reuse if the junkyard has room.
 */
DomainCacheEntry.prototype.dispose = function () {
    this.domain = '';
    if (cacheJunkyard.length < junkyardMax) {
        cacheJunkyard.push(this);
    }
};

/**
 * Get a cache entry for `domain`, recycling a disposed one when available.
 *
 * @param {string} domain
 * @returns {DomainCacheEntry}
 */
var domainCacheEntryFactory = function (domain) {
    let entry = cacheJunkyard.pop();
    if (entry) {
        return entry.init(domain);
    }
    return new DomainCacheEntry(domain);
};

/**
 * Record `hostname => domain` in the cache. An existing entry only has its
 * timestamp refreshed. Triggers a prune when the cache hits its high mark.
 *
 * @param {string} hostname
 * @param {string} domain
 * @returns {string} `domain`, unchanged
 */
var domainCacheAdd = function (hostname, domain) {
    let entry = domainCache.get(hostname);

    if (entry !== undefined) {
        entry.tstamp = Date.now();
    } else {
        domainCache.set(hostname, domainCacheEntryFactory(domain));
        if (domainCache.size === cacheCountHigh) {
            domainCachePrune();
        }
    }

    return domain;
};

/**
 * Comparator ordering cached hostnames from most to least recently used.
 */
var domainCacheSort = function (a, b) {
    return domainCache.get(b).tstamp - domainCache.get(a).tstamp;
};

/**
 * Evict every entry beyond the `cacheCountLow` most recently used ones.
 */
var domainCachePrune = function () {
    let hostnames = Array.from(domainCache.keys())
        .sort(domainCacheSort)
        .slice(cacheCountLow);

    for (let i = hostnames.length - 1; i >= 0; --i) {
        domainCache.get(hostnames[i]).dispose();
        domainCache.delete(hostnames[i]);
    }
};

/**
 * Drop all cached resolutions.
 */
var domainCacheReset = function () {
    domainCache.clear();
};

// Cached domains are derived from the public suffix list, so they are
// discarded whenever that list reports a change.
publicSuffixList.onChanged.addListener(domainCacheReset);

var UriTools = {
    /**
     * Parse `uri` into the module's shared URI object.
     *
     * @param {string} [uri] - the URI to parse; when undefined the shared
     *     object is simply reset
     * @returns {URI} the shared object (overwritten by the next call)
     */
    set: function (uri) {
        if (uri === undefined) {
            return reset(cached);
        }

        let matches = reRFC3986.exec(uri);
        if (!matches) {
            return reset(cached);
        }

        // matches[1] includes the trailing ':'.
        cached.scheme = matches[1] !== undefined ?
            matches[1].slice(0, -1) :
            '';
        // matches[2] includes the leading '//'.
        cached.authority = matches[2] !== undefined ?
            matches[2].slice(2).toLowerCase() :
            '';
        cached.path = matches[3] !== undefined ?
            matches[3] :
            '';

        // As per RFC3986
        if (cached.authority !== '' && cached.path === '') {
            cached.path = '/';
        }

        // matches[4] includes the leading '?', matches[5] the leading '#'.
        cached.query = matches[4] !== undefined ?
            matches[4].slice(1) :
            '';
        cached.fragment = matches[5] !== undefined ?
            matches[5].slice(1) :
            '';

        // Fast path: the authority is nothing but a hostname.
        if (reHostFromNakedAuthority.test(cached.authority)) {
            cached.hostname = cached.authority;
            cached.port = '';
            return cached;
        }

        matches = reHostPortFromAuthority.exec(cached.authority);
        if (!matches) {
            matches = reIPv6PortFromAuthority.exec(cached.authority);
            if (!matches) {
                return resetAuthority(cached);
            }
        }

        cached.hostname = matches[1] !== undefined ?
            matches[1] :
            '';

        // Drop a single trailing dot.
        if (cached.hostname.slice(-1) === '.') {
            cached.hostname = cached.hostname.slice(0, -1);
        }

        // matches[2] includes the leading ':'.
        cached.port = matches[2] !== undefined ?
            matches[2].slice(1) :
            '';

        return cached;
    },

    /**
     * Build a URI string from the components held in the shared URI object.
     *
     * @param {number} [bits] - bitwise OR of the `*Bit` flags selecting the
     *     components to output; defaults to all components
     * @returns {string} the assembled URI
     */
    assemble: function (bits) {
        if (bits === undefined) {
            bits = cached.allBits;
        }

        let s = [];

        // Bitwise test: the scheme is output only when its bit is requested.
        if (cached.scheme && (bits & cached.schemeBit)) {
            s.push(cached.scheme, ':');
        }
        if (cached.hostname && (bits & cached.hostnameBit)) {
            s.push('//', cached.hostname);
        }
        if (cached.port && (bits & cached.portBit)) {
            s.push(':', cached.port);
        }
        if (cached.path && (bits & cached.pathBit)) {
            s.push(cached.path);
        }
        if (cached.query && (bits & cached.queryBit)) {
            s.push('?', cached.query);
        }
        if (cached.fragment && (bits & cached.fragmentBit)) {
            s.push('#', cached.fragment);
        }

        return s.join('');
    },

    /**
     * @param {string} scheme
     * @returns {boolean} whether `scheme` starts with http(s), ws(s) or
     *     ftp(s)
     */
    isNetworkScheme: function (scheme) {
        return reNetworkScheme.test(scheme);
    },

    /**
     * @param {string} scheme
     * @returns {boolean} whether `scheme` starts with https, wss or ftps
     */
    isSecureScheme: function (scheme) {
        return reSecureScheme.test(scheme);
    },

    /**
     * @param {string} uri
     * @returns {string} lower-cased `scheme://authority` part of `uri`, or
     *     '' when there is no authority
     */
    originFromURI: function (uri) {
        let matches = reOriginFromURI.exec(uri);
        return matches !== null ?
            matches[0].toLowerCase() :
            '';
    },

    /**
     * @param {string} uri
     * @returns {string} lower-cased scheme without the trailing ':', or ''
     */
    schemeFromURI: function (uri) {
        let matches = reSchemeFromURI.exec(uri);
        return matches !== null ?
            matches[0].slice(0, -1).toLowerCase() :
            '';
    },

    /**
     * @param {string} uri
     * @returns {string} lower-cased authority of `uri`, or ''
     */
    authorityFromURI: function (uri) {
        let matches = reAuthorityFromURI.exec(uri);
        // The capture group includes the leading '//': strip both slashes.
        return matches !== null ?
            matches[1].slice(2).toLowerCase() :
            '';
    },

    /**
     * Extract the hostname from `uri` without touching the shared URI
     * object.
     *
     * @param {string} uri
     * @returns {string} the hostname, or '' when none can be extracted
     */
    hostnameFromURI: function (uri) {
        // Fast path for plain http(s) URLs with an already-normal hostname.
        let matches = reCommonHostnameFromURL.exec(uri);
        if (matches) {
            return matches[1];
        }

        matches = reAuthorityFromURI.exec(uri);
        if (!matches) {
            return '';
        }

        // The capture group includes the leading '//'.
        let auth = matches[1].slice(2);

        if (reHostFromNakedAuthority.test(auth)) {
            return auth.toLowerCase();
        }

        matches = reHostFromAuthority.exec(auth);
        if (!matches) {
            matches = reIPv6FromAuthority.exec(auth);
            if (!matches) {
                return '';
            }
        }

        let hostname = matches[1];
        while (hostname.endsWith('.')) {
            hostname = hostname.slice(0, -1);
        }

        // Anything outside [0-9a-z._-] needs normalizing; the converted
        // value must be kept, not discarded.
        if (reMustNormalizeHostname.test(hostname)) {
            hostname = Punycode.toASCII(hostname.toLowerCase());
        }

        return hostname;
    },

    /**
     * Resolve the domain of `hostname`, using the domain cache.
     *
     * @param {string} hostname
     * @returns {string} `hostname` itself when it looks like an IP address,
     *     otherwise whatever `publicSuffixList.getDomain()` reports
     */
    domainFromHostname: function (hostname) {
        let entry = domainCache.get(hostname);
        if (entry !== undefined) {
            entry.tstamp = Date.now();
            return entry.domain;
        }

        if (reIPAddressNaive.test(hostname) === false) {
            return domainCacheAdd(hostname,
                                  publicSuffixList.getDomain(hostname));
        }

        return domainCacheAdd(hostname, hostname);
    },

    /**
     * @param {string} uri
     * @returns {string} the domain of the hostname in `uri`; '' when `uri`
     *     is falsy
     */
    domainFromURI: function (uri) {
        if (!uri) {
            return '';
        }
        return UriTools.domainFromHostname(UriTools.hostnameFromURI(uri));
    },

    /**
     * @returns {string} the domain of the hostname currently held in the
     *     shared URI object
     */
    domain: function () {
        return UriTools.domainFromHostname(cached.hostname);
    },

    /**
     * @param {string} uri
     * @returns {string} the path component of `uri`, or ''
     */
    pathFromURI: function (uri) {
        let matches = rePathFromURI.exec(uri);
        return matches !== null ?
            matches[1] :
            '';
    },

    /**
     * @returns {string} the shared URI assembled from scheme, hostname,
     *     path and query only
     */
    normalizedURI: function () {
        return UriTools.assemble(cached.normalizeBits);
    },

    /**
     * @returns {string} `scheme://hostname` of the shared URI, or '' when
     *     it has no hostname
     */
    rootURL: function () {
        if (!cached.hostname) {
            return '';
        }
        return UriTools.assemble(cached.schemeBit | cached.hostnameBit);
    },

    /**
     * Coarse hostname syntax check.
     *
     * @param {string} hostname
     * @returns {boolean}
     */
    isValidHostname: function (hostname) {
        // NOTE(review): the try/catch is kept from the original; it is not
        // clear from this file which input would make the test throw.
        try {
            let r = reValidHostname.test(hostname);
            return r;
        } catch (e) {
            return false;
        }
    },

    /**
     * Return `hostname` minus its first label.
     *
     * @param {string} hostname
     * @returns {string|undefined} the parent hostname, or undefined when
     *     `hostname` has no domain or already is its own domain
     */
    parentHostnameFromHostname: function (hostname) {
        // Domain as resolved below, e.g.:
        //   "www.example.org" => "example.org"
        //   "tomato.www.example.org" => "example.org"
        let domain = UriTools.domainFromHostname(hostname);

        // No domain, or the hostname is the domain: there is no parent.
        if (domain === '' || domain === hostname) {
            return undefined;
        }

        // Parent is hostname minus first label.
        return hostname.slice(hostname.indexOf('.') + 1);
    },

    /**
     * List every ancestor of `hostname`, from its immediate parent down to
     * (and including) its domain.
     *
     * @param {string} hostname
     * @returns {string[]} the ancestors; empty when `hostname` has no
     *     domain or already is its own domain
     */
    parentHostnamesFromHostname: function (hostname) {
        let domain = UriTools.domainFromHostname(hostname);
        if (domain === '' || domain === hostname) {
            return [];
        }

        let nodes = [];
        for (;;) {
            let pos = hostname.indexOf('.');
            if (pos < 0) {
                break;
            }

            // Strip the leading label and record what is left.
            hostname = hostname.slice(pos + 1);
            nodes.push(hostname);

            if (hostname === domain) {
                break;
            }
        }

        return nodes;
    },

    /**
     * @param {string} hostname
     * @returns {string[]} `hostname` followed by all of its ancestors
     */
    allHostNamesFromHostname: function (hostname) {
        let nodes = UriTools.parentHostnamesFromHostname(hostname);
        nodes.unshift(hostname);
        return nodes;
    },

    /**
     * @returns {string} the shared URI assembled from all of its components
     */
    toString: function () {
        return UriTools.assemble();
    },
};