aboutsummaryrefslogtreecommitdiffstats
path: root/lib/UriTools.jsm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/UriTools.jsm')
-rw-r--r--lib/UriTools.jsm405
1 files changed, 405 insertions, 0 deletions
diff --git a/lib/UriTools.jsm b/lib/UriTools.jsm
new file mode 100644
index 0000000..4971909
--- /dev/null
+++ b/lib/UriTools.jsm
@@ -0,0 +1,405 @@
+/*******************************************************************************
+
+ ηMatrix - a browser extension to black/white list requests.
+ Copyright (C) 2014-2019 Raymond Hill
+ Copyright (C) 2019 Alessio Vanni
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see {http://www.gnu.org/licenses/}.
+
+ Home: https://gitlab.com/vannilla/ematrix
+ uMatrix Home: https://github.com/gorhill/uMatrix
+*/
+
+'use strict';
+
+Components.utils.import('chrome://ematrix/content/lib/Punycode.jsm');
+Components.utils.import('chrome://ematrix/content/lib/PublicSuffixList.jsm');
+
+var EXPORTED_SYMBOLS = ['UriTools'];
+
// Generic URI splitter: scheme, authority, path, query and fragment are
// captured (with their delimiters) in groups 1 to 5.
var reRFC3986 = /^([^:\/?#]+:)?(\/\/[^\/?#]*)?([^?#]*)(\?[^#]*)?(#.*)?/;
var reSchemeFromURI = /^[^:\/?#]+:/;
var reAuthorityFromURI = /^(?:[^:\/?#]+:)?(\/\/[^\/?#]+)/;
var reOriginFromURI = /^(?:[^:\/?#]+:)?(?:\/\/[^\/?#]+)/;
var reCommonHostnameFromURL = /^https?:\/\/([0-9a-z_][0-9a-z._-]*[0-9a-z])\//;
var rePathFromURI = /^(?:[^:\/?#]+:)?(?:\/\/[^\/?#]*)?([^?#]*)/;
var reMustNormalizeHostname = /[^0-9a-z._-]/;

// The regexes below parse the authority field, which the generic regex
// above leaves unparsed.
// IPv6 is treated as an exception: the regex which is not IPv6-compatible
// is tried first and, only if it fails, the IPv6-compatible regex is used.
// This helps performance by not running the more complicated regex first.

// https://github.com/gorhill/httpswitchboard/issues/211
// "While a hostname may not contain other characters, such as the
// "underscore character (_), other DNS names may contain the underscore"
var reHostPortFromAuthority = /^(?:[^@]*@)?([^:]*)(:\d*)?$/;
var reIPv6PortFromAuthority = /^(?:[^@]*@)?(\[[0-9a-f:]*\])(:\d*)?$/i;

var reHostFromNakedAuthority = /^[0-9a-z._-]+[0-9a-z]$/i;
var reHostFromAuthority = /^(?:[^@]*@)?([^:]+)(?::\d*)?$/;
var reIPv6FromAuthority = /^(?:[^@]*@)?(\[[0-9a-f:]+\])(?::\d*)?$/i;

// Coarse (but fast) tests
// A valid hostname is a dot-separated list of labels, each made of
// lowercase alphanumerics and hyphens, starting and ending with an
// alphanumeric. Each label is written as "first char, optionally anything
// followed by a last char" so that no quantifier is nested inside another:
// nested quantifiers make the regex engine backtrack exponentially on long
// inputs which almost match.
var reValidHostname = /^[a-z\d](?:[a-z\d-]*[a-z\d])?(?:\.[a-z\d](?:[a-z\d-]*[a-z\d])?)*$/;
var reIPAddressNaive = /^\d+\.\d+\.\d+\.\d+$|^\[[\da-zA-Z:]+\]$/;

var reNetworkScheme = /^(?:https?|wss?|ftps?)\b/;
var reSecureScheme = /^(?:https|wss|ftps)\b/;
+
// Blank every parsed component of the URI record `o` and return `o`.
// The authority is cleared along with the fields derived from it, so that
// a record which has been reset never exposes the authority of whichever
// URI was parsed before. Other properties of `o` are left untouched.
function reset(o) {
    o.scheme = '';
    o.authority = '';
    o.hostname = '';
    o._ipv4 = undefined;
    o._ipv6 = undefined;
    o.port = '';
    o.path = '';
    o.query = '';
    o.fragment = '';
    return o;
}
+
// Blank only the fields parsed out of the authority (hostname, the two
// IP slots and the port) on the record `o`, then hand `o` back.
function resetAuthority(o) {
    return Object.assign(o, {
        hostname: '',
        _ipv4: undefined,
        _ipv6: undefined,
        port: '',
    });
}
+
// Record holding the components of one parsed URI, together with the bit
// flags used to select which components get assembled back into a string.
// Each field is assigned in its own statement rather than through one long
// comma-operator chain; the resulting object is the same.
function URI() {
    // Parsed components, all empty until something fills them in.
    this.scheme = '';
    this.authority = '';
    this.hostname = '';
    this._ipv4 = undefined;
    this._ipv6 = undefined;
    this.port = '';
    this.domain = undefined;
    this.path = '';
    this.query = '';
    this.fragment = '';

    // One bit per component.
    this.schemeBit = (1 << 0);
    this.userBit = (1 << 1);
    this.passwordBit = (1 << 2);
    this.hostnameBit = (1 << 3);
    this.portBit = (1 << 4);
    this.pathBit = (1 << 5);
    this.queryBit = (1 << 6);
    this.fragmentBit = (1 << 7);

    // Composite masks.
    this.allBits = (0xFFFF);
    this.authorityBit =
        (this.userBit | this.passwordBit | this.hostnameBit | this.portBit);
    this.normalizeBits =
        (this.schemeBit | this.hostnameBit | this.pathBit | this.queryBit);
}
+
// Sizing of the hostname-to-domain cache: once it holds `cacheCountHigh`
// entries it is pruned, keeping the `cacheCountLow` most recently used.
var cacheCountHigh = 100;
var cacheCountLow = 75;

// Upper bound on how many disposed cache entries are kept for reuse.
var junkyardMax = cacheCountHigh - cacheCountLow;

// hostname => DomainCacheEntry
var domainCache = new Map();

// Disposed DomainCacheEntry objects waiting to be recycled.
var cacheJunkyard = [];

// The single URI record which UriTools reads from and writes to.
var cached = new URI();
+
// One value of the domain cache: the domain resolved for a hostname plus
// the time the entry was last touched.
function DomainCacheEntry(domain) {
    this.init(domain);
}

// (Re)initialise the entry for `domain`, stamping it with the current
// time. Returns the entry itself so that calls can be chained.
DomainCacheEntry.prototype.init = function (domain) {
    return Object.assign(this, {
        domain: domain,
        tstamp: Date.now(),
    });
};

// Retire the entry: forget its domain and, unless the junkyard is already
// full, park the object there so that it can be reused later.
DomainCacheEntry.prototype.dispose = function () {
    this.domain = '';
    if (cacheJunkyard.length >= junkyardMax) {
        return;
    }
    cacheJunkyard.push(this);
};
+
// Produce a cache entry for `domain`, recycling the most recently
// disposed entry from the junkyard when there is one and allocating a new
// DomainCacheEntry otherwise.
var domainCacheEntryFactory = function (domain) {
    let recycled = cacheJunkyard.pop();
    return recycled ? recycled.init(domain) : new DomainCacheEntry(domain);
};
+
// Record that `hostname` resolves to `domain` and return `domain`.
// A hostname which is already cached only gets its timestamp refreshed
// (the stored domain is left as it is). A new hostname gets an entry and,
// when that brings the cache to exactly `cacheCountHigh` entries, the
// cache is pruned.
var domainCacheAdd = function (hostname, domain) {
    let known = domainCache.get(hostname);

    if (known === undefined) {
        domainCache.set(hostname, domainCacheEntryFactory(domain));
        if (domainCache.size === cacheCountHigh) {
            domainCachePrune();
        }
        return domain;
    }

    known.tstamp = Date.now();
    return domain;
};
+
// Comparator over cached hostnames: orders them from most recently to
// least recently touched.
var domainCacheSort = function (a, b) {
    let stampOfB = domainCache.get(b).tstamp;
    let stampOfA = domainCache.get(a).tstamp;
    return stampOfB - stampOfA;
};
+
// Shrink the cache back to its `cacheCountLow` most recently touched
// hostnames. Everything older is disposed of and removed, starting with
// the least recently touched one.
var domainCachePrune = function () {
    let byRecency = Array.from(domainCache.keys());
    byRecency.sort(domainCacheSort);

    let stale = byRecency.slice(cacheCountLow);
    while (stale.length > 0) {
        let hostname = stale.pop();
        domainCache.get(hostname).dispose();
        domainCache.delete(hostname);
    }
};
+
// Forget every cached hostname-to-domain mapping.
var domainCacheReset = () => {
    domainCache.clear();
};

// Cached domains are computed from the public suffix list, so they are all
// dropped whenever the list signals a change.
publicSuffixList.onChanged.addListener(domainCacheReset);
+
// URI helpers exported by this module.
//
// Two families of functions live here:
//  - `set` parses a URI into the module's single shared record (`cached`);
//    `assemble`, `normalizedURI`, `rootURL`, `domain` and `toString` then
//    read from that record. The record is overwritten by every `set` call.
//  - the `...FromURI` / `...FromHostname` functions work directly on their
//    argument and do not touch the shared record.
var UriTools = {
    // Parse `uri` into the shared record and return that record.
    // With `uri` undefined the record is reset. When the authority
    // cannot be split into host and port, only the authority-derived
    // fields are blanked.
    set: function (uri) {
        if (uri === undefined) {
            return reset(cached);
        }

        let matches = reRFC3986.exec(uri);
        if (!matches) {
            return reset(cached);
        }

        // Groups still carry their delimiters: strip the trailing ':'
        // of the scheme and the leading '//' of the authority.
        cached.scheme = matches[1] !== undefined ?
            matches[1].slice(0, -1) :
            '';
        cached.authority = matches[2] !== undefined ?
            matches[2].slice(2).toLowerCase() :
            '';
        cached.path = matches[3] !== undefined ?
            matches[3] :
            '';

        // As per RFC3986
        if (cached.authority !== '' && cached.path === '') {
            cached.path = '/';
        }

        // Strip the leading '?' and '#'.
        cached.query = matches[4] !== undefined ?
            matches[4].slice(1) :
            '';
        cached.fragment = matches[5] !== undefined ?
            matches[5].slice(1) :
            '';

        // Fast path: the authority is nothing but a plain hostname.
        if (reHostFromNakedAuthority.test(cached.authority)) {
            cached.hostname = cached.authority;
            cached.port = '';
            return cached;
        }

        // General case first, bracketed IPv6 literal as a fallback.
        matches = reHostPortFromAuthority.exec(cached.authority);
        if (!matches) {
            matches = reIPv6PortFromAuthority.exec(cached.authority);
            if (!matches) {
                return resetAuthority(cached);
            }
        }

        cached.hostname = matches[1] !== undefined ?
            matches[1] :
            '';

        // Drop one trailing dot from the hostname.
        if (cached.hostname.slice(-1) === '.') {
            cached.hostname = cached.hostname.slice(0, -1);
        }

        // Strip the leading ':' of the port.
        cached.port = matches[2] !== undefined ?
            matches[2].slice(1) :
            '';

        return cached;
    },

    // Build a URI string from the shared record, using only the
    // components whose bit is set in `bits` (every component when `bits`
    // is undefined). Components which are empty are skipped.
    assemble: function (bits) {
        if (bits === undefined) {
            bits = cached.allBits;
        }

        let s = [];

        // Bitwise test, like every other component: the scheme must only
        // be emitted when its bit is part of the mask.
        if (cached.scheme && (bits & cached.schemeBit)) {
            s.push(cached.scheme, ':');
        }
        if (cached.hostname && (bits & cached.hostnameBit)) {
            s.push('//', cached.hostname);
        }
        if (cached.port && (bits & cached.portBit)) {
            s.push(':', cached.port);
        }
        if (cached.path && (bits & cached.pathBit)) {
            s.push(cached.path);
        }
        if (cached.query && (bits & cached.queryBit)) {
            s.push('?', cached.query);
        }
        if (cached.fragment && (bits & cached.fragmentBit)) {
            s.push('#', cached.fragment);
        }

        return s.join('');
    },

    // Whether `scheme` starts with one of http(s), ws(s) or ftp(s).
    isNetworkScheme: function (scheme) {
        return reNetworkScheme.test(scheme);
    },

    // Whether `scheme` starts with one of https, wss or ftps.
    isSecureScheme: function (scheme) {
        return reSecureScheme.test(scheme);
    },

    // Lowercased "scheme://authority" prefix of `uri`, or '' when `uri`
    // has no authority.
    originFromURI: function (uri) {
        let matches = reOriginFromURI.exec(uri);
        return matches !== null ? matches[0].toLowerCase() : '';
    },

    // Lowercased scheme of `uri` without the trailing ':', or ''.
    schemeFromURI: function (uri) {
        let matches = reSchemeFromURI.exec(uri);
        return matches !== null ? matches[0].slice(0, -1).toLowerCase() : '';
    },

    // Lowercased authority of `uri`, or '' when there is none.
    // The captured group starts with '//': both slashes are removed, as
    // `set` and `hostnameFromURI` do.
    authorityFromURI: function (uri) {
        let matches = reAuthorityFromURI.exec(uri);
        return matches !== null ? matches[1].slice(2).toLowerCase() : '';
    },

    // Hostname of `uri`, or '' when none can be extracted.
    hostnameFromURI: function (uri) {
        // Fast path for plain lowercase http(s) URLs.
        let matches = reCommonHostnameFromURL.exec(uri);
        if (matches) {
            return matches[1];
        }

        matches = reAuthorityFromURI.exec(uri);
        if (!matches) {
            return '';
        }

        let auth = matches[1].slice(2);

        // The authority is nothing but a hostname.
        if (reHostFromNakedAuthority.test(auth)) {
            return auth.toLowerCase();
        }

        // General case first, bracketed IPv6 literal as a fallback.
        matches = reHostFromAuthority.exec(auth);
        if (!matches) {
            matches = reIPv6FromAuthority.exec(auth);
            if (!matches) {
                return '';
            }
        }

        let hostname = matches[1];
        while (hostname.endsWith('.')) {
            hostname = hostname.slice(0, -1);
        }

        // Anything outside [0-9a-z._-] calls for normalization; the
        // converted hostname is what must be returned.
        if (reMustNormalizeHostname.test(hostname)) {
            hostname = Punycode.toASCII(hostname.toLowerCase());
        }

        return hostname;
    },

    // Domain of `hostname`, served from the domain cache when possible.
    // What looks like an IP address is its own domain; anything else is
    // resolved through the public suffix list. The result is cached.
    domainFromHostname: function (hostname) {
        let entry = domainCache.get(hostname);
        if (entry !== undefined) {
            entry.tstamp = Date.now();
            return entry.domain;
        }

        if (!reIPAddressNaive.test(hostname)) {
            return domainCacheAdd(hostname,
                                  publicSuffixList.getDomain(hostname));
        }

        return domainCacheAdd(hostname, hostname);
    },

    // Domain of the hostname found in `uri`; '' for a falsy `uri`.
    domainFromURI: function (uri) {
        if (!uri) {
            return '';
        }
        return UriTools.domainFromHostname(UriTools.hostnameFromURI(uri));
    },

    // Domain of the hostname currently held by the shared record.
    domain: function () {
        return UriTools.domainFromHostname(cached.hostname);
    },

    // Path component of `uri` ('' when there is none).
    pathFromURI: function (uri) {
        let matches = rePathFromURI.exec(uri);
        return matches !== null ? matches[1] : '';
    },

    // Shared record assembled from scheme, hostname, path and query.
    normalizedURI: function () {
        return UriTools.assemble(cached.normalizeBits);
    },

    // "scheme://hostname" of the shared record, or '' when the record has
    // no hostname.
    rootURL: function () {
        if (!cached.hostname) {
            return '';
        }
        return UriTools.assemble(cached.schemeBit | cached.hostnameBit);
    },

    // Coarse check that `hostname` looks like a valid hostname. Should
    // the regex test throw, the hostname is reported as invalid.
    isValidHostname: function (hostname) {
        try {
            return reValidHostname.test(hostname);
        } catch (e) {
            return false;
        }
    },

    // Parent of `hostname`, i.e. `hostname` minus its first label, or
    // undefined when `hostname` has no domain or is itself the domain:
    //  "localhost" => undefined (assuming no domain is found for it)
    //  "example.org" => undefined
    //  "www.example.org" => "example.org"
    //  "tomato.www.example.org" => "www.example.org"
    parentHostnameFromHostname: function (hostname) {
        let domain = UriTools.domainFromHostname(hostname);

        if (domain === '' || domain === hostname) {
            return undefined;
        }

        return hostname.slice(hostname.indexOf('.') + 1);
    },

    // Every ancestor of `hostname`, nearest first, down to and including
    // its domain. Empty when `hostname` has no domain or is the domain.
    parentHostnamesFromHostname: function (hostname) {
        let domain = UriTools.domainFromHostname(hostname);
        if (domain === '' || domain === hostname) {
            return [];
        }

        let nodes = [];
        for (;;) {
            let pos = hostname.indexOf('.');
            if (pos < 0) {
                break;
            }

            // Strip the leftmost label and record what remains.
            hostname = hostname.slice(pos + 1);
            nodes.push(hostname);
            if (hostname === domain) {
                break;
            }
        }

        return nodes;
    },

    // `hostname` itself followed by all of its ancestors.
    allHostNamesFromHostname: function (hostname) {
        let nodes = UriTools.parentHostnamesFromHostname(hostname);
        nodes.unshift(hostname);
        return nodes;
    },

    // The shared record assembled with every component.
    toString: function () {
        return UriTools.assemble();
    },
};