aboutsummaryrefslogtreecommitdiffstats
path: root/lib/html_script_finder
diff options
context:
space:
mode:
Diffstat (limited to 'lib/html_script_finder')
-rw-r--r--lib/html_script_finder/bug_fix.js23
-rw-r--r--lib/html_script_finder/dom_handler.js571
-rw-r--r--lib/html_script_finder/dom_handler/attributes.js137
-rw-r--r--lib/html_script_finder/dom_handler/dom_checker.js478
-rw-r--r--lib/html_script_finder/dom_handler/dom_gatherer.js281
-rw-r--r--lib/html_script_finder/dom_handler/request.js115
-rw-r--r--lib/html_script_finder/dom_handler/script_object.js208
-rw-r--r--lib/html_script_finder/dom_handler/script_properties.js43
-rw-r--r--lib/html_script_finder/html_parser.js158
-rw-r--r--lib/html_script_finder/url_seen_tester.js78
-rw-r--r--lib/html_script_finder/web_labels/find_js_labels.js131
-rw-r--r--lib/html_script_finder/web_labels/js_web_labels.js279
-rw-r--r--lib/html_script_finder/web_labels/script_hash_worker.js62
13 files changed, 2564 insertions, 0 deletions
diff --git a/lib/html_script_finder/bug_fix.js b/lib/html_script_finder/bug_fix.js
new file mode 100644
index 0000000..bba7653
--- /dev/null
+++ b/lib/html_script_finder/bug_fix.js
@@ -0,0 +1,23 @@
+/**
+ * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
+ * *
+ * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+exports.END_OF_SCRIPT = 'this.narcissusBugFixLibreJS'; // value from parse tree without \n\n
+exports.narcissusBugFixLibreJS = '\n\n' + exports.END_OF_SCRIPT; // real value from source.
+
diff --git a/lib/html_script_finder/dom_handler.js b/lib/html_script_finder/dom_handler.js
new file mode 100644
index 0000000..0f461d3
--- /dev/null
+++ b/lib/html_script_finder/dom_handler.js
@@ -0,0 +1,571 @@
+/**
+ * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
+ * *
+ * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+/**
+ *
+ * dom_handler.js
+ *
+ * After the HTML DOM has been parsed, domHandler finds all the scripts
+ * on a page (including inline, on-page, and external files), and triggers the
+ * JavaScript analysis for each of them.
+ *
+ */
+
+var {Cc, Ci, Cu, Cm, Cr} = require("chrome");
+
+var scriptProperties =
+ require("html_script_finder/dom_handler/script_properties");
+
+const scriptTypes = scriptProperties.scriptTypes;
+const statusTypes = scriptProperties.statusTypes;
+const reasons = scriptProperties.reasons;
+
+var urlHandler = require("url_handler/url_handler");
+
+var WebLabelFinder =
+ require("html_script_finder/web_labels/js_web_labels").WebLabelFinder;
+
+// object model for script entries.
+var scriptObject = require("html_script_finder/dom_handler/script_object");
+
+var privacyChecker = require("js_checker/privacy_checker").privacyCheck;
+var jsChecker = require("js_checker/js_checker");
+const types = require("js_checker/constant_types");
+
+var checkTypes = types.checkTypes;
+
+var stripCDATAOpen = /<\!\[CDATA\[/gi;
+var stripCDATAClose = /]]>/g;
+
+var isDryRun = require("addon_management/prefchange").isDryRun;
+var allowedRef = require('http_observer/allowed_referrers').allowedReferrers;
+var attributeHelpers = require("html_script_finder/dom_handler/attributes");
+
+// javascript:*
+var jsInAttrRe = attributeHelpers.jsInAttrRe;
+
+// the list of all available event attributes
+var intrinsicEvents = attributeHelpers.intrinsicEvents;
+
+var domGatherer =
+ require("html_script_finder/dom_handler/dom_gatherer").domGatherer;
+var domChecker =
+ require("html_script_finder/dom_handler/dom_checker").domChecker;
+
+/**
+ * The DomHandler object takes a whole document,
+ * finds script elements within that DOM, analyzes them
+ * using the js_checker module and finally returns a cleaned
+ * DOM depending on the result.
+ */
+var DomHandler = function() {
+ // Constructor only declares the instance fields; per-page values are
+ // assigned later by init() and reset().
+
+ // helper object providing the script-gathering methods
+ // (assigned by exports.domHandler).
+ this.domGatherer = null;
+
+ // helper object providing the script-checking methods
+ // (assigned by exports.domHandler).
+ this.domChecker = null;
+
+ // the DOM being analyzed and the URL of its page (set in init).
+ this.dom = null;
+ this.pageURL = null;
+
+ // fragment found in url.
+ this.fragment = null;
+
+ // array containing all scripts on a page.
+ this.domScripts = [];
+
+ // array of inline script entries and the
+ // data related to them, such as parse tree, ...
+ this.inlineScripts = [];
+
+ // array of external script entries.
+ this.externalScripts = [];
+
+ // all scripts.
+ this.scripts = [];
+
+ // keeps track of the number of scripts.
+ this.numScripts = 0;
+
+ // store the reference to the callback method
+ // presumably from htmlParser.
+ this.callback = function() {};
+
+ // boolean set to true if external scripts are loaded
+ // from the html page.
+ this.loadsHtmlExternalScripts = false;
+
+ this.jsCheckString = null;
+
+ /* object containing boolean property set to false if trivialness
+ is not allowed anymore (if another script defines ajax requests,
+ ...) */
+ this.allowTrivial = null;
+
+ // boolean set to true if inline JavaScript
+ // is found to be free.
+ this.inlineJsFree = null;
+
+ // boolean set to true when at least one script
+ // has been removed.
+ this.hasRemovedScripts = null;
+
+ // boolean to check if scripts were removed
+ // prevents removeAllJs from running multiple times.
+ this.removedAllScripts = null;
+
+ // will eventually contain an array of data
+ // for the js web labels licenses.
+ this.licenseList = [];
+
+ // the response status for the page (200, 404, ...)
+ this.responseStatus = null;
+
+ // number of scripts fully tested.
+ this.scriptsTested = 0;
+
+ // number of external scripts to be tested.
+ this.numExternalScripts = null;
+
+ // number of inline/inattribute scripts
+ this.numInlineScripts = null;
+};
+
+/**
+ * Initialize the handler for one document.
+ *
+ * Resets the per-page state, stores the arguments on the instance and
+ * wraps the caller's callback so that destroy() runs right after the
+ * callback has been invoked.
+ *
+ * @param {Object} domObject A reference to the DOM object being
+ * analyzed.
+ *
+ * @param {string} pageURL The formatted URL (with fragment
+ * removed) of the corresponding page for this DOM.
+ *
+ * @param {string} fragment The #fragment from the url if applicable.
+ *
+ * @param {number} responseStatus The response status for the page
+ * (200, 404, ...).
+ *
+ * @param {Function} callback The callback function; it receives the DOM.
+ *
+ */
+DomHandler.prototype.init = function(
+ domObject, pageURL, fragment, responseStatus, callback
+) {
+ // initialize object properties.
+
+ console.debug('init', pageURL);
+ var that = this;
+
+ // clear state first so the assignments below are not wiped.
+ this.reset();
+
+ // arguments passed.
+ this.dom = domObject;
+ this.pageURL = pageURL;
+ this.fragment = fragment;
+ this.responseStatus = responseStatus;
+
+ console.debug('in dom handler, responseStatus is', this.responseStatus);
+
+ // make callback function available
+ // for the entire object. destroy() replaces it with a no-op
+ // afterwards, so the caller's callback runs at most once.
+ this.callback = function (dom) {
+ callback(dom);
+ that.destroy();
+ };
+};
+
+/**
+ * Put the per-page state back to its starting values: drops the DOM
+ * reference, empties the script arrays, resets the flags and zeroes
+ * the counters. Called by init() before the new page is stored.
+ */
+DomHandler.prototype.reset = function () {
+
+ this.dom = null;
+ // arrays.
+ this.onEventElement = [];
+ this.scriptStatus = [];
+ this.inlineScripts = [];
+ this.externalScripts = [];
+ this.scripts = [];
+
+ // booleans
+ this.allowTrivial = true;
+ this.inlineJsFree = false;
+ this.hasRemovedScripts = false;
+ this.removedAllScripts = false;
+
+ // we start with 0, and will increment in
+ // dom_checker.
+ this.numExternalScripts = 0;
+
+ this.numInlineScripts = 0;
+
+ this.scriptsTested = 0;
+
+};
+
+DomHandler.prototype.destroy = function () {
+ this.domGatherer = null;
+ this.domChecker = null;
+ /* destroy callback so that it can't be called multiple times. */
+ this.callback = function() {};
+ //this.reset();
+};
+
+DomHandler.prototype.scriptHasBeenTested = function() {
+ this.scriptsTested++;
+ console.debug('incremented DomHandler.scriptsTested to',
+ this.scriptsTested);
+};
+
+/**
+ * scriptHasJsWebLabel
+ *
+ * Checks if a script was found earlier in a Js License Web Label
+ * table. See http://www.gnu.org/licenses/javascript-labels.html
+ * for more information.
+ *
+ */
+DomHandler.prototype.scriptHasJsWebLabel = function(script) {
+ if (this.licenseList) {
+
+ var url = urlHandler.resolve(this.pageURL, script.src),
+ i = 0,
+ len = this.licenseList.length;
+
+ console.debug('looking for web label');
+
+ for (; i < len; i++) {
+ if (this.licenseList[i].fileUrl === url &&
+ this.licenseList[i].free === true
+ ) {
+ console.debug('found something true');
+ console.debug(
+ this.licenseList[i].fileUrl, ' is found');
+ return true;
+ }
+ }
+ }
+ return false;
+};
+
+/**
+ * processScripts.
+ * Starts by looking for a js web labels page
+ * then calls the complete function, which runs
+ * the rest of the check.
+ */
+DomHandler.prototype.processScripts = function () {
+ var that = this;
+
+ // check for the existence of the
+ // js web labels first.
+ this.lookForJsWebLabels(function () {
+
+ // gather and check all script elements on
+ // page.
+ console.debug("Calling checkAllScripts");
+ that.checkAllScripts();
+
+ });
+
+};
+
+/**
+ * checkAllScripts
+ *
+ * Runs once the js web labels lookup has completed. Gathers the
+ * scripts of the page through domGatherer, then either starts the
+ * inline check through domChecker or, when the URL fragment contains
+ * 'librejs=true', removes all JavaScript without checking it.
+ *
+ */
+DomHandler.prototype.checkAllScripts = function () {
+ try {
+ console.debug(
+ 'found in', this.pageURL, JSON.stringify(this.licenseList));
+ console.debug('checkAllScripts triggered async');
+
+ // use domGatherer to gather scripts.
+ this.domGatherer.findScripts();
+ this.domGatherer.gatherScriptsContent();
+ this.domGatherer.gatherIntrinsicEvents();
+
+ console.debug('fragment is', this.fragment);
+
+ // no fragment, or a fragment without the librejs marker:
+ // this is a regular page, so its scripts get checked.
+ if (
+ this.fragment === undefined ||
+ this.fragment === null ||
+ this.fragment.indexOf('librejs=true') < 0
+ ) {
+ try {
+
+ // use domChecker to check scripts.
+ console.debug("Calling checkAllInlineScripts");
+ this.domChecker.checkAllInlineScripts();
+ } catch (x) {
+ // the check itself failed: block everything.
+ console.debug('error in domChecker:', x, x.lineNumber);
+ this.removeAllJs();
+ }
+ } else {
+ console.debug('This is a pageworker, removing all js');
+ // this is the Page Worker to find contact link
+ // just remove all the JS since we don't need it.
+ console.debug('fragment found, remove js');
+ this.removeAllJs();
+ }
+ } catch (x) {
+ // NOTE(review): an error reaching this handler is only logged;
+ // nothing in this branch invokes this.callback.
+ console.debug('error', x, x.lineNumber, x.fileName);
+ }
+};
+
+/**
+ * lookForJsWebLabels
+ *
+ * Checks if a link to a js web label table exists.
+ * If it does, return an array of objects with the data
+ * gathered (script name, path, license name, url, ...)
+ *
+ */
+DomHandler.prototype.lookForJsWebLabels = function (completed) {
+ var that = this;
+ console.debug("calling lookForJsWebLabels");
+ if (this.fragment !== '#librejs=true') {
+ var webLabelFinder = new WebLabelFinder();
+ webLabelFinder.init(
+ this.dom,
+ this.pageURL,
+ function (licenseList) {
+ // assign array returned to property.
+ that.licenseList = licenseList;
+ console.debug("calling completed");
+ completed();
+ });
+ } else {
+ completed();
+ }
+};
+
+DomHandler.prototype.checkScriptForJsWebLabels = function(script) {
+ var scriptEntry;
+
+ if (this.hasSrc(script) && this.scriptHasJsWebLabel(script)) {
+ // This script is in the list of allowed scripts (through web labels)
+ scriptEntry = scriptObject.Script({
+ 'type': scriptTypes.EXTERNAL,
+ 'status': statusTypes.ACCEPTED,
+ 'element': script,
+ 'url': urlHandler.resolve(this.pageURL, script.src)
+ });
+
+ scriptEntry.tagAsAccepted(this.pageURL, reasons.FREE);
+ return true;
+ }
+};
+
+/**
+ * hasSrc
+ * Check the given script has an src attribute.
+ * @param script obj The script element.
+ * @return a string with the value of the src attribute.
+ */
+DomHandler.prototype.hasSrc = function(script) {
+ if (script.src) {
+ return script.src;
+ }
+ return false;
+};
+
+/**
+ * Uses relationChecker to guess whether the script only uses
+ * predefined functions/variables or interacts with other scripts
+ * (this is still very experimental and needs improvement.)
+ *
+ */
+DomHandler.prototype.removeScriptIfDependent = function (script) {
+ var nonWindowProps = script.tree.relationChecker.nonWindowProperties;
+
+ for (var entry in nonWindowProps) {
+ if (nonWindowProps[entry]) {
+ console.debug('script has non window properties.');
+ this.removeGivenJs(script, reasons.TRIVIAL_NOT_ALLOWED);
+ return true;
+ }
+ }
+};
+
+/**
+ * removeGivenJs
+ * Remove a single script from the DOM.
+ * @param script Obj The script element to be removed from the
+ * DOM.
+ *
+ */
+DomHandler.prototype.removeGivenJs = function (script, reason, singleton, hash) {
+ var commentedOut;
+ var isAllowed = allowedRef.urlInAllowedReferrers(this.pageURL);
+ console.debug("removing given js hash", hash);
+
+ if (script.status != statusTypes.REJECTED &&
+ script.status != statusTypes.JSWEBLABEL
+ ) {
+ console.debug('removing a', script.type);
+ if (script.type === scriptTypes.ATTRIBUTE &&
+ !isAllowed
+ ) {
+ this.removeGivenAttribute(script, reason);
+ return;
+ }
+ if (!isAllowed) {
+ // set invalid type if dry run off.
+ script.element.setAttribute('type', 'librejs/blocked');
+ // add entry as removed.
+ console.debug('removeGivenJs hash is', hash);
+ script.tagAsRemoved(this.pageURL, reason, hash);
+ } else {
+ script.element.setAttribute(
+ 'data-librejs-dryrun', 'librejs/blocked');
+ script.tagAsDryRun(this.pageURL, reason, hash);
+ }
+
+ if (singleton === true) {
+ // flag singletons.
+ script.element.setAttribute('data-singleton', 'true');
+ }
+
+ // remove src if dry run off.
+ if (script.element.getAttribute('src') !== undefined) {
+ script.element.setAttribute(
+ 'data-librejs-blocked-src',
+ script.element.getAttribute('src')
+ );
+ if (!isAllowed) {
+ script.element.removeAttribute('src');
+ }
+ }
+ if (isAllowed) {
+ comment_str = 'LibreJS: Script should be blocked, but page is whitelisted.';
+ script.status = statusTypes.ACCEPTED;
+ } else {
+ comment_str = 'LibreJS: script blocked.';
+ script.status = statusTypes.REJECTED;
+ }
+
+ commentedOut = this.dom.createComment(comment_str);
+ // add a comment for curious source readers.
+ script.element.parentNode.appendChild(commentedOut);
+ script.element.parentNode.insertBefore(commentedOut, script.element);
+ this.hasRemovedScripts = true;
+ }
+};
+
+DomHandler.prototype.removeGivenAttribute = function (script, reason) {
+ var i = 0,
+ le = script.jsAttributes.length;
+
+ console.debug('removing given attribute', script, reason);
+ script.element.setAttribute('data-librejs-blocked-event',
+ JSON.stringify(script.jsAttributes));
+
+ script.tagAsRemoved(this.pageURL, reason, script.hash || script.tree.hash);
+
+ // might need to be removed.
+ script.element.setAttribute('data-librejs-blocked-value', '');
+
+ if (!allowedRef.urlInAllowedReferrers(this.pageURL)) {
+ // only run if not in dry run mode.
+ for (; i < le; i++) {
+ console.debug('removing attribute', JSON.stringify(script.jsAttributes));
+ script.element.removeAttribute(script.jsAttributes[i].attribute);
+ }
+ } else {
+
+ }
+ this.hasRemovedScripts = true;
+};
+
+/**
+ * removeAllJs
+ * Remove every entry of this.scripts through removeAllArray, then
+ * hand the DOM back through the callback. The callback is invoked
+ * whether or not the removal threw.
+ *
+ * @param reason The reason passed along for every removed script.
+ *
+ */
+DomHandler.prototype.removeAllJs = function (reason) {
+ // remove all js is useless from now on.
+ console.debug('removeAllJs');
+ this.hasRemovedScripts = true;
+
+ // removeAllJs needs not be run next time.
+ this.removedAllScripts = true;
+
+ try {
+ this.removeAllArray(this.scripts, reason);
+ this.callback(this.dom);
+ } catch (x) {
+ console.debug(
+ 'in removeAllJs method: ',
+ x,
+ 'number of scripts is',
+ this.numScripts
+ );
+ // still deliver the DOM so the caller is not left waiting.
+ this.callback(this.dom);
+ }
+
+};
+
+DomHandler.prototype.removeAllArray = function(scriptArray, reason) {
+ var script, i = 0, le;
+ console.debug('removeAllArray');
+ try {
+ le = scriptArray.length;
+ // loop through all scripts.
+
+ for (; i < le; i++) {
+ script = scriptArray[i];
+ if (script.type === scriptTypes.INLINE ||
+ script.type === scriptTypes.EXTERNAL
+ ) {
+ this.removeGivenJs(script, reason);
+ }
+ else if (script.type === scriptTypes.ATTRIBUTE) {
+ this.removeGivenAttribute(script, reason);
+ }
+ }
+ } catch (e) {
+ this.callback("");
+ }
+
+};
+
+exports.DomHandler = DomHandler;
+
+/**
+ * exports.domHandler
+ * Instantiates a DomHandler and checks the DOM
+ * @param dom obj The given dom for analysis.
+ * @param pageURL string the URL for the page.
+ * @param callback function callback when all the work has been performed.
+ */
+exports.domHandler = function(
+ dom, pageURL, fragment, responseStatus, callback) {
+ console.debug("Creating domHandler");
+ var domHandler = new DomHandler();
+ domHandler.init(dom, pageURL, fragment, responseStatus, callback);
+
+ // use domGatherer methods.
+ domHandler.domGatherer = domGatherer(domHandler);
+
+ // use domChecker methods.
+ domHandler.domChecker = domChecker(domHandler);
+
+ // launch the whole process.
+ console.debug("Calling processScripts");
+ domHandler.processScripts();
+};
diff --git a/lib/html_script_finder/dom_handler/attributes.js b/lib/html_script_finder/dom_handler/attributes.js
new file mode 100644
index 0000000..3e95bab
--- /dev/null
+++ b/lib/html_script_finder/dom_handler/attributes.js
@@ -0,0 +1,137 @@
+/**
+ * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
+ * *
+ * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+// object model for script entries.
+var scriptObject = require("html_script_finder/dom_handler/script_object");
+
+var scriptProperties = require("html_script_finder/dom_handler/script_properties");
+
+const scriptTypes = scriptProperties.scriptTypes;
+
+const statusTypes = scriptProperties.statusTypes;
+
+// matches every "javascript:" occurrence, case-insensitively.
+var jsInAttrRe = /javascript:/ig;
+
+// the event attributes looked for by findOnJSAttribute.
+// NOTE(review): only the names below are checked; any other on*
+// attribute is not detected by this list.
+var intrinsicEvents = [
+ "onload",
+ "onunload",
+ "onclick",
+ "ondblclick",
+ "onmousedown",
+ "onmouseup",
+ "onmouseover",
+ "onmousemove",
+ "onmouseout",
+ "onfocus",
+ "onblur",
+ "onkeypress",
+ "onkeydown",
+ "onkeyup",
+ "onsubmit",
+ "onreset",
+ "onselect",
+ "onchange"];
+
+exports.jsInAttrRe = jsInAttrRe;
+exports.intrinsicEvents = intrinsicEvents;
+
+
+/**
+ * findJSinAttribute
+ *
+ * Looks for attributes containing 'javascript:'
+ *
+ */
+exports.findJSinAttribute = function (elem, callback) {
+ var i = 0, attrLen = elem.attributes.length;
+
+ var attribPairs = [];
+
+ for (; i < attrLen; i++) {
+
+ //looping through all attributes in elem to look for "javascript:"
+ attrib = elem.attributes[i];
+
+ if (attrib.value.match(jsInAttrRe)) {
+ str = attrib.value.replace(jsInAttrRe, '');
+ attribPairs.push({attribute: attrib.name, value: str});
+ }
+
+ }
+
+ if (attribPairs.length > 0) {
+ // contains in attribute javascript.
+ scriptEntry = scriptObject.Script({'type': scriptTypes.ATTRIBUTE,
+ 'status': statusTypes.UNCHECKED,
+ 'element': elem,
+ 'jsAttributes': attribPairs
+ });
+
+ // push back to DOMHandler
+ callback(scriptEntry);
+
+ } else {
+ callback(false);
+ }
+
+};
+
+/**
+ * findOnJSAttribute.
+ *
+ * Look for attributes in on*
+ *
+ */
+exports.findOnJSAttribute = function (elem, callback) {
+
+ var i = 0, eventsLen = intrinsicEvents.length;
+
+ var attribPairs = [];
+
+ for (; i < eventsLen; i++) {
+
+ // looping through all on* attributes
+ if (elem.hasAttribute(intrinsicEvents[i])) {
+
+ attribPairs.push({
+ attribute: intrinsicEvents[i],
+ value: elem.getAttribute(intrinsicEvents[i])
+ });
+
+ }
+
+ }
+ if (attribPairs.length > 0) {
+
+ console.debug('found an attribute', scriptTypes.ATTRIBUTE);
+ scriptEntry = scriptObject.Script({'type': scriptTypes.ATTRIBUTE,
+ 'status': statusTypes.UNCHECKED,
+ 'element':elem,
+ 'jsAttributes': attribPairs
+ });
+ // Push back to DOMHandler.
+ // push back to DOMHandler
+ callback(scriptEntry);
+
+ } else {
+ callback(false);
+ }
+};
diff --git a/lib/html_script_finder/dom_handler/dom_checker.js b/lib/html_script_finder/dom_handler/dom_checker.js
new file mode 100644
index 0000000..1a0f30e
--- /dev/null
+++ b/lib/html_script_finder/dom_handler/dom_checker.js
@@ -0,0 +1,478 @@
+/**
+ * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
+ * *
+ * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+/**
+ * dom_checker.js
+ *
+ * checks scripts for nonfree/nontrivial.
+ *
+ */
+
+var {Cc, Ci, Cu, Cm, Cr} = require("chrome");
+var timer = require("sdk/timers");
+
+var scriptProperties = require("html_script_finder/dom_handler/script_properties");
+const scriptTypes = scriptProperties.scriptTypes;
+const statusTypes = scriptProperties.statusTypes;
+const reasons = scriptProperties.reasons;
+
+// ensure xhr won't create an infinite loop
+// with html content.
+var urlTester = require("html_script_finder/url_seen_tester").urlSeenTester;
+var urlHandler = require("url_handler/url_handler");
+
+var privacyChecker = require("js_checker/privacy_checker").privacyCheck;
+var jsChecker = require("js_checker/js_checker");
+
+const types = require("js_checker/constant_types");
+var checkTypes = types.checkTypes;
+var stripCDATAOpen = /<\!\[CDATA\[/gi;
+var stripCDATAClose = /]]>/g;
+
+const getHash = require("script_entries/scripts_cache").scriptsCached.getHash;
+
+// Constructor: starts without a handler; init() provides it.
+var DomChecker = function() {
+ // reference to domHandler instance
+ // using this object.
+ this.d = null;
+};
+
+/**
+ * init
+ *
+ * assign a reference domHandler object
+ * to access/updates its properties.
+ *
+ * @param domHandler obj The DomHandler instance, stored as this.d.
+ *
+ */
+DomChecker.prototype.init = function(domHandler) {
+ "use strict";
+
+ this.d = domHandler;
+};
+
+/**
+ * destroy
+ *
+ * Drop the reference to the domHandler.
+ */
+DomChecker.prototype.destroy = function() {
+ "use strict";
+
+ this.d = null;
+};
+
+/**
+ * checkAllInlineScripts
+ *
+ * Sends all the inline/onpage scripts as a whole for a check and
+ * removes all scripts if nonfree nontrivial is found.
+ *
+ */
+DomChecker.prototype.checkAllInlineScripts = function() {
+ "use strict";
+
+ try {
+ var i = 0, len, script;
+
+ if (typeof this.d.inlineScripts !== 'undefined' &&
+ this.d.inlineScripts.length > 0
+ ) {
+ script = this.d.inlineScripts.shift();
+ console.debug("checking script for page",
+ this.d.pageURL
+ /*, JSON.stringify(script)*/);
+ if (this.d.removedAllScripts) {
+ // all js has already been removed.
+ // stop check.
+ console.debug("removed all");
+ return;
+ }
+
+ if (this.d.inlineJsFree === true) {
+ // add entry as accepted.
+ try {
+ hash = getHash(script.text);
+ script.tagAsAccepted(this.d.pageURL, reasons.FREE, hash);
+ } catch (e) {
+ console.debug(e);
+ }
+ }
+
+ // even if page is free we need to check for allow trivial.
+ if (script.type === scriptTypes.INLINE) {
+ console.debug("analyzing script", script);
+ this.analyzeJs(script,
+ script.text,
+ this.checkSingleInlineScript.bind(this));
+ } else if (script.type === scriptTypes.ATTRIBUTE) {
+ console.debug("analyzing inline script", script);
+ this.analyzeJs(script,
+ this.concatAttributes(script),
+ this.checkSingleElementAttributes.bind(this));
+ }
+ } else {
+ // no more inline scripts. Switch to external scripts.
+ this.readyForExternal();
+ }
+ } catch (x) {
+ console.debug('checkAllInlineScripts error',
+ x, x.lineNumber, x.fileName);
+ this.readyForExternal();
+ }
+};
+
+DomChecker.prototype.concatAttributes = function(script) {
+ "use strict";
+ var i = 0,
+ le = script.jsAttributes.length,
+ text = "";
+
+ // we concatenate all js in multiple attributes.
+ // because it's too much of a hassle to keep track
+ // otherwise.
+ for (; i < le; i++) {
+ text += script.jsAttributes[i].value + '\n';
+ }
+
+ return text;
+};
+
+/**
+ *
+ * check a single element with attributes
+ */
+DomChecker.prototype.checkSingleElementAttributes = function(
+ script, loadedScript, checker) {
+ "use strict";
+ var check, value,
+ i = 0,
+ le = script.jsAttributes.length,
+ text = "";
+
+ try {
+ check = checker.parseTree.freeTrivialCheck;
+ script.tree = checker;
+ script.result = check;
+ script.status = statusTypes.CHECKED;
+ } catch (e) {
+ console.debug('problem checking inline scripts', e, e.lineNumber);
+ this.d.removeGivenJs(script);
+ }
+
+ this.processInlineCheckResult(script, check, checker);
+};
+
+DomChecker.prototype.processInlineCheckResult = function(
+ script, check, checker) {
+ "use strict";
+ console.debug("check.reason is", check.reason, "and type", check.type);
+ var hash = checker.hash;
+
+ if (this.d.inlineJsFree === true) {
+ console.debug('tagging', script.text, 'as accepted', "with reason", check.reason);
+ script.tagAsAccepted(this.d.pageURL, this.d.freeReason + " -- " + check.reason, hash);
+ }
+
+ // process the result.
+ if (check.type === checkTypes.FREE) {
+ // this is free.
+ console.debug('tagging', script.text, 'as accepted with reason', check.reason);
+ this.d.inlineJsFree = true;
+ this.d.freeReason = check.reason;
+ // add entry as accepted.
+ script.tagAsAccepted(this.d.pageURL, check.reason, hash);
+ } else if (check.type === checkTypes.FREE_SINGLE_ITEM) {
+ // accept this script.
+ console.debug("free single item, ", check.reason);
+ script.tagAsAccepted(this.d.pageURL, check.reason, hash);
+ } else if (check.type === checkTypes.NONTRIVIAL) {
+ console.debug("nontrivial hash is", hash);
+ if (this.d.inlineJsFree) {
+ // inline is free. So accept.
+ console.debug('tagging', script.text, 'as accepted');
+ script.tagAsAccepted(
+ this.d.pageURL,
+ this.d.freeReason + ' -- ' + check.reason,
+ hash);
+ } else {
+ console.debug('tagging', script.text, 'as removed');
+ this.d.removeGivenJs(script, check.reason, false, hash);
+ }
+ } else if (!this.d.inlineJsFree &&
+ this.d.loadsHtmlExternalScripts &&
+ check.type === checkTypes.TRIVIAL_DEFINES_FUNCTION
+ ) {
+ // nontrivial, because defines function and loads
+ // external scripts
+ console.debug('tagging', script.text, 'as removed');
+ this.d.removeGivenJs(script, reasons.FUNCTIONS_INLINE, false, hash);
+ } else if (!this.d.loadsHtmlExternalScripts &&
+ check === checkTypes.TRIVIAL_DEFINES_FUNCTION
+ ) {
+ console.debug("Tag as accepted doesn't load another external script");
+ script.tagAsAccepted(this.d.pageURL, check.reason, hash);
+ } else if (check.type === checkTypes.TRIVIAL ||
+ check.type === checkTypes.TRIVIAL_DEFINES_FUNCTION ||
+ check.type === checkTypes.WHITELISTED
+ ) {
+ // add entry as accepted.
+ console.debug("Trivial accepted");
+ script.tagAsAccepted(this.d.pageURL, check.reason, hash);
+ }
+
+ // next inline script, if applicable.
+ this.checkAllInlineScripts();
+};
+
+/**
+ * readyForExternal
+ *
+ * Called when the inline scripts are done (or their check failed);
+ * starts the check of the external scripts.
+ */
+DomChecker.prototype.readyForExternal = function() {
+ "use strict";
+
+ console.debug('DomChecker.readyForExternal');
+ // done with those inline scripts, continue with
+ // the rest.
+ this.checkExternalScripts();
+};
+
+/**
+ * check a single inline script.
+ */
+DomChecker.prototype.checkSingleInlineScript = function(
+ script, loadedScript, checker) {
+ "use strict";
+ var check, text;
+
+ console.debug('DomChecker.checkSingleInlineScript');
+
+ try {
+
+ check = checker.parseTree.freeTrivialCheck;
+
+ // update status.
+ script.tree = checker;
+ script.result = check;
+ console.debug("script result is", check.type);
+ script.status = statusTypes.CHECKED;
+
+ } catch (e) {
+ console.debug('problem checking inline scripts', e, e.lineNumber);
+ this.d.removeGivenJs(script, '', false, checker.hash);
+ }
+
+ this.processInlineCheckResult(script, check, checker);
+
+};
+
+/**
+ * checkExternalScripts
+ * Loop through series of external scripts,
+ * perform xhr to get their data, and check them
+ * to see whether they are free/nontrivial
+ *
+ */
+DomChecker.prototype.checkExternalScripts = function() {
+ "use strict";
+
+ console.debug('DomChecker.checkExternalScripts');
+
+ var i = 0;
+ var len = this.d.externalScripts.length;
+ var that = this;
+
+ console.debug("externalScripts length", len);
+ if (this.d.removedAllScripts || len === 0) {
+ // all js has already been removed.
+ // stop check.
+ this.wrapUpBeforeLeaving();
+ return;
+ }
+
+ for (; i < len; i++) {
+ this.xhr(
+ this.d.externalScripts[i],
+ function(script, scriptText) {
+ console.debug("In xhr callback for script url:", script.url);
+ if (scriptText === false) {
+ that.d.removeGivenJs(script);
+ that.d.scriptHasBeenTested();
+ that.externalCheckIsDone();
+ return;
+ }
+
+ console.debug('about to analyzeJS for script:', script.url);
+ that.analyzeJs(
+ script,
+ scriptText,
+ that.checkSingleExternalScript.bind(that));
+ }
+ );
+ }
+};
+
+/**
+ * wrapUpBeforeLeaving
+ *
+ * End of the check: hands the handler's DOM to the handler's
+ * callback.
+ */
+DomChecker.prototype.wrapUpBeforeLeaving = function() {
+ "use strict";
+
+ console.debug("wrap up before leaving triggered");
+ console.debug('wrapping up');
+ this.d.callback(this.d.dom);
+
+};
+
+DomChecker.prototype.analyzeJs = function(script, scriptText, callback) {
+ "use strict";
+ console.debug('DomChecker.analyzeJs for script:', script.url);
+ try {
+ var checker = jsChecker.jsChecker();
+ var url = "";
+ if (typeof script.url !== "undefined") {
+ url = script.url;
+ } else {
+ url = this.pageURL;
+ }
+ checker.searchJs(scriptText, function() {
+ console.debug("Analyze JS"/*, JSON.stringify(checker)*/);
+ timer.setTimeout(function() {
+ callback(script, scriptText, checker);
+ }, 0);
+ }, url);
+ } catch (x) {
+ console.debug('error', x, x.lineNumber, x.fileName);
+ }
+};
+
+/**
+ * Check a single external script.
+ *
+ * Stores the checker and its result on the script entry, then accepts
+ * or removes the script according to the type of the result. On
+ * success the script is counted as tested and externalCheckIsDone is
+ * called; on any error all js is removed and this checker is
+ * destroyed.
+ *
+ * @param script obj The EXTERNAL script entry.
+ * @param loadedScript string The analyzed text (unused here).
+ * @param checker obj The checker that analyzed the text.
+ */
+DomChecker.prototype.checkSingleExternalScript = function(
+ script, loadedScript, checker
+) {
+ "use strict";
+ var check;
+
+ console.debug('DomChecker.checkSingleExternalScript()');
+ try {
+ check = checker.parseTree.freeTrivialCheck;
+
+ script.tree = checker;
+ script.result = check;
+ console.debug('in checkSingleExternalScript, checker.hash is',
+ checker.hash);
+ // a JSWEBLABEL status is kept as is.
+ if (script.status != statusTypes.JSWEBLABEL) {
+ script.status = statusTypes.CHECKED;
+ }
+
+ if (check.type === checkTypes.FREE ||
+ check.type === checkTypes.FREE_SINGLE_ITEM
+ ) {
+ // add entry as accepted.
+ script.tagAsAccepted(this.d.pageURL, check.reason, checker.hash);
+ }
+
+ else if (check.type === checkTypes.NONTRIVIAL) {
+ console.debug("Removing given js", check.reason);
+ this.d.removeGivenJs(script, check.reason, false, checker.hash);
+ }
+
+ else if (check.type === checkTypes.TRIVIAL ||
+ check.type === checkTypes.WHITELISTED
+ ) {
+ // if it's accepted, allow.
+ script.tagAsAccepted(this.d.pageURL, check.reason, checker.hash);
+ } else {
+ // anything else is nontrivial. Including TRIVIAL_DEFINES_FUNCTION.
+ console.debug("checker hash for remove is ", checker.hash);
+ this.d.removeGivenJs(
+ script, reasons.FUNCTIONS_EXTERNAL, false, checker.hash);
+ }
+
+ } catch (e) {
+ console.debug('error in checkExternalScript',
+ e, e.lineNumber, 'for script', script.url);
+
+ // removeAllJs also invokes the handler's callback, so the check
+ // ends here.
+ this.d.removeAllJs();
+ this.destroy();
+ return;
+ }
+ console.debug('script url is', script.url, 'result is', script.result);
+ this.d.scriptHasBeenTested();
+ this.externalCheckIsDone();
+};
+
+DomChecker.prototype.externalCheckIsDone = function() {
+ "use strict";
+ console.debug('DomChecker.externalCheckIsDone');
+
+ console.debug('scriptsTested is', this.d.scriptsTested);
+ console.debug('num external', this.d.numExternalScripts);
+
+ if (this.d.scriptsTested >= this.d.numExternalScripts) {
+ console.debug('wrapping up external');
+ this.wrapUpBeforeLeaving();
+ } else {
+ var scriptsToCheck = this.d.numExternalScripts - this.d.scriptsTested;
+ console.debug('Not wrapping up! Waiting to check ' + scriptsToCheck +
+ ' more script(s)');
+
+ if (this.d.externalScripts[0]) {
+ console.debug('script 0 is', this.d.externalScripts[0]);
+ }
+ if (this.d.externalScripts[1]) {
+ console.debug('script 1 is', this.d.externalScripts[1]);
+ }
+ }
+};
+
+/**
+ * xhr
+ * Perform a XMLHttpRequest on the url given.
+ * @param url string A URL.
+ * @return The response text.
+ */
+DomChecker.prototype.xhr = function(script, responseCallback) {
+ "use strict";
+
+ var regex = /^text\/html/i;
+ var url = script.url;
+
+ try {
+ // add url to whitelist.
+ urlTester.addUrl(url);
+
+ // request module. Compatible with Https-Everywhere.
+ require('html_script_finder/dom_handler/request')
+ .request(script, responseCallback).request();
+ } catch (x) {
+ console.debug('error', x, x.lineNumber, x.fileName);
+ responseCallback(script, false);
+ }
+};
+
+/**
+ * exports.domChecker
+ * Instantiate a brand new clone of the domChecker.
+ * @param dom obj The given dom for analysis.
+ * @param pageURL string the URL for the page.
+ * @param callback function callback when all the work has been performed.
+ */
+exports.domChecker = function(domHandler) {
+ "use strict";
+
+ var domChecker = new DomChecker();
+
+ domChecker.init(domHandler);
+
+ return domChecker;
+};
+
+// Standalone export of the xhr method, taken from a throwaway
+// instance; the method body does not reference `this`.
+exports.xhr = new DomChecker().xhr;
diff --git a/lib/html_script_finder/dom_handler/dom_gatherer.js b/lib/html_script_finder/dom_handler/dom_gatherer.js
new file mode 100644
index 0000000..4fcee88
--- /dev/null
+++ b/lib/html_script_finder/dom_handler/dom_gatherer.js
@@ -0,0 +1,281 @@
+/**
+ * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
+ * *
+ * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+var scriptProperties = require("html_script_finder/dom_handler/script_properties");
+
+const scriptTypes = scriptProperties.scriptTypes;
+const scriptsCached = require("script_entries/scripts_cache").scriptsCached;
+
+const statusTypes = scriptProperties.statusTypes;
+// object model for script entries.
+var scriptObject = require("html_script_finder/dom_handler/script_object");
+
+var urlHandler = require("url_handler/url_handler");
+
+var attributeHelpers = require("html_script_finder/dom_handler/attributes");
+
+// javascript:*
+var jsInAttrRe = attributeHelpers.jsInAttrRe;
+
+// the list of all available event attributes
+var intrinsicEvents = attributeHelpers.intrinsicEvents;
+
+var privacyChecker = require("js_checker/privacy_checker").privacyCheck;
+
+const types = require("js_checker/constant_types");
+
+var checkTypes = types.checkTypes;
+
+// This array reflects the valid script types as listed in
+// http://mxr.mozilla.org/mozilla-central/source/content/base/src/nsScriptLoader.cpp#437
+// Each pattern is anchored at the start only, so a type with anything
+// appended after one of these prefixes is still considered valid:
+var jsValidTypes = [
+ /^text\/javascript/i,
+ /^text\/ecmascript/i,
+ /^application\/javascript/i,
+ /^application\/ecmascript/i,
+ /^application\/x-javascript/i
+];
+
+// Patterns matching the opening and closing CDATA delimiters.
+var stripCDATAOpen = /<\!\[CDATA\[/gi;
+var stripCDATAClose = /]]>/g;
+
+/**
+ * stripHtmlCommentsInScript
+ *
+ * Despite its name, this only removes every CDATA opening and
+ * closing delimiter from the given script text.
+ *
+ * @param s string The script text.
+ * @return string The text without CDATA delimiters.
+ */
+var stripHtmlCommentsInScript = function (s) {
+    return s.replace(stripCDATAOpen, '').replace(stripCDATAClose, '');
+};
+
+
+/**
+ * DomGatherer
+ *
+ * Collects script elements and javascript found in attributes
+ * across a dom object.
+ */
+var DomGatherer = function() {
+    // Reference to the domHandler object; set by init().
+    this.d = null;
+};
+
+/**
+ * init
+ *
+ * Keep a reference to the domHandler object so that its
+ * properties can be read and updated by the other methods.
+ *
+ * @param domHandler obj The domHandler object to work on.
+ */
+DomGatherer.prototype.init = function (domHandler) {
+    this.d = domHandler;
+};
+
+/**
+ * scriptHasInvalidType
+ *
+ * Decide from the `type` attribute whether a script element should
+ * be left out of the analysis.
+ *
+ * @param script obj The script element.
+ * @return true when the type is 'librejs/blocked' or is set to a
+ * value matching none of jsValidTypes; false when the type is
+ * missing/empty or matches one of jsValidTypes.
+ *
+ */
+DomGatherer.prototype.scriptHasInvalidType = function (script) {
+    var type = script.getAttribute('type');
+
+    // js has already been blocked.
+    if (type === 'librejs/blocked') {
+        return true;
+    }
+
+    // type isn't set, don't look further.
+    if (!type) {
+        return false;
+    }
+
+    // Valid as soon as one of the accepted patterns matches;
+    // otherwise the type is invalid and cannot be executed.
+    var isValid = jsValidTypes.some(function (pattern) {
+        return pattern.test(type);
+    });
+
+    return !isValid;
+};
+
+/**
+ * findScripts
+ *
+ * Store the collection of script elements found in the dom on the
+ * domHandler, together with how many there are (used for looping).
+ */
+DomGatherer.prototype.findScripts = function() {
+    var handler = this.d;
+
+    handler.domScripts = handler.dom.getElementsByTagName('script');
+    handler.numScripts = handler.domScripts.length;
+};
+
+/**
+ * gatherIntrinsicEvents
+ *
+ * Walks every element of the dom and asks the attribute helpers for
+ * javascript held in attributes: values starting with "javascript:"
+ * and on* event attributes (onLoad, ...). Each script entry reported
+ * by the helpers is recorded on the domHandler as an inline script.
+ *
+ */
+DomGatherer.prototype.gatherIntrinsicEvents = function() {
+    var that = this,
+        all = this.d.dom.getElementsByTagName('*'),
+        max = all.length,
+        i;
+
+    // Shared by both helper calls: the helpers report `false` when
+    // there is nothing to record.
+    var recordInlineScript = function (scriptEntry) {
+        if (scriptEntry !== false) {
+            that.d.inlineScripts.push(scriptEntry);
+            that.d.scripts.push(scriptEntry);
+
+            // add inline script in the count.
+            that.d.numInlineScripts++;
+        }
+    };
+
+    for (i = 0; i < max; i++) {
+        // look for attributes with value javascript:*
+        attributeHelpers.findJSinAttribute(all[i], recordInlineScript);
+
+        // look for attributes of on* (onLoad, ...)
+        attributeHelpers.findOnJSAttribute(all[i], recordInlineScript);
+    }
+};
+
+/**
+ * gatherScriptsContent
+ *
+ * Goes through the script elements previously stored in
+ * this.d.domScripts and sorts each one into the domHandler's lists:
+ * external scripts (with their absolute url) or inline scripts
+ * (with their text). Scripts flagged by the privacy checker are
+ * handed to removeGivenJs instead of being listed.
+ *
+ * If anything throws while gathering, removeAllJs is called on the
+ * domHandler.
+ *
+ */
+DomGatherer.prototype.gatherScriptsContent = function() {
+ var i = 0, currentScript = '', absolutePath, scriptEntry,
+ that = this;
+ try {
+ for (; i < this.d.numScripts; i++) {
+ // Case 1: the domHandler reports this script as covered by
+ // JS web labels. It is recorded as an external script with
+ // the JSWEBLABEL status and an empty tree.
+ if (this.d.checkScriptForJsWebLabels(this.d.domScripts[i])) {
+ absolutePath = urlHandler.resolve(
+ this.d.pageURL, this.d.domScripts[i].src);
+ scriptEntry = scriptObject.Script(
+ {'type': scriptTypes.EXTERNAL,
+ 'status': statusTypes.JSWEBLABEL,
+ 'element': this.d.domScripts[i],
+ 'url': absolutePath});
+ scriptEntry.tree = {};
+
+ this.d.externalScripts.push(scriptEntry);
+ that.d.scripts.push(scriptEntry);
+
+ this.d.loadsHtmlExternalScripts = true;
+
+ // increment number of scripts found.
+ this.d.numExternalScripts++;
+ }
+
+ // check that script has valid type
+ else if (!this.scriptHasInvalidType(this.d.domScripts[i])) {
+
+
+ // Case 2: script with a src attribute and no JS web label:
+ // recorded as an unchecked external script.
+ if (this.d.hasSrc(this.d.domScripts[i]) &&
+ !this.d.scriptHasJsWebLabel(this.d.domScripts[i])) {
+
+ console.debug('an external script', this.d.domScripts[i]);
+
+ absolutePath = urlHandler.resolve(
+ this.d.pageURL, this.d.domScripts[i].src);
+ scriptEntry = scriptObject.Script(
+ {'type': scriptTypes.EXTERNAL,
+ 'status': statusTypes.UNCHECKED,
+ 'element': this.d.domScripts[i],
+ 'url': absolutePath});
+ this.d.externalScripts.push(scriptEntry);
+ that.d.scripts.push(scriptEntry);
+
+ this.d.loadsHtmlExternalScripts = true;
+
+ // increment number of scripts found.
+ this.d.numExternalScripts++;
+
+ // Case 3: the privacy checker flags the script text: the
+ // script is passed to removeGivenJs as a SINGLETON entry
+ // and is not added to any list here.
+ } else if (privacyChecker.checkScriptPrivacyThreat(this.d.domScripts[i].text)) {
+ this.d.removeGivenJs(scriptObject.Script(
+ {'type': scriptTypes.SINGLETON,
+ 'status': statusTypes.UNCHECKED,
+ 'element': this.d.domScripts[i],
+ 'text': this.d.domScripts[i].text
+ }), '', true);
+ // Case 4: non-empty inline script.
+ } else if (this.d.domScripts[i].text !== '') {
+ // using else if since script text is
+ // ignored if src attribute is set.
+ // adding this.narcissusBugFixLibreJS to fix comment bug.
+ var bugfix = require('html_script_finder/bug_fix').narcissusBugFixLibreJS;
+ // The suffix is appended first, then CDATA delimiters
+ // are stripped from the combined text.
+ currentScript = stripHtmlCommentsInScript(this.d.domScripts[i].text + bugfix);
+
+ scriptEntry = scriptObject.Script(
+ {'type': scriptTypes.INLINE,
+ 'status': statusTypes.UNCHECKED,
+ 'element': this.d.domScripts[i],
+ 'text': currentScript});
+ this.d.inlineScripts.push(scriptEntry);
+ this.d.scripts.push(scriptEntry);
+
+ // add inline script in the count.
+ this.d.numInlineScripts++;
+ }
+ }
+ }
+ } catch (e) {
+ // Any problem arising, we remove the script.
+ console.debug('problem gathering scripts', e, e.lineNumber);
+ this.d.removeAllJs();
+ }
+};
+
+/*
+ * exports.domGatherer
+ * Instantiate a brand new clone of the domGatherer.
+ * @param dom obj The given dom for analysis.
+ * @param pageURL string the URL for the page.
+ * @param callback function callback when all the work has been performed.
+ */
+exports.domGatherer = function (domHandler) {
+ var dg = new DomGatherer();
+ dg.init(domHandler);
+ return dg;
+};
diff --git a/lib/html_script_finder/dom_handler/request.js b/lib/html_script_finder/dom_handler/request.js
new file mode 100644
index 0000000..7f217ef
--- /dev/null
+++ b/lib/html_script_finder/dom_handler/request.js
@@ -0,0 +1,115 @@
+/**
+ * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
+ * *
+ * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+var timer = require("sdk/timers");
+
+var {Cc, Ci, Cu, Cm, Cr} = require("chrome");
+var {XPCOMUtils} = Cu.import("resource://gre/modules/XPCOMUtils.jsm");
+
+
+// ensure xhr won't create an infinite loop
+// with html content.
+var urlTester = require("html_script_finder/url_seen_tester").urlSeenTester;
+var urlHandler = require("url_handler/url_handler");
+const scriptsCached = require("script_entries/scripts_cache").scriptsCached;
+
+/**
+ * Request
+ *
+ * Holds what is needed to fetch one script through a channel and
+ * report the outcome. All fields are filled in by init().
+ */
+var Request = function() {
+    this.url = null;
+    this.channel = null;
+    this.script = null;
+    this.responseCallback = null;
+};
+
+/**
+ * init
+ *
+ * Remember the script entry and callback, and create the channel
+ * for the script's url.
+ *
+ * @param script obj The script entry; its `url` property is requested.
+ * @param callback function Stored as responseCallback for request().
+ */
+Request.prototype.init = function(script, callback) {
+    // the initial url is the one carried by the script entry.
+    this.script = script;
+    this.url = script.url;
+
+    console.debug('In Request.init() for url:', this.url);
+
+    this.responseCallback = callback;
+
+    var ioService = Cc["@mozilla.org/network/io-service;1"]
+        .getService(Ci.nsIIOService);
+    this.channel = ioService.newChannel(this.url, null, null);
+};
+
+/**
+ * request
+ *
+ * Opens the channel asynchronously, accumulates the bytes received,
+ * converts them to UTF-8 when the response declares another charset,
+ * and reports the outcome through responseCallback(script, data).
+ *
+ * The callback is invoked exactly once per call to request(): with
+ * the collected data on success, or with "" when opening the channel,
+ * reading the stream or converting the charset fails.
+ */
+Request.prototype.request = function() {
+    var that = this;
+
+    // Several failure paths below report "" while onStopRequest also
+    // reports at the end; this flag makes sure that only the first
+    // report reaches the caller.
+    var responded = false;
+    var responseReceived = function (data) {
+        if (responded) {
+            return;
+        }
+        responded = true;
+        that.responseCallback(that.script, data);
+    };
+
+    try {
+        this.channel.asyncOpen({
+            QueryInterface: XPCOMUtils.generateQI(
+                [Ci.nsIRequestObserver, Ci.nsIStreamListener]),
+            data: "",
+            charset: null,
+
+            onStartRequest: function(request, context) {
+                this.charset = request.contentCharset || "UTF-8";
+            },
+
+            onDataAvailable: function (request, context, stream, offset, count) {
+                try {
+                    var binaryInputStream = Cc["@mozilla.org/binaryinputstream;1"]
+                        .createInstance(Ci.nsIBinaryInputStream);
+                    binaryInputStream.setInputStream(stream);
+                    var data = binaryInputStream.readBytes(count);
+                    this.data += data;
+                } catch (x) {
+                    console.debug('error in request', x, x.lineNumber);
+                    responseReceived("");
+                }
+            },
+
+            onStopRequest: function (request, context, result) {
+                try {
+                    if (this.charset.toLowerCase() != "utf-8") {
+                        var uConv = Cc["@mozilla.org/intl/utf8converterservice;1"]
+                            .createInstance(Ci.nsIUTF8ConverterService);
+
+                        this.data = uConv.convertStringToUTF8(
+                            this.data, this.charset, true);
+                    }
+                } catch (e) {
+                    console.debug("Issue with nsIUTF8ConverterService", e);
+                    console.debug("Charset was", this.charset);
+                    responseReceived("");
+                    // Do not fall through and report the unconverted
+                    // data as a second response.
+                    return;
+                }
+                responseReceived(this.data);
+            }
+        }, null);
+    } catch(e) {
+        console.debug("asyncOpen exception", e);
+        responseReceived("");
+    }
+};
+
+/**
+ * exports.request
+ * Create a Request and initialise it for the given script entry.
+ * @param script obj The script entry to fetch.
+ * @param callback function Callback handed to Request's init.
+ * @return The initialised Request; call its request() method to start.
+ */
+exports.request = function(script, callback) {
+    var req = new Request();
+    req.init(script, callback);
+    return req;
+};
diff --git a/lib/html_script_finder/dom_handler/script_object.js b/lib/html_script_finder/dom_handler/script_object.js
new file mode 100644
index 0000000..5431cf6
--- /dev/null
+++ b/lib/html_script_finder/dom_handler/script_object.js
@@ -0,0 +1,208 @@
+/**
+ * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
+ * *
+ * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+var removedScripts = require("script_entries/removed_scripts").removedScripts;
+
+var acceptedScripts = require("script_entries/accepted_scripts")
+ .acceptedScripts;
+var dryRunScripts = require("script_entries/dryrun_scripts").dryRunScripts;
+
+/**
+ * Script
+ *
+ * Object model for one script entry. Every field starts out as null;
+ * init(props) then copies the given properties and throws when a
+ * required one (type, status, element) is missing.
+ *
+ * @param props obj Properties of the entry, see init().
+ */
+var Script = function(props) {
+ // can be an attribute, an inline script,
+ // or an external script.
+ this.type = null;
+
+ /*
+ * Script.status - The script's current status.
+ *
+ * Possible values are:
+ *
+ * 0 - unchecked
+ * 1 - checked
+ * 2 - accepted
+ * 3 - rejected
+ * 4 - jsweblabel
+ *
+ * See script_properties.js for definitions.
+ */
+ this.status = null;
+
+ // contains the dom element
+ this.element = null;
+
+ // the attribute name, if applicable.
+ // NOTE(review): init() does not assign this field.
+ this.attribute = null;
+
+ // the script text as a string.
+ // NOTE(review): init() does not assign this field; the text of
+ // inline scripts is stored in `text` below.
+ this.value = null;
+
+ // the src url if external.
+ this.url = null;
+
+ // the script text if inline.
+ this.text = null;
+
+ // copies props onto the fields above (and sets jsAttributes).
+ this.init(props);
+};
+
+/**
+ * init
+ *
+ * Copy the given properties onto the script entry.
+ *
+ * `type`, `status` and `element` are required: a string is thrown
+ * as soon as one of them (or props itself) is undefined.
+ * `url`, `text` and `jsAttributes` are optional and default to null.
+ */
+Script.prototype.init = function(props) {
+    // check the required elements are present.
+    if (typeof props === 'undefined') {
+        throw "Error, missing script entry value in script_object.js";
+    }
+
+    // required properties, checked and assigned one after the other.
+    if (typeof props.type === 'undefined') {
+        throw "type is missing";
+    }
+    this.type = props.type;
+
+    if (typeof props.status === 'undefined') {
+        throw "status is missing";
+    }
+    this.status = props.status;
+
+    if (typeof props.element === 'undefined') {
+        throw "element is missing";
+    }
+    this.element = props.element;
+
+    // conditional properties: any falsy value becomes null.
+    this.url = props.url || null;
+    this.text = props.text || null;
+    this.jsAttributes = props.jsAttributes || null;
+
+    // Without a text, fall back to the code carried by the tree.
+    // NOTE(review): nothing visible here assigns `this.tree` before
+    // init runs, so this fallback looks inactive - confirm.
+    if (typeof this.text !== 'string') {
+        var tree = this.tree;
+        if (tree !== null &&
+            typeof tree === 'object' &&
+            tree.hasOwnProperty('jsCode')) {
+            this.text = tree.jsCode;
+        }
+    }
+};
+
+/**
+ * tagScript (private helper)
+ *
+ * Shared implementation of the tagAs* methods: marks the script's
+ * dom element with the given label and reason, then adds a
+ * description of the script to the given registry for the page.
+ *
+ * @param script obj The Script entry being tagged.
+ * @param registry obj Object exposing addAScript(pageURL, entry).
+ * @param label string Value for the element's data-librejs attribute.
+ * @param pageURL string The URL of the page holding the script.
+ * @param reason string Value for data-librejs-reason, also stored.
+ * @param hash The script hash, stored as given.
+ */
+var tagScript = function(script, registry, label, pageURL, reason, hash) {
+    var content = script.findContentType();
+    // An entry without url (null or undefined) is an inline one.
+    var inline = (script.url == null);
+
+    script.element.setAttribute('data-librejs', label);
+    script.element.setAttribute('data-librejs-reason', reason);
+
+    registry.addAScript(
+        pageURL,
+        {'inline': inline,
+         'contents': content,
+         'reason': reason,
+         'url': inline ? null : script.url,
+         'hash': hash
+        });
+};
+
+/**
+ * tagAsAccepted
+ *
+ * Mark the element as 'accepted' and add the script to acceptedScripts.
+ */
+Script.prototype.tagAsAccepted = function(pageURL, reason, hash) {
+    console.debug("url is", this.url == null ? null : this.url);
+    tagScript(this, acceptedScripts, 'accepted', pageURL, reason, hash);
+};
+
+/**
+ * tagAsRemoved
+ *
+ * Mark the element as 'rejected' and add the script to removedScripts.
+ */
+Script.prototype.tagAsRemoved = function(pageURL, reason, hash) {
+    console.debug("tagAsRemoved hash is", hash);
+    tagScript(this, removedScripts, 'rejected', pageURL, reason, hash);
+};
+
+/**
+ * tagAsDryRun
+ *
+ * Mark the element as 'dryrun' and add the script to dryRunScripts.
+ * (This method used to be defined twice in this file; only the later
+ * definition, which did not log, was ever in effect.)
+ */
+Script.prototype.tagAsDryRun = function(pageURL, reason, hash) {
+    tagScript(this, dryRunScripts, 'dryrun', pageURL, reason, hash);
+};
+
+/**
+ * removeNarcissusBugLine
+ *
+ * Returns the given string without the first occurrence of the
+ * line that is appended to inline scripts so that script tags
+ * containing only comments can still be checked.
+ *
+ * @param str string The script text.
+ * @return string The text without the appended line.
+ */
+Script.prototype.removeNarcissusBugLine = function(str) {
+    var appendedLine = '\n\nthis.narcissusBugFixLibreJS';
+    return str.replace(appendedLine, '');
+};
+
+/**
+ * findContentType
+ *
+ * Works out the content to report for this entry from the
+ * properties that have actually been set, rather than from the
+ * `type` constant:
+ *  - a url is set: "" (external script);
+ *  - a text is set: the text of the dom element (inline script);
+ *  - jsAttributes is set: the attributes serialised as JSON.
+ * Returns undefined when none of them is set.
+ */
+Script.prototype.findContentType = function() {
+    if (this.url != undefined) {
+        return "";
+    }
+
+    if (this.text != undefined) {
+        return this.element.text;
+    }
+
+    if (this.jsAttributes != undefined) {
+        // return the array.
+        return JSON.stringify(this.jsAttributes);
+    }
+};
+
+// Factory: callers get a new Script entry without using `new`.
+exports.Script = function(props) {
+    var entry = new Script(props);
+    return entry;
+};
diff --git a/lib/html_script_finder/dom_handler/script_properties.js b/lib/html_script_finder/dom_handler/script_properties.js
new file mode 100644
index 0000000..2eeeedb
--- /dev/null
+++ b/lib/html_script_finder/dom_handler/script_properties.js
@@ -0,0 +1,43 @@
+/**
+ * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
+ * *
+ * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+// Numeric codes for the kind of a script entry (its `type`).
+exports.scriptTypes = {
+ INLINE: 0,
+ EXTERNAL: 1,
+ ATTRIBUTE: 2,
+ SINGLETON: 3
+};
+
+// Numeric codes for the status of a script entry (its `status`).
+exports.statusTypes = {
+ UNCHECKED: 0,
+ CHECKED: 1,
+ ACCEPTED: 2,
+ REJECTED: 3,
+ JSWEBLABEL: 4
+};
+
+// Human-readable explanations, keyed by reason code.
+exports.reasons = {
+ 'FUNCTIONS_INLINE': 'This script is detected as inline, nonfree, defining functions or methods, and the rest of the page as loading external scripts',
+ 'FUNCTIONS_EXTERNAL': 'This script is detected as nonfree, external, and as defining functions or methods',
+ 'CONSTRUCT': 'This script is detected as nonfree and as defining nontrivial constructs',
+ 'FREE': 'This script is detected as free',
+ 'TRIVIAL': 'This script is detected as trivial',
+ 'TRIVIAL_NOT_ALLOWED': 'This script is detected as trivial, but trivial is not allowed here because of other scripts'
+};
diff --git a/lib/html_script_finder/html_parser.js b/lib/html_script_finder/html_parser.js
new file mode 100644
index 0000000..69b2acc
--- /dev/null
+++ b/lib/html_script_finder/html_parser.js
@@ -0,0 +1,158 @@
+/*
+ # ***** BEGIN LICENSE BLOCK *****
+ # Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ #
+ # The contents of this file are subject to the Mozilla Public License Version
+ # 1.1 (the "License"); you may not use this file except in compliance with
+ # the License. You may obtain a copy of the License at
+ # http://www.mozilla.org/MPL/
+ #
+ # Software distributed under the License is distributed on an "AS IS" basis,
+ # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ # for the specific language governing rights and limitations under the
+ # License.
+ #
+ # The Original Code is Microsummarizer.
+ #
+ # The Initial Developer of the Original Code is Mozilla.
+ # Portions created by the Initial Developer are Copyright (C) 2006
+ # the Initial Developer. All Rights Reserved.
+ #
+ # Contributor(s):
+ # Myk Melez <myk@mozilla.org> (Original Author)
+ # Simon Bünzli <zeniko@gmail.com>
+ # Asaf Romano <mano@mozilla.com>
+ # Dan Mills <thunder@mozilla.com>
+ # Ryan Flint <rflint@dslr.net>
+ #
+ # Alternatively, the contents of this file may be used under the terms of
+ # either the GNU General Public License Version 2 or later (the "GPL"), or
+ # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ # in which case the provisions of the GPL or the LGPL are applicable instead
+ # of those above. If you wish to allow use of your version of this file only
+ # under the terms of either the GPL or the LGPL, and not to allow others to
+ # use your version of this file under the terms of the MPL, indicate your
+ # decision by deleting the provisions above and replace them with the notice
+ # and other provisions required by the GPL or the LGPL. If you do not delete
+ # the provisions above, a recipient may use your version of this file under
+ # the terms of any one of the MPL, the GPL or the LGPL.
+ #
+ # ***** END LICENSE BLOCK *****
+ */
+
+/*
+ * The original file is located here:
+ * http://mxr.mozilla.org/mozilla/source/browser/components/microsummaries/src/nsMicrosummaryService.js?raw=1
+ *
+ */
+
+/**
+ * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
+ * *
+ * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+/**
+ * html_parser
+ *
+ * Takes in an http response (string) and parses it with nsIDOMParser
+ * so that it can be manipulated as a DOM object. The DOM is handed to
+ * the dom handler, and the resulting DOM is serialized to a stream to
+ * be passed along as a replacement of the original response.
+ *
+ */
+
+var {Cc, Ci, Cu} = require("chrome");
+
+var domHandlerModule = require("html_script_finder/dom_handler");
+
+const PR_UINT32_MAX = 2147483647;
+
+
+exports.htmlParser = function () {
+
+    return {
+        charset: null,
+        htmlText: null,
+        pageURL: null,
+        fragment: null,
+        contentType: null,
+        responseStatus: null,
+
+        /**
+         * parse
+         * Parses the response text into a DOM, hands it to the dom
+         * handler and, once the handler is done, passes the
+         * serialized result to parseResult.
+         * @param htmlText string The response body.
+         * @param charset string Charset of the response; utf-8 is
+         * used when it is empty, undefined or null.
+         * @param contentType string Content type, used both for
+         * parsing and for serializing.
+         * @param url string The URL of the page.
+         * @param fragment Passed through to the dom handler.
+         * @param responseStatus Passed through to the dom handler.
+         * @param parseResult function Called with the value returned
+         * by serializeToStream.
+         */
+        parse: function (htmlText, charset, contentType, url, fragment,
+                         responseStatus, parseResult) {
+
+            this.htmlText = htmlText;
+            // No usable charset given: fall back to utf-8.
+            this.charset = charset || "utf-8";
+            this.contentType = contentType;
+            this.pageURL = url;
+            this.fragment = fragment;
+            this.responseStatus = responseStatus;
+            var that = this;
+
+            var domParser = Cc["@mozilla.org/xmlextras/domparser;1"].
+                    createInstance(Ci.nsIDOMParser);
+
+            var dom = domParser.parseFromString(this.htmlText, this.contentType);
+            domHandlerModule.domHandler(dom, this.pageURL, this.fragment, this.responseStatus, function (newDom) {
+                parseResult(that.serializeToStream(newDom, that));
+            });
+
+        },
+
+        /**
+         * serializeToStream
+         * Serializes a DOM into a storage stream, using an
+         * nsIDocumentEncoder for this parser's content type and
+         * charset.
+         * @param dom obj Reference to the dom object
+         * @param that obj Reference to the object returned by
+         * htmlParser. Kept for existing callers; not used here.
+         * @return the storage stream holding the encoded document,
+         * or undefined when serializing fails (the error is logged).
+         */
+        serializeToStream: function (dom, that) {
+
+            try {
+                var storageStream = Cc["@mozilla.org/storagestream;1"].createInstance(Ci.nsIStorageStream);
+                var binaryOutputStream = Cc["@mozilla.org/binaryoutputstream;1"].createInstance(Ci.nsIBinaryOutputStream);
+                var encoder = Cc["@mozilla.org/layout/documentEncoder;1?type=" + this.contentType]
+                    .createInstance(Ci.nsIDocumentEncoder);
+
+                encoder.setCharset(this.charset);
+                encoder.init(dom, this.contentType, 0);
+                storageStream.init(8192, PR_UINT32_MAX, null);
+
+                binaryOutputStream.setOutputStream(storageStream.getOutputStream(0));
+                encoder.encodeToStream(binaryOutputStream);
+                return storageStream;
+            } catch (e) {
+                console.debug('issue with serializer', e, e.lineNumber);
+            }
+        }
+
+    };
+};
diff --git a/lib/html_script_finder/url_seen_tester.js b/lib/html_script_finder/url_seen_tester.js
new file mode 100644
index 0000000..d2f38a9
--- /dev/null
+++ b/lib/html_script_finder/url_seen_tester.js
@@ -0,0 +1,78 @@
+/**
+ * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
+ * *
+ * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+var httpRe = /^(http:)/i;
+var httpsRe = /^(https:)/i;
+
+// True only when `key` was stored on `map` itself with a truthy
+// value. Keys inherited from Object.prototype ('constructor',
+// 'toString', ...) must never count as whitelisted urls.
+var hasOwnTruthy = function (map, key) {
+    return Object.prototype.hasOwnProperty.call(map, key) &&
+        Boolean(map[key]);
+};
+
+/**
+ * urlSeenTester
+ *
+ * Keeps track of the urls that have been whitelisted. A url also
+ * counts as whitelisted when the same url with the other scheme
+ * (http <-> https) has been added.
+ */
+exports.urlSeenTester = {
+    whitelist: {},
+
+    /**
+     * Despite its name, swaps the scheme both ways: http: becomes
+     * https: and https: becomes http:. Other urls are returned
+     * unchanged. Returns undefined if the swap throws (the error is
+     * logged).
+     */
+    httpToHttps: function (url) {
+        try {
+            if (httpRe.test(url)) {
+                return url.replace(httpRe, 'https:');
+            } else if (httpsRe.test(url)) {
+                return url.replace(httpsRe, 'http:');
+            } else {
+                return url;
+            }
+        } catch (x) {
+            console.debug('error', x);
+        }
+    },
+
+    // Forget every whitelisted url.
+    clearUrls: function () {
+        this.whitelist = {};
+    },
+
+    // Forget one url (exact match only).
+    clearUrl: function (url) {
+        if (hasOwnTruthy(this.whitelist, url)) {
+            delete this.whitelist[url];
+        }
+    },
+
+    // Whitelist a url, unless it (or its other-scheme variant) is
+    // already whitelisted.
+    addUrl: function (url) {
+        console.debug('adding', url);
+
+        if (!this.isWhitelisted(url)) {
+            console.debug('allowing', url);
+            this.whitelist[url] = true;
+        }
+    },
+
+    // Whether the url, or the same url with the other scheme, has
+    // been whitelisted.
+    isWhitelisted: function (url) {
+        if (hasOwnTruthy(this.whitelist, url) ||
+            hasOwnTruthy(this.whitelist, this.httpToHttps(url))) {
+            console.debug('found to be whitelisted', url);
+            return true;
+        }
+        return false;
+    }
+
+};
diff --git a/lib/html_script_finder/web_labels/find_js_labels.js b/lib/html_script_finder/web_labels/find_js_labels.js
new file mode 100644
index 0000000..6404c49
--- /dev/null
+++ b/lib/html_script_finder/web_labels/find_js_labels.js
@@ -0,0 +1,131 @@
+/**
+ * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
+ * *
+ * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+/**
+ * This file works in conjunction with
+ * lib/html_script_finder/web_labels/js_web_labels.js to find mentions of
+ * external JavaScript files and their license information.
+ * This lets the dom_handler allow those files by default.
+ */
+
+/**
+ * Convert link nodes into plain license objects.
+ *
+ * @param {Array} licenses - An array (or collection) of html nodes.
+ *
+ * @return {Array} - One {licenseName, licenseUrl} object per node,
+ *     taken from the node's textContent and href.
+ */
+function getLicensesArrayFromElements(licenses) {
+    var simplified = [];
+    var total = licenses.length;
+    var idx;
+    var node;
+
+    for (idx = 0; idx < total; idx += 1) {
+        node = licenses[idx];
+        simplified[idx] = {
+            licenseName: node.textContent,
+            licenseUrl: node.href
+        };
+    }
+    return simplified;
+}
+
+/**
+ * Convert link nodes into plain source objects.
+ *
+ * @param {Array} sources - An array (or collection) of html nodes.
+ *
+ * @return {Array} - One {sourceName, sourceUrl} object per node,
+ *     taken from the node's textContent and href.
+ */
+function getSourcesArrayFromElements(sources) {
+    var simplified = [];
+    var total = sources.length;
+    var idx;
+    var node;
+
+    for (idx = 0; idx < total; idx += 1) {
+        node = sources[idx];
+        simplified[idx] = {
+            sourceName: node.textContent,
+            sourceUrl: node.href
+        };
+    }
+    return simplified;
+}
+
+/**
+ * getLicenseList
+ *
+ * Reads the table with id 'jslicense-labels1' and returns one entry
+ * per row that links to a script file.
+ *
+ * @param {Object} document - The document to look into.
+ *
+ * @return {Array} - Entries of the form
+ *     {fileName, fileUrl, fileHash, licenses, sources}. `licenses`
+ *     holds {licenseName, licenseUrl} objects and `sources` holds
+ *     {sourceName, sourceUrl} objects; when a row has no license or
+ *     source link, a single entry with both values set to 'Unknown'
+ *     is used. Returns an empty array when the table is missing;
+ *     if an error occurs while reading rows, the entries gathered so
+ *     far are returned.
+ */
+exports.getLicenseList = function(document) {
+    var tbl = document.getElementById('jslicense-labels1');
+    var jsList = [];
+    var i = 0;
+
+    if (!tbl) {
+        return jsList;
+    }
+
+    // Placeholder standing in for a missing link.
+    var mockElem = {textContent: 'Unknown', href: 'Unknown'};
+
+    try {
+        var rows = tbl.getElementsByTagName('tr');
+        var le = rows.length;
+
+        // loop through rows, and add each valid element to
+        // the array.
+        for (; i < le; i++) {
+            var cells = rows[i].getElementsByTagName('td');
+
+            // Find script url
+            var fileLinks = cells[0] ?
+                cells[0].getElementsByTagName('a') : [];
+            var fileCell = fileLinks[0] || mockElem;
+
+            // 'licenses' and 'sources' will, for normal cases, just
+            // contain one element. If the fileCell is pointing to a
+            // combined JS file with multiple licenses, though, these
+            // arrays will contain multiple elements.
+
+            // Find license info. The placeholder goes through the same
+            // conversion as real links so that every entry has the
+            // same shape.
+            var licenseLinks = cells[1] ?
+                cells[1].getElementsByTagName('a') : [];
+            var licenses = getLicensesArrayFromElements(
+                licenseLinks.length > 0 ? licenseLinks : [mockElem]);
+
+            // Find original source info
+            var sourceLinks = cells[2] ?
+                cells[2].getElementsByTagName('a') : [];
+            var sources = getSourcesArrayFromElements(
+                sourceLinks.length > 0 ? sourceLinks : [mockElem]);
+
+            // Rows without a link to a script file are skipped.
+            if (fileCell.href !== 'Unknown') {
+                jsList.push({
+                    'fileName': fileCell.textContent,
+                    'fileUrl': fileCell.href,
+
+                    // we'll fill this with value when needed to compare
+                    // script.
+                    'fileHash': null,
+
+                    'licenses': licenses,
+                    'sources': sources
+                });
+            }
+        }
+    } catch (e) {
+        console.debug(
+            'Error fetching JS Web Label licenses',
+            e, e.lineNumber, e.fileName, 'index is', i);
+    }
+
+    return jsList;
+};
diff --git a/lib/html_script_finder/web_labels/js_web_labels.js b/lib/html_script_finder/web_labels/js_web_labels.js
new file mode 100644
index 0000000..b3fe063
--- /dev/null
+++ b/lib/html_script_finder/web_labels/js_web_labels.js
@@ -0,0 +1,279 @@
+/**
+ * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
+ * *
+ * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+// node.js url module. Makes it easier to resolve
+// urls in that datauri loaded dom
+var urlHandler = require("url_handler/url_handler");
+var {Cc, Ci, Cu, Cm, Cr} = require("chrome");
+var data = require("sdk/self").data;
+
+// license definitions, we are using canonical urls and license
+// identifiers.
+var licenses = require('js_checker/license_definitions').licenses;
+
+var getLicenseList = require('html_script_finder/web_labels/find_js_labels')
+ .getLicenseList;
+const types = require("js_checker/constant_types");
+
+const addToCache = require("html_script_finder/web_labels/script_hash_worker")
+ .addToCache;
+
+// keep web labels in memory so that they can be checked even when they
+// are embedded dynamically.
+// Keyed by the URL of the page that was analysed; each value is the
+// license list read from the web labels page that page links to.
+var jsWebLabelEntries = {};
+
+// store the url to js web labels already visited during this session
+// (a key is set to 1 once the corresponding labels page has loaded).
+var jsLabelsPagesVisited = {};
+
+/**
+ * Finds the JavaScript Web Labels page linked from a document and
+ * reads the license list it contains.
+ *
+ * The constructor only declares the instance fields; init() fills
+ * them in and starts the search.
+ */
+var WebLabelFinder = function () {
+ // document searched for the web labels link.
+ this.dom = null;
+ // URL of that document, used to resolve the link.
+ this.pageURL = null;
+ // resolved URL of the web labels page, set by formatURL().
+ this.jslicenseURL = null;
+ this.pageContent = null;
+ // entries read from the web labels page.
+ this.licenseList = null;
+ // wrapper around the requester's callback, installed by init().
+ this.callback = null;
+};
+
+/**
+ * Start looking for JavaScript Web Labels in the given document.
+ *
+ * @param {Object} dom - Document whose links are searched for a web
+ * labels link.
+ * @param {String} pageURL - URL of that document; the web labels link
+ * is resolved against it.
+ * @param {Function} callback - Receives the result. It is handed null
+ * when the finder reports without a value.
+ */
+WebLabelFinder.prototype.init = function(dom, pageURL, callback) {
+ var that = this;
+ this.pageURL = pageURL;
+ this.dom = dom;
+ // Wrap the requester's callback: the first call through
+ // this.callback forwards to it, any later call only logs a message.
+ this.callback = function (a) {
+ if (typeof a === 'undefined') {
+ a = null;
+ }
+
+ // rewrite callback as soon as it is triggered once.
+ that.callback = function () {
+ console.debug("Callback already called");
+ };
+
+ callback(a);
+ };
+ this.findJavaScriptLicenses();
+ // NOTE(review): these resets run after the search above, so the URL
+ // stored by formatURL() is blanked again once init() returns. The
+ // pending request keeps its own copy in req._url. Confirm that the
+ // order is intended before moving these lines.
+ this.pageContent = '';
+ this.jslicenseURL = '';
+};
+
+WebLabelFinder.prototype.findJavaScriptLicenses = function () {
+ this.searchForJsLink();
+
+ if (this.jslicenseURL && !(jsLabelsPagesVisited[this.jslicenseURL])) {
+ // get content from license page.
+ console.debug('called fetch license page for', this.jslicenseURL);
+ this.pageContent = this.fetchLicensePage();
+ } else {
+ console.debug(this.jslicenseURL, "already visited");
+ this.callback();
+ }
+};
+
+WebLabelFinder.prototype.searchForJsLink = function() {
+ console.debug('triggered searchForJsLink');
+ if (this.dom) {
+ var linkTags = this.dom.getElementsByTagName('a'),
+ i = 0,
+ len = linkTags.length,
+ path;
+
+ // loop through all a tags.
+ for (; i < len; i++) {
+ if (
+ (linkTags[i].hasAttribute('rel') &&
+ linkTags[i].getAttribute('rel') === 'jslicense') ||
+ (linkTags[i].hasAttribute('data-jslicense') &&
+ linkTags[i].getAttribute('data-jslicense') === '1')
+ ) {
+ // This page has a web labels link
+ return this.formatURL(linkTags[i]);
+ }
+ }
+ }
+
+ // no js web labels were found. call back.
+ this.callback();
+ return false;
+};
+
+WebLabelFinder.prototype.formatURL = function(link) {
+ this.jslicenseURL = urlHandler.resolve(this.pageURL, link.href);
+ this.jslicenseURL = urlHandler.addFragment(this.jslicenseURL, 'librejs=true');
+ console.debug('license URL found', this.jslicenseURL);
+ return this.jslicenseURL;
+};
+
+WebLabelFinder.prototype.fetchLicensePage = function() {
+ var that = this;
+ try {
+ var req = Cc["@mozilla.org/xmlextras/xmlhttprequest;1"].createInstance();
+
+ req.onload = function() {
+ console.debug("Fetching License!");
+ console.debug("URL is ", this._url);
+
+ that.licenseList = getLicenseList(this.responseXML);
+ console.debug("the license list", that.licenseList);
+ that.matchListWithDefs(this._url);
+
+ // add these entries to the global
+ // object for dynamically embedded scripts.
+ jsWebLabelEntries[that.pageURL] = that.licenseList;
+ jsLabelsPagesVisited[req._url] = 1;
+ };
+ console.debug(this.jslicenseURL);
+ req.open('GET', this.jslicenseURL, true);
+ req._url = this.jslicenseURL;
+ req.responseType = "document";
+ req.send();
+ } catch (e) {
+ console.debug(e, e.lineNumber, e.fileName, this.jslicenseURL);
+ this.callback({});
+ }
+};
+
+/**
+ * @method isLicenseFree
+ * Returns true if the given web labels row refers to a script that
+ * can be executed by LibreJS.
+ *
+ * This method has some side effects :-/
+ *
+ * @param {Object} lic - A license node from a JS web labels page. It's
+ * expected to contain one or more licenses.
+ * @return {Boolean}
+ */
+WebLabelFinder.prototype.isLicenseFree = function(
+ lic, jslicenseURL, callback
+) {
+ // For each license that this license row contains.
+ var isFree = false;
+ // licenseStatuses is later used to determine isFree.
+ var licenseStatuses = [];
+
+ for (var i = 0; i < lic.licenses.length; i++) {
+ var license;
+ var found = false;
+
+ // For each license from the internal license definitions
+ for (license in licenses) {
+ if (found === true) {
+ break;
+ }
+ var licDef = licenses[license];
+ var licArray = [];
+ if (!licDef.canonicalUrl) {
+ continue;
+ }
+ if (typeof licDef.canonicalUrl === 'string') {
+ licArray = [licDef.canonicalUrl];
+ } else {
+ licArray = licDef.canonicalUrl;
+ }
+
+ // For each of the canonical URLs recognized by this license
+ // definition
+ for (var j = 0; j < licArray.length; j++) {
+ if (urlHandler.removeFragment(licArray[j]) ===
+ urlHandler.removeFragment(lic.licenses[i].licenseUrl)
+ ) {
+ if (!require("sdk/url").isValidURI(lic.fileUrl)) {
+ console.debug(lic.fileUrl, " is not a valid URL");
+ callback();
+ }
+
+ // This license was recognized, and it was free. Add it
+ // to the array of license status, which we'll look at
+ // when we're done with this web label row.
+ licenseStatuses.push(true);
+
+ console.debug("about TO ADD TO XHR: ", lic.fileUrl);
+ this.listCheck[lic.fileUrl] = 0;
+ addToCache(lic, 0, jslicenseURL, callback);
+
+ // Break out of the nearest two loops cause we found
+ // a matching license
+ found = true;
+ break;
+ }
+ }
+ }
+ }
+
+ // Tally up the licenses we were able to match.
+ if (licenseStatuses.length > 0 &&
+ // If the number of licenses we matched is at least one, and
+ // it's the same number as the number of licenses in this Web
+ // Label column, only then can we recognize this script as free.
+ // licenseStatus.length should never be larger than
+ // lic.licenses.length.
+ licenseStatuses.length >= lic.licenses.length
+ ) {
+ isFree = true;
+ }
+
+ return isFree;
+};
+
+WebLabelFinder.prototype.matchListWithDefs = function(jslicenseURL) {
+ var that = this;
+ var licDef,
+ license, script;
+ var cacheCalls = 0;
+ this.listCheck = {};
+
+ // nested loop.
+ cacheCalls = 0;
+ var callback = function (url) {
+ cacheCalls++;
+ that.listCheck[url] = 1;
+ if (cacheCalls === Object.keys(that.listCheck).length) {
+ console.debug("triggering callback duh");
+ // return array to requester object
+ callback = false;
+ that.callback(that.licenseList);
+ }
+ };
+ require("sdk/timers").setTimeout(function () {
+ // callback after 60 seconds if it's still not returned.
+ // using this as a safeguard.
+ // return array to requester object
+ if (callback !== false) {
+ that.callback(that.licenseList);
+ console.debug(that.listCheck);
+ }
+ }, 15000);
+
+
+
+ for (var i = 0; i < this.licenseList.length; i++) {
+ // this.licenseList[i] is the web labels license column
+ var lic = this.licenseList[i];
+ if (this.isLicenseFree(lic, jslicenseURL, callback)) {
+ lic.free = true;
+ }
+ }
+};
+
+// Constructor of the object that locates and reads a web labels page.
+exports.WebLabelFinder = WebLabelFinder;
+
+// Store the web labels harvested across webpages (only in this session).
+exports.jsWebLabelEntries = jsWebLabelEntries;
+
+// URLs of the web labels pages already fetched (only in this session).
+exports.jsLabelsPagesVisited = jsLabelsPagesVisited;
diff --git a/lib/html_script_finder/web_labels/script_hash_worker.js b/lib/html_script_finder/web_labels/script_hash_worker.js
new file mode 100644
index 0000000..6d8d837
--- /dev/null
+++ b/lib/html_script_finder/web_labels/script_hash_worker.js
@@ -0,0 +1,62 @@
+/**
+ * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
+ * *
+ * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+const types = require("js_checker/constant_types");
+const scriptsCached = require("script_entries/scripts_cache").scriptsCached;
+const xhr = require('html_script_finder/dom_handler/dom_checker').xhr;
+const timers = require("sdk/timers");
+
+exports.addToCache = function (lic, delay, jsWebLabelsURL, callback) {
+ console.debug("jslicenseURL is", jsWebLabelsURL);
+ if (typeof delay === 'undefined') {
+ delay = 0;
+ }
+
+ // get file hash and store as cached.
+ console.debug('performing xhr for', lic.fileUrl);
+ timers.setTimeout(function() {
+ var cb = function (script, contents) {
+ try {
+ // add a cache entry.
+ var hash = scriptsCached.addEntryIfNotCached(
+ contents,
+ types.freeWithComment(
+ 'This script is free according to a JS Web Labels ' +
+ 'page visited recently (at ' +
+ jsWebLabelsURL.replace("librejs=true", "") + ' )'
+ ),
+ {},
+ true,
+ lic.fileUrl
+ );
+ console.debug('returning xhr from', lic.fileUrl);
+ callback(lic.fileUrl);
+ } catch (e) {
+ callback(lic.fileUrl);
+ }
+ };
+ // just callback after 5 seconds if we don't get the answer yet.
+ timers.setTimeout(function() {
+ cb = function() {};
+ callback(lic.fileUrl); }, 20000);
+
+ xhr({'url': lic.fileUrl}, cb);
+ }, delay);
+};