aboutsummaryrefslogtreecommitdiffstats
path: root/lib/html_script_finder/dom_handler.js
diff options
context:
space:
mode:
authorNik Nyby <nikolas@gnu.org>2015-01-17 17:12:36 -0500
committerNik Nyby <nikolas@gnu.org>2015-01-17 17:12:36 -0500
commitada88090ead2c3b9d0804794c5f20f9b24d1c2b1 (patch)
tree2838a7eee6c5d74094216acebd86915e0ea1de42 /lib/html_script_finder/dom_handler.js
downloadlibrejsxul-ada88090ead2c3b9d0804794c5f20f9b24d1c2b1.tar.lz
librejsxul-ada88090ead2c3b9d0804794c5f20f9b24d1c2b1.tar.xz
librejsxul-ada88090ead2c3b9d0804794c5f20f9b24d1c2b1.zip
Import to new git repository
The old repository was using almost 100mb of space because of all the unnecessary files in the history. So I've imported the code to a new git repository. Unfortunately the history isn't viewable from this repository anymore. To see what happened with LibreJS before 2015, see the old Bazaar repo here: http://bzr.savannah.gnu.org/lh/librejs/
Diffstat (limited to 'lib/html_script_finder/dom_handler.js')
-rw-r--r--lib/html_script_finder/dom_handler.js571
1 files changed, 571 insertions, 0 deletions
diff --git a/lib/html_script_finder/dom_handler.js b/lib/html_script_finder/dom_handler.js
new file mode 100644
index 0000000..0f461d3
--- /dev/null
+++ b/lib/html_script_finder/dom_handler.js
@@ -0,0 +1,571 @@
+/**
+ * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
+ * *
+ * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+/**
+ *
+ * dom_handler.js
+ *
+ * After the HTML DOM has been parsed, domHandler finds all the scripts
+ * on a page (including inline, on-page, and external files), and triggers the
+ * JavaScript analysis for each of them.
+ *
+ */
+
+var {Cc, Ci, Cu, Cm, Cr} = require("chrome");
+
+var scriptProperties =
+ require("html_script_finder/dom_handler/script_properties");
+
+const scriptTypes = scriptProperties.scriptTypes;
+const statusTypes = scriptProperties.statusTypes;
+const reasons = scriptProperties.reasons;
+
+var urlHandler = require("url_handler/url_handler");
+
+var WebLabelFinder =
+ require("html_script_finder/web_labels/js_web_labels").WebLabelFinder;
+
+// object model for script entries.
+var scriptObject = require("html_script_finder/dom_handler/script_object");
+
+var privacyChecker = require("js_checker/privacy_checker").privacyCheck;
+var jsChecker = require("js_checker/js_checker");
+const types = require("js_checker/constant_types");
+
+var checkTypes = types.checkTypes;
+
+var stripCDATAOpen = /<\!\[CDATA\[/gi;
+var stripCDATAClose = /]]>/g;
+
+var isDryRun = require("addon_management/prefchange").isDryRun;
+var allowedRef = require('http_observer/allowed_referrers').allowedReferrers;
+var attributeHelpers = require("html_script_finder/dom_handler/attributes");
+
+// javascript:*
+var jsInAttrRe = attributeHelpers.jsInAttrRe;
+
+// the list of all available event attributes
+var intrinsicEvents = attributeHelpers.intrinsicEvents;
+
+var domGatherer =
+ require("html_script_finder/dom_handler/dom_gatherer").domGatherer;
+var domChecker =
+ require("html_script_finder/dom_handler/dom_checker").domChecker;
+
+/**
+ * The DomHandler object takes a whole document,
+ * finds script elements within that DOM, analyzes them
+ * using the js_checker module and finally returns a cleaned
+ * DOM depending on the result.
+ */
+var DomHandler = function() {
+ // external object with methods used
+ // in DomHandler
+ this.domGatherer = null;
+
+ // external object with methods used
+ // in DomHandler
+ this.domChecker = null;
+
+ this.dom = null;
+ this.pageURL = null;
+
+ // fragment found in url.
+ this.fragment = null;
+
+ // array containing all scripts on a page.
+ this.domScripts = [];
+
+ // array containing all scripts on a page,
+ // data related to them, such as parse tree, ...
+ this.inlineScripts = [];
+
+ this.externalScripts = [];
+
+ // all scripts.
+ this.scripts = [];
+
+ // keeps track of the number of scripts.
+ this.numScripts = 0;
+
+ // store the reference to the callback method
+ // presumably from htmlParser.
+ this.callback = function() {};
+
+ // boolean set to true if external scripts are loaded
+ // from the html page.
+ this.loadsHtmlExternalScripts = false;
+
+ this.jsCheckString = null;
+
+ /* object containing boolean property set to false if trivialness
+ is not allowed anymore (if another script defines ajax requests,
+ ...) */
+ this.allowTrivial = null;
+
+ // boolean set to true if inline JavaScript
+ // is found to be free.
+ this.inlineJsFree = null;
+
+ // boolean set to true when at least one script
+ // has been removed.
+ this.hasRemovedScripts = null;
+
+ // boolean to check if scripts were removed
+ // prevents removeAllJs from running multiple times.
+ this.removedAllScripts = null;
+
+ // will eventually contain an array of data
+ // for the js web labels licenses.
+ this.licenseList = [];
+
+ // the response status for the page (200, 404, ...)
+ this.responseStatus = null;
+
+ // number of scripts fully tested.
+ this.scriptsTested = 0;
+
+ // number of external scripts to be tested.
+ this.numExternalScripts = null;
+
+ // number of inline/inattribute scripts
+ this.numInlineScripts = null;
+};
+
+/**
+ * Initialize properties of the object
+ *
+ * @param {domObject} obj A reference of the DOM object being
+ * analyzed.
+ *
+ * @param {pageURL} string The formatted URL (with fragment
+ * removed) of the corresponding page for this DOM
+ *
+ * @param {fragment} the #fragment from the url if applicable.
+ *
+ * @param {callback} the callback function.
+ *
+ */
+DomHandler.prototype.init = function(
+ domObject, pageURL, fragment, responseStatus, callback
+) {
+ // initialize object properties.
+
+ console.debug('init', pageURL);
+ var that = this;
+
+ this.reset();
+
+ // arguments passed.
+ this.dom = domObject;
+ this.pageURL = pageURL;
+ this.fragment = fragment;
+ this.responseStatus = responseStatus;
+
+ console.debug('in dom handler, responseStatus is', this.responseStatus);
+
+ // make callback function available
+ // for the entire object.
+ this.callback = function (dom) {
+ callback(dom);
+ that.destroy();
+ };
+};
+
+DomHandler.prototype.reset = function () {
+
+ this.dom = null;
+ // arrays.
+ this.onEventElement = [];
+ this.scriptStatus = [];
+ this.inlineScripts = [];
+ this.externalScripts = [];
+ this.scripts = [];
+
+ // booleans
+ this.allowTrivial = true;
+ this.inlineJsFree = false;
+ this.hasRemovedScripts = false;
+ this.removedAllScripts = false;
+
+ // we start with 0, and will increment in
+ // dom_checker.
+ this.numExternalScripts = 0;
+
+ this.numInlineScripts = 0;
+
+ this.scriptsTested = 0;
+
+};
+
+DomHandler.prototype.destroy = function () {
+ this.domGatherer = null;
+ this.domChecker = null;
+ /* destroy callback so that it can't be called multiple times. */
+ this.callback = function() {};
+ //this.reset();
+};
+
+DomHandler.prototype.scriptHasBeenTested = function() {
+ this.scriptsTested++;
+ console.debug('incremented DomHandler.scriptsTested to',
+ this.scriptsTested);
+};
+
+/**
+ * scriptHasJsWebLabel
+ *
+ * Checks if a script was found earlier in a Js License Web Label
+ * table. See http://www.gnu.org/licenses/javascript-labels.html
+ * for more information.
+ *
+ */
+DomHandler.prototype.scriptHasJsWebLabel = function(script) {
+ if (this.licenseList) {
+
+ var url = urlHandler.resolve(this.pageURL, script.src),
+ i = 0,
+ len = this.licenseList.length;
+
+ console.debug('looking for web label');
+
+ for (; i < len; i++) {
+ if (this.licenseList[i].fileUrl === url &&
+ this.licenseList[i].free === true
+ ) {
+ console.debug('found something true');
+ console.debug(
+ this.licenseList[i].fileUrl, ' is found');
+ return true;
+ }
+ }
+ }
+ return false;
+};
+
+/**
+ * processScripts.
+ * Starts by looking for a js web labels page
+ * then calls the complete function, which runs
+ * the rest of the check.
+ */
+DomHandler.prototype.processScripts = function () {
+ var that = this;
+
+ // check for the existence of the
+ // js web labels first.
+ this.lookForJsWebLabels(function () {
+
+ // gather and check all script elements on
+ // page.
+ console.debug("Calling checkAllScripts");
+ that.checkAllScripts();
+
+ });
+
+};
+
+/**
+ * jsWebLabelsComplete
+ *
+ */
+DomHandler.prototype.checkAllScripts = function () {
+ try {
+ console.debug(
+ 'found in', this.pageURL, JSON.stringify(this.licenseList));
+ console.debug('checkAllScripts triggered async');
+
+ // use domGatherer to gather scripts.
+ this.domGatherer.findScripts();
+ this.domGatherer.gatherScriptsContent();
+ this.domGatherer.gatherIntrinsicEvents();
+
+ console.debug('fragment is', this.fragment);
+
+ if (
+ this.fragment === undefined ||
+ this.fragment === null ||
+ this.fragment.indexOf('librejs=true') < 0
+ ) {
+ try {
+
+ // use domChecker to check scripts.
+ console.debug("Calling checkAllInlineScripts");
+ this.domChecker.checkAllInlineScripts();
+ } catch (x) {
+ console.debug('error in domChecker:', x, x.lineNumber);
+ this.removeAllJs();
+ }
+ } else {
+ console.debug('This is a pageworker, removing all js');
+ // this is the Page Worker to find contact link
+ // just remove all the JS since we don't need it.
+ console.debug('fragment found, remove js');
+ this.removeAllJs();
+ }
+ } catch (x) {
+ console.debug('error', x, x.lineNumber, x.fileName);
+ }
+};
+
+/**
+ * lookForJsWebLabels
+ *
+ * Checks if a link to a js web label table exists.
+ * If it does, return an array of objects with the data
+ * gathered (script name, path, license name, url, ...)
+ *
+ */
+DomHandler.prototype.lookForJsWebLabels = function (completed) {
+ var that = this;
+ console.debug("calling lookForJsWebLabels");
+ if (this.fragment !== '#librejs=true') {
+ var webLabelFinder = new WebLabelFinder();
+ webLabelFinder.init(
+ this.dom,
+ this.pageURL,
+ function (licenseList) {
+ // assign array returned to property.
+ that.licenseList = licenseList;
+ console.debug("calling completed");
+ completed();
+ });
+ } else {
+ completed();
+ }
+};
+
+DomHandler.prototype.checkScriptForJsWebLabels = function(script) {
+ var scriptEntry;
+
+ if (this.hasSrc(script) && this.scriptHasJsWebLabel(script)) {
+ // This script is in the list of allowed scripts (through web labels)
+ scriptEntry = scriptObject.Script({
+ 'type': scriptTypes.EXTERNAL,
+ 'status': statusTypes.ACCEPTED,
+ 'element': script,
+ 'url': urlHandler.resolve(this.pageURL, script.src)
+ });
+
+ scriptEntry.tagAsAccepted(this.pageURL, reasons.FREE);
+ return true;
+ }
+};
+
+/**
+ * hasSrc
+ * Check the given script has an src attribute.
+ * @param script obj The script element.
+ * @return a string with the value of the src attribute.
+ */
+DomHandler.prototype.hasSrc = function(script) {
+ if (script.src) {
+ return script.src;
+ }
+ return false;
+};
+
+/**
+ * Uses relationChecker to guess whether the script only uses
+ * predefined functions/variables or interacts with other scripts
+ * (this is still very experimental and needs improvement.)
+ *
+ */
+DomHandler.prototype.removeScriptIfDependent = function (script) {
+ var nonWindowProps = script.tree.relationChecker.nonWindowProperties;
+
+ for (var entry in nonWindowProps) {
+ if (nonWindowProps[entry]) {
+ console.debug('script has non window properties.');
+ this.removeGivenJs(script, reasons.TRIVIAL_NOT_ALLOWED);
+ return true;
+ }
+ }
+};
+
+/**
+ * removeGivenJs
+ * Remove a single script from the DOM.
+ * @param script Obj The script element to be removed from the
+ * DOM.
+ *
+ */
+DomHandler.prototype.removeGivenJs = function (script, reason, singleton, hash) {
+ var commentedOut;
+ var isAllowed = allowedRef.urlInAllowedReferrers(this.pageURL);
+ console.debug("removing given js hash", hash);
+
+ if (script.status != statusTypes.REJECTED &&
+ script.status != statusTypes.JSWEBLABEL
+ ) {
+ console.debug('removing a', script.type);
+ if (script.type === scriptTypes.ATTRIBUTE &&
+ !isAllowed
+ ) {
+ this.removeGivenAttribute(script, reason);
+ return;
+ }
+ if (!isAllowed) {
+ // set invalid type if dry run off.
+ script.element.setAttribute('type', 'librejs/blocked');
+ // add entry as removed.
+ console.debug('removeGivenJs hash is', hash);
+ script.tagAsRemoved(this.pageURL, reason, hash);
+ } else {
+ script.element.setAttribute(
+ 'data-librejs-dryrun', 'librejs/blocked');
+ script.tagAsDryRun(this.pageURL, reason, hash);
+ }
+
+ if (singleton === true) {
+ // flag singletons.
+ script.element.setAttribute('data-singleton', 'true');
+ }
+
+ // remove src if dry run off.
+ if (script.element.getAttribute('src') !== undefined) {
+ script.element.setAttribute(
+ 'data-librejs-blocked-src',
+ script.element.getAttribute('src')
+ );
+ if (!isAllowed) {
+ script.element.removeAttribute('src');
+ }
+ }
+ if (isAllowed) {
+ comment_str = 'LibreJS: Script should be blocked, but page is whitelisted.';
+ script.status = statusTypes.ACCEPTED;
+ } else {
+ comment_str = 'LibreJS: script blocked.';
+ script.status = statusTypes.REJECTED;
+ }
+
+ commentedOut = this.dom.createComment(comment_str);
+ // add a comment for curious source readers.
+ script.element.parentNode.appendChild(commentedOut);
+ script.element.parentNode.insertBefore(commentedOut, script.element);
+ this.hasRemovedScripts = true;
+ }
+};
+
+DomHandler.prototype.removeGivenAttribute = function (script, reason) {
+ var i = 0,
+ le = script.jsAttributes.length;
+
+ console.debug('removing given attribute', script, reason);
+ script.element.setAttribute('data-librejs-blocked-event',
+ JSON.stringify(script.jsAttributes));
+
+ script.tagAsRemoved(this.pageURL, reason, script.hash || script.tree.hash);
+
+ // might need to be removed.
+ script.element.setAttribute('data-librejs-blocked-value', '');
+
+ if (!allowedRef.urlInAllowedReferrers(this.pageURL)) {
+ // only run if not in dry run mode.
+ for (; i < le; i++) {
+ console.debug('removing attribute', JSON.stringify(script.jsAttributes));
+ script.element.removeAttribute(script.jsAttributes[i].attribute);
+ }
+ } else {
+
+ }
+ this.hasRemovedScripts = true;
+};
+
+/**
+ * removeAllJs
+ * Loop through all scripts from top to bottom and add a type
+ * attribute 'librejs/blocked' to prevent their interpretation
+ * by the browser.
+ *
+ */
+DomHandler.prototype.removeAllJs = function (reason) {
+ // remove all js is useless from now on.
+ console.debug('removeAllJs');
+ this.hasRemovedScripts = true;
+
+ // removeAllJs needs not be run next time.
+ this.removedAllScripts = true;
+
+ try {
+ this.removeAllArray(this.scripts, reason);
+ this.callback(this.dom);
+ } catch (x) {
+ console.debug(
+ 'in removeAllJs method: ',
+ x,
+ 'number of scripts is',
+ this.numScripts
+ );
+ this.callback(this.dom);
+ }
+
+};
+
+DomHandler.prototype.removeAllArray = function(scriptArray, reason) {
+ var script, i = 0, le;
+ console.debug('removeAllArray');
+ try {
+ le = scriptArray.length;
+ // loop through all scripts.
+
+ for (; i < le; i++) {
+ script = scriptArray[i];
+ if (script.type === scriptTypes.INLINE ||
+ script.type === scriptTypes.EXTERNAL
+ ) {
+ this.removeGivenJs(script, reason);
+ }
+ else if (script.type === scriptTypes.ATTRIBUTE) {
+ this.removeGivenAttribute(script, reason);
+ }
+ }
+ } catch (e) {
+ this.callback("");
+ }
+
+};
+
+exports.DomHandler = DomHandler;
+
+/**
+ * exports.domHandler
+ * Instantiates a DomHandler and checks the DOM
+ * @param dom obj The given dom for analysis.
+ * @param pageURL string the URL for the page.
+ * @param callback function callback when all the work has been performed.
+ */
+exports.domHandler = function(
+ dom, pageURL, fragment, responseStatus, callback) {
+ console.debug("Creating domHandler");
+ var domHandler = new DomHandler();
+ domHandler.init(dom, pageURL, fragment, responseStatus, callback);
+
+ // use domGatherer methods.
+ domHandler.domGatherer = domGatherer(domHandler);
+
+ // use domChecker methods.
+ domHandler.domChecker = domChecker(domHandler);
+
+ // launch the whole process.
+ console.debug("Calling processScripts");
+ domHandler.processScripts();
+};