diff options
author | Nik Nyby <nikolas@gnu.org> | 2015-01-17 17:12:36 -0500 |
---|---|---|
committer | Nik Nyby <nikolas@gnu.org> | 2015-01-17 17:12:36 -0500 |
commit | ada88090ead2c3b9d0804794c5f20f9b24d1c2b1 (patch) | |
tree | 2838a7eee6c5d74094216acebd86915e0ea1de42 /lib/html_script_finder/dom_handler | |
download | librejsxul-ada88090ead2c3b9d0804794c5f20f9b24d1c2b1.tar.lz librejsxul-ada88090ead2c3b9d0804794c5f20f9b24d1c2b1.tar.xz librejsxul-ada88090ead2c3b9d0804794c5f20f9b24d1c2b1.zip |
Import to new git repository
The old repository was using almost 100 MB of space because of all
the unnecessary files in the history. So I've imported the code to a
new git repository. Unfortunately the history isn't viewable from this
repository anymore. To see what happened with LibreJS before 2015, see
the old Bazaar repo here: http://bzr.savannah.gnu.org/lh/librejs/
Diffstat (limited to 'lib/html_script_finder/dom_handler')
-rw-r--r-- | lib/html_script_finder/dom_handler/attributes.js | 137 | ||||
-rw-r--r-- | lib/html_script_finder/dom_handler/dom_checker.js | 478 | ||||
-rw-r--r-- | lib/html_script_finder/dom_handler/dom_gatherer.js | 281 | ||||
-rw-r--r-- | lib/html_script_finder/dom_handler/request.js | 115 | ||||
-rw-r--r-- | lib/html_script_finder/dom_handler/script_object.js | 208 | ||||
-rw-r--r-- | lib/html_script_finder/dom_handler/script_properties.js | 43 |
6 files changed, 1262 insertions, 0 deletions
diff --git a/lib/html_script_finder/dom_handler/attributes.js b/lib/html_script_finder/dom_handler/attributes.js new file mode 100644 index 0000000..3e95bab --- /dev/null +++ b/lib/html_script_finder/dom_handler/attributes.js @@ -0,0 +1,137 @@ +/** + * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript. + * * + * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ + +// object model for script entries. 
+var scriptObject = require("html_script_finder/dom_handler/script_object"); + +var scriptProperties = require("html_script_finder/dom_handler/script_properties"); + +const scriptTypes = scriptProperties.scriptTypes; + +const statusTypes = scriptProperties.statusTypes; + +var jsInAttrRe = /javascript:/ig; + +// the list of all available event attributes +var intrinsicEvents = [ + "onload", + "onunload", + "onclick", + "ondblclick", + "onmousedown", + "onmouseup", + "onmouseover", + "onmousemove", + "onmouseout", + "onfocus", + "onblur", + "onkeypress", + "onkeydown", + "onkeyup", + "onsubmit", + "onreset", + "onselect", + "onchange"]; + +exports.jsInAttrRe = jsInAttrRe; +exports.intrinsicEvents = intrinsicEvents; + + +/** + * findJSinAttribute + * + * Looks for attributes containing 'javascript:' + * + */ +exports.findJSinAttribute = function (elem, callback) { + var i = 0, attrLen = elem.attributes.length; + + var attribPairs = []; + + for (; i < attrLen; i++) { + + //looping through all attributes in elem to look for "javascript:" + attrib = elem.attributes[i]; + + if (attrib.value.match(jsInAttrRe)) { + str = attrib.value.replace(jsInAttrRe, ''); + attribPairs.push({attribute: attrib.name, value: str}); + } + + } + + if (attribPairs.length > 0) { + // contains in attribute javascript. + scriptEntry = scriptObject.Script({'type': scriptTypes.ATTRIBUTE, + 'status': statusTypes.UNCHECKED, + 'element': elem, + 'jsAttributes': attribPairs + }); + + // push back to DOMHandler + callback(scriptEntry); + + } else { + callback(false); + } + +}; + +/** + * findOnJSAttribute. 
+ * + * Look for attributes in on* + * + */ +exports.findOnJSAttribute = function (elem, callback) { + + var i = 0, eventsLen = intrinsicEvents.length; + + var attribPairs = []; + + for (; i < eventsLen; i++) { + + // looping through all on* attributes + if (elem.hasAttribute(intrinsicEvents[i])) { + + attribPairs.push({ + attribute: intrinsicEvents[i], + value: elem.getAttribute(intrinsicEvents[i]) + }); + + } + + } + if (attribPairs.length > 0) { + + console.debug('found an attribute', scriptTypes.ATTRIBUTE); + scriptEntry = scriptObject.Script({'type': scriptTypes.ATTRIBUTE, + 'status': statusTypes.UNCHECKED, + 'element':elem, + 'jsAttributes': attribPairs + }); + // Push back to DOMHandler. + // push back to DOMHandler + callback(scriptEntry); + + } else { + callback(false); + } +}; diff --git a/lib/html_script_finder/dom_handler/dom_checker.js b/lib/html_script_finder/dom_handler/dom_checker.js new file mode 100644 index 0000000..1a0f30e --- /dev/null +++ b/lib/html_script_finder/dom_handler/dom_checker.js @@ -0,0 +1,478 @@ +/** + * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript. + * * + * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ + +/** + * dom_checker.js + * + * checks scripts for nonfree/nontrivial. 
+ * + */ + +var {Cc, Ci, Cu, Cm, Cr} = require("chrome"); +var timer = require("sdk/timers"); + +var scriptProperties = require("html_script_finder/dom_handler/script_properties"); +const scriptTypes = scriptProperties.scriptTypes; +const statusTypes = scriptProperties.statusTypes; +const reasons = scriptProperties.reasons; + +// ensure xhr won't create an infinite loop +// with html content. +var urlTester = require("html_script_finder/url_seen_tester").urlSeenTester; +var urlHandler = require("url_handler/url_handler"); + +var privacyChecker = require("js_checker/privacy_checker").privacyCheck; +var jsChecker = require("js_checker/js_checker"); + +const types = require("js_checker/constant_types"); +var checkTypes = types.checkTypes; +var stripCDATAOpen = /<\!\[CDATA\[/gi; +var stripCDATAClose = /]]>/g; + +const getHash = require("script_entries/scripts_cache").scriptsCached.getHash; + +var DomChecker = function() { + // reference to domHandler instance + // using this object. + this.d = null; +}; + +/** + * init + * + * assign a reference domHandler object + * to access/updates its properties. + * + */ +DomChecker.prototype.init = function(domHandler) { + "use strict"; + + this.d = domHandler; +}; + +DomChecker.prototype.destroy = function() { + "use strict"; + + this.d = null; +}; + +/** + * checkAllInlineScripts + * + * Sends all the inline/onpage scripts as a whole for a check and + * removes all scripts if nonfree nontrivial is found. + * + */ +DomChecker.prototype.checkAllInlineScripts = function() { + "use strict"; + + try { + var i = 0, len, script; + + if (typeof this.d.inlineScripts !== 'undefined' && + this.d.inlineScripts.length > 0 + ) { + script = this.d.inlineScripts.shift(); + console.debug("checking script for page", + this.d.pageURL + /*, JSON.stringify(script)*/); + if (this.d.removedAllScripts) { + // all js has already been removed. + // stop check. 
+ console.debug("removed all"); + return; + } + + if (this.d.inlineJsFree === true) { + // add entry as accepted. + try { + hash = getHash(script.text); + script.tagAsAccepted(this.d.pageURL, reasons.FREE, hash); + } catch (e) { + console.debug(e); + } + } + + // even if page is free we need to check for allow trivial. + if (script.type === scriptTypes.INLINE) { + console.debug("analyzing script", script); + this.analyzeJs(script, + script.text, + this.checkSingleInlineScript.bind(this)); + } else if (script.type === scriptTypes.ATTRIBUTE) { + console.debug("analyzing inline script", script); + this.analyzeJs(script, + this.concatAttributes(script), + this.checkSingleElementAttributes.bind(this)); + } + } else { + // no more inline scripts. Switch to external scripts. + this.readyForExternal(); + } + } catch (x) { + console.debug('checkAllInlineScripts error', + x, x.lineNumber, x.fileName); + this.readyForExternal(); + } +}; + +DomChecker.prototype.concatAttributes = function(script) { + "use strict"; + var i = 0, + le = script.jsAttributes.length, + text = ""; + + // we concatenate all js in multiple attributes. + // because it's too much of a hassle to keep track + // otherwise. 
+ for (; i < le; i++) { + text += script.jsAttributes[i].value + '\n'; + } + + return text; +}; + +/** + * + * check a single element with attributes + */ +DomChecker.prototype.checkSingleElementAttributes = function( + script, loadedScript, checker) { + "use strict"; + var check, value, + i = 0, + le = script.jsAttributes.length, + text = ""; + + try { + check = checker.parseTree.freeTrivialCheck; + script.tree = checker; + script.result = check; + script.status = statusTypes.CHECKED; + } catch (e) { + console.debug('problem checking inline scripts', e, e.lineNumber); + this.d.removeGivenJs(script); + } + + this.processInlineCheckResult(script, check, checker); +}; + +DomChecker.prototype.processInlineCheckResult = function( + script, check, checker) { + "use strict"; + console.debug("check.reason is", check.reason, "and type", check.type); + var hash = checker.hash; + + if (this.d.inlineJsFree === true) { + console.debug('tagging', script.text, 'as accepted', "with reason", check.reason); + script.tagAsAccepted(this.d.pageURL, this.d.freeReason + " -- " + check.reason, hash); + } + + // process the result. + if (check.type === checkTypes.FREE) { + // this is free. + console.debug('tagging', script.text, 'as accepted with reason', check.reason); + this.d.inlineJsFree = true; + this.d.freeReason = check.reason; + // add entry as accepted. + script.tagAsAccepted(this.d.pageURL, check.reason, hash); + } else if (check.type === checkTypes.FREE_SINGLE_ITEM) { + // accept this script. + console.debug("free single item, ", check.reason); + script.tagAsAccepted(this.d.pageURL, check.reason, hash); + } else if (check.type === checkTypes.NONTRIVIAL) { + console.debug("nontrivial hash is", hash); + if (this.d.inlineJsFree) { + // inline is free. So accept. 
+ console.debug('tagging', script.text, 'as accepted'); + script.tagAsAccepted( + this.d.pageURL, + this.d.freeReason + ' -- ' + check.reason, + hash); + } else { + console.debug('tagging', script.text, 'as removed'); + this.d.removeGivenJs(script, check.reason, false, hash); + } + } else if (!this.d.inlineJsFree && + this.d.loadsHtmlExternalScripts && + check.type === checkTypes.TRIVIAL_DEFINES_FUNCTION + ) { + // nontrivial, because defines function and loads + // external scripts + console.debug('tagging', script.text, 'as removed'); + this.d.removeGivenJs(script, reasons.FUNCTIONS_INLINE, false, hash); + } else if (!this.d.loadsHtmlExternalScripts && + check === checkTypes.TRIVIAL_DEFINES_FUNCTION + ) { + console.debug("Tag as accepted doesn't load another external script"); + script.tagAsAccepted(this.d.pageURL, check.reason, hash); + } else if (check.type === checkTypes.TRIVIAL || + check.type === checkTypes.TRIVIAL_DEFINES_FUNCTION || + check.type === checkTypes.WHITELISTED + ) { + // add entry as accepted. + console.debug("Trivial accepted"); + script.tagAsAccepted(this.d.pageURL, check.reason, hash); + } + + // next inline script, if applicable. + this.checkAllInlineScripts(); +}; + +DomChecker.prototype.readyForExternal = function() { + "use strict"; + + console.debug('DomChecker.readyForExternal'); + // done with those inline scripts, continue with + // the rest. + this.checkExternalScripts(); +}; + +/** + * check a single inline script. + */ +DomChecker.prototype.checkSingleInlineScript = function( + script, loadedScript, checker) { + "use strict"; + var check, text; + + console.debug('DomChecker.checkSingleInlineScript'); + + try { + + check = checker.parseTree.freeTrivialCheck; + + // update status. 
+ script.tree = checker; + script.result = check; + console.debug("script result is", check.type); + script.status = statusTypes.CHECKED; + + } catch (e) { + console.debug('problem checking inline scripts', e, e.lineNumber); + this.d.removeGivenJs(script, '', false, checker.hash); + } + + this.processInlineCheckResult(script, check, checker); + +}; + +/** + * checkExternalScripts + * Loop through series of external scripts, + * perform xhr to get their data, and check them + * to see whether they are free/nontrivial + * + */ +DomChecker.prototype.checkExternalScripts = function() { + "use strict"; + + console.debug('DomChecker.checkExternalScripts'); + + var i = 0; + var len = this.d.externalScripts.length; + var that = this; + + console.debug("externalScripts length", len); + if (this.d.removedAllScripts || len === 0) { + // all js has already been removed. + // stop check. + this.wrapUpBeforeLeaving(); + return; + } + + for (; i < len; i++) { + this.xhr( + this.d.externalScripts[i], + function(script, scriptText) { + console.debug("In xhr callback for script url:", script.url); + if (scriptText === false) { + that.d.removeGivenJs(script); + that.d.scriptHasBeenTested(); + that.externalCheckIsDone(); + return; + } + + console.debug('about to analyzeJS for script:', script.url); + that.analyzeJs( + script, + scriptText, + that.checkSingleExternalScript.bind(that)); + } + ); + } +}; + +DomChecker.prototype.wrapUpBeforeLeaving = function() { + "use strict"; + + console.debug("wrap up before leaving triggered"); + console.debug('wrapping up'); + this.d.callback(this.d.dom); + +}; + +DomChecker.prototype.analyzeJs = function(script, scriptText, callback) { + "use strict"; + console.debug('DomChecker.analyzeJs for script:', script.url); + try { + var checker = jsChecker.jsChecker(); + var url = ""; + if (typeof script.url !== "undefined") { + url = script.url; + } else { + url = this.pageURL; + } + checker.searchJs(scriptText, function() { + console.debug("Analyze 
JS"/*, JSON.stringify(checker)*/); + timer.setTimeout(function() { + callback(script, scriptText, checker); + }, 0); + }, url); + } catch (x) { + console.debug('error', x, x.lineNumber, x.fileName); + } +}; + +/** + * Check a single external script. + */ +DomChecker.prototype.checkSingleExternalScript = function( + script, loadedScript, checker +) { + "use strict"; + var check; + + console.debug('DomChecker.checkSingleExternalScript()'); + try { + check = checker.parseTree.freeTrivialCheck; + + script.tree = checker; + script.result = check; + console.debug('in checkSingleExternalScript, checker.hash is', + checker.hash); + if (script.status != statusTypes.JSWEBLABEL) { + script.status = statusTypes.CHECKED; + } + + if (check.type === checkTypes.FREE || + check.type === checkTypes.FREE_SINGLE_ITEM + ) { + // add entry as accepted. + script.tagAsAccepted(this.d.pageURL, check.reason, checker.hash); + } + + else if (check.type === checkTypes.NONTRIVIAL) { + console.debug("Removing given js", check.reason); + this.d.removeGivenJs(script, check.reason, false, checker.hash); + } + + else if (check.type === checkTypes.TRIVIAL || + check.type === checkTypes.WHITELISTED + ) { + // if it's accepted, allow. + script.tagAsAccepted(this.d.pageURL, check.reason, checker.hash); + } else { + // anything else is nontrivial. Including TRIVIAL_DEFINES_FUNCTION. 
+ console.debug("checker hash for remove is ", checker.hash); + this.d.removeGivenJs( + script, reasons.FUNCTIONS_EXTERNAL, false, checker.hash); + } + + } catch (e) { + console.debug('error in checkExternalScript', + e, e.lineNumber, 'for script', script.url); + + this.d.removeAllJs(); + this.destroy(); + return; + } + console.debug('script url is', script.url, 'result is', script.result); + this.d.scriptHasBeenTested(); + this.externalCheckIsDone(); +}; + +DomChecker.prototype.externalCheckIsDone = function() { + "use strict"; + console.debug('DomChecker.externalCheckIsDone'); + + console.debug('scriptsTested is', this.d.scriptsTested); + console.debug('num external', this.d.numExternalScripts); + + if (this.d.scriptsTested >= this.d.numExternalScripts) { + console.debug('wrapping up external'); + this.wrapUpBeforeLeaving(); + } else { + var scriptsToCheck = this.d.numExternalScripts - this.d.scriptsTested; + console.debug('Not wrapping up! Waiting to check ' + scriptsToCheck + + ' more script(s)'); + + if (this.d.externalScripts[0]) { + console.debug('script 0 is', this.d.externalScripts[0]); + } + if (this.d.externalScripts[1]) { + console.debug('script 1 is', this.d.externalScripts[1]); + } + } +}; + +/** + * xhr + * Perform a XMLHttpRequest on the url given. + * @param url string A URL. + * @return The response text. + */ +DomChecker.prototype.xhr = function(script, responseCallback) { + "use strict"; + + var regex = /^text\/html/i; + var url = script.url; + + try { + // add url to whitelist. + urlTester.addUrl(url); + + // request module. Compatible with Https-Everywhere. + require('html_script_finder/dom_handler/request') + .request(script, responseCallback).request(); + } catch (x) { + console.debug('error', x, x.lineNumber, x.fileName); + responseCallback(script, false); + } +}; + +/** + * exports.domChecker + * Instantiate a brand new clone of the domChecker. + * @param dom obj The given dom for analysis. + * @param pageURL string the URL for the page. 
+ * @param callback function callback when all the work has been performed. + */ +exports.domChecker = function(domHandler) { + "use strict"; + + var domChecker = new DomChecker(); + + domChecker.init(domHandler); + + return domChecker; +}; + +exports.xhr = new DomChecker().xhr; diff --git a/lib/html_script_finder/dom_handler/dom_gatherer.js b/lib/html_script_finder/dom_handler/dom_gatherer.js new file mode 100644 index 0000000..4fcee88 --- /dev/null +++ b/lib/html_script_finder/dom_handler/dom_gatherer.js @@ -0,0 +1,281 @@ +/** + * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript. + * * + * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ + +var scriptProperties = require("html_script_finder/dom_handler/script_properties"); + +const scriptTypes = scriptProperties.scriptTypes; +const scriptsCached = require("script_entries/scripts_cache").scriptsCached; + +const statusTypes = scriptProperties.statusTypes; +// object model for script entries. 
+var scriptObject = require("html_script_finder/dom_handler/script_object"); + +var urlHandler = require("url_handler/url_handler"); + +var attributeHelpers = require("html_script_finder/dom_handler/attributes"); + +// javascript:* +var jsInAttrRe = attributeHelpers.jsInAttrRe; + +// the list of all available event attributes +var intrinsicEvents = attributeHelpers.intrinsicEvents; + +var privacyChecker = require("js_checker/privacy_checker").privacyCheck; + +const types = require("js_checker/constant_types"); + +var checkTypes = types.checkTypes; + +// array reflex valid types as listed in +// http://mxr.mozilla.org/mozilla-central/source/content/base/src/nsScriptLoader.cpp#437 +// anything appended to end of strings is considered valid: +var jsValidTypes = [ + /^text\/javascript/i, + /^text\/ecmascript/i, + /^application\/javascript/i, + /^application\/ecmascript/i, + /^application\/x-javascript/i +]; + +var stripCDATAOpen = /<\!\[CDATA\[/gi; +var stripCDATAClose = /]]>/g; + +var stripHtmlCommentsInScript = function (s) { + s = s.replace(stripCDATAOpen, ''); + s = s.replace(stripCDATAClose, ''); + return s; +}; + + +// gather scripts and javascript in attributes across a dom object. +var DomGatherer = function() { + // domHandler object. + this.d = null; +}; + +/** + * init + * + * assign a reference domHandler object + * to access/updates its properties. + * + */ +DomGatherer.prototype.init = function (domHandler) { + this.d = domHandler; +}; + +/** + * scriptHasInvalidType + * + * Checks that a script does not have a js "template" type. + * Normally any script that has a type attribute other than the + * few allowed ones is not interpreted. But by security, we only + * discard a few of them. + * + * @param script obj The script element. + * @return returns true if it matches a template type. 
+ * + */ +DomGatherer.prototype.scriptHasInvalidType = function (script) { + var i = 0, + le = jsValidTypes.length; + + var type = script.getAttribute('type'); + + if (type === 'librejs/blocked') { + // js has already been blocked. + return true; + } + + if (!type) { + // type isn't set, don't look further. + return false; + } + + for (; i < le; i++) { + if (jsValidTypes[i].test(type)) { + return false; + } + } + + // type is invalid and + // hence cannot be executed. + return true; +}; + +/** + * findScripts + * + * Assigns the array of scripts in the dom to a property + * as well as a number of scripts present for looping purposing. + */ +DomGatherer.prototype.findScripts = function() { + this.d.domScripts = this.d.dom.getElementsByTagName('script'); + this.d.numScripts = this.d.domScripts.length; +}; + +/** + * gatherIntrinsicEvents + * + * Fetches all the event attributes that might contain JavaScript + * as well as all element attributes that start with + * "javascript:". + * + */ +DomGatherer.prototype.gatherIntrinsicEvents = function() { + var i = 0, j, k, + all = this.d.dom.getElementsByTagName('*'), + max = all.length, + that = this, + attrLen, attrib, str, scriptEntry; + + for (; i < max; i++) { + // look for attributes with value javascript:* + attributeHelpers.findJSinAttribute( + all[i], + function (scriptEntry) { + if (scriptEntry !== false) { + + that.d.inlineScripts.push(scriptEntry); + that.d.scripts.push(scriptEntry); + + // add inline script in the count. + that.d.numInlineScripts++; + } + }); + + // look for attributes of on* (onLoad, ...) + attributeHelpers.findOnJSAttribute( + all[i], + function (scriptEntry) { + if (scriptEntry !== false) { + that.d.inlineScripts.push(scriptEntry); + that.d.scripts.push(scriptEntry); + + // add inline script in the count. + that.d.numInlineScripts++; + } + }); + } + +}; + +/** + * gatherScriptsContent + * + * Aggregate all content within on-page JavaScript code. 
+ * Keep a list of all absolute urls to external scripts. + * + */ +DomGatherer.prototype.gatherScriptsContent = function() { + var i = 0, currentScript = '', absolutePath, scriptEntry, + that = this; + try { + for (; i < this.d.numScripts; i++) { + if (this.d.checkScriptForJsWebLabels(this.d.domScripts[i])) { + //break; + absolutePath = urlHandler.resolve( + this.d.pageURL, this.d.domScripts[i].src); + scriptEntry = scriptObject.Script( + {'type': scriptTypes.EXTERNAL, + 'status': statusTypes.JSWEBLABEL, + 'element': this.d.domScripts[i], + 'url': absolutePath}); + scriptEntry.tree = {}; + + this.d.externalScripts.push(scriptEntry); + that.d.scripts.push(scriptEntry); + + this.d.loadsHtmlExternalScripts = true; + + // increment number of scripts found. + this.d.numExternalScripts++; + } + + // check that script has valid type + else if (!this.scriptHasInvalidType(this.d.domScripts[i])) { + + + if (this.d.hasSrc(this.d.domScripts[i]) && + !this.d.scriptHasJsWebLabel(this.d.domScripts[i])) { + + console.debug('an external script', this.d.domScripts[i]); + + absolutePath = urlHandler.resolve( + this.d.pageURL, this.d.domScripts[i].src); + scriptEntry = scriptObject.Script( + {'type': scriptTypes.EXTERNAL, + 'status': statusTypes.UNCHECKED, + 'element': this.d.domScripts[i], + 'url': absolutePath}); + this.d.externalScripts.push(scriptEntry); + that.d.scripts.push(scriptEntry); + + this.d.loadsHtmlExternalScripts = true; + + // increment number of scripts found. + this.d.numExternalScripts++; + + } else if (privacyChecker.checkScriptPrivacyThreat(this.d.domScripts[i].text)) { + this.d.removeGivenJs(scriptObject.Script( + {'type': scriptTypes.SINGLETON, + 'status': statusTypes.UNCHECKED, + 'element': this.d.domScripts[i], + 'text': this.d.domScripts[i].text + }), '', true); + } else if (this.d.domScripts[i].text !== '') { + // using else if since script text is + // ignored if src attribute is set. + // adding this.narcissusBugFixLibreJS to fix comment bug. 
+ var bugfix = require('html_script_finder/bug_fix').narcissusBugFixLibreJS; + currentScript = stripHtmlCommentsInScript(this.d.domScripts[i].text + bugfix); + + scriptEntry = scriptObject.Script( + {'type': scriptTypes.INLINE, + 'status': statusTypes.UNCHECKED, + 'element': this.d.domScripts[i], + 'text': currentScript}); + this.d.inlineScripts.push(scriptEntry); + this.d.scripts.push(scriptEntry); + + // add inline script in the count. + this.d.numInlineScripts++; + } + } + } + } catch (e) { + // Any problem arising, we remove the script. + console.debug('problem gathering scripts', e, e.lineNumber); + this.d.removeAllJs(); + } +}; + +/* + * exports.domGatherer + * Instantiate a brand new clone of the domGatherer. + * @param dom obj The given dom for analysis. + * @param pageURL string the URL for the page. + * @param callback function callback when all the work has been performed. + */ +exports.domGatherer = function (domHandler) { + var dg = new DomGatherer(); + dg.init(domHandler); + return dg; +}; diff --git a/lib/html_script_finder/dom_handler/request.js b/lib/html_script_finder/dom_handler/request.js new file mode 100644 index 0000000..7f217ef --- /dev/null +++ b/lib/html_script_finder/dom_handler/request.js @@ -0,0 +1,115 @@ +/** + * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript. + * * + * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ + +var timer = require("sdk/timers"); + +var {Cc, Ci, Cu, Cm, Cr} = require("chrome"); +var {XPCOMUtils} = Cu.import("resource://gre/modules/XPCOMUtils.jsm"); + + +// ensure xhr won't create an infinite loop +// with html content. +var urlTester = require("html_script_finder/url_seen_tester").urlSeenTester; +var urlHandler = require("url_handler/url_handler"); +const scriptsCached = require("script_entries/scripts_cache").scriptsCached; + +var Request = function() { + this.url = null; + this.channel = null; + this.script = null; + this.responseCallback = null; +}; + +/** + * init + */ +Request.prototype.init = function(script, callback) { + this.script = script; + // set initial url + this.url = this.script.url; + + console.debug('In Request.init() for url:', this.url); + + this.responseCallback = callback; + + var iOService = Cc["@mozilla.org/network/io-service;1"] + .getService(Ci.nsIIOService); + + this.channel = iOService.newChannel(this.url, null, null); +}; + +Request.prototype.request = function() { + var that = this; + var responseReceived = function (data) { + that.responseCallback(that.script, data); + }; + try { + this.channel.asyncOpen({ + QueryInterface: XPCOMUtils.generateQI( + [Ci.nsIRequestObserver, Ci.nsIStreamListener]), + data: "", + charset: null, + + onStartRequest: function(request, context) { + this.charset = request.contentCharset || "UTF-8"; + }, + + onDataAvailable: function (request, context, stream, offset, count) { + try { + var binaryInputStream = Cc["@mozilla.org/binaryinputstream;1"] + .createInstance(Ci.nsIBinaryInputStream); + binaryInputStream.setInputStream(stream); + var data = binaryInputStream.readBytes(count); + this.data += data; + } catch (x) { + console.debug('error in request', x, x.lineNumber); + responseReceived(""); + } + }, + + onStopRequest: function (request, 
context, result) { + try { + if (this.charset.toLowerCase() != "utf-8") { + var uConv = Cc["@mozilla.org/intl/utf8converterservice;1"] + .createInstance(Ci.nsIUTF8ConverterService); + + this.data = uConv.convertStringToUTF8( + this.data, this.charset, true); + } + } catch (e) { + console.debug("Issue with nsIUTF8ConverterService", e); + console.debug("Charset was", this.charset); + responseReceived(""); + } + responseReceived(this.data); + } + }, null); + } catch(e) { + console.debug("asyncOpen exception", e); + responseReceived(""); + } +}; + +// Instantiate a Request +exports.request = function(script, callback) { + var obj = new Request(); + obj.init(script, callback); + return obj; +}; diff --git a/lib/html_script_finder/dom_handler/script_object.js b/lib/html_script_finder/dom_handler/script_object.js new file mode 100644 index 0000000..5431cf6 --- /dev/null +++ b/lib/html_script_finder/dom_handler/script_object.js @@ -0,0 +1,208 @@ +/** + * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript. + * * + * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + */ + +var removedScripts = require("script_entries/removed_scripts").removedScripts; + +var acceptedScripts = require("script_entries/accepted_scripts") + .acceptedScripts; +var dryRunScripts = require("script_entries/dryrun_scripts").dryRunScripts; + +var Script = function(props) { + // can be an attribute, an inline script, + // or an external script. + this.type = null; + + /* + * Script.status - The script's current status. + * + * Possible values are: + * + * 0 - unchecked + * 1 - checked + * 2 - accepted + * 3 - rejected + * 4 - jsweblabel + * + * See script_properties.js for definitions. + */ + this.status = null; + + // contains the dom element + this.element = null; + + // the attribute name, if applicable. + this.attribute = null; + + // the script text as a string. + this.value = null; + + // the src url if external. + this.url = null; + + // the script text if inline. + this.text = null; + + this.init(props); +}; + +Script.prototype.init = function(props) { + // check the required elements are present. + if (typeof props === 'undefined') { + throw "Error, missing script entry value in script_object.js"; + } + + // required properties + if (typeof props.type !== 'undefined') { + this.type = props.type; + } else { + throw "type is missing"; + } + + if (typeof props.status !== 'undefined') { + this.status = props.status; + } else { + throw "status is missing"; + } + + if (typeof props.element !== 'undefined') { + this.element = props.element; + } else { + throw "element is missing"; + } + + // conditional properties. + this.url = (props.url) ? props.url : null; + this.text = (props.text) ? props.text : null; + this.jsAttributes = (props.jsAttributes) ? 
props.jsAttributes : null; + + if (typeof this.text !== 'string' && + this.tree !== null && + typeof this.tree === 'object' && + this.tree.hasOwnProperty('jsCode') + ) { + this.text = this.tree.jsCode; + } +}; + +Script.prototype.tagAsDryRun = function(pageURL, reason, hash) { + var content = this.findContentType(); + var inline = (this.url != undefined) ? false : true; + var url = (inline == false ? this.url : null); + console.debug("url is", url); + this.element.setAttribute('data-librejs', 'dryrun'); + this.element.setAttribute('data-librejs-reason', reason); + + dryRunScripts.addAScript( + pageURL, + {'inline': inline, + 'contents': content, + 'reason': reason, + 'url': url, + 'hash': hash + }); +}; + +Script.prototype.tagAsAccepted = function(pageURL, reason, hash) { + var content = this.findContentType(); + var inline = (this.url != undefined) ? false : true; + var url = (inline == false ? this.url : null); + console.debug("url is", url); + this.element.setAttribute('data-librejs', 'accepted'); + this.element.setAttribute('data-librejs-reason', reason); + + acceptedScripts.addAScript( + pageURL, + {'inline': inline, + 'contents': content, + 'reason': reason, + 'url': url, + 'hash': hash + }); + +}; + +Script.prototype.tagAsRemoved = function(pageURL, reason, hash) { + var content = this.findContentType(); + var inline = (this.url != undefined) ? false : true; + var url = (inline == false ? this.url : null); + this.element.setAttribute('data-librejs', 'rejected'); + this.element.setAttribute('data-librejs-reason', reason); + console.debug("tagAsRemoved hash is", hash); + removedScripts.addAScript(pageURL, { + 'inline': inline, + 'contents': content, + 'reason': reason, + 'url': url, + 'hash': hash + }); + +}; + +Script.prototype.tagAsDryRun = function(pageURL, reason, hash) { + var content = this.findContentType(); + var inline = (this.url != undefined) ? false : true; + var url = (inline == false ? 
this.url : null); + this.element.setAttribute('data-librejs', 'dryrun'); + this.element.setAttribute('data-librejs-reason', reason); + + dryRunScripts.addAScript( + pageURL, + {'inline': inline, + 'contents': content, + 'reason': reason, + 'url': url, + 'hash': hash + }); +}; + +/** + * removeNarcissusBugLine + * + * Removes the line that is appended to all + * inline scripts and prevent the bug that prevent + * script tags with comments only from being checked. + * + */ +Script.prototype.removeNarcissusBugLine = function(str) { + return str.replace('\n\nthis.narcissusBugFixLibreJS', ''); +}; + +/** + * findContentType + * + * Figure out whether it's an external script, + * an inline script, or an attribute from the property + * that has been set, rather than blindly trusting the given + * constant. + */ +Script.prototype.findContentType = function() { + if (this.url != undefined) { + return ""; + } else if (this.text != undefined) { + return this.element.text; + } else if (this.jsAttributes != undefined) { + // return the array. + return JSON.stringify(this.jsAttributes); + } +}; + +exports.Script = function(props) { + return new Script(props); +}; diff --git a/lib/html_script_finder/dom_handler/script_properties.js b/lib/html_script_finder/dom_handler/script_properties.js new file mode 100644 index 0000000..2eeeedb --- /dev/null +++ b/lib/html_script_finder/dom_handler/script_properties.js @@ -0,0 +1,43 @@ +/** + * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript. + * * + * Copyright (C) 2011, 2012, 2013, 2014 Loic J. Duros + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ + +exports.scriptTypes = { + INLINE: 0, + EXTERNAL: 1, + ATTRIBUTE: 2, + SINGLETON: 3 +}; + +exports.statusTypes = { + UNCHECKED: 0, + CHECKED: 1, + ACCEPTED: 2, + REJECTED: 3, + JSWEBLABEL: 4 +}; + +exports.reasons = { + 'FUNCTIONS_INLINE': 'This script is detected as inline, nonfree, defining functions or methods, and the rest of the page as loading external scripts', + 'FUNCTIONS_EXTERNAL': 'This script is detected as nonfree, external, and as defining functions or methods', + 'CONSTRUCT': 'This script is detected as nonfree and as defining nontrivial constructs', + 'FREE': 'This script is detected as free', + 'TRIVIAL': 'This script is detected as trivial', + 'TRIVIAL_NOT_ALLOWED': 'This script is detected as trivial, but trivial is not allowed here because of other scripts' +}; |