1 files changed, 719 insertions, 0 deletions
diff --git a/data/chrome_worker/parser/jslex.js b/data/chrome_worker/parser/jslex.js
new file mode 100644
index 0000000..c5c2673
--- /dev/null
+++ b/data/chrome_worker/parser/jslex.js
@@ -0,0 +1,719 @@
+/* vim: set sw=4 ts=4 et tw=78: */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is the Narcissus JavaScript engine.
+ *
+ * The Initial Developer of the Original Code is
+ * Brendan Eich <brendan@mozilla.org>.
+ * Portions created by the Initial Developer are Copyright (C) 2004
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Tom Austin <taustin@ucsc.edu>
+ *   Brendan Eich <brendan@mozilla.org>
+ *   Shu-Yu Guo <shu@rfrn.org>
+ *   Stephan Herhut <stephan.a.herhut@intel.com>
+ *   Dave Herman <dherman@mozilla.com>
+ *   Dimitris Vardoulakis <dimvar@ccs.neu.edu>
+ *   Patrick Walton <pcwalton@mozilla.com>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/**
+ * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
+ * *
+ * Copyright (C) 2011, 2012, 2014 Loic J. Duros
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see  <http://www.gnu.org/licenses/>.
+ *
+ */
+/*
+ * Narcissus - JS implemented in JS.
+ *
+ * Lexical scanner.
+ */
+
+"use strict";
+
+Narcissus.lexer = (function() {
+
+    var definitions = Narcissus.definitions;
+
+    //throw Error (definitions.consts);
+
+		       // Token type ids, spelled out by hand in the local scope.
+		       // Presumably the list eval(definitions.consts) would declare -- TODO confirm.
+		       const END = 0,    // returned by get() at end of input
+		       NEWLINE = 1,    // reported only while scanNewlines is set
+		       SEMICOLON = 2,
+		       COMMA = 3,
+		       ASSIGN = 4,
+		       HOOK = 5,
+		       COLON = 6,
+		       CONDITIONAL = 7,
+		       OR = 8,
+		       AND = 9,
+		       BITWISE_OR = 10,
+		       BITWISE_XOR = 11,
+		       BITWISE_AND = 12,
+		       EQ = 13,
+		       NE = 14,
+		       STRICT_EQ = 15,
+		       STRICT_NE = 16,
+		       LT = 17,
+		       LE = 18,
+		       GE = 19,
+		       GT = 20,
+		       LSH = 21,
+		       RSH = 22,
+		       URSH = 23,
+		       PLUS = 24,
+		       MINUS = 25,
+		       MUL = 26,
+		       DIV = 27,
+		       MOD = 28,
+		       NOT = 29,
+		       BITWISE_NOT = 30,
+		       UNARY_PLUS = 31,
+		       UNARY_MINUS = 32,
+		       INCREMENT = 33,
+		       DECREMENT = 34,
+		       DOT = 35,
+		       LEFT_BRACKET = 36,
+		       RIGHT_BRACKET = 37,
+		       LEFT_CURLY = 38,
+		       RIGHT_CURLY = 39,
+		       LEFT_PAREN = 40,
+		       RIGHT_PAREN = 41,
+		       SCRIPT = 42,    // SCRIPT..COMP_TAIL: not referenced elsewhere in this file
+		       BLOCK = 43,
+		       LABEL = 44,
+		       FOR_IN = 45,
+		       CALL = 46,
+		       NEW_WITH_ARGS = 47,
+		       INDEX = 48,
+		       ARRAY_INIT = 49,
+		       OBJECT_INIT = 50,
+		       PROPERTY_INIT = 51,
+		       GETTER = 52,
+		       SETTER = 53,
+		       GROUP = 54,
+		       LIST = 55,
+		       LET_BLOCK = 56,
+		       ARRAY_COMP = 57,
+		       GENERATOR = 58,
+		       COMP_TAIL = 59,
+		       IDENTIFIER = 60,    // IDENTIFIER..REGEXP: set as token.type by the lex* methods
+		       NUMBER = 61,
+		       STRING = 62,
+		       REGEXP = 63,
+		       BREAK = 64,    // BREAK..WITH: keyword ids, presumably matching definitions.keywords -- verify
+		   CASE = 65,
+		       CATCH = 66,
+		       CONST = 67,
+		       CONTINUE = 68,
+		       DEBUGGER = 69,
+		   DEFAULT = 70,
+		       DELETE = 71,
+		       DO = 72,
+		       ELSE = 73,
+		       EXPORT = 74,
+		       FALSE = 75,
+		       FINALLY = 76,
+		       FOR = 77,
+		       FUNCTION = 78,
+		       IF = 79,
+		       IMPORT = 80,
+			   IN = 81,
+			   INSTANCEOF = 82,
+		       LET = 83,
+		       MODULE = 84,
+		       NEW = 85,
+		       NULL = 86,
+		       RETURN = 87,
+		       SWITCH = 88,
+		       THIS = 89,
+		       THROW = 90,
+		       TRUE = 91,
+		       TRY = 92,
+		       TYPEOF = 93,
+		       VAR = 94,
+		       VOID = 95,
+		       YIELD = 96,
+		       WHILE = 97,
+		       WITH = 98;
+
+    // Banned keywords by language version (indexed with Narcissus.options.version).
+    const blackLists = { 160: {}, 185: {}, harmony: {} };
+/*  Disabled entries: while these stay commented out, every list is empty.
+    blackLists[160][LET] = true;
+    blackLists[160][MODULE] = true;
+    blackLists[160][YIELD] = true;
+    blackLists[185][MODULE] = true; */
+    // Build a trie of operator tokens, one level per operator character.
+    // NOTE: these `var`s are hoisted to the enclosing lexer closure.
+    var opTokens = {};
+    for (var op in definitions.opTypeNames) {
+        // '\n' and '.' are lexed by dedicated branches of get(), not the trie.
+        if (op !== '\n' && op !== '.') {
+            var node = opTokens;
+            for (var i = 0; i < op.length; i++) {
+                var ch = op[i];
+                node = node[ch] || (node[ch] = {});
+                // Every node on the path records the latest operator through it.
+                node.op = op;
+            }
+        }
+    }
+
+    /*
+     * Decide whether `ch` may appear in an identifier (`first` selects the
+     * stricter rule for the leading character). JavaScript offers no handy
+     * Unicode-category test, so non-ASCII characters are checked
+     * metacircularly: we ask the host engine to parse them.
+     */
+    function isValidIdentifierChar(ch, first) {
+        // ASCII is decided by direct comparison.
+        if (ch <= "\u007F") {
+            var isLetter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
+            var isDigit = ch >= '0' && ch <= '9';
+            return isLetter || ch === '$' || ch === '_' || (!first && isDigit);
+        }
+
+        // Probe object: both the bare character and "x" + character are keys
+        // whose value is true.
+        var probe = {};
+        probe["x" + ch] = true;
+        probe[ch] = true;
+
+        // Compile a member access spelled with the character. It only parses,
+        // and only yields true, when the host accepts `ch` in that position.
+        var accessor = "return (x." + (first ? "" : "x") + ch + ");";
+        try {
+            return Function("x", accessor)(probe) === true;
+        } catch (ex) {
+            return false;
+        }
+    }
+
+    /*
+     * isIdentifier :: any -> boolean
+     *
+     * True only for a non-empty string whose first character passes the
+     * leading-character rule and whose remaining characters all pass the
+     * continuation rule of isValidIdentifierChar.
+     */
+    function isIdentifier(str) {
+        if (typeof str !== "string" || str.length === 0)
+            return false;
+
+        for (var pos = 0; pos < str.length; pos++) {
+            if (!isValidIdentifierChar(str[pos], pos === 0))
+                return false;
+        }
+        return true;
+    }
+
+    /*
+     * Tokenizer :: (source, filename, line number) -> Tokenizer
+     */
+    function Tokenizer(s, f, l) {
+        this.source = String(s);         // input is always coerced to a string
+        this.filename = f || "";
+        this.lineno = l || 1;
+        this.cursor = 0;                 // index of the next unread character
+        this.tokens = [];                // ring of 4 slots, indexed with "& 3"
+        this.tokenIndex = 0;
+        this.lookahead = 0;              // tokens handed back through unget()
+        this.scanNewlines = this.unexpectedEOF = false;
+        // An unknown language version bans nothing instead of leaving this undefined.
+        this.blackList = blackLists[Narcissus.options.version] || {};
+        this.blockComments = null;       // filled in by skip()
+    }
+
+    Tokenizer.prototype = {
+        get done() {
+            // Peek in operand mode, since the input may open with a regexp.
+            return END === this.peek(true);
+        },
+
+        get token() {
+            var ring = this.tokens;
+            return ring[this.tokenIndex];
+        },
+
+        // True when the next token has type tt; otherwise the token is handed
+        // back through unget() and the result is undefined.
+        match: function (tt, scanOperand) {
+            return this.get(scanOperand) === tt ? true : this.unget();
+        },
+
+        // Like match(), but a mismatch raises a "Missing <token>" syntax error.
+        mustMatch: function (tt) {
+            if (this.match(tt)) return this.token;
+            throw this.newSyntaxError("Missing " + definitions.tokens[tt].toLowerCase());
+        },
+
+        // Type of the upcoming token, without consuming it.
+        peek: function (scanOperand) {
+            if (!this.lookahead) {
+                // Nothing buffered: lex one token, then hand it straight back.
+                var lexed = this.get(scanOperand);
+                this.unget();
+                return lexed;
+            }
+            var pending = this.tokens[(this.tokenIndex + this.lookahead) & 3];
+            if (this.scanNewlines && pending.lineno !== this.lineno)
+                return NEWLINE;   // buffered token carries a different line number
+            return pending.type;
+        },
+
+        // peek() with newline sensitivity switched on for the duration.
+        peekOnSameLine: function (scanOperand) {
+            this.scanNewlines = true;
+            try { return this.peek(scanOperand); }
+            finally { this.scanNewlines = false; }   // also reset when peek() throws
+        },
+
+        lastBlockComment: function() {
+            var seen = this.blockComments || [];   // null until skip() has run once
+            return seen.length ? seen[seen.length - 1] : null;
+        },
+
+        // Eat comments and whitespace, stopping on the first character that
+        // is neither. The text of every block or single-line comment skipped
+        // by this call is collected in this.blockComments.
+        skip: function () {
+            var input = this.source;
+            var commentStart, commentEnd;
+            this.blockComments = [];
+            for (;;) {
+                var ch = input[this.cursor++];
+                var next = input[this.cursor];
+                // handle \r, \r\n and (always preferable) \n
+                if (ch === '\r') {
+                    // the \n of a \r\n pair is handled on the next iteration
+                    if (next === '\n') continue;
+
+                    // a lone \r is a newline in its own right
+                    ch = '\n';
+                }
+
+                if (ch === '\n' && !this.scanNewlines) {
+                    this.lineno++;
+                } else if (ch === '/' && next === '*') {
+                    commentStart = ++this.cursor;
+                    for (;;) {
+                        ch = input[this.cursor++];
+                        if (ch === undefined)
+                            throw this.newSyntaxError("Unterminated comment");
+
+                        if (ch === '*') {
+                            if (input[this.cursor] === '/') {
+                                commentEnd = this.cursor - 1;
+                                this.cursor++;
+                                break;
+                            }
+                        } else if (ch === '\n') {
+                            this.lineno++;
+                        }
+                    }
+                    this.blockComments.push(input.substring(commentStart, commentEnd));
+                } else if (ch === '-' && next === '-' &&
+                           input[this.cursor + 1] === '>') {
+                    // NOTE(review): "-->" is dropped wherever it appears, so
+                    // "x-->0" loses its operators -- confirm this is intended.
+                    this.cursor += 2;
+                } else if ((ch === '/' && next === '/') ||
+                           (ch === '<' && next === '!' &&
+                            input[this.cursor + 1] === '-' &&
+                            input[this.cursor + 2] === '-' &&
+                            (this.cursor += 2))) {
+                    // Single-line comment: "//" or "<!--" up to the end of the
+                    // line (the "+= 2" above steps over the extra "!-").
+                    commentStart = ++this.cursor;
+                    for (;;) {
+                        ch = input[this.cursor++];
+                        if (ch === undefined) {
+                            // End of input closes the comment; stay on it.
+                            commentEnd = --this.cursor;
+                            break;
+                        }
+                        if (ch === '\r' || ch === '\n') {
+                            commentEnd = this.cursor - 1;
+                            // Look at the character after the \r itself so
+                            // that \r\n counts as one terminator.
+                            if (ch === '\r' && input[this.cursor] === '\n')
+                                this.cursor++;
+                            if (this.scanNewlines) {
+                                // leave the terminator for get() to report
+                                this.cursor = commentEnd;
+                            } else {
+                                this.lineno++;
+                            }
+                            break;
+                        }
+                    }
+                    this.blockComments.push(input.substring(commentStart, commentEnd));
+                } else if (!(ch in definitions.whitespace)) {
+                    this.cursor--;
+                    return;
+                }
+            }
+        },
+
+        // Lex the exponential part of a number, if present. Return true iff an
+        // exponential part was found; throw a SyntaxError when 'e'/'E' is not
+        // followed by at least one digit (an optional sign may come first).
+        lexExponent: function() {
+            var input = this.source;
+            var next = input[this.cursor];
+            if (next !== 'e' && next !== 'E')
+                return false;
+
+            this.cursor++;
+            // Keep ch local: undeclared, it would resolve to the `ch` variable
+            // of the enclosing lexer closure.
+            var ch = input[this.cursor++];
+            if (ch === '+' || ch === '-')
+                ch = input[this.cursor++];
+            // Written positively so that end of input (undefined) is rejected.
+            if (!(ch >= '0' && ch <= '9'))
+                throw this.newSyntaxError("Missing exponent");
+            do {
+                ch = input[this.cursor++];
+            } while (ch >= '0' && ch <= '9');
+            this.cursor--;
+            return true;
+        },
+
+        // Lex a numeric literal whose first digit is 0 (already consumed):
+        // a "0.xxx" decimal, 0x hexadecimal, legacy octal, or plain zero.
+        lexZeroNumber: function (ch) {
+            var token = this.token, input = this.source;
+            token.type = NUMBER;
+            ch = input[this.cursor++];
+            if (ch === '.') {
+                do {
+                    ch = input[this.cursor++];
+                } while (ch >= '0' && ch <= '9');
+                this.cursor--;
+                this.lexExponent();
+                token.value = parseFloat(
+                                input.substring(token.start, this.cursor));
+            } else if (ch === 'x' || ch === 'X') {
+                do {
+                    ch = input[this.cursor++];
+                } while ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') ||
+                         (ch >= 'A' && ch <= 'F'));
+                this.cursor--;
+                // Radix 16 still accepts the leading "0x"/"0X" prefix.
+                token.value = parseInt(input.substring(token.start, this.cursor), 16);
+            } else if (ch >= '0' && ch <= '7') {
+                do {
+                    ch = input[this.cursor++];
+                } while (ch >= '0' && ch <= '7');
+                this.cursor--;
+                // Legacy octal: without a radix, "017" would be read as 17.
+                token.value = parseInt(input.substring(token.start, this.cursor), 8);
+            } else {
+                this.cursor--;
+                this.lexExponent();     // 0E1, &c.
+                token.value = 0;
+            }
+        },
+
+        // Lex a decimal literal whose first digit (1-9) is already consumed:
+        // digits, at most one '.', and an optional exponent.
+        lexNumber: function (ch) {
+            var token = this.token, input = this.source;
+            var sawDot = false;
+            token.type = NUMBER;
+            for (;;) {
+                ch = input[this.cursor++];
+                if (ch === '.' && !sawDot) {
+                    sawDot = true;
+                    ch = input[this.cursor++];
+                }
+                if (!(ch >= '0' && ch <= '9')) break;
+            }
+            this.cursor--;
+
+            // lexExponent must run before the text is sliced out.
+            var isFloat = this.lexExponent() || sawDot;
+            var text = input.substring(token.start, this.cursor);
+            token.value = isFloat ? parseFloat(text) : parseInt(text, 10);
+        },
+
+        // Lex a token starting with '.' (already consumed): a fraction such
+        // as ".5" when a digit follows, otherwise the DOT operator.
+        lexDot: function (ch) {
+            var token = this.token, input = this.source;
+            var following = input[this.cursor];
+            if (!(following >= '0' && following <= '9')) {
+                token.type = DOT;
+                token.assignOp = null;
+                token.value = '.';
+                return;
+            }
+
+            do {
+                ch = input[this.cursor++];
+            } while (ch >= '0' && ch <= '9');
+            this.cursor--;
+            this.lexExponent();
+            token.type = NUMBER;
+            token.value = parseFloat(input.substring(token.start, this.cursor));
+        },
+
+        // Lex a string literal; ch is the opening quote (already consumed).
+        lexString: function (ch) {
+            var token = this.token, input = this.source;
+            var quote = ch, escaped = false, end = input.length;
+            token.type = STRING;
+            if (end <= this.cursor)
+                throw this.newSyntaxError("Unterminated string literal");
+            for (ch = input[this.cursor++]; ch !== quote; ch = input[this.cursor++]) {
+                if (this.cursor === end)
+                    throw this.newSyntaxError("Unterminated string literal");
+                if (ch !== '\\') continue;
+                // A backslash also swallows the character that follows it.
+                escaped = true;
+                if (++this.cursor === end)
+                    throw this.newSyntaxError("Unterminated string literal");
+            }
+
+            // NOTE(review): escapes are decoded by eval'ing the raw literal text.
+            token.value = escaped
+                          ? eval(input.substring(token.start, this.cursor))
+                          : input.substring(token.start + 1, this.cursor - 1);
+        },
+
+        // Lex a regular-expression literal; the opening '/' is already consumed.
+        lexRegExp: function (ch) {
+            var token = this.token, input = this.source;
+            token.type = REGEXP;
+            // Body: up to the first '/' that is neither escaped nor inside [...].
+            for (;;) {
+                ch = input[this.cursor++];
+                if (ch === '/') break;
+                if (ch === undefined)
+                    throw this.newSyntaxError("Unterminated regex");
+                if (ch === '\\') {
+                    this.cursor++;
+                } else if (ch === '[') {
+                    while (ch !== ']') {
+                        if (ch === undefined)
+                            throw this.newSyntaxError("Unterminated character class");
+                        if (ch === '\\')
+                            this.cursor++;
+                        ch = input[this.cursor++];
+                    }
+                }
+            }
+
+            // Flags: any run of lower-case ASCII letters.
+            do {
+                ch = input[this.cursor++];
+            } while (ch >= 'a' && ch <= 'z');
+            this.cursor--;
+            // NOTE(review): the value is built by eval'ing the literal text.
+            token.value = eval(input.substring(token.start, this.cursor));
+        },
+
+        // Lex an operator or punctuator whose first character ch has already
+        // been consumed, descending greedily through the opTokens trie (at
+        // most three characters). A trailing '=' after an operator listed in
+        // definitions.assignOps turns the token into a compound ASSIGN.
+        lexOp: function (ch) {
+            var token = this.token, input = this.source;
+            var trie = opTokens[ch];
+
+            // Descend at most two further levels of the trie.
+            for (var depth = 0; depth < 2; depth++) {
+                var peeked = input[this.cursor];
+                if (!(peeked in trie))
+                    break;
+                trie = trie[peeked];
+                this.cursor++;
+            }
+
+            var symbol = trie.op;
+            var typeId = definitions.tokenIds[definitions.opTypeNames[symbol]];
+            if (definitions.assignOps[symbol] && input[this.cursor] === '=') {
+                this.cursor++;
+                token.type = ASSIGN;
+                token.assignOp = typeId;
+                symbol += '=';
+            } else {
+                token.type = typeId;
+                token.assignOp = null;
+            }
+
+            token.value = symbol;
+        },
+
+        // Lex an identifier or keyword; ch is its first character, already
+        // consumed. Keyword ids come from definitions.keywords, except that a
+        // keyword on this.blackList is demoted to a plain IDENTIFIER.
+        // FIXME: Unicode escape sequences
+        lexIdent: function (ch) {
+            var token = this.token;
+            var name = ch;
+            for (var c = this.getValidIdentifierChar(false); c !== null;
+                 c = this.getValidIdentifierChar(false)) {
+                name += c;
+            }
+
+            var type = definitions.keywords[name] || IDENTIFIER;
+            token.type = (type in this.blackList) ? IDENTIFIER : type;
+            token.value = name;
+        },
+
+        /*
+         * Tokenizer.get :: scanOperand -> token type
+         *
+         * Replay a token handed back by unget() when there is one; input is
+         * consumed *only* if there is no lookahead. Fresh input is dispatched
+         * to the lexing method selected by its first character.
+         */
+        get: function (scanOperand) {
+            var slot;
+            while (this.lookahead) {
+                --this.lookahead;
+                this.tokenIndex = (this.tokenIndex + 1) & 3;
+                slot = this.tokens[this.tokenIndex];
+                if (this.scanNewlines || slot.type !== NEWLINE)
+                    return slot.type;
+            }
+
+            this.skip();
+
+            // Claim the next slot of the four-entry ring, creating it lazily.
+            this.tokenIndex = (this.tokenIndex + 1) & 3;
+            slot = this.tokens[this.tokenIndex] ||
+                   (this.tokens[this.tokenIndex] = {});
+            var input = this.source;
+            if (this.cursor >= input.length) {
+                slot.type = END;
+                return END;
+            }
+            slot.start = this.cursor;
+            slot.lineno = this.lineno;
+            // Identifier characters are tried first; otherwise one raw character is read.
+            var ich = this.getValidIdentifierChar(true);
+            var ch = (ich === null) ? input[this.cursor++] : null;
+            if (ich !== null) {
+                this.lexIdent(ich);
+            } else if (scanOperand && ch === '/') {
+                this.lexRegExp(ch);
+            } else if (ch in opTokens) {
+                this.lexOp(ch);
+            } else if (ch === '.') {
+                this.lexDot(ch);
+            } else if (ch === '0') {
+                this.lexZeroNumber(ch);
+            } else if (ch >= '1' && ch <= '9') {
+                this.lexNumber(ch);
+            } else if (ch === '"' || ch === "'") {
+                this.lexString(ch);
+            } else if (this.scanNewlines && (ch === '\n' || ch === '\r')) {
+                // fold \r\n into a single NEWLINE token
+                if (ch === '\r' && input[this.cursor] === '\n') this.cursor++;
+                slot.type = NEWLINE;
+                slot.value = '\n';
+                this.lineno++;
+            } else {
+                throw this.newSyntaxError("Illegal token");
+            }
+            slot.end = this.cursor;
+            return slot.type;
+        },
+
+        /*
+         * Tokenizer.unget :: void -> undefined
+         * Hand the current token back so that the next get() replays it.
+         * Match depends on unget returning undefined.
+         */
+        unget: function () {
+            if (++this.lookahead === 4) throw new Error("PANIC: too much lookahead!");
+            this.tokenIndex = (this.tokenIndex - 1) & 3;
+        },
+
+        // Build (not throw) a SyntaxError for m, prefixed "filename:lineno: ".
+        newSyntaxError: function (m) {
+            var where = (this.filename ? this.filename + ":" : "") + this.lineno;
+            var e = new SyntaxError(where + ": " + m, this.filename, this.lineno);
+            e.source = this.source;
+            var pending = this.tokens[(this.tokenIndex + this.lookahead) & 3];
+            e.cursor = this.lookahead ? pending.start : this.cursor;
+            return e;
+        },
+
+
+        /* Gets a single valid identifier char from the input stream, or null
+         * if there is none. A \uXXXX escape is decoded and returned as the
+         * character it names. The cursor moves only when a character is
+         * returned.
+         */
+        getValidIdentifierChar: function(first) {
+            var input = this.source;
+            if (this.cursor >= input.length) return null;
+            var ch = input[this.cursor];
+            var width = 1;
+            // first check for \u escapes
+            if (ch === '\\' && input[this.cursor + 1] === 'u') {
+                // Exactly four hex digits are required; parseInt alone would
+                // accept a shorter prefix and never throws on bad input.
+                var hex = input.substring(this.cursor + 2, this.cursor + 6);
+                if (!/^[0-9a-fA-F]{4}$/.test(hex)) return null;
+                ch = String.fromCharCode(parseInt(hex, 16));
+                width = 6;
+            }
+
+            if (!isValidIdentifierChar(ch, first)) return null;
+            // Consume the character (or the whole escape) only once accepted.
+            this.cursor += width;
+            return ch;
+        },
+    };
+
+    // Public surface of Narcissus.lexer.
+    return {
+        isIdentifier: isIdentifier,
+        Tokenizer: Tokenizer
+    };
+
+}());