aboutsummaryrefslogtreecommitdiffstats
path: root/data/chrome_worker/parser/jslex.js
diff options
context:
space:
mode:
Diffstat (limited to 'data/chrome_worker/parser/jslex.js')
-rw-r--r--data/chrome_worker/parser/jslex.js719
1 files changed, 719 insertions, 0 deletions
diff --git a/data/chrome_worker/parser/jslex.js b/data/chrome_worker/parser/jslex.js
new file mode 100644
index 0000000..c5c2673
--- /dev/null
+++ b/data/chrome_worker/parser/jslex.js
@@ -0,0 +1,719 @@
+/* vim: set sw=4 ts=4 et tw=78: */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is the Narcissus JavaScript engine.
+ *
+ * The Initial Developer of the Original Code is
+ * Brendan Eich <brendan@mozilla.org>.
+ * Portions created by the Initial Developer are Copyright (C) 2004
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Tom Austin <taustin@ucsc.edu>
+ * Brendan Eich <brendan@mozilla.org>
+ * Shu-Yu Guo <shu@rfrn.org>
+ * Stephan Herhut <stephan.a.herhut@intel.com>
+ * Dave Herman <dherman@mozilla.com>
+ * Dimitris Vardoulakis <dimvar@ccs.neu.edu>
+ * Patrick Walton <pcwalton@mozilla.com>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/**
+ * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
+ * *
+ * Copyright (C) 2011, 2012, 2014 Loic J. Duros
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+/*
+ * Narcissus - JS implemented in JS.
+ *
+ * Lexical scanner.
+ */
+
+"use strict";
+
+Narcissus.lexer = (function() {
+
+ var definitions = Narcissus.definitions;
+
+ //throw Error (definitions.consts);
+
+ // Token type ids, declared as constants in the local scope.
+ // These literals stand in for the disabled `eval(definitions.consts)` below.
+ // NOTE(review): presumably they must stay in sync with the ids published by
+ // Narcissus.definitions (e.g. definitions.tokenIds) -- confirm before editing.
+ //eval(definitions.consts);
+ const END = 0,
+ NEWLINE = 1,
+ SEMICOLON = 2,
+ COMMA = 3,
+ ASSIGN = 4,
+ HOOK = 5,
+ COLON = 6,
+ CONDITIONAL = 7,
+ OR = 8,
+ AND = 9,
+ BITWISE_OR = 10,
+ BITWISE_XOR = 11,
+ BITWISE_AND = 12,
+ EQ = 13,
+ NE = 14,
+ STRICT_EQ = 15,
+ STRICT_NE = 16,
+ LT = 17,
+ LE = 18,
+ GE = 19,
+ GT = 20,
+ LSH = 21,
+ RSH = 22,
+ URSH = 23,
+ PLUS = 24,
+ MINUS = 25,
+ MUL = 26,
+ DIV = 27,
+ MOD = 28,
+ NOT = 29,
+ BITWISE_NOT = 30,
+ UNARY_PLUS = 31,
+ UNARY_MINUS = 32,
+ INCREMENT = 33,
+ DECREMENT = 34,
+ DOT = 35,
+ LEFT_BRACKET = 36,
+ RIGHT_BRACKET = 37,
+ LEFT_CURLY = 38,
+ RIGHT_CURLY = 39,
+ LEFT_PAREN = 40,
+ RIGHT_PAREN = 41,
+ SCRIPT = 42,
+ BLOCK = 43,
+ LABEL = 44,
+ FOR_IN = 45,
+ CALL = 46,
+ NEW_WITH_ARGS = 47,
+ INDEX = 48,
+ ARRAY_INIT = 49,
+ OBJECT_INIT = 50,
+ PROPERTY_INIT = 51,
+ GETTER = 52,
+ SETTER = 53,
+ GROUP = 54,
+ LIST = 55,
+ LET_BLOCK = 56,
+ ARRAY_COMP = 57,
+ GENERATOR = 58,
+ COMP_TAIL = 59,
+ IDENTIFIER = 60,
+ NUMBER = 61,
+ STRING = 62,
+ REGEXP = 63,
+ BREAK = 64,
+ CASE = 65,
+ CATCH = 66,
+ CONST = 67,
+ CONTINUE = 68,
+ DEBUGGER = 69,
+ DEFAULT = 70,
+ DELETE = 71,
+ DO = 72,
+ ELSE = 73,
+ EXPORT = 74,
+ FALSE = 75,
+ FINALLY = 76,
+ FOR = 77,
+ FUNCTION = 78,
+ IF = 79,
+ IMPORT = 80,
+ IN = 81,
+ INSTANCEOF = 82,
+ LET = 83,
+ MODULE = 84,
+ NEW = 85,
+ NULL = 86,
+ RETURN = 87,
+ SWITCH = 88,
+ THIS = 89,
+ THROW = 90,
+ TRUE = 91,
+ TRY = 92,
+ TYPEOF = 93,
+ VAR = 94,
+ VOID = 95,
+ YIELD = 96,
+ WHILE = 97,
+ WITH = 98;
+
+ // Banned keywords by language version: maps a version key (160, 185 or
+ // "harmony") to a set of token ids. The Tokenizer constructor picks one set
+ // using Narcissus.options.version, and lexIdent turns any keyword whose id is
+ // in that set back into a plain IDENTIFIER.
+ // Every set is currently empty because the entries below are commented out.
+ const blackLists = { 160: {}, 185: {}, harmony: {} };
+/* blackLists[160][LET] = true;
+ blackLists[160][MODULE] = true;
+ blackLists[160][YIELD] = true;
+ blackLists[185][MODULE] = true;
+*/
+ // Build up a trie of operator tokens: each node maps the next character of
+ // an operator to a child node. '\n' and '.' are kept out of the trie; get()
+ // deals with them through its NEWLINE branch and lexDot.
+ //
+ // Every node on the path of an operator is stamped with that operator, so a
+ // node ends up holding whichever operator passing through it was inserted
+ // last. NOTE(review): this looks like it relies on definitions.opTypeNames
+ // enumerating longer operators before their own prefixes -- confirm.
+ //
+ // NOTE: the loop variables below are `var`s, so they are hoisted into the
+ // enclosing function scope and stay visible to every function defined in
+ // it; do not rename or remove them without checking for such uses.
+ var opTokens = {};
+ for (var op in definitions.opTypeNames) {
+ if (op === '\n' || op === '.')
+ continue;
+
+ var node = opTokens;
+ for (var i = 0; i < op.length; i++) {
+ var ch = op[i];
+ if (!(ch in node))
+ node[ch] = {};
+ node = node[ch];
+ // Stamp this node with the operator currently being inserted.
+ node.op = op;
+ }
+ }
+
+ /*
+ * Since JavaScript provides no convenient way to determine if a
+ * character is in a particular Unicode category, we use
+ * metacircularity to accomplish this (oh yeaaaah!)
+ */
+ function isValidIdentifierChar(ch, first) {
+ // check directly for ASCII
+ if (ch <= "\u007F") {
+ if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch === '$' || ch === '_' ||
+ (!first && (ch >= '0' && ch <= '9'))) {
+ return true;
+ }
+ return false;
+ }
+
+ // create an object to test this in
+ var x = {};
+ x["x"+ch] = true;
+ x[ch] = true;
+
+ // then use eval to determine if it's a valid character
+ var valid = false;
+ try {
+ valid = (Function("x", "return (x." + (first?"":"x") + ch + ");")(x) === true);
+ } catch (ex) {}
+
+ return valid;
+ }
+
+ function isIdentifier(str) {
+ if (typeof str !== "string")
+ return false;
+
+ if (str.length === 0)
+ return false;
+
+ if (!isValidIdentifierChar(str[0], true))
+ return false;
+
+ for (var i = 1; i < str.length; i++) {
+ if (!isValidIdentifierChar(str[i], false))
+ return false;
+ }
+
+ return true;
+ }
+
+ /*
+ * Tokenizer :: (source, filename, line number) -> Tokenizer
+ */
+ function Tokenizer(s, f, l) {
+ this.cursor = 0;
+ this.source = String(s);
+ this.tokens = [];
+ this.tokenIndex = 0;
+ this.lookahead = 0;
+ this.scanNewlines = false;
+ this.unexpectedEOF = false;
+ this.filename = f || "";
+ this.lineno = l || 1;
+ this.blackList = blackLists[Narcissus.options.version];
+ this.blockComments = null;
+ }
+
+ Tokenizer.prototype = {
+ get done() {
+ // We need to set scanOperand to true here because the first thing
+ // might be a regexp.
+ return this.peek(true) === END;
+ },
+
+ get token() {
+ return this.tokens[this.tokenIndex];
+ },
+
+ match: function (tt, scanOperand) {
+ return this.get(scanOperand) === tt || this.unget();
+ },
+
+ mustMatch: function (tt) {
+ if (!this.match(tt)) {
+ throw this.newSyntaxError("Missing " +
+ definitions.tokens[tt].toLowerCase());
+ }
+ return this.token;
+ },
+
+ peek: function (scanOperand) {
+ var tt, next;
+ if (this.lookahead) {
+ next = this.tokens[(this.tokenIndex + this.lookahead) & 3];
+ tt = (this.scanNewlines && next.lineno !== this.lineno)
+ ? NEWLINE
+ : next.type;
+ } else {
+ tt = this.get(scanOperand);
+ this.unget();
+ }
+ return tt;
+ },
+
+ peekOnSameLine: function (scanOperand) {
+ this.scanNewlines = true;
+ var tt = this.peek(scanOperand);
+ this.scanNewlines = false;
+ return tt;
+ },
+
+ lastBlockComment: function() {
+ var length = this.blockComments.length;
+ return length ? this.blockComments[length - 1] : null;
+ },
+
+ // Eat comments and whitespace.
+ skip: function () {
+ var input = this.source;
+ this.blockComments = [];
+ for (;;) {
+ var ch = input[this.cursor++];
+ var next = input[this.cursor];
+ // handle \r, \r\n and (always preferable) \n
+ if (ch === '\r') {
+ // if the next character is \n, we don't care about this at all
+ if (next === '\n') continue;
+
+ // otherwise, we want to consider this as a newline
+ ch = '\n';
+ }
+
+ if (ch === '\n' && !this.scanNewlines) {
+ this.lineno++;
+ } else if (ch === '/' && next === '*') {
+ var commentStart = ++this.cursor;
+ for (;;) {
+ ch = input[this.cursor++];
+ if (ch === undefined)
+ throw this.newSyntaxError("Unterminated comment");
+
+ if (ch === '*') {
+ next = input[this.cursor];
+ if (next === '/') {
+ var commentEnd = this.cursor - 1;
+ this.cursor++;
+ break;
+ }
+ } else if (ch === '\n') {
+ this.lineno++;
+ }
+ }
+ this.blockComments.push(input.substring(commentStart, commentEnd));
+ }
+ else if (ch === '-' && next === '-' &&
+ input[this.cursor + 1] === '>') {
+ this.cursor += 2;
+ }
+ else if ((ch === '/' && next === '/') ||
+ (ch === '<' && next === '!' &&
+ input[this.cursor + 1] === '-' &&
+ input[this.cursor + 2] === '-' &&
+ (this.cursor += 2))) {
+
+ // capture single line comments starts.
+ var commentStart = ++this.cursor;
+ for (;;) {
+ ch = input[this.cursor++];
+ if (ch === undefined) {
+ //this.lineno++;
+ break;
+ //throw this.newSyntaxError("Unterminated comment");
+ }
+ if (ch === '\r') {
+ // check for \r\n
+ if (next !== '\n') ch = '\n';
+ var commentEnd = this.cursor - 1;
+ }
+
+ if (ch === '\n') {
+ if (this.scanNewlines) {
+ this.cursor--;
+ } else {
+ this.lineno++;
+ }
+ var commentEnd = this.cursor - 1;
+ break;
+ }
+ }
+ this.blockComments.push(input.substring(commentStart, commentEnd));
+ // capture single line comments ends.
+ } else if (!(ch in definitions.whitespace)) {
+ this.cursor--;
+ return;
+ }
+ }
+ },
+
+ // Lex the exponential part of a number, if present. Return true iff an
+ // exponential part was found.
+ lexExponent: function() {
+ var input = this.source;
+ var next = input[this.cursor];
+ if (next === 'e' || next === 'E') {
+ this.cursor++;
+ ch = input[this.cursor++];
+ if (ch === '+' || ch === '-')
+ ch = input[this.cursor++];
+
+ if (ch < '0' || ch > '9')
+ throw this.newSyntaxError("Missing exponent");
+
+ do {
+ ch = input[this.cursor++];
+ } while (ch >= '0' && ch <= '9');
+ this.cursor--;
+
+ return true;
+ }
+
+ return false;
+ },
+
+ lexZeroNumber: function (ch) {
+ var token = this.token, input = this.source;
+ token.type = NUMBER;
+
+ ch = input[this.cursor++];
+ if (ch === '.') {
+ do {
+ ch = input[this.cursor++];
+ } while (ch >= '0' && ch <= '9');
+ this.cursor--;
+
+ this.lexExponent();
+ token.value = parseFloat(
+ input.substring(token.start, this.cursor));
+ } else if (ch === 'x' || ch === 'X') {
+ do {
+ ch = input[this.cursor++];
+ } while ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') ||
+ (ch >= 'A' && ch <= 'F'));
+ this.cursor--;
+
+ token.value = parseInt(input.substring(token.start, this.cursor));
+ } else if (ch >= '0' && ch <= '7') {
+ do {
+ ch = input[this.cursor++];
+ } while (ch >= '0' && ch <= '7');
+ this.cursor--;
+
+ token.value = parseInt(input.substring(token.start, this.cursor));
+ } else {
+ this.cursor--;
+ this.lexExponent(); // 0E1, &c.
+ token.value = 0;
+ }
+ },
+
+ lexNumber: function (ch) {
+ var token = this.token, input = this.source;
+ token.type = NUMBER;
+
+ var floating = false;
+ do {
+ ch = input[this.cursor++];
+ if (ch === '.' && !floating) {
+ floating = true;
+ ch = input[this.cursor++];
+ }
+ } while (ch >= '0' && ch <= '9');
+
+ this.cursor--;
+
+ var exponent = this.lexExponent();
+ floating = floating || exponent;
+
+ var str = input.substring(token.start, this.cursor);
+ token.value = floating ? parseFloat(str) : parseInt(str);
+ },
+
+ lexDot: function (ch) {
+ var token = this.token, input = this.source;
+ var next = input[this.cursor];
+ if (next >= '0' && next <= '9') {
+ do {
+ ch = input[this.cursor++];
+ } while (ch >= '0' && ch <= '9');
+ this.cursor--;
+
+ this.lexExponent();
+
+ token.type = NUMBER;
+ token.value = parseFloat(
+ input.substring(token.start, this.cursor));
+ } else {
+ token.type = DOT;
+ token.assignOp = null;
+ token.value = '.';
+ }
+ },
+
+ // Lex a string literal. `ch` is the opening quote (get() calls this for
+ // '"' and "'"), already consumed. Sets the current token's type to STRING
+ // and its value to the decoded string.
+ // Throws a SyntaxError ("Unterminated string literal") when the input ends
+ // before the closing quote. Line breaks inside the literal are not checked
+ // here.
+ lexString: function (ch) {
+ var token = this.token, input = this.source;
+ token.type = STRING;
+
+ var hasEscapes = false;
+ var delim = ch;
+ // Nothing at all after the opening quote.
+ if (input.length <= this.cursor)
+ throw this.newSyntaxError("Unterminated string literal");
+ while ((ch = input[this.cursor++]) !== delim) {
+ // The character just read was the last one and was not the delimiter.
+ if (this.cursor == input.length)
+ throw this.newSyntaxError("Unterminated string literal");
+ if (ch === '\\') {
+ hasEscapes = true;
+ // Skip the escaped character so an escaped quote does not end the scan.
+ if (++this.cursor == input.length)
+ throw this.newSyntaxError("Unterminated string literal");
+ }
+ }
+
+ // Without escapes the value is the text between the quotes. With escapes
+ // the whole literal, quotes included, is handed to eval to decode it.
+ // NOTE(review): this evals text taken from the source being lexed, which
+ // may be untrusted, and any error eval raises bypasses newSyntaxError --
+ // consider a dedicated escape decoder.
+ token.value = hasEscapes
+ ? eval(input.substring(token.start, this.cursor))
+ : input.substring(token.start + 1, this.cursor - 1);
+ },
+
+ // Lex a regular expression literal. The opening '/' has already been
+ // consumed; get() only calls this when scanOperand is set. Sets the current
+ // token's type to REGEXP and its value to the result of evaluating the
+ // literal text.
+ // Throws a SyntaxError (via newSyntaxError) when the input ends inside the
+ // pattern or inside a character class.
+ lexRegExp: function (ch) {
+ var token = this.token, input = this.source;
+ token.type = REGEXP;
+
+ // Scan the pattern up to the closing '/'.
+ do {
+ ch = input[this.cursor++];
+ if (ch === '\\') {
+ // Skip the escaped character so an escaped '/' does not end the pattern.
+ this.cursor++;
+ } else if (ch === '[') {
+ // Inside a character class a '/' does not end the pattern: consume
+ // everything up to the closing ']', honouring backslash escapes.
+ do {
+ if (ch === undefined)
+ throw this.newSyntaxError("Unterminated character class");
+
+ if (ch === '\\')
+ this.cursor++;
+
+ ch = input[this.cursor++];
+ } while (ch !== ']');
+ } else if (ch === undefined) {
+ throw this.newSyntaxError("Unterminated regex");
+ }
+ } while (ch !== '/');
+
+ // Consume the flags: any run of lowercase ASCII letters.
+ do {
+ ch = input[this.cursor++];
+ } while (ch >= 'a' && ch <= 'z');
+
+ // Give back the character that ended the flag run.
+ this.cursor--;
+
+ // NOTE(review): this evals text taken from the source being lexed, which
+ // may be untrusted, and any error eval raises (e.g. for invalid flags)
+ // bypasses newSyntaxError -- consider building the value with
+ // new RegExp(pattern, flags) instead.
+ token.value = eval(input.substring(token.start, this.cursor));
+ },
+
+ lexOp: function (ch) {
+ var token = this.token, input = this.source;
+
+ // A bit ugly, but it seems wasteful to write a trie lookup routine
+ // for only 3 characters...
+ var node = opTokens[ch];
+ var next = input[this.cursor];
+ if (next in node) {
+ node = node[next];
+ this.cursor++;
+ next = input[this.cursor];
+ if (next in node) {
+ node = node[next];
+ this.cursor++;
+ next = input[this.cursor];
+ }
+ }
+
+ var op = node.op;
+ if (definitions.assignOps[op] && input[this.cursor] === '=') {
+ this.cursor++;
+ token.type = ASSIGN;
+ token.assignOp = definitions.tokenIds[definitions.opTypeNames[op]];
+ op += '=';
+ } else {
+ token.type = definitions.tokenIds[definitions.opTypeNames[op]];
+ token.assignOp = null;
+ }
+
+ token.value = op;
+ },
+
+ // FIXME: Unicode escape sequences
+ lexIdent: function (ch) {
+ var token = this.token;
+ var id = ch;
+
+ while ((ch = this.getValidIdentifierChar(false)) !== null) {
+ id += ch;
+ }
+
+ token.type = definitions.keywords[id] || IDENTIFIER;
+ if (token.type in this.blackList) {
+ // banned keyword, this is an identifier
+ token.type = IDENTIFIER;
+ }
+ token.value = id;
+ },
+
+ /*
+ * Tokenizer.get :: (scanOperand) -> token type
+ *
+ * Return the type of the next token and make it the current token.
+ * Consume input *only* if there is no lookahead.
+ * Dispatch to the appropriate lexing function depending on the input.
+ *
+ * scanOperand - when true, a '/' starts a regexp literal instead of an
+ * operator.
+ * Throws a SyntaxError ("Illegal token") when no lexing rule applies.
+ */
+ get: function (scanOperand) {
+ var token;
+ // Replay tokens pushed back by unget() first. Buffered NEWLINE tokens are
+ // dropped unless newlines are currently significant.
+ while (this.lookahead) {
+ --this.lookahead;
+ this.tokenIndex = (this.tokenIndex + 1) & 3;
+ token = this.tokens[this.tokenIndex];
+ if (token.type !== NEWLINE || this.scanNewlines)
+ return token.type;
+ }
+
+ this.skip();
+
+ // Move on to the next of the four token slots, creating it on first use.
+ this.tokenIndex = (this.tokenIndex + 1) & 3;
+ token = this.tokens[this.tokenIndex];
+ if (!token)
+ this.tokens[this.tokenIndex] = token = {};
+
+ var input = this.source;
+ // At end of input only the type is set; the slot's other fields keep
+ // whatever an earlier token left in them.
+ if (this.cursor >= input.length)
+ return token.type = END;
+
+ token.start = this.cursor;
+ token.lineno = this.lineno;
+
+ // Try an identifier start first (it consumes the character on success);
+ // otherwise consume one character and dispatch on it.
+ var ich = this.getValidIdentifierChar(true);
+ var ch = (ich === null) ? input[this.cursor++] : null;
+ if (ich !== null) {
+ this.lexIdent(ich);
+ } else if (scanOperand && ch === '/') {
+ this.lexRegExp(ch);
+ } else if (ch in opTokens) {
+ this.lexOp(ch);
+ } else if (ch === '.') {
+ this.lexDot(ch);
+ } else if (ch >= '1' && ch <= '9') {
+ this.lexNumber(ch);
+ } else if (ch === '0') {
+ this.lexZeroNumber(ch);
+ } else if (ch === '"' || ch === "'") {
+ this.lexString(ch);
+ } else if (this.scanNewlines && (ch === '\n' || ch === '\r')) {
+ // if this was a \r, look for \r\n
+ if (ch === '\r' && input[this.cursor] === '\n') this.cursor++;
+ token.type = NEWLINE;
+ token.value = '\n';
+ this.lineno++;
+ } else {
+ throw this.newSyntaxError("Illegal token");
+ }
+
+ token.end = this.cursor;
+ return token.type;
+ },
+
+ /*
+ * Tokenizer.unget :: void -> undefined
+ *
+ * Match depends on unget returning undefined.
+ */
+ unget: function () {
+ if (++this.lookahead === 4) throw "PANIC: too much lookahead!";
+ this.tokenIndex = (this.tokenIndex - 1) & 3;
+ },
+
+ newSyntaxError: function (m) {
+ m = (this.filename ? this.filename + ":" : "") + this.lineno + ": " + m;
+ var e = new SyntaxError(m, this.filename, this.lineno);
+ e.source = this.source;
+ e.cursor = this.lookahead
+ ? this.tokens[(this.tokenIndex + this.lookahead) & 3].start
+ : this.cursor;
+ return e;
+ },
+
+
+ /* Gets a single valid identifier char from the input stream, or null
+ * if there is none.
+ */
+ getValidIdentifierChar: function(first) {
+ var input = this.source;
+ if (this.cursor >= input.length) return null;
+ var ch = input[this.cursor];
+
+ // first check for \u escapes
+ if (ch === '\\' && input[this.cursor+1] === 'u') {
+ // get the character value
+ try {
+ ch = String.fromCharCode(parseInt(
+ input.substring(this.cursor + 2, this.cursor + 6),
+ 16));
+ } catch (ex) {
+ return null;
+ }
+ this.cursor += 5;
+ }
+
+ var valid = isValidIdentifierChar(ch, first);
+ if (valid) this.cursor++;
+ return (valid ? ch : null);
+ },
+ };
+
+
+ // Public surface of Narcissus.lexer: the identifier predicate and the
+ // Tokenizer constructor.
+ return {
+ isIdentifier: isIdentifier,
+ Tokenizer: Tokenizer
+ };
+
+}());