diff options
Diffstat (limited to 'data/chrome_worker/parser/jslex.js')
-rw-r--r-- | data/chrome_worker/parser/jslex.js | 719 |
1 files changed, 719 insertions, 0 deletions
diff --git a/data/chrome_worker/parser/jslex.js b/data/chrome_worker/parser/jslex.js new file mode 100644 index 0000000..c5c2673 --- /dev/null +++ b/data/chrome_worker/parser/jslex.js @@ -0,0 +1,719 @@ +/* vim: set sw=4 ts=4 et tw=78: */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is the Narcissus JavaScript engine. + * + * The Initial Developer of the Original Code is + * Brendan Eich <brendan@mozilla.org>. + * Portions created by the Initial Developer are Copyright (C) 2004 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Tom Austin <taustin@ucsc.edu> + * Brendan Eich <brendan@mozilla.org> + * Shu-Yu Guo <shu@rfrn.org> + * Stephan Herhut <stephan.a.herhut@intel.com> + * Dave Herman <dherman@mozilla.com> + * Dimitris Vardoulakis <dimvar@ccs.neu.edu> + * Patrick Walton <pcwalton@mozilla.com> + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. 
If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/** + * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript. + * * + * Copyright (C) 2011, 2012, 2014 Loic J. Duros + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +/* + * Narcissus - JS implemented in JS. + * + * Lexical scanner. + */ + +"use strict"; + +Narcissus.lexer = (function() { + + var definitions = Narcissus.definitions; + + //throw Error (definitions.consts); + + // Set constants in the local scope. 
// Token-type ids, written out literally here instead of being produced by
// eval(definitions.consts).
// NOTE(review): presumably these have to stay in step with the ids used by
// Narcissus.definitions -- confirm before renumbering anything.

// End of input, line break and basic punctuation.
const END = 0;
const NEWLINE = 1;
const SEMICOLON = 2;
const COMMA = 3;
const ASSIGN = 4;
const HOOK = 5;
const COLON = 6;
const CONDITIONAL = 7;

// Binary operators.
const OR = 8;
const AND = 9;
const BITWISE_OR = 10;
const BITWISE_XOR = 11;
const BITWISE_AND = 12;
const EQ = 13;
const NE = 14;
const STRICT_EQ = 15;
const STRICT_NE = 16;
const LT = 17;
const LE = 18;
const GE = 19;
const GT = 20;
const LSH = 21;
const RSH = 22;
const URSH = 23;
const PLUS = 24;
const MINUS = 25;
const MUL = 26;
const DIV = 27;
const MOD = 28;

// Unary operators.
const NOT = 29;
const BITWISE_NOT = 30;
const UNARY_PLUS = 31;
const UNARY_MINUS = 32;
const INCREMENT = 33;
const DECREMENT = 34;

// Member access and bracketing.
const DOT = 35;
const LEFT_BRACKET = 36;
const RIGHT_BRACKET = 37;
const LEFT_CURLY = 38;
const RIGHT_CURLY = 39;
const LEFT_PAREN = 40;
const RIGHT_PAREN = 41;

// Structural kinds (SCRIPT .. COMP_TAIL).
const SCRIPT = 42;
const BLOCK = 43;
const LABEL = 44;
const FOR_IN = 45;
const CALL = 46;
const NEW_WITH_ARGS = 47;
const INDEX = 48;
const ARRAY_INIT = 49;
const OBJECT_INIT = 50;
const PROPERTY_INIT = 51;
const GETTER = 52;
const SETTER = 53;
const GROUP = 54;
const LIST = 55;
const LET_BLOCK = 56;
const ARRAY_COMP = 57;
const GENERATOR = 58;
const COMP_TAIL = 59;

// Identifiers and literals.
const IDENTIFIER = 60;
const NUMBER = 61;
const STRING = 62;
const REGEXP = 63;

// Keywords.
const BREAK = 64;
const CASE = 65;
const CATCH = 66;
const CONST = 67;
const CONTINUE = 68;
const DEBUGGER = 69;
const DEFAULT = 70;
const DELETE = 71;
const DO = 72;
const ELSE = 73;
const EXPORT = 74;
const FALSE = 75;
const FINALLY = 76;
const FOR = 77;
const FUNCTION = 78;
const IF = 79;
const IMPORT = 80;
const IN = 81;
const INSTANCEOF = 82;
const LET = 83;
const MODULE = 84;
const NEW = 85;
const NULL = 86;
const RETURN = 87;
const SWITCH = 88;
const THIS = 89;
const THROW = 90;
const TRUE = 91;
const TRY = 92;
const TYPEOF = 93;
const VAR = 94;
const VOID = 95;
const YIELD = 96;
const WHILE = 97;
const WITH = 98;

// Keywords banned per language version, keyed by version and then by token
// type.  Every list is currently empty: the assignments that used to fill
// them are kept below, commented out.
const blackLists = {
    160: {},
    185: {},
    harmony: {}
};
/*  blackLists[160][LET] = true;
    blackLists[160][MODULE] = true;
    blackLists[160][YIELD] = true;
    blackLists[185][MODULE] = true;
*/

// Build up a trie of operator tokens.
+ var opTokens = {}; + for (var op in definitions.opTypeNames) { + if (op === '\n' || op === '.') + continue; + + var node = opTokens; + for (var i = 0; i < op.length; i++) { + var ch = op[i]; + if (!(ch in node)) + node[ch] = {}; + node = node[ch]; + node.op = op; + } + } + + /* + * Since JavaScript provides no convenient way to determine if a + * character is in a particular Unicode category, we use + * metacircularity to accomplish this (oh yeaaaah!) + */ + function isValidIdentifierChar(ch, first) { + // check directly for ASCII + if (ch <= "\u007F") { + if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch === '$' || ch === '_' || + (!first && (ch >= '0' && ch <= '9'))) { + return true; + } + return false; + } + + // create an object to test this in + var x = {}; + x["x"+ch] = true; + x[ch] = true; + + // then use eval to determine if it's a valid character + var valid = false; + try { + valid = (Function("x", "return (x." + (first?"":"x") + ch + ");")(x) === true); + } catch (ex) {} + + return valid; + } + + function isIdentifier(str) { + if (typeof str !== "string") + return false; + + if (str.length === 0) + return false; + + if (!isValidIdentifierChar(str[0], true)) + return false; + + for (var i = 1; i < str.length; i++) { + if (!isValidIdentifierChar(str[i], false)) + return false; + } + + return true; + } + + /* + * Tokenizer :: (source, filename, line number) -> Tokenizer + */ + function Tokenizer(s, f, l) { + this.cursor = 0; + this.source = String(s); + this.tokens = []; + this.tokenIndex = 0; + this.lookahead = 0; + this.scanNewlines = false; + this.unexpectedEOF = false; + this.filename = f || ""; + this.lineno = l || 1; + this.blackList = blackLists[Narcissus.options.version]; + this.blockComments = null; + } + + Tokenizer.prototype = { + get done() { + // We need to set scanOperand to true here because the first thing + // might be a regexp. 
+ return this.peek(true) === END; + }, + + get token() { + return this.tokens[this.tokenIndex]; + }, + + match: function (tt, scanOperand) { + return this.get(scanOperand) === tt || this.unget(); + }, + + mustMatch: function (tt) { + if (!this.match(tt)) { + throw this.newSyntaxError("Missing " + + definitions.tokens[tt].toLowerCase()); + } + return this.token; + }, + + peek: function (scanOperand) { + var tt, next; + if (this.lookahead) { + next = this.tokens[(this.tokenIndex + this.lookahead) & 3]; + tt = (this.scanNewlines && next.lineno !== this.lineno) + ? NEWLINE + : next.type; + } else { + tt = this.get(scanOperand); + this.unget(); + } + return tt; + }, + + peekOnSameLine: function (scanOperand) { + this.scanNewlines = true; + var tt = this.peek(scanOperand); + this.scanNewlines = false; + return tt; + }, + + lastBlockComment: function() { + var length = this.blockComments.length; + return length ? this.blockComments[length - 1] : null; + }, + + // Eat comments and whitespace. + skip: function () { + var input = this.source; + this.blockComments = []; + for (;;) { + var ch = input[this.cursor++]; + var next = input[this.cursor]; + // handle \r, \r\n and (always preferable) \n + if (ch === '\r') { + // if the next character is \n, we don't care about this at all + if (next === '\n') continue; + + // otherwise, we want to consider this as a newline + ch = '\n'; + } + + if (ch === '\n' && !this.scanNewlines) { + this.lineno++; + } else if (ch === '/' && next === '*') { + var commentStart = ++this.cursor; + for (;;) { + ch = input[this.cursor++]; + if (ch === undefined) + throw this.newSyntaxError("Unterminated comment"); + + if (ch === '*') { + next = input[this.cursor]; + if (next === '/') { + var commentEnd = this.cursor - 1; + this.cursor++; + break; + } + } else if (ch === '\n') { + this.lineno++; + } + } + this.blockComments.push(input.substring(commentStart, commentEnd)); + } + else if (ch === '-' && next === '-' && + input[this.cursor + 1] === '>') { + 
this.cursor += 2; + } + else if ((ch === '/' && next === '/') || + (ch === '<' && next === '!' && + input[this.cursor + 1] === '-' && + input[this.cursor + 2] === '-' && + (this.cursor += 2))) { + + // capture single line comments starts. + var commentStart = ++this.cursor; + for (;;) { + ch = input[this.cursor++]; + if (ch === undefined) { + //this.lineno++; + break; + //throw this.newSyntaxError("Unterminated comment"); + } + if (ch === '\r') { + // check for \r\n + if (next !== '\n') ch = '\n'; + var commentEnd = this.cursor - 1; + } + + if (ch === '\n') { + if (this.scanNewlines) { + this.cursor--; + } else { + this.lineno++; + } + var commentEnd = this.cursor - 1; + break; + } + } + this.blockComments.push(input.substring(commentStart, commentEnd)); + // capture single line comments ends. + } else if (!(ch in definitions.whitespace)) { + this.cursor--; + return; + } + } + }, + + // Lex the exponential part of a number, if present. Return true iff an + // exponential part was found. 
+ lexExponent: function() { + var input = this.source; + var next = input[this.cursor]; + if (next === 'e' || next === 'E') { + this.cursor++; + ch = input[this.cursor++]; + if (ch === '+' || ch === '-') + ch = input[this.cursor++]; + + if (ch < '0' || ch > '9') + throw this.newSyntaxError("Missing exponent"); + + do { + ch = input[this.cursor++]; + } while (ch >= '0' && ch <= '9'); + this.cursor--; + + return true; + } + + return false; + }, + + lexZeroNumber: function (ch) { + var token = this.token, input = this.source; + token.type = NUMBER; + + ch = input[this.cursor++]; + if (ch === '.') { + do { + ch = input[this.cursor++]; + } while (ch >= '0' && ch <= '9'); + this.cursor--; + + this.lexExponent(); + token.value = parseFloat( + input.substring(token.start, this.cursor)); + } else if (ch === 'x' || ch === 'X') { + do { + ch = input[this.cursor++]; + } while ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || + (ch >= 'A' && ch <= 'F')); + this.cursor--; + + token.value = parseInt(input.substring(token.start, this.cursor)); + } else if (ch >= '0' && ch <= '7') { + do { + ch = input[this.cursor++]; + } while (ch >= '0' && ch <= '7'); + this.cursor--; + + token.value = parseInt(input.substring(token.start, this.cursor)); + } else { + this.cursor--; + this.lexExponent(); // 0E1, &c. + token.value = 0; + } + }, + + lexNumber: function (ch) { + var token = this.token, input = this.source; + token.type = NUMBER; + + var floating = false; + do { + ch = input[this.cursor++]; + if (ch === '.' && !floating) { + floating = true; + ch = input[this.cursor++]; + } + } while (ch >= '0' && ch <= '9'); + + this.cursor--; + + var exponent = this.lexExponent(); + floating = floating || exponent; + + var str = input.substring(token.start, this.cursor); + token.value = floating ? 
parseFloat(str) : parseInt(str); + }, + + lexDot: function (ch) { + var token = this.token, input = this.source; + var next = input[this.cursor]; + if (next >= '0' && next <= '9') { + do { + ch = input[this.cursor++]; + } while (ch >= '0' && ch <= '9'); + this.cursor--; + + this.lexExponent(); + + token.type = NUMBER; + token.value = parseFloat( + input.substring(token.start, this.cursor)); + } else { + token.type = DOT; + token.assignOp = null; + token.value = '.'; + } + }, + + lexString: function (ch) { + var token = this.token, input = this.source; + token.type = STRING; + + var hasEscapes = false; + var delim = ch; + if (input.length <= this.cursor) + throw this.newSyntaxError("Unterminated string literal"); + while ((ch = input[this.cursor++]) !== delim) { + if (this.cursor == input.length) + throw this.newSyntaxError("Unterminated string literal"); + if (ch === '\\') { + hasEscapes = true; + if (++this.cursor == input.length) + throw this.newSyntaxError("Unterminated string literal"); + } + } + + token.value = hasEscapes + ? eval(input.substring(token.start, this.cursor)) + : input.substring(token.start + 1, this.cursor - 1); + }, + + lexRegExp: function (ch) { + var token = this.token, input = this.source; + token.type = REGEXP; + + do { + ch = input[this.cursor++]; + if (ch === '\\') { + this.cursor++; + } else if (ch === '[') { + do { + if (ch === undefined) + throw this.newSyntaxError("Unterminated character class"); + + if (ch === '\\') + this.cursor++; + + ch = input[this.cursor++]; + } while (ch !== ']'); + } else if (ch === undefined) { + throw this.newSyntaxError("Unterminated regex"); + } + } while (ch !== '/'); + + do { + ch = input[this.cursor++]; + } while (ch >= 'a' && ch <= 'z'); + + this.cursor--; + + token.value = eval(input.substring(token.start, this.cursor)); + }, + + lexOp: function (ch) { + var token = this.token, input = this.source; + + // A bit ugly, but it seems wasteful to write a trie lookup routine + // for only 3 characters... 
+ var node = opTokens[ch]; + var next = input[this.cursor]; + if (next in node) { + node = node[next]; + this.cursor++; + next = input[this.cursor]; + if (next in node) { + node = node[next]; + this.cursor++; + next = input[this.cursor]; + } + } + + var op = node.op; + if (definitions.assignOps[op] && input[this.cursor] === '=') { + this.cursor++; + token.type = ASSIGN; + token.assignOp = definitions.tokenIds[definitions.opTypeNames[op]]; + op += '='; + } else { + token.type = definitions.tokenIds[definitions.opTypeNames[op]]; + token.assignOp = null; + } + + token.value = op; + }, + + // FIXME: Unicode escape sequences + lexIdent: function (ch) { + var token = this.token; + var id = ch; + + while ((ch = this.getValidIdentifierChar(false)) !== null) { + id += ch; + } + + token.type = definitions.keywords[id] || IDENTIFIER; + if (token.type in this.blackList) { + // banned keyword, this is an identifier + token.type = IDENTIFIER; + } + token.value = id; + }, + + /* + * Tokenizer.get :: void -> token type + * + * Consume input *only* if there is no lookahead. + * Dispatch to the appropriate lexing function depending on the input. + */ + get: function (scanOperand) { + var token; + while (this.lookahead) { + --this.lookahead; + this.tokenIndex = (this.tokenIndex + 1) & 3; + token = this.tokens[this.tokenIndex]; + if (token.type !== NEWLINE || this.scanNewlines) + return token.type; + } + + this.skip(); + + this.tokenIndex = (this.tokenIndex + 1) & 3; + token = this.tokens[this.tokenIndex]; + if (!token) + this.tokens[this.tokenIndex] = token = {}; + + var input = this.source; + if (this.cursor >= input.length) + return token.type = END; + + token.start = this.cursor; + token.lineno = this.lineno; + + var ich = this.getValidIdentifierChar(true); + var ch = (ich === null) ? 
input[this.cursor++] : null; + if (ich !== null) { + this.lexIdent(ich); + } else if (scanOperand && ch === '/') { + this.lexRegExp(ch); + } else if (ch in opTokens) { + this.lexOp(ch); + } else if (ch === '.') { + this.lexDot(ch); + } else if (ch >= '1' && ch <= '9') { + this.lexNumber(ch); + } else if (ch === '0') { + this.lexZeroNumber(ch); + } else if (ch === '"' || ch === "'") { + this.lexString(ch); + } else if (this.scanNewlines && (ch === '\n' || ch === '\r')) { + // if this was a \r, look for \r\n + if (ch === '\r' && input[this.cursor] === '\n') this.cursor++; + token.type = NEWLINE; + token.value = '\n'; + this.lineno++; + } else { + throw this.newSyntaxError("Illegal token"); + } + + token.end = this.cursor; + return token.type; + }, + + /* + * Tokenizer.unget :: void -> undefined + * + * Match depends on unget returning undefined. + */ + unget: function () { + if (++this.lookahead === 4) throw "PANIC: too much lookahead!"; + this.tokenIndex = (this.tokenIndex - 1) & 3; + }, + + newSyntaxError: function (m) { + m = (this.filename ? this.filename + ":" : "") + this.lineno + ": " + m; + var e = new SyntaxError(m, this.filename, this.lineno); + e.source = this.source; + e.cursor = this.lookahead + ? this.tokens[(this.tokenIndex + this.lookahead) & 3].start + : this.cursor; + return e; + }, + + + /* Gets a single valid identifier char from the input stream, or null + * if there is none. + */ + getValidIdentifierChar: function(first) { + var input = this.source; + if (this.cursor >= input.length) return null; + var ch = input[this.cursor]; + + // first check for \u escapes + if (ch === '\\' && input[this.cursor+1] === 'u') { + // get the character value + try { + ch = String.fromCharCode(parseInt( + input.substring(this.cursor + 2, this.cursor + 6), + 16)); + } catch (ex) { + return null; + } + this.cursor += 5; + } + + var valid = isValidIdentifierChar(ch, first); + if (valid) this.cursor++; + return (valid ? 
ch : null); + }, + }; + + + return { + isIdentifier: isIdentifier, + Tokenizer: Tokenizer + }; + +}()); |