From 8f99433c800d76b52deb589491aacaf73100c90b Mon Sep 17 00:00:00 2001
From: Emily Eisenberg
Date: Sun, 7 Jul 2013 21:13:43 -0700
Subject: [PATCH] Use our own lexer, not jison's

Summary:
Build our own lexer and inject it into jison's parser, because jison's lexer
notation is confusing and annoying, and it doesn't let us do some fun stuff.

Test Plan: Run stuff, make sure it still works.

Reviewers: spicyj

Reviewed By: spicyj

Differential Revision: http://phabricator.benalpert.com/D40
---
Reviewer notes (this section is not part of the commit message):

lexer.js: the lookahead that follows a function name is the character class
(?![a-zA-Z]), so a name is accepted only when no letter follows it. Without
the brackets the lookahead rejects just the literal text "a-zA-Z", and input
such as "\divx" is split into the function `div` followed by the ORD `x`.

The lexer has two states: DEFAULT_STATE matches a backslash or one of the
`normals` patterns; after a backslash it switches to FUNC_STATE, where only a
name from `funcs` is accepted, and then switches back. Leading whitespace is
skipped before every token, and 'EOF' is returned once the input is consumed.

NOTE(review): indentation, column alignment and blank context lines in this
patch were reconstructed from a whitespace-flattened copy, and the end-of-input
rule is shown as <<EOF>> on the assumption that the flattened "<>" lost its
contents. Confirm both against the tree before applying.

 MJLite.js    |  1 +
 lexer.js     | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 parser.jison | 32 +++++------------------
 3 files changed, 103 insertions(+), 24 deletions(-)
 create mode 100644 lexer.js

diff --git a/MJLite.js b/MJLite.js
index e57eeb994..243e5ea93 100644
--- a/MJLite.js
+++ b/MJLite.js
@@ -1,4 +1,5 @@
 var parser = require("./parser.jison");
+parser.lexer = require("./lexer");
 
 var buildExpression = function(expression) {
     return _.map(expression, function(ex, i) {
diff --git a/lexer.js b/lexer.js
new file mode 100644
index 000000000..2ba404ac7
--- /dev/null
+++ b/lexer.js
@@ -0,0 +1,94 @@
+var DEFAULT_STATE = 0,
+    FUNC_STATE = 1;
+
+function Lexer() {
+};
+
+var funcs = [
+    'cdot', 'frac', 'lvert', 'rvert', 'pm', 'div'
+];
+
+var normals = [
+    [/^[/|a-zA-Z0-9.]/, 'ORD'],
+    [/^[*+-]/, 'BIN'],
+    [/^\^/, '^'],
+    [/^_/, '_'],
+    [/^{/, '{'],
+    [/^}/, '}'],
+    [/^[(\[]/, 'OPEN'],
+    [/^[)\]]/, 'CLOSE']
+];
+
+Lexer.prototype.doMatch = function(match) {
+    this.yytext = match;
+    this.yyleng = match.length;
+
+    this.yylloc.first_column = this._pos;
+    this.yylloc.last_column = this._pos + match.length;
+
+    this._pos += match.length;
+};
+
+Lexer.prototype.lex = function() {
+    // Get rid of whitespace
+    var whitespace = this._input.substr(this._pos).match(/^\s*/)[0];
+    this._pos += whitespace.length;
+
+    if (this._pos >= this._input.length) {
+        return 'EOF';
+    }
+
+    var toMatch = this._input.substr(this._pos);
+
+    if (this.state === DEFAULT_STATE) {
+        if (/^\\/.test(toMatch)) {
+            this.state = FUNC_STATE;
+            this.doMatch('\\');
+            return '\\';
+        } else {
+            for (var i = 0; i < normals.length; i++) {
+                var normal = normals[i];
+
+                var match = toMatch.match(normal[0]);
+                if (match) {
+                    this.doMatch(match[0]);
+                    return normal[1];
+                }
+            }
+        }
+    } else if (this.state === FUNC_STATE) {
+        for (var i = 0; i < funcs.length; i++) {
+            var func = funcs[i];
+
+            var regex = new RegExp('^' + func + '(?![a-zA-Z])');
+
+            var match = toMatch.match(regex);
+            if (match) {
+                this.doMatch(match[0]);
+                this.state = DEFAULT_STATE;
+                return func;
+            }
+        }
+    }
+
+    throw "Unexpected character: '" + toMatch[0] + "' at position " + this._pos;
+};
+
+Lexer.prototype.setInput = function(input) {
+    this._input = input;
+    this._pos = 0;
+
+    this.yyleng = 0;
+    this.yytext = "";
+    this.yylineno = 0;
+    this.yylloc = {
+        first_line: 1,
+        first_column: 0,
+        last_line: 1,
+        last_column: 0
+    };
+
+    this.state = DEFAULT_STATE;
+};
+
+module.exports = new Lexer();
diff --git a/parser.jison b/parser.jison
index bb9b058db..23ca5f5d9 100644
--- a/parser.jison
+++ b/parser.jison
@@ -4,22 +4,6 @@
 %lex
 %%
 
-\s+                   /* skip whitespace */
-cdot                  return 'CDOT'
-frac                  return 'FRAC'
-lvert                 return 'LVERT'
-rvert                 return 'RVERT'
-pm                    return 'PM'
-div                   return 'DIV'
-[/|a-zA-Z0-9]         return 'ORD'
-[*+-]                 return 'BIN'
-\^                    return '^'
-[_]                   return '_'
-[{]                   return '{'
-[}]                   return '}'
-[(]                   return 'OPEN'
-[)]                   return 'CLOSE'
-[\\]                  return '\\'
 <<EOF>>               return 'EOF'
 
 /lex
@@ -37,7 +21,7 @@ div                   return 'DIV'
 %% /* language grammar */
 
 expression
-    : ex EOF
+    : ex 'EOF'
         {return $1;}
     ;
 
@@ -61,22 +45,22 @@ group
         {$$ = $1;}
     | '{' ex '}'
         {$$ = $2;}
-    | '\\' func
+    | '\' func
         {$$ = $2;}
     ;
 
 func
-    : 'CDOT'
+    : 'cdot'
         {$$ = [{type: 'bin', value: yytext}];}
-    | 'PM'
+    | 'pm'
         {$$ = [{type: 'bin', value: yytext}];}
-    | 'DIV'
+    | 'div'
         {$$ = [{type: 'bin', value: yytext}];}
-    | 'FRAC' group group
+    | 'frac' group group
         {$$ = [{type: 'frac', value: {numer: $2, denom: $3}}];}
-    | 'LVERT'
+    | 'lvert'
         {$$ = [{type: 'open', value: yytext}];}
-    | 'RVERT'
+    | 'rvert'
         {$$ = [{type: 'close', value: yytext}];}
     ;
 