Use our own lexer, not jison's

Summary:
Build our own lexer and inject it into jison's parser, because jison's
lexer notation is confusing and annoying, and it doesn't let us do some fun
stuff.

Test Plan: Run stuff, make sure it still works.

Reviewers: spicyj

Reviewed By: spicyj

Differential Revision: http://phabricator.benalpert.com/D40
This commit is contained in:
Emily Eisenberg 2013-07-07 21:13:43 -07:00
parent 33625f7b08
commit 8f99433c80
3 changed files with 103 additions and 24 deletions

View File

@ -1,4 +1,5 @@
var parser = require("./parser.jison");
parser.lexer = require("./lexer");
var buildExpression = function(expression) {
return _.map(expression, function(ex, i) {

94
lexer.js Normal file
View File

@ -0,0 +1,94 @@
// Lexer states. The lexer starts in DEFAULT_STATE; it switches to FUNC_STATE
// right after consuming a backslash, and returns to DEFAULT_STATE once a
// function name has been matched.
var DEFAULT_STATE = 0;
var FUNC_STATE = 1;
/**
 * Hand-written lexer. The constructor takes no arguments and sets up no
 * state of its own; all per-input state is initialized by `setInput`.
 */
function Lexer() {}
// Function names recognized while in FUNC_STATE (i.e. directly after a
// backslash). Each name is also the token type returned for it.
var funcs = [
    'cdot',
    'frac',
    'lvert',
    'rvert',
    'pm',
    'div'
];
// Token table used in DEFAULT_STATE. Each entry is [pattern, tokenType];
// entries are tried in order and the first pattern that matches at the
// current position wins. Every pattern is anchored with `^` and consumes
// exactly one character.
var normals = [
    // Ordinary symbols: slash, pipe, letters, digits and the decimal point.
    [/^[/|a-zA-Z0-9.]/, 'ORD'],
    // Binary operators.
    [/^[*+-]/, 'BIN'],
    // Superscript and subscript markers.
    [/^\^/, '^'],
    [/^_/, '_'],
    // Group delimiters.
    [/^{/, '{'],
    [/^}/, '}'],
    // Opening and closing brackets (round or square).
    [/^[(\[]/, 'OPEN'],
    [/^[)\]]/, 'CLOSE']
];
/**
 * Records `match` as the current token and advances the read position
 * past it.
 *
 * Sets `yytext` and `yyleng` to the matched text and its length, updates
 * the column range in `yylloc` to [old position, old position + length],
 * and moves `_pos` forward by the length of the match.
 *
 * @param {string} match The text that was just matched at `_pos`.
 */
Lexer.prototype.doMatch = function(match) {
    var length = match.length;
    var start = this._pos;
    var end = start + length;

    this.yytext = match;
    this.yyleng = length;

    this.yylloc.first_column = start;
    this.yylloc.last_column = end;

    this._pos = end;
};
/**
 * Consumes the next token from the input and returns its type.
 *
 * Leading whitespace is skipped first. What can match next depends on
 * `this.state`:
 *   - DEFAULT_STATE: a backslash switches the lexer to FUNC_STATE and is
 *     returned as the token '\\'; otherwise the first matching entry of
 *     `normals` decides the token type.
 *   - FUNC_STATE: the input must start with one of the names in `funcs`,
 *     not immediately followed by another letter. The name itself is the
 *     token type, and the lexer goes back to DEFAULT_STATE.
 *
 * The matched text and its position are recorded through `doMatch`.
 *
 * @return {string} 'EOF' at the end of the input, otherwise the token type.
 * @throws {string} A message naming the offending character and its
 *     position when nothing matches at the current position.
 */
Lexer.prototype.lex = function() {
    // Skip leading whitespace; it never produces a token.
    var whitespace = this._input.substr(this._pos).match(/^\s*/)[0];
    this._pos += whitespace.length;

    if (this._pos >= this._input.length) {
        return 'EOF';
    }

    var toMatch = this._input.substr(this._pos);
    var i, match;

    if (this.state === DEFAULT_STATE) {
        if (/^\\/.test(toMatch)) {
            // A backslash introduces a function name, which is lexed by
            // the FUNC_STATE branch on the next call.
            this.state = FUNC_STATE;
            this.doMatch('\\');
            return '\\';
        }

        for (i = 0; i < normals.length; i++) {
            var normal = normals[i];
            match = toMatch.match(normal[0]);
            if (match) {
                this.doMatch(match[0]);
                return normal[1];
            }
        }
    } else if (this.state === FUNC_STATE) {
        for (i = 0; i < funcs.length; i++) {
            var func = funcs[i];
            // The lookahead must be a character class. Written as
            // `(?!a-zA-Z)` it only rejected the literal text "a-zA-Z", so
            // a name followed by more letters (e.g. `\divx`) was wrongly
            // accepted as the shorter function.
            var regex = new RegExp('^' + func + '(?![a-zA-Z])');
            match = toMatch.match(regex);
            if (match) {
                this.doMatch(match[0]);
                this.state = DEFAULT_STATE;
                return func;
            }
        }
    }

    // Nothing matched in the current state.
    throw "Unexpected character: '" + toMatch[0] + "' at position " + this._pos;
};
/**
 * Points the lexer at a new input string and resets all lexing state:
 * the read position goes back to 0, the current-token fields (`yytext`,
 * `yyleng`, `yylineno`) are cleared, a fresh `yylloc` location object is
 * installed, and the lexer returns to DEFAULT_STATE.
 *
 * @param {string} input The text to tokenize.
 */
Lexer.prototype.setInput = function(input) {
    // A new location object is created on every call, so objects handed out
    // for a previous input are never mutated.
    var freshLocation = {
        first_line: 1,
        first_column: 0,
        last_line: 1,
        last_column: 0
    };

    this._input = input;
    this._pos = 0;

    this.yyleng = 0;
    this.yytext = "";
    this.yylineno = 0;
    this.yylloc = freshLocation;

    this.state = DEFAULT_STATE;
};
// Export one Lexer instance rather than the constructor, so every consumer
// that requires this module gets the same object. `setInput` must be
// called before `lex` to initialize its state.
module.exports = new Lexer();

View File

@ -4,22 +4,6 @@
%lex
%%
\s+ /* skip whitespace */
cdot return 'CDOT'
frac return 'FRAC'
lvert return 'LVERT'
rvert return 'RVERT'
pm return 'PM'
div return 'DIV'
[/|a-zA-Z0-9] return 'ORD'
[*+-] return 'BIN'
\^ return '^'
[_] return '_'
[{] return '{'
[}] return '}'
[(] return 'OPEN'
[)] return 'CLOSE'
[\\] return '\\'
<<EOF>> return 'EOF'
/lex
@ -37,7 +21,7 @@ div return 'DIV'
%% /* language grammar */
expression
: ex EOF
: ex 'EOF'
{return $1;}
;
@ -61,22 +45,22 @@ group
{$$ = $1;}
| '{' ex '}'
{$$ = $2;}
| '\\' func
| '\' func
{$$ = $2;}
;
func
: 'CDOT'
: 'cdot'
{$$ = [{type: 'bin', value: yytext}];}
| 'PM'
| 'pm'
{$$ = [{type: 'bin', value: yytext}];}
| 'DIV'
| 'div'
{$$ = [{type: 'bin', value: yytext}];}
| 'FRAC' group group
| 'frac' group group
{$$ = [{type: 'frac', value: {numer: $2, denom: $3}}];}
| 'LVERT'
| 'lvert'
{$$ = [{type: 'open', value: yytext}];}
| 'RVERT'
| 'rvert'
{$$ = [{type: 'close', value: yytext}];}
;