Use our own lexer, not jison's

Summary: Build our own lexer and inject it into jison's parser, because jison's lexer notation is confusing and annoying, and it doesn't let us do some fun stuff. Test Plan: Run stuff, make sure it still works. Reviewers: spicyj Reviewed By: spicyj Differential Revision: http://phabricator.benalpert.com/D40
2013-07-07 21:13:43 -07:00 · 2013-07-07 21:13:43 -07:00 · 8f99433c80
commit 8f99433c80
parent 33625f7b08
3 changed files with 103 additions and 24 deletions
--- a/MJLite.js
+++ b/MJLite.js
@ -1,4 +1,5 @@
 var parser = require("./parser.jison");
+parser.lexer = require("./lexer");

 var buildExpression = function(expression) {
    return _.map(expression, function(ex, i) {
--- a/lexer.js
+++ b/lexer.js
@ -0,0 +1,94 @@
+var DEFAULT_STATE = 0, // lexing ordinary tokens from `normals`
+    FUNC_STATE = 1; // a backslash was just consumed; a name from `funcs` must follow
+// The constructor sets nothing up; setInput() initializes all lexer state.
+function Lexer() {
+};
+// Names accepted right after a backslash. lex() returns the matched name itself as the token type.
+var funcs = [
+    'cdot', 'frac', 'lvert', 'rvert', 'pm', 'div'
+];
+// [pattern, token type] pairs that lex() tries in order in DEFAULT_STATE; the first pattern that matches at the current position wins.
+var normals = [
+    [/^[/|a-zA-Z0-9.]/, 'ORD'],
+    [/^[*+-]/, 'BIN'],
+    [/^\^/, '^'],
+    [/^_/, '_'],
+    [/^{/, '{'],
+    [/^}/, '}'],
+    [/^[(\[]/, 'OPEN'],
+    [/^[)\]]/, 'CLOSE']
+];
+
+// Record `match` (the text of the token just recognized) as the current
+// token and move the read position past it.
+//
+// Sets yytext/yyleng to the token text and its length, sets the column span
+// in yylloc to [start, end) offsets into the input, and advances _pos to the
+// end of the token.
+Lexer.prototype.doMatch = function(match) {
+    var start = this._pos;
+    var end = start + match.length;
+
+    this.yytext = match;
+    this.yyleng = match.length;
+
+    this.yylloc.first_column = start;
+    this.yylloc.last_column = end;
+
+    this._pos = end;
+};
+
+// Return the type of the next token in the input, recording its text and
+// position through doMatch().
+//
+// Leading whitespace is skipped first, in either state. Returns 'EOF' once
+// nothing but whitespace remains. In DEFAULT_STATE a backslash switches the
+// lexer to FUNC_STATE and is returned as the token '\\'; otherwise the
+// entries of `normals` are tried in order. In FUNC_STATE the input must start
+// with one of the names in `funcs`, which is returned as the token type and
+// switches the lexer back to DEFAULT_STATE.
+//
+// Throws a string naming the offending character and its position when
+// nothing matches.
+Lexer.prototype.lex = function() {
+    var i, match;
+
+    // Get rid of whitespace
+    var whitespace = this._input.substr(this._pos).match(/^\s*/)[0];
+    this._pos += whitespace.length;
+
+    if (this._pos >= this._input.length) {
+        return 'EOF';
+    }
+
+    var toMatch = this._input.substr(this._pos);
+
+    if (this.state === DEFAULT_STATE) {
+        if (toMatch.charAt(0) === '\\') {
+            // The function name itself is read by the next call to lex().
+            this.state = FUNC_STATE;
+            this.doMatch('\\');
+            return '\\';
+        }
+
+        for (i = 0; i < normals.length; i++) {
+            match = toMatch.match(normals[i][0]);
+            if (match) {
+                this.doMatch(match[0]);
+                return normals[i][1];
+            }
+        }
+    } else if (this.state === FUNC_STATE) {
+        for (i = 0; i < funcs.length; i++) {
+            var func = funcs[i];
+
+            // The name must not be followed directly by another letter, so
+            // that e.g. "\pmx" is rejected rather than split into "\pm" and
+            // "x". The lookahead needs a character class ([a-zA-Z]) to test
+            // for a letter.
+            var regex = new RegExp('^' + func + '(?![a-zA-Z])');
+
+            match = toMatch.match(regex);
+            if (match) {
+                this.doMatch(match[0]);
+                this.state = DEFAULT_STATE;
+                return func;
+            }
+        }
+    }
+
+    // Kept as a plain string throw so the thrown value stays unchanged.
+    throw "Unexpected character: '" + toMatch.charAt(0) + "' at position " + this._pos;
+};
+
+// Point the lexer at a new `input` string and reset all of its state so the
+// next call to lex() starts from the first character in DEFAULT_STATE.
+Lexer.prototype.setInput = function(input) {
+    // Scanning state.
+    this.state = DEFAULT_STATE;
+    this._input = input;
+    this._pos = 0;
+
+    // Current-token fields: nothing has been matched yet.
+    this.yytext = "";
+    this.yyleng = 0;
+    this.yylineno = 0;
+
+    // Location of the current token; doMatch() only updates the columns.
+    var location = {
+        first_line: 1,
+        first_column: 0,
+        last_line: 1,
+        last_column: 0
+    };
+    this.yylloc = location;
+};
+
+module.exports = new Lexer(); // a single shared instance; MJLite.js assigns it to parser.lexer
--- a/parser.jison
+++ b/parser.jison
@ -4,22 +4,6 @@
 %lex
 %%

-\s+                   /* skip whitespace */
-cdot                  return 'CDOT'
-frac                  return 'FRAC'
-lvert                 return 'LVERT'
-rvert                 return 'RVERT'
-pm                    return 'PM'
-div                   return 'DIV'
-[/|a-zA-Z0-9]         return 'ORD'
-[*+-]                 return 'BIN'
-\^                    return '^'
-[_]                   return '_'
-[{]                   return '{'
-[}]                   return '}'
-[(]                   return 'OPEN'
-[)]                   return 'CLOSE'
-[\\]                  return '\\'
 <<EOF>>               return 'EOF'

 /lex
@ -37,7 +21,7 @@ div                   return 'DIV'
 %% /* language grammar */

 expression
-    : ex EOF
+    : ex 'EOF'
        {return $1;}
    ;

@ -61,22 +45,22 @@ group
        {$$ = $1;}
    | '{' ex '}'
        {$$ = $2;}
-    | '\\' func
+    | '\' func
        {$$ = $2;}
    ;

 func
-    : 'CDOT'
+    : 'cdot'
        {$$ = [{type: 'bin', value: yytext}];}
-    | 'PM'
+    | 'pm'
        {$$ = [{type: 'bin', value: yytext}];}
-    | 'DIV'
+    | 'div'
        {$$ = [{type: 'bin', value: yytext}];}
-    | 'FRAC' group group
+    | 'frac' group group
        {$$ = [{type: 'frac', value: {numer: $2, denom: $3}}];}
-    | 'LVERT'
+    | 'lvert'
        {$$ = [{type: 'open', value: yytext}];}
-    | 'RVERT'
+    | 'rvert'
        {$$ = [{type: 'close', value: yytext}];}
    ;