From 8f99433c800d76b52deb589491aacaf73100c90b Mon Sep 17 00:00:00 2001
From: Emily Eisenberg <xymostech@gmail.com>
Date: Sun, 7 Jul 2013 21:13:43 -0700
Subject: [PATCH] Use our own lexer, not jison's

Summary:
Build our own lexer and inject it into jison's parser, because jison's
lexer notation is confusing and annoying, and it doesn't let us do some fun
stuff.

Test Plan: Run stuff, make sure it still works.

Reviewers: spicyj

Reviewed By: spicyj

Differential Revision: http://phabricator.benalpert.com/D40
---
 MJLite.js    |  1 +
 lexer.js     | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 parser.jison | 32 +++++-------------
 3 files changed, 103 insertions(+), 24 deletions(-)
 create mode 100644 lexer.js

diff --git a/MJLite.js b/MJLite.js
index e57eeb994..243e5ea93 100644
--- a/MJLite.js
+++ b/MJLite.js
@@ -1,4 +1,5 @@
 var parser = require("./parser.jison");
+parser.lexer = require("./lexer");
 
 var buildExpression = function(expression) {
     return _.map(expression, function(ex, i) {
diff --git a/lexer.js b/lexer.js
new file mode 100644
index 000000000..2ba404ac7
--- /dev/null
+++ b/lexer.js
@@ -0,0 +1,94 @@
+var DEFAULT_STATE = 0;  // lexing ordinary symbols via `normals`
+var FUNC_STATE = 1;     // a backslash was just consumed; a name from `funcs` must follow
+
+// Tokenizer object; it carries no state until setInput() is called.
+function Lexer() {}
+
+// Function names recognised after a backslash. lex() returns the matched
+// name itself as the token type.
+var funcs = ['cdot', 'frac', 'lvert', 'rvert', 'pm', 'div'];
+
+// [pattern, token type] pairs that lex() tries in order in the default state;
+// the first pattern that matches at the current position wins.
+var normals = [
+    [/^[/|a-zA-Z0-9.]/, 'ORD'],
+    [/^[*+-]/, 'BIN'],
+    [/^\^/, '^'], [/^_/, '_'],
+    [/^{/, '{'], [/^}/, '}'],
+    [/^[(\[]/, 'OPEN'],
+    [/^[)\]]/, 'CLOSE']
+];
+
+// Record `match` as the current token and advance the read position past it.
+Lexer.prototype.doMatch = function(match) {
+    var end = this._pos + match.length;
+    this.yytext = match;
+    this.yyleng = match.length;
+    this.yylloc.first_column = this._pos;
+    this.yylloc.last_column = end;
+    this._pos = end;
+};
+
+// Return the type of the next token, skipping leading whitespace; the token's
+// text and columns are published through doMatch(). Returns 'EOF' once the
+// input is exhausted and throws a string for input that no rule accepts.
+Lexer.prototype.lex = function() {
+    var whitespace = this._input.substr(this._pos).match(/^\s*/)[0];
+    this._pos += whitespace.length;
+
+    if (this._pos >= this._input.length) {
+        return 'EOF';
+    }
+
+    var toMatch = this._input.substr(this._pos);
+    var i, match;
+
+    if (this.state === DEFAULT_STATE) {
+        if (/^\\/.test(toMatch)) {
+            // A backslash introduces a function name, lexed in FUNC_STATE.
+            this.state = FUNC_STATE;
+            this.doMatch('\\');
+            return '\\';
+        }
+        for (i = 0; i < normals.length; i++) {
+            match = toMatch.match(normals[i][0]);
+            if (match) {
+                this.doMatch(match[0]);
+                return normals[i][1];
+            }
+        }
+    } else if (this.state === FUNC_STATE) {
+        for (i = 0; i < funcs.length; i++) {
+            // The lookahead must be a character class: without the brackets
+            // it only rejected the literal text "a-zA-Z", so "\pmx" lexed as
+            // "\pm" followed by "x" instead of being rejected.
+            match = toMatch.match(new RegExp('^' + funcs[i] + '(?![a-zA-Z])'));
+            if (match) {
+                this.doMatch(match[0]);
+                this.state = DEFAULT_STATE;
+                return funcs[i];
+            }
+        }
+    }
+
+    throw "Unexpected character: '" + toMatch[0] + "' at position " + this._pos;
+};
+
+// Start lexing `input` from its first character: resets the read position,
+// the token fields (yytext, yyleng, yylineno, yylloc) and the lexer state.
+Lexer.prototype.setInput = function(input) {
+    this._input = input;
+    this._pos = 0;
+    this.state = DEFAULT_STATE;
+    this.yytext = "";
+    this.yyleng = 0;
+    this.yylineno = 0;
+    this.yylloc = {
+        first_line: 1,
+        first_column: 0,
+        last_line: 1,
+        last_column: 0
+    };
+};
+
+module.exports = new Lexer();  // one shared instance; MJLite.js assigns it to parser.lexer
diff --git a/parser.jison b/parser.jison
index bb9b058db..23ca5f5d9 100644
--- a/parser.jison
+++ b/parser.jison
@@ -4,22 +4,6 @@
 %lex
 %%
 
-\s+                   /* skip whitespace */
-cdot                  return 'CDOT'
-frac                  return 'FRAC'
-lvert                 return 'LVERT'
-rvert                 return 'RVERT'
-pm                    return 'PM'
-div                   return 'DIV'
-[/|a-zA-Z0-9]         return 'ORD'
-[*+-]                 return 'BIN'
-\^                    return '^'
-[_]                   return '_'
-[{]                   return '{'
-[}]                   return '}'
-[(]                   return 'OPEN'
-[)]                   return 'CLOSE'
-[\\]                  return '\\'
 <<EOF>>               return 'EOF'
 
 /lex
@@ -37,7 +21,7 @@ div                   return 'DIV'
 %% /* language grammar */
 
 expression
-    : ex EOF
+    : ex 'EOF'
         {return $1;}
     ;
 
@@ -61,22 +45,22 @@ group
         {$$ = $1;}
     | '{' ex '}'
         {$$ = $2;}
-    | '\\' func
+    | '\' func
         {$$ = $2;}
     ;
 
 func
-    : 'CDOT'
+    : 'cdot'
         {$$ = [{type: 'bin', value: yytext}];}
-    | 'PM'
+    | 'pm'
         {$$ = [{type: 'bin', value: yytext}];}
-    | 'DIV'
+    | 'div'
         {$$ = [{type: 'bin', value: yytext}];}
-    | 'FRAC' group group
+    | 'frac' group group
         {$$ = [{type: 'frac', value: {numer: $2, denom: $3}}];}
-    | 'LVERT'
+    | 'lvert'
         {$$ = [{type: 'open', value: yytext}];}
-    | 'RVERT'
+    | 'rvert'
         {$$ = [{type: 'close', value: yytext}];}
     ;