First attempt at \text function

Summary: Make all of the parsing functions keep track of whether they are parsing in math mode or text mode. Then, add a separate lexing function to lex text mode, which is different from the normal mode because it does weird things with spacing (a run of spaces is collapsed into a single space token instead of being skipped) and allows a different set of characters.

Test Plan:
- See that the normal tests work
- See that the huxley screenshot looks reasonable
- See that none of the other huxley screenshots changed

Reviewers: alpert
Reviewed By: alpert
Differential Revision: http://phabricator.khanacademy.org/D7578
2014-03-26 22:17:41 -04:00 · 2014-03-26 22:17:41 -04:00 · 7723d3dcaf
commit 7723d3dcaf
parent 2eca338e23
8 changed files with 785 additions and 626 deletions
--- a/Lexer.js
+++ b/Lexer.js
@ -13,7 +13,7 @@ function LexResult(type, text, position) {
 }

 // "normal" types of tokens
-var normals = [
+var mathNormals = [
    [/^[/|@."`0-9]/, "textord"],
    [/^[a-zA-Z]/, "mathord"],
    [/^[*+-]/, "bin"],
@ -28,17 +28,30 @@ var normals = [
    [/^[)\]?!]/, "close"]
 ];

+var textNormals = [
+    [/^[a-zA-Z0-9`!@*()-=+\[\]'";:?\/.,]/, "textord"],
+    [/^{/, "{"],
+    [/^}/, "}"]
+];
+
 // Build a regex to easily parse the functions
 var anyFunc = /^\\(?:[a-zA-Z]+|.)/;

-// Lex a single token
-Lexer.prototype.lex = function(pos) {
+Lexer.prototype._innerLex = function(pos, normals, ignoreWhitespace) {
    var input = this._input.slice(pos);

    // Get rid of whitespace
-    var whitespace = input.match(/^\s*/)[0];
-    pos += whitespace.length;
-    input = input.slice(whitespace.length);
+    if (ignoreWhitespace) {
+        var whitespace = input.match(/^\s*/)[0];
+        pos += whitespace.length;
+        input = input.slice(whitespace.length);
+    } else {
+        // Do the funky concatenation of whitespace
+        var whitespace = input.match(/^( +|\\  +)/);
+        if (whitespace !== null) {
+            return new LexResult(" ", " ", pos + whitespace[0].length);
+        }
+    }

    // If there's no more input to parse, return an EOF token
    if (input.length === 0) {
@ -66,6 +79,15 @@ Lexer.prototype.lex = function(pos) {
    // We didn't match any of the tokens, so throw an error.
    throw new ParseError("Unexpected character: '" + input[0] +
        "' at position " + pos);
+}
+
+// Lex a single token
+Lexer.prototype.lex = function(pos, mode) {
+    if (mode === "math") {
+        return this._innerLex(pos, mathNormals, true);
+    } else if (mode === "text") {
+        return this._innerLex(pos, textNormals, false);
+    }
 };

 module.exports = Lexer;
--- a/Parser.js
+++ b/Parser.js
@ -16,9 +16,10 @@ function ParseResult(result, newPosition) {
 }

 // The resulting parse tree nodes of the parse tree.
-function ParseNode(type, value) {
+function ParseNode(type, value, mode) {
    this.type = type;
    this.value = value;
+    this.mode = mode;
 }

 // Checks a result to make sure it has the right type, and throws an
@ -37,27 +38,27 @@ Parser.prototype.parse = function(input) {
    this.lexer = new Lexer(input);

    // Try to parse the input
-    var parse = this.parseInput(0);
+    var parse = this.parseInput(0, "math");
    return parse.result;
 };

 // Parses an entire input tree
-Parser.prototype.parseInput = function(pos) {
+Parser.prototype.parseInput = function(pos, mode) {
    // Parse an expression
-    var expression = this.parseExpression(pos);
+    var expression = this.parseExpression(pos, mode);
    // If we succeeded, make sure there's an EOF at the end
-    var EOF = this.lexer.lex(expression.position);
+    var EOF = this.lexer.lex(expression.position, mode);
    expect(EOF, "EOF");
    return expression;
 };

 // Parses an "expression", which is a list of atoms
-Parser.prototype.parseExpression = function(pos) {
+Parser.prototype.parseExpression = function(pos, mode) {
    // Start with a list of nodes
    var expression = [];
    while (true) {
        // Try to parse atoms
-        var parse = this.parseAtom(pos);
+        var parse = this.parseAtom(pos, mode);
        if (parse) {
            // Copy them into the list
            expression.push(parse.result);
@ -70,12 +71,16 @@ Parser.prototype.parseExpression = function(pos) {
 };

 // Parses a superscript expression, like "^3"
-Parser.prototype.parseSuperscript = function(pos) {
+Parser.prototype.parseSuperscript = function(pos, mode) {
+    if (mode !== "math") {
+        throw new ParseError("Trying to parse superscript in non-math mode");
+    }
+
    // Try to parse a "^" character
-    var sup = this.lexer.lex(pos);
+    var sup = this.lexer.lex(pos, mode);
    if (sup.type === "^") {
        // If we got one, parse the corresponding group
-        var group = this.parseGroup(sup.position);
+        var group = this.parseGroup(sup.position, mode);
        if (group) {
            return group;
        } else {
@ -85,19 +90,23 @@ Parser.prototype.parseSuperscript = function(pos) {
    } else if (sup.type === "'") {
        var pos = sup.position;
        return new ParseResult(
-            new ParseNode("textord", "\\prime"), sup.position);
+            new ParseNode("textord", "\\prime"), sup.position, mode);
    } else {
        return null;
    }
 };

 // Parses a subscript expression, like "_3"
-Parser.prototype.parseSubscript = function(pos) {
+Parser.prototype.parseSubscript = function(pos, mode) {
+    if (mode !== "math") {
+        throw new ParseError("Trying to parse subscript in non-math mode");
+    }
+
    // Try to parse a "_" character
-    var sub = this.lexer.lex(pos);
+    var sub = this.lexer.lex(pos, mode);
    if (sub.type === "_") {
        // If we got one, parse the corresponding group
-        var group = this.parseGroup(sub.position);
+        var group = this.parseGroup(sub.position, mode);
        if (group) {
            return group;
        } else {
@ -111,12 +120,18 @@ Parser.prototype.parseSubscript = function(pos) {

 // Parses an atom, which consists of a nucleus, and an optional superscript and
 // subscript
-Parser.prototype.parseAtom = function(pos) {
+Parser.prototype.parseAtom = function(pos, mode) {
    // Parse the nucleus
-    var nucleus = this.parseGroup(pos);
+    var nucleus = this.parseGroup(pos, mode);
    var nextPos = pos;
    var nucleusNode;

+    // Text mode doesn't have superscripts or subscripts, so we only parse the
+    // nucleus in this case
+    if (mode === "text") {
+        return nucleus;
+    }
+
    if (nucleus) {
        nextPos = nucleus.position;
        nucleusNode = nucleus.result;
@ -129,7 +144,7 @@ Parser.prototype.parseAtom = function(pos) {
    // depending on whether those succeed, we return the correct type.
    while (true) {
        var node;
-        if ((node = this.parseSuperscript(nextPos))) {
+        if ((node = this.parseSuperscript(nextPos, mode))) {
            if (sup) {
                throw new ParseError("Parse error: Double superscript");
            }
@ -137,7 +152,7 @@ Parser.prototype.parseAtom = function(pos) {
            sup = node.result;
            continue;
        }
-        if ((node = this.parseSubscript(nextPos))) {
+        if ((node = this.parseSubscript(nextPos, mode))) {
            if (sub) {
                throw new ParseError("Parse error: Double subscript");
            }
@ -151,7 +166,7 @@ Parser.prototype.parseAtom = function(pos) {
    if (sup || sub) {
        return new ParseResult(
            new ParseNode("supsub", {base: nucleusNode, sup: sup,
-                    sub: sub}),
+                    sub: sub}, mode),
            nextPos);
    } else {
        return nucleus;
@ -160,25 +175,24 @@ Parser.prototype.parseAtom = function(pos) {

 // Parses a group, which is either a single nucleus (like "x") or an expression
 // in braces (like "{x+y}")
-Parser.prototype.parseGroup = function(pos) {
-    var start = this.lexer.lex(pos);
+Parser.prototype.parseGroup = function(pos, mode) {
+    var start = this.lexer.lex(pos, mode);
    // Try to parse an open brace
    if (start.type === "{") {
        // If we get a brace, parse an expression
-        var expression = this.parseExpression(start.position);
+        var expression = this.parseExpression(start.position, mode);
        // Make sure we get a close brace
-        var closeBrace = this.lexer.lex(expression.position);
+        var closeBrace = this.lexer.lex(expression.position, mode);
        expect(closeBrace, "}");
        return new ParseResult(
-            new ParseNode("ordgroup", expression.result),
+            new ParseNode("ordgroup", expression.result, mode),
            closeBrace.position);
    } else {
        // Otherwise, just return a nucleus
-        return this.parseNucleus(pos);
+        return this.parseNucleus(pos, mode);
    }
 };

-
 // A list of 1-argument color functions
 var colorFuncs = [
    "\\blue", "\\orange", "\\pink", "\\red", "\\green", "\\gray", "\\purple"
@ -200,12 +214,12 @@ var namedFns = [

 // Parses a "nucleus", which is either a single token from the tokenizer or a
 // function and its arguments
-Parser.prototype.parseNucleus = function(pos) {
-    var nucleus = this.lexer.lex(pos);
+Parser.prototype.parseNucleus = function(pos, mode) {
+    var nucleus = this.lexer.lex(pos, mode);

    if (utils.contains(colorFuncs, nucleus.type)) {
        // If this is a color function, parse its argument and return
-        var group = this.parseGroup(nucleus.position);
+        var group = this.parseGroup(nucleus.position, mode);
        if (group) {
            var atoms;
            if (group.result.type === "ordgroup") {
@ -215,55 +229,66 @@ Parser.prototype.parseNucleus = function(pos) {
            }
            return new ParseResult(
                new ParseNode("color",
-                    {color: nucleus.type.slice(1), value: atoms}),
+                    {color: nucleus.type.slice(1), value: atoms}, mode),
                group.position);
        } else {
            throw new ParseError(
                "Expected group after '" + nucleus.text + "'");
        }
-    } else if (utils.contains(sizeFuncs, nucleus.type)) {
+    } else if (mode === "math" && utils.contains(sizeFuncs, nucleus.type)) {
        // If this is a size function, parse its argument and return
-        var group = this.parseGroup(nucleus.position);
+        var group = this.parseGroup(nucleus.position, mode);
        if (group) {
            return new ParseResult(
                new ParseNode("sizing", {
                    size: "size" + (utils.indexOf(sizeFuncs, nucleus.type) + 1),
                    value: group.result
-                }),
+                }, mode),
                group.position);
        } else {
            throw new ParseError(
                "Expected group after '" + nucleus.text + "'");
        }
-    } else if (utils.contains(namedFns, nucleus.type)) {
+    } else if (mode === "math" && utils.contains(namedFns, nucleus.type)) {
        // If this is a named function, just return it plain
        return new ParseResult(
-            new ParseNode("namedfn", nucleus.text),
+            new ParseNode("namedfn", nucleus.text, mode),
            nucleus.position);
    } else if (nucleus.type === "\\llap" || nucleus.type === "\\rlap") {
        // If this is an llap or rlap, parse its argument and return
-        var group = this.parseGroup(nucleus.position);
+        var group = this.parseGroup(nucleus.position, mode);
        if (group) {
            return new ParseResult(
-                new ParseNode(nucleus.type.slice(1), group.result),
+                new ParseNode(nucleus.type.slice(1), group.result, mode),
                group.position);
        } else {
            throw new ParseError(
                "Expected group after '" + nucleus.text + "'");
        }
-    } else if (nucleus.type === "\\dfrac" || nucleus.type === "\\frac" ||
-            nucleus.type === "\\tfrac") {
+    } else if (mode === "math" && nucleus.type === "\\text") {
+        var group = this.parseGroup(nucleus.position, "text");
+        if (group) {
+            return new ParseResult(
+                new ParseNode(nucleus.type.slice(1), group.result, mode),
+                group.position);
+        } else {
+            throw new ParseError(
+                "Expected group after '" + nucleus.text + "'");
+        }
+    } else if (mode === "math" && (nucleus.type === "\\dfrac" ||
+                                   nucleus.type === "\\frac" ||
+                                   nucleus.type === "\\tfrac")) {
        // If this is a frac, parse its two arguments and return
-        var numer = this.parseGroup(nucleus.position);
+        var numer = this.parseGroup(nucleus.position, mode);
        if (numer) {
-            var denom = this.parseGroup(numer.position);
+            var denom = this.parseGroup(numer.position, mode);
            if (denom) {
                return new ParseResult(
                    new ParseNode("frac", {
                        numer: numer.result,
                        denom: denom.result,
                        size: nucleus.type.slice(1)
-                    }),
+                    }, mode),
                    denom.position);
            } else {
                throw new ParseError("Expected denominator after '" +
@ -273,17 +298,17 @@ Parser.prototype.parseNucleus = function(pos) {
            throw new ParseError("Parse error: Expected numerator after '" +
                nucleus.type + "'");
        }
-    } else if (nucleus.type === "\\KaTeX") {
+    } else if (mode === "math" && nucleus.type === "\\KaTeX") {
        // If this is a KaTeX node, return the special katex result
        return new ParseResult(
-            new ParseNode("katex", null),
+            new ParseNode("katex", null, mode),
            nucleus.position
        );
-    } else if (symbols[nucleus.text]) {
+    } else if (symbols[mode][nucleus.text]) {
        // Otherwise if this is a no-argument function, find the type it
        // corresponds to in the symbols map
        return new ParseResult(
-            new ParseNode(symbols[nucleus.text].group, nucleus.text),
+            new ParseNode(symbols[mode][nucleus.text].group, nucleus.text, mode),
            nucleus.position);
    } else {
        // Otherwise, we couldn't parse it
--- a/buildTree.js
+++ b/buildTree.js
@ -50,6 +50,7 @@ var groupToType = {
    ordgroup: "mord",
    namedfn: "mop",
    katex: "mord",
+    text: "mord",
 };

 var getTypeOfGroup = function(group) {
@ -69,11 +70,17 @@ var getTypeOfGroup = function(group) {

 var groupTypes = {
    mathord: function(group, options, prev) {
-        return makeSpan(["mord", options.color], [mathit(group.value)]);
+        return makeSpan(
+            ["mord", options.color],
+            [mathit(group.value, group.mode)]
+        );
    },

    textord: function(group, options, prev) {
-        return makeSpan(["mord", options.color], [mathrm(group.value)]);
+        return makeSpan(
+            ["mord", options.color],
+            [mathrm(group.value, group.mode)]
+        );
    },

    bin: function(group, options, prev) {
@ -88,15 +95,23 @@ var groupTypes = {
            group.type = "ord";
            className = "mord";
        }
-        return makeSpan([className, options.color], [mathrm(group.value)]);
+        return makeSpan(
+            [className, options.color],
+            [mathrm(group.value, group.mode)]
+        );
    },

    rel: function(group, options, prev) {
-        return makeSpan(["mrel", options.color], [mathrm(group.value)]);
+        return makeSpan(
+            ["mrel", options.color],
+            [mathrm(group.value, group.mode)]
+        );
    },

-    amsrel: function(group, options, prev) {
-        return makeSpan(["mrel", options.color], [amsrm(group.value)]);
+    text: function(group, options, prev) {
+        return makeSpan(["text mord", options.style.cls()],
+            [buildGroup(group.value, options.reset())]
+        );
    },

    supsub: function(group, options, prev) {
@ -185,11 +200,17 @@ var groupTypes = {
    },

    open: function(group, options, prev) {
-        return makeSpan(["mopen", options.color], [mathrm(group.value)]);
+        return makeSpan(
+            ["mopen", options.color],
+            [mathrm(group.value, group.mode)]
+        );
    },

    close: function(group, options, prev) {
-        return makeSpan(["mclose", options.color], [mathrm(group.value)]);
+        return makeSpan(
+            ["mclose", options.color],
+            [mathrm(group.value, group.mode)]
+        );
    },

    frac: function(group, options, prev) {
@ -283,8 +304,14 @@ var groupTypes = {
    },

    spacing: function(group, options, prev) {
-        if (group.value === "\\ " || group.value === "\\space") {
-            return makeSpan(["mord", "mspace"], [mathrm(group.value)]);
+        if (group.value === "\\ " || group.value === "\\space" ||
+            group.value === " ") {
+            return makeSpan(
+                ["mord", "mspace"],
+                [mathrm(group.value, group.mode)]
+            );
+        } else if(group.value === "~") {
+            return makeSpan(["mord", "mspace"], [mathrm(" ", group.mode)]);
        } else {
            var spacingClassMap = {
                "\\qquad": "qquad",
@ -311,7 +338,10 @@ var groupTypes = {
    },

    punct: function(group, options, prev) {
-        return makeSpan(["mpunct", options.color], [mathrm(group.value)]);
+        return makeSpan(
+            ["mpunct", options.color],
+            [mathrm(group.value, group.mode)]
+        );
    },

    ordgroup: function(group, options, prev) {
@ -323,26 +353,26 @@ var groupTypes = {
    namedfn: function(group, options, prev) {
        var chars = [];
        for (var i = 1; i < group.value.length; i++) {
-            chars.push(mathrm(group.value[i]));
+            chars.push(mathrm(group.value[i], group.mode));
        }

        return makeSpan(["mop", options.color], chars);
    },

    katex: function(group, options, prev) {
-        var k = makeSpan(["k"], [mathrm("K")]);
-        var a = makeSpan(["a"], [mathrm("A")]);
+        var k = makeSpan(["k"], [mathrm("K", group.mode)]);
+        var a = makeSpan(["a"], [mathrm("A", group.mode)]);

        a.height = (a.height + 0.2) * 0.75;
        a.depth = (a.height - 0.2) * 0.75;

-        var t = makeSpan(["t"], [mathrm("T")]);
-        var e = makeSpan(["e"], [mathrm("E")]);
+        var t = makeSpan(["t"], [mathrm("T", group.mode)]);
+        var e = makeSpan(["e"], [mathrm("E", group.mode)]);

        e.height = (e.height - 0.2155);
        e.depth = (e.depth + 0.2155);

-        var x = makeSpan(["x"], [mathrm("X")]);
+        var x = makeSpan(["x"], [mathrm("X", group.mode)]);

        return makeSpan(["katex-logo", options.color], [k, a, t, e, x]);
    },
@ -407,9 +437,9 @@ var buildGroup = function(group, options, prev) {
    }
 };

-var makeText = function(value, style) {
-    if (symbols[value].replace) {
-        value = symbols[value].replace;
+var makeText = function(value, style, mode) {
+    if (symbols[mode][value].replace) {
+        value = symbols[mode][value].replace;
    }

    var metrics = fontMetrics.getCharacterMetrics(value, style);
@ -432,15 +462,15 @@ var makeText = function(value, style) {
    }
 };

-var mathit = function(value) {
-    return makeSpan(["mathit"], [makeText(value, "math-italic")]);
+var mathit = function(value, mode) {
+    return makeSpan(["mathit"], [makeText(value, "math-italic", mode)]);
 };

-var mathrm = function(value) {
-    if (symbols[value].font === "main") {
-        return makeText(value, "main-regular");
+var mathrm = function(value, mode) {
+    if (symbols[mode][value].font === "main") {
+        return makeText(value, "main-regular", mode);
    } else {
-        return makeSpan(["amsrm"], [makeText(value, "ams-regular")]);
+        return makeSpan(["amsrm"], [makeText(value, "ams-regular", mode)]);
    }
 };

--- a/symbols.js
+++ b/symbols.js
--- a/test/huxley/Huxleyfile
+++ b/test/huxley/Huxleyfile
@ -34,5 +34,8 @@ url=http://localhost:7936/test/huxley/test.html?m=\Huge{x}\LARGE{y}\normalsize{z
 [SizingBaseline]
 url=http://localhost:7936/test/huxley/test.html?m=\tiny{a+b}a+b\Huge{a+b}&pre=x&post=M

+[Text]
+url=http://localhost:7936/test/huxley/test.html?m=\frac{a}{b}\text{c {ab} \ e}+fg
+
 [KaTeX]
-url=http://localhost:7936/test/huxley/test.html?m=\KaTeX
+url=http://localhost:7936/test/huxley/test.html?m=\KaTeX
--- a/test/huxley/Text.huxley/record.json
+++ b/test/huxley/Text.huxley/record.json
@ -0,0 +1 @@
+{"py/object": "huxley.run.Test", "screen_size": {"py/tuple": [1024, 768]}, "steps": [{"py/object": "huxley.steps.ScreenshotTestStep", "index": 0, "offset_time": 0}]}
--- a/test/huxley/Text.huxley/screenshot0.png
+++ b/test/huxley/Text.huxley/screenshot0.png
--- a/test/katex-tests.js
+++ b/test/katex-tests.js
@ -447,3 +447,52 @@ describe("A sizing parser", function() {
        }).toThrow();
    });
 });
+
+describe("A text parser", function() {
+    var textExpression = "\\text{a b}";
+    var badTextExpression = "\\text{a b%}";
+    var nestedTextExpression = "\\text{a {b} \\blue{c}}";
+    var spaceTextExpression = "\\text{  a \\ }";
+
+    it("should not fail", function() {
+        expect(function() {
+            parseTree(textExpression);
+        }).not.toThrow();
+    });
+
+    it("should produce a text", function() {
+        var parse = parseTree(textExpression)[0];
+
+        expect(parse.type).toMatch("text");
+        expect(parse.value).toBeDefined();
+    });
+
+    it("should produce textords instead of mathords", function() {
+        var parse = parseTree(textExpression)[0];
+        var group = parse.value.value;
+
+        expect(group[0].type).toMatch("textord");
+    });
+
+    it("should not parse bad text", function() {
+        expect(function() {
+            parseTree(badTextExpression);
+        }).toThrow();
+    });
+
+    it("should parse nested expressions", function() {
+        expect(function() {
+            parseTree(nestedTextExpression);
+        }).not.toThrow();
+    });
+
+    it("should contract spaces", function() {
+        var parse = parseTree(spaceTextExpression)[0];
+        var group = parse.value.value;
+
+        expect(group[0].type).toMatch("spacing");
+        expect(group[1].type).toMatch("textord");
+        expect(group[2].type).toMatch("spacing");
+        expect(group[3].type).toMatch("spacing");
+    });
+});
				`@ -0,0 +1 @@`
				`{"py/object": "huxley.run.Test", "screen_size": {"py/tuple": [1024, 768]}, "steps": [{"py/object": "huxley.steps.ScreenshotTestStep", "index": 0, "offset_time": 0}]}`