First attempt at \text function

Summary:
Make all of the parsing functions keep track of whether they are
parsing in math mode or text mode. Then, add a separate lexing function to lex
text mode, which differs from math mode in two ways: instead of skipping
whitespace, it turns a run of spaces (or a backslash followed by spaces) into a
single space token, and it allows a different set of characters.

Test Plan:
 - See that the normal tests work
 - See that the huxley screenshot looks reasonable
 - See that none of the other huxley screenshots changed

Reviewers: alpert

Reviewed By: alpert

Differential Revision: http://phabricator.khanacademy.org/D7578
This commit is contained in:
Emily Eisenberg 2014-03-26 22:17:41 -04:00
parent 2eca338e23
commit 7723d3dcaf
8 changed files with 785 additions and 626 deletions

View File

@ -13,7 +13,7 @@ function LexResult(type, text, position) {
}
// "normal" types of tokens
var normals = [
var mathNormals = [
[/^[/|@."`0-9]/, "textord"],
[/^[a-zA-Z]/, "mathord"],
[/^[*+-]/, "bin"],
@ -28,17 +28,30 @@ var normals = [
[/^[)\]?!]/, "close"]
];
// "normal" types of tokens in text mode. Each entry pairs a regex, anchored at
// the start of the remaining input, with the token type it produces.
var textNormals = [
    // The hyphen is escaped so that it is matched literally. Left unescaped,
    // ")-=" is a character range (U+0029 through U+003D) that also admits "<",
    // which is not one of the characters listed here.
    [/^[a-zA-Z0-9`!@*()\-=+\[\]'";:?\/.,]/, "textord"],
    [/^{/, "{"],
    [/^}/, "}"]
];
// Build a regex to easily parse the functions
var anyFunc = /^\\(?:[a-zA-Z]+|.)/;
// Lex a single token
Lexer.prototype.lex = function(pos) {
Lexer.prototype._innerLex = function(pos, normals, ignoreWhitespace) {
var input = this._input.slice(pos);
// Get rid of whitespace
var whitespace = input.match(/^\s*/)[0];
pos += whitespace.length;
input = input.slice(whitespace.length);
if (ignoreWhitespace) {
var whitespace = input.match(/^\s*/)[0];
pos += whitespace.length;
input = input.slice(whitespace.length);
} else {
// Do the funky concatenation of whitespace
var whitespace = input.match(/^( +|\\ +)/);
if (whitespace !== null) {
return new LexResult(" ", " ", pos + whitespace[0].length);
}
}
// If there's no more input to parse, return an EOF token
if (input.length === 0) {
@ -66,6 +79,15 @@ Lexer.prototype.lex = function(pos) {
// We didn't match any of the tokens, so throw an error.
throw new ParseError("Unexpected character: '" + input[0] +
"' at position " + pos);
}
// Lex a single token starting at `pos`, using the token table and whitespace
// handling that belong to `mode`:
//  - "math": uses mathNormals and skips leading whitespace
//  - "text": uses textNormals and does not skip whitespace (a run of spaces
//    is returned as a single " " token instead)
// Throws a ParseError for any other mode.
Lexer.prototype.lex = function(pos, mode) {
    if (mode === "math") {
        return this._innerLex(pos, mathNormals, true);
    } else if (mode === "text") {
        return this._innerLex(pos, textNormals, false);
    }
    // Fail loudly on an unrecognized mode instead of returning undefined,
    // which callers would only trip over later when reading `.type`.
    throw new ParseError("Unknown lexing mode: '" + mode + "'");
};
module.exports = Lexer;

117
Parser.js
View File

@ -16,9 +16,10 @@ function ParseResult(result, newPosition) {
}
// The resulting parse tree nodes of the parse tree.
function ParseNode(type, value) {
function ParseNode(type, value, mode) {
this.type = type;
this.value = value;
this.mode = mode;
}
// Checks a result to make sure it has the right type, and throws an
@ -37,27 +38,27 @@ Parser.prototype.parse = function(input) {
this.lexer = new Lexer(input);
// Try to parse the input
var parse = this.parseInput(0);
var parse = this.parseInput(0, "math");
return parse.result;
};
// Parses an entire input tree
Parser.prototype.parseInput = function(pos) {
Parser.prototype.parseInput = function(pos, mode) {
// Parse an expression
var expression = this.parseExpression(pos);
var expression = this.parseExpression(pos, mode);
// If we succeeded, make sure there's an EOF at the end
var EOF = this.lexer.lex(expression.position);
var EOF = this.lexer.lex(expression.position, mode);
expect(EOF, "EOF");
return expression;
};
// Parses an "expression", which is a list of atoms
Parser.prototype.parseExpression = function(pos) {
Parser.prototype.parseExpression = function(pos, mode) {
// Start with a list of nodes
var expression = [];
while (true) {
// Try to parse atoms
var parse = this.parseAtom(pos);
var parse = this.parseAtom(pos, mode);
if (parse) {
// Copy them into the list
expression.push(parse.result);
@ -70,12 +71,16 @@ Parser.prototype.parseExpression = function(pos) {
};
// Parses a superscript expression, like "^3"
Parser.prototype.parseSuperscript = function(pos) {
Parser.prototype.parseSuperscript = function(pos, mode) {
if (mode !== "math") {
throw new ParseError("Trying to parse superscript in non-math mode");
}
// Try to parse a "^" character
var sup = this.lexer.lex(pos);
var sup = this.lexer.lex(pos, mode);
if (sup.type === "^") {
// If we got one, parse the corresponding group
var group = this.parseGroup(sup.position);
var group = this.parseGroup(sup.position, mode);
if (group) {
return group;
} else {
@ -85,19 +90,23 @@ Parser.prototype.parseSuperscript = function(pos) {
} else if (sup.type === "'") {
var pos = sup.position;
return new ParseResult(
new ParseNode("textord", "\\prime"), sup.position);
new ParseNode("textord", "\\prime"), sup.position, mode);
} else {
return null;
}
};
// Parses a subscript expression, like "_3"
Parser.prototype.parseSubscript = function(pos) {
Parser.prototype.parseSubscript = function(pos, mode) {
if (mode !== "math") {
throw new ParseError("Trying to parse subscript in non-math mode");
}
// Try to parse a "_" character
var sub = this.lexer.lex(pos);
var sub = this.lexer.lex(pos, mode);
if (sub.type === "_") {
// If we got one, parse the corresponding group
var group = this.parseGroup(sub.position);
var group = this.parseGroup(sub.position, mode);
if (group) {
return group;
} else {
@ -111,12 +120,18 @@ Parser.prototype.parseSubscript = function(pos) {
// Parses an atom, which consists of a nucleus, and an optional superscript and
// subscript
Parser.prototype.parseAtom = function(pos) {
Parser.prototype.parseAtom = function(pos, mode) {
// Parse the nucleus
var nucleus = this.parseGroup(pos);
var nucleus = this.parseGroup(pos, mode);
var nextPos = pos;
var nucleusNode;
// Text mode doesn't have superscripts or subscripts, so we only parse the
// nucleus in this case
if (mode === "text") {
return nucleus;
}
if (nucleus) {
nextPos = nucleus.position;
nucleusNode = nucleus.result;
@ -129,7 +144,7 @@ Parser.prototype.parseAtom = function(pos) {
// depending on whether those succeed, we return the correct type.
while (true) {
var node;
if ((node = this.parseSuperscript(nextPos))) {
if ((node = this.parseSuperscript(nextPos, mode))) {
if (sup) {
throw new ParseError("Parse error: Double superscript");
}
@ -137,7 +152,7 @@ Parser.prototype.parseAtom = function(pos) {
sup = node.result;
continue;
}
if ((node = this.parseSubscript(nextPos))) {
if ((node = this.parseSubscript(nextPos, mode))) {
if (sub) {
throw new ParseError("Parse error: Double subscript");
}
@ -151,7 +166,7 @@ Parser.prototype.parseAtom = function(pos) {
if (sup || sub) {
return new ParseResult(
new ParseNode("supsub", {base: nucleusNode, sup: sup,
sub: sub}),
sub: sub}, mode),
nextPos);
} else {
return nucleus;
@ -160,25 +175,24 @@ Parser.prototype.parseAtom = function(pos) {
// Parses a group, which is either a single nucleus (like "x") or an expression
// in braces (like "{x+y}")
Parser.prototype.parseGroup = function(pos) {
var start = this.lexer.lex(pos);
Parser.prototype.parseGroup = function(pos, mode) {
var start = this.lexer.lex(pos, mode);
// Try to parse an open brace
if (start.type === "{") {
// If we get a brace, parse an expression
var expression = this.parseExpression(start.position);
var expression = this.parseExpression(start.position, mode);
// Make sure we get a close brace
var closeBrace = this.lexer.lex(expression.position);
var closeBrace = this.lexer.lex(expression.position, mode);
expect(closeBrace, "}");
return new ParseResult(
new ParseNode("ordgroup", expression.result),
new ParseNode("ordgroup", expression.result, mode),
closeBrace.position);
} else {
// Otherwise, just return a nucleus
return this.parseNucleus(pos);
return this.parseNucleus(pos, mode);
}
};
// A list of 1-argument color functions
var colorFuncs = [
"\\blue", "\\orange", "\\pink", "\\red", "\\green", "\\gray", "\\purple"
@ -200,12 +214,12 @@ var namedFns = [
// Parses a "nucleus", which is either a single token from the tokenizer or a
// function and its arguments
Parser.prototype.parseNucleus = function(pos) {
var nucleus = this.lexer.lex(pos);
Parser.prototype.parseNucleus = function(pos, mode) {
var nucleus = this.lexer.lex(pos, mode);
if (utils.contains(colorFuncs, nucleus.type)) {
// If this is a color function, parse its argument and return
var group = this.parseGroup(nucleus.position);
var group = this.parseGroup(nucleus.position, mode);
if (group) {
var atoms;
if (group.result.type === "ordgroup") {
@ -215,55 +229,66 @@ Parser.prototype.parseNucleus = function(pos) {
}
return new ParseResult(
new ParseNode("color",
{color: nucleus.type.slice(1), value: atoms}),
{color: nucleus.type.slice(1), value: atoms}, mode),
group.position);
} else {
throw new ParseError(
"Expected group after '" + nucleus.text + "'");
}
} else if (utils.contains(sizeFuncs, nucleus.type)) {
} else if (mode === "math" && utils.contains(sizeFuncs, nucleus.type)) {
// If this is a size function, parse its argument and return
var group = this.parseGroup(nucleus.position);
var group = this.parseGroup(nucleus.position, mode);
if (group) {
return new ParseResult(
new ParseNode("sizing", {
size: "size" + (utils.indexOf(sizeFuncs, nucleus.type) + 1),
value: group.result
}),
}, mode),
group.position);
} else {
throw new ParseError(
"Expected group after '" + nucleus.text + "'");
}
} else if (utils.contains(namedFns, nucleus.type)) {
} else if (mode === "math" && utils.contains(namedFns, nucleus.type)) {
// If this is a named function, just return it plain
return new ParseResult(
new ParseNode("namedfn", nucleus.text),
new ParseNode("namedfn", nucleus.text, mode),
nucleus.position);
} else if (nucleus.type === "\\llap" || nucleus.type === "\\rlap") {
// If this is an llap or rlap, parse its argument and return
var group = this.parseGroup(nucleus.position);
var group = this.parseGroup(nucleus.position, mode);
if (group) {
return new ParseResult(
new ParseNode(nucleus.type.slice(1), group.result),
new ParseNode(nucleus.type.slice(1), group.result, mode),
group.position);
} else {
throw new ParseError(
"Expected group after '" + nucleus.text + "'");
}
} else if (nucleus.type === "\\dfrac" || nucleus.type === "\\frac" ||
nucleus.type === "\\tfrac") {
} else if (mode === "math" && nucleus.type === "\\text") {
var group = this.parseGroup(nucleus.position, "text");
if (group) {
return new ParseResult(
new ParseNode(nucleus.type.slice(1), group.result, mode),
group.position);
} else {
throw new ParseError(
"Expected group after '" + nucleus.text + "'");
}
} else if (mode === "math" && (nucleus.type === "\\dfrac" ||
nucleus.type === "\\frac" ||
nucleus.type === "\\tfrac")) {
// If this is a frac, parse its two arguments and return
var numer = this.parseGroup(nucleus.position);
var numer = this.parseGroup(nucleus.position, mode);
if (numer) {
var denom = this.parseGroup(numer.position);
var denom = this.parseGroup(numer.position, mode);
if (denom) {
return new ParseResult(
new ParseNode("frac", {
numer: numer.result,
denom: denom.result,
size: nucleus.type.slice(1)
}),
}, mode),
denom.position);
} else {
throw new ParseError("Expected denominator after '" +
@ -273,17 +298,17 @@ Parser.prototype.parseNucleus = function(pos) {
throw new ParseError("Parse error: Expected numerator after '" +
nucleus.type + "'");
}
} else if (nucleus.type === "\\KaTeX") {
} else if (mode === "math" && nucleus.type === "\\KaTeX") {
// If this is a KaTeX node, return the special katex result
return new ParseResult(
new ParseNode("katex", null),
new ParseNode("katex", null, mode),
nucleus.position
);
} else if (symbols[nucleus.text]) {
} else if (symbols[mode][nucleus.text]) {
// Otherwise if this is a no-argument function, find the type it
// corresponds to in the symbols map
return new ParseResult(
new ParseNode(symbols[nucleus.text].group, nucleus.text),
new ParseNode(symbols[mode][nucleus.text].group, nucleus.text, mode),
nucleus.position);
} else {
// Otherwise, we couldn't parse it

View File

@ -50,6 +50,7 @@ var groupToType = {
ordgroup: "mord",
namedfn: "mop",
katex: "mord",
text: "mord",
};
var getTypeOfGroup = function(group) {
@ -69,11 +70,17 @@ var getTypeOfGroup = function(group) {
var groupTypes = {
mathord: function(group, options, prev) {
return makeSpan(["mord", options.color], [mathit(group.value)]);
return makeSpan(
["mord", options.color],
[mathit(group.value, group.mode)]
);
},
textord: function(group, options, prev) {
return makeSpan(["mord", options.color], [mathrm(group.value)]);
return makeSpan(
["mord", options.color],
[mathrm(group.value, group.mode)]
);
},
bin: function(group, options, prev) {
@ -88,15 +95,23 @@ var groupTypes = {
group.type = "ord";
className = "mord";
}
return makeSpan([className, options.color], [mathrm(group.value)]);
return makeSpan(
[className, options.color],
[mathrm(group.value, group.mode)]
);
},
rel: function(group, options, prev) {
return makeSpan(["mrel", options.color], [mathrm(group.value)]);
return makeSpan(
["mrel", options.color],
[mathrm(group.value, group.mode)]
);
},
amsrel: function(group, options, prev) {
return makeSpan(["mrel", options.color], [amsrm(group.value)]);
// Builds the output for a "text" group (produced by the parser for \text):
// the group's value is built with buildGroup and wrapped in a span carrying
// the "text mord" class plus the class of the current style.
// NOTE(review): the child is built with options.reset() rather than options;
// presumably this clears inherited state such as size/color -- confirm against
// the Options implementation. `prev` is unused here.
text: function(group, options, prev) {
return makeSpan(["text mord", options.style.cls()],
[buildGroup(group.value, options.reset())]
);
},
supsub: function(group, options, prev) {
@ -185,11 +200,17 @@ var groupTypes = {
},
open: function(group, options, prev) {
return makeSpan(["mopen", options.color], [mathrm(group.value)]);
return makeSpan(
["mopen", options.color],
[mathrm(group.value, group.mode)]
);
},
close: function(group, options, prev) {
return makeSpan(["mclose", options.color], [mathrm(group.value)]);
return makeSpan(
["mclose", options.color],
[mathrm(group.value, group.mode)]
);
},
frac: function(group, options, prev) {
@ -283,8 +304,14 @@ var groupTypes = {
},
spacing: function(group, options, prev) {
if (group.value === "\\ " || group.value === "\\space") {
return makeSpan(["mord", "mspace"], [mathrm(group.value)]);
if (group.value === "\\ " || group.value === "\\space" ||
group.value === " ") {
return makeSpan(
["mord", "mspace"],
[mathrm(group.value, group.mode)]
);
} else if(group.value === "~") {
return makeSpan(["mord", "mspace"], [mathrm(" ", group.mode)]);
} else {
var spacingClassMap = {
"\\qquad": "qquad",
@ -311,7 +338,10 @@ var groupTypes = {
},
punct: function(group, options, prev) {
return makeSpan(["mpunct", options.color], [mathrm(group.value)]);
return makeSpan(
["mpunct", options.color],
[mathrm(group.value, group.mode)]
);
},
ordgroup: function(group, options, prev) {
@ -323,26 +353,26 @@ var groupTypes = {
namedfn: function(group, options, prev) {
var chars = [];
for (var i = 1; i < group.value.length; i++) {
chars.push(mathrm(group.value[i]));
chars.push(mathrm(group.value[i], group.mode));
}
return makeSpan(["mop", options.color], chars);
},
katex: function(group, options, prev) {
var k = makeSpan(["k"], [mathrm("K")]);
var a = makeSpan(["a"], [mathrm("A")]);
var k = makeSpan(["k"], [mathrm("K", group.mode)]);
var a = makeSpan(["a"], [mathrm("A", group.mode)]);
a.height = (a.height + 0.2) * 0.75;
a.depth = (a.height - 0.2) * 0.75;
var t = makeSpan(["t"], [mathrm("T")]);
var e = makeSpan(["e"], [mathrm("E")]);
var t = makeSpan(["t"], [mathrm("T", group.mode)]);
var e = makeSpan(["e"], [mathrm("E", group.mode)]);
e.height = (e.height - 0.2155);
e.depth = (e.depth + 0.2155);
var x = makeSpan(["x"], [mathrm("X")]);
var x = makeSpan(["x"], [mathrm("X", group.mode)]);
return makeSpan(["katex-logo", options.color], [k, a, t, e, x]);
},
@ -407,9 +437,9 @@ var buildGroup = function(group, options, prev) {
}
};
var makeText = function(value, style) {
if (symbols[value].replace) {
value = symbols[value].replace;
var makeText = function(value, style, mode) {
if (symbols[mode][value].replace) {
value = symbols[mode][value].replace;
}
var metrics = fontMetrics.getCharacterMetrics(value, style);
@ -432,15 +462,15 @@ var makeText = function(value, style) {
}
};
var mathit = function(value) {
return makeSpan(["mathit"], [makeText(value, "math-italic")]);
var mathit = function(value, mode) {
return makeSpan(["mathit"], [makeText(value, "math-italic", mode)]);
};
var mathrm = function(value) {
if (symbols[value].font === "main") {
return makeText(value, "main-regular");
var mathrm = function(value, mode) {
if (symbols[mode][value].font === "main") {
return makeText(value, "main-regular", mode);
} else {
return makeSpan(["amsrm"], [makeText(value, "ams-regular")]);
return makeSpan(["amsrm"], [makeText(value, "ams-regular", mode)]);
}
};

1123
symbols.js

File diff suppressed because it is too large Load Diff

View File

@ -34,5 +34,8 @@ url=http://localhost:7936/test/huxley/test.html?m=\Huge{x}\LARGE{y}\normalsize{z
[SizingBaseline]
url=http://localhost:7936/test/huxley/test.html?m=\tiny{a+b}a+b\Huge{a+b}&pre=x&post=M
[Text]
url=http://localhost:7936/test/huxley/test.html?m=\frac{a}{b}\text{c {ab} \ e}+fg
[KaTeX]
url=http://localhost:7936/test/huxley/test.html?m=\KaTeX
url=http://localhost:7936/test/huxley/test.html?m=\KaTeX

View File

@ -0,0 +1 @@
{"py/object": "huxley.run.Test", "screen_size": {"py/tuple": [1024, 768]}, "steps": [{"py/object": "huxley.steps.ScreenshotTestStep", "index": 0, "offset_time": 0}]}

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

View File

@ -447,3 +447,52 @@ describe("A sizing parser", function() {
}).toThrow();
});
});
// Specs for parsing \text{...}: the argument is parsed in text mode, so
// letters become textords, spaces are significant, and characters outside the
// text-mode alphabet are rejected.
describe("A text parser", function() {
    var textExpression = "\\text{a b}";
    var badTextExpression = "\\text{a b%}";
    var nestedTextExpression = "\\text{a {b} \\blue{c}}";
    var spaceTextExpression = "\\text{ a \\ }";

    it("should not fail", function() {
        expect(function() {
            parseTree(textExpression);
        }).not.toThrow();
    });

    it("should produce a text", function() {
        var parse = parseTree(textExpression)[0];

        // toEqual rather than toMatch: toMatch is a substring/regex match and
        // would also accept a node whose type is "textord".
        expect(parse.type).toEqual("text");
        expect(parse.value).toBeDefined();
    });

    it("should produce textords instead of mathords", function() {
        var parse = parseTree(textExpression)[0];
        // parse.value is the braced group; its value is the list of nodes
        var group = parse.value.value;

        expect(group[0].type).toEqual("textord");
    });

    it("should not parse bad text", function() {
        // "%" is not a valid text-mode character
        expect(function() {
            parseTree(badTextExpression);
        }).toThrow();
    });

    it("should parse nested expressions", function() {
        expect(function() {
            parseTree(nestedTextExpression);
        }).not.toThrow();
    });

    it("should contract spaces", function() {
        var parse = parseTree(spaceTextExpression)[0];
        var group = parse.value.value;

        // " a \ " lexes as: space, "a", space, escaped space
        expect(group[0].type).toEqual("spacing");
        expect(group[1].type).toEqual("textord");
        expect(group[2].type).toEqual("spacing");
        expect(group[3].type).toEqual("spacing");
    });
});