scribble-math/Parser.js
Alex Lopatin ba84964d6f Add \enspace (0.5em space)
Summary:
TeX spacing is complicated - there's \enskip and \enspace and the two are
subtly different in some weird way. But \enspace seems to be the preferred
half em space in LaTeX, and the only one which MathJax supports.

Test Plan: Parsed and rendered correctly.

Reviewers: alpert

Reviewed By: alpert

Differential Revision: http://phabricator.khanacademy.org/D6437
2014-01-29 18:14:12 -08:00

430 lines
12 KiB
JavaScript

var Lexer = require("./Lexer");
var utils = require("./utils");
var ParseError = require("./ParseError");
// Main Parser class. The constructor itself stores nothing; `parse` attaches
// a fresh lexer to the instance each time it is called.
function Parser() {}
// Value handed back by the Parser.parse* methods. It pairs what was parsed
// (`result`) with the lexer position at which parsing should resume
// (`position`).
function ParseResult(result, newPosition) {
    this.result = result;
    this.position = newPosition;
}
// A single node of the resulting parse tree: a `type` tag plus the `value`
// associated with it.
function ParseNode(type, value) {
    this.type = type;
    this.value = value;
}
// Verifies that `result` (a token returned by the lexer) has the given
// `type`. Returns nothing on success and throws a ParseError naming both the
// expected and the actual type otherwise.
var expect = function(result, type) {
    if (result.type === type) {
        return;
    }
    var message = "Expected '" + type + "', got '" + result.type + "'";
    throw new ParseError(message);
};
// Main parsing function: parses an entire input and returns the resulting
// list of parse nodes. `input` is passed unchanged to a new Lexer, which is
// stored on the instance so the other parse* methods can read tokens from it.
// A failed parse is reported by the ParseError thrown further down the call
// chain, not by a special return value.
Parser.prototype.parse = function(input) {
    this.lexer = new Lexer(input);
    // Position 0 is the start of the input.
    return this.parseInput(0).result;
};
// Parses a whole input starting at `pos`: one expression that must be
// followed directly by the end-of-input token. Returns the expression's
// ParseResult; `expect` throws if the next token is not "EOF".
Parser.prototype.parseInput = function(pos) {
    var parsed = this.parseExpression(pos);
    // Anything left over after the expression means the input did not parse
    // completely.
    var endToken = this.lexer.lex(parsed.position);
    expect(endToken, "EOF");
    return parsed;
};
// Parses an "expression": zero or more consecutive atoms starting at `pos`.
// Returns a ParseResult whose result is the list of atom nodes (possibly
// empty) and whose position is where the first non-atom begins.
Parser.prototype.parseExpression = function(pos) {
    var atoms = [];
    // Keep consuming atoms until parseAtom reports that none is available.
    for (var atom = this.parseAtom(pos); atom; atom = this.parseAtom(pos)) {
        atoms.push(atom.result);
        pos = atom.position;
    }
    return new ParseResult(atoms, pos);
};
// Parses a superscript expression at `pos`, like "^3" or a prime mark.
//
// Returns the ParseResult of the group following "^", or, for a "'" token, a
// ParseResult holding a "\\prime" textord positioned just after the mark.
// Returns null when the token at `pos` starts neither form. Throws a
// ParseError when "^" is not followed by a group.
Parser.prototype.parseSuperscript = function(pos) {
    var sup = this.lexer.lex(pos);

    if (sup.type === "^") {
        var group = this.parseGroup(sup.position);
        if (!group) {
            throw new ParseError("Couldn't find group after '^'");
        }
        return group;
    }

    if (sup.type === "'") {
        // The prime mark takes no argument; it stands for \prime by itself.
        return new ParseResult(
            new ParseNode("textord", "\\prime"), sup.position);
    }

    return null;
};
// Parses a subscript expression at `pos`, like "_3". Returns the ParseResult
// of the group following the "_", or null when the token at `pos` is not
// "_". Throws a ParseError when "_" is not followed by a group.
Parser.prototype.parseSubscript = function(pos) {
    var marker = this.lexer.lex(pos);
    if (marker.type !== "_") {
        return null;
    }

    // A subscript marker must be followed by its argument.
    var argument = this.parseGroup(marker.position);
    if (!argument) {
        throw new ParseError("Couldn't find group after '_'");
    }
    return argument;
};
// Parses an atom: a nucleus followed by an optional superscript and an
// optional subscript, in either order.
//
// When no script follows, the result of parseGroup is returned unchanged
// (null if there was no nucleus either). Otherwise the result is a "supsub"
// node whose base, sup and sub entries are undefined where that part is
// missing. Throws a ParseError if a second superscript or a second subscript
// is found.
Parser.prototype.parseAtom = function(pos) {
    var nucleus = this.parseGroup(pos);
    var base;
    var cursor = pos;
    if (nucleus) {
        base = nucleus.result;
        cursor = nucleus.position;
    }

    var sup;
    var sub;
    var script;
    var scanning = true;
    // Each pass tries a superscript first and a subscript second, and stops
    // as soon as neither is present at the cursor.
    while (scanning) {
        if ((script = this.parseSuperscript(cursor))) {
            if (sup) {
                throw new ParseError("Parse error: Double superscript");
            }
            cursor = script.position;
            sup = script.result;
        } else if ((script = this.parseSubscript(cursor))) {
            if (sub) {
                throw new ParseError("Parse error: Double subscript");
            }
            cursor = script.position;
            sub = script.result;
        } else {
            scanning = false;
        }
    }

    if (!sup && !sub) {
        return nucleus;
    }
    return new ParseResult(
        new ParseNode("supsub", {base: base, sup: sup, sub: sub}),
        cursor);
};
// Parses a group at `pos`: either an expression wrapped in braces (like
// "{x+y}"), returned as an "ordgroup" node holding the expression's node
// list, or otherwise whatever parseNucleus produces for a single nucleus
// (like "x"). `expect` throws if an opened brace is not closed.
Parser.prototype.parseGroup = function(pos) {
    var first = this.lexer.lex(pos);
    if (first.type !== "{") {
        // No brace: the group is a single nucleus, parsed from the same spot.
        return this.parseNucleus(pos);
    }

    var inner = this.parseExpression(first.position);
    var closing = this.lexer.lex(inner.position);
    expect(closing, "}");
    return new ParseResult(
        new ParseNode("ordgroup", inner.result),
        closing.position);
};
// The color functions, each taking one argument. parseNucleus strips the
// leading backslash from the matched name to get the node's `color` value.
var colorFuncs = [
    "\\blue",
    "\\orange",
    "\\pink",
    "\\red",
    "\\green",
    "\\gray",
    "\\purple"
];
// The sizing functions, each taking one argument. The order is significant:
// parseNucleus turns a function's 1-based position in this list into the
// node's size name ("size1" for the first entry, and so on).
var sizeFuncs = [
    "\\tiny",
    "\\scriptsize",
    "\\footnotesize",
    "\\small",
    "\\normalsize",
    "\\large",
    "\\Large",
    "\\LARGE",
    "\\huge",
    "\\Huge"
];
// Table of token types that take no arguments and are simply turned into a
// node of a particular group. Each key is the group (the resulting node
// type); each value lists the lexer token types that belong to that group.
//
// For example, a token of type "\\colon" is listed under "punct", so it
// becomes a node of type "punct".
var copyFuncs = {
    "textord": [
        "textord", "\\$", "\\%", "\\angle", "\\infty", "\\prime", "\\triangle",
        "\\Gamma", "\\Delta", "\\Theta", "\\Lambda", "\\Xi", "\\Pi", "\\Sigma",
        "\\Upsilon", "\\Phi", "\\Psi", "\\Omega"
    ],
    "mathord": [
        "mathord", "\\alpha", "\\beta", "\\gamma", "\\delta", "\\epsilon",
        "\\zeta", "\\eta", "\\theta", "\\iota", "\\kappa", "\\lambda", "\\mu",
        "\\nu", "\\xi", "\\omicron", "\\pi", "\\rho", "\\sigma", "\\tau",
        "\\upsilon", "\\phi", "\\chi", "\\psi", "\\omega", "\\varepsilon",
        "\\vartheta", "\\varpi", "\\varrho", "\\varsigma", "\\varphi"
    ],
    "bin": ["bin", "\\cdot", "\\circ", "\\div", "\\pm", "\\times"],
    "open": ["open", "\\langle", "\\lvert"],
    "close": ["close", "\\rangle", "\\rvert"],
    "rel": [
        "rel", "\\approx", "\\cong", "\\ge", "\\geq", "\\gets", "\\in",
        "\\leftarrow", "\\le", "\\leq", "\\ne", "\\neq", "\\rightarrow", "\\to"
    ],
    "amsrel": ["\\ngeq", "\\nleq"],
    "spacing": [
        "\\!", "\\ ", "\\,", "\\:", "\\;", "\\enspace", "\\qquad", "\\quad",
        "\\space"
    ],
    "punct": ["punct", "\\colon"],
    "namedfn": [
        "\\arcsin", "\\arccos", "\\arctan", "\\arg", "\\cos", "\\cosh", "\\cot",
        "\\coth", "\\csc", "\\deg", "\\dim", "\\exp", "\\hom", "\\ker", "\\lg",
        "\\ln", "\\log", "\\sec", "\\sin", "\\sinh", "\\tan", "\\tanh"
    ]
};
// Build the reverse index of copyFuncs: a map from each listed token type to
// the group (node type) it belongs to, so that a token can be classified
// with a single property lookup.
var funcToType = {};
for (var type in copyFuncs) {
    // for...in also visits enumerable properties inherited through the
    // prototype chain; only copyFuncs' own keys are node groups.
    if (!Object.prototype.hasOwnProperty.call(copyFuncs, type)) {
        continue;
    }
    for (var i = 0; i < copyFuncs[type].length; i++) {
        var func = copyFuncs[type][i];
        funcToType[func] = type;
    }
}
// Parses the single group argument that must follow the function token
// `funcToken`. Returns the group's ParseResult, or throws a ParseError naming
// the function's text when no group could be parsed.
function parseRequiredGroup(parser, funcToken) {
    var group = parser.parseGroup(funcToken.position);
    if (!group) {
        throw new ParseError(
            "Expected group after '" + funcToken.text + "'");
    }
    return group;
}

// Parses a "nucleus" at `pos`, which is either a single token from the
// tokenizer or a function and its arguments.
//
// Returns a ParseResult holding one of:
//   - "color":  {color, value} for a color function; `value` is always a list
//               of nodes (an "ordgroup" argument is unwrapped)
//   - "sizing": {size, value} for a sizing function
//   - "llap" / "rlap": the argument node
//   - "frac":   {numer, denom, size} for \frac, \dfrac and \tfrac
//   - "katex":  null, for \KaTeX
//   - the group found in funcToType, with the token's text as value
// or null when the token is none of the above.
//
// Throws a ParseError when a function is missing a required argument.
Parser.prototype.parseNucleus = function(pos) {
    var nucleus = this.lexer.lex(pos);
    var group;

    if (utils.contains(colorFuncs, nucleus.type)) {
        group = parseRequiredGroup(this, nucleus);
        // A braced argument contributes its children directly; any other
        // argument is wrapped so that `value` is a list in both cases.
        var atoms = group.result.type === "ordgroup" ?
            group.result.value : [group.result];
        return new ParseResult(
            new ParseNode("color",
                {color: nucleus.type.slice(1), value: atoms}),
            group.position);
    }

    if (utils.contains(sizeFuncs, nucleus.type)) {
        group = parseRequiredGroup(this, nucleus);
        return new ParseResult(
            new ParseNode("sizing", {
                // Size names are 1-based positions within sizeFuncs.
                size: "size" + (utils.indexOf(sizeFuncs, nucleus.type) + 1),
                value: group.result
            }),
            group.position);
    }

    if (nucleus.type === "\\llap" || nucleus.type === "\\rlap") {
        group = parseRequiredGroup(this, nucleus);
        return new ParseResult(
            new ParseNode(nucleus.type.slice(1), group.result),
            group.position);
    }

    if (nucleus.type === "\\dfrac" || nucleus.type === "\\frac" ||
            nucleus.type === "\\tfrac") {
        var numer = this.parseGroup(nucleus.position);
        if (!numer) {
            throw new ParseError("Parse error: Expected numerator after '" +
                nucleus.type + "'");
        }
        var denom = this.parseGroup(numer.position);
        if (!denom) {
            throw new ParseError("Expected denominator after '" +
                nucleus.type + "'");
        }
        return new ParseResult(
            new ParseNode("frac", {
                numer: numer.result,
                denom: denom.result,
                size: nucleus.type.slice(1)
            }),
            denom.position);
    }

    if (nucleus.type === "\\KaTeX") {
        return new ParseResult(
            new ParseNode("katex", null),
            nucleus.position);
    }

    // Own-property check rather than truthiness: funcToType is a plain
    // object, so a bare lookup would also match names inherited from
    // Object.prototype (e.g. "constructor") should such a token type ever
    // reach this point.
    if (Object.prototype.hasOwnProperty.call(funcToType, nucleus.type)) {
        return new ParseResult(
            new ParseNode(funcToType[nucleus.type], nucleus.text),
            nucleus.position);
    }

    // Otherwise, we couldn't parse it
    return null;
};
module.exports = Parser;