scribble-math/Parser.js
Emily Eisenberg 925c96dbe2 Add square roots (\sqrt)
Summary:
Follow the TeXbook instructions on how to construct square roots. Using
makeCustomSizedDelim, this becomes nearly trivial.

Test Plan:
 - Make sure normal tests work
 - Make sure the new huxley test looks good, and other huxley tests haven't changed.

Reviewers: alpert

Reviewed By: alpert

Differential Revision: http://phabricator.khanacademy.org/D12918
2014-09-06 15:08:23 -07:00

589 lines
21 KiB
JavaScript

var Lexer = require("./Lexer");
var utils = require("./utils");
var symbols = require("./symbols");
var ParseError = require("./ParseError");
// Main Parser class. The constructor itself stores nothing; `parse` attaches
// the lexer for the current input as `this.lexer`, which the other parse...
// methods then read.
function Parser() {
}
// Value handed back by the Parser.parse... functions. It pairs what was
// parsed (`result`) with the lexer position at which parsing should resume
// (`position`).
function ParseResult(result, newPosition) {
    this.result = result;
    this.position = newPosition;
}
// A single node of the parse tree.
//   type  - the kind of node (e.g. "ordgroup", "supsub", "frac")
//   value - the payload for that kind of node
//   mode  - the mode the node was parsed in; some callers omit it, in which
//           case it is stored as undefined
function ParseNode(type, value, mode) {
    this.type = type;
    this.value = value;
    this.mode = mode;
}
// Asserts that a lexed token has the wanted type. Returns nothing when the
// types match; otherwise throws a ParseError that names both types and points
// at the token's position.
Parser.prototype.expect = function(result, type) {
    if (result.type === type) {
        return;
    }

    var message = "Expected '" + type + "', got '" + result.type + "'";
    throw new ParseError(message, this.lexer, result.position);
};
// Main parsing function: parses an entire input string in math mode, starting
// at position 0. A fresh Lexer for the input is stored on `this.lexer` first.
// Returns the `result` of parseInput (the list of parse nodes). No code path
// visible here returns null; a failed parse is reported by a thrown error.
Parser.prototype.parse = function(input) {
    this.lexer = new Lexer(input);
    return this.parseInput(0, "math").result;
};
// Parses a whole input: one expression that must be followed directly by the
// end of the input. Returns the expression's ParseResult; `expect` throws if
// the next token is not EOF.
Parser.prototype.parseInput = function(pos, mode) {
    var parsed = this.parseExpression(pos, mode);

    // Whatever follows the expression has to be the end of the input
    var next = this.lexer.lex(parsed.position, mode);
    this.expect(next, "EOF");

    return parsed;
};
// Parses an "expression": zero or more atoms in a row. Atoms are collected
// until parseAtom reports that it cannot parse one. Always returns a
// ParseResult holding the (possibly empty) list of nodes and the position
// just past the last atom.
Parser.prototype.parseExpression = function(pos, mode) {
    var nodes = [];
    var atom;

    while ((atom = this.parseAtom(pos, mode))) {
        nodes.push(atom.result);
        pos = atom.position;
    }

    return new ParseResult(nodes, pos);
};
// Parses a superscript expression, like "^3", or a prime ("'").
//
// Returns the ParseResult of the group following "^", a ParseResult holding a
// "\prime" textord node for "'", or null when the next token is neither.
// Throws a ParseError when called outside math mode, or when "^" is not
// followed by a group.
Parser.prototype.parseSuperscript = function(pos, mode) {
    if (mode !== "math") {
        throw new ParseError(
            "Trying to parse superscript in non-math mode", this.lexer, pos);
    }

    var sup = this.lexer.lex(pos, mode);

    if (sup.type === "^") {
        // A "^" must be followed by the group that is being raised
        var group = this.parseGroup(sup.position, mode);
        if (!group) {
            throw new ParseError(
                "Couldn't find group after '^'", this.lexer, sup.position);
        }
        return group;
    }

    if (sup.type === "'") {
        // A prime is treated as a superscript containing \prime
        return new ParseResult(
            new ParseNode("textord", "\\prime", mode), sup.position);
    }

    return null;
};
// Parses a subscript expression, like "_3".
//
// Returns the ParseResult of the group following "_", or null when the next
// token is not "_". Throws a ParseError when called outside math mode, or
// when "_" is not followed by a group.
Parser.prototype.parseSubscript = function(pos, mode) {
    if (mode !== "math") {
        throw new ParseError(
            "Trying to parse subscript in non-math mode", this.lexer, pos);
    }

    var marker = this.lexer.lex(pos, mode);
    if (marker.type !== "_") {
        return null;
    }

    // A "_" must be followed by the group that is being lowered
    var lowered = this.parseGroup(marker.position, mode);
    if (!lowered) {
        throw new ParseError(
            "Couldn't find group after '_'", this.lexer, marker.position);
    }
    return lowered;
};
// Parses an atom, which consists of a nucleus, and an optional superscript and
// subscript (in either order).
//
// Returns:
//  - in text mode, whatever parseGroup returned (text has no scripts);
//  - a ParseResult holding a "supsub" node {base, sup, sub} when at least one
//    script was found. `base` is undefined if there was no nucleus, and a
//    missing script is left undefined;
//  - otherwise the nucleus ParseResult itself, or null if there was none.
// Throws a ParseError on a second superscript or a second subscript.
Parser.prototype.parseAtom = function(pos, mode) {
    // Parse the nucleus
    var nucleus = this.parseGroup(pos, mode);

    // Text mode doesn't have superscripts or subscripts, so we only parse the
    // nucleus in this case
    if (mode === "text") {
        return nucleus;
    }

    // Scripts are looked for right after the nucleus, or at the starting
    // position when there is no nucleus
    var nextPos = pos;
    var nucleusNode;
    if (nucleus) {
        nextPos = nucleus.position;
        nucleusNode = nucleus.result;
    }

    var sup;
    var sub;
    var node;

    // Keep consuming scripts until neither kind is found. The superscript is
    // always tried first at each position.
    while (true) {
        if ((node = this.parseSuperscript(nextPos, mode))) {
            if (sup) {
                throw new ParseError(
                    "Double superscript", this.lexer, nextPos);
            }
            nextPos = node.position;
            sup = node.result;
            continue;
        }
        if ((node = this.parseSubscript(nextPos, mode))) {
            if (sub) {
                throw new ParseError(
                    "Double subscript", this.lexer, nextPos);
            }
            nextPos = node.position;
            sub = node.result;
            continue;
        }
        break;
    }

    if (sup || sub) {
        return new ParseResult(
            new ParseNode("supsub", {base: nucleusNode, sup: sup,
                                     sub: sub}, mode),
            nextPos);
    }

    return nucleus;
};
// Parses a group: either a single nucleus (like "x") or a braced expression
// (like "{x+y}").
//
// For a braced expression the result is an "ordgroup" node wrapping the list
// of inner nodes, positioned just past the closing brace; `expect` throws if
// that brace is missing. Anything else is handed to parseNucleus, whose return
// value is passed through unchanged.
Parser.prototype.parseGroup = function(pos, mode) {
    var first = this.lexer.lex(pos, mode);

    // No opening brace: this can only be a bare nucleus
    if (first.type !== "{") {
        return this.parseNucleus(pos, mode);
    }

    var inner = this.parseExpression(first.position, mode);

    // The expression has to be closed off by a matching brace
    var closeBrace = this.lexer.lex(inner.position, mode);
    this.expect(closeBrace, "}");

    var groupNode = new ParseNode("ordgroup", inner.result, mode);
    return new ParseResult(groupNode, closeBrace.position);
};
// Parses an implicit group: a group that begins at `pos` and runs until just
// before the enclosing explicit group ends, or until the end of the input.
// It serves functions that look like they change the current style (such as
// \Large): rather than tracking a style, everything after the function up to
// the end of the group is treated as its argument.
//
// parseExpression already stops at exactly that point, so its nodes only need
// to be wrapped in an "ordgroup" node.
Parser.prototype.parseImplicitGroup = function(pos, mode) {
    var body = this.parseExpression(pos, mode);
    var groupNode = new ParseNode("ordgroup", body.result, mode);
    return new ParseResult(groupNode, body.position);
};
// Parses a custom color group, which looks like "{#ffffff}".
//
// The braces are lexed in the caller's mode and the contents in "color" mode.
// Returns a ParseResult holding a "color" node whose value is the color
// token's text (the node carries no mode). Throws a ParseError if the group
// does not start with "{"; `expect` throws if it does not end with "}".
Parser.prototype.parseColorGroup = function(pos, mode) {
    var open = this.lexer.lex(pos, mode);

    // Colors must always be wrapped in braces
    if (open.type !== "{") {
        throw new ParseError(
            "There must be braces around colors",
            this.lexer, pos
        );
    }

    var color = this.lexer.lex(open.position, "color");

    var closeBrace = this.lexer.lex(color.position, mode);
    this.expect(closeBrace, "}");

    var colorNode = new ParseNode("color", color.text);
    return new ParseResult(colorNode, closeBrace.position);
};
// Parses a text group, which looks like "{some text}".
//
// The braces are lexed in the caller's mode, while the contents are parsed as
// an expression in "text" mode. Returns a ParseResult holding a text-mode
// "ordgroup" node. Throws a ParseError if the group does not start with "{";
// `expect` throws if it does not end with "}".
Parser.prototype.parseTextGroup = function(pos, mode) {
    var open = this.lexer.lex(pos, mode);

    // Text must always be wrapped in braces
    if (open.type !== "{") {
        throw new ParseError(
            "There must be braces around text",
            this.lexer, pos
        );
    }

    var contents = this.parseExpression(open.position, "text");

    var closeBrace = this.lexer.lex(contents.position, mode);
    this.expect(closeBrace, "}");

    var textNode = new ParseNode("ordgroup", contents.result, "text");
    return new ParseResult(textNode, closeBrace.position);
};
// Parses a size group: a single size token wrapped in braces.
//
// The braces are lexed in the caller's mode and the contents in "size" mode.
// Returns a ParseResult holding a "size" node whose value is the size token's
// text (the node carries no mode). Throws a ParseError if the group does not
// start with "{"; `expect` throws if it does not end with "}".
Parser.prototype.parseSizeGroup = function(pos, mode) {
    var open = this.lexer.lex(pos, mode);

    // Sizes must always be wrapped in braces
    if (open.type !== "{") {
        throw new ParseError(
            "There must be braces around sizes",
            this.lexer, pos
        );
    }

    var size = this.lexer.lex(open.position, "size");

    var closeBrace = this.lexer.lex(size.position, mode);
    this.expect(closeBrace, "}");

    var sizeNode = new ParseNode("size", size.text);
    return new ParseResult(sizeNode, closeBrace.position);
};
// Token texts that parseDelimiter accepts as a delimiter
var delimiters = [
    // Parentheses and square brackets
    "(", ")",
    "[", "\\lbrack",
    "]", "\\rbrack",
    // Curly braces
    "\\{", "\\lbrace",
    "\\}", "\\rbrace",
    // Floors and ceilings
    "\\lfloor", "\\rfloor",
    "\\lceil", "\\rceil",
    // Angle brackets
    "<", ">",
    "\\langle", "\\rangle",
    // Slashes
    "/", "\\backslash",
    // Vertical bars
    "|", "\\vert",
    "\\|", "\\Vert",
    // Arrows
    "\\uparrow", "\\Uparrow",
    "\\downarrow", "\\Downarrow",
    "\\updownarrow", "\\Updownarrow",
    // A lone period
    "."
];
// Parses a single delimiter token. Returns a ParseResult holding a
// "delimiter" node (value: the token's text, no mode) when the next token's
// text is in the `delimiters` list, and null otherwise.
Parser.prototype.parseDelimiter = function(pos, mode) {
    var token = this.lexer.lex(pos, mode);

    if (!utils.contains(delimiters, token.text)) {
        return null;
    }

    var delimNode = new ParseNode("delimiter", token.text);
    return new ParseResult(delimNode, token.position);
};
// The built-in color functions, each taking one argument. parseNucleus turns
// "\name" into the color "katex-name".
var colorFuncs = [
    "\\blue",
    "\\orange",
    "\\pink",
    "\\red",
    "\\green",
    "\\gray",
    "\\purple"
];
// The sizing functions. The order is significant: parseNucleus maps the
// function at index i to the size name "size" + (i + 1).
var sizeFuncs = [
    "\\tiny",
    "\\scriptsize",
    "\\footnotesize",
    "\\small",
    "\\normalsize",
    "\\large",
    "\\Large",
    "\\LARGE",
    "\\huge",
    "\\Huge"
];
// Math functions that are replaced by their names: in math mode parseNucleus
// emits each of these as a plain "namedfn" node.
var namedFns = [
    "\\arcsin",
    "\\arccos",
    "\\arctan",
    "\\arg",
    "\\cos",
    "\\cosh",
    "\\cot",
    "\\coth",
    "\\csc",
    "\\deg",
    "\\dim",
    "\\exp",
    "\\hom",
    "\\ker",
    "\\lg",
    "\\ln",
    "\\log",
    "\\sec",
    "\\sin",
    "\\sinh",
    "\\tan",
    "\\tanh"
];
// The delimiter sizing functions. Each entry gives the `type` and `size`
// that parseNucleus copies onto the "delimsizing" node it builds for that
// function.
var delimiterSizes = {
    // ...l variants: opening delimiters
    "\\bigl": {type: "open", size: 1},
    "\\Bigl": {type: "open", size: 2},
    "\\biggl": {type: "open", size: 3},
    "\\Biggl": {type: "open", size: 4},

    // ...r variants: closing delimiters
    "\\bigr": {type: "close", size: 1},
    "\\Bigr": {type: "close", size: 2},
    "\\biggr": {type: "close", size: 3},
    "\\Biggr": {type: "close", size: 4},

    // ...m variants: relations
    "\\bigm": {type: "rel", size: 1},
    "\\Bigm": {type: "rel", size: 2},
    "\\biggm": {type: "rel", size: 3},
    "\\Biggm": {type: "rel", size: 4},

    // Plain variants: ordinary symbols
    "\\big": {type: "textord", size: 1},
    "\\Big": {type: "textord", size: 2},
    "\\bigg": {type: "textord", size: 3},
    "\\Bigg": {type: "textord", size: 4}
};
// Parses a "nucleus", which is either a single token from the tokenizer or a
// function and its arguments.
//
// Returns a ParseResult holding the node for the nucleus, or null when the
// token at `pos` cannot start a nucleus. A "\right" in math mode also yields
// null, on purpose, so that the expression inside \left...\right stops there.
// Throws a ParseError when a function is missing one of its arguments.
//
// The checks below run in a fixed order and every one of them either returns
// or throws, so a later check is only reached when all earlier ones failed.
Parser.prototype.parseNucleus = function(pos, mode) {
    var nucleus = this.lexer.lex(pos, mode);
    var group;
    var atoms;

    if (utils.contains(colorFuncs, nucleus.type)) {
        // If this is a color function, parse its argument and return
        group = this.parseGroup(nucleus.position, mode);
        if (!group) {
            throw new ParseError(
                "Expected group after '" + nucleus.text + "'",
                this.lexer, nucleus.position
            );
        }
        // A braced argument is flattened, so the color node always holds a
        // list of atoms
        atoms = group.result.type === "ordgroup" ?
            group.result.value : [group.result];
        return new ParseResult(
            new ParseNode("color",
                {color: "katex-" + nucleus.type.slice(1), value: atoms},
                mode),
            group.position);
    }

    if (nucleus.type === "\\color") {
        // If this is a custom color function, parse its first argument as a
        // custom color and its second argument normally
        var color = this.parseColorGroup(nucleus.position, mode);
        if (!color) {
            throw new ParseError(
                "Expected color after '" + nucleus.text + "'",
                this.lexer, nucleus.position
            );
        }
        group = this.parseGroup(color.position, mode);
        if (!group) {
            throw new ParseError(
                "Expected second group after '" + nucleus.text + "'",
                this.lexer, color.position
            );
        }
        atoms = group.result.type === "ordgroup" ?
            group.result.value : [group.result];
        return new ParseResult(
            new ParseNode("color",
                {color: color.result.value, value: atoms},
                mode),
            group.position);
    }

    if (mode === "math" && utils.contains(sizeFuncs, nucleus.type)) {
        // If this is a size function, everything up to the end of the
        // enclosing group is its argument
        group = this.parseImplicitGroup(nucleus.position, mode);
        return new ParseResult(
            new ParseNode("sizing", {
                size: "size" + (utils.indexOf(sizeFuncs, nucleus.type) + 1),
                value: group.result
            }, mode),
            group.position);
    }

    if (mode === "math" && utils.contains(namedFns, nucleus.type)) {
        // If this is a named function, just return it plain
        return new ParseResult(
            new ParseNode("namedfn", nucleus.text, mode),
            nucleus.position);
    }

    if (mode === "math" && delimiterSizes[nucleus.type]) {
        // If this is a delimiter size function, we parse a single delimiter
        var sizing = delimiterSizes[nucleus.type];
        var delim = this.parseDelimiter(nucleus.position, mode);
        if (!delim) {
            throw new ParseError(
                "Expected delimiter after '" + nucleus.text + "'",
                this.lexer, nucleus.position
            );
        }
        return new ParseResult(
            new ParseNode("delimsizing", {
                size: sizing.size,
                type: sizing.type,
                value: delim.result.value
            }, mode),
            delim.position);
    }

    if (mode === "math" && nucleus.type === "\\left") {
        // If we see a \left, first we parse the left delimiter
        var leftDelim = this.parseDelimiter(nucleus.position, mode);
        if (!leftDelim) {
            throw new ParseError(
                "Expected delimiter after '" + nucleus.text + "'",
                this.lexer, nucleus.position
            );
        }
        // Then, we parse an inner expression. Due to the handling of \right
        // below, this should end just before the \right
        var body = this.parseExpression(leftDelim.position, mode);
        // Make sure we see a \right
        var right = this.lexer.lex(body.position, mode);
        this.expect(right, "\\right");
        // Parse the right delimiter
        var rightDelim = this.parseDelimiter(right.position, mode);
        if (!rightDelim) {
            throw new ParseError(
                "Expected delimiter after '" + right.text + "'",
                this.lexer, right.position
            );
        }
        return new ParseResult(
            new ParseNode("leftright", {
                left: leftDelim.result.value,
                right: rightDelim.result.value,
                body: body.result
            }, mode),
            rightDelim.position);
    }

    if (mode === "math" && nucleus.type === "\\right") {
        // If we see a right, we explicitly return null to break out of the
        // parseExpression loop. The code for \left will handle the delimiter
        return null;
    }

    if (nucleus.type === "\\llap" || nucleus.type === "\\rlap") {
        // If this is an llap or rlap, parse its argument and return
        group = this.parseGroup(nucleus.position, mode);
        if (!group) {
            throw new ParseError(
                "Expected group after '" + nucleus.text + "'",
                this.lexer, nucleus.position
            );
        }
        return new ParseResult(
            new ParseNode(nucleus.type.slice(1), group.result, mode),
            group.position);
    }

    if (mode === "math" && nucleus.type === "\\text") {
        // If this is a \text, parse its argument as a text group and return
        group = this.parseTextGroup(nucleus.position, mode);
        if (!group) {
            throw new ParseError(
                "Expected group after '" + nucleus.text + "'",
                this.lexer, nucleus.position
            );
        }
        return new ParseResult(
            new ParseNode(nucleus.type.slice(1), group.result, mode),
            group.position);
    }

    if (mode === "math" && (nucleus.type === "\\dfrac" ||
                            nucleus.type === "\\frac" ||
                            nucleus.type === "\\tfrac")) {
        // If this is a frac, parse its two arguments and return
        var numer = this.parseGroup(nucleus.position, mode);
        if (!numer) {
            throw new ParseError("Expected numerator after '" +
                                 nucleus.type + "'",
                this.lexer, nucleus.position
            );
        }
        var denom = this.parseGroup(numer.position, mode);
        if (!denom) {
            throw new ParseError("Expected denominator after '" +
                                 nucleus.type + "'",
                this.lexer, numer.position
            );
        }
        return new ParseResult(
            new ParseNode("frac", {
                numer: numer.result,
                denom: denom.result,
                size: nucleus.type.slice(1)
            }, mode),
            denom.position);
    }

    if (mode === "math" && nucleus.type === "\\KaTeX") {
        // If this is a KaTeX node, return the special katex result
        return new ParseResult(
            new ParseNode("katex", null, mode),
            nucleus.position
        );
    }

    if (mode === "math" && (nucleus.type === "\\overline" ||
                            nucleus.type === "\\sqrt")) {
        // If this is an overline or a square root, parse its argument and
        // return a node named after the function ("overline" or "sqrt")
        group = this.parseGroup(nucleus.position, mode);
        if (!group) {
            throw new ParseError("Expected group after '" +
                                 nucleus.type + "'",
                this.lexer, nucleus.position
            );
        }
        // NOTE(review): the node value is the whole ParseResult, not
        // group.result as for \llap/\rlap. It is kept as-is because the code
        // that consumes the tree is not visible here -- confirm before
        // changing.
        return new ParseResult(
            new ParseNode(nucleus.type.slice(1), group, mode),
            group.position);
    }

    if (mode === "math" && nucleus.type === "\\rule") {
        // Parse the width of the rule
        var widthGroup = this.parseSizeGroup(nucleus.position, mode);
        if (!widthGroup) {
            throw new ParseError("Expected size group after '" +
                                 nucleus.type + "'",
                this.lexer, nucleus.position
            );
        }
        // Parse the height of the rule
        var heightGroup = this.parseSizeGroup(widthGroup.position, mode);
        if (!heightGroup) {
            throw new ParseError("Expected second size group after '" +
                                 nucleus.type + "'",
                this.lexer, nucleus.position
            );
        }
        return new ParseResult(
            new ParseNode("rule", {
                width: widthGroup.result.value,
                height: heightGroup.result.value
            }, mode),
            heightGroup.position);
    }

    // Otherwise if this is a no-argument function, find the type it
    // corresponds to in the symbols map
    var symbol = symbols[mode][nucleus.text];
    if (symbol) {
        return new ParseResult(
            new ParseNode(symbol.group, nucleus.text, mode),
            nucleus.position);
    }

    // Otherwise, we couldn't parse it
    return null;
};
module.exports = Parser;