scribble-math/Parser.js
Ben Alpert cab5af40b6 Always throw real ParseErrors
Also sneak in a colon.

Auditors: emily
2013-07-28 17:54:41 -07:00

399 lines
11 KiB
JavaScript

var Lexer = require("./Lexer");
var utils = require("./utils");
var ParseError = require("./ParseError");
// Main Parser class.
//
// The constructor takes no arguments and sets up no state of its own; the
// lexer is created by parse() for each input it is given.
function Parser() {
}
// Returned by the Parser.parse... functions. Stores the current results and
// the new lexer position.
//
// result: what the parse function produced (a ParseNode, or an array of
// ParseNodes in the case of parseExpression)
// newPosition: the lexer position at which the next token should be read;
// note that it is stored under the name `position`, not `newPosition`
function ParseResult(result, newPosition) {
this.result = result;
this.position = newPosition;
}
// A single node of the resulting parse tree.
//
// type: string naming the kind of node (e.g. "textord", "ordgroup", "frac")
// value: the node's payload, whose shape depends on the type (the token text
// for simple symbols, an array of nodes for "ordgroup", an object of named
// parts for "frac", "sup", "sub", "supsub" and "color")
function ParseNode(type, value) {
this.type = type;
this.value = value;
}
// Asserts that a lexed token has the required type.
//
// result: an object with a `type` property, as returned by the lexer
// type: the type string the token must have
//
// Returns nothing when the types match; otherwise throws a ParseError that
// names both the expected and the actual type.
var expect = function(result, type) {
    if (result.type === type) {
        return;
    }

    var message = "Expected '" + type + "', got '" + result.type + "'";
    throw new ParseError(message);
};
// Main parsing function, which parses an entire input.
//
// input: the source handed to a new Lexer
//
// Returns the `result` of parseInput, i.e. the list of ParseNodes for the
// whole input. A failed parse does not produce null here: the ParseError
// thrown further down simply propagates to the caller.
Parser.prototype.parse = function(input) {
    // Every call gets a fresh lexer, kept on the instance so that the other
    // parse functions can reach it
    this.lexer = new Lexer(input);

    // Start at position 0 and hand back only the nodes, not the position
    return this.parseInput(0).result;
};
// Parses a complete input: one expression that must be followed directly by
// the end of the input.
//
// pos: lexer position at which to start
//
// Returns the ParseResult of the expression. Throws a ParseError (through
// expect) when the token after the expression is not "EOF".
Parser.prototype.parseInput = function(pos) {
    var parsed = this.parseExpression(pos);

    // Anything other than EOF after the expression is leftover input that
    // could not be parsed
    var trailing = this.lexer.lex(parsed.position);
    expect(trailing, "EOF");

    return parsed;
};
// Parses an "expression": zero or more atoms in a row.
//
// pos: lexer position at which to start
//
// Always returns a ParseResult. Its result is the (possibly empty) array of
// atom nodes, and its position is the position after the last atom that was
// read (or `pos` itself when there was none).
Parser.prototype.parseExpression = function(pos) {
    var atoms = [];
    var atom;

    // parseAtom yields a falsy value as soon as no further atom can be read
    while ((atom = this.parseAtom(pos))) {
        atoms.push(atom.result);
        pos = atom.position;
    }

    return new ParseResult(atoms, pos);
};
// Parses a superscript: either "^" followed by a group (like "^3") or a
// single prime mark "'".
//
// pos: lexer position at which to look for the superscript
//
// Returns the ParseResult of the group that follows "^", a ParseResult
// holding a "\prime" textord node for "'", or null when the next token is
// neither of the two. Throws a ParseError when "^" is not followed by a
// group.
Parser.prototype.parseSuperscript = function(pos) {
    var sup = this.lexer.lex(pos);

    if (sup.type === "^") {
        // The superscript itself is whatever group follows the caret
        var group = this.parseGroup(sup.position);
        if (!group) {
            throw new ParseError("Couldn't find group after '^'");
        }
        return group;
    }

    if (sup.type === "'") {
        // A prime takes no argument; it stands for a \prime superscript
        return new ParseResult(
            new ParseNode("textord", "\\prime"), sup.position);
    }

    return null;
};
// Parses a subscript: "_" followed by a group, like "_3".
//
// pos: lexer position at which to look for the subscript
//
// Returns the ParseResult of the group that follows "_", or null when the
// next token is not "_". Throws a ParseError when "_" is not followed by a
// group.
Parser.prototype.parseSubscript = function(pos) {
    var sub = this.lexer.lex(pos);
    if (sub.type !== "_") {
        return null;
    }

    // The subscript itself is whatever group follows the underscore
    var group = this.parseGroup(sub.position);
    if (!group) {
        throw new ParseError("Couldn't find group after '_'");
    }
    return group;
};
// Parses an atom, which consists of a nucleus followed by an optional
// superscript and an optional subscript, in either order.
//
// pos: lexer position at which to start
//
// Returns one of:
//   - a ParseResult holding a "supsub", "sup" or "sub" node when at least one
//     script was found; the node's `base` is the nucleus node, or undefined
//     when the scripts were not preceded by a nucleus
//   - the result of parseGroup, untouched, when no script was found (this is
//     falsy when there was no nucleus either)
// Throws a ParseError on a second superscript or a second subscript.
Parser.prototype.parseAtom = function(pos) {
    // The nucleus is optional: without one, scanning for scripts starts at
    // `pos` and the base stays undefined
    var nucleus = this.parseGroup(pos);
    var nextPos = pos;
    var nucleusNode;
    if (nucleus) {
        nextPos = nucleus.position;
        nucleusNode = nucleus.result;
    }

    var sup;
    var sub;
    var node;

    // Keep consuming scripts until neither kind matches, so that both "x^a_b"
    // and "x_b^a" are accepted
    while (true) {
        if ((node = this.parseSuperscript(nextPos))) {
            if (sup) {
                throw new ParseError("Parse error: Double superscript");
            }
            nextPos = node.position;
            sup = node.result;
            continue;
        }
        if ((node = this.parseSubscript(nextPos))) {
            if (sub) {
                throw new ParseError("Parse error: Double subscript");
            }
            nextPos = node.position;
            sub = node.result;
            continue;
        }
        break;
    }

    // The node type reflects exactly which scripts were present
    if (sup && sub) {
        return new ParseResult(
            new ParseNode("supsub", {base: nucleusNode, sup: sup,
                                     sub: sub}),
            nextPos);
    } else if (sup) {
        return new ParseResult(
            new ParseNode("sup", {base: nucleusNode, sup: sup}),
            nextPos);
    } else if (sub) {
        return new ParseResult(
            new ParseNode("sub", {base: nucleusNode, sub: sub}),
            nextPos);
    } else {
        return nucleus;
    }
};
// Parses a group: either an expression wrapped in braces (like "{x+y}") or,
// failing that, a single nucleus (like "x").
//
// pos: lexer position at which to start
//
// Returns a ParseResult holding an "ordgroup" node (whose value is the list
// of nodes between the braces) for the braced form, and otherwise whatever
// parseNucleus returns for `pos`. Throws a ParseError (through expect) when
// an opening brace is not matched by "}".
Parser.prototype.parseGroup = function(pos) {
    var first = this.lexer.lex(pos);
    if (first.type !== "{") {
        // No brace: re-read from the original position as a nucleus
        return this.parseNucleus(pos);
    }

    var inner = this.parseExpression(first.position);

    // The expression stops at the first thing that is not an atom, which
    // has to be the closing brace
    var last = this.lexer.lex(inner.position);
    expect(last, "}");

    var node = new ParseNode("ordgroup", inner.result);
    return new ParseResult(node, last.position);
};
// The color functions. Each one takes a single argument: the group that is
// to be colored.
var colorFuncs = [
    "\\blue",
    "\\orange",
    "\\pink",
    "\\red",
    "\\green",
    "\\gray",
    "\\purple"
];
// Table of the elements that take no arguments and are simply turned into a
// node whose type depends on the element. Each key is a node type, and each
// value lists the lexer token types that become a node of that type.
//
// For example, a token of type "\\colon" from the lexer becomes a node of
// type "punct".
var copyFuncs = {
    "textord": [
        "textord",
        "\\$",
        "\\angle",
        "\\infty",
        "\\prime",
        "\\Gamma",
        "\\Delta",
        "\\Theta",
        "\\Lambda",
        "\\Xi",
        "\\Pi",
        "\\Sigma",
        "\\Upsilon",
        "\\Phi",
        "\\Psi",
        "\\Omega"
    ],
    "mathord": [
        "mathord",
        "\\alpha",
        "\\beta",
        "\\gamma",
        "\\delta",
        "\\epsilon",
        "\\zeta",
        "\\eta",
        "\\theta",
        "\\iota",
        "\\kappa",
        "\\lambda",
        "\\mu",
        "\\nu",
        "\\xi",
        "\\omicron",
        "\\pi",
        "\\rho",
        "\\sigma",
        "\\tau",
        "\\upsilon",
        "\\phi",
        "\\chi",
        "\\psi",
        "\\omega",
        "\\varepsilon",
        "\\vartheta",
        "\\varpi",
        "\\varrho",
        "\\varsigma",
        "\\varphi"
    ],
    "bin": [
        "bin",
        "\\cdot",
        "\\circ",
        "\\div",
        "\\pm",
        "\\times"
    ],
    "open": [
        "open",
        "\\lvert"
    ],
    "close": [
        "close",
        "\\rvert"
    ],
    "rel": [
        "rel",
        "\\geq",
        "\\gets",
        "\\leftarrow",
        "\\leq",
        "\\neq",
        "\\ngeq",
        "\\nleq",
        "\\rightarrow",
        "\\to"
    ],
    "spacing": [
        "\\ ",
        "\\,",
        "\\:",
        "\\;",
        "\\qquad",
        "\\quad",
        "\\space"
    ],
    "punct": [
        "punct",
        "\\colon"
    ],
    "namedfn": [
        "\\arcsin",
        "\\arccos",
        "\\arctan",
        "\\arg",
        "\\cos",
        "\\cosh",
        "\\cot",
        "\\coth",
        "\\csc",
        "\\deg",
        "\\dim",
        "\\exp",
        "\\hom",
        "\\ker",
        "\\lg",
        "\\ln",
        "\\log",
        "\\sec",
        "\\sin",
        "\\sinh",
        "\\tan",
        "\\tanh"
    ]
};

// Invert copyFuncs into a direct lookup from token type to node type, so the
// parser can tell with a single property access whether (and how) a token is
// covered by the table.
var funcToType = {};
Object.keys(copyFuncs).forEach(function(group) {
    copyFuncs[group].forEach(function(name) {
        funcToType[name] = group;
    });
});
// Parses a "nucleus", which is either a single token from the tokenizer or a
// function together with its arguments.
//
// pos: lexer position at which to start
//
// Returns a ParseResult holding
//   - a "color" node for a color function; its value carries the color name
//     (the function name without the backslash) and the list of colored nodes
//   - an "llap" / "rlap" node wrapping the argument's node
//   - a "frac" node with `numer`, `denom` and `size` ("dfrac", "frac" or
//     "tfrac")
//   - a node of the type given by funcToType, with the token's text as value,
//     for every argument-less token listed there
// and null when the token is none of these. Throws a ParseError when a
// function is not followed by the group(s) it requires.
Parser.prototype.parseNucleus = function(pos) {
    var nucleus = this.lexer.lex(pos);
    var group;

    if (utils.contains(colorFuncs, nucleus.type)) {
        // Color function: one argument
        group = this.parseGroup(nucleus.position);
        if (!group) {
            throw new ParseError(
                "Expected group after '" + nucleus.text + "'");
        }

        // A braced argument contributes its contents directly, so the color
        // node always holds a flat list of nodes
        var atoms;
        if (group.result.type === "ordgroup") {
            atoms = group.result.value;
        } else {
            atoms = [group.result];
        }
        return new ParseResult(
            new ParseNode("color",
                {color: nucleus.type.slice(1), value: atoms}),
            group.position);
    }

    if (nucleus.type === "\\llap" || nucleus.type === "\\rlap") {
        // llap / rlap: one argument, node type is the name sans backslash
        group = this.parseGroup(nucleus.position);
        if (!group) {
            throw new ParseError(
                "Expected group after '" + nucleus.text + "'");
        }
        return new ParseResult(
            new ParseNode(nucleus.type.slice(1), group.result),
            group.position);
    }

    if (nucleus.type === "\\dfrac" || nucleus.type === "\\frac" ||
            nucleus.type === "\\tfrac") {
        // Fraction: two arguments, numerator first
        var numer = this.parseGroup(nucleus.position);
        if (!numer) {
            throw new ParseError("Parse error: Expected numerator after '" +
                nucleus.type + "'");
        }
        var denom = this.parseGroup(numer.position);
        if (!denom) {
            throw new ParseError("Expected denominator after '" +
                nucleus.type + "'");
        }
        return new ParseResult(
            new ParseNode("frac", {
                numer: numer.result,
                denom: denom.result,
                size: nucleus.type.slice(1)
            }),
            denom.position);
    }

    // Only consult funcToType's own entries: a plain property read would also
    // find inherited members such as "constructor" or "toString" and turn
    // them into bogus nodes
    if (Object.prototype.hasOwnProperty.call(funcToType, nucleus.type)) {
        return new ParseResult(
            new ParseNode(funcToType[nucleus.type], nucleus.text),
            nucleus.position);
    }

    // Otherwise, we couldn't parse it
    return null;
};
module.exports = Parser;