
Summary: This diff does a couple of different things: - There is now a metrics/ folder, which contains the property files describing the metrics of the fonts, as well as a script for reading and printing the metrics in javascript. - Fractions and superscripts/subscripts are now rendered in slightly different ways (notably, no use of inline-table). This allows for much more precise positioning of the superscripts, subscripts, numerators, and denominators, while still having an appropriate baseline. Also, there is no longer a sup/sub/supsub distinction, there are only supsubs with null sup/sub. - Using the new font metrics and by implementing the formulas found in The TeX Book, Appendix G, the heights and depths of all of the sub-expressions in a formula are now calculated. These are currently used to: - Correctly position superscripts, subscripts, numerators, and denominators - Adjust the height and depth of the overall expression so it takes up the appropriate space - Because we have to add attributes (height and depth) to every node, I have changed the way DOM nodes are assembled. Now, instead of assembling the DOM elements inline (which is a problem because we need to track height/depth, and we shouldn't (and can't in IE 8) attach raw attributes to DOM nodes), we assemble a pseudo-DOM structure with the extra information, and then actually assemble it at the very end. The main page also now has an updated expression to show off and test the new and improved parsing. Test Plan: View the main page, make sure that the expression renders. Make sure that the tests pass. Make sure that expressions have the correct calculated height (this is most easily tested by viewing them on the main page and making sure that the top of the expression lines up with the bottom of the input box). Reviewers: alpert Reviewed By: alpert Differential Revision: http://phabricator.khanacademy.org/D3442
396 lines
11 KiB
JavaScript
396 lines
11 KiB
JavaScript
var Lexer = require("./Lexer");
|
|
var utils = require("./utils");
|
|
|
|
var ParseError = require("./ParseError");
|
|
|
|
/**
 * Main Parser class.
 *
 * The constructor itself stores nothing; `parse` attaches a lexer for the
 * input being parsed to the instance as `this.lexer`.
 */
function Parser() {
}
|
|
|
|
/**
 * Returned by the Parser.parse... functions. Pairs what was parsed with the
 * lexer position at which parsing should continue.
 *
 * @param result the parsed value (a ParseNode or a list of them)
 * @param newPosition the new lexer position, stored as `position`
 */
function ParseResult(result, newPosition) {
    this.result = result;
    this.position = newPosition;
}
|
|
|
|
/**
 * A single node of the resulting parse tree.
 *
 * @param type the kind of node (for example "textord", "supsub" or "frac")
 * @param value the payload for the node; its shape depends on `type`
 */
function ParseNode(type, value) {
    this.type = type;
    this.value = value;
}
|
|
|
|
/**
 * Checks a result to make sure it has the right type, and throws an
 * appropriate error otherwise.
 *
 * @param result an object with a `type` property (e.g. a lexer token)
 * @param type the type string that `result.type` must equal
 * @throws {ParseError} if `result.type` is not exactly `type`
 */
var expect = function(result, type) {
    if (result.type !== type) {
        throw new ParseError(
            "Expected '" + type + "', got '" + result.type + "'");
    }
};
|
|
|
|
/**
 * Main parsing function, which parses an entire input.
 *
 * A fresh Lexer is created for `input` and stored on `this.lexer`, so the
 * other parse... methods read tokens from it.
 *
 * @param input the text to parse; handed unchanged to the Lexer constructor
 * @returns the `result` of `parseInput(0)`, i.e. the list of parse nodes
 * @throws whatever `parseInput` throws; failure is reported by exception,
 *     not by a null return value
 */
Parser.prototype.parse = function(input) {
    // Make a new lexer
    this.lexer = new Lexer(input);

    // Parse the whole input, starting from the first position
    var parse = this.parseInput(0);
    return parse.result;
};
|
|
|
|
/**
 * Parses an entire input tree: one expression that must be followed
 * directly by the end of the input.
 *
 * @param pos the lexer position to start parsing from
 * @returns the ParseResult produced by `parseExpression`
 * @throws {ParseError} (via `expect`) if the token after the expression is
 *     not "EOF"
 */
Parser.prototype.parseInput = function(pos) {
    // Parse an expression
    var expression = this.parseExpression(pos);
    // Make sure nothing but an EOF follows the expression
    var EOF = this.lexer.lex(expression.position);
    expect(EOF, "EOF");
    return expression;
};
|
|
|
|
/**
 * Parses an "expression", which is a list of atoms.
 *
 * Atoms are collected until `parseAtom` reports that no atom starts at the
 * current position, so an empty list is a valid result.
 *
 * @param pos the lexer position to start parsing from
 * @returns {ParseResult} the list of atom nodes and the position just after
 *     the last atom that was parsed
 */
Parser.prototype.parseExpression = function(pos) {
    // Start with a list of nodes
    var expression = [];
    var parse;
    // Keep consuming atoms for as long as one can be parsed
    while ((parse = this.parseAtom(pos))) {
        expression.push(parse.result);
        pos = parse.position;
    }
    return new ParseResult(expression, pos);
};
|
|
|
|
/**
 * Parses a superscript expression, like "^3".
 *
 * A "'" token is also accepted and produces a "\\prime" textord node, so it
 * is treated as a superscript by the caller.
 *
 * @param pos the lexer position to start parsing from
 * @returns {?ParseResult} the parsed superscript group, or null if the token
 *     at `pos` is neither "^" nor "'"
 * @throws {ParseError} if "^" is not followed by a group
 */
Parser.prototype.parseSuperscript = function(pos) {
    // Try to parse a "^" character
    var sup = this.lexer.lex(pos);

    if (sup.type === "^") {
        // If we got one, parse the corresponding group
        var group = this.parseGroup(sup.position);
        if (!group) {
            // Throw an error if we didn't find a group
            throw new ParseError("Couldn't find group after '^'");
        }
        return group;
    }

    if (sup.type === "'") {
        return new ParseResult(
            new ParseNode("textord", "\\prime"), sup.position);
    }

    return null;
};
|
|
|
|
/**
 * Parses a subscript expression, like "_3".
 *
 * @param pos the lexer position to start parsing from
 * @returns {?ParseResult} the parsed subscript group, or null if the token
 *     at `pos` is not "_"
 * @throws {ParseError} if "_" is not followed by a group
 */
Parser.prototype.parseSubscript = function(pos) {
    // Try to parse a "_" character
    var sub = this.lexer.lex(pos);
    if (sub.type !== "_") {
        return null;
    }

    // We got one, so parse the corresponding group
    var group = this.parseGroup(sub.position);
    if (!group) {
        // Throw an error if we didn't find a group
        throw new ParseError("Couldn't find group after '_'");
    }
    return group;
};
|
|
|
|
/**
 * Parses an atom, which consists of a nucleus, and an optional superscript
 * and subscript (in either order, at most one of each).
 *
 * @param pos the lexer position to start parsing from
 * @returns {?ParseResult} a "supsub" node wrapping the nucleus when a
 *     superscript and/or subscript was found; otherwise whatever
 *     `parseGroup` returned for the nucleus (null when there is none)
 * @throws {ParseError} on a second superscript or a second subscript
 */
Parser.prototype.parseAtom = function(pos) {
    // Parse the nucleus
    var nucleus = this.parseGroup(pos);
    var nextPos = pos;
    var nucleusNode;

    if (nucleus) {
        nextPos = nucleus.position;
        nucleusNode = nucleus.result;
    }

    var sup;
    var sub;

    // Now, we try to parse a subscript or a superscript (or both!), and
    // depending on whether those succeed, we return the correct type.
    while (true) {
        var node;
        if ((node = this.parseSuperscript(nextPos))) {
            if (sup) {
                throw new ParseError("Parse error: Double superscript");
            }
            nextPos = node.position;
            sup = node.result;
            continue;
        }
        if ((node = this.parseSubscript(nextPos))) {
            if (sub) {
                throw new ParseError("Parse error: Double subscript");
            }
            nextPos = node.position;
            sub = node.result;
            continue;
        }
        break;
    }

    if (sup || sub) {
        // Scripts without a nucleus are allowed; `base` is then left
        // undefined, as are `sup`/`sub` when only one of them is present.
        return new ParseResult(
            new ParseNode("supsub", {base: nucleusNode, sup: sup,
                                     sub: sub}),
            nextPos);
    } else {
        return nucleus;
    }
};
|
|
|
|
/**
 * Parses a group, which is either a single nucleus (like "x") or an
 * expression in braces (like "{x+y}").
 *
 * @param pos the lexer position to start parsing from
 * @returns {?ParseResult} an "ordgroup" node holding the list of nodes
 *     between the braces, or the result of `parseNucleus` (possibly null)
 *     when the token at `pos` is not "{"
 * @throws {ParseError} (via `expect`) if the braced expression is not
 *     followed by "}"
 */
Parser.prototype.parseGroup = function(pos) {
    var start = this.lexer.lex(pos);

    // Anything other than an open brace is just a nucleus
    if (start.type !== "{") {
        return this.parseNucleus(pos);
    }

    // We got a brace, so parse an expression
    var expression = this.parseExpression(start.position);
    // Make sure we get a close brace
    var closeBrace = this.lexer.lex(expression.position);
    expect(closeBrace, "}");
    return new ParseResult(
        new ParseNode("ordgroup", expression.result),
        closeBrace.position);
};
|
|
|
|
|
|
// A list of 1-argument color functions. The color name stored in the parse
// tree is the function name without its leading backslash.
var colorFuncs = [
    "\\blue", "\\orange", "\\pink", "\\red", "\\green", "\\gray", "\\purple"
];
|
|
|
|
// A map of elements that don't have arguments, and should simply be placed
// into a group depending on their type. The keys are the groups that items can
// be placed in, and the values are lists of element types that should be
// placed in those groups.
//
// For example, if the lexer returns something of type "\\colon", we should
// return a node of type "punct"
var copyFuncs = {
    "textord": [
        "textord",
        "\\$",
        "\\angle",
        "\\infty",
        "\\prime",
        "\\Gamma",
        "\\Delta",
        "\\Theta",
        "\\Lambda",
        "\\Xi",
        "\\Pi",
        "\\Sigma",
        "\\Upsilon",
        "\\Phi",
        "\\Psi",
        "\\Omega"
    ],
    "mathord": [
        "mathord",
        "\\alpha",
        "\\beta",
        "\\gamma",
        "\\delta",
        "\\epsilon",
        "\\zeta",
        "\\eta",
        "\\theta",
        "\\iota",
        "\\kappa",
        "\\lambda",
        "\\mu",
        "\\nu",
        "\\xi",
        "\\omicron",
        "\\pi",
        "\\rho",
        "\\sigma",
        "\\tau",
        "\\upsilon",
        "\\phi",
        "\\chi",
        "\\psi",
        "\\omega",
        "\\varepsilon",
        "\\vartheta",
        "\\varpi",
        "\\varrho",
        "\\varsigma",
        "\\varphi"
    ],
    "bin": [
        "bin",
        "\\cdot",
        "\\circ",
        "\\div",
        "\\pm",
        "\\times"
    ],
    "open": [
        "open",
        "\\lvert"
    ],
    "close": [
        "close",
        "\\rvert"
    ],
    "rel": [
        "rel",
        "\\geq",
        "\\gets",
        "\\leftarrow",
        "\\leq",
        "\\neq",
        "\\ngeq",
        "\\nleq",
        "\\rightarrow",
        "\\to"
    ],
    "spacing": [
        "\\ ",
        "\\,",
        "\\:",
        "\\;",
        "\\qquad",
        "\\quad",
        "\\space"
    ],
    "punct": [
        "punct",
        "\\colon"
    ],
    "namedfn": [
        "\\arcsin",
        "\\arccos",
        "\\arctan",
        "\\arg",
        "\\cos",
        "\\cosh",
        "\\cot",
        "\\coth",
        "\\csc",
        "\\deg",
        "\\dim",
        "\\exp",
        "\\hom",
        "\\ker",
        "\\lg",
        "\\ln",
        "\\log",
        "\\sec",
        "\\sin",
        "\\sinh",
        "\\tan",
        "\\tanh"
    ]
};

// Build a reverse index of the copyFuncs map (element type -> group), to
// quickly check if the function should be interpreted by the map.
var funcToType = {};
for (var type in copyFuncs) {
    // Skip anything inherited, in case Object.prototype has been extended
    if (!Object.prototype.hasOwnProperty.call(copyFuncs, type)) {
        continue;
    }
    for (var i = 0; i < copyFuncs[type].length; i++) {
        var func = copyFuncs[type][i];
        funcToType[func] = type;
    }
}
|
|
|
|
/**
 * Parses a "nucleus", which is either a single token from the tokenizer or a
 * function and its arguments.
 *
 * Handled, in order: the 1-argument color functions, "\\llap"/"\\rlap",
 * the 2-argument "\\dfrac"/"\\frac"/"\\tfrac", "\\KaTeX", and finally any
 * argument-less token listed in the funcToType index.
 *
 * @param pos the lexer position to start parsing from
 * @returns {?ParseResult} the parsed node and the position after it, or null
 *     if the token at `pos` does not start a nucleus
 * @throws {ParseError} if a function is missing one of its arguments
 */
Parser.prototype.parseNucleus = function(pos) {
    var nucleus = this.lexer.lex(pos);
    var group;

    if (utils.contains(colorFuncs, nucleus.type)) {
        // If this is a color function, parse its argument and return
        group = this.parseGroup(nucleus.position);
        if (!group) {
            throw new ParseError(
                "Expected group after '" + nucleus.text + "'");
        }
        // A braced argument is flattened so that the color node always
        // holds a plain list of atoms
        var atoms;
        if (group.result.type === "ordgroup") {
            atoms = group.result.value;
        } else {
            atoms = [group.result];
        }
        return new ParseResult(
            new ParseNode("color",
                {color: nucleus.type.slice(1), value: atoms}),
            group.position);
    } else if (nucleus.type === "\\llap" || nucleus.type === "\\rlap") {
        // If this is an llap or rlap, parse its argument and return
        group = this.parseGroup(nucleus.position);
        if (!group) {
            throw new ParseError(
                "Expected group after '" + nucleus.text + "'");
        }
        return new ParseResult(
            new ParseNode(nucleus.type.slice(1), group.result),
            group.position);
    } else if (nucleus.type === "\\dfrac" || nucleus.type === "\\frac" ||
               nucleus.type === "\\tfrac") {
        // If this is a frac, parse its two arguments and return
        var numer = this.parseGroup(nucleus.position);
        if (!numer) {
            throw new ParseError("Parse error: Expected numerator after '" +
                nucleus.type + "'");
        }
        var denom = this.parseGroup(numer.position);
        if (!denom) {
            throw new ParseError("Expected denominator after '" +
                nucleus.type + "'");
        }
        return new ParseResult(
            new ParseNode("frac", {
                numer: numer.result,
                denom: denom.result,
                size: nucleus.type.slice(1)
            }),
            denom.position);
    } else if (nucleus.type === "\\KaTeX") {
        return new ParseResult(
            new ParseNode("katex", null),
            nucleus.position
        );
    } else if (Object.prototype.hasOwnProperty.call(funcToType,
                                                    nucleus.type)) {
        // Otherwise if this is a no-argument function, find the type it
        // corresponds to in the map and return. Only own keys count, so a
        // token type such as "constructor" cannot match an inherited
        // Object.prototype member.
        return new ParseResult(
            new ParseNode(funcToType[nucleus.type], nucleus.text),
            nucleus.position);
    } else {
        // Otherwise, we couldn't parse it
        return null;
    }
};
|
|
|
|
module.exports = Parser;
|