Replace our annoying lookup tables with a unified symbol table

Summary:
Before, there were a couple problems:
1. There were similar tables of functions in the buildTree and Parser modules,
   making it hard to add new functions, and code duplication is bad
2. We distinguished the font to use (mostly just ams vs normal) using a
   different parser class, which led to annoyance and some weird bugs
3. (Not actually in this code, but in D7578) We can't distinguish between
   functions that should be expanded in text mode and those that shouldn't.
To solve these problems, I've created an all-encompassing table in symbols.js
that fixes 1. and 2., and should fix 3. once it's merged in. The table keeps
track of each of the functions and their class, font, and possible replacement,
and then Parser and buildTree do lookups in this table.

Also added the functions for the new_definitions functions, to show how easy it
is to add new functions.

Test Plan:
1. Run the normal tests, see they all pass
2. Run huxley, see none of the screenshots changed
3. See that things like the main page load
4. See that the new functions look correct with
   '\barwedge\veebar\odot\oplus\otimes\oslash\circledcirc
    \boxdot\bigtriangleup\bigtriangledown\dagger\diamond
    \star\triangleleft\triangleright'

Reviewers: alpert

Reviewed By: alpert

Differential Revision: http://phabricator.khanacademy.org/D7704
This commit is contained in:
Emily Eisenberg 2014-03-26 01:10:10 -04:00
parent bff85c638b
commit 2eca338e23
3 changed files with 599 additions and 241 deletions

169
Parser.js
View File

@ -1,5 +1,6 @@
var Lexer = require("./Lexer");
var utils = require("./utils");
var symbols = require("./symbols");
var ParseError = require("./ParseError");
@ -189,155 +190,13 @@ var sizeFuncs = [
"\\large", "\\Large", "\\LARGE", "\\huge", "\\Huge"
];
// A map of elements that don't have arguments, and should simply be placed
// into a group depending on their type. The keys are the groups that items can
// be placed in, and the values are lists of element types that should be
// placed in those groups.
//
// For example, if the lexer returns something of type "colon", we should
// return a node of type "punct"
// Table of argument-less elements, keyed by the group they belong to. Each
// value lists the lexer types that should be wrapped in a node of that group.
var copyFuncs = {
    // Ordinary symbols typeset upright
    "textord": [
        "textord", "\\$", "\\%", "\\angle", "\\infty", "\\prime",
        "\\triangle", "\\Gamma", "\\Delta", "\\Theta", "\\Lambda", "\\Xi",
        "\\Pi", "\\Sigma", "\\Upsilon", "\\Phi", "\\Psi", "\\Omega"
    ],

    // Ordinary math symbols (lowercase Greek and variants)
    "mathord": [
        "mathord", "\\alpha", "\\beta", "\\gamma", "\\delta", "\\epsilon",
        "\\zeta", "\\eta", "\\theta", "\\iota", "\\kappa", "\\lambda",
        "\\mu", "\\nu", "\\xi", "\\omicron", "\\pi", "\\rho", "\\sigma",
        "\\tau", "\\upsilon", "\\phi", "\\chi", "\\psi", "\\omega",
        "\\varepsilon", "\\vartheta", "\\varpi", "\\varrho", "\\varsigma",
        "\\varphi"
    ],

    // Binary operators
    "bin": ["bin", "\\cdot", "\\circ", "\\div", "\\pm", "\\times"],

    // Opening and closing delimiters
    "open": ["open", "\\langle", "\\lvert"],
    "close": ["close", "\\rangle", "\\rvert"],

    // Relations
    "rel": [
        "rel", "\\approx", "\\cong", "\\ge", "\\geq", "\\gets", "\\in",
        "\\leftarrow", "\\le", "\\leq", "\\ne", "\\neq", "\\rightarrow",
        "\\to"
    ],
    "amsrel": ["\\ngeq", "\\nleq"],

    // Spacing commands
    "spacing": [
        "\\!", "\\ ", "\\,", "\\:", "\\;", "\\enspace", "\\qquad", "\\quad",
        "\\space"
    ],

    // Punctuation
    "punct": ["punct", "\\colon"],

    // Functions printed by name
    "namedfn": [
        "\\arcsin", "\\arccos", "\\arctan", "\\arg", "\\cos", "\\cosh",
        "\\cot", "\\coth", "\\csc", "\\deg", "\\dim", "\\exp", "\\hom",
        "\\ker", "\\lg", "\\ln", "\\log", "\\sec", "\\sin", "\\sinh",
        "\\tan", "\\tanh"
    ]
};
// Invert copyFuncs into a flat lookup from each listed function to the group
// it belongs to, so a single property access tells us whether (and how) the
// map handles a given function.
var funcToType = {};
for (var groupName in copyFuncs) {
    var members = copyFuncs[groupName];
    for (var idx = 0; idx < members.length; idx++) {
        funcToType[members[idx]] = groupName;
    }
}
// Math functions that are rendered as their own name (e.g. \sin -> "sin")
var namedFns = [
    "\\arcsin",
    "\\arccos",
    "\\arctan",
    "\\arg",
    "\\cos",
    "\\cosh",
    "\\cot",
    "\\coth",
    "\\csc",
    "\\deg",
    "\\dim",
    "\\exp",
    "\\hom",
    "\\ker",
    "\\lg",
    "\\ln",
    "\\log",
    "\\sec",
    "\\sin",
    "\\sinh",
    "\\tan",
    "\\tanh"
];
// Parses a "nucleus", which is either a single token from the tokenizer or a
// function and its arguments
@ -376,6 +235,11 @@ Parser.prototype.parseNucleus = function(pos) {
throw new ParseError(
"Expected group after '" + nucleus.text + "'");
}
} else if (utils.contains(namedFns, nucleus.type)) {
// If this is a named function, just return it plain
return new ParseResult(
new ParseNode("namedfn", nucleus.text),
nucleus.position);
} else if (nucleus.type === "\\llap" || nucleus.type === "\\rlap") {
// If this is an llap or rlap, parse its argument and return
var group = this.parseGroup(nucleus.position);
@ -410,15 +274,16 @@ Parser.prototype.parseNucleus = function(pos) {
nucleus.type + "'");
}
} else if (nucleus.type === "\\KaTeX") {
// If this is a KaTeX node, return the special katex result
return new ParseResult(
new ParseNode("katex", null),
nucleus.position
);
} else if (funcToType[nucleus.type]) {
} else if (symbols[nucleus.text]) {
// Otherwise if this is a no-argument function, find the type it
// corresponds to in the map and return
// corresponds to in the symbols map
return new ParseResult(
new ParseNode(funcToType[nucleus.type], nucleus.text),
new ParseNode(symbols[nucleus.text].group, nucleus.text),
nucleus.position);
} else {
// Otherwise, we couldn't parse it

View File

@ -6,6 +6,7 @@ var domTree = require("./domTree");
var fontMetrics = require("./fontMetrics");
var parseTree = require("./parseTree");
var utils = require("./utils");
var symbols = require("./symbols");
var buildExpression = function(expression, options, prev) {
var groups = [];
@ -406,91 +407,9 @@ var buildGroup = function(group, options, prev) {
}
};
// Maps an input character or function name to the character that is actually
// rendered for it. Values not listed here are rendered unchanged by makeText.
var charLookup = {
    // Plain characters that render as a different glyph
    "*": "\u2217",
    "-": "\u2212",
    "`": "\u2018",

    // Symbols, operators, relations and delimiters
    "\\ ": "\u00a0",
    "\\$": "$",
    "\\%": "%",
    "\\angle": "\u2220",
    "\\approx": "\u2248",
    "\\cdot": "\u22c5",
    "\\circ": "\u2218",
    "\\colon": ":",
    "\\cong": "\u2245",
    "\\div": "\u00f7",
    "\\ge": "\u2265",
    "\\geq": "\u2265",
    "\\gets": "\u2190",
    "\\in": "\u2208",
    "\\infty": "\u221e",
    "\\langle": "\u27e8",
    "\\leftarrow": "\u2190",
    "\\le": "\u2264",
    "\\leq": "\u2264",
    "\\lvert": "|",
    "\\ne": "\u2260",
    "\\neq": "\u2260",
    "\\ngeq": "\u2271",
    "\\nleq": "\u2270",
    "\\pm": "\u00b1",
    "\\prime": "\u2032",
    "\\rangle": "\u27e9",
    "\\rightarrow": "\u2192",
    "\\rvert": "|",
    "\\space": "\u00a0",
    "\\times": "\u00d7",
    "\\to": "\u2192",
    "\\triangle": "\u25b3",

    // Lowercase Greek letters and their variants
    "\\alpha": "\u03b1",
    "\\beta": "\u03b2",
    "\\gamma": "\u03b3",
    "\\delta": "\u03b4",
    "\\epsilon": "\u03f5",
    "\\zeta": "\u03b6",
    "\\eta": "\u03b7",
    "\\theta": "\u03b8",
    "\\iota": "\u03b9",
    "\\kappa": "\u03ba",
    "\\lambda": "\u03bb",
    "\\mu": "\u03bc",
    "\\nu": "\u03bd",
    "\\xi": "\u03be",
    "\\omicron": "o",
    "\\pi": "\u03c0",
    "\\rho": "\u03c1",
    "\\sigma": "\u03c3",
    "\\tau": "\u03c4",
    "\\upsilon": "\u03c5",
    "\\phi": "\u03d5",
    "\\chi": "\u03c7",
    "\\psi": "\u03c8",
    "\\omega": "\u03c9",
    "\\varepsilon": "\u03b5",
    "\\vartheta": "\u03d1",
    "\\varpi": "\u03d6",
    "\\varrho": "\u03f1",
    "\\varsigma": "\u03c2",
    "\\varphi": "\u03c6",

    // Uppercase Greek letters
    "\\Gamma": "\u0393",
    "\\Delta": "\u0394",
    "\\Theta": "\u0398",
    "\\Lambda": "\u039b",
    "\\Xi": "\u039e",
    "\\Pi": "\u03a0",
    "\\Sigma": "\u03a3",
    "\\Upsilon": "\u03a5",
    "\\Phi": "\u03a6",
    "\\Psi": "\u03a8",
    "\\Omega": "\u03a9"
};
var makeText = function(value, style) {
if (value in charLookup) {
value = charLookup[value];
if (symbols[value].replace) {
value = symbols[value].replace;
}
var metrics = fontMetrics.getCharacterMetrics(value, style);
@ -518,11 +437,11 @@ var mathit = function(value) {
};
// Renders `value` by passing it to makeText with the "main-regular" style and
// returning the result.
var mathrm = function(value) {
return makeText(value, "main-regular");
};
// Renders `value` with the "ams-regular" style via makeText and wraps the
// result in a span carrying the "amsrm" class.
// NOTE(review): every statement after the first `return` is unreachable as
// written. This looks like the removed and added lines of a diff merged into
// one function body (the trailing if/else checks symbols[value].font) —
// confirm against the real buildTree.js before relying on this text.
var amsrm = function(value) {
return makeSpan(["amsrm"], [makeText(value, "ams-regular")]);
if (symbols[value].font === "main") {
return makeText(value, "main-regular");
} else {
return makeSpan(["amsrm"], [makeText(value, "ams-regular")]);
}
};
var buildTree = function(tree) {

574
symbols.js Normal file
View File

@ -0,0 +1,574 @@
/* This file holds a list of all no-argument functions and single-character
* symbols (like 'a' or ';'). For each of the symbols, there are three
* properties they can have:
* - font (required): the font to be used for this symbol. Either "main" (the
* normal font), or "ams" (the ams fonts)
* - group (required): the ParseNode group type the symbol should have (i.e.
* "textord" or "mathord" or "bin", etc.)
* - replace (optional): the character that this symbol or function should be
* replaced with (i.e. "\phi" has a replace value of "\u03d5", the phi
* character in the main font)
*/
// Unified symbol table. Each key is a single character or a no-argument
// function; each value records:
//   font    - "main" or "ams", the font the glyph is drawn from
//   group   - the ParseNode group the symbol is parsed into
//   replace - (optional) the character rendered in place of the key
//
// The operators at the end of the table (\oplus, \otimes, \dagger, ...) are
// classified as "bin": LaTeX and amssymb declare them as binary operators, so
// they must not be treated as ordinary "textord" atoms.
var symbols = {
    // Ordinary upright symbols
    "`": {font: "main", group: "textord", replace: "\u2018"},
    "\\$": {font: "main", group: "textord", replace: "$"},
    "\\%": {font: "main", group: "textord", replace: "%"},
    "\\angle": {font: "main", group: "textord", replace: "\u2220"},
    "\\infty": {font: "main", group: "textord", replace: "\u221e"},
    "\\prime": {font: "main", group: "textord", replace: "\u2032"},
    "\\triangle": {font: "main", group: "textord", replace: "\u25b3"},
    "\\Gamma": {font: "main", group: "textord", replace: "\u0393"},
    "\\Delta": {font: "main", group: "textord", replace: "\u0394"},
    "\\Theta": {font: "main", group: "textord", replace: "\u0398"},
    "\\Lambda": {font: "main", group: "textord", replace: "\u039b"},
    "\\Xi": {font: "main", group: "textord", replace: "\u039e"},
    "\\Pi": {font: "main", group: "textord", replace: "\u03a0"},
    "\\Sigma": {font: "main", group: "textord", replace: "\u03a3"},
    "\\Upsilon": {font: "main", group: "textord", replace: "\u03a5"},
    "\\Phi": {font: "main", group: "textord", replace: "\u03a6"},
    "\\Psi": {font: "main", group: "textord", replace: "\u03a8"},
    "\\Omega": {font: "main", group: "textord", replace: "\u03a9"},

    // Ordinary math symbols (lowercase Greek and variants)
    "\\alpha": {font: "main", group: "mathord", replace: "\u03b1"},
    "\\beta": {font: "main", group: "mathord", replace: "\u03b2"},
    "\\gamma": {font: "main", group: "mathord", replace: "\u03b3"},
    "\\delta": {font: "main", group: "mathord", replace: "\u03b4"},
    "\\epsilon": {font: "main", group: "mathord", replace: "\u03f5"},
    "\\zeta": {font: "main", group: "mathord", replace: "\u03b6"},
    "\\eta": {font: "main", group: "mathord", replace: "\u03b7"},
    "\\theta": {font: "main", group: "mathord", replace: "\u03b8"},
    "\\iota": {font: "main", group: "mathord", replace: "\u03b9"},
    "\\kappa": {font: "main", group: "mathord", replace: "\u03ba"},
    "\\lambda": {font: "main", group: "mathord", replace: "\u03bb"},
    "\\mu": {font: "main", group: "mathord", replace: "\u03bc"},
    "\\nu": {font: "main", group: "mathord", replace: "\u03bd"},
    "\\xi": {font: "main", group: "mathord", replace: "\u03be"},
    "\\omicron": {font: "main", group: "mathord", replace: "o"},
    "\\pi": {font: "main", group: "mathord", replace: "\u03c0"},
    "\\rho": {font: "main", group: "mathord", replace: "\u03c1"},
    "\\sigma": {font: "main", group: "mathord", replace: "\u03c3"},
    "\\tau": {font: "main", group: "mathord", replace: "\u03c4"},
    "\\upsilon": {font: "main", group: "mathord", replace: "\u03c5"},
    "\\phi": {font: "main", group: "mathord", replace: "\u03d5"},
    "\\chi": {font: "main", group: "mathord", replace: "\u03c7"},
    "\\psi": {font: "main", group: "mathord", replace: "\u03c8"},
    "\\omega": {font: "main", group: "mathord", replace: "\u03c9"},
    "\\varepsilon": {font: "main", group: "mathord", replace: "\u03b5"},
    "\\vartheta": {font: "main", group: "mathord", replace: "\u03d1"},
    "\\varpi": {font: "main", group: "mathord", replace: "\u03d6"},
    "\\varrho": {font: "main", group: "mathord", replace: "\u03f1"},
    "\\varsigma": {font: "main", group: "mathord", replace: "\u03c2"},
    "\\varphi": {font: "main", group: "mathord", replace: "\u03c6"},

    // Binary operators
    "*": {font: "main", group: "bin", replace: "\u2217"},
    "+": {font: "main", group: "bin"},
    "-": {font: "main", group: "bin", replace: "\u2212"},
    "\\cdot": {font: "main", group: "bin", replace: "\u22c5"},
    "\\circ": {font: "main", group: "bin", replace: "\u2218"},
    "\\div": {font: "main", group: "bin", replace: "\u00f7"},
    "\\pm": {font: "main", group: "bin", replace: "\u00b1"},
    "\\times": {font: "main", group: "bin", replace: "\u00d7"},

    // Opening delimiters
    "(": {font: "main", group: "open"},
    "[": {font: "main", group: "open"},
    "\\langle": {font: "main", group: "open", replace: "\u27e8"},
    "\\lvert": {font: "main", group: "open", replace: "|"},

    // Closing delimiters
    ")": {font: "main", group: "close"},
    "]": {font: "main", group: "close"},
    "?": {font: "main", group: "close"},
    "!": {font: "main", group: "close"},
    "\\rangle": {font: "main", group: "close", replace: "\u27e9"},
    "\\rvert": {font: "main", group: "close", replace: "|"},

    // Relations
    "=": {font: "main", group: "rel"},
    "<": {font: "main", group: "rel"},
    ">": {font: "main", group: "rel"},
    ":": {font: "main", group: "rel"},
    "\\approx": {font: "main", group: "rel", replace: "\u2248"},
    "\\cong": {font: "main", group: "rel", replace: "\u2245"},
    "\\ge": {font: "main", group: "rel", replace: "\u2265"},
    "\\geq": {font: "main", group: "rel", replace: "\u2265"},
    "\\gets": {font: "main", group: "rel", replace: "\u2190"},
    "\\in": {font: "main", group: "rel", replace: "\u2208"},
    "\\leftarrow": {font: "main", group: "rel", replace: "\u2190"},
    "\\le": {font: "main", group: "rel", replace: "\u2264"},
    "\\leq": {font: "main", group: "rel", replace: "\u2264"},
    "\\ne": {font: "main", group: "rel", replace: "\u2260"},
    "\\neq": {font: "main", group: "rel", replace: "\u2260"},
    "\\rightarrow": {font: "main", group: "rel", replace: "\u2192"},
    "\\to": {font: "main", group: "rel", replace: "\u2192"},
    "\\ngeq": {font: "ams", group: "rel", replace: "\u2271"},
    "\\nleq": {font: "ams", group: "rel", replace: "\u2270"},

    // Spacing commands
    "\\!": {font: "main", group: "spacing"},
    "\\ ": {font: "main", group: "spacing", replace: "\u00a0"},
    "\\,": {font: "main", group: "spacing"},
    "\\:": {font: "main", group: "spacing"},
    "\\;": {font: "main", group: "spacing"},
    "\\enspace": {font: "main", group: "spacing"},
    "\\qquad": {font: "main", group: "spacing"},
    "\\quad": {font: "main", group: "spacing"},
    "\\space": {font: "main", group: "spacing", replace: "\u00a0"},

    // Punctuation
    ",": {font: "main", group: "punct"},
    ";": {font: "main", group: "punct"},
    "\\colon": {font: "main", group: "punct", replace: ":"},

    // Additional binary operators (declared \mathbin by LaTeX / amssymb)
    "\\barwedge": {font: "ams", group: "bin", replace: "\u22bc"},
    "\\veebar": {font: "ams", group: "bin", replace: "\u22bb"},
    "\\odot": {font: "main", group: "bin", replace: "\u2299"},
    "\\oplus": {font: "main", group: "bin", replace: "\u2295"},
    "\\otimes": {font: "main", group: "bin", replace: "\u2297"},
    "\\oslash": {font: "main", group: "bin", replace: "\u2298"},
    "\\circledcirc": {font: "ams", group: "bin", replace: "\u229a"},
    "\\boxdot": {font: "ams", group: "bin", replace: "\u22a1"},
    "\\bigtriangleup": {font: "main", group: "bin", replace: "\u25b3"},
    "\\bigtriangledown": {font: "main", group: "bin", replace: "\u25bd"},
    "\\dagger": {font: "main", group: "bin", replace: "\u2020"},
    "\\diamond": {font: "main", group: "bin", replace: "\u22c4"},
    "\\star": {font: "main", group: "bin", replace: "\u22c6"},
    "\\triangleleft": {font: "main", group: "bin", replace: "\u25c3"},
    "\\triangleright": {font: "main", group: "bin", replace: "\u25b9"}
};
// Register each of these single characters as a "textord" symbol in the main
// font, with no replacement character.
var textSymbols = "0123456789/|@.\"";
textSymbols.split("").forEach(function(symbol) {
    symbols[symbol] = {
        font: "main",
        group: "textord"
    };
});
// Register every ASCII letter as a "mathord" symbol in the main font, with no
// replacement character.
var mathSymbols = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
mathSymbols.split("").forEach(function(letter) {
    symbols[letter] = {
        font: "main",
        group: "mathord"
    };
});
module.exports = symbols;