From 2eca338e238e194509a44ff7ffadd7f2163b7cf4 Mon Sep 17 00:00:00 2001 From: Emily Eisenberg Date: Wed, 26 Mar 2014 01:10:10 -0400 Subject: [PATCH] Replace our annoying lookup tables with a unified symbol table Summary: Before, there were a couple problems: 1. There were similar tables of functions in the buildTree and Parser modules, making it hard to add new functions, and code duplication is bad 2. We distinguished the font to use (mostly just ams vs normal) using a different parser class, which led to annoyance and some weird bugs 3. (Not actually in this code, but in D7578) We can't distinguish between functions that should be expanded in text mode and those that shouldn't. To solve these problems, I've created an all-encompassing table in symbols.js that fixes 1. and 2., and should fix 3. once it's merged in. The table keeps track of each of the functions and their class, font, and possible replacement, and then Parser and buildTree do lookups in this table. Also added the functions for the new_definitions functions, to show how easy it is to add new functions. Test Plan: 1. Run the normal tests, see they all pass 2. Run huxley, see none of the screenshots changed 3. See that things like the main page loads 4. 
See that the new functions look correct with '\barwedge\veebar\odot\oplus\otimes\oslash\circledcirc \boxdot\bigtriangleup\bigtriangledown\dagger\diamond \star\triangleleft\triangleright' Reviewers: alpert Reviewed By: alpert Differential Revision: http://phabricator.khanacademy.org/D7704 --- Parser.js | 169 ++------------- buildTree.js | 97 +-------- symbols.js | 574 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 599 insertions(+), 241 deletions(-) create mode 100644 symbols.js diff --git a/Parser.js b/Parser.js index 6e5d3dc30..28eb38658 100644 --- a/Parser.js +++ b/Parser.js @@ -1,5 +1,6 @@ var Lexer = require("./Lexer"); var utils = require("./utils"); +var symbols = require("./symbols"); var ParseError = require("./ParseError"); @@ -189,155 +190,13 @@ var sizeFuncs = [ "\\large", "\\Large", "\\LARGE", "\\huge", "\\Huge" ]; -// A map of elements that don't have arguments, and should simply be placed -// into a group depending on their type. The keys are the groups that items can -// be placed in, and the values are lists of element types that should be -// placed in those groups. 
-// -// For example, if the lexer returns something of type "colon", we should -// return a node of type "punct" -var copyFuncs = { - "textord": [ - "textord", - "\\$", - "\\%", - "\\angle", - "\\infty", - "\\prime", - "\\triangle", - "\\Gamma", - "\\Delta", - "\\Theta", - "\\Lambda", - "\\Xi", - "\\Pi", - "\\Sigma", - "\\Upsilon", - "\\Phi", - "\\Psi", - "\\Omega" - ], - "mathord": [ - "mathord", - "\\alpha", - "\\beta", - "\\gamma", - "\\delta", - "\\epsilon", - "\\zeta", - "\\eta", - "\\theta", - "\\iota", - "\\kappa", - "\\lambda", - "\\mu", - "\\nu", - "\\xi", - "\\omicron", - "\\pi", - "\\rho", - "\\sigma", - "\\tau", - "\\upsilon", - "\\phi", - "\\chi", - "\\psi", - "\\omega", - "\\varepsilon", - "\\vartheta", - "\\varpi", - "\\varrho", - "\\varsigma", - "\\varphi" - ], - "bin": [ - "bin", - "\\cdot", - "\\circ", - "\\div", - "\\pm", - "\\times" - ], - "open": [ - "open", - "\\langle", - "\\lvert" - ], - "close": [ - "close", - "\\rangle", - "\\rvert" - ], - "rel": [ - "rel", - "\\approx", - "\\cong", - "\\ge", - "\\geq", - "\\gets", - "\\in", - "\\leftarrow", - "\\le", - "\\leq", - "\\ne", - "\\neq", - "\\rightarrow", - "\\to" - ], - "amsrel": [ - "\\ngeq", - "\\nleq" - ], - "spacing": [ - "\\!", - "\\ ", - "\\,", - "\\:", - "\\;", - "\\enspace", - "\\qquad", - "\\quad", - "\\space" - ], - "punct": [ - "punct", - "\\colon" - ], - "namedfn": [ - "\\arcsin", - "\\arccos", - "\\arctan", - "\\arg", - "\\cos", - "\\cosh", - "\\cot", - "\\coth", - "\\csc", - "\\deg", - "\\dim", - "\\exp", - "\\hom", - "\\ker", - "\\lg", - "\\ln", - "\\log", - "\\sec", - "\\sin", - "\\sinh", - "\\tan", - "\\tanh" - ] -}; - -// Build a list of all of the different functions in the copyFuncs list, to -// quickly check if the function should be interpreted by the map. 
-var funcToType = {}; -for (var type in copyFuncs) { - for (var i = 0; i < copyFuncs[type].length; i++) { - var func = copyFuncs[type][i]; - funcToType[func] = type; - } -} +// A list of math functions replaced by their names +var namedFns = [ + "\\arcsin", "\\arccos", "\\arctan", "\\arg", "\\cos", "\\cosh", + "\\cot", "\\coth", "\\csc", "\\deg", "\\dim", "\\exp", "\\hom", + "\\ker", "\\lg", "\\ln", "\\log", "\\sec", "\\sin", "\\sinh", + "\\tan","\\tanh" +]; // Parses a "nucleus", which is either a single token from the tokenizer or a // function and its arguments @@ -376,6 +235,11 @@ Parser.prototype.parseNucleus = function(pos) { throw new ParseError( "Expected group after '" + nucleus.text + "'"); } + } else if (utils.contains(namedFns, nucleus.type)) { + // If this is a named function, just return it plain + return new ParseResult( + new ParseNode("namedfn", nucleus.text), + nucleus.position); } else if (nucleus.type === "\\llap" || nucleus.type === "\\rlap") { // If this is an llap or rlap, parse its argument and return var group = this.parseGroup(nucleus.position); @@ -410,15 +274,16 @@ Parser.prototype.parseNucleus = function(pos) { nucleus.type + "'"); } } else if (nucleus.type === "\\KaTeX") { + // If this is a KaTeX node, return the special katex result return new ParseResult( new ParseNode("katex", null), nucleus.position ); - } else if (funcToType[nucleus.type]) { + } else if (symbols[nucleus.text]) { // Otherwise if this is a no-argument function, find the type it - // corresponds to in the map and return + // corresponds to in the symbols map return new ParseResult( - new ParseNode(funcToType[nucleus.type], nucleus.text), + new ParseNode(symbols[nucleus.text].group, nucleus.text), nucleus.position); } else { // Otherwise, we couldn't parse it diff --git a/buildTree.js b/buildTree.js index ffe803d86..4bb0608b4 100644 --- a/buildTree.js +++ b/buildTree.js @@ -6,6 +6,7 @@ var domTree = require("./domTree"); var fontMetrics = require("./fontMetrics"); var 
parseTree = require("./parseTree"); var utils = require("./utils"); +var symbols = require("./symbols"); var buildExpression = function(expression, options, prev) { var groups = []; @@ -406,91 +407,9 @@ var buildGroup = function(group, options, prev) { } }; -var charLookup = { - "*": "\u2217", - "-": "\u2212", - "`": "\u2018", - "\\ ": "\u00a0", - "\\$": "$", - "\\%": "%", - "\\angle": "\u2220", - "\\approx": "\u2248", - "\\cdot": "\u22c5", - "\\circ": "\u2218", - "\\colon": ":", - "\\cong": "\u2245", - "\\div": "\u00f7", - "\\ge": "\u2265", - "\\geq": "\u2265", - "\\gets": "\u2190", - "\\in": "\u2208", - "\\infty": "\u221e", - "\\langle": "\u27e8", - "\\leftarrow": "\u2190", - "\\le": "\u2264", - "\\leq": "\u2264", - "\\lvert": "|", - "\\ne": "\u2260", - "\\neq": "\u2260", - "\\ngeq": "\u2271", - "\\nleq": "\u2270", - "\\pm": "\u00b1", - "\\prime": "\u2032", - "\\rangle": "\u27e9", - "\\rightarrow": "\u2192", - "\\rvert": "|", - "\\space": "\u00a0", - "\\times": "\u00d7", - "\\to": "\u2192", - "\\triangle": "\u25b3", - - "\\alpha": "\u03b1", - "\\beta": "\u03b2", - "\\gamma": "\u03b3", - "\\delta": "\u03b4", - "\\epsilon": "\u03f5", - "\\zeta": "\u03b6", - "\\eta": "\u03b7", - "\\theta": "\u03b8", - "\\iota": "\u03b9", - "\\kappa": "\u03ba", - "\\lambda": "\u03bb", - "\\mu": "\u03bc", - "\\nu": "\u03bd", - "\\xi": "\u03be", - "\\omicron": "o", - "\\pi": "\u03c0", - "\\rho": "\u03c1", - "\\sigma": "\u03c3", - "\\tau": "\u03c4", - "\\upsilon": "\u03c5", - "\\phi": "\u03d5", - "\\chi": "\u03c7", - "\\psi": "\u03c8", - "\\omega": "\u03c9", - "\\varepsilon": "\u03b5", - "\\vartheta": "\u03d1", - "\\varpi": "\u03d6", - "\\varrho": "\u03f1", - "\\varsigma": "\u03c2", - "\\varphi": "\u03c6", - - "\\Gamma": "\u0393", - "\\Delta": "\u0394", - "\\Theta": "\u0398", - "\\Lambda": "\u039b", - "\\Xi": "\u039e", - "\\Pi": "\u03a0", - "\\Sigma": "\u03a3", - "\\Upsilon": "\u03a5", - "\\Phi": "\u03a6", - "\\Psi": "\u03a8", - "\\Omega": "\u03a9" -}; - var makeText = function(value, 
style) { - if (value in charLookup) { - value = charLookup[value]; + if (symbols[value].replace) { + value = symbols[value].replace; } var metrics = fontMetrics.getCharacterMetrics(value, style); @@ -518,11 +437,11 @@ var mathit = function(value) { }; var mathrm = function(value) { - return makeText(value, "main-regular"); -}; - -var amsrm = function(value) { - return makeSpan(["amsrm"], [makeText(value, "ams-regular")]); + if (symbols[value].font === "main") { + return makeText(value, "main-regular"); + } else { + return makeSpan(["amsrm"], [makeText(value, "ams-regular")]); + } }; var buildTree = function(tree) { diff --git a/symbols.js b/symbols.js new file mode 100644 index 000000000..0f0d28d1c --- /dev/null +++ b/symbols.js @@ -0,0 +1,574 @@ +/* This file holds a list of all no-argument functions and single-character + * symbols (like 'a' or ';'). For each of the symbols, there are three + * properties they can have: + * - font (required): the font to be used for this symbol. Either "main" (the + * normal font), or "ams" (the ams fonts) + * - group (required): the ParseNode group type the symbol should have (e.g. + * "textord", "mathord", "bin", "rel", etc.) + * - replace (optional): the character that this symbol or function should be + * replaced with (e.g. 
"\phi" has a replace value of "\u03d5", the phi + * character in the main font) + */ + +var symbols = { + "`": { + font: "main", + group: "textord", + replace: "\u2018" + }, + "\\$": { + font: "main", + group: "textord", + replace: "$" + }, + "\\%": { + font: "main", + group: "textord", + replace: "%" + }, + "\\angle": { + font: "main", + group: "textord", + replace: "\u2220" + }, + "\\infty": { + font: "main", + group: "textord", + replace: "\u221e" + }, + "\\prime": { + font: "main", + group: "textord", + replace: "\u2032" + }, + "\\triangle": { + font: "main", + group: "textord", + replace: "\u25b3" + }, + "\\Gamma": { + font: "main", + group: "textord", + replace: "\u0393" + }, + "\\Delta": { + font: "main", + group: "textord", + replace: "\u0394" + }, + "\\Theta": { + font: "main", + group: "textord", + replace: "\u0398" + }, + "\\Lambda": { + font: "main", + group: "textord", + replace: "\u039b" + }, + "\\Xi": { + font: "main", + group: "textord", + replace: "\u039e" + }, + "\\Pi": { + font: "main", + group: "textord", + replace: "\u03a0" + }, + "\\Sigma": { + font: "main", + group: "textord", + replace: "\u03a3" + }, + "\\Upsilon": { + font: "main", + group: "textord", + replace: "\u03a5" + }, + "\\Phi": { + font: "main", + group: "textord", + replace: "\u03a6" + }, + "\\Psi": { + font: "main", + group: "textord", + replace: "\u03a8" + }, + "\\Omega": { + font: "main", + group: "textord", + replace: "\u03a9" + }, + "\\alpha": { + font: "main", + group: "mathord", + replace: "\u03b1" + }, + "\\beta": { + font: "main", + group: "mathord", + replace: "\u03b2" + }, + "\\gamma": { + font: "main", + group: "mathord", + replace: "\u03b3" + }, + "\\delta": { + font: "main", + group: "mathord", + replace: "\u03b4" + }, + "\\epsilon": { + font: "main", + group: "mathord", + replace: "\u03f5" + }, + "\\zeta": { + font: "main", + group: "mathord", + replace: "\u03b6" + }, + "\\eta": { + font: "main", + group: "mathord", + replace: "\u03b7" + }, + "\\theta": { + font: 
"main", + group: "mathord", + replace: "\u03b8" + }, + "\\iota": { + font: "main", + group: "mathord", + replace: "\u03b9" + }, + "\\kappa": { + font: "main", + group: "mathord", + replace: "\u03ba" + }, + "\\lambda": { + font: "main", + group: "mathord", + replace: "\u03bb" + }, + "\\mu": { + font: "main", + group: "mathord", + replace: "\u03bc" + }, + "\\nu": { + font: "main", + group: "mathord", + replace: "\u03bd" + }, + "\\xi": { + font: "main", + group: "mathord", + replace: "\u03be" + }, + "\\omicron": { + font: "main", + group: "mathord", + replace: "o" + }, + "\\pi": { + font: "main", + group: "mathord", + replace: "\u03c0" + }, + "\\rho": { + font: "main", + group: "mathord", + replace: "\u03c1" + }, + "\\sigma": { + font: "main", + group: "mathord", + replace: "\u03c3" + }, + "\\tau": { + font: "main", + group: "mathord", + replace: "\u03c4" + }, + "\\upsilon": { + font: "main", + group: "mathord", + replace: "\u03c5" + }, + "\\phi": { + font: "main", + group: "mathord", + replace: "\u03d5" + }, + "\\chi": { + font: "main", + group: "mathord", + replace: "\u03c7" + }, + "\\psi": { + font: "main", + group: "mathord", + replace: "\u03c8" + }, + "\\omega": { + font: "main", + group: "mathord", + replace: "\u03c9" + }, + "\\varepsilon": { + font: "main", + group: "mathord", + replace: "\u03b5" + }, + "\\vartheta": { + font: "main", + group: "mathord", + replace: "\u03d1" + }, + "\\varpi": { + font: "main", + group: "mathord", + replace: "\u03d6" + }, + "\\varrho": { + font: "main", + group: "mathord", + replace: "\u03f1" + }, + "\\varsigma": { + font: "main", + group: "mathord", + replace: "\u03c2" + }, + "\\varphi": { + font: "main", + group: "mathord", + replace: "\u03c6" + }, + "*": { + font: "main", + group: "bin", + replace: "\u2217" + }, + "+": { + font: "main", + group: "bin" + }, + "-": { + font: "main", + group: "bin", + replace: "\u2212" + }, + "\\cdot": { + font: "main", + group: "bin", + replace: "\u22c5" + }, + "\\circ": { + font: "main", + 
group: "bin", + replace: "\u2218" + }, + "\\div": { + font: "main", + group: "bin", + replace: "\u00f7" + }, + "\\pm": { + font: "main", + group: "bin", + replace: "\u00b1" + }, + "\\times": { + font: "main", + group: "bin", + replace: "\u00d7" + }, + "(": { + font: "main", + group: "open" + }, + "[": { + font: "main", + group: "open" + }, + "\\langle": { + font: "main", + group: "open", + replace: "\u27e8" + }, + "\\lvert": { + font: "main", + group: "open", + replace: "|" + }, + ")": { + font: "main", + group: "close" + }, + "]": { + font: "main", + group: "close" + }, + "?": { + font: "main", + group: "close" + }, + "!": { + font: "main", + group: "close" + }, + "\\rangle": { + font: "main", + group: "close", + replace: "\u27e9" + }, + "\\rvert": { + font: "main", + group: "close", + replace: "|" + }, + "=": { + font: "main", + group: "rel" + }, + "<": { + font: "main", + group: "rel" + }, + ">": { + font: "main", + group: "rel" + }, + ":": { + font: "main", + group: "rel" + }, + "\\approx": { + font: "main", + group: "rel", + replace: "\u2248" + }, + "\\cong": { + font: "main", + group: "rel", + replace: "\u2245" + }, + "\\ge": { + font: "main", + group: "rel", + replace: "\u2265" + }, + "\\geq": { + font: "main", + group: "rel", + replace: "\u2265" + }, + "\\gets": { + font: "main", + group: "rel", + replace: "\u2190" + }, + "\\in": { + font: "main", + group: "rel", + replace: "\u2208" + }, + "\\leftarrow": { + font: "main", + group: "rel", + replace: "\u2190" + }, + "\\le": { + font: "main", + group: "rel", + replace: "\u2264" + }, + "\\leq": { + font: "main", + group: "rel", + replace: "\u2264" + }, + "\\ne": { + font: "main", + group: "rel", + replace: "\u2260" + }, + "\\neq": { + font: "main", + group: "rel", + replace: "\u2260" + }, + "\\rightarrow": { + font: "main", + group: "rel", + replace: "\u2192" + }, + "\\to": { + font: "main", + group: "rel", + replace: "\u2192" + }, + "\\ngeq": { + font: "ams", + group: "rel", + replace: "\u2271" + }, + 
"\\nleq": { + font: "ams", + group: "rel", + replace: "\u2270" + }, + "\\!": { + font: "main", + group: "spacing" + }, + "\\ ": { + font: "main", + group: "spacing", + replace: "\u00a0" + }, + "\\,": { + font: "main", + group: "spacing" + }, + "\\:": { + font: "main", + group: "spacing" + }, + "\\;": { + font: "main", + group: "spacing" + }, + "\\enspace": { + font: "main", + group: "spacing" + }, + "\\qquad": { + font: "main", + group: "spacing" + }, + "\\quad": { + font: "main", + group: "spacing" + }, + "\\space": { + font: "main", + group: "spacing", + replace: "\u00a0" + }, + ",": { + font: "main", + group: "punct" + }, + ";": { + font: "main", + group: "punct" + }, + "\\colon": { + font: "main", + group: "punct", + replace: ":" + }, + "\\barwedge": { + font: "ams", + group: "textord", + replace: "\u22bc" + }, + "\\veebar": { + font: "ams", + group: "textord", + replace: "\u22bb" + }, + "\\odot": { + font: "main", + group: "textord", + replace: "\u2299" + }, + "\\oplus": { + font: "main", + group: "textord", + replace: "\u2295" + }, + "\\otimes": { + font: "main", + group: "textord", + replace: "\u2297" + }, + "\\oslash": { + font: "main", + group: "textord", + replace: "\u2298" + }, + "\\circledcirc": { + font: "ams", + group: "textord", + replace: "\u229a" + }, + "\\boxdot": { + font: "ams", + group: "textord", + replace: "\u22a1" + }, + "\\bigtriangleup": { + font: "main", + group: "textord", + replace: "\u25b3" + }, + "\\bigtriangledown": { + font: "main", + group: "textord", + replace: "\u25bd" + }, + "\\dagger": { + font: "main", + group: "textord", + replace: "\u2020" + }, + "\\diamond": { + font: "main", + group: "textord", + replace: "\u22c4" + }, + "\\star": { + font: "main", + group: "textord", + replace: "\u22c6" + }, + "\\triangleleft": { + font: "main", + group: "textord", + replace: "\u25c3" + }, + "\\triangleright": { + font: "main", + group: "textord", + replace: "\u25b9" + } +}; + +var textSymbols = "0123456789/|@.\""; +for (var i = 0; i < 
textSymbols.length; i++) { + var ch = textSymbols.charAt(i); + symbols[ch] = { + font: "main", + group: "textord" + }; +} + +var mathSymbols = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; +for (var i = 0; i < mathSymbols.length; i++) { + var ch = mathSymbols.charAt(i); + symbols[ch] = { + font: "main", + group: "mathord" + }; +} + +module.exports = symbols;