From 1846929110a1b7b8561491fe13b64c8e85b97b66 Mon Sep 17 00:00:00 2001 From: Martin von Gagern Date: Tue, 7 Jul 2015 01:56:53 +0200 Subject: [PATCH 1/2] Add tool to check symbol group types against LaTeX At the moment, the tool isn't suitable for automatic regression tests yet, since some symbols will require human verification. That might change in the future though, with a list of manually verified symbols which can be skipped in automatic verification. For this reason the file got placed into the test directory. --- .gitignore | 4 ++ test/symgroups.js | 151 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 test/symgroups.js diff --git a/.gitignore b/.gitignore index d21b35f57..058d8cf4e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,7 @@ last.png diff.png /test/screenshotter/tex/ /test/screenshotter/diff/ +/test/symgroups.tex +/test/symgroups.aux +/test/symgroups.log +/test/symgroups.pdf diff --git a/test/symgroups.js b/test/symgroups.js new file mode 100644 index 000000000..70664d34c --- /dev/null +++ b/test/symgroups.js @@ -0,0 +1,151 @@ +"use strict"; + +var fs = require("fs"); +var childProcess = require("child_process"); + +var opts = require("nomnom") + .option("spacing", { + flag: true, + help: "Print mismatches involving spacing commands" + }) + .parse(); + +var symbols = require("../src/symbols"); +var keys = Object.keys(symbols.math); +keys.sort(); +var types = [ + "mathord", "op", "bin", "rel", "open", "close", "punct", "inner", + "spacing", "accent", "textord" +]; + +process.nextTick(writeTexFile); + +function writeTexFile() { + var tex = fs.createWriteStream("symgroups.tex"); + tex.on("finish", typeset); + tex.write("\\documentclass{article}\n" + + "\\usepackage{textcomp,amsmath,amssymb,gensymb}\n" + + "\\begin{document}\n" + + "\\showboxbreadth=\\maxdimen\\showboxdepth=\\maxdimen\n\n"); + keys.forEach(function(key, idx) { + var sym = symbols.math[key]; + var type = types.indexOf(sym.group) + 1; + tex.write("$" + idx + "+" + key + "+" + type + "\\showlists$\n\n"); + }); + tex.end("\\end{document}\n"); +} + +function typeset() { + var proc = childProcess.spawn( + "pdflatex", ["--interaction=nonstopmode", "symgroups"], + {stdio: "ignore"}); + proc.on("exit", function(code, signal) { + if (signal) { + throw new Error("pdflatex terminated by signal " + signal); + } + fs.readFile("symgroups.log", "ascii", evaluate); + }).on("error", function(err) { + throw err; + }); +} + +/* Consider the symbol "\sim" as an example. At the time of this + * writing, it has index 431 in our list, and is of group "rel" which + * is the fourth of the types listed above. So we construct an input line + * $431+\sim+4\showlists$ and receive corresponding output + * + * ### math mode entered at line 870 + * \mathord + * .\fam0 4 + * \mathord + * .\fam0 3 + * \mathord + * .\fam0 2 + * \mathbin + * .\fam0 + + * \mathrel + * .\fam2 ' + * \mathbin + * .\fam0 + + * \mathord + * .\fam0 4 + * ### horizontal mode entered at line 870 + * + * This is what we parse, using some regular expressions. + */ + +// Extract individual blocks, from switch to math mode up to switch back. +var reMM = /^### math mode entered.*\n([^]*?)###/mg; + +// Identify the parts separated by the plus signs +var reParts = /([^]*^\.\\fam0 \+\n)([^]+)(\\mathbin\n\.+\\fam0 \+[^]*)/m; + +// Variation of the above in case we have nothing between the plus signs +var reEmpty = /^\.\\fam0 \+\n\\mathbin\n\.\\fam0 \+/m; + +// Match any printed digit in the first or last of these parts +var reDigit = /^\.\\fam0 ([0-9])/mg; + +// Match the atom type, i.e. "\mathrel" in the above example +var reAtom = /\\([a-z]+)/; + +function evaluate(err, log) { + if (err) { + throw err; + } + + var match, nextIndex = 0; + while ((match = reMM.exec(log)) !== null) { + var list = match[1]; + match = reParts.exec(list); + if (!match) { + match = reEmpty.exec(list); + if (match) { + console.log(keys[nextIndex] + " (index " + nextIndex + + ") in LaTeX apparently " + + "doesn't contribute to the output.\n"); + nextIndex++; + continue; + } + console.error("Can't split this into parts:"); + console.error(list); + process.exit(2); + } + var idx = extractDigits(match[1]); + var atom = match[2]; + var katexType = types[extractDigits(match[3]) - 1] || "???"; + match = reAtom.exec(atom); + if (!match) { + console.error("Failed to find atom type"); + console.error(atom); + console.error(list); + process.exit(3); + } + var latexType = match[1]; + if (katexType !== latexType && "math" + katexType !== latexType && + (katexType !== "textord" || latexType !== "mathord") && + (katexType !== "spacing" || opts.spacing)) { + console.log(keys[idx] + " (index " + idx + ") has '" + katexType + + "' in KaTeX, but LaTeX uses '" + latexType + "':"); + console.log(atom); + } + if (nextIndex !== idx) { + console.error("Index " + nextIndex + " not found in log"); + process.exit(4); + } + nextIndex = idx + 1; + } + if (nextIndex !== keys.length) { + console.error("Processed " + nextIndex + + " out of " + keys.length + " symbols"); + process.exit(4); + } +} + +function extractDigits(str) { + var match, res = ""; + while ((match = reDigit.exec(str)) !== null) { + res += match[1]; + } + return +res; +} From 9942283db037dc1e68e2647343a8936d7b9f0693 Mon Sep 17 00:00:00 2001 From: Martin von Gagern Date: Tue, 7 Jul 2015 01:59:11 +0200 Subject: [PATCH 2/2] Fix incorrect symbol types These symbols should have different types, according to symgroups.js --- src/symbols.js | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/symbols.js b/src/symbols.js index 62b9ecb29..e5b1fd471 100644 --- a/src/symbols.js +++ b/src/symbols.js @@ -920,22 +920,22 @@ var symbols = { // AMS Delimiters "\\ulcorner": { font: "ams", - group: "textord", + group: "open", replace: "\u250c" }, "\\urcorner": { font: "ams", - group: "textord", + group: "close", replace: "\u2510" }, "\\llcorner": { font: "ams", - group: "textord", + group: "open", replace: "\u2514" }, "\\lrcorner": { font: "ams", - group: "textord", + group: "close", replace: "\u2518" }, @@ -2172,12 +2172,12 @@ var symbols = { }, "\\barwedge": { font: "ams", - group: "textord", + group: "bin", replace: "\u22bc" }, "\\veebar": { font: "ams", - group: "textord", + group: "bin", replace: "\u22bb" }, "\\odot": { @@ -2207,12 +2207,12 @@ var symbols = { }, "\\circledcirc": { font: "ams", - group: "textord", + group: "bin", replace: "\u229a" }, "\\boxdot": { font: "ams", - group: "textord", + group: "bin", replace: "\u22a1" }, "\\bigtriangleup": { @@ -2327,32 +2327,32 @@ var symbols = { }, "\\uparrow": { font: "main", - group: "textord", + group: "rel", replace: "\u2191" }, "\\Uparrow": { font: "main", - group: "textord", + group: "rel", replace: "\u21d1" }, "\\downarrow": { font: "main", - group: "textord", + group: "rel", replace: "\u2193" }, "\\Downarrow": { font: "main", - group: "textord", + group: "rel", replace: "\u21d3" }, "\\updownarrow": { font: "main", - group: "textord", + group: "rel", replace: "\u2195" }, "\\Updownarrow": { font: "main", - group: "textord", + group: "rel", replace: "\u21d5" }, "\\coprod": { @@ -2447,7 +2447,7 @@ var symbols = { }, "\\ldots": { font: "main", - group: "punct", + group: "inner", replace: "\u2026" }, "\\cdots": {