Add accents

Summary: Add support for math-mode accents. This involves a couple of changes. First, in order to correctly position the accents, we must know the kern between every character and the "skewchar" in that font. To do this, we improve our tfm parser to run the mini-kern-language and calculate kerns. We then export these into fontMetrics.js. Then, we add normal support for accents. In particular, we do some special handling for supsubs around accents. This involves building the supsub separately without the accent, and then replacing its base with the built accent. Finally, the character in the fonts for the \vec command is a combining Unicode character, so it is shifted to the left, whereas none of the other accent characters are. We add some special handling for \vec to account for this.

Fixes #7

Test Plan:
- Make sure tests pass
- Make sure no huxley screenshots changed, and the new one looks good

Reviewers: alpert

Reviewed By: alpert

Differential Revision: http://phabricator.khanacademy.org/D13157
2014-09-13 21:30:35 -07:00 · 2014-09-13 21:30:35 -07:00 · 0dca731da6
commit 0dca731da6
parent 0d42a902ac
14 changed files with 340 additions and 33 deletions
--- a/buildCommon.js
+++ b/buildCommon.js
@ -12,11 +12,12 @@ var makeSymbol = function(value, style, mode, color, classes) {
    var symbolNode;
    if (metrics) {
        symbolNode = new domTree.symbolNode(
-            value, metrics.height, metrics.depth, metrics.italic, classes);
+            value, metrics.height, metrics.depth, metrics.italic, metrics.skew,
+            classes);
    } else {
        console && console.warn("No character metrics for '" + value +
            "' in style '" + style + "'");
-        symbolNode = new domTree.symbolNode(value, 0, 0, 0, classes);
+        symbolNode = new domTree.symbolNode(value, 0, 0, 0, 0, classes);
    }

    if (color) {
--- a/buildTree.js
+++ b/buildTree.js
@ -40,7 +40,8 @@ var groupToType = {
    overline: "mord",
    rule: "mord",
    leftright: "minner",
-    sqrt: "mord"
+    sqrt: "mord",
+    accent: "mord"
 };

 var getTypeOfGroup = function(group) {
@ -64,34 +65,50 @@ var getTypeOfGroup = function(group) {
    }
 };

-var isCharacterBox = function(group) {
-    if (group == null) {
-        return false;
-    } else if (group.type === "mathord" ||
-               group.type === "textord" ||
-               group.type === "bin" ||
-               group.type === "rel" ||
-               group.type === "open" ||
-               group.type === "close" ||
-               group.type === "punct") {
-        return true;
-    } else if (group.type === "ordgroup") {
-        return group.value.length === 1 && isCharacterBox(group.value[0]);
-    } else {
-        return false;
-    }
-};
-
 var shouldHandleSupSub = function(group, options) {
    if (group == null) {
        return false;
    } else if (group.type === "op") {
        return group.value.limits && options.style.id === Style.DISPLAY.id;
+    } else if (group.type === "accent") {
+        return isCharacterBox(group.value.base);
    } else {
        return null;
    }
 };

+var getBaseElem = function(group) {
+    if (group == null) {
+        return false;
+    } else if (group.type === "ordgroup") {
+        if (group.value.length === 1) {
+            return getBaseElem(group.value[0]);
+        } else {
+            return group;
+        }
+    } else if (group.type === "color") {
+        if (group.value.value.length === 1) {
+            return getBaseElem(group.value.value[0]);
+        } else {
+            return group;
+        }
+    } else {
+        return group;
+    }
+};
+
+var isCharacterBox = function(group) {
+    var baseElem = getBaseElem(group);
+
+    return baseElem.type === "mathord" ||
+        baseElem.type === "textord" ||
+        baseElem.type === "bin" ||
+        baseElem.type === "rel" ||
+        baseElem.type === "open" ||
+        baseElem.type === "close" ||
+        baseElem.type === "punct";
+};
+
 var groupTypes = {
    mathord: function(group, options, prev) {
        return buildCommon.mathit(
@ -112,7 +129,7 @@ var groupTypes = {
        }
        if (!prev || utils.contains(["mbin", "mopen", "mrel", "mop", "mpunct"],
                getTypeOfGroup(prevAtom))) {
-            group.type = "ord";
+            group.type = "textord";
            className = "mord";
        }

@ -725,6 +742,76 @@ var groupTypes = {
        rule.height = height;

        return rule;
+    },
+
+    accent: function(group, options, prev) {
+        var base = group.value.base;
+
+        var supsubGroup;
+        if (group.type === "supsub") {
+            var supsub = group;
+            group = group.value.base;
+            base = group.value.base;
+            supsub.value.base = base;
+
+            supsubGroup = buildGroup(
+                supsub, options.reset());
+        }
+
+        var body = buildGroup(
+            base, options.withStyle(options.style.cramp()));
+
+        var s;
+        if (isCharacterBox(group.value.base)) {
+            var baseChar = getBaseElem(group.value.base);
+            var baseGroup = buildGroup(
+                baseChar, options.withStyle(options.style.cramp()));
+            s = baseGroup.skew;
+        } else {
+            s = 0;
+        }
+
+        var delta = Math.min(body.height, fontMetrics.metrics.xHeight);
+
+        var accent = buildCommon.makeSymbol(
+            group.value.accent, "Main-Regular", "math", options.getColor());
+        accent.italic = 0;
+
+        // The \vec character that the fonts use is a combining character, and
+        // thus shows up much too far to the left. To account for this, we add a
+        // specific class which shifts the accent over to where we want it.
+        // TODO(emily): Fix this in a better way, like by changing the font
+        var vecClass = group.value.accent === "\\vec" ? "accent-vec" : null;
+
+        var accentBody = makeSpan(["accent-body", vecClass], [
+            makeSpan([], [accent])]);
+
+        var accentBody = buildCommon.makeVList([
+            {type: "elem", elem: body},
+            {type: "kern", size: -delta},
+            {type: "elem", elem: accentBody}
+        ], "firstBaseline", null, options);
+
+        accentBody.children[1].style.marginLeft = 2 * s + "em";
+
+        var accentWrap = makeSpan(["mord", "accent"], [accentBody]);
+
+        if (supsubGroup) {
+            // Here, we replace the "base" child of the supsub with our newly
+            // generated accent.
+            supsubGroup.children[0] = accentWrap;
+
+            // Since we don't rerun the height calculation after replacing the
+            // accent, we manually recalculate height.
+            supsubGroup.height = Math.max(accentWrap.height, supsubGroup.height);
+
+            // Accents should always be ords, even when their innards are not.
+            supsubGroup.classes[0] = "mord";
+
+            return supsubGroup;
+        } else {
+            return accentWrap;
+        }
    }
 };

--- a/domTree.js
+++ b/domTree.js
@ -103,11 +103,12 @@ documentFragment.prototype.toMarkup = function() {
    return markup;
 };

-function symbolNode(value, height, depth, italic, classes, style) {
+function symbolNode(value, height, depth, italic, skew, classes, style) {
    this.value = value || "";
    this.height = height || 0;
    this.depth = depth || 0;
    this.italic = italic || 0;
+    this.skew = skew || 0;
    this.classes = classes || [];
    this.style = style || {};
    this.maxFontSize = 0;
--- a/fontMetrics.js
+++ b/fontMetrics.js
--- a/functions.js
+++ b/functions.js
@ -394,6 +394,26 @@ var duplicatedFunctions = [
        data: {
            numArgs: 0
        }
+    },
+
+    // Accents
+    {
+        funcs: [
+            "\\acute", "\\grave", "\\ddot", "\\tilde", "\\bar", "\\breve",
+            "\\check", "\\hat", "\\vec", "\\dot"
+            // We don't support expanding accents yet
+            // "\\widetilde", "\\widehat"
+        ],
+        data: {
+            numArgs: 1,
+            handler: function(func, base) {
+                return {
+                    type: "accent",
+                    accent: func,
+                    base: base
+                };
+            }
+        }
    }
 ];

--- a/metrics/extract_tfms.py
+++ b/metrics/extract_tfms.py
@ -34,6 +34,23 @@ def main():
        'msbm10.tfm'
    ]

+    # Extracted by running `\font\a=<font>` and then `\showthe\skewchar\a` in
+    # TeX, where `<font>` is the name of the font listed here. The skewchar
+    # will be printed out in the output. If it outputs `-1`, that means there
+    # is no skewchar, so we use `None` here.
+    font_skewchar = {
+        'cmbsy10': None,
+        'cmbx10': None,
+        'cmex10': None,
+        'cmmi10': 127,
+        'cmmib10': None,
+        'cmr10': None,
+        'cmsy10': 48,
+        'cmti10': None,
+        'msam10': None,
+        'msbm10': None
+    }
+
    font_name_to_tfm = {}

    for font_name in fonts:
@ -57,10 +74,17 @@ def main():
            depth = round(tfm_char.depth - yshift / 1000.0, 5)
            italic = round(tfm_char.italic_correction, 5)

+            skewkern = 0.0
+            if (font_skewchar[font] and
+                    font_skewchar[font] in tfm_char.kern_table):
+                skewkern = round(
+                    tfm_char.kern_table[font_skewchar[font]], 5)
+
            families[family][char_num] = {
                'height': height,
                'depth': depth,
-                'italic': italic
+                'italic': italic,
+                'skew': skewkern,
            }

    sys.stdout.write(
--- a/metrics/extract_ttfs.py
+++ b/metrics/extract_ttfs.py
@ -37,11 +37,13 @@ def main():

                # TODO(emily): Figure out a real way to calculate this
                italic = 0
+                skew = 0

                start_json[font][ord(char)] = {
                    height: height / fontInfo.em,
                    depth: depth / fontInfo.em,
                    italic: italic / fontInfo.em,
+                    skew: skew / fontInfo.em,
                }

    sys.stdout.write(
--- a/metrics/parse_tfm.py
+++ b/metrics/parse_tfm.py
@ -12,18 +12,48 @@ class CharInfoWord(object):
        self.tag = b3 & 0b11
        self.remainder = b4

+    def has_ligkern(self):
+        return self.tag == 1
+
+    def ligkern_start(self):
+        return self.remainder
+
+
+class LigKernProgram(object):
+    def __init__(self, program):
+        self.program = program
+
+    def execute(self, start, next_char):
+        curr_instruction = start
+        while True:
+            instruction = self.program[curr_instruction]
+            (skip, inst_next_char, op, remainder) = instruction
+
+            if inst_next_char == next_char:
+                if op < 128:
+                    # Don't worry about ligatures for now, we only need kerns
+                    return None
+                else:
+                    return 256 * (op - 128) + remainder
+            elif skip >= 128:
+                return None
+            else:
+                curr_instruction += 1 + skip
+

 class TfmCharMetrics(object):
-    def __init__(self, width, height, depth, italic):
+    def __init__(self, width, height, depth, italic, kern_table):
        self.width = width
        self.height = height
        self.depth = depth
        self.italic_correction = italic
+        self.kern_table = kern_table


 class TfmFile(object):
    def __init__(self, start_char, end_char, char_info, width_table,
-                 height_table, depth_table, italic_table):
+                 height_table, depth_table, italic_table, ligkern_table,
+                 kern_table):
        self.start_char = start_char
        self.end_char = end_char
        self.char_info = char_info
@ -31,6 +61,8 @@ class TfmFile(object):
        self.height_table = height_table
        self.depth_table = depth_table
        self.italic_table = italic_table
+        self.ligkern_program = LigKernProgram(ligkern_table)
+        self.kern_table = kern_table

    def get_char_metrics(self, char_num):
        if char_num < self.start_char or char_num > self.end_char:
@ -38,11 +70,19 @@ class TfmFile(object):

        info = self.char_info[char_num + self.start_char]

+        char_kern_table = {}
+        if info.has_ligkern():
+            for char in range(self.start_char, self.end_char + 1):
+                kern = self.ligkern_program.execute(info.ligkern_start(), char)
+                if kern:
+                    char_kern_table[char] = self.kern_table[kern]
+
        return TfmCharMetrics(
            self.width_table[info.width_index],
            self.height_table[info.height_index],
            self.depth_table[info.depth_index],
-            self.italic_table[info.italic_index])
+            self.italic_table[info.italic_index],
+            char_kern_table)


 class TfmReader(object):
@ -96,10 +136,8 @@ def read_tfm_file(file_name):
        depth_table_size = reader.read_halfword()
        italic_table_size = reader.read_halfword()

-        # ligkern_table_size
-        reader.read_halfword()
-        # kern_table_size
-        reader.read_halfword()
+        ligkern_table_size = reader.read_halfword()
+        kern_table_size = reader.read_halfword()

        # extensible_table_size
        reader.read_halfword()
@ -142,8 +180,22 @@ def read_tfm_file(file_name):
        for i in range(italic_table_size):
            italic_table.append(reader.read_fixword())

+        ligkern_table = []
+        for i in range(ligkern_table_size):
+            skip = reader.read_byte()
+            next_char = reader.read_byte()
+            op = reader.read_byte()
+            remainder = reader.read_byte()
+
+            ligkern_table.append((skip, next_char, op, remainder))
+
+        kern_table = []
+        for i in range(kern_table_size):
+            kern_table.append(reader.read_fixword())
+
        # There is more information, like the extensible and param tables, but
        # we don't need these for now

        return TfmFile(start_char, end_char, char_info, width_table,
-                       height_table, depth_table, italic_table)
+                       height_table, depth_table, italic_table,
+                       ligkern_table, kern_table)
--- a/static/katex.less
+++ b/static/katex.less
@ -414,4 +414,21 @@ big parens
            text-align: center;
        }
    }
+
+    .accent {
+        > .vlist > span {
+            text-align: center;
+        }
+
+        .accent-body > span {
+            width: 0;
+        }
+
+        .accent-body.accent-vec > span {
+            position: relative;
+            // This value is half of the amount by which MathJax's makeFF shifts
+            // it left. We center it by shifting it halfway right again.
+            left: 0.326em;
+        }
+    }
 }
--- a/symbols.js
+++ b/symbols.js
@ -783,6 +783,56 @@ var symbols = {
            font: "main",
            group: "textord",
            replace: "\u22ee"
+        },
+        "\\acute": {
+            font: "main",
+            group: "accent",
+            replace: "\u00b4"
+        },
+        "\\grave": {
+            font: "main",
+            group: "accent",
+            replace: "\u0060"
+        },
+        "\\ddot": {
+            font: "main",
+            group: "accent",
+            replace: "\u00a8"
+        },
+        "\\tilde": {
+            font: "main",
+            group: "accent",
+            replace: "\u007e"
+        },
+        "\\bar": {
+            font: "main",
+            group: "accent",
+            replace: "\u00af"
+        },
+        "\\breve": {
+            font: "main",
+            group: "accent",
+            replace: "\u02d8"
+        },
+        "\\check": {
+            font: "main",
+            group: "accent",
+            replace: "\u02c7"
+        },
+        "\\hat": {
+            font: "main",
+            group: "accent",
+            replace: "\u005e"
+        },
+        "\\vec": {
+            font: "main",
+            group: "accent",
+            replace: "\u20d7"
+        },
+        "\\dot": {
+            font: "main",
+            group: "accent",
+            replace: "\u02d9"
        }
    },
    "text": {
--- a/test/huxley/Accents.hux/firefox-1.png
+++ b/test/huxley/Accents.hux/firefox-1.png
--- a/test/huxley/Accents.hux/record.json
+++ b/test/huxley/Accents.hux/record.json
@ -0,0 +1,5 @@
+[
+  {
+    "action": "screenshot"
+  }
+]
--- a/test/huxley/Huxleyfile.json
+++ b/test/huxley/Huxleyfile.json
@ -183,5 +183,11 @@
        "name": "SupSubOffsets",
        "screenSize": [1024, 768],
        "url": "http://localhost:7936/test/huxley/test.html?m=\\displaystyle \\int_{2+3}x f^{2+3}+3\\lim_{2+3+4+5}f"
+    },
+
+    {
+        "name": "Accents",
+        "screenSize": [1024, 768],
+        "url": "http://localhost:7936/test/huxley/test.html?m=\\vec{A}\\vec{x}\\vec x^2\\vec{x}_2^2\\vec{A}^2\\vec{xA}^2"
    }
 ]
--- a/test/katex-spec.js
+++ b/test/katex-spec.js
@ -998,3 +998,45 @@ describe("A markup generator", function() {
        expect(markup).not.toContain("marginRight");
    });
 });
+
+describe("An accent parser", function() {
+    it("should not fail", function() {
+        expect("\\vec{x}").toParse();
+        expect("\\vec{x^2}").toParse();
+        expect("\\vec{x}^2").toParse();
+        expect("\\vec x").toParse();
+    });
+
+    it("should produce accents", function() {
+        var parse = parseTree("\\vec x")[0];
+
+        expect(parse.type).toMatch("accent");
+    });
+
+    it("should be grouped more tightly than supsubs", function() {
+        var parse = parseTree("\\vec x^2")[0];
+
+        expect(parse.type).toMatch("supsub");
+    });
+
+    it("should not parse expanding accents", function() {
+        expect("\\widehat{x}").toNotParse();
+    });
+});
+
+describe("An accent builder", function() {
+    it("should not fail", function() {
+        expect("\\vec{x}").toBuild();
+        expect("\\vec{x}^2").toBuild();
+        expect("\\vec{x}_2").toBuild();
+        expect("\\vec{x}_2^2").toBuild();
+    });
+
+    it("should produce mords", function() {
+        expect(getBuilt("\\vec x")[0].classes).toContain("mord");
+        expect(getBuilt("\\vec +")[0].classes).toContain("mord");
+        expect(getBuilt("\\vec +")[0].classes).not.toContain("mbin");
+        expect(getBuilt("\\vec )^2")[0].classes).toContain("mord");
+        expect(getBuilt("\\vec )^2")[0].classes).not.toContain("mclose");
+    });
+});