KaTeX/metrics/extract_ttfs.py
Emily Eisenberg d61a04c80d Fix up small problems with #126
Summary:
 - Some of the symbols were of the wrong type (I wrote some scripts to
   search the TeX source for the correct types for the symbols; I should
   put these somewhere for future reference)
 - Some of the symbols had an incorrect Unicode replacement value. I
   copied what MathJax used for the ones that were wrong. In one case,
   a symbol had the wrong font, so I used the font MathJax used.
 - Some symbols were duplicates of existing symbols, those were removed.
 - Some symbols are text symbols that aren't usable in math mode (maybe
   they should be text symbols?) (`\aa`, `\AA`, etc.)
 - Some symbols didn't seem to work, or I couldn't figure out how to make
   them work. For example, `\imath` and `\jmath` require an italic font
   in KaTeX_Main, but we don't have the ability to do that yet. `\dots`
   was removed because it's not an ordinary symbol. `\*vert` were
   removed because I don't understand what they're for, nor what they
   should look like.
 - Some symbols weren't actually in the KaTeX fonts; to make sure they
   don't fall back to something weird, I removed them. (`\S`, `\pounds`,
   etc.)

Test plan:
 - `make test`
 - See that all of the symbols in: `\equiv \prec \succ \sim \perp
   \preceq \succeq \simeq \mid \ll \gg \asymp \parallel \bowtie \smile
   \sqsubseteq \sqsupseteq \doteq \frown \ni \propto \vdash \dashv \owns
   \ldotp \cdotp \aleph \forall \hbar \exists \nabla \flat \ell \natural
   \clubsuit \wp \sharp \diamondsuit \Re \heartsuit \Im \spadesuit
   \rmoustache \lmoustache \rgroup \lgroup \mp \ominus \uplus \sqcap
   \ast \sqcup \bigcirc \bullet \ddagger \wr \amalg \longleftarrow
   \Leftarrow \Longleftarrow \longrightarrow \Rightarrow \Longrightarrow
   \leftrightarrow \longleftrightarrow \Leftrightarrow
   \Longleftrightarrow \mapsto \longmapsto \nearrow \hookleftarrow
   \hookrightarrow \searrow \leftharpoonup \rightharpoonup \swarrow
   \leftharpoondown \rightharpoondown \nwarrow \rightleftharpoons \nless
   \nleqslant \nleqq \lneq \lneqq \lvertneqq \lnsim \lnapprox \nprec
   \npreceq \precnsim \precnapprox \nsim \nshortmid \nmid \nvdash
   \nvDash \ntriangleleft \ntrianglelefteq \subsetneq \varsubsetneq
   \subsetneqq \varsubsetneqq \ngtr \ngeqslant \ngeqq \gneq \gneqq
   \gvertneqq \gnsim \gnapprox \nsucc \nsucceq \succnsim \succnapprox
   \ncong \nshortparallel \nparallel \nVDash \ntriangleright
   \ntrianglerighteq \nsupseteqq \supsetneq \varsupsetneq \supsetneqq
   \varsupsetneqq \nVdash \precneqq \succneqq \nsubseteqq \unlhd \unrhd
   \nleftarrow \nrightarrow \nLeftarrow \nRightarrow \nleftrightarrow
   \nLeftrightarrow \vartriangle \hslash \triangledown \lozenge
   \circledS \measuredangle \nexists \mho \Finv \Game \Bbbk \backprime
   \blacktriangle \blacktriangledown \blacksquare \blacklozenge \bigstar
   \sphericalangle \complement \eth \diagup \diagdown \square \Box
   \Diamond \beth \daleth \gimel \digamma \varkappa \ulcorner \urcorner
   \llcorner \lrcorner \leqq \leqslant \eqslantless \lesssim \lessapprox
   \approxeq \lessdot \lll \lessgtr \lesseqgtr \lesseqqgtr \doteqdot
   \risingdotseq \fallingdotseq \backsim \backsimeq \subseteqq \Subset
   \sqsubset \preccurlyeq \curlyeqprec \precsim \precapprox
   \vartriangleleft \trianglelefteq \vDash \Vvdash \smallsmile
   \smallfrown \bumpeq \Bumpeq \geqq \geqslant \eqslantgtr \gtrsim
   \gtrapprox \gtrdot \ggg \gtrless \gtreqless \gtreqqless \eqcirc
   \circeq \triangleq \thicksim \thickapprox \supseteqq \Supset
   \sqsupset \succcurlyeq \curlyeqsucc \succsim \succapprox
   \vartriangleright \trianglerighteq \Vdash \shortmid \shortparallel
   \between \pitchfork \varpropto \blacktriangleleft \therefore
   \backepsilon \blacktriangleright \because \llless \gggtr \lhd \rhd
   \eqsim \Join \Doteq \dotplus \smallsetminus \Cap \Cup \doublebarwedge
   \boxminus \boxplus \divideontimes \ltimes \rtimes \leftthreetimes
   \rightthreetimes \curlywedge \curlyvee \circleddash \circledast
   \centerdot \intercal \doublecap \doublecup \boxtimes \dashrightarrow
   \dashleftarrow \leftleftarrows \leftrightarrows \Lleftarrow
   \twoheadleftarrow \leftarrowtail \looparrowleft \leftrightharpoons
   \curvearrowleft \circlearrowleft \Lsh \upuparrows \upharpoonleft
   \downharpoonleft \multimap \leftrightsquigarrow \rightrightarrows
   \rightleftarrows \twoheadrightarrow \rightarrowtail \looparrowright
   \curvearrowright \circlearrowright \Rsh \downdownarrows
   \upharpoonright \downharpoonright \rightsquigarrow \leadsto
   \Rrightarrow \restriction \yen \dag \ddag` look the same as they do
   in LaTeX and don't print any warnings about unknown font metrics.

Auditors: alpert
2015-02-18 16:28:05 -08:00

99 lines
3.0 KiB
Python
Executable File

#!/usr/bin/env python
import fontforge
import sys
import json
# Map of characters to extract, grouped by font.
#
# Each top-level key is a font name; main() turns it into the file name
# "KaTeX_<font name>.ttf". Each inner key is a character whose height and
# depth are measured from that font's glyph.
#
# Skew and italic metrics can't be easily parsed from the TTF. Instead,
# we map each character to a "base character", which is a character
# from the same font with correct italic and skew metrics. A character
# maps to None if it doesn't have a base.
metrics_to_extract = {
    # Font name
    "AMS-Regular": {
        u"\u21e2": None,  # \dashrightarrow
        u"\u21e0": None,  # \dashleftarrow
    },
    "Main-Regular": {
        u"\u2260": None,  # \neq
        u"\u2245": None,  # \cong
        u"\u0020": None,  # space
        u"\u00a0": None,  # nbsp
        u"\u2026": None,  # \ldots
        u"\u22ef": None,  # \cdots
        u"\u22f1": None,  # \ddots
        u"\u22ee": None,  # \vdots
        u"\u22a8": None,  # \models
        u"\u22c8": None,  # \bowtie
        u"\u2250": None,  # \doteq
        u"\u23b0": None,  # \lmoustache
        u"\u23b1": None,  # \rmoustache
        u"\u27ee": None,  # \lgroup
        u"\u27ef": None,  # \rgroup
        u"\u27f5": None,  # \longleftarrow
        u"\u27f8": None,  # \Longleftarrow
        u"\u27f6": None,  # \longrightarrow
        u"\u27f9": None,  # \Longrightarrow
        u"\u27f7": None,  # \longleftrightarrow
        u"\u27fa": None,  # \Longleftrightarrow
        u"\u21a6": None,  # \mapsto
        u"\u27fc": None,  # \longmapsto
        u"\u21a9": None,  # \hookleftarrow
        u"\u21aa": None,  # \hookrightarrow
        u"\u21cc": None,  # \rightleftharpoons
    },
    "Size1-Regular": {
        u"\u222c": u"\u222b",  # \iint, based on \int
        u"\u222d": u"\u222b",  # \iiint, based on \int
    },
    "Size2-Regular": {
        u"\u222c": u"\u222b",  # \iint, based on \int
        u"\u222d": u"\u222b",  # \iiint, based on \int
    },
}
def main():
    """Add TTF-derived glyph metrics to the metrics JSON read from stdin.

    The input is a JSON object indexed as ``data[font][codepoint]``, where
    ``codepoint`` is the character's decimal code point written as a string.
    For every font listed in ``metrics_to_extract`` the matching KaTeX TTF is
    opened, each requested glyph is measured, and a ``height`` / ``depth`` /
    ``italic`` / ``skew`` entry is stored for it. The merged object is then
    written to stdout as compact JSON with sorted keys.

    Raises:
        KeyError: if the input has no entry for one of the fonts, or for the
            base character a glyph borrows its italic/skew values from.
    """
    start_json = json.load(sys.stdin)

    for font, chars in metrics_to_extract.iteritems():
        # Relative path: resolved against the current working directory.
        fontInfo = fontforge.open("../static/fonts/KaTeX_" + font + ".ttf")
        # Convert once so the divisions below are true divisions even if the
        # bounding-box values ever come back as ints (Python 2 semantics).
        em = float(fontInfo.em)

        for glyph in fontInfo.glyphs():
            try:
                char = unichr(glyph.unicode)
            except ValueError:
                # glyph.unicode is not a code point unichr accepts; skip it.
                continue

            if char not in chars:
                continue

            # boundingBox() is (xmin, ymin, xmax, ymax); only the vertical
            # extent is needed. Depth is stored as the negated ymin.
            _, depth, _, height = glyph.boundingBox()
            depth = -depth

            base_char = chars[char]
            if base_char:
                # Borrow italic/skew from the base character's existing entry.
                base_char_str = str(ord(base_char))
                base_metrics = start_json[font][base_char_str]
                italic = base_metrics["italic"]
                skew = base_metrics["skew"]
            else:
                italic = 0
                skew = 0

            # Key by the decimal string, matching the keys json.load produced
            # (and the base-character lookup above). An int key would sit
            # next to an existing string key instead of replacing it, and
            # would serialise as a duplicate JSON key.
            start_json[font][str(ord(char))] = {
                "height": height / em,
                "depth": depth / em,
                "italic": italic,
                "skew": skew,
            }

    sys.stdout.write(
        json.dumps(start_json, separators=(',', ':'), sort_keys=True))


if __name__ == "__main__":
    main()