scribble-math/metrics/metric_parse.py
Emily Eisenberg 9fc357b8a1 Add italic correction
Summary: Add margin to the right of characters that have italic correction.

Test Plan:
Look at `f'` and `C'` before and after. Note that they both now look
good at small font sizes (like 17pt).

Reviewers: alpert

Reviewed By: alpert

Differential Revision: http://phabricator.khanacademy.org/D3534
2013-08-14 10:51:09 -07:00

337 lines
9.6 KiB
Python
Executable File

#!/usr/bin/env python
import re
import json
import os
# Tokenizer patterns for the parenthesised property-list syntax.  Every
# pattern swallows leading whitespace, so the parser never skips it itself.
expr_start = re.compile(r"\s*\(")
expr_value = re.compile(r"\s*([^\s\)]+)")
expr_end = re.compile(r"\s*\)")
empty = re.compile(r"\s*$")


def inner_parse_expr(expr):
    """Parse a run of values and parenthesised groups from *expr*.

    Bare tokens become strings and each ``( ... )`` group becomes a nested
    list.  Parsing stops at the end of the input or at a ``)`` that has no
    matching ``(`` inside *expr*.

    The input is scanned once by position instead of being re-sliced after
    every token, and nesting is tracked on an explicit stack, so neither the
    size nor the nesting depth of the input is limited by the interpreter's
    recursion limit.

    Args:
        expr: The text to parse.

    Returns:
        A ``(rest, items)`` tuple: ``rest`` is the part of *expr* that was
        not consumed and ``items`` is the list of parsed values.  Groups
        still open at the end of the input are closed implicitly.

    Raises:
        ValueError: If the input cannot be tokenized.
    """
    open_groups = []  # Lists of the enclosing groups, outermost first.
    current = []
    pos = 0

    while True:
        if empty.match(expr, pos):
            # Out of input: fold every group that is still open into its
            # parent, exactly as if the missing ")" had been present.
            while open_groups:
                parent = open_groups.pop()
                parent.append(current)
                current = parent
            return expr[pos:], current

        match = expr_start.match(expr, pos)
        if match:
            open_groups.append(current)
            current = []
            pos = match.end()
            continue

        match = expr_value.match(expr, pos)
        if match:
            current.append(match.group(1))
            pos = match.end()
            continue

        match = expr_end.match(expr, pos)
        if match:
            pos = match.end()
            if not open_groups:
                # This ")" closes a group opened by the caller: hand back
                # whatever follows it.
                return expr[pos:], current
            parent = open_groups.pop()
            parent.append(current)
            current = parent
            continue

        # The patterns above cover every non-blank input, so this is only a
        # safeguard against the patterns being changed inconsistently.
        raise ValueError("Invalid input")
def parse_expr(expr):
    """Parse *expr* and return only the resulting nested list.

    Thin wrapper around ``inner_parse_expr`` that discards the unconsumed
    remainder of the input.
    """
    return inner_parse_expr(expr)[1]
def read_pl(pl_file):
    """Read the file at path *pl_file* and return its parsed contents.

    The whole file is read into memory and handed to ``parse_expr``.
    """
    with open(pl_file) as handle:
        contents = handle.read()
    return parse_expr(contents)
class Metric(object):
    """Height, depth and italic correction of a single character.

    The constructor receives the three measurements in thousandths (as
    numbers or numeric strings) and stores them divided by 1000 as floats.

    Attributes:
        char: The character the measurements belong to.
        height: Height of the character.
        depth: Depth of the character.
        italic_correction: Italic correction of the character.
    """

    def __init__(self, char, height, depth, italic_correction):
        self.char = char
        self.height = float(height) / 1000
        self.depth = float(depth) / 1000
        self.italic_correction = float(italic_correction) / 1000

    def __repr__(self):
        # The character is shown through its own repr: that keeps invisible
        # characters such as U+00A0 readable and avoids encoding a non-ASCII
        # unicode string into a byte-string template on Python 2.
        return "Char {0!r} ({1:.3f}+{2:.3f}, ic {3:.3f})".format(
            self.char, self.height, self.depth, self.italic_correction)
# Text styles a character can be filed under; the list comes from page 13 of
# The TeX book.  The string values are the style names used as dictionary
# keys when the metrics are collected and printed.
(ROMAN, SLANTED, ITALIC, TYPEWRITER, BOLD) = (
    "roman", "slanted", "italic", "typewriter", "bold",
)
# Maps each property-list file name to the characters whose metrics are
# extracted from it, as {character code: (output character, style)}.
#
# The numbers used in this map are written the way the font charts print
# them: every key is a plain decimal literal whose digits spell the *octal*
# character code (101 stands for octal 101, i.e. "A").  read_metrics turns
# the codes it reads from the file into the same form before the lookup.
file_map = {
    "cmmi10.pl": {
        13: (u"\u03b1", ITALIC), # \alpha
        14: (u"\u03b2", ITALIC), # \beta
        15: (u"\u03b3", ITALIC), # \gamma
        16: (u"\u03b4", ITALIC), # \delta
        # NOTE(review): codes 17 and 42 both map to U+03B5, so the two
        # characters end up under the same key in the printed map -- confirm
        # that one of them was not meant to be a different code point.
        17: (u"\u03b5", ITALIC), # \varepsilon
        20: (u"\u03b6", ITALIC), # \zeta
        21: (u"\u03b7", ITALIC), # \eta
        22: (u"\u03b8", ITALIC), # \theta
        23: (u"\u03b9", ITALIC), # \iota
        24: (u"\u03ba", ITALIC), # \kappa
        25: (u"\u03bb", ITALIC), # \lambda
        26: (u"\u03bc", ITALIC), # \mu
        27: (u"\u03bd", ITALIC), # \nu
        30: (u"\u03be", ITALIC), # \xi
        31: (u"\u03c0", ITALIC), # \pi
        32: (u"\u03c1", ITALIC), # \rho
        33: (u"\u03c3", ITALIC), # \sigma
        34: (u"\u03c4", ITALIC), # \tau
        35: (u"\u03c5", ITALIC), # \upsilon
        36: (u"\u03d5", ITALIC), # \phi
        37: (u"\u03c7", ITALIC), # \chi
        40: (u"\u03c8", ITALIC), # \psi
        41: (u"\u03c9", ITALIC), # \omega
        42: (u"\u03b5", ITALIC), # \epsilon
        43: (u"\u03d1", ITALIC), # \vartheta
        44: (u"\u03d6", ITALIC), # \varpi
        45: (u"\u03f1", ITALIC), # \varrho
        46: (u"\u03c2", ITALIC), # \varsigma
        47: (u"\u03c6", ITALIC), # \varphi
        72: (u".", ROMAN),
        73: (u",", ROMAN),
        74: (u"<", ROMAN),
        75: (u"/", ROMAN),
        76: (u">", ROMAN),
        101: (u"A", ITALIC),
        102: (u"B", ITALIC),
        103: (u"C", ITALIC),
        104: (u"D", ITALIC),
        105: (u"E", ITALIC),
        106: (u"F", ITALIC),
        107: (u"G", ITALIC),
        110: (u"H", ITALIC),
        111: (u"I", ITALIC),
        112: (u"J", ITALIC),
        113: (u"K", ITALIC),
        114: (u"L", ITALIC),
        115: (u"M", ITALIC),
        116: (u"N", ITALIC),
        117: (u"O", ITALIC),
        120: (u"P", ITALIC),
        121: (u"Q", ITALIC),
        122: (u"R", ITALIC),
        123: (u"S", ITALIC),
        124: (u"T", ITALIC),
        125: (u"U", ITALIC),
        126: (u"V", ITALIC),
        127: (u"W", ITALIC),
        130: (u"X", ITALIC),
        131: (u"Y", ITALIC),
        132: (u"Z", ITALIC),
        141: (u"a", ITALIC),
        142: (u"b", ITALIC),
        143: (u"c", ITALIC),
        144: (u"d", ITALIC),
        145: (u"e", ITALIC),
        146: (u"f", ITALIC),
        147: (u"g", ITALIC),
        150: (u"h", ITALIC),
        151: (u"i", ITALIC),
        152: (u"j", ITALIC),
        153: (u"k", ITALIC),
        154: (u"l", ITALIC),
        155: (u"m", ITALIC),
        156: (u"n", ITALIC),
        157: (u"o", ITALIC),
        160: (u"p", ITALIC),
        161: (u"q", ITALIC),
        162: (u"r", ITALIC),
        163: (u"s", ITALIC),
        164: (u"t", ITALIC),
        165: (u"u", ITALIC),
        166: (u"v", ITALIC),
        167: (u"w", ITALIC),
        170: (u"x", ITALIC),
        171: (u"y", ITALIC),
        172: (u"z", ITALIC),
    },
    "cmr10.pl": {
        0: (u"\u0393", ROMAN), # \Gamma
        1: (u"\u0394", ROMAN), # \Delta
        2: (u"\u0398", ROMAN), # \Theta
        3: (u"\u039b", ROMAN), # \Lambda
        4: (u"\u039e", ROMAN), # \Xi
        5: (u"\u03a0", ROMAN), # \Pi
        6: (u"\u03a3", ROMAN), # \Sigma
        7: (u"\u03a5", ROMAN), # \Upsilon
        10: (u"\u03a6", ROMAN), # \Phi
        11: (u"\u03a8", ROMAN), # \Psi
        12: (u"\u03a9", ROMAN), # \Omega
        41: (u"!", ROMAN),
        42: (u"\"", ROMAN),
        44: (u"$", ROMAN),
        50: (u"(", ROMAN),
        51: (u")", ROMAN),
        53: (u"+", ROMAN),
        60: (u"0", ROMAN),
        61: (u"1", ROMAN),
        62: (u"2", ROMAN),
        63: (u"3", ROMAN),
        64: (u"4", ROMAN),
        65: (u"5", ROMAN),
        66: (u"6", ROMAN),
        67: (u"7", ROMAN),
        70: (u"8", ROMAN),
        71: (u"9", ROMAN),
        72: (u":", ROMAN),
        73: (u";", ROMAN),
        75: (u"=", ROMAN),
        77: (u"?", ROMAN),
        100: (u"@", ROMAN),
        101: (u"A", ROMAN),
        102: (u"B", ROMAN),
        103: (u"C", ROMAN),
        104: (u"D", ROMAN),
        105: (u"E", ROMAN),
        106: (u"F", ROMAN),
        107: (u"G", ROMAN),
        110: (u"H", ROMAN),
        111: (u"I", ROMAN),
        112: (u"J", ROMAN),
        113: (u"K", ROMAN),
        114: (u"L", ROMAN),
        115: (u"M", ROMAN),
        116: (u"N", ROMAN),
        117: (u"O", ROMAN),
        120: (u"P", ROMAN),
        121: (u"Q", ROMAN),
        122: (u"R", ROMAN),
        123: (u"S", ROMAN),
        124: (u"T", ROMAN),
        125: (u"U", ROMAN),
        126: (u"V", ROMAN),
        127: (u"W", ROMAN),
        130: (u"X", ROMAN),
        131: (u"Y", ROMAN),
        132: (u"Z", ROMAN),
        133: (u"[", ROMAN),
        135: (u"]", ROMAN),
        140: (u"`", ROMAN),
        141: (u"a", ROMAN),
        142: (u"b", ROMAN),
        143: (u"c", ROMAN),
        144: (u"d", ROMAN),
        145: (u"e", ROMAN),
        146: (u"f", ROMAN),
        147: (u"g", ROMAN),
        150: (u"h", ROMAN),
        151: (u"i", ROMAN),
        152: (u"j", ROMAN),
        153: (u"k", ROMAN),
        154: (u"l", ROMAN),
        155: (u"m", ROMAN),
        156: (u"n", ROMAN),
        157: (u"o", ROMAN),
        160: (u"p", ROMAN),
        161: (u"q", ROMAN),
        162: (u"r", ROMAN),
        163: (u"s", ROMAN),
        164: (u"t", ROMAN),
        165: (u"u", ROMAN),
        166: (u"v", ROMAN),
        167: (u"w", ROMAN),
        170: (u"x", ROMAN),
        171: (u"y", ROMAN),
        172: (u"z", ROMAN),
    },
    "cmsy10.pl": {
        0: (u"\u2212", ROMAN), # -
        1: (u"\u22c5", ROMAN), # \cdot
        2: (u"\u00d7", ROMAN), # \times
        3: (u"\u2217", ROMAN), # *
        4: (u"\u00f7", ROMAN), # \div
        6: (u"\u00b1", ROMAN), # \pm
        16: (u"\u2218", ROMAN), # \circ
        24: (u"\u2264", ROMAN), # \leq
        25: (u"\u2265", ROMAN), # \geq
        40: (u"\u2190", ROMAN), # \leftarrow
        41: (u"\u2192", ROMAN), # \rightarrow
        60: (u"\u2032", ROMAN), # \prime
        61: (u"\u221e", ROMAN), # \infty
        152: (u"|", ROMAN), # |
    }
}
def read_metrics(pl_file, metrics):
    """Collect the metrics of the mapped characters found in *pl_file*.

    The file is parsed with ``read_pl``.  Every ``CHARACTER`` entry whose
    code appears in ``file_map[pl_file]`` is turned into a ``Metric`` and
    appended to ``metrics[style]``, where ``style`` comes from the map.
    Measurements that an entry does not list default to 0.

    Args:
        pl_file: Path of the file to read; must also be a key of
            ``file_map``.
        metrics: Dict mapping each style name to a list of ``Metric``
            objects.  It is modified in place.

    Raises:
        KeyError: If *pl_file* is not a key of ``file_map`` or a mapped
            style is missing from *metrics*.
        ValueError: If a character code or a measurement is not an integer.
    """
    pl = read_pl(pl_file)
    metrics_to_read = file_map[pl_file]

    for elem in pl:
        # Skip empty groups and everything that is not a character entry.
        if not elem or elem[0] != "CHARACTER":
            continue

        if elem[1] == "C":
            # The character is given literally.  file_map keys spell the
            # octal code with decimal digits, so render the code point in
            # octal and read those digits back as a decimal number.
            # format(..., "o") yields the bare digits on Python 2 and 3,
            # whereas oct() adds a "0o" prefix on Python 3.
            char = int(format(ord(elem[2]), "o"))
        elif elem[1] == "O":
            # The code is already written in octal digits.
            char = int(elem[2])
        else:
            continue

        if char not in metrics_to_read:
            continue

        map_char, map_style = metrics_to_read[char]

        char_height = 0
        char_depth = 0
        italic_correction = 0

        # Properties other than the three below (comments included) are
        # ignored.
        for metric in elem[3:]:
            if not metric:
                continue
            if metric[0] == "CHARHT":
                char_height = int(metric[2])
            elif metric[0] == "CHARDP":
                char_depth = int(metric[2])
            elif metric[0] == "CHARIC":
                italic_correction = int(metric[2])

        metrics[map_style].append(
            Metric(map_char, char_height, char_depth, italic_correction))
def print_metrics(metrics):
    """Print *metrics* as a JavaScript ``metricMap`` variable declaration.

    The output has the form ``var metricMap = <JSON>;`` where the JSON
    object maps each style to three tables -- ``height``, ``depth`` and
    ``italicCorrection`` -- each keyed by character.  When a style lists
    the same character more than once, the last entry wins.

    Args:
        metrics: Dict mapping a style name to a list of objects that have
            ``char``, ``height``, ``depth`` and ``italic_correction``
            attributes.
    """
    # .items() and the function-call form of print behave the same on
    # Python 2 and Python 3; the text that is written is unchanged.
    metric_map = {
        style: {
            "height": {m.char: m.height for m in metric_list},
            "depth": {m.char: m.depth for m in metric_list},
            "italicCorrection": {
                m.char: m.italic_correction for m in metric_list
            },
        }
        for style, metric_list in metrics.items()
    }

    print("var metricMap = {0};".format(json.dumps(metric_map, indent=4)))
if __name__ == "__main__":
    # Work from the directory that holds this script; the file names used as
    # file_map keys are opened relative to the current directory.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    os.chdir(script_dir)

    # One empty list of metrics per style.
    metrics = {
        style: []
        for style in ("roman", "italic", "slanted", "typewriter", "bold")
    }

    for metric_file in file_map:
        read_metrics(metric_file, metrics)

    # The non-breaking space is added by hand with all-zero metrics.
    nbsp = Metric(u'\u00a0', 0, 0, 0)
    metrics["roman"].append(nbsp)

    print_metrics(metrics)