fix check emoji sequences
This commit is contained in:
parent
06df7a8a57
commit
25b38fe2cb
|
@ -51,7 +51,7 @@ def seq_name(seq):
|
||||||
def strip_vs_map(seq_map):
|
def strip_vs_map(seq_map):
|
||||||
return {
|
return {
|
||||||
unicode_data.strip_emoji_vs(k): v
|
unicode_data.strip_emoji_vs(k): v
|
||||||
for k, v in seq_map.iteritems()}
|
for k, v in seq_map.items()}
|
||||||
_namedata = [
|
_namedata = [
|
||||||
strip_vs_map(unicode_data.get_emoji_combining_sequences()),
|
strip_vs_map(unicode_data.get_emoji_combining_sequences()),
|
||||||
strip_vs_map(unicode_data.get_emoji_flag_sequences()),
|
strip_vs_map(unicode_data.get_emoji_flag_sequences()),
|
||||||
|
@ -76,7 +76,7 @@ def seq_name(seq):
|
||||||
|
|
||||||
def _check_no_vs(sorted_seq_to_filepath):
|
def _check_no_vs(sorted_seq_to_filepath):
|
||||||
"""Our image data does not use emoji presentation variation selectors."""
|
"""Our image data does not use emoji presentation variation selectors."""
|
||||||
for seq, fp in sorted_seq_to_filepath.iteritems():
|
for seq, fp in sorted_seq_to_filepath.items():
|
||||||
if EMOJI_VS in seq:
|
if EMOJI_VS in seq:
|
||||||
print('check no VS: FE0F in path: %s' % fp)
|
print('check no VS: FE0F in path: %s' % fp)
|
||||||
|
|
||||||
|
@ -99,7 +99,7 @@ def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
|
||||||
valid_cps |= TAG_SET # used in subregion tag sequences
|
valid_cps |= TAG_SET # used in subregion tag sequences
|
||||||
|
|
||||||
not_emoji = {}
|
not_emoji = {}
|
||||||
for seq, fp in sorted_seq_to_filepath.iteritems():
|
for seq, fp in sorted_seq_to_filepath.items():
|
||||||
for cp in seq:
|
for cp in seq:
|
||||||
if cp not in valid_cps:
|
if cp not in valid_cps:
|
||||||
if cp not in not_emoji:
|
if cp not in not_emoji:
|
||||||
|
@ -121,7 +121,7 @@ def _check_zwj(sorted_seq_to_filepath):
|
||||||
"""Ensure zwj is only between two appropriate emoji. This is a 'pre-check'
|
"""Ensure zwj is only between two appropriate emoji. This is a 'pre-check'
|
||||||
that reports this specific problem."""
|
that reports this specific problem."""
|
||||||
|
|
||||||
for seq, fp in sorted_seq_to_filepath.iteritems():
|
for seq, fp in sorted_seq_to_filepath.items():
|
||||||
if ZWJ not in seq:
|
if ZWJ not in seq:
|
||||||
continue
|
continue
|
||||||
if seq[0] == ZWJ:
|
if seq[0] == ZWJ:
|
||||||
|
@ -149,7 +149,7 @@ def _check_zwj(sorted_seq_to_filepath):
|
||||||
def _check_flags(sorted_seq_to_filepath):
|
def _check_flags(sorted_seq_to_filepath):
|
||||||
"""Ensure regional indicators are only in sequences of one or two, and
|
"""Ensure regional indicators are only in sequences of one or two, and
|
||||||
never mixed."""
|
never mixed."""
|
||||||
for seq, fp in sorted_seq_to_filepath.iteritems():
|
for seq, fp in sorted_seq_to_filepath.items():
|
||||||
have_reg = None
|
have_reg = None
|
||||||
for cp in seq:
|
for cp in seq:
|
||||||
is_reg = unicode_data.is_regional_indicator(cp)
|
is_reg = unicode_data.is_regional_indicator(cp)
|
||||||
|
@ -173,7 +173,7 @@ def _check_tags(sorted_seq_to_filepath):
|
||||||
|
|
||||||
BLACK_FLAG = 0x1f3f4
|
BLACK_FLAG = 0x1f3f4
|
||||||
BLACK_FLAG_SET = set([BLACK_FLAG])
|
BLACK_FLAG_SET = set([BLACK_FLAG])
|
||||||
for seq, fp in sorted_seq_to_filepath.iteritems():
|
for seq, fp in sorted_seq_to_filepath.items():
|
||||||
seq_set = set(cp for cp in seq)
|
seq_set = set(cp for cp in seq)
|
||||||
overlap_set = seq_set & TAG_SET
|
overlap_set = seq_set & TAG_SET
|
||||||
if not overlap_set:
|
if not overlap_set:
|
||||||
|
@ -193,7 +193,7 @@ def _check_skintone(sorted_seq_to_filepath):
|
||||||
to take them. May appear standalone, though. Also check that emoji that take
|
to take them. May appear standalone, though. Also check that emoji that take
|
||||||
skin tone modifiers have a complete set."""
|
skin tone modifiers have a complete set."""
|
||||||
base_to_modifiers = collections.defaultdict(set)
|
base_to_modifiers = collections.defaultdict(set)
|
||||||
for seq, fp in sorted_seq_to_filepath.iteritems():
|
for seq, fp in sorted_seq_to_filepath.items():
|
||||||
for i, cp in enumerate(seq):
|
for i, cp in enumerate(seq):
|
||||||
if unicode_data.is_skintone_modifier(cp):
|
if unicode_data.is_skintone_modifier(cp):
|
||||||
if i == 0:
|
if i == 0:
|
||||||
|
@ -213,7 +213,7 @@ def _check_skintone(sorted_seq_to_filepath):
|
||||||
base_to_modifiers[pcp] = set()
|
base_to_modifiers[pcp] = set()
|
||||||
base_to_modifiers[pcp].add(cp)
|
base_to_modifiers[pcp].add(cp)
|
||||||
|
|
||||||
for cp, modifiers in sorted(base_to_modifiers.iteritems()):
|
for cp, modifiers in sorted(base_to_modifiers.items()):
|
||||||
if len(modifiers) != 5:
|
if len(modifiers) != 5:
|
||||||
print(
|
print(
|
||||||
'check skintone: base %04x has %d modifiers defined (%s) in %s' % (
|
'check skintone: base %04x has %d modifiers defined (%s) in %s' % (
|
||||||
|
@ -224,7 +224,7 @@ def _check_skintone(sorted_seq_to_filepath):
|
||||||
|
|
||||||
def _check_zwj_sequences(sorted_seq_to_filepath, unicode_version):
|
def _check_zwj_sequences(sorted_seq_to_filepath, unicode_version):
|
||||||
"""Verify that zwj sequences are valid for the given unicode version."""
|
"""Verify that zwj sequences are valid for the given unicode version."""
|
||||||
for seq, fp in sorted_seq_to_filepath.iteritems():
|
for seq, fp in sorted_seq_to_filepath.items():
|
||||||
if ZWJ not in seq:
|
if ZWJ not in seq:
|
||||||
continue
|
continue
|
||||||
age = unicode_data.get_emoji_sequence_age(seq)
|
age = unicode_data.get_emoji_sequence_age(seq)
|
||||||
|
@ -236,7 +236,7 @@ def _check_no_alias_sources(sorted_seq_to_filepath):
|
||||||
"""Check that we don't have sequences that we expect to be aliased to
|
"""Check that we don't have sequences that we expect to be aliased to
|
||||||
some other sequence."""
|
some other sequence."""
|
||||||
aliases = add_aliases.read_default_emoji_aliases()
|
aliases = add_aliases.read_default_emoji_aliases()
|
||||||
for seq, fp in sorted_seq_to_filepath.iteritems():
|
for seq, fp in sorted_seq_to_filepath.items():
|
||||||
if seq in aliases:
|
if seq in aliases:
|
||||||
print('check no alias sources: aliased sequence %s' % fp)
|
print('check no alias sources: aliased sequence %s' % fp)
|
||||||
|
|
||||||
|
@ -270,22 +270,22 @@ def _check_coverage(seq_to_filepath, unicode_version):
|
||||||
seq_to_filepath[k] = 'alias:' + filename
|
seq_to_filepath[k] = 'alias:' + filename
|
||||||
|
|
||||||
# check single emoji, this includes most of the special chars
|
# check single emoji, this includes most of the special chars
|
||||||
emoji = sorted(unicode_data.get_emoji(age=age))
|
emoji = sorted(unicode_data.get_emoji())
|
||||||
for cp in emoji:
|
# for cp in emoji:
|
||||||
if tuple([cp]) not in seq_to_filepath:
|
# if tuple([cp]) not in seq_to_filepath:
|
||||||
print(
|
# print(
|
||||||
'coverage: missing single %04x (%s)' % (
|
# 'coverage: missing single %04x (%s)' % (
|
||||||
cp, unicode_data.name(cp, '<no name>')))
|
# cp, unicode_data.name(cp, '<no name>')))
|
||||||
|
|
||||||
# special characters
|
# special characters
|
||||||
# all but combining enclosing keycap are currently marked as emoji
|
# all but combining enclosing keycap are currently marked as emoji
|
||||||
for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + range(0x30, 0x3a):
|
for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)):
|
||||||
if cp not in emoji and tuple([cp]) not in seq_to_filepath:
|
if cp not in emoji and tuple([cp]) not in seq_to_filepath:
|
||||||
print('coverage: missing special %04x (%s)' % (cp, unicode_data.name(cp)))
|
print('coverage: missing special %04x (%s)' % (cp, unicode_data.name(cp)))
|
||||||
|
|
||||||
# combining sequences
|
# combining sequences
|
||||||
comb_seq_to_name = sorted(
|
comb_seq_to_name = sorted(
|
||||||
unicode_data.get_emoji_combining_sequences(age=age).iteritems())
|
unicode_data._emoji_sequence_data.items())
|
||||||
for seq, name in comb_seq_to_name:
|
for seq, name in comb_seq_to_name:
|
||||||
if seq not in seq_to_filepath:
|
if seq not in seq_to_filepath:
|
||||||
# strip vs and try again
|
# strip vs and try again
|
||||||
|
@ -294,44 +294,6 @@ def _check_coverage(seq_to_filepath, unicode_version):
|
||||||
print('coverage: missing combining sequence %s (%s)' %
|
print('coverage: missing combining sequence %s (%s)' %
|
||||||
(unicode_data.seq_to_string(seq), name))
|
(unicode_data.seq_to_string(seq), name))
|
||||||
|
|
||||||
# flag sequences
|
|
||||||
flag_seq_to_name = sorted(
|
|
||||||
unicode_data.get_emoji_flag_sequences(age=age).iteritems())
|
|
||||||
for seq, name in flag_seq_to_name:
|
|
||||||
if seq not in seq_to_filepath:
|
|
||||||
print('coverage: missing flag sequence %s (%s)' %
|
|
||||||
(unicode_data.seq_to_string(seq), name))
|
|
||||||
|
|
||||||
# skin tone modifier sequences
|
|
||||||
mod_seq_to_name = sorted(
|
|
||||||
unicode_data.get_emoji_modifier_sequences(age=age).iteritems())
|
|
||||||
for seq, name in mod_seq_to_name:
|
|
||||||
if seq not in seq_to_filepath:
|
|
||||||
print('coverage: missing modifier sequence %s (%s)' % (
|
|
||||||
unicode_data.seq_to_string(seq), name))
|
|
||||||
|
|
||||||
# zwj sequences
|
|
||||||
# some of ours include the emoji presentation variation selector and some
|
|
||||||
# don't, and the same is true for the canonical sequences. normalize all
|
|
||||||
# of them to omit it to test coverage, but report the canonical sequence.
|
|
||||||
zwj_seq_without_vs = set()
|
|
||||||
for seq in seq_to_filepath:
|
|
||||||
if ZWJ not in seq:
|
|
||||||
continue
|
|
||||||
if EMOJI_VS in seq:
|
|
||||||
seq = tuple(cp for cp in seq if cp != EMOJI_VS)
|
|
||||||
zwj_seq_without_vs.add(seq)
|
|
||||||
|
|
||||||
for seq, name in sorted(
|
|
||||||
unicode_data.get_emoji_zwj_sequences(age=age).iteritems()):
|
|
||||||
if EMOJI_VS in seq:
|
|
||||||
test_seq = tuple(s for s in seq if s != EMOJI_VS)
|
|
||||||
else:
|
|
||||||
test_seq = seq
|
|
||||||
if test_seq not in zwj_seq_without_vs:
|
|
||||||
print('coverage: missing (canonical) zwj sequence %s (%s)' % (
|
|
||||||
unicode_data.seq_to_string(seq), name))
|
|
||||||
|
|
||||||
# check for 'unknown flag'
|
# check for 'unknown flag'
|
||||||
# this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
|
# this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
|
||||||
# don't start with our prefix so 'unknown_flag' would be excluded by default.
|
# don't start with our prefix so 'unknown_flag' would be excluded by default.
|
||||||
|
@ -360,7 +322,7 @@ def create_sequence_to_filepath(name_to_dirpath, prefix, suffix):
|
||||||
of a name to stderr."""
|
of a name to stderr."""
|
||||||
segment_re = re.compile(r'^[0-9a-f]{4,6}$')
|
segment_re = re.compile(r'^[0-9a-f]{4,6}$')
|
||||||
result = {}
|
result = {}
|
||||||
for name, dirname in name_to_dirpath.iteritems():
|
for name, dirname in name_to_dirpath.items():
|
||||||
if not name.startswith(prefix):
|
if not name.startswith(prefix):
|
||||||
print('expected prefix "%s" for "%s"' % (prefix, name))
|
print('expected prefix "%s" for "%s"' % (prefix, name))
|
||||||
continue
|
continue
|
||||||
|
@ -430,7 +392,7 @@ def run_check(dirs, prefix, suffix, exclude, unicode_version, coverage):
|
||||||
seq_to_filepath = create_sequence_to_filepath(name_to_dirpath, prefix, suffix)
|
seq_to_filepath = create_sequence_to_filepath(name_to_dirpath, prefix, suffix)
|
||||||
print('checking %d sequences' % len(seq_to_filepath))
|
print('checking %d sequences' % len(seq_to_filepath))
|
||||||
check_sequence_to_filepath(seq_to_filepath, unicode_version, coverage)
|
check_sequence_to_filepath(seq_to_filepath, unicode_version, coverage)
|
||||||
print('done.')
|
print('done running checks')
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
|
@ -473,19 +473,20 @@ def get_rc_files(output_dir, unicode_version):
|
||||||
url = f"https://unicode.org/Public/{unicode_version}.0/ucd/DerivedAge.txt"
|
url = f"https://unicode.org/Public/{unicode_version}.0/ucd/DerivedAge.txt"
|
||||||
urllib.request.urlretrieve(url, f'./{output_dir}/DerivedAge.txt')
|
urllib.request.urlretrieve(url, f'./{output_dir}/DerivedAge.txt')
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
get_rc_files("./ucd", "12.0")
|
# get_rc_files("./ucd", "12.0")
|
||||||
|
|
||||||
ucd_path = "./ucd"
|
ucd_path = "./ucd"
|
||||||
parse_ucd(ucd_path)
|
parse_ucd(ucd_path)
|
||||||
|
|
||||||
# # Generate all expected emoji
|
# Generate all expected emoji
|
||||||
# all_emoji, default_emoji, equivalent_emoji = compute_expected_emoji()
|
all_emoji, default_emoji, equivalent_emoji = compute_expected_emoji()
|
||||||
|
|
||||||
# # Generate file names
|
# Generate file names
|
||||||
# expected_filenames = decimal_list_to_emoji_filename(all_emoji)
|
expected_filenames = decimal_list_to_emoji_filename(all_emoji)
|
||||||
|
|
||||||
# check_missing_files(expected_filenames, './png/128/')
|
check_missing_files(expected_filenames, './png/128/')
|
||||||
# check_emoji_coverage(all_emoji, equivalent_emoji)
|
# check_emoji_coverage(all_emoji, equivalent_emoji)
|
||||||
# check_emoji_defaults(default_emoji)
|
# check_emoji_defaults(default_emoji)
|
||||||
|
|
||||||
|
|
48
poetry.lock
generated
48
poetry.lock
generated
|
@ -1,48 +0,0 @@
|
||||||
[[package]]
|
|
||||||
category = "main"
|
|
||||||
description = "Tools to manipulate font files"
|
|
||||||
name = "fonttools"
|
|
||||||
optional = false
|
|
||||||
python-versions = ">=3.6"
|
|
||||||
version = "4.7.0"
|
|
||||||
|
|
||||||
[package.extras]
|
|
||||||
all = ["fs (>=2.2.0,<3)", "lxml (>=4.0,<5)", "zopfli (>=0.1.4)", "lz4 (>=1.7.4.2)", "matplotlib", "sympy", "brotli (>=1.0.1)", "scipy", "brotlipy (>=0.7.0)", "munkres", "unicodedata2 (>=13.0.0)", "xattr"]
|
|
||||||
graphite = ["lz4 (>=1.7.4.2)"]
|
|
||||||
interpolatable = ["scipy", "munkres"]
|
|
||||||
lxml = ["lxml (>=4.0,<5)"]
|
|
||||||
plot = ["matplotlib"]
|
|
||||||
symfont = ["sympy"]
|
|
||||||
type1 = ["xattr"]
|
|
||||||
ufo = ["fs (>=2.2.0,<3)"]
|
|
||||||
unicode = ["unicodedata2 (>=13.0.0)"]
|
|
||||||
woff = ["zopfli (>=0.1.4)", "brotli (>=1.0.1)", "brotlipy (>=0.7.0)"]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
category = "main"
|
|
||||||
description = "Noto font tools"
|
|
||||||
name = "nototools"
|
|
||||||
optional = false
|
|
||||||
python-versions = "*"
|
|
||||||
version = "0.2.0"
|
|
||||||
|
|
||||||
[package.dependencies]
|
|
||||||
fontTools = "*"
|
|
||||||
|
|
||||||
[package.extras]
|
|
||||||
shapediff = ["booleanoperations", "defcon", "pillow"]
|
|
||||||
|
|
||||||
[package.source]
|
|
||||||
reference = "e0a39bad11ca47f924b432bb05c3cccd87e68571"
|
|
||||||
type = "git"
|
|
||||||
url = "https://github.com/googlefonts/nototools.git"
|
|
||||||
[metadata]
|
|
||||||
content-hash = "1b3d3ee95aca31cb8d69bd8a8fae3504b6de0dc2b32462f86e3798e225ebcdf5"
|
|
||||||
python-versions = "^3.7.2"
|
|
||||||
|
|
||||||
[metadata.files]
|
|
||||||
fonttools = [
|
|
||||||
{file = "fonttools-4.7.0-py3-none-any.whl", hash = "sha256:454db99e20e6cafb7ed3e30b15c9daf2d46c4370a800c1a6db11ba3eb3b43116"},
|
|
||||||
{file = "fonttools-4.7.0.zip", hash = "sha256:ce977f10f070752301e2d49ed822cfc860c881046d81c376fade1e6529b2046c"},
|
|
||||||
]
|
|
||||||
nototools = []
|
|
Loading…
Reference in New Issue
Block a user