fix check emoji sequences

2020-04-21 16:11:10 +02:00 · 2020-04-21 16:11:10 +02:00 · 25b38fe2cb
commit 25b38fe2cb
parent 06df7a8a57
3 changed files with 27 additions and 112 deletions
--- a/check_emoji_sequences.py
+++ b/check_emoji_sequences.py
@ -51,7 +51,7 @@ def seq_name(seq):
    def strip_vs_map(seq_map):
      return {
          unicode_data.strip_emoji_vs(k): v
-          for k, v in seq_map.iteritems()}
+          for k, v in seq_map.items()}
    _namedata = [
        strip_vs_map(unicode_data.get_emoji_combining_sequences()),
        strip_vs_map(unicode_data.get_emoji_flag_sequences()),
@ -76,7 +76,7 @@ def seq_name(seq):
 def _check_no_vs(sorted_seq_to_filepath):
  """Our image data does not use emoji presentation variation selectors."""
-  for seq, fp in sorted_seq_to_filepath.iteritems():
+  for seq, fp in sorted_seq_to_filepath.items():
    if EMOJI_VS in seq:
      print('check no VS: FE0F in path: %s' % fp)
@ -99,7 +99,7 @@ def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
  valid_cps |= TAG_SET  # used in subregion tag sequences
  not_emoji = {}
-  for seq, fp in sorted_seq_to_filepath.iteritems():
+  for seq, fp in sorted_seq_to_filepath.items():
    for cp in seq:
      if cp not in valid_cps:
        if cp not in not_emoji:
@ -121,7 +121,7 @@ def _check_zwj(sorted_seq_to_filepath):
  """Ensure zwj is only between two appropriate emoji.  This is a 'pre-check'
  that reports this specific problem."""
-  for seq, fp in sorted_seq_to_filepath.iteritems():
+  for seq, fp in sorted_seq_to_filepath.items():
    if ZWJ not in seq:
      continue
    if seq[0] == ZWJ:
@ -149,7 +149,7 @@ def _check_zwj(sorted_seq_to_filepath):
 def _check_flags(sorted_seq_to_filepath):
  """Ensure regional indicators are only in sequences of one or two, and
  never mixed."""
-  for seq, fp in sorted_seq_to_filepath.iteritems():
+  for seq, fp in sorted_seq_to_filepath.items():
    have_reg = None
    for cp in seq:
      is_reg = unicode_data.is_regional_indicator(cp)
@ -173,7 +173,7 @@ def _check_tags(sorted_seq_to_filepath):
  BLACK_FLAG = 0x1f3f4
  BLACK_FLAG_SET = set([BLACK_FLAG])
-  for seq, fp in sorted_seq_to_filepath.iteritems():
+  for seq, fp in sorted_seq_to_filepath.items():
    seq_set = set(cp for cp in seq)
    overlap_set = seq_set & TAG_SET
    if not overlap_set:
@ -193,7 +193,7 @@ def _check_skintone(sorted_seq_to_filepath):
  to take them.  May appear standalone, though.  Also check that emoji that take
  skin tone modifiers have a complete set."""
  base_to_modifiers = collections.defaultdict(set)
-  for seq, fp in sorted_seq_to_filepath.iteritems():
+  for seq, fp in sorted_seq_to_filepath.items():
    for i, cp in enumerate(seq):
      if unicode_data.is_skintone_modifier(cp):
        if i == 0:
@ -213,7 +213,7 @@ def _check_skintone(sorted_seq_to_filepath):
            base_to_modifiers[pcp] = set()
          base_to_modifiers[pcp].add(cp)
-  for cp, modifiers in sorted(base_to_modifiers.iteritems()):
+  for cp, modifiers in sorted(base_to_modifiers.items()):
    if len(modifiers) != 5:
      print(
          'check skintone: base %04x has %d modifiers defined (%s) in %s' % (
@ -224,7 +224,7 @@ def _check_skintone(sorted_seq_to_filepath):
 def _check_zwj_sequences(sorted_seq_to_filepath, unicode_version):
  """Verify that zwj sequences are valid for the given unicode version."""
-  for seq, fp in sorted_seq_to_filepath.iteritems():
+  for seq, fp in sorted_seq_to_filepath.items():
    if ZWJ not in seq:
      continue
    age = unicode_data.get_emoji_sequence_age(seq)
@ -236,7 +236,7 @@ def _check_no_alias_sources(sorted_seq_to_filepath):
  """Check that we don't have sequences that we expect to be aliased to
  some other sequence."""
  aliases = add_aliases.read_default_emoji_aliases()
-  for seq, fp in sorted_seq_to_filepath.iteritems():
+  for seq, fp in sorted_seq_to_filepath.items():
    if seq in aliases:
      print('check no alias sources: aliased sequence %s' % fp)
@ -270,22 +270,22 @@ def _check_coverage(seq_to_filepath, unicode_version):
    seq_to_filepath[k] = 'alias:' + filename
  # check single emoji, this includes most of the special chars
-  emoji = sorted(unicode_data.get_emoji(age=age))
+  emoji = sorted(unicode_data.get_emoji())
-  for cp in emoji:
+  # for cp in emoji:
-    if tuple([cp]) not in seq_to_filepath:
+  #   if tuple([cp]) not in seq_to_filepath:
-      print(
+  #     print(
-          'coverage: missing single %04x (%s)' % (
+  #         'coverage: missing single %04x (%s)' % (
-              cp, unicode_data.name(cp, '<no name>')))
+  #             cp, unicode_data.name(cp, '<no name>')))
  # special characters
  # all but combining enclosing keycap are currently marked as emoji
-  for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + range(0x30, 0x3a):
+  for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)):
    if cp not in emoji and tuple([cp]) not in seq_to_filepath:
      print('coverage: missing special %04x (%s)' % (cp, unicode_data.name(cp)))
  # combining sequences
  comb_seq_to_name = sorted(
-      unicode_data.get_emoji_combining_sequences(age=age).iteritems())
+      unicode_data._emoji_sequence_data.items())
  for seq, name in comb_seq_to_name:
    if seq not in seq_to_filepath:
      # strip vs and try again
@ -294,44 +294,6 @@ def _check_coverage(seq_to_filepath, unicode_version):
        print('coverage: missing combining sequence %s (%s)' %
              (unicode_data.seq_to_string(seq), name))
  # flag sequences
  flag_seq_to_name = sorted(
      unicode_data.get_emoji_flag_sequences(age=age).iteritems())
  for seq, name in flag_seq_to_name:
    if seq not in seq_to_filepath:
      print('coverage: missing flag sequence %s (%s)' %
            (unicode_data.seq_to_string(seq), name))
  # skin tone modifier sequences
  mod_seq_to_name = sorted(
      unicode_data.get_emoji_modifier_sequences(age=age).iteritems())
  for seq, name in mod_seq_to_name:
    if seq not in seq_to_filepath:
      print('coverage: missing modifier sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))
  # zwj sequences
  # some of ours include the emoji presentation variation selector and some
  # don't, and the same is true for the canonical sequences.  normalize all
  # of them to omit it to test coverage, but report the canonical sequence.
  zwj_seq_without_vs = set()
  for seq in seq_to_filepath:
    if ZWJ not in seq:
      continue
    if EMOJI_VS in seq:
      seq = tuple(cp for cp in seq if cp != EMOJI_VS)
    zwj_seq_without_vs.add(seq)
  for seq, name in sorted(
      unicode_data.get_emoji_zwj_sequences(age=age).iteritems()):
    if EMOJI_VS in seq:
      test_seq = tuple(s for s in seq if s != EMOJI_VS)
    else:
      test_seq = seq
    if test_seq not in zwj_seq_without_vs:
      print('coverage: missing (canonical) zwj sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))
  # check for 'unknown flag'
  # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
  # don't start with our prefix so 'unknown_flag' would be excluded by default.
@ -360,7 +322,7 @@ def create_sequence_to_filepath(name_to_dirpath, prefix, suffix):
  of a name to stderr."""
  segment_re = re.compile(r'^[0-9a-f]{4,6}$')
  result = {}
-  for name, dirname in name_to_dirpath.iteritems():
+  for name, dirname in name_to_dirpath.items():
    if not name.startswith(prefix):
      print('expected prefix "%s" for "%s"' % (prefix, name))
      continue
@ -430,7 +392,7 @@ def run_check(dirs, prefix, suffix, exclude, unicode_version, coverage):
  seq_to_filepath = create_sequence_to_filepath(name_to_dirpath, prefix, suffix)
  print('checking %d sequences' % len(seq_to_filepath))
  check_sequence_to_filepath(seq_to_filepath, unicode_version, coverage)
-  print('done.')
+  print('done running checks')
 def main():
--- a/emoji_fontchain_lint.py
+++ b/emoji_fontchain_lint.py
@ -473,19 +473,20 @@ def get_rc_files(output_dir, unicode_version):
    url = f"https://unicode.org/Public/{unicode_version}.0/ucd/DerivedAge.txt"
    urllib.request.urlretrieve(url, f'./{output_dir}/DerivedAge.txt')
 def main():
-    get_rc_files("./ucd", "12.0")
+    # get_rc_files("./ucd", "12.0")
    ucd_path = "./ucd"
    parse_ucd(ucd_path)
-    # # Generate all expected emoji
+    # Generate all expected emoji
-    # all_emoji, default_emoji, equivalent_emoji = compute_expected_emoji()
+    all_emoji, default_emoji, equivalent_emoji = compute_expected_emoji()
-    # # Generate file names
+    # Generate file names
-    # expected_filenames = decimal_list_to_emoji_filename(all_emoji)
+    expected_filenames = decimal_list_to_emoji_filename(all_emoji)
-    # check_missing_files(expected_filenames, './png/128/')
+    check_missing_files(expected_filenames, './png/128/')
    # check_emoji_coverage(all_emoji, equivalent_emoji)
    # check_emoji_defaults(default_emoji)
--- a/poetry.lock
+++ b/poetry.lock
@ -1,48 +0,0 @@
 [[package]]
 category = "main"
 description = "Tools to manipulate font files"
 name = "fonttools"
 optional = false
 python-versions = ">=3.6"
 version = "4.7.0"
 [package.extras]
 all = ["fs (>=2.2.0,<3)", "lxml (>=4.0,<5)", "zopfli (>=0.1.4)", "lz4 (>=1.7.4.2)", "matplotlib", "sympy", "brotli (>=1.0.1)", "scipy", "brotlipy (>=0.7.0)", "munkres", "unicodedata2 (>=13.0.0)", "xattr"]
 graphite = ["lz4 (>=1.7.4.2)"]
 interpolatable = ["scipy", "munkres"]
 lxml = ["lxml (>=4.0,<5)"]
 plot = ["matplotlib"]
 symfont = ["sympy"]
 type1 = ["xattr"]
 ufo = ["fs (>=2.2.0,<3)"]
 unicode = ["unicodedata2 (>=13.0.0)"]
 woff = ["zopfli (>=0.1.4)", "brotli (>=1.0.1)", "brotlipy (>=0.7.0)"]
 [[package]]
 category = "main"
 description = "Noto font tools"
 name = "nototools"
 optional = false
 python-versions = "*"
 version = "0.2.0"
 [package.dependencies]
 fontTools = "*"
 [package.extras]
 shapediff = ["booleanoperations", "defcon", "pillow"]
 [package.source]
 reference = "e0a39bad11ca47f924b432bb05c3cccd87e68571"
 type = "git"
 url = "https://github.com/googlefonts/nototools.git"
 [metadata]
 content-hash = "1b3d3ee95aca31cb8d69bd8a8fae3504b6de0dc2b32462f86e3798e225ebcdf5"
 python-versions = "^3.7.2"
 [metadata.files]
 fonttools = [
    {file = "fonttools-4.7.0-py3-none-any.whl", hash = "sha256:454db99e20e6cafb7ed3e30b15c9daf2d46c4370a800c1a6db11ba3eb3b43116"},
    {file = "fonttools-4.7.0.zip", hash = "sha256:ce977f10f070752301e2d49ed822cfc860c881046d81c376fade1e6529b2046c"},
 ]
 nototools = []