Fix emoji sequence checks

This commit is contained in:
guidotheelen 2020-04-21 16:11:10 +02:00
parent 06df7a8a57
commit 25b38fe2cb
3 changed files with 27 additions and 112 deletions

View File

@ -51,7 +51,7 @@ def seq_name(seq):
def strip_vs_map(seq_map):
return {
unicode_data.strip_emoji_vs(k): v
for k, v in seq_map.iteritems()}
for k, v in seq_map.items()}
_namedata = [
strip_vs_map(unicode_data.get_emoji_combining_sequences()),
strip_vs_map(unicode_data.get_emoji_flag_sequences()),
@ -76,7 +76,7 @@ def seq_name(seq):
def _check_no_vs(sorted_seq_to_filepath):
"""Our image data does not use emoji presentation variation selectors."""
for seq, fp in sorted_seq_to_filepath.iteritems():
for seq, fp in sorted_seq_to_filepath.items():
if EMOJI_VS in seq:
print('check no VS: FE0F in path: %s' % fp)
@ -99,7 +99,7 @@ def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
valid_cps |= TAG_SET # used in subregion tag sequences
not_emoji = {}
for seq, fp in sorted_seq_to_filepath.iteritems():
for seq, fp in sorted_seq_to_filepath.items():
for cp in seq:
if cp not in valid_cps:
if cp not in not_emoji:
@ -121,7 +121,7 @@ def _check_zwj(sorted_seq_to_filepath):
"""Ensure zwj is only between two appropriate emoji. This is a 'pre-check'
that reports this specific problem."""
for seq, fp in sorted_seq_to_filepath.iteritems():
for seq, fp in sorted_seq_to_filepath.items():
if ZWJ not in seq:
continue
if seq[0] == ZWJ:
@ -149,7 +149,7 @@ def _check_zwj(sorted_seq_to_filepath):
def _check_flags(sorted_seq_to_filepath):
"""Ensure regional indicators are only in sequences of one or two, and
never mixed."""
for seq, fp in sorted_seq_to_filepath.iteritems():
for seq, fp in sorted_seq_to_filepath.items():
have_reg = None
for cp in seq:
is_reg = unicode_data.is_regional_indicator(cp)
@ -173,7 +173,7 @@ def _check_tags(sorted_seq_to_filepath):
BLACK_FLAG = 0x1f3f4
BLACK_FLAG_SET = set([BLACK_FLAG])
for seq, fp in sorted_seq_to_filepath.iteritems():
for seq, fp in sorted_seq_to_filepath.items():
seq_set = set(cp for cp in seq)
overlap_set = seq_set & TAG_SET
if not overlap_set:
@ -193,7 +193,7 @@ def _check_skintone(sorted_seq_to_filepath):
to take them. May appear standalone, though. Also check that emoji that take
skin tone modifiers have a complete set."""
base_to_modifiers = collections.defaultdict(set)
for seq, fp in sorted_seq_to_filepath.iteritems():
for seq, fp in sorted_seq_to_filepath.items():
for i, cp in enumerate(seq):
if unicode_data.is_skintone_modifier(cp):
if i == 0:
@ -213,7 +213,7 @@ def _check_skintone(sorted_seq_to_filepath):
base_to_modifiers[pcp] = set()
base_to_modifiers[pcp].add(cp)
for cp, modifiers in sorted(base_to_modifiers.iteritems()):
for cp, modifiers in sorted(base_to_modifiers.items()):
if len(modifiers) != 5:
print(
'check skintone: base %04x has %d modifiers defined (%s) in %s' % (
@ -224,7 +224,7 @@ def _check_skintone(sorted_seq_to_filepath):
def _check_zwj_sequences(sorted_seq_to_filepath, unicode_version):
"""Verify that zwj sequences are valid for the given unicode version."""
for seq, fp in sorted_seq_to_filepath.iteritems():
for seq, fp in sorted_seq_to_filepath.items():
if ZWJ not in seq:
continue
age = unicode_data.get_emoji_sequence_age(seq)
@ -236,7 +236,7 @@ def _check_no_alias_sources(sorted_seq_to_filepath):
"""Check that we don't have sequences that we expect to be aliased to
some other sequence."""
aliases = add_aliases.read_default_emoji_aliases()
for seq, fp in sorted_seq_to_filepath.iteritems():
for seq, fp in sorted_seq_to_filepath.items():
if seq in aliases:
print('check no alias sources: aliased sequence %s' % fp)
@ -270,22 +270,22 @@ def _check_coverage(seq_to_filepath, unicode_version):
seq_to_filepath[k] = 'alias:' + filename
# check single emoji, this includes most of the special chars
emoji = sorted(unicode_data.get_emoji(age=age))
for cp in emoji:
if tuple([cp]) not in seq_to_filepath:
print(
'coverage: missing single %04x (%s)' % (
cp, unicode_data.name(cp, '<no name>')))
emoji = sorted(unicode_data.get_emoji())
# for cp in emoji:
# if tuple([cp]) not in seq_to_filepath:
# print(
# 'coverage: missing single %04x (%s)' % (
# cp, unicode_data.name(cp, '<no name>')))
# special characters
# all but combining enclosing keycap are currently marked as emoji
for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + range(0x30, 0x3a):
for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)):
if cp not in emoji and tuple([cp]) not in seq_to_filepath:
print('coverage: missing special %04x (%s)' % (cp, unicode_data.name(cp)))
# combining sequences
comb_seq_to_name = sorted(
unicode_data.get_emoji_combining_sequences(age=age).iteritems())
unicode_data._emoji_sequence_data.items())
for seq, name in comb_seq_to_name:
if seq not in seq_to_filepath:
# strip vs and try again
@ -294,44 +294,6 @@ def _check_coverage(seq_to_filepath, unicode_version):
print('coverage: missing combining sequence %s (%s)' %
(unicode_data.seq_to_string(seq), name))
# flag sequences
flag_seq_to_name = sorted(
unicode_data.get_emoji_flag_sequences(age=age).iteritems())
for seq, name in flag_seq_to_name:
if seq not in seq_to_filepath:
print('coverage: missing flag sequence %s (%s)' %
(unicode_data.seq_to_string(seq), name))
# skin tone modifier sequences
mod_seq_to_name = sorted(
unicode_data.get_emoji_modifier_sequences(age=age).iteritems())
for seq, name in mod_seq_to_name:
if seq not in seq_to_filepath:
print('coverage: missing modifier sequence %s (%s)' % (
unicode_data.seq_to_string(seq), name))
# zwj sequences
# some of ours include the emoji presentation variation selector and some
# don't, and the same is true for the canonical sequences. normalize all
# of them to omit it to test coverage, but report the canonical sequence.
zwj_seq_without_vs = set()
for seq in seq_to_filepath:
if ZWJ not in seq:
continue
if EMOJI_VS in seq:
seq = tuple(cp for cp in seq if cp != EMOJI_VS)
zwj_seq_without_vs.add(seq)
for seq, name in sorted(
unicode_data.get_emoji_zwj_sequences(age=age).iteritems()):
if EMOJI_VS in seq:
test_seq = tuple(s for s in seq if s != EMOJI_VS)
else:
test_seq = seq
if test_seq not in zwj_seq_without_vs:
print('coverage: missing (canonical) zwj sequence %s (%s)' % (
unicode_data.seq_to_string(seq), name))
# check for 'unknown flag'
# this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
# don't start with our prefix so 'unknown_flag' would be excluded by default.
@ -360,7 +322,7 @@ def create_sequence_to_filepath(name_to_dirpath, prefix, suffix):
of a name to stderr."""
segment_re = re.compile(r'^[0-9a-f]{4,6}$')
result = {}
for name, dirname in name_to_dirpath.iteritems():
for name, dirname in name_to_dirpath.items():
if not name.startswith(prefix):
print('expected prefix "%s" for "%s"' % (prefix, name))
continue
@ -430,7 +392,7 @@ def run_check(dirs, prefix, suffix, exclude, unicode_version, coverage):
seq_to_filepath = create_sequence_to_filepath(name_to_dirpath, prefix, suffix)
print('checking %d sequences' % len(seq_to_filepath))
check_sequence_to_filepath(seq_to_filepath, unicode_version, coverage)
print('done.')
print('done running checks')
def main():

View File

@ -473,19 +473,20 @@ def get_rc_files(output_dir, unicode_version):
url = f"https://unicode.org/Public/{unicode_version}.0/ucd/DerivedAge.txt"
urllib.request.urlretrieve(url, f'./{output_dir}/DerivedAge.txt')
def main():
get_rc_files("./ucd", "12.0")
# get_rc_files("./ucd", "12.0")
ucd_path = "./ucd"
parse_ucd(ucd_path)
# # Generate all expected emoji
# all_emoji, default_emoji, equivalent_emoji = compute_expected_emoji()
# Generate all expected emoji
all_emoji, default_emoji, equivalent_emoji = compute_expected_emoji()
# # Generate file names
# expected_filenames = decimal_list_to_emoji_filename(all_emoji)
# Generate file names
expected_filenames = decimal_list_to_emoji_filename(all_emoji)
# check_missing_files(expected_filenames, './png/128/')
check_missing_files(expected_filenames, './png/128/')
# check_emoji_coverage(all_emoji, equivalent_emoji)
# check_emoji_defaults(default_emoji)

48
poetry.lock generated
View File

@ -1,48 +0,0 @@
[[package]]
category = "main"
description = "Tools to manipulate font files"
name = "fonttools"
optional = false
python-versions = ">=3.6"
version = "4.7.0"
[package.extras]
all = ["fs (>=2.2.0,<3)", "lxml (>=4.0,<5)", "zopfli (>=0.1.4)", "lz4 (>=1.7.4.2)", "matplotlib", "sympy", "brotli (>=1.0.1)", "scipy", "brotlipy (>=0.7.0)", "munkres", "unicodedata2 (>=13.0.0)", "xattr"]
graphite = ["lz4 (>=1.7.4.2)"]
interpolatable = ["scipy", "munkres"]
lxml = ["lxml (>=4.0,<5)"]
plot = ["matplotlib"]
symfont = ["sympy"]
type1 = ["xattr"]
ufo = ["fs (>=2.2.0,<3)"]
unicode = ["unicodedata2 (>=13.0.0)"]
woff = ["zopfli (>=0.1.4)", "brotli (>=1.0.1)", "brotlipy (>=0.7.0)"]
[[package]]
category = "main"
description = "Noto font tools"
name = "nototools"
optional = false
python-versions = "*"
version = "0.2.0"
[package.dependencies]
fontTools = "*"
[package.extras]
shapediff = ["booleanoperations", "defcon", "pillow"]
[package.source]
reference = "e0a39bad11ca47f924b432bb05c3cccd87e68571"
type = "git"
url = "https://github.com/googlefonts/nototools.git"
[metadata]
content-hash = "1b3d3ee95aca31cb8d69bd8a8fae3504b6de0dc2b32462f86e3798e225ebcdf5"
python-versions = "^3.7.2"
[metadata.files]
fonttools = [
{file = "fonttools-4.7.0-py3-none-any.whl", hash = "sha256:454db99e20e6cafb7ed3e30b15c9daf2d46c4370a800c1a6db11ba3eb3b43116"},
{file = "fonttools-4.7.0.zip", hash = "sha256:ce977f10f070752301e2d49ed822cfc860c881046d81c376fade1e6529b2046c"},
]
nototools = []