From 57f64367eddfa11cb9c0df9fb9f58ee85502d48d Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Wed, 24 Jul 2019 07:19:30 -0600 Subject: [PATCH] cs: implement `make-known-char-range-list` Compute the list (at compile time), instead of using a literal copy of the output at one point. Also, adjust the documentation to explain extra guarantees provided by `make-known-char-range-list`. Closes #2757 --- .../scribblings/reference/chars.scrbl | 7 +- racket/src/cs/Makefile | 1 + racket/src/cs/rumble.sls | 1 + racket/src/cs/rumble/char-range.ss | 93 +++ racket/src/cs/rumble/char.ss | 605 +----------------- 5 files changed, 102 insertions(+), 605 deletions(-) create mode 100644 racket/src/cs/rumble/char-range.ss diff --git a/pkgs/racket-doc/scribblings/reference/chars.scrbl b/pkgs/racket-doc/scribblings/reference/chars.scrbl index e3b49b216d..7b645f2e66 100644 --- a/pkgs/racket-doc/scribblings/reference/chars.scrbl +++ b/pkgs/racket-doc/scribblings/reference/chars.scrbl @@ -254,7 +254,12 @@ contains two integers and a boolean; the first integer is a starting code-point value (inclusive), the second integer is an ending code-point value (inclusive), and the boolean is @racket[#t] when all characters in the code-point range have identical results for all of -the character predicates above. The three-element lists are ordered in +the character predicates above, have analogous transformations +(shifting by the same amount, if any, in code-point space) for +@racket[char-downcase], @racket[char-upcase], and +@racket[char-titlecase], and have the same +decomposition--normalization behavior. 
+The three-element lists are ordered in the overall result list such that later lists represent larger code-point values, and all three-element lists are separated from every other by at least one code-point value that is not specified by diff --git a/racket/src/cs/Makefile b/racket/src/cs/Makefile index cb392f0f71..7e99ff9c29 100644 --- a/racket/src/cs/Makefile +++ b/racket/src/cs/Makefile @@ -275,6 +275,7 @@ RUMBLE_SRCS = rumble/define.ss \ rumble/bytes.ss \ rumble/string.ss \ rumble/char.ss \ + rumble/char-range.ss \ rumble/symbol.ss \ rumble/list.ss \ rumble/vector.ss \ diff --git a/racket/src/cs/rumble.sls b/racket/src/cs/rumble.sls index 963ac00de3..23604500d1 100644 --- a/racket/src/cs/rumble.sls +++ b/racket/src/cs/rumble.sls @@ -768,6 +768,7 @@ (include "rumble/bytes.ss") (include "rumble/string.ss") (include "rumble/char.ss") + (include "rumble/char-range.ss") (include "rumble/list.ss") (include "rumble/vector.ss") (include "rumble/box.ss") diff --git a/racket/src/cs/rumble/char-range.ss b/racket/src/cs/rumble/char-range.ss new file mode 100644 index 0000000000..c4e4cc2b15 --- /dev/null +++ b/racket/src/cs/rumble/char-range.ss @@ -0,0 +1,93 @@ + +(define (make-known-char-range-list) + (let-syntax ([mk + (lambda (stx) + (let-syntax ([define/who (lambda (stx) + (syntax-case stx () + [(_ . r) #'(define . r)]))] + [check (lambda (stx) #'(#%void))] + [with-global-lock* (lambda (stx) + (syntax-case stx () + [(_ e) #'e]))]) + (include "rumble/char.ss") + + (define (delta op c) + (- (char->integer (op c)) + (char->integer c))) + + (define (non-decomp? a) + (let ([s (string a)]) + (string=? (string-normalize-nfkd s) + s))) + + (define (char-same? a b) + (and (eqv? (char-alphabetic? a) + (char-alphabetic? b)) + (eqv? (char-lower-case? a) + (char-lower-case? b)) + (eqv? (char-upper-case? a) + (char-upper-case? b)) + (eqv? (char-title-case? a) + (char-title-case? b)) + (eqv? (char-numeric? a) + (char-numeric? b)) + (eqv? (char-symbolic? a) + (char-symbolic? 
b)) + (eqv? (char-punctuation? a) + (char-punctuation? b)) + (eqv? (char-graphic? a) + (char-graphic? b)) + (eqv? (char-whitespace? a) + (char-whitespace? b)) + (eqv? (char-blank? a) + (char-blank? b)) + (eqv? (char-iso-control? a) + (char-iso-control? b)) + (eqv? (char-general-category a) + (char-general-category b)) + ;; In addition to predicates, sameness checking + ;; has historically counted deltas between upcase + ;; and downcase --- just because that was convenient, + ;; and not because it makes complete sense + (eqv? (delta char-upcase a) + (delta char-upcase b)) + (eqv? (delta char-downcase a) + (delta char-downcase b)) + (eqv? (delta char-titlecase a) + (delta char-titlecase b)) + ;; Check whether decomposition behavior is different + (eqv? (non-decomp? a) + (non-decomp? b)) + ;; Detect different combining classes + (eqv? (#%equal? (string a b) + (string-normalize-nfkd (string a b))) + (#%equal? (string b a) + (string-normalize-nfkd (string b a)))))) + + (define ranges + (let loop ([i 1] [start 0] [same? #t]) + (cond + [(= i #xD800) + (cons `(,start ,(sub1 i) ,same?) + (loop #xE001 #xE000 #t))] + [(= i #x110000) `((,start #x10FFFF ,same?))] + [(eq? 'cn (char-general-category (integer->char i))) + (cons `(,start ,(sub1 i) ,same?) + (let nloop ([i (add1 i)]) + (cond + [(= i #xD800) + (nloop #xE000)] + [(= i #x110000) '()] + [(eq? 'cn (char-general-category (integer->char i))) + (nloop (add1 i))] + [else + (loop (add1 i) i #t)])))] + [(and same? + (char-same? 
(integer->char i) + (integer->char (sub1 i)))) + (loop (add1 i) start #t)] + [else + (loop (add1 i) start #f)]))) + + (datum->syntax #'here (list 'quote ranges))))]) + (mk))) diff --git a/racket/src/cs/rumble/char.ss b/racket/src/cs/rumble/char.ss index 6a91cd14a4..ac330d1dc9 100644 --- a/racket/src/cs/rumble/char.ss +++ b/racket/src/cs/rumble/char.ss @@ -34,609 +34,6 @@ (define (char-general-category ch) (or (with-global-lock* (getprop (#%char-general-category ch) 'downcase #f)) (let* ([s (#%char-general-category ch)] - [ds (string->symbol (string-downcase (symbol->string s)))]) + [ds (string->symbol (string-downcase (#%symbol->string s)))]) (with-global-lock* (putprop s 'downcase ds)) ds))) - -;; FIXME -(define (make-known-char-range-list) - '((0 887 #f) - (890 895 #f) - (900 906 #f) - (908 908 #t) - (910 929 #f) - (931 1327 #f) - (1329 1366 #t) - (1369 1375 #f) - (1377 1415 #f) - (1417 1418 #f) - (1421 1423 #f) - (1425 1479 #f) - (1488 1514 #t) - (1520 1524 #f) - (1536 1564 #f) - (1566 1805 #f) - (1807 1866 #f) - (1869 1969 #f) - (1984 2042 #f) - (2048 2093 #f) - (2096 2110 #t) - (2112 2139 #f) - (2142 2142 #t) - (2208 2226 #t) - (2276 2435 #f) - (2437 2444 #t) - (2447 2448 #t) - (2451 2472 #t) - (2474 2480 #t) - (2482 2482 #t) - (2486 2489 #t) - (2492 2500 #f) - (2503 2504 #t) - (2507 2510 #f) - (2519 2519 #t) - (2524 2525 #t) - (2527 2531 #f) - (2534 2555 #f) - (2561 2563 #f) - (2565 2570 #t) - (2575 2576 #t) - (2579 2600 #t) - (2602 2608 #t) - (2610 2611 #f) - (2613 2614 #f) - (2616 2617 #t) - (2620 2620 #t) - (2622 2626 #f) - (2631 2632 #t) - (2635 2637 #f) - (2641 2641 #t) - (2649 2652 #f) - (2654 2654 #t) - (2662 2677 #f) - (2689 2691 #f) - (2693 2701 #t) - (2703 2705 #t) - (2707 2728 #t) - (2730 2736 #t) - (2738 2739 #t) - (2741 2745 #t) - (2748 2757 #f) - (2759 2761 #f) - (2763 2765 #f) - (2768 2768 #t) - (2784 2787 #f) - (2790 2801 #f) - (2817 2819 #f) - (2821 2828 #t) - (2831 2832 #t) - (2835 2856 #t) - (2858 2864 #t) - (2866 2867 #t) - (2869 2873 #t) 
- (2876 2884 #f) - (2887 2888 #f) - (2891 2893 #f) - (2902 2903 #f) - (2908 2909 #t) - (2911 2915 #f) - (2918 2935 #f) - (2946 2947 #f) - (2949 2954 #t) - (2958 2960 #t) - (2962 2965 #f) - (2969 2970 #t) - (2972 2972 #t) - (2974 2975 #t) - (2979 2980 #t) - (2984 2986 #t) - (2990 3001 #t) - (3006 3010 #f) - (3014 3016 #t) - (3018 3021 #f) - (3024 3024 #t) - (3031 3031 #t) - (3046 3066 #f) - (3072 3075 #f) - (3077 3084 #t) - (3086 3088 #t) - (3090 3112 #t) - (3114 3129 #t) - (3133 3140 #f) - (3142 3144 #f) - (3146 3149 #f) - (3157 3158 #f) - (3160 3161 #t) - (3168 3171 #f) - (3174 3183 #t) - (3192 3199 #f) - (3201 3203 #f) - (3205 3212 #t) - (3214 3216 #t) - (3218 3240 #t) - (3242 3251 #t) - (3253 3257 #t) - (3260 3268 #f) - (3270 3272 #f) - (3274 3277 #f) - (3285 3286 #t) - (3294 3294 #t) - (3296 3299 #f) - (3302 3311 #t) - (3313 3314 #t) - (3329 3331 #f) - (3333 3340 #t) - (3342 3344 #t) - (3346 3386 #t) - (3389 3396 #f) - (3398 3400 #t) - (3402 3406 #f) - (3415 3415 #t) - (3424 3427 #f) - (3430 3445 #f) - (3449 3455 #f) - (3458 3459 #t) - (3461 3478 #t) - (3482 3505 #t) - (3507 3515 #t) - (3517 3517 #t) - (3520 3526 #t) - (3530 3530 #t) - (3535 3540 #f) - (3542 3542 #t) - (3544 3551 #f) - (3558 3567 #t) - (3570 3572 #f) - (3585 3642 #f) - (3647 3675 #f) - (3713 3714 #t) - (3716 3716 #t) - (3719 3720 #t) - (3722 3722 #t) - (3725 3725 #t) - (3732 3735 #t) - (3737 3743 #t) - (3745 3747 #t) - (3749 3749 #t) - (3751 3751 #t) - (3754 3755 #t) - (3757 3769 #f) - (3771 3773 #f) - (3776 3780 #t) - (3782 3782 #t) - (3784 3789 #f) - (3792 3801 #t) - (3804 3807 #f) - (3840 3911 #f) - (3913 3948 #f) - (3953 3991 #f) - (3993 4028 #f) - (4030 4044 #f) - (4046 4058 #f) - (4096 4293 #f) - (4295 4295 #t) - (4301 4301 #t) - (4304 4680 #f) - (4682 4685 #t) - (4688 4694 #t) - (4696 4696 #t) - (4698 4701 #t) - (4704 4744 #t) - (4746 4749 #t) - (4752 4784 #t) - (4786 4789 #t) - (4792 4798 #t) - (4800 4800 #t) - (4802 4805 #t) - (4808 4822 #t) - (4824 4880 #t) - (4882 4885 #t) - (4888 
4954 #t) - (4957 4988 #f) - (4992 5017 #f) - (5024 5108 #t) - (5120 5788 #f) - (5792 5880 #f) - (5888 5900 #t) - (5902 5908 #f) - (5920 5942 #f) - (5952 5971 #f) - (5984 5996 #t) - (5998 6000 #t) - (6002 6003 #t) - (6016 6109 #f) - (6112 6121 #t) - (6128 6137 #t) - (6144 6158 #f) - (6160 6169 #t) - (6176 6263 #f) - (6272 6314 #f) - (6320 6389 #t) - (6400 6430 #t) - (6432 6443 #f) - (6448 6459 #f) - (6464 6464 #t) - (6468 6509 #f) - (6512 6516 #t) - (6528 6571 #t) - (6576 6601 #f) - (6608 6618 #f) - (6622 6683 #f) - (6686 6750 #f) - (6752 6780 #f) - (6783 6793 #f) - (6800 6809 #t) - (6816 6829 #f) - (6832 6846 #f) - (6912 6987 #f) - (6992 7036 #f) - (7040 7155 #f) - (7164 7223 #f) - (7227 7241 #f) - (7245 7295 #f) - (7360 7367 #t) - (7376 7414 #f) - (7416 7417 #t) - (7424 7669 #f) - (7676 7957 #f) - (7960 7965 #t) - (7968 8005 #f) - (8008 8013 #t) - (8016 8023 #f) - (8025 8025 #t) - (8027 8027 #t) - (8029 8029 #t) - (8031 8061 #f) - (8064 8116 #f) - (8118 8132 #f) - (8134 8147 #f) - (8150 8155 #f) - (8157 8175 #f) - (8178 8180 #f) - (8182 8190 #f) - (8192 8292 #f) - (8294 8305 #f) - (8308 8334 #f) - (8336 8348 #t) - (8352 8381 #f) - (8400 8432 #f) - (8448 8585 #f) - (8592 9210 #f) - (9216 9254 #t) - (9280 9290 #t) - (9312 11123 #f) - (11126 11157 #t) - (11160 11193 #t) - (11197 11208 #t) - (11210 11217 #t) - (11264 11310 #t) - (11312 11358 #t) - (11360 11507 #f) - (11513 11557 #f) - (11559 11559 #t) - (11565 11565 #t) - (11568 11623 #t) - (11631 11632 #f) - (11647 11670 #f) - (11680 11686 #t) - (11688 11694 #t) - (11696 11702 #t) - (11704 11710 #t) - (11712 11718 #t) - (11720 11726 #t) - (11728 11734 #t) - (11736 11742 #t) - (11744 11842 #f) - (11904 11929 #t) - (11931 12019 #f) - (12032 12245 #t) - (12272 12283 #t) - (12288 12351 #f) - (12353 12438 #f) - (12441 12543 #f) - (12549 12589 #t) - (12593 12686 #t) - (12688 12730 #f) - (12736 12771 #t) - (12784 12830 #f) - (12832 13054 #f) - (13056 19893 #f) - (19904 40908 #f) - (40960 42124 #f) - (42128 42182 #t) - 
(42192 42539 #f) - (42560 42653 #f) - (42655 42743 #f) - (42752 42894 #f) - (42896 42925 #f) - (42928 42929 #f) - (42999 43051 #f) - (43056 43065 #f) - (43072 43127 #f) - (43136 43204 #f) - (43214 43225 #f) - (43232 43259 #f) - (43264 43347 #f) - (43359 43388 #f) - (43392 43469 #f) - (43471 43481 #f) - (43486 43518 #f) - (43520 43574 #f) - (43584 43597 #f) - (43600 43609 #t) - (43612 43714 #f) - (43739 43766 #f) - (43777 43782 #t) - (43785 43790 #t) - (43793 43798 #t) - (43808 43814 #t) - (43816 43822 #t) - (43824 43871 #f) - (43876 43877 #t) - (43968 44013 #f) - (44016 44025 #t) - (44032 55203 #t) - (55216 55238 #t) - (55243 55291 #t) - (57344 64109 #f) - (64112 64217 #t) - (64256 64262 #t) - (64275 64279 #t) - (64285 64310 #f) - (64312 64316 #t) - (64318 64318 #t) - (64320 64321 #t) - (64323 64324 #t) - (64326 64449 #f) - (64467 64831 #f) - (64848 64911 #t) - (64914 64967 #t) - (65008 65021 #f) - (65024 65049 #f) - (65056 65069 #f) - (65072 65106 #f) - (65108 65126 #f) - (65128 65131 #f) - (65136 65140 #f) - (65142 65276 #t) - (65279 65279 #t) - (65281 65470 #f) - (65474 65479 #t) - (65482 65487 #t) - (65490 65495 #t) - (65498 65500 #t) - (65504 65510 #f) - (65512 65518 #f) - (65529 65533 #f) - (65536 65547 #t) - (65549 65574 #t) - (65576 65594 #t) - (65596 65597 #t) - (65599 65613 #t) - (65616 65629 #t) - (65664 65786 #t) - (65792 65794 #t) - (65799 65843 #t) - (65847 65932 #f) - (65936 65947 #t) - (65952 65952 #t) - (66000 66045 #f) - (66176 66204 #t) - (66208 66256 #t) - (66272 66299 #f) - (66304 66339 #f) - (66352 66378 #f) - (66384 66426 #f) - (66432 66461 #t) - (66463 66499 #f) - (66504 66517 #f) - (66560 66717 #f) - (66720 66729 #t) - (66816 66855 #t) - (66864 66915 #t) - (66927 66927 #t) - (67072 67382 #t) - (67392 67413 #t) - (67424 67431 #t) - (67584 67589 #t) - (67592 67592 #t) - (67594 67637 #t) - (67639 67640 #t) - (67644 67644 #t) - (67647 67669 #t) - (67671 67742 #f) - (67751 67759 #t) - (67840 67867 #f) - (67871 67897 #f) - (67903 67903 #t) - 
(67968 68023 #t) - (68030 68031 #t) - (68096 68099 #f) - (68101 68102 #t) - (68108 68115 #f) - (68117 68119 #t) - (68121 68147 #t) - (68152 68154 #f) - (68159 68167 #f) - (68176 68184 #t) - (68192 68255 #f) - (68288 68326 #f) - (68331 68342 #f) - (68352 68405 #t) - (68409 68437 #f) - (68440 68466 #f) - (68472 68497 #f) - (68505 68508 #t) - (68521 68527 #t) - (68608 68680 #t) - (69216 69246 #f) - (69632 69709 #f) - (69714 69743 #f) - (69759 69825 #f) - (69840 69864 #t) - (69872 69881 #t) - (69888 69940 #f) - (69942 69955 #f) - (69968 70006 #f) - (70016 70088 #f) - (70093 70093 #t) - (70096 70106 #f) - (70113 70132 #t) - (70144 70161 #t) - (70163 70205 #f) - (70320 70378 #f) - (70384 70393 #t) - (70401 70403 #f) - (70405 70412 #t) - (70415 70416 #t) - (70419 70440 #t) - (70442 70448 #t) - (70450 70451 #t) - (70453 70457 #t) - (70460 70468 #f) - (70471 70472 #t) - (70475 70477 #f) - (70487 70487 #t) - (70493 70499 #f) - (70502 70508 #t) - (70512 70516 #t) - (70784 70855 #f) - (70864 70873 #t) - (71040 71093 #f) - (71096 71113 #f) - (71168 71236 #f) - (71248 71257 #t) - (71296 71351 #f) - (71360 71369 #t) - (71840 71922 #f) - (71935 71935 #t) - (72384 72440 #t) - (73728 74648 #t) - (74752 74862 #t) - (74864 74868 #t) - (77824 78894 #t) - (92160 92728 #t) - (92736 92766 #t) - (92768 92777 #t) - (92782 92783 #t) - (92880 92909 #t) - (92912 92917 #f) - (92928 92997 #f) - (93008 93017 #t) - (93019 93025 #t) - (93027 93047 #t) - (93053 93071 #t) - (93952 94020 #t) - (94032 94078 #f) - (94095 94111 #f) - (110592 110593 #t) - (113664 113770 #t) - (113776 113788 #t) - (113792 113800 #t) - (113808 113817 #t) - (113820 113827 #f) - (118784 119029 #t) - (119040 119078 #t) - (119081 119261 #f) - (119296 119365 #f) - (119552 119638 #t) - (119648 119665 #t) - (119808 119892 #f) - (119894 119964 #f) - (119966 119967 #t) - (119970 119970 #t) - (119973 119974 #t) - (119977 119980 #t) - (119982 119993 #f) - (119995 119995 #t) - (119997 120003 #t) - (120005 120069 #f) - (120071 120074 
#t) - (120077 120084 #t) - (120086 120092 #t) - (120094 120121 #f) - (120123 120126 #t) - (120128 120132 #t) - (120134 120134 #t) - (120138 120144 #t) - (120146 120485 #f) - (120488 120779 #f) - (120782 120831 #t) - (124928 125124 #t) - (125127 125142 #f) - (126464 126467 #t) - (126469 126495 #t) - (126497 126498 #t) - (126500 126500 #t) - (126503 126503 #t) - (126505 126514 #t) - (126516 126519 #t) - (126521 126521 #t) - (126523 126523 #t) - (126530 126530 #t) - (126535 126535 #t) - (126537 126537 #t) - (126539 126539 #t) - (126541 126543 #t) - (126545 126546 #t) - (126548 126548 #t) - (126551 126551 #t) - (126553 126553 #t) - (126555 126555 #t) - (126557 126557 #t) - (126559 126559 #t) - (126561 126562 #t) - (126564 126564 #t) - (126567 126570 #t) - (126572 126578 #t) - (126580 126583 #t) - (126585 126588 #t) - (126590 126590 #t) - (126592 126601 #t) - (126603 126619 #t) - (126625 126627 #t) - (126629 126633 #t) - (126635 126651 #t) - (126704 126705 #t) - (126976 127019 #t) - (127024 127123 #t) - (127136 127150 #t) - (127153 127167 #t) - (127169 127183 #t) - (127185 127221 #t) - (127232 127244 #f) - (127248 127278 #t) - (127280 127339 #f) - (127344 127386 #f) - (127462 127490 #f) - (127504 127546 #t) - (127552 127560 #t) - (127568 127569 #t) - (127744 127788 #t) - (127792 127869 #t) - (127872 127950 #t) - (127956 127991 #t) - (128000 128254 #t) - (128256 128330 #t) - (128336 128377 #t) - (128379 128419 #t) - (128421 128578 #t) - (128581 128719 #t) - (128736 128748 #t) - (128752 128755 #t) - (128768 128883 #t) - (128896 128980 #t) - (129024 129035 #t) - (129040 129095 #t) - (129104 129113 #t) - (129120 129159 #t) - (129168 129197 #t) - (131072 173782 #t) - (173824 177972 #t) - (177984 178205 #t) - (194560 195101 #t) - (917505 917505 #t) - (917536 917631 #t) - (917760 917999 #t) - (983040 1048573 #t) - (1048576 1114109 #t)))