cs: implement make-known-char-range-list

Compute the list (at compile time), instead of using a literal copy of
output that was captured at one point in time.

Also, adjust the documentation to explain extra guarantees provided by
`make-known-char-range-list`.

Closes #2757
This commit is contained in:
Matthew Flatt 2019-07-24 07:19:30 -06:00
parent f63ededab8
commit 57f64367ed
5 changed files with 102 additions and 605 deletions

View File

@ -254,7 +254,12 @@ contains two integers and a boolean; the first integer is a starting
code-point value (inclusive), the second integer is an ending
code-point value (inclusive), and the boolean is @racket[#t] when all
characters in the code-point range have identical results for all of
the character predicates above. The three-element lists are ordered in
the character predicates above, have analogous transformations
(shifting by the same amount, if any, in code-point space) for
@racket[char-downcase], @racket[char-upcase], and
@racket[char-titlecase], and have the same
decomposition--normalization behavior.
The three-element lists are ordered in
the overall result list such that later lists represent larger
code-point values, and all three-element lists are separated from
every other by at least one code-point value that is not specified by

View File

@ -275,6 +275,7 @@ RUMBLE_SRCS = rumble/define.ss \
rumble/bytes.ss \
rumble/string.ss \
rumble/char.ss \
rumble/char-range.ss \
rumble/symbol.ss \
rumble/list.ss \
rumble/vector.ss \

View File

@ -768,6 +768,7 @@
(include "rumble/bytes.ss")
(include "rumble/string.ss")
(include "rumble/char.ss")
(include "rumble/char-range.ss")
(include "rumble/list.ss")
(include "rumble/vector.ss")
(include "rumble/box.ss")

View File

@ -0,0 +1,93 @@
;; Returns a list of `(start end same?)` three-element lists, where
;; `start` and `end` are inclusive code-point bounds and `same?` is #t
;; only when every adjacent pair of characters in the range satisfies
;; `char-same?` below.
;;
;; The list is computed while this definition is expanded: the
;; transformer bound to `mk` builds `ranges` and expands to a quoted
;; copy of it, so the function body is just that constant.
(define (make-known-char-range-list)
(let-syntax ([mk
(lambda (stx)
;; Stub out the forms that the included file relies on so that it
;; can be loaded inside this transformer: `define/who` becomes a
;; plain `define`, `check` expands to `(#%void)`, and
;; `with-global-lock*` expands to just its body expression.
;; NOTE(review): presumably "rumble/char.ss" uses all three of these
;; forms and supplies the character functions used below (such as a
;; `char-general-category` that produces lowercase symbols like 'cn)
;; --- confirm against that file.
(let-syntax ([define/who (lambda (stx)
(syntax-case stx ()
[(_ . r) #'(define . r)]))]
[check (lambda (stx) #'(#%void))]
[with-global-lock* (lambda (stx)
(syntax-case stx ()
[(_ e) #'e]))])
(include "rumble/char.ss")
;; Distance in code-point space from `c` to `(op c)`
(define (delta op c)
(- (char->integer (op c))
(char->integer c)))
;; #t when the one-character string for `a` is unchanged by
;; NFKD normalization
(define (non-decomp? a)
(let ([s (string a)])
(string=? (string-normalize-nfkd s)
s)))
;; #t when `a` and `b` agree on every property checked below:
;; the character predicates and general category, the case-conversion
;; deltas, and the normalization checks
(define (char-same? a b)
(and (eqv? (char-alphabetic? a)
(char-alphabetic? b))
(eqv? (char-lower-case? a)
(char-lower-case? b))
(eqv? (char-upper-case? a)
(char-upper-case? b))
(eqv? (char-title-case? a)
(char-title-case? b))
(eqv? (char-numeric? a)
(char-numeric? b))
(eqv? (char-symbolic? a)
(char-symbolic? b))
(eqv? (char-punctuation? a)
(char-punctuation? b))
(eqv? (char-graphic? a)
(char-graphic? b))
(eqv? (char-whitespace? a)
(char-whitespace? b))
(eqv? (char-blank? a)
(char-blank? b))
(eqv? (char-iso-control? a)
(char-iso-control? b))
(eqv? (char-general-category a)
(char-general-category b))
;; In addition to predicates, sameness checking
;; has historically counted deltas between upcase
;; and downcase --- just because that was convenient,
;; and not because it makes complete sense
(eqv? (delta char-upcase a)
(delta char-upcase b))
(eqv? (delta char-downcase a)
(delta char-downcase b))
(eqv? (delta char-titlecase a)
(delta char-titlecase b))
;; Check whether decomposition behavior is different
(eqv? (non-decomp? a)
(non-decomp? b))
;; Detect different combining classes: normalization may reorder
;; the pair in one order but not the other
(eqv? (#%equal? (string a b)
(string-normalize-nfkd (string a b)))
(#%equal? (string b a)
(string-normalize-nfkd (string b a))))))
;; Walk the code points in increasing order. `i` is the code point
;; being examined, `start` is the first code point of the range
;; being accumulated, and `same?` records whether every adjacent
;; pair seen so far in that range passed `char-same?`.
(define ranges
(let loop ([i 1] [start 0] [same? #t])
(cond
;; Reached #xD800: close the current range and resume with a new
;; range that starts at #xE000, skipping #xD800-#xDFFF (the
;; surrogate code points, which are not characters)
[(= i #xD800)
(cons `(,start ,(sub1 i) ,same?)
(loop #xE001 #xE000 #t))]
;; Past the last code point: close the final range at #x10FFFF
[(= i #x110000) `((,start #x10FFFF ,same?))]
;; A code point whose category is 'cn ends the current range;
;; `nloop` then skips the following run of 'cn code points
[(eq? 'cn (char-general-category (integer->char i)))
(cons `(,start ,(sub1 i) ,same?)
(let nloop ([i (add1 i)])
(cond
[(= i #xD800)
(nloop #xE000)]
;; Ran off the end while skipping: no more ranges
[(= i #x110000) '()]
[(eq? 'cn (char-general-category (integer->char i)))
(nloop (add1 i))]
;; First code point after the gap starts a fresh range
[else
(loop (add1 i) i #t)])))]
;; Still uniform: extend the range and keep `same?` as #t
[(and same?
(char-same? (integer->char i)
(integer->char (sub1 i))))
(loop (add1 i) start #t)]
;; Extend the range, but mark it as non-uniform; once `same?`
;; is #f it stays #f for the rest of this range
[else
(loop (add1 i) start #f)])))
;; Expand to `(quote <ranges>)` so the computed list becomes a literal
(datum->syntax #'here (list 'quote ranges))))])
(mk)))

View File

@ -34,609 +34,6 @@
;; Returns the general category of `ch` as a symbol whose name is the
;; downcased name of the symbol produced by `#%char-general-category`.
;; The downcased symbol is cached on the original symbol's property
;; list under the key 'downcase, so the string conversion is needed
;; only when that property is not yet set.
(define (char-general-category ch)
(or (with-global-lock* (getprop (#%char-general-category ch) 'downcase #f))
(let* ([s (#%char-general-category ch)]
;; NOTE(review): two consecutive `ds` bindings appear here, differing
;; only in `symbol->string` vs. `#%symbol->string`; this looks like the
;; before/after lines of a diff hunk --- confirm which one is intended
[ds (string->symbol (string-downcase (symbol->string s)))])
[ds (string->symbol (string-downcase (#%symbol->string s)))])
;; Cache the result for later lookups
(with-global-lock* (putprop s 'downcase ds))
ds)))
;; FIXME: this is a literal copy of output captured at one point; compute the list instead
(define (make-known-char-range-list)
'((0 887 #f)
(890 895 #f)
(900 906 #f)
(908 908 #t)
(910 929 #f)
(931 1327 #f)
(1329 1366 #t)
(1369 1375 #f)
(1377 1415 #f)
(1417 1418 #f)
(1421 1423 #f)
(1425 1479 #f)
(1488 1514 #t)
(1520 1524 #f)
(1536 1564 #f)
(1566 1805 #f)
(1807 1866 #f)
(1869 1969 #f)
(1984 2042 #f)
(2048 2093 #f)
(2096 2110 #t)
(2112 2139 #f)
(2142 2142 #t)
(2208 2226 #t)
(2276 2435 #f)
(2437 2444 #t)
(2447 2448 #t)
(2451 2472 #t)
(2474 2480 #t)
(2482 2482 #t)
(2486 2489 #t)
(2492 2500 #f)
(2503 2504 #t)
(2507 2510 #f)
(2519 2519 #t)
(2524 2525 #t)
(2527 2531 #f)
(2534 2555 #f)
(2561 2563 #f)
(2565 2570 #t)
(2575 2576 #t)
(2579 2600 #t)
(2602 2608 #t)
(2610 2611 #f)
(2613 2614 #f)
(2616 2617 #t)
(2620 2620 #t)
(2622 2626 #f)
(2631 2632 #t)
(2635 2637 #f)
(2641 2641 #t)
(2649 2652 #f)
(2654 2654 #t)
(2662 2677 #f)
(2689 2691 #f)
(2693 2701 #t)
(2703 2705 #t)
(2707 2728 #t)
(2730 2736 #t)
(2738 2739 #t)
(2741 2745 #t)
(2748 2757 #f)
(2759 2761 #f)
(2763 2765 #f)
(2768 2768 #t)
(2784 2787 #f)
(2790 2801 #f)
(2817 2819 #f)
(2821 2828 #t)
(2831 2832 #t)
(2835 2856 #t)
(2858 2864 #t)
(2866 2867 #t)
(2869 2873 #t)
(2876 2884 #f)
(2887 2888 #f)
(2891 2893 #f)
(2902 2903 #f)
(2908 2909 #t)
(2911 2915 #f)
(2918 2935 #f)
(2946 2947 #f)
(2949 2954 #t)
(2958 2960 #t)
(2962 2965 #f)
(2969 2970 #t)
(2972 2972 #t)
(2974 2975 #t)
(2979 2980 #t)
(2984 2986 #t)
(2990 3001 #t)
(3006 3010 #f)
(3014 3016 #t)
(3018 3021 #f)
(3024 3024 #t)
(3031 3031 #t)
(3046 3066 #f)
(3072 3075 #f)
(3077 3084 #t)
(3086 3088 #t)
(3090 3112 #t)
(3114 3129 #t)
(3133 3140 #f)
(3142 3144 #f)
(3146 3149 #f)
(3157 3158 #f)
(3160 3161 #t)
(3168 3171 #f)
(3174 3183 #t)
(3192 3199 #f)
(3201 3203 #f)
(3205 3212 #t)
(3214 3216 #t)
(3218 3240 #t)
(3242 3251 #t)
(3253 3257 #t)
(3260 3268 #f)
(3270 3272 #f)
(3274 3277 #f)
(3285 3286 #t)
(3294 3294 #t)
(3296 3299 #f)
(3302 3311 #t)
(3313 3314 #t)
(3329 3331 #f)
(3333 3340 #t)
(3342 3344 #t)
(3346 3386 #t)
(3389 3396 #f)
(3398 3400 #t)
(3402 3406 #f)
(3415 3415 #t)
(3424 3427 #f)
(3430 3445 #f)
(3449 3455 #f)
(3458 3459 #t)
(3461 3478 #t)
(3482 3505 #t)
(3507 3515 #t)
(3517 3517 #t)
(3520 3526 #t)
(3530 3530 #t)
(3535 3540 #f)
(3542 3542 #t)
(3544 3551 #f)
(3558 3567 #t)
(3570 3572 #f)
(3585 3642 #f)
(3647 3675 #f)
(3713 3714 #t)
(3716 3716 #t)
(3719 3720 #t)
(3722 3722 #t)
(3725 3725 #t)
(3732 3735 #t)
(3737 3743 #t)
(3745 3747 #t)
(3749 3749 #t)
(3751 3751 #t)
(3754 3755 #t)
(3757 3769 #f)
(3771 3773 #f)
(3776 3780 #t)
(3782 3782 #t)
(3784 3789 #f)
(3792 3801 #t)
(3804 3807 #f)
(3840 3911 #f)
(3913 3948 #f)
(3953 3991 #f)
(3993 4028 #f)
(4030 4044 #f)
(4046 4058 #f)
(4096 4293 #f)
(4295 4295 #t)
(4301 4301 #t)
(4304 4680 #f)
(4682 4685 #t)
(4688 4694 #t)
(4696 4696 #t)
(4698 4701 #t)
(4704 4744 #t)
(4746 4749 #t)
(4752 4784 #t)
(4786 4789 #t)
(4792 4798 #t)
(4800 4800 #t)
(4802 4805 #t)
(4808 4822 #t)
(4824 4880 #t)
(4882 4885 #t)
(4888 4954 #t)
(4957 4988 #f)
(4992 5017 #f)
(5024 5108 #t)
(5120 5788 #f)
(5792 5880 #f)
(5888 5900 #t)
(5902 5908 #f)
(5920 5942 #f)
(5952 5971 #f)
(5984 5996 #t)
(5998 6000 #t)
(6002 6003 #t)
(6016 6109 #f)
(6112 6121 #t)
(6128 6137 #t)
(6144 6158 #f)
(6160 6169 #t)
(6176 6263 #f)
(6272 6314 #f)
(6320 6389 #t)
(6400 6430 #t)
(6432 6443 #f)
(6448 6459 #f)
(6464 6464 #t)
(6468 6509 #f)
(6512 6516 #t)
(6528 6571 #t)
(6576 6601 #f)
(6608 6618 #f)
(6622 6683 #f)
(6686 6750 #f)
(6752 6780 #f)
(6783 6793 #f)
(6800 6809 #t)
(6816 6829 #f)
(6832 6846 #f)
(6912 6987 #f)
(6992 7036 #f)
(7040 7155 #f)
(7164 7223 #f)
(7227 7241 #f)
(7245 7295 #f)
(7360 7367 #t)
(7376 7414 #f)
(7416 7417 #t)
(7424 7669 #f)
(7676 7957 #f)
(7960 7965 #t)
(7968 8005 #f)
(8008 8013 #t)
(8016 8023 #f)
(8025 8025 #t)
(8027 8027 #t)
(8029 8029 #t)
(8031 8061 #f)
(8064 8116 #f)
(8118 8132 #f)
(8134 8147 #f)
(8150 8155 #f)
(8157 8175 #f)
(8178 8180 #f)
(8182 8190 #f)
(8192 8292 #f)
(8294 8305 #f)
(8308 8334 #f)
(8336 8348 #t)
(8352 8381 #f)
(8400 8432 #f)
(8448 8585 #f)
(8592 9210 #f)
(9216 9254 #t)
(9280 9290 #t)
(9312 11123 #f)
(11126 11157 #t)
(11160 11193 #t)
(11197 11208 #t)
(11210 11217 #t)
(11264 11310 #t)
(11312 11358 #t)
(11360 11507 #f)
(11513 11557 #f)
(11559 11559 #t)
(11565 11565 #t)
(11568 11623 #t)
(11631 11632 #f)
(11647 11670 #f)
(11680 11686 #t)
(11688 11694 #t)
(11696 11702 #t)
(11704 11710 #t)
(11712 11718 #t)
(11720 11726 #t)
(11728 11734 #t)
(11736 11742 #t)
(11744 11842 #f)
(11904 11929 #t)
(11931 12019 #f)
(12032 12245 #t)
(12272 12283 #t)
(12288 12351 #f)
(12353 12438 #f)
(12441 12543 #f)
(12549 12589 #t)
(12593 12686 #t)
(12688 12730 #f)
(12736 12771 #t)
(12784 12830 #f)
(12832 13054 #f)
(13056 19893 #f)
(19904 40908 #f)
(40960 42124 #f)
(42128 42182 #t)
(42192 42539 #f)
(42560 42653 #f)
(42655 42743 #f)
(42752 42894 #f)
(42896 42925 #f)
(42928 42929 #f)
(42999 43051 #f)
(43056 43065 #f)
(43072 43127 #f)
(43136 43204 #f)
(43214 43225 #f)
(43232 43259 #f)
(43264 43347 #f)
(43359 43388 #f)
(43392 43469 #f)
(43471 43481 #f)
(43486 43518 #f)
(43520 43574 #f)
(43584 43597 #f)
(43600 43609 #t)
(43612 43714 #f)
(43739 43766 #f)
(43777 43782 #t)
(43785 43790 #t)
(43793 43798 #t)
(43808 43814 #t)
(43816 43822 #t)
(43824 43871 #f)
(43876 43877 #t)
(43968 44013 #f)
(44016 44025 #t)
(44032 55203 #t)
(55216 55238 #t)
(55243 55291 #t)
(57344 64109 #f)
(64112 64217 #t)
(64256 64262 #t)
(64275 64279 #t)
(64285 64310 #f)
(64312 64316 #t)
(64318 64318 #t)
(64320 64321 #t)
(64323 64324 #t)
(64326 64449 #f)
(64467 64831 #f)
(64848 64911 #t)
(64914 64967 #t)
(65008 65021 #f)
(65024 65049 #f)
(65056 65069 #f)
(65072 65106 #f)
(65108 65126 #f)
(65128 65131 #f)
(65136 65140 #f)
(65142 65276 #t)
(65279 65279 #t)
(65281 65470 #f)
(65474 65479 #t)
(65482 65487 #t)
(65490 65495 #t)
(65498 65500 #t)
(65504 65510 #f)
(65512 65518 #f)
(65529 65533 #f)
(65536 65547 #t)
(65549 65574 #t)
(65576 65594 #t)
(65596 65597 #t)
(65599 65613 #t)
(65616 65629 #t)
(65664 65786 #t)
(65792 65794 #t)
(65799 65843 #t)
(65847 65932 #f)
(65936 65947 #t)
(65952 65952 #t)
(66000 66045 #f)
(66176 66204 #t)
(66208 66256 #t)
(66272 66299 #f)
(66304 66339 #f)
(66352 66378 #f)
(66384 66426 #f)
(66432 66461 #t)
(66463 66499 #f)
(66504 66517 #f)
(66560 66717 #f)
(66720 66729 #t)
(66816 66855 #t)
(66864 66915 #t)
(66927 66927 #t)
(67072 67382 #t)
(67392 67413 #t)
(67424 67431 #t)
(67584 67589 #t)
(67592 67592 #t)
(67594 67637 #t)
(67639 67640 #t)
(67644 67644 #t)
(67647 67669 #t)
(67671 67742 #f)
(67751 67759 #t)
(67840 67867 #f)
(67871 67897 #f)
(67903 67903 #t)
(67968 68023 #t)
(68030 68031 #t)
(68096 68099 #f)
(68101 68102 #t)
(68108 68115 #f)
(68117 68119 #t)
(68121 68147 #t)
(68152 68154 #f)
(68159 68167 #f)
(68176 68184 #t)
(68192 68255 #f)
(68288 68326 #f)
(68331 68342 #f)
(68352 68405 #t)
(68409 68437 #f)
(68440 68466 #f)
(68472 68497 #f)
(68505 68508 #t)
(68521 68527 #t)
(68608 68680 #t)
(69216 69246 #f)
(69632 69709 #f)
(69714 69743 #f)
(69759 69825 #f)
(69840 69864 #t)
(69872 69881 #t)
(69888 69940 #f)
(69942 69955 #f)
(69968 70006 #f)
(70016 70088 #f)
(70093 70093 #t)
(70096 70106 #f)
(70113 70132 #t)
(70144 70161 #t)
(70163 70205 #f)
(70320 70378 #f)
(70384 70393 #t)
(70401 70403 #f)
(70405 70412 #t)
(70415 70416 #t)
(70419 70440 #t)
(70442 70448 #t)
(70450 70451 #t)
(70453 70457 #t)
(70460 70468 #f)
(70471 70472 #t)
(70475 70477 #f)
(70487 70487 #t)
(70493 70499 #f)
(70502 70508 #t)
(70512 70516 #t)
(70784 70855 #f)
(70864 70873 #t)
(71040 71093 #f)
(71096 71113 #f)
(71168 71236 #f)
(71248 71257 #t)
(71296 71351 #f)
(71360 71369 #t)
(71840 71922 #f)
(71935 71935 #t)
(72384 72440 #t)
(73728 74648 #t)
(74752 74862 #t)
(74864 74868 #t)
(77824 78894 #t)
(92160 92728 #t)
(92736 92766 #t)
(92768 92777 #t)
(92782 92783 #t)
(92880 92909 #t)
(92912 92917 #f)
(92928 92997 #f)
(93008 93017 #t)
(93019 93025 #t)
(93027 93047 #t)
(93053 93071 #t)
(93952 94020 #t)
(94032 94078 #f)
(94095 94111 #f)
(110592 110593 #t)
(113664 113770 #t)
(113776 113788 #t)
(113792 113800 #t)
(113808 113817 #t)
(113820 113827 #f)
(118784 119029 #t)
(119040 119078 #t)
(119081 119261 #f)
(119296 119365 #f)
(119552 119638 #t)
(119648 119665 #t)
(119808 119892 #f)
(119894 119964 #f)
(119966 119967 #t)
(119970 119970 #t)
(119973 119974 #t)
(119977 119980 #t)
(119982 119993 #f)
(119995 119995 #t)
(119997 120003 #t)
(120005 120069 #f)
(120071 120074 #t)
(120077 120084 #t)
(120086 120092 #t)
(120094 120121 #f)
(120123 120126 #t)
(120128 120132 #t)
(120134 120134 #t)
(120138 120144 #t)
(120146 120485 #f)
(120488 120779 #f)
(120782 120831 #t)
(124928 125124 #t)
(125127 125142 #f)
(126464 126467 #t)
(126469 126495 #t)
(126497 126498 #t)
(126500 126500 #t)
(126503 126503 #t)
(126505 126514 #t)
(126516 126519 #t)
(126521 126521 #t)
(126523 126523 #t)
(126530 126530 #t)
(126535 126535 #t)
(126537 126537 #t)
(126539 126539 #t)
(126541 126543 #t)
(126545 126546 #t)
(126548 126548 #t)
(126551 126551 #t)
(126553 126553 #t)
(126555 126555 #t)
(126557 126557 #t)
(126559 126559 #t)
(126561 126562 #t)
(126564 126564 #t)
(126567 126570 #t)
(126572 126578 #t)
(126580 126583 #t)
(126585 126588 #t)
(126590 126590 #t)
(126592 126601 #t)
(126603 126619 #t)
(126625 126627 #t)
(126629 126633 #t)
(126635 126651 #t)
(126704 126705 #t)
(126976 127019 #t)
(127024 127123 #t)
(127136 127150 #t)
(127153 127167 #t)
(127169 127183 #t)
(127185 127221 #t)
(127232 127244 #f)
(127248 127278 #t)
(127280 127339 #f)
(127344 127386 #f)
(127462 127490 #f)
(127504 127546 #t)
(127552 127560 #t)
(127568 127569 #t)
(127744 127788 #t)
(127792 127869 #t)
(127872 127950 #t)
(127956 127991 #t)
(128000 128254 #t)
(128256 128330 #t)
(128336 128377 #t)
(128379 128419 #t)
(128421 128578 #t)
(128581 128719 #t)
(128736 128748 #t)
(128752 128755 #t)
(128768 128883 #t)
(128896 128980 #t)
(129024 129035 #t)
(129040 129095 #t)
(129104 129113 #t)
(129120 129159 #t)
(129168 129197 #t)
(131072 173782 #t)
(173824 177972 #t)
(177984 178205 #t)
(194560 195101 #t)
(917505 917505 #t)
(917536 917631 #t)
(917760 917999 #t)
(983040 1048573 #t)
(1048576 1114109 #t)))