svn: r413
This commit is contained in:
Matthew Flatt 2005-07-21 13:41:00 +00:00
parent beb196216e
commit 909ad1156f
30 changed files with 6050 additions and 4760 deletions

View File

@ -3,7 +3,7 @@
(all-except (lib "file.ss" "dynext") append-c-suffix)
(prefix dynext: (lib "link.ss" "dynext"))
(lib "file.ss")
(lib "13.ss" "srfi"))
(lib "string.ss" "srfi" "13"))
(provide make-gl-info)

View File

@ -145,9 +145,6 @@
(define-for-syntax gl-regex2 (regexp "(^GLU\\-)|(^GL\\-)"))
(define-for-syntax _-regex (regexp "_"))
(define-for-syntax (string-downcase s)
(list->string (map char-downcase (string->list s))))
(define-for-syntax (translate-cname name)
(string->symbol
(string-downcase

View File

@ -1,5 +1,10 @@
;; module loader for SRFI-13
(module |13| mzscheme
(require (lib "string.ss" "srfi" "13"))
(provide (all-from (lib "string.ss" "srfi" "13"))))
(provide (all-from-except (lib "string.ss" "srfi" "13")
s:string-upcase s:string-downcase s:string-titlecase)
(rename s:string-upcase string-upcase)
(rename s:string-downcase string-downcase)
(rename s:string-titlecase string-titlecase)))

View File

@ -104,7 +104,7 @@
string-compare string-compare-ci
string= string< string> string<= string>= string<>
string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<>
string-downcase string-upcase string-titlecase
s:string-downcase s:string-upcase s:string-titlecase
string-downcase! string-upcase! string-titlecase!
string-take string-take-right
string-drop string-drop-right
@ -965,7 +965,7 @@
;; Capitalize every contiguous alpha sequence: capitalise
;; first char, lowercase rest.
(define (string-upcase s . maybe-start+end)
(define (s:string-upcase s . maybe-start+end)
(let-string-start+end (start end) 'string-upcase s maybe-start+end
(%string-map char-upcase s start end)))
@ -973,7 +973,7 @@
(let-string-start+end (start end) 'string-upcase! s maybe-start+end
(%string-map! char-upcase s start end)))
(define (string-downcase s . maybe-start+end)
(define (s:string-downcase s . maybe-start+end)
(let-string-start+end (start end) 'string-downcase s maybe-start+end
(%string-map char-downcase s start end)))
@ -1000,7 +1000,7 @@
(let-string-start+end (start end) 'string-titlecase! s maybe-start+end
(%string-titlecase! s start end)))
(define (string-titlecase s . maybe-start+end)
(define (s:string-titlecase s . maybe-start+end)
(let-string-start+end (start end) 'string-titlecase! s maybe-start+end
(let ((ans (substring s start end)))
(%string-titlecase! ans 0 (- end start))

View File

@ -39,10 +39,10 @@ or,
NOTE on SRFIs with name conflicts
---------------------------------
Certain SRFIs (currently SRFIs 1, 19, 43 and 45) provide names which
conflict with names provided by the 'mzscheme' language. Attempting
to require one of these SRFIs in a module written in the 'mzscheme'
language will result in an error.
Certain SRFIs (currently SRFIs 1, 13, 19, 43 and 45) provide names
which conflict with names provided by the 'mzscheme' language.
Attempting to require one of these SRFIs in a module written in the
'mzscheme' language will result in an error.
To address this problem, the PLT implementations of these SRFIs
provide a different module which renames the problematic exports to

View File

@ -111,7 +111,7 @@
;; Test escape printing:
(parameterize ([current-locale #f])
(test "\"\\a\\b\\t\\n\\f\\r\\e\\v\\\\\\\"A \\u0005A\\u000FP\\u000FP\u00DDD\u00FF7\\u00011\\U012345\""
(test "\"\\a\\b\\t\\n\\f\\r\\e\\v\\\\\\\"A \\u0005A\\u000FP\\u000FP\u00DDD\u00FF7\\u00011\\U00012345\""
'output-escapes
(let ([p (open-output-string)])
(write "\a\b\t\n\f\r\e\v\\\"\101\40\5A\xFP\xfP\xdDD\3777\0011\U12345" p)

View File

@ -102,13 +102,18 @@
(test (integer->char #x10000) readstr "#\\U10000")
(test (integer->char #x100000) readstr "#\\U100000")
(test (integer->char #x10FFFF) readstr "#\\U10FFFF")
(test 0 readstr "#\\U100000000")
(test (integer->char #x10FFFF) readstr "#\\U0010FFFF")
(test (integer->char #x0) readstr "#\\U00000000")
(test 1 readstr "#\\U000000011")
(err/rt-test (readstr "#\\uD800") exn:fail:read?)
(err/rt-test (readstr "#\\uD900") exn:fail:read?)
(err/rt-test (readstr "#\\UDFFF") exn:fail:read?)
(err/rt-test (readstr "#\\UFFFFFF") exn:fail:read?)
(err/rt-test (readstr "#\\U110000") exn:fail:read?)
(err/rt-test (readstr "#\\U1000000") exn:fail:read?)
(err/rt-test (readstr "#\\U10000000") exn:fail:read?)
(err/rt-test (readstr "#\\UFFFFFFFF") exn:fail:read?)
(define (astring n) (string (integer->char n)))

View File

@ -1158,7 +1158,10 @@
#\u00FC
#\u00FD
#\u00FE
#\u00FF))
#\u00FF
;; New definition of lower case:
#\u00AA
#\u00BA))
;; No upper case in latin-1
(check-all-latin-1
@ -1167,7 +1170,9 @@
(> (char->integer (char-upcase x)) 255))))
'(#\u00B5
#\u00DF
#\u00FF))
#\u00FF
#\u00AA
#\u00BA))
;; Latin-1 uppercase:
(check-all-latin-1
@ -1235,8 +1240,7 @@
(and (char-alphabetic? c)
(not (char-upper-case? c))
(not (char-lower-case? c))))
'(#\u00AA
#\u00BA))
'())
;; Complete titlecase list:
(check-all-unicode
@ -1295,7 +1299,7 @@
#\u2008
#\u2009
#\u200A
#\u200B
;; #\u200B --- in Unicode 4.0, this code point changed from Zs to Cf
#\u2028
#\u2029
#\u202F
@ -1386,7 +1390,7 @@
#\u2008
#\u2009
#\u200A
#\u200B
;; #\u200B --- see note above
#\u202F
#\u3000
;; Post SRFI-14?
@ -1435,4 +1439,46 @@
(with-handlers ([exn:fail:contract? void]) (bytes->string/locale #"xxx"))
(with-handlers ([exn:fail:contract? void]) (string->bytes/locale "xxx")))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; String-upcase, etc.:
(test "ABC!" string-upcase "abc!")
(test "Z\u7238Z" string-upcase "z\u7238z")
(test "STRASSE" string-upcase "Stra\xDFe")
(test "\u039A\u0391\u039F\u03A3" string-upcase "\u039A\u03b1\u03BF\u03C2")
(test "\u039A\u0391\u039F\u03A3" string-upcase "\u039A\u03b1\u03BF\u03C3")
(test "abc!" string-downcase "aBC!")
(test "z\u7238z" string-downcase "z\u7238Z")
(test "stra\xDFe" string-downcase "Stra\xDFe")
(test "\u03BA\u03b1\u03BF\u03C2" string-downcase "\u039A\u0391\u039F\u03A3")
(test "\u03C3" string-downcase "\u03A3")
(test "x\u03C2" string-downcase "X\u03A3")
(test "\u03BA\u03b1\u03BF\u03C3\u03C2" string-downcase "\u039A\u0391\u039F\u03A3\u03A3")
(test "\u03BA\u03b1\u03BF\u03C2 x" string-downcase "\u039A\u0391\u039F\u03A3 x")
(test "abc!" string-foldcase "aBC!")
(test "z\u7238z" string-foldcase "z\u7238Z")
(test "strasse" string-foldcase "Stra\xDFe")
(test "\u03BA\u03b1\u03BF\u03C3" string-foldcase "\u039A\u0391\u039F\u03A3")
(test "\u03C3" string-foldcase "\u03A3")
(test "x\u03C3" string-foldcase "X\u03A3")
(test "\u03BA\u03b1\u03BF\u03C3\u03C3" string-foldcase "\u039A\u0391\u039F\u03A3\u03A3")
(test "\u03BA\u03b1\u03BF\u03C3 x" string-foldcase "\u039A\u0391\u039F\u03A3 x")
(test "Abc!" string-titlecase "aBC!")
(test "Abc Two" string-titlecase "aBC twO")
(test "Abc!Two" string-titlecase "aBC!twO")
(test "Z\u7238Z" string-titlecase "z\u7238Z")
(test "Stra\xDFe" string-titlecase "stra\xDFe")
(test "Stra Sse" string-titlecase "stra \xDFe")
(test "\u039A\u03b1\u03BF\u03C2" string-titlecase "\u039A\u0391\u039F\u03A3")
(test "\u039A\u03b1\u03BF \u03A3x" string-titlecase "\u039A\u0391\u039F \u03A3x")
(test "\u03A3" string-titlecase "\u03A3")
(test "X\u03C2" string-titlecase "x\u03A3")
(test "\u039A\u03b1\u03BF\u03C3\u03C2" string-titlecase "\u039A\u0391\u039F\u03A3\u03A3")
(test "\u039A\u03b1\u03BF\u03C2 X" string-titlecase "\u039A\u0391\u039F\u03A3 x")
(report-errs)

View File

@ -4,7 +4,7 @@
(lib "port.ss")
(lib "pretty.ss")
(lib "xml.ss" "xml")
(lib "13.ss" "srfi")
(lib "string.ss" "srfi" "13")
"connection-manager.ss")
;; **************************************************

View File

@ -96,9 +96,11 @@ scheme_undefined
scheme_tail_call_waiting
scheme_multiple_values
scheme_uchar_table
scheme_uchar_cases_table
scheme_uchar_ups
scheme_uchar_downs
scheme_uchar_titles
scheme_uchar_folds
scheme_eval
scheme_eval_multi
scheme_eval_compiled
@ -211,6 +213,7 @@ scheme_make_immutable_sized_char_string
scheme_make_char_string_without_copying
scheme_alloc_char_string
scheme_append_char_string
scheme_string_recase
scheme_make_vector
scheme_make_integer_value
scheme_make_integer_value_from_unsigned

View File

@ -96,9 +96,11 @@ scheme_undefined
scheme_tail_call_waiting
scheme_multiple_values
scheme_uchar_table
scheme_uchar_cases_table
scheme_uchar_ups
scheme_uchar_downs
scheme_uchar_titles
scheme_uchar_folds
scheme_eval
scheme_eval_multi
scheme_eval_compiled
@ -218,6 +220,7 @@ scheme_make_immutable_sized_char_string
scheme_make_char_string_without_copying
scheme_alloc_char_string
scheme_append_char_string
scheme_string_recase
scheme_make_vector
scheme_make_integer_value
scheme_make_integer_value_from_unsigned

View File

@ -98,9 +98,11 @@ EXPORTS
scheme_tail_call_waiting
scheme_multiple_values
scheme_uchar_table
scheme_uchar_cases_table
scheme_uchar_ups
scheme_uchar_downs
scheme_uchar_titles
scheme_uchar_folds
scheme_eval
scheme_eval_multi
scheme_eval_compiled
@ -203,6 +205,7 @@ EXPORTS
scheme_make_char_string_without_copying
scheme_alloc_char_string
scheme_append_char_string
scheme_string_recase
scheme_make_vector
scheme_make_integer_value
scheme_make_integer_value_from_unsigned

View File

@ -520,24 +520,29 @@ typedef void (*Scheme_Type_Printer)(Scheme_Object *v, int for_display, Scheme_Pr
#define scheme_make_character(ch) ((((mzchar)ch) < 256) ? scheme_char_constants[(unsigned char)(ch)] : scheme_make_char(ch))
#define scheme_make_ascii_character(ch) scheme_char_constants[(unsigned char)(ch)];
#define scheme_uchar_find(x) (scheme_uchar_table[x >> 21][(x >> 8) & 0x1FFF][x & 0xFF])
#define scheme_uchar_find(table, x) (table[(x >> 8) & 0x1FFF][x & 0xFF])
#define scheme_isblank(x) ((scheme_uchar_find(x)) & 0x1)
#define scheme_issymbol(x) ((scheme_uchar_find(x)) & 0x2)
#define scheme_ispunc(x) ((scheme_uchar_find(x)) & 0x4)
#define scheme_iscontrol(x) ((scheme_uchar_find(x)) & 0x8)
#define scheme_isspace(x) ((scheme_uchar_find(x)) & 0x10)
#define scheme_isxdigit(x) ((scheme_uchar_find(x)) & 0x20)
#define scheme_isdigit(x) ((scheme_uchar_find(x)) & 0x40)
#define scheme_isalpha(x) ((scheme_uchar_find(x)) & 0x80)
#define scheme_istitle(x) ((scheme_uchar_find(x)) & 0x100)
#define scheme_isupper(x) ((scheme_uchar_find(x)) & 0x200)
#define scheme_islower(x) ((scheme_uchar_find(x)) & 0x400)
#define scheme_isgraphic(x) ((scheme_uchar_find(x)) & 0x800)
#define scheme_isblank(x) ((scheme_uchar_find(scheme_uchar_table, x)) & 0x1)
#define scheme_issymbol(x) ((scheme_uchar_find(scheme_uchar_table, x)) & 0x2)
#define scheme_ispunc(x) ((scheme_uchar_find(scheme_uchar_table, x)) & 0x4)
#define scheme_iscontrol(x) ((scheme_uchar_find(scheme_uchar_table, x)) & 0x8)
#define scheme_isspace(x) ((scheme_uchar_find(scheme_uchar_table, x)) & 0x10)
#define scheme_isxdigit(x) ((scheme_uchar_find(scheme_uchar_table, x)) & 0x20)
#define scheme_isdigit(x) ((scheme_uchar_find(scheme_uchar_table, x)) & 0x40)
#define scheme_isalpha(x) ((scheme_uchar_find(scheme_uchar_table, x)) & 0x80)
#define scheme_istitle(x) ((scheme_uchar_find(scheme_uchar_table, x)) & 0x100)
#define scheme_isupper(x) ((scheme_uchar_find(scheme_uchar_table, x)) & 0x200)
#define scheme_islower(x) ((scheme_uchar_find(scheme_uchar_table, x)) & 0x400)
#define scheme_isgraphic(x) ((scheme_uchar_find(scheme_uchar_table, x)) & 0x800)
#define scheme_iscaseignorable(x) ((scheme_uchar_find(scheme_uchar_table, x)) & 0x1000)
#define scheme_isspecialcasing(x) ((scheme_uchar_find(scheme_uchar_table, x)) & 0x2000)
#define scheme_toupper(x) (x + scheme_uchar_ups[(((scheme_uchar_find(x)) & 0x3F000) >> 12)])
#define scheme_tolower(x) (x + scheme_uchar_downs[(((scheme_uchar_find(x)) & 0xFC0000) >> 18)])
#define scheme_totitle(x) (x + scheme_uchar_titles[(((scheme_uchar_find(x)) & 0x3F000000) >> 24)])
#define scheme_iscased(x) ((scheme_uchar_find(scheme_uchar_table, x)) & 0x700)
#define scheme_toupper(x) (x + scheme_uchar_ups[scheme_uchar_find(scheme_uchar_cases_table, x)])
#define scheme_tolower(x) (x + scheme_uchar_downs[scheme_uchar_find(scheme_uchar_cases_table, x)])
#define scheme_totitle(x) (x + scheme_uchar_titles[scheme_uchar_find(scheme_uchar_cases_table, x)])
#define scheme_tofold(x) (x + scheme_uchar_folds[scheme_uchar_find(scheme_uchar_cases_table, x)])
/*========================================================================*/
/* procedure values */

View File

@ -285,7 +285,8 @@ regexp.@LTO@: $(srcdir)/schpriv.h $(srcdir)/schexn.h $(SCONFIG) $(srcdir)/../in
setjmpup.@LTO@: $(srcdir)/schpriv.h $(srcdir)/schexn.h $(SCONFIG) $(srcdir)/../include/scheme.h \
$(srcdir)/../src/stypes.h $(srcdir)/schmach.h
string.@LTO@: $(srcdir)/schpriv.h $(srcdir)/schexn.h $(SCONFIG) $(srcdir)/../include/scheme.h \
$(srcdir)/../src/stypes.h $(srcdir)/schvers.h $(srcdir)/mzmark.c $(srcdir)/strops.inc
$(srcdir)/../src/stypes.h $(srcdir)/schvers.h $(srcdir)/mzmark.c $(srcdir)/strops.inc \
$(srcdir)/schustr.inc
struct.@LTO@: $(srcdir)/schpriv.h $(srcdir)/schexn.h $(SCONFIG) $(srcdir)/../include/scheme.h \
$(srcdir)/../src/stypes.h $(srcdir)/mzmark.c
stxobj.@LTO@: $(srcdir)/schpriv.h $(srcdir)/schexn.h $(SCONFIG) $(srcdir)/../include/scheme.h \

View File

@ -57,6 +57,7 @@ static Scheme_Object *integer_to_char (int argc, Scheme_Object *argv[]);
static Scheme_Object *char_upcase (int argc, Scheme_Object *argv[]);
static Scheme_Object *char_downcase (int argc, Scheme_Object *argv[]);
static Scheme_Object *char_titlecase (int argc, Scheme_Object *argv[]);
static Scheme_Object *char_foldcase (int argc, Scheme_Object *argv[]);
static Scheme_Object *char_utf8_length (int argc, Scheme_Object *argv[]);
static Scheme_Object *char_map_list (int argc, Scheme_Object *argv[]);
@ -223,6 +224,11 @@ void scheme_init_char (Scheme_Env *env)
"char-titlecase",
1, 1, 1),
env);
scheme_add_global_constant("char-foldcase",
scheme_make_folding_prim(char_foldcase,
"char-foldcase",
1, 1, 1),
env);
scheme_add_global_constant("char-utf-8-length",
scheme_make_folding_prim(char_utf8_length,
@ -269,38 +275,38 @@ char_p (int argc, Scheme_Object *argv[])
return (SCHEME_CHARP(argv[0]) ? scheme_true : scheme_false);
}
#define charSTD_DOWNCASE(nl) nl;
#define charNO_DOWNCASE(nl) /* empty */
#define charSTD_FOLDCASE(nl) nl;
#define charNO_FOLDCASE(nl) /* empty */
#define GEN_CHAR_COMP(func_name, scheme_name, comp, DOWNCASE) \
#define GEN_CHAR_COMP(func_name, scheme_name, comp, FOLDCASE) \
static Scheme_Object *func_name(int argc, Scheme_Object *argv[]) \
{ int c, prev, i; Scheme_Object *rv = scheme_true; \
if (!SCHEME_CHARP(argv[0])) \
scheme_wrong_type(#scheme_name, "character", 0, argc, argv); \
prev = SCHEME_CHAR_VAL(argv[0]); \
DOWNCASE(prev = scheme_tolower(prev)) \
FOLDCASE(prev = scheme_tofold(prev)) \
for (i = 1; i < argc; i++) { \
if (!SCHEME_CHARP(argv[i])) \
scheme_wrong_type(#scheme_name, "character", i, argc, argv); \
c = SCHEME_CHAR_VAL(argv[i]); \
DOWNCASE(c = scheme_tolower(c)) \
FOLDCASE(c = scheme_tofold(c)) \
if (!(prev comp c)) rv = scheme_false; \
prev = c; \
} \
return rv; \
}
GEN_CHAR_COMP(char_eq, char=?, ==, charNO_DOWNCASE)
GEN_CHAR_COMP(char_lt, char<?, <, charNO_DOWNCASE)
GEN_CHAR_COMP(char_gt, char>?, >, charNO_DOWNCASE)
GEN_CHAR_COMP(char_lt_eq, char<=?, <=, charNO_DOWNCASE)
GEN_CHAR_COMP(char_gt_eq, char>=?, >=, charNO_DOWNCASE)
GEN_CHAR_COMP(char_eq, char=?, ==, charNO_FOLDCASE)
GEN_CHAR_COMP(char_lt, char<?, <, charNO_FOLDCASE)
GEN_CHAR_COMP(char_gt, char>?, >, charNO_FOLDCASE)
GEN_CHAR_COMP(char_lt_eq, char<=?, <=, charNO_FOLDCASE)
GEN_CHAR_COMP(char_gt_eq, char>=?, >=, charNO_FOLDCASE)
GEN_CHAR_COMP(char_eq_ci, char-ci=?, ==, charSTD_DOWNCASE)
GEN_CHAR_COMP(char_lt_ci, char-ci<?, <, charSTD_DOWNCASE)
GEN_CHAR_COMP(char_gt_ci, char-ci>?, >, charSTD_DOWNCASE)
GEN_CHAR_COMP(char_lt_eq_ci, char-ci<=?, <=, charSTD_DOWNCASE)
GEN_CHAR_COMP(char_gt_eq_ci, char-ci>=?, >=, charSTD_DOWNCASE)
GEN_CHAR_COMP(char_eq_ci, char-ci=?, ==, charSTD_FOLDCASE)
GEN_CHAR_COMP(char_lt_ci, char-ci<?, <, charSTD_FOLDCASE)
GEN_CHAR_COMP(char_gt_ci, char-ci>?, >, charSTD_FOLDCASE)
GEN_CHAR_COMP(char_lt_eq_ci, char-ci<=?, <=, charSTD_FOLDCASE)
GEN_CHAR_COMP(char_gt_eq_ci, char-ci>=?, >=, charSTD_FOLDCASE)
#define GEN_CHAR_TEST(func_name, scheme_name, pred) \
static Scheme_Object *func_name (int argc, Scheme_Object *argv[]) \
@ -378,6 +384,7 @@ static Scheme_Object *func_name (int argc, Scheme_Object *argv[]) \
GEN_RECASE(char_upcase, "char-upcase", scheme_toupper)
GEN_RECASE(char_downcase, "char-downcase", scheme_tolower)
GEN_RECASE(char_titlecase, "char-titlecase", scheme_totitle)
GEN_RECASE(char_foldcase, "char-foldcase", scheme_tofold)
static Scheme_Object *char_utf8_length (int argc, Scheme_Object *argv[])
{

File diff suppressed because it is too large Load Diff

View File

@ -2781,7 +2781,7 @@ namespace_variable_value(int argc, Scheme_Object *argv[])
if (!SCHEME_SYMBOLP(argv[0]))
scheme_wrong_type("namespace-variable-value", "symbol", 0, argc, argv);
use_map = ((argc > 0) ? SCHEME_TRUEP(argv[1]) : 1);
use_map = ((argc > 1) ? SCHEME_TRUEP(argv[1]) : 1);
if ((argc > 2) && SCHEME_TRUEP(argv[2])
&& !scheme_check_proc_arity(NULL, 0, 2, argc, argv))
scheme_wrong_type("namespace-variable-value", "procedure (arity 0) or #f", 1, argc, argv);

View File

@ -1,7 +1,9 @@
;; This script parses UnicodeData.txt (the standard Unicode database,
;; available from the web), and produces schuchar.inc, which is
;; used by scheme_isalpha, etc., and thus `char-alphabetic?', etc.
;; available from the web) and other such files, and it produces
;; "schuchar.inc" and "schustr.inc". The former is used by
;; scheme_isalpha, etc., and thus `char-alphabetic?', etc. The latter
;; is used for string operations.
;; Run as
;; mzscheme -r mk-uchar.ss
@ -23,9 +25,7 @@
punc-cats
sym-cats))
(define ups (cons (make-hash-table 'equal) (box 0)))
(define downs (cons (make-hash-table 'equal) (box 0)))
(define titles (cons (make-hash-table 'equal) (box 0)))
(define cases (cons (make-hash-table 'equal) (box 0)))
(define (indirect t v)
(let ([r (hash-table-get (car t) v (lambda () #f))])
@ -33,24 +33,23 @@
(let ([r (unbox (cdr t))])
(set-box! (cdr t) (add1 r))
(hash-table-put! (car t) v r)
(when (r . > . 63)
(when (r . > . 255)
(error "too many indirects"))
r))))
(define (combine up down title . l)
(define (combine . l)
;; The scheme_is...() macros in scheme.h must match
;; the bit layout produced here
(bitwise-ior
(arithmetic-shift (indirect ups up) 12)
(arithmetic-shift (indirect downs down) 18)
(arithmetic-shift (indirect titles title) 24)
(let loop ([l l][v 0])
(if (null? l)
v
(loop (cdr l) (bitwise-ior (arithmetic-shift v 1)
(if (car l)
1
0)))))))
(let loop ([l l][v 0])
(if (null? l)
v
(loop (cdr l) (bitwise-ior (arithmetic-shift v 1)
(if (car l)
1
0))))))
(define (combine-case up down title fold)
(indirect cases (list up down title fold)))
(define hexes (map char->integer (string->list "0123456789abcdefABCDEF")))
@ -58,9 +57,7 @@
;; the macros for accessing the table (in scheme.h) need to
;; be updated accordingly.
;; In practice, it's unlikely that anything will ever work
;; much better than 8. (At the time this was implemented,
;; 9 produced a table 10% smaller, but I left it at 8
;; because it feels more intuitively correct.)
;; much better than 8.
(define low-bits 8)
(define low (sub1 (expt 2 low-bits)))
@ -68,19 +65,22 @@
(define hi (arithmetic-shift (sub1 hi-count) low-bits))
(define top (make-vector hi-count #f))
(define top2 (make-vector hi-count #f))
(define range-bottom 0)
(define range-top -1)
(define range-v -1)
(define range-v2 -1)
(define ranges null)
(define ccount 0)
(define (map1 c v)
(define (map1 c v v2)
(set! ccount (add1 ccount))
(if (= c (add1 range-top))
(begin
(unless (= v range-v)
(unless (and (= v range-v)
(= v2 range-v2))
(set! range-v -1))
(set! range-top c))
(begin
@ -99,20 +99,104 @@
ranges))
(set! range-bottom c)
(set! range-top c)
(set! range-v v)))
(set! range-v v)
(set! range-v2 v2)))
(let ([top-index (arithmetic-shift c (- low-bits))])
(let ([vec (vector-ref top top-index)])
(let ([vec (vector-ref top top-index)]
[vec2 (vector-ref top2 top-index)])
(unless vec
(vector-set! top top-index (make-vector (add1 low))))
(let ([vec (vector-ref top top-index)])
(vector-set! vec (bitwise-and c low) v)))))
(unless vec2
(vector-set! top2 top-index (make-vector (add1 low))))
(let ([vec (vector-ref top top-index)]
[vec2 (vector-ref top2 top-index)])
(vector-set! vec (bitwise-and c low) v)
(vector-set! vec2 (bitwise-and c low) v2)))))
(define (mapn c from v)
(define (mapn c from v v2)
(if (= c from)
(map1 c v)
(map1 c v v2)
(begin
(map1 from v)
(mapn c (add1 from) v))))
(map1 from v v2)
(mapn c (add1 from) v v2))))
(define midletters
(call-with-input-file "WordBreakProperty.txt"
(lambda (i)
(let loop ()
(let ([re (regexp-match #rx"\n([0-9A-F]+) *; *MidLetter" i)])
(if re
(cons (string->number (bytes->string/latin-1 (cadr re)) 16)
(loop))
null))))))
(define (string->codes s)
(let ([m (regexp-match #rx"^[^0-9A-F]*([0-9A-F]+)" s)])
(if m
(cons (string->number (cadr m) 16)
(string->codes (substring s (string-length (car m)))))
null)))
;; This code assumes that Final_Sigma is the only condition that we care about:
(define case-foldings (make-hash-table 'equal))
(define special-case-foldings (make-hash-table 'equal))
(call-with-input-file "CaseFolding.txt"
(lambda (i)
(let loop ()
(let ([l (read-line i)])
(unless (eof-object? l)
(let ([m (regexp-match #rx"^([0-9A-F]+); *([CSF]) *;([^;]*)" l)])
(when m
(let ([code (string->number (cadr m) 16)]
[variant (list-ref m 2)]
[folded (string->codes (list-ref m 3))])
(if (string=? variant "F")
(hash-table-put! special-case-foldings code folded)
(hash-table-put! case-foldings code (car folded))))))
(loop))))))
;; This code assumes that Final_Sigma is the only condition that we care about:
(define special-casings (make-hash-table 'equal))
(define-struct special-casing (lower upper title folding final-sigma?))
(call-with-input-file "SpecialCasing.txt"
(lambda (i)
(let loop ()
(let ([l (read-line i)])
(unless (eof-object? l)
(let ([m (regexp-match #rx"^([0-9A-F]+);([^;]*);([^;]*);([^;]*);([^;]*)" l)])
(when (and m
(regexp-match #rx"^(?:(?: *Final_Sigma *)|(?: *))(?:$|[;#].*)" (list-ref m 5)))
(let ([code (string->number (cadr m) 16)]
[lower (string->codes (list-ref m 2))]
[upper (string->codes (list-ref m 4))]
[title (string->codes (list-ref m 3))]
[final-sigma? (and (regexp-match #rx"Final_Sigma" (list-ref m 5)) #t)])
(let ([folding (list (hash-table-get case-foldings code (lambda () code)))])
(hash-table-put! special-casings code (make-special-casing lower upper title folding final-sigma?))))))
(loop))))))
(define lower-case (make-hash-table 'equal))
(define upper-case (make-hash-table 'equal))
(with-input-from-file "DerivedCoreProperties.txt"
(lambda ()
(let loop ()
(let ([l (read-line)])
(unless (eof-object? l)
(let ([m (regexp-match #rx"^([0-9A-F.]+) *; (Lower|Upper)case" l)])
(when m
(let* ([start (string->number (car (regexp-match #rx"^[0-9A-F]+" (car m))) 16)]
[end (let ([m (regexp-match #rx"^[0-9A-F]+[.][.]([0-9A-F]+)" (car m))])
(if m
(string->number (cadr m) 16)
start))]
[t (if (string=? (caddr m) "Lower") lower-case upper-case)])
(let loop ([i start])
(hash-table-put! t i #t)
(unless (= i end)
(loop (add1 i)))))))
(loop))))))
(call-with-input-file "UnicodeData.txt"
(lambda (i)
@ -133,20 +217,27 @@
(if (regexp-match #rx", Last>" name)
(add1 prev-code)
code)
;; The booleans below are in most-siginficant-bit-first order
(combine
(if up (- up code) 0)
(if down (- down code) 0)
(if title (- title code) 0)
;; special-casing
(or (hash-table-get special-casings code (lambda () #f))
(hash-table-get special-case-foldings code (lambda () #f)))
;; case-ignoreable
(or (member code midletters)
(member cat '("Mn" "Me" "Cf" "Lm" "Sk")))
;; graphic
(member cat graphic-cats)
;; lowercase:
(hash-table-get lower-case code (lambda () #f))
#;
(and (not (<= #x2000 code #x2FFF))
(not down)
(or up
(regexp-match #rx"SMALL LETTER" name)
(regexp-match #rx"SMALL LIGATURE" name)))
;; uppercase;
(hash-table-get upper-case code (lambda () #f))
#;
(and (not (<= #x2000 code #x2FFF))
(not up)
(or down
@ -172,23 +263,50 @@
(member cat sym-cats)
;; blank
(or (string=? cat "Zs")
(= code #x9))))
(= code #x9)))
;; Cases
(combine-case
(if up (- up code) 0)
(if down (- down code) 0)
(if title (- title code) 0)
(let ([case-fold (hash-table-get case-foldings code (lambda () #f))])
(if case-fold (- case-fold code) 0))))
(loop code))))))))
(define vectors (make-hash-table 'equal))
(define vectors2 (make-hash-table 'equal))
(define pos 0)
(define pos2 0)
(current-output-port (open-output-file "schuchar.inc" 'truncate/replace))
(let loop ([i 0])
(unless (= i hi-count)
(let ([vec (vector-ref top i)])
(when vec
(unless (hash-table-get vectors vec (lambda () #f))
(set! pos (add1 pos))
(hash-table-put! vectors vec pos)))
(loop (add1 i)))))
(define (hash-vectors! top vectors get-pos set-pos!)
(let loop ([i 0])
(unless (= i hi-count)
(let ([vec (vector-ref top i)])
(when vec
(unless (hash-table-get vectors vec (lambda () #f))
(set-pos! (add1 (get-pos)))
(hash-table-put! vectors vec (get-pos))))
(loop (add1 i))))))
(hash-vectors! top vectors (lambda () pos) (lambda (v) (set! pos v)))
(hash-vectors! top2 vectors2 (lambda () pos2) (lambda (v) (set! pos2 v)))
;; copy folding special cases to the special-cases table, if not there already:
(hash-table-for-each special-case-foldings
(lambda (k v)
(let ([sc (hash-table-get special-casings k (lambda ()
(let ([sc (make-special-casing
(list k)
(list k)
(list k)
(list k)
#f)])
(hash-table-put! special-casings k sc)
sc)))])
(set-special-casing-folding! sc v))))
(define world-count (expt 2 10))
@ -197,19 +315,20 @@
(printf "/* Character count: ~a */~n" ccount)
(printf "/* Table size: ~a */~n~n"
(+ (* (add1 low)
(add1 (length (hash-table-map vectors cons))))
(* 2 hi-count)
world-count))
(* 2 (add1 (length (hash-table-map vectors cons)))))
(* (add1 low)
(* 1 (add1 (length (hash-table-map vectors2 cons)))))
(* 4 4 (unbox (cdr cases)))
(* 4 (* 2 hi-count))))
(printf "unsigned int **scheme_uchar_table[~a];~n~n" world-count)
(printf "static unsigned int *main_table[~a], *zero_table[~a];~n~n"
hi-count hi-count)
(printf "unsigned short *scheme_uchar_table[~a];~n" hi-count)
(printf "unsigned char *scheme_uchar_cases_table[~a];~n~n" hi-count)
(define print-row
(lambda (vec name)
(lambda (vec name pos hex?)
(printf " /* ~a */~n" name)
(let loop ([i 0])
(printf " ~a~a"
(printf (if hex? " 0x~x~a" " ~a~a")
(or (vector-ref vec i) "0")
(if (and (= name pos)
(= i low))
@ -219,22 +338,26 @@
(unless (= i low)
(loop (add1 i))))))
(printf "static unsigned int udata[] = {~n")
(define (print-table type suffix vectors pos hex?)
(printf "static unsigned ~a udata~a[] = {~n" type suffix)
(print-row (make-vector (add1 low) 0) 0 pos hex?)
(map (lambda (p)
(print-row (car p) (cdr p) pos hex?))
(quicksort
(hash-table-map vectors cons)
(lambda (a b) (< (cdr a) (cdr b)))))
(printf "};~n"))
(print-table "short" "" vectors pos #t)
(printf "\n")
(print-table "char" "_cases" vectors2 pos2 #f)
(print-row (make-vector (add1 low) 0) 0)
(map (lambda (p)
(print-row (car p) (cdr p)))
(quicksort
(hash-table-map vectors cons)
(lambda (a b) (< (cdr a) (cdr b)))))
(printf "};~n")
(define (print-shift t end name)
(printf "~n/* Case mapping size: ~a */~n" (hash-table-count (car cases)))
(define (print-shift t end select name)
(printf "~nint scheme_uchar_~a[] = {~n" name)
(for-each (lambda (p)
(printf " ~a~a"
(car p)
(select (car p))
(if (= (cdr p) (sub1 end))
""
","))
@ -244,9 +367,10 @@
(lambda (a b) (< (cdr a) (cdr b)))))
(printf " };~n"))
(print-shift (car ups) (unbox (cdr ups)) "ups")
(print-shift (car downs) (unbox (cdr downs)) "downs")
(print-shift (car titles) (unbox (cdr titles)) "titles")
(print-shift (car cases) (unbox (cdr cases)) car "ups")
(print-shift (car cases) (unbox (cdr cases)) cadr "downs")
(print-shift (car cases) (unbox (cdr cases)) caddr "titles")
(print-shift (car cases) (unbox (cdr cases)) cadddr "folds")
(set! ranges (cons (list range-bottom range-top (range-v . > . -1))
ranges))
@ -255,7 +379,7 @@
(printf "~n#define URANGE_VARIES 0x40000000~n")
(printf "static int mapped_uchar_ranges[] = {~n")
(for-each (lambda (r)
(printf "0x~x, 0x~x~a~a~n"
(printf " 0x~x, 0x~x~a~a~n"
(car r)
(cadr r)
(if (caddr r) "" " | URANGE_VARIES")
@ -267,36 +391,104 @@
(printf "~nstatic void init_uchar_table(void)~n{~n")
(printf " int i;~n~n")
(printf " scheme_uchar_table[0] = main_table;~n")
(printf " for (i = 1; i < ~a; i++) {~n" world-count)
(printf " scheme_uchar_table[i] = zero_table;~n")
(printf " }~n~n")
(printf " for (i = 0; i < ~a; i++) { ~n" hi-count)
(printf " main_table[i] = udata;~n")
(printf " zero_table[i] = udata;~n")
(printf " scheme_uchar_table[i] = udata;~n")
(printf " scheme_uchar_cases_table[i] = udata_cases;~n")
(printf " }~n")
(printf "~n")
(let loop ([i 0])
(unless (= i hi-count)
(let ([vec (vector-ref top i)])
(if vec
(let ([same-count (let loop ([j (add1 i)])
(if (equal? vec (vector-ref top j))
(loop (add1 j))
(- j i)))]
[vec-pos (* (add1 low) (hash-table-get vectors vec))])
(if (> same-count 4)
(begin
(printf " for (i = ~a; i < ~a; i++) {~n"
i (+ i same-count))
(printf " main_table[i] = udata + ~a;~n"
vec-pos)
(printf " }~n")
(loop (+ same-count i)))
(begin
(printf " main_table[~a] = udata + ~a;~n"
i
vec-pos)
(loop (add1 i)))))
(loop (add1 i))))))
(define (print-init top vectors suffix)
(let loop ([i 0])
(unless (= i hi-count)
(let ([vec (vector-ref top i)])
(if vec
(let ([same-count (let loop ([j (add1 i)])
(if (equal? vec (vector-ref top j))
(loop (add1 j))
(- j i)))]
[vec-pos (* (add1 low) (hash-table-get vectors vec))])
(if (> same-count 4)
(begin
(printf " for (i = ~a; i < ~a; i++) {~n"
i (+ i same-count))
(printf " scheme_uchar~a_table[i] = udata~a + ~a;~n"
suffix suffix
vec-pos)
(printf " }~n")
(loop (+ same-count i)))
(begin
(printf " scheme_uchar~a_table[~a] = udata~a + ~a;~n"
suffix
i
suffix
vec-pos)
(loop (add1 i)))))
(loop (add1 i)))))))
(print-init top vectors "")
(print-init top2 vectors2 "_cases")
(printf "}~n")
;; ----------------------------------------
(current-output-port (open-output-file "schustr.inc" 'truncate/replace))
(printf "/* Generated by mk-uchar.ss */~n~n")
(define specials null)
(define special-count 0)
(define (register-special l)
(let ([l (reverse l)])
(unless (let loop ([l l][specials specials])
(cond
[(null? l) #t]
[(null? specials) #f]
[(= (car l) (car specials)) (loop (cdr l) (cdr specials))]
[else #f]))
(set! specials (append l specials))
(set! special-count (+ special-count (length l))))
(- special-count (length l))))
(printf "#define NUM_SPECIAL_CASINGS ~a\n\n" (hash-table-count special-casings))
(printf "static int uchar_special_casings[] = {\n")
(printf " /* code, down len, off, up len, off, title len, off, fold len, off, final-sigma? */\n")
(let ([n (hash-table-count special-casings)])
(for-each (lambda (p)
(set! n (sub1 n))
(let ([code (car p)]
[sc (cdr p)])
(let ([lower-start (register-special (special-casing-lower sc))]
[upper-start (register-special (special-casing-upper sc))]
[title-start (register-special (special-casing-title sc))]
[folding-start (register-special (special-casing-folding sc))])
(printf " ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a~a"
code
(length (special-casing-lower sc)) lower-start
(length (special-casing-upper sc)) upper-start
(length (special-casing-title sc)) title-start
(length (special-casing-folding sc)) folding-start
(if (special-casing-final-sigma? sc) 1 0)
(if (zero? n) " " ",\n")))))
(quicksort (hash-table-map special-casings cons)
(lambda (a b) (< (car a) (car b))))))
(printf "};\n")
(printf "\n/* Offsets in scheme_uchar_special_casings point into here: */\n")
(printf "static int uchar_special_casing_data[] = {\n ")
(let ([n 0])
(for-each (lambda (v)
(printf
(cond
[(zero? n) "~a"]
[(zero? (modulo n 16)) ",\n ~a"]
[else ", ~a"])
v)
(set! n (add1 n)))
(reverse specials)))
(printf " };~n")
(printf "\n#define SPECIAL_CASE_FOLD_MAX ~a\n" (apply
max
(hash-table-map
special-casings
(lambda (k v)
(length (special-casing-folding v))))))

View File

@ -2161,7 +2161,7 @@ print_char_string(const char *str, int len,
if (esc) {
if (esc == minibuf) {
if (ustr[ui+delta] > 0xFFFF) {
sprintf(minibuf, "\\U%.6X", ustr[ui+delta]);
sprintf(minibuf, "\\U%.8X", ustr[ui+delta]);
} else
sprintf(minibuf, "\\u%.4X", ustr[ui+delta]);
}
@ -2472,7 +2472,7 @@ print_char(Scheme_Object *charobj, int notdisplay, PrintParams *pp)
minibuf[2 + ch] = 0;
} else {
if (ch > 0xFFFF)
sprintf(minibuf, "#\\U%.6X", ch);
sprintf(minibuf, "#\\U%.8X", ch);
else
sprintf(minibuf, "#\\u%.4X", ch);
}

View File

@ -2369,7 +2369,7 @@ read_string(int is_byte, int is_honu_char, Scheme_Object *port,
case 'u':
case 'U':
if (!is_byte) {
int maxc = ((ch == 'u') ? 4 : 6);
int maxc = ((ch == 'u') ? 4 : 8);
ch = scheme_getc_special_ok(port);
if (NOT_EOF_OR_SPECIAL(ch) && scheme_isxdigit(ch)) {
int count = 1;
@ -2718,7 +2718,7 @@ read_number_or_symbol(int init_ch, Scheme_Object *port,
int ungetc_ok;
int honu_mode, e_ok = 0;
int far_char_ok;
int single_escape, multiple_escape;
int single_escape, multiple_escape, norm_count = 0;
Getc_Fun_r getc_special_ok_fun;
ungetc_ok = scheme_peekc_is_ungetc(port);
@ -2828,7 +2828,28 @@ read_number_or_symbol(int init_ch, Scheme_Object *port,
}
if (!case_sens && !quoted && !running_quote)
ch = scheme_tolower(ch);
norm_count++;
else if (norm_count) {
/* case-normalize the last norm_count characters */
mzchar *s;
int newlen;
s = scheme_string_recase(buf, i - norm_count, norm_count, 3, 1, &newlen);
if (s != buf) {
if ((i + newlen - norm_count) >= size) {
oldsize = size;
oldbuf = buf;
size *= 2;
if (size <= (i + newlen - norm_count))
size = 2 * (i + (newlen - norm_count));
buf = (mzchar *)scheme_malloc_atomic((size + 1) * sizeof(mzchar));
memcpy(buf, oldbuf, oldsize * sizeof(mzchar));
}
memcpy(buf + i - norm_count, s, sizeof(mzchar) * newlen);
}
i += (newlen - norm_count);
norm_count = 0;
}
buf[i++] = ch;
@ -2858,6 +2879,22 @@ read_number_or_symbol(int init_ch, Scheme_Object *port,
return NULL;
}
if (norm_count) {
/* case-normalize the last norm_count characters */
mzchar *s;
int newlen;
s = scheme_string_recase(buf, i - norm_count, norm_count, 3, 1, &newlen);
if (s != buf) {
oldsize = size;
oldbuf = buf;
size = i + (newlen - norm_count) + 1;
buf = (mzchar *)scheme_malloc_atomic((size + 1) * sizeof(mzchar));
memcpy(buf, oldbuf, oldsize * sizeof(mzchar));
memcpy(buf + i - norm_count, s, sizeof(mzchar) * newlen);
}
i += (newlen - norm_count);
}
buf[i] = '\0';
if (!quoted_ever && (i == 1) && (buf[0] == '.') && !honu_mode) {
@ -3085,7 +3122,7 @@ read_character(Scheme_Object *port,
}
if (((ch == 'u') || (ch == 'U')) && NOT_EOF_OR_SPECIAL(next) && scheme_isxdigit(next)) {
int count = 0, n = 0, nbuf[8], maxc = ((ch == 'u') ? 4 : 6);
int count = 0, n = 0, nbuf[10], maxc = ((ch == 'u') ? 4 : 8);
while (count < maxc) {
ch = scheme_peekc_special_ok(port);
if (NOT_EOF_OR_SPECIAL(ch) && scheme_isxdigit(ch)) {
@ -3102,7 +3139,7 @@ read_character(Scheme_Object *port,
|| (n > 0x10FFFF)) {
scheme_read_err(port, stxsrc, line, col, pos, count + 2, 0, indentation,
"read: bad character constant #\\%c%u",
(maxc == 6) ? 'U' : 'u',
(maxc == 4) ? 'u' : 'U',
nbuf, count);
return NULL;
} else {

View File

@ -223,10 +223,12 @@ MZ_EXTERN Scheme_Object scheme_undefined[1];
MZ_EXTERN Scheme_Object *scheme_tail_call_waiting;
MZ_EXTERN Scheme_Object *scheme_multiple_values;
MZ_EXTERN unsigned int **scheme_uchar_table[];
MZ_EXTERN unsigned short *scheme_uchar_table[];
MZ_EXTERN unsigned char *scheme_uchar_cases_table[];
MZ_EXTERN int scheme_uchar_ups[];
MZ_EXTERN int scheme_uchar_downs[];
MZ_EXTERN int scheme_uchar_titles[];
MZ_EXTERN int scheme_uchar_folds[];
/*========================================================================*/
/* evaluation */
@ -444,6 +446,8 @@ MZ_EXTERN Scheme_Object *scheme_make_char_string_without_copying(mzchar *chars);
MZ_EXTERN Scheme_Object *scheme_alloc_char_string(int size, mzchar fill);
MZ_EXTERN Scheme_Object *scheme_append_char_string(Scheme_Object *, Scheme_Object *);
MZ_EXTERN mzchar *scheme_string_recase(mzchar *s, int d, int len, int mode, int inplace, int *_len);
MZ_EXTERN Scheme_Object *scheme_make_vector(int size, Scheme_Object *fill);
MZ_EXTERN Scheme_Object *scheme_make_integer_value(long i);
MZ_EXTERN Scheme_Object *scheme_make_integer_value_from_unsigned(unsigned long i);

View File

@ -179,10 +179,12 @@ Scheme_Object *(*scheme_make_void)(void);
Scheme_Object *scheme_undefined;
Scheme_Object *scheme_tail_call_waiting;
Scheme_Object *scheme_multiple_values;
unsigned int ***scheme_uchar_table;
unsigned short **scheme_uchar_table;
unsigned char **scheme_uchar_cases_table;
int *scheme_uchar_ups;
int *scheme_uchar_downs;
int *scheme_uchar_titles;
int *scheme_uchar_folds;
/*========================================================================*/
/* evaluation */
/*========================================================================*/
@ -361,6 +363,7 @@ Scheme_Object *(*scheme_make_immutable_sized_char_string)(mzchar *chars, long le
Scheme_Object *(*scheme_make_char_string_without_copying)(mzchar *chars);
Scheme_Object *(*scheme_alloc_char_string)(int size, mzchar fill);
Scheme_Object *(*scheme_append_char_string)(Scheme_Object *, Scheme_Object *);
mzchar *(*scheme_string_recase)(mzchar *s, int d, int len, int mode, int inplace, int *_len);
Scheme_Object *(*scheme_make_vector)(int size, Scheme_Object *fill);
Scheme_Object *(*scheme_make_integer_value)(long i);
Scheme_Object *(*scheme_make_integer_value_from_unsigned)(unsigned long i);

View File

@ -104,9 +104,11 @@
scheme_extension_table->scheme_tail_call_waiting = scheme_tail_call_waiting;
scheme_extension_table->scheme_multiple_values = scheme_multiple_values;
scheme_extension_table->scheme_uchar_table = scheme_uchar_table;
scheme_extension_table->scheme_uchar_cases_table = scheme_uchar_cases_table;
scheme_extension_table->scheme_uchar_ups = scheme_uchar_ups;
scheme_extension_table->scheme_uchar_downs = scheme_uchar_downs;
scheme_extension_table->scheme_uchar_titles = scheme_uchar_titles;
scheme_extension_table->scheme_uchar_folds = scheme_uchar_folds;
scheme_extension_table->scheme_eval = scheme_eval;
scheme_extension_table->scheme_eval_multi = scheme_eval_multi;
scheme_extension_table->scheme_eval_compiled = scheme_eval_compiled;
@ -239,6 +241,7 @@
scheme_extension_table->scheme_make_char_string_without_copying = scheme_make_char_string_without_copying;
scheme_extension_table->scheme_alloc_char_string = scheme_alloc_char_string;
scheme_extension_table->scheme_append_char_string = scheme_append_char_string;
scheme_extension_table->scheme_string_recase = scheme_string_recase;
scheme_extension_table->scheme_make_vector = scheme_make_vector;
scheme_extension_table->scheme_make_integer_value = scheme_make_integer_value;
scheme_extension_table->scheme_make_integer_value_from_unsigned = scheme_make_integer_value_from_unsigned;

View File

@ -104,9 +104,11 @@
#define scheme_tail_call_waiting (scheme_extension_table->scheme_tail_call_waiting)
#define scheme_multiple_values (scheme_extension_table->scheme_multiple_values)
#define scheme_uchar_table (scheme_extension_table->scheme_uchar_table)
#define scheme_uchar_cases_table (scheme_extension_table->scheme_uchar_cases_table)
#define scheme_uchar_ups (scheme_extension_table->scheme_uchar_ups)
#define scheme_uchar_downs (scheme_extension_table->scheme_uchar_downs)
#define scheme_uchar_titles (scheme_extension_table->scheme_uchar_titles)
#define scheme_uchar_folds (scheme_extension_table->scheme_uchar_folds)
#define scheme_eval (scheme_extension_table->scheme_eval)
#define scheme_eval_multi (scheme_extension_table->scheme_eval_multi)
#define scheme_eval_compiled (scheme_extension_table->scheme_eval_compiled)
@ -239,6 +241,7 @@
#define scheme_make_char_string_without_copying (scheme_extension_table->scheme_make_char_string_without_copying)
#define scheme_alloc_char_string (scheme_extension_table->scheme_alloc_char_string)
#define scheme_append_char_string (scheme_extension_table->scheme_append_char_string)
#define scheme_string_recase (scheme_extension_table->scheme_string_recase)
#define scheme_make_vector (scheme_extension_table->scheme_make_vector)
#define scheme_make_integer_value (scheme_extension_table->scheme_make_integer_value)
#define scheme_make_integer_value_from_unsigned (scheme_extension_table->scheme_make_integer_value_from_unsigned)

View File

@ -13,7 +13,7 @@
#define USE_COMPILED_STARTUP 1
#define EXPECTED_PRIM_COUNT 832
#define EXPECTED_PRIM_COUNT 837
#ifdef MZSCHEME_SOMETHING_OMITTED
# undef USE_COMPILED_STARTUP

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,156 @@
/* Generated by mk-uchar.ss */
#define NUM_SPECIAL_CASINGS 104
static int uchar_special_casings[] = {
/* code, down len, off, up len, off, title len, off, fold len, off, final-sigma? */
223, 1, 0, 2, 1, 2, 3, 2, 5, 0,
304, 2, 7, 1, 9, 1, 9, 2, 10, 0,
329, 1, 12, 2, 13, 2, 13, 2, 15, 0,
496, 1, 17, 2, 18, 2, 18, 2, 20, 0,
912, 1, 22, 3, 23, 3, 23, 3, 26, 0,
931, 1, 29, 1, 30, 1, 30, 1, 31, 1,
944, 1, 32, 3, 33, 3, 33, 3, 36, 0,
1415, 1, 39, 2, 40, 2, 42, 2, 44, 0,
7830, 1, 46, 2, 47, 2, 47, 2, 49, 0,
7831, 1, 51, 2, 52, 2, 52, 2, 54, 0,
7832, 1, 56, 2, 57, 2, 57, 2, 59, 0,
7833, 1, 61, 2, 62, 2, 62, 2, 64, 0,
7834, 1, 66, 2, 67, 2, 67, 2, 69, 0,
8016, 1, 71, 2, 72, 2, 72, 2, 74, 0,
8018, 1, 76, 3, 77, 3, 77, 3, 80, 0,
8020, 1, 83, 3, 84, 3, 84, 3, 87, 0,
8022, 1, 90, 3, 91, 3, 91, 3, 94, 0,
8064, 1, 97, 2, 98, 1, 100, 2, 101, 0,
8065, 1, 103, 2, 104, 1, 106, 2, 107, 0,
8066, 1, 109, 2, 110, 1, 112, 2, 113, 0,
8067, 1, 115, 2, 116, 1, 118, 2, 119, 0,
8068, 1, 121, 2, 122, 1, 124, 2, 125, 0,
8069, 1, 127, 2, 128, 1, 130, 2, 131, 0,
8070, 1, 133, 2, 134, 1, 136, 2, 137, 0,
8071, 1, 139, 2, 140, 1, 142, 2, 143, 0,
8072, 1, 145, 2, 146, 1, 148, 2, 149, 0,
8073, 1, 151, 2, 152, 1, 154, 2, 155, 0,
8074, 1, 157, 2, 158, 1, 160, 2, 161, 0,
8075, 1, 163, 2, 164, 1, 166, 2, 167, 0,
8076, 1, 169, 2, 170, 1, 172, 2, 173, 0,
8077, 1, 175, 2, 176, 1, 178, 2, 179, 0,
8078, 1, 181, 2, 182, 1, 184, 2, 185, 0,
8079, 1, 187, 2, 188, 1, 190, 2, 191, 0,
8080, 1, 193, 2, 194, 1, 196, 2, 197, 0,
8081, 1, 199, 2, 200, 1, 202, 2, 203, 0,
8082, 1, 205, 2, 206, 1, 208, 2, 209, 0,
8083, 1, 211, 2, 212, 1, 214, 2, 215, 0,
8084, 1, 217, 2, 218, 1, 220, 2, 221, 0,
8085, 1, 223, 2, 224, 1, 226, 2, 227, 0,
8086, 1, 229, 2, 230, 1, 232, 2, 233, 0,
8087, 1, 235, 2, 236, 1, 238, 2, 239, 0,
8088, 1, 241, 2, 242, 1, 244, 2, 245, 0,
8089, 1, 247, 2, 248, 1, 250, 2, 251, 0,
8090, 1, 253, 2, 254, 1, 256, 2, 257, 0,
8091, 1, 259, 2, 260, 1, 262, 2, 263, 0,
8092, 1, 265, 2, 266, 1, 268, 2, 269, 0,
8093, 1, 271, 2, 272, 1, 274, 2, 275, 0,
8094, 1, 277, 2, 278, 1, 280, 2, 281, 0,
8095, 1, 283, 2, 284, 1, 286, 2, 287, 0,
8096, 1, 289, 2, 290, 1, 292, 2, 293, 0,
8097, 1, 295, 2, 296, 1, 298, 2, 299, 0,
8098, 1, 301, 2, 302, 1, 304, 2, 305, 0,
8099, 1, 307, 2, 308, 1, 310, 2, 311, 0,
8100, 1, 313, 2, 314, 1, 316, 2, 317, 0,
8101, 1, 319, 2, 320, 1, 322, 2, 323, 0,
8102, 1, 325, 2, 326, 1, 328, 2, 329, 0,
8103, 1, 331, 2, 332, 1, 334, 2, 335, 0,
8104, 1, 337, 2, 338, 1, 340, 2, 341, 0,
8105, 1, 343, 2, 344, 1, 346, 2, 347, 0,
8106, 1, 349, 2, 350, 1, 352, 2, 353, 0,
8107, 1, 355, 2, 356, 1, 358, 2, 359, 0,
8108, 1, 361, 2, 362, 1, 364, 2, 365, 0,
8109, 1, 367, 2, 368, 1, 370, 2, 371, 0,
8110, 1, 373, 2, 374, 1, 376, 2, 377, 0,
8111, 1, 379, 2, 380, 1, 382, 2, 383, 0,
8114, 1, 385, 2, 386, 2, 388, 2, 390, 0,
8115, 1, 392, 2, 393, 1, 395, 2, 396, 0,
8116, 1, 398, 2, 399, 2, 401, 2, 403, 0,
8118, 1, 405, 2, 406, 2, 406, 2, 408, 0,
8119, 1, 410, 3, 411, 3, 414, 3, 417, 0,
8124, 1, 420, 2, 421, 1, 423, 2, 424, 0,
8130, 1, 426, 2, 427, 2, 429, 2, 431, 0,
8131, 1, 433, 2, 434, 1, 436, 2, 437, 0,
8132, 1, 439, 2, 440, 2, 442, 2, 444, 0,
8134, 1, 446, 2, 447, 2, 447, 2, 449, 0,
8135, 1, 451, 3, 452, 3, 455, 3, 458, 0,
8140, 1, 461, 2, 462, 1, 464, 2, 465, 0,
8146, 1, 467, 3, 468, 3, 468, 3, 471, 0,
8147, 1, 474, 3, 475, 3, 475, 3, 478, 0,
8150, 1, 481, 2, 482, 2, 482, 2, 484, 0,
8151, 1, 486, 3, 487, 3, 487, 3, 490, 0,
8162, 1, 493, 3, 494, 3, 494, 3, 497, 0,
8163, 1, 500, 3, 501, 3, 501, 3, 504, 0,
8164, 1, 507, 2, 508, 2, 508, 2, 510, 0,
8166, 1, 512, 2, 513, 2, 513, 2, 515, 0,
8167, 1, 517, 3, 518, 3, 518, 3, 521, 0,
8178, 1, 524, 2, 525, 2, 527, 2, 529, 0,
8179, 1, 531, 2, 532, 1, 534, 2, 535, 0,
8180, 1, 537, 2, 538, 2, 540, 2, 542, 0,
8182, 1, 544, 2, 545, 2, 545, 2, 547, 0,
8183, 1, 549, 3, 550, 3, 553, 3, 556, 0,
8188, 1, 559, 2, 560, 1, 562, 2, 563, 0,
64256, 1, 565, 2, 566, 2, 568, 2, 570, 0,
64257, 1, 572, 2, 573, 2, 575, 2, 577, 0,
64258, 1, 579, 2, 580, 2, 582, 2, 584, 0,
64259, 1, 586, 3, 587, 3, 590, 3, 593, 0,
64260, 1, 596, 3, 597, 3, 600, 3, 603, 0,
64261, 1, 606, 2, 607, 2, 609, 2, 611, 0,
64262, 1, 613, 2, 614, 2, 616, 2, 618, 0,
64275, 1, 620, 2, 621, 2, 623, 2, 625, 0,
64276, 1, 627, 2, 628, 2, 630, 2, 632, 0,
64277, 1, 634, 2, 635, 2, 637, 2, 639, 0,
64278, 1, 641, 2, 642, 2, 644, 2, 646, 0,
64279, 1, 648, 2, 649, 2, 651, 2, 653, 0 };
/* Offsets in scheme_uchar_special_casings point into here: */
static int uchar_special_casing_data[] = {
223, 83, 83, 83, 115, 115, 115, 105, 775, 304, 105, 775, 329, 700, 78, 700,
110, 496, 74, 780, 106, 780, 912, 921, 776, 769, 953, 776, 769, 962, 931, 963,
944, 933, 776, 769, 965, 776, 769, 1415, 1333, 1362, 1333, 1410, 1381, 1410, 7830, 72,
817, 104, 817, 7831, 84, 776, 116, 776, 7832, 87, 778, 119, 778, 7833, 89, 778,
121, 778, 7834, 65, 702, 97, 702, 8016, 933, 787, 965, 787, 8018, 933, 787, 768,
965, 787, 768, 8020, 933, 787, 769, 965, 787, 769, 8022, 933, 787, 834, 965, 787,
834, 8064, 7944, 921, 8072, 7936, 953, 8065, 7945, 921, 8073, 7937, 953, 8066, 7946, 921,
8074, 7938, 953, 8067, 7947, 921, 8075, 7939, 953, 8068, 7948, 921, 8076, 7940, 953, 8069,
7949, 921, 8077, 7941, 953, 8070, 7950, 921, 8078, 7942, 953, 8071, 7951, 921, 8079, 7943,
953, 8064, 7944, 921, 8072, 7936, 953, 8065, 7945, 921, 8073, 7937, 953, 8066, 7946, 921,
8074, 7938, 953, 8067, 7947, 921, 8075, 7939, 953, 8068, 7948, 921, 8076, 7940, 953, 8069,
7949, 921, 8077, 7941, 953, 8070, 7950, 921, 8078, 7942, 953, 8071, 7951, 921, 8079, 7943,
953, 8080, 7976, 921, 8088, 7968, 953, 8081, 7977, 921, 8089, 7969, 953, 8082, 7978, 921,
8090, 7970, 953, 8083, 7979, 921, 8091, 7971, 953, 8084, 7980, 921, 8092, 7972, 953, 8085,
7981, 921, 8093, 7973, 953, 8086, 7982, 921, 8094, 7974, 953, 8087, 7983, 921, 8095, 7975,
953, 8080, 7976, 921, 8088, 7968, 953, 8081, 7977, 921, 8089, 7969, 953, 8082, 7978, 921,
8090, 7970, 953, 8083, 7979, 921, 8091, 7971, 953, 8084, 7980, 921, 8092, 7972, 953, 8085,
7981, 921, 8093, 7973, 953, 8086, 7982, 921, 8094, 7974, 953, 8087, 7983, 921, 8095, 7975,
953, 8096, 8040, 921, 8104, 8032, 953, 8097, 8041, 921, 8105, 8033, 953, 8098, 8042, 921,
8106, 8034, 953, 8099, 8043, 921, 8107, 8035, 953, 8100, 8044, 921, 8108, 8036, 953, 8101,
8045, 921, 8109, 8037, 953, 8102, 8046, 921, 8110, 8038, 953, 8103, 8047, 921, 8111, 8039,
953, 8096, 8040, 921, 8104, 8032, 953, 8097, 8041, 921, 8105, 8033, 953, 8098, 8042, 921,
8106, 8034, 953, 8099, 8043, 921, 8107, 8035, 953, 8100, 8044, 921, 8108, 8036, 953, 8101,
8045, 921, 8109, 8037, 953, 8102, 8046, 921, 8110, 8038, 953, 8103, 8047, 921, 8111, 8039,
953, 8114, 8122, 921, 8122, 837, 8048, 953, 8115, 913, 921, 8124, 945, 953, 8116, 902,
921, 902, 837, 940, 953, 8118, 913, 834, 945, 834, 8119, 913, 834, 921, 913, 834,
837, 945, 834, 953, 8115, 913, 921, 8124, 945, 953, 8130, 8138, 921, 8138, 837, 8052,
953, 8131, 919, 921, 8140, 951, 953, 8132, 905, 921, 905, 837, 942, 953, 8134, 919,
834, 951, 834, 8135, 919, 834, 921, 919, 834, 837, 951, 834, 953, 8131, 919, 921,
8140, 951, 953, 8146, 921, 776, 768, 953, 776, 768, 8147, 921, 776, 769, 953, 776,
769, 8150, 921, 834, 953, 834, 8151, 921, 776, 834, 953, 776, 834, 8162, 933, 776,
768, 965, 776, 768, 8163, 933, 776, 769, 965, 776, 769, 8164, 929, 787, 961, 787,
8166, 933, 834, 965, 834, 8167, 933, 776, 834, 965, 776, 834, 8178, 8186, 921, 8186,
837, 8060, 953, 8179, 937, 921, 8188, 969, 953, 8180, 911, 921, 911, 837, 974, 953,
8182, 937, 834, 969, 834, 8183, 937, 834, 921, 937, 834, 837, 969, 834, 953, 8179,
937, 921, 8188, 969, 953, 64256, 70, 70, 70, 102, 102, 102, 64257, 70, 73, 70,
105, 102, 105, 64258, 70, 76, 70, 108, 102, 108, 64259, 70, 70, 73, 70, 102,
105, 102, 102, 105, 64260, 70, 70, 76, 70, 102, 108, 102, 102, 108, 64261, 83,
84, 83, 116, 115, 116, 64262, 83, 84, 83, 116, 115, 116, 64275, 1348, 1350, 1348,
1398, 1396, 1398, 64276, 1348, 1333, 1348, 1381, 1396, 1381, 64277, 1348, 1339, 1348, 1387, 1396,
1387, 64278, 1358, 1350, 1358, 1398, 1406, 1398, 64279, 1348, 1341, 1348, 1389, 1396, 1389 };
#define SPECIAL_CASE_FOLD_MAX 3

View File

@ -9,6 +9,6 @@
#define MZSCHEME_VERSION_MAJOR 299
#define MZSCHEME_VERSION_MINOR 107
#define MZSCHEME_VERSION_MINOR 108
#define MZSCHEME_VERSION "299.107" _MZ_SPECIAL_TAG
#define MZSCHEME_VERSION "299.108" _MZ_SPECIAL_TAG

View File

@ -49,6 +49,8 @@
# include "schsys.h"
#endif
#include "schustr.inc"
#ifdef USE_ICONV_DLL
typedef long iconv_t;
typedef int *(*errno_proc_t)();
@ -173,6 +175,10 @@ static Scheme_Object *string_ci_gt (int argc, Scheme_Object *argv[]);
static Scheme_Object *string_locale_ci_gt (int argc, Scheme_Object *argv[]);
static Scheme_Object *string_ci_lt_eq (int argc, Scheme_Object *argv[]);
static Scheme_Object *string_ci_gt_eq (int argc, Scheme_Object *argv[]);
static Scheme_Object *string_upcase (int argc, Scheme_Object *argv[]);
static Scheme_Object *string_downcase (int argc, Scheme_Object *argv[]);
static Scheme_Object *string_titlecase (int argc, Scheme_Object *argv[]);
static Scheme_Object *string_foldcase (int argc, Scheme_Object *argv[]);
static Scheme_Object *string_locale_upcase (int argc, Scheme_Object *argv[]);
static Scheme_Object *string_locale_downcase (int argc, Scheme_Object *argv[]);
static Scheme_Object *substring (int argc, Scheme_Object *argv[]);
@ -466,6 +472,27 @@ scheme_init_string (Scheme_Env *env)
env);
scheme_add_global_constant("string-upcase",
scheme_make_prim_w_arity(string_upcase,
"string-upcase",
1, 1),
env);
scheme_add_global_constant("string-downcase",
scheme_make_prim_w_arity(string_downcase,
"string-downcase",
1, 1),
env);
scheme_add_global_constant("string-titlecase",
scheme_make_prim_w_arity(string_titlecase,
"string-titlecase",
1, 1),
env);
scheme_add_global_constant("string-foldcase",
scheme_make_prim_w_arity(string_foldcase,
"string-foldcase",
1, 1),
env);
scheme_add_global_constant("string-locale-upcase",
scheme_make_prim_w_arity(string_locale_upcase,
"string-locale-upcase",
@ -963,7 +990,7 @@ GEN_STRING_COMP(string_gt, "string>?", mz_char_strcmp, >, 0, 0)
GEN_STRING_COMP(string_lt_eq, "string<=?", mz_char_strcmp, <=, 0, 0)
GEN_STRING_COMP(string_gt_eq, "string>=?", mz_char_strcmp, >=, 0, 0)
GEN_STRING_COMP(string_ci_eq, "string-ci=?", mz_char_strcmp_ci, ==, 0, 1)
GEN_STRING_COMP(string_ci_eq, "string-ci=?", mz_char_strcmp_ci, ==, 0, 0)
GEN_STRING_COMP(string_ci_lt, "string-ci<?", mz_char_strcmp_ci, <, 0, 0)
GEN_STRING_COMP(string_ci_gt, "string-ci>?", mz_char_strcmp_ci, >, 0, 0)
GEN_STRING_COMP(string_ci_lt_eq, "string-ci<=?", mz_char_strcmp_ci, <=, 0, 0)
@ -3034,6 +3061,192 @@ static void reset_locale(void)
}
}
static int find_special_casing(int ch)
{
/* Binary search */
int i, lo, hi, j;
i = NUM_SPECIAL_CASINGS >> 1;
lo = i;
hi = NUM_SPECIAL_CASINGS - i - 1;
while (1) {
if (uchar_special_casings[i * 10] == ch)
return i * 10;
if (uchar_special_casings[i * 10] > ch) {
j = i - lo;
i = j + (lo >> 1);
hi = lo - (i - j) - 1;
lo = i - j;
} else {
j = i + 1;
i = j + (hi >> 1);
lo = i - j;
hi = hi - (i - j) - 1;
}
}
}
static int is_final_sigma(int mode, mzchar *s, int d, int i, int len)
{
int j;
if (mode == 3)
return 1;
/* find a cased char before, skipping case-ignorable: */
for (j = i - 1; j >= d; j--) {
if (!scheme_iscaseignorable(s[j])) {
if (scheme_iscased(s[j]))
break;
else
return 0;
}
}
if (j < d)
return 0;
/* next non-case-ignorable must not be cased: */
for (j = i + 1; j < d + len; j++) {
if (!scheme_iscaseignorable(s[j])) {
return !scheme_iscased(s[j]);
}
}
return 1;
}
mzchar *scheme_string_recase(mzchar *s, int d, int len, int mode, int inplace, int *_len)
{
mzchar *t;
int i, extra = 0, pos, special = 0, td, prev_was_cased = 0, xmode = mode;
for (i = 0; i < len; i++) {
if (scheme_isspecialcasing(s[d+i])) {
pos = find_special_casing(s[d+i]);
if (!uchar_special_casings[pos + 9] || is_final_sigma(xmode, s, d, i, len)) {
special = 1;
extra += (uchar_special_casings[pos + 1 + (xmode << 1)] - 1);
}
}
if (mode == 2) {
if (!scheme_iscaseignorable(s[d+i]))
prev_was_cased = scheme_iscased(s[d+i]);
xmode = (prev_was_cased ? 0 : 2);
}
}
if (_len)
*_len = len + extra;
if (!extra && inplace) {
t = s;
td = d;
} else {
t = scheme_malloc_atomic(sizeof(mzchar) * (len + extra + 1));
td = 0;
}
if (!special) {
if (mode == 0) {
for (i = 0; i < len; i++) {
t[i+td] = scheme_tolower(s[i+d]);
}
} else if (mode == 1) {
for (i = 0; i < len; i++) {
t[i+td] = scheme_toupper(s[i+d]);
}
} else if (mode == 2) {
prev_was_cased = 0;
for (i = 0; i < len; i++) {
if (!prev_was_cased)
t[i+td] = scheme_totitle(s[i+d]);
else
t[i+td] = scheme_tolower(s[i+d]);
if (!scheme_iscaseignorable(s[i+d]))
prev_was_cased = scheme_iscased(s[i+d]);
}
} else /* if (mode == 3) */ {
for (i = 0; i < len; i++) {
t[i+td] = scheme_tofold(s[i+d]);
}
}
} else {
int j = 0, c;
prev_was_cased = 0;
for (i = 0; i < len; i++) {
if (mode == 0) {
t[j+td] = scheme_tolower(s[i+d]);
} else if (mode == 1) {
t[j+td] = scheme_toupper(s[i+d]);
} else if (mode == 2) {
if (!prev_was_cased) {
xmode = 2;
t[j+td] = scheme_totitle(s[i+d]);
} else {
xmode = 0;
t[j+td] = scheme_tolower(s[i+d]);
}
if (!scheme_iscaseignorable(s[i+d]))
prev_was_cased = scheme_iscased(s[i+d]);
} else /* if (mode == 3) */ {
t[j+td] = scheme_tofold(s[i+d]);
}
if (scheme_isspecialcasing(s[i+d])) {
pos = find_special_casing(s[i+d]);
if (!uchar_special_casings[pos + 9] || is_final_sigma(xmode, s, d, i, len)) {
c = uchar_special_casings[pos + 1 + (xmode << 1)];
pos = uchar_special_casings[pos + 2 + (xmode << 1)];
while (c--) {
t[(j++)+td] = uchar_special_casing_data[pos++];
}
} else
j++;
} else
j++;
}
}
return t;
}
static Scheme_Object *string_recase (const char *name, int argc, Scheme_Object *argv[], int mode)
{
mzchar *s;
int len;
if (!SCHEME_CHAR_STRINGP(argv[0]))
scheme_wrong_type(name, "string", 0, argc, argv);
s = SCHEME_CHAR_STR_VAL(argv[0]);
len = SCHEME_CHAR_STRLEN_VAL(argv[0]);
s = scheme_string_recase(s, 0, len, mode, 0, &len);
return scheme_make_sized_char_string(s, len, 0);
}
static Scheme_Object *string_upcase (int argc, Scheme_Object *argv[])
{
return string_recase("string-upcase", argc, argv, 1);
}
static Scheme_Object *string_downcase (int argc, Scheme_Object *argv[])
{
return string_recase("string-downcase", argc, argv, 0);
}
static Scheme_Object *string_titlecase (int argc, Scheme_Object *argv[])
{
return string_recase("string-titlecase", argc, argv, 2);
}
static Scheme_Object *string_foldcase (int argc, Scheme_Object *argv[])
{
return string_recase("string-foldcase", argc, argv, 3);
}
/**********************************************************************/
/* strcmps */
/**********************************************************************/
@ -3090,7 +3303,8 @@ static int mz_char_strcmp(const char *who, const mzchar *str1, int l1, const mzc
static int mz_char_strcmp_ci(const char *who, const mzchar *str1, int l1, const mzchar *str2, int l2,
int use_locale, int size_shortcut)
{
int endres;
int p1, p2, sp1, sp2, a, b;
mzchar spec1[SPECIAL_CASE_FOLD_MAX], spec2[SPECIAL_CASE_FOLD_MAX];
if (size_shortcut && (l1 != l2))
return 1;
@ -3104,30 +3318,54 @@ static int mz_char_strcmp_ci(const char *who, const mzchar *str1, int l1, const
}
#endif
if (l1 > l2) {
l1 = l2;
endres = 1;
} else {
if (l2 > l1)
endres = -1;
else
endres = 0;
}
p1 = sp1 = 0;
p2 = sp2 = 0;
while (l1--) {
unsigned int a, b;
while (((p1 < l1) || sp1) && ((p2 < l2) || sp2)) {
if (sp1) {
a = spec1[--sp1];
} else {
a = str1[p1];
if (scheme_isspecialcasing(a)) {
int pos, i;
pos = find_special_casing(a);
sp1 = uchar_special_casings[pos + 7];
pos = uchar_special_casings[pos + 8];
for (i = sp1; i--; pos++) {
spec1[i] = uchar_special_casing_data[pos];
}
a = spec1[--sp1];
} else {
a = scheme_tofold(a);
}
p1++;
}
a = *(str1++);
b = *(str2++);
a = scheme_toupper(a);
b = scheme_toupper(b);
if (sp2) {
b = spec2[--sp2];
} else {
b = str2[p2];
if (scheme_isspecialcasing(b)) {
int pos, i;
pos = find_special_casing(b);
sp2 = uchar_special_casings[pos + 7];
pos = uchar_special_casings[pos + 8];
for (i = sp2; i--; pos++) {
spec2[i] = uchar_special_casing_data[pos];
}
b = spec2[--sp2];
} else {
b = scheme_tofold(b);
}
p2++;
}
a = a - b;
if (a)
return a;
}
return endres;
return ((p1 < l1) || sp1) - ((p2 < l2) || sp2);
}
static int mz_strcmp(const char *who, unsigned char *str1, int l1, unsigned char *str2, int l2)

View File

@ -376,6 +376,9 @@ scheme_intern_exact_char_symbol(const mzchar *name, unsigned int len)
Scheme_Object *
scheme_intern_symbol(const char *name)
/* `name' must be ASCII; this function is not suitable for non-ASCII
conversion, necause it assumes that downcasing each C char
is good enough to normalize the case. */
{
if (!scheme_case_sensitive) {
unsigned long i, len;
@ -459,14 +462,35 @@ const char *scheme_symbol_name_and_size(Scheme_Object *sym, unsigned int *length
has_special = 1;
else if (s[i] == '|')
has_pipe = 1;
else if ((((unsigned char)s[i]) >= 'A')
&& (((unsigned char)s[i]) <= 'Z'))
has_upper = 1;
else if (flags & SCHEME_SNF_NEED_CASE) {
int ch = ((unsigned char *)s)[i];
if (ch > 127) {
/* Decode UTF-8. */
mzchar buf[2];
int ul = 2;
while (1) {
if (scheme_utf8_decode(s, i, i + ul,
buf, 0, 1,
NULL, 0, 0) > 0)
break;
ul++;
}
ch = buf[0];
if (scheme_isspecialcasing(ch)) {
mzchar *rc;
buf[1] = 0;
rc = scheme_string_recase(buf, 0, 1, 3, 1, NULL);
if ((rc != buf) || (rc[0] != ch))
has_upper = 1;
ch = 'a';
}
i += (ul - 1);
}
if (scheme_tofold(ch) != ch)
has_upper = 1;
}
}
if (!(flags & SCHEME_SNF_NEED_CASE))
has_upper = 0;
result = NULL;
total_length = 0;