repairs for bytes-utf-8-index

For traditional Racket, fix `bytes-utf-8-index` to accept 5 arguments
as documented. For Racket CS, fix `bytes-utf-8-index` to return an
index relative to the byte string's start.

Closes #2670
This commit is contained in:
Matthew Flatt 2019-05-30 07:50:15 -06:00
parent 8c652de835
commit bdb578606e
3 changed files with 31 additions and 17 deletions

View File

@ -628,25 +628,39 @@
[s (apply bytes (caddr p))]) [s (apply bytes (caddr p))])
(if (and (positive? (vector-length code-points)) (if (and (positive? (vector-length code-points))
(vector-ref code-points 0)) (vector-ref code-points 0))
(begin (let ([len (vector-length code-points)]
(test (vector-length code-points) bytes-utf-8-length s) [c0 (integer->char (vector-ref code-points 0))])
(test len bytes-utf-8-length s)
(test len bytes-utf-8-length s #f)
(test len bytes-utf-8-length s #\x)
(test len bytes-utf-8-length s #f 0)
(test len bytes-utf-8-length s #f 0 (bytes-length s))
(test (sub1 len) bytes-utf-8-length s #f (char-utf-8-length c0) (bytes-length s))
(test (sub1 len) bytes-utf-8-length s #f 0 (- (bytes-length s)
(char-utf-8-length
(integer->char
(vector-ref code-points (sub1 len))))))
(test code-points bytes->unicode-vector s) (test code-points bytes->unicode-vector s)
(test code-points bytes-any->unicode-vector s #f) (test code-points bytes-any->unicode-vector s #f)
(test s unicode-vector->bytes code-points) (test s unicode-vector->bytes code-points)
(test 0 bytes-utf-8-index s 0) (test 0 bytes-utf-8-index s 0)
(test (bytes-length s) bytes-utf-8-index (test 0 bytes-utf-8-index s 0 #f)
(bytes-append s #"x") (test 0 bytes-utf-8-index s 0 #\x)
(vector-length code-points)) (test 0 bytes-utf-8-index s 0 #f 0)
(if ((vector-length code-points) . > . 1) (when (len . > . 1)
(test (char-utf-8-length c0) bytes-utf-8-index s 1 #f 0)
(test (char-utf-8-length c0) bytes-utf-8-index s 0 #f (char-utf-8-length c0)))
(test (char-utf-8-length c0) bytes-utf-8-index (bytes-append s #"x") 1 #f 0)
(test (char-utf-8-length c0) bytes-utf-8-index (bytes-append s #"x") 0 #f (char-utf-8-length c0))
(test 0 bytes-utf-8-index s 0 #f 0 (bytes-length s))
(test #f bytes-utf-8-index s 0 #f (bytes-length s))
(test (bytes-length s) bytes-utf-8-index (bytes-append s #"x") len)
(if (len . > . 1)
(begin (begin
(test (integer->char (vector-ref code-points 0)) (test c0 bytes-utf-8-ref s 0)
bytes-utf-8-ref s 0) (test c0 bytes-utf-8-ref s 0 #f)
(test (integer->char (vector-ref code-points 0)) (test c0 bytes-utf-8-ref s 0 #f 0)
bytes-utf-8-ref s 0 #f) (test c0 bytes-utf-8-ref s 0 #f 0 (bytes-length s))
(test (integer->char (vector-ref code-points 0))
bytes-utf-8-ref s 0 #f 0)
(test (integer->char (vector-ref code-points 0))
bytes-utf-8-ref s 0 #f 0 (bytes-length s))
(test (integer->char (vector-ref code-points (test (integer->char (vector-ref code-points
(sub1 (vector-length code-points)))) (sub1 (vector-length code-points))))
bytes-utf-8-ref s (sub1 (vector-length code-points))) bytes-utf-8-ref s (sub1 (vector-length code-points)))

View File

@ -83,7 +83,7 @@
[(eq? state 'continues) [(eq? state 'continues)
(cond (cond
[(and get-index? ((+ start initial-used-bytes) . < . end)) [(and get-index? ((+ start initial-used-bytes) . < . end))
initial-used-bytes] (+ initial-used-bytes start)]
[else [else
;; Get one more byte ;; Get one more byte
(define str (and (not get-index?) (make-string 1))) (define str (and (not get-index?) (make-string 1)))
@ -98,7 +98,7 @@
(or (and (eq? state 'complete) (or (and (eq? state 'complete)
(= got-chars 1)))) (= got-chars 1))))
(if get-index? (if get-index?
initial-used-bytes (+ initial-used-bytes start)
(string-ref str 0))] (string-ref str 0))]
[else #f])])] [else #f])])]
[else #f])) [else #f]))

View File

@ -664,7 +664,7 @@ scheme_init_string (Scheme_Startup_Env *env)
SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT); SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT);
scheme_addto_prim_instance("bytes->immutable-bytes", p, env); scheme_addto_prim_instance("bytes->immutable-bytes", p, env);
p = scheme_make_immed_prim(byte_string_utf8_index, "bytes-utf-8-index", 2, 4); p = scheme_make_immed_prim(byte_string_utf8_index, "bytes-utf-8-index", 2, 5);
/* Incorrect, since the result can be #f: /* Incorrect, since the result can be #f:
SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_PRODUCES_FIXNUM); */ SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_PRODUCES_FIXNUM); */
scheme_addto_prim_instance("bytes-utf-8-index", p, env); scheme_addto_prim_instance("bytes-utf-8-index", p, env);