repair and tests for Windows path handling
In the process of extracting minimal Windows path encoding for rktio, I noticed a decoding issue with a path that ends with an unpaired high-surrogate value. Add a suitable tests and fix the old decoder (although it will probably go away).
This commit is contained in:
parent
d4742a0618
commit
83cb1a0d00
|
@ -1960,6 +1960,51 @@
|
|||
(string-append s s) ; causes a clear operation on the runstack for second argument
|
||||
(write-char #\A)))
|
||||
|
||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Check that unpaired surrogates are handled right in
|
||||
;; Windows paths
|
||||
|
||||
(when (eq? 'windows (system-type))
|
||||
(let-values ([(surrogate-hi surrogate-lo)
|
||||
(let ()
|
||||
(define c0 (bytes-open-converter "platform-UTF-8" "platform-UTF-16"))
|
||||
(let-values ([(bstr a b) (bytes-convert c0 (string->bytes/utf-8 "\U10AABB"))])
|
||||
(define c8 (bytes-open-converter "platform-UTF-16" "platform-UTF-8"))
|
||||
(let-values ([(bstr a b) (bytes-convert c8
|
||||
(bytes-append
|
||||
#".\0"
|
||||
(subbytes bstr 0 2)
|
||||
#".\0"
|
||||
(subbytes bstr 2 4)
|
||||
#".\0"))])
|
||||
(values (subbytes bstr 1 4) (subbytes bstr 5 8)))))])
|
||||
(let ([dir (make-temporary-file "weird~a" 'directory)]
|
||||
[fns (map bytes->path
|
||||
;; Each of these byte strings represents a strange
|
||||
;; filename where one or more of the 16-byte elements
|
||||
;; at at the `wchar_t*` level is an unpaired surrogate.
|
||||
;; NTFS filesystems will allow that, though.
|
||||
(list (bytes-append #"a" surrogate-hi #"z")
|
||||
(bytes-append #"a" surrogate-lo #"z")
|
||||
(bytes-append #"a" surrogate-lo surrogate-hi #"z")
|
||||
(bytes-append #"a" surrogate-hi)
|
||||
(bytes-append #"a" surrogate-lo)
|
||||
(bytes-append #"a" surrogate-lo surrogate-hi)))])
|
||||
;; If one of the paths works, we expect them all to work:
|
||||
(when (with-handlers ([exn:fail:filesystem? (lambda (x) #f)])
|
||||
(let ([p (build-path dir (car fns))])
|
||||
(call-with-output-file p void)
|
||||
(delete-file p)))
|
||||
(for ([fn (in-list fns)])
|
||||
(define p (build-path dir fn))
|
||||
(call-with-output-file p (lambda (o) (write-bytes (path->bytes fn) o))))
|
||||
(for ([fn (in-list fns)])
|
||||
(define p (build-path dir fn))
|
||||
(call-with-input-file* p (lambda (i) (test (path->bytes fn) read-bytes 100 i))))
|
||||
;; Make sure names are converted correctly back from `directory-list`:
|
||||
(test (sort fns path<?) sort (directory-list dir) path<?))
|
||||
(delete-directory/files dir))))
|
||||
|
||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(report-errs)
|
||||
|
|
|
@ -5266,6 +5266,17 @@ static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t e
|
|||
failmode = -1;
|
||||
i = end - 1; /* to ensure that failmode is returned */
|
||||
} else if (permissive) {
|
||||
# ifdef WINDOWS_UNICODE_SUPPORT
|
||||
if (pending_surrogate) {
|
||||
/* Unpaired surrogate before permissive replacements */
|
||||
if (utf16 && (j < dend)) {
|
||||
if (us)
|
||||
((unsigned short *)us)[j] = pending_surrogate;
|
||||
j++;
|
||||
}
|
||||
pending_surrogate = 0;
|
||||
}
|
||||
#endif
|
||||
for (i = oki; i < end; i++) {
|
||||
if (j < dend) {
|
||||
if (us) {
|
||||
|
@ -5286,8 +5297,18 @@ static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t e
|
|||
}
|
||||
|
||||
# ifdef WINDOWS_UNICODE_SUPPORT
|
||||
if (pending_surrogate)
|
||||
oki -= 3;
|
||||
if (pending_surrogate) {
|
||||
if (!might_continue) {
|
||||
/* Accept unpaired surrogate at end of input */
|
||||
if (j < dend) {
|
||||
if (us)
|
||||
((unsigned short *)us)[j] = pending_surrogate;
|
||||
j++;
|
||||
}
|
||||
} else {
|
||||
oki -= 3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (ipos)
|
||||
|
|
Loading…
Reference in New Issue
Block a user