repair and tests for Windows path handling
In the process of extracting minimal Windows path encoding for rktio, I noticed a decoding issue with a path that ends with an unpaired high-surrogate value. Add a suitable tests and fix the old decoder (although it will probably go away).
This commit is contained in:
parent
d4742a0618
commit
83cb1a0d00
|
@ -1960,6 +1960,51 @@
|
||||||
(string-append s s) ; causes a clear operation on the runstack for second argument
|
(string-append s s) ; causes a clear operation on the runstack for second argument
|
||||||
(write-char #\A)))
|
(write-char #\A)))
|
||||||
|
|
||||||
|
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; Check that unpaired surrogates are handled right in
|
||||||
|
;; Windows paths
|
||||||
|
|
||||||
|
(when (eq? 'windows (system-type))
|
||||||
|
(let-values ([(surrogate-hi surrogate-lo)
|
||||||
|
(let ()
|
||||||
|
(define c0 (bytes-open-converter "platform-UTF-8" "platform-UTF-16"))
|
||||||
|
(let-values ([(bstr a b) (bytes-convert c0 (string->bytes/utf-8 "\U10AABB"))])
|
||||||
|
(define c8 (bytes-open-converter "platform-UTF-16" "platform-UTF-8"))
|
||||||
|
(let-values ([(bstr a b) (bytes-convert c8
|
||||||
|
(bytes-append
|
||||||
|
#".\0"
|
||||||
|
(subbytes bstr 0 2)
|
||||||
|
#".\0"
|
||||||
|
(subbytes bstr 2 4)
|
||||||
|
#".\0"))])
|
||||||
|
(values (subbytes bstr 1 4) (subbytes bstr 5 8)))))])
|
||||||
|
(let ([dir (make-temporary-file "weird~a" 'directory)]
|
||||||
|
[fns (map bytes->path
|
||||||
|
;; Each of these byte strings represents a strange
|
||||||
|
;; filename where one or more of the 16-byte elements
|
||||||
|
;; at at the `wchar_t*` level is an unpaired surrogate.
|
||||||
|
;; NTFS filesystems will allow that, though.
|
||||||
|
(list (bytes-append #"a" surrogate-hi #"z")
|
||||||
|
(bytes-append #"a" surrogate-lo #"z")
|
||||||
|
(bytes-append #"a" surrogate-lo surrogate-hi #"z")
|
||||||
|
(bytes-append #"a" surrogate-hi)
|
||||||
|
(bytes-append #"a" surrogate-lo)
|
||||||
|
(bytes-append #"a" surrogate-lo surrogate-hi)))])
|
||||||
|
;; If one of the paths works, we expect them all to work:
|
||||||
|
(when (with-handlers ([exn:fail:filesystem? (lambda (x) #f)])
|
||||||
|
(let ([p (build-path dir (car fns))])
|
||||||
|
(call-with-output-file p void)
|
||||||
|
(delete-file p)))
|
||||||
|
(for ([fn (in-list fns)])
|
||||||
|
(define p (build-path dir fn))
|
||||||
|
(call-with-output-file p (lambda (o) (write-bytes (path->bytes fn) o))))
|
||||||
|
(for ([fn (in-list fns)])
|
||||||
|
(define p (build-path dir fn))
|
||||||
|
(call-with-input-file* p (lambda (i) (test (path->bytes fn) read-bytes 100 i))))
|
||||||
|
;; Make sure names are converted correctly back from `directory-list`:
|
||||||
|
(test (sort fns path<?) sort (directory-list dir) path<?))
|
||||||
|
(delete-directory/files dir))))
|
||||||
|
|
||||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
(report-errs)
|
(report-errs)
|
||||||
|
|
|
@ -5266,6 +5266,17 @@ static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t e
|
||||||
failmode = -1;
|
failmode = -1;
|
||||||
i = end - 1; /* to ensure that failmode is returned */
|
i = end - 1; /* to ensure that failmode is returned */
|
||||||
} else if (permissive) {
|
} else if (permissive) {
|
||||||
|
# ifdef WINDOWS_UNICODE_SUPPORT
|
||||||
|
if (pending_surrogate) {
|
||||||
|
/* Unpaired surrogate before permissive replacements */
|
||||||
|
if (utf16 && (j < dend)) {
|
||||||
|
if (us)
|
||||||
|
((unsigned short *)us)[j] = pending_surrogate;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
pending_surrogate = 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
for (i = oki; i < end; i++) {
|
for (i = oki; i < end; i++) {
|
||||||
if (j < dend) {
|
if (j < dend) {
|
||||||
if (us) {
|
if (us) {
|
||||||
|
@ -5286,8 +5297,18 @@ static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t e
|
||||||
}
|
}
|
||||||
|
|
||||||
# ifdef WINDOWS_UNICODE_SUPPORT
|
# ifdef WINDOWS_UNICODE_SUPPORT
|
||||||
if (pending_surrogate)
|
if (pending_surrogate) {
|
||||||
|
if (!might_continue) {
|
||||||
|
/* Accept unpaired surrogate at end of input */
|
||||||
|
if (j < dend) {
|
||||||
|
if (us)
|
||||||
|
((unsigned short *)us)[j] = pending_surrogate;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
oki -= 3;
|
oki -= 3;
|
||||||
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (ipos)
|
if (ipos)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user