repair and tests for Windows path handling

In the process of extracting minimal Windows path encoding for rktio,
I noticed a decoding issue with a path that ends with an unpaired
high-surrogate value. Add a suitable tests and fix the old decoder
(although it will probably go away).
This commit is contained in:
Matthew Flatt 2017-06-20 17:42:23 -06:00
parent d4742a0618
commit 83cb1a0d00
2 changed files with 68 additions and 2 deletions

View File

@ -1960,6 +1960,51 @@
(string-append s s) ; causes a clear operation on the runstack for second argument
(write-char #\A)))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Check that unpaired surrogates are handled right in
;; Windows paths
(when (eq? 'windows (system-type))
(let-values ([(surrogate-hi surrogate-lo)
(let ()
(define c0 (bytes-open-converter "platform-UTF-8" "platform-UTF-16"))
(let-values ([(bstr a b) (bytes-convert c0 (string->bytes/utf-8 "\U10AABB"))])
(define c8 (bytes-open-converter "platform-UTF-16" "platform-UTF-8"))
(let-values ([(bstr a b) (bytes-convert c8
(bytes-append
#".\0"
(subbytes bstr 0 2)
#".\0"
(subbytes bstr 2 4)
#".\0"))])
(values (subbytes bstr 1 4) (subbytes bstr 5 8)))))])
(let ([dir (make-temporary-file "weird~a" 'directory)]
[fns (map bytes->path
;; Each of these byte strings represents a strange
;; filename where one or more of the 16-byte elements
;; at at the `wchar_t*` level is an unpaired surrogate.
;; NTFS filesystems will allow that, though.
(list (bytes-append #"a" surrogate-hi #"z")
(bytes-append #"a" surrogate-lo #"z")
(bytes-append #"a" surrogate-lo surrogate-hi #"z")
(bytes-append #"a" surrogate-hi)
(bytes-append #"a" surrogate-lo)
(bytes-append #"a" surrogate-lo surrogate-hi)))])
;; If one of the paths works, we expect them all to work:
(when (with-handlers ([exn:fail:filesystem? (lambda (x) #f)])
(let ([p (build-path dir (car fns))])
(call-with-output-file p void)
(delete-file p)))
(for ([fn (in-list fns)])
(define p (build-path dir fn))
(call-with-output-file p (lambda (o) (write-bytes (path->bytes fn) o))))
(for ([fn (in-list fns)])
(define p (build-path dir fn))
(call-with-input-file* p (lambda (i) (test (path->bytes fn) read-bytes 100 i))))
;; Make sure names are converted correctly back from `directory-list`:
(test (sort fns path<?) sort (directory-list dir) path<?))
(delete-directory/files dir))))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(report-errs)

View File

@ -5266,6 +5266,17 @@ static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t e
failmode = -1;
i = end - 1; /* to ensure that failmode is returned */
} else if (permissive) {
# ifdef WINDOWS_UNICODE_SUPPORT
if (pending_surrogate) {
/* Unpaired surrogate before permissive replacements */
if (utf16 && (j < dend)) {
if (us)
((unsigned short *)us)[j] = pending_surrogate;
j++;
}
pending_surrogate = 0;
}
#endif
for (i = oki; i < end; i++) {
if (j < dend) {
if (us) {
@ -5286,8 +5297,18 @@ static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t e
}
# ifdef WINDOWS_UNICODE_SUPPORT
if (pending_surrogate)
oki -= 3;
if (pending_surrogate) {
if (!might_continue) {
/* Accept unpaired surrogate at end of input */
if (j < dend) {
if (us)
((unsigned short *)us)[j] = pending_surrogate;
j++;
}
} else {
oki -= 3;
}
}
#endif
if (ipos)