From 39b37dd8928827ed5bbcb98c45e3cc8fd72a5d5a Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Thu, 16 Jul 2020 13:25:49 -0600 Subject: [PATCH] repair inconsistencies with `simplify-path` CS didn't always return a complete path when simplifying in use-filesystem mode. On Windows, CS and BC were inconsistent with each other and the Unix behavior. --- .../scribblings/reference/paths.scrbl | 3 +- .../scribblings/reference/windows-paths.scrbl | 4 +- pkgs/racket-test-core/tests/racket/path.rktl | 28 ++++++- .../collects/setup/private/omitted-paths.rkt | 2 +- racket/src/io/path/simplify.rkt | 81 +++++++++++++++---- racket/src/io/path/split.rkt | 4 +- racket/src/racket/src/file.c | 27 ++++--- 7 files changed, 119 insertions(+), 30 deletions(-) diff --git a/pkgs/racket-doc/scribblings/reference/paths.scrbl b/pkgs/racket-doc/scribblings/reference/paths.scrbl index 8ad58ae717..adbfc52e35 100644 --- a/pkgs/racket-doc/scribblings/reference/paths.scrbl +++ b/pkgs/racket-doc/scribblings/reference/paths.scrbl @@ -422,7 +422,8 @@ and (on Windows) without changing the case of letters within the path. If @racket[path] syntactically refers to a directory, the result ends with a directory separator. -When @racket[path] is simplified and @racket[use-filesystem?] is true +When @racket[path] is simplified other than just converting slashes +to backslashes and @racket[use-filesystem?] is true (the default), a complete path is returned. If @racket[path] is relative, it is resolved with respect to the current directory. On @|AllUnix|, up-directory indicators are removed taking into account soft links (so diff --git a/pkgs/racket-doc/scribblings/reference/windows-paths.scrbl b/pkgs/racket-doc/scribblings/reference/windows-paths.scrbl index 0638d1fe61..e5098cbd5f 100644 --- a/pkgs/racket-doc/scribblings/reference/windows-paths.scrbl +++ b/pkgs/racket-doc/scribblings/reference/windows-paths.scrbl @@ -251,8 +251,8 @@ Windows paths are @techlink{cleanse}d as follows: In paths that start @litchar{\\?\}, redundant @litchar{\}s are removed, an extra @litchar{\} is added in a @litchar{\\?\REL} if an extra one is not already present to separate up-directory indicators from literal -path elements, and an extra @litchar{\} is removed after -@litchar{\\?\RED} if an extra one is present. +path elements, and an extra @litchar{\} is similarly added after +@litchar{\\?\RED} if an extra one is not already present. @;{>> I don't know what was meant to go in place of "???", and I can't figure out an example that could trigger this case: << When @litchar{\\?\} acts as the drive and the path contains ???, two diff --git a/pkgs/racket-test-core/tests/racket/path.rktl b/pkgs/racket-test-core/tests/racket/path.rktl index e30f5e2c35..bba18722f7 100644 --- a/pkgs/racket-test-core/tests/racket/path.rktl +++ b/pkgs/racket-test-core/tests/racket/path.rktl @@ -453,6 +453,9 @@ (test (path->directory-path (build-path 'same)) simplify-path (build-path 'same "a" 'same 'up 'same) #f) (arity-test simplify-path 1 2) +(test (build-path "no-such-dir" "b") simplify-path "no-such-dir/b" #t) +(test (path->complete-path (build-path "no-such-dir" "b")) simplify-path "no-such-dir//b" #t) + (arity-test cleanse-path 1 1) (arity-test expand-user-path 1 1) (arity-test resolve-path 1 1) @@ -631,6 +634,14 @@ (string->path "\\\\?\\REL\\\\a") #f) (lambda () (split-path (coerce "\\\\?\\REL\\b\\a")))) + (test-values (list (string->path "\\\\?\\REL\\b\\") + (string->path "\\\\?\\REL\\\\a") + #t) + (lambda () (split-path (coerce "\\\\?\\REL\\b\\a\\")))) + (test-values (list (string->path "\\") + (string->path "\\\\?\\REL\\\\..") + #t) + (lambda () (split-path (string->path "\\\\?\\RED\\\\..\\")))) (test (string->path "\\\\?\\RED\\\\a\\") build-path (coerce "\\\\?\\RED\\a") 'same) (test (string->path "\\") build-path (coerce "\\\\?\\RED\\a") 'up) @@ -779,13 +790,23 @@ (test (string->path "\\\\?\\c:\\a\\.") cleanse-path (coerce "\\\\?\\c:\\a\\\\.")) (test (string->path "\\\\?\\UNC\\a\\b\\.") cleanse-path (coerce "\\\\?\\UNC\\\\a\\b\\.")) (test (string->path "\\\\?\\UNC\\a\\b\\.") cleanse-path (coerce "\\\\?\\UNC\\\\a\\b\\\\.")) - (test (string->path "\\\\?\\RED\\\\..") cleanse-path (coerce "\\\\?\\RED\\..")) + (test (string->path "\\\\?\\RED\\\\..") cleanse-path (coerce "\\\\?\\RED\\\\..")) + (test (string->path "\\\\?\\REL\\\\..") cleanse-path (coerce "\\\\?\\REL\\\\..")) + (test (string->path "\\\\?\\RED\\\\..\\") cleanse-path (coerce "\\\\?\\RED\\\\..\\")) + (test (string->path "\\\\?\\REL\\\\..\\") cleanse-path (coerce "\\\\?\\REL\\\\..\\")) (test (string->path "\\\\?\\") cleanse-path (coerce "\\\\?\\\\")))]) (go cleanse-path) (test (string->path "\\\\?\\c:") cleanse-path (coerce "\\\\?\\c:")) (when (eq? 'windows (system-type)) (go simplify-path)) (go (lambda (p) (simplify-path p #f))) + (test (string->path "\\\\?\\RED\\\\..") cleanse-path (coerce "\\\\?\\RED\\..")) + (test (string->path "\\\\?\\RED\\\\..\\") cleanse-path (coerce "\\\\?\\RED\\..\\")) + (when (eq? 'windows (system-type)) + (test (build-path (current-drive) "\\\\?\\REL\\\\..") simplify-path (coerce "\\\\?\\RED\\..")) + (test (build-path (simplify-path (build-path (current-directory) 'up)) "\\\\?\\REL\\\\..") + simplify-path + (coerce "\\\\?\\REL\\..\\\\.."))) (test (string->path "a\\b") simplify-path (coerce "a/b") #f) (test (string->path "a\\b\\") simplify-path (coerce "a/b/") #f) (test (string->path "C:\\") simplify-path (coerce "C://") #f) @@ -796,6 +817,10 @@ (test (bytes->path #"\\\\f\\g\\") simplify-path (coerce "\\\\f\\g") #f) (test (bytes->path #"\\\\f\\g\\") simplify-path (coerce "//f/g") #f) + (when (eq? 'windows (system-type)) + ;; just slash conversion: no path->complete-path + (test (bytes->path #"no-such-dir\\g") simplify-path (coerce "no-such-dir/g") #t) + (test (path->complete-path "no-such-dir\\g") simplify-path (coerce "no-such-dir//g") #t)) (test (bytes->path #"\\\\?\\\\\\c:\\") simplify-path (coerce "\\\\?\\\\\\c:\\") #f) (test (bytes->path #"\\\\?\\\\\\c:") simplify-path (coerce "\\\\?\\\\\\c:") #f) @@ -830,6 +855,7 @@ (test (bytes->path #"\\\\?\\REL\\\\..") simplify-path (coerce "\\\\?\\REL\\\\..") #F) (test (bytes->path #"\\\\?\\REL\\\\..\\") simplify-path (coerce "\\\\?\\REL\\\\..\\") #F) (test (bytes->path #"a \\b") simplify-path (coerce "\\\\?\\REL\\\\a \\b") #f) + (test (bytes->path #"\\\\?\\REL\\\\a \\b \\") simplify-path (coerce "\\\\?\\REL\\\\a \\b \\") #f) (test (bytes->path #"\\\\?\\REL\\\\aux.bad\\b") simplify-path (coerce "\\\\?\\REL\\aux.bad\\b") #f) (test (bytes->path #"\\\\?\\REL\\\\a\\b ") simplify-path (coerce "\\\\?\\REL\\a\\b ") #f) (test (bytes->path #"\\\\?\\REL\\\\.\\b") simplify-path (coerce "\\\\?\\REL\\.\\b") #f) diff --git a/racket/collects/setup/private/omitted-paths.rkt b/racket/collects/setup/private/omitted-paths.rkt index 5aded3842b..ee4affd089 100644 --- a/racket/collects/setup/private/omitted-paths.rkt +++ b/racket/collects/setup/private/omitted-paths.rkt @@ -50,7 +50,7 @@ (let ([omit (info 'compile-omit-paths (lambda () '()))]) (if (eq? 'all omit) 'all - (map (lambda (e) (explode-path (simplify-path e))) + (map (lambda (e) (explode-path (simplify-path e #f))) ;; for backward compatibility (append omit (info 'compile-omit-files (lambda () '()))))))) (cond diff --git a/racket/src/io/path/simplify.rkt b/racket/src/io/path/simplify.rkt index aebd95a3f8..830ff89572 100644 --- a/racket/src/io/path/simplify.rkt +++ b/racket/src/io/path/simplify.rkt @@ -1,5 +1,6 @@ #lang racket/base -(require "../file/main.rkt" +(require racket/fixnum + "../file/main.rkt" (submod "../file/main.rkt" for-simplify) "path.rkt" "check.rkt" @@ -30,7 +31,16 @@ [else (define clean-p (cleanse-path/convert-slashes p)) (cond - [(simple? clean-p convention) clean-p] + [(simple? clean-p convention) + ;; The choice of creating a complete path in this case seems like + ;; it was probably the wrong one. The special treatement for Windows + ;; reflects a compromise between consistency and old behavior (where + ;; the conversion to a compleet path did not happe in the old behavior) + (if (or (not use-filesystem?) + (and (eq? 'windows (system-type)) + (same-modulo-slashes? p clean-p))) + clean-p + (path->complete-path clean-p (current-directory)))] [else (define l (explode-path clean-p)) (define simple-p @@ -121,16 +131,14 @@ (is-sep? b convention))) (cond [(and (eq? convention 'windows) - (cond - [(and - (= len 2) - (letter-drive-start? bstr 2)) - ;; Letter drive without trailing separator - #t] - [(non-normal-backslash-backslash-questionmark? bstr) - #t] - [else #f])) + (and (= len 2) + (letter-drive-start? bstr 2))) + ;; Letter drive without trailing separator #f] + [(and (eq? convention 'windows) + (backslash-backslash-questionmark-simple-status bstr)) + => (lambda (status) + (eq? status 'simple))] [else (let loop ([i 0]) (cond @@ -161,12 +169,39 @@ #f] [else (loop (add1 i))]))])) -(define (non-normal-backslash-backslash-questionmark? bstr) +(define (backslash-backslash-questionmark-simple-status bstr) (define-values (kind drive-len orig-drive-len clean-start-pos sep-bstr) (parse-backslash-backslash-questionmark bstr)) - ;; We could try harder to recognize normal forms, but for now - ;; we assume that some normalization is needed in a \\?\ path. - kind) + (cond + [(not kind) #f] + [(and (fx= (bytes-ref bstr 4) (char->integer #\R)) + (fx= (bytes-ref bstr 5) (char->integer #\E))) + ;; For \\?\REL\ and \\?\RED\ paths, in use-filesystem mode, + ;; we don't want to convert to a complete path unless there's + ;; some simplification possible. Rebuild the path to see + ;; whether it's already normalized and simple. + (let loop ([p (path bstr 'windows)] [accum '()] [as-dir? #f]) + (define-values (base-dir name dir?) (split-path p)) + (cond + [(symbol? name) 'non-simple] + [else + (define new-accum (cons name accum)) + (define new-as-dir? (if (null? accum) dir? as-dir?)) + (cond + [(path? base-dir) (loop base-dir new-accum new-as-dir?)] + [else + (define rebuilt0-p (apply build-path/convention-type 'windows new-accum)) + (define rebuilt-p (if new-as-dir? + (path (bytes-append (path-bytes rebuilt0-p) #"\\") 'windows) + rebuilt0-p)) + (define rebuilt-bstr (path-bytes (simplify-backslash-backslash-questionmark rebuilt-p))) + (if (bytes=? bstr rebuilt-bstr) + 'simple + 'non-simple)])]))] + + [else + ;; Conservatively assume non-simple + 'non-simple])) ;; ---------------------------------------- @@ -256,3 +291,19 @@ (bytes-append #"\\\\?\\UNC" (subbytes bstr 8))] [else (bytes-append #"\\\\?\\UNC" (subbytes bstr 7))])) + +(define (same-modulo-slashes? p1 p2) + (define bstr1 (path-bytes p1)) + (define bstr2 (path-bytes p2)) + (define len (bytes-length bstr1)) + (and (fx= len (bytes-length bstr2)) + (let loop ([i 0]) + (or (fx= i len) + (and (let ([b1 (bytes-ref bstr1 i)] + [b2 (bytes-ref bstr2 i)]) + (or (fx= b1 b2) + (and (fx= b1 (char->integer #\\)) + (fx= b2 (char->integer #\/))) + (and (fx= b1 (char->integer #\/)) + (fx= b2 (char->integer #\\))))) + (loop (fx+ i 1))))))) diff --git a/racket/src/io/path/split.rkt b/racket/src/io/path/split.rkt index a58e2db143..98d14648a9 100644 --- a/racket/src/io/path/split.rkt +++ b/racket/src/io/path/split.rkt @@ -268,7 +268,9 @@ (if (eqv? (bytes-ref bstr 8) (char->integer #\\)) #"" #"\\") - (subbytes bstr 8)) + (subbytes bstr 8 (let ([len (bytes-length bstr)]) + (if is-dir? (sub1 len) len)))) + 'windows)) (cond [explode? (list elem base)] diff --git a/racket/src/racket/src/file.c b/racket/src/racket/src/file.c index 8acbd6a395..acf6560d47 100644 --- a/racket/src/racket/src/file.c +++ b/racket/src/racket/src/file.c @@ -3560,7 +3560,7 @@ static Scheme_Object *convert_literal_relative(Scheme_Object *file) return file; } -static Scheme_Object *simplify_qm_path(Scheme_Object *path) +static Scheme_Object *simplify_qm_path(Scheme_Object *path, int *has_rel) { /* path is already expanded, so the only remaining clean-ups are dropping a trailing separator, @@ -3599,6 +3599,7 @@ static Scheme_Object *simplify_qm_path(Scheme_Object *path) start_special_check = lit_start; drive_end = lit_start - 1; } else if (drive_end < 0) { + /* \\?\REL\ */ int lit_start, dots_end; dots_end = get_slashslash_qm_dot_ups_end(s, len, &lit_start); if (lit_start == len) { @@ -3609,6 +3610,7 @@ static Scheme_Object *simplify_qm_path(Scheme_Object *path) if (dots_end < 9) drive_end = lit_start; /* no dots, so just keep the literal part */ else { + *has_rel = 1; drive_end = 8; /* \\?\REL\..\, and we keep the .. */ drop_extra_slash = dots_end; is_dir = 1; @@ -3740,8 +3742,8 @@ static Scheme_Object *do_simplify_path(Scheme_Object *path, Scheme_Object *cycle int isdir, cleaned_slashes = 0, must_be_dir = 0, last_was_dir = 0, did_first = 0; Scheme_Object *file = scheme_false, *base; - /* cleanse-path doesn't touch the filesystem. Always start with - that, to get things basically tidy. */ + /* cleanse-path doesn't touch the filesystem. On Windows, always + start with that, to get things basically tidy. */ if (kind == SCHEME_WINDOWS_PATH_KIND) { char *s; int expanded, add_sep = 0; @@ -3749,10 +3751,12 @@ static Scheme_Object *do_simplify_path(Scheme_Object *path, Scheme_Object *cycle NULL, &expanded, 0, 0, 0, kind, 0); { int slen; - if (expanded) + if (expanded) { + cleaned_slashes = 1; slen = strlen(s); - else + } else slen = SCHEME_PATH_LEN(path); + /* Note: not counting slash normalization as cleaned_slashes */ s = do_normal_path_seps(s, &slen, 0, 0, SCHEME_WINDOWS_PATH_KIND, &expanded); } if (expanded) { @@ -3792,7 +3796,7 @@ static Scheme_Object *do_simplify_path(Scheme_Object *path, Scheme_Object *cycle Also responsible for determining whether there's a redundant or missing trailing slash in the case that the path is just a root. */ - { + if (!cleaned_slashes || !use_filesystem) { char *s; int len, i, saw_dot = 0; s = SCHEME_PATH_VAL(path); @@ -3801,8 +3805,10 @@ static Scheme_Object *do_simplify_path(Scheme_Object *path, Scheme_Object *cycle if (kind == SCHEME_WINDOWS_PATH_KIND) { if (!skip && check_dos_slashslash_qm(s, len, NULL, NULL, NULL)) { if (!force_rel_up) { - int drive_end; - path = simplify_qm_path(path); + Scheme_Object *orig_path = path; + int drive_end, has_up = 0; + path = simplify_qm_path(path, &has_up); + cleaned_slashes = has_up; len = SCHEME_PATH_LEN(path); if (check_dos_slashslash_qm(SCHEME_PATH_VAL(path), len, &drive_end, NULL, NULL)) { /* If it's a drive... */ @@ -3811,7 +3817,10 @@ static Scheme_Object *do_simplify_path(Scheme_Object *path, Scheme_Object *cycle path = scheme_path_to_directory_path(path); } } - return path; + if (!SAME_OBJ(path, orig_path)) + cleaned_slashes = 1; + if (!cleaned_slashes || !use_filesystem) + return path; } else { /* force_rel_up means that we want a directory: */ return scheme_path_to_directory_path(path);