repair inconsistencies with simplify-path

CS didn't always return a complete path when simplifying in
use-filesystem mode. On Windows, CS and BC were inconsistent with each
other and the Unix behavior.
This commit is contained in:
Matthew Flatt 2020-07-16 13:25:49 -06:00
parent 6e3c111728
commit 39b37dd892
7 changed files with 119 additions and 30 deletions

View File

@ -422,7 +422,8 @@ and (on Windows) without changing the case of letters within the
path. If @racket[path] syntactically refers to a directory, the
result ends with a directory separator.
When @racket[path] is simplified and @racket[use-filesystem?] is true
When @racket[path] is simplified other than just converting slashes
to backslashes and @racket[use-filesystem?] is true
(the default), a complete path is returned. If @racket[path] is
relative, it is resolved with respect to the current directory.
On @|AllUnix|, up-directory indicators are removed taking into account soft links (so

View File

@ -251,8 +251,8 @@ Windows paths are @techlink{cleanse}d as follows: In paths that start
@litchar{\\?\}, redundant @litchar{\}s are removed, an extra
@litchar{\} is added in a @litchar{\\?\REL} if an extra one is
not already present to separate up-directory indicators from literal
path elements, and an extra @litchar{\} is removed after
@litchar{\\?\RED} if an extra one is present.
path elements, and an extra @litchar{\} is similarly added after
@litchar{\\?\RED} if an extra one is not already present.
@;{>> I don't know what was meant to go in place of "???", and I can't
figure out an example that could trigger this case: <<
When @litchar{\\?\} acts as the drive and the path contains ???, two

View File

@ -453,6 +453,9 @@
(test (path->directory-path (build-path 'same)) simplify-path (build-path 'same "a" 'same 'up 'same) #f)
(arity-test simplify-path 1 2)
(test (build-path "no-such-dir" "b") simplify-path "no-such-dir/b" #t)
(test (path->complete-path (build-path "no-such-dir" "b")) simplify-path "no-such-dir//b" #t)
(arity-test cleanse-path 1 1)
(arity-test expand-user-path 1 1)
(arity-test resolve-path 1 1)
@ -631,6 +634,14 @@
(string->path "\\\\?\\REL\\\\a")
#f)
(lambda () (split-path (coerce "\\\\?\\REL\\b\\a"))))
(test-values (list (string->path "\\\\?\\REL\\b\\")
(string->path "\\\\?\\REL\\\\a")
#t)
(lambda () (split-path (coerce "\\\\?\\REL\\b\\a\\"))))
(test-values (list (string->path "\\")
(string->path "\\\\?\\REL\\\\..")
#t)
(lambda () (split-path (string->path "\\\\?\\RED\\\\..\\"))))
(test (string->path "\\\\?\\RED\\\\a\\") build-path (coerce "\\\\?\\RED\\a") 'same)
(test (string->path "\\") build-path (coerce "\\\\?\\RED\\a") 'up)
@ -779,13 +790,23 @@
(test (string->path "\\\\?\\c:\\a\\.") cleanse-path (coerce "\\\\?\\c:\\a\\\\."))
(test (string->path "\\\\?\\UNC\\a\\b\\.") cleanse-path (coerce "\\\\?\\UNC\\\\a\\b\\."))
(test (string->path "\\\\?\\UNC\\a\\b\\.") cleanse-path (coerce "\\\\?\\UNC\\\\a\\b\\\\."))
(test (string->path "\\\\?\\RED\\\\..") cleanse-path (coerce "\\\\?\\RED\\.."))
(test (string->path "\\\\?\\RED\\\\..") cleanse-path (coerce "\\\\?\\RED\\\\.."))
(test (string->path "\\\\?\\REL\\\\..") cleanse-path (coerce "\\\\?\\REL\\\\.."))
(test (string->path "\\\\?\\RED\\\\..\\") cleanse-path (coerce "\\\\?\\RED\\\\..\\"))
(test (string->path "\\\\?\\REL\\\\..\\") cleanse-path (coerce "\\\\?\\REL\\\\..\\"))
(test (string->path "\\\\?\\") cleanse-path (coerce "\\\\?\\\\")))])
(go cleanse-path)
(test (string->path "\\\\?\\c:") cleanse-path (coerce "\\\\?\\c:"))
(when (eq? 'windows (system-type))
(go simplify-path))
(go (lambda (p) (simplify-path p #f)))
(test (string->path "\\\\?\\RED\\\\..") cleanse-path (coerce "\\\\?\\RED\\.."))
(test (string->path "\\\\?\\RED\\\\..\\") cleanse-path (coerce "\\\\?\\RED\\..\\"))
(when (eq? 'windows (system-type))
(test (build-path (current-drive) "\\\\?\\REL\\\\..") simplify-path (coerce "\\\\?\\RED\\.."))
(test (build-path (simplify-path (build-path (current-directory) 'up)) "\\\\?\\REL\\\\..")
simplify-path
(coerce "\\\\?\\REL\\..\\\\..")))
(test (string->path "a\\b") simplify-path (coerce "a/b") #f)
(test (string->path "a\\b\\") simplify-path (coerce "a/b/") #f)
(test (string->path "C:\\") simplify-path (coerce "C://") #f)
@ -796,6 +817,10 @@
(test (bytes->path #"\\\\f\\g\\") simplify-path (coerce "\\\\f\\g") #f)
(test (bytes->path #"\\\\f\\g\\") simplify-path (coerce "//f/g") #f)
(when (eq? 'windows (system-type))
;; just slash conversion: no path->complete-path
(test (bytes->path #"no-such-dir\\g") simplify-path (coerce "no-such-dir/g") #t)
(test (path->complete-path "no-such-dir\\g") simplify-path (coerce "no-such-dir//g") #t))
(test (bytes->path #"\\\\?\\\\\\c:\\") simplify-path (coerce "\\\\?\\\\\\c:\\") #f)
(test (bytes->path #"\\\\?\\\\\\c:") simplify-path (coerce "\\\\?\\\\\\c:") #f)
@ -830,6 +855,7 @@
(test (bytes->path #"\\\\?\\REL\\\\..") simplify-path (coerce "\\\\?\\REL\\\\..") #F)
(test (bytes->path #"\\\\?\\REL\\\\..\\") simplify-path (coerce "\\\\?\\REL\\\\..\\") #F)
(test (bytes->path #"a \\b") simplify-path (coerce "\\\\?\\REL\\\\a \\b") #f)
(test (bytes->path #"\\\\?\\REL\\\\a \\b \\") simplify-path (coerce "\\\\?\\REL\\\\a \\b \\") #f)
(test (bytes->path #"\\\\?\\REL\\\\aux.bad\\b") simplify-path (coerce "\\\\?\\REL\\aux.bad\\b") #f)
(test (bytes->path #"\\\\?\\REL\\\\a\\b ") simplify-path (coerce "\\\\?\\REL\\a\\b ") #f)
(test (bytes->path #"\\\\?\\REL\\\\.\\b") simplify-path (coerce "\\\\?\\REL\\.\\b") #f)

View File

@ -50,7 +50,7 @@
(let ([omit (info 'compile-omit-paths (lambda () '()))])
(if (eq? 'all omit)
'all
(map (lambda (e) (explode-path (simplify-path e)))
(map (lambda (e) (explode-path (simplify-path e #f)))
;; for backward compatibility
(append omit (info 'compile-omit-files (lambda () '())))))))
(cond

View File

@ -1,5 +1,6 @@
#lang racket/base
(require "../file/main.rkt"
(require racket/fixnum
"../file/main.rkt"
(submod "../file/main.rkt" for-simplify)
"path.rkt"
"check.rkt"
@ -30,7 +31,16 @@
[else
(define clean-p (cleanse-path/convert-slashes p))
(cond
[(simple? clean-p convention) clean-p]
[(simple? clean-p convention)
;; The choice of creating a complete path in this case seems like
;; it was probably the wrong one. The special treatement for Windows
;; reflects a compromise between consistency and old behavior (where
;; the conversion to a compleet path did not happe in the old behavior)
(if (or (not use-filesystem?)
(and (eq? 'windows (system-type))
(same-modulo-slashes? p clean-p)))
clean-p
(path->complete-path clean-p (current-directory)))]
[else
(define l (explode-path clean-p))
(define simple-p
@ -121,16 +131,14 @@
(is-sep? b convention)))
(cond
[(and (eq? convention 'windows)
(cond
[(and
(= len 2)
(letter-drive-start? bstr 2))
;; Letter drive without trailing separator
#t]
[(non-normal-backslash-backslash-questionmark? bstr)
#t]
[else #f]))
(and (= len 2)
(letter-drive-start? bstr 2)))
;; Letter drive without trailing separator
#f]
[(and (eq? convention 'windows)
(backslash-backslash-questionmark-simple-status bstr))
=> (lambda (status)
(eq? status 'simple))]
[else
(let loop ([i 0])
(cond
@ -161,12 +169,39 @@
#f]
[else (loop (add1 i))]))]))
(define (non-normal-backslash-backslash-questionmark? bstr)
(define (backslash-backslash-questionmark-simple-status bstr)
(define-values (kind drive-len orig-drive-len clean-start-pos sep-bstr)
(parse-backslash-backslash-questionmark bstr))
;; We could try harder to recognize normal forms, but for now
;; we assume that some normalization is needed in a \\?\ path.
kind)
(cond
[(not kind) #f]
[(and (fx= (bytes-ref bstr 4) (char->integer #\R))
(fx= (bytes-ref bstr 5) (char->integer #\E)))
;; For \\?\REL\ and \\?\RED\ paths, in use-filesystem mode,
;; we don't want to convert to a complete path unless there's
;; some simplification possible. Rebuild the path to see
;; whether it's already normalized and simple.
(let loop ([p (path bstr 'windows)] [accum '()] [as-dir? #f])
(define-values (base-dir name dir?) (split-path p))
(cond
[(symbol? name) 'non-simple]
[else
(define new-accum (cons name accum))
(define new-as-dir? (if (null? accum) dir? as-dir?))
(cond
[(path? base-dir) (loop base-dir new-accum new-as-dir?)]
[else
(define rebuilt0-p (apply build-path/convention-type 'windows new-accum))
(define rebuilt-p (if new-as-dir?
(path (bytes-append (path-bytes rebuilt0-p) #"\\") 'windows)
rebuilt0-p))
(define rebuilt-bstr (path-bytes (simplify-backslash-backslash-questionmark rebuilt-p)))
(if (bytes=? bstr rebuilt-bstr)
'simple
'non-simple)])]))]
[else
;; Conservatively assume non-simple
'non-simple]))
;; ----------------------------------------
@ -256,3 +291,19 @@
(bytes-append #"\\\\?\\UNC" (subbytes bstr 8))]
[else
(bytes-append #"\\\\?\\UNC" (subbytes bstr 7))]))
(define (same-modulo-slashes? p1 p2)
(define bstr1 (path-bytes p1))
(define bstr2 (path-bytes p2))
(define len (bytes-length bstr1))
(and (fx= len (bytes-length bstr2))
(let loop ([i 0])
(or (fx= i len)
(and (let ([b1 (bytes-ref bstr1 i)]
[b2 (bytes-ref bstr2 i)])
(or (fx= b1 b2)
(and (fx= b1 (char->integer #\\))
(fx= b2 (char->integer #\/)))
(and (fx= b1 (char->integer #\/))
(fx= b2 (char->integer #\\)))))
(loop (fx+ i 1)))))))

View File

@ -268,7 +268,9 @@
(if (eqv? (bytes-ref bstr 8) (char->integer #\\))
#""
#"\\")
(subbytes bstr 8))
(subbytes bstr 8 (let ([len (bytes-length bstr)])
(if is-dir? (sub1 len) len))))
'windows))
(cond
[explode? (list elem base)]

View File

@ -3560,7 +3560,7 @@ static Scheme_Object *convert_literal_relative(Scheme_Object *file)
return file;
}
static Scheme_Object *simplify_qm_path(Scheme_Object *path)
static Scheme_Object *simplify_qm_path(Scheme_Object *path, int *has_rel)
{
/* path is already expanded, so the only remaining
clean-ups are dropping a trailing separator,
@ -3599,6 +3599,7 @@ static Scheme_Object *simplify_qm_path(Scheme_Object *path)
start_special_check = lit_start;
drive_end = lit_start - 1;
} else if (drive_end < 0) {
/* \\?\REL\ */
int lit_start, dots_end;
dots_end = get_slashslash_qm_dot_ups_end(s, len, &lit_start);
if (lit_start == len) {
@ -3609,6 +3610,7 @@ static Scheme_Object *simplify_qm_path(Scheme_Object *path)
if (dots_end < 9)
drive_end = lit_start; /* no dots, so just keep the literal part */
else {
*has_rel = 1;
drive_end = 8; /* \\?\REL\..\, and we keep the .. */
drop_extra_slash = dots_end;
is_dir = 1;
@ -3740,8 +3742,8 @@ static Scheme_Object *do_simplify_path(Scheme_Object *path, Scheme_Object *cycle
int isdir, cleaned_slashes = 0, must_be_dir = 0, last_was_dir = 0, did_first = 0;
Scheme_Object *file = scheme_false, *base;
/* cleanse-path doesn't touch the filesystem. Always start with
that, to get things basically tidy. */
/* cleanse-path doesn't touch the filesystem. On Windows, always
start with that, to get things basically tidy. */
if (kind == SCHEME_WINDOWS_PATH_KIND) {
char *s;
int expanded, add_sep = 0;
@ -3749,10 +3751,12 @@ static Scheme_Object *do_simplify_path(Scheme_Object *path, Scheme_Object *cycle
NULL, &expanded, 0, 0, 0, kind, 0);
{
int slen;
if (expanded)
if (expanded) {
cleaned_slashes = 1;
slen = strlen(s);
else
} else
slen = SCHEME_PATH_LEN(path);
/* Note: not counting slash normalization as cleaned_slashes */
s = do_normal_path_seps(s, &slen, 0, 0, SCHEME_WINDOWS_PATH_KIND, &expanded);
}
if (expanded) {
@ -3792,7 +3796,7 @@ static Scheme_Object *do_simplify_path(Scheme_Object *path, Scheme_Object *cycle
Also responsible for determining whether there's a
redundant or missing trailing slash in the case that
the path is just a root. */
{
if (!cleaned_slashes || !use_filesystem) {
char *s;
int len, i, saw_dot = 0;
s = SCHEME_PATH_VAL(path);
@ -3801,8 +3805,10 @@ static Scheme_Object *do_simplify_path(Scheme_Object *path, Scheme_Object *cycle
if (kind == SCHEME_WINDOWS_PATH_KIND) {
if (!skip && check_dos_slashslash_qm(s, len, NULL, NULL, NULL)) {
if (!force_rel_up) {
int drive_end;
path = simplify_qm_path(path);
Scheme_Object *orig_path = path;
int drive_end, has_up = 0;
path = simplify_qm_path(path, &has_up);
cleaned_slashes = has_up;
len = SCHEME_PATH_LEN(path);
if (check_dos_slashslash_qm(SCHEME_PATH_VAL(path), len, &drive_end, NULL, NULL)) {
/* If it's a drive... */
@ -3811,7 +3817,10 @@ static Scheme_Object *do_simplify_path(Scheme_Object *path, Scheme_Object *cycle
path = scheme_path_to_directory_path(path);
}
}
return path;
if (!SAME_OBJ(path, orig_path))
cleaned_slashes = 1;
if (!cleaned_slashes || !use_filesystem)
return path;
} else {
/* force_rel_up means that we want a directory: */
return scheme_path_to_directory_path(path);