From 9fed5b585a694c7a6ac0c03b6fbe7ae1d8274b81 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Wed, 18 Jun 2014 22:12:20 -0600 Subject: [PATCH] windows: fix symbolic link handling to match the OS Windows parses relative-path links with yet another set of rules --- slightly different from the many other existing rules for parsing paths. Unfortunately, a few OS calls don't provide an option for having the OS follow links, so we have to re-implement (our best guess at) the OS's parsing of links. --- .../scribblings/reference/filesystem.scrbl | 5 +- .../scribblings/reference/paths.scrbl | 10 +- .../scribblings/reference/windows-paths.scrbl | 5 +- .../racket-test/tests/racket/win-link.rkt | 171 ++++++++++++++++++ racket/src/racket/src/file.c | 132 +++++++++++--- 5 files changed, 290 insertions(+), 33 deletions(-) create mode 100644 pkgs/racket-pkgs/racket-test/tests/racket/win-link.rkt diff --git a/pkgs/racket-pkgs/racket-doc/scribblings/reference/filesystem.scrbl b/pkgs/racket-pkgs/racket-doc/scribblings/reference/filesystem.scrbl index a55de2ae0b..1ce0e51cff 100644 --- a/pkgs/racket-pkgs/racket-doc/scribblings/reference/filesystem.scrbl +++ b/pkgs/racket-pkgs/racket-doc/scribblings/reference/filesystem.scrbl @@ -402,8 +402,9 @@ not expanded before writing the link. If the link is not created successfully,the @exnraise[exn:fail:filesystem]. On Windows XP and earlier, the @exnraise[exn:fail:unsupported]. On -later versions of Windows, the creation of links tends to be disallowed -by security policies. +later versions of Windows, the creation of links tends to be +disallowed by security policies. Furthermore, a relative-path link is +parsed specially; see @secref["windowspaths"] for more information. @history[#:changed "6.0.1.12" @elem{Added support for links on Windows.}]} diff --git a/pkgs/racket-pkgs/racket-doc/scribblings/reference/paths.scrbl b/pkgs/racket-pkgs/racket-doc/scribblings/reference/paths.scrbl index b72d5fb317..2979f78979 100644 --- a/pkgs/racket-pkgs/racket-doc/scribblings/reference/paths.scrbl +++ b/pkgs/racket-pkgs/racket-doc/scribblings/reference/paths.scrbl @@ -351,11 +351,11 @@ is returned (this may be a relative path with respect to the directory owning @racket[path]), otherwise @racket[path] is returned (after expansion). -On Windows, the path for a link should be simplified syntactically -using the whole link, so that an up-directory indicator removes a -preceding path element independent of whether the preceding element -itself refers to a link. See @secref["windowspaths"] for more -information. +On Windows, the path for a link should be simplified syntactically, so +that an up-directory indicator removes a preceding path element +independent of whether the preceding element itself refers to a +link. For relative-paths links, the path should be parsed specially; +see @secref["windowspaths"] for more information. @history[#:changed "6.0.1.12" @elem{Added support for links on Windows.}]} diff --git a/pkgs/racket-pkgs/racket-doc/scribblings/reference/windows-paths.scrbl b/pkgs/racket-pkgs/racket-doc/scribblings/reference/windows-paths.scrbl index 18d4085d53..c93af4b0b1 100644 --- a/pkgs/racket-pkgs/racket-doc/scribblings/reference/windows-paths.scrbl +++ b/pkgs/racket-pkgs/racket-doc/scribblings/reference/windows-paths.scrbl @@ -239,7 +239,10 @@ Even on variants of Windows that support symbolic links, up-directory sensitive to links. For example, if a path ends with @litchar{d\..\f} and @litchar{d} refers to a symbolic link that references a directory with a different parent than @litchar{d}, the path nevertheless refers -to @litchar{f} in the same directory as @litchar{d}. +to @litchar{f} in the same directory as @litchar{d}. A relative-path +link is parsed as if prefixed with @litchar{\\?\REL} paths, except +that @litchar{..} and @litchar{.} elements are allowed throughout the +path, and any number of redundant @litchar{/} separators are allowed. Windows paths are @techlink{cleanse}d as follows: In paths that start @litchar{\\?\}, redundant @litchar{\}s are removed, an extra diff --git a/pkgs/racket-pkgs/racket-test/tests/racket/win-link.rkt b/pkgs/racket-pkgs/racket-test/tests/racket/win-link.rkt new file mode 100644 index 0000000000..c14088a51d --- /dev/null +++ b/pkgs/racket-pkgs/racket-test/tests/racket/win-link.rkt @@ -0,0 +1,171 @@ +#lang racket/base +(require racket/file) + +;; Run this test on a Windows machine with a user that is allowed +;; to create symbolic links. Since that's not usually the case, +;; `raco test` will do nothing: +(module test racket/base) + +(define count 0) + +(define-syntax-rule (test expect get) + (do-test expect get #'get)) +(define (do-test expected got where) + (set! count (add1 count)) + (unless (equal? expected got) + (error 'test + (string-append "failure\n" + " expected: ~e\n" + " got: ~e\n" + " expression: ~s") + expected + got + where))) + +(define temp-dir (find-system-path 'temp-dir)) + +(define sub-name "link-sub") +(define sub (build-path temp-dir sub-name)) +(delete-directory/files sub #:must-exist? #f) +(make-directory* sub) + +(define (go build tbuild rbuild) + (test #f (link-exists? (build "l1"))) + + ;; t1 -> l1 + (make-file-or-directory-link (rbuild "t1") (build "l1")) + + (test #t (link-exists? (build "l1"))) + (test #f (file-exists? (build "l1"))) + (test #f (directory-exists? (build "l1"))) + + (make-directory (tbuild "t1")) + (test #t (link-exists? (build "l1"))) + (test #f (file-exists? (build "l1"))) + (test #t (directory-exists? (build "l1"))) + + ;; File via link to enclsoing dir + (call-with-output-file (build-path (tbuild "t1") "f") + (lambda (o) (display "t1-f" o))) + (test (list (string->path "f")) (directory-list (build "l1"))) + (test "t1-f" (file->string (build-path (build "l1") "f"))) + (test #t (file-exists? (build-path (build "l1") "f"))) + (test (file-or-directory-modify-seconds (build-path (tbuild "t1") "f")) + (file-or-directory-modify-seconds (build-path (build "l1") "f"))) + + ;; Link to file in dir + (make-file-or-directory-link (let ([p (rbuild "t1")]) + (if (path? p) + (build-path p "f2") + (string-append p "\\f2"))) + (build "l2")) + (call-with-output-file (build-path (tbuild "t1") "f2") + (lambda (o) (display "t1-f2" o))) + (test "t1-f2" (file->string (build "l2"))) + (delete-file (build-path (tbuild "t1") "f2")) + + ;; Link to dir in dir + (make-directory (build-path (tbuild "t1") "f2")) + (call-with-output-file (build-path (tbuild "t1") "f2" "f3") + (lambda (o) (display "t1-f2-f3" o))) + (test "t1-f2-f3" (file->string (build-path (build "l2") "f3"))) + (test (list (string->path "f3")) (directory-list (build "l2"))) + (delete-file (build "l2")) + + ;; Link to dir in dir with "." path elements + (make-file-or-directory-link (let ([p (rbuild "t1")]) + (if (path? p) + (build-path p 'same 'same "f2" 'same) + (string-append p "\\.\\.\\f2\\."))) + (build "l2")) + (test #t (directory-exists? (build "l2"))) + (test "t1-f2-f3" (file->string (build-path (build "l2") "f3"))) + (test (list (string->path "f3")) (directory-list (build "l2"))) + (delete-file (build "l2")) + + ;; Link with ".." to cancel first link element + (make-file-or-directory-link (let ([p (rbuild "t1")]) + (if (path? p) + (build-path p 'up "f3") + (string-append p "\\..\\f3"))) + (build "l3")) + (call-with-output-file (build-path (tbuild "t1") 'up "f3") + (lambda (o) (display "f3!" o))) + (test "f3!" (file->string (build "l3"))) + (delete-file (build-path (tbuild "t1") 'up "f3")) + (delete-file (build "l3")) + + ;; Link with ".." to go up from link's directory + (make-file-or-directory-link (let ([p (rbuild "t1")]) + (if (path? p) + (build-path p "f3") + (string-append "..\\" sub-name "\\" p "\\f3"))) + (build "l3")) + (call-with-output-file (build-path (tbuild "t1") "f3") + (lambda (o) (display "f3." o))) + (test "f3." (file->string (build "l3"))) + (delete-file (build-path (tbuild "t1") "f3")) + (delete-file (build "l3")) + + ;; Trailing ".." + (make-file-or-directory-link (let ([p (rbuild "t1")]) + (if (path? p) + (build-path p 'up) + (string-append p "\\.."))) + (build "l3")) + (call-with-output-file (build-path sub "f4") + (lambda (o) (display "(f4)" o))) + (test #t (directory-exists? (build "l3"))) + (test "(f4)" (file->string (build-path (build "l3") "f4"))) + (delete-file (build-path sub "f4")) + (delete-file (build "l3")) + + (delete-directory/files (tbuild "t1")) + (test #f (directory-exists? (build "l1"))) + (test #f (file-exists? (build-path (build "l1") "f"))) + + (call-with-output-file (tbuild "t1") + (lambda (o) (display "t1" o))) + (test "t1" (file->string (build "l1"))) + (test #t (file-exists? (build "l1"))) + (test (file-or-directory-modify-seconds (tbuild "t1")) + (file-or-directory-modify-seconds (build "l1"))) + + (delete-file (tbuild "t1")) + (delete-file (build "l1"))) + +(define (in-sub s) (build-path sub s)) +(define (in-sub/unc s) + (define e (explode-path (in-sub s))) + (apply build-path + (format "\\\\localhost\\~a$\\" + (substring (path->string (car e)) 0 1)) + (cdr e))) +(define (trailing-space/string s) + (string-append s " ")) +(define (trailing-space s) + (string->path-element (trailing-space/string s))) +(define (trailing-space-in-sub s) + (in-sub (trailing-space s))) +(define (trailing-space-in-sub/unc s) + (define-values (base name dir) (split-path (in-sub/unc s))) + (build-path base (trailing-space s))) + +(go in-sub in-sub values) +(go in-sub in-sub in-sub) +(parameterize ([current-directory sub]) + (go values values values) + (go values in-sub in-sub)) + +(go in-sub/unc in-sub values) +(go in-sub in-sub in-sub/unc) + +(parameterize ([current-directory sub]) + (go in-sub trailing-space trailing-space/string) + (go in-sub/unc trailing-space trailing-space/string)) +(go in-sub trailing-space-in-sub trailing-space/string) +(go in-sub trailing-space-in-sub/unc trailing-space/string) + +(delete-directory/files sub) + +(printf "~a tests passed\n" count) diff --git a/racket/src/racket/src/file.c b/racket/src/racket/src/file.c index 414b9af546..26f9792de9 100644 --- a/racket/src/racket/src/file.c +++ b/racket/src/racket/src/file.c @@ -2228,7 +2228,8 @@ static char *UNC_readlink(const char *fn) h = CreateFileW(WIDE_PATH(fn), GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, - OPEN_EXISTING, mzFILE_FLAG_OPEN_REPARSE_POINT, NULL); + OPEN_EXISTING, mzFILE_FLAG_OPEN_REPARSE_POINT, + NULL); if (h == INVALID_HANDLE_VALUE) { errno = -1; @@ -2260,7 +2261,7 @@ static char *UNC_readlink(const char *fn) off = rp->u.SymbolicLinkReparseBuffer.PrintNameOffset; len = rp->u.SymbolicLinkReparseBuffer.PrintNameLength; - lk = (wchar_t *)scheme_malloc_atomic((len + 1) * sizeof(wchar_t)); + lk = (wchar_t *)scheme_malloc_atomic(len + 2); memcpy(lk, (char *)rp->u.SymbolicLinkReparseBuffer.PathBuffer + off, len); lk[len>>1] = 0; @@ -2268,6 +2269,90 @@ static char *UNC_readlink(const char *fn) return NARROW_PATH(lk); } +Scheme_Object *combine_link_path(char *copy, int len, char *clink, int clen, + int ssq, int drive_end) +{ + Scheme_Object *sp; + + /* Windows treats link paths purely syntactically (i.e., simplifying + "up" before consulting the filesystem). */ + + if (scheme_is_relative_path(clink, clen, SCHEME_WINDOWS_PATH_KIND)) { + /* Windows treats absolute paths in the general way, + allowing forward slahses, stripping trailing spaces, and + so on. It treats relative paths as "\\?\REL\"-like, but + allowing ".." as up, "." as same, and multiple adjacent "\"s. + So, we implement yet another path construction (which is + likely what Windows itself does). */ + int i; + char *copy2; + copy2 = (char *)scheme_malloc_atomic(len + clen + 10); + if (!ssq) { + /* Always use "\\?\" mode. */ + if (copy[1] == ':') { + memcpy(copy2, "\\\\?\\", 4); + memcpy(copy2+4, copy, len); + len += 4; + drive_end += 4; + } else { + memcpy(copy2, "\\\\?\\UNC", 7); + memcpy(copy2+7, copy+1, len-1); + len += 6; + drive_end += 6; + } + ssq = 1; + } else + memcpy(copy2, copy, len); + copy = copy2; + i = -1; /* start with implicit ".." */ + while (i < clen) { + if ((i < 0) + || ((i + 1 < clen) + && (clink[i] == '.') && (clink[i+1] == '.') + && ((i + 2 >= clen) + || (clink[i+2] == '\\')))) { + /* up directory; don't back over root */ + if (len <= drive_end) { + errno = -1; + return 0; + } + while ((len > drive_end) && (copy[len-1] != '\\')) { + len--; + } + if ((len > drive_end) && (copy[len-1] == '\\')) { + len--; + } + if (i < 0) + i = 0; + else + i += 3; + } else if ((clink[i] == '.') && ((i + 1 >= clen) + || (clink[i+1] == '\\'))) + i += 2; /* skip "." */ + else if (clink[i] == '\\') + i++; + else { + if (copy[len-1] != '\\') + copy[len++] = '\\'; + while ((i < clen) && (clink[i] != '\\')) { + copy[len++] = clink[i++]; + } + } + } + copy[len] = 0; + sp = scheme_make_sized_path(copy, len, 0); + } else { + sp = scheme_make_sized_path(clink, clen, 0); + sp = do_simplify_path(sp, scheme_null, 0, 0, 0, SCHEME_WINDOWS_PATH_KIND, 0); + if (SCHEME_FALSEP(sp)) { + errno = -1; + return 0; + } + } + + return sp; +} + # define MZ_UNC_READ 0x1 # define MZ_UNC_WRITE 0x2 # define MZ_UNC_EXEC 0x4 @@ -2283,7 +2368,7 @@ static int UNC_stat(char *dirname, int len, int *flags, int *isdir, int *islink, So, we use GetFileAttributesExW(). */ char *copy; WIN32_FILE_ATTRIBUTE_DATA fd; - int must_be_dir = 0, orig_len; + int must_be_dir = 0, drive_end, ssq; Scheme_Object *cycle_check = scheme_null; if (resolved_path) @@ -2298,32 +2383,38 @@ static int UNC_stat(char *dirname, int len, int *flags, int *isdir, int *islink, if (date) *date = scheme_false; - orig_len = len; - copy = scheme_malloc_atomic(len + 14); - if (check_dos_slashslash_qm(dirname, len, NULL, NULL, NULL)) { + ssq = check_dos_slashslash_qm(dirname, len, &drive_end, NULL, NULL); + if (ssq) { memcpy(copy, dirname, len + 1); } else { + if (check_dos_slashslash_drive(dirname, 0, len, &drive_end, 0, 0)) + drive_end++; + else + drive_end = 3; /* must be :/ */ + memcpy(copy, dirname, len + 1); while (IS_A_DOS_SEP(copy[len - 1])) { --len; - --orig_len; copy[len] = 0; must_be_dir = 1; } } - /* If we ended up with "\\?\X:", then drop the "\\?\" */ + + /* If we ended up with "\\?\X:" (and nothing after), then drop the "\\?\" */ if ((copy[0] == '\\')&& (copy[1] == '\\') && (copy[2] == '?') && (copy[3] == '\\') && is_drive_letter(copy[4]) && (copy[5] == ':') && !copy[6]) { memmove(copy, copy + 4, len - 4); len -= 4; + drive_end -= 4; copy[len] = 0; } - /* If we ended up with "\\?\\X:", then drop the "\\?\\" */ + /* If we ended up with "\\?\\X:" (and nothing after), then drop the "\\?\\" */ if ((copy[0] == '\\') && (copy[1] == '\\') && (copy[2] == '?') && (copy[3] == '\\') && (copy[4] == '\\') && is_drive_letter(copy[5]) && (copy[6] == ':') && !copy[7]) { memmove(copy, copy + 5, len - 5); len -= 5; + drive_end -= 5; copy[len] = 0; } @@ -2336,26 +2427,17 @@ static int UNC_stat(char *dirname, int len, int *flags, int *isdir, int *islink, *islink = 1; return 1; } else { - /* Resolve links ourselves. It's clear how Windows - treats links within a link target path, but it seems - to treat them purely syntactically (i.e., simplifying - "up" before consulting the filesystem). */ + /* Resolve links ourselves. (We wouldn't have to do this at + all if GetFileAttributesEx() and FindFirstFile() provided a + way to follow links.) */ + char *clink; Scheme_Object *sp, *cl, *cp; - copy = UNC_readlink(dirname); - while (orig_len && !IS_A_DOS_SEP(dirname[orig_len - 1])) { - --orig_len; - } - while (orig_len && IS_A_DOS_SEP(dirname[orig_len - 1])) { - --orig_len; - } - dirname = do_path_to_complete_path(copy, strlen(copy), dirname, orig_len, SCHEME_WINDOWS_PATH_KIND); - len = strlen(dirname); - sp = scheme_make_sized_path(dirname, len, 0); - sp = do_simplify_path(sp, scheme_null, 0, 0, 0, SCHEME_WINDOWS_PATH_KIND, 0); - if (SCHEME_FALSEP(sp)) { + clink = UNC_readlink(dirname); + if (!clink) { errno = -1; return 0; } + sp = combine_link_path(copy, len, clink, strlen(clink), ssq, drive_end); for (cl = cycle_check; !SCHEME_NULLP(cl); cl = SCHEME_CDR(cl)) { cp = SCHEME_CAR(cl); if ((SCHEME_PATH_LEN(cp) == SCHEME_PATH_LEN(sp))