net/url-string: change parsing of "file:" URLs for Windows

History of the parsing of "file:" URLs for Windows:

 * In response to PR 8060 (April 2006): special handling added to
   support ill-formed URLs that were (are?) commonly used for
   filesystem paths.

 * Follow-up to PR 8060 (April 2008): added `path->url` and
   `url->path`.

 * In response to #1086 (October 2015, the commit): changed
   Windows-specific handling to be more constrained and added support
   for the proper encoding of UNC paths --- where "proper" means
   "according to a blog post from late 2006", which appears to be as
   close as we get to documentation of the URL encoding for Windows
   paths.
This commit is contained in:
Matthew Flatt 2015-10-12 08:30:47 -06:00
parent 6c0ffe1ba2
commit 5afdae8af9

View File

@ -200,24 +200,30 @@
(url-error "Invalid URL string; bad scheme ~e: ~e" scheme str)) (url-error "Invalid URL string; bad scheme ~e: ~e" scheme str))
;; Windows => "file://xxx:/...." specifies a "xxx:/..." path ;; Windows => "file://xxx:/...." specifies a "xxx:/..." path
(let ([win-file? (and (or (equal? "" port) (not port)) (let ([win-file? (and (or (equal? "" port) (not port))
(equal? "file" scheme) (equal? "file" (and scheme (string-downcase scheme)))
(eq? 'windows (file-url-path-convention-type)) (eq? 'windows (file-url-path-convention-type))
(not (equal? host "")))]) (not (equal? host ""))
(or (regexp-match? "^[fF][iI][lL][eE]://[a-zA-Z]:" str)
(regexp-match? "^[fF][iI][lL][eE]:\\\\" str)))])
(when win-file? (when win-file?
(set! path (cond [(equal? "" port) (string-append host ":" path)] (set! path (cond [(equal? "" port) (string-append host ":" path)]
[(and path host) (string-append host "/" path)] [(and path host) (string-append host "/" path)]
[else (or path host)])) [else (or path host)]))
(set! port #f) (set! port #f)
(set! host "")) (set! host ""))
(define win-file-url (and win-file?
(path->url (bytes->path (string->bytes/utf-8 path) 'windows))))
(let* ([scheme (and scheme (string-downcase scheme))] (let* ([scheme (and scheme (string-downcase scheme))]
[host (and host (string-downcase host))] [host (if win-file-url
(url-host win-file-url)
(and host (string-downcase host)))]
[user (uri-decode/maybe user)] [user (uri-decode/maybe user)]
[port (and port (string->number port))] [port (and port (string->number port))]
[abs? (or (equal? "file" scheme) [abs? (or (equal? "file" scheme)
(regexp-match? #rx"^/" path))] (regexp-match? #rx"^/" path))]
[path (if win-file? [path (if win-file?
(separate-windows-path-strings path) (url-path win-file-url)
(separate-path-strings path))] (separate-path-strings path))]
[query (if query (form-urlencoded->alist query) '())] [query (if query (form-urlencoded->alist query) '())]
[fragment (uri-decode/maybe fragment)]) [fragment (uri-decode/maybe fragment)])
(make-url scheme user host port abs? path query fragment)))) (make-url scheme user host port abs? path query fragment))))
@ -236,9 +242,6 @@
(let ([strs (regexp-split #rx"/" str)]) (let ([strs (regexp-split #rx"/" str)])
(map separate-params (if (string=? "" (car strs)) (cdr strs) strs)))) (map separate-params (if (string=? "" (car strs)) (cdr strs) strs))))
(define (separate-windows-path-strings str)
(url-path (path->url (bytes->path (string->bytes/utf-8 str) 'windows))))
(define (separate-params s) (define (separate-params s)
(let ([lst (map path-segment-decode (regexp-split #rx";" s))]) (let ([lst (map path-segment-decode (regexp-split #rx";" s))])
(make-path/param (car lst) (cdr lst)))) (make-path/param (car lst) (cdr lst))))
@ -273,7 +276,7 @@
;; If original path is a directory the resulting URL ;; If original path is a directory the resulting URL
;; should have a trailing forward slash ;; should have a trailing forward slash
[url-tail (if dir? (list (make-path/param "" null)) null)] [url-tail (if dir? (list (make-path/param "" null)) null)]
[url-path [host+url-path
(let loop ([path spath][accum null]) (let loop ([path spath][accum null])
(let-values ([(base name dir?) (split-path path)]) (let-values ([(base name dir?) (split-path path)])
(cond (cond
@ -290,18 +293,18 @@
(cond (cond
[(regexp-match? #rx"^\\\\\\\\[?]\\\\[a-zA-Z]:" s) [(regexp-match? #rx"^\\\\\\\\[?]\\\\[a-zA-Z]:" s)
;; \\?\<drive>: path: ;; \\?\<drive>: path:
(regexp-split #rx"[/\\]+" (substring s 4))] (cons "" (regexp-split #rx"[/\\]+" (substring s 4)))]
[(regexp-match? #rx"^\\\\\\\\[?]\\\\UNC" s) [(regexp-match? #rx"^\\\\\\\\[?]\\\\UNC" s)
;; \\?\ UNC path: ;; \\?\ UNC path:
(regexp-split #rx"[/\\]+" (substring s 7))] (cons "" (regexp-split #rx"[/\\]+" (substring s 7)))]
[(regexp-match? #rx"^[/\\]" s) [(regexp-match? #rx"^[/\\]" s)
;; UNC path: ;; UNC path:
(regexp-split #rx"[/\\]+" s)] (cdr (regexp-split #rx"[/\\]+" s))]
[else [else
(list s)]))) (list "" s)])))
accum) accum)
;; On other platforms, we drop the root: ;; On other platforms, we drop the root:
accum)] (cons "" accum))]
[else [else
(let ([accum (cons (make-path/param (let ([accum (cons (make-path/param
(if (symbol? name) (if (symbol? name)
@ -311,9 +314,14 @@
null) null)
accum)]) accum)])
(if (eq? base 'relative) (if (eq? base 'relative)
accum (cons "" accum)
(loop base accum)))])))]) (loop base accum)))])))]
(make-url "file" #f "" #f (absolute-path? path) [host (let ([h (car host+url-path)])
(if (path/param? h)
(path/param-path h)
h))]
[url-path (cdr host+url-path)])
(make-url "file" #f host #f (absolute-path? path)
(if (null? url-tail) url-path (append url-path url-tail)) (if (null? url-tail) url-path (append url-path url-tail))
'() #f))) '() #f)))
@ -331,6 +339,7 @@
(if (and (url-path-absolute? url) (if (and (url-path-absolute? url)
(eq? 'windows kind)) (eq? 'windows kind))
;; If initial path is "", then build UNC path. ;; If initial path is "", then build UNC path.
;; Also build a UNC path if the host is non-#f.
(cond (cond
[(not (url-path-absolute? url)) [(not (url-path-absolute? url))
(apply build-path (map string->path-element/same strs))] (apply build-path (map string->path-element/same strs))]
@ -340,6 +349,15 @@
(string->path/win (string->path/win
(string-append "\\\\" (cadr strs) "\\" (caddr strs) "\\")) (string-append "\\\\" (cadr strs) "\\" (caddr strs) "\\"))
(map string->path-element/same (cdddr strs)))] (map string->path-element/same (cdddr strs)))]
[(and (url-host url)
(not (equal? (url-host url) ""))
(pair? strs))
(if (equal? (car strs) "")
(error 'file://->path "rmpty drive element: ~e" url)
(apply build-path
(string->path/win
(string-append "\\\\" (url-host url) "\\" (car strs) "\\"))
(map string->path-element/same (cdr strs))))]
[(pair? strs) [(pair? strs)
(apply build-path (string->path/win (car strs)) (apply build-path (string->path/win (car strs))
(map string->path-element/same (cdr strs)))] (map string->path-element/same (cdr strs)))]