update tests and docs for Windows path encoding in URLs
See 5afdae8af9
in the main repo.
This commit is contained in:
parent
23e2b806a2
commit
00ef691ff2
|
@ -113,24 +113,33 @@ The contract on @racket[str] insists that, if the url has a scheme,
|
|||
then the scheme begins with a letter and consists only of letters,
|
||||
numbers, @litchar{+}, @litchar{-}, and @litchar{.} characters.
|
||||
|
||||
If @racket[str] starts with @racket["file:"], then the path is always
|
||||
parsed as an absolute path, and the parsing details depend on
|
||||
@racket[file-url-path-convention-type]:
|
||||
If @racket[str] starts with @litchar{file:} (case-insensitively) and
|
||||
the value of the @racket[file-url-path-convention-type] parameter is
|
||||
@racket['windows], then special parsing rules apply to accommodate
|
||||
ill-formed but widely-recognized path encodings:
|
||||
|
||||
@itemize[
|
||||
|
||||
@item{@racket['unix] : If @racket["file:"] is followed by
|
||||
@litchar{//} and a non-@litchar{/}, then the first element
|
||||
after the @litchar{//} is parsed as a host (and maybe port);
|
||||
otherwise, the first element starts the path, and the host is
|
||||
@racket[""].}
|
||||
@item{If @litchar{file:} is followed by @litchar{//}, a letter, and
|
||||
@litchar{:}, then the @litchar{//} is stripped and the
|
||||
remainder parsed as a Windows path.}
|
||||
|
||||
@item{@racket['windows] : If @racket["file:"] is followed by
|
||||
@litchar{//}, then the @litchar{//} is stripped; the remainder
|
||||
parsed as a Windows path. The host is always @racket[""] and
|
||||
the port is always @racket[#f].}
|
||||
@item{If @litchar{file:} is followed by @litchar{\\}, then the
|
||||
@litchar{\\} is stripped and the remainder parsed as a Windows
|
||||
path.}
|
||||
|
||||
]
|
||||
|
||||
In both of these cases, the host is @racket[""], the port is
|
||||
@racket[#f], and path-element decoding (which extracts parameters or
|
||||
replaces @litchar{%20} with a space, for example) is not applied to
|
||||
the path.
|
||||
|
||||
@history[#:changed "6.3.0.1" @elem{Changed handling of @litchar{file:}
|
||||
URLs when the value of
|
||||
@racket[file-url-path-convention-type]
|
||||
is @racket['windows].}]}
|
||||
|
||||
]}
|
||||
|
||||
@defproc[(combine-url/relative [base url?] [relative string?]) url?]{
|
||||
|
||||
|
@ -152,22 +161,8 @@ scheme @racket["http"].}
|
|||
@defproc[(url->string [URL url?]) string?]{
|
||||
|
||||
Generates a string corresponding to the contents of a @racket[url]
|
||||
struct. For a @racket["file:"] URL, the URL must not be relative, the
|
||||
result always starts @litchar{file://}, and the interpretation of the
|
||||
path depends on the value of @racket[file-url-path-convention-type]:
|
||||
|
||||
@itemize[
|
||||
|
||||
@item{@racket['unix] : Elements in @racket[URL] are treated as path
|
||||
elements. Empty strings in the path list are treated like
|
||||
@racket['same].}
|
||||
|
||||
@item{@racket['windows] : If the first element is @racket[""] then
|
||||
the next two elements define the UNC root, and the rest of the
|
||||
elements are treated as path elements. Empty strings in the
|
||||
path list are treated like @racket['same].}
|
||||
|
||||
]
|
||||
struct. For a @racket["file:"] URL, the URL must not be relative, and
|
||||
the result always starts with @litchar{file://}.
|
||||
|
||||
The @racket[url->string] procedure uses
|
||||
@racket[alist->form-urlencoded] when formatting the query, so it is
|
||||
|
@ -182,7 +177,18 @@ The encoding of path segments and fragment is sensitive to the
|
|||
@defproc[(path->url [path (or/c path-string? path-for-some-system?)])
|
||||
url?]{
|
||||
|
||||
Converts a path to a @racket[url].}
|
||||
Converts a path to a @racket[url].
|
||||
|
||||
With the @racket['unix] path convention, the host in the resulting URL
|
||||
is always @racket[""], and the path is absolute from the root.
|
||||
|
||||
With the @racket['windows] path convention and a UNC path, the machine
|
||||
part of the UNC root is used as the URL's host, and the drive part of
|
||||
the root is the first element of the URL's path.
|
||||
|
||||
@history[#:changed "6.3.0.1" @elem{Changed @racket['windows] encoding
|
||||
of UNC paths.}]}
|
||||
|
||||
|
||||
|
||||
@defproc[(url->path [URL url?]
|
||||
|
@ -190,7 +196,36 @@ Converts a path to a @racket[url].}
|
|||
path-for-some-system?]{
|
||||
|
||||
Converts @racket[URL], which is assumed to be a @racket["file"] URL,
|
||||
to a path.}
|
||||
to a path.
|
||||
|
||||
For the @racket['unix] path convention, the URL's host is ignored, and
|
||||
the URL's path is formed relative to the root.
|
||||
|
||||
For the @racket['windows] path convention:
|
||||
|
||||
@itemlist[
|
||||
|
||||
@item{A non-@racket[""] value for the URL's host field creates a UNC
|
||||
path, where the host is the UNC root's machine name, the URL's
|
||||
path must be non-empty, and the first element of the URL's path
|
||||
is used as the drive part of the UNC root.}
|
||||
|
||||
@item{For legacy reasons, if the URL's host is @racket[""], the URL's
|
||||
path contains at least three elements, and the first
|
||||
element of the URL's path is also @racket[""], then a UNC path
|
||||
is created by using the second and third elements of the path
|
||||
as the UNC root's machine and drive, respectively.}
|
||||
|
||||
@item{Otherwise, the URL's path is converted to a Windows path. The
|
||||
result is an absolute path if the URL's first path element
|
||||
corresponds to a drive, otherwise the result is a relative path
|
||||
(even though URLs are not intended to represent relative paths).}
|
||||
|
||||
]
|
||||
|
||||
@history[#:changed "6.3.0.1" @elem{Changed @racket['windows] treatment
|
||||
of a non-@racket[""] host.}]}
|
||||
|
||||
|
||||
|
||||
@defproc[(relative-path->relative-url-string [path (and/c (or/c path-string? path-for-some-system?)
|
||||
|
@ -206,8 +241,8 @@ URL ends with @litchar{/}.}
|
|||
|
||||
@defparam[file-url-path-convention-type kind (or/c 'unix 'windows)]{
|
||||
|
||||
Determines the default conversion to and from strings for
|
||||
@racket["file"] URLs. See @racket[string->url] and @racket[url->string].}
|
||||
Determines the default conversion from strings for
|
||||
@racket["file"] URLs; see @racket[string->url].}
|
||||
|
||||
|
||||
@defparam[current-url-encode-mode mode (or/c 'recommended 'unreserved)]{
|
||||
|
|
|
@ -166,11 +166,36 @@
|
|||
=> #("file" #f "foo" #f #t (#("abc") #("def.html")) () #f)))
|
||||
(parameterize ([file-url-path-convention-type 'windows])
|
||||
(test (string->url/vec "file://foo:/abc/def.html")
|
||||
=> #("file" #f "" #f #t (#("foo:") #("abc") #("def.html")) () #f)
|
||||
=> #("file" #f "foo" #f #t (#("abc") #("def.html")) () #f)
|
||||
(string->url/vec "file://foo/abc/def.html")
|
||||
=> #("file" #f "foo" #f #t (#("abc") #("def.html")) () #f)
|
||||
(string->url/vec "file:///foo/abc/def.html")
|
||||
=> #("file" #f "" #f #t (#("foo") #("abc") #("def.html")) () #f)
|
||||
(string->url/vec "file://c:/abc/def.html")
|
||||
=> #("file" #f "" #f #t (#("c:") #("abc") #("def.html")) () #f)
|
||||
(string->url/vec "fILe://C:/abc/def.html")
|
||||
=> #("file" #f "" #f #t (#("C:") #("abc") #("def.html")) () #f)
|
||||
(string->url/vec "file:c:/abc/def.html")
|
||||
=> #("file" #f #f #f #t (#("c:") #("abc") #("def.html")) () #f)
|
||||
(string->url/vec "file:/c:/abc/def.html")
|
||||
=> #("file" #f #f #f #t (#("c:") #("abc") #("def.html")) () #f)
|
||||
(string->url/vec "file:\\\\d\\c\\abc\\def.html")
|
||||
=> #("file" #f "" #f #t (#("") #("d") #("c") #("abc") #("def.html")) () #f)))
|
||||
=> #("file" #f "d" #f #t (#("c") #("abc") #("def.html")) () #f)
|
||||
;; Despite parsing as an "absolute" URL, will convert to a relative
|
||||
;; Windows path:
|
||||
(string->url/vec "file:///x/y")
|
||||
=> #("file" #f "" #f #t (#("x") #("y")) () #f)
|
||||
;; No path-element decoding when special parsing is triggered:
|
||||
(string->url/vec "file://c:/a%20b")
|
||||
=> #("file" #f "" #f #t (#("c:") #("a%20b")) () #f)
|
||||
(string->url/vec "file:\\\\\\\\d\\c\\a%20b")
|
||||
=> #("file" #f "" #f #t (#("") #("d") #("c") #("a%20b")) () #f)
|
||||
;; Path-element decoding applies for proper URL encodings:
|
||||
(string->url/vec "file:///c:/a%20b")
|
||||
=> #("file" #f "" #f #t (#("c:") #("a b")) () #f)
|
||||
(string->url/vec "file://d/c/a%20b")
|
||||
=> #("file" #f "d" #f #t (#("c") #("a b")) () #f)))
|
||||
|
||||
|
||||
(parameterize ([file-url-path-convention-type 'unix])
|
||||
;; but no effect on http://foo:/...
|
||||
|
@ -221,7 +246,18 @@
|
|||
(path->bytes (url->path (path->url (bytes->path #"\\\\?\\c:\\a/x\\b" 'windows)) 'windows))
|
||||
=> #"\\\\?\\c:\\a/x\\b"
|
||||
(path->bytes (url->path (path->url (bytes->path #"\\\\?\\UNC\\d\\\\c\\a/x\\b" 'windows)) 'windows))
|
||||
=> #"\\\\?\\UNC\\d\\c\\a/x\\b")
|
||||
=> #"\\\\?\\UNC\\d\\c\\a/x\\b"
|
||||
;; Support proper encoding of UNC paths:
|
||||
(path->bytes (url->path (vec->url #("file" #f "m" #f #t (#("d")) () #f)) 'windows))
|
||||
=> #"\\\\m\\d\\"
|
||||
(path->bytes (url->path (vec->url #("file" #f "m" #f #t (#("d") #("x")) () #f)) 'windows))
|
||||
=> #"\\\\m\\d\\x"
|
||||
;; Support old encoding of UNC paths:
|
||||
(path->bytes (url->path (vec->url #("file" #f "" #f #t (#("") #("m") #("d")) () #f)) 'windows))
|
||||
=> #"\\\\m\\d\\"
|
||||
(path->bytes (url->path (vec->url #("file" #f "" #f #t (#("") #("m") #("d") #("x")) () #f)) 'windows))
|
||||
=> #"\\\\m\\d\\x")
|
||||
|
||||
|
||||
;; see PR8809 (value-less keys in the query part)
|
||||
(test-s->u #("http" #f "foo.bar" #f #t (#("baz")) ((ugh . #f)) #f)
|
||||
|
|
Loading…
Reference in New Issue
Block a user