From 8e59e04b24d858aa2904dc1a403e00500fea9f2c Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Thu, 10 Apr 2008 19:05:35 +0000 Subject: [PATCH] change -q to config option, fix unlikely race condition in wake-on-signal, add path->url and url->path, and adjust URL parsing svn: r9239 original commit: 0153e122b7423d578acc480904f7a0fcfd52f46d --- collects/net/scribblings/url.scrbl | 73 +++++++++++++++++++++++++++--- collects/tests/mzscheme/net.ss | 67 +++++++++++++++++++-------- 2 files changed, 116 insertions(+), 24 deletions(-) diff --git a/collects/net/scribblings/url.scrbl b/collects/net/scribblings/url.scrbl index 7ad210140f..f0aa400672 100644 --- a/collects/net/scribblings/url.scrbl +++ b/collects/net/scribblings/url.scrbl @@ -73,7 +73,16 @@ URL that ends in a slash. For example, the result of @scheme[(string->url "http://www.drscheme.org/a/")] has a @scheme[path] field with strings @scheme["a"] and @scheme[""], while the result of @scheme[(string->url "http://www.drscheme.org/a")] has a -@scheme[path] field with only the string @scheme["a"].} +@scheme[path] field with only the string @scheme["a"]. + +When a @scheme["file"] URL is represented by a @scheme[url] structure, +the @scheme[path] field is mostly a list of path elements. For Unix +paths, the root directory is not included in @scheme[path]; its +presence or absence is implicit in the @scheme[path-absolute?] flag. +For Windows paths, the first element typically represents a drive, but +a UNC path is represented by a first element that is @scheme[""] and +then successive elements complete the drive components that are +separated by @litchar{/} or @litchar{\}.} @defstruct[path/param ([path (or/c string? (one-of/c 'up 'same))] [param (listof string?)])]{ @@ -95,7 +104,26 @@ Parses the URL specified by @scheme[str] into a @scheme[url] struct. 
The @scheme[string->url] procedure uses @scheme[form-urlencoded->alist] when parsing the query, so it is sensitive to the @scheme[current-alist-separator-mode] parameter for -determining the association separator.} +determining the association separator. + +If @scheme[str] starts with @scheme["file:"], then the path is always +parsed as an absolute path, and the parsing details depend on +@scheme[file-url-path-convention-type]: + +@itemize[ + + @item{@scheme['unix] : If @scheme["file:"] is followed by + @litchar{//} and a non-@litchar{/}, then the first element + after the @litchar{//} is parsed as a host (and maybe port); + otherwise, the first element starts the path, and the host is + @scheme[""].} + + @item{@scheme['windows] : If @scheme["file:"] is followed by + @litchar{//}, then the @litchar{//} is stripped; the remainder + is parsed as a Windows path. The host is always @scheme[""] and + the port is always @scheme[#f].} + +]} @defproc[(combine-url/relative [base url?] [relative string?]) url?]{ @@ -117,17 +145,50 @@ scheme @scheme["http"].} @defproc[(url->string [URL url?]) string?]{ -Generates a string corresponding to the contents of a @scheme[url] struct. -For a @scheme["file:"] URL, empty strings in the path list are treated as -@scheme['same] for @scheme[build-path]. +Generates a string corresponding to the contents of a @scheme[url] +struct. For a @scheme["file:"] URL, the URL must not be relative, the +result always starts with @litchar{file://}, and the interpretation of the +path depends on the value of @scheme[file-url-path-convention-type]: + +@itemize[ + + @item{@scheme['unix] : Elements in @scheme[URL] are treated as path + elements. Empty strings in the path list are treated like + @scheme['same].} + + @item{@scheme['windows] : If the first element is @scheme[""] then + the next two elements define the UNC root, and the rest of the + elements are treated as path elements.
Empty strings in the + path list are treated like @scheme['same].} + +] The @scheme[url->string] procedure uses -@scheme[alist->form-urlencoded] when formatting the query, so it it +@scheme[alist->form-urlencoded] when formatting the query, so it is sensitive to the @scheme[current-alist-separator-mode] parameter for determining the association separator. The default is to separate associations with a @litchar{&}.} +@defproc[(path->url [path (or/c path-string? path-for-some-system?)]) + url?]{ + +Converts a path to a @scheme[url].} + + +@defproc[(url->path [URL url?] + [kind (one-of/c 'unix 'windows) (system-path-convention-type)]) + path-for-some-system?]{ + +Converts @scheme[URL], which is assumed to be a @scheme["file"] URL, +to a path.} + + +@defparam[file-url-path-convention-type kind (one-of/c 'unix 'windows)]{ + +Determines the default conversion to and from strings for +@scheme["file"] URLs. See @scheme[string->url] and @scheme[url->string].} + @deftogether[( @defproc[(get-pure-port [URL url?] diff --git a/collects/tests/mzscheme/net.ss b/collects/tests/mzscheme/net.ss index 6bec67a8a8..c467128119 100644 --- a/collects/tests/mzscheme/net.ss +++ b/collects/tests/mzscheme/net.ss @@ -11,7 +11,6 @@ (require net/url net/uri-codec mzlib/string - net/url-unit ; to get set-url:os-type! ) (test "%Pq" uri-decode "%Pq") @@ -280,8 +279,10 @@ (err/rt-test (string->url "a b://www.foo.com/") url-exception?) 
;; test file: urls - (test-s->u #("file" #f #f #f #t (#("abc") #("def.html")) () #f) - "file:/abc/def.html") + (test-s->u #("file" #f "" #f #t (#("abc") #("def.html")) () #f) + "file:///abc/def.html") + (test "file:///abc/def.html" url->string (string->url "file:///abc/def.html")) + (test "file://a/b" url->string (string->url "file://a/b")) (test-s->u #("file" #f "localhost" #f #t (#("abc") #("def.html")) () #f) "file://localhost/abc/def.html") @@ -289,26 +290,56 @@ ;; test files: urls with colons, and the different parsing on Windows (test-s->u #("file" #f "localhost" 123 #t (#("abc") #("def.html")) () #f) "file://localhost:123/abc/def.html") - (set-url:os-type! 'unix) - ;; different parse for file://foo:/... - (test #("file" #f "foo" #f #t (#("abc") #("def.html")) () #f) + (parameterize ([file-url-path-convention-type 'unix]) + ;; different parse for file://foo:/... + (test #("file" #f "foo" #f #t (#("abc") #("def.html")) () #f) + string->url/vec + "file://foo:/abc/def.html")) + (parameterize ([file-url-path-convention-type 'windows]) + (test #("file" #f "" #f #t (#("foo:") #("abc") #("def.html")) () #f) string->url/vec "file://foo:/abc/def.html") - (set-url:os-type! 'windows) - (test #("file" #f #f #f #f (#("foo:") #("abc") #("def.html")) () #f) + (test #("file" #f "" #f #t (#("c:") #("abc") #("def.html")) () #f) string->url/vec - "file://foo:/abc/def.html") - (set-url:os-type! 'unix) - ;; but no effect on http://foo:/... - (test #("http" #f "foo" #f #t (#("abc") #("def.html")) () #f) + "file://c:/abc/def.html") + (test #("file" #f "" #f #t (#("") #("d") #("c") #("abc") #("def.html")) () #f) string->url/vec - "http://foo:/abc/def.html") - (set-url:os-type! 'windows) - (test #("http" #f "foo" #f #t (#("abc") #("def.html")) () #f) - string->url/vec - "http://foo:/abc/def.html") - (set-url:os-type! 'unix) + "file:\\\\d\\c\\abc\\def.html")) + (parameterize ([file-url-path-convention-type 'unix]) + ;; but no effect on http://foo:/... 
+ (test #("http" #f "foo" #f #t (#("abc") #("def.html")) () #f) + string->url/vec + "http://foo:/abc/def.html")) + (parameterize ([file-url-path-convention-type 'windows]) + (test #("http" #f "foo" #f #t (#("abc") #("def.html")) () #f) + string->url/vec + "http://foo:/abc/def.html")) + + (test "file:///c:/a/b" + url->string (path->url (bytes->path #"c:\\a\\b" 'windows))) + (test "file:///c:/a/b" + url->string (path->url (bytes->path #"\\\\?\\c:\\a\\b" 'windows))) + + (test #"/a/b/c" path->bytes + (url->path (path->url (bytes->path #"/a/b/c" 'unix)) 'unix)) + (test #"a/b/c" path->bytes + (url->path (path->url (bytes->path #"a/b/c" 'unix)) 'unix)) + (test #"c:\\a\\b" path->bytes + (url->path (path->url (bytes->path #"c:/a/b" 'windows)) 'windows)) + (test #"a\\b" path->bytes + (url->path (path->url (bytes->path #"a/b" 'windows)) 'windows)) + (test #"\\\\d\\c\\a" path->bytes + (url->path (path->url (bytes->path #"//d/c/a" 'windows)) 'windows)) + (test #"c:\\a\\b" path->bytes + (url->path (path->url (bytes->path #"\\\\?\\c:\\a\\b" 'windows)) 'windows)) + (test #"\\\\d\\c\\a\\b" path->bytes + (url->path (path->url (bytes->path #"\\\\?\\UNC\\d\\c\\a\\b" 'windows)) 'windows)) + (test #"\\\\?\\c:\\a/x\\b" path->bytes + (url->path (path->url (bytes->path #"\\\\?\\c:\\a/x\\b" 'windows)) 'windows)) + (test #"\\\\?\\UNC\\d\\c\\a/x\\b" path->bytes + (url->path (path->url (bytes->path #"\\\\?\\UNC\\d\\\\c\\a/x\\b" 'windows)) 'windows)) + ;; see PR8809 (value-less keys in the query part) (test-s->u #("http" #f "foo.bar" #f #t (#("baz")) ((ugh . #f)) #f) "http://foo.bar/baz?ugh")