From 8e59e04b24d858aa2904dc1a403e00500fea9f2c Mon Sep 17 00:00:00 2001
From: Matthew Flatt <mflatt@racket-lang.org>
Date: Thu, 10 Apr 2008 19:05:35 +0000
Subject: [PATCH] change -q to config option, fix unlikely race condition in
 wake-on-signal, add path->url and url->path, and adjust URL parsing

svn: r9239

original commit: 0153e122b7423d578acc480904f7a0fcfd52f46d
---
 collects/net/scribblings/url.scrbl | 73 +++++++++++++++++++++++++++---
 collects/tests/mzscheme/net.ss     | 67 +++++++++++++++++++--------
 2 files changed, 116 insertions(+), 24 deletions(-)

diff --git a/collects/net/scribblings/url.scrbl b/collects/net/scribblings/url.scrbl
index 7ad210140f..f0aa400672 100644
--- a/collects/net/scribblings/url.scrbl
+++ b/collects/net/scribblings/url.scrbl
@@ -73,7 +73,16 @@ URL that ends in a slash. For example, the result of
 @scheme[(string->url "http://www.drscheme.org/a/")] has a
 @scheme[path] field with strings @scheme["a"] and @scheme[""], while
 the result of @scheme[(string->url "http://www.drscheme.org/a")] has a
-@scheme[path] field with only the string @scheme["a"].}
+@scheme[path] field with only the string @scheme["a"].
+
+When a @scheme["file"] URL is represented by a @scheme[url] structure,
+the @scheme[path] field is mostly a list of path elements. For Unix
+paths, the root directory is not included in @scheme[path]; its
+presence or absence is implicit in the @scheme[path-absolute?] flag.
+For Windows paths, the first element typically represents a drive, but
+a UNC path is represented by a first element that is @scheme[""],
+followed by successive elements that complete the drive components
+that are separated by @litchar{/} or @litchar{\}.}
 
 @defstruct[path/param ([path (or/c string? (one-of/c 'up 'same))]
                        [param (listof string?)])]{
@@ -95,7 +104,26 @@ Parses the URL specified by @scheme[str] into a @scheme[url]
 struct. The @scheme[string->url] procedure uses
 @scheme[form-urlencoded->alist] when parsing the query, so it is
 sensitive to the @scheme[current-alist-separator-mode] parameter for
-determining the association separator.}
+determining the association separator.
+
+If @scheme[str] starts with @scheme["file:"], then the path is always
+parsed as an absolute path, and the parsing details depend on
+@scheme[file-url-path-convention-type]:
+
+@itemize[
+
+ @item{@scheme['unix] : If @scheme["file:"] is followed by
+       @litchar{//} and a non-@litchar{/}, then the first element
+       after the @litchar{//} is parsed as a host (and maybe port);
+       otherwise, the first element starts the path, and the host is
+       @scheme[""].}
+
+ @item{@scheme['windows] : If @scheme["file:"] is followed by
+       @litchar{//}, then the @litchar{//} is stripped; the remainder
+       is parsed as a Windows path. The host is always @scheme[""] and
+       the port is always @scheme[#f].}
+
+]}
 
 
 @defproc[(combine-url/relative [base url?] [relative string?]) url?]{
@@ -117,17 +145,50 @@ scheme @scheme["http"].}
 
 @defproc[(url->string [URL url?]) string?]{
 
-Generates a string corresponding to the contents of a @scheme[url] struct.
-For a @scheme["file:"] URL, empty strings in the path list are treated as
-@scheme['same] for @scheme[build-path].
+Generates a string corresponding to the contents of a @scheme[url]
+struct.  For a @scheme["file:"] URL, the URL must not be relative, the
+result always starts with @litchar{file://}, and the interpretation of
+the path depends on the value of @scheme[file-url-path-convention-type]:
+
+@itemize[
+
+ @item{@scheme['unix] : Elements in @scheme[URL] are treated as path
+       elements. Empty strings in the path list are treated like
+       @scheme['same].}
+
+ @item{@scheme['windows] : If the first element is @scheme[""] then
+       the next two elements define the UNC root, and the rest of the
+       elements are treated as path elements. Empty strings in the
+       path list are treated like @scheme['same].}
+
+]
 
 The @scheme[url->string] procedure uses
-@scheme[alist->form-urlencoded] when formatting the query, so it it
+@scheme[alist->form-urlencoded] when formatting the query, so it is
 sensitive to the @scheme[current-alist-separator-mode] parameter for
 determining the association separator. The default is to separate
 associations with a @litchar{&}.}
 
 
+@defproc[(path->url [path (or/c path-string? path-for-some-system?)])
+         url?]{
+
+Converts a path to a @scheme[url].}
+
+
+@defproc[(url->path [URL url?]
+                    [kind (one-of/c 'unix 'windows) (system-path-convention-type)])
+         path-for-some-system?]{
+
+Converts @scheme[URL], which is assumed to be a @scheme["file"] URL,
+to a path.}
+
+
+@defparam[file-url-path-convention-type kind (one-of/c 'unix 'windows)]{
+
+Determines the default conversion to and from strings for
+@scheme["file"] URLs. See @scheme[string->url] and @scheme[url->string].}
+
 
 @deftogether[(
 @defproc[(get-pure-port [URL url?]
diff --git a/collects/tests/mzscheme/net.ss b/collects/tests/mzscheme/net.ss
index 6bec67a8a8..c467128119 100644
--- a/collects/tests/mzscheme/net.ss
+++ b/collects/tests/mzscheme/net.ss
@@ -11,7 +11,6 @@
 (require net/url
 	 net/uri-codec
          mzlib/string
-         net/url-unit ; to get set-url:os-type!
          )
 
 (test "%Pq" uri-decode "%Pq")
@@ -280,8 +279,10 @@
   (err/rt-test (string->url "a b://www.foo.com/") url-exception?)
 
   ;; test file: urls
-  (test-s->u #("file" #f #f #f #t (#("abc") #("def.html")) () #f)
-             "file:/abc/def.html")
+  (test-s->u #("file" #f "" #f #t (#("abc") #("def.html")) () #f)
+             "file:///abc/def.html")
+  (test "file:///abc/def.html" url->string (string->url "file:///abc/def.html"))
+  (test "file://a/b" url->string (string->url "file://a/b"))
 
   (test-s->u #("file" #f "localhost" #f #t (#("abc") #("def.html")) () #f)
              "file://localhost/abc/def.html")
@@ -289,26 +290,56 @@
   ;; test files: urls with colons, and the different parsing on Windows
   (test-s->u #("file" #f "localhost" 123 #t (#("abc") #("def.html")) () #f)
              "file://localhost:123/abc/def.html")
-  (set-url:os-type! 'unix)
-  ;; different parse for file://foo:/...
-  (test #("file" #f "foo" #f #t (#("abc") #("def.html")) () #f)
+  (parameterize ([file-url-path-convention-type 'unix])
+    ;; different parse for file://foo:/...
+    (test #("file" #f "foo" #f #t (#("abc") #("def.html")) () #f)
+          string->url/vec
+          "file://foo:/abc/def.html"))
+  (parameterize ([file-url-path-convention-type 'windows])
+    (test #("file" #f "" #f #t (#("foo:") #("abc") #("def.html")) () #f)
         string->url/vec
         "file://foo:/abc/def.html")
-  (set-url:os-type! 'windows)
-  (test #("file" #f #f #f #f (#("foo:") #("abc") #("def.html")) () #f)
+    (test #("file" #f "" #f #t (#("c:") #("abc") #("def.html")) () #f)
         string->url/vec
-        "file://foo:/abc/def.html")
-  (set-url:os-type! 'unix)
-  ;; but no effect on http://foo:/...
-  (test #("http" #f "foo" #f #t (#("abc") #("def.html")) () #f)
+        "file://c:/abc/def.html")
+    (test #("file" #f "" #f #t (#("") #("d") #("c") #("abc") #("def.html")) () #f)
         string->url/vec
-        "http://foo:/abc/def.html")
-  (set-url:os-type! 'windows)
-  (test #("http" #f "foo" #f #t (#("abc") #("def.html")) () #f)
-        string->url/vec
-        "http://foo:/abc/def.html")
-  (set-url:os-type! 'unix)
+        "file:\\\\d\\c\\abc\\def.html"))
 
+  (parameterize ([file-url-path-convention-type 'unix])
+    ;; but no effect on http://foo:/...
+    (test #("http" #f "foo" #f #t (#("abc") #("def.html")) () #f)
+          string->url/vec
+          "http://foo:/abc/def.html"))
+  (parameterize ([file-url-path-convention-type 'windows])
+    (test #("http" #f "foo" #f #t (#("abc") #("def.html")) () #f)
+          string->url/vec
+          "http://foo:/abc/def.html"))
+
+  (test "file:///c:/a/b"
+        url->string (path->url (bytes->path #"c:\\a\\b" 'windows)))    
+  (test "file:///c:/a/b"
+        url->string (path->url (bytes->path #"\\\\?\\c:\\a\\b" 'windows)))
+
+  (test #"/a/b/c" path->bytes
+        (url->path (path->url (bytes->path #"/a/b/c" 'unix)) 'unix))
+  (test #"a/b/c" path->bytes
+        (url->path (path->url (bytes->path #"a/b/c" 'unix)) 'unix))
+  (test #"c:\\a\\b" path->bytes
+        (url->path (path->url (bytes->path #"c:/a/b" 'windows)) 'windows))
+  (test #"a\\b" path->bytes
+        (url->path (path->url (bytes->path #"a/b" 'windows)) 'windows))
+  (test #"\\\\d\\c\\a" path->bytes
+        (url->path (path->url (bytes->path #"//d/c/a" 'windows)) 'windows))
+  (test #"c:\\a\\b" path->bytes
+        (url->path (path->url (bytes->path #"\\\\?\\c:\\a\\b" 'windows)) 'windows))
+  (test #"\\\\d\\c\\a\\b" path->bytes
+        (url->path (path->url (bytes->path #"\\\\?\\UNC\\d\\c\\a\\b" 'windows)) 'windows))
+  (test #"\\\\?\\c:\\a/x\\b" path->bytes
+        (url->path (path->url (bytes->path #"\\\\?\\c:\\a/x\\b" 'windows)) 'windows))
+  (test #"\\\\?\\UNC\\d\\c\\a/x\\b" path->bytes
+        (url->path (path->url (bytes->path #"\\\\?\\UNC\\d\\\\c\\a/x\\b" 'windows)) 'windows))
+    
   ;; see PR8809 (value-less keys in the query part)
   (test-s->u #("http" #f "foo.bar" #f #t (#("baz")) ((ugh . #f)) #f)
              "http://foo.bar/baz?ugh")