diff --git a/collects/browser/private/html.ss b/collects/browser/private/html.ss
index e60f07dac1..6f683ba665 100644
--- a/collects/browser/private/html.ss
+++ b/collects/browser/private/html.ss
@@ -234,8 +234,7 @@
(copy-port ip op)))))
'truncate)
(pop-status)
- (let* ([upath (url-path url)]
- [bitmap (make-object bitmap% tmp-filename)])
+ (let ([bitmap (make-object bitmap% tmp-filename)])
(with-handlers ([exn:fail?
(lambda (x)
(message-box "Warning"
diff --git a/collects/browser/private/hyper.ss b/collects/browser/private/hyper.ss
index 9b08697b98..95f2e34179 100644
--- a/collects/browser/private/hyper.ss
+++ b/collects/browser/private/hyper.ss
@@ -73,7 +73,8 @@ A test case:
;; assume that url-paths are all strings
;; (other wise the pages are treated as different)
- (equal? (url-path a) (url-path b))
+ (equal? (map path/param-path (url-path a))
+ (map path/param-path (url-path b)))
(equal? (url-query a) (url-query b)))))
@@ -108,7 +109,8 @@ A test case:
(normal-case-path (normalize-path (build-path (collection-path "mzlib")
'up
'up)))
- (normal-case-path (normalize-path (apply build-path (url-path url))))))]
+ (normal-case-path (normalize-path (apply build-path
+ (map path/param-path (url-path url)))))))]
[else (inner #f url-allows-evaling? url)]))
(define doc-notes null)
@@ -330,7 +332,7 @@ A test case:
(let ([p (url-path url)])
(and (not (null? p))
(regexp-match #rx"[.][^.]*$"
- (car (last-pair p))))))]
+ (path/param-path (car (last-pair p)))))))]
[html? (or (and mime-type (regexp-match #rx"text/html" mime-type))
(member path-extension '(".html" ".htm")))]
[text? (or (and mime-type (regexp-match #rx"text/plain" mime-type))
@@ -349,7 +351,9 @@ A test case:
(let* ([orig-name (and (url? url)
(let ([p (url-path url)])
(and (not (null? p))
- (car (last-pair p)))))]
+ (let ([lp (path/param-path (car (last-pair p)))])
+ (and (not (string=? "" lp))
+ lp)))))]
[size (let ([s (extract-field "content-length" mime-headers)])
(and s (let ([m (regexp-match #rx"[0-9]+" s)])
(and m (string->number (car m))))))]
@@ -447,8 +451,7 @@ A test case:
(queue-callback (lambda () (semaphore-post wait-to-start)))
(send d show #t)
(when exn
- (raise (make-exn:tcp-problem (exn-message exn)
- (current-continuation-marks)))))
+ (raise (make-exn:tcp-problem (exn-message exn) (current-continuation-marks)))))
(let ([sema (make-semaphore 0)])
(when (and tmp-plt-filename install?)
(run-installer tmp-plt-filename
@@ -467,7 +470,8 @@ A test case:
(current-continuation-marks)))))]
[(or (and (url? url)
(not (null? (url-path url)))
- (regexp-match "[.]html?$" (car (last-pair (url-path url)))))
+ (regexp-match #rx"[.]html?$"
+ (path/param-path (car (last-pair (url-path url))))))
(port? url)
html?)
; HTML
@@ -475,7 +479,7 @@ A test case:
(let* ([directory
(or (if (and (url? url)
(string=? "file" (url-scheme url)))
- (let ([path (url-path url)])
+ (let ([path (apply build-path (map path/param-path (url-path url)))])
(let-values ([(base name dir?) (split-path path)])
(if (string? base)
base
diff --git a/collects/help/private/gui.ss b/collects/help/private/gui.ss
index 2a142aa3d9..ec62f1a192 100644
--- a/collects/help/private/gui.ss
+++ b/collects/help/private/gui.ss
@@ -105,7 +105,7 @@
;; they will be caught elsewhere.
[(and (url-path url)
(not (null? (url-path url)))
- (regexp-match #rx".plt$" (car (last-pair (url-path url)))))
+ (regexp-match #rx".plt$" (path/param-path (car (last-pair (url-path url))))))
url]
;; files on download.plt-scheme.org in /doc are considered
@@ -119,7 +119,7 @@
(let* ([path (url-path url)]
[coll (and (pair? path)
(pair? (cdr path))
- (cadr path))]
+ (path/param-path (cadr path)))]
[coll-path (and coll (string->path coll))]
[doc-pr (and coll-path (assoc coll-path known-docs))])
@@ -136,7 +136,9 @@
url]
;; send the url off to another browser
- [(or (and (preferences:get 'drscheme:help-desk:ask-about-external-urls)
+ [(or (and (string? (url-scheme url))
+ (not (equal? (url-scheme url) "http")))
+ (and (preferences:get 'drscheme:help-desk:ask-about-external-urls)
(ask-user-about-separate-browser))
(preferences:get 'drscheme:help-desk:separate-browser))
(send-url (url->string url))
@@ -240,7 +242,7 @@
(define (is-download.plt-scheme.org/doc-url? url)
(and (equal? "download.plt-scheme.org" (url-host url))
(not (null? (url-path url)))
- (equal? (car (url-path url)) "^/doc")))
+ (equal? (path/param-path (car (url-path url))) "doc")))
(define (ask-user-about-separate-browser)
(define separate-default? (preferences:get 'drscheme:help-desk:separate-browser))
diff --git a/collects/help/private/tcp-intercept.ss b/collects/help/private/tcp-intercept.ss
index 602e1263ce..c74ce8258f 100644
--- a/collects/help/private/tcp-intercept.ss
+++ b/collects/help/private/tcp-intercept.ss
@@ -39,7 +39,8 @@
(url-user url)
""
#f
- (url-path url)
+ (url-path-absolute? url)
+ (url-path url)
(url-query url)
(url-fragment url)))])
(substring long 3 (string-length long)))]
diff --git a/collects/net/doc.txt b/collects/net/doc.txt
index 7a0ee9aa7e..a021d10cb8 100644
--- a/collects/net/doc.txt
+++ b/collects/net/doc.txt
@@ -29,38 +29,31 @@ http://www.ietf.org/rfc/rfc2396.txt
TYPES ----------------------------------------------------------------
-> url
- struct url (scheme user host port path fragment)
- scheme : string or #f
- user : string or #f
- host : string or #f
- port : number or #f
- path : (listof (union string path/param))
- query : (listof (cons symbol string))
- fragment : string or #f
+_url struct_
+ (define-struct url (scheme user host port path-absolute? path query fragment))
+> url-scheme : url -> (union false/c string?)
+> url-user : url -> (union false/c string?)
+> url-host : url -> (union false/c string?)
+> url-port : url -> (union false/c number?)
+> url-path-absolute? : url -> boolean?
+> url-path : url -> (listof path/param?)
+> url-query : url -> (listof (cons/c symbol? string?))
+> url-fragment : url -> (union false/c string?)
+> url? : any -> boolean
+> make-url : ...as-above.. -> url
- The basic structure for all URLs.
+ The basic structure for all URLs, as explained in rfc3986
+ http://www.ietf.org/rfc/rfc3986.txt
+
+For example, this url:
http://sky@www.cs.brown.edu:801/cgi-bin/finger;xyz?name=shriram;host=nw#top
- {-1} {2} {----3---------} {4}{---5---------} {6} {----7-------------} {8}
+ {-1} {2} {----3---------} {4}{---5-------------}{----7-------------} {8}
+ {6}
1 = scheme, 2 = user, 3 = host, 4 = port,
- 5 = path, 6 = param, 7 = query, 8 = fragment
-
- If the scheme is "file", then the path is a platform-dependent
- string. The library does, however, check for the presence of a
- fragment designator and, if there is one, separates it from the rest
- of the path. If the path is syntactically a directory, the last
- string the resulting structure's `path' list is an empty string.
- If the path is absolute, the `host' is the root path, otherwise
- `host' is #f.
-
- For non-"file" schemes, the path is a URL path as defined in the
- standard.
-
- If a path segment has a parameter, it is represented with
- an instance of the path/param struct. Otherwise, it is
- just represented as a string.
+ 5 = path, 6 = param (or last path segment),
+ 7 = query, 8 = fragment
The strings inside the fields user, path, query, and fragment are
represented directly as Scheme strings, ie without
@@ -74,16 +67,20 @@ TYPES ----------------------------------------------------------------
An empty string at the end of the list of paths
corresponds to a url that ends in a slash. For example,
- this url: http://www.drscheme.org/a/ has a path field
- '("a" "") and this url: http://www.drscheme.org/a
- has a path field '("a").
+ this url: http://www.drscheme.org/a/ has a path field with
+ strings "a" and "" and this url: http://www.drscheme.org/a
+ has a path field with only the string "a".
-> path/param
+_ path/param struct_
+ (define-struct path/param (path param))
- A pair of strings, accessible with _path/param-path_ and
- path/param-params_ that joins a path segment with its
- params in a url. The function _path/param?_ recognizes
- such pairs.
+> path/param-path : path/param -> (union string? (symbols 'up 'same))
+> path/param-param : path/param -> (listof string)
+> path/param? : any -> boolean
+> make-path/param : (union string? (symbols 'up 'same)) (listof string) -> path/param
+
+ A pair, that joins a path segment with its params in a
+ url.
> pure-port
@@ -114,42 +111,9 @@ PROCEDURES -----------------------------------------------------------
Given a base URL and a relative path, combines the two and returns a
new URL as per the URL combination specification. Call the
arguments base and relative. They are combined according to the
- following rules (applied in order until one matches):
- - If either argument is an empty URL, the result is the other
- argument.
- - If relative sports a scheme, then the result is relative.
- - If the base has scheme "file", the procedure uses the special rule
- specified below.
- - If relative specifies a host, then the result is relative.
- Failing the above, relative inherit's base's host and port. Then:
- - If the path of relative begins with a "/", the result is relative.
- - If the path of relative is empty, then relative inherits base's
- params and query, and the result is relative.
- - Otherwise base and relative are combined as per the standard
- specification of merging and normalization.
+ rules in rfc3986 (above).
- On combining "file" schemes:
-
- If the base has scheme "file", relative is treated as a
- slash-separated path (unless it contains an empty path --- only
- params, queries, and fragments --- in which case the path is not
- used). These path fragments are combined using build-path, starting
- with the base's path. Three path segments are special: ".."
- corresponds to an 'up directive to build-path, while "." and ""
- correspond to 'same. As a consequence, if relative begins with "/",
- this does not make it an absolute URL: the leading slash is treated
- as if the initial segment is "", so this has no effect, and base's
- path remains the base path of the result. If base refers to a
- directory, relative is indexed from that directory; if base refers
- to a file, relative is indexed from the directory containing the
- file. Note that if base does not refer to an actual directory that
- exists on the filesystem, then it must syntactically be a directory
- as understood by split-path.
-
- The above algorithm tests for the presence of a directory to
- correctly combine paths. As a result, it can raise any exception
- raised by directory-exists?. None of these exceptions is trapped by
- the procedure; consumers must be prepared for them.
+ This function does not raise any exceptions.
> (netscape/string->url string) -> url
diff --git a/collects/net/uri-codec-sig.ss b/collects/net/uri-codec-sig.ss
index f0aed959e9..2d13a558cf 100644
--- a/collects/net/uri-codec-sig.ss
+++ b/collects/net/uri-codec-sig.ss
@@ -5,6 +5,8 @@
(define-signature net:uri-codec^
(uri-encode
uri-decode
+ uri-path-segment-encode
+ uri-path-segment-decode
form-urlencoded-encode
form-urlencoded-decode
alist->form-urlencoded
diff --git a/collects/net/uri-codec-unit.ss b/collects/net/uri-codec-unit.ss
index 5c141f5127..d833fb3da5 100644
--- a/collects/net/uri-codec-unit.ss
+++ b/collects/net/uri-codec-unit.ss
@@ -1,3 +1,7 @@
+;; 1/2/2006: Added a mapping for uri path segments
+;; that allows more characters to remain decoded
+;; -robby
+
;;;
;;; ---- En/Decode URLs and form-urlencoded data
;;; Time-stamp: <03/04/25 10:31:31 noel>
@@ -75,6 +79,7 @@
(lib "match.ss")
(lib "string.ss")
(lib "etc.ss")
+ (lib "list.ss")
"uri-codec-sig.ss")
(provide uri-codec@)
@@ -109,13 +114,19 @@
;; Characters that sometimes map to themselves
(define safe-mapping
- (map (lambda (char)
- (cons char char))
+ (map (lambda (char) (cons char char))
'(#\- #\_ #\. #\! #\~ #\* #\' #\( #\))))
;; The strict URI mapping
(define uri-mapping
- (append alphanumeric-mapping safe-mapping))
+ (append alphanumeric-mapping
+ safe-mapping))
+
+ ;; The uri path segment mapping from RFC 3986
+ (define uri-path-segment-mapping
+ (append alphanumeric-mapping
+ safe-mapping
+ (map (λ (c) (cons c c)) (string->list "@+,=$&:"))))
;; The form-urlencoded mapping
(define form-urlencoded-mapping
@@ -156,6 +167,10 @@
(define-values (uri-encoding-vector uri-decoding-vector)
(make-codec-tables uri-mapping))
+
+ (define-values (uri-path-segment-encoding-vector
+ uri-path-segment-decoding-vector)
+ (make-codec-tables uri-path-segment-mapping))
(define-values (form-urlencoded-encoding-vector
form-urlencoded-decoding-vector)
@@ -198,7 +213,15 @@
;; string -> string
(define (uri-decode str)
(decode uri-decoding-vector str))
-
+
+ ;; string -> string
+ (define (uri-path-segment-encode str)
+ (encode uri-path-segment-encoding-vector str))
+
+ ;; string -> string
+ (define (uri-path-segment-decode str)
+ (decode uri-path-segment-decoding-vector str))
+
;; string -> string
(define (form-urlencoded-encode str)
(encode form-urlencoded-encoding-vector str))
diff --git a/collects/net/url-structs.ss b/collects/net/url-structs.ss
index ac8d1d6d51..d6feda78af 100644
--- a/collects/net/url-structs.ss
+++ b/collects/net/url-structs.ss
@@ -1,7 +1,7 @@
(module url-structs mzscheme
(require (lib "contract.ss"))
- (define-struct url (scheme user host port path query fragment))
+ (define-struct url (scheme user host port path-absolute? path query fragment))
(define-struct path/param (path param))
(provide/contract
@@ -9,8 +9,9 @@
[user (union false/c string?)]
[host (union false/c string?)]
[port (union false/c number?)]
- [path (listof (union string? path/param?))]
+ [path-absolute? boolean?]
+ [path (listof path/param?)]
[query (listof (cons/c symbol? string?))]
[fragment (union false/c string?)]))
- (struct path/param ([path string?]
- [param string?]))))
\ No newline at end of file
+ (struct path/param ([path (union string? (symbols 'up 'same))]
+ [param (listof string?)]))))
diff --git a/collects/net/url-unit.ss b/collects/net/url-unit.ss
index b43ff6dd19..06d70942af 100644
--- a/collects/net/url-unit.ss
+++ b/collects/net/url-unit.ss
@@ -13,7 +13,8 @@
(require (lib "file.ss")
(lib "unitsig.ss")
(lib "port.ss")
- "url-structs.ss"
+ (lib "string.ss")
+ "url-structs.ss"
"uri-codec.ss"
"url-sig.ss"
"tcp-sig.ss")
@@ -64,7 +65,9 @@
(define (url->file-path url)
(path->string
(apply build-path (or (url-host url) 'same)
- (map (lambda (x) (if (equal? x "") 'same x)) (url-path url)))))
+ (map (lambda (x) (if (equal? x "") 'same x))
+ (map path/param-path
+ (url-path url))))))
(define url->string
(lambda (url)
@@ -83,13 +86,18 @@
(string-append "#" fragment))))
(else
(let ((sa string-append))
- (sa (if scheme (sa scheme "://") "")
- (if user (sa (uri-encode user) "@") "")
- (if host host "")
- (if port (sa ":" (number->string port)) "")
- ; There used to be a "/" here, but that causes an
- ; extra leading slash -- wonder why it ever worked!
- (combine-path-strings path)
+ (sa (if scheme (sa scheme ":") "")
+ (if (or user host port)
+ (sa
+ "//"
+ (if user (sa (uri-encode user) "@") "")
+ (if host host "")
+ (if port (sa ":" (number->string port)) "")
+ ; There used to be a "/" here, but that causes an
+ ; extra leading slash -- wonder why it ever worked!
+ )
+ "")
+ (combine-path-strings (url-path-absolute? url) path)
;(if query (sa "?" (uri-encode query)) "")
(if (null? query) "" (sa "?" (alist->form-urlencoded query)))
(if fragment (sa "#" (uri-encode fragment)) ""))))))))
@@ -126,7 +134,8 @@
(url->string
(if proxy
url
- (make-url #f #f #f #f
+ (make-url #f #f #f #f
+ (url-path-absolute? url)
(url-path url)
(url-query url)
(url-fragment url))))))
@@ -223,62 +232,95 @@
(not (url-fragment url))
(null? (url-path url)))))
- (define (combine-url/relative base string)
- (let ([relative (string->url string)])
+
+ ;; transliteration of code in rfc 3986, section 5.2.2
+ (define (combine-url/relative Base string)
+ (let ([R (string->url string)]
+ [T (make-url #f #f #f #f #f '() '() #f)])
+ (if (url-scheme R)
+ (begin
+ (set-url-scheme! T (url-scheme R))
+ (set-url-user! T (url-user R)) ;; authority
+ (set-url-host! T (url-host R)) ;; authority
+ (set-url-port! T (url-port R)) ;; authority
+ (set-url-path-absolute?! T (url-path-absolute? R))
+ (set-url-path! T (remove-dot-segments (url-path R)))
+ (set-url-query! T (url-query R)))
+ (begin
+ (if (url-host R) ;; => authority is defined
+ (begin
+ (set-url-user! T (url-user R)) ;; authority
+ (set-url-host! T (url-host R)) ;; authority
+ (set-url-port! T (url-port R)) ;; authority
+ (set-url-path-absolute?! T (url-path-absolute? R))
+ (set-url-path! T (remove-dot-segments (url-path R)))
+ (set-url-query! T (url-query R)))
+ (begin
+ (if (null? (url-path R)) ;; => R has empty path
+ (begin
+ (set-url-path-absolute?! T (url-path-absolute? Base))
+ (set-url-path! T (url-path Base))
+ (if (not (null? (url-query R)))
+ (set-url-query! T (url-query R))
+ (set-url-query! T (url-query Base))))
+ (begin
+ (cond
+ [(url-path-absolute? R)
+ (set-url-path-absolute?! T #t)
+ (set-url-path! T (remove-dot-segments (url-path R)))]
+ [(and (null? (url-path Base))
+ (url-host Base))
+ (set-url-path-absolute?! T #t)
+ (set-url-path! T (remove-dot-segments (url-path R)))]
+ [else
+ (set-url-path-absolute?! T (url-path-absolute? Base))
+ (set-url-path! T (remove-dot-segments
+ (append (all-but-last (url-path Base))
+ (url-path R))))])
+ (set-url-query! T (url-query R))))
+ (set-url-user! T (url-user Base)) ;; authority
+ (set-url-host! T (url-host Base)) ;; authority
+ (set-url-port! T (url-port Base)))) ;; authority
+ (set-url-scheme! T (url-scheme Base))))
+ (set-url-fragment! T (url-fragment R))
+ T))
+
+ (define (all-but-last lst)
+ (cond
+ [(null? lst) null]
+ [(null? (cdr lst)) null]
+ [else (cons (car lst) (all-but-last (cdr lst)))]))
+
+ ;; cribbed from 5.2.4 in rfc 3986
+ ;; the strange cases 2 and 4 implicitly change urls
+ ;; with paths segments "." and ".." at the end
+ ;; into "./" and "../" respectively
+ (define (remove-dot-segments path)
+ (let loop ([path path]
+ [result '()])
(cond
- [(empty-url? base) ; Step 1
- relative]
- [(empty-url? relative) ; Step 2a
- base]
- [(url-scheme relative) ; Step 2b
- relative]
- [else ; Step 2c
- (set-url-scheme! relative (url-scheme base))
- (cond
- [(url-host relative) ; Step 3
- relative]
- [else
- (set-url-host! relative (url-host base))
- (set-url-port! relative (url-port base)) ; Unspecified!
- (let ([rel-path (url-path relative)])
- (cond
- [(and (not (equal? string "")) ; Step 4
- (char=? #\/ (string-ref string 0)))
- relative]
- [(or (not rel-path) ; Step 5
- (null? rel-path))
- (set-url-path! relative (url-path base))
- (when (url-query relative)
- (set-url-query! relative (url-query base)))
- relative]
- [else ; Step 6
- (merge-and-normalize
- (url-path base) relative)]))])])))
-
- (define (merge-and-normalize base-path relative-url)
- (let* ([joined
- (let loop ([base-path base-path])
- (cond
- [(null? base-path) (url-path relative-url)]
- [(null? (cdr base-path)) (url-path relative-url)]
- [else (cons (car base-path) (loop (cdr base-path)))]))]
- [reversed/simplified
- (if (null? joined)
- null
- (let loop ([segs (reverse joined)])
- (cond
- [(null? segs) null]
- [else (let ([fst (car segs)])
- (cond
- [(string=? fst ".")
- (loop (cdr segs))]
- [(string=? fst "..")
- (if (null? (cdr segs))
- segs
- (loop (cddr segs)))]
- [else (cons (car segs) (loop (cdr segs)))]))])))])
- (set-url-path! relative-url (reverse reversed/simplified))
- relative-url))
+ [(null? path) (reverse result)]
+ [(and (eq? (path/param-path (car path)) 'same)
+ (null? (cdr path)))
+ (loop (cdr path)
+ (cons (make-path/param "" '()) result))]
+ [(eq? (path/param-path (car path)) 'same)
+ (loop (cdr path)
+ result)]
+ [(and (eq? (path/param-path (car path)) 'up)
+ (null? (cdr path))
+ (not (null? result)))
+ (loop (cdr path)
+ (cons (make-path/param "" '()) (cdr result)))]
+ [(and (eq? (path/param-path (car path)) 'up)
+ (not (null? result)))
+ (loop (cdr path) (cdr result))]
+ [(and (eq? (path/param-path (car path)) 'up)
+ (null? result))
+ ;; when we go up too far, just drop the "up"s.
+ (loop (cdr path) result)]
+ [else
+ (loop (cdr path) (cons (car path) result))])))
;; call/input-url : url x (url -> in-port) x (in-port -> T)
;; [x list (str)] -> T
@@ -363,9 +405,10 @@
#f ; user
(and root (path->string root)) ; host
#f ; port
- (append (map path->string elems)
+ (absolute-path? path)
+ (append (map (λ (x) (make-path/param (path->string x) '())) elems)
(if (eq? kind 'dir)
- '("")
+ (list (make-path/param "" '()))
null))
'() ; query
fragment))
@@ -383,20 +426,27 @@
(get-num (lambda (pos skip-left skip-right)
(let ((s (get-str pos skip-left skip-right)))
(if s (string->number s) #f))))
- (host (get-str 5 0 0)))
- (make-url (get-str 2 0 1) ; scheme
+ (host (get-str 5 0 0))
+ (path (get-str 7 0 0))
+ (scheme (get-str 2 0 1)))
+ (when (string? scheme) (string-lowercase! scheme))
+ (when (string? host) (string-lowercase! host))
+ (make-url scheme
(uri-decode/maybe (get-str 4 0 1)) ; user
host
(get-num 6 1 0) ; port
+ (and (not (= 0 (string-length path)))
+ (char=? #\/ (string-ref path 0)))
(separate-path-strings
- (let ([path (get-str 7 0 0)])
- ;; If path is "" and the input is an absolute URL
- ;; with a hostname, then the intended path is "/",
- ;; but the URL is missing a "/" at the end.
- (if (and (string=? path "")
- host)
- "/"
- path)))
+ ;; If path is "" and the input is an absolute URL
+ ;; with a hostname, then the intended path is "/",
+ ;; but the URL is missing a "/" at the end.
+ path
+ #;
+ (if (and (string=? path "")
+ host)
+ "/"
+ path))
;(uri-decode/maybe (get-str 8 1 0)) ;
;query
(let ([q (get-str 8 1 0)])
@@ -411,7 +461,7 @@
;; in an attempt to be "friendly"
(and f (uri-decode (regexp-replace* "%([^0-9a-fA-F])" f "%25\\1"))))
- ;; separate-path-strings : string[starting with /] -> (listof (union string path/param))
+ ;; separate-path-strings : string[starting with /] -> (listof path/param)
(define (separate-path-strings str)
(cond
[(string=? str "") '()]
@@ -423,31 +473,57 @@
[(regexp-match #rx"([^/]*)/(.*)$" str)
=>
(lambda (m)
- (cons (maybe-separate-params (cadr m)) (loop (caddr m))))]
- [else (list (maybe-separate-params str))]))]))
+ (cons (separate-params (cadr m)) (loop (caddr m))))]
+ [else (list (separate-params str))]))]))
- (define (maybe-separate-params s)
+ (define (separate-params s)
+ (let ([lst (map path-segment-decode (regexp-split #rx";" s))])
+ (make-path/param (car lst) (cdr lst))))
+
+ (define (path-segment-decode p)
(cond
- [(regexp-match #rx"^([^;]*);(.*)$" s)
- =>
- (lambda (m)
- (make-path/param (cadr m) (caddr m)))]
- [else s]))
+ [(string=? p "..") 'up]
+ [(string=? p ".") 'same]
+ [else (uri-path-segment-decode p)]))
+
+ (define (path-segment-encode p)
+ (cond
+ [(eq? p 'up) ".."]
+ [(eq? p 'same) "."]
+ [(equal? p "..") "%2e%2e"]
+ [(equal? p ".") "%2e"]
+ [else (uri-path-segment-encode p)]))
+
+ (define (combine-path-strings absolute? path/params)
+ (cond
+ [(null? path/params) ""]
+ [else
+ (apply
+ string-append
+ (if absolute? "/" "")
+ (add-between
+ "/"
+ (map join-params path/params)))]))
- (define (combine-path-strings strs)
- (apply
+ (define (join-params s)
+ (apply
string-append
- (let loop ([strs strs])
- (cond
- [(null? strs) '()]
- [else (list* "/"
- (maybe-join-params (car strs))
- (loop (cdr strs)))]))))
-
- ;; needs to unquote things!
- (define (maybe-join-params s)
+ (add-between ";"
+ (map
+ path-segment-encode
+ (cons (path/param-path s)
+ (path/param-param s))))))
+
+ (define (add-between bet lst)
(cond
- [(string? s) s]
- [else (string-append (path/param-path s)
- ";"
- (path/param-param s))])))))
+ [(null? lst) null]
+ [(null? (cdr lst)) lst]
+ [else
+ (let loop ([fst (car lst)]
+ [lst (cdr lst)])
+ (cond
+ [(null? lst) (list fst)]
+ [else (list* fst
+ bet
+ (loop (car lst)
+ (cdr lst)))]))])))))
diff --git a/collects/net/url.ss b/collects/net/url.ss
index 33b1bc28e1..9c9400cf74 100644
--- a/collects/net/url.ss
+++ b/collects/net/url.ss
@@ -21,6 +21,7 @@
user
host
port
+ path-absolute?
path
query
fragment))
diff --git a/collects/tests/mzscheme/net.ss b/collects/tests/mzscheme/net.ss
index e29af77c18..7462896b19 100644
--- a/collects/tests/mzscheme/net.ss
+++ b/collects/tests/mzscheme/net.ss
@@ -140,11 +140,12 @@
(let ()
(define (test-s->u vec str)
- (define (string->url/vec str) (url->vec (string->url str)))
- (define (url/vec->string vec) (url->string (vec->url vec)))
(test vec string->url/vec str)
(test str url/vec->string vec))
+ (define (string->url/vec str) (url->vec (string->url str)))
+ (define (url/vec->string vec) (url->string (vec->url vec)))
+
(define (test-c-u/r expected base relative)
(define (combine-url/relative-vec x y)
(url->vec (combine-url/relative (vec->url x) y)))
@@ -156,82 +157,104 @@
(vector-ref vec 1)
(vector-ref vec 2)
(vector-ref vec 3)
- (map (lambda (x) (if (string? x)
- x
- (make-path/param (vector-ref x 0) (vector-ref x 1))))
- (vector-ref vec 4))
- (vector-ref vec 5)
- (vector-ref vec 6)))
+ (vector-ref vec 4)
+ (map (lambda (x)
+ (let ([lst (vector->list x)])
+ (make-path/param (car lst) (cdr lst))))
+ (vector-ref vec 5))
+ (vector-ref vec 6)
+ (vector-ref vec 7)))
(define (url->vec url)
(vector (url-scheme url)
(url-user url)
(url-host url)
(url-port url)
- (map (lambda (x) (if (string? x)
- x
- (vector (path/param-path x) (path/param-param x))))
+ (url-path-absolute? url)
+ (map (lambda (x) (list->vector (cons (path/param-path x) (path/param-param x))))
(url-path url))
(url-query url)
(url-fragment url)))
- (test-s->u (vector #f #f #f #f '("") '() #f)
+ (test-s->u (vector #f #f #f #f #t '(#("")) '() #f)
"/")
- (test-s->u (vector #f #f #f #f '() '() #f)
+ (test-s->u (vector #f #f #f #f #f '() '() #f)
"")
- (test-s->u (vector "http" #f "www.drscheme.org" #f '("") '() #f)
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #f '() '() #f)
+ "http://www.drscheme.org")
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #t '(#("")) '() #f)
"http://www.drscheme.org/")
- (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '() #f)
+
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '() #f)
"http://www.drscheme.org/a/b/c")
- (test-s->u (vector "http" "robby" "www.drscheme.org" #f (list "a" "b" "c") '() #f)
+ (test-s->u (vector "http" "robby" "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '() #f)
"http://robby@www.drscheme.org/a/b/c")
- (test-s->u (vector "http" #f "www.drscheme.org" 8080 (list "a" "b" "c") '() #f)
+ (test-s->u (vector "http" #f "www.drscheme.org" 8080 #t (list #("a") #("b") #("c")) '() #f)
"http://www.drscheme.org:8080/a/b/c")
- (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '() "joe")
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '() "joe")
"http://www.drscheme.org/a/b/c#joe")
- (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tim . "")) #f)
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tim . "")) #f)
"http://www.drscheme.org/a/b/c?tim=")
- (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tim . "")) "joe")
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tim . "")) "joe")
"http://www.drscheme.org/a/b/c?tim=#joe")
- (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tim . "tim")) "joe")
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tim . "tim")) "joe")
"http://www.drscheme.org/a/b/c?tim=tim#joe")
- (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tam . "tom")) "joe")
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tam . "tom")) "joe")
"http://www.drscheme.org/a/b/c?tam=tom#joe")
- (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tam . "tom") (pam . "pom")) "joe")
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tam . "tom") (pam . "pom")) "joe")
"http://www.drscheme.org/a/b/c?tam=tom;pam=pom#joe")
(parameterize ([current-alist-separator-mode 'semi])
- (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tam . "tom") (pam . "pom")) "joe")
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tam . "tom") (pam . "pom")) "joe")
"http://www.drscheme.org/a/b/c?tam=tom;pam=pom#joe"))
(parameterize ([current-alist-separator-mode 'amp])
- (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tam . "tom") (pam . "pom")) "joe")
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tam . "tom") (pam . "pom")) "joe")
"http://www.drscheme.org/a/b/c?tam=tom&pam=pom#joe"))
- (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" #("c" "b")) '() #f)
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c" "b")) '() #f)
"http://www.drscheme.org/a/b/c;b")
- (test-s->u (vector "http" #f "www.drscheme.org" #f (list #("a" "x") "b" #("c" "b")) '() #f)
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a" "x") #("b") #("c" "b")) '() #f)
"http://www.drscheme.org/a;x/b/c;b")
;; test unquoting for %
- (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((ti#m . "")) "jo e")
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((ti#m . "")) "jo e")
"http://www.drscheme.org/a/b/c?ti%23m=#jo%20e")
- (test-s->u (vector "http" #f "www.drscheme.org" #f (list #("a " " a") " b " " c ") '() #f)
- "http://www.drscheme.org/a ; a/ b / c ")
- (test-s->u (vector "http" "robb y" "www.drscheme.org" #f '("") '() #f)
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a " " a") #(" b ") #(" c ")) '() #f)
+ "http://www.drscheme.org/a%20;%20a/%20b%20/%20c%20")
+ (test-s->u (vector "http" "robb y" "www.drscheme.org" #f #t '(#("")) '() #f)
"http://robb%20y@www.drscheme.org/")
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("%a") #("b/") #("c")) '() #f)
+ "http://www.drscheme.org/%25a/b%2f/c")
+
+ ;; test the characters that need to be encoded in paths vs those that do not need to
+ ;; be encoded in paths
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a:@!$&'()*+,=z") #("/?#[];") #("")) '() #f)
+ "http://www.drscheme.org/a:@!$&'()*+,=z/%2f%3f%23%5b%5d%3b/")
+
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #(".") #("..") '#(same) '#(up) #("...") #("abc.def")) '() #f)
+ "http://www.drscheme.org/%2e/%2e%2e/./../.../abc.def")
+ (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("." "") #(".." "") #(same "") #(up "") #("..." "") #("abc.def" "")) '() #f)
+ "http://www.drscheme.org/%2e;/%2e%2e;/.;/..;/...;/abc.def;")
- (test-s->u (vector "mailto" #f #f #f '("robby@plt-scheme.org") () #f)
+
+ (test (vector "http" "ROBBY" "www.drscheme.org" 80 #t '(#("INDEX.HTML" "XXX")) '((T . "P")) "YYY")
+ string->url/vec
+ "HTTP://ROBBY@WWW.DRSCHEME.ORG:80/INDEX.HTML;XXX?T=P#YYY")
+
+ (test-s->u (vector "mailto" #f #f #f #f '(#("robby@plt-scheme.org")) '() #f)
"mailto:robby@plt-scheme.org")
- (let ([empty-url (make-url #f #f #f #f '() '() #f)])
+ (let ([empty-url (make-url #f #f #f #f #f '() '() #f)])
(test-c-u/r (string->url "http://www.drscheme.org")
empty-url
- "http://www.drscheme.org")
- (test-c-u/r (string->url "http://www.drscheme.org")
- (string->url "http://www.drscheme.org")
- ""))
+ "http://www.drscheme.org"))
+
+ (test-c-u/r (string->url "http://www.drscheme.org")
+ (string->url "http://www.drscheme.org")
+ "")
(test-c-u/r (string->url "http://www.mzscheme.org")
(string->url "http://www.drscheme.org/")
"http://www.mzscheme.org")
+
(test-c-u/r (string->url "http://www.drscheme.org/index.html")
(string->url "http://www.drscheme.org/")
"index.html")
@@ -253,13 +276,95 @@
(test-c-u/r (string->url "http://www.drscheme.org/a/b/c/d/index.html")
(string->url "http://www.drscheme.org/a/b/c/")
"d/index.html")
+ (test-c-u/r (string->url "http://www.drscheme.org/a/b/index.html")
+ (string->url "http://www.drscheme.org/a/b/c/")
+ "../index.html")
+ (test-c-u/r (string->url "http://www.drscheme.org/a/b/c/index.html")
+ (string->url "http://www.drscheme.org/a/b/c/")
+ "./index.html")
+ (test-c-u/r (string->url "http://www.drscheme.org/a/b/c/%2e%2e/index.html")
+ (string->url "http://www.drscheme.org/a/b/c/")
+ "%2e%2e/index.html")
+ (test-c-u/r (string->url "http://www.drscheme.org/a/index.html")
+ (string->url "http://www.drscheme.org/a/b/../c/")
+ "../index.html")
+ (test-c-u/r (string->url "http://www.drscheme.org/a/b/c/d/index.html")
+ (string->url "http://www.drscheme.org/a/b/c/d/index.html#ghijkl")
+ "index.html")
+ (test-c-u/r (string->url "http://www.drscheme.org/a/b/c/d/index.html#abcdef")
+ (string->url "http://www.drscheme.org/a/b/c/d/index.html#ghijkl")
+ "#abcdef")
+
(test-c-u/r (string->url "file:///a/b/c/d/index.html")
(string->url "file:///a/b/c/")
"d/index.html")
(test-c-u/r (string->url "file:///a/b/d/index.html")
(string->url "file:///a/b/c")
"d/index.html")
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;
+ ;; tests from rfc 3986
+ ;;
+
+ (for-each
+ (λ (line)
+ (test-c-u/r (string->url (caddr line))
+ (string->url "http://a/b/c/d;p?q")
+ (car line)))
+ '(("g:h" = "g:h")
+ ("g" = "http://a/b/c/g")
+ ("./g" = "http://a/b/c/g")
+ ("g/" = "http://a/b/c/g/")
+ ("/g" = "http://a/g")
+ ("//g" = "http://g")
+ ("?y" = "http://a/b/c/d;p?y")
+ ("g?y" = "http://a/b/c/g?y")
+ ("#s" = "http://a/b/c/d;p?q#s")
+ ("g#s" = "http://a/b/c/g#s")
+ ("g?y#s" = "http://a/b/c/g?y#s")
+ (";x" = "http://a/b/c/;x")
+ ("g;x" = "http://a/b/c/g;x")
+ ("g;x?y#s" = "http://a/b/c/g;x?y#s")
+ ("" = "http://a/b/c/d;p?q")
+ ("." = "http://a/b/c/")
+ ("./" = "http://a/b/c/")
+ (".." = "http://a/b/")
+ ("../" = "http://a/b/")
+ ("../g" = "http://a/b/g")
+ ("../.." = "http://a/")
+ ("../../" = "http://a/")
+ ("../../g" = "http://a/g")
+
+ ;; abnormal examples follow
+
+ ("../../../g" = "http://a/g")
+ ("../../../../g" = "http://a/g")
+
+ ("/./g" = "http://a/g")
+ ("/../g" = "http://a/g")
+ ("g." = "http://a/b/c/g.")
+ (".g" = "http://a/b/c/.g")
+ ("g.." = "http://a/b/c/g..")
+ ("..g" = "http://a/b/c/..g")
+
+ ("./../g" = "http://a/b/g")
+ ("./g/." = "http://a/b/c/g/")
+ ("g/./h" = "http://a/b/c/g/h")
+ ("g/../h" = "http://a/b/c/h")
+ ("g;x=1/./y" = "http://a/b/c/g;x=1/y")
+ ("g;x=1/../y" = "http://a/b/c/y")
+
+ ("g?y/./x" = "http://a/b/c/g?y/./x")
+ ("g?y/../x" = "http://a/b/c/g?y/../x")
+ ("g#s/./x" = "http://a/b/c/g#s/./x")
+ ("g#s/../x" = "http://a/b/c/g#s/../x")
+ ("http:g" = "http:g") ; for strict parsers
+
+ ))
+
+
)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/collects/web-server/servlet-tables.ss b/collects/web-server/servlet-tables.ss
index 2342799d17..ee5dcc8495 100644
--- a/collects/web-server/servlet-tables.ss
+++ b/collects/web-server/servlet-tables.ss
@@ -141,10 +141,11 @@
;; url->param: url -> (union string #f)
(define (url->param a-url)
- (let ([l (filter path/param? (url-path a-url))])
+ (let ([l (filter (λ (x) (not (null? (path/param-param x))))
+ (url-path a-url))])
(and (not (null? l))
- (path/param-param (car l)))))
-
+ (car (path/param-param (car l))))))
+
;; insert-param: url string -> string
;; add a path/param to the path in a url
;; (assumes that there is only one path/param)
diff --git a/collects/web-server/util.ss b/collects/web-server/util.ss
index f279bdb006..a2e8912893 100644
--- a/collects/web-server/util.ss
+++ b/collects/web-server/util.ss
@@ -292,21 +292,5 @@
(cond
((char=? first #\+)
(values #\space rest))
- ((char=? first #\%)
- ; MF: I rewrote this code so that Spidey could eliminate all checks.
- ; I am more confident this way that this hairy expression doesn't barf.
- (if (pair? rest)
- (let ([rest-rest (cdr rest)])
- (if (pair? rest-rest)
- (values (integer->char
- (or (string->number (string (car rest) (car rest-rest)) 16)
- (raise (make-invalid-%-suffix
- (if (string->number (string (car rest)) 16)
- (car rest-rest)
- (car rest))))))
- (cdr rest-rest))
- (raise (make-incomplete-%-suffix rest))))
- (raise (make-incomplete-%-suffix rest))))
(else (values first rest)))))
- (cons this (loop rest))))))))
- )
+ (cons this (loop rest)))))))))