diff --git a/collects/browser/private/html.ss b/collects/browser/private/html.ss index e60f07dac1..6f683ba665 100644 --- a/collects/browser/private/html.ss +++ b/collects/browser/private/html.ss @@ -234,8 +234,7 @@ (copy-port ip op))))) 'truncate) (pop-status) - (let* ([upath (url-path url)] - [bitmap (make-object bitmap% tmp-filename)]) + (let ([bitmap (make-object bitmap% tmp-filename)]) (with-handlers ([exn:fail? (lambda (x) (message-box "Warning" diff --git a/collects/browser/private/hyper.ss b/collects/browser/private/hyper.ss index 9b08697b98..95f2e34179 100644 --- a/collects/browser/private/hyper.ss +++ b/collects/browser/private/hyper.ss @@ -73,7 +73,8 @@ A test case: ;; assume that url-paths are all strings ;; (other wise the pages are treated as different) - (equal? (url-path a) (url-path b)) + (equal? (map path/param-path (url-path a)) + (map path/param-path (url-path b))) (equal? (url-query a) (url-query b))))) @@ -108,7 +109,8 @@ A test case: (normal-case-path (normalize-path (build-path (collection-path "mzlib") 'up 'up))) - (normal-case-path (normalize-path (apply build-path (url-path url))))))] + (normal-case-path (normalize-path (apply build-path + (map path/param-path (url-path url)))))))] [else (inner #f url-allows-evaling? url)])) (define doc-notes null) @@ -330,7 +332,7 @@ A test case: (let ([p (url-path url)]) (and (not (null? p)) (regexp-match #rx"[.][^.]*$" - (car (last-pair p))))))] + (path/param-path (car (last-pair p)))))))] [html? (or (and mime-type (regexp-match #rx"text/html" mime-type)) (member path-extension '(".html" ".htm")))] [text? (or (and mime-type (regexp-match #rx"text/plain" mime-type)) @@ -349,7 +351,9 @@ A test case: (let* ([orig-name (and (url? url) (let ([p (url-path url)]) (and (not (null? p)) - (car (last-pair p)))))] + (let ([lp (path/param-path (car (last-pair p)))]) + (and (not (string=? "" lp)) + lp)))))] [size (let ([s (extract-field "content-length" mime-headers)]) (and s (let ([m (regexp-match #rx"[0-9]+" s)]) (and m (string->number (car m))))))] @@ -447,8 +451,7 @@ A test case: (queue-callback (lambda () (semaphore-post wait-to-start))) (send d show #t) (when exn - (raise (make-exn:tcp-problem (exn-message exn) - (current-continuation-marks))))) + (raise (make-exn:tcp-problem (exn-message exn) (current-continuation-marks))))) (let ([sema (make-semaphore 0)]) (when (and tmp-plt-filename install?) (run-installer tmp-plt-filename @@ -467,7 +470,8 @@ A test case: (current-continuation-marks)))))] [(or (and (url? url) (not (null? (url-path url))) - (regexp-match "[.]html?$" (car (last-pair (url-path url))))) + (regexp-match #rx"[.]html?$" + (path/param-path (car (last-pair (url-path url)))))) (port? url) html?) ; HTML @@ -475,7 +479,7 @@ A test case: (let* ([directory (or (if (and (url? url) (string=? "file" (url-scheme url))) - (let ([path (url-path url)]) + (let ([path (apply build-path (map path/param-path (url-path url)))]) (let-values ([(base name dir?) (split-path path)]) (if (string? base) base diff --git a/collects/help/private/gui.ss b/collects/help/private/gui.ss index 2a142aa3d9..ec62f1a192 100644 --- a/collects/help/private/gui.ss +++ b/collects/help/private/gui.ss @@ -105,7 +105,7 @@ ;; they will be caught elsewhere. [(and (url-path url) (not (null? (url-path url))) - (regexp-match #rx".plt$" (car (last-pair (url-path url))))) + (regexp-match #rx".plt$" (path/param-path (car (last-pair (url-path url)))))) url] ;; files on download.plt-scheme.org in /doc are considered @@ -119,7 +119,7 @@ (let* ([path (url-path url)] [coll (and (pair? path) (pair? (cdr path)) - (cadr path))] + (path/param-path (cadr path)))] [coll-path (and coll (string->path coll))] [doc-pr (and coll-path (assoc coll-path known-docs))]) @@ -136,7 +136,9 @@ url] ;; send the url off to another browser - [(or (and (preferences:get 'drscheme:help-desk:ask-about-external-urls) + [(or (and (string? (url-scheme url)) + (not (equal? (url-scheme url) "http"))) + (and (preferences:get 'drscheme:help-desk:ask-about-external-urls) (ask-user-about-separate-browser)) (preferences:get 'drscheme:help-desk:separate-browser)) (send-url (url->string url)) @@ -240,7 +242,7 @@ (define (is-download.plt-scheme.org/doc-url? url) (and (equal? "download.plt-scheme.org" (url-host url)) (not (null? (url-path url))) - (equal? (car (url-path url)) "^/doc"))) + (equal? (path/param-path (car (url-path url))) "doc"))) (define (ask-user-about-separate-browser) (define separate-default? (preferences:get 'drscheme:help-desk:separate-browser)) diff --git a/collects/help/private/tcp-intercept.ss b/collects/help/private/tcp-intercept.ss index 602e1263ce..c74ce8258f 100644 --- a/collects/help/private/tcp-intercept.ss +++ b/collects/help/private/tcp-intercept.ss @@ -39,7 +39,8 @@ (url-user url) "" #f - (url-path url) + (url-path-absolute? url) + (url-path url) (url-query url) (url-fragment url)))]) (substring long 3 (string-length long)))] diff --git a/collects/net/doc.txt b/collects/net/doc.txt index 7a0ee9aa7e..a021d10cb8 100644 --- a/collects/net/doc.txt +++ b/collects/net/doc.txt @@ -29,38 +29,31 @@ http://www.ietf.org/rfc/rfc2396.txt TYPES ---------------------------------------------------------------- -> url - struct url (scheme user host port path fragment) - scheme : string or #f - user : string or #f - host : string or #f - port : number or #f - path : (listof (union string path/param)) - query : (listof (cons symbol string)) - fragment : string or #f +_url struct_ + (define-struct url (scheme user host port path-absolute? path query fragment)) +> url-scheme : url -> (union false/c string?) +> url-user : url -> (union false/c string?) +> url-host : url -> (union false/c string?) +> url-port : url -> (union false/c number?) +> url-path-absolute? : url -> boolean? +> url-path : url -> (listof path/param?) +> url-query : url -> (listof (cons/c symbol? string?)) +> url-fragment : url -> (union false/c string?) +> url? : any -> boolean +> make-url : ...as-above.. -> url - The basic structure for all URLs. + The basic structure for all URLs, as explained in rfc3986 + http://www.ietf.org/rfc/rfc3986.txt + +For example, this url: http://sky@www.cs.brown.edu:801/cgi-bin/finger;xyz?name=shriram;host=nw#top - {-1} {2} {----3---------} {4}{---5---------} {6} {----7-------------} {8} + {-1} {2} {----3---------} {4}{---5-------------}{----7-------------} {8} + {6} 1 = scheme, 2 = user, 3 = host, 4 = port, - 5 = path, 6 = param, 7 = query, 8 = fragment - - If the scheme is "file", then the path is a platform-dependent - string. The library does, however, check for the presence of a - fragment designator and, if there is one, separates it from the rest - of the path. If the path is syntactically a directory, the last - string the resulting structure's `path' list is an empty string. - If the path is absolute, the `host' is the root path, otherwise - `host' is #f. - - For non-"file" schemes, the path is a URL path as defined in the - standard. - - If a path segment has a parameter, it is represented with - an instance of the path/param struct. Otherwise, it is - just represented as a string. + 5 = path, 6 = param (or last path segment), + 7 = query, 8 = fragment The strings inside the fields user, path, query, and fragment are represented directly as Scheme strings, ie without @@ -74,16 +67,20 @@ TYPES ---------------------------------------------------------------- An empty string at the end of the list of paths corresponds to a url that ends in a slash. For example, - this url: http://www.drscheme.org/a/ has a path field - '("a" "") and this url: http://www.drscheme.org/a - has a path field '("a"). + this url: http://www.drscheme.org/a/ has a path field with + strings "a" and "" and this url: http://www.drscheme.org/a + has a path field with only the string "a". -> path/param +_ path/param struct_ + (define-struct path/param (path param)) - A pair of strings, accessible with _path/param-path_ and - path/param-params_ that joins a path segment with its - params in a url. The function _path/param?_ recognizes - such pairs. +> path/param-path : path/param -> (union string? (symbols 'up 'same)) +> path/param-param : path/param -> (listof string) +> path/param? : any -> boolean +> make-path/param : (union string? (symbols 'up 'same)) (listof string) -> path/param + + A pair, that joins a path segment with its params in a + url. > pure-port @@ -114,42 +111,9 @@ PROCEDURES ----------------------------------------------------------- Given a base URL and a relative path, combines the two and returns a new URL as per the URL combination specification. Call the arguments base and relative. They are combined according to the - following rules (applied in order until one matches): - - If either argument is an empty URL, the result is the other - argument. - - If relative sports a scheme, then the result is relative. - - If the base has scheme "file", the procedure uses the special rule - specified below. - - If relative specifies a host, then the result is relative. - Failing the above, relative inherit's base's host and port. Then: - - If the path of relative begins with a "/", the result is relative. - - If the path of relative is empty, then relative inherits base's - params and query, and the result is relative. - - Otherwise base and relative are combined as per the standard - specification of merging and normalization. + rules in rfc3986 (above). - On combining "file" schemes: - - If the base has scheme "file", relative is treated as a - slash-separated path (unless it contains an empty path --- only - params, queries, and fragments --- in which case the path is not - used). These path fragments are combined using build-path, starting - with the base's path. Three path segments are special: ".." - corresponds to an 'up directive to build-path, while "." and "" - correspond to 'same. As a consequence, if relative begins with "/", - this does not make it an absolute URL: the leading slash is treated - as if the initial segment is "", so this has no effect, and base's - path remains the base path of the result. If base refers to a - directory, relative is indexed from that directory; if base refers - to a file, relative is indexed from the directory containing the - file. Note that if base does not refer to an actual directory that - exists on the filesystem, then it must syntactically be a directory - as understood by split-path. - - The above algorithm tests for the presence of a directory to - correctly combine paths. As a result, it can raise any exception - raised by directory-exists?. None of these exceptions is trapped by - the procedure; consumers must be prepared for them. + This function does not raise any exceptions. > (netscape/string->url string) -> url diff --git a/collects/net/uri-codec-sig.ss b/collects/net/uri-codec-sig.ss index f0aed959e9..2d13a558cf 100644 --- a/collects/net/uri-codec-sig.ss +++ b/collects/net/uri-codec-sig.ss @@ -5,6 +5,8 @@ (define-signature net:uri-codec^ (uri-encode uri-decode + uri-path-segment-encode + uri-path-segment-decode form-urlencoded-encode form-urlencoded-decode alist->form-urlencoded diff --git a/collects/net/uri-codec-unit.ss b/collects/net/uri-codec-unit.ss index 5c141f5127..d833fb3da5 100644 --- a/collects/net/uri-codec-unit.ss +++ b/collects/net/uri-codec-unit.ss @@ -1,3 +1,7 @@ +;; 1/2/2006: Added a mapping for uri path segments +;; that allows more characters to remain decoded +;; -robby + ;;; ;;; ---- En/Decode URLs and form-urlencoded data ;;; Time-stamp: <03/04/25 10:31:31 noel> @@ -75,6 +79,7 @@ (lib "match.ss") (lib "string.ss") (lib "etc.ss") + (lib "list.ss") "uri-codec-sig.ss") (provide uri-codec@) @@ -109,13 +114,19 @@ ;; Characters that sometimes map to themselves (define safe-mapping - (map (lambda (char) - (cons char char)) + (map (lambda (char) (cons char char)) '(#\- #\_ #\. #\! #\~ #\* #\' #\( #\)))) ;; The strict URI mapping (define uri-mapping - (append alphanumeric-mapping safe-mapping)) + (append alphanumeric-mapping + safe-mapping)) + + ;; The uri path segment mapping from RFC 3986 + (define uri-path-segment-mapping + (append alphanumeric-mapping + safe-mapping + (map (λ (c) (cons c c)) (string->list "@+,=$&:")))) ;; The form-urlencoded mapping (define form-urlencoded-mapping @@ -156,6 +167,10 @@ (define-values (uri-encoding-vector uri-decoding-vector) (make-codec-tables uri-mapping)) + + (define-values (uri-path-segment-encoding-vector + uri-path-segment-decoding-vector) + (make-codec-tables uri-path-segment-mapping)) (define-values (form-urlencoded-encoding-vector form-urlencoded-decoding-vector) @@ -198,7 +213,15 @@ ;; string -> string (define (uri-decode str) (decode uri-decoding-vector str)) - + + ;; string -> string + (define (uri-path-segment-encode str) + (encode uri-path-segment-encoding-vector str)) + + ;; string -> string + (define (uri-path-segment-decode str) + (decode uri-path-segment-decoding-vector str)) + ;; string -> string (define (form-urlencoded-encode str) (encode form-urlencoded-encoding-vector str)) diff --git a/collects/net/url-structs.ss b/collects/net/url-structs.ss index ac8d1d6d51..d6feda78af 100644 --- a/collects/net/url-structs.ss +++ b/collects/net/url-structs.ss @@ -1,7 +1,7 @@ (module url-structs mzscheme (require (lib "contract.ss")) - (define-struct url (scheme user host port path query fragment)) + (define-struct url (scheme user host port path-absolute? path query fragment)) (define-struct path/param (path param)) (provide/contract @@ -9,8 +9,9 @@ [user (union false/c string?)] [host (union false/c string?)] [port (union false/c number?)] - [path (listof (union string? path/param?))] + [path-absolute? boolean?] + [path (listof path/param?)] [query (listof (cons/c symbol? string?))] [fragment (union false/c string?)])) - (struct path/param ([path string?] - [param string?])))) \ No newline at end of file + (struct path/param ([path (union string? (symbols 'up 'same))] + [param (listof string?)])))) diff --git a/collects/net/url-unit.ss b/collects/net/url-unit.ss index b43ff6dd19..06d70942af 100644 --- a/collects/net/url-unit.ss +++ b/collects/net/url-unit.ss @@ -13,7 +13,8 @@ (require (lib "file.ss") (lib "unitsig.ss") (lib "port.ss") - "url-structs.ss" + (lib "string.ss") + "url-structs.ss" "uri-codec.ss" "url-sig.ss" "tcp-sig.ss") @@ -64,7 +65,9 @@ (define (url->file-path url) (path->string (apply build-path (or (url-host url) 'same) - (map (lambda (x) (if (equal? x "") 'same x)) (url-path url))))) + (map (lambda (x) (if (equal? x "") 'same x)) + (map path/param-path + (url-path url)))))) (define url->string (lambda (url) @@ -83,13 +86,18 @@ (string-append "#" fragment)))) (else (let ((sa string-append)) - (sa (if scheme (sa scheme "://") "") - (if user (sa (uri-encode user) "@") "") - (if host host "") - (if port (sa ":" (number->string port)) "") - ; There used to be a "/" here, but that causes an - ; extra leading slash -- wonder why it ever worked! - (combine-path-strings path) + (sa (if scheme (sa scheme ":") "") + (if (or user host port) + (sa + "//" + (if user (sa (uri-encode user) "@") "") + (if host host "") + (if port (sa ":" (number->string port)) "") + ; There used to be a "/" here, but that causes an + ; extra leading slash -- wonder why it ever worked! + ) + "") + (combine-path-strings (url-path-absolute? url) path) ;(if query (sa "?" (uri-encode query)) "") (if (null? query) "" (sa "?" (alist->form-urlencoded query))) (if fragment (sa "#" (uri-encode fragment)) "")))))))) @@ -126,7 +134,8 @@ (url->string (if proxy url - (make-url #f #f #f #f + (make-url #f #f #f #f + (url-path-absolute? url) (url-path url) (url-query url) (url-fragment url)))))) @@ -223,62 +232,95 @@ (not (url-fragment url)) (null? (url-path url))))) - (define (combine-url/relative base string) - (let ([relative (string->url string)]) + + ;; transliteration of code in rfc 3986, section 5.2.2 + (define (combine-url/relative Base string) + (let ([R (string->url string)] + [T (make-url #f #f #f #f #f '() '() #f)]) + (if (url-scheme R) + (begin + (set-url-scheme! T (url-scheme R)) + (set-url-user! T (url-user R)) ;; authority + (set-url-host! T (url-host R)) ;; authority + (set-url-port! T (url-port R)) ;; authority + (set-url-path-absolute?! T (url-path-absolute? R)) + (set-url-path! T (remove-dot-segments (url-path R))) + (set-url-query! T (url-query R))) + (begin + (if (url-host R) ;; => authority is defined + (begin + (set-url-user! T (url-user R)) ;; authority + (set-url-host! T (url-host R)) ;; authority + (set-url-port! T (url-port R)) ;; authority + (set-url-path-absolute?! T (url-path-absolute? R)) + (set-url-path! T (remove-dot-segments (url-path R))) + (set-url-query! T (url-query R))) + (begin + (if (null? (url-path R)) ;; => R has empty path + (begin + (set-url-path-absolute?! T (url-path-absolute? Base)) + (set-url-path! T (url-path Base)) + (if (not (null? (url-query R))) + (set-url-query! T (url-query R)) + (set-url-query! T (url-query Base)))) + (begin + (cond + [(url-path-absolute? R) + (set-url-path-absolute?! T #t) + (set-url-path! T (remove-dot-segments (url-path R)))] + [(and (null? (url-path Base)) + (url-host Base)) + (set-url-path-absolute?! T #t) + (set-url-path! T (remove-dot-segments (url-path R)))] + [else + (set-url-path-absolute?! T (url-path-absolute? Base)) + (set-url-path! T (remove-dot-segments + (append (all-but-last (url-path Base)) + (url-path R))))]) + (set-url-query! T (url-query R)))) + (set-url-user! T (url-user Base)) ;; authority + (set-url-host! T (url-host Base)) ;; authority + (set-url-port! T (url-port Base)))) ;; authority + (set-url-scheme! T (url-scheme Base)))) + (set-url-fragment! T (url-fragment R)) + T)) + + (define (all-but-last lst) + (cond + [(null? lst) null] + [(null? (cdr lst)) null] + [else (cons (car lst) (all-but-last (cdr lst)))])) + + ;; cribbed from 5.2.4 in rfc 3986 + ;; the strange cases 2 and 4 implicitly change urls + ;; with paths segments "." and ".." at the end + ;; into "./" and "../" respectively + (define (remove-dot-segments path) + (let loop ([path path] + [result '()]) (cond - [(empty-url? base) ; Step 1 - relative] - [(empty-url? relative) ; Step 2a - base] - [(url-scheme relative) ; Step 2b - relative] - [else ; Step 2c - (set-url-scheme! relative (url-scheme base)) - (cond - [(url-host relative) ; Step 3 - relative] - [else - (set-url-host! relative (url-host base)) - (set-url-port! relative (url-port base)) ; Unspecified! - (let ([rel-path (url-path relative)]) - (cond - [(and (not (equal? string "")) ; Step 4 - (char=? #\/ (string-ref string 0))) - relative] - [(or (not rel-path) ; Step 5 - (null? rel-path)) - (set-url-path! relative (url-path base)) - (when (url-query relative) - (set-url-query! relative (url-query base))) - relative] - [else ; Step 6 - (merge-and-normalize - (url-path base) relative)]))])]))) - - (define (merge-and-normalize base-path relative-url) - (let* ([joined - (let loop ([base-path base-path]) - (cond - [(null? base-path) (url-path relative-url)] - [(null? (cdr base-path)) (url-path relative-url)] - [else (cons (car base-path) (loop (cdr base-path)))]))] - [reversed/simplified - (if (null? joined) - null - (let loop ([segs (reverse joined)]) - (cond - [(null? segs) null] - [else (let ([fst (car segs)]) - (cond - [(string=? fst ".") - (loop (cdr segs))] - [(string=? fst "..") - (if (null? (cdr segs)) - segs - (loop (cddr segs)))] - [else (cons (car segs) (loop (cdr segs)))]))])))]) - (set-url-path! relative-url (reverse reversed/simplified)) - relative-url)) + [(null? path) (reverse result)] + [(and (eq? (path/param-path (car path)) 'same) + (null? (cdr path))) + (loop (cdr path) + (cons (make-path/param "" '()) result))] + [(eq? (path/param-path (car path)) 'same) + (loop (cdr path) + result)] + [(and (eq? (path/param-path (car path)) 'up) + (null? (cdr path)) + (not (null? result))) + (loop (cdr path) + (cons (make-path/param "" '()) (cdr result)))] + [(and (eq? (path/param-path (car path)) 'up) + (not (null? result))) + (loop (cdr path) (cdr result))] + [(and (eq? (path/param-path (car path)) 'up) + (null? result)) + ;; when we go up too far, just drop the "up"s. + (loop (cdr path) result)] + [else + (loop (cdr path) (cons (car path) result))]))) ;; call/input-url : url x (url -> in-port) x (in-port -> T) ;; [x list (str)] -> T @@ -363,9 +405,10 @@ #f ; user (and root (path->string root)) ; host #f ; port - (append (map path->string elems) + (absolute-path? path) + (append (map (λ (x) (make-path/param (path->string x) '())) elems) (if (eq? kind 'dir) - '("") + (list (make-path/param "" '())) null)) '() ; query fragment)) @@ -383,20 +426,27 @@ (get-num (lambda (pos skip-left skip-right) (let ((s (get-str pos skip-left skip-right))) (if s (string->number s) #f)))) - (host (get-str 5 0 0))) - (make-url (get-str 2 0 1) ; scheme + (host (get-str 5 0 0)) + (path (get-str 7 0 0)) + (scheme (get-str 2 0 1))) + (when (string? scheme) (string-lowercase! scheme)) + (when (string? host) (string-lowercase! host)) + (make-url scheme (uri-decode/maybe (get-str 4 0 1)) ; user host (get-num 6 1 0) ; port + (and (not (= 0 (string-length path))) + (char=? #\/ (string-ref path 0))) (separate-path-strings - (let ([path (get-str 7 0 0)]) - ;; If path is "" and the input is an absolute URL - ;; with a hostname, then the intended path is "/", - ;; but the URL is missing a "/" at the end. - (if (and (string=? path "") - host) - "/" - path))) + ;; If path is "" and the input is an absolute URL + ;; with a hostname, then the intended path is "/", + ;; but the URL is missing a "/" at the end. + path + #; + (if (and (string=? path "") + host) + "/" + path)) ;(uri-decode/maybe (get-str 8 1 0)) ; ;query (let ([q (get-str 8 1 0)]) @@ -411,7 +461,7 @@ ;; in an attempt to be "friendly" (and f (uri-decode (regexp-replace* "%([^0-9a-fA-F])" f "%25\\1")))) - ;; separate-path-strings : string[starting with /] -> (listof (union string path/param)) + ;; separate-path-strings : string[starting with /] -> (listof path/param) (define (separate-path-strings str) (cond [(string=? str "") '()] @@ -423,31 +473,57 @@ [(regexp-match #rx"([^/]*)/(.*)$" str) => (lambda (m) - (cons (maybe-separate-params (cadr m)) (loop (caddr m))))] - [else (list (maybe-separate-params str))]))])) + (cons (separate-params (cadr m)) (loop (caddr m))))] + [else (list (separate-params str))]))])) - (define (maybe-separate-params s) + (define (separate-params s) + (let ([lst (map path-segment-decode (regexp-split #rx";" s))]) + (make-path/param (car lst) (cdr lst)))) + + (define (path-segment-decode p) (cond - [(regexp-match #rx"^([^;]*);(.*)$" s) - => - (lambda (m) - (make-path/param (cadr m) (caddr m)))] - [else s])) + [(string=? p "..") 'up] + [(string=? p ".") 'same] + [else (uri-path-segment-decode p)])) + + (define (path-segment-encode p) + (cond + [(eq? p 'up) ".."] + [(eq? p 'same) "."] + [(equal? p "..") "%2e%2e"] + [(equal? p ".") "%2e"] + [else (uri-path-segment-encode p)])) + + (define (combine-path-strings absolute? path/params) + (cond + [(null? path/params) ""] + [else + (apply + string-append + (if absolute? "/" "") + (add-between + "/" + (map join-params path/params)))])) - (define (combine-path-strings strs) - (apply + (define (join-params s) + (apply string-append - (let loop ([strs strs]) - (cond - [(null? strs) '()] - [else (list* "/" - (maybe-join-params (car strs)) - (loop (cdr strs)))])))) - - ;; needs to unquote things! - (define (maybe-join-params s) + (add-between ";" + (map + path-segment-encode + (cons (path/param-path s) + (path/param-param s)))))) + + (define (add-between bet lst) (cond - [(string? s) s] - [else (string-append (path/param-path s) - ";" - (path/param-param s))]))))) + [(null? lst) null] + [(null? (cdr lst)) lst] + [else + (let loop ([fst (car lst)] + [lst (cdr lst)]) + (cond + [(null? lst) (list fst)] + [else (list* fst + bet + (loop (car lst) + (cdr lst)))]))]))))) diff --git a/collects/net/url.ss b/collects/net/url.ss index 33b1bc28e1..9c9400cf74 100644 --- a/collects/net/url.ss +++ b/collects/net/url.ss @@ -21,6 +21,7 @@ user host port + path-absolute? path query fragment)) diff --git a/collects/tests/mzscheme/net.ss b/collects/tests/mzscheme/net.ss index e29af77c18..7462896b19 100644 --- a/collects/tests/mzscheme/net.ss +++ b/collects/tests/mzscheme/net.ss @@ -140,11 +140,12 @@ (let () (define (test-s->u vec str) - (define (string->url/vec str) (url->vec (string->url str))) - (define (url/vec->string vec) (url->string (vec->url vec))) (test vec string->url/vec str) (test str url/vec->string vec)) + (define (string->url/vec str) (url->vec (string->url str))) + (define (url/vec->string vec) (url->string (vec->url vec))) + (define (test-c-u/r expected base relative) (define (combine-url/relative-vec x y) (url->vec (combine-url/relative (vec->url x) y))) @@ -156,82 +157,104 @@ (vector-ref vec 1) (vector-ref vec 2) (vector-ref vec 3) - (map (lambda (x) (if (string? x) - x - (make-path/param (vector-ref x 0) (vector-ref x 1)))) - (vector-ref vec 4)) - (vector-ref vec 5) - (vector-ref vec 6))) + (vector-ref vec 4) + (map (lambda (x) + (let ([lst (vector->list x)]) + (make-path/param (car lst) (cdr lst)))) + (vector-ref vec 5)) + (vector-ref vec 6) + (vector-ref vec 7))) (define (url->vec url) (vector (url-scheme url) (url-user url) (url-host url) (url-port url) - (map (lambda (x) (if (string? x) - x - (vector (path/param-path x) (path/param-param x)))) + (url-path-absolute? url) + (map (lambda (x) (list->vector (cons (path/param-path x) (path/param-param x)))) (url-path url)) (url-query url) (url-fragment url))) - (test-s->u (vector #f #f #f #f '("") '() #f) + (test-s->u (vector #f #f #f #f #t '(#("")) '() #f) "/") - (test-s->u (vector #f #f #f #f '() '() #f) + (test-s->u (vector #f #f #f #f #f '() '() #f) "") - (test-s->u (vector "http" #f "www.drscheme.org" #f '("") '() #f) + (test-s->u (vector "http" #f "www.drscheme.org" #f #f '() '() #f) + "http://www.drscheme.org") + (test-s->u (vector "http" #f "www.drscheme.org" #f #t '(#("")) '() #f) "http://www.drscheme.org/") - (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '() #f) + + (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '() #f) "http://www.drscheme.org/a/b/c") - (test-s->u (vector "http" "robby" "www.drscheme.org" #f (list "a" "b" "c") '() #f) + (test-s->u (vector "http" "robby" "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '() #f) "http://robby@www.drscheme.org/a/b/c") - (test-s->u (vector "http" #f "www.drscheme.org" 8080 (list "a" "b" "c") '() #f) + (test-s->u (vector "http" #f "www.drscheme.org" 8080 #t (list #("a") #("b") #("c")) '() #f) "http://www.drscheme.org:8080/a/b/c") - (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '() "joe") + (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '() "joe") "http://www.drscheme.org/a/b/c#joe") - (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tim . "")) #f) + (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tim . "")) #f) "http://www.drscheme.org/a/b/c?tim=") - (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tim . "")) "joe") + (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tim . "")) "joe") "http://www.drscheme.org/a/b/c?tim=#joe") - (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tim . "tim")) "joe") + (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tim . "tim")) "joe") "http://www.drscheme.org/a/b/c?tim=tim#joe") - (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tam . "tom")) "joe") + (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tam . "tom")) "joe") "http://www.drscheme.org/a/b/c?tam=tom#joe") - (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tam . "tom") (pam . "pom")) "joe") + (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tam . "tom") (pam . "pom")) "joe") "http://www.drscheme.org/a/b/c?tam=tom;pam=pom#joe") (parameterize ([current-alist-separator-mode 'semi]) - (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tam . "tom") (pam . "pom")) "joe") + (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tam . "tom") (pam . "pom")) "joe") "http://www.drscheme.org/a/b/c?tam=tom;pam=pom#joe")) (parameterize ([current-alist-separator-mode 'amp]) - (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tam . "tom") (pam . "pom")) "joe") + (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tam . "tom") (pam . "pom")) "joe") "http://www.drscheme.org/a/b/c?tam=tom&pam=pom#joe")) - (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" #("c" "b")) '() #f) + (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c" "b")) '() #f) "http://www.drscheme.org/a/b/c;b") - (test-s->u (vector "http" #f "www.drscheme.org" #f (list #("a" "x") "b" #("c" "b")) '() #f) + (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a" "x") #("b") #("c" "b")) '() #f) "http://www.drscheme.org/a;x/b/c;b") ;; test unquoting for % - (test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((ti#m . "")) "jo e") + (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((ti#m . "")) "jo e") "http://www.drscheme.org/a/b/c?ti%23m=#jo%20e") - (test-s->u (vector "http" #f "www.drscheme.org" #f (list #("a " " a") " b " " c ") '() #f) - "http://www.drscheme.org/a ; a/ b / c ") - (test-s->u (vector "http" "robb y" "www.drscheme.org" #f '("") '() #f) + (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a " " a") #(" b ") #(" c ")) '() #f) + "http://www.drscheme.org/a%20;%20a/%20b%20/%20c%20") + (test-s->u (vector "http" "robb y" "www.drscheme.org" #f #t '(#("")) '() #f) "http://robb%20y@www.drscheme.org/") + (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("%a") #("b/") #("c")) '() #f) + "http://www.drscheme.org/%25a/b%2f/c") + + ;; test the characters that need to be encoded in paths vs those that do not need to + ;; be encoded in paths + (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a:@!$&'()*+,=z") #("/?#[];") #("")) '() #f) + "http://www.drscheme.org/a:@!$&'()*+,=z/%2f%3f%23%5b%5d%3b/") + + (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #(".") #("..") '#(same) '#(up) #("...") #("abc.def")) '() #f) + "http://www.drscheme.org/%2e/%2e%2e/./../.../abc.def") + (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("." "") #(".." "") #(same "") #(up "") #("..." "") #("abc.def" "")) '() #f) + "http://www.drscheme.org/%2e;/%2e%2e;/.;/..;/...;/abc.def;") - (test-s->u (vector "mailto" #f #f #f '("robby@plt-scheme.org") () #f) + + (test (vector "http" "ROBBY" "www.drscheme.org" 80 #t '(#("INDEX.HTML" "XXX")) '((T . "P")) "YYY") + string->url/vec + "HTTP://ROBBY@WWW.DRSCHEME.ORG:80/INDEX.HTML;XXX?T=P#YYY") + + (test-s->u (vector "mailto" #f #f #f #f '(#("robby@plt-scheme.org")) '() #f) "mailto:robby@plt-scheme.org") - (let ([empty-url (make-url #f #f #f #f '() '() #f)]) + (let ([empty-url (make-url #f #f #f #f #f '() '() #f)]) (test-c-u/r (string->url "http://www.drscheme.org") empty-url - "http://www.drscheme.org") - (test-c-u/r (string->url "http://www.drscheme.org") - (string->url "http://www.drscheme.org") - "")) + "http://www.drscheme.org")) + + (test-c-u/r (string->url "http://www.drscheme.org") + (string->url "http://www.drscheme.org") + "") (test-c-u/r (string->url "http://www.mzscheme.org") (string->url "http://www.drscheme.org/") "http://www.mzscheme.org") + (test-c-u/r (string->url "http://www.drscheme.org/index.html") (string->url "http://www.drscheme.org/") "index.html") @@ -253,13 +276,95 @@ (test-c-u/r (string->url "http://www.drscheme.org/a/b/c/d/index.html") (string->url "http://www.drscheme.org/a/b/c/") "d/index.html") + (test-c-u/r (string->url "http://www.drscheme.org/a/b/index.html") + (string->url "http://www.drscheme.org/a/b/c/") + "../index.html") + (test-c-u/r (string->url "http://www.drscheme.org/a/b/c/index.html") + (string->url "http://www.drscheme.org/a/b/c/") + "./index.html") + (test-c-u/r (string->url "http://www.drscheme.org/a/b/c/%2e%2e/index.html") + (string->url "http://www.drscheme.org/a/b/c/") + "%2e%2e/index.html") + (test-c-u/r (string->url "http://www.drscheme.org/a/index.html") + (string->url "http://www.drscheme.org/a/b/../c/") + "../index.html") + (test-c-u/r (string->url "http://www.drscheme.org/a/b/c/d/index.html") + (string->url "http://www.drscheme.org/a/b/c/d/index.html#ghijkl") + "index.html") + (test-c-u/r (string->url "http://www.drscheme.org/a/b/c/d/index.html#abcdef") + (string->url "http://www.drscheme.org/a/b/c/d/index.html#ghijkl") + "#abcdef") + (test-c-u/r (string->url "file:///a/b/c/d/index.html") (string->url "file:///a/b/c/") "d/index.html") (test-c-u/r (string->url "file:///a/b/d/index.html") (string->url "file:///a/b/c") "d/index.html") + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; + ;; tests from rfc 3986 + ;; + + (for-each + (λ (line) + (test-c-u/r (string->url (caddr line)) + (string->url "http://a/b/c/d;p?q") + (car line))) + '(("g:h" = "g:h") + ("g" = "http://a/b/c/g") + ("./g" = "http://a/b/c/g") + ("g/" = "http://a/b/c/g/") + ("/g" = "http://a/g") + ("//g" = "http://g") + ("?y" = "http://a/b/c/d;p?y") + ("g?y" = "http://a/b/c/g?y") + ("#s" = "http://a/b/c/d;p?q#s") + ("g#s" = "http://a/b/c/g#s") + ("g?y#s" = "http://a/b/c/g?y#s") + (";x" = "http://a/b/c/;x") + ("g;x" = "http://a/b/c/g;x") + ("g;x?y#s" = "http://a/b/c/g;x?y#s") + ("" = "http://a/b/c/d;p?q") + ("." = "http://a/b/c/") + ("./" = "http://a/b/c/") + (".." = "http://a/b/") + ("../" = "http://a/b/") + ("../g" = "http://a/b/g") + ("../.." = "http://a/") + ("../../" = "http://a/") + ("../../g" = "http://a/g") + + ;; abnormal examples follow + + ("../../../g" = "http://a/g") + ("../../../../g" = "http://a/g") + + ("/./g" = "http://a/g") + ("/../g" = "http://a/g") + ("g." = "http://a/b/c/g.") + (".g" = "http://a/b/c/.g") + ("g.." = "http://a/b/c/g..") + ("..g" = "http://a/b/c/..g") + + ("./../g" = "http://a/b/g") + ("./g/." = "http://a/b/c/g/") + ("g/./h" = "http://a/b/c/g/h") + ("g/../h" = "http://a/b/c/h") + ("g;x=1/./y" = "http://a/b/c/g;x=1/y") + ("g;x=1/../y" = "http://a/b/c/y") + + ("g?y/./x" = "http://a/b/c/g?y/./x") + ("g?y/../x" = "http://a/b/c/g?y/../x") + ("g#s/./x" = "http://a/b/c/g#s/./x") + ("g#s/../x" = "http://a/b/c/g#s/../x") + ("http:g" = "http:g") ; for strict parsers + + )) + + ) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/collects/web-server/servlet-tables.ss b/collects/web-server/servlet-tables.ss index 2342799d17..ee5dcc8495 100644 --- a/collects/web-server/servlet-tables.ss +++ b/collects/web-server/servlet-tables.ss @@ -141,10 +141,11 @@ ;; url->param: url -> (union string #f) (define (url->param a-url) - (let ([l (filter path/param? (url-path a-url))]) + (let ([l (filter (λ (x) (not (null? (path/param-param x)))) + (url-path a-url))]) (and (not (null? l)) - (path/param-param (car l))))) - + (car (path/param-param (car l)))))) + ;; insert-param: url string -> string ;; add a path/param to the path in a url ;; (assumes that there is only one path/param) diff --git a/collects/web-server/util.ss b/collects/web-server/util.ss index f279bdb006..a2e8912893 100644 --- a/collects/web-server/util.ss +++ b/collects/web-server/util.ss @@ -292,21 +292,5 @@ (cond ((char=? first #\+) (values #\space rest)) - ((char=? first #\%) - ; MF: I rewrote this code so that Spidey could eliminate all checks. - ; I am more confident this way that this hairy expression doesn't barf. - (if (pair? rest) - (let ([rest-rest (cdr rest)]) - (if (pair? rest-rest) - (values (integer->char - (or (string->number (string (car rest) (car rest-rest)) 16) - (raise (make-invalid-%-suffix - (if (string->number (string (car rest)) 16) - (car rest-rest) - (car rest)))))) - (cdr rest-rest)) - (raise (make-incomplete-%-suffix rest)))) - (raise (make-incomplete-%-suffix rest)))) (else (values first rest))))) - (cons this (loop rest)))))))) - ) + (cons this (loop rest)))))))))