fixed url code in various ways

svn: r1752
This commit is contained in:
Robby Findler 2006-01-03 14:02:25 +00:00
parent 35fa1e0b26
commit c6992e0307
13 changed files with 414 additions and 251 deletions

View File

@ -234,8 +234,7 @@
(copy-port ip op))))) (copy-port ip op)))))
'truncate) 'truncate)
(pop-status) (pop-status)
(let* ([upath (url-path url)] (let ([bitmap (make-object bitmap% tmp-filename)])
[bitmap (make-object bitmap% tmp-filename)])
(with-handlers ([exn:fail? (with-handlers ([exn:fail?
(lambda (x) (lambda (x)
(message-box "Warning" (message-box "Warning"

View File

@ -73,7 +73,8 @@ A test case:
;; assume that url-paths are all strings ;; assume that url-paths are all strings
;; (other wise the pages are treated as different) ;; (other wise the pages are treated as different)
(equal? (url-path a) (url-path b)) (equal? (map path/param-path (url-path a))
(map path/param-path (url-path b)))
(equal? (url-query a) (url-query b))))) (equal? (url-query a) (url-query b)))))
@ -108,7 +109,8 @@ A test case:
(normal-case-path (normalize-path (build-path (collection-path "mzlib") (normal-case-path (normalize-path (build-path (collection-path "mzlib")
'up 'up
'up))) 'up)))
(normal-case-path (normalize-path (apply build-path (url-path url))))))] (normal-case-path (normalize-path (apply build-path
(map path/param-path (url-path url)))))))]
[else (inner #f url-allows-evaling? url)])) [else (inner #f url-allows-evaling? url)]))
(define doc-notes null) (define doc-notes null)
@ -330,7 +332,7 @@ A test case:
(let ([p (url-path url)]) (let ([p (url-path url)])
(and (not (null? p)) (and (not (null? p))
(regexp-match #rx"[.][^.]*$" (regexp-match #rx"[.][^.]*$"
(car (last-pair p))))))] (path/param-path (car (last-pair p)))))))]
[html? (or (and mime-type (regexp-match #rx"text/html" mime-type)) [html? (or (and mime-type (regexp-match #rx"text/html" mime-type))
(member path-extension '(".html" ".htm")))] (member path-extension '(".html" ".htm")))]
[text? (or (and mime-type (regexp-match #rx"text/plain" mime-type)) [text? (or (and mime-type (regexp-match #rx"text/plain" mime-type))
@ -349,7 +351,9 @@ A test case:
(let* ([orig-name (and (url? url) (let* ([orig-name (and (url? url)
(let ([p (url-path url)]) (let ([p (url-path url)])
(and (not (null? p)) (and (not (null? p))
(car (last-pair p)))))] (let ([lp (path/param-path (car (last-pair p)))])
(and (not (string=? "" lp))
lp)))))]
[size (let ([s (extract-field "content-length" mime-headers)]) [size (let ([s (extract-field "content-length" mime-headers)])
(and s (let ([m (regexp-match #rx"[0-9]+" s)]) (and s (let ([m (regexp-match #rx"[0-9]+" s)])
(and m (string->number (car m))))))] (and m (string->number (car m))))))]
@ -447,8 +451,7 @@ A test case:
(queue-callback (lambda () (semaphore-post wait-to-start))) (queue-callback (lambda () (semaphore-post wait-to-start)))
(send d show #t) (send d show #t)
(when exn (when exn
(raise (make-exn:tcp-problem (exn-message exn) (raise (make-exn:tcp-problem (exn-message exn) (current-continuation-marks)))))
(current-continuation-marks)))))
(let ([sema (make-semaphore 0)]) (let ([sema (make-semaphore 0)])
(when (and tmp-plt-filename install?) (when (and tmp-plt-filename install?)
(run-installer tmp-plt-filename (run-installer tmp-plt-filename
@ -467,7 +470,8 @@ A test case:
(current-continuation-marks)))))] (current-continuation-marks)))))]
[(or (and (url? url) [(or (and (url? url)
(not (null? (url-path url))) (not (null? (url-path url)))
(regexp-match "[.]html?$" (car (last-pair (url-path url))))) (regexp-match #rx"[.]html?$"
(path/param-path (car (last-pair (url-path url))))))
(port? url) (port? url)
html?) html?)
; HTML ; HTML
@ -475,7 +479,7 @@ A test case:
(let* ([directory (let* ([directory
(or (if (and (url? url) (or (if (and (url? url)
(string=? "file" (url-scheme url))) (string=? "file" (url-scheme url)))
(let ([path (url-path url)]) (let ([path (apply build-path (map path/param-path (url-path url)))])
(let-values ([(base name dir?) (split-path path)]) (let-values ([(base name dir?) (split-path path)])
(if (string? base) (if (string? base)
base base

View File

@ -105,7 +105,7 @@
;; they will be caught elsewhere. ;; they will be caught elsewhere.
[(and (url-path url) [(and (url-path url)
(not (null? (url-path url))) (not (null? (url-path url)))
(regexp-match #rx".plt$" (car (last-pair (url-path url))))) (regexp-match #rx".plt$" (path/param-path (car (last-pair (url-path url))))))
url] url]
;; files on download.plt-scheme.org in /doc are considered ;; files on download.plt-scheme.org in /doc are considered
@ -119,7 +119,7 @@
(let* ([path (url-path url)] (let* ([path (url-path url)]
[coll (and (pair? path) [coll (and (pair? path)
(pair? (cdr path)) (pair? (cdr path))
(cadr path))] (path/param-path (cadr path)))]
[coll-path (and coll (string->path coll))] [coll-path (and coll (string->path coll))]
[doc-pr (and coll-path (assoc coll-path known-docs))]) [doc-pr (and coll-path (assoc coll-path known-docs))])
@ -136,7 +136,9 @@
url] url]
;; send the url off to another browser ;; send the url off to another browser
[(or (and (preferences:get 'drscheme:help-desk:ask-about-external-urls) [(or (and (string? (url-scheme url))
(not (equal? (url-scheme url) "http")))
(and (preferences:get 'drscheme:help-desk:ask-about-external-urls)
(ask-user-about-separate-browser)) (ask-user-about-separate-browser))
(preferences:get 'drscheme:help-desk:separate-browser)) (preferences:get 'drscheme:help-desk:separate-browser))
(send-url (url->string url)) (send-url (url->string url))
@ -240,7 +242,7 @@
(define (is-download.plt-scheme.org/doc-url? url) (define (is-download.plt-scheme.org/doc-url? url)
(and (equal? "download.plt-scheme.org" (url-host url)) (and (equal? "download.plt-scheme.org" (url-host url))
(not (null? (url-path url))) (not (null? (url-path url)))
(equal? (car (url-path url)) "^/doc"))) (equal? (path/param-path (car (url-path url))) "doc")))
(define (ask-user-about-separate-browser) (define (ask-user-about-separate-browser)
(define separate-default? (preferences:get 'drscheme:help-desk:separate-browser)) (define separate-default? (preferences:get 'drscheme:help-desk:separate-browser))

View File

@ -39,7 +39,8 @@
(url-user url) (url-user url)
"" ""
#f #f
(url-path url) (url-path-absolute? url)
(url-path url)
(url-query url) (url-query url)
(url-fragment url)))]) (url-fragment url)))])
(substring long 3 (string-length long)))] (substring long 3 (string-length long)))]

View File

@ -29,38 +29,31 @@ http://www.ietf.org/rfc/rfc2396.txt
TYPES ---------------------------------------------------------------- TYPES ----------------------------------------------------------------
> url _url struct_
struct url (scheme user host port path fragment) (define-struct url (scheme user host port path-absolute? path query fragment))
scheme : string or #f > url-scheme : url -> (union false/c string?)
user : string or #f > url-user : url -> (union false/c string?)
host : string or #f > url-host : url -> (union false/c string?)
port : number or #f > url-port : url -> (union false/c number?)
path : (listof (union string path/param)) > url-path-absolute? : url -> boolean?
query : (listof (cons symbol string)) > url-path : url -> (listof path/param?)
fragment : string or #f > url-query : url -> (listof (cons/c symbol? string?))
> url-fragment : url -> (union false/c string?)
> url? : any -> boolean
> make-url : ...as-above.. -> url
The basic structure for all URLs. The basic structure for all URLs, as explained in rfc3986
http://www.ietf.org/rfc/rfc3986.txt
For example, this url:
http://sky@www.cs.brown.edu:801/cgi-bin/finger;xyz?name=shriram;host=nw#top http://sky@www.cs.brown.edu:801/cgi-bin/finger;xyz?name=shriram;host=nw#top
{-1} {2} {----3---------} {4}{---5---------} {6} {----7-------------} {8} {-1} {2} {----3---------} {4}{---5-------------}{----7-------------} {8}
{6}
1 = scheme, 2 = user, 3 = host, 4 = port, 1 = scheme, 2 = user, 3 = host, 4 = port,
5 = path, 6 = param, 7 = query, 8 = fragment 5 = path, 6 = param (or last path segment),
7 = query, 8 = fragment
If the scheme is "file", then the path is a platform-dependent
string. The library does, however, check for the presence of a
fragment designator and, if there is one, separates it from the rest
of the path. If the path is syntactically a directory, the last
string the resulting structure's `path' list is an empty string.
If the path is absolute, the `host' is the root path, otherwise
`host' is #f.
For non-"file" schemes, the path is a URL path as defined in the
standard.
If a path segment has a parameter, it is represented with
an instance of the path/param struct. Otherwise, it is
just represented as a string.
The strings inside the fields user, path, query, and fragment are The strings inside the fields user, path, query, and fragment are
represented directly as Scheme strings, ie without represented directly as Scheme strings, ie without
@ -74,16 +67,20 @@ TYPES ----------------------------------------------------------------
An empty string at the end of the list of paths An empty string at the end of the list of paths
corresponds to a url that ends in a slash. For example, corresponds to a url that ends in a slash. For example,
this url: http://www.drscheme.org/a/ has a path field this url: http://www.drscheme.org/a/ has a path field with
'("a" "") and this url: http://www.drscheme.org/a strings "a" and "" and this url: http://www.drscheme.org/a
has a path field '("a"). has a path field with only the string "a".
> path/param _ path/param struct_
(define-struct path/param (path param))
A pair of strings, accessible with _path/param-path_ and > path/param-path : path/param -> (union string? (symbols 'up 'same))
path/param-params_ that joins a path segment with its > path/param-param : path/param -> (listof string)
params in a url. The function _path/param?_ recognizes > path/param? : any -> boolean
such pairs. > make-path/param : (union string? (symbols 'up 'same)) (listof string) -> path/param
A pair, that joins a path segment with its params in a
url.
> pure-port > pure-port
@ -114,42 +111,9 @@ PROCEDURES -----------------------------------------------------------
Given a base URL and a relative path, combines the two and returns a Given a base URL and a relative path, combines the two and returns a
new URL as per the URL combination specification. Call the new URL as per the URL combination specification. Call the
arguments base and relative. They are combined according to the arguments base and relative. They are combined according to the
following rules (applied in order until one matches): rules in rfc3986 (above).
- If either argument is an empty URL, the result is the other
argument.
- If relative sports a scheme, then the result is relative.
- If the base has scheme "file", the procedure uses the special rule
specified below.
- If relative specifies a host, then the result is relative.
Failing the above, relative inherit's base's host and port. Then:
- If the path of relative begins with a "/", the result is relative.
- If the path of relative is empty, then relative inherits base's
params and query, and the result is relative.
- Otherwise base and relative are combined as per the standard
specification of merging and normalization.
On combining "file" schemes: This function does not raise any exceptions.
If the base has scheme "file", relative is treated as a
slash-separated path (unless it contains an empty path --- only
params, queries, and fragments --- in which case the path is not
used). These path fragments are combined using build-path, starting
with the base's path. Three path segments are special: ".."
corresponds to an 'up directive to build-path, while "." and ""
correspond to 'same. As a consequence, if relative begins with "/",
this does not make it an absolute URL: the leading slash is treated
as if the initial segment is "", so this has no effect, and base's
path remains the base path of the result. If base refers to a
directory, relative is indexed from that directory; if base refers
to a file, relative is indexed from the directory containing the
file. Note that if base does not refer to an actual directory that
exists on the filesystem, then it must syntactically be a directory
as understood by split-path.
The above algorithm tests for the presence of a directory to
correctly combine paths. As a result, it can raise any exception
raised by directory-exists?. None of these exceptions is trapped by
the procedure; consumers must be prepared for them.
> (netscape/string->url string) -> url > (netscape/string->url string) -> url

View File

@ -5,6 +5,8 @@
(define-signature net:uri-codec^ (define-signature net:uri-codec^
(uri-encode (uri-encode
uri-decode uri-decode
uri-path-segment-encode
uri-path-segment-decode
form-urlencoded-encode form-urlencoded-encode
form-urlencoded-decode form-urlencoded-decode
alist->form-urlencoded alist->form-urlencoded

View File

@ -1,3 +1,7 @@
;; 1/2/2006: Added a mapping for uri path segments
;; that allows more characters to remain decoded
;; -robby
;;; ;;;
;;; <uri-codec-unit.ss> ---- En/Decode URLs and form-urlencoded data ;;; <uri-codec-unit.ss> ---- En/Decode URLs and form-urlencoded data
;;; Time-stamp: <03/04/25 10:31:31 noel> ;;; Time-stamp: <03/04/25 10:31:31 noel>
@ -75,6 +79,7 @@
(lib "match.ss") (lib "match.ss")
(lib "string.ss") (lib "string.ss")
(lib "etc.ss") (lib "etc.ss")
(lib "list.ss")
"uri-codec-sig.ss") "uri-codec-sig.ss")
(provide uri-codec@) (provide uri-codec@)
@ -109,13 +114,19 @@
;; Characters that sometimes map to themselves ;; Characters that sometimes map to themselves
(define safe-mapping (define safe-mapping
(map (lambda (char) (map (lambda (char) (cons char char))
(cons char char))
'(#\- #\_ #\. #\! #\~ #\* #\' #\( #\)))) '(#\- #\_ #\. #\! #\~ #\* #\' #\( #\))))
;; The strict URI mapping ;; The strict URI mapping
(define uri-mapping (define uri-mapping
(append alphanumeric-mapping safe-mapping)) (append alphanumeric-mapping
safe-mapping))
;; The uri path segment mapping from RFC 3986
(define uri-path-segment-mapping
(append alphanumeric-mapping
safe-mapping
(map (λ (c) (cons c c)) (string->list "@+,=$&:"))))
;; The form-urlencoded mapping ;; The form-urlencoded mapping
(define form-urlencoded-mapping (define form-urlencoded-mapping
@ -157,6 +168,10 @@
(define-values (uri-encoding-vector uri-decoding-vector) (define-values (uri-encoding-vector uri-decoding-vector)
(make-codec-tables uri-mapping)) (make-codec-tables uri-mapping))
(define-values (uri-path-segment-encoding-vector
uri-path-segment-decoding-vector)
(make-codec-tables uri-path-segment-mapping))
(define-values (form-urlencoded-encoding-vector (define-values (form-urlencoded-encoding-vector
form-urlencoded-decoding-vector) form-urlencoded-decoding-vector)
(make-codec-tables form-urlencoded-mapping)) (make-codec-tables form-urlencoded-mapping))
@ -199,6 +214,14 @@
(define (uri-decode str) (define (uri-decode str)
(decode uri-decoding-vector str)) (decode uri-decoding-vector str))
;; string -> string
(define (uri-path-segment-encode str)
(encode uri-path-segment-encoding-vector str))
;; string -> string
(define (uri-path-segment-decode str)
(decode uri-path-segment-decoding-vector str))
;; string -> string ;; string -> string
(define (form-urlencoded-encode str) (define (form-urlencoded-encode str)
(encode form-urlencoded-encoding-vector str)) (encode form-urlencoded-encoding-vector str))

View File

@ -1,7 +1,7 @@
(module url-structs mzscheme (module url-structs mzscheme
(require (lib "contract.ss")) (require (lib "contract.ss"))
(define-struct url (scheme user host port path query fragment)) (define-struct url (scheme user host port path-absolute? path query fragment))
(define-struct path/param (path param)) (define-struct path/param (path param))
(provide/contract (provide/contract
@ -9,8 +9,9 @@
[user (union false/c string?)] [user (union false/c string?)]
[host (union false/c string?)] [host (union false/c string?)]
[port (union false/c number?)] [port (union false/c number?)]
[path (listof (union string? path/param?))] [path-absolute? boolean?]
[path (listof path/param?)]
[query (listof (cons/c symbol? string?))] [query (listof (cons/c symbol? string?))]
[fragment (union false/c string?)])) [fragment (union false/c string?)]))
(struct path/param ([path string?] (struct path/param ([path (union string? (symbols 'up 'same))]
[param string?])))) [param (listof string?)]))))

View File

@ -13,7 +13,8 @@
(require (lib "file.ss") (require (lib "file.ss")
(lib "unitsig.ss") (lib "unitsig.ss")
(lib "port.ss") (lib "port.ss")
"url-structs.ss" (lib "string.ss")
"url-structs.ss"
"uri-codec.ss" "uri-codec.ss"
"url-sig.ss" "url-sig.ss"
"tcp-sig.ss") "tcp-sig.ss")
@ -64,7 +65,9 @@
(define (url->file-path url) (define (url->file-path url)
(path->string (path->string
(apply build-path (or (url-host url) 'same) (apply build-path (or (url-host url) 'same)
(map (lambda (x) (if (equal? x "") 'same x)) (url-path url))))) (map (lambda (x) (if (equal? x "") 'same x))
(map path/param-path
(url-path url))))))
(define url->string (define url->string
(lambda (url) (lambda (url)
@ -83,13 +86,18 @@
(string-append "#" fragment)))) (string-append "#" fragment))))
(else (else
(let ((sa string-append)) (let ((sa string-append))
(sa (if scheme (sa scheme "://") "") (sa (if scheme (sa scheme ":") "")
(if user (sa (uri-encode user) "@") "") (if (or user host port)
(if host host "") (sa
(if port (sa ":" (number->string port)) "") "//"
; There used to be a "/" here, but that causes an (if user (sa (uri-encode user) "@") "")
; extra leading slash -- wonder why it ever worked! (if host host "")
(combine-path-strings path) (if port (sa ":" (number->string port)) "")
; There used to be a "/" here, but that causes an
; extra leading slash -- wonder why it ever worked!
)
"")
(combine-path-strings (url-path-absolute? url) path)
;(if query (sa "?" (uri-encode query)) "") ;(if query (sa "?" (uri-encode query)) "")
(if (null? query) "" (sa "?" (alist->form-urlencoded query))) (if (null? query) "" (sa "?" (alist->form-urlencoded query)))
(if fragment (sa "#" (uri-encode fragment)) "")))))))) (if fragment (sa "#" (uri-encode fragment)) ""))))))))
@ -127,6 +135,7 @@
(if proxy (if proxy
url url
(make-url #f #f #f #f (make-url #f #f #f #f
(url-path-absolute? url)
(url-path url) (url-path url)
(url-query url) (url-query url)
(url-fragment url)))))) (url-fragment url))))))
@ -223,62 +232,95 @@
(not (url-fragment url)) (not (url-fragment url))
(null? (url-path url))))) (null? (url-path url)))))
(define (combine-url/relative base string)
(let ([relative (string->url string)])
(cond
[(empty-url? base) ; Step 1
relative]
[(empty-url? relative) ; Step 2a
base]
[(url-scheme relative) ; Step 2b
relative]
[else ; Step 2c
(set-url-scheme! relative (url-scheme base))
(cond
[(url-host relative) ; Step 3
relative]
[else
(set-url-host! relative (url-host base))
(set-url-port! relative (url-port base)) ; Unspecified!
(let ([rel-path (url-path relative)])
(cond
[(and (not (equal? string "")) ; Step 4
(char=? #\/ (string-ref string 0)))
relative]
[(or (not rel-path) ; Step 5
(null? rel-path))
(set-url-path! relative (url-path base))
(when (url-query relative)
(set-url-query! relative (url-query base)))
relative]
[else ; Step 6
(merge-and-normalize
(url-path base) relative)]))])])))
(define (merge-and-normalize base-path relative-url) ;; transliteration of code in rfc 3986, section 5.2.2
(let* ([joined (define (combine-url/relative Base string)
(let loop ([base-path base-path]) (let ([R (string->url string)]
(cond [T (make-url #f #f #f #f #f '() '() #f)])
[(null? base-path) (url-path relative-url)] (if (url-scheme R)
[(null? (cdr base-path)) (url-path relative-url)] (begin
[else (cons (car base-path) (loop (cdr base-path)))]))] (set-url-scheme! T (url-scheme R))
[reversed/simplified (set-url-user! T (url-user R)) ;; authority
(if (null? joined) (set-url-host! T (url-host R)) ;; authority
null (set-url-port! T (url-port R)) ;; authority
(let loop ([segs (reverse joined)]) (set-url-path-absolute?! T (url-path-absolute? R))
(cond (set-url-path! T (remove-dot-segments (url-path R)))
[(null? segs) null] (set-url-query! T (url-query R)))
[else (let ([fst (car segs)]) (begin
(cond (if (url-host R) ;; => authority is defined
[(string=? fst ".") (begin
(loop (cdr segs))] (set-url-user! T (url-user R)) ;; authority
[(string=? fst "..") (set-url-host! T (url-host R)) ;; authority
(if (null? (cdr segs)) (set-url-port! T (url-port R)) ;; authority
segs (set-url-path-absolute?! T (url-path-absolute? R))
(loop (cddr segs)))] (set-url-path! T (remove-dot-segments (url-path R)))
[else (cons (car segs) (loop (cdr segs)))]))])))]) (set-url-query! T (url-query R)))
(set-url-path! relative-url (reverse reversed/simplified)) (begin
relative-url)) (if (null? (url-path R)) ;; => R has empty path
(begin
(set-url-path-absolute?! T (url-path-absolute? Base))
(set-url-path! T (url-path Base))
(if (not (null? (url-query R)))
(set-url-query! T (url-query R))
(set-url-query! T (url-query Base))))
(begin
(cond
[(url-path-absolute? R)
(set-url-path-absolute?! T #t)
(set-url-path! T (remove-dot-segments (url-path R)))]
[(and (null? (url-path Base))
(url-host Base))
(set-url-path-absolute?! T #t)
(set-url-path! T (remove-dot-segments (url-path R)))]
[else
(set-url-path-absolute?! T (url-path-absolute? Base))
(set-url-path! T (remove-dot-segments
(append (all-but-last (url-path Base))
(url-path R))))])
(set-url-query! T (url-query R))))
(set-url-user! T (url-user Base)) ;; authority
(set-url-host! T (url-host Base)) ;; authority
(set-url-port! T (url-port Base)))) ;; authority
(set-url-scheme! T (url-scheme Base))))
(set-url-fragment! T (url-fragment R))
T))
(define (all-but-last lst)
(cond
[(null? lst) null]
[(null? (cdr lst)) null]
[else (cons (car lst) (all-but-last (cdr lst)))]))
;; cribbed from 5.2.4 in rfc 3986
;; the strange cases 2 and 4 implicitly change urls
;; with paths segments "." and ".." at the end
;; into "./" and "../" respectively
(define (remove-dot-segments path)
(let loop ([path path]
[result '()])
(cond
[(null? path) (reverse result)]
[(and (eq? (path/param-path (car path)) 'same)
(null? (cdr path)))
(loop (cdr path)
(cons (make-path/param "" '()) result))]
[(eq? (path/param-path (car path)) 'same)
(loop (cdr path)
result)]
[(and (eq? (path/param-path (car path)) 'up)
(null? (cdr path))
(not (null? result)))
(loop (cdr path)
(cons (make-path/param "" '()) (cdr result)))]
[(and (eq? (path/param-path (car path)) 'up)
(not (null? result)))
(loop (cdr path) (cdr result))]
[(and (eq? (path/param-path (car path)) 'up)
(null? result))
;; when we go up too far, just drop the "up"s.
(loop (cdr path) result)]
[else
(loop (cdr path) (cons (car path) result))])))
;; call/input-url : url x (url -> in-port) x (in-port -> T) ;; call/input-url : url x (url -> in-port) x (in-port -> T)
;; [x list (str)] -> T ;; [x list (str)] -> T
@ -363,9 +405,10 @@
#f ; user #f ; user
(and root (path->string root)) ; host (and root (path->string root)) ; host
#f ; port #f ; port
(append (map path->string elems) (absolute-path? path)
(append (map (λ (x) (make-path/param (path->string x) '())) elems)
(if (eq? kind 'dir) (if (eq? kind 'dir)
'("") (list (make-path/param "" '()))
null)) null))
'() ; query '() ; query
fragment)) fragment))
@ -383,20 +426,27 @@
(get-num (lambda (pos skip-left skip-right) (get-num (lambda (pos skip-left skip-right)
(let ((s (get-str pos skip-left skip-right))) (let ((s (get-str pos skip-left skip-right)))
(if s (string->number s) #f)))) (if s (string->number s) #f))))
(host (get-str 5 0 0))) (host (get-str 5 0 0))
(make-url (get-str 2 0 1) ; scheme (path (get-str 7 0 0))
(scheme (get-str 2 0 1)))
(when (string? scheme) (string-lowercase! scheme))
(when (string? host) (string-lowercase! host))
(make-url scheme
(uri-decode/maybe (get-str 4 0 1)) ; user (uri-decode/maybe (get-str 4 0 1)) ; user
host host
(get-num 6 1 0) ; port (get-num 6 1 0) ; port
(and (not (= 0 (string-length path)))
(char=? #\/ (string-ref path 0)))
(separate-path-strings (separate-path-strings
(let ([path (get-str 7 0 0)]) ;; If path is "" and the input is an absolute URL
;; If path is "" and the input is an absolute URL ;; with a hostname, then the intended path is "/",
;; with a hostname, then the intended path is "/", ;; but the URL is missing a "/" at the end.
;; but the URL is missing a "/" at the end. path
(if (and (string=? path "") #;
host) (if (and (string=? path "")
"/" host)
path))) "/"
path))
;(uri-decode/maybe (get-str 8 1 0)) ; ;(uri-decode/maybe (get-str 8 1 0)) ;
;query ;query
(let ([q (get-str 8 1 0)]) (let ([q (get-str 8 1 0)])
@ -411,7 +461,7 @@
;; in an attempt to be "friendly" ;; in an attempt to be "friendly"
(and f (uri-decode (regexp-replace* "%([^0-9a-fA-F])" f "%25\\1")))) (and f (uri-decode (regexp-replace* "%([^0-9a-fA-F])" f "%25\\1"))))
;; separate-path-strings : string[starting with /] -> (listof (union string path/param)) ;; separate-path-strings : string[starting with /] -> (listof path/param)
(define (separate-path-strings str) (define (separate-path-strings str)
(cond (cond
[(string=? str "") '()] [(string=? str "") '()]
@ -423,31 +473,57 @@
[(regexp-match #rx"([^/]*)/(.*)$" str) [(regexp-match #rx"([^/]*)/(.*)$" str)
=> =>
(lambda (m) (lambda (m)
(cons (maybe-separate-params (cadr m)) (loop (caddr m))))] (cons (separate-params (cadr m)) (loop (caddr m))))]
[else (list (maybe-separate-params str))]))])) [else (list (separate-params str))]))]))
(define (maybe-separate-params s) (define (separate-params s)
(let ([lst (map path-segment-decode (regexp-split #rx";" s))])
(make-path/param (car lst) (cdr lst))))
(define (path-segment-decode p)
(cond (cond
[(regexp-match #rx"^([^;]*);(.*)$" s) [(string=? p "..") 'up]
=> [(string=? p ".") 'same]
(lambda (m) [else (uri-path-segment-decode p)]))
(make-path/param (cadr m) (caddr m)))]
[else s]))
(define (combine-path-strings strs) (define (path-segment-encode p)
(cond
[(eq? p 'up) ".."]
[(eq? p 'same) "."]
[(equal? p "..") "%2e%2e"]
[(equal? p ".") "%2e"]
[else (uri-path-segment-encode p)]))
(define (combine-path-strings absolute? path/params)
(cond
[(null? path/params) ""]
[else
(apply
string-append
(if absolute? "/" "")
(add-between
"/"
(map join-params path/params)))]))
(define (join-params s)
(apply (apply
string-append string-append
(let loop ([strs strs]) (add-between ";"
(cond (map
[(null? strs) '()] path-segment-encode
[else (list* "/" (cons (path/param-path s)
(maybe-join-params (car strs)) (path/param-param s))))))
(loop (cdr strs)))]))))
;; needs to unquote things! (define (add-between bet lst)
(define (maybe-join-params s)
(cond (cond
[(string? s) s] [(null? lst) null]
[else (string-append (path/param-path s) [(null? (cdr lst)) lst]
";" [else
(path/param-param s))]))))) (let loop ([fst (car lst)]
[lst (cdr lst)])
(cond
[(null? lst) (list fst)]
[else (list* fst
bet
(loop (car lst)
(cdr lst)))]))])))))

View File

@ -21,6 +21,7 @@
user user
host host
port port
path-absolute?
path path
query query
fragment)) fragment))

View File

@ -140,11 +140,12 @@
(let () (let ()
(define (test-s->u vec str) (define (test-s->u vec str)
(define (string->url/vec str) (url->vec (string->url str)))
(define (url/vec->string vec) (url->string (vec->url vec)))
(test vec string->url/vec str) (test vec string->url/vec str)
(test str url/vec->string vec)) (test str url/vec->string vec))
(define (string->url/vec str) (url->vec (string->url str)))
(define (url/vec->string vec) (url->string (vec->url vec)))
(define (test-c-u/r expected base relative) (define (test-c-u/r expected base relative)
(define (combine-url/relative-vec x y) (define (combine-url/relative-vec x y)
(url->vec (combine-url/relative (vec->url x) y))) (url->vec (combine-url/relative (vec->url x) y)))
@ -156,82 +157,104 @@
(vector-ref vec 1) (vector-ref vec 1)
(vector-ref vec 2) (vector-ref vec 2)
(vector-ref vec 3) (vector-ref vec 3)
(map (lambda (x) (if (string? x) (vector-ref vec 4)
x (map (lambda (x)
(make-path/param (vector-ref x 0) (vector-ref x 1)))) (let ([lst (vector->list x)])
(vector-ref vec 4)) (make-path/param (car lst) (cdr lst))))
(vector-ref vec 5) (vector-ref vec 5))
(vector-ref vec 6))) (vector-ref vec 6)
(vector-ref vec 7)))
(define (url->vec url) (define (url->vec url)
(vector (url-scheme url) (vector (url-scheme url)
(url-user url) (url-user url)
(url-host url) (url-host url)
(url-port url) (url-port url)
(map (lambda (x) (if (string? x) (url-path-absolute? url)
x (map (lambda (x) (list->vector (cons (path/param-path x) (path/param-param x))))
(vector (path/param-path x) (path/param-param x))))
(url-path url)) (url-path url))
(url-query url) (url-query url)
(url-fragment url))) (url-fragment url)))
(test-s->u (vector #f #f #f #f '("") '() #f) (test-s->u (vector #f #f #f #f #t '(#("")) '() #f)
"/") "/")
(test-s->u (vector #f #f #f #f '() '() #f) (test-s->u (vector #f #f #f #f #f '() '() #f)
"") "")
(test-s->u (vector "http" #f "www.drscheme.org" #f '("") '() #f) (test-s->u (vector "http" #f "www.drscheme.org" #f #f '() '() #f)
"http://www.drscheme.org")
(test-s->u (vector "http" #f "www.drscheme.org" #f #t '(#("")) '() #f)
"http://www.drscheme.org/") "http://www.drscheme.org/")
(test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '() #f)
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '() #f)
"http://www.drscheme.org/a/b/c") "http://www.drscheme.org/a/b/c")
(test-s->u (vector "http" "robby" "www.drscheme.org" #f (list "a" "b" "c") '() #f) (test-s->u (vector "http" "robby" "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '() #f)
"http://robby@www.drscheme.org/a/b/c") "http://robby@www.drscheme.org/a/b/c")
(test-s->u (vector "http" #f "www.drscheme.org" 8080 (list "a" "b" "c") '() #f) (test-s->u (vector "http" #f "www.drscheme.org" 8080 #t (list #("a") #("b") #("c")) '() #f)
"http://www.drscheme.org:8080/a/b/c") "http://www.drscheme.org:8080/a/b/c")
(test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '() "joe") (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '() "joe")
"http://www.drscheme.org/a/b/c#joe") "http://www.drscheme.org/a/b/c#joe")
(test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tim . "")) #f) (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tim . "")) #f)
"http://www.drscheme.org/a/b/c?tim=") "http://www.drscheme.org/a/b/c?tim=")
(test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tim . "")) "joe") (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tim . "")) "joe")
"http://www.drscheme.org/a/b/c?tim=#joe") "http://www.drscheme.org/a/b/c?tim=#joe")
(test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tim . "tim")) "joe") (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tim . "tim")) "joe")
"http://www.drscheme.org/a/b/c?tim=tim#joe") "http://www.drscheme.org/a/b/c?tim=tim#joe")
(test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tam . "tom")) "joe") (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tam . "tom")) "joe")
"http://www.drscheme.org/a/b/c?tam=tom#joe") "http://www.drscheme.org/a/b/c?tam=tom#joe")
(test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tam . "tom") (pam . "pom")) "joe") (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tam . "tom") (pam . "pom")) "joe")
"http://www.drscheme.org/a/b/c?tam=tom;pam=pom#joe") "http://www.drscheme.org/a/b/c?tam=tom;pam=pom#joe")
(parameterize ([current-alist-separator-mode 'semi]) (parameterize ([current-alist-separator-mode 'semi])
(test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tam . "tom") (pam . "pom")) "joe") (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tam . "tom") (pam . "pom")) "joe")
"http://www.drscheme.org/a/b/c?tam=tom;pam=pom#joe")) "http://www.drscheme.org/a/b/c?tam=tom;pam=pom#joe"))
(parameterize ([current-alist-separator-mode 'amp]) (parameterize ([current-alist-separator-mode 'amp])
(test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((tam . "tom") (pam . "pom")) "joe") (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tam . "tom") (pam . "pom")) "joe")
"http://www.drscheme.org/a/b/c?tam=tom&pam=pom#joe")) "http://www.drscheme.org/a/b/c?tam=tom&pam=pom#joe"))
(test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" #("c" "b")) '() #f) (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c" "b")) '() #f)
"http://www.drscheme.org/a/b/c;b") "http://www.drscheme.org/a/b/c;b")
(test-s->u (vector "http" #f "www.drscheme.org" #f (list #("a" "x") "b" #("c" "b")) '() #f) (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a" "x") #("b") #("c" "b")) '() #f)
"http://www.drscheme.org/a;x/b/c;b") "http://www.drscheme.org/a;x/b/c;b")
;; test unquoting for % ;; test unquoting for %
(test-s->u (vector "http" #f "www.drscheme.org" #f (list "a" "b" "c") '((ti#m . "")) "jo e") (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((ti#m . "")) "jo e")
"http://www.drscheme.org/a/b/c?ti%23m=#jo%20e") "http://www.drscheme.org/a/b/c?ti%23m=#jo%20e")
(test-s->u (vector "http" #f "www.drscheme.org" #f (list #("a " " a") " b " " c ") '() #f) (test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a " " a") #(" b ") #(" c ")) '() #f)
"http://www.drscheme.org/a ; a/ b / c ") "http://www.drscheme.org/a%20;%20a/%20b%20/%20c%20")
(test-s->u (vector "http" "robb y" "www.drscheme.org" #f '("") '() #f) (test-s->u (vector "http" "robb y" "www.drscheme.org" #f #t '(#("")) '() #f)
"http://robb%20y@www.drscheme.org/") "http://robb%20y@www.drscheme.org/")
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("%a") #("b/") #("c")) '() #f)
"http://www.drscheme.org/%25a/b%2f/c")
(test-s->u (vector "mailto" #f #f #f '("robby@plt-scheme.org") () #f) ;; test the characters that need to be encoded in paths vs those that do not need to
;; be encoded in paths
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a:@!$&'()*+,=z") #("/?#[];") #("")) '() #f)
"http://www.drscheme.org/a:@!$&'()*+,=z/%2f%3f%23%5b%5d%3b/")
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #(".") #("..") '#(same) '#(up) #("...") #("abc.def")) '() #f)
"http://www.drscheme.org/%2e/%2e%2e/./../.../abc.def")
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("." "") #(".." "") #(same "") #(up "") #("..." "") #("abc.def" "")) '() #f)
"http://www.drscheme.org/%2e;/%2e%2e;/.;/..;/...;/abc.def;")
(test (vector "http" "ROBBY" "www.drscheme.org" 80 #t '(#("INDEX.HTML" "XXX")) '((T . "P")) "YYY")
string->url/vec
"HTTP://ROBBY@WWW.DRSCHEME.ORG:80/INDEX.HTML;XXX?T=P#YYY")
(test-s->u (vector "mailto" #f #f #f #f '(#("robby@plt-scheme.org")) '() #f)
"mailto:robby@plt-scheme.org") "mailto:robby@plt-scheme.org")
(let ([empty-url (make-url #f #f #f #f '() '() #f)]) (let ([empty-url (make-url #f #f #f #f #f '() '() #f)])
(test-c-u/r (string->url "http://www.drscheme.org") (test-c-u/r (string->url "http://www.drscheme.org")
empty-url empty-url
"http://www.drscheme.org") "http://www.drscheme.org"))
(test-c-u/r (string->url "http://www.drscheme.org")
(string->url "http://www.drscheme.org") (test-c-u/r (string->url "http://www.drscheme.org")
"")) (string->url "http://www.drscheme.org")
"")
(test-c-u/r (string->url "http://www.mzscheme.org") (test-c-u/r (string->url "http://www.mzscheme.org")
(string->url "http://www.drscheme.org/") (string->url "http://www.drscheme.org/")
"http://www.mzscheme.org") "http://www.mzscheme.org")
(test-c-u/r (string->url "http://www.drscheme.org/index.html") (test-c-u/r (string->url "http://www.drscheme.org/index.html")
(string->url "http://www.drscheme.org/") (string->url "http://www.drscheme.org/")
"index.html") "index.html")
@ -253,6 +276,25 @@
(test-c-u/r (string->url "http://www.drscheme.org/a/b/c/d/index.html") (test-c-u/r (string->url "http://www.drscheme.org/a/b/c/d/index.html")
(string->url "http://www.drscheme.org/a/b/c/") (string->url "http://www.drscheme.org/a/b/c/")
"d/index.html") "d/index.html")
(test-c-u/r (string->url "http://www.drscheme.org/a/b/index.html")
(string->url "http://www.drscheme.org/a/b/c/")
"../index.html")
(test-c-u/r (string->url "http://www.drscheme.org/a/b/c/index.html")
(string->url "http://www.drscheme.org/a/b/c/")
"./index.html")
(test-c-u/r (string->url "http://www.drscheme.org/a/b/c/%2e%2e/index.html")
(string->url "http://www.drscheme.org/a/b/c/")
"%2e%2e/index.html")
(test-c-u/r (string->url "http://www.drscheme.org/a/index.html")
(string->url "http://www.drscheme.org/a/b/../c/")
"../index.html")
(test-c-u/r (string->url "http://www.drscheme.org/a/b/c/d/index.html")
(string->url "http://www.drscheme.org/a/b/c/d/index.html#ghijkl")
"index.html")
(test-c-u/r (string->url "http://www.drscheme.org/a/b/c/d/index.html#abcdef")
(string->url "http://www.drscheme.org/a/b/c/d/index.html#ghijkl")
"#abcdef")
(test-c-u/r (string->url "file:///a/b/c/d/index.html") (test-c-u/r (string->url "file:///a/b/c/d/index.html")
(string->url "file:///a/b/c/") (string->url "file:///a/b/c/")
@ -260,6 +302,69 @@
(test-c-u/r (string->url "file:///a/b/d/index.html") (test-c-u/r (string->url "file:///a/b/d/index.html")
(string->url "file:///a/b/c") (string->url "file:///a/b/c")
"d/index.html") "d/index.html")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; tests from rfc 3986
;;
(for-each
(λ (line)
(test-c-u/r (string->url (caddr line))
(string->url "http://a/b/c/d;p?q")
(car line)))
'(("g:h" = "g:h")
("g" = "http://a/b/c/g")
("./g" = "http://a/b/c/g")
("g/" = "http://a/b/c/g/")
("/g" = "http://a/g")
("//g" = "http://g")
("?y" = "http://a/b/c/d;p?y")
("g?y" = "http://a/b/c/g?y")
("#s" = "http://a/b/c/d;p?q#s")
("g#s" = "http://a/b/c/g#s")
("g?y#s" = "http://a/b/c/g?y#s")
(";x" = "http://a/b/c/;x")
("g;x" = "http://a/b/c/g;x")
("g;x?y#s" = "http://a/b/c/g;x?y#s")
("" = "http://a/b/c/d;p?q")
("." = "http://a/b/c/")
("./" = "http://a/b/c/")
(".." = "http://a/b/")
("../" = "http://a/b/")
("../g" = "http://a/b/g")
("../.." = "http://a/")
("../../" = "http://a/")
("../../g" = "http://a/g")
;; abnormal examples follow
("../../../g" = "http://a/g")
("../../../../g" = "http://a/g")
("/./g" = "http://a/g")
("/../g" = "http://a/g")
("g." = "http://a/b/c/g.")
(".g" = "http://a/b/c/.g")
("g.." = "http://a/b/c/g..")
("..g" = "http://a/b/c/..g")
("./../g" = "http://a/b/g")
("./g/." = "http://a/b/c/g/")
("g/./h" = "http://a/b/c/g/h")
("g/../h" = "http://a/b/c/h")
("g;x=1/./y" = "http://a/b/c/g;x=1/y")
("g;x=1/../y" = "http://a/b/c/y")
("g?y/./x" = "http://a/b/c/g?y/./x")
("g?y/../x" = "http://a/b/c/g?y/../x")
("g#s/./x" = "http://a/b/c/g#s/./x")
("g#s/../x" = "http://a/b/c/g#s/../x")
("http:g" = "http:g") ; for strict parsers
))
) )
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@ -141,9 +141,10 @@
;; url->param: url -> (union string #f) ;; url->param: url -> (union string #f)
(define (url->param a-url) (define (url->param a-url)
(let ([l (filter path/param? (url-path a-url))]) (let ([l (filter (λ (x) (not (null? (path/param-param x))))
(url-path a-url))])
(and (not (null? l)) (and (not (null? l))
(path/param-param (car l))))) (car (path/param-param (car l))))))
;; insert-param: url string -> string ;; insert-param: url string -> string
;; add a path/param to the path in a url ;; add a path/param to the path in a url

View File

@ -292,21 +292,5 @@
(cond (cond
((char=? first #\+) ((char=? first #\+)
(values #\space rest)) (values #\space rest))
((char=? first #\%)
; MF: I rewrote this code so that Spidey could eliminate all checks.
; I am more confident this way that this hairy expression doesn't barf.
(if (pair? rest)
(let ([rest-rest (cdr rest)])
(if (pair? rest-rest)
(values (integer->char
(or (string->number (string (car rest) (car rest-rest)) 16)
(raise (make-invalid-%-suffix
(if (string->number (string (car rest)) 16)
(car rest-rest)
(car rest))))))
(cdr rest-rest))
(raise (make-incomplete-%-suffix rest))))
(raise (make-incomplete-%-suffix rest))))
(else (values first rest))))) (else (values first rest)))))
(cons this (loop rest)))))))) (cons this (loop rest)))))))))
)