diff --git a/collects/net/uri-codec-sig.ss b/collects/net/uri-codec-sig.ss index f0aed95..2d13a55 100644 --- a/collects/net/uri-codec-sig.ss +++ b/collects/net/uri-codec-sig.ss @@ -5,6 +5,8 @@ (define-signature net:uri-codec^ (uri-encode uri-decode + uri-path-segment-encode + uri-path-segment-decode form-urlencoded-encode form-urlencoded-decode alist->form-urlencoded diff --git a/collects/net/uri-codec-unit.ss b/collects/net/uri-codec-unit.ss index 5c141f5..d833fb3 100644 --- a/collects/net/uri-codec-unit.ss +++ b/collects/net/uri-codec-unit.ss @@ -1,3 +1,7 @@ +;; 1/2/2006: Added a mapping for uri path segments +;; that allows more characters to remain decoded +;; -robby + ;;; ;;; ---- En/Decode URLs and form-urlencoded data ;;; Time-stamp: <03/04/25 10:31:31 noel> @@ -75,6 +79,7 @@ (lib "match.ss") (lib "string.ss") (lib "etc.ss") + (lib "list.ss") "uri-codec-sig.ss") (provide uri-codec@) @@ -109,13 +114,19 @@ ;; Characters that sometimes map to themselves (define safe-mapping - (map (lambda (char) - (cons char char)) + (map (lambda (char) (cons char char)) '(#\- #\_ #\. #\! #\~ #\* #\' #\( #\)))) ;; The strict URI mapping (define uri-mapping - (append alphanumeric-mapping safe-mapping)) + (append alphanumeric-mapping + safe-mapping)) + + ;; The uri path segment mapping from RFC 3986 + (define uri-path-segment-mapping + (append alphanumeric-mapping + safe-mapping + (map (λ (c) (cons c c)) (string->list "@+,=$&:")))) ;; The form-urlencoded mapping (define form-urlencoded-mapping @@ -156,6 +167,10 @@ (define-values (uri-encoding-vector uri-decoding-vector) (make-codec-tables uri-mapping)) + + (define-values (uri-path-segment-encoding-vector + uri-path-segment-decoding-vector) + (make-codec-tables uri-path-segment-mapping)) (define-values (form-urlencoded-encoding-vector form-urlencoded-decoding-vector) @@ -198,7 +213,15 @@ ;; string -> string (define (uri-decode str) (decode uri-decoding-vector str)) - + + ;; string -> string + (define (uri-path-segment-encode str) + (encode uri-path-segment-encoding-vector str)) + + ;; string -> string + (define (uri-path-segment-decode str) + (decode uri-path-segment-decoding-vector str)) + ;; string -> string (define (form-urlencoded-encode str) (encode form-urlencoded-encoding-vector str)) diff --git a/collects/net/url-unit.ss b/collects/net/url-unit.ss index b43ff6d..06d7094 100644 --- a/collects/net/url-unit.ss +++ b/collects/net/url-unit.ss @@ -13,7 +13,8 @@ (require (lib "file.ss") (lib "unitsig.ss") (lib "port.ss") - "url-structs.ss" + (lib "string.ss") + "url-structs.ss" "uri-codec.ss" "url-sig.ss" "tcp-sig.ss") @@ -64,7 +65,9 @@ (define (url->file-path url) (path->string (apply build-path (or (url-host url) 'same) - (map (lambda (x) (if (equal? x "") 'same x)) (url-path url))))) + (map (lambda (x) (if (equal? x "") 'same x)) + (map path/param-path + (url-path url)))))) (define url->string (lambda (url) @@ -83,13 +86,18 @@ (string-append "#" fragment)))) (else (let ((sa string-append)) - (sa (if scheme (sa scheme "://") "") - (if user (sa (uri-encode user) "@") "") - (if host host "") - (if port (sa ":" (number->string port)) "") - ; There used to be a "/" here, but that causes an - ; extra leading slash -- wonder why it ever worked! - (combine-path-strings path) + (sa (if scheme (sa scheme ":") "") + (if (or user host port) + (sa + "//" + (if user (sa (uri-encode user) "@") "") + (if host host "") + (if port (sa ":" (number->string port)) "") + ; There used to be a "/" here, but that causes an + ; extra leading slash -- wonder why it ever worked! + ) + "") + (combine-path-strings (url-path-absolute? url) path) ;(if query (sa "?" (uri-encode query)) "") (if (null? query) "" (sa "?" (alist->form-urlencoded query))) (if fragment (sa "#" (uri-encode fragment)) "")))))))) @@ -126,7 +134,8 @@ (url->string (if proxy url - (make-url #f #f #f #f + (make-url #f #f #f #f + (url-path-absolute? url) (url-path url) (url-query url) (url-fragment url)))))) @@ -223,62 +232,95 @@ (not (url-fragment url)) (null? (url-path url))))) - (define (combine-url/relative base string) - (let ([relative (string->url string)]) + + ;; transliteration of code in rfc 3986, section 5.2.2 + (define (combine-url/relative Base string) + (let ([R (string->url string)] + [T (make-url #f #f #f #f #f '() '() #f)]) + (if (url-scheme R) + (begin + (set-url-scheme! T (url-scheme R)) + (set-url-user! T (url-user R)) ;; authority + (set-url-host! T (url-host R)) ;; authority + (set-url-port! T (url-port R)) ;; authority + (set-url-path-absolute?! T (url-path-absolute? R)) + (set-url-path! T (remove-dot-segments (url-path R))) + (set-url-query! T (url-query R))) + (begin + (if (url-host R) ;; => authority is defined + (begin + (set-url-user! T (url-user R)) ;; authority + (set-url-host! T (url-host R)) ;; authority + (set-url-port! T (url-port R)) ;; authority + (set-url-path-absolute?! T (url-path-absolute? R)) + (set-url-path! T (remove-dot-segments (url-path R))) + (set-url-query! T (url-query R))) + (begin + (if (null? (url-path R)) ;; => R has empty path + (begin + (set-url-path-absolute?! T (url-path-absolute? Base)) + (set-url-path! T (url-path Base)) + (if (not (null? (url-query R))) + (set-url-query! T (url-query R)) + (set-url-query! T (url-query Base)))) + (begin + (cond + [(url-path-absolute? R) + (set-url-path-absolute?! T #t) + (set-url-path! T (remove-dot-segments (url-path R)))] + [(and (null? (url-path Base)) + (url-host Base)) + (set-url-path-absolute?! T #t) + (set-url-path! T (remove-dot-segments (url-path R)))] + [else + (set-url-path-absolute?! T (url-path-absolute? Base)) + (set-url-path! T (remove-dot-segments + (append (all-but-last (url-path Base)) + (url-path R))))]) + (set-url-query! T (url-query R)))) + (set-url-user! T (url-user Base)) ;; authority + (set-url-host! T (url-host Base)) ;; authority + (set-url-port! T (url-port Base)))) ;; authority + (set-url-scheme! T (url-scheme Base)))) + (set-url-fragment! T (url-fragment R)) + T)) + + (define (all-but-last lst) + (cond + [(null? lst) null] + [(null? (cdr lst)) null] + [else (cons (car lst) (all-but-last (cdr lst)))])) + + ;; cribbed from 5.2.4 in rfc 3986 + ;; the strange cases 2 and 4 implicitly change urls + ;; with paths segments "." and ".." at the end + ;; into "./" and "../" respectively + (define (remove-dot-segments path) + (let loop ([path path] + [result '()]) (cond - [(empty-url? base) ; Step 1 - relative] - [(empty-url? relative) ; Step 2a - base] - [(url-scheme relative) ; Step 2b - relative] - [else ; Step 2c - (set-url-scheme! relative (url-scheme base)) - (cond - [(url-host relative) ; Step 3 - relative] - [else - (set-url-host! relative (url-host base)) - (set-url-port! relative (url-port base)) ; Unspecified! - (let ([rel-path (url-path relative)]) - (cond - [(and (not (equal? string "")) ; Step 4 - (char=? #\/ (string-ref string 0))) - relative] - [(or (not rel-path) ; Step 5 - (null? rel-path)) - (set-url-path! relative (url-path base)) - (when (url-query relative) - (set-url-query! relative (url-query base))) - relative] - [else ; Step 6 - (merge-and-normalize - (url-path base) relative)]))])]))) - - (define (merge-and-normalize base-path relative-url) - (let* ([joined - (let loop ([base-path base-path]) - (cond - [(null? base-path) (url-path relative-url)] - [(null? (cdr base-path)) (url-path relative-url)] - [else (cons (car base-path) (loop (cdr base-path)))]))] - [reversed/simplified - (if (null? joined) - null - (let loop ([segs (reverse joined)]) - (cond - [(null? segs) null] - [else (let ([fst (car segs)]) - (cond - [(string=? fst ".") - (loop (cdr segs))] - [(string=? fst "..") - (if (null? (cdr segs)) - segs - (loop (cddr segs)))] - [else (cons (car segs) (loop (cdr segs)))]))])))]) - (set-url-path! relative-url (reverse reversed/simplified)) - relative-url)) + [(null? path) (reverse result)] + [(and (eq? (path/param-path (car path)) 'same) + (null? (cdr path))) + (loop (cdr path) + (cons (make-path/param "" '()) result))] + [(eq? (path/param-path (car path)) 'same) + (loop (cdr path) + result)] + [(and (eq? (path/param-path (car path)) 'up) + (null? (cdr path)) + (not (null? result))) + (loop (cdr path) + (cons (make-path/param "" '()) (cdr result)))] + [(and (eq? (path/param-path (car path)) 'up) + (not (null? result))) + (loop (cdr path) (cdr result))] + [(and (eq? (path/param-path (car path)) 'up) + (null? result)) + ;; when we go up too far, just drop the "up"s. + (loop (cdr path) result)] + [else + (loop (cdr path) (cons (car path) result))]))) ;; call/input-url : url x (url -> in-port) x (in-port -> T) ;; [x list (str)] -> T @@ -363,9 +405,10 @@ #f ; user (and root (path->string root)) ; host #f ; port - (append (map path->string elems) + (absolute-path? path) + (append (map (λ (x) (make-path/param (path->string x) '())) elems) (if (eq? kind 'dir) - '("") + (list (make-path/param "" '())) null)) '() ; query fragment)) @@ -383,20 +426,27 @@ (get-num (lambda (pos skip-left skip-right) (let ((s (get-str pos skip-left skip-right))) (if s (string->number s) #f)))) - (host (get-str 5 0 0))) - (make-url (get-str 2 0 1) ; scheme + (host (get-str 5 0 0)) + (path (get-str 7 0 0)) + (scheme (get-str 2 0 1))) + (when (string? scheme) (string-lowercase! scheme)) + (when (string? host) (string-lowercase! host)) + (make-url scheme (uri-decode/maybe (get-str 4 0 1)) ; user host (get-num 6 1 0) ; port + (and (not (= 0 (string-length path))) + (char=? #\/ (string-ref path 0))) (separate-path-strings - (let ([path (get-str 7 0 0)]) - ;; If path is "" and the input is an absolute URL - ;; with a hostname, then the intended path is "/", - ;; but the URL is missing a "/" at the end. - (if (and (string=? path "") - host) - "/" - path))) + ;; If path is "" and the input is an absolute URL + ;; with a hostname, then the intended path is "/", + ;; but the URL is missing a "/" at the end. + path + #; + (if (and (string=? path "") + host) + "/" + path)) ;(uri-decode/maybe (get-str 8 1 0)) ; ;query (let ([q (get-str 8 1 0)]) @@ -411,7 +461,7 @@ ;; in an attempt to be "friendly" (and f (uri-decode (regexp-replace* "%([^0-9a-fA-F])" f "%25\\1")))) - ;; separate-path-strings : string[starting with /] -> (listof (union string path/param)) + ;; separate-path-strings : string[starting with /] -> (listof path/param) (define (separate-path-strings str) (cond [(string=? str "") '()] @@ -423,31 +473,57 @@ [(regexp-match #rx"([^/]*)/(.*)$" str) => (lambda (m) - (cons (maybe-separate-params (cadr m)) (loop (caddr m))))] - [else (list (maybe-separate-params str))]))])) + (cons (separate-params (cadr m)) (loop (caddr m))))] + [else (list (separate-params str))]))])) - (define (maybe-separate-params s) + (define (separate-params s) + (let ([lst (map path-segment-decode (regexp-split #rx";" s))]) + (make-path/param (car lst) (cdr lst)))) + + (define (path-segment-decode p) (cond - [(regexp-match #rx"^([^;]*);(.*)$" s) - => - (lambda (m) - (make-path/param (cadr m) (caddr m)))] - [else s])) + [(string=? p "..") 'up] + [(string=? p ".") 'same] + [else (uri-path-segment-decode p)])) + + (define (path-segment-encode p) + (cond + [(eq? p 'up) ".."] + [(eq? p 'same) "."] + [(equal? p "..") "%2e%2e"] + [(equal? p ".") "%2e"] + [else (uri-path-segment-encode p)])) + + (define (combine-path-strings absolute? path/params) + (cond + [(null? path/params) ""] + [else + (apply + string-append + (if absolute? "/" "") + (add-between + "/" + (map join-params path/params)))])) - (define (combine-path-strings strs) - (apply + (define (join-params s) + (apply string-append - (let loop ([strs strs]) - (cond - [(null? strs) '()] - [else (list* "/" - (maybe-join-params (car strs)) - (loop (cdr strs)))])))) - - ;; needs to unquote things! - (define (maybe-join-params s) + (add-between ";" + (map + path-segment-encode + (cons (path/param-path s) + (path/param-param s)))))) + + (define (add-between bet lst) (cond - [(string? s) s] - [else (string-append (path/param-path s) - ";" - (path/param-param s))]))))) + [(null? lst) null] + [(null? (cdr lst)) lst] + [else + (let loop ([fst (car lst)] + [lst (cdr lst)]) + (cond + [(null? lst) (list fst)] + [else (list* fst + bet + (loop (car lst) + (cdr lst)))]))])))))