fixed url code in various ways

svn: r1752

original commit: c6992e0307932fad24a9287b5e8525a27b02abd3
This commit is contained in:
Robby Findler 2006-01-03 14:02:25 +00:00
parent 1398263012
commit 07736c9ff7
3 changed files with 206 additions and 105 deletions

View File

@ -5,6 +5,8 @@
(define-signature net:uri-codec^
(uri-encode
uri-decode
uri-path-segment-encode
uri-path-segment-decode
form-urlencoded-encode
form-urlencoded-decode
alist->form-urlencoded

View File

@ -1,3 +1,7 @@
;; 1/2/2006: Added a mapping for uri path segments
;; that allows more characters to remain decoded
;; -robby
;;;
;;; <uri-codec-unit.ss> ---- En/Decode URLs and form-urlencoded data
;;; Time-stamp: <03/04/25 10:31:31 noel>
@ -75,6 +79,7 @@
(lib "match.ss")
(lib "string.ss")
(lib "etc.ss")
(lib "list.ss")
"uri-codec-sig.ss")
(provide uri-codec@)
@ -109,13 +114,19 @@
;; Characters that sometimes map to themselves
(define safe-mapping
(map (lambda (char)
(cons char char))
(map (lambda (char) (cons char char))
'(#\- #\_ #\. #\! #\~ #\* #\' #\( #\))))
;; The strict URI mapping
(define uri-mapping
(append alphanumeric-mapping safe-mapping))
(append alphanumeric-mapping
safe-mapping))
;; The uri path segment mapping from RFC 3986
(define uri-path-segment-mapping
(append alphanumeric-mapping
safe-mapping
(map (λ (c) (cons c c)) (string->list "@+,=$&:"))))
;; The form-urlencoded mapping
(define form-urlencoded-mapping
@ -156,6 +167,10 @@
(define-values (uri-encoding-vector uri-decoding-vector)
(make-codec-tables uri-mapping))
(define-values (uri-path-segment-encoding-vector
uri-path-segment-decoding-vector)
(make-codec-tables uri-path-segment-mapping))
(define-values (form-urlencoded-encoding-vector
form-urlencoded-decoding-vector)
@ -198,7 +213,15 @@
;; string -> string
(define (uri-decode str)
(decode uri-decoding-vector str))
;; string -> string
(define (uri-path-segment-encode str)
(encode uri-path-segment-encoding-vector str))
;; string -> string
(define (uri-path-segment-decode str)
(decode uri-path-segment-decoding-vector str))
;; string -> string
(define (form-urlencoded-encode str)
(encode form-urlencoded-encoding-vector str))

View File

@ -13,7 +13,8 @@
(require (lib "file.ss")
(lib "unitsig.ss")
(lib "port.ss")
"url-structs.ss"
(lib "string.ss")
"url-structs.ss"
"uri-codec.ss"
"url-sig.ss"
"tcp-sig.ss")
@ -64,7 +65,9 @@
(define (url->file-path url)
(path->string
(apply build-path (or (url-host url) 'same)
(map (lambda (x) (if (equal? x "") 'same x)) (url-path url)))))
(map (lambda (x) (if (equal? x "") 'same x))
(map path/param-path
(url-path url))))))
(define url->string
(lambda (url)
@ -83,13 +86,18 @@
(string-append "#" fragment))))
(else
(let ((sa string-append))
(sa (if scheme (sa scheme "://") "")
(if user (sa (uri-encode user) "@") "")
(if host host "")
(if port (sa ":" (number->string port)) "")
; There used to be a "/" here, but that causes an
; extra leading slash -- wonder why it ever worked!
(combine-path-strings path)
(sa (if scheme (sa scheme ":") "")
(if (or user host port)
(sa
"//"
(if user (sa (uri-encode user) "@") "")
(if host host "")
(if port (sa ":" (number->string port)) "")
; There used to be a "/" here, but that causes an
; extra leading slash -- wonder why it ever worked!
)
"")
(combine-path-strings (url-path-absolute? url) path)
;(if query (sa "?" (uri-encode query)) "")
(if (null? query) "" (sa "?" (alist->form-urlencoded query)))
(if fragment (sa "#" (uri-encode fragment)) ""))))))))
@ -126,7 +134,8 @@
(url->string
(if proxy
url
(make-url #f #f #f #f
(make-url #f #f #f #f
(url-path-absolute? url)
(url-path url)
(url-query url)
(url-fragment url))))))
@ -223,62 +232,95 @@
(not (url-fragment url))
(null? (url-path url)))))
(define (combine-url/relative base string)
(let ([relative (string->url string)])
;; transliteration of code in rfc 3986, section 5.2.2
(define (combine-url/relative Base string)
(let ([R (string->url string)]
[T (make-url #f #f #f #f #f '() '() #f)])
(if (url-scheme R)
(begin
(set-url-scheme! T (url-scheme R))
(set-url-user! T (url-user R)) ;; authority
(set-url-host! T (url-host R)) ;; authority
(set-url-port! T (url-port R)) ;; authority
(set-url-path-absolute?! T (url-path-absolute? R))
(set-url-path! T (remove-dot-segments (url-path R)))
(set-url-query! T (url-query R)))
(begin
(if (url-host R) ;; => authority is defined
(begin
(set-url-user! T (url-user R)) ;; authority
(set-url-host! T (url-host R)) ;; authority
(set-url-port! T (url-port R)) ;; authority
(set-url-path-absolute?! T (url-path-absolute? R))
(set-url-path! T (remove-dot-segments (url-path R)))
(set-url-query! T (url-query R)))
(begin
(if (null? (url-path R)) ;; => R has empty path
(begin
(set-url-path-absolute?! T (url-path-absolute? Base))
(set-url-path! T (url-path Base))
(if (not (null? (url-query R)))
(set-url-query! T (url-query R))
(set-url-query! T (url-query Base))))
(begin
(cond
[(url-path-absolute? R)
(set-url-path-absolute?! T #t)
(set-url-path! T (remove-dot-segments (url-path R)))]
[(and (null? (url-path Base))
(url-host Base))
(set-url-path-absolute?! T #t)
(set-url-path! T (remove-dot-segments (url-path R)))]
[else
(set-url-path-absolute?! T (url-path-absolute? Base))
(set-url-path! T (remove-dot-segments
(append (all-but-last (url-path Base))
(url-path R))))])
(set-url-query! T (url-query R))))
(set-url-user! T (url-user Base)) ;; authority
(set-url-host! T (url-host Base)) ;; authority
(set-url-port! T (url-port Base)))) ;; authority
(set-url-scheme! T (url-scheme Base))))
(set-url-fragment! T (url-fragment R))
T))
(define (all-but-last lst)
(cond
[(null? lst) null]
[(null? (cdr lst)) null]
[else (cons (car lst) (all-but-last (cdr lst)))]))
;; cribbed from 5.2.4 in rfc 3986
;; the strange cases 2 and 4 implicitly change urls
;; with paths segments "." and ".." at the end
;; into "./" and "../" respectively
(define (remove-dot-segments path)
(let loop ([path path]
[result '()])
(cond
[(empty-url? base) ; Step 1
relative]
[(empty-url? relative) ; Step 2a
base]
[(url-scheme relative) ; Step 2b
relative]
[else ; Step 2c
(set-url-scheme! relative (url-scheme base))
(cond
[(url-host relative) ; Step 3
relative]
[else
(set-url-host! relative (url-host base))
(set-url-port! relative (url-port base)) ; Unspecified!
(let ([rel-path (url-path relative)])
(cond
[(and (not (equal? string "")) ; Step 4
(char=? #\/ (string-ref string 0)))
relative]
[(or (not rel-path) ; Step 5
(null? rel-path))
(set-url-path! relative (url-path base))
(when (url-query relative)
(set-url-query! relative (url-query base)))
relative]
[else ; Step 6
(merge-and-normalize
(url-path base) relative)]))])])))
(define (merge-and-normalize base-path relative-url)
(let* ([joined
(let loop ([base-path base-path])
(cond
[(null? base-path) (url-path relative-url)]
[(null? (cdr base-path)) (url-path relative-url)]
[else (cons (car base-path) (loop (cdr base-path)))]))]
[reversed/simplified
(if (null? joined)
null
(let loop ([segs (reverse joined)])
(cond
[(null? segs) null]
[else (let ([fst (car segs)])
(cond
[(string=? fst ".")
(loop (cdr segs))]
[(string=? fst "..")
(if (null? (cdr segs))
segs
(loop (cddr segs)))]
[else (cons (car segs) (loop (cdr segs)))]))])))])
(set-url-path! relative-url (reverse reversed/simplified))
relative-url))
[(null? path) (reverse result)]
[(and (eq? (path/param-path (car path)) 'same)
(null? (cdr path)))
(loop (cdr path)
(cons (make-path/param "" '()) result))]
[(eq? (path/param-path (car path)) 'same)
(loop (cdr path)
result)]
[(and (eq? (path/param-path (car path)) 'up)
(null? (cdr path))
(not (null? result)))
(loop (cdr path)
(cons (make-path/param "" '()) (cdr result)))]
[(and (eq? (path/param-path (car path)) 'up)
(not (null? result)))
(loop (cdr path) (cdr result))]
[(and (eq? (path/param-path (car path)) 'up)
(null? result))
;; when we go up too far, just drop the "up"s.
(loop (cdr path) result)]
[else
(loop (cdr path) (cons (car path) result))])))
;; call/input-url : url x (url -> in-port) x (in-port -> T)
;; [x list (str)] -> T
@ -363,9 +405,10 @@
#f ; user
(and root (path->string root)) ; host
#f ; port
(append (map path->string elems)
(absolute-path? path)
(append (map (λ (x) (make-path/param (path->string x) '())) elems)
(if (eq? kind 'dir)
'("")
(list (make-path/param "" '()))
null))
'() ; query
fragment))
@ -383,20 +426,27 @@
(get-num (lambda (pos skip-left skip-right)
(let ((s (get-str pos skip-left skip-right)))
(if s (string->number s) #f))))
(host (get-str 5 0 0)))
(make-url (get-str 2 0 1) ; scheme
(host (get-str 5 0 0))
(path (get-str 7 0 0))
(scheme (get-str 2 0 1)))
(when (string? scheme) (string-lowercase! scheme))
(when (string? host) (string-lowercase! host))
(make-url scheme
(uri-decode/maybe (get-str 4 0 1)) ; user
host
(get-num 6 1 0) ; port
(and (not (= 0 (string-length path)))
(char=? #\/ (string-ref path 0)))
(separate-path-strings
(let ([path (get-str 7 0 0)])
;; If path is "" and the input is an absolute URL
;; with a hostname, then the intended path is "/",
;; but the URL is missing a "/" at the end.
(if (and (string=? path "")
host)
"/"
path)))
;; If path is "" and the input is an absolute URL
;; with a hostname, then the intended path is "/",
;; but the URL is missing a "/" at the end.
path
#;
(if (and (string=? path "")
host)
"/"
path))
;(uri-decode/maybe (get-str 8 1 0)) ;
;query
(let ([q (get-str 8 1 0)])
@ -411,7 +461,7 @@
;; in an attempt to be "friendly"
(and f (uri-decode (regexp-replace* "%([^0-9a-fA-F])" f "%25\\1"))))
;; separate-path-strings : string[starting with /] -> (listof (union string path/param))
;; separate-path-strings : string[starting with /] -> (listof path/param)
(define (separate-path-strings str)
(cond
[(string=? str "") '()]
@ -423,31 +473,57 @@
[(regexp-match #rx"([^/]*)/(.*)$" str)
=>
(lambda (m)
(cons (maybe-separate-params (cadr m)) (loop (caddr m))))]
[else (list (maybe-separate-params str))]))]))
(cons (separate-params (cadr m)) (loop (caddr m))))]
[else (list (separate-params str))]))]))
(define (maybe-separate-params s)
(define (separate-params s)
(let ([lst (map path-segment-decode (regexp-split #rx";" s))])
(make-path/param (car lst) (cdr lst))))
(define (path-segment-decode p)
(cond
[(regexp-match #rx"^([^;]*);(.*)$" s)
=>
(lambda (m)
(make-path/param (cadr m) (caddr m)))]
[else s]))
[(string=? p "..") 'up]
[(string=? p ".") 'same]
[else (uri-path-segment-decode p)]))
(define (path-segment-encode p)
(cond
[(eq? p 'up) ".."]
[(eq? p 'same) "."]
[(equal? p "..") "%2e%2e"]
[(equal? p ".") "%2e"]
[else (uri-path-segment-encode p)]))
(define (combine-path-strings absolute? path/params)
(cond
[(null? path/params) ""]
[else
(apply
string-append
(if absolute? "/" "")
(add-between
"/"
(map join-params path/params)))]))
(define (combine-path-strings strs)
(apply
(define (join-params s)
(apply
string-append
(let loop ([strs strs])
(cond
[(null? strs) '()]
[else (list* "/"
(maybe-join-params (car strs))
(loop (cdr strs)))]))))
;; needs to unquote things!
(define (maybe-join-params s)
(add-between ";"
(map
path-segment-encode
(cons (path/param-path s)
(path/param-param s))))))
(define (add-between bet lst)
(cond
[(string? s) s]
[else (string-append (path/param-path s)
";"
(path/param-param s))])))))
[(null? lst) null]
[(null? (cdr lst)) lst]
[else
(let loop ([fst (car lst)]
[lst (cdr lst)])
(cond
[(null? lst) (list fst)]
[else (list* fst
bet
(loop (car lst)
(cdr lst)))]))])))))