more formatting, better regexp use

svn: r3096
This commit is contained in:
Eli Barzilay 2006-05-28 19:35:25 +00:00
parent 646c91cc10
commit 28822a155d
3 changed files with 122 additions and 163 deletions

View File

@ -78,8 +78,8 @@
(require (lib "unitsig.ss")
(lib "match.ss")
(lib "string.ss")
(lib "etc.ss")
(lib "list.ss")
(lib "etc.ss")
"uri-codec-sig.ss")
(provide uri-codec@)
@ -88,27 +88,20 @@
(unit/sig net:uri-codec^
(import)
(define (self-map-char ch) (cons ch ch))
(define (self-map-chars str) (map self-map-char (string->list str)))
;; The characters that always map to themselves
(define alphanumeric-mapping
(map (lambda (char) (cons char char))
'(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9
#\A #\B #\C #\D #\E #\F #\G #\H #\I #\J
#\K #\L #\M #\N #\O #\P #\Q #\R #\S #\T
#\U #\V #\W #\X #\Y #\Z #\a #\b #\c #\d
#\e #\f #\g #\h #\i #\j #\k #\l #\m #\n
#\o #\p #\q #\r #\s #\t #\u #\v #\w #\x
#\y #\z)))
(self-map-chars
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"))
;; Characters that sometimes map to themselves
(define safe-mapping
(map (lambda (char) (cons char char))
'(#\- #\_ #\. #\! #\~ #\* #\' #\( #\))))
(define safe-mapping (self-map-chars "-_.!~*'()"))
;; The strict URI mapping
(define uri-mapping
(append alphanumeric-mapping
safe-mapping))
(define uri-mapping (append alphanumeric-mapping safe-mapping))
;; The uri path segment mapping from RFC 3986
(define uri-path-segment-mapping
(append alphanumeric-mapping
@ -117,29 +110,21 @@
;; The form-urlencoded mapping
(define form-urlencoded-mapping
(append '((#\. . #\.)
(#\- . #\-)
(#\* . #\*)
(#\_ . #\_)
(#\space . #\+))
alphanumeric-mapping))
`(,@(self-map-chars ".-*_") (#\space . #\+) ,@alphanumeric-mapping))
(define (number->hex-string number)
(let ((hex (number->string number 16)))
(string-append "%"
(if (= (string-length hex) 1)
(string-append "0" hex)
hex))))
(define (hex n) (string-ref "0123456789ABCDEF" n))
(string #\% (hex (quotient number 16)) (hex (modulo number 16))))
(define (hex-string->number hex-string)
(string->number (substring hex-string 1 3) 16))
(define ascii-size 128)
;; (listof (cons char char)) -> (values (vectorof string) (vectorof string))
(define (make-codec-tables alist)
(let ((encoding-table (build-vector ascii-size number->hex-string))
(decoding-table (build-vector ascii-size values)))
(let ([encoding-table (build-vector ascii-size number->hex-string)]
[decoding-table (build-vector ascii-size values)])
(for-each (match-lambda
[(orig . enc)
(vector-set! encoding-table
@ -153,7 +138,7 @@
(define-values (uri-encoding-vector uri-decoding-vector)
(make-codec-tables uri-mapping))
(define-values (uri-path-segment-encoding-vector
uri-path-segment-decoding-vector)
(make-codec-tables uri-path-segment-mapping))
@ -216,7 +201,7 @@
;; string -> string
(define (uri-path-segment-decode str)
(decode uri-path-segment-decoding-vector str))
;; string -> string
(define (form-urlencoded-encode str)
(encode form-urlencoded-encoding-vector str))
@ -228,80 +213,74 @@
;; listof (cons string string) -> string
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
;; listof (cons symbol string) -> string
(define alist->form-urlencoded
(opt-lambda (args)
(let* ([mode (current-alist-separator-mode)]
[format-one
(lambda (arg)
(let* ([name (car arg)]
[value (cdr arg)])
(string-append
(form-urlencoded-encode (symbol->string name))
"="
(form-urlencoded-encode value))))]
[strs (let loop ([args args])
(cond
[(null? args) null]
[(null? (cdr args)) (list (format-one (car args)))]
[else (list* (format-one (car args))
(if (eq? mode 'amp)
"&"
";")
(loop (cdr args)))]))])
(apply string-append strs))))
(define (alist->form-urlencoded args)
(let* ([mode (current-alist-separator-mode)]
[format-one
(lambda (arg)
(let* ([name (car arg)]
[value (cdr arg)])
(string-append (form-urlencoded-encode (symbol->string name))
"="
(form-urlencoded-encode value))))]
[strs (let loop ([args args])
(cond
[(null? args) null]
[(null? (cdr args)) (list (format-one (car args)))]
[else (list* (format-one (car args))
(if (eq? mode 'amp) "&" ";")
(loop (cdr args)))]))])
(apply string-append strs)))
;; string -> listof (cons string string)
;; string -> listof (cons string string)
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
(define form-urlencoded->alist
(opt-lambda (str)
(define key-regexp #rx"[^=]*")
(define value-regexp (case (current-alist-separator-mode)
[(semi) #rx"[^;]*"]
[(amp) #rx"[^&]*"]
[else #rx"[^&;]*"]))
(define (next-key str start)
(if (>= start (string-length str))
#f
(match (regexp-match-positions key-regexp str start)
[((start . end))
(vector (let ([s (form-urlencoded-decode (substring str start end))])
(string->symbol s))
(add1 end))]
[#f #f])))
(define (next-value str start)
(if (>= start (string-length str))
#f
(match (regexp-match-positions value-regexp str start)
[((start . end))
(vector (form-urlencoded-decode (substring str start end))
(add1 end))]
[#f #f])))
(define (next-pair str start)
(match (next-key str start)
[#(key start)
(match (next-value str start)
[#(value start)
(vector (cons key value) start)]
[#f
(vector (cons key "") (string-length str))])]
[#f #f]))
(let loop ([start 0]
[end (string-length str)]
[make-alist (lambda (x) x)])
(cond
[(>= start end) (make-alist '())]
[else
(match (next-pair str start)
[#(pair next-start)
(loop next-start end (lambda (x) (make-alist (cons pair x))))]
[#f (make-alist '())])]))))
(define (form-urlencoded->alist str)
(define key-regexp #rx"[^=]*")
(define value-regexp (case (current-alist-separator-mode)
[(semi) #rx"[^;]*"]
[(amp) #rx"[^&]*"]
[else #rx"[^&;]*"]))
(define (next-key str start)
(and (< start (string-length str))
(match (regexp-match-positions key-regexp str start)
[((start . end))
(vector (let ([s (form-urlencoded-decode
(substring str start end))])
(string->symbol s))
(add1 end))]
[#f #f])))
(define (next-value str start)
(and (< start (string-length str))
(match (regexp-match-positions value-regexp str start)
[((start . end))
(vector (form-urlencoded-decode (substring str start end))
(add1 end))]
[#f #f])))
(define (next-pair str start)
(match (next-key str start)
[#(key start)
(match (next-value str start)
[#(value start)
(vector (cons key value) start)]
[#f
(vector (cons key "") (string-length str))])]
[#f #f]))
(let loop ([start 0]
[end (string-length str)]
[make-alist (lambda (x) x)])
(if (>= start end)
(make-alist '())
(match (next-pair str start)
[#(pair next-start)
(loop next-start end (lambda (x) (make-alist (cons pair x))))]
[#f (make-alist '())]))))
(define current-alist-separator-mode
(make-parameter 'amp-or-semi (lambda (s)
(unless (memq s '(amp semi amp-or-semi))
(raise-type-error 'current-alist-separator-mode
"'amp, 'semi, or 'amp-or-semi"
s))
s))))))
(make-parameter 'amp-or-semi
(lambda (s)
(unless (memq s '(amp semi amp-or-semi))
(raise-type-error 'current-alist-separator-mode
"'amp, 'semi, or 'amp-or-semi"
s))
s))))))
;;; uri-codec-unit.ss ends here

View File

@ -1,6 +1,6 @@
(module url-structs mzscheme
(require (lib "contract.ss"))
(define-struct url (scheme user host port path-absolute? path query fragment))
(define-struct path/param (path param))

View File

@ -352,65 +352,45 @@
;; string->url : str -> url
;; New implementation, mostly provided by Neil Van Dyke
(define string->url
(let ((rx (regexp (string-append
"^"
"[ \t\f\r\n]*"
"(" ; <1 front-opt
"([a-zA-Z]*:)?" ; =2 scheme-colon-opt
"(" ; <3 slashslash-opt
"//"
"([^:/@;?#]*@)?" ; =4 user-at-opt
"([^:/@;?#]*)?" ; =5 host-opt
"(:[0-9]*)?" ; =6 colon-port-opt
")?" ; >3 slashslash-opt
")?" ; >1 front-opt
"([^?#]*)" ; =7 path
"(\\?[^#]*)?" ; =8 question-query-opt
"(#.*)?" ; =9 hash-fragment-opt
"[ \t\f\r\n]*"
"$"))))
(lambda (str)
(let ((match (regexp-match-positions rx str)))
(if match
(let* ((get-str (lambda (pos skip-left skip-right)
(let ((pair (list-ref match pos)))
(if pair
(substring str
(+ (car pair) skip-left)
(- (cdr pair) skip-right))
#f))))
(get-num (lambda (pos skip-left skip-right)
(let ((s (get-str pos skip-left skip-right)))
(if s (string->number s) #f))))
(host (get-str 5 0 0))
(path (get-str 7 0 0))
(scheme (get-str 2 0 1)))
(when (string? scheme) (string-lowercase! scheme))
(when (string? host) (string-lowercase! host))
(make-url scheme
(uri-decode/maybe (get-str 4 0 1)) ; user
host
(get-num 6 1 0) ; port
(and (not (= 0 (string-length path)))
(char=? #\/ (string-ref path 0)))
(separate-path-strings
;; If path is "" and the input is an absolute URL
;; with a hostname, then the intended path is "/",
;; but the URL is missing a "/" at the end.
path
#;
(if (and (string=? path "")
host)
"/"
path))
;(uri-decode/maybe (get-str 8 1 0)) ;
;query
(let ([q (get-str 8 1 0)])
(if q (form-urlencoded->alist q) '()))
(uri-decode/maybe (get-str 9 1 0)) ; fragment
))
(url-error "Invalid URL string: ~e" str))))))
(define url-rx
(regexp (string-append
"^"
"[ \t\f\r\n]*"
"(?:" ; <A front-opt
"(?:([a-zA-Z]*):)?" ; =1 scheme-colon-opt
"(?:" ; <B slashslash-opt
"//"
"(?:([^:/@;?#]*)@)?" ; =2 user-at-opt
"([^:/@;?#]*)?" ; =3 host-opt
"(?::([0-9]*))?" ; =4 colon-port-opt
")?" ; >B slashslash-opt
")?" ; >A front-opt
"([^?#]*)" ; =5 path
"(?:\\?([^#]*))?" ; =6 question-query-opt
"(?:#(.*))?" ; =7 hash-fragment-opt
"[ \t\f\r\n]*"
"$")))
(define (string->url str)
(apply
(lambda (scheme user host port path query fragment)
(let* ([user (uri-decode/maybe user)]
[port (and port (string->number port))]
[abs? (and (not (= 0 (string-length path)))
(char=? #\/ (string-ref path 0)))]
[path (separate-path-strings
;; If path is "" and the input is an absolute URL
;; with a hostname, then the intended path is "/",
;; but the URL is missing a "/" at the end.
path
#;
(if (and (string=? path "") host) "/" path))]
[query (if query (form-urlencoded->alist query) '())]
[fragment (uri-decode/maybe fragment)])
(when (string? scheme) (string-lowercase! scheme))
(when (string? host) (string-lowercase! host))
(make-url scheme user host port abs? path query fragment)))
(cdr (or (regexp-match url-rx str)
(url-error "Invalid URL string: ~e" str)))))
(define (uri-decode/maybe f)
;; If #f, and leave unmolested any % that is followed by hex digit