more formatting, better regexp use

svn: r3096
This commit is contained in:
Eli Barzilay 2006-05-28 19:35:25 +00:00
parent 646c91cc10
commit 28822a155d
3 changed files with 122 additions and 163 deletions

View File

@ -78,8 +78,8 @@
(require (lib "unitsig.ss") (require (lib "unitsig.ss")
(lib "match.ss") (lib "match.ss")
(lib "string.ss") (lib "string.ss")
(lib "etc.ss")
(lib "list.ss") (lib "list.ss")
(lib "etc.ss")
"uri-codec-sig.ss") "uri-codec-sig.ss")
(provide uri-codec@) (provide uri-codec@)
@ -88,27 +88,20 @@
(unit/sig net:uri-codec^ (unit/sig net:uri-codec^
(import) (import)
(define (self-map-char ch) (cons ch ch))
(define (self-map-chars str) (map self-map-char (string->list str)))
;; The characters that always map to themselves ;; The characters that always map to themselves
(define alphanumeric-mapping (define alphanumeric-mapping
(map (lambda (char) (cons char char)) (self-map-chars
'(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"))
#\A #\B #\C #\D #\E #\F #\G #\H #\I #\J
#\K #\L #\M #\N #\O #\P #\Q #\R #\S #\T
#\U #\V #\W #\X #\Y #\Z #\a #\b #\c #\d
#\e #\f #\g #\h #\i #\j #\k #\l #\m #\n
#\o #\p #\q #\r #\s #\t #\u #\v #\w #\x
#\y #\z)))
;; Characters that sometimes map to themselves ;; Characters that sometimes map to themselves
(define safe-mapping (define safe-mapping (self-map-chars "-_.!~*'()"))
(map (lambda (char) (cons char char))
'(#\- #\_ #\. #\! #\~ #\* #\' #\( #\))))
;; The strict URI mapping ;; The strict URI mapping
(define uri-mapping (define uri-mapping (append alphanumeric-mapping safe-mapping))
(append alphanumeric-mapping
safe-mapping))
;; The uri path segment mapping from RFC 3986 ;; The uri path segment mapping from RFC 3986
(define uri-path-segment-mapping (define uri-path-segment-mapping
(append alphanumeric-mapping (append alphanumeric-mapping
@ -117,29 +110,21 @@
;; The form-urlencoded mapping ;; The form-urlencoded mapping
(define form-urlencoded-mapping (define form-urlencoded-mapping
(append '((#\. . #\.) `(,@(self-map-chars ".-*_") (#\space . #\+) ,@alphanumeric-mapping))
(#\- . #\-)
(#\* . #\*)
(#\_ . #\_)
(#\space . #\+))
alphanumeric-mapping))
(define (number->hex-string number) (define (number->hex-string number)
(let ((hex (number->string number 16))) (define (hex n) (string-ref "0123456789ABCDEF" n))
(string-append "%" (string #\% (hex (quotient number 16)) (hex (modulo number 16))))
(if (= (string-length hex) 1)
(string-append "0" hex)
hex))))
(define (hex-string->number hex-string) (define (hex-string->number hex-string)
(string->number (substring hex-string 1 3) 16)) (string->number (substring hex-string 1 3) 16))
(define ascii-size 128) (define ascii-size 128)
;; (listof (cons char char)) -> (values (vectorof string) (vectorof string)) ;; (listof (cons char char)) -> (values (vectorof string) (vectorof string))
(define (make-codec-tables alist) (define (make-codec-tables alist)
(let ((encoding-table (build-vector ascii-size number->hex-string)) (let ([encoding-table (build-vector ascii-size number->hex-string)]
(decoding-table (build-vector ascii-size values))) [decoding-table (build-vector ascii-size values)])
(for-each (match-lambda (for-each (match-lambda
[(orig . enc) [(orig . enc)
(vector-set! encoding-table (vector-set! encoding-table
@ -153,7 +138,7 @@
(define-values (uri-encoding-vector uri-decoding-vector) (define-values (uri-encoding-vector uri-decoding-vector)
(make-codec-tables uri-mapping)) (make-codec-tables uri-mapping))
(define-values (uri-path-segment-encoding-vector (define-values (uri-path-segment-encoding-vector
uri-path-segment-decoding-vector) uri-path-segment-decoding-vector)
(make-codec-tables uri-path-segment-mapping)) (make-codec-tables uri-path-segment-mapping))
@ -216,7 +201,7 @@
;; string -> string ;; string -> string
(define (uri-path-segment-decode str) (define (uri-path-segment-decode str)
(decode uri-path-segment-decoding-vector str)) (decode uri-path-segment-decoding-vector str))
;; string -> string ;; string -> string
(define (form-urlencoded-encode str) (define (form-urlencoded-encode str)
(encode form-urlencoded-encoding-vector str)) (encode form-urlencoded-encoding-vector str))
@ -228,80 +213,74 @@
;; listof (cons string string) -> string ;; listof (cons string string) -> string
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris ;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
;; listof (cons symbol string) -> string ;; listof (cons symbol string) -> string
(define alist->form-urlencoded (define (alist->form-urlencoded args)
(opt-lambda (args) (let* ([mode (current-alist-separator-mode)]
(let* ([mode (current-alist-separator-mode)] [format-one
[format-one (lambda (arg)
(lambda (arg) (let* ([name (car arg)]
(let* ([name (car arg)] [value (cdr arg)])
[value (cdr arg)]) (string-append (form-urlencoded-encode (symbol->string name))
(string-append "="
(form-urlencoded-encode (symbol->string name)) (form-urlencoded-encode value))))]
"=" [strs (let loop ([args args])
(form-urlencoded-encode value))))] (cond
[strs (let loop ([args args]) [(null? args) null]
(cond [(null? (cdr args)) (list (format-one (car args)))]
[(null? args) null] [else (list* (format-one (car args))
[(null? (cdr args)) (list (format-one (car args)))] (if (eq? mode 'amp) "&" ";")
[else (list* (format-one (car args)) (loop (cdr args)))]))])
(if (eq? mode 'amp) (apply string-append strs)))
"&"
";")
(loop (cdr args)))]))])
(apply string-append strs))))
;; string -> listof (cons string string) ;; string -> listof (cons string string)
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris ;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
(define form-urlencoded->alist (define (form-urlencoded->alist str)
(opt-lambda (str) (define key-regexp #rx"[^=]*")
(define key-regexp #rx"[^=]*") (define value-regexp (case (current-alist-separator-mode)
(define value-regexp (case (current-alist-separator-mode) [(semi) #rx"[^;]*"]
[(semi) #rx"[^;]*"] [(amp) #rx"[^&]*"]
[(amp) #rx"[^&]*"] [else #rx"[^&;]*"]))
[else #rx"[^&;]*"])) (define (next-key str start)
(define (next-key str start) (and (< start (string-length str))
(if (>= start (string-length str)) (match (regexp-match-positions key-regexp str start)
#f [((start . end))
(match (regexp-match-positions key-regexp str start) (vector (let ([s (form-urlencoded-decode
[((start . end)) (substring str start end))])
(vector (let ([s (form-urlencoded-decode (substring str start end))]) (string->symbol s))
(string->symbol s)) (add1 end))]
(add1 end))] [#f #f])))
[#f #f]))) (define (next-value str start)
(define (next-value str start) (and (< start (string-length str))
(if (>= start (string-length str)) (match (regexp-match-positions value-regexp str start)
#f [((start . end))
(match (regexp-match-positions value-regexp str start) (vector (form-urlencoded-decode (substring str start end))
[((start . end)) (add1 end))]
(vector (form-urlencoded-decode (substring str start end)) [#f #f])))
(add1 end))] (define (next-pair str start)
[#f #f]))) (match (next-key str start)
(define (next-pair str start) [#(key start)
(match (next-key str start) (match (next-value str start)
[#(key start) [#(value start)
(match (next-value str start) (vector (cons key value) start)]
[#(value start) [#f
(vector (cons key value) start)] (vector (cons key "") (string-length str))])]
[#f [#f #f]))
(vector (cons key "") (string-length str))])] (let loop ([start 0]
[#f #f])) [end (string-length str)]
(let loop ([start 0] [make-alist (lambda (x) x)])
[end (string-length str)] (if (>= start end)
[make-alist (lambda (x) x)]) (make-alist '())
(cond (match (next-pair str start)
[(>= start end) (make-alist '())] [#(pair next-start)
[else (loop next-start end (lambda (x) (make-alist (cons pair x))))]
(match (next-pair str start) [#f (make-alist '())]))))
[#(pair next-start)
(loop next-start end (lambda (x) (make-alist (cons pair x))))]
[#f (make-alist '())])]))))
(define current-alist-separator-mode (define current-alist-separator-mode
(make-parameter 'amp-or-semi (lambda (s) (make-parameter 'amp-or-semi
(unless (memq s '(amp semi amp-or-semi)) (lambda (s)
(raise-type-error 'current-alist-separator-mode (unless (memq s '(amp semi amp-or-semi))
"'amp, 'semi, or 'amp-or-semi" (raise-type-error 'current-alist-separator-mode
s)) "'amp, 'semi, or 'amp-or-semi"
s)))))) s))
s))))))
;;; uri-codec-unit.ss ends here ;;; uri-codec-unit.ss ends here

View File

@ -1,6 +1,6 @@
(module url-structs mzscheme (module url-structs mzscheme
(require (lib "contract.ss")) (require (lib "contract.ss"))
(define-struct url (scheme user host port path-absolute? path query fragment)) (define-struct url (scheme user host port path-absolute? path query fragment))
(define-struct path/param (path param)) (define-struct path/param (path param))

View File

@ -352,65 +352,45 @@
;; string->url : str -> url ;; string->url : str -> url
;; New implementation, mostly provided by Neil Van Dyke ;; New implementation, mostly provided by Neil Van Dyke
(define string->url (define url-rx
(let ((rx (regexp (string-append (regexp (string-append
"^" "^"
"[ \t\f\r\n]*" "[ \t\f\r\n]*"
"(" ; <1 front-opt "(?:" ; <A front-opt
"([a-zA-Z]*:)?" ; =2 scheme-colon-opt "(?:([a-zA-Z]*):)?" ; =1 scheme-colon-opt
"(" ; <3 slashslash-opt "(?:" ; <B slashslash-opt
"//" "//"
"([^:/@;?#]*@)?" ; =4 user-at-opt "(?:([^:/@;?#]*)@)?" ; =2 user-at-opt
"([^:/@;?#]*)?" ; =5 host-opt "([^:/@;?#]*)?" ; =3 host-opt
"(:[0-9]*)?" ; =6 colon-port-opt "(?::([0-9]*))?" ; =4 colon-port-opt
")?" ; >3 slashslash-opt ")?" ; >B slashslash-opt
")?" ; >1 front-opt ")?" ; >A front-opt
"([^?#]*)" ; =7 path "([^?#]*)" ; =5 path
"(\\?[^#]*)?" ; =8 question-query-opt "(?:\\?([^#]*))?" ; =6 question-query-opt
"(#.*)?" ; =9 hash-fragment-opt "(?:#(.*))?" ; =7 hash-fragment-opt
"[ \t\f\r\n]*" "[ \t\f\r\n]*"
"$")))) "$")))
(lambda (str) (define (string->url str)
(let ((match (regexp-match-positions rx str))) (apply
(if match (lambda (scheme user host port path query fragment)
(let* ((get-str (lambda (pos skip-left skip-right) (let* ([user (uri-decode/maybe user)]
(let ((pair (list-ref match pos))) [port (and port (string->number port))]
(if pair [abs? (and (not (= 0 (string-length path)))
(substring str (char=? #\/ (string-ref path 0)))]
(+ (car pair) skip-left) [path (separate-path-strings
(- (cdr pair) skip-right)) ;; If path is "" and the input is an absolute URL
#f)))) ;; with a hostname, then the intended path is "/",
(get-num (lambda (pos skip-left skip-right) ;; but the URL is missing a "/" at the end.
(let ((s (get-str pos skip-left skip-right))) path
(if s (string->number s) #f)))) #;
(host (get-str 5 0 0)) (if (and (string=? path "") host) "/" path))]
(path (get-str 7 0 0)) [query (if query (form-urlencoded->alist query) '())]
(scheme (get-str 2 0 1))) [fragment (uri-decode/maybe fragment)])
(when (string? scheme) (string-lowercase! scheme)) (when (string? scheme) (string-lowercase! scheme))
(when (string? host) (string-lowercase! host)) (when (string? host) (string-lowercase! host))
(make-url scheme (make-url scheme user host port abs? path query fragment)))
(uri-decode/maybe (get-str 4 0 1)) ; user (cdr (or (regexp-match url-rx str)
host (url-error "Invalid URL string: ~e" str)))))
(get-num 6 1 0) ; port
(and (not (= 0 (string-length path)))
(char=? #\/ (string-ref path 0)))
(separate-path-strings
;; If path is "" and the input is an absolute URL
;; with a hostname, then the intended path is "/",
;; but the URL is missing a "/" at the end.
path
#;
(if (and (string=? path "")
host)
"/"
path))
;(uri-decode/maybe (get-str 8 1 0)) ;
;query
(let ([q (get-str 8 1 0)])
(if q (form-urlencoded->alist q) '()))
(uri-decode/maybe (get-str 9 1 0)) ; fragment
))
(url-error "Invalid URL string: ~e" str))))))
(define (uri-decode/maybe f) (define (uri-decode/maybe f)
;; If #f, and leave unmolested any % that is followed by hex digit ;; If #f, and leave unmolested any % that is followed by hex digit