more formatting, better regexp use

svn: r3096
This commit is contained in:
Eli Barzilay 2006-05-28 19:35:25 +00:00
parent 646c91cc10
commit 28822a155d
3 changed files with 122 additions and 163 deletions

View File

@ -78,8 +78,8 @@
(require (lib "unitsig.ss") (require (lib "unitsig.ss")
(lib "match.ss") (lib "match.ss")
(lib "string.ss") (lib "string.ss")
(lib "etc.ss")
(lib "list.ss") (lib "list.ss")
(lib "etc.ss")
"uri-codec-sig.ss") "uri-codec-sig.ss")
(provide uri-codec@) (provide uri-codec@)
@ -88,26 +88,19 @@
(unit/sig net:uri-codec^ (unit/sig net:uri-codec^
(import) (import)
(define (self-map-char ch) (cons ch ch))
(define (self-map-chars str) (map self-map-char (string->list str)))
;; The characters that always map to themselves ;; The characters that always map to themselves
(define alphanumeric-mapping (define alphanumeric-mapping
(map (lambda (char) (cons char char)) (self-map-chars
'(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"))
#\A #\B #\C #\D #\E #\F #\G #\H #\I #\J
#\K #\L #\M #\N #\O #\P #\Q #\R #\S #\T
#\U #\V #\W #\X #\Y #\Z #\a #\b #\c #\d
#\e #\f #\g #\h #\i #\j #\k #\l #\m #\n
#\o #\p #\q #\r #\s #\t #\u #\v #\w #\x
#\y #\z)))
;; Characters that sometimes map to themselves ;; Characters that sometimes map to themselves
(define safe-mapping (define safe-mapping (self-map-chars "-_.!~*'()"))
(map (lambda (char) (cons char char))
'(#\- #\_ #\. #\! #\~ #\* #\' #\( #\))))
;; The strict URI mapping ;; The strict URI mapping
(define uri-mapping (define uri-mapping (append alphanumeric-mapping safe-mapping))
(append alphanumeric-mapping
safe-mapping))
;; The uri path segment mapping from RFC 3986 ;; The uri path segment mapping from RFC 3986
(define uri-path-segment-mapping (define uri-path-segment-mapping
@ -117,19 +110,11 @@
;; The form-urlencoded mapping ;; The form-urlencoded mapping
(define form-urlencoded-mapping (define form-urlencoded-mapping
(append '((#\. . #\.) `(,@(self-map-chars ".-*_") (#\space . #\+) ,@alphanumeric-mapping))
(#\- . #\-)
(#\* . #\*)
(#\_ . #\_)
(#\space . #\+))
alphanumeric-mapping))
(define (number->hex-string number) (define (number->hex-string number)
(let ((hex (number->string number 16))) (define (hex n) (string-ref "0123456789ABCDEF" n))
(string-append "%" (string #\% (hex (quotient number 16)) (hex (modulo number 16))))
(if (= (string-length hex) 1)
(string-append "0" hex)
hex))))
(define (hex-string->number hex-string) (define (hex-string->number hex-string)
(string->number (substring hex-string 1 3) 16)) (string->number (substring hex-string 1 3) 16))
@ -138,8 +123,8 @@
;; (listof (cons char char)) -> (values (vectorof string) (vectorof string)) ;; (listof (cons char char)) -> (values (vectorof string) (vectorof string))
(define (make-codec-tables alist) (define (make-codec-tables alist)
(let ((encoding-table (build-vector ascii-size number->hex-string)) (let ([encoding-table (build-vector ascii-size number->hex-string)]
(decoding-table (build-vector ascii-size values))) [decoding-table (build-vector ascii-size values)])
(for-each (match-lambda (for-each (match-lambda
[(orig . enc) [(orig . enc)
(vector-set! encoding-table (vector-set! encoding-table
@ -228,15 +213,13 @@
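;; Encodes an association list as a form-urlencoded string; keys are symbols (signature below) ;; Encodes an association list as a form-urlencoded string; keys are symbols (signature below)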
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris ;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
;; listof (cons symbol string) -> string ;; listof (cons symbol string) -> string
(define alist->form-urlencoded (define (alist->form-urlencoded args)
(opt-lambda (args)
(let* ([mode (current-alist-separator-mode)] (let* ([mode (current-alist-separator-mode)]
[format-one [format-one
(lambda (arg) (lambda (arg)
(let* ([name (car arg)] (let* ([name (car arg)]
[value (cdr arg)]) [value (cdr arg)])
(string-append (string-append (form-urlencoded-encode (symbol->string name))
(form-urlencoded-encode (symbol->string name))
"=" "="
(form-urlencoded-encode value))))] (form-urlencoded-encode value))))]
[strs (let loop ([args args]) [strs (let loop ([args args])
@ -244,33 +227,29 @@
[(null? args) null] [(null? args) null]
[(null? (cdr args)) (list (format-one (car args)))] [(null? (cdr args)) (list (format-one (car args)))]
[else (list* (format-one (car args)) [else (list* (format-one (car args))
(if (eq? mode 'amp) (if (eq? mode 'amp) "&" ";")
"&"
";")
(loop (cdr args)))]))]) (loop (cdr args)))]))])
(apply string-append strs)))) (apply string-append strs)))
;; string -> listof (cons symbol string) ;; string -> listof (cons symbol string)
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris ;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
(define form-urlencoded->alist (define (form-urlencoded->alist str)
(opt-lambda (str)
(define key-regexp #rx"[^=]*") (define key-regexp #rx"[^=]*")
(define value-regexp (case (current-alist-separator-mode) (define value-regexp (case (current-alist-separator-mode)
[(semi) #rx"[^;]*"] [(semi) #rx"[^;]*"]
[(amp) #rx"[^&]*"] [(amp) #rx"[^&]*"]
[else #rx"[^&;]*"])) [else #rx"[^&;]*"]))
(define (next-key str start) (define (next-key str start)
(if (>= start (string-length str)) (and (< start (string-length str))
#f
(match (regexp-match-positions key-regexp str start) (match (regexp-match-positions key-regexp str start)
[((start . end)) [((start . end))
(vector (let ([s (form-urlencoded-decode (substring str start end))]) (vector (let ([s (form-urlencoded-decode
(substring str start end))])
(string->symbol s)) (string->symbol s))
(add1 end))] (add1 end))]
[#f #f]))) [#f #f])))
(define (next-value str start) (define (next-value str start)
(if (>= start (string-length str)) (and (< start (string-length str))
#f
(match (regexp-match-positions value-regexp str start) (match (regexp-match-positions value-regexp str start)
[((start . end)) [((start . end))
(vector (form-urlencoded-decode (substring str start end)) (vector (form-urlencoded-decode (substring str start end))
@ -288,16 +267,16 @@
(let loop ([start 0] (let loop ([start 0]
[end (string-length str)] [end (string-length str)]
[make-alist (lambda (x) x)]) [make-alist (lambda (x) x)])
(cond (if (>= start end)
[(>= start end) (make-alist '())] (make-alist '())
[else
(match (next-pair str start) (match (next-pair str start)
[#(pair next-start) [#(pair next-start)
(loop next-start end (lambda (x) (make-alist (cons pair x))))] (loop next-start end (lambda (x) (make-alist (cons pair x))))]
[#f (make-alist '())])])))) [#f (make-alist '())]))))
(define current-alist-separator-mode (define current-alist-separator-mode
(make-parameter 'amp-or-semi (lambda (s) (make-parameter 'amp-or-semi
(lambda (s)
(unless (memq s '(amp semi amp-or-semi)) (unless (memq s '(amp semi amp-or-semi))
(raise-type-error 'current-alist-separator-mode (raise-type-error 'current-alist-separator-mode
"'amp, 'semi, or 'amp-or-semi" "'amp, 'semi, or 'amp-or-semi"

View File

@ -352,65 +352,45 @@
;; string->url : str -> url ;; string->url : str -> url
;; New implementation, mostly provided by Neil Van Dyke ;; New implementation, mostly provided by Neil Van Dyke
(define string->url (define url-rx
(let ((rx (regexp (string-append (regexp (string-append
"^" "^"
"[ \t\f\r\n]*" "[ \t\f\r\n]*"
"(" ; <1 front-opt "(?:" ; <A front-opt
"([a-zA-Z]*:)?" ; =2 scheme-colon-opt "(?:([a-zA-Z]*):)?" ; =1 scheme-colon-opt
"(" ; <3 slashslash-opt "(?:" ; <B slashslash-opt
"//" "//"
"([^:/@;?#]*@)?" ; =4 user-at-opt "(?:([^:/@;?#]*)@)?" ; =2 user-at-opt
"([^:/@;?#]*)?" ; =5 host-opt "([^:/@;?#]*)?" ; =3 host-opt
"(:[0-9]*)?" ; =6 colon-port-opt "(?::([0-9]*))?" ; =4 colon-port-opt
")?" ; >3 slashslash-opt ")?" ; >B slashslash-opt
")?" ; >1 front-opt ")?" ; >A front-opt
"([^?#]*)" ; =7 path "([^?#]*)" ; =5 path
"(\\?[^#]*)?" ; =8 question-query-opt "(?:\\?([^#]*))?" ; =6 question-query-opt
"(#.*)?" ; =9 hash-fragment-opt "(?:#(.*))?" ; =7 hash-fragment-opt
"[ \t\f\r\n]*" "[ \t\f\r\n]*"
"$")))) "$")))
(lambda (str) (define (string->url str)
(let ((match (regexp-match-positions rx str))) (apply
(if match (lambda (scheme user host port path query fragment)
(let* ((get-str (lambda (pos skip-left skip-right) (let* ([user (uri-decode/maybe user)]
(let ((pair (list-ref match pos))) [port (and port (string->number port))]
(if pair [abs? (and (not (= 0 (string-length path)))
(substring str (char=? #\/ (string-ref path 0)))]
(+ (car pair) skip-left) [path (separate-path-strings
(- (cdr pair) skip-right))
#f))))
(get-num (lambda (pos skip-left skip-right)
(let ((s (get-str pos skip-left skip-right)))
(if s (string->number s) #f))))
(host (get-str 5 0 0))
(path (get-str 7 0 0))
(scheme (get-str 2 0 1)))
(when (string? scheme) (string-lowercase! scheme))
(when (string? host) (string-lowercase! host))
(make-url scheme
(uri-decode/maybe (get-str 4 0 1)) ; user
host
(get-num 6 1 0) ; port
(and (not (= 0 (string-length path)))
(char=? #\/ (string-ref path 0)))
(separate-path-strings
;; If path is "" and the input is an absolute URL ;; If path is "" and the input is an absolute URL
;; with a hostname, then the intended path is "/", ;; with a hostname, then the intended path is "/",
;; but the URL is missing a "/" at the end. ;; but the URL is missing a "/" at the end.
path path
#; #;
(if (and (string=? path "") (if (and (string=? path "") host) "/" path))]
host) [query (if query (form-urlencoded->alist query) '())]
"/" [fragment (uri-decode/maybe fragment)])
path)) (when (string? scheme) (string-lowercase! scheme))
;(uri-decode/maybe (get-str 8 1 0)) ; (when (string? host) (string-lowercase! host))
;query (make-url scheme user host port abs? path query fragment)))
(let ([q (get-str 8 1 0)]) (cdr (or (regexp-match url-rx str)
(if q (form-urlencoded->alist q) '())) (url-error "Invalid URL string: ~e" str)))))
(uri-decode/maybe (get-str 9 1 0)) ; fragment
))
(url-error "Invalid URL string: ~e" str))))))
(define (uri-decode/maybe f) (define (uri-decode/maybe f)
;; If #f, and leave unmolested any % that is followed by hex digit ;; If #f, and leave unmolested any % that is followed by hex digit