more formatting, better regexp use
svn: r3096
This commit is contained in:
parent
646c91cc10
commit
28822a155d
|
@ -78,8 +78,8 @@
|
|||
(require (lib "unitsig.ss")
|
||||
(lib "match.ss")
|
||||
(lib "string.ss")
|
||||
(lib "etc.ss")
|
||||
(lib "list.ss")
|
||||
(lib "etc.ss")
|
||||
"uri-codec-sig.ss")
|
||||
|
||||
(provide uri-codec@)
|
||||
|
@ -88,26 +88,19 @@
|
|||
(unit/sig net:uri-codec^
|
||||
(import)
|
||||
|
||||
(define (self-map-char ch) (cons ch ch))
|
||||
(define (self-map-chars str) (map self-map-char (string->list str)))
|
||||
|
||||
;; The characters that always map to themselves
|
||||
(define alphanumeric-mapping
|
||||
(map (lambda (char) (cons char char))
|
||||
'(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9
|
||||
#\A #\B #\C #\D #\E #\F #\G #\H #\I #\J
|
||||
#\K #\L #\M #\N #\O #\P #\Q #\R #\S #\T
|
||||
#\U #\V #\W #\X #\Y #\Z #\a #\b #\c #\d
|
||||
#\e #\f #\g #\h #\i #\j #\k #\l #\m #\n
|
||||
#\o #\p #\q #\r #\s #\t #\u #\v #\w #\x
|
||||
#\y #\z)))
|
||||
(self-map-chars
|
||||
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"))
|
||||
|
||||
;; Characters that sometimes map to themselves
|
||||
(define safe-mapping
|
||||
(map (lambda (char) (cons char char))
|
||||
'(#\- #\_ #\. #\! #\~ #\* #\' #\( #\))))
|
||||
(define safe-mapping (self-map-chars "-_.!~*'()"))
|
||||
|
||||
;; The strict URI mapping
|
||||
(define uri-mapping
|
||||
(append alphanumeric-mapping
|
||||
safe-mapping))
|
||||
(define uri-mapping (append alphanumeric-mapping safe-mapping))
|
||||
|
||||
;; The uri path segment mapping from RFC 3986
|
||||
(define uri-path-segment-mapping
|
||||
|
@ -117,19 +110,11 @@
|
|||
|
||||
;; The form-urlencoded mapping
|
||||
(define form-urlencoded-mapping
|
||||
(append '((#\. . #\.)
|
||||
(#\- . #\-)
|
||||
(#\* . #\*)
|
||||
(#\_ . #\_)
|
||||
(#\space . #\+))
|
||||
alphanumeric-mapping))
|
||||
`(,@(self-map-chars ".-*_") (#\space . #\+) ,@alphanumeric-mapping))
|
||||
|
||||
(define (number->hex-string number)
|
||||
(let ((hex (number->string number 16)))
|
||||
(string-append "%"
|
||||
(if (= (string-length hex) 1)
|
||||
(string-append "0" hex)
|
||||
hex))))
|
||||
(define (hex n) (string-ref "0123456789ABCDEF" n))
|
||||
(string #\% (hex (quotient number 16)) (hex (modulo number 16))))
|
||||
|
||||
(define (hex-string->number hex-string)
|
||||
(string->number (substring hex-string 1 3) 16))
|
||||
|
@ -138,8 +123,8 @@
|
|||
|
||||
;; (listof (cons char char)) -> (values (vectorof string) (vectorof string))
|
||||
(define (make-codec-tables alist)
|
||||
(let ((encoding-table (build-vector ascii-size number->hex-string))
|
||||
(decoding-table (build-vector ascii-size values)))
|
||||
(let ([encoding-table (build-vector ascii-size number->hex-string)]
|
||||
[decoding-table (build-vector ascii-size values)])
|
||||
(for-each (match-lambda
|
||||
[(orig . enc)
|
||||
(vector-set! encoding-table
|
||||
|
@ -228,15 +213,13 @@
|
|||
;; listof (cons string string) -> string
|
||||
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
|
||||
;; listof (cons symbol string) -> string
|
||||
(define alist->form-urlencoded
|
||||
(opt-lambda (args)
|
||||
(define (alist->form-urlencoded args)
|
||||
(let* ([mode (current-alist-separator-mode)]
|
||||
[format-one
|
||||
(lambda (arg)
|
||||
(let* ([name (car arg)]
|
||||
[value (cdr arg)])
|
||||
(string-append
|
||||
(form-urlencoded-encode (symbol->string name))
|
||||
(string-append (form-urlencoded-encode (symbol->string name))
|
||||
"="
|
||||
(form-urlencoded-encode value))))]
|
||||
[strs (let loop ([args args])
|
||||
|
@ -244,33 +227,29 @@
|
|||
[(null? args) null]
|
||||
[(null? (cdr args)) (list (format-one (car args)))]
|
||||
[else (list* (format-one (car args))
|
||||
(if (eq? mode 'amp)
|
||||
"&"
|
||||
";")
|
||||
(if (eq? mode 'amp) "&" ";")
|
||||
(loop (cdr args)))]))])
|
||||
(apply string-append strs))))
|
||||
(apply string-append strs)))
|
||||
|
||||
;; string -> listof (cons string string)
|
||||
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
|
||||
(define form-urlencoded->alist
|
||||
(opt-lambda (str)
|
||||
(define (form-urlencoded->alist str)
|
||||
(define key-regexp #rx"[^=]*")
|
||||
(define value-regexp (case (current-alist-separator-mode)
|
||||
[(semi) #rx"[^;]*"]
|
||||
[(amp) #rx"[^&]*"]
|
||||
[else #rx"[^&;]*"]))
|
||||
(define (next-key str start)
|
||||
(if (>= start (string-length str))
|
||||
#f
|
||||
(and (< start (string-length str))
|
||||
(match (regexp-match-positions key-regexp str start)
|
||||
[((start . end))
|
||||
(vector (let ([s (form-urlencoded-decode (substring str start end))])
|
||||
(vector (let ([s (form-urlencoded-decode
|
||||
(substring str start end))])
|
||||
(string->symbol s))
|
||||
(add1 end))]
|
||||
[#f #f])))
|
||||
(define (next-value str start)
|
||||
(if (>= start (string-length str))
|
||||
#f
|
||||
(and (< start (string-length str))
|
||||
(match (regexp-match-positions value-regexp str start)
|
||||
[((start . end))
|
||||
(vector (form-urlencoded-decode (substring str start end))
|
||||
|
@ -288,16 +267,16 @@
|
|||
(let loop ([start 0]
|
||||
[end (string-length str)]
|
||||
[make-alist (lambda (x) x)])
|
||||
(cond
|
||||
[(>= start end) (make-alist '())]
|
||||
[else
|
||||
(if (>= start end)
|
||||
(make-alist '())
|
||||
(match (next-pair str start)
|
||||
[#(pair next-start)
|
||||
(loop next-start end (lambda (x) (make-alist (cons pair x))))]
|
||||
[#f (make-alist '())])]))))
|
||||
[#f (make-alist '())]))))
|
||||
|
||||
(define current-alist-separator-mode
|
||||
(make-parameter 'amp-or-semi (lambda (s)
|
||||
(make-parameter 'amp-or-semi
|
||||
(lambda (s)
|
||||
(unless (memq s '(amp semi amp-or-semi))
|
||||
(raise-type-error 'current-alist-separator-mode
|
||||
"'amp, 'semi, or 'amp-or-semi"
|
||||
|
|
|
@ -352,65 +352,45 @@
|
|||
|
||||
;; string->url : str -> url
|
||||
;; New implementation, mostly provided by Neil Van Dyke
|
||||
(define string->url
|
||||
(let ((rx (regexp (string-append
|
||||
(define url-rx
|
||||
(regexp (string-append
|
||||
"^"
|
||||
"[ \t\f\r\n]*"
|
||||
"(" ; <1 front-opt
|
||||
"([a-zA-Z]*:)?" ; =2 scheme-colon-opt
|
||||
"(" ; <3 slashslash-opt
|
||||
"(?:" ; <A front-opt
|
||||
"(?:([a-zA-Z]*):)?" ; =1 scheme-colon-opt
|
||||
"(?:" ; <B slashslash-opt
|
||||
"//"
|
||||
"([^:/@;?#]*@)?" ; =4 user-at-opt
|
||||
"([^:/@;?#]*)?" ; =5 host-opt
|
||||
"(:[0-9]*)?" ; =6 colon-port-opt
|
||||
")?" ; >3 slashslash-opt
|
||||
")?" ; >1 front-opt
|
||||
"([^?#]*)" ; =7 path
|
||||
"(\\?[^#]*)?" ; =8 question-query-opt
|
||||
"(#.*)?" ; =9 hash-fragment-opt
|
||||
"(?:([^:/@;?#]*)@)?" ; =2 user-at-opt
|
||||
"([^:/@;?#]*)?" ; =3 host-opt
|
||||
"(?::([0-9]*))?" ; =4 colon-port-opt
|
||||
")?" ; >B slashslash-opt
|
||||
")?" ; >A front-opt
|
||||
"([^?#]*)" ; =5 path
|
||||
"(?:\\?([^#]*))?" ; =6 question-query-opt
|
||||
"(?:#(.*))?" ; =7 hash-fragment-opt
|
||||
"[ \t\f\r\n]*"
|
||||
"$"))))
|
||||
(lambda (str)
|
||||
(let ((match (regexp-match-positions rx str)))
|
||||
(if match
|
||||
(let* ((get-str (lambda (pos skip-left skip-right)
|
||||
(let ((pair (list-ref match pos)))
|
||||
(if pair
|
||||
(substring str
|
||||
(+ (car pair) skip-left)
|
||||
(- (cdr pair) skip-right))
|
||||
#f))))
|
||||
(get-num (lambda (pos skip-left skip-right)
|
||||
(let ((s (get-str pos skip-left skip-right)))
|
||||
(if s (string->number s) #f))))
|
||||
(host (get-str 5 0 0))
|
||||
(path (get-str 7 0 0))
|
||||
(scheme (get-str 2 0 1)))
|
||||
(when (string? scheme) (string-lowercase! scheme))
|
||||
(when (string? host) (string-lowercase! host))
|
||||
(make-url scheme
|
||||
(uri-decode/maybe (get-str 4 0 1)) ; user
|
||||
host
|
||||
(get-num 6 1 0) ; port
|
||||
(and (not (= 0 (string-length path)))
|
||||
(char=? #\/ (string-ref path 0)))
|
||||
(separate-path-strings
|
||||
"$")))
|
||||
(define (string->url str)
|
||||
(apply
|
||||
(lambda (scheme user host port path query fragment)
|
||||
(let* ([user (uri-decode/maybe user)]
|
||||
[port (and port (string->number port))]
|
||||
[abs? (and (not (= 0 (string-length path)))
|
||||
(char=? #\/ (string-ref path 0)))]
|
||||
[path (separate-path-strings
|
||||
;; If path is "" and the input is an absolute URL
|
||||
;; with a hostname, then the intended path is "/",
|
||||
;; but the URL is missing a "/" at the end.
|
||||
path
|
||||
#;
|
||||
(if (and (string=? path "")
|
||||
host)
|
||||
"/"
|
||||
path))
|
||||
;(uri-decode/maybe (get-str 8 1 0)) ;
|
||||
;query
|
||||
(let ([q (get-str 8 1 0)])
|
||||
(if q (form-urlencoded->alist q) '()))
|
||||
(uri-decode/maybe (get-str 9 1 0)) ; fragment
|
||||
))
|
||||
(url-error "Invalid URL string: ~e" str))))))
|
||||
(if (and (string=? path "") host) "/" path))]
|
||||
[query (if query (form-urlencoded->alist query) '())]
|
||||
[fragment (uri-decode/maybe fragment)])
|
||||
(when (string? scheme) (string-lowercase! scheme))
|
||||
(when (string? host) (string-lowercase! host))
|
||||
(make-url scheme user host port abs? path query fragment)))
|
||||
(cdr (or (regexp-match url-rx str)
|
||||
(url-error "Invalid URL string: ~e" str)))))
|
||||
|
||||
(define (uri-decode/maybe f)
|
||||
;; If #f, and leave unmolested any % that is followed by hex digit
|
||||
|
|
Loading…
Reference in New Issue
Block a user