more formatting, better regexp use
svn: r3096
This commit is contained in:
parent
646c91cc10
commit
28822a155d
|
@ -78,8 +78,8 @@
|
||||||
(require (lib "unitsig.ss")
|
(require (lib "unitsig.ss")
|
||||||
(lib "match.ss")
|
(lib "match.ss")
|
||||||
(lib "string.ss")
|
(lib "string.ss")
|
||||||
(lib "etc.ss")
|
|
||||||
(lib "list.ss")
|
(lib "list.ss")
|
||||||
|
(lib "etc.ss")
|
||||||
"uri-codec-sig.ss")
|
"uri-codec-sig.ss")
|
||||||
|
|
||||||
(provide uri-codec@)
|
(provide uri-codec@)
|
||||||
|
@ -88,27 +88,20 @@
|
||||||
(unit/sig net:uri-codec^
|
(unit/sig net:uri-codec^
|
||||||
(import)
|
(import)
|
||||||
|
|
||||||
|
(define (self-map-char ch) (cons ch ch))
|
||||||
|
(define (self-map-chars str) (map self-map-char (string->list str)))
|
||||||
|
|
||||||
;; The characters that always map to themselves
|
;; The characters that always map to themselves
|
||||||
(define alphanumeric-mapping
|
(define alphanumeric-mapping
|
||||||
(map (lambda (char) (cons char char))
|
(self-map-chars
|
||||||
'(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9
|
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"))
|
||||||
#\A #\B #\C #\D #\E #\F #\G #\H #\I #\J
|
|
||||||
#\K #\L #\M #\N #\O #\P #\Q #\R #\S #\T
|
|
||||||
#\U #\V #\W #\X #\Y #\Z #\a #\b #\c #\d
|
|
||||||
#\e #\f #\g #\h #\i #\j #\k #\l #\m #\n
|
|
||||||
#\o #\p #\q #\r #\s #\t #\u #\v #\w #\x
|
|
||||||
#\y #\z)))
|
|
||||||
|
|
||||||
;; Characters that sometimes map to themselves
|
;; Characters that sometimes map to themselves
|
||||||
(define safe-mapping
|
(define safe-mapping (self-map-chars "-_.!~*'()"))
|
||||||
(map (lambda (char) (cons char char))
|
|
||||||
'(#\- #\_ #\. #\! #\~ #\* #\' #\( #\))))
|
|
||||||
|
|
||||||
;; The strict URI mapping
|
;; The strict URI mapping
|
||||||
(define uri-mapping
|
(define uri-mapping (append alphanumeric-mapping safe-mapping))
|
||||||
(append alphanumeric-mapping
|
|
||||||
safe-mapping))
|
|
||||||
|
|
||||||
;; The uri path segment mapping from RFC 3986
|
;; The uri path segment mapping from RFC 3986
|
||||||
(define uri-path-segment-mapping
|
(define uri-path-segment-mapping
|
||||||
(append alphanumeric-mapping
|
(append alphanumeric-mapping
|
||||||
|
@ -117,29 +110,21 @@
|
||||||
|
|
||||||
;; The form-urlencoded mapping
|
;; The form-urlencoded mapping
|
||||||
(define form-urlencoded-mapping
|
(define form-urlencoded-mapping
|
||||||
(append '((#\. . #\.)
|
`(,@(self-map-chars ".-*_") (#\space . #\+) ,@alphanumeric-mapping))
|
||||||
(#\- . #\-)
|
|
||||||
(#\* . #\*)
|
|
||||||
(#\_ . #\_)
|
|
||||||
(#\space . #\+))
|
|
||||||
alphanumeric-mapping))
|
|
||||||
|
|
||||||
(define (number->hex-string number)
|
(define (number->hex-string number)
|
||||||
(let ((hex (number->string number 16)))
|
(define (hex n) (string-ref "0123456789ABCDEF" n))
|
||||||
(string-append "%"
|
(string #\% (hex (quotient number 16)) (hex (modulo number 16))))
|
||||||
(if (= (string-length hex) 1)
|
|
||||||
(string-append "0" hex)
|
|
||||||
hex))))
|
|
||||||
|
|
||||||
(define (hex-string->number hex-string)
|
(define (hex-string->number hex-string)
|
||||||
(string->number (substring hex-string 1 3) 16))
|
(string->number (substring hex-string 1 3) 16))
|
||||||
|
|
||||||
(define ascii-size 128)
|
(define ascii-size 128)
|
||||||
|
|
||||||
;; (listof (cons char char)) -> (values (vectorof string) (vectorof string))
|
;; (listof (cons char char)) -> (values (vectorof string) (vectorof string))
|
||||||
(define (make-codec-tables alist)
|
(define (make-codec-tables alist)
|
||||||
(let ((encoding-table (build-vector ascii-size number->hex-string))
|
(let ([encoding-table (build-vector ascii-size number->hex-string)]
|
||||||
(decoding-table (build-vector ascii-size values)))
|
[decoding-table (build-vector ascii-size values)])
|
||||||
(for-each (match-lambda
|
(for-each (match-lambda
|
||||||
[(orig . enc)
|
[(orig . enc)
|
||||||
(vector-set! encoding-table
|
(vector-set! encoding-table
|
||||||
|
@ -153,7 +138,7 @@
|
||||||
|
|
||||||
(define-values (uri-encoding-vector uri-decoding-vector)
|
(define-values (uri-encoding-vector uri-decoding-vector)
|
||||||
(make-codec-tables uri-mapping))
|
(make-codec-tables uri-mapping))
|
||||||
|
|
||||||
(define-values (uri-path-segment-encoding-vector
|
(define-values (uri-path-segment-encoding-vector
|
||||||
uri-path-segment-decoding-vector)
|
uri-path-segment-decoding-vector)
|
||||||
(make-codec-tables uri-path-segment-mapping))
|
(make-codec-tables uri-path-segment-mapping))
|
||||||
|
@ -216,7 +201,7 @@
|
||||||
;; string -> string
|
;; string -> string
|
||||||
(define (uri-path-segment-decode str)
|
(define (uri-path-segment-decode str)
|
||||||
(decode uri-path-segment-decoding-vector str))
|
(decode uri-path-segment-decoding-vector str))
|
||||||
|
|
||||||
;; string -> string
|
;; string -> string
|
||||||
(define (form-urlencoded-encode str)
|
(define (form-urlencoded-encode str)
|
||||||
(encode form-urlencoded-encoding-vector str))
|
(encode form-urlencoded-encoding-vector str))
|
||||||
|
@ -228,80 +213,74 @@
|
||||||
;; listof (cons string string) -> string
|
;; listof (cons string string) -> string
|
||||||
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
|
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
|
||||||
;; listof (cons symbol string) -> string
|
;; listof (cons symbol string) -> string
|
||||||
(define alist->form-urlencoded
|
(define (alist->form-urlencoded args)
|
||||||
(opt-lambda (args)
|
(let* ([mode (current-alist-separator-mode)]
|
||||||
(let* ([mode (current-alist-separator-mode)]
|
[format-one
|
||||||
[format-one
|
(lambda (arg)
|
||||||
(lambda (arg)
|
(let* ([name (car arg)]
|
||||||
(let* ([name (car arg)]
|
[value (cdr arg)])
|
||||||
[value (cdr arg)])
|
(string-append (form-urlencoded-encode (symbol->string name))
|
||||||
(string-append
|
"="
|
||||||
(form-urlencoded-encode (symbol->string name))
|
(form-urlencoded-encode value))))]
|
||||||
"="
|
[strs (let loop ([args args])
|
||||||
(form-urlencoded-encode value))))]
|
(cond
|
||||||
[strs (let loop ([args args])
|
[(null? args) null]
|
||||||
(cond
|
[(null? (cdr args)) (list (format-one (car args)))]
|
||||||
[(null? args) null]
|
[else (list* (format-one (car args))
|
||||||
[(null? (cdr args)) (list (format-one (car args)))]
|
(if (eq? mode 'amp) "&" ";")
|
||||||
[else (list* (format-one (car args))
|
(loop (cdr args)))]))])
|
||||||
(if (eq? mode 'amp)
|
(apply string-append strs)))
|
||||||
"&"
|
|
||||||
";")
|
|
||||||
(loop (cdr args)))]))])
|
|
||||||
(apply string-append strs))))
|
|
||||||
|
|
||||||
;; string -> listof (cons string string)
|
;; string -> listof (cons string string)
|
||||||
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
|
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
|
||||||
(define form-urlencoded->alist
|
(define (form-urlencoded->alist str)
|
||||||
(opt-lambda (str)
|
(define key-regexp #rx"[^=]*")
|
||||||
(define key-regexp #rx"[^=]*")
|
(define value-regexp (case (current-alist-separator-mode)
|
||||||
(define value-regexp (case (current-alist-separator-mode)
|
[(semi) #rx"[^;]*"]
|
||||||
[(semi) #rx"[^;]*"]
|
[(amp) #rx"[^&]*"]
|
||||||
[(amp) #rx"[^&]*"]
|
[else #rx"[^&;]*"]))
|
||||||
[else #rx"[^&;]*"]))
|
(define (next-key str start)
|
||||||
(define (next-key str start)
|
(and (< start (string-length str))
|
||||||
(if (>= start (string-length str))
|
(match (regexp-match-positions key-regexp str start)
|
||||||
#f
|
[((start . end))
|
||||||
(match (regexp-match-positions key-regexp str start)
|
(vector (let ([s (form-urlencoded-decode
|
||||||
[((start . end))
|
(substring str start end))])
|
||||||
(vector (let ([s (form-urlencoded-decode (substring str start end))])
|
(string->symbol s))
|
||||||
(string->symbol s))
|
(add1 end))]
|
||||||
(add1 end))]
|
[#f #f])))
|
||||||
[#f #f])))
|
(define (next-value str start)
|
||||||
(define (next-value str start)
|
(and (< start (string-length str))
|
||||||
(if (>= start (string-length str))
|
(match (regexp-match-positions value-regexp str start)
|
||||||
#f
|
[((start . end))
|
||||||
(match (regexp-match-positions value-regexp str start)
|
(vector (form-urlencoded-decode (substring str start end))
|
||||||
[((start . end))
|
(add1 end))]
|
||||||
(vector (form-urlencoded-decode (substring str start end))
|
[#f #f])))
|
||||||
(add1 end))]
|
(define (next-pair str start)
|
||||||
[#f #f])))
|
(match (next-key str start)
|
||||||
(define (next-pair str start)
|
[#(key start)
|
||||||
(match (next-key str start)
|
(match (next-value str start)
|
||||||
[#(key start)
|
[#(value start)
|
||||||
(match (next-value str start)
|
(vector (cons key value) start)]
|
||||||
[#(value start)
|
[#f
|
||||||
(vector (cons key value) start)]
|
(vector (cons key "") (string-length str))])]
|
||||||
[#f
|
[#f #f]))
|
||||||
(vector (cons key "") (string-length str))])]
|
(let loop ([start 0]
|
||||||
[#f #f]))
|
[end (string-length str)]
|
||||||
(let loop ([start 0]
|
[make-alist (lambda (x) x)])
|
||||||
[end (string-length str)]
|
(if (>= start end)
|
||||||
[make-alist (lambda (x) x)])
|
(make-alist '())
|
||||||
(cond
|
(match (next-pair str start)
|
||||||
[(>= start end) (make-alist '())]
|
[#(pair next-start)
|
||||||
[else
|
(loop next-start end (lambda (x) (make-alist (cons pair x))))]
|
||||||
(match (next-pair str start)
|
[#f (make-alist '())]))))
|
||||||
[#(pair next-start)
|
|
||||||
(loop next-start end (lambda (x) (make-alist (cons pair x))))]
|
|
||||||
[#f (make-alist '())])]))))
|
|
||||||
|
|
||||||
(define current-alist-separator-mode
|
(define current-alist-separator-mode
|
||||||
(make-parameter 'amp-or-semi (lambda (s)
|
(make-parameter 'amp-or-semi
|
||||||
(unless (memq s '(amp semi amp-or-semi))
|
(lambda (s)
|
||||||
(raise-type-error 'current-alist-separator-mode
|
(unless (memq s '(amp semi amp-or-semi))
|
||||||
"'amp, 'semi, or 'amp-or-semi"
|
(raise-type-error 'current-alist-separator-mode
|
||||||
s))
|
"'amp, 'semi, or 'amp-or-semi"
|
||||||
s))))))
|
s))
|
||||||
|
s))))))
|
||||||
|
|
||||||
;;; uri-codec-unit.ss ends here
|
;;; uri-codec-unit.ss ends here
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
(module url-structs mzscheme
|
(module url-structs mzscheme
|
||||||
(require (lib "contract.ss"))
|
(require (lib "contract.ss"))
|
||||||
|
|
||||||
(define-struct url (scheme user host port path-absolute? path query fragment))
|
(define-struct url (scheme user host port path-absolute? path query fragment))
|
||||||
(define-struct path/param (path param))
|
(define-struct path/param (path param))
|
||||||
|
|
||||||
|
|
|
@ -352,65 +352,45 @@
|
||||||
|
|
||||||
;; string->url : str -> url
|
;; string->url : str -> url
|
||||||
;; New implementation, mostly provided by Neil Van Dyke
|
;; New implementation, mostly provided by Neil Van Dyke
|
||||||
(define string->url
|
(define url-rx
|
||||||
(let ((rx (regexp (string-append
|
(regexp (string-append
|
||||||
"^"
|
"^"
|
||||||
"[ \t\f\r\n]*"
|
"[ \t\f\r\n]*"
|
||||||
"(" ; <1 front-opt
|
"(?:" ; <A front-opt
|
||||||
"([a-zA-Z]*:)?" ; =2 scheme-colon-opt
|
"(?:([a-zA-Z]*):)?" ; =1 scheme-colon-opt
|
||||||
"(" ; <3 slashslash-opt
|
"(?:" ; <B slashslash-opt
|
||||||
"//"
|
"//"
|
||||||
"([^:/@;?#]*@)?" ; =4 user-at-opt
|
"(?:([^:/@;?#]*)@)?" ; =2 user-at-opt
|
||||||
"([^:/@;?#]*)?" ; =5 host-opt
|
"([^:/@;?#]*)?" ; =3 host-opt
|
||||||
"(:[0-9]*)?" ; =6 colon-port-opt
|
"(?::([0-9]*))?" ; =4 colon-port-opt
|
||||||
")?" ; >3 slashslash-opt
|
")?" ; >B slashslash-opt
|
||||||
")?" ; >1 front-opt
|
")?" ; >A front-opt
|
||||||
"([^?#]*)" ; =7 path
|
"([^?#]*)" ; =5 path
|
||||||
"(\\?[^#]*)?" ; =8 question-query-opt
|
"(?:\\?([^#]*))?" ; =6 question-query-opt
|
||||||
"(#.*)?" ; =9 hash-fragment-opt
|
"(?:#(.*))?" ; =7 hash-fragment-opt
|
||||||
"[ \t\f\r\n]*"
|
"[ \t\f\r\n]*"
|
||||||
"$"))))
|
"$")))
|
||||||
(lambda (str)
|
(define (string->url str)
|
||||||
(let ((match (regexp-match-positions rx str)))
|
(apply
|
||||||
(if match
|
(lambda (scheme user host port path query fragment)
|
||||||
(let* ((get-str (lambda (pos skip-left skip-right)
|
(let* ([user (uri-decode/maybe user)]
|
||||||
(let ((pair (list-ref match pos)))
|
[port (and port (string->number port))]
|
||||||
(if pair
|
[abs? (and (not (= 0 (string-length path)))
|
||||||
(substring str
|
(char=? #\/ (string-ref path 0)))]
|
||||||
(+ (car pair) skip-left)
|
[path (separate-path-strings
|
||||||
(- (cdr pair) skip-right))
|
;; If path is "" and the input is an absolute URL
|
||||||
#f))))
|
;; with a hostname, then the intended path is "/",
|
||||||
(get-num (lambda (pos skip-left skip-right)
|
;; but the URL is missing a "/" at the end.
|
||||||
(let ((s (get-str pos skip-left skip-right)))
|
path
|
||||||
(if s (string->number s) #f))))
|
#;
|
||||||
(host (get-str 5 0 0))
|
(if (and (string=? path "") host) "/" path))]
|
||||||
(path (get-str 7 0 0))
|
[query (if query (form-urlencoded->alist query) '())]
|
||||||
(scheme (get-str 2 0 1)))
|
[fragment (uri-decode/maybe fragment)])
|
||||||
(when (string? scheme) (string-lowercase! scheme))
|
(when (string? scheme) (string-lowercase! scheme))
|
||||||
(when (string? host) (string-lowercase! host))
|
(when (string? host) (string-lowercase! host))
|
||||||
(make-url scheme
|
(make-url scheme user host port abs? path query fragment)))
|
||||||
(uri-decode/maybe (get-str 4 0 1)) ; user
|
(cdr (or (regexp-match url-rx str)
|
||||||
host
|
(url-error "Invalid URL string: ~e" str)))))
|
||||||
(get-num 6 1 0) ; port
|
|
||||||
(and (not (= 0 (string-length path)))
|
|
||||||
(char=? #\/ (string-ref path 0)))
|
|
||||||
(separate-path-strings
|
|
||||||
;; If path is "" and the input is an absolute URL
|
|
||||||
;; with a hostname, then the intended path is "/",
|
|
||||||
;; but the URL is missing a "/" at the end.
|
|
||||||
path
|
|
||||||
#;
|
|
||||||
(if (and (string=? path "")
|
|
||||||
host)
|
|
||||||
"/"
|
|
||||||
path))
|
|
||||||
;(uri-decode/maybe (get-str 8 1 0)) ;
|
|
||||||
;query
|
|
||||||
(let ([q (get-str 8 1 0)])
|
|
||||||
(if q (form-urlencoded->alist q) '()))
|
|
||||||
(uri-decode/maybe (get-str 9 1 0)) ; fragment
|
|
||||||
))
|
|
||||||
(url-error "Invalid URL string: ~e" str))))))
|
|
||||||
|
|
||||||
(define (uri-decode/maybe f)
|
(define (uri-decode/maybe f)
|
||||||
;; If #f, and leave unmolested any % that is followed by hex digit
|
;; If #f, and leave unmolested any % that is followed by hex digit
|
||||||
|
|
Loading…
Reference in New Issue
Block a user