Misc improvements to `net/uri-codec'.
Fix a bug in encoding strings with characters between 128 and 256, avoid extra bindings for conversion vectors; remove calling `noels-tests' from `main' since it's already called from `tests', etc.
This commit is contained in:
parent
f16184d69f
commit
c2280ed8dc
|
@ -83,26 +83,18 @@ See more in PR8831.
|
||||||
;; Draws inspiration from encode-decode.scm by Kurt Normark and a code
|
;; Draws inspiration from encode-decode.scm by Kurt Normark and a code
|
||||||
;; sample provided by Eli Barzilay
|
;; sample provided by Eli Barzilay
|
||||||
|
|
||||||
(require racket/match racket/string racket/list)
|
(require racket/string racket/list)
|
||||||
|
|
||||||
(provide uri-encode
|
(provide uri-encode uri-decode
|
||||||
uri-decode
|
uri-path-segment-encode uri-path-segment-decode
|
||||||
uri-path-segment-encode
|
uri-userinfo-encode uri-userinfo-decode
|
||||||
uri-path-segment-decode
|
uri-unreserved-encode uri-unreserved-decode
|
||||||
uri-userinfo-encode
|
uri-path-segment-unreserved-encode uri-path-segment-unreserved-decode
|
||||||
uri-userinfo-decode
|
form-urlencoded-encode form-urlencoded-decode
|
||||||
uri-unreserved-encode
|
alist->form-urlencoded form-urlencoded->alist
|
||||||
uri-unreserved-decode
|
|
||||||
uri-path-segment-unreserved-encode
|
|
||||||
uri-path-segment-unreserved-decode
|
|
||||||
form-urlencoded-encode
|
|
||||||
form-urlencoded-decode
|
|
||||||
alist->form-urlencoded
|
|
||||||
form-urlencoded->alist
|
|
||||||
current-alist-separator-mode)
|
current-alist-separator-mode)
|
||||||
|
|
||||||
(define (self-map-char ch) (cons ch ch))
|
(define (self-map-chars str) (map (λ (ch) (cons ch ch)) (string->list str)))
|
||||||
(define (self-map-chars str) (map self-map-char (string->list str)))
|
|
||||||
|
|
||||||
;; The characters that always map to themselves
|
;; The characters that always map to themselves
|
||||||
(define alphanumeric-mapping
|
(define alphanumeric-mapping
|
||||||
|
@ -114,33 +106,25 @@ See more in PR8831.
|
||||||
(define safe-mapping (self-map-chars "-_.!~*'()"))
|
(define safe-mapping (self-map-chars "-_.!~*'()"))
|
||||||
|
|
||||||
;; The strict URI mapping
|
;; The strict URI mapping
|
||||||
(define uri-mapping (append alphanumeric-mapping safe-mapping))
|
(define uri-mapping `(,@alphanumeric-mapping ,@safe-mapping))
|
||||||
|
|
||||||
;; The uri path segment mapping from RFC 3986
|
;; The uri path segment mapping from RFC 3986
|
||||||
(define path-segment-extra-mapping (self-map-chars "@+,=$&:"))
|
(define path-segment-extra-mapping (self-map-chars "@+,=$&:"))
|
||||||
(define uri-path-segment-mapping
|
(define uri-path-segment-mapping `(,@uri-mapping ,@path-segment-extra-mapping))
|
||||||
(append uri-mapping
|
|
||||||
path-segment-extra-mapping))
|
|
||||||
|
|
||||||
;; from RFC 3986
|
;; from RFC 3986
|
||||||
(define unreserved-mapping
|
(define unreserved-mapping `(,@alphanumeric-mapping ,@(self-map-chars "-._~")))
|
||||||
(append alphanumeric-mapping
|
|
||||||
(self-map-chars "-._~")))
|
|
||||||
|
|
||||||
;; The uri path segment mapping from RFC 3986
|
;; The uri path segment mapping from RFC 3986
|
||||||
(define uri-path-segment-unreserved-mapping
|
(define uri-path-segment-unreserved-mapping
|
||||||
(append unreserved-mapping
|
`(,@unreserved-mapping ,@path-segment-extra-mapping))
|
||||||
path-segment-extra-mapping))
|
|
||||||
|
|
||||||
;; from RFC 3986
|
;; from RFC 3986
|
||||||
(define sub-delims-mapping
|
(define sub-delims-mapping (self-map-chars "!$&'()*+,;="))
|
||||||
(self-map-chars "!$&'()*+,;="))
|
|
||||||
|
|
||||||
;; The uri userinfo mapping from RFC 3986
|
;; The uri userinfo mapping from RFC 3986
|
||||||
(define uri-userinfo-mapping
|
(define uri-userinfo-mapping
|
||||||
(append unreserved-mapping
|
`(,@unreserved-mapping ,@sub-delims-mapping ,@(self-map-chars ":")))
|
||||||
sub-delims-mapping
|
|
||||||
(self-map-chars ":")))
|
|
||||||
|
|
||||||
;; The form-urlencoded mapping
|
;; The form-urlencoded mapping
|
||||||
(define form-urlencoded-mapping
|
(define form-urlencoded-mapping
|
||||||
|
@ -152,175 +136,97 @@ See more in PR8831.
|
||||||
|
|
||||||
(define ascii-size 128)
|
(define ascii-size 128)
|
||||||
|
|
||||||
;; (listof (cons char char)) -> (values (vectorof string) (vectorof string))
|
;; (listof (cons char char)) -> (values (vectorof string) (vectorof int))
|
||||||
(define (make-codec-tables alist)
|
(define (make-codec-tables alist)
|
||||||
(let ([encoding-table (build-vector ascii-size number->hex-string)]
|
(define encoding-table (build-vector ascii-size number->hex-string))
|
||||||
[decoding-table (build-vector ascii-size values)])
|
(define decoding-table (build-vector ascii-size values))
|
||||||
(for-each (match-lambda
|
(for ([orig+enc (in-list alist)])
|
||||||
[(cons orig enc)
|
|
||||||
(vector-set! encoding-table
|
(vector-set! encoding-table
|
||||||
(char->integer orig)
|
(char->integer (car orig+enc))
|
||||||
(string enc))
|
(string (cdr orig+enc)))
|
||||||
(vector-set! decoding-table
|
(vector-set! decoding-table
|
||||||
(char->integer enc)
|
(char->integer (cdr orig+enc))
|
||||||
(char->integer orig))])
|
(char->integer (car orig+enc))))
|
||||||
alist)
|
(values encoding-table decoding-table))
|
||||||
|
|
||||||
(values encoding-table decoding-table)))
|
|
||||||
|
|
||||||
(define-values (uri-encoding-vector uri-decoding-vector)
|
|
||||||
(make-codec-tables uri-mapping))
|
|
||||||
|
|
||||||
(define-values (uri-path-segment-encoding-vector
|
|
||||||
uri-path-segment-decoding-vector)
|
|
||||||
(make-codec-tables uri-path-segment-mapping))
|
|
||||||
|
|
||||||
(define-values (uri-userinfo-encoding-vector
|
|
||||||
uri-userinfo-decoding-vector)
|
|
||||||
(make-codec-tables uri-userinfo-mapping))
|
|
||||||
|
|
||||||
(define-values (uri-unreserved-encoding-vector
|
|
||||||
uri-unreserved-decoding-vector)
|
|
||||||
(make-codec-tables unreserved-mapping))
|
|
||||||
|
|
||||||
(define-values (uri-path-segment-unreserved-encoding-vector
|
|
||||||
uri-path-segment-unreserved-decoding-vector)
|
|
||||||
(make-codec-tables uri-path-segment-unreserved-mapping))
|
|
||||||
|
|
||||||
(define-values (form-urlencoded-encoding-vector
|
|
||||||
form-urlencoded-decoding-vector)
|
|
||||||
(make-codec-tables form-urlencoded-mapping))
|
|
||||||
|
|
||||||
;; vector string -> string
|
;; vector string -> string
|
||||||
(define (encode table str)
|
(define (encode table str)
|
||||||
;; First, check for an ASCII string with no conversion needed:
|
;; First, check for an ASCII string with no conversion needed:
|
||||||
(if (for/and ([char (in-string str)])
|
(if (for/and ([char (in-string str)])
|
||||||
(define v (char->integer char))
|
(define v (char->integer char))
|
||||||
(and (byte? v)
|
(and (< v ascii-size)
|
||||||
(let ([s (vector-ref table v)])
|
(let ([s (vector-ref table v)])
|
||||||
(and (= 1 (string-length s))
|
(and (= 1 (string-length s))
|
||||||
(eq? char (string-ref s 0))))))
|
(eq? char (string-ref s 0))))))
|
||||||
str
|
str
|
||||||
(apply string-append
|
(string-append* (for/list ([byte (in-bytes (string->bytes/utf-8 str))])
|
||||||
(for/list ([byte (in-bytes (string->bytes/utf-8 str))])
|
|
||||||
(if (< byte ascii-size)
|
(if (< byte ascii-size)
|
||||||
(vector-ref table byte)
|
(vector-ref table byte)
|
||||||
(number->hex-string byte))))))
|
(number->hex-string byte))))))
|
||||||
|
|
||||||
;; vector string -> string
|
;; vector string -> string
|
||||||
(define (decode table str)
|
(define (decode table str)
|
||||||
(define internal-decode
|
(define max-ascii (integer->char ascii-size))
|
||||||
(match-lambda [(list) (list)]
|
(define (internal-decode l)
|
||||||
[(list* #\% (? hex-digit? char1) (? hex-digit? char2) rest)
|
(if (null? l) '()
|
||||||
;; This used to consult the table again, but I think that's
|
(let* ([c (car l)] [l (cdr l)]
|
||||||
;; wrong. For example %2b should produce +, not a space.
|
[hex (and (equal? #\% c) (pair? l) (pair? (cdr l))
|
||||||
(cons (string->number (string char1 char2) 16)
|
(string->number (string (car l) (cadr l)) 16))])
|
||||||
(internal-decode rest))]
|
(if hex (cons hex (internal-decode (cddr l)))
|
||||||
[(cons (? ascii-char? char) rest)
|
(cons (if (char<? c max-ascii)
|
||||||
(cons (vector-ref table (char->integer char))
|
(vector-ref table (char->integer c))
|
||||||
(internal-decode rest))]
|
;; This should probably error, but strings to be decoded
|
||||||
[(cons char rest)
|
;; might come from misbehaving sources; maybe it's better
|
||||||
;; JBC : this appears to handle strings containing
|
;; to add some parameter for a permissive mode
|
||||||
;; non-ascii characters; shouldn't this just be an
|
(bytes->list (string->bytes/utf-8 (string c))))
|
||||||
;; error?
|
(internal-decode l))))))
|
||||||
(append
|
|
||||||
(bytes->list (string->bytes/utf-8 (string char)))
|
|
||||||
(internal-decode rest))]))
|
|
||||||
(bytes->string/utf-8 (apply bytes (internal-decode (string->list str)))))
|
(bytes->string/utf-8 (apply bytes (internal-decode (string->list str)))))
|
||||||
|
|
||||||
(define (ascii-char? c)
|
;; Utility for defining codecs
|
||||||
(< (char->integer c) ascii-size))
|
(define-syntax-rule (define-codecs [encoder decoder mapping] ...)
|
||||||
|
(begin (define-values [encoder decoder]
|
||||||
|
(let-values ([(v:en v:de) (make-codec-tables mapping)])
|
||||||
|
(define (encoder str) (encode v:en str))
|
||||||
|
(define (decoder str) (decode v:de str))
|
||||||
|
(values encoder decoder)))
|
||||||
|
...))
|
||||||
|
|
||||||
(define (hex-digit? c)
|
;; All of these are string -> string
|
||||||
(or (char<=? #\0 c #\9)
|
(define-codecs
|
||||||
(char<=? #\a c #\f)
|
[uri-encode uri-decode uri-mapping]
|
||||||
(char<=? #\A c #\F)))
|
[uri-path-segment-encode uri-path-segment-decode uri-path-segment-mapping]
|
||||||
|
[uri-userinfo-encode uri-userinfo-decode uri-userinfo-mapping]
|
||||||
;; string -> string
|
[uri-unreserved-encode uri-unreserved-decode unreserved-mapping]
|
||||||
(define (uri-encode str)
|
[uri-path-segment-unreserved-encode uri-path-segment-unreserved-decode
|
||||||
(encode uri-encoding-vector str))
|
uri-path-segment-unreserved-mapping]
|
||||||
|
[form-urlencoded-encode form-urlencoded-decode form-urlencoded-mapping])
|
||||||
;; string -> string
|
|
||||||
(define (uri-decode str)
|
|
||||||
(decode uri-decoding-vector str))
|
|
||||||
|
|
||||||
;; string -> string
|
|
||||||
(define (uri-path-segment-encode str)
|
|
||||||
(encode uri-path-segment-encoding-vector str))
|
|
||||||
|
|
||||||
;; string -> string
|
|
||||||
(define (uri-path-segment-decode str)
|
|
||||||
(decode uri-path-segment-decoding-vector str))
|
|
||||||
|
|
||||||
;; string -> string
|
|
||||||
(define (uri-userinfo-encode str)
|
|
||||||
(encode uri-userinfo-encoding-vector str))
|
|
||||||
|
|
||||||
;; string -> string
|
|
||||||
(define (uri-userinfo-decode str)
|
|
||||||
(decode uri-userinfo-decoding-vector str))
|
|
||||||
|
|
||||||
;; string -> string
|
|
||||||
(define (uri-unreserved-encode str)
|
|
||||||
(encode uri-unreserved-encoding-vector str))
|
|
||||||
|
|
||||||
;; string -> string
|
|
||||||
(define (uri-unreserved-decode str)
|
|
||||||
(decode uri-unreserved-decoding-vector str))
|
|
||||||
|
|
||||||
;; string -> string
|
|
||||||
(define (uri-path-segment-unreserved-encode str)
|
|
||||||
(encode uri-path-segment-unreserved-encoding-vector str))
|
|
||||||
|
|
||||||
;; string -> string
|
|
||||||
(define (uri-path-segment-unreserved-decode str)
|
|
||||||
(decode uri-path-segment-unreserved-decoding-vector str))
|
|
||||||
|
|
||||||
;; string -> string
|
|
||||||
(define (form-urlencoded-encode str)
|
|
||||||
(encode form-urlencoded-encoding-vector str))
|
|
||||||
|
|
||||||
;; string -> string
|
|
||||||
(define (form-urlencoded-decode str)
|
|
||||||
(decode form-urlencoded-decoding-vector str))
|
|
||||||
|
|
||||||
;; listof (cons string string) -> string
|
;; listof (cons string string) -> string
|
||||||
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
|
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
|
||||||
;; listof (cons symbol string) -> string
|
;; listof (cons symbol string) -> string
|
||||||
(define (alist->form-urlencoded args)
|
(define (alist->form-urlencoded args)
|
||||||
(let* ([sep (if (memq (current-alist-separator-mode) '(semi semi-or-amp))
|
(string-join
|
||||||
";" "&")]
|
(for/list ([arg (in-list args)])
|
||||||
[format-one
|
(define name (form-urlencoded-encode (symbol->string (car arg))))
|
||||||
(lambda (arg)
|
(define value (and (cdr arg) (form-urlencoded-encode (cdr arg))))
|
||||||
(let* ([name (car arg)]
|
(if value (string-append name "=" value) name))
|
||||||
[value (cdr arg)]
|
(if (memq (current-alist-separator-mode) '(semi semi-or-amp)) ";" "&")))
|
||||||
[name (form-urlencoded-encode (symbol->string name))]
|
|
||||||
[value (and value (form-urlencoded-encode value))])
|
|
||||||
(if value (string-append name "=" value) name)))]
|
|
||||||
[strs (if (null? args)
|
|
||||||
'()
|
|
||||||
(cons (format-one (car args))
|
|
||||||
(apply append
|
|
||||||
(map (lambda (a) (list sep (format-one a)))
|
|
||||||
(cdr args)))))])
|
|
||||||
(apply string-append strs)))
|
|
||||||
|
|
||||||
;; string -> listof (cons string string)
|
;; string -> listof (cons string string)
|
||||||
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
|
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
|
||||||
(define (form-urlencoded->alist str)
|
(define (form-urlencoded->alist str)
|
||||||
(define keyval-regexp #rx"=")
|
(define sep-regexp
|
||||||
(define value-regexp
|
|
||||||
(case (current-alist-separator-mode)
|
(case (current-alist-separator-mode)
|
||||||
[(semi) #rx"[;]"]
|
[(semi) #rx"[;]"]
|
||||||
[(amp) #rx"[&]"]
|
[(amp) #rx"[&]"]
|
||||||
[else #rx"[&;]"]))
|
[else #rx"[&;]"]))
|
||||||
(define (parse-keyval keyval)
|
(if (equal? "" str) '()
|
||||||
(let (;; m = #f => no "=..." part
|
(for/list ([keyval (in-list (regexp-split sep-regexp str))])
|
||||||
[m (regexp-match-positions keyval-regexp keyval)])
|
;; m = #f => no "=..." part
|
||||||
|
(define m (regexp-match-positions #rx"=" keyval))
|
||||||
(cons (string->symbol (form-urlencoded-decode
|
(cons (string->symbol (form-urlencoded-decode
|
||||||
(if m (substring keyval 0 (caar m)) keyval)))
|
(if m (substring keyval 0 (caar m)) keyval)))
|
||||||
(and m (form-urlencoded-decode (substring keyval (cdar m)))))))
|
(and m (form-urlencoded-decode (substring keyval (cdar m))))))))
|
||||||
(if (equal? "" str) '() (map parse-keyval (regexp-split value-regexp str))))
|
|
||||||
|
|
||||||
(define current-alist-separator-mode
|
(define current-alist-separator-mode
|
||||||
(make-parameter 'amp-or-semi
|
(make-parameter 'amp-or-semi
|
||||||
|
|
|
@ -19,7 +19,6 @@
|
||||||
do (dns:tests)
|
do (dns:tests)
|
||||||
do (url:tests)
|
do (url:tests)
|
||||||
do (ucodec:tests)
|
do (ucodec:tests)
|
||||||
do (ucodec:noels-tests)
|
|
||||||
do (cgi:tests)
|
do (cgi:tests)
|
||||||
do (ftp:tests)
|
do (ftp:tests)
|
||||||
do (head:tests)
|
do (head:tests)
|
||||||
|
|
|
@ -85,11 +85,10 @@
|
||||||
|
|
||||||
(uri-path-segment-decode "M~%28@%3B%20") => "M~(@; "
|
(uri-path-segment-decode "M~%28@%3B%20") => "M~(@; "
|
||||||
(uri-path-segment-unreserved-decode "M~(@%3B%20") => "M~(@; "
|
(uri-path-segment-unreserved-decode "M~(@%3B%20") => "M~(@; "
|
||||||
|
(uri-encode "æçè") => "%C3%A6%C3%A7%C3%A8"
|
||||||
))
|
))
|
||||||
|
|
||||||
;; tests adapted from Noel Welsh's original test suite
|
;; tests adapted from Noel Welsh's original test suite
|
||||||
(provide noels-tests)
|
|
||||||
(module+ main (noels-tests))
|
|
||||||
(define (noels-tests)
|
(define (noels-tests)
|
||||||
(define (pad2 str)
|
(define (pad2 str)
|
||||||
(if (= (string-length str) 1) (string-append "0" str) str))
|
(if (= (string-length str) 1) (string-append "0" str) str))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user