net/uri-codec: added `uri-path-segment-unreserved-encode'

This commit is contained in:
Matthew Flatt 2012-12-17 06:32:03 -07:00
parent 148a1d4387
commit 3d1f1289ef
4 changed files with 48 additions and 7 deletions

View File

@ -28,7 +28,11 @@ less than 128).
The encoding, in line with RFC 2396's recommendation, represents a The encoding, in line with RFC 2396's recommendation, represents a
character as-is, if possible. The decoding allows any characters character as-is, if possible. The decoding allows any characters
to be represented by their hex values, and allows characters to be to be represented by their hex values, and allows characters to be
incorrectly represented as-is. incorrectly represented as-is. The library provides ``unreserved''
encoders that encode @litchar{!}, @litchar{*}, @litchar{'},
@litchar{(}, and @litchar{)} using their hex representation,
which is not recommended by RFC 2396 but avoids problems with some
contexts.
The rules for the @tt{application/x-www-form-urlencoded} mimetype The rules for the @tt{application/x-www-form-urlencoded} mimetype
given in the HTML 4.0 spec are: given in the HTML 4.0 spec are:
@ -52,15 +56,17 @@ given in the HTML 4.0 spec are:
] ]
These rules differs slightly from the straight encoding in RFC 2396 in These @tt{application/x-www-form-urlencoded} rules differ slightly from the straight encoding in RFC 2396 in
that @litchar{+} is allowed, and it represents a space. The that @litchar{+} is allowed, and it represents a space. The
@racketmodname[net/uri-codec] library follows this convention, @racketmodname[net/uri-codec] library follows this convention,
encoding a space as @litchar{+} and decoding @litchar{+} as a space. encoding a space as @litchar{+} and decoding @litchar{+} as a space.
In addtion, since there appear to be some brain-dead decoders on the In addition, since there appear to be some broken decoders on the
web, the library also encodes @litchar{!}, @litchar{~}, @litchar{'}, web, the library also encodes @litchar{!}, @litchar{~}, @litchar{'},
@litchar{(}, and @litchar{)} using their hex representation, which is @litchar{(}, and @litchar{)} using their hex representation, which is
the same choice as made by Java's @tt{URLEncoder}. the same choice as made by Java's @tt{URLEncoder}.
@; ---------------------------------------- @; ----------------------------------------
@section[#:tag "uri-codec-proc"]{Functions} @section[#:tag "uri-codec-proc"]{Functions}
@ -92,6 +98,14 @@ Encodes a string according to the rules in @cite["RFC3986"](section 2.3) for the
@defproc[(uri-unreserved-decode [str string?]) string?]{ @defproc[(uri-unreserved-decode [str string?]) string?]{
Decodes a string according to the rules in @cite["RFC3986"](section 2.3) for the unreserved characters. Decodes a string according to the rules in @cite["RFC3986"](section 2.3) for the unreserved characters.
} }
@defproc[(uri-path-segment-unreserved-encode [str string?]) string?]{
Encodes a string according to the rules in @cite["RFC3986"] for path segments,
but also encodes characters that @racket[uri-unreserved-encode] encodes
and that @racket[uri-encode] does not.
}
@defproc[(uri-path-segment-unreserved-decode [str string?]) string?]{
Decodes a string according to the rules in @cite["RFC3986"] for path segments.
}
@defproc[(form-urlencoded-encode [str string?]) string?]{ @defproc[(form-urlencoded-encode [str string?]) string?]{
@ -184,7 +198,9 @@ Imports nothing, exports @racket[uri-codec^].}
@defsignature[uri-codec^ ()]{} @defsignature[uri-codec^ ()]{}
Includes everything exported by the @racketmodname[net/uri-codec] module. Includes everything exported by the @racketmodname[net/uri-codec]
module except @racket[uri-path-segment-unreserved-encode] and
@racket[uri-path-segment-unreserved-decode].
@close-eval[uri-codec-eval] @close-eval[uri-codec-eval]

View File

@ -93,6 +93,8 @@ See more in PR8831.
uri-userinfo-decode uri-userinfo-decode
uri-unreserved-encode uri-unreserved-encode
uri-unreserved-decode uri-unreserved-decode
uri-path-segment-unreserved-encode
uri-path-segment-unreserved-decode
form-urlencoded-encode form-urlencoded-encode
form-urlencoded-decode form-urlencoded-decode
alist->form-urlencoded alist->form-urlencoded
@ -115,16 +117,21 @@ See more in PR8831.
(define uri-mapping (append alphanumeric-mapping safe-mapping)) (define uri-mapping (append alphanumeric-mapping safe-mapping))
;; The uri path segment mapping from RFC 3986 ;; The uri path segment mapping from RFC 3986
;; Extra characters that map to themselves in path segments; shared by
;; `uri-path-segment-mapping' and `uri-path-segment-unreserved-mapping',
;; each of which appends this list to its own base mapping.
(define path-segment-extra-mapping (self-map-chars "@+,=$&:"))
(define uri-path-segment-mapping (define uri-path-segment-mapping
(append alphanumeric-mapping (append uri-mapping
safe-mapping path-segment-extra-mapping))
(self-map-chars "@+,=$&:")))
;; from RFC 3986 ;; from RFC 3986
(define unreserved-mapping (define unreserved-mapping
(append alphanumeric-mapping (append alphanumeric-mapping
(self-map-chars "-._~"))) (self-map-chars "-._~")))
;; The "unreserved" variant of the uri path segment mapping: starts from
;; `unreserved-mapping' (instead of `uri-mapping') and adds the same
;; extra path-segment characters from `path-segment-extra-mapping'.
(define uri-path-segment-unreserved-mapping
(append unreserved-mapping
path-segment-extra-mapping))
;; from RFC 3986 ;; from RFC 3986
(define sub-delims-mapping (define sub-delims-mapping
(self-map-chars "!$&'()*+,;=")) (self-map-chars "!$&'()*+,;="))
@ -176,6 +183,10 @@ See more in PR8831.
uri-unreserved-decoding-vector) uri-unreserved-decoding-vector)
(make-codec-tables unreserved-mapping)) (make-codec-tables unreserved-mapping))
;; Encoding and decoding tables built from
;; `uri-path-segment-unreserved-mapping'; used by
;; `uri-path-segment-unreserved-encode' and
;; `uri-path-segment-unreserved-decode'.
(define-values (uri-path-segment-unreserved-encoding-vector
uri-path-segment-unreserved-decoding-vector)
(make-codec-tables uri-path-segment-unreserved-mapping))
(define-values (form-urlencoded-encoding-vector (define-values (form-urlencoded-encoding-vector
form-urlencoded-decoding-vector) form-urlencoded-decoding-vector)
(make-codec-tables form-urlencoded-mapping)) (make-codec-tables form-urlencoded-mapping))
@ -249,6 +260,14 @@ See more in PR8831.
(define (uri-unreserved-decode str) (define (uri-unreserved-decode str)
(decode uri-unreserved-decoding-vector str)) (decode uri-unreserved-decoding-vector str))
;; string -> string
;; Encodes `str' using the path-segment "unreserved" encoding table
;; (`uri-path-segment-unreserved-encoding-vector').
(define (uri-path-segment-unreserved-encode str)
(encode uri-path-segment-unreserved-encoding-vector str))
;; string -> string
;; Decodes `str' using the path-segment "unreserved" decoding table
;; (`uri-path-segment-unreserved-decoding-vector').
(define (uri-path-segment-unreserved-decode str)
(decode uri-path-segment-unreserved-decoding-vector str))
;; string -> string ;; string -> string
(define (form-urlencoded-encode str) (define (form-urlencoded-encode str)
(encode form-urlencoded-encoding-vector str)) (encode form-urlencoded-encoding-vector str))

View File

@ -75,11 +75,16 @@
(uri-path-segment-encode "M~(@; ") => "M~(@%3B%20" (uri-path-segment-encode "M~(@; ") => "M~(@%3B%20"
(uri-userinfo-encode "M~(@; ") => "M~(%40;%20" (uri-userinfo-encode "M~(@; ") => "M~(%40;%20"
(uri-unreserved-encode "M~(@; ") => "M~%28%40%3B%20" (uri-unreserved-encode "M~(@; ") => "M~%28%40%3B%20"
(uri-path-segment-unreserved-encode "M~(@; ") => "M~%28@%3B%20"
;; matching decodes: ;; matching decodes:
(uri-decode "M~(%40%3B%20") => "M~(@; " (uri-decode "M~(%40%3B%20") => "M~(@; "
(uri-path-segment-decode "M~(@%3B%20") => "M~(@; " (uri-path-segment-decode "M~(@%3B%20") => "M~(@; "
(uri-userinfo-decode "M~(%40;%20") => "M~(@; " (uri-userinfo-decode "M~(%40;%20") => "M~(@; "
(uri-unreserved-decode "M~%28%40%3B%20") => "M~(@; " (uri-unreserved-decode "M~%28%40%3B%20") => "M~(@; "
(uri-path-segment-unreserved-decode "M~%28@%3B%20") => "M~(@; "
(uri-path-segment-decode "M~%28@%3B%20") => "M~(@; "
(uri-path-segment-unreserved-decode "M~(@%3B%20") => "M~(@; "
)) ))
;; tests adapted from Noel Welsh's original test suite ;; tests adapted from Noel Welsh's original test suite

View File

@ -4,6 +4,7 @@ r5rs, r6rs: fixed case and cond to disallow internal definitions
in clauses in clauses
Add #fx() and #fl() reader forms for flvectors and fxvectors Add #fx() and #fl() reader forms for flvectors and fxvectors
racket/serialize: fxvectors and flvectors are serializable racket/serialize: fxvectors and flvectors are serializable
net/uri-codec: added uri-path-segment-unreserved-encode and uri-path-segment-unreserved-decode
Version 5.3.1.8 Version 5.3.1.8
file/untar: added file/untar: added