diff --git a/collects/net/scribblings/uri-codec.scrbl b/collects/net/scribblings/uri-codec.scrbl index f25e274e7b..b67109497a 100644 --- a/collects/net/scribblings/uri-codec.scrbl +++ b/collects/net/scribblings/uri-codec.scrbl @@ -28,7 +28,11 @@ less than 128). The encoding, in line with RFC 2396's recommendation, represents a character as-is, if possible. The decoding allows any characters to be represented by their hex values, and allows characters to be -incorrectly represented as-is. +incorrectly represented as-is. The library provides ``unreserved'' +encoders that encode @litchar{!}, @litchar{*}, @litchar{'}, +@litchar{(}, and @litchar{)} using their hex representation, +which is not recommended by RFC 2396 but avoids problems with some +contexts. The rules for the @tt{application/x-www-form-urlencoded} mimetype given in the HTML 4.0 spec are: @@ -52,15 +56,17 @@ given in the HTML 4.0 spec are: ] -These rules differs slightly from the straight encoding in RFC 2396 in +These @tt{application/x-www-form-urlencoded} rules differ slightly from the straight encoding in RFC 2396 in that @litchar{+} is allowed, and it represents a space. The @racketmodname[net/uri-codec] library follows this convention, encoding a space as @litchar{+} and decoding @litchar{+} as a space. -In addtion, since there appear to be some brain-dead decoders on the +In addition, since there appear to be some broken decoders on the web, the library also encodes @litchar{!}, @litchar{~}, @litchar{'}, @litchar{(}, and @litchar{)} using their hex representation, which is the same choice as made by the Java's @tt{URLEncoder}. + + @; ---------------------------------------- @section[#:tag "uri-codec-proc"]{Functions} @@ -92,6 +98,14 @@ Encodes a string according to the rules in @cite["RFC3986"](section 2.3) for the @defproc[(uri-unreserved-decode [str string?]) string?]{ Decodes a string according to the rules in @cite["RFC3986"](section 2.3) for the unreserved characters. 
} +@defproc[(uri-path-segment-unreserved-encode [str string?]) string?]{ +Encodes a string according to the rules in @cite["RFC3986"] for path segments, +but also encodes characters that @racket[uri-unreserved-encode] encodes +and that @racket[uri-encode] does not. +} +@defproc[(uri-path-segment-unreserved-decode [str string?]) string?]{ +Decodes a string according to the rules in @cite["RFC3986"] for path segments. +} @defproc[(form-urlencoded-encode [str string?]) string?]{ @@ -184,7 +198,9 @@ Imports nothing, exports @racket[uri-codec^].} @defsignature[uri-codec^ ()]{} -Includes everything exported by the @racketmodname[net/uri-codec] module. +Includes everything exported by the @racketmodname[net/uri-codec] +module except @racket[uri-path-segment-unreserved-encode] and +@racket[uri-path-segment-unreserved-decode]. @close-eval[uri-codec-eval] diff --git a/collects/net/uri-codec.rkt b/collects/net/uri-codec.rkt index e3571338ce..da4b106ea1 100644 --- a/collects/net/uri-codec.rkt +++ b/collects/net/uri-codec.rkt @@ -93,6 +93,8 @@ See more in PR8831. uri-userinfo-decode uri-unreserved-encode uri-unreserved-decode + uri-path-segment-unreserved-encode + uri-path-segment-unreserved-decode form-urlencoded-encode form-urlencoded-decode alist->form-urlencoded @@ -115,16 +117,21 @@ See more in PR8831. 
(define uri-mapping (append alphanumeric-mapping safe-mapping)) ;; The uri path segment mapping from RFC 3986 +(define path-segment-extra-mapping (self-map-chars "@+,=$&:")) (define uri-path-segment-mapping - (append alphanumeric-mapping - safe-mapping - (self-map-chars "@+,=$&:"))) + (append uri-mapping + path-segment-extra-mapping)) ;; from RFC 3986 (define unreserved-mapping (append alphanumeric-mapping (self-map-chars "-._~"))) +;; The uri path segment mapping built on the RFC 3986 unreserved set +(define uri-path-segment-unreserved-mapping + (append unreserved-mapping + path-segment-extra-mapping)) + ;; from RFC 3986 (define sub-delims-mapping (self-map-chars "!$&'()*+,;=")) @@ -176,6 +183,10 @@ See more in PR8831. uri-unreserved-decoding-vector) (make-codec-tables unreserved-mapping)) +(define-values (uri-path-segment-unreserved-encoding-vector + uri-path-segment-unreserved-decoding-vector) + (make-codec-tables uri-path-segment-unreserved-mapping)) + (define-values (form-urlencoded-encoding-vector form-urlencoded-decoding-vector) (make-codec-tables form-urlencoded-mapping)) @@ -249,6 +260,14 @@ See more in PR8831. 
(define (uri-unreserved-decode str) (decode uri-unreserved-decoding-vector str)) +;; string -> string +(define (uri-path-segment-unreserved-encode str) + (encode uri-path-segment-unreserved-encoding-vector str)) + +;; string -> string +(define (uri-path-segment-unreserved-decode str) + (decode uri-path-segment-unreserved-decoding-vector str)) + ;; string -> string (define (form-urlencoded-encode str) (encode form-urlencoded-encoding-vector str)) diff --git a/collects/tests/net/uri-codec.rkt b/collects/tests/net/uri-codec.rkt index d6836e90d8..780c7d9103 100644 --- a/collects/tests/net/uri-codec.rkt +++ b/collects/tests/net/uri-codec.rkt @@ -75,11 +75,16 @@ (uri-path-segment-encode "M~(@; ") => "M~(@%3B%20" (uri-userinfo-encode "M~(@; ") => "M~(%40;%20" (uri-unreserved-encode "M~(@; ") => "M~%28%40%3B%20" + (uri-path-segment-unreserved-encode "M~(@; ") => "M~%28@%3B%20" ;; matching decodes: (uri-decode "M~(%40%3B%20") => "M~(@; " (uri-path-segment-decode "M~(@%3B%20") => "M~(@; " (uri-userinfo-decode "M~(%40;%20") => "M~(@; " (uri-unreserved-decode "M~%28%40%3B%20") => "M~(@; " + (uri-path-segment-unreserved-decode "M~%28@%3B%20") => "M~(@; " + + (uri-path-segment-decode "M~%28@%3B%20") => "M~(@; " + (uri-path-segment-unreserved-decode "M~(@%3B%20") => "M~(@; " )) ;; tests adapted from Noel Welsh's original test suite diff --git a/doc/release-notes/racket/HISTORY.txt b/doc/release-notes/racket/HISTORY.txt index 57fa1e1b46..ddc3fba029 100644 --- a/doc/release-notes/racket/HISTORY.txt +++ b/doc/release-notes/racket/HISTORY.txt @@ -4,6 +4,7 @@ r5rs, r6rs: fixed case and cond to disallow internal definitions in clauses Add #fx() and #fl() reader forms for flvectors and fxvectors racket/serialize: fxvectors and flvectors are serializable +net/uri-codec: added uri-path-segment-unreserved-encode Version 5.3.1.8 file/untar: added