diff --git a/collects/net/scribblings/uri-codec.scrbl b/collects/net/scribblings/uri-codec.scrbl index 6429b238f4..04a41496e2 100644 --- a/collects/net/scribblings/uri-codec.scrbl +++ b/collects/net/scribblings/uri-codec.scrbl @@ -86,6 +86,12 @@ Encodes a string according to the rules in @cite["RFC3986"] for the userinfo fie @defproc[(uri-userinfo-decode [str string?]) string?]{ Decodes a string according to the rules in @cite["RFC3986"] for the userinfo field. } +@defproc[(uri-unreserved-encode [str string?]) string?]{ +Encodes a string according to the rules in @cite["RFC3986"] (section 2.3) for the unreserved characters. +} +@defproc[(uri-unreserved-decode [str string?]) string?]{ +Decodes a string according to the rules in @cite["RFC3986"] (section 2.3) for the unreserved characters. +} @defproc[(form-urlencoded-encode [str string?]) string?]{ diff --git a/collects/net/uri-codec.rkt b/collects/net/uri-codec.rkt index 7c0c53ae64..e3571338ce 100644 --- a/collects/net/uri-codec.rkt +++ b/collects/net/uri-codec.rkt @@ -91,6 +91,8 @@ See more in PR8831. uri-path-segment-decode uri-userinfo-encode uri-userinfo-decode + uri-unreserved-encode + uri-unreserved-decode form-urlencoded-encode form-urlencoded-decode alist->form-urlencoded @@ -141,9 +143,6 @@ See more in PR8831. (define (hex n) (string-ref "0123456789ABCDEF" n)) (string #\% (hex (quotient number 16)) (hex (modulo number 16)))) -(define (hex-string->number hex-string) - (string->number (substring hex-string 1 3) 16)) - (define ascii-size 128) ;; (listof (cons char char)) -> (values (vectorof string) (vectorof string)) @@ -159,6 +158,7 @@ See more in PR8831. (char->integer enc) (char->integer orig))]) alist) + (values encoding-table decoding-table))) (define-values (uri-encoding-vector uri-decoding-vector) @@ -172,6 +172,9 @@ See more in PR8831. 
uri-userinfo-decoding-vector) (make-codec-tables uri-userinfo-mapping)) +(define-values (uri-unreserved-encoding-vector + uri-unreserved-decoding-vector) + (make-codec-tables unreserved-mapping)) (define-values (form-urlencoded-encoding-vector form-urlencoded-decoding-vector) @@ -198,6 +201,9 @@ See more in PR8831. (cons (vector-ref table (char->integer char)) (internal-decode rest))] [(cons char rest) + ;; JBC : this appears to handle strings containing + ;; non-ascii characters; shouldn't this just be an + ;; error? (append (bytes->list (string->bytes/utf-8 (string char))) (internal-decode rest))])) @@ -235,6 +241,13 @@ See more in PR8831. (define (uri-userinfo-decode str) (decode uri-userinfo-decoding-vector str)) +;; string -> string +(define (uri-unreserved-encode str) + (encode uri-unreserved-encoding-vector str)) + +;; string -> string +(define (uri-unreserved-decode str) + (decode uri-unreserved-decoding-vector str)) ;; string -> string (define (form-urlencoded-encode str) diff --git a/collects/tests/net/uri-codec.rkt b/collects/tests/net/uri-codec.rkt index 2517f6f43f..d6836e90d8 100644 --- a/collects/tests/net/uri-codec.rkt +++ b/collects/tests/net/uri-codec.rkt @@ -69,7 +69,17 @@ (uri-userinfo-decode "hello") => "hello" (uri-userinfo-decode "hello%20there") => "hello there" (uri-userinfo-decode "hello:there") => "hello:there" - + + ;; tried to choose characters from each subset: + (uri-encode "M~(@; ") => "M~(%40%3B%20" + (uri-path-segment-encode "M~(@; ") => "M~(@%3B%20" + (uri-userinfo-encode "M~(@; ") => "M~(%40;%20" + (uri-unreserved-encode "M~(@; ") => "M~%28%40%3B%20" + ;; matching decodes: + (uri-decode "M~(%40%3B%20") => "M~(@; " + (uri-path-segment-decode "M~(@%3B%20") => "M~(@; " + (uri-userinfo-decode "M~(%40;%20") => "M~(@; " + (uri-unreserved-decode "M~%28%40%3B%20") => "M~(@; " )) ;; tests adapted from Noel Welsh's original test suite