From e8f889eb05eabfa99a36dadefa1d76f5e44a85a8 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Mon, 17 Dec 2012 06:56:38 -0700 Subject: [PATCH] net/url: add `current-url-encode-mode' Changing `current-url-encode-mode' from 'recommended to 'unreserved causes `url->string' to encode !, *, ', (, and ) using %, which can avoid confusing some parsers. See also https://github.com/plt/racket/pull/198 original commit: b8b8260379716cf38136ff7f5fec1b33a824e151 --- collects/net/scribblings/url.scrbl | 31 +++++++++++++++++++++++++----- collects/tests/net/url.rkt | 7 +++++-- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/collects/net/scribblings/url.scrbl b/collects/net/scribblings/url.scrbl index 26c9322858..9e24e5662f 100644 --- a/collects/net/scribblings/url.scrbl +++ b/collects/net/scribblings/url.scrbl @@ -81,7 +81,7 @@ a UNC path is represented by a first element that is @racket[""] and then successive elements complete the drive components that are separated by @litchar{/} or @litchar{\}.} -@defstruct[path/param ([path (or/c string? (one-of/c 'up 'same))] +@defstruct[path/param ([path (or/c string? (or/c 'up 'same))] [param (listof string?)])]{ A pair that joins a path segment with its params in a URL.} @@ -169,7 +169,10 @@ The @racket[url->string] procedure uses @racket[alist->form-urlencoded] when formatting the query, so it is sensitive to the @racket[current-alist-separator-mode] parameter for determining the association separator. The default is to separate -associations with a @litchar{&}.} +associations with a @litchar{&}. + +The encoding of path segments and fragment is sensitive to the +@racket[current-url-encode-mode] parameter.} @defproc[(path->url [path (or/c path-string? path-for-some-system?)]) @@ -179,19 +182,36 @@ Converts a path to a @racket[url].} @defproc[(url->path [URL url?] 
- [kind (one-of/c 'unix 'windows) (system-path-convention-type)]) + [kind (or/c 'unix 'windows) (system-path-convention-type)]) path-for-some-system?]{ Converts @racket[URL], which is assumed to be a @racket["file"] URL, to a path.} -@defparam[file-url-path-convention-type kind (one-of/c 'unix 'windows)]{ +@defparam[file-url-path-convention-type kind (or/c 'unix 'windows)]{ Determines the default conversion to and from strings for @racket["file"] URLs. See @racket[string->url] and @racket[url->string].} +@defparam[current-url-encode-mode mode (or/c 'recommended 'unreserved)]{ + +Determines how @racket[url->string] encodes @litchar{!}, @litchar{*}, @litchar{'}, @litchar{(}, +and @litchar{)} in path segments and fragments: +@racket['recommended] leaves them as-is, while @racket['unreserved] +encodes them using @litchar{%}. The @racket['recommended] mode corresponds +to the recommendations of RFC 2396 @cite["RFC2396"], but @racket['unreserved] +avoids characters that are in some contexts mistaken for delimiters around +URLs. + +Internally, @racket['recommended] mode uses +@racket[uri-path-segment-encode] and @racket[uri-encode], while +@racket['unreserved] mode uses +@racket[uri-path-segment-unreserved-encode] and +@racket[uri-unreserved-encode].} + + @deftogether[( @defproc[(get-pure-port [URL url?] [header (listof string?) null] @@ -428,7 +448,8 @@ Note that @racket[net/url] does not provide the @defsignature[url^ ()]{ Includes everything exported by the @racketmodname[net/url] module -except @racket[current-https-protocol]. Note that the exports of +except @racket[current-https-protocol] and @racket[current-url-encode-mode]. 
+Note that the exports of @racketmodname[net/url] and the @racket[url^] signature do not include @racket[current-connect-scheme].} diff --git a/collects/tests/net/url.rkt b/collects/tests/net/url.rkt index 91371eb6ec..8c47a3ce1f 100644 --- a/collects/tests/net/url.rkt +++ b/collects/tests/net/url.rkt @@ -110,8 +110,11 @@ ;; test the characters that need to be encoded in paths vs those that do not need to ;; be encoded in paths - (test-s->u #("http" #f "www.drscheme.org" #f #t (#("a:@!$&'()*+,=z") #("/?#[];") #("")) () #f) - "http://www.drscheme.org/a:@!$&'()*+,=z/%2F%3F%23%5B%5D%3B/") + (test-s->u #("http" #f "www.drscheme.org" #f #t (#("a:@!$&'()*+,=z") #("/?#[];") #("")) () "@!$&'()*+,=z") + "http://www.drscheme.org/a:@!$&'()*+,=z/%2F%3F%23%5B%5D%3B/#%40!%24%26'()*%2B%2C%3Dz") + (parameterize ([current-url-encode-mode 'unreserved]) + (test-s->u #("http" #f "www.drscheme.org" #f #t (#("a:@!$&'()*+,=z") #("/?#[];") #("")) () "@!$&'()*+,=z") + "http://www.drscheme.org/a:@%21$&%27%28%29%2A+,=z/%2F%3F%23%5B%5D%3B/#%40%21%24%26%27%28%29%2A%2B%2C%3Dz")) (test-s->u #("http" #f "www.drscheme.org" #f #t (#(".") #("..") #(same) #(up) #("...") #("abc.def")) () #f) "http://www.drscheme.org/%2e/%2e%2e/./../.../abc.def")