net/url: add `current-url-encode-mode'

Changing `current-url-encode-mode' from 'recommended to 'unreserved
causes `url->string' to encode !, *, ', (, and ) using %, which
can avoid confusing some parsers.

See also https://github.com/plt/racket/pull/198
This commit is contained in:
Matthew Flatt 2012-12-17 06:56:38 -07:00
parent 3d1f1289ef
commit b8b8260379
4 changed files with 48 additions and 10 deletions

View File

@ -81,7 +81,7 @@ a UNC path is represented by a first element that is @racket[""] and
then successive elements complete the drive components that are
separated by @litchar{/} or @litchar{\}.}
@defstruct[path/param ([path (or/c string? (one-of/c 'up 'same))]
@defstruct[path/param ([path (or/c string? (or/c 'up 'same))]
[param (listof string?)])]{
A pair that joins a path segment with its params in a URL.}
@ -169,7 +169,10 @@ The @racket[url->string] procedure uses
@racket[alist->form-urlencoded] when formatting the query, so it is
sensitive to the @racket[current-alist-separator-mode] parameter for
determining the association separator. The default is to separate
associations with a @litchar{&}.}
associations with a @litchar{&}.
The encoding of path segments and fragments is sensitive to the
@racket[current-url-encode-mode] parameter.}
@defproc[(path->url [path (or/c path-string? path-for-some-system?)])
@ -179,19 +182,36 @@ Converts a path to a @racket[url].}
@defproc[(url->path [URL url?]
[kind (one-of/c 'unix 'windows) (system-path-convention-type)])
[kind (or/c 'unix 'windows) (system-path-convention-type)])
path-for-some-system?]{
Converts @racket[URL], which is assumed to be a @racket["file"] URL,
to a path.}
@defparam[file-url-path-convention-type kind (one-of/c 'unix 'windows)]{
@defparam[file-url-path-convention-type kind (or/c 'unix 'windows)]{
Determines the default conversion to and from strings for
@racket["file"] URLs. See @racket[string->url] and @racket[url->string].}
@defparam[current-url-encode-mode mode (or/c 'recommended 'unreserved)]{
Determines how @racket[url->string] encodes @litchar{!}, @litchar{*}, @litchar{'}, @litchar{(},
and @litchar{)} in path segments and fragments:
@racket['recommended] leaves them as-is, while @racket['unreserved]
encodes them using @litchar{%}. The @racket['recommended] mode corresponds
to the recommendations of RFC 2396 @cite["RFC2396"], but @racket['unreserved]
avoids characters that are in some contexts mistaken for delimiters around
URLs.
Internally, @racket['recommended] mode uses
@racket[uri-path-segment-encode] and @racket[uri-encode], while
@racket['unreserved] mode uses
@racket[uri-path-segment-unreserved-encode] and
@racket[uri-unreserved-encode].}
@deftogether[(
@defproc[(get-pure-port [URL url?]
[header (listof string?) null]
@ -428,7 +448,8 @@ Note that @racket[net/url] does not provide the
@defsignature[url^ ()]{
Includes everything exported by the @racketmodname[net/url] module
except @racket[current-https-protocol]. Note that the exports of
except @racket[current-https-protocol] and @racket[current-url-encode-mode].
Note that the exports of
@racketmodname[net/url] and the @racket[url^] signature do not include
@racket[current-connect-scheme].}

View File

@ -87,7 +87,7 @@
(combine-path-strings (url-path-absolute? url) path)
;; (if query (sa "?" (uri-encode query)) "")
(if (null? query) "" (sa "?" (alist->form-urlencoded query)))
(if fragment (sa "#" (uri-encode fragment)) ""))))
(if fragment (sa "#" (uri-encode* fragment)) ""))))
;; url->default-port : url -> num
(define (url->default-port url)
@ -545,7 +545,7 @@
[(eq? p 'same) "."]
[(equal? p "..") "%2e%2e"]
[(equal? p ".") "%2e"]
[else (uri-path-segment-encode p)]))
[else (uri-path-segment-encode* p)]))
(define (combine-path-strings absolute? path/params)
(cond [(null? path/params) ""]
@ -688,6 +688,18 @@
(tcp-abandon-port client->server)
server->client))
;; current-url-encode-mode : parameter holding 'recommended (the default) or 'unreserved.
;; Read by uri-encode* and uri-path-segment-encode* to choose which encoder to apply.
(define current-url-encode-mode (make-parameter 'recommended))
;; uri-encode* : string -> string
;; Encodes `str` with the encoder selected by `current-url-encode-mode':
;; `uri-encode' in 'recommended mode, `uri-unreserved-encode' in
;; 'unreserved mode.
(define (uri-encode* str)
  (let ([mode (current-url-encode-mode)])
    (cond
      [(eq? mode 'recommended) (uri-encode str)]
      [(eq? mode 'unreserved) (uri-unreserved-encode str)])))
;; uri-path-segment-encode* : string -> string
;; Encodes the path segment `str' with the encoder selected by
;; `current-url-encode-mode': `uri-path-segment-encode' in 'recommended
;; mode, `uri-path-segment-unreserved-encode' in 'unreserved mode.
(define (uri-path-segment-encode* str)
  (let ([mode (current-url-encode-mode)])
    (cond
      [(eq? mode 'recommended) (uri-path-segment-encode str)]
      [(eq? mode 'unreserved) (uri-path-segment-unreserved-encode str)])))
(provide (struct-out url) (struct-out path/param))
(provide/contract
@ -728,4 +740,5 @@
(current-proxy-servers
(parameter/c (or/c false/c (listof (list/c string? string? number?)))))
(file-url-path-convention-type
(parameter/c (one-of/c 'unix 'windows))))
(parameter/c (one-of/c 'unix 'windows)))
(current-url-encode-mode (parameter/c (one-of/c 'recommended 'unreserved))))

View File

@ -110,8 +110,11 @@
;; test the characters that need to be encoded in paths vs those that do not need to
;; be encoded in paths
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("a:@!$&'()*+,=z") #("/?#[];") #("")) () #f)
"http://www.drscheme.org/a:@!$&'()*+,=z/%2F%3F%23%5B%5D%3B/")
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("a:@!$&'()*+,=z") #("/?#[];") #("")) () "@!$&'()*+,=z")
"http://www.drscheme.org/a:@!$&'()*+,=z/%2F%3F%23%5B%5D%3B/#%40!%24%26'()*%2B%2C%3Dz")
(parameterize ([current-url-encode-mode 'unreserved])
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("a:@!$&'()*+,=z") #("/?#[];") #("")) () "@!$&'()*+,=z")
"http://www.drscheme.org/a:@%21$&%27%28%29%2A+,=z/%2F%3F%23%5B%5D%3B/#%40%21%24%26%27%28%29%2A%2B%2C%3Dz"))
(test-s->u #("http" #f "www.drscheme.org" #f #t (#(".") #("..") #(same) #(up) #("...") #("abc.def")) () #f)
"http://www.drscheme.org/%2e/%2e%2e/./../.../abc.def")

View File

@ -5,6 +5,7 @@ r5rs, r6rs: fixed case and cond to disallow internal definitions
Add #fx() and #fl() reader forms for flvectors and fxvectors
racket/serialize: fxvectors and flvectors are serializable
net/uri-codec: added uri-path-segment-unreserved-encode
net/url: added current-url-encode-mode
Version 5.3.1.8
file/untar: added