diff --git a/collects/net/doc.txt b/collects/net/doc.txt index 2d425adb51..4b6bc62535 100644 --- a/collects/net/doc.txt +++ b/collects/net/doc.txt @@ -61,7 +61,7 @@ For example, this url: `url->string' translate things like %20 into spaces and back again. By default, query associations are parsed with either ";" or "&" as - a separator, and they are generated with ";" as a separator. The + a separator, and they are generated with "&" as a separator. The `current-alist-separator-mode' parameter for "uri-codec.ss" adjusts this default. @@ -131,9 +131,8 @@ PROCEDURES ----------------------------------------------------------- The `url->string' procedure uses `alist->form-urlencoded' when formatting the query, so it is sensitive to the `current-alist-separator-mode' parameter for determining the - association separator. In particular, the default is to separate - associations with ";" (based on modern recommendations) instead of - "&". + association separator. The default is to separate associations with + a "&". > (decode-some-url-parts url) -> url @@ -2291,25 +2290,21 @@ PROCEDURES ----------------------------------------------------------- A parameter that determines the separator used/recognized between alist pairs in `form-urlencoded->alist', `alist->form-urlencoded', `url->string', and `string->url'. The possible mode symbols are - 'amp, 'semi, and 'amp-or-semi. + 'amp, 'semi, 'amp-or-semi, or 'semi-or-amp. The default value is 'amp-or-semi, which means that both "&" and ";" - are treated as separators when parsing, and ";" is used as a + are treated as separators when parsing, and "&" is used as a separator when encoding. The other modes use/recognize only one of the separators. - Examples for '((name . "shriram") (host "nw")): + Examples for '((x . "foo") (y . "bar") (z . 
"baz")): - Mode Parse Generate - ------ -------------------- -------------------- - - 'amp name=shriram&host=nw name=shriram&host=nw - - 'semi name=shriram;host=nw name=shriram;host=nw - - 'amp-or-semi name=shriram&host=nw name=shriram;host=nw - or - name=shriram;host=nw + Mode Parse Generate + ------ ----------------- ----------------- + 'amp x=foo&y=bar&z=baz x=foo&y=bar&z=baz + 'semi x=foo;y=bar;z=baz x=foo;y=bar;z=baz + 'amp-or-semi x=foo&y=bar;z=baz x=foo&y=bar&z=baz + 'semi-or-amp x=foo&y=bar;z=baz x=foo;y=bar;z=baz ========================================================================== _GIF_ writing, _animated GIF_ writing diff --git a/collects/net/uri-codec-unit.ss b/collects/net/uri-codec-unit.ss index 6769444fad..e3d23d598b 100644 --- a/collects/net/uri-codec-unit.ss +++ b/collects/net/uri-codec-unit.ss @@ -1,97 +1,15 @@ -;; 1/2/2006: Added a mapping for uri path segments -;; that allows more characters to remain decoded -;; -robby - - #| -People often seem to wonder why semicolons are the default in this code, -and not ampersands. Here's are the best answers we have: +People used to wonder why semicolons were the default. We then +decided to switch the default back to ampersands -- -From: Doug Orleans -To: plt-scheme@list.cs.brown.edu -Subject: Re: [plt-scheme] Problem fetching a URL -Date: Wed, 11 Oct 2006 16:18:40 -0400 -X-Mailer: VM 7.19 under 21.4 (patch 19) "Constant Variable" XEmacs Lucid + http://www.w3.org/TR/html401/appendix/notes.html#h-B.2.2 -Robby Findler writes: - > Do you (or does anyone else) have a reference to an rfc or similar that - > actually says what the syntax for queries is supposed to be? - > rfc3986.txt (the latest url syntax rfc I know of) doesn't seem to say. + We recommend that HTTP server implementors, and in particular, CGI + implementors support the use of ";" in place of "&" to save authors + the trouble of escaping "&" characters in this manner. 
-The HTML 4.01 spec defines the MIME type application/x-www-form-urlencoded: - -http://www.w3.org/TR/html401/interact/forms.html#form-content-type - -See also XForms, which uses a "separator" attribute whose default -value is a semicolon: - -http://www.w3.org/TR/xforms/slice11.html#serialize-urlencode -http://www.w3.org/TR/xforms/slice3.html#structure-model-submission - ---dougorleans@gmail.com -_________________________________________________ - For list-related administrative tasks: - http://list.cs.brown.edu/mailman/listinfo/plt-scheme - - - - - - -From: John David Stone -To: plt-scheme@list.cs.brown.edu -Subject: Re: [plt-scheme] Problem fetching a URL -Date: Wed, 11 Oct 2006 11:36:14 -0500 -X-Mailer: VM 7.19 under Emacs 21.4.1 - ------BEGIN PGP SIGNED MESSAGE----- -Hash: SHA1 - - Danny Yoo: - - > > Just out of curiosity, why is current-alist-separator-mode using - > > semicolons by default rather than ampersands? I understand that - > > flexibility is nice, but this is the fifth time I've seen people hit this - > > as a roadblock; shouldn't the default be what's most commonly used? - - Robby Findler: - - > It is my understanding that semi-colons are more standards compliant. - > That's why it is the default. - - According to the RFC1738 (http://www.ietf.org/rfc/rfc1738.txt), -semicolons and ampersands are equally acceptable in URLs that begin with -`http://' (section 5, page 17), but semicolons are ``reserved'' characters -(section 3.3, page 8) and so are allowed to appear unencoded in the URL -(section 2.2, page 3), whereas ampersands are not reserved in such URLs and -so must be encoded (as, say, &). RFC2141 -(http://www.ietf.org/rfc/rfc2141.txt) extends this rule to Uniform Resource -Names generally, classifying ampersands as ``excluded'' characters that -must be encoded whenever used in URNs (section 2.4, page 3). 
- - The explanation and rationale is given in Appendix B -(``Performance, implementation, and design notes,'' -http://www.w3.org/TR/html401/appendix/notes.html) of the World Wide Web -Consortium's technical report defining HTML 4.01. Section B.2.2 -(``Ampersands in URI attribute values'') of that appendix notes that the -use of ampersands in URLs to carry information derived from forms is, in -practice, a serious glitch and source of errors, since it tempts careless -implementers and authors of HTML documents to insert those ampersands in -URLs without encoding them. This practice conflicts with the simple and -otherwise standard convention, derived from SGML, that an ampersand is -always the opening delimiter of a character entity reference. So the World -Wide Web Consortium encourages implementers to use and recognize semicolons -rather than ampersands in URLs that carry information derived from forms. - ------BEGIN PGP SIGNATURE----- -Version: GnuPG v1.4.5 (GNU/Linux) -Comment: Processed by Mailcrypt 3.5.8+ - -iD4DBQFFLR16bBGsCPR0ElQRAizVAJddgT63LKc6UWqRyHh57aqWjSXGAJ4wyseS -JALQefhDMCATcl2/bZL0bw== -=W2uS ------END PGP SIGNATURE----- +See more in PR8831. |# @@ -305,7 +223,8 @@ JALQefhDMCATcl2/bZL0bw== ;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris ;; listof (cons symbol string) -> string (define (alist->form-urlencoded args) - (let* ([sep (if (eq? 
(current-alist-separator-mode) 'amp) "&" ";")] + (let* ([sep (if (memq (current-alist-separator-mode) '(semi semi-or-amp)) + ";" "&")] [format-one (lambda (arg) (let* ([name (car arg)] @@ -341,11 +260,11 @@ JALQefhDMCATcl2/bZL0bw== (define current-alist-separator-mode (make-parameter 'amp-or-semi - (lambda (s) - (unless (memq s '(amp semi amp-or-semi)) - (raise-type-error 'current-alist-separator-mode - "'amp, 'semi, or 'amp-or-semi" - s)) - s)))) + (lambda (s) + (unless (memq s '(amp semi amp-or-semi semi-or-amp)) + (raise-type-error 'current-alist-separator-mode + "'amp, 'semi, 'amp-or-semi, or 'semi-or-amp" + s)) + s)))) ;;; uri-codec-unit.ss ends here diff --git a/collects/tests/mzscheme/net.ss b/collects/tests/mzscheme/net.ss index 2f68bf579b..9ddbdc13e6 100644 --- a/collects/tests/mzscheme/net.ss +++ b/collects/tests/mzscheme/net.ss @@ -18,28 +18,33 @@ (test "%P" uri-decode "%P") (test "a=hel%2Blo+%E7%88%B8" alist->form-urlencoded '((a . "hel+lo \u7238"))) (test '((a . "hel+lo \u7238")) form-urlencoded->alist (alist->form-urlencoded '((a . "hel+lo \u7238")))) -(test "a=hel%2Blo;b=good-bye" alist->form-urlencoded '((a . "hel+lo") (b . "good-bye"))) +(test "a=hel%2Blo&b=good-bye" alist->form-urlencoded '((a . "hel+lo") (b . "good-bye"))) +(let* ([alist '((a . "hel+lo") (b . "good-bye"))] + [ampstr "a=hel%2Blo&b=good-bye"] + [semistr "a=hel%2Blo;b=good-bye"]) + (define (test:alist<->str mode str) + (parameterize ([current-alist-separator-mode + (or mode (current-alist-separator-mode))]) + (test str alist->form-urlencoded alist) + (test alist form-urlencoded->alist str))) + (test:alist<->str #f ampstr) ; the default + (test:alist<->str 'amp ampstr) + (test:alist<->str 'amp-or-semi ampstr) + (test:alist<->str 'semi semistr) + (test:alist<->str 'semi-or-amp semistr)) +(test '((x . "foo") (y . "bar") (z . 
"baz")) + form-urlencoded->alist "x=foo&y=bar;z=baz") (parameterize ([current-alist-separator-mode 'semi]) - (test "a=hel%2Blo;b=good-bye" alist->form-urlencoded '((a . "hel+lo") (b . "good-bye")))) -(parameterize ([current-alist-separator-mode 'amp]) - (test "a=hel%2Blo&b=good-bye" alist->form-urlencoded '((a . "hel+lo") (b . "good-bye")))) -(test '((a . "hel+lo") (b . "good-bye")) form-urlencoded->alist (alist->form-urlencoded '((a . "hel+lo") (b . "good-bye")))) -(parameterize ([current-alist-separator-mode 'amp]) - (test '((a . "hel+lo") (b . "good-bye")) form-urlencoded->alist (alist->form-urlencoded '((a . "hel+lo") (b . "good-bye"))))) -(test '((a . "hel+lo") (b . "good-bye")) form-urlencoded->alist - (parameterize ([current-alist-separator-mode 'amp]) - (alist->form-urlencoded '((a . "hel+lo") (b . "good-bye"))))) -(parameterize ([current-alist-separator-mode 'semi]) - (test '((a . "hel+lo&b=good-bye")) form-urlencoded->alist + (test '((a . "hel+lo&b=good-bye")) form-urlencoded->alist (parameterize ([current-alist-separator-mode 'amp]) (alist->form-urlencoded '((a . "hel+lo") (b . "good-bye")))))) (parameterize ([current-alist-separator-mode 'amp]) - (test '((a . "hel+lo;b=good-bye")) form-urlencoded->alist + (test '((a . "hel+lo;b=good-bye")) form-urlencoded->alist (parameterize ([current-alist-separator-mode 'semi]) (alist->form-urlencoded '((a . "hel+lo") (b . "good-bye")))))) -(test "aNt=hi" alist->form-urlencoded '((aNt . "hi"))) -(test '((aNt . "hi")) form-urlencoded->alist (alist->form-urlencoded '((aNt . "hi")))) -(test "aNt=hi" alist->form-urlencoded (form-urlencoded->alist "aNt=hi")) +(test "aNt=Hi" alist->form-urlencoded '((aNt . "Hi"))) +(test '((aNt . "Hi")) form-urlencoded->alist (alist->form-urlencoded '((aNt . 
"Hi")))) +(test "aNt=Hi" alist->form-urlencoded (form-urlencoded->alist "aNt=Hi")) (test 'amp-or-semi current-alist-separator-mode) (err/rt-test (current-alist-separator-mode 'bad)) @@ -100,7 +105,7 @@ (test "" alist->form-urlencoded '()) (test "key=hello+there" alist->form-urlencoded '((key . "hello there"))) - (test "key1=hi;key2=hello" alist->form-urlencoded '((key1 . "hi") (key2 . "hello"))) + (test "key1=hi&key2=hello" alist->form-urlencoded '((key1 . "hi") (key2 . "hello"))) (test "key1=hello+there" alist->form-urlencoded '((key1 . "hello there"))) (test "hello" uri-decode "hello") @@ -214,7 +219,7 @@ (test-s->u #("http" #f "www.drscheme.org" #f #t (#("a") #("b") #("c")) ((tam . "tom")) "joe") "http://www.drscheme.org/a/b/c?tam=tom#joe") (test-s->u #("http" #f "www.drscheme.org" #f #t (#("a") #("b") #("c")) ((tam . "tom") (pam . "pom")) "joe") - "http://www.drscheme.org/a/b/c?tam=tom;pam=pom#joe") + "http://www.drscheme.org/a/b/c?tam=tom&pam=pom#joe") (parameterize ([current-alist-separator-mode 'semi]) (test-s->u #("http" #f "www.drscheme.org" #f #t (#("a") #("b") #("c")) ((tam . "tom") (pam . "pom")) "joe") "http://www.drscheme.org/a/b/c?tam=tom;pam=pom#joe")) @@ -310,12 +315,15 @@ (test-s->u #("http" #f "foo.bar" #f #t (#("baz")) ((ugh . "")) #f) "http://foo.bar/baz?ugh=") (test-s->u #("http" #f "foo.bar" #f #t (#("baz")) ((ugh . #f) (x . "y") (|1| . "2")) #f) - "http://foo.bar/baz?ugh;x=y;1=2") + "http://foo.bar/baz?ugh&x=y&1=2") + (test-s->u #("http" #f "foo.bar" #f #t (#("baz")) ((ugh . "") (x . "y") (|1| . "2")) #f) + "http://foo.bar/baz?ugh=&x=y&1=2") (parameterize ([current-alist-separator-mode 'amp]) (test-s->u #("http" #f "foo.bar" #f #t (#("baz")) ((ugh . #f) (x . "y") (|1| . "2")) #f) "http://foo.bar/baz?ugh&x=y&1=2")) - (test-s->u #("http" #f "foo.bar" #f #t (#("baz")) ((ugh . "") (x . "y") (|1| . 
"2")) #f) - "http://foo.bar/baz?ugh=;x=y;1=2") + (parameterize ([current-alist-separator-mode 'semi]) + (test-s->u #("http" #f "foo.bar" #f #t (#("baz")) ((ugh . #f) (x . "y") (|1| . "2")) #f) + "http://foo.bar/baz?ugh;x=y;1=2")) ;; test case sensitivity (test #("http" "ROBBY" "www.drscheme.org" 80 #t (#("INDEX.HTML" "XXX")) ((T . "P")) "YYY")