From 8f3c97eab82623bb42fa1e83b5e51e4a5fabf30d Mon Sep 17 00:00:00 2001 From: Robby Findler Date: Sun, 25 Jan 2009 20:12:08 +0000 Subject: [PATCH] allows colons in the userinfo field of a url (ie, does not encode them) svn: r13282 original commit: 134138916fbfaf52b45a43f62eb557eab7ecb142 --- collects/net/uri-codec-sig.ss | 2 ++ collects/net/uri-codec-unit.ss | 32 +++++++++++++++++++++++++++++++- collects/net/url-unit.ss | 9 ++++++--- 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/collects/net/uri-codec-sig.ss b/collects/net/uri-codec-sig.ss index 0dc0c70..b19780f 100644 --- a/collects/net/uri-codec-sig.ss +++ b/collects/net/uri-codec-sig.ss @@ -4,6 +4,8 @@ uri-encode uri-decode uri-path-segment-encode uri-path-segment-decode +uri-userinfo-encode +uri-userinfo-decode form-urlencoded-encode form-urlencoded-decode alist->form-urlencoded diff --git a/collects/net/uri-codec-unit.ss b/collects/net/uri-codec-unit.ss index 88441c8..c992d62 100644 --- a/collects/net/uri-codec-unit.ss +++ b/collects/net/uri-codec-unit.ss @@ -106,6 +106,7 @@ See more in PR8831. "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")) ;; Characters that sometimes map to themselves +;; called 'mark' in RFC 3986 (define safe-mapping (self-map-chars "-_.!~*'()")) ;; The strict URI mapping @@ -115,7 +116,22 @@ See more in PR8831. (define uri-path-segment-mapping (append alphanumeric-mapping safe-mapping - (map (λ (c) (cons c c)) (string->list "@+,=$&:")))) + (self-map-chars "@+,=$&:"))) + +;; from RFC 3986 +(define unreserved-mapping + (append alphanumeric-mapping + (self-map-chars "-._~"))) + +;; from RFC 3986 +(define sub-delims-mapping + (self-map-chars "!$&'()*+,;=")) + +;; The uri userinfo mapping from RFC 3986 +(define uri-userinfo-mapping + (append unreserved-mapping + sub-delims-mapping + (self-map-chars ":"))) ;; The form-urlencoded mapping (define form-urlencoded-mapping @@ -152,6 +168,11 @@ See more in PR8831. uri-path-segment-decoding-vector) (make-codec-tables uri-path-segment-mapping)) +(define-values (uri-userinfo-encoding-vector + uri-userinfo-decoding-vector) + (make-codec-tables uri-userinfo-mapping)) + + (define-values (form-urlencoded-encoding-vector form-urlencoded-decoding-vector) (make-codec-tables form-urlencoded-mapping)) @@ -207,6 +228,15 @@ See more in PR8831. (define (uri-path-segment-decode str) (decode uri-path-segment-decoding-vector str)) +;; string -> string +(define (uri-userinfo-encode str) + (encode uri-userinfo-encoding-vector str)) + +;; string -> string +(define (uri-userinfo-decode str) + (decode uri-userinfo-decoding-vector str)) + + ;; string -> string (define (form-urlencoded-encode str) (encode form-urlencoded-encoding-vector str)) diff --git a/collects/net/url-unit.ss b/collects/net/url-unit.ss index 0185515..4b40321 100644 --- a/collects/net/url-unit.ss +++ b/collects/net/url-unit.ss @@ -1,3 +1,5 @@ +#lang scheme/unit + ;; To do: ;; Handle HTTP/file errors. ;; Not throw away MIME headers. @@ -9,7 +11,6 @@ ;; "impure" = they have text waiting ;; "pure" = the MIME headers have been read -#lang scheme/unit (require scheme/port "url-structs.ss" "uri-codec.ss" @@ -71,7 +72,7 @@ (sa (if scheme (sa scheme ":") "") (if (or user host port) (sa "//" - (if user (sa (uri-encode user) "@") "") + (if user (sa (uri-userinfo-encode user) "@") "") (if host host "") (if port (sa ":" (number->string port)) "") ;; There used to be a "/" here, but that causes an @@ -398,7 +399,9 @@ (cdr (or (regexp-match url-rx str) (url-error "Invalid URL string: ~e" str))))) -(define (uri-decode/maybe f) +(define (uri-decode/maybe f) (friendly-decode/maybe f uri-decode)) + +(define (friendly-decode/maybe f uri-decode) ;; If #f, and leave unmolested any % that is followed by hex digit ;; if a % is not followed by a hex digit, replace it with %25 ;; in an attempt to be "friendly"