diff --git a/collects/net/uri-codec-unit.ss b/collects/net/uri-codec-unit.ss index e1d16cbb1f..254ee4c9cf 100644 --- a/collects/net/uri-codec-unit.ss +++ b/collects/net/uri-codec-unit.ss @@ -78,8 +78,8 @@ (require (lib "unitsig.ss") (lib "match.ss") (lib "string.ss") - (lib "etc.ss") (lib "list.ss") + (lib "etc.ss") "uri-codec-sig.ss") (provide uri-codec@) @@ -88,27 +88,20 @@ (unit/sig net:uri-codec^ (import) + (define (self-map-char ch) (cons ch ch)) + (define (self-map-chars str) (map self-map-char (string->list str))) + ;; The characters that always map to themselves (define alphanumeric-mapping - (map (lambda (char) (cons char char)) - '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 - #\A #\B #\C #\D #\E #\F #\G #\H #\I #\J - #\K #\L #\M #\N #\O #\P #\Q #\R #\S #\T - #\U #\V #\W #\X #\Y #\Z #\a #\b #\c #\d - #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n - #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x - #\y #\z))) + (self-map-chars + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")) ;; Characters that sometimes map to themselves - (define safe-mapping - (map (lambda (char) (cons char char)) - '(#\- #\_ #\. #\! #\~ #\* #\' #\( #\)))) + (define safe-mapping (self-map-chars "-_.!~*'()")) ;; The strict URI mapping - (define uri-mapping - (append alphanumeric-mapping - safe-mapping)) - + (define uri-mapping (append alphanumeric-mapping safe-mapping)) + ;; The uri path segment mapping from RFC 3986 (define uri-path-segment-mapping (append alphanumeric-mapping @@ -117,29 +110,21 @@ ;; The form-urlencoded mapping (define form-urlencoded-mapping - (append '((#\. . #\.) - (#\- . #\-) - (#\* . #\*) - (#\_ . #\_) - (#\space . #\+)) - alphanumeric-mapping)) + `(,@(self-map-chars ".-*_") (#\space . #\+) ,@alphanumeric-mapping)) (define (number->hex-string number) - (let ((hex (number->string number 16))) - (string-append "%" - (if (= (string-length hex) 1) - (string-append "0" hex) - hex)))) + (define (hex n) (string-ref "0123456789ABCDEF" n)) + (string #\% (hex (quotient number 16)) (hex (modulo number 16)))) (define (hex-string->number hex-string) (string->number (substring hex-string 1 3) 16)) (define ascii-size 128) - + ;; (listof (cons char char)) -> (values (vectorof string) (vectorof string)) (define (make-codec-tables alist) - (let ((encoding-table (build-vector ascii-size number->hex-string)) - (decoding-table (build-vector ascii-size values))) + (let ([encoding-table (build-vector ascii-size number->hex-string)] + [decoding-table (build-vector ascii-size values)]) (for-each (match-lambda [(orig . enc) (vector-set! encoding-table @@ -153,7 +138,7 @@ (define-values (uri-encoding-vector uri-decoding-vector) (make-codec-tables uri-mapping)) - + (define-values (uri-path-segment-encoding-vector uri-path-segment-decoding-vector) (make-codec-tables uri-path-segment-mapping)) @@ -216,7 +201,7 @@ ;; string -> string (define (uri-path-segment-decode str) (decode uri-path-segment-decoding-vector str)) - + ;; string -> string (define (form-urlencoded-encode str) (encode form-urlencoded-encoding-vector str)) @@ -228,80 +213,74 @@ ;; listof (cons string string) -> string ;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris ;; listof (cons symbol string) -> string - (define alist->form-urlencoded - (opt-lambda (args) - (let* ([mode (current-alist-separator-mode)] - [format-one - (lambda (arg) - (let* ([name (car arg)] - [value (cdr arg)]) - (string-append - (form-urlencoded-encode (symbol->string name)) - "=" - (form-urlencoded-encode value))))] - [strs (let loop ([args args]) - (cond - [(null? args) null] - [(null? (cdr args)) (list (format-one (car args)))] - [else (list* (format-one (car args)) - (if (eq? mode 'amp) - "&" - ";") - (loop (cdr args)))]))]) - (apply string-append strs)))) + (define (alist->form-urlencoded args) + (let* ([mode (current-alist-separator-mode)] + [format-one + (lambda (arg) + (let* ([name (car arg)] + [value (cdr arg)]) + (string-append (form-urlencoded-encode (symbol->string name)) + "=" + (form-urlencoded-encode value))))] + [strs (let loop ([args args]) + (cond + [(null? args) null] + [(null? (cdr args)) (list (format-one (car args)))] + [else (list* (format-one (car args)) + (if (eq? mode 'amp) "&" ";") + (loop (cdr args)))]))]) + (apply string-append strs))) - ;; string -> listof (cons string string) + ;; string -> listof (cons string string) ;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris - (define form-urlencoded->alist - (opt-lambda (str) - (define key-regexp #rx"[^=]*") - (define value-regexp (case (current-alist-separator-mode) - [(semi) #rx"[^;]*"] - [(amp) #rx"[^&]*"] - [else #rx"[^&;]*"])) - (define (next-key str start) - (if (>= start (string-length str)) - #f - (match (regexp-match-positions key-regexp str start) - [((start . end)) - (vector (let ([s (form-urlencoded-decode (substring str start end))]) - (string->symbol s)) - (add1 end))] - [#f #f]))) - (define (next-value str start) - (if (>= start (string-length str)) - #f - (match (regexp-match-positions value-regexp str start) - [((start . end)) - (vector (form-urlencoded-decode (substring str start end)) - (add1 end))] - [#f #f]))) - (define (next-pair str start) - (match (next-key str start) - [#(key start) - (match (next-value str start) - [#(value start) - (vector (cons key value) start)] - [#f - (vector (cons key "") (string-length str))])] - [#f #f])) - (let loop ([start 0] - [end (string-length str)] - [make-alist (lambda (x) x)]) - (cond - [(>= start end) (make-alist '())] - [else - (match (next-pair str start) - [#(pair next-start) - (loop next-start end (lambda (x) (make-alist (cons pair x))))] - [#f (make-alist '())])])))) + (define (form-urlencoded->alist str) + (define key-regexp #rx"[^=]*") + (define value-regexp (case (current-alist-separator-mode) + [(semi) #rx"[^;]*"] + [(amp) #rx"[^&]*"] + [else #rx"[^&;]*"])) + (define (next-key str start) + (and (< start (string-length str)) + (match (regexp-match-positions key-regexp str start) + [((start . end)) + (vector (let ([s (form-urlencoded-decode + (substring str start end))]) + (string->symbol s)) + (add1 end))] + [#f #f]))) + (define (next-value str start) + (and (< start (string-length str)) + (match (regexp-match-positions value-regexp str start) + [((start . end)) + (vector (form-urlencoded-decode (substring str start end)) + (add1 end))] + [#f #f]))) + (define (next-pair str start) + (match (next-key str start) + [#(key start) + (match (next-value str start) + [#(value start) + (vector (cons key value) start)] + [#f + (vector (cons key "") (string-length str))])] + [#f #f])) + (let loop ([start 0] + [end (string-length str)] + [make-alist (lambda (x) x)]) + (if (>= start end) + (make-alist '()) + (match (next-pair str start) + [#(pair next-start) + (loop next-start end (lambda (x) (make-alist (cons pair x))))] + [#f (make-alist '())])))) (define current-alist-separator-mode - (make-parameter 'amp-or-semi (lambda (s) - (unless (memq s '(amp semi amp-or-semi)) - (raise-type-error 'current-alist-separator-mode - "'amp, 'semi, or 'amp-or-semi" - s)) - s)))))) + (make-parameter 'amp-or-semi + (lambda (s) + (unless (memq s '(amp semi amp-or-semi)) + (raise-type-error 'current-alist-separator-mode + "'amp, 'semi, or 'amp-or-semi" + s)) + s)))))) ;;; uri-codec-unit.ss ends here diff --git a/collects/net/url-structs.ss b/collects/net/url-structs.ss index 315682a219..50a0c714bd 100644 --- a/collects/net/url-structs.ss +++ b/collects/net/url-structs.ss @@ -1,6 +1,6 @@ (module url-structs mzscheme (require (lib "contract.ss")) - + (define-struct url (scheme user host port path-absolute? path query fragment)) (define-struct path/param (path param)) diff --git a/collects/net/url-unit.ss b/collects/net/url-unit.ss index 36437440bd..b69b04b0a3 100644 --- a/collects/net/url-unit.ss +++ b/collects/net/url-unit.ss @@ -352,65 +352,45 @@ ;; string->url : str -> url ;; New implementation, mostly provided by Neil Van Dyke - (define string->url - (let ((rx (regexp (string-append - "^" - "[ \t\f\r\n]*" - "(" ; <1 front-opt - "([a-zA-Z]*:)?" ; =2 scheme-colon-opt - "(" ; <3 slashslash-opt - "//" - "([^:/@;?#]*@)?" ; =4 user-at-opt - "([^:/@;?#]*)?" ; =5 host-opt - "(:[0-9]*)?" ; =6 colon-port-opt - ")?" ; >3 slashslash-opt - ")?" ; >1 front-opt - "([^?#]*)" ; =7 path - "(\\?[^#]*)?" ; =8 question-query-opt - "(#.*)?" ; =9 hash-fragment-opt - "[ \t\f\r\n]*" - "$")))) - (lambda (str) - (let ((match (regexp-match-positions rx str))) - (if match - (let* ((get-str (lambda (pos skip-left skip-right) - (let ((pair (list-ref match pos))) - (if pair - (substring str - (+ (car pair) skip-left) - (- (cdr pair) skip-right)) - #f)))) - (get-num (lambda (pos skip-left skip-right) - (let ((s (get-str pos skip-left skip-right))) - (if s (string->number s) #f)))) - (host (get-str 5 0 0)) - (path (get-str 7 0 0)) - (scheme (get-str 2 0 1))) - (when (string? scheme) (string-lowercase! scheme)) - (when (string? host) (string-lowercase! host)) - (make-url scheme - (uri-decode/maybe (get-str 4 0 1)) ; user - host - (get-num 6 1 0) ; port - (and (not (= 0 (string-length path))) - (char=? #\/ (string-ref path 0))) - (separate-path-strings - ;; If path is "" and the input is an absolute URL - ;; with a hostname, then the intended path is "/", - ;; but the URL is missing a "/" at the end. - path - #; - (if (and (string=? path "") - host) - "/" - path)) - ;(uri-decode/maybe (get-str 8 1 0)) ; - ;query - (let ([q (get-str 8 1 0)]) - (if q (form-urlencoded->alist q) '())) - (uri-decode/maybe (get-str 9 1 0)) ; fragment - )) - (url-error "Invalid URL string: ~e" str)))))) + (define url-rx + (regexp (string-append + "^" + "[ \t\f\r\n]*" + "(?:" ; B slashslash-opt + ")?" ; >A front-opt + "([^?#]*)" ; =5 path + "(?:\\?([^#]*))?" ; =6 question-query-opt + "(?:#(.*))?" ; =7 hash-fragment-opt + "[ \t\f\r\n]*" + "$"))) + (define (string->url str) + (apply + (lambda (scheme user host port path query fragment) + (let* ([user (uri-decode/maybe user)] + [port (and port (string->number port))] + [abs? (and (not (= 0 (string-length path))) + (char=? #\/ (string-ref path 0)))] + [path (separate-path-strings + ;; If path is "" and the input is an absolute URL + ;; with a hostname, then the intended path is "/", + ;; but the URL is missing a "/" at the end. + path + #; + (if (and (string=? path "") host) "/" path))] + [query (if query (form-urlencoded->alist query) '())] + [fragment (uri-decode/maybe fragment)]) + (when (string? scheme) (string-lowercase! scheme)) + (when (string? host) (string-lowercase! host)) + (make-url scheme user host port abs? path query fragment))) + (cdr (or (regexp-match url-rx str) + (url-error "Invalid URL string: ~e" str))))) (define (uri-decode/maybe f) ;; If #f, and leave unmolested any % that is followed by hex digit