diff --git a/racket/collects/net/url-exception.rkt b/racket/collects/net/url-exception.rkt new file mode 100644 index 0000000000..1dcff0b49c --- /dev/null +++ b/racket/collects/net/url-exception.rkt @@ -0,0 +1,19 @@ +#lang racket/base +(require racket/string + racket/contract/base + racket/list) + +(define-struct (url-exception exn:fail) ()) +(define (-url-exception? x) + (or (url-exception? x) + + ;; two of the errors that string->url can raise are + ;; now contract violations instead of url-expcetion + ;; structs. since only the url-exception? predicate + ;; was exported, we just add this in to the predicate + ;; to preserve backwards compatibility + (and (exn:fail:contract? x) + (regexp-match? #rx"^string->url:" (exn-message x))))) + +(provide (struct-out url-exception)) +(provide/contract (-url-exception? (any/c . -> . boolean?))) diff --git a/racket/collects/net/url-string.rkt b/racket/collects/net/url-string.rkt new file mode 100644 index 0000000000..77cb5560e4 --- /dev/null +++ b/racket/collects/net/url-string.rkt @@ -0,0 +1,400 @@ +#lang racket/base +(require racket/string + racket/contract/base + racket/list + "url-structs.rkt" + "url-exception.rkt" + "uri-codec.rkt") + +;; To do: +;; Handle HTTP/file errors. +;; Not throw away MIME headers. +;; Determine file type. + +(define-logger net/url) + +;; ---------------------------------------------------------------------- + +;; Input ports have two statuses: +;; "impure" = they have text waiting +;; "pure" = the MIME headers have been read + +(define file-url-path-convention-type (make-parameter (system-path-convention-type))) + +(define (url-error fmt . args) + (raise (make-url-exception + (apply format fmt + (map (lambda (arg) (if (url? arg) (url->string arg) arg)) + args)) + (current-continuation-marks)))) + +(define (url->string url) + (let ([scheme (url-scheme url)] + [user (url-user url)] + [host (url-host url)] + [port (url-port url)] + [path (url-path url)] + [query (url-query url)] + [fragment (url-fragment url)] + [sa list] + [sa* (lambda (l) + (apply string-append + (let loop ([l l]) + (cond + [(null? l) l] + [(pair? (car l)) + (append (loop (car l)) + (loop (cdr l)))] + [(null? (car l)) (loop (cdr l))] + [else (cons (car l) (loop (cdr l)))]))))]) + (when (and (equal? scheme "file") + (not (url-path-absolute? url))) + (raise-mismatch-error 'url->string + "cannot convert relative file URL to a string: " + url)) + (sa* + (append + (if scheme (sa scheme ":") null) + (if (or user host port) + (sa "//" + (if user (sa (uri-userinfo-encode user) "@") null) + (if host host null) + (if port (sa ":" (number->string port)) null)) + (if (equal? "file" scheme) ; always need "//" for "file" URLs + '("//") + null)) + (combine-path-strings (url-path-absolute? url) path) + ;; (if query (sa "?" (uri-encode query)) "") + (if (null? query) null (sa "?" (alist->form-urlencoded query))) + (if fragment (sa "#" (uri-encode* fragment)) null))))) + +;; transliteration of code in rfc 3986, section 5.2.2 +(define (combine-url/relative Base string) + (let ([R (string->url string)] + [T (make-url #f #f #f #f #f '() '() #f)]) + (if (url-scheme R) + (begin + (set-url-scheme! T (url-scheme R)) + (set-url-user! T (url-user R)) ;; authority + (set-url-host! T (url-host R)) ;; authority + (set-url-port! T (url-port R)) ;; authority + (set-url-path-absolute?! T (url-path-absolute? R)) + (set-url-path! T (remove-dot-segments (url-path R))) + (set-url-query! T (url-query R))) + (begin + (if (url-host R) ;; => authority is defined + (begin + (set-url-user! T (url-user R)) ;; authority + (set-url-host! T (url-host R)) ;; authority + (set-url-port! T (url-port R)) ;; authority + (set-url-path-absolute?! T (url-path-absolute? R)) + (set-url-path! T (remove-dot-segments (url-path R))) + (set-url-query! T (url-query R))) + (begin + (if (null? (url-path R)) ;; => R has empty path + (begin + (set-url-path-absolute?! T (url-path-absolute? Base)) + (set-url-path! T (url-path Base)) + (if (not (null? (url-query R))) + (set-url-query! T (url-query R)) + (set-url-query! T (url-query Base)))) + (begin + (cond + [(url-path-absolute? R) + (set-url-path-absolute?! T #t) + (set-url-path! T (remove-dot-segments (url-path R)))] + [(and (null? (url-path Base)) + (url-host Base)) + (set-url-path-absolute?! T #t) + (set-url-path! T (remove-dot-segments (url-path R)))] + [else + (set-url-path-absolute?! T (url-path-absolute? Base)) + (set-url-path! T (remove-dot-segments + (append (all-but-last (url-path Base)) + (url-path R))))]) + (set-url-query! T (url-query R)))) + (set-url-user! T (url-user Base)) ;; authority + (set-url-host! T (url-host Base)) ;; authority + (set-url-port! T (url-port Base)))) ;; authority + (set-url-scheme! T (url-scheme Base)))) + (set-url-fragment! T (url-fragment R)) + T)) + +(define (all-but-last lst) + (cond [(null? lst) null] + [(null? (cdr lst)) null] + [else (cons (car lst) (all-but-last (cdr lst)))])) + +;; cribbed from 5.2.4 in rfc 3986 +;; the strange [*] cases implicitly change urls +;; with paths segments "." and ".." at the end +;; into "./" and "../" respectively +(define (remove-dot-segments path) + (let loop ([path path] [result '()]) + (if (null? path) + (reverse result) + (let ([fst (path/param-path (car path))] + [rst (cdr path)]) + (loop rst + (cond + [(and (eq? fst 'same) (null? rst)) + (cons (make-path/param "" '()) result)] ; [*] + [(eq? fst 'same) + result] + [(and (eq? fst 'up) (null? rst) (not (null? result))) + (cons (make-path/param "" '()) (cdr result))] ; [*] + [(and (eq? fst 'up) (not (null? result))) + (cdr result)] + [(and (eq? fst 'up) (null? result)) + ;; when we go up too far, just drop the "up"s. + result] + [else + (cons (car path) result)])))))) + +;; netscape/string->url : str -> url +(define (netscape/string->url string) + (let ([url (string->url string)]) + (cond [(url-scheme url) url] + [(string=? string "") + (url-error "Can't resolve empty string as URL")] + [else (set-url-scheme! url + (if (char=? (string-ref string 0) #\/) "file" "http")) + url]))) + +;; URL parsing regexp +;; this is following the regexp in Appendix B of rfc 3986, except for using +;; `*' instead of `+' for the scheme part (it is checked later anyway, and +;; we don't want to parse it as a path element), and the user@host:port is +;; parsed here. +(define url-regexp + (regexp (string-append + "^" + "(?:" ; / scheme-colon-opt + "([^:/?#]*)" ; | #1 = scheme-opt + ":)?" ; \ + "(?://" ; / slash-slash-authority-opt + "(?:" ; | / user-at-opt + "([^/?#@]*)" ; | | #2 = user-opt + "@)?" ; | \ + "([^/?#:]*)?" ; | #3 = host-opt + "(?::" ; | / colon-port-opt + "([0-9]*)" ; | | #4 = port-opt + ")?" ; | \ + ")?" ; \ + "([^?#]*)" ; #5 = path + "(?:\\?" ; / question-query-opt + "([^#]*)" ; | #6 = query-opt + ")?" ; \ + "(?:#" ; / hash-fragment-opt + "(.*)" ; | #7 = fragment-opt + ")?" ; \ + "$"))) + +;; string->url : str -> url +;; Original version by Neil Van Dyke +(define (string->url str) + (apply + (lambda (scheme user host port path query fragment) + (when (and scheme (not (regexp-match? #rx"^[a-zA-Z][a-zA-Z0-9+.-]*$" + scheme))) + (url-error "Invalid URL string; bad scheme ~e: ~e" scheme str)) + ;; Windows => "file://xxx:/...." specifies a "xxx:/..." path + (let ([win-file? (and (or (equal? "" port) (not port)) + (equal? "file" scheme) + (eq? 'windows (file-url-path-convention-type)) + (not (equal? host "")))]) + (when win-file? + (set! path (cond [(equal? "" port) (string-append host ":" path)] + [(and path host) (string-append host "/" path)] + [else (or path host)])) + (set! port #f) + (set! host "")) + (let* ([scheme (and scheme (string-downcase scheme))] + [host (and host (string-downcase host))] + [user (uri-decode/maybe user)] + [port (and port (string->number port))] + [abs? (or (equal? "file" scheme) + (regexp-match? #rx"^/" path))] + [path (if win-file? + (separate-windows-path-strings path) + (separate-path-strings path))] + [query (if query (form-urlencoded->alist query) '())] + [fragment (uri-decode/maybe fragment)]) + (make-url scheme user host port abs? path query fragment)))) + (cdr (regexp-match url-regexp str)))) + +(define (uri-decode/maybe f) (friendly-decode/maybe f uri-decode)) + +(define (friendly-decode/maybe f uri-decode) + ;; If #f, and leave unmolested any % that is followed by hex digit + ;; if a % is not followed by a hex digit, replace it with %25 + ;; in an attempt to be "friendly" + (and f (uri-decode (regexp-replace* #rx"%([^0-9a-fA-F])" f "%25\\1")))) + +;; separate-path-strings : string[starting with /] -> (listof path/param) +(define (separate-path-strings str) + (let ([strs (regexp-split #rx"/" str)]) + (map separate-params (if (string=? "" (car strs)) (cdr strs) strs)))) + +(define (separate-windows-path-strings str) + (url-path (path->url (bytes->path (string->bytes/utf-8 str) 'windows)))) + +(define (separate-params s) + (let ([lst (map path-segment-decode (regexp-split #rx";" s))]) + (make-path/param (car lst) (cdr lst)))) + +(define (path-segment-decode p) + (cond [(string=? p "..") 'up] + [(string=? p ".") 'same] + [else (uri-path-segment-decode p)])) + +(define (path-segment-encode p) + (cond [(eq? p 'up) ".."] + [(eq? p 'same) "."] + [(equal? p "..") "%2e%2e"] + [(equal? p ".") "%2e"] + [else (uri-path-segment-encode* p)])) + +(define (combine-path-strings absolute? path/params) + (cond [(null? path/params) null] + [else (let ([p (add-between (map join-params path/params) "/")]) + (if absolute? (cons "/" p) p))])) + +(define (join-params s) + (if (null? (path/param-param s)) + (path-segment-encode (path/param-path s)) + (string-join (map path-segment-encode + (cons (path/param-path s) (path/param-param s))) + ";"))) + +(define (path->url path) + (let* ([spath (simplify-path path #f)] + [dir? (let-values ([(b n dir?) (split-path spath)]) dir?)] + ;; If original path is a directory the resulting URL + ;; should have a trailing forward slash + [url-tail (if dir? (list (make-path/param "" null)) null)] + [url-path + (let loop ([path spath][accum null]) + (let-values ([(base name dir?) (split-path path)]) + (cond + [(not base) + (if (eq? (path-convention-type path) 'windows) + ;; For Windows, massage the root: + (append (map + (lambda (s) + (make-path/param s null)) + (let ([s (regexp-replace + #rx"[/\\\\]$" + (bytes->string/utf-8 (path->bytes name)) + "")]) + (cond + [(regexp-match? #rx"^\\\\\\\\[?]\\\\[a-zA-Z]:" s) + ;; \\?\: path: + (regexp-split #rx"[/\\]+" (substring s 4))] + [(regexp-match? #rx"^\\\\\\\\[?]\\\\UNC" s) + ;; \\?\ UNC path: + (regexp-split #rx"[/\\]+" (substring s 7))] + [(regexp-match? #rx"^[/\\]" s) + ;; UNC path: + (regexp-split #rx"[/\\]+" s)] + [else + (list s)]))) + accum) + ;; On other platforms, we drop the root: + accum)] + [else + (let ([accum (cons (make-path/param + (if (symbol? name) + name + (bytes->string/utf-8 + (path-element->bytes name))) + null) + accum)]) + (if (eq? base 'relative) + accum + (loop base accum)))])))]) + (make-url "file" #f "" #f (absolute-path? path) + (if (null? url-tail) url-path (append url-path url-tail)) + '() #f))) + +(define (file://->path url [kind (system-path-convention-type)]) + (let ([strs (map path/param-path (url-path url))] + [string->path-element/same + (lambda (e) + (if (symbol? e) + e + (if (string=? e "") + 'same + (bytes->path-element (string->bytes/locale e) kind))))] + [string->path/win (lambda (s) + (bytes->path (string->bytes/utf-8 s) 'windows))]) + (if (and (url-path-absolute? url) + (eq? 'windows kind)) + ;; If initial path is "", then build UNC path. + (cond + [(not (url-path-absolute? url)) + (apply build-path (map string->path-element/same strs))] + [(and ((length strs) . >= . 3) + (equal? (car strs) "")) + (apply build-path + (string->path/win + (string-append "\\\\" (cadr strs) "\\" (caddr strs) "\\")) + (map string->path-element/same (cdddr strs)))] + [(pair? strs) + (apply build-path (string->path/win (car strs)) + (map string->path-element/same (cdr strs)))] + [else (error 'file://->path "no path elements: ~e" url)]) + (let ([elems (map string->path-element/same strs)]) + (if (url-path-absolute? url) + (apply build-path (bytes->path #"/" 'unix) elems) + (apply build-path elems)))))) + +(define (url->path url [kind (system-path-convention-type)]) + (file://->path url kind)) + +(define (relative-path->relative-url-string path) + (define s (string-join (for/list ([e (in-list (explode-path path))]) + (cond + [(eq? e 'same) "."] + [(eq? e 'up) ".."] + [else + (uri-encode* (path-element->string e))])) + "/")) + ;; Add "/" to reflect directory-ness: + (let-values ([(base name dir?) (split-path path)]) + (if dir? + (string-append s "/") + s))) + +(define current-url-encode-mode (make-parameter 'recommended)) + +(define (uri-encode* str) + (case (current-url-encode-mode) + [(unreserved) (uri-unreserved-encode str)] + [(recommended) (uri-encode str)])) + +(define (uri-path-segment-encode* str) + (case (current-url-encode-mode) + [(unreserved) (uri-path-segment-unreserved-encode str)] + [(recommended) (uri-path-segment-encode str)])) + +(provide (struct-out url) (struct-out path/param)) + +(provide/contract + (string->url (-> (and/c string? + (or/c #rx"^[a-zA-Z][a-zA-Z0-9+.-]*:" + (not/c #rx"^[^:/?#]*:"))) + url?)) + (path->url ((or/c path-string? path-for-some-system?) . -> . url?)) + (relative-path->relative-url-string ((and/c (or/c path-string? path-for-some-system?) + relative-path?) + . -> . string?)) + (url->string (url? . -> . string?)) + (url->path (->* (url?) ((one-of/c 'unix 'windows)) path-for-some-system?)) + (file://->path (->* (url?) ((one-of/c 'unix 'windows)) path-for-some-system?)) + (netscape/string->url (string? . -> . url?)) + (combine-url/relative (url? string? . -> . url?)) + (rename -url-exception? url-exception? (any/c . -> . boolean?)) + (file-url-path-convention-type + (parameter/c (one-of/c 'unix 'windows))) + (current-url-encode-mode (parameter/c (one-of/c 'recommended 'unreserved)))) diff --git a/racket/collects/net/url.rkt b/racket/collects/net/url.rkt index b479c967e3..c299c40d0a 100644 --- a/racket/collects/net/url.rkt +++ b/racket/collects/net/url.rkt @@ -6,8 +6,9 @@ racket/match (prefix-in hc: "http-client.rkt") (only-in "url-connect.rkt" current-https-protocol) - "url-structs.rkt" - "uri-codec.rkt") + "uri-codec.rkt" + "url-string.rkt" + (only-in "url-exception.rkt" make-url-exception)) ;; To do: ;; Handle HTTP/file errors. @@ -22,20 +23,6 @@ ;; "impure" = they have text waiting ;; "pure" = the MIME headers have been read -(define-struct (url-exception exn:fail) ()) -(define (-url-exception? x) - (or (url-exception? x) - - ;; two of the errors that string->url can raise are - ;; now contract violations instead of url-expcetion - ;; structs. since only the url-exception? predicate - ;; was exported, we just add this in to the predicate - ;; to preserve backwards compatibility - (and (exn:fail:contract? x) - (regexp-match? #rx"^string->url:" (exn-message x))))) - -(define file-url-path-convention-type (make-parameter (system-path-convention-type))) - (define current-proxy-servers (make-parameter null (lambda (v) @@ -65,46 +52,6 @@ args)) (current-continuation-marks)))) -(define (url->string url) - (let ([scheme (url-scheme url)] - [user (url-user url)] - [host (url-host url)] - [port (url-port url)] - [path (url-path url)] - [query (url-query url)] - [fragment (url-fragment url)] - [sa list] - [sa* (lambda (l) - (apply string-append - (let loop ([l l]) - (cond - [(null? l) l] - [(pair? (car l)) - (append (loop (car l)) - (loop (cdr l)))] - [(null? (car l)) (loop (cdr l))] - [else (cons (car l) (loop (cdr l)))]))))]) - (when (and (equal? scheme "file") - (not (url-path-absolute? url))) - (raise-mismatch-error 'url->string - "cannot convert relative file URL to a string: " - url)) - (sa* - (append - (if scheme (sa scheme ":") null) - (if (or user host port) - (sa "//" - (if user (sa (uri-userinfo-encode user) "@") null) - (if host host null) - (if port (sa ":" (number->string port)) null)) - (if (equal? "file" scheme) ; always need "//" for "file" URLs - '("//") - null)) - (combine-path-strings (url-path-absolute? url) path) - ;; (if query (sa "?" (uri-encode query)) "") - (if (null? query) null (sa "?" (alist->form-urlencoded query))) - (if fragment (sa "#" (uri-encode* fragment)) null))))) - ;; url->default-port : url -> num (define (url->default-port url) (let ([scheme (url-scheme url)]) @@ -151,38 +98,6 @@ #:data post-data) hc) -(define (file://->path url [kind (system-path-convention-type)]) - (let ([strs (map path/param-path (url-path url))] - [string->path-element/same - (lambda (e) - (if (symbol? e) - e - (if (string=? e "") - 'same - (bytes->path-element (string->bytes/locale e) kind))))] - [string->path/win (lambda (s) - (bytes->path (string->bytes/utf-8 s) 'windows))]) - (if (and (url-path-absolute? url) - (eq? 'windows kind)) - ;; If initial path is "", then build UNC path. - (cond - [(not (url-path-absolute? url)) - (apply build-path (map string->path-element/same strs))] - [(and ((length strs) . >= . 3) - (equal? (car strs) "")) - (apply build-path - (string->path/win - (string-append "\\\\" (cadr strs) "\\" (caddr strs) "\\")) - (map string->path-element/same (cdddr strs)))] - [(pair? strs) - (apply build-path (string->path/win (car strs)) - (map string->path-element/same (cdr strs)))] - [else (error 'file://->path "no path elements: ~e" url)]) - (let ([elems (map string->path-element/same strs)]) - (if (url-path-absolute? url) - (apply build-path (bytes->path #"/" 'unix) elems) - (apply build-path elems)))))) - ;; file://get-pure-port : url -> in-port (define (file://get-pure-port url) (open-input-file (file://->path url))) @@ -319,89 +234,6 @@ (copy-port server->client (current-output-port)) (close-input-port server->client)) -;; transliteration of code in rfc 3986, section 5.2.2 -(define (combine-url/relative Base string) - (let ([R (string->url string)] - [T (make-url #f #f #f #f #f '() '() #f)]) - (if (url-scheme R) - (begin - (set-url-scheme! T (url-scheme R)) - (set-url-user! T (url-user R)) ;; authority - (set-url-host! T (url-host R)) ;; authority - (set-url-port! T (url-port R)) ;; authority - (set-url-path-absolute?! T (url-path-absolute? R)) - (set-url-path! T (remove-dot-segments (url-path R))) - (set-url-query! T (url-query R))) - (begin - (if (url-host R) ;; => authority is defined - (begin - (set-url-user! T (url-user R)) ;; authority - (set-url-host! T (url-host R)) ;; authority - (set-url-port! T (url-port R)) ;; authority - (set-url-path-absolute?! T (url-path-absolute? R)) - (set-url-path! T (remove-dot-segments (url-path R))) - (set-url-query! T (url-query R))) - (begin - (if (null? (url-path R)) ;; => R has empty path - (begin - (set-url-path-absolute?! T (url-path-absolute? Base)) - (set-url-path! T (url-path Base)) - (if (not (null? (url-query R))) - (set-url-query! T (url-query R)) - (set-url-query! T (url-query Base)))) - (begin - (cond - [(url-path-absolute? R) - (set-url-path-absolute?! T #t) - (set-url-path! T (remove-dot-segments (url-path R)))] - [(and (null? (url-path Base)) - (url-host Base)) - (set-url-path-absolute?! T #t) - (set-url-path! T (remove-dot-segments (url-path R)))] - [else - (set-url-path-absolute?! T (url-path-absolute? Base)) - (set-url-path! T (remove-dot-segments - (append (all-but-last (url-path Base)) - (url-path R))))]) - (set-url-query! T (url-query R)))) - (set-url-user! T (url-user Base)) ;; authority - (set-url-host! T (url-host Base)) ;; authority - (set-url-port! T (url-port Base)))) ;; authority - (set-url-scheme! T (url-scheme Base)))) - (set-url-fragment! T (url-fragment R)) - T)) - -(define (all-but-last lst) - (cond [(null? lst) null] - [(null? (cdr lst)) null] - [else (cons (car lst) (all-but-last (cdr lst)))])) - -;; cribbed from 5.2.4 in rfc 3986 -;; the strange [*] cases implicitly change urls -;; with paths segments "." and ".." at the end -;; into "./" and "../" respectively -(define (remove-dot-segments path) - (let loop ([path path] [result '()]) - (if (null? path) - (reverse result) - (let ([fst (path/param-path (car path))] - [rst (cdr path)]) - (loop rst - (cond - [(and (eq? fst 'same) (null? rst)) - (cons (make-path/param "" '()) result)] ; [*] - [(eq? fst 'same) - result] - [(and (eq? fst 'up) (null? rst) (not (null? result))) - (cons (make-path/param "" '()) (cdr result))] ; [*] - [(and (eq? fst 'up) (not (null? result))) - (cdr result)] - [(and (eq? fst 'up) (null? result)) - ;; when we go up too far, just drop the "up"s. - result] - [else - (cons (car path) result)])))))) - ;; call/input-url : url x (url -> in-port) x (in-port -> T) ;; [x list (str)] -> T (define call/input-url @@ -427,189 +259,6 @@ (purify-port in-port) in-port) -;; netscape/string->url : str -> url -(define (netscape/string->url string) - (let ([url (string->url string)]) - (cond [(url-scheme url) url] - [(string=? string "") - (url-error "Can't resolve empty string as URL")] - [else (set-url-scheme! url - (if (char=? (string-ref string 0) #\/) "file" "http")) - url]))) - -;; URL parsing regexp -;; this is following the regexp in Appendix B of rfc 3986, except for using -;; `*' instead of `+' for the scheme part (it is checked later anyway, and -;; we don't want to parse it as a path element), and the user@host:port is -;; parsed here. -(define url-regexp - (regexp (string-append - "^" - "(?:" ; / scheme-colon-opt - "([^:/?#]*)" ; | #1 = scheme-opt - ":)?" ; \ - "(?://" ; / slash-slash-authority-opt - "(?:" ; | / user-at-opt - "([^/?#@]*)" ; | | #2 = user-opt - "@)?" ; | \ - "([^/?#:]*)?" ; | #3 = host-opt - "(?::" ; | / colon-port-opt - "([0-9]*)" ; | | #4 = port-opt - ")?" ; | \ - ")?" ; \ - "([^?#]*)" ; #5 = path - "(?:\\?" ; / question-query-opt - "([^#]*)" ; | #6 = query-opt - ")?" ; \ - "(?:#" ; / hash-fragment-opt - "(.*)" ; | #7 = fragment-opt - ")?" ; \ - "$"))) - -;; string->url : str -> url -;; Original version by Neil Van Dyke -(define (string->url str) - (apply - (lambda (scheme user host port path query fragment) - (when (and scheme (not (regexp-match? #rx"^[a-zA-Z][a-zA-Z0-9+.-]*$" - scheme))) - (url-error "Invalid URL string; bad scheme ~e: ~e" scheme str)) - ;; Windows => "file://xxx:/...." specifies a "xxx:/..." path - (let ([win-file? (and (or (equal? "" port) (not port)) - (equal? "file" scheme) - (eq? 'windows (file-url-path-convention-type)) - (not (equal? host "")))]) - (when win-file? - (set! path (cond [(equal? "" port) (string-append host ":" path)] - [(and path host) (string-append host "/" path)] - [else (or path host)])) - (set! port #f) - (set! host "")) - (let* ([scheme (and scheme (string-downcase scheme))] - [host (and host (string-downcase host))] - [user (uri-decode/maybe user)] - [port (and port (string->number port))] - [abs? (or (equal? "file" scheme) - (regexp-match? #rx"^/" path))] - [path (if win-file? - (separate-windows-path-strings path) - (separate-path-strings path))] - [query (if query (form-urlencoded->alist query) '())] - [fragment (uri-decode/maybe fragment)]) - (make-url scheme user host port abs? path query fragment)))) - (cdr (regexp-match url-regexp str)))) - -(define (uri-decode/maybe f) (friendly-decode/maybe f uri-decode)) - -(define (friendly-decode/maybe f uri-decode) - ;; If #f, and leave unmolested any % that is followed by hex digit - ;; if a % is not followed by a hex digit, replace it with %25 - ;; in an attempt to be "friendly" - (and f (uri-decode (regexp-replace* #rx"%([^0-9a-fA-F])" f "%25\\1")))) - -;; separate-path-strings : string[starting with /] -> (listof path/param) -(define (separate-path-strings str) - (let ([strs (regexp-split #rx"/" str)]) - (map separate-params (if (string=? "" (car strs)) (cdr strs) strs)))) - -(define (separate-windows-path-strings str) - (url-path (path->url (bytes->path (string->bytes/utf-8 str) 'windows)))) - -(define (separate-params s) - (let ([lst (map path-segment-decode (regexp-split #rx";" s))]) - (make-path/param (car lst) (cdr lst)))) - -(define (path-segment-decode p) - (cond [(string=? p "..") 'up] - [(string=? p ".") 'same] - [else (uri-path-segment-decode p)])) - -(define (path-segment-encode p) - (cond [(eq? p 'up) ".."] - [(eq? p 'same) "."] - [(equal? p "..") "%2e%2e"] - [(equal? p ".") "%2e"] - [else (uri-path-segment-encode* p)])) - -(define (combine-path-strings absolute? path/params) - (cond [(null? path/params) null] - [else (let ([p (add-between (map join-params path/params) "/")]) - (if absolute? (cons "/" p) p))])) - -(define (join-params s) - (if (null? (path/param-param s)) - (path-segment-encode (path/param-path s)) - (string-join (map path-segment-encode - (cons (path/param-path s) (path/param-param s))) - ";"))) - -(define (path->url path) - (let* ([spath (simplify-path path #f)] - [dir? (let-values ([(b n dir?) (split-path spath)]) dir?)] - ;; If original path is a directory the resulting URL - ;; should have a trailing forward slash - [url-tail (if dir? (list (make-path/param "" null)) null)] - [url-path - (let loop ([path spath][accum null]) - (let-values ([(base name dir?) (split-path path)]) - (cond - [(not base) - (if (eq? (path-convention-type path) 'windows) - ;; For Windows, massage the root: - (append (map - (lambda (s) - (make-path/param s null)) - (let ([s (regexp-replace - #rx"[/\\\\]$" - (bytes->string/utf-8 (path->bytes name)) - "")]) - (cond - [(regexp-match? #rx"^\\\\\\\\[?]\\\\[a-zA-Z]:" s) - ;; \\?\: path: - (regexp-split #rx"[/\\]+" (substring s 4))] - [(regexp-match? #rx"^\\\\\\\\[?]\\\\UNC" s) - ;; \\?\ UNC path: - (regexp-split #rx"[/\\]+" (substring s 7))] - [(regexp-match? #rx"^[/\\]" s) - ;; UNC path: - (regexp-split #rx"[/\\]+" s)] - [else - (list s)]))) - accum) - ;; On other platforms, we drop the root: - accum)] - [else - (let ([accum (cons (make-path/param - (if (symbol? name) - name - (bytes->string/utf-8 - (path-element->bytes name))) - null) - accum)]) - (if (eq? base 'relative) - accum - (loop base accum)))])))]) - (make-url "file" #f "" #f (absolute-path? path) - (if (null? url-tail) url-path (append url-path url-tail)) - '() #f))) - -(define (relative-path->relative-url-string path) - (define s (string-join (for/list ([e (in-list (explode-path path))]) - (cond - [(eq? e 'same) "."] - [(eq? e 'up) ".."] - [else - (uri-encode* (path-element->string e))])) - "/")) - ;; Add "/" to reflect directory-ness: - (let-values ([(base name dir?) (split-path path)]) - (if dir? - (string-append s "/") - s))) - -(define (url->path url [kind (system-path-convention-type)]) - (file://->path url kind)) - ;; delete-pure-port : url [x list (str)] -> in-port (define (delete-pure-port url [strings '()]) (method-pure-port 'delete url #f strings)) @@ -689,32 +338,9 @@ #:data data) (http-conn-impure-port hc))) -(define current-url-encode-mode (make-parameter 'recommended)) - -(define (uri-encode* str) - (case (current-url-encode-mode) - [(unreserved) (uri-unreserved-encode str)] - [(recommended) (uri-encode str)])) - -(define (uri-path-segment-encode* str) - (case (current-url-encode-mode) - [(unreserved) (uri-path-segment-unreserved-encode str)] - [(recommended) (uri-path-segment-encode str)])) - -(provide (struct-out url) (struct-out path/param)) +(provide (all-from-out "url-string.rkt")) (provide/contract - (string->url (-> (and/c string? - (or/c #rx"^[a-zA-Z][a-zA-Z0-9+.-]*:" - (not/c #rx"^[^:/?#]*:"))) - url?)) - (path->url ((or/c path-string? path-for-some-system?) . -> . url?)) - (relative-path->relative-url-string ((and/c (or/c path-string? path-for-some-system?) - relative-path?) - . -> . string?)) - (url->string (url? . -> . string?)) - (url->path (->* (url?) ((one-of/c 'unix 'windows)) path-for-some-system?)) - (get-pure-port (->* (url?) ((listof string?) #:redirections exact-nonnegative-integer?) input-port?)) (get-impure-port (->* (url?) ((listof string?)) input-port?)) (post-pure-port (->* (url? (or/c false/c bytes?)) ((listof string?)) input-port?)) @@ -738,7 +364,6 @@ (rename hc:http-conn? http-connection? (any/c . -> . boolean?)) (make-http-connection (-> hc:http-conn?)) (http-connection-close (hc:http-conn? . -> . void?)) - (netscape/string->url (string? . -> . url?)) (call/input-url (case-> (-> url? (-> url? input-port?) (-> input-port? any) @@ -748,13 +373,9 @@ (-> input-port? any) (listof string?) any))) - (combine-url/relative (url? string? . -> . url?)) - (rename -url-exception? url-exception? (any/c . -> . boolean?)) + (url-exception? (any/c . -> . boolean?)) (current-proxy-servers - (parameter/c (or/c false/c (listof (list/c string? string? number?))))) - (file-url-path-convention-type - (parameter/c (one-of/c 'unix 'windows))) - (current-url-encode-mode (parameter/c (one-of/c 'recommended 'unreserved)))) + (parameter/c (or/c false/c (listof (list/c string? string? number?)))))) (define (http-sendrecv/url u #:method [method-bss #"GET"]