url-strings: Separate URL parsing from URL actions
`net/url` provides functions for both converting strings and paths to and from URLs. `net/url` also includes functions for creating (pure and import) network ports. This functionality `require` the HTTP client stack which is unnecessary when URLs simple need parsing for their "bits". New library: `net/url-strings` handles `url->string` and `string->url` (and also the related `path->url` and `url->path` functions). This is required by net/url for compatability. `net/url-exception.rkt` is factored out for use by both libraries. - See also racket/net changes for T&D url-string.rkt changes requested by mflatt url-strings.rkt is now called url-string.rkt identifiers from url-string.rkt are reprovided by url.rkt using (all-from-out "url-string.rkt") instead of explicit exports
This commit is contained in:
parent
a970f9bf6e
commit
9473f394b7
19
racket/collects/net/url-exception.rkt
Normal file
19
racket/collects/net/url-exception.rkt
Normal file
|
@ -0,0 +1,19 @@
|
|||
#lang racket/base
|
||||
(require racket/string
|
||||
racket/contract/base
|
||||
racket/list)
|
||||
|
||||
(define-struct (url-exception exn:fail) ())
|
||||
(define (-url-exception? x)
|
||||
(or (url-exception? x)
|
||||
|
||||
;; two of the errors that string->url can raise are
|
||||
;; now contract violations instead of url-expcetion
|
||||
;; structs. since only the url-exception? predicate
|
||||
;; was exported, we just add this in to the predicate
|
||||
;; to preserve backwards compatibility
|
||||
(and (exn:fail:contract? x)
|
||||
(regexp-match? #rx"^string->url:" (exn-message x)))))
|
||||
|
||||
(provide (struct-out url-exception))
|
||||
(provide/contract (-url-exception? (any/c . -> . boolean?)))
|
400
racket/collects/net/url-string.rkt
Normal file
400
racket/collects/net/url-string.rkt
Normal file
|
@ -0,0 +1,400 @@
|
|||
#lang racket/base
|
||||
(require racket/string
|
||||
racket/contract/base
|
||||
racket/list
|
||||
"url-structs.rkt"
|
||||
"url-exception.rkt"
|
||||
"uri-codec.rkt")
|
||||
|
||||
;; To do:
|
||||
;; Handle HTTP/file errors.
|
||||
;; Not throw away MIME headers.
|
||||
;; Determine file type.
|
||||
|
||||
(define-logger net/url)
|
||||
|
||||
;; ----------------------------------------------------------------------
|
||||
|
||||
;; Input ports have two statuses:
|
||||
;; "impure" = they have text waiting
|
||||
;; "pure" = the MIME headers have been read
|
||||
|
||||
(define file-url-path-convention-type (make-parameter (system-path-convention-type)))
|
||||
|
||||
(define (url-error fmt . args)
|
||||
(raise (make-url-exception
|
||||
(apply format fmt
|
||||
(map (lambda (arg) (if (url? arg) (url->string arg) arg))
|
||||
args))
|
||||
(current-continuation-marks))))
|
||||
|
||||
(define (url->string url)
|
||||
(let ([scheme (url-scheme url)]
|
||||
[user (url-user url)]
|
||||
[host (url-host url)]
|
||||
[port (url-port url)]
|
||||
[path (url-path url)]
|
||||
[query (url-query url)]
|
||||
[fragment (url-fragment url)]
|
||||
[sa list]
|
||||
[sa* (lambda (l)
|
||||
(apply string-append
|
||||
(let loop ([l l])
|
||||
(cond
|
||||
[(null? l) l]
|
||||
[(pair? (car l))
|
||||
(append (loop (car l))
|
||||
(loop (cdr l)))]
|
||||
[(null? (car l)) (loop (cdr l))]
|
||||
[else (cons (car l) (loop (cdr l)))]))))])
|
||||
(when (and (equal? scheme "file")
|
||||
(not (url-path-absolute? url)))
|
||||
(raise-mismatch-error 'url->string
|
||||
"cannot convert relative file URL to a string: "
|
||||
url))
|
||||
(sa*
|
||||
(append
|
||||
(if scheme (sa scheme ":") null)
|
||||
(if (or user host port)
|
||||
(sa "//"
|
||||
(if user (sa (uri-userinfo-encode user) "@") null)
|
||||
(if host host null)
|
||||
(if port (sa ":" (number->string port)) null))
|
||||
(if (equal? "file" scheme) ; always need "//" for "file" URLs
|
||||
'("//")
|
||||
null))
|
||||
(combine-path-strings (url-path-absolute? url) path)
|
||||
;; (if query (sa "?" (uri-encode query)) "")
|
||||
(if (null? query) null (sa "?" (alist->form-urlencoded query)))
|
||||
(if fragment (sa "#" (uri-encode* fragment)) null)))))
|
||||
|
||||
;; transliteration of code in rfc 3986, section 5.2.2
|
||||
(define (combine-url/relative Base string)
|
||||
(let ([R (string->url string)]
|
||||
[T (make-url #f #f #f #f #f '() '() #f)])
|
||||
(if (url-scheme R)
|
||||
(begin
|
||||
(set-url-scheme! T (url-scheme R))
|
||||
(set-url-user! T (url-user R)) ;; authority
|
||||
(set-url-host! T (url-host R)) ;; authority
|
||||
(set-url-port! T (url-port R)) ;; authority
|
||||
(set-url-path-absolute?! T (url-path-absolute? R))
|
||||
(set-url-path! T (remove-dot-segments (url-path R)))
|
||||
(set-url-query! T (url-query R)))
|
||||
(begin
|
||||
(if (url-host R) ;; => authority is defined
|
||||
(begin
|
||||
(set-url-user! T (url-user R)) ;; authority
|
||||
(set-url-host! T (url-host R)) ;; authority
|
||||
(set-url-port! T (url-port R)) ;; authority
|
||||
(set-url-path-absolute?! T (url-path-absolute? R))
|
||||
(set-url-path! T (remove-dot-segments (url-path R)))
|
||||
(set-url-query! T (url-query R)))
|
||||
(begin
|
||||
(if (null? (url-path R)) ;; => R has empty path
|
||||
(begin
|
||||
(set-url-path-absolute?! T (url-path-absolute? Base))
|
||||
(set-url-path! T (url-path Base))
|
||||
(if (not (null? (url-query R)))
|
||||
(set-url-query! T (url-query R))
|
||||
(set-url-query! T (url-query Base))))
|
||||
(begin
|
||||
(cond
|
||||
[(url-path-absolute? R)
|
||||
(set-url-path-absolute?! T #t)
|
||||
(set-url-path! T (remove-dot-segments (url-path R)))]
|
||||
[(and (null? (url-path Base))
|
||||
(url-host Base))
|
||||
(set-url-path-absolute?! T #t)
|
||||
(set-url-path! T (remove-dot-segments (url-path R)))]
|
||||
[else
|
||||
(set-url-path-absolute?! T (url-path-absolute? Base))
|
||||
(set-url-path! T (remove-dot-segments
|
||||
(append (all-but-last (url-path Base))
|
||||
(url-path R))))])
|
||||
(set-url-query! T (url-query R))))
|
||||
(set-url-user! T (url-user Base)) ;; authority
|
||||
(set-url-host! T (url-host Base)) ;; authority
|
||||
(set-url-port! T (url-port Base)))) ;; authority
|
||||
(set-url-scheme! T (url-scheme Base))))
|
||||
(set-url-fragment! T (url-fragment R))
|
||||
T))
|
||||
|
||||
(define (all-but-last lst)
|
||||
(cond [(null? lst) null]
|
||||
[(null? (cdr lst)) null]
|
||||
[else (cons (car lst) (all-but-last (cdr lst)))]))
|
||||
|
||||
;; cribbed from 5.2.4 in rfc 3986
|
||||
;; the strange [*] cases implicitly change urls
|
||||
;; with paths segments "." and ".." at the end
|
||||
;; into "./" and "../" respectively
|
||||
(define (remove-dot-segments path)
|
||||
(let loop ([path path] [result '()])
|
||||
(if (null? path)
|
||||
(reverse result)
|
||||
(let ([fst (path/param-path (car path))]
|
||||
[rst (cdr path)])
|
||||
(loop rst
|
||||
(cond
|
||||
[(and (eq? fst 'same) (null? rst))
|
||||
(cons (make-path/param "" '()) result)] ; [*]
|
||||
[(eq? fst 'same)
|
||||
result]
|
||||
[(and (eq? fst 'up) (null? rst) (not (null? result)))
|
||||
(cons (make-path/param "" '()) (cdr result))] ; [*]
|
||||
[(and (eq? fst 'up) (not (null? result)))
|
||||
(cdr result)]
|
||||
[(and (eq? fst 'up) (null? result))
|
||||
;; when we go up too far, just drop the "up"s.
|
||||
result]
|
||||
[else
|
||||
(cons (car path) result)]))))))
|
||||
|
||||
;; netscape/string->url : str -> url
|
||||
(define (netscape/string->url string)
|
||||
(let ([url (string->url string)])
|
||||
(cond [(url-scheme url) url]
|
||||
[(string=? string "")
|
||||
(url-error "Can't resolve empty string as URL")]
|
||||
[else (set-url-scheme! url
|
||||
(if (char=? (string-ref string 0) #\/) "file" "http"))
|
||||
url])))
|
||||
|
||||
;; URL parsing regexp
|
||||
;; this is following the regexp in Appendix B of rfc 3986, except for using
|
||||
;; `*' instead of `+' for the scheme part (it is checked later anyway, and
|
||||
;; we don't want to parse it as a path element), and the user@host:port is
|
||||
;; parsed here.
|
||||
(define url-regexp
|
||||
(regexp (string-append
|
||||
"^"
|
||||
"(?:" ; / scheme-colon-opt
|
||||
"([^:/?#]*)" ; | #1 = scheme-opt
|
||||
":)?" ; \
|
||||
"(?://" ; / slash-slash-authority-opt
|
||||
"(?:" ; | / user-at-opt
|
||||
"([^/?#@]*)" ; | | #2 = user-opt
|
||||
"@)?" ; | \
|
||||
"([^/?#:]*)?" ; | #3 = host-opt
|
||||
"(?::" ; | / colon-port-opt
|
||||
"([0-9]*)" ; | | #4 = port-opt
|
||||
")?" ; | \
|
||||
")?" ; \
|
||||
"([^?#]*)" ; #5 = path
|
||||
"(?:\\?" ; / question-query-opt
|
||||
"([^#]*)" ; | #6 = query-opt
|
||||
")?" ; \
|
||||
"(?:#" ; / hash-fragment-opt
|
||||
"(.*)" ; | #7 = fragment-opt
|
||||
")?" ; \
|
||||
"$")))
|
||||
|
||||
;; string->url : str -> url
|
||||
;; Original version by Neil Van Dyke
|
||||
(define (string->url str)
|
||||
(apply
|
||||
(lambda (scheme user host port path query fragment)
|
||||
(when (and scheme (not (regexp-match? #rx"^[a-zA-Z][a-zA-Z0-9+.-]*$"
|
||||
scheme)))
|
||||
(url-error "Invalid URL string; bad scheme ~e: ~e" scheme str))
|
||||
;; Windows => "file://xxx:/...." specifies a "xxx:/..." path
|
||||
(let ([win-file? (and (or (equal? "" port) (not port))
|
||||
(equal? "file" scheme)
|
||||
(eq? 'windows (file-url-path-convention-type))
|
||||
(not (equal? host "")))])
|
||||
(when win-file?
|
||||
(set! path (cond [(equal? "" port) (string-append host ":" path)]
|
||||
[(and path host) (string-append host "/" path)]
|
||||
[else (or path host)]))
|
||||
(set! port #f)
|
||||
(set! host ""))
|
||||
(let* ([scheme (and scheme (string-downcase scheme))]
|
||||
[host (and host (string-downcase host))]
|
||||
[user (uri-decode/maybe user)]
|
||||
[port (and port (string->number port))]
|
||||
[abs? (or (equal? "file" scheme)
|
||||
(regexp-match? #rx"^/" path))]
|
||||
[path (if win-file?
|
||||
(separate-windows-path-strings path)
|
||||
(separate-path-strings path))]
|
||||
[query (if query (form-urlencoded->alist query) '())]
|
||||
[fragment (uri-decode/maybe fragment)])
|
||||
(make-url scheme user host port abs? path query fragment))))
|
||||
(cdr (regexp-match url-regexp str))))
|
||||
|
||||
(define (uri-decode/maybe f) (friendly-decode/maybe f uri-decode))
|
||||
|
||||
(define (friendly-decode/maybe f uri-decode)
|
||||
;; If #f, and leave unmolested any % that is followed by hex digit
|
||||
;; if a % is not followed by a hex digit, replace it with %25
|
||||
;; in an attempt to be "friendly"
|
||||
(and f (uri-decode (regexp-replace* #rx"%([^0-9a-fA-F])" f "%25\\1"))))
|
||||
|
||||
;; separate-path-strings : string[starting with /] -> (listof path/param)
|
||||
(define (separate-path-strings str)
|
||||
(let ([strs (regexp-split #rx"/" str)])
|
||||
(map separate-params (if (string=? "" (car strs)) (cdr strs) strs))))
|
||||
|
||||
(define (separate-windows-path-strings str)
|
||||
(url-path (path->url (bytes->path (string->bytes/utf-8 str) 'windows))))
|
||||
|
||||
(define (separate-params s)
|
||||
(let ([lst (map path-segment-decode (regexp-split #rx";" s))])
|
||||
(make-path/param (car lst) (cdr lst))))
|
||||
|
||||
(define (path-segment-decode p)
|
||||
(cond [(string=? p "..") 'up]
|
||||
[(string=? p ".") 'same]
|
||||
[else (uri-path-segment-decode p)]))
|
||||
|
||||
(define (path-segment-encode p)
|
||||
(cond [(eq? p 'up) ".."]
|
||||
[(eq? p 'same) "."]
|
||||
[(equal? p "..") "%2e%2e"]
|
||||
[(equal? p ".") "%2e"]
|
||||
[else (uri-path-segment-encode* p)]))
|
||||
|
||||
(define (combine-path-strings absolute? path/params)
|
||||
(cond [(null? path/params) null]
|
||||
[else (let ([p (add-between (map join-params path/params) "/")])
|
||||
(if absolute? (cons "/" p) p))]))
|
||||
|
||||
(define (join-params s)
|
||||
(if (null? (path/param-param s))
|
||||
(path-segment-encode (path/param-path s))
|
||||
(string-join (map path-segment-encode
|
||||
(cons (path/param-path s) (path/param-param s)))
|
||||
";")))
|
||||
|
||||
(define (path->url path)
|
||||
(let* ([spath (simplify-path path #f)]
|
||||
[dir? (let-values ([(b n dir?) (split-path spath)]) dir?)]
|
||||
;; If original path is a directory the resulting URL
|
||||
;; should have a trailing forward slash
|
||||
[url-tail (if dir? (list (make-path/param "" null)) null)]
|
||||
[url-path
|
||||
(let loop ([path spath][accum null])
|
||||
(let-values ([(base name dir?) (split-path path)])
|
||||
(cond
|
||||
[(not base)
|
||||
(if (eq? (path-convention-type path) 'windows)
|
||||
;; For Windows, massage the root:
|
||||
(append (map
|
||||
(lambda (s)
|
||||
(make-path/param s null))
|
||||
(let ([s (regexp-replace
|
||||
#rx"[/\\\\]$"
|
||||
(bytes->string/utf-8 (path->bytes name))
|
||||
"")])
|
||||
(cond
|
||||
[(regexp-match? #rx"^\\\\\\\\[?]\\\\[a-zA-Z]:" s)
|
||||
;; \\?\<drive>: path:
|
||||
(regexp-split #rx"[/\\]+" (substring s 4))]
|
||||
[(regexp-match? #rx"^\\\\\\\\[?]\\\\UNC" s)
|
||||
;; \\?\ UNC path:
|
||||
(regexp-split #rx"[/\\]+" (substring s 7))]
|
||||
[(regexp-match? #rx"^[/\\]" s)
|
||||
;; UNC path:
|
||||
(regexp-split #rx"[/\\]+" s)]
|
||||
[else
|
||||
(list s)])))
|
||||
accum)
|
||||
;; On other platforms, we drop the root:
|
||||
accum)]
|
||||
[else
|
||||
(let ([accum (cons (make-path/param
|
||||
(if (symbol? name)
|
||||
name
|
||||
(bytes->string/utf-8
|
||||
(path-element->bytes name)))
|
||||
null)
|
||||
accum)])
|
||||
(if (eq? base 'relative)
|
||||
accum
|
||||
(loop base accum)))])))])
|
||||
(make-url "file" #f "" #f (absolute-path? path)
|
||||
(if (null? url-tail) url-path (append url-path url-tail))
|
||||
'() #f)))
|
||||
|
||||
(define (file://->path url [kind (system-path-convention-type)])
|
||||
(let ([strs (map path/param-path (url-path url))]
|
||||
[string->path-element/same
|
||||
(lambda (e)
|
||||
(if (symbol? e)
|
||||
e
|
||||
(if (string=? e "")
|
||||
'same
|
||||
(bytes->path-element (string->bytes/locale e) kind))))]
|
||||
[string->path/win (lambda (s)
|
||||
(bytes->path (string->bytes/utf-8 s) 'windows))])
|
||||
(if (and (url-path-absolute? url)
|
||||
(eq? 'windows kind))
|
||||
;; If initial path is "", then build UNC path.
|
||||
(cond
|
||||
[(not (url-path-absolute? url))
|
||||
(apply build-path (map string->path-element/same strs))]
|
||||
[(and ((length strs) . >= . 3)
|
||||
(equal? (car strs) ""))
|
||||
(apply build-path
|
||||
(string->path/win
|
||||
(string-append "\\\\" (cadr strs) "\\" (caddr strs) "\\"))
|
||||
(map string->path-element/same (cdddr strs)))]
|
||||
[(pair? strs)
|
||||
(apply build-path (string->path/win (car strs))
|
||||
(map string->path-element/same (cdr strs)))]
|
||||
[else (error 'file://->path "no path elements: ~e" url)])
|
||||
(let ([elems (map string->path-element/same strs)])
|
||||
(if (url-path-absolute? url)
|
||||
(apply build-path (bytes->path #"/" 'unix) elems)
|
||||
(apply build-path elems))))))
|
||||
|
||||
(define (url->path url [kind (system-path-convention-type)])
|
||||
(file://->path url kind))
|
||||
|
||||
(define (relative-path->relative-url-string path)
|
||||
(define s (string-join (for/list ([e (in-list (explode-path path))])
|
||||
(cond
|
||||
[(eq? e 'same) "."]
|
||||
[(eq? e 'up) ".."]
|
||||
[else
|
||||
(uri-encode* (path-element->string e))]))
|
||||
"/"))
|
||||
;; Add "/" to reflect directory-ness:
|
||||
(let-values ([(base name dir?) (split-path path)])
|
||||
(if dir?
|
||||
(string-append s "/")
|
||||
s)))
|
||||
|
||||
(define current-url-encode-mode (make-parameter 'recommended))
|
||||
|
||||
(define (uri-encode* str)
|
||||
(case (current-url-encode-mode)
|
||||
[(unreserved) (uri-unreserved-encode str)]
|
||||
[(recommended) (uri-encode str)]))
|
||||
|
||||
(define (uri-path-segment-encode* str)
|
||||
(case (current-url-encode-mode)
|
||||
[(unreserved) (uri-path-segment-unreserved-encode str)]
|
||||
[(recommended) (uri-path-segment-encode str)]))
|
||||
|
||||
(provide (struct-out url) (struct-out path/param))
|
||||
|
||||
(provide/contract
|
||||
(string->url (-> (and/c string?
|
||||
(or/c #rx"^[a-zA-Z][a-zA-Z0-9+.-]*:"
|
||||
(not/c #rx"^[^:/?#]*:")))
|
||||
url?))
|
||||
(path->url ((or/c path-string? path-for-some-system?) . -> . url?))
|
||||
(relative-path->relative-url-string ((and/c (or/c path-string? path-for-some-system?)
|
||||
relative-path?)
|
||||
. -> . string?))
|
||||
(url->string (url? . -> . string?))
|
||||
(url->path (->* (url?) ((one-of/c 'unix 'windows)) path-for-some-system?))
|
||||
(file://->path (->* (url?) ((one-of/c 'unix 'windows)) path-for-some-system?))
|
||||
(netscape/string->url (string? . -> . url?))
|
||||
(combine-url/relative (url? string? . -> . url?))
|
||||
(rename -url-exception? url-exception? (any/c . -> . boolean?))
|
||||
(file-url-path-convention-type
|
||||
(parameter/c (one-of/c 'unix 'windows)))
|
||||
(current-url-encode-mode (parameter/c (one-of/c 'recommended 'unreserved))))
|
|
@ -6,8 +6,9 @@
|
|||
racket/match
|
||||
(prefix-in hc: "http-client.rkt")
|
||||
(only-in "url-connect.rkt" current-https-protocol)
|
||||
"url-structs.rkt"
|
||||
"uri-codec.rkt")
|
||||
"uri-codec.rkt"
|
||||
"url-string.rkt"
|
||||
(only-in "url-exception.rkt" make-url-exception))
|
||||
|
||||
;; To do:
|
||||
;; Handle HTTP/file errors.
|
||||
|
@ -22,20 +23,6 @@
|
|||
;; "impure" = they have text waiting
|
||||
;; "pure" = the MIME headers have been read
|
||||
|
||||
(define-struct (url-exception exn:fail) ())
|
||||
(define (-url-exception? x)
|
||||
(or (url-exception? x)
|
||||
|
||||
;; two of the errors that string->url can raise are
|
||||
;; now contract violations instead of url-expcetion
|
||||
;; structs. since only the url-exception? predicate
|
||||
;; was exported, we just add this in to the predicate
|
||||
;; to preserve backwards compatibility
|
||||
(and (exn:fail:contract? x)
|
||||
(regexp-match? #rx"^string->url:" (exn-message x)))))
|
||||
|
||||
(define file-url-path-convention-type (make-parameter (system-path-convention-type)))
|
||||
|
||||
(define current-proxy-servers
|
||||
(make-parameter null
|
||||
(lambda (v)
|
||||
|
@ -65,46 +52,6 @@
|
|||
args))
|
||||
(current-continuation-marks))))
|
||||
|
||||
(define (url->string url)
|
||||
(let ([scheme (url-scheme url)]
|
||||
[user (url-user url)]
|
||||
[host (url-host url)]
|
||||
[port (url-port url)]
|
||||
[path (url-path url)]
|
||||
[query (url-query url)]
|
||||
[fragment (url-fragment url)]
|
||||
[sa list]
|
||||
[sa* (lambda (l)
|
||||
(apply string-append
|
||||
(let loop ([l l])
|
||||
(cond
|
||||
[(null? l) l]
|
||||
[(pair? (car l))
|
||||
(append (loop (car l))
|
||||
(loop (cdr l)))]
|
||||
[(null? (car l)) (loop (cdr l))]
|
||||
[else (cons (car l) (loop (cdr l)))]))))])
|
||||
(when (and (equal? scheme "file")
|
||||
(not (url-path-absolute? url)))
|
||||
(raise-mismatch-error 'url->string
|
||||
"cannot convert relative file URL to a string: "
|
||||
url))
|
||||
(sa*
|
||||
(append
|
||||
(if scheme (sa scheme ":") null)
|
||||
(if (or user host port)
|
||||
(sa "//"
|
||||
(if user (sa (uri-userinfo-encode user) "@") null)
|
||||
(if host host null)
|
||||
(if port (sa ":" (number->string port)) null))
|
||||
(if (equal? "file" scheme) ; always need "//" for "file" URLs
|
||||
'("//")
|
||||
null))
|
||||
(combine-path-strings (url-path-absolute? url) path)
|
||||
;; (if query (sa "?" (uri-encode query)) "")
|
||||
(if (null? query) null (sa "?" (alist->form-urlencoded query)))
|
||||
(if fragment (sa "#" (uri-encode* fragment)) null)))))
|
||||
|
||||
;; url->default-port : url -> num
|
||||
(define (url->default-port url)
|
||||
(let ([scheme (url-scheme url)])
|
||||
|
@ -151,38 +98,6 @@
|
|||
#:data post-data)
|
||||
hc)
|
||||
|
||||
(define (file://->path url [kind (system-path-convention-type)])
|
||||
(let ([strs (map path/param-path (url-path url))]
|
||||
[string->path-element/same
|
||||
(lambda (e)
|
||||
(if (symbol? e)
|
||||
e
|
||||
(if (string=? e "")
|
||||
'same
|
||||
(bytes->path-element (string->bytes/locale e) kind))))]
|
||||
[string->path/win (lambda (s)
|
||||
(bytes->path (string->bytes/utf-8 s) 'windows))])
|
||||
(if (and (url-path-absolute? url)
|
||||
(eq? 'windows kind))
|
||||
;; If initial path is "", then build UNC path.
|
||||
(cond
|
||||
[(not (url-path-absolute? url))
|
||||
(apply build-path (map string->path-element/same strs))]
|
||||
[(and ((length strs) . >= . 3)
|
||||
(equal? (car strs) ""))
|
||||
(apply build-path
|
||||
(string->path/win
|
||||
(string-append "\\\\" (cadr strs) "\\" (caddr strs) "\\"))
|
||||
(map string->path-element/same (cdddr strs)))]
|
||||
[(pair? strs)
|
||||
(apply build-path (string->path/win (car strs))
|
||||
(map string->path-element/same (cdr strs)))]
|
||||
[else (error 'file://->path "no path elements: ~e" url)])
|
||||
(let ([elems (map string->path-element/same strs)])
|
||||
(if (url-path-absolute? url)
|
||||
(apply build-path (bytes->path #"/" 'unix) elems)
|
||||
(apply build-path elems))))))
|
||||
|
||||
;; file://get-pure-port : url -> in-port
|
||||
(define (file://get-pure-port url)
|
||||
(open-input-file (file://->path url)))
|
||||
|
@ -319,89 +234,6 @@
|
|||
(copy-port server->client (current-output-port))
|
||||
(close-input-port server->client))
|
||||
|
||||
;; transliteration of code in rfc 3986, section 5.2.2
|
||||
(define (combine-url/relative Base string)
|
||||
(let ([R (string->url string)]
|
||||
[T (make-url #f #f #f #f #f '() '() #f)])
|
||||
(if (url-scheme R)
|
||||
(begin
|
||||
(set-url-scheme! T (url-scheme R))
|
||||
(set-url-user! T (url-user R)) ;; authority
|
||||
(set-url-host! T (url-host R)) ;; authority
|
||||
(set-url-port! T (url-port R)) ;; authority
|
||||
(set-url-path-absolute?! T (url-path-absolute? R))
|
||||
(set-url-path! T (remove-dot-segments (url-path R)))
|
||||
(set-url-query! T (url-query R)))
|
||||
(begin
|
||||
(if (url-host R) ;; => authority is defined
|
||||
(begin
|
||||
(set-url-user! T (url-user R)) ;; authority
|
||||
(set-url-host! T (url-host R)) ;; authority
|
||||
(set-url-port! T (url-port R)) ;; authority
|
||||
(set-url-path-absolute?! T (url-path-absolute? R))
|
||||
(set-url-path! T (remove-dot-segments (url-path R)))
|
||||
(set-url-query! T (url-query R)))
|
||||
(begin
|
||||
(if (null? (url-path R)) ;; => R has empty path
|
||||
(begin
|
||||
(set-url-path-absolute?! T (url-path-absolute? Base))
|
||||
(set-url-path! T (url-path Base))
|
||||
(if (not (null? (url-query R)))
|
||||
(set-url-query! T (url-query R))
|
||||
(set-url-query! T (url-query Base))))
|
||||
(begin
|
||||
(cond
|
||||
[(url-path-absolute? R)
|
||||
(set-url-path-absolute?! T #t)
|
||||
(set-url-path! T (remove-dot-segments (url-path R)))]
|
||||
[(and (null? (url-path Base))
|
||||
(url-host Base))
|
||||
(set-url-path-absolute?! T #t)
|
||||
(set-url-path! T (remove-dot-segments (url-path R)))]
|
||||
[else
|
||||
(set-url-path-absolute?! T (url-path-absolute? Base))
|
||||
(set-url-path! T (remove-dot-segments
|
||||
(append (all-but-last (url-path Base))
|
||||
(url-path R))))])
|
||||
(set-url-query! T (url-query R))))
|
||||
(set-url-user! T (url-user Base)) ;; authority
|
||||
(set-url-host! T (url-host Base)) ;; authority
|
||||
(set-url-port! T (url-port Base)))) ;; authority
|
||||
(set-url-scheme! T (url-scheme Base))))
|
||||
(set-url-fragment! T (url-fragment R))
|
||||
T))
|
||||
|
||||
(define (all-but-last lst)
|
||||
(cond [(null? lst) null]
|
||||
[(null? (cdr lst)) null]
|
||||
[else (cons (car lst) (all-but-last (cdr lst)))]))
|
||||
|
||||
;; cribbed from 5.2.4 in rfc 3986
|
||||
;; the strange [*] cases implicitly change urls
|
||||
;; with paths segments "." and ".." at the end
|
||||
;; into "./" and "../" respectively
|
||||
(define (remove-dot-segments path)
|
||||
(let loop ([path path] [result '()])
|
||||
(if (null? path)
|
||||
(reverse result)
|
||||
(let ([fst (path/param-path (car path))]
|
||||
[rst (cdr path)])
|
||||
(loop rst
|
||||
(cond
|
||||
[(and (eq? fst 'same) (null? rst))
|
||||
(cons (make-path/param "" '()) result)] ; [*]
|
||||
[(eq? fst 'same)
|
||||
result]
|
||||
[(and (eq? fst 'up) (null? rst) (not (null? result)))
|
||||
(cons (make-path/param "" '()) (cdr result))] ; [*]
|
||||
[(and (eq? fst 'up) (not (null? result)))
|
||||
(cdr result)]
|
||||
[(and (eq? fst 'up) (null? result))
|
||||
;; when we go up too far, just drop the "up"s.
|
||||
result]
|
||||
[else
|
||||
(cons (car path) result)]))))))
|
||||
|
||||
;; call/input-url : url x (url -> in-port) x (in-port -> T)
|
||||
;; [x list (str)] -> T
|
||||
(define call/input-url
|
||||
|
@ -427,189 +259,6 @@
|
|||
(purify-port in-port)
|
||||
in-port)
|
||||
|
||||
;; netscape/string->url : str -> url
|
||||
(define (netscape/string->url string)
|
||||
(let ([url (string->url string)])
|
||||
(cond [(url-scheme url) url]
|
||||
[(string=? string "")
|
||||
(url-error "Can't resolve empty string as URL")]
|
||||
[else (set-url-scheme! url
|
||||
(if (char=? (string-ref string 0) #\/) "file" "http"))
|
||||
url])))
|
||||
|
||||
;; URL parsing regexp
|
||||
;; this is following the regexp in Appendix B of rfc 3986, except for using
|
||||
;; `*' instead of `+' for the scheme part (it is checked later anyway, and
|
||||
;; we don't want to parse it as a path element), and the user@host:port is
|
||||
;; parsed here.
|
||||
(define url-regexp
|
||||
(regexp (string-append
|
||||
"^"
|
||||
"(?:" ; / scheme-colon-opt
|
||||
"([^:/?#]*)" ; | #1 = scheme-opt
|
||||
":)?" ; \
|
||||
"(?://" ; / slash-slash-authority-opt
|
||||
"(?:" ; | / user-at-opt
|
||||
"([^/?#@]*)" ; | | #2 = user-opt
|
||||
"@)?" ; | \
|
||||
"([^/?#:]*)?" ; | #3 = host-opt
|
||||
"(?::" ; | / colon-port-opt
|
||||
"([0-9]*)" ; | | #4 = port-opt
|
||||
")?" ; | \
|
||||
")?" ; \
|
||||
"([^?#]*)" ; #5 = path
|
||||
"(?:\\?" ; / question-query-opt
|
||||
"([^#]*)" ; | #6 = query-opt
|
||||
")?" ; \
|
||||
"(?:#" ; / hash-fragment-opt
|
||||
"(.*)" ; | #7 = fragment-opt
|
||||
")?" ; \
|
||||
"$")))
|
||||
|
||||
;; string->url : str -> url
|
||||
;; Original version by Neil Van Dyke
|
||||
(define (string->url str)
|
||||
(apply
|
||||
(lambda (scheme user host port path query fragment)
|
||||
(when (and scheme (not (regexp-match? #rx"^[a-zA-Z][a-zA-Z0-9+.-]*$"
|
||||
scheme)))
|
||||
(url-error "Invalid URL string; bad scheme ~e: ~e" scheme str))
|
||||
;; Windows => "file://xxx:/...." specifies a "xxx:/..." path
|
||||
(let ([win-file? (and (or (equal? "" port) (not port))
|
||||
(equal? "file" scheme)
|
||||
(eq? 'windows (file-url-path-convention-type))
|
||||
(not (equal? host "")))])
|
||||
(when win-file?
|
||||
(set! path (cond [(equal? "" port) (string-append host ":" path)]
|
||||
[(and path host) (string-append host "/" path)]
|
||||
[else (or path host)]))
|
||||
(set! port #f)
|
||||
(set! host ""))
|
||||
(let* ([scheme (and scheme (string-downcase scheme))]
|
||||
[host (and host (string-downcase host))]
|
||||
[user (uri-decode/maybe user)]
|
||||
[port (and port (string->number port))]
|
||||
[abs? (or (equal? "file" scheme)
|
||||
(regexp-match? #rx"^/" path))]
|
||||
[path (if win-file?
|
||||
(separate-windows-path-strings path)
|
||||
(separate-path-strings path))]
|
||||
[query (if query (form-urlencoded->alist query) '())]
|
||||
[fragment (uri-decode/maybe fragment)])
|
||||
(make-url scheme user host port abs? path query fragment))))
|
||||
(cdr (regexp-match url-regexp str))))
|
||||
|
||||
(define (uri-decode/maybe f) (friendly-decode/maybe f uri-decode))
|
||||
|
||||
(define (friendly-decode/maybe f uri-decode)
|
||||
;; If #f, and leave unmolested any % that is followed by hex digit
|
||||
;; if a % is not followed by a hex digit, replace it with %25
|
||||
;; in an attempt to be "friendly"
|
||||
(and f (uri-decode (regexp-replace* #rx"%([^0-9a-fA-F])" f "%25\\1"))))
|
||||
|
||||
;; separate-path-strings : string[starting with /] -> (listof path/param)
|
||||
(define (separate-path-strings str)
|
||||
(let ([strs (regexp-split #rx"/" str)])
|
||||
(map separate-params (if (string=? "" (car strs)) (cdr strs) strs))))
|
||||
|
||||
(define (separate-windows-path-strings str)
|
||||
(url-path (path->url (bytes->path (string->bytes/utf-8 str) 'windows))))
|
||||
|
||||
(define (separate-params s)
|
||||
(let ([lst (map path-segment-decode (regexp-split #rx";" s))])
|
||||
(make-path/param (car lst) (cdr lst))))
|
||||
|
||||
(define (path-segment-decode p)
|
||||
(cond [(string=? p "..") 'up]
|
||||
[(string=? p ".") 'same]
|
||||
[else (uri-path-segment-decode p)]))
|
||||
|
||||
(define (path-segment-encode p)
|
||||
(cond [(eq? p 'up) ".."]
|
||||
[(eq? p 'same) "."]
|
||||
[(equal? p "..") "%2e%2e"]
|
||||
[(equal? p ".") "%2e"]
|
||||
[else (uri-path-segment-encode* p)]))
|
||||
|
||||
(define (combine-path-strings absolute? path/params)
|
||||
(cond [(null? path/params) null]
|
||||
[else (let ([p (add-between (map join-params path/params) "/")])
|
||||
(if absolute? (cons "/" p) p))]))
|
||||
|
||||
(define (join-params s)
|
||||
(if (null? (path/param-param s))
|
||||
(path-segment-encode (path/param-path s))
|
||||
(string-join (map path-segment-encode
|
||||
(cons (path/param-path s) (path/param-param s)))
|
||||
";")))
|
||||
|
||||
(define (path->url path)
|
||||
(let* ([spath (simplify-path path #f)]
|
||||
[dir? (let-values ([(b n dir?) (split-path spath)]) dir?)]
|
||||
;; If original path is a directory the resulting URL
|
||||
;; should have a trailing forward slash
|
||||
[url-tail (if dir? (list (make-path/param "" null)) null)]
|
||||
[url-path
|
||||
(let loop ([path spath][accum null])
|
||||
(let-values ([(base name dir?) (split-path path)])
|
||||
(cond
|
||||
[(not base)
|
||||
(if (eq? (path-convention-type path) 'windows)
|
||||
;; For Windows, massage the root:
|
||||
(append (map
|
||||
(lambda (s)
|
||||
(make-path/param s null))
|
||||
(let ([s (regexp-replace
|
||||
#rx"[/\\\\]$"
|
||||
(bytes->string/utf-8 (path->bytes name))
|
||||
"")])
|
||||
(cond
|
||||
[(regexp-match? #rx"^\\\\\\\\[?]\\\\[a-zA-Z]:" s)
|
||||
;; \\?\<drive>: path:
|
||||
(regexp-split #rx"[/\\]+" (substring s 4))]
|
||||
[(regexp-match? #rx"^\\\\\\\\[?]\\\\UNC" s)
|
||||
;; \\?\ UNC path:
|
||||
(regexp-split #rx"[/\\]+" (substring s 7))]
|
||||
[(regexp-match? #rx"^[/\\]" s)
|
||||
;; UNC path:
|
||||
(regexp-split #rx"[/\\]+" s)]
|
||||
[else
|
||||
(list s)])))
|
||||
accum)
|
||||
;; On other platforms, we drop the root:
|
||||
accum)]
|
||||
[else
|
||||
(let ([accum (cons (make-path/param
|
||||
(if (symbol? name)
|
||||
name
|
||||
(bytes->string/utf-8
|
||||
(path-element->bytes name)))
|
||||
null)
|
||||
accum)])
|
||||
(if (eq? base 'relative)
|
||||
accum
|
||||
(loop base accum)))])))])
|
||||
(make-url "file" #f "" #f (absolute-path? path)
|
||||
(if (null? url-tail) url-path (append url-path url-tail))
|
||||
'() #f)))
|
||||
|
||||
(define (relative-path->relative-url-string path)
|
||||
(define s (string-join (for/list ([e (in-list (explode-path path))])
|
||||
(cond
|
||||
[(eq? e 'same) "."]
|
||||
[(eq? e 'up) ".."]
|
||||
[else
|
||||
(uri-encode* (path-element->string e))]))
|
||||
"/"))
|
||||
;; Add "/" to reflect directory-ness:
|
||||
(let-values ([(base name dir?) (split-path path)])
|
||||
(if dir?
|
||||
(string-append s "/")
|
||||
s)))
|
||||
|
||||
(define (url->path url [kind (system-path-convention-type)])
|
||||
(file://->path url kind))
|
||||
|
||||
;; delete-pure-port : url [x list (str)] -> in-port
|
||||
(define (delete-pure-port url [strings '()])
|
||||
(method-pure-port 'delete url #f strings))
|
||||
|
@ -689,32 +338,9 @@
|
|||
#:data data)
|
||||
(http-conn-impure-port hc)))
|
||||
|
||||
(define current-url-encode-mode (make-parameter 'recommended))
|
||||
|
||||
(define (uri-encode* str)
|
||||
(case (current-url-encode-mode)
|
||||
[(unreserved) (uri-unreserved-encode str)]
|
||||
[(recommended) (uri-encode str)]))
|
||||
|
||||
(define (uri-path-segment-encode* str)
|
||||
(case (current-url-encode-mode)
|
||||
[(unreserved) (uri-path-segment-unreserved-encode str)]
|
||||
[(recommended) (uri-path-segment-encode str)]))
|
||||
|
||||
(provide (struct-out url) (struct-out path/param))
|
||||
(provide (all-from-out "url-string.rkt"))
|
||||
|
||||
(provide/contract
|
||||
(string->url (-> (and/c string?
|
||||
(or/c #rx"^[a-zA-Z][a-zA-Z0-9+.-]*:"
|
||||
(not/c #rx"^[^:/?#]*:")))
|
||||
url?))
|
||||
(path->url ((or/c path-string? path-for-some-system?) . -> . url?))
|
||||
(relative-path->relative-url-string ((and/c (or/c path-string? path-for-some-system?)
|
||||
relative-path?)
|
||||
. -> . string?))
|
||||
(url->string (url? . -> . string?))
|
||||
(url->path (->* (url?) ((one-of/c 'unix 'windows)) path-for-some-system?))
|
||||
|
||||
(get-pure-port (->* (url?) ((listof string?) #:redirections exact-nonnegative-integer?) input-port?))
|
||||
(get-impure-port (->* (url?) ((listof string?)) input-port?))
|
||||
(post-pure-port (->* (url? (or/c false/c bytes?)) ((listof string?)) input-port?))
|
||||
|
@ -738,7 +364,6 @@
|
|||
(rename hc:http-conn? http-connection? (any/c . -> . boolean?))
|
||||
(make-http-connection (-> hc:http-conn?))
|
||||
(http-connection-close (hc:http-conn? . -> . void?))
|
||||
(netscape/string->url (string? . -> . url?))
|
||||
(call/input-url (case-> (-> url?
|
||||
(-> url? input-port?)
|
||||
(-> input-port? any)
|
||||
|
@ -748,13 +373,9 @@
|
|||
(-> input-port? any)
|
||||
(listof string?)
|
||||
any)))
|
||||
(combine-url/relative (url? string? . -> . url?))
|
||||
(rename -url-exception? url-exception? (any/c . -> . boolean?))
|
||||
(url-exception? (any/c . -> . boolean?))
|
||||
(current-proxy-servers
|
||||
(parameter/c (or/c false/c (listof (list/c string? string? number?)))))
|
||||
(file-url-path-convention-type
|
||||
(parameter/c (one-of/c 'unix 'windows)))
|
||||
(current-url-encode-mode (parameter/c (one-of/c 'recommended 'unreserved))))
|
||||
(parameter/c (or/c false/c (listof (list/c string? string? number?))))))
|
||||
|
||||
(define (http-sendrecv/url u
|
||||
#:method [method-bss #"GET"]
|
||||
|
|
Loading…
Reference in New Issue
Block a user