racket/collects/net/url-unit.ss
2005-05-27 18:56:37 +00:00

437 lines
19 KiB
Scheme

;; To do:
;; Handle HTTP/file errors.
;; Not throw away MIME headers.
;; Determine file type.
;; ----------------------------------------------------------------------
;; Input ports have two statuses:
;; "impure" = they have text waiting
;; "pure" = the MIME headers have been read
(module url-unit mzscheme
(require (lib "file.ss")
(lib "unitsig.ss")
(lib "port.ss")
"uri-codec.ss"
"url-sig.ss"
"tcp-sig.ss")
(provide url@)
(define url@
(unit/sig net:url^
(import net:tcp^)
(define-struct (url-exception exn:fail) ())
(define current-proxy-servers
(make-parameter null (lambda (v)
(unless (and (list? v)
(andmap
(lambda (v)
(and (list? v)
(= 3 (length v))
(equal? (car v) "http")
(string? (car v))
(number? (caddr v))
(exact? (caddr v))
(integer? (caddr v))
(<= 1 (caddr v) 65535)))
v))
(raise-type-error
'current-proxy-servers
"list of list of scheme, string, and exact integer in [1,65535]"
v))
(apply
list-immutable
(map (lambda (v)
(list-immutable (string->immutable-string (car v))
(string->immutable-string (cadr v))
(caddr v)))
v)))))
(define url-error
(lambda (fmt . args)
(let ((s (string->immutable-string
(apply format fmt (map (lambda (arg)
(if (url? arg)
(url->string arg)
arg))
args)))))
(raise (make-url-exception s (current-continuation-marks))))))
(define-struct url (scheme user host port path query fragment))
(define-struct path/param (path param))
(define url->string
(lambda (url)
(let ((scheme (url-scheme url))
(user (url-user url))
(host (url-host url))
(port (url-port url))
(path (url-path url))
(query (url-query url))
(fragment (url-fragment url)))
(cond
((and scheme (string=? scheme "file"))
(string-append "file:" path
(or (and (not fragment) "")
(string-append "#" fragment))))
(else
(let ((sa string-append))
(sa (if scheme (sa scheme "://") "")
(if user (sa (uri-encode user) "@") "")
(if host host "")
(if port (sa ":" (number->string port)) "")
; There used to be a "/" here, but that causes an
; extra leading slash -- wonder why it ever worked!
(combine-path-strings path)
;(if query (sa "?" (uri-encode query)) "")
(if (null? query) "" (sa "?" (alist->form-urlencoded query)))
(if fragment (sa "#" (uri-encode fragment)) ""))))))))
;; url->default-port : url -> num
(define url->default-port
(lambda (url)
(let ((scheme (url-scheme url)))
(cond
((not scheme) 80)
((string=? scheme "http") 80)
(else
(url-error "Scheme ~a not supported" (url-scheme url)))))))
;; make-ports : url -> in-port x out-port
(define make-ports
(lambda (url proxy)
(let ((port-number (if proxy
(caddr proxy)
(or (url-port url)
(url->default-port url))))
(host (if proxy
(cadr proxy)
(url-host url))))
(tcp-connect host port-number))))
;; http://getpost-impure-port : bool x url x union (str, #f) x list (str) -> in-port
(define http://getpost-impure-port
(lambda (get? url post-data strings)
(let*-values (((proxy) (assoc (url-scheme url) (current-proxy-servers)))
((server->client client->server)
(make-ports url proxy)))
(let ((access-string
(url->string
(if proxy
url
(make-url #f #f #f #f
(url-path url)
(url-query url)
(url-fragment url))))))
(for-each (lambda (s)
(display (string-append s "\r\n") client->server))
(cons (format "~a ~a HTTP/1.0" (if get? "GET" "POST") access-string)
(cons (format "Host: ~a" (url-host url))
(if post-data
(cons
(format "Content-Length: ~a" (bytes-length post-data))
strings)
strings)))))
(display "\r\n" client->server)
(when post-data
(display post-data client->server))
(tcp-abandon-port client->server) ; flushes
server->client)))
;; file://get-pure-port : url -> in-port
(define file://get-pure-port
(lambda (url)
(when (url-host url)
(url-error "Don't know how to get files from remote hosts"))
(open-input-file (apply build-path (url-path url)))))
(define (schemeless-url url)
(url-error "Missing protocol (usually \"http:\") at the beginning of URL: ~a" url))
;; getpost-impure-port : bool x url x list (str) -> in-port
(define getpost-impure-port
(lambda (get? url post-data strings)
(let ((scheme (url-scheme url)))
(cond
((not scheme)
(schemeless-url url))
((string=? scheme "http")
(http://getpost-impure-port get? url post-data strings))
((string=? scheme "file")
(url-error "There are no impure file: ports"))
(else
(url-error "Scheme ~a unsupported" scheme))))))
;; get-impure-port : url [x list (str)] -> in-port
(define get-impure-port
(case-lambda
[(url) (get-impure-port url '())]
[(url strings) (getpost-impure-port #t url #f strings)]))
;; post-impure-port : url x bytes [x list (str)] -> in-port
(define post-impure-port
(case-lambda
[(url post-data) (post-impure-port url post-data '())]
[(url post-data strings) (getpost-impure-port #f url post-data strings)]))
;; getpost-pure-port : bool x url x list (str) -> in-port
(define getpost-pure-port
(lambda (get? url post-data strings)
(let ((scheme (url-scheme url)))
(cond
((not scheme)
(schemeless-url url))
((string=? scheme "http")
(let ((port (http://getpost-impure-port get? url post-data strings)))
(with-handlers ([void (lambda (exn)
(close-input-port port)
(raise exn))])
(purify-port port))
port))
((string=? scheme "file")
(file://get-pure-port url))
(else
(url-error "Scheme ~a unsupported" scheme))))))
;; get-pure-port : url [x list (str)] -> in-port
(define get-pure-port
(case-lambda
[(url) (get-pure-port url '())]
[(url strings) (getpost-pure-port #t url #f strings)]))
;; post-pure-port : url bytes [x list (str)] -> in-port
(define post-pure-port
(case-lambda
[(url post-data) (post-pure-port url post-data '())]
[(url post-data strings) (getpost-pure-port #f url post-data strings)]))
;; display-pure-port : in-port -> ()
(define display-pure-port
(lambda (server->client)
(copy-port server->client (current-output-port))
(close-input-port server->client)))
(define empty-url?
(lambda (url)
(and (not (url-scheme url))
(not (url-query url))
(not (url-fragment url))
(null? (url-path url)))))
(define (combine-url/relative base string)
(let ([relative (string->url string)])
(cond
[(empty-url? base) ; Step 1
relative]
[(empty-url? relative) ; Step 2a
base]
[(url-scheme relative) ; Step 2b
relative]
[else ; Step 2c
(set-url-scheme! relative (url-scheme base))
(cond
[(url-host relative) ; Step 3
relative]
[else
(set-url-host! relative (url-host base))
(set-url-port! relative (url-port base)) ; Unspecified!
(let ([rel-path (url-path relative)])
(cond
[(and (not (equal? string "")) ; Step 4
(char=? #\/ (string-ref string 0)))
relative]
[(or (not rel-path) ; Step 5
(null? rel-path))
(set-url-path! relative (url-path base))
(when (url-query relative)
(set-url-query! relative (url-query base)))
relative]
[else ; Step 6
(merge-and-normalize
(url-path base) relative)]))])])))
(define (merge-and-normalize base-path relative-url)
(let* ([joined
(let loop ([base-path base-path])
(cond
[(null? base-path) (url-path relative-url)]
[(null? (cdr base-path)) (url-path relative-url)]
[else (cons (car base-path) (loop (cdr base-path)))]))]
[reversed/simplified
(if (null? joined)
null
(let loop ([segs (reverse joined)])
(cond
[(null? segs) null]
[else (let ([fst (car segs)])
(cond
[(string=? fst ".")
(loop (cdr segs))]
[(string=? fst "..")
(if (null? (cdr segs))
segs
(loop (cddr segs)))]
[else (cons (car segs) (loop (cdr segs)))]))])))])
(set-url-path! relative-url (reverse reversed/simplified))
relative-url))
;; call/input-url : url x (url -> in-port) x (in-port -> T)
;; [x list (str)] -> T
(define call/input-url
(let ((handle-port (lambda (server->client handler)
(dynamic-wind (lambda () 'do-nothing)
(lambda () (handler server->client))
(lambda () (close-input-port server->client))))))
(case-lambda
((url getter handler)
(handle-port (getter url) handler))
((url getter handler params)
(handle-port (getter url params) handler)))))
;; purify-port : in-port -> header-string
(define purify-port
(lambda (port)
(let ([m (regexp-match-peek-positions #rx"^HTTP/.*?((\r\n\r\n)|(\n\n)|(\r\r))" port)])
(if m
(read-string (cdar m) port)
""))))
(define character-set-size 256)
;; netscape/string->url : str -> url
(define netscape/string->url
(lambda (string)
(let ((url (string->url string)))
(if (url-scheme url)
url
(if (string=? string "")
(url-error "Can't resolve empty string as URL")
(begin
(set-url-scheme! url
(if (char=? (string-ref string 0) #\/)
"file"
"http"))
url))))))
;; string->url : str -> url
;; New implementation, mostly provided by Neil Van Dyke
(define string->url
(let ((rx (regexp (string-append
"^"
"[ \t\f\r\n]*"
"(" ; <1 front-opt
"([a-zA-Z]*:)?" ; =2 scheme-colon-opt
"(" ; <3 slashslash-opt
"//"
"([^:/@;?#]*@)?" ; =4 user-at-opt
"([^:/@;?#]+)?" ; =5 host-opt
"(:[0-9]*)?" ; =6 colon-port-opt
")?" ; >3 slashslash-opt
")?" ; >1 front-opt
"([^?#]*)" ; =7 path
"(\\?[^#]*)?" ; =8 question-query-opt
"(#.*)?" ; =9 hash-fragment-opt
"[ \t\f\r\n]*"
"$"))))
(lambda (str)
(let ([m (regexp-match #rx"^[ \t\f\r\n]*file:(.*)$" str)])
;; File scheme:
(if m
(let ([path+fragment (regexp-match #rx"^([^#]*)(#(.*))?$" (cadr m))])
(let ([path (cadr path+fragment)]
[fragment (caddr path+fragment)])
(if (or (relative-path? path)
(absolute-path? path))
(make-url "file"
#f ; user
#f ; host
#f ; port
(separate-path-strings path)
'() ; query
fragment)
(url-error "scheme 'file' path ~s neither relative nor absolute" path))))
;; Other scheme:
(let ((match (regexp-match-positions rx str)))
(if match
(let* ((get-str (lambda (pos skip-left skip-right)
(let ((pair (list-ref match pos)))
(if pair
(substring str
(+ (car pair) skip-left)
(- (cdr pair) skip-right))
#f))))
(get-num (lambda (pos skip-left skip-right)
(let ((s (get-str pos skip-left skip-right)))
(if s (string->number s) #f))))
(host (get-str 5 0 0)))
(make-url (get-str 2 0 1) ; scheme
(uri-decode/maybe (get-str 4 0 1)) ; user
host
(get-num 6 1 0) ; port
(separate-path-strings
(let ([path (get-str 7 0 0)])
;; If path is "" and the input is an absolute URL
;; with a hostname, then the intended path is "/",
;; but the URL is missing a "/" at the end.
(if (and (string=? path "")
host)
"/"
path)))
;(uri-decode/maybe (get-str 8 1 0)) ;
;query
(let ([q (get-str 8 1 0)])
(if q (form-urlencoded->alist q) '()))
(uri-decode/maybe (get-str 9 1 0)) ; fragment
))
(url-error "Invalid URL string: ~e" str))))))))
(define (uri-decode/maybe f)
;; If #f, and leave unmolested any % that is followed by hex digit
;; if a % is not followed by a hex digit, replace it with %25
;; in an attempt to be "friendly"
(and f (uri-decode (regexp-replace* "%([^0-9a-fA-F])" f "%25\\1"))))
;; separate-path-strings : string[starting with /] -> (listof (union string path/param))
(define (separate-path-strings str)
(cond
[(string=? str "") '()]
[else
(let loop ([str (if (char=? #\/ (string-ref str 0))
(substring str 1 (string-length str))
str)])
(cond
[(regexp-match #rx"([^/]*)/(.*)$" str)
=>
(lambda (m)
(cons (maybe-separate-params (cadr m)) (loop (caddr m))))]
[else (list (maybe-separate-params str))]))]))
(define (maybe-separate-params s)
(cond
[(regexp-match #rx"^([^;]*);(.*)$" s)
=>
(lambda (m)
(make-path/param (cadr m) (caddr m)))]
[else s]))
(define (combine-path-strings strs)
(apply
string-append
(let loop ([strs strs])
(cond
[(null? strs) '()]
[else (list* "/"
(maybe-join-params (car strs))
(loop (cdr strs)))]))))
;; needs to unquote things!
(define (maybe-join-params s)
(cond
[(string? s) s]
[else (string-append (path/param-path s)
";"
(path/param-param s))])))))