;; To do: ;; Handle HTTP/file errors. ;; Not throw away MIME headers. ;; Determine file type. ;; ---------------------------------------------------------------------- ;; Input ports have two statuses: ;; "impure" = they have text waiting ;; "pure" = the MIME headers have been read (module url-unit mzscheme (require (lib "file.ss") (lib "unitsig.ss") (lib "port.ss") "uri-codec.ss" "url-sig.ss" "tcp-sig.ss") (provide url@) (define url@ (unit/sig net:url^ (import net:tcp^) (define-struct (url-exception exn:fail) ()) (define current-proxy-servers (make-parameter null (lambda (v) (unless (and (list? v) (andmap (lambda (v) (and (list? v) (= 3 (length v)) (equal? (car v) "http") (string? (car v)) (number? (caddr v)) (exact? (caddr v)) (integer? (caddr v)) (<= 1 (caddr v) 65535))) v)) (raise-type-error 'current-proxy-servers "list of list of scheme, string, and exact integer in [1,65535]" v)) (apply list-immutable (map (lambda (v) (list-immutable (string->immutable-string (car v)) (string->immutable-string (cadr v)) (caddr v))) v))))) (define url-error (lambda (fmt . args) (let ((s (string->immutable-string (apply format fmt (map (lambda (arg) (if (url? arg) (url->string arg) arg)) args))))) (raise (make-url-exception s (current-continuation-marks)))))) (define-struct url (scheme user host port path query fragment)) (define-struct path/param (path param)) (define url->string (lambda (url) (let ((scheme (url-scheme url)) (user (url-user url)) (host (url-host url)) (port (url-port url)) (path (url-path url)) (query (url-query url)) (fragment (url-fragment url))) (cond ((and scheme (string=? scheme "file")) (string-append "file:" path (or (and (not fragment) "") (string-append "#" fragment)))) (else (let ((sa string-append)) (sa (if scheme (sa scheme "://") "") (if user (sa (uri-encode user) "@") "") (if host host "") (if port (sa ":" (number->string port)) "") ; There used to be a "/" here, but that causes an ; extra leading slash -- wonder why it ever worked! (combine-path-strings path) ;(if query (sa "?" (uri-encode query)) "") (if (null? query) "" (sa "?" (alist->form-urlencoded query))) (if fragment (sa "#" (uri-encode fragment)) "")))))))) ;; url->default-port : url -> num (define url->default-port (lambda (url) (let ((scheme (url-scheme url))) (cond ((not scheme) 80) ((string=? scheme "http") 80) (else (url-error "Scheme ~a not supported" (url-scheme url))))))) ;; make-ports : url -> in-port x out-port (define make-ports (lambda (url proxy) (let ((port-number (if proxy (caddr proxy) (or (url-port url) (url->default-port url)))) (host (if proxy (cadr proxy) (url-host url)))) (tcp-connect host port-number)))) ;; http://getpost-impure-port : bool x url x union (str, #f) x list (str) -> in-port (define http://getpost-impure-port (lambda (get? url post-data strings) (let*-values (((proxy) (assoc (url-scheme url) (current-proxy-servers))) ((server->client client->server) (make-ports url proxy))) (let ((access-string (url->string (if proxy url (make-url #f #f #f #f (url-path url) (url-query url) (url-fragment url)))))) (for-each (lambda (s) (display (string-append s "\r\n") client->server)) (cons (format "~a ~a HTTP/1.0" (if get? "GET" "POST") access-string) (cons (format "Host: ~a" (url-host url)) (if post-data (cons (format "Content-Length: ~a" (bytes-length post-data)) strings) strings))))) (display "\r\n" client->server) (when post-data (display post-data client->server)) (tcp-abandon-port client->server) ; flushes server->client))) ;; file://get-pure-port : url -> in-port (define file://get-pure-port (lambda (url) (when (url-host url) (url-error "Don't know how to get files from remote hosts")) (open-input-file (apply build-path (url-path url))))) (define (schemeless-url url) (url-error "Missing protocol (usually \"http:\") at the beginning of URL: ~a" url)) ;; getpost-impure-port : bool x url x list (str) -> in-port (define getpost-impure-port (lambda (get? url post-data strings) (let ((scheme (url-scheme url))) (cond ((not scheme) (schemeless-url url)) ((string=? scheme "http") (http://getpost-impure-port get? url post-data strings)) ((string=? scheme "file") (url-error "There are no impure file: ports")) (else (url-error "Scheme ~a unsupported" scheme)))))) ;; get-impure-port : url [x list (str)] -> in-port (define get-impure-port (case-lambda [(url) (get-impure-port url '())] [(url strings) (getpost-impure-port #t url #f strings)])) ;; post-impure-port : url x bytes [x list (str)] -> in-port (define post-impure-port (case-lambda [(url post-data) (post-impure-port url post-data '())] [(url post-data strings) (getpost-impure-port #f url post-data strings)])) ;; getpost-pure-port : bool x url x list (str) -> in-port (define getpost-pure-port (lambda (get? url post-data strings) (let ((scheme (url-scheme url))) (cond ((not scheme) (schemeless-url url)) ((string=? scheme "http") (let ((port (http://getpost-impure-port get? url post-data strings))) (with-handlers ([void (lambda (exn) (close-input-port port) (raise exn))]) (purify-port port)) port)) ((string=? scheme "file") (file://get-pure-port url)) (else (url-error "Scheme ~a unsupported" scheme)))))) ;; get-pure-port : url [x list (str)] -> in-port (define get-pure-port (case-lambda [(url) (get-pure-port url '())] [(url strings) (getpost-pure-port #t url #f strings)])) ;; post-pure-port : url bytes [x list (str)] -> in-port (define post-pure-port (case-lambda [(url post-data) (post-pure-port url post-data '())] [(url post-data strings) (getpost-pure-port #f url post-data strings)])) ;; display-pure-port : in-port -> () (define display-pure-port (lambda (server->client) (copy-port server->client (current-output-port)) (close-input-port server->client))) (define empty-url? (lambda (url) (and (not (url-scheme url)) (not (url-query url)) (not (url-fragment url)) (null? (url-path url))))) (define (combine-url/relative base string) (let ([relative (string->url string)]) (cond [(empty-url? base) ; Step 1 relative] [(empty-url? relative) ; Step 2a base] [(url-scheme relative) ; Step 2b relative] [else ; Step 2c (set-url-scheme! relative (url-scheme base)) (cond [(url-host relative) ; Step 3 relative] [else (set-url-host! relative (url-host base)) (set-url-port! relative (url-port base)) ; Unspecified! (let ([rel-path (url-path relative)]) (cond [(and (not (equal? string "")) ; Step 4 (char=? #\/ (string-ref string 0))) relative] [(or (not rel-path) ; Step 5 (null? rel-path)) (set-url-path! relative (url-path base)) (when (url-query relative) (set-url-query! relative (url-query base))) relative] [else ; Step 6 (merge-and-normalize (url-path base) relative)]))])]))) (define (merge-and-normalize base-path relative-url) (let* ([joined (let loop ([base-path base-path]) (cond [(null? base-path) (url-path relative-url)] [(null? (cdr base-path)) (url-path relative-url)] [else (cons (car base-path) (loop (cdr base-path)))]))] [reversed/simplified (if (null? joined) null (let loop ([segs (reverse joined)]) (cond [(null? segs) null] [else (let ([fst (car segs)]) (cond [(string=? fst ".") (loop (cdr segs))] [(string=? fst "..") (if (null? (cdr segs)) segs (loop (cddr segs)))] [else (cons (car segs) (loop (cdr segs)))]))])))]) (set-url-path! relative-url (reverse reversed/simplified)) relative-url)) ;; call/input-url : url x (url -> in-port) x (in-port -> T) ;; [x list (str)] -> T (define call/input-url (let ((handle-port (lambda (server->client handler) (dynamic-wind (lambda () 'do-nothing) (lambda () (handler server->client)) (lambda () (close-input-port server->client)))))) (case-lambda ((url getter handler) (handle-port (getter url) handler)) ((url getter handler params) (handle-port (getter url params) handler))))) ;; purify-port : in-port -> header-string (define purify-port (lambda (port) (let ([m (regexp-match-peek-positions #rx"^HTTP/.*?((\r\n\r\n)|(\n\n)|(\r\r))" port)]) (if m (read-string (cdar m) port) "")))) (define character-set-size 256) ;; netscape/string->url : str -> url (define netscape/string->url (lambda (string) (let ((url (string->url string))) (if (url-scheme url) url (if (string=? string "") (url-error "Can't resolve empty string as URL") (begin (set-url-scheme! url (if (char=? (string-ref string 0) #\/) "file" "http")) url)))))) ;; string->url : str -> url ;; New implementation, mostly provided by Neil Van Dyke (define string->url (let ((rx (regexp (string-append "^" "[ \t\f\r\n]*" "(" ; <1 front-opt "([a-zA-Z]*:)?" ; =2 scheme-colon-opt "(" ; <3 slashslash-opt "//" "([^:/@;?#]*@)?" ; =4 user-at-opt "([^:/@;?#]+)?" ; =5 host-opt "(:[0-9]*)?" ; =6 colon-port-opt ")?" ; >3 slashslash-opt ")?" ; >1 front-opt "([^?#]*)" ; =7 path "(\\?[^#]*)?" ; =8 question-query-opt "(#.*)?" ; =9 hash-fragment-opt "[ \t\f\r\n]*" "$")))) (lambda (str) (let ([m (regexp-match #rx"^[ \t\f\r\n]*file:(.*)$" str)]) ;; File scheme: (if m (let ([path+fragment (regexp-match #rx"^([^#]*)(#(.*))?$" (cadr m))]) (let ([path (cadr path+fragment)] [fragment (caddr path+fragment)]) (if (or (relative-path? path) (absolute-path? path)) (make-url "file" #f ; user #f ; host #f ; port (separate-path-strings path) '() ; query fragment) (url-error "scheme 'file' path ~s neither relative nor absolute" path)))) ;; Other scheme: (let ((match (regexp-match-positions rx str))) (if match (let* ((get-str (lambda (pos skip-left skip-right) (let ((pair (list-ref match pos))) (if pair (substring str (+ (car pair) skip-left) (- (cdr pair) skip-right)) #f)))) (get-num (lambda (pos skip-left skip-right) (let ((s (get-str pos skip-left skip-right))) (if s (string->number s) #f)))) (host (get-str 5 0 0))) (make-url (get-str 2 0 1) ; scheme (uri-decode/maybe (get-str 4 0 1)) ; user host (get-num 6 1 0) ; port (separate-path-strings (let ([path (get-str 7 0 0)]) ;; If path is "" and the input is an absolute URL ;; with a hostname, then the intended path is "/", ;; but the URL is missing a "/" at the end. (if (and (string=? path "") host) "/" path))) ;(uri-decode/maybe (get-str 8 1 0)) ; ;query (let ([q (get-str 8 1 0)]) (if q (form-urlencoded->alist q) '())) (uri-decode/maybe (get-str 9 1 0)) ; fragment )) (url-error "Invalid URL string: ~e" str)))))))) (define (uri-decode/maybe f) ;; If #f, and leave unmolested any % that is followed by hex digit ;; if a % is not followed by a hex digit, replace it with %25 ;; in an attempt to be "friendly" (and f (uri-decode (regexp-replace* "%([^0-9a-fA-F])" f "%25\\1")))) ;; separate-path-strings : string[starting with /] -> (listof (union string path/param)) (define (separate-path-strings str) (cond [(string=? str "") '()] [else (let loop ([str (if (char=? #\/ (string-ref str 0)) (substring str 1 (string-length str)) str)]) (cond [(regexp-match #rx"([^/]*)/(.*)$" str) => (lambda (m) (cons (maybe-separate-params (cadr m)) (loop (caddr m))))] [else (list (maybe-separate-params str))]))])) (define (maybe-separate-params s) (cond [(regexp-match #rx"^([^;]*);(.*)$" s) => (lambda (m) (make-path/param (cadr m) (caddr m)))] [else s])) (define (combine-path-strings strs) (apply string-append (let loop ([strs strs]) (cond [(null? strs) '()] [else (list* "/" (maybe-join-params (car strs)) (loop (cdr strs)))])))) ;; needs to unquote things! (define (maybe-join-params s) (cond [(string? s) s] [else (string-append (path/param-path s) ";" (path/param-param s))])))))