;; To do: ;; Handle HTTP/file errors. ;; Not throw away MIME headers. ;; Determine file type. ;; ---------------------------------------------------------------------- ;; Input ports have two statuses: ;; "impure" = they have text waiting ;; "pure" = the MIME headers have been read (module url-unit mzscheme (require (lib "file.ss") (lib "unitsig.ss") (lib "port.ss") (lib "string.ss") "url-structs.ss" "uri-codec.ss" "url-sig.ss" "tcp-sig.ss") (provide url@) (define url@ (unit/sig net:url^ (import net:tcp^) (define-struct (url-exception exn:fail) ()) (define current-proxy-servers (make-parameter null (lambda (v) (unless (and (list? v) (andmap (lambda (v) (and (list? v) (= 3 (length v)) (equal? (car v) "http") (string? (car v)) (number? (caddr v)) (exact? (caddr v)) (integer? (caddr v)) (<= 1 (caddr v) 65535))) v)) (raise-type-error 'current-proxy-servers "list of list of scheme, string, and exact integer in [1,65535]" v)) (apply list-immutable (map (lambda (v) (list-immutable (string->immutable-string (car v)) (string->immutable-string (cadr v)) (caddr v))) v))))) (define url-error (lambda (fmt . args) (let ((s (string->immutable-string (apply format fmt (map (lambda (arg) (if (url? arg) (url->string arg) arg)) args))))) (raise (make-url-exception s (current-continuation-marks)))))) (define url->string (lambda (url) (let ((scheme (url-scheme url)) (user (url-user url)) (host (url-host url)) (port (url-port url)) (path (url-path url)) (query (url-query url)) (fragment (url-fragment url))) (let ((sa string-append)) (sa (if scheme (sa scheme ":") "") (if (or user host port) (sa "//" (if user (sa (uri-encode user) "@") "") (if host host "") (if port (sa ":" (number->string port)) "") ; There used to be a "/" here, but that causes an ; extra leading slash -- wonder why it ever worked! ) "") (combine-path-strings (url-path-absolute? url) path) ;(if query (sa "?" (uri-encode query)) "") (if (null? query) "" (sa "?" (alist->form-urlencoded query))) (if fragment (sa "#" (uri-encode fragment)) "")))))) ;; url->default-port : url -> num (define url->default-port (lambda (url) (let ((scheme (url-scheme url))) (cond ((not scheme) 80) ((string=? scheme "http") 80) (else (url-error "Scheme ~a not supported" (url-scheme url))))))) ;; make-ports : url -> in-port x out-port (define make-ports (lambda (url proxy) (let ((port-number (if proxy (caddr proxy) (or (url-port url) (url->default-port url)))) (host (if proxy (cadr proxy) (url-host url)))) (tcp-connect host port-number)))) ;; http://getpost-impure-port : bool x url x union (str, #f) x list (str) -> in-port (define http://getpost-impure-port (lambda (get? url post-data strings) (let*-values (((proxy) (assoc (url-scheme url) (current-proxy-servers))) ((server->client client->server) (make-ports url proxy))) (let ((access-string (url->string (if proxy url (make-url #f #f #f #f (url-path-absolute? url) (url-path url) (url-query url) (url-fragment url)))))) (for-each (lambda (s) (display (string-append s "\r\n") client->server)) (cons (format "~a ~a HTTP/1.0" (if get? "GET" "POST") access-string) (cons (format "Host: ~a" (url-host url)) (if post-data (cons (format "Content-Length: ~a" (bytes-length post-data)) strings) strings))))) (display "\r\n" client->server) (when post-data (display post-data client->server)) (tcp-abandon-port client->server) ; flushes server->client))) ;; file://get-pure-port : url -> in-port (define (file://get-pure-port url) (open-input-file (apply build-path (map path/param-path (url-path url))))) (define (schemeless-url url) (url-error "Missing protocol (usually \"http:\") at the beginning of URL: ~a" url)) ;; getpost-impure-port : bool x url x list (str) -> in-port (define getpost-impure-port (lambda (get? url post-data strings) (let ((scheme (url-scheme url))) (cond ((not scheme) (schemeless-url url)) ((string=? scheme "http") (http://getpost-impure-port get? url post-data strings)) ((string=? scheme "file") (url-error "There are no impure file: ports")) (else (url-error "Scheme ~a unsupported" scheme)))))) ;; get-impure-port : url [x list (str)] -> in-port (define get-impure-port (case-lambda [(url) (get-impure-port url '())] [(url strings) (getpost-impure-port #t url #f strings)])) ;; post-impure-port : url x bytes [x list (str)] -> in-port (define post-impure-port (case-lambda [(url post-data) (post-impure-port url post-data '())] [(url post-data strings) (getpost-impure-port #f url post-data strings)])) ;; getpost-pure-port : bool x url x list (str) -> in-port (define getpost-pure-port (lambda (get? url post-data strings) (let ((scheme (url-scheme url))) (cond ((not scheme) (schemeless-url url)) ((string=? scheme "http") (let ((port (http://getpost-impure-port get? url post-data strings))) (with-handlers ([void (lambda (exn) (close-input-port port) (raise exn))]) (purify-port port)) port)) ((string=? scheme "file") (file://get-pure-port url)) (else (url-error "Scheme ~a unsupported" scheme)))))) ;; get-pure-port : url [x list (str)] -> in-port (define get-pure-port (case-lambda [(url) (get-pure-port url '())] [(url strings) (getpost-pure-port #t url #f strings)])) ;; post-pure-port : url bytes [x list (str)] -> in-port (define post-pure-port (case-lambda [(url post-data) (post-pure-port url post-data '())] [(url post-data strings) (getpost-pure-port #f url post-data strings)])) ;; display-pure-port : in-port -> () (define display-pure-port (lambda (server->client) (copy-port server->client (current-output-port)) (close-input-port server->client))) (define empty-url? (lambda (url) (and (not (url-scheme url)) (not (url-query url)) (not (url-fragment url)) (null? (url-path url))))) ;; transliteration of code in rfc 3986, section 5.2.2 (define (combine-url/relative Base string) (let ([R (string->url string)] [T (make-url #f #f #f #f #f '() '() #f)]) (if (url-scheme R) (begin (set-url-scheme! T (url-scheme R)) (set-url-user! T (url-user R)) ;; authority (set-url-host! T (url-host R)) ;; authority (set-url-port! T (url-port R)) ;; authority (set-url-path-absolute?! T (url-path-absolute? R)) (set-url-path! T (remove-dot-segments (url-path R))) (set-url-query! T (url-query R))) (begin (if (url-host R) ;; => authority is defined (begin (set-url-user! T (url-user R)) ;; authority (set-url-host! T (url-host R)) ;; authority (set-url-port! T (url-port R)) ;; authority (set-url-path-absolute?! T (url-path-absolute? R)) (set-url-path! T (remove-dot-segments (url-path R))) (set-url-query! T (url-query R))) (begin (if (null? (url-path R)) ;; => R has empty path (begin (set-url-path-absolute?! T (url-path-absolute? Base)) (set-url-path! T (url-path Base)) (if (not (null? (url-query R))) (set-url-query! T (url-query R)) (set-url-query! T (url-query Base)))) (begin (cond [(url-path-absolute? R) (set-url-path-absolute?! T #t) (set-url-path! T (remove-dot-segments (url-path R)))] [(and (null? (url-path Base)) (url-host Base)) (set-url-path-absolute?! T #t) (set-url-path! T (remove-dot-segments (url-path R)))] [else (set-url-path-absolute?! T (url-path-absolute? Base)) (set-url-path! T (remove-dot-segments (append (all-but-last (url-path Base)) (url-path R))))]) (set-url-query! T (url-query R)))) (set-url-user! T (url-user Base)) ;; authority (set-url-host! T (url-host Base)) ;; authority (set-url-port! T (url-port Base)))) ;; authority (set-url-scheme! T (url-scheme Base)))) (set-url-fragment! T (url-fragment R)) T)) (define (all-but-last lst) (cond [(null? lst) null] [(null? (cdr lst)) null] [else (cons (car lst) (all-but-last (cdr lst)))])) ;; cribbed from 5.2.4 in rfc 3986 ;; the strange cases 2 and 4 implicitly change urls ;; with paths segments "." and ".." at the end ;; into "./" and "../" respectively (define (remove-dot-segments path) (let loop ([path path] [result '()]) (cond [(null? path) (reverse result)] [(and (eq? (path/param-path (car path)) 'same) (null? (cdr path))) (loop (cdr path) (cons (make-path/param "" '()) result))] [(eq? (path/param-path (car path)) 'same) (loop (cdr path) result)] [(and (eq? (path/param-path (car path)) 'up) (null? (cdr path)) (not (null? result))) (loop (cdr path) (cons (make-path/param "" '()) (cdr result)))] [(and (eq? (path/param-path (car path)) 'up) (not (null? result))) (loop (cdr path) (cdr result))] [(and (eq? (path/param-path (car path)) 'up) (null? result)) ;; when we go up too far, just drop the "up"s. (loop (cdr path) result)] [else (loop (cdr path) (cons (car path) result))]))) ;; call/input-url : url x (url -> in-port) x (in-port -> T) ;; [x list (str)] -> T (define call/input-url (let ((handle-port (lambda (server->client handler) (dynamic-wind (lambda () 'do-nothing) (lambda () (handler server->client)) (lambda () (close-input-port server->client)))))) (case-lambda ((url getter handler) (handle-port (getter url) handler)) ((url getter handler params) (handle-port (getter url params) handler))))) ;; purify-port : in-port -> header-string (define purify-port (lambda (port) (let ([m (regexp-match-peek-positions #rx"^HTTP/.*?((\r\n\r\n)|(\n\n)|(\r\r))" port)]) (if m (read-string (cdar m) port) "")))) (define character-set-size 256) ;; netscape/string->url : str -> url (define netscape/string->url (lambda (string) (let ((url (string->url string))) (if (url-scheme url) url (if (string=? string "") (url-error "Can't resolve empty string as URL") (begin (set-url-scheme! url (if (char=? (string-ref string 0) #\/) "file" "http")) url)))))) ;; string->url : str -> url ;; New implementation, mostly provided by Neil Van Dyke (define string->url (let ((rx (regexp (string-append "^" "[ \t\f\r\n]*" "(" ; <1 front-opt "([a-zA-Z]*:)?" ; =2 scheme-colon-opt "(" ; <3 slashslash-opt "//" "([^:/@;?#]*@)?" ; =4 user-at-opt "([^:/@;?#]*)?" ; =5 host-opt "(:[0-9]*)?" ; =6 colon-port-opt ")?" ; >3 slashslash-opt ")?" ; >1 front-opt "([^?#]*)" ; =7 path "(\\?[^#]*)?" ; =8 question-query-opt "(#.*)?" ; =9 hash-fragment-opt "[ \t\f\r\n]*" "$")))) (lambda (str) (let ((match (regexp-match-positions rx str))) (if match (let* ((get-str (lambda (pos skip-left skip-right) (let ((pair (list-ref match pos))) (if pair (substring str (+ (car pair) skip-left) (- (cdr pair) skip-right)) #f)))) (get-num (lambda (pos skip-left skip-right) (let ((s (get-str pos skip-left skip-right))) (if s (string->number s) #f)))) (host (get-str 5 0 0)) (path (get-str 7 0 0)) (scheme (get-str 2 0 1))) (when (string? scheme) (string-lowercase! scheme)) (when (string? host) (string-lowercase! host)) (make-url scheme (uri-decode/maybe (get-str 4 0 1)) ; user host (get-num 6 1 0) ; port (and (not (= 0 (string-length path))) (char=? #\/ (string-ref path 0))) (separate-path-strings ;; If path is "" and the input is an absolute URL ;; with a hostname, then the intended path is "/", ;; but the URL is missing a "/" at the end. path #; (if (and (string=? path "") host) "/" path)) ;(uri-decode/maybe (get-str 8 1 0)) ; ;query (let ([q (get-str 8 1 0)]) (if q (form-urlencoded->alist q) '())) (uri-decode/maybe (get-str 9 1 0)) ; fragment )) (url-error "Invalid URL string: ~e" str)))))) (define (uri-decode/maybe f) ;; If #f, and leave unmolested any % that is followed by hex digit ;; if a % is not followed by a hex digit, replace it with %25 ;; in an attempt to be "friendly" (and f (uri-decode (regexp-replace* "%([^0-9a-fA-F])" f "%25\\1")))) ;; separate-path-strings : string[starting with /] -> (listof path/param) (define (separate-path-strings str) (cond [(string=? str "") '()] [else (let loop ([str (if (char=? #\/ (string-ref str 0)) (substring str 1 (string-length str)) str)]) (cond [(regexp-match #rx"([^/]*)/(.*)$" str) => (lambda (m) (cons (separate-params (cadr m)) (loop (caddr m))))] [else (list (separate-params str))]))])) (define (separate-params s) (let ([lst (map path-segment-decode (regexp-split #rx";" s))]) (make-path/param (car lst) (cdr lst)))) (define (path-segment-decode p) (cond [(string=? p "..") 'up] [(string=? p ".") 'same] [else (uri-path-segment-decode p)])) (define (path-segment-encode p) (cond [(eq? p 'up) ".."] [(eq? p 'same) "."] [(equal? p "..") "%2e%2e"] [(equal? p ".") "%2e"] [else (uri-path-segment-encode p)])) (define (combine-path-strings absolute? path/params) (cond [(null? path/params) ""] [else (apply string-append (if absolute? "/" "") (add-between "/" (map join-params path/params)))])) (define (join-params s) (apply string-append (add-between ";" (map path-segment-encode (cons (path/param-path s) (path/param-param s)))))) (define (add-between bet lst) (cond [(null? lst) null] [(null? (cdr lst)) lst] [else (let loop ([fst (car lst)] [lst (cdr lst)]) (cond [(null? lst) (list fst)] [else (list* fst bet (loop (car lst) (cdr lst)))]))])))))