From 50ade25b28cdeef71efdb6aff87c2905f1e6f1d9 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Sat, 13 Apr 2013 09:34:14 -0600 Subject: [PATCH] net/url: support HTTP/1.1 connections --- collects/net/scribblings/url.scrbl | 44 +++++++++-- collects/net/url.rkt | 113 ++++++++++++++++++++------- doc/release-notes/racket/HISTORY.txt | 1 + 3 files changed, 121 insertions(+), 37 deletions(-) diff --git a/collects/net/scribblings/url.scrbl b/collects/net/scribblings/url.scrbl index 9e24e5662f..9fe0a99c67 100644 --- a/collects/net/scribblings/url.scrbl +++ b/collects/net/scribblings/url.scrbl @@ -22,6 +22,9 @@ any other file. Currently the only supported protocols are @racket["http"], @racket["https"], and sometimes @racket["file"]. +The @racketmodname[net/url] logs information and background-thread +errors to a logger named @racket['net/url]. + @section{URL Structure} @declare-exporting[net/url-structs net/url] @@ -337,18 +340,45 @@ empty string, or it will be a string matching the following regexp: [url url?] [headers (listof string?) '()] [#:redirections redirections exact-nonnegative-integer? 0] - [#:status? status? boolean? #f]) + [#:status? status? boolean? #f] + [#:connection connection (or/c #f http-connection?)]) (values input-port? string?)]{ This function is an alternative to calling @racket[get-impure-port] and - @racket[purify-port] when needing to follow redirections. + @racket[purify-port] when needing to follow redirections. It also + supports HTTP/1.1 connections, which are used when the @racket[connection] + argument is not @racket[#f]. - That is, it does a GET request on @racket[url], follows up to - @racket[redirections] redirections and returns a port containing - the data as well as the headers for the final connection. If - @racket[status?] is true, then the status line is included in the - result string. + The @racket[get-pure-port/headers] function performs a GET request + on @racket[url], follows up to @racket[redirections] redirections + and returns a port containing the data as well as the headers for + the final connection. If @racket[status?] is true, then the status + line is included in the result string. + + A given @racket[connection] should be used for communication + with a particular HTTP/1.1 server, unless @racket[connection] is closed + (via @racket[http-connection-close]) between uses for different servers. + If @racket[connection] is provided, read all data from the result port + before making a new request with the same @racket[connection]. (Reusing + a @racket[connection] without reading all data may or may not work.) } +@deftogether[( +@defproc[(http-connection? [v any/c]) boolean?] +@defproc[(make-http-connection) http-connection?] +@defproc[(http-connection-close [connection http-connection?]) void?] +)]{ + +A HTTP connection value represents a potentially persistent connection +with a HTTP/1.1 server for use with @racket[get-pure-port/headers]. + +The @racket[make-http-connection] creates a ``connection'' that is +initially unconnected. Each call to @racket[get-pure-port/headers] +leaves a connection either connected or unconnected, depending on +whether the server allows the connection to continue. The +@racket[http-connection-close] function unconnects, but it does not +prevent further use of the connection value.} + + @defproc*[([(call/input-url [URL url?] [connect (url? . -> . input-port?)] [handle (input-port? . -> . any)]) diff --git a/collects/net/url.rkt b/collects/net/url.rkt index f21f1a8d2c..ab9f32eeee 100644 --- a/collects/net/url.rkt +++ b/collects/net/url.rkt @@ -9,6 +9,8 @@ ;; Not throw away MIME headers. ;; Determine file type. +(define-logger net/url) + ;; ---------------------------------------------------------------------- ;; Input ports have two statuses: @@ -106,8 +108,10 @@ (parameterize ([current-connect-scheme (url-scheme url)]) (tcp-connect host port-number)))) -;; http://getpost-impure-port : bool x url x union (str, #f) x list (str) -> in-port -(define (http://getpost-impure-port get? url post-data strings) +;; http://getpost-impure-port : bool x url x union (str, #f) x list (str) +; -> (values in-port out-port) +(define (http://getpost-impure-port get? url post-data strings + make-ports 1.1?) (define proxy (assoc (url-scheme url) (current-proxy-servers))) (define-values (server->client client->server) (make-ports url proxy)) (define access-string @@ -125,7 +129,7 @@ (define (println . xs) (for-each (lambda (x) (display x client->server)) xs) (display "\r\n" client->server)) - (println (if get? "GET " "POST ") access-string " HTTP/1.0") + (println (if get? "GET " "POST ") access-string " HTTP/1." (if 1.1? "1" "0")) (println "Host: " (url-host url) (let ([p (url-port url)]) (if p (format ":~a" p) ""))) (when post-data (println "Content-Length: " (bytes-length post-data))) @@ -133,8 +137,9 @@ (println) (when post-data (display post-data client->server)) (flush-output client->server) - (tcp-abandon-port client->server) - server->client) + (unless 1.1? + (tcp-abandon-port client->server)) + (values server->client client->server)) (define (file://->path url [kind (system-path-convention-type)]) (let ([strs (map path/param-path (url-path url))] @@ -181,7 +186,8 @@ (cond [(not scheme) (schemeless-url url)] [(or (string=? scheme "http") (string=? scheme "https")) - (http://getpost-impure-port get? url post-data strings)] + (define-values (s->c c->s) (http://getpost-impure-port get? url post-data strings make-ports #f)) + s->c] [(string=? scheme "file") (url-error "There are no impure file: ports")] [else (url-error "Scheme ~a unsupported" scheme)]))) @@ -205,9 +211,10 @@ [(or (not get?) ;; do not follow redirections for POST (zero? redirections)) - (let ([port (http://getpost-impure-port - get? url post-data strings)]) - (purify-http-port port))] + (let-values ([(s->c c->s) (http://getpost-impure-port + get? url post-data strings + make-ports #f)]) + (purify-http-port s->c))] [else (define-values (port header) (get-pure-port/headers url strings #:redirections redirections)) @@ -216,22 +223,44 @@ (file://get-pure-port url)] [else (url-error "Scheme ~a unsupported" scheme)]))) +(struct http-connection (s->c c->s) + #:mutable) + +(define (make-http-connection) (http-connection #f #f)) + +(define (http-connection-close conn) + (when (http-connection-c->s conn) + (tcp-abandon-port (http-connection-c->s conn)) + (set-http-connection-c->s! conn #f) + (close-input-port (http-connection-s->c conn)) + (set-http-connection-s->c! conn #f))) + (define (get-pure-port/headers url [strings '()] #:redirections [redirections 0] - #:status? [status? #f]) - (let redirection-loop ([redirections redirections] [url url]) - (define ip - (http://getpost-impure-port #t url #f strings)) + #:status? [status? #f] + #:connection [conn #f]) + (let redirection-loop ([redirections redirections] [url url] [use-conn conn]) + (define-values (ip op) + (http://getpost-impure-port #t url #f strings + (if (and use-conn + (http-connection-s->c use-conn)) + (lambda (url proxy) + (log-net/url-debug "reusing connection") + (values (http-connection-s->c use-conn) + (http-connection-c->s use-conn))) + make-ports) + (and conn #t))) (define status (read-line ip 'return-linefeed)) - (define-values (new-url chunked? headers) - (let loop ([new-url #f] [chunked? #f] [headers '()]) + (define-values (new-url chunked? close? headers) + (let loop ([new-url #f] [chunked? #f] [close? #f] [headers '()]) (define line (read-line ip 'return-linefeed)) (when (eof-object? line) (error 'getpost-pure-port - "connection ended before headers ended (when trying to follow a redirection)")) + "connection ended before headers ended")) (cond - [(equal? line "") (values new-url chunked? headers)] - [(equal? chunked-header-line line) (loop new-url #t (cons line headers))] + [(equal? line "") (values new-url chunked? close? headers)] + [(equal? chunked-header-line line) (loop new-url #t close? (cons line headers))] + [(equal? close-header-line line) (loop new-url chunked? #t (cons line headers))] [(regexp-match #rx"^Location: (.*)$" line) => (λ (m) @@ -248,20 +277,34 @@ (url-path next-url) (url-query next-url) (url-fragment next-url)))) - (loop (or next-url new-url) chunked? (cons line headers)))] - [else (loop new-url chunked? (cons line headers))]))) + (loop (or next-url new-url) chunked? close? (cons line headers)))] + [else (loop new-url chunked? close? (cons line headers))]))) (define redirection-status-line? (regexp-match #rx"^HTTP/[0-9]+[.][0-9]+ 3[0-9][0-9]" status)) + (define (close-ip) + (unless (and use-conn (not close?)) + (close-input-port ip))) + (when use-conn + (if close? + (begin + (log-net/url-info "connection closed by server") + (tcp-abandon-port op) + (set-http-connection-s->c! use-conn #f) + (set-http-connection-c->s! use-conn #f)) + (begin + (set-http-connection-s->c! use-conn ip) + (set-http-connection-c->s! use-conn op)))) (cond [(and redirection-status-line? new-url (not (zero? redirections))) - (close-input-port ip) - (redirection-loop (- redirections 1) new-url)] + (close-ip) + (log-net/url-info "redirection: ~a" (url->string new-url)) + (redirection-loop (- redirections 1) new-url #f)] [else (define-values (in-pipe out-pipe) (make-pipe)) (thread (λ () (http-pipe-data chunked? ip out-pipe) - (close-input-port ip))) + (close-ip))) (values in-pipe (apply string-append (map (λ (x) (string-append x "\r\n")) (if status? @@ -411,14 +454,17 @@ (http-read-headers ip)))) (define chunked-header-line "Transfer-Encoding: chunked") +(define close-header-line "Connection: close") (define (http-pipe-data chunked? ip op) - (if chunked? - (http-pipe-chunk ip op) - (begin - (copy-port ip op) - (flush-output op) - (close-output-port op)))) + (with-handlers ([exn:fail? (lambda (exn) + (log-net/url-error (exn-message exn)))]) + (if chunked? + (http-pipe-chunk ip op) + (begin + (copy-port ip op) + (flush-output op) + (close-output-port op))))) (define (http-pipe-chunk ip op) (define crlf-bytes (make-bytes 2)) @@ -723,8 +769,15 @@ (put-impure-port (->* (url? bytes?) ((listof string?)) input-port?)) (display-pure-port (input-port? . -> . void?)) (purify-port (input-port? . -> . string?)) - (get-pure-port/headers (->* (url?) ((listof string?) #:redirections exact-nonnegative-integer? #:status? boolean?) + (get-pure-port/headers (->* (url?) + ((listof string?) + #:redirections exact-nonnegative-integer? + #:status? boolean? + #:connection (or/c #f http-connection?)) (values input-port? string?))) + (http-connection? (any/c . -> . boolean?)) + (make-http-connection (-> http-connection?)) + (http-connection-close (http-connection? . -> . void?)) (netscape/string->url (string? . -> . url?)) (call/input-url (case-> (-> url? (-> url? input-port?) diff --git a/doc/release-notes/racket/HISTORY.txt b/doc/release-notes/racket/HISTORY.txt index 6062e02e99..1de9260827 100644 --- a/doc/release-notes/racket/HISTORY.txt +++ b/doc/release-notes/racket/HISTORY.txt @@ -1,5 +1,6 @@ Version 5.3.4.3 Added make-environment-variables +net/url: add support for HTTP/1.1 connections Version 5.3.4.2 Added current-environment-variables, environment-variables-ref,