net/url: support HTTP/1.1 connections

This commit is contained in:
Matthew Flatt 2013-04-13 09:34:14 -06:00
parent c188be0441
commit 50ade25b28
3 changed files with 121 additions and 37 deletions

View File

@ -22,6 +22,9 @@ any other file.
Currently the only supported protocols are @racket["http"], Currently the only supported protocols are @racket["http"],
@racket["https"], and sometimes @racket["file"]. @racket["https"], and sometimes @racket["file"].
The @racketmodname[net/url] logs information and background-thread
errors to a logger named @racket['net/url].
@section{URL Structure} @section{URL Structure}
@declare-exporting[net/url-structs net/url] @declare-exporting[net/url-structs net/url]
@ -337,18 +340,45 @@ empty string, or it will be a string matching the following regexp:
[url url?] [url url?]
[headers (listof string?) '()] [headers (listof string?) '()]
[#:redirections redirections exact-nonnegative-integer? 0] [#:redirections redirections exact-nonnegative-integer? 0]
[#:status? status? boolean? #f]) [#:status? status? boolean? #f]
[#:connection connection (or/c #f http-connection?)])
(values input-port? string?)]{ (values input-port? string?)]{
This function is an alternative to calling @racket[get-impure-port] and This function is an alternative to calling @racket[get-impure-port] and
@racket[purify-port] when needing to follow redirections. @racket[purify-port] when needing to follow redirections. It also
supports HTTP/1.1 connections, which are used when the @racket[connection]
argument is not @racket[#f].
That is, it does a GET request on @racket[url], follows up to The @racket[get-pure-port/headers] function performs a GET request
@racket[redirections] redirections and returns a port containing on @racket[url], follows up to @racket[redirections] redirections
the data as well as the headers for the final connection. If and returns a port containing the data as well as the headers for
@racket[status?] is true, then the status line is included in the the final connection. If @racket[status?] is true, then the status
result string. line is included in the result string.
A given @racket[connection] should be used for communication
with a particular HTTP/1.1 server, unless @racket[connection] is closed
(via @racket[http-connection-close]) between uses for different servers.
If @racket[connection] is provided, read all data from the result port
before making a new request with the same @racket[connection]. (Reusing
a @racket[connection] without reading all data may or may not work.)
} }
@deftogether[(
@defproc[(http-connection? [v any/c]) boolean?]
@defproc[(make-http-connection) http-connection?]
@defproc[(http-connection-close [connection http-connection?]) void?]
)]{
A HTTP connection value represents a potentially persistent connection
with a HTTP/1.1 server for use with @racket[get-pure-port/headers].
The @racket[make-http-connection] creates a ``connection'' that is
initially unconnected. Each call to @racket[get-pure-port/headers]
leaves a connection either connected or unconnected, depending on
whether the server allows the connection to continue. The
@racket[http-connection-close] function unconnects, but it does not
prevent further use of the connection value.}
@defproc*[([(call/input-url [URL url?] @defproc*[([(call/input-url [URL url?]
[connect (url? . -> . input-port?)] [connect (url? . -> . input-port?)]
[handle (input-port? . -> . any)]) [handle (input-port? . -> . any)])

View File

@ -9,6 +9,8 @@
;; Not throw away MIME headers. ;; Not throw away MIME headers.
;; Determine file type. ;; Determine file type.
(define-logger net/url)
;; ---------------------------------------------------------------------- ;; ----------------------------------------------------------------------
;; Input ports have two statuses: ;; Input ports have two statuses:
@ -106,8 +108,10 @@
(parameterize ([current-connect-scheme (url-scheme url)]) (parameterize ([current-connect-scheme (url-scheme url)])
(tcp-connect host port-number)))) (tcp-connect host port-number))))
;; http://getpost-impure-port : bool x url x union (str, #f) x list (str) -> in-port ;; http://getpost-impure-port : bool x url x union (str, #f) x list (str)
(define (http://getpost-impure-port get? url post-data strings) ; -> (values in-port out-port)
(define (http://getpost-impure-port get? url post-data strings
make-ports 1.1?)
(define proxy (assoc (url-scheme url) (current-proxy-servers))) (define proxy (assoc (url-scheme url) (current-proxy-servers)))
(define-values (server->client client->server) (make-ports url proxy)) (define-values (server->client client->server) (make-ports url proxy))
(define access-string (define access-string
@ -125,7 +129,7 @@
(define (println . xs) (define (println . xs)
(for-each (lambda (x) (display x client->server)) xs) (for-each (lambda (x) (display x client->server)) xs)
(display "\r\n" client->server)) (display "\r\n" client->server))
(println (if get? "GET " "POST ") access-string " HTTP/1.0") (println (if get? "GET " "POST ") access-string " HTTP/1." (if 1.1? "1" "0"))
(println "Host: " (url-host url) (println "Host: " (url-host url)
(let ([p (url-port url)]) (if p (format ":~a" p) ""))) (let ([p (url-port url)]) (if p (format ":~a" p) "")))
(when post-data (println "Content-Length: " (bytes-length post-data))) (when post-data (println "Content-Length: " (bytes-length post-data)))
@ -133,8 +137,9 @@
(println) (println)
(when post-data (display post-data client->server)) (when post-data (display post-data client->server))
(flush-output client->server) (flush-output client->server)
(tcp-abandon-port client->server) (unless 1.1?
server->client) (tcp-abandon-port client->server))
(values server->client client->server))
(define (file://->path url [kind (system-path-convention-type)]) (define (file://->path url [kind (system-path-convention-type)])
(let ([strs (map path/param-path (url-path url))] (let ([strs (map path/param-path (url-path url))]
@ -181,7 +186,8 @@
(cond [(not scheme) (cond [(not scheme)
(schemeless-url url)] (schemeless-url url)]
[(or (string=? scheme "http") (string=? scheme "https")) [(or (string=? scheme "http") (string=? scheme "https"))
(http://getpost-impure-port get? url post-data strings)] (define-values (s->c c->s) (http://getpost-impure-port get? url post-data strings make-ports #f))
s->c]
[(string=? scheme "file") [(string=? scheme "file")
(url-error "There are no impure file: ports")] (url-error "There are no impure file: ports")]
[else (url-error "Scheme ~a unsupported" scheme)]))) [else (url-error "Scheme ~a unsupported" scheme)])))
@ -205,9 +211,10 @@
[(or (not get?) [(or (not get?)
;; do not follow redirections for POST ;; do not follow redirections for POST
(zero? redirections)) (zero? redirections))
(let ([port (http://getpost-impure-port (let-values ([(s->c c->s) (http://getpost-impure-port
get? url post-data strings)]) get? url post-data strings
(purify-http-port port))] make-ports #f)])
(purify-http-port s->c))]
[else [else
(define-values (port header) (define-values (port header)
(get-pure-port/headers url strings #:redirections redirections)) (get-pure-port/headers url strings #:redirections redirections))
@ -216,22 +223,44 @@
(file://get-pure-port url)] (file://get-pure-port url)]
[else (url-error "Scheme ~a unsupported" scheme)]))) [else (url-error "Scheme ~a unsupported" scheme)])))
(struct http-connection (s->c c->s)
#:mutable)
(define (make-http-connection) (http-connection #f #f))
(define (http-connection-close conn)
(when (http-connection-c->s conn)
(tcp-abandon-port (http-connection-c->s conn))
(set-http-connection-c->s! conn #f)
(close-input-port (http-connection-s->c conn))
(set-http-connection-s->c! conn #f)))
(define (get-pure-port/headers url [strings '()] (define (get-pure-port/headers url [strings '()]
#:redirections [redirections 0] #:redirections [redirections 0]
#:status? [status? #f]) #:status? [status? #f]
(let redirection-loop ([redirections redirections] [url url]) #:connection [conn #f])
(define ip (let redirection-loop ([redirections redirections] [url url] [use-conn conn])
(http://getpost-impure-port #t url #f strings)) (define-values (ip op)
(http://getpost-impure-port #t url #f strings
(if (and use-conn
(http-connection-s->c use-conn))
(lambda (url proxy)
(log-net/url-debug "reusing connection")
(values (http-connection-s->c use-conn)
(http-connection-c->s use-conn)))
make-ports)
(and conn #t)))
(define status (read-line ip 'return-linefeed)) (define status (read-line ip 'return-linefeed))
(define-values (new-url chunked? headers) (define-values (new-url chunked? close? headers)
(let loop ([new-url #f] [chunked? #f] [headers '()]) (let loop ([new-url #f] [chunked? #f] [close? #f] [headers '()])
(define line (read-line ip 'return-linefeed)) (define line (read-line ip 'return-linefeed))
(when (eof-object? line) (when (eof-object? line)
(error 'getpost-pure-port (error 'getpost-pure-port
"connection ended before headers ended (when trying to follow a redirection)")) "connection ended before headers ended"))
(cond (cond
[(equal? line "") (values new-url chunked? headers)] [(equal? line "") (values new-url chunked? close? headers)]
[(equal? chunked-header-line line) (loop new-url #t (cons line headers))] [(equal? chunked-header-line line) (loop new-url #t close? (cons line headers))]
[(equal? close-header-line line) (loop new-url chunked? #t (cons line headers))]
[(regexp-match #rx"^Location: (.*)$" line) [(regexp-match #rx"^Location: (.*)$" line)
=> =>
(λ (m) (λ (m)
@ -248,20 +277,34 @@
(url-path next-url) (url-path next-url)
(url-query next-url) (url-query next-url)
(url-fragment next-url)))) (url-fragment next-url))))
(loop (or next-url new-url) chunked? (cons line headers)))] (loop (or next-url new-url) chunked? close? (cons line headers)))]
[else (loop new-url chunked? (cons line headers))]))) [else (loop new-url chunked? close? (cons line headers))])))
(define redirection-status-line? (define redirection-status-line?
(regexp-match #rx"^HTTP/[0-9]+[.][0-9]+ 3[0-9][0-9]" status)) (regexp-match #rx"^HTTP/[0-9]+[.][0-9]+ 3[0-9][0-9]" status))
(define (close-ip)
(unless (and use-conn (not close?))
(close-input-port ip)))
(when use-conn
(if close?
(begin
(log-net/url-info "connection closed by server")
(tcp-abandon-port op)
(set-http-connection-s->c! use-conn #f)
(set-http-connection-c->s! use-conn #f))
(begin
(set-http-connection-s->c! use-conn ip)
(set-http-connection-c->s! use-conn op))))
(cond (cond
[(and redirection-status-line? new-url (not (zero? redirections))) [(and redirection-status-line? new-url (not (zero? redirections)))
(close-input-port ip) (close-ip)
(redirection-loop (- redirections 1) new-url)] (log-net/url-info "redirection: ~a" (url->string new-url))
(redirection-loop (- redirections 1) new-url #f)]
[else [else
(define-values (in-pipe out-pipe) (make-pipe)) (define-values (in-pipe out-pipe) (make-pipe))
(thread (thread
(λ () (λ ()
(http-pipe-data chunked? ip out-pipe) (http-pipe-data chunked? ip out-pipe)
(close-input-port ip))) (close-ip)))
(values in-pipe (values in-pipe
(apply string-append (map (λ (x) (string-append x "\r\n")) (apply string-append (map (λ (x) (string-append x "\r\n"))
(if status? (if status?
@ -411,14 +454,17 @@
(http-read-headers ip)))) (http-read-headers ip))))
(define chunked-header-line "Transfer-Encoding: chunked") (define chunked-header-line "Transfer-Encoding: chunked")
(define close-header-line "Connection: close")
(define (http-pipe-data chunked? ip op) (define (http-pipe-data chunked? ip op)
(if chunked? (with-handlers ([exn:fail? (lambda (exn)
(http-pipe-chunk ip op) (log-net/url-error (exn-message exn)))])
(begin (if chunked?
(copy-port ip op) (http-pipe-chunk ip op)
(flush-output op) (begin
(close-output-port op)))) (copy-port ip op)
(flush-output op)
(close-output-port op)))))
(define (http-pipe-chunk ip op) (define (http-pipe-chunk ip op)
(define crlf-bytes (make-bytes 2)) (define crlf-bytes (make-bytes 2))
@ -723,8 +769,15 @@
(put-impure-port (->* (url? bytes?) ((listof string?)) input-port?)) (put-impure-port (->* (url? bytes?) ((listof string?)) input-port?))
(display-pure-port (input-port? . -> . void?)) (display-pure-port (input-port? . -> . void?))
(purify-port (input-port? . -> . string?)) (purify-port (input-port? . -> . string?))
(get-pure-port/headers (->* (url?) ((listof string?) #:redirections exact-nonnegative-integer? #:status? boolean?) (get-pure-port/headers (->* (url?)
((listof string?)
#:redirections exact-nonnegative-integer?
#:status? boolean?
#:connection (or/c #f http-connection?))
(values input-port? string?))) (values input-port? string?)))
(http-connection? (any/c . -> . boolean?))
(make-http-connection (-> http-connection?))
(http-connection-close (http-connection? . -> . void?))
(netscape/string->url (string? . -> . url?)) (netscape/string->url (string? . -> . url?))
(call/input-url (case-> (-> url? (call/input-url (case-> (-> url?
(-> url? input-port?) (-> url? input-port?)

View File

@ -1,5 +1,6 @@
Version 5.3.4.3 Version 5.3.4.3
Added make-environment-variables Added make-environment-variables
net/url: add support for HTTP/1.1 connections
Version 5.3.4.2 Version 5.3.4.2
Added current-environment-variables, environment-variables-ref, Added current-environment-variables, environment-variables-ref,