From 11a3d9b0acb602fd1de5e114a316b6e6d6c1168d Mon Sep 17 00:00:00 2001 From: Robby Findler Date: Mon, 3 Oct 2011 08:10:43 -0500 Subject: [PATCH] add support for redirections to get-pure-port and add get-pure-port/headers --- collects/net/scribblings/url.scrbl | 19 ++++- collects/net/url.rkt | 66 +++++++++++++--- collects/tests/net/url-port.rkt | 113 ++++++++++++++++++++------- doc/release-notes/racket/HISTORY.txt | 1 + 4 files changed, 158 insertions(+), 41 deletions(-) diff --git a/collects/net/scribblings/url.scrbl b/collects/net/scribblings/url.scrbl index e1b2db5f32..0ed9d25aad 100644 --- a/collects/net/scribblings/url.scrbl +++ b/collects/net/scribblings/url.scrbl @@ -188,7 +188,8 @@ Determines the default conversion to and from strings for @deftogether[( @defproc[(get-pure-port [URL url?] - [header (listof string?) null]) + [header (listof string?) null] + [#:redirections redirections exact-nonnegative-integer? 0]) input-port?] @defproc[(head-pure-port [URL url?] [header (listof string?) null]) @@ -204,7 +205,9 @@ optional list of strings can be used to send header lines to the server. The GET method is used to retrieve whatever information is identified -by @racket[URL]. +by @racket[URL]. If @racket[redirections] is not @racket[0], then +@racket[get-pure-port] will follow redirections from the server, +up to the limit given by @racket[redirections]. The HEAD method is identical to GET, except the server must not return a message body. The meta-information returned in a response to a HEAD @@ -304,6 +307,18 @@ empty string, or it will be a string matching the following regexp: #rx"^HTTP/.*?(\r\n\r\n|\n\n|\r\r)" ]} +@defproc[(get-pure-port/headers + [url url?] + [headers (listof string?) '()] + [#:redirections redirections exact-nonnegative-integer? 0]) + (values input-port? string?)]{ + This function is an alternative to calling @racket[get-impure-port] and + @racket[purify-port] when needing to follow redirections. + + That is, it does a GET request on @racket[url], follows up to + @racket[redirections] redirections and returns a port containing + the data as well as the headers for the final connection. +} @defproc*[([(call/input-url [URL url?] [connect (url? . -> . input-port?)] diff --git a/collects/net/url.rkt b/collects/net/url.rkt index 72dce076fc..0a9e41adb3 100644 --- a/collects/net/url.rkt +++ b/collects/net/url.rkt @@ -191,20 +191,58 @@ (schemeless-url url)] [(or (string=? scheme "http") (string=? scheme "https")) - (let loop ([redirections redirections]) - (cond - [(zero? redirections) - (let ([port (http://getpost-impure-port - get? url post-data strings)]) - (purify-http-port port))] - [else - (let ([port (http://getpost-impure-port - get? url post-data strings)]) - (purify-http-port port))]))] + (cond + [(or (not get?) + ;; do not follow redirections for POST + (zero? redirections)) + (let ([port (http://getpost-impure-port + get? url post-data strings)]) + (purify-http-port port))] + [else + (define-values (port header) + (get-pure-port/headers url strings #:redirections redirections)) + port])] [(string=? scheme "file") (file://get-pure-port url)] [else (url-error "Scheme ~a unsupported" scheme)]))) +(define (get-pure-port/headers url [strings '()] #:redirections [redirections 0]) + (let redirection-loop ([redirections redirections] [url url]) + (define ip + (http://getpost-impure-port #t url #f '())) + (define status (read-line ip 'return-linefeed)) + (define-values (new-url chunked? headers) + (let loop ([new-url #f] [chunked? #f] [headers '()]) + (define line (read-line ip 'return-linefeed)) + (when (eof-object? line) + (error 'getpost-pure-port + "connection ended before headers ended (when trying to follow a redirection)")) + (cond + [(equal? line "") (values new-url chunked? headers)] + [(equal? chunked-header-line line) (loop new-url #t (cons line headers))] + [(regexp-match #rx"^Location: (.*)$" line) + => + (λ (m) + (define m1 (list-ref m 1)) + (define url (with-handlers ((exn:fail? (λ (x) #f))) + (string->url m1))) + (loop (or url new-url) chunked? (cons line headers)))] + [else (loop new-url chunked? (cons line headers))]))) + (define redirection-status-line? + (regexp-match #rx"^HTTP/[0-9]+[.][0-9]+ 3[0-9][0-9]" status)) + (cond + [(and redirection-status-line? new-url (not (zero? redirections))) + (redirection-loop (- redirections 1) new-url)] + [else + (define-values (in-pipe out-pipe) (make-pipe)) + (thread + (λ () + (http-pipe-data chunked? ip out-pipe) + (close-input-port ip))) + (values in-pipe + (apply string-append (map (λ (x) (string-append x "\r\n")) + (reverse headers))))]))) + ;; get-pure-port : url [x list (str)] -> in-port (define (get-pure-port url [strings '()] #:redirections [redirections 0]) (getpost-pure-port #t url #f strings redirections)) @@ -342,11 +380,13 @@ (error 'purify-http-port "Connection ended before headers ended")) (if (string=? l "") #f - (if (string=? l "Transfer-Encoding: chunked") + (if (string=? l chunked-header-line) (begin (http-read-headers ip) #t) (http-read-headers ip)))) +(define chunked-header-line "Transfer-Encoding: chunked") + (define (http-pipe-data chunked? ip op) (if chunked? (http-pipe-chunk ip op) @@ -626,7 +666,7 @@ (url->string (url? . -> . string?)) (url->path (->* (url?) ((one-of/c 'unix 'windows)) path-for-some-system?)) - (get-pure-port (->* (url?) ((listof string?)) input-port?)) + (get-pure-port (->* (url?) ((listof string?) #:redirections exact-nonnegative-integer?) input-port?)) (get-impure-port (->* (url?) ((listof string?)) input-port?)) (post-pure-port (->* (url? (or/c false/c bytes?)) ((listof string?)) input-port?)) (post-impure-port (->* (url? bytes?) ((listof string?)) input-port?)) @@ -638,6 +678,8 @@ (put-impure-port (->* (url? bytes?) ((listof string?)) input-port?)) (display-pure-port (input-port? . -> . void?)) (purify-port (input-port? . -> . string?)) + (get-pure-port/headers (->* (url?) ((listof string?) #:redirections exact-nonnegative-integer?) + (values input-port? string?))) (netscape/string->url (string? . -> . url?)) (call/input-url (case-> (-> url? (-> url? input-port?) diff --git a/collects/tests/net/url-port.rkt b/collects/tests/net/url-port.rkt index d7e4cdda9e..3b0a48e5e4 100644 --- a/collects/tests/net/url-port.rkt +++ b/collects/tests/net/url-port.rkt @@ -4,38 +4,48 @@ openssl tests/eli-tester) -(define (run-tests scheme wrap-ports) - (define ((make-tester url->port) response) - (define port-no 9001) +(define (run-tests scheme wrap-ports skip-actual-redirect?) + (define ((make-tester url->port) response . responses) + (define first-port-no 9001) (define server-cust (make-custodian)) (parameterize ([current-custodian server-cust]) - (thread - (λ () - (run-server port-no - (lambda (ip op) - (let-values ([(ip op) (wrap-ports ip op)]) - (regexp-match #rx"(\r\n|^)\r\n" ip) - (display response op) - (close-output-port op) - (close-input-port ip))) - +inf.0)))) + (for ([response (in-list (cons response responses))] + [port-no (in-naturals first-port-no)]) + (thread + (λ () + (run-server port-no + (lambda (ip op) + (let-values ([(ip op) (wrap-ports ip op)]) + (regexp-match #rx"(\r\n|^)\r\n" ip) + (display response op) + (close-output-port op) + (close-input-port ip))) + +inf.0))))) (sleep 1) (dynamic-wind - void + void + (λ () + (call-with-values (λ () - (port->string - (url->port - (url scheme #f "localhost" port-no - #t empty empty #f)))) - (λ () - (custodian-shutdown-all server-cust)))) + (url->port + (url scheme #f "localhost" first-port-no + #t empty empty #f))) + (λ vals (apply values (cons (port->string (car vals)) (cdr vals)))))) + (λ () + (custodian-shutdown-all server-cust)))) (define get-pure (make-tester get-pure-port)) (define get-impure (make-tester get-impure-port)) - + (define get-pure/redirect + (make-tester (λ (x) (get-pure-port x #:redirections 1)))) + (define get-pure/headers + (make-tester get-pure-port/headers)) + (define get-pure/headers/redirect + (make-tester (λ (x) (get-pure-port/headers x #:redirections 1)))) + (test (get-pure "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nTransfer-Encoding: chunked\r\n\r\n24\r\nThis is the data in the first chunk \r\n1A\r\nand this is the second one\r\n0\r\n") @@ -52,21 +62,70 @@ => "This is the data in the first chand this is the second oneXXXXXXX" - (get-impure - "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nTransfer-Encoding: chunked\r\n\r\n23\r\nThis is the data in the first chunk\r\n1A\r\nand this is the second one\r\n0\r\n") + (get-pure/redirect + "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nTransfer-Encoding: chunked\r\n\r\n24\r\nThis is the data in the first chunk \r\n1A\r\nand this is the second one\r\n0\r\n") => - "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nTransfer-Encoding: chunked\r\n\r\n23\r\nThis is the data in the first chunk\r\n1A\r\nand this is the second one\r\n0\r\n" + "This is the data in the first chunk and this is the second one" + (get-pure/redirect + "HTTP/1.0 200 OK\r\nContent-Type: text/plain\r\n\r\nThis is the data in the first chunk and this is the second one") + => + "This is the data in the first chunk and this is the second one" + + (get-pure/redirect + "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nTransfer-Encoding: chunked\r\n\r\n20\r\nThis is the data in the first ch\r\n21\r\nand this is the second oneXXXXXXX\r\n0\r\n") + => + "This is the data in the first chand this is the second oneXXXXXXX" + (get-impure "HTTP/1.0 200 OK\r\nContent-Type: text/plain\r\n\r\nThis is the data in the first chunk and this is the second one\r\n") => "HTTP/1.0 200 OK\r\nContent-Type: text/plain\r\n\r\nThis is the data in the first chunk and this is the second one\r\n" - )) + + (get-pure/headers + "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nTransfer-Encoding: chunked\r\n\r\n24\r\nThis is the data in the first chunk \r\n1A\r\nand this is the second one\r\n0\r\n") + => + (values "This is the data in the first chunk and this is the second one" + "Content-Type: text/plain\r\nTransfer-Encoding: chunked\r\n") + + (get-pure/headers + "HTTP/1.0 200 OK\r\nContent-Type: text/plain\r\n\r\nThis is the data in the first chunk and this is the second one") + => + (values "This is the data in the first chunk and this is the second one" + "Content-Type: text/plain\r\n") + + (get-pure/headers + "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nTransfer-Encoding: chunked\r\n\r\n20\r\nThis is the data in the first ch\r\n21\r\nand this is the second oneXXXXXXX\r\n0\r\n") + => + (values "This is the data in the first chand this is the second oneXXXXXXX" + "Content-Type: text/plain\r\nTransfer-Encoding: chunked\r\n") + + (get-pure/headers + "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nTransfer-Encoding: chunked\r\nAnother-Header: ta-daa\r\n\r\n20\r\nThis is the data in the first ch\r\n21\r\nand this is the second oneXXXXXXX\r\n0\r\n") + => + (values "This is the data in the first chand this is the second oneXXXXXXX" + "Content-Type: text/plain\r\nTransfer-Encoding: chunked\r\nAnother-Header: ta-daa\r\n") + ) + + (unless skip-actual-redirect? + (test + (get-pure/redirect + "HTTP/1.1 301 Moved Permanently\r\nLocation: http://localhost:9002/whatever\r\n\r\nstuff" + (string-append + "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nTransfer-Encoding: chunked\r\n\r\n" + "24\r\nThis is the data in the first chunk \r\n1A\r\nand this is the second one\r\n0\r\n")) + + (get-pure/headers/redirect + "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nTransfer-Encoding: chunked\r\nAnother-Header: ta-daa\r\n\r\n20\r\nThis is the data in the first ch\r\n21\r\nand this is the second oneXXXXXXX\r\n0\r\n") + => + (values "This is the data in the first chand this is the second oneXXXXXXX" + "Content-Type: text/plain\r\nTransfer-Encoding: chunked\r\nAnother-Header: ta-daa\r\n")))) -(run-tests "http" values) +(run-tests "http" values #f) (run-tests "https" (let ([ctx (ssl-make-server-context)]) (ssl-load-certificate-chain! ctx (collection-file-path "test.pem" "openssl")) (ssl-load-private-key! ctx (collection-file-path "test.pem" "openssl")) (lambda (in out) - (ports->ssl-ports in out #:mode 'accept #:context ctx)))) + (ports->ssl-ports in out #:mode 'accept #:context ctx))) + #t) diff --git a/doc/release-notes/racket/HISTORY.txt b/doc/release-notes/racket/HISTORY.txt index 8f93f281f1..797eb13635 100644 --- a/doc/release-notes/racket/HISTORY.txt +++ b/doc/release-notes/racket/HISTORY.txt @@ -3,6 +3,7 @@ Added exn:fail:syntax:unbound Added date*, which extends date to include nanoseconds and a time zone name Changed seconds->date to accept a real number return a date* +Added support for redirections to get-pure-port Version 5.1.3.10 Added variable-reference->module-declare-inspector, which allows