diff --git a/collects/net/scribblings/url.scrbl b/collects/net/scribblings/url.scrbl index 559d58f05a..944078e70c 100644 --- a/collects/net/scribblings/url.scrbl +++ b/collects/net/scribblings/url.scrbl @@ -1,6 +1,8 @@ #lang scribble/doc @(require "common.rkt" scribble/bnf - (for-label net/url net/url-unit net/url-sig net/head net/uri-codec)) + (for-label net/url net/url-unit net/url-sig + net/head net/uri-codec net/tcp-sig + openssl)) @title[#:tag "url"]{URLs and HTTP} @@ -16,8 +18,8 @@ whether or not you wish to examine its MIME headers. At this point, you have a regular input port with which to process the document, as with any other file. -Currently the only supported protocols are @scheme["http"] and -sometimes @scheme["file"]. +Currently the only supported protocols are @scheme["http"], +@racket["https"], and sometimes @scheme["file"]. @section{URL Structure} @@ -52,7 +54,7 @@ The basic structure for all URLs, which is explained in RFC 3986 }| The strings inside the @scheme[user], @scheme[path], @scheme[query], -and @scheme[fragment] fields are represented directly as Scheme +and @scheme[fragment] fields are represented directly as Racket strings, without URL-syntax-specific quoting. The procedures @scheme[string->url] and @scheme[url->string] translate encodings such as @litchar{%20} into spaces and back again. @@ -212,6 +214,14 @@ request. The DELETE method is used to delete the entity identified by @scheme[URL]. +@bold{Beware:} By default, @scheme["https"] scheme handling does not +verify a server's certificate (i.e., it's equivalent of clicking +through a browser's warnings), so communication is safe, but the +identity of the server is not verified. To validate the server's +certificate, set @racket[current-https-protocol] to a context created +with @racket[ssl-make-client-context], and enable certificate validation +in the context with @racket[ssl-set-verify!]. + The @scheme["file"] scheme for URLs is handled only by @scheme[get-pure-port], which uses @scheme[open-input-file], does not handle exceptions, and ignores the optional strings.} @@ -246,7 +256,11 @@ port} contains both the returned headers and the body. The Initiates a POST/PUT request for @scheme[URL] and sends the @scheme[post] byte string. The result is a @tech{pure port}, which contains the body of the response is returned. The optional list of -strings can be used to send header lines to the server.} +strings can be used to send header lines to the server. + +@bold{Beware:} See @racket[get-pure-port] for warnings about +@scheme["https"] certificate validation.} + @deftogether[( @defproc[(post-impure-port [URL url?] @@ -335,6 +349,14 @@ connections. Each mapping is a list of three elements: Currently, the only proxiable scheme is @scheme["http"]. The default mapping is the empty list (i.e., no proxies).} +@defparam[current-https-protocol protocol (or/c ssl-client-context? symbol?)]{ + +A parameter that determines the connection mode for @racket["https"] +connections; the parameter value is passed as the third argument to +@racket[ssl-connect] when creating an @racket["https"] connection. +Set this parameter to validate a server's certificates, for example, +as described with @racket[get-pure-port].} + @; ---------------------------------------- @section{URL Unit} @@ -343,7 +365,15 @@ mapping is the empty list (i.e., no proxies).} @defthing[url@ unit?]{ -Imports @scheme[tcp^], exports @scheme[url^].} +Imports @scheme[tcp^], exports @scheme[url+scheme^]. + +The @racket[current-connect-scheme] parameter is set to the scheme of +a URL when @racket[tcp-connect] is called to create a connection. A +@racket[tcp-connect] variant linked to @racket[url@] can check this +parameter to choose the connection mode; in particular, +@racket[net/url] supplies a @racket[tcp-connect] that actually uses +@racket[ssl-connect] when @racket[(current-connect-scheme)] produces +@racket["https"].} @; ---------------------------------------- @@ -353,5 +383,10 @@ Imports @scheme[tcp^], exports @scheme[url^].} @defsignature[url^ ()]{ -Includes everything exported by the @schememodname[net/url] module.} +Includes everything exported by the @schememodname[net/url] module +except @racket[current-https-protocol].} + +@defsignature[url+scheme^ (url^)]{ + +Adds @racket[current-connect-scheme] to @racket[url^].} diff --git a/collects/net/url-sig.rkt b/collects/net/url-sig.rkt index c09482a68a..4475c39d3e 100644 --- a/collects/net/url-sig.rkt +++ b/collects/net/url-sig.rkt @@ -1,24 +1,34 @@ -#lang racket/signature +#lang racket/base +(require racket/unit) + +(provide url^ url+scheme^) + +(define-signature url^ + (get-pure-port + get-impure-port + post-pure-port + post-impure-port + head-pure-port + head-impure-port + delete-pure-port + delete-impure-port + put-pure-port + put-impure-port + display-pure-port + purify-port + netscape/string->url + string->url + url->string + path->url + url->path + call/input-url + combine-url/relative + url-exception? + current-proxy-servers + file-url-path-convention-type)) + +(define-signature url+scheme^ extends url^ + (current-connect-scheme)) + + -get-pure-port -get-impure-port -post-pure-port -post-impure-port -head-pure-port -head-impure-port -delete-pure-port -delete-impure-port -put-pure-port -put-impure-port -display-pure-port -purify-port -netscape/string->url -string->url -url->string -path->url -url->path -call/input-url -combine-url/relative -url-exception? -current-proxy-servers -file-url-path-convention-type diff --git a/collects/net/url-unit.rkt b/collects/net/url-unit.rkt index 245a7716f9..be20aab1e6 100644 --- a/collects/net/url-unit.rkt +++ b/collects/net/url-unit.rkt @@ -14,11 +14,8 @@ (require racket/port racket/string "url-structs.rkt" "uri-codec.rkt" "url-sig.rkt" "tcp-sig.rkt") -;; See "HACK" below. -(require (prefix-in r: racket/tcp)) - (import tcp^) -(export url^) +(export url+scheme^) (define-struct (url-exception exn:fail) ()) @@ -92,25 +89,7 @@ [(string=? scheme "https") 443] [else (url-error "Scheme ~a not supported" (url-scheme url))]))) -;; HACK: if `tcp-connect' is void, then instead of using the input unit -;; we dispatch (based on the url scheme) directly to the built in tcp -;; functionality or to the ssl functions. This makes it possible to -;; have net/url provide an implementation that handles both http and -;; https, while code that uses this unit directly (like old code that -;; slaps together an ssl version) continues to work. -(define dispatch-on-scheme? (void? tcp-connect)) -(define get-ssl - (let ([connect #f] [abandon #f]) - ;; require the ssl code only when needed - (lambda (name) - (unless connect - (define-values/invoke-unit - ((dynamic-require 'net/ssl-tcp-unit 'make-ssl-tcp@) - #f #f #f #f #f #f #f) - (import) (export tcp^)) - (set! connect tcp-connect) - (set! abandon tcp-abandon-port)) - (case name [(connect) connect] [(abandon) abandon])))) +(define current-connect-scheme (make-parameter "http")) ;; make-ports : url -> in-port x out-port (define (make-ports url proxy) @@ -118,11 +97,8 @@ (caddr proxy) (or (url-port url) (url->default-port url)))] [host (if proxy (cadr proxy) (url-host url))]) - ((cond - [(not dispatch-on-scheme?) tcp-connect] - [(string=? (url-scheme url) "https") (get-ssl 'connect)] - [else r:tcp-connect]) - host port-number))) + (parameterize ([current-connect-scheme (url-scheme url)]) + (tcp-connect host port-number)))) ;; http://getpost-impure-port : bool x url x union (str, #f) x list (str) -> in-port (define (http://getpost-impure-port get? url post-data strings) @@ -151,10 +127,7 @@ (println) (when post-data (display post-data client->server)) (flush-output client->server) - ((cond [(not dispatch-on-scheme?) tcp-abandon-port] - [(string=? (url-scheme url) "https") (get-ssl 'abandon)] - [else r:tcp-abandon-port]) - client->server) + (tcp-abandon-port client->server) server->client) (define (file://->path url [kind (system-path-convention-type)]) diff --git a/collects/net/url.rkt b/collects/net/url.rkt index 2cf7fead33..7ceea392c4 100644 --- a/collects/net/url.rkt +++ b/collects/net/url.rkt @@ -1,13 +1,26 @@ #lang racket/base (require racket/unit racket/contract - "url-structs.rkt" "url-sig.rkt" "url-unit.rkt" - "tcp-sig.rkt") + (rename-in racket/tcp + [tcp-connect plain-tcp-connect] + [tcp-abandon-port plain-tcp-abandon-port]) + openssl + "tcp-sig.rkt" + "url-structs.rkt" "url-sig.rkt" "url-unit.rkt") + +;; Define `tcp-connect' and `tcp-abandon-port' to fit with +;; `current-connect-scheme' from `url-unt@' +(define (tcp-connect host port) + (cond + [(equal? (current-connect-scheme) "https") + (ssl-connect host port (current-https-protocol))] + [else + (plain-tcp-connect host port)])) + +(define (tcp-abandon-port port) + (cond + [(ssl-port? port) (ssl-abandon-port port)] + [else (plain-tcp-abandon-port port)])) -;; Define `tcp@' as a unit that uses void for `tcp-connect', which will -;; make the url-unit code dispatch to either the built in tcp functions -;; or the ssl functions. (See the "HACK" comment there.) -(require (except-in racket/tcp tcp-connect)) -(define tcp-connect (void)) (define-unit-from-context tcp@ tcp^) (define-compound-unit/infer url+tcp@ @@ -18,6 +31,9 @@ (provide (struct-out url) (struct-out path/param)) +(define current-https-protocol (make-parameter 'sslv2-or-v3)) +(provide current-https-protocol) + (provide/contract (string->url ((or/c bytes? string?) . -> . url?)) (path->url ((or/c path-string? path-for-some-system?) . -> . url?)) diff --git a/collects/tests/net/url-port.rkt b/collects/tests/net/url-port.rkt index 5c12e5bc81..526542258d 100644 --- a/collects/tests/net/url-port.rkt +++ b/collects/tests/net/url-port.rkt @@ -1,55 +1,67 @@ #lang racket (require net/url mzlib/thread + openssl tests/eli-tester) -(define ((make-tester url->port) response) - (define port-no 9001) - (define server-cust - (make-custodian)) - (parameterize ([current-custodian server-cust]) - (thread - (λ () - (run-server port-no - (lambda (ip op) - (thread (λ () (port->string ip))) - (display response op) - (flush-output op)) - +inf.0)))) - (sleep 1) - (dynamic-wind - void - (λ () - (port->string - (url->port - (url "http" #f "localhost" port-no - #t empty empty #f)))) - (λ () - (custodian-shutdown-all server-cust)))) +(define (run-tests scheme wrap-ports) + (define ((make-tester url->port) response) + (define port-no 9001) + (define server-cust + (make-custodian)) + (parameterize ([current-custodian server-cust]) + (thread + (λ () + (run-server port-no + (lambda (ip op) + (let-values ([(ip op) (wrap-ports ip op)]) + (regexp-match #rx"(\r\n|^)\r\n" ip) + (display response op) + (close-output-port op) + (close-input-port ip))) + +inf.0)))) + (sleep 1) + (dynamic-wind + void + (λ () + (port->string + (url->port + (url scheme #f "localhost" port-no + #t empty empty #f)))) + (λ () + (custodian-shutdown-all server-cust)))) -(define get-pure - (make-tester get-pure-port)) -(define get-impure - (make-tester get-impure-port)) + (define get-pure + (make-tester get-pure-port)) + (define get-impure + (make-tester get-impure-port)) + + (test + (get-pure + "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nTransfer-Encoding: chunked\r\n\r\n24\r\nThis is the data in the first chunk \r\n1A\r\nand this is the second one\r\n0\r\n") + => + "This is the data in the first chunk and this is the second one" + + (get-pure + "HTTP/1.0 200 OK\r\nContent-Type: text/plain\r\n\r\nThis is the data in the first chunk and this is the second one") + => + "This is the data in the first chunk and this is the second one" + + (get-impure + "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nTransfer-Encoding: chunked\r\n\r\n23\r\nThis is the data in the first chunk\r\n1A\r\nand this is the second one\r\n0\r\n") + => + "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nTransfer-Encoding: chunked\r\n\r\n23\r\nThis is the data in the first chunk\r\n1A\r\nand this is the second one\r\n0\r\n" + + (get-impure + "HTTP/1.0 200 OK\r\nContent-Type: text/plain\r\n\r\nThis is the data in the first chunk and this is the second one\r\n") + => + "HTTP/1.0 200 OK\r\nContent-Type: text/plain\r\n\r\nThis is the data in the first chunk and this is the second one\r\n" + )) + +(run-tests "http" values) +(run-tests "https" (let ([ctx (ssl-make-server-context)]) + (ssl-load-certificate-chain! ctx (collection-file-path "test.pem" "openssl")) + (ssl-load-private-key! ctx (collection-file-path "test.pem" "openssl")) + (lambda (in out) + (ports->ssl-ports in out #:mode 'accept #:context ctx)))) -(test - (get-pure - "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nTransfer-Encoding: chunked\r\n\r\n24\r\nThis is the data in the first chunk \r\n1A\r\nand this is the second one\r\n0\r\n") - => - "This is the data in the first chunk and this is the second one" - - (get-pure - "HTTP/1.0 200 OK\r\nContent-Type: text/plain\r\n\r\nThis is the data in the first chunk and this is the second one") - => - "This is the data in the first chunk and this is the second one" - - (get-impure - "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nTransfer-Encoding: chunked\r\n\r\n23\r\nThis is the data in the first chunk\r\n1A\r\nand this is the second one\r\n0\r\n") - => - "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nTransfer-Encoding: chunked\r\n\r\n23\r\nThis is the data in the first chunk\r\n1A\r\nand this is the second one\r\n0\r\n" - - (get-impure - "HTTP/1.0 200 OK\r\nContent-Type: text/plain\r\n\r\nThis is the data in the first chunk and this is the second one\r\n") - => - "HTTP/1.0 200 OK\r\nContent-Type: text/plain\r\n\r\nThis is the data in the first chunk and this is the second one\r\n" - ) \ No newline at end of file