net/git-checkout: support the smart HTTP(S) transport

Git-based hosting services most commonly support the smart HTTPS
protocol, which carries "git://"-format payload in a fairly straightforward
way. (Supporting the dumb protocol looks much more difficult.)
This commit is contained in:
Matthew Flatt 2014-10-15 09:12:09 -06:00
parent babd420293
commit bbf154ba36
2 changed files with 110 additions and 11 deletions

View File

@ -6,10 +6,11 @@
@defmodule[net/git-checkout]{The @racketmodname[net/git-checkout]
library provides support for extracting a directory tree from a Git
repository that is hosted by a server that implements @tt{git://}
protocol. The @racketmodname[net/git-checkout] library does not rely
on external binaries (such as a @exec{git} client) or native libraries
(such as @filepath{libgit}).}
repository that is hosted by a server that implements the @tt{git://}
protocol or the ``smart'' protocol over HTTP(S). The
@racketmodname[net/git-checkout] library does not rely on external
binaries (such as a @exec{git} client) or Git-specific native
libraries (such as @filepath{libgit}).}
When run as a program, @racket[net/git-checkout] accepts command-line
arguments to drive the checkout. Use
@ -22,20 +23,29 @@ for information on command-line arguments and flags.
[repository string?]
[#:dest-dir dest-dir path-string?]
[#:ref ref string? "master"]
[#:transport transport (or/c 'git 'http 'https) 'git]
[#:depth depth (or/c #f positive-exact-integer?) 1]
[#:quiet? quiet? any/c #f]
[#:tmp-dir given-tmp-dir (or/c #f path-string?) #f]
[#:clean-tmp-dir? clean-tmp-dir? any/c (not given-tmp-dir)]
[#:port port (integer-in 1 65535) 9418])
[#:port port (integer-in 1 65535) (case transport
[(git) 9418]
[(http) 80]
[(https) 443])])
void?]{
Contacts the @tt{git://} server at @racket[hostname] and @racket[port]
Contacts the server at @racket[hostname] and @racket[port]
to download the repository whose name on the server is
@racket[repository] (normally ending in @filepath{.git}). The tree
within the repository that is identified by @racket[ref] (which can be
a branch, tag, commit ID, or tree ID) is extracted to
@racket[dest-dir].
If @racket[transport] is @racket['git], then the server is contacted
using Git's native transport. If @racket[transport] is
@racket['http] or @racket['https], then the server is contacted using
HTTP(S) and the ``smart'' Git protocol.
A local clone of the repository is @emph{not} preserved, but is
instead discarded after the tree is extracted to @racket[dest-dir].
If @racket[dest-dir] does not exist, it is created. If

View File

@ -5,7 +5,8 @@
racket/port
racket/string
file/gunzip
openssl/sha1)
openssl/sha1
net/url)
;; Stefan Saasen's "Reimplementing 'git clone' in Haskell from the bottom up"
;; http://stefan.saasen.me/articles/git-clone-in-haskell-from-the-bottom-up/
@ -19,12 +20,16 @@
(define (git-checkout host
repo
#:dest-dir dest-dir
#:transport [transport 'git]
#:ref [ref "master"]
#:depth [given-depth 1]
#:quiet? [quiet? #f]
#:tmp-dir [given-tmp-dir #f]
#:clean-tmp-dir? [clean-tmp-dir? (not given-tmp-dir)]
#:port [port 9418])
#:port [port (case transport
[(git) 9418]
[(http) 80]
[(https) 443])])
(let retry-loop ([given-depth given-depth])
(define tmp-dir (or given-tmp-dir
(make-temporary-file "git~a" 'directory)))
@ -37,7 +42,7 @@
(log-git-checkout-info msg))
(status "Contacting ~a" host)
(define-values (i o) (tcp-connect host port))
(define-values (i o) (initial-connect transport host port repo))
((let/ec esc
(dynamic-wind
void
@ -70,6 +75,7 @@
#f])))
;; Tell the server which commits we need
(set!-values (i o) (want-step transport host port repo i o))
(for ([want-commit (in-list want-commits)]
[pos (in-naturals)])
(write-pkt o "want " want-commit (if (zero? pos) " " "") "\n"))
@ -77,8 +83,14 @@
(write-pkt o "deepen " depth "\n"))
(write-pkt o)
;; Tell the server that we're ready for the objects
(write-pkt o "done\n")
(set!-values (i o) (done-step transport host port repo i o))
(when depth
;; If we wrote `deepen`, then the server replies with `shallow`s
;; If we wrote `deepen`, then the server replies with `shallow`s.
;; Note that these were available before writing `done` in the
;; case of the 'git transport, but it works here for all transports.
(let loop ()
(define r (read-pkt i))
(cond
@ -90,7 +102,6 @@
(error 'git-checkout "expected shallow, got ~s" r)])))
;; Tell the server that we're ready for the objects
(write-pkt o "done\n")
(define nak (read-pkt i))
(unless (equal? #"NAK\n" nak)
(error 'git-checkout "expected NAK, got ~s" nak))
@ -147,6 +158,75 @@
(close-input-port i)
(close-output-port o)))))))
;; ----------------------------------------
;; Transports: git, http, and https
;; initial-connect: transport-sym string natural string
;; -> (values input-port output-port)
;; Contacts the server and returns an output port for writing
;; the request (ignored if not needed for the the transport)
;; and an input port from reading the available references
(define (initial-connect transport host port repo)
(case transport
[(git)
(tcp-connect host port)]
[(http https)
(define i
(get-pure-port
(string->url
(~a transport "://" host ":" port "/" repo
"/info/refs?service=git-upload-pack"))))
(define ok? #f)
(dynamic-wind
void
(lambda ()
(unless (regexp-match-peek #px#"^[0-9a-f]{4}#" i)
(error 'git-checkout (~a "response is not consistent with the Git protocol\n"
" initial portion: ~s")
(read-bytes 64 i)))
(define pkt (read-pkt i))
(define term-pkt (read-pkt i))
(unless (eof-object? term-pkt)
(error 'git-checkout (~a "expected a null packet, received something else\n"
" packet: ~s")
term-pkt))
(set! ok? #t)
(values i (open-output-nowhere)))
(lambda ()
(unless ok? (close-input-port i))))]
[else
(error 'git-checkout "unrecognized transport\n given: ~e" transport)]))
;; want-step: transport-sym string natural string input-port output-port
;; -> (values input-port output-port)
;; Replaces the connection, if appropriate to the transport, for
;; writing the wanted references.
(define (want-step transport host port repo i o)
(case transport
[(git) (values i o)]
[(http https)
(close-input-port i)
(values (open-input-bytes #"") (open-output-bytes))]))
;; done-step: transport-sym string natural string input-port output-port
;; -> (values input-port output-port)
;; Replaces the connection, if appropriate to the transport, after
;; writing the wanted references and before reading the server's
;; response.
(define (done-step transport host port repo i o)
(case transport
[(git) (values i o)]
[(http https)
(define s (get-output-bytes o))
(define i
(post-pure-port
(string->url
(~a transport "://" host ":" port "/" repo
"/git-upload-pack"))
s
(list "Content-Type: application/x-git-upload-pack-request")))
(values i (open-output-nowhere))]))
;; ----------------------------------------
;; parse-server-capabilities : bytes -> (listof string)
@ -678,9 +758,17 @@
(define depth 1)
(define ref "master")
(define tmp-dir #f)
(define transport 'git)
(define-values (host repo dest)
(command-line
#:once-any
[("--git") "Use the Git transport (the default)"
(set! transport 'git)]
[("--http") "Use the \"smart\" HTTP transport"
(set! transport 'http)]
[("--https") "Use the \"smart\" HTTPS transport"
(set! transport 'https)]
#:once-each
[("--depth") d "Commit depth of <d> (default is 1, 0 means \"all\")"
(set! depth (string->number d))
@ -694,6 +782,7 @@
(values host repo dest)))
(git-checkout host repo
#:transport transport
#:dest-dir dest
#:tmp-dir tmp-dir
#:ref ref