#lang meta/web

(define-context "stubs/git"
  #:robots (add-newlines (for/list ([d '(plt libs testing play)])
                           @list{Disallow: /@|d|/})))

(provide git)
;; Stub page: per its own text, it exists only so that the site header can be
;; obtained for the gitweb server.
(define git
  @page[#:title "Development Repository" #:file ""]{
    This is a stub page to get the header for the gitweb server.})

;; ----------------------------------------------------------------------------
;; gitweb stuff

;; A promise for a two-element list: the text that precedes and the text that
;; follows the `{{{BODY}}}' placeholder inside the <body> element of a rendered
;; `git' page.  Raises a `gitweb-skeleton' error if the rendered page does not
;; have the expected shape.
(define header+footer
  (lazy (cdr (or (regexp-match
                  ;; extract just the meat between the <body>...</body>
                  #rx"<body[^<>]*>(.*?){{{BODY}}}(.*?)</body>"
                  (xml->string @page[#:id 'git #:html-only #t "{{{BODY}}}"]))
                 (error 'gitweb-skeleton "internal error")))))
;; The two halves above, each written out as its own file.
(define header @plain[#:file "header.html" (car (force header+footer))])
(define footer @plain[#:file "footer.html" (cadr (force header+footer))])

;; The logo is copied under the name that the generated config refers to.
(define gitweb-logo (copyfile (in-here "gitweb-logo.png") "tiny-logo.png"))

;; Home text is lazy since it refers to `intro', which is defined further down.
(define home-text
  (lazy @text{
    @p{This is the Racket git server.}
    @p{See the "brief", PLT-oriented @intro{introduction to git}.}}))
(define home-file @plain[#:file "home-text.html" home-text])

;; The gitweb configuration file.  Everything inside the braces is emitted as
;; Perl text, so the `#' lines are Perl comments that end up in the output and
;; every statement must stay on its own line; `@||' produces an empty line.
(define gitweb-config
  @plain[#:file "gitweb_config.perl"]{
    our $projectroot = "repos";
    @||
    # used in the page's title tag
    our $site_name = "Racket Repository";
    # header/footer and home text html files
    our $site_header = "header.html";
    our $site_footer = "footer.html";
    our $home_text = "@(regexp-replace #rx"^.*/" (home-file) "")";
    push @"@"stylesheets, "@(the-resources 'style-path)";
    @||
    our $favicon = "@(the-resources 'icon-path)";
    our $logo = "@gitweb-logo";
    our $logo_url = "http://racket-lang.org/";
    our $logo_label = "Racket Homepage";
    @||
    # specification of visible repositories (managed by gitolite)
    our $projects_list = "plt-repos";
    our $projects_list_description_width = 80;
    our $default_projects_order = "age";
    # hide repositories that are not listed in the above
    our $strict_export = 1;
    @||
    # show repository only if this file exists
    our $export_ok = "git-daemon-export-ok";
    @||
    # respond with "server busy" above this
    our $maxload = 4;
    @||
    # label for the "home" link
    our $home_link_str = "root";
    @||
    $feature{'pathinfo'}{'default'} = [1];
    $my_uri = "http://git.racket-lang.org/";
    $home_link = "http://git.racket-lang.org/";
    # ? $feature{'javascript-actions'}{'default'} = [0];
    @||
    # some smaller features, usually overrideable by individual repos
    $feature{'grep'}{'default'} = [1];
    $feature{'grep'}{'override'} = 1;
    $feature{'blame'}{'default'} = [1];
    $feature{'blame'}{'override'} = 1;
    $feature{'pickaxe'}{'default'} = [1];
    $feature{'pickaxe'}{'override'} = 1;
    $feature{'snapshot'}{'default'} = ['zip', 'tgz'];
    # $feature{'snapshot'}{'override'} = 1;
    $feature{'avatar'}{'default'} = ['gravatar'];
    # $feature{'avatar'}{'override'} = 1;
    })

;; ----------------------------------------------------------------------------
;; git "guide"

(define intro
  (let ()
    ;; small markup helpers used throughout the guide text
    (define (cmd . text) (span class: "code" text))
    (define (path . text) (span class: "path" text))
    ;; link into the git documentation; a #f `name' links to the docs root
    (define (man name . text)
      (a href: (list "http://www.kernel.org/pub/software/scm/git/docs/"
                     (and name (list name ".html")))
         (if (null? text) (span class: "man" name) text)))
    (define (selflink . url) (a href: url url))
    (define git-host "git.racket-lang.org")
    (define at-racket "@racket-lang.org")
    (define at-git-racket "@git.racket-lang.org")
    (define (npre . text) (apply pre style: "margin-left: 0;" text))
    (define style
      @style/inline[type: 'text/css]{
        .p {
          display: block;
          margin: 1em 0;
          @; text-indent: 1em;
        }
        .code, .path, .man, pre {
          font-family: monospace;
          font-size: large;
          font-weight: bold;
          background-color: #eeeeee;
        }
        .code, .path, .man {
          white-space: nowrap;
        }
        .the_text a:link, .the_text a:visited {
          text-decoration: underline;
        }
        .the_text pre {
          margin-left: 2em;
          padding: 0.6em 0 0.6em 0.6em;
        }
        .the_text ul, .the_text ol, .the_text dl, .the_text li,
        .the_text dt, .the_text dd {
          margin-top: 1em;
          margin-bottom: 1em;
        }})
    ;; xhtml strict doesn't allow lists inside

;; <p>, so fake our own paragraphs
;; using divs:
(define p* (make-separated-tag values (λ text (apply div class: 'p text)))) @page[#:title "git intro" #:extra-headers style]{ @sections[#:newpages? #t] @div[class: 'the_text]{ @section{Getting git} @p*{ I @strong{highly} recommend getting a new git installation. Git itself is pretty stable (that is, you probably will not run into bugs with whatever version you have installed), but there are many usability related improvements. Specifically, I am using 1.7.x and it is likely that some things in this document are specific to that version. @~ You can @a[href: "http://git-scm.com/download"]{download a recent version}, available in binary form for several popular platforms. In addition to these, you can get a build for @ul*{ @~ Ubuntu: @pre{sudo add-apt-repository ppa:git-core/ppa sudo apt-get update sudo apt-get install git-core} @~ OSX using macports: @pre{sudo port selfupdate sudo port install git-core +svn}} (For OSX, you can also get @a[href: "http://gitx.frim.nl/"]{@cmd{GitX}} — it's a good gui front-end for git, similar to @cmd{gitk} and @cmd{git gui}.) @~ You can also build git from source — here are the steps that I'm using to install a new version: @pre{GVER=1.7.10 BASE=http://git-core.googlecode.com/files TARGET=/usr/local cd /tmp; curl $BASE/git-$GVER.tar.gz | gunzip | tar xf -; cd git-$GVER make prefix=$TARGET all && sudo make prefix=$TARGET install} If you do this and you want the @man[#f]{man pages} too, then getting the pre-built man pages is the easiest route (building them requires some “exotic” tools): @pre{cd $TARGET/share/man curl $BASE/git-manpages-$GVER.tar.gz | gunzip | sudo tar xf -}} @section{General git setup} @p*{ Commits to a git repository are done locally, so git needs to know who you are. (Unlike subversion, where you need to identify yourself to be able to talk to the server, so the commit object is created there based on who you authenticated as.) 
To get git to know you, run the following two commands: @pre{git config --global user.name "My Name" git config --global user.email "foo@at-racket"} This sets your @em{default} name and email for @em{all} repositories — it stores this information in @path{~/.gitconfig} which is the global configuration file, used for such defaults. You can edit this file directly too — it is in a fairly obvious textual format. There is a lot that can be configured, see below for some of these (and see the @man{git-config} man page for many more details). @~ In addition to this file, each repository has its own configuration file (located at @path{.git/config}). Whenever git needs to check some option, it will use both the repository-specific config file (if you're in a repository) and the global one. The @cmd{--global} flag above tells git to set the option in the global file. Note that a configuration file cannot be part of the repository itself — so when you get a repository, you still need to do any local configuration you want. (This is intentional, since the configuration file can specify various commands to run, so it avoids a major security hazard.) @~ Important: this sets your default identity name and email for @em{all} repositories. This may be a problem if you want to commit to different git repositories under different identities. See the section on customizing git below for more details on this.} @section{SSH setup} @p*{ Since git is a distributed system, you can do everything locally on your own repository, but obviously, the goal is to communicate with other people so you'll need to push these changes somewhere else. The most popular way to communicate with remote repositories — including repositories on the PLT server, is via ssh. (Access is controlled via a tool called “gitolite” — more on this below.) 
The username and hostname of the server is @cmd{git@at-git-racket} — and you should be able to connect to this account using the ssh identity key that corresponds to the public key that you use with the git server. To try it, run @pre{ssh git@at-git-racket} and the server (gitolite, actually) should reply with information about your current permissions. The exact details of this are not important for now, just the fact that you were able to connect and get some reply. @~ Using an ssh configuration file (usually @path{~/.ssh/config}), you can set up a short name for the server. For example, you can have this: @pre{Host pltgit HostName @git-host User git} and now you can simply use @cmd{ssh pltgit info} instead of the last example: @cmd{ssh} will know that @cmd{pltgit} is actually defined as @cmd{git@at-git-racket}. @~ This is the @strong{preferred} way to set things up: besides being more convenient in that you need to type less — it is also a useful extra level of indirection, so if the server settings ever change (for example, we might switch to a non-standard port number), you can simply edit your ssh config file, and continue working as usual. In addition, such a configuration is needed if you created a specific ssh identity file to be used with git — specifying an alternative identity file on the @cmd{ssh} command line is possible (an @cmd{-i} flag, in the case of openssh), but remember that most of your interactions with the remote server are done implicitly through git. (It is possible to configure how git invokes ssh, but it is much easier to just configure ssh). In this case, you will have: @pre{Host pltgit HostName @git-host User git IdentityFile ~/.ssh/my-plt-git-identity-file} @~ In addition to an ssh configuration file, git also has a way to create prefix shorthands. 
For example, if you use this configuration: @pre|{git config --global url.git@foo.org:.insteadOf foo:}| then whenever git expects a repository URL, it will replace @cmd{foo:} with @cmd|{git@foo.org:}|, for example: @pre{git clone foo:bar} While it is possible to use this instead of an ssh config file to access the @cmd{plt} repository, the former is preferable. The reason for that is that you will also interact with the server directly via ssh commands (described in the following section). Keeping the alias in your ssh configuration means that you will use the same alias for both @cmd{git} commands and other @cmd{ssh}-based commands. You may still want to use it for other servers, specifically, here is a popular setup for github (this is configuration text that you can paste into your global @cmd{.gitconfig} file): @pre|{[url "git://github.com/"] insteadOf = github: [url "git@github.com:"] pushInsteadOf = github: pushInsteadOf = git://github.com/}| It translates @cmd{github:} to a github read-only @cmd{git://} URL, and it translates pushes to the same prefix to use github's ssh URLs. Note that it also translates the read-only @cmd{git://} url to an ssh url for pushing. (The same setup can be used for @cmd{gist.github.com}, to deal with github gists via @cmd{git}.)} @section{Gitolite: the server's gateway} @p*{ All access to the PLT server is done via @cmd{ssh}, and this is where gitolite comes in as the “who can do what” manager. What actually happens on the server is that no matter what command you're trying to run (as you usually would, for example: @cmd{ssh somewhere ls}), the server has settings that make it always run its own command — and that is a gitolite script. The script knows the command that you were actually trying to run, and it will reply based on that. 
In the above ssh example, you're not specifying any command (so if it wasn't for the pre-set gitolite script, you'd be asking for a remote shell to start), and gitolite responds by telling you about your permissions. @~ This is actually the @cmd{info} command, so you get the same reply with @cmd{ssh pltgit info}. Again, this connects to ssh and tries to run @cmd{info}; gitolite sees that you're trying to run @cmd{info}, and instead of running it, it responds with that information. There are a few additional commands that you can use this way — these are all “meta commands” in the sense that you're not interacting with a git process on the other end, but rather get gitolite to perform various tasks on your behalf. You can run the @cmd{help} command (@cmd{ssh pltgit help}) to see a list of available commands. They are mostly useful in dealing with your private repositories on the server, which will be discussed further below.} @section{A (very) quick introduction to git} @p*{ This is a quick description; see the last section for more resources (specifically, @a[href: "http://eagain.net/articles/git-for-computer-scientists/"]{ Git for Computer Scientists} covers these basics well). Understanding how git models and stores data will make it significantly easier to work with it. @~ A git repository is actually a database of a few kinds of objects, which form a DAG. There are only a few of these kinds of objects, and they are all addressed by the SHA1 checksum of their contents. You will generally see a lot of these SHA1 strings (40 hexadecimal characters), since they form a kind of a universal address for such objects. (For convenience, any unique prefix of a SHA1 can be used with git commands when you need to refer to it.) Whenever the following descriptions mention a pointer — this is actually such a SHA1 hash.} @ul*{ @~ A @em{blob} object is a generic container for any information, which (usually) represents a file. This object has no pointers to any other objects. 
It does not have anything except for the actual contents: no name, permission bits, etc. @~ A @em{tree} object represents a directory hierarchy: it contains a list of names, and for each name a pointer to the object that is its contents. Some of these will point at blobs (when the tree contains a file), and some of these will point at other trees (when it contains a sub-tree). (These objects are similar to directories in a file system in that they contain all “meta” information on files: their names and permission bits are kept here.) @~ A @em{commit} object represents a versioned snapshot of a tree, forming a line of work. It has the following bits of information: @ul*{@~ tree: a pointer to the tree object that was committed @~ parent: a pointer to the previous commit, which this one revised @~ author: the identity of the commit author (name, email, date) @~ committer: the identity of the committer @~ the text of the commit message (which can be arbitrarily long)} The parent field is actually any number of parents: there will be no parents if this is the first commit in the line of work, or more than one parent if this is a “merge” commit that merges two lines of work. Furthermore, there is nothing that prevents a git repository from having completely separate lines of work — in fact, you can have several independent projects contained in your repository. @br @small{(Note that git distinguishes the author of a commit from the person who actually performed the commit, for example — a patch could be created by X, and sent to Y to be committed.)} @~ Finally, there is a @em{tag} object, which is very roughly a pointer to another object (almost always a commit), and is not important for now.} @p*{ The fact that all of these objects are addressed by the SHA1 hash of their contents has some immediate important implications.} @ul*{ @~ Since SHA1 are cryptographic checksums, they can be considered @em{unique} for all practical purposes. 
@~ The git repository is inherently hash-consed: you can never have “two identical files” in git — because a file is stored at its SHA1 hash, two identical files will always be stored once. (Note that the name of a file is stored in the tree that contains it, so the SHA1 of the contents does not depend on it.) The same holds for two trees: if you have two identical directories (same contents of files, same names, etc), then there will actually be only one tree stored in the repository. @~ Furthermore, these addresses are actually global: any two repositories that hold a file with the same contents will have it at the exact same SHA1 hash. (For example, if I have a repository that contains several projects, and each project contains several copies of the same LGPL text, then I'll have only a single blob object with that contents.) This is not only making the store efficient, it also makes it possible to refer to an object by its hash — for example, you can refer to the SHA1 of a specific file at a specific version in an email, and this will have the exact same meaning for anyone that reads the file (eg, anyone can run @cmd{git show @i{SHA1}} to see that file). (This does require that the readers have the actual object in their repository, of course — but no mistakes can happen, statistically speaking.) @~ This holds for commits too: since a commit has the SHA1 of the tree it points to, then the commit SHA1 depends on the tree it points to. More importantly, since a commit object has the SHA1 of its parent(s), then the commit depends on them. This means that “replaying” a number of commits on a different parent commit (eg, when doing a “rebase”) will always result in a separate line of commit objects. These SHA1s are also global, meaning that talking about a specific revision by its SHA1 will always refer to it unambiguously (as long as others have that object in their repositories). @~ By itself, this kind of storage @em{cannot} have any reference cycle. 
(At least there is no practical way to get one.) The storage is therefore inherently a DAG. In addition to this object store, git does have a number of external references (eg, a branch is actually a pointer to a SHA1) — and those could be arbitrary, but the object storage itself cannot have cycles. @~ The fact that a commit has a pointer to a tree is what makes git keep revisions of the whole tree — a commit cannot mark a change to a subtree. (At least not with the usual higher-level commands that git implements.)} @p*{ On top of this object store, there is a layer of meta-information about it. The most important components here are branches (and tags). A branch is basically a file that has the SHA1 of a specific commit (for example, your @cmd{master} branch is a SHA1 that is stored in @path{.git/refs/heads/master}). This is what makes branch creation extremely cheap: all you need to do is create a new file with the SHA1. @~ In addition, the @cmd{HEAD} (where your working directory is currently), will usually have a symbolic reference rather than a SHA1 (you can see this symbolic reference in the @path{.git/HEAD} file, which should usually look like @cmd{ref: refs/heads/@i{branch-name}}). When you commit a new version, a new commit object is created, and the branch that the @cmd{HEAD} points to is updated. It is also possible to checkout a specific SHA1 of a commit directly — the result of this is called “detached HEAD”, since the HEAD is not a symbolic reference. The possible danger in doing this is that @cmd{git commit} will create new commits that are derived from the one you're on, but no branch is updated; if you later checkout something else, no reference is left to your new commit which means that it could be lost now. For this reason, if you checkout a SHA1 directly, git will spit out a detailed warning, including instructions on how you could name your current position (create a branch that points there). 
@~ Tags come in two flavors: lightweight tags are SHA1 pointers like branches. The problem with this is that such a tag could easily move to a different commit, which is considered bad practice. For this reason, there are also “annotated tags”, which are tag objects that are created in the object store. These tags contain information that is similar to a commit (there's the tagger's identity, the commit that it points to, and a log message) — and they are reliable since you can refer to their SHA1. In this case, the symbolic reference for such a tag (its name) will point to the tag object in the store (it is also possible to move it, but that would also be bad practice). Furthermore, tags (of both kinds) can point to any object in the store — they can point to a tree or even to a specific blob. This is sometimes used to store meta-information (eg, web pages) inside the repository. (The repository for git itself has a tag that points to a blob holding the maintainer's GPG key.) @~ Note that all of this is under a higher level of managing information between branches and repositories, with push/pull being the main operations at that level. 
A high-level overview (more below): @ul*{ @~ a branch is a line of development, represented as a pointer to the commit at its tip; @~ branches can be organized into hierarchies using @path{/} as a separator; @~ some branches are local, and some are remote — remote ones are named @path{remotes/origin/@i{branch}}; @~ local branches are represented as files in @path{.git/refs/heads/@i{branch}} and remote ones are in @path{.git/refs/remotes/origin/@i{branch}}; @~ @cmd{origin} is just the conventional name for the original repository you cloned — later on you can add new remote repositories so you can push and pull to/from them conveniently; @~ some local branches are set to track remote ones, usually (but not necessarily) the two will have the same name; @~ you can also have local branches to track other local branches (with pushing and pulling happening inside your repository); @~ @cmd{git fetch} is used to update your remote branches — ie, connect to the remote repository, get new commits (and the required parents and trees), and update your remote branch with the new tips; @~ @cmd{git merge} and @cmd{git rebase} are used to update one branch with commits on another; @~ @cmd{git pull} is, roughly speaking, a convenient way to do a fetch followed by a merge (or a rebase, when used with @cmd{--rebase}).} @~ There are several git tools that are relevant here. These are @em{not} commands that you need to know for everyday use — so you can ignore this part. It's only relevant if you want to see more of the low level structure (or maybe if you want to write code that interfaces with a repository at this level).} @dl*{ @~ @cmd{git show @i{SHA1}} @~ Show the object, in some appropriate way based on the type of the object. (For blobs it shows the contents, for trees you get a listing of its contents, and for commits it shows the log and the patch.) 
@~ @cmd{git cat-file {-t | -s | @i{type} | -p} @i{SHA1}} @~ A more low-level command that tells you the type/size of an object (@cmd{-t} and @cmd{-s}), or shows the contents of an object as-is when given a type. @cmd{-p} will “pretty-print” the object, eg, showing the contents of a tree object instead of dumping its binary encoding. @~ @cmd{git gc} @~ Starts from a rootset holding all known references (branches, tags, etc), and collects dangling objects. Such objects are generated due to various reasons — for example, rebasing means that new commits are generated, and the old ones are kept around. Actually, this will not remove recently referenced objects — there is a protection mechanism that keeps them around for a while, so if you somehow mess things up there is still a way to recover. @~ @cmd{git fsck} @~ Does a “file system check” on the repository. @~ @cmd{git rev-parse @i{symbolic-name}} @~ Prints out the full SHA1 of a symbolic name (eg, a branch name or a tag name). Will also print out the SHA1 given a possibly short prefix of one. (Actually, this command can also show other information about a repository, which makes it an important entry point for programs that deal with a repository.)} @section{Clone the PLT repository} @p*{ As you probably know by now, in git you don't checkout a repository — you clone it, getting a copy of the complete repository you cloned. This includes the object store and the various references (branches and tags). There are several ways to get the PLT repository, but the one that is relevant to work on it is to do so through ssh — since this allows pushing changes back to the server. (It is also possible to clone from one place and push to another, but if you start with cloning through ssh your clone will be already set up to push changes back.) 
The information that gitolite gives you (with @cmd{ssh pltgit info}, assuming the above ssh setup) includes two repositories that you have write access to: @cmd{plt} is the main repository, and @cmd{play} is setup similarly (intended to try things out, see the “Fooling around” section below). To get the main repository, run @pre{git clone pltgit:plt} which will create a @path{plt} directory with your new clone. You can now start working in this directory. @~ The repository is also available from other sources, some can be used for read-only cloning: @ul*{ @~ @cmd{git clone git://@|git-host|/plt.git}@br cloning the repository using git's own network protocol @~ @cmd{git clone http://@|git-host|/plt.git}@br clone the repository over http @~ @cmd{git clone http://github.com/plt/racket.git}@br this uses the repository mirror on github, which is automatically kept in sync (you can also use @cmd{https://...})} and some present a web interface for additional information: @ul*{ @~ @cmd{@selflink{http://@|git-host|/plt}}@br a web interface to inspect the repository @~ @cmd{@selflink{https://github.com/plt/racket}}@br github's fancier web interface}} @section{Start working: git commits vs subversion commits} @p*{ As seen in the previous section, you start with @pre{git clone pltgit:plt@";" cd plt} And now you get to actually do some work. @~ For the normal cycle of operations, working with git is not all that different from working with subversion — you would change some files, and then: @pre{git commit some/paths} or @pre{git commit some/paths -m "add some feature" -m "requires another"} only now the commit lives in @strong{your clone only}, not in the server (which is why committing is blindingly fast, not requiring a network connection). To push your commits to the server, run @cmd{git push}, and to pull updates from the server run @cmd{git pull}. 
This is obviously very much oversimplifying the process: mainly neglecting to talk about updates on the server when you already have local changes. (See below for a more detailed explanation.) Note that in these examples I'm explicitly specifying the paths to commit, either the files that you want to commit or a directory where you want to commit all changes. See the section below on the “staging area” for more details. @~ One major difference to keep in mind is that git commits are @strong{not} like subversion commits. (This is confusing since many places that discuss the difference between the two and/or try to teach git to subversion users almost always work under the assumption that commits in the two systems are the same.) The thing is that git commits are done at a finer level than subversion commits — since a commit is done locally and not on the server. To really imitate how subversion works, you would push all commits right after you create them — essentially equating commits with pushes, which is how you work with a subversion repository. But by just @em{not} doing this, you will immediately get some of the benefits that git gives you. So a better way to think about it is: in git you commit at points that make sense for the respective changes, usually at a finer level than subversion commits. Then, you push back a bunch of commits to the server — whether one or a hundred. The point where you push your changes to the server is effectively the point where you decide that you're in a good enough state to make your work public. @~ Incidentally, following this intuition, drdr is running a build for every push to the server — not for every commit. When you push to the server, it will tell you which push number this is — these numbers are going to be used by drdr, and they (very!) roughly correspond to subversion commits. (Currently, every push gets a number, but in the future this might be used only for pushes to the master branch.) 
There's no plan at the moment to use these numbers for anything else.} @section{Fooling around with git} @p*{ Experimenting with git is easy to do, and the server is set up to make it even easier. You can use one of the following ways to experiment safely with the main repository: @ul*{ @~ There is a @cmd{play} repository on the server. This repository is very similar to the @cmd{plt} repository, and it is set up in the same way that @cmd{plt} is. Feel free to destroy it in any way you want, even if it becomes unusable, it's easy to just recreate it. @~ You can create (and later delete) your own repositories — including making your own copy of the main repository, an operation that is known as “fork”. Your fork will be created efficiently (ie, creating a fork of the @cmd{plt} repository is cheap), but any changes made to it will not affect the main repository. A fork is created with a gitolite command, and once it's there you can clone it and eventually delete it. Here are the relevant commands — use your actual username in place of @cmd{$user} (or have @cmd{$user} set to your username): @ol*{@~ @cmd{ssh pltgit fork plt $user/myplt} @~ @cmd{git clone pltgit:$user/myplt} @~ ...play with this clone, push, pull, etc... @~ @cmd{ssh pltgit delete $user/myplt}} More on user repositories below.}} @section{The staging area} @p*{ Something that tends to confuse people is git's “staging area”. This is a concept that is unique to git — roughly speaking, you can have three versions of a tree: @ul*{ @~ the files that you actually see (and edit) — the working directory, @~ another is the staging area which you add stuff to from the working tree using @cmd{git add}, @~ and then there is the tree that is in the HEAD with all prior versions.} @~ The thing that can confuse here is that when you @cmd{git add some/file} for a file that you edited (or created) and then edit it further, then the version that will get committed by a plain @cmd{git commit} will be the one that was added. 
Note that @cmd{git status} will tell you which modifications are in the staging area waiting to be committed, and which modifications are in your working directory — in the given example, it will tell you that @path{some/file} is in both. @~ The staging area can be useful at times, but most likely at the beginning stages you will want to just avoid it. The good news is that it is easy to do so.} @dl*{ @~ @strong{Avoid the @cmd{-a} flag}. @~ @p*{ Before we see how to ignore it, note that there are many web pages that will tell you to use @cmd{-a} with the commit command. This will make git commit all changes to tracked files — @strong{including tracked files that are outside of your current directory}, and this can make you commit changes that you didn't intend to commit.} @~ @strong{Always specify a path to @cmd{git commit}}. @~ @p*{ The easiest way to avoid the staging area is to specify the path(s) to what you want to commit, possibly @path{.} for all changes in the current directory (and below). Specifying a path this way will make @cmd{git commit} behave very similarly to subversion: tracked files that were modified will get committed, and added files (with @cmd{git add}) that are listed in the paths-to-be-committed are also committed. Tracked and added files that are not listed (and not in a specified subdirectory) are left as-is. So, if you had a habit of doing this with subversion (@cmd{svn commit .}), then git will essentially do the same. You will still need to use @cmd{git add} for newly created files, but this is essentially the same as with subversion. @~ It is also possible to “make up new git commands” for yourself. See the following section on the subject: it adds a new @cmd{git ci} command that passes @path{.} to @cmd{git commit}, similarly to what @cmd{svn ci} does by default.} @~ It is a good idea to @strong{avoid using the @cmd{-m} flag}, until you're more comfortable with git. 
@~ @p*{ Let git pop up an editor to write the commit log: the file that you will edit will list the changes that you are about to commit as well as changes that you are not going to commit. Glancing through it, you will see changes that you missed, furthermore, the paths are relative which makes it easy to quickly distinguish paths in the current directory and outside of it (the latter will begin with @path{../}). If you see any problems, just make sure that you have no commit message in the editor and when you quit it git will abort the commit (same as subversion).} @~ @strong{Don't push out all commits to the server immediately}. @~ @p*{ Even if you did commit something by mistake, it is possible to undo the commit — run @cmd{git reset HEAD^}, which will undo the last commit (it moves the branch to the parent commit), and the changes that are no longer committed will be left in your working directory, so you get to try the commit again. Note that this is possible @strong{only if you didn't push out the commit} that you're undoing — if you did, then the server will later not allow you to push changes that are not strict extensions of what it has (since this is likely to confuse other people who already got your commit). @~ So in general, remember that you can commit often, and commit when it makes sense to do so, and push commits out only when you're done with whatever you were working on. Consider your local git history as something that you have full control over: you can undo commits and redo them (in fact, @cmd{git commit --amend} does just that: undo the last commit, and combine it with new changes — it's a solution for “oops commits”), you can rebase them, and you can just throw away everything and start from scratch. But when you do push your history out, the party is over, and any mistakes will need to be rectified in further commits (eg, you can no longer use that @cmd{--amend} flag, you have to do an “oops commit”). 
@~ (BTW, strictly speaking, it is only the policy on our server that forbids such rewritten history — since this is likely to be a mistake for now, and if it happens most people will be confused about what needs to be done.) @~ Also, as said above, pushing all commits immediately means that you're essentially restricting yourself to the same mode of operation as subversion. Same mode, but more complicated — and you won't enjoy any of the benefits, which will guarantee that you will suffer.}} @section{Configuring and extending git} @p*{ As mentioned above, git uses several configuration files that customize various aspects. The two important ones are your global file (@path{~/.gitconfig}) and a per-repository file in @path{.git/config} at the repository root — whenever a value is needed, git will first consult the repository configuration, and if the option is not set there it will try your global version. (It will also look at a system-wide configuration, but this is irrelevant here.) Configuration options names are separated by a @path{.}, and configuration files have a simple syntax, with @cmd{foo.bar} option listed as a @cmd{bar = some value} line in a @cmd{[foo]} section. Note that you can set @em{any} configuration you want to, no restrictions. This can be useful for customizing various extensions, including scripts that you may want to write (for example, the git server has a script that checks the @cmd{hooks.counter} option to know if it should keep track of pushes). This is facilitated by several options to @cmd{git config} which makes it easy to query configurations from scripts etc. @~ To edit your configuration options you can either use the @cmd{git config} command, or you can edit the file directly. When @cmd{git config} changes the file it rewrites only part of the file and leaves the rest untouched, which conveniently leaves your own format and any comments you might want to include. 
To set a value through the command and then get it: @pre{git config foo.bar some-value git config foo.bar} and you can add a @cmd{--global} flag to either form to use only your global configuration file. There are many other options — for dealing with keys that must be booleans or integers, for keys with multiple values, etc. The @man{git-config} man page will tell you much more on this. @~ The man page also lists the configuration options that customize various functionalities. Here are some important ones that you should consider setting (each listed as a command that sets it globally): @ul*{ @~ @npre{git config --global user.name "My Name" git config --global user.email "foo@at-git-racket"} @p*{ As said in the beginning of this text, you will likely want to set a default username and email for yourself. But note that if you do set this globally, it will be your default identity for all repositories. This makes sense only if you commit to PLT-related repositories, but it can be confusing if you're also committing to some other non-PLT-related repositories and want to commit under a different email (or name) — for example, you may want to commit to a public project with a gmail address, and to a departmental repository with your @cmd|{foo@cs.bar.edu}| email. @~ You could set the racket-lang.org identity locally in your PLT clone or you could set your other identity in the other repository, but in any case you should be aware of this and avoid letting git guess your name and email. (Some confusion is likely to happen anyway, and git has a way to “map” some name/email to another when mistakes happen.)} @~ @npre{git config --global push.default tracking} @p*{ By default, when you run @cmd{git push}, git will push all branches that correspond to branches in the remote repository. This can be surprising if you're working on several branches since it will push them all out. 
Setting this option to @cmd{tracking} will make git push the current branch to the branch it is tracking. @~ Another option for this is @cmd{current}, which makes @cmd{git push} always push the current branch to the remote it was cloned from. This is convenient in that you never need to set up how local branches track remote ones — it's as if all local branches @em{always} track all remote branches under the same name. For example, after you clone an empty repository (see the user repositories section below), a @cmd{git push} will push a master branch remotely — whereas with @cmd{tracking} you need to have the first push explicitly specify the branch to push, usually @cmd{git push origin master} (this sets things up so later you can just run @cmd{git push}). However, using @cmd{current} you can no longer push from one local branch to another local branch it is set to track. @~ So a possible conclusion here is that you should use @cmd{tracking}, unless you plan on branches to always track remote branches by the same name. @cmd{tracking} is often preferred over @cmd{current}.} @~ @npre{git config --global core.excludesfile "~/.gitignore"} @p*{ You'll probably want to always ignore a number of common patterns, like backup files or OSX @path{.DS_Store} files. To do this, you first set a default file as shown here (note that @path{~} is quoted, and git will expand it to whatever your home directory is). If you have this setting, you can then create a file at this path with patterns for files that you always want to ignore. This file has shell-patterns (and possibly @cmd{#}-comments) — for example: @pre{# backups *~ # autosaves (note the #-quoting) \#* # OSX junk .DS_Store} (See the @man{gitignore} man page for a few more details.) @~ In addition to this, git repositories can have their own @path{.gitignore} files (unlike @path{.git/config} files), which are combined hierarchically together with this global option. 
In fact, you don't really need to set the above ignores for the PLT repository since they're already included in its toplevel @path{.gitignore} file — but doing so is still a good idea since you're likely to work on other repositories too.} @~ @npre{git config --global core.editor emacs git config --global core.pager less} @p*{ These two settings are used to tell git which command to use for editing log messages, and which command is used to paginate output. (The former might already be set in your environment as the value of the @cmd{EDITOR} variable.) If you set the latter to @cmd{cat}, git will just spill all output directly. In addition to these, you can also control which individual commands use a pager, for example, to disable the pager for @cmd{git log}, you can do this: @pre{git config --global pager.log false}} @~ @npre{git config --global color.ui auto @i{...} git config --global color.branch.current yellow red bold git config --global color.branch.local yellow git config --global color.branch.remote green @i{...}} @p*{ These settings control how git uses colors: whether it shows them, and which colors it will use for various outputs. There are many of these settings, which you can find in the @man{git-config} man page.} @~ @p*{ @cmd{remote.origin.*}, @cmd{branch.master.*} @~ Git keeps track of what the @cmd{origin} repository is and how branches track other branches in the configuration file too. (You will have such entries for all known remote repositories and branches.) Usually you set these values (often implicitly) via various git commands — but you might want to look in your configuration file if you want to tweak things yourself. Note that for configuration names with more than two parts, the section name will be something like @cmd{[remote "origin"]}.} @~ @p*{ @cmd{sendemail.identity}, @cmd{sendemail.from}, @cmd{sendemail.bcc}, @cmd{sendemail.suppresscc}, ...
@~ These settings configure @cmd{git send-email}, which is used to send patches from your repository elsewhere. You will probably want to customize them if/when you get to use this facility often. (See below.)}} @~ In addition to these settings, you can extend git with your own aliases and commands. Aliases are stored in your git configuration — so you can use @cmd{git config} to create an alias, for example, @pre{git config --global alias.up "pull --stat --all"} creates a global @cmd{git up} command which is actually a short alias for running @cmd{git pull --stat --all}. @~ @strong{Notes about aliases:} @ul*{ @~ To edit aliases, it is more convenient to edit your configuration file directly. @~ Since aliases are stored in git configuration files, they can be made local to each repository. @~ When command-line arguments are given to the alias, they will be appended to the alias text. @~ Aliases @em{cannot} override git commands; this is intentional, to avoid scripts breaking due to modified commands. @~ An alias that starts with a @cmd{!} character will be run as a shell command. For example, you can use @pre{k = "!gitk -d"} to make @cmd{git k} run the gitk program with the @cmd{-d} flag. @~ Some aliases that I find useful are: @pre{# satisfy the "up instinct" up = pull --ff-only --stat --all # quick status, similar to what subversion shows st = status -s # we will be dealing more with branches br = branch}} @~ In addition to aliases, you can create new git commands using a script that is called @cmd{git-@i{something}} somewhere in your path. (Note that these cannot override known git commands either.) Such commands will be available as @cmd{git @i{something}}. One use for this is using our facility for managing file properties — the @path{collects/meta/props} program. 
To do this put this in a file called @cmd{git-prop} somewhere in your PATH, for example, @path{~/bin/git-prop}: @pre|{#!/bin/sh top="$(git rev-parse --show-toplevel)" || exit 1 exec "$top/collects/meta/props" "$@"}| then run @cmd{chmod +x ~/bin/git-prop}, and you can now use it as a git command (try @cmd{git prop -h}). Note the use of the @cmd{rev-parse} command: it will display the repository root, which means that you will get to run the props script of the repository you're @em{currently} using. (There are many git commands that are useful for such scripts.) @~ Another useful script is @cmd{git-ci} which mimics the behavior of @cmd{svn commit} (avoiding confusion with the staging area). As said above, a good way to achieve this is to specify the current directory (@path{.}) if you don't specify any other path. If you save this as @path{git-ci}, you will get a @cmd{git ci} that does just that: @pre|{ #!/bin/sh add_dot=yes; for p; do if [ -e "$p" ]; then add_dot=no; fi; done if [ -e "$(git rev-parse --git-dir)/MERGE_HEAD" ]; then add_dot=no; fi if [ -d "$(git rev-parse --git-dir)/rebase-apply" ]; then add_dot=no; fi if [ $add_dot = yes ]; then git commit . "$@"; else git commit "$@"; fi }| This small script will basically check all arguments and see if one is an existing path (or if you're resolving a merge). If none are, it adds @path{.} as a first argument (this avoids confusing it as a value for some flag). Note that this is not completely foolproof: for example, if you'll use the @cmd{-m .} hack, it will assume that you did specify a path. (But you should really avoid such log messages.)} @section{User repositories} @p*{ As mentioned above, the PLT server allows you to create your own repositories. Repositories on the server can be organized in a nested directory structure, and you “own” all repositories that are in a directory with your username. 
The gitolite @cmd{info} command that was mentioned above shows you this with a @cmd{C CREATER/.*} line: this means that you can create any repository if it is in a subdirectory with your username. (In this discussion, “the server” is actually the gitolite script that runs on the server.) @~ Any git operation that you do on a repository that you own which does not exist will make the server create it for you — for example, if you clone such a repository. To run these examples use your git username instead of @cmd{$user}, or simply set the @cmd{user} variable in your shell (as this example shows) — this is only to make copy-pasting easy, of course. @pre{user=eli # your own username here git clone pltgit:$user/foo} (You are encouraged to run these commands — at the end of this section you'll see how you can clean things up.) @~ What will happen now is (a) git will initialize a @path{foo} repository for you, (b) it will connect to the server to clone its contents, (c) the server will notice that it doesn't exist so it will create it, (d) your git process will clone the empty result. Because the remote repository is empty, git will complain that “You appear to have cloned an empty repository” — this is expected, so you shouldn't worry about it. Once you have your (empty) clone, you can populate it as usual, then push the new content back to the server's copy: @pre{cd foo @i{...create some files...} git add . git commit -m "initial content" git push origin master} Note that the last command explicitly names the branch to push over — once this push is done, git will remember this relation and further pushes can be done with just @cmd{git push}. If you happen to forget this and use @cmd{git push}, then git will not push anything, and it will tell you about it and suggest specifying a branch. 
On the other hand, if you set the @cmd{push.default} configuration option to @cmd{current} (as described in the customization section above), then even in this first push you can just run @cmd{git push} since git assumes that you always want local branches to correspond to remote branches by the same name. @~ Instead of cloning a repository to create a new one, you could also start with an existing repository and simply push it to a yet-to-exist repository on the server. Again, the server will see that it doesn't exist and will create it for you (provided that it is in your directory). To continue the above example, I could now create a new repository from @cmd{foo}: @pre{# (still in the foo directory) git push pltgit:$user/bar} @~ There is, however, an issue of efficiency here: with this last command I just created a second copy of it all. This could be problematic if you have a large repository (eg, a copy of the @cmd{plt} repository). (Note that with subversion this is the only way to do things, but there you would create copies inside the tree, which subversion optimizes.) One nice feature of git is that creating a clone of a repository on the same filesystem will use hard-links for the clone, which makes the clone use very little additional space. But the problem is that you have no access to the PLT server. The solution here is in the form of a gitolite @cmd{fork} command (this is actually our own extension) — this command will create a clone on the server, starting from a specified repository. I could therefore create my @path{bar} repository as a copy of @path{foo} with the following: @pre{ssh pltgit fork $user/foo $user/bar} (Note that if you follow these examples and you already have @path{bar}, the server will tell you about it.) The result is a @path{$user/bar} repository that was cloned from @path{$user/foo}, and the two share their store using hard links. 
If the two repositories are updated with identical content, the new content will not be shared, but for a large repository like the @cmd{plt} repository you still get the benefit of having the bulk of the data shared (the complete store, at the time of forking). To get a feeling for how fast this is, you can now clone the @cmd{plt} repository to your own private copy: @pre{ssh pltgit fork plt $user/myplt} This would seem suspiciously fast for such a large repository — but this repository has most of the data packed (objects in the store are put in large “pack files”), so there are not too many files, and the server-side cloning basically created hard links to these files. The result is fast, efficient (even in speed: when you interact with your clone, files are likely to be paged in memory), and cheap. @~ As we've seen above, the gitolite @cmd{info} command lists the permissions that you have, but it doesn't show you the actual repositories. For this, there is an @cmd{expand} command. (Yes, this is not a great name; it's related to how gitolite was intended to be used. Remember that there is also a @cmd{help} command that describes the available gitolite commands.) When you run the expand command — @cmd{ssh pltgit expand} — you get a listing of all of the repositories that you can access, each with an indication of read permissions (@cmd{R}) or write permission (@cmd{W}). A @cmd|{@}| indicator means that you have the respective permissions because it is allowed for all users. Each repository is also listed with its owner, or @cmd{} in case it is a globally configured repository. @~ Some of the gitolite commands are used to configure your repositories — you can only use these with repositories that you own. @ul*{ @~ @cmd{getperms} and @cmd{setperms} — these are used to get or set permissions for your repositories. The first will print the current permissions (which will be initially empty), and the second will read the permissions on its input and set them. 
The format of the permissions is simple: each line begins with an @cmd{R} or @cmd{RW}, and then the usernames that this permission applies to. You can use the magic username @cmd|{@all}| to grant access to everyone in the system. For example, to grant read permissions to everyone, and write permissions to user1 and user2, create a file with: @pre|{R @all RW user1 user2}| and then run the setperms command with this as its input: @pre{ssh pltgit setperms $user/foo < the-file} You can also just run the @cmd{setperms} command and type in the permissions directly. Note that these permissions are not cumulative: every use of @cmd{setperms} specifies all permissions. (We might have a more convenient interface for all of this in the future.) @~ @cmd{config} — this command can be used to set known configuration options in your repositories. It works with sub-verbs: @dl*{@~ @cmd{ssh pltgit config list} @~ Displays the known configuration options @~ @cmd{ssh pltgit config get @i{repo} @i{config}} @~ Displays the configuration value of a specific repository @~ @cmd{ssh pltgit config set @i{repo} @i{config} @i{value}} @~ Sets the configuration value of a specific repository} These configuration options can customize aspects of the scripts that run after every push — currently, you can use this to set an email address to send notification emails to. Other configurations may be added in the future. (Note that this does not let you set any configuration, since some of these can execute arbitrary commands.) @~ @cmd{delete} — finally, this command can be used to delete repositories. For example, to clean up the above, you can now run: @pre{ssh pltgit delete $user/foo ssh pltgit delete $user/bar} The repositories are moved to a temporary holding directory, and will eventually be removed. The bottom line here is that if you lost anything by mistake recently, chances are there's a backup of your repository.}} @section{Working with git} @subsection[#:newpage?
#f]{Basics} @p*{ The above description is much simplified in that it doesn't deal with development that happens outside of your own work — and such development obviously changes the story. Overall, this is not too different from working with subversion: if there were any changes on the server you need to update your working copy first, and this implies dealing with conflicts if there are any. But the way to deal with such things in git is significantly different than dealing with them in subversion, and this difference is at the technical level (different commands) and at the workflow level (you will likely branch much more, and you're likely to push less frequently than you commit with subversion). @~ To start working, you first need to get a repository clone. Usually you will clone the PLT repository (or a private copy that you do your work in), but remember that to experiment with git you have the @cmd{play} repository or you could make a fork of the PLT repository to play with and remove it when you're done. Either way, be sure to try these things out — it will make your life much easier in the future. @~ In the following examples I will use an empty repository to demonstrate things and I will list the exact commands that I'm using (this means that I will use unix commands to create and edit files, and use @cmd{-m} when committing). Lines that I enter are displayed with a @cmd{$} prompt, most output lines are omitted, comments start with @cmd{#}, and @cmd{$user} is set to my username. Note that if you try this yourself, the SHA1s of commits will be different (the reason for that is that a commit object includes the author name and email and the date). Note also that in some places I will “jump to an earlier continuation”: start from an earlier state and do something different — so if you want to try these things out it will be convenient to put the commands in a shell script so you can re-run it to get to the earlier state. 
@~ First, I create a private empty repository, populate it, and update the remote repository: @pre{$ user=eli # your own username here $ mkdir /tmp/sandbox; cd /tmp/sandbox $ ssh pltgit delete $user/foo # delete previous repository, if any $ git clone pltgit:$user/foo $ cd foo $ echo "foo" > foo; echo "bar" > bar $ git add . $ git st # uses the `st' alias as shown above A bar A foo $ git commit -m "initial content" . [master (root-commit) 87f1f02] initial content @i{...} # git tells us the branch we committed to, the new commit SHA1 and # that this is the first commit, and the log message; we can verify # this now with `git log' $ git log commit 87f1f02c23b32e7f9b... # this is the commit object I created @i{...} $ git push # (or `git push origin master' if needed) To pltgit:eli/foo # where we pushed to, and the branch * [new branch] master -> master} @~ @small{[A quick note on commit messages: several git commands consider the first paragraph of your commit message as a short description for it. This is all of the text up to the first blank line if you write a commit message in an editor, or the first @cmd{-m} message if you use it with @cmd{git commit} (it accepts multiple @cmd{-m} arguments, for multiple paragraphs). Keep this in mind when composing such messages.]} @~ To see what happens when multiple people commit to the repository, we create a second clone of our repository now in a @path{foo2} directory: @pre{$ cd .. $ git clone pltgit:$user/foo foo2 $ cd foo # go back to foo now} @~ Let's make two new commits now: @pre{$ echo "more foo" >> foo; echo "more bar" >> bar $ git ci -m "more stuff" # uses the `git-ci' script from above [master b7d3c41] more stuff $ echo "even more foo" >> foo $ git ci -m "even more stuff" [master 18bc0e6] even more stuff} @~ At this point, instead of blindly pushing these commits, let's look around first.
One useful tool for inspecting the history is @cmd{gitk} — if you run it now, you will see the simple 3-commit graph, and two of them are marked as branches — clearly showing that your local master branch is 2 commits ahead of the remote one. This could be different at this point: someone else might have pushed more commits to the remote — remember that your remote master branch (@cmd{remotes/origin/master}) is not really what's on the remote, but rather what you know about it last time you pulled from it. @~ Another useful command to examine the history is @cmd{git log}, which can show commit history in many ways. As things stand in the current repository, if you just run @cmd{git log}, you will see a listing of the same three commits that gitk shows. To get a more condensed format with one-line-per-commit, use @cmd{--oneline}. Another thing that you can do is show a specific range of commits — you can do this by specifying two revisions separated with @cmd{..}, where the revisions can be written explicitly using the (short prefix) SHA1 form, or more conveniently using a symbolic name (eg, branch, tag, HEAD): @pre{$ git log origin/master..master} @~ Since we @em{are} currently on the master branch, we could use @cmd{HEAD} for the second one (@cmd{origin/master..HEAD}), but this is also the default, so an even shorter form is @cmd{origin/master..}. In addition to @cmd{git log}, you can also use @cmd{git diff} in a similar way, but instead of a commit listing, you get the diff between the two specified points, so @pre{$ git diff origin/master..} will show you the changes that you did not yet push. Note that there are a number of places where git will guess the full name of branches, for example, @cmd{origin/master} is actually a short name for @cmd{remotes/origin/master}. In a similar way, just @cmd{origin} will make git guess that you're talking about @cmd{origin/master}. 
@~ In these cases, the revision specifications for @cmd{log} and @cmd{diff} are the same, but this is a little misleading: @cmd{git diff} usually works by comparing two specific end points in your history, but @cmd{git log} actually works on a @em{set of commits} rather than on a range. The @cmd{R1..R2} notation is actually shorthand for @cmd{^R1 R2} — specifying a commit means “the commit and all of its parents”, and a @cmd{^} prefix negates a set, so @cmd{^R1 R2} means “include the set of commits leading to R2 (inclusive), but exclude the ones leading to R1 (inclusive)”. @~ In addition to this range/set specification, there is a lot to specifying a revision set. As mentioned, you can use a SHA1 (or a shorter unique prefix), or you can use a symbolic name. You can also use @cmd{R^} for the parent of @cmd{R} (the first parent in the case of merge commits, which have more than one parent), @cmd{R^^} would be the grand parent, @cmd{R~3} is the 3rd-generation parent commit. There is also @cmd|{R@{yesterday}}|, @cmd|{R@{1 month 2 weeks ago}}| etc for a symbolic name R — which refers to the branch/HEAD at that point in time (this refers to @em{your own} version of it at that time; there are @cmd{--since} and @cmd{--until} flags to filter commits by the time they were made at). You can also use a -N flag (where N is an integer) to show only N commits. Finally, you can use a branch name R with @cmd|{R@{upstream}}| (short: @cmd|{R@{u}}|) to refer to the “upstream” version of the branch — the branch that R is set to follow. This is particularly convenient for things like @pre|{$ git log --oneline @{u}..}| which will always show the commits that you have over the branch your current one follows. (For example, you could set up an alias to use this.) @~ We now continue by pushing our two commits to the remote server. Since we already did a push, a plain @cmd{git push} works fine.
@pre{$ git push # no need to specify a branch now To pltgit:eli/foo 87f1f02..18bc0e6 master -> master} @~ As you can see in the last line, git tells us that we pushed from our local @cmd{master} branch to the remote @cmd{master} branch, and that this made it advance from the first commit we pushed (87f1f02) to the last one we created now (18bc0e6). @~ Since we've made some progress in one place, we can go to the @path{foo2} clone to see what happens when we update a repository that did not have these changes: @pre{$ cd ../foo2 $ git pull @i{...} From pltgit:eli/foo 87f1f02..18bc0e6 master -> origin/master Updating 87f1f02..18bc0e6 Fast-forward bar | 1 + foo | 2 ++ 2 files changed, 3 insertions(+), 0 deletions(-)} @~ This looks expected — git shows the new commits that we received, and they're the same as what we pushed earlier. Using @cmd{git pull} is actually doing two things: it's running @cmd{git fetch} first to update your remote branch from the server, and then it uses @cmd{git merge} to merge it into your master branch. The point where @cmd{git merge} starts is the “Updating” line — and there's an important thing to note here: the next line says “Fast-forward”, which is a special kind of a merge. When you merge some branch into your branch, and this branch is a proper superset of your branch (it has commits that your branch doesn't, and all commits in your branch are included in it), git will simply “move your branch forward” to the other: it will update your branch to the tip of the merged one, and then your working directory will be updated accordingly. @~ It is often better to do the fetch first, so you can see the changes that happened remotely before you merge them. To do this we're going to use @cmd{git fetch}, avoiding the merge step that @cmd{git pull} does. In fact, since creating a merge commit is something that you might want to always do, @cmd{git pull --ff-only} will only do the merge if it will be a fast-forward merge.
@~ Assuming we start again from the @path{foo2} repository before the above pull, we get the same output up to the point where the merge started: @pre|{$ git fetch @i{...} From pltgit:eli/foo 87f1f02..18bc0e6 master -> origin/master $ git log --oneline @{u}.. # nothing $ git log --oneline ..@{u} # the same two commits}| The first log doesn't show anything, since we have no commits over the ones in the remote (the “upstream” of our current branch). To see this, consider that after expanding empty names to @cmd{HEAD}, and the @cmd|{@{u}}| to the remote branch name we get @cmd{remotes/origin/master..master}, and this is short for @cmd{^remotes/origin/master master} — the set of commits made of our master branch and all parents, minus the set of commits from the remote branch and up — since it's ahead of that, we get an empty set. The second log command reverses the two, giving us the set of commits that the remote has and the local branch doesn't. In addition to @cmd{git log}, you can use @cmd{gitk} to inspect the repository: use the @cmd{--all} flag to make it show all branches. Either way you'll be able to see that a fast-forward merge is possible.} @subsection{Concurrent development} @p*{ Again, we'll assume starting with the @path{foo2} repository before the pull. We will now create a new commit before we get changes. This makes it similar to commits pushed to the server while you do your work — so let's see how this common story goes and try to push this change: @pre{ $ echo "blah blah" > blah $ git add blah $ git ci -m "blah" $ git push To pltgit:eli/foo ! [rejected] master -> master (non-fast-forward) error: failed to push some refs to 'pltgit:eli/foo' To prevent you from losing history, non-fast-forward updates were rejected Merge the remote changes before pushing again. See the 'Note about fast-forwards' section of 'git push --help' for details.} @~ As expected, git refuses to push our change. 
The terminology in the error message is a little confusing — what it basically says is that the commit(s) that we are trying to push are not an extension of the tip of the master branch on the server. A “non-fast-forward update” in this case would mean that we'd set the master branch on the remote to be the same as our branch — but this means that whatever commits were pushed to the server (the two commits we pushed out from the @path{foo} clone in this case) will be lost. @~ To merge in the remote changes, we need to pull them in. We'll now look at three different ways to do this.} @h3{1. Playing it safe} @p*{ First, as we've seen above, doing a separate @cmd{git fetch} step would allow you to see where things stand before you do anything. Alternatively, we can use the @cmd{--ff-only} variant of @cmd{git pull}, which will do a merge only if it's a fast-forward one, covering the trivial cases. If a fast-forward merge cannot be done, it will tell you about it and then stop: @pre{$ git pull --ff-only @i{...} From pltgit:eli/foo 87f1f02..18bc0e6 master -> origin/master fatal: Not possible to fast-forward, aborting.} @~ We can now use the usual tools to see where things stand. The following are all useful here: @ul*{ @~ @cmd{gitk --all}@br This visualizes the commit graph. If you do this, you will see the four commits that we have so far: the initial commit at the root, the commit that we did in this clone, and the two commits that we retrieved and are waiting on @cmd{remotes/origin/master}. @~ (a) @cmd|{git log @{u}..}|@br (b) @cmd|{git log ..@{u}}|@br (c) @cmd|{git diff @{u}..}|@br (c) @cmd|{git diff ..@{u}}|@br Inspect the commit that the local branch has over the remote (a), and the two that the remote has over the local one (b); look at the difference between the local branch and the remote either way (c).
@~ (a) @cmd|{git log --left-right --oneline @{u}...}|@br (b) @cmd|{git log --left-right --oneline ...@{u}}|@br (c) @cmd|{git log --graph --oneline --all}|@br An alternative notation for specifying commit sets for @cmd{git log} is @cmd{R1...R2} (with @em{three} dots) — this stands for all commits from both R1 and R2 and their parents, but excluding commits from their “merge base” — the parent commit that both descend from. As demonstrated in (a), this is especially useful with the @cmd{--left-right} flag: you'll see the commits that are new on the remote branch and the ones that are new on the local one, with @cmd{<} or @cmd{>} indicating which side each commit is coming from. Yet another way that @cmd{git log} can be used is (c) with a @cmd{--graph} flag, which makes it render the commit graph in ASCII-art. @~ @cmd{git show-branch -a}@br This is another potentially useful command that shows how commits are distributed over branches in your repository. In this case you will see a matrix with the four commits in separate rows, and each will have a @cmd{+} or @cmd{*} indicating whether it is included in a branch. @~ (a) @cmd|{git diff @{u}...}|@br (b) @cmd|{git diff ...@{u}}|@br The three-dots notation is also used by @cmd{git diff}, with slightly different semantics than in @cmd{git log} (remember that @cmd{git log} talks about commit sets, and @cmd{git diff} compares two specific points). In the @cmd{diff} case, these compare a specific branch tip to the merge base of this branch and another, which means that you see a diff with the work done on one branch that is not included in the other (this is unlike the two-dot syntax where you get the diff between the two branch tips). In the first (a) example you will see all changes that you did locally, and in (b) the changes that were done remotely.}} @h3{2.
Merging (not the fast-forward variant)} @p*{ Now that we did a @cmd{git fetch} or a @cmd{git pull --ff-only} to update the remote branch, we can proceed with merging it into our branch — which we can do in one of two ways: @pre{$ git merge origin # merge origin/master into master Merge made by recursive. @i{...} # -or- $ git pull # will do a merge as usual} @~ Using @cmd{merge} is better for the usual reason: @cmd{git pull} can bring in more updates that were pushed by others since you fetched, and include them in the merge. Note that if instead of running a separate fetch or pulling with @cmd{--ff-only} you were using @cmd{git pull}, then you'd essentially get to the same point we are now at. @~ Either way, the merge tells us “Merge made by recursive” — which is important here: if we see “Fast-forward” it means that no new commits were made, but “Merge made ...” means that a merge commit was created (“by recursive” refers to the merge strategy, git has several of them). If we look at the commit graph now with gitk or with @cmd{git log}, we'll see the new commit that was created: @pre{$ git log --oneline --graph --date-order * 12bf7ee Merge remote branch 'origin' |\ * | a40a45f blah | * 18bc0e6 even more stuff | * b7d3c41 more stuff |/ * 87f1f02 initial content} The merge was successful without manual intervention (you didn't need to resolve any conflicts), so it proceeded immediately to create the commit that connects the two lines of development, and used some standard template for the commit log. This is the safe thing to do as the default for git, but in this case it makes the history complicated with no good reason — it just happened that while we were working on @cmd{blah} someone else pushed an unrelated change. (It could be related: perhaps one of the remote commits was referring to the file that I've added, but especially in the PLT case this would be very rare.) 
If you use gitk to look at the recent history of the repository, you'll see many such commits, and they can make it harder to figure out how the development went on.} @h3{3. Rebasing} @p*{ To get a simple/readable history, the goal is to have a more linear history: just have the two remote commits and move our commit to follow them (which would be the history that you get with subversion under similar conditions). But our commit object already points to its parent, so it @em{cannot} move: in the above graph, @cmd{a40a45f} is a hash that was computed based on @cmd{87f1f02} being its parent, and changing a parent means getting a new hash and therefore a new commit object. @~ This is where @cmd{git rebase} gets into the picture. Assuming that we didn't merge as described above, we would just use @cmd{rebase} instead: @pre{$ git log --graph --all --oneline * a40a45f blah # \ | * 18bc0e6 even more stuff # \ | * b7d3c41 more stuff # > same tree as before the merge |/ # / * 87f1f02 initial content # / $ git rebase origin First, rewinding head to replay your work on top of it... Applying: blah # git tells us that it re-applies this $ git log --graph --all --oneline * 6ebd1fb blah # \ * 18bc0e6 even more stuff # \ the resulting history * b7d3c41 more stuff # / is linear * 87f1f02 initial content # /} @~ Here's what git did in this rebase: it (1) moved the HEAD to the merge base between your local branch and the remote one — the @cmd{87f1f02} commit; (2) did a fast-forward merge, which just moves your local branch to the tip of the remote one; (3) it now @em{replays the same changes} that you had in your commits (only @cmd{a40a45f} in this case) on top of the new tip, leading to @em{new} commit objects (@cmd{6ebd1fb} in here). So we end up with a fresh commit object, and the old one (@cmd{a40a45f}) is gone. (It's not really gone — it's kept in your repository store for a while, to protect you from losing work.) 
If you look at the complete details of the new commit using gitk or @cmd{git log}, you will see that this commit has different dates for the author and the committer date: @pre{$ git log --format=fuller -1 6ebd1fb # all details, show only one commit @i{...} AuthorDate: 2010-05-02 10:26:00 -0500 @i{...} CommitDate: 2010-05-02 10:30:00 -0500} This is because rebasing just created a new commit — but the author time is still considered the same. In any case, you now have linear history that is a proper descendant of the remote branch, and you can now push your changes out. @~ Practically every place where you read about rebasing in git will warn you about not doing it for public history. The problem is that if someone had a copy of your previous branch (@cmd{a40a45f}), then next time they will update, if their copy of the branch is updated, then things will change in a nasty way: even more so if they've committed more changes on that branch. (This “someone” can also be yourself, of course.) This also explains why @cmd{git pull} does not rebase by default. @~ As far as the PLT repository goes, the server will never allow pushes that are not strict extensions of what's on it (in other words, it only allows fast-forward pushes) — so you won't be able to mess things up for others. But as long as your change is something that you work on privately, there is absolutely no problem in doing this. Note that this is the same thing as with re-doing commits because of mistakes: as long as a commit did not go out, you can fix it in multiple ways; when it does get pushed to the server, the only practical way to fix it is by pushing another commit. (In some extreme cases we may do such a thing: for example, if you commit a passwords file, then there is no other way to remove it completely from the repository — but these are very rare, and such fixes affect everyone.)
@~ It is therefore best to get into one of two habits when you do a @cmd{git pull}: either use @cmd{--ff-only} or @cmd{--rebase}. The latter is a little more convenient but you might not feel comfortable about doing a rebase automatically — it @em{might} just be that someone worked on the same set of files, and you really prefer a plain merge. For this, you might prefer using @cmd{--ff-only} which will automatically work in the trivial cases, and otherwise leave you in a state where you can look at things and decide how to proceed yourself.} @subsection{Additional forms of history tweaking} @p*{ As described in the previous section, rebasing is not some kind of a magical operation: it is really just an expected by-product of the way git works — of the fact that commits can be created as descendants of any commit (not just tip commits). You could perform a rebase manually by starting from some commit, then inspect each of the changesets that the rebased history contains, and play them back on the new commit. (Lumping this tediousness led to a script, which led to the rebase command.) This means that you don't really have to limit yourself to replaying these commits exactly as they were — for example, you could write new commit log messages, combine two commits into one, drop some commits, or change their order. @~ The rebase command has a flag that makes doing all of these things easy: @cmd{--interactive}. Continuing the above example, we now have four commits in our history — and say that we want to tweak the last two. If we now run @pre{$ git rebase HEAD~2} we ask git to rebase our current head off of its grandparent commit (remember that @cmd{HEAD^} is the parent, @cmd{HEAD^^} is the grandparent, and @cmd{HEAD~2} is an alternative syntax for @cmd{HEAD^^}). Since it @em{is} already based there, @cmd{git rebase} does nothing, and tells us that the branch is up to date.
But if we add @cmd{--interactive}, we get something different: git pops up an editor with this text: @pre{pick 18bc0e6 even more stuff pick 6ebd1fb blah # Rebase b7d3c41..6ebd1fb onto b7d3c41 # @i{...}} This is a listing of the last two commits with their one-line log messages. As the text that is below these lines says, you can replace the @cmd{pick} before a commit with a different command: you can use @cmd{reword} to get to write a different log message (you will get another editor window to do the editing), @cmd{squash} to combine a commit with the previous one (it will let you edit the log message for the combination), @cmd{fixup} which does the same but discards the log message, and finally @cmd{edit} will make the rebasing process stop at the relevant commit and let you tweak it before it continues. In addition, removing a commit line means that the commit will be skipped, and reordering lines will replay the commits in a different order. If any of these lead to conflicts, the rebasing will stop for a manual resolution, and you'll need to @cmd{git rebase --continue} when resolved, or @cmd{git rebase --abort} to get back to the original state. @~ Note that since our @path{foo2} clone tracks a public @path{foo} repository, this particular rebase is bad: we intend to edit the last two commits, but only one is local to @path{foo2} — the other is a commit that we got from @path{foo}, and changing it means that we will get a rebased commit based on its parent, and the server will forbid pushing it later. If you see that you went too far when you see the rebase editor, all you need to do is keep those lines untouched: in the trivial case of leaving an initial set of commits unmodified, they will be “rebased” by leaving them in as is. (If you inspect the history later, you will see that they have the same SHA1s.)
@~ A useful case of using @cmd{squash} (or @cmd{fixup}) with interactive rebasing is doing @cmd{checkpoint} commits frequently, and eventually combining them into a single commit. This demonstrates a popular git principle: keep commits as logical units that correspond with the changes done, since there is no central server that dictates a public-commit-or-nothing. Doing these will make it easier in the future to deal with the history: inspect the changeset as a whole, undo it, and it also works well with finding bugs in the history — you can have checkpoints for intermediate states of the code even if it doesn't work, since this state will eventually be hidden. @~ A particularly common case for editing history is “oops commits”: you just made some change, committed it, and then realized that something is wrong — you forgot to change some related reference, to remove some debugging printout, or to describe some new aspect of the commit. You could use @cmd{git rebase HEAD^} in these cases to rebase just the last commit while editing it, but there is a much more convenient way to do this: @cmd{git commit --amend}. Usually, @cmd{git commit} creates a new commit based on the current branch tip and a given commit log message, but with @cmd{--amend} it does something different: it takes a snapshot of the tree as usual, but it makes the commit be a descendant of the tip's parent commit. For example, assuming we didn't really change anything with the rebase above, our history and recent change are now: @pre|{$ git log --oneline 6ebd1fb blah 18bc0e6 even more stuff b7d3c41 more stuff 87f1f02 initial content $ git log --oneline -p -1 # -p => show patch, -1 => only one 6ebd1fb blah diff --git a/blah b/blah @i{...} --- /dev/null +++ b/blah @@ -0,0 +1 @@ +blah blah # this is the recent change}| This is obviously wrong — we need to have three “blah”s there.
With subversion we would now need to perform an “oops, forgot a blah” commit, and in fact, we would need to do the same with git if we push these changes out now. But as long as we didn't, we can fix it without an additional commit using @cmd{--amend}: @pre{$ echo "blah blah blah" > blah $ git add blah $ git ci --amend -m "blah^3" [master 5cf863d] blah^3 # the re-made commit $ git log --oneline 5cf863d blah^3 18bc0e6 even more stuff b7d3c41 more stuff 87f1f02 initial content} As you can see, the last commit is gone (remember that it is still backed up, in case of problems), and there is a completely new commit instead. Usually, @cmd{--amend} is used without @cmd{-m} — the log message editor will be initialized with the previous log message, so you can edit it instead of rewriting it from scratch. If there were no modifications to be committed, a @cmd{git commit --amend} is a convenient way to edit the last commit message only. If you leave the message untouched, a new commit will still be made — one with a new commit time; and if you delete the text completely, the re-commit will be aborted, and you will be left with the old one intact.} @subsection{Resetting the tree} @p*{ Both the @cmd{commit --amend} feature and rebasing build on the ability to “move” the current branch tip to some earlier commit in its history. To do this directly, git provides a @cmd{git reset} command, which can move the current branch tip to a specified commit, and adjust the working directory and/or the staging area accordingly. For example, for the @cmd{--amend} functionality, you will use @cmd{HEAD^} to move the branch tip to its parent commit. You can of course specify any other commit to move to, and since git branches are effectively short bookmarks, you can create branches to be able to move to them later on (or as targets for rebasing, merging, etc).
In addition, you can use @cmd{HEAD} (or just omit the target, since @cmd{HEAD} is the default) to only change the working directory (or staging area). @~ The reset command has three major modes for its work, specified with a flag. (See the @man{git-reset} man page for a more thorough explanation, with lots of usage examples, some of which have evolved into their own functionality — like the @cmd{--amend} feature.) Using @cmd{HEAD^} as the target commit, here are some summaries of how it can be used: @ul*{ @~ @cmd{git reset --hard HEAD^}@br This will move the branch tip to the previous commit, and will change the working tree and the staging area to match. Translation: completely forget the last commit and any work in the working directory. @~ @cmd{git reset --soft HEAD^}@br Moves the branch tip, but does not change the working directory or the staging area. Translation: undo the last commit, and leave your working directory in a state where @cmd{git commit} will get the same change in. (Note: not the @cmd{git ci} script — this will add changes in the working directory, if any.) @~ @cmd{git reset --mixed HEAD^}@br Moves the branch tip and the staging area to the parent commit, and any modifications done by the commit that are going to be lost are put in your working directory. Translation: similar to the @cmd{--soft} version, except that the staging area is cleared, so to recommit the changes you will need to add files again (or use @cmd{git ci} as usual). @br (Note: This is the default mode.)} @~ When using @cmd{HEAD} (which is also the default when nothing is mentioned), the branch tip is not moved, and we get: @ul*{ @~ @cmd{git reset --hard}@br Get rid of all changes to the working directory and the staging area. Translation: lose all work that was not committed, getting back to the content on the branch (a convenient way to do something similar to an @cmd{svn revert -R .} in the root of a subversion working directory).
@~ @cmd{git reset --mixed}@br Get rid of all changes to the staging area, leaving your working directory intact. Translation: lose everything that was added to the staging area. If you're avoiding it (for example, if you only use the @cmd{git ci} script), then this would be a no-op other than new files that were @cmd{git add}ed. @~ (@cmd{git reset --soft} is a no-op.)} @~ When @cmd{git reset} changes the HEAD, it creates another toplevel reference name called @cmd{ORIG_HEAD} that points to the previous commit that @cmd{HEAD} pointed at, so if you happen to @cmd{git reset --hard HEAD^} by mistake, you can immediately get back to it with @cmd{git reset --hard ORIG_HEAD} (but changes in the working tree would still be lost). (@cmd{git merge} is another command that changes the @cmd{HEAD}, and it also saves the previous value in @cmd{ORIG_HEAD}.) Finally, note that @cmd{git reset} can be restricted to make it work only on a specific set of paths, not on the whole repository.} @subsection{Other forms of reverting} @p*{ While we're on the topic of reverting files, there are three more things worth mentioning: @ul*{ @~ @cmd{git checkout -- @i{path ...}}@br @p*{ When @cmd{git checkout} is given some paths, it will only check out the relevant files from their state in the staging area. This is a more popular way to revert changes to a specific file. If you avoid using the staging area, then this is roughly the same as using reset with the @cmd{--hard} flag on the paths, since your staging area will usually be the same as your @cmd{HEAD}. (Note that the @cmd{--} is optional, and needed only when a path name can be confused with a branch name.) @~ As with @cmd{reset}, you can also specify a branch to check the path(s) from — which is useful to try some files from a different branch selectively. 
However, note that unlike subversion, git does not remember the association of the branch and the paths that were checked out of it (the branch is not “sticky”) — the files will simply be considered as modified (and they will not be updated when the branch is, unless you do the same checkout).} @~ @cmd{git show HEAD:@i{path}}@br @p*{ This shows the file as it exists in the @cmd{HEAD}, making it useful to inspect the file before you made some additional modifications (similar to @cmd{svn cat @i{path}}). You can also omit the @cmd{HEAD} — using @cmd{:@i{path}} will show the file in the staging area, which will usually be the same as the @cmd{HEAD}. One caveat to note here is that the path should be the full path relative to the repository root. (Note: I have a wrapper @cmd{git cat} script that emulates @cmd{svn cat}, I'll add it if anyone wants.)} @~ @cmd{git revert @i{commit}}@br @p*{ The @cmd{git revert} command is used to revert the changes introduced by the given commit. It will basically apply the change in that commit in reverse, then ask you for a log message for a new commit where the message is initially populated with text indicating the commit that was applied in reverse. @~ Note that this is very different from @cmd{svn revert} — it is more like @pre{svn merge -c-123@";" svn commit "Revert revision 123"} Since this is a frequent source of confusion, the @man{git-revert} man page mentions it at the top, and it refers readers to @cmd{git reset} and @cmd{git checkout} as the way to do the equivalent of @cmd{svn revert} (which are described above.)}}} @subsection{Dealing with conflicts} @p*{ We'll now see how to deal with merge conflicts. First, we'll set up the repository for a conflict. Continuing with the @path{foo2} clone, we'll first create a file (which I'll do here using shell commands, to make it easy to play with), commit, and push the new history (which includes the blah work) back to the server. 
Note the use of @cmd{git branch -v} which shows the local @cmd{master} branch and the fact that there's two commits that we haven't pushed out yet. @pre{$ echo "#lang racket" > foo $ echo "(define (foo x)" >> foo $ echo " (* x x))" >> foo $ git ci -m "turn foo into a library" [master fd856ef] turn foo into a library $ git branch -v * master fd856ef [ahead 2] turn foo into a library $ git push To pltgit:eli/foo 18bc0e6..fd856ef master -> master} @~ Now hop over to the @path{foo} clone, get the changes (the relevant bits of the output are shown), edit the file (using sed, to make it a command line), inspect the change, commit it, and push. @pre|{$ cd ../foo $ git pull From pltgit:eli/foo 18bc0e6..fd856ef master -> origin/master Updating 18bc0e6..fd856ef Fast-forward blah | 1 + foo | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 blah $ sed -i '2s/x/[x 0]/' foo $ git diff diff --git a/foo b/foo index 78d9889..b81de80 100644 --- a/foo +++ b/foo @@ -1,3 +1,3 @@ #lang racket -(define (foo x) +(define (foo [x 0]) (* x x)) $ git ci -m 'add a default value' [master 5035c9a] add a default value $ git push To pltgit:eli/foo fd856ef..5035c9a master -> master}| @~ And now get back to @path{foo2}, and before we pull, modify the same line by adding a comment and commit, then do a @cmd{--ff-only} pull and watch it refuse to merge as expected, then look at the history so far. @pre{$ cd ../foo2 $ sed -i '2s/$/ ; int->int/' foo $ git ci -m 'document the type of foo' [master 21a78df] document the type of foo $ git pull --ff-only From pltgit:eli/foo fd856ef..5035c9a master -> origin/master fatal: Not possible to fast-forward, aborting. 
$ git log --graph --all --oneline -4 * 21a78df document the type of foo # ← our change | * 5035c9a add a default value # ← the conflicting change we pulled |/ * fd856ef turn foo into a library * 5cf863d blah^3} @~ Rebasing is the common thing to do, but let's see what happens with a plain @cmd{merge} first: @pre{$ git merge origin Auto-merging foo CONFLICT (content): Merge conflict in foo Automatic merge failed; fix conflicts and then commit the result.} @~ We now have a conflict that needs to be resolved before we can finish the merge. Using @cmd{git st} (the alias listed above for the svn-like status that @cmd{git status -s} produces) shows a new @cmd{UU} status for @path{foo} — this indicates an “unmerged” (conflicted) file. To investigate further, we use a plain @cmd{git status}, which tells us that our history diverged from the remote (we already know that since @cmd{pull --ff-only} failed) and count the diverging commits, and it also tells us that @path{foo} is unmerged and hints at using @cmd{git add} to resolve it: @pre{$ git st UU foo $ git status # On branch master # Your branch and 'origin/master' have diverged, # and have 1 and 1 different commit(s) each, respectively. # Unmerged paths: # (use "git add/rm ..." as appropriate to mark resolution) # both modified: foo} You can also see that git knows about the conflict and refuses to do a commit: @pre{ $ git commit fatal: 'commit' is not possible because you have unmerged files. Please, fix them up in the work tree, and then use 'git add/rm ' as appropriate to mark resolution and make a commit, or use 'git commit -a'.} @~ In most cases the way to continue is simple: open the conflicted file in your editor, look for the conflict markers and fix the code. Then, as suggested above, use @cmd{git add @i{file}} which tells git that the file is resolved, and finally use @cmd{git commit} to commit the result. 
(Note that using @cmd{git commit @i{file}} will not work, which is why the @cmd{git-ci} script avoids adding a @path{.} if the tree requires resolving a merge.) I'll simulate the editing part with echos, and then mark it resolved: @pre{$ echo "#lang racket" > foo $ echo "(define (foo [x 0]) ; int->int" >> foo $ echo " (* x x))" >> foo $ git add foo # ← tell git that it's resolved} And now the last step is to run @cmd{git commit}, which will start your editor to edit the log message — it will be populated by text that indicates the merge and the file that had conflicts, which you can commit as is, or add some text regarding the way it was resolved. @~ At this point (or before we started working on resolving the conflict), we can get back to the original state using @cmd{reset}: @pre{$ git reset --hard HEAD is now at 21a78df document the type of foo} @~ This kind of reset is generally useful if you had some problematic conflict to resolve and you want to back up completely and re-try. But now that we're back at the start, we will see what happens when we try to rebase with the conflict instead: @pre{ $ git rebase origin First, rewinding head to replay your work on top of it... Applying: document the type of foo @i{...} CONFLICT (content): Merge conflict in foo Failed to merge in the changes. @i{...} When you have resolved this problem run "git rebase --continue". If you would prefer to skip this patch, instead run "git rebase --skip". To restore the original branch and stop rebasing run "git rebase --abort".} @~ Obviously, we get a different message (note that @cmd{git status} will now tell you that you're not currently on any branch — a result of being in the middle of a rebase). The process that follows is very similar to the merge case: edit the conflict away, then @cmd{git add} the file.
There are two differences: (1) after you @cmd{git add} the resolved files, you should use @cmd{git rebase --continue} instead of committing[*]; (2) if you want to abort the merge, use @cmd{git rebase --abort} instead of using reset. @small{([*] If you did commit, then it means that you wrote a new log message for the replayed commit, and you can just as well use the @cmd{--skip} flag so rebasing continues with the rest, or you can use @cmd{reset} to undo your commit and let rebase do it for you.)} @~ When you're in a conflicted state, there are a few git tools that help you in the resolution work. The first useful utility is @cmd{git diff}: when there's a conflict, all files that were automatically merged are already going to be in your staging area, and parts of conflicted files that could be merged will be there too. This leaves only the conflict regions in your working directory, which means that @cmd{git diff} will show you only the conflicts (since by default it shows differences between the working directory and the staging area). Also, the diff output itself is not a standard one. At the current point of conflict during the rebase that we started, this is what we'll see: @pre|{$ git diff diff --cc foo index b81de80,86a4c54..0000000 --- a/foo +++ b/foo @@@ -1,3 -1,3 +1,7 @@@ #lang racket ++<<<<<<< HEAD +(define (foo [x 0]) ++======= + (define (foo x) ; int->int ++>>>>>>> document the type of foo (* x x))}| @~ The diff header uses @cmd{--cc} which indicates git's “combined diff format”, used to represent merge commits (any commit with more than one parent). The next line has the two SHA1s of the two files that are merged. The diff itself starts with three @cmd|{@}|s, and instead of a single indicator character (@cmd{+}, @cmd{-}, or @cmd{ }), there are two — indicating a three-way diff between the two versions and their common ancestor version.
In the above you can see that the line with the optional argument is coming from @cmd{HEAD}, and the type-annotated one is coming from its commit. You might notice that this looks backwards, since we're in the repository where we committed the type annotation to the HEAD — but we're now rebasing, which means that we start from the remote branch and merge our local changes into it, essentially making the rebase perform merges in the opposite direction to plain merges. The conflict markers themselves are marked as new in both versions, and the labels that follow them depend on available information (in a @cmd{merge}, we would see @cmd{HEAD} and @cmd{origin}). @~ During a conflict resolution, the staging area actually holds three versions of each file: the common ancestor, our version, and the merged version. These things are called “file stages”, and they can be accessed using a special syntax: @pre{$ git show :1:foo # the common ancestor of both versions $ git show :2:foo # our version (optional argument) $ git show :3:foo # merged version (type-annotated)} (Again, remember that this is a rebase, so the last two are swapped.) You can also checkout one of these versions using @cmd{git checkout foo}, giving it an @cmd{--ours} or @cmd{--theirs} flag to specify which version you want to use; and you can use @cmd{git diff} to compare against them.
For example, we resolve the file (as above) and then try the different diffs (before we mark it as resolved) — these examples only show the changed lines from each of the diffs: @pre{$ echo "#lang racket" > foo $ echo "(define (foo [x 0]) ; int->int" >> foo $ echo " (* x x))" >> foo $ git diff -1 foo # can also use --base -(define (foo x) # original version +(define (foo [x 0]) ; int->int # new version $ git diff -2 foo # can also use --ours -(define (foo [x 0]) +(define (foo [x 0]) ; int->int $ git diff -3 foo # can also use --theirs -(define (foo x) ; int->int +(define (foo [x 0]) ; int->int} @~ Finally, @cmd{git log} and @cmd{gitk} accept a @cmd{--merge} flag which shows commits relevant to a merge. With @cmd{git log} the @cmd{--left-right} flag is useful here, since you'll see which side the relevant commits are on. (But this works only in @cmd{git merge}, not in rebasing.) @~ Again, when you're happy with the resolution, you @cmd{git add} the file, and because we're doing a @cmd{rebase} rather than a @cmd{merge}, use it to continue: @pre{$ git add foo $ git rebase --continue Applying: document the type of foo $ git log --graph --all --oneline -4 @i{...linear history...}} Note that @cmd{git rebase --continue} did the commit of the resolved content for you, and it used the previous commit message you've written. This is a good rule-of-thumb for deciding whether you should rebase or merge: if the commit messages are still fine as a description of the modifications, then a rebase is fine; otherwise you might want to @cmd{merge} instead.} @subsection{Copying/renaming files} @p*{ Git is, by design, tracking snapshots of the complete repository tree. Specifically, it does @em{not} keep explicit track of file/directory copies and renames. Instead, it provides ways to infer such changes in the repository based on the content.
As a result of this, there are almost no git commands that deal with file movements: @ul*{ @~ There is no @cmd{git copy} command: you just copy the file and add the new one as usual. @~ There @em{is} a @cmd{git rm} command, but its purpose is mostly to remove a file from the staging area. You could also just remove the file outside of git, and then use either @cmd{git commit @i{removed-file}} or @cmd{git commit @i{containing-directory}} to remove it (or use the above script — @cmd{git ci} in the same directory). @cmd{git rm} will delete the file from the staging area so you can do a plain @cmd{git commit} without naming any paths. @~ For the same reason, there is a @cmd{git mv} command — it uses @cmd{git rm} as above to update the staging area, and if you're fine with ignoring it, then you can just rename the file outside of git, and @cmd{git add} the new version — but as we will soon see, it's really best to use @cmd{git mv} to avoid the possible confusion if you want the file's history to be visible.} @~ To try things out, let's properly name the @path{foo} library: @pre{$ mv foo foo.rkt $ git st D foo ?? foo.rkt} As you can see, we forgot to @cmd{git add} the new file, so if we commit now we'll only be committing the deletion. An important thing to note here is that when git infers file copying and renaming, it does so only when the operations appear in a @em{single} commit. So if we commit this change and later commit a new version with the new file, the file will lose the connection to its history. As long as you didn't push the new commits out, you can still fix it: simply use @cmd{git rebase --interactive}, and squash the file addition together with the deletion. But let's start over and do the rename the easy way: @pre{$ rm foo.rkt $ git reset --hard $ git mv foo foo.rkt $ git ci -m "properly name the foo library"} To see this commit, we can use @cmd{git show} (which can show arbitrary objects, but with no arguments it shows the @cmd{HEAD}).
@cmd{git diff} can also be used to show only the diff part — using the @cmd{HEAD^!} syntax that roughly means the range from the previous HEAD to the current one: @pre{$ git show @i{...log message...} @i{...addition+deletion...} $ git diff HEAD^! @i{...addition+deletion...} $ git diff --stat HEAD^! # shows an overview of the changes foo | 3 --- foo.rkt | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) $ git log --oneline foo.rkt 599b3b6 properly name the foo library} All of these show the two operations as disconnected, and the log doesn't show any of the prior history. The thing is that you need to ask git to look for file operations, and the @cmd{-M} and @cmd{-C} flags do that. In addition, @cmd{git log} needs a @cmd{--follow} flag to make it follow history beyond renames (but note that it can do that only when given a single file path). For example: @pre{$ git diff -M --stat HEAD^! foo => foo.rkt | 0 1 files changed, 0 insertions(+), 0 deletions(-) $ git log --oneline --follow foo.rkt 599b3b6 properly name the foo library 0fb8291 document the type of foo 5035c9a add a default value @i{...}} In this case the rename was a trivial one as there were no other changes. This makes it especially easy to find renames since the SHA1 of the file would be the same. But git considers such operations as renames as long as they're “similar enough” — for example, if you just rename some files and change some @cmd{require}s as a result, it will be detected as renames. (The usual claim is that when the content is not similar enough, you can just as well claim that the file is new.) If you think that you might be doing too many changes to some files, and you want to preserve the connection, you can do only the rename in one commit, and then the modifications in the next. 
@~ An added benefit of this mode of work is that @cmd{git blame} can find lines in files that were copied from other files, and deal naturally with a file that is split into two files etc. 
Like @cmd{log} and @cmd{diff}, it needs some flags to do the extra work (see @cmd{-M} and @cmd{-C}).} @subsection{Managing branches} @p*{ As seen in various places above, a branch in git is basically just a SHA1 pointer to a commit (and therefore to the whole line of commits in its development line), with a naming hierarchy that follows some conventions (@path{/}-separated, @cmd{master} as the main one, @cmd{remotes} prefix for remote branches, @cmd{origin} for the default remote server name, etc). You can see all of this in the toplevel @path{.git} meta directory — there is a @path{HEAD} file which represents the head, its content will be a line that looks like @cmd{ref: refs/heads/master}, and there will be a @cmd{refs/heads/master} file with a content that is the actual SHA1. There are, of course, various other bits of meta-data, so it's not a good idea to change such files directly (for example, when there are many names git will create a “packed” reference file with many references for efficiency) — but overall this is the basic idea. @~ Branches come in two main kinds: local branches and remote ones, with remote branches having a name that begins with @cmd{remotes/origin/}. (Later we'll see how to add new remote repositories — remote branches from these will have names that start with @cmd{remotes/@i{remote-name}/} instead.) The difference between the two is that a remote branch is a way to mirror a branch on a remote repository — it is not intended for local work. For example, if you try to check out a remote branch, git will check out a “detached head” (details on this below). If you do that, you'll see that the @path{HEAD} file will have an explicit SHA1 rather than the usual @cmd{ref: @i{branch-name}}. @~ The @cmd{git branch} command is the main way to manage branches. With no flags, it will just print out the list of local branches, marking the current branch with a @cmd{*}. 
You can add flags to show remote branches instead (@cmd{-r}), both kinds (@cmd{-a}), and also to list more information on the branches (@cmd{-v}): @pre{$ git branch * master $ git branch -r origin/HEAD -> origin/master # (this one is symbolic too) origin/master $ git branch -av * master 599b3b6 [ahead 2] properly name the foo library remotes/origin/HEAD -> origin/master remotes/origin/master 5035c9a add a default value} @~ When given a single name argument, a branch by that name will be created, and it will point to where the @cmd{HEAD} currently points to; a second argument can be a name of an existing branch (or any commit) that the new branch will start at. In addition to creating branches starting from the current head, this can be useful in creating branches that start from elsewhere, even from a “detached head”. For example, say that in our current repository we want to try out some work based on the state of things before the last commit (which renamed the @path{foo} file). We can check out @cmd{HEAD^} (which will lead to a detached HEAD), and then create a branch for it: @pre{$ git checkout HEAD^ @i{...} You are in 'detached HEAD' state. @i{...} HEAD is now at 0fb8291... document the type of foo $ cat .git/HEAD 0fb8291... # doesn't point to a branch $ git branch * (no branch) # you can see it here too master $ git status # Not currently on any branch. # and here $ git branch pre-rename # create a branch here $ git branch * (no branch) # we're still detached master pre-rename $ git checkout pre-rename Switched to branch 'pre-rename'} As you can see, creating a branch doesn't check it out — even when the new branch is exactly where we already are. The difference is related to the nature of @cmd{HEAD}: it is usually an indirect reference to a branch name, and when a commit is made, the branch that @cmd{HEAD} points to is updated. 
But when we are using a detached HEAD, it points directly at a SHA1 — committing in this state will work, and the HEAD will point at the newly made commit — but there will be no branch that will be updated, so if you checkout a different branch (or a different commit) now, the commits you made are “lost”. @~ The main reason that such commits will be lost is that git branches don't live inside the repository store — and dealing with branches is not something that gets recorded as part of the history. To make things safer, git maintains something that is known as the “reflog”, which keeps track of where your branches have been — those are kept for a while (usually around a month), which means that you can easily go back to a previous commit if it seems that you lost one (eg, as a result of committing on a detached HEAD). (You can see these files in the @path{.git/logs} directory.) @~ Since creating a new branch and checking it out is a common combination, the @cmd{checkout} command can create a branch before checking it out. Use the @cmd{-b} flag for this: @pre{$ git checkout -b also-pre-rename Switched to a new branch 'also-pre-rename' $ git checkout -b post-rename master Switched to a new branch 'post-rename' $ ls bar blah foo.rkt $ echo "one more line" >> foo.rkt $ git ci -m "one more line"} @~ Finally, you use the @cmd{-d} flag to delete branches. @pre{ $ git branch -d post-rename # won't allow it error: Cannot delete the branch 'post-rename' which you are currently on. $ git checkout master Switched to branch 'master' Your branch is ahead of 'origin/master' by 2 commits. $ git branch -d post-rename error: The branch 'post-rename' is not fully merged. If you are sure you want to delete it, run 'git branch -D post-rename'.} As you can see, git refuses to delete a branch that has unmerged work, since this can lead to losing that unmerged work — so you need to use @cmd{-D} for that. 
In addition, you usually don't delete remote branches; when you do, you need to use the @cmd{-r} flag too.} @subsection{Using branches} @p*{ Since git branches are so lightweight, they fit any kind of parallel work you need to do on several different topics. A result of that is that it is possible to start new branches for any work you'd want to do — and this is common enough that there's a name for such branches, they're called “topic branches”. Such branches are created from the master branch (usually) and worked on in parallel. At any point where you want to work on something new, you would create a new branch for it and switch to it (committing any work you might have on your current branch before you do so): @pre{$ git checkout -b improve-bar master # switch to a fresh topic branch Switched to a new branch 'improve-bar' $ echo "even more bar" >> bar # work there $ git ci -m "improved bar" # save that work $ git checkout post-rename # go back to where we were} @~ If you need to commit changes before you create the new branch, you shouldn't have any problems doing so — because you can change where a branch points to, you can just commit whatever you have and then get back to it: @pre{ $ echo "another line" >> foo.rkt # at this point you remember that you need to do something else in the # `improve-bar' line of work. $ git ci -m "checkpoint" $ git checkout improve-bar # ...work... $ git checkout post-rename $ git log --oneline -2 e9a4fcd checkpoint # this is our temporary checkpoint commit d92fb0a one more line $ git reset HEAD^ # undo it Unstaged changes after reset: M foo.rkt # git tells us that this is now uncommitted $ git st M foo.rkt # ... 
as does `status' $ git log --oneline -2 d92fb0a one more line # the temporary commit is gone 599b3b6 properly name the foo library} @~ You can even decide on some convention to use in some cases, then create new git commands as scripts that will do the work for you. 
In this case, you could write a command that will do a “checkpoint” commit if needed, switch to another branch, and if the first commit there has only @cmd{checkpoint} as its log message, undo it as above. There are several git convenience commands that started out this way — in this case, check out the @cmd{git stash} command which allows you to save the current work by pushing it on a “work in progress” stack, and later pop it back out (possibly on a different branch). @~ Earlier we've seen how to merge or rebase your master branch from the remote master branch, but the full story is that you can merge and rebase @em{any} two branches. This makes branches very flexible: you can create a branch A from an existing branch B, eventually merging/rebasing it back into B, or directly into master and dump A. At any point you can run @cmd{gitk --all} to see where things stand — in our current repository, this shows us that there are redundant @cmd{pre-rename} and @cmd{also-pre-rename} branches, that our @cmd{master} branch is two commits ahead of the remote one, and that we have @cmd{improve-bar} and @cmd{post-rename} branches with 1 and 2 commits over our @cmd{master} branch. If we're done with these two branches, we can now merge/rebase them to our @cmd{master}, or merge/rebase one to the other and the result to @cmd{master}, and then push everything out. @~ To make working with branches even easier, git has a notion of an “upstream branch” — this is a per-branch setting that tells git which branch the current one is based on. By default, any branch that is created with a remote branch as its starting point will have that remote branch set as its upstream. 
We've seen how git treats that information in various places so far: @cmd{git status} and @cmd{git branch -v} both use it, and using a second @cmd{-v} with the latter shows also the upstream branch: @pre{ $ git reset --hard # dump the above uncommitted change $ git checkout master Switched to branch 'master' Your branch is ahead of 'origin/master' by 2 commits. $ git status # On branch master # Your branch is ahead of 'origin/master' by 2 commits. @i{...} $ git branch -v @i{...} * master 599b3b6 [ahead 2] properly name the foo library @i{...} $ git branch -vv * master 599b3b6 [origin/master: ahead 2] properly name the foo library} @~ In addition to that, we've seen the @cmd|{@{upstream}}| and @cmd|{@{u}}| notation that refers to the upstream branch, making it convenient to further examine pending changes that weren't incorporated upstream: @pre|{$ git log --oneline @{upstream}.. 599b3b6 properly name the foo library 0fb8291 document the type of foo}| @~ And finally, @cmd{git pull} and @cmd{git push} know where to pull from and push to based on this setting. Overall, this is a very useful feature to have when you have many branches, therefore it is possible to use it between local branches too. There are two ways to do this: when a branch is created with either @cmd{git branch B} or @cmd{git checkout -b B}, you can use the @cmd{--track} flag to set up tracking to the initial branch it's based on. @pre{$ git branch -t b1 master Branch b1 set up to track local branch master. $ git checkout -tb b2 master Branch b2 set up to track local branch master. Switched to a new branch 'b2'} (Note: if you're using @cmd{checkout}, then the @cmd{--track} flag should precede the @cmd{-b} flag, as done above.) If a branch already exists, you can use @cmd{git branch --set-upstream} to set the upstream information. @pre{$ git branch --set-upstream post-rename Branch post-rename set up to track local branch b2. 
$ git branch --set-upstream improve-bar master Branch improve-bar set up to track local branch master.} As seen here, if it is given just a branch name, the current branch is set as its upstream. @cmd{git branch} can also change the upstream branch, for example, if the above tracking of @cmd{b2} was a mistake: @pre{$ git branch --set-upstream post-rename master Branch post-rename set up to track local branch master.} Either way, we can now see this information in the git commands that do so, as well as use @cmd|{@{upstream}}|: @pre|{$ git branch -vv b1 599b3b6 [master] properly name the foo library * b2 599b3b6 [master] properly name the foo library improve-bar e60c168 [master: ahead 1] improved bar master 599b3b6 [origin/master: ahead 2] properly name the foo @; library post-rename d92fb0a [master: ahead 1] one more line $ git checkout improve-bar Switched to branch 'improve-bar' Your branch is ahead of 'master' by 1 commit. $ git log --oneline @{upstream}.. e60c168 improved bar}| @~ In addition, we can use @cmd{git pull} to get changes on the upstream branch merged or rebased on the current one: @pre{$ git pull From . * branch master -> FETCH_HEAD Already up-to-date.} Nothing actually happened here, because the current branch (@cmd{improve-bar}) already contains all of the commits on the master branch. You can see that this is a local pull since git says @cmd{From .}, which stands for “our own repository”. You can also do a @cmd{push} now, which will make the current additional commit (listed with @cmd|{@{upstream}..}|) appear on the @cmd{master} branch: @pre{$ git push To . 599b3b6..e60c168 improve-bar -> master} @~ Since the @cmd{improve-bar} line of development is unrelated to the one in @cmd{post-rename}, it is now one commit behind the @cmd{master} branch, and cannot be pushed as is: @pre{ $ git checkout post-rename Switched to branch 'post-rename' Your branch and 'master' have diverged, and have 1 and 1 different commit(s) each, respectively. 
$ git branch -vv improve-bar e60c168 [master] improved bar master e60c168 [origin/master: ahead 3] improved bar * post-rename d92fb0a [master: ahead 1, behind 1] one more line $ git push To . ! [rejected] post-rename -> master (non-fast-forward) error: failed to push some refs to '.' To prevent you from losing history, non-fast-forward updates were rejected @i{...}} Dealing with this is similar to dealing with updates on the remote server — for example, we can rebase the branch before pushing it: @pre{$ git pull --rebase From . * branch master -> FETCH_HEAD First, rewinding head to replay your work on top of it... Applying: one more line $ git push To . e60c168..7bdec0c post-rename -> master} @~ When you use @cmd{git push} to push changes when you have no upstream branch set, or when you push to a different branch than the one set, you can use @cmd{--set-upstream} to make git remember the push target as the upstream. Therefore, an easy way to create a new branch that tracks a possibly new remote branch by the same name is: @pre{$ git checkout -b my-branch Switched to a new branch 'my-branch' $ git push origin my-branch --set-upstream To pltgit:eli/foo * [new branch] my-branch -> my-branch Branch my-branch set up to track remote branch my-branch from origin.} And when you deal with remote branches this way, you might want to have a local branch that tracks a remote one with a different name. To do this, you use a syntax for the branch to push that specifies the local branch to push and the remote one to push to: @pre{$ git push origin my-branch:different-branch --set-upstream To pltgit:eli/foo * [new branch] my-branch -> different-branch Branch my-branch set up to track remote branch different-branch from @; origin.} @~ Finally, note that git stores the upstream information in the repository-local configuration file. 
If we look at it now, we will see the various upstreams that we have set: @pre{$ cat .git/config @i{...} [remote "origin"] fetch = +refs/heads/*:refs/remotes/origin/* url = pltgit:eli/foo [branch "master"] remote = origin merge = refs/heads/master @i{...}} this is the upstream that was made by default when we first checked out our clone, together with the information of where the @cmd{origin} repository is. Following that are the ones we've setup later: @pre{@i{...} [branch "b1"] remote = . merge = refs/heads/master [branch "b2"] remote = . merge = refs/heads/master [branch "post-rename"] remote = . merge = refs/heads/master [branch "improve-bar"] remote = . merge = refs/heads/master [branch "my-branch"] remote = origin merge = refs/heads/different-branch} @~ Note that there are branches that track local branches (ones with a @cmd{remote = .} setting), and ones that track remote ones; and also note that the @cmd{my-branch} branch tracks a remote branch with a different name. Since the settings are stored as configurations, it is possible to inspect and change them using @cmd{git config}, or even edit the config file directly. @pre{$ git config branch.my-branch.remote origin $ git config branch.my-branch.merge refs/heads/different-branch}} @subsection{Managing remotes} @p*{ The distributed nature of git means that you can interact with multiple remote repositories. You could have work done with other people done locally, where people push/pull from each other's clones (possibly by sending around patches, as described below), and eventually when the changes are ready push them back to the main repository. You can even have your repository track multiple unrelated remote repositories, essentially giving you branches that have @em{unrelated} histories. @~ By default, when you clone a remote repository git names it @cmd{origin} — and that name appears in many places, most notably in remote branch names. 
As seen in the above config, git remembers where the origin repository is via a configuration: @pre{$ git config remote.origin.url pltgit:eli/foo $ git config remote.origin.fetch +refs/heads/*:refs/remotes/origin/*} The first one is the url of the remote repository, and the second one is which branches we want to get from it. As with branches, you can use @cmd{git config} to change this information, or you can edit the file directly, but there is a command that does this more conveniently, keeping things consistent: @pre{$ git remote # lists all known remotes origin $ git remote -v # remotes and push/pull specs origin pltgit:eli/foo (fetch) origin pltgit:eli/foo (push) $ git remote show origin # see a detailed description * remote origin Fetch URL: pltgit:eli/foo Push URL: pltgit:eli/foo HEAD branch: master Remote branches: different-branch tracked master tracked my-branch tracked Local branches configured for 'git pull': master merges with remote master my-branch merges with remote different-branch Local refs configured for 'git push': master pushes to master (fast-forwardable) my-branch pushes to my-branch (up to date)} @~ The @cmd{git remote show} variant will actually query the remote repository for its state (using @cmd{git ls-remote}) by default, and tell you when a local branch that tracks a remote one is out-of-date. @~ There are a few more sub-verbs for the @cmd{git remote} command which you can see on the @man{git-remote} man page, the most important one is for adding a remote: @cmd{git remote add @i{short-name} @i{url}}. This is especially convenient if you want to have a fork of the @cmd{plt} repository, with most interaction happening against it, but occasionally pull/push updates from/to the main repository. @~ Of course, remember that you don't need to add remotes to push and pull from them. You could do the same by explicitly specifying a url for the repository you want to interact with. 
For example, you could have repositories in different accounts on different machines, and synchronize your work between them by pushing and pulling directly from one repository to another. (Reminder: if you do this, then you're likely to have “checkpoint commits” — when you're done with the work, you can do an interactive rebase, and squash these checkpoint changes back into logical commits.) But if you do this often enough, you will likely find it more convenient to add a named remote.} @subsection{Using private repositories} @p*{ A particularly useful use-case for adding a new remote is when you want to have private work done in your own fork of the @cmd{plt} repository. Such a mode of work is not strictly necessary — you could just do your work in your repository in a long-lived branch, but there are certain cases where working with a private repository on the server might be more convenient. For example, you might want to collaborate with someone else (that has access) via the server, or you might use a private fork of the @cmd{plt} repository as a central point for synchronizing work from clones on different filesystems as described at the end of the last section (the difference from that is that you basically use the PLT server as your synchronization point). Other than having the main repository reside on the PLT server, working with a private repository is not different than working with any other repository. @~ There are two facts that are worth reminding when you deal with a private repository. First, remember that creating a private fork is cheap: creating a new git clone of a repository will use hard links to the repository store objects, most of which will be contained in large packed files. The cost in terms of space and time for creating a new clone is therefore minimal when done on the same filesystem — and using the gitolite @cmd{fork} command is doing just that. 
Please use the @cmd{fork} command to create a private clone — gitolite has a feature where it creates any repository that you refer to (as long as it has a name that you're allowed to create — starts with your username); this means that you could clone the main @cmd{plt} repository and push from it into a private repository that doesn't exist: it will be created, but such a copy will not share storage with the main repository — it will require a new copy, and it will be slow to create. @~ The second thing to remember is that due to the nature of the git store, any object, including commits, is stored exactly once. Since commits contain their parents, having a specific commit means that you have its complete history — therefore, pulling in any branch from any repository will always require getting only commits that you don't already have. As a result, pulling and pushing to/from any repository will be efficient and move around only those commits that are missing on the other side. @~ You can choose one of two basic approaches to working with a private fork: you can have the public repository cloned but have branches pushed to your private one, or you can have your private fork cloned and occasionally push updates to the public one. Ways to use these two approaches are described and explained now. These examples use the @cmd{play} repository as an example (which you are encouraged to experiment with). Note that you can use a hybrid approach: you can think about a repository as a container for commit histories, pushing and pulling from any other repository, including the copy you're working with, the main @cmd{plt} repository, or a private fork (your own or another). 
Note also that since forks are cheap, you can keep several of them around, for example, you can have a fork for each long-lived branch — it's up to you to settle on a layout that is convenient for your work.} @h3{Using a clone of the public repository, pushing branches to your private one:} @ol*{ @~ Setting up: @ul*{@~ Create a fork: @pre{ssh pltgit fork play $user/play} @~ Get a local copy of the main repository: @pre{git clone pltgit:play} (or continue working in an existing one) @~ Set up a convenient name for your private repository: @pre{cd play git remote add my-fork pltgit:$user/play}} @~ To start working on a private branch, create one, and push it to your private repository: @pre{git checkout -b my-branch} Then use @cmd{push} to create this branch in your private repository, with @cmd{--set-upstream} so git will remember this setting: @pre{git push --set-upstream my-fork my-branch} You can also have your branch named differently in your fork, for example: @pre{git push --set-upstream my-fork my-branch:master} will save your branch as the @cmd{master} branch in your fork. This might be convenient if you want to clone your private repository elsewhere and work only on this branch. @~ You can now work as usual in your repository, pushing/pulling changes to/from the master branch will go to the public repository, and doing so from @cmd{my-branch} will go to your private fork. You can merge changes on the @cmd{master} branch to your private one, or rebase your branch onto it. However, note that the server will not allow pushing a rebased history to your clone. (More details at the end of this section.) You can bypass that by pushing to a new branch while keeping your local branch name: @pre{git push --set-upstream my-fork my-branch:my-branch-2} @~ When you're done merge your branch (possibly rebasing it first) to the master branch, and push as usual. 
@~ If you want to delete branches on your fork (either because you pushed a rebased version under a new name, or because you're done with that line of work), use @pre{git push my-fork :my-branch} Using an empty branch name for the local branch that you push is the way to delete remote branches. (As with local branches, this might lead to losing commits, so be careful. As always, git has a few safety mechanisms in place, so even if you did this by mistake, it is very likely recoverable.)} @h3{Using a clone of your private repository, pushing changes to the public one:} @ol*{ @~ Setting up: @ul*{@~ Create a fork: @pre{ssh pltgit fork play $user/play} @~ Get a local copy: @pre{git clone pltgit:$user/play} @~ Set up a convenient name for the main repository: @pre{cd play git remote add -t master main pltgit:play} (@cmd{-t master} tells git to have only the @cmd{master} branch retrieved.)} @~ Now you can work in this repository as usual — edit, commit, push, etc. @~ To push changes to the main repository, first make sure that you're on the branch with the changes that you want to push, and then: @ul*{@~ Get the recent tree from the main repository @pre{git fetch main} @~ Rebase or merge your changes with this: @pre{git rebase main/master} or @pre{git merge main/master} @~ Push the changes back: @pre{git push main}} Note that rebasing your branch on top of main/master means that it will be rewritten, which means that you will not be able to push your branch back to your clone. This is because rewritten histories are currently forbidden by the configuration, but this will probably change in the future. Still, even if the server would allow pushing a rebased history (you will need to use @cmd{-f} to force such a push), you would need to deal with the rebased branch in other clones you might have. 
Because of this, a rebase is fine if you're done with the work that you're pushing, otherwise, a merge is more convenient.} @section{Collaborating with others} @p*{ Git makes it very easy to collaborate with anyone, anywhere. You should think about repositories as being parts of a network which can be synced in any topology that is convenient for you. In the case of the PLT repository, the main repository on the git server is the central point where the official repository lives, and people who can push are directly syncing content into it. People who cannot push directly do so through someone who can, by sending out patches or “pull requests”. The same applies for any repository, of course, including private repositories, even ones that you maintain yourself independently of the PLT server. @~ In the case of a patch-based workflow, the two sides that are involved are the patch author, and the receiver that integrates it into his/her own clone (and from there it goes to the main repository as usual). The work that each side does is described in the next two subsections. @~ Following that there is a description for making your repository public, which you will need if you're working on a private repository, but it is also useful for your collaborator to do so you can use a pull-request workflow. In this mode there is no need to email patches; instead, both people make their repositories readable to each other, and when some work is ready on one person's repository, the other pulls the commits. This is described in the last subsection.} @subsection{Patch-based workflow@br — instructions for the patch sender side} @h4{Executive summary:} @ol*{@~ Work in a @cmd{plt} repository clone (possibly in your own branch) @~ @npre{$ git send-email origin/master} @~ You're done — thanks! 
@~ When the patch is applied, you will get the changes through @cmd{origin/master}, so if you worked on your master branch, make sure to use @cmd{git pull --rebase} which will notice that your changes were applied; if you worked on a branch, then you can now delete it (the commit objects will be different from the ones you've made).} @h4{Longer version:} @ol*{ @~ Work & commit as usual. In general, it is a good idea to use @cmd{Signed-off-by: @i{Your Name} <@i|{your@email}|>} in commit messages, which is a conventional way to declare that you agree for your work to be released as part of the PLT project, under the terms of the LGPL. @cmd{git commit} will add that for you if you use the @cmd{-s} flag. You can also make git do this later, when you send the patches out. @~ Make sure that you're working with a relatively recent clone, and that you're on the branch where you did your work. In most cases, this would be the master branch, but you can do your work in your own branch too, of course. @~ @p*{ Verify that your commits are all in your history. You can see the commits that you have over the @cmd{plt} history with @pre{git log --oneline origin/master..} these are the commits that you're going to send over now. (You can use the usual git toolset to tweak them further, or specify only some commits, etc.) @~ A relevant point to consider here is that git takes the first paragraph of each commit message as a subject line. When sending out a patch, this is made concrete by actually using it as the email's subject — so it is a good idea to make sure that this log looks fine, since the @cmd{--oneline} option will make it show those subjects. @~ Obviously, you should also make sure that the commits have clear descriptions of your work. People who are in the core group often have some general context that they are aware of, so some commit messages can be cryptic or even worse (eg, you might find @cmd{.} as a commit message) — don't mimic this...
As a more occasional contributor, you should explain your work in more detail. (There's no policy on commit messages, but you do need to go through some person on the team.)} @~ @p*{ At this point you should decide how to send your patches. Emailing them is the most convenient way to do this — to do this, you would use the @cmd{send-email} command: @pre{git send-email origin/master..} or if you send only some commits, use a different specification. To make things even easier, a single commit specification is considered as the starting point and all of the following commits (up to your branch's tip) will be included in the emails (in contrast to other git commands like @cmd{log}, where a single commit name is considered as the set of commits leading up to it) — so you can do this: @pre{git send-email origin/master} This will ask you a bunch of questions — it's easy to answer but you can also specify them as command-line options. If you intend to do this frequently it might be a good idea to make it easier with some settings in your global .gitconfig file. For example, I have these settings: @pre|{[sendemail] from = Eli Barzilay bcc = eli@barzilay.org suppresscc = self}| and you can see more in the @man{git-config} and @man{git-send-email} man pages. The address to send the patches to is also configurable — you can use something like @pre{to = dev@at-racket} or @pre{to = someone@at-racket} depending on who you send your patches to — but this is better done as a repository-local configuration option (or just use the @cmd{--to} flag). @~ You can add a @cmd{-s} flag to the command, to make git add @cmd{Signed-off-by} lines to commit messages. (See above for what this means.)
@~ If you want to send the files in some other way (eg, send them all packaged in an archive as attachment[*]), then just use @cmd{format-patch} instead of @cmd{send-email} — git will create a number of patch files in your current directory, which will be named @path{NNNN-text.patch} where the text is made out of the subject lines of the commit messages (the first line). You can even run @pre{git format-patch origin/master --stdout > my-patch} to concatenate them all and send the resulting file over. @~ @small{[*] Note that doing this means that it is not as easy to read your patch, so avoid doing this if you want to make it easier to read and accept it. On the other hand, if you're working with someone specific, they might prefer attachments (for example, it's easier to save the attached file from gmail).}} @~ Once the commits have been pushed to the main repository, you would get them when you pull to update. The commits will now be different objects than the ones you have — since the information changed (at least the committer information will be different, the log message might have been edited, etc). If you made your commits on your master branch which is set to track the @cmd{plt} master branch (the usual setup), then make sure that you run @cmd{git pull --rebase} to update — this will identify the commits as already included and will not include them in the rebased master. But if you made your commits in a private branch, and assuming that you didn't do any additional work there, then you can now just delete that branch. (If you did do more work there, then you should rebase it, to avoid resending the same patches again.)} @subsection{Patch-based workflow@br — instructions for the patch receiver side} @p*{ Accepting patches that were sent via email (or any other way), is also simple.
The command to do this is @cmd{git am}, which expects an argument that is a mailbox file holding the patch emails, or you could run it and pipe a patch email into it.} @h4{Executive summary:} @ol*{@~ @npre{$ git checkout master} @~ Save (unmodified) patch emails into a mail folder file. @~ @npre{$ git am -3 @i{the-mail-folder}} @~ Push the changes up to the server} @h4{Longer version:} @ol*{ @~ While you will not be the author of the commits, you will be their committer, so you should of course be aware of the changes, and be willing to maintain the new code and other work that is implied. So the first step that you should do is review the patch and make sure that you are willing to accept responsibility for it. @~ Save the patch emails to a mail folder (usually a file). You must take care to save the emails @em{as is}, including the date, author, and subject headers, and avoiding text that could have been butchered by your email client. For example, if you're using gmail, then use the “show original” option to view the raw email text, and save that to a file (even in this format gmail will have a first line with a bunch of spaces — it's best to remove that). Otherwise, gmail does things like wrap lines, replace spaces by non-breaking spaces, or remove spaces. Alternatively, extract patch files from an archive if that's what you received, or save a single attachment file etc. @~ In your repository clone, make sure that you're on the branch that you want to integrate the changes into. You could do this in your master branch, or in a new topic branch (especially if there is more than one patch). @~ Run @cmd{git am -3 mail-folder} (@cmd{am} stands for “apply-mail”) with the mail folder that you've created above. It will apply the patches and commit them one by one.
Like @cmd{git rebase}, if there are conflicts the process will stop so you can resolve them — and then run the @cmd{am} command with @cmd{--continue}, or @cmd{--skip} this commit and continue with the rest, or @cmd{--abort} to go back to the start. The @cmd{-3} flag tells git that if a conflicting patch comes from the above @cmd{format-patch}, and it specifies files that we have, then try a 3-way merge — this will make things generally better (and it can identify more patches that were applied, instead of showing them as conflicts). @br You can also use an @cmd{-i} flag to the command to get an interactive version — for each commit it will ask you what to do with it, and let you edit the log message. @~ Finally push the commits as usual.} @subsection{Making a private repository publicly available} @p*{ If you're working with “outside people” (people with no accounts on the PLT server, and no direct file-system access etc) on a private repository, you will need to find some way to make your repository available for cloning. An easy way to do so is to put it on a filesystem that those people can access — eg, if you're all in the same department. Another easy way to make a repository available is to find a hosting service like github and others — there are many options here, some are free but limited, and some cost money; if you prefer this easy solution, keep in mind that you can pay for the duration of the collaboration and at the end you can simply keep your repository clone to yourself (eg, if you're working on a paper then there's no need to pay once all work is done). @~ But if you want to do it yourself, the quickest and most convenient way to make a repository public is to put it in a directory that is available on the web.
Such repositories can be cloned directly from the URL the repository is available at — there's no need to setup a server in a special way, and no need to run cgi scripts.} @h4{Executive summary:} @ol*{@~ @npre{$ git clone --bare @i{your-repo} ~/public_html/@i{repo}.git} @~ @npre{$ cd ~/public_html/@i{repo}.git/hooks; mv post-update.sample post-update; chmod +x post-update} @~ @npre{$ git remote add public ~/public_html/@i{repo}.git} @~ Tell people to clone from @cmd{http://some.where/~you/@i{repo}.git} @~ Work, apply email patches, and: @cmd{git push public}} @h4{Longer version:} @ol*{ @~ Make a “bare” repository — this is a repository that has no working directory: @pre{git clone --bare @i{your-repo} @i{repo}.git} This will create a @path{@i{repo}.git} directory holding the bare repository. You should use some path in a directory where you have web pages published. @~ The URL where the directory is found at is what other people should use when cloning. @~ You can now push to this repository, and other people will see it too. To make things easier, you can set a remote name for this repository, so it's easy for you to push changes to it. @pre{git remote add public ~/public_html/@i{repo}.git} And now you can use @cmd{git push public}. (You can also pull from it, but since you're going to be the only one who pushes into it, that will not be necessary.) @~ One thing to be aware of is that while a repository can be published through HTTP this way, git considers that a “dumb protocol” (because there is no proper interaction between the two sides). To still make cloning possible, you will need to maintain some meta-files that hold entry points to the objects in your repository — to get this, run: @pre{git update-server-info} You need to run this after each update to the repository — and to automate this you can have a hook do it for you. 
In the bare repository you will find a @path{hooks} directory with a file called @path{post-update.sample} — simply rename this file to @path{post-update}, and make it executable with @cmd{chmod +x post-update}. From now on every push to this repository will run the hook and keep the meta files updated.} @subsection{Pull-request workflow} @p*{ A possibly easier way for people to contribute work is to make their repositories available somehow. In the case of a private repository, the two sides can be in a shared file system, with read permissions for each other; or achieved as described in the previous subsection. In the case of contributing to the @cmd{plt} repository, the contributor can maintain a public fork of the @cmd{plt} repository (eg, by forking the @cmd{plt} github mirror at @selflink{https://github.com/plt/racket} directly on github). @~ In this workflow there is no need to mail patches — instead, the receiver simply pulls them directly from the sender's repository. For example, someone tells you that they have some new commits in a @cmd{foo} branch of their repository. Since this is a repository that you can access, and since it shares history with yours, you can just pull that branch in, for example: @pre{git checkout -b someones-work git pull @i{someones-repository-url}} Note that the @cmd{pull} will merge the changes, creating a merge commit if your @cmd{master} branch cannot be fast-forwarded. To avoid this, you can use @cmd{fetch} instead: @pre{git checkout -b someones-work git fetch @i{someones-repository-url}} Either way, this fetches the remote repository's HEAD. 
You can create the branch in a single fetch command by specifying the remote branch name, and the local branch to fetch into, for example: @pre{git fetch @i{someones-repository-url} master:someone} @~ If you expect to do this often (eg, you're going to suggest fixes for the work and get new work in), then you can add a @cmd{someone} remote to be used more conveniently: @pre{git remote add someone @i{someones-repository-url} git fetch someone git checkout -b some-branch someone/some-branch} possibly using -t to make the branch track the remote one: @pre{git checkout -tb some-branch someone/some-branch} Note that there is no need to create a branch before the @cmd{fetch}, since it will be fetched to a @cmd{remotes/someone/master} branch. @~ Once you pulled in the branch, you can inspect the changes, merge them, rebase them, etc. The important point here is that you have a copy of the contributed line of work, which you can use with the usual git toolset. @~ When/if you're happy with the changes, you can simply integrate them to your master branch, and if this is in a clone of the @cmd{plt} repository, then at this point you can simply push these commits to the main server. Once that happens, the contributor can update their own clone, and continue working as usual. @~ Git has a tool that makes this mode of work a little more organized and robust for the contributor: @cmd{git request-pull}. This simple command (surprisingly, it has no flags) expects a commit that marks the start of the new work (actually, the last one before it, eg, @cmd{origin/master}), and the url of the repository. For example: @pre{git request-pull origin git://github.com/someone/somefork.git} @~ Of course, the contributor doesn't have to work directly in the available repository — in the case of github or with an over-the-web setup like the one described in the previous subsection the public repository is a bare one, and no work can be done directly on it. 
So what actually happens is: the contributor works on his/her own repository, pushes changes to the public one, and then requests a pull. @~ The @cmd{request-pull} command will therefore check that the new commits are indeed available at that location, and find out the branch that they're on (in case it's different than the branch that someone is working on). It then prints out a “pull request” text with a description of the changes, the url that was specified, the branch name with the new work, and a summary of the files that were changed. In short, all the relevant information is there, and it even verified that the commits are indeed available — merging them in is now easy. @~ (As a sidenote, you can use @cmd{.} as the url: @cmd{git request-pull origin .}, and get a condensed summary of your changes.)} @subsection{Pull-request workflow@br — recipe for the sender side} @ol*{@~ Clone the @cmd{plt} repository and work with it as usual, commit your work @~ Make your repository publicly available @~ @npre{$ git request-pull origin @i{your-repository-url}} @~ Send the resulting text to @cmd{dev@at-racket} @~ You're done — thanks!} @p{Alternatively, you can fork the @cmd{plt} repository on github: @cmd{https://github.com/plt/racket}, commit, then do a pull request. Note: it is better to send a note about your pull request to @cmd{dev@at-racket}, or you can do the pull request directly with git as listed above (using github to have a public repository).} @subsection{Pull-request workflow@br — recipe for the receiver side} @p{This recipe is for getting some remote work in as a one-time job. 
If you need to cooperate more closely with someone, you will want to add the remote repository with @cmd{git remote} as shown above.} @ol*{ @~ Get a @cmd{plt} clone, or use your own (it's safe to do the latter, no need for a new clone unless you're paranoid): @pre{git clone pltgit:plt cd plt} @~ Get the foreign repository's master branch (or any other branch) into a local branch: @pre{git fetch @i{remote-repository-url} master:foo} This pulls the @cmd{master} branch of the remote repository into a local @cmd{foo} branch (you can use other names, of course). @~ Inspect the changes as usual @pre{git log master..foo # new commits git diff master...foo # changes git log -p master..foo # both} (See above for more details on these.) @~ If you're happy with the change and want to get it as-is, you can simply @cmd{merge} the branch: @pre{git merge foo} But unless the remote work was done from the point your @cmd{master} points at (i.e., there were no new commits), this will generate a merge commit that might not be desired. To avoid it, you can rebase the branch against your @cmd{master} and then do the @cmd{merge} (which will now be a fast-forward): @pre{git checkout foo git rebase master git checkout master git merge foo} @~ You no longer need the @cmd{foo} branch, so delete it with: @pre{git branch -d foo} @~ Push things back as usual} @subsection{Merging github pull-requests} @p*{ Github is popular enough that some people prefer to work with a github fork of the PLT repository, and then send a pull request. Merging these pull requests can be done as with any other repository, as explained in the previous section. However, with github there is an easy way to deal with it. @~ A pull request has a URL like @cmd{https://github.com/plt/racket/pull/123} which you can use in your browser to inspect the changes.
To apply the changes locally, a convenient feature is that you can add a @cmd{.patch} suffix to every pull request URL which will have a text version of the patch. This means that applying the patch is particularly easy on the command line, for example: @pre{curl https://github.com/plt/racket/pull/123.patch | git am} will fetch the patch text and apply it (and you can now push as usual, or locally inspect the patch and possibly edit it in the usual ways).} @section{Additional Resources} @dl*{ @~ @strong{Quick and short:} @~ @dl*{ @~ @selflink{http://eagain.net/articles/git-for-computer-scientists/} @~ Basic description of what makes a git repository @~ Cheat sheets: @~ @dl*{ @~ @selflink{http://gitref.org/} @~ Quick reference thing, with links to the git man pages and the progit book @~ @selflink{http://jonas.nitro.dk/git/quick-reference.html} @~ Really short @~ @selflink{http://cheat.errtheblog.com/s/git} @~ Explains some more @~ @selflink{http://ktown.kde.org/~zrusin/git/git-cheat-sheet.svg} @~ Short, intended for printing @~ @selflink{http://help.github.com/git-cheat-sheets/} @~ Similar} @~ @selflink{http://git.or.cz/course/svn.html} @~ subversion->git crash course @~ @selflink{http://www.kernel.org/pub/software/scm/git/docs/everyday.html} @~ Nice summary of a few things, but too verbose or too advanced in some places, and also a little outdated.} @~ @strong{Books:} @~ @dl*{ @~ @selflink{http://book.git-scm.com/} @~ The git community book. Also, there are a bunch of videos linked, and some tutorial links in the “Welcome” part. @~ @selflink{http://progit.org/book/} @~ A frequently recommended book. (Also some good blog entries.) @~ @selflink{http://www-cs-students.stanford.edu/~blynn/gitmagic/} @~ Another good book (a bit more verbose than the previous one)} @~ @strong{Misc:} @~ @dl*{ @~ @selflink{http://www.kernel.org/pub/software/scm/git/docs/@; gittutorial.html} @~ The git tutorial, also available as the @man{gittutorial} man page.
@~ @selflink{http://help.github.com/} @~ Some github guides, well-organized by levels. @~ @selflink{http://www.gitready.com/} @~ A kind of a collection of small tips; didn't change in a while though. @~ @selflink{http://marklodato.github.com/visual-git-guide/} @~ This is a short visual document about git. But it goes a little fast, so it would be useful after you're comfortable with the basics.}} }}))