git-tutorial/index.html
2021-03-31 15:48:06 +01:00

704 lines
24 KiB
HTML

<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>GIT tutorial</title>
<style>
/* Page layout: fixed-width column of justified text. */
body { width: 63rem; font-size: 1.2rem; text-align:justify; }
/* Code editors and the "copy commands" box take the full column width. */
textarea { display:block; width: 63rem; height: 18rem; font-size: 1.2rem; }
input { display:block; font-size: 1.2rem; }
/* Table used to display the simulated filesystem. */
table, td { border:thin solid black; border-collapse: collapse; font-size: 1.2rem; }
/* Escaped representation of characters such as \000, \n or \xNN. */
.specialchar { color: red; }
/* The "\x" prefix and the hexadecimal digits of a byte shown in hexadecimal. */
.hex-prefix { color: lightgrey; }
.hex { color: brown; }
/* A binary object hash displayed inside a tree object. */
.hex-hash { border: thin solid brown; display: block; width: max-content; }
/* Highlighting applied by ___hilite to the hovered hash and to the matching object path. */
.hex-hash.hilite-src { background: lightyellow; border-color: red; }
.object-hash.hilite-dest { background: lightyellow; border-color: red; }
/* Transparent border so that highlighting an object path does not change its size. */
.object-hash { border: thin solid transparent; }
/* Visible marker for a space character. */
.space { text-decoration: underline; color: brown; opacity: 0.5; }
/* Label shown in front of file contents that could be zlib-inflated. */
.deflated { color: red; }
/* Label shown instead of contents for directory entries. */
.directory { color: darkcyan; }
/* Error message shown when evaluating a snippet fails. */
.error { color: orangered; }
</style>
</head>
<body>
<a href="https://git-scm.com/book/en/v2/Git-Internals-Git-Objects">git-book</a>
<script src="sha1.js"></script>
<script src="pako.min.js"></script>
<script>
// Converts a value to a byte array: the value is coerced to a string and each
// UTF-16 code unit becomes one array element (values above 255 wrap around,
// as for any Uint8Array store).
function ___stringToUint8Array(s) {
  var text = "" + s;
  var bytes = new Uint8Array(text.length);
  for (var k = 0; k < text.length; k++) {
    bytes[k] = text.charCodeAt(k);
  }
  return bytes;
}
// Converts an array of byte values to a "binary string" containing one
// character per array element.
function ___uint8ArrayToString(a) {
  var text = '';
  for (var k = 0; k < a.length; k++) {
    text += String.fromCharCode(a[k]);
  }
  return text;
}
// Short aliases available to the tutorial snippets. They are assigned as
// globals so that the code evaluated by ___git_eval can call them.
// NOTE(review): Sha1 comes from sha1.js, which is not visible here; confirm
// that Sha1.hash hashes binary strings byte-for-byte (no re-encoding).
sha1 = Sha1.hash;
// deflate and inflate both take and return "binary strings" (one character per byte).
deflate = function(s) { return ___uint8ArrayToString(pako.deflate(___stringToUint8Array(s))); };
// Fixed: inflate used to call pako.deflate, compressing a second time instead of decompressing.
inflate = function(s) { return ___uint8ArrayToString(pako.inflate(___stringToUint8Array(s))); };
// Counter used to generate unique ids for generated HTML elements.
var global_element_id = 0;
// Escapes the characters &, <, > and " of a value (coerced to a string) as
// HTML entities.
function ___specialchars(str) {
  var entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;' };
  return String(str).replace(/[&<>"]/g, function (c) {
    return entities[c];
  });
}
// Prepends `char` to the string form of `s` until it is at least `len`
// characters long, and returns the resulting string.
function ___left_pad(s, char, len) {
  var padded = "" + s;
  while (padded.length < len) {
    padded = char + padded;
  }
  return padded;
}
// Renders each character of `s` as a coloured "\xNN" escape (NN being the
// character code in hexadecimal, at least two digits) and wraps the whole
// sequence in a block-level <span>.
function ___to_hex_for_printf(s) {
  var cells = String(s).split('').map(function (c) {
    var digits = ___left_pad(c.charCodeAt(0).toString(16), '0', 2);
    return '<span class="hex-prefix">\\x<span class="hex">' + digits + '</span></span>';
  });
  return '<span style="display: block;">' + cells.join('') + '</span>';
}
// Returns the hexadecimal representation of `s`: the character codes in
// base 16, each padded to at least two digits, concatenated.
function ___to_hex(s) {
  return String(s).split('').map(function (c) {
    return ___left_pad(c.charCodeAt(0).toString(16), '0', 2);
  }).join('');
}
// Returns HTML displaying the string `s` with every character outside a small
// safe set (letters, digits and -+_/!%$@.()) replaced by a coloured escape:
// a marker for spaces, \000, \r, \n, \t, and \xNN for everything else.
//
// The replacement works on the raw string. It previously ran after
// ___specialchars, so the "&" and ";" of the generated entities were
// themselves hex-escaped and "<" was displayed as "\x26lt\x3b". No separate
// HTML escaping is needed: the safe set contains no HTML-special character,
// so &, <, > and " are always turned into \xNN escapes.
function ___specialchars_and_colour(s) {
  return String(s).replace(/[^-a-zA-Z0-9+_/!%$@.()]/g, function (c) {
    switch (c) {
      case " ": return '<span class="space">&nbsp;</span>';
      case "\0": return '<span class="specialchar">\\000</span>';
      case "\r": return '<span class="specialchar">\\r</span>';
      case "\n": return '<span class="specialchar">\\n</span>';
      case "\t": return '<span class="specialchar">\\t</span>';
      default:
        return '<span class="specialchar">\\x'
          + c.charCodeAt(0).toString(16).padStart(2, '0')
          + '</span>';
    }
  });
}
// Returns { left, top }: the sums of offsetLeft and offsetTop over `elt` and
// all the elements reached by following offsetParent. A falsy `elt` yields
// { left: 0, top: 0 }.
function getOffset(elt) {
  var left = 0;
  var top = 0;
  for (var node = elt; node; node = node.offsetParent) {
    left += node.offsetLeft;
    top += node.offsetTop;
  }
  return { left: left, top: top };
}
// Creates an absolutely positioned red <div> inside `parent`, used as one
// segment of a connector line.
function ___hilite_make_segment(parent) {
  var segment = document.createElement('div');
  parent.appendChild(segment);
  segment.style.position = 'absolute';
  segment.style.backgroundColor = 'red';
  return segment;
}
// Positions and sizes a segment. All values are numbers of CSS pixels; the
// explicit "px" unit is required for the values to be accepted in standards mode.
function ___hilite_place_segment(segment, left, top, width, height) {
  segment.style.left = left + 'px';
  segment.style.top = top + 'px';
  segment.style.width = width + 'px';
  segment.style.height = height + 'px';
}
// Highlights the element whose id is `src` and every element having the class
// `dest`, and draws a three-segment connector (horizontal, vertical,
// horizontal) inside the #lines element between the source and the table row
// containing each destination.
function ___hilite(src, dest) {
  var srcElt = document.getElementById(src);
  var lines = document.getElementById('lines');
  if (!srcElt || !lines) { return; }
  srcElt.classList.add('hilite-src');
  var dests = document.getElementsByClassName(dest);
  lines.innerHTML = '';
  var osrc = getOffset(srcElt);
  for (var d = 0; d < dests.length; d++) {
    dests[d].classList.add('hilite-dest');
    // Find the table row containing the destination.
    var tr = dests[d];
    while (tr !== null && tr.tagName.toLowerCase() != 'tr') { tr = tr.parentElement; }
    if (tr === null) { continue; } // not inside a table row: nothing to connect to
    var otr = getOffset(tr);
    var l1 = ___hilite_make_segment(lines);
    var l2 = ___hilite_make_segment(lines);
    var l3 = ___hilite_make_segment(lines);
    // Coordinates are relative to the offset parent of the segments.
    var op = getOffset(l1.offsetParent);
    var xa = osrc.left - op.left + srcElt.offsetWidth;
    var ya = osrc.top - op.top + srcElt.offsetHeight / 2;
    var xb = otr.left - op.left + tr.offsetWidth;
    var yb = otr.top - op.top + tr.offsetHeight / 2;
    // Each destination gets its own vertical segment, 50px further to the
    // right. This used to read an undeclared variable `i` (whatever value
    // another function had leaked into the global scope).
    var x = Math.max(xa, xb) + (50 * (d + 1));
    // Make (xa, ya) the upper end point so that the vertical segment has a
    // positive height.
    if (ya > yb) {
      var tmpx = xa;
      var tmpy = ya;
      xa = xb;
      ya = yb;
      xb = tmpx;
      yb = tmpy;
    }
    // line 1: horizontal, from the upper end point to the vertical segment
    ___hilite_place_segment(l1, xa, ya, x - xa, 1);
    // line 2: vertical
    ___hilite_place_segment(l2, x, ya, 1, yb - ya);
    // line 3: horizontal, from the lower end point to the vertical segment
    ___hilite_place_segment(l3, xb, yb, x - xb, 1);
  }
}
// Removes the highlighting classes from the element whose id is `src` and
// from every element having the class `dest`.
function ___lolite(src, dest) {
  document.getElementById(src).classList.remove('hilite-src');
  Array.from(document.getElementsByClassName(dest)).forEach(function (elt) {
    elt.classList.remove('hilite-dest');
  });
}
// Returns HTML displaying the string `s`. Strings starting with "tree " and
// containing a NUL-terminated header are displayed as a tree object: after
// the header, each entry is a text part up to a NUL, followed by 20 raw bytes
// which are displayed in hexadecimal and highlight the matching object path
// on hover. Any other string is displayed with ___specialchars_and_colour.
//
// The entries are parsed sequentially instead of splitting the whole string
// on NUL, so that a 0x00 byte inside a 20-byte hash no longer corrupts the
// display, and no variable is leaked into the global scope.
function ___specialchars_and_colour_and_hex(s) {
  var nul_html = '<span class="specialchar">\\000</span>';
  var header_end = s.indexOf('\0');
  if (s.slice(0, 5) != "tree " || header_end < 0) {
    return ___specialchars_and_colour(s);
  }
  var html = ___specialchars_and_colour(s.slice(0, header_end)) + nul_html;
  var pos = header_end + 1;
  while (pos < s.length) {
    var name_end = s.indexOf('\0', pos);
    if (name_end < 0) {
      // Truncated entry: display the remaining text as-is.
      html += ___specialchars_and_colour(s.slice(pos));
      break;
    }
    html += ___specialchars_and_colour(s.slice(pos, name_end)) + nul_html;
    var raw_hash = s.slice(name_end + 1, name_end + 21);
    var id = global_element_id++;
    var hash = "object-hash-" + ___to_hex(raw_hash);
    html += '<span id="'+id+'" class="hex-hash" onmouseover="___hilite('+id+',\''+hash+'\')" onmouseout="___lolite('+id+',\''+hash+'\')">'
      + ___to_hex_for_printf(raw_hash.slice(0, 10))
      + ___to_hex_for_printf(raw_hash.slice(10))
      + '</span>';
    pos = name_end + 21;
  }
  return html;
}
// Returns HTML displaying the string `s`. When `s` can be inflated by pako,
// the result contains two views which ___deflated_click toggles: the inflated
// data prefixed with "deflated:" (visible) and the raw data (hidden).
// Otherwise `s` is displayed directly.
function ___specialchars_and_colour_and_hex_and_zlib(s) {
  var inflated;
  try {
    inflated = pako.inflate(___stringToUint8Array(s));
  } catch(e) {
    inflated = false;
  }
  if (!inflated) {
    return ___specialchars_and_colour_and_hex(s);
  }
  var id=global_element_id++;
  var pretty_html = ___specialchars_and_colour_and_hex(___uint8ArrayToString(inflated));
  var raw_html = ___specialchars_and_colour_and_hex(s);
  return [
    '<span onClick="___deflated_click('+id+')">',
    '<span id="deflated'+id+'-pretty">',
    '<span class="deflated">deflated:</span>',
    pretty_html,
    '</span>',
    '<span id="deflated'+id+'-raw" style="display:none">',
    raw_html,
    '</span>',
    '</span>'
  ].join('');
}
// Builds a shell "printf" command for the string `bs`: every character other
// than a letter, a digit or "_" is written as two backslashes followed by
// xNN. When `trailing_x` is truthy an "x" is appended to the command.
function ___bytestring_to_printf(bs, trailing_x) {
  var escaped = bs.replace(/[^a-zA-Z0-9_]/g, function(c) {
    var code = c.charCodeAt(0).toString(16);
    return '\\\\x' + ___left_pad(code, 0, 2);
  });
  var suffix = trailing_x ? 'x' : '';
  return 'printf ' + escaped + suffix;
}
// Returns a string of shell commands which recreate the contents of the
// simulated filesystem `fs`: one mkdir per directory (entries mapped to null)
// followed by one printf redirection per file.
//
// Directories and files are collected in two separate lists, each keeping the
// order of the entries in `fs`. This replaces a sort which compared only the
// first character of each command ('d' or 'f') and relied on the sort being
// stable to keep the commands in order.
function ___filesystem_to_printf(fs) {
  var mkdir_commands = [];
  var write_commands = [];
  Object.entries(fs).forEach(function (x) {
    if (x[1] === null) {
      mkdir_commands.push('d="$('+___bytestring_to_printf(x[0], true)+')"; mkdir "${d%x}";');
    } else {
      write_commands.push('f="$('+___bytestring_to_printf(x[0], true)+')"; '+___bytestring_to_printf(x[1], false)+' > "${f%x}";');
    }
  });
  // All directories are created before any file is written.
  return mkdir_commands.concat(write_commands).join(' ');
}
// Toggles between the pretty-printed and the raw view of the contents whose
// elements have the ids "deflated<id>-pretty" and "deflated<id>-raw".
function ___deflated_click(id) {
  var pretty = document.getElementById('deflated'+id+'-pretty');
  var showing_pretty = pretty.style.display != "none";
  pretty.style.display = showing_pretty ? "none" : "inherit";
  document.getElementById('deflated'+id+'-raw').style.display = showing_pretty ? "inherit" : "none";
}
// Returns HTML displaying the path `x`. When the path has more than three
// components and ends with objects/<2 hex digits>/<38 hex digits>, the last
// two components are wrapped in a <span> with the classes "object-hash" and
// "object-hash-<the 40 hex digits>".
function ___format_filepath(x) {
  var parts = x.split('/');
  var n = parts.length;
  var is_object_path = n > 3
    && parts[n-3] == 'objects'
    && /^[0-9a-f]{2}$/.test(parts[n-2])
    && /^[0-9a-f]{38}$/.test(parts[n-1]);
  if (!is_object_path) {
    return ___specialchars_and_colour(x);
  }
  var prefix = parts.slice(0, n-2).join('/') + '/';
  var hash_parts = parts.slice(n-2);
  return ___specialchars_and_colour(prefix)
    + '<span class="object-hash object-hash-'+hash_parts.join('')+'">'
    + ___specialchars_and_colour(hash_parts.join('/'))
    + "</span>";
}
// Returns an HTML table row for one filesystem entry `x` = [path, contents]:
// the formatted path, then either a "Directory" label (contents === null) or
// the formatted file contents.
function ___format_entry(x) {
  var path_cell = "<td><code>" + ___format_filepath(x[0]) + "</code></td>";
  var contents_html;
  if (x[1] === null) {
    contents_html = '<span class="directory">Directory</span>';
  } else {
    contents_html = "<code>" + ___specialchars_and_colour_and_hex_and_zlib(x[1]) + "</code>";
  }
  return "<tr>" + path_cell + "<td>" + contents_html + "</td></tr>";
}
// Returns HTML displaying the simulated filesystem `fs`: a summary line, a
// link which shows a (initially hidden) text area containing the shell
// commands recreating the filesystem, and a table with one row per entry,
// sorted by path.
function ___filesystem_to_string(fs) {
  var by_path = function (a, b) {
    if (a[0] < b[0]) { return -1; }
    if (a[0] > b[0]) { return 1; }
    return 0;
  };
  var rows = Object.entries(fs).sort(by_path).map(___format_entry);
  var id = global_element_id++;
  var html = "Filesystem contents: " + rows.length + " files and directories. ";
  html += '<a href="javascript: ___copyprintf_click(\'elem-'+id+'\');">';
  html += "Copy commands to recreate in *nix terminal";
  html += "</a>.";
  html += "<br />";
  html += '<textarea id="elem-'+id+'" disabled="disabled" style="display:none">';
  html += ___specialchars(___filesystem_to_printf(fs));
  html += '</textarea>';
  html += "<table>" + rows.join('') + "</table>";
  return html;
}
// Toggles the text area whose id is `id`. When the text area becomes visible,
// its whole contents are selected and copied to the clipboard.
function ___copyprintf_click(id) {
  var box = document.getElementById(id);
  if (box.style.display != "none") {
    // Currently visible: just hide it.
    box.style.display = "none";
    return;
  }
  box.style.display = "inherit";
  box.focus();
  // The text area is enabled only for the duration of the selection and copy.
  box.disabled = false;
  box.select();
  box.setSelectionRange(0, box.value.length * 10); // for mobile devices?
  document.execCommand('copy');
  box.disabled = true;
}
// Value of `filesystem` after the last successful evaluation (false before any).
var global_filesystem=false;
// Evaluates the tutorial snippets in0 to in<current> as a single script, then
// displays the resulting filesystem in the element out<current>. If the
// evaluation throws, the error and its stack are displayed there instead.
//
// The script is run with a direct eval, so it can see the local variables of
// this function: the appended statement relies on `current` being in scope.
function ___git_eval(current) {
var script = '';
// NOTE(review): `i` is not declared, so this loop assigns a global variable.
for (i = 0; i <= current; i++) {
script += document.getElementById('in' + i).value;
}
// The script ends with the expression `filesystem`, which becomes the value of eval().
script += "document.getElementById('out' + current).innerHTML = ___filesystem_to_string(filesystem); filesystem;";
try {
global_filesystem = eval(script);
} catch (e) {
var error = ___specialchars("" + e + "\n\n" + e.stack);
document.getElementById('out' + current).innerHTML = '<pre class="error">' + error + '</pre>';
}
}
</script>
<div id="lines"></div>
<h2>Introduction</h2>
<p>
GIT is based on a simple model, with a lot of shorthands for common
use cases. This model is sometimes hard to guess just from the
everyday commands. To illustrate how GIT works, we'll implement a
stripped down clone of GIT in a few lines of JavaScript.
</p>
<h2>The Operating System's filesystem</h2>
<p>We will simulate the Operating System's filesystem with a very
simple key-value store. In this very simple filesystem, directories
are entries mapped to <code>null</code> and files are entries mapped
to strings.</p>
<textarea id="in0">
// The simulated filesystem: each key is a path, mapped to null for a
// directory or to a string (the contents) for a file.
var filesystem = {};
</textarea>
<input type="button" value="eval" onClick="___git_eval(0)">
<div id="out0"></div>
<p>The filesystem exposes functions to read an entire file, create or
replace an entire file, and create a directory.</p>
<textarea id="in1">
// Returns what is stored under `filename` in the filesystem (undefined when
// there is no such entry).
function read(filename) {
  var contents = filesystem[filename];
  return contents;
}
// Creates or replaces the file `filename`. The data is converted to a string,
// which is also returned.
function write(filename, data) {
  var contents = "" + data;
  filesystem[filename] = contents;
  return contents;
}
// Creates the directory `dirname`: directories are entries mapped to null.
function mkdir(dirname) {
  filesystem[dirname] = null;
  return null;
}
</textarea>
<input type="button" value="eval" onClick="___git_eval(1)">
<div id="out1"></div>
<p>It will be handy for some operations to list the contents of a
directory.</p>
<textarea id="in2">
// Returns the names of the files and directories directly inside `dirname`
// (an empty `dirname` lists the top level), without duplicates.
function listdir(dirname) {
  var prefix = (dirname == "") ? "" : (dirname + '/');
  // The filesystem is a plain object, so its paths are obtained with
  // Object.keys (objects have no filter method).
  var descendents = Object.keys(filesystem)
    .filter(filename => filename.startsWith(prefix) && filename.length > prefix.length);
  // The child is the first path component after the directory's own path.
  var children = descendents
    .map(filename => filename.slice(prefix.length).split('/')[0]);
  // remove duplicates:
  return Array.from(new Set(children));
}
</textarea>
<input type="button" value="eval" onClick="___git_eval(2)">
<div id="out2"></div>
<h2>Example working directory</h2>
<p>Our imaginary user will create a <code>proj</code> directory,
and start filling in some files.</p>
<textarea id="in3">
// Path of the user's project directory in the simulated filesystem.
var workdir='proj';
mkdir('proj');
// A file directly inside the project directory.
write('proj/README', 'This is my Scheme project.\n');
// A subdirectory containing a source file.
mkdir('proj/src');
write('proj/src/main.scm', '(map (lambda (x) (+ x 1)) (list 1 2 3))\n');
</textarea>
<input type="button" value="eval" onClick="___git_eval(3)">
<div id="out3"></div>
<h2><code>git init</code> (creating <code>.git</code>)</h2>
<p>The first thing to do is to initialize the GIT directory.
For now, only the <code>.git</code> folder is needed. The rest
of the function implementing <code>git init</code> will be
implemented later.</p>
<textarea id="in4">
// Joins two path fragments with a "/". When `a` is empty, `b` is returned
// unchanged so that the result has no leading "/".
function join_paths(a, b) {
  if (a == "") {
    return b;
  }
  return a + "/" + b;
}
// Creates the .git directory inside the working directory.
function git_init_mkdir() {
  var git_directory = join_paths(workdir, '.git');
  mkdir(git_directory);
}
git_init_mkdir();
</textarea>
<input type="button" value="eval" onClick="___git_eval(4)">
<div id="out4"></div>
<h2><code>git hash-object</code> (storing a copy of a file in <code>.git</code>)</h2>
<p>The most basic element of a GIT repository is an object. It is a
copy of a file that is stored in GIT&apos;s database. That copy is
stored under a unique name. The unique name is obtained by hashing the
contents of the file. <!-- or have a hash oracle that always returns a
new number. --></p>
<textarea id="in5">
// Computes the hash of an object and optionally stores it in the database.
//   must_write:   when true, the object is written under .git/objects
//   type:         type of the object, e.g. 'blob'
//   is_data:      when true, path_or_data is the data itself; otherwise it is
//                 the path of a file, relative to the working directory
// Returns the hash of the object, in hexadecimal.
function hash_object(must_write, type, is_data, path_or_data) {
  var data = is_data ? path_or_data : read(join_paths(workdir, path_or_data));
  if (data === undefined || data === null) {
    throw new Error('hash_object: ' + path_or_data + ' is not a readable file');
  }
  // The object is the data prefixed with a header: type, length, NUL.
  var object_contents = type + ' ' + data.length + '\0' + data;
  var hash = sha1(object_contents);
  if (must_write) {
    // Objects are stored as .git/objects/<first 2 hex digits>/<remaining digits>.
    var objects_directory = join_paths(workdir, '.git/objects');
    var object_directory = join_paths(objects_directory, hash.slice(0,2));
    mkdir(objects_directory);
    mkdir(object_directory);
    var object_path = join_paths(object_directory, hash.slice(2));
    write(object_path, deflate(object_contents));
  }
  return hash;
}
</textarea>
<input type="button" value="eval" onClick="___git_eval(5)">
<div id="out5"></div>
<p>So far, our GIT database does not know about any of the user&apos;s
files. In order to add the contents of the <code>README</code> file in
the database, we use <code>git hash-object -w -t blob README</code>,
where <code>-w</code> tells GIT to <em>write</em> the object in its
database, and <code>-t blob</code> indicates that we want to create
a <em>blob</em> object, i.e. the contents of a file.</p>
<textarea id="in6">
// git hash-object -w -t blob README
// Arguments: must_write = true (-w), type = 'blob' (-t blob),
// is_data = false (the last argument is a path, not the data itself).
hash_object(true, 'blob', false, 'README');
</textarea>
<input type="button" value="eval" onClick="___git_eval(6)">
<p>The objects stored in the GIT database are compressed with zlib
(using the "deflate" compression method). The filesystem view shows
the <span class="deflated">deflated:</span> followed by the uncompressed
data. Click on the file contents to toggle between this pretty-printed
view and the raw compressed data.
</p>
<div id="out6"></div>
<p>You will notice that the database does not contain the name of the
file, only its contents, stored under a unique identifier which is
derived by hashing its contents. Let&apos;s add the second user file
to the database.</p>
<textarea id="in7">
// git hash-object -w -t blob src/main.scm
// The path is relative to the working directory.
hash_object(true, 'blob', false, 'src/main.scm');
</textarea>
<input type="button" value="eval" onClick="___git_eval(7)">
<div id="out7"></div>
<h2><code>zlib</code> compression</h2>
<p>The real implementation of GIT compresses objects with zlib. To
view a zlib-compressed object in your terminal, simply write this
declaration in your shell, and then call e.g. <code>unzlib
.git/objects/95/d318ae78cee607a77c453ead4db344fc1221b7</code></p>
<pre>
# Print the zlib-decompressed contents of the file given as first argument.
# Requires Python 3 (sys.stdout.buffer does not exist in Python 2).
unzlib() {
  python3 -c \
"import sys,zlib; \
sys.stdout.buffer.write(zlib.decompress(open(sys.argv[1], 'rb').read()));" \
  "$1"
}
</pre>
<h2>Storing trees (list of hashed files and subtrees)</h2>
<p>Now GIT knows about the contents of both of the user's
files, but it would be nice to also store the filenames.
This is done by creating a <em>tree</em> object.</p>
<p>A tree object can contain files (by associating the file's blob to its name), or directories (by associating the hash of other subtrees to their name).
The mode (<code>100644</code> for the file and <code>40000</code> for the directory) indicates the permissions, and is given in octal using <a href="https://unix.stackexchange.com/a/145118/19059">the values used by *nix</a>.</p>
<textarea id="in8">
// Stores a tree object listing some files and subtrees, and returns its hash.
// base_directory is a string (path relative to the working directory,
//   "" for the working directory itself)
// filenames is a list of strings (names of files inside base_directory);
//   each file is also stored as a blob
// subtrees is a list of {name, hash} objects.
function store_tree(base_directory, filenames, subtrees) {
  var get_file_hash = filename =>
    from_hex(hash_object(true, 'blob', false, join_paths(base_directory, filename)));
  var blobs = filenames.map(filename => ({
    sort_key: filename,
    entry: "100644 " + filename + "\0" + get_file_hash(filename)
  }));
  var trees = subtrees.map(subtree => ({
    // GIT sorts directories as if their name ended with a "/".
    sort_key: subtree.name + "/",
    entry: "40000 " + subtree.name + "\0" + from_hex(subtree.hash)
  }));
  // GIT requires the entries of a tree to be sorted by name, so that the
  // same contents always produce the same hash.
  var entries = blobs.concat(trees).sort((a, b) =>
    a.sort_key < b.sort_key ? -1 : (a.sort_key > b.sort_key ? 1 : 0));
  var tree_contents = entries.map(e => e.entry).join('');
  // cat tree_contents | git hash-object -w -t tree --stdin
  return hash_object(true, 'tree', true, tree_contents);
}
</textarea>
<input type="button" value="eval" onClick="___git_eval(8)">
<div id="out8"></div>
This function needs a small utility to convert hashes encoded in hexadecimal to a binary form.
<textarea id="in9">
// Converts a hexadecimal string to a binary string: each pair of hexadecimal
// digits becomes the character having that code.
function from_hex(hex) {
  var digits = String(hex);
  var chars = [];
  for (var pos = 0; pos < digits.length; pos += 2) {
    var code = parseInt(digits.slice(pos, pos + 2), 16);
    chars.push(String.fromCharCode(code));
  }
  return chars.join('');
}
</textarea>
<input type="button" value="eval" onClick="___git_eval(9)">
<div id="out9"></div>
<textarea id="in10">
//hash_src_tree = store_tree("src/", ["main.scm"], []);
// store_tree("", ["README"], [{name:"src", hash:hash_src_tree}]);
</textarea>
<input type="button" value="eval" onClick="___git_eval(10)">
<div id="out10"></div>
Making trees out of the subfolders one by one is cumbersome. Here's a utility function which takes a list of paths, and builds a tree from those.
<textarea id="in11">
// Stores the trees (and blobs) for a list of file paths given relative to the
// working directory, and returns the hash of the top-level tree.
function store_tree_from_paths(paths) {
  var has_own = (object, key) => Object.prototype.hasOwnProperty.call(object, key);
  // First group the paths by directory:
  // { subfolders: { name: hierarchy, ... }, files: [name, ...] }
  var hierarchy = { subfolders: {}, files: [] };
  for (var i = 0; i < paths.length; i++) {
    var path_components = paths[i].split('/');
    var last = path_components.length - 1;
    var h = hierarchy;
    // Every component except the last one is a directory.
    for (var j = 0; j < last; j++) {
      if (! has_own(h.subfolders, path_components[j])) {
        h.subfolders[path_components[j]] = { subfolders: {}, files: [] };
      }
      h = h.subfolders[path_components[j]];
    }
    // The last component is the file name (this used to be indexed with the
    // position of the path in the list, which gave the wrong component).
    h.files.push(path_components[last]);
  }
  // Then store the subtrees before the tree which refers to them.
  var to_tree = function(base_directory, hierarchy) {
    var subtrees = Object.keys(hierarchy.subfolders).map(name => ({
      name: name,
      hash: to_tree(join_paths(base_directory, name), hierarchy.subfolders[name])
    }));
    return store_tree(base_directory, hierarchy.files, subtrees);
  };
  return to_tree("", hierarchy);
}
// git add README src/main.scm
// (this stores the blobs and the trees; the returned hash is that of the
// top-level tree)
store_tree_from_paths(["README", "src/main.scm"]);
</textarea>
<input type="button" value="eval" id="initial-focus" onClick="___git_eval(11)">
<div id="out11"></div>
<p>Now that the GIT database contains the entire tree for the current version,
a commit can be created. A commit contains
<ul>
<li>a pointer to the tree</li>
<li>a pointer to the previous ("parent") commit (or to multiple parent commits merging them)</li>
<li>information about the author (the person who initially wrote the code)</li>
<li>information about the committer (the person who adds the code to the GIT
database, often the same person as the author, but it can be a different person
e.g. when someone else makes changes to the history or applies a patch received
by e-mail)</li>
<li>a description</li>
</ul>
</p>
<textarea id="in12">
// Stores a commit object and returns its hash.
//   tree:      hash of the tree
//   parents:   list of hashes of the parent commits
//   author, committer: {name, email, date, timezoneMinutes} objects
//   message:   description; a final newline is added when missing
function store_commit(tree, parents, author, committer, message) {
  // One "author" or "committer" line.
  var person_line = function (role, person) {
    return role + ' ' + person.name
      + ' <' + person.email + '> '
      + format_date(person.date) + ' '
      + format_timezone(person.timezoneMinutes) + '\n';
  };
  var parts = ['tree ' + tree + '\n'];
  for (var p = 0; p < parents.length; p++) {
    parts.push('parent ' + parents[p] + '\n');
  }
  parts.push(person_line('author', author));
  parts.push(person_line('committer', committer));
  // An empty line separates the headers from the message.
  parts.push('\n');
  var final_newline = (message[message.length-1] == '\n') ? '' : '\n';
  parts.push('' + message + final_newline);
  var commit_contents = parts.join('');
  // cat commit_contents | git hash-object -w -t commit --stdin
  return hash_object(true, 'commit', true, commit_contents);
}
// Converts a date (or a number of milliseconds) to a whole number of seconds.
function format_date(d) {
  var milliseconds = +d;
  return Math.floor(milliseconds / 1000);
}
// Prepends `char` to `s` until its string form is at least `len` characters
// long. When no padding is needed, `s` is returned unchanged.
function left_pad(s, char, len) {
  var padded = s;
  while ((''+padded).length < len) {
    padded = '' + char + padded;
  }
  return padded;
}
// Formats a timezone offset given in minutes as a sign followed by the hours
// and the minutes, each padded to two digits (e.g. 60 gives "+0100").
function format_timezone(tm) {
  var total_minutes = Math.abs(+tm);
  var sign = (tm >= 0) ? '+' : '-';
  var hours = Math.floor(total_minutes / 60);
  var minutes = total_minutes % 60;
  return sign + left_pad(hours, '0', 2) + left_pad(minutes, '0', 2);
}
</textarea>
<input type="button" value="eval" onClick="___git_eval(12)">
<div id="out12"></div>
<p>It is now possible to store a commit in the database. This saves
a copy of the tree along with some metadata about this version.
The first commit has no parent, which is represented by passing
the empty list.</p>
<textarea id="in13">
// Arguments: the tree, the list of parents (empty for the first commit),
// the author, the committer, and the message.
store_commit(
store_tree_from_paths(["README", "src/main.scm"]),
[],
{name:'Example User', email:'user@example.com', date:new Date(1617120803000), timezoneMinutes: +60},
{name:'Example User', email:'user@example.com', date:new Date(1617120803000), timezoneMinutes: +60},
'Initial commit');
</textarea>
<input type="button" value="eval" onClick="___git_eval(13)">
<div id="out13"></div>
<h2>Branches</h2>
<textarea id="in14">
</textarea>
<input type="button" value="eval" onClick="___git_eval(14)">
<div id="out14"></div>
<h2><code>HEAD</code></h2>
<p>
write here
</p>
<textarea id="in15">
// Store a reference to refs/heads/main in the file .git/HEAD.
write(join_paths(workdir, '.git/HEAD'), 'ref: refs/heads/main');
</textarea>
<input type="button" value="eval" onClick="___git_eval(15)">
<div id="out15"></div>
<h2>Tags</h2>
<textarea id="in16">
// Identity of the user (name and e-mail address).
gitconfig = {
user: {
name: 'Example User',
email: 'user@example.com',
}
}
</textarea>
<input type="button" value="eval" onClick="___git_eval(16)">
<div id="out16"></div>
<h2><code>git commit</code></h2>
<p></p>
<textarea id="in17">
// Identity used by git_commit for both the author and the committer.
gitconfig = {
user: {
name: 'Example User',
email: 'user@example.com',
}
}
// Stores a commit containing the given files, using the identity from
// gitconfig and the current date for both the author and the committer.
//   file_paths: list of paths relative to the working directory
//   message:    description of the commit (optional, defaults to 'Initial commit')
// Returns the hash of the new commit.
// NOTE(review): parse_head is not defined in the snippets above; it is
// presumably meant to return the hash of the commit HEAD refers to. Confirm
// once it is written.
function git_commit(file_paths, message) {
  // A Date object is needed here: Date.now() returns a number of
  // milliseconds, which has no getTimezoneOffset method.
  var now = new Date();
  // getTimezoneOffset is positive west of UTC, GIT's offset is positive east of UTC.
  var timezoneMinutes = -(now.getTimezoneOffset());
  return store_commit(
    store_tree_from_paths(file_paths),
    [parse_head(join_paths(workdir, '.git/HEAD'))],
    {name:gitconfig.user.name, email:gitconfig.user.email, date:now, timezoneMinutes:timezoneMinutes },
    {name:gitconfig.user.name, email:gitconfig.user.email, date:now, timezoneMinutes:timezoneMinutes },
    (message === undefined) ? 'Initial commit' : message);
}
</textarea>
<input type="button" value="eval" onClick="___git_eval(17)">
<div id="out17"></div>
END OF DOCUMENT
<script>
// Give the keyboard focus to the "eval" button marked with id="initial-focus".
document.getElementById("initial-focus").focus();
</script>
<p></p>
<textarea id="in000">
</textarea>
<input type="button" value="eval" onClick="___git_eval(000)">
<div id="out000"></div>
<!--
/*
function partition(array, filter) {
var pass = [];
var fail = [];
for (i = 0; i < array.length; i++) {
if (filter(array[i])) {
pass.push(array[i]);
} else {
fail.push(array[i]);
}
}
return { "pass" : pass, "fail" : fail }
}
function filenames_to_hierarchy(filenames) {
for (var i = 0; i < filenames.length; i++) {
filenames[i];
}
}*/
-->
</body>
</html>