Removed the arcane regexp in parse_tree, added some prose
This commit is contained in:
parent
792c5bc78f
commit
f2b3940df6
|
@ -136,4 +136,5 @@ article#git-tutorial .onlytoc { display: none; }
|
||||||
|
|
||||||
#git-tutorial .exercise-task { border: thin solid #80c5c5; background: #f1faff; padding: 1em }
|
#git-tutorial .exercise-task { border: thin solid #80c5c5; background: #f1faff; padding: 1em }
|
||||||
#git-tutorial .exercise-reason { border: thin solid #80c5c5; background: #f8fdff; padding: 1em }
|
#git-tutorial .exercise-reason { border: thin solid #80c5c5; background: #f8fdff; padding: 1em }
|
||||||
#git-tutorial .exercise-reason:before { content: "Rationale "; margin-bottom: 0.7em; font-weight: bold; display: block; }
|
#git-tutorial .exercise-reason:before { content: "Rationale "; margin-bottom: 0.7em; font-weight: bold; display: block; }
|
||||||
|
#git-tutorial .log-alert { color: red; font-weight: 500; }
|
|
@ -590,11 +590,17 @@ function ___copyprintf_click(id) {
|
||||||
}
|
}
|
||||||
var ___script_log_header = ''
|
var ___script_log_header = ''
|
||||||
+ 'var ___log = [];\n'
|
+ 'var ___log = [];\n'
|
||||||
|
+ 'var alert = (function (real_console, real_alert) {\n'
|
||||||
|
+ ' return function(message) {\n'
|
||||||
|
+ ' ___log[___log.length] = { alert: true, txt: message };\n'
|
||||||
|
+ ' real_console.log("alert:", message);\n'
|
||||||
|
+ ' };\n'
|
||||||
|
+ '})(window.console, window.alert);\n'
|
||||||
+ 'var console = (function(real_console) {\n'
|
+ 'var console = (function(real_console) {\n'
|
||||||
+ ' return {\n'
|
+ ' return {\n'
|
||||||
+ ' log: function() {\n'
|
+ ' log: function() {\n'
|
||||||
+ ' ___log[___log.length] = Array.from(arguments);\n'
|
+ ' ___log[___log.length] = { alert: false, txt: Array.from(arguments).map(function (x) { return x.toString(); }).join(", ") };\n'
|
||||||
+ ' real_console.log.apply(console, arguments);\n'
|
+ ' real_console.log.apply(real_console, arguments);\n'
|
||||||
+ ' },\n'
|
+ ' },\n'
|
||||||
+ ' assert: real_console.assert,\n'
|
+ ' assert: real_console.assert,\n'
|
||||||
+ ' };\n'
|
+ ' };\n'
|
||||||
|
@ -810,19 +816,30 @@ function ___filesystem_to_graphview(filesystem, previous_filesystem) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function ___eval_result_to_html(id, filesystem, previous_filesystem, log, quiet) {
|
function ___log_to_html(log) {
|
||||||
var loghtml = '<pre class="log">' + log.map(function(l) { return l.map(function (x) { return x.toString(); }).join(', '); }).join('\n') + '</pre>'
|
return '<pre class="log">'
|
||||||
|
+ log.map(function(l) {
|
||||||
|
return '<div class="' + (l.alert ? 'log-alert' : 'log-log') + '">'
|
||||||
|
+ ___specialchars(l.txt)
|
||||||
|
+ '</div>';
|
||||||
|
}).join('\n')
|
||||||
|
+ '</pre>'
|
||||||
|
}
|
||||||
|
|
||||||
|
function ___eval_result_to_html(id, filesystem, previous_filesystem, log, quiet, omit_graph) {
|
||||||
|
var loghtml = ___log_to_html(log);
|
||||||
var table = ___filesystem_to_string(filesystem, quiet, previous_filesystem);
|
var table = ___filesystem_to_string(filesystem, quiet, previous_filesystem);
|
||||||
var gv = ___filesystem_to_graphview(filesystem, previous_filesystem);
|
var gv = ___filesystem_to_graphview(filesystem, previous_filesystem);
|
||||||
var html = (log.length > 0 ? '<p>Console output:</p>' + loghtml : '')
|
var html = (log.length > 0 ? '<p>Console output:</p>' + loghtml : '')
|
||||||
+ gv.html
|
+ (omit_graph ? '' : gv.html)
|
||||||
+ table;
|
+ table;
|
||||||
document.getElementById(id).innerHTML = '<div class="hilite-wrapper">' + html + '</div>';
|
document.getElementById(id).innerHTML = '<div class="hilite-wrapper">' + html + '</div>';
|
||||||
gv.js();
|
if (!omit_graph) { gv.js(); }
|
||||||
}
|
}
|
||||||
function ___git_eval(current) {
|
function ___git_eval(current) {
|
||||||
document.getElementById('hide-eval-' + current).style.display = '';
|
document.getElementById('hide-eval-' + current).style.display = '';
|
||||||
var script = ___script_log_header;
|
var script = ___script_log_header;
|
||||||
|
script += 'try {';
|
||||||
for (i = 0; i <= current - 1; i++) {
|
for (i = 0; i <= current - 1; i++) {
|
||||||
script += ___textarea_value(___global_editors[i]);
|
script += ___textarea_value(___global_editors[i]);
|
||||||
}
|
}
|
||||||
|
@ -832,6 +849,13 @@ function ___git_eval(current) {
|
||||||
+ '___log = [];\n';
|
+ '___log = [];\n';
|
||||||
script += ___textarea_value(___global_editors[current]);
|
script += ___textarea_value(___global_editors[current]);
|
||||||
script += '\n'
|
script += '\n'
|
||||||
|
+ '} catch (e) {'
|
||||||
|
+ ' if (("" + e.message).indexOf("GIT: assertion failed: ") != 0) {'
|
||||||
|
+ ' throw e;'
|
||||||
|
+ ' } else {'
|
||||||
|
+ ' ___log.push({ alert: true, txt: "command failed" });'
|
||||||
|
+ ' }'
|
||||||
|
+ '}'
|
||||||
+ '"End of the script";\n'
|
+ '"End of the script";\n'
|
||||||
+ '\n'
|
+ '\n'
|
||||||
+ '\n'
|
+ '\n'
|
||||||
|
|
87
index.html
87
index.html
|
@ -17,7 +17,7 @@
|
||||||
<script src="git-tutorial.js"></script>
|
<script src="git-tutorial.js"></script>
|
||||||
<script class="example">
|
<script class="example">
|
||||||
var examples=[];
|
var examples=[];
|
||||||
function ___h2f(hash) { return 'proj/.git/objects/'+hash.substr(0,2)+'/'+hash.substr(2); }
|
// Converts an object hash into the path of that object inside the example
// repository: the first two characters of the hash name the sub-directory of
// proj/.git/objects, and the remaining characters name the file within it.
function ___h2f(hash) {
  var directory = hash.substring(0, 2);
  var file = hash.substring(2);
  return 'proj/.git/objects/' + directory + '/' + file;
}
|
||||||
function ___example(id, f) {
|
function ___example(id, f) {
|
||||||
examples.push(function () {
|
examples.push(function () {
|
||||||
var result = f();
|
var result = f();
|
||||||
|
@ -29,7 +29,7 @@ function ___example(id, f) {
|
||||||
for (var i = 0; i < result.previous_names.length; i++) {
|
for (var i = 0; i < result.previous_names.length; i++) {
|
||||||
previous_fs[result.previous_names[i]] = filesystem[result.previous_names[i]];
|
previous_fs[result.previous_names[i]] = filesystem[result.previous_names[i]];
|
||||||
}
|
}
|
||||||
___eval_result_to_html(id, fs, previous_fs, [], true);
|
___eval_result_to_html(id, fs, previous_fs, [], true, result.omit_graph);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
@ -251,8 +251,8 @@ function hash_object(must_write, type, is_data, path_or_data) {
|
||||||
|
|
||||||
if (must_write) {
|
if (must_write) {
|
||||||
mkdir(join_paths(current_directory, '.git/objects'));
|
mkdir(join_paths(current_directory, '.git/objects'));
|
||||||
mkdir(join_paths(current_directory, '.git/objects/' + hash.substr(0,2)));
|
mkdir(join_paths(current_directory, '.git/objects/' + hash.substring(0,2)));
|
||||||
var object_path = join_paths(current_directory, '.git/objects/' + hash.substr(0,2) + '/' + hash.substr(2));
|
var object_path = join_paths(current_directory, '.git/objects/' + hash.substring(0,2) + '/' + hash.substring(2));
|
||||||
// deflate() compresses using zlib
|
// deflate() compresses using zlib
|
||||||
write(object_path, deflate(object_contents));
|
write(object_path, deflate(object_contents));
|
||||||
}
|
}
|
||||||
|
@ -363,7 +363,7 @@ function hex_to_raw_bytes(hex) {
|
||||||
var hex = String(hex);
|
var hex = String(hex);
|
||||||
var str = ""
|
var str = ""
|
||||||
for (var i = 0; i < hex.length; i+=2) {
|
for (var i = 0; i < hex.length; i+=2) {
|
||||||
str += String.fromCharCode(parseInt(hex.substr(i, 2), 16));
|
str += String.fromCharCode(parseInt(hex.substring(i, i + 2), 16));
|
||||||
}
|
}
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
@ -571,7 +571,7 @@ any other non-branch reference.</p>
|
||||||
<div id="example-reference"></div>
|
<div id="example-reference"></div>
|
||||||
<script class="example">
|
<script class="example">
|
||||||
___example('example-reference', function() {
|
___example('example-reference', function() {
|
||||||
var h2f = function(hash) { return 'proj/.git/objects/'+hash.substr(0,2)+'/'+hash.substr(2); }
|
var h2f = function(hash) { return 'proj/.git/objects/'+hash.substring(0,2)+'/'+hash.substring(2); }
|
||||||
var main = h2f(hash_object(true, 'blob', false, 'src/main.scm'));
|
var main = h2f(hash_object(true, 'blob', false, 'src/main.scm'));
|
||||||
var readme = h2f(hash_object(true, 'blob', false, 'README'));
|
var readme = h2f(hash_object(true, 'blob', false, 'README'));
|
||||||
var src = h2f(store_tree("src", ["main.scm"], []));
|
var src = h2f(store_tree("src", ["main.scm"], []));
|
||||||
|
@ -611,7 +611,7 @@ references, we need to get rid of the newline first.</p>
|
||||||
<textarea>
|
<textarea>
|
||||||
// Removes the newline at the end of a string, if present.
|
// Removes the newline at the end of a string, if present.
|
||||||
function trim_newline(s) {
|
function trim_newline(s) {
|
||||||
if (s.endsWith('\n')) { return s.substr(0, s.length-1); } else { return s; }
|
if (s.endsWith('\n')) { return s.substring(0, s.length-1); } else { return s; }
|
||||||
}
|
}
|
||||||
</textarea>
|
</textarea>
|
||||||
|
|
||||||
|
@ -628,7 +628,7 @@ with <code>ref: </code>, the rest of the line will be returned.</p>
|
||||||
function git_symbolic_ref(ref) {
|
function git_symbolic_ref(ref) {
|
||||||
var ref_file = join_paths(current_directory, '.git/' + ref);
|
var ref_file = join_paths(current_directory, '.git/' + ref);
|
||||||
if (exists(ref_file) && read(ref_file).startsWith('ref: ')) {
|
if (exists(ref_file) && read(ref_file).startsWith('ref: ')) {
|
||||||
var result = trim_newline(read(ref_file)).substr('ref: '.length);
|
var result = trim_newline(read(ref_file)).substring('ref: '.length);
|
||||||
var recursive = git_symbolic_ref(result);
|
var recursive = git_symbolic_ref(result);
|
||||||
return recursive || result;
|
return recursive || result;
|
||||||
} else {
|
} else {
|
||||||
|
@ -650,7 +650,7 @@ function git_symbolic_ref(ref) {
|
||||||
<div id="example-recursive-ref"></div>
|
<div id="example-recursive-ref"></div>
|
||||||
<script class="example">
|
<script class="example">
|
||||||
___example('example-recursive-ref', function() {
|
___example('example-recursive-ref', function() {
|
||||||
var h2f = function(hash) { return 'proj/.git/objects/'+hash.substr(0,2)+'/'+hash.substr(2); }
|
var h2f = function(hash) { return 'proj/.git/objects/'+hash.substring(0,2)+'/'+hash.substring(2); }
|
||||||
var main = h2f(hash_object(true, 'blob', false, 'src/main.scm'));
|
var main = h2f(hash_object(true, 'blob', false, 'src/main.scm'));
|
||||||
var readme = h2f(hash_object(true, 'blob', false, 'README'));
|
var readme = h2f(hash_object(true, 'blob', false, 'README'));
|
||||||
var src = h2f(store_tree("src", ["main.scm"], []));
|
var src = h2f(store_tree("src", ["main.scm"], []));
|
||||||
|
@ -750,7 +750,7 @@ to the branch, and this pointer can be read e.g. by <code>git rev-parse</code>.<
|
||||||
<div id="example-git-branch"></div>
|
<div id="example-git-branch"></div>
|
||||||
<script class="example">
|
<script class="example">
|
||||||
___example('example-git-branch', function() {
|
___example('example-git-branch', function() {
|
||||||
var h2f = function(hash) { return 'proj/.git/objects/'+hash.substr(0,2)+'/'+hash.substr(2); }
|
var h2f = function(hash) { return 'proj/.git/objects/'+hash.substring(0,2)+'/'+hash.substring(2); }
|
||||||
var main = h2f(hash_object(true, 'blob', false, 'src/main.scm'));
|
var main = h2f(hash_object(true, 'blob', false, 'src/main.scm'));
|
||||||
var readme = h2f(hash_object(true, 'blob', false, 'README'));
|
var readme = h2f(hash_object(true, 'blob', false, 'README'));
|
||||||
var src = h2f(store_tree("src", ["main.scm"], []));
|
var src = h2f(store_tree("src", ["main.scm"], []));
|
||||||
|
@ -1113,16 +1113,24 @@ function checkout_tree(path_prefix, hash) {
|
||||||
The parsers will check that their input looks reasonably well-formed, using <code>assert()</code>.</p>
|
The parsers will check that their input looks reasonably well-formed, using <code>assert()</code>.</p>
|
||||||
<textarea>
|
<textarea>
|
||||||
function assert(boolean, text) {
|
function assert(boolean, text) {
|
||||||
if (! boolean) { alert("assertion failed: " + text); throw new Error(text); }
|
if (! boolean) { alert("GIT: assertion failed: " + text); throw new Error("GIT: assertion failed: " + text); }
|
||||||
}
|
}
|
||||||
</textarea>
|
</textarea>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
<section id="parsed-compressed">
|
<section id="parsed-compressed">
|
||||||
<h1>Reading compressed objects</h1>
|
<h1>Reading compressed objects</h1>
|
||||||
|
<p>The GIT objects which are stored in <code>.git/objects</code> are compressed with <code>zlib</code>, and need to be
|
||||||
|
uncompressed before they can be parsed. The actual implementation of GIT also stores some objects in <em>packs</em>. Packs
|
||||||
|
contain a large number of objects, and use a form of delta compression, which effectively stores objects as the diff with
|
||||||
|
another similar object, in order to optimize the disk space usage.</p>
|
||||||
|
<p>Our simplified implementation only deals with zlib-compressed objects, and cannot read from pack files. The function below
|
||||||
|
extracts the type and length, which form the header present in all objects, and returns those along with the contents of the
|
||||||
|
object.
|
||||||
|
</p>
|
||||||
<textarea>
|
<textarea>
|
||||||
function parse_object(hash) {
|
function parse_object(hash) {
|
||||||
var compressed = read(join_paths(current_directory, '.git/objects/' + hash.substr(0,2) + '/' + hash.substr(2)));
|
var compressed = read(join_paths(current_directory, '.git/objects/' + hash.substring(0,2) + '/' + hash.substring(2)));
|
||||||
var inflated = inflate(compressed);
|
var inflated = inflate(compressed);
|
||||||
var split = inflated.match(/^([\s\S]*?) ([\s\S]*?)\0([\s\S]*)$/);
|
var split = inflated.match(/^([\s\S]*?) ([\s\S]*?)\0([\s\S]*)$/);
|
||||||
|
|
||||||
|
@ -1139,31 +1147,54 @@ function parse_object(hash) {
|
||||||
|
|
||||||
<section id="parse-tree">
|
<section id="parse-tree">
|
||||||
<h1>Parsing tree objects</h1>
|
<h1>Parsing tree objects</h1>
|
||||||
|
<p>We will start by parsing tree objects. As a reminder, a tree object has the following form:</p>
|
||||||
|
<div id="example-tree-objects-parse"></div>
|
||||||
|
<script class="example">
|
||||||
|
___example('example-tree-objects-parse', function() {
|
||||||
|
var main = ___h2f(hash_object(true, 'blob', false, 'src/main.scm'));
|
||||||
|
var readme = ___h2f(hash_object(true, 'blob', false, 'README'));
|
||||||
|
var src = ___h2f(store_tree("src", ["main.scm"], []));
|
||||||
|
var proj = ___h2f(paths_to_tree(["README", "src/main.scm"]));
|
||||||
|
var previous_names = [ ];
|
||||||
|
var names = [ proj ];
|
||||||
|
return { filesystem: filesystem, names: names, previous_names: previous_names, omit_graph: true };
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
<p>
|
||||||
|
After the object header, each entry consists of a mode, a space, a filename, a null byte and a hash consisting of 20 bytes.
|
||||||
|
The null byte cannot appear in the mode or filename, so we use this null + hash as a delimiter
|
||||||
|
(searching for the first null byte from the start of each entry ensures that the terminator is not confused with a <code>00</code> byte in the hash).
|
||||||
|
</p>
|
||||||
<textarea>
|
<textarea>
|
||||||
function parse_tree(hash) {
|
function parse_tree(hash) {
|
||||||
var tree = parse_object(hash);
|
var tree = parse_object(hash);
|
||||||
var split = tree.contents.split(/(?<=\0[\s\S]{20})/);
|
var i = 0;
|
||||||
assert(split, 'invalid contents of tree object');
|
|
||||||
var entries = [];
|
var entries = [];
|
||||||
for (var i = 0; i < split.length; i++) {
|
while (i < tree.contents.length) {
|
||||||
entries.push(parse_tree_entry(split[i]));
|
// skip to the null terminator
|
||||||
|
var space_offset = tree.contents.indexOf(' ', i);
|
||||||
|
var null_offset = tree.contents.indexOf('\0', i);
|
||||||
|
|
||||||
|
// add 20 bytes for the hash that follows, and check the object isn't shorter than that
|
||||||
|
if (space_offset < null_offset && null_offset + 20 < tree.contents.length) {
|
||||||
|
var mode = tree.contents.substring(i, space_offset);
|
||||||
|
var name = tree.contents.substring(space_offset+1, null_offset);
|
||||||
|
var hash = to_hex(tree.contents.substring(null_offset + 1, null_offset + 1 + 20));
|
||||||
|
entries.push({ mode: mode, name: name, hash: hash });
|
||||||
|
} else {
|
||||||
|
assert(false, 'invalid contents of tree object');
|
||||||
|
}
|
||||||
|
|
||||||
|
i = null_offset + 20 + 1;
|
||||||
}
|
}
|
||||||
return entries;
|
return entries;
|
||||||
}
|
}
|
||||||
</textarea>
|
</textarea>
|
||||||
|
|
||||||
<textarea>
|
<p>
|
||||||
function parse_tree_entry(entry) {
|
The <code>parse_tree</code> function above needs a small utility to convert hashes represented using
|
||||||
var split = entry.match(/^([0-9]+) ([\s\S]*)\0([\s\S]{20})$/);
|
raw bytes to a hexadecimal representation.
|
||||||
assert(split, 'invalid entry in tree object');
|
</p>
|
||||||
var mode = split[1];
|
|
||||||
var name = split[2];
|
|
||||||
var hash = to_hex(split[3]);
|
|
||||||
return { mode: mode, name: name, hash: hash };
|
|
||||||
}
|
|
||||||
</textarea>
|
|
||||||
|
|
||||||
<p>The <code>parse_tree</code> function above needs a small utility to convert hashes represented using raw bytes to a hexadecimal representation.</p>
|
|
||||||
<textarea id="in19">
|
<textarea id="in19">
|
||||||
function to_hex(bin) {
|
function to_hex(bin) {
|
||||||
var bin = String(bin);
|
var bin = String(bin);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user