Removed the arcane regexp in parse_tree, added some prose

This commit is contained in:
Suzanne Soy 2021-06-23 22:02:50 +01:00
parent 792c5bc78f
commit f2b3940df6
3 changed files with 91 additions and 35 deletions

View File

@ -137,3 +137,4 @@ article#git-tutorial .onlytoc { display: none; }
#git-tutorial .exercise-task { border: thin solid #80c5c5; background: #f1faff; padding: 1em } #git-tutorial .exercise-task { border: thin solid #80c5c5; background: #f1faff; padding: 1em }
#git-tutorial .exercise-reason { border: thin solid #80c5c5; background: #f8fdff; padding: 1em } #git-tutorial .exercise-reason { border: thin solid #80c5c5; background: #f8fdff; padding: 1em }
#git-tutorial .exercise-reason:before { content: "Rationale "; margin-bottom: 0.7em; font-weight: bold; display: block; } #git-tutorial .exercise-reason:before { content: "Rationale "; margin-bottom: 0.7em; font-weight: bold; display: block; }
#git-tutorial .log-alert { color: red; font-weight: 500; }

View File

@ -590,11 +590,17 @@ function ___copyprintf_click(id) {
} }
var ___script_log_header = '' var ___script_log_header = ''
+ 'var ___log = [];\n' + 'var ___log = [];\n'
+ 'var alert = (function (real_console, real_alert) {\n'
+ ' return function(message) {\n'
+ ' ___log[___log.length] = { alert: true, txt: message };\n'
+ ' real_console.log("alert:", message);\n'
+ ' };\n'
+ '})(window.console, window.alert);\n'
+ 'var console = (function(real_console) {\n' + 'var console = (function(real_console) {\n'
+ ' return {\n' + ' return {\n'
+ ' log: function() {\n' + ' log: function() {\n'
+ ' ___log[___log.length] = Array.from(arguments);\n' + ' ___log[___log.length] = { alert: false, txt: Array.from(arguments).map(function (x) { return x.toString(); }).join(", ") };\n'
+ ' real_console.log.apply(console, arguments);\n' + ' real_console.log.apply(real_console, arguments);\n'
+ ' },\n' + ' },\n'
+ ' assert: real_console.assert,\n' + ' assert: real_console.assert,\n'
+ ' };\n' + ' };\n'
@ -810,19 +816,30 @@ function ___filesystem_to_graphview(filesystem, previous_filesystem) {
} }
} }
function ___eval_result_to_html(id, filesystem, previous_filesystem, log, quiet) { function ___log_to_html(log) {
var loghtml = '<pre class="log">' + log.map(function(l) { return l.map(function (x) { return x.toString(); }).join(', '); }).join('\n') + '</pre>' return '<pre class="log">'
+ log.map(function(l) {
return '<div class="' + (l.alert ? 'log-alert' : 'log-log') + '">'
+ ___specialchars(l.txt)
+ '</div>';
}).join('\n')
+ '</pre>'
}
function ___eval_result_to_html(id, filesystem, previous_filesystem, log, quiet, omit_graph) {
var loghtml = ___log_to_html(log);
var table = ___filesystem_to_string(filesystem, quiet, previous_filesystem); var table = ___filesystem_to_string(filesystem, quiet, previous_filesystem);
var gv = ___filesystem_to_graphview(filesystem, previous_filesystem); var gv = ___filesystem_to_graphview(filesystem, previous_filesystem);
var html = (log.length > 0 ? '<p>Console output:</p>' + loghtml : '') var html = (log.length > 0 ? '<p>Console output:</p>' + loghtml : '')
+ gv.html + (omit_graph ? '' : gv.html)
+ table; + table;
document.getElementById(id).innerHTML = '<div class="hilite-wrapper">' + html + '</div>'; document.getElementById(id).innerHTML = '<div class="hilite-wrapper">' + html + '</div>';
gv.js(); if (!omit_graph) { gv.js(); }
} }
function ___git_eval(current) { function ___git_eval(current) {
document.getElementById('hide-eval-' + current).style.display = ''; document.getElementById('hide-eval-' + current).style.display = '';
var script = ___script_log_header; var script = ___script_log_header;
script += 'try {';
for (i = 0; i <= current - 1; i++) { for (i = 0; i <= current - 1; i++) {
script += ___textarea_value(___global_editors[i]); script += ___textarea_value(___global_editors[i]);
} }
@ -832,6 +849,13 @@ function ___git_eval(current) {
+ '___log = [];\n'; + '___log = [];\n';
script += ___textarea_value(___global_editors[current]); script += ___textarea_value(___global_editors[current]);
script += '\n' script += '\n'
+ '} catch (e) {'
+ ' if (("" + e.message).indexOf("GIT: assertion failed: ") != 0) {'
+ ' throw e;'
+ ' } else {'
+ ' ___log.push({ alert: true, txt: "command failed" });'
+ ' }'
+ '}'
+ '"End of the script";\n' + '"End of the script";\n'
+ '\n' + '\n'
+ '\n' + '\n'

View File

@ -17,7 +17,7 @@
<script src="git-tutorial.js"></script> <script src="git-tutorial.js"></script>
<script class="example"> <script class="example">
var examples=[]; var examples=[];
function ___h2f(hash) { return 'proj/.git/objects/'+hash.substr(0,2)+'/'+hash.substr(2); } function ___h2f(hash) { return 'proj/.git/objects/'+hash.substring(0,2)+'/'+hash.substring(2); }
function ___example(id, f) { function ___example(id, f) {
examples.push(function () { examples.push(function () {
var result = f(); var result = f();
@ -29,7 +29,7 @@ function ___example(id, f) {
for (var i = 0; i < result.previous_names.length; i++) { for (var i = 0; i < result.previous_names.length; i++) {
previous_fs[result.previous_names[i]] = filesystem[result.previous_names[i]]; previous_fs[result.previous_names[i]] = filesystem[result.previous_names[i]];
} }
___eval_result_to_html(id, fs, previous_fs, [], true); ___eval_result_to_html(id, fs, previous_fs, [], true, result.omit_graph);
}); });
} }
</script> </script>
@ -251,8 +251,8 @@ function hash_object(must_write, type, is_data, path_or_data) {
if (must_write) { if (must_write) {
mkdir(join_paths(current_directory, '.git/objects')); mkdir(join_paths(current_directory, '.git/objects'));
mkdir(join_paths(current_directory, '.git/objects/' + hash.substr(0,2))); mkdir(join_paths(current_directory, '.git/objects/' + hash.substring(0,2)));
var object_path = join_paths(current_directory, '.git/objects/' + hash.substr(0,2) + '/' + hash.substr(2)); var object_path = join_paths(current_directory, '.git/objects/' + hash.substring(0,2) + '/' + hash.substring(2));
// deflate() compresses using zlib // deflate() compresses using zlib
write(object_path, deflate(object_contents)); write(object_path, deflate(object_contents));
} }
@ -363,7 +363,7 @@ function hex_to_raw_bytes(hex) {
var hex = String(hex); var hex = String(hex);
var str = "" var str = ""
for (var i = 0; i < hex.length; i+=2) { for (var i = 0; i < hex.length; i+=2) {
str += String.fromCharCode(parseInt(hex.substr(i, 2), 16)); str += String.fromCharCode(parseInt(hex.substring(i, i + 2), 16));
} }
return str; return str;
} }
@ -571,7 +571,7 @@ any other non-branch reference.</p>
<div id="example-reference"></div> <div id="example-reference"></div>
<script class="example"> <script class="example">
___example('example-reference', function() { ___example('example-reference', function() {
var h2f = function(hash) { return 'proj/.git/objects/'+hash.substr(0,2)+'/'+hash.substr(2); } var h2f = function(hash) { return 'proj/.git/objects/'+hash.substring(0,2)+'/'+hash.substring(2); }
var main = h2f(hash_object(true, 'blob', false, 'src/main.scm')); var main = h2f(hash_object(true, 'blob', false, 'src/main.scm'));
var readme = h2f(hash_object(true, 'blob', false, 'README')); var readme = h2f(hash_object(true, 'blob', false, 'README'));
var src = h2f(store_tree("src", ["main.scm"], [])); var src = h2f(store_tree("src", ["main.scm"], []));
@ -611,7 +611,7 @@ references, we need to get rid of the newline first.</p>
<textarea> <textarea>
// Removes the newline at the end of a string, if present. // Removes the newline at the end of a string, if present.
function trim_newline(s) { function trim_newline(s) {
if (s.endsWith('\n')) { return s.substr(0, s.length-1); } else { return s; } if (s.endsWith('\n')) { return s.substring(0, s.length-1); } else { return s; }
} }
</textarea> </textarea>
@ -628,7 +628,7 @@ with <code>ref: </code>, the rest of the line will be returned.</p>
function git_symbolic_ref(ref) { function git_symbolic_ref(ref) {
var ref_file = join_paths(current_directory, '.git/' + ref); var ref_file = join_paths(current_directory, '.git/' + ref);
if (exists(ref_file) && read(ref_file).startsWith('ref: ')) { if (exists(ref_file) && read(ref_file).startsWith('ref: ')) {
var result = trim_newline(read(ref_file)).substr('ref: '.length); var result = trim_newline(read(ref_file)).substring('ref: '.length);
var recursive = git_symbolic_ref(result); var recursive = git_symbolic_ref(result);
return recursive || result; return recursive || result;
} else { } else {
@ -650,7 +650,7 @@ function git_symbolic_ref(ref) {
<div id="example-recursive-ref"></div> <div id="example-recursive-ref"></div>
<script class="example"> <script class="example">
___example('example-recursive-ref', function() { ___example('example-recursive-ref', function() {
var h2f = function(hash) { return 'proj/.git/objects/'+hash.substr(0,2)+'/'+hash.substr(2); } var h2f = function(hash) { return 'proj/.git/objects/'+hash.substring(0,2)+'/'+hash.substring(2); }
var main = h2f(hash_object(true, 'blob', false, 'src/main.scm')); var main = h2f(hash_object(true, 'blob', false, 'src/main.scm'));
var readme = h2f(hash_object(true, 'blob', false, 'README')); var readme = h2f(hash_object(true, 'blob', false, 'README'));
var src = h2f(store_tree("src", ["main.scm"], [])); var src = h2f(store_tree("src", ["main.scm"], []));
@ -750,7 +750,7 @@ to the branch, and this pointer can be read e.g. by <code>git rev-parse</code>.<
<div id="example-git-branch"></div> <div id="example-git-branch"></div>
<script class="example"> <script class="example">
___example('example-git-branch', function() { ___example('example-git-branch', function() {
var h2f = function(hash) { return 'proj/.git/objects/'+hash.substr(0,2)+'/'+hash.substr(2); } var h2f = function(hash) { return 'proj/.git/objects/'+hash.substring(0,2)+'/'+hash.substring(2); }
var main = h2f(hash_object(true, 'blob', false, 'src/main.scm')); var main = h2f(hash_object(true, 'blob', false, 'src/main.scm'));
var readme = h2f(hash_object(true, 'blob', false, 'README')); var readme = h2f(hash_object(true, 'blob', false, 'README'));
var src = h2f(store_tree("src", ["main.scm"], [])); var src = h2f(store_tree("src", ["main.scm"], []));
@ -1113,16 +1113,24 @@ function checkout_tree(path_prefix, hash) {
The parsers will check that their input looks reasonably well-formed, using <code>assert()</code>.</p> The parsers will check that their input looks reasonably well-formed, using <code>assert()</code>.</p>
<textarea> <textarea>
function assert(boolean, text) { function assert(boolean, text) {
if (! boolean) { alert("assertion failed: " + text); throw new Error(text); } if (! boolean) { alert("GIT: assertion failed: " + text); throw new Error("GIT: assertion failed: " + text); }
} }
</textarea> </textarea>
</section> </section>
<section id="parsed-compressed"> <section id="parsed-compressed">
<h1>Reading compressed objects</h1> <h1>Reading compressed objects</h1>
<p>The GIT objects which are stored in <code>.git/objects</code> are compressed with <code>zlib</code>, and need to be
uncompressed before they can be parsed. The actual implementation of GIT also stores some objects in <em>packs</em>. Packs
contain a large number of objects, and use a form of delta compression, which effectively stores an object as a diff against
another, similar object, in order to optimize disk space usage.</p>
<p>Our simplified implementation only deals with zlib-compressed objects, and cannot read from pack files. The function below
extracts the type and length, which form the header present in all objects, and returns those along with the contents of the
object.
</p>
<textarea> <textarea>
function parse_object(hash) { function parse_object(hash) {
var compressed = read(join_paths(current_directory, '.git/objects/' + hash.substr(0,2) + '/' + hash.substr(2))); var compressed = read(join_paths(current_directory, '.git/objects/' + hash.substring(0,2) + '/' + hash.substring(2)));
var inflated = inflate(compressed); var inflated = inflate(compressed);
var split = inflated.match(/^([\s\S]*?) ([\s\S]*?)\0([\s\S]*)$/); var split = inflated.match(/^([\s\S]*?) ([\s\S]*?)\0([\s\S]*)$/);
@ -1139,31 +1147,54 @@ function parse_object(hash) {
<section id="parse-tree"> <section id="parse-tree">
<h1>Parsing tree objects</h1> <h1>Parsing tree objects</h1>
<p>We will start by parsing tree objects. As a reminder, a tree object has the following form:</p>
<div id="example-tree-objects-parse"></div>
<script class="example">
___example('example-tree-objects-parse', function() {
var main = ___h2f(hash_object(true, 'blob', false, 'src/main.scm'));
var readme = ___h2f(hash_object(true, 'blob', false, 'README'));
var src = ___h2f(store_tree("src", ["main.scm"], []));
var proj = ___h2f(paths_to_tree(["README", "src/main.scm"]));
var previous_names = [ ];
var names = [ proj ];
return { filesystem: filesystem, names: names, previous_names: previous_names, omit_graph: true };
});
</script>
<p>
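After the object header, each entry consists of a mode, a space, a filename, a null byte and a hash consisting of 20 bytes.
The null byte cannot appear in the mode or filename, so the first null byte found after the start of an entry marks the end of the filename, and the 20 bytes which follow it are the hash
(searching from the start of each entry, and then skipping over the hash, ensures that a <code>00</code> byte inside a hash is never mistaken for the null byte terminator).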
</p>
<textarea> <textarea>
function parse_tree(hash) { function parse_tree(hash) {
var tree = parse_object(hash); var tree = parse_object(hash);
var split = tree.contents.split(/(?<=\0[\s\S]{20})/); var i = 0;
assert(split, 'invalid contents of tree object');
var entries = []; var entries = [];
for (var i = 0; i < split.length; i++) { while (i < tree.contents.length) {
entries.push(parse_tree_entry(split[i])); // skip to the null terminator
var space_offset = tree.contents.indexOf(' ', i);
var null_offset = tree.contents.indexOf('\0', i);
// add 20 bytes for the hash that follows, and check the object isn't shorter than that
if (space_offset < null_offset && null_offset + 20 < tree.contents.length) {
var mode = tree.contents.substring(i, space_offset);
var name = tree.contents.substring(space_offset+1, null_offset);
var hash = to_hex(tree.contents.substring(null_offset + 1, null_offset + 1 + 20));
entries.push({ mode: mode, name: name, hash: hash });
} else {
assert(false, 'invalid contents of tree object');
}
i = null_offset + 20 + 1;
} }
return entries; return entries;
} }
</textarea> </textarea>
<textarea> <p>
function parse_tree_entry(entry) { The <code>parse_tree</code> function above needs a small utility to convert hashes represented using
var split = entry.match(/^([0-9]+) ([\s\S]*)\0([\s\S]{20})$/); raw bytes to a hexadecimal representation.
assert(split, 'invalid entry in tree object'); </p>
var mode = split[1];
var name = split[2];
var hash = to_hex(split[3]);
return { mode: mode, name: name, hash: hash };
}
</textarea>
<p>The <code>parse_tree</code> function above needs a small utility to convert hashes represented using raw bytes to a hexadecimal representation.</p>
<textarea id="in19"> <textarea id="in19">
function to_hex(bin) { function to_hex(bin) {
var bin = String(bin); var bin = String(bin);