92 lines
3.5 KiB
Python
Executable File
92 lines
3.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import os
|
|
import sys
|
|
import subprocess
|
|
|
|
def debug(s):
    """Tracing hook used by the hashing helpers; currently a no-op.

    Args:
        s: the message that would be traced.

    Returns:
        None. Nothing is written while the print call below stays
        commented out.
    """
    # To enable tracing on stderr, uncomment the following line:
    # print(s, file=sys.stderr, flush=True)
    return None
|
|
|
|
|
|
def hashFile(filename):
    """Return the SHA-256 digest of a file's contents.

    The digest is computed in-process with ``hashlib`` instead of spawning
    ``sha256sum --binary --zero`` for every file; the returned value has the
    same form as before (the 64 lowercase hexadecimal characters, as bytes).

    Args:
        filename: path of the file to read.

    Returns:
        bytes: 64 ASCII hex digits.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    import hashlib  # function-scope import: the file header does not import it

    digest = hashlib.sha256()
    with open(filename, 'rb') as stream:
        # Read in 1 MiB chunks so large files are never held in memory whole.
        for chunk in iter(lambda: stream.read(1 << 20), b''):
            digest.update(chunk)
    result = digest.hexdigest().encode('ascii')
    debug("hashFile("+filename+") = "+str(result))
    return result
|
|
|
|
def hash1(bytes_):
    """Return the SHA-256 digest of an in-memory byte string.

    Computed with ``hashlib`` rather than by piping the data through an
    external ``sha256sum --binary --zero`` process; the result has the same
    form as before (the 64 lowercase hexadecimal characters, as bytes).

    Args:
        bytes_: the data to hash.

    Returns:
        bytes: 64 ASCII hex digits.
    """
    import hashlib  # function-scope import: the file header does not import it

    result = hashlib.sha256(bytes_).hexdigest().encode('ascii')
    debug("hash1("+str(bytes_)+") = "+str(result))
    return result
|
|
|
|
#
# Shell pipeline template (filled in with str.format) that prints one
# "<commit hash> <ref name>" line, sorted by ref name, for every ref whose
# commit is not listed as a parent of any commit reachable from the refs.
#
# {HEAD} and {FETCH_HEAD} are replaced by the ref name, or by an empty string
# when the ref is absent. Literal braces of the awk program are doubled
# because of str.format.
#
# Stages:
#   1. `git log --format=%P` lists the parent hashes of every reachable
#      commit; they are split one per line, de-duplicated and emitted three
#      times each (sed 'p;p').
#   2. `git rev-parse ... --all` lists the commit of every ref once.
#   3. `sort | uniq -u` keeps the lines seen exactly once, i.e. the ref
#      commits that never appear as a parent.
#   4. Those bare hashes are merged with "<hash> <ref>" lines and sorted, so
#      each bare hash directly precedes the refs pointing at it; awk prints
#      the named lines whose first 40 characters match the last bare hash.
#
# NOTE: the tr argument must be quoted ('\n'). An unquoted \n is reduced by
# the shell to the letter "n", which left the parents of merge commits glued
# together on one line.
# NOTE(review): the awk program assumes 40-character object names.
#
git_command = '''
(
  (
    git log --format=%P --all {HEAD} {FETCH_HEAD} | tr ' ' '\\n' | grep -v '^$' | LC_ALL=C sort -u | sed -e 'p;p';
    git rev-parse {HEAD} {FETCH_HEAD} --all | LC_ALL=C sort -u
  ) | LC_ALL=C sort | uniq -u;
  for ref in {HEAD} {FETCH_HEAD}; do echo "$(git rev-parse $ref) $ref"; done; git for-each-ref --format='%(objectname) %(refname)'
) | LC_ALL=C sort \
  | awk 'BEGIN {{ h="" }} {{ if (length($0) == 40) {{ h=$0 }} else {{ if (substr($0,1,40) == h) print $0 }} }}' \
  | LC_ALL=C sort -k 2
'''
|
|
|
|
def ref_exists(path, ref):
    """Tell whether ``ref`` resolves to an object in the repository at ``path``.

    Runs ``git rev-parse --verify <ref>`` with ``path`` as working directory.
    The ref is passed as a single argument rather than spliced into a shell
    command line, so whitespace or shell metacharacters in it cannot alter
    the command that is executed.

    Args:
        path: directory in which git is run.
        ref: the ref name to check (e.g. "HEAD").

    Returns:
        True if git exits successfully, False if it exits with an error.

    Raises:
        OSError: if git cannot be started (e.g. not installed, bad ``path``).
    """
    try:
        subprocess.check_output(
            ["git", "rev-parse", "--verify", ref],
            cwd=path,
            stderr=subprocess.DEVNULL,  # git's complaint about a missing ref is expected noise
        )
    except subprocess.CalledProcessError:
        return False
    return True
|
|
|
|
def hashGit(path):
    """Run the ``git_command`` pipeline in ``path`` and return its raw output.

    ``FETCH_HEAD`` and ``HEAD`` are only handed to the pipeline when
    ``ref_exists`` reports them; a missing ref is substituted by an empty
    string.

    Args:
        path: directory of the git repository.

    Returns:
        bytes: the standard output of the shell pipeline (not a digest).
    """
    # Probe FETCH_HEAD first, then HEAD; absent refs become ''.
    substitutions = {
        name: (name if ref_exists(path, name) else '')
        for name in ("FETCH_HEAD", "HEAD")
    }
    script = git_command.format(**substitutions)
    result = subprocess.check_output(['sh', '-c', script], cwd=path)
    debug("hashGit("+path+") = "+str(result))
    return result
|
|
|
|
def hashSqlite3(path):
    """Hash the sorted SQL dump of an SQLite database.

    Equivalent to ``sqlite3 DB .dump | LC_ALL=C sort | sha256sum --binary
    --zero``, but each stage is started from Python so that a failure of
    ``sqlite3`` or ``sort`` is detected. In a plain shell pipeline only the
    exit status of the last command is reported, so a failing dump would
    silently produce the hash of partial or empty input.

    Args:
        path: path of the database file; it is made absolute before use.

    Returns:
        bytes: the complete output of ``sha256sum`` for standard input, i.e.
        the hex digest followed by the file marker and a NUL byte (unlike
        the other hash helpers, the output is not truncated to the digest).

    Raises:
        subprocess.CalledProcessError: if any stage exits with a non-zero
            status.
        OSError: if one of the programs cannot be started.
    """
    database = os.path.abspath(path)
    # Only `sort` runs with LC_ALL=C, so lines are ordered byte-wise
    # regardless of the caller's locale.
    c_locale = dict(os.environ, LC_ALL='C')
    with subprocess.Popen(['sqlite3', database, '.dump'],
                          stdout=subprocess.PIPE) as dump:
        with subprocess.Popen(['sort'], stdin=dump.stdout,
                              stdout=subprocess.PIPE, env=c_locale) as sort:
            # Drop our copy of the pipe so sqlite3 is notified if sort exits early.
            dump.stdout.close()
            result = subprocess.check_output(
                ['sha256sum', '--binary', '--zero'], stdin=sort.stdout)
    # Leaving the `with` blocks has waited for both processes.
    for stage in (dump, sort):
        if stage.returncode != 0:
            raise subprocess.CalledProcessError(stage.returncode, stage.args)
    debug("hashSqlite3("+path+") = "+str(result))
    return result
|
|
|
|
def ignore_exitcode(cmd, **kwargs):
    """Run ``cmd`` and return its standard output, tolerating failure.

    Args:
        cmd: command passed to ``subprocess.check_output``.
        **kwargs: forwarded unchanged to ``subprocess.check_output``.

    Returns:
        The captured output on success. If the command exits with a non-zero
        status, an empty value of the same type that success would have
        produced: ``b''`` by default, ``''`` when text mode was requested
        through ``text``, ``universal_newlines``, ``encoding`` or ``errors``.
    """
    try:
        return subprocess.check_output(cmd, **kwargs)
    except subprocess.CalledProcessError:
        text_mode = any(
            kwargs.get(option)
            for option in ('text', 'universal_newlines', 'encoding', 'errors')
        )
        # Keep the failure value type-compatible with the success value so
        # callers can compare against bytes (or str) without mixing types.
        return '' if text_mode else b''
|
|
|
|
def is_git(x):
    """Tell whether directory ``x`` is handled as a git repository.

    Args:
        x: path to examine.

    Returns:
        False if ``x`` is not a directory. Otherwise True when, run from
        ``x``, ``git rev-parse --is-inside-git-dir`` or
        ``git rev-parse --is-inside-work-tree`` prints ``true``.
    """
    if not os.path.isdir(x):
        return False
    probes = (
        "git rev-parse --is-inside-git-dir 2>/dev/null",
        "git rev-parse --is-inside-work-tree 2>/dev/null",
    )
    # The second probe only runs when the first one did not answer "true".
    return any(
        ignore_exitcode(probe, cwd=x, shell=True).strip() == b'true'
        for probe in probes
    )
    # TODO: if a file which is inside a larger git dir is passed on the CLI, this still returns True :-(
    # NOTE(review): non-directories return False above, so this presumably
    # refers to directories nested inside a larger repository - confirm.
|
|
|
|
def recur(depth, x):
    """Compute the fingerprint of a path, or of the initial list of paths.

    Args:
        depth: nesting level; only used in debug traces.
        x: either the list of paths given on the command line, or one path.

    Returns:
        bytes, depending on what ``x`` is:
          * list: ``b'root'`` + NUL, then for each path in sorted order its
            fingerprint, a space, the path as given and a NUL byte. This
            concatenation is returned unhashed.
          * git repository (per ``is_git``): ``hash1`` of a type tag and the
            output of ``hashGit``.
          * directory: ``hash1`` of a type tag followed by
            "<fingerprint> <entry name>" + NUL for each sorted entry.
          * SQLite 3 database (per ``file``): the output of ``hashSqlite3``.
          * regular file: the output of ``hashFile``.

    Anything else terminates the program through ``sys.exit``.
    """
    # initial list of paths
    if isinstance(x, list):
        debug("ROOT " + str(depth) + ' [' + ', '.join(x) + ']')
        return b'root\0' + b''.join(
            recur(depth + 1, os.path.abspath(path))
            # surrogateescape: names that are not valid UTF-8 reach Python as
            # lone surrogates; encode them back to their original bytes
            # instead of failing with UnicodeEncodeError.
            + b' ' + path.encode('utf-8', 'surrogateescape') + b'\0'
            for path in sorted(x)
        )
    # GIT repo
    if is_git(x):
        debug("GIT DIR " + str(depth) + ' ' + x)
        return hash1(b'git-versioned folder\0' + hashGit(x))
    # directory
    if os.path.isdir(x):
        debug("DIR " + str(depth) + ' ' + x)
        return hash1(b'directory\0' + b''.join(
            recur(depth + 1, os.path.join(x, entry))
            + b' ' + entry.encode('utf-8', 'surrogateescape') + b'\0'
            for entry in sorted(os.listdir(x))
        ))
    # SQLite database. --brief keeps the path out of the output of `file`, so
    # a path that merely contains the marker text is not mistaken for one.
    if b'SQLite 3.x database' in subprocess.check_output(["file", "--brief", x]):
        debug("SQLITE3 " + str(depth) + ' ' + x)
        return hashSqlite3(x)
    # Just a file
    if os.path.isfile(x):
        debug("PLAIN FILE " + str(depth) + ' ' + x)
        return hashFile(x)
    sys.exit("unknown file type for %s" % os.path.abspath(x))
|
|
|
|
# Script entry: fingerprint every path given on the command line, hash the
# combined result once more and print the hex digest on standard output.
_root_digest = hash1(recur(0, sys.argv[1:]))
print(_root_digest.decode('utf-8'), flush=True)
|