diff --git a/README b/README index 8dbec85..9b81098 100644 --- a/README +++ b/README @@ -5,22 +5,26 @@ Lisez le code source des scripts avant de les utiliser ! Normalement aucun d'eux Utilisation ----------- -Mettre à jour la base de donnée des hash (le script peut être interrompu avec `ctrl-C` et il reprendra là où il s'était arrêté, à quelques secondes près (au dernier message `commit!` en fait): +1. Mettre à jour la base de données des hash (le script peut être interrompu avec `ctrl-C` et il reprendra là où il s'était arrêté, à quelques secondes près (au dernier message `commit!` en fait)): - ./updatehash.py /home/moi/hash.db /home/moi/documents + updatehash.py ./documents-hash.db ./documents -Ensuite lancer à la main certaines commandes que j'ai notées dans updatehash.sql (je n'en ai pas encore fait un script) +2. Puis cacher les fichiers qui sont dans ./documents/vrac et qui ont une copie dans ./documents/tri (les deux ensembles de fichiers ne doivent pas se chevaucher): -Puis cacher automatiquement les dossiers qui ne contiennent que des fichiers dont le nom commence par `.%` (et aussi les dossiers vides). + dotpercent-files.py ./documents-hash.db --vrac ./documents/vrac --tri ./documents/tri > "undo-dotpercent-files-$(date).sh" - ./dotpercent-dirs.py /home/moi/documents/vrac > "undo-dotpercent-dirs-$(date).sh" +3. Puis cacher automatiquement les dossiers qui ne contiennent que des fichiers dont le nom commence par `.%` (et aussi les dossiers vides). -Ensuite déplacer à la main ce que vous voulez depuis `/home/moi/documents/vrac` vers `/home/moi/documents/tri` + dotpercent-dirs.py ./documents/vrac > "undo-dotpercent-dirs-$(date).sh" -Puis nettoyer les quelques fichiers et dossiers préfixés par `.%` qui ont pu être déplacés dans `/home/moi/documents/tri`: +4. Ensuite déplacer à la main ce que vous voulez depuis `./documents/vrac` vers `./documents/tri` - ./unhide-dotpercent.py /home/moi/documents/tri > "undo-unhide-dotpercent-tri-$(date).sh" +5. 
Puis nettoyer les quelques fichiers et dossiers préfixés par `.%` qui ont pu être déplacés dans `./documents/tri`: -Puis quand vous avez fini, ou que vous en avez marre, vous pouvez ré-afficher les fichiers et dossiers préfixés par `.%` qui restent dans `/home/moi/documents/vrac` + unhide-dotpercent.py ./documents/tri > "undo-unhide-dotpercent-tri-$(date).sh" - ./unhide-dotpercent.py /home/moi/documents/vrac > "undo-unhide-dotpercent-vrac-$(date).sh" +6. Recommencer à l'étape 1 jusqu'à satisfaction. + +7. Puis quand vous avez fini, ou que vous en avez marre, vous pouvez ré-afficher les fichiers et dossiers préfixés par `.%` qui restent dans `./documents/vrac` + + unhide-dotpercent.py ./documents/vrac > "undo-unhide-dotpercent-vrac-$(date).sh" diff --git a/dotpercent-files.py b/dotpercent-files.py new file mode 100755 index 0000000..d8f4d34 --- /dev/null +++ b/dotpercent-files.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import os +import sys +import sqlite3 + +# Common functions + +def removePrefix(fileName): + while fileName[0:2] == ".%": + fileName = fileName[2:] + return fileName + +def removePrefixPath(path): + return '/'.join([removePrefix(component) for component in path.split('/')]) + +def prefixedExists(path): + components = path.split('/') + prefixedPaths = ('/'.join(components[0:i] + ['.%' + component for component in components[i:]]) for i in reversed(xrange(len(components)))) + return any((os.path.exists(prefixedPath) for prefixedPath in prefixedPaths)) + +# Code for this utility + +prefix = '.%' + +def help(): + print 'usage: %s database.db --vrac ./dbl/vrac-1 ./dbl/vrac-2 ./dbl/vrac-3 --tri ./dbl/tri-1 ./dbl/tri-2 ./dbl/tri-3' % sys.argv[0] + sys.exit(1) + +vracs = [] +tris = [] + +db = sys.argv[1] + +if len(sys.argv) < 6: + help() + +state=None +for arg in sys.argv[2:]: + if arg == '-h' or arg == '--help': + help() + elif arg == '--vrac': + state = "vrac" + elif arg == '--tri': + state = "tri" + elif state == 'tri': + if 
arg[-1:] == '/': + tris.append(arg) + else + tris.append(arg + '/') + elif state == 'vrac': + if arg[-1:] == '/': + vracs.append(arg) + else + vracs.append(arg + '/') + else: + help() + +print 'vracs=%s' % ', '.join(vracs) +print 'tris=%s' % ', '.join(tris) + +connection = sqlite3.connect(db) +connection.text_factory = str # For utf-8 file names… +cursor = connection.cursor() + +cursor.execute("create temp table hashesVrac(id, hash);") +for path in vracs: + likepath=('' + path).replace('%', '%%') + '%'; + cursor.execute("insert into hashesVrac select rowid,size||'#'||md5||'#'||sha1 from files where path like ?;", (likepath,)) + +cursor.execute("create temp table hashesTri(id, hash);") +for path in tris: + likepath=('' + path).replace('%', '%%') + '%'; + cursor.execute("insert into hashesTri select rowid,size||'#'||md5||'#'||sha1 from files where path like ?;", (likepath,)) + +cursor.execute("create index i_hashesTri_hash on hashesTri(hash);") +cursor.execute("create index i_hashesVrac_hash on hashesVrac(hash);") + +for fpath, in cursor.execute("select (select path from files where rowid == hashesVrac.id) as path from hashesVrac where hashesVrac.hash in (select hash from hashesTri);"): + dest = '%s/%s%s' % (os.path.dirname(fpath), prefix, os.path.basename(fpath),) + if prefixedExists(fpath) and not os.path.exists(fpath): + pass # Already moved + elif not os.path.exists(fpath): + print "# Couldn't hide %s as %s: source doesn't exist" % (fpath, dest,) + print "i-have-not-moved-because-no-source -i -- '%s' '%s'" % (fpath.replace("'", "'\\''"), dest.replace("'", "'\\''"),) + elif os.path.exists(dest): + print "# Couldn't hide %s as %s: destination exists" % (fpath, dest,) + print "i-have-not-moved-because-dest-exists -i -- '%s' '%s'" % (fpath.replace("'", "'\\''"), dest.replace("'", "'\\''"),) + else: + print "i-have-moved -i -- '%s' '%s'" % (fpath.replace("'", "'\\''"), dest.replace("'", "'\\''"),) + os.rename(fpath, dest) diff --git a/unhide-dotpercent.py 
b/unhide-dotpercent.py index a07ce3f..51b841a 100755 --- a/unhide-dotpercent.py +++ b/unhide-dotpercent.py @@ -3,6 +3,15 @@ import os import sys +# Common functions + +def removePrefix(fileName): + while fileName[0:2] == ".%": + fileName = fileName[2:] + return fileName + +# Code for this utility + def walk(path): #print >> sys.stderr, path for f in os.listdir(path): @@ -10,10 +19,7 @@ def walk(path): if os.path.isdir(fpath) and not os.path.islink(fpath): walk(fpath) if f[0:2] == ".%": - ff = f - while ff[0:2] == ".%": - ff = ff[2:] - dest = os.path.join(path, ff) + dest = os.path.join(path, removePrefix(f)) if not os.path.exists(dest): print "i-have-moved -i -- '%s' '%s'" % (fpath.replace("'", "'\\''"), dest.replace("'", "'\\''")) os.rename(fpath, dest) diff --git a/updatehash.py b/updatehash.py index 2a389c1..438edb9 100755 --- a/updatehash.py +++ b/updatehash.py @@ -6,6 +6,19 @@ import sqlite3 import time import sys import stat +import math + +# Common functions + +def removePrefix(fileName): + while fileName[0:2] == ".%": + fileName = fileName[2:] + return fileName + +def removePrefixPath(path): + return '/'.join([removePrefix(component) for component in path.split('/')]) + +# Code for this utility def checksumFile(path): md5 = hashlib.md5() @@ -25,10 +38,9 @@ def fileInfo(path): return {'mtime':st.st_mtime, 'size':st.st_size} def initdb(cursor): - cursor.execute("create table if not exists files(tag,timestamp,path primary key,md5,sha1,mtime,size)") - cursor.execute("create index if not exists i_files_tag on files(tag)") + cursor.execute("create table if not exists files(timestamp,path primary key,md5,sha1,mtime,size)") cursor.execute("create index if not exists i_files_path_md5_sha1 on files(path,md5,sha1)") - cursor.execute("create table if not exists removedfiles(rmtimestamp,tag,timestamp,path,md5,sha1,mtime,size)") + cursor.execute("create table if not exists removedfiles(rmtimestamp,timestamp,path,md5,sha1,mtime,size)") def cacheFileInfo(cursor, path): 
cursor.execute('select mtime,size from files where path = ?', (path,)) @@ -44,19 +56,25 @@ def update(connection,cursor,path): for d in os.walk(path): dirpath=d[0] for f in d[2]: - fpath = os.path.join(dirpath, f) - if os.path.isfile(fpath): - fi = fileInfo(fpath) + prefixPath = os.path.join(dirpath, f) + if os.path.isfile(prefixPath): + fi = fileInfo(prefixPath) if fi is None: - print "!skipping", fpath + print "!skipping: no fileinfo: ", prefixPath continue + fpath = removePrefixPath(prefixPath) + if fpath != prefixPath and os.path.exists(fpath): + print "!skipping: collision between '%s' and '%s'" % (prefixPath, fpath,) cfi = cacheFileInfo(cursor,fpath) cursor.execute("insert into newfiles(path) values(?)", (fpath,)) if fi != cfi: - print " updating", fpath - sums = checksumFile(fpath) - values = ('no tag',timestamp,fpath,sums['md5'],sums['sha1'],fi['mtime'],fi['size']) - cursor.execute("insert or replace into files(tag,timestamp,path,md5,sha1,mtime,size) values(?,?,?,?,?,?,?)", values) + if fpath != prefixPath: + print " updating %s (%s)" % (prefixPath, fpath,) + else: + print " updating %s" % (fpath,) + sums = checksumFile(prefixPath) + values = (timestamp,fpath,sums['md5'],sums['sha1'],fi['mtime'],fi['size']) + cursor.execute("insert or replace into files(timestamp,path,md5,sha1,mtime,size) values(?,?,?,?,?,?)", values) currentTime = time.clock() if abs(lastTime-currentTime) >= 10: @@ -65,14 +83,29 @@ def update(connection,cursor,path): print "commit!" connection.commit() print "commit!" + print "cleaning up..." 
+ likepath=('' + path).replace('%', '%%') + '%'; cursor.execute("create temp table deletedfiles(path)") cursor.execute("create index i_deletedfiles_path on deletedfiles(path)") - likepath=('' + path).replace('%', '%%') + '%'; cursor.execute("insert into deletedfiles(path) select path from files where path like ?", (likepath,)); + + nbFilesBefore = cursor.execute("select count(*) from deletedfiles").fetchone()[0]; + nbFilesAfter = cursor.execute("select count(*) from newfiles").fetchone()[0]; + print 'number of files before: ', nbFilesBefore + print 'number of files after: ', nbFilesAfter + cursor.execute("delete from deletedfiles where path in newfiles"); - cursor.execute("insert into removedfiles(rmtimestamp,tag,timestamp,path,md5,sha1,mtime,size) select ?,tag,timestamp,path,md5,sha1,mtime,size from files where path in deletedfiles", (timestamp,)) - cursor.execute("delete from files where path in deletedfiles") + nbFilesDelete = cursor.execute("select count(*) from deletedfiles").fetchone()[0]; + print 'number of files to remove from database (moved in table removedfiles): ', nbFilesDelete + + if (nbFilesAfter < math.ceil(nbFilesBefore * 0.5)): + print "!!! Not deleting hashes from database: there are less than 50% files after. Did you forget to mount your harddisk?" + else: + cursor.execute("insert into removedfiles(rmtimestamp,timestamp,path,md5,sha1,mtime,size)" + + " select ?,timestamp,path,md5,sha1,mtime,size from files where path in deletedfiles", (timestamp,)) + cursor.execute("delete from files where path in deletedfiles") + connection.commit() def walk(db,path):