Script pour cacher les fichiers (dotpercent-files.py). Ajustement des autres scripts pour ignorer les .% en début des noms de fichiers/dossiers (du coup updatehash.py ne met rien à jour après avoir fait dotpercent-files.py). Détection quand plus de 50% des fichiers ont disparu: ne pas nettoyer la base de données dans ce cas.
This commit is contained in:
parent
7c040f7bb2
commit
92bbf16a1e
24
README
24
README
|
@ -5,22 +5,26 @@ Lisez le code source des scripts avant de les utiliser ! Normalement aucun d'eux
|
|||
Utilisation
|
||||
-----------
|
||||
|
||||
Mettre à jour la base de donnée des hash (le script peut être interrompu avec `ctrl-C` et il reprendra là où il s'était arrêté, à quelques secondes près (au dernier message `commit!` en fait):
|
||||
1. Mettre à jour la base de données des hash (le script peut être interrompu avec `ctrl-C` et il reprendra là où il s'était arrêté, à quelques secondes près (au dernier message `commit!` en fait)):
|
||||
|
||||
./updatehash.py /home/moi/hash.db /home/moi/documents
|
||||
updatehash.py ./documents-hash.db ./documents
|
||||
|
||||
Ensuite lancer à la main certaines commandes que j'ai notées dans updatehash.sql (je n'en ai pas encore fait un script)
|
||||
2. Puis cacher les fichiers qui sont dans ./documents/vrac et qui ont une copie dans ./documents/tri (les deux ensembles de fichiers ne doivent pas se chevaucher):
|
||||
|
||||
Puis cacher automatiquement les dossiers qui ne contiennent que des fichiers dont le nom commence par `.%` (et aussi les dossiers vides).
|
||||
dotpercent-files.py ./documents-hash.db --vrac ./documents/vrac --tri ./documents/tri > "undo-dotpercent-files-$(date).sh"
|
||||
|
||||
./dotpercent-dirs.py /home/moi/documents/vrac > "undo-dotpercent-dirs-$(date).sh"
|
||||
3. Puis cacher automatiquement les dossiers qui ne contiennent que des fichiers dont le nom commence par `.%` (et aussi les dossiers vides).
|
||||
|
||||
Ensuite déplacer à la main ce que vous voulez depuis `/home/moi/documents/vrac` vers `/home/moi/documents/tri`
|
||||
dotpercent-dirs.py ./documents/vrac > "undo-dotpercent-dirs-$(date).sh"
|
||||
|
||||
Puis nettoyer les quelques fichiers et dossiers préfixés par `.%` qui ont pu être déplacés dans `/home/moi/documents/tri`:
|
||||
4. Ensuite déplacer à la main ce que vous voulez depuis `./documents/vrac` vers `./documents/tri`
|
||||
|
||||
./unhide-dotpercent.py /home/moi/documents/tri > "undo-unhide-dotpercent-tri-$(date).sh"
|
||||
5. Puis nettoyer les quelques fichiers et dossiers préfixés par `.%` qui ont pu être déplacés dans `./documents/tri`:
|
||||
|
||||
Puis quand vous avez fini, ou que vous en avez marre, vous pouvez ré-afficher les fichiers et dossiers préfixés par `.%` qui restent dans `/home/moi/documents/vrac`
|
||||
unhide-dotpercent.py ./documents/tri > "undo-unhide-dotpercent-tri-$(date).sh"
|
||||
|
||||
./unhide-dotpercent.py /home/moi/documents/vrac > "undo-unhide-dotpercent-vrac-$(date).sh"
|
||||
6. Recommencer à l'étape 1 jusqu'à satisfaction.
|
||||
|
||||
7. Puis quand vous avez fini, ou que vous en avez marre, vous pouvez ré-afficher les fichiers et dossiers préfixés par `.%` qui restent dans `./documents/vrac`
|
||||
|
||||
unhide-dotpercent.py ./documents/vrac > "undo-unhide-dotpercent-vrac-$(date).sh"
|
||||
|
|
91
dotpercent-files.py
Executable file
91
dotpercent-files.py
Executable file
|
@ -0,0 +1,91 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import sys
|
||||
import sqlite3
|
||||
|
||||
# Common functions
|
||||
|
||||
def removePrefix(fileName):
    """Return fileName with every leading ".%" marker stripped.

    The marker may be repeated (".%.%name"); all leading repetitions are
    removed. A name that does not start with the marker is returned
    unchanged, and a marker elsewhere in the name is left alone.
    """
    marker = ".%"
    while fileName.startswith(marker):
        fileName = fileName[len(marker):]
    return fileName
|
||||
|
||||
def removePrefixPath(path):
    """Return path with the leading ".%" markers removed from each component.

    The path is split on '/', every component goes through removePrefix(),
    and the components are joined back with '/'.
    """
    cleaned = []
    for component in path.split('/'):
        cleaned.append(removePrefix(component))
    return '/'.join(cleaned)
|
||||
|
||||
def prefixedExists(path):
    """Tell whether a hidden (".%"-prefixed) variant of path exists on disk.

    Candidates are built from the '/'-separated components of path by
    prefixing the last component with ".%", then the last two, and so on
    until every component is prefixed; components before the cut are left
    untouched. Candidates are probed in that order.

    Returns True as soon as one candidate exists, False otherwise.
    """
    components = path.split('/')
    # range() instead of xrange(): same iteration, and it also runs on
    # Python 3.
    for cut in reversed(range(len(components))):
        hidden = ['.%' + component for component in components[cut:]]
        if os.path.exists('/'.join(components[:cut] + hidden)):
            return True
    return False
|
||||
|
||||
# Code for this utility
|
||||
|
||||
# Marker put in front of a file's base name when the file is hidden.
prefix = '.%'
|
||||
|
||||
def help():
    """Print the command-line usage on stderr and exit with status 1.

    The usage goes to stderr because the documented invocation redirects
    stdout into the generated undo script: printed on stdout, the message
    would be invisible to the user and would end up in that script.
    """
    sys.stderr.write('usage: %s database.db --vrac ./dbl/vrac-1 ./dbl/vrac-2 ./dbl/vrac-3 --tri ./dbl/tri-1 ./dbl/tri-2 ./dbl/tri-3\n' % sys.argv[0])
    sys.exit(1)
|
||||
|
||||
# --- Command line -----------------------------------------------------------

vracs = []
tris = []

# Check the argument count before reading sys.argv[1], so that running the
# script without arguments shows the usage instead of raising IndexError.
if len(sys.argv) < 6:
    help()

db = sys.argv[1]


def _asDirectory(path):
    """Return path with a trailing '/' (added only when missing)."""
    if path[-1:] == '/':
        return path
    return path + '/'


def _likePrefixPattern(path):
    """Return a LIKE pattern matching every string that starts with path.

    SQLite's LIKE has no escape character unless an ESCAPE clause is given,
    so '%' and '_' found in path are escaped with a backslash here and the
    queries below use "escape '\\'".
    """
    escaped = path.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')
    return escaped + '%'


def _shellQuote(text):
    """Return text wrapped in single quotes for a POSIX shell."""
    return "'" + text.replace("'", "'\\''") + "'"


# Arguments following --vrac / --tri are directories of that kind, until the
# other switch is met. A directory given before any switch is an error.
state = None
for arg in sys.argv[2:]:
    if arg == '-h' or arg == '--help':
        help()
    elif arg == '--vrac':
        state = "vrac"
    elif arg == '--tri':
        state = "tri"
    elif state == 'tri':
        tris.append(_asDirectory(arg))
    elif state == 'vrac':
        vracs.append(_asDirectory(arg))
    else:
        help()

# NOTE(review): these two lines go to stdout, i.e. into the undo script when
# stdout is redirected as documented -- confirm this is intended.
print('vracs=%s' % ', '.join(vracs))
print('tris=%s' % ', '.join(tris))

# --- Collect the hashes of both sets of files ---------------------------------

connection = sqlite3.connect(db)
connection.text_factory = str  # For utf-8 file names
cursor = connection.cursor()

# One temporary table per set: (rowid in files, "size#md5#sha1" signature).
for table, roots in (('hashesVrac', vracs), ('hashesTri', tris)):
    cursor.execute("create temp table %s(id, hash);" % table)
    for path in roots:
        cursor.execute("insert into %s select rowid,size||'#'||md5||'#'||sha1 from files where path like ? escape '\\';" % table, (_likePrefixPattern(path),))

cursor.execute("create index i_hashesTri_hash on hashesTri(hash);")
cursor.execute("create index i_hashesVrac_hash on hashesVrac(hash);")

# --- Hide every "vrac" file whose signature also exists in "tri" --------------

duplicates = cursor.execute("select (select path from files where rowid == hashesVrac.id) as path from hashesVrac where hashesVrac.hash in (select hash from hashesTri);")
for fpath, in duplicates:
    # os.path.join() keeps the destination next to the source even when the
    # path has no directory part ('%s/%s' would have produced '/.%name').
    dest = os.path.join(os.path.dirname(fpath), prefix + os.path.basename(fpath))
    sourceExists = os.path.exists(fpath)
    if not sourceExists and prefixedExists(fpath):
        continue  # Already moved
    quoted = (_shellQuote(fpath), _shellQuote(dest))
    if not sourceExists:
        print("# Couldn't hide %s as %s: source doesn't exist" % (fpath, dest))
        print("i-have-not-moved-because-no-source -i -- %s %s" % quoted)
    elif os.path.exists(dest):
        print("# Couldn't hide %s as %s: destination exists" % (fpath, dest))
        print("i-have-not-moved-because-dest-exists -i -- %s %s" % quoted)
    else:
        # The line is emitted before the rename, as in the original order.
        print("i-have-moved -i -- %s %s" % quoted)
        os.rename(fpath, dest)
|
|
@ -3,6 +3,15 @@
|
|||
import os
|
||||
import sys
|
||||
|
||||
# Common functions
|
||||
|
||||
def removePrefix(fileName):
    """Return fileName with every leading ".%" marker stripped.

    The marker may be repeated (".%.%name"); all leading repetitions are
    removed. A name that does not start with the marker is returned
    unchanged, and a marker elsewhere in the name is left alone.
    """
    marker = ".%"
    while fileName.startswith(marker):
        fileName = fileName[len(marker):]
    return fileName
|
||||
|
||||
# Code for this utility
|
||||
|
||||
def walk(path):
|
||||
#print >> sys.stderr, path
|
||||
for f in os.listdir(path):
|
||||
|
@ -10,10 +19,7 @@ def walk(path):
|
|||
if os.path.isdir(fpath) and not os.path.islink(fpath):
|
||||
walk(fpath)
|
||||
if f[0:2] == ".%":
|
||||
ff = f
|
||||
while ff[0:2] == ".%":
|
||||
ff = ff[2:]
|
||||
dest = os.path.join(path, ff)
|
||||
dest = os.path.join(path, removePrefix(f))
|
||||
if not os.path.exists(dest):
|
||||
print "i-have-moved -i -- '%s' '%s'" % (fpath.replace("'", "'\\''"), dest.replace("'", "'\\''"))
|
||||
os.rename(fpath, dest)
|
||||
|
|
|
@ -6,6 +6,19 @@ import sqlite3
|
|||
import time
|
||||
import sys
|
||||
import stat
|
||||
import math
|
||||
|
||||
# Common functions
|
||||
|
||||
def removePrefix(fileName):
    """Return fileName with every leading ".%" marker stripped.

    The marker may be repeated (".%.%name"); all leading repetitions are
    removed. A name that does not start with the marker is returned
    unchanged, and a marker elsewhere in the name is left alone.
    """
    marker = ".%"
    while fileName.startswith(marker):
        fileName = fileName[len(marker):]
    return fileName
|
||||
|
||||
def removePrefixPath(path):
    """Return path with the leading ".%" markers removed from each component.

    The path is split on '/', every component goes through removePrefix(),
    and the components are joined back with '/'.
    """
    cleaned = []
    for component in path.split('/'):
        cleaned.append(removePrefix(component))
    return '/'.join(cleaned)
|
||||
|
||||
# Code for this utility
|
||||
|
||||
def checksumFile(path):
|
||||
md5 = hashlib.md5()
|
||||
|
@ -25,10 +38,9 @@ def fileInfo(path):
|
|||
return {'mtime':st.st_mtime, 'size':st.st_size}
|
||||
|
||||
def initdb(cursor):
|
||||
cursor.execute("create table if not exists files(tag,timestamp,path primary key,md5,sha1,mtime,size)")
|
||||
cursor.execute("create index if not exists i_files_tag on files(tag)")
|
||||
cursor.execute("create table if not exists files(timestamp,path primary key,md5,sha1,mtime,size)")
|
||||
cursor.execute("create index if not exists i_files_path_md5_sha1 on files(path,md5,sha1)")
|
||||
cursor.execute("create table if not exists removedfiles(rmtimestamp,tag,timestamp,path,md5,sha1,mtime,size)")
|
||||
cursor.execute("create table if not exists removedfiles(rmtimestamp,timestamp,path,md5,sha1,mtime,size)")
|
||||
|
||||
def cacheFileInfo(cursor, path):
|
||||
cursor.execute('select mtime,size from files where path = ?', (path,))
|
||||
|
@ -44,19 +56,25 @@ def update(connection,cursor,path):
|
|||
for d in os.walk(path):
|
||||
dirpath=d[0]
|
||||
for f in d[2]:
|
||||
fpath = os.path.join(dirpath, f)
|
||||
if os.path.isfile(fpath):
|
||||
fi = fileInfo(fpath)
|
||||
prefixPath = os.path.join(dirpath, f)
|
||||
if os.path.isfile(prefixPath):
|
||||
fi = fileInfo(prefixPath)
|
||||
if fi is None:
|
||||
print "!skipping", fpath
|
||||
print "!skipping: no fileinfo: ", prefixPath
|
||||
continue
|
||||
fpath = removePrefixPath(prefixPath)
|
||||
if fpath != prefixPath and os.path.exists(fpath):
|
||||
print "!skipping: collision between '%s' and '%s'" % (prefixPath, fpath,)
|
||||
cfi = cacheFileInfo(cursor,fpath)
|
||||
cursor.execute("insert into newfiles(path) values(?)", (fpath,))
|
||||
if fi != cfi:
|
||||
print " updating", fpath
|
||||
sums = checksumFile(fpath)
|
||||
values = ('no tag',timestamp,fpath,sums['md5'],sums['sha1'],fi['mtime'],fi['size'])
|
||||
cursor.execute("insert or replace into files(tag,timestamp,path,md5,sha1,mtime,size) values(?,?,?,?,?,?,?)", values)
|
||||
if fpath != prefixPath:
|
||||
print " updating %s (%s)" % (prefixPath, fpath,)
|
||||
else:
|
||||
print " updating %s" % (fpath,)
|
||||
sums = checksumFile(prefixPath)
|
||||
values = (timestamp,fpath,sums['md5'],sums['sha1'],fi['mtime'],fi['size'])
|
||||
cursor.execute("insert or replace into files(timestamp,path,md5,sha1,mtime,size) values(?,?,?,?,?,?)", values)
|
||||
|
||||
currentTime = time.clock()
|
||||
if abs(lastTime-currentTime) >= 10:
|
||||
|
@ -65,14 +83,29 @@ def update(connection,cursor,path):
|
|||
print "commit!"
|
||||
connection.commit()
|
||||
print "commit!"
|
||||
|
||||
print "cleaning up..."
|
||||
likepath=('' + path).replace('%', '%%') + '%';
|
||||
cursor.execute("create temp table deletedfiles(path)")
|
||||
cursor.execute("create index i_deletedfiles_path on deletedfiles(path)")
|
||||
likepath=('' + path).replace('%', '%%') + '%';
|
||||
cursor.execute("insert into deletedfiles(path) select path from files where path like ?", (likepath,));
|
||||
|
||||
nbFilesBefore = cursor.execute("select count(*) from deletedfiles").fetchone()[0];
|
||||
nbFilesAfter = cursor.execute("select count(*) from newfiles").fetchone()[0];
|
||||
print 'number of files before: ', nbFilesBefore
|
||||
print 'number of files after: ', nbFilesAfter
|
||||
|
||||
cursor.execute("delete from deletedfiles where path in newfiles");
|
||||
cursor.execute("insert into removedfiles(rmtimestamp,tag,timestamp,path,md5,sha1,mtime,size) select ?,tag,timestamp,path,md5,sha1,mtime,size from files where path in deletedfiles", (timestamp,))
|
||||
cursor.execute("delete from files where path in deletedfiles")
|
||||
nbFilesDelete = cursor.execute("select count(*) from deletedfiles").fetchone()[0];
|
||||
print 'number of files to remove from database (moved in table removedfiles): ', nbFilesDelete
|
||||
|
||||
if (nbFilesAfter < math.ceil(nbFilesBefore * 0.5)):
|
||||
print "!!! Not deleting hashes from database: there are less than 50% files after. Did you forget to mount your harddisk?"
|
||||
else:
|
||||
cursor.execute("insert into removedfiles(rmtimestamp,timestamp,path,md5,sha1,mtime,size)"
|
||||
+ " select ?,timestamp,path,md5,sha1,mtime,size from files where path in deletedfiles", (timestamp,))
|
||||
cursor.execute("delete from files where path in deletedfiles")
|
||||
|
||||
connection.commit()
|
||||
|
||||
def walk(db,path):
|
||||
|
|
Loading…
Reference in New Issue
Block a user