97 lines
3.2 KiB
Python
Executable File
97 lines
3.2 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
import hashlib
|
|
import os
|
|
import sqlite3
|
|
import time
|
|
import sys
|
|
import stat
|
|
|
|
def checksumFile(path):
    """Compute the MD5 and SHA-1 digests of a file in a single pass.

    The file is opened in binary mode and fed to both hash objects chunk
    by chunk, so its whole content is never held in memory at once.

    Returns a dict holding the hexadecimal digests under the keys
    'md5' and 'sha1'.
    """
    digests = {'md5': hashlib.md5(), 'sha1': hashlib.sha1()}
    # Read size: twice the product of the two hash block sizes.
    read_size = 2 * digests['md5'].block_size * digests['sha1'].block_size
    with open(path, 'rb') as stream:
        # read() returns an empty byte string at end of file, which stops iter().
        for block in iter(lambda: stream.read(read_size), b''):
            for digest in digests.values():
                digest.update(block)
    return dict((name, digest.hexdigest()) for name, digest in digests.items())
|
|
|
|
def fileInfo(path):
    """Return the modification time and size of a regular file.

    The path is inspected with os.lstat, i.e. symbolic links are not
    followed. For anything that is not a regular file the result is None;
    otherwise it is a dict with the keys 'mtime' and 'size'.
    """
    info = os.lstat(path)
    if stat.S_ISREG(info.st_mode):
        return {'mtime': info.st_mtime, 'size': info.st_size}
    return None
|
|
|
|
def initdb(cursor):
    """Create the catalogue tables and indexes if they do not exist yet.

    'files' holds one row per known path (path is the primary key);
    'removedfiles' has the same columns plus the removal timestamp.
    Every statement uses "if not exists", so the call can be repeated.
    """
    schema = (
        "create table if not exists files(tag,timestamp,path primary key,md5,sha1,mtime,size)",
        "create index if not exists i_files_tag on files(tag)",
        "create index if not exists i_files_path_md5_sha1 on files(path,md5,sha1)",
        "create table if not exists removedfiles(rmtimestamp,tag,timestamp,path,md5,sha1,mtime,size)",
    )
    for statement in schema:
        cursor.execute(statement)
|
|
|
|
def cacheFileInfo(cursor, path):
    """Fetch the mtime and size recorded for *path* in the files table.

    Returns a dict with the keys 'mtime' and 'size', or None when the
    path has no row.
    """
    cursor.execute('select mtime,size from files where path = ?', (path,))
    row = cursor.fetchone()
    if not row:
        # fetchone() yields None when nothing matched; hand that back as is.
        return row
    mtime, size = row
    return {'mtime': mtime, 'size': size}
|
|
|
|
def update(connection,cursor,path):
    """Synchronise the files table with the regular files found under *path*.

    The directory tree is walked; every regular file whose (mtime, size)
    differs from the cached row is re-hashed and written to 'files'.
    Afterwards every row of 'files' that lies under *path* but was not seen
    during the walk is copied to 'removedfiles' (stamped with the start time
    of this run) and deleted from 'files'.

    connection -- sqlite3 connection, used for the commits
    cursor     -- cursor on that connection, used for all statements
    path       -- root directory to scan

    Work is committed roughly every 10 seconds of wall-clock time and once
    more at the end of each phase.
    """
    commit_interval = 10  # seconds of wall-clock time between commits

    # The scratch tables live as long as the connection; drop leftovers so
    # update() can run more than once on the same connection.
    cursor.execute("drop table if exists temp.newfiles")
    cursor.execute("drop table if exists temp.deletedfiles")
    cursor.execute("create temp table newfiles(path)")
    cursor.execute("create index i_newfiles_path on newfiles(path)")

    timestamp = time.time()
    # Wall-clock time, not CPU time: hashing is I/O bound, so a CPU clock
    # (time.clock on Unix) would hardly ever reach the commit interval.
    last_commit = timestamp
    for dirpath, _dirnames, filenames in os.walk(path):
        for name in filenames:
            fpath = os.path.join(dirpath, name)
            if os.path.isfile(fpath):
                fi = fileInfo(fpath)
                if fi is None:
                    # isfile() follows symlinks, fileInfo() does not.
                    print("!skipping %s" % fpath)
                    continue
                cfi = cacheFileInfo(cursor, fpath)
                cursor.execute("insert into newfiles(path) values(?)", (fpath,))
                if fi != cfi:
                    print(" updating %s" % fpath)
                    sums = checksumFile(fpath)
                    values = ('no tag', timestamp, fpath, sums['md5'], sums['sha1'], fi['mtime'], fi['size'])
                    cursor.execute("insert or replace into files(tag,timestamp,path,md5,sha1,mtime,size) values(?,?,?,?,?,?,?)", values)

            now = time.time()
            if abs(now - last_commit) >= commit_interval:
                last_commit = now
                connection.commit()
                print("commit!")
    connection.commit()
    print("commit!")
    print("cleaning up...")
    cursor.execute("create temp table deletedfiles(path)")
    cursor.execute("create index i_deletedfiles_path on deletedfiles(path)")
    # Exact, case-sensitive prefix match on "<path><separator>". A LIKE
    # pattern would treat '_' and '%' in the path as wildcards, ignore ASCII
    # case, and also catch sibling directories that merely share the prefix.
    prefix = os.path.join(path, '')
    cursor.execute(
        "insert into deletedfiles(path) select path from files"
        " where substr(path, 1, length(?)) = ? and path not in newfiles",
        (prefix, prefix))
    cursor.execute("insert into removedfiles(rmtimestamp,tag,timestamp,path,md5,sha1,mtime,size) select ?,tag,timestamp,path,md5,sha1,mtime,size from files where path in deletedfiles", (timestamp,))
    cursor.execute("delete from files where path in deletedfiles")
    connection.commit()
|
|
|
|
def walk(db,path):
    """Open the SQLite database file *db* and bring it up to date with *path*.

    The schema is created when missing (initdb), then the directory tree
    rooted at *path* is scanned (update). The cursor and the connection are
    always closed, including when the scan raises.
    """
    connection = sqlite3.connect(db)
    try:
        connection.text_factory = str  # For utf-8 file names...
        cursor = connection.cursor()
        try:
            initdb(cursor)
            update(connection, cursor, path)
        finally:
            cursor.close()
    finally:
        # Closing does not commit; update() commits its own work.
        connection.close()
|
|
|
|
def help():
    """Print the command-line usage line and terminate with exit status 1."""
    usage = 'Usage : %s database-file directory' % sys.argv[0]
    print(usage)
    raise SystemExit(1)
|
|
|
|
# Command-line entry point: exactly two arguments are expected (database
# file, directory). A wrong argument count or an explicit -h/--help prints
# the usage line and exits.
if len(sys.argv) != 3 or any(arg in ('-h', '--help') for arg in sys.argv[1:]):
    help()

walk(sys.argv[1], sys.argv[2])
|