Remdoubles scripts (should have been committed a while ago).

This commit is contained in:
Georges Dupéron 2011-09-04 11:28:06 +02:00
parent e6b27d3e85
commit f72d3aca11
6 changed files with 160 additions and 1 deletions

37
fast-remdoubles.py Executable file
View File

@ -0,0 +1,37 @@
#!/usr/bin/python
import sys
import os
import errno
import shutil
def identicalFiles(pathA, pathB):
    """Return True when both files hold exactly the same bytes.

    The files are opened in binary mode and compared chunk by chunk, so
    neither of them is loaded into memory as a whole.
    """
    chunkSize = 4096
    with open(pathA, 'rb') as left:
        with open(pathB, 'rb') as right:
            chunkA = left.read(chunkSize)
            chunkB = right.read(chunkSize)
            while chunkA == chunkB:
                # Two equal, empty chunks: both files ended together.
                if not chunkA:
                    return True
                chunkA = left.read(chunkSize)
                chunkB = right.read(chunkSize)
            # First differing chunk (this also covers one file being shorter).
            return False
def stripNewline(line):
    """Return line without its trailing newline, if it has one.

    Unlike a blind line[0:-1], this does not eat the last character of a
    final line that is not newline-terminated.
    """
    if line.endswith("\n"):
        return line[:-1]
    return line


def iterPairs(lines):
    """Yield (supprimable, original) path pairs from consecutive lines.

    The input alternates: first the path of the removable file, then the path
    of the file it duplicates. A trailing line without a partner is reported
    on stderr and ignored instead of aborting with StopIteration.
    """
    it = iter(lines)
    for first in it:
        try:
            second = next(it)
        except StopIteration:
            sys.stderr.write(
                "warning: ignoring unpaired last line: %r\n" % first)
            return
        yield stripNewline(first), stripNewline(second)


def main():
    """Move every verified duplicate found under "1/" into "delete/".

    Pairs are read from standard input. A removable file is moved only when
    its path starts with "1/", both files exist, and their contents are
    byte-for-byte identical. The relative path is kept below "delete/".
    """
    nblines = 0
    for supprimable, original in iterPairs(sys.stdin):
        # Run the external `sync` command once every 10240 pairs.
        nblines = (nblines + 1) % 10240
        if nblines == 0:
            os.system("sync")
        if supprimable[0:2] != '1/':
            continue
        if not (os.path.exists(supprimable) and os.path.exists(original)):
            continue
        if not identicalFiles(original, supprimable):
            continue
        destfile = "delete/" + supprimable
        try:
            os.makedirs(os.path.dirname(destfile))
        except OSError as e:
            # An already existing destination directory is fine.
            if e.errno != errno.EEXIST:
                raise
        shutil.move(supprimable, destfile)


if __name__ == "__main__":
    main()

38
fast-syncdoubles.py Executable file
View File

@ -0,0 +1,38 @@
#!/usr/bin/python
import sys
import os
import errno
import shutil
def identicalFiles(pathA, pathB):
    """Tell whether the two files have byte-for-byte identical contents.

    Both files are read in binary mode, one fixed-size chunk at a time, and
    the comparison stops at the first chunk that differs.
    """
    blockSize = 4096
    with open(pathA, 'rb') as streamA:
        with open(pathB, 'rb') as streamB:
            while True:
                pieceA = streamA.read(blockSize)
                pieceB = streamB.read(blockSize)
                if pieceA == pieceB:
                    if pieceA:
                        continue
                    # Both reads came back empty: simultaneous end of file.
                    return True
                return False
def stripNewline(line):
    """Return line without its trailing newline, if it has one.

    Unlike a blind line[0:-1], this does not eat the last character of a
    final line that is not newline-terminated.
    """
    if line.endswith("\n"):
        return line[:-1]
    return line


def iterPairs(lines):
    """Yield (supprimable, original) path pairs from consecutive lines.

    The input alternates: first the path of the removable file, then the path
    of the file it duplicates. A trailing line without a partner is reported
    on stderr and ignored instead of aborting with StopIteration.
    """
    it = iter(lines)
    for first in it:
        try:
            second = next(it)
        except StopIteration:
            sys.stderr.write(
                "warning: ignoring unpaired last line: %r\n" % first)
            return
        yield stripNewline(first), stripNewline(second)


def main():
    """Move verified duplicates from "1/" to the original's path under "sync/".

    Pairs are read from standard input. A removable file is considered only
    when its path starts with "1/", both files exist, and their contents are
    byte-for-byte identical. It is then moved to "sync/" + original, unless
    something already exists at that destination.
    """
    nblines = 0
    for supprimable, original in iterPairs(sys.stdin):
        # Run the external `sync` command once every 10240 pairs.
        nblines = (nblines + 1) % 10240
        if nblines == 0:
            os.system("sync")
        if supprimable[0:2] != '1/':
            continue
        if not (os.path.exists(supprimable) and os.path.exists(original)):
            continue
        if not identicalFiles(original, supprimable):
            continue
        destfile = "sync/" + original
        # Never replace a file that is already present below sync/.
        if os.path.exists(destfile):
            continue
        try:
            os.makedirs(os.path.dirname(destfile))
        except OSError as e:
            # An already existing destination directory is fine.
            if e.errno != errno.EEXIST:
                raise
        shutil.move(supprimable, destfile)


if __name__ == "__main__":
    main()

28
pre-fast-remdoubles.py Executable file
View File

@ -0,0 +1,28 @@
#!/usr/bin/python
import sys
import os
import errno
import shutil
def pairDuplicates(lines):
    """Yield (supprimable, original) path pairs for each run of equal hashes.

    Each input line carries a hash in its first 48 characters and a path from
    character 50 onwards. Consecutive lines with the same hash form a group.
    Within a group, paths starting with "1/" are removable and all others are
    originals. The i-th removable path is paired with the i-th original, and
    surplus entries on either side stay unpaired.

    The last group is flushed at end of input; previously its pairs were
    silently dropped.
    """
    oldhash = ""
    original = []
    supprimable = []
    for line in lines:
        # Strip only a real trailing newline, so an unterminated last line
        # does not lose the final character of its path.
        if line.endswith("\n"):
            line = line[:-1]
        digest = line[0:48]
        path = line[50:]
        if digest != oldhash:
            for o, s in zip(original, supprimable):
                yield s, o
            supprimable = []
            original = []
        if path[0:2] == '1/':  # Delete files in the directory named 1
            supprimable.append(path)
        else:
            original.append(path)
        oldhash = digest
    # Flush the group that was still being collected at end of input.
    for o, s in zip(original, supprimable):
        yield s, o


def countedLines(stream):
    """Pass lines through, running the `sync` command on every 500th line.

    NOTE(review): this script only writes to stdout, so the periodic sync
    looks inherited from the sibling scripts - confirm before removing.
    """
    nblines = 0
    for line in stream:
        nblines = (nblines + 1) % 500
        if nblines == 0:
            os.system("sync")
        yield line


def main():
    """Write one NUL-terminated "supprimable\\noriginal" record per pair."""
    write = sys.stdout.write
    for s, o in pairDuplicates(countedLines(sys.stdin)):
        write(s + "\n" + o + "\0")


if __name__ == "__main__":
    main()

2
pre-fast-remdoubles.py.sh Executable file
View File

@ -0,0 +1,2 @@
#!/bin/sh
# Build the sorted pair list from the listing given on standard input.
#
# pre-fast-remdoubles.py (run from the current directory) writes one
# NUL-terminated record per pair: "<removable path>\n<original path>".
# `sort -z` orders these NUL-delimited records as whole units, so the two
# paths of a pair stay together. `tr` then turns every NUL into a newline,
# which leaves alternating removable/original lines on standard output.
./pre-fast-remdoubles.py | sort -z | tr '\0' '\n'

50
remdoubles-full.py Executable file
View File

@ -0,0 +1,50 @@
#!/usr/bin/python
import sys
import os
import errno
import shutil
def identicalFiles(pathA, pathB):
    """Return True if the contents of the two files match byte for byte.

    The comparison reads both files in binary mode in fixed-size chunks and
    returns False as soon as two corresponding chunks differ.
    """
    chunkSize = 4096
    with open(pathA, 'rb') as first:
        with open(pathB, 'rb') as second:
            partA = first.read(chunkSize)
            partB = second.read(chunkSize)
            while partA == partB:
                # Equal and empty means both files reached EOF together.
                if not partA:
                    return True
                partA = first.read(chunkSize)
                partB = second.read(chunkSize)
            return False
def iterGroups(lines):
    """Yield (original, supprimable) path lists for each run of equal hashes.

    Each input line carries a hash in its first 48 characters and a path from
    character 50 onwards. Consecutive lines with the same hash form a group.
    Within a group, paths starting with "1/" are removable and all others are
    originals. Groups lacking either kind are skipped, since nothing can be
    removed from them.

    The last group is yielded at end of input; previously it was never
    processed.
    """
    oldhash = ""
    original = []
    supprimable = []
    for line in lines:
        # Strip only a real trailing newline, so an unterminated last line
        # does not lose the final character of its path.
        if line.endswith("\n"):
            line = line[:-1]
        digest = line[0:48]
        path = line[50:]
        if digest != oldhash:
            if original and supprimable:
                yield original, supprimable
            supprimable = []
            original = []
        if path[0:2] == '1/':  # Delete files in the directory named 1
            supprimable.append(path)
        else:
            original.append(path)
        oldhash = digest
    # Hand out the group that was still being collected at end of input.
    if original and supprimable:
        yield original, supprimable


def processGroup(original, supprimable):
    """Move each removable file that matches some original into "delete/".

    A removable file is compared against the originals in order. At the first
    byte-identical one the pair is printed and the removable file is moved to
    "delete/" + its own path.
    """
    for i in supprimable:
        if not os.path.exists(i):
            continue
        for j in original:
            if os.path.exists(j) and identicalFiles(j, i):
                # Single-argument form prints the same under Python 2 and 3.
                print(i + " |||||||||| " + j)
                destfile = "delete/" + i
                try:
                    os.makedirs(os.path.dirname(destfile))
                except OSError as e:
                    # An already existing destination directory is fine.
                    if e.errno != errno.EEXIST:
                        raise
                shutil.move(i, destfile)
                break


def main():
    """Read the hash listing from standard input and process every group."""
    for original, supprimable in iterGroups(sys.stdin):
        processGroup(original, supprimable)


if __name__ == "__main__":
    main()

View File

@ -20,7 +20,11 @@ def identicalFiles(pathA, pathB):
oldhash = ""
original = ""
supprimable = []
nblines=0
for line in sys.stdin:
nblines = (nblines+1)%500
if nblines == 0:
os.system("sync");
hash = line[0:48]
file = line[50:-1]
if hash != oldhash:
@ -39,7 +43,7 @@ for line in sys.stdin:
shutil.move(i, destfile);
supprimable = []
original = ""
if file[0:2] == 'c/': # Delete files in the directory named c
if file[0:2] == 'd/': # Delete files in the directory named d
supprimable.append(file)
else:
if original == "" and os.path.exists(file):