From f72d3aca11c0800008a5ad90ca2442f9abc4c38b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Georges=20Dup=C3=A9ron?= Date: Sun, 4 Sep 2011 11:28:06 +0200 Subject: [PATCH] Remdoubles scripts (should have been committed a while ago). --- fast-remdoubles.py | 37 +++++++++++++++++++++++++++++ fast-syncdoubles.py | 38 +++++++++++++++++++++++++++++ pre-fast-remdoubles.py | 28 ++++++++++++++++++++++ pre-fast-remdoubles.py.sh | 2 ++ remdoubles-full.py | 50 +++++++++++++++++++++++++++++++++++++++ remdoubles.py | 6 ++++- 6 files changed, 160 insertions(+), 1 deletion(-) create mode 100755 fast-remdoubles.py create mode 100755 fast-syncdoubles.py create mode 100755 pre-fast-remdoubles.py create mode 100755 pre-fast-remdoubles.py.sh create mode 100755 remdoubles-full.py diff --git a/fast-remdoubles.py b/fast-remdoubles.py new file mode 100755 index 0000000..4c5a00b --- /dev/null +++ b/fast-remdoubles.py @@ -0,0 +1,37 @@ +#!/usr/bin/python + +import sys +import os +import errno +import shutil + +def identicalFiles(pathA, pathB): + bufsize = 4096 + with open(pathA, 'rb') as a: + with open(pathB, 'rb') as b: + while True: + dataA = a.read(bufsize) + dataB = b.read(bufsize) + if dataA != dataB: + return False + if not dataA: + return True + +nblines=0 +for supprimable in sys.stdin: + nblines = (nblines+1)%10240 + if nblines == 0: + os.system("sync"); + original = sys.stdin.next() + supprimable = supprimable[0:-1] + original = original[0:-1] + if supprimable[0:2] == '1/' and os.path.exists(supprimable) and os.path.exists(original) and identicalFiles(original, supprimable): + destfile = "delete/" + supprimable + try: + os.makedirs(os.path.dirname(destfile)) + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + raise + shutil.move(supprimable, destfile); diff --git a/fast-syncdoubles.py b/fast-syncdoubles.py new file mode 100755 index 0000000..a3ce6c7 --- /dev/null +++ b/fast-syncdoubles.py @@ -0,0 +1,38 @@ +#!/usr/bin/python + +import sys +import os +import errno +import 
shutil + +def identicalFiles(pathA, pathB): + bufsize = 4096 + with open(pathA, 'rb') as a: + with open(pathB, 'rb') as b: + while True: + dataA = a.read(bufsize) + dataB = b.read(bufsize) + if dataA != dataB: + return False + if not dataA: + return True + +nblines=0 +for supprimable in sys.stdin: + nblines = (nblines+1)%10240 + if nblines == 0: + os.system("sync"); + original = sys.stdin.next() + supprimable = supprimable[0:-1] + original = original[0:-1] + if supprimable[0:2] == '1/' and os.path.exists(supprimable) and os.path.exists(original) and identicalFiles(original, supprimable): + destfile = "sync/" + original + if not os.path.exists(destfile): + try: + os.makedirs(os.path.dirname(destfile)) + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + raise + shutil.move(supprimable, destfile); diff --git a/pre-fast-remdoubles.py b/pre-fast-remdoubles.py new file mode 100755 index 0000000..4552370 --- /dev/null +++ b/pre-fast-remdoubles.py @@ -0,0 +1,28 @@ +#!/usr/bin/python + +import sys +import os +import errno +import shutil + +oldhash = "" +original = [] +supprimable = [] +nblines=0 +for line in sys.stdin: + nblines = (nblines+1)%500 + if nblines == 0: + os.system("sync"); + hash = line[0:48] + file = line[50:-1] + if hash != oldhash: + for o,s in zip(original,supprimable): + sys.stdout.write(s+"\n"+o+"\0") + supprimable = [] + original = [] + if file[0:2] == '1/': # Delete files in the directory named 1 + supprimable.append(file) + else: + original.append(file) + + oldhash = hash diff --git a/pre-fast-remdoubles.py.sh b/pre-fast-remdoubles.py.sh new file mode 100755 index 0000000..5171223 --- /dev/null +++ b/pre-fast-remdoubles.py.sh @@ -0,0 +1,2 @@ +#!/bin/sh +./pre-fast-remdoubles.py | sort -z | tr '\0' '\n' diff --git a/remdoubles-full.py b/remdoubles-full.py new file mode 100755 index 0000000..6e7be76 --- /dev/null +++ b/remdoubles-full.py @@ -0,0 +1,50 @@ +#!/usr/bin/python + +import sys +import os +import errno +import shutil + +def 
identicalFiles(pathA, pathB): + bufsize = 4096 + with open(pathA, 'rb') as a: + with open(pathB, 'rb') as b: + while True: + dataA = a.read(bufsize) + dataB = b.read(bufsize) + if dataA != dataB: + return False + if not dataA: + return True + +oldhash = "" +original = [] +supprimable = [] +for line in sys.stdin: + hash = line[0:48] + file = line[50:-1] + if hash != oldhash: + if original != []: + for i in supprimable: + if os.path.exists(i): + for j in original: + if os.path.exists(i) and os.path.exists(j) and identicalFiles(j, i): + print i + " |||||||||| " + j + destfile = "delete/" + i + try: + os.makedirs(os.path.dirname(destfile)) + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + raise + shutil.move(i, destfile); + break + supprimable = [] + original = [] + if file[0:2] == '1/': # Delete files in the directory named 1 + supprimable.append(file) + else: + original.append(file) + + oldhash = hash diff --git a/remdoubles.py b/remdoubles.py index 24ca8ad..84394c6 100755 --- a/remdoubles.py +++ b/remdoubles.py @@ -20,7 +20,11 @@ def identicalFiles(pathA, pathB): oldhash = "" original = "" supprimable = [] +nblines=0 for line in sys.stdin: + nblines = (nblines+1)%500 + if nblines == 0: + os.system("sync"); hash = line[0:48] file = line[50:-1] if hash != oldhash: @@ -39,7 +43,7 @@ for line in sys.stdin: shutil.move(i, destfile); supprimable = [] original = "" - if file[0:2] == 'c/': # Delete files in the directory named c + if file[0:2] == 'd/': # Delete files in the directory named d supprimable.append(file) else: if original == "" and os.path.exists(file):