"""
################################################################################
Usage: "python diffall.py dir1 dir2".
Recursive directory tree comparison: report unique files that exist in only
dir1 or dir2, report files of the same name in dir1 and dir2 with differing
contents, report instances of same name but different type in dir1 and dir2,
and do the same for all subdirectories of the same names in and below dir1
and dir2. A summary of diffs appears at end of output, but search redirected
output for "DIFF" and "unique" strings for further details. New: (3E) limit
reads to 1M for large files, (3E) catch same name=file/dir, (4E) avoid extra
os.listdir() calls in dirdiff.comparedirs() by passing results here along.
################################################################################
"""
import os, dirdiff
blocksize = 1024 * 1024 # up to 1M per read
def intersect(seq1, seq2):
"""
Return all items in both seq1 and seq2;
a set(seq1) & set(seq2) woud work too, but sets are randomly
ordered, so any platform-dependent directory order would be lost
"""
return [item for item in seq1 if item in seq2]
def comparetrees(dir1, dir2, diffs, verbose=False):
"""
Compare all subdirectories and files in two directory trees;
uses binary files to prevent Unicode decoding and endline transforms,
as trees might contain arbitrary binary files as well as arbitrary text;
may need bytes listdir arg for undecodable filenames on some platforms
"""
# compare file name lists
print('-' * 20)
names1 = os.listdir(dir1)
names2 = os.listdir(dir2)
if not dirdiff.comparedirs(dir1, dir2, names1, names2):
diffs.append('unique files at %s - %s' % (dir1, dir2))
print('Comparing contents')
common = intersect(names1, names2)
missed = common[:]
# compare contents of files in common
for name in common:
path1 = os.path.join(dir1, name)
path2 = os.path.join(dir2, name)
if os.path.isfile(path1) and os.path.isfile(path2):
missed.remove(name)
file1 = open(path1, 'rb')
file2 = open(path2, 'rb')
while True:
bytes1 = file1.read(blocksize)
bytes2 = file2.read(blocksize)
if (not bytes1) and (not bytes2):
if verbose: print(name, 'matches')
break
if bytes1 != bytes2:
diffs.append('files differ at %s - %s' % (path1, path2))
print(name, 'DIFFERS')
break
# recur to compare directories in common
for name in common:
path1 = os.path.join(dir1, name)
path2 = os.path.join(dir2, name)
if os.path.isdir(path1) and os.path.isdir(path2):
missed.remove(name)
comparetrees(path1, path2, diffs, verbose)
# same name but not both files or dirs?
for name in missed:
diffs.append('files missed at %s - %s: %s' % (dir1, dir2, name))
print(name, 'DIFFERS')
if name == 'main':
dir1, dir2 = dirdiff.getargs()
diffs = []
comparetrees(dir1, dir2, diffs, True) # changes diffs in-place
print('=' * 40) # walk, report diffs list
if not diffs:
print('No diffs found.')
else:
print('Diffs found:', len(diffs))
for diff in diffs: print('-', diff)
"""
################################################################################
Usage: python dirdiff.py dir1-path dir2-path
Compare two directories to find files that exist in one but not the other.
This version uses the os.listdir function and list difference. Note that
this script checks only filenames, not file contents--see diffall.py for an
extension that does the latter by comparing .read() results.
################################################################################
"""
import os, sys
def reportdiffs(unique1, unique2, dir1, dir2):
"""
Generate diffs report for one dir: part of comparedirs output
"""
if not (unique1 or unique2):
print('Directory lists are identical')
else:
if unique1:
print('Files unique to', dir1)
for file in unique1:
print('...', file)
if unique2:
print('Files unique to', dir2)
for file in unique2:
print('...', file)
def difference(seq1, seq2):
"""
Return all items in seq1 only;
a set(seq1) - set(seq2) would work too, but sets are randomly
ordered, so any platform-dependent directory order would be lost
"""
return [item for item in seq1 if item not in seq2]
def comparedirs(dir1, dir2, files1=None, files2=None):
"""
Compare directory contents, but not actual files;
may need bytes listdir arg for undecodable filenames on some platforms
"""
print('Comparing', dir1, 'to', dir2)
files1 = os.listdir(dir1) if files1 is None else files1
files2 = os.listdir(dir2) if files2 is None else files2
unique1 = difference(files1, files2)
unique2 = difference(files2, files1)
reportdiffs(unique1, unique2, dir1, dir2)
return not (unique1 or unique2) # true if no diffs
def getargs():
"Args for command-line mode"
try:
dir1, dir2 = sys.argv[1:] # 2 command-line args
except:
print('Usage: dirdiff.py dir1 dir2')
sys.exit(1)
else:
return (dir1, dir2)
if name == 'main':
dir1, dir2 = getargs()
comparedirs(dir1, dir2)