python 比较两个目录 脚本

"""
################################################################################
Usage: "python diffall.py dir1 dir2".
Recursive directory tree comparison: report unique files that exist in only
dir1 or dir2, report files of the same name in dir1 and dir2 with differing
contents, report instances of same name but different type in dir1 and dir2,
and do the same for all subdirectories of the same names in and below dir1
and dir2. A summary of diffs appears at end of output, but search redirected
output for "DIFF" and "unique" strings for further details. New: (3E) limit
reads to 1M for large files, (3E) catch same name=file/dir, (4E) avoid extra
os.listdir() calls in dirdiff.comparedirs() by passing results here along.
################################################################################
"""

import os, dirdiff
blocksize = 1024 * 1024 # up to 1M per read

def intersect(seq1, seq2):
    """
    Return all items that appear in both seq1 and seq2, in seq1's order.

    A plain set(seq1) & set(seq2) would also work, but sets are unordered,
    so any platform-dependent directory order would be lost. A set is used
    here only for fast membership tests; the result is still built by
    walking seq1, so ordering (and any duplicates in seq1) are preserved.
    """
    try:
        lookup = set(seq2)                   # O(1) membership per item
        return [item for item in seq1 if item in lookup]
    except TypeError:
        # unhashable items on either side: fall back to linear scans
        return [item for item in seq1 if item in seq2]

def _samecontent(path1, path2):
    """
    Return True if the two files hold identical bytes, False otherwise.

    Reads both files in binary mode, at most `blocksize` bytes at a time,
    and stops at the first differing chunk. Both files are closed on exit.
    """
    with open(path1, 'rb') as file1, open(path2, 'rb') as file2:
        while True:
            bytes1 = file1.read(blocksize)
            bytes2 = file2.read(blocksize)
            if bytes1 != bytes2:
                return False                 # content or length differs
            if not bytes1:
                return True                  # both hit end-of-file together


def comparetrees(dir1, dir2, diffs, verbose=False):
    """
    Compare all subdirectories and files in two directory trees.

    Files are read in binary mode to prevent Unicode decoding and endline
    transforms, as trees might contain arbitrary binary files as well as
    arbitrary text; may need a bytes listdir arg for undecodable filenames
    on some platforms.

    dir1, dir2: paths of the two directories to compare.
    diffs:      list that is appended to in place with one message per
                difference found (unique names, differing files, and
                same-name entries that are not both files or both dirs).
    verbose:    if true, also print a line for each matching file.
    """
    # compare file name lists
    print('-' * 20)
    names1 = os.listdir(dir1)
    names2 = os.listdir(dir2)
    if not dirdiff.comparedirs(dir1, dir2, names1, names2):
        diffs.append('unique files at %s - %s' % (dir1, dir2))

    print('Comparing contents')
    common = intersect(names1, names2)
    missed = common[:]                       # names not yet classified

    # compare contents of files in common
    for name in common:
        path1 = os.path.join(dir1, name)
        path2 = os.path.join(dir2, name)
        if os.path.isfile(path1) and os.path.isfile(path2):
            missed.remove(name)
            if _samecontent(path1, path2):
                if verbose:
                    print(name, 'matches')
            else:
                diffs.append('files differ at %s - %s' % (path1, path2))
                print(name, 'DIFFERS')

    # recur to compare directories in common (after this level's files,
    # so each directory's file report stays contiguous in the output)
    for name in common:
        path1 = os.path.join(dir1, name)
        path2 = os.path.join(dir2, name)
        if os.path.isdir(path1) and os.path.isdir(path2):
            missed.remove(name)
            comparetrees(path1, path2, diffs, verbose)

    # same name but not both files or dirs?
    for name in missed:
        diffs.append('files missed at %s - %s: %s' % (dir1, dir2, name))
        print(name, 'DIFFERS')

# Script entry point: compare the two trees named on the command line
# and print a summary of every difference collected along the way.
if __name__ == '__main__':
    dir1, dir2 = dirdiff.getargs()
    diffs = []
    comparetrees(dir1, dir2, diffs, True)    # changes diffs in-place
    print('=' * 40)                          # walk, report diffs list
    if not diffs:
        print('No diffs found.')
    else:
        print('Diffs found:', len(diffs))
        for diff in diffs:
            print('-', diff)


"""
################################################################################
Usage: python dirdiff.py dir1-path dir2-path
Compare two directories to find files that exist in one but not the other.
This version uses the os.listdir function and list difference. Note that
this script checks only filenames, not file contents--see diffall.py for an
extension that does the latter by comparing .read() results.
################################################################################
"""

import os, sys

def reportdiffs(unique1, unique2, dir1, dir2):
    """
    Print the diffs report for one directory pair: part of comparedirs output.

    unique1, unique2: names found only in dir1 and only in dir2.
    dir1, dir2:       the directory paths, used only for labeling the output.
    """
    if not (unique1 or unique2):
        print('Directory lists are identical')
    else:
        if unique1:
            print('Files unique to', dir1)
            for name in unique1:
                print('...', name)
        if unique2:
            print('Files unique to', dir2)
            for name in unique2:
                print('...', name)

def difference(seq1, seq2):
    """
    Return all items that appear in seq1 but not in seq2, in seq1's order.

    A plain set(seq1) - set(seq2) would also work, but sets are unordered,
    so any platform-dependent directory order would be lost. A set is used
    here only for fast membership tests; the result is still built by
    walking seq1, so ordering (and any duplicates in seq1) are preserved.
    """
    try:
        lookup = set(seq2)                   # O(1) membership per item
        return [item for item in seq1 if item not in lookup]
    except TypeError:
        # unhashable items on either side: fall back to linear scans
        return [item for item in seq1 if item not in seq2]

def comparedirs(dir1, dir2, files1=None, files2=None):
    """
    Compare directory contents by name only, not the actual files.

    dir1, dir2:     directory paths to compare.
    files1, files2: optional pre-fetched name lists for dir1 and dir2;
                    when None, os.listdir() is called for that directory.

    Prints a report of names unique to either side and returns True if
    the two name lists have no differences, False otherwise. May need a
    bytes listdir arg for undecodable filenames on some platforms.
    """
    print('Comparing', dir1, 'to', dir2)
    files1 = os.listdir(dir1) if files1 is None else files1
    files2 = os.listdir(dir2) if files2 is None else files2
    unique1 = difference(files1, files2)
    unique2 = difference(files2, files1)
    reportdiffs(unique1, unique2, dir1, dir2)
    return not (unique1 or unique2)          # true if no diffs

def getargs():
    """
    Return (dir1, dir2) from the command line for script mode.

    Expects exactly two arguments after the script name; otherwise prints
    a usage message and exits the process with status 1.
    """
    try:
        dir1, dir2 = sys.argv[1:]            # 2 command-line args
    except ValueError:                       # too few or too many args
        print('Usage: dirdiff.py dir1 dir2')
        sys.exit(1)
    else:
        return (dir1, dir2)

# Script entry point: report names unique to either directory given
# on the command line (names only; file contents are not compared).
if __name__ == '__main__':
    dir1, dir2 = getargs()
    comparedirs(dir1, dir2)

©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成,浏览时请结合常识与多方信息审慎甄别。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

相关阅读更多精彩内容

友情链接更多精彩内容