7.5 目录操作
除了支持文件操作,os 和 os.path 模块还提供了大量的目录操作方法。

5C833114B856BD099F9DF3FA105CB614.jpg
代码示例。
>>> import os
>>> os.getcwd() # return the current working directory
'D:\\Python'
>>> os.mkdir(os.getcwd() + '\\temp') # create a sub-directory named temp
>>> os.chdir(os.getcwd() + '\\temp') # make it the current working directory
>>> os.getcwd()
'D:\\Python\\temp'
>>> os.listdir('.') # the directory that was just created has no entries
[]
>>> os.mkdir(os.getcwd() + '\\test')
>>> os.rmdir('test') # remove the directory again
可以使用递归的方法遍历指定目录下所有子目录和文件。
import os
def visitDir(path):
    """Print every entry below path, descending into sub-directories.

    Each entry is printed as its joined path; a directory is printed
    before its own contents. If path is not an existing directory an
    error message is printed instead.
    """
    if os.path.isdir(path):
        for entry in os.listdir(path):
            child = os.path.join(path, entry)
            print(child)
            # Recurse right after printing so a directory precedes its contents.
            if os.path.isdir(child):
                visitDir(child)
    else:
        print('Error: ', path, ' is not a directory or does not exist.')


visitDir(r'D:\Python')
也可使用 os 模块的 walk() 方法遍历目录。
import os
def visitDir2(path):
    """Print every directory and file below path using os.walk().

    For each directory visited by the walk, the paths of its
    sub-directories are printed first, followed by the paths of its
    files. If path is not an existing directory an error message is
    printed instead.
    """
    if not os.path.isdir(path):
        print('Error: ', path, ' is not a directory or does not exist.')
        return
    for root, dirs, files in os.walk(path):
        # Concatenating keeps the order: sub-directories first, then files.
        for name in dirs + files:
            print(os.path.join(root, name))


visitDir2(r'D:\Python')
7.6 案例精选
(1)计算 CRC32 值。可以使用 zlib 和 binascii 模块的方法来计算任意字符串的 CRC32 的值。
>>> import zlib
>>> print(zlib.crc32('1234'.encode())) # the string is encoded to bytes before it is passed to crc32()
2615402659
>>> print(zlib.crc32('1111'.encode()))
3690580215
>>> print(zlib.crc32('SDIBT'.encode()))
2095416137
(2)计算文件最长行的长度。
>>> f = open(r'D:\Python\test.py', 'r')
>>> longest = max(len(line.strip()) for line in f)
>>> f.close()
>>> print(longest)
37
(3)计算文件内容的 MD5 值(文件名通过命令行参数传入)。
import hashlib
import os
import sys
# Print the MD5 digest of the file named by the first command-line argument.
# Nothing is printed when the argument does not name an existing file.
if __name__ == '__main__':
    fileName = sys.argv[1]
    if os.path.isfile(fileName):
        digest = hashlib.md5()
        # Read raw bytes: text mode would decode and re-encode the content
        # (and translate newlines), so the result would depend on the platform
        # and binary files could not be read at all.
        with open(fileName, 'rb') as fp:
            # Feed the hash in fixed-size chunks so large files are not
            # loaded into memory at once.
            for chunk in iter(lambda: fp.read(65536), b''):
                digest.update(chunk)
        print(digest.hexdigest())
(4)判断一个文件是否为 GIF 图像文件。
>>> def is_gif(fname):
        """Return True if the first four bytes of the file fname are b'GIF8'."""
        # Binary mode: the signature is compared as raw bytes.
        with open(fname, 'rb') as fp:
            return fp.read(4) == b'GIF8'

>>> is_gif(r'D:\Python\loading.gif')
True
(5)比较两个文件是否相同。
>>> import difflib
>>> A = open(r'D:\Python\test1.txt', 'r')
>>> B = open(r'D:\Python\test2.txt', 'r')
>>> contextA = A.read()
>>> contextB = B.read()
>>> contextB
'hello'
>>> s = difflib.SequenceMatcher(lambda x: x == "", contextA, contextB) # 比较文本内容,返回差异部分
>>> result = s.get_opcodes()
>>> for tag, i1, i2, j1, j2 in result:
print("%s contextA[%d:%d] = %s, contextB[%d:%d] = %s"%(tag, i1, i2, contextA[i1:i2], j1, j2, contextB[j1:j2]))
insert contextA[0:0] = , contextB[0:5] = hello
(6)使用 xlwt 模块写入 Excel 文件。
from xlwt import *
# Create a workbook containing one worksheet named "First".
book = Workbook()
sheet1 = book.add_sheet("First")
# Alignment settings: centred horizontally and vertically.
al = Alignment()
al.horz = Alignment.HORZ_CENTER
al.vert = Alignment.VERT_CENTER
# Border settings: thick bottom edge.
borders = Borders()
borders.bottom = Borders.THICK
# Combine alignment and borders into a single cell style.
style = XFStyle()
style.alignment = al
style.borders = borders
# Write 'test' into column 0 of row 0 with that style, then save the workbook.
row0 = sheet1.row(0)
row0.write(0, 'test', style = style)
book.save(r'D:\Python\test.xls')
(7)编写程序,进行文件夹增量备份。
import os
import filecmp
import shutil
import sys
def autoBackup(scrDir, dstDir):
    """Incrementally copy the directory tree scrDir into dstDir.

    Sub-directories missing from dstDir are created, and a file is copied
    only when it is missing from dstDir or its content differs from the
    existing copy. Nothing is ever deleted from dstDir.

    Args:
        scrDir: Absolute path of an existing source directory.
        dstDir: Absolute path of an existing destination directory.

    Both arguments are validated on every call (including recursive
    ones); if either is not an existing directory given as a normalized
    absolute path, usage() is called, which terminates the program.
    """
    if ((not os.path.isdir(scrDir)) or (not os.path.isdir(dstDir)) or
            (os.path.abspath(scrDir) != scrDir) or (os.path.abspath(dstDir) != dstDir)):
        usage()
    for item in os.listdir(scrDir):
        scrItem = os.path.join(scrDir, item)
        # Build the destination from its components. Substituting text in the
        # full source path would also rewrite any later occurrence of scrDir
        # and would lose the separator when scrDir ends with one (drive root).
        dstItem = os.path.join(dstDir, item)
        if os.path.isdir(scrItem):
            # Create new sub-directories so that the destination tree mirrors
            # the structure of the source tree.
            if not os.path.exists(dstItem):
                os.makedirs(dstItem)
                print('make directory ' + dstItem)
            autoBackup(scrItem, dstItem)
        elif os.path.isfile(scrItem):
            # Copy only new files or files whose content has changed;
            # shallow = False forces a byte-by-byte comparison.
            if ((not os.path.exists(dstItem)) or
                    (not filecmp.cmp(scrItem, dstItem, shallow = False))):
                shutil.copy(scrItem, dstItem)
                print('file:' + scrItem + '==>' + dstItem)
def usage():
    """Print the argument requirements and terminate the program.

    Raises:
        SystemExit: always, with exit status 1 so that the calling shell
            can tell that the backup did not run.
    """
    print('scrDir and dstDir must be existing absolute path of certain directory.')
    # A usage error is a failure, so report a non-zero status.
    sys.exit(1)
if __name__ == '__main__':
    # Exactly two command-line arguments are expected: the source directory
    # and the destination directory. Anything else ends in usage().
    if len(sys.argv) != 3:
        usage()
    scrDir, dstDir = sys.argv[1], sys.argv[2]
    # Example values that can be used instead of command-line arguments:
    ## scrDir = r'D:\Python\lab_py_0424'
    ## dstDir = r'D:\Python\lab_py_copy'
    autoBackup(scrDir, dstDir)
(8)统计指定文件夹大小及文件和子文件夹数量。
import os
# Module-level running totals, updated by visitDir().
totalSize = 0
fileNum = 0
dirNum = 0


def visitDir(path):
    """Add the files and sub-directories below path to the running totals.

    Every file increments fileNum and adds its size in bytes to
    totalSize; every sub-directory increments dirNum and is then
    traversed recursively. The totals are never reset here, so repeated
    calls keep accumulating.
    """
    global totalSize, fileNum, dirNum
    for entry in os.listdir(path):
        child = os.path.join(path, entry)
        if os.path.isfile(child):
            fileNum += 1
            totalSize += os.path.getsize(child)
        elif os.path.isdir(child):
            dirNum += 1
            visitDir(child)
def main(path):
    """Collect the statistics for path, or report that it is not a directory."""
    if os.path.isdir(path):
        visitDir(path)
    else:
        print('Error:"', path, '" is not a directory or does not exist.')
def sizeConvert(size):
    """Return size (a byte count) as text in the largest fitting binary unit.

    Sizes of at least 1024 ** 3, 1024 ** 2 or 1024 bytes are divided by
    that factor and suffixed with 'G Bytes', 'M Bytes' or 'K Bytes';
    smaller sizes are returned unchanged with the suffix 'Bytes'.
    """
    # Ordered from the largest unit down so the first match wins.
    units = (
        (1024 * 1024 * 1024, 'G Bytes'),
        (1024 * 1024, 'M Bytes'),
        (1024, 'K Bytes'),
    )
    for factor, suffix in units:
        if size >= factor:
            return str(size / factor) + suffix
    return str(size) + 'Bytes'
def output(path):
    """Print the totals collected for path.

    Reports the total size (converted by sizeConvert() and as a raw byte
    count), the number of files and the number of directories, all read
    from the module-level counters.
    """
    size_text = sizeConvert(totalSize)
    summary = ('The total size of ' + path + ' is:' + size_text + ' (' +
               str(totalSize) + 'Bytes)')
    print(summary)
    print('The total number of files in ' + path + ' is:', fileNum)
    print('The total number of directories in ' + path + ' is:', dirNum)
if __name__ == '__main__':
    # Scan the example directory, then print the collected totals.
    path = r'D:\Python'
    main(path)
    output(path)
(9)递归删除指定文件夹中指定类型的文件和大小为 0 字节的文件。
from os.path import isdir, join, splitext, getsize
from os import remove, listdir
import sys
# File extensions whose files are always deleted.
filetypes = ['.tmp', '.log', '.obj', '.txt']


def delCertainFiles(directory):
    """Recursively delete unwanted files below directory.

    A file is removed when its extension is listed in filetypes or when
    its size is 0 bytes; a message is printed for each removed file.
    Sub-directories are processed recursively but never removed. Nothing
    happens if directory is not an existing directory.
    """
    if isdir(directory):
        for entry in listdir(directory):
            target = join(directory, entry)
            if isdir(target):
                delCertainFiles(target)
                continue
            _, extension = splitext(target)
            # The size is only checked when the extension does not already match.
            if extension in filetypes or getsize(target) == 0:
                remove(target)
                print(target, ' deleted....')
def main(directory=r'D:\Python'):
    """Run delCertainFiles() on directory.

    Args:
        directory: Root of the tree to clean up. Defaults to the example
            directory; pass sys.argv[1] to take it from the command line.
    """
    delCertainFiles(directory)


# Run the clean-up only when executed as a script: it deletes files, so merely
# importing this module must not trigger it.
if __name__ == '__main__':
    main()
(10)使用扩展库 openpyxl 读写 Excel 文件。
>>> import openpyxl
>>> from openpyxl import Workbook
>>> fn = r'D:\test.xlsx'
>>> wb = Workbook() # create a new workbook object
>>> ws = wb.create_sheet(title = 'hello, world') # add a worksheet with this title
>>> ws['A1'] = '这是第一个单元格'
>>> ws['B1'] = '3.1415926'
>>> wb.save(fn) # write the workbook to the file fn
>>> wb = openpyxl.load_workbook(fn) # load the file that was just saved
>>> ws = wb.worksheets[1] # worksheet at index 1; presumably the one created above - confirm
>>> print(ws['A1'].value)
这是第一个单元格
>>> ws.append([1, 2, 3, 4, 5]) # append one row of values
>>> ws.merge_cells('F2:F3')
>>> ws['F2'] = "=sum(A2:E2)" # the cell is assigned a formula string
>>> for r in range(10, 15): # rows 10-14, columns 3-7: each cell gets r * c
        for c in range(3, 8):
            _ = ws.cell(row = r, column = c, value = r * c)

>>> wb.save(fn)
(11)学生每次考试的成绩都记录在同一个 Excel 文件(包括 3 列:姓名、课程、成绩)中,同一名学生的同一门课程可能有多条成绩记录,要求统计每个学生每门课程的最高成绩。下面的代码先模拟生成随机成绩数据,然后进行统计分析。
import openpyxl
from openpyxl import Workbook
import random
# 生成随机数据
def generateRandomInformation(filename):
    """Create a workbook with 200 random (name, course, grade) rows.

    The first worksheet gets a header row followed by 200 records, and
    the workbook is saved to filename. Names have two or three
    characters, courses are drawn from a fixed tuple and grades are
    integers from 0 to 100 inclusive.
    """
    book = Workbook()
    sheet = book.worksheets[0]
    sheet.append(['姓名', '课程', '成绩'])
    # Candidate characters for the first, middle and last position of a name.
    first = tuple('赵钱李孙')
    middle = tuple('伟均谭东')
    last = tuple('坤艳志')
    # Course names.
    subjects = ('语文', '数学', '英语')
    for _ in range(200):
        r = random.randint(1, 100)
        parts = [random.choice(first)]
        # The middle character is added only when r > 25; otherwise the name
        # has just two characters.
        if r > 25:
            parts.append(random.choice(middle))
        parts.append(random.choice(last))
        # Row layout: name, course, grade.
        sheet.append([''.join(parts), random.choice(subjects), random.randint(0, 100)])
    book.save(filename)
def getResult(oldfile, newfile):
    """Write each student's highest grade per course to a new workbook.

    Reads (name, course, grade) rows from the first worksheet of oldfile,
    skipping the header row, keeps the highest grade seen for every
    (name, course) pair and saves the summary, with the same three-column
    header, to newfile.

    Args:
        oldfile: Path of the workbook holding the raw records.
        newfile: Path the summary workbook is saved to.
    """
    # Maps name -> {course: highest grade seen so far}.
    result = dict()
    # Open the raw data.
    workbook = openpyxl.load_workbook(oldfile)
    worksheet = workbook.worksheets[0]
    # Walk the data rows; the first row is the header.
    for row in list(worksheet.rows)[1:]:
        # Name, course and grade are the first three cells of the row.
        name, subject, grade = row[0].value, row[1].value, row[2].value
        # Per-student dictionary, created on first sight of the name.
        best = result.setdefault(name, {})
        # Compare only against a grade that was actually recorded: comparing
        # with a default of 0 would silently drop a record whose best grade
        # is 0.
        if subject not in best or grade > best[subject]:
            best[subject] = grade
    # Create the workbook that receives the summary.
    workbook1 = Workbook()
    worksheet1 = workbook1.worksheets[0]
    worksheet1.append(['姓名', '课程', '成绩'])
    # One output row per (student, course) pair.
    for name, t in result.items():
        for subject, grade in t.items():
            worksheet1.append([name, subject, grade])
    workbook1.save(newfile)
if __name__ == '__main__':
    # Generate the sample workbook first, then write the highest grade of
    # every student in every course to a second workbook.
    oldfile = r'd:\Python\test.xlsx'
    newfile = r'd:\Python\result.xlsx'
    generateRandomInformation(oldfile)
    getResult(oldfile, newfile)
(12)查看指定 zip 压缩文件中的文件列表。
>>> import zipfile
>>> fp = zipfile.ZipFile(r'D:\Python\lab_py.zip')
>>> for f in fp.namelist():
print(f)
lab_py_0424/
lab_py_0424/lab_py_0424_part1/
lab_py_0424/lab_py_0424_part2/
lab_py_0424/lab_py_0424_part1/ex_file.md
lab_py_0424/lab_py_0424_part1/gettysburg_address.txt
lab_py_0424/lab_py_0424_part1/lab_word_count.md
lab_py_0424/lab_py_0424_part1/word_counter.py
lab_py_0424/lab_py_0424_part2/lab_py_0424.md
lab_py_0424/lab_py_0424_part2/mpg.py
lab_py_0424/lab_py_0424_part2/mpg_write.py
lab_py_0424/lab_py_0424_part2/trip.csv
>>> fp.close
<bound method ZipFile.close of <zipfile.ZipFile filename='D:\\Python\\lab_py.zip' mode='r'>>