前言
由于在公司会接触到大量涉及公民隐私的数据,所以才有了这段代码。本人是菜鸟程序员,代码写得不是很漂亮。这段代码用于扫描出所有含有身份证号的Excel文件,并把它们移动到相应的文件夹内,同时还会创建日志、解压压缩包等。逻辑很简单,具体的可以看代码,都有注释。代码里print()比较多,是因为最后我用pyinstaller把这段代码打包成了exe程序,给其他同事使用。桌面窗口程序我还不是很熟练,所以没有加图形界面。
当然,基于这段代码,也可以写出类似电脑管家杀毒那样的工具:扫描所有文件,并挑出你所需要的文件。
代码
# coding=utf-8
import os
import re
import sys
import xlrd
import time
import shutil
import zipfile
import datetime
from unrar import rarfile
class Logger(object):
    """Tee-style stream that mirrors every write into a log file.

    Meant to be assigned to ``sys.stdout`` so that each ``print()`` is shown
    on the console and appended to the log file at the same time.
    """

    def __init__(self, fileN="Default.log"):
        """Remember the current ``sys.stdout`` and open *fileN* for appending.

        :param fileN: path of the log file; it is created when missing.
        """
        self.terminal = sys.stdout
        # UTF-8 lets any file name be logged regardless of the locale code
        # page.  Line buffering pushes each finished line to the file right
        # away, so the log survives the console window being closed.
        self.log = open(fileN, "a", encoding="utf-8", buffering=1)

    def write(self, message):
        """Write *message* to the console first, then to the log file."""
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        """Flush both underlying streams (required by the file protocol)."""
        self.terminal.flush()
        self.log.flush()
def traversal_zip(root_path):
    """Find every archive file below *root_path*.

    The extension is compared case-insensitively, so ``A.ZIP`` or ``b.Rar``
    are found as well.

    :param root_path: directory that is walked recursively.
    :return: a tuple ``(list_zip, list_rar, list_7z)``; every entry of each
        list is ``[directory, full_path_of_archive]``.
    """
    list_rar = []
    list_zip = []
    list_7z = []
    # Checked in the same order as the original if/elif chain.
    buckets = ((".7z", list_7z), (".zip", list_zip), (".rar", list_rar))
    for dirpath, _dirnames, filenames in os.walk(root_path):
        for file in filenames:
            lowered = file.lower()
            for suffix, bucket in buckets:
                if lowered.endswith(suffix):
                    bucket.append([dirpath, os.path.join(dirpath, file)])
                    break
    return list_zip, list_rar, list_7z
def traversal_xlsx(root_path):
    """Find every Excel workbook (``.xls`` / ``.xlsx``) below *root_path*.

    The extension is compared case-insensitively, so mixed-case names such
    as ``Report.Xlsx`` are found as well.

    :param root_path: directory that is walked recursively.
    :return: a tuple ``(list_path_file, list_path)`` of two parallel lists:
        the full path of each workbook and the directory that contains it.
    """
    list_path_file = []
    list_path = []
    for dirpath, _dirnames, filenames in os.walk(root_path):
        for file in filenames:
            if file.lower().endswith((".xlsx", ".xls")):
                list_path_file.append(os.path.join(dirpath, file))
                list_path.append(dirpath)
    return list_path_file, list_path
def _sheet_has_id_column(sheet, sheet_number):
    """Return True when one column of *sheet* passes all three length checks.

    A column passes when the cells in row index 3, row index 8 and the middle
    row (``nrows // 2``) are each exactly 18 characters long after ``str()``
    and ``strip()``.  Progress is printed for every column.
    """
    nrow = sheet.nrows
    ncol = sheet.ncols
    print(" 工作簿{0},总行数/总列数:{1}/{2}".format(sheet_number, nrow, ncol))
    if nrow < 9:
        # Row indexes 3 and 8 are sampled; a shorter sheet cannot be checked
        # and must not abort the scan of the remaining sheets.
        print(" 工作簿{0}行数不足,跳过".format(sheet_number))
        return False
    sample_rows = (3, 8, nrow // 2)
    ordinals = ("一", "二", "三")
    for x in range(ncol):
        # NOTE: the builtin zip() is shadowed by this module's own zip()
        # function, hence enumerate() plus indexing.
        for step, row in enumerate(sample_rows):
            value = str(sheet.cell(row, x).value).strip()
            if len(value) != 18:
                print(" 第{0}列不符合第{1}检索条件".format(x + 1, ordinals[step]))
                break
            print(" 第{0}列第{1}检索条件符合:{2}".format(x + 1, ordinals[step], value))
        else:
            # No check failed for this column.
            return True
    return False


def xlsx_nameid(xlsx_path, dirname, path):
    """Move a workbook to *dirname* if it seems to hold an ID-number column.

    Every sheet is inspected until one column passes the three 18-character
    checks.  The workbook is then moved into *dirname* under a name made from
    its full original path with path separators and ``:`` replaced by ``_``,
    so files from different folders cannot collide.  Any error is printed
    and swallowed so that one bad file does not stop the whole run.

    :param xlsx_path: full path of the workbook to inspect.
    :param dirname: folder that collects matching workbooks; created when
        it does not exist yet.
    :param path: folder containing the workbook.  Kept for backward
        compatibility; no longer needed because the file is moved in one
        step.
    :return: None
    """
    try:
        # Leaving the ``with`` block releases xlrd's resources (possibly a
        # memory-mapped file) before the file is moved.
        with xlrd.open_workbook(xlsx_path) as book:
            has_id_column = False
            for m in range(book.nsheets):
                if _sheet_has_id_column(book.sheet_by_index(m), m + 1):
                    has_id_column = True
                    # Stop here: the file is moved once, scanning further
                    # columns or sheets would try to move it again.
                    break
        if not has_id_column:
            return
        flat_name = xlsx_path.replace("\\", "_").replace("/", "_").replace(":", "_")
        print(" 修改文件名,并移动", flat_name)
        # Without an existing target folder shutil.move() would rename the
        # workbook to the folder's name instead of moving it inside.
        os.makedirs(dirname, exist_ok=True)
        shutil.move(xlsx_path, os.path.join(dirname, flat_name))
        print(" 已移动到汇总文件夹")
    except Exception as e:
        print(e)
def rar(path, zip_pathname, path1):
    """Extract a rar archive next to itself, then move it to *zip_pathname*.

    The content is extracted into ``<archive name without dot>_程序自动解压文件夹``
    in the archive's own folder.  After a successful extraction the archive
    is moved into *zip_pathname* under a name made from its full original
    path with path separators and ``:`` replaced by ``_``.  Any error is
    printed and swallowed; the archive then stays where it was.

    :param path: full path of the rar archive.
    :param zip_pathname: folder that collects processed archives; created
        when it does not exist yet.
    :param path1: folder containing the archive.  Kept for backward
        compatibility; no longer needed because the file is moved in one
        step.
    :return: None
    """
    extract_dir = None
    try:
        # Drop only the dot of the trailing extension; str.replace() would
        # also rewrite ".rar" occurring inside directory names.
        if path.lower().endswith(".rar"):
            rar_name = path[:-4] + path[-3:]
        else:
            rar_name = path
        target = rar_name + "_程序自动解压文件夹"
        os.mkdir(target)
        extract_dir = target  # from here on the folder is ours to clean up
        rf = rarfile.RarFile(path)  # archive to extract
        rf.extractall(extract_dir)
        flat_name = path.replace("\\", "_").replace("/", "_").replace(":", "_")
        # Without an existing target folder shutil.move() would rename the
        # archive to the folder's name instead of moving it inside.
        os.makedirs(zip_pathname, exist_ok=True)
        shutil.move(path, os.path.join(zip_pathname, flat_name))
        print(" 正常解压,并移动", zip_pathname)
    except Exception as e:
        print(e)
        if extract_dir is not None:
            try:
                # Only succeeds when nothing was extracted.
                os.rmdir(extract_dir)
            except OSError:
                pass
def zip(path, zip_pathname, path1):
    """Extract a zip archive next to itself, then move it to *zip_pathname*.

    The content is extracted into ``<archive name without dot>_程序自动解压文件夹``
    in the archive's own folder; when that folder already exists, the same
    name with ``01`` appended is used instead.  After a successful
    extraction the archive is moved into *zip_pathname* under a name made
    from its full original path with path separators and ``:`` replaced by
    ``_``.  Any error is printed and swallowed; the archive then stays where
    it was and the extraction folder is removed if it is empty.

    NOTE: this function shadows the builtin ``zip`` for the whole module.
    The name is kept because callers use it.

    NOTE(review): the original author reports patching the ``zipfile``
    module to fix garbled member names; that patch is outside this function.

    :param path: full path of the zip archive.
    :param zip_pathname: folder that collects processed archives; created
        when it does not exist yet.
    :param path1: folder containing the archive.  Kept for backward
        compatibility; no longer needed because the file is moved in one
        step.
    :return: None
    """
    # Drop only the dot of the trailing extension; str.replace() would also
    # rewrite ".zip" occurring inside directory names.
    if path.lower().endswith(".zip"):
        zipname = path[:-4] + path[-3:]
    else:
        zipname = path
    extract_dir = zipname + "_程序自动解压文件夹"
    try:
        try:
            os.mkdir(extract_dir)
        except FileExistsError:
            # Fall back to the "01" folder and actually extract into it.
            extract_dir += "01"
            os.makedirs(extract_dir, exist_ok=True)
        with zipfile.ZipFile(path) as archive:
            archive.extractall(extract_dir)
        flat_name = path.replace("\\", "_").replace("/", "_").replace(":", "_")
        # Without an existing target folder shutil.move() would rename the
        # archive to the folder's name instead of moving it inside.
        os.makedirs(zip_pathname, exist_ok=True)
        shutil.move(path, os.path.join(zip_pathname, flat_name))
        print(" 正常解压,并移动", zip_pathname)
    except Exception as e:
        try:
            print(e)
        except Exception:
            # Printing the error itself can fail on unencodable characters.
            print("文件内容或者文件名有不兼容的字符")
        try:
            # Only succeeds when nothing was extracted.
            os.rmdir(extract_dir)
        except OSError:
            pass
def zip_7z(path, zip_pathname, path1):
    """Extract a 7z archive with ``7za.exe``, then move it to *zip_pathname*.

    ``7za.exe`` is expected in the current working directory.  The content
    is extracted into ``<archive name without dot>_程序自动解压文件夹`` in the
    archive's own folder.  Only when 7za exits with status 0 is the archive
    moved into *zip_pathname*, under a name made from its full original path
    with path separators and ``:`` replaced by ``_``.  Any error (including
    a missing or failing ``7za.exe``) is printed and swallowed; the archive
    then stays where it was.

    :param path: full path of the 7z archive.
    :param zip_pathname: folder that collects processed archives; created
        when it does not exist yet.
    :param path1: folder containing the archive.  Kept for backward
        compatibility; no longer needed because the file is moved in one
        step.
    :return: None
    """
    # Local import: subprocess is not imported at file level.
    import subprocess

    extract_dir = None
    try:
        zipexe_path = os.path.join(os.getcwd(), "7za.exe")
        # Drop only the dot of the trailing extension; str.replace() would
        # also rewrite ".7z" occurring inside directory names.
        if path.lower().endswith(".7z"):
            zipname = path[:-3] + path[-2:]
        else:
            zipname = path
        target = zipname + "_程序自动解压文件夹"
        os.mkdir(target)
        extract_dir = target  # from here on the folder is ours to clean up
        # Argument list instead of a shell string: paths containing spaces
        # or shell metacharacters are passed through untouched.  check=True
        # turns a non-zero exit status into an exception so a failed
        # extraction is never reported as a success.
        subprocess.run(
            [zipexe_path, "x", path, "-o" + extract_dir, "-aoa"],
            check=True,
        )
        flat_name = path.replace("\\", "_").replace("/", "_").replace(":", "_")
        # Without an existing target folder shutil.move() would rename the
        # archive to the folder's name instead of moving it inside.
        os.makedirs(zip_pathname, exist_ok=True)
        shutil.move(path, os.path.join(zip_pathname, flat_name))
        print(" 正常解压,并移动", zip_pathname)
    except Exception as e:
        print(e)
        if extract_dir is not None:
            try:
                # Only succeeds when nothing was extracted.
                os.rmdir(extract_dir)
            except OSError:
                pass
if __name__ == '__main__':
    # Interactive entry point: ask for the folders, mirror all output into a
    # time-stamped log file, extract every archive below the root folder and
    # finally collect the Excel files that seem to contain ID numbers.
    print("*****************程序开始运行****************")
    print("程序会扫描您所指定的目录下所有Excel文件,对压缩包进行解压,移动出文件内含有身份证号码的Excel文件,并移出压缩包")
    # Pasted paths often carry surrounding blanks or double quotes; with
    # them the scan would silently find nothing.
    root_path = input("***请输入要执行的总文件夹(类似于D:\\test\\test):").strip().strip('"')
    dirname = input("注意***请输入Excel汇总的文件夹:").strip().strip('"')
    zip_pathname = input("注意***请输入压缩包汇总的文件夹:").strip().strip('"')

    # Log file: <cwd>\log\<YYYY-MM-DD_HH_MM_SS>.txt
    log_dir = os.path.join(os.getcwd(), "log")
    os.makedirs(log_dir, exist_ok=True)
    stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
    log_file = os.path.join(log_dir, stamp + ".txt")
    # From here on every print() goes to the console and to the log file.
    sys.stdout = Logger(log_file)
    print("日志存放在:", log_file)

    # Each entry is [folder, full_path]; the lists are complete before any
    # archive is extracted, so freshly extracted archives are not revisited.
    found_zip, found_rar, found_7z = traversal_zip(root_path)

    print("目录下所有rar文件开始解压")
    for count, (folder, archive) in enumerate(found_rar, 1):
        print("{0}/{1}".format(count, len(found_rar)))
        rar(archive, zip_pathname, folder)

    print("目录下所有zip文件开始解压")
    for count, (folder, archive) in enumerate(found_zip, 1):
        print("{0}/{1}".format(count, len(found_zip)))
        zip(archive, zip_pathname, folder)

    print("目录下所有7z文件开始解压")
    for count, (folder, archive) in enumerate(found_7z, 1):
        print("{0}/{1}".format(count, len(found_7z)))
        zip_7z(archive, zip_pathname, folder)

    print("所有压缩包已解压至压缩包所在目录下")
    print("**************************************************")
    print("开始判断输出Excel文件")
    # Scanned after extraction so that workbooks from archives are included.
    excel_files, excel_dirs = traversal_xlsx(root_path)
    # The builtin zip() is shadowed by this module's zip() function, so the
    # two parallel lists are paired by index.
    for index, excel_file in enumerate(excel_files):
        xlsx_nameid(excel_file, dirname, excel_dirs[index])

    print("程序运行结束,请右上角直接关闭程序")
    print("或者等待十分钟后会自动关闭程序")
    # Keep the console window open so the user can read the output.
    time.sleep(600)
欢迎指正,共同进步!
如果有问题,请及时告知,我会在第一时间给出解释,并修正有问题的部分。