今日学习
功能介绍
- 计算指定文件的md5值
- 爬取绿盟升级包名称和md5值,并生成excel
- 计算指定文件夹里的所有文件的md5值并与官网上爬取的绿盟升级包名称和md5值做比较
- 自动下载同一类型(增量更新)升级包到指定文件夹
import requests
from lxml import etree
import os
import hashlib
import getopt
import sys
import xlwt
# Module-level defaults for the target page URL (-u) and the local file or
# folder path (-p); the __main__ section overwrites them from the command line.
url = ''
path = ''
def spider_filename_md5(url):
    """Scrape upgrade-package names and MD5 values from the page at *url*.

    The page is fetched with ``requests``, decoded as UTF-8 and parsed with
    ``lxml``.  Package names are the text of the ``<a>`` elements found in the
    table cells; MD5 values are the stripped text nodes of the first cell of
    the table's second row.  Names and MD5 values are paired positionally,
    the resulting mapping is written to a spreadsheet via ``excel`` and then
    returned.

    Args:
        url: Address of the page to scrape.

    Returns:
        dict mapping package name -> MD5 string.
    """
    raw_page = requests.get(url).content
    tree = etree.HTML(str(raw_page, encoding='utf-8'))

    anchors = tree.xpath('/html/body/section/div/section/div/div[2]/div[2]/table/tr/td/a')
    package_names = [str(anchor.text) for anchor in anchors]

    md5_nodes = tree.xpath('/html/body/section/div/section/div/div[2]/div[2]/table/tr[2]/td[1]/text()')
    digests = [str(node.strip()) for node in md5_nodes]

    # zip() stops at the shorter list, so unmatched trailing items are dropped.
    name_to_md5 = {pkg: digest for pkg, digest in zip(package_names, digests)}
    excel(name_to_md5)
    return name_to_md5
def excel(dic):
    """Write a name -> MD5 mapping to the spreadsheet ``md5.xls``.

    The sheet is called ``sheet1``; row 0 holds the headers ``name`` and
    ``md5`` and every following row holds one key/value pair of *dic*.
    The file is saved in the current working directory.

    Args:
        dic: Mapping whose keys go to column 0 and values to column 1.
    """
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('sheet1')

    # Header row.
    sheet.write(0, 0, label="name")
    sheet.write(0, 1, label="md5")

    # Data rows start directly below the header.
    for row, (package_name, digest) in enumerate(dic.items(), start=1):
        sheet.write(row, 0, label=package_name)
        sheet.write(row, 1, label=digest)

    book.save('md5.xls')
def md5(path,Bytes=1024):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode and fed to the hash in chunks so that
    the whole file never has to be held in memory at once.

    Args:
        path: Path of the file to hash.
        Bytes: Number of bytes requested per read.

    Returns:
        The digest as a hexadecimal string.
    """
    hasher = hashlib.md5()
    with open(path, 'rb') as stream:
        # iter() keeps calling read() until it returns the empty-bytes sentinel.
        for chunk in iter(lambda: stream.read(Bytes), b''):
            hasher.update(chunk)
    return hasher.hexdigest()
def local_filename_md5(path):
    """Compute the MD5 digest of every file directly inside the folder *path*.

    Args:
        path: Folder whose entries are hashed (not searched recursively).

    Returns:
        dict mapping file name (without the folder part) -> MD5 hex string.

    Raises:
        OSError: If *path* cannot be listed or a file cannot be read.
    """
    digests = {}
    for entry in os.listdir(path):
        # os.path.join picks the right separator for the running platform;
        # a hard-coded backslash only yields a valid path on Windows.
        full_path = os.path.join(path, entry)
        # Sub-folders cannot be opened for hashing, so only files are hashed.
        if not os.path.isfile(full_path):
            continue
        digests[entry] = md5(full_path)
    return digests
def duibi(dic1,dic2):
    """Compare local MD5 values against the reference values and report issues.

    For every file name in *dic2* a line is printed when the name is missing
    from *dic1* or when the two MD5 values differ; matching files produce no
    output.

    Args:
        dic1: Reference mapping, file name -> MD5.
        dic2: Local mapping, file name -> MD5.
    """
    for filename, local_digest in dic2.items():
        if filename not in dic1:
            print('目标网址无此本地文件:'+filename)
            continue
        if local_digest != dic1[filename]:
            print(filename+':md5值不正确')
def down_file(url):
    """Download the (incremental) upgrade packages listed on the page at *url*.

    The page is fetched and parsed with ``lxml``.  Download links are the
    ``href`` values of the anchors in the first cell of the table's first
    row, prefixed with ``http://update.nsfocus.com``; file names are the text
    of the table's anchors, paired with the links positionally.  Files are
    stored in a folder named after the page's first ``<h2>`` heading, created
    below the current working directory.  Packages that already exist locally
    are neither downloaded nor overwritten.

    Args:
        url: Address of the page that lists the packages.

    Raises:
        IndexError: If links were found but the page has no ``<h2>`` heading
            to name the target folder.
    """
    response = requests.get(url)
    page = etree.HTML(str(response.content, encoding='utf-8'))

    hrefs = page.xpath('/html/body/section/div/section/div/div[2]/div[2]/table/tr[1]/td[1]/a/@href')
    links = [str('http://update.nsfocus.com' + href) for href in hrefs]
    anchors = page.xpath('/html/body/section/div/section/div/div[2]/div[2]/table/tr/td/a')
    names = [str(anchor.text) for anchor in anchors]
    link_to_name = dict(zip(links, names))
    if not link_to_name:
        # Nothing to download, so do not create a folder either.
        return

    headings = page.xpath('/html/body/section/div/section/div/div[2]/div[2]/h2')
    folder_names = [str(heading.text) for heading in headings]

    # The target folder is the same for every package: build it once, with the
    # platform's own separator, and create it up front.  Creating it inside
    # the loop in place of saving meant the package of that iteration was
    # thrown away whenever the folder did not exist yet.
    target_dir = os.path.join(os.path.abspath('.'), folder_names[0])
    os.makedirs(target_dir, exist_ok=True)

    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0',
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language':'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Accept-Encoding':'gzip, deflate',
        'Connection':'close',
        'Upgrade-Insecure-Requests':'1',
    }
    for link, filename in link_to_name.items():
        # The name comes from the remote page: keep only its last path
        # component so it cannot point outside the target folder.
        target_file = os.path.join(target_dir, os.path.basename(filename))
        # Check before downloading so existing packages cost no traffic.
        if os.path.exists(target_file):
            continue
        download = requests.get(link, headers=headers)
        with open(target_file, 'wb') as f:
            f.write(download.content)
def use():
    """Print the command-line help text, one usage hint per line."""
    help_lines = (
        "helpinfo:",
        "获取指定文件的md5值。格式:-m -p 文件路径包含文件名字;例子 python md5.py -m -p c:\\a\\p.bat",
        "爬取指定网页升级包名称和md5值,并生成md5.excel。格式:-n -u url ;例子 python md5.py -n -u http://*.*.*.* ",
        "获取指定文件夹内所有文件的md5值,并与官网比较。格式:-l -u url -s 文件夹目录 ",
        "下载指定网页的升级包(增量更新) 格式:-d -u url",
    )
    for line in help_lines:
        print(line)
if __name__ == '__main__':
    # Parse the command line only when run as a script, so that importing this
    # module never inspects (or fails on) the importing program's sys.argv.
    try:
        # "-s" takes a value and is accepted as an alias of "-p", because the
        # help text documents folder comparison as "-l -u url -s <folder>".
        opts, args = getopt.getopt(sys.argv[1:], "hldmnp:s:u:")
    except getopt.GetoptError as err:
        # Unknown option or missing option value: show the reason and the help.
        print(err)
        use()
        sys.exit(2)

    # Selected mode: 1 = hash one file, 2 = scrape the page,
    # 3 = compare a local folder with the page, 4 = download packages.
    listen = None
    for a, b in opts:
        if a == "-h":
            use()
        elif a == "-m":
            listen = 1
        elif a == "-n":
            listen = 2
        elif a == "-l":
            listen = 3
        elif a in ("-p", "-s"):
            path = b
        elif a == "-u":
            url = b
        elif a == "-d":
            listen = 4

    # Refuse to start a mode whose required option is missing instead of
    # failing later inside open() or requests with an unrelated traceback.
    needs_path = listen in (1, 3)
    needs_url = listen in (2, 3, 4)
    if (needs_path and not path) or (needs_url and not url):
        use()
        sys.exit(2)

    if listen == 1:
        print(md5(path))
    elif listen == 2:
        spider_filename_md5(url)
    elif listen == 3:
        duibi(spider_filename_md5(url), local_filename_md5(path))
    elif listen == 4:
        down_file(url)