效果图:
对应的英雄目录
对应的英雄目录
英雄对应的皮肤
代码如下:
import requests, re, os
# Fetch the HTML of the hero-info page on the official Honor of Kings site.
def getNameAndUrl(url, toPath):
    """Download ``url`` and return its body decoded as GBK text.

    Args:
        url: Address of the page to fetch.
        toPath: Unused here; kept so existing callers keep working.

    Returns:
        The page source as a ``str``.

    Raises:
        requests.RequestException: On network failure or timeout.
        UnicodeDecodeError: If the body is not valid GBK.
    """
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "X-Requested-With": "XMLHttpRequest",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8"
    }
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Decode the raw bytes directly. Re-encoding response.text as ISO-8859-1
    # only works while requests happens to guess that charset; it raises as
    # soon as the server declares a different one.
    return response.content.decode('GBK')
# Build the list of per-hero directory paths.
def getHerName():
    """Return one directory path per hero found on the hero-list page.

    The page at the module-level ``url`` is fetched through
    ``getNameAndUrl`` and every matched hero name is joined onto the
    module-level ``toPath``.

    Returns:
        A list of paths of the form ``<toPath>/<hero name>``.
    """
    page = getNameAndUrl(url, toPath)
    # The hero name is captured from the alt attribute that follows the
    # width="91" height="91" attributes (middle group of the pattern).
    name_pattern = re.compile(r'(width="91" height="91" alt=")(.*?)(">)')
    return [os.path.join(toPath, groups[1]) for groups in name_pattern.findall(page)]
# Build the list of detail-page URLs, one per hero.
def getHeroUrl():
    """Return the absolute detail-page URL of every hero on the list page.

    The page at the module-level ``url`` is fetched through
    ``getNameAndUrl``; each relative link captured by the pattern is
    prefixed with the site root.

    Returns:
        A list of absolute URL strings.
    """
    site_root = r'http://pvp.qq.com/web201605/'  # prefix for the relative links
    # The middle group captures the href value of each hero's list entry.
    link_pattern = re.compile(r'(<li><a href=")(.*?)(" target="_blank"><img src=)')
    page = getNameAndUrl(url, toPath)
    return [site_root + groups[1] for groups in link_pattern.findall(page)]
# Local root directory; getHerName joins each hero name onto this path.
toPath = r"C:\Users\yanji\Desktop\王者荣耀"
# Hero-list page that getHerName and getHeroUrl download and parse.
url = r"http://pvp.qq.com/web201605/herolist.shtml"
# Create the per-hero directories.
def mkdirHeroDir(path):
    """Create every directory listed in ``path``.

    Each directory is handled independently, so one that already exists
    (or cannot be created) no longer prevents the remaining ones from
    being created. Missing parent directories are created as well.

    Args:
        path: Iterable of directory paths.

    Returns:
        ``None`` when every directory was newly created; otherwise the
        legacy status string ``"目录已经存在"`` ("directory already
        exists"), which is also returned when a directory could not be
        created.
    """
    status = None
    for directory in path:
        if os.path.isdir(directory):
            status = "目录已经存在"
            continue
        try:
            os.makedirs(directory, exist_ok=True)
        except OSError:
            # Best effort, as before: report through the return value
            # instead of raising, and keep going with the next directory.
            status = "目录已经存在"
    return status
# Main routine: download the skin images of every hero and save them.
def getHeroImage(heroURL, heroPath):
    """Download up to seven skin images per hero into that hero's directory.

    For each hero the detail page is fetched, the common prefix of its
    skin image URLs is extracted, and images ``1.jpg`` .. ``7.jpg`` are
    requested; only responses with status 200 are written to disk.
    A failure for one hero or one image is recorded and the loop moves
    on, instead of abandoning all remaining downloads.

    Args:
        heroURL: Sequence of hero detail-page URLs.
        heroPath: Sequence of target directories, parallel to ``heroURL``.

    Returns:
        ``None`` if nothing failed; otherwise the first legacy status
        string recorded: ``"错误"`` for a page/image request or parse
        failure, ``"连接超时"`` for a failure while writing an image file
        (string kept unchanged for existing callers).
    """
    # Create exactly the directories that are written to below, without
    # downloading the hero list a second time.
    mkdirHeroDir(heroPath)
    # Compiled once; the middle group is the skin URL path without the
    # trailing "<n>.jpg".
    skin_re = re.compile(r'(<div class="zk-con1 zk-con" style="background:url\(\'//game.gtimg.cn)(.*?)(..jpg\'\))')
    status = None
    for hero_url, hero_dir in zip(heroURL, heroPath):
        try:
            page = requests.get(hero_url, timeout=10)
            html = page.content.decode('GBK')
        except (requests.RequestException, UnicodeDecodeError):
            status = status or "错误"
            continue
        found = skin_re.search(html)
        if found is None:
            status = status or "错误"
            continue
        skin_prefix = "http://game.gtimg.cn" + found.group(2)
        for i in range(1, 8):
            try:
                image = requests.get(skin_prefix + str(i) + ".jpg", timeout=10)
            except requests.RequestException:
                status = status or "错误"
                continue
            if image.status_code != 200:
                # Heroes with fewer skins have no image under this number.
                continue
            imageName = os.path.join(hero_dir, str(i) + ".jpg")
            try:
                with open(imageName, "wb") as f:
                    f.write(image.content)
            except OSError:
                status = status or "连接超时"
    return status
# Call the main routine with the hero page URLs and the per-hero directories.
getHeroImage(getHeroUrl(), getHerName())
.
.
requests补充,定义专用于发送请求的函数:
import requests
from retrying import retry
# Request headers passed to requests.get in parseUrl (mobile Chrome User-Agent).
headers = { "User-Agent": "Mozilla/5.0 (Linux; Android 5.1.1; Nexus 6 Build/LYZ28E) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Mobile Safari/537.36",}
@retry(stop_max_attempt_number = 5)  # re-run the function when it raises, up to 5 attempts in total
def parseUrl(url):
    """Fetch ``url`` and return the response body decoded with the default codec.

    Args:
        url: Address to request.

    Returns:
        The decoded response body as a ``str``.

    Raises:
        AssertionError: If the status code is not 200.
        requests.RequestException: On connection errors or timeout.
    """
    print("*" * 100)  # printed once per attempt, which makes retries visible
    response = requests.get(url, headers = headers, timeout = 5)  # may raise on timeout
    # Raise explicitly rather than with an assert statement: asserts are
    # removed under "python -O", which would let non-200 responses through
    # without triggering a retry. The exception type is unchanged.
    if response.status_code != 200:
        raise AssertionError("unexpected status code: %s" % response.status_code)
    return response.content.decode()
def pares_url(url):
    """Call ``parseUrl`` and turn any exception into a ``None`` result.

    Args:
        url: Address to request.

    Returns:
        The value returned by ``parseUrl``, or ``None`` if it raised.
    """
    try:
        return parseUrl(url)
    except Exception as err:
        print("报错了:", err)
    return None
if __name__ == '__main__':
    # html = parseUrl("http://www.baaidu.com")
    # Deliberately invalid URL, used to watch the retry behaviour
    # (together with the print("*" * 100) line in parseUrl).
    html = pares_url("www.baidu.com")
    print(html)
    print("请求不成功" if html is None else "请求成功了")