requests模块爬取王者荣耀所有英雄皮肤

效果图:
对应的英雄目录
英雄对应的皮肤
代码如下:
import requests, re, os

# 获取王者荣耀官网英雄资料界面的html
def getNameAndUrl(url, toPath=None, timeout=10):
    """Download the page at ``url`` and return its HTML decoded as GBK.

    Args:
        url: Address of the page to fetch.
        toPath: Not used by this function; accepted (and now optional) so
            that existing two-argument calls keep working.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The response body as text, decoded with the GBK codec. Bytes that
        are not valid GBK are replaced instead of raising.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    headers = {
        "Accept" : "application/json, text/javascript, */*; q=0.01",
        "X-Requested-With" : "XMLHttpRequest",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
        "Content-Type" : "application/x-www-form-urlencoded; charset=UTF-8"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Decode the raw bytes directly. Round-tripping response.text through
    # ISO-8859-1 only works when requests happened to pick ISO-8859-1 as the
    # text encoding and raises UnicodeEncodeError otherwise.
    return response.content.decode('GBK', errors='replace')

# 定义获取英雄名的函数
def getHerName():
    """Return one local directory path per hero found on the list page.

    The page at the module-level ``url`` is fetched, every hero name is
    read from the ``alt`` attribute that follows ``width="91" height="91"``,
    and each name is joined onto the module-level ``toPath``.

    Returns:
        A list of paths of the form ``<toPath>/<hero name>``, in page order.
    """
    pageHtml = getNameAndUrl(url, toPath)
    namePattern = re.compile(r'(width="91" height="91" alt=")(.*?)(">)')
    # findall yields one 3-tuple per match; the name is the middle group.
    return [
        os.path.join(toPath, groups[1])
        for groups in namePattern.findall(pageHtml)
    ]

# 定义获取单个英雄的资料url
def getHeroUrl():
    """Return the absolute detail-page URL of every hero on the list page.

    The page at the module-level ``url`` is fetched and each relative link
    found in a ``<li><a href="..." target="_blank"><img src=`` entry is
    appended to the site root.

    Returns:
        A list of absolute URLs, in page order.
    """
    pageHtml = getNameAndUrl(url, toPath)
    linkPattern = re.compile(r'(<li><a href=")(.*?)(" target="_blank"><img src=)')
    siteRoot = r'http://pvp.qq.com/web201605/'
    # findall yields one 3-tuple per match; the relative link is the middle group.
    return [siteRoot + groups[1] for groups in linkPattern.findall(pageHtml)]

# Local root directory; hero names are joined onto it to form per-hero paths.
toPath = r"C:\Users\yanji\Desktop\王者荣耀"
# Page that is downloaded to extract the hero names and hero page links.
url = r"http://pvp.qq.com/web201605/herolist.shtml"

#  定义创建英雄目录
def mkdirHeroDir(path):
    """Create every directory listed in ``path``.

    Each entry is handled on its own, so a directory that already exists
    no longer prevents the remaining directories from being created.
    Missing parent directories are created as well.

    Args:
        path: Iterable of directory paths to create.

    Returns:
        "目录已经存在" if at least one of the directories already existed,
        otherwise ``None``.

    Raises:
        OSError: If a directory cannot be created for a reason other than
            already existing (for example a permission problem).
    """
    alreadyExisted = False
    for directory in path:
        if os.path.isdir(directory):
            alreadyExisted = True
            continue
        # exist_ok guards against the directory appearing between the
        # check above and the creation.
        os.makedirs(directory, exist_ok=True)
    return "目录已经存在" if alreadyExisted else None

# 主函数获取图片并保存
def getHeroImage(heroURL, heroPath, maxSkins=7, timeout=10):
    """Download the skin images of every hero into that hero's directory.

    For each hero page the base path of the skin images is read from the
    ``zk-con1`` background style. Images ``1.jpg`` up to ``<maxSkins>.jpg``
    are then requested, and every image answered with HTTP 200 is saved as
    ``<hero directory>/<index>.jpg``. The hero directory is created when it
    does not exist yet.

    A failure on one hero or one image is recorded and the remaining
    downloads still run, instead of the first error ending the whole job.

    Args:
        heroURL: Sequence of hero page URLs.
        heroPath: Sequence of local directories, parallel to ``heroURL``.
        maxSkins: Highest skin index to try for each hero.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        ``None`` when nothing failed, "连接超时" when at least one request
        failed at the network level, otherwise "错误" when a page could not
        be parsed or a directory or file could not be written.
    """
    # Compiled once; it does not change between heroes.
    skinPattern = re.compile(r'(<div class="zk-con1 zk-con" style="background:url\(\'//game.gtimg.cn)(.*?)(..jpg\'\))')
    networkFailed = False
    otherFailed = False

    for pageUrl, heroDir in zip(heroURL, heroPath):
        try:
            page = requests.get(pageUrl, timeout=timeout)
        except requests.RequestException:
            networkFailed = True
            continue

        # Decode the raw bytes directly; re-encoding response.text as
        # ISO-8859-1 fails whenever requests picked a different encoding.
        match = skinPattern.search(page.content.decode('GBK', errors='replace'))
        if match is None:
            otherFailed = True
            continue
        # Group 2 is the image path without the trailing "<index>.jpg".
        skinBase = "http://game.gtimg.cn" + match.group(2)

        try:
            os.makedirs(heroDir, exist_ok=True)
        except OSError:
            otherFailed = True
            continue

        for index in range(1, maxSkins + 1):
            try:
                image = requests.get(skinBase + str(index) + ".jpg", timeout=timeout)
            except requests.RequestException:
                networkFailed = True
                continue
            # Indexes that do not answer with 200 are skipped silently.
            if image.status_code != 200:
                continue
            try:
                with open(os.path.join(heroDir, str(index) + ".jpg"), "wb") as f:
                    f.write(image.content)
            except OSError:
                otherFailed = True

    if networkFailed:
        return "连接超时"
    if otherFailed:
        return "错误"
    return None

# Run the scraper: collect the hero page URLs and the hero directory paths,
# then pass both to the download function.
getHeroImage(getHeroUrl(), getHerName())



.
.
requests补充,定义专用于发送请求的函数:
import requests
from retrying import retry

# Request headers sent with every call made through parseUrl.
headers = {
    "User-Agent": "Mozilla/5.0 (Linux; Android 5.1.1; Nexus 6 Build/LYZ28E) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Mobile Safari/537.36",
}

@retry(stop_max_attempt_number = 5)     # re-run on any exception, at most 5 attempts in total
def parseUrl(url):
    """Fetch ``url`` and return the response body as text.

    The ``retry`` decorator calls this function again whenever it raises,
    up to five attempts in total.

    Args:
        url: Address to request.

    Returns:
        The response body decoded with the default codec of
        ``bytes.decode`` (UTF-8).

    Raises:
        requests.RequestException: If the request fails or times out, or
            (as ``requests.HTTPError``) the status code is not 200, once
            all attempts are used up.
    """
    print("*" * 100)  # one line of asterisks per attempt makes the retries visible
    response = requests.get(url, headers = headers, timeout = 5)   # may raise on timeout
    # Explicit check instead of ``assert``: assertions are removed when
    # Python runs with -O, which would silently accept failed requests.
    if response.status_code != 200:
        raise requests.HTTPError(
            "unexpected status code %d for %s" % (response.status_code, url),
            response=response,
        )
    return response.content.decode()

def pares_url(url):
    """Fetch ``url`` through ``parseUrl`` without letting errors escape.

    Args:
        url: Address to request.

    Returns:
        The text returned by ``parseUrl``, or ``None`` when it raised; the
        exception is printed in that case.
    """
    try:
        return parseUrl(url)
    except Exception as err:
        print("报错了:", err)
    return None

if __name__ == '__main__':
    # A malformed URL is used on purpose so the request fails and the retry
    # behaviour (one line of asterisks per attempt) can be observed.
    # Alternative: call parseUrl("http://www.baaidu.com") directly.
    html = pares_url("www.baidu.com")
    print(html)
    print("请求不成功" if html is None else "请求成功了")

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容