1.包含字典的列表的排序(lambda排序)
video_list 是待排序的列表,其中每个元素都是一个字典
key=lambda x: x['times'] 表示以每个字典中 'times' 键对应的值作为排序依据
reverse=True 表示降序排序;省略该参数时默认为升序
sorted_list = sorted(video_list, key=lambda x: x['times'], reverse=True)
代码:
# 爬取哔哩哔哩视频热门视频排行榜
# 导入
from matplotlib import pyplot as plt
import requests
from lxml import etree
import numpy as np
# Select the SimHei font for sans-serif text (the chart labels below are Chinese).
plt.rcParams["font.sans-serif"] = ['SimHei']
# Request headers as a dict; a browser-like User-Agent is sent with the request.
headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"}
# Ranking page to scrape. requests has no default timeout, so one is set
# explicitly to keep the script from hanging on a stalled connection.
req = requests.get(
    'https://www.bilibili.com/ranking?spm_id_from=333.334.b_62616e6e65725f6c696e6b.1',
    headers=headers,
    timeout=10,
)
# Fail fast on an HTTP error status instead of parsing an error page.
req.raise_for_status()
dom = etree.HTML(req.content.decode("utf-8"))
# One <li> element per ranked video.
block = dom.xpath('//ul[@class="rank-list"]/li')
if not block:
    # Without this guard the block[0] lookup below fails with a bare IndexError.
    raise RuntimeError(
        'no <li> entries found under ul.rank-list; the page layout may have changed'
    )
# Debug aid: pretty-printed markup of the first entry; print t.decode("utf-8")
# to inspect the structure the XPath expressions below rely on.
t = etree.tostring(block[0], encoding="utf-8", pretty_print=True)
def _play_count_in_wan(text):
    """Convert a play-count label to a float in units of 万 (10,000 plays).

    text: the label string taken from the page, e.g. '123.4万'.
    Returns the count expressed in 万, the unit the rest of the script uses.
    Raises ValueError when the numeric part cannot be parsed.

    NOTE(review): assumes the site's abbreviations are '万' = 10**4 and
    '亿' = 10**8, and that a label without a suffix is a raw play count.
    Confirm against the live page.
    """
    text = text.strip()
    if text.endswith('亿'):
        return float(text[:-1]) * 10000
    if text.endswith('万'):
        return float(text[:-1])
    # No unit suffix: a raw count, so scale it down to 万.
    return float(text) / 10000


# Collected records, one dict per ranked video.
video_list = []
for item in block:
    # Rank number shown for the entry.
    video_asc = item.xpath('.//div[@class="num"]/text()')[0]
    # Video title, read from the cover image's alt text.
    video_name = item.xpath('.//div[@class="lazy-img cover"]/img/@alt')[0]
    # Play count, normalised to units of 万 whatever suffix the label carries.
    video_times = _play_count_in_wan(
        item.xpath('.//div[@class="detail"]/span[1]/text()')[0]
    )
    # Uploader name.
    video_author = item.xpath('.//div[@class="detail"]/a/span/text()')[0]
    video_list.append({
        'asc': video_asc,
        'name': video_name,
        'times': video_times,
        'author': video_author,
    })
# Dump every scraped record to a UTF-8 text file, two lines per video:
# rank and title first, then play count and uploader.
with open("./哔哩哔哩热门视频排行榜.txt", "w", encoding='utf-8') as f:
    for video in video_list:
        title_line = '排行:' + video['asc'] + ' 视频标题:' + video['name'] + '\n'
        detail_line = '\t\t播放次数:' + str(video['times']) + '万 up主:' + video['author'] + '\n'
        f.write(title_line + detail_line)
# Order the scraped videos by play count, highest first; the chart code
# further down reads the leading entries of this list.
sorted_list = list(video_list)
sorted_list.sort(key=lambda video: video['times'], reverse=True)
# Bar-chart data: x collects uploader names, y the matching play counts.
x, y = [], []
# Helper that writes each bar's value just above the bar.
def autolabel(rects):
    """Annotate every bar in *rects* with its height.

    rects: iterable of bar objects providing get_x(), get_width() and
        get_height(), such as the container returned by ``plt.bar``.
    """
    for bar in rects:
        value = bar.get_height()
        # Start the text 0.25 data units left of the bar's centre,
        # and 1% above the bar's top.
        text_x = bar.get_x() + bar.get_width() / 2. - 0.25
        text_y = 1.01 * value
        plt.text(text_x, text_y, '%s' % float(value))
# Take at most the five most-played videos; slicing (rather than indexing
# 0..4) avoids an IndexError when fewer than five entries were scraped.
for video in sorted_list[:5]:
    # Uploader name goes on the x axis, play count (in 万) on the y axis.
    x.append(video['author'])
    y.append(video['times'])
# Draw the bars once, at numeric positions, so each video keeps its own bar
# even when the same uploader appears more than once in the top five.
positions = np.arange(len(x))
a = plt.bar(positions, y)
autolabel(a)
# Label each bar position with the uploader's name.
plt.xticks(positions, x)
plt.xlabel('up主名')
plt.ylabel('播放次数(万次)')
plt.show()
运行效果:
1. 将排行榜信息保存到txt文件中
2. 将播放量最高的前五个视频的up主以柱状图显示