# Python 学习的第四天

import re
from collections import Counter

import pandas as pd
import requests
from lxml import html
from matplotlib import pyplot as plt

# Chart text in this script is Chinese: select the SimHei font for sans-serif
# text and disable the Unicode minus sign (presumably because that glyph does
# not render with SimHei; confirm on the target machine).
plt.rcParams.update({
    "font.sans-serif": ['SimHei'],
    'axes.unicode_minus': False,
})

def spider_douban(isbn):
    """Scrape Douban's upcoming-movie listing for Chongqing and chart it.

    Downloads the listing page, extracts every movie's title, release date,
    genre, country and "want to watch" count, and then:

    1. shows a horizontal bar chart of the (up to) five most-wanted movies,
    2. writes all movies to ``douban.csv`` in the current directory,
    3. shows a pie chart of the share of movies per country.

    Args:
        isbn: Kept for backward compatibility only. The target URL has no
            format placeholder, so this value never influenced the request.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If a listing entry lacks one of the expected fields.
        ValueError: If a "want to watch" count cannot be parsed as an int.
    """
    # Target page. The original applied ``.format(isbn)`` to this string, which
    # was a no-op because the URL contains no ``{}`` placeholder.
    url = 'https://movie.douban.com/cinema/later/chongqing/?qq-pf-to=pcqq.group'

    # Send a browser-like User-Agent with the request.
    headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"}

    # The timeout keeps the script from hanging forever on a stalled
    # connection; raise_for_status surfaces HTTP errors instead of silently
    # parsing an error page.
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()

    # Each child <div> of #showing-soon describes one upcoming movie.
    selector = html.fromstring(resp.text)
    div_list = selector.xpath('//div[@id="showing-soon"]/div')
    print('您好，共有{}家'.format(len(div_list)))

    dy_list = []
    for div in div_list:
        # The count is rendered with a trailing "人想看" suffix
        # ("people want to watch"); strip it to get the bare number.
        wanted = div.xpath('./div/ul/li[4]/span/text()')[0]
        dy_list.append({
            # Movie title
            'dtitle': div.xpath('./div/h3/a/text()')[0].strip(),
            # Release date
            'ddata': div.xpath('./div/ul/li[1]/text()')[0],
            # Genre
            'dtype': div.xpath('./div/ul/li[2]/text()')[0],
            # Country of release
            'dc': div.xpath('./div/ul/li[3]/text()')[0],
            # Number of people who want to watch
            'dnum': int(wanted.replace('人想看', '')),
        })

    if not dy_list:
        # Nothing was scraped, so there is nothing to chart or save.
        return

    # Most-wanted movies first.
    dy_list.sort(key=lambda movie: movie['dnum'], reverse=True)

    # Bar chart of the top five. Slicing (instead of indexing 0..4) tolerates
    # listings with fewer than five movies. The slice is re-sorted ascending
    # so the most popular movie is drawn as the top bar.
    top5_dy = sorted(dy_list[:5], key=lambda movie: movie['dnum'])
    titles = [movie['dtitle'] for movie in top5_dy]
    popularity = [movie['dnum'] for movie in top5_dy]
    plt.barh(titles, popularity)
    plt.show()

    # Persist every scraped movie as CSV.
    df = pd.DataFrame(dy_list)
    df.to_csv('douban.csv')

    # Pie chart: share of movies per country. Counter keeps first-seen order,
    # so labels and counts stay aligned.
    country_counts = Counter(movie['dc'] for movie in dy_list)
    dlabels = list(country_counts)
    dcounts = list(country_counts.values())

    # ``explode`` must have one entry per wedge, so size it from the data
    # rather than hard-coding four zeros.
    explode = [0] * len(dcounts)
    colors = ['red', 'purple', 'blue', 'yellow']
    plt.pie(dcounts, explode=explode, shadow=True, labels=dlabels, autopct='%1.1f%%', colors=colors)
    plt.legend(loc=2)
    plt.axis('equal')
    plt.show()

# Script entry point: run the scraper. The argument does not affect the
# request, because the URL inside spider_douban has no format placeholder.
spider_douban('9787115428028')

# Python 学习的第四天
#
# 推荐阅读更多精彩内容