废话不多说，先上代码：
import requests
from lxml import etree
from pyecharts import Bar
import operator
def parse(url):
    """Scrape one regional forecast page into a list of per-city records.

    The page is downloaded, decoded as UTF-8 and parsed with lxml.  Every
    table found under ``//div[@class="hanml"]/div[1]/div`` is walked; the
    first two rows of each table are excluded by the XPath predicate and one
    more row is dropped by the ``[1:]`` slice.

    Args:
        url: Address of the forecast page to download.

    Returns:
        A list of dicts with the keys ``city``, ``phenomena``, ``diretion``,
        ``force``, ``tmax``, ``night_phenomena``, ``night_diretion``,
        ``night_force`` and ``night_tmax``.  All values are the raw text
        found in the table cells.

    Raises:
        requests.RequestException: If the download fails or times out.
        IndexError: If a row lacks one of the expected cells.
    """
    # A timeout keeps an unresponsive server from hanging the script forever.
    resp = requests.get(url, timeout=10)
    source = resp.content.decode('utf-8')
    html = etree.HTML(source)
    big_div = html.xpath('//div[@class="hanml"]/div[1]/div')
    weather_list = []
    for div in big_div:
        trs = div.xpath('.//tr[position()>2]')
        # NOTE(review): the first remaining row is skipped as well; presumably
        # its cells are laid out differently -- confirm against the live page.
        for tr in trs[1:]:
            city = tr.xpath('./td[1]/a/text()')[0]
            # Columns 2-4 hold the first reading, columns 5-7 the night one.
            phenomena, diretion, force, tmax = _read_columns(tr, 2)
            # The night wind must come from its own cells; earlier code
            # reused the first reading's wind and stored the direction under
            # the 'night_force' key.
            (night_phenomena, night_diretion,
             night_force, night_tmax) = _read_columns(tr, 5)
            weather_list.append(dict(
                city=city,
                phenomena=phenomena,
                diretion=diretion,
                force=force,
                tmax=tmax,
                night_phenomena=night_phenomena,
                night_diretion=night_diretion,
                night_force=night_force,
                night_tmax=night_tmax,
            ))
    return weather_list


def _read_columns(tr, first_col):
    """Return (phenomena, wind direction, wind force, temperature) for a row.

    The values are read from three consecutive ``td`` cells of ``tr``
    starting at the 1-based index ``first_col``: plain text, two ``span``
    texts, plain text.
    """
    phenomena = tr.xpath('./td[%d]/text()' % first_col)[0]
    wind = tr.xpath('./td[%d]/span/text()' % (first_col + 1))
    direction = wind[0]
    force = wind[1]
    temperature = tr.xpath('./td[%d]/text()' % (first_col + 2))[0]
    return phenomena, direction, force, temperature
def gene_charts(weather_list, name):
    """Render a bar chart of the ``tmax`` value per city to ``<name>.html``.

    Cities are ordered from the highest ``tmax`` to the lowest.  ``tmax``
    values may be text, so they are compared numerically rather than as
    strings (a plain string sort would rank '9' above '30').  Entries whose
    ``tmax`` is not numeric are placed last.

    Args:
        weather_list: Iterable of dicts that each provide ``city`` and
            ``tmax``.
        name: Used as both positional arguments to ``Bar``, as the series
            name, and as the stem of the output file name.
    """

    def tmax_value(item):
        # Non-numeric readings get -inf so they end up last in the
        # descending order instead of aborting the whole chart.
        try:
            return float(item['tmax'])
        except (TypeError, ValueError):
            return float('-inf')

    bar = Bar(name, name)
    ranked = sorted(weather_list, key=tmax_value, reverse=True)
    bar.add(
        name=name,
        x_axis=[item['city'] for item in ranked],
        y_axis=[item['tmax'] for item in ranked],
    )
    bar.render(name + '.html')
def main():
    """Scrape each configured regional page and render one chart per region."""
    # Region label -> forecast page; the label is also used as the chart name.
    region_dict = {
        '华北': 'http://www.weather.com.cn/textFC/hb.shtml',
        '东北': 'http://www.weather.com.cn/textFC/db.shtml',
        '华东': 'http://www.weather.com.cn/textFC/hd.shtml',
        '华中': 'http://www.weather.com.cn/textFC/hz.shtml',
        '华南': 'http://www.weather.com.cn/textFC/hn.shtml',
        '西北': 'http://www.weather.com.cn/textFC/xb.shtml',
        '西南': 'http://www.weather.com.cn/textFC/xn.shtml',
        # The gat.shtml page is currently disabled:
        # '港澳台': 'http://www.weather.com.cn/textFC/gat.shtml',
    }
    for region, page_url in region_dict.items():
        gene_charts(parse(page_url), name=region)
# Run the scraper only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
效果图（由 pyecharts 生成）：