首先,搜索 QQ 新闻(腾讯新闻),找到“抗击肺炎”(kangji feiyan)页面,点击“数据”,就可以进入数据页面。
在页面上点击右键,选择“检查”,打开浏览器的开发者工具。
在 Network(网络)面板中点击 XHR 过滤器。
刷新页面,
然后需要点击页面下方某个省的“详情”。
这时会刷出带 province(prov)参数的请求。
然后,在该请求上选择复制为 cURL (bash)。
把复制的内容粘贴到 curl 转 Python 的转换工具里,就会生成 Python 请求代码(其中包含下面用到的 headers 和 cookies)。
地址是:https://api.inews.qq.com/newsqa/v1/query/pubished/daily/list?province=%E5%90%89%E6%9E%97&
可以发现,修改地址后面的 province 参数,就可以切换省份名称,从而依次获取各个省的数据。
# Province-level region names used as values of the `province` query parameter.
prov_list = [
    '河北', '贵州', '黑龙江', '台湾', '四川', '山西', '新疆',
    '北京', '天津', '广东', '浙江', '云南', '澳门', '福建',
    '海南', '湖北', '甘肃', '香港', '陕西', '青海', '广西',
    '江苏', '西藏', '辽宁', '上海', '安徽', '山东', '宁夏',
    '江西', '湖南', '重庆', '内蒙古', '吉林', '河南',
]
def make_request(prov_name, timeout=10):
    """Fetch the daily-list JSON for one province from api.inews.qq.com.

    Args:
        prov_name: Province name sent as the ``province`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` may block indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server replies with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # `headers` and `cookies` are module-level values defined outside this
    # block (the notes say they come from the converted cURL command).
    # The path segment is spelled "pubished" in the URL copied from the
    # browser as well, so it is kept as-is.
    response = requests.get(
        'https://api.inews.qq.com/newsqa/v1/query/pubished/daily/list',
        headers=headers,
        params={'province': prov_name},
        cookies=cookies,
        timeout=timeout,
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    return response.json()
# Fetch the raw JSON payload for a single province.
# NOTE(review): `prov_1` is not defined in this excerpt — presumably a
# province name such as an entry of `prov_list`; confirm.
data= make_request(prov_1)
得到一个省的数据后,只保留需要的字段。
只提取 confirm_add(确诊相关数据),以及对应的 date 和 year 字段。
def extract_data(data):
    """Reduce one province's API payload to a three-column DataFrame.

    Args:
        data: Decoded JSON payload; ``data['data']`` must be a sequence of
            records, each with the keys ``'confirm_add'``, ``'date'`` and
            ``'year'``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``date_list``,
        ``confirm_list`` and ``year`` (in that order), one row per record.
        An empty record list yields an empty frame with the same columns.

    Raises:
        KeyError: If ``'data'`` or one of the per-record keys is missing.
    """
    records = data['data']
    # Iterate over the records directly; the index is never needed.
    dct_prov = {
        "date_list": [record['date'] for record in records],
        "confirm_list": [record['confirm_add'] for record in records],
        "year": [record['year'] for record in records],
    }
    return pd.DataFrame(dct_prov)
# NOTE(review): `data_prepare` is not defined in this excerpt; this bare
# expression presumably inspects its first element in a notebook — confirm.
data_prepare[0]
# Request the data for one province.
# NOTE(review): `beijing` is a bare name not defined in this excerpt —
# presumably a variable holding the province name; confirm.
data_beijing= make_request(beijing)
# Reduce the raw response to a DataFrame via extract_data.
df_beijing= extract_data(data_beijing)
# Bare expression, presumably for displaying the table in a notebook cell.
df_beijing
# Write the table to an Excel file at the given path.
df_beijing.to_excel("2022——爬虫与数据分析/data_dir_爬虫课程/北京covid.xlsx")
然后重复上面的步骤,分别得到7个省份。
然后从每个省的数据中每个月提取一个值,保存下来,再把各省的结果合并。
最后得到数据形式:
from io import StringIO

# Monthly values for the seven selected provinces, embedded as JSON:
# outer keys are province names (as \u escapes), inner keys are "year-month".
# The literal is built from adjacent string pieces so that no single-quoted
# string spans a physical line break, and it is wrapped in StringIO because
# passing a literal JSON string to pd.read_json is deprecated (pandas 2.1+).
df = pd.read_json(StringIO(
    '{"\\u4e0a\\u6d77":{'
    '"2020-2":4,"2020-3":5,"2020-4":4,"2020-5":0,"2020-6":1,"2020-7":1,"2020-8":5,"2020-9":3,"2020-10":5,"2020-11":5,"2020-12":5,'
    '"2021-1":5,"2021-2":3,"2021-3":2,"2021-4":3,"2021-5":3,"2021-6":3,"2021-7":3,"2021-8":5,"2021-9":4,"2021-10":4,"2021-11":4,"2021-12":7,'
    '"2022-1":21,"2022-2":20,"2022-3":67,"2022-4":340},'
    '"\\u5c71\\u4e1c":{'
    '"2020-2":19,"2020-3":0,"2020-4":0,"2020-5":0,"2020-6":0,"2020-7":0,"2020-8":1,"2020-9":0,"2020-10":0,"2020-11":0,"2020-12":0,'
    '"2021-1":0,"2021-2":0,"2021-3":0,"2021-4":0,"2021-5":0,"2021-6":0,"2021-7":0,"2021-8":1,"2021-9":0,"2021-10":1,"2021-11":1,"2021-12":0,'
    '"2022-1":1,"2022-2":2,"2022-3":46,"2022-4":8},'
    '"\\u5317\\u4eac":{'
    '"2020-2":9,"2020-3":5,"2020-4":0,"2020-5":0,"2020-6":10,"2020-7":0,"2020-8":0,"2020-9":0,"2020-10":0,"2020-11":0,"2020-12":1,'
    '"2021-1":1,"2021-2":0,"2021-3":0,"2021-4":0,"2021-5":0,"2021-6":0,"2021-7":0,"2021-8":1,"2021-9":0,"2021-10":1,"2021-11":1,"2021-12":0,'
    '"2022-1":4,"2022-2":4,"2022-3":8,"2022-4":5},'
    '"\\u9655\\u897f":{'
    '"2020-2":5,"2020-3":0,"2020-4":2,"2020-5":0,"2020-6":0,"2020-7":0,"2020-8":1,"2020-9":1,"2020-10":1,"2020-11":1,"2020-12":0,'
    '"2021-1":1,"2021-2":0,"2021-3":0,"2021-4":0,"2021-5":0,"2021-6":0,"2021-7":1,"2021-8":0,"2021-9":0,"2021-10":1,"2021-11":0,"2021-12":49,'
    '"2022-1":19,"2022-2":0,"2022-3":12,"2022-4":0},'
    '"\\u6e56\\u5317":{'
    '"2020-2":2106,"2020-3":28,"2020-4":0,"2020-5":0,"2020-6":0,"2020-7":0,"2020-8":0,"2020-9":0,"2020-10":0,"2020-11":0,"2020-12":0,'
    '"2021-1":0,"2021-2":0,"2021-3":0,"2021-4":0,"2021-5":0,"2021-6":0,"2021-7":1,"2021-8":3,"2021-9":0,"2021-10":0,"2021-11":0,"2021-12":0,'
    '"2022-1":0,"2022-2":1,"2022-3":1,"2022-4":0},'
    '"\\u5409\\u6797":{'
    '"2020-2":2,"2020-3":0,"2020-4":0,"2020-5":1,"2020-6":0,"2020-7":0,"2020-8":0,"2020-9":0,"2020-10":0,"2020-11":0,"2020-12":0,'
    '"2021-1":12,"2021-2":0,"2021-3":0,"2021-4":0,"2021-5":0,"2021-6":0,"2021-7":0,"2021-8":0,"2021-9":0,"2021-10":0,"2021-11":0,"2021-12":0,'
    '"2022-1":0,"2022-2":0,"2022-3":972,"2022-4":1010},'
    '"\\u9999\\u6e2f":{'
    '"2020-2":3,"2020-3":21,"2020-4":10,"2020-5":1,"2020-6":4,"2020-7":66,"2020-8":49,"2020-9":9,"2020-10":7,"2020-11":33,"2020-12":81,'
    '"2021-1":51,"2021-2":19,"2021-3":15,"2021-4":10,"2021-5":2,"2021-6":2,"2021-7":1,"2021-8":3,"2021-9":3,"2021-10":4,"2021-11":3,"2021-12":6,'
    '"2022-1":35,"2022-2":2371,"2022-3":6651,"2022-4":3050}}'
))
# Render the merged monthly table as an animated bar chart race.
# The original line read `import bar_chart_raceas bcr`: the space before
# `as` was missing, which is a syntax error.
import bar_chart_race as bcr

# NOTE(review): `df_all2` is not defined in this excerpt — presumably the
# merged per-province table shown above as `df`; confirm.
bcr.bar_chart_race(
    df=df_all2,
    filename='covid19_7prov.gif',  # output file for the animation
    orientation='h',
    sort='desc',
)
得到图形。
由于没有设置每一帧的时间间隔,图形跳动得太快了,后期可以自己调节。