# Simple news crawler for Tencent News (简单的新闻爬虫,腾讯新闻)
import requests, json
# 新闻爬虫主要函数
def qq_news(url, timeout=10):
    """Fetch one page of the news feed at *url* and print each item.

    Sends a GET request with a browser-like User-Agent, parses the response
    body as JSON, and prints one dict per entry of the payload's "data" list
    holding that entry's "title", "source", "vurl" and "update_time" values.

    Args:
        url: Address of the JSON endpoint to request.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get`` so a stalled connection cannot hang
            the crawl indefinitely.

    Returns:
        An error message string when the HTTP status code is not 200;
        otherwise None (the extracted items are printed, not returned).

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
        ValueError: If the response body is not valid JSON.
    """
    # Browser-like request header.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return "请求失败,状态码%s" % response.status_code

    payload = json.loads(response.text)
    # A payload without a "data" list (missing key or null) is treated as an
    # empty page instead of raising KeyError/TypeError.
    datas = payload.get("data") if isinstance(payload, dict) else None

    fields = ("title", "source", "vurl", "update_time")
    for data in datas or []:
        # .get() keeps one incomplete item from aborting the whole page;
        # an absent field is printed as None.
        news_dict = {field: data.get(field) for field in fields}
        print(news_dict)
# Crawl 5 pages of the feed (page indices 0 through 4).
for page in range(5):
    url = 'https://pacaio.match.qq.com/irs/rcd?cid=137&token=d0f13d594edfc180f5bf6b845456f3ea&id=&ext=top&page={}'.format(
        page)
    # qq_news returns an error message string on failure and None on success;
    # print the message rather than silently discarding it.
    error = qq_news(url)
    if error:
        print(error)