B站搜索结果爬虫

import requests
import json
import pandas as pd
# Bilibili search API: http://api.bilibili.com/x/web-interface/search/type?search_type=video&highlight=1&keyword=insta360&from_source=banner_search&page=3&jsonp=jsonp&callback=__jp1


# Request payload mirroring the query parameters of the search API.
# NOTE(review): the original author remarks that this payload does not seem
# to matter to the API.
data = dict(
    search_type="video",
    highlight="1",
    keyword="keyword",
    from_source="banner_search",
    page=10,
    jsonp="jsonp",
    callback="__jp1",
)
# Browser-like HTTP headers: a Referer pointing at the Bilibili search page
# and a desktop Chrome User-Agent string.
header = {
    "Referer": (
        "https://search.bilibili.com/all"
        "?keyword=insta360&from_source=banner_search"
    ),
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/75.0.3770.100 Safari/537.36"
    ),
}

# One search-API URL per result page, for pages 1 through 50 inclusive.
urls = [
    f"http://api.bilibili.com/x/web-interface/search/type?search_type=video&highlight=1&keyword=insta360&page={page_no}"
    for page_no in range(1, 51)
]

# Download every page of search results and save them together as one CSV.
# `results` replaces the original variable name `list`, which shadowed the
# builtin of the same name.
results = []

for url in urls:
    # Send the browser-like headers defined above (they were previously built
    # but never used). The search parameters are already part of the URL, so
    # no request body is attached to this GET. The timeout keeps a stalled
    # connection from hanging the script indefinitely.
    r = requests.get(url=url, headers=header, timeout=10)
    r.raise_for_status()
    payload = r.json()  # parse the JSON response body
    # A response without a non-empty "result" list has nothing to collect;
    # presumably this means we are past the last page (or the API returned
    # an error payload), so stop paging instead of raising KeyError.
    content = (payload.get("data") or {}).get("result")
    if not content:
        break
    results.extend(content)

# One row per search hit; columns come from the keys of each result dict.
df = pd.DataFrame(results)
df.to_csv("insta360_result.csv")
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容