# Python作业4

#爬取微博好友圈内容

import requests

import json

import re

# HTTP headers sent with each feed request: a mobile Chrome User-Agent and
# the session cookie of a logged-in account.
# NOTE(review): the cookie is a hard-coded session credential; presumably it
# expires and has to be refreshed by hand -- confirm before relying on it.
headers = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Mobile Safari/537.36',
    'cookie': 'SSOLoginState=1560404105; ALF=1562996105; SCF=AuVhQEh4SshgI_tE32fQGS7ByXPRtNvPQait3IiKkoX7agpW_nl3m7DwIwUJIFDlutumzYmgjiBY-djZ54vQ0tM.; SUB=_2A25wBZDZDeRhGeRL41YY8inJyTmIHXVTCTCRrDV6PUNbktAKLUnYkW1NUvHUPIe9y7-kszq23AaY_NsLWBJnMqo0; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WhlkSzPL69hk93TYFWZeAVx5JpX5KMhUgL.Fozf1hB4eoMfeo-2dJLoIc.LxKqL1hnL1K2LxKqL1KnLB-qLxKqLB-BLBKeLxK-L1-eLBKnLxK-L1K5LB-eLxKnLB-qL1hBLxK-L1h.LBKMLxKML1-2L1hBLxK-LBo5L12qLxKBLB.zL1K.LxK-LBK-LBoSKUgf_9Pzt; SUHB=0rr6WLVjpfF9wa; MLOGIN=1; _T_WM=91003095608; WEIBOCN_FROM=1110005030; M_WEIBOCN_PARAMS=luicode%3D20000174%26uicode%3D20000174; XSRF-TOKEN=bc06bd',
}

# Entry URL of the friend-circle feed, requested before any max_id is known.
url = 'https://m.weibo.cn/feed/circle?'

def get_info(url, page, max_page=18):
    """Fetch the friend-circle feed page by page and print each post's text.

    Each response body is parsed as JSON and printed. For every entry in
    ``data.statuses`` the ``text`` field is stripped of ASCII letters,
    digits and the markup punctuation ``< = " : ; > / . _ -``, and what
    remains is printed. The next page is requested by passing the current
    response's ``data.next_cursor`` as ``max_id``.

    Args:
        url: URL of the first page to request.
        page: Page number that ``url`` corresponds to. At least one request
            is always made, even if ``page`` already exceeds ``max_page``.
        max_page: Last page number to fetch (default 18).

    Raises:
        KeyError: If a response lacks ``data``/``statuses``/``text``, or
            lacks ``next_cursor`` when another page is still to be fetched.
        ValueError: If a response body is not valid JSON.
    """
    # Compiled once for all pages. re.S is given as a *flag* here; passing it
    # positionally to re.sub() lands in the ``count`` slot (re.S == 16) and
    # caps the number of replacements per post at 16.
    markup_pattern = re.compile('[a-zA-Z0-9<="-":;>//../////_-]+', re.S)

    # Iterate instead of recursing once per page.
    while True:
        # A timeout keeps a stalled connection from hanging the crawl forever.
        res = requests.get(url, headers=headers, timeout=10)
        json_data = json.loads(res.text)
        # Raw payload dump, kept so the script's output stays as before.
        print(json_data)

        for statuse in json_data['data']['statuses']:
            print(markup_pattern.sub('', statuse['text']))

        page += 1
        if page > max_page:
            break

        # The cursor is only needed (and only required) when continuing.
        next_cursor = json_data['data']['next_cursor']
        url = 'https://m.weibo.cn/feed/circle?max_id={}'.format(next_cursor)

# Start the crawl at the feed's entry URL, counting it as page 1.
get_info(url, page=1)

# ©著作权归作者所有,转载或内容合作请联系作者
# 平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

# 推荐阅读更多精彩内容