我们发现 Instagram 的一页只给出了 70 个帖子的内容,那么剩下的帖子都到哪里去了呢?
通过控制台可以发现,每次下拉页面时都会发送一个动态加载请求,其余隐藏的帖子就在这些请求的响应里。
本次的目标就是获取同一 tag 下所有帖子的 shortcode。
import requests
import json
import pymongo
def save_to_Mongo(result):
    """Insert one document into the configured MongoDB collection.

    The write is best-effort: any failure is reported on stdout together
    with its cause and then swallowed, so a single bad document never
    aborts the caller.

    Args:
        result: The document (a dict) to store in ``db[MONGO_TABLE]``.

    Returns:
        None.
    """
    try:
        # ``Collection.insert`` was deprecated in PyMongo 3 and removed in
        # PyMongo 4; ``insert_one`` is the supported single-document API.
        db[MONGO_TABLE].insert_one(result)
    except Exception as exc:
        # Include the exception so the reason for the failure is visible
        # instead of being silently discarded.
        print('存储到MongoDb失败', result, exc)
    else:
        print('存储到MongoDB成功', result)
# MongoDB connection settings and handles shared by the rest of the script.
MONGO_URl = 'localhost:27017'
MONGO_DB = 'ins'
MONGO_TABLE = 'shortcode'
client = pymongo.MongoClient(MONGO_URl)
db = client[MONGO_DB]

# No live code visible here appends to this list; the name is kept defined
# in case something outside this section still refers to it.
code_list = []

# First page of the hashtag feed (the tag is URL-encoded in the path).
url = 'https://www.instagram.com/explore/tags/%EC%9B%94%EC%A0%95%EB%A6%AC%EB%A7%9B%EC%A7%91/?__a=1'
headers = {
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
    "x-ig-app-id": "936619743392459"
}

# Without a timeout, requests waits indefinitely on an unresponsive server.
res = requests.get(url, headers=headers, timeout=30).text
ins_data = json.loads(res)

# Resolve the nested media node once instead of on every loop iteration.
first_page = ins_data['graphql']['hashtag']['edge_hashtag_to_media']
for edge in first_page['edges']:
    # The shortcode doubles as the Mongo ``_id``.
    code_dict = {'_id': str(edge['node']['shortcode']), 'tag_name': '월정리맛집'}
    save_to_Mongo(code_dict)

# Cursor that the follow-up paginated queries start from.
first_after = first_page['page_info']['end_cursor']
def get_code(after_info):
    """Fetch one page of posts for the hashtag and store their shortcodes.

    Sends a single paginated query, saves every returned shortcode through
    ``save_to_Mongo``, and reports where the next page starts.

    Args:
        after_info: Pagination cursor sent as the ``after`` query parameter.

    Returns:
        The response's ``end_cursor`` while ``has_next_page`` is true,
        otherwise the sentinel string ``'over'``.
    """
    payload = {
        'query_hash': 'f12c9ec5e46a3173b2969c712ad84744',
        "tag_name": '월정리맛집',
        "first": '50',
        "after": after_info
    }
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    res = requests.get(
        'https://www.instagram.com/graphql/query',
        params=payload,
        headers=headers,
        timeout=30,
    ).text

    # Resolve the nested media node once instead of on every loop iteration.
    media = json.loads(res)['data']['hashtag']['edge_hashtag_to_media']
    page_info = media['page_info']
    # Read the cursor before saving so a malformed response fails up front.
    after_next = page_info['end_cursor']

    for edge in media['edges']:
        # The shortcode doubles as the Mongo ``_id``.
        code_dict = {
            '_id': str(edge['node']['shortcode']),
            'tag_name': '월정리맛집'
        }
        save_to_Mongo(code_dict)

    if not page_info['has_next_page']:
        # 'over' is the sentinel the pagination loop watches for.
        after_next = 'over'
        print(after_next)
        return after_next

    print('get code!')
    return after_next
def get_all_code(rst_after):
    """Page through the hashtag feed until ``get_code`` signals the end.

    Args:
        rst_after: Cursor passed to the first ``get_code`` call.

    Each cursor returned by ``get_code`` is printed and fed into the next
    call; the loop stops once the sentinel ``'over'`` comes back.
    """
    cursor = rst_after
    while True:
        cursor = get_code(cursor)
        if cursor == 'over':
            break
        print(cursor)
# Walk the remaining pages only when the first response supplied a cursor.
# Requests omits query parameters whose value is None, so get_code(None)
# would send a cursor-less query rather than continue the pagination.
# NOTE(review): presumably end_cursor is null when the tag fits on a single
# page; confirm against a real response.
if first_after:
    get_all_code(first_after)