import requests
import json
from pprint import pp, pprint
from datetime import datetime
import time
import os
import os.path
from pathlib import Path
import random
from retrying import retry
class Discovery:
    """Download a small sample of the novel API into JSON files under ``novel/``.

    Starting from the discovery (home) listing, the crawler follows the first
    book of the first category, then saves that book's detail, its first two
    chapters and the content of each of those chapters.

    Every HTTP call goes through :meth:`_requestJson`, which throttles,
    applies a timeout, checks the HTTP status and retries transient failures
    a bounded number of times.  Errors caused by an unexpected payload shape
    (``KeyError`` and similar) are not retried and propagate to the caller.
    """

    # Base address every endpoint below is relative to.
    BASE_URL = "http://localhost:3000/api"
    # Browser-like User-Agent sent with every request.
    USER_AGENT = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/55.0.2883.95 Safari/537.36"
    )
    # Pause applied before every HTTP attempt (see checkTime).
    REQUEST_DELAY_SECONDS = 12
    # Upper bound for a single HTTP call so a stalled server cannot hang us.
    REQUEST_TIMEOUT_SECONDS = 30
    # Number of attempts per HTTP call before the last error is re-raised.
    MAX_ATTEMPTS = 5

    def checkTime(self):
        """Sleep for ``REQUEST_DELAY_SECONDS`` (called before each HTTP attempt)."""
        time.sleep(self.REQUEST_DELAY_SECONDS)

    def createRootDir(self):
        """Create the ``novel`` root folder, reporting when it already exists."""
        try:
            os.mkdir("novel")
        except FileExistsError:
            pprint("novel is exists...")

    def createDirs(self, b_path):
        """Create ``b_path`` and any missing parents; a no-op if it exists.

        Args:
            b_path: Directory path to create, e.g. ``"novel/books"``.
        """
        # exist_ok avoids the check-then-create race of an explicit exists().
        os.makedirs(b_path, exist_ok=True)

    # Only network/HTTP errors and undecodable bodies are treated as
    # transient; anything else is raised immediately instead of being retried.
    @retry(
        stop_max_attempt_number=MAX_ATTEMPTS,
        retry_on_exception=lambda exc: isinstance(
            exc, (requests.RequestException, ValueError)
        ),
    )
    def _requestJson(self, method, url, **kwargs):
        """Perform one throttled HTTP request and return the decoded JSON body.

        Args:
            method: HTTP verb, e.g. ``"GET"`` or ``"POST"``.
            url: Absolute URL to request.
            **kwargs: Extra arguments forwarded to ``requests.request``;
                a ``headers`` entry is merged over the default User-Agent.

        Returns:
            The response body parsed as JSON.

        Raises:
            requests.RequestException: Connection problem, timeout or non-2xx
                status, once ``MAX_ATTEMPTS`` attempts have failed.
            ValueError: The body is not valid JSON after all attempts.
        """
        # The pause is inside the retried function so every attempt waits.
        self.checkTime()
        headers = {"User-Agent": self.USER_AGENT}
        headers.update(kwargs.pop("headers", {}))
        response = requests.request(
            method,
            url,
            headers=headers,
            timeout=self.REQUEST_TIMEOUT_SECONDS,
            **kwargs
        )
        response.raise_for_status()
        return response.json()

    def _writeJson(self, fileName, payload):
        """Write ``payload`` to ``fileName`` as UTF-8 JSON."""
        # ensure_ascii=False emits raw non-ASCII text, so the encoding must be
        # pinned instead of depending on the platform default.
        with open(fileName, "w", encoding="utf-8") as fileObj:
            json.dump(payload, fileObj, ensure_ascii=False)

    def doRequest(self):
        """Fetch the discovery listing, save it, and crawl its first book.

        The complete response is written to ``novel/home.json``.  Only the
        first book of the first category is followed; empty listings are
        skipped silently.

        Raises:
            KeyError: The response lacks ``data``/``list``/``bookList``/``bookId``.
        """
        pprint("get home data")
        url = f"{self.BASE_URL}/category/discovery?pageNum=1&pageSize=20"
        pprint(url)
        res = self._requestJson("GET", url)
        categories = res['data']['list']

        self.createDirs("novel")
        self._writeJson('novel/home.json', res)

        # Sample crawl: first category only, and within it the first book only.
        for category in categories[:1]:
            for book in category['bookList'][:1]:
                self.doRequestDetail(book['bookId'])

    def doRequestDetail(self, bookId):
        """Fetch one book's detail, save it, then fetch its chapter list.

        The ``data`` member of the response is written to
        ``novel/books/<bookId>/detail_<bookId>.json``.

        Args:
            bookId: Identifier of the book, as found in the discovery listing.

        Raises:
            KeyError: The response has no ``data`` member.
        """
        pprint("get detail: " + str(bookId))
        url = f"{self.BASE_URL}/book/getDetail?bookId={bookId}"
        pprint(url)
        res = self._requestJson("GET", url)
        data = res['data']

        bookDir = f"novel/books/{bookId}"
        self.createDirs(bookDir)
        self._writeJson(f"{bookDir}/detail_{bookId}.json", data)
        self.doRequestChapters(bookId)

    def doRequestChapters(self, bookId):
        """Fetch a book's chapter list, keep at most two chapters, fetch each.

        The ``data`` member, with ``chapters`` trimmed to its first two
        entries, is written to ``novel/books/<bookId>/chapters_<bookId>.json``.
        Books with fewer than two chapters are handled without error.

        Args:
            bookId: Identifier of the book.

        Raises:
            KeyError: The response lacks ``data``/``chapters`` or a chapter
                has no ``id``.
        """
        pprint("get chapters: " + str(bookId))
        url = f"{self.BASE_URL}/chapter/getByBookId?bookId={bookId}&chapterId=0"
        pprint(url)
        res = self._requestJson("GET", url)
        data = res['data']

        # Slicing tolerates books that have zero or one chapter.
        selected = data['chapters'][:2]
        data['chapters'] = selected

        bookDir = f"novel/books/{bookId}"
        self.createDirs(bookDir)
        self._writeJson(f"{bookDir}/chapters_{bookId}.json", data)

        for chapter in selected:
            self.doRequestContent(bookId, [chapter['id']])

    def doRequestContent(self, bookId, chapters):
        """Fetch chapter content and save the complete response.

        The response is written to
        ``novel/books/<bookId>/content/<bookId>_<chapters[0]>.json``.

        Args:
            bookId: Identifier of the book.
            chapters: Non-empty list of chapter ids; the first id names the
                output file.

        Raises:
            ValueError: ``chapters`` is empty.
        """
        if not chapters:
            # Fail before touching the network: the file name needs chapters[0].
            raise ValueError("chapters must contain at least one chapter id")

        pprint("get content: " + str(bookId))
        pprint(chapters)
        payload = {"bookId": bookId, "chapterIdList": chapters}
        url = f"{self.BASE_URL}/chapter/get"
        res = self._requestJson(
            "POST",
            url,
            data=json.dumps(payload),
            headers={'Content-Type': 'application/json'},
        )

        contentDir = f"novel/books/{bookId}/content"
        self.createDirs(contentDir)
        self._writeJson(f"{contentDir}/{bookId}_{chapters[0]}.json", res)
# Script entry: build the crawler and run one crawl from the home listing.
discovery = Discovery()
# createRootDir() is left disabled: createDirs below uses os.makedirs, which
# also creates the parent "novel" directory of "novel/books".
discovery.createDirs("novel/books")
discovery.doRequest()
Python HTTP 请求简单示例
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。
推荐阅读更多精彩内容
- 一、为什么从HTTP请求开始 无论我们通过浏览器打开网站、访问网页,还是通过脚本对URL网址进行访问,本质上都是对...
- 摘要:教你如何快速爬取一个网页信息;urllib模块中常用的方法介绍;通过修改头信息来伪装成浏览器访问页面;Get...
- Table of Contents HTTP 请求方法[#http-%E8%AF%B7%E6%B1%82%E6%9...
- 该示例展示如何调用对方写好的 HTTP 接口,数据格式需要双方协调定义好。 思路如下:我方调用对方接口(另一个服务...