import bs4
import requests
import SendToEmail
# Script configuration: delivery addresses, the page to crawl, and the output file.
KindleEmail = 'xxxx@kindle.cn'  # recipient handed to SendToEmail.sendKindle
SinaEmail = 'xxxxx@sina.com'  # not referenced by the code visible in this file
homepage_url = 'https://www.wuxiaworld.com/novel/wu-dong-qian-kun'  # novel index page
novel_file = 'WuDongQianKun.txt'  # text file the chapters are written to
# headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
# 'Chrome/51.0.2704.63 Safari/537.36'}
# Fetch the novel's index page. requests applies no timeout unless one is
# given, so bound the call (30 s is an arbitrary but generous limit) to keep
# a stalled connection from hanging the script forever.
res = requests.get(homepage_url, timeout=30)
print(res.status_code)
# Abort on an HTTP error instead of parsing an error page into an empty
# chapter list (which would produce, and then mail, an empty file).
res.raise_for_status()
soup = bs4.BeautifulSoup(res.text, 'lxml')
# Every chapter entry on the index page carries the "chapter-item" class.
chapters = soup.select('.chapter-item')
# Chapter links are site-relative, so prefix the host to get absolute URLs.
chapters_addr = [
    'https://www.wuxiaworld.com' + chapter.select('a')[0].get('href')
    for chapter in chapters
]
# The chapter title is the text of the first <span> inside each entry.
chapters_name = [chapter.select('span')[0].getText() for chapter in chapters]
# Download every chapter and write them, in index order, to one UTF-8 text file.
with open(novel_file, 'w', encoding='utf8') as f:
    # Both lists are built from the same index entries, so walk them in lockstep.
    for chapter_url, chapter_title in zip(chapters_addr, chapters_name):
        # requests applies no timeout unless one is given; bound each fetch so
        # a single stalled chapter cannot hang the whole run.
        response = requests.get(chapter_url, timeout=30)
        if response.status_code != 200:
            # Report the gap instead of silently leaving the chapter out.
            print('下载失败:', chapter_title, response.status_code)
            continue
        chapter_soup = bs4.BeautifulSoup(response.text, 'lxml')
        # The chapter body is inside elements with class "fr-view"; each <p>
        # is one paragraph and is followed by a blank line in the output.
        paragraphs = []
        for body in chapter_soup.select('.fr-view'):
            paragraphs.extend(p.getText() + '\n\n' for p in body.select('p'))
        f.write(chapter_title + '\n\n')  # chapter heading
        f.write(''.join(paragraphs) + '\n\n')  # chapter text
        print('已下载:', chapter_title)
# The download loop has finished; hand off to the mailer module.
print('下载完成,开始发送...')
# NOTE(review): only the recipient address is passed here. Which file gets
# attached is decided inside sendKindle rather than taken from novel_file —
# confirm that both refer to the same file.
SendToEmail.sendKindle(KindleEmail)
print('Done!')
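# 然后是推送部分 — Next comes the push (email delivery) part. Presumably the code below is the separate SendToEmail module imported by the script above.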
import yagmail
def sendKindle(mailAddr, attachment=r'E:\document\Python\Crawler_WuxiaWorld\WuDongQianKun.txt'):
    """Email a file to the given address through the 163.com SMTP server.

    Args:
        mailAddr: Recipient address (the caller passes a Kindle address).
        attachment: Path of the file to send. Defaults to the location that
            was previously hard-coded, so existing one-argument calls behave
            as before.
    """
    # Fill in your own mailbox user and password here; avoid committing real
    # credentials to version control.
    yag = yagmail.SMTP(user='XXXXXX@163.com', password='XXXXXXXXXX', host='smtp.163.com')
    # The default path is a raw string: in a normal literal, "\d", "\P", "\C"
    # and "\W" are invalid escape sequences, which Python warns about.
    yag.send(to=mailAddr, subject='Convert', contents=[attachment])