利用 Python 抓取 Wuxiaworld 小说章节,并推送到 Kindle


import bs4
import requests
import SendToEmail

# Mail addresses. KindleEmail is the recipient handed to
# SendToEmail.sendKindle at the end of the script; SinaEmail is defined
# here but not referenced by the code shown.
KindleEmail = 'xxxx@kindle.cn'
SinaEmail = 'xxxxx@sina.com'

# Index page listing the novel's chapters, and the text file the
# downloaded chapters are written to.
homepage_url = 'https://www.wuxiaworld.com/novel/wu-dong-qian-kun'
novel_file = 'WuDongQianKun.txt'

# Optional browser-like request headers, currently disabled:
# headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
#                          'Chrome/51.0.2704.63 Safari/537.36'}



# Seconds to wait on each HTTP request; without a timeout requests can
# block forever on a stalled connection.
REQUEST_TIMEOUT = 30


def _extract_chapter_text(html):
    """Return the chapter body contained in *html* as plain text.

    Every <p> inside an element with class ``fr-view`` is emitted followed
    by a blank line, so paragraphs stay separated in the text file.
    """
    page = bs4.BeautifulSoup(html, 'lxml')
    return ''.join(
        paragraph.getText() + '\n\n'
        for body in page.select('.fr-view')
        for paragraph in body.select('p')
    )


# One session for the whole run so the connection to the site is reused
# instead of being re-established for every chapter.
with requests.Session() as session:
    res = session.get(homepage_url, timeout=REQUEST_TIMEOUT)
    print(res.status_code)
    # Stop here on an error page: parsing it would yield an empty book
    # that would still be mailed.
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, 'lxml')
    chapters = soup.select('.chapter-item')

    # Chapter links on the index page are site-relative; prefix the host.
    chapters_addr = [
        'https://www.wuxiaworld.com' + chapter.select('a')[0].get('href')
        for chapter in chapters
    ]
    # The chapter title is the text of the first <span> of each item.
    chapters_name = [chapter.select('span')[0].getText() for chapter in chapters]

    # (Re)create the output file and append the chapters in index order.
    with open(novel_file, 'w', encoding='utf8') as f:
        for name, addr in zip(chapters_name, chapters_addr):
            try:
                response = session.get(addr, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as err:
                # A single unreachable chapter should not abort the run.
                print('下载失败:', name, err)
                continue
            if response.status_code != 200:
                # Report the skipped chapter instead of dropping it silently.
                print('下载失败:', name, response.status_code)
                continue
            f.write(name + '\n\n')  # chapter title
            f.write(_extract_chapter_text(response.text) + '\n\n')  # chapter body
            print('已下载:', name)

print('下载完成,开始发送...')
SendToEmail.sendKindle(KindleEmail)
print('Done!')


然后是推送部分,即上面脚本中导入的 SendToEmail 模块(SendToEmail.py):

import yagmail

# Raw string: as a normal literal the backslash sequences \d, \P, \C and \W
# are invalid escapes (a SyntaxWarning on recent Python versions). The
# resulting path text is unchanged.
DEFAULT_ATTACHMENT = r'E:\document\Python\Crawler_WuxiaWorld\WuDongQianKun.txt'


def sendKindle(mailAddr, attachment=DEFAULT_ATTACHMENT):
    """Mail a file to *mailAddr* through the 163.com SMTP server.

    Args:
        mailAddr: Recipient address passed to ``yag.send(to=...)``.
        attachment: Path placed in the message contents. Defaults to the
            path that was previously hard-coded, so existing callers are
            unaffected.
    """
    # Fill in your own mailbox user and password here.
    yag = yagmail.SMTP(user='XXXXXX@163.com', password='XXXXXXXXXX', host='smtp.163.com')
    # NOTE(review): the 'Convert' subject is presumably what asks the Kindle
    # service to convert the document, and yagmail presumably attaches
    # contents entries that are existing file paths - confirm both.
    yag.send(to=mailAddr, subject='Convert', contents=[attachment])

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容