import requests
import random
from bs4 import BeautifulSoup
import smtplib # 发送邮件模块
from email.mime.text import MIMEText # 定义邮件内容
from email.header import Header # 定义邮件标题
smtpserver = 'smtp.qq.com'
# 发送邮箱用户名密码
user = ''
password = ''
# 发送和接收邮箱
sender = ''
receive = ''
user_agent = [
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
"Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
"Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
"Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
"Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)"]
def main():
print('正在爬取数据')
url = 'https://36kr.com/newsflashes'
headers = {'User-Agent': random.choice(user_agent)}
response = requests.get(url, headers=headers)
response=response.content.decode('utf-8', 'ignore')
soup = BeautifulSoup(response, 'lxml')
news = soup.find_all('a', class_='item-title')
news_list=[]
for i in news:
title=i.get_text()
href='https://36kr.com'+i['href']
news_list.append(title+'<br>'+href)
info='<br></br>'.join(news_list)
print('正在发送信息')
send_email(info)
def send_email(content):
# 通过QQ邮箱发送
title='36kr快讯'
subject = title
msg = MIMEText(content, 'html', 'utf-8')
msg['Subject'] = Header(subject, 'utf-8')
msg['From'] = sender
msg['To'] = receive
# SSL协议端口号要使用465
smtp = smtplib.SMTP_SSL(smtpserver, 465) # 这里是服务器端口!
# HELO 向服务器标识用户身份
smtp.helo(smtpserver)
# 服务器返回结果确认
smtp.ehlo(smtpserver)
# 登录邮箱服务器用户名和密码
smtp.login(user, password)
smtp.sendmail(sender, receive, msg.as_string())
smtp.quit()
if __name__ == '__main__':
main()
Task05 爬虫入门与综合应用
©著作权归作者所有,转载或内容合作请联系作者
- 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
- 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
- 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
推荐阅读更多精彩内容
- 前言 前段时间抢茅台脚本非常火,它是Python脚本,加上刚好最近在学习Python,我们准备通过这个脚本,来加深...
- 学习文档:http://datawhale.club/t/topic/202[http://datawhale.c...
- 经过前面的候选文章召回和特征工程,将文章和用户点击日志信息处理成监督学习数据集,接下来就使用排序模型或二分类模型搭...