由于需要看通知内容,而每次从手机上看都要先输入账号密码,于是便萌生了用 Python 获取通知内容并定时发送到自己邮箱的想法。
实现并不算复杂:用 BeautifulSoup 抓取内容,用 Redis 记录文章是否阅读过,用 Jinja2 作为邮件内容的模板引擎,并用和风天气的 API 在邮件正文前加了个天气预报。
只是有一点要注意:启动程序前要先留意 locale(Linux 命令)输出的内容是否为 zh_CN.UTF-8。最后我是写了个 shell 脚本来启动,并在运行前执行 export LC_ALL=zh_CN.UTF-8。
Redis
Welcome to Jinja2 — Jinja2 Documentation (2.9)
yagmail 0.10.190 : Python Package Index
API说明文档 | 和风天气
Beautiful Soup 4.4.0 文档 — beautifulsoup 4.4.0 文档
程序在启动的时候加上 -t 参数,就只会给自己的邮箱发邮件,用作测试(当然需要提前配置好)。
实现如下:
主文件
#!/usr/bin/python3
# -*- coding:utf-8 -*-
'''
【留意!!】
启动程序前要先留意 locale (Linux 命令)输出的内容是否为zh_CN.UTF-8
建议写 shell 脚本启动并在运行前 export LC_ALL=zh_CN.UTF-8
'''
from conf import *
from sys import argv
from urllib.parse import unquote
from bs4 import BeautifulSoup
from jinja2 import Environment,FileSystemLoader,select_autoescape
import re,os,json,time,redis,yagmail,requests
# One shared HTTP session is used for the login request and for every later
# page fetch in this script.
session = requests.Session()
# The header name has to be spelled 'User-Agent'. The previous 'UserAgent' key
# was sent as an unrelated header, so the browser identification never
# replaced the default one.
session.headers.update({'User-Agent':'Mozilla/5.0 (X11; Linux x86_64; rv:57.0) Gecko/20100101 Firefox/57.0'})

# Template environment for the mail body: templates are loaded from the
# 'template' directory under the current working directory, with HTML
# autoescaping enabled for '.html' templates.
jinja2_env = Environment(
    loader=FileSystemLoader(os.getcwd()+'/template'),
    autoescape=select_autoescape(['html'])
)
def printf(string):
    """Print ``string`` prefixed with the current local time.

    Nothing is printed when ``string`` is empty or otherwise falsy.
    """
    if not string:
        return
    stamp = time.strftime("%Y-%m-%d %H:%M:%S : ", time.localtime())
    print(stamp + string)
def article_id_exist(id):
    """Report whether an article id is already recorded, recording it if not.

    The id is used as a key in the Redis instance at localhost:6379, db 0.
    Calling this function for an unseen id stores it, so the next call with
    the same id returns True.

    Args:
        id: Article id; must be a non-empty string made of digits only.

    Returns:
        True if the id was already stored. False if it was not stored before
        this call, or if ``id`` is empty or not a digit string (in which case
        nothing is written).
    """
    if not id:
        printf('empty article id')
        return False
    # The isinstance guard keeps a non-string id (e.g. an int) from raising
    # AttributeError on .isdigit(); it is rejected like any other bad value.
    if not isinstance(id, str) or not id.isdigit():
        printf('need number instead of other value type')
        return False

    r = redis.StrictRedis(host='localhost', port=6379, db=0)
    # SET with nx=True writes the key only when it is absent and reports
    # whether it did, so the check and the write are a single atomic command
    # instead of a GET followed by a separate SET.
    newly_recorded = r.set(id, "True", nx=True)
    return not newly_recorded
def get_weather_data():
    """Fetch the weather report from the configured weather API.

    The request is sent to WEATHER_API_URL with WEATHER_API_CITY and
    WEATHER_API_KEY as the ``city`` and ``key`` query parameters.

    Returns:
        The decoded JSON document when the API answers with HTTP 200.
        An empty list when the request fails, the status is not 200, or the
        body cannot be decoded as UTF-8 JSON.
    """
    weather_data = []
    query = {'city': WEATHER_API_CITY, 'key': WEATHER_API_KEY}
    try:
        # requests builds and escapes the query string itself, and the
        # timeout keeps an unresponsive API from blocking the whole mail job.
        weather_request = requests.get(WEATHER_API_URL, params=query, timeout=10)
    except requests.RequestException as err:
        printf('get weather data failed: %s' % err)
        return weather_data

    if weather_request.status_code != 200:
        printf('get weather data failed, status code %s' % weather_request.status_code)
        return weather_data

    try:
        weather_data = json.loads(weather_request.content.decode(encoding='utf-8'))
    except ValueError as err:
        # Covers both invalid UTF-8 and invalid JSON.
        printf('weather data is not valid json: %s' % err)
    return weather_data
def get_index():
    """Log in to the notice site and fetch the category-4 article list.

    Returns:
        The raw body of the article list page, or None when the login did
        not succeed or the list page did not answer with HTTP 200.
    """
    login_url = ROOT_URL+'/UserLogin.aspx'
    # The login page is requested once before posting the form
    # (presumably to pick up session cookies - confirm against the site).
    session.get(url=login_url)
    result = session.post(login_url, data=LOGIN_DATA)

    # A successful login answers 200 and ends up on the site root.
    logged_in = result.status_code == 200 and result.url == ROOT_URL+'/'
    if not logged_in:
        printf('login failed '+str(result.status_code) )
        return None

    category = session.get(ROOT_URL + '/ArticleList.aspx?category=4')
    if category.status_code != 200:
        return None
    return category.content
def _extract_attachments(article_content, url_pattern):
    """Collect the uploaded-file links found inside an article body.

    Returns a list of ``{'attach_name': ..., 'attach_url': ...}`` dicts, one
    per ``<a>`` whose href matches ``url_pattern``; the name is the
    percent-decoded second capture group of the pattern.
    """
    attachments = []
    for link in article_content.find_all('a'):
        # Anchors without an href would raise KeyError with link['href'].
        href = link.get('href')
        if not href:
            continue
        match = url_pattern.match(href)
        if not match:
            printf('%s do not have file attachment' % (href))
            continue
        # unquote() returns the string unchanged when it contains no '%'.
        attachments.append({'attach_name':unquote(match.group(2)),'attach_url':href})
    return attachments


def parse_html(html):
    """Extract the not-yet-seen articles from the article list page.

    Each ``<p>`` inside ``div.articles`` is treated as one article entry.
    Entries already known to article_id_exist() are skipped; for the others
    the detail page is downloaded to build an excerpt and an attachment list.

    Args:
        html: Markup of the article list page.

    Returns:
        A list of dicts with the keys 'url', 'date', 'title', 'author',
        'excerpt' and 'attachment', or None when ``html`` is empty or does
        not contain ``div.articles``.
    """
    if not html:
        printf('empty html')
        return
    html_soup = BeautifulSoup(html, 'lxml')
    articles = html_soup.find('div', attrs={'class': 'articles'})
    if not articles:
        printf('article not found')
        return

    # Compiled once per call instead of once per article.
    attachment_url_pattern = re.compile(r'http://news.gdut.edu.cn/DepartmentUploadFiles/(.+)/files/(.+)')

    article_result = []
    for val in articles.find_all('p'):
        anchor = val.find('a')
        spans = val.find_all('span')
        if anchor is None or len(spans) < 2:
            # Without a link and two spans the entry cannot be parsed; skip it
            # rather than abort the whole run.
            printf('skip unexpected article entry')
            continue

        href = anchor['href']
        # The id is taken as the last six characters of the link.
        article_id = href[-6:]
        # The first character of the href is dropped before joining it to
        # ROOT_URL (presumably a leading '.' of a relative link - confirm).
        article_url = ROOT_URL + href[1:]
        article_date = spans[1].getText()[:-1]
        article_title = anchor['title']
        article_author = spans[0]['title']
        article_attachment = []
        article_excerpt = ''

        if article_id_exist(article_id):
            printf('article exist in database %s' % (article_title))
            continue

        # NOTE(review): article_id_exist() has already recorded the id, so an
        # article whose detail request fails here is not retried on later runs.
        article_detail = session.get(url=article_url)
        if article_detail.status_code != 200:
            printf('get article detail error %s' % (article_id) )
            continue

        article_soup = BeautifulSoup(article_detail.content,'lxml')
        article_content = article_soup.find('div', attrs={'id': 'articleBody'})
        if article_content is None:
            # Still report the article, just without excerpt and attachments.
            printf('article body not found %s' % (article_id))
        else:
            article_attachment = _extract_attachments(article_content, attachment_url_pattern)
            # Collapse all whitespace, then drop the repeated title and the
            # author line so the excerpt starts with the actual text.
            info = ''.join(article_content.getText().split())
            info = info.replace(article_title, '')
            info = info.replace('单位:'+article_author,'')
            article_excerpt = info[:150]

        article_result.append(
            {
                'url':article_url,
                'date':article_date,
                'title':article_title,
                'author':article_author,
                'excerpt':article_excerpt,
                'attachment':article_attachment
            }
        )
    return article_result
if __name__ == '__main__':
    # One greeting per weekday, indexed by strftime('%w') (0 = Sunday).
    welcome_string = [
        '周日:今天是周末的最后一天,好好珍惜时间\n',
        '周一:你从周末的作息里调整过来了吗?把上周的通知邮件都删了吧\n',
        '周二:吾日三省吾身\n',
        '周三:生活仍将继续\n',
        '周四:未来近在咫尺\n',
        '周五:明天就是周末了,加油!\n',
        '周六:你今天打算做什么?别浪费时间\n',
    ]
    welcome_content = welcome_string[int(time.strftime('%w'))]
    update_content = '最近更新:'+VERSION+':'+ANNOUNCEMENT+'\n'

    weather_data = get_weather_data()
    printf('get weather data finish')
    # get_weather_data() returns an empty list on failure and the API payload
    # may lack the expected keys; in both cases the mail is still sent, just
    # without the weather section, instead of the script crashing here.
    weather_content = ''
    try:
        report = weather_data['HeWeather5'][0]
        now = report['now']
        forecast = report['hourly_forecast']
    except (KeyError, IndexError, TypeError):
        printf('weather data unavailable, skip weather section')
    else:
        weather_render = jinja2_env.get_template('weather.html')
        weather_content = weather_render.render(now=now,forecast=forecast)

    index = get_index()
    article_data = parse_html(index)
    article_render = jinja2_env.get_template('article.html')
    if article_data:
        article_content = article_render.render(articles=article_data)
    else:
        article_content = article_render.render()

    mail_client = yagmail.SMTP(user=SEND_MAIL_USER, password=SEND_MAIL_PWD, host=SEND_MAIL_HOST, port=SEND_MAIL_PORT)
    mail_content = welcome_content + weather_content + update_content + article_content

    # With '-t' as the only argument, mail goes to the test recipients only.
    is_test_run = len(argv) == 2 and '-t' in argv
    recipients = SEND_TO_LIST_TEST if is_test_run else SEND_TO_LIST
    log_prefix = 'sending[test user]: ' if is_test_run else 'sending : '
    for addr in recipients:
        printf(log_prefix + addr)
        mail_client.send(addr, subject=SEND_MAIL_SUBJECT, contents=mail_content)
        # Short pause between two sends.
        time.sleep(1)
同级目录下的配置文件 conf.py
#!/usr/bin/python3
# -*- coding:utf-8 -*-
import time
# Base URL of the notice website
ROOT_URL = 'http://test.com'

# Form fields posted to the login page
LOGIN_DATA = {
    '__VIEWSTATE': '/wEPDwUKLTQwOTA4NzE2NmQYAQUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgEFI2N0bDAwJENvbnRlbnRQbGFjZUhvbGRlcjEkQ2hlY2tCb3gxBufpEJuDDaf6eTj0A4Cn2Erf8u98KcGrQqATTB3mEaQ=',
    '__EVENTVALIDATION': '/wEWBQKb37HjDwLgvLy9BQKi4MPwCQL+zqO2BAKA4sljg4IvzC7ksG01o7aN0RZUOKEC4lV0bTeXI4zrbaQsj0c=',
    # Ask school staff for a real account; the credentials here do not work
    'ctl00$ContentPlaceHolder1$userEmail': 'test',
    'ctl00$ContentPlaceHolder1$userPassWord': 'test',
    'ctl00$ContentPlaceHolder1$CheckBox1': 'on',
    # NOTE(review): this value is already percent-encoded; when the dict is
    # posted as form data it will be encoded a second time - confirm the
    # server accepts that.
    'ctl00$ContentPlaceHolder1$Button1': '%E7%99%BB%E5%BD%95',
}

# Sender mailbox account
SEND_MAIL_USER = 'account'
# Password of the sender mailbox
SEND_MAIL_PWD = 'password'
# SMTP host (Tencent enterprise mail)
SEND_MAIL_HOST = 'smtp.exmail.qq.com'
# SMTP port
SEND_MAIL_PORT = 465
# Mail subject: today's date followed by a fixed title
SEND_MAIL_SUBJECT = time.strftime("%Y-%m-%d") + '@今日校内通知'

# Recipients of the regular mail
SEND_TO_LIST = ['mail@mail.com']
# Recipients used for testing; selected with the -t option
SEND_TO_LIST_TEST = ['mail@mail.com']

# HeWeather API endpoint
WEATHER_API_URL = 'https://free-api.heweather.com/v5/weather?'
# City for the weather API; pinyin or Chinese characters both work
WEATHER_API_CITY = 'guangzhou'
# Free-tier key: 4000 calls per day, available after registration
WEATHER_API_KEY = 'key'

# Release note and version shown in the mail
ANNOUNCEMENT = '重构,使用模板引擎取代字符串拼接生成邮件内容(https://github.com/ypingcn/)'
VERSION = '2017.09.26'
template文件夹的内容是邮件正文的模板
- article.html
{#- Mail section for unread notices. Reads `articles`; each item provides
    url, title, author, date, excerpt and attachment. When `articles` is
    missing or empty, the "no unread notices" paragraph is rendered. -#}
{%- if articles %}
<p> 今日的新闻通知如下 </p>
<ul>
{%- for article in articles %}
<li>
<a href='{{article.url}}'>
<font color="red"> {{ article.title }} </font>
</a>
{{ article.author }} - {{ article.date }}
{{ article.excerpt }}
{#- One link per attachment; each entry provides attach_url and attach_name. -#}
{%- for link in article.attachment %}
<a href='{{link.attach_url}}'>{{ link.attach_name }}</a>
{%- endfor %}
</li>
{%- endfor %}
</ul>
{%- else %}
<p> 暂无未读的新闻通知 </p>
{%- endif %}
- weather.html
{#- Weather section of the mail. Reads `now` (cond.txt, tmp, fl) for the
    current conditions and `forecast`, a sequence whose items provide date
    and cond.txt, for the lines that follow. -#}
<p>天气:{{ now.cond.txt }},气温:{{ now.tmp }}℃,体感温度:{{ now.fl }}摄氏度</p>
<br>未来几个小时内的天气预报为:
{%- for hour in forecast %}
<br>{{ hour.date }} : {{ hour.cond.txt }}
{%- endfor %}
写的不是太好,还是有很多需要改正的地方。以后再作修改。
来自个人 Python 文集