python爬虫:一个关于去哪儿吃饭的问题

每天中午到了饭点,都在为去哪儿吃饭发愁。自从有了这个程序,一切都简单了。
where_list.txt文件中存放的是想去吃饭的地方或者想吃的午餐,每行一条,比如:

0 东来顺
1 楼外楼
0 松鹤楼
1 全聚德
0 谭家菜馆
1 北京饭店
1 油条豆浆

其中每行开头的0表示近距离,1表示远距离。程序会先查询所在城市的空气污染指数(AQI):AQI不超过150时,从远距离的地方中随机挑选一个;否则从近距离的地方中随机挑选一个。

程序基于Python 2编写,依赖BeautifulSoup(bs4)模块和lxml解析器,请提前安装。
希望能给你吃饭带来帮助。

#!/bin/python
# -*- coding: utf-8 -*-

import os
import re
import sys
import time
import urllib2
from random import choice

from bs4 import BeautifulSoup

#获取网页内容
def getUrlContents(url, num=5):
    if not url:
        print '参数URL为空'
        exit(0)

    try:
        html = urllib2.urlopen(url).read()
    except urllib2.URLError as e:
        print 'Download error: ', e.reason
        html = None
        if num > 0 :
            time.sleep(2)
            return download(url, num-1)

    return html

# Build an air-quality report for a city
def getAirFromPm25(city):
    """Return a printable air-quality summary for ``city``.

    The page ``http://www.pm25.com/<city>.html`` is downloaded and parsed
    with BeautifulSoup; the city name, the AQI number, the AQI description
    and the advice lines are read from it by CSS class. The program exits if
    the page cannot be downloaded.

    Args:
        city: City identifier used to build the page URL.

    Returns:
        A unicode string with the city header, the pollution-index line and
        one line per piece of advice.
    """
    html = getUrlContents('http://www.pm25.com/'+city+'.html')
    if not html:
        exit(0)

    soup = BeautifulSoup(html, "lxml")
    city_name = soup.find(class_="bi_loaction_city").get_text()
    aqi_value = soup.find(class_="bi_aqiarea_num").get_text()
    aqi_desc = soup.find(class_="bi_aqiarea_right").get_text()
    advice = soup.find(class_="bi_aqiarea_bottom").get_text().strip()

    # Drop blank lines and the "AQI" caption. A new list is built because
    # removing items from a list while iterating over it skips elements.
    desc_lines = [
        line for line in re.split(r'\n+', aqi_desc)
        if line and "AQI" not in line
    ]

    parts = [
        u"\n🏘  所在城市: " + city_name + u"天气" + u" \n\n",
        u"🍀  污染指数: " + aqi_value + u' ' + u''.join(desc_lines) + u' \n',
    ]
    parts.extend(u"   " + line + u' \n' for line in re.split(r'\n+', advice))

    return u''.join(parts)

# Pollution index
def getAqi(city):
    """Return the AQI number shown on the pm25.com page of ``city``.

    The page ``http://www.pm25.com/<city>.html`` is downloaded; the program
    exits if the download yields nothing. The text of the element with class
    ``bi_aqiarea_num`` is stripped and converted with ``int``.
    """
    page = getUrlContents('http://www.pm25.com/'+city+'.html')
    if not page:
        exit(0)

    soup = BeautifulSoup(page, "lxml")
    aqi_text = soup.find(class_="bi_aqiarea_num").get_text()
    return int(aqi_text.strip())

# Turn the pollution index into a dining recommendation
def getMsgByAqi(aqi):
    """Map an AQI value to a recommendation.

    Returns a dict with two keys: ``info`` (the message to display) and
    ``tag`` (1 when ``aqi`` is at most 150, meaning a far place is fine;
    0 otherwise, meaning a near place is preferred).
    """
    far_is_fine = aqi * 1.0 <= 150
    if not far_is_fine:
        return {"info": u'🚶🏻 🚶🏻 🚶🏻  适宜近距离吃饭', 'tag': 0}
    return {"info": u'🚜 🚜 🚜  适宜远距离吃饭', 'tag': 1}

# Load the candidate lunch places
def getWhereList(file_name):
    """Read the places file and group its entries by distance flag.

    Each non-blank line is expected to look like ``<flag> <name>``, where
    ``<flag>`` is an integer: 0 marks a near place, any other value a far
    place. The last whitespace-separated field of the line is used as the
    name. Lines with fewer than two fields are skipped.

    The program prints a message and exits when the file does not exist or
    is empty.

    Args:
        file_name: Path of the places file, relative to the current
            directory.

    Returns:
        ``{0: [near names...], 1: [far names...]}`` in file order.

    Raises:
        ValueError: If the flag field of a line is not an integer.
    """
    path = './' + file_name
    if not os.path.exists(path):
        print("\033[43;31m😫    无法获取吃饭资源 \033[0m\n")
        sys.exit(0)

    with open(path, 'rb') as f:
        content = f.read()
    if not content:
        print("\033[43;31m👿 👿 👿    无法获取吃饭位置  \033[0m\n")
        sys.exit(0)

    # Work with the interpreter's native str type: on Python 2 the bytes read
    # from the file already are str and are left untouched; elsewhere they
    # are decoded so the names can be concatenated with ordinary strings.
    if not isinstance(content, str):
        content = content.decode('utf-8')

    near = []
    far = []
    # splitlines()/split() also cope with "\r\n" endings and stray
    # leading/trailing whitespace.
    for line in content.splitlines():
        fields = line.split()
        if len(fields) < 2:
            continue
        if int(fields[0]):
            far.append(fields[-1])
        else:
            # Append (not assign) so every near place is kept.
            near.append(fields[-1])

    return {0: near, 1: far}

# Pick where to eat
def getMeatLocation(where_list):
    """Return a highlighted message naming a randomly chosen place.

    Args:
        where_list: Sequence of place names to choose from.

    Returns:
        The "go here today" message containing one name picked with
        ``random.choice``. When ``where_list`` is empty the "no suitable
        place" message is returned instead, so the caller can print it;
        calling ``choice`` on an empty sequence would raise ``IndexError``.
    """
    if not where_list:
        return "\033[43;31m🌳 🌳 🌳  没有合适的吃饭地方,请自便  \033[0m\n"

    meat = "\033[43;31m  🍚 🥝 🍎   今天去:"
    meat += choice(where_list)
    meat += "  🍇 🍓 🍒   \033[0m\n"

    return meat


if __name__ == '__main__':
    # Usage: <script> CITY [PLACES_FILE]
    # CITY is the page name on pm25.com, e.g. "shijiazhuang" for
    # http://www.pm25.com/shijiazhuang.html
    args = sys.argv[1:]
    if not args:
        print('请输入城市')
        exit(0)

    city = args[0]
    # The places file defaults to where_list.txt in the current directory.
    where_list = args[1] if len(args) > 1 else 'where_list.txt'

    air = getAirFromPm25(city)
    print(air)

    # The AQI decides whether a near (0) or a far (1) place is picked.
    msg = getMsgByAqi(getAqi(city))
    print(msg['info'])

    where = getWhereList(where_list)
    meat = getMeatLocation(where[msg['tag']])
    print(meat)
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容