# Dianping (大众点评) restaurant-listing scraper — shared module-level setup.
import requests
from lxml import etree
import pandas as pd
import time
from pandas import DataFrame, Series  # NOTE(review): unused below — kept to preserve the original imports

# Request headers for every HTTP call. The Cookie placeholder must be
# replaced with a real logged-in session cookie or dianping.com will
# reject/ban the requests.
headers = {
    'Cookie': '************************************************',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36',
    'Connection': 'keep-alive'
}

# Accumulates one dict per scraped shop; turned into a DataFrame in __main__.
info_list = []
def get_url(url):
    """Fetch the category index page and crawl up to 30 listing pages per category.

    Args:
        url: city/channel index page, e.g. ``http://www.dianping.com/shenzhen/ch10``.

    Side effects:
        Delegates each listing page to ``get_href``, which appends one dict
        per shop to the module-level ``info_list``.
    """
    res = requests.get(url, headers=headers)
    selector = etree.HTML(res.text)
    # One link per category in the "classfy" navigation block.
    hrefs = selector.xpath('//*[@id="classfy"]/a/@href')
    for href in hrefs:
        print(href)
        # Listing pages are addressed as <category-url>p1 .. <category-url>p30.
        for i in range(30):
            new_href = href + 'p' + str(i + 1)
            get_href(new_href)
def get_href(new_href):
    """Scrape one listing page and append each shop's fields to ``info_list``.

    Args:
        new_href: full URL of a single paginated listing page.

    Side effects:
        Appends one dict per shop ``<li>`` to the module-level ``info_list``
        and sleeps 3 seconds after the page is processed (rate limiting).
    """
    html = requests.get(new_href, headers=headers)
    selector_2 = etree.HTML(html.text)
    # One <li> element per shop in the results list.
    htmls = selector_2.xpath('//*[@id="shop-all-list"]/ul/li')
    for html_3 in htmls:
        prices = html_3.xpath('div[2]/div[2]/a[2]/b/text()')
        shangqu = html_3.xpath('div[2]/div[3]/a[2]/span/text()')
        pinglun = html_3.xpath('div[2]/div[2]/a[1]/b/text()')
        kouwei = html_3.xpath('div[2]/span/span[1]/b/text()')
        huanjing = html_3.xpath('div[2]/span/span[2]/b/text()')
        fuwu = html_3.xpath('div[2]/span/span[3]/b/text()')
        # These four were previously indexed [0] unconditionally and would
        # raise IndexError on ad/placeholder <li> entries that lack the
        # element; guard them the same way as the optional fields below.
        mingzi = html_3.xpath('div[2]/div[1]/a[1]/h4/text()')
        xingji = html_3.xpath('div[2]/div[2]/span/@title')
        leixing = html_3.xpath('div[2]/div[3]/a[1]/span/text()')
        dizhi = html_3.xpath('div[2]/div[3]/span/text()')
        info = {
            '店名': mingzi[0] if len(mingzi) != 0 else " ",
            '星级': xingji[0] if len(xingji) != 0 else " ",
            '评论数': pinglun[0] if len(pinglun) != 0 else " ",
            '均价': prices[0] if len(prices) != 0 else " ",
            '类型': leixing[0] if len(leixing) != 0 else " ",
            '商区': shangqu[0] if len(shangqu) != 0 else " ",
            '地址': dizhi[0] if len(dizhi) != 0 else " ",
            '口味': kouwei[0] if len(kouwei) != 0 else " ",
            '环境': huanjing[0] if len(huanjing) != 0 else " ",
            '服务': fuwu[0] if len(fuwu) != 0 else " "
        }
        info_list.append(info)
    # Throttle between pages to reduce the chance of being blocked.
    time.sleep(3)
if __name__ == '__main__':
    # Crawl the Shenzhen food channel (ch10), then dump everything scraped
    # into a CSV on the desktop.
    url = 'http://www.dianping.com/shenzhen/ch10'
    get_url(url)
    data = pd.DataFrame(info_list, columns=['店名', "星级", "评论数", "均价", "类型", "商区", "地址", "口味", "环境", "服务"])
    print(data)
    # gb18030 so the file opens correctly in Excel on Chinese Windows.
    # NOTE(review): mode='a+' appends, but header=True rewrites the header
    # row on every run — confirm whether repeated runs are intended.
    data.to_csv(r'C:\Users\Administrator\Desktop\大众点评.csv', header=True, index=False, mode='a+', encoding='gb18030')