Scraping the 58.com Shanghai second-hand market (Zhuanzhuan)
Workflow: spider 1 collects item links from each category's listing pages into MongoDB, and spider 2 visits every stored link to save the item's title, price and area.
from bs4 import BeautifulSoup
import requests
import time
import pymongo

# Connect to the local MongoDB instance and set up the database and collections
client = pymongo.MongoClient('localhost', 27017)
ceshi = client['ceshi']
url_list = ceshi['url_list']     # item page URLs gathered by spider 1
item_info = ceshi['url_info']    # item details saved by spider 2
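A quick sanity check can confirm the MongoDB connection before crawling. This is an optional sketch, assuming mongod is running locally on the default port and pymongo 3.7 or newer:

print(client.list_database_names())                                  # databases visible to this client
print(url_list.count_documents({}), item_info.count_documents({}))   # rows stored so far (0 on a fresh run)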
def get_links_from(channel, pages, who_sells=0):
    # spider 1: collect every item link from one listing page of a category
    list_view = '{}{}/pn{}/'.format(channel, str(who_sells), str(pages))
    wb_data = requests.get(list_view)
    time.sleep(1)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    if soup.find('td', 't'):
        for link in soup.select('td.t > a.t'):
            item_link = link.get('href').split('?')[0]
            url_list.insert_one({'url': item_link})
            print(item_link)
    else:
        # no listing rows on this page, nothing to store
        pass
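For example, with a category URL such as 'http://sh.58.com/shouji/' (an illustrative value, not one taken from the original), the call below would request the listing page http://sh.58.com/shouji/0/pn1/ and store every item link it finds:

# Example call; the category URL is illustrative only
# get_links_from('http://sh.58.com/shouji/', 1)    # requests http://sh.58.com/shouji/0/pn1/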
def get_item_info(url):
    # spider 2: fetch an item detail page and store its title, price and area
    wb_data = requests.get(url)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    if soup.select('span.soldout_btn'):
        print('Item no longer exists')
    else:
        title = soup.title.text
        price = soup.select('div.price_li > span > i')[0].text
        area = soup.select('div.palce_li > span > i')[0].text
        item_info.insert_one({'title': title, 'price': price, 'area': area})
        print(title, price, area)
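A minimal driver tying the two spiders together could look like the sketch below. The channel list and the 100-page range are placeholders rather than values from the original, and the only rate limiting is the time.sleep(1) already inside get_links_from:

channel_list = [
    'http://sh.58.com/shouji/',   # placeholder category URL; replace with the real channel list
]

if __name__ == '__main__':
    # spider 1: collect item links from every listing page of every channel
    for channel in channel_list:
        for page in range(1, 101):
            get_links_from(channel, page)
    # spider 2: visit each stored link and save the item details
    for row in url_list.find():
        get_item_info(row['url'])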