MongoDB第一段代码,xiaozhu短租数据

import time

import pymongo
import requests
from bs4 import BeautifulSoup

# MongoDB handles: local server -> database 'duanzu' -> collection 'sheet_lines'.
client = pymongo.MongoClient('localhost', 27017)
duanzu = client['duanzu']
sheet_lines = duanzu['sheet_lines']

# Search-result page URLs; range(1, 3, 1) yields page numbers 1 and 2.
url = [
    'http://bj.xiaozhu.com/search-duanzufang-p{}-0/?startDate=2016-06-19&endDate=2016-06-19'.format(i)
    for i in range(1, 3, 1)
]

# NOTE(review): not referenced anywhere in the visible code; kept in case
# something outside this view uses it.
lianjie1 = []

def sexss(valuse):
    """Translate gender-icon tags into gender labels.

    Each element of ``valuse`` must expose ``get('class')`` returning a
    sequence of CSS class names (or ``None``). Only the first class name is
    inspected:

    * ``'member_ico1'`` -> ``'女'``
    * ``'member_ico'``  -> ``'男'``
    * anything else, or no class at all -> ``'性别未知'``

    Args:
        valuse: Iterable of tag-like objects.

    Returns:
        A list of labels, one per input element, in input order.
    """
    labels_by_class = {
        'member_ico1': '女',
        'member_ico': '男',
    }

    genders = []
    for tag in valuse:
        # A missing or empty class attribute must not crash the scrape;
        # treat it like any other unrecognised icon.
        classes = tag.get('class') or []
        first_class = classes[0] if classes else None
        genders.append(labels_by_class.get(first_class, '性别未知'))
    return genders

def lian(url1, timeout=10):
    """Scrape every listing linked from one search-results page into MongoDB.

    Downloads ``url1``, collects every element whose inline style is
    ``cursor:pointer``, and follows the URL in each element's ``detailurl``
    attribute. Each detail page is parsed into records that are inserted
    into ``sheet_lines`` and then printed.

    Args:
        url1: URL of a search-results page.
        timeout: Seconds passed to ``requests.get`` for every request, so a
            stalled connection cannot hang the script indefinitely.

    Returns:
        None. Results are written to ``sheet_lines`` and to stdout.

    Raises:
        ValueError: If a price text cannot be converted with ``int``.
        requests.RequestException: On network failure or timeout.
    """
    search_page = requests.get(url1, timeout=timeout)
    search_soup = BeautifulSoup(search_page.text, 'lxml')

    for link in search_soup.find_all(style='cursor:pointer'):
        detail_url = link.get('detailurl')
        if not detail_url:
            # Element has the pointer style but carries no detail link.
            continue

        # Throttle: wait one second before each detail-page request.
        time.sleep(1)
        detail_page = requests.get(detail_url, timeout=timeout)
        soup = BeautifulSoup(detail_page.text, 'lxml')

        titles = soup.select(' h4 > em')
        addresss = soup.select('p > span.pr5')
        prices = soup.select('div.day_l > span')
        images = soup.find_all(id='curBigImage')
        imagespeople = soup.select('div.member_pic > a > img')
        name_oweners = soup.select('div.w_240 > h6 > a')
        # One gender label per icon tag, aligned with the other columns.
        genders = sexss(soup.select('div.member_pic > div'))

        info = []
        columns = zip(titles, addresss, prices, images, imagespeople,
                      genders, name_oweners)
        for title, address, price, image, imagepeople, sex, name_owener in columns:
            data = {
                'title': title.get_text(),
                'address': address.get_text(),
                'price': int(price.get_text()),
                'image': image.get('src'),
                'imagepeople': imagepeople.get('src'),
                # Store this row's own label rather than the whole list.
                'sex': sex,
                'name_owener': name_owener.get_text(),
            }
            info.append(data)
            sheet_lines.insert_one(data)

        for row in info:
            print(row['title'], row['address'], str(row['price']) + '¥',
                  row['image'], row['imagepeople'], row['sex'],
                  row['name_owener'])

# Scrape each search-results page; lian() stores and prints what it finds
# and returns nothing, so its result is not kept.
for page_url in url:
    lian(page_url)

# Print every stored listing whose price is at least 500.
for item in sheet_lines.find({'price': {'$gte': 500}}):
    print(item)

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容