作业1_3

import time

import requests
from bs4 import BeautifulSoup

# Address of a single listing detail page.
# NOTE(review): nothing in the visible code reads this global -- both functions
# take their own `url` parameter and the driver loop iterates `url1` -- so it
# looks like a leftover from manual testing; confirm before removing.
url='http://bj.xiaozhu.com/fangzi/597754001.html'

def _owner_gender(markup):
    """Classify the owner's badge markup as '女' (female), '男' (male) or '空' (empty)."""
    # 'member_ico' is a prefix of 'member_ico1', so the longer class name has
    # to be tested first; the second test then needs no extra exclusion.
    if 'member_ico1' in markup:
        return '女'
    if 'member_ico' in markup:
        return '男'
    return '空'


def house_info(url, data=None):
    """Download one listing detail page and print a record for what it finds.

    The page is parsed with BeautifulSoup (``lxml`` parser) and queried for
    title, address, price, main photo, owner avatar, owner name and the
    owner's gender badge. The per-field result lists are combined with
    ``zip``, so the number of records printed equals the length of the
    shortest list; if any field is missing from the page nothing is printed.

    Args:
        url: Address of the listing detail page to download.
        data: Unused. Kept only so callers that pass it keep working.

    Returns:
        None. Each record is written to stdout with ``print``.

    Raises:
        requests.exceptions.RequestException: If the download fails or does
            not complete within the timeout.
    """
    # A finite timeout keeps one stalled connection from hanging the crawl.
    wb_data = requests.get(url, timeout=10)
    # Pause after every request so consecutive page fetches are spaced out.
    time.sleep(1)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    titles = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em')
    addresses = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p > span.pr5')
    prices = soup.select(' div.day_l > span')
    house_images = soup.find_all(id='curBigImage')
    owner_images = soup.select(' div.js_box.clearfix > div.member_pic > a > img')
    owner_names = soup.select(' div.js_box.clearfix > div.w_240 > h6 > a')

    # The gender is only recoverable from the CSS class on the badge <div>,
    # so classify the element's serialized markup.
    badges = soup.select('div.js_box.clearfix > div.member_pic > div')
    genders = [_owner_gender(str(badge)) for badge in badges]

    rows = zip(titles, addresses, prices, house_images,
               owner_images, owner_names, genders)
    for title, address, price, house_image, owner_image, owner_name, gender in rows:
        record = {
            'title': title.get_text(),
            'address': address.get_text().rstrip(),
            'price': price.get_text(),
            'house_image': house_image.get('src'),
            'owner_image': owner_image.get('src'),
            'owner_name': owner_name.get_text(),
            'sex': gender,
        }
        print(record)

# Search-results page URLs for page numbers 1 through 11 (range end is exclusive).
url1 = ['http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(i) for i in range(1, 12)]

def multi_info(url):
    """Scrape every listing linked from one search-results page.

    Downloads ``url``, selects the ``a.resule_img_a`` anchors and passes each
    anchor's ``href`` to ``house_info``, which prints the listing's details.

    Args:
        url: Address of the search-results page to download.

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException: If the download fails or does
            not complete within the timeout.
        KeyError: If a matched anchor has no ``href`` attribute.
    """
    # A finite timeout keeps one stalled connection from hanging the crawl.
    wb_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    # NOTE(review): 'resule_img_a' is kept exactly as the original selector
    # spelled it; presumably it mirrors the site's own class name -- check the
    # page markup before "correcting" it.
    for link in soup.select('a.resule_img_a'):
        house_info(str(link['href']))

# Crawl the search-results pages one after another, in list order.
for url2 in url1:
    multi_info(url2)

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成,浏览时请结合常识与多方信息审慎甄别。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容

友情链接更多精彩内容