作业1

import time

import requests
from bs4 import BeautifulSoup

# Detail-page URL of a single 58.com listing. Within this file it is only
# referenced by a commented-out get_58_goods_page(url) call.
url = (
    'http://bj.58.com/pingbandiannao/25936435448255x.shtml'
    '?psid=110197818191709710732024550&entinfo=25936435448255_0&iuType=p_0'
)

def get_58_goods_page(url):
    """Fetch one 58.com listing detail page and print its key fields.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup
    using the ``lxml`` parser. For every matched record a dict with the keys
    ``cate``, ``title``, ``time``, ``price``, ``location`` and ``url`` is
    printed to stdout.

    Args:
        url: Address of the listing detail page.

    Returns:
        None. The extracted records are printed, not returned.
    """
    # A timeout keeps a single stalled connection from hanging the whole crawl.
    wb_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    # Shared ancestor chain of the title / price / location selectors.
    ad_left = 'div.person_add_top.no_ident_top > div.per_ad_left'
    summary_span = ad_left + ' > div.col_sub.sumary > ul > li > div.su_con > span'

    cates = soup.find_all('a', href='http://bj.58.com/pbdn/')
    titles = soup.select(ad_left + ' > div.col_sub.mainTitle > h1')
    times = soup.select('ul.mtit_con_left.fl > li.time')
    prices = soup.select(summary_span)
    locations = soup.select(summary_span + ' > a')

    # Join every location link text with '-'. One link gives "a", two give
    # "a-b"; any other count no longer leaves the value undefined.
    location_text = '-'.join(link.get_text() for link in locations)

    # `locations` stays in the zip so that a page without any location link
    # still produces no record; its per-item value is not needed here.
    # The loop variable is named `posted` so it does not shadow the `time` module.
    for cate, title, posted, price, _ in zip(cates, titles, times, prices, locations):
        data = {
            'cate': cate.get_text(),
            'title': title.get_text(),
            'time': posted.get_text(),
            'price': price.get_text() + '元',
            'location': location_text,
            'url': url,
        }
        print(data)

# get_58_goods_page(url)

# Index-page URL prefix; number_of_url appends the page number to it.
url2 = 'http://bj.58.com/pbdn/0/pn'

def get_58_index_page(url):
    """Collect listing detail links from one 58.com index (list) page.

    Args:
        url: Address of the index page.

    Returns:
        A list of ``href`` strings, in document order, taken from the
        ``a.t`` anchors whose href contains ``'entinfo'`` and contains
        neither ``'zhineng'`` nor ``'jing'``.
    """
    # A timeout keeps a single stalled connection from hanging the whole crawl.
    wb_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    links = []
    for anchor in soup.select('a.t'):
        href = anchor.get('href')
        # Tag.get returns None when the anchor has no href attribute; a
        # substring test on None would raise TypeError, so skip such anchors.
        if not href:
            continue
        # NOTE(review): 'zhineng' / 'jing' presumably mark promoted listings
        # that should be excluded - confirm against the live page markup.
        if 'entinfo' in href and 'zhineng' not in href and 'jing' not in href:
            links.append(href)
    return links

def number_of_url(start, end, url):
    """Crawl a range of index pages and print every listing found on them.

    Args:
        start: First page number to visit.
        end: Last page number to visit (inclusive).
        url: Index-page URL prefix; the page number is appended to it.

    Returns:
        None.
    """
    index_urls = (url + str(page_no) for page_no in range(start, end + 1))
    for index_url in index_urls:
        for detail_url in get_58_index_page(index_url):
            # Pause two seconds before each detail-page request.
            time.sleep(2)
            get_58_goods_page(detail_url)

# Script entry: crawl index pages 1 and 2 under the url2 prefix.
number_of_url(start=1, end=2, url=url2)

最后编辑于
©著作权归作者所有，转载或内容合作请联系作者
平台声明：文章内容（如有图片或视频亦包括在内）由作者上传并发布，文章内容仅代表作者本人观点，简书系信息发布平台，仅提供信息存储服务。

推荐阅读更多精彩内容

  • import requests from bs4 import BeautifulSoup import time url=...
    xilixjd阅读 384评论 0 0
  • from bs4 import BeautifulSoup xjd1=[] with open('index.html',...
    xilixjd阅读 207评论 0 0
  • import urllib.request from bs4 import BeautifulSoup import req...
    xilixjd阅读 220评论 0 0
  • 这是网易微专业之《前端工程师》CSS单元测试题,我做了两次,第一次在没上课之前做的,全靠边写边测试,错了五六题吧,...
    荷小音阅读 4,528评论 3 5
  • 《大学肆年》目录 唐英、刘敏这对金童玉女是人人羡慕的模范情侣,像是一对花丛中热恋的蝴蝶,出出入入成双成对,唐英每天...
    帝恶道阅读 627评论 0 0