import pickle

import requests
from bs4 import BeautifulSoup
# Save a page's HTML to a local file (testing helper).
def html_save(url):
    """Fetch *url* and pickle ``[url, html]`` to a user-chosen local file.

    The target filename is read interactively from stdin. Used only for
    offline testing together with ``html_load``.
    """
    filename = input('请输入要保存文件名称')
    # Fetch BEFORE opening the file so a network failure does not leave
    # an empty file behind (the original opened the file first).
    html = requests.get(url).text
    # Context manager guarantees the handle is closed even if dump fails.
    with open(filename, 'wb') as f:
        pickle.dump([url, html], f)
# Read a locally saved HTML file (testing helper).
def html_load(filename):
    """Return the ``[url, html]`` list previously written by ``html_save``.

    NOTE(review): ``pickle.load`` on an untrusted file can execute
    arbitrary code — only load files this script wrote itself.
    """
    # `with` replaces the manual open/close pair: the file is closed
    # even if unpickling raises.
    with open(filename, 'rb') as f:
        return pickle.load(f)
# Parse one listing page's HTML.
def html_jx(html, all_list):
    """Extract ``[name, price]`` pairs from a 58.com listing page.

    Appends one ``[name, price]`` pair per listing to *all_list*
    (mutated in place) and returns it.
    """
    soup = BeautifulSoup(html, 'lxml')
    name_list = soup.select('a.t')
    price_list = soup.select('b.pri')
    # BUG FIX: the original indexed with an undefined variable `i`
    # (NameError at runtime). Pair names with prices positionally —
    # assumes the two selectors match element-for-element; TODO confirm
    # against the live page structure.
    for name, price in zip(name_list, price_list):
        all_list.append([name.string, price.string])
    return all_list
# Locate the link to the next results page.
def find_next(html):
    """Return the absolute URL of the next results page.

    Raises ``IndexError`` when the page has no ``a.next`` anchor
    (i.e. on the last page).
    """
    page = BeautifulSoup(html, 'lxml')
    next_anchor = page.select('a.next')[0]
    return 'http://zz.58.com' + next_anchor.get('href')
# Crawl 12 consecutive pages of laptop listings, accumulating
# [name, price] pairs, then pickle the result locally.
all_list = []
html = requests.get('http://zz.58.com/bijiben/0/?PGTID=0d100000-0015-624b-2e87-3e5214b563a9&ClickID=1').text
for each in range(12):
    all_list = html_jx(html, all_list)
    # Skip the next-page lookup on the final iteration — there is
    # nothing left to fetch.
    if each != 11:
        url_ne = find_next(html)
        html = requests.get(url_ne).text

# Persist the scraped list; `with` closes the file even if dump fails
# (the original left a manual open/close pair).
with open('lifile', 'wb') as f:
    pickle.dump(all_list, f)