代码:
import requests
from bs4 import BeautifulSoup
#header={
# 'User-Agent':'Mozilla/5.0(Windows NT 10.0;Win64;x64;rv:58.0)Gecko/20100101 Firefox/58.0',
# 'Connection':'keep-alive'
# }
def page_link(pages=range(1, 25),
              out_path=r'C:\Users\Administrator\Desktop\test.txt',
              timeout=10):
    """Scrape Dangdang 24-hour bestseller ranking pages and record each book.

    For every page number in ``pages`` the ranking page is downloaded, every
    ``.bang_list_mode > li`` entry is parsed, the extracted fields are printed
    as a dict, and a plain-text record followed by a line of 100 asterisks is
    appended to ``out_path``.

    Args:
        pages: Iterable of page numbers substituted into the ranking URL.
            Defaults to pages 1 through 24.
        out_path: Path of the UTF-8 text file that records are appended to.
            It is opened (and created if missing) before any page is fetched.
        timeout: Seconds to wait for each HTTP response; without it a
            stalled connection would block the run indefinitely.

    Raises:
        requests.RequestException: If a page cannot be fetched in time.
        IndexError: If a list entry lacks one of the expected elements.
    """
    url_template = ('http://bang.dangdang.com/books/bestsellers/'
                    '01.00.00.00.00.00-24hours-0-0-1-{}')
    separator = '*' * 100

    # Open the output once for the whole run; the ``with`` block closes it,
    # so no explicit close() is needed.
    with open(out_path, 'a', encoding='utf-8') as f:
        for page in pages:
            response = requests.get(url_template.format(page), timeout=timeout)
            # NOTE: an earlier debug print of response.encoding was annotated
            # "Gb2312"; response.text is decoded with whatever encoding
            # requests detects for the page.
            soup = BeautifulSoup(response.text, 'lxml')

            for book in soup.select('.bang_list_mode > li'):
                name = book.select('.name')[0].text
                href = book.select('.name > a')[0].attrs['href']
                # The same anchor carries both the review link and its label.
                comment_anchor = book.select('.star > a')[0]
                comment = comment_anchor.attrs['href']
                comment_num = comment_anchor.text
                author = book.select('.publisher_info')[0].text
                price = book.select('.price .price_n')[0].text

                record = {
                    '书名': name,
                    'url': href,
                    '作者': author,
                    '评论': comment_num,
                    '评论链接': comment,
                    '价格': price
                }
                print(record)
                print(separator)

                f.write(name + href + '\n' + author + price + '\n'
                        + comment_num + comment + '\n')
                f.write(separator + '\n')
# Start the scrape only when this file is run directly as a script.
if __name__ == "__main__":
    page_link()
结果显示: