import requests
from bs4 import BeautifulSoup
# Request headers sent with every page fetch: a desktop Chrome User-Agent
# string is supplied in place of the HTTP client's default one.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1;WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/65.0.3298.4 Safari/537.36'
    ),
}
def gettop250(url, timeout=10):
    """Fetch one listing page and print selected fields of each item row.

    The page at *url* is downloaded with the module-level ``headers`` and
    parsed with the ``lxml`` parser. For every ``tr.item`` row the
    following are printed, one per line, in this order:

    * the ``href`` of the first ``td>a`` link,
    * the ``src`` of the first ``img`` inside that link,
    * the ``title`` attribute of the first ``div.pl2>a`` link,
    * the text of the first ``p.pl`` element,
    * the text of the first ``span.inq`` element.

    Any element that is missing from a row is skipped instead of
    aborting the whole page with an ``IndexError``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script indefinitely.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error response rather than silently parsing an
    # error page that contains no item rows.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    for row in soup.select('tr.item'):
        link = row.select_one('td>a')
        cover = link.select_one('img') if link is not None else None
        title_link = row.select_one('div.pl2>a')
        details = row.select_one('p.pl')
        quote = row.select_one('span.inq')

        if link is not None:
            print(link.get("href"))
        if cover is not None:
            print(cover.get("src"))
        if title_link is not None:
            print(title_link.get('title'))
        if details is not None:
            print(details.get_text())
        if quote is not None:
            print(quote.get_text())
# Request the listing once per `start` value: 0, 25, 50, ..., 225
# (ten requests in total), printing the rows of each page as it is fetched.
for i in range(0, 250, 25):
    page_url = "https://book.douban.com/top250?start={}".format(i)
    gettop250(page_url)