# Third-party dependencies: requests (HTTP), lxml (HTML parsing), xlwt (.xls output)
import requests
import xlwt
from lxml import etree
all_info_list=[]
info_list=[]
c=[]
all_id_list=[]
all_content_list=[]
all_laugh_list=[]
all_comment_list=[]
#url='https://www.qiushibaike.com/text/page/1.html'
def get_info(url):
res = requests.get(url)
html = etree.HTML(res.text)
id = html.xpath('//div[@class="author clearfix"]//h2/text()')
#id = html.xpath('//div[@class="col1"]/div/div/a/h2/text()')
for id1in id:
all_id_list.append(id1)
# id= html.xpath('//div[@class="col1"]/div/div/span/h2/text()')
content=html.xpath('//div[@class="content"]//span')
for nrin content:
if nr.attrib =='':
all_content_list.append(nr.text)
laugh=html.xpath('//div[@class="stats"]/span/i/text()')
for laugh1in laugh:
all_laugh_list.append(laugh1)
comment=html.xpath('//span[@class="stats-comments"]/a/i/text()')
# print(comment)
for comment1in comment:
all_comment_list.append(comment1)
# all_id_list.append(id)
# all_content_list.append(c)
# all_laugh_list.append(laugh)
# all_comment_list.append(comment)
# print(all_laugh_list)
if __name__ =='__main__':
book = xlwt.Workbook(encoding='utf-8')
sheet = book.add_sheet('Sheet1')
header = ['id','content','laugh','comment']
for tin range(len(header)):
sheet.write(0, t, header[t])
urls = ['https://www.qiushibaike.com/text/page/{}'.format(str(i))for iin range(1,14)]
for urlin urls:
get_info(url)
# 写用户名列
i =1
j =0
for datain all_id_list:
sheet.write(i, j, data)
i +=1
# 写content列
i =1
j =1
# for data in content:
for datain all_content_list:
sheet.write(i, j, data)
i +=1
# 写laugh列
i =1
j =2
for datain all_laugh_list:
sheet.write(i, j, data)
i +=1
# 写comment列
i =1
j =3
for datain all_comment_list:
sheet.write(i, j, data)
i +=1
book.save('C:/Users/madin/Desktop/pytest.xls')