import requests
from lxml import etree
import xlwt
# Accumulates one [title, laughs, comments, author] row per scraped joke.
all_info_list = []


def get_info(url):
    """Scrape one qiushibaike recommendation page and append each joke's
    fields to the module-level ``all_info_list``.

    Parameters
    ----------
    url : str
        URL of one page of the recommendation feed.
    """
    # A timeout prevents the whole run from hanging on a dead connection.
    res = requests.get(url, timeout=10)
    html = etree.HTML(res.text)
    infos = html.xpath('//div[@class="recommend-article"]/ul/li/div[@class="recmd-right"]')
    for info in infos:
        # xpath() returns a *list* of text nodes.  The original code appended
        # those raw lists, which xlwt's sheet.write() cannot serialize later —
        # extract the first match (or '' when the node is missing) so that
        # plain strings end up in the spreadsheet cells.
        title = info.xpath('a/text()')
        laughs = info.xpath('div/div/span[1]/text()')
        comments = info.xpath('div/div/span[4]/text()')
        author = info.xpath('div/a/span/text()')  # renamed: `id` shadows the builtin
        row = [
            title[0] if title else '',
            laughs[0] if laughs else '',
            comments[0] if comments else '',
            author[0] if author else '',
        ]
        all_info_list.append(row)
if __name__ == '__main__':
    # Build the workbook and write the header row (row 0).
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('Sheet1')
    header = ['题目', '好笑数', '评论数', '作者']
    for col, caption in enumerate(header):
        sheet.write(0, col, caption)

    # Pages 1-13 of the recommendation feed; get_info() appends rows
    # to the module-level all_info_list as a side effect.
    urls = ['https://www.qiushibaike.com/8hr/page/{}/'.format(i) for i in range(1, 14)]
    for url in urls:
        get_info(url)

    # Data rows start at row 1, directly below the header.
    # enumerate replaces the manual i/j counters, and `row` avoids
    # shadowing the builtin `list` as the original loop variable did.
    for row_idx, row in enumerate(all_info_list, start=1):
        for col_idx, value in enumerate(row):
            sheet.write(row_idx, col_idx, value)
    book.save('C:/Users/madin/Desktop/糗事百科.xls')