# This script scrapes the title, category, author, and rank of every book on the
# Sina.com -> Fiction -> boys' novel popularity ranking.
# The scraped data is saved to test.xls.
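# Dependencies (all imported below): requests, lxml, xlwt
#   pip install requests lxml xlwt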
import requests
from lxml import etree
import xlwt
all_info_list = []  # accumulates one [rank, title, category, author] list per book


def get_info(url_1):
    """Scrape one ranking page and append each book's rank, title, category, and author."""
    res = requests.get(url_1)
    html = etree.HTML(res.text)
    # Each book occupies one <tr> of the ranking table
    info_s = html.xpath('//table[@class="child-t-tab"]/tbody/tr')
    for info in info_s:
        rank = info.xpath('td[1]')[0].xpath('string(.)').strip()          # ranking position
        novel_book = info.xpath('td[2]/a')[0].xpath('string(.)').strip()  # book title
        sort = info.xpath('td[3]/a')[0].xpath('string(.)').strip()        # category
        author = info.xpath('td[4]')[0].xpath('string(.)').strip()        # author
        all_info_list.append([rank, novel_book, sort, author])


if __name__ == '__main__':
    # Create the workbook and write the header row
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('Sheet1')
    header = ['rank', 'novel_book', 'sort', 'author']
    for col, name in enumerate(header):
        sheet.write(0, col, name)

    # Pages 1-4 of the ranking list
    urls = ['http://vip.book.sina.com.cn/weibobook/ranklist.php?channel=boy&type=click&pos=20097&vt=4&page={}'.format(i) for i in range(1, 5)]
    for url in urls:
        get_info(url)

    # Write the scraped rows below the header, one book per row
    for i, row in enumerate(all_info_list, start=1):
        for j, data in enumerate(row):
            sheet.write(i, j, data)
    book.save('C:/Users/madin/Desktop/test.xls')