# -*- coding: utf-8 -*-
# Note: responses for start_urls are passed to the parse() method by default.
# A scrapy.Request(next_url) created without a callback argument is likewise handled by parse().
class DlSpider(scrapy.Spider):
    """Spider that saves a chapter page to disk and follows its "next" link.

    Each response is handled by :meth:`parse`, which writes the chapter to
    ``book/<title>.txt`` and then requests the page referenced by the
    ``<a class="next">`` element, until no such link is found.
    """

    name = 'dl'
    # allowed_domains = ['quanshuwang.com']
    # Responses for start_urls are passed to parse() by default.
    start_urls = ['http://www.quanshuwang.com/book/44/44683/15379609.html']

    def parse(self, response):
        """Store one chapter and schedule the next one.

        Args:
            response: The downloaded chapter page.

        Returns:
            A ``scrapy.Request`` for the next chapter (handled by this same
            method, since no callback is given), or ``None`` when the page
            has no "next" link.
        """
        # Local import: the file's import section is not part of this block.
        import os

        chapter_url = response.url
        chapter_title = response.xpath(
            '//strong[@class="l jieqi_title"]/text()'
        ).extract_first()
        # The first and last text nodes of the content div are discarded.
        chapter_content = "".join(
            response.xpath('//div[@class="mainContenr"]/text()').extract()[1:-1]
        )
        next_url = response.xpath('//a[@class="next"]/@href').extract_first()

        # extract_first() yields None when the title node is missing; fall
        # back to the last URL segment so the chapter is still saved instead
        # of crashing on string concatenation.
        if chapter_title and chapter_title.strip():
            file_stem = chapter_title
        else:
            file_stem = chapter_url.rstrip('/').rsplit('/', 1)[-1]
        # Path separators inside a title would be interpreted as directories.
        file_stem = file_stem.replace('/', '_').replace('\\', '_')

        # Make sure the output directory exists before the first write.
        os.makedirs('book', exist_ok=True)
        with open(os.path.join('book', file_stem + '.txt'), 'w', encoding='utf-8') as f:
            f.write(chapter_url)
            f.write(chapter_content + '\n')

        if next_url:
            # Resolve relative hrefs against the current page; Request
            # rejects URLs without a scheme.
            return scrapy.Request(response.urljoin(next_url))

        print('任务结束')
        return None