在 middlewares.py 文件中编写随机 User-Agent 下载中间件类:
from fake_useragent import UserAgent
class RandomUserAgentMiddleware(object):
    """Downloader middleware that fills in a random ``User-Agent`` header.

    The value is taken from ``fake_useragent.UserAgent().random`` and is only
    applied when the request does not already carry a ``User-Agent`` header.
    """

    def __init__(self, crawler):
        """Create the middleware and its ``UserAgent`` source.

        Args:
            crawler: The crawler handed over by ``from_crawler``. It is
                accepted to keep the constructor signature stable and is not
                used here.
        """
        super(RandomUserAgentMiddleware, self).__init__()
        self.ua = UserAgent()

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware instance from ``crawler``."""
        return cls(crawler)

    def process_request(self, request, spider):
        """Set a random ``User-Agent`` on ``request`` if none is present.

        Scrapy's downloader-middleware hook is named ``process_request``
        (singular); under the previous ``process_requests`` spelling this
        method was never invoked by the framework.

        Args:
            request: The outgoing request; its ``headers`` mapping is updated
                in place.
            spider: The spider issuing the request (unused).

        Returns:
            None, which lets the request continue through the remaining
            middlewares.
        """
        # NOTE(review): setdefault leaves an existing header untouched, so
        # this middleware must run before anything else that sets
        # User-Agent (e.g. Scrapy's built-in UserAgentMiddleware) -- confirm
        # the ordering in DOWNLOADER_MIDDLEWARES.
        request.headers.setdefault("User-Agent", self.ua.random)

    # Alias kept so code that called the old, misspelled name still works.
    process_requests = process_request
通过 custom_settings 为每个 spider 单独指定私有配置:
class MySpider(scrapy.Spider):
    """Example spider that declares its own per-spider settings."""

    name = "myspider"

    # Per-spider overrides, declared on the class alongside the spider name.
    custom_settings = dict(SOME_SETTING="some value")
# custom_settings 的优先级高于 settings.py:其中的配置项会覆盖 settings.py 中的同名配置。