我个人建议每次需要抓取时都启动一个新的爬虫(spider);但如果您希望让同一个进程持续运行下去,建议使用 `spider_idle` 信号:
@classmethod
def from_crawler(cls, crawler, *args, **kwargs):
    """Build the spider via the parent ``from_crawler`` and wire up its signal handlers.

    Args:
        crawler: Crawler object whose ``signals`` manager receives the
            handler registrations.
        *args: Positional arguments forwarded unchanged to the parent
            ``from_crawler``.
        **kwargs: Keyword arguments forwarded unchanged to the parent
            ``from_crawler``.

    Returns:
        The spider instance produced by the parent ``from_crawler``.
    """
    instance = super(MySpider, cls).from_crawler(crawler, *args, **kwargs)
    signal_manager = crawler.signals
    # ``spider_closed`` is expected to exist on the spider (not shown in this
    # snippet); ``spider_idle`` is the handler that schedules further requests.
    signal_manager.connect(instance.spider_closed, signals.spider_closed)
    signal_manager.connect(instance.spider_idle, signals.spider_idle)
    return instance
...
def spider_idle(self, spider):
    """Handle the ``spider_idle`` signal by handing one more request to the engine.

    Args:
        spider: Spider object delivered with the signal; passed straight
            through to the engine's ``crawl`` call.
    """
    # Placeholder step: read the database again here to find the next URL(s).
    # NOTE(review): ``new_url`` is not defined in the visible code; presumably
    # it comes from that database read -- confirm before use.
    # Reminder from the original author: verify that sending new requests from
    # this handler behaves as intended (it is a different path than a callback).
    # NOTE(review): newer Scrapy releases reportedly deprecate passing the
    # spider to ``engine.crawl`` -- verify against the installed version.
    schedule = self.crawler.engine.crawl
    follow_up = Request(new_url, callback=self.parse)
    schedule(follow_up, spider)