@@ -41,6 +41,7 @@ class Crawler(object):
     Attributes:
         queue (:class:`nyawc.Queue`): The request/response pair queue containing everything to crawl.
         __options (:class:`nyawc.Options`): The options to use for the current crawling runtime.
+        __should_spawn_new_requests (bool): If the crawler should start spawning new requests.
         __should_stop (bool): If the crawler should stop the crawling process.
         __stopping (bool): If the crawler is stopping the crawling process.
         __stopped (bool): If the crawler finished stopping the crawling process.
@@ -59,6 +60,7 @@ def __init__(self, options):

         self.queue = Queue(options)
         self.__options = options
+        self.__should_spawn_new_requests = False
         self.__should_stop = False
         self.__stopping = False
         self.__stopped = False
@@ -101,6 +103,8 @@ def __spawn_new_requests(self):

         """

+        self.__should_spawn_new_requests = False
+
         in_progress_count = len(self.queue.get_all(QueueItem.STATUS_IN_PROGRESS))

         while in_progress_count < self.__options.performance.max_threads:
@@ -140,6 +144,10 @@ def __crawler_start(self):
         The main thread will sleep until the crawler is finished. This enables
         quitting the application using sigints (see http://stackoverflow.com/a/11816038/2491049).

+        Note:
+            `__crawler_stop()` and `__spawn_new_requests()` are called here on the main thread to
+            prevent thread recursion and deadlocks.
+
         """

         try:
@@ -154,6 +162,9 @@ def __crawler_start(self):
             if self.__should_stop:
                 self.__crawler_stop()

+            if self.__should_spawn_new_requests:
+                self.__spawn_new_requests()
+
             time.sleep(1)

     def __crawler_stop(self):
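Pieced together from the two hunks above, the main-thread loop in `__crawler_start()` now looks roughly like the sketch below. Only the lines visible in the diff are certain; the `while not self.__stopped` condition and surrounding structure are assumed from context.

```python
# Assumed shape of the polling loop in __crawler_start(); only the flag checks
# and the one-second sleep appear verbatim in the diff above.
while not self.__stopped:
    if self.__should_stop:
        self.__crawler_stop()

    if self.__should_spawn_new_requests:
        self.__spawn_new_requests()

    time.sleep(1)
```

Because both calls happen inside this loop, worker threads never invoke them directly; they only flip the corresponding flags, which is what the remaining hunks change.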
@@ -208,7 +219,7 @@ def __request_start(self, queue_item):

         if action == CrawlerActions.DO_SKIP_TO_NEXT:
             self.queue.move(queue_item, QueueItem.STATUS_FINISHED)
-            self.__spawn_new_requests()
+            self.__should_spawn_new_requests = True

         if action == CrawlerActions.DO_CONTINUE_CRAWLING or action is None:
             self.queue.move(queue_item, QueueItem.STATUS_IN_PROGRESS)
@@ -251,7 +262,7 @@ def __request_finish(self, queue_item, new_requests, request_failed=False):
             self.__should_stop = True

         if action == CrawlerActions.DO_CONTINUE_CRAWLING or action is None:
-            self.__spawn_new_requests()
+            self.__should_spawn_new_requests = True

     def __add_scraped_requests_to_queue(self, queue_item, scraped_requests):
         """Convert the scraped requests to queue items, return them and also add them to the queue.
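The pattern behind this change (request callbacks running on worker threads only set a flag, and the main thread's polling loop performs the actual call) can be reproduced in isolation. The sketch below is illustrative only; `FlagLoop`, `_work()` and `on_worker_done()` are invented names for the example and are not nyawc code.

```python
# Illustrative toy only: mirrors the "worker sets a flag, main loop acts on it"
# handoff from the diff above.
import threading
import time


class FlagLoop:

    def __init__(self):
        self._should_spawn = False   # analogous to __should_spawn_new_requests
        self._should_stop = False    # analogous to __should_stop
        self._spawned = 0

    def on_worker_done(self):
        # Runs on a worker thread: only flip the flag, never spawn from here.
        self._should_spawn = True

    def _spawn(self):
        # Runs on the main thread only, so spawning never nests inside a worker.
        self._should_spawn = False
        self._spawned += 1
        if self._spawned >= 3:
            self._should_stop = True
        else:
            threading.Thread(target=self._work).start()

    def _work(self):
        time.sleep(0.1)              # pretend to perform a request
        self.on_worker_done()        # signal the main loop instead of recursing

    def run(self):
        threading.Thread(target=self._work).start()
        while not self._should_stop:
            if self._should_spawn:
                self._spawn()
            time.sleep(0.1)


if __name__ == "__main__":
    FlagLoop().run()
```

A plain boolean is enough for this sketch (attribute assignment does not tear under the GIL); code that needs stronger guarantees could use a `threading.Event` instead.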