@@ -41,6 +41,7 @@ class Crawler(object):
     Attributes:
         queue (:class:`nyawc.Queue`): The request/response pair queue containing everything to crawl.
         __options (:class:`nyawc.Options`): The options to use for the current crawling runtime.
+        __should_spawn_new_requests (bool): If the crawler should start spawning new requests.
         __should_stop (bool): If the crawler should stop the crawling process.
         __stopping (bool): If the crawler is stopping the crawling process.
         __stopped (bool): If the crawler finished stopping the crawling process.
@@ -59,6 +60,7 @@ def __init__(self, options):

         self.queue = Queue(options)
         self.__options = options
+        self.__should_spawn_new_requests = False
         self.__should_stop = False
         self.__stopping = False
         self.__stopped = False
@@ -101,6 +103,8 @@ def __spawn_new_requests(self):

         """

+        self.__should_spawn_new_requests = False
+
         in_progress_count = len(self.queue.get_all(QueueItem.STATUS_IN_PROGRESS))

         while in_progress_count < self.__options.performance.max_threads:
@@ -140,6 +144,10 @@ def __crawler_start(self):
         The main thread will sleep until the crawler is finished. This enables
         quitting the application using sigints (see http://stackoverflow.com/a/11816038/2491049).

+        Note:
+            `__crawler_stop()` and `__spawn_new_requests()` are called here on the main thread to
+            prevent thread recursion and deadlocks.
+
         """

         try:
@@ -154,6 +162,9 @@ def __crawler_start(self):
             if self.__should_stop:
                 self.__crawler_stop()

+            if self.__should_spawn_new_requests:
+                self.__spawn_new_requests()
+
             time.sleep(1)

     def __crawler_stop(self):
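Pieced together from the two hunks above, the main-thread loop in `__crawler_start()` now looks roughly like the sketch below. Only the lines visible in the diff are certain; the `while not self.__stopped` condition and surrounding structure are assumed from context.

```python
# Assumed shape of the polling loop in __crawler_start(); only the flag checks
# and the one-second sleep appear verbatim in the diff above.
while not self.__stopped:
    if self.__should_stop:
        self.__crawler_stop()

    if self.__should_spawn_new_requests:
        self.__spawn_new_requests()

    time.sleep(1)
```

Because both calls happen inside this loop, worker threads never invoke them directly; they only flip the corresponding flags, which is what the remaining hunks change.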
@@ -208,7 +219,7 @@ def __request_start(self, queue_item):

         if action == CrawlerActions.DO_SKIP_TO_NEXT:
             self.queue.move(queue_item, QueueItem.STATUS_FINISHED)
-            self.__spawn_new_requests()
+            self.__should_spawn_new_requests = True

         if action == CrawlerActions.DO_CONTINUE_CRAWLING or action is None:
             self.queue.move(queue_item, QueueItem.STATUS_IN_PROGRESS)
@@ -251,7 +262,7 @@ def __request_finish(self, queue_item, new_requests, request_failed=False):
             self.__should_stop = True

         if action == CrawlerActions.DO_CONTINUE_CRAWLING or action is None:
-            self.__spawn_new_requests()
+            self.__should_spawn_new_requests = True

     def __add_scraped_requests_to_queue(self, queue_item, scraped_requests):
         """Convert the scraped requests to queue items, return them and also add them to the queue.
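The pattern behind this change (request callbacks running on worker threads only set a flag, and the main thread's polling loop performs the actual call) can be reproduced in isolation. The sketch below is illustrative only; `FlagLoop`, `_work()` and `on_worker_done()` are invented names for the example and are not nyawc code.

```python
# Illustrative toy only: mirrors the "worker sets a flag, main loop acts on it"
# handoff from the diff above.
import threading
import time


class FlagLoop:

    def __init__(self):
        self._should_spawn = False   # analogous to __should_spawn_new_requests
        self._should_stop = False    # analogous to __should_stop
        self._spawned = 0

    def on_worker_done(self):
        # Runs on a worker thread: only flip the flag, never spawn from here.
        self._should_spawn = True

    def _spawn(self):
        # Runs on the main thread only, so spawning never nests inside a worker.
        self._should_spawn = False
        self._spawned += 1
        if self._spawned >= 3:
            self._should_stop = True
        else:
            threading.Thread(target=self._work).start()

    def _work(self):
        time.sleep(0.1)              # pretend to perform a request
        self.on_worker_done()        # signal the main loop instead of recursing

    def run(self):
        threading.Thread(target=self._work).start()
        while not self._should_stop:
            if self._should_spawn:
                self._spawn()
            time.sleep(0.1)


if __name__ == "__main__":
    FlagLoop().run()
```

A plain boolean is enough for this sketch (attribute assignment does not tear under the GIL); code that needs stronger guarantees could use a `threading.Event` instead.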