Commit ba5322a

Fixed "can't start new thread" exception due to thread recursion.
1 parent 63bff8f commit ba5322a
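
Before this commit, `__spawn_new_requests()` was called directly from `__request_start()` and `__request_finish()`, which run on worker threads. Each worker could therefore start further workers from inside its own thread context, and under load the number of live threads grew until `threading.Thread.start()` failed with `RuntimeError: can't start new thread`. The fix makes the main thread the only place threads are spawned: workers merely set a `__should_spawn_new_requests` flag, and the main thread's polling loop in `__crawler_start()` acts on it.

Below is a minimal, self-contained sketch of that pattern, not nyawc's actual code: the class name, the fake request counter, and the sleep intervals are invented for illustration.

import threading
import time

class FlagBasedCrawler:
    """Illustrative stand-in for nyawc's Crawler (names and queue are made up)."""

    def __init__(self, max_threads=4, pending=20):
        self.__max_threads = max_threads
        self.__pending = pending              # fake requests left to crawl
        self.__in_progress = 0
        self.__lock = threading.Lock()
        self.__should_spawn_new_requests = False
        self.__should_stop = False

    def start(self):
        # Main-thread loop: the only place worker threads are ever started.
        self.__spawn_new_requests()
        while not self.__should_stop:
            if self.__should_spawn_new_requests:
                self.__spawn_new_requests()
            time.sleep(0.1)

    def __spawn_new_requests(self):
        self.__should_spawn_new_requests = False
        with self.__lock:
            while self.__in_progress < self.__max_threads and self.__pending > 0:
                self.__pending -= 1
                self.__in_progress += 1
                threading.Thread(target=self.__request).start()

    def __request(self):
        time.sleep(0.05)                      # simulate crawling one request
        with self.__lock:
            self.__in_progress -= 1
            finished = self.__pending == 0 and self.__in_progress == 0
        if finished:
            self.__should_stop = True
        else:
            # The worker used to call __spawn_new_requests() here, starting
            # threads from inside a thread. Now it only raises a flag; the
            # main thread does the actual spawning.
            self.__should_spawn_new_requests = True

if __name__ == "__main__":
    FlagBasedCrawler().start()
    print("crawl finished without thread recursion")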

File tree

1 file changed: +13 -2 lines changed

nyawc/Crawler.py

Lines changed: 13 additions & 2 deletions
@@ -41,6 +41,7 @@ class Crawler(object):
     Attributes:
         queue (:class:`nyawc.Queue`): The request/response pair queue containing everything to crawl.
         __options (:class:`nyawc.Options`): The options to use for the current crawling runtime.
+        __should_spawn_new_requests (bool): If the crawler should start spawning new requests.
         __should_stop (bool): If the crawler should stop the crawling process.
         __stopping (bool): If the crawler is stopping the crawling process.
         __stopped (bool): If the crawler finished stopping the crawler process.
@@ -59,6 +60,7 @@ def __init__(self, options):

         self.queue = Queue(options)
         self.__options = options
+        self.__should_spawn_new_requests = False
         self.__should_stop = False
         self.__stopping = False
         self.__stopped = False
@@ -101,6 +103,8 @@ def __spawn_new_requests(self):

         """

+        self.__should_spawn_new_requests = False
+
         in_progress_count = len(self.queue.get_all(QueueItem.STATUS_IN_PROGRESS))

         while in_progress_count < self.__options.performance.max_threads:
@@ -140,6 +144,10 @@ def __crawler_start(self):
         The main thread will sleep until the crawler is finished. This enables
         quitting the application using sigints (see http://stackoverflow.com/a/11816038/2491049).

+        Note:
+            `__crawler_stop()` and `__spawn_new_requests()` are called here on the main thread to
+            prevent thread recursion and deadlocks.
+
         """

         try:
@@ -154,6 +162,9 @@ def __crawler_start(self):
             if self.__should_stop:
                 self.__crawler_stop()

+            if self.__should_spawn_new_requests:
+                self.__spawn_new_requests()
+
             time.sleep(1)

     def __crawler_stop(self):
@@ -208,7 +219,7 @@ def __request_start(self, queue_item):

         if action == CrawlerActions.DO_SKIP_TO_NEXT:
             self.queue.move(queue_item, QueueItem.STATUS_FINISHED)
-            self.__spawn_new_requests()
+            self.__should_spawn_new_requests = True

         if action == CrawlerActions.DO_CONTINUE_CRAWLING or action is None:
             self.queue.move(queue_item, QueueItem.STATUS_IN_PROGRESS)
@@ -251,7 +262,7 @@ def __request_finish(self, queue_item, new_requests, request_failed=False):
             self.__should_stop = True

         if action == CrawlerActions.DO_CONTINUE_CRAWLING or action is None:
-            self.__spawn_new_requests()
+            self.__should_spawn_new_requests = True

     def __add_scraped_requests_to_queue(self, queue_item, scraped_requests):
         """Convert the scraped requests to queue items, return them and also add them to the queue.
