From 3d444b40f6d9969e346b4c63de083179b6c26088 Mon Sep 17 00:00:00 2001 From: Abhijit Paithankar Date: Mon, 22 Dec 2025 22:36:45 -0800 Subject: [PATCH] explicitly delete objects in async ckpt worker loop and call gc --- megatron/core/dist_checkpointing/strategies/async_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/megatron/core/dist_checkpointing/strategies/async_utils.py b/megatron/core/dist_checkpointing/strategies/async_utils.py index 4c1aab1b1d..63c29612fe 100644 --- a/megatron/core/dist_checkpointing/strategies/async_utils.py +++ b/megatron/core/dist_checkpointing/strategies/async_utils.py @@ -436,7 +436,6 @@ def __del__(self): self.close() @staticmethod - @_disable_gc() def async_loop( rank: int, queue: mp.JoinableQueue, @@ -486,7 +485,9 @@ def async_loop( logger.debug(f"{rank} has completed saving {item.call_idx}") comp_q.put(item.call_idx) queue.task_done() - + del async_fn_args + del item + gc.collect() logger.info(f"PersistentAsyncCaller: persistent ckpt worker for {rank} has terminated")