Skip to content

Commit b4bed4b

Browse files
committed
bugfix: skip empty safetensors file when inplace pin memory
1 parent 0eef081 commit b4bed4b

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

checkpoint_engine/pin_memory.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,12 @@ def _pin(t: torch.Tensor):
256256
# Remove the file after successfully loading. This will avoid doubling the memory usage.
257257
# We assume files in /dev/shm/ are temporary files. So it's safe to remove them after loading.
258258
os.remove(file_path)
259+
if not metas:
260+
# TODO: should we still return this buffer?
261+
assert buffer.nbytes == 0, f"buffer nbytes {buffer.nbytes} should be 0"
262+
logger.warning(f"[rank{rank}] no metas found in {file_path}, skip pin memory")
263+
return MemoryBuffer(buffer=buffer, size=buffer.nbytes, metas=[], manually_pinned=False)
264+
259265
_pin(buffer)
260266
logger.info(
261267
f"[rank{rank}] inplace pin memory for file {file_path} finished, size {buffer.nbytes / 1024 / 1024:.2f}MiB"

0 commit comments

Comments
 (0)