From ec3acd7014114b772ff70426ca5261471e21cf2d Mon Sep 17 00:00:00 2001 From: Hrishith Thadicherla Date: Mon, 30 Mar 2026 10:51:20 +0530 Subject: [PATCH 1/5] Just adding comment for testing ci/cd Signed-off-by: Hrishith Thadicherla --- modelopt/onnx/quantization/ort_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modelopt/onnx/quantization/ort_utils.py b/modelopt/onnx/quantization/ort_utils.py index 173fbb06d0..6a5df1e1f5 100755 --- a/modelopt/onnx/quantization/ort_utils.py +++ b/modelopt/onnx/quantization/ort_utils.py @@ -70,6 +70,7 @@ def _check_for_libcudnn(): f" for your ORT version at https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements." ) else: + # Not found in system path — try preloading from Python site-packages logger.error(f"cuDNN library not found in {env_variable}") raise FileNotFoundError( f"{lib_pattern} is not accessible in {env_variable}! Please make sure that the path to that library" From e6917aa0bea608cb1e8707cc2367868f37b3427f Mon Sep 17 00:00:00 2001 From: Hrishith Thadicherla Date: Mon, 30 Mar 2026 11:36:36 +0530 Subject: [PATCH 2/5] Added fallback to preload cuDNN DLLs from torch venv package or nvidia-cudnn-cu12 package in case the DLLs don't exist in the system path Signed-off-by: Hrishith Thadicherla --- modelopt/onnx/quantization/ort_utils.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/modelopt/onnx/quantization/ort_utils.py b/modelopt/onnx/quantization/ort_utils.py index 6a5df1e1f5..c5c0a95558 100755 --- a/modelopt/onnx/quantization/ort_utils.py +++ b/modelopt/onnx/quantization/ort_utils.py @@ -71,11 +71,26 @@ def _check_for_libcudnn(): ) else: # Not found in system path — try preloading from Python site-packages - logger.error(f"cuDNN library not found in {env_variable}") + logger.warning(f"cuDNN not found in {env_variable}.
Trying onnxruntime.preload_dlls()...") + if hasattr(ort, "preload_dlls"): + try: + ort.preload_dlls() + logger.info( + "onnxruntime.preload_dlls() succeeded; CUDA/cuDNN DLLs preloaded from site-packages." + " Please check that this is the correct version needed for your ORT version at" + " https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements." + ) + return True + except Exception as e: + logger.warning(f"onnxruntime.preload_dlls() also failed: {e}") + + logger.error(f"cuDNN library not found in {env_variable} or site-packages") raise FileNotFoundError( - f"{lib_pattern} is not accessible in {env_variable}! Please make sure that the path to that library" - f" is in the env var to use the CUDA or TensorRT EP and ensure that the correct version is available." - f" Versioning compatibility can be checked at https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements." + f"{lib_pattern} is not accessible in {env_variable} and onnxruntime.preload_dlls()" + f" could not locate it either. Please make sure that the path to that library is in the" + f" env var, or install the cuDNN pip package (e.g. nvidia-cudnn-cu12) to use the CUDA or" + f" TensorRT EP. Versioning compatibility can be checked at" + f" https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements." 
) return found From bf0674b69e56549e4762455b114a64b834fccf0a Mon Sep 17 00:00:00 2001 From: Hrishith Thadicherla Date: Mon, 30 Mar 2026 12:30:41 +0530 Subject: [PATCH 3/5] Added version check to skip ort.preload_dlls() when running on Python 3.10 Signed-off-by: Hrishith Thadicherla --- modelopt/onnx/quantization/ort_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modelopt/onnx/quantization/ort_utils.py b/modelopt/onnx/quantization/ort_utils.py index c5c0a95558..f9e7432da0 100755 --- a/modelopt/onnx/quantization/ort_utils.py +++ b/modelopt/onnx/quantization/ort_utils.py @@ -18,6 +18,7 @@ import glob import os import platform +import sys from collections.abc import Sequence import onnxruntime as ort @@ -72,7 +73,7 @@ def _check_for_libcudnn(): else: # Not found in system path — try preloading from Python site-packages logger.warning(f"cuDNN not found in {env_variable}. Trying onnxruntime.preload_dlls()...") - if hasattr(ort, "preload_dlls"): + if hasattr(ort, "preload_dlls") and sys.version_info[:2] != (3, 10): try: ort.preload_dlls() logger.info( From 0c8f2f15053a4dd764fff13c07702ccb8cb59a85 Mon Sep 17 00:00:00 2001 From: Hrishith Thadicherla Date: Wed, 1 Apr 2026 11:31:40 +0530 Subject: [PATCH 4/5] changed comments for better understanding Signed-off-by: Hrishith Thadicherla --- modelopt/onnx/quantization/ort_utils.py | 31 +++++++++++++++++-------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/modelopt/onnx/quantization/ort_utils.py b/modelopt/onnx/quantization/ort_utils.py index f9e7432da0..9c9d2128a9 100755 --- a/modelopt/onnx/quantization/ort_utils.py +++ b/modelopt/onnx/quantization/ort_utils.py @@ -71,26 +71,37 @@ def _check_for_libcudnn(): f" for your ORT version at https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements." ) else: - # Not found in system path — try preloading from Python site-packages - logger.warning(f"cuDNN not found in {env_variable}.
Trying onnxruntime.preload_dlls()...") + logger.error(f"cuDNN library not found in {env_variable}") + + # Fallback: ORT >=1.20 ships a preload_dlls() helper that loads CUDA/cuDNN + # DLLs bundled inside pip packages (e.g. nvidia-cudnn-cu12) so they don't + # need to be on the system PATH / LD_LIBRARY_PATH. + # However, preload_dlls() is broken on Python 3.10 (missing os.add_dll_directory + # behaviour), so we skip it for that version. if hasattr(ort, "preload_dlls") and sys.version_info[:2] != (3, 10): + logger.warning( + f"cuDNN not found in {env_variable}. " + "Attempting onnxruntime.preload_dlls() to load from site-packages..." + ) try: ort.preload_dlls() logger.info( - "onnxruntime.preload_dlls() succeeded; CUDA/cuDNN DLLs preloaded from site-packages." - " Please check that this is the correct version needed for your ORT version at" + "onnxruntime.preload_dlls() succeeded — CUDA/cuDNN DLLs loaded from site-packages." + " Verify version compatibility at" " https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements." ) return True except Exception as e: - logger.warning(f"onnxruntime.preload_dlls() also failed: {e}") + logger.warning(f"onnxruntime.preload_dlls() failed: {e}") - logger.error(f"cuDNN library not found in {env_variable} or site-packages") raise FileNotFoundError( - f"{lib_pattern} is not accessible in {env_variable} and onnxruntime.preload_dlls()" - f" could not locate it either. Please make sure that the path to that library is in the" - f" env var, or install the cuDNN pip package (e.g. nvidia-cudnn-cu12) to use the CUDA or" - f" TensorRT EP. Versioning compatibility can be checked at" + f"{lib_pattern} is not accessible via {env_variable} or site-packages.\n" + f"To fix this, either:\n" + f" 1. Add the directory containing {lib_pattern} to your {env_variable} env var, or\n" + f" 2. 
Install the cuDNN pip package (Python>=3.11 only):" + f" pip install nvidia-cudnn-cu12 (or nvidia-cudnn-cu13)\n" + f"This is required for the CUDA / TensorRT execution provider.\n" + f"Check version compatibility at" f" https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements." ) return found From 2430961bd8c63697b5b250c39a428091b489c0c7 Mon Sep 17 00:00:00 2001 From: Hrishith Thadicherla Date: Wed, 1 Apr 2026 12:00:02 +0530 Subject: [PATCH 5/5] changed comments for better understanding and also fixed the error catching mechanism Signed-off-by: Hrishith Thadicherla --- modelopt/onnx/quantization/ort_utils.py | 34 +++++++++++++++++++------ 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/modelopt/onnx/quantization/ort_utils.py b/modelopt/onnx/quantization/ort_utils.py index 9c9d2128a9..fe5cdaced9 100755 --- a/modelopt/onnx/quantization/ort_utils.py +++ b/modelopt/onnx/quantization/ort_utils.py @@ -16,10 +16,12 @@ """Provides basic ORT inference utils, should be replaced by modelopt.torch.ort_client.""" import glob +import io import os import platform import sys from collections.abc import Sequence +from contextlib import redirect_stderr, redirect_stdout import onnxruntime as ort from onnxruntime.quantization.operators.qdq_base_operator import QDQOperatorBase @@ -71,8 +73,6 @@ def _check_for_libcudnn(): f" for your ORT version at https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements." ) else: - logger.error(f"cuDNN library not found in {env_variable}") - # Fallback: ORT >=1.20 ships a preload_dlls() helper that loads CUDA/cuDNN # DLLs bundled inside pip packages (e.g. nvidia-cudnn-cu12) so they don't # need to be on the system PATH / LD_LIBRARY_PATH. @@ -83,21 +83,39 @@ def _check_for_libcudnn(): f"cuDNN not found in {env_variable}. " "Attempting onnxruntime.preload_dlls() to load from site-packages..." 
) + # preload_dlls() does not raise on failure — it silently prints + # "Failed to load ..." messages. Capture its output and check + # whether the key cuDNN DLL actually loaded. + cudnn_dll = "cudnn" if platform.system() == "Windows" else "libcudnn_adv" + captured = io.StringIO() try: - ort.preload_dlls() + with redirect_stdout(captured), redirect_stderr(captured): + ort.preload_dlls() + except Exception as e: + logger.warning(f"onnxruntime.preload_dlls() raised an exception: {e}") + + preload_output = captured.getvalue() + if preload_output: + logger.debug(f"preload_dlls() output:\n{preload_output}") + + if f"Failed to load {cudnn_dll}" in preload_output: + logger.error( + f"onnxruntime.preload_dlls() was called but {cudnn_dll} failed to load. " + "cuDNN DLLs were NOT successfully loaded from site-packages." + ) + else: logger.info( - "onnxruntime.preload_dlls() succeeded — CUDA/cuDNN DLLs loaded from site-packages." - " Verify version compatibility at" + "onnxruntime.preload_dlls() succeeded — CUDA/cuDNN DLLs loaded" + " from site-packages. Verify version compatibility at" " https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements." ) return True - except Exception as e: - logger.warning(f"onnxruntime.preload_dlls() failed: {e}") raise FileNotFoundError( f"{lib_pattern} is not accessible via {env_variable} or site-packages.\n" f"To fix this, either:\n" - f" 1. Add the directory containing {lib_pattern} to your {env_variable} env var, or\n" + f" 1. Add the directory containing {lib_pattern} to your" + f" {env_variable} env var, or\n" f" 2. Install the cuDNN pip package (Python>=3.11 only):" f" pip install nvidia-cudnn-cu12 (or nvidia-cudnn-cu13)\n" f"This is required for the CUDA / TensorRT execution provider.\n"