From ec3acd7014114b772ff70426ca5261471e21cf2d Mon Sep 17 00:00:00 2001
From: Hrishith Thadicherla <hthadicherla@nvidia.com>
Date: Mon, 30 Mar 2026 10:51:20 +0530
Subject: [PATCH 1/5] Just adding comment for testing ci/cd

Signed-off-by: Hrishith Thadicherla <hthadicherla@nvidia.com>
---
 modelopt/onnx/quantization/ort_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modelopt/onnx/quantization/ort_utils.py b/modelopt/onnx/quantization/ort_utils.py
index 173fbb06d0..6a5df1e1f5 100755
--- a/modelopt/onnx/quantization/ort_utils.py
+++ b/modelopt/onnx/quantization/ort_utils.py
@@ -70,6 +70,7 @@ def _check_for_libcudnn():
             f" for your ORT version at https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements."
         )
     else:
+        # Not found in system path — try preloading from Python site-packages
         logger.error(f"cuDNN library not found in {env_variable}")
         raise FileNotFoundError(
             f"{lib_pattern} is not accessible in {env_variable}! Please make sure that the path to that library"

From e6917aa0bea608cb1e8707cc2367868f37b3427f Mon Sep 17 00:00:00 2001
From: Hrishith Thadicherla <hthadicherla@nvidia.com>
Date: Mon, 30 Mar 2026 11:36:36 +0530
Subject: [PATCH 2/5] Added fallback to preload cudnn dlls from torch venv
 package or nvidia-cudnn-cu12 package in case the dlls don't exist in system
 path

Signed-off-by: Hrishith Thadicherla <hthadicherla@nvidia.com>
---
 modelopt/onnx/quantization/ort_utils.py | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/modelopt/onnx/quantization/ort_utils.py b/modelopt/onnx/quantization/ort_utils.py
index 6a5df1e1f5..c5c0a95558 100755
--- a/modelopt/onnx/quantization/ort_utils.py
+++ b/modelopt/onnx/quantization/ort_utils.py
@@ -71,11 +71,26 @@ def _check_for_libcudnn():
         )
     else:
         # Not found in system path — try preloading from Python site-packages
-        logger.error(f"cuDNN library not found in {env_variable}")
+        logger.warning(f"cuDNN not found in {env_variable}. Trying onnxruntime.preload_dlls()...")
+        if hasattr(ort, "preload_dlls"):
+            try:
+                ort.preload_dlls()
+                logger.info(
+                    "onnxruntime.preload_dlls() succeeded; CUDA/cuDNN DLLs preloaded from site-packages."
+                    " Please check that this is the correct version needed for your ORT version at"
+                    " https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements."
+                )
+                return True
+            except Exception as e:
+                logger.warning(f"onnxruntime.preload_dlls() also failed: {e}")
+
+        logger.error(f"cuDNN library not found in {env_variable} or site-packages")
         raise FileNotFoundError(
-            f"{lib_pattern} is not accessible in {env_variable}! Please make sure that the path to that library"
-            f" is in the env var to use the CUDA or TensorRT EP and ensure that the correct version is available."
-            f" Versioning compatibility can be checked at https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements."
+            f"{lib_pattern} is not accessible in {env_variable} and onnxruntime.preload_dlls()"
+            f" could not locate it either. Please make sure that the path to that library is in the"
+            f" env var, or install the cuDNN pip package (e.g. nvidia-cudnn-cu12) to use the CUDA or"
+            f" TensorRT EP. Versioning compatibility can be checked at"
+            f" https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements."
         )
     return found
 

From bf0674b69e56549e4762455b114a64b834fccf0a Mon Sep 17 00:00:00 2001
From: Hrishith Thadicherla <hthadicherla@nvidia.com>
Date: Mon, 30 Mar 2026 12:30:41 +0530
Subject: [PATCH 3/5] added check to skip ort.preload_dlls() when running on
 Python 3.10

Signed-off-by: Hrishith Thadicherla <hthadicherla@nvidia.com>
---
 modelopt/onnx/quantization/ort_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modelopt/onnx/quantization/ort_utils.py b/modelopt/onnx/quantization/ort_utils.py
index c5c0a95558..f9e7432da0 100755
--- a/modelopt/onnx/quantization/ort_utils.py
+++ b/modelopt/onnx/quantization/ort_utils.py
@@ -18,6 +18,7 @@
 import glob
 import os
 import platform
+import sys
 from collections.abc import Sequence
 
 import onnxruntime as ort
@@ -72,7 +73,7 @@ def _check_for_libcudnn():
     else:
         # Not found in system path — try preloading from Python site-packages
         logger.warning(f"cuDNN not found in {env_variable}. Trying onnxruntime.preload_dlls()...")
-        if hasattr(ort, "preload_dlls"):
+        if hasattr(ort, "preload_dlls") and sys.version_info[:2] != (3, 10):
             try:
                 ort.preload_dlls()
                 logger.info(

From 0c8f2f15053a4dd764fff13c07702ccb8cb59a85 Mon Sep 17 00:00:00 2001
From: Hrishith Thadicherla <hthadicherla@nvidia.com>
Date: Wed, 1 Apr 2026 11:31:40 +0530
Subject: [PATCH 4/5] changed comments for better understanding

Signed-off-by: Hrishith Thadicherla <hthadicherla@nvidia.com>
---
 modelopt/onnx/quantization/ort_utils.py | 31 +++++++++++++++++--------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/modelopt/onnx/quantization/ort_utils.py b/modelopt/onnx/quantization/ort_utils.py
index f9e7432da0..9c9d2128a9 100755
--- a/modelopt/onnx/quantization/ort_utils.py
+++ b/modelopt/onnx/quantization/ort_utils.py
@@ -71,26 +71,37 @@ def _check_for_libcudnn():
             f" for your ORT version at https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements."
         )
     else:
-        # Not found in system path — try preloading from Python site-packages
-        logger.warning(f"cuDNN not found in {env_variable}. Trying onnxruntime.preload_dlls()...")
+        logger.error(f"cuDNN library not found in {env_variable}")
+
+        # Fallback: ORT >=1.20 ships a preload_dlls() helper that loads CUDA/cuDNN
+        # DLLs bundled inside pip packages (e.g. nvidia-cudnn-cu12) so they don't
+        # need to be on the system PATH / LD_LIBRARY_PATH.
+        # However, preload_dlls() is broken on Python 3.10 (missing os.add_dll_directory
+        # behaviour), so we skip it for that version.
         if hasattr(ort, "preload_dlls") and sys.version_info[:2] != (3, 10):
+            logger.warning(
+                f"cuDNN not found in {env_variable}. "
+                "Attempting onnxruntime.preload_dlls() to load from site-packages..."
+            )
             try:
                 ort.preload_dlls()
                 logger.info(
-                    "onnxruntime.preload_dlls() succeeded; CUDA/cuDNN DLLs preloaded from site-packages."
-                    " Please check that this is the correct version needed for your ORT version at"
+                    "onnxruntime.preload_dlls() succeeded — CUDA/cuDNN DLLs loaded from site-packages."
+                    " Verify version compatibility at"
                     " https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements."
                 )
                 return True
             except Exception as e:
-                logger.warning(f"onnxruntime.preload_dlls() also failed: {e}")
+                logger.warning(f"onnxruntime.preload_dlls() failed: {e}")
 
-        logger.error(f"cuDNN library not found in {env_variable} or site-packages")
         raise FileNotFoundError(
-            f"{lib_pattern} is not accessible in {env_variable} and onnxruntime.preload_dlls()"
-            f" could not locate it either. Please make sure that the path to that library is in the"
-            f" env var, or install the cuDNN pip package (e.g. nvidia-cudnn-cu12) to use the CUDA or"
-            f" TensorRT EP. Versioning compatibility can be checked at"
+            f"{lib_pattern} is not accessible via {env_variable} or site-packages.\n"
+            f"To fix this, either:\n"
+            f"  1. Add the directory containing {lib_pattern} to your {env_variable} env var, or\n"
+            f"  2. Install the cuDNN pip package (Python>=3.11 only):"
+            f" pip install nvidia-cudnn-cu12 (or nvidia-cudnn-cu13)\n"
+            f"This is required for the CUDA / TensorRT execution provider.\n"
+            f"Check version compatibility at"
             f" https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements."
         )
     return found

From 2430961bd8c63697b5b250c39a428091b489c0c7 Mon Sep 17 00:00:00 2001
From: Hrishith Thadicherla <hthadicherla@nvidia.com>
Date: Wed, 1 Apr 2026 12:00:02 +0530
Subject: [PATCH 5/5] changed comments for better understanding and also fixed
 the error catching mechanism

Signed-off-by: Hrishith Thadicherla <hthadicherla@nvidia.com>
---
 modelopt/onnx/quantization/ort_utils.py | 34 +++++++++++++++++++------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/modelopt/onnx/quantization/ort_utils.py b/modelopt/onnx/quantization/ort_utils.py
index 9c9d2128a9..fe5cdaced9 100755
--- a/modelopt/onnx/quantization/ort_utils.py
+++ b/modelopt/onnx/quantization/ort_utils.py
@@ -16,10 +16,12 @@
 """Provides basic ORT inference utils, should be replaced by modelopt.torch.ort_client."""
 
 import glob
+import io
 import os
 import platform
 import sys
 from collections.abc import Sequence
+from contextlib import redirect_stderr, redirect_stdout
 
 import onnxruntime as ort
 from onnxruntime.quantization.operators.qdq_base_operator import QDQOperatorBase
@@ -71,8 +73,6 @@ def _check_for_libcudnn():
             f" for your ORT version at https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements."
         )
     else:
-        logger.error(f"cuDNN library not found in {env_variable}")
-
         # Fallback: ORT >=1.20 ships a preload_dlls() helper that loads CUDA/cuDNN
         # DLLs bundled inside pip packages (e.g. nvidia-cudnn-cu12) so they don't
         # need to be on the system PATH / LD_LIBRARY_PATH.
@@ -83,21 +83,39 @@ def _check_for_libcudnn():
                 f"cuDNN not found in {env_variable}. "
                 "Attempting onnxruntime.preload_dlls() to load from site-packages..."
             )
+            # preload_dlls() does not raise on failure — it silently prints
+            # "Failed to load ..." messages.  Capture its output and check
+            # whether the key cuDNN DLL actually loaded.
+            cudnn_dll = "cudnn" if platform.system() == "Windows" else "libcudnn_adv"
+            captured = io.StringIO()
             try:
-                ort.preload_dlls()
+                with redirect_stdout(captured), redirect_stderr(captured):
+                    ort.preload_dlls()
+            except Exception as e:
+                logger.warning(f"onnxruntime.preload_dlls() raised an exception: {e}")
+
+            preload_output = captured.getvalue()
+            if preload_output:
+                logger.debug(f"preload_dlls() output:\n{preload_output}")
+
+            if f"Failed to load {cudnn_dll}" in preload_output:
+                logger.error(
+                    f"onnxruntime.preload_dlls() was called but {cudnn_dll} failed to load. "
+                    "cuDNN DLLs were NOT successfully loaded from site-packages."
+                )
+            else:
                 logger.info(
-                    "onnxruntime.preload_dlls() succeeded — CUDA/cuDNN DLLs loaded from site-packages."
-                    " Verify version compatibility at"
+                    "onnxruntime.preload_dlls() succeeded — CUDA/cuDNN DLLs loaded"
+                    " from site-packages. Verify version compatibility at"
                     " https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements."
                 )
                 return True
-            except Exception as e:
-                logger.warning(f"onnxruntime.preload_dlls() failed: {e}")
 
         raise FileNotFoundError(
             f"{lib_pattern} is not accessible via {env_variable} or site-packages.\n"
             f"To fix this, either:\n"
-            f"  1. Add the directory containing {lib_pattern} to your {env_variable} env var, or\n"
+            f"  1. Add the directory containing {lib_pattern} to your"
+            f" {env_variable} env var, or\n"
             f"  2. Install the cuDNN pip package (Python>=3.11 only):"
             f" pip install nvidia-cudnn-cu12 (or nvidia-cudnn-cu13)\n"
             f"This is required for the CUDA / TensorRT execution provider.\n"