Skip to content

Commit 33ea56d

Browse files
committed
lint
Signed-off-by: Yuki Huang <yukih@nvidia.com>
1 parent 1bcdeef commit 33ea56d

File tree

3 files changed

+9
-10
lines changed

3 files changed

+9
-10
lines changed

nemo_rl/models/policy/workers/megatron_policy_worker.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2062,7 +2062,9 @@ def _iter_params_with_optional_kv_scales(
         This helper is used by both IPC-based streaming and collective broadcast
         so that the logic for adding KV scales stays consistent in one place.
         """
-        from nemo_rl.models.generation.vllm.quantization.fp8_train_utils import get_vllm_qkv_scale_names
+        from nemo_rl.models.generation.vllm.quantization.fp8_train_utils import (
+            get_vllm_qkv_scale_names,
+        )

         base_iter = self.megatron_bridge.export_hf_weights(
             [self.model],
@@ -2469,7 +2471,9 @@ def calibrate_qkv_fp8_scales(
            { "format": "fp8", "percentile": float, "margin": float,
              "layers": { layer_name: {"k_scale": float, "v_scale": float[, "q_scale": float] } } }
        """
-        from nemo_rl.models.generation.vllm.quantization.fp8_train_utils import convert_calibration_to_vllm_format
+        from nemo_rl.models.generation.vllm.quantization.fp8_train_utils import (
+            convert_calibration_to_vllm_format,
+        )

        # Allow overriding FP8 max for Q, K, V via environment variables for ease of testing.
        # Defaults align with FP8 e4m3 max magnitude.

pyproject.toml

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,7 @@ dependencies = [
 ]

 [project.optional-dependencies]
-fsdp = [
-    "flash-attn==2.8.1",
-    "mamba-ssm",
-    "causal-conv1d",
-]
+fsdp = ["flash-attn==2.8.1", "mamba-ssm", "causal-conv1d"]
 automodel = [
     "nemo-automodel",
     # Flash-attn version should be selected to satisfy both TE + vLLM requirements (xformers in particular)
@@ -77,9 +73,7 @@ vllm = [
     "num2words>=0.5.14",
 ]
 # Remove this once https://github.com/NVIDIA-NeMo/RL/issues/501 resolved
-vllm_for_train = [
-    "vllm==0.11.0",
-]
+vllm_for_train = ["vllm==0.11.0"]
 mcore = [
     # also need cudnn (https://developer.nvidia.com/cudnn-downloads?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=20.04&target_type=deb_network)
     # wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb

pyrefly.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ project-includes = [
     "nemo_rl/models/generation/interfaces.py",
     "nemo_rl/models/generation/vllm/__init__.py",
     "nemo_rl/models/generation/vllm/config.py",
+    "nemo_rl/models/generation/vllm/quantization/fp8_train_utils.py",
     "nemo_rl/models/generation/vllm/utils.py",
     "nemo_rl/models/generation/vllm/vllm_backend.py",
     "nemo_rl/models/huggingface/__init__.py",

0 commit comments

Comments
 (0)