9 changes: 9 additions & 0 deletions examples/configs/grpo_math_1B.yaml
@@ -216,6 +216,15 @@ policy:
top_k: null
stop_token_ids: null
stop_strings: null
mcore_generation_config:
buffer_size_gb: 20 # Total GPU memory (in GB) allocated for KV cache buffers
buffer_guaranteed_fraction: 0.1 # Fraction of buffer reserved for guaranteed active requests
num_cuda_graphs: 16 # Number of CUDA graphs to pre-compile for different batch sizes
block_size_tokens: 256 # Size of each KV cache block in tokens (affects memory granularity)
use_cuda_graphs_for_non_decode_steps: true # Enable CUDA graphs for prefill/context processing
enable_chunked_prefill: true # Split long prefills into chunks for better memory management
unified_memory_level: 0 # Unified memory usage level (0=disabled, higher values enable more aggressive paging)
max_tokens: ${policy.max_total_sequence_length} # Maximum number of tokens to use in a single step
vllm_cfg:
async_engine: false
precision: ${policy.precision}
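For a rough sense of what these knobs trade off: the buffer is carved into fixed-size blocks, so buffer_size_gb and block_size_tokens together bound how many KV-cache blocks exist, and buffer_guaranteed_fraction reserves a slice of them for requests that must never be preempted. A back-of-the-envelope sketch (illustrative arithmetic only, not Megatron's actual allocator; kv_bytes_per_token is an assumed example value):

def kv_cache_budget(buffer_size_gb: float, buffer_guaranteed_fraction: float,
                    block_size_tokens: int, kv_bytes_per_token: int):
    # Total buffer bytes, carved into fixed-size blocks of block_size_tokens.
    total_bytes = buffer_size_gb * 1024**3
    block_bytes = block_size_tokens * kv_bytes_per_token
    total_blocks = int(total_bytes // block_bytes)
    # Blocks held back for guaranteed (non-preemptible) active requests.
    guaranteed_blocks = int(total_blocks * buffer_guaranteed_fraction)
    return total_blocks, guaranteed_blocks

# Assumes ~160 KiB of KV per token; the real figure depends on the model.
total, guaranteed = kv_cache_budget(20, 0.1, 256, 160 * 1024)
print(f"{total} blocks total, {guaranteed} reserved for guaranteed requests")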
2 changes: 1 addition & 1 deletion examples/configs/grpo_math_1B_megatron.yaml
@@ -150,7 +150,7 @@ policy:
use_cuda_graphs_for_non_decode_steps: true # Enable CUDA graphs for prefill/context processing
enable_chunked_prefill: true # Split long prefills into chunks for better memory management
unified_memory_level: 0 # Unified memory usage level (0=disabled, higher values enable more aggressive paging)
max_tokens: 16384 # Maximum number of tokens to use in a single step
max_tokens: ${policy.max_total_sequence_length} # Maximum number of tokens to use in a single step

vllm_cfg:
tensor_parallel_size: 1
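The ${policy.max_total_sequence_length} reference is a standard OmegaConf-style interpolation, so max_tokens now tracks the policy's sequence length instead of a hard-coded 16384. A minimal sketch of how such an interpolation resolves (assumes OmegaConf; the nesting below is a simplified stand-in for the real config tree):

from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "policy": {
        "max_total_sequence_length": 16384,
        "mcore_generation_config": {
            "max_tokens": "${policy.max_total_sequence_length}",
        },
    },
})
# The interpolation resolves on access, so the two values stay in sync.
assert cfg.policy.mcore_generation_config.max_tokens == 16384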
2 changes: 1 addition & 1 deletion examples/configs/recipes/llm/dapo-qwen2.5-7b.yaml
@@ -29,7 +29,7 @@ checkpointing:
checkpoint_dir: results/dapo-qwen2.5-7b
keep_top_k: 5
save_period: 5
model_save_format: "dcp"
model_save_format: null
policy:
model_name: Qwen/Qwen2.5-Math-7B
hf_config_overrides:
4 changes: 4 additions & 0 deletions nemo_rl/models/generation/vllm/vllm_worker.py
@@ -388,6 +388,10 @@ def _patch_vllm_vit_flash_attn_backend():
)
# disable quantization
vllm_kwargs["hf_overrides"]["quantization_config"] = {}
elif "Gemma3ForConditionalGeneration" in getattr(hf_config, "architectures", []):
if self.cfg["vllm_cfg"]["skip_tokenizer_init"]:
print("Gemma3ForConditionalGeneration models may crash when skip_tokenizer_init is True. NeMo-RL is forcing it to False for this architecture. See https://github.com/NVIDIA-NeMo/RL/issues/1681 for more details.")
self.cfg["vllm_cfg"]["skip_tokenizer_init"] = False

llm_kwargs = dict(
model=self.model_name,
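The guard keys off the model's declared architectures, a standard field on Hugging Face configs. A standalone sketch of the same check (the model name and vllm_cfg dict here are illustrative; the real code mutates self.cfg as shown above):

from transformers import AutoConfig

vllm_cfg = {"skip_tokenizer_init": True}
hf_config = AutoConfig.from_pretrained("google/gemma-3-4b-it")  # assumed example model
if "Gemma3ForConditionalGeneration" in getattr(hf_config, "architectures", []):
    # Work around the crash tracked in NVIDIA-NeMo/RL issue #1681.
    vllm_cfg["skip_tokenizer_init"] = False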
2 changes: 1 addition & 1 deletion nemo_rl/models/policy/workers/dtensor_policy_worker.py
@@ -1839,7 +1839,7 @@ def move_buffer_to_device(
) -> nn.Module:
# FSDP modules do not move buffers to the device automatically
for v in model.buffers():
v.data = v.data.to(device)
v = v.to(device)

return model

8 changes: 4 additions & 4 deletions nemo_rl/models/policy/workers/dtensor_policy_worker_v2.py
@@ -324,10 +324,10 @@ def __init__(
print(
"[WARNING]: sequence_parallel=True, but tp_size=1 which has no effect. Enable tp_size > 1 to use sequence parallelism."
)
elif sequence_parallel_enabled and tp_size > 1:
raise RuntimeError(
"Sequence parallel + tp_size >1 is currently broken in torch==2.8.0. See https://github.com/NVIDIA-NeMo/Automodel/issues/652 for more details."
)
#elif sequence_parallel_enabled and tp_size > 1:
# raise RuntimeError(
# "Sequence parallel + tp_size >1 is currently broken in torch==2.8.0. See https://github.com/NVIDIA-NeMo/Automodel/issues/652 for more details."
# )

if cp_size > 1:
assert not isinstance(self.model, Gemma3ForCausalLM), (
31 changes: 22 additions & 9 deletions nemo_rl/utils/logger.py
@@ -121,6 +121,23 @@ def __init__(self, cfg: TensorboardConfig, log_dir: Optional[str] = None):
self.writer = SummaryWriter(log_dir=log_dir)
print(f"Initialized TensorboardLogger at {log_dir}")

@staticmethod
def _coerce_to_scalar(value: Any) -> int | float | bool | str | None:
"""Coerce a value to a Python scalar for TensorBoard logging.

Returns the coerced value, or None if it can't be converted to a scalar.
"""
if isinstance(value, (int, float, bool, str)):
return value
if isinstance(value, (np.floating, np.integer, np.bool_)):
return value.item()
if isinstance(value, np.ndarray) and (value.ndim == 0 or value.size == 1):
return value.item()
if isinstance(value, torch.Tensor) and (value.ndim == 0 or value.numel() == 1):
return value.item()
# dict, list, multi-element arrays/tensors, or incompatible types
return None

def log_metrics(
self,
metrics: dict[str, Any],
@@ -137,23 +154,19 @@ def log_metrics(
step_metric: Optional step metric name (ignored in TensorBoard)
"""
for name, value in metrics.items():
# NeMo-Gym will add additional metrics like wandb histograms. However, some people will log to Tensorboard instead which may not be compatible
# This logic catches non-compatible objects being logged.
if not isinstance(value, (int, float, bool, str)):
continue

if prefix:
name = f"{prefix}/{name}"

# Skip non-scalar values that TensorBoard can't handle
if isinstance(value, (dict, list)):
scalar = self._coerce_to_scalar(value)
if scalar is None:
print(
f"Warning: Skipping non-scalar metric '{name}' for TensorBoard logging (type: {type(value).__name__})"
f"Warning: Skipping metric '{name}' for TensorBoard logging "
f"(unsupported type: {type(value).__name__})"
)
continue

try:
self.writer.add_scalar(name, value, step)
self.writer.add_scalar(name, scalar, step)
except Exception as e:
print(f"Warning: Failed to log metric '{name}' to TensorBoard: {e}")
continue
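A quick standalone check of the coercion rules (assumes numpy and torch, which logger.py already imports, and that the package is installed so the import below works):

import numpy as np
import torch
from nemo_rl.utils.logger import TensorboardLogger

coerce = TensorboardLogger._coerce_to_scalar
assert coerce(3.14) == 3.14                    # plain scalars pass through
assert coerce(np.float32(2.5)) == 2.5          # numpy scalars -> .item()
assert coerce(torch.tensor([7])) == 7          # one-element tensors -> .item()
assert coerce(torch.ones(2, 2)) is None        # multi-element tensors skipped
assert coerce({"hist": [1, 2]}) is None        # dicts/lists (e.g. histograms) skipped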
34 changes: 34 additions & 0 deletions tests/check_metrics.py
@@ -97,6 +97,38 @@ def mean(value, range_start=1, range_end=0, ignore_top_p=0.0):
return statistics.mean(vals)


def median(value, range_start=1, range_end=0):
"""Return the median of values (or a range of values) in a dictionary.

Note:
Steps and ranges are 1-indexed; range_end is exclusive.
range_end=0 means the range extends through the last step in the run.

Args:
value: Dictionary of step -> value
range_start: Starting step (1-indexed, default=1)
range_end: Ending step (1-indexed, exclusive, 0 means last step)
"""

## find potential offset that might arise from resuming from a checkpoint
max_step_reached = builtins.max([int(s) for s in value.keys()])
## this is the number of steps that occurred prior to resuming
offset = max_step_reached - len(value)

num_elem = len(value)
if range_start < 0:
range_start += num_elem + 1 + offset
if range_end <= 0:
range_end += num_elem + 1 + offset

vals = []
for step, v in value.items():
if range_start <= int(step) and int(step) < range_end:
vals.append(float(v))

return statistics.median(vals)


def evaluate_check(data: dict, check: str) -> tuple[bool, str, object]:
"""Evaluate a check against the data.

@@ -109,6 +141,7 @@ def evaluate_check(data: dict, check: str) -> tuple[bool, str, object]:
"min": min,
"max": max,
"mean": mean,
"median": median,
"ratio_above": ratio_above,
}

@@ -152,6 +185,7 @@ def main():
# Use helper functions
python check_metrics.py results.json "min(data['class_f1']) > 0.6"
python check_metrics.py results.json "mean(data['accuracies']) > 0.85"
python check_metrics.py results.json "median(data['accuracies']) > 0.85"
python check_metrics.py results.json "mean(data['loss'], ignore_top_p=0.05) < 1.5"
python check_metrics.py results.json "ratio_above(data['error'], 1.05) < 0.02"
"""
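A toy illustration of the 1-indexed, end-exclusive range semantics the new median helper shares with mean (assumes check_metrics.py is importable; step keys are strings, matching the JSON dumped from the TensorBoard logs):

from check_metrics import median

data = {"1": 0.9, "2": 0.5, "3": 0.4, "4": 0.3, "5": 0.2}
assert median(data) == 0.4                   # all steps: median of five values
assert median(data, range_start=-3) == 0.3   # last three steps only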
5 changes: 4 additions & 1 deletion tests/test_suites/llm/dapo-qwen2.5-7b.sh
@@ -34,8 +34,11 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
# Only run metrics if the target step is reached
if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
uv run tests/check_metrics.py $JSON_METRICS \
'mean(data["train/token_mult_prob_error"]) < 1.1' \
'median(data["train/token_mult_prob_error"]) < 1.1' \
'data["train/token_mult_prob_error"]["20"] < 1.05' \
'data["train/reward"]["20"] > -0.45' \
'data["train/filtered_reward"]["20"] > -0.2'

# Clean up checkpoint directory after successful run to save space.
rm -rf "$CKPT_DIR"
fi
(another file; path not rendered in this view)
@@ -20,7 +20,7 @@ uv run examples/run_distillation_math.py \
distillation.val_period=20 \
logger.log_dir=$LOG_DIR \
logger.wandb_enabled=True \
logger.wandb.project=nemo-rl-distillation \
logger.wandb.project=nemo-rl \
logger.wandb.name=$EXP_NAME \
logger.monitor_gpus=True \
logger.tensorboard_enabled=True \
@@ -39,4 +39,7 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma
'data["train/loss"]["10"] < 0.5' \
'max(data["ray/node.0.gpu.0.mem_gb"]) < 70' \
'mean(data["timing/train/total_step_time"], -6, -1) < 500'

# Clean up checkpoint directory after successful run to save space.
rm -rf "$CKPT_DIR"
fi
(another file; path not rendered in this view)
@@ -20,7 +20,7 @@ uv run examples/run_distillation_math.py \
distillation.val_period=20 \
logger.log_dir=$LOG_DIR \
logger.wandb_enabled=True \
logger.wandb.project=nemo-rl-distillation \
logger.wandb.project=nemo-rl \
logger.wandb.name=$EXP_NAME \
logger.monitor_gpus=True \
logger.tensorboard_enabled=True \
@@ -39,4 +39,7 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma
'data["train/loss"]["10"] < 0.5' \
'max(data["ray/node.0.gpu.0.mem_gb"]) < 75' \
'mean(data["timing/train/total_step_time"], -6, -1) < 500'

# Clean up checkpoint directory after successful run to save space.
rm -rf "$CKPT_DIR"
fi
(another file; path not rendered in this view)
@@ -19,7 +19,7 @@ uv run examples/run_distillation_math.py \
distillation.max_num_steps=$MAX_STEPS \
logger.log_dir=$LOG_DIR \
logger.wandb_enabled=True \
logger.wandb.project=nemo-rl-distillation \
logger.wandb.project=nemo-rl \
logger.wandb.name=$EXP_NAME \
logger.monitor_gpus=True \
logger.tensorboard_enabled=True \
@@ -38,4 +38,7 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma
'data["train/loss"]["20"] < 0.3' \
'data["validation/accuracy"]["20"] > 0.1' \
'mean(data["timing/train/total_step_time"], -6, -1) < 1000'

# Clean up checkpoint directory after successful run to save space.
rm -rf "$CKPT_DIR"
fi
(another file; path not rendered in this view)
@@ -19,7 +19,7 @@ uv run examples/run_distillation_math.py \
distillation.max_num_steps=$MAX_STEPS \
logger.log_dir=$LOG_DIR \
logger.wandb_enabled=True \
logger.wandb.project=nemo-rl-distillation \
logger.wandb.project=nemo-rl \
logger.wandb.name=$EXP_NAME \
logger.monitor_gpus=True \
logger.tensorboard_enabled=True \
@@ -38,4 +38,7 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma
'data["train/loss"]["100"] < 0.25' \
'data["validation/accuracy"]["100"] > 0.2' \
'mean(data["timing/train/total_step_time"], -6, -1) < 1600'

# Clean up checkpoint directory after successful run to save space.
rm -rf "$CKPT_DIR"
fi
(another file; path not rendered in this view)
@@ -19,7 +19,7 @@ uv run examples/run_distillation_math.py \
distillation.max_num_steps=$MAX_STEPS \
logger.log_dir=$LOG_DIR \
logger.wandb_enabled=True \
logger.wandb.project=nemo-rl-distillation \
logger.wandb.project=nemo-rl \
logger.wandb.name=$EXP_NAME \
logger.monitor_gpus=True \
logger.tensorboard_enabled=True \
@@ -38,4 +38,7 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma
'data["train/loss"]["20"] < 0.3' \
'data["validation/accuracy"]["20"] > 0.1' \
'mean(data["timing/train/total_step_time"], -6, -1) < 1000'

# Clean up checkpoint directory after successful run to save space.
rm -rf "$CKPT_DIR"
fi
(another file; path not rendered in this view)
@@ -19,7 +19,7 @@ uv run examples/run_distillation_math.py \
distillation.max_num_steps=$MAX_STEPS \
logger.log_dir=$LOG_DIR \
logger.wandb_enabled=True \
logger.wandb.project=nemo-rl-distillation \
logger.wandb.project=nemo-rl \
logger.wandb.name=$EXP_NAME \
logger.monitor_gpus=True \
logger.tensorboard_enabled=True \
@@ -38,4 +38,7 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma
'data["train/loss"]["20"] < 0.3' \
'data["validation/accuracy"]["20"] > 0.1' \
'mean(data["timing/train/total_step_time"], -6, -1) < 1000'

# Clean up checkpoint directory after successful run to save space.
rm -rf "$CKPT_DIR"
fi
(another file; path not rendered in this view)
@@ -40,4 +40,7 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma
'data["train/preference_loss"]["1"] < 0.69316' \
'data["train/preference_loss"]["20"] < 0.6' \
'mean(data["timing/train/total_step_time"], -10, -1) < 7.8'

# Clean up checkpoint directory after successful run to save space.
rm -rf "$CKPT_DIR"
fi
(another file; path not rendered in this view)
@@ -34,10 +34,13 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
# Only run metrics if the target step is reached
if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
uv run tests/check_metrics.py $JSON_METRICS \
'data["train/loss"]["1"] < 3.6' \
'data["train/loss"]["1"] < 3.65' \
'data["train/loss"]["150"] < 3.0' \
'data["train/preference_loss"]["1"] > 0.69314' \
'data["train/preference_loss"]["1"] < 0.69316' \
'data["train/preference_loss"]["150"] < 0.4' \
'mean(data["timing/train/total_step_time"], -11, -1) < 24'

# Clean up checkpoint directory after successful run to save space.
rm -rf "$CKPT_DIR"
fi
(another file; path not rendered in this view)
@@ -34,10 +34,13 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
# Only run metrics if the target step is reached
if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
uv run tests/check_metrics.py $JSON_METRICS \
'data["train/loss"]["1"] < 3.6' \
'data["train/loss"]["1"] < 3.65' \
'data["train/loss"]["150"] < 3.0' \
'data["train/preference_loss"]["1"] > 0.69314' \
'data["train/preference_loss"]["1"] < 0.69316' \
'data["train/preference_loss"]["150"] < 0.4' \
'mean(data["timing/train/total_step_time"], -11, -1) < 11.5'

# Clean up checkpoint directory after successful run to save space.
rm -rf "$CKPT_DIR"
fi
(another file; path not rendered in this view)
@@ -40,4 +40,7 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma
'data["train/preference_loss"]["1"] < 0.69316' \
'data["train/preference_loss"]["20"] < 0.6' \
'mean(data["timing/train/total_step_time"], -10) < 6.7'

# Clean up checkpoint directory after successful run to save space.
rm -rf "$CKPT_DIR"
fi
(another file; path not rendered in this view)
@@ -39,4 +39,7 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma
'data["train/preference_loss"]["1"] > 0.6930' \
'data["train/preference_loss"]["1"] < 0.6932' \
'data["train/preference_loss"]["150"] < 0.68'

# Clean up checkpoint directory after successful run to save space.
rm -rf "$CKPT_DIR"
fi
(another file; path not rendered in this view)
@@ -38,4 +38,7 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma
'data["train/loss"]["1"] < 0.69316' \
'data["train/loss"]["150"] < 0.55' \
'mean(data["timing/train/total_step_time"], -11, -1) < 1.3'

# Clean up checkpoint directory after successful run to save space.
rm -rf "$CKPT_DIR"
fi
(another file; path not rendered in this view)
@@ -34,7 +34,10 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
# Only run metrics if the target step is reached
if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
uv run tests/check_metrics.py $JSON_METRICS \
'data["train/loss"]["1"] > 0.6990' \
'data["train/loss"]["1"] < 0.6992' \
'data["train/loss"]["1"] > 0.680' \
'data["train/loss"]["1"] < 0.70' \
'data["train/loss"]["100"] < 0.60'
fi

# Clean up checkpoint directory after successful run to save space.
rm -rf "$CKPT_DIR"
fi
3 changes: 3 additions & 0 deletions tests/test_suites/llm/grpo-dapomath17k-dsv3-megatron.sh
@@ -43,4 +43,7 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma
uv run tests/check_metrics.py $JSON_METRICS \
'min(data["train/token_mult_prob_error"]) < 1.05' \
'data["train/reward"]["10"] > 0.4'

# Clean up checkpoint directory after successful run to save space.
rm -rf "$CKPT_DIR"
fi
5 changes: 4 additions & 1 deletion tests/test_suites/llm/grpo-deepscaler-1.5b-16K.sh
@@ -41,7 +41,7 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
# Only run metrics if the target step is reached
if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
uv run tests/check_metrics.py $JSON_METRICS \
'mean(data["train/token_mult_prob_error"]) < 1.05' \
'median(data["train/token_mult_prob_error"]) < 1.05' \
"data['train/token_mult_prob_error']['$MAX_STEPS'] < 1.05"
fi

@@ -66,3 +66,6 @@ cat ${RUN_LOG}.aime-16k | grep "score=" | sed 's/.*score=\([^ ]*\).*/{"sco
# 240 step checkpoint 0.3
uv run tests/check_metrics.py ${RUN_LOG}-16k-metric.json \
'data["score"] >= 0.2396'

# Clean up checkpoint directory after successful run to save space.
rm -rf "$CKPT_DIR"