From 316d43e8c3a7a7c5c120637b60d9ef0d6d24f098 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Thu, 13 Nov 2025 14:50:18 -0800 Subject: [PATCH 01/39] Miscellaneous infra. Signed-off-by: Peter Jin --- nemo_gym/cli.py | 23 ++++++++++++++++++++--- nemo_gym/config_types.py | 3 ++- nemo_gym/global_config.py | 2 +- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index 5dc554f6..b9c26412 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -54,15 +54,20 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: # pragma: no cover + # https://docs.astral.sh/uv/reference/cli/#uv-venv--seed + pre_install_cmd = "uv pip install setuptools setuptools_scm packaging wheel" + install_cmd = "uv pip install -r requirements.txt" head_server_deps = global_config_dict[HEAD_SERVER_DEPS_KEY_NAME] install_cmd += " " + " ".join(head_server_deps) - return f"""cd {dir_path} \\ - && uv venv --allow-existing --python {global_config_dict[PYTHON_VERSION_KEY_NAME]} \\ + cmd = f"""cd {dir_path} \\ + && uv venv --seed --allow-existing --python {global_config_dict[PYTHON_VERSION_KEY_NAME]} \\ && source .venv/bin/activate \\ + && {pre_install_cmd} \\ && {install_cmd} \\ - """ + """ + return cmd def _run_command(command: str, working_directory: Path) -> Popen: # pragma: no cover @@ -228,6 +233,18 @@ def poll(self) -> None: for process_name, process in self._processes.items(): if process.poll() is not None: + proc_out, proc_err = process.communicate() + print(f"DEBUG: Process `{process_name}` finished unexpectedly!") + print(f"DEBUG: Process `{process_name}` stdout:", flush=True) + if isinstance(proc_out, bytes): + print(proc_out.decode("utf-8"), flush=True) + else: + print(proc_out, flush=True) + print(f"DEBUG: Process `{process_name}` stderr:", flush=True) + if isinstance(proc_err, bytes): + print(proc_err.decode("utf-8"), flush=True) + else: + print(proc_err, flush=True) raise RuntimeError(f"Process `{process_name}` finished unexpectedly!") def wait_for_spinup(self) -> None: diff --git a/nemo_gym/config_types.py b/nemo_gym/config_types.py index 550072be..4213f42e 100644 --- a/nemo_gym/config_types.py +++ b/nemo_gym/config_types.py @@ -196,8 +196,8 @@ class DatasetConfig(BaseModel): Literal["MIT"], Literal["Creative Commons Attribution 4.0 International"], Literal["Creative Commons Attribution-ShareAlike 4.0 International"], + Literal["NVIDIA Internal Use Only, Do Not Distribute"], Literal["TBD"], - Literal["MIT"], ] ] = None @@ -224,6 +224,7 @@ class Domain(str, Enum): LONG_CONTEXT = "long_context" SAFETY = "safety" GAMES = "games" + TRANSLATION = "translation" E2E = "e2e" OTHER = "other" diff --git a/nemo_gym/global_config.py b/nemo_gym/global_config.py index 3528f3dd..a57f67fb 100644 --- a/nemo_gym/global_config.py +++ b/nemo_gym/global_config.py @@ -255,7 +255,7 @@ def parse(self, parse_config: Optional[GlobalConfigDictParserConfig] = None) -> # Constrain sensitive package versions global_config_dict[HEAD_SERVER_DEPS_KEY_NAME] = [ # The ray version is very sensitive. The children ray versions must exactly match those of the parent ray. - f"ray=={ray_version}", + f"ray[default]=={ray_version}", # OpenAI version is also sensitive since it changes so often and may introduce subtle incompatibilities. f"openai=={openai_version}", ] From 4ecd8d3b249d99ad2d76e895cbb9f846f828ba6e Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 11:04:25 -0800 Subject: [PATCH 02/39] Remove DEBUG. Comment. Signed-off-by: Peter Jin --- nemo_gym/cli.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index b9c26412..c64fcd2c 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -54,6 +54,7 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: # pragma: no cover + # For python >= 3.12, uv venv --seed no longer installs setuptools and wheels. # https://docs.astral.sh/uv/reference/cli/#uv-venv--seed pre_install_cmd = "uv pip install setuptools setuptools_scm packaging wheel" @@ -234,13 +235,13 @@ def poll(self) -> None: for process_name, process in self._processes.items(): if process.poll() is not None: proc_out, proc_err = process.communicate() - print(f"DEBUG: Process `{process_name}` finished unexpectedly!") - print(f"DEBUG: Process `{process_name}` stdout:", flush=True) + print(f"Process `{process_name}` finished unexpectedly!") + print(f"Process `{process_name}` stdout:", flush=True) if isinstance(proc_out, bytes): print(proc_out.decode("utf-8"), flush=True) else: print(proc_out, flush=True) - print(f"DEBUG: Process `{process_name}` stderr:", flush=True) + print(f"Process `{process_name}` stderr:", flush=True) if isinstance(proc_err, bytes): print(proc_err.decode("utf-8"), flush=True) else: From 8103dbfe9a3a3fdf3cb41f8b5b34634954f97c91 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 11:14:18 -0800 Subject: [PATCH 03/39] Comment about ray package extra. Signed-off-by: Peter Jin --- nemo_gym/global_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nemo_gym/global_config.py b/nemo_gym/global_config.py index a57f67fb..0136d8e7 100644 --- a/nemo_gym/global_config.py +++ b/nemo_gym/global_config.py @@ -255,6 +255,7 @@ def parse(self, parse_config: Optional[GlobalConfigDictParserConfig] = None) -> # Constrain sensitive package versions global_config_dict[HEAD_SERVER_DEPS_KEY_NAME] = [ # The ray version is very sensitive. The children ray versions must exactly match those of the parent ray. + # The ray extra [default] should also exactly match extra in the top-level Gym pyproject.toml. f"ray[default]=={ray_version}", # OpenAI version is also sensitive since it changes so often and may introduce subtle incompatibilities. f"openai=={openai_version}", From dc493d5e7fdf6cd48b927491b04419bcf5a4243c Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 11:16:30 -0800 Subject: [PATCH 04/39] The. Signed-off-by: Peter Jin --- nemo_gym/global_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo_gym/global_config.py b/nemo_gym/global_config.py index 0136d8e7..b5499903 100644 --- a/nemo_gym/global_config.py +++ b/nemo_gym/global_config.py @@ -255,7 +255,7 @@ def parse(self, parse_config: Optional[GlobalConfigDictParserConfig] = None) -> # Constrain sensitive package versions global_config_dict[HEAD_SERVER_DEPS_KEY_NAME] = [ # The ray version is very sensitive. The children ray versions must exactly match those of the parent ray. - # The ray extra [default] should also exactly match extra in the top-level Gym pyproject.toml. + # The ray extra [default] should also exactly match the extra in the top-level Gym pyproject.toml. f"ray[default]=={ray_version}", # OpenAI version is also sensitive since it changes so often and may introduce subtle incompatibilities. f"openai=={openai_version}", From 9502d8287c7de85b38df8381b58c82d28f3d30ef Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 12:38:08 -0800 Subject: [PATCH 05/39] Fix test (?). Signed-off-by: Peter Jin --- tests/unit_tests/test_global_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit_tests/test_global_config.py b/tests/unit_tests/test_global_config.py index 196da9ec..038c7512 100644 --- a/tests/unit_tests/test_global_config.py +++ b/tests/unit_tests/test_global_config.py @@ -41,7 +41,7 @@ def _mock_versions_for_testing(self, monkeypatch: MonkeyPatch) -> Dict[str, str] monkeypatch.setattr(nemo_gym.global_config, "python_version", python_version_mock) return { - "head_server_deps": ["ray==test ray version", "openai==test openai version"], + "head_server_deps": ["ray[default]==test ray version", "openai==test openai version"], "python_version": "test python version", } From 0475d5ef5efd76523ef2e802d3f0917af4b75288 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 12:40:15 -0800 Subject: [PATCH 06/39] Initial support for server pyproject.toml (WIP). Signed-off-by: Peter Jin --- nemo_gym/cli.py | 47 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index 56b6d2d6..cfd3738d 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -54,27 +54,48 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: # pragma: no cover - # For python >= 3.12, uv venv --seed no longer installs setuptools and wheels. - # https://docs.astral.sh/uv/reference/cli/#uv-venv--seed - pre_install_cmd = "uv pip install setuptools setuptools_scm packaging wheel" + pyproject_toml = False + with open(f"{dir_path}/pyproject.toml", "r") as _f: + pyproject_toml = True - install_cmd = "uv pip install -r requirements.txt" - head_server_deps = global_config_dict[HEAD_SERVER_DEPS_KEY_NAME] - install_cmd += " " + " ".join(head_server_deps) - - cmd = f"""cd {dir_path} \\ - && uv venv --seed --allow-existing --python {global_config_dict[PYTHON_VERSION_KEY_NAME]} \\ + cmd = f"""uv venv --seed --allow-existing --python {global_config_dict[PYTHON_VERSION_KEY_NAME]} \\ && source .venv/bin/activate \\ - && {pre_install_cmd} \\ - && {install_cmd} \\ """ + + if pyproject_toml: + cmd += """&& uv pip install --editable . \\ + """ + + else: + # For python >= 3.12, uv venv --seed no longer installs setuptools and wheels. + # https://docs.astral.sh/uv/reference/cli/#uv-venv--seed + pre_install_cmd = "uv pip install setuptools setuptools_scm packaging wheel" + + install_cmd = "uv pip install -r requirements.txt" + head_server_deps = global_config_dict[HEAD_SERVER_DEPS_KEY_NAME] + install_cmd += " " + " ".join(head_server_deps) + + cmd += f"""&& {pre_install_cmd} \\ + && {install_cmd} \\ + """ + return cmd def _run_command(command: str, working_directory: Path) -> Popen: # pragma: no cover custom_env = environ.copy() - custom_env["PYTHONPATH"] = f"{working_directory.absolute()}:{custom_env.get('PYTHONPATH', '')}" - return Popen(command, executable="/bin/bash", shell=True, env=custom_env) + py_path = custom_env.get("PYTHONPATH", None) + if py_path is not None: + custom_env["PYTHONPATH"] = f"{working_directory.absolute()}:{py_path}" + else: + custom_env["PYTHONPATH"] = working_directory.absolute() + return Popen( + command, + executable="/bin/bash", + shell=True, + cwd=working_directory, + env=custom_env, + ) class RunConfig(BaseNeMoGymCLIConfig): From d86756b73bc60e14ef9b6bca98dfb9dbd23270ab Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 12:46:34 -0800 Subject: [PATCH 07/39] Fix pyproject.toml check. Signed-off-by: Peter Jin --- nemo_gym/cli.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index cfd3738d..085d6171 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -55,8 +55,11 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: # pragma: no cover pyproject_toml = False - with open(f"{dir_path}/pyproject.toml", "r") as _f: - pyproject_toml = True + try: + with open(f"{dir_path}/pyproject.toml", "r") as _f: + pyproject_toml = True + except OSError: + pass cmd = f"""uv venv --seed --allow-existing --python {global_config_dict[PYTHON_VERSION_KEY_NAME]} \\ && source .venv/bin/activate \\ From 79028a613187dac7201910dd97f469ab64cd9638 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 12:52:20 -0800 Subject: [PATCH 08/39] Working directory Path. Signed-off-by: Peter Jin --- nemo_gym/cli.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index 085d6171..720e8376 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -56,7 +56,7 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: # pragma: no cover pyproject_toml = False try: - with open(f"{dir_path}/pyproject.toml", "r") as _f: + with open(f"{dir_path / 'pyproject.toml'}", "r") as _f: pyproject_toml = True except OSError: pass @@ -85,18 +85,19 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: return cmd -def _run_command(command: str, working_directory: Path) -> Popen: # pragma: no cover +def _run_command(command: str, working_dir_path: Path) -> Popen: # pragma: no cover + work_dir = f"{working_dir_path.absolute()}" custom_env = environ.copy() py_path = custom_env.get("PYTHONPATH", None) if py_path is not None: - custom_env["PYTHONPATH"] = f"{working_directory.absolute()}:{py_path}" + custom_env["PYTHONPATH"] = f"{work_dir}:{py_path}" else: - custom_env["PYTHONPATH"] = working_directory.absolute() + custom_env["PYTHONPATH"] = work_dir return Popen( command, executable="/bin/bash", shell=True, - cwd=working_directory, + cwd=work_dir, env=custom_env, ) From 7e62b1d1790275f458a58c9b961f3dfd560fe91c Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 14:13:10 -0800 Subject: [PATCH 09/39] Install a server venv from pyproject.toml if available. Signed-off-by: Peter Jin --- nemo_gym/cli.py | 16 +++++----- .../vllm_model/pyproject.toml | 30 +++++++++++++++++++ 2 files changed, 39 insertions(+), 7 deletions(-) create mode 100644 responses_api_models/vllm_model/pyproject.toml diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index 720e8376..0e859824 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -54,6 +54,8 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: # pragma: no cover + head_server_deps = global_config_dict[HEAD_SERVER_DEPS_KEY_NAME] + pyproject_toml = False try: with open(f"{dir_path / 'pyproject.toml'}", "r") as _f: @@ -61,12 +63,11 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: except OSError: pass - cmd = f"""uv venv --seed --allow-existing --python {global_config_dict[PYTHON_VERSION_KEY_NAME]} \\ - && source .venv/bin/activate \\ - """ - if pyproject_toml: - cmd += """&& uv pip install --editable . \\ + cmd = f"""uv venv --seed --allow-existing --python {global_config_dict[PYTHON_VERSION_KEY_NAME]} \\ + && source .venv/bin/activate \\ + && uv pip install {' '.join(head_server_deps)} \\ + && uv pip install --editable . \\ """ else: @@ -75,10 +76,11 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: pre_install_cmd = "uv pip install setuptools setuptools_scm packaging wheel" install_cmd = "uv pip install -r requirements.txt" - head_server_deps = global_config_dict[HEAD_SERVER_DEPS_KEY_NAME] install_cmd += " " + " ".join(head_server_deps) - cmd += f"""&& {pre_install_cmd} \\ + cmd = f"""uv venv --seed --allow-existing --python {global_config_dict[PYTHON_VERSION_KEY_NAME]} \\ + && source .venv/bin/activate \\ + && {pre_install_cmd} \\ && {install_cmd} \\ """ diff --git a/responses_api_models/vllm_model/pyproject.toml b/responses_api_models/vllm_model/pyproject.toml new file mode 100644 index 00000000..86ef4fc0 --- /dev/null +++ b/responses_api_models/vllm_model/pyproject.toml @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[project] +name = "vllm_model" +version = "0.2.0rc0" +requires-python = ">=3.12" +dependencies = [ + "nemo-gym[dev]", + "vllm", +] + +[tool.uv.sources] +nemo-gym = { path = "../.." } + +[build-system] +requires = ["setuptools>=61", "setuptools-scm"] +build-backend = "setuptools.build_meta" From 36efb946517b7c526056fa7f15f0898a1f633fe4 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 14:15:50 -0800 Subject: [PATCH 10/39] Deprecated vllm_model requirements.txt. Signed-off-by: Peter Jin --- responses_api_models/vllm_model/requirements.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 responses_api_models/vllm_model/requirements.txt diff --git a/responses_api_models/vllm_model/requirements.txt b/responses_api_models/vllm_model/requirements.txt deleted file mode 100644 index 00ed8321..00000000 --- a/responses_api_models/vllm_model/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --e nemo-gym[dev] @ ../../ From 8d49b953fe04d4199f6dd3dbde959d6c09da1ff0 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 14:16:52 -0800 Subject: [PATCH 11/39] Consistently use dashes in package names. Signed-off-by: Peter Jin --- responses_api_models/vllm_model/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/responses_api_models/vllm_model/pyproject.toml b/responses_api_models/vllm_model/pyproject.toml index 86ef4fc0..bedc9703 100644 --- a/responses_api_models/vllm_model/pyproject.toml +++ b/responses_api_models/vllm_model/pyproject.toml @@ -14,7 +14,7 @@ # limitations under the License. [project] -name = "vllm_model" +name = "vllm-model" version = "0.2.0rc0" requires-python = ">=3.12" dependencies = [ From 6fb0a952d017b53dafc5661c1a89bc4ef5153a0a Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 14:18:52 -0800 Subject: [PATCH 12/39] Lint. Signed-off-by: Peter Jin --- nemo_gym/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index 0e859824..fa2a53a4 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -66,7 +66,7 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: if pyproject_toml: cmd = f"""uv venv --seed --allow-existing --python {global_config_dict[PYTHON_VERSION_KEY_NAME]} \\ && source .venv/bin/activate \\ - && uv pip install {' '.join(head_server_deps)} \\ + && uv pip install {" ".join(head_server_deps)} \\ && uv pip install --editable . \\ """ From 7231efa1a6d00dd3695a3663c587ac330f912abf Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 14:25:26 -0800 Subject: [PATCH 13/39] Cleanup. Signed-off-by: Peter Jin --- nemo_gym/cli.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index fa2a53a4..f2052173 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -56,6 +56,8 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: # pragma: no cover head_server_deps = global_config_dict[HEAD_SERVER_DEPS_KEY_NAME] + uv_venv_cmd = f"uv venv --seed --allow-existing --python {global_config_dict[PYTHON_VERSION_KEY_NAME]}" + pyproject_toml = False try: with open(f"{dir_path / 'pyproject.toml'}", "r") as _f: @@ -64,7 +66,7 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: pass if pyproject_toml: - cmd = f"""uv venv --seed --allow-existing --python {global_config_dict[PYTHON_VERSION_KEY_NAME]} \\ + cmd = f"""{uv_venv_cmd} \\ && source .venv/bin/activate \\ && uv pip install {" ".join(head_server_deps)} \\ && uv pip install --editable . \\ @@ -78,7 +80,7 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: install_cmd = "uv pip install -r requirements.txt" install_cmd += " " + " ".join(head_server_deps) - cmd = f"""uv venv --seed --allow-existing --python {global_config_dict[PYTHON_VERSION_KEY_NAME]} \\ + cmd = f"""{uv_venv_cmd} \\ && source .venv/bin/activate \\ && {pre_install_cmd} \\ && {install_cmd} \\ From 8fc0d9d544cec3cfb0cfb5d6001c9db8a5bfa238 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 15:33:12 -0800 Subject: [PATCH 14/39] VLLM server spinup. Signed-off-by: Peter Jin --- responses_api_models/vllm_model/app.py | 49 ++++++++++++++++++- .../vllm_model/pyproject.toml | 3 ++ .../vllm_model/vllm_model/__init__.py | 0 3 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 responses_api_models/vllm_model/vllm_model/__init__.py diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index b9a61f99..192427b1 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -13,8 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. import re +from multiprocessing import Process from time import time -from typing import ClassVar, Dict, List, Optional, Tuple, Union +from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union from uuid import uuid4 from aiohttp.client_exceptions import ClientResponseError @@ -66,16 +67,62 @@ class VLLMModelConfig(BaseResponsesAPIModelConfig): uses_reasoning_parser: bool replace_developer_role_with_system: bool = False + spinup_server: bool = False + server_args: Optional[Dict[str, Any]] = None + def model_post_init(self, context): if isinstance(self.base_url, str): self.base_url = [self.base_url] return super().model_post_init(context) +def _spinup_vllm_server(config: VLLMModelConfig) -> None: + import sys + + import uvloop + import vllm.engine.arg_utils + import vllm.entrypoints.openai.api_server + import vllm.entrypoints.openai.cli_args + import vllm.utils + + sys.argv = sys.argv[:1] + sys.argv.append("--model") + sys.argv.append(config.model) + if config.server_args: + for k, v in config.server_args.items(): + if isinstance(v, bool): + if not v: + arg_key = f"--no-{k.replace('_', '-')}" + else: + arg_key = f"--{k.replace('_', '-')}" + sys.argv.append(arg_key) + else: + arg_key = f"--{k.replace('_', '-')}" + sys.argv.append(arg_key) + sys.argv.append(f"{v}") + + server_args = vllm.utils.FlexibleArgumentParser() + server_args = vllm.entrypoints.openai.cli_args.make_arg_parser(server_args) + server_args = server_args.parse_args() + vllm.entrypoints.openai.cli_args.validate_parsed_serve_args(server_args) + + uvloop.run(vllm.entrypoints.openai.api_server.run_server(server_args)) + + class VLLMModel(SimpleResponsesAPIModel): config: VLLMModelConfig def model_post_init(self, context): + self._vllm_proc = None + if self.config.spinup_server: + vllm_proc = Process( + target=_spinup_vllm_server, + args=(self.config,), + daemon=False, + ) + vllm_proc.start() + self._vllm_proc = vllm_proc + self._clients = [ NeMoGymAsyncOpenAI( base_url=base_url, diff --git a/responses_api_models/vllm_model/pyproject.toml b/responses_api_models/vllm_model/pyproject.toml index bedc9703..982d6f41 100644 --- a/responses_api_models/vllm_model/pyproject.toml +++ b/responses_api_models/vllm_model/pyproject.toml @@ -25,6 +25,9 @@ dependencies = [ [tool.uv.sources] nemo-gym = { path = "../.." } +[tool.setuptools.packages.find] +include = ["vllm_model"] + [build-system] requires = ["setuptools>=61", "setuptools-scm"] build-backend = "setuptools.build_meta" diff --git a/responses_api_models/vllm_model/vllm_model/__init__.py b/responses_api_models/vllm_model/vllm_model/__init__.py new file mode 100644 index 00000000..e69de29b From 8975e9840746ef6a656ad5973b978574da15f506 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 15:54:49 -0800 Subject: [PATCH 15/39] VLLM server host and port. Signed-off-by: Peter Jin --- responses_api_models/vllm_model/app.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index 192427b1..cecab0ba 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -27,6 +27,7 @@ Body, SimpleResponsesAPIModel, ) +from nemo_gym.global_config import find_open_port from nemo_gym.openai_utils import ( RESPONSES_TO_TRAIN, NeMoGymAsyncOpenAI, @@ -85,9 +86,16 @@ def _spinup_vllm_server(config: VLLMModelConfig) -> None: import vllm.entrypoints.openai.cli_args import vllm.utils + server_host = "127.0.0.1" + server_port = find_open_port() + sys.argv = sys.argv[:1] sys.argv.append("--model") sys.argv.append(config.model) + sys.argv.append("--host") + sys.argv.append(server_host) + sys.argv.append("--port") + sys.argv.append(server_port) if config.server_args: for k, v in config.server_args.items(): if isinstance(v, bool): From 51ba6fccfb634af431eff7d6ecdcedea420d1207 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 16:00:50 -0800 Subject: [PATCH 16/39] Allocate the free port for VLLM in the model server process. Signed-off-by: Peter Jin --- responses_api_models/vllm_model/app.py | 41 ++++++++++++++++---------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index cecab0ba..2b84bce0 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -77,7 +77,7 @@ def model_post_init(self, context): return super().model_post_init(context) -def _spinup_vllm_server(config: VLLMModelConfig) -> None: +def _spinup_vllm_server(config: VLLMModelConfig, server_host, server_port) -> None: import sys import uvloop @@ -86,9 +86,6 @@ def _spinup_vllm_server(config: VLLMModelConfig) -> None: import vllm.entrypoints.openai.cli_args import vllm.utils - server_host = "127.0.0.1" - server_port = find_open_port() - sys.argv = sys.argv[:1] sys.argv.append("--model") sys.argv.append(config.model) @@ -121,23 +118,35 @@ class VLLMModel(SimpleResponsesAPIModel): config: VLLMModelConfig def model_post_init(self, context): - self._vllm_proc = None + self._server_proc = None if self.config.spinup_server: - vllm_proc = Process( + server_host = "127.0.0.1" + server_port = f"{find_open_port()}" + + server_proc = Process( target=_spinup_vllm_server, - args=(self.config,), + args=(self.config, server_host, server_port), daemon=False, ) - vllm_proc.start() - self._vllm_proc = vllm_proc + server_proc.start() - self._clients = [ - NeMoGymAsyncOpenAI( - base_url=base_url, - api_key=self.config.api_key, - ) - for base_url in self.config.base_url - ] + self._server_proc = server_proc + self._clients = [ + NeMoGymAsyncOpenAI( + base_url=f"http://{server_host}:{server_port}/v1", + api_key=self.config.api_key, + ) + ] + + else: + self._server_proc = None + self._clients = [ + NeMoGymAsyncOpenAI( + base_url=base_url, + api_key=self.config.api_key, + ) + for base_url in self.config.base_url + ] self._session_id_to_client: Dict[str, NeMoGymAsyncOpenAI] = dict() From aa97796614b174c2d8ad4d2c9ad5edf1585c6106 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 16:02:24 -0800 Subject: [PATCH 17/39] Type. Signed-off-by: Peter Jin --- responses_api_models/vllm_model/app.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index 2b84bce0..1f316b30 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -77,7 +77,7 @@ def model_post_init(self, context): return super().model_post_init(context) -def _spinup_vllm_server(config: VLLMModelConfig, server_host, server_port) -> None: +def _spinup_vllm_server(config: VLLMModelConfig, server_host: str, server_port: int) -> None: import sys import uvloop @@ -92,7 +92,7 @@ def _spinup_vllm_server(config: VLLMModelConfig, server_host, server_port) -> No sys.argv.append("--host") sys.argv.append(server_host) sys.argv.append("--port") - sys.argv.append(server_port) + sys.argv.append(f"{server_port}") if config.server_args: for k, v in config.server_args.items(): if isinstance(v, bool): @@ -121,7 +121,7 @@ def model_post_init(self, context): self._server_proc = None if self.config.spinup_server: server_host = "127.0.0.1" - server_port = f"{find_open_port()}" + server_port = find_open_port() server_proc = Process( target=_spinup_vllm_server, From 6ec9325a1f408269a321a462ae8dc16412a730f5 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 16:07:51 -0800 Subject: [PATCH 18/39] Fix for pyproject.toml (this works lol). Signed-off-by: Peter Jin --- responses_api_models/vllm_model/pyproject.toml | 1 + responses_api_models/vllm_model/vllm_model/__init__.py | 0 2 files changed, 1 insertion(+) delete mode 100644 responses_api_models/vllm_model/vllm_model/__init__.py diff --git a/responses_api_models/vllm_model/pyproject.toml b/responses_api_models/vllm_model/pyproject.toml index 982d6f41..19da05fb 100644 --- a/responses_api_models/vllm_model/pyproject.toml +++ b/responses_api_models/vllm_model/pyproject.toml @@ -26,6 +26,7 @@ dependencies = [ nemo-gym = { path = "../.." } [tool.setuptools.packages.find] +where = [".."] include = ["vllm_model"] [build-system] diff --git a/responses_api_models/vllm_model/vllm_model/__init__.py b/responses_api_models/vllm_model/vllm_model/__init__.py deleted file mode 100644 index e69de29b..00000000 From 33ec3f9b61f93e969b292d4d9071a63ca637966d Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 16:38:57 -0800 Subject: [PATCH 19/39] VLLM server "routing" (just re-using the existing multiple clients). Signed-off-by: Peter Jin --- responses_api_models/vllm_model/app.py | 69 +++++++++++++++++++------- 1 file changed, 51 insertions(+), 18 deletions(-) diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index 1f316b30..5de792a8 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -71,13 +71,20 @@ class VLLMModelConfig(BaseResponsesAPIModelConfig): spinup_server: bool = False server_args: Optional[Dict[str, Any]] = None + enable_router: bool = False + router_backend: str = "ray" + router_dp_size: Optional[int] = 1 + def model_post_init(self, context): if isinstance(self.base_url, str): self.base_url = [self.base_url] return super().model_post_init(context) -def _spinup_vllm_server(config: VLLMModelConfig, server_host: str, server_port: int) -> None: +def _spinup_vllm_server( + config: VLLMModelConfig, server_host: str, server_port: int, router_dp_rank: Optional[int] +) -> None: + import os import sys import uvloop @@ -93,6 +100,11 @@ def _spinup_vllm_server(config: VLLMModelConfig, server_host: str, server_port: sys.argv.append(server_host) sys.argv.append("--port") sys.argv.append(f"{server_port}") + sys.argv.append("--distributed-executor-backend") + if config.enable_router: + sys.argv.append(config.router_backend) + else: + sys.argv.append("mp") if config.server_args: for k, v in config.server_args.items(): if isinstance(v, bool): @@ -106,6 +118,14 @@ def _spinup_vllm_server(config: VLLMModelConfig, server_host: str, server_port: sys.argv.append(arg_key) sys.argv.append(f"{v}") + if config.enable_router and config.router_backend == "mp": + tp_size = (config.server_args or {}).get("tensor_parallel_size", 1) + tp_start = router_dp_rank * tp_size + tp_ranks = [] + for tp_rank_offset in range(tp_size): + tp_ranks.append(tp_start + tp_rank_offset) + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([f"{r}" for r in tp_ranks]) + server_args = vllm.utils.FlexibleArgumentParser() server_args = vllm.entrypoints.openai.cli_args.make_arg_parser(server_args) server_args = server_args.parse_args() @@ -118,28 +138,41 @@ class VLLMModel(SimpleResponsesAPIModel): config: VLLMModelConfig def model_post_init(self, context): - self._server_proc = None if self.config.spinup_server: - server_host = "127.0.0.1" - server_port = find_open_port() - - server_proc = Process( - target=_spinup_vllm_server, - args=(self.config, server_host, server_port), - daemon=False, - ) - server_proc.start() + self._server_procs = [] + self._clients = [] + + router_dp_size = 1 + if self.config.enable_router: + router_dp_size = max(1, self.config.router_dp_size) + + for router_dp_rank in range(router_dp_size): + # FIXME: this server host is wrong for multi-node via ray. + server_host = "127.0.0.1" + server_port = find_open_port() + + server_proc = Process( + target=_spinup_vllm_server, + args=( + self.config, + server_host, + server_port, + router_dp_rank if self.config.enable_router else None, + ), + daemon=False, + ) + server_proc.start() - self._server_proc = server_proc - self._clients = [ - NeMoGymAsyncOpenAI( - base_url=f"http://{server_host}:{server_port}/v1", - api_key=self.config.api_key, + self._server_procs.append(server_proc) + self._clients.append( + NeMoGymAsyncOpenAI( + base_url=f"http://{server_host}:{server_port}/v1", + api_key=self.config.api_key, + ) ) - ] else: - self._server_proc = None + self._server_procs = None self._clients = [ NeMoGymAsyncOpenAI( base_url=base_url, From 77cda8519833dd9201e04e7f577a387abaa65de6 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 16:43:12 -0800 Subject: [PATCH 20/39] Better order. Signed-off-by: Peter Jin --- responses_api_models/vllm_model/pyproject.toml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/responses_api_models/vllm_model/pyproject.toml b/responses_api_models/vllm_model/pyproject.toml index 19da05fb..29407a10 100644 --- a/responses_api_models/vllm_model/pyproject.toml +++ b/responses_api_models/vllm_model/pyproject.toml @@ -22,13 +22,13 @@ dependencies = [ "vllm", ] -[tool.uv.sources] -nemo-gym = { path = "../.." } +[build-system] +requires = ["setuptools>=61", "setuptools-scm"] +build-backend = "setuptools.build_meta" [tool.setuptools.packages.find] where = [".."] include = ["vllm_model"] -[build-system] -requires = ["setuptools>=61", "setuptools-scm"] -build-backend = "setuptools.build_meta" +[tool.uv.sources] +nemo-gym = { path = "../.." } From 7201c8f84e63974ae600c12bf0f44975921ff978 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 17:55:56 -0800 Subject: [PATCH 21/39] Comment. Signed-off-by: Peter Jin --- responses_api_models/vllm_model/app.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index 5de792a8..10b17341 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -72,6 +72,8 @@ class VLLMModelConfig(BaseResponsesAPIModelConfig): server_args: Optional[Dict[str, Any]] = None enable_router: bool = False + # router_backend values should be one of "ray" or "mp" (matching the allowed + # values of VLLM --distributed-executor-backend). router_backend: str = "ray" router_dp_size: Optional[int] = 1 From 834d9b9d6a0f4c0c3cb772b30d63e83b64ef18dc Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 17:58:32 -0800 Subject: [PATCH 22/39] Default to "mp" backend. Signed-off-by: Peter Jin --- responses_api_models/vllm_model/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index 10b17341..aa1149e6 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -74,7 +74,7 @@ class VLLMModelConfig(BaseResponsesAPIModelConfig): enable_router: bool = False # router_backend values should be one of "ray" or "mp" (matching the allowed # values of VLLM --distributed-executor-backend). - router_backend: str = "ray" + router_backend: str = "mp" router_dp_size: Optional[int] = 1 def model_post_init(self, context): From 5ee8b5739e12266bc701cf373c2fff99d7bf9ac5 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 18:06:58 -0800 Subject: [PATCH 23/39] Cleanup. Signed-off-by: Peter Jin --- responses_api_models/vllm_model/app.py | 29 +++++++++++++------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index aa1149e6..8464abb9 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -87,7 +87,6 @@ def _spinup_vllm_server( config: VLLMModelConfig, server_host: str, server_port: int, router_dp_rank: Optional[int] ) -> None: import os - import sys import uvloop import vllm.engine.arg_utils @@ -95,18 +94,18 @@ def _spinup_vllm_server( import vllm.entrypoints.openai.cli_args import vllm.utils - sys.argv = sys.argv[:1] - sys.argv.append("--model") - sys.argv.append(config.model) - sys.argv.append("--host") - sys.argv.append(server_host) - sys.argv.append("--port") - sys.argv.append(f"{server_port}") - sys.argv.append("--distributed-executor-backend") + argv = [] + argv.append("--model") + argv.append(config.model) + argv.append("--host") + argv.append(server_host) + argv.append("--port") + argv.append(f"{server_port}") + argv.append("--distributed-executor-backend") if config.enable_router: - sys.argv.append(config.router_backend) + argv.append(config.router_backend) else: - sys.argv.append("mp") + argv.append("mp") if config.server_args: for k, v in config.server_args.items(): if isinstance(v, bool): @@ -114,11 +113,11 @@ def _spinup_vllm_server( arg_key = f"--no-{k.replace('_', '-')}" else: arg_key = f"--{k.replace('_', '-')}" - sys.argv.append(arg_key) + argv.append(arg_key) else: arg_key = f"--{k.replace('_', '-')}" - sys.argv.append(arg_key) - sys.argv.append(f"{v}") + argv.append(arg_key) + argv.append(f"{v}") if config.enable_router and config.router_backend == "mp": tp_size = (config.server_args or {}).get("tensor_parallel_size", 1) @@ -130,7 +129,7 @@ def _spinup_vllm_server( server_args = vllm.utils.FlexibleArgumentParser() server_args = vllm.entrypoints.openai.cli_args.make_arg_parser(server_args) - server_args = server_args.parse_args() + server_args = server_args.parse_args(argv) vllm.entrypoints.openai.cli_args.validate_parsed_serve_args(server_args) uvloop.run(vllm.entrypoints.openai.api_server.run_server(server_args)) From 10b529582144044e53e56213f14c61e7fc446917 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 18:08:05 -0800 Subject: [PATCH 24/39] Cleanup. Signed-off-by: Peter Jin --- responses_api_models/vllm_model/app.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index 8464abb9..f06b7860 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -106,18 +106,17 @@ def _spinup_vllm_server( argv.append(config.router_backend) else: argv.append("mp") - if config.server_args: - for k, v in config.server_args.items(): - if isinstance(v, bool): - if not v: - arg_key = f"--no-{k.replace('_', '-')}" - else: - arg_key = f"--{k.replace('_', '-')}" - argv.append(arg_key) + for k, v in (config.server_args or {}).items(): + if isinstance(v, bool): + if not v: + arg_key = f"--no-{k.replace('_', '-')}" else: arg_key = f"--{k.replace('_', '-')}" - argv.append(arg_key) - argv.append(f"{v}") + argv.append(arg_key) + else: + arg_key = f"--{k.replace('_', '-')}" + argv.append(arg_key) + argv.append(f"{v}") if config.enable_router and config.router_backend == "mp": tp_size = (config.server_args or {}).get("tensor_parallel_size", 1) From e4c55736c2dfdfe67cd9fe9435bd554682371adb Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 20:19:10 -0800 Subject: [PATCH 25/39] Non-async VLLM server heartbeat to avoid early asyncio event loop. Signed-off-by: Peter Jin --- responses_api_models/vllm_model/app.py | 56 ++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index f06b7860..78a52899 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -12,9 +12,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import json import re +import urllib from multiprocessing import Process -from time import time +from time import sleep, time from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union from uuid import uuid4 @@ -75,7 +77,7 @@ class VLLMModelConfig(BaseResponsesAPIModelConfig): # router_backend values should be one of "ray" or "mp" (matching the allowed # values of VLLM --distributed-executor-backend). router_backend: str = "mp" - router_dp_size: Optional[int] = 1 + router_dp_size: int = 1 def model_post_init(self, context): if isinstance(self.base_url, str): @@ -134,11 +136,47 @@ def _spinup_vllm_server( uvloop.run(vllm.entrypoints.openai.api_server.run_server(server_args)) +# Use this to query the VLLM servers during spinup without having to start an +# asyncio event loop for the async client. +def _vllm_server_heartbeat(base_url: str): + req_headers = { + "Content-Type": "application/json", + "Accept": "application/json", + } + req_body = { + "messages": [ + { + "role": "user", + "content": "hi", + } + ], + "max_tokens": 8, + "temperature": 1.0, + } + req_data = json.dumps(req_body).encode("utf-8") + req_url = f"{base_url}/chat/completions" + req = urllib.request.Request( + req_url, + headers=req_headers, + data=req_data, + ) + with urllib.request.urlopen(req, timeout=5) as out: + out_status = out.status + out_data = out.read() + output = out_data.decode("utf-8") + return { + "_status": out_status, + "output": output, + "except": None, + } + + class VLLMModel(SimpleResponsesAPIModel): config: VLLMModelConfig def model_post_init(self, context): if self.config.spinup_server: + self._server_urls = [] self._server_procs = [] self._clients = [] @@ -150,6 +188,7 @@ def model_post_init(self, context): # FIXME: this server host is wrong for multi-node via ray. server_host = "127.0.0.1" server_port = find_open_port() + server_url = f"http://{server_host}:{server_port}/v1" server_proc = Process( target=_spinup_vllm_server, @@ -163,15 +202,26 @@ def model_post_init(self, context): ) server_proc.start() + self._server_urls.append(server_url) self._server_procs.append(server_proc) self._clients.append( NeMoGymAsyncOpenAI( - base_url=f"http://{server_host}:{server_port}/v1", + base_url=server_url, api_key=self.config.api_key, ) ) + for server_url in self._server_urls: + while True: + try: + _vllm_server_heartbeat(server_url) + break + except Exception: + sleep(5) + continue + else: + self._server_urls = None self._server_procs = None self._clients = [ NeMoGymAsyncOpenAI( From 0a8da20c6743e1ddd4015cf99878e9764a9278ee Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sat, 15 Nov 2025 20:29:47 -0800 Subject: [PATCH 26/39] With pyproject.toml, no pre-install command needed. Signed-off-by: Peter Jin --- nemo_gym/cli.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index f2052173..8508de0b 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -73,16 +73,11 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: """ else: - # For python >= 3.12, uv venv --seed no longer installs setuptools and wheels. - # https://docs.astral.sh/uv/reference/cli/#uv-venv--seed - pre_install_cmd = "uv pip install setuptools setuptools_scm packaging wheel" - install_cmd = "uv pip install -r requirements.txt" install_cmd += " " + " ".join(head_server_deps) cmd = f"""{uv_venv_cmd} \\ && source .venv/bin/activate \\ - && {pre_install_cmd} \\ && {install_cmd} \\ """ From ad0e2fcf0f7761abf0c097d677c8222c06165a9c Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Sun, 16 Nov 2025 19:24:17 -0800 Subject: [PATCH 27/39] Improved server venv pyproject install that does not use editable. Signed-off-by: Peter Jin --- nemo_gym/cli.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index 8508de0b..59cb565c 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -68,8 +68,7 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: if pyproject_toml: cmd = f"""{uv_venv_cmd} \\ && source .venv/bin/activate \\ - && uv pip install {" ".join(head_server_deps)} \\ - && uv pip install --editable . \\ + && uv pip install --refresh . '../..[dev]' {" ".join(head_server_deps)} \\ """ else: From 0436b473d1abc3ef97391f2b0b1f5985ec067c5a Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Mon, 17 Nov 2025 10:22:31 -0800 Subject: [PATCH 28/39] Packaging and setup. Signed-off-by: Peter Jin --- nemo_gym/cli.py | 3 ++- responses_api_models/vllm_model/pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index 59cb565c..8508de0b 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -68,7 +68,8 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: if pyproject_toml: cmd = f"""{uv_venv_cmd} \\ && source .venv/bin/activate \\ - && uv pip install --refresh . '../..[dev]' {" ".join(head_server_deps)} \\ + && uv pip install {" ".join(head_server_deps)} \\ + && uv pip install --editable . \\ """ else: diff --git a/responses_api_models/vllm_model/pyproject.toml b/responses_api_models/vllm_model/pyproject.toml index 29407a10..53f6af50 100644 --- a/responses_api_models/vllm_model/pyproject.toml +++ b/responses_api_models/vllm_model/pyproject.toml @@ -31,4 +31,4 @@ where = [".."] include = ["vllm_model"] [tool.uv.sources] -nemo-gym = { path = "../.." } +nemo-gym = { path = "../..", editable = true } From 854609f585eb28aa94ae1911b9146c15979ed03a Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Mon, 17 Nov 2025 13:59:30 -0800 Subject: [PATCH 29/39] Revert VLLMModel changes (moving to PR #318). Signed-off-by: Peter Jin --- responses_api_models/vllm_model/app.py | 165 ++----------------------- 1 file changed, 9 insertions(+), 156 deletions(-) diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index 78a52899..b9a61f99 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -12,12 +12,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import json import re -import urllib -from multiprocessing import Process -from time import sleep, time -from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union +from time import time +from typing import ClassVar, Dict, List, Optional, Tuple, Union from uuid import uuid4 from aiohttp.client_exceptions import ClientResponseError @@ -29,7 +26,6 @@ Body, SimpleResponsesAPIModel, ) -from nemo_gym.global_config import find_open_port from nemo_gym.openai_utils import ( RESPONSES_TO_TRAIN, NeMoGymAsyncOpenAI, @@ -70,166 +66,23 @@ class VLLMModelConfig(BaseResponsesAPIModelConfig): uses_reasoning_parser: bool replace_developer_role_with_system: bool = False - spinup_server: bool = False - server_args: Optional[Dict[str, Any]] = None - - enable_router: bool = False - # router_backend values should be one of "ray" or "mp" (matching the allowed - # values of VLLM --distributed-executor-backend). - router_backend: str = "mp" - router_dp_size: int = 1 - def model_post_init(self, context): if isinstance(self.base_url, str): self.base_url = [self.base_url] return super().model_post_init(context) -def _spinup_vllm_server( - config: VLLMModelConfig, server_host: str, server_port: int, router_dp_rank: Optional[int] -) -> None: - import os - - import uvloop - import vllm.engine.arg_utils - import vllm.entrypoints.openai.api_server - import vllm.entrypoints.openai.cli_args - import vllm.utils - - argv = [] - argv.append("--model") - argv.append(config.model) - argv.append("--host") - argv.append(server_host) - argv.append("--port") - argv.append(f"{server_port}") - argv.append("--distributed-executor-backend") - if config.enable_router: - argv.append(config.router_backend) - else: - argv.append("mp") - for k, v in (config.server_args or {}).items(): - if isinstance(v, bool): - if not v: - arg_key = f"--no-{k.replace('_', '-')}" - else: - arg_key = f"--{k.replace('_', '-')}" - argv.append(arg_key) - else: - arg_key = f"--{k.replace('_', '-')}" - argv.append(arg_key) - argv.append(f"{v}") - - if config.enable_router and config.router_backend == "mp": - tp_size = (config.server_args or {}).get("tensor_parallel_size", 1) - tp_start = router_dp_rank * tp_size - tp_ranks = [] - for tp_rank_offset in range(tp_size): - tp_ranks.append(tp_start + tp_rank_offset) - os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([f"{r}" for r in tp_ranks]) - - server_args = vllm.utils.FlexibleArgumentParser() - server_args = vllm.entrypoints.openai.cli_args.make_arg_parser(server_args) - server_args = server_args.parse_args(argv) - vllm.entrypoints.openai.cli_args.validate_parsed_serve_args(server_args) - - uvloop.run(vllm.entrypoints.openai.api_server.run_server(server_args)) - - -# Use this to query the VLLM servers during spinup without having to start an -# asyncio event loop for the async client. -def _vllm_server_heartbeat(base_url: str): - req_headers = { - "Content-Type": "application/json", - "Accept": "application/json", - } - req_body = { - "messages": [ - { - "role": "user", - "content": "hi", - } - ], - "max_tokens": 8, - "temperature": 1.0, - } - req_data = json.dumps(req_body).encode("utf-8") - req_url = f"{base_url}/chat/completions" - req = urllib.request.Request( - req_url, - headers=req_headers, - data=req_data, - ) - with urllib.request.urlopen(req, timeout=5) as out: - out_status = out.status - out_data = out.read() - output = out_data.decode("utf-8") - return { - "_status": out_status, - "output": output, - "except": None, - } - - class VLLMModel(SimpleResponsesAPIModel): config: VLLMModelConfig def model_post_init(self, context): - if self.config.spinup_server: - self._server_urls = [] - self._server_procs = [] - self._clients = [] - - router_dp_size = 1 - if self.config.enable_router: - router_dp_size = max(1, self.config.router_dp_size) - - for router_dp_rank in range(router_dp_size): - # FIXME: this server host is wrong for multi-node via ray. - server_host = "127.0.0.1" - server_port = find_open_port() - server_url = f"http://{server_host}:{server_port}/v1" - - server_proc = Process( - target=_spinup_vllm_server, - args=( - self.config, - server_host, - server_port, - router_dp_rank if self.config.enable_router else None, - ), - daemon=False, - ) - server_proc.start() - - self._server_urls.append(server_url) - self._server_procs.append(server_proc) - self._clients.append( - NeMoGymAsyncOpenAI( - base_url=server_url, - api_key=self.config.api_key, - ) - ) - - for server_url in self._server_urls: - while True: - try: - _vllm_server_heartbeat(server_url) - break - except Exception: - sleep(5) - continue - - else: - self._server_urls = None - self._server_procs = None - self._clients = [ - NeMoGymAsyncOpenAI( - base_url=base_url, - api_key=self.config.api_key, - ) - for base_url in self.config.base_url - ] + self._clients = [ + NeMoGymAsyncOpenAI( + base_url=base_url, + api_key=self.config.api_key, + ) + for base_url in self.config.base_url + ] self._session_id_to_client: Dict[str, NeMoGymAsyncOpenAI] = dict() From dc6ffef014b72217e5c3bb0ba4a147cee6914b03 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Mon, 17 Nov 2025 16:17:42 -0800 Subject: [PATCH 30/39] One line uv pip install. Signed-off-by: Peter Jin --- nemo_gym/cli.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index 8508de0b..2e968dd4 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -68,8 +68,7 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: if pyproject_toml: cmd = f"""{uv_venv_cmd} \\ && source .venv/bin/activate \\ - && uv pip install {" ".join(head_server_deps)} \\ - && uv pip install --editable . \\ + && uv pip install '-e .' {" ".join(head_server_deps)} \\ """ else: From e8afd2de5b1077b9c5537621cf66f42fce4d9a78 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Thu, 20 Nov 2025 09:40:38 -0800 Subject: [PATCH 31/39] Print the names of servers yet to have finished spinning up. Signed-off-by: Peter Jin --- nemo_gym/cli.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index 2e968dd4..56638222 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -24,7 +24,7 @@ from subprocess import Popen from threading import Thread from time import sleep -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Tuple import rich import uvicorn @@ -279,11 +279,17 @@ def wait_for_spinup(self) -> None: self.poll() statuses = self.check_http_server_statuses() - num_spun_up = statuses.count("success") + num_spun_up = 0 + waiting = [] + for name, status in statuses: + if status == "success": + num_spun_up += 1 + else: + waiting.append(name) if len(statuses) != num_spun_up: print( f"""{num_spun_up} / {len(statuses)} servers ready ({statuses.count("timeout")} timed out, {statuses.count("connection_error")} connection errored, {statuses.count("unknown_error")} had unknown errors). -Waiting for servers to spin up. Sleeping {sleep_interval}s...""" +Waiting for servers {waiting} to spin up. Sleeping {sleep_interval}s...""" ) else: print(f"All {num_spun_up} / {len(statuses)} servers ready! Polling every 60s") @@ -325,7 +331,7 @@ async def sleep(): finally: self.shutdown() - def check_http_server_statuses(self) -> List[ServerStatus]: + def check_http_server_statuses(self) -> List[Tuple[str, ServerStatus]]: print( "Checking for HTTP server statuses (you should see some HTTP requests to `/` that may 404. This is expected.)" ) @@ -333,7 +339,7 @@ def check_http_server_statuses(self) -> List[ServerStatus]: for server_instance_display_config in self._server_instance_display_configs: name = server_instance_display_config.config_path status = self._server_client.poll_for_status(name) - statuses.append(status) + statuses.append((name, status)) return statuses From 014278439c44aae91c5791407fdf0b183c0a6949 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Thu, 20 Nov 2025 10:43:36 -0800 Subject: [PATCH 32/39] Formatting. Signed-off-by: Peter Jin --- nemo_gym/cli.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index 56638222..f4499572 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -289,7 +289,8 @@ def wait_for_spinup(self) -> None: if len(statuses) != num_spun_up: print( f"""{num_spun_up} / {len(statuses)} servers ready ({statuses.count("timeout")} timed out, {statuses.count("connection_error")} connection errored, {statuses.count("unknown_error")} had unknown errors). -Waiting for servers {waiting} to spin up. Sleeping {sleep_interval}s...""" +Waiting for servers to spin up: {waiting} +Sleeping {sleep_interval}s...""" ) else: print(f"All {num_spun_up} / {len(statuses)} servers ready! Polling every 60s") From 66b788dbf4fb466d810d523ce904d0c00227dd75 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Tue, 2 Dec 2025 14:44:50 -0800 Subject: [PATCH 33/39] Revert to just cd into working dir. Signed-off-by: Peter Jin --- nemo_gym/cli.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index 78b706d4..1c9563f5 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -61,7 +61,7 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: # pragma: no cover head_server_deps = global_config_dict[HEAD_SERVER_DEPS_KEY_NAME] - uv_venv_cmd = f"uv venv --seed --allow-existing --python {global_config_dict[PYTHON_VERSION_KEY_NAME]}" + uv_venv_cmd = f"uv venv --seed --allow-existing --python {global_config_dict[PYTHON_VERSION_KEY_NAME]} .venv" pyproject_toml = False try: @@ -71,16 +71,19 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: pass if pyproject_toml: - cmd = f"""{uv_venv_cmd} \\ + install_cmd = f"""uv pip install '-e .' {" ".join(head_server_deps)}""" + + cmd = f"""cd {dir_path} \\ + && {uv_venv_cmd} \\ && source .venv/bin/activate \\ - && uv pip install '-e .' {" ".join(head_server_deps)} \\ + && {install_cmd} \\ """ else: - install_cmd = "uv pip install -r requirements.txt" - install_cmd += " " + " ".join(head_server_deps) + install_cmd = f"""uv pip install -r requirements.txt {" ".join(head_server_deps)}""" - cmd = f"""{uv_venv_cmd} \\ + cmd = f"""cd {dir_path} \\ + && {uv_venv_cmd} \\ && source .venv/bin/activate \\ && {install_cmd} \\ """ @@ -100,7 +103,6 @@ def _run_command(command: str, working_dir_path: Path) -> Popen: # pragma: no c command, executable="/bin/bash", shell=True, - cwd=work_dir, env=custom_env, ) From a78f226a786685c253d8a48566612cfc6ccf763e Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Tue, 2 Dec 2025 14:54:08 -0800 Subject: [PATCH 34/39] Deduplicate. Signed-off-by: Peter Jin --- nemo_gym/cli.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index 1c9563f5..c418004d 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -72,21 +72,14 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: if pyproject_toml: install_cmd = f"""uv pip install '-e .' {" ".join(head_server_deps)}""" - - cmd = f"""cd {dir_path} \\ - && {uv_venv_cmd} \\ - && source .venv/bin/activate \\ - && {install_cmd} \\ - """ - else: install_cmd = f"""uv pip install -r requirements.txt {" ".join(head_server_deps)}""" - cmd = f"""cd {dir_path} \\ - && {uv_venv_cmd} \\ - && source .venv/bin/activate \\ - && {install_cmd} \\ - """ + cmd = f"""cd {dir_path} \\ + && {uv_venv_cmd} \\ + && source .venv/bin/activate \\ + && {install_cmd} \\ + """ return cmd From fdb54feeeb9f453c52957ced13c04a7419c45cad Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Tue, 2 Dec 2025 14:56:29 -0800 Subject: [PATCH 35/39] Also add explicit check for requirements.txt. Signed-off-by: Peter Jin --- nemo_gym/cli.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index c418004d..b9f29bcc 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -64,16 +64,24 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: uv_venv_cmd = f"uv venv --seed --allow-existing --python {global_config_dict[PYTHON_VERSION_KEY_NAME]} .venv" pyproject_toml = False + requirements_txt = False try: with open(f"{dir_path / 'pyproject.toml'}", "r") as _f: pyproject_toml = True except OSError: pass + try: + with open(f"{dir_path / 'requirements.txt'}", "r") as _f: + requirements_txt = True + except OSError: + pass if pyproject_toml: install_cmd = f"""uv pip install '-e .' {" ".join(head_server_deps)}""" - else: + elif requirements_txt: install_cmd = f"""uv pip install -r requirements.txt {" ".join(head_server_deps)}""" + else: + raise RuntimeError(f"Missing pyproject.toml or requirements.txt for uv venv setup in server dir: {dir_path}") cmd = f"""cd {dir_path} \\ && {uv_venv_cmd} \\ From 3fb29115ab42d310217c1dd103158d75c9684804 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Tue, 2 Dec 2025 14:57:45 -0800 Subject: [PATCH 36/39] Revert format. Signed-off-by: Peter Jin --- nemo_gym/cli.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index b9f29bcc..9c447024 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -100,12 +100,7 @@ def _run_command(command: str, working_dir_path: Path) -> Popen: # pragma: no c custom_env["PYTHONPATH"] = f"{work_dir}:{py_path}" else: custom_env["PYTHONPATH"] = work_dir - return Popen( - command, - executable="/bin/bash", - shell=True, - env=custom_env, - ) + return Popen(command, executable="/bin/bash", shell=True, env=custom_env) class RunConfig(BaseNeMoGymCLIConfig): From fd9859584719f408e08b7cf6c135765bf61aed19 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Mon, 8 Dec 2025 17:11:45 -0800 Subject: [PATCH 37/39] Sync vllm_model pyproject.toml. Signed-off-by: Peter Jin --- responses_api_models/vllm_model/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/responses_api_models/vllm_model/pyproject.toml b/responses_api_models/vllm_model/pyproject.toml index 53f6af50..48d9a54c 100644 --- a/responses_api_models/vllm_model/pyproject.toml +++ b/responses_api_models/vllm_model/pyproject.toml @@ -23,8 +23,8 @@ dependencies = [ ] [build-system] -requires = ["setuptools>=61", "setuptools-scm"] build-backend = "setuptools.build_meta" +requires = ["setuptools>=61", "setuptools-scm"] [tool.setuptools.packages.find] where = [".."] From 987cf5c74fbf86fa7e0c5e0845ae0acb373f487c Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Tue, 9 Dec 2025 14:36:03 -0800 Subject: [PATCH 38/39] Minimum version of vllm >= 0.11.2. Signed-off-by: Peter Jin --- responses_api_models/vllm_model/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/responses_api_models/vllm_model/pyproject.toml b/responses_api_models/vllm_model/pyproject.toml index 48d9a54c..3459458b 100644 --- a/responses_api_models/vllm_model/pyproject.toml +++ b/responses_api_models/vllm_model/pyproject.toml @@ -19,7 +19,7 @@ version = "0.2.0rc0" requires-python = ">=3.12" dependencies = [ "nemo-gym[dev]", - "vllm", + "vllm>=0.11.2", ] [build-system] From 912f23a919d54f6ebdf7ddaa0d694f4798eefeb0 Mon Sep 17 00:00:00 2001 From: Peter Jin Date: Wed, 10 Dec 2025 14:30:01 -0800 Subject: [PATCH 39/39] Log with stdout/stderr redirection. Signed-off-by: Peter Jin --- nemo_gym/cli.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/nemo_gym/cli.py b/nemo_gym/cli.py index f2fac966..0ad353bb 100644 --- a/nemo_gym/cli.py +++ b/nemo_gym/cli.py @@ -25,7 +25,7 @@ from os.path import exists from pathlib import Path from signal import SIGINT -from subprocess import Popen +from subprocess import Popen, PIPE from threading import Thread from time import sleep from typing import Dict, List, Optional, Tuple @@ -92,7 +92,9 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: return cmd -def _run_command(command: str, working_dir_path: Path) -> Popen: # pragma: no cover +def _run_command( + command: str, working_dir_path: Path, top_level_path: Optional[str] = None +) -> Popen: # pragma: no cover work_dir = f"{working_dir_path.absolute()}" custom_env = environ.copy() py_path = custom_env.get("PYTHONPATH", None) @@ -100,7 +102,19 @@ def _run_command(command: str, working_dir_path: Path) -> Popen: # pragma: no c custom_env["PYTHONPATH"] = f"{work_dir}:{py_path}" else: custom_env["PYTHONPATH"] = work_dir - return Popen(command, executable="/bin/bash", shell=True, env=custom_env) + redirect_stdout = None + redirect_stderr = None + if top_level_path: + redirect_stdout = PIPE + redirect_stderr = PIPE + return Popen( + command, + executable="/bin/bash", + shell=True, + env=custom_env, + stdout=redirect_stdout, + stderr=redirect_stderr, + ) class RunConfig(BaseNeMoGymCLIConfig): @@ -220,7 +234,7 @@ def start(self, global_config_dict_parser_config: GlobalConfigDictParserConfig) {NEMO_GYM_CONFIG_PATH_ENV_VAR_NAME}={shlex.quote(top_level_path)} \\ python {str(entrypoint_fpath)}""" - process = _run_command(command, dir_path) + process = _run_command(command, dir_path, top_level_path) self._processes[top_level_path] = process host = server_config_dict.get("host")