
Commit 0d65924

[OpenAI] Add traceback for internal server errors for HTTP inference endpoint (#1026)
Before this PR, it was hard to see the root cause of an error when the endpoint returned an `INTERNAL_SERVER_ERROR`. This PR adds traceback logging for such errors. The unit test mocks an error and verifies that the traceback is indeed logged.
Parent commit: 0b16386
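To make the pattern concrete, here is a minimal, self-contained sketch of what the commit describes: log the full traceback server-side and return only a short error message to the caller (per CWE-209). The handler name, logger name, and response shape below are illustrative placeholders, not the repo's actual `ErrorResponse`/`ErrorInfo` code.

```python
import logging
import traceback

logger = logging.getLogger("http_endpoint_sketch")  # illustrative logger name


def handle_request_sketch(payload: dict) -> tuple:
    """Simulated endpoint handler; returns (response_body, status_code)."""
    try:
        messages = payload["messages"]  # KeyError here mimics an unexpected internal failure
        return {"choices": [{"message": messages[-1]}]}, 200
    except Exception as e:
        # Full traceback goes to the server log only (CWE-209: no stack traces for clients)
        logger.error("Error when handling request:\n%s", traceback.format_exc())
        return {"error": {"message": f"Error when handling request: {e}", "code": 500}}, 500


if __name__ == "__main__":
    logging.basicConfig(level=logging.ERROR)
    body, status = handle_request_sketch({})  # missing "messages" triggers the error path
    print(status, body["error"]["message"])   # the caller sees the message, never the traceback
```

The key point is that `traceback.format_exc()` is called inside the `except` block, where the active exception is available, so the full stack ends up in server logs while the client response stays sanitized.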

File tree

2 files changed: +42 -2 lines


skyrl-train/skyrl_train/inference_engines/inference_engine_client_http_endpoint.py

Lines changed: 3 additions & 0 deletions
```diff
@@ -164,6 +164,9 @@ async def handle_openai_request(raw_request: Request, endpoint: str) -> JSONResponse:
         )
         return JSONResponse(content=error_response.model_dump(), status_code=HTTPStatus.BAD_REQUEST.value)
     except Exception as e:
+        # Log full traceback server-side for debugging, but don't expose to client (CWE-209)
+        tb = traceback.format_exc()
+        logger.error(f"Error when handling {endpoint} request in SkyRL:\n{tb}")
         error_response = ErrorResponse(
             error=ErrorInfo(
                 message=f"Error when handling {endpoint} request in SkyRL: {str(e)}",
```

skyrl-train/tests/gpu/gpu_ci/test_inference_engine_client_http_endpoint.py

Lines changed: 39 additions & 2 deletions
```diff
@@ -16,6 +16,7 @@
 from http import HTTPStatus
 from typing import Any, Dict, List, Union, Tuple
 from pathlib import Path
+from unittest.mock import patch
 import ray
 import threading
 import requests
@@ -25,12 +26,14 @@
 from litellm import completion as litellm_completion
 from litellm import acompletion as litellm_async_completion
 from litellm import atext_completion as litellm_async_text_completion
+import logging
 
 from skyrl_train.config import SkyRLConfig
 from skyrl_train.inference_engines.inference_engine_client import InferenceEngineClient
 from skyrl_train.inference_engines.base import ConversationType
 from tests.gpu.utils import init_worker_with_type, get_test_prompts
 from skyrl_train.inference_engines.utils import get_sampling_params_for_backend
+import skyrl_train.inference_engines.inference_engine_client_http_endpoint as http_endpoint_module
 from skyrl_train.inference_engines.inference_engine_client_http_endpoint import (
     serve,
     wait_for_server_ready,
@@ -595,9 +598,12 @@ class TestSchema(BaseModel):
 
 # TODO(Charlie): sglang has slightly different error response format. We need to handle it.
 @pytest.mark.vllm
-def test_http_endpoint_error_handling(ray_init_fixture):
+def test_http_endpoint_error_handling(ray_init_fixture, caplog):
     """
-    Test error handling for various invalid requests.
+    Test error handling for various invalid requests and internal server errors.
+
+    Tests validation errors (400) for invalid requests and verifies that internal
+    server errors (500) are logged with traceback server-side (not exposed to client).
     """
     try:
         cfg = get_test_actor_config(num_inference_engines=2, model=MODEL_QWEN2_5)
@@ -718,6 +724,37 @@ def test_http_endpoint_error_handling(ray_init_fixture):
         r = requests.post(f"{base_url}/v1/completions", json=bad_payload)
         assert r.status_code == HTTPStatus.BAD_REQUEST
 
+        # Test internal server errors (500) return proper error responses
+        # Traceback is logged server-side only (not exposed to client per CWE-209)
+        caplog.set_level(logging.ERROR)
+        original_client = http_endpoint_module._global_inference_engine_client
+
+        internal_error_cases = [
+            (
+                "chat_completion",
+                "/v1/chat/completions",
+                {"messages": [{"role": "user", "content": "Hello"}]},
+                KeyError("choices"),
+            ),
+            ("completion", "/v1/completions", {"prompt": "Hello"}, RuntimeError("Simulated internal error")),
+        ]
+        for method_name, endpoint, extra_payload, exception in internal_error_cases:
+
+            async def mock_raises(*args, exc=exception, **kwargs):
+                raise exc
+
+            caplog.clear()
+            with patch.object(original_client, method_name, side_effect=mock_raises):
+                response = requests.post(f"{base_url}{endpoint}", json={"model": MODEL_QWEN2_5, **extra_payload})
+            assert response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR
+            error_data = response.json()
+            error_message = error_data["error"]["message"]
+            assert str(exception) in error_message or type(exception).__name__ in error_message
+            assert "Traceback" not in error_message  # Not exposed to client (CWE-209)
+            assert error_data["error"]["code"] == 500
+            assert "Traceback (most recent call last):" in caplog.text  # Logged server-side
+            assert type(exception).__name__ in caplog.text
+
     finally:
         shutdown_server(host=SERVER_HOST, port=server_port, max_wait_seconds=5)
         if server_thread.is_alive():
```
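For readers unfamiliar with the test strategy above (patching an async client method so it raises, then checking pytest's `caplog`), here is a hedged, self-contained sketch of the same idea against a stand-in client. The `Client` class, `handle` coroutine, and logger name are invented for illustration and are not the repo's code.

```python
import asyncio
import logging
import traceback
from unittest.mock import patch

logger = logging.getLogger("http_endpoint_sketch")


class Client:
    """Stand-in for the inference engine client (illustrative only)."""

    async def chat_completion(self, payload: dict) -> dict:
        return {"choices": []}


async def handle(client: Client, payload: dict) -> dict:
    """Stand-in handler: logs the traceback, returns a sanitized error body."""
    try:
        return await client.chat_completion(payload)
    except Exception as e:
        logger.error("Error when handling request:\n%s", traceback.format_exc())
        return {"error": {"message": f"Error when handling request: {e}", "code": 500}}


def test_traceback_logged_but_not_exposed(caplog):
    caplog.set_level(logging.ERROR)
    client = Client()

    async def mock_raises(*args, **kwargs):
        raise RuntimeError("Simulated internal error")

    # patch.object swaps in a mock whose side_effect raises when the method is awaited
    with patch.object(client, "chat_completion", side_effect=mock_raises):
        body = asyncio.run(handle(client, {"messages": []}))

    assert body["error"]["code"] == 500
    assert "Traceback" not in body["error"]["message"]          # nothing leaked to the caller
    assert "Traceback (most recent call last):" in caplog.text  # full stack in server logs
```

The defaulted `exc=exception` argument in the real test above serves the same purpose as hard-coding the exception here: it binds the loop variable at function-definition time so each iteration's mock raises the intended exception.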
