Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 73 additions & 3 deletions tests/unit/vertexai/genai/test_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
import pandas as pd
import pytest


_TEST_PROJECT = "test-project"
_TEST_LOCATION = "us-central1"

Expand Down Expand Up @@ -2079,6 +2078,73 @@ def test_has_tool_call_with_agent_event(self):


@pytest.mark.usefixtures("google_auth_mock")
class TestRunAgent:
    """Unit tests for the _run_agent function.

    Both tests verify that a bare Gemini 3 model name in the user-simulator
    config is rewritten to a full global-region resource path, that
    GOOGLE_CLOUD_LOCATION points at "global" while inference runs, and that
    the variable is restored (or removed) once _run_agent returns.
    """

    @mock.patch.object(_evals_common, "_execute_inference_concurrently")
    def test_run_agent_rewrites_gemini_3_model_name(
        self, mock_execute_inference_concurrently, mock_api_client_fixture
    ):
        user_simulator_config = vertexai_genai_types.evals.UserSimulatorConfig(
            model_name="gemini-3-preview"
        )
        prompt_dataset = pd.DataFrame({"prompt": ["prompt1"]})
        with mock.patch.dict(os.environ, clear=True):
            # Pre-set a location so we can assert it is restored afterwards.
            os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1"

            def mock_execute(*args, **kwargs):
                # While inference runs, the env var must be routed to global.
                assert os.environ["GOOGLE_CLOUD_LOCATION"] == "global"
                return []

            # side_effect alone drives the mock; a separate return_value
            # assignment would be dead code.
            mock_execute_inference_concurrently.side_effect = mock_execute

            _evals_common._run_agent(
                api_client=mock_api_client_fixture,
                agent_engine=mock.Mock(),
                agent=None,
                prompt_dataset=prompt_dataset,
                user_simulator_config=user_simulator_config,
            )

            # The short name is expanded to a full global-region model path.
            assert (
                user_simulator_config.model_name
                == f"projects/{mock_api_client_fixture.project}/locations/global/publishers/google/models/gemini-3-preview"
            )
            # The pre-existing location is restored after the run.
            assert os.environ.get("GOOGLE_CLOUD_LOCATION") == "us-central1"

    @mock.patch.object(_evals_common, "_execute_inference_concurrently")
    def test_run_agent_rewrites_gemini_3_model_name_empty_env(
        self, mock_execute_inference_concurrently, mock_api_client_fixture
    ):
        user_simulator_config = vertexai_genai_types.evals.UserSimulatorConfig(
            model_name="gemini-3-preview"
        )
        prompt_dataset = pd.DataFrame({"prompt": ["prompt1"]})
        with mock.patch.dict(os.environ, clear=True):
            # No GOOGLE_CLOUD_LOCATION set before the run: _run_agent must
            # set it temporarily and delete it again on exit.

            def mock_execute(*args, **kwargs):
                # While inference runs, the env var must be routed to global.
                assert os.environ["GOOGLE_CLOUD_LOCATION"] == "global"
                return []

            # side_effect alone drives the mock; a separate return_value
            # assignment would be dead code.
            mock_execute_inference_concurrently.side_effect = mock_execute

            _evals_common._run_agent(
                api_client=mock_api_client_fixture,
                agent_engine=mock.Mock(),
                agent=None,
                prompt_dataset=prompt_dataset,
                user_simulator_config=user_simulator_config,
            )

            # The short name is expanded to a full global-region model path.
            assert (
                user_simulator_config.model_name
                == f"projects/{mock_api_client_fixture.project}/locations/global/publishers/google/models/gemini-3-preview"
            )
            # The variable was absent before, so it must be removed again.
            assert "GOOGLE_CLOUD_LOCATION" not in os.environ


class TestRunAgentInternal:
"""Unit tests for the _run_agent_internal function."""

Expand Down Expand Up @@ -5054,7 +5120,9 @@ async def test_run_adk_user_simulation_success(
assert turn["events"][1]["content"] == {"text": "agent msg"}

mock_scenario_cls.assert_called_once_with(
starting_prompt="start", conversation_plan="plan"
starting_prompt="start",
conversation_plan="plan",
user_persona="EVALUATOR",
)
mock_session_input_cls.assert_called_once()

Expand Down Expand Up @@ -5113,7 +5181,9 @@ async def test_run_adk_user_simulation_missing_session_inputs(
await _evals_common._run_adk_user_simulation(row, mock_agent)

mock_scenario_cls.assert_called_once_with(
starting_prompt="start", conversation_plan="plan"
starting_prompt="start",
conversation_plan="plan",
user_persona="EVALUATOR",
)
mock_session_input_cls.assert_called_once_with(
app_name="user_simulation_app",
Expand Down
74 changes: 51 additions & 23 deletions vertexai/_genai/_evals_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -959,6 +959,10 @@ async def _run_adk_user_simulation(
"""Runs a multi-turn user simulation using ADK's EvaluationGenerator."""
starting_prompt = row.get("starting_prompt")
conversation_plan = row.get("conversation_plan")
user_persona = row.get("user_persona", "EVALUATOR")

if pd.isna(user_persona) or not user_persona:
user_persona = "EVALUATOR"

if not starting_prompt or not conversation_plan:
raise ValueError(
Expand All @@ -967,7 +971,9 @@ async def _run_adk_user_simulation(
)

scenario = ConversationScenario(
starting_prompt=starting_prompt, conversation_plan=conversation_plan
starting_prompt=starting_prompt,
conversation_plan=conversation_plan,
user_persona=user_persona,
)

user_simulator_kwargs: dict[str, Any] = {}
Expand Down Expand Up @@ -1877,28 +1883,50 @@ def _run_agent(
]
]:
"""Internal helper to run inference using Gemini model with concurrency."""
if agent_engine:
return _execute_inference_concurrently(
api_client=api_client,
agent_engine=agent_engine,
prompt_dataset=prompt_dataset,
progress_desc="Agent Run",
gemini_config=None,
user_simulator_config=None,
inference_fn=_execute_agent_run_with_retry,
)
elif agent:
return _execute_inference_concurrently(
api_client=api_client,
agent=agent,
prompt_dataset=prompt_dataset,
progress_desc="Local Agent Run",
gemini_config=None,
user_simulator_config=user_simulator_config,
inference_fn=_execute_local_agent_run_with_retry,
)
else:
raise ValueError("Neither agent_engine nor agent is provided.")
original_location = os.environ.get("GOOGLE_CLOUD_LOCATION")
location_overridden = False

if user_simulator_config and user_simulator_config.model_name:
model_name = user_simulator_config.model_name
if model_name.startswith("gemini-3") and "/" not in model_name:
logger.warning(
"Model %s is only available in the global region. Routing to global.",
model_name,
)
user_simulator_config.model_name = f"projects/{api_client.project}/locations/global/publishers/google/models/{model_name}"
if original_location != "global":
os.environ["GOOGLE_CLOUD_LOCATION"] = "global"
location_overridden = True

try:
if agent_engine:
return _execute_inference_concurrently(
api_client=api_client,
agent_engine=agent_engine,
prompt_dataset=prompt_dataset,
progress_desc="Agent Run",
gemini_config=None,
user_simulator_config=None,
inference_fn=_execute_agent_run_with_retry,
)
elif agent:
return _execute_inference_concurrently(
api_client=api_client,
agent=agent,
prompt_dataset=prompt_dataset,
progress_desc="Local Agent Run",
gemini_config=None,
user_simulator_config=user_simulator_config,
inference_fn=_execute_local_agent_run_with_retry,
)
else:
raise ValueError("Neither agent_engine nor agent is provided.")
finally:
if location_overridden:
if original_location is None:
del os.environ["GOOGLE_CLOUD_LOCATION"]
else:
os.environ["GOOGLE_CLOUD_LOCATION"] = original_location


def _execute_agent_run_with_retry(
Expand Down
4 changes: 4 additions & 0 deletions vertexai/_genai/evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2216,6 +2216,8 @@ def create_evaluation_run(
If `agent_info` is provided without `inference_configs`, this config is used
to automatically construct the inference configuration. If not specified,
or if `max_turn` is not set, `max_turn` defaults to 5.
The `model_name` inside this config can be either a full model path or a
short model name, e.g. `gemini-3-preview-flash`.
inference_configs: The candidate to inference config map for the evaluation run.
The key is the candidate name, and the value is the inference config.
If provided, `agent_info` must be None. If omitted and `agent_info` is provided,
Expand Down Expand Up @@ -3486,6 +3488,8 @@ async def create_evaluation_run(
If `agent_info` is provided without `inference_configs`, this config is used
to automatically construct the inference configuration. If not specified,
or if `max_turn` is not set, `max_turn` defaults to 5.
The `model_name` inside this config can be either a full model path or a
short model name, e.g. `gemini-3-preview-flash`.
inference_configs: The candidate to inference config map for the evaluation run.
The key is the candidate name, and the value is the inference config.
If provided, `agent_info` must be None. If omitted and `agent_info` is provided,
Expand Down
Loading