diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py
index f444a59dd0..4f09093fa7 100644
--- a/tests/unit/vertexai/genai/test_evals.py
+++ b/tests/unit/vertexai/genai/test_evals.py
@@ -20,6 +20,7 @@
 import re
 import statistics
 import sys
+import tempfile
 import unittest
 from unittest import mock
 
@@ -44,7 +45,6 @@
 import pandas as pd
 import pytest
 
-
 _TEST_PROJECT = "test-project"
 _TEST_LOCATION = "us-central1"
 
@@ -755,27 +755,27 @@ def test_inference_with_local_destination(
             mock_generate_content_response
         )
 
-        local_dest_dir = "/tmp/test/output_dir"
-        config = vertexai_genai_types.EvalRunInferenceConfig(dest=local_dest_dir)
+        with tempfile.TemporaryDirectory() as local_dest_dir:
+            config = vertexai_genai_types.EvalRunInferenceConfig(dest=local_dest_dir)
 
-        inference_result = self.client.evals.run_inference(
-            model="gemini-pro", src=mock_df, config=config
-        )
+            inference_result = self.client.evals.run_inference(
+                model="gemini-pro", src=mock_df, config=config
+            )
 
-        mock_makedirs.assert_called_once_with(local_dest_dir, exist_ok=True)
-        expected_save_path = os.path.join(local_dest_dir, "inference_results.jsonl")
-        mock_df_to_json.assert_called_once_with(
-            expected_save_path, orient="records", lines=True
-        )
-        expected_df = pd.DataFrame(
-            {
-                "prompt": ["local save"],
-                "response": ["local response"],
-            }
-        )
-        pd.testing.assert_frame_equal(inference_result.eval_dataset_df, expected_df)
-        assert inference_result.candidate_name == "gemini-pro"
-        assert inference_result.gcs_source is None
+            mock_makedirs.assert_called_once_with(local_dest_dir, exist_ok=True)
+            expected_save_path = os.path.join(local_dest_dir, "inference_results.jsonl")
+            mock_df_to_json.assert_called_once_with(
+                expected_save_path, orient="records", lines=True
+            )
+            expected_df = pd.DataFrame(
+                {
+                    "prompt": ["local save"],
+                    "response": ["local response"],
+                }
+            )
+            pd.testing.assert_frame_equal(inference_result.eval_dataset_df, expected_df)
+            assert inference_result.candidate_name == "gemini-pro"
+            assert inference_result.gcs_source is None
 
     @mock.patch.object(_evals_common, "Models")
     @mock.patch.object(_evals_utils, "EvalDatasetLoader")
@@ -816,201 +816,199 @@ def test_inference_from_request_column_save_to_local_dir(
             mock_generate_content_responses
         )
 
-        local_dest_dir = "/tmp/test_output_dir"
-        config = vertexai_genai_types.EvalRunInferenceConfig(dest=local_dest_dir)
+        with tempfile.TemporaryDirectory() as local_dest_dir:
+            config = vertexai_genai_types.EvalRunInferenceConfig(dest=local_dest_dir)
 
-        inference_result = self.client.evals.run_inference(
-            model="gemini-pro", src=mock_df, config=config
-        )
+            inference_result = self.client.evals.run_inference(
+                model="gemini-pro", src=mock_df, config=config
+            )
 
-        mock_models.return_value.generate_content.assert_has_calls(
-            [
-                mock.call(
-                    model="gemini-pro",
-                    contents="req 1",
-                    config=genai_types.GenerateContentConfig(),
-                ),
-                mock.call(
-                    model="gemini-pro",
-                    contents="req 2",
-                    config=genai_types.GenerateContentConfig(),
+            mock_models.return_value.generate_content.assert_has_calls(
+                [
+                    mock.call(
+                        model="gemini-pro",
+                        contents="req 1",
+                        config=genai_types.GenerateContentConfig(),
+                    ),
+                    mock.call(
+                        model="gemini-pro",
+                        contents="req 2",
+                        config=genai_types.GenerateContentConfig(),
+                    ),
+                ],
+                any_order=True,
+            )
+            expected_df = pd.DataFrame(
+                {
+                    "prompt": ["prompt 1", "prompt 2"],
+                    "request": ["req 1", "req 2"],
+                    "response": ["resp 1", "resp 2"],
+                }
+            )
+            pd.testing.assert_frame_equal(
+                inference_result.eval_dataset_df.sort_values(by="request").reset_index(
+                    drop=True
                 ),
-            ],
-            any_order=True,
-        )
-        expected_df = pd.DataFrame(
-            {
-                "prompt": ["prompt 1", "prompt 2"],
-                "request": ["req 1", "req 2"],
-                "response": ["resp 1", "resp 2"],
-            }
-        )
-        pd.testing.assert_frame_equal(
-            inference_result.eval_dataset_df.sort_values(by="request").reset_index(
-                drop=True
-            ),
-            expected_df.sort_values(by="request").reset_index(drop=True),
-        )
+                expected_df.sort_values(by="request").reset_index(drop=True),
+            )
 
-        saved_file_path = os.path.join(local_dest_dir, "inference_results.jsonl")
-        with open(saved_file_path, "r") as f:
-            saved_records = [json.loads(line) for line in f]
-        expected_records = expected_df.to_dict(orient="records")
-        assert sorted(saved_records, key=lambda x: x["request"]) == sorted(
-            expected_records, key=lambda x: x["request"]
-        )
-        os.remove(saved_file_path)
-        os.rmdir(local_dest_dir)
-        assert inference_result.candidate_name == "gemini-pro"
-        assert inference_result.gcs_source is None
+            saved_file_path = os.path.join(local_dest_dir, "inference_results.jsonl")
+            with open(saved_file_path, "r") as f:
+                saved_records = [json.loads(line) for line in f]
+            expected_records = expected_df.to_dict(orient="records")
+            assert sorted(saved_records, key=lambda x: x["request"]) == sorted(
+                expected_records, key=lambda x: x["request"]
+            )
+            assert inference_result.candidate_name == "gemini-pro"
+            assert inference_result.gcs_source is None
 
     @mock.patch.object(_evals_common, "Models")
     def test_inference_from_local_jsonl_file(self, mock_models):
-        local_src_path = "/tmp/input.jsonl"
-        input_records = [
-            {"prompt": "prompt 1", "other_col": "val 1"},
-            {"prompt": "prompt 2", "other_col": "val 2"},
-        ]
-        with open(local_src_path, "w") as f:
-            for record in input_records:
-                f.write(json.dumps(record) + "\n")
-
-        mock_generate_content_responses = [
-            genai_types.GenerateContentResponse(
-                candidates=[
-                    genai_types.Candidate(
-                        content=genai_types.Content(
-                            parts=[genai_types.Part(text="resp 1")]
-                        ),
-                        finish_reason=genai_types.FinishReason.STOP,
-                    )
-                ],
-                prompt_feedback=None,
-            ),
-            genai_types.GenerateContentResponse(
-                candidates=[
-                    genai_types.Candidate(
-                        content=genai_types.Content(
-                            parts=[genai_types.Part(text="resp 2")]
-                        ),
-                        finish_reason=genai_types.FinishReason.STOP,
-                    )
-                ],
-                prompt_feedback=None,
-            ),
-        ]
-        mock_models.return_value.generate_content.side_effect = (
-            mock_generate_content_responses
-        )
+        with tempfile.TemporaryDirectory() as temp_dir:
+            local_src_path = os.path.join(temp_dir, "input.jsonl")
+            input_records = [
+                {"prompt": "prompt 1", "other_col": "val 1"},
+                {"prompt": "prompt 2", "other_col": "val 2"},
+            ]
+            with open(local_src_path, "w") as f:
+                for record in input_records:
+                    f.write(json.dumps(record) + "\n")
+
+            mock_generate_content_responses = [
+                genai_types.GenerateContentResponse(
+                    candidates=[
+                        genai_types.Candidate(
+                            content=genai_types.Content(
+                                parts=[genai_types.Part(text="resp 1")]
+                            ),
+                            finish_reason=genai_types.FinishReason.STOP,
+                        )
+                    ],
+                    prompt_feedback=None,
+                ),
+                genai_types.GenerateContentResponse(
+                    candidates=[
+                        genai_types.Candidate(
+                            content=genai_types.Content(
+                                parts=[genai_types.Part(text="resp 2")]
+                            ),
+                            finish_reason=genai_types.FinishReason.STOP,
+                        )
+                    ],
+                    prompt_feedback=None,
+                ),
+            ]
+            mock_models.return_value.generate_content.side_effect = (
+                mock_generate_content_responses
+            )
 
-        inference_result = self.client.evals.run_inference(
-            model="gemini-pro", src=local_src_path
-        )
+            inference_result = self.client.evals.run_inference(
+                model="gemini-pro", src=local_src_path
+            )
 
-        expected_df = pd.DataFrame(
-            {
-                "prompt": ["prompt 1", "prompt 2"],
-                "other_col": ["val 1", "val 2"],
-                "response": ["resp 1", "resp 2"],
-            }
-        )
-        pd.testing.assert_frame_equal(
-            inference_result.eval_dataset_df.sort_values(by="prompt").reset_index(
-                drop=True
-            ),
-            expected_df.sort_values(by="prompt").reset_index(drop=True),
-        )
-        mock_models.return_value.generate_content.assert_has_calls(
-            [
-                mock.call(
-                    model="gemini-pro",
-                    contents="prompt 1",
-                    config=genai_types.GenerateContentConfig(),
-                ),
-                mock.call(
-                    model="gemini-pro",
-                    contents="prompt 2",
-                    config=genai_types.GenerateContentConfig(),
+            expected_df = pd.DataFrame(
+                {
+                    "prompt": ["prompt 1", "prompt 2"],
+                    "other_col": ["val 1", "val 2"],
+                    "response": ["resp 1", "resp 2"],
+                }
+            )
+            pd.testing.assert_frame_equal(
+                inference_result.eval_dataset_df.sort_values(by="prompt").reset_index(
+                    drop=True
                 ),
-            ],
-            any_order=True,
-        )
-        os.remove(local_src_path)
-        assert inference_result.candidate_name == "gemini-pro"
-        assert inference_result.gcs_source is None
+                expected_df.sort_values(by="prompt").reset_index(drop=True),
+            )
+            mock_models.return_value.generate_content.assert_has_calls(
+                [
+                    mock.call(
+                        model="gemini-pro",
+                        contents="prompt 1",
+                        config=genai_types.GenerateContentConfig(),
+                    ),
+                    mock.call(
+                        model="gemini-pro",
+                        contents="prompt 2",
+                        config=genai_types.GenerateContentConfig(),
+                    ),
+                ],
+                any_order=True,
+            )
+            assert inference_result.candidate_name == "gemini-pro"
+            assert inference_result.gcs_source is None
 
     @pytest.mark.skip(reason="currently flakey")
     @mock.patch.object(_evals_common, "Models")
     def test_inference_from_local_csv_file(self, mock_models):
-        local_src_path = "/tmp/input.csv"
-        input_df = pd.DataFrame(
-            {"prompt": ["prompt 1", "prompt 2"], "other_col": ["val 1", "val 2"]}
-        )
-        input_df.to_csv(local_src_path, index=False)
+        with tempfile.TemporaryDirectory() as temp_dir:
+            local_src_path = os.path.join(temp_dir, "input.csv")
+            input_df = pd.DataFrame(
+                {"prompt": ["prompt 1", "prompt 2"], "other_col": ["val 1", "val 2"]}
+            )
+            input_df.to_csv(local_src_path, index=False)
 
-        mock_generate_content_responses = [
-            genai_types.GenerateContentResponse(
-                candidates=[
-                    genai_types.Candidate(
-                        content=genai_types.Content(
-                            parts=[genai_types.Part(text="resp 1")]
-                        ),
-                        finish_reason=genai_types.FinishReason.STOP,
-                    )
-                ],
-                prompt_feedback=None,
-            ),
-            genai_types.GenerateContentResponse(
-                candidates=[
-                    genai_types.Candidate(
-                        content=genai_types.Content(
-                            parts=[genai_types.Part(text="resp 2")]
-                        ),
-                        finish_reason=genai_types.FinishReason.STOP,
-                    )
-                ],
-                prompt_feedback=None,
-            ),
-        ]
-        mock_models.return_value.generate_content.side_effect = (
-            mock_generate_content_responses
-        )
+            mock_generate_content_responses = [
+                genai_types.GenerateContentResponse(
+                    candidates=[
+                        genai_types.Candidate(
+                            content=genai_types.Content(
+                                parts=[genai_types.Part(text="resp 1")]
+                            ),
+                            finish_reason=genai_types.FinishReason.STOP,
+                        )
+                    ],
+                    prompt_feedback=None,
+                ),
+                genai_types.GenerateContentResponse(
+                    candidates=[
+                        genai_types.Candidate(
+                            content=genai_types.Content(
+                                parts=[genai_types.Part(text="resp 2")]
+                            ),
+                            finish_reason=genai_types.FinishReason.STOP,
+                        )
+                    ],
+                    prompt_feedback=None,
+                ),
+            ]
+            mock_models.return_value.generate_content.side_effect = (
+                mock_generate_content_responses
+            )
 
-        inference_result = self.client.evals.run_inference(
-            model="gemini-pro", src=local_src_path
-        )
+            inference_result = self.client.evals.run_inference(
+                model="gemini-pro", src=local_src_path
+            )
 
-        expected_df = pd.DataFrame(
-            {
-                "prompt": ["prompt 1", "prompt 2"],
-                "other_col": ["val 1", "val 2"],
-                "response": ["resp 1", "resp 2"],
-            }
-        )
-        pd.testing.assert_frame_equal(
-            inference_result.eval_dataset_df.sort_values(by="prompt").reset_index(
-                drop=True
-            ),
-            expected_df.sort_values(by="prompt").reset_index(drop=True),
-        )
-        mock_models.return_value.generate_content.assert_has_calls(
-            [
-                mock.call(
-                    model="gemini-pro",
-                    contents="prompt 1",
-                    config=genai_types.GenerateContentConfig(),
-                ),
-                mock.call(
-                    model="gemini-pro",
-                    contents="prompt 2",
-                    config=genai_types.GenerateContentConfig(),
+            expected_df = pd.DataFrame(
+                {
+                    "prompt": ["prompt 1", "prompt 2"],
+                    "other_col": ["val 1", "val 2"],
+                    "response": ["resp 1", "resp 2"],
+                }
+            )
+            pd.testing.assert_frame_equal(
+                inference_result.eval_dataset_df.sort_values(by="prompt").reset_index(
+                    drop=True
                 ),
-            ],
-            any_order=True,
-        )
-        os.remove(local_src_path)
-        assert inference_result.candidate_name == "gemini-pro"
-        assert inference_result.gcs_source is None
+                expected_df.sort_values(by="prompt").reset_index(drop=True),
+            )
+            mock_models.return_value.generate_content.assert_has_calls(
+                [
+                    mock.call(
+                        model="gemini-pro",
+                        contents="prompt 1",
+                        config=genai_types.GenerateContentConfig(),
+                    ),
+                    mock.call(
+                        model="gemini-pro",
+                        contents="prompt 2",
+                        config=genai_types.GenerateContentConfig(),
+                    ),
+                ],
+                any_order=True,
+            )
+            assert inference_result.candidate_name == "gemini-pro"
+            assert inference_result.gcs_source is None
 
     @mock.patch.object(_evals_common, "Models")
     @mock.patch.object(_evals_utils, "EvalDatasetLoader")
@@ -2079,6 +2077,121 @@ def test_has_tool_call_with_agent_event(self):
 
 
 @pytest.mark.usefixtures("google_auth_mock")
+class TestRunAgent:
+    """Unit tests for the _run_agent function.
+
+    The tests cover how a short ``gemini-3*`` user simulator model name is
+    handled: the name is rewritten to a full ``global`` resource path and
+    ``GOOGLE_CLOUD_LOCATION`` is overridden only for the duration of the run.
+    """
+
+    @mock.patch.object(_evals_common, "_execute_inference_concurrently")
+    def test_run_agent_rewrites_gemini_3_model_name(
+        self, mock_execute_inference_concurrently, mock_api_client_fixture
+    ):
+        """Routes to 'global' for the run, then restores the original location."""
+        user_simulator_config = vertexai_genai_types.evals.UserSimulatorConfig(
+            model_name="gemini-3-preview"
+        )
+        prompt_dataset = pd.DataFrame({"prompt": ["prompt1"]})
+        locations_seen = []
+
+        def mock_execute(*args, **kwargs):
+            # Record instead of asserting here, so the test cannot pass
+            # vacuously when the inference function is never invoked.
+            locations_seen.append(os.environ.get("GOOGLE_CLOUD_LOCATION"))
+            return []
+
+        mock_execute_inference_concurrently.side_effect = mock_execute
+
+        with mock.patch.dict(
+            os.environ, {"GOOGLE_CLOUD_LOCATION": "us-central1"}, clear=True
+        ):
+            _evals_common._run_agent(
+                api_client=mock_api_client_fixture,
+                agent_engine=mock.Mock(),
+                agent=None,
+                prompt_dataset=prompt_dataset,
+                user_simulator_config=user_simulator_config,
+                allow_cross_region_model=True,
+            )
+
+            assert locations_seen == ["global"]
+            assert user_simulator_config.model_name == (
+                f"projects/{mock_api_client_fixture.project}/locations/global"
+                "/publishers/google/models/gemini-3-preview"
+            )
+            assert os.environ.get("GOOGLE_CLOUD_LOCATION") == "us-central1"
+
+    @mock.patch.object(_evals_common, "_execute_inference_concurrently")
+    def test_run_agent_raises_error_if_gemini_3_and_allow_cross_region_model_false(
+        self, mock_execute_inference_concurrently, mock_api_client_fixture
+    ):
+        """Rejects cross-region routing unless the caller explicitly opts in."""
+        user_simulator_config = vertexai_genai_types.evals.UserSimulatorConfig(
+            model_name="gemini-3-preview"
+        )
+        prompt_dataset = pd.DataFrame({"prompt": ["prompt1"]})
+        # Escaped because `match` is a regex and the message contains ".".
+        expected_message = re.escape(
+            "The model 'gemini-3-preview' is currently only available in the"
+            " 'global' region."
+        )
+
+        with mock.patch.dict(
+            os.environ, {"GOOGLE_CLOUD_LOCATION": "us-central1"}, clear=True
+        ):
+            with pytest.raises(ValueError, match=expected_message):
+                _evals_common._run_agent(
+                    api_client=mock_api_client_fixture,
+                    agent_engine=mock.Mock(),
+                    agent=None,
+                    prompt_dataset=prompt_dataset,
+                    user_simulator_config=user_simulator_config,
+                    allow_cross_region_model=False,
+                )
+
+            # The rejection must happen before any side effect takes place.
+            mock_execute_inference_concurrently.assert_not_called()
+            assert user_simulator_config.model_name == "gemini-3-preview"
+            assert os.environ.get("GOOGLE_CLOUD_LOCATION") == "us-central1"
+
+    @mock.patch.object(_evals_common, "_execute_inference_concurrently")
+    def test_run_agent_rewrites_gemini_3_model_name_empty_env(
+        self, mock_execute_inference_concurrently, mock_api_client_fixture
+    ):
+        """Leaves GOOGLE_CLOUD_LOCATION unset afterwards if it started unset."""
+        user_simulator_config = vertexai_genai_types.evals.UserSimulatorConfig(
+            model_name="gemini-3-preview"
+        )
+        prompt_dataset = pd.DataFrame({"prompt": ["prompt1"]})
+        locations_seen = []
+
+        def mock_execute(*args, **kwargs):
+            locations_seen.append(os.environ.get("GOOGLE_CLOUD_LOCATION"))
+            return []
+
+        mock_execute_inference_concurrently.side_effect = mock_execute
+
+        with mock.patch.dict(os.environ, clear=True):
+            _evals_common._run_agent(
+                api_client=mock_api_client_fixture,
+                agent_engine=mock.Mock(),
+                agent=None,
+                prompt_dataset=prompt_dataset,
+                user_simulator_config=user_simulator_config,
+                allow_cross_region_model=True,
+            )
+
+            assert locations_seen == ["global"]
+            assert user_simulator_config.model_name == (
+                f"projects/{mock_api_client_fixture.project}/locations/global"
+                "/publishers/google/models/gemini-3-preview"
+            )
+            assert "GOOGLE_CLOUD_LOCATION" not in os.environ
+
+
+@pytest.mark.usefixtures("google_auth_mock")
 class TestRunAgentInternal:
     """Unit tests for the _run_agent_internal function."""
 
diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py
index aace1f4ead..b51017a889 100644
--- a/vertexai/_genai/_evals_common.py
+++ b/vertexai/_genai/_evals_common.py
@@ -1143,6 +1143,7 @@ def _execute_inference(
     prompt_template: Optional[Union[str, types.PromptTemplateOrDict]] = None,
     location: Optional[str] = None,
     user_simulator_config: Optional[types.evals.UserSimulatorConfig] = None,
+    allow_cross_region_model: bool = False,
 ) -> pd.DataFrame:
     """Executes inference on a given dataset using the specified model.
 
@@ -1250,6 +1251,7 @@ def _execute_inference(
             agent=agent,
             prompt_dataset=prompt_dataset,
             user_simulator_config=user_simulator_config,
+            allow_cross_region_model=allow_cross_region_model,
         )
         end_time = time.time()
         logger.info("Agent Run completed in %.2f seconds.", end_time - start_time)
@@ -1823,6 +1825,7 @@ def _run_agent_internal(
     agent: Optional[LlmAgent],
     prompt_dataset: pd.DataFrame,
     user_simulator_config: Optional[types.evals.UserSimulatorConfig] = None,
+    allow_cross_region_model: bool = False,
 ) -> pd.DataFrame:
     """Runs an agent."""
     raw_responses = _run_agent(
@@ -1831,6 +1834,7 @@ def _run_agent_internal(
         agent=agent,
         prompt_dataset=prompt_dataset,
         user_simulator_config=user_simulator_config,
+        allow_cross_region_model=allow_cross_region_model,
     )
     processed_intermediate_events = []
     processed_responses = []
@@ -1872,6 +1876,7 @@ def _run_agent(
     agent: Optional[LlmAgent],
     prompt_dataset: pd.DataFrame,
     user_simulator_config: Optional[types.evals.UserSimulatorConfig] = None,
+    allow_cross_region_model: bool = False,
 ) -> list[
     Union[
         list[dict[str, Any]],
@@ -1880,28 +1885,68 @@ def _run_agent(
     ]
 ]:
     """Internal helper to run inference using Gemini model with concurrency."""
-    if agent_engine:
-        return _execute_inference_concurrently(
-            api_client=api_client,
-            agent_engine=agent_engine,
-            prompt_dataset=prompt_dataset,
-            progress_desc="Agent Run",
-            gemini_config=None,
-            user_simulator_config=None,
-            inference_fn=_execute_agent_run_with_retry,
-        )
-    elif agent:
-        return _execute_inference_concurrently(
-            api_client=api_client,
-            agent=agent,
-            prompt_dataset=prompt_dataset,
-            progress_desc="Local Agent Run",
-            gemini_config=None,
-            user_simulator_config=user_simulator_config,
-            inference_fn=_execute_local_agent_run_with_retry,
-        )
-    else:
-        raise ValueError("Neither agent_engine nor agent is provided.")
+    # Remember the caller's location so it can be restored once the run ends.
+    original_location = os.environ.get("GOOGLE_CLOUD_LOCATION")
+    location_overridden = False
+
+    if user_simulator_config and user_simulator_config.model_name:
+        model_name = user_simulator_config.model_name
+        # Only short names are rewritten; a name containing "/" is treated as
+        # an already-qualified path and is left untouched.
+        if model_name.startswith("gemini-3") and "/" not in model_name:
+            current_location = original_location or api_client.location or "us-central1"
+            if current_location != "global" and not allow_cross_region_model:
+                raise ValueError(
+                    f"The model '{model_name}' is currently only available in the"
+                    " 'global' region. Because this request originated in"
+                    f" '{current_location}', you must explicitly set "
+                    "allow_cross_region_model=True to allow your data to be routed outside"
+                    " of your request's region."
+                )
+
+            logger.warning(
+                "Model %s is only available in the global region. Routing to global.",
+                model_name,
+            )
+            # NOTE: this mutates the caller's config object in place.
+            user_simulator_config.model_name = f"projects/{api_client.project}/locations/global/publishers/google/models/{model_name}"
+            if original_location != "global":
+                # NOTE(review): os.environ is process-wide, so this override is
+                # also seen by any code running concurrently until restored below.
+                os.environ["GOOGLE_CLOUD_LOCATION"] = "global"
+                location_overridden = True
+
+    try:
+        if agent_engine:
+            return _execute_inference_concurrently(
+                api_client=api_client,
+                agent_engine=agent_engine,
+                prompt_dataset=prompt_dataset,
+                progress_desc="Agent Run",
+                gemini_config=None,
+                user_simulator_config=None,
+                inference_fn=_execute_agent_run_with_retry,
+            )
+        elif agent:
+            return _execute_inference_concurrently(
+                api_client=api_client,
+                agent=agent,
+                prompt_dataset=prompt_dataset,
+                progress_desc="Local Agent Run",
+                gemini_config=None,
+                user_simulator_config=user_simulator_config,
+                inference_fn=_execute_local_agent_run_with_retry,
+            )
+        else:
+            raise ValueError("Neither agent_engine nor agent is provided.")
+    finally:
+        if location_overridden:
+            if original_location is None:
+                # pop() rather than del: a KeyError raised here would mask any
+                # exception propagating out of the `try` block.
+                os.environ.pop("GOOGLE_CLOUD_LOCATION", None)
+            else:
+                os.environ["GOOGLE_CLOUD_LOCATION"] = original_location
 
 
 def _execute_agent_run_with_retry(
diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py
index ece5486c35..ce516c8cf6 100644
--- a/vertexai/_genai/evals.py
+++ b/vertexai/_genai/evals.py
@@ -1957,6 +1957,7 @@ def run_inference(
                 - dest: The destination path for storage of the inference results.
                 - prompt_template: The template string to use for constructing prompts.
                 - generate_content_config: The config for the Gemini generate content call.
+                - allow_cross_region_model: Opt-in flag to authorize cross-region routing for LLM models.
 
         Returns:
           The evaluation dataset.
@@ -1992,6 +1993,7 @@ def run_inference(
             location=location,
             config=config.generate_content_config,
             user_simulator_config=getattr(config, "user_simulator_config", None),
+            allow_cross_region_model=getattr(config, "allow_cross_region_model", False),
         )
 
     def evaluate(
@@ -2437,6 +2439,8 @@ def create_evaluation_run(
               If `agent_info` is provided without `inference_configs`, this config is used
               to automatically construct the inference configuration. If not specified,
               or if `max_turn` is not set, `max_turn` defaults to 5.
+              The `model_name` inside this config can be either a full model path or a
+              short model name, e.g. `gemini-3-preview-flash`.
           inference_configs: The candidate to inference config map for the evaluation run.
               The key is the candidate name, and the value is the inference config.
               If provided, `agent_info` must be None. If omitted and `agent_info` is provided,
@@ -3928,6 +3932,8 @@ async def create_evaluation_run(
               If `agent_info` is provided without `inference_configs`, this config is used
               to automatically construct the inference configuration. If not specified,
               or if `max_turn` is not set, `max_turn` defaults to 5.
+              The `model_name` inside this config can be either a full model path or a
+              short model name, e.g. `gemini-3-preview-flash`.
           inference_configs: The candidate to inference config map for the evaluation run.
               The key is the candidate name, and the value is the inference config.
               If provided, `agent_info` must be None. If omitted and `agent_info` is provided,
diff --git a/vertexai/_genai/types/common.py b/vertexai/_genai/types/common.py
index fdd1262d4c..535fc9cb26 100644
--- a/vertexai/_genai/types/common.py
+++ b/vertexai/_genai/types/common.py
@@ -16011,6 +16011,10 @@ class EvalRunInferenceConfig(_common.BaseModel):
         description="""Configuration for user simulation in multi-turn agent scraping. If provided, and the dataset contains
       conversation plans, user simulation will be triggered.""",
     )
+    allow_cross_region_model: Optional[bool] = Field(
+        default=None,
+        description="""Opt-in flag to authorize cross-region routing for LLM models.""",
+    )
 
 
 class EvalRunInferenceConfigDict(TypedDict, total=False):
@@ -16029,6 +16033,9 @@ class EvalRunInferenceConfigDict(TypedDict, total=False):
     """Configuration for user simulation in multi-turn agent scraping. If provided, and the dataset contains
       conversation plans, user simulation will be triggered."""
 
+    allow_cross_region_model: Optional[bool]
+    """Opt-in flag to authorize cross-region routing for LLM models."""
+
 
 EvalRunInferenceConfigOrDict = Union[EvalRunInferenceConfig, EvalRunInferenceConfigDict]