From 582b1edc1ef603cfb0cd2d421bf52a7e4e18add9 Mon Sep 17 00:00:00 2001
From: A Vertex SDK engineer <vertex-sdk-bot@google.com>
Date: Thu, 2 Apr 2026 21:45:06 -0700
Subject: [PATCH] feat: GenAI Client(evals) - add user-facing
 generate_loss_clusters with LRO polling and replay tests

PiperOrigin-RevId: 893874547
---
 .../replays/test_generate_loss_clusters.py    |  76 +++++
 tests/unit/vertexai/genai/test_evals.py       | 311 +++++++++++++++++-
 vertexai/_genai/_evals_common.py              |  18 +-
 vertexai/_genai/_evals_utils.py               | 110 ++++++-
 vertexai/_genai/_transformers.py              | 149 ++++++++-
 vertexai/_genai/evals.py                      |  94 ++++++
 vertexai/_genai/types/common.py               |  11 +
 7 files changed, 755 insertions(+), 14 deletions(-)
 create mode 100644 tests/unit/vertexai/genai/replays/test_generate_loss_clusters.py

diff --git a/tests/unit/vertexai/genai/replays/test_generate_loss_clusters.py b/tests/unit/vertexai/genai/replays/test_generate_loss_clusters.py
new file mode 100644
index 0000000000..f585feb9d5
--- /dev/null
+++ b/tests/unit/vertexai/genai/replays/test_generate_loss_clusters.py
@@ -0,0 +1,76 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# pylint: disable=protected-access,bad-continuation,missing-function-docstring
+
+from tests.unit.vertexai.genai.replays import pytest_helper
+from vertexai import types
+import pytest
+
+
+def test_gen_loss_clusters(client):
+  """Tests that generate_loss_clusters() returns one result with two clusters."""
+  eval_result = types.EvaluationResult()
+  response = client.evals.generate_loss_clusters(
+      eval_result=eval_result,
+      config=types.LossAnalysisConfig(
+          metric="multi_turn_task_success_v1",
+          candidate="travel-agent",
+      ),
+  )
+  assert isinstance(response, types.GenerateLossClustersResponse)
+  assert len(response.results) == 1
+  result = response.results[0]
+  assert result.config.metric == "multi_turn_task_success_v1"
+  assert result.config.candidate == "travel-agent"
+  assert len(result.clusters) == 2
+  assert result.clusters[0].cluster_id == "cluster-1"
+  assert result.clusters[0].taxonomy_entry.l1_category == "Tool Calling"
+  assert (
+      result.clusters[0].taxonomy_entry.l2_category == "Missing Tool Invocation"
+  )
+  assert result.clusters[0].item_count == 3
+  assert result.clusters[1].cluster_id == "cluster-2"
+  assert result.clusters[1].taxonomy_entry.l1_category == "Hallucination"
+  assert result.clusters[1].item_count == 2
+
+
+pytest_plugins = ("pytest_asyncio",)
+
+
+@pytest.mark.asyncio
+async def test_gen_loss_clusters_async(client):
+  """Tests that async generate_loss_clusters() returns one result, two clusters."""
+  eval_result = types.EvaluationResult()
+  response = await client.aio.evals.generate_loss_clusters(
+      eval_result=eval_result,
+      config=types.LossAnalysisConfig(
+          metric="multi_turn_task_success_v1",
+          candidate="travel-agent",
+      ),
+  )
+  assert isinstance(response, types.GenerateLossClustersResponse)
+  assert len(response.results) == 1
+  result = response.results[0]
+  assert result.config.metric == "multi_turn_task_success_v1"
+  assert len(result.clusters) == 2
+  assert result.clusters[0].cluster_id == "cluster-1"
+  assert result.clusters[1].cluster_id == "cluster-2"
+
+
+pytestmark = pytest_helper.setup(
+    file=__file__,
+    globals_for_file=globals(),
+    test_method="evals.generate_loss_clusters",
+)
diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py
index 573f7f04d4..902769c372 100644
--- a/tests/unit/vertexai/genai/test_evals.py
+++ b/tests/unit/vertexai/genai/test_evals.py
@@ -29,6 +29,7 @@
 from google.cloud.aiplatform import initializer as aiplatform_initializer
 from vertexai import _genai
 from vertexai._genai import _evals_data_converters
+from vertexai._genai import _evals_utils
 from vertexai._genai import _evals_metric_handlers
 from vertexai._genai import _evals_visualization
 from vertexai._genai import _evals_metric_loaders
@@ -264,6 +265,296 @@ def test_t_inline_results(self):
         assert payload[0]["candidate_results"][0]["candidate"] == "gemini-pro"
         assert payload[0]["candidate_results"][0]["score"] == 0.0
 
+    def test_t_inline_results_sanitizes_agent_data(self):
+        """Tests that t_inline_results strips SDK-only fields from agent_data."""
+        eval_result = common_types.EvaluationResult(
+            eval_case_results=[
+                common_types.EvalCaseResult(
+                    eval_case_index=0,
+                    response_candidate_results=[
+                        common_types.ResponseCandidateResult(
+                            response_index=0,
+                            metric_results={
+                                "multi_turn_task_success_v1": common_types.EvalCaseMetricResult(
+                                    score=0.0,
+                                    explanation="Failed",
+                                )
+                            },
+                        )
+                    ],
+                )
+            ],
+            evaluation_dataset=[
+                common_types.EvaluationDataset(
+                    eval_cases=[
+                        common_types.EvalCase(
+                            agent_data=vertexai_genai_types.evals.AgentData(
+                                turns=[
+                                    vertexai_genai_types.evals.ConversationTurn(
+                                        turn_index=0,
+                                        turn_id="turn_0",
+                                        events=[
+                                            vertexai_genai_types.evals.AgentEvent(
+                                                author="user",
+                                                content=genai_types.Content(
+                                                    role="user",
+                                                    parts=[genai_types.Part(text="Hello")],
+                                                ),
+                                            ),
+                                            vertexai_genai_types.evals.AgentEvent(
+                                                author="model",
+                                                content=genai_types.Content(
+                                                    role="model",
+                                                    parts=[
+                                                        genai_types.Part(
+                                                            function_call=genai_types.FunctionCall(
+                                                                name="search",
+                                                                args={"q": "test"},
+                                                            )
+                                                        )
+                                                    ],
+                                                ),
+                                            ),
+                                            vertexai_genai_types.evals.AgentEvent(
+                                                author="model",
+                                                content=genai_types.Content(
+                                                    role="model",
+                                                    parts=[
+                                                        genai_types.Part(
+                                                            function_response=genai_types.FunctionResponse(
+                                                                name="search",
+                                                                response={"result": "ok"},
+                                                            )
+                                                        )
+                                                    ],
+                                                ),
+                                            ),
+                                        ],
+                                    )
+                                ]
+                            )
+                        )
+                    ]
+                )
+            ],
+            metadata=common_types.EvaluationRunMetadata(
+                candidate_names=["travel-agent"]
+            ),
+        )
+
+        payload = _transformers.t_inline_results([eval_result])
+        assert len(payload) == 1
+
+        agent_data = payload[0]["request"]["prompt"]["agent_data"]
+        assert "turns" in agent_data
+        events = agent_data["turns"][0]["events"]
+        assert len(events) == 3
+
+        # Check text part is preserved
+        text_part = events[0]["content"]["parts"][0]
+        assert "text" in text_part
+        assert text_part["text"] == "Hello"
+
+        # Check function_call is preserved (API-recognized field)
+        fc_part = events[1]["content"]["parts"][0]
+        assert "function_call" in fc_part
+        assert fc_part["function_call"]["name"] == "search"
+        # SDK-only fields must NOT be present
+        assert "tool_call" not in fc_part
+        assert "tool_response" not in fc_part
+        assert "part_metadata" not in fc_part
+
+        # Check function_response is preserved but will_continue is stripped
+        fr_part = events[2]["content"]["parts"][0]
+        assert "function_response" in fr_part
+        assert fr_part["function_response"]["name"] == "search"
+        assert "will_continue" not in fr_part["function_response"]
+
+    def test_sanitize_agent_data_from_dataframe(self):
+        """Tests sanitization when agent_data comes from DataFrame (dict form)."""
+        # Simulate agent_data stored in DataFrame with SDK-only fields
+        raw_agent_data = {
+            "turns": [{
+                "turn_index": 0,
+                "turn_id": "turn_0",
+                "events": [{
+                    "author": "model",
+                    "content": {
+                        "role": "model",
+                        "parts": [{
+                            "function_call": {"name": "find_flights", "args": {"origin": "NYC"}},
+                            "tool_call": None,
+                            "tool_response": None,
+                            "part_metadata": None,
+                        }],
+                    },
+                }, {
+                    "author": "model",
+                    "content": {
+                        "role": "model",
+                        "parts": [{
+                            "function_response": {
+                                "name": "find_flights",
+                                "response": {"flights": []},
+                                "will_continue": False,
+                                "scheduling": None,
+                            },
+                        }],
+                    },
+                }],
+            }],
+        }
+
+        sanitized = _transformers._sanitize_agent_data(raw_agent_data)
+
+        parts_0 = sanitized["turns"][0]["events"][0]["content"]["parts"][0]
+        assert "function_call" in parts_0
+        assert "tool_call" not in parts_0
+        assert "tool_response" not in parts_0
+        assert "part_metadata" not in parts_0
+
+        parts_1 = sanitized["turns"][0]["events"][1]["content"]["parts"][0]
+        assert "function_response" in parts_1
+        assert parts_1["function_response"]["name"] == "find_flights"
+        assert "will_continue" not in parts_1["function_response"]
+        assert "scheduling" not in parts_1["function_response"]
+
+    def test_sanitize_agent_data_skips_error_payload(self):
+        """Tests that an error-only payload is sanitized to an empty dict."""
+        error_data = {"error": "Multi-turn agent run with user simulation failed"}
+        sanitized = _transformers._sanitize_agent_data(error_data)
+        assert "error" not in sanitized
+        assert sanitized == {}
+
+    def test_t_inline_results_skips_error_agent_data_in_df(self):
+        """Tests that t_inline_results skips error agent_data from DataFrame."""
+        error_json = json.dumps({"error": "Agent run failed"})
+        df = pd.DataFrame({
+            "prompt": ["test"],
+            "agent_data": [error_json],
+        })
+        eval_result = common_types.EvaluationResult(
+            eval_case_results=[
+                common_types.EvalCaseResult(
+                    eval_case_index=0,
+                    response_candidate_results=[
+                        common_types.ResponseCandidateResult(
+                            response_index=0,
+                            metric_results={
+                                "metric_v1": common_types.EvalCaseMetricResult(
+                                    score=0.0,
+                                )
+                            },
+                        )
+                    ],
+                )
+            ],
+            evaluation_dataset=[
+                common_types.EvaluationDataset(eval_dataset_df=df)
+            ],
+            metadata=common_types.EvaluationRunMetadata(
+                candidate_names=["agent"]
+            ),
+        )
+        payload = _transformers.t_inline_results([eval_result])
+        assert len(payload) == 1
+        # The prompt should have no agent_data (error was skipped)
+        assert "agent_data" not in payload[0]["request"]["prompt"]
+
+
+class TestLossAnalysis:
+    """Unit tests for loss analysis types and visualization."""
+
+    def test_response_structure(self):
+        response = common_types.GenerateLossClustersResponse(
+            analysis_time="2026-04-01T10:00:00Z",
+            results=[
+                common_types.LossAnalysisResult(
+                    config=common_types.LossAnalysisConfig(
+                        metric="multi_turn_task_success_v1",
+                        candidate="travel-agent",
+                    ),
+                    analysis_time="2026-04-01T10:00:00Z",
+                    clusters=[
+                        common_types.LossCluster(
+                            cluster_id="cluster-1",
+                            taxonomy_entry=common_types.LossTaxonomyEntry(
+                                l1_category="Tool Calling",
+                                l2_category="Missing Tool Invocation",
+                                description="The agent failed to invoke a required tool.",
+                            ),
+                            item_count=3,
+                        ),
+                        common_types.LossCluster(
+                            cluster_id="cluster-2",
+                            taxonomy_entry=common_types.LossTaxonomyEntry(
+                                l1_category="Hallucination",
+                                l2_category="Hallucination of Action",
+                                description="Verbally confirmed action without tool.",
+                            ),
+                            item_count=2,
+                        ),
+                    ],
+                )
+            ],
+        )
+        assert len(response.results) == 1
+        assert response.analysis_time == "2026-04-01T10:00:00Z"
+        result = response.results[0]
+        assert result.config.metric == "multi_turn_task_success_v1"
+        assert len(result.clusters) == 2
+        assert result.clusters[0].cluster_id == "cluster-1"
+        assert result.clusters[0].item_count == 3
+        assert result.clusters[1].cluster_id == "cluster-2"
+
+    def test_response_show_with_results(self, capsys):
+        response = common_types.GenerateLossClustersResponse(
+            results=[
+                common_types.LossAnalysisResult(
+                    config=common_types.LossAnalysisConfig(
+                        metric="test_metric",
+                        candidate="test-candidate",
+                    ),
+                    clusters=[
+                        common_types.LossCluster(
+                            cluster_id="c1",
+                            taxonomy_entry=common_types.LossTaxonomyEntry(
+                                l1_category="Cat1",
+                                l2_category="SubCat1",
+                            ),
+                            item_count=5,
+                        ),
+                    ],
+                )
+            ],
+        )
+        response.show()
+        captured = capsys.readouterr()
+        assert "test_metric" in captured.out
+        assert "c1" in captured.out
+
+    def test_loss_analysis_result_show(self, capsys):
+        result = common_types.LossAnalysisResult(
+            config=common_types.LossAnalysisConfig(
+                metric="test_metric",
+                candidate="test-candidate",
+            ),
+            clusters=[
+                common_types.LossCluster(
+                    cluster_id="c1",
+                    taxonomy_entry=common_types.LossTaxonomyEntry(
+                        l1_category="DirectCat",
+                        l2_category="DirectSubCat",
+                    ),
+                    item_count=7,
+                ),
+            ],
+        )
+        result.show()
+        captured = capsys.readouterr()
+        assert "test_metric" in captured.out
+        assert "c1" in captured.out
+
 
 class TestEvals:
     """Unit tests for the GenAI client."""
@@ -1570,7 +1861,7 @@ def test_run_inference_with_local_agent(
         mock_runner_instance = mock_runner.return_value
         stream_run_return_value_1 = [
             mock.Mock(
-                model_dump=lambda: {
+                model_dump=lambda **kwargs: {
                     "id": "1",
                     "content": {"parts": [{"text": "intermediate1"}]},
                     "timestamp": 123,
@@ -1578,7 +1869,7 @@ def test_run_inference_with_local_agent(
                 }
             ),
             mock.Mock(
-                model_dump=lambda: {
+                model_dump=lambda **kwargs: {
                     "id": "2",
                     "content": {"parts": [{"text": "agent response"}]},
                     "timestamp": 124,
@@ -1588,7 +1879,7 @@ def test_run_inference_with_local_agent(
         ]
         stream_run_return_value_2 = [
             mock.Mock(
-                model_dump=lambda: {
+                model_dump=lambda **kwargs: {
                     "id": "3",
                     "content": {"parts": [{"text": "intermediate2"}]},
                     "timestamp": 125,
@@ -1596,7 +1887,7 @@ def test_run_inference_with_local_agent(
                 }
             ),
             mock.Mock(
-                model_dump=lambda: {
+                model_dump=lambda **kwargs: {
                     "id": "4",
                     "content": {"parts": [{"text": "agent response 2"}]},
                     "timestamp": 126,
@@ -2314,9 +2605,15 @@ async def test_run_adk_user_simulation_with_intermediate_events(
             turn["events"][3]["content"]["parts"][0]["text"]
             == "There are no laptops matching your search."
         )
-        mock_invocation.user_content.model_dump.assert_called_with(mode="json")
-        mock_event_1.content.model_dump.assert_called_with(mode="json")
-        mock_invocation.final_response.model_dump.assert_called_with(mode="json")
+        mock_invocation.user_content.model_dump.assert_called_with(
+            mode="json", exclude_none=True
+        )
+        mock_event_1.content.model_dump.assert_called_with(
+            mode="json", exclude_none=True
+        )
+        mock_invocation.final_response.model_dump.assert_called_with(
+            mode="json", exclude_none=True
+        )
 
     @mock.patch.object(_evals_common, "_run_agent")
     def test_run_agent_internal_malformed_event(self, mock_run_agent):
diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py
index a62fe2083b..e724630033 100644
--- a/vertexai/_genai/_evals_common.py
+++ b/vertexai/_genai/_evals_common.py
@@ -1014,7 +1014,9 @@ async def _run_adk_user_simulation(
             events.append(
                 {
                     "author": "user",
-                    "content": invocation.user_content.model_dump(mode="json"),
+                    "content": invocation.user_content.model_dump(
+                        mode="json", exclude_none=True
+                    ),
                     "event_time": datetime.datetime.fromtimestamp(
                         invocation.creation_timestamp, tz=datetime.timezone.utc
                     ),
@@ -1030,7 +1032,9 @@ async def _run_adk_user_simulation(
                         {
                             "author": ie.author,
                             "content": (
-                                ie.content.model_dump(mode="json")
+                                ie.content.model_dump(
+                                    mode="json", exclude_none=True
+                                )
                                 if ie.content
                                 else None
                             ),
@@ -1044,7 +1048,9 @@ async def _run_adk_user_simulation(
                     events.append(
                         {
                             "author": "tool_call",
-                            "content": tool_call.model_dump(mode="json"),
+                            "content": tool_call.model_dump(
+                                mode="json", exclude_none=True
+                            ),
                             "event_time": datetime.datetime.fromtimestamp(
                                 invocation.creation_timestamp, tz=datetime.timezone.utc
                             ),
@@ -1055,7 +1061,9 @@ async def _run_adk_user_simulation(
             events.append(
                 {
                     "author": "agent",
-                    "content": invocation.final_response.model_dump(mode="json"),
+                    "content": invocation.final_response.model_dump(
+                        mode="json", exclude_none=True
+                    ),
                     "event_time": datetime.datetime.fromtimestamp(
                         invocation.creation_timestamp, tz=datetime.timezone.utc
                     ),
@@ -2021,7 +2029,7 @@ async def _execute_local_agent_run_with_retry_async(
                     new_message=new_message_content,
                 ):
                     if event:
-                        event = event.model_dump()
+                        event = event.model_dump(exclude_none=True)
                     if event and CONTENT in event and PARTS in event[CONTENT]:
                         events.append(event)
                 return events
diff --git a/vertexai/_genai/_evals_utils.py b/vertexai/_genai/_evals_utils.py
index 9d4dd4fc71..9ddeb4ae97 100644
--- a/vertexai/_genai/_evals_utils.py
+++ b/vertexai/_genai/_evals_utils.py
@@ -15,9 +15,11 @@
 """Utility functions for evals."""
 
 import abc
+import asyncio
+import json
 import logging
 import os
-import json
+import time
 from typing import Any, Optional, Union
 
 from google.genai._api_client import BaseApiClient
@@ -366,6 +368,112 @@ def _postprocess_user_scenarios_response(
     )
 
 
+def _display_loss_analysis_result(
+    result: types.LossAnalysisResult,
+) -> None:
+    """Displays one DataFrame row per loss cluster; logs if there are none."""
+    metric = result.config.metric if result.config else None
+    candidate = result.config.candidate if result.config else None
+    rows = []
+    for cluster in result.clusters or []:
+        entry = cluster.taxonomy_entry
+        row = {
+            "metric": metric,
+            "candidate": candidate,
+            "cluster_id": cluster.cluster_id,
+            "l1_category": entry.l1_category if entry else None,
+            "l2_category": entry.l2_category if entry else None,
+            "description": entry.description if entry else None,
+            "item_count": cluster.item_count,
+        }
+        rows.append(row)
+
+    if not rows:
+        logger.info("No loss clusters found.")
+        return
+
+    df = pd.DataFrame(rows)
+    try:
+        from IPython.display import display  # pylint: disable=g-import-not-at-top
+
+        display(df)
+    except ImportError:  # IPython is not installed; print plain text instead.
+        print(df.to_string())  # pylint: disable=print-function
+
+
+
+
+
+def _poll_operation(
+    api_client: BaseApiClient,
+    operation: types.GenerateLossClustersOperation,
+    poll_interval_seconds: float = 5.0,
+) -> types.GenerateLossClustersOperation:
+    """Polls a long-running operation until it reports done (no timeout).
+
+    Args:
+        api_client: The API client used to GET the operation by its name.
+        operation: The initial operation; returned as-is if already done.
+        poll_interval_seconds: Seconds to sleep after each unfinished poll.
+
+    Returns:
+        The first polled operation whose `done` field is truthy.
+    """
+    if operation.done:
+        return operation
+    start_time = time.monotonic()  # Monotonic: immune to wall-clock jumps.
+    while True:
+        response = api_client.request("get", operation.name, {}, None)
+        response_dict = {} if not response.body else json.loads(response.body)
+        polled = types.GenerateLossClustersOperation._from_response(
+            response=response_dict, kwargs={}
+        )
+        if polled.done:
+            return polled
+        elapsed = int(time.monotonic() - start_time)
+        logger.info(
+            "Loss analysis operation still running... Elapsed time: %d seconds",
+            elapsed,
+        )
+        time.sleep(poll_interval_seconds)
+
+
+async def _poll_operation_async(
+    api_client: BaseApiClient,
+    operation: types.GenerateLossClustersOperation,
+    poll_interval_seconds: float = 5.0,
+) -> types.GenerateLossClustersOperation:
+    """Polls a long-running operation until it reports done (async, no timeout).
+
+    Args:
+        api_client: The API client used to GET the operation by its name.
+        operation: The initial operation; returned as-is if already done.
+        poll_interval_seconds: Seconds to sleep after each unfinished poll.
+
+    Returns:
+        The first polled operation whose `done` field is truthy.
+    """
+    if operation.done:
+        return operation
+    start_time = time.monotonic()  # Monotonic: immune to wall-clock jumps.
+    while True:
+        response = await api_client.async_request(
+            "get", operation.name, {}, None
+        )
+        response_dict = {} if not response.body else json.loads(response.body)
+        polled = types.GenerateLossClustersOperation._from_response(
+            response=response_dict, kwargs={}
+        )
+        if polled.done:
+            return polled
+        elapsed = int(time.monotonic() - start_time)
+        logger.info(
+            "Loss analysis operation still running... Elapsed time: %d seconds",
+            elapsed,
+        )
+        await asyncio.sleep(poll_interval_seconds)
+
+
 def _validate_dataset_agent_data(
     dataset: types.EvaluationDataset,
     inference_configs: Optional[dict[str, Any]] = None,
diff --git a/vertexai/_genai/_transformers.py b/vertexai/_genai/_transformers.py
index 34c471ef85..e694966b8e 100644
--- a/vertexai/_genai/_transformers.py
+++ b/vertexai/_genai/_transformers.py
@@ -14,6 +14,7 @@
 #
 
 """Transformers module for Vertex addons."""
+import json
 import re
 from typing import Any
 
@@ -260,6 +261,118 @@ def t_metric_for_registry(
     return metric_payload_item
 
 
+_ALLOWED_PART_FIELDS = frozenset({  # Part keys that _sanitize_part keeps.
+    "text", "inline_data", "file_data", "function_call", "function_response",
+    "video_metadata", "thought", "thought_signature", "code_execution_result",
+    "executable_code", "media_resolution",
+})
+
+
+def _sanitize_agent_data(agent_data: dict[str, Any]) -> dict[str, Any]:
+    """Strips SDK-only fields from agent_data so the API accepts the payload.
+
+    Only 'turns' (a list; non-dict entries are dropped) and 'agents' (a dict)
+    survive at the top level, so keys such as 'error' are removed. Nested
+    parts lose SDK-only fields like 'tool_call', 'tool_response',
+    'part_metadata' and 'will_continue'. Non-dict input is returned as-is.
+    """
+    if not isinstance(agent_data, dict):
+        return agent_data
+
+    sanitized = {}
+    for key, value in agent_data.items():
+        if key == "turns" and isinstance(value, list):
+            sanitized["turns"] = [
+                _sanitize_turn(t) for t in value if isinstance(t, dict)
+            ]
+        elif key == "agents" and isinstance(value, dict):
+            sanitized["agents"] = {
+                k: _sanitize_agent_config(v) if isinstance(v, dict) else v
+                for k, v in value.items()
+            }
+        # Skip unknown top-level fields (e.g. "error" from failed agent runs).
+    return sanitized
+
+
+def _sanitize_agent_config(config: dict[str, Any]) -> dict[str, Any]:
+    """Returns a copy of an AgentConfig dict restricted to the allowed keys."""
+    allowed = {"agent_id", "agent_type", "description", "instruction", "tools", "sub_agents"}
+    return {k: v for k, v in config.items() if k in allowed}
+
+
+def _sanitize_turn(turn: dict[str, Any]) -> dict[str, Any]:
+    """Sanitizes dict entries of a turn's 'events' list; other keys pass through."""
+    sanitized = {}
+    for key, value in turn.items():
+        if key == "events" and isinstance(value, list):
+            sanitized["events"] = [
+                _sanitize_event(e) for e in value if isinstance(e, dict)
+            ]
+        else:
+            sanitized[key] = value
+    return sanitized
+
+
+def _sanitize_event(event: dict[str, Any]) -> dict[str, Any]:
+    """Keeps an event's sanitized dict 'content' plus a fixed set of other keys."""
+    sanitized = {}
+    for key, value in event.items():
+        if key == "content" and isinstance(value, dict):
+            sanitized["content"] = _sanitize_content(value)
+        elif key in ("author", "event_time", "state_delta", "active_tools"):
+            sanitized[key] = value
+        # Anything else, including a non-dict 'content', is dropped.
+    return sanitized
+
+
+def _sanitize_content(content: dict[str, Any]) -> dict[str, Any]:
+    """Keeps only 'role' and the sanitized dict entries of the 'parts' list."""
+    sanitized = {}
+    for key, value in content.items():
+        if key == "parts" and isinstance(value, list):
+            sanitized["parts"] = [
+                _sanitize_part(p) for p in value if isinstance(p, dict)
+            ]
+        elif key == "role":
+            sanitized["role"] = value
+    return sanitized
+
+
+def _sanitize_part(part: dict[str, Any]) -> dict[str, Any]:
+    """Keeps only the keys listed in _ALLOWED_PART_FIELDS from a Part dict."""
+    sanitized = {}
+    for key, value in part.items():
+        if key in _ALLOWED_PART_FIELDS:
+            if key == "function_response" and isinstance(value, dict):
+                # Keep 'name', 'id' and 'response'; drop e.g. 'will_continue'.
+                sanitized[key] = {
+                    k: v for k, v in value.items()
+                    if k in ("name", "id", "response")
+                }
+            else:
+                sanitized[key] = value
+    return sanitized
+
+
+def _extract_agent_data_from_df(
+    eval_dataset: Any,
+    case_idx: int,
+) -> Any:
+    """Returns the first dataset's DataFrame agent_data at case_idx, or None."""
+    if not eval_dataset:
+        return None
+    ds = eval_dataset[0] if isinstance(eval_dataset, list) else eval_dataset
+    df = getv(ds, ["eval_dataset_df"])
+    if df is None or not hasattr(df, "iloc"):
+        return None
+    if case_idx < 0 or case_idx >= len(df):
+        return None
+    row = df.iloc[case_idx]
+    if "agent_data" not in row or row["agent_data"] is None:
+        return None
+    return row["agent_data"]
+
+
 def t_inline_results(
     eval_results: list[Any],
 ) -> list[dict[str, Any]]:
@@ -292,7 +405,13 @@ def t_inline_results(
 
                 if agent_data:
                     if hasattr(agent_data, "model_dump"):
-                        prompt_payload["agent_data"] = agent_data.model_dump()
+                        prompt_payload["agent_data"] = _sanitize_agent_data(
+                            agent_data.model_dump()
+                        )
+                    elif isinstance(agent_data, dict):
+                        prompt_payload["agent_data"] = _sanitize_agent_data(
+                            agent_data
+                        )
                     else:
                         prompt_payload["agent_data"] = agent_data
                 elif prompt:
@@ -302,6 +421,34 @@ def t_inline_results(
                     if text:
                         prompt_payload["text"] = str(text)
 
+            # Fallback: extract agent_data from the DataFrame when eval_cases
+            # are not available (e.g., run_inference -> evaluate flow).
+            if not prompt_payload:
+                df_agent_data = _extract_agent_data_from_df(
+                    eval_dataset, case_idx
+                )
+                if df_agent_data is not None:
+                    if hasattr(df_agent_data, "model_dump"):
+                        prompt_payload["agent_data"] = _sanitize_agent_data(
+                            df_agent_data.model_dump()
+                        )
+                    elif isinstance(df_agent_data, str):
+                        try:
+                            parsed = json.loads(df_agent_data)
+                            if isinstance(parsed, dict) and "error" in parsed:
+                                pass  # Skip error payloads from failed agent runs.
+                            else:
+                                prompt_payload["agent_data"] = (
+                                    _sanitize_agent_data(parsed)
+                                )
+                        except (json.JSONDecodeError, ValueError):
+                            pass
+                    elif isinstance(df_agent_data, dict):
+                        if "error" not in df_agent_data:
+                            prompt_payload["agent_data"] = _sanitize_agent_data(
+                                df_agent_data
+                            )
+
             cand_results = getv(case_result, ["response_candidate_results"]) or []
             for resp_cand_result in cand_results:
                 resp_idx = getv(resp_cand_result, ["response_index"]) or 0
diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py
index 0adf80cd6e..eb8ffe4b5f 100644
--- a/vertexai/_genai/evals.py
+++ b/vertexai/_genai/evals.py
@@ -2454,6 +2454,53 @@ def generate_conversation_scenarios(
         )
         return _evals_utils._postprocess_user_scenarios_response(response)
 
+    @_common.experimental_warning(
+        "The Vertex SDK GenAI evals.generate_loss_clusters module is experimental, "
+        "and may change in future versions."
+    )
+    def generate_loss_clusters(
+        self,
+        *,
+        eval_result: types.EvaluationResult,
+        config: types.LossAnalysisConfigOrDict,
+    ) -> types.GenerateLossClustersResponse:
+        """Runs loss analysis on an evaluation result and waits for the outcome.
+
+        Starts the GenerateLossClusters long-running operation for a single
+        evaluation result and a single config, polls it until it finishes,
+        and hands back the operation's response.
+
+        Args:
+            eval_result: The EvaluationResult to analyze, sent as the only
+                inline result of the request.
+            config: The loss analysis configuration, given either as a
+                LossAnalysisConfig or as a dict that is validated into one.
+
+        Returns:
+            The GenerateLossClustersResponse carried by the completed
+            operation. Use .show() to display it, or read .results for the
+            individual LossAnalysisResult objects.
+
+        Raises:
+            RuntimeError: If the completed operation reports an error.
+        """
+        loss_config = config
+        # Dict configs are validated into a LossAnalysisConfig before sending.
+        if isinstance(loss_config, dict):
+            loss_config = types.LossAnalysisConfig.model_validate(loss_config)
+        pending_op = self._generate_loss_clusters(
+            inline_results=[eval_result],
+            configs=[loss_config],
+        )
+        finished_op = _evals_utils._poll_operation(
+            api_client=self._api_client,
+            operation=pending_op,
+        )
+        if not finished_op.error:
+            return finished_op.response
+        # A truthy error means the operation did not succeed.
+        raise RuntimeError(f"Loss analysis operation failed: {finished_op.error}")
+
     @_common.experimental_warning(
         "The Vertex SDK GenAI evals.create_evaluation_metric method is experimental, "
         "and may change in future versions."
@@ -3731,6 +3778,53 @@ async def generate_conversation_scenarios(
         )
         return _evals_utils._postprocess_user_scenarios_response(response)
 
+    @_common.experimental_warning(
+        "The Vertex SDK GenAI evals.generate_loss_clusters module is experimental, "
+        "and may change in future versions."
+    )
+    async def generate_loss_clusters(
+        self,
+        *,
+        eval_result: types.EvaluationResult,
+        config: types.LossAnalysisConfigOrDict,
+    ) -> types.GenerateLossClustersResponse:
+        """Asynchronously runs loss analysis on an evaluation result.
+
+        Awaits the start of the GenerateLossClusters long-running operation
+        for a single evaluation result and a single config, awaits polling
+        until it finishes, and hands back the operation's response.
+
+        Args:
+            eval_result: The EvaluationResult to analyze, sent as the only
+                inline result of the request.
+            config: The loss analysis configuration, given either as a
+                LossAnalysisConfig or as a dict that is validated into one.
+
+        Returns:
+            The GenerateLossClustersResponse carried by the completed
+            operation. Use .show() to display it, or read .results for the
+            individual LossAnalysisResult objects.
+
+        Raises:
+            RuntimeError: If the completed operation reports an error.
+        """
+        loss_config = config
+        # Dict configs are validated into a LossAnalysisConfig before sending.
+        if isinstance(loss_config, dict):
+            loss_config = types.LossAnalysisConfig.model_validate(loss_config)
+        pending_op = await self._generate_loss_clusters(
+            inline_results=[eval_result],
+            configs=[loss_config],
+        )
+        finished_op = await _evals_utils._poll_operation_async(
+            api_client=self._api_client,
+            operation=pending_op,
+        )
+        if not finished_op.error:
+            return finished_op.response
+        # A truthy error means the operation did not succeed.
+        raise RuntimeError(f"Loss analysis operation failed: {finished_op.error}")
+
     @_common.experimental_warning(
         "The Vertex SDK GenAI evals.create_evaluation_metric module is experimental, "
         "and may change in future versions."
diff --git a/vertexai/_genai/types/common.py b/vertexai/_genai/types/common.py
index 2d9f4b50ce..d3616a460b 100644
--- a/vertexai/_genai/types/common.py
+++ b/vertexai/_genai/types/common.py
@@ -4920,6 +4920,12 @@ class LossAnalysisResult(_common.BaseModel):
         default=None, description="""The list of identified loss clusters."""
     )
 
+    def show(self) -> None:
+        """Displays this result via `_evals_utils._display_loss_analysis_result`."""
+        # Imported at call time (presumably to avoid an import cycle - confirm).
+        from .._evals_utils import _display_loss_analysis_result
+        _display_loss_analysis_result(self)
+
 
 class LossAnalysisResultDict(TypedDict, total=False):
     """The top-level result for loss analysis."""
@@ -4948,6 +4954,11 @@ class GenerateLossClustersResponse(_common.BaseModel):
         description="""The analysis results, one per config provided in the request.""",
     )
 
+    def show(self) -> None:
+        """Calls `show()` on every entry of `self.results`, if there are any."""
+        for analysis in self.results if self.results else ():
+            analysis.show()
+
 
 class GenerateLossClustersResponseDict(TypedDict, total=False):
     """Response message for EvaluationAnalyticsService.GenerateLossClusters."""