Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# pylint: disable=protected-access,bad-continuation,missing-function-docstring

from tests.unit.vertexai.genai.replays import pytest_helper
from vertexai import types
import pytest


def test_gen_loss_clusters(client):
    """Tests that generate_loss_clusters() returns GenerateLossClustersResponse."""
    loss_config = types.LossAnalysisConfig(
        metric="multi_turn_task_success_v1",
        candidate="travel-agent",
    )
    response = client.evals.generate_loss_clusters(
        eval_result=types.EvaluationResult(),
        config=loss_config,
    )
    assert isinstance(response, types.GenerateLossClustersResponse)
    assert len(response.results) == 1
    result = response.results[0]
    # The resolved config is echoed back on each result.
    assert result.config.metric == "multi_turn_task_success_v1"
    assert result.config.candidate == "travel-agent"
    assert len(result.clusters) == 2
    first, second = result.clusters
    assert first.cluster_id == "cluster-1"
    assert first.taxonomy_entry.l1_category == "Tool Calling"
    assert first.taxonomy_entry.l2_category == "Missing Tool Invocation"
    assert first.item_count == 3
    assert second.cluster_id == "cluster-2"
    assert second.taxonomy_entry.l1_category == "Hallucination"
    assert second.item_count == 2


# Load pytest-asyncio so the @pytest.mark.asyncio test below is executed.
pytest_plugins = ("pytest_asyncio",)


@pytest.mark.asyncio
async def test_gen_loss_clusters_async(client):
    """Tests that generate_loss_clusters() async returns GenerateLossClustersResponse."""
    response = await client.aio.evals.generate_loss_clusters(
        eval_result=types.EvaluationResult(),
        config=types.LossAnalysisConfig(
            metric="multi_turn_task_success_v1",
            candidate="travel-agent",
        ),
    )
    assert isinstance(response, types.GenerateLossClustersResponse)
    assert len(response.results) == 1
    first_result = response.results[0]
    assert first_result.config.metric == "multi_turn_task_success_v1"
    assert len(first_result.clusters) == 2
    assert first_result.clusters[0].cluster_id == "cluster-1"
    assert first_result.clusters[1].cluster_id == "cluster-2"


# Hook this module into the replay test harness; test_method names the
# client method whose recorded API interactions are replayed.
pytestmark = pytest_helper.setup(
    file=__file__,
    globals_for_file=globals(),
    test_method="evals.generate_loss_clusters",
)
277 changes: 277 additions & 0 deletions tests/unit/vertexai/genai/test_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from google.cloud.aiplatform import initializer as aiplatform_initializer
from vertexai import _genai
from vertexai._genai import _evals_data_converters
from vertexai._genai import _evals_utils
from vertexai._genai import _evals_metric_handlers
from vertexai._genai import _evals_visualization
from vertexai._genai import _evals_metric_loaders
Expand Down Expand Up @@ -265,6 +266,282 @@ def test_t_inline_results(self):
assert payload[0]["candidate_results"][0]["score"] == 0.0


class TestLossAnalysis:
    """Unit tests for loss analysis types and visualization."""

    def test_response_structure(self):
        """Nested response/result/cluster fields survive construction intact."""
        tool_cluster = common_types.LossCluster(
            cluster_id="cluster-1",
            taxonomy_entry=common_types.LossTaxonomyEntry(
                l1_category="Tool Calling",
                l2_category="Missing Tool Invocation",
                description="The agent failed to invoke a required tool.",
            ),
            item_count=3,
        )
        hallucination_cluster = common_types.LossCluster(
            cluster_id="cluster-2",
            taxonomy_entry=common_types.LossTaxonomyEntry(
                l1_category="Hallucination",
                l2_category="Hallucination of Action",
                description="Verbally confirmed action without tool.",
            ),
            item_count=2,
        )
        response = common_types.GenerateLossClustersResponse(
            analysis_time="2026-04-01T10:00:00Z",
            results=[
                common_types.LossAnalysisResult(
                    config=common_types.LossAnalysisConfig(
                        metric="multi_turn_task_success_v1",
                        candidate="travel-agent",
                    ),
                    analysis_time="2026-04-01T10:00:00Z",
                    clusters=[tool_cluster, hallucination_cluster],
                )
            ],
        )
        assert len(response.results) == 1
        assert response.analysis_time == "2026-04-01T10:00:00Z"
        result = response.results[0]
        assert result.config.metric == "multi_turn_task_success_v1"
        assert len(result.clusters) == 2
        assert result.clusters[0].cluster_id == "cluster-1"
        assert result.clusters[0].item_count == 3
        assert result.clusters[1].cluster_id == "cluster-2"

    def test_response_show_with_results(self, capsys):
        """show() on the response prints the metric name and cluster ids."""
        response = common_types.GenerateLossClustersResponse(
            results=[
                common_types.LossAnalysisResult(
                    config=common_types.LossAnalysisConfig(
                        metric="test_metric",
                        candidate="test-candidate",
                    ),
                    clusters=[
                        common_types.LossCluster(
                            cluster_id="c1",
                            taxonomy_entry=common_types.LossTaxonomyEntry(
                                l1_category="Cat1",
                                l2_category="SubCat1",
                            ),
                            item_count=5,
                        )
                    ],
                )
            ],
        )
        response.show()
        printed = capsys.readouterr().out
        assert "test_metric" in printed
        assert "c1" in printed

    def test_loss_analysis_result_show(self, capsys):
        """show() on a single result prints the metric name and cluster ids."""
        single_cluster = common_types.LossCluster(
            cluster_id="c1",
            taxonomy_entry=common_types.LossTaxonomyEntry(
                l1_category="DirectCat",
                l2_category="DirectSubCat",
            ),
            item_count=7,
        )
        result = common_types.LossAnalysisResult(
            config=common_types.LossAnalysisConfig(
                metric="test_metric",
                candidate="test-candidate",
            ),
            clusters=[single_cluster],
        )
        result.show()
        printed = capsys.readouterr().out
        assert "test_metric" in printed
        assert "c1" in printed


def _make_eval_result(
    metrics=None,
    candidate_names=None,
):
    """Helper to create an EvaluationResult with the given metrics and candidates."""
    # NOTE: `or` (not an `is None` check) is intentional — a falsy value such
    # as [] also falls back to the default, which some callers rely on.
    metrics = metrics or ["task_success_v1"]
    candidate_names = candidate_names or ["agent-1"]

    per_metric = {
        name: common_types.EvalCaseMetricResult(metric_name=name) for name in metrics
    }
    single_case = common_types.EvalCaseResult(
        eval_case_index=0,
        response_candidate_results=[
            common_types.ResponseCandidateResult(
                response_index=0,
                metric_results=per_metric,
            )
        ],
    )
    return common_types.EvaluationResult(
        eval_case_results=[single_case],
        metadata=common_types.EvaluationRunMetadata(
            candidate_names=candidate_names,
        ),
    )


class TestResolveMetricName:
    """Unit tests for _resolve_metric_name."""

    def test_none_returns_none(self):
        assert _evals_utils._resolve_metric_name(None) is None

    def test_string_passes_through(self):
        resolved = _evals_utils._resolve_metric_name("task_success_v1")
        assert resolved == "task_success_v1"

    def test_metric_object_extracts_name(self):
        resolved = _evals_utils._resolve_metric_name(
            common_types.Metric(name="multi_turn_task_success_v1")
        )
        assert resolved == "multi_turn_task_success_v1"

    def test_object_with_name_attr(self):
        """Tests that any object with a .name attribute works (e.g., LazyLoadedPrebuiltMetric)."""

        class StubMetric:
            name = "tool_use_quality_v1"

        assert _evals_utils._resolve_metric_name(StubMetric()) == "tool_use_quality_v1"

    def test_lazy_loaded_prebuilt_metric_resolves_versioned_name(self):
        """Tests that LazyLoadedPrebuiltMetric resolves to the versioned API spec name."""

        class StubLazyMetric:
            name = "MULTI_TURN_TASK_SUCCESS"

            def _get_api_metric_spec_name(self):
                return "multi_turn_task_success_v1"

        resolved = _evals_utils._resolve_metric_name(StubLazyMetric())
        assert resolved == "multi_turn_task_success_v1"

    def test_lazy_loaded_prebuilt_metric_falls_back_to_name(self):
        """Tests fallback to .name when _get_api_metric_spec_name returns None."""

        class StubLazyMetricNoSpec:
            name = "CUSTOM_METRIC"

            def _get_api_metric_spec_name(self):
                return None

        resolved = _evals_utils._resolve_metric_name(StubLazyMetricNoSpec())
        assert resolved == "CUSTOM_METRIC"


class TestResolveLossAnalysisConfig:
    """Unit tests for _resolve_loss_analysis_config."""

    def test_auto_infer_single_metric_and_candidate(self):
        """A lone metric/candidate pair is inferred without explicit arguments."""
        resolved = _evals_utils._resolve_loss_analysis_config(
            eval_result=_make_eval_result(
                metrics=["task_success_v1"], candidate_names=["agent-1"]
            )
        )
        assert resolved.metric == "task_success_v1"
        assert resolved.candidate == "agent-1"

    def test_explicit_metric_and_candidate(self):
        resolved = _evals_utils._resolve_loss_analysis_config(
            eval_result=_make_eval_result(
                metrics=["m1", "m2"], candidate_names=["c1", "c2"]
            ),
            metric="m1",
            candidate="c2",
        )
        assert resolved.metric == "m1"
        assert resolved.candidate == "c2"

    def test_config_provides_metric_and_candidate(self):
        """Values supplied via a LossAnalysisConfig object are honored."""
        resolved = _evals_utils._resolve_loss_analysis_config(
            eval_result=_make_eval_result(metrics=["m1"], candidate_names=["c1"]),
            config=common_types.LossAnalysisConfig(
                metric="m1", candidate="c1", predefined_taxonomy="my_taxonomy"
            ),
        )
        assert resolved.metric == "m1"
        assert resolved.candidate == "c1"
        assert resolved.predefined_taxonomy == "my_taxonomy"

    def test_explicit_args_override_config(self):
        """Keyword arguments win over the values carried by config."""
        resolved = _evals_utils._resolve_loss_analysis_config(
            eval_result=_make_eval_result(
                metrics=["m1", "m2"], candidate_names=["c1", "c2"]
            ),
            config=common_types.LossAnalysisConfig(metric="m1", candidate="c1"),
            metric="m2",
            candidate="c2",
        )
        assert resolved.metric == "m2"
        assert resolved.candidate == "c2"

    def test_error_multiple_metrics_no_explicit(self):
        ambiguous = _make_eval_result(metrics=["m1", "m2"], candidate_names=["c1"])
        with pytest.raises(ValueError, match="multiple metrics"):
            _evals_utils._resolve_loss_analysis_config(eval_result=ambiguous)

    def test_error_multiple_candidates_no_explicit(self):
        ambiguous = _make_eval_result(metrics=["m1"], candidate_names=["c1", "c2"])
        with pytest.raises(ValueError, match="multiple candidates"):
            _evals_utils._resolve_loss_analysis_config(eval_result=ambiguous)

    def test_error_invalid_metric(self):
        with pytest.raises(ValueError, match="not found in eval_result"):
            _evals_utils._resolve_loss_analysis_config(
                eval_result=_make_eval_result(metrics=["m1"], candidate_names=["c1"]),
                metric="nonexistent",
            )

    def test_error_invalid_candidate(self):
        with pytest.raises(ValueError, match="not found in eval_result"):
            _evals_utils._resolve_loss_analysis_config(
                eval_result=_make_eval_result(metrics=["m1"], candidate_names=["c1"]),
                candidate="nonexistent",
            )

    def test_no_candidates_defaults_to_candidate_1(self):
        """Without candidate metadata, resolution falls back to 'candidate_1'."""
        # Replace the metadata so no candidate names are present at all.
        bare_result = _make_eval_result(
            metrics=["m1"], candidate_names=[]
        ).model_copy(update={"metadata": common_types.EvaluationRunMetadata()})
        resolved = _evals_utils._resolve_loss_analysis_config(eval_result=bare_result)
        assert resolved.metric == "m1"
        assert resolved.candidate == "candidate_1"

    def test_no_eval_case_results_raises(self):
        with pytest.raises(ValueError, match="no metric results"):
            _evals_utils._resolve_loss_analysis_config(
                eval_result=common_types.EvaluationResult()
            )


class TestEvals:
"""Unit tests for the GenAI client."""

Expand Down
Loading
Loading