Commit e75d91f

jsondai authored and copybara-github committed
fix: GenAI Client(evals) - Add support for context in Grounding metric
PiperOrigin-RevId: 809162951
1 parent 13a626b · commit e75d91f
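
In practice, this change lets an eval dataset carry a per-case "context" column that the Grounding metric consumes. Below is a minimal usage sketch distilled from the new replay test in this commit; the configured client and the vertexai types import path are assumptions (the test receives client as a pytest fixture):

import pandas as pd

from vertexai import types  # import path assumed; the test uses types directly

# "client" is assumed to be a configured Vertex AI GenAI client,
# e.g. client = vertexai.Client(project=..., location=...).

# One eval case: the prompt, the candidate response, and the source
# context the Grounding metric should check the response against.
prompts_df = pd.DataFrame(
    {
        "prompt": ["Explain the concept of machine learning in simple terms."],
        "response": [
            "Machine learning is a type of artificial intelligence that allows"
            " computers to learn from data without being explicitly programmed."
        ],
        "context": ["Machine learning (ML) is a subfield of AI ..."],
    }
)

eval_dataset = types.EvaluationDataset(
    eval_dataset_df=prompts_df,
    candidate_name="gemini-2.5-flash",
)

evaluation_result = client.evals.evaluate(
    dataset=eval_dataset,
    metrics=[types.RubricMetric.GROUNDING],
)

The returned EvaluationResult carries summary_metrics and eval_case_results, as the assertions in the new test below show.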

File tree: 2 files changed (66 additions, 1 deletion)


tests/unit/vertexai/genai/replays/test_evaluate_predefined_metrics.py

Lines changed: 48 additions & 1 deletion
@@ -37,7 +37,7 @@ def test_evaluation_result(client):
     )
 
     predefined_metrics = [
-        types.PrebuiltMetric.GENERAL_QUALITY,
+        types.RubricMetric.GENERAL_QUALITY,
     ]
 
     evaluation_result = client.evals.evaluate(
@@ -201,6 +201,53 @@ def test_multi_turn_predefined_metric(client):
         assert case_result.response_candidate_results is not None
 
 
+def test_evaluation_grounding_metric(client):
+    """Tests that grounding metric produces a correctly structured EvaluationResult."""
+    prompts_df = pd.DataFrame(
+        {
+            "prompt": ["Explain the concept of machine learning in simple terms."],
+            "response": [
+                "Machine learning is a type of artificial intelligence that allows"
+                " computers to learn from data without being explicitly programmed."
+            ],
+            "context": [
+                "Article: 'Intro to AI', Section 2.1\n"
+                "Machine learning (ML) is a subfield of artificial intelligence (AI). "
+                "The core idea of machine learning is that it allows computer systems to "
+                "learn from and adapt to new data without being explicitly programmed. "
+                "Instead of a developer writing code for every possible scenario, the "
+                "system builds a model based on patterns in training data."
+            ],
+        }
+    )
+
+    eval_dataset = types.EvaluationDataset(
+        eval_dataset_df=prompts_df,
+        candidate_name="gemini-2.5-flash",
+    )
+
+    evaluation_result = client.evals.evaluate(
+        dataset=eval_dataset,
+        metrics=[
+            types.RubricMetric.GROUNDING,
+        ],
+    )
+
+    assert isinstance(evaluation_result, types.EvaluationResult)
+
+    assert evaluation_result.summary_metrics is not None
+    for summary in evaluation_result.summary_metrics:
+        assert isinstance(summary, types.AggregatedMetricResult)
+        assert summary.metric_name is not None
+        assert summary.mean_score is not None
+
+    assert evaluation_result.eval_case_results is not None
+    for case_result in evaluation_result.eval_case_results:
+        assert isinstance(case_result, types.EvalCaseResult)
+        assert case_result.eval_case_index is not None
+        assert case_result.response_candidate_results is not None
+
+
 pytestmark = pytest_helper.setup(
     file=__file__,
     globals_for_file=globals(),

vertexai/_genai/_evals_metric_handlers.py

Lines changed: 18 additions & 0 deletions
@@ -871,13 +871,31 @@ def _build_request_payload(
             eval_case.prompt
         )
 
+        other_data_map = {}
+        if hasattr(eval_case, "context") and eval_case.context:
+            if isinstance(eval_case.context, str):
+                other_data_map["context"] = types.InstanceData(text=eval_case.context)
+            elif isinstance(eval_case.context, genai_types.Content):
+                other_data_map["context"] = (
+                    PredefinedMetricHandler._content_to_instance_data(eval_case.context)
+                )
+            else:
+                logger.warning(
+                    f"Unsupported type for context: {type(eval_case.context)}"
+                )
+
         instance_payload = types.EvaluationInstance(
             prompt=prompt_instance_data,
             response=PredefinedMetricHandler._content_to_instance_data(
                 response_content
             ),
             reference=reference_instance_data,
             rubric_groups=eval_case.rubric_groups,
+            other_data=(
+                types.MapInstance(map_instance=other_data_map)
+                if other_data_map
+                else None
+            ),
         )
 
         return {
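
For reference, here is a standalone sketch of the context handling added above, written as a hypothetical helper. It uses only names that appear in the diff; the import paths are assumptions, and in the real code the logic is inline in _build_request_payload rather than factored out:

from typing import Optional, Union

from google.genai import types as genai_types  # assumed import path
from vertexai._genai import types  # assumed import path
from vertexai._genai._evals_metric_handlers import (  # assumed import path
    PredefinedMetricHandler,
)


def context_to_other_data(
    context: Union[str, genai_types.Content, None],
) -> Optional[types.MapInstance]:
    """Hypothetical helper mirroring the inline context handling above."""
    other_data_map = {}
    if isinstance(context, str):
        # A plain string becomes a text InstanceData entry keyed "context".
        other_data_map["context"] = types.InstanceData(text=context)
    elif isinstance(context, genai_types.Content):
        # Structured Content is converted the same way prompts and responses are.
        other_data_map["context"] = PredefinedMetricHandler._content_to_instance_data(
            context
        )
    # The real handler logs a warning for any other truthy type and drops it.
    return types.MapInstance(map_instance=other_data_map) if other_data_map else None

When the map is non-empty, the context reaches the backend as other_data on the EvaluationInstance, alongside prompt, response, reference, and rubric_groups.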
