Commit e75d91f

jsondai authored and copybara-github committed
fix: GenAI Client(evals) - Add support for context in Grounding metric
PiperOrigin-RevId: 809162951
1 parent 13a626b · commit e75d91f
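
In practice, this change lets an eval dataset carry a per-case "context" column that the Grounding metric consumes. Below is a minimal usage sketch distilled from the new replay test in this commit; the configured client and the vertexai types import path are assumptions (the test receives client as a pytest fixture):

import pandas as pd

from vertexai import types  # import path assumed; the test uses types directly

# "client" is assumed to be a configured Vertex AI GenAI client,
# e.g. client = vertexai.Client(project=..., location=...).

# One eval case: the prompt, the candidate response, and the source
# context the Grounding metric should check the response against.
prompts_df = pd.DataFrame(
    {
        "prompt": ["Explain the concept of machine learning in simple terms."],
        "response": [
            "Machine learning is a type of artificial intelligence that allows"
            " computers to learn from data without being explicitly programmed."
        ],
        "context": ["Machine learning (ML) is a subfield of AI ..."],
    }
)

eval_dataset = types.EvaluationDataset(
    eval_dataset_df=prompts_df,
    candidate_name="gemini-2.5-flash",
)

evaluation_result = client.evals.evaluate(
    dataset=eval_dataset,
    metrics=[types.RubricMetric.GROUNDING],
)

The returned EvaluationResult carries summary_metrics and eval_case_results, as the assertions in the new test below show.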

File tree: 2 files changed (66 additions, 1 deletion)


tests/unit/vertexai/genai/replays/test_evaluate_predefined_metrics.py

Lines changed: 48 additions & 1 deletion
@@ -37,7 +37,7 @@ def test_evaluation_result(client):
     )
 
     predefined_metrics = [
-        types.PrebuiltMetric.GENERAL_QUALITY,
+        types.RubricMetric.GENERAL_QUALITY,
     ]
 
     evaluation_result = client.evals.evaluate(
@@ -201,6 +201,53 @@ def test_multi_turn_predefined_metric(client):
         assert case_result.response_candidate_results is not None
 
 
+def test_evaluation_grounding_metric(client):
+    """Tests that grounding metric produces a correctly structured EvaluationResult."""
+    prompts_df = pd.DataFrame(
+        {
+            "prompt": ["Explain the concept of machine learning in simple terms."],
+            "response": [
+                "Machine learning is a type of artificial intelligence that allows"
+                " computers to learn from data without being explicitly programmed."
+            ],
+            "context": [
+                "Article: 'Intro to AI', Section 2.1\n"
+                "Machine learning (ML) is a subfield of artificial intelligence (AI). "
+                "The core idea of machine learning is that it allows computer systems to "
+                "learn from and adapt to new data without being explicitly programmed. "
+                "Instead of a developer writing code for every possible scenario, the "
+                "system builds a model based on patterns in training data."
+            ],
+        }
+    )
+
+    eval_dataset = types.EvaluationDataset(
+        eval_dataset_df=prompts_df,
+        candidate_name="gemini-2.5-flash",
+    )
+
+    evaluation_result = client.evals.evaluate(
+        dataset=eval_dataset,
+        metrics=[
+            types.RubricMetric.GROUNDING,
+        ],
+    )
+
+    assert isinstance(evaluation_result, types.EvaluationResult)
+
+    assert evaluation_result.summary_metrics is not None
+    for summary in evaluation_result.summary_metrics:
+        assert isinstance(summary, types.AggregatedMetricResult)
+        assert summary.metric_name is not None
+        assert summary.mean_score is not None
+
+    assert evaluation_result.eval_case_results is not None
+    for case_result in evaluation_result.eval_case_results:
+        assert isinstance(case_result, types.EvalCaseResult)
+        assert case_result.eval_case_index is not None
+        assert case_result.response_candidate_results is not None
+
+
 pytestmark = pytest_helper.setup(
     file=__file__,
     globals_for_file=globals(),

vertexai/_genai/_evals_metric_handlers.py

Lines changed: 18 additions & 0 deletions
@@ -871,13 +871,31 @@ def _build_request_payload(
             eval_case.prompt
         )
 
+        other_data_map = {}
+        if hasattr(eval_case, "context") and eval_case.context:
+            if isinstance(eval_case.context, str):
+                other_data_map["context"] = types.InstanceData(text=eval_case.context)
+            elif isinstance(eval_case.context, genai_types.Content):
+                other_data_map["context"] = (
+                    PredefinedMetricHandler._content_to_instance_data(eval_case.context)
+                )
+            else:
+                logger.warning(
+                    f"Unsupported type for context: {type(eval_case.context)}"
+                )
+
         instance_payload = types.EvaluationInstance(
             prompt=prompt_instance_data,
             response=PredefinedMetricHandler._content_to_instance_data(
                 response_content
             ),
             reference=reference_instance_data,
             rubric_groups=eval_case.rubric_groups,
+            other_data=(
+                types.MapInstance(map_instance=other_data_map)
+                if other_data_map
+                else None
+            ),
         )
 
         return {
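
For reference, here is a standalone sketch of the context handling added above, written as a hypothetical helper. It uses only names that appear in the diff; the import paths are assumptions, and in the real code the logic is inline in _build_request_payload rather than factored out:

from typing import Optional, Union

from google.genai import types as genai_types  # assumed import path
from vertexai._genai import types  # assumed import path
from vertexai._genai._evals_metric_handlers import (  # assumed import path
    PredefinedMetricHandler,
)


def context_to_other_data(
    context: Union[str, genai_types.Content, None],
) -> Optional[types.MapInstance]:
    """Hypothetical helper mirroring the inline context handling above."""
    other_data_map = {}
    if isinstance(context, str):
        # A plain string becomes a text InstanceData entry keyed "context".
        other_data_map["context"] = types.InstanceData(text=context)
    elif isinstance(context, genai_types.Content):
        # Structured Content is converted the same way prompts and responses are.
        other_data_map["context"] = PredefinedMetricHandler._content_to_instance_data(
            context
        )
    # The real handler logs a warning for any other truthy type and drops it.
    return types.MapInstance(map_instance=other_data_map) if other_data_map else None

When the map is non-empty, the context reaches the backend as other_data on the EvaluationInstance, alongside prompt, response, reference, and rubric_groups.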
