@@ -37,7 +37,7 @@ def test_evaluation_result(client):
37
37
)
38
38
39
39
predefined_metrics = [
40
- types .PrebuiltMetric .GENERAL_QUALITY ,
40
+ types .RubricMetric .GENERAL_QUALITY ,
41
41
]
42
42
43
43
evaluation_result = client .evals .evaluate (
@@ -201,6 +201,53 @@ def test_multi_turn_predefined_metric(client):
201
201
assert case_result .response_candidate_results is not None
202
202
203
203
204
def test_evaluation_grounding_metric(client):
    """Verify the GROUNDING rubric metric yields a well-formed EvaluationResult.

    Builds a one-row dataset (prompt + response + grounding context), runs the
    evaluation with ``types.RubricMetric.GROUNDING``, and checks the structure
    of both the aggregated summary metrics and the per-case results.
    """
    # Source passage the response is expected to be grounded in.
    grounding_context = (
        "Article: 'Intro to AI', Section 2.1\n "
        "Machine learning (ML) is a subfield of artificial intelligence (AI). "
        "The core idea of machine learning is that it allows computer systems to "
        "learn from and adapt to new data without being explicitly programmed. "
        "Instead of a developer writing code for every possible scenario, the "
        "system builds a model based on patterns in training data."
    )

    # Single-example dataset: prompt, candidate response, and its context.
    source_df = pd.DataFrame(
        {
            "prompt": ["Explain the concept of machine learning in simple terms."],
            "response": [
                "Machine learning is a type of artificial intelligence that allows"
                " computers to learn from data without being explicitly programmed."
            ],
            "context": [grounding_context],
        }
    )

    dataset = types.EvaluationDataset(
        eval_dataset_df=source_df,
        candidate_name="gemini-2.5-flash",
    )

    result = client.evals.evaluate(
        dataset=dataset,
        metrics=[
            types.RubricMetric.GROUNDING,
        ],
    )

    assert isinstance(result, types.EvaluationResult)

    # Aggregated metrics: every entry carries a metric name and a mean score.
    assert result.summary_metrics is not None
    for aggregated in result.summary_metrics:
        assert isinstance(aggregated, types.AggregatedMetricResult)
        assert aggregated.metric_name is not None
        assert aggregated.mean_score is not None

    # Per-case results: every entry is indexed and has candidate results.
    assert result.eval_case_results is not None
    for case in result.eval_case_results:
        assert isinstance(case, types.EvalCaseResult)
        assert case.eval_case_index is not None
        assert case.response_candidate_results is not None
250
+
204
251
pytestmark = pytest_helper .setup (
205
252
file = __file__ ,
206
253
globals_for_file = globals (),
0 commit comments