Skip to content

Commit 7757886

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: GenAI Client(evals) - Add get_evaluation_set and get_evaluation_item methods to Vertex AI GenAI SDK evals
PiperOrigin-RevId: 815805880
1 parent 0c932b9 commit 7757886

File tree

6 files changed

+1126
-7
lines changed

6 files changed

+1126
-7
lines changed

tests/unit/vertexai/genai/replays/conftest.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -131,29 +131,39 @@ def _get_replay_id(use_vertex: bool, replays_prefix: str) -> str:
131131
EVAL_CONFIG_GCS_URI = (
132132
"gs://vertex-ai-generative-ai-eval-sdk-resources/metrics/text_quality/v1.0.0.yaml"
133133
)
134+
EVAL_ITEM_REQUEST_GCS_URI = (
135+
"gs://lakeyk-limited-bucket/agora_eval_080525/request_4813679498589372416.json"
136+
)
137+
EVAL_ITEM_RESULT_GCS_URI = (
138+
"gs://lakeyk-limited-bucket/agora_eval_080525/result_1486082323915997184.json"
139+
)
140+
EVAL_GCS_URI_ITEMS = {
141+
EVAL_CONFIG_GCS_URI: "test_resources/mock_eval_config.yaml",
142+
EVAL_ITEM_REQUEST_GCS_URI: "test_resources/request_4813679498589372416.json",
143+
EVAL_ITEM_RESULT_GCS_URI: "test_resources/result_1486082323915997184.json",
144+
}
134145

135146

136147
def _mock_read_file_contents_side_effect(uri: str):
137148
"""
138149
Side effect to mock GcsUtils.read_file_contents for eval test test_batch_evaluate.
139150
"""
140-
if uri == EVAL_CONFIG_GCS_URI:
151+
if uri in EVAL_GCS_URI_ITEMS:
141152
# Construct the absolute path to the local mock file.
142153
current_dir = os.path.dirname(__file__)
143-
local_yaml_path = os.path.join(
144-
current_dir, "test_resources/mock_eval_config.yaml"
145-
)
154+
local_mock_file_path = os.path.join(current_dir, EVAL_GCS_URI_ITEMS[uri])
146155
try:
147-
with open(local_yaml_path, "r") as f:
156+
with open(local_mock_file_path, "r") as f:
148157
return f.read()
149158
except FileNotFoundError:
150159
raise FileNotFoundError(
151-
"The mock data file 'mock_eval_config.yaml' was not found."
160+
f"The mock data file '{EVAL_GCS_URI_ITEMS[uri]}' was not found."
152161
)
153162

154163
raise ValueError(
155164
f"Unexpected GCS URI '{uri}' in replay test. Only "
156-
f"'{EVAL_CONFIG_GCS_URI}' is mocked."
165+
f"'{EVAL_CONFIG_GCS_URI}', '{EVAL_ITEM_REQUEST_GCS_URI}', and "
166+
f"'{EVAL_ITEM_RESULT_GCS_URI}' are mocked."
157167
)
158168

159169

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
# pylint: disable=protected-access,bad-continuation,missing-function-docstring
16+
17+
from tests.unit.vertexai.genai.replays import pytest_helper
18+
from vertexai import types
19+
import datetime
20+
import pytest
21+
22+
23+
def test_get_eval_item_response(client):
    """Checks the structure of a RESULT-type EvaluationItem fetched synchronously."""
    item_name = "projects/503583131166/locations/us-central1/evaluationItems/1486082323915997184"
    item = client.evals.get_evaluation_item(name=item_name)
    assert isinstance(item, types.EvaluationItem)
    check_item_1486082323915997184(item, item_name)


def test_get_eval_item_request(client):
    """Checks the structure of a REQUEST-type EvaluationItem fetched synchronously."""
    item_name = "projects/503583131166/locations/us-central1/evaluationItems/4813679498589372416"
    item = client.evals.get_evaluation_item(name=item_name)
    assert isinstance(item, types.EvaluationItem)
    check_item_4813679498589372416(item, item_name)


# Enables asyncio test support for the async variants below.
pytest_plugins = ("pytest_asyncio",)


@pytest.mark.asyncio
async def test_get_eval_item_response_async(client):
    """Async variant: fetches the RESULT-type item by its bare resource ID."""
    item_id = "1486082323915997184"
    item_name = (
        f"projects/503583131166/locations/us-central1/evaluationItems/{item_id}"
    )
    item = await client.aio.evals.get_evaluation_item(name=item_id)
    check_item_1486082323915997184(item, item_name)


@pytest.mark.asyncio
async def test_get_eval_item_request_async(client):
    """Async variant: fetches the REQUEST-type item by its bare resource ID."""
    item_id = "4813679498589372416"
    item_name = (
        f"projects/503583131166/locations/us-central1/evaluationItems/{item_id}"
    )
    item = await client.aio.evals.get_evaluation_item(name=item_id)
    check_item_4813679498589372416(item, item_name)
62+
63+
64+
def check_item_1486082323915997184(
    evaluation_item: types.EvaluationItem, evaluation_item_name: str
):
    """Verifies every expected field of the RESULT-type evaluation item fixture."""
    assert evaluation_item.name == evaluation_item_name
    assert evaluation_item.display_name == "universal result for 7119522507803066368"
    assert evaluation_item.evaluation_item_type == types.EvaluationItemType.RESULT
    assert evaluation_item.gcs_uri == (
        "gs://lakeyk-limited-bucket/agora_eval_080525/result_1486082323915997184.json"
    )
    assert evaluation_item.create_time == datetime.datetime(
        2025, 9, 8, 20, 55, 46, 713792, tzinfo=datetime.timezone.utc
    )

    response = evaluation_item.evaluation_response
    assert isinstance(response, types.EvaluationItemResult)
    assert response.evaluation_request == (
        "projects/503583131166/locations/us-central1/evaluationItems/7119522507803066368"
    )
    assert response.evaluation_run == (
        "projects/503583131166/locations/us-central1/evaluationRuns/1957799200510967808"
    )

    # First candidate result.
    candidate_result = response.candidate_results[0]
    assert candidate_result.candidate == "gemini-2.0-flash-001@default"
    assert candidate_result.metric == "universal"
    assert candidate_result.score == 0.2857143

    # First rubric verdict of that candidate.
    verdict = candidate_result.rubric_verdicts[0]
    assert verdict.verdict
    assert verdict.reasoning == (
        "The entire response is written in the English language."
    )
    rubric = verdict.evaluated_rubric
    assert rubric.type == "LANGUAGE:PRIMARY_RESPONSE_LANGUAGE"
    assert rubric.importance == "HIGH"
    assert rubric.content.property.description == "The response is in English."

    # Embedded request payload.
    request = response.request
    assert (
        "There is a wide range of potato varieties to choose from"
        in request.prompt.text
    )
    first_response = request.candidate_responses[0]
    assert first_response.candidate == "gemini-2.0-flash-001@default"
    assert "Pick out your potato variety" in first_response.text
112+
113+
114+
def check_item_4813679498589372416(
    evaluation_item: types.EvaluationItem, evaluation_item_name: str
):
    """Verifies every expected field of the REQUEST-type evaluation item fixture."""
    assert evaluation_item.name == evaluation_item_name
    assert evaluation_item.display_name == "4813679498589372416"
    assert evaluation_item.evaluation_item_type == types.EvaluationItemType.REQUEST
    assert evaluation_item.gcs_uri == (
        "gs://lakeyk-limited-bucket/agora_eval_080525/request_4813679498589372416.json"
    )
    assert evaluation_item.create_time == datetime.datetime(
        2025, 9, 8, 20, 55, 46, 338353, tzinfo=datetime.timezone.utc
    )

    # Embedded request payload.
    request = evaluation_item.evaluation_request
    assert isinstance(request, types.EvaluationItemRequest)
    assert (
        "If your ball is curving during flight from left to right"
        in request.prompt.text
    )
    # First candidate response.
    first_response = request.candidate_responses[0]
    assert first_response.candidate == "gemini-2.0-flash-001@default"
    assert "Keep your knees bent during the backswing" in first_response.text


# Routes this module's tests through the replay harness for the given method.
pytestmark = pytest_helper.setup(
    file=__file__,
    globals_for_file=globals(),
    test_method="evals.get_evaluation_item",
)
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
# pylint: disable=protected-access,bad-continuation,missing-function-docstring
16+
17+
from tests.unit.vertexai.genai.replays import pytest_helper
18+
from vertexai import types
19+
import datetime
20+
import pytest
21+
22+
23+
def test_get_eval_set(client):
    """Checks the structure of an EvaluationSet fetched synchronously."""
    evaluation_set_name = (
        "projects/503583131166/locations/us-central1/evaluationSets/102386522778501120"
    )
    evaluation_set = client.evals.get_evaluation_set(name=evaluation_set_name)
    assert isinstance(evaluation_set, types.EvaluationSet)
    check_set_102386522778501120(evaluation_set, evaluation_set_name)


# Enables asyncio test support for the async variant below.
pytest_plugins = ("pytest_asyncio",)


@pytest.mark.asyncio
async def test_get_eval_set_async(client):
    """Async variant: fetches the evaluation set by its bare resource ID."""
    eval_set_id = "102386522778501120"
    evaluation_set_name = (
        f"projects/503583131166/locations/us-central1/evaluationSets/{eval_set_id}"
    )
    evaluation_set = await client.aio.evals.get_evaluation_set(name=eval_set_id)
    check_set_102386522778501120(evaluation_set, evaluation_set_name)
45+
46+
47+
def check_set_102386522778501120(
    evaluation_set: types.EvaluationSet, evaluation_set_name: str
):
    """Verifies every expected field of the evaluation set fixture."""
    assert evaluation_set.name == evaluation_set_name
    assert evaluation_set.display_name == (
        "Results Set for EvaluationRun 1957799200510967808"
    )

    # The set references these evaluation items, in this exact order.
    item_prefix = "projects/503583131166/locations/us-central1/evaluationItems/"
    expected_item_ids = [
        "2748216119486578688",
        "1486082323915997184",
        "2219043163270545408",
        "8570244537769787392",
        "2112082672120496128",
        "8192505119024087040",
        "1383625432393318400",
        "5832267070561058816",
        "1733991409653907456",
        "2549142942207967232",
        "8565740938142416896",
        "6069620844672319488",
        "7777822109585113088",
        "5656415578861076480",
        "5926842662735839232",
        "648623899457617920",
        "4349245787016790016",
        "1119038954285301760",
        "5741983971781115904",
    ]
    assert evaluation_set.evaluation_items == [
        item_prefix + item_id for item_id in expected_item_ids
    ]

    # Create and update timestamps are identical for this fixture.
    expected_timestamp = datetime.datetime(
        2025, 9, 8, 20, 55, 46, 413954, tzinfo=datetime.timezone.utc
    )
    assert evaluation_set.create_time == expected_timestamp
    assert evaluation_set.update_time == expected_timestamp
    assert evaluation_set.metadata is None


# Routes this module's tests through the replay harness for the given method.
pytestmark = pytest_helper.setup(
    file=__file__,
    globals_for_file=globals(),
    test_method="evals.get_evaluation_set",
)

vertexai/_genai/_evals_common.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -975,3 +975,37 @@ def _execute_evaluation(
975975
"Evaluation results uploaded successfully to GCS: %s", uploaded_path
976976
)
977977
return evaluation_result
978+
979+
980+
def _convert_gcs_to_evaluation_item_result(
    api_client: BaseApiClient,
    gcs_uri: str,
) -> types.EvaluationItemResult:
    """Converts a json file to an EvaluationItemResult.

    Reads ``gcs_uri``, parses it as JSON, and builds an EvaluationItemResult
    from the parsed fields. On any failure the error is logged and an empty
    EvaluationItemResult is returned (best-effort, never raises).
    """
    logger.info("Loading evaluation item result from GCS: %s", gcs_uri)
    gcs_utils = _evals_utils.GcsUtils(api_client=api_client)
    try:
        payload = json.loads(gcs_utils.read_file_contents(gcs_uri))
        result = types.EvaluationItemResult(**payload)
    except Exception as e:  # Best-effort: degrade to an empty result.
        logger.error(
            "Failed to load evaluation result from GCS: %s. Error: %s", gcs_uri, e
        )
        return types.EvaluationItemResult()
    return result
995+
996+
997+
def _convert_gcs_to_evaluation_item_request(
    api_client: BaseApiClient,
    gcs_uri: str,
) -> types.EvaluationItemRequest:
    """Converts a json file to an EvaluationItemRequest.

    Reads ``gcs_uri``, parses it as JSON, and builds an EvaluationItemRequest
    from the parsed fields. On any failure the error is logged and an empty
    EvaluationItemRequest is returned (best-effort, never raises).
    """
    logger.info("Loading evaluation item request from GCS: %s", gcs_uri)
    gcs_utils = _evals_utils.GcsUtils(api_client=api_client)
    try:
        payload = json.loads(gcs_utils.read_file_contents(gcs_uri))
        request = types.EvaluationItemRequest(**payload)
    except Exception as e:  # Best-effort: degrade to an empty request.
        logger.error(
            "Failed to load evaluation request from GCS: %s. Error: %s", gcs_uri, e
        )
        return types.EvaluationItemRequest()
    return request

0 commit comments

Comments
 (0)