Skip to content

Commit 7757886

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: GenAI Client(evals) - Add get_evaluation_set and get_evaluation_item methods to Vertex AI GenAI SDK evals
PiperOrigin-RevId: 815805880
1 parent 0c932b9 commit 7757886

File tree

6 files changed

+1126
-7
lines changed

6 files changed

+1126
-7
lines changed

tests/unit/vertexai/genai/replays/conftest.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -131,29 +131,39 @@ def _get_replay_id(use_vertex: bool, replays_prefix: str) -> str:
131131
EVAL_CONFIG_GCS_URI = (
132132
"gs://vertex-ai-generative-ai-eval-sdk-resources/metrics/text_quality/v1.0.0.yaml"
133133
)
134+
EVAL_ITEM_REQUEST_GCS_URI = (
135+
"gs://lakeyk-limited-bucket/agora_eval_080525/request_4813679498589372416.json"
136+
)
137+
EVAL_ITEM_RESULT_GCS_URI = (
138+
"gs://lakeyk-limited-bucket/agora_eval_080525/result_1486082323915997184.json"
139+
)
140+
EVAL_GCS_URI_ITEMS = {
141+
EVAL_CONFIG_GCS_URI: "test_resources/mock_eval_config.yaml",
142+
EVAL_ITEM_REQUEST_GCS_URI: "test_resources/request_4813679498589372416.json",
143+
EVAL_ITEM_RESULT_GCS_URI: "test_resources/result_1486082323915997184.json",
144+
}
134145

135146

136147
def _mock_read_file_contents_side_effect(uri: str):
137148
"""
138149
Side effect to mock GcsUtils.read_file_contents for eval test test_batch_evaluate.
139150
"""
140-
if uri == EVAL_CONFIG_GCS_URI:
151+
if uri in EVAL_GCS_URI_ITEMS:
141152
# Construct the absolute path to the local mock file.
142153
current_dir = os.path.dirname(__file__)
143-
local_yaml_path = os.path.join(
144-
current_dir, "test_resources/mock_eval_config.yaml"
145-
)
154+
local_mock_file_path = os.path.join(current_dir, EVAL_GCS_URI_ITEMS[uri])
146155
try:
147-
with open(local_yaml_path, "r") as f:
156+
with open(local_mock_file_path, "r") as f:
148157
return f.read()
149158
except FileNotFoundError:
150159
raise FileNotFoundError(
151-
"The mock data file 'mock_eval_config.yaml' was not found."
160+
f"The mock data file '{EVAL_GCS_URI_ITEMS[uri]}' was not found."
152161
)
153162

154163
raise ValueError(
155164
f"Unexpected GCS URI '{uri}' in replay test. Only "
156-
f"'{EVAL_CONFIG_GCS_URI}' is mocked."
165+
f"'{EVAL_CONFIG_GCS_URI}', '{EVAL_ITEM_REQUEST_GCS_URI}', and "
166+
f"'{EVAL_ITEM_RESULT_GCS_URI}' are mocked."
157167
)
158168

159169

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
# pylint: disable=protected-access,bad-continuation,missing-function-docstring
16+
17+
from tests.unit.vertexai.genai.replays import pytest_helper
18+
from vertexai import types
19+
import datetime
20+
import pytest
21+
22+
23+
def test_get_eval_item_response(client):
    """Checks the structure of a RESULT-type EvaluationItem fetched synchronously."""
    item_name = "projects/503583131166/locations/us-central1/evaluationItems/1486082323915997184"
    item = client.evals.get_evaluation_item(name=item_name)
    assert isinstance(item, types.EvaluationItem)
    check_item_1486082323915997184(item, item_name)


def test_get_eval_item_request(client):
    """Checks the structure of a REQUEST-type EvaluationItem fetched synchronously."""
    item_name = "projects/503583131166/locations/us-central1/evaluationItems/4813679498589372416"
    item = client.evals.get_evaluation_item(name=item_name)
    assert isinstance(item, types.EvaluationItem)
    check_item_4813679498589372416(item, item_name)


# Enables asyncio test support for the async variants below.
pytest_plugins = ("pytest_asyncio",)


@pytest.mark.asyncio
async def test_get_eval_item_response_async(client):
    """Async variant: fetches the RESULT-type item by its bare resource ID."""
    item_id = "1486082323915997184"
    item_name = (
        f"projects/503583131166/locations/us-central1/evaluationItems/{item_id}"
    )
    item = await client.aio.evals.get_evaluation_item(name=item_id)
    check_item_1486082323915997184(item, item_name)


@pytest.mark.asyncio
async def test_get_eval_item_request_async(client):
    """Async variant: fetches the REQUEST-type item by its bare resource ID."""
    item_id = "4813679498589372416"
    item_name = (
        f"projects/503583131166/locations/us-central1/evaluationItems/{item_id}"
    )
    item = await client.aio.evals.get_evaluation_item(name=item_id)
    check_item_4813679498589372416(item, item_name)
62+
63+
64+
def check_item_1486082323915997184(
    evaluation_item: types.EvaluationItem, evaluation_item_name: str
):
    """Verifies every expected field of the RESULT-type evaluation item fixture."""
    assert evaluation_item.name == evaluation_item_name
    assert evaluation_item.display_name == "universal result for 7119522507803066368"
    assert evaluation_item.evaluation_item_type == types.EvaluationItemType.RESULT
    assert evaluation_item.gcs_uri == (
        "gs://lakeyk-limited-bucket/agora_eval_080525/result_1486082323915997184.json"
    )
    assert evaluation_item.create_time == datetime.datetime(
        2025, 9, 8, 20, 55, 46, 713792, tzinfo=datetime.timezone.utc
    )

    response = evaluation_item.evaluation_response
    assert isinstance(response, types.EvaluationItemResult)
    assert response.evaluation_request == (
        "projects/503583131166/locations/us-central1/evaluationItems/7119522507803066368"
    )
    assert response.evaluation_run == (
        "projects/503583131166/locations/us-central1/evaluationRuns/1957799200510967808"
    )

    # First candidate result.
    candidate_result = response.candidate_results[0]
    assert candidate_result.candidate == "gemini-2.0-flash-001@default"
    assert candidate_result.metric == "universal"
    assert candidate_result.score == 0.2857143

    # First rubric verdict of that candidate.
    verdict = candidate_result.rubric_verdicts[0]
    assert verdict.verdict
    assert verdict.reasoning == (
        "The entire response is written in the English language."
    )
    rubric = verdict.evaluated_rubric
    assert rubric.type == "LANGUAGE:PRIMARY_RESPONSE_LANGUAGE"
    assert rubric.importance == "HIGH"
    assert rubric.content.property.description == "The response is in English."

    # Embedded request payload.
    request = response.request
    assert (
        "There is a wide range of potato varieties to choose from"
        in request.prompt.text
    )
    first_response = request.candidate_responses[0]
    assert first_response.candidate == "gemini-2.0-flash-001@default"
    assert "Pick out your potato variety" in first_response.text
112+
113+
114+
def check_item_4813679498589372416(
    evaluation_item: types.EvaluationItem, evaluation_item_name: str
):
    """Verifies every expected field of the REQUEST-type evaluation item fixture."""
    assert evaluation_item.name == evaluation_item_name
    assert evaluation_item.display_name == "4813679498589372416"
    assert evaluation_item.evaluation_item_type == types.EvaluationItemType.REQUEST
    assert evaluation_item.gcs_uri == (
        "gs://lakeyk-limited-bucket/agora_eval_080525/request_4813679498589372416.json"
    )
    assert evaluation_item.create_time == datetime.datetime(
        2025, 9, 8, 20, 55, 46, 338353, tzinfo=datetime.timezone.utc
    )

    # Embedded request payload.
    request = evaluation_item.evaluation_request
    assert isinstance(request, types.EvaluationItemRequest)
    assert (
        "If your ball is curving during flight from left to right"
        in request.prompt.text
    )
    # First candidate response.
    first_response = request.candidate_responses[0]
    assert first_response.candidate == "gemini-2.0-flash-001@default"
    assert "Keep your knees bent during the backswing" in first_response.text


# Routes this module's tests through the replay harness for the given method.
pytestmark = pytest_helper.setup(
    file=__file__,
    globals_for_file=globals(),
    test_method="evals.get_evaluation_item",
)
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
# pylint: disable=protected-access,bad-continuation,missing-function-docstring
16+
17+
from tests.unit.vertexai.genai.replays import pytest_helper
18+
from vertexai import types
19+
import datetime
20+
import pytest
21+
22+
23+
def test_get_eval_set(client):
    """Checks the structure of an EvaluationSet fetched synchronously."""
    evaluation_set_name = (
        "projects/503583131166/locations/us-central1/evaluationSets/102386522778501120"
    )
    evaluation_set = client.evals.get_evaluation_set(name=evaluation_set_name)
    assert isinstance(evaluation_set, types.EvaluationSet)
    check_set_102386522778501120(evaluation_set, evaluation_set_name)


# Enables asyncio test support for the async variant below.
pytest_plugins = ("pytest_asyncio",)


@pytest.mark.asyncio
async def test_get_eval_set_async(client):
    """Async variant: fetches the evaluation set by its bare resource ID."""
    eval_set_id = "102386522778501120"
    evaluation_set_name = (
        f"projects/503583131166/locations/us-central1/evaluationSets/{eval_set_id}"
    )
    evaluation_set = await client.aio.evals.get_evaluation_set(name=eval_set_id)
    check_set_102386522778501120(evaluation_set, evaluation_set_name)
45+
46+
47+
def check_set_102386522778501120(
    evaluation_set: types.EvaluationSet, evaluation_set_name: str
):
    """Verifies every expected field of the evaluation set fixture."""
    assert evaluation_set.name == evaluation_set_name
    assert evaluation_set.display_name == (
        "Results Set for EvaluationRun 1957799200510967808"
    )

    # The set references these evaluation items, in this exact order.
    item_prefix = "projects/503583131166/locations/us-central1/evaluationItems/"
    expected_item_ids = [
        "2748216119486578688",
        "1486082323915997184",
        "2219043163270545408",
        "8570244537769787392",
        "2112082672120496128",
        "8192505119024087040",
        "1383625432393318400",
        "5832267070561058816",
        "1733991409653907456",
        "2549142942207967232",
        "8565740938142416896",
        "6069620844672319488",
        "7777822109585113088",
        "5656415578861076480",
        "5926842662735839232",
        "648623899457617920",
        "4349245787016790016",
        "1119038954285301760",
        "5741983971781115904",
    ]
    assert evaluation_set.evaluation_items == [
        item_prefix + item_id for item_id in expected_item_ids
    ]

    # Create and update timestamps are identical for this fixture.
    expected_timestamp = datetime.datetime(
        2025, 9, 8, 20, 55, 46, 413954, tzinfo=datetime.timezone.utc
    )
    assert evaluation_set.create_time == expected_timestamp
    assert evaluation_set.update_time == expected_timestamp
    assert evaluation_set.metadata is None


# Routes this module's tests through the replay harness for the given method.
pytestmark = pytest_helper.setup(
    file=__file__,
    globals_for_file=globals(),
    test_method="evals.get_evaluation_set",
)

vertexai/_genai/_evals_common.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -975,3 +975,37 @@ def _execute_evaluation(
975975
"Evaluation results uploaded successfully to GCS: %s", uploaded_path
976976
)
977977
return evaluation_result
978+
979+
980+
def _convert_gcs_to_evaluation_item_result(
    api_client: BaseApiClient,
    gcs_uri: str,
) -> types.EvaluationItemResult:
    """Converts a json file to an EvaluationItemResult.

    Reads ``gcs_uri``, parses it as JSON, and builds an EvaluationItemResult
    from the parsed fields. On any failure the error is logged and an empty
    EvaluationItemResult is returned (best-effort, never raises).
    """
    logger.info("Loading evaluation item result from GCS: %s", gcs_uri)
    gcs_utils = _evals_utils.GcsUtils(api_client=api_client)
    try:
        payload = json.loads(gcs_utils.read_file_contents(gcs_uri))
        result = types.EvaluationItemResult(**payload)
    except Exception as e:  # Best-effort: degrade to an empty result.
        logger.error(
            "Failed to load evaluation result from GCS: %s. Error: %s", gcs_uri, e
        )
        return types.EvaluationItemResult()
    return result
995+
996+
997+
def _convert_gcs_to_evaluation_item_request(
    api_client: BaseApiClient,
    gcs_uri: str,
) -> types.EvaluationItemRequest:
    """Converts a json file to an EvaluationItemRequest.

    Reads ``gcs_uri``, parses it as JSON, and builds an EvaluationItemRequest
    from the parsed fields. On any failure the error is logged and an empty
    EvaluationItemRequest is returned (best-effort, never raises).
    """
    logger.info("Loading evaluation item request from GCS: %s", gcs_uri)
    gcs_utils = _evals_utils.GcsUtils(api_client=api_client)
    try:
        payload = json.loads(gcs_utils.read_file_contents(gcs_uri))
        request = types.EvaluationItemRequest(**payload)
    except Exception as e:  # Best-effort: degrade to an empty request.
        logger.error(
            "Failed to load evaluation request from GCS: %s. Error: %s", gcs_uri, e
        )
        return types.EvaluationItemRequest()
    return request

0 commit comments

Comments
 (0)