 )


-# TODO(b/431231205): Re-enable once Unified Metrics are in prod.
-# def test_create_eval_run_data_source_evaluation_set(client):
-#     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun."""
-#     client._api_client._http_options.api_version = "v1beta1"
-#     tool = genai_types.Tool(
-#         function_declarations=[
-#             genai_types.FunctionDeclaration(
-#                 name="get_weather",
-#                 description="Get weather in a location",
-#                 parameters={
-#                     "type": "object",
-#                     "properties": {"location": {"type": "string"}},
-#                 },
-#             )
-#         ]
-#     )
-#     evaluation_run = client.evals.create_evaluation_run(
-#         name="test4",
-#         display_name="test4",
-#         dataset=types.EvaluationRunDataSource(
-#             evaluation_set="projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
-#         ),
-#         dest=GCS_DEST,
-#         metrics=[
-#             UNIVERSAL_AR_METRIC,
-#             types.RubricMetric.FINAL_RESPONSE_QUALITY,
-#             LLM_METRIC
-#         ],
-#         agent_info=types.AgentInfo(
-#             agent="project/123/locations/us-central1/reasoningEngines/456",
-#             name="agent-1",
-#             instruction="agent-1 instruction",
-#             tool_declarations=[tool],
-#         ),
-#         labels={"label1": "value1"},
-#     )
-#     assert isinstance(evaluation_run, types.EvaluationRun)
-#     assert evaluation_run.display_name == "test4"
-#     assert evaluation_run.state == types.EvaluationRunState.PENDING
-#     assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
-#     assert evaluation_run.data_source.evaluation_set == (
-#         "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
-#     )
-#     assert evaluation_run.evaluation_config == types.EvaluationRunConfig(
-#         output_config=genai_types.OutputConfig(
-#             gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
-#         ),
-#         metrics=[UNIVERSAL_AR_METRIC, FINAL_RESPONSE_QUALITY_METRIC, LLM_METRIC],
-#     )
-#     assert evaluation_run.inference_configs[
-#         "agent-1"
-#     ] == types.EvaluationRunInferenceConfig(
-#         agent_config=types.EvaluationRunAgentConfig(
-#             developer_instruction=genai_types.Content(
-#                 parts=[genai_types.Part(text="agent-1 instruction")]
-#             ),
-#             tools=[tool],
-#         )
-#     )
-#     assert evaluation_run.labels == {
-#         "vertex-ai-evaluation-agent-engine-id": "456",
-#         "label1": "value1",
-#     }
-#     assert evaluation_run.error is None
+def test_create_eval_run_data_source_evaluation_set(client):
+    """Tests that create_evaluation_run() creates a correctly structured EvaluationRun."""
+    client._api_client._http_options.api_version = "v1beta1"
+    tool = genai_types.Tool(
+        function_declarations=[
+            genai_types.FunctionDeclaration(
+                name="get_weather",
+                description="Get weather in a location",
+                parameters={
+                    "type": "object",
+                    "properties": {"location": {"type": "string"}},
+                },
+            )
+        ]
+    )
+    evaluation_run = client.evals.create_evaluation_run(
+        name="test4",
+        display_name="test4",
+        dataset=types.EvaluationRunDataSource(
+            evaluation_set="projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
+        ),
+        dest=GCS_DEST,
+        metrics=[
+            UNIVERSAL_AR_METRIC,
+            types.RubricMetric.FINAL_RESPONSE_QUALITY,
+            LLM_METRIC,
+        ],
+        agent_info=types.evals.AgentInfo(
+            agent="project/123/locations/us-central1/reasoningEngines/456",
+            name="agent-1",
+            instruction="agent-1 instruction",
+            tool_declarations=[tool],
+        ),
+        labels={"label1": "value1"},
+    )
+    assert isinstance(evaluation_run, types.EvaluationRun)
+    assert evaluation_run.display_name == "test4"
+    assert evaluation_run.state == types.EvaluationRunState.PENDING
+    assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
+    assert evaluation_run.data_source.evaluation_set == (
+        "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
+    )
+    assert evaluation_run.evaluation_config == types.EvaluationRunConfig(
+        output_config=genai_types.OutputConfig(
+            gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
+        ),
+        metrics=[UNIVERSAL_AR_METRIC, FINAL_RESPONSE_QUALITY_METRIC, LLM_METRIC],
+    )
+    assert evaluation_run.inference_configs[
+        "agent-1"
+    ] == types.EvaluationRunInferenceConfig(
+        agent_config=types.EvaluationRunAgentConfig(
+            developer_instruction=genai_types.Content(
+                parts=[genai_types.Part(text="agent-1 instruction")]
+            ),
+            tools=[tool],
+        )
+    )
+    assert evaluation_run.labels == {
+        "vertex-ai-evaluation-agent-engine-id": "456",
+        "label1": "value1",
+    }
+    assert evaluation_run.error is None


 def test_create_eval_run_data_source_bigquery_request_set(client):
@@ -132,6 +131,7 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
         ),
         labels={"label1": "value1"},
         dest=GCS_DEST,
+        metrics=[UNIVERSAL_AR_METRIC],
     )
     assert isinstance(evaluation_run, types.EvaluationRun)
     assert evaluation_run.display_name == "test5"
@@ -152,6 +152,7 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
         output_config=genai_types.OutputConfig(
             gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
         ),
+        metrics=[UNIVERSAL_AR_METRIC],
     )
     assert evaluation_run.inference_configs is None
     assert evaluation_run.labels == {
@@ -160,7 +161,7 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
     assert evaluation_run.error is None


-# Test fails in replay mode because of the timestamp issue
+# Test fails in replay mode because of UUID generation mismatch.
 # def test_create_eval_run_data_source_evaluation_dataset(client):
 #     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with EvaluationDataset."""
 #     input_df = pd.DataFrame(
@@ -215,7 +216,8 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
 #             candidate_name="candidate_1",
 #             eval_dataset_df=input_df,
 #         ),
-#         dest="gs://lakeyk-limited-bucket/eval_run_output",
+#         dest=GCS_DEST,
+#         metrics=[UNIVERSAL_AR_METRIC],
 #     )
 #     assert isinstance(evaluation_run, types.EvaluationRun)
 #     assert evaluation_run.display_name == "test6"
@@ -276,6 +278,7 @@ async def test_create_eval_run_async(client):
             )
         ),
         dest=GCS_DEST,
+        metrics=[UNIVERSAL_AR_METRIC],
     )
     assert isinstance(evaluation_run, types.EvaluationRun)
     assert evaluation_run.display_name == "test8"
@@ -292,6 +295,7 @@ async def test_create_eval_run_async(client):
         output_config=genai_types.OutputConfig(
             gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
         ),
+        metrics=[UNIVERSAL_AR_METRIC],
     )
     assert evaluation_run.error is None
     assert evaluation_run.inference_configs is None
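
For anyone re-running these tests locally, below is a minimal sketch of the call shape this diff re-enables, stripped of the replay harness. It only uses names already visible in the diff: the resource strings are placeholders, `metrics` stands in for the module-level constants (UNIVERSAL_AR_METRIC, LLM_METRIC, ...), and `types` is assumed to be the same alias the test module already imports.

```python
# Sketch only -- mirrors the create_evaluation_run() shape exercised above.
# Assumptions: `client` is an initialized Vertex AI client exposing `client.evals`,
# and `types` is the alias already imported by this test module.
EVAL_SET_NAME = (  # placeholder resource name
    "projects/PROJECT_NUMBER/locations/us-central1/evaluationSets/EVAL_SET_ID"
)
OUTPUT_PREFIX = "gs://your-bucket/eval_run_output"  # placeholder GCS prefix


def create_example_run(client, metrics):
    # `metrics` stands in for the module-level metric constants
    # (UNIVERSAL_AR_METRIC, LLM_METRIC, ...) that the tests now pass explicitly.
    return client.evals.create_evaluation_run(
        name="example-run",
        display_name="example-run",
        dataset=types.EvaluationRunDataSource(evaluation_set=EVAL_SET_NAME),
        dest=OUTPUT_PREFIX,
        metrics=metrics,
    )
```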