 #
 # pylint: disable=protected-access,bad-continuation,missing-function-docstring

+import json

 from tests.unit.vertexai.genai.replays import pytest_helper
 from vertexai._genai import types
 import pandas as pd
-import json
+import pytest


 def test_bleu_metric(client):
@@ -31,7 +32,11 @@ def test_bleu_metric(client):
         ],
         metric_spec=types.BleuSpec(),
     )
-    response = client.evals._evaluate_instances(bleu_input=test_bleu_input)
+    response = client.evals.evaluate_instances(
+        metric_config=types._EvaluateInstancesRequestParameters(
+            bleu_input=test_bleu_input
+        )
+    )
     assert len(response.bleu_results.bleu_metric_values) == 1


@@ -46,8 +51,10 @@ def test_exact_match_metric(client):
         ],
         metric_spec=types.ExactMatchSpec(),
     )
-    response = client.evals._evaluate_instances(
-        exact_match_input=test_exact_match_input
+    response = client.evals.evaluate_instances(
+        metric_config=types._EvaluateInstancesRequestParameters(
+            exact_match_input=test_exact_match_input
+        )
     )
     assert len(response.exact_match_results.exact_match_metric_values) == 1

@@ -63,7 +70,11 @@ def test_rouge_metric(client):
         ],
         metric_spec=types.RougeSpec(rouge_type="rougeL"),
     )
-    response = client.evals._evaluate_instances(rouge_input=test_rouge_input)
+    response = client.evals.evaluate_instances(
+        metric_config=types._EvaluateInstancesRequestParameters(
+            rouge_input=test_rouge_input
+        )
+    )
     assert len(response.rouge_results.rouge_metric_values) == 1


@@ -78,7 +89,11 @@ def test_pointwise_metric(client):
             metric_prompt_template="Evaluate if the response '{response}' correctly answers the prompt '{prompt}'."
         ),
     )
-    response = client.evals._evaluate_instances(pointwise_metric_input=test_input)
+    response = client.evals.evaluate_instances(
+        metric_config=types._EvaluateInstancesRequestParameters(
+            pointwise_metric_input=test_input
+        )
+    )
     assert response.pointwise_metric_result is not None
     assert response.pointwise_metric_result.score is not None

@@ -100,8 +115,10 @@ def test_pairwise_metric_with_autorater(client):
     )
     autorater_config = types.AutoraterConfig(sampling_count=2)

-    response = client.evals._evaluate_instances(
-        pairwise_metric_input=test_input, autorater_config=autorater_config
+    response = client.evals.evaluate_instances(
+        metric_config=types._EvaluateInstancesRequestParameters(
+            pairwise_metric_input=test_input, autorater_config=autorater_config
+        )
     )
     assert response.pairwise_metric_result is not None
     assert response.pairwise_metric_result.pairwise_choice is not None
@@ -147,3 +164,25 @@ def test_inference_with_prompt_template(client):
     globals_for_file=globals(),
     test_method="evals.evaluate",
 )
+
+
+pytest_plugins = ("pytest_asyncio",)
+
+
+@pytest.mark.asyncio
+async def test_bleu_metric_async(client):
+    test_bleu_input = types.BleuInput(
+        instances=[
+            types.BleuInstance(
+                reference="The quick brown fox jumps over the lazy dog.",
+                prediction="A fast brown fox leaps over a lazy dog.",
+            )
+        ],
+        metric_spec=types.BleuSpec(),
+    )
+    response = await client.aio.evals.evaluate_instances(
+        metric_config=types._EvaluateInstancesRequestParameters(
+            bleu_input=test_bleu_input
+        )
+    )
+    assert len(response.bleu_results.bleu_metric_values) == 1