
Commit e2aa3eb

vertex-sdk-bot authored and copybara-github committed
feat: GenAI Client(evals) - Update agent eval evaluation report dashboard
PiperOrigin-RevId: 821764782
1 parent f0a4a00 commit e2aa3eb
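
For context, a minimal usage sketch (not part of this commit) of rendering the updated agent eval report, using only names that appear in the diff below. Note that _evals_visualization is a private module, so the supported public entry point may differ; treat this as illustrative.

# A minimal sketch mirroring the new test's fixture; illustrative only.
import pandas as pd
from vertexai._genai import _evals_visualization
from vertexai._genai import types as vertexai_genai_types

df = pd.DataFrame([{"prompt": "Test prompt", "response": "Test response"}])
eval_result = vertexai_genai_types.EvaluationResult(
    evaluation_dataset=[
        vertexai_genai_types.EvaluationDataset(eval_dataset_df=df)
    ],
    agent_info=vertexai_genai_types.AgentInfo(name="test_agent"),
    eval_case_results=[
        vertexai_genai_types.EvalCaseResult(
            eval_case_index=0,
            response_candidate_results=[
                vertexai_genai_types.ResponseCandidateResult(
                    response_index=0, metric_results={}
                )
            ],
        )
    ],
)
# Emits the HTML dashboard via IPython.display when run in a notebook.
_evals_visualization.display_evaluation_result(eval_result)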

File tree

2 files changed, +354 -8 lines changed


tests/unit/vertexai/genai/test_evals.py

Lines changed: 74 additions & 0 deletions
@@ -16,6 +16,7 @@
 import json
 import os
 import statistics
+import sys
 from unittest import mock
 
 import google.auth.credentials
@@ -25,6 +26,7 @@
 from vertexai import _genai
 from vertexai._genai import _evals_data_converters
 from vertexai._genai import _evals_metric_handlers
+from vertexai._genai import _evals_visualization
 from vertexai._genai import _observability_data_converter
 from vertexai._genai import evals
 from vertexai._genai import types as vertexai_genai_types
@@ -185,6 +187,78 @@ def test_eval_evaluate_with_agent_info(self, mock_execute_evaluation):
         assert kwargs["agent_info"] == agent_info
 
 
+class TestEvalsVisualization:
+    @mock.patch(
+        "vertexai._genai._evals_visualization._is_ipython_env",
+        return_value=True,
+    )
+    def test_display_evaluation_result_with_agent_trace_prefixes(self, mock_is_ipython):
+        """Tests that agent trace view includes added prefixes."""
+        mock_display_module = mock.MagicMock()
+        mock_ipython_module = mock.MagicMock()
+        mock_ipython_module.display = mock_display_module
+        sys.modules["IPython"] = mock_ipython_module
+        sys.modules["IPython.display"] = mock_display_module
+
+        intermediate_events_list = [
+            {
+                "content": {
+                    "role": "model",
+                    "parts": [
+                        {
+                            "function_call": {
+                                "name": "my_function",
+                                "args": {"arg1": "value1"},
+                            }
+                        }
+                    ],
+                }
+            },
+            {
+                "content": {
+                    "role": "model",
+                    "parts": [{"text": "this is model response"}],
+                }
+            },
+        ]
+        dataset_df = pd.DataFrame(
+            [
+                {
+                    "prompt": "Test prompt",
+                    "response": "Test response",
+                    "intermediate_events": intermediate_events_list,
+                },
+            ]
+        )
+        eval_dataset = vertexai_genai_types.EvaluationDataset(
+            eval_dataset_df=dataset_df
+        )
+        eval_result = vertexai_genai_types.EvaluationResult(
+            evaluation_dataset=[eval_dataset],
+            agent_info=vertexai_genai_types.AgentInfo(name="test_agent"),
+            eval_case_results=[
+                vertexai_genai_types.EvalCaseResult(
+                    eval_case_index=0,
+                    response_candidate_results=[
+                        vertexai_genai_types.ResponseCandidateResult(
+                            response_index=0, metric_results={}
+                        )
+                    ],
+                )
+            ],
+        )
+
+        _evals_visualization.display_evaluation_result(eval_result)
+
+        mock_display_module.HTML.assert_called_once()
+        html_content = mock_display_module.HTML.call_args[0][0]
+        assert "my_function" in html_content
+        assert "this is model response" in html_content
+
+        del sys.modules["IPython"]
+        del sys.modules["IPython.display"]
+
+
 class TestEvalsRunInference:
     """Unit tests for the Evals run_inference method."""
 
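The new test stubs out IPython by injecting mocks into sys.modules, so that a deferred "from IPython import display" inside the visualization code resolves to the mock. A minimal, self-contained sketch of that pattern, assuming a hypothetical render() helper that imports IPython lazily (render is not from the diff):

# Sketch of the sys.modules injection pattern used by the test above.
import sys
from unittest import mock

def render():
    # Hypothetical consumer: imports IPython only when called, as
    # notebook-only display helpers commonly do.
    from IPython import display
    display.HTML("<b>hello</b>")

fake_display = mock.MagicMock()
fake_ipython = mock.MagicMock(display=fake_display)
sys.modules["IPython"] = fake_ipython
sys.modules["IPython.display"] = fake_display
try:
    render()
    fake_display.HTML.assert_called_once_with("<b>hello</b>")
finally:
    # Clean up so later imports see the real (or absent) IPython again.
    del sys.modules["IPython"]
    del sys.modules["IPython.display"]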