googleapis
diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py
Lines changed: 69 additions & 3 deletions
diff --git a/vertexai/_genai/_evals_visualization.py b/vertexai/_genai/_evals_visualization.py
Lines changed: 31 additions & 15 deletions
@@ -13,9 +13,11 @@
 # limitations under the License.
 #
 # pylint: disable=protected-access,bad-continuation,
+import base64
 import importlib
 import json
 import os
+import re
 import statistics
 import sys
 from unittest import mock
@@ -291,8 +293,72 @@ def test_display_evaluation_result_with_agent_trace_prefixes(self, mock_is_ipyth
 
  mock_display_module.HTML.assert_called_once()
  html_content = mock_display_module.HTML.call_args[0][0]
- assert "my_function" in html_content
- assert "this is model response" in html_content
+ match = re.search(r'atob\("([^"]+)"\)', html_content)
+ assert match
+ decoded_json = base64.b64decode(match.group(1)).decode("utf-8")
+ assert "my_function" in decoded_json
+ assert "this is model response" in decoded_json
+
+ del sys.modules["IPython"]
+ del sys.modules["IPython.display"]
+
+ @mock.patch(
+ "vertexai._genai._evals_visualization._is_ipython_env",
+ return_value=True,
+ )
+ def test_display_evaluation_result_with_non_ascii_character(self, mock_is_ipython):
+ """Tests that non-ASCII text (emoji) survives the Base64-encoded HTML payload."""
+ mock_display_module = mock.MagicMock()
+ mock_ipython_module = mock.MagicMock()
+ mock_ipython_module.display = mock_display_module
+ sys.modules["IPython"] = mock_ipython_module
+ sys.modules["IPython.display"] = mock_display_module
+
+ dataset_df = pd.DataFrame(
+ [
+ {
+ "prompt": "Test prompt with emoji 😊",
+ "response": "Test response with emoji 😊",
+ },
+ ]
+ )
+ eval_dataset = vertexai_genai_types.EvaluationDataset(
+ eval_dataset_df=dataset_df
+ )
+ eval_result = vertexai_genai_types.EvaluationResult(
+ evaluation_dataset=[eval_dataset],
+ eval_case_results=[
+ vertexai_genai_types.EvalCaseResult(
+ eval_case_index=0,
+ response_candidate_results=[
+ vertexai_genai_types.ResponseCandidateResult(
+ response_index=0, metric_results={}
+ )
+ ],
+ )
+ ],
+ )
+
+ _evals_visualization.display_evaluation_result(eval_result)
+
+ mock_display_module.HTML.assert_called_once()
+ html_content = mock_display_module.HTML.call_args[0][0]
+ # The generated page must decode the payload via TextDecoder; check the call is emitted.
+ assert "new TextDecoder().decode" in html_content
+
+ match = re.search(r'atob\("([^"]+)"\)', html_content)
+ assert match
+ decoded_json = base64.b64decode(match.group(1)).decode("utf-8")
+
+ # JSON serialization escapes non-ASCII characters (e.g. \uXXXX), so we
+ # parse it back to check for the actual characters.
+ parsed_json = json.loads(decoded_json)
+ assert "Test prompt with emoji 😊" in json.dumps(
+ parsed_json, ensure_ascii=False
+ )
+ assert "Test response with emoji 😊" in json.dumps(
+ parsed_json, ensure_ascii=False
+ )
 
  del sys.modules["IPython"]
  del sys.modules["IPython.display"]
@@ -1290,7 +1356,7 @@ def test_run_inference_with_agent_engine_with_response_column_raises_error(
  ) in str(excinfo.value)
 
  @mock.patch.object(_evals_utils, "EvalDatasetLoader")
- @mock.patch("vertexai._genai._evals_common.InMemorySessionService")
+ @mock.patch("vertexai._genai._evals_common.InMemorySessionService") # fmt: skip
  @mock.patch("vertexai._genai._evals_common.Runner")
  @mock.patch("vertexai._genai._evals_common.LlmAgent")
  def test_run_inference_with_local_agent(
 
@@ -14,8 +14,10 @@
 #
 """Visualization utilities for GenAI Evaluation SDK."""
 
+import base64
 import json
 import logging
+import textwrap
 from typing import Any, Optional
 
 import pandas as pd
@@ -78,9 +80,16 @@ def stringify_cell(cell: Any) -> Optional[str]:
  return df_copy
 
 
+def _encode_to_base64(data: str) -> str:
+ """Encodes a string's UTF-8 bytes as standard-alphabet (not URL-safe) Base64 text."""
+ return base64.b64encode(data.encode("utf-8")).decode("utf-8")
+
+
 def _get_evaluation_html(eval_result_json: str) -> str:
  """Returns a self-contained HTML for single evaluation visualization."""
- return f"""
+ payload_b64 = _encode_to_base64(eval_result_json)
+ return textwrap.dedent(
+ f"""
 <!DOCTYPE html>
 <html>
 <head>
@@ -249,12 +258,11 @@ def _get_evaluation_html(eval_result_json: str) -> str:
 <body>
  <div class="container">
  <h1>Evaluation Report</h1>
- <div id="summary-section"></div>
- <div id="agent-info-section"></div>
- <div id="details-section"></div>
- </div>
+ <div id="summary-section"></div>
+ <div id="agent-info-section"></div>
+ <div id="details-section"></div>
  <script>
- var vizData_vertex_eval_sdk = {eval_result_json};
+ var vizData_vertex_eval_sdk = JSON.parse(new TextDecoder().decode(Uint8Array.from(atob("{payload_b64}"), c => c.charCodeAt(0))));
  function formatDictVals(obj) {{
  if (typeof obj === 'string') return obj;
  if (obj === undefined || obj === null) return '';
@@ -552,11 +560,14 @@ def _get_evaluation_html(eval_result_json: str) -> str:
 </body>
 </html>
 """
+ )
 
 
 def _get_comparison_html(eval_result_json: str) -> str:
  """Returns a self-contained HTML for a side-by-side eval comparison."""
- return f"""
+ payload_b64 = _encode_to_base64(eval_result_json)
+ return textwrap.dedent(
+ f"""
 <!DOCTYPE html>
 <html>
 <head>
@@ -612,11 +623,10 @@ def _get_comparison_html(eval_result_json: str) -> str:
 <body>
  <div class="container">
  <h1>Eval Comparison Report</h1>
- <div id="summary-section"></div>
- <div id="details-section"></div>
- </div>
+ <div id="summary-section"></div>
+ <div id="details-section"></div>
  <script>
- var vizData_vertex_eval_sdk = {eval_result_json};
+ var vizData_vertex_eval_sdk = JSON.parse(new TextDecoder().decode(Uint8Array.from(atob("{payload_b64}"), c => c.charCodeAt(0))));
  function renderSummary(summaryMetrics, metadata) {{
  const container = document.getElementById('summary-section');
  if (!summaryMetrics || summaryMetrics.length === 0) {{ container.innerHTML = '<h2>Summary Metrics</h2><p>No summary metrics.</p>'; return; }}
@@ -692,11 +702,14 @@ def _get_comparison_html(eval_result_json: str) -> str:
 </body>
 </html>
 """
+ )
 
 
 def _get_inference_html(dataframe_json: str) -> str:
  """Returns a self-contained HTML for displaying inference results."""
- return f"""
+ payload_b64 = _encode_to_base64(dataframe_json)
+ return textwrap.dedent(
+ f"""
 <!DOCTYPE html>
 <html>
 <head>
@@ -741,12 +754,12 @@ def _get_inference_html(dataframe_json: str) -> str:
  </style>
 </head>
 <body>
- <div class="container">
+ <div class="container">
  <h1>Evaluation Dataset</h1>
  <div id="results-table"></div>
  </div>
  <script>
- var vizData_vertex_eval_sdk = {dataframe_json};
+ var vizData_vertex_eval_sdk = JSON.parse(new TextDecoder().decode(Uint8Array.from(atob("{payload_b64}"), c => c.charCodeAt(0))));
  var container_vertex_eval_sdk = document.getElementById('results-table');
 
  function renderRubrics(cellValue) {{
@@ -822,6 +835,7 @@ def _get_inference_html(dataframe_json: str) -> str:
 </body>
 </html>
 """
+ )
 
 
 def _extract_text_and_raw_json(content: Any) -> dict[str, str]:
@@ -1086,12 +1100,14 @@ def _get_status_html(status: str, error_message: Optional[str] = None) -> str:
  </p>
  """
 
- return f"""
+ return textwrap.dedent(
+ f"""
  <div>
  <p><b>Status:</b> {status}</p>
  {error_html}
  </div>
  """
+ )
 
 
 def display_evaluation_run_status(eval_run_obj: "types.EvaluationRun") -> None: