|
15 | 15 | from __future__ import annotations |
16 | 16 |
|
17 | 17 | import enum |
| 18 | +import statistics |
18 | 19 | from typing import Optional |
| 20 | +from typing import Union |
19 | 21 |
|
20 | 22 | from google.genai import types as genai_types |
21 | 23 |
|
| 24 | +from .app_details import AppDetails |
| 25 | +from .common import EvalBaseModel |
| 26 | +from .eval_case import get_all_tool_calls_with_responses |
| 27 | +from .eval_case import IntermediateDataType |
| 28 | +from .eval_metrics import RubricScore |
22 | 29 | from .evaluator import EvalStatus |
23 | 30 |
|
24 | 31 |
|
@@ -46,3 +53,97 @@ def get_eval_status(score: Optional[float], threshold: float) -> EvalStatus: |
46 | 53 | if score is None: |
47 | 54 | return EvalStatus.NOT_EVALUATED |
48 | 55 | return EvalStatus.PASSED if score >= threshold else EvalStatus.FAILED |
| 56 | + |
| 57 | + |
def get_average_rubric_score(
    rubric_scores: list[RubricScore],
) -> Optional[float]:
  """Returns a single aggregated score for the given list of rubric scores.

  Rubric scores whose `score` is None are ignored. A score of 0 is a regular
  value and takes part in the mean like any other score.

  Args:
    rubric_scores: The rubric scores to aggregate.

  Returns:
    The arithmetic mean of all score values that are not None, or None when
    no rubric score carries a score value (this includes an empty list).
  """
  # Use a separate name instead of rebinding the parameter: the parameter holds
  # RubricScore objects, while this list holds the raw score values.
  score_values = [
      rubric_score.score
      for rubric_score in rubric_scores
      if rubric_score.score is not None
  ]

  return statistics.mean(score_values) if score_values else None
| 76 | + |
| 77 | + |
class _ToolDeclarations(EvalBaseModel):
  """Private wrapper model that exists only to serialize tool declarations."""

  # Maps a string key to a list of tools. NOTE(review): the keys are presumably
  # agent names, since callers in this module fill it from
  # `AppDetails.get_tools_by_agent_name()` - confirm against AppDetails.
  tool_declarations: dict[str, genai_types.ToolListUnion]
| 82 | + |
| 83 | + |
def get_tool_declarations_as_json_str(
    app_details: AppDetails,
) -> str:
  """Serializes the tool declarations of the given app into a JSON string.

  The resulting string is usually meant to be embedded in a prompt that is
  sent to the LLM.

  Args:
    app_details: The app details; its `get_tools_by_agent_name()` result is
      what gets serialized.

  Returns:
    A JSON document indented by 2 spaces. Fields that are unset, equal to
    their default, or None are left out of the output.
  """
  tools_by_agent_name = app_details.get_tools_by_agent_name()
  declarations_model = _ToolDeclarations(tool_declarations=tools_by_agent_name)

  # Drop unset/default/None fields so that only meaningful content is emitted.
  return declarations_model.model_dump_json(
      indent=2,
      exclude_unset=True,
      exclude_defaults=True,
      exclude_none=True,
  )
| 100 | + |
| 101 | + |
class _ToolCallAndResponse(EvalBaseModel):
  """Private model pairing exactly one tool call with its response."""

  # Position of this tool call within the sequence of calls.
  step: int
  # The function call that was issued.
  tool_call: genai_types.FunctionCall
  # The matching function response, or a plain string when a placeholder is
  # used instead of a real response.
  tool_response: Union[genai_types.FunctionResponse, str]
| 108 | + |
| 109 | + |
class _ToolCallsAndResponses(EvalBaseModel):
  """Private wrapper model for serializing a list of tool call/response pairs."""

  # One entry per tool call. The field name is also the key used in the
  # serialized JSON output.
  tool_calls_and_response: list[_ToolCallAndResponse]
| 114 | + |
| 115 | + |
def get_tool_calls_and_responses_as_json_str(
    intermediate_data: Optional[IntermediateDataType],
) -> str:
  """Serializes tool calls and their responses into a JSON string.

  The resulting string is usually meant to be embedded in a prompt that is
  sent to the LLM.

  Args:
    intermediate_data: The intermediate data from which tool calls and their
      responses are extracted via `get_all_tool_calls_with_responses`.

  Returns:
    The literal text "No intermediate steps were taken." when no tool calls
    are found. Otherwise a JSON document indented by 2 spaces, with one entry
    per tool call, numbered from 0 in the order the calls were extracted.
    Fields that are unset, equal to their default, or None are left out.
  """
  call_response_pairs = get_all_tool_calls_with_responses(intermediate_data)
  if not call_response_pairs:
    return "No intermediate steps were taken."

  steps = [
      _ToolCallAndResponse(
          step=position,
          tool_call=call,
          # A falsy response is replaced by the string "None" so that the
          # step still shows an explicit response value.
          tool_response=response or "None",
      )
      for position, (call, response) in enumerate(call_response_pairs)
  ]

  serializable_steps = _ToolCallsAndResponses(tool_calls_and_response=steps)
  return serializable_steps.model_dump_json(
      indent=2,
      exclude_unset=True,
      exclude_defaults=True,
      exclude_none=True,
  )
0 commit comments