Skip to content

Commit d486795

Browse files
ankursharmas authored and copybara-github committed
feat: Populate AppDetails to each Invocation
AppDetails requires two pieces of information: 1) Instructions and 2) Tools. Both of these pieces of information are gathered using the llm_request that was passed to the model. This approach, though slightly invasive, ensures that we capture the "exact" instructions and tools that were given to the model. PiperOrigin-RevId: 811180648
1 parent 2a2da0f commit d486795

File tree

4 files changed

+372
-12
lines changed

4 files changed

+372
-12
lines changed

src/google/adk/evaluation/evaluation_generator.py

Lines changed: 74 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,15 @@
3232
from ..sessions.in_memory_session_service import InMemorySessionService
3333
from ..sessions.session import Session
3434
from ..utils.context_utils import Aclosing
35+
from .app_details import AgentDetails
3536
from .app_details import AppDetails
3637
from .eval_case import EvalCase
3738
from .eval_case import Invocation
3839
from .eval_case import InvocationEvent
3940
from .eval_case import InvocationEvents
4041
from .eval_case import SessionInput
4142
from .eval_set import EvalSet
43+
from .request_intercepter_plugin import _RequestIntercepterPlugin
4244

4345
_USER_AUTHOR = "user"
4446
_DEFAULT_AUTHOR = "agent"
@@ -180,12 +182,16 @@ async def _generate_inferences_from_root_agent(
180182
if callable(reset_func):
181183
reset_func()
182184

185+
request_intercepter_plugin = _RequestIntercepterPlugin(
186+
name="request_intercepter_plugin"
187+
)
183188
async with Runner(
184189
app_name=app_name,
185190
agent=root_agent,
186191
artifact_service=artifact_service,
187192
session_service=session_service,
188193
memory_service=memory_service,
194+
plugins=[request_intercepter_plugin],
189195
) as runner:
190196
events = []
191197

@@ -212,30 +218,36 @@ async def _generate_inferences_from_root_agent(
212218

213219
events.append(event)
214220

215-
return EvaluationGenerator.convert_events_to_eval_invocations(events)
221+
app_details_by_invocation_id = (
222+
EvaluationGenerator._get_app_details_by_invocation_id(
223+
events, request_intercepter_plugin
224+
)
225+
)
226+
return EvaluationGenerator.convert_events_to_eval_invocations(
227+
events, app_details_by_invocation_id
228+
)
216229

217230
@staticmethod
218231
def convert_events_to_eval_invocations(
219232
events: list[Event],
233+
app_details_per_invocation: Optional[dict[str, AppDetails]] = None,
220234
) -> list[Invocation]:
221235
"""Converts a list of events to eval invocations."""
222-
# Group Events by invocation id. Events that share the same invocation id
223-
# belong to the same invocation.
224-
events_by_invocation_id: dict[str, list[Event]] = {}
225-
226-
for event in events:
227-
invocation_id = event.invocation_id
228-
229-
if invocation_id not in events_by_invocation_id:
230-
events_by_invocation_id[invocation_id] = []
231-
232-
events_by_invocation_id[invocation_id].append(event)
236+
events_by_invocation_id = (
237+
EvaluationGenerator._collect_events_by_invocation_id(events)
238+
)
233239

234240
invocations = []
235241
for invocation_id, events in events_by_invocation_id.items():
236242
final_response = None
237243
user_content = ""
238244
invocation_timestamp = 0
245+
app_details = None
246+
if (
247+
app_details_per_invocation
248+
and invocation_id in app_details_per_invocation
249+
):
250+
app_details = app_details_per_invocation[invocation_id]
239251

240252
events_to_add = []
241253

@@ -271,11 +283,61 @@ def convert_events_to_eval_invocations(
271283
invocation_events=invocation_events
272284
),
273285
creation_timestamp=invocation_timestamp,
286+
app_details=app_details,
274287
)
275288
)
276289

277290
return invocations
278291

292+
@staticmethod
def _get_app_details_by_invocation_id(
    events: list[Event], request_intercepter: _RequestIntercepterPlugin
) -> dict[str, AppDetails]:
  """Builds one AppDetails object per invocation found in `events`.

  Events are grouped by invocation id. Within each invocation, every event
  that was not authored by the user is looked up in `request_intercepter`;
  the first event of a given author for which a request is found supplies
  that agent's instructions and tool declarations.

  Args:
    events: The events to inspect.
    request_intercepter: The plugin used to look up the llm_request that is
      associated with an event.

  Returns:
    A dict that maps each invocation id to its AppDetails. An invocation for
    which no request could be found maps to an AppDetails whose
    `agent_details` is empty.
  """
  events_by_invocation_id = (
      EvaluationGenerator._collect_events_by_invocation_id(events)
  )
  app_details_by_invocation_id: dict[str, AppDetails] = {}

  # A distinct loop variable keeps the `events` parameter from being rebound.
  for invocation_id, invocation_events in events_by_invocation_id.items():
    app_details = AppDetails(agent_details={})
    app_details_by_invocation_id[invocation_id] = app_details

    for event in invocation_events:
      if event.author == _USER_AUTHOR:
        continue

      llm_request = request_intercepter.get_model_request(event)
      if not llm_request:
        continue

      agent_name = event.author
      # Only the first request seen for an agent within an invocation is
      # recorded; later events by the same agent leave the entry untouched.
      if agent_name not in app_details.agent_details:
        app_details.agent_details[agent_name] = AgentDetails(
            name=agent_name,
            instructions=llm_request.config.system_instruction,
            tool_declarations=llm_request.config.tools or [],
        )

  return app_details_by_invocation_id
324+
325+
@staticmethod
def _collect_events_by_invocation_id(
    events: list[Event],
) -> dict[str, list[Event]]:
  """Groups events by their invocation id.

  Events that share the same invocation id belong to the same invocation.

  Args:
    events: The events to group.

  Returns:
    A dict that maps each invocation id to the list of events carrying that
    id. Keys appear in order of first occurrence and each list keeps the
    relative order of the input events.
  """
  events_by_invocation_id: dict[str, list[Event]] = {}

  for event in events:
    events_by_invocation_id.setdefault(event.invocation_id, []).append(event)

  return events_by_invocation_id
340+
279341
@staticmethod
280342
def _process_query_with_session(session_data, data):
281343
"""Process the queries using the existing session data without invoking the runner."""
src/google/adk/evaluation/request_intercepter_plugin.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import logging
18+
from typing import Optional
19+
import uuid
20+
21+
from typing_extensions import override
22+
23+
from ..agents.callback_context import CallbackContext
24+
from ..models.llm_request import LlmRequest
25+
from ..models.llm_response import LlmResponse
26+
from ..plugins.base_plugin import BasePlugin
27+
28+
logger = logging.getLogger("google_adk." + __name__)
29+
30+
_LLM_REQUEST_ID_KEY = "__llm_request_key__"
31+
32+
33+
class _RequestIntercepterPlugin(BasePlugin):
  """Intercepts requests made to the model and couples them with the response.

  NOTE: This implementation is intended for eval systems internal usage. Do not
  take a direct dependency on it.

  Context behind the creation of this intercepter:
  Some of the newer AutoRater backed metrics need access to the pieces of
  information that were presented to the model, like instructions and the list
  of available tools.

  We intercept the llm_request using this intercepter and make it available to
  the eval system.

  How is it done?
  The class maintains a cache of llm_requests that pass through it. Each request
  is given a unique id. The id is put in the custom_metadata field of the
  response. Eval systems have access to the response and can use the request id
  to get the llm_request.
  """

  def __init__(self, name: str):
    super().__init__(name=name)
    # Maps a generated request id to the llm_request that was intercepted.
    # Nothing in this class removes entries from the cache.
    self._llm_requests_cache: dict[str, LlmRequest] = {}

  @override
  async def before_model_callback(
      self, *, callback_context: CallbackContext, llm_request: LlmRequest
  ) -> Optional[LlmResponse]:
    """Caches `llm_request` under a fresh id and records that id in state.

    Returns:
      Always None; this plugin never supplies a response of its own.
    """
    # The request itself stays in the local cache; only its id is written to
    # the callback context state so that after_model_callback can fetch it.
    request_id = str(uuid.uuid4())
    self._llm_requests_cache[request_id] = llm_request
    callback_context.state[_LLM_REQUEST_ID_KEY] = request_id
    return None

  @override
  async def after_model_callback(
      self, *, callback_context: CallbackContext, llm_response: LlmResponse
  ) -> Optional[LlmResponse]:
    """Copies the request id from state into `llm_response.custom_metadata`.

    The response is modified in place. Nothing is done when the callback
    context is falsy or its state does not hold a request id.

    Returns:
      Always None; the response is tagged in place rather than replaced.
    """
    if callback_context and _LLM_REQUEST_ID_KEY in callback_context.state:
      if llm_response.custom_metadata is None:
        llm_response.custom_metadata = {}

      llm_response.custom_metadata[_LLM_REQUEST_ID_KEY] = (
          callback_context.state[_LLM_REQUEST_ID_KEY]
      )

    return None

  def get_model_request(
      self, llm_response: LlmResponse
  ) -> Optional[LlmRequest]:
    """Fetches the request object, if found.

    Args:
      llm_response: A response whose custom_metadata may carry a request id.

    Returns:
      The cached llm_request whose id is stored in the response's
      custom_metadata, or None when the response carries no request id or the
      id is not present in the cache (a warning is logged in the latter case).
    """
    metadata = llm_response.custom_metadata
    if not metadata or _LLM_REQUEST_ID_KEY not in metadata:
      return None

    request_id = metadata[_LLM_REQUEST_ID_KEY]
    if request_id in self._llm_requests_cache:
      return self._llm_requests_cache[request_id]

    logger.warning("`%s` not found in llm_request_cache.", request_id)
    return None

tests/unittests/evaluation/test_evaluation_generator.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,14 @@
1414

1515
from __future__ import annotations
1616

17+
from unittest import mock
18+
19+
from google.adk.evaluation.app_details import AgentDetails
20+
from google.adk.evaluation.app_details import AppDetails
1721
from google.adk.evaluation.evaluation_generator import EvaluationGenerator
22+
from google.adk.evaluation.request_intercepter_plugin import _RequestIntercepterPlugin
1823
from google.adk.events.event import Event
24+
from google.adk.models.llm_request import LlmRequest
1925
from google.genai import types
2026

2127

@@ -195,3 +201,128 @@ def test_multi_agent(
195201
assert events[1].author == "sub_agent_1"
196202
assert events[2].author == "sub_agent_1"
197203
assert events[3].author == "sub_agent_2"
204+
205+
206+
class TestGetAppDetailsByInvocationId:
  """Unit tests for EvaluationGenerator._get_app_details_by_invocation_id."""

  def test_get_app_details_by_invocation_id_empty(self):
    """An empty list of events produces an empty mapping."""
    intercepter = mock.MagicMock(spec=_RequestIntercepterPlugin)

    result = EvaluationGenerator._get_app_details_by_invocation_id(
        [], intercepter
    )

    assert result == {}

  def test_get_app_details_by_invocation_id_no_model_requests(self):
    """No agent details are recorded when the intercepter finds no request."""
    intercepter = mock.MagicMock(spec=_RequestIntercepterPlugin)
    intercepter.get_model_request.return_value = None
    user_event = _build_event("user", [types.Part(text="Hello")], "inv1")
    agent_event = _build_event("agent", [types.Part(text="Hi there!")], "inv1")
    events = [user_event, agent_event]

    result = EvaluationGenerator._get_app_details_by_invocation_id(
        events, intercepter
    )

    assert result == {"inv1": AppDetails(agent_details={})}
    # Only the non-user event is looked up.
    intercepter.get_model_request.assert_called_once_with(events[1])

  def test_get_app_details_single_invocation_single_agent(self):
    """One invocation with one agent yields a single AgentDetails entry."""
    llm_request = LlmRequest(model="test")
    llm_request.config.system_instruction = "instruction1"
    llm_request.config.tools = [types.Tool()]
    intercepter = mock.MagicMock(spec=_RequestIntercepterPlugin)
    intercepter.get_model_request.return_value = llm_request
    events = [
        _build_event("user", [types.Part(text="Hello")], "inv1"),
        _build_event("agent", [types.Part(text="Hi there!")], "inv1"),
    ]

    result = EvaluationGenerator._get_app_details_by_invocation_id(
        events, intercepter
    )

    expected_agent = AgentDetails(
        name="agent",
        instructions="instruction1",
        tool_declarations=[types.Tool()],
    )
    expected = {"inv1": AppDetails(agent_details={"agent": expected_agent})}
    assert result == expected
    intercepter.get_model_request.assert_called_once_with(events[1])

  def test_get_app_details_multiple_invocations_multiple_agents(self):
    """Each invocation only records agents for which a request was found."""

    def fake_get_model_request(event):
      # Only two (invocation, author) pairs have an associated request.
      key = (event.invocation_id, event.author)
      if key == ("inv1", "agent1"):
        request = LlmRequest(model="test")
        request.config.system_instruction = "instruction1"
        request.config.tools = [
            types.Tool(
                function_declarations=[types.FunctionDeclaration(name="tool1")]
            )
        ]
        return request
      if key == ("inv2", "agent2"):
        request = LlmRequest(model="test")
        request.config.system_instruction = "instruction2"
        return request
      return None

    intercepter = mock.MagicMock(spec=_RequestIntercepterPlugin)
    intercepter.get_model_request.side_effect = fake_get_model_request

    events = [
        _build_event("user", [types.Part(text="Hello")], "inv1"),
        _build_event("agent1", [types.Part(text="Hi there!")], "inv1"),
        _build_event("user", [types.Part(text="Hello again")], "inv2"),
        _build_event("agent2", [types.Part(text="Hi again!")], "inv2"),
        # This event has no associated request.
        _build_event(
            "agent1", [types.Part(text="Hi again from agent1")], "inv2"
        ),
    ]

    result = EvaluationGenerator._get_app_details_by_invocation_id(
        events, intercepter
    )

    expected_agent1 = AgentDetails(
        name="agent1",
        instructions="instruction1",
        tool_declarations=[
            types.Tool(
                function_declarations=[types.FunctionDeclaration(name="tool1")]
            )
        ],
    )
    expected_agent2 = AgentDetails(
        name="agent2",
        instructions="instruction2",
        tool_declarations=[],
    )
    expected = {
        "inv1": AppDetails(agent_details={"agent1": expected_agent1}),
        "inv2": AppDetails(agent_details={"agent2": expected_agent2}),
    }
    assert result == expected
    # Every non-user event is looked up, including the one without a request.
    assert intercepter.get_model_request.call_count == 3

0 commit comments

Comments
 (0)