
Commit d4c63fc

csvenja authored and copybara-github committed
chore: Add model tracking to LiteLlm and introduce a LiteLLM with fallbacks demo
Related: #2292

Co-authored-by: Shan Cao <caoshan@google.com>
PiperOrigin-RevId: 828024955
1 parent e25beb4 commit d4c63fc

7 files changed: +180 additions, −12 deletions

Lines changed: 10 additions & 0 deletions

@@ -0,0 +1,10 @@

# LiteLLM with Fallback Models

This agent is built for resilience using LiteLLM's built-in fallback mechanism. It automatically switches models to guard against common disruptions like token limit errors and connection failures, while ensuring full conversational context is preserved across all model changes.

To run this example, ensure your .env file includes the following variables:

```
GOOGLE_API_KEY=
OPENAI_API_KEY=
ANTHROPIC_API_KEY=
```
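
The heart of the sample is the `fallbacks` list passed to `LiteLlm`. A minimal sketch of that wiring, reduced to just the model configuration used by the sample agent later in this commit:

```python
# Minimal sketch of the sample's model wiring (the full agent appears later in
# this commit). LiteLLM calls the primary model first and retries the
# fallbacks, in order, on errors such as token-limit or connection failures.
from google.adk import Agent
from google.adk.models.lite_llm import LiteLlm

root_agent = Agent(
    name='resilient_agent',
    model=LiteLlm(
        model='gemini/gemini-2.5-pro',  # primary model
        fallbacks=[                     # tried in order when the primary fails
            'anthropic/claude-sonnet-4-5-20250929',
            'openai/gpt-4o',
        ],
    ),
)
```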
Lines changed: 15 additions & 0 deletions

@@ -0,0 +1,15 @@

```python
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from . import agent
```
Lines changed: 88 additions & 0 deletions

@@ -0,0 +1,88 @@

```python
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import random

from google.adk import Agent
from google.adk.models.lite_llm import LiteLlm
from google.adk.tools.tool_context import ToolContext
from google.genai import types


def roll_die(sides: int, tool_context: ToolContext) -> int:
  """Roll a die and return the rolled result.

  Args:
    sides: The integer number of sides the die has.
    tool_context: The tool context to use for the die roll.

  Returns:
    An integer of the result of rolling the die.
    The result is also stored in the tool context for future use.
  """
  result = random.randint(1, sides)
  if 'rolls' not in tool_context.state:
    tool_context.state['rolls'] = []

  tool_context.state['rolls'] = tool_context.state['rolls'] + [result]
  return result


async def before_model_callback(callback_context, llm_request):
  print('@before_model_callback')
  print(f'Beginning model choice: {llm_request.model}')
  callback_context.state['beginning_model_choice'] = llm_request.model
  return None


async def after_model_callback(callback_context, llm_response):
  print('@after_model_callback')
  print(f'Final model choice: {llm_response.model_version}')
  callback_context.state['final_model_choice'] = llm_response.model_version
  return None


root_agent = Agent(
    model=LiteLlm(
        model='gemini/gemini-2.5-pro',
        fallbacks=[
            'anthropic/claude-sonnet-4-5-20250929',
            'openai/gpt-4o',
        ],
    ),
    name='resilient_agent',
    description=(
        'hello world agent that can roll a die with a given number of sides.'
    ),
    instruction="""
      You roll dice and answer questions about the outcome of the dice rolls.
      You can roll dice of different sizes.
      It is ok to discuss previous dice rolls, and comment on the dice rolls.
      When you are asked to roll a die, you must call the roll_die tool with the number of sides. Be sure to pass in an integer. Do not pass in a string.
      You should never roll a die on your own.
    """,
    tools=[
        roll_die,
    ],
    generate_content_config=types.GenerateContentConfig(
        safety_settings=[
            types.SafetySetting(  # avoid false alarm about rolling dice.
                category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
                threshold=types.HarmBlockThreshold.OFF,
            ),
        ]
    ),
    before_model_callback=before_model_callback,
    after_model_callback=after_model_callback,
)
```
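
The two callbacks above capture both ends of a fallback: `beginning_model_choice` records the model the request was sent to, and `final_model_choice` records the `model_version` reported on the response. A hypothetical helper (not part of this commit) shows one way to compare them after a turn; it assumes the requested name carries a provider prefix (e.g. `gemini/gemini-2.5-pro`) while the reported version may not, hence the suffix check.

```python
# Hypothetical helper (not part of this commit): detect whether LiteLLM fell
# back to a different model by comparing the two state keys written above.
def fallback_used(state: dict) -> bool:
  requested = state.get('beginning_model_choice')  # e.g. 'gemini/gemini-2.5-pro'
  served = state.get('final_model_choice')         # e.g. 'gemini-2.5-pro' (assumed form)
  if not requested or not served:
    return False
  # The requested name usually includes a provider prefix; compare by suffix.
  return not requested.endswith(served)
```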

src/google/adk/models/lite_llm.py

Lines changed: 13 additions & 5 deletions

```diff
@@ -558,7 +558,9 @@ def _model_response_to_generate_content_response(
   if not message:
     raise ValueError("No message in response")
 
-  llm_response = _message_to_generate_content_response(message)
+  llm_response = _message_to_generate_content_response(
+      message, model_version=response.model
+  )
   if finish_reason:
     # If LiteLLM already provides a FinishReason enum (e.g., for Gemini), use
     # it directly. Otherwise, map the finish_reason string to the enum.
@@ -579,13 +581,14 @@ def _model_response_to_generate_content_response(
 
 
 def _message_to_generate_content_response(
-    message: Message, is_partial: bool = False
+    message: Message, *, is_partial: bool = False, model_version: str = None
 ) -> LlmResponse:
   """Converts a litellm message to LlmResponse.
 
   Args:
     message: The message to convert.
     is_partial: Whether the message is partial.
+    model_version: The model version used to generate the response.
 
   Returns:
     The LlmResponse.
@@ -606,7 +609,9 @@ def _message_to_generate_content_response(
     parts.append(part)
 
   return LlmResponse(
-      content=types.Content(role="model", parts=parts), partial=is_partial
+      content=types.Content(role="model", parts=parts),
+      partial=is_partial,
+      model_version=model_version,
   )
 
 
@@ -950,6 +955,7 @@ async def generate_content_async(
                     content=chunk.text,
                 ),
                 is_partial=True,
+                model_version=part.model,
             )
           elif isinstance(chunk, UsageMetadataChunk):
             usage_metadata = types.GenerateContentResponseUsageMetadata(
@@ -981,14 +987,16 @@
                         role="assistant",
                         content=text,
                         tool_calls=tool_calls,
-                    )
+                    ),
+                    model_version=part.model,
                 )
             )
             text = ""
             function_calls.clear()
           elif finish_reason == "stop" and text:
             aggregated_llm_response = _message_to_generate_content_response(
-                ChatCompletionAssistantMessage(role="assistant", content=text)
+                ChatCompletionAssistantMessage(role="assistant", content=text),
+                model_version=part.model,
             )
             text = ""
 
```

src/google/adk/models/llm_response.py

Lines changed: 7 additions & 0 deletions

```diff
@@ -55,6 +55,9 @@ class LlmResponse(BaseModel):
   )
   """The pydantic model config."""
 
+  model_version: Optional[str] = None
+  """Output only. The model version used to generate the response."""
+
   content: Optional[types.Content] = None
   """The generative content of the response.
@@ -159,6 +162,7 @@ def create(
             citation_metadata=candidate.citation_metadata,
             avg_logprobs=candidate.avg_logprobs,
             logprobs_result=candidate.logprobs_result,
+            model_version=generate_content_response.model_version,
         )
       else:
         return LlmResponse(
@@ -169,6 +173,7 @@
             finish_reason=candidate.finish_reason,
             avg_logprobs=candidate.avg_logprobs,
             logprobs_result=candidate.logprobs_result,
+            model_version=generate_content_response.model_version,
         )
     else:
       if generate_content_response.prompt_feedback:
@@ -177,10 +182,12 @@
             error_code=prompt_feedback.block_reason,
             error_message=prompt_feedback.block_reason_message,
             usage_metadata=usage_metadata,
+            model_version=generate_content_response.model_version,
         )
       else:
         return LlmResponse(
             error_code='UNKNOWN_ERROR',
             error_message='Unknown error.',
             usage_metadata=usage_metadata,
+            model_version=generate_content_response.model_version,
         )
```
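
With this change, `model_version` is an ordinary optional field on `LlmResponse`, so downstream code (callbacks, plugins, logging) can read it directly. A minimal sketch, constructing the response by hand rather than through a model call:

```python
# Minimal sketch: model_version is now a plain optional field on LlmResponse.
from google.adk.models.llm_response import LlmResponse
from google.genai import types

resp = LlmResponse(
    content=types.Content(role='model', parts=[types.Part(text='hi')]),
    model_version='gemini-2.5-pro',  # normally filled from the backend's reported model
)
print(resp.model_version)  # -> gemini-2.5-pro
```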

tests/unittests/models/test_litellm.py

Lines changed: 32 additions & 7 deletions

```diff
@@ -90,6 +90,7 @@
 
 STREAMING_MODEL_RESPONSE = [
     ModelResponse(
+        model="test_model",
         choices=[
             StreamingChoices(
                 finish_reason=None,
@@ -98,9 +99,10 @@
                     content="zero, ",
                 ),
             )
-        ]
+        ],
     ),
     ModelResponse(
+        model="test_model",
         choices=[
             StreamingChoices(
                 finish_reason=None,
@@ -109,9 +111,10 @@
                     content="one, ",
                 ),
             )
-        ]
+        ],
     ),
     ModelResponse(
+        model="test_model",
         choices=[
             StreamingChoices(
                 finish_reason=None,
@@ -120,9 +123,10 @@
                     content="two:",
                 ),
             )
-        ]
+        ],
     ),
     ModelResponse(
+        model="test_model",
         choices=[
             StreamingChoices(
                 finish_reason=None,
@@ -141,9 +145,10 @@
                     ],
                 ),
             )
-        ]
+        ],
     ),
     ModelResponse(
+        model="test_model",
        choices=[
             StreamingChoices(
                 finish_reason=None,
@@ -162,14 +167,15 @@
                     ],
                 ),
             )
-        ]
+        ],
     ),
     ModelResponse(
+        model="test_model",
         choices=[
             StreamingChoices(
                 finish_reason="tool_use",
             )
-        ]
+        ],
     ),
 ]
 
@@ -342,6 +348,7 @@
 @pytest.fixture
 def mock_response():
   return ModelResponse(
+      model="test_model",
       choices=[
           Choices(
               message=ChatCompletionAssistantMessage(
@@ -359,7 +366,7 @@ def mock_response():
               ],
           )
       )
-      ]
+      ],
   )
 
 
@@ -529,6 +536,7 @@ async def test_generate_content_async(mock_acompletion, lite_llm_instance):
         "test_arg": "test_value"
     }
     assert response.content.parts[1].function_call.id == "test_tool_call_id"
+    assert response.model_version == "test_model"
 
     mock_acompletion.assert_called_once()
 
@@ -1262,6 +1270,19 @@ def test_message_to_generate_content_response_tool_call():
   assert response.content.parts[0].function_call.id == "test_tool_call_id"
 
 
+def test_message_to_generate_content_response_with_model():
+  message = ChatCompletionAssistantMessage(
+      role="assistant",
+      content="Test response",
+  )
+  response = _message_to_generate_content_response(
+      message, model_version="gemini-2.5-pro"
+  )
+  assert response.content.role == "model"
+  assert response.content.parts[0].text == "Test response"
+  assert response.model_version == "gemini-2.5-pro"
+
+
 def test_get_content_text():
   parts = [types.Part.from_text(text="Test text")]
   content = _get_content(parts)
@@ -1556,16 +1577,20 @@ async def test_generate_content_async_stream(
     assert len(responses) == 4
     assert responses[0].content.role == "model"
     assert responses[0].content.parts[0].text == "zero, "
+    assert responses[0].model_version == "test_model"
     assert responses[1].content.role == "model"
     assert responses[1].content.parts[0].text == "one, "
+    assert responses[1].model_version == "test_model"
     assert responses[2].content.role == "model"
     assert responses[2].content.parts[0].text == "two:"
+    assert responses[2].model_version == "test_model"
     assert responses[3].content.role == "model"
     assert responses[3].content.parts[-1].function_call.name == "test_function"
     assert responses[3].content.parts[-1].function_call.args == {
         "test_arg": "test_value"
     }
     assert responses[3].content.parts[-1].function_call.id == "test_tool_call_id"
+    assert responses[3].model_version == "test_model"
     mock_completion.assert_called_once()
 
     _, kwargs = mock_completion.call_args
```

tests/unittests/models/test_llm_response.py

Lines changed: 15 additions & 0 deletions

```diff
@@ -334,3 +334,18 @@ def test_llm_response_create_empty_content_with_stop_reason():
 
   assert response.error_code is None
   assert response.content is not None
+
+
+def test_llm_response_create_includes_model_version():
+  """Test LlmResponse.create() includes model version."""
+  generate_content_response = types.GenerateContentResponse(
+      model_version='gemini-2.0-flash',
+      candidates=[
+          types.Candidate(
+              content=types.Content(parts=[types.Part(text='Response text')]),
+              finish_reason=types.FinishReason.STOP,
+          )
+      ],
+  )
+  response = LlmResponse.create(generate_content_response)
+  assert response.model_version == 'gemini-2.0-flash'
```
