137 changes: 137 additions & 0 deletions sentry_sdk/ai/message_utils.py
@@ -0,0 +1,137 @@
import json
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from typing import Any, Dict, List, Optional

from sentry_sdk.serializer import serialize
from sentry_sdk._types import AnnotatedValue

MAX_GEN_AI_MESSAGE_BYTES = 20_000 # 20KB


def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
    # type: (List[Dict[str, Any]], int) -> List[Dict[str, Any]]
    """
    Truncate messages by removing the oldest ones until the serialized size is within limits.
    If the last message is still too large, truncate its content instead of removing it entirely.

    This function prioritizes keeping the most recent messages while ensuring the total
    serialized size stays under the specified byte limit. It uses the Sentry serializer
    to get accurate size estimates that match what will actually be sent.

    Always preserves at least one message, even if its content needs to be truncated.

    :param messages: List of message objects (typically with 'role' and 'content' keys)
    :param max_bytes: Maximum allowed size in bytes for the serialized messages
    :returns: Truncated list of messages that fits within the size limit
    """
    if not messages:
        return messages

    truncated_messages = list(messages)

    # First, remove older messages until we're under the limit or have only one message left
    while len(truncated_messages) > 1:
        serialized = serialize(
            truncated_messages, is_vars=False, max_value_length=round(max_bytes * 0.8)
        )
        serialized_json = json.dumps(serialized, separators=(",", ":"))
        current_size = len(serialized_json.encode("utf-8"))

        if current_size <= max_bytes:
            break

        truncated_messages.pop(0)  # Remove oldest message

    # If we still have one message but it's too large, truncate its content.
    # This ensures we always preserve at least one message.
    if len(truncated_messages) == 1:
        serialized = serialize(
            truncated_messages, is_vars=False, max_value_length=round(max_bytes * 0.8)
        )
        serialized_json = json.dumps(serialized, separators=(",", ":"))
        current_size = len(serialized_json.encode("utf-8"))

        if current_size > max_bytes:
            # Truncate the content of the last remaining message. The slice
            # index must be an int; max_bytes * 0.8 is a float in Python 3.
            last_message = truncated_messages[0].copy()
            content = last_message.get("content", "")

            if content and isinstance(content, str):
                last_message["content"] = content[: int(max_bytes * 0.8)] + "..."
                truncated_messages[0] = last_message

    return truncated_messages


def serialize_gen_ai_messages(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
    # type: (Optional[Any], int) -> Optional[str]
    """
    Serialize and truncate gen_ai messages for storage in spans.

    This function handles the complete workflow of:
    1. Truncating messages to fit within size limits (if not already done)
    2. Serializing them using Sentry's serializer (which processes AnnotatedValue for _meta)
    3. Converting to JSON string for storage

    :param messages: List of message objects, AnnotatedValue, or None
    :param max_bytes: Maximum allowed size in bytes for the serialized messages
    :returns: JSON string of serialized messages or None if input was None/empty
    """
    if not messages:
        return None

    if isinstance(messages, AnnotatedValue):
        serialized_messages = serialize(
            messages, is_vars=False, max_value_length=round(max_bytes * 0.8)
        )
        return json.dumps(serialized_messages, separators=(",", ":"))

    truncated_messages = truncate_messages_by_size(messages, max_bytes)
    if not truncated_messages:
        return None
    serialized_messages = serialize(
        truncated_messages, is_vars=False, max_value_length=round(max_bytes * 0.8)
    )

    return json.dumps(serialized_messages, separators=(",", ":"))


def truncate_and_serialize_messages(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
    # type: (Optional[List[Dict[str, Any]]], int) -> Any
    """
    Truncate messages and return a serialized string or AnnotatedValue for automatic _meta creation.

    This function handles truncation and always returns serialized JSON strings. When truncation
    occurs, it wraps the serialized string in an AnnotatedValue so that Sentry's serializer can
    automatically create the appropriate _meta structure.

    :param messages: List of message objects or None
    :param max_bytes: Maximum allowed size in bytes for the serialized messages
    :returns: JSON string, AnnotatedValue containing JSON string (if truncated), or None
    """
    if not messages:
        return None

    truncated_messages = truncate_messages_by_size(messages, max_bytes)
    if not truncated_messages:
        return None

    # Always serialize to JSON string
    serialized_json = serialize_gen_ai_messages(truncated_messages, max_bytes)
    if not serialized_json:
        return None

    original_count = len(messages)
    truncated_count = len(truncated_messages)

    # If truncation occurred, wrap the serialized string in AnnotatedValue for _meta
    if original_count != truncated_count:
        return AnnotatedValue(
            value=serialized_json,
            metadata={"len": original_count},
        )

    # No truncation, return plain serialized string
    return serialized_json
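
For reviewers, a minimal usage sketch of the new helper, assuming the module above is importable as shown; the messages are illustrative and not taken from the PR:

from sentry_sdk.ai.message_utils import truncate_and_serialize_messages

# Small payloads come back as a plain JSON string.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Summarize this document."},
]
result = truncate_and_serialize_messages(messages)
print(type(result))  # <class 'str'>

# An oversized history is truncated to its most recent messages, and the
# result is wrapped in AnnotatedValue carrying the original count for _meta.
oversized = [{"role": "user", "content": "x" * 2_000} for _ in range(100)]
result = truncate_and_serialize_messages(oversized)
print(type(result).__name__, result.metadata)  # AnnotatedValue {'len': 100}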
28 changes: 13 additions & 15 deletions sentry_sdk/integrations/langchain.py
@@ -5,6 +5,7 @@
 import sentry_sdk
 from sentry_sdk.ai.monitoring import set_ai_pipeline_name
 from sentry_sdk.ai.utils import set_data_normalized, get_start_span_function
+from sentry_sdk.ai.message_utils import truncate_and_serialize_messages
 from sentry_sdk.consts import OP, SPANDATA
 from sentry_sdk.integrations import DidNotEnable, Integration
 from sentry_sdk.scope import should_send_default_pii
@@ -209,9 +210,9 @@ def on_llm_start(
             _set_tools_on_span(span, all_params.get("tools"))

             if should_send_default_pii() and self.include_prompts:
-                set_data_normalized(
-                    span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompts, unpack=False
-                )
+                messages_data = truncate_and_serialize_messages(prompts)
+                if messages_data is not None:
+                    span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)

     def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs):
         # type: (SentryLangchainCallback, Dict[str, Any], List[List[BaseMessage]], UUID, Any) -> Any
@@ -262,12 +263,9 @@ def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs):
                         normalized_messages.append(
                             self._normalize_langchain_message(message)
                         )
-                set_data_normalized(
-                    span,
-                    SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                    normalized_messages,
-                    unpack=False,
-                )
+                messages_data = truncate_and_serialize_messages(normalized_messages)
+                if messages_data is not None:
+                    span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)

     def on_chat_model_end(self, response, *, run_id, **kwargs):
         # type: (SentryLangchainCallback, LLMResult, UUID, Any) -> Any
@@ -740,9 +738,9 @@ def new_invoke(self, *args, **kwargs):
                 and should_send_default_pii()
                 and integration.include_prompts
             ):
-                set_data_normalized(
-                    span, SPANDATA.GEN_AI_REQUEST_MESSAGES, [input], unpack=False
-                )
+                messages_data = truncate_and_serialize_messages([input])
+                if messages_data is not None:
+                    span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)

             output = result.get("output")
             if (
@@ -791,9 +789,9 @@ def new_stream(self, *args, **kwargs):
                 and should_send_default_pii()
                 and integration.include_prompts
             ):
-                set_data_normalized(
-                    span, SPANDATA.GEN_AI_REQUEST_MESSAGES, [input], unpack=False
-                )
+                messages_data = truncate_and_serialize_messages([input])
+                if messages_data is not None:
+                    span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)

             # Run the agent
             result = f(self, *args, **kwargs)
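
The same three-line call-site pattern now repeats in each instrumented code path. A hypothetical helper (not part of this PR, sketched only to show the shared shape) could factor it out:

from typing import Any, Dict, List, Optional

from sentry_sdk.ai.message_utils import truncate_and_serialize_messages
from sentry_sdk.consts import SPANDATA


def _set_request_messages(span, messages):
    # type: (Any, Optional[List[Dict[str, Any]]]) -> None
    """Hypothetical helper: serialize, truncate, and attach gen_ai request messages."""
    messages_data = truncate_and_serialize_messages(messages)
    if messages_data is not None:
        span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)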
19 changes: 7 additions & 12 deletions sentry_sdk/integrations/langgraph.py
@@ -3,6 +3,7 @@

 import sentry_sdk
 from sentry_sdk.ai.utils import set_data_normalized
+from sentry_sdk.ai.message_utils import truncate_and_serialize_messages
 from sentry_sdk.consts import OP, SPANDATA
 from sentry_sdk.integrations import DidNotEnable, Integration
 from sentry_sdk.scope import should_send_default_pii
@@ -180,12 +181,9 @@ def new_invoke(self, *args, **kwargs):
             ):
                 input_messages = _parse_langgraph_messages(args[0])
                 if input_messages:
-                    set_data_normalized(
-                        span,
-                        SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                        input_messages,
-                        unpack=False,
-                    )
+                    messages_data = truncate_and_serialize_messages(input_messages)
+                    if messages_data is not None:
+                        span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)

             result = f(self, *args, **kwargs)

@@ -230,12 +228,9 @@ async def new_ainvoke(self, *args, **kwargs):
             ):
                 input_messages = _parse_langgraph_messages(args[0])
                 if input_messages:
-                    set_data_normalized(
-                        span,
-                        SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                        input_messages,
-                        unpack=False,
-                    )
+                    messages_data = truncate_and_serialize_messages(input_messages)
+                    if messages_data is not None:
+                        span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)

             result = await f(self, *args, **kwargs)

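Finally, a quick sanity check of the truncation contract, assuming this patch is applied in a local checkout; the sizes are illustrative:

from sentry_sdk._types import AnnotatedValue
from sentry_sdk.ai.message_utils import truncate_and_serialize_messages


def test_small_history_is_plain_json():
    result = truncate_and_serialize_messages([{"role": "user", "content": "hi"}])
    assert isinstance(result, str)


def test_oversized_history_is_annotated():
    # Five ~15 KB messages exceed the 20 KB default, so only the most recent
    # message survives and the original count is recorded for _meta.
    big = [{"role": "user", "content": "x" * 15_000} for _ in range(5)]
    result = truncate_and_serialize_messages(big)
    assert isinstance(result, AnnotatedValue)
    assert result.metadata == {"len": 5}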