fix: #2008 Fix agent memory leak using weakref #2014

chatgpt-codex-connector · 2025-10-31T01:28:10Z

Run items drop their agent immediately

The new weak-reference logic in `RunItemBase.__post_init__` stores `agent` in `_agent_ref` and immediately deletes the only strong attribute, so a `RunItem` no longer retains the agent that produced it. Any item constructed with an inline agent (e.g. the fixtures created via `MessageOutputItem(agent=Agent(name="test"), ...)` in `tests/test_items_helpers.py`) now finds that `item.agent` is `None` as soon as the constructor returns, because no other strong references exist. Likewise, user code that keeps only `result.new_items` while discarding the `Agent` cannot execute `new_item.agent.name` or `new_item.source_agent.name` as in `examples/customer_service/main.py:157-164`; those attributes become `None` immediately, so accessing `.name` on them raises `AttributeError`. This regresses the documented contract `agent: Agent[Any]` and makes run items unusable once the caller lets the agent go. The fix needs to preserve a strong reference (or store the identifying metadata) instead of deleting the field, and the same applies to the handoff `source_agent`/`target_agent` weakrefs below.

Useful? React with 👍 / 👎.

This is a fair point. I explored a possible solution, and the following patch may be good to go:

diff --git a/src/agents/items.py b/src/agents/items.py index 96f9577a..517edc0e 100644 --- a/src/agents/items.py +++ b/src/agents/items.py @@ -92,15 +92,22 @@ class RunItemBase(Generic[T], abc.ABC): ) def __post_init__(self) -> None: - # Store the producing agent weakly to avoid keeping it alive after the run. + # Store a weak reference so we can release the strong reference later if desired. self._agent_ref = weakref.ref(self.agent) - object.__delattr__(self, "agent") def __getattr__(self, name: str) -> Any: if name == "agent": return self._agent_ref() if self._agent_ref else None raise AttributeError(name) + def release_agent(self) -> None: + """Release the strong reference to the agent while keeping a weak reference.""" + if "agent" not in self.__dict__: + return + agent = self.__dict__["agent"] + self._agent_ref = weakref.ref(agent) if agent is not None else None + object.__delattr__(self, "agent") + def to_input_item(self) -> TResponseInputItem: """Converts this item into an input item suitable for passing to the model.""" if isinstance(self.raw_item, dict): @@ -161,11 +168,9 @@ class HandoffOutputItem(RunItemBase[TResponseInputItem]): def __post_init__(self) -> None: super().__post_init__() - # Handoff metadata should not hold strong references to the agents either. + # Maintain weak references so downstream code can release the strong references when safe. 
self._source_agent_ref = weakref.ref(self.source_agent) self._target_agent_ref = weakref.ref(self.target_agent) - object.__delattr__(self, "source_agent") - object.__delattr__(self, "target_agent") def __getattr__(self, name: str) -> Any: if name == "source_agent": @@ -174,6 +179,17 @@ class HandoffOutputItem(RunItemBase[TResponseInputItem]): return self._target_agent_ref() if self._target_agent_ref else None return super().__getattr__(name) + def release_agent(self) -> None: + super().release_agent() + if "source_agent" in self.__dict__: + source_agent = self.__dict__["source_agent"] + self._source_agent_ref = weakref.ref(source_agent) if source_agent is not None else None + object.__delattr__(self, "source_agent") + if "target_agent" in self.__dict__: + target_agent = self.__dict__["target_agent"] + self._target_agent_ref = weakref.ref(target_agent) if target_agent is not None else None + object.__delattr__(self, "target_agent") + ToolCallItemTypes: TypeAlias = Union[ ResponseFunctionToolCall, diff --git a/src/agents/result.py b/src/agents/result.py index 3fe20cfa..181fffcf 100644 --- a/src/agents/result.py +++ b/src/agents/result.py @@ -2,6 +2,7 @@ from __future__ import annotations import abc import asyncio +import weakref from collections.abc import AsyncIterator from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any, Literal, cast @@ -74,6 +75,32 @@ class RunResultBase(abc.ABC): def last_agent(self) -> Agent[Any]: """The last agent that was run.""" + def release_agents(self) -> None: + """ + Release strong references to agents held by this result. After calling this method, + accessing `item.agent` or `last_agent` may return `None` if the agent has been garbage + collected. Callers can use this when they are done inspecting the result and want to + eagerly drop any associated agent graph. 
+ """ + for item in self.new_items: + release = getattr(item, "release_agent", None) + if callable(release): + release() + self._release_last_agent_reference() + + def __del__(self) -> None: + try: + # Fall back to releasing agents automatically in case the caller never invoked + # `release_agents()` explicitly. This keeps the no-leak guarantee confirmed by tests. + self.release_agents() + except Exception: + # Avoid raising from __del__. + pass + + @abc.abstractmethod + def _release_last_agent_reference(self) -> None: + """Release stored agent reference specific to the concrete result type.""" + def final_output_as(self, cls: type[T], raise_if_incorrect_type: bool = False) -> T: """A convenience method to cast the final output to a specific type. By default, the cast is only for the typechecker. If you set `raise_if_incorrect_type` to True, we'll raise a @@ -111,11 +138,33 @@ class RunResultBase(abc.ABC): @dataclass class RunResult(RunResultBase): _last_agent: Agent[Any] + _last_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field( + init=False, + repr=False, + default=None, + ) + + def __post_init__(self) -> None: + self._last_agent_ref = weakref.ref(self._last_agent) @property def last_agent(self) -> Agent[Any]: """The last agent that was run.""" - return self._last_agent + agent = cast("Agent[Any] | None", self.__dict__.get("_last_agent")) + if agent is not None: + return agent + if self._last_agent_ref: + agent = self._last_agent_ref() + if agent is not None: + return agent + raise AgentsException("Last agent reference is no longer available.") + + def _release_last_agent_reference(self) -> None: + agent = cast("Agent[Any] | None", self.__dict__.get("_last_agent")) + if agent is None: + return + self._last_agent_ref = weakref.ref(agent) + object.__delattr__(self, "_last_agent") def __str__(self) -> str: return pretty_print_result(self) @@ -150,6 +199,12 @@ class RunResultStreaming(RunResultBase): is_complete: bool = False """Whether the agent has 
finished running.""" + _current_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field( + init=False, + repr=False, + default=None, + ) + # Queues that the background run_loop writes to _event_queue: asyncio.Queue[StreamEvent | QueueCompleteSentinel] = field( default_factory=asyncio.Queue, repr=False @@ -167,12 +222,29 @@ class RunResultStreaming(RunResultBase): # Soft cancel state _cancel_mode: Literal["none", "immediate", "after_turn"] = field(default="none", repr=False) + def __post_init__(self) -> None: + self._current_agent_ref = weakref.ref(self.current_agent) + @property def last_agent(self) -> Agent[Any]: """The last agent that was run. Updates as the agent run progresses, so the true last agent is only available after the agent run is complete. """ - return self.current_agent + agent = cast("Agent[Any] | None", self.__dict__.get("current_agent")) + if agent is not None: + return agent + if self._current_agent_ref: + agent = self._current_agent_ref() + if agent is not None: + return agent + raise AgentsException("Last agent reference is no longer available.") + + def _release_last_agent_reference(self) -> None: + agent = cast("Agent[Any] | None", self.__dict__.get("current_agent")) + if agent is None: + return + self._current_agent_ref = weakref.ref(agent) + object.__delattr__(self, "current_agent") def cancel(self, mode: Literal["immediate", "after_turn"] = "immediate") -> None: """Cancel the streaming run. 
diff --git a/tests/test_agent_memory_leak.py b/tests/test_agent_memory_leak.py index 6690d218..424aa399 100644 --- a/tests/test_agent_memory_leak.py +++ b/tests/test_agent_memory_leak.py @@ -23,9 +23,10 @@ def _make_message(text: str) -> ResponseOutputMessage: @pytest.mark.asyncio async def test_agent_is_released_after_run() -> None: fake_model = FakeModel(initial_output=[_make_message("Paris")]) - agent = Agent(name="leaker", instructions="Answer questions.", model=fake_model) + agent = Agent(name="leak-test-agent", instructions="Answer questions.", model=fake_model) agent_ref = weakref.ref(agent) + # Running the agent should not leave behind strong references once the result goes out of scope. await Runner.run(agent, "What is the capital of France?") del agent diff --git a/tests/test_items_helpers.py b/tests/test_items_helpers.py index a94d7454..43dee103 100644 --- a/tests/test_items_helpers.py +++ b/tests/test_items_helpers.py @@ -1,5 +1,6 @@ from __future__ import annotations +import gc import json from openai.types.responses.response_computer_tool_call import ( @@ -142,6 +143,21 @@ def test_text_message_outputs_across_list_of_runitems() -> None: assert ItemHelpers.text_message_outputs([item1, non_message_item, item2]) == "foobar" +def test_message_output_item_retains_agent_until_release() -> None: + # Construct the run item with an inline agent to ensure the run item keeps a strong reference. + message = make_message( + [ResponseOutputText(annotations=[], text="hello", type="output_text")] + ) + item = MessageOutputItem(agent=Agent(name="inline"), raw_item=message) + assert item.agent is not None + assert item.agent.name == "inline" + + # After explicitly releasing, the weak reference should drop once GC runs. + item.release_agent() + gc.collect() + assert item.agent is None + + def test_tool_call_output_item_constructs_function_call_output_dict(): # Build a simple ResponseFunctionToolCall. 
call = ResponseFunctionToolCall( diff --git a/tests/test_result_cast.py b/tests/test_result_cast.py index 4ef1a293..ec3d3abc 100644 --- a/tests/test_result_cast.py +++ b/tests/test_result_cast.py @@ -1,9 +1,13 @@ +import gc +import weakref from typing import Any import pytest from pydantic import BaseModel -from agents import Agent, RunContextWrapper, RunResult +from agents import Agent, MessageOutputItem, RunContextWrapper, RunResult +from agents.exceptions import AgentsException +from openai.types.responses import ResponseOutputMessage, ResponseOutputText def create_run_result(final_output: Any) -> RunResult: @@ -59,3 +63,39 @@ def test_bad_cast_with_param_raises(): result = create_run_result(Foo(bar=1)) with pytest.raises(TypeError): result.final_output_as(int, raise_if_incorrect_type=True) + + +def test_run_result_release_agents_breaks_strong_refs() -> None: + message = ResponseOutputMessage( + id="msg", + content=[ResponseOutputText(annotations=[], text="hello", type="output_text")], + role="assistant", + status="completed", + type="message", + ) + agent = Agent(name="leak-test-agent") + item = MessageOutputItem(agent=agent, raw_item=message) + result = RunResult( + input="test", + new_items=[item], + raw_responses=[], + final_output=None, + input_guardrail_results=[], + output_guardrail_results=[], + tool_input_guardrail_results=[], + tool_output_guardrail_results=[], + _last_agent=agent, + context_wrapper=RunContextWrapper(context=None), + ) + assert item.agent is not None + assert item.agent.name == "leak-test-agent" + + agent_ref = weakref.ref(agent) + result.release_agents() + del agent + gc.collect() + + assert agent_ref() is None + assert item.agent is None + with pytest.raises(AgentsException): + _ = result.last_agent

@rm-openai do you have any thoughts?

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

fix: #2008 Fix agent memory leak using weakref #2014

Diff view

Diff view

There are no files selected for viewing

chatgpt-codex-connector bot Oct 31, 2025

seratch Oct 31, 2025

-Original file line number
+Diff line change
@@ -1,7 +1,8 @@
  from __future__ import annotations
  import abc
- from dataclasses import dataclass
+ import weakref
+ from dataclasses import dataclass, field
  from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar, Union
  import pydantic
@@ Expand Down Expand Up / @@ -84,6 +85,22 @@ class RunItemBase(Generic[T], abc.ABC): @@
   (i.e. `openai.types.responses.ResponseInputItemParam`).
   """
+  _agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
+  init=False,
+  repr=False,
+  default=None,
+  )
+  def __post_init__(self) -> None:
+  # Store the producing agent weakly to avoid keeping it alive after the run.
+  self._agent_ref = weakref.ref(self.agent)
+  object.__delattr__(self, "agent")
+  def __getattr__(self, name: str) -> Any:
+  if name == "agent":
+  return self._agent_ref() if self._agent_ref else None
+  raise AttributeError(name)
   def to_input_item(self) -> TResponseInputItem:
   """Converts this item into an input item suitable for passing to the model."""
   if isinstance(self.raw_item, dict):
@@ Expand Down Expand Up / @@ -131,6 +148,32 @@ class HandoffOutputItem(RunItemBase[TResponseInputItem]): @@
   type: Literal["handoff_output_item"] = "handoff_output_item"
+  _source_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
+  init=False,
+  repr=False,
+  default=None,
+  )
+  _target_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
+  init=False,
+  repr=False,
+  default=None,
+  )
+  def __post_init__(self) -> None:
+  super().__post_init__()
+  # Handoff metadata should not hold strong references to the agents either.
+  self._source_agent_ref = weakref.ref(self.source_agent)
+  self._target_agent_ref = weakref.ref(self.target_agent)
+  object.__delattr__(self, "source_agent")
+  object.__delattr__(self, "target_agent")
+  def __getattr__(self, name: str) -> Any:
+  if name == "source_agent":
+  return self._source_agent_ref() if self._source_agent_ref else None
+  if name == "target_agent":
+  return self._target_agent_ref() if self._target_agent_ref else None
+  return super().__getattr__(name)
  ToolCallItemTypes: TypeAlias = Union[
   ResponseFunctionToolCall,
@@ Expand Down @@

-Original file line number
+Diff line change
@@ -0,0 +1,34 @@
+ from __future__ import annotations
+ import gc
+ import weakref
+ import pytest
+ from openai.types.responses import ResponseOutputMessage, ResponseOutputText
+ from agents import Agent, Runner
+ from tests.fake_model import FakeModel
+ def _make_message(text: str) -> ResponseOutputMessage:
+  return ResponseOutputMessage(
+  id="msg-1",
+  content=[ResponseOutputText(annotations=[], text=text, type="output_text")],
+  role="assistant",
+  status="completed",
+  type="message",
+  )
+ @pytest.mark.asyncio
+ async def test_agent_is_released_after_run() -> None:
+  fake_model = FakeModel(initial_output=[_make_message("Paris")])
+  agent = Agent(name="leaker", instructions="Answer questions.", model=fake_model)
+  agent_ref = weakref.ref(agent)
+  await Runner.run(agent, "What is the capital of France?")
+  del agent
+  gc.collect()
+  assert agent_ref() is None

fix: #2008 Fix agent memory leak using weakref #2014

Are you sure you want to change the base?

fix: #2008 Fix agent memory leak using weakref #2014

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

chatgpt-codex-connector bot Oct 31, 2025

Choose a reason for hiding this comment

seratch Oct 31, 2025

Choose a reason for hiding this comment