| 
 | 1 | +import asyncio  | 
 | 2 | +import queue  | 
 | 3 | +import sys  | 
 | 4 | +import threading  | 
 | 5 | +from typing import Any  | 
 | 6 | + | 
 | 7 | +import numpy as np  | 
 | 8 | +import sounddevice as sd  | 
 | 9 | + | 
 | 10 | +from agents import function_tool  | 
 | 11 | +from agents.realtime import RealtimeAgent, RealtimeRunner, RealtimeSession, RealtimeSessionEvent  | 
 | 12 | + | 
# Audio configuration
CHUNK_LENGTH_S = 0.05  # 50ms of audio per chunk, used for both capture and playback pacing
SAMPLE_RATE = 24000  # samples per second for mic input and speaker output
FORMAT = np.int16  # 16-bit signed integer PCM samples
CHANNELS = 1  # mono

# Set up logging for OpenAI agents SDK
# logging.basicConfig(
#     level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
# )
# logger.logger.setLevel(logging.ERROR)
 | 24 | + | 
 | 25 | + | 
 | 26 | +@function_tool  | 
 | 27 | +def get_weather(city: str) -> str:  | 
 | 28 | + """Get the weather in a city."""  | 
 | 29 | + return f"The weather in {city} is sunny."  | 
 | 30 | + | 
 | 31 | + | 
# Single realtime agent exposing one tool; the instructions pin a fixed
# greeting ("Top of the morning to you").
agent = RealtimeAgent(
    name="Assistant",
    instructions="You always greet the user with 'Top of the morning to you'.",
    tools=[get_weather],
)
 | 37 | + | 
 | 38 | + | 
 | 39 | +def _truncate_str(s: str, max_length: int) -> str:  | 
 | 40 | + if len(s) > max_length:  | 
 | 41 | + return s[:max_length] + "..."  | 
 | 42 | + return s  | 
 | 43 | + | 
 | 44 | + | 
 | 45 | +class NoUIDemo:  | 
 | 46 | + def __init__(self) -> None:  | 
 | 47 | + self.session: RealtimeSession | None = None  | 
 | 48 | + self.audio_stream: sd.InputStream | None = None  | 
 | 49 | + self.audio_player: sd.OutputStream | None = None  | 
 | 50 | + self.recording = False  | 
 | 51 | + | 
 | 52 | + # Audio output state for callback system  | 
 | 53 | + self.output_queue: queue.Queue[Any] = queue.Queue(maxsize=10) # Buffer more chunks  | 
 | 54 | + self.interrupt_event = threading.Event()  | 
 | 55 | + self.current_audio_chunk: np.ndarray | None = None # type: ignore  | 
 | 56 | + self.chunk_position = 0  | 
 | 57 | + | 
 | 58 | + def _output_callback(self, outdata, frames: int, time, status) -> None:  | 
 | 59 | + """Callback for audio output - handles continuous audio stream from server."""  | 
 | 60 | + if status:  | 
 | 61 | + print(f"Output callback status: {status}")  | 
 | 62 | + | 
 | 63 | + # Check if we should clear the queue due to interrupt  | 
 | 64 | + if self.interrupt_event.is_set():  | 
 | 65 | + # Clear the queue and current chunk state  | 
 | 66 | + while not self.output_queue.empty():  | 
 | 67 | + try:  | 
 | 68 | + self.output_queue.get_nowait()  | 
 | 69 | + except queue.Empty:  | 
 | 70 | + break  | 
 | 71 | + self.current_audio_chunk = None  | 
 | 72 | + self.chunk_position = 0  | 
 | 73 | + self.interrupt_event.clear()  | 
 | 74 | + outdata.fill(0)  | 
 | 75 | + return  | 
 | 76 | + | 
 | 77 | + # Fill output buffer from queue and current chunk  | 
 | 78 | + outdata.fill(0) # Start with silence  | 
 | 79 | + samples_filled = 0  | 
 | 80 | + | 
 | 81 | + while samples_filled < len(outdata):  | 
 | 82 | + # If we don't have a current chunk, try to get one from queue  | 
 | 83 | + if self.current_audio_chunk is None:  | 
 | 84 | + try:  | 
 | 85 | + self.current_audio_chunk = self.output_queue.get_nowait()  | 
 | 86 | + self.chunk_position = 0  | 
 | 87 | + except queue.Empty:  | 
 | 88 | + # No more audio data available - this causes choppiness  | 
 | 89 | + # Uncomment next line to debug underruns:  | 
 | 90 | + # print(f"Audio underrun: {samples_filled}/{len(outdata)} samples filled")  | 
 | 91 | + break  | 
 | 92 | + | 
 | 93 | + # Copy data from current chunk to output buffer  | 
 | 94 | + remaining_output = len(outdata) - samples_filled  | 
 | 95 | + remaining_chunk = len(self.current_audio_chunk) - self.chunk_position  | 
 | 96 | + samples_to_copy = min(remaining_output, remaining_chunk)  | 
 | 97 | + | 
 | 98 | + if samples_to_copy > 0:  | 
 | 99 | + chunk_data = self.current_audio_chunk[  | 
 | 100 | + self.chunk_position : self.chunk_position + samples_to_copy  | 
 | 101 | + ]  | 
 | 102 | + # More efficient: direct assignment for mono audio instead of reshape  | 
 | 103 | + outdata[samples_filled : samples_filled + samples_to_copy, 0] = chunk_data  | 
 | 104 | + samples_filled += samples_to_copy  | 
 | 105 | + self.chunk_position += samples_to_copy  | 
 | 106 | + | 
 | 107 | + # If we've used up the entire chunk, reset for next iteration  | 
 | 108 | + if self.chunk_position >= len(self.current_audio_chunk):  | 
 | 109 | + self.current_audio_chunk = None  | 
 | 110 | + self.chunk_position = 0  | 
 | 111 | + | 
 | 112 | + async def run(self) -> None:  | 
 | 113 | + print("Connecting, may take a few seconds...")  | 
 | 114 | + | 
 | 115 | + # Initialize audio player with callback  | 
 | 116 | + chunk_size = int(SAMPLE_RATE * CHUNK_LENGTH_S)  | 
 | 117 | + self.audio_player = sd.OutputStream(  | 
 | 118 | + channels=CHANNELS,  | 
 | 119 | + samplerate=SAMPLE_RATE,  | 
 | 120 | + dtype=FORMAT,  | 
 | 121 | + callback=self._output_callback,  | 
 | 122 | + blocksize=chunk_size, # Match our chunk timing for better alignment  | 
 | 123 | + )  | 
 | 124 | + self.audio_player.start()  | 
 | 125 | + | 
 | 126 | + try:  | 
 | 127 | + runner = RealtimeRunner(agent)  | 
 | 128 | + async with await runner.run() as session:  | 
 | 129 | + self.session = session  | 
 | 130 | + print("Connected. Starting audio recording...")  | 
 | 131 | + | 
 | 132 | + # Start audio recording  | 
 | 133 | + await self.start_audio_recording()  | 
 | 134 | + print("Audio recording started. You can start speaking - expect lots of logs!")  | 
 | 135 | + | 
 | 136 | + # Process session events  | 
 | 137 | + async for event in session:  | 
 | 138 | + await self._on_event(event)  | 
 | 139 | + | 
 | 140 | + finally:  | 
 | 141 | + # Clean up audio player  | 
 | 142 | + if self.audio_player and self.audio_player.active:  | 
 | 143 | + self.audio_player.stop()  | 
 | 144 | + if self.audio_player:  | 
 | 145 | + self.audio_player.close()  | 
 | 146 | + | 
 | 147 | + print("Session ended")  | 
 | 148 | + | 
 | 149 | + async def start_audio_recording(self) -> None:  | 
 | 150 | + """Start recording audio from the microphone."""  | 
 | 151 | + # Set up audio input stream  | 
 | 152 | + self.audio_stream = sd.InputStream(  | 
 | 153 | + channels=CHANNELS,  | 
 | 154 | + samplerate=SAMPLE_RATE,  | 
 | 155 | + dtype=FORMAT,  | 
 | 156 | + )  | 
 | 157 | + | 
 | 158 | + self.audio_stream.start()  | 
 | 159 | + self.recording = True  | 
 | 160 | + | 
 | 161 | + # Start audio capture task  | 
 | 162 | + asyncio.create_task(self.capture_audio())  | 
 | 163 | + | 
 | 164 | + async def capture_audio(self) -> None:  | 
 | 165 | + """Capture audio from the microphone and send to the session."""  | 
 | 166 | + if not self.audio_stream or not self.session:  | 
 | 167 | + return  | 
 | 168 | + | 
 | 169 | + # Buffer size in samples  | 
 | 170 | + read_size = int(SAMPLE_RATE * CHUNK_LENGTH_S)  | 
 | 171 | + | 
 | 172 | + try:  | 
 | 173 | + while self.recording:  | 
 | 174 | + # Check if there's enough data to read  | 
 | 175 | + if self.audio_stream.read_available < read_size:  | 
 | 176 | + await asyncio.sleep(0.01)  | 
 | 177 | + continue  | 
 | 178 | + | 
 | 179 | + # Read audio data  | 
 | 180 | + data, _ = self.audio_stream.read(read_size)  | 
 | 181 | + | 
 | 182 | + # Convert numpy array to bytes  | 
 | 183 | + audio_bytes = data.tobytes()  | 
 | 184 | + | 
 | 185 | + # Send audio to session  | 
 | 186 | + await self.session.send_audio(audio_bytes)  | 
 | 187 | + | 
 | 188 | + # Yield control back to event loop  | 
 | 189 | + await asyncio.sleep(0)  | 
 | 190 | + | 
 | 191 | + except Exception as e:  | 
 | 192 | + print(f"Audio capture error: {e}")  | 
 | 193 | + finally:  | 
 | 194 | + if self.audio_stream and self.audio_stream.active:  | 
 | 195 | + self.audio_stream.stop()  | 
 | 196 | + if self.audio_stream:  | 
 | 197 | + self.audio_stream.close()  | 
 | 198 | + | 
 | 199 | + async def _on_event(self, event: RealtimeSessionEvent) -> None:  | 
 | 200 | + """Handle session events."""  | 
 | 201 | + try:  | 
 | 202 | + if event.type == "agent_start":  | 
 | 203 | + print(f"Agent started: {event.agent.name}")  | 
 | 204 | + elif event.type == "agent_end":  | 
 | 205 | + print(f"Agent ended: {event.agent.name}")  | 
 | 206 | + elif event.type == "handoff":  | 
 | 207 | + print(f"Handoff from {event.from_agent.name} to {event.to_agent.name}")  | 
 | 208 | + elif event.type == "tool_start":  | 
 | 209 | + print(f"Tool started: {event.tool.name}")  | 
 | 210 | + elif event.type == "tool_end":  | 
 | 211 | + print(f"Tool ended: {event.tool.name}; output: {event.output}")  | 
 | 212 | + elif event.type == "audio_end":  | 
 | 213 | + print("Audio ended")  | 
 | 214 | + elif event.type == "audio":  | 
 | 215 | + # Enqueue audio for callback-based playback  | 
 | 216 | + np_audio = np.frombuffer(event.audio.data, dtype=np.int16)  | 
 | 217 | + try:  | 
 | 218 | + self.output_queue.put_nowait(np_audio)  | 
 | 219 | + except queue.Full:  | 
 | 220 | + # Queue is full - only drop if we have significant backlog  | 
 | 221 | + # This prevents aggressive dropping that could cause choppiness  | 
 | 222 | + if self.output_queue.qsize() > 8: # Keep some buffer  | 
 | 223 | + try:  | 
 | 224 | + self.output_queue.get_nowait()  | 
 | 225 | + self.output_queue.put_nowait(np_audio)  | 
 | 226 | + except queue.Empty:  | 
 | 227 | + pass  | 
 | 228 | + # If queue isn't too full, just skip this chunk to avoid blocking  | 
 | 229 | + elif event.type == "audio_interrupted":  | 
 | 230 | + print("Audio interrupted")  | 
 | 231 | + # Signal the output callback to clear its queue and state  | 
 | 232 | + self.interrupt_event.set()  | 
 | 233 | + elif event.type == "error":  | 
 | 234 | + print(f"Error: {event.error}")  | 
 | 235 | + elif event.type == "history_updated":  | 
 | 236 | + pass # Skip these frequent events  | 
 | 237 | + elif event.type == "history_added":  | 
 | 238 | + pass # Skip these frequent events  | 
 | 239 | + elif event.type == "raw_model_event":  | 
 | 240 | + print(f"Raw model event: {_truncate_str(str(event.data), 50)}")  | 
 | 241 | + else:  | 
 | 242 | + print(f"Unknown event type: {event.type}")  | 
 | 243 | + except Exception as e:  | 
 | 244 | + print(f"Error processing event: {_truncate_str(str(e), 50)}")  | 
 | 245 | + | 
 | 246 | + | 
 | 247 | +if __name__ == "__main__":  | 
 | 248 | + demo = NoUIDemo()  | 
 | 249 | + try:  | 
 | 250 | + asyncio.run(demo.run())  | 
 | 251 | + except KeyboardInterrupt:  | 
 | 252 | + print("\nExiting...")  | 
 | 253 | + sys.exit(0)  | 
0 commit comments