Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 48 additions & 13 deletions src/agents/handoffs/history.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,20 @@
_conversation_history_start = _DEFAULT_CONVERSATION_HISTORY_START
_conversation_history_end = _DEFAULT_CONVERSATION_HISTORY_END

# Input item ``type`` values that are already described inside the generated
# conversation summary. `_should_forward_pre_item` and `_should_forward_new_item`
# test an item's ``type`` against this set so that matching items are not
# forwarded verbatim to the next agent.
# NOTE(review): per the review thread on this change, handoff call/output items
# are reported to carry the "function_call" / "function_call_output" types as
# well, so membership here also filters them out of ``new_items`` -- confirm
# whether those items must still be kept for recorded session/stream history.
_SUMMARY_ONLY_INPUT_TYPES = {
"function_call",
"function_call_output",
"computer_call",
"computer_call_output",
"file_search_call",
"file_search_call_output",
"web_search_call",
"web_search_call_output",
"reasoning",
}


def set_conversation_history_wrappers(
*,
Expand Down Expand Up @@ -67,23 +81,31 @@ def nest_handoff_history(

normalized_history = _normalize_input_history(handoff_input_data.input_history)
flattened_history = _flatten_nested_history_messages(normalized_history)
pre_items_as_inputs = [
_run_item_to_plain_input(item) for item in handoff_input_data.pre_handoff_items
]
new_items_as_inputs = [_run_item_to_plain_input(item) for item in handoff_input_data.new_items]
pre_items_as_inputs: list[TResponseInputItem] = []
filtered_pre_items: list[RunItem] = []
for run_item in handoff_input_data.pre_handoff_items:
plain_input = _run_item_to_plain_input(run_item)
pre_items_as_inputs.append(plain_input)
if _should_forward_pre_item(plain_input):
filtered_pre_items.append(run_item)

new_items_as_inputs: list[TResponseInputItem] = []
filtered_new_items: list[RunItem] = []
for run_item in handoff_input_data.new_items:
plain_input = _run_item_to_plain_input(run_item)
new_items_as_inputs.append(plain_input)
if _should_forward_new_item(plain_input):
filtered_new_items.append(run_item)
Comment on lines +94 to +98

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Preserve handoff tool items when nesting history

The new filtering of new_items drops any run item whose input type is in _SUMMARY_ONLY_INPUT_TYPES (e.g., function_call and function_call_output). With nest_handoff_history=True—the default when a handoff occurs—this loop removes both the HandoffCallItem and HandoffOutputItem produced during the handoff. Those filtered new_items are assigned to SingleStepResult.new_step_items, which AgentRunner then writes to the session/stream as the only records of the turn on the handoff path (see run.py lines 740‑748). After this change, the handoff tool call/output silently vanish from persisted session history and streamed events even though they actually happened, reducing observability of handoffs. The duplication fix should avoid forwarding these to the next agent without removing them from the turn’s recorded items.

Useful? React with 👍 / 👎.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, we need to handle this. Any suggestions?


transcript = flattened_history + pre_items_as_inputs + new_items_as_inputs

mapper = history_mapper or default_handoff_history_mapper
history_items = mapper(transcript)
filtered_pre_items = tuple(
item
for item in handoff_input_data.pre_handoff_items
if _get_run_item_role(item) != "assistant"
)

return handoff_input_data.clone(
input_history=tuple(deepcopy(item) for item in history_items),
pre_handoff_items=filtered_pre_items,
pre_handoff_items=tuple(filtered_pre_items),
new_items=tuple(filtered_new_items),
)


Expand Down Expand Up @@ -231,6 +253,19 @@ def _split_role_and_name(role_text: str) -> tuple[str, str | None]:
return (role_text or "developer", None)


def _get_run_item_role(run_item: RunItem) -> str | None:
    """Return the ``role`` of the run item's input form, or ``None`` if it is not a string."""
    role = run_item.to_input_item().get("role")
    if isinstance(role, str):
        return role
    return None
def _should_forward_pre_item(input_item: TResponseInputItem) -> bool:
    """Decide whether a pre-handoff item should still be forwarded.

    Returns ``False`` when the item's ``role`` is the string ``"assistant"`` or
    when its ``type`` is a string listed in ``_SUMMARY_ONLY_INPUT_TYPES``;
    returns ``True`` for everything else.
    """
    role = input_item.get("role")
    if isinstance(role, str) and role == "assistant":
        return False

    item_type = input_item.get("type")
    # Only string types are looked up; anything else is forwarded unchanged.
    if not isinstance(item_type, str):
        return True
    return item_type not in _SUMMARY_ONLY_INPUT_TYPES


def _should_forward_new_item(input_item: TResponseInputItem) -> bool:
    """Decide whether a newly produced item should still be forwarded.

    Any item with a non-empty string ``role`` is always forwarded. Otherwise the
    item is forwarded unless its ``type`` is a string listed in
    ``_SUMMARY_ONLY_INPUT_TYPES``.
    """
    role = input_item.get("role")
    if isinstance(role, str) and role:
        return True

    item_type = input_item.get("type")
    # Only string types are looked up; anything else is forwarded unchanged.
    if not isinstance(item_type, str):
        return True
    return item_type not in _SUMMARY_ONLY_INPUT_TYPES
51 changes: 49 additions & 2 deletions tests/test_extension_filters.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from copy import deepcopy
from typing import Any, cast

from openai.types.responses import ResponseOutputMessage, ResponseOutputText
from openai.types.responses import (
ResponseFunctionToolCall,
ResponseOutputMessage,
ResponseOutputText,
)
from openai.types.responses.response_reasoning_item import ResponseReasoningItem

from agents import (
Expand All @@ -17,6 +21,7 @@
HandoffOutputItem,
MessageOutputItem,
ReasoningItem,
ToolCallItem,
ToolCallOutputItem,
TResponseInputItem,
)
Expand Down Expand Up @@ -81,6 +86,19 @@ def _get_tool_output_run_item(content: str) -> ToolCallOutputItem:
)


def _get_tool_call_run_item(name: str = "transfer_to_agent") -> ToolCallItem:
    """Build a ``ToolCallItem`` whose raw item is a ``function_call`` named *name*."""
    agent = fake_agent()
    raw_call = ResponseFunctionToolCall(
        id="call-1",
        arguments="{}",
        call_id="call-1",
        name=name,
        type="function_call",
    )
    return ToolCallItem(agent=agent, raw_item=raw_call)


def _get_handoff_input_item(content: str) -> TResponseInputItem:
return {
"call_id": "1",
Expand Down Expand Up @@ -273,7 +291,36 @@ def test_nest_handoff_history_wraps_transcript() -> None:
assert "Assist reply" in summary_content
assert "Hello" in summary_content
assert len(nested.pre_handoff_items) == 0
assert nested.new_items == data.new_items
assert len(nested.new_items) == 1
assert isinstance(nested.new_items[0], MessageOutputItem)


def test_nest_handoff_history_filters_tool_calls_and_outputs() -> None:
    """Tool call/output items appear in the summary but are not forwarded as run items."""
    handoff_data = HandoffInputData(
        input_history=(_get_user_input_item("Hello"),),
        pre_handoff_items=(
            _get_tool_call_run_item(),
            _get_tool_output_run_item("result"),
        ),
        new_items=(
            _get_message_output_run_item("Handoff request"),
            _get_handoff_output_run_item("transfer"),
        ),
        run_context=RunContextWrapper(context=()),
    )

    result = nest_handoff_history(handoff_data)

    # The nested history is expected to be a single summary message.
    history = result.input_history
    assert isinstance(history, tuple)
    assert len(history) == 1

    summary_text = _as_message(history[0])["content"]
    assert isinstance(summary_text, str)
    assert "function_call" in summary_text
    assert "function_call_output" in summary_text

    # Only the message output item is expected to remain among the run items.
    assert result.pre_handoff_items == ()
    remaining_new_items = result.new_items
    assert len(remaining_new_items) == 1
    assert isinstance(remaining_new_items[0], MessageOutputItem)


def test_nest_handoff_history_handles_missing_user() -> None:
Expand Down
Loading