codetether_agent/session/helper/experimental/pairing.rs
1//! Invariant-repair pass: ensure every tool_call has its tool_result
2//! and vice versa.
3//!
4//! Provider APIs (OpenAI Responses, Anthropic Messages, Gemini) all
5//! enforce one of two invariants on conversation history:
6//!
7//! 1. Every assistant `tool_call` must be followed by a matching
8//! `tool_result` with the same `tool_call_id`.
9//! 2. Every `tool_result` must reference a `tool_call_id` that exists
10//! earlier in the buffer.
11//!
//! If a prior strategy in the [`super`] pipeline drops a message and
//! thereby breaks one of these invariants — for example, a middle-drop
//! that crosses a tool_call↔tool_result boundary — the provider rejects
//! the request outright (OpenAI: *"No tool output found for function call
//! call_XXX"*).
17//!
18//! This module runs **last** in [`super::apply_all`] and repairs
19//! orphans in the safest way available:
20//!
21//! * **Orphan tool_call** (call with no matching result): inject a
22//! synthetic `Role::Tool` message immediately after it carrying a
23//! `[tool result unavailable: elided by context management]` payload.
24//! The call id is preserved so the model can correlate.
25//! * **Orphan tool_result** (result whose call_id is nowhere earlier):
26//! drop the result. Providers that see an unreferenced result treat
27//! it as a malformed payload.
28//!
29//! # Always-on
30//!
31//! No config. This is a correctness pass — it can only make the buffer
32//! more-valid, never less.
33
34use super::ExperimentalStats;
35use crate::provider::{ContentPart, Message, Role};
36use std::collections::HashSet;
37
38/// Repair orphaned tool_call/tool_result pairings.
39///
40/// Returns stats where `snippet_hits` counts the number of repairs
41/// performed (injected synthetic results + dropped orphaned results)
42/// and `total_bytes_saved` is 0 — this pass prioritizes correctness
43/// over size.
44///
45/// # Examples
46///
47/// ```rust
48/// use codetether_agent::provider::{ContentPart, Message, Role};
49/// use codetether_agent::session::helper::experimental::pairing::repair_orphans;
50///
51/// // Assistant made a tool call, but the matching tool result message
52/// // was dropped by an upstream pass.
53/// let mut msgs = vec![
54/// Message {
55/// role: Role::Assistant,
56/// content: vec![ContentPart::ToolCall {
57/// id: "call_xyz".into(),
58/// name: "browserctl".into(),
59/// arguments: "{}".into(),
60/// thought_signature: None,
61/// }],
62/// },
63/// Message {
64/// role: Role::User,
65/// content: vec![ContentPart::Text {
66/// text: "follow-up".into(),
67/// }],
68/// },
69/// ];
70///
71/// let stats = repair_orphans(&mut msgs);
72/// assert!(stats.snippet_hits >= 1);
73/// assert_eq!(msgs.len(), 3);
74/// assert_eq!(msgs[1].role, Role::Tool);
75/// ```
76pub fn repair_orphans(messages: &mut Vec<Message>) -> ExperimentalStats {
77 let mut stats = ExperimentalStats::default();
78
79 // Pass 1: inject synthetic tool results for orphan tool_calls.
80 // Collect fulfilled ids in a forward walk; whenever we see a
81 // ToolCall whose id is never fulfilled later, inject a placeholder.
82 let fulfilled_ids = collect_fulfilled_ids(messages);
83 let mut to_inject: Vec<(usize, String)> = Vec::new();
84 for (idx, msg) in messages.iter().enumerate() {
85 if msg.role != Role::Assistant {
86 continue;
87 }
88 for part in &msg.content {
89 if let ContentPart::ToolCall { id, .. } = part
90 && !fulfilled_ids.contains(id)
91 {
92 to_inject.push((idx, id.clone()));
93 }
94 }
95 }
96 // Inject from the end so earlier indices stay valid.
97 for (idx, call_id) in to_inject.into_iter().rev() {
98 let placeholder = Message {
99 role: Role::Tool,
100 content: vec![ContentPart::ToolResult {
101 tool_call_id: call_id,
102 content: "[tool result unavailable: elided by context management]".into(),
103 }],
104 };
105 messages.insert(idx + 1, placeholder);
106 stats.snippet_hits += 1;
107 }
108
109 // Pass 2: drop tool_results that reference unknown call ids.
110 // Build a prefix set of known call ids and remove any ToolResult
111 // whose tool_call_id isn't in the prefix at its position.
112 let mut known: HashSet<String> = HashSet::new();
113 let mut drops: Vec<usize> = Vec::new();
114 for (idx, msg) in messages.iter().enumerate() {
115 // Record tool_call ids from this message for later results.
116 for part in &msg.content {
117 if let ContentPart::ToolCall { id, .. } = part {
118 known.insert(id.clone());
119 }
120 }
121 // If this message is tool-role with *every* result orphaned,
122 // mark for drop.
123 if msg.role == Role::Tool
124 && !msg.content.is_empty()
125 && msg.content.iter().all(|p| match p {
126 ContentPart::ToolResult { tool_call_id, .. } => !known.contains(tool_call_id),
127 _ => false,
128 })
129 {
130 drops.push(idx);
131 }
132 }
133 for idx in drops.into_iter().rev() {
134 messages.remove(idx);
135 stats.snippet_hits += 1;
136 }
137
138 stats
139}
140
141fn collect_fulfilled_ids(messages: &[Message]) -> HashSet<String> {
142 let mut out = HashSet::new();
143 for msg in messages {
144 if msg.role != Role::Tool {
145 continue;
146 }
147 for part in &msg.content {
148 if let ContentPart::ToolResult { tool_call_id, .. } = part {
149 out.insert(tool_call_id.clone());
150 }
151 }
152 }
153 out
154}
155
#[cfg(test)]
mod tests {
    use super::*;

    /// Assistant message carrying a single tool call with the given id.
    fn call(id: &str) -> Message {
        let tool_call = ContentPart::ToolCall {
            id: id.into(),
            name: "t".into(),
            arguments: "{}".into(),
            thought_signature: None,
        };
        Message {
            role: Role::Assistant,
            content: vec![tool_call],
        }
    }

    /// Tool message carrying a single result that answers the given call id.
    fn result(id: &str) -> Message {
        let tool_result = ContentPart::ToolResult {
            tool_call_id: id.into(),
            content: "ok".into(),
        };
        Message {
            role: Role::Tool,
            content: vec![tool_result],
        }
    }

    /// Plain user text message.
    fn user(t: &str) -> Message {
        let text_part = ContentPart::Text { text: t.into() };
        Message {
            role: Role::User,
            content: vec![text_part],
        }
    }

    #[test]
    fn well_formed_history_is_noop() {
        let mut history = vec![user("q"), call("a"), result("a"), call("b"), result("b")];
        let expected_len = history.len();
        let outcome = repair_orphans(&mut history);
        assert_eq!(outcome.snippet_hits, 0);
        assert_eq!(history.len(), expected_len);
    }

    #[test]
    fn orphan_call_gets_synthetic_result() {
        let mut history = vec![user("q"), call("a"), user("follow-up")];
        repair_orphans(&mut history);
        assert_eq!(history.len(), 4);
        assert_eq!(history[2].role, Role::Tool);
        match &history[2].content[0] {
            ContentPart::ToolResult {
                tool_call_id,
                content,
            } => {
                assert_eq!(tool_call_id, "a");
                assert!(content.contains("unavailable"));
            }
            _ => panic!("expected synthetic tool result"),
        }
    }

    #[test]
    fn orphan_result_is_dropped() {
        let mut history = vec![user("q"), result("nonexistent"), user("next")];
        repair_orphans(&mut history);
        // Only the two user messages survive.
        assert_eq!(history.len(), 2);
        assert_eq!(history[0].role, Role::User);
        assert_eq!(history[1].role, Role::User);
    }

    #[test]
    fn mixed_orphan_and_valid_pairs() {
        let mut history = vec![
            user("q"),
            call("a"), // no result follows: orphan call
            call("b"),
            result("b"),
            user("mid"),
            result("c"), // no call precedes: orphan result
        ];
        let outcome = repair_orphans(&mut history);
        // Exactly two repairs: a placeholder for "a" and a drop for "c".
        assert_eq!(outcome.snippet_hits, 2);
        // The placeholder sits directly behind call("a").
        assert_eq!(history[1].role, Role::Assistant);
        assert_eq!(history[2].role, Role::Tool);
        match &history[2].content[0] {
            ContentPart::ToolResult { tool_call_id, .. } => assert_eq!(tool_call_id, "a"),
            _ => panic!(),
        }
    }
}
246}