phi_core/context/compact_messages.rs
1use super::config::ContextConfig;
2use super::token::*;
3use crate::types::*;
4use std::sync::Arc;
5
6// ---------------------------------------------------------------------------
7// Tiered compaction
8// ---------------------------------------------------------------------------
9
10/// Compact messages to fit within the token budget using tiered strategy.
11///
12/// - Level 1: Truncate tool outputs (keep head + tail)
13/// - Level 2: Summarize old turns (replace details with one-liner)
14/// - Level 3: Drop old messages (keep first + recent only)
15///
16/// Each level is tried in order. Returns as soon as messages fit.
17/*
18DESIGN: Why `messages` is owned (Vec) but `config` is borrowed (&ContextConfig)
19 `messages` = CONSUMED — tiered compaction rewrites the list; passing by value avoids
20 an upfront clone and lets each level freely transform/drop messages
21 `config` = READ-ONLY — just a budget + thresholds; never mutated; borrow is sufficient
22*/
23pub fn compact_messages(
24 messages: Vec<AgentMessage>, // OWNED — rewritten by each compaction level; no upfront clone needed
25 config: &ContextConfig, // SETTINGS — token budget derived from max_context_tokens - system_prompt_tokens
26) -> Vec<AgentMessage> {
27 compact_messages_with_counter(messages, config, config.token_counter.as_ref())
28}
29
30/// Compact messages using the provided token counter (or the default heuristic).
31pub fn compact_messages_with_counter(
32 messages: Vec<AgentMessage>,
33 config: &ContextConfig,
34 counter: Option<&Arc<dyn TokenCounter>>,
35) -> Vec<AgentMessage> {
36 let counter = resolve_counter(counter);
37 /*
38 RUST QUIRK: `saturating_sub` — subtraction that stops at 0, never wraps
39
40 Rust integers are bounded. On u32/usize, 0 - 1 would OVERFLOW (panic in debug, wrap in release).
41 `saturating_sub(n)` instead returns 0 if the result would be negative.
42
43 budget = max_context_tokens - system_prompt_tokens
44 If someone misconfigured these (system_prompt > max), we'd get underflow.
45 saturating_sub makes the budget = 0 (nothing fits) rather than a huge number.
46
47 Python analogy: max(0, max_context_tokens - system_prompt_tokens)
48
49 Alternative: `checked_sub(n)` returns `Option<usize>` — None on underflow.
50 Use saturating when 0 is a safe fallback; use checked when you need to handle it explicitly.
51 */
52 let budget = config
53 .max_context_tokens
54 .saturating_sub(config.system_prompt_tokens);
55
56 // Already fits?
57 if counter.estimate_messages(&messages) <= budget {
58 return messages;
59 }
60
61 // Level 1: Truncate tool outputs
62 let compacted = level1_truncate_tool_outputs(&messages, config.tool_output_max_lines);
63 if counter.estimate_messages(&compacted) <= budget {
64 return compacted;
65 }
66
67 // Level 2: Summarize old turns (keep recent N full, summarize the rest)
68 let compacted = level2_summarize_old_turns(&compacted, config.keep_recent);
69 if counter.estimate_messages(&compacted) <= budget {
70 return compacted;
71 }
72
73 // Level 3: Drop middle messages (keep first + recent)
74 level3_drop_middle_with_counter(&compacted, config, budget, counter)
75}
76
77/// Level 1: Truncate long tool outputs to head + tail.
78///
79/// This is the cheapest compaction — preserves conversation structure,
80/// just removes verbose tool output middles. In practice this saves
81/// 50-70% of context in coding sessions.
82pub(super) fn level1_truncate_tool_outputs(
83 messages: &[AgentMessage], // SOURCE — read-only input; all non-ToolResult messages pass through unchanged
84 max_lines: usize, // LIMIT — each ToolResult text block is truncated to this many lines (head+tail)
85) -> Vec<AgentMessage> {
86 messages
87 .iter()
88 .map(|msg| match msg {
89 // Match only ToolResult messages — destructure all fields so we can reconstruct below
90 AgentMessage::Llm(LlmMessage {
91 message:
92 Message::ToolResult {
93 tool_call_id,
94 tool_name,
95 content,
96 is_error,
97 timestamp,
98 },
99 ..
100 }) => {
101 let truncated_content: Vec<Content> = content
102 .iter()
103 .map(|c| match c {
104 Content::Text { text } => Content::Text {
105 text: truncate_text_head_tail(text, max_lines),
106 },
107 other => other.clone(), // Images, ToolCalls etc. passed through unchanged
108 })
109 .collect();
110
111 /*
112 RUST QUIRK: `*is_error` and `*timestamp` — dereferencing to copy
113
114 Inside a match arm that borrows the enum (we matched `msg` which is `&AgentMessage`),
115 the fields `is_error` and `timestamp` are bound as `&bool` and `&u64` — references.
116
117 To use them as plain values (not references) in the new struct literal, we dereference:
118 *is_error → bool (Copy type — dereference gives us the value)
119 *timestamp → u64 (Copy type — same)
120
121 For `String` fields (not Copy), we call `.clone()` instead of dereferencing,
122 because dereferencing a &String would give us a String borrow — we need owned Strings.
123
124 Python analogy: you never need this in Python because everything is a reference/object
125 and copying happens automatically for primitives.
126 */
127 AgentMessage::Llm(LlmMessage::new(Message::ToolResult {
128 tool_call_id: tool_call_id.clone(),
129 tool_name: tool_name.clone(),
130 content: truncated_content,
131 is_error: *is_error, // deref: &bool → bool
132 timestamp: *timestamp, // deref: &u64 → u64
133 }))
134 }
135 other => other.clone(), // Non-ToolResult messages pass through unchanged
136 })
137 .collect()
138}
139
140/// Truncate text keeping first N/2 and last N/2 lines.
141pub(super) fn truncate_text_head_tail(
142 text: &str, // SOURCE — the full tool output text to truncate
143 max_lines: usize, // LIMIT — keep first max_lines/2 and last max_lines/2; omitted middle shown as "[... N lines truncated ...]"
144) -> String {
145 let lines: Vec<&str> = text.lines().collect();
146 if lines.len() <= max_lines {
147 return text.to_string();
148 }
149
150 let head = max_lines / 2;
151 let tail = max_lines - head;
152 let omitted = lines.len() - head - tail;
153
154 let mut result = lines[..head].join("\n");
155 result.push_str(&format!("\n\n[... {} lines truncated ...]\n\n", omitted));
156 result.push_str(&lines[lines.len() - tail..].join("\n"));
157 result
158}
159
160/// Level 2: Summarize old assistant turns.
161///
162/// Keeps the last `keep_recent` messages in full detail.
163/// For older messages: assistant messages with tool calls get replaced
164/// with a short summary, and their tool results get dropped.
165fn level2_summarize_old_turns(
166 messages: &[AgentMessage], // SOURCE — full conversation history to be summarized
167 keep_recent: usize, // WINDOW — last N messages kept verbatim; everything before is summarized/dropped
168) -> Vec<AgentMessage> {
169 let len = messages.len();
170 if len <= keep_recent {
171 return messages.to_vec();
172 }
173
174 let boundary = len - keep_recent;
175 let mut result = Vec::new();
176
177 let mut i = 0;
178 while i < boundary {
179 let msg = &messages[i];
180 match msg {
181 AgentMessage::Llm(LlmMessage {
182 message: Message::Assistant { content, .. },
183 ..
184 }) => {
185 // Summarize: extract text content, skip tool call details
186 let text_parts: Vec<&str> = content
187 .iter()
188 .filter_map(|c| match c {
189 Content::Text { text } => {
190 if text.len() > 200 {
191 None // Too long, will be replaced
192 } else {
193 Some(text.as_str())
194 }
195 }
196 _ => None,
197 })
198 .collect();
199
200 let tool_count = content
201 .iter()
202 .filter(|c| matches!(c, Content::ToolCall { .. }))
203 .count();
204
205 let summary = if !text_parts.is_empty() {
206 text_parts.join(" ")
207 } else if tool_count > 0 {
208 format!("[Assistant used {} tool(s)]", tool_count)
209 } else {
210 "[Assistant response]".into()
211 };
212
213 result.push(AgentMessage::Llm(LlmMessage::new(Message::User {
214 content: vec![Content::Text {
215 text: format!("[Summary] {}", summary),
216 }],
217 timestamp: now_ms(),
218 })));
219
220 // Skip following tool results that belong to this turn
221 i += 1;
222 while i < boundary {
223 if let AgentMessage::Llm(LlmMessage {
224 message: Message::ToolResult { .. },
225 ..
226 }) = &messages[i]
227 {
228 i += 1;
229 } else {
230 break;
231 }
232 }
233 continue;
234 }
235 AgentMessage::Llm(LlmMessage {
236 message: Message::ToolResult { .. },
237 ..
238 }) => {
239 // Skip orphaned tool results in old section
240 i += 1;
241 continue;
242 }
243 other => {
244 // Keep user messages as-is (they provide intent)
245 result.push(other.clone());
246 }
247 }
248 i += 1;
249 }
250
251 // Append recent messages in full
252 result.extend_from_slice(&messages[boundary..]);
253 result
254}
255
256/// Level 3: Drop middle messages with a pluggable token counter.
257fn level3_drop_middle_with_counter(
258 messages: &[AgentMessage],
259 config: &ContextConfig,
260 budget: usize,
261 counter: &dyn TokenCounter,
262) -> Vec<AgentMessage> {
263 let len = messages.len();
264 // .min(len) prevents keep_first from exceeding the actual message count
265 // Python analogy: first_end = min(config.keep_first, len)
266 let first_end = config.keep_first.min(len);
267 // saturating_sub: if keep_recent > len, recent_start = 0 (take all messages as "recent")
268 let recent_start = len.saturating_sub(config.keep_recent);
269
270 if first_end >= recent_start {
271 // Can't split — just keep as many recent as fit
272 return keep_within_budget_with_counter(messages, budget, counter);
273 }
274
275 let first_msgs = &messages[..first_end];
276 let recent_msgs = &messages[recent_start..];
277 let removed = recent_start - first_end;
278
279 let marker = AgentMessage::Llm(LlmMessage::new(Message::User {
280 content: vec![Content::Text {
281 text: format!(
282 "[Context compacted: {} messages removed to fit context window]",
283 removed
284 ),
285 }],
286 timestamp: now_ms(),
287 }));
288
289 let mut result = first_msgs.to_vec();
290 result.push(marker);
291 result.extend_from_slice(recent_msgs);
292
293 // If still too big, progressively drop from recent
294 if counter.estimate_messages(&result) > budget {
295 return keep_within_budget_with_counter(&result, budget, counter);
296 }
297
298 result
299}
300
301/// Keep as many recent messages as fit within budget using a pluggable counter.
302fn keep_within_budget_with_counter(
303 messages: &[AgentMessage],
304 budget: usize,
305 counter: &dyn TokenCounter,
306) -> Vec<AgentMessage> {
307 let mut result = Vec::new();
308 let mut remaining = budget;
309
310 for msg in messages.iter().rev() {
311 let tokens = counter.estimate_message(msg);
312 if tokens > remaining {
313 break;
314 }
315 remaining -= tokens;
316 result.push(msg.clone());
317 }
318
319 result.reverse();
320
321 if result.len() < messages.len() {
322 let removed = messages.len() - result.len();
323 result.insert(
324 0,
325 AgentMessage::Llm(LlmMessage::new(Message::User {
326 content: vec![Content::Text {
327 text: format!("[Context compacted: {} messages removed]", removed),
328 }],
329 timestamp: now_ms(),
330 })),
331 );
332 }
333
334 result
335}