Skip to main content

phi_core/context/
compact_messages.rs

1use super::config::ContextConfig;
2use super::token::*;
3use crate::types::*;
4use std::sync::Arc;
5
6// ---------------------------------------------------------------------------
7// Tiered compaction
8// ---------------------------------------------------------------------------
9
10/// Compact messages to fit within the token budget using tiered strategy.
11///
12/// - Level 1: Truncate tool outputs (keep head + tail)
13/// - Level 2: Summarize old turns (replace details with one-liner)
14/// - Level 3: Drop old messages (keep first + recent only)
15///
16/// Each level is tried in order. Returns as soon as messages fit.
17/*
18DESIGN: Why `messages` is owned (Vec) but `config` is borrowed (&ContextConfig)
19  `messages` = CONSUMED — tiered compaction rewrites the list; passing by value avoids
20               an upfront clone and lets each level freely transform/drop messages
21  `config`   = READ-ONLY — just a budget + thresholds; never mutated; borrow is sufficient
22*/
23pub fn compact_messages(
24    messages: Vec<AgentMessage>, // OWNED — rewritten by each compaction level; no upfront clone needed
25    config: &ContextConfig, // SETTINGS — token budget derived from max_context_tokens - system_prompt_tokens
26) -> Vec<AgentMessage> {
27    compact_messages_with_counter(messages, config, config.token_counter.as_ref())
28}
29
30/// Compact messages using the provided token counter (or the default heuristic).
31pub fn compact_messages_with_counter(
32    messages: Vec<AgentMessage>,
33    config: &ContextConfig,
34    counter: Option<&Arc<dyn TokenCounter>>,
35) -> Vec<AgentMessage> {
36    let counter = resolve_counter(counter);
37    /*
38    RUST QUIRK: `saturating_sub` — subtraction that stops at 0, never wraps
39
40    Rust integers are bounded. On u32/usize, 0 - 1 would OVERFLOW (panic in debug, wrap in release).
41    `saturating_sub(n)` instead returns 0 if the result would be negative.
42
43    budget = max_context_tokens - system_prompt_tokens
44    If someone misconfigured these (system_prompt > max), we'd get underflow.
45    saturating_sub makes the budget = 0 (nothing fits) rather than a huge number.
46
47    Python analogy: max(0, max_context_tokens - system_prompt_tokens)
48
49    Alternative: `checked_sub(n)` returns `Option<usize>` — None on underflow.
50    Use saturating when 0 is a safe fallback; use checked when you need to handle it explicitly.
51    */
52    let budget = config
53        .max_context_tokens
54        .saturating_sub(config.system_prompt_tokens);
55
56    // Already fits?
57    if counter.estimate_messages(&messages) <= budget {
58        return messages;
59    }
60
61    // Level 1: Truncate tool outputs
62    let compacted = level1_truncate_tool_outputs(&messages, config.tool_output_max_lines);
63    if counter.estimate_messages(&compacted) <= budget {
64        return compacted;
65    }
66
67    // Level 2: Summarize old turns (keep recent N full, summarize the rest)
68    let compacted = level2_summarize_old_turns(&compacted, config.keep_recent);
69    if counter.estimate_messages(&compacted) <= budget {
70        return compacted;
71    }
72
73    // Level 3: Drop middle messages (keep first + recent)
74    level3_drop_middle_with_counter(&compacted, config, budget, counter)
75}
76
77/// Level 1: Truncate long tool outputs to head + tail.
78///
79/// This is the cheapest compaction — preserves conversation structure,
80/// just removes verbose tool output middles. In practice this saves
81/// 50-70% of context in coding sessions.
82pub(super) fn level1_truncate_tool_outputs(
83    messages: &[AgentMessage], // SOURCE — read-only input; all non-ToolResult messages pass through unchanged
84    max_lines: usize, // LIMIT — each ToolResult text block is truncated to this many lines (head+tail)
85) -> Vec<AgentMessage> {
86    messages
87        .iter()
88        .map(|msg| match msg {
89            // Match only ToolResult messages — destructure all fields so we can reconstruct below
90            AgentMessage::Llm(LlmMessage {
91                message:
92                    Message::ToolResult {
93                        tool_call_id,
94                        tool_name,
95                        content,
96                        is_error,
97                        timestamp,
98                    },
99                ..
100            }) => {
101                let truncated_content: Vec<Content> = content
102                    .iter()
103                    .map(|c| match c {
104                        Content::Text { text } => Content::Text {
105                            text: truncate_text_head_tail(text, max_lines),
106                        },
107                        other => other.clone(), // Images, ToolCalls etc. passed through unchanged
108                    })
109                    .collect();
110
111                /*
112                RUST QUIRK: `*is_error` and `*timestamp` — dereferencing to copy
113
114                Inside a match arm that borrows the enum (we matched `msg` which is `&AgentMessage`),
115                the fields `is_error` and `timestamp` are bound as `&bool` and `&u64` — references.
116
117                To use them as plain values (not references) in the new struct literal, we dereference:
118                  *is_error  → bool  (Copy type — dereference gives us the value)
119                  *timestamp → u64   (Copy type — same)
120
121                For `String` fields (not Copy), we call `.clone()` instead of dereferencing,
122                because dereferencing a &String would give us a String borrow — we need owned Strings.
123
124                Python analogy: you never need this in Python because everything is a reference/object
125                and copying happens automatically for primitives.
126                */
127                AgentMessage::Llm(LlmMessage::new(Message::ToolResult {
128                    tool_call_id: tool_call_id.clone(),
129                    tool_name: tool_name.clone(),
130                    content: truncated_content,
131                    is_error: *is_error,   // deref: &bool → bool
132                    timestamp: *timestamp, // deref: &u64  → u64
133                }))
134            }
135            other => other.clone(), // Non-ToolResult messages pass through unchanged
136        })
137        .collect()
138}
139
140/// Truncate text keeping first N/2 and last N/2 lines.
141pub(super) fn truncate_text_head_tail(
142    text: &str,       // SOURCE — the full tool output text to truncate
143    max_lines: usize, // LIMIT — keep first max_lines/2 and last max_lines/2; omitted middle shown as "[... N lines truncated ...]"
144) -> String {
145    let lines: Vec<&str> = text.lines().collect();
146    if lines.len() <= max_lines {
147        return text.to_string();
148    }
149
150    let head = max_lines / 2;
151    let tail = max_lines - head;
152    let omitted = lines.len() - head - tail;
153
154    let mut result = lines[..head].join("\n");
155    result.push_str(&format!("\n\n[... {} lines truncated ...]\n\n", omitted));
156    result.push_str(&lines[lines.len() - tail..].join("\n"));
157    result
158}
159
160/// Level 2: Summarize old assistant turns.
161///
162/// Keeps the last `keep_recent` messages in full detail.
163/// For older messages: assistant messages with tool calls get replaced
164/// with a short summary, and their tool results get dropped.
165fn level2_summarize_old_turns(
166    messages: &[AgentMessage], // SOURCE — full conversation history to be summarized
167    keep_recent: usize, // WINDOW — last N messages kept verbatim; everything before is summarized/dropped
168) -> Vec<AgentMessage> {
169    let len = messages.len();
170    if len <= keep_recent {
171        return messages.to_vec();
172    }
173
174    let boundary = len - keep_recent;
175    let mut result = Vec::new();
176
177    let mut i = 0;
178    while i < boundary {
179        let msg = &messages[i];
180        match msg {
181            AgentMessage::Llm(LlmMessage {
182                message: Message::Assistant { content, .. },
183                ..
184            }) => {
185                // Summarize: extract text content, skip tool call details
186                let text_parts: Vec<&str> = content
187                    .iter()
188                    .filter_map(|c| match c {
189                        Content::Text { text } => {
190                            if text.len() > 200 {
191                                None // Too long, will be replaced
192                            } else {
193                                Some(text.as_str())
194                            }
195                        }
196                        _ => None,
197                    })
198                    .collect();
199
200                let tool_count = content
201                    .iter()
202                    .filter(|c| matches!(c, Content::ToolCall { .. }))
203                    .count();
204
205                let summary = if !text_parts.is_empty() {
206                    text_parts.join(" ")
207                } else if tool_count > 0 {
208                    format!("[Assistant used {} tool(s)]", tool_count)
209                } else {
210                    "[Assistant response]".into()
211                };
212
213                result.push(AgentMessage::Llm(LlmMessage::new(Message::User {
214                    content: vec![Content::Text {
215                        text: format!("[Summary] {}", summary),
216                    }],
217                    timestamp: now_ms(),
218                })));
219
220                // Skip following tool results that belong to this turn
221                i += 1;
222                while i < boundary {
223                    if let AgentMessage::Llm(LlmMessage {
224                        message: Message::ToolResult { .. },
225                        ..
226                    }) = &messages[i]
227                    {
228                        i += 1;
229                    } else {
230                        break;
231                    }
232                }
233                continue;
234            }
235            AgentMessage::Llm(LlmMessage {
236                message: Message::ToolResult { .. },
237                ..
238            }) => {
239                // Skip orphaned tool results in old section
240                i += 1;
241                continue;
242            }
243            other => {
244                // Keep user messages as-is (they provide intent)
245                result.push(other.clone());
246            }
247        }
248        i += 1;
249    }
250
251    // Append recent messages in full
252    result.extend_from_slice(&messages[boundary..]);
253    result
254}
255
256/// Level 3: Drop middle messages with a pluggable token counter.
257fn level3_drop_middle_with_counter(
258    messages: &[AgentMessage],
259    config: &ContextConfig,
260    budget: usize,
261    counter: &dyn TokenCounter,
262) -> Vec<AgentMessage> {
263    let len = messages.len();
264    // .min(len) prevents keep_first from exceeding the actual message count
265    // Python analogy: first_end = min(config.keep_first, len)
266    let first_end = config.keep_first.min(len);
267    // saturating_sub: if keep_recent > len, recent_start = 0 (take all messages as "recent")
268    let recent_start = len.saturating_sub(config.keep_recent);
269
270    if first_end >= recent_start {
271        // Can't split — just keep as many recent as fit
272        return keep_within_budget_with_counter(messages, budget, counter);
273    }
274
275    let first_msgs = &messages[..first_end];
276    let recent_msgs = &messages[recent_start..];
277    let removed = recent_start - first_end;
278
279    let marker = AgentMessage::Llm(LlmMessage::new(Message::User {
280        content: vec![Content::Text {
281            text: format!(
282                "[Context compacted: {} messages removed to fit context window]",
283                removed
284            ),
285        }],
286        timestamp: now_ms(),
287    }));
288
289    let mut result = first_msgs.to_vec();
290    result.push(marker);
291    result.extend_from_slice(recent_msgs);
292
293    // If still too big, progressively drop from recent
294    if counter.estimate_messages(&result) > budget {
295        return keep_within_budget_with_counter(&result, budget, counter);
296    }
297
298    result
299}
300
301/// Keep as many recent messages as fit within budget using a pluggable counter.
302fn keep_within_budget_with_counter(
303    messages: &[AgentMessage],
304    budget: usize,
305    counter: &dyn TokenCounter,
306) -> Vec<AgentMessage> {
307    let mut result = Vec::new();
308    let mut remaining = budget;
309
310    for msg in messages.iter().rev() {
311        let tokens = counter.estimate_message(msg);
312        if tokens > remaining {
313            break;
314        }
315        remaining -= tokens;
316        result.push(msg.clone());
317    }
318
319    result.reverse();
320
321    if result.len() < messages.len() {
322        let removed = messages.len() - result.len();
323        result.insert(
324            0,
325            AgentMessage::Llm(LlmMessage::new(Message::User {
326                content: vec![Content::Text {
327                    text: format!("[Context compacted: {} messages removed]", removed),
328                }],
329                timestamp: now_ms(),
330            })),
331        );
332    }
333
334    result
335}