1use crate::llm::message::{
21 ContentBlock, Message, MessageLevel, SystemMessage, SystemMessageType, UserMessage,
22};
23use crate::services::tokens;
24use uuid::Uuid;
25
/// Headroom subtracted from the effective context window when computing the
/// auto-compact threshold, so compaction triggers before the hard limit.
const AUTOCOMPACT_BUFFER_TOKENS: u64 = 13_000;

/// Cap on the output-token reservation used by `effective_context_window`;
/// the reservation is the smaller of this and the model's own max output.
const MAX_OUTPUT_TOKENS_FOR_SUMMARY: u64 = 20_000;

/// Circuit breaker: after this many consecutive compaction failures,
/// `should_auto_compact` stops requesting further compactions.
const MAX_CONSECUTIVE_FAILURES: u32 = 3;

/// Maximum automatic recovery attempts after a max-output-token stop.
/// NOTE(review): enforcement is not in this module — confirm at call site.
pub const MAX_OUTPUT_TOKENS_RECOVERY_LIMIT: u32 = 3;

/// Tool names (matched case-insensitively) whose old results are safe to
/// clear during microcompaction.
const COMPACTABLE_TOOLS: &[&str] = &["FileRead", "Bash", "Grep", "Glob", "FileEdit", "FileWrite"];
40
/// Snapshot of how close a conversation is to the model's context limit,
/// computed from an estimated token count (see `token_warning_state`).
#[derive(Debug, Clone)]
pub struct TokenWarningState {
    /// Rounded percentage of the effective context window still free (0–100).
    pub percent_left: u64,
    /// Usage has crossed the warning threshold (within ~20k of the window).
    pub is_above_warning: bool,
    /// Usage has crossed the error threshold (see `token_warning_state`).
    pub is_above_error: bool,
    /// Usage has crossed the auto-compact threshold.
    pub should_compact: bool,
    /// Usage is within ~3k tokens of the window; further requests should block.
    pub is_blocking: bool,
}
55
/// Book-keeping for compaction attempts across a session.
#[derive(Debug, Clone, Default)]
pub struct CompactTracking {
    /// Consecutive failed compaction attempts; feeds the circuit breaker
    /// in `should_auto_compact`.
    pub consecutive_failures: u32,
    /// Whether this conversation has been compacted at least once.
    pub was_compacted: bool,
}
62
63pub fn effective_context_window(model: &str) -> u64 {
65 let context = tokens::context_window_for_model(model);
66 let reserved = tokens::max_output_tokens_for_model(model).min(MAX_OUTPUT_TOKENS_FOR_SUMMARY);
67 context.saturating_sub(reserved)
68}
69
70pub fn auto_compact_threshold(model: &str) -> u64 {
72 effective_context_window(model).saturating_sub(AUTOCOMPACT_BUFFER_TOKENS)
73}
74
75pub fn token_warning_state(messages: &[Message], model: &str) -> TokenWarningState {
77 let token_count = tokens::estimate_context_tokens(messages);
78 let threshold = auto_compact_threshold(model);
79 let effective = effective_context_window(model);
80
81 let percent_left = if effective > 0 {
82 ((effective.saturating_sub(token_count)) as f64 / effective as f64 * 100.0)
83 .round()
84 .max(0.0) as u64
85 } else {
86 0
87 };
88
89 let warning_buffer = 20_000;
90
91 TokenWarningState {
92 percent_left,
93 is_above_warning: token_count >= effective.saturating_sub(warning_buffer),
94 is_above_error: token_count >= effective.saturating_sub(warning_buffer),
95 should_compact: token_count >= threshold,
96 is_blocking: token_count >= effective.saturating_sub(3_000),
97 }
98}
99
100pub fn should_auto_compact(messages: &[Message], model: &str, tracking: &CompactTracking) -> bool {
102 if tracking.consecutive_failures >= MAX_CONSECUTIVE_FAILURES {
104 return false;
105 }
106
107 let state = token_warning_state(messages, model);
108 state.should_compact
109}
110
111pub fn microcompact(messages: &mut [Message], keep_recent: usize) -> u64 {
116 let keep_recent = keep_recent.max(1);
117
118 let mut compactable_indices: Vec<(usize, usize)> = Vec::new(); for (msg_idx, msg) in messages.iter().enumerate() {
122 if let Message::User(u) = msg {
123 for (block_idx, block) in u.content.iter().enumerate() {
124 if let ContentBlock::ToolResult { tool_use_id, .. } = block {
125 if is_compactable_tool_result(messages, tool_use_id) {
127 compactable_indices.push((msg_idx, block_idx));
128 }
129 }
130 }
131 }
132 }
133
134 if compactable_indices.len() <= keep_recent {
135 return 0;
136 }
137
138 let clear_count = compactable_indices.len() - keep_recent;
140 let to_clear = &compactable_indices[..clear_count];
141
142 let mut freed_tokens = 0u64;
143
144 for &(msg_idx, block_idx) in to_clear {
145 if let Message::User(ref mut u) = messages[msg_idx]
146 && let ContentBlock::ToolResult {
147 ref mut content,
148 tool_use_id: _,
149 is_error: _,
150 ..
151 } = u.content[block_idx]
152 {
153 let old_tokens = tokens::estimate_tokens(content);
154 let placeholder = "[Old tool result cleared]".to_string();
155 let new_tokens = tokens::estimate_tokens(&placeholder);
156 *content = placeholder;
157 freed_tokens += old_tokens.saturating_sub(new_tokens);
158 }
159 }
160
161 freed_tokens
162}
163
164fn is_compactable_tool_result(messages: &[Message], tool_use_id: &str) -> bool {
166 for msg in messages {
167 if let Message::Assistant(a) = msg {
168 for block in &a.content {
169 if let ContentBlock::ToolUse { id, name, .. } = block
170 && id == tool_use_id
171 {
172 return COMPACTABLE_TOOLS
173 .iter()
174 .any(|t| t.eq_ignore_ascii_case(name));
175 }
176 }
177 }
178 }
179 false
180}
181
182pub fn compact_boundary_message(summary: &str) -> Message {
184 Message::System(SystemMessage {
185 uuid: Uuid::new_v4(),
186 timestamp: chrono::Utc::now().to_rfc3339(),
187 subtype: SystemMessageType::CompactBoundary,
188 content: format!("[Conversation compacted. Summary: {summary}]"),
189 level: MessageLevel::Info,
190 })
191}
192
193pub fn build_compact_summary_prompt(messages: &[Message]) -> String {
196 let mut context = String::new();
197 for msg in messages {
198 match msg {
199 Message::User(u) => {
200 context.push_str("User: ");
201 for block in &u.content {
202 if let ContentBlock::Text { text } = block {
203 context.push_str(text);
204 }
205 }
206 context.push('\n');
207 }
208 Message::Assistant(a) => {
209 context.push_str("Assistant: ");
210 for block in &a.content {
211 if let ContentBlock::Text { text } = block {
212 context.push_str(text);
213 }
214 }
215 context.push('\n');
216 }
217 _ => {}
218 }
219 }
220
221 format!(
222 "Summarize this conversation concisely, preserving key decisions, \
223 file changes made, and important context. Focus on what the user \
224 was trying to accomplish and what was done.\n\n{context}"
225 )
226}
227
228pub fn max_output_recovery_message() -> Message {
230 Message::User(UserMessage {
231 uuid: Uuid::new_v4(),
232 timestamp: chrono::Utc::now().to_rfc3339(),
233 content: vec![ContentBlock::Text {
234 text: "Output token limit hit. Resume directly — no apology, no recap \
235 of what you were doing. Pick up mid-thought if that is where the \
236 cut happened. Break remaining work into smaller pieces."
237 .to_string(),
238 }],
239 is_meta: true,
240 is_compact_summary: false,
241 })
242}
243
244pub fn parse_prompt_too_long_gap(error_text: &str) -> Option<u64> {
249 let re = regex::Regex::new(r"(\d+)\s*tokens?\s*>\s*(\d+)").ok()?;
250 let captures = re.captures(error_text)?;
251 let actual: u64 = captures.get(1)?.as_str().parse().ok()?;
252 let limit: u64 = captures.get(2)?.as_str().parse().ok()?;
253 let gap = actual.saturating_sub(limit);
254 if gap > 0 { Some(gap) } else { None }
255}
256
/// Compacts the conversation by summarizing older messages with an LLM call
/// and replacing them with a boundary marker plus a meta summary message.
///
/// On success `messages` becomes:
/// `[compact boundary, meta summary message, <kept recent tail>]`
/// and the number of removed messages is returned. Returns `None` — leaving
/// `messages` untouched — when the conversation is too small, the LLM call
/// fails, or the streamed summary comes back empty.
pub async fn compact_with_llm(
    messages: &mut Vec<Message>,
    llm: &dyn crate::llm::provider::Provider,
    model: &str,
) -> Option<usize> {
    // Too small to be worth compacting.
    if messages.len() < 4 {
        return None;
    }

    // Everything before `split_point` is summarized; the tail is kept as-is.
    let keep_count = calculate_keep_count(messages);
    let split_point = messages.len().saturating_sub(keep_count);

    // Require at least two messages to summarize, else skip.
    if split_point < 2 {
        return None;
    }

    let to_summarize = &messages[..split_point];
    let summary_prompt = build_compact_summary_prompt(to_summarize);

    // One-shot summarization request: no tools, no caching.
    let summary_messages = vec![crate::llm::message::user_message(&summary_prompt)];
    let request = crate::llm::provider::ProviderRequest {
        messages: summary_messages,
        system_prompt: "You are a conversation summarizer. Produce a concise summary \
                        preserving key decisions, file changes, and important context. \
                        Do not use tools."
            .to_string(),
        tools: vec![],
        model: model.to_string(),
        max_tokens: 4096,
        temperature: None,
        enable_caching: false,
        tool_choice: Default::default(),
        metadata: None,
    };

    let mut rx = match llm.stream(&request).await {
        Ok(rx) => rx,
        Err(e) => {
            tracing::warn!("Compact LLM call failed: {e}");
            return None;
        }
    };

    // Drain the stream, accumulating only text deltas; other events ignored.
    let mut summary = String::new();
    while let Some(event) = rx.recv().await {
        if let crate::llm::stream::StreamEvent::TextDelta(text) = event {
            summary.push_str(&text);
        }
    }

    // An empty summary means nothing useful came back — abort untouched.
    if summary.is_empty() {
        return None;
    }

    // Rebuild the list: boundary marker, summary message, then the kept tail.
    // The tail is cloned out first because we clear `messages` in place.
    let kept = messages[split_point..].to_vec();
    let removed = split_point;

    messages.clear();
    messages.push(compact_boundary_message(&summary));
    messages.push(Message::User(UserMessage {
        uuid: Uuid::new_v4(),
        timestamp: chrono::Utc::now().to_rfc3339(),
        content: vec![ContentBlock::Text {
            text: format!("[Conversation compacted. Prior context summary:]\n\n{summary}"),
        }],
        is_meta: true,
        is_compact_summary: true,
    }));
    messages.extend(kept);

    tracing::info!("Compacted {removed} messages into summary");
    Some(removed)
}
345
346fn calculate_keep_count(messages: &[Message]) -> usize {
351 let min_text_messages = 5;
352 let min_tokens = 10_000u64;
353 let max_tokens = 40_000u64;
354
355 let mut count = 0usize;
356 let mut text_count = 0usize;
357 let mut token_total = 0u64;
358
359 for msg in messages.iter().rev() {
361 let tokens = crate::services::tokens::estimate_message_tokens(msg);
362 token_total += tokens;
363 count += 1;
364
365 let has_text = match msg {
367 Message::User(u) => u
368 .content
369 .iter()
370 .any(|b| matches!(b, ContentBlock::Text { .. })),
371 Message::Assistant(a) => a
372 .content
373 .iter()
374 .any(|b| matches!(b, ContentBlock::Text { .. })),
375 _ => false,
376 };
377 if has_text {
378 text_count += 1;
379 }
380
381 if text_count >= min_text_messages && token_total >= min_tokens {
383 break;
384 }
385 if token_total >= max_tokens {
387 break;
388 }
389 }
390
391 count
392}
393
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_auto_compact_threshold() {
        let threshold = auto_compact_threshold("claude-sonnet");
        assert_eq!(threshold, 200_000 - 16_384 - 13_000);
    }

    #[test]
    fn test_parse_prompt_too_long_gap() {
        let msg = "prompt is too long: 137500 tokens > 135000 maximum";
        assert_eq!(parse_prompt_too_long_gap(msg), Some(2500));
    }

    #[test]
    fn test_parse_prompt_too_long_no_match() {
        assert_eq!(parse_prompt_too_long_gap("some other error"), None);
    }

    #[test]
    fn test_effective_context_window() {
        let eff = effective_context_window("claude-sonnet");
        assert!(eff > 100_000);
        assert!(eff < 200_000);
    }

    #[test]
    fn test_token_warning_state_empty() {
        let state = token_warning_state(&[], "claude-sonnet");
        assert_eq!(state.percent_left, 100);
        assert!(!state.is_above_warning);
        assert!(!state.is_blocking);
    }

    #[test]
    fn test_should_auto_compact_empty() {
        let tracking = CompactTracking::default();
        assert!(!should_auto_compact(&[], "claude-sonnet", &tracking));
    }

    #[test]
    fn test_should_auto_compact_circuit_breaker() {
        let tracking = CompactTracking {
            consecutive_failures: 5,
            was_compacted: false,
        };
        assert!(!should_auto_compact(&[], "claude-sonnet", &tracking));
    }

    #[test]
    fn test_microcompact_empty() {
        let mut messages = vec![];
        let freed = microcompact(&mut messages, 2);
        assert_eq!(freed, 0);
    }

    #[test]
    fn test_microcompact_keeps_recent() {
        use crate::llm::message::*;
        let mut messages = vec![
            Message::Assistant(AssistantMessage {
                uuid: uuid::Uuid::new_v4(),
                timestamp: String::new(),
                content: vec![ContentBlock::ToolUse {
                    id: "call_1".into(),
                    name: "FileRead".into(),
                    input: serde_json::json!({}),
                }],
                model: None,
                usage: None,
                stop_reason: None,
                request_id: None,
            }),
            Message::User(UserMessage {
                uuid: uuid::Uuid::new_v4(),
                timestamp: String::new(),
                content: vec![ContentBlock::ToolResult {
                    tool_use_id: "call_1".into(),
                    content: "file content here".repeat(100),
                    is_error: false,
                    extra_content: vec![],
                }],
                is_meta: true,
                is_compact_summary: false,
            }),
        ];
        // Only one compactable result and keep_recent = 5 — nothing cleared.
        let freed = microcompact(&mut messages, 5);
        assert_eq!(freed, 0);
    }

    #[test]
    fn test_compact_boundary_message() {
        let msg = compact_boundary_message("test summary");
        if let Message::System(s) = msg {
            assert_eq!(
                s.subtype,
                crate::llm::message::SystemMessageType::CompactBoundary
            );
        } else {
            panic!("Expected system message");
        }
    }

    #[test]
    fn test_max_output_recovery_message() {
        let msg = max_output_recovery_message();
        match msg {
            Message::User(u) => {
                assert!(!u.content.is_empty());
            }
            _ => panic!("Expected user message"),
        }
    }

    #[test]
    fn test_build_compact_summary_prompt() {
        use crate::llm::message::*;
        let messages = vec![user_message("hello"), user_message("world")];
        let prompt = build_compact_summary_prompt(&messages);
        assert!(prompt.contains("Summarize"));
    }

    #[test]
    fn test_effective_context_window_gpt_model() {
        let eff = effective_context_window("gpt-4o");
        assert_eq!(eff, 128_000 - 16_384);
    }

    #[test]
    fn test_auto_compact_threshold_gpt_model() {
        let threshold = auto_compact_threshold("gpt-4o");
        assert_eq!(threshold, 128_000 - 16_384 - 13_000);
    }

    // Renamed from `..._with_comma_format`: the old test's input contained
    // no commas and byte-duplicated `test_parse_prompt_too_long_gap`. This
    // covers a genuinely different overage instead.
    #[test]
    fn test_parse_prompt_too_long_gap_large_overage() {
        let msg = "prompt is too long: 250000 tokens > 200000 maximum";
        assert_eq!(parse_prompt_too_long_gap(msg), Some(50_000));
    }

    #[test]
    fn test_parse_prompt_too_long_gap_equal_tokens_returns_none() {
        let msg = "prompt is too long: 135000 tokens > 135000 maximum";
        assert_eq!(parse_prompt_too_long_gap(msg), None);
    }

    #[test]
    fn test_token_warning_state_large_count_should_compact() {
        use crate::llm::message::*;
        let big_text = "a".repeat(800_000);
        let messages = vec![user_message(&big_text)];
        let state = token_warning_state(&messages, "claude-sonnet");
        assert!(state.should_compact);
    }

    #[test]
    fn test_should_auto_compact_empty_tracking_small_conversation() {
        let tracking = CompactTracking::default();
        let messages = vec![crate::llm::message::user_message("tiny")];
        assert!(!should_auto_compact(&messages, "claude-sonnet", &tracking));
    }

    #[test]
    fn test_compact_boundary_message_content_format() {
        let msg = compact_boundary_message("my summary");
        if let Message::System(s) = &msg {
            assert!(s.content.contains("my summary"));
            assert!(s.content.starts_with("[Conversation compacted."));
        } else {
            panic!("Expected System message");
        }
    }

    #[test]
    fn test_build_compact_summary_prompt_includes_user_and_assistant() {
        use crate::llm::message::*;
        let messages = vec![
            user_message("user said this"),
            Message::Assistant(AssistantMessage {
                uuid: uuid::Uuid::new_v4(),
                timestamp: String::new(),
                content: vec![ContentBlock::Text {
                    text: "assistant said that".into(),
                }],
                model: None,
                usage: None,
                stop_reason: None,
                request_id: None,
            }),
        ];
        let prompt = build_compact_summary_prompt(&messages);
        assert!(prompt.contains("user said this"));
        assert!(prompt.contains("assistant said that"));
        assert!(prompt.contains("User:"));
        assert!(prompt.contains("Assistant:"));
    }

    #[test]
    fn test_max_output_recovery_message_is_meta() {
        let msg = max_output_recovery_message();
        if let Message::User(u) = &msg {
            assert!(u.is_meta);
        } else {
            panic!("Expected User message");
        }
    }

    #[test]
    fn test_calculate_keep_count_returns_at_least_5_for_large_list() {
        use crate::llm::message::*;
        let messages: Vec<Message> = (0..20)
            .map(|i| user_message(format!("message {i}")))
            .collect();
        let keep = calculate_keep_count(&messages);
        assert!(keep >= 5, "keep_count was {keep}, expected at least 5");
    }
}