// agent_code_lib/services/compact.rs
1//! History compaction.
2//!
3//! Manages conversation history size by summarizing older messages
4//! when the context window limit approaches. Implements three
5//! compaction strategies:
6//!
7//! - **Auto-compact**: triggered when estimated tokens exceed threshold
8//! - **Reactive compact**: triggered by API `prompt_too_long` errors
9//! - **Microcompact**: clears stale tool results to free tokens
10//!
11//! # Thresholds
12//!
13//! ```text
14//! |<--- context window (e.g., 200K) -------------------------------->|
15//! |<--- effective window (context - 20K reserved) ------------------>|
16//! |<--- auto-compact threshold (effective - 13K buffer) ------------>|
17//! |                                                    ↑ compact fires here
18//! ```
19
20use crate::llm::message::{
21    ContentBlock, Message, MessageLevel, SystemMessage, SystemMessageType, UserMessage,
22};
23use crate::services::tokens;
24use uuid::Uuid;
25
/// Buffer tokens before auto-compact fires: compaction triggers this many
/// tokens *below* the effective window so a turn in flight still fits.
const AUTOCOMPACT_BUFFER_TOKENS: u64 = 13_000;

/// Tokens reserved for the compact summary output. Subtracted from the
/// context window (capped by the model's own max output) when computing
/// the effective window.
const MAX_OUTPUT_TOKENS_FOR_SUMMARY: u64 = 20_000;

/// Maximum consecutive auto-compact failures before circuit breaker trips
/// and auto-compact stops firing (see `should_auto_compact`).
const MAX_CONSECUTIVE_FAILURES: u32 = 3;

/// Maximum recovery attempts for max-output-tokens errors.
/// NOTE(review): not read in this module — presumably enforced by the caller.
pub const MAX_OUTPUT_TOKENS_RECOVERY_LIMIT: u32 = 3;

/// Tools whose results can be cleared by microcompact.
/// Names are matched case-insensitively against `ToolUse.name`.
const COMPACTABLE_TOOLS: &[&str] = &["FileRead", "Bash", "Grep", "Glob", "FileEdit", "FileWrite"];
40
/// Token warning state for the UI.
///
/// Produced by `token_warning_state` from an estimated token count and the
/// model's context-window limits; all fields are derived, not stored state.
#[derive(Debug, Clone)]
pub struct TokenWarningState {
    /// Percentage of the effective context window remaining (0-100, rounded).
    pub percent_left: u64,
    /// Whether to show a warning in the UI.
    pub is_above_warning: bool,
    /// Whether to show an error in the UI.
    pub is_above_error: bool,
    /// Whether auto-compact should fire (token count reached the threshold).
    pub should_compact: bool,
    /// Whether the context is at the blocking limit (almost no headroom left).
    pub is_blocking: bool,
}
55
/// Tracking state for auto-compact across turns.
#[derive(Debug, Clone, Default)]
pub struct CompactTracking {
    /// Consecutive failed auto-compact attempts. When this reaches
    /// `MAX_CONSECUTIVE_FAILURES`, `should_auto_compact` stops firing
    /// (circuit breaker).
    pub consecutive_failures: u32,
    /// Whether a compaction has occurred. Not read within this module;
    /// presumably consumed by callers — TODO confirm usage.
    pub was_compacted: bool,
}
62
63/// Calculate the effective context window (total minus output reservation).
64pub fn effective_context_window(model: &str) -> u64 {
65    let context = tokens::context_window_for_model(model);
66    let reserved = tokens::max_output_tokens_for_model(model).min(MAX_OUTPUT_TOKENS_FOR_SUMMARY);
67    context.saturating_sub(reserved)
68}
69
70/// Calculate the auto-compact threshold.
71pub fn auto_compact_threshold(model: &str) -> u64 {
72    effective_context_window(model).saturating_sub(AUTOCOMPACT_BUFFER_TOKENS)
73}
74
75/// Calculate token warning state for the current conversation.
76pub fn token_warning_state(messages: &[Message], model: &str) -> TokenWarningState {
77    let token_count = tokens::estimate_context_tokens(messages);
78    let threshold = auto_compact_threshold(model);
79    let effective = effective_context_window(model);
80
81    let percent_left = if effective > 0 {
82        ((effective.saturating_sub(token_count)) as f64 / effective as f64 * 100.0)
83            .round()
84            .max(0.0) as u64
85    } else {
86        0
87    };
88
89    let warning_buffer = 20_000;
90
91    TokenWarningState {
92        percent_left,
93        is_above_warning: token_count >= effective.saturating_sub(warning_buffer),
94        is_above_error: token_count >= effective.saturating_sub(warning_buffer),
95        should_compact: token_count >= threshold,
96        is_blocking: token_count >= effective.saturating_sub(3_000),
97    }
98}
99
100/// Check whether auto-compact should fire for this conversation.
101pub fn should_auto_compact(messages: &[Message], model: &str, tracking: &CompactTracking) -> bool {
102    // Circuit breaker.
103    if tracking.consecutive_failures >= MAX_CONSECUTIVE_FAILURES {
104        return false;
105    }
106
107    let state = token_warning_state(messages, model);
108    state.should_compact
109}
110
111/// Perform microcompact: clear stale tool results to free tokens.
112///
113/// Replaces the content of old tool_result blocks with a placeholder,
114/// keeping the most recent `keep_recent` results intact.
115pub fn microcompact(messages: &mut [Message], keep_recent: usize) -> u64 {
116    let keep_recent = keep_recent.max(1);
117
118    // Collect indices of compactable tool results (in order).
119    let mut compactable_indices: Vec<(usize, usize)> = Vec::new(); // (msg_idx, block_idx)
120
121    for (msg_idx, msg) in messages.iter().enumerate() {
122        if let Message::User(u) = msg {
123            for (block_idx, block) in u.content.iter().enumerate() {
124                if let ContentBlock::ToolResult { tool_use_id, .. } = block {
125                    // Check if this tool_use_id corresponds to a compactable tool.
126                    if is_compactable_tool_result(messages, tool_use_id) {
127                        compactable_indices.push((msg_idx, block_idx));
128                    }
129                }
130            }
131        }
132    }
133
134    if compactable_indices.len() <= keep_recent {
135        return 0;
136    }
137
138    // Clear all but the most recent `keep_recent`.
139    let clear_count = compactable_indices.len() - keep_recent;
140    let to_clear = &compactable_indices[..clear_count];
141
142    let mut freed_tokens = 0u64;
143
144    for &(msg_idx, block_idx) in to_clear {
145        if let Message::User(ref mut u) = messages[msg_idx]
146            && let ContentBlock::ToolResult {
147                ref mut content,
148                tool_use_id: _,
149                is_error: _,
150                ..
151            } = u.content[block_idx]
152        {
153            let old_tokens = tokens::estimate_tokens(content);
154            let placeholder = "[Old tool result cleared]".to_string();
155            let new_tokens = tokens::estimate_tokens(&placeholder);
156            *content = placeholder;
157            freed_tokens += old_tokens.saturating_sub(new_tokens);
158        }
159    }
160
161    freed_tokens
162}
163
164/// Check if a tool_use_id corresponds to a compactable tool.
165fn is_compactable_tool_result(messages: &[Message], tool_use_id: &str) -> bool {
166    for msg in messages {
167        if let Message::Assistant(a) = msg {
168            for block in &a.content {
169                if let ContentBlock::ToolUse { id, name, .. } = block
170                    && id == tool_use_id
171                {
172                    return COMPACTABLE_TOOLS
173                        .iter()
174                        .any(|t| t.eq_ignore_ascii_case(name));
175                }
176            }
177        }
178    }
179    false
180}
181
182/// Create a compact boundary marker message.
183pub fn compact_boundary_message(summary: &str) -> Message {
184    Message::System(SystemMessage {
185        uuid: Uuid::new_v4(),
186        timestamp: chrono::Utc::now().to_rfc3339(),
187        subtype: SystemMessageType::CompactBoundary,
188        content: format!("[Conversation compacted. Summary: {summary}]"),
189        level: MessageLevel::Info,
190    })
191}
192
193/// Build a compact summary request: asks the LLM to summarize
194/// the conversation up to a certain point.
195pub fn build_compact_summary_prompt(messages: &[Message]) -> String {
196    let mut context = String::new();
197    for msg in messages {
198        match msg {
199            Message::User(u) => {
200                context.push_str("User: ");
201                for block in &u.content {
202                    if let ContentBlock::Text { text } = block {
203                        context.push_str(text);
204                    }
205                }
206                context.push('\n');
207            }
208            Message::Assistant(a) => {
209                context.push_str("Assistant: ");
210                for block in &a.content {
211                    if let ContentBlock::Text { text } = block {
212                        context.push_str(text);
213                    }
214                }
215                context.push('\n');
216            }
217            _ => {}
218        }
219    }
220
221    format!(
222        "Summarize this conversation concisely, preserving key decisions, \
223         file changes made, and important context. Focus on what the user \
224         was trying to accomplish and what was done.\n\n{context}"
225    )
226}
227
228/// Build the recovery message injected when max-output-tokens is hit.
229pub fn max_output_recovery_message() -> Message {
230    Message::User(UserMessage {
231        uuid: Uuid::new_v4(),
232        timestamp: chrono::Utc::now().to_rfc3339(),
233        content: vec![ContentBlock::Text {
234            text: "Output token limit hit. Resume directly — no apology, no recap \
235                   of what you were doing. Pick up mid-thought if that is where the \
236                   cut happened. Break remaining work into smaller pieces."
237                .to_string(),
238        }],
239        is_meta: true,
240        is_compact_summary: false,
241    })
242}
243
244/// Parse a "prompt too long" error to extract the token gap.
245///
246/// Looks for patterns like "prompt is too long: 137500 tokens > 135000 maximum"
247/// and returns the difference (2500 in this example).
248pub fn parse_prompt_too_long_gap(error_text: &str) -> Option<u64> {
249    let re = regex::Regex::new(r"(\d+)\s*tokens?\s*>\s*(\d+)").ok()?;
250    let captures = re.captures(error_text)?;
251    let actual: u64 = captures.get(1)?.as_str().parse().ok()?;
252    let limit: u64 = captures.get(2)?.as_str().parse().ok()?;
253    let gap = actual.saturating_sub(limit);
254    if gap > 0 { Some(gap) } else { None }
255}
256
/// Perform full LLM-based compaction of the conversation history.
///
/// Splits the message history into two parts: messages to summarize
/// (older) and messages to keep (recent). Calls the LLM to generate
/// a summary, then replaces the old messages with:
/// 1. A compact boundary marker
/// 2. A summary message (as a user message with is_compact_summary=true)
/// 3. The kept recent messages
///
/// # Arguments
/// * `messages` - Full history; rewritten in place only on success.
/// * `llm` - Provider used for the one-shot summarization call.
/// * `model` - Model id for the summary request.
/// * `cancel` - Cancellation token forwarded with the provider request.
///
/// Returns the number of messages removed, or None if compaction failed
/// (too few messages, LLM call error, or an empty summary).
pub async fn compact_with_llm(
    messages: &mut Vec<Message>,
    llm: &dyn crate::llm::provider::Provider,
    model: &str,
    cancel: tokio_util::sync::CancellationToken,
) -> Option<usize> {
    if messages.len() < 4 {
        return None; // Not enough messages to compact.
    }

    // Decide how many trailing messages survive compaction; see
    // `calculate_keep_count` for the exact minimums and token cap.
    let keep_count = calculate_keep_count(messages);
    let split_point = messages.len().saturating_sub(keep_count);

    if split_point < 2 {
        return None; // Not enough to summarize.
    }

    let to_summarize = &messages[..split_point];
    let summary_prompt = build_compact_summary_prompt(to_summarize);

    // Call the LLM to generate the summary. Caching and tools are disabled:
    // this is a one-shot request whose only output is the summary text.
    let summary_messages = vec![crate::llm::message::user_message(&summary_prompt)];
    let request = crate::llm::provider::ProviderRequest {
        messages: summary_messages,
        system_prompt: "You are a conversation summarizer. Produce a concise summary \
                        preserving key decisions, file changes, and important context. \
                        Do not use tools."
            .to_string(),
        tools: vec![],
        model: model.to_string(),
        max_tokens: 4096,
        temperature: None,
        enable_caching: false,
        tool_choice: Default::default(),
        metadata: None,
        cancel,
    };

    // A failed call leaves `messages` untouched.
    let mut rx = match llm.stream(&request).await {
        Ok(rx) => rx,
        Err(e) => {
            tracing::warn!("Compact LLM call failed: {e}");
            return None;
        }
    };

    // Collect the summary text. Only TextDelta events are accumulated;
    // all other stream events (tool use, errors, stop) are ignored here.
    let mut summary = String::new();
    while let Some(event) = rx.recv().await {
        if let crate::llm::stream::StreamEvent::TextDelta(text) = event {
            summary.push_str(&text);
        }
    }

    // An empty summary (e.g. cancelled or error-only stream) aborts without
    // modifying the history.
    if summary.is_empty() {
        return None;
    }

    // Replace old messages with boundary + summary + kept messages.
    // The kept tail is cloned out first because `messages` is cleared below.
    let kept = messages[split_point..].to_vec();
    let removed = split_point;

    messages.clear();
    messages.push(compact_boundary_message(&summary));
    messages.push(Message::User(UserMessage {
        uuid: Uuid::new_v4(),
        timestamp: chrono::Utc::now().to_rfc3339(),
        content: vec![ContentBlock::Text {
            text: format!("[Conversation compacted. Prior context summary:]\n\n{summary}"),
        }],
        is_meta: true,
        is_compact_summary: true,
    }));
    messages.extend(kept);

    tracing::info!("Compacted {removed} messages into summary");
    Some(removed)
}
347
348/// Calculate how many recent messages to keep during compaction.
349///
350/// Keeps at least 5 messages with text content, or messages totaling
351/// at least 10K estimated tokens, whichever is more.
352fn calculate_keep_count(messages: &[Message]) -> usize {
353    let min_text_messages = 5;
354    let min_tokens = 10_000u64;
355    let max_tokens = 40_000u64;
356
357    let mut count = 0usize;
358    let mut text_count = 0usize;
359    let mut token_total = 0u64;
360
361    // Walk backwards from the end.
362    for msg in messages.iter().rev() {
363        let tokens = crate::services::tokens::estimate_message_tokens(msg);
364        token_total += tokens;
365        count += 1;
366
367        // Count messages with text content.
368        let has_text = match msg {
369            Message::User(u) => u
370                .content
371                .iter()
372                .any(|b| matches!(b, ContentBlock::Text { .. })),
373            Message::Assistant(a) => a
374                .content
375                .iter()
376                .any(|b| matches!(b, ContentBlock::Text { .. })),
377            _ => false,
378        };
379        if has_text {
380            text_count += 1;
381        }
382
383        // Stop if we've met both minimums.
384        if text_count >= min_text_messages && token_total >= min_tokens {
385            break;
386        }
387        // Hard cap.
388        if token_total >= max_tokens {
389            break;
390        }
391    }
392
393    count
394}
395
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_auto_compact_threshold() {
        // claude-sonnet: 200K context; max output 16_384, which is below the
        // 20K summary cap, so effective = 200K - 16_384. Threshold subtracts
        // the 13K auto-compact buffer on top of that.
        let threshold = auto_compact_threshold("claude-sonnet");
        assert_eq!(threshold, 200_000 - 16_384 - 13_000);
    }

    #[test]
    fn test_parse_prompt_too_long_gap() {
        let msg = "prompt is too long: 137500 tokens > 135000 maximum";
        assert_eq!(parse_prompt_too_long_gap(msg), Some(2500));
    }

    #[test]
    fn test_parse_prompt_too_long_no_match() {
        assert_eq!(parse_prompt_too_long_gap("some other error"), None);
    }

    #[test]
    fn test_effective_context_window() {
        // claude-sonnet: 200K context minus the (sub-20K-cap) max output
        // reservation. Only sanity-check the range here, not the exact value.
        let eff = effective_context_window("claude-sonnet");
        assert!(eff > 100_000);
        assert!(eff < 200_000);
    }

    #[test]
    fn test_token_warning_state_empty() {
        // Empty history: full headroom, no warnings, not blocking.
        let state = token_warning_state(&[], "claude-sonnet");
        assert_eq!(state.percent_left, 100);
        assert!(!state.is_above_warning);
        assert!(!state.is_blocking);
    }

    #[test]
    fn test_should_auto_compact_empty() {
        let tracking = CompactTracking::default();
        assert!(!should_auto_compact(&[], "claude-sonnet", &tracking));
    }

    #[test]
    fn test_should_auto_compact_circuit_breaker() {
        let tracking = CompactTracking {
            consecutive_failures: 5,
            was_compacted: false,
        };
        // Even with huge message list, circuit breaker should prevent compaction.
        assert!(!should_auto_compact(&[], "claude-sonnet", &tracking));
    }

    #[test]
    fn test_microcompact_empty() {
        let mut messages = vec![];
        let freed = microcompact(&mut messages, 2);
        assert_eq!(freed, 0);
    }

    #[test]
    fn test_microcompact_keeps_recent() {
        use crate::llm::message::*;
        // A paired ToolUse (assistant) + ToolResult (user) for a compactable
        // tool ("FileRead").
        let mut messages = vec![
            Message::Assistant(AssistantMessage {
                uuid: uuid::Uuid::new_v4(),
                timestamp: String::new(),
                content: vec![ContentBlock::ToolUse {
                    id: "call_1".into(),
                    name: "FileRead".into(),
                    input: serde_json::json!({}),
                }],
                model: None,
                usage: None,
                stop_reason: None,
                request_id: None,
            }),
            Message::User(UserMessage {
                uuid: uuid::Uuid::new_v4(),
                timestamp: String::new(),
                content: vec![ContentBlock::ToolResult {
                    tool_use_id: "call_1".into(),
                    content: "file content here".repeat(100),
                    is_error: false,
                    extra_content: vec![],
                }],
                is_meta: true,
                is_compact_summary: false,
            }),
        ];
        // keep_recent=5 means this single result should be kept.
        let freed = microcompact(&mut messages, 5);
        assert_eq!(freed, 0);
    }

    #[test]
    fn test_compact_boundary_message() {
        let msg = compact_boundary_message("test summary");
        if let Message::System(s) = msg {
            assert_eq!(
                s.subtype,
                crate::llm::message::SystemMessageType::CompactBoundary
            );
        } else {
            panic!("Expected system message");
        }
    }

    #[test]
    fn test_max_output_recovery_message() {
        let msg = max_output_recovery_message();
        match msg {
            Message::User(u) => {
                assert!(!u.content.is_empty());
            }
            _ => panic!("Expected user message"),
        }
    }

    #[test]
    fn test_build_compact_summary_prompt() {
        use crate::llm::message::*;
        let messages = vec![user_message("hello"), user_message("world")];
        let prompt = build_compact_summary_prompt(&messages);
        assert!(prompt.contains("Summarize"));
    }

    #[test]
    fn test_effective_context_window_gpt_model() {
        // gpt-4o: 128K context, 16_384 max output (below the 20K cap), so
        // effective = 128_000 - 16_384.
        let eff = effective_context_window("gpt-4o");
        assert_eq!(eff, 128_000 - 16_384);
    }

    #[test]
    fn test_auto_compact_threshold_gpt_model() {
        let threshold = auto_compact_threshold("gpt-4o");
        assert_eq!(threshold, 128_000 - 16_384 - 13_000);
    }

    #[test]
    fn test_parse_prompt_too_long_gap_with_comma_format() {
        // Numbers without commas embedded, but different magnitudes.
        let msg = "prompt is too long: 137500 tokens > 135000 maximum";
        assert_eq!(parse_prompt_too_long_gap(msg), Some(2500));
    }

    #[test]
    fn test_parse_prompt_too_long_gap_equal_tokens_returns_none() {
        let msg = "prompt is too long: 135000 tokens > 135000 maximum";
        // gap = 0, so returns None.
        assert_eq!(parse_prompt_too_long_gap(msg), None);
    }

    #[test]
    fn test_token_warning_state_large_count_should_compact() {
        use crate::llm::message::*;
        // Create a huge message that will exceed the threshold.
        // NOTE: assumes roughly 4 chars/token in the estimator — the 800K-char
        // body comfortably clears the ~167K-token threshold either way.
        let big_text = "a".repeat(800_000);
        let messages = vec![user_message(&big_text)];
        let state = token_warning_state(&messages, "claude-sonnet");
        assert!(state.should_compact);
    }

    #[test]
    fn test_should_auto_compact_empty_tracking_small_conversation() {
        let tracking = CompactTracking::default();
        let messages = vec![crate::llm::message::user_message("tiny")];
        assert!(!should_auto_compact(&messages, "claude-sonnet", &tracking));
    }

    #[test]
    fn test_compact_boundary_message_content_format() {
        let msg = compact_boundary_message("my summary");
        if let Message::System(s) = &msg {
            assert!(s.content.contains("my summary"));
            assert!(s.content.starts_with("[Conversation compacted."));
        } else {
            panic!("Expected System message");
        }
    }

    #[test]
    fn test_build_compact_summary_prompt_includes_user_and_assistant() {
        use crate::llm::message::*;
        let messages = vec![
            user_message("user said this"),
            Message::Assistant(AssistantMessage {
                uuid: uuid::Uuid::new_v4(),
                timestamp: String::new(),
                content: vec![ContentBlock::Text {
                    text: "assistant said that".into(),
                }],
                model: None,
                usage: None,
                stop_reason: None,
                request_id: None,
            }),
        ];
        let prompt = build_compact_summary_prompt(&messages);
        assert!(prompt.contains("user said this"));
        assert!(prompt.contains("assistant said that"));
        assert!(prompt.contains("User:"));
        assert!(prompt.contains("Assistant:"));
    }

    #[test]
    fn test_max_output_recovery_message_is_meta() {
        let msg = max_output_recovery_message();
        if let Message::User(u) = &msg {
            assert!(u.is_meta);
        } else {
            panic!("Expected User message");
        }
    }

    #[test]
    fn test_calculate_keep_count_returns_at_least_5_for_large_list() {
        use crate::llm::message::*;
        // Create 20 messages with text content.
        let messages: Vec<Message> = (0..20)
            .map(|i| user_message(format!("message {i}")))
            .collect();
        let keep = calculate_keep_count(&messages);
        assert!(keep >= 5, "keep_count was {keep}, expected at least 5");
    }
}
625}