ai_agent/
compact.rs

1// Source: /data/home/swei/claudecode/openclaudecode/src/commands/compact/compact.ts
2//! Context compaction module.
3//!
4//! Handles automatic context compaction when the conversation gets too long.
5//! This includes token threshold detection, summary generation, and message management.
6
7use crate::constants::env::{ai, ai_code};
8use crate::services::token_estimation::{
9    rough_token_count_estimation, rough_token_count_estimation_for_message,
10};
11use crate::types::*;
12
/// Fallback context window size, in tokens.
///
/// Returned by `get_default_context_window` whenever the environment override
/// is missing, unparsable, or zero.
pub const DEFAULT_CONTEXT_WINDOW: u32 = 200_000;
15
16/// Get default context window from environment or use default
17pub fn get_default_context_window() -> u32 {
18    if let Ok(override_val) = std::env::var(ai::CONTEXT_WINDOW) {
19        if let Ok(parsed) = override_val.parse::<u32>() {
20            if parsed > 0 {
21                return parsed;
22            }
23        }
24    }
25    DEFAULT_CONTEXT_WINDOW
26}
27
/// Build the instruction prompt sent to the model when asking for a conversation summary.
///
/// The text forbids tool calls and requests an `<analysis>` block followed by a
/// `<summary>` block. A fresh owned `String` is returned on every call.
/// Translated from: getCompactPrompt in prompt.ts
pub fn get_compact_prompt() -> String {
    // Kept as a single raw literal so the wording stays exactly as authored.
    const COMPACT_PROMPT: &str = r#"CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.

- Do NOT use Read, Bash, Grep, Glob, Edit, Write, or ANY other tool.
- You already have all the context you need in the conversation above.
- Tool calls will be REJECTED and will waste your only turn — you will fail the task.
- Your entire response must be plain text: an <analysis> block followed by a <summary> block.

Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions.
This summary should be thorough in capturing technical details, code patterns, and architectural decisions that would be essential for continuing development work without losing context.

Before providing your final summary, wrap your analysis in <analysis> tags to organize your thoughts and ensure you've covered all necessary points. In your analysis process:

1. Chronologically analyze each message and section of the conversation. For each section thoroughly identify:
   - The user's explicit requests and intents
   - Your approach to addressing the user's requests
   - Key decisions, technical concepts and code patterns
   - Specific details like:
     - file names
     - full code snippets
     - function signatures
     - file edits
   - Errors that you ran into and how you fixed them
   - Pay special attention to specific user feedback that you received, especially if the user told you to do something differently.
2. Double-check for technical accuracy and completeness, addressing each required element thoroughly.

Your summary should include the following sections:

1. Primary Request and Intent: Capture all of the user's explicit requests and intents in detail
2. Key Technical Concepts: List all important technical concepts, technologies, and frameworks discussed.
3. Files and Code Sections: Enumerate specific files and code sections examined, modified, or created. Pay special attention to the most recent messages and include full code snippets where applicable and include a summary of why this file read or edit is important.
4. Errors and fixes: List all errors that you ran into, and how you fixed them. Pay special attention to specific user feedback that you received, especially if the user told you to do something differently.
5. Problem Solving: Document problems solved and any ongoing troubleshooting efforts.
6. All user messages: List ALL user messages that are not tool results. These are critical for understanding the users' feedback and changing intent.
7. Pending Tasks: Outline any pending tasks that you have explicitly been asked to work on.
8. Current Work: Describe in detail precisely what was being worked on immediately before this summary request, paying special attention to the most recent messages from both user and assistant. Include file names and code snippets where applicable.
9. Context for Continuing Work: Key context, decisions, or state needed to continue the work.

IMPORTANT: Be extremely thorough — include ALL important technical details, code patterns, and architectural decisions. This summary must provide enough context for the next turn to continue seamlessly.

REMINDER: Do NOT call any tools. Respond with plain text only — an <analysis> block followed by a <summary> block. Tool calls will be rejected and you will fail the task.
"#;

    String::from(COMPACT_PROMPT)
}
73
/// Reserve tokens for output during compaction
/// Based on p99.99 of compact summary output
///
/// Subtracted from the model's context window by
/// `get_effective_context_window_size`.
pub const MAX_OUTPUT_TOKENS_FOR_SUMMARY: u32 = 20_000;

/// Buffer tokens for auto-compact trigger
///
/// `get_auto_compact_threshold` subtracts this from the effective window to
/// obtain the default auto-compact threshold.
pub const AUTOCOMPACT_BUFFER_TOKENS: u32 = 13_000;

/// Buffer tokens for warning threshold
///
/// `calculate_token_warning_state` flags a warning once usage reaches the
/// active threshold minus this amount.
pub const WARNING_THRESHOLD_BUFFER_TOKENS: u32 = 20_000;

/// Buffer tokens for error threshold
///
/// Same value as `WARNING_THRESHOLD_BUFFER_TOKENS`, so in
/// `calculate_token_warning_state` the warning and error flags currently flip
/// at the same usage.
pub const ERROR_THRESHOLD_BUFFER_TOKENS: u32 = 20_000;
86
87/// Get the blocking limit (when to block further input)
88pub fn get_blocking_limit(model: &str) -> u32 {
89    let effective_window = get_effective_context_window_size(model);
90    let default_blocking_limit = effective_window.saturating_sub(MANUAL_COMPACT_BUFFER_TOKENS);
91
92    // Allow override for testing
93    if let Ok(override_val) = std::env::var(ai::BLOCKING_LIMIT_OVERRIDE) {
94        if let Ok(parsed) = override_val.parse::<u32>() {
95            if parsed > 0 {
96                return parsed;
97            }
98        }
99    }
100
101    default_blocking_limit
102}
103
/// Manual compact uses smaller buffer (more aggressive)
///
/// Subtracted from the effective window when deriving the default blocking
/// limit (see `get_blocking_limit` and `calculate_token_warning_state`).
pub const MANUAL_COMPACT_BUFFER_TOKENS: u32 = 3_000;

// NOTE(review): none of the constants below are referenced by the code visible
// in this file; presumably they are consumed by the compaction driver elsewhere.

/// Maximum consecutive auto-compact failures before giving up
pub const MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES: u32 = 3;

/// Post-compaction: max files to restore
pub const POST_COMPACT_MAX_FILES_TO_RESTORE: u32 = 5;

/// Post-compaction: token budget for restored files
pub const POST_COMPACT_TOKEN_BUDGET: u32 = 50_000;

/// Post-compaction: max tokens per file
pub const POST_COMPACT_MAX_TOKENS_PER_FILE: u32 = 5_000;

/// Post-compaction: max tokens per skill
pub const POST_COMPACT_MAX_TOKENS_PER_SKILL: u32 = 5_000;

/// Post-compaction: skills token budget
pub const POST_COMPACT_SKILLS_TOKEN_BUDGET: u32 = 25_000;
124
125/// Get effective context window size (total - output reserve)
126pub fn get_effective_context_window_size(model: &str) -> u32 {
127    let context_window = get_context_window_for_model(model);
128    context_window.saturating_sub(MAX_OUTPUT_TOKENS_FOR_SUMMARY)
129}
130
131/// Get context window size for a model
132pub fn get_context_window_for_model(model: &str) -> u32 {
133    // Check environment override for auto compact window
134    if let Ok(override_val) = std::env::var(ai::AUTO_COMPACT_WINDOW) {
135        if let Ok(parsed) = override_val.parse::<u32>() {
136            if parsed > 0 {
137                return parsed;
138            }
139        }
140    }
141
142    // Default context windows by model
143    let lower = model.to_lowercase();
144    if lower.contains("sonnet") {
145        // Claude Sonnet models typically have 200K context
146        get_default_context_window()
147    } else if lower.contains("haiku") {
148        // Haiku has 200K context
149        get_default_context_window()
150    } else if lower.contains("opus") {
151        // Opus models typically have 200K context
152        get_default_context_window()
153    } else {
154        get_default_context_window()
155    }
156}
157
158/// Get the auto-compact threshold (when to trigger compaction)
159pub fn get_auto_compact_threshold(model: &str) -> u32 {
160    let effective_window = get_effective_context_window_size(model);
161
162    let autocompact_threshold = effective_window.saturating_sub(AUTOCOMPACT_BUFFER_TOKENS);
163
164    // Override for easier testing of autocompact
165    if let Ok(env_percent) = std::env::var(ai::AUTOCOMPACT_PCT_OVERRIDE) {
166        if let Ok(parsed) = env_percent.parse::<f64>() {
167            if parsed > 0.0 && parsed <= 100.0 {
168                let percentage_threshold =
169                    ((effective_window as f64 * (parsed / 100.0)) as u32).min(effective_window);
170                return percentage_threshold.min(autocompact_threshold);
171            }
172        }
173    }
174
175    autocompact_threshold
176}
177
/// Snapshot of how close a token count is to the context limits.
///
/// Produced by `calculate_token_warning_state`.
/// Translated from: calculateTokenWarningState in autoCompact.ts
#[derive(Debug, Clone)]
pub struct TokenWarningState {
    /// Share of the active threshold still unused, as a percentage in `0.0..=100.0`.
    pub percent_left: f64,
    /// Usage has reached the active threshold minus `WARNING_THRESHOLD_BUFFER_TOKENS`.
    pub is_above_warning_threshold: bool,
    /// Usage has reached the active threshold minus `ERROR_THRESHOLD_BUFFER_TOKENS`.
    pub is_above_error_threshold: bool,
    /// Auto-compact is enabled and usage has reached the auto-compact threshold.
    pub is_above_auto_compact_threshold: bool,
    /// Usage has reached the blocking limit.
    pub is_at_blocking_limit: bool,
}
188
189pub fn calculate_token_warning_state(token_usage: u32, model: &str) -> TokenWarningState {
190    let auto_compact_threshold = get_auto_compact_threshold(model);
191    let effective_window = get_effective_context_window_size(model);
192
193    // Use auto_compact_threshold if enabled, otherwise use effective window
194    let threshold = if is_auto_compact_enabled_for_calculation() {
195        auto_compact_threshold
196    } else {
197        effective_window
198    };
199
200    let percent_left = if threshold > 0 {
201        ((threshold.saturating_sub(token_usage) as f64 / threshold as f64) * 100.0).max(0.0)
202    } else {
203        100.0
204    };
205
206    let warning_threshold = threshold.saturating_sub(WARNING_THRESHOLD_BUFFER_TOKENS);
207    let error_threshold = threshold.saturating_sub(ERROR_THRESHOLD_BUFFER_TOKENS);
208
209    let is_above_warning_threshold = token_usage >= warning_threshold;
210    let is_above_error_threshold = token_usage >= error_threshold;
211    let is_above_auto_compact_threshold =
212        is_auto_compact_enabled_for_calculation() && token_usage >= auto_compact_threshold;
213
214    // Calculate blocking limit
215    let default_blocking_limit = effective_window.saturating_sub(MANUAL_COMPACT_BUFFER_TOKENS);
216
217    // Allow override for testing (translate from CLAUDE_CODE_BLOCKING_LIMIT_OVERRIDE)
218    let blocking_limit = if let Ok(override_val) = std::env::var(ai_code::BLOCKING_LIMIT_OVERRIDE) {
219        if let Ok(parsed) = override_val.parse::<u32>() {
220            if parsed > 0 {
221                parsed
222            } else {
223                default_blocking_limit
224            }
225        } else {
226            default_blocking_limit
227        }
228    } else {
229        default_blocking_limit
230    };
231
232    let is_at_blocking_limit = token_usage >= blocking_limit;
233
234    TokenWarningState {
235        percent_left,
236        is_above_warning_threshold,
237        is_above_error_threshold,
238        is_above_auto_compact_threshold,
239        is_at_blocking_limit,
240    }
241}
242
243/// Check if auto-compact is enabled (used in calculation)
244/// Translated from: isAutoCompactEnabled in autoCompact.ts
245fn is_auto_compact_enabled_for_calculation() -> bool {
246    use crate::utils::env_utils::is_env_truthy;
247
248    if is_env_truthy(Some("DISABLE_COMPACT")) {
249        return false;
250    }
251    if is_env_truthy(Some("DISABLE_AUTO_COMPACT")) {
252        return false;
253    }
254    // Check user config - for now default to true
255    // In full implementation: getGlobalConfig().autoCompactEnabled
256    true
257}
258
/// Compact result containing the new messages after compaction
///
/// Plain data holder; nothing in this file's visible code constructs it.
#[derive(Debug, Clone)]
pub struct CompactionResult {
    /// The boundary marker message
    pub boundary_marker: Message,
    /// Summary messages to keep
    pub summary_messages: Vec<Message>,
    /// Messages that were kept (not summarized); `None` when there are none to report
    pub messages_to_keep: Option<Vec<Message>>,
    /// Attachments to include
    pub attachments: Vec<Message>,
    /// Pre-compaction token count
    pub pre_compact_token_count: u32,
    /// Post-compaction token count
    pub post_compact_token_count: u32,
}
275
276/// Strip images from messages before sending for compaction
277/// Images are not needed for summary generation
278pub fn strip_images_from_messages(messages: &[Message]) -> Vec<Message> {
279    messages
280        .iter()
281        .map(|msg| {
282            if let Message {
283                role: MessageRole::User,
284                content: _,
285                ..
286            } = msg
287            {
288                // For user messages, we could strip images
289                // For simplicity, return as-is
290                msg.clone()
291            } else {
292                msg.clone()
293            }
294        })
295        .collect()
296}
297
298/// Estimate token count for messages (rough estimation)
299/// Uses 4 chars per token for regular text (matching original TypeScript)
300/// Uses 2 chars per token for tool results (JSON is more token-efficient)
301/// Takes optional max_output_tokens to ensure we leave room for the response
302pub fn estimate_token_count(messages: &[Message], max_output_tokens: u32) -> u32 {
303    // Regular text: 4 chars per token (original TypeScript default)
304    let non_tool_chars: usize = messages
305        .iter()
306        .filter(|msg| msg.role != MessageRole::Tool)
307        .map(|msg| msg.content.len())
308        .sum();
309
310    // Tool results (JSON): 2 chars per token (more efficient encoding)
311    // Original: "Dense JSON has many single-character tokens..."
312    let tool_result_chars: usize = messages
313        .iter()
314        .filter(|msg| msg.role == MessageRole::Tool)
315        .map(|msg| msg.content.len())
316        .sum();
317
318    let base_estimate = (non_tool_chars / 4) as u32;
319    let tool_buffer = (tool_result_chars / 2) as u32; // More efficient for JSON
320
321    // Add the requested output tokens to ensure we leave room for the response
322    base_estimate + tool_buffer + max_output_tokens
323}
324
325/// Check if conversation should be compacted
326pub fn should_compact(token_usage: u32, model: &str) -> bool {
327    let state = calculate_token_warning_state(token_usage, model);
328    state.is_above_auto_compact_threshold
329}
330
331/// Truncate messages to fit within a safe token limit for summarization
332/// This is used when the conversation is too large to fit in context
333/// Skips ALL system messages (they contain huge compaction summaries)
334/// Returns (truncated_messages, estimated_tokens)
335pub fn truncate_messages_for_summary(
336    messages: &[Message],
337    model: &str,
338    max_output_tokens: u32,
339) -> (Vec<Message>, u32) {
340    let context_window = get_context_window_for_model(model);
341    // Leave room for output tokens and buffer - use 50% of available space for safety
342    let safe_limit = ((context_window.saturating_sub(max_output_tokens)) as f64 * 0.50) as u32;
343
344    let total_messages = messages.len();
345    if total_messages == 0 {
346        return (vec![], 0);
347    }
348
349    // Skip ALL system messages - they contain huge compaction summaries from previous rounds
350    // For summarization, we only need the conversation history (user/assistant/tool messages)
351    let non_system_messages: Vec<Message> = messages
352        .iter()
353        .filter(|m| m.role != MessageRole::System)
354        .cloned()
355        .collect();
356
357    // Now take most recent non-system messages using proper token estimation
358    let mut current_tokens = 0u32;
359    let mut history_messages = Vec::new();
360
361    for msg in non_system_messages.iter().rev() {
362        let msg_tokens = rough_token_count_estimation_for_message(msg) as u32;
363        if current_tokens + msg_tokens > safe_limit {
364            break;
365        }
366        current_tokens += msg_tokens;
367        history_messages.insert(0, msg.clone());
368    }
369
370    // If we couldn't fit any history, try to at least get recent messages
371    if history_messages.is_empty() && !non_system_messages.is_empty() {
372        // Take just the last message, truncated if needed
373        let last_msg = non_system_messages.last().unwrap();
374        let max_chars = (safe_limit as usize) * 4;
375        let chars_to_keep = last_msg.content.len().min(max_chars);
376        let truncated_content = last_msg
377            .content
378            .chars()
379            .take(chars_to_keep)
380            .collect::<String>();
381
382        current_tokens = rough_token_count_estimation(&truncated_content, 4.0) as u32;
383
384        history_messages = vec![Message {
385            role: last_msg.role.clone(),
386            content: truncated_content,
387            ..Default::default()
388        }];
389    }
390
391    let total_estimated = current_tokens;
392
393    (history_messages, total_estimated)
394}
395
#[cfg(test)]
mod tests {
    use super::*;

    // The exact values asserted below assume that none of the environment
    // overrides consulted by this module are set while the tests run.
    const MODEL: &str = "claude-sonnet-4-6";

    #[test]
    fn test_effective_context_window() {
        let window = get_effective_context_window_size(MODEL);
        // 200000 - 20000 = 180000
        assert_eq!(window, 180_000);
    }

    #[test]
    fn test_auto_compact_threshold() {
        let threshold = get_auto_compact_threshold(MODEL);
        // 180000 - 13000 = 167000
        assert_eq!(threshold, 167_000);
    }

    #[test]
    fn test_token_warning_state_normal() {
        let state = calculate_token_warning_state(50_000, MODEL);
        assert!(!state.is_above_warning_threshold);
        assert!(!state.is_above_error_threshold);
        assert!(!state.is_above_auto_compact_threshold);
        assert!(state.percent_left > 50.0);
    }

    #[test]
    fn test_token_warning_state_warning() {
        // With auto-compact enabled the active threshold is 167000, so the
        // warning line sits at 167000 - 20000 = 147000.
        let state = calculate_token_warning_state(165_000, MODEL);
        assert!(state.is_above_warning_threshold);
        // error uses same buffer, so this is also above error threshold
        assert!(state.is_above_error_threshold);
        assert!(!state.is_above_auto_compact_threshold);
    }

    #[test]
    fn test_token_warning_state_compact() {
        let state = calculate_token_warning_state(170_000, MODEL);
        assert!(state.is_above_warning_threshold);
        assert!(state.is_above_auto_compact_threshold);
    }

    #[test]
    fn test_should_compact() {
        assert!(!should_compact(50_000, MODEL));
        assert!(should_compact(170_000, MODEL));
    }

    #[test]
    fn test_estimate_token_count() {
        let messages = vec![
            Message {
                role: MessageRole::User,
                content: "Hello, this is a test message".to_string(),
                ..Default::default()
            },
            Message {
                role: MessageRole::Assistant,
                content: "Hi! How can I help you today?".to_string(),
                ..Default::default()
            },
        ];

        // 29 + 29 = 58 bytes of regular text; 58 / 4 = 14 tokens.
        assert_eq!(estimate_token_count(&messages, 0), 14);
        // The output reserve is added on top of the input estimate.
        assert_eq!(estimate_token_count(&messages, 100), 114);
    }

    #[test]
    fn test_estimate_token_count_tool_results() {
        let messages = vec![Message {
            role: MessageRole::Tool,
            content: "{\"ok\":true}".to_string(),
            ..Default::default()
        }];

        // 11 bytes of tool output at 2 bytes per token = 5 tokens.
        assert_eq!(estimate_token_count(&messages, 0), 5);
    }

    #[test]
    fn test_estimate_token_count_empty() {
        assert_eq!(estimate_token_count(&[], 0), 0);
        assert_eq!(estimate_token_count(&[], 42), 42);
    }
}
466
467// ============================================================================
468// Compact Command Module (translated from commands/compact/)
469// ============================================================================
470
// Compact command definition
// Translates: /data/home/swei/claudecode/openclaudecode/src/commands/compact/index.ts

/// Check whether an environment variable's *value* is truthy (copied from bridge_enabled).
///
/// `env_var` is the value that was read, not the variable's name. After
/// lower-casing and trimming, the values `1`, `true`, `yes` and `on` count as
/// truthy; everything else, including the empty string, does not.
fn is_env_truthy(env_var: &str) -> bool {
    if env_var.is_empty() {
        return false;
    }
    let lowered = env_var.to_lowercase();
    matches!(lowered.trim(), "1" | "true" | "yes" | "on")
}

/// Enablement check installed into [`CompactCommand::is_enabled`].
///
/// The command is enabled unless the `AI_DISABLE_COMPACT` environment variable
/// is set to a truthy value. An unset or non-UTF-8 variable leaves it enabled.
fn compact_command_enabled() -> bool {
    // Look the variable up first: `is_env_truthy` inspects a value, so handing
    // it the variable's name would never report the command as disabled.
    let disabled = std::env::var("AI_DISABLE_COMPACT")
        .map(|value| is_env_truthy(&value))
        .unwrap_or(false);
    !disabled
}

/// Compact command configuration
#[derive(Debug, Clone)]
pub struct CompactCommand {
    /// Command type
    pub command_type: String,
    /// Command name
    pub name: String,
    /// Command description
    pub description: String,
    /// Whether the command is enabled; evaluated on every call, not cached
    pub is_enabled: fn() -> bool,
    /// Whether it supports non-interactive mode
    pub supports_non_interactive: bool,
    /// Argument hint text
    pub argument_hint: String,
}

impl Default for CompactCommand {
    fn default() -> Self {
        Self::new()
    }
}

impl CompactCommand {
    /// Create a new compact command
    pub fn new() -> Self {
        Self {
            command_type: "local".to_string(),
            name: "compact".to_string(),
            description: "Clear conversation history but keep a summary in context. Optional: /compact [instructions for summarization]".to_string(),
            is_enabled: compact_command_enabled,
            supports_non_interactive: true,
            argument_hint: "<optional custom summarization instructions>".to_string(),
        }
    }

    /// Check if the command is enabled by invoking the stored `is_enabled` callback
    pub fn is_enabled(&self) -> bool {
        (self.is_enabled)()
    }
}
525
526/// Get the compact command
527pub fn get_compact_command() -> CompactCommand {
528    CompactCommand::new()
529}
530
/// Compact command error messages
///
/// Static message strings only; none of them is referenced by the code visible
/// in this file.
pub mod compact_errors {
    /// Error message for incomplete response
    pub const ERROR_MESSAGE_INCOMPLETE_RESPONSE: &str =
        "Incomplete response from model during compaction";
    /// Error message for not enough messages
    pub const ERROR_MESSAGE_NOT_ENOUGH_MESSAGES: &str = "Not enough messages to compact";
    /// Error message for user abort
    pub const ERROR_MESSAGE_USER_ABORT: &str = "User aborted compaction";
}
ai_agent/compact.rs

ai_agent/
compact.rs