ai_agent/
compact.rs

1// Source: /data/home/swei/claudecode/openclaudecode/src/commands/compact/compact.ts
2//! Context compaction module.
3//!
4//! Handles automatic context compaction when the conversation gets too long.
5//! This includes token threshold detection, summary generation, and message management.
6
7use crate::constants::env::{ai, ai_code};
8pub use crate::services::token_estimation::{
9    rough_token_count_estimation, rough_token_count_estimation_for_content,
10    rough_token_count_estimation_for_message,
11};
12use crate::types::*;
13
/// Fallback context window size, in tokens.
///
/// Returned by `get_default_context_window` when the environment does not
/// supply a valid positive override.
pub const DEFAULT_CONTEXT_WINDOW: u32 = 200_000;
16
17/// Get default context window from environment or use default
18pub fn get_default_context_window() -> u32 {
19    if let Ok(override_val) = std::env::var(ai::CONTEXT_WINDOW) {
20        if let Ok(parsed) = override_val.parse::<u32>() {
21            if parsed > 0 {
22                return parsed;
23            }
24        }
25    }
26    DEFAULT_CONTEXT_WINDOW
27}
28
/// Build the instruction prompt used to ask the model for a conversation summary.
///
/// The prompt forbids tool calls and asks for an `<analysis>` block followed by
/// a `<summary>` block. A fresh owned `String` is returned on every call.
/// Translated from: getCompactPrompt in prompt.ts
pub fn get_compact_prompt() -> String {
    // The text is part of the runtime contract with the model; keep it verbatim.
    const COMPACT_PROMPT: &str = r#"CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.

- Do NOT use Read, Bash, Grep, Glob, Edit, Write, or ANY other tool.
- You already have all the context you need in the conversation above.
- Tool calls will be REJECTED and will waste your only turn — you will fail the task.
- Your entire response must be plain text: an <analysis> block followed by a <summary> block.

Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions.
This summary should be thorough in capturing technical details, code patterns, and architectural decisions that would be essential for continuing development work without losing context.

Before providing your final summary, wrap your analysis in <analysis> tags to organize your thoughts and ensure you've covered all necessary points. In your analysis process:

1. Chronologically analyze each message and section of the conversation. For each section thoroughly identify:
   - The user's explicit requests and intents
   - Your approach to addressing the user's requests
   - Key decisions, technical concepts and code patterns
   - Specific details like:
     - file names
     - full code snippets
     - function signatures
     - file edits
   - Errors that you ran into and how you fixed them
   - Pay special attention to specific user feedback that you received, especially if the user told you to do something differently.
2. Double-check for technical accuracy and completeness, addressing each required element thoroughly.

Your summary should include the following sections:

1. Primary Request and Intent: Capture all of the user's explicit requests and intents in detail
2. Key Technical Concepts: List all important technical concepts, technologies, and frameworks discussed.
3. Files and Code Sections: Enumerate specific files and code sections examined, modified, or created. Pay special attention to the most recent messages and include full code snippets where applicable and include a summary of why this file read or edit is important.
4. Errors and fixes: List all errors that you ran into, and how you fixed them. Pay special attention to specific user feedback that you received, especially if the user told you to do something differently.
5. Problem Solving: Document problems solved and any ongoing troubleshooting efforts.
6. All user messages: List ALL user messages that are not tool results. These are critical for understanding the users' feedback and changing intent.
7. Pending Tasks: Outline any pending tasks that you have explicitly been asked to work on.
8. Current Work: Describe in detail precisely what was being worked on immediately before this summary request, paying special attention to the most recent messages from both user and assistant. Include file names and code snippets where applicable.
9. Context for Continuing Work: Key context, decisions, or state needed to continue the work.

IMPORTANT: Be extremely thorough — include ALL important technical details, code patterns, and architectural decisions. This summary must provide enough context for the next turn to continue seamlessly.

REMINDER: Do NOT call any tools. Respond with plain text only — an <analysis> block followed by a <summary> block. Tool calls will be rejected and you will fail the task.
"#;

    String::from(COMPACT_PROMPT)
}
74
/// Tokens reserved for the model's output during compaction.
/// Based on p99.99 of compact summary output.
pub const MAX_OUTPUT_TOKENS_FOR_SUMMARY: u32 = 20_000;

/// Headroom, in tokens, subtracted from the effective context window to obtain
/// the auto-compact trigger threshold (see `get_auto_compact_threshold`).
pub const AUTOCOMPACT_BUFFER_TOKENS: u32 = 13_000;

/// Headroom, in tokens, below the active threshold at which the warning flag
/// is raised (see `calculate_token_warning_state`).
pub const WARNING_THRESHOLD_BUFFER_TOKENS: u32 = 20_000;

/// Headroom, in tokens, below the active threshold at which the error flag is
/// raised. Currently equal to the warning buffer, so both flags flip together.
pub const ERROR_THRESHOLD_BUFFER_TOKENS: u32 = 20_000;
87
88/// Get the blocking limit (when to block further input)
89pub fn get_blocking_limit(model: &str) -> u32 {
90    let effective_window = get_effective_context_window_size(model);
91    let default_blocking_limit = effective_window.saturating_sub(MANUAL_COMPACT_BUFFER_TOKENS);
92
93    // Allow override for testing
94    if let Ok(override_val) = std::env::var(ai::BLOCKING_LIMIT_OVERRIDE) {
95        if let Ok(parsed) = override_val.parse::<u32>() {
96            if parsed > 0 {
97                return parsed;
98            }
99        }
100    }
101
102    default_blocking_limit
103}
104
/// Headroom, in tokens, subtracted from the effective context window to obtain
/// the blocking limit. Smaller than the auto-compact buffer (more aggressive).
pub const MANUAL_COMPACT_BUFFER_TOKENS: u32 = 3_000;

/// Maximum consecutive auto-compact failures before giving up.
pub const MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES: u32 = 3;

/// Post-compaction: maximum number of files to restore.
pub const POST_COMPACT_MAX_FILES_TO_RESTORE: u32 = 5;

/// Post-compaction: total token budget shared by all restored file attachments.
pub const POST_COMPACT_TOKEN_BUDGET: u32 = 50_000;

/// Post-compaction: per-file token cap applied before a file is attached.
pub const POST_COMPACT_MAX_TOKENS_PER_FILE: u32 = 5_000;

/// Post-compaction: per-skill token cap applied before a skill is attached.
pub const POST_COMPACT_MAX_TOKENS_PER_SKILL: u32 = 5_000;

/// Post-compaction: total token budget for restored skills.
pub const POST_COMPACT_SKILLS_TOKEN_BUDGET: u32 = 25_000;
125
126/// Get effective context window size (total - output reserve)
127/// TS: autoCompact.ts getEffectiveContextWindowSize
128pub fn get_effective_context_window_size(model: &str) -> u32 {
129    let reserved_tokens_for_summary = crate::utils::context::get_max_output_tokens_for_model(model)
130        .min(crate::utils::context::COMPACT_MAX_OUTPUT_TOKENS) as u32;
131    let context_window = get_context_window_for_model(model);
132    context_window.saturating_sub(reserved_tokens_for_summary)
133}
134
135/// Get context window size for a model
136pub fn get_context_window_for_model(model: &str) -> u32 {
137    // Check environment override for auto compact window
138    if let Ok(override_val) = std::env::var(ai::AUTO_COMPACT_WINDOW) {
139        if let Ok(parsed) = override_val.parse::<u32>() {
140            if parsed > 0 {
141                return parsed;
142            }
143        }
144    }
145
146    // Default context windows by model
147    let lower = model.to_lowercase();
148    if lower.contains("sonnet") {
149        // Claude Sonnet models typically have 200K context
150        get_default_context_window()
151    } else if lower.contains("haiku") {
152        // Haiku has 200K context
153        get_default_context_window()
154    } else if lower.contains("opus") {
155        // Opus models typically have 200K context
156        get_default_context_window()
157    } else {
158        get_default_context_window()
159    }
160}
161
162/// Get the auto-compact threshold (when to trigger compaction)
163pub fn get_auto_compact_threshold(model: &str) -> u32 {
164    let effective_window = get_effective_context_window_size(model);
165
166    let autocompact_threshold = effective_window.saturating_sub(AUTOCOMPACT_BUFFER_TOKENS);
167
168    // Override for easier testing of autocompact
169    if let Ok(env_percent) = std::env::var(ai::AUTOCOMPACT_PCT_OVERRIDE) {
170        if let Ok(parsed) = env_percent.parse::<f64>() {
171            if parsed > 0.0 && parsed <= 100.0 {
172                let percentage_threshold =
173                    ((effective_window as f64 * (parsed / 100.0)) as u32).min(effective_window);
174                return percentage_threshold.min(autocompact_threshold);
175            }
176        }
177    }
178
179    autocompact_threshold
180}
181
/// Snapshot of how close a conversation is to its token limits, as produced by
/// `calculate_token_warning_state`.
/// Translated from: calculateTokenWarningState in autoCompact.ts
#[derive(Debug, Clone)]
pub struct TokenWarningState {
    /// Percentage (0.0–100.0) of the active threshold that is still unused.
    pub percent_left: f64,
    /// Usage has reached the active threshold minus `WARNING_THRESHOLD_BUFFER_TOKENS`.
    pub is_above_warning_threshold: bool,
    /// Usage has reached the active threshold minus `ERROR_THRESHOLD_BUFFER_TOKENS`.
    pub is_above_error_threshold: bool,
    /// Auto-compact is enabled and usage has reached the auto-compact threshold.
    pub is_above_auto_compact_threshold: bool,
    /// Usage has reached the blocking limit.
    pub is_at_blocking_limit: bool,
}
192
193pub fn calculate_token_warning_state(token_usage: u32, model: &str) -> TokenWarningState {
194    let auto_compact_threshold = get_auto_compact_threshold(model);
195    let effective_window = get_effective_context_window_size(model);
196
197    // Use auto_compact_threshold if enabled, otherwise use effective window
198    let threshold = if is_auto_compact_enabled_for_calculation() {
199        auto_compact_threshold
200    } else {
201        effective_window
202    };
203
204    let percent_left = if threshold > 0 {
205        ((threshold.saturating_sub(token_usage) as f64 / threshold as f64) * 100.0).max(0.0)
206    } else {
207        100.0
208    };
209
210    let warning_threshold = threshold.saturating_sub(WARNING_THRESHOLD_BUFFER_TOKENS);
211    let error_threshold = threshold.saturating_sub(ERROR_THRESHOLD_BUFFER_TOKENS);
212
213    let is_above_warning_threshold = token_usage >= warning_threshold;
214    let is_above_error_threshold = token_usage >= error_threshold;
215    let is_above_auto_compact_threshold =
216        is_auto_compact_enabled_for_calculation() && token_usage >= auto_compact_threshold;
217
218    // Calculate blocking limit
219    let default_blocking_limit = effective_window.saturating_sub(MANUAL_COMPACT_BUFFER_TOKENS);
220
221    // Allow override for testing (translate from CLAUDE_CODE_BLOCKING_LIMIT_OVERRIDE)
222    let blocking_limit = if let Ok(override_val) = std::env::var(ai_code::BLOCKING_LIMIT_OVERRIDE) {
223        if let Ok(parsed) = override_val.parse::<u32>() {
224            if parsed > 0 {
225                parsed
226            } else {
227                default_blocking_limit
228            }
229        } else {
230            default_blocking_limit
231        }
232    } else {
233        default_blocking_limit
234    };
235
236    let is_at_blocking_limit = token_usage >= blocking_limit;
237
238    TokenWarningState {
239        percent_left,
240        is_above_warning_threshold,
241        is_above_error_threshold,
242        is_above_auto_compact_threshold,
243        is_at_blocking_limit,
244    }
245}
246
247/// Check if auto-compact is enabled (used in calculation)
248/// Translated from: isAutoCompactEnabled in autoCompact.ts
249fn is_auto_compact_enabled_for_calculation() -> bool {
250    use crate::utils::env_utils::is_env_truthy;
251
252    if is_env_truthy(Some("DISABLE_COMPACT")) {
253        return false;
254    }
255    if is_env_truthy(Some("DISABLE_AUTO_COMPACT")) {
256        return false;
257    }
258    // Check user config - for now default to true
259    // In full implementation: getGlobalConfig().autoCompactEnabled
260    true
261}
262
/// Outcome of a compaction pass: the messages that replace the compacted
/// history plus token accounting for the operation.
#[derive(Debug, Clone)]
pub struct CompactionResult {
    /// The boundary marker message
    pub boundary_marker: Message,
    /// Summary messages to keep
    pub summary_messages: Vec<Message>,
    /// Messages that were kept verbatim (not summarized), if any
    pub messages_to_keep: Option<Vec<Message>>,
    /// Attachment messages to include alongside the summary
    pub attachments: Vec<Message>,
    /// Token count before compaction
    pub pre_compact_token_count: u32,
    /// Token count after compaction
    pub post_compact_token_count: u32,
    /// True post-compact token count (estimated from final compacted messages).
    /// Note this is `u64`, unlike the `u32` counters above.
    pub true_post_compact_token_count: Option<u64>,
    /// Token usage from the compaction API call itself
    pub compaction_usage: Option<TokenUsage>,
}
283
284/// Strip images from messages before sending for compaction
285/// Images are replaced with `[image]` text markers, documents with `[document]` markers
286/// to prevent compaction API from hitting prompt-too-long
287pub fn strip_images_from_messages(messages: &[Message]) -> Vec<Message> {
288    use crate::types::MessageRole;
289
290    messages
291        .iter()
292        .map(|msg| {
293            match msg.role {
294                MessageRole::User | MessageRole::Assistant => {
295                    // For user/assistant messages, strip image/document blocks
296                    // In the simple String content model, we look for image-like patterns
297                    let content = msg.content.clone();
298                    // Check for image markdown patterns
299                    if content.contains("![") || content.contains("<img") {
300                        // Strip markdown images: ![alt](url)
301                        let stripped = strip_image_markdown(&content);
302                        if stripped != content {
303                            return Message {
304                                role: msg.role.clone(),
305                                content: stripped,
306                                ..msg.clone()
307                            };
308                        }
309                    }
310                    msg.clone()
311                }
312                MessageRole::Tool => {
313                    // Tool results might contain image references
314                    let content = msg.content.clone();
315                    if content.contains("![")
316                        || content.contains("<img")
317                        || content.contains("image")
318                        || content.contains("document")
319                    {
320                        let stripped = strip_image_markdown(&content);
321                        if stripped != content {
322                            return Message {
323                                role: msg.role.clone(),
324                                content: stripped,
325                                ..msg.clone()
326                            };
327                        }
328                    }
329                    msg.clone()
330                }
331                MessageRole::System => msg.clone(),
332            }
333        })
334        .collect()
335}
336
/// Replace every markdown image `![alt](url)` in `content` with a text marker.
///
/// The marker is `[document]` when the alt text contains "doc", "pdf" or
/// "file" (case-insensitive) and `[image]` otherwise. Text that merely starts
/// like an image (`![` without a matching `](...)`) is copied through verbatim.
/// The alt text ends at the first `]` and the target at the first `)`, so
/// nested brackets or parentheses are not understood.
fn strip_image_markdown(content: &str) -> String {
    let mut output = String::with_capacity(content.len());
    let mut rest = content;

    while let Some(start) = rest.find("![") {
        // Everything before the candidate is plain text.
        output.push_str(&rest[..start]);
        let candidate = &rest[start..];

        match split_markdown_image(candidate) {
            Some((alt, remainder)) => {
                output.push_str(marker_for_alt(alt));
                rest = remainder;
            }
            None => {
                // Not a complete image: emit the '!' and rescan from the '['.
                output.push('!');
                rest = &candidate[1..];
            }
        }
    }

    output.push_str(rest);
    output
}

/// Split a string starting with `![` into `(alt, text after the closing ')')`,
/// or `None` when it is not a complete `![alt](url)` construct.
fn split_markdown_image(candidate: &str) -> Option<(&str, &str)> {
    let after_open = candidate.strip_prefix("![")?;
    let (alt, after_alt) = after_open.split_once(']')?;
    // The '(' must follow the ']' immediately.
    let target = after_alt.strip_prefix('(')?;
    let (_url, remainder) = target.split_once(')')?;
    Some((alt, remainder))
}

/// Choose the replacement marker from an image's alt text.
fn marker_for_alt(alt: &str) -> &'static str {
    let alt = alt.to_lowercase();
    let looks_like_document = ["doc", "pdf", "file"]
        .iter()
        .any(|hint| alt.contains(*hint));
    if looks_like_document {
        "[document]"
    } else {
        "[image]"
    }
}
382
383/// Strip reinjected attachments (skill_discovery/skill_listing) that will be
384/// re-injected post-compaction anyway
385pub fn strip_reinjected_attachments(messages: &[Message]) -> Vec<Message> {
386    // In the simple String content model, we look for skill attachment patterns
387    messages
388        .iter()
389        .map(|msg| {
390            if msg.content.contains("skill_discovery") || msg.content.contains("skill_listing") {
391                Message {
392                    role: msg.role.clone(),
393                    content: "[Skill attachment content cleared for compaction]".to_string(),
394                    ..msg.clone()
395                }
396            } else {
397                msg.clone()
398            }
399        })
400        .collect()
401}
402
403/// Estimate token count for messages (rough estimation)
404/// Uses 4 chars per token for regular text (matching original TypeScript)
405/// Uses 2 chars per token for tool results (JSON is more token-efficient)
406/// Takes optional max_output_tokens to ensure we leave room for the response
407pub fn estimate_token_count(messages: &[Message], max_output_tokens: u32) -> u32 {
408    // Regular text: 4 chars per token (original TypeScript default)
409    let non_tool_chars: usize = messages
410        .iter()
411        .filter(|msg| msg.role != MessageRole::Tool)
412        .map(|msg| msg.content.len())
413        .sum();
414
415    // Tool results (JSON): 2 chars per token (more efficient encoding)
416    // Original: "Dense JSON has many single-character tokens..."
417    let tool_result_chars: usize = messages
418        .iter()
419        .filter(|msg| msg.role == MessageRole::Tool)
420        .map(|msg| msg.content.len())
421        .sum();
422
423    let base_estimate = (non_tool_chars / 4) as u32;
424    let tool_buffer = (tool_result_chars / 2) as u32; // More efficient for JSON
425
426    // Add the requested output tokens to ensure we leave room for the response
427    base_estimate + tool_buffer + max_output_tokens
428}
429
430/// Check if conversation should be compacted
431pub fn should_compact(token_usage: u32, model: &str) -> bool {
432    let state = calculate_token_warning_state(token_usage, model);
433    state.is_above_auto_compact_threshold
434}
435
436/// Truncate messages to fit within a safe token limit for summarization
437/// This is used when the conversation is too large to fit in context
438/// Skips ALL system messages (they contain huge compaction summaries)
439/// Returns (truncated_messages, estimated_tokens)
440pub fn truncate_messages_for_summary(
441    messages: &[Message],
442    model: &str,
443    max_output_tokens: u32,
444) -> (Vec<Message>, u32) {
445    let context_window = get_context_window_for_model(model);
446    // Leave room for output tokens and buffer - use 50% of available space for safety
447    let safe_limit = ((context_window.saturating_sub(max_output_tokens)) as f64 * 0.50) as u32;
448
449    let total_messages = messages.len();
450    if total_messages == 0 {
451        return (vec![], 0);
452    }
453
454    // Skip ALL system messages - they contain huge compaction summaries from previous rounds
455    // For summarization, we only need the conversation history (user/assistant/tool messages)
456    let non_system_messages: Vec<Message> = messages
457        .iter()
458        .filter(|m| m.role != MessageRole::System)
459        .cloned()
460        .collect();
461
462    // Now take most recent non-system messages using proper token estimation
463    let mut current_tokens = 0u32;
464    let mut history_messages = Vec::new();
465
466    for msg in non_system_messages.iter().rev() {
467        let msg_tokens = rough_token_count_estimation_for_message(msg) as u32;
468        if current_tokens + msg_tokens > safe_limit {
469            break;
470        }
471        current_tokens += msg_tokens;
472        history_messages.insert(0, msg.clone());
473    }
474
475    // If we couldn't fit any history, try to at least get recent messages
476    if history_messages.is_empty() && !non_system_messages.is_empty() {
477        // Take just the last message, truncated if needed
478        let last_msg = non_system_messages.last().unwrap();
479        let max_chars = (safe_limit as usize) * 4;
480        let chars_to_keep = last_msg.content.len().min(max_chars);
481        let truncated_content = last_msg
482            .content
483            .chars()
484            .take(chars_to_keep)
485            .collect::<String>();
486
487        current_tokens = rough_token_count_estimation(&truncated_content, 4.0) as u32;
488
489        history_messages = vec![Message {
490            role: last_msg.role.clone(),
491            content: truncated_content,
492            ..Default::default()
493        }];
494    }
495
496    let total_estimated = current_tokens;
497
498    (history_messages, total_estimated)
499}
500
// Unit tests for the threshold helpers and the rough token estimator.
//
// The threshold tests read the process environment through the functions under
// test, so they assume none of the override / disable variables are set.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_effective_context_window() {
        let window = get_effective_context_window_size("claude-sonnet-4-6");
        // 200000 - 20000 = 180000
        // (assumes the output reserve resolves to 20_000; it is defined in
        // crate::utils::context, not in this file — TODO confirm)
        assert_eq!(window, 180_000);
    }

    #[test]
    fn test_auto_compact_threshold() {
        let threshold = get_auto_compact_threshold("claude-sonnet-4-6");
        // 180000 - 13000 = 167000
        assert_eq!(threshold, 167_000);
    }

    #[test]
    fn test_token_warning_state_normal() {
        let state = calculate_token_warning_state(50_000, "claude-sonnet-4-6");
        assert!(!state.is_above_warning_threshold);
        assert!(!state.is_above_error_threshold);
        assert!(!state.is_above_auto_compact_threshold);
        assert!(state.percent_left > 50.0);
    }

    #[test]
    fn test_token_warning_state_warning() {
        // With auto-compact enabled the active threshold is 167000, so the
        // warning threshold is 167000 - 20000 = 147000
        let state = calculate_token_warning_state(165_000, "claude-sonnet-4-6");
        assert!(state.is_above_warning_threshold);
        // error uses same buffer, so this is also above error threshold
        assert!(state.is_above_error_threshold);
        assert!(!state.is_above_auto_compact_threshold);
    }

    #[test]
    fn test_token_warning_state_compact() {
        let state = calculate_token_warning_state(170_000, "claude-sonnet-4-6");
        assert!(state.is_above_warning_threshold);
        assert!(state.is_above_auto_compact_threshold);
    }

    #[test]
    fn test_should_compact() {
        assert!(!should_compact(50_000, "claude-sonnet-4-6"));
        assert!(should_compact(170_000, "claude-sonnet-4-6"));
    }

    #[test]
    fn test_estimate_token_count() {
        let messages = vec![
            Message {
                role: MessageRole::User,
                content: "Hello, this is a test message".to_string(),
                ..Default::default()
            },
            Message {
                role: MessageRole::Assistant,
                content: "Hi! How can I help you today?".to_string(),
                ..Default::default()
            },
        ];

        let count = estimate_token_count(&messages, 0);
        // 29 + 29 = 58 bytes of non-tool text; 58 / 4 = 14 tokens
        assert!(count > 0);
    }
}
571
572// ============================================================================
573// Compact Command Module (translated from commands/compact/)
574// ============================================================================
575
// Compact command definition
// Translates: /data/home/swei/claudecode/openclaudecode/src/commands/compact/index.ts
578
/// Check whether an environment variable's *value* is truthy (copied from bridge_enabled).
///
/// `env_var` is the value read from the environment, not the variable's name.
/// After lowercasing and trimming, `"1"`, `"true"`, `"yes"` and `"on"` are
/// truthy; everything else, including the empty string, is not.
fn is_env_truthy(env_var: &str) -> bool {
    let normalized = env_var.to_lowercase();
    matches!(normalized.trim(), "1" | "true" | "yes" | "on")
}

/// Whether the compact command is currently allowed by the environment.
///
/// Reads `AI_DISABLE_COMPACT` on every call; the command is disabled only when
/// the variable is set to a truthy value. An unset or non-UTF-8 variable leaves
/// it enabled.
fn compact_enabled_from_env() -> bool {
    let disabled = std::env::var("AI_DISABLE_COMPACT")
        .map(|value| is_env_truthy(&value))
        .unwrap_or(false);
    !disabled
}

/// Compact command configuration
#[derive(Debug, Clone)]
pub struct CompactCommand {
    /// Command type
    pub command_type: String,
    /// Command name
    pub name: String,
    /// Command description
    pub description: String,
    /// Predicate evaluated on demand to decide whether the command is enabled
    pub is_enabled: fn() -> bool,
    /// Whether it supports non-interactive mode
    pub supports_non_interactive: bool,
    /// Argument hint text
    pub argument_hint: String,
}

impl Default for CompactCommand {
    fn default() -> Self {
        Self::new()
    }
}

impl CompactCommand {
    /// Create a new compact command
    pub fn new() -> Self {
        Self {
            command_type: "local".to_string(),
            name: "compact".to_string(),
            description: "Clear conversation history but keep a summary in context. Optional: /compact [instructions for summarization]".to_string(),
            // Look the variable up and test its value. Passing the literal
            // name to `is_env_truthy` can never be truthy, which left the
            // command permanently enabled.
            is_enabled: compact_enabled_from_env,
            supports_non_interactive: true,
            argument_hint: "<optional custom summarization instructions>".to_string(),
        }
    }

    /// Check if the command is enabled (re-evaluates the stored predicate)
    pub fn is_enabled(&self) -> bool {
        (self.is_enabled)()
    }
}
630
631/// Get the compact command
632pub fn get_compact_command() -> CompactCommand {
633    CompactCommand::new()
634}
635
/// Error message strings used by the compact command.
pub mod compact_errors {
    /// The model's reply during compaction was incomplete.
    pub const ERROR_MESSAGE_INCOMPLETE_RESPONSE: &str =
        "Incomplete response from model during compaction";
    /// The conversation has too few messages to compact.
    pub const ERROR_MESSAGE_NOT_ENOUGH_MESSAGES: &str = "Not enough messages to compact";
    /// The user aborted the compaction.
    pub const ERROR_MESSAGE_USER_ABORT: &str = "User aborted compaction";
}
646
647/// Post-compact restore state — tracks recently accessed files for restoration
648#[derive(Debug, Clone, Default)]
649pub struct FileReadState {
650    /// Maps file path → (content, access order index)
651    entries: std::collections::HashMap<String, (String, u64)>,
652    /// Monotonic counter for recency tracking
653    counter: u64,
654}
655
656impl FileReadState {
657    pub fn new() -> Self {
658        Self::default()
659    }
660
661    /// Record a file read. More recent reads get higher priority for restore.
662    pub fn record(&mut self, path: String, content: String) {
663        self.counter += 1;
664        self.entries.insert(path, (content, self.counter));
665    }
666
667    /// Get the most recently accessed files, limited to max_files.
668    /// Skips files whose paths are already in preserved_read_paths.
669    pub fn recent_files(
670        &self,
671        max_files: usize,
672        preserved_read_paths: &std::collections::HashSet<String>,
673    ) -> Vec<(String, String)> {
674        let mut entries: Vec<(&String, &(String, u64))> = self.entries.iter().collect();
675        // Sort by recency (highest counter = most recent)
676        entries.sort_by(|a, b| b.1.1.cmp(&a.1.1));
677        entries
678            .into_iter()
679            .filter_map(|(path, (content, _))| {
680                if preserved_read_paths.contains(path.as_str()) {
681                    None
682                } else if should_exclude_from_restore(path) {
683                    None
684                } else {
685                    Some((path.clone(), content.clone()))
686                }
687            })
688            .take(max_files)
689            .collect()
690    }
691}
692
/// Whether `path` must be left out of post-compact restore (plan files, memory
/// files, AI.md / CLAUDE.md variants). Matching is case-insensitive.
///
/// NOTE(review): the AI.md / CLAUDE.md rule is a plain suffix test, so names
/// such as `openai.md` also match — confirm whether a file-name comparison
/// was intended.
fn should_exclude_from_restore(path: &str) -> bool {
    // Instruction files, matched by the end of the path.
    const EXCLUDED_SUFFIXES: [&str; 2] = ["ai.md", "claude.md"];
    // Memory and plan directories, matched anywhere in the path.
    const EXCLUDED_FRAGMENTS: [&str; 3] = [".ai/memory/", ".claude/memory/", "/plans/"];

    let lower = path.to_lowercase();

    EXCLUDED_SUFFIXES.iter().any(|suffix| lower.ends_with(*suffix))
        || EXCLUDED_FRAGMENTS
            .iter()
            .any(|fragment| lower.contains(*fragment))
}
710
711/// Collect file paths from Read tool results in preserved messages.
712/// Returns paths that are already visible and don't need restoration.
713pub fn collect_read_tool_file_paths(messages: &[Message]) -> std::collections::HashSet<String> {
714    let mut paths = std::collections::HashSet::new();
715    for msg in messages {
716        if msg.role != MessageRole::Assistant {
717            continue;
718        }
719        // Check if this is a Read tool call
720        if let Some(ref calls) = msg.tool_calls {
721            for call in calls {
722                if call.name == "Read" {
723                    if let Some(path) = call.arguments.get("file_path").and_then(|p| p.as_str()) {
724                        paths.insert(path.to_string());
725                    }
726                }
727            }
728        }
729    }
730    paths
731}
732
/// Marker appended by `truncate_to_tokens` whenever it cuts content short,
/// telling the reader how to retrieve the full text.
pub const SKILL_TRUNCATION_MARKER: &str =
    "\n\n[... skill content truncated for compaction; use Read on the skill path if you need the full text]";
736
737/// Truncate content to roughly max_tokens, keeping the head.
738/// rough_token_count_estimation uses ~4 chars/token, so char budget = max_tokens * 4.
739pub fn truncate_to_tokens(content: &str, max_tokens: u32) -> String {
740    if rough_token_count_estimation_for_content(content) <= max_tokens as usize {
741        return content.to_string();
742    }
743    let char_budget = (max_tokens as usize).saturating_sub(SKILL_TRUNCATION_MARKER.len())
744        * 4
745        .min(content.len());
746    format!("{}{}", &content[..char_budget], SKILL_TRUNCATION_MARKER)
747}
748
/// Post-compact restore result: the attachment messages to re-inject after
/// compaction, grouped by origin.
pub struct PostCompactRestore {
    /// Attachment messages for recently read files
    pub file_attachments: Vec<Message>,
    /// Attachment messages for invoked skills
    pub skill_attachments: Vec<Message>,
}
756
757/// Create post-compact file restore attachments.
758///
759/// Reads the most recently accessed files that fit within the token budget
760/// and returns them as attachment messages to re-inject after compaction.
761pub fn create_post_compact_file_attachments(
762    file_state: &FileReadState,
763    preserved_messages: &[Message],
764    max_files: usize,
765) -> Vec<Message> {
766    let preserved_paths = collect_read_tool_file_paths(preserved_messages);
767    let recent = file_state.recent_files(max_files, &preserved_paths);
768
769    let mut attachments = Vec::new();
770    let mut used_tokens: usize = 0;
771
772    for (path, content) in recent {
773        let truncated = truncate_to_tokens(&content, POST_COMPACT_MAX_TOKENS_PER_FILE);
774        let attachment = create_file_restore_attachment(&path, &truncated);
775        let tokens = rough_token_count_estimation_for_content(
776            &serde_json::to_string(&attachment).unwrap_or_default(),
777        );
778        if used_tokens + tokens <= POST_COMPACT_TOKEN_BUDGET as usize {
779            used_tokens += tokens;
780            attachments.push(attachment);
781        }
782    }
783    attachments
784}
785
786/// Create a single file restore attachment message
787fn create_file_restore_attachment(path: &str, content: &str) -> Message {
788    Message {
789        role: MessageRole::User,
790        content: format!(
791            "<post-compact-file-restore>\nFile: {}\n```\n{}\n```\n</post-compact-file-restore>",
792            path, content
793        ),
794        attachments: None,
795        tool_call_id: None,
796        tool_calls: None,
797        is_error: None,
798        is_meta: Some(true),
799        is_api_error_message: None,
800        error_details: None,
801        uuid: None,
802    }
803}
804
805/// Create post-compact skill restore attachments.
806///
807/// Takes a list of (skill_name, skill_content) pairs and creates attachment
808/// messages within the skills token budget.
809pub fn create_post_compact_skill_attachments(
810    skills: &[(String, String)],
811) -> Vec<Message> {
812    let mut attachments = Vec::new();
813    let mut used_tokens: usize = 0;
814
815    for (name, content) in skills {
816        let truncated = truncate_to_tokens(content, POST_COMPACT_MAX_TOKENS_PER_SKILL);
817        let attachment = create_skill_restore_attachment(name, &truncated);
818        let tokens = rough_token_count_estimation_for_content(
819            &serde_json::to_string(&attachment).unwrap_or_default(),
820        );
821        if used_tokens + tokens <= POST_COMPACT_SKILLS_TOKEN_BUDGET as usize {
822            used_tokens += tokens;
823            attachments.push(attachment);
824        }
825    }
826    attachments
827}
828
829/// Create a single skill restore attachment message
830fn create_skill_restore_attachment(name: &str, content: &str) -> Message {
831    Message {
832        role: MessageRole::User,
833        content: format!(
834            "<post-compact-skill-restore>\nSkill: {}\n```\n{}\n```\n</post-compact-skill-restore>",
835            name, content
836        ),
837        attachments: None,
838        tool_call_id: None,
839        tool_calls: None,
840        is_error: None,
841        is_meta: Some(true),
842        is_api_error_message: None,
843        error_details: None,
844        uuid: None,
845    }
846}
847
/// Unit tests for the post-compact restore helpers: file read tracking,
/// restore exclusions, truncation, `Read` path collection, and the
/// token-budgeted attachment builders.
#[cfg(test)]
mod post_compact_tests {
    use super::*;
    use std::collections::HashSet;

    /// Build an assistant message carrying a single tool call, so each test
    /// only spells out the parts it cares about.
    fn assistant_with_tool_call(
        content: &str,
        tool_name: &str,
        arguments: serde_json::Value,
    ) -> Message {
        Message {
            role: MessageRole::Assistant,
            content: content.to_string(),
            attachments: None,
            tool_call_id: None,
            tool_calls: Some(vec![ToolCall {
                id: "t1".to_string(),
                r#type: "function".to_string(),
                name: tool_name.to_string(),
                arguments,
            }]),
            is_error: None,
            is_meta: None,
            is_api_error_message: None,
            error_details: None,
            uuid: None,
        }
    }

    #[test]
    fn test_file_read_state_records_and_retrieves() {
        let mut state = FileReadState::new();
        state.record("/a.txt".to_string(), "content a".to_string());
        state.record("/b.txt".to_string(), "content b".to_string());
        let recent = state.recent_files(1, &HashSet::new());
        assert_eq!(recent.len(), 1);
        assert_eq!(recent[0].0, "/b.txt"); // most recent
    }

    #[test]
    fn test_file_read_state_skips_preserved() {
        let mut state = FileReadState::new();
        state.record("/a.txt".to_string(), "content a".to_string());
        state.record("/b.txt".to_string(), "content b".to_string());
        let mut preserved = HashSet::new();
        preserved.insert("/a.txt".to_string());
        let recent = state.recent_files(5, &preserved);
        assert_eq!(recent.len(), 1);
        assert_eq!(recent[0].0, "/b.txt");
    }

    #[test]
    fn test_should_exclude_from_restore() {
        assert!(should_exclude_from_restore("/home/user/.ai/ai.md"));
        assert!(should_exclude_from_restore("/home/user/.ai/memory/user.md"));
        assert!(should_exclude_from_restore("/home/user/.claude/memory/feedback.md"));
        assert!(should_exclude_from_restore("/home/user/.claude/plans/my-plan.md"));
        assert!(!should_exclude_from_restore("/home/user/src/main.rs"));
        assert!(!should_exclude_from_restore("/home/user/Cargo.toml"));
    }

    #[test]
    fn test_truncate_to_tokens_no_truncation() {
        let content = "short content";
        assert_eq!(truncate_to_tokens(content, 100), "short content");
    }

    #[test]
    fn test_truncate_to_tokens_truncates() {
        let content = "a".repeat(10_000);
        let truncated = truncate_to_tokens(&content, 10);
        // The marker is appended, so it must be the tail of the result.
        assert!(truncated.ends_with(SKILL_TRUNCATION_MARKER));
        assert!(truncated.len() < content.len());
    }

    #[test]
    fn test_collect_read_tool_file_paths() {
        let messages = vec![assistant_with_tool_call(
            "reading file",
            "Read",
            serde_json::json!({"file_path": "/foo/bar.txt"}),
        )];
        let paths = collect_read_tool_file_paths(&messages);
        assert!(paths.contains("/foo/bar.txt"));
    }

    #[test]
    fn test_collect_read_tool_file_paths_skips_non_read() {
        let messages = vec![assistant_with_tool_call(
            "running bash",
            "Bash",
            serde_json::json!({"command": "ls"}),
        )];
        let paths = collect_read_tool_file_paths(&messages);
        assert!(paths.is_empty());
    }

    #[test]
    fn test_create_post_compact_file_attachments() {
        let mut state = FileReadState::new();
        state.record("/a.txt".to_string(), "a".repeat(100));
        state.record("/b.txt".to_string(), "b".repeat(100));
        let attachments = create_post_compact_file_attachments(&state, &[], 5);
        assert_eq!(attachments.len(), 2);
        assert_eq!(attachments[0].is_meta, Some(true));
        assert!(attachments[0].content.contains("post-compact-file-restore"));
    }

    #[test]
    fn test_create_post_compact_skill_attachments() {
        let skills = vec![("my-skill".to_string(), "skill content here".to_string())];
        let attachments = create_post_compact_skill_attachments(&skills);
        assert_eq!(attachments.len(), 1);
        assert!(attachments[0].content.contains("my-skill"));
        assert!(attachments[0].content.contains("post-compact-skill-restore"));
    }

    #[test]
    fn test_post_compact_restore_token_budget() {
        let mut state = FileReadState::new();
        // Create large files that exceed budget
        for i in 0..20 {
            state.record(
                format!("/file_{}.txt", i),
                "x".repeat(100_000), // Each file is large
            );
        }
        let attachments = create_post_compact_file_attachments(&state, &[], 5);
        // Should be limited by budget
        assert!(!attachments.is_empty());
        assert!(attachments.len() <= 5);
        // Total tokens should be within budget
        let total_tokens: usize = attachments
            .iter()
            .map(|a| {
                rough_token_count_estimation_for_content(
                    &serde_json::to_string(a).unwrap_or_default(),
                )
            })
            .sum();
        assert!(total_tokens <= POST_COMPACT_TOKEN_BUDGET as usize);
    }
}
ai_agent/compact.rs

ai_agent/
compact.rs