oxi_ai/
compaction.rs

1//! Context compaction for long conversations
2//!
3//! This module provides functionality to compact conversation history when it
4//! becomes too large, using the LLM itself to summarize older messages.
5
6use crate::high_level::complete;
7use crate::high_level::tokens::estimate as estimate_tokens;
8use crate::{
9    Api, AssistantMessage, ContentBlock, Context, Message, Model, Provider, StreamOptions,
10    TextContent, UserMessage,
11};
12
/// Truncate `s` to at most `max_chars` characters, appending `"..."` if anything was cut.
///
/// The limit is counted in `char`s (Unicode scalar values), not bytes, so multi-byte
/// text is neither over- nor under-counted and the cut always lands on a valid UTF-8
/// boundary. A string that already fits is returned unchanged, without an ellipsis.
/// The ellipsis itself is not counted against `max_chars`.
fn safe_truncate(s: &str, max_chars: usize) -> String {
    // Fast path: a string never has more characters than bytes.
    if s.len() <= max_chars {
        return s.to_string();
    }
    // `nth(max_chars)` is the first character beyond the limit; its byte offset is
    // exactly where the kept prefix ends. `None` means the string fits.
    match s.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}...", &s[..cut]),
        None => s.to_string(),
    }
}
26
27/// Generate a concise summary of the last N conversation messages.
28///
29/// Returns a string summarizing key topics and decisions without
30/// requiring a full compaction step.
31pub fn generate_branch_summary(messages: &[Message], n: usize) -> String {
32    if messages.is_empty() {
33        return "(empty conversation)".to_string();
34    }
35
36    let last_n: Vec<_> = if n > 0 {
37        messages.iter().rev().take(n).collect()
38    } else {
39        messages.iter().collect()
40    };
41
42    let mut topics = Vec::new();
43    let mut decisions = Vec::new();
44
45    for msg in last_n.iter().rev() {
46        let role = match msg {
47            Message::User(_) => "user",
48            Message::Assistant(_) => "assistant",
49            Message::ToolResult(_) => "tool",
50        };
51        let content = msg.text_content().unwrap_or_default();
52        let preview = safe_truncate(&content, 120);
53
54        // Detect code/file references
55        if content.contains("created file") || content.contains("edited file") {
56            topics.push("file modifications".to_string());
57        }
58        if content.contains("implemented") || content.contains("added feature") {
59            topics.push("feature implementation".to_string());
60        }
61        if content.contains("decided") || content.contains("chose") || content.contains("agreed") {
62            decisions.push(preview);
63        }
64        if content.contains("search") || content.contains("debug") || content.contains("fix") {
65            topics.push(format!("inquiry/analysis by {}", role));
66        }
67    }
68
69    // Deduplicate topics
70    topics.dedup();
71    decisions.dedup();
72
73    let summary = if topics.is_empty() && decisions.is_empty() {
74        // Fallback: just the last message preview
75        messages
76            .last()
77            .and_then(|m| m.text_content().ok())
78            .map(|c| safe_truncate(&c, 200))
79            .unwrap_or_else(|| "(no content)".to_string())
80    } else {
81        let mut parts = Vec::new();
82        if !topics.is_empty() {
83            parts.push(format!("Topics: {}", topics.join(", ")));
84        }
85        if !decisions.is_empty() {
86            parts.push(format!("Decisions: {}", decisions.join("; ")));
87        }
88        parts.join(" | ")
89    };
90
91    format!("[Branch summary of {} msgs] {}", messages.len(), summary)
92}
93
94use async_trait::async_trait;
95use chrono::{DateTime, Utc};
96use serde::{Deserialize, Serialize};
97use std::sync::Arc;
98use std::time::Duration;
99
/// Settings that control LLM-based compaction.
#[derive(Debug, Clone)]
pub struct CompactionConfig {
    /// Number of most-recent messages that are always kept (never compacted).
    pub keep_recent: usize,
    /// Maximum number of old messages to include in one summarization batch.
    pub max_batch: usize,
    /// Target compaction ratio; e.g. `0.5` means "reduce to 50%".
    /// The builder methods clamp this to `0.1..=0.9`.
    pub target_ratio: f32,
    /// Maximum tokens for the summary response.
    pub summary_max_tokens: usize,
    /// Sampling temperature for summarization (lower = more focused).
    /// The builder method clamps this to `0.0..=1.0`.
    pub temperature: f32,
    /// Timeout for LLM compaction requests.
    pub timeout: Duration,
    /// Optional custom instruction for the summarizer.
    pub custom_instruction: Option<String>,
}

impl CompactionConfig {
    /// Build the stock configuration: keep 4 recent messages, batches of 20,
    /// target ratio 0.5, 1024 summary tokens, temperature 0.3, 60 s timeout and
    /// no custom instruction.
    pub fn new() -> Self {
        let timeout = Duration::from_secs(60);
        Self {
            keep_recent: 4,
            max_batch: 20,
            target_ratio: 0.5,
            summary_max_tokens: 1024,
            temperature: 0.3,
            timeout,
            custom_instruction: None,
        }
    }

    /// Return the configuration with `keep_recent` replaced by `count`.
    pub fn with_keep_recent(self, count: usize) -> Self {
        Self {
            keep_recent: count,
            ..self
        }
    }

    /// Return the configuration with `max_batch` replaced by `count`.
    pub fn with_max_batch(self, count: usize) -> Self {
        Self {
            max_batch: count,
            ..self
        }
    }

    /// Return the configuration with the target ratio set to `ratio`,
    /// clamped into `0.1..=0.9`.
    pub fn with_target_ratio(self, ratio: f32) -> Self {
        let target_ratio = ratio.clamp(0.1, 0.9);
        Self {
            target_ratio,
            ..self
        }
    }

    /// Return the configuration with `summary_max_tokens` replaced by `tokens`.
    pub fn with_summary_max_tokens(self, tokens: usize) -> Self {
        Self {
            summary_max_tokens: tokens,
            ..self
        }
    }

    /// Return the configuration with the temperature set to `temp`,
    /// clamped into `0.0..=1.0`.
    pub fn with_temperature(self, temp: f32) -> Self {
        let temperature = temp.clamp(0.0, 1.0);
        Self {
            temperature,
            ..self
        }
    }

    /// Return the configuration with the LLM request timeout replaced by `timeout`.
    pub fn with_timeout(self, timeout: Duration) -> Self {
        Self { timeout, ..self }
    }

    /// Return the configuration with a custom summarizer instruction installed.
    pub fn with_custom_instruction(self, instruction: impl Into<String>) -> Self {
        let custom_instruction = Some(instruction.into());
        Self {
            custom_instruction,
            ..self
        }
    }
}

impl Default for CompactionConfig {
    /// Same values as [`CompactionConfig::new`].
    fn default() -> Self {
        CompactionConfig::new()
    }
}
181
182/// Metadata about a compaction operation
183#[derive(Debug, Clone, Serialize, Deserialize)]
184pub struct CompactionMetadata {
185    /// Estimated token count before compaction
186    pub original_tokens: usize,
187    /// Estimated token count after compaction
188    pub compacted_tokens: usize,
189    /// Number of messages that were compacted
190    pub messages_compacted: usize,
191    /// Number of messages kept
192    pub messages_kept: usize,
193    /// Timestamp of compaction
194    pub timestamp: DateTime<Utc>,
195    /// Target ratio used
196    pub target_ratio: f32,
197    /// Actual compaction ratio achieved
198    pub actual_ratio: f32,
199    /// Whether the operation was successful
200    pub success: bool,
201    /// Error message if the operation failed
202    pub error: Option<String>,
203}
204
205impl CompactionMetadata {
206    /// Create new metadata for a successful compaction
207    pub fn new(
208        original_tokens: usize,
209        compacted_tokens: usize,
210        messages_compacted: usize,
211        messages_kept: usize,
212        target_ratio: f32,
213    ) -> Self {
214        let actual_ratio = if original_tokens > 0 {
215            compacted_tokens as f32 / original_tokens as f32
216        } else {
217            1.0
218        };
219
220        Self {
221            original_tokens,
222            compacted_tokens,
223            messages_compacted,
224            messages_kept,
225            timestamp: Utc::now(),
226            target_ratio,
227            actual_ratio,
228            success: true,
229            error: None,
230        }
231    }
232
233    /// Create metadata for a failed compaction
234    pub fn failed(
235        original_tokens: usize,
236        messages_compacted: usize,
237        target_ratio: f32,
238        error: impl Into<String>,
239    ) -> Self {
240        Self {
241            original_tokens,
242            compacted_tokens: original_tokens,
243            messages_compacted,
244            messages_kept: 0,
245            timestamp: Utc::now(),
246            target_ratio,
247            actual_ratio: 1.0,
248            success: false,
249            error: Some(error.into()),
250        }
251    }
252
253    /// Get the compression factor (how much the context was reduced)
254    pub fn compression_factor(&self) -> f32 {
255        if self.actual_ratio > 0.0 {
256            1.0 - self.actual_ratio
257        } else {
258            0.0
259        }
260    }
261
262    /// Get tokens saved from compaction
263    pub fn tokens_saved(&self) -> usize {
264        self.original_tokens.saturating_sub(self.compacted_tokens)
265    }
266}
267
268/// Result of context compaction
269#[derive(Debug, Clone)]
270pub struct CompactedContext {
271    /// Summary of the compacted messages
272    pub summary: String,
273    /// Messages that were kept (typically recent ones)
274    pub kept_messages: Vec<Message>,
275    /// Number of messages that were compacted
276    pub compacted_count: usize,
277    /// Metadata about the compaction operation
278    pub metadata: CompactionMetadata,
279}
280
281impl CompactedContext {
282    /// Create a new compacted context
283    pub fn new(
284        summary: String,
285        kept_messages: Vec<Message>,
286        compacted_count: usize,
287        metadata: CompactionMetadata,
288    ) -> Self {
289        Self {
290            summary,
291            kept_messages,
292            compacted_count,
293            metadata,
294        }
295    }
296
297    /// Get the summary text
298    pub fn summary(&self) -> &str {
299        &self.summary
300    }
301
302    /// Get kept messages count
303    pub fn kept_count(&self) -> usize {
304        self.kept_messages.len()
305    }
306
307    /// Get compacted messages count
308    pub fn compacted_count(&self) -> usize {
309        self.compacted_count
310    }
311
312    /// Get the compaction metadata
313    pub fn metadata(&self) -> &CompactionMetadata {
314        &self.metadata
315    }
316
317    /// Check if compaction was successful
318    pub fn is_success(&self) -> bool {
319        self.metadata.success
320    }
321}
322
323/// Compaction strategy determining when to compact
324#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
325pub enum CompactionStrategy {
326    /// Never compact context
327    Disabled,
328    /// Compact when context is at least this percentage full (0.0 to 1.0)
329    Threshold(f32),
330    /// Compact after every N turns
331    EveryNTurns(usize),
332    /// Compact when context exceeds this absolute token count
333    AbsoluteTokens(usize),
334}
335
336impl CompactionStrategy {
337    /// Check if compaction should happen based on strategy
338    ///
339    /// # Arguments
340    /// * `context_tokens` - Estimated token count of current context
341    /// * `context_window` - Total context window size
342    /// * `iteration` - Current iteration count
343    ///
344    /// # Returns
345    /// `true` if compaction should be triggered
346    pub fn should_compact(
347        &self,
348        context_tokens: usize,
349        context_window: usize,
350        iteration: usize,
351    ) -> bool {
352        match self {
353            CompactionStrategy::Disabled => false,
354            CompactionStrategy::Threshold(threshold) => {
355                if context_window == 0 {
356                    return false;
357                }
358                let usage = context_tokens as f32 / context_window as f32;
359                usage >= *threshold
360            }
361            CompactionStrategy::EveryNTurns(n) => iteration > 0 && iteration.is_multiple_of(*n),
362            CompactionStrategy::AbsoluteTokens(max_tokens) => context_tokens >= *max_tokens,
363        }
364    }
365}
366
367impl Default for CompactionStrategy {
368    fn default() -> Self {
369        CompactionStrategy::Threshold(0.8)
370    }
371}
372
/// Errors produced by compaction operations.
#[derive(Debug, Clone)]
pub enum CompactionError {
    /// The compaction request to the LLM failed; carries the underlying message.
    LlmError(String),
    /// There were no messages to compact.
    NoMessagesToCompact,
    /// Too few messages to compact (need at least `keep_recent + 1`).
    TooFewMessages { total: usize, keep_recent: usize },
    /// Compaction was disabled.
    CompactionDisabled,
    /// Context window not available.
    NoContextWindow,
}

impl std::fmt::Display for CompactionError {
    /// Render a human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::LlmError(reason) => write!(f, "LLM compaction failed: {reason}"),
            Self::NoMessagesToCompact => f.write_str("No messages to compact"),
            Self::TooFewMessages { total, keep_recent } => {
                // At least one message beyond the retained tail is needed.
                let required = keep_recent + 1;
                write!(
                    f,
                    "Not enough messages ({total}) to compact (need at least {required} for keep_recent)"
                )
            }
            Self::CompactionDisabled => f.write_str("Compaction is disabled"),
            Self::NoContextWindow => f.write_str("Context window not configured"),
        }
    }
}

impl std::error::Error for CompactionError {}
408
409/// Trait for context compaction implementations
410#[async_trait]
411pub trait Compactor: Send + Sync {
412    /// Compact messages, returning a summary and kept messages
413    async fn compact(
414        &self,
415        messages: &[Message],
416        instruction: Option<&str>,
417    ) -> std::result::Result<CompactedContext, CompactionError>;
418
419    /// Estimate the token count of messages
420    fn estimate_tokens(&self, messages: &[Message]) -> usize {
421        messages
422            .iter()
423            .map(|msg| estimate_tokens(&msg.text_content().unwrap_or_default()))
424            .sum()
425    }
426}
427
428/// LLM-based compactor that uses the model itself to summarize
429pub struct LlmCompactor {
430    model: Model,
431    _provider: Arc<dyn Provider>,
432    config: CompactionConfig,
433}
434
435impl LlmCompactor {
436    /// Create a new LLM compactor with default configuration
437    pub fn new(model: Model, provider: Arc<dyn Provider>) -> Self {
438        Self {
439            model,
440            _provider: provider,
441            config: CompactionConfig::new(),
442        }
443    }
444
445    /// Create a new LLM compactor with custom configuration
446    pub fn with_config(
447        model: Model,
448        provider: Arc<dyn Provider>,
449        config: CompactionConfig,
450    ) -> Self {
451        Self {
452            model,
453            _provider: provider,
454            config,
455        }
456    }
457
458    /// Set how many recent messages to always keep
459    pub fn with_keep_recent(mut self, count: usize) -> Self {
460        self.config.keep_recent = count;
461        self
462    }
463
464    /// Set maximum batch size for summarization
465    pub fn with_max_batch(mut self, count: usize) -> Self {
466        self.config.max_batch = count;
467        self
468    }
469
470    /// Set target compaction ratio
471    pub fn with_target_ratio(mut self, ratio: f32) -> Self {
472        self.config.target_ratio = ratio.clamp(0.1, 0.9);
473        self
474    }
475
476    /// Build the summarization prompt
477    fn build_summarize_prompt(&self, messages: &[Message], instruction: Option<&str>) -> String {
478        let mut prompt = String::new();
479
480        prompt.push_str("Summarize the following conversation concisely. ");
481        prompt.push_str("Capture the key points, decisions, and any ongoing tasks or context.\n\n");
482
483        if let Some(instr) = instruction {
484            prompt.push_str(&format!("Focus areas: {}\n\n", instr));
485        } else if let Some(ref custom_instr) = self.config.custom_instruction {
486            prompt.push_str(&format!("Focus areas: {}\n\n", custom_instr));
487        }
488
489        prompt.push_str("## Conversation to summarize:\n");
490
491        for (i, msg) in messages.iter().enumerate() {
492            let role = match msg {
493                Message::User(_) => "User",
494                Message::Assistant(_) => "Assistant",
495                Message::ToolResult(_) => "Tool",
496            };
497            let content = msg.text_content().unwrap_or_default();
498            let content_preview = safe_truncate(&content, 500);
499            prompt.push_str(&format!("[{} {}]: {}\n", role, i + 1, content_preview));
500        }
501
502        prompt.push_str("\n## Summary:\n");
503        prompt
504            .push_str("Provide a concise summary that captures the essence of this conversation.");
505
506        prompt
507    }
508
509    /// Attempt to compact using a fallback strategy if LLM fails
510    async fn compact_with_fallback(
511        &self,
512        old_messages: &[Message],
513        recent_messages: &[Message],
514        instruction: Option<&str>,
515    ) -> std::result::Result<CompactedContext, CompactionError> {
516        // Try LLM-based summarization first
517        match self.summarize_with_llm(old_messages, instruction).await {
518            Ok(summary) => {
519                // Build the summary message
520                let mut summary_msg =
521                    AssistantMessage::new(Api::AnthropicMessages, "compactor", &self.model.id);
522                summary_msg.content = vec![ContentBlock::Text(TextContent::new(format!(
523                    "[Previous conversation summarized: {}]",
524                    summary
525                )))];
526
527                // Build final compacted context
528                let mut kept = vec![Message::Assistant(summary_msg)];
529                kept.extend(recent_messages.iter().cloned());
530
531                let original_tokens = self.estimate_tokens(old_messages);
532                let compacted_tokens = self.estimate_tokens(&kept);
533                let kept_len = kept.len();
534
535                Ok(CompactedContext::new(
536                    summary,
537                    kept,
538                    old_messages.len(),
539                    CompactionMetadata::new(
540                        original_tokens,
541                        compacted_tokens,
542                        old_messages.len(),
543                        kept_len,
544                        self.config.target_ratio,
545                    ),
546                ))
547            }
548            Err(llm_err) => {
549                // Fallback: simple truncation with key topics
550                self.compact_fallback(old_messages, recent_messages)
551                    .await
552                    .map_err(|_| CompactionError::LlmError(llm_err.to_string()))
553            }
554        }
555    }
556
557    /// Summarize messages using the LLM
558    async fn summarize_with_llm(
559        &self,
560        messages: &[Message],
561        instruction: Option<&str>,
562    ) -> std::result::Result<String, CompactionError> {
563        let prompt = self.build_summarize_prompt(messages, instruction);
564
565        let mut context = Context::new();
566        context.set_system_prompt(
567            "You are a helpful assistant that summarizes conversations concisely.",
568        );
569        context.add_message(Message::User(UserMessage::new(prompt)));
570
571        let options = StreamOptions {
572            temperature: Some(self.config.temperature as f64),
573            max_tokens: Some(self.config.summary_max_tokens),
574            ..Default::default()
575        };
576
577        let summary_message = complete(&self.model, &context, Some(options))
578            .await
579            .map_err(|e| CompactionError::LlmError(e.to_string()))?;
580
581        Ok(summary_message.text_content())
582    }
583
584    /// Fallback compaction when LLM fails - simple truncation with key preservation
585    async fn compact_fallback(
586        &self,
587        old_messages: &[Message],
588        recent_messages: &[Message],
589    ) -> std::result::Result<CompactedContext, CompactionError> {
590        // Simple fallback: keep first and last message, summarize in between
591        let mut summary_parts = Vec::new();
592
593        if old_messages.len() > 2 {
594            // Keep first message's topic
595            if let Some(first) = old_messages.first() {
596                let content = first.text_content().unwrap_or_default();
597                let preview = safe_truncate(&content, 200);
598                summary_parts.push(format!("Started discussing: {}", preview));
599            }
600
601            // Keep last message (likely the most relevant recent context)
602            if let Some(last) = old_messages.last() {
603                let content = last.text_content().unwrap_or_default();
604                let preview = safe_truncate(&content, 200);
605                summary_parts.push(format!("Ended with: {}", preview));
606            }
607
608            summary_parts.push(format!(
609                "({} messages omitted)",
610                old_messages.len().saturating_sub(2)
611            ));
612        } else if !old_messages.is_empty() {
613            // Just preserve first message content
614            if let Some(msg) = old_messages.first() {
615                let content = msg.text_content().unwrap_or_default();
616                summary_parts.push(format!("Conversation started: {}", content));
617            }
618        }
619
620        let summary = summary_parts.join(" ");
621
622        let mut summary_msg =
623            AssistantMessage::new(Api::AnthropicMessages, "compactor", &self.model.id);
624        summary_msg.content = vec![ContentBlock::Text(TextContent::new(format!(
625            "[Previous conversation summary: {}]",
626            summary
627        )))];
628
629        let mut kept = vec![Message::Assistant(summary_msg)];
630        kept.extend(recent_messages.iter().cloned());
631
632        let original_tokens = self.estimate_tokens(old_messages);
633        let compacted_tokens = self.estimate_tokens(&kept);
634        let kept_len = kept.len();
635
636        Ok(CompactedContext::new(
637            summary,
638            kept,
639            old_messages.len(),
640            CompactionMetadata::new(
641                original_tokens,
642                compacted_tokens,
643                old_messages.len(),
644                kept_len,
645                self.config.target_ratio,
646            ),
647        ))
648    }
649}
650
651#[async_trait]
652impl Compactor for LlmCompactor {
653    async fn compact(
654        &self,
655        messages: &[Message],
656        instruction: Option<&str>,
657    ) -> std::result::Result<CompactedContext, CompactionError> {
658        // Check minimum requirements
659        if messages.is_empty() {
660            return Err(CompactionError::NoMessagesToCompact);
661        }
662
663        if messages.len() <= self.config.keep_recent {
664            // Not enough messages to compact, return as-is with zero compaction
665            let original_tokens = self.estimate_tokens(messages);
666            return Ok(CompactedContext::new(
667                String::new(),
668                messages.to_vec(),
669                0,
670                CompactionMetadata::new(
671                    original_tokens,
672                    original_tokens,
673                    0,
674                    messages.len(),
675                    self.config.target_ratio,
676                ),
677            ));
678        }
679
680        // Split into old messages (to compact) and recent messages (to keep)
681        let keep_count = self.config.keep_recent.min(messages.len());
682        let old_messages: Vec<Message> = messages[..messages.len() - keep_count].to_vec();
683        let recent_messages: Vec<Message> = messages[messages.len() - keep_count..].to_vec();
684
685        if old_messages.is_empty() {
686            return Err(CompactionError::NoMessagesToCompact);
687        }
688
689        // Handle LLM failure gracefully
690        self.compact_with_fallback(&old_messages, &recent_messages, instruction)
691            .await
692    }
693}
694
695/// Additional methods for LlmCompactor (not part of Compactor trait)
696impl LlmCompactor {
697    /// Summarize a conversation branch for comparison purposes.
698    ///
699    /// This is used when branching occurs and you want to understand
700    /// what changed compared to another branch (e.g., main).
701    pub async fn summarize_branch(
702        &self,
703        messages: &[Message],
704        branch_name: &str,
705    ) -> std::result::Result<String, CompactionError> {
706        if messages.is_empty() {
707            return Ok(format!("Branch '{}' is empty", branch_name));
708        }
709
710        let mut prompt = String::new();
711        prompt.push_str(&format!(
712            "Summarize the conversation branch '{}' concisely. ",
713            branch_name
714        ));
715        prompt.push_str("Focus on: what was discussed, decisions made, and current state.\n\n");
716
717        prompt.push_str("## Branch messages:\n");
718        for (i, msg) in messages.iter().enumerate() {
719            let role = match msg {
720                Message::User(_) => "User",
721                Message::Assistant(_) => "Assistant",
722                Message::ToolResult(_) => "Tool",
723            };
724            let content = msg.text_content().unwrap_or_default();
725            let content_preview = safe_truncate(&content, 300);
726            prompt.push_str(&format!("[{} {}]: {}\n", role, i + 1, content_preview));
727        }
728
729        prompt.push_str("\n## Summary (be concise):\n");
730
731        // Use LLM to generate summary
732        let mut context = Context::new();
733        context.set_system_prompt(
734            "You are a helpful assistant that summarizes conversation branches. ",
735        );
736        context.add_message(Message::User(UserMessage::new(prompt)));
737
738        let options = StreamOptions {
739            temperature: Some(0.3),
740            max_tokens: Some(512),
741            ..Default::default()
742        };
743
744        let summary_message = complete(&self.model, &context, Some(options))
745            .await
746            .map_err(|e| CompactionError::LlmError(e.to_string()))?;
747
748        Ok(summary_message.text_content())
749    }
750}
751
752/// Context manager that handles compaction automatically
753pub struct CompactionManager {
754    strategy: CompactionStrategy,
755    compactor: Option<Arc<dyn Compactor>>,
756    context_window: usize,
757    config: CompactionConfig,
758}
759
760impl CompactionManager {
761    /// Create a new compaction manager
762    pub fn new(strategy: CompactionStrategy, context_window: usize) -> Self {
763        Self {
764            strategy,
765            compactor: None,
766            context_window,
767            config: CompactionConfig::new(),
768        }
769    }
770
771    /// Create a new compaction manager with custom config
772    pub fn with_config(
773        strategy: CompactionStrategy,
774        context_window: usize,
775        config: CompactionConfig,
776    ) -> Self {
777        Self {
778            strategy,
779            compactor: None,
780            context_window,
781            config,
782        }
783    }
784
785    /// Set the compactor to use
786    pub fn with_compactor<C: Compactor + 'static>(mut self, compactor: Arc<C>) -> Self {
787        self.compactor = Some(compactor);
788        self
789    }
790
791    /// Set the compactor from a trait object
792    pub fn set_compactor(&mut self, compactor: Arc<dyn Compactor>) {
793        self.compactor = Some(compactor);
794    }
795
796    /// Check if compaction should be triggered
797    pub fn should_compact(&self, context_tokens: usize, iteration: usize) -> bool {
798        self.strategy
799            .should_compact(context_tokens, self.context_window, iteration)
800    }
801
802    /// Get the current strategy
803    pub fn strategy(&self) -> &CompactionStrategy {
804        &self.strategy
805    }
806
807    /// Get the compaction configuration
808    pub fn config(&self) -> &CompactionConfig {
809        &self.config
810    }
811
812    /// Set compaction configuration
813    pub fn set_config(&mut self, config: CompactionConfig) {
814        self.config = config;
815    }
816
817    /// Compact the given messages if appropriate
818    pub async fn compact_if_needed(
819        &self,
820        messages: &[Message],
821        instruction: Option<&str>,
822        context_tokens: usize,
823        iteration: usize,
824    ) -> std::result::Result<Option<CompactedContext>, CompactionError> {
825        if !self.should_compact(context_tokens, iteration) {
826            return Ok(None);
827        }
828
829        let compactor = match &self.compactor {
830            Some(c) => c,
831            None => return Err(CompactionError::CompactionDisabled),
832        };
833
834        let result = compactor.compact(messages, instruction).await?;
835        Ok(Some(result))
836    }
837
838    /// Force compaction regardless of strategy
839    pub async fn compact_now(
840        &self,
841        messages: &[Message],
842        instruction: Option<&str>,
843    ) -> std::result::Result<CompactedContext, CompactionError> {
844        let compactor = match &self.compactor {
845            Some(c) => c,
846            None => return Err(CompactionError::CompactionDisabled),
847        };
848
849        compactor.compact(messages, instruction).await
850    }
851
852    /// Get estimated token count for messages
853    pub fn estimate_tokens(&self, messages: &[Message]) -> usize {
854        messages
855            .iter()
856            .map(|msg| estimate_tokens(&msg.text_content().unwrap_or_default()))
857            .sum()
858    }
859}
860
861impl Default for CompactionManager {
862    fn default() -> Self {
863        Self::new(CompactionStrategy::default(), 128_000)
864    }
865}
866
867// ============================================================================
868// Tests
869// ============================================================================
870
871#[cfg(test)]
872mod tests {
873    use super::*;
874
    // Helper to create test user messages.
    // Delegates to `Message::user`, so the resulting message has whatever shape
    // that constructor produces for the given text.
    fn make_user_message(content: &str) -> Message {
        Message::user(content)
    }
879
880    // Helper to create test assistant messages
881    fn make_assistant_message(content: &str) -> Message {
882        Message::Assistant({
883            let mut msg = AssistantMessage::new(Api::AnthropicMessages, "test", "test-model");
884            msg.content = vec![ContentBlock::Text(TextContent::new(content))];
885            msg
886        })
887    }
888
    // Helper to create a test model.
    // NOTE(review): the positional arguments presumably are id, display name, API,
    // provider name and base URL — confirm against `Model::new`.
    fn make_test_model() -> Model {
        Model::new(
            "test-model",
            "Test Model",
            Api::AnthropicMessages,
            "test",
            "https://test.example.com",
        )
    }
899
900    #[test]
901    fn test_compaction_config_defaults() {
902        let config = CompactionConfig::new();
903        assert_eq!(config.keep_recent, 4);
904        assert_eq!(config.max_batch, 20);
905        assert!((config.target_ratio - 0.5).abs() < 0.001);
906        assert_eq!(config.summary_max_tokens, 1024);
907        assert!((config.temperature - 0.3).abs() < 0.001);
908    }
909
910    #[test]
911    fn test_compaction_config_builder_pattern() {
912        let config = CompactionConfig::new()
913            .with_keep_recent(10)
914            .with_max_batch(30)
915            .with_target_ratio(0.3)
916            .with_temperature(0.5);
917
918        assert_eq!(config.keep_recent, 10);
919        assert_eq!(config.max_batch, 30);
920        assert!((config.target_ratio - 0.3).abs() < 0.001);
921        assert!((config.temperature - 0.5).abs() < 0.001);
922    }
923
924    #[test]
925    fn test_compaction_config_ratio_clamping() {
926        // Test upper bound clamping
927        let config = CompactionConfig::new().with_target_ratio(1.5);
928        assert!((config.target_ratio - 0.9).abs() < 0.001);
929
930        // Test lower bound clamping
931        let config = CompactionConfig::new().with_target_ratio(-0.5);
932        assert!((config.target_ratio - 0.1).abs() < 0.001);
933    }
934
935    #[test]
936    fn test_compaction_metadata_success() {
937        let metadata = CompactionMetadata::new(
938            1000, // original_tokens
939            500,  // compacted_tokens
940            10,   // messages_compacted
941            5,    // messages_kept
942            0.5,  // target_ratio
943        );
944
945        assert!(metadata.success);
946        assert_eq!(metadata.original_tokens, 1000);
947        assert_eq!(metadata.compacted_tokens, 500);
948        assert_eq!(metadata.messages_compacted, 10);
949        assert_eq!(metadata.messages_kept, 5);
950        assert!((metadata.actual_ratio - 0.5).abs() < 0.001);
951        assert!((metadata.compression_factor() - 0.5).abs() < 0.001);
952        assert_eq!(metadata.tokens_saved(), 500);
953        assert!(metadata.error.is_none());
954    }
955
956    #[test]
957    fn test_compaction_metadata_failure() {
958        let metadata = CompactionError::LlmError("test error".to_string());
959
960        // Verify error message
961        assert!(metadata.to_string().contains("test error"));
962    }
963
964    #[test]
965    fn test_compaction_metadata_compression_factor() {
966        // Zero original tokens should result in 1.0 ratio
967        let metadata = CompactionMetadata::new(0, 0, 0, 0, 0.5);
968        assert!((metadata.actual_ratio - 1.0).abs() < 0.001);
969        assert!((metadata.compression_factor() - 0.0).abs() < 0.001);
970
971        // Full compression
972        let metadata = CompactionMetadata::new(1000, 100, 10, 5, 0.5);
973        assert!((metadata.compression_factor() - 0.9).abs() < 0.001);
974    }
975
976    #[test]
977    fn test_compaction_metadata_tokens_saved() {
978        // Normal case
979        let metadata = CompactionMetadata::new(1000, 400, 10, 5, 0.5);
980        assert_eq!(metadata.tokens_saved(), 600);
981
982        // No savings
983        let metadata = CompactionMetadata::new(1000, 1000, 0, 0, 0.5);
984        assert_eq!(metadata.tokens_saved(), 0);
985
986        // Compacted is larger than original (should not happen but should be safe)
987        let metadata = CompactionMetadata::new(500, 600, 5, 3, 0.5);
988        assert_eq!(metadata.tokens_saved(), 0); // saturating_sub
989    }
990
991    #[test]
992    fn test_compaction_strategy_disabled() {
993        let strategy = CompactionStrategy::Disabled;
994        assert!(!strategy.should_compact(100_000, 128_000, 5));
995        assert!(!strategy.should_compact(120_000, 128_000, 10));
996        assert!(!strategy.should_compact(0, 128_000, 1));
997    }
998
999    #[test]
1000    fn test_compaction_strategy_threshold() {
1001        let strategy = CompactionStrategy::Threshold(0.8);
1002
1003        // Below threshold (79%)
1004        assert!(!strategy.should_compact(100_000, 128_000, 1));
1005
1006        // At threshold (exactly 80%)
1007        assert!(strategy.should_compact(102_400, 128_000, 1));
1008
1009        // Above threshold (93%)
1010        assert!(strategy.should_compact(120_000, 128_000, 1));
1011
1012        // Zero context window should return false
1013        assert!(!strategy.should_compact(100_000, 0, 1));
1014    }
1015
1016    #[test]
1017    fn test_compaction_strategy_every_n_turns() {
1018        let strategy = CompactionStrategy::EveryNTurns(5);
1019
1020        // Before threshold iterations
1021        assert!(!strategy.should_compact(0, 128_000, 0));
1022        assert!(!strategy.should_compact(0, 128_000, 3));
1023        assert!(!strategy.should_compact(0, 128_000, 4));
1024
1025        // At threshold iterations
1026        assert!(strategy.should_compact(0, 128_000, 5));
1027        assert!(strategy.should_compact(0, 128_000, 10));
1028        assert!(strategy.should_compact(0, 128_000, 15));
1029
1030        // Not at threshold
1031        assert!(!strategy.should_compact(0, 128_000, 6));
1032        assert!(!strategy.should_compact(0, 128_000, 9));
1033    }
1034
1035    #[test]
1036    fn test_compaction_strategy_absolute_tokens() {
1037        let strategy = CompactionStrategy::AbsoluteTokens(100_000);
1038
1039        // Below threshold
1040        assert!(!strategy.should_compact(50_000, 128_000, 0));
1041        assert!(!strategy.should_compact(99_999, 128_000, 0));
1042
1043        // At threshold
1044        assert!(strategy.should_compact(100_000, 128_000, 0));
1045
1046        // Above threshold
1047        assert!(strategy.should_compact(150_000, 128_000, 0));
1048    }
1049
1050    #[test]
1051    fn test_compacted_context_basic() {
1052        let metadata = CompactionMetadata::new(1000, 500, 10, 5, 0.5);
1053        let ctx = CompactedContext::new(
1054            "Test summary".to_string(),
1055            vec![make_user_message("test")],
1056            10,
1057            metadata,
1058        );
1059
1060        assert_eq!(ctx.summary(), "Test summary");
1061        assert_eq!(ctx.kept_count(), 1);
1062        assert_eq!(ctx.compacted_count(), 10);
1063        assert!(ctx.is_success());
1064        assert_eq!(ctx.metadata().tokens_saved(), 500);
1065    }
1066
1067    #[test]
1068    fn test_compacted_context_with_empty_summary() {
1069        let metadata = CompactionMetadata::new(100, 100, 0, 2, 0.5);
1070        let ctx = CompactedContext::new(
1071            String::new(), // Empty summary
1072            vec![make_user_message("test1"), make_user_message("test2")],
1073            0,
1074            metadata,
1075        );
1076
1077        assert_eq!(ctx.summary(), "");
1078        assert_eq!(ctx.kept_count(), 2);
1079        assert_eq!(ctx.compacted_count(), 0);
1080    }
1081
1082    #[test]
1083    fn test_llm_compactor_config_builder() {
1084        // Test that LlmCompactor can be created and builder pattern works
1085        use crate::providers::OpenAiProvider;
1086        let provider = OpenAiProvider::new();
1087        let model = make_test_model();
1088        let compactor = LlmCompactor::new(model, Arc::new(provider))
1089            .with_keep_recent(6)
1090            .with_max_batch(25)
1091            .with_target_ratio(0.6);
1092
1093        assert!(compactor.config.keep_recent >= 4);
1094        assert!(compactor.config.max_batch >= 20);
1095    }
1096
1097    #[test]
1098    fn test_compaction_error_display() {
1099        let err = CompactionError::NoMessagesToCompact;
1100        assert_eq!(err.to_string(), "No messages to compact");
1101
1102        let err = CompactionError::TooFewMessages {
1103            total: 3,
1104            keep_recent: 5,
1105        };
1106        assert!(err.to_string().contains("3"));
1107        // The error message says "need at least keep_recent + 1", so with keep_recent=5 it shows 6
1108        assert!(err.to_string().contains("6"));
1109
1110        let err = CompactionError::CompactionDisabled;
1111        assert_eq!(err.to_string(), "Compaction is disabled");
1112
1113        let err = CompactionError::NoContextWindow;
1114        assert_eq!(err.to_string(), "Context window not configured");
1115
1116        let err = CompactionError::LlmError("API timeout".to_string());
1117        assert!(err.to_string().contains("API timeout"));
1118    }
1119
1120    #[test]
1121    fn test_compaction_manager_default() {
1122        let manager = CompactionManager::default();
1123        assert!(matches!(
1124            manager.strategy(),
1125            CompactionStrategy::Threshold(_)
1126        ));
1127        assert_eq!(manager.config().keep_recent, 4);
1128    }
1129
1130    #[test]
1131    fn test_compaction_manager_with_custom_strategy() {
1132        let strategy = CompactionStrategy::AbsoluteTokens(50_000);
1133        let manager = CompactionManager::new(strategy, 200_000);
1134
1135        // Should not compact below threshold
1136        assert!(!manager.should_compact(30_000, 0));
1137
1138        // Should compact above threshold
1139        assert!(manager.should_compact(60_000, 0));
1140    }
1141
1142    #[test]
1143    fn test_compaction_manager_with_config() {
1144        let config = CompactionConfig::new()
1145            .with_keep_recent(8)
1146            .with_target_ratio(0.4);
1147
1148        let manager =
1149            CompactionManager::with_config(CompactionStrategy::default(), 128_000, config);
1150
1151        assert_eq!(manager.config().keep_recent, 8);
1152        assert!((manager.config().target_ratio - 0.4).abs() < 0.001);
1153    }
1154
1155    #[test]
1156    fn test_compaction_manager_should_compact_integration() {
1157        let manager = CompactionManager::new(CompactionStrategy::Threshold(0.75), 100_000);
1158
1159        // Below threshold
1160        assert!(!manager.should_compact(70_000, 0));
1161
1162        // At threshold (75%)
1163        assert!(manager.should_compact(75_000, 0));
1164
1165        // Above threshold
1166        assert!(manager.should_compact(80_000, 0));
1167        assert!(manager.should_compact(100_000, 0));
1168    }
1169
1170    #[test]
1171    fn test_compaction_manager_no_compactor_set() {
1172        let manager = CompactionManager::new(CompactionStrategy::EveryNTurns(5), 128_000);
1173
1174        // should_compact with EveryNTurns(5) at iteration 5 should return true
1175        // (compact_if_needed would return Err when no compactor is set, but should_compact works)
1176        assert!(manager.should_compact(0, 5)); // iteration 5 triggers compaction
1177    }
1178
1179    #[test]
1180    fn test_token_estimation_helper() {
1181        use crate::providers::OpenAiProvider;
1182        let provider = OpenAiProvider::new();
1183        let model = make_test_model();
1184        let compactor = LlmCompactor::new(model, Arc::new(provider));
1185
1186        let messages = vec![
1187            make_user_message("Hello world, this is a test message."),
1188            make_assistant_message("This is a response with some content."),
1189        ];
1190
1191        let tokens = compactor.estimate_tokens(&messages);
1192        assert!(tokens > 0, "Should estimate tokens for messages");
1193    }
1194
1195    #[test]
1196    fn test_compaction_config_custom_instruction() {
1197        let config = CompactionConfig::new()
1198            .with_custom_instruction("Focus on code changes and technical decisions");
1199
1200        assert!(config.custom_instruction.is_some());
1201        assert!(config.custom_instruction.unwrap().contains("code changes"));
1202    }
1203
1204    #[test]
1205    fn test_compaction_metadata_timestamp_is_set() {
1206        let metadata = CompactionMetadata::new(1000, 500, 10, 5, 0.5);
1207        assert!(metadata.timestamp <= Utc::now());
1208    }
1209
1210    #[test]
1211    fn test_compaction_ratio_achievement() {
1212        // Simulate compaction that achieves target ratio
1213        let metadata = CompactionMetadata::new(1000, 500, 10, 5, 0.5);
1214        assert!((metadata.actual_ratio - 0.5).abs() < 0.001);
1215
1216        // Simulate compaction that exceeds target (more compression)
1217        let metadata = CompactionMetadata::new(1000, 300, 10, 5, 0.5);
1218        assert!((metadata.actual_ratio - 0.3).abs() < 0.001);
1219        assert!(metadata.compression_factor() > 0.5);
1220
1221        // Simulate compaction that doesn't meet target (less compression)
1222        let metadata = CompactionMetadata::new(1000, 700, 10, 5, 0.5);
1223        assert!((metadata.actual_ratio - 0.7).abs() < 0.001);
1224        assert!(metadata.compression_factor() < 0.5);
1225    }
1226
1227    #[test]
1228    fn test_compaction_manager_config_updates() {
1229        let mut manager = CompactionManager::default();
1230
1231        let new_config = CompactionConfig::new()
1232            .with_keep_recent(12)
1233            .with_target_ratio(0.3);
1234
1235        manager.set_config(new_config);
1236
1237        assert_eq!(manager.config().keep_recent, 12);
1238        assert!((manager.config().target_ratio - 0.3).abs() < 0.001);
1239    }
1240
1241    #[test]
1242    fn test_llm_compactor_has_summarize_branch() {
1243        // Verify that LlmCompactor has the summarize_branch method
1244        use crate::providers::OpenAiProvider;
1245        let provider = OpenAiProvider::new();
1246        let model = make_test_model();
1247        let compactor = LlmCompactor::new(model, Arc::new(provider));
1248
1249        // Just verify the method exists (runtime test would require async)
1250        let messages = vec![
1251            make_user_message("Test message 1"),
1252            make_assistant_message("Test response 1"),
1253            make_user_message("Test message 2"),
1254        ];
1255
1256        // The method exists and can be called (we can't test async in sync test)
1257        // We verify it compiles correctly
1258        let branch_name = "test-branch";
1259        // This is a compile-time check that the method exists
1260        let _future = compactor.summarize_branch(&messages, branch_name);
1261    }
1262
1263    #[test]
1264    fn test_summarize_branch_returns_error_on_llm_failure() {
1265        // Test that summarize_branch handles empty messages gracefully
1266        use crate::providers::OpenAiProvider;
1267        let provider = OpenAiProvider::new();
1268        let model = make_test_model();
1269        let compactor = LlmCompactor::new(model, Arc::new(provider));
1270
1271        // Empty messages should return immediately
1272        let messages: Vec<Message> = vec![];
1273
1274        // This should not panic with empty messages
1275        // (We can't test the async result in a sync test, but compile-time check passes)
1276        let _future = compactor.summarize_branch(&messages, "empty-branch");
1277    }
1278}
oxi_ai/compaction.rs

oxi_ai/
compaction.rs