oxi_ai/
compaction.rs

1//! Context compaction for long conversations
2//!
3//! This module provides functionality to compact conversation history when it
4//! becomes too large, using the LLM itself to summarize older messages.
5
6use crate::high_level::complete;
7use crate::high_level::tokens::estimate as estimate_tokens;
8use crate::{
9    Api, AssistantMessage, ContentBlock, Context, Message, Model, Provider, StreamOptions,
10    TextContent, UserMessage,
11};
12
/// Truncate `s` to at most `max_chars` characters, appending `"..."` when text was cut.
///
/// The limit is counted in `char`s (Unicode scalar values), not bytes, so the cut
/// always falls on a valid UTF-8 boundary and multi-byte text is neither cut short
/// nor allowed to exceed the limit. A string that already fits is returned unchanged,
/// without the ellipsis.
fn safe_truncate(s: &str, max_chars: usize) -> String {
    // `nth(max_chars)` yields the byte offset of the first character that does
    // NOT fit; `None` means the string holds at most `max_chars` characters.
    match s.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}...", &s[..cut]),
        None => s.to_string(),
    }
}
26
27/// Generate a concise summary of the last N conversation messages.
28///
29/// Returns a string summarizing key topics and decisions without
30/// requiring a full compaction step.
31pub fn generate_branch_summary(messages: &[Message], n: usize) -> String {
32    if messages.is_empty() {
33        return "(empty conversation)".to_string();
34    }
35
36    let last_n: Vec<_> = if n > 0 {
37        messages.iter().rev().take(n).collect()
38    } else {
39        messages.iter().collect()
40    };
41
42    let mut topics = Vec::new();
43    let mut decisions = Vec::new();
44
45    for msg in last_n.iter().rev() {
46        let role = match msg {
47            Message::User(_) => "user",
48            Message::Assistant(_) => "assistant",
49            Message::ToolResult(_) => "tool",
50        };
51        let content = msg.text_content().unwrap_or_default();
52        let preview = safe_truncate(&content, 120);
53
54        // Detect code/file references
55        if content.contains("created file") || content.contains("edited file") {
56            topics.push("file modifications".to_string());
57        }
58        if content.contains("implemented") || content.contains("added feature") {
59            topics.push("feature implementation".to_string());
60        }
61        if content.contains("decided") || content.contains("chose") || content.contains("agreed") {
62            decisions.push(preview);
63        }
64        if content.contains("search") || content.contains("debug") || content.contains("fix") {
65            topics.push(format!("inquiry/analysis by {}", role));
66        }
67    }
68
69    // Deduplicate topics
70    topics.dedup();
71    decisions.dedup();
72
73    let summary = if topics.is_empty() && decisions.is_empty() {
74        // Fallback: just the last message preview
75        messages
76            .last()
77            .and_then(|m| m.text_content().ok())
78            .map(|c| safe_truncate(&c, 200))
79            .unwrap_or_else(|| "(no content)".to_string())
80    } else {
81        let mut parts = Vec::new();
82        if !topics.is_empty() {
83            parts.push(format!("Topics: {}", topics.join(", ")));
84        }
85        if !decisions.is_empty() {
86            parts.push(format!("Decisions: {}", decisions.join("; ")));
87        }
88        parts.join(" | ")
89    };
90
91    format!("[Branch summary of {} msgs] {}", messages.len(), summary)
92}
93
94use chrono::{DateTime, Utc};
95use serde::{Deserialize, Serialize};
96use std::future::Future;
97use std::pin::Pin;
98use std::sync::Arc;
99use std::time::Duration;
100
/// Tunable settings for LLM-based context compaction.
#[derive(Debug, Clone)]
pub struct CompactionConfig {
    /// Number of most-recent messages that are always kept verbatim.
    pub keep_recent: usize,
    /// Upper bound on how many old messages go into one summarization batch.
    pub max_batch: usize,
    /// Desired size after compaction as a fraction of the original
    /// (e.g. `0.5` means "reduce to 50%").
    pub target_ratio: f32,
    /// Token budget for the generated summary.
    pub summary_max_tokens: usize,
    /// Sampling temperature for summarization (lower = more focused).
    pub temperature: f32,
    /// Time limit for LLM compaction requests.
    pub timeout: Duration,
    /// Optional extra instruction handed to the summarizer.
    pub custom_instruction: Option<String>,
}

impl CompactionConfig {
    /// Build a configuration holding the default values (see the `Default` impl).
    pub fn new() -> Self {
        Self::default()
    }

    /// Override how many recent messages are always kept.
    pub fn with_keep_recent(self, count: usize) -> Self {
        Self {
            keep_recent: count,
            ..self
        }
    }

    /// Override the maximum summarization batch size.
    pub fn with_max_batch(self, count: usize) -> Self {
        Self {
            max_batch: count,
            ..self
        }
    }

    /// Override the target compaction ratio; the value is clamped to `0.1..=0.9`.
    pub fn with_target_ratio(self, ratio: f32) -> Self {
        Self {
            target_ratio: ratio.clamp(0.1, 0.9),
            ..self
        }
    }

    /// Override the token budget for the summary.
    pub fn with_summary_max_tokens(self, tokens: usize) -> Self {
        Self {
            summary_max_tokens: tokens,
            ..self
        }
    }

    /// Override the summarization temperature; the value is clamped to `0.0..=1.0`.
    pub fn with_temperature(self, temp: f32) -> Self {
        Self {
            temperature: temp.clamp(0.0, 1.0),
            ..self
        }
    }

    /// Override the time limit for LLM requests.
    pub fn with_timeout(self, timeout: Duration) -> Self {
        Self { timeout, ..self }
    }

    /// Attach a custom instruction for the summarizer.
    pub fn with_custom_instruction(self, instruction: impl Into<String>) -> Self {
        Self {
            custom_instruction: Some(instruction.into()),
            ..self
        }
    }
}

impl Default for CompactionConfig {
    /// Defaults: keep 4 recent messages, batches of 20, target ratio 0.5,
    /// 1024 summary tokens, temperature 0.3, 60 second timeout, no custom instruction.
    fn default() -> Self {
        Self {
            keep_recent: 4,
            max_batch: 20,
            target_ratio: 0.5,
            summary_max_tokens: 1024,
            temperature: 0.3,
            timeout: Duration::from_secs(60),
            custom_instruction: None,
        }
    }
}
182
183/// Metadata about a compaction operation
184#[derive(Debug, Clone, Serialize, Deserialize)]
185pub struct CompactionMetadata {
186    /// Estimated token count before compaction
187    pub original_tokens: usize,
188    /// Estimated token count after compaction
189    pub compacted_tokens: usize,
190    /// Number of messages that were compacted
191    pub messages_compacted: usize,
192    /// Number of messages kept
193    pub messages_kept: usize,
194    /// Timestamp of compaction
195    pub timestamp: DateTime<Utc>,
196    /// Target ratio used
197    pub target_ratio: f32,
198    /// Actual compaction ratio achieved
199    pub actual_ratio: f32,
200    /// Whether the operation was successful
201    pub success: bool,
202    /// Error message if the operation failed
203    pub error: Option<String>,
204}
205
206impl CompactionMetadata {
207    /// Create new metadata for a successful compaction
208    pub fn new(
209        original_tokens: usize,
210        compacted_tokens: usize,
211        messages_compacted: usize,
212        messages_kept: usize,
213        target_ratio: f32,
214    ) -> Self {
215        let actual_ratio = if original_tokens > 0 {
216            compacted_tokens as f32 / original_tokens as f32
217        } else {
218            1.0
219        };
220
221        Self {
222            original_tokens,
223            compacted_tokens,
224            messages_compacted,
225            messages_kept,
226            timestamp: Utc::now(),
227            target_ratio,
228            actual_ratio,
229            success: true,
230            error: None,
231        }
232    }
233
234    /// Create metadata for a failed compaction
235    pub fn failed(
236        original_tokens: usize,
237        messages_compacted: usize,
238        target_ratio: f32,
239        error: impl Into<String>,
240    ) -> Self {
241        Self {
242            original_tokens,
243            compacted_tokens: original_tokens,
244            messages_compacted,
245            messages_kept: 0,
246            timestamp: Utc::now(),
247            target_ratio,
248            actual_ratio: 1.0,
249            success: false,
250            error: Some(error.into()),
251        }
252    }
253
254    /// Get the compression factor (how much the context was reduced)
255    pub fn compression_factor(&self) -> f32 {
256        if self.actual_ratio > 0.0 {
257            1.0 - self.actual_ratio
258        } else {
259            0.0
260        }
261    }
262
263    /// Get tokens saved from compaction
264    pub fn tokens_saved(&self) -> usize {
265        self.original_tokens.saturating_sub(self.compacted_tokens)
266    }
267}
268
269/// Result of context compaction
270#[derive(Debug, Clone)]
271pub struct CompactedContext {
272    /// Summary of the compacted messages
273    pub summary: String,
274    /// Messages that were kept (typically recent ones)
275    pub kept_messages: Vec<Message>,
276    /// Number of messages that were compacted
277    pub compacted_count: usize,
278    /// Metadata about the compaction operation
279    pub metadata: CompactionMetadata,
280}
281
282impl CompactedContext {
283    /// Create a new compacted context
284    pub fn new(
285        summary: String,
286        kept_messages: Vec<Message>,
287        compacted_count: usize,
288        metadata: CompactionMetadata,
289    ) -> Self {
290        Self {
291            summary,
292            kept_messages,
293            compacted_count,
294            metadata,
295        }
296    }
297
298    /// Get the summary text
299    pub fn summary(&self) -> &str {
300        &self.summary
301    }
302
303    /// Get kept messages count
304    pub fn kept_count(&self) -> usize {
305        self.kept_messages.len()
306    }
307
308    /// Get compacted messages count
309    pub fn compacted_count(&self) -> usize {
310        self.compacted_count
311    }
312
313    /// Get the compaction metadata
314    pub fn metadata(&self) -> &CompactionMetadata {
315        &self.metadata
316    }
317
318    /// Check if compaction was successful
319    pub fn is_success(&self) -> bool {
320        self.metadata.success
321    }
322}
323
324/// Compaction strategy determining when to compact
325#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
326pub enum CompactionStrategy {
327    /// Never compact context
328    Disabled,
329    /// Compact when context is at least this percentage full (0.0 to 1.0)
330    Threshold(f32),
331    /// Compact after every N turns
332    EveryNTurns(usize),
333    /// Compact when context exceeds this absolute token count
334    AbsoluteTokens(usize),
335}
336
337impl CompactionStrategy {
338    /// Check if compaction should happen based on strategy
339    ///
340    /// # Arguments
341    /// * `context_tokens` - Estimated token count of current context
342    /// * `context_window` - Total context window size
343    /// * `iteration` - Current iteration count
344    ///
345    /// # Returns
346    /// `true` if compaction should be triggered
347    pub fn should_compact(
348        &self,
349        context_tokens: usize,
350        context_window: usize,
351        iteration: usize,
352    ) -> bool {
353        match self {
354            CompactionStrategy::Disabled => false,
355            CompactionStrategy::Threshold(threshold) => {
356                if context_window == 0 {
357                    return false;
358                }
359                let usage = context_tokens as f32 / context_window as f32;
360                usage >= *threshold
361            }
362            CompactionStrategy::EveryNTurns(n) => iteration > 0 && iteration.is_multiple_of(*n),
363            CompactionStrategy::AbsoluteTokens(max_tokens) => context_tokens >= *max_tokens,
364        }
365    }
366}
367
368impl Default for CompactionStrategy {
369    fn default() -> Self {
370        CompactionStrategy::Threshold(0.8)
371    }
372}
373
/// Failures that compaction operations can report.
#[derive(Debug, Clone)]
pub enum CompactionError {
    /// The summarization request to the LLM failed; carries the error text.
    LlmError(String),
    /// There was nothing to compact.
    NoMessagesToCompact,
    /// Fewer messages than required (at least `keep_recent + 1` are needed).
    TooFewMessages { total: usize, keep_recent: usize },
    /// Compaction is turned off.
    CompactionDisabled,
    /// No context window size is available.
    NoContextWindow,
}

impl std::fmt::Display for CompactionError {
    /// Render a human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::LlmError(msg) => write!(f, "LLM compaction failed: {}", msg),
            Self::NoMessagesToCompact => f.write_str("No messages to compact"),
            Self::TooFewMessages { total, keep_recent } => {
                // One message beyond the kept tail is the minimum worth compacting.
                let required = keep_recent + 1;
                write!(
                    f,
                    "Not enough messages ({}) to compact (need at least {} for keep_recent)",
                    total, required
                )
            }
            Self::CompactionDisabled => f.write_str("Compaction is disabled"),
            Self::NoContextWindow => f.write_str("Context window not configured"),
        }
    }
}

impl std::error::Error for CompactionError {}
409
410/// Trait for context compaction implementations
411pub trait Compactor: Send + Sync {
412    /// Compact messages, returning a summary and kept messages
413    fn compact<'a>(
414        &'a self,
415        messages: &'a [Message],
416        instruction: Option<&'a str>,
417    ) -> Pin<
418        Box<
419            dyn Future<Output = std::result::Result<CompactedContext, CompactionError>> + Send + 'a,
420        >,
421    >;
422
423    /// Estimate the token count of messages
424    fn estimate_tokens(&self, messages: &[Message]) -> usize {
425        messages
426            .iter()
427            .map(|msg| estimate_tokens(&msg.text_content().unwrap_or_default()))
428            .sum()
429    }
430}
431
432/// Context transformer — applied before provider stream call.
433///
434/// Used by snapcompact inline imaging to replace large tool results
435/// with PNG frames, reducing token usage on vision-capable models.
436pub trait ContextTransformer: Send + Sync {
437    /// Transform the context before sending to the provider.
438    fn transform<'a>(
439        &'a self,
440        context: &'a Context,
441        model: &'a Model,
442    ) -> Pin<Box<dyn Future<Output = Context> + Send + 'a>>;
443}
444
445/// A no-op transformer that returns the context unchanged.
446pub struct NoopContextTransformer;
447
448impl ContextTransformer for NoopContextTransformer {
449    fn transform<'a>(
450        &'a self,
451        context: &'a Context,
452        _model: &'a Model,
453    ) -> Pin<Box<dyn Future<Output = Context> + Send + 'a>> {
454        Box::pin(async move { context.clone() })
455    }
456}
457
458/// LLM-based compactor that uses the model itself to summarize
459pub struct LlmCompactor {
460    model: Model,
461    _provider: Arc<dyn Provider>,
462    config: CompactionConfig,
463}
464
465impl LlmCompactor {
466    /// Create a new LLM compactor with default configuration
467    pub fn new(model: Model, provider: Arc<dyn Provider>) -> Self {
468        Self {
469            model,
470            _provider: provider,
471            config: CompactionConfig::new(),
472        }
473    }
474
475    /// Create a new LLM compactor with custom configuration
476    pub fn with_config(
477        model: Model,
478        provider: Arc<dyn Provider>,
479        config: CompactionConfig,
480    ) -> Self {
481        Self {
482            model,
483            _provider: provider,
484            config,
485        }
486    }
487
488    /// Set how many recent messages to always keep
489    pub fn with_keep_recent(mut self, count: usize) -> Self {
490        self.config.keep_recent = count;
491        self
492    }
493
494    /// Set maximum batch size for summarization
495    pub fn with_max_batch(mut self, count: usize) -> Self {
496        self.config.max_batch = count;
497        self
498    }
499
500    /// Set target compaction ratio
501    pub fn with_target_ratio(mut self, ratio: f32) -> Self {
502        self.config.target_ratio = ratio.clamp(0.1, 0.9);
503        self
504    }
505
506    /// Build the summarization prompt
507    fn build_summarize_prompt(&self, messages: &[Message], instruction: Option<&str>) -> String {
508        let mut prompt = String::new();
509
510        prompt.push_str("Summarize the following conversation concisely. ");
511        prompt.push_str("Capture the key points, decisions, and any ongoing tasks or context.\n\n");
512
513        if let Some(instr) = instruction {
514            prompt.push_str(&format!("Focus areas: {}\n\n", instr));
515        } else if let Some(ref custom_instr) = self.config.custom_instruction {
516            prompt.push_str(&format!("Focus areas: {}\n\n", custom_instr));
517        }
518
519        prompt.push_str("## Conversation to summarize:\n");
520
521        for (i, msg) in messages.iter().enumerate() {
522            let role = match msg {
523                Message::User(_) => "User",
524                Message::Assistant(_) => "Assistant",
525                Message::ToolResult(_) => "Tool",
526            };
527            let content = msg.text_content().unwrap_or_default();
528            let content_preview = safe_truncate(&content, 500);
529            prompt.push_str(&format!("[{} {}]: {}\n", role, i + 1, content_preview));
530        }
531
532        prompt.push_str("\n## Summary:\n");
533        prompt
534            .push_str("Provide a concise summary that captures the essence of this conversation.");
535
536        prompt
537    }
538
539    /// Attempt to compact using a fallback strategy if LLM fails
540    async fn compact_with_fallback(
541        &self,
542        old_messages: &[Message],
543        recent_messages: &[Message],
544        instruction: Option<&str>,
545    ) -> std::result::Result<CompactedContext, CompactionError> {
546        // Try LLM-based summarization first
547        match self.summarize_with_llm(old_messages, instruction).await {
548            Ok(summary) => {
549                // Build the summary message
550                let mut summary_msg =
551                    AssistantMessage::new(Api::AnthropicMessages, "compactor", &self.model.id);
552                summary_msg.content = vec![ContentBlock::Text(TextContent::new(format!(
553                    "[Previous conversation summarized: {}]",
554                    summary
555                )))];
556
557                // Build final compacted context
558                let mut kept = vec![Message::Assistant(summary_msg)];
559                kept.extend(recent_messages.iter().cloned());
560
561                let original_tokens = self.estimate_tokens(old_messages);
562                let compacted_tokens = self.estimate_tokens(&kept);
563                let kept_len = kept.len();
564
565                Ok(CompactedContext::new(
566                    summary,
567                    kept,
568                    old_messages.len(),
569                    CompactionMetadata::new(
570                        original_tokens,
571                        compacted_tokens,
572                        old_messages.len(),
573                        kept_len,
574                        self.config.target_ratio,
575                    ),
576                ))
577            }
578            Err(llm_err) => {
579                // Fallback: simple truncation with key topics
580                self.compact_fallback(old_messages, recent_messages)
581                    .await
582                    .map_err(|_| CompactionError::LlmError(llm_err.to_string()))
583            }
584        }
585    }
586
587    /// Summarize messages using the LLM
588    async fn summarize_with_llm(
589        &self,
590        messages: &[Message],
591        instruction: Option<&str>,
592    ) -> std::result::Result<String, CompactionError> {
593        let prompt = self.build_summarize_prompt(messages, instruction);
594
595        let mut context = Context::new();
596        context.set_system_prompt(
597            "You are a helpful assistant that summarizes conversations concisely.",
598        );
599        context.add_message(Message::User(UserMessage::new(prompt)));
600
601        let options = StreamOptions {
602            temperature: Some(self.config.temperature as f64),
603            max_tokens: Some(self.config.summary_max_tokens),
604            ..Default::default()
605        };
606
607        let summary_message = complete(&self.model, &context, Some(options))
608            .await
609            .map_err(|e| CompactionError::LlmError(e.to_string()))?;
610
611        Ok(summary_message.text_content())
612    }
613
614    /// Fallback compaction when LLM fails - simple truncation with key preservation
615    async fn compact_fallback(
616        &self,
617        old_messages: &[Message],
618        recent_messages: &[Message],
619    ) -> std::result::Result<CompactedContext, CompactionError> {
620        // Simple fallback: keep first and last message, summarize in between
621        let mut summary_parts = Vec::new();
622
623        if old_messages.len() > 2 {
624            // Keep first message's topic
625            if let Some(first) = old_messages.first() {
626                let content = first.text_content().unwrap_or_default();
627                let preview = safe_truncate(&content, 200);
628                summary_parts.push(format!("Started discussing: {}", preview));
629            }
630
631            // Keep last message (likely the most relevant recent context)
632            if let Some(last) = old_messages.last() {
633                let content = last.text_content().unwrap_or_default();
634                let preview = safe_truncate(&content, 200);
635                summary_parts.push(format!("Ended with: {}", preview));
636            }
637
638            summary_parts.push(format!(
639                "({} messages omitted)",
640                old_messages.len().saturating_sub(2)
641            ));
642        } else if !old_messages.is_empty() {
643            // Just preserve first message content
644            if let Some(msg) = old_messages.first() {
645                let content = msg.text_content().unwrap_or_default();
646                summary_parts.push(format!("Conversation started: {}", content));
647            }
648        }
649
650        let summary = summary_parts.join(" ");
651
652        let mut summary_msg =
653            AssistantMessage::new(Api::AnthropicMessages, "compactor", &self.model.id);
654        summary_msg.content = vec![ContentBlock::Text(TextContent::new(format!(
655            "[Previous conversation summary: {}]",
656            summary
657        )))];
658
659        let mut kept = vec![Message::Assistant(summary_msg)];
660        kept.extend(recent_messages.iter().cloned());
661
662        let original_tokens = self.estimate_tokens(old_messages);
663        let compacted_tokens = self.estimate_tokens(&kept);
664        let kept_len = kept.len();
665
666        Ok(CompactedContext::new(
667            summary,
668            kept,
669            old_messages.len(),
670            CompactionMetadata::new(
671                original_tokens,
672                compacted_tokens,
673                old_messages.len(),
674                kept_len,
675                self.config.target_ratio,
676            ),
677        ))
678    }
679}
680
681impl Compactor for LlmCompactor {
682    fn compact<'a>(
683        &'a self,
684        messages: &'a [Message],
685        instruction: Option<&'a str>,
686    ) -> Pin<
687        Box<
688            dyn Future<Output = std::result::Result<CompactedContext, CompactionError>> + Send + 'a,
689        >,
690    > {
691        Box::pin(async move {
692            // Check minimum requirements
693            if messages.is_empty() {
694                return Err(CompactionError::NoMessagesToCompact);
695            }
696
697            if messages.len() <= self.config.keep_recent {
698                // Not enough messages to compact, return as-is with zero compaction
699                let original_tokens = self.estimate_tokens(messages);
700                return Ok(CompactedContext::new(
701                    String::new(),
702                    messages.to_vec(),
703                    0,
704                    CompactionMetadata::new(
705                        original_tokens,
706                        original_tokens,
707                        0,
708                        messages.len(),
709                        self.config.target_ratio,
710                    ),
711                ));
712            }
713
714            // Split into old messages (to compact) and recent messages (to keep)
715            let keep_count = self.config.keep_recent.min(messages.len());
716            let old_messages: Vec<Message> = messages[..messages.len() - keep_count].to_vec();
717            let recent_messages: Vec<Message> = messages[messages.len() - keep_count..].to_vec();
718
719            if old_messages.is_empty() {
720                return Err(CompactionError::NoMessagesToCompact);
721            }
722
723            // Handle LLM failure gracefully
724            self.compact_with_fallback(&old_messages, &recent_messages, instruction)
725                .await
726        })
727    }
728}
729
730/// Additional methods for LlmCompactor (not part of Compactor trait)
731impl LlmCompactor {
732    /// Summarize a conversation branch for comparison purposes.
733    ///
734    /// This is used when branching occurs and you want to understand
735    /// what changed compared to another branch (e.g., main).
736    pub async fn summarize_branch(
737        &self,
738        messages: &[Message],
739        branch_name: &str,
740    ) -> std::result::Result<String, CompactionError> {
741        if messages.is_empty() {
742            return Ok(format!("Branch '{}' is empty", branch_name));
743        }
744
745        let mut prompt = String::new();
746        prompt.push_str(&format!(
747            "Summarize the conversation branch '{}' concisely. ",
748            branch_name
749        ));
750        prompt.push_str("Focus on: what was discussed, decisions made, and current state.\n\n");
751
752        prompt.push_str("## Branch messages:\n");
753        for (i, msg) in messages.iter().enumerate() {
754            let role = match msg {
755                Message::User(_) => "User",
756                Message::Assistant(_) => "Assistant",
757                Message::ToolResult(_) => "Tool",
758            };
759            let content = msg.text_content().unwrap_or_default();
760            let content_preview = safe_truncate(&content, 300);
761            prompt.push_str(&format!("[{} {}]: {}\n", role, i + 1, content_preview));
762        }
763
764        prompt.push_str("\n## Summary (be concise):\n");
765
766        // Use LLM to generate summary
767        let mut context = Context::new();
768        context.set_system_prompt(
769            "You are a helpful assistant that summarizes conversation branches. ",
770        );
771        context.add_message(Message::User(UserMessage::new(prompt)));
772
773        let options = StreamOptions {
774            temperature: Some(0.3),
775            max_tokens: Some(512),
776            ..Default::default()
777        };
778
779        let summary_message = complete(&self.model, &context, Some(options))
780            .await
781            .map_err(|e| CompactionError::LlmError(e.to_string()))?;
782
783        Ok(summary_message.text_content())
784    }
785}
786
787/// Context manager that handles compaction automatically
788pub struct CompactionManager {
789    strategy: CompactionStrategy,
790    compactor: Option<Arc<dyn Compactor>>,
791    context_window: usize,
792    config: CompactionConfig,
793}
794
795impl CompactionManager {
796    /// Create a new compaction manager
797    pub fn new(strategy: CompactionStrategy, context_window: usize) -> Self {
798        Self {
799            strategy,
800            compactor: None,
801            context_window,
802            config: CompactionConfig::new(),
803        }
804    }
805
806    /// Create a new compaction manager with custom config
807    pub fn with_config(
808        strategy: CompactionStrategy,
809        context_window: usize,
810        config: CompactionConfig,
811    ) -> Self {
812        Self {
813            strategy,
814            compactor: None,
815            context_window,
816            config,
817        }
818    }
819
820    /// Set the compactor to use
821    pub fn with_compactor<C: Compactor + 'static>(mut self, compactor: Arc<C>) -> Self {
822        self.compactor = Some(compactor);
823        self
824    }
825
826    /// Set the compactor from a trait object
827    pub fn set_compactor(&mut self, compactor: Arc<dyn Compactor>) {
828        self.compactor = Some(compactor);
829    }
830
831    /// Check if compaction should be triggered
832    pub fn should_compact(&self, context_tokens: usize, iteration: usize) -> bool {
833        self.strategy
834            .should_compact(context_tokens, self.context_window, iteration)
835    }
836
837    /// Get the current strategy
838    pub fn strategy(&self) -> &CompactionStrategy {
839        &self.strategy
840    }
841
842    /// Get the compaction configuration
843    pub fn config(&self) -> &CompactionConfig {
844        &self.config
845    }
846
847    /// Set compaction configuration
848    pub fn set_config(&mut self, config: CompactionConfig) {
849        self.config = config;
850    }
851
852    /// Compact the given messages if appropriate
853    pub async fn compact_if_needed(
854        &self,
855        messages: &[Message],
856        instruction: Option<&str>,
857        context_tokens: usize,
858        iteration: usize,
859    ) -> std::result::Result<Option<CompactedContext>, CompactionError> {
860        if !self.should_compact(context_tokens, iteration) {
861            return Ok(None);
862        }
863
864        let compactor = match &self.compactor {
865            Some(c) => c,
866            None => return Err(CompactionError::CompactionDisabled),
867        };
868
869        let result = compactor.compact(messages, instruction).await?;
870        Ok(Some(result))
871    }
872
873    /// Force compaction regardless of strategy
874    pub async fn compact_now(
875        &self,
876        messages: &[Message],
877        instruction: Option<&str>,
878    ) -> std::result::Result<CompactedContext, CompactionError> {
879        let compactor = match &self.compactor {
880            Some(c) => c,
881            None => return Err(CompactionError::CompactionDisabled),
882        };
883
884        compactor.compact(messages, instruction).await
885    }
886
887    /// Get estimated token count for messages
888    pub fn estimate_tokens(&self, messages: &[Message]) -> usize {
889        messages
890            .iter()
891            .map(|msg| estimate_tokens(&msg.text_content().unwrap_or_default()))
892            .sum()
893    }
894}
895
896impl Default for CompactionManager {
897    fn default() -> Self {
898        Self::new(CompactionStrategy::default(), 128_000)
899    }
900}
901
902// ============================================================================
903// Tests
904// ============================================================================
905
#[cfg(test)]
mod tests {
    use super::*;
    use crate::providers::OpenAiProvider;

    /// Assert that two floating-point expressions differ by less than 0.001.
    macro_rules! assert_close {
        ($actual:expr, $expected:expr) => {
            assert!(($actual - $expected).abs() < 0.001)
        };
    }

    /// Build a user message carrying `content`.
    fn make_user_message(content: &str) -> Message {
        Message::user(content)
    }

    /// Build an assistant message whose only content block is the text `content`.
    fn make_assistant_message(content: &str) -> Message {
        let mut assistant = AssistantMessage::new(Api::AnthropicMessages, "test", "test-model");
        assistant.content = vec![ContentBlock::Text(TextContent::new(content))];
        Message::Assistant(assistant)
    }

    /// Build a placeholder model description used by the compactor tests.
    fn make_test_model() -> Model {
        Model::new(
            "test-model",
            "Test Model",
            Api::AnthropicMessages,
            "test",
            "https://test.example.com",
        )
    }

    /// A freshly constructed config exposes the documented defaults.
    #[test]
    fn test_compaction_config_defaults() {
        let defaults = CompactionConfig::new();

        assert_eq!(defaults.keep_recent, 4);
        assert_eq!(defaults.max_batch, 20);
        assert_eq!(defaults.summary_max_tokens, 1024);
        assert_close!(defaults.target_ratio, 0.5);
        assert_close!(defaults.temperature, 0.3);
    }

    /// Every `with_*` builder call is reflected in the resulting config.
    #[test]
    fn test_compaction_config_builder_pattern() {
        let built = CompactionConfig::new()
            .with_keep_recent(10)
            .with_max_batch(30)
            .with_target_ratio(0.3)
            .with_temperature(0.5);

        assert_eq!(built.keep_recent, 10);
        assert_eq!(built.max_batch, 30);
        assert_close!(built.target_ratio, 0.3);
        assert_close!(built.temperature, 0.5);
    }

    /// Out-of-range target ratios are clamped to 0.9 (upper) and 0.1 (lower).
    #[test]
    fn test_compaction_config_ratio_clamping() {
        // (requested ratio, ratio expected after clamping)
        for (requested, clamped) in [(1.5, 0.9), (-0.5, 0.1)] {
            let config = CompactionConfig::new().with_target_ratio(requested);
            assert_close!(config.target_ratio, clamped);
        }
    }

    /// Metadata built from 1000 -> 500 tokens reports success and a 0.5 ratio.
    #[test]
    fn test_compaction_metadata_success() {
        let original_tokens = 1000;
        let compacted_tokens = 500;
        let messages_compacted = 10;
        let messages_kept = 5;
        let target_ratio = 0.5;

        let metadata = CompactionMetadata::new(
            original_tokens,
            compacted_tokens,
            messages_compacted,
            messages_kept,
            target_ratio,
        );

        assert!(metadata.success);
        assert!(metadata.error.is_none());
        assert_eq!(metadata.original_tokens, 1000);
        assert_eq!(metadata.compacted_tokens, 500);
        assert_eq!(metadata.messages_compacted, 10);
        assert_eq!(metadata.messages_kept, 5);
        assert_eq!(metadata.tokens_saved(), 500);
        assert_close!(metadata.actual_ratio, 0.5);
        assert_close!(metadata.compression_factor(), 0.5);
    }

    /// An `LlmError` includes its message in the `Display` output.
    ///
    /// Despite the test name, this exercises `CompactionError`, not
    /// `CompactionMetadata`.
    #[test]
    fn test_compaction_metadata_failure() {
        let error = CompactionError::LlmError("test error".to_string());
        let rendered = error.to_string();

        assert!(rendered.contains("test error"));
    }

    /// Compression factor for the degenerate (zero tokens) and 90% cases.
    #[test]
    fn test_compaction_metadata_compression_factor() {
        // With zero original tokens the ratio is 1.0 and nothing was compressed.
        let empty = CompactionMetadata::new(0, 0, 0, 0, 0.5);
        assert_close!(empty.actual_ratio, 1.0);
        assert_close!(empty.compression_factor(), 0.0);

        // 1000 -> 100 tokens is a 0.9 compression factor.
        let heavy = CompactionMetadata::new(1000, 100, 10, 5, 0.5);
        assert_close!(heavy.compression_factor(), 0.9);
    }

    /// `tokens_saved` is the difference between original and compacted tokens
    /// and never goes below zero.
    #[test]
    fn test_compaction_metadata_tokens_saved() {
        // (original, compacted, messages_compacted, messages_kept, expected saved)
        let cases = [
            // Normal case.
            (1000, 400, 10, 5, 600),
            // No savings.
            (1000, 1000, 0, 0, 0),
            // Compacted output larger than the input: should not happen, but
            // must saturate at zero rather than underflow.
            (500, 600, 5, 3, 0),
        ];

        for (original, compacted, compacted_msgs, kept_msgs, saved) in cases {
            let metadata =
                CompactionMetadata::new(original, compacted, compacted_msgs, kept_msgs, 0.5);
            assert_eq!(metadata.tokens_saved(), saved);
        }
    }

    /// The `Disabled` strategy never requests compaction.
    #[test]
    fn test_compaction_strategy_disabled() {
        let strategy = CompactionStrategy::Disabled;

        // (context tokens, iteration)
        for (tokens, iteration) in [(100_000, 5), (120_000, 10), (0, 1)] {
            assert!(!strategy.should_compact(tokens, 128_000, iteration));
        }
    }

    /// `Threshold(0.8)` triggers at or above 80% of the context window.
    #[test]
    fn test_compaction_strategy_threshold() {
        let strategy = CompactionStrategy::Threshold(0.8);

        // 100_000 / 128_000 = 78.1%: below the threshold.
        let below = strategy.should_compact(100_000, 128_000, 1);
        assert!(!below);

        // 102_400 / 128_000 = exactly 80%: at the threshold.
        let at = strategy.should_compact(102_400, 128_000, 1);
        assert!(at);

        // 120_000 / 128_000 = 93.75%: above the threshold.
        let above = strategy.should_compact(120_000, 128_000, 1);
        assert!(above);

        // A zero-sized context window never triggers compaction.
        let zero_window = strategy.should_compact(100_000, 0, 1);
        assert!(!zero_window);
    }

    /// `EveryNTurns(5)` triggers on non-zero multiples of five only.
    #[test]
    fn test_compaction_strategy_every_n_turns() {
        let strategy = CompactionStrategy::EveryNTurns(5);

        // Iterations before the first multiple, and between multiples.
        for iteration in [0, 3, 4, 6, 9] {
            assert!(!strategy.should_compact(0, 128_000, iteration));
        }

        // Multiples of the configured interval.
        for iteration in [5, 10, 15] {
            assert!(strategy.should_compact(0, 128_000, iteration));
        }
    }

    /// `AbsoluteTokens(100_000)` triggers at or above the token limit.
    #[test]
    fn test_compaction_strategy_absolute_tokens() {
        let strategy = CompactionStrategy::AbsoluteTokens(100_000);

        // Below the limit.
        for tokens in [50_000, 99_999] {
            assert!(!strategy.should_compact(tokens, 128_000, 0));
        }

        // Exactly at the limit, then above it.
        for tokens in [100_000, 150_000] {
            assert!(strategy.should_compact(tokens, 128_000, 0));
        }
    }

    /// Accessors of `CompactedContext` reflect the constructor arguments.
    #[test]
    fn test_compacted_context_basic() {
        let kept = vec![make_user_message("test")];
        let stats = CompactionMetadata::new(1000, 500, 10, 5, 0.5);
        let ctx = CompactedContext::new("Test summary".to_string(), kept, 10, stats);

        assert!(ctx.is_success());
        assert_eq!(ctx.summary(), "Test summary");
        assert_eq!(ctx.kept_count(), 1);
        assert_eq!(ctx.compacted_count(), 10);
        assert_eq!(ctx.metadata().tokens_saved(), 500);
    }

    /// An empty summary is stored and reported as-is.
    #[test]
    fn test_compacted_context_with_empty_summary() {
        let kept = vec![make_user_message("test1"), make_user_message("test2")];
        let stats = CompactionMetadata::new(100, 100, 0, 2, 0.5);
        let ctx = CompactedContext::new(String::new(), kept, 0, stats);

        assert_eq!(ctx.summary(), "");
        assert_eq!(ctx.kept_count(), 2);
        assert_eq!(ctx.compacted_count(), 0);
    }

    /// `LlmCompactor` can be constructed and configured through its builder.
    ///
    /// Only lower bounds are asserted for the configured values.
    #[test]
    fn test_llm_compactor_config_builder() {
        let compactor = LlmCompactor::new(make_test_model(), Arc::new(OpenAiProvider::new()))
            .with_keep_recent(6)
            .with_max_batch(25)
            .with_target_ratio(0.6);

        assert!(compactor.config.keep_recent >= 4);
        assert!(compactor.config.max_batch >= 20);
    }

    /// `Display` output of every `CompactionError` variant exercised here.
    #[test]
    fn test_compaction_error_display() {
        assert_eq!(
            CompactionError::NoMessagesToCompact.to_string(),
            "No messages to compact"
        );
        assert_eq!(
            CompactionError::CompactionDisabled.to_string(),
            "Compaction is disabled"
        );
        assert_eq!(
            CompactionError::NoContextWindow.to_string(),
            "Context window not configured"
        );

        let too_few = CompactionError::TooFewMessages {
            total: 3,
            keep_recent: 5,
        }
        .to_string();
        assert!(too_few.contains("3"));
        // The message reports the required minimum, keep_recent + 1, i.e. 6.
        assert!(too_few.contains("6"));

        let llm = CompactionError::LlmError("API timeout".to_string()).to_string();
        assert!(llm.contains("API timeout"));
    }

    /// The default manager uses a threshold strategy and keeps 4 recent messages.
    #[test]
    fn test_compaction_manager_default() {
        let manager = CompactionManager::default();

        assert_eq!(manager.config().keep_recent, 4);
        assert!(matches!(
            manager.strategy(),
            CompactionStrategy::Threshold(_)
        ));
    }

    /// A manager built with `AbsoluteTokens` follows that strategy.
    #[test]
    fn test_compaction_manager_with_custom_strategy() {
        let manager = CompactionManager::new(CompactionStrategy::AbsoluteTokens(50_000), 200_000);

        // 30_000 is under the limit, 60_000 is over it.
        assert!(!manager.should_compact(30_000, 0));
        assert!(manager.should_compact(60_000, 0));
    }

    /// `with_config` stores the supplied configuration.
    #[test]
    fn test_compaction_manager_with_config() {
        let custom = CompactionConfig::new()
            .with_keep_recent(8)
            .with_target_ratio(0.4);
        let manager =
            CompactionManager::with_config(CompactionStrategy::default(), 128_000, custom);

        let stored = manager.config();
        assert_eq!(stored.keep_recent, 8);
        assert_close!(stored.target_ratio, 0.4);
    }

    /// The manager combines its strategy with its own context window size.
    #[test]
    fn test_compaction_manager_should_compact_integration() {
        let manager = CompactionManager::new(CompactionStrategy::Threshold(0.75), 100_000);

        // 70% of the window: below the threshold.
        assert!(!manager.should_compact(70_000, 0));

        // Exactly 75%, then 80% and 100%: at or above the threshold.
        for tokens in [75_000, 80_000, 100_000] {
            assert!(manager.should_compact(tokens, 0));
        }
    }

    /// `should_compact` works on a manager built with `new` alone.
    ///
    /// Only the strategy decision is checked here; `compact_if_needed` is not
    /// called, so its `CompactionDisabled` path is not exercised.
    #[test]
    fn test_compaction_manager_no_compactor_set() {
        let manager = CompactionManager::new(CompactionStrategy::EveryNTurns(5), 128_000);

        // Iteration 5 is a multiple of the interval, so compaction is due.
        let due = manager.should_compact(0, 5);
        assert!(due);
    }

    /// Token estimation over non-empty messages yields a positive count.
    #[test]
    fn test_token_estimation_helper() {
        let compactor = LlmCompactor::new(make_test_model(), Arc::new(OpenAiProvider::new()));
        let conversation = vec![
            make_user_message("Hello world, this is a test message."),
            make_assistant_message("This is a response with some content."),
        ];

        let estimated = compactor.estimate_tokens(&conversation);
        assert!(estimated > 0, "Should estimate tokens for messages");
    }

    /// A custom instruction set through the builder is stored verbatim.
    #[test]
    fn test_compaction_config_custom_instruction() {
        let config = CompactionConfig::new()
            .with_custom_instruction("Focus on code changes and technical decisions");

        let instruction = config
            .custom_instruction
            .expect("custom instruction should be set");
        assert!(instruction.contains("code changes"));
    }

    /// The metadata timestamp is not in the future.
    #[test]
    fn test_compaction_metadata_timestamp_is_set() {
        let metadata = CompactionMetadata::new(1000, 500, 10, 5, 0.5);
        let now = Utc::now();

        assert!(metadata.timestamp <= now);
    }

    /// `actual_ratio` and `compression_factor` for on-, over- and under-target runs.
    #[test]
    fn test_compaction_ratio_achievement() {
        // Compaction that lands exactly on the 0.5 target.
        let on_target = CompactionMetadata::new(1000, 500, 10, 5, 0.5);
        assert_close!(on_target.actual_ratio, 0.5);

        // Compaction that compresses more than the target asks for.
        let over = CompactionMetadata::new(1000, 300, 10, 5, 0.5);
        assert_close!(over.actual_ratio, 0.3);
        assert!(over.compression_factor() > 0.5);

        // Compaction that compresses less than the target asks for.
        let under = CompactionMetadata::new(1000, 700, 10, 5, 0.5);
        assert_close!(under.actual_ratio, 0.7);
        assert!(under.compression_factor() < 0.5);
    }

    /// `set_config` replaces the configuration of an existing manager.
    #[test]
    fn test_compaction_manager_config_updates() {
        let mut manager = CompactionManager::default();

        manager.set_config(
            CompactionConfig::new()
                .with_keep_recent(12)
                .with_target_ratio(0.3),
        );

        let stored = manager.config();
        assert_eq!(stored.keep_recent, 12);
        assert_close!(stored.target_ratio, 0.3);
    }

    /// Compile-time check that `LlmCompactor::summarize_branch` exists.
    ///
    /// The returned future is created but never polled, so no request is made.
    #[test]
    fn test_llm_compactor_has_summarize_branch() {
        let compactor = LlmCompactor::new(make_test_model(), Arc::new(OpenAiProvider::new()));
        let conversation = vec![
            make_user_message("Test message 1"),
            make_assistant_message("Test response 1"),
            make_user_message("Test message 2"),
        ];

        let _future = compactor.summarize_branch(&conversation, "test-branch");
    }

    /// Creating the `summarize_branch` future for an empty conversation does not panic.
    ///
    /// Despite the test name, the future is never polled, so neither an LLM
    /// failure nor the returned result is actually observed here.
    #[test]
    fn test_summarize_branch_returns_error_on_llm_failure() {
        let compactor = LlmCompactor::new(make_test_model(), Arc::new(OpenAiProvider::new()));
        let conversation: Vec<Message> = Vec::new();

        let _future = compactor.summarize_branch(&conversation, "empty-branch");
    }
}
oxi_ai/compaction.rs

oxi_ai/
compaction.rs