Skip to main content

oxi_ai/
compaction.rs

1//! Context compaction for long conversations
2//!
3//! This module provides functionality to compact conversation history when it
4//! becomes too large, using the LLM itself to summarize older messages.
5
6use crate::high_level::complete;
7use crate::high_level::tokens::estimate as estimate_tokens;
8use crate::{
9    Api, AssistantMessage, ContentBlock, Context, Message, Model, Provider, StreamOptions,
10    TextContent, UserMessage,
11};
12
/// Truncate `s` to at most `max_chars` characters, appending `"..."` when anything was cut.
///
/// The limit is counted in `char`s (Unicode scalar values), not bytes, so the cut always
/// falls on a character boundary and multi-byte text is neither split nor truncated early.
/// A string that already fits within the limit is returned unchanged, without the ellipsis.
fn safe_truncate(s: &str, max_chars: usize) -> String {
    // `nth(max_chars)` yields the byte offset of the first character that does NOT fit;
    // `None` means the whole string is within the limit.
    match s.char_indices().nth(max_chars) {
        Some((boundary, _)) => format!("{}...", &s[..boundary]),
        None => s.to_string(),
    }
}
26use async_trait::async_trait;
27use chrono::{DateTime, Utc};
28use serde::{Deserialize, Serialize};
29use std::sync::Arc;
30use std::time::Duration;
31
/// Tunable settings for LLM-based conversation compaction.
#[derive(Debug, Clone)]
pub struct CompactionConfig {
    /// Number of most recent messages that are always kept (never compacted).
    pub keep_recent: usize,
    /// Upper bound on how many old messages go into one summarization batch.
    pub max_batch: usize,
    /// Target size after compaction as a fraction of the original (0.5 = reduce to 50%).
    pub target_ratio: f32,
    /// Maximum number of tokens for the summary response.
    pub summary_max_tokens: usize,
    /// Sampling temperature for summarization (lower = more focused).
    pub temperature: f32,
    /// Timeout for LLM compaction requests.
    pub timeout: Duration,
    /// Optional custom instruction for the summarizer.
    pub custom_instruction: Option<String>,
}

impl CompactionConfig {
    /// Build the default configuration (same values as [`Default::default`]).
    pub fn new() -> Self {
        Self::default()
    }

    /// Return the configuration with `keep_recent` replaced by `count`.
    pub fn with_keep_recent(self, count: usize) -> Self {
        Self {
            keep_recent: count,
            ..self
        }
    }

    /// Return the configuration with `max_batch` replaced by `count`.
    pub fn with_max_batch(self, count: usize) -> Self {
        Self {
            max_batch: count,
            ..self
        }
    }

    /// Return the configuration with a new target ratio.
    ///
    /// The value is clamped into `0.1..=0.9` before it is stored.
    pub fn with_target_ratio(self, ratio: f32) -> Self {
        Self {
            target_ratio: ratio.clamp(0.1, 0.9),
            ..self
        }
    }

    /// Return the configuration with `summary_max_tokens` replaced by `tokens`.
    pub fn with_summary_max_tokens(self, tokens: usize) -> Self {
        Self {
            summary_max_tokens: tokens,
            ..self
        }
    }

    /// Return the configuration with a new temperature, clamped into `0.0..=1.0`.
    pub fn with_temperature(self, temp: f32) -> Self {
        Self {
            temperature: temp.clamp(0.0, 1.0),
            ..self
        }
    }

    /// Return the configuration with `timeout` replaced.
    pub fn with_timeout(self, timeout: Duration) -> Self {
        Self { timeout, ..self }
    }

    /// Return the configuration with a custom summarizer instruction set.
    pub fn with_custom_instruction(self, instruction: impl Into<String>) -> Self {
        Self {
            custom_instruction: Some(instruction.into()),
            ..self
        }
    }
}

impl Default for CompactionConfig {
    /// Defaults: keep 4 recent messages, batches of 20, ratio 0.5, 1024 summary tokens,
    /// temperature 0.3, 60 second timeout, no custom instruction.
    fn default() -> Self {
        CompactionConfig {
            keep_recent: 4,
            max_batch: 20,
            target_ratio: 0.5,
            summary_max_tokens: 1024,
            temperature: 0.3,
            timeout: Duration::from_secs(60),
            custom_instruction: None,
        }
    }
}
113
114/// Metadata about a compaction operation
115#[derive(Debug, Clone, Serialize, Deserialize)]
116pub struct CompactionMetadata {
117    /// Estimated token count before compaction
118    pub original_tokens: usize,
119    /// Estimated token count after compaction
120    pub compacted_tokens: usize,
121    /// Number of messages that were compacted
122    pub messages_compacted: usize,
123    /// Number of messages kept
124    pub messages_kept: usize,
125    /// Timestamp of compaction
126    pub timestamp: DateTime<Utc>,
127    /// Target ratio used
128    pub target_ratio: f32,
129    /// Actual compaction ratio achieved
130    pub actual_ratio: f32,
131    /// Whether the operation was successful
132    pub success: bool,
133    /// Error message if the operation failed
134    pub error: Option<String>,
135}
136
137impl CompactionMetadata {
138    /// Create new metadata for a successful compaction
139    pub fn new(
140        original_tokens: usize,
141        compacted_tokens: usize,
142        messages_compacted: usize,
143        messages_kept: usize,
144        target_ratio: f32,
145    ) -> Self {
146        let actual_ratio = if original_tokens > 0 {
147            compacted_tokens as f32 / original_tokens as f32
148        } else {
149            1.0
150        };
151
152        Self {
153            original_tokens,
154            compacted_tokens,
155            messages_compacted,
156            messages_kept,
157            timestamp: Utc::now(),
158            target_ratio,
159            actual_ratio,
160            success: true,
161            error: None,
162        }
163    }
164
165    /// Create metadata for a failed compaction
166    pub fn failed(
167        original_tokens: usize,
168        messages_compacted: usize,
169        target_ratio: f32,
170        error: impl Into<String>,
171    ) -> Self {
172        Self {
173            original_tokens,
174            compacted_tokens: original_tokens,
175            messages_compacted,
176            messages_kept: 0,
177            timestamp: Utc::now(),
178            target_ratio,
179            actual_ratio: 1.0,
180            success: false,
181            error: Some(error.into()),
182        }
183    }
184
185    /// Get the compression factor (how much the context was reduced)
186    pub fn compression_factor(&self) -> f32 {
187        if self.actual_ratio > 0.0 {
188            1.0 - self.actual_ratio
189        } else {
190            0.0
191        }
192    }
193
194    /// Get tokens saved from compaction
195    pub fn tokens_saved(&self) -> usize {
196        self.original_tokens.saturating_sub(self.compacted_tokens)
197    }
198}
199
200/// Result of context compaction
201#[derive(Debug, Clone)]
202pub struct CompactedContext {
203    /// Summary of the compacted messages
204    pub summary: String,
205    /// Messages that were kept (typically recent ones)
206    pub kept_messages: Vec<Message>,
207    /// Number of messages that were compacted
208    pub compacted_count: usize,
209    /// Metadata about the compaction operation
210    pub metadata: CompactionMetadata,
211}
212
213impl CompactedContext {
214    /// Create a new compacted context
215    pub fn new(
216        summary: String,
217        kept_messages: Vec<Message>,
218        compacted_count: usize,
219        metadata: CompactionMetadata,
220    ) -> Self {
221        Self {
222            summary,
223            kept_messages,
224            compacted_count,
225            metadata,
226        }
227    }
228
229    /// Get the summary text
230    pub fn summary(&self) -> &str {
231        &self.summary
232    }
233
234    /// Get kept messages count
235    pub fn kept_count(&self) -> usize {
236        self.kept_messages.len()
237    }
238
239    /// Get compacted messages count
240    pub fn compacted_count(&self) -> usize {
241        self.compacted_count
242    }
243
244    /// Get the compaction metadata
245    pub fn metadata(&self) -> &CompactionMetadata {
246        &self.metadata
247    }
248
249    /// Check if compaction was successful
250    pub fn is_success(&self) -> bool {
251        self.metadata.success
252    }
253}
254
255/// Compaction strategy determining when to compact
256#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
257pub enum CompactionStrategy {
258    /// Never compact context
259    Disabled,
260    /// Compact when context is at least this percentage full (0.0 to 1.0)
261    Threshold(f32),
262    /// Compact after every N turns
263    EveryNTurns(usize),
264    /// Compact when context exceeds this absolute token count
265    AbsoluteTokens(usize),
266}
267
268impl CompactionStrategy {
269    /// Check if compaction should happen based on strategy
270    ///
271    /// # Arguments
272    /// * `context_tokens` - Estimated token count of current context
273    /// * `context_window` - Total context window size
274    /// * `iteration` - Current iteration count
275    ///
276    /// # Returns
277    /// `true` if compaction should be triggered
278    pub fn should_compact(
279        &self,
280        context_tokens: usize,
281        context_window: usize,
282        iteration: usize,
283    ) -> bool {
284        match self {
285            CompactionStrategy::Disabled => false,
286            CompactionStrategy::Threshold(threshold) => {
287                if context_window == 0 {
288                    return false;
289                }
290                let usage = context_tokens as f32 / context_window as f32;
291                usage >= *threshold
292            }
293            CompactionStrategy::EveryNTurns(n) => iteration > 0 && iteration.is_multiple_of(*n),
294            CompactionStrategy::AbsoluteTokens(max_tokens) => context_tokens >= *max_tokens,
295        }
296    }
297}
298
299impl Default for CompactionStrategy {
300    fn default() -> Self {
301        CompactionStrategy::Threshold(0.8)
302    }
303}
304
/// Failures that a compaction operation can report.
#[derive(Debug, Clone)]
pub enum CompactionError {
    /// The summarization request to the LLM failed; carries the error text.
    LlmError(String),
    /// There was nothing to compact.
    NoMessagesToCompact,
    /// Too few messages to compact (need at least `keep_recent + 1`).
    TooFewMessages { total: usize, keep_recent: usize },
    /// Compaction is disabled.
    CompactionDisabled,
    /// No context window is available.
    NoContextWindow,
}

impl std::fmt::Display for CompactionError {
    /// Render a human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::LlmError(msg) => write!(f, "LLM compaction failed: {}", msg),
            Self::NoMessagesToCompact => f.write_str("No messages to compact"),
            Self::TooFewMessages { total, keep_recent } => {
                // One message beyond the kept tail is the minimum worth compacting.
                let required = keep_recent + 1;
                write!(
                    f,
                    "Not enough messages ({}) to compact (need at least {} for keep_recent)",
                    total, required
                )
            }
            Self::CompactionDisabled => f.write_str("Compaction is disabled"),
            Self::NoContextWindow => f.write_str("Context window not configured"),
        }
    }
}

impl std::error::Error for CompactionError {}
340
341/// Trait for context compaction implementations
342#[async_trait]
343pub trait Compactor: Send + Sync {
344    /// Compact messages, returning a summary and kept messages
345    async fn compact(
346        &self,
347        messages: &[Message],
348        instruction: Option<&str>,
349    ) -> std::result::Result<CompactedContext, CompactionError>;
350
351    /// Estimate the token count of messages
352    fn estimate_tokens(&self, messages: &[Message]) -> usize {
353        messages
354            .iter()
355            .map(|msg| estimate_tokens(&msg.text_content().unwrap_or_default()))
356            .sum()
357    }
358}
359
360/// LLM-based compactor that uses the model itself to summarize
361pub struct LlmCompactor {
362    model: Model,
363    _provider: Arc<dyn Provider>,
364    config: CompactionConfig,
365}
366
367impl LlmCompactor {
368    /// Create a new LLM compactor with default configuration
369    pub fn new(model: Model, provider: Arc<dyn Provider>) -> Self {
370        Self {
371            model,
372            _provider: provider,
373            config: CompactionConfig::new(),
374        }
375    }
376
377    /// Create a new LLM compactor with custom configuration
378    pub fn with_config(
379        model: Model,
380        provider: Arc<dyn Provider>,
381        config: CompactionConfig,
382    ) -> Self {
383        Self {
384            model,
385            _provider: provider,
386            config,
387        }
388    }
389
390    /// Set how many recent messages to always keep
391    pub fn with_keep_recent(mut self, count: usize) -> Self {
392        self.config.keep_recent = count;
393        self
394    }
395
396    /// Set maximum batch size for summarization
397    pub fn with_max_batch(mut self, count: usize) -> Self {
398        self.config.max_batch = count;
399        self
400    }
401
402    /// Set target compaction ratio
403    pub fn with_target_ratio(mut self, ratio: f32) -> Self {
404        self.config.target_ratio = ratio.clamp(0.1, 0.9);
405        self
406    }
407
408    /// Build the summarization prompt
409    fn build_summarize_prompt(&self, messages: &[Message], instruction: Option<&str>) -> String {
410        let mut prompt = String::new();
411
412        prompt.push_str("Summarize the following conversation concisely. ");
413        prompt.push_str("Capture the key points, decisions, and any ongoing tasks or context.\n\n");
414
415        if let Some(instr) = instruction {
416            prompt.push_str(&format!("Focus areas: {}\n\n", instr));
417        } else if let Some(ref custom_instr) = self.config.custom_instruction {
418            prompt.push_str(&format!("Focus areas: {}\n\n", custom_instr));
419        }
420
421        prompt.push_str("## Conversation to summarize:\n");
422
423        for (i, msg) in messages.iter().enumerate() {
424            let role = match msg {
425                Message::User(_) => "User",
426                Message::Assistant(_) => "Assistant",
427                Message::ToolResult(_) => "Tool",
428            };
429            let content = msg.text_content().unwrap_or_default();
430            let content_preview = safe_truncate(&content, 500);
431            prompt.push_str(&format!("[{} {}]: {}\n", role, i + 1, content_preview));
432        }
433
434        prompt.push_str("\n## Summary:\n");
435        prompt
436            .push_str("Provide a concise summary that captures the essence of this conversation.");
437
438        prompt
439    }
440
441    /// Attempt to compact using a fallback strategy if LLM fails
442    async fn compact_with_fallback(
443        &self,
444        old_messages: &[Message],
445        recent_messages: &[Message],
446        instruction: Option<&str>,
447    ) -> std::result::Result<CompactedContext, CompactionError> {
448        // Try LLM-based summarization first
449        match self.summarize_with_llm(old_messages, instruction).await {
450            Ok(summary) => {
451                // Build the summary message
452                let mut summary_msg =
453                    AssistantMessage::new(Api::AnthropicMessages, "compactor", &self.model.id);
454                summary_msg.content = vec![ContentBlock::Text(TextContent::new(format!(
455                    "[Previous conversation summarized: {}]",
456                    summary
457                )))];
458
459                // Build final compacted context
460                let mut kept = vec![Message::Assistant(summary_msg)];
461                kept.extend(recent_messages.iter().cloned());
462
463                let original_tokens = self.estimate_tokens(old_messages);
464                let compacted_tokens = self.estimate_tokens(&kept);
465                let kept_len = kept.len();
466
467                Ok(CompactedContext::new(
468                    summary,
469                    kept,
470                    old_messages.len(),
471                    CompactionMetadata::new(
472                        original_tokens,
473                        compacted_tokens,
474                        old_messages.len(),
475                        kept_len,
476                        self.config.target_ratio,
477                    ),
478                ))
479            }
480            Err(llm_err) => {
481                // Fallback: simple truncation with key topics
482                self.compact_fallback(old_messages, recent_messages)
483                    .await
484                    .map_err(|_| CompactionError::LlmError(llm_err.to_string()))
485            }
486        }
487    }
488
489    /// Summarize messages using the LLM
490    async fn summarize_with_llm(
491        &self,
492        messages: &[Message],
493        instruction: Option<&str>,
494    ) -> std::result::Result<String, CompactionError> {
495        let prompt = self.build_summarize_prompt(messages, instruction);
496
497        let mut context = Context::new();
498        context.set_system_prompt(
499            "You are a helpful assistant that summarizes conversations concisely.",
500        );
501        context.add_message(Message::User(UserMessage::new(prompt)));
502
503        let options = StreamOptions {
504            temperature: Some(self.config.temperature as f64),
505            max_tokens: Some(self.config.summary_max_tokens),
506            ..Default::default()
507        };
508
509        let summary_message = complete(&self.model, &context, Some(options))
510            .await
511            .map_err(|e| CompactionError::LlmError(e.to_string()))?;
512
513        Ok(summary_message.text_content())
514    }
515
516    /// Fallback compaction when LLM fails - simple truncation with key preservation
517    async fn compact_fallback(
518        &self,
519        old_messages: &[Message],
520        recent_messages: &[Message],
521    ) -> std::result::Result<CompactedContext, CompactionError> {
522        // Simple fallback: keep first and last message, summarize in between
523        let mut summary_parts = Vec::new();
524
525        if old_messages.len() > 2 {
526            // Keep first message's topic
527            if let Some(first) = old_messages.first() {
528                let content = first.text_content().unwrap_or_default();
529                let preview = safe_truncate(&content, 200);
530                summary_parts.push(format!("Started discussing: {}", preview));
531            }
532
533            // Keep last message (likely the most relevant recent context)
534            if let Some(last) = old_messages.last() {
535                let content = last.text_content().unwrap_or_default();
536                let preview = safe_truncate(&content, 200);
537                summary_parts.push(format!("Ended with: {}", preview));
538            }
539
540            summary_parts.push(format!(
541                "({} messages omitted)",
542                old_messages.len().saturating_sub(2)
543            ));
544        } else if !old_messages.is_empty() {
545            // Just preserve first message content
546            if let Some(msg) = old_messages.first() {
547                let content = msg.text_content().unwrap_or_default();
548                summary_parts.push(format!("Conversation started: {}", content));
549            }
550        }
551
552        let summary = summary_parts.join(" ");
553
554        let mut summary_msg =
555            AssistantMessage::new(Api::AnthropicMessages, "compactor", &self.model.id);
556        summary_msg.content = vec![ContentBlock::Text(TextContent::new(format!(
557            "[Previous conversation summary: {}]",
558            summary
559        )))];
560
561        let mut kept = vec![Message::Assistant(summary_msg)];
562        kept.extend(recent_messages.iter().cloned());
563
564        let original_tokens = self.estimate_tokens(old_messages);
565        let compacted_tokens = self.estimate_tokens(&kept);
566        let kept_len = kept.len();
567
568        Ok(CompactedContext::new(
569            summary,
570            kept,
571            old_messages.len(),
572            CompactionMetadata::new(
573                original_tokens,
574                compacted_tokens,
575                old_messages.len(),
576                kept_len,
577                self.config.target_ratio,
578            ),
579        ))
580    }
581}
582
583#[async_trait]
584impl Compactor for LlmCompactor {
585    async fn compact(
586        &self,
587        messages: &[Message],
588        instruction: Option<&str>,
589    ) -> std::result::Result<CompactedContext, CompactionError> {
590        // Check minimum requirements
591        if messages.is_empty() {
592            return Err(CompactionError::NoMessagesToCompact);
593        }
594
595        if messages.len() <= self.config.keep_recent {
596            // Not enough messages to compact, return as-is with zero compaction
597            let original_tokens = self.estimate_tokens(messages);
598            return Ok(CompactedContext::new(
599                String::new(),
600                messages.to_vec(),
601                0,
602                CompactionMetadata::new(
603                    original_tokens,
604                    original_tokens,
605                    0,
606                    messages.len(),
607                    self.config.target_ratio,
608                ),
609            ));
610        }
611
612        // Split into old messages (to compact) and recent messages (to keep)
613        let keep_count = self.config.keep_recent.min(messages.len());
614        let old_messages: Vec<Message> = messages[..messages.len() - keep_count].to_vec();
615        let recent_messages: Vec<Message> = messages[messages.len() - keep_count..].to_vec();
616
617        if old_messages.is_empty() {
618            return Err(CompactionError::NoMessagesToCompact);
619        }
620
621        // Handle LLM failure gracefully
622        self.compact_with_fallback(&old_messages, &recent_messages, instruction)
623            .await
624    }
625}
626
627/// Additional methods for LlmCompactor (not part of Compactor trait)
628impl LlmCompactor {
629    /// Summarize a conversation branch for comparison purposes.
630    ///
631    /// This is used when branching occurs and you want to understand
632    /// what changed compared to another branch (e.g., main).
633    pub async fn summarize_branch(
634        &self,
635        messages: &[Message],
636        branch_name: &str,
637    ) -> std::result::Result<String, CompactionError> {
638        if messages.is_empty() {
639            return Ok(format!("Branch '{}' is empty", branch_name));
640        }
641
642        let mut prompt = String::new();
643        prompt.push_str(&format!(
644            "Summarize the conversation branch '{}' concisely. ",
645            branch_name
646        ));
647        prompt.push_str("Focus on: what was discussed, decisions made, and current state.\n\n");
648
649        prompt.push_str("## Branch messages:\n");
650        for (i, msg) in messages.iter().enumerate() {
651            let role = match msg {
652                Message::User(_) => "User",
653                Message::Assistant(_) => "Assistant",
654                Message::ToolResult(_) => "Tool",
655            };
656            let content = msg.text_content().unwrap_or_default();
657            let content_preview = safe_truncate(&content, 300);
658            prompt.push_str(&format!("[{} {}]: {}\n", role, i + 1, content_preview));
659        }
660
661        prompt.push_str("\n## Summary (be concise):\n");
662
663        // Use LLM to generate summary
664        let mut context = Context::new();
665        context.set_system_prompt(
666            "You are a helpful assistant that summarizes conversation branches. ",
667        );
668        context.add_message(Message::User(UserMessage::new(prompt)));
669
670        let options = StreamOptions {
671            temperature: Some(0.3),
672            max_tokens: Some(512),
673            ..Default::default()
674        };
675
676        let summary_message = complete(&self.model, &context, Some(options))
677            .await
678            .map_err(|e| CompactionError::LlmError(e.to_string()))?;
679
680        Ok(summary_message.text_content())
681    }
682}
683
684/// Context manager that handles compaction automatically
685pub struct CompactionManager {
686    strategy: CompactionStrategy,
687    compactor: Option<Arc<dyn Compactor>>,
688    context_window: usize,
689    config: CompactionConfig,
690}
691
692impl CompactionManager {
693    /// Create a new compaction manager
694    pub fn new(strategy: CompactionStrategy, context_window: usize) -> Self {
695        Self {
696            strategy,
697            compactor: None,
698            context_window,
699            config: CompactionConfig::new(),
700        }
701    }
702
703    /// Create a new compaction manager with custom config
704    pub fn with_config(
705        strategy: CompactionStrategy,
706        context_window: usize,
707        config: CompactionConfig,
708    ) -> Self {
709        Self {
710            strategy,
711            compactor: None,
712            context_window,
713            config,
714        }
715    }
716
717    /// Set the compactor to use
718    pub fn with_compactor<C: Compactor + 'static>(mut self, compactor: Arc<C>) -> Self {
719        self.compactor = Some(compactor);
720        self
721    }
722
723    /// Set the compactor from a trait object
724    pub fn set_compactor(&mut self, compactor: Arc<dyn Compactor>) {
725        self.compactor = Some(compactor);
726    }
727
728    /// Check if compaction should be triggered
729    pub fn should_compact(&self, context_tokens: usize, iteration: usize) -> bool {
730        self.strategy
731            .should_compact(context_tokens, self.context_window, iteration)
732    }
733
734    /// Get the current strategy
735    pub fn strategy(&self) -> &CompactionStrategy {
736        &self.strategy
737    }
738
739    /// Get the compaction configuration
740    pub fn config(&self) -> &CompactionConfig {
741        &self.config
742    }
743
744    /// Set compaction configuration
745    pub fn set_config(&mut self, config: CompactionConfig) {
746        self.config = config;
747    }
748
749    /// Compact the given messages if appropriate
750    pub async fn compact_if_needed(
751        &self,
752        messages: &[Message],
753        instruction: Option<&str>,
754        context_tokens: usize,
755        iteration: usize,
756    ) -> std::result::Result<Option<CompactedContext>, CompactionError> {
757        if !self.should_compact(context_tokens, iteration) {
758            return Ok(None);
759        }
760
761        let compactor = match &self.compactor {
762            Some(c) => c,
763            None => return Err(CompactionError::CompactionDisabled),
764        };
765
766        let result = compactor.compact(messages, instruction).await?;
767        Ok(Some(result))
768    }
769
770    /// Force compaction regardless of strategy
771    pub async fn compact_now(
772        &self,
773        messages: &[Message],
774        instruction: Option<&str>,
775    ) -> std::result::Result<CompactedContext, CompactionError> {
776        let compactor = match &self.compactor {
777            Some(c) => c,
778            None => return Err(CompactionError::CompactionDisabled),
779        };
780
781        compactor.compact(messages, instruction).await
782    }
783
784    /// Get estimated token count for messages
785    pub fn estimate_tokens(&self, messages: &[Message]) -> usize {
786        messages
787            .iter()
788            .map(|msg| estimate_tokens(&msg.text_content().unwrap_or_default()))
789            .sum()
790    }
791}
792
793impl Default for CompactionManager {
794    fn default() -> Self {
795        Self::new(CompactionStrategy::default(), 128_000)
796    }
797}
798
799// ============================================================================
800// Tests
801// ============================================================================
802
803#[cfg(test)]
804mod tests {
805    use super::*;
806
807    // Helper to create test user messages
808    fn make_user_message(content: &str) -> Message {
809        Message::user(content)
810    }
811
812    // Helper to create test assistant messages
813    fn make_assistant_message(content: &str) -> Message {
814        Message::Assistant({
815            let mut msg = AssistantMessage::new(Api::AnthropicMessages, "test", "test-model");
816            msg.content = vec![ContentBlock::Text(TextContent::new(content))];
817            msg
818        })
819    }
820
821    // Helper to create a test model
822    fn make_test_model() -> Model {
823        Model::new(
824            "test-model",
825            "Test Model",
826            Api::AnthropicMessages,
827            "test",
828            "https://test.example.com",
829        )
830    }
831
832    #[test]
833    fn test_compaction_config_defaults() {
834        let config = CompactionConfig::new();
835        assert_eq!(config.keep_recent, 4);
836        assert_eq!(config.max_batch, 20);
837        assert!((config.target_ratio - 0.5).abs() < 0.001);
838        assert_eq!(config.summary_max_tokens, 1024);
839        assert!((config.temperature - 0.3).abs() < 0.001);
840    }
841
842    #[test]
843    fn test_compaction_config_builder_pattern() {
844        let config = CompactionConfig::new()
845            .with_keep_recent(10)
846            .with_max_batch(30)
847            .with_target_ratio(0.3)
848            .with_temperature(0.5);
849
850        assert_eq!(config.keep_recent, 10);
851        assert_eq!(config.max_batch, 30);
852        assert!((config.target_ratio - 0.3).abs() < 0.001);
853        assert!((config.temperature - 0.5).abs() < 0.001);
854    }
855
856    #[test]
857    fn test_compaction_config_ratio_clamping() {
858        // Test upper bound clamping
859        let config = CompactionConfig::new().with_target_ratio(1.5);
860        assert!((config.target_ratio - 0.9).abs() < 0.001);
861
862        // Test lower bound clamping
863        let config = CompactionConfig::new().with_target_ratio(-0.5);
864        assert!((config.target_ratio - 0.1).abs() < 0.001);
865    }
866
867    #[test]
868    fn test_compaction_metadata_success() {
869        let metadata = CompactionMetadata::new(
870            1000, // original_tokens
871            500,  // compacted_tokens
872            10,   // messages_compacted
873            5,    // messages_kept
874            0.5,  // target_ratio
875        );
876
877        assert!(metadata.success);
878        assert_eq!(metadata.original_tokens, 1000);
879        assert_eq!(metadata.compacted_tokens, 500);
880        assert_eq!(metadata.messages_compacted, 10);
881        assert_eq!(metadata.messages_kept, 5);
882        assert!((metadata.actual_ratio - 0.5).abs() < 0.001);
883        assert!((metadata.compression_factor() - 0.5).abs() < 0.001);
884        assert_eq!(metadata.tokens_saved(), 500);
885        assert!(metadata.error.is_none());
886    }
887
888    #[test]
889    fn test_compaction_metadata_failure() {
890        let metadata = CompactionError::LlmError("test error".to_string());
891
892        // Verify error message
893        assert!(metadata.to_string().contains("test error"));
894    }
895
896    #[test]
897    fn test_compaction_metadata_compression_factor() {
898        // Zero original tokens should result in 1.0 ratio
899        let metadata = CompactionMetadata::new(0, 0, 0, 0, 0.5);
900        assert!((metadata.actual_ratio - 1.0).abs() < 0.001);
901        assert!((metadata.compression_factor() - 0.0).abs() < 0.001);
902
903        // Full compression
904        let metadata = CompactionMetadata::new(1000, 100, 10, 5, 0.5);
905        assert!((metadata.compression_factor() - 0.9).abs() < 0.001);
906    }
907
908    #[test]
909    fn test_compaction_metadata_tokens_saved() {
910        // Normal case
911        let metadata = CompactionMetadata::new(1000, 400, 10, 5, 0.5);
912        assert_eq!(metadata.tokens_saved(), 600);
913
914        // No savings
915        let metadata = CompactionMetadata::new(1000, 1000, 0, 0, 0.5);
916        assert_eq!(metadata.tokens_saved(), 0);
917
918        // Compacted is larger than original (should not happen but should be safe)
919        let metadata = CompactionMetadata::new(500, 600, 5, 3, 0.5);
920        assert_eq!(metadata.tokens_saved(), 0); // saturating_sub
921    }
922
923    #[test]
924    fn test_compaction_strategy_disabled() {
925        let strategy = CompactionStrategy::Disabled;
926        assert!(!strategy.should_compact(100_000, 128_000, 5));
927        assert!(!strategy.should_compact(120_000, 128_000, 10));
928        assert!(!strategy.should_compact(0, 128_000, 1));
929    }
930
931    #[test]
932    fn test_compaction_strategy_threshold() {
933        let strategy = CompactionStrategy::Threshold(0.8);
934
935        // Below threshold (79%)
936        assert!(!strategy.should_compact(100_000, 128_000, 1));
937
938        // At threshold (exactly 80%)
939        assert!(strategy.should_compact(102_400, 128_000, 1));
940
941        // Above threshold (93%)
942        assert!(strategy.should_compact(120_000, 128_000, 1));
943
944        // Zero context window should return false
945        assert!(!strategy.should_compact(100_000, 0, 1));
946    }
947
948    #[test]
949    fn test_compaction_strategy_every_n_turns() {
950        let strategy = CompactionStrategy::EveryNTurns(5);
951
952        // Before threshold iterations
953        assert!(!strategy.should_compact(0, 128_000, 0));
954        assert!(!strategy.should_compact(0, 128_000, 3));
955        assert!(!strategy.should_compact(0, 128_000, 4));
956
957        // At threshold iterations
958        assert!(strategy.should_compact(0, 128_000, 5));
959        assert!(strategy.should_compact(0, 128_000, 10));
960        assert!(strategy.should_compact(0, 128_000, 15));
961
962        // Not at threshold
963        assert!(!strategy.should_compact(0, 128_000, 6));
964        assert!(!strategy.should_compact(0, 128_000, 9));
965    }
966
967    #[test]
968    fn test_compaction_strategy_absolute_tokens() {
969        let strategy = CompactionStrategy::AbsoluteTokens(100_000);
970
971        // Below threshold
972        assert!(!strategy.should_compact(50_000, 128_000, 0));
973        assert!(!strategy.should_compact(99_999, 128_000, 0));
974
975        // At threshold
976        assert!(strategy.should_compact(100_000, 128_000, 0));
977
978        // Above threshold
979        assert!(strategy.should_compact(150_000, 128_000, 0));
980    }
981
982    #[test]
983    fn test_compacted_context_basic() {
984        let metadata = CompactionMetadata::new(1000, 500, 10, 5, 0.5);
985        let ctx = CompactedContext::new(
986            "Test summary".to_string(),
987            vec![make_user_message("test")],
988            10,
989            metadata,
990        );
991
992        assert_eq!(ctx.summary(), "Test summary");
993        assert_eq!(ctx.kept_count(), 1);
994        assert_eq!(ctx.compacted_count(), 10);
995        assert!(ctx.is_success());
996        assert_eq!(ctx.metadata().tokens_saved(), 500);
997    }
998
999    #[test]
1000    fn test_compacted_context_with_empty_summary() {
1001        let metadata = CompactionMetadata::new(100, 100, 0, 2, 0.5);
1002        let ctx = CompactedContext::new(
1003            String::new(), // Empty summary
1004            vec![make_user_message("test1"), make_user_message("test2")],
1005            0,
1006            metadata,
1007        );
1008
1009        assert_eq!(ctx.summary(), "");
1010        assert_eq!(ctx.kept_count(), 2);
1011        assert_eq!(ctx.compacted_count(), 0);
1012    }
1013
1014    #[test]
1015    fn test_llm_compactor_config_builder() {
1016        // Test that LlmCompactor can be created and builder pattern works
1017        use crate::providers::OpenAiProvider;
1018        let provider = OpenAiProvider::new();
1019        let model = make_test_model();
1020        let compactor = LlmCompactor::new(model, Arc::new(provider))
1021            .with_keep_recent(6)
1022            .with_max_batch(25)
1023            .with_target_ratio(0.6);
1024
1025        assert!(compactor.config.keep_recent >= 4);
1026        assert!(compactor.config.max_batch >= 20);
1027    }
1028
1029    #[test]
1030    fn test_compaction_error_display() {
1031        let err = CompactionError::NoMessagesToCompact;
1032        assert_eq!(err.to_string(), "No messages to compact");
1033
1034        let err = CompactionError::TooFewMessages {
1035            total: 3,
1036            keep_recent: 5,
1037        };
1038        assert!(err.to_string().contains("3"));
1039        // The error message says "need at least keep_recent + 1", so with keep_recent=5 it shows 6
1040        assert!(err.to_string().contains("6"));
1041
1042        let err = CompactionError::CompactionDisabled;
1043        assert_eq!(err.to_string(), "Compaction is disabled");
1044
1045        let err = CompactionError::NoContextWindow;
1046        assert_eq!(err.to_string(), "Context window not configured");
1047
1048        let err = CompactionError::LlmError("API timeout".to_string());
1049        assert!(err.to_string().contains("API timeout"));
1050    }
1051
1052    #[test]
1053    fn test_compaction_manager_default() {
1054        let manager = CompactionManager::default();
1055        assert!(matches!(
1056            manager.strategy(),
1057            CompactionStrategy::Threshold(_)
1058        ));
1059        assert_eq!(manager.config().keep_recent, 4);
1060    }
1061
1062    #[test]
1063    fn test_compaction_manager_with_custom_strategy() {
1064        let strategy = CompactionStrategy::AbsoluteTokens(50_000);
1065        let manager = CompactionManager::new(strategy, 200_000);
1066
1067        // Should not compact below threshold
1068        assert!(!manager.should_compact(30_000, 0));
1069
1070        // Should compact above threshold
1071        assert!(manager.should_compact(60_000, 0));
1072    }
1073
1074    #[test]
1075    fn test_compaction_manager_with_config() {
1076        let config = CompactionConfig::new()
1077            .with_keep_recent(8)
1078            .with_target_ratio(0.4);
1079
1080        let manager =
1081            CompactionManager::with_config(CompactionStrategy::default(), 128_000, config);
1082
1083        assert_eq!(manager.config().keep_recent, 8);
1084        assert!((manager.config().target_ratio - 0.4).abs() < 0.001);
1085    }
1086
1087    #[test]
1088    fn test_compaction_manager_should_compact_integration() {
1089        let manager = CompactionManager::new(CompactionStrategy::Threshold(0.75), 100_000);
1090
1091        // Below threshold
1092        assert!(!manager.should_compact(70_000, 0));
1093
1094        // At threshold (75%)
1095        assert!(manager.should_compact(75_000, 0));
1096
1097        // Above threshold
1098        assert!(manager.should_compact(80_000, 0));
1099        assert!(manager.should_compact(100_000, 0));
1100    }
1101
1102    #[test]
1103    fn test_compaction_manager_no_compactor_set() {
1104        let manager = CompactionManager::new(CompactionStrategy::EveryNTurns(5), 128_000);
1105
1106        // should_compact with EveryNTurns(5) at iteration 5 should return true
1107        // (compact_if_needed would return Err when no compactor is set, but should_compact works)
1108        assert!(manager.should_compact(0, 5)); // iteration 5 triggers compaction
1109    }
1110
1111    #[test]
1112    fn test_token_estimation_helper() {
1113        use crate::providers::OpenAiProvider;
1114        let provider = OpenAiProvider::new();
1115        let model = make_test_model();
1116        let compactor = LlmCompactor::new(model, Arc::new(provider));
1117
1118        let messages = vec![
1119            make_user_message("Hello world, this is a test message."),
1120            make_assistant_message("This is a response with some content."),
1121        ];
1122
1123        let tokens = compactor.estimate_tokens(&messages);
1124        assert!(tokens > 0, "Should estimate tokens for messages");
1125    }
1126
1127    #[test]
1128    fn test_compaction_config_custom_instruction() {
1129        let config = CompactionConfig::new()
1130            .with_custom_instruction("Focus on code changes and technical decisions");
1131
1132        assert!(config.custom_instruction.is_some());
1133        assert!(config.custom_instruction.unwrap().contains("code changes"));
1134    }
1135
1136    #[test]
1137    fn test_compaction_metadata_timestamp_is_set() {
1138        let metadata = CompactionMetadata::new(1000, 500, 10, 5, 0.5);
1139        assert!(metadata.timestamp <= Utc::now());
1140    }
1141
1142    #[test]
1143    fn test_compaction_ratio_achievement() {
1144        // Simulate compaction that achieves target ratio
1145        let metadata = CompactionMetadata::new(1000, 500, 10, 5, 0.5);
1146        assert!((metadata.actual_ratio - 0.5).abs() < 0.001);
1147
1148        // Simulate compaction that exceeds target (more compression)
1149        let metadata = CompactionMetadata::new(1000, 300, 10, 5, 0.5);
1150        assert!((metadata.actual_ratio - 0.3).abs() < 0.001);
1151        assert!(metadata.compression_factor() > 0.5);
1152
1153        // Simulate compaction that doesn't meet target (less compression)
1154        let metadata = CompactionMetadata::new(1000, 700, 10, 5, 0.5);
1155        assert!((metadata.actual_ratio - 0.7).abs() < 0.001);
1156        assert!(metadata.compression_factor() < 0.5);
1157    }
1158
1159    #[test]
1160    fn test_compaction_manager_config_updates() {
1161        let mut manager = CompactionManager::default();
1162
1163        let new_config = CompactionConfig::new()
1164            .with_keep_recent(12)
1165            .with_target_ratio(0.3);
1166
1167        manager.set_config(new_config);
1168
1169        assert_eq!(manager.config().keep_recent, 12);
1170        assert!((manager.config().target_ratio - 0.3).abs() < 0.001);
1171    }
1172
1173    #[test]
1174    fn test_llm_compactor_has_summarize_branch() {
1175        // Verify that LlmCompactor has the summarize_branch method
1176        use crate::providers::OpenAiProvider;
1177        let provider = OpenAiProvider::new();
1178        let model = make_test_model();
1179        let compactor = LlmCompactor::new(model, Arc::new(provider));
1180
1181        // Just verify the method exists (runtime test would require async)
1182        let messages = vec![
1183            make_user_message("Test message 1"),
1184            make_assistant_message("Test response 1"),
1185            make_user_message("Test message 2"),
1186        ];
1187
1188        // The method exists and can be called (we can't test async in sync test)
1189        // We verify it compiles correctly
1190        let branch_name = "test-branch";
1191        // This is a compile-time check that the method exists
1192        let _future = compactor.summarize_branch(&messages, branch_name);
1193    }
1194
1195    #[test]
1196    fn test_summarize_branch_returns_error_on_llm_failure() {
1197        // Test that summarize_branch handles empty messages gracefully
1198        use crate::providers::OpenAiProvider;
1199        let provider = OpenAiProvider::new();
1200        let model = make_test_model();
1201        let compactor = LlmCompactor::new(model, Arc::new(provider));
1202
1203        // Empty messages should return immediately
1204        let messages: Vec<Message> = vec![];
1205
1206        // This should not panic with empty messages
1207        // (We can't test the async result in a sync test, but compile-time check passes)
1208        let _future = compactor.summarize_branch(&messages, "empty-branch");
1209    }
1210}