oxi_ai/
compaction.rs

1//! Context compaction for long conversations
2//!
3//! This module provides functionality to compact conversation history when it
4//! becomes too large, using the LLM itself to summarize older messages.
5
6use crate::high_level::complete;
7use crate::high_level::tokens::estimate as estimate_tokens;
8use crate::{
9    Api, AssistantMessage, ContentBlock, Context, Message, Model, Provider, StreamOptions,
10    TextContent, UserMessage,
11};
12
/// Truncate `s` to at most `max_chars` characters (Unicode scalar values).
///
/// Returns `s` unchanged when it already fits. Otherwise returns its first
/// `max_chars` characters followed by `"..."`; the ellipsis is not counted
/// against `max_chars`. The cut always lands on a `char` boundary, so
/// multi-byte UTF-8 input cannot trigger a slicing panic.
fn safe_truncate(s: &str, max_chars: usize) -> String {
    // `nth(max_chars)` yields the byte offset of the first character that does
    // not fit. `None` means the string holds at most `max_chars` characters,
    // so nothing has to be cut and no ellipsis is added.
    match s.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}...", &s[..cut]),
        None => s.to_string(),
    }
}
26
27/// Generate a concise summary of the last N conversation messages.
28///
29/// Returns a string summarizing key topics and decisions without
30/// requiring a full compaction step.
31pub fn generate_branch_summary(messages: &[Message], n: usize) -> String {
32    if messages.is_empty() {
33        return "(empty conversation)".to_string();
34    }
35
36    let last_n: Vec<_> = if n > 0 {
37        messages.iter().rev().take(n).collect()
38    } else {
39        messages.iter().collect()
40    };
41
42    let mut topics = Vec::new();
43    let mut decisions = Vec::new();
44
45    for msg in last_n.iter().rev() {
46        let role = match msg {
47            Message::User(_) => "user",
48            Message::Assistant(_) => "assistant",
49            Message::ToolResult(_) => "tool",
50        };
51        let content = msg.text_content().unwrap_or_default();
52        let preview = safe_truncate(&content, 120);
53
54        // Detect code/file references
55        if content.contains("created file") || content.contains("edited file") {
56            topics.push("file modifications".to_string());
57        }
58        if content.contains("implemented") || content.contains("added feature") {
59            topics.push("feature implementation".to_string());
60        }
61        if content.contains("decided") || content.contains("chose") || content.contains("agreed") {
62            decisions.push(preview);
63        }
64        if content.contains("search") || content.contains("debug") || content.contains("fix") {
65            topics.push(format!("inquiry/analysis by {}", role));
66        }
67    }
68
69    // Deduplicate topics
70    topics.dedup();
71    decisions.dedup();
72
73    let summary = if topics.is_empty() && decisions.is_empty() {
74        // Fallback: just the last message preview
75        messages
76            .last()
77            .and_then(|m| m.text_content().ok())
78            .map(|c| safe_truncate(&c, 200))
79            .unwrap_or_else(|| "(no content)".to_string())
80    } else {
81        let mut parts = Vec::new();
82        if !topics.is_empty() {
83            parts.push(format!("Topics: {}", topics.join(", ")));
84        }
85        if !decisions.is_empty() {
86            parts.push(format!("Decisions: {}", decisions.join("; ")));
87        }
88        parts.join(" | ")
89    };
90
91    format!("[Branch summary of {} msgs] {}", messages.len(), summary)
92}
93
94use chrono::{DateTime, Utc};
95use serde::{Deserialize, Serialize};
96use std::future::Future;
97use std::pin::Pin;
98use std::sync::Arc;
99use std::time::Duration;
100
/// Tunable settings for LLM-based compaction.
#[derive(Debug, Clone)]
pub struct CompactionConfig {
    /// Number of most-recent messages that are always kept (never compacted)
    pub keep_recent: usize,
    /// Upper bound on how many old messages go into one summarization batch
    pub max_batch: usize,
    /// Desired size after compaction as a fraction of the original
    /// (e.g. 0.5 means "reduce to 50%")
    pub target_ratio: f32,
    /// Token budget for the summary response
    pub summary_max_tokens: usize,
    /// Sampling temperature for summarization (lower = more focused)
    pub temperature: f32,
    /// Time limit for LLM compaction requests
    pub timeout: Duration,
    /// Optional extra instruction handed to the summarizer
    pub custom_instruction: Option<String>,
}

impl CompactionConfig {
    /// Build a configuration holding the default values (see [`Default`]).
    pub fn new() -> Self {
        Self::default()
    }

    /// Replace the number of recent messages that are always kept.
    pub fn with_keep_recent(self, count: usize) -> Self {
        Self {
            keep_recent: count,
            ..self
        }
    }

    /// Replace the maximum summarization batch size.
    pub fn with_max_batch(self, count: usize) -> Self {
        Self {
            max_batch: count,
            ..self
        }
    }

    /// Replace the target compaction ratio; the value is clamped to `0.1..=0.9`.
    pub fn with_target_ratio(self, ratio: f32) -> Self {
        Self {
            target_ratio: ratio.clamp(0.1, 0.9),
            ..self
        }
    }

    /// Replace the token budget for the summary.
    pub fn with_summary_max_tokens(self, tokens: usize) -> Self {
        Self {
            summary_max_tokens: tokens,
            ..self
        }
    }

    /// Replace the summarization temperature; the value is clamped to `0.0..=1.0`.
    pub fn with_temperature(self, temp: f32) -> Self {
        Self {
            temperature: temp.clamp(0.0, 1.0),
            ..self
        }
    }

    /// Replace the time limit for LLM requests.
    pub fn with_timeout(self, timeout: Duration) -> Self {
        Self { timeout, ..self }
    }

    /// Attach a custom instruction for the summarizer.
    pub fn with_custom_instruction(self, instruction: impl Into<String>) -> Self {
        Self {
            custom_instruction: Some(instruction.into()),
            ..self
        }
    }
}

impl Default for CompactionConfig {
    /// Keep 4 recent messages, batch up to 20, target 50%, 1024 summary
    /// tokens, temperature 0.3, 60 second timeout, no custom instruction.
    fn default() -> Self {
        Self {
            keep_recent: 4,
            max_batch: 20,
            target_ratio: 0.5,
            summary_max_tokens: 1024,
            temperature: 0.3,
            timeout: Duration::from_secs(60),
            custom_instruction: None,
        }
    }
}
182
183/// Metadata about a compaction operation
184#[derive(Debug, Clone, Serialize, Deserialize)]
185pub struct CompactionMetadata {
186    /// Estimated token count before compaction
187    pub original_tokens: usize,
188    /// Estimated token count after compaction
189    pub compacted_tokens: usize,
190    /// Number of messages that were compacted
191    pub messages_compacted: usize,
192    /// Number of messages kept
193    pub messages_kept: usize,
194    /// Timestamp of compaction
195    pub timestamp: DateTime<Utc>,
196    /// Target ratio used
197    pub target_ratio: f32,
198    /// Actual compaction ratio achieved
199    pub actual_ratio: f32,
200    /// Whether the operation was successful
201    pub success: bool,
202    /// Error message if the operation failed
203    pub error: Option<String>,
204}
205
206impl CompactionMetadata {
207    /// Create new metadata for a successful compaction
208    pub fn new(
209        original_tokens: usize,
210        compacted_tokens: usize,
211        messages_compacted: usize,
212        messages_kept: usize,
213        target_ratio: f32,
214    ) -> Self {
215        let actual_ratio = if original_tokens > 0 {
216            compacted_tokens as f32 / original_tokens as f32
217        } else {
218            1.0
219        };
220
221        Self {
222            original_tokens,
223            compacted_tokens,
224            messages_compacted,
225            messages_kept,
226            timestamp: Utc::now(),
227            target_ratio,
228            actual_ratio,
229            success: true,
230            error: None,
231        }
232    }
233
234    /// Create metadata for a failed compaction
235    pub fn failed(
236        original_tokens: usize,
237        messages_compacted: usize,
238        target_ratio: f32,
239        error: impl Into<String>,
240    ) -> Self {
241        Self {
242            original_tokens,
243            compacted_tokens: original_tokens,
244            messages_compacted,
245            messages_kept: 0,
246            timestamp: Utc::now(),
247            target_ratio,
248            actual_ratio: 1.0,
249            success: false,
250            error: Some(error.into()),
251        }
252    }
253
254    /// Get the compression factor (how much the context was reduced)
255    pub fn compression_factor(&self) -> f32 {
256        if self.actual_ratio > 0.0 {
257            1.0 - self.actual_ratio
258        } else {
259            0.0
260        }
261    }
262
263    /// Get tokens saved from compaction
264    pub fn tokens_saved(&self) -> usize {
265        self.original_tokens.saturating_sub(self.compacted_tokens)
266    }
267}
268
269/// Result of context compaction
270#[derive(Debug, Clone)]
271pub struct CompactedContext {
272    /// Summary of the compacted messages
273    pub summary: String,
274    /// Messages that were kept (typically recent ones)
275    pub kept_messages: Vec<Message>,
276    /// Number of messages that were compacted
277    pub compacted_count: usize,
278    /// Metadata about the compaction operation
279    pub metadata: CompactionMetadata,
280}
281
282impl CompactedContext {
283    /// Create a new compacted context
284    pub fn new(
285        summary: String,
286        kept_messages: Vec<Message>,
287        compacted_count: usize,
288        metadata: CompactionMetadata,
289    ) -> Self {
290        Self {
291            summary,
292            kept_messages,
293            compacted_count,
294            metadata,
295        }
296    }
297
298    /// Get the summary text
299    pub fn summary(&self) -> &str {
300        &self.summary
301    }
302
303    /// Get kept messages count
304    pub fn kept_count(&self) -> usize {
305        self.kept_messages.len()
306    }
307
308    /// Get compacted messages count
309    pub fn compacted_count(&self) -> usize {
310        self.compacted_count
311    }
312
313    /// Get the compaction metadata
314    pub fn metadata(&self) -> &CompactionMetadata {
315        &self.metadata
316    }
317
318    /// Check if compaction was successful
319    pub fn is_success(&self) -> bool {
320        self.metadata.success
321    }
322}
323
324/// Compaction strategy determining when to compact
325#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
326pub enum CompactionStrategy {
327    /// Never compact context
328    Disabled,
329    /// Compact when context is at least this percentage full (0.0 to 1.0)
330    Threshold(f32),
331    /// Compact after every N turns
332    EveryNTurns(usize),
333    /// Compact when context exceeds this absolute token count
334    AbsoluteTokens(usize),
335}
336
337impl CompactionStrategy {
338    /// Check if compaction should happen based on strategy
339    ///
340    /// # Arguments
341    /// * `context_tokens` - Estimated token count of current context
342    /// * `context_window` - Total context window size
343    /// * `iteration` - Current iteration count
344    ///
345    /// # Returns
346    /// `true` if compaction should be triggered
347    pub fn should_compact(
348        &self,
349        context_tokens: usize,
350        context_window: usize,
351        iteration: usize,
352    ) -> bool {
353        match self {
354            CompactionStrategy::Disabled => false,
355            CompactionStrategy::Threshold(threshold) => {
356                if context_window == 0 {
357                    return false;
358                }
359                let usage = context_tokens as f32 / context_window as f32;
360                usage >= *threshold
361            }
362            CompactionStrategy::EveryNTurns(n) => iteration > 0 && iteration.is_multiple_of(*n),
363            CompactionStrategy::AbsoluteTokens(max_tokens) => context_tokens >= *max_tokens,
364        }
365    }
366}
367
368impl Default for CompactionStrategy {
369    fn default() -> Self {
370        CompactionStrategy::Threshold(0.8)
371    }
372}
373
/// Failures that a compaction operation can report.
#[derive(Debug, Clone)]
pub enum CompactionError {
    /// The summarization request to the LLM failed
    LlmError(String),
    /// There were no messages to compact
    NoMessagesToCompact,
    /// Too few messages to compact (need at least keep_recent + 1)
    TooFewMessages { total: usize, keep_recent: usize },
    /// Compaction is disabled
    CompactionDisabled,
    /// No context window is available
    NoContextWindow,
}

impl std::fmt::Display for CompactionError {
    /// Render a human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::LlmError(reason) => write!(f, "LLM compaction failed: {}", reason),
            Self::NoMessagesToCompact => f.write_str("No messages to compact"),
            Self::TooFewMessages { total, keep_recent } => {
                // One message beyond the kept tail is the minimum worth compacting.
                let required = keep_recent + 1;
                write!(
                    f,
                    "Not enough messages ({}) to compact (need at least {} for keep_recent)",
                    total, required
                )
            }
            Self::CompactionDisabled => f.write_str("Compaction is disabled"),
            Self::NoContextWindow => f.write_str("Context window not configured"),
        }
    }
}

impl std::error::Error for CompactionError {}
409
410/// Trait for context compaction implementations
411pub trait Compactor: Send + Sync {
412    /// Compact messages, returning a summary and kept messages
413    fn compact<'a>(
414        &'a self,
415        messages: &'a [Message],
416        instruction: Option<&'a str>,
417    ) -> Pin<
418        Box<
419            dyn Future<Output = std::result::Result<CompactedContext, CompactionError>> + Send + 'a,
420        >,
421    >;
422
423    /// Estimate the token count of messages
424    fn estimate_tokens(&self, messages: &[Message]) -> usize {
425        messages
426            .iter()
427            .map(|msg| estimate_tokens(&msg.text_content().unwrap_or_default()))
428            .sum()
429    }
430}
431
432/// Context transformer — applied before provider stream call.
433///
434/// Used by snapcompact inline imaging to replace large tool results
435/// with PNG frames, reducing token usage on vision-capable models.
436pub trait ContextTransformer: Send + Sync {
437    /// Transform the context before sending to the provider.
438    fn transform<'a>(
439        &'a self,
440        context: &'a Context,
441        model: &'a Model,
442    ) -> Pin<Box<dyn Future<Output = Context> + Send + 'a>>;
443}
444
445/// A no-op transformer that returns the context unchanged.
446pub struct NoopContextTransformer;
447
448impl ContextTransformer for NoopContextTransformer {
449    fn transform<'a>(
450        &'a self,
451        context: &'a Context,
452        _model: &'a Model,
453    ) -> Pin<Box<dyn Future<Output = Context> + Send + 'a>> {
454        Box::pin(async move { context.clone() })
455    }
456}
457
458/// LLM-based compactor that uses the model itself to summarize
459pub struct LlmCompactor {
460    model: Model,
461    _provider: Arc<dyn Provider>,
462    config: CompactionConfig,
463}
464
465impl LlmCompactor {
466    /// Create a new LLM compactor with default configuration
467    pub fn new(model: Model, provider: Arc<dyn Provider>) -> Self {
468        Self {
469            model,
470            _provider: provider,
471            config: CompactionConfig::new(),
472        }
473    }
474
475    /// Create a new LLM compactor with custom configuration
476    pub fn with_config(
477        model: Model,
478        provider: Arc<dyn Provider>,
479        config: CompactionConfig,
480    ) -> Self {
481        Self {
482            model,
483            _provider: provider,
484            config,
485        }
486    }
487
488    /// Set how many recent messages to always keep
489    pub fn with_keep_recent(mut self, count: usize) -> Self {
490        self.config.keep_recent = count;
491        self
492    }
493
494    /// Set maximum batch size for summarization
495    pub fn with_max_batch(mut self, count: usize) -> Self {
496        self.config.max_batch = count;
497        self
498    }
499
500    /// Set target compaction ratio
501    pub fn with_target_ratio(mut self, ratio: f32) -> Self {
502        self.config.target_ratio = ratio.clamp(0.1, 0.9);
503        self
504    }
505
506    /// Build the summarization prompt
507    fn build_summarize_prompt(&self, messages: &[Message], instruction: Option<&str>) -> String {
508        let mut prompt = String::new();
509
510        prompt.push_str("Summarize the following conversation concisely. ");
511        prompt.push_str("Capture the key points, decisions, and any ongoing tasks or context.\n\n");
512
513        if let Some(instr) = instruction {
514            prompt.push_str(&format!("Focus areas: {}\n\n", instr));
515        } else if let Some(ref custom_instr) = self.config.custom_instruction {
516            prompt.push_str(&format!("Focus areas: {}\n\n", custom_instr));
517        }
518
519        prompt.push_str("## Conversation to summarize:\n");
520
521        for (i, msg) in messages.iter().enumerate() {
522            let role = match msg {
523                Message::User(_) => "User",
524                Message::Assistant(_) => "Assistant",
525                Message::ToolResult(_) => "Tool",
526            };
527            let content = msg.text_content().unwrap_or_default();
528            let content_preview = safe_truncate(&content, 500);
529            prompt.push_str(&format!("[{} {}]: {}\n", role, i + 1, content_preview));
530        }
531
532        prompt.push_str("\n## Summary:\n");
533        prompt
534            .push_str("Provide a concise summary that captures the essence of this conversation.");
535
536        prompt
537    }
538
539    /// Attempt to compact using a fallback strategy if LLM fails
540    async fn compact_with_fallback(
541        &self,
542        old_messages: &[Message],
543        recent_messages: &[Message],
544        instruction: Option<&str>,
545    ) -> std::result::Result<CompactedContext, CompactionError> {
546        // Try LLM-based summarization first
547        match self.summarize_with_llm(old_messages, instruction).await {
548            Ok(summary) => {
549                // Build the summary message
550                let mut summary_msg =
551                    AssistantMessage::new(Api::AnthropicMessages, "compactor", &self.model.id);
552                summary_msg.content = vec![ContentBlock::Text(TextContent::new(format!(
553                    "[Previous conversation summarized: {}]",
554                    summary
555                )))];
556
557                // Build final compacted context
558                let mut kept = vec![Message::Assistant(summary_msg)];
559                kept.extend(recent_messages.iter().cloned());
560
561                let original_tokens = self.estimate_tokens(old_messages);
562                let compacted_tokens = self.estimate_tokens(&kept);
563                let kept_len = kept.len();
564
565                Ok(CompactedContext::new(
566                    summary,
567                    kept,
568                    old_messages.len(),
569                    CompactionMetadata::new(
570                        original_tokens,
571                        compacted_tokens,
572                        old_messages.len(),
573                        kept_len,
574                        self.config.target_ratio,
575                    ),
576                ))
577            }
578            Err(llm_err) => {
579                // Fallback: simple truncation with key topics
580                self.compact_fallback(old_messages, recent_messages)
581                    .await
582                    .map_err(|_| CompactionError::LlmError(llm_err.to_string()))
583            }
584        }
585    }
586
587    /// Summarize messages using the LLM
588    async fn summarize_with_llm(
589        &self,
590        messages: &[Message],
591        instruction: Option<&str>,
592    ) -> std::result::Result<String, CompactionError> {
593        let prompt = self.build_summarize_prompt(messages, instruction);
594
595        let mut context = Context::new();
596        context.set_system_prompt(
597            "You are a helpful assistant that summarizes conversations concisely.",
598        );
599        context.add_message(Message::User(UserMessage::new(prompt)));
600
601        let options = StreamOptions {
602            temperature: Some(self.config.temperature as f64),
603            max_tokens: Some(self.config.summary_max_tokens),
604            ..Default::default()
605        };
606
607        let summary_message = complete(&self.model, &context, Some(options))
608            .await
609            .map_err(|e| CompactionError::LlmError(e.to_string()))?;
610
611        Ok(summary_message.text_content())
612    }
613
614    /// Fallback compaction when LLM fails - simple truncation with key preservation
615    async fn compact_fallback(
616        &self,
617        old_messages: &[Message],
618        recent_messages: &[Message],
619    ) -> std::result::Result<CompactedContext, CompactionError> {
620        // Simple fallback: keep first and last message, summarize in between
621        let mut summary_parts = Vec::new();
622
623        if old_messages.len() > 2 {
624            // Keep first message's topic
625            if let Some(first) = old_messages.first() {
626                let content = first.text_content().unwrap_or_default();
627                let preview = safe_truncate(&content, 200);
628                summary_parts.push(format!("Started discussing: {}", preview));
629            }
630
631            // Keep last message (likely the most relevant recent context)
632            if let Some(last) = old_messages.last() {
633                let content = last.text_content().unwrap_or_default();
634                let preview = safe_truncate(&content, 200);
635                summary_parts.push(format!("Ended with: {}", preview));
636            }
637
638            summary_parts.push(format!(
639                "({} messages omitted)",
640                old_messages.len().saturating_sub(2)
641            ));
642        } else if !old_messages.is_empty() {
643            // Just preserve first message content
644            if let Some(msg) = old_messages.first() {
645                let content = msg.text_content().unwrap_or_default();
646                summary_parts.push(format!("Conversation started: {}", content));
647            }
648        }
649
650        let summary = summary_parts.join(" ");
651
652        let mut summary_msg =
653            AssistantMessage::new(Api::AnthropicMessages, "compactor", &self.model.id);
654        summary_msg.content = vec![ContentBlock::Text(TextContent::new(format!(
655            "[Previous conversation summary: {}]",
656            summary
657        )))];
658
659        let mut kept = vec![Message::Assistant(summary_msg)];
660        kept.extend(recent_messages.iter().cloned());
661
662        let original_tokens = self.estimate_tokens(old_messages);
663        let compacted_tokens = self.estimate_tokens(&kept);
664        let kept_len = kept.len();
665
666        Ok(CompactedContext::new(
667            summary,
668            kept,
669            old_messages.len(),
670            CompactionMetadata::new(
671                original_tokens,
672                compacted_tokens,
673                old_messages.len(),
674                kept_len,
675                self.config.target_ratio,
676            ),
677        ))
678    }
679}
680
681impl Compactor for LlmCompactor {
682    fn compact<'a>(
683        &'a self,
684        messages: &'a [Message],
685        instruction: Option<&'a str>,
686    ) -> Pin<
687        Box<
688            dyn Future<Output = std::result::Result<CompactedContext, CompactionError>> + Send + 'a,
689        >,
690    > {
691        Box::pin(async move {
692            // Check minimum requirements
693            if messages.is_empty() {
694                return Err(CompactionError::NoMessagesToCompact);
695            }
696
697            if messages.len() <= self.config.keep_recent {
698                // Not enough messages to compact, return as-is with zero compaction
699                let original_tokens = self.estimate_tokens(messages);
700                return Ok(CompactedContext::new(
701                    String::new(),
702                    messages.to_vec(),
703                    0,
704                    CompactionMetadata::new(
705                        original_tokens,
706                        original_tokens,
707                        0,
708                        messages.len(),
709                        self.config.target_ratio,
710                    ),
711                ));
712            }
713
714            // Split into old messages (to compact) and recent messages (to keep).
715            // The naive `messages.len() - keep_recent` split point can bisect
716            // a tool_call/tool_result pair, leaving orphans on either side.
717            // align_split_boundary walks backward from the naive point to the
718            // nearest "stable" boundary (a user message or a tool_call-free
719            // assistant message) so every tool call is wholly old or wholly
720            // recent.
721            let keep_count = self.config.keep_recent.min(messages.len());
722            let raw_split = messages.len() - keep_count;
723            let split = align_split_boundary(messages, raw_split);
724            let old_messages: Vec<Message> = messages[..split].to_vec();
725            let recent_messages: Vec<Message> = messages[split..].to_vec();
726
727            if old_messages.is_empty() {
728                return Err(CompactionError::NoMessagesToCompact);
729            }
730
731            // Handle LLM failure gracefully
732            self.compact_with_fallback(&old_messages, &recent_messages, instruction)
733                .await
734        })
735    }
736}
737
738/// Find a stable split point in `messages` such that no tool_call /
739/// tool_result pair is bisected.
740///
741/// Starting from `raw_split`, walk backward to the nearest index `i` where
742/// `messages[i-1]` is a "boundary" message — either a [`Message::User`] or
743/// an assistant message with no [`ContentBlock::ToolCall`] blocks.
744/// This guarantees the slice `messages[..i]` ends at a stable boundary
745/// and `messages[i..]` starts cleanly.
746///
747/// Returns `raw_split` if it is already at a stable boundary. Returns 0
748/// if no stable boundary is found before it.
749///
750/// Stable boundary rules:
751/// - `[User]` always safe.
752/// - `[Assistant]` without `tool_calls` always safe.
753/// - `[Assistant]` with `tool_calls` → NOT safe (must be kept whole).
754/// - `[ToolResult]` → NOT safe (must stay with its issuing assistant).
755pub(crate) fn align_split_boundary(messages: &[crate::Message], raw_split: usize) -> usize {
756    use crate::{ContentBlock, Message};
757
758    if raw_split == 0 || raw_split >= messages.len() {
759        return raw_split;
760    }
761
762    let is_boundary = |msg: &Message| match msg {
763        Message::User(_) => true,
764        Message::Assistant(a) => !a
765            .content
766            .iter()
767            .any(|b| matches!(b, ContentBlock::ToolCall(_))),
768        Message::ToolResult(_) => false,
769    };
770
771    // Walk backward from raw_split until we find a boundary or hit 0.
772    // messages[raw_split - 1] is the last message in the "old" slice.
773    let mut i = raw_split;
774    while i > 0 && !is_boundary(&messages[i - 1]) {
775        i -= 1;
776    }
777    i
778}
779
780/// Additional methods for LlmCompactor (not part of Compactor trait)
781impl LlmCompactor {
782    /// Summarize a conversation branch for comparison purposes.
783    ///
784    /// This is used when branching occurs and you want to understand
785    /// what changed compared to another branch (e.g., main).
786    pub async fn summarize_branch(
787        &self,
788        messages: &[Message],
789        branch_name: &str,
790    ) -> std::result::Result<String, CompactionError> {
791        if messages.is_empty() {
792            return Ok(format!("Branch '{}' is empty", branch_name));
793        }
794
795        let mut prompt = String::new();
796        prompt.push_str(&format!(
797            "Summarize the conversation branch '{}' concisely. ",
798            branch_name
799        ));
800        prompt.push_str("Focus on: what was discussed, decisions made, and current state.\n\n");
801
802        prompt.push_str("## Branch messages:\n");
803        for (i, msg) in messages.iter().enumerate() {
804            let role = match msg {
805                Message::User(_) => "User",
806                Message::Assistant(_) => "Assistant",
807                Message::ToolResult(_) => "Tool",
808            };
809            let content = msg.text_content().unwrap_or_default();
810            let content_preview = safe_truncate(&content, 300);
811            prompt.push_str(&format!("[{} {}]: {}\n", role, i + 1, content_preview));
812        }
813
814        prompt.push_str("\n## Summary (be concise):\n");
815
816        // Use LLM to generate summary
817        let mut context = Context::new();
818        context.set_system_prompt(
819            "You are a helpful assistant that summarizes conversation branches. ",
820        );
821        context.add_message(Message::User(UserMessage::new(prompt)));
822
823        let options = StreamOptions {
824            temperature: Some(0.3),
825            max_tokens: Some(512),
826            ..Default::default()
827        };
828
829        let summary_message = complete(&self.model, &context, Some(options))
830            .await
831            .map_err(|e| CompactionError::LlmError(e.to_string()))?;
832
833        Ok(summary_message.text_content())
834    }
835}
836
837/// Context manager that handles compaction automatically
838pub struct CompactionManager {
839    strategy: CompactionStrategy,
840    compactor: Option<Arc<dyn Compactor>>,
841    context_window: usize,
842    config: CompactionConfig,
843}
844
845impl CompactionManager {
846    /// Create a new compaction manager
847    pub fn new(strategy: CompactionStrategy, context_window: usize) -> Self {
848        Self {
849            strategy,
850            compactor: None,
851            context_window,
852            config: CompactionConfig::new(),
853        }
854    }
855
856    /// Create a new compaction manager with custom config
857    pub fn with_config(
858        strategy: CompactionStrategy,
859        context_window: usize,
860        config: CompactionConfig,
861    ) -> Self {
862        Self {
863            strategy,
864            compactor: None,
865            context_window,
866            config,
867        }
868    }
869
870    /// Set the compactor to use
871    pub fn with_compactor<C: Compactor + 'static>(mut self, compactor: Arc<C>) -> Self {
872        self.compactor = Some(compactor);
873        self
874    }
875
876    /// Set the compactor from a trait object
877    pub fn set_compactor(&mut self, compactor: Arc<dyn Compactor>) {
878        self.compactor = Some(compactor);
879    }
880
881    /// Check if compaction should be triggered
882    pub fn should_compact(&self, context_tokens: usize, iteration: usize) -> bool {
883        self.strategy
884            .should_compact(context_tokens, self.context_window, iteration)
885    }
886
887    /// Get the current strategy
888    pub fn strategy(&self) -> &CompactionStrategy {
889        &self.strategy
890    }
891
892    /// Get the compaction configuration
893    pub fn config(&self) -> &CompactionConfig {
894        &self.config
895    }
896
897    /// Set compaction configuration
898    pub fn set_config(&mut self, config: CompactionConfig) {
899        self.config = config;
900    }
901
902    /// Compact the given messages if appropriate
903    pub async fn compact_if_needed(
904        &self,
905        messages: &[Message],
906        instruction: Option<&str>,
907        context_tokens: usize,
908        iteration: usize,
909    ) -> std::result::Result<Option<CompactedContext>, CompactionError> {
910        if !self.should_compact(context_tokens, iteration) {
911            return Ok(None);
912        }
913
914        let compactor = match &self.compactor {
915            Some(c) => c,
916            None => return Err(CompactionError::CompactionDisabled),
917        };
918
919        let result = compactor.compact(messages, instruction).await?;
920        Ok(Some(result))
921    }
922
923    /// Force compaction regardless of strategy
924    pub async fn compact_now(
925        &self,
926        messages: &[Message],
927        instruction: Option<&str>,
928    ) -> std::result::Result<CompactedContext, CompactionError> {
929        let compactor = match &self.compactor {
930            Some(c) => c,
931            None => return Err(CompactionError::CompactionDisabled),
932        };
933
934        compactor.compact(messages, instruction).await
935    }
936
937    /// Get estimated token count for messages
938    pub fn estimate_tokens(&self, messages: &[Message]) -> usize {
939        messages
940            .iter()
941            .map(|msg| estimate_tokens(&msg.text_content().unwrap_or_default()))
942            .sum()
943    }
944}
945
946impl Default for CompactionManager {
947    fn default() -> Self {
948        Self::new(CompactionStrategy::default(), 128_000)
949    }
950}
951
952// ============================================================================
953// Tests
954// ============================================================================
955
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{ToolCall, ToolResultMessage};

    // Assert that two floating-point values differ by less than 0.001.
    //
    // A macro (rather than a function) keeps the check independent of whether
    // the compared field is `f32` or `f64`.
    macro_rules! assert_close {
        ($actual:expr, $expected:expr) => {{
            let actual = $actual;
            let expected = $expected;
            assert!(
                (actual - expected).abs() < 0.001,
                "expected {} to be within 0.001 of {}",
                actual,
                expected
            );
        }};
    }

    // ---- shared fixtures ----

    // Build a user message via the `Message::user` convenience constructor.
    fn make_user_message(content: &str) -> Message {
        Message::user(content)
    }

    // Build an assistant message holding a single text block.
    fn make_assistant_message(content: &str) -> Message {
        let mut msg = AssistantMessage::new(Api::AnthropicMessages, "test", "test-model");
        msg.content = vec![ContentBlock::Text(TextContent::new(content))];
        Message::Assistant(msg)
    }

    // Build a model description that points at a non-existent endpoint.
    fn make_test_model() -> Model {
        Model::new(
            "test-model",
            "Test Model",
            Api::AnthropicMessages,
            "test",
            "https://test.example.com",
        )
    }

    // ---- CompactionConfig ----

    #[test]
    fn test_compaction_config_defaults() {
        let config = CompactionConfig::new();
        assert_eq!(config.keep_recent, 4);
        assert_eq!(config.max_batch, 20);
        assert_close!(config.target_ratio, 0.5);
        assert_eq!(config.summary_max_tokens, 1024);
        assert_close!(config.temperature, 0.3);
    }

    #[test]
    fn test_compaction_config_builder_pattern() {
        let config = CompactionConfig::new()
            .with_keep_recent(10)
            .with_max_batch(30)
            .with_target_ratio(0.3)
            .with_temperature(0.5);

        assert_eq!(config.keep_recent, 10);
        assert_eq!(config.max_batch, 30);
        assert_close!(config.target_ratio, 0.3);
        assert_close!(config.temperature, 0.5);
    }

    #[test]
    fn test_compaction_config_ratio_clamping() {
        // Values above the allowed range clamp to the upper bound.
        let config = CompactionConfig::new().with_target_ratio(1.5);
        assert_close!(config.target_ratio, 0.9);

        // Values below the allowed range clamp to the lower bound.
        let config = CompactionConfig::new().with_target_ratio(-0.5);
        assert_close!(config.target_ratio, 0.1);
    }

    // ---- CompactionMetadata ----

    #[test]
    fn test_compaction_metadata_success() {
        let metadata = CompactionMetadata::new(
            1000, // original_tokens
            500,  // compacted_tokens
            10,   // messages_compacted
            5,    // messages_kept
            0.5,  // target_ratio
        );

        assert!(metadata.success);
        assert_eq!(metadata.original_tokens, 1000);
        assert_eq!(metadata.compacted_tokens, 500);
        assert_eq!(metadata.messages_compacted, 10);
        assert_eq!(metadata.messages_kept, 5);
        assert_close!(metadata.actual_ratio, 0.5);
        assert_close!(metadata.compression_factor(), 0.5);
        assert_eq!(metadata.tokens_saved(), 500);
        assert!(metadata.error.is_none());
    }

    #[test]
    fn test_compaction_metadata_failure() {
        let err = CompactionError::LlmError("test error".to_string());

        // The wrapped message must surface in the rendered error.
        assert!(err.to_string().contains("test error"));
    }

    #[test]
    fn test_compaction_metadata_compression_factor() {
        // Zero original tokens: ratio is 1.0, so nothing counts as compressed.
        let metadata = CompactionMetadata::new(0, 0, 0, 0, 0.5);
        assert_close!(metadata.actual_ratio, 1.0);
        assert_close!(metadata.compression_factor(), 0.0);

        // 1000 -> 100 tokens is a 90% reduction.
        let metadata = CompactionMetadata::new(1000, 100, 10, 5, 0.5);
        assert_close!(metadata.compression_factor(), 0.9);
    }

    #[test]
    fn test_compaction_metadata_tokens_saved() {
        // Normal case
        let metadata = CompactionMetadata::new(1000, 400, 10, 5, 0.5);
        assert_eq!(metadata.tokens_saved(), 600);

        // No savings
        let metadata = CompactionMetadata::new(1000, 1000, 0, 0, 0.5);
        assert_eq!(metadata.tokens_saved(), 0);

        // Compacted larger than original (should not happen, but must not underflow).
        let metadata = CompactionMetadata::new(500, 600, 5, 3, 0.5);
        assert_eq!(metadata.tokens_saved(), 0);
    }

    #[test]
    fn test_compaction_metadata_timestamp_is_set() {
        let metadata = CompactionMetadata::new(1000, 500, 10, 5, 0.5);
        assert!(metadata.timestamp <= Utc::now());
    }

    #[test]
    fn test_compaction_ratio_achievement() {
        // Compaction that lands exactly on the target ratio.
        let metadata = CompactionMetadata::new(1000, 500, 10, 5, 0.5);
        assert_close!(metadata.actual_ratio, 0.5);

        // Compaction that beats the target (more compression).
        let metadata = CompactionMetadata::new(1000, 300, 10, 5, 0.5);
        assert_close!(metadata.actual_ratio, 0.3);
        assert!(metadata.compression_factor() > 0.5);

        // Compaction that misses the target (less compression).
        let metadata = CompactionMetadata::new(1000, 700, 10, 5, 0.5);
        assert_close!(metadata.actual_ratio, 0.7);
        assert!(metadata.compression_factor() < 0.5);
    }

    // ---- CompactionStrategy ----

    #[test]
    fn test_compaction_strategy_disabled() {
        let strategy = CompactionStrategy::Disabled;
        assert!(!strategy.should_compact(100_000, 128_000, 5));
        assert!(!strategy.should_compact(120_000, 128_000, 10));
        assert!(!strategy.should_compact(0, 128_000, 1));
    }

    #[test]
    fn test_compaction_strategy_threshold() {
        let strategy = CompactionStrategy::Threshold(0.8);

        // Below threshold (100_000 / 128_000 ≈ 78%)
        assert!(!strategy.should_compact(100_000, 128_000, 1));

        // At threshold (102_400 / 128_000 = exactly 80%)
        assert!(strategy.should_compact(102_400, 128_000, 1));

        // Above threshold (120_000 / 128_000 ≈ 94%)
        assert!(strategy.should_compact(120_000, 128_000, 1));

        // Zero context window should return false
        assert!(!strategy.should_compact(100_000, 0, 1));
    }

    #[test]
    fn test_compaction_strategy_every_n_turns() {
        let strategy = CompactionStrategy::EveryNTurns(5);

        // Before the first multiple of 5
        assert!(!strategy.should_compact(0, 128_000, 0));
        assert!(!strategy.should_compact(0, 128_000, 3));
        assert!(!strategy.should_compact(0, 128_000, 4));

        // On multiples of 5
        assert!(strategy.should_compact(0, 128_000, 5));
        assert!(strategy.should_compact(0, 128_000, 10));
        assert!(strategy.should_compact(0, 128_000, 15));

        // Between multiples of 5
        assert!(!strategy.should_compact(0, 128_000, 6));
        assert!(!strategy.should_compact(0, 128_000, 9));
    }

    #[test]
    fn test_compaction_strategy_absolute_tokens() {
        let strategy = CompactionStrategy::AbsoluteTokens(100_000);

        // Below threshold
        assert!(!strategy.should_compact(50_000, 128_000, 0));
        assert!(!strategy.should_compact(99_999, 128_000, 0));

        // At threshold
        assert!(strategy.should_compact(100_000, 128_000, 0));

        // Above threshold
        assert!(strategy.should_compact(150_000, 128_000, 0));
    }

    // ---- CompactedContext ----

    #[test]
    fn test_compacted_context_basic() {
        let metadata = CompactionMetadata::new(1000, 500, 10, 5, 0.5);
        let ctx = CompactedContext::new(
            "Test summary".to_string(),
            vec![make_user_message("test")],
            10,
            metadata,
        );

        assert_eq!(ctx.summary(), "Test summary");
        assert_eq!(ctx.kept_count(), 1);
        assert_eq!(ctx.compacted_count(), 10);
        assert!(ctx.is_success());
        assert_eq!(ctx.metadata().tokens_saved(), 500);
    }

    #[test]
    fn test_compacted_context_with_empty_summary() {
        let metadata = CompactionMetadata::new(100, 100, 0, 2, 0.5);
        let ctx = CompactedContext::new(
            String::new(), // Empty summary
            vec![make_user_message("test1"), make_user_message("test2")],
            0,
            metadata,
        );

        assert_eq!(ctx.summary(), "");
        assert_eq!(ctx.kept_count(), 2);
        assert_eq!(ctx.compacted_count(), 0);
    }

    // ---- LlmCompactor ----

    #[test]
    fn test_llm_compactor_config_builder() {
        use crate::providers::OpenAiProvider;
        let provider = OpenAiProvider::new();
        let model = make_test_model();
        let compactor = LlmCompactor::new(model, Arc::new(provider))
            .with_keep_recent(6)
            .with_max_batch(25)
            .with_target_ratio(0.6);

        // Assert the exact configured values: the defaults are 4 / 20 / 0.5,
        // so a lower-bound check would pass even if the builder did nothing.
        assert_eq!(compactor.config.keep_recent, 6);
        assert_eq!(compactor.config.max_batch, 25);
        assert_close!(compactor.config.target_ratio, 0.6);
    }

    #[test]
    fn test_token_estimation_helper() {
        use crate::providers::OpenAiProvider;
        let provider = OpenAiProvider::new();
        let model = make_test_model();
        let compactor = LlmCompactor::new(model, Arc::new(provider));

        let messages = vec![
            make_user_message("Hello world, this is a test message."),
            make_assistant_message("This is a response with some content."),
        ];

        let tokens = compactor.estimate_tokens(&messages);
        assert!(tokens > 0, "Should estimate tokens for messages");
    }

    #[test]
    fn test_llm_compactor_has_summarize_branch() {
        use crate::providers::OpenAiProvider;
        let provider = OpenAiProvider::new();
        let model = make_test_model();
        let compactor = LlmCompactor::new(model, Arc::new(provider));

        let messages = vec![
            make_user_message("Test message 1"),
            make_assistant_message("Test response 1"),
            make_user_message("Test message 2"),
        ];

        // Compile-time check only: the future is created but never polled, so
        // no request is made and no result is inspected.
        let branch_name = "test-branch";
        let _future = compactor.summarize_branch(&messages, branch_name);
    }

    #[test]
    fn test_summarize_branch_returns_error_on_llm_failure() {
        use crate::providers::OpenAiProvider;
        let provider = OpenAiProvider::new();
        let model = make_test_model();
        let compactor = LlmCompactor::new(model, Arc::new(provider));

        let messages: Vec<Message> = vec![];

        // NOTE: despite the test name, nothing is awaited here. This only
        // verifies that building the future for an empty slice compiles and
        // does not panic; the error path itself needs an async test.
        let _future = compactor.summarize_branch(&messages, "empty-branch");
    }

    // ---- CompactionError ----

    #[test]
    fn test_compaction_error_display() {
        let err = CompactionError::NoMessagesToCompact;
        assert_eq!(err.to_string(), "No messages to compact");

        let err = CompactionError::TooFewMessages {
            total: 3,
            keep_recent: 5,
        };
        assert!(err.to_string().contains("3"));
        // The message reports "keep_recent + 1", so keep_recent = 5 shows 6.
        assert!(err.to_string().contains("6"));

        let err = CompactionError::CompactionDisabled;
        assert_eq!(err.to_string(), "Compaction is disabled");

        let err = CompactionError::NoContextWindow;
        assert_eq!(err.to_string(), "Context window not configured");

        let err = CompactionError::LlmError("API timeout".to_string());
        assert!(err.to_string().contains("API timeout"));
    }

    // ---- CompactionManager ----

    #[test]
    fn test_compaction_manager_default() {
        let manager = CompactionManager::default();
        assert!(matches!(
            manager.strategy(),
            CompactionStrategy::Threshold(_)
        ));
        assert_eq!(manager.config().keep_recent, 4);
    }

    #[test]
    fn test_compaction_manager_with_custom_strategy() {
        let strategy = CompactionStrategy::AbsoluteTokens(50_000);
        let manager = CompactionManager::new(strategy, 200_000);

        // Should not compact below threshold
        assert!(!manager.should_compact(30_000, 0));

        // Should compact above threshold
        assert!(manager.should_compact(60_000, 0));
    }

    #[test]
    fn test_compaction_manager_with_config() {
        let config = CompactionConfig::new()
            .with_keep_recent(8)
            .with_target_ratio(0.4);

        let manager =
            CompactionManager::with_config(CompactionStrategy::default(), 128_000, config);

        assert_eq!(manager.config().keep_recent, 8);
        assert_close!(manager.config().target_ratio, 0.4);
    }

    #[test]
    fn test_compaction_manager_should_compact_integration() {
        let manager = CompactionManager::new(CompactionStrategy::Threshold(0.75), 100_000);

        // Below threshold
        assert!(!manager.should_compact(70_000, 0));

        // At threshold (75%)
        assert!(manager.should_compact(75_000, 0));

        // Above threshold
        assert!(manager.should_compact(80_000, 0));
        assert!(manager.should_compact(100_000, 0));
    }

    #[test]
    fn test_compaction_manager_no_compactor_set() {
        let manager = CompactionManager::new(CompactionStrategy::EveryNTurns(5), 128_000);

        // `should_compact` only consults the strategy, so it reports true at
        // iteration 5 even though no compactor is installed. (Running the
        // compaction itself would return an error in that state.)
        assert!(manager.should_compact(0, 5));
    }

    #[test]
    fn test_compaction_manager_config_updates() {
        let mut manager = CompactionManager::default();

        let new_config = CompactionConfig::new()
            .with_keep_recent(12)
            .with_target_ratio(0.3);

        manager.set_config(new_config);

        assert_eq!(manager.config().keep_recent, 12);
        assert_close!(manager.config().target_ratio, 0.3);
    }

    #[test]
    fn test_compaction_config_custom_instruction() {
        let config = CompactionConfig::new()
            .with_custom_instruction("Focus on code changes and technical decisions");

        let instruction = config
            .custom_instruction
            .expect("custom instruction should be set");
        assert!(instruction.contains("code changes"));
    }

    // ---- align_split_boundary tests ----

    fn make_user_msg(text: &str) -> Message {
        Message::User(UserMessage::new(text))
    }

    fn make_asst_text(text: &str) -> Message {
        let mut m = AssistantMessage::new(Api::AnthropicMessages, "agent", "m");
        m.content
            .push(ContentBlock::Text(TextContent::new(text.to_string())));
        Message::Assistant(m)
    }

    fn make_asst_with_tool_call(id: &str) -> Message {
        let mut m = AssistantMessage::new(Api::AnthropicMessages, "agent", "m");
        m.content.push(ContentBlock::ToolCall(ToolCall::new(
            id,
            "bash",
            serde_json::json!({}),
        )));
        Message::Assistant(m)
    }

    fn make_tool_result(id: &str) -> Message {
        Message::ToolResult(ToolResultMessage::new(
            id,
            "bash",
            vec![ContentBlock::Text(TextContent::new("ok"))],
        ))
    }

    #[test]
    fn test_align_boundary_already_at_user() {
        // raw_split lands on a User → no adjustment needed.
        let msgs = vec![
            make_user_msg("a"),
            make_user_msg("b"),
            make_user_msg("c"),
            make_user_msg("d"),
        ];
        // raw_split = 2 → messages[1] is User → already a boundary.
        assert_eq!(align_split_boundary(&msgs, 2), 2);
    }

    #[test]
    fn test_align_boundary_walks_back_from_tool_result() {
        // raw_split falls inside a tool_call/tool_result block.
        // Should walk back to the assistant that issued the tool_call.
        let msgs = vec![
            make_user_msg("u1"),
            make_asst_with_tool_call("call_1"),
            make_tool_result("call_1"),
            make_user_msg("u2"),
            make_asst_text("done"),
        ];
        // raw_split = 3 falls between tool_result and user.
        // Walking back: messages[2] = tool_result (not boundary),
        // messages[1] = assistant with tool_call (not boundary),
        // messages[0] = user (boundary). Result: 1.
        assert_eq!(align_split_boundary(&msgs, 3), 1);
    }

    #[test]
    fn test_align_boundary_at_zero() {
        // Edge case: raw_split = 0.
        let msgs = vec![make_user_msg("u1")];
        assert_eq!(align_split_boundary(&msgs, 0), 0);
    }

    #[test]
    fn test_align_boundary_past_end() {
        // Edge case: raw_split >= len → return as-is.
        let msgs = vec![make_user_msg("u1")];
        assert_eq!(align_split_boundary(&msgs, 5), 5);
    }

    #[test]
    fn test_align_boundary_assistant_text_is_safe() {
        // An assistant with ONLY text (no tool_calls) IS a safe boundary.
        let msgs = vec![
            make_user_msg("u1"),
            make_asst_with_tool_call("call_1"),
            make_tool_result("call_1"),
            make_asst_text("summary"),
            make_user_msg("u2"),
        ];
        // raw_split = 4 → messages[3] = assistant text → boundary.
        assert_eq!(align_split_boundary(&msgs, 4), 4);
    }
}
oxi_ai/compaction.rs

oxi_ai/
compaction.rs