// agent_sdk/context/compactor.rs
1//! Context compaction implementation.
2
3use crate::llm::{ChatOutcome, ChatRequest, Content, ContentBlock, LlmProvider, Message, Role};
4use anyhow::{Context, Result, bail};
5use async_trait::async_trait;
6use std::fmt::Write;
7use std::sync::Arc;
8
9use super::config::CompactionConfig;
10use super::estimator::TokenEstimator;
11
/// Marker prefix for the synthetic user message that carries a compaction
/// summary; also used to recognize (and skip re-summarizing) prior summaries.
const SUMMARY_PREFIX: &str = "[Previous conversation summary]\n\n";
/// System prompt for the summarization request sent to the LLM.
const COMPACTION_SYSTEM_PROMPT: &str = "You are a precise summarizer. Your task is to create concise but complete summaries of conversations, preserving all technical details needed to continue the work.";
/// Instructions placed before the formatted conversation transcript.
const COMPACTION_SUMMARY_PROMPT_PREFIX: &str = "Summarize this conversation concisely, preserving:\n- Key decisions and conclusions reached\n- Important file paths, code changes, and technical details\n- Current task context and what has been accomplished\n- Any pending items, errors encountered, or next steps\n\nBe specific about technical details (file names, function names, error messages) as these\nare critical for continuing the work.\n\nConversation:\n";
/// Instructions appended after the transcript.
const COMPACTION_SUMMARY_PROMPT_SUFFIX: &str =
    "Provide a concise summary (aim for 500-1000 words):";
/// Placeholder summary used when every candidate message was itself a prior summary.
const COMPACT_EMPTY_SUMMARY: &str = "No additional context was available to summarize; the previous messages were already compacted.";
/// Synthetic assistant reply inserted after the summary message.
const SUMMARY_ACKNOWLEDGMENT: &str =
    "I understand the context from the summary. Let me continue from where we left off.";
/// Token budget for the retained (non-summarized) tail of the history.
const MAX_RETAINED_TAIL_MESSAGE_TOKENS: usize = 20_000;
/// Maximum characters of a tool result included in the summarization transcript.
const MAX_TOOL_RESULT_CHARS: usize = 500;
22
/// Trait for context compaction strategies.
///
/// Implement this trait to provide custom compaction logic.
/// Implementations must be `Send + Sync` so a compactor can be shared
/// across async tasks.
#[async_trait]
pub trait ContextCompactor: Send + Sync {
    /// Compact a list of messages into a summary.
    ///
    /// Returns only the summary text; the message history itself is not
    /// modified (see [`Self::compact_history`] for that).
    ///
    /// # Errors
    /// Returns an error if summarization fails.
    async fn compact(&self, messages: &[Message]) -> Result<String>;

    /// Estimate tokens for a message list.
    fn estimate_tokens(&self, messages: &[Message]) -> usize;

    /// Check if compaction is needed.
    fn needs_compaction(&self, messages: &[Message]) -> bool;

    /// Perform full compaction, returning new message history.
    ///
    /// # Errors
    /// Returns an error if compaction fails.
    async fn compact_history(&self, messages: Vec<Message>) -> Result<CompactionResult>;
}
46
/// Result of a compaction operation.
///
/// Token figures are estimates produced by the compactor's token estimator,
/// not exact provider-reported counts.
#[derive(Debug, Clone)]
pub struct CompactionResult {
    /// The new compacted message history.
    pub messages: Vec<Message>,
    /// Number of messages before compaction.
    pub original_count: usize,
    /// Number of messages after compaction.
    pub new_count: usize,
    /// Estimated tokens before compaction.
    pub original_tokens: usize,
    /// Estimated tokens after compaction.
    pub new_tokens: usize,
}
61
/// LLM-based context compactor.
///
/// Uses the LLM itself to summarize older messages into a compact form.
pub struct LlmContextCompactor<P: LlmProvider> {
    // Provider used for the summarization chat call.
    provider: Arc<P>,
    // Thresholds and retention settings that drive when/how compaction runs.
    config: CompactionConfig,
    // System prompt for the summarization request.
    system_prompt: String,
    // Text placed before the formatted transcript in the summary prompt.
    summary_prompt_prefix: String,
    // Text appended after the transcript in the summary prompt.
    summary_prompt_suffix: String,
}
72
impl<P: LlmProvider> LlmContextCompactor<P> {
    /// Create a new LLM context compactor.
    ///
    /// Starts with the default summarization prompts; override them via
    /// [`Self::with_prompts`].
    #[must_use]
    pub fn new(provider: Arc<P>, config: CompactionConfig) -> Self {
        Self {
            provider,
            config,
            system_prompt: COMPACTION_SYSTEM_PROMPT.to_string(),
            summary_prompt_prefix: COMPACTION_SUMMARY_PROMPT_PREFIX.to_string(),
            summary_prompt_suffix: COMPACTION_SUMMARY_PROMPT_SUFFIX.to_string(),
        }
    }

    /// Create with default configuration.
    #[must_use]
    pub fn with_defaults(provider: Arc<P>) -> Self {
        Self::new(provider, CompactionConfig::default())
    }

    /// Get the configuration.
    #[must_use]
    pub const fn config(&self) -> &CompactionConfig {
        &self.config
    }

    /// Override the prompts used for LLM-based summarization.
    ///
    /// The final prompt sent to the LLM is
    /// `summary_prompt_prefix + transcript + summary_prompt_suffix`
    /// (see [`Self::build_summary_prompt`]).
    #[must_use]
    pub fn with_prompts(
        mut self,
        system_prompt: impl Into<String>,
        summary_prompt_prefix: impl Into<String>,
        summary_prompt_suffix: impl Into<String>,
    ) -> Self {
        self.system_prompt = system_prompt.into();
        self.summary_prompt_prefix = summary_prompt_prefix.into();
        self.summary_prompt_suffix = summary_prompt_suffix.into();
        self
    }

    /// Return true when a content object is a previously inserted compaction summary marker.
    ///
    /// Matches plain text starting with [`SUMMARY_PREFIX`], or any text block
    /// starting with it — the shape produced by `compact_history`.
    fn is_summary_message(content: &Content) -> bool {
        match content {
            Content::Text(text) => text.starts_with(SUMMARY_PREFIX),
            Content::Blocks(blocks) => blocks.iter().any(|block| match block {
                ContentBlock::Text { text } => text.starts_with(SUMMARY_PREFIX),
                _ => false,
            }),
        }
    }

    /// Return true when a message contains a tool-use block.
    fn has_tool_use(content: &Content) -> bool {
        matches!(
            content,
            Content::Blocks(blocks)
                if blocks
                    .iter()
                    .any(|block| matches!(block, ContentBlock::ToolUse { .. }))
        )
    }

    /// Return true when a message contains a tool-result block.
    fn has_tool_result(content: &Content) -> bool {
        matches!(
            content,
            Content::Blocks(blocks)
                if blocks
                    .iter()
                    .any(|block| matches!(block, ContentBlock::ToolResult { .. }))
        )
    }

    /// Shift split point backwards until tool-use/result pairs are not split.
    ///
    /// A split "crosses" a pair when the message just before the split and the
    /// one just after form an assistant tool_use / user tool_result exchange
    /// (checked in either order); splitting there would orphan a result from
    /// its call. Moving the split back by one keeps the pair together on the
    /// retained side.
    fn split_point_preserves_tool_pairs(messages: &[Message], mut split_point: usize) -> usize {
        while split_point > 0 && split_point < messages.len() {
            let prev = &messages[split_point - 1];
            let next = &messages[split_point];

            let crosses_tool_pair = (prev.role == Role::Assistant
                && Self::has_tool_use(&prev.content)
                && next.role == Role::User
                && Self::has_tool_result(&next.content))
                || (prev.role == Role::User
                    && Self::has_tool_result(&prev.content)
                    && next.role == Role::Assistant
                    && Self::has_tool_use(&next.content));

            if crosses_tool_pair {
                split_point -= 1;
                continue;
            }

            break;
        }

        split_point
    }

    /// Shift split point to satisfy both pair safety and retained-tail token cap.
    ///
    /// Alternates the two adjustments until a fixed point is reached: the
    /// token cap may move the split forward (retaining fewer messages), then
    /// pair preservation may move it back.
    ///
    /// NOTE(review): on the final iteration the cap-adjusted `candidate` is
    /// returned, not the pair-adjusted value — so when the two constraints
    /// conflict irreconcilably the token cap wins and a tool pair could still
    /// be split. Confirm this priority is intended.
    fn split_point_preserves_tool_pairs_with_cap(
        messages: &[Message],
        mut split_point: usize,
        max_tokens: usize,
    ) -> usize {
        loop {
            let candidate = Self::retain_tail_with_token_cap(messages, split_point, max_tokens);
            let adjusted = Self::split_point_preserves_tool_pairs(messages, candidate);

            if adjusted == split_point {
                return candidate;
            }

            split_point = adjusted;
        }
    }

    /// Keep most recent messages that fit within the retained-message token budget.
    ///
    /// Walks backwards from the newest message, greedily keeping messages
    /// while their estimated tokens fit in `max_tokens`, and returns the index
    /// of the first retained message (`messages.len()` when nothing fits).
    fn retain_tail_with_token_cap(messages: &[Message], start: usize, max_tokens: usize) -> usize {
        if start >= messages.len() {
            return messages.len();
        }

        // A zero budget retains no tail at all.
        if max_tokens == 0 {
            return messages.len();
        }

        let mut used = 0usize;
        let mut retained_start = messages.len();

        // Scan newest-to-oldest; stop at the first message that would
        // overflow the budget.
        for idx in (start..messages.len()).rev() {
            let message_tokens = TokenEstimator::estimate_message(&messages[idx]);
            if used + message_tokens > max_tokens {
                break;
            }

            retained_start = idx;
            used += message_tokens;
        }

        retained_start
    }

    /// Format messages for summarization.
    ///
    /// Renders each message as `"<Role>: <content>"`, flattening content
    /// blocks into bracketed plain-text descriptions (tool calls, tool
    /// results, thinking, images, documents).
    fn format_messages_for_summary(messages: &[Message]) -> String {
        let mut output = String::new();

        for message in messages {
            let role = match message.role {
                Role::User => "User",
                Role::Assistant => "Assistant",
            };

            // write!/writeln! into a String cannot fail; errors are ignored.
            let _ = write!(output, "{role}: ");

            match &message.content {
                Content::Text(text) => {
                    let _ = writeln!(output, "{text}");
                }
                Content::Blocks(blocks) => {
                    for block in blocks {
                        match block {
                            ContentBlock::Text { text } => {
                                let _ = writeln!(output, "{text}");
                            }
                            ContentBlock::Thinking { thinking, .. } => {
                                // Include thinking in summaries for context
                                let _ = writeln!(output, "[Thinking: {thinking}]");
                            }
                            ContentBlock::RedactedThinking { .. } => {
                                let _ = writeln!(output, "[Redacted thinking]");
                            }
                            ContentBlock::ToolUse { name, input, .. } => {
                                let _ = writeln!(
                                    output,
                                    "[Called tool: {name} with input: {}]",
                                    serde_json::to_string(input).unwrap_or_default()
                                );
                            }
                            ContentBlock::ToolResult {
                                content, is_error, ..
                            } => {
                                let status = if is_error.unwrap_or(false) {
                                    "error"
                                } else {
                                    "success"
                                };
                                // Truncate long tool results (Unicode-safe; avoid slicing mid-codepoint)
                                let truncated = if content.chars().count() > MAX_TOOL_RESULT_CHARS {
                                    let prefix: String =
                                        content.chars().take(MAX_TOOL_RESULT_CHARS).collect();
                                    format!("{prefix}... (truncated)")
                                } else {
                                    content.clone()
                                };
                                let _ = writeln!(output, "[Tool result ({status}): {truncated}]");
                            }
                            ContentBlock::Image { source } => {
                                let _ = writeln!(output, "[Image: {}]", source.media_type);
                            }
                            ContentBlock::Document { source } => {
                                let _ = writeln!(output, "[Document: {}]", source.media_type);
                            }
                        }
                    }
                }
            }
            output.push('\n');
        }

        output
    }

    /// Build the summarization prompt.
    ///
    /// Concatenates the configured prefix, the formatted transcript, and the
    /// configured suffix.
    fn build_summary_prompt(&self, messages_text: &str) -> String {
        format!(
            "{}{}{}",
            self.summary_prompt_prefix, messages_text, self.summary_prompt_suffix
        )
    }
}
293
294#[async_trait]
295impl<P: LlmProvider> ContextCompactor for LlmContextCompactor<P> {
296    async fn compact(&self, messages: &[Message]) -> Result<String> {
297        let messages_to_summarize: Vec<_> = messages
298            .iter()
299            .filter(|message| !Self::is_summary_message(&message.content))
300            .cloned()
301            .collect();
302
303        if messages_to_summarize.is_empty() {
304            return Ok(COMPACT_EMPTY_SUMMARY.to_string());
305        }
306
307        let messages_text = Self::format_messages_for_summary(&messages_to_summarize);
308        let prompt = self.build_summary_prompt(&messages_text);
309
310        let request = ChatRequest {
311            system: self.system_prompt.clone(),
312            messages: vec![Message::user(prompt)],
313            tools: None,
314            max_tokens: 2000,
315            max_tokens_explicit: true,
316            session_id: None,
317            cached_content: None,
318            thinking: None,
319        };
320
321        let outcome = self
322            .provider
323            .chat(request)
324            .await
325            .context("Failed to call LLM for summarization")?;
326
327        match outcome {
328            ChatOutcome::Success(response) => response
329                .first_text()
330                .map(String::from)
331                .context("No text in summarization response"),
332            ChatOutcome::RateLimited => {
333                bail!("Rate limited during summarization")
334            }
335            ChatOutcome::InvalidRequest(msg) => {
336                bail!("Invalid request during summarization: {msg}")
337            }
338            ChatOutcome::ServerError(msg) => {
339                bail!("Server error during summarization: {msg}")
340            }
341        }
342    }
343
344    fn estimate_tokens(&self, messages: &[Message]) -> usize {
345        TokenEstimator::estimate_history(messages)
346    }
347
348    fn needs_compaction(&self, messages: &[Message]) -> bool {
349        if !self.config.auto_compact {
350            return false;
351        }
352
353        if messages.len() < self.config.min_messages_for_compaction {
354            return false;
355        }
356
357        let estimated_tokens = self.estimate_tokens(messages);
358        estimated_tokens > self.config.threshold_tokens
359    }
360
361    async fn compact_history(&self, messages: Vec<Message>) -> Result<CompactionResult> {
362        let original_count = messages.len();
363        let original_tokens = self.estimate_tokens(&messages);
364
365        // Ensure we have enough messages to compact
366        if messages.len() <= self.config.retain_recent {
367            return Ok(CompactionResult {
368                messages,
369                original_count,
370                new_count: original_count,
371                original_tokens,
372                new_tokens: original_tokens,
373            });
374        }
375
376        // Split messages: old messages to summarize, recent messages to keep
377        let mut split_point = messages.len().saturating_sub(self.config.retain_recent);
378        split_point = Self::split_point_preserves_tool_pairs_with_cap(
379            &messages,
380            split_point,
381            MAX_RETAINED_TAIL_MESSAGE_TOKENS,
382        );
383
384        let (to_summarize, to_keep) = messages.split_at(split_point);
385
386        // Summarize old messages
387        let summary = self.compact(to_summarize).await?;
388
389        // Build new message history
390        let mut new_messages = Vec::with_capacity(2 + to_keep.len());
391
392        // Add summary as a user message
393        new_messages.push(Message::user(format!("{SUMMARY_PREFIX}{summary}")));
394
395        // Add acknowledgment from assistant only when some recent tail remains.
396        // If compaction drops the entire retained tail due to the token cap, ending
397        // the request with this synthetic assistant message would act like assistant
398        // prefill and Anthropic rejects that shape.
399        if !to_keep.is_empty() {
400            new_messages.push(Message::assistant(SUMMARY_ACKNOWLEDGMENT));
401        }
402
403        // Add recent messages
404        new_messages.extend(to_keep.iter().cloned());
405
406        let new_count = new_messages.len();
407        let new_tokens = self.estimate_tokens(&new_messages);
408
409        Ok(CompactionResult {
410            messages: new_messages,
411            original_count,
412            new_count,
413            original_tokens,
414            new_tokens,
415        })
416    }
417}
418
419#[cfg(test)]
420mod tests {
421    use super::*;
422    use crate::llm::{ChatResponse, StopReason, Usage};
423    use std::sync::Mutex;
424
    /// Test double for the LLM provider: always succeeds with a canned
    /// summary and optionally records the prompts it was sent.
    struct MockProvider {
        // Text returned as the sole content block of every chat response.
        summary_response: String,
        // When set, each chat call appends the extracted user prompt here.
        requests: Option<Arc<Mutex<Vec<String>>>>,
    }
429
430    impl MockProvider {
431        fn new(summary: &str) -> Self {
432            Self {
433                summary_response: summary.to_string(),
434                requests: None,
435            }
436        }
437
438        fn new_with_request_log(summary: &str, requests: Arc<Mutex<Vec<String>>>) -> Self {
439            Self {
440                summary_response: summary.to_string(),
441                requests: Some(requests),
442            }
443        }
444    }
445
446    #[async_trait]
447    impl LlmProvider for MockProvider {
448        async fn chat(&self, request: ChatRequest) -> Result<ChatOutcome> {
449            if let Some(requests) = &self.requests {
450                let mut entries = requests.lock().unwrap();
451                let user_prompt = request
452                    .messages
453                    .iter()
454                    .find_map(|message| match &message.content {
455                        Content::Text(text) => Some(text.clone()),
456                        Content::Blocks(blocks) => {
457                            let text = blocks
458                                .iter()
459                                .filter_map(|block| {
460                                    if let ContentBlock::Text { text } = block {
461                                        Some(text.as_str())
462                                    } else {
463                                        None
464                                    }
465                                })
466                                .collect::<Vec<_>>()
467                                .join("\n");
468                            if text.is_empty() { None } else { Some(text) }
469                        }
470                    })
471                    .unwrap_or_default();
472                entries.push(user_prompt);
473            }
474            Ok(ChatOutcome::Success(ChatResponse {
475                id: "test".to_string(),
476                content: vec![ContentBlock::Text {
477                    text: self.summary_response.clone(),
478                }],
479                model: "mock".to_string(),
480                stop_reason: Some(StopReason::EndTurn),
481                usage: Usage {
482                    input_tokens: 100,
483                    output_tokens: 50,
484                    cached_input_tokens: 0,
485                },
486            }))
487        }
488
489        fn model(&self) -> &'static str {
490            "mock-model"
491        }
492
493        fn provider(&self) -> &'static str {
494            "mock"
495        }
496    }
497
498    #[test]
499    fn test_needs_compaction_below_threshold() {
500        let provider = Arc::new(MockProvider::new("summary"));
501        let config = CompactionConfig::default()
502            .with_threshold_tokens(10_000)
503            .with_min_messages(5);
504        let compactor = LlmContextCompactor::new(provider, config);
505
506        // Only 3 messages, below min_messages
507        let messages = vec![
508            Message::user("Hello"),
509            Message::assistant("Hi"),
510            Message::user("How are you?"),
511        ];
512
513        assert!(!compactor.needs_compaction(&messages));
514    }
515
516    #[test]
517    fn test_needs_compaction_above_threshold() {
518        let provider = Arc::new(MockProvider::new("summary"));
519        let config = CompactionConfig::default()
520            .with_threshold_tokens(50) // Very low threshold
521            .with_min_messages(3);
522        let compactor = LlmContextCompactor::new(provider, config);
523
524        // Messages that exceed threshold
525        let messages = vec![
526            Message::user("Hello, this is a longer message to test compaction"),
527            Message::assistant(
528                "Hi there! This is also a longer response to help trigger compaction",
529            ),
530            Message::user("Great, let's continue with even more text here"),
531            Message::assistant("Absolutely, adding more content to ensure we exceed the threshold"),
532        ];
533
534        assert!(compactor.needs_compaction(&messages));
535    }
536
537    #[test]
538    fn test_needs_compaction_auto_disabled() {
539        let provider = Arc::new(MockProvider::new("summary"));
540        let config = CompactionConfig::default()
541            .with_threshold_tokens(10) // Very low
542            .with_min_messages(1)
543            .with_auto_compact(false);
544        let compactor = LlmContextCompactor::new(provider, config);
545
546        let messages = vec![
547            Message::user("Hello, this is a longer message"),
548            Message::assistant("Response here"),
549        ];
550
551        assert!(!compactor.needs_compaction(&messages));
552    }
553
    #[tokio::test]
    async fn test_compact_history() -> Result<()> {
        let provider = Arc::new(MockProvider::new(
            "User asked about Rust programming. Assistant explained ownership, borrowing, and lifetimes.",
        ));
        let config = CompactionConfig::default()
            .with_retain_recent(2)
            .with_min_messages(3);
        let compactor = LlmContextCompactor::new(provider, config);

        // Use longer messages to ensure compaction actually reduces tokens
        let messages = vec![
            Message::user(
                "What is Rust? I've heard it's a systems programming language but I don't know much about it. Can you explain the key features and why people are excited about it?",
            ),
            Message::assistant(
                "Rust is a systems programming language focused on safety, speed, and concurrency. It achieves memory safety without garbage collection through its ownership system. The key features include zero-cost abstractions, guaranteed memory safety, threads without data races, and minimal runtime.",
            ),
            Message::user(
                "Tell me about ownership in detail. How does it work and what are the rules? I want to understand this core concept thoroughly.",
            ),
            Message::assistant(
                "Ownership is Rust's central feature with three rules: each value has one owner, only one owner at a time, and the value is dropped when owner goes out of scope. This system prevents memory leaks, double frees, and dangling pointers at compile time.",
            ),
            Message::user("What about borrowing?"), // Keep
            Message::assistant("Borrowing allows references to data without taking ownership."), // Keep
        ];

        let result = compactor.compact_history(messages).await?;

        // Should have: summary message + ack + 2 recent messages = 4
        assert_eq!(result.new_count, 4);
        assert_eq!(result.original_count, 6);

        // With longer original messages, compaction should reduce tokens
        assert!(
            result.new_tokens < result.original_tokens,
            "Expected fewer tokens after compaction: new={} < original={}",
            result.new_tokens,
            result.original_tokens
        );

        // First message should be the summary (it carries the marker prefix).
        if let Content::Text(text) = &result.messages[0].content {
            assert!(text.contains("Previous conversation summary"));
        }

        Ok(())
    }
603
604    #[tokio::test]
605    async fn test_compact_history_too_few_messages() -> Result<()> {
606        let provider = Arc::new(MockProvider::new("summary"));
607        let config = CompactionConfig::default().with_retain_recent(5);
608        let compactor = LlmContextCompactor::new(provider, config);
609
610        // Only 3 messages, less than retain_recent
611        let messages = vec![
612            Message::user("Hello"),
613            Message::assistant("Hi"),
614            Message::user("Bye"),
615        ];
616
617        let result = compactor.compact_history(messages.clone()).await?;
618
619        // Should return original messages unchanged
620        assert_eq!(result.new_count, 3);
621        assert_eq!(result.messages.len(), 3);
622
623        Ok(())
624    }
625
626    #[test]
627    fn test_format_messages_for_summary() {
628        let messages = vec![Message::user("Hello"), Message::assistant("Hi there!")];
629
630        let formatted = LlmContextCompactor::<MockProvider>::format_messages_for_summary(&messages);
631
632        assert!(formatted.contains("User: Hello"));
633        assert!(formatted.contains("Assistant: Hi there!"));
634    }
635
636    #[test]
637    fn test_format_messages_for_summary_truncates_tool_results_unicode_safely() {
638        let long_unicode = "é".repeat(600);
639
640        let messages = vec![Message {
641            role: Role::Assistant,
642            content: Content::Blocks(vec![ContentBlock::ToolResult {
643                tool_use_id: "tool-1".to_string(),
644                content: long_unicode,
645                is_error: Some(false),
646            }]),
647        }];
648
649        let formatted = LlmContextCompactor::<MockProvider>::format_messages_for_summary(&messages);
650
651        assert!(formatted.contains("... (truncated)"));
652    }
653
    #[tokio::test]
    async fn test_compact_filters_summary_messages() -> Result<()> {
        // Log the prompts the mock provider receives so we can inspect
        // exactly what text was sent for summarization.
        let requests = Arc::new(Mutex::new(Vec::new()));
        let provider = Arc::new(MockProvider::new_with_request_log(
            "Fresh summary",
            requests.clone(),
        ));
        let config = CompactionConfig::default().with_min_messages(1);
        let compactor = LlmContextCompactor::new(provider, config);

        // The first message carries the summary marker and must be excluded
        // from the text sent to the LLM.
        let messages = vec![
            Message::user(format!("{SUMMARY_PREFIX}already compacted context")),
            Message::assistant("Continue with the next task using this context."),
        ];

        let summary = compactor.compact(&messages).await?;

        {
            let recorded = requests.lock().unwrap();
            assert_eq!(recorded.len(), 1);
            assert_eq!(summary, "Fresh summary");
            // Only the non-summary message should appear in the prompt.
            assert!(recorded[0].contains("Continue with the next task using this context."));
            assert!(!recorded[0].contains("already compacted context"));
            drop(recorded);
        }

        Ok(())
    }
682
    #[tokio::test]
    async fn test_compact_history_ignores_prior_summary_in_candidate_payload() -> Result<()> {
        // Record the prompts the provider receives during compaction.
        let requests = Arc::new(Mutex::new(Vec::new()));
        let provider = Arc::new(MockProvider::new_with_request_log(
            "Fresh history summary",
            requests.clone(),
        ));
        let config = CompactionConfig::default()
            .with_retain_recent(2)
            .with_min_messages(1);
        let compactor = LlmContextCompactor::new(provider, config);

        // The first message is a prior summary; it must not be re-summarized.
        let messages = vec![
            Message::user(format!("{SUMMARY_PREFIX}already compacted context")),
            Message::assistant("Current turn content from the latest exchange."),
            Message::assistant("Recent message that should stay."),
            Message::user("Newest note that should stay."),
        ];

        let result = compactor.compact_history(messages).await?;

        {
            let recorded = requests.lock().unwrap();
            assert_eq!(recorded.len(), 1);
            assert!(recorded[0].contains("Current turn content from the latest exchange."));
            assert!(!recorded[0].contains("already compacted context"));
            drop(recorded);
        }
        // New summary + acknowledgment + 2 retained messages.
        assert_eq!(result.new_count, 4);

        Ok(())
    }
715
    #[tokio::test]
    async fn test_compact_history_is_no_op_when_candidate_window_has_only_summaries() -> Result<()>
    {
        let requests = Arc::new(Mutex::new(Vec::new()));
        let provider = Arc::new(MockProvider::new_with_request_log(
            "This summary should not be used",
            requests.clone(),
        ));
        let config = CompactionConfig::default()
            .with_retain_recent(2)
            .with_min_messages(1);
        let compactor = LlmContextCompactor::new(provider, config);

        // Every message ahead of the split point is itself a prior summary,
        // so `compact` should fall back to the placeholder text without ever
        // calling the LLM.
        let messages = vec![
            Message::user(format!("{SUMMARY_PREFIX}first prior compacted section")),
            Message::assistant(format!("{SUMMARY_PREFIX}second prior compacted section")),
            Message::user(format!("{SUMMARY_PREFIX}third prior compacted section")),
            Message::assistant("final short note"),
        ];

        let result = compactor.compact_history(messages).await?;

        {
            let recorded = requests.lock().unwrap();
            // No request should have reached the provider.
            assert!(recorded.is_empty());
            drop(recorded);
        }
        assert_eq!(result.new_count, 4);
        assert_eq!(result.messages.len(), 4);

        // The placeholder summary is used instead of an LLM response.
        if let Content::Text(text) = &result.messages[0].content {
            assert!(text.contains(COMPACT_EMPTY_SUMMARY));
        } else {
            panic!("Expected summary text in first message");
        }

        Ok(())
    }
754
    #[tokio::test]
    async fn test_compact_history_preserves_tool_use_tool_result_pairs() -> Result<()> {
        let provider = Arc::new(MockProvider::new("Summary of earlier conversation."));
        let config = CompactionConfig::default()
            .with_retain_recent(2)
            .with_min_messages(3);
        let compactor = LlmContextCompactor::new(provider, config);

        // Build a history where the split_point (len - retain_recent = 5 - 2 = 3)
        // would land exactly on the user tool_result message at index 3,
        // which would orphan it from its assistant tool_use at index 2.
        let messages = vec![
            // index 0: user
            Message::user("What files are in the project?"),
            // index 1: assistant text
            Message::assistant("Let me check that for you."),
            // index 2: assistant with tool_use
            Message {
                role: Role::Assistant,
                content: Content::Blocks(vec![ContentBlock::ToolUse {
                    id: "tool_1".to_string(),
                    name: "list_files".to_string(),
                    input: serde_json::json!({}),
                    thought_signature: None,
                }]),
            },
            // index 3: user with tool_result (naive split would land here)
            Message {
                role: Role::User,
                content: Content::Blocks(vec![ContentBlock::ToolResult {
                    tool_use_id: "tool_1".to_string(),
                    content: "file1.rs\nfile2.rs".to_string(),
                    is_error: None,
                }]),
            },
            // index 4: assistant final response
            Message::assistant("The project contains file1.rs and file2.rs."),
        ];

        let result = compactor.compact_history(messages).await?;

        // The split_point should have been adjusted back from 3 to 2,
        // so to_keep includes: [assistant tool_use, user tool_result, assistant response]
        // Plus summary + ack = 5 total
        assert_eq!(result.new_count, 5);

        // Verify the kept messages include the tool_use/tool_result pair
        // After summary + ack, the third message should be the assistant with tool_use
        let kept_assistant = &result.messages[2];
        if let Content::Blocks(blocks) = &kept_assistant.content {
            assert!(
                blocks
                    .iter()
                    .any(|b| matches!(b, ContentBlock::ToolUse { .. })),
                "Expected assistant tool_use in kept messages"
            );
        } else {
            panic!("Expected Blocks content for assistant tool_use message");
        }

        // The fourth message should be the user tool_result
        let kept_user = &result.messages[3];
        if let Content::Blocks(blocks) = &kept_user.content {
            assert!(
                blocks
                    .iter()
                    .any(|b| matches!(b, ContentBlock::ToolResult { .. })),
                "Expected user tool_result in kept messages"
            );
        } else {
            panic!("Expected Blocks content for user tool_result message");
        }

        Ok(())
    }
830
831    #[tokio::test]
832    async fn test_compact_history_preserves_tool_result_tool_use_pairs() -> Result<()> {
833        let provider = Arc::new(MockProvider::new("Summary around tool pair."));
834        let config = CompactionConfig::default()
835            .with_retain_recent(2)
836            .with_min_messages(1);
837        let compactor = LlmContextCompactor::new(provider, config);
838
839        // Build a history where split_point would land on tool-use tool-result crossing in the
840        // opposite direction:
841        // ... user tool_result | assistant tool_use ...
842        let messages = vec![
843            Message::user("Start a workflow"),
844            Message {
845                role: Role::User,
846                content: Content::Blocks(vec![ContentBlock::ToolResult {
847                    tool_use_id: "tool_odd".to_string(),
848                    content: "prior result".to_string(),
849                    is_error: None,
850                }]),
851            },
852            Message {
853                role: Role::Assistant,
854                content: Content::Blocks(vec![ContentBlock::ToolUse {
855                    id: "tool_odd".to_string(),
856                    name: "follow_up".to_string(),
857                    input: serde_json::json!({}),
858                    thought_signature: None,
859                }]),
860            },
861            Message::assistant("Follow up done."),
862        ];
863
864        let result = compactor.compact_history(messages).await?;
865
866        // Split-point starts at 2 and is adjusted back to 1, keeping the tool result and tool use.
867        assert_eq!(result.new_count, 5);
868
869        // tool_result should remain with the kept tail.
870        let kept_result = &result.messages[2];
871        if let Content::Blocks(blocks) = &kept_result.content {
872            assert!(
873                blocks
874                    .iter()
875                    .any(|b| matches!(b, ContentBlock::ToolResult { .. })),
876                "Expected kept user tool_result in retained tail"
877            );
878        } else {
879            panic!("Expected tool_result blocks in retained tail");
880        }
881
882        // tool_use should remain with the kept tail.
883        let kept_tool_use = &result.messages[3];
884        if let Content::Blocks(blocks) = &kept_tool_use.content {
885            assert!(
886                blocks
887                    .iter()
888                    .any(|b| matches!(b, ContentBlock::ToolUse { .. })),
889                "Expected kept assistant tool_use in retained tail"
890            );
891        } else {
892            panic!("Expected tool_use blocks in retained tail");
893        }
894
895        Ok(())
896    }
897
898    #[tokio::test]
899    async fn test_compact_history_retained_tail_is_token_capped() -> Result<()> {
900        let provider = Arc::new(MockProvider::new(
901            "Project summary with a long context and technical context.",
902        ));
903        let config = CompactionConfig::default()
904            .with_retain_recent(8)
905            .with_min_messages(1)
906            .with_threshold_tokens(1);
907        let compactor = LlmContextCompactor::new(provider, config);
908
909        let mut messages = Vec::new();
910
911        // Older messages that will be summarized away.
912        messages.extend((0..6).map(|index| Message::user(format!("pre-compaction noise {index}"))));
913
914        // Newer long messages: intentionally large to force retained-tail truncation.
915        messages.extend(
916            (0..8).map(|index| Message::assistant(format!("kept-{index}: {}", "x".repeat(12_000)))),
917        );
918
919        let result = compactor.compact_history(messages).await?;
920
921        // The retained tail should be token capped and therefore shorter than retain_recent.
922        let retained_tail = &result.messages[2..];
923        assert!(retained_tail.len() < 8);
924
925        let mut latest_index = -1i32;
926        let mut all_retained = true;
927        for message in retained_tail {
928            if let Content::Text(text) = &message.content {
929                if let Some(number) = text.split(':').next().and_then(|prefix| {
930                    prefix
931                        .strip_prefix("kept-")
932                        .and_then(|rest| rest.parse::<i32>().ok())
933                }) {
934                    if number >= 0 {
935                        latest_index = latest_index.max(number);
936                    }
937                } else {
938                    all_retained = false;
939                }
940            } else {
941                all_retained = false;
942            }
943        }
944
945        assert!(all_retained);
946        assert_eq!(latest_index, 7);
947        assert!(
948            TokenEstimator::estimate_history(retained_tail) <= MAX_RETAINED_TAIL_MESSAGE_TOKENS
949        );
950        assert!(compactor.needs_compaction(&result.messages));
951
952        Ok(())
953    }
954
955    #[tokio::test]
956    async fn test_compact_history_skips_summary_ack_when_retained_tail_is_empty() -> Result<()> {
957        let provider = Arc::new(MockProvider::new("Summary for oversized user turn."));
958        let config = CompactionConfig::default()
959            .with_retain_recent(1)
960            .with_min_messages(1)
961            .with_threshold_tokens(1);
962        let compactor = LlmContextCompactor::new(provider, config);
963
964        let messages = vec![
965            Message::assistant("Earlier assistant context."),
966            Message::user(format!("oversized-user-turn: {}", "x".repeat(200_000))),
967        ];
968
969        let result = compactor.compact_history(messages).await?;
970
971        assert_eq!(result.new_count, 1);
972        assert_eq!(result.messages.len(), 1);
973
974        let only_message = &result.messages[0];
975        assert_eq!(only_message.role, Role::User);
976
977        if let Content::Text(text) = &only_message.content {
978            assert!(text.contains("Previous conversation summary"));
979            assert!(!text.contains(SUMMARY_ACKNOWLEDGMENT));
980        } else {
981            panic!("Expected summary text when retained tail is empty");
982        }
983
984        Ok(())
985    }
986}