// agent_sdk/context/compactor.rs
1//! Context compaction implementation.
2
3use crate::llm::{ChatOutcome, ChatRequest, Content, ContentBlock, LlmProvider, Message, Role};
4use anyhow::{Context, Result, bail};
5use async_trait::async_trait;
6use std::fmt::Write;
7use std::sync::Arc;
8
9use super::config::CompactionConfig;
10use super::estimator::TokenEstimator;
11
/// Marker prepended to every summary message inserted by compaction.
/// Also used by `is_summary_message` to recognize previously inserted
/// summaries so they are never re-summarized.
const SUMMARY_PREFIX: &str = "[Previous conversation summary]\n\n";
/// System prompt for the summarization request sent to the LLM.
const COMPACTION_SYSTEM_PROMPT: &str = "You are a precise summarizer. Your task is to create concise but complete summaries of conversations, preserving all technical details needed to continue the work.";
/// Text placed before the formatted conversation in the summarization prompt.
const COMPACTION_SUMMARY_PROMPT_PREFIX: &str = "Summarize this conversation concisely, preserving:\n- Key decisions and conclusions reached\n- Important file paths, code changes, and technical details\n- Current task context and what has been accomplished\n- Any pending items, errors encountered, or next steps\n\nBe specific about technical details (file names, function names, error messages) as these\nare critical for continuing the work.\n\nConversation:\n";
/// Text placed after the formatted conversation in the summarization prompt.
const COMPACTION_SUMMARY_PROMPT_SUFFIX: &str =
    "Provide a concise summary (aim for 500-1000 words):";
/// Placeholder summary used when everything in the summarization window was
/// itself a prior summary (no LLM call is made in that case).
const COMPACT_EMPTY_SUMMARY: &str = "No additional context was available to summarize; the previous messages were already compacted.";
/// Token budget for the tail of recent messages kept verbatim after compaction.
const MAX_RETAINED_TAIL_MESSAGE_TOKENS: usize = 20_000;
/// Tool-result text longer than this many characters is truncated when
/// formatting the conversation for summarization.
const MAX_TOOL_RESULT_CHARS: usize = 500;
20
/// Trait for context compaction strategies.
///
/// Implement this trait to provide custom compaction logic.
#[async_trait]
pub trait ContextCompactor: Send + Sync {
    /// Compact a list of messages into a summary.
    ///
    /// Returns the summary text only; it does not modify the history.
    ///
    /// # Errors
    /// Returns an error if summarization fails.
    async fn compact(&self, messages: &[Message]) -> Result<String>;

    /// Estimate tokens for a message list.
    ///
    /// NOTE(review): this is an estimate, not an exact provider token count.
    fn estimate_tokens(&self, messages: &[Message]) -> usize;

    /// Check if compaction is needed.
    fn needs_compaction(&self, messages: &[Message]) -> bool;

    /// Perform full compaction, returning new message history.
    ///
    /// # Errors
    /// Returns an error if compaction fails.
    async fn compact_history(&self, messages: Vec<Message>) -> Result<CompactionResult>;
}
44
/// Result of a compaction operation.
///
/// All token figures are estimates produced by the compactor's estimator,
/// not exact provider-reported counts.
#[derive(Debug, Clone)]
pub struct CompactionResult {
    /// The new compacted message history.
    pub messages: Vec<Message>,
    /// Number of messages before compaction.
    pub original_count: usize,
    /// Number of messages after compaction.
    pub new_count: usize,
    /// Estimated tokens before compaction.
    pub original_tokens: usize,
    /// Estimated tokens after compaction.
    pub new_tokens: usize,
}
59
/// LLM-based context compactor.
///
/// Uses the LLM itself to summarize older messages into a compact form.
pub struct LlmContextCompactor<P: LlmProvider> {
    /// Provider used to make the summarization chat call.
    provider: Arc<P>,
    /// Thresholds and windows controlling when/how much to compact.
    config: CompactionConfig,
    /// System prompt for the summarization request (defaults to
    /// `COMPACTION_SYSTEM_PROMPT`, overridable via `with_prompts`).
    system_prompt: String,
    /// Text placed before the formatted conversation in the summary prompt.
    summary_prompt_prefix: String,
    /// Text placed after the formatted conversation in the summary prompt.
    summary_prompt_suffix: String,
}
70
impl<P: LlmProvider> LlmContextCompactor<P> {
    /// Create a new LLM context compactor.
    ///
    /// The summarization prompts default to the module-level constants; use
    /// [`Self::with_prompts`] to override them.
    #[must_use]
    pub fn new(provider: Arc<P>, config: CompactionConfig) -> Self {
        Self {
            provider,
            config,
            system_prompt: COMPACTION_SYSTEM_PROMPT.to_string(),
            summary_prompt_prefix: COMPACTION_SUMMARY_PROMPT_PREFIX.to_string(),
            summary_prompt_suffix: COMPACTION_SUMMARY_PROMPT_SUFFIX.to_string(),
        }
    }

    /// Create with default configuration.
    #[must_use]
    pub fn with_defaults(provider: Arc<P>) -> Self {
        Self::new(provider, CompactionConfig::default())
    }

    /// Get the configuration.
    #[must_use]
    pub const fn config(&self) -> &CompactionConfig {
        &self.config
    }

    /// Override the prompts used for LLM-based summarization.
    ///
    /// Builder-style: consumes and returns `self`.
    #[must_use]
    pub fn with_prompts(
        mut self,
        system_prompt: impl Into<String>,
        summary_prompt_prefix: impl Into<String>,
        summary_prompt_suffix: impl Into<String>,
    ) -> Self {
        self.system_prompt = system_prompt.into();
        self.summary_prompt_prefix = summary_prompt_prefix.into();
        self.summary_prompt_suffix = summary_prompt_suffix.into();
        self
    }

    /// Return true when a content object is a previously inserted compaction summary marker.
    ///
    /// For block content, any single text block starting with the marker
    /// prefix classifies the whole message as a summary.
    fn is_summary_message(content: &Content) -> bool {
        match content {
            Content::Text(text) => text.starts_with(SUMMARY_PREFIX),
            Content::Blocks(blocks) => blocks.iter().any(|block| match block {
                ContentBlock::Text { text } => text.starts_with(SUMMARY_PREFIX),
                _ => false,
            }),
        }
    }

    /// Return true when a message contains a tool-use block.
    /// Plain-text content never matches.
    fn has_tool_use(content: &Content) -> bool {
        matches!(
            content,
            Content::Blocks(blocks)
                if blocks
                    .iter()
                    .any(|block| matches!(block, ContentBlock::ToolUse { .. }))
        )
    }

    /// Return true when a message contains a tool-result block.
    /// Plain-text content never matches.
    fn has_tool_result(content: &Content) -> bool {
        matches!(
            content,
            Content::Blocks(blocks)
                if blocks
                    .iter()
                    .any(|block| matches!(block, ContentBlock::ToolResult { .. }))
        )
    }

    /// Shift split point backwards until tool-use/result pairs are not split.
    ///
    /// Splitting between an assistant tool_use and the user tool_result that
    /// answers it (or vice versa) would orphan one half of the exchange in the
    /// retained tail; walking the split point back keeps both halves together.
    fn split_point_preserves_tool_pairs(messages: &[Message], mut split_point: usize) -> usize {
        while split_point > 0 && split_point < messages.len() {
            let prev = &messages[split_point - 1];
            let next = &messages[split_point];

            // The split "crosses" a pair when the two adjacent messages form a
            // tool_use/tool_result exchange in either direction.
            let crosses_tool_pair = (prev.role == Role::Assistant
                && Self::has_tool_use(&prev.content)
                && next.role == Role::User
                && Self::has_tool_result(&next.content))
                || (prev.role == Role::User
                    && Self::has_tool_result(&prev.content)
                    && next.role == Role::Assistant
                    && Self::has_tool_use(&next.content));

            if crosses_tool_pair {
                split_point -= 1;
                continue;
            }

            break;
        }

        split_point
    }

    /// Shift split point to satisfy both pair safety and retained-tail token cap.
    ///
    /// Alternates between the two constraints until they agree. The
    /// `adjusted == split_point` check is the no-progress guard: when
    /// pair-shifting lands back on the split point this iteration started
    /// from, further iteration cannot converge, so the cap-constrained
    /// `candidate` is returned as-is. This guarantees termination and, in the
    /// degenerate case where both constraints cannot be satisfied at once,
    /// lets the token cap win over pair preservation.
    fn split_point_preserves_tool_pairs_with_cap(
        messages: &[Message],
        mut split_point: usize,
        max_tokens: usize,
    ) -> usize {
        loop {
            // First enforce the token budget on the retained tail...
            let candidate = Self::retain_tail_with_token_cap(messages, split_point, max_tokens);
            // ...then walk back if that cut would bisect a tool exchange.
            let adjusted = Self::split_point_preserves_tool_pairs(messages, candidate);

            if adjusted == split_point {
                return candidate;
            }

            split_point = adjusted;
        }
    }

    /// Keep most recent messages that fit within the retained-message token budget.
    ///
    /// Returns the index of the first retained message. Scans newest-to-oldest
    /// from the end, greedily admitting whole messages until the next one would
    /// exceed `max_tokens`. Note the degenerate cases: `start` past the end or
    /// `max_tokens == 0` both return `messages.len()`, i.e. an empty tail.
    fn retain_tail_with_token_cap(messages: &[Message], start: usize, max_tokens: usize) -> usize {
        if start >= messages.len() {
            return messages.len();
        }

        if max_tokens == 0 {
            return messages.len();
        }

        let mut used = 0usize;
        let mut retained_start = messages.len();

        for idx in (start..messages.len()).rev() {
            let message_tokens = TokenEstimator::estimate_message(&messages[idx]);
            if used + message_tokens > max_tokens {
                break;
            }

            retained_start = idx;
            used += message_tokens;
        }

        retained_start
    }

    /// Format messages for summarization.
    ///
    /// Renders each message as `Role: text`, with non-text blocks rendered as
    /// bracketed placeholders (tool calls, tool results, images, documents).
    fn format_messages_for_summary(messages: &[Message]) -> String {
        let mut output = String::new();

        for message in messages {
            let role = match message.role {
                Role::User => "User",
                Role::Assistant => "Assistant",
            };

            // fmt::Write on a String cannot fail; errors deliberately ignored.
            let _ = write!(output, "{role}: ");

            match &message.content {
                Content::Text(text) => {
                    let _ = writeln!(output, "{text}");
                }
                Content::Blocks(blocks) => {
                    for block in blocks {
                        match block {
                            ContentBlock::Text { text } => {
                                let _ = writeln!(output, "{text}");
                            }
                            ContentBlock::Thinking { thinking, .. } => {
                                // Include thinking in summaries for context
                                let _ = writeln!(output, "[Thinking: {thinking}]");
                            }
                            ContentBlock::RedactedThinking { .. } => {
                                let _ = writeln!(output, "[Redacted thinking]");
                            }
                            ContentBlock::ToolUse { name, input, .. } => {
                                // Serialization failure degrades to an empty input string.
                                let _ = writeln!(
                                    output,
                                    "[Called tool: {name} with input: {}]",
                                    serde_json::to_string(input).unwrap_or_default()
                                );
                            }
                            ContentBlock::ToolResult {
                                content, is_error, ..
                            } => {
                                let status = if is_error.unwrap_or(false) {
                                    "error"
                                } else {
                                    "success"
                                };
                                // Truncate long tool results (Unicode-safe; avoid slicing mid-codepoint)
                                let truncated = if content.chars().count() > MAX_TOOL_RESULT_CHARS {
                                    let prefix: String =
                                        content.chars().take(MAX_TOOL_RESULT_CHARS).collect();
                                    format!("{prefix}... (truncated)")
                                } else {
                                    content.clone()
                                };
                                let _ = writeln!(output, "[Tool result ({status}): {truncated}]");
                            }
                            ContentBlock::Image { source } => {
                                let _ = writeln!(output, "[Image: {}]", source.media_type);
                            }
                            ContentBlock::Document { source } => {
                                let _ = writeln!(output, "[Document: {}]", source.media_type);
                            }
                        }
                    }
                }
            }
            // Blank line between messages.
            output.push('\n');
        }

        output
    }

    /// Build the summarization prompt: prefix + formatted conversation + suffix.
    fn build_summary_prompt(&self, messages_text: &str) -> String {
        format!(
            "{}{}{}",
            self.summary_prompt_prefix, messages_text, self.summary_prompt_suffix
        )
    }
}
291
292#[async_trait]
293impl<P: LlmProvider> ContextCompactor for LlmContextCompactor<P> {
294    async fn compact(&self, messages: &[Message]) -> Result<String> {
295        let messages_to_summarize: Vec<_> = messages
296            .iter()
297            .filter(|message| !Self::is_summary_message(&message.content))
298            .cloned()
299            .collect();
300
301        if messages_to_summarize.is_empty() {
302            return Ok(COMPACT_EMPTY_SUMMARY.to_string());
303        }
304
305        let messages_text = Self::format_messages_for_summary(&messages_to_summarize);
306        let prompt = self.build_summary_prompt(&messages_text);
307
308        let request = ChatRequest {
309            system: self.system_prompt.clone(),
310            messages: vec![Message::user(prompt)],
311            tools: None,
312            max_tokens: 2000,
313            thinking: None,
314        };
315
316        let outcome = self
317            .provider
318            .chat(request)
319            .await
320            .context("Failed to call LLM for summarization")?;
321
322        match outcome {
323            ChatOutcome::Success(response) => response
324                .first_text()
325                .map(String::from)
326                .context("No text in summarization response"),
327            ChatOutcome::RateLimited => {
328                bail!("Rate limited during summarization")
329            }
330            ChatOutcome::InvalidRequest(msg) => {
331                bail!("Invalid request during summarization: {msg}")
332            }
333            ChatOutcome::ServerError(msg) => {
334                bail!("Server error during summarization: {msg}")
335            }
336        }
337    }
338
339    fn estimate_tokens(&self, messages: &[Message]) -> usize {
340        TokenEstimator::estimate_history(messages)
341    }
342
343    fn needs_compaction(&self, messages: &[Message]) -> bool {
344        if !self.config.auto_compact {
345            return false;
346        }
347
348        if messages.len() < self.config.min_messages_for_compaction {
349            return false;
350        }
351
352        let estimated_tokens = self.estimate_tokens(messages);
353        estimated_tokens > self.config.threshold_tokens
354    }
355
356    async fn compact_history(&self, messages: Vec<Message>) -> Result<CompactionResult> {
357        let original_count = messages.len();
358        let original_tokens = self.estimate_tokens(&messages);
359
360        // Ensure we have enough messages to compact
361        if messages.len() <= self.config.retain_recent {
362            return Ok(CompactionResult {
363                messages,
364                original_count,
365                new_count: original_count,
366                original_tokens,
367                new_tokens: original_tokens,
368            });
369        }
370
371        // Split messages: old messages to summarize, recent messages to keep
372        let mut split_point = messages.len().saturating_sub(self.config.retain_recent);
373        split_point = Self::split_point_preserves_tool_pairs_with_cap(
374            &messages,
375            split_point,
376            MAX_RETAINED_TAIL_MESSAGE_TOKENS,
377        );
378
379        let (to_summarize, to_keep) = messages.split_at(split_point);
380
381        // Summarize old messages
382        let summary = self.compact(to_summarize).await?;
383
384        // Build new message history
385        let mut new_messages = Vec::with_capacity(2 + to_keep.len());
386
387        // Add summary as a user message
388        new_messages.push(Message::user(format!("{SUMMARY_PREFIX}{summary}")));
389
390        // Add acknowledgment from assistant
391        new_messages.push(Message::assistant(
392            "I understand the context from the summary. Let me continue from where we left off.",
393        ));
394
395        // Add recent messages
396        new_messages.extend(to_keep.iter().cloned());
397
398        let new_count = new_messages.len();
399        let new_tokens = self.estimate_tokens(&new_messages);
400
401        Ok(CompactionResult {
402            messages: new_messages,
403            original_count,
404            new_count,
405            original_tokens,
406            new_tokens,
407        })
408    }
409}
410
411#[cfg(test)]
412mod tests {
413    use super::*;
414    use crate::llm::{ChatResponse, StopReason, Usage};
415    use std::sync::Mutex;
416
    /// Test double for `LlmProvider` that always answers with a canned summary.
    struct MockProvider {
        /// Text returned as the sole content block of every chat response.
        summary_response: String,
        /// When present, the user-visible text of each incoming request is
        /// appended here so tests can inspect what was sent to the "LLM".
        requests: Option<Arc<Mutex<Vec<String>>>>,
    }
421
422    impl MockProvider {
423        fn new(summary: &str) -> Self {
424            Self {
425                summary_response: summary.to_string(),
426                requests: None,
427            }
428        }
429
430        fn new_with_request_log(summary: &str, requests: Arc<Mutex<Vec<String>>>) -> Self {
431            Self {
432                summary_response: summary.to_string(),
433                requests: Some(requests),
434            }
435        }
436    }
437
438    #[async_trait]
439    impl LlmProvider for MockProvider {
440        async fn chat(&self, request: ChatRequest) -> Result<ChatOutcome> {
441            if let Some(requests) = &self.requests {
442                let mut entries = requests.lock().unwrap();
443                let user_prompt = request
444                    .messages
445                    .iter()
446                    .find_map(|message| match &message.content {
447                        Content::Text(text) => Some(text.clone()),
448                        Content::Blocks(blocks) => {
449                            let text = blocks
450                                .iter()
451                                .filter_map(|block| {
452                                    if let ContentBlock::Text { text } = block {
453                                        Some(text.as_str())
454                                    } else {
455                                        None
456                                    }
457                                })
458                                .collect::<Vec<_>>()
459                                .join("\n");
460                            if text.is_empty() { None } else { Some(text) }
461                        }
462                    })
463                    .unwrap_or_default();
464                entries.push(user_prompt);
465            }
466            Ok(ChatOutcome::Success(ChatResponse {
467                id: "test".to_string(),
468                content: vec![ContentBlock::Text {
469                    text: self.summary_response.clone(),
470                }],
471                model: "mock".to_string(),
472                stop_reason: Some(StopReason::EndTurn),
473                usage: Usage {
474                    input_tokens: 100,
475                    output_tokens: 50,
476                },
477            }))
478        }
479
480        fn model(&self) -> &'static str {
481            "mock-model"
482        }
483
484        fn provider(&self) -> &'static str {
485            "mock"
486        }
487    }
488
489    #[test]
490    fn test_needs_compaction_below_threshold() {
491        let provider = Arc::new(MockProvider::new("summary"));
492        let config = CompactionConfig::default()
493            .with_threshold_tokens(10_000)
494            .with_min_messages(5);
495        let compactor = LlmContextCompactor::new(provider, config);
496
497        // Only 3 messages, below min_messages
498        let messages = vec![
499            Message::user("Hello"),
500            Message::assistant("Hi"),
501            Message::user("How are you?"),
502        ];
503
504        assert!(!compactor.needs_compaction(&messages));
505    }
506
507    #[test]
508    fn test_needs_compaction_above_threshold() {
509        let provider = Arc::new(MockProvider::new("summary"));
510        let config = CompactionConfig::default()
511            .with_threshold_tokens(50) // Very low threshold
512            .with_min_messages(3);
513        let compactor = LlmContextCompactor::new(provider, config);
514
515        // Messages that exceed threshold
516        let messages = vec![
517            Message::user("Hello, this is a longer message to test compaction"),
518            Message::assistant(
519                "Hi there! This is also a longer response to help trigger compaction",
520            ),
521            Message::user("Great, let's continue with even more text here"),
522            Message::assistant("Absolutely, adding more content to ensure we exceed the threshold"),
523        ];
524
525        assert!(compactor.needs_compaction(&messages));
526    }
527
528    #[test]
529    fn test_needs_compaction_auto_disabled() {
530        let provider = Arc::new(MockProvider::new("summary"));
531        let config = CompactionConfig::default()
532            .with_threshold_tokens(10) // Very low
533            .with_min_messages(1)
534            .with_auto_compact(false);
535        let compactor = LlmContextCompactor::new(provider, config);
536
537        let messages = vec![
538            Message::user("Hello, this is a longer message"),
539            Message::assistant("Response here"),
540        ];
541
542        assert!(!compactor.needs_compaction(&messages));
543    }
544
545    #[tokio::test]
546    async fn test_compact_history() -> Result<()> {
547        let provider = Arc::new(MockProvider::new(
548            "User asked about Rust programming. Assistant explained ownership, borrowing, and lifetimes.",
549        ));
550        let config = CompactionConfig::default()
551            .with_retain_recent(2)
552            .with_min_messages(3);
553        let compactor = LlmContextCompactor::new(provider, config);
554
555        // Use longer messages to ensure compaction actually reduces tokens
556        let messages = vec![
557            Message::user(
558                "What is Rust? I've heard it's a systems programming language but I don't know much about it. Can you explain the key features and why people are excited about it?",
559            ),
560            Message::assistant(
561                "Rust is a systems programming language focused on safety, speed, and concurrency. It achieves memory safety without garbage collection through its ownership system. The key features include zero-cost abstractions, guaranteed memory safety, threads without data races, and minimal runtime.",
562            ),
563            Message::user(
564                "Tell me about ownership in detail. How does it work and what are the rules? I want to understand this core concept thoroughly.",
565            ),
566            Message::assistant(
567                "Ownership is Rust's central feature with three rules: each value has one owner, only one owner at a time, and the value is dropped when owner goes out of scope. This system prevents memory leaks, double frees, and dangling pointers at compile time.",
568            ),
569            Message::user("What about borrowing?"), // Keep
570            Message::assistant("Borrowing allows references to data without taking ownership."), // Keep
571        ];
572
573        let result = compactor.compact_history(messages).await?;
574
575        // Should have: summary message + ack + 2 recent messages = 4
576        assert_eq!(result.new_count, 4);
577        assert_eq!(result.original_count, 6);
578
579        // With longer original messages, compaction should reduce tokens
580        assert!(
581            result.new_tokens < result.original_tokens,
582            "Expected fewer tokens after compaction: new={} < original={}",
583            result.new_tokens,
584            result.original_tokens
585        );
586
587        // First message should be the summary
588        if let Content::Text(text) = &result.messages[0].content {
589            assert!(text.contains("Previous conversation summary"));
590        }
591
592        Ok(())
593    }
594
595    #[tokio::test]
596    async fn test_compact_history_too_few_messages() -> Result<()> {
597        let provider = Arc::new(MockProvider::new("summary"));
598        let config = CompactionConfig::default().with_retain_recent(5);
599        let compactor = LlmContextCompactor::new(provider, config);
600
601        // Only 3 messages, less than retain_recent
602        let messages = vec![
603            Message::user("Hello"),
604            Message::assistant("Hi"),
605            Message::user("Bye"),
606        ];
607
608        let result = compactor.compact_history(messages.clone()).await?;
609
610        // Should return original messages unchanged
611        assert_eq!(result.new_count, 3);
612        assert_eq!(result.messages.len(), 3);
613
614        Ok(())
615    }
616
617    #[test]
618    fn test_format_messages_for_summary() {
619        let messages = vec![Message::user("Hello"), Message::assistant("Hi there!")];
620
621        let formatted = LlmContextCompactor::<MockProvider>::format_messages_for_summary(&messages);
622
623        assert!(formatted.contains("User: Hello"));
624        assert!(formatted.contains("Assistant: Hi there!"));
625    }
626
627    #[test]
628    fn test_format_messages_for_summary_truncates_tool_results_unicode_safely() {
629        let long_unicode = "é".repeat(600);
630
631        let messages = vec![Message {
632            role: Role::Assistant,
633            content: Content::Blocks(vec![ContentBlock::ToolResult {
634                tool_use_id: "tool-1".to_string(),
635                content: long_unicode,
636                is_error: Some(false),
637            }]),
638        }];
639
640        let formatted = LlmContextCompactor::<MockProvider>::format_messages_for_summary(&messages);
641
642        assert!(formatted.contains("... (truncated)"));
643    }
644
645    #[tokio::test]
646    async fn test_compact_filters_summary_messages() -> Result<()> {
647        let requests = Arc::new(Mutex::new(Vec::new()));
648        let provider = Arc::new(MockProvider::new_with_request_log(
649            "Fresh summary",
650            requests.clone(),
651        ));
652        let config = CompactionConfig::default().with_min_messages(1);
653        let compactor = LlmContextCompactor::new(provider, config);
654
655        let messages = vec![
656            Message::user(format!("{SUMMARY_PREFIX}already compacted context")),
657            Message::assistant("Continue with the next task using this context."),
658        ];
659
660        let summary = compactor.compact(&messages).await?;
661
662        {
663            let recorded = requests.lock().unwrap();
664            assert_eq!(recorded.len(), 1);
665            assert_eq!(summary, "Fresh summary");
666            assert!(recorded[0].contains("Continue with the next task using this context."));
667            assert!(!recorded[0].contains("already compacted context"));
668            drop(recorded);
669        }
670
671        Ok(())
672    }
673
674    #[tokio::test]
675    async fn test_compact_history_ignores_prior_summary_in_candidate_payload() -> Result<()> {
676        let requests = Arc::new(Mutex::new(Vec::new()));
677        let provider = Arc::new(MockProvider::new_with_request_log(
678            "Fresh history summary",
679            requests.clone(),
680        ));
681        let config = CompactionConfig::default()
682            .with_retain_recent(2)
683            .with_min_messages(1);
684        let compactor = LlmContextCompactor::new(provider, config);
685
686        let messages = vec![
687            Message::user(format!("{SUMMARY_PREFIX}already compacted context")),
688            Message::assistant("Current turn content from the latest exchange."),
689            Message::assistant("Recent message that should stay."),
690            Message::user("Newest note that should stay."),
691        ];
692
693        let result = compactor.compact_history(messages).await?;
694
695        {
696            let recorded = requests.lock().unwrap();
697            assert_eq!(recorded.len(), 1);
698            assert!(recorded[0].contains("Current turn content from the latest exchange."));
699            assert!(!recorded[0].contains("already compacted context"));
700            drop(recorded);
701        }
702        assert_eq!(result.new_count, 4);
703
704        Ok(())
705    }
706
707    #[tokio::test]
708    async fn test_compact_history_is_no_op_when_candidate_window_has_only_summaries() -> Result<()>
709    {
710        let requests = Arc::new(Mutex::new(Vec::new()));
711        let provider = Arc::new(MockProvider::new_with_request_log(
712            "This summary should not be used",
713            requests.clone(),
714        ));
715        let config = CompactionConfig::default()
716            .with_retain_recent(2)
717            .with_min_messages(1);
718        let compactor = LlmContextCompactor::new(provider, config);
719
720        let messages = vec![
721            Message::user(format!("{SUMMARY_PREFIX}first prior compacted section")),
722            Message::assistant(format!("{SUMMARY_PREFIX}second prior compacted section")),
723            Message::user(format!("{SUMMARY_PREFIX}third prior compacted section")),
724            Message::assistant("final short note"),
725        ];
726
727        let result = compactor.compact_history(messages).await?;
728
729        {
730            let recorded = requests.lock().unwrap();
731            assert!(recorded.is_empty());
732            drop(recorded);
733        }
734        assert_eq!(result.new_count, 4);
735        assert_eq!(result.messages.len(), 4);
736
737        if let Content::Text(text) = &result.messages[0].content {
738            assert!(text.contains(COMPACT_EMPTY_SUMMARY));
739        } else {
740            panic!("Expected summary text in first message");
741        }
742
743        Ok(())
744    }
745
    /// The naive split point (len - retain_recent) must be walked backwards
    /// when it would separate an assistant tool_use from its user tool_result;
    /// both halves of the exchange have to survive in the retained tail.
    #[tokio::test]
    async fn test_compact_history_preserves_tool_use_tool_result_pairs() -> Result<()> {
        let provider = Arc::new(MockProvider::new("Summary of earlier conversation."));
        let config = CompactionConfig::default()
            .with_retain_recent(2)
            .with_min_messages(3);
        let compactor = LlmContextCompactor::new(provider, config);

        // Build a history where the split_point (len - retain_recent = 5 - 2 = 3)
        // would land exactly on the user tool_result message at index 3,
        // which would orphan it from its assistant tool_use at index 2.
        let messages = vec![
            // index 0: user
            Message::user("What files are in the project?"),
            // index 1: assistant text
            Message::assistant("Let me check that for you."),
            // index 2: assistant with tool_use
            Message {
                role: Role::Assistant,
                content: Content::Blocks(vec![ContentBlock::ToolUse {
                    id: "tool_1".to_string(),
                    name: "list_files".to_string(),
                    input: serde_json::json!({}),
                    thought_signature: None,
                }]),
            },
            // index 3: user with tool_result (naive split would land here)
            Message {
                role: Role::User,
                content: Content::Blocks(vec![ContentBlock::ToolResult {
                    tool_use_id: "tool_1".to_string(),
                    content: "file1.rs\nfile2.rs".to_string(),
                    is_error: None,
                }]),
            },
            // index 4: assistant final response
            Message::assistant("The project contains file1.rs and file2.rs."),
        ];

        let result = compactor.compact_history(messages).await?;

        // The split_point should have been adjusted back from 3 to 2,
        // so to_keep includes: [assistant tool_use, user tool_result, assistant response]
        // Plus summary + ack = 5 total
        assert_eq!(result.new_count, 5);

        // Verify the kept messages include the tool_use/tool_result pair
        // After summary + ack, the third message should be the assistant with tool_use
        let kept_assistant = &result.messages[2];
        if let Content::Blocks(blocks) = &kept_assistant.content {
            assert!(
                blocks
                    .iter()
                    .any(|b| matches!(b, ContentBlock::ToolUse { .. })),
                "Expected assistant tool_use in kept messages"
            );
        } else {
            panic!("Expected Blocks content for assistant tool_use message");
        }

        // The fourth message should be the user tool_result
        let kept_user = &result.messages[3];
        if let Content::Blocks(blocks) = &kept_user.content {
            assert!(
                blocks
                    .iter()
                    .any(|b| matches!(b, ContentBlock::ToolResult { .. })),
                "Expected user tool_result in kept messages"
            );
        } else {
            panic!("Expected Blocks content for user tool_result message");
        }

        Ok(())
    }
821
822    #[tokio::test]
823    async fn test_compact_history_preserves_tool_result_tool_use_pairs() -> Result<()> {
824        let provider = Arc::new(MockProvider::new("Summary around tool pair."));
825        let config = CompactionConfig::default()
826            .with_retain_recent(2)
827            .with_min_messages(1);
828        let compactor = LlmContextCompactor::new(provider, config);
829
830        // Build a history where split_point would land on tool-use tool-result crossing in the
831        // opposite direction:
832        // ... user tool_result | assistant tool_use ...
833        let messages = vec![
834            Message::user("Start a workflow"),
835            Message {
836                role: Role::User,
837                content: Content::Blocks(vec![ContentBlock::ToolResult {
838                    tool_use_id: "tool_odd".to_string(),
839                    content: "prior result".to_string(),
840                    is_error: None,
841                }]),
842            },
843            Message {
844                role: Role::Assistant,
845                content: Content::Blocks(vec![ContentBlock::ToolUse {
846                    id: "tool_odd".to_string(),
847                    name: "follow_up".to_string(),
848                    input: serde_json::json!({}),
849                    thought_signature: None,
850                }]),
851            },
852            Message::assistant("Follow up done."),
853        ];
854
855        let result = compactor.compact_history(messages).await?;
856
857        // Split-point starts at 2 and is adjusted back to 1, keeping the tool result and tool use.
858        assert_eq!(result.new_count, 5);
859
860        // tool_result should remain with the kept tail.
861        let kept_result = &result.messages[2];
862        if let Content::Blocks(blocks) = &kept_result.content {
863            assert!(
864                blocks
865                    .iter()
866                    .any(|b| matches!(b, ContentBlock::ToolResult { .. })),
867                "Expected kept user tool_result in retained tail"
868            );
869        } else {
870            panic!("Expected tool_result blocks in retained tail");
871        }
872
873        // tool_use should remain with the kept tail.
874        let kept_tool_use = &result.messages[3];
875        if let Content::Blocks(blocks) = &kept_tool_use.content {
876            assert!(
877                blocks
878                    .iter()
879                    .any(|b| matches!(b, ContentBlock::ToolUse { .. })),
880                "Expected kept assistant tool_use in retained tail"
881            );
882        } else {
883            panic!("Expected tool_use blocks in retained tail");
884        }
885
886        Ok(())
887    }
888
889    #[tokio::test]
890    async fn test_compact_history_retained_tail_is_token_capped() -> Result<()> {
891        let provider = Arc::new(MockProvider::new(
892            "Project summary with a long context and technical context.",
893        ));
894        let config = CompactionConfig::default()
895            .with_retain_recent(8)
896            .with_min_messages(1)
897            .with_threshold_tokens(1);
898        let compactor = LlmContextCompactor::new(provider, config);
899
900        let mut messages = Vec::new();
901
902        // Older messages that will be summarized away.
903        messages.extend((0..6).map(|index| Message::user(format!("pre-compaction noise {index}"))));
904
905        // Newer long messages: intentionally large to force retained-tail truncation.
906        messages.extend(
907            (0..8).map(|index| Message::assistant(format!("kept-{index}: {}", "x".repeat(12_000)))),
908        );
909
910        let result = compactor.compact_history(messages).await?;
911
912        // The retained tail should be token capped and therefore shorter than retain_recent.
913        let retained_tail = &result.messages[2..];
914        assert!(retained_tail.len() < 8);
915
916        let mut latest_index = -1i32;
917        let mut all_retained = true;
918        for message in retained_tail {
919            if let Content::Text(text) = &message.content {
920                if let Some(number) = text.split(':').next().and_then(|prefix| {
921                    prefix
922                        .strip_prefix("kept-")
923                        .and_then(|rest| rest.parse::<i32>().ok())
924                }) {
925                    if number >= 0 {
926                        latest_index = latest_index.max(number);
927                    }
928                } else {
929                    all_retained = false;
930                }
931            } else {
932                all_retained = false;
933            }
934        }
935
936        assert!(all_retained);
937        assert_eq!(latest_index, 7);
938        assert!(
939            TokenEstimator::estimate_history(retained_tail) <= MAX_RETAINED_TAIL_MESSAGE_TOKENS
940        );
941        assert!(compactor.needs_compaction(&result.messages));
942
943        Ok(())
944    }
945}