1use crate::llm::{ChatOutcome, ChatRequest, Content, ContentBlock, LlmProvider, Message, Role};
4use anyhow::{Context, Result, bail};
5use async_trait::async_trait;
6use std::fmt::Write;
7use std::sync::Arc;
8
9use super::config::CompactionConfig;
10use super::estimator::TokenEstimator;
11
/// Marker placed at the start of the user message that carries a compaction summary.
/// It is also what `is_summary_message` looks for to recognize earlier summaries.
const SUMMARY_PREFIX: &str = "[Previous conversation summary]\n\n";
/// Default system prompt sent with the summarization request.
const COMPACTION_SYSTEM_PROMPT: &str = "You are a precise summarizer. Your task is to create concise but complete summaries of conversations, preserving all technical details needed to continue the work.";
/// Default text placed before the formatted conversation in the summarization prompt.
const COMPACTION_SUMMARY_PROMPT_PREFIX: &str = "Summarize this conversation concisely, preserving:\n- Key decisions and conclusions reached\n- Important file paths, code changes, and technical details\n- Current task context and what has been accomplished\n- Any pending items, errors encountered, or next steps\n\nBe specific about technical details (file names, function names, error messages) as these\nare critical for continuing the work.\n\nConversation:\n";
/// Default text placed after the formatted conversation in the summarization prompt.
const COMPACTION_SUMMARY_PROMPT_SUFFIX: &str =
    "Provide a concise summary (aim for 500-1000 words):";
/// Summary text returned by `compact` when every candidate message is itself a prior summary.
const COMPACT_EMPTY_SUMMARY: &str = "No additional context was available to summarize; the previous messages were already compacted.";
/// Assistant reply inserted after the summary message when recent messages are retained.
const SUMMARY_ACKNOWLEDGMENT: &str =
    "I understand the context from the summary. Let me continue from where we left off.";
/// Token budget passed to the split-point selection for the tail of messages kept verbatim.
const MAX_RETAINED_TAIL_MESSAGE_TOKENS: usize = 20_000;
/// Maximum number of characters of a tool result copied into the summarization prompt.
const MAX_TOOL_RESULT_CHARS: usize = 500;
22
/// Strategy for shrinking a conversation history by summarizing older messages.
#[async_trait]
pub trait ContextCompactor: Send + Sync {
    /// Produces a textual summary of `messages`.
    ///
    /// # Errors
    ///
    /// Returns an error when the summary cannot be produced.
    async fn compact(&self, messages: &[Message]) -> Result<String>;

    /// Returns an estimate of the number of tokens occupied by `messages`.
    fn estimate_tokens(&self, messages: &[Message]) -> usize;

    /// Reports whether `messages` should be compacted.
    fn needs_compaction(&self, messages: &[Message]) -> bool;

    /// Compacts `messages` and reports the outcome as a [`CompactionResult`].
    ///
    /// # Errors
    ///
    /// Returns an error when compaction fails.
    async fn compact_history(&self, messages: Vec<Message>) -> Result<CompactionResult>;
}
46
/// Outcome of a history compaction: the resulting messages plus before/after statistics.
#[derive(Debug, Clone)]
pub struct CompactionResult {
    /// Message history after compaction.
    pub messages: Vec<Message>,
    /// Number of messages before compaction.
    pub original_count: usize,
    /// Number of messages after compaction.
    pub new_count: usize,
    /// Estimated token count before compaction.
    pub original_tokens: usize,
    /// Estimated token count after compaction.
    pub new_tokens: usize,
}
61
/// [`ContextCompactor`] that asks an [`LlmProvider`] to summarize older messages.
pub struct LlmContextCompactor<P: LlmProvider + ?Sized> {
    /// Provider that receives the summarization request.
    provider: Arc<P>,
    /// Compaction settings (auto-compaction flag, thresholds, retained message count).
    config: CompactionConfig,
    /// System prompt sent with the summarization request.
    system_prompt: String,
    /// Text placed before the formatted conversation in the user prompt.
    summary_prompt_prefix: String,
    /// Text placed after the formatted conversation in the user prompt.
    summary_prompt_suffix: String,
}
79
80impl<P: LlmProvider + ?Sized> LlmContextCompactor<P> {
81 #[must_use]
83 pub fn new(provider: Arc<P>, config: CompactionConfig) -> Self {
84 Self {
85 provider,
86 config,
87 system_prompt: COMPACTION_SYSTEM_PROMPT.to_string(),
88 summary_prompt_prefix: COMPACTION_SUMMARY_PROMPT_PREFIX.to_string(),
89 summary_prompt_suffix: COMPACTION_SUMMARY_PROMPT_SUFFIX.to_string(),
90 }
91 }
92
93 #[must_use]
95 pub fn with_defaults(provider: Arc<P>) -> Self {
96 Self::new(provider, CompactionConfig::default())
97 }
98
    /// Returns the compaction configuration held by this compactor.
    #[must_use]
    pub const fn config(&self) -> &CompactionConfig {
        &self.config
    }
104
105 #[must_use]
107 pub fn with_prompts(
108 mut self,
109 system_prompt: impl Into<String>,
110 summary_prompt_prefix: impl Into<String>,
111 summary_prompt_suffix: impl Into<String>,
112 ) -> Self {
113 self.system_prompt = system_prompt.into();
114 self.summary_prompt_prefix = summary_prompt_prefix.into();
115 self.summary_prompt_suffix = summary_prompt_suffix.into();
116 self
117 }
118
119 fn is_summary_message(content: &Content) -> bool {
121 match content {
122 Content::Text(text) => text.starts_with(SUMMARY_PREFIX),
123 Content::Blocks(blocks) => blocks.iter().any(|block| match block {
124 ContentBlock::Text { text } => text.starts_with(SUMMARY_PREFIX),
125 _ => false,
126 }),
127 }
128 }
129
130 fn has_tool_use(content: &Content) -> bool {
132 matches!(
133 content,
134 Content::Blocks(blocks)
135 if blocks
136 .iter()
137 .any(|block| matches!(block, ContentBlock::ToolUse { .. }))
138 )
139 }
140
141 fn has_tool_result(content: &Content) -> bool {
143 matches!(
144 content,
145 Content::Blocks(blocks)
146 if blocks
147 .iter()
148 .any(|block| matches!(block, ContentBlock::ToolResult { .. }))
149 )
150 }
151
152 fn split_point_preserves_tool_pairs(messages: &[Message], mut split_point: usize) -> usize {
154 while split_point > 0 && split_point < messages.len() {
155 let prev = &messages[split_point - 1];
156 let next = &messages[split_point];
157
158 let crosses_tool_pair = (prev.role == Role::Assistant
159 && Self::has_tool_use(&prev.content)
160 && next.role == Role::User
161 && Self::has_tool_result(&next.content))
162 || (prev.role == Role::User
163 && Self::has_tool_result(&prev.content)
164 && next.role == Role::Assistant
165 && Self::has_tool_use(&next.content));
166
167 if crosses_tool_pair {
168 split_point -= 1;
169 continue;
170 }
171
172 break;
173 }
174
175 split_point
176 }
177
178 fn split_point_preserves_tool_pairs_with_cap(
217 messages: &[Message],
218 split_point: usize,
219 max_tokens: usize,
220 ) -> usize {
221 let cap_limit = Self::retain_tail_with_token_cap(messages, split_point, max_tokens);
222 let pair_safe = Self::split_point_preserves_tool_pairs(messages, cap_limit);
223 Self::split_point_skips_leading_orphan(messages, pair_safe)
224 }
225
226 fn split_point_skips_leading_orphan(messages: &[Message], mut split_point: usize) -> usize {
243 while split_point < messages.len() {
244 if Self::leading_message_has_orphan_tool_result(&messages[split_point..]) {
245 split_point = split_point.saturating_add(1);
246 continue;
247 }
248 break;
249 }
250 split_point
251 }
252
253 fn leading_message_has_orphan_tool_result(to_keep: &[Message]) -> bool {
261 let Some(first) = to_keep.first() else {
262 return false;
263 };
264 let Content::Blocks(blocks) = &first.content else {
265 return false;
266 };
267
268 let mut needed: Vec<&str> = Vec::new();
272 for block in blocks {
273 if let ContentBlock::ToolResult { tool_use_id, .. } = block {
274 needed.push(tool_use_id.as_str());
275 }
276 }
277 if needed.is_empty() {
278 return false;
279 }
280
281 let known_ids: std::collections::HashSet<&str> = to_keep
283 .iter()
284 .flat_map(|message| match &message.content {
285 Content::Blocks(blocks) => blocks
286 .iter()
287 .filter_map(|block| match block {
288 ContentBlock::ToolUse { id, .. } => Some(id.as_str()),
289 _ => None,
290 })
291 .collect::<Vec<_>>(),
292 Content::Text(_) => Vec::new(),
293 })
294 .collect();
295
296 needed.iter().any(|id| !known_ids.contains(id))
297 }
298
299 fn retain_tail_with_token_cap(messages: &[Message], start: usize, max_tokens: usize) -> usize {
301 if start >= messages.len() {
302 return messages.len();
303 }
304
305 if max_tokens == 0 {
306 return messages.len();
307 }
308
309 let mut used = 0usize;
310 let mut retained_start = messages.len();
311
312 for idx in (start..messages.len()).rev() {
313 let message_tokens = TokenEstimator::estimate_message(&messages[idx]);
314 if used + message_tokens > max_tokens {
315 break;
316 }
317
318 retained_start = idx;
319 used += message_tokens;
320 }
321
322 retained_start
323 }
324
325 fn format_messages_for_summary(messages: &[Message]) -> String {
327 let mut output = String::new();
328
329 for message in messages {
330 let role = match message.role {
331 Role::User => "User",
332 Role::Assistant => "Assistant",
333 };
334
335 let _ = write!(output, "{role}: ");
336
337 match &message.content {
338 Content::Text(text) => {
339 let _ = writeln!(output, "{text}");
340 }
341 Content::Blocks(blocks) => {
342 for block in blocks {
343 match block {
344 ContentBlock::Text { text } => {
345 let _ = writeln!(output, "{text}");
346 }
347 ContentBlock::Thinking { thinking, .. } => {
348 let _ = writeln!(output, "[Thinking: {thinking}]");
350 }
351 ContentBlock::RedactedThinking { .. } => {
352 let _ = writeln!(output, "[Redacted thinking]");
353 }
354 ContentBlock::ToolUse { name, input, .. } => {
355 let _ = writeln!(
356 output,
357 "[Called tool: {name} with input: {}]",
358 serde_json::to_string(input).unwrap_or_default()
359 );
360 }
361 ContentBlock::ToolResult {
362 content, is_error, ..
363 } => {
364 let status = if is_error.unwrap_or(false) {
365 "error"
366 } else {
367 "success"
368 };
369 let truncated = if content.chars().count() > MAX_TOOL_RESULT_CHARS {
371 let prefix: String =
372 content.chars().take(MAX_TOOL_RESULT_CHARS).collect();
373 format!("{prefix}... (truncated)")
374 } else {
375 content.clone()
376 };
377 let _ = writeln!(output, "[Tool result ({status}): {truncated}]");
378 }
379 ContentBlock::Image { source } => {
380 let _ = writeln!(output, "[Image: {}]", source.media_type);
381 }
382 ContentBlock::Document { source } => {
383 let _ = writeln!(output, "[Document: {}]", source.media_type);
384 }
385 _ => {
388 let _ = writeln!(output, "[Unrecognized content block]");
389 }
390 }
391 }
392 }
393 }
394 output.push('\n');
395 }
396
397 output
398 }
399
400 fn build_summary_prompt(&self, messages_text: &str) -> String {
402 format!(
403 "{}{}{}",
404 self.summary_prompt_prefix, messages_text, self.summary_prompt_suffix
405 )
406 }
407}
408
409#[async_trait]
410impl<P: LlmProvider + ?Sized> ContextCompactor for LlmContextCompactor<P> {
411 async fn compact(&self, messages: &[Message]) -> Result<String> {
412 let messages_to_summarize: Vec<_> = messages
413 .iter()
414 .filter(|message| !Self::is_summary_message(&message.content))
415 .cloned()
416 .collect();
417
418 if messages_to_summarize.is_empty() {
419 return Ok(COMPACT_EMPTY_SUMMARY.to_string());
420 }
421
422 let messages_text = Self::format_messages_for_summary(&messages_to_summarize);
423 let prompt = self.build_summary_prompt(&messages_text);
424
425 let request = ChatRequest {
426 system: self.system_prompt.clone(),
427 messages: vec![Message::user(prompt)],
428 tools: None,
429 max_tokens: 2000,
430 max_tokens_explicit: true,
431 session_id: None,
432 cached_content: None,
433 thinking: None,
434 tool_choice: None,
435 response_format: None,
436 };
437
438 let outcome = self
439 .provider
440 .chat(request)
441 .await
442 .context("Failed to call LLM for summarization")?;
443
444 match outcome {
445 ChatOutcome::Success(response) => response
446 .first_text()
447 .map(String::from)
448 .context("No text in summarization response"),
449 ChatOutcome::RateLimited => {
450 bail!("Rate limited during summarization")
451 }
452 ChatOutcome::InvalidRequest(msg) => {
453 bail!("Invalid request during summarization: {msg}")
454 }
455 ChatOutcome::ServerError(msg) => {
456 bail!("Server error during summarization: {msg}")
457 }
458 _ => {
461 bail!("Unrecognized provider outcome during summarization")
462 }
463 }
464 }
465
    /// Estimates the token count of `messages` by delegating to
    /// `TokenEstimator::estimate_history`.
    fn estimate_tokens(&self, messages: &[Message]) -> usize {
        TokenEstimator::estimate_history(messages)
    }
469
470 fn needs_compaction(&self, messages: &[Message]) -> bool {
471 if !self.config.auto_compact {
472 return false;
473 }
474
475 if messages.len() < self.config.min_messages_for_compaction {
476 return false;
477 }
478
479 let estimated_tokens = self.estimate_tokens(messages);
480 estimated_tokens > self.config.threshold_tokens
481 }
482
483 async fn compact_history(&self, messages: Vec<Message>) -> Result<CompactionResult> {
484 let original_count = messages.len();
485 let original_tokens = self.estimate_tokens(&messages);
486
487 if messages.len() <= self.config.retain_recent {
489 return Ok(CompactionResult {
490 messages,
491 original_count,
492 new_count: original_count,
493 original_tokens,
494 new_tokens: original_tokens,
495 });
496 }
497
498 let mut split_point = messages.len().saturating_sub(self.config.retain_recent);
500 split_point = Self::split_point_preserves_tool_pairs_with_cap(
501 &messages,
502 split_point,
503 MAX_RETAINED_TAIL_MESSAGE_TOKENS,
504 );
505
506 let (to_summarize, to_keep) = messages.split_at(split_point);
507
508 let summary = self.compact(to_summarize).await?;
510
511 let mut new_messages = Vec::with_capacity(2 + to_keep.len());
513
514 new_messages.push(Message::user(format!("{SUMMARY_PREFIX}{summary}")));
516
517 if !to_keep.is_empty() {
522 new_messages.push(Message::assistant(SUMMARY_ACKNOWLEDGMENT));
523 }
524
525 new_messages.extend(to_keep.iter().cloned());
532
533 let new_count = new_messages.len();
534 let new_tokens = self.estimate_tokens(&new_messages);
535
536 Ok(CompactionResult {
537 messages: new_messages,
538 original_count,
539 new_count,
540 original_tokens,
541 new_tokens,
542 })
543 }
544}
545
546#[cfg(test)]
547mod tests {
548 use super::*;
549 use crate::llm::{ChatResponse, StopReason, Usage};
550 use std::sync::Mutex;
551
    /// Test double for `LlmProvider` that always answers with a fixed summary.
    struct MockProvider {
        /// Text returned as the single text block of every chat response.
        summary_response: String,
        /// When set, receives the user prompt text of each request passed to `chat`.
        requests: Option<Arc<Mutex<Vec<String>>>>,
    }
556
557 impl MockProvider {
558 fn new(summary: &str) -> Self {
559 Self {
560 summary_response: summary.to_string(),
561 requests: None,
562 }
563 }
564
565 fn new_with_request_log(summary: &str, requests: Arc<Mutex<Vec<String>>>) -> Self {
566 Self {
567 summary_response: summary.to_string(),
568 requests: Some(requests),
569 }
570 }
571 }
572
573 #[async_trait]
574 impl LlmProvider for MockProvider {
575 async fn chat(&self, request: ChatRequest) -> Result<ChatOutcome> {
576 if let Some(requests) = &self.requests {
577 let mut entries = requests.lock().unwrap();
578 let user_prompt = request
579 .messages
580 .iter()
581 .find_map(|message| match &message.content {
582 Content::Text(text) => Some(text.clone()),
583 Content::Blocks(blocks) => {
584 let text = blocks
585 .iter()
586 .filter_map(|block| {
587 if let ContentBlock::Text { text } = block {
588 Some(text.as_str())
589 } else {
590 None
591 }
592 })
593 .collect::<Vec<_>>()
594 .join("\n");
595 if text.is_empty() { None } else { Some(text) }
596 }
597 })
598 .unwrap_or_default();
599 entries.push(user_prompt);
600 }
601 Ok(ChatOutcome::Success(ChatResponse {
602 id: "test".to_string(),
603 content: vec![ContentBlock::Text {
604 text: self.summary_response.clone(),
605 }],
606 model: "mock".to_string(),
607 stop_reason: Some(StopReason::EndTurn),
608 usage: Usage {
609 input_tokens: 100,
610 output_tokens: 50,
611 cached_input_tokens: 0,
612 cache_creation_input_tokens: 0,
613 },
614 }))
615 }
616
617 fn model(&self) -> &'static str {
618 "mock-model"
619 }
620
621 fn provider(&self) -> &'static str {
622 "mock"
623 }
624 }
625
626 #[test]
627 fn test_needs_compaction_below_threshold() {
628 let provider = Arc::new(MockProvider::new("summary"));
629 let config = CompactionConfig::default()
630 .with_threshold_tokens(10_000)
631 .with_min_messages(5);
632 let compactor = LlmContextCompactor::new(provider, config);
633
634 let messages = vec![
636 Message::user("Hello"),
637 Message::assistant("Hi"),
638 Message::user("How are you?"),
639 ];
640
641 assert!(!compactor.needs_compaction(&messages));
642 }
643
644 #[test]
645 fn test_needs_compaction_above_threshold() {
646 let provider = Arc::new(MockProvider::new("summary"));
647 let config = CompactionConfig::default()
648 .with_threshold_tokens(50) .with_min_messages(3);
650 let compactor = LlmContextCompactor::new(provider, config);
651
652 let messages = vec![
654 Message::user("Hello, this is a longer message to test compaction"),
655 Message::assistant(
656 "Hi there! This is also a longer response to help trigger compaction",
657 ),
658 Message::user("Great, let's continue with even more text here"),
659 Message::assistant("Absolutely, adding more content to ensure we exceed the threshold"),
660 ];
661
662 assert!(compactor.needs_compaction(&messages));
663 }
664
665 #[test]
666 fn test_needs_compaction_auto_disabled() {
667 let provider = Arc::new(MockProvider::new("summary"));
668 let config = CompactionConfig::default()
669 .with_threshold_tokens(10) .with_min_messages(1)
671 .with_auto_compact(false);
672 let compactor = LlmContextCompactor::new(provider, config);
673
674 let messages = vec![
675 Message::user("Hello, this is a longer message"),
676 Message::assistant("Response here"),
677 ];
678
679 assert!(!compactor.needs_compaction(&messages));
680 }
681
682 #[tokio::test]
683 async fn test_compact_history() -> Result<()> {
684 let provider = Arc::new(MockProvider::new(
685 "User asked about Rust programming. Assistant explained ownership, borrowing, and lifetimes.",
686 ));
687 let config = CompactionConfig::default()
688 .with_retain_recent(2)
689 .with_min_messages(3);
690 let compactor = LlmContextCompactor::new(provider, config);
691
692 let messages = vec![
694 Message::user(
695 "What is Rust? I've heard it's a systems programming language but I don't know much about it. Can you explain the key features and why people are excited about it?",
696 ),
697 Message::assistant(
698 "Rust is a systems programming language focused on safety, speed, and concurrency. It achieves memory safety without garbage collection through its ownership system. The key features include zero-cost abstractions, guaranteed memory safety, threads without data races, and minimal runtime.",
699 ),
700 Message::user(
701 "Tell me about ownership in detail. How does it work and what are the rules? I want to understand this core concept thoroughly.",
702 ),
703 Message::assistant(
704 "Ownership is Rust's central feature with three rules: each value has one owner, only one owner at a time, and the value is dropped when owner goes out of scope. This system prevents memory leaks, double frees, and dangling pointers at compile time.",
705 ),
706 Message::user("What about borrowing?"), Message::assistant("Borrowing allows references to data without taking ownership."), ];
709
710 let result = compactor.compact_history(messages).await?;
711
712 assert_eq!(result.new_count, 4);
714 assert_eq!(result.original_count, 6);
715
716 assert!(
718 result.new_tokens < result.original_tokens,
719 "Expected fewer tokens after compaction: new={} < original={}",
720 result.new_tokens,
721 result.original_tokens
722 );
723
724 if let Content::Text(text) = &result.messages[0].content {
726 assert!(text.contains("Previous conversation summary"));
727 }
728
729 Ok(())
730 }
731
732 #[tokio::test]
733 async fn test_compact_history_too_few_messages() -> Result<()> {
734 let provider = Arc::new(MockProvider::new("summary"));
735 let config = CompactionConfig::default().with_retain_recent(5);
736 let compactor = LlmContextCompactor::new(provider, config);
737
738 let messages = vec![
740 Message::user("Hello"),
741 Message::assistant("Hi"),
742 Message::user("Bye"),
743 ];
744
745 let result = compactor.compact_history(messages.clone()).await?;
746
747 assert_eq!(result.new_count, 3);
749 assert_eq!(result.messages.len(), 3);
750
751 Ok(())
752 }
753
754 #[test]
755 fn test_format_messages_for_summary() {
756 let messages = vec![Message::user("Hello"), Message::assistant("Hi there!")];
757
758 let formatted = LlmContextCompactor::<MockProvider>::format_messages_for_summary(&messages);
759
760 assert!(formatted.contains("User: Hello"));
761 assert!(formatted.contains("Assistant: Hi there!"));
762 }
763
764 #[test]
765 fn test_format_messages_for_summary_truncates_tool_results_unicode_safely() {
766 let long_unicode = "é".repeat(600);
767
768 let messages = vec![Message {
769 role: Role::Assistant,
770 content: Content::Blocks(vec![ContentBlock::ToolResult {
771 tool_use_id: "tool-1".to_string(),
772 content: long_unicode,
773 is_error: Some(false),
774 }]),
775 }];
776
777 let formatted = LlmContextCompactor::<MockProvider>::format_messages_for_summary(&messages);
778
779 assert!(formatted.contains("... (truncated)"));
780 }
781
782 #[tokio::test]
783 async fn test_compact_filters_summary_messages() -> Result<()> {
784 let requests = Arc::new(Mutex::new(Vec::new()));
785 let provider = Arc::new(MockProvider::new_with_request_log(
786 "Fresh summary",
787 requests.clone(),
788 ));
789 let config = CompactionConfig::default().with_min_messages(1);
790 let compactor = LlmContextCompactor::new(provider, config);
791
792 let messages = vec![
793 Message::user(format!("{SUMMARY_PREFIX}already compacted context")),
794 Message::assistant("Continue with the next task using this context."),
795 ];
796
797 let summary = compactor.compact(&messages).await?;
798
799 {
800 let recorded = requests.lock().unwrap();
801 assert_eq!(recorded.len(), 1);
802 assert_eq!(summary, "Fresh summary");
803 assert!(recorded[0].contains("Continue with the next task using this context."));
804 assert!(!recorded[0].contains("already compacted context"));
805 drop(recorded);
806 }
807
808 Ok(())
809 }
810
811 #[tokio::test]
812 async fn test_compact_history_ignores_prior_summary_in_candidate_payload() -> Result<()> {
813 let requests = Arc::new(Mutex::new(Vec::new()));
814 let provider = Arc::new(MockProvider::new_with_request_log(
815 "Fresh history summary",
816 requests.clone(),
817 ));
818 let config = CompactionConfig::default()
819 .with_retain_recent(2)
820 .with_min_messages(1);
821 let compactor = LlmContextCompactor::new(provider, config);
822
823 let messages = vec![
824 Message::user(format!("{SUMMARY_PREFIX}already compacted context")),
825 Message::assistant("Current turn content from the latest exchange."),
826 Message::assistant("Recent message that should stay."),
827 Message::user("Newest note that should stay."),
828 ];
829
830 let result = compactor.compact_history(messages).await?;
831
832 {
833 let recorded = requests.lock().unwrap();
834 assert_eq!(recorded.len(), 1);
835 assert!(recorded[0].contains("Current turn content from the latest exchange."));
836 assert!(!recorded[0].contains("already compacted context"));
837 drop(recorded);
838 }
839 assert_eq!(result.new_count, 4);
840
841 Ok(())
842 }
843
844 #[tokio::test]
845 async fn test_compact_history_is_no_op_when_candidate_window_has_only_summaries() -> Result<()>
846 {
847 let requests = Arc::new(Mutex::new(Vec::new()));
848 let provider = Arc::new(MockProvider::new_with_request_log(
849 "This summary should not be used",
850 requests.clone(),
851 ));
852 let config = CompactionConfig::default()
853 .with_retain_recent(2)
854 .with_min_messages(1);
855 let compactor = LlmContextCompactor::new(provider, config);
856
857 let messages = vec![
858 Message::user(format!("{SUMMARY_PREFIX}first prior compacted section")),
859 Message::assistant(format!("{SUMMARY_PREFIX}second prior compacted section")),
860 Message::user(format!("{SUMMARY_PREFIX}third prior compacted section")),
861 Message::assistant("final short note"),
862 ];
863
864 let result = compactor.compact_history(messages).await?;
865
866 {
867 let recorded = requests.lock().unwrap();
868 assert!(recorded.is_empty());
869 drop(recorded);
870 }
871 assert_eq!(result.new_count, 4);
872 assert_eq!(result.messages.len(), 4);
873
874 if let Content::Text(text) = &result.messages[0].content {
875 assert!(text.contains(COMPACT_EMPTY_SUMMARY));
876 } else {
877 panic!("Expected summary text in first message");
878 }
879
880 Ok(())
881 }
882
883 #[tokio::test]
884 async fn test_compact_history_preserves_tool_use_tool_result_pairs() -> Result<()> {
885 let provider = Arc::new(MockProvider::new("Summary of earlier conversation."));
886 let config = CompactionConfig::default()
887 .with_retain_recent(2)
888 .with_min_messages(3);
889 let compactor = LlmContextCompactor::new(provider, config);
890
891 let messages = vec![
895 Message::user("What files are in the project?"),
897 Message::assistant("Let me check that for you."),
899 Message {
901 role: Role::Assistant,
902 content: Content::Blocks(vec![ContentBlock::ToolUse {
903 id: "tool_1".to_string(),
904 name: "list_files".to_string(),
905 input: serde_json::json!({}),
906 thought_signature: None,
907 }]),
908 },
909 Message {
911 role: Role::User,
912 content: Content::Blocks(vec![ContentBlock::ToolResult {
913 tool_use_id: "tool_1".to_string(),
914 content: "file1.rs\nfile2.rs".to_string(),
915 is_error: None,
916 }]),
917 },
918 Message::assistant("The project contains file1.rs and file2.rs."),
920 ];
921
922 let result = compactor.compact_history(messages).await?;
923
924 assert_eq!(result.new_count, 5);
928
929 let kept_assistant = &result.messages[2];
932 if let Content::Blocks(blocks) = &kept_assistant.content {
933 assert!(
934 blocks
935 .iter()
936 .any(|b| matches!(b, ContentBlock::ToolUse { .. })),
937 "Expected assistant tool_use in kept messages"
938 );
939 } else {
940 panic!("Expected Blocks content for assistant tool_use message");
941 }
942
943 let kept_user = &result.messages[3];
945 if let Content::Blocks(blocks) = &kept_user.content {
946 assert!(
947 blocks
948 .iter()
949 .any(|b| matches!(b, ContentBlock::ToolResult { .. })),
950 "Expected user tool_result in kept messages"
951 );
952 } else {
953 panic!("Expected Blocks content for user tool_result message");
954 }
955
956 Ok(())
957 }
958
959 #[tokio::test]
960 async fn test_compact_history_split_skips_leading_orphan_after_summary_ack() -> Result<()> {
961 let provider = Arc::new(MockProvider::new("Re-summary."));
989 let config = CompactionConfig::default()
990 .with_retain_recent(3)
991 .with_min_messages(1);
992 let compactor = LlmContextCompactor::new(provider, config);
993
994 let messages = vec![
995 Message::user(format!("{SUMMARY_PREFIX}Old summary about toolu_X.")),
996 Message::assistant(SUMMARY_ACKNOWLEDGMENT),
997 Message {
998 role: Role::User,
999 content: Content::Blocks(vec![ContentBlock::ToolResult {
1000 tool_use_id: "toolu_X".to_string(),
1001 content: "result for X".to_string(),
1002 is_error: None,
1003 }]),
1004 },
1005 Message::assistant("Result interpreted."),
1006 Message::user("Now what?"),
1007 ];
1008
1009 let result = compactor.compact_history(messages).await?;
1010
1011 let mut seen_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
1012 for msg in &result.messages {
1013 if let Content::Blocks(blocks) = &msg.content {
1014 for block in blocks {
1015 match block {
1016 ContentBlock::ToolResult { tool_use_id, .. } => {
1017 assert!(
1018 seen_ids.contains(tool_use_id),
1019 "orphan tool_use_id {tool_use_id} survived split selection",
1020 );
1021 }
1022 ContentBlock::ToolUse { id, .. } => {
1023 seen_ids.insert(id.clone());
1024 }
1025 _ => {}
1026 }
1027 }
1028 }
1029 }
1030
1031 Ok(())
1032 }
1033
1034 #[tokio::test]
1035 async fn test_compact_history_keeps_tool_pair_when_immediate_prev_is_text_only() -> Result<()> {
1036 let provider = Arc::new(MockProvider::new("Boundary summary."));
1043 let config = CompactionConfig::default()
1044 .with_retain_recent(2)
1045 .with_min_messages(1);
1046 let compactor = LlmContextCompactor::new(provider, config);
1047
1048 let messages = vec![
1061 Message::user("first turn"),
1062 Message::assistant("text only"),
1063 Message {
1064 role: Role::User,
1065 content: Content::Blocks(vec![ContentBlock::ToolResult {
1066 tool_use_id: "toolu_Y".to_string(),
1067 content: "ancient result".to_string(),
1068 is_error: None,
1069 }]),
1070 },
1071 Message::assistant("then a reply"),
1072 Message::user("ok thanks"),
1073 ];
1074
1075 let result = compactor.compact_history(messages).await?;
1076
1077 let has_tool_result = result.messages.iter().any(|m| {
1081 matches!(
1082 &m.content,
1083 Content::Blocks(blocks)
1084 if blocks.iter().any(|b| matches!(b, ContentBlock::ToolResult { .. }))
1085 )
1086 });
1087 assert!(
1088 !has_tool_result,
1089 "orphan tool_result should have been pushed into to_summarize, not retained",
1090 );
1091
1092 Ok(())
1093 }
1094
1095 #[tokio::test]
1096 async fn test_compact_history_retained_tail_is_token_capped() -> Result<()> {
1097 let provider = Arc::new(MockProvider::new(
1098 "Project summary with a long context and technical context.",
1099 ));
1100 let config = CompactionConfig::default()
1101 .with_retain_recent(8)
1102 .with_min_messages(1)
1103 .with_threshold_tokens(1);
1104 let compactor = LlmContextCompactor::new(provider, config);
1105
1106 let mut messages = Vec::new();
1107
1108 messages.extend((0..6).map(|index| Message::user(format!("pre-compaction noise {index}"))));
1110
1111 messages.extend(
1113 (0..8).map(|index| Message::assistant(format!("kept-{index}: {}", "x".repeat(12_000)))),
1114 );
1115
1116 let result = compactor.compact_history(messages).await?;
1117
1118 let retained_tail = &result.messages[2..];
1120 assert!(retained_tail.len() < 8);
1121
1122 let mut latest_index = -1i32;
1123 let mut all_retained = true;
1124 for message in retained_tail {
1125 if let Content::Text(text) = &message.content {
1126 if let Some(number) = text.split(':').next().and_then(|prefix| {
1127 prefix
1128 .strip_prefix("kept-")
1129 .and_then(|rest| rest.parse::<i32>().ok())
1130 }) {
1131 if number >= 0 {
1132 latest_index = latest_index.max(number);
1133 }
1134 } else {
1135 all_retained = false;
1136 }
1137 } else {
1138 all_retained = false;
1139 }
1140 }
1141
1142 assert!(all_retained);
1143 assert_eq!(latest_index, 7);
1144 assert!(
1145 TokenEstimator::estimate_history(retained_tail) <= MAX_RETAINED_TAIL_MESSAGE_TOKENS
1146 );
1147 assert!(compactor.needs_compaction(&result.messages));
1148
1149 Ok(())
1150 }
1151
1152 #[tokio::test]
1153 async fn test_compact_history_skips_summary_ack_when_retained_tail_is_empty() -> Result<()> {
1154 let provider = Arc::new(MockProvider::new("Summary for oversized user turn."));
1155 let config = CompactionConfig::default()
1156 .with_retain_recent(1)
1157 .with_min_messages(1)
1158 .with_threshold_tokens(1);
1159 let compactor = LlmContextCompactor::new(provider, config);
1160
1161 let messages = vec![
1162 Message::assistant("Earlier assistant context."),
1163 Message::user(format!("oversized-user-turn: {}", "x".repeat(200_000))),
1164 ];
1165
1166 let result = compactor.compact_history(messages).await?;
1167
1168 assert_eq!(result.new_count, 1);
1169 assert_eq!(result.messages.len(), 1);
1170
1171 let only_message = &result.messages[0];
1172 assert_eq!(only_message.role, Role::User);
1173
1174 if let Content::Text(text) = &only_message.content {
1175 assert!(text.contains("Previous conversation summary"));
1176 assert!(!text.contains(SUMMARY_ACKNOWLEDGMENT));
1177 } else {
1178 panic!("Expected summary text when retained tail is empty");
1179 }
1180
1181 Ok(())
1182 }
1183}