1use crate::llm::{ChatOutcome, ChatRequest, Content, ContentBlock, LlmProvider, Message, Role};
4use anyhow::{Context, Result, bail};
5use async_trait::async_trait;
6use std::fmt::Write;
7use std::sync::Arc;
8
9use super::config::CompactionConfig;
10use super::estimator::TokenEstimator;
11
/// Marker prepended to synthetic summary messages so later compaction passes
/// can recognize them and avoid summarizing a summary.
const SUMMARY_PREFIX: &str = "[Previous conversation summary]\n\n";
/// System prompt sent with the summarization request.
const COMPACTION_SYSTEM_PROMPT: &str = "You are a precise summarizer. Your task is to create concise but complete summaries of conversations, preserving all technical details needed to continue the work.";
/// Text placed before the formatted conversation in the summarization prompt.
const COMPACTION_SUMMARY_PROMPT_PREFIX: &str = "Summarize this conversation concisely, preserving:\n- Key decisions and conclusions reached\n- Important file paths, code changes, and technical details\n- Current task context and what has been accomplished\n- Any pending items, errors encountered, or next steps\n\nBe specific about technical details (file names, function names, error messages) as these\nare critical for continuing the work.\n\nConversation:\n";
/// Text placed after the formatted conversation in the summarization prompt.
const COMPACTION_SUMMARY_PROMPT_SUFFIX: &str =
    "Provide a concise summary (aim for 500-1000 words):";
/// Summary used when every candidate message was itself a prior summary,
/// so there is nothing new to send to the LLM.
const COMPACT_EMPTY_SUMMARY: &str = "No additional context was available to summarize; the previous messages were already compacted.";
/// Synthetic assistant reply inserted after the summary message when a
/// non-empty tail of recent messages is retained.
const SUMMARY_ACKNOWLEDGMENT: &str =
    "I understand the context from the summary. Let me continue from where we left off.";
/// Upper bound (estimated tokens) on the retained tail of recent messages.
const MAX_RETAINED_TAIL_MESSAGE_TOKENS: usize = 20_000;
/// Tool-result payloads longer than this (in chars) are truncated when
/// formatting the conversation for summarization.
const MAX_TOOL_RESULT_CHARS: usize = 500;
22
/// Strategy for shrinking a conversation history that has grown too large.
#[async_trait]
pub trait ContextCompactor: Send + Sync {
    /// Produce a text summary of `messages`.
    ///
    /// # Errors
    /// Returns an error when the underlying summarization fails.
    async fn compact(&self, messages: &[Message]) -> Result<String>;

    /// Estimate the total token count of `messages`.
    fn estimate_tokens(&self, messages: &[Message]) -> usize;

    /// Whether `messages` should be compacted now.
    fn needs_compaction(&self, messages: &[Message]) -> bool;

    /// Replace older messages with a summary while keeping a recent tail.
    ///
    /// # Errors
    /// Returns an error when summarization of the older messages fails.
    async fn compact_history(&self, messages: Vec<Message>) -> Result<CompactionResult>;
}
46
/// Outcome of [`ContextCompactor::compact_history`], with before/after metrics.
#[derive(Debug, Clone)]
pub struct CompactionResult {
    /// The new (possibly compacted) message history.
    pub messages: Vec<Message>,
    /// Number of messages before compaction.
    pub original_count: usize,
    /// Number of messages after compaction.
    pub new_count: usize,
    /// Estimated token count before compaction.
    pub original_tokens: usize,
    /// Estimated token count after compaction.
    pub new_tokens: usize,
}
61
/// [`ContextCompactor`] that asks an [`LlmProvider`] to summarize older turns.
pub struct LlmContextCompactor<P: LlmProvider> {
    /// Provider used for the summarization call.
    provider: Arc<P>,
    /// Tuning knobs: thresholds, retention count, auto-compact flag.
    config: CompactionConfig,
    /// System prompt for the summarization request.
    system_prompt: String,
    /// Text placed before the formatted conversation in the summary prompt.
    summary_prompt_prefix: String,
    /// Text placed after the formatted conversation in the summary prompt.
    summary_prompt_suffix: String,
}
72
impl<P: LlmProvider> LlmContextCompactor<P> {
    /// Create a compactor backed by `provider` with an explicit `config`,
    /// using the default summarization prompts.
    #[must_use]
    pub fn new(provider: Arc<P>, config: CompactionConfig) -> Self {
        Self {
            provider,
            config,
            system_prompt: COMPACTION_SYSTEM_PROMPT.to_string(),
            summary_prompt_prefix: COMPACTION_SUMMARY_PROMPT_PREFIX.to_string(),
            summary_prompt_suffix: COMPACTION_SUMMARY_PROMPT_SUFFIX.to_string(),
        }
    }

    /// Create a compactor with [`CompactionConfig::default`].
    #[must_use]
    pub fn with_defaults(provider: Arc<P>) -> Self {
        Self::new(provider, CompactionConfig::default())
    }

    /// The compaction configuration in use.
    #[must_use]
    pub const fn config(&self) -> &CompactionConfig {
        &self.config
    }

    /// Override the system prompt and the prefix/suffix wrapped around the
    /// formatted conversation when building the summarization prompt.
    #[must_use]
    pub fn with_prompts(
        mut self,
        system_prompt: impl Into<String>,
        summary_prompt_prefix: impl Into<String>,
        summary_prompt_suffix: impl Into<String>,
    ) -> Self {
        self.system_prompt = system_prompt.into();
        self.summary_prompt_prefix = summary_prompt_prefix.into();
        self.summary_prompt_suffix = summary_prompt_suffix.into();
        self
    }

    /// Whether `content` is a synthetic summary produced by a prior
    /// compaction pass (recognized by [`SUMMARY_PREFIX`]).
    fn is_summary_message(content: &Content) -> bool {
        match content {
            Content::Text(text) => text.starts_with(SUMMARY_PREFIX),
            Content::Blocks(blocks) => blocks.iter().any(|block| match block {
                ContentBlock::Text { text } => text.starts_with(SUMMARY_PREFIX),
                _ => false,
            }),
        }
    }

    /// Whether `content` contains at least one `ToolUse` block.
    fn has_tool_use(content: &Content) -> bool {
        matches!(
            content,
            Content::Blocks(blocks)
                if blocks
                    .iter()
                    .any(|block| matches!(block, ContentBlock::ToolUse { .. }))
        )
    }

    /// Whether `content` contains at least one `ToolResult` block.
    fn has_tool_result(content: &Content) -> bool {
        matches!(
            content,
            Content::Blocks(blocks)
                if blocks
                    .iter()
                    .any(|block| matches!(block, ContentBlock::ToolResult { .. }))
        )
    }

    /// Move `split_point` left until it no longer separates a tool_use
    /// message from its adjacent tool_result (in either order), so a
    /// request/response pair is never split between summary and tail.
    fn split_point_preserves_tool_pairs(messages: &[Message], mut split_point: usize) -> usize {
        while split_point > 0 && split_point < messages.len() {
            let prev = &messages[split_point - 1];
            let next = &messages[split_point];

            // Pair shapes: assistant tool_use -> user tool_result, or
            // user tool_result -> assistant tool_use.
            let crosses_tool_pair = (prev.role == Role::Assistant
                && Self::has_tool_use(&prev.content)
                && next.role == Role::User
                && Self::has_tool_result(&next.content))
                || (prev.role == Role::User
                    && Self::has_tool_result(&prev.content)
                    && next.role == Role::Assistant
                    && Self::has_tool_use(&next.content));

            if crosses_tool_pair {
                split_point -= 1;
                continue;
            }

            break;
        }

        split_point
    }

    /// Compute a split point whose retained tail both fits within
    /// `max_tokens` and does not cut through a tool pair.
    ///
    /// Iterates to a fixed point: capping the tail may land on a tool-pair
    /// boundary, and shifting off that boundary changes the candidate start,
    /// so the two adjustments are re-applied until they agree.
    fn split_point_preserves_tool_pairs_with_cap(
        messages: &[Message],
        mut split_point: usize,
        max_tokens: usize,
    ) -> usize {
        loop {
            let candidate = Self::retain_tail_with_token_cap(messages, split_point, max_tokens);
            let adjusted = Self::split_point_preserves_tool_pairs(messages, candidate);

            if adjusted == split_point {
                return candidate;
            }

            split_point = adjusted;
        }
    }

    /// Walk backward from the end of `messages` (never before `start`),
    /// accumulating estimated tokens, and return the earliest index whose
    /// suffix still fits within `max_tokens`.
    ///
    /// A cap of 0 retains no tail (returns `messages.len()`).
    fn retain_tail_with_token_cap(messages: &[Message], start: usize, max_tokens: usize) -> usize {
        if start >= messages.len() {
            return messages.len();
        }

        if max_tokens == 0 {
            return messages.len();
        }

        let mut used = 0usize;
        let mut retained_start = messages.len();

        for idx in (start..messages.len()).rev() {
            let message_tokens = TokenEstimator::estimate_message(&messages[idx]);
            if used + message_tokens > max_tokens {
                break;
            }

            retained_start = idx;
            used += message_tokens;
        }

        retained_start
    }

    /// Render `messages` as plain text for the summarization prompt:
    /// one `Role: content` entry per message, with non-text blocks
    /// (thinking, tool calls/results, attachments) rendered as bracketed
    /// placeholders and long tool results truncated.
    fn format_messages_for_summary(messages: &[Message]) -> String {
        let mut output = String::new();

        for message in messages {
            let role = match message.role {
                Role::User => "User",
                Role::Assistant => "Assistant",
            };

            // Writing to a String cannot fail; errors are intentionally ignored.
            let _ = write!(output, "{role}: ");

            match &message.content {
                Content::Text(text) => {
                    let _ = writeln!(output, "{text}");
                }
                Content::Blocks(blocks) => {
                    for block in blocks {
                        match block {
                            ContentBlock::Text { text } => {
                                let _ = writeln!(output, "{text}");
                            }
                            ContentBlock::Thinking { thinking, .. } => {
                                let _ = writeln!(output, "[Thinking: {thinking}]");
                            }
                            ContentBlock::RedactedThinking { .. } => {
                                let _ = writeln!(output, "[Redacted thinking]");
                            }
                            ContentBlock::ToolUse { name, input, .. } => {
                                let _ = writeln!(
                                    output,
                                    "[Called tool: {name} with input: {}]",
                                    serde_json::to_string(input).unwrap_or_default()
                                );
                            }
                            ContentBlock::ToolResult {
                                content, is_error, ..
                            } => {
                                let status = if is_error.unwrap_or(false) {
                                    "error"
                                } else {
                                    "success"
                                };
                                // Truncate on char boundaries so multi-byte
                                // UTF-8 content is never split mid-character.
                                let truncated = if content.chars().count() > MAX_TOOL_RESULT_CHARS {
                                    let prefix: String =
                                        content.chars().take(MAX_TOOL_RESULT_CHARS).collect();
                                    format!("{prefix}... (truncated)")
                                } else {
                                    content.clone()
                                };
                                let _ = writeln!(output, "[Tool result ({status}): {truncated}]");
                            }
                            ContentBlock::Image { source } => {
                                let _ = writeln!(output, "[Image: {}]", source.media_type);
                            }
                            ContentBlock::Document { source } => {
                                let _ = writeln!(output, "[Document: {}]", source.media_type);
                            }
                        }
                    }
                }
            }
            // Blank line between messages.
            output.push('\n');
        }

        output
    }

    /// Wrap the formatted conversation in the configured prefix/suffix.
    fn build_summary_prompt(&self, messages_text: &str) -> String {
        format!(
            "{}{}{}",
            self.summary_prompt_prefix, messages_text, self.summary_prompt_suffix
        )
    }
}
293
294#[async_trait]
295impl<P: LlmProvider> ContextCompactor for LlmContextCompactor<P> {
296 async fn compact(&self, messages: &[Message]) -> Result<String> {
297 let messages_to_summarize: Vec<_> = messages
298 .iter()
299 .filter(|message| !Self::is_summary_message(&message.content))
300 .cloned()
301 .collect();
302
303 if messages_to_summarize.is_empty() {
304 return Ok(COMPACT_EMPTY_SUMMARY.to_string());
305 }
306
307 let messages_text = Self::format_messages_for_summary(&messages_to_summarize);
308 let prompt = self.build_summary_prompt(&messages_text);
309
310 let request = ChatRequest {
311 system: self.system_prompt.clone(),
312 messages: vec![Message::user(prompt)],
313 tools: None,
314 max_tokens: 2000,
315 max_tokens_explicit: true,
316 session_id: None,
317 cached_content: None,
318 thinking: None,
319 };
320
321 let outcome = self
322 .provider
323 .chat(request)
324 .await
325 .context("Failed to call LLM for summarization")?;
326
327 match outcome {
328 ChatOutcome::Success(response) => response
329 .first_text()
330 .map(String::from)
331 .context("No text in summarization response"),
332 ChatOutcome::RateLimited => {
333 bail!("Rate limited during summarization")
334 }
335 ChatOutcome::InvalidRequest(msg) => {
336 bail!("Invalid request during summarization: {msg}")
337 }
338 ChatOutcome::ServerError(msg) => {
339 bail!("Server error during summarization: {msg}")
340 }
341 }
342 }
343
344 fn estimate_tokens(&self, messages: &[Message]) -> usize {
345 TokenEstimator::estimate_history(messages)
346 }
347
348 fn needs_compaction(&self, messages: &[Message]) -> bool {
349 if !self.config.auto_compact {
350 return false;
351 }
352
353 if messages.len() < self.config.min_messages_for_compaction {
354 return false;
355 }
356
357 let estimated_tokens = self.estimate_tokens(messages);
358 estimated_tokens > self.config.threshold_tokens
359 }
360
361 async fn compact_history(&self, messages: Vec<Message>) -> Result<CompactionResult> {
362 let original_count = messages.len();
363 let original_tokens = self.estimate_tokens(&messages);
364
365 if messages.len() <= self.config.retain_recent {
367 return Ok(CompactionResult {
368 messages,
369 original_count,
370 new_count: original_count,
371 original_tokens,
372 new_tokens: original_tokens,
373 });
374 }
375
376 let mut split_point = messages.len().saturating_sub(self.config.retain_recent);
378 split_point = Self::split_point_preserves_tool_pairs_with_cap(
379 &messages,
380 split_point,
381 MAX_RETAINED_TAIL_MESSAGE_TOKENS,
382 );
383
384 let (to_summarize, to_keep) = messages.split_at(split_point);
385
386 let summary = self.compact(to_summarize).await?;
388
389 let mut new_messages = Vec::with_capacity(2 + to_keep.len());
391
392 new_messages.push(Message::user(format!("{SUMMARY_PREFIX}{summary}")));
394
395 if !to_keep.is_empty() {
400 new_messages.push(Message::assistant(SUMMARY_ACKNOWLEDGMENT));
401 }
402
403 new_messages.extend(to_keep.iter().cloned());
405
406 let new_count = new_messages.len();
407 let new_tokens = self.estimate_tokens(&new_messages);
408
409 Ok(CompactionResult {
410 messages: new_messages,
411 original_count,
412 new_count,
413 original_tokens,
414 new_tokens,
415 })
416 }
417}
418
#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm::{ChatResponse, StopReason, Usage};
    use std::sync::Mutex;

    /// LLM stub that returns a canned summary and optionally records the
    /// user-visible text of every request it receives.
    struct MockProvider {
        summary_response: String,
        requests: Option<Arc<Mutex<Vec<String>>>>,
    }

    impl MockProvider {
        /// Stub that only returns `summary` and records nothing.
        fn new(summary: &str) -> Self {
            Self {
                summary_response: summary.to_string(),
                requests: None,
            }
        }

        /// Stub that also appends each request's prompt text to `requests`.
        fn new_with_request_log(summary: &str, requests: Arc<Mutex<Vec<String>>>) -> Self {
            Self {
                summary_response: summary.to_string(),
                requests: Some(requests),
            }
        }
    }

    #[async_trait]
    impl LlmProvider for MockProvider {
        async fn chat(&self, request: ChatRequest) -> Result<ChatOutcome> {
            // When a request log is attached, capture the text content of the
            // first message that has any (text blocks joined with newlines).
            if let Some(requests) = &self.requests {
                let mut entries = requests.lock().unwrap();
                let user_prompt = request
                    .messages
                    .iter()
                    .find_map(|message| match &message.content {
                        Content::Text(text) => Some(text.clone()),
                        Content::Blocks(blocks) => {
                            let text = blocks
                                .iter()
                                .filter_map(|block| {
                                    if let ContentBlock::Text { text } = block {
                                        Some(text.as_str())
                                    } else {
                                        None
                                    }
                                })
                                .collect::<Vec<_>>()
                                .join("\n");
                            if text.is_empty() { None } else { Some(text) }
                        }
                    })
                    .unwrap_or_default();
                entries.push(user_prompt);
            }
            Ok(ChatOutcome::Success(ChatResponse {
                id: "test".to_string(),
                content: vec![ContentBlock::Text {
                    text: self.summary_response.clone(),
                }],
                model: "mock".to_string(),
                stop_reason: Some(StopReason::EndTurn),
                usage: Usage {
                    input_tokens: 100,
                    output_tokens: 50,
                    cached_input_tokens: 0,
                },
            }))
        }

        fn model(&self) -> &'static str {
            "mock-model"
        }

        fn provider(&self) -> &'static str {
            "mock"
        }
    }

    /// Short history under the token threshold must not trigger compaction.
    #[test]
    fn test_needs_compaction_below_threshold() {
        let provider = Arc::new(MockProvider::new("summary"));
        let config = CompactionConfig::default()
            .with_threshold_tokens(10_000)
            .with_min_messages(5);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user("Hello"),
            Message::assistant("Hi"),
            Message::user("How are you?"),
        ];

        assert!(!compactor.needs_compaction(&messages));
    }

    /// History exceeding both the message minimum and token threshold
    /// should trigger compaction.
    #[test]
    fn test_needs_compaction_above_threshold() {
        let provider = Arc::new(MockProvider::new("summary"));
        let config = CompactionConfig::default()
            .with_threshold_tokens(50)
            .with_min_messages(3);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user("Hello, this is a longer message to test compaction"),
            Message::assistant(
                "Hi there! This is also a longer response to help trigger compaction",
            ),
            Message::user("Great, let's continue with even more text here"),
            Message::assistant("Absolutely, adding more content to ensure we exceed the threshold"),
        ];

        assert!(compactor.needs_compaction(&messages));
    }

    /// `auto_compact = false` disables compaction even past the thresholds.
    #[test]
    fn test_needs_compaction_auto_disabled() {
        let provider = Arc::new(MockProvider::new("summary"));
        let config = CompactionConfig::default()
            .with_threshold_tokens(10)
            .with_min_messages(1)
            .with_auto_compact(false);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user("Hello, this is a longer message"),
            Message::assistant("Response here"),
        ];

        assert!(!compactor.needs_compaction(&messages));
    }

    /// 6 messages with retain_recent=2 become: summary + ack + 2 retained.
    #[tokio::test]
    async fn test_compact_history() -> Result<()> {
        let provider = Arc::new(MockProvider::new(
            "User asked about Rust programming. Assistant explained ownership, borrowing, and lifetimes.",
        ));
        let config = CompactionConfig::default()
            .with_retain_recent(2)
            .with_min_messages(3);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user(
                "What is Rust? I've heard it's a systems programming language but I don't know much about it. Can you explain the key features and why people are excited about it?",
            ),
            Message::assistant(
                "Rust is a systems programming language focused on safety, speed, and concurrency. It achieves memory safety without garbage collection through its ownership system. The key features include zero-cost abstractions, guaranteed memory safety, threads without data races, and minimal runtime.",
            ),
            Message::user(
                "Tell me about ownership in detail. How does it work and what are the rules? I want to understand this core concept thoroughly.",
            ),
            Message::assistant(
                "Ownership is Rust's central feature with three rules: each value has one owner, only one owner at a time, and the value is dropped when owner goes out of scope. This system prevents memory leaks, double frees, and dangling pointers at compile time.",
            ),
            Message::user("What about borrowing?"),
            Message::assistant("Borrowing allows references to data without taking ownership."),
        ];

        let result = compactor.compact_history(messages).await?;

        // summary + acknowledgment + 2 retained messages
        assert_eq!(result.new_count, 4);
        assert_eq!(result.original_count, 6);

        assert!(
            result.new_tokens < result.original_tokens,
            "Expected fewer tokens after compaction: new={} < original={}",
            result.new_tokens,
            result.original_tokens
        );

        if let Content::Text(text) = &result.messages[0].content {
            assert!(text.contains("Previous conversation summary"));
        }

        Ok(())
    }

    /// Histories within the retained window are returned unchanged.
    #[tokio::test]
    async fn test_compact_history_too_few_messages() -> Result<()> {
        let provider = Arc::new(MockProvider::new("summary"));
        let config = CompactionConfig::default().with_retain_recent(5);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user("Hello"),
            Message::assistant("Hi"),
            Message::user("Bye"),
        ];

        let result = compactor.compact_history(messages.clone()).await?;

        assert_eq!(result.new_count, 3);
        assert_eq!(result.messages.len(), 3);

        Ok(())
    }

    /// Formatting labels each message with its role.
    #[test]
    fn test_format_messages_for_summary() {
        let messages = vec![Message::user("Hello"), Message::assistant("Hi there!")];

        let formatted = LlmContextCompactor::<MockProvider>::format_messages_for_summary(&messages);

        assert!(formatted.contains("User: Hello"));
        assert!(formatted.contains("Assistant: Hi there!"));
    }

    /// Truncation counts chars, not bytes, so multi-byte content is safe.
    #[test]
    fn test_format_messages_for_summary_truncates_tool_results_unicode_safely() {
        let long_unicode = "é".repeat(600);

        let messages = vec![Message {
            role: Role::Assistant,
            content: Content::Blocks(vec![ContentBlock::ToolResult {
                tool_use_id: "tool-1".to_string(),
                content: long_unicode,
                is_error: Some(false),
            }]),
        }];

        let formatted = LlmContextCompactor::<MockProvider>::format_messages_for_summary(&messages);

        assert!(formatted.contains("... (truncated)"));
    }

    /// `compact` must exclude prior summary messages from the prompt payload.
    #[tokio::test]
    async fn test_compact_filters_summary_messages() -> Result<()> {
        let requests = Arc::new(Mutex::new(Vec::new()));
        let provider = Arc::new(MockProvider::new_with_request_log(
            "Fresh summary",
            requests.clone(),
        ));
        let config = CompactionConfig::default().with_min_messages(1);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user(format!("{SUMMARY_PREFIX}already compacted context")),
            Message::assistant("Continue with the next task using this context."),
        ];

        let summary = compactor.compact(&messages).await?;

        {
            let recorded = requests.lock().unwrap();
            assert_eq!(recorded.len(), 1);
            assert_eq!(summary, "Fresh summary");
            assert!(recorded[0].contains("Continue with the next task using this context."));
            assert!(!recorded[0].contains("already compacted context"));
            drop(recorded);
        }

        Ok(())
    }

    /// `compact_history` must not re-send a prior summary to the LLM.
    #[tokio::test]
    async fn test_compact_history_ignores_prior_summary_in_candidate_payload() -> Result<()> {
        let requests = Arc::new(Mutex::new(Vec::new()));
        let provider = Arc::new(MockProvider::new_with_request_log(
            "Fresh history summary",
            requests.clone(),
        ));
        let config = CompactionConfig::default()
            .with_retain_recent(2)
            .with_min_messages(1);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user(format!("{SUMMARY_PREFIX}already compacted context")),
            Message::assistant("Current turn content from the latest exchange."),
            Message::assistant("Recent message that should stay."),
            Message::user("Newest note that should stay."),
        ];

        let result = compactor.compact_history(messages).await?;

        {
            let recorded = requests.lock().unwrap();
            assert_eq!(recorded.len(), 1);
            assert!(recorded[0].contains("Current turn content from the latest exchange."));
            assert!(!recorded[0].contains("already compacted context"));
            drop(recorded);
        }
        assert_eq!(result.new_count, 4);

        Ok(())
    }

    /// When the to-summarize window contains only prior summaries, no LLM
    /// call is made and the placeholder summary is used instead.
    #[tokio::test]
    async fn test_compact_history_is_no_op_when_candidate_window_has_only_summaries() -> Result<()>
    {
        let requests = Arc::new(Mutex::new(Vec::new()));
        let provider = Arc::new(MockProvider::new_with_request_log(
            "This summary should not be used",
            requests.clone(),
        ));
        let config = CompactionConfig::default()
            .with_retain_recent(2)
            .with_min_messages(1);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user(format!("{SUMMARY_PREFIX}first prior compacted section")),
            Message::assistant(format!("{SUMMARY_PREFIX}second prior compacted section")),
            Message::user(format!("{SUMMARY_PREFIX}third prior compacted section")),
            Message::assistant("final short note"),
        ];

        let result = compactor.compact_history(messages).await?;

        {
            let recorded = requests.lock().unwrap();
            assert!(recorded.is_empty());
            drop(recorded);
        }
        assert_eq!(result.new_count, 4);
        assert_eq!(result.messages.len(), 4);

        if let Content::Text(text) = &result.messages[0].content {
            assert!(text.contains(COMPACT_EMPTY_SUMMARY));
        } else {
            panic!("Expected summary text in first message");
        }

        Ok(())
    }

    /// The split point must not separate an assistant tool_use from the
    /// user tool_result that answers it.
    #[tokio::test]
    async fn test_compact_history_preserves_tool_use_tool_result_pairs() -> Result<()> {
        let provider = Arc::new(MockProvider::new("Summary of earlier conversation."));
        let config = CompactionConfig::default()
            .with_retain_recent(2)
            .with_min_messages(3);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user("What files are in the project?"),
            Message::assistant("Let me check that for you."),
            Message {
                role: Role::Assistant,
                content: Content::Blocks(vec![ContentBlock::ToolUse {
                    id: "tool_1".to_string(),
                    name: "list_files".to_string(),
                    input: serde_json::json!({}),
                    thought_signature: None,
                }]),
            },
            Message {
                role: Role::User,
                content: Content::Blocks(vec![ContentBlock::ToolResult {
                    tool_use_id: "tool_1".to_string(),
                    content: "file1.rs\nfile2.rs".to_string(),
                    is_error: None,
                }]),
            },
            Message::assistant("The project contains file1.rs and file2.rs."),
        ];

        let result = compactor.compact_history(messages).await?;

        // summary + ack + 3 retained (split moved left to keep the pair)
        assert_eq!(result.new_count, 5);

        let kept_assistant = &result.messages[2];
        if let Content::Blocks(blocks) = &kept_assistant.content {
            assert!(
                blocks
                    .iter()
                    .any(|b| matches!(b, ContentBlock::ToolUse { .. })),
                "Expected assistant tool_use in kept messages"
            );
        } else {
            panic!("Expected Blocks content for assistant tool_use message");
        }

        let kept_user = &result.messages[3];
        if let Content::Blocks(blocks) = &kept_user.content {
            assert!(
                blocks
                    .iter()
                    .any(|b| matches!(b, ContentBlock::ToolResult { .. })),
                "Expected user tool_result in kept messages"
            );
        } else {
            panic!("Expected Blocks content for user tool_result message");
        }

        Ok(())
    }

    /// The reverse ordering (tool_result followed by tool_use) is also
    /// kept together across the split point.
    #[tokio::test]
    async fn test_compact_history_preserves_tool_result_tool_use_pairs() -> Result<()> {
        let provider = Arc::new(MockProvider::new("Summary around tool pair."));
        let config = CompactionConfig::default()
            .with_retain_recent(2)
            .with_min_messages(1);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user("Start a workflow"),
            Message {
                role: Role::User,
                content: Content::Blocks(vec![ContentBlock::ToolResult {
                    tool_use_id: "tool_odd".to_string(),
                    content: "prior result".to_string(),
                    is_error: None,
                }]),
            },
            Message {
                role: Role::Assistant,
                content: Content::Blocks(vec![ContentBlock::ToolUse {
                    id: "tool_odd".to_string(),
                    name: "follow_up".to_string(),
                    input: serde_json::json!({}),
                    thought_signature: None,
                }]),
            },
            Message::assistant("Follow up done."),
        ];

        let result = compactor.compact_history(messages).await?;

        assert_eq!(result.new_count, 5);

        let kept_result = &result.messages[2];
        if let Content::Blocks(blocks) = &kept_result.content {
            assert!(
                blocks
                    .iter()
                    .any(|b| matches!(b, ContentBlock::ToolResult { .. })),
                "Expected kept user tool_result in retained tail"
            );
        } else {
            panic!("Expected tool_result blocks in retained tail");
        }

        let kept_tool_use = &result.messages[3];
        if let Content::Blocks(blocks) = &kept_tool_use.content {
            assert!(
                blocks
                    .iter()
                    .any(|b| matches!(b, ContentBlock::ToolUse { .. })),
                "Expected kept assistant tool_use in retained tail"
            );
        } else {
            panic!("Expected tool_use blocks in retained tail");
        }

        Ok(())
    }

    /// Oversized retained messages are dropped from the tail (newest kept
    /// first) so the tail stays under MAX_RETAINED_TAIL_MESSAGE_TOKENS.
    #[tokio::test]
    async fn test_compact_history_retained_tail_is_token_capped() -> Result<()> {
        let provider = Arc::new(MockProvider::new(
            "Project summary with a long context and technical context.",
        ));
        let config = CompactionConfig::default()
            .with_retain_recent(8)
            .with_min_messages(1)
            .with_threshold_tokens(1);
        let compactor = LlmContextCompactor::new(provider, config);

        let mut messages = Vec::new();

        messages.extend((0..6).map(|index| Message::user(format!("pre-compaction noise {index}"))));

        // Eight huge assistant turns; not all can fit under the cap.
        messages.extend(
            (0..8).map(|index| Message::assistant(format!("kept-{index}: {}", "x".repeat(12_000)))),
        );

        let result = compactor.compact_history(messages).await?;

        let retained_tail = &result.messages[2..];
        assert!(retained_tail.len() < 8);

        // Verify the tail is a contiguous newest-first suffix of kept-*.
        let mut latest_index = -1i32;
        let mut all_retained = true;
        for message in retained_tail {
            if let Content::Text(text) = &message.content {
                if let Some(number) = text.split(':').next().and_then(|prefix| {
                    prefix
                        .strip_prefix("kept-")
                        .and_then(|rest| rest.parse::<i32>().ok())
                }) {
                    if number >= 0 {
                        latest_index = latest_index.max(number);
                    }
                } else {
                    all_retained = false;
                }
            } else {
                all_retained = false;
            }
        }

        assert!(all_retained);
        assert_eq!(latest_index, 7);
        assert!(
            TokenEstimator::estimate_history(retained_tail) <= MAX_RETAINED_TAIL_MESSAGE_TOKENS
        );
        assert!(compactor.needs_compaction(&result.messages));

        Ok(())
    }

    /// When the token cap empties the retained tail entirely, no assistant
    /// acknowledgment message is appended after the summary.
    #[tokio::test]
    async fn test_compact_history_skips_summary_ack_when_retained_tail_is_empty() -> Result<()> {
        let provider = Arc::new(MockProvider::new("Summary for oversized user turn."));
        let config = CompactionConfig::default()
            .with_retain_recent(1)
            .with_min_messages(1)
            .with_threshold_tokens(1);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::assistant("Earlier assistant context."),
            Message::user(format!("oversized-user-turn: {}", "x".repeat(200_000))),
        ];

        let result = compactor.compact_history(messages).await?;

        assert_eq!(result.new_count, 1);
        assert_eq!(result.messages.len(), 1);

        let only_message = &result.messages[0];
        assert_eq!(only_message.role, Role::User);

        if let Content::Text(text) = &only_message.content {
            assert!(text.contains("Previous conversation summary"));
            assert!(!text.contains(SUMMARY_ACKNOWLEDGMENT));
        } else {
            panic!("Expected summary text when retained tail is empty");
        }

        Ok(())
    }
}