1use crate::llm::{ChatOutcome, ChatRequest, Content, ContentBlock, LlmProvider, Message, Role};
4use anyhow::{Context, Result, bail};
5use async_trait::async_trait;
6use std::fmt::Write;
7use std::sync::Arc;
8
9use super::config::CompactionConfig;
10use super::estimator::TokenEstimator;
11
/// Marker prepended to generated summaries so later compaction passes can
/// recognize them and avoid re-summarizing already-compacted context.
const SUMMARY_PREFIX: &str = "[Previous conversation summary]\n\n";
/// System prompt for the summarization request sent to the LLM.
const COMPACTION_SYSTEM_PROMPT: &str = "You are a precise summarizer. Your task is to create concise but complete summaries of conversations, preserving all technical details needed to continue the work.";
/// User-prompt text placed before the formatted conversation transcript.
const COMPACTION_SUMMARY_PROMPT_PREFIX: &str = "Summarize this conversation concisely, preserving:\n- Key decisions and conclusions reached\n- Important file paths, code changes, and technical details\n- Current task context and what has been accomplished\n- Any pending items, errors encountered, or next steps\n\nBe specific about technical details (file names, function names, error messages) as these\nare critical for continuing the work.\n\nConversation:\n";
/// User-prompt text placed after the formatted conversation transcript.
const COMPACTION_SUMMARY_PROMPT_SUFFIX: &str =
    "Provide a concise summary (aim for 500-1000 words):";
/// Summary text returned when every candidate message was itself a prior
/// summary, so there is nothing new to send to the LLM.
const COMPACT_EMPTY_SUMMARY: &str = "No additional context was available to summarize; the previous messages were already compacted.";
/// Upper bound (estimated tokens) on the retained message tail after compaction.
const MAX_RETAINED_TAIL_MESSAGE_TOKENS: usize = 20_000;
/// Tool-result text longer than this (in chars) is truncated in the transcript
/// given to the summarizer.
const MAX_TOOL_RESULT_CHARS: usize = 500;
20
/// Strategy interface for compacting (summarizing) conversation history.
#[async_trait]
pub trait ContextCompactor: Send + Sync {
    /// Produce a text summary of `messages`.
    async fn compact(&self, messages: &[Message]) -> Result<String>;

    /// Estimate the total token footprint of `messages`.
    fn estimate_tokens(&self, messages: &[Message]) -> usize;

    /// Whether `messages` should be compacted under the current policy.
    fn needs_compaction(&self, messages: &[Message]) -> bool;

    /// Replace older history with a summary, returning the new history
    /// along with before/after message and token counts.
    async fn compact_history(&self, messages: Vec<Message>) -> Result<CompactionResult>;
}
44
/// Outcome of [`ContextCompactor::compact_history`].
#[derive(Debug, Clone)]
pub struct CompactionResult {
    /// The compacted history: summary message, acknowledgement, retained tail.
    pub messages: Vec<Message>,
    /// Number of messages before compaction.
    pub original_count: usize,
    /// Number of messages after compaction.
    pub new_count: usize,
    /// Estimated tokens before compaction.
    pub original_tokens: usize,
    /// Estimated tokens after compaction.
    pub new_tokens: usize,
}
59
/// [`ContextCompactor`] implementation that delegates summarization to an
/// [`LlmProvider`].
pub struct LlmContextCompactor<P: LlmProvider> {
    /// Provider used to run the summarization chat request.
    provider: Arc<P>,
    /// Thresholds and retention policy for compaction.
    config: CompactionConfig,
    // Prompts are stored per-instance so callers can override them via
    // `with_prompts`; defaults come from the module constants.
    system_prompt: String,
    summary_prompt_prefix: String,
    summary_prompt_suffix: String,
}
70
impl<P: LlmProvider> LlmContextCompactor<P> {
    /// Create a compactor with an explicit configuration and default prompts.
    #[must_use]
    pub fn new(provider: Arc<P>, config: CompactionConfig) -> Self {
        Self {
            provider,
            config,
            system_prompt: COMPACTION_SYSTEM_PROMPT.to_string(),
            summary_prompt_prefix: COMPACTION_SUMMARY_PROMPT_PREFIX.to_string(),
            summary_prompt_suffix: COMPACTION_SUMMARY_PROMPT_SUFFIX.to_string(),
        }
    }

    /// Create a compactor with `CompactionConfig::default()`.
    #[must_use]
    pub fn with_defaults(provider: Arc<P>) -> Self {
        Self::new(provider, CompactionConfig::default())
    }

    /// The active compaction configuration.
    #[must_use]
    pub const fn config(&self) -> &CompactionConfig {
        &self.config
    }

    /// Builder-style override of the system prompt and the summary prompt
    /// prefix/suffix used when asking the LLM for a summary.
    #[must_use]
    pub fn with_prompts(
        mut self,
        system_prompt: impl Into<String>,
        summary_prompt_prefix: impl Into<String>,
        summary_prompt_suffix: impl Into<String>,
    ) -> Self {
        self.system_prompt = system_prompt.into();
        self.summary_prompt_prefix = summary_prompt_prefix.into();
        self.summary_prompt_suffix = summary_prompt_suffix.into();
        self
    }

    /// True when `content` starts with [`SUMMARY_PREFIX`], i.e. the message
    /// was produced by a previous compaction pass. For block content, any
    /// text block carrying the prefix qualifies.
    fn is_summary_message(content: &Content) -> bool {
        match content {
            Content::Text(text) => text.starts_with(SUMMARY_PREFIX),
            Content::Blocks(blocks) => blocks.iter().any(|block| match block {
                ContentBlock::Text { text } => text.starts_with(SUMMARY_PREFIX),
                _ => false,
            }),
        }
    }

    /// True when `content` contains at least one `ToolUse` block.
    fn has_tool_use(content: &Content) -> bool {
        matches!(
            content,
            Content::Blocks(blocks)
                if blocks
                    .iter()
                    .any(|block| matches!(block, ContentBlock::ToolUse { .. }))
        )
    }

    /// True when `content` contains at least one `ToolResult` block.
    fn has_tool_result(content: &Content) -> bool {
        matches!(
            content,
            Content::Blocks(blocks)
                if blocks
                    .iter()
                    .any(|block| matches!(block, ContentBlock::ToolResult { .. }))
        )
    }

    /// Move `split_point` backwards until it no longer separates an adjacent
    /// tool_use/tool_result (or tool_result/tool_use) message pair, so the
    /// retained tail never starts with an orphaned half of a tool exchange.
    fn split_point_preserves_tool_pairs(messages: &[Message], mut split_point: usize) -> usize {
        while split_point > 0 && split_point < messages.len() {
            let prev = &messages[split_point - 1];
            let next = &messages[split_point];

            // The split "crosses" a pair when the messages on either side of
            // it belong together in the provider conversation.
            let crosses_tool_pair = (prev.role == Role::Assistant
                && Self::has_tool_use(&prev.content)
                && next.role == Role::User
                && Self::has_tool_result(&next.content))
                || (prev.role == Role::User
                    && Self::has_tool_result(&prev.content)
                    && next.role == Role::Assistant
                    && Self::has_tool_use(&next.content));

            if crosses_tool_pair {
                split_point -= 1;
                continue;
            }

            break;
        }

        split_point
    }

    /// Combine the token cap with tool-pair preservation: repeatedly apply
    /// the cap, then pull the result back across any tool pair it would
    /// split, until the pair-adjusted index stops moving.
    ///
    /// NOTE(review): when the pair adjustment reaches a fixpoint while the
    /// cap-derived `candidate` still differs, `candidate` is returned — i.e.
    /// the token cap takes precedence over pair preservation in that corner
    /// case. Confirm this precedence is intentional.
    fn split_point_preserves_tool_pairs_with_cap(
        messages: &[Message],
        mut split_point: usize,
        max_tokens: usize,
    ) -> usize {
        loop {
            let candidate = Self::retain_tail_with_token_cap(messages, split_point, max_tokens);
            let adjusted = Self::split_point_preserves_tool_pairs(messages, candidate);

            if adjusted == split_point {
                return candidate;
            }

            split_point = adjusted;
        }
    }

    /// Walk backwards from the end of `messages`, accumulating estimated
    /// tokens, and return the earliest index (>= `start`) such that the tail
    /// from that index fits within `max_tokens`. A cap of zero retains
    /// nothing (returns `messages.len()`).
    fn retain_tail_with_token_cap(messages: &[Message], start: usize, max_tokens: usize) -> usize {
        if start >= messages.len() {
            return messages.len();
        }

        if max_tokens == 0 {
            return messages.len();
        }

        let mut used = 0usize;
        let mut retained_start = messages.len();

        for idx in (start..messages.len()).rev() {
            let message_tokens = TokenEstimator::estimate_message(&messages[idx]);
            if used + message_tokens > max_tokens {
                break;
            }

            retained_start = idx;
            used += message_tokens;
        }

        retained_start
    }

    /// Render `messages` as a plain-text transcript for the summarizer.
    /// Tool results are truncated to `MAX_TOOL_RESULT_CHARS` characters
    /// (char-based, so multi-byte text is never split mid-codepoint).
    fn format_messages_for_summary(messages: &[Message]) -> String {
        let mut output = String::new();

        for message in messages {
            let role = match message.role {
                Role::User => "User",
                Role::Assistant => "Assistant",
            };

            // Writing to a String cannot fail; results are intentionally ignored.
            let _ = write!(output, "{role}: ");

            match &message.content {
                Content::Text(text) => {
                    let _ = writeln!(output, "{text}");
                }
                Content::Blocks(blocks) => {
                    for block in blocks {
                        match block {
                            ContentBlock::Text { text } => {
                                let _ = writeln!(output, "{text}");
                            }
                            ContentBlock::Thinking { thinking, .. } => {
                                let _ = writeln!(output, "[Thinking: {thinking}]");
                            }
                            ContentBlock::RedactedThinking { .. } => {
                                let _ = writeln!(output, "[Redacted thinking]");
                            }
                            ContentBlock::ToolUse { name, input, .. } => {
                                let _ = writeln!(
                                    output,
                                    "[Called tool: {name} with input: {}]",
                                    serde_json::to_string(input).unwrap_or_default()
                                );
                            }
                            ContentBlock::ToolResult {
                                content, is_error, ..
                            } => {
                                let status = if is_error.unwrap_or(false) {
                                    "error"
                                } else {
                                    "success"
                                };
                                let truncated = if content.chars().count() > MAX_TOOL_RESULT_CHARS {
                                    let prefix: String =
                                        content.chars().take(MAX_TOOL_RESULT_CHARS).collect();
                                    format!("{prefix}... (truncated)")
                                } else {
                                    content.clone()
                                };
                                let _ = writeln!(output, "[Tool result ({status}): {truncated}]");
                            }
                            ContentBlock::Image { source } => {
                                let _ = writeln!(output, "[Image: {}]", source.media_type);
                            }
                            ContentBlock::Document { source } => {
                                let _ = writeln!(output, "[Document: {}]", source.media_type);
                            }
                        }
                    }
                }
            }
            // Blank line between messages keeps the transcript readable.
            output.push('\n');
        }

        output
    }

    /// Wrap the transcript in the configured prompt prefix and suffix.
    fn build_summary_prompt(&self, messages_text: &str) -> String {
        format!(
            "{}{}{}",
            self.summary_prompt_prefix, messages_text, self.summary_prompt_suffix
        )
    }
}
291
292#[async_trait]
293impl<P: LlmProvider> ContextCompactor for LlmContextCompactor<P> {
294 async fn compact(&self, messages: &[Message]) -> Result<String> {
295 let messages_to_summarize: Vec<_> = messages
296 .iter()
297 .filter(|message| !Self::is_summary_message(&message.content))
298 .cloned()
299 .collect();
300
301 if messages_to_summarize.is_empty() {
302 return Ok(COMPACT_EMPTY_SUMMARY.to_string());
303 }
304
305 let messages_text = Self::format_messages_for_summary(&messages_to_summarize);
306 let prompt = self.build_summary_prompt(&messages_text);
307
308 let request = ChatRequest {
309 system: self.system_prompt.clone(),
310 messages: vec![Message::user(prompt)],
311 tools: None,
312 max_tokens: 2000,
313 thinking: None,
314 };
315
316 let outcome = self
317 .provider
318 .chat(request)
319 .await
320 .context("Failed to call LLM for summarization")?;
321
322 match outcome {
323 ChatOutcome::Success(response) => response
324 .first_text()
325 .map(String::from)
326 .context("No text in summarization response"),
327 ChatOutcome::RateLimited => {
328 bail!("Rate limited during summarization")
329 }
330 ChatOutcome::InvalidRequest(msg) => {
331 bail!("Invalid request during summarization: {msg}")
332 }
333 ChatOutcome::ServerError(msg) => {
334 bail!("Server error during summarization: {msg}")
335 }
336 }
337 }
338
339 fn estimate_tokens(&self, messages: &[Message]) -> usize {
340 TokenEstimator::estimate_history(messages)
341 }
342
343 fn needs_compaction(&self, messages: &[Message]) -> bool {
344 if !self.config.auto_compact {
345 return false;
346 }
347
348 if messages.len() < self.config.min_messages_for_compaction {
349 return false;
350 }
351
352 let estimated_tokens = self.estimate_tokens(messages);
353 estimated_tokens > self.config.threshold_tokens
354 }
355
356 async fn compact_history(&self, messages: Vec<Message>) -> Result<CompactionResult> {
357 let original_count = messages.len();
358 let original_tokens = self.estimate_tokens(&messages);
359
360 if messages.len() <= self.config.retain_recent {
362 return Ok(CompactionResult {
363 messages,
364 original_count,
365 new_count: original_count,
366 original_tokens,
367 new_tokens: original_tokens,
368 });
369 }
370
371 let mut split_point = messages.len().saturating_sub(self.config.retain_recent);
373 split_point = Self::split_point_preserves_tool_pairs_with_cap(
374 &messages,
375 split_point,
376 MAX_RETAINED_TAIL_MESSAGE_TOKENS,
377 );
378
379 let (to_summarize, to_keep) = messages.split_at(split_point);
380
381 let summary = self.compact(to_summarize).await?;
383
384 let mut new_messages = Vec::with_capacity(2 + to_keep.len());
386
387 new_messages.push(Message::user(format!("{SUMMARY_PREFIX}{summary}")));
389
390 new_messages.push(Message::assistant(
392 "I understand the context from the summary. Let me continue from where we left off.",
393 ));
394
395 new_messages.extend(to_keep.iter().cloned());
397
398 let new_count = new_messages.len();
399 let new_tokens = self.estimate_tokens(&new_messages);
400
401 Ok(CompactionResult {
402 messages: new_messages,
403 original_count,
404 new_count,
405 original_tokens,
406 new_tokens,
407 })
408 }
409}
410
#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm::{ChatResponse, StopReason, Usage};
    use std::sync::Mutex;

    /// LLM stub that always answers with a fixed summary and can optionally
    /// record the user-visible text of each request it receives.
    struct MockProvider {
        summary_response: String,
        // When set, each request's extracted user prompt is appended here.
        requests: Option<Arc<Mutex<Vec<String>>>>,
    }

    impl MockProvider {
        /// Stub that only returns `summary` and records nothing.
        fn new(summary: &str) -> Self {
            Self {
                summary_response: summary.to_string(),
                requests: None,
            }
        }

        /// Stub that returns `summary` and logs prompts into `requests`.
        fn new_with_request_log(summary: &str, requests: Arc<Mutex<Vec<String>>>) -> Self {
            Self {
                summary_response: summary.to_string(),
                requests: Some(requests),
            }
        }
    }

    #[async_trait]
    impl LlmProvider for MockProvider {
        async fn chat(&self, request: ChatRequest) -> Result<ChatOutcome> {
            if let Some(requests) = &self.requests {
                let mut entries = requests.lock().unwrap();
                // Capture the textual content of the first message that has
                // any: plain text directly, or all text blocks joined.
                let user_prompt = request
                    .messages
                    .iter()
                    .find_map(|message| match &message.content {
                        Content::Text(text) => Some(text.clone()),
                        Content::Blocks(blocks) => {
                            let text = blocks
                                .iter()
                                .filter_map(|block| {
                                    if let ContentBlock::Text { text } = block {
                                        Some(text.as_str())
                                    } else {
                                        None
                                    }
                                })
                                .collect::<Vec<_>>()
                                .join("\n");
                            if text.is_empty() { None } else { Some(text) }
                        }
                    })
                    .unwrap_or_default();
                entries.push(user_prompt);
            }
            Ok(ChatOutcome::Success(ChatResponse {
                id: "test".to_string(),
                content: vec![ContentBlock::Text {
                    text: self.summary_response.clone(),
                }],
                model: "mock".to_string(),
                stop_reason: Some(StopReason::EndTurn),
                usage: Usage {
                    input_tokens: 100,
                    output_tokens: 50,
                },
            }))
        }

        fn model(&self) -> &'static str {
            "mock-model"
        }

        fn provider(&self) -> &'static str {
            "mock"
        }
    }

    // Short history under the token threshold must not trigger compaction.
    #[test]
    fn test_needs_compaction_below_threshold() {
        let provider = Arc::new(MockProvider::new("summary"));
        let config = CompactionConfig::default()
            .with_threshold_tokens(10_000)
            .with_min_messages(5);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user("Hello"),
            Message::assistant("Hi"),
            Message::user("How are you?"),
        ];

        assert!(!compactor.needs_compaction(&messages));
    }

    // Enough messages over a tiny token threshold must trigger compaction.
    #[test]
    fn test_needs_compaction_above_threshold() {
        let provider = Arc::new(MockProvider::new("summary"));
        let config = CompactionConfig::default()
            .with_threshold_tokens(50)
            .with_min_messages(3);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user("Hello, this is a longer message to test compaction"),
            Message::assistant(
                "Hi there! This is also a longer response to help trigger compaction",
            ),
            Message::user("Great, let's continue with even more text here"),
            Message::assistant("Absolutely, adding more content to ensure we exceed the threshold"),
        ];

        assert!(compactor.needs_compaction(&messages));
    }

    // auto_compact == false disables compaction regardless of thresholds.
    #[test]
    fn test_needs_compaction_auto_disabled() {
        let provider = Arc::new(MockProvider::new("summary"));
        let config = CompactionConfig::default()
            .with_threshold_tokens(10)
            .with_min_messages(1)
            .with_auto_compact(false);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user("Hello, this is a longer message"),
            Message::assistant("Response here"),
        ];

        assert!(!compactor.needs_compaction(&messages));
    }

    // End-to-end: older messages are replaced by summary + acknowledgement,
    // recent ones retained, and the token estimate shrinks.
    #[tokio::test]
    async fn test_compact_history() -> Result<()> {
        let provider = Arc::new(MockProvider::new(
            "User asked about Rust programming. Assistant explained ownership, borrowing, and lifetimes.",
        ));
        let config = CompactionConfig::default()
            .with_retain_recent(2)
            .with_min_messages(3);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user(
                "What is Rust? I've heard it's a systems programming language but I don't know much about it. Can you explain the key features and why people are excited about it?",
            ),
            Message::assistant(
                "Rust is a systems programming language focused on safety, speed, and concurrency. It achieves memory safety without garbage collection through its ownership system. The key features include zero-cost abstractions, guaranteed memory safety, threads without data races, and minimal runtime.",
            ),
            Message::user(
                "Tell me about ownership in detail. How does it work and what are the rules? I want to understand this core concept thoroughly.",
            ),
            Message::assistant(
                "Ownership is Rust's central feature with three rules: each value has one owner, only one owner at a time, and the value is dropped when owner goes out of scope. This system prevents memory leaks, double frees, and dangling pointers at compile time.",
            ),
            Message::user("What about borrowing?"),
            Message::assistant("Borrowing allows references to data without taking ownership."),
        ];

        let result = compactor.compact_history(messages).await?;

        // summary + acknowledgement + 2 retained messages.
        assert_eq!(result.new_count, 4);
        assert_eq!(result.original_count, 6);

        assert!(
            result.new_tokens < result.original_tokens,
            "Expected fewer tokens after compaction: new={} < original={}",
            result.new_tokens,
            result.original_tokens
        );

        // First message should carry the summary marker.
        if let Content::Text(text) = &result.messages[0].content {
            assert!(text.contains("Previous conversation summary"));
        }

        Ok(())
    }

    // Histories within the retention window are returned unchanged.
    #[tokio::test]
    async fn test_compact_history_too_few_messages() -> Result<()> {
        let provider = Arc::new(MockProvider::new("summary"));
        let config = CompactionConfig::default().with_retain_recent(5);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user("Hello"),
            Message::assistant("Hi"),
            Message::user("Bye"),
        ];

        let result = compactor.compact_history(messages.clone()).await?;

        assert_eq!(result.new_count, 3);
        assert_eq!(result.messages.len(), 3);

        Ok(())
    }

    // Transcript formatting labels each message with its role.
    #[test]
    fn test_format_messages_for_summary() {
        let messages = vec![Message::user("Hello"), Message::assistant("Hi there!")];

        let formatted = LlmContextCompactor::<MockProvider>::format_messages_for_summary(&messages);

        assert!(formatted.contains("User: Hello"));
        assert!(formatted.contains("Assistant: Hi there!"));
    }

    // Truncation is char-based, so multi-byte content must not panic and
    // must still be marked as truncated.
    #[test]
    fn test_format_messages_for_summary_truncates_tool_results_unicode_safely() {
        let long_unicode = "é".repeat(600);

        let messages = vec![Message {
            role: Role::Assistant,
            content: Content::Blocks(vec![ContentBlock::ToolResult {
                tool_use_id: "tool-1".to_string(),
                content: long_unicode,
                is_error: Some(false),
            }]),
        }];

        let formatted = LlmContextCompactor::<MockProvider>::format_messages_for_summary(&messages);

        assert!(formatted.contains("... (truncated)"));
    }

    // compact() must drop prior-summary messages from the LLM payload.
    #[tokio::test]
    async fn test_compact_filters_summary_messages() -> Result<()> {
        let requests = Arc::new(Mutex::new(Vec::new()));
        let provider = Arc::new(MockProvider::new_with_request_log(
            "Fresh summary",
            requests.clone(),
        ));
        let config = CompactionConfig::default().with_min_messages(1);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user(format!("{SUMMARY_PREFIX}already compacted context")),
            Message::assistant("Continue with the next task using this context."),
        ];

        let summary = compactor.compact(&messages).await?;

        {
            let recorded = requests.lock().unwrap();
            assert_eq!(recorded.len(), 1);
            assert_eq!(summary, "Fresh summary");
            assert!(recorded[0].contains("Continue with the next task using this context."));
            assert!(!recorded[0].contains("already compacted context"));
            drop(recorded);
        }

        Ok(())
    }

    // compact_history() must also exclude prior summaries from the payload
    // while still producing a full compaction result.
    #[tokio::test]
    async fn test_compact_history_ignores_prior_summary_in_candidate_payload() -> Result<()> {
        let requests = Arc::new(Mutex::new(Vec::new()));
        let provider = Arc::new(MockProvider::new_with_request_log(
            "Fresh history summary",
            requests.clone(),
        ));
        let config = CompactionConfig::default()
            .with_retain_recent(2)
            .with_min_messages(1);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user(format!("{SUMMARY_PREFIX}already compacted context")),
            Message::assistant("Current turn content from the latest exchange."),
            Message::assistant("Recent message that should stay."),
            Message::user("Newest note that should stay."),
        ];

        let result = compactor.compact_history(messages).await?;

        {
            let recorded = requests.lock().unwrap();
            assert_eq!(recorded.len(), 1);
            assert!(recorded[0].contains("Current turn content from the latest exchange."));
            assert!(!recorded[0].contains("already compacted context"));
            drop(recorded);
        }
        assert_eq!(result.new_count, 4);

        Ok(())
    }

    // When the summarization window holds only prior summaries, no LLM call
    // is made and the canned empty-summary text is used instead.
    #[tokio::test]
    async fn test_compact_history_is_no_op_when_candidate_window_has_only_summaries() -> Result<()>
    {
        let requests = Arc::new(Mutex::new(Vec::new()));
        let provider = Arc::new(MockProvider::new_with_request_log(
            "This summary should not be used",
            requests.clone(),
        ));
        let config = CompactionConfig::default()
            .with_retain_recent(2)
            .with_min_messages(1);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user(format!("{SUMMARY_PREFIX}first prior compacted section")),
            Message::assistant(format!("{SUMMARY_PREFIX}second prior compacted section")),
            Message::user(format!("{SUMMARY_PREFIX}third prior compacted section")),
            Message::assistant("final short note"),
        ];

        let result = compactor.compact_history(messages).await?;

        {
            let recorded = requests.lock().unwrap();
            assert!(recorded.is_empty());
            drop(recorded);
        }
        assert_eq!(result.new_count, 4);
        assert_eq!(result.messages.len(), 4);

        if let Content::Text(text) = &result.messages[0].content {
            assert!(text.contains(COMPACT_EMPTY_SUMMARY));
        } else {
            panic!("Expected summary text in first message");
        }

        Ok(())
    }

    // The split point must back up so an assistant tool_use and the matching
    // user tool_result both land in the retained tail.
    #[tokio::test]
    async fn test_compact_history_preserves_tool_use_tool_result_pairs() -> Result<()> {
        let provider = Arc::new(MockProvider::new("Summary of earlier conversation."));
        let config = CompactionConfig::default()
            .with_retain_recent(2)
            .with_min_messages(3);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user("What files are in the project?"),
            Message::assistant("Let me check that for you."),
            Message {
                role: Role::Assistant,
                content: Content::Blocks(vec![ContentBlock::ToolUse {
                    id: "tool_1".to_string(),
                    name: "list_files".to_string(),
                    input: serde_json::json!({}),
                    thought_signature: None,
                }]),
            },
            Message {
                role: Role::User,
                content: Content::Blocks(vec![ContentBlock::ToolResult {
                    tool_use_id: "tool_1".to_string(),
                    content: "file1.rs\nfile2.rs".to_string(),
                    is_error: None,
                }]),
            },
            Message::assistant("The project contains file1.rs and file2.rs."),
        ];

        let result = compactor.compact_history(messages).await?;

        // summary + acknowledgement + 3 kept (pair pulled the split back).
        assert_eq!(result.new_count, 5);

        let kept_assistant = &result.messages[2];
        if let Content::Blocks(blocks) = &kept_assistant.content {
            assert!(
                blocks
                    .iter()
                    .any(|b| matches!(b, ContentBlock::ToolUse { .. })),
                "Expected assistant tool_use in kept messages"
            );
        } else {
            panic!("Expected Blocks content for assistant tool_use message");
        }

        let kept_user = &result.messages[3];
        if let Content::Blocks(blocks) = &kept_user.content {
            assert!(
                blocks
                    .iter()
                    .any(|b| matches!(b, ContentBlock::ToolResult { .. })),
                "Expected user tool_result in kept messages"
            );
        } else {
            panic!("Expected Blocks content for user tool_result message");
        }

        Ok(())
    }

    // The reverse adjacency (user tool_result followed by assistant
    // tool_use) must also be kept together in the retained tail.
    #[tokio::test]
    async fn test_compact_history_preserves_tool_result_tool_use_pairs() -> Result<()> {
        let provider = Arc::new(MockProvider::new("Summary around tool pair."));
        let config = CompactionConfig::default()
            .with_retain_recent(2)
            .with_min_messages(1);
        let compactor = LlmContextCompactor::new(provider, config);

        let messages = vec![
            Message::user("Start a workflow"),
            Message {
                role: Role::User,
                content: Content::Blocks(vec![ContentBlock::ToolResult {
                    tool_use_id: "tool_odd".to_string(),
                    content: "prior result".to_string(),
                    is_error: None,
                }]),
            },
            Message {
                role: Role::Assistant,
                content: Content::Blocks(vec![ContentBlock::ToolUse {
                    id: "tool_odd".to_string(),
                    name: "follow_up".to_string(),
                    input: serde_json::json!({}),
                    thought_signature: None,
                }]),
            },
            Message::assistant("Follow up done."),
        ];

        let result = compactor.compact_history(messages).await?;

        assert_eq!(result.new_count, 5);

        let kept_result = &result.messages[2];
        if let Content::Blocks(blocks) = &kept_result.content {
            assert!(
                blocks
                    .iter()
                    .any(|b| matches!(b, ContentBlock::ToolResult { .. })),
                "Expected kept user tool_result in retained tail"
            );
        } else {
            panic!("Expected tool_result blocks in retained tail");
        }

        let kept_tool_use = &result.messages[3];
        if let Content::Blocks(blocks) = &kept_tool_use.content {
            assert!(
                blocks
                    .iter()
                    .any(|b| matches!(b, ContentBlock::ToolUse { .. })),
                "Expected kept assistant tool_use in retained tail"
            );
        } else {
            panic!("Expected tool_use blocks in retained tail");
        }

        Ok(())
    }

    // When the configured retention window exceeds the tail token cap, the
    // cap wins: fewer, newest messages are kept and the cap is respected.
    #[tokio::test]
    async fn test_compact_history_retained_tail_is_token_capped() -> Result<()> {
        let provider = Arc::new(MockProvider::new(
            "Project summary with a long context and technical context.",
        ));
        let config = CompactionConfig::default()
            .with_retain_recent(8)
            .with_min_messages(1)
            .with_threshold_tokens(1);
        let compactor = LlmContextCompactor::new(provider, config);

        let mut messages = Vec::new();

        messages.extend((0..6).map(|index| Message::user(format!("pre-compaction noise {index}"))));

        // Each kept message is ~12k chars, so not all 8 can fit under the cap.
        messages.extend(
            (0..8).map(|index| Message::assistant(format!("kept-{index}: {}", "x".repeat(12_000)))),
        );

        let result = compactor.compact_history(messages).await?;

        let retained_tail = &result.messages[2..];
        assert!(retained_tail.len() < 8);

        // Verify the retained messages are the newest ones (ending at kept-7).
        let mut latest_index = -1i32;
        let mut all_retained = true;
        for message in retained_tail {
            if let Content::Text(text) = &message.content {
                if let Some(number) = text.split(':').next().and_then(|prefix| {
                    prefix
                        .strip_prefix("kept-")
                        .and_then(|rest| rest.parse::<i32>().ok())
                }) {
                    if number >= 0 {
                        latest_index = latest_index.max(number);
                    }
                } else {
                    all_retained = false;
                }
            } else {
                all_retained = false;
            }
        }

        assert!(all_retained);
        assert_eq!(latest_index, 7);
        assert!(
            TokenEstimator::estimate_history(retained_tail) <= MAX_RETAINED_TAIL_MESSAGE_TOKENS
        );
        assert!(compactor.needs_compaction(&result.messages));

        Ok(())
    }
}