1use crate::llm::{
4 ChatOutcome, ChatRequest, Content, ContentBlock, LlmProvider, Message, Role, StopReason,
5};
6use anyhow::{Context, Result, bail};
7use async_trait::async_trait;
8use std::fmt::Write;
9use std::sync::Arc;
10
11use super::config::CompactionConfig;
12use super::estimator::TokenEstimator;
13
/// Marker that starts every message carrying a compaction summary. It is
/// prepended when a summary is stored and stripped when one is detected.
const SUMMARY_PREFIX: &str = "[Previous conversation summary]\n\n";

/// Default system prompt sent with summarization requests.
const COMPACTION_SYSTEM_PROMPT: &str = "You are a precise summarizer. \
    Your task is to create concise but complete summaries of conversations, \
    preserving all technical details needed to continue the work.";

/// Default text placed before the rendered conversation in the summary prompt.
const COMPACTION_SUMMARY_PROMPT_PREFIX: &str = "Summarize this conversation concisely, preserving:\n\
    - Key decisions and conclusions reached\n\
    - Important file paths, code changes, and technical details\n\
    - Current task context and what has been accomplished\n\
    - Any pending items, errors encountered, or next steps\n\
    \n\
    Be specific about technical details (file names, function names, error messages) as these\n\
    are critical for continuing the work.\n\
    \n\
    Conversation:\n";

/// Default text placed after the rendered conversation in the summary prompt.
const COMPACTION_SUMMARY_PROMPT_SUFFIX: &str = "Provide a concise summary (aim for 500-1000 words):";

/// Summary returned when there is neither fresh content nor a prior summary.
const COMPACT_EMPTY_SUMMARY: &str = "No additional context was available to summarize; \
    the previous messages were already compacted.";

/// Assistant reply inserted after the summary message when a tail is retained.
const SUMMARY_ACKNOWLEDGMENT: &str =
    "I understand the context from the summary. Let me continue from where we left off.";

/// Maximum number of characters of a tool result rendered into the summary input.
const MAX_TOOL_RESULT_CHARS: usize = 500;

/// Suffix appended to a summary that is still truncated after the retry.
const TRUNCATED_SUMMARY_MARKER: &str =
    "\n\n[summary truncated: exceeded the configured summary_max_tokens budget]";
25
26#[async_trait]
30pub trait ContextCompactor: Send + Sync {
31 async fn compact(&self, messages: &[Message]) -> Result<String>;
36
37 fn estimate_tokens(&self, messages: &[Message]) -> usize;
39
40 fn needs_compaction(&self, messages: &[Message]) -> bool;
42
43 async fn compact_history(&self, messages: Vec<Message>) -> Result<CompactionResult>;
48}
49
50#[derive(Debug, Clone)]
52pub struct CompactionResult {
53 pub messages: Vec<Message>,
55 pub original_count: usize,
57 pub new_count: usize,
59 pub original_tokens: usize,
61 pub new_tokens: usize,
63}
64
65pub struct LlmContextCompactor<P: LlmProvider + ?Sized> {
76 provider: Arc<P>,
77 config: CompactionConfig,
78 system_prompt: String,
79 summary_prompt_prefix: String,
80 summary_prompt_suffix: String,
81}
82
83impl<P: LlmProvider + ?Sized> LlmContextCompactor<P> {
84 #[must_use]
86 pub fn new(provider: Arc<P>, config: CompactionConfig) -> Self {
87 Self {
88 provider,
89 config,
90 system_prompt: COMPACTION_SYSTEM_PROMPT.to_string(),
91 summary_prompt_prefix: COMPACTION_SUMMARY_PROMPT_PREFIX.to_string(),
92 summary_prompt_suffix: COMPACTION_SUMMARY_PROMPT_SUFFIX.to_string(),
93 }
94 }
95
96 #[must_use]
98 pub fn with_defaults(provider: Arc<P>) -> Self {
99 Self::new(provider, CompactionConfig::default())
100 }
101
102 #[must_use]
104 pub const fn config(&self) -> &CompactionConfig {
105 &self.config
106 }
107
108 #[must_use]
110 pub fn with_prompts(
111 mut self,
112 system_prompt: impl Into<String>,
113 summary_prompt_prefix: impl Into<String>,
114 summary_prompt_suffix: impl Into<String>,
115 ) -> Self {
116 self.system_prompt = system_prompt.into();
117 self.summary_prompt_prefix = summary_prompt_prefix.into();
118 self.summary_prompt_suffix = summary_prompt_suffix.into();
119 self
120 }
121
122 fn extract_summary_text(content: &Content) -> Option<String> {
132 match content {
133 Content::Text(text) => text.strip_prefix(SUMMARY_PREFIX).map(str::to_string),
134 Content::Blocks(blocks) => blocks.iter().find_map(|block| match block {
135 ContentBlock::Text { text } => {
136 text.strip_prefix(SUMMARY_PREFIX).map(str::to_string)
137 }
138 _ => None,
139 }),
140 }
141 }
142
143 fn has_tool_use(content: &Content) -> bool {
145 matches!(
146 content,
147 Content::Blocks(blocks)
148 if blocks
149 .iter()
150 .any(|block| matches!(block, ContentBlock::ToolUse { .. }))
151 )
152 }
153
154 fn has_tool_result(content: &Content) -> bool {
156 matches!(
157 content,
158 Content::Blocks(blocks)
159 if blocks
160 .iter()
161 .any(|block| matches!(block, ContentBlock::ToolResult { .. }))
162 )
163 }
164
165 fn split_point_preserves_tool_pairs(messages: &[Message], mut split_point: usize) -> usize {
178 while split_point > 0 && split_point < messages.len() {
179 let prev = &messages[split_point - 1];
180 let next = &messages[split_point];
181
182 let crosses_tool_pair = prev.role == Role::Assistant
183 && Self::has_tool_use(&prev.content)
184 && next.role == Role::User
185 && Self::has_tool_result(&next.content);
186
187 if crosses_tool_pair {
188 split_point -= 1;
189 continue;
190 }
191
192 break;
193 }
194
195 split_point
196 }
197
198 fn split_point_preserves_tool_pairs_with_cap(
237 messages: &[Message],
238 split_point: usize,
239 max_tokens: usize,
240 ) -> usize {
241 let cap_limit = Self::retain_tail_with_token_cap(messages, split_point, max_tokens);
242 let pair_safe = Self::split_point_preserves_tool_pairs(messages, cap_limit);
243 Self::split_point_skips_leading_orphan(messages, pair_safe)
244 }
245
246 fn split_point_skips_leading_orphan(messages: &[Message], mut split_point: usize) -> usize {
263 while split_point < messages.len() {
264 if Self::leading_message_has_orphan_tool_result(&messages[split_point..]) {
265 split_point = split_point.saturating_add(1);
266 continue;
267 }
268 break;
269 }
270 split_point
271 }
272
273 fn leading_message_has_orphan_tool_result(to_keep: &[Message]) -> bool {
281 let Some(first) = to_keep.first() else {
282 return false;
283 };
284 let Content::Blocks(blocks) = &first.content else {
285 return false;
286 };
287
288 let mut needed: Vec<&str> = Vec::new();
292 for block in blocks {
293 if let ContentBlock::ToolResult { tool_use_id, .. } = block {
294 needed.push(tool_use_id.as_str());
295 }
296 }
297 if needed.is_empty() {
298 return false;
299 }
300
301 let known_ids: std::collections::HashSet<&str> = to_keep
303 .iter()
304 .flat_map(|message| match &message.content {
305 Content::Blocks(blocks) => blocks
306 .iter()
307 .filter_map(|block| match block {
308 ContentBlock::ToolUse { id, .. } => Some(id.as_str()),
309 _ => None,
310 })
311 .collect::<Vec<_>>(),
312 Content::Text(_) => Vec::new(),
313 })
314 .collect();
315
316 needed.iter().any(|id| !known_ids.contains(id))
317 }
318
319 fn retain_tail_with_token_cap(messages: &[Message], start: usize, max_tokens: usize) -> usize {
321 if start >= messages.len() {
322 return messages.len();
323 }
324
325 if max_tokens == 0 {
326 return messages.len();
327 }
328
329 let mut used = 0usize;
330 let mut retained_start = messages.len();
331
332 for idx in (start..messages.len()).rev() {
333 let message_tokens = TokenEstimator::estimate_message(&messages[idx]);
334 if used + message_tokens > max_tokens {
335 break;
336 }
337
338 retained_start = idx;
339 used += message_tokens;
340 }
341
342 retained_start
343 }
344
345 fn format_messages_for_summary<'a>(messages: impl IntoIterator<Item = &'a Message>) -> String {
350 let mut output = String::new();
351
352 for message in messages {
353 let role = match message.role {
354 Role::User => "User",
355 Role::Assistant => "Assistant",
356 };
357
358 let _ = write!(output, "{role}: ");
359
360 match &message.content {
361 Content::Text(text) => {
362 let _ = writeln!(output, "{text}");
363 }
364 Content::Blocks(blocks) => {
365 for block in blocks {
366 match block {
367 ContentBlock::Text { text } => {
368 let _ = writeln!(output, "{text}");
369 }
370 ContentBlock::Thinking { thinking, .. } => {
371 let _ = writeln!(output, "[Thinking: {thinking}]");
373 }
374 ContentBlock::RedactedThinking { .. } => {
375 let _ = writeln!(output, "[Redacted thinking]");
376 }
377 ContentBlock::ToolUse { name, input, .. } => {
378 let _ = writeln!(
379 output,
380 "[Called tool: {name} with input: {}]",
381 serde_json::to_string(input).unwrap_or_default()
382 );
383 }
384 ContentBlock::ToolResult {
385 content, is_error, ..
386 } => {
387 let status = if is_error.unwrap_or(false) {
388 "error"
389 } else {
390 "success"
391 };
392 let truncated = if content.chars().count() > MAX_TOOL_RESULT_CHARS {
394 let prefix: String =
395 content.chars().take(MAX_TOOL_RESULT_CHARS).collect();
396 format!("{prefix}... (truncated)")
397 } else {
398 content.clone()
399 };
400 let _ = writeln!(output, "[Tool result ({status}): {truncated}]");
401 }
402 ContentBlock::Image { source } => {
403 let _ = writeln!(output, "[Image: {}]", source.media_type);
404 }
405 ContentBlock::Document { source } => {
406 let _ = writeln!(output, "[Document: {}]", source.media_type);
407 }
408 _ => {
411 let _ = writeln!(output, "[Unrecognized content block]");
412 }
413 }
414 }
415 }
416 }
417 output.push('\n');
418 }
419
420 output
421 }
422
423 fn build_summary_prompt(&self, prior_summaries: &[String], messages_text: &str) -> String {
430 let base = format!(
431 "{}{}{}",
432 self.summary_prompt_prefix, messages_text, self.summary_prompt_suffix
433 );
434
435 if prior_summaries.is_empty() {
436 return base;
437 }
438
439 let prior = prior_summaries.join("\n\n");
440 format!(
441 "Previous summary of earlier conversation. Preserve every fact below \
442 in your new summary so no earlier context is lost:\n{prior}\n\n{base}"
443 )
444 }
445
446 async fn run_summarization(&self, prompt: String, max_tokens: usize) -> Result<(String, bool)> {
449 let request = ChatRequest {
450 system: self.system_prompt.clone(),
451 messages: vec![Message::user(prompt)],
452 tools: None,
453 max_tokens: u32::try_from(max_tokens).unwrap_or(u32::MAX),
454 max_tokens_explicit: true,
455 session_id: None,
456 cached_content: None,
457 thinking: None,
458 tool_choice: None,
459 response_format: None,
460 cache: None,
461 };
462
463 let outcome = self
464 .provider
465 .chat(request)
466 .await
467 .context("Failed to call LLM for summarization")?;
468
469 match outcome {
470 ChatOutcome::Success(response) => {
471 let truncated = response.stop_reason == Some(StopReason::MaxTokens);
472 let text = response
473 .first_text()
474 .map(String::from)
475 .context("No text in summarization response")?;
476 Ok((text, truncated))
477 }
478 ChatOutcome::RateLimited(_) => {
479 bail!("Rate limited during summarization")
480 }
481 ChatOutcome::InvalidRequest(msg) => {
482 bail!("Invalid request during summarization: {msg}")
483 }
484 ChatOutcome::ServerError(msg) => {
485 bail!("Server error during summarization: {msg}")
486 }
487 _ => {
490 bail!("Unrecognized provider outcome during summarization")
491 }
492 }
493 }
494}
495
496#[async_trait]
497impl<P: LlmProvider + ?Sized> ContextCompactor for LlmContextCompactor<P> {
498 async fn compact(&self, messages: &[Message]) -> Result<String> {
499 let mut prior_summaries: Vec<String> = Vec::new();
504 let mut fresh: Vec<&Message> = Vec::new();
505 for message in messages {
506 if let Some(text) = Self::extract_summary_text(&message.content) {
507 if !text.is_empty() {
508 prior_summaries.push(text);
509 }
510 } else {
511 fresh.push(message);
512 }
513 }
514
515 if fresh.is_empty() {
518 if prior_summaries.is_empty() {
519 return Ok(COMPACT_EMPTY_SUMMARY.to_string());
520 }
521 return Ok(prior_summaries.join("\n\n"));
522 }
523
524 let messages_text = Self::format_messages_for_summary(fresh.iter().copied());
525 let prompt = self.build_summary_prompt(&prior_summaries, &messages_text);
526
527 let budget = self.config.summary_max_tokens;
528 let (mut summary, truncated) = self.run_summarization(prompt.clone(), budget).await?;
529
530 if truncated {
531 log::warn!(
532 "compaction summary hit the max_tokens budget ({budget}); \
533 retrying with a larger budget to avoid silent context loss"
534 );
535 let (retry_summary, still_truncated) = self
536 .run_summarization(prompt, budget.saturating_mul(2))
537 .await?;
538 summary = retry_summary;
539 if still_truncated {
540 log::warn!(
541 "compaction summary still truncated after retry; appending a \
542 truncation marker so downstream context loss is visible"
543 );
544 summary.push_str(TRUNCATED_SUMMARY_MARKER);
545 }
546 }
547
548 Ok(summary)
549 }
550
551 fn estimate_tokens(&self, messages: &[Message]) -> usize {
552 TokenEstimator::estimate_history(messages)
553 }
554
555 fn needs_compaction(&self, messages: &[Message]) -> bool {
556 if !self.config.auto_compact {
557 return false;
558 }
559
560 if messages.len() < self.config.min_messages_for_compaction {
561 return false;
562 }
563
564 let estimated_tokens = self.estimate_tokens(messages);
565 estimated_tokens > self.config.threshold_tokens
566 }
567
568 async fn compact_history(&self, mut messages: Vec<Message>) -> Result<CompactionResult> {
569 let original_count = messages.len();
570 let original_tokens = self.estimate_tokens(&messages);
571
572 if messages.len() <= self.config.retain_recent {
574 return Ok(CompactionResult {
575 messages,
576 original_count,
577 new_count: original_count,
578 original_tokens,
579 new_tokens: original_tokens,
580 });
581 }
582
583 let mut split_point = messages.len().saturating_sub(self.config.retain_recent);
585 split_point = Self::split_point_preserves_tool_pairs_with_cap(
586 &messages,
587 split_point,
588 self.config.max_retained_tail_tokens,
589 );
590
591 let to_keep = messages.split_off(split_point);
594 let to_summarize = messages;
595
596 let summary = self.compact(&to_summarize).await?;
598
599 let mut new_messages = Vec::with_capacity(2 + to_keep.len());
601
602 new_messages.push(Message::user(format!("{SUMMARY_PREFIX}{summary}")));
604
605 if !to_keep.is_empty() {
610 new_messages.push(Message::assistant(SUMMARY_ACKNOWLEDGMENT));
611 }
612
613 new_messages.extend(to_keep);
621
622 let new_count = new_messages.len();
623 let new_tokens = self.estimate_tokens(&new_messages);
624
625 Ok(CompactionResult {
626 messages: new_messages,
627 original_count,
628 new_count,
629 original_tokens,
630 new_tokens,
631 })
632 }
633}
634
635#[cfg(test)]
636mod tests {
637 use super::*;
638 use crate::llm::{ChatResponse, StopReason, Usage};
639 use std::sync::Mutex;
640
641 struct MockProvider {
642 summary_response: String,
643 requests: Arc<Mutex<Vec<String>>>,
644 echo_input: bool,
647 stop_reason: StopReason,
649 }
650
651 impl MockProvider {
652 fn build(
653 summary: &str,
654 requests: Arc<Mutex<Vec<String>>>,
655 echo_input: bool,
656 stop_reason: StopReason,
657 ) -> Self {
658 Self {
659 summary_response: summary.to_string(),
660 requests,
661 echo_input,
662 stop_reason,
663 }
664 }
665
666 fn new(summary: &str) -> Self {
667 Self::build(
668 summary,
669 Arc::new(Mutex::new(Vec::new())),
670 false,
671 StopReason::EndTurn,
672 )
673 }
674
675 fn new_with_request_log(summary: &str, requests: Arc<Mutex<Vec<String>>>) -> Self {
676 Self::build(summary, requests, false, StopReason::EndTurn)
677 }
678
679 fn new_echo(requests: Arc<Mutex<Vec<String>>>) -> Self {
681 Self::build("", requests, true, StopReason::EndTurn)
682 }
683
684 fn new_truncating(summary: &str, requests: Arc<Mutex<Vec<String>>>) -> Self {
686 Self::build(summary, requests, false, StopReason::MaxTokens)
687 }
688
689 fn user_prompt_of(request: &ChatRequest) -> String {
690 request
691 .messages
692 .iter()
693 .find_map(|message| match &message.content {
694 Content::Text(text) => Some(text.clone()),
695 Content::Blocks(blocks) => {
696 let text = blocks
697 .iter()
698 .filter_map(|block| {
699 if let ContentBlock::Text { text } = block {
700 Some(text.as_str())
701 } else {
702 None
703 }
704 })
705 .collect::<Vec<_>>()
706 .join("\n");
707 if text.is_empty() { None } else { Some(text) }
708 }
709 })
710 .unwrap_or_default()
711 }
712 }
713
714 #[async_trait]
715 impl LlmProvider for MockProvider {
716 async fn chat(&self, request: ChatRequest) -> Result<ChatOutcome> {
717 let user_prompt = Self::user_prompt_of(&request);
718 if let Ok(mut entries) = self.requests.lock() {
719 entries.push(user_prompt.clone());
720 }
721 let text = if self.echo_input {
722 user_prompt
723 } else {
724 self.summary_response.clone()
725 };
726 Ok(ChatOutcome::Success(ChatResponse {
727 id: "test".to_string(),
728 content: vec![ContentBlock::Text { text }],
729 model: "mock".to_string(),
730 stop_reason: Some(self.stop_reason),
731 usage: Usage {
732 input_tokens: 100,
733 output_tokens: 50,
734 cached_input_tokens: 0,
735 cache_creation_input_tokens: 0,
736 },
737 }))
738 }
739
740 fn model(&self) -> &'static str {
741 "mock-model"
742 }
743
744 fn provider(&self) -> &'static str {
745 "mock"
746 }
747 }
748
749 #[test]
750 fn test_needs_compaction_below_threshold() {
751 let provider = Arc::new(MockProvider::new("summary"));
752 let config = CompactionConfig::default()
753 .with_threshold_tokens(10_000)
754 .with_min_messages(5);
755 let compactor = LlmContextCompactor::new(provider, config);
756
757 let messages = vec![
759 Message::user("Hello"),
760 Message::assistant("Hi"),
761 Message::user("How are you?"),
762 ];
763
764 assert!(!compactor.needs_compaction(&messages));
765 }
766
767 #[test]
768 fn test_needs_compaction_above_threshold() {
769 let provider = Arc::new(MockProvider::new("summary"));
770 let config = CompactionConfig::default()
771 .with_threshold_tokens(50) .with_min_messages(3);
773 let compactor = LlmContextCompactor::new(provider, config);
774
775 let messages = vec![
777 Message::user("Hello, this is a longer message to test compaction"),
778 Message::assistant(
779 "Hi there! This is also a longer response to help trigger compaction",
780 ),
781 Message::user("Great, let's continue with even more text here"),
782 Message::assistant("Absolutely, adding more content to ensure we exceed the threshold"),
783 ];
784
785 assert!(compactor.needs_compaction(&messages));
786 }
787
788 #[test]
789 fn test_needs_compaction_auto_disabled() {
790 let provider = Arc::new(MockProvider::new("summary"));
791 let config = CompactionConfig::default()
792 .with_threshold_tokens(10) .with_min_messages(1)
794 .with_auto_compact(false);
795 let compactor = LlmContextCompactor::new(provider, config);
796
797 let messages = vec![
798 Message::user("Hello, this is a longer message"),
799 Message::assistant("Response here"),
800 ];
801
802 assert!(!compactor.needs_compaction(&messages));
803 }
804
805 #[tokio::test]
806 async fn test_compact_history() -> Result<()> {
807 let provider = Arc::new(MockProvider::new(
808 "User asked about Rust programming. Assistant explained ownership, borrowing, and lifetimes.",
809 ));
810 let config = CompactionConfig::default()
811 .with_retain_recent(2)
812 .with_min_messages(3);
813 let compactor = LlmContextCompactor::new(provider, config);
814
815 let messages = vec![
817 Message::user(
818 "What is Rust? I've heard it's a systems programming language but I don't know much about it. Can you explain the key features and why people are excited about it?",
819 ),
820 Message::assistant(
821 "Rust is a systems programming language focused on safety, speed, and concurrency. It achieves memory safety without garbage collection through its ownership system. The key features include zero-cost abstractions, guaranteed memory safety, threads without data races, and minimal runtime.",
822 ),
823 Message::user(
824 "Tell me about ownership in detail. How does it work and what are the rules? I want to understand this core concept thoroughly.",
825 ),
826 Message::assistant(
827 "Ownership is Rust's central feature with three rules: each value has one owner, only one owner at a time, and the value is dropped when owner goes out of scope. This system prevents memory leaks, double frees, and dangling pointers at compile time.",
828 ),
829 Message::user("What about borrowing?"), Message::assistant("Borrowing allows references to data without taking ownership."), ];
832
833 let result = compactor.compact_history(messages).await?;
834
835 assert_eq!(result.new_count, 4);
837 assert_eq!(result.original_count, 6);
838
839 assert!(
841 result.new_tokens < result.original_tokens,
842 "Expected fewer tokens after compaction: new={} < original={}",
843 result.new_tokens,
844 result.original_tokens
845 );
846
847 if let Content::Text(text) = &result.messages[0].content {
849 assert!(text.contains("Previous conversation summary"));
850 }
851
852 Ok(())
853 }
854
855 #[tokio::test]
856 async fn test_compact_history_too_few_messages() -> Result<()> {
857 let provider = Arc::new(MockProvider::new("summary"));
858 let config = CompactionConfig::default().with_retain_recent(5);
859 let compactor = LlmContextCompactor::new(provider, config);
860
861 let messages = vec![
863 Message::user("Hello"),
864 Message::assistant("Hi"),
865 Message::user("Bye"),
866 ];
867
868 let result = compactor.compact_history(messages.clone()).await?;
869
870 assert_eq!(result.new_count, 3);
872 assert_eq!(result.messages.len(), 3);
873
874 Ok(())
875 }
876
877 #[test]
878 fn test_format_messages_for_summary() {
879 let messages = vec![Message::user("Hello"), Message::assistant("Hi there!")];
880
881 let formatted = LlmContextCompactor::<MockProvider>::format_messages_for_summary(&messages);
882
883 assert!(formatted.contains("User: Hello"));
884 assert!(formatted.contains("Assistant: Hi there!"));
885 }
886
887 #[test]
888 fn test_format_messages_for_summary_truncates_tool_results_unicode_safely() {
889 let long_unicode = "é".repeat(600);
890
891 let messages = vec![Message {
892 role: Role::Assistant,
893 content: Content::Blocks(vec![ContentBlock::ToolResult {
894 tool_use_id: "tool-1".to_string(),
895 content: long_unicode,
896 is_error: Some(false),
897 }]),
898 }];
899
900 let formatted = LlmContextCompactor::<MockProvider>::format_messages_for_summary(&messages);
901
902 assert!(formatted.contains("... (truncated)"));
903 }
904
905 #[tokio::test]
906 async fn test_compact_carries_prior_summary_into_request() -> Result<()> {
907 let requests = Arc::new(Mutex::new(Vec::new()));
912 let provider = Arc::new(MockProvider::new_with_request_log(
913 "Fresh summary",
914 requests.clone(),
915 ));
916 let config = CompactionConfig::default().with_min_messages(1);
917 let compactor = LlmContextCompactor::new(provider, config);
918
919 let messages = vec![
920 Message::user(format!("{SUMMARY_PREFIX}already compacted context")),
921 Message::assistant("Continue with the next task using this context."),
922 ];
923
924 let summary = compactor.compact(&messages).await?;
925
926 let recorded = requests
927 .lock()
928 .map_err(|_| anyhow::anyhow!("request log poisoned"))?;
929 assert_eq!(recorded.len(), 1);
930 assert_eq!(summary, "Fresh summary");
933 assert!(recorded[0].contains("Continue with the next task using this context."));
934 assert!(
935 recorded[0].contains("already compacted context"),
936 "prior summary must be carried into the summarization input"
937 );
938 drop(recorded);
939
940 Ok(())
941 }
942
943 #[tokio::test]
944 async fn test_compact_history_carries_prior_summary_in_candidate_payload() -> Result<()> {
945 let requests = Arc::new(Mutex::new(Vec::new()));
946 let provider = Arc::new(MockProvider::new_with_request_log(
947 "Fresh history summary",
948 requests.clone(),
949 ));
950 let config = CompactionConfig::default()
951 .with_retain_recent(2)
952 .with_min_messages(1);
953 let compactor = LlmContextCompactor::new(provider, config);
954
955 let messages = vec![
956 Message::user(format!("{SUMMARY_PREFIX}already compacted context")),
957 Message::assistant("Current turn content from the latest exchange."),
958 Message::assistant("Recent message that should stay."),
959 Message::user("Newest note that should stay."),
960 ];
961
962 let result = compactor.compact_history(messages).await?;
963
964 let recorded = requests
965 .lock()
966 .map_err(|_| anyhow::anyhow!("request log poisoned"))?;
967 assert_eq!(recorded.len(), 1);
968 assert!(recorded[0].contains("Current turn content from the latest exchange."));
969 assert!(
972 recorded[0].contains("already compacted context"),
973 "prior summary content must reach the summarizer"
974 );
975 drop(recorded);
976 assert_eq!(result.new_count, 4);
977
978 Ok(())
979 }
980
981 #[tokio::test]
982 async fn test_compact_history_carries_summaries_forward_when_window_has_only_summaries()
983 -> Result<()> {
984 let requests = Arc::new(Mutex::new(Vec::new()));
985 let provider = Arc::new(MockProvider::new_with_request_log(
986 "This summary should not be used",
987 requests.clone(),
988 ));
989 let config = CompactionConfig::default()
990 .with_retain_recent(2)
991 .with_min_messages(1);
992 let compactor = LlmContextCompactor::new(provider, config);
993
994 let messages = vec![
995 Message::user(format!("{SUMMARY_PREFIX}first prior compacted section")),
996 Message::assistant(format!("{SUMMARY_PREFIX}second prior compacted section")),
997 Message::user(format!("{SUMMARY_PREFIX}third prior compacted section")),
998 Message::assistant("final short note"),
999 ];
1000
1001 let result = compactor.compact_history(messages).await?;
1002
1003 let recorded = requests
1007 .lock()
1008 .map_err(|_| anyhow::anyhow!("request log poisoned"))?;
1009 assert!(recorded.is_empty());
1010 drop(recorded);
1011 assert_eq!(result.new_count, 4);
1012 assert_eq!(result.messages.len(), 4);
1013
1014 if let Content::Text(text) = &result.messages[0].content {
1015 assert!(
1016 text.contains("first prior compacted section"),
1017 "first prior summary lost"
1018 );
1019 assert!(
1020 text.contains("second prior compacted section"),
1021 "second prior summary lost"
1022 );
1023 assert!(!text.contains(COMPACT_EMPTY_SUMMARY));
1024 } else {
1025 panic!("Expected summary text in first message");
1026 }
1027
1028 Ok(())
1029 }
1030
1031 #[tokio::test]
1032 async fn test_compact_history_preserves_tool_use_tool_result_pairs() -> Result<()> {
1033 let provider = Arc::new(MockProvider::new("Summary of earlier conversation."));
1034 let config = CompactionConfig::default()
1035 .with_retain_recent(2)
1036 .with_min_messages(3);
1037 let compactor = LlmContextCompactor::new(provider, config);
1038
1039 let messages = vec![
1043 Message::user("What files are in the project?"),
1045 Message::assistant("Let me check that for you."),
1047 Message {
1049 role: Role::Assistant,
1050 content: Content::Blocks(vec![ContentBlock::ToolUse {
1051 id: "tool_1".to_string(),
1052 name: "list_files".to_string(),
1053 input: serde_json::json!({}),
1054 thought_signature: None,
1055 }]),
1056 },
1057 Message {
1059 role: Role::User,
1060 content: Content::Blocks(vec![ContentBlock::ToolResult {
1061 tool_use_id: "tool_1".to_string(),
1062 content: "file1.rs\nfile2.rs".to_string(),
1063 is_error: None,
1064 }]),
1065 },
1066 Message::assistant("The project contains file1.rs and file2.rs."),
1068 ];
1069
1070 let result = compactor.compact_history(messages).await?;
1071
1072 assert_eq!(result.new_count, 5);
1076
1077 let kept_assistant = &result.messages[2];
1080 if let Content::Blocks(blocks) = &kept_assistant.content {
1081 assert!(
1082 blocks
1083 .iter()
1084 .any(|b| matches!(b, ContentBlock::ToolUse { .. })),
1085 "Expected assistant tool_use in kept messages"
1086 );
1087 } else {
1088 panic!("Expected Blocks content for assistant tool_use message");
1089 }
1090
1091 let kept_user = &result.messages[3];
1093 if let Content::Blocks(blocks) = &kept_user.content {
1094 assert!(
1095 blocks
1096 .iter()
1097 .any(|b| matches!(b, ContentBlock::ToolResult { .. })),
1098 "Expected user tool_result in kept messages"
1099 );
1100 } else {
1101 panic!("Expected Blocks content for user tool_result message");
1102 }
1103
1104 Ok(())
1105 }
1106
1107 #[tokio::test]
1108 async fn test_compact_history_split_skips_leading_orphan_after_summary_ack() -> Result<()> {
1109 let provider = Arc::new(MockProvider::new("Re-summary."));
1137 let config = CompactionConfig::default()
1138 .with_retain_recent(3)
1139 .with_min_messages(1);
1140 let compactor = LlmContextCompactor::new(provider, config);
1141
1142 let messages = vec![
1143 Message::user(format!("{SUMMARY_PREFIX}Old summary about toolu_X.")),
1144 Message::assistant(SUMMARY_ACKNOWLEDGMENT),
1145 Message {
1146 role: Role::User,
1147 content: Content::Blocks(vec![ContentBlock::ToolResult {
1148 tool_use_id: "toolu_X".to_string(),
1149 content: "result for X".to_string(),
1150 is_error: None,
1151 }]),
1152 },
1153 Message::assistant("Result interpreted."),
1154 Message::user("Now what?"),
1155 ];
1156
1157 let result = compactor.compact_history(messages).await?;
1158
1159 let mut seen_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
1160 for msg in &result.messages {
1161 if let Content::Blocks(blocks) = &msg.content {
1162 for block in blocks {
1163 match block {
1164 ContentBlock::ToolResult { tool_use_id, .. } => {
1165 assert!(
1166 seen_ids.contains(tool_use_id),
1167 "orphan tool_use_id {tool_use_id} survived split selection",
1168 );
1169 }
1170 ContentBlock::ToolUse { id, .. } => {
1171 seen_ids.insert(id.clone());
1172 }
1173 _ => {}
1174 }
1175 }
1176 }
1177 }
1178
1179 Ok(())
1180 }
1181
1182 #[tokio::test]
1183 async fn test_compact_history_keeps_tool_pair_when_immediate_prev_is_text_only() -> Result<()> {
1184 let provider = Arc::new(MockProvider::new("Boundary summary."));
1191 let config = CompactionConfig::default()
1192 .with_retain_recent(2)
1193 .with_min_messages(1);
1194 let compactor = LlmContextCompactor::new(provider, config);
1195
1196 let messages = vec![
1209 Message::user("first turn"),
1210 Message::assistant("text only"),
1211 Message {
1212 role: Role::User,
1213 content: Content::Blocks(vec![ContentBlock::ToolResult {
1214 tool_use_id: "toolu_Y".to_string(),
1215 content: "ancient result".to_string(),
1216 is_error: None,
1217 }]),
1218 },
1219 Message::assistant("then a reply"),
1220 Message::user("ok thanks"),
1221 ];
1222
1223 let result = compactor.compact_history(messages).await?;
1224
1225 let has_tool_result = result.messages.iter().any(|m| {
1229 matches!(
1230 &m.content,
1231 Content::Blocks(blocks)
1232 if blocks.iter().any(|b| matches!(b, ContentBlock::ToolResult { .. }))
1233 )
1234 });
1235 assert!(
1236 !has_tool_result,
1237 "orphan tool_result should have been pushed into to_summarize, not retained",
1238 );
1239
1240 Ok(())
1241 }
1242
1243 #[tokio::test]
1244 async fn test_compact_history_retained_tail_is_token_capped() -> Result<()> {
1245 let provider = Arc::new(MockProvider::new(
1246 "Project summary with a long context and technical context.",
1247 ));
1248 let config = CompactionConfig::default()
1249 .with_retain_recent(8)
1250 .with_min_messages(1)
1251 .with_threshold_tokens(1);
1252 let compactor = LlmContextCompactor::new(provider, config);
1253
1254 let mut messages = Vec::new();
1255
1256 messages.extend((0..6).map(|index| Message::user(format!("pre-compaction noise {index}"))));
1258
1259 messages.extend(
1261 (0..8).map(|index| Message::assistant(format!("kept-{index}: {}", "x".repeat(12_000)))),
1262 );
1263
1264 let result = compactor.compact_history(messages).await?;
1265
1266 let retained_tail = &result.messages[2..];
1268 assert!(retained_tail.len() < 8);
1269
1270 let mut latest_index = -1i32;
1271 let mut all_retained = true;
1272 for message in retained_tail {
1273 if let Content::Text(text) = &message.content {
1274 if let Some(number) = text.split(':').next().and_then(|prefix| {
1275 prefix
1276 .strip_prefix("kept-")
1277 .and_then(|rest| rest.parse::<i32>().ok())
1278 }) {
1279 if number >= 0 {
1280 latest_index = latest_index.max(number);
1281 }
1282 } else {
1283 all_retained = false;
1284 }
1285 } else {
1286 all_retained = false;
1287 }
1288 }
1289
1290 assert!(all_retained);
1291 assert_eq!(latest_index, 7);
1292 assert!(
1293 TokenEstimator::estimate_history(retained_tail)
1294 <= compactor.config().max_retained_tail_tokens
1295 );
1296 assert!(compactor.needs_compaction(&result.messages));
1297
1298 Ok(())
1299 }
1300
1301 #[tokio::test]
1302 async fn test_compact_history_skips_summary_ack_when_retained_tail_is_empty() -> Result<()> {
1303 let provider = Arc::new(MockProvider::new("Summary for oversized user turn."));
1304 let config = CompactionConfig::default()
1305 .with_retain_recent(1)
1306 .with_min_messages(1)
1307 .with_threshold_tokens(1);
1308 let compactor = LlmContextCompactor::new(provider, config);
1309
1310 let messages = vec![
1311 Message::assistant("Earlier assistant context."),
1312 Message::user(format!("oversized-user-turn: {}", "x".repeat(200_000))),
1313 ];
1314
1315 let result = compactor.compact_history(messages).await?;
1316
1317 assert_eq!(result.new_count, 1);
1318 assert_eq!(result.messages.len(), 1);
1319
1320 let only_message = &result.messages[0];
1321 assert_eq!(only_message.role, Role::User);
1322
1323 if let Content::Text(text) = &only_message.content {
1324 assert!(text.contains("Previous conversation summary"));
1325 assert!(!text.contains(SUMMARY_ACKNOWLEDGMENT));
1326 } else {
1327 panic!("Expected summary text when retained tail is empty");
1328 }
1329
1330 Ok(())
1331 }
1332
1333 fn message_contains(message: &Message, needle: &str) -> bool {
1334 match &message.content {
1335 Content::Text(text) => text.contains(needle),
1336 Content::Blocks(blocks) => blocks.iter().any(|block| match block {
1337 ContentBlock::Text { text } => text.contains(needle),
1338 _ => false,
1339 }),
1340 }
1341 }
1342
1343 #[tokio::test]
1344 async fn test_epoch_one_facts_survive_two_compactions() -> Result<()> {
1345 const EPOCH1_FACT: &str = "EPOCH1_FACT: the API key lives in config/secrets.toml";
1351
1352 let requests = Arc::new(Mutex::new(Vec::new()));
1353 let provider = Arc::new(MockProvider::new_echo(requests.clone()));
1354 let config = CompactionConfig::default()
1355 .with_retain_recent(2)
1356 .with_min_messages(1);
1357 let compactor = LlmContextCompactor::new(provider, config);
1358
1359 let epoch1 = vec![
1360 Message::user(EPOCH1_FACT),
1361 Message::assistant("Understood, noted the secrets path."),
1362 Message::user("Now add error handling to main.rs."),
1363 Message::assistant("Added error handling to main.rs."),
1364 Message::user("latest user message one"),
1365 Message::assistant("latest assistant message two"),
1366 ];
1367
1368 let first = compactor.compact_history(epoch1).await?;
1369 assert!(
1370 first
1371 .messages
1372 .iter()
1373 .any(|m| message_contains(m, "EPOCH1_FACT")),
1374 "epoch-1 fact must be captured in the first summary"
1375 );
1376
1377 let mut epoch2 = first.messages;
1379 epoch2.push(Message::user("Another later turn."));
1380 epoch2.push(Message::assistant("Reply to the later turn."));
1381 epoch2.push(Message::user("Final turn a."));
1382 epoch2.push(Message::assistant("Final turn b."));
1383
1384 let second = compactor.compact_history(epoch2).await?;
1385
1386 assert!(
1387 second
1388 .messages
1389 .iter()
1390 .any(|m| message_contains(m, "EPOCH1_FACT")),
1391 "epoch-1 fact must survive the second compaction"
1392 );
1393
1394 let recorded = requests
1397 .lock()
1398 .map_err(|_| anyhow::anyhow!("request log poisoned"))?;
1399 assert!(
1400 recorded.iter().any(|req| req.contains("EPOCH1_FACT")),
1401 "prior summary carrying the epoch-1 fact must reach the summarizer"
1402 );
1403 drop(recorded);
1404
1405 Ok(())
1406 }
1407
1408 #[tokio::test]
1409 async fn test_compact_history_long_tool_chain_respects_token_cap() -> Result<()> {
1410 let provider = Arc::new(MockProvider::new("Summary of the early tool chain."));
1416 let cap = 20_000;
1417 let config = CompactionConfig::default()
1420 .with_retain_recent(18)
1421 .with_min_messages(1)
1422 .with_threshold_tokens(1)
1423 .with_max_retained_tail_tokens(cap);
1424 let compactor = LlmContextCompactor::new(provider, config);
1425
1426 let mut messages = Vec::new();
1429 for i in 0..10 {
1430 messages.push(Message {
1431 role: Role::Assistant,
1432 content: Content::Blocks(vec![ContentBlock::ToolUse {
1433 id: format!("tool_{i}"),
1434 name: "run".to_string(),
1435 input: serde_json::json!({ "arg": "y".repeat(12_000) }),
1436 thought_signature: None,
1437 }]),
1438 });
1439 messages.push(Message {
1440 role: Role::User,
1441 content: Content::Blocks(vec![ContentBlock::ToolResult {
1442 tool_use_id: format!("tool_{i}"),
1443 content: format!("result-{i}: {}", "z".repeat(12_000)),
1444 is_error: None,
1445 }]),
1446 });
1447 }
1448
1449 let full_tokens = TokenEstimator::estimate_history(&messages);
1450 assert!(
1451 full_tokens > cap * 2,
1452 "test setup: full chain must far exceed the cap"
1453 );
1454
1455 let result = compactor.compact_history(messages).await?;
1456
1457 let retained_tail = &result.messages[2..];
1460
1461 let tail_tokens = TokenEstimator::estimate_history(retained_tail);
1462 assert!(
1465 tail_tokens <= cap + 8_000,
1466 "retained tail {tail_tokens} should be bounded by the cap {cap}, not the whole chain"
1467 );
1468 assert!(
1469 retained_tail.len() < 20,
1470 "compaction must have summarized part of the chain"
1471 );
1472
1473 Ok(())
1474 }
1475
1476 #[tokio::test]
1477 async fn test_compact_warns_and_marks_truncated_summary() -> Result<()> {
1478 let requests = Arc::new(Mutex::new(Vec::new()));
1483 let provider = Arc::new(MockProvider::new_truncating(
1484 "partial summary cut off mid-",
1485 requests.clone(),
1486 ));
1487 let config = CompactionConfig::default().with_min_messages(1);
1488 let compactor = LlmContextCompactor::new(provider, config);
1489
1490 let messages = vec![
1491 Message::user("Some content that needs summarizing."),
1492 Message::assistant("More content to summarize here."),
1493 ];
1494
1495 let summary = compactor.compact(&messages).await?;
1496
1497 assert!(
1498 summary.contains("[summary truncated"),
1499 "a persistently truncated summary must carry a truncation marker"
1500 );
1501
1502 let recorded = requests
1504 .lock()
1505 .map_err(|_| anyhow::anyhow!("request log poisoned"))?;
1506 assert_eq!(recorded.len(), 2, "truncation should trigger one retry");
1507 drop(recorded);
1508
1509 Ok(())
1510 }
1511}