1use crate::llm::{
4 ChatOutcome, ChatRequest, Content, ContentBlock, LlmProvider, Message, Role, StopReason,
5};
6use anyhow::{Context, Result, bail};
7use async_trait::async_trait;
8use std::fmt::Write;
9use std::sync::Arc;
10
11use super::config::CompactionConfig;
12use super::estimator::TokenEstimator;
13
/// Marker at the start of a summary message. `compact_history` prepends it to the summary it
/// emits and `extract_summary_text` strips it to recognize summaries from earlier compactions.
const SUMMARY_PREFIX: &str = "[Previous conversation summary]\n\n";
/// Default system prompt sent with the summarization request.
const COMPACTION_SYSTEM_PROMPT: &str = "You are a precise summarizer. Your task is to create concise but complete summaries of conversations, preserving all technical details needed to continue the work.";
/// Default text placed before the formatted conversation in the summarization prompt.
const COMPACTION_SUMMARY_PROMPT_PREFIX: &str = "Summarize this conversation concisely, preserving:\n- Key decisions and conclusions reached\n- Important file paths, code changes, and technical details\n- Current task context and what has been accomplished\n- Any pending items, errors encountered, or next steps\n\nBe specific about technical details (file names, function names, error messages) as these\nare critical for continuing the work.\n\nConversation:\n";
/// Default text placed after the formatted conversation in the summarization prompt.
const COMPACTION_SUMMARY_PROMPT_SUFFIX: &str =
    "Provide a concise summary (aim for 500-1000 words):";
/// Summary returned by `compact` when there are neither fresh messages nor prior summary text.
const COMPACT_EMPTY_SUMMARY: &str = "No additional context was available to summarize; the previous messages were already compacted.";
/// Assistant reply inserted after the summary message when recent messages are retained.
const SUMMARY_ACKNOWLEDGMENT: &str =
    "I understand the context from the summary. Let me continue from where we left off.";
/// Maximum number of characters of a tool result copied into the summarization transcript.
const MAX_TOOL_RESULT_CHARS: usize = 500;
/// Suffix appended to a summary that is still cut off after the retry with a doubled budget.
const TRUNCATED_SUMMARY_MARKER: &str =
    "\n\n[summary truncated: exceeded the configured summary_max_tokens budget]";
25
#[async_trait]
/// Strategy for shrinking a conversation history by replacing older messages with a summary.
pub trait ContextCompactor: Send + Sync {
    /// Produces a text summary of `messages`.
    ///
    /// # Errors
    ///
    /// Returns an error when the implementation cannot produce a summary.
    async fn compact(&self, messages: &[Message]) -> Result<String>;

    /// Estimates how many tokens `messages` occupy.
    fn estimate_tokens(&self, messages: &[Message]) -> usize;

    /// Reports whether `messages` should be compacted.
    fn needs_compaction(&self, messages: &[Message]) -> bool;

    /// Compacts `messages` and returns the resulting history together with before/after
    /// message and token counts.
    ///
    /// # Errors
    ///
    /// Returns an error when the implementation cannot compact the history.
    async fn compact_history(&self, messages: Vec<Message>) -> Result<CompactionResult>;
}
49
/// Outcome of a `compact_history` call.
#[derive(Debug, Clone)]
pub struct CompactionResult {
    /// The history after compaction (identical to the input when nothing was compacted).
    pub messages: Vec<Message>,
    /// Number of messages passed in.
    pub original_count: usize,
    /// Number of messages in `messages`.
    pub new_count: usize,
    /// Estimated token count of the input history.
    pub original_tokens: usize,
    /// Estimated token count of `messages`.
    pub new_tokens: usize,
}
64
/// A [`ContextCompactor`] that asks an [`LlmProvider`] to write the summary.
pub struct LlmContextCompactor<P: LlmProvider + ?Sized> {
    /// Provider that receives the summarization request.
    provider: Arc<P>,
    /// Thresholds and budgets that drive compaction decisions.
    config: CompactionConfig,
    /// System prompt sent with every summarization request.
    system_prompt: String,
    /// Text placed before the formatted conversation in the user prompt.
    summary_prompt_prefix: String,
    /// Text placed after the formatted conversation in the user prompt.
    summary_prompt_suffix: String,
}
82
83impl<P: LlmProvider + ?Sized> LlmContextCompactor<P> {
84 #[must_use]
86 pub fn new(provider: Arc<P>, config: CompactionConfig) -> Self {
87 Self {
88 provider,
89 config,
90 system_prompt: COMPACTION_SYSTEM_PROMPT.to_string(),
91 summary_prompt_prefix: COMPACTION_SUMMARY_PROMPT_PREFIX.to_string(),
92 summary_prompt_suffix: COMPACTION_SUMMARY_PROMPT_SUFFIX.to_string(),
93 }
94 }
95
96 #[must_use]
98 pub fn with_defaults(provider: Arc<P>) -> Self {
99 Self::new(provider, CompactionConfig::default())
100 }
101
    /// Returns the configuration this compactor was built with.
    #[must_use]
    pub const fn config(&self) -> &CompactionConfig {
        &self.config
    }
107
108 #[must_use]
110 pub fn with_prompts(
111 mut self,
112 system_prompt: impl Into<String>,
113 summary_prompt_prefix: impl Into<String>,
114 summary_prompt_suffix: impl Into<String>,
115 ) -> Self {
116 self.system_prompt = system_prompt.into();
117 self.summary_prompt_prefix = summary_prompt_prefix.into();
118 self.summary_prompt_suffix = summary_prompt_suffix.into();
119 self
120 }
121
122 fn extract_summary_text(content: &Content) -> Option<String> {
132 match content {
133 Content::Text(text) => text.strip_prefix(SUMMARY_PREFIX).map(str::to_string),
134 Content::Blocks(blocks) => blocks.iter().find_map(|block| match block {
135 ContentBlock::Text { text } => {
136 text.strip_prefix(SUMMARY_PREFIX).map(str::to_string)
137 }
138 _ => None,
139 }),
140 }
141 }
142
143 fn has_tool_use(content: &Content) -> bool {
145 matches!(
146 content,
147 Content::Blocks(blocks)
148 if blocks
149 .iter()
150 .any(|block| matches!(block, ContentBlock::ToolUse { .. }))
151 )
152 }
153
154 fn has_tool_result(content: &Content) -> bool {
156 matches!(
157 content,
158 Content::Blocks(blocks)
159 if blocks
160 .iter()
161 .any(|block| matches!(block, ContentBlock::ToolResult { .. }))
162 )
163 }
164
165 fn split_point_preserves_tool_pairs(messages: &[Message], mut split_point: usize) -> usize {
178 while split_point > 0 && split_point < messages.len() {
179 let prev = &messages[split_point - 1];
180 let next = &messages[split_point];
181
182 let crosses_tool_pair = prev.role == Role::Assistant
183 && Self::has_tool_use(&prev.content)
184 && next.role == Role::User
185 && Self::has_tool_result(&next.content);
186
187 if crosses_tool_pair {
188 split_point -= 1;
189 continue;
190 }
191
192 break;
193 }
194
195 split_point
196 }
197
198 fn split_point_preserves_tool_pairs_with_cap(
237 messages: &[Message],
238 split_point: usize,
239 max_tokens: usize,
240 ) -> usize {
241 let cap_limit = Self::retain_tail_with_token_cap(messages, split_point, max_tokens);
242 let pair_safe = Self::split_point_preserves_tool_pairs(messages, cap_limit);
243 Self::split_point_skips_leading_orphan(messages, pair_safe)
244 }
245
246 fn split_point_skips_leading_orphan(messages: &[Message], mut split_point: usize) -> usize {
263 while split_point < messages.len() {
264 if Self::leading_message_has_orphan_tool_result(&messages[split_point..]) {
265 split_point = split_point.saturating_add(1);
266 continue;
267 }
268 break;
269 }
270 split_point
271 }
272
273 fn leading_message_has_orphan_tool_result(to_keep: &[Message]) -> bool {
281 let Some(first) = to_keep.first() else {
282 return false;
283 };
284 let Content::Blocks(blocks) = &first.content else {
285 return false;
286 };
287
288 let mut needed: Vec<&str> = Vec::new();
292 for block in blocks {
293 if let ContentBlock::ToolResult { tool_use_id, .. } = block {
294 needed.push(tool_use_id.as_str());
295 }
296 }
297 if needed.is_empty() {
298 return false;
299 }
300
301 let known_ids: std::collections::HashSet<&str> = to_keep
303 .iter()
304 .flat_map(|message| match &message.content {
305 Content::Blocks(blocks) => blocks
306 .iter()
307 .filter_map(|block| match block {
308 ContentBlock::ToolUse { id, .. } => Some(id.as_str()),
309 _ => None,
310 })
311 .collect::<Vec<_>>(),
312 Content::Text(_) => Vec::new(),
313 })
314 .collect();
315
316 needed.iter().any(|id| !known_ids.contains(id))
317 }
318
319 fn retain_tail_with_token_cap(messages: &[Message], start: usize, max_tokens: usize) -> usize {
321 if start >= messages.len() {
322 return messages.len();
323 }
324
325 if max_tokens == 0 {
326 return messages.len();
327 }
328
329 let mut used = 0usize;
330 let mut retained_start = messages.len();
331
332 for idx in (start..messages.len()).rev() {
333 let message_tokens = TokenEstimator::estimate_message(&messages[idx]);
334 if used + message_tokens > max_tokens {
335 break;
336 }
337
338 retained_start = idx;
339 used += message_tokens;
340 }
341
342 retained_start
343 }
344
345 fn format_messages_for_summary<'a>(messages: impl IntoIterator<Item = &'a Message>) -> String {
350 let mut output = String::new();
351
352 for message in messages {
353 let role = match message.role {
354 Role::User => "User",
355 Role::Assistant => "Assistant",
356 };
357
358 let _ = write!(output, "{role}: ");
359
360 match &message.content {
361 Content::Text(text) => {
362 let _ = writeln!(output, "{text}");
363 }
364 Content::Blocks(blocks) => {
365 for block in blocks {
366 match block {
367 ContentBlock::Text { text } => {
368 let _ = writeln!(output, "{text}");
369 }
370 ContentBlock::Thinking { thinking, .. } => {
371 let _ = writeln!(output, "[Thinking: {thinking}]");
373 }
374 ContentBlock::RedactedThinking { .. } => {
375 let _ = writeln!(output, "[Redacted thinking]");
376 }
377 ContentBlock::ToolUse { name, input, .. } => {
378 let _ = writeln!(
379 output,
380 "[Called tool: {name} with input: {}]",
381 serde_json::to_string(input).unwrap_or_default()
382 );
383 }
384 ContentBlock::ToolResult {
385 content, is_error, ..
386 } => {
387 let status = if is_error.unwrap_or(false) {
388 "error"
389 } else {
390 "success"
391 };
392 let truncated = if content.chars().count() > MAX_TOOL_RESULT_CHARS {
394 let prefix: String =
395 content.chars().take(MAX_TOOL_RESULT_CHARS).collect();
396 format!("{prefix}... (truncated)")
397 } else {
398 content.clone()
399 };
400 let _ = writeln!(output, "[Tool result ({status}): {truncated}]");
401 }
402 ContentBlock::Image { source } => {
403 let _ = writeln!(output, "[Image: {}]", source.media_type);
404 }
405 ContentBlock::Document { source } => {
406 let _ = writeln!(output, "[Document: {}]", source.media_type);
407 }
408 _ => {
411 let _ = writeln!(output, "[Unrecognized content block]");
412 }
413 }
414 }
415 }
416 }
417 output.push('\n');
418 }
419
420 output
421 }
422
423 fn build_summary_prompt(&self, prior_summaries: &[String], messages_text: &str) -> String {
430 let base = format!(
431 "{}{}{}",
432 self.summary_prompt_prefix, messages_text, self.summary_prompt_suffix
433 );
434
435 if prior_summaries.is_empty() {
436 return base;
437 }
438
439 let prior = prior_summaries.join("\n\n");
440 format!(
441 "Previous summary of earlier conversation. Preserve every fact below \
442 in your new summary so no earlier context is lost:\n{prior}\n\n{base}"
443 )
444 }
445
446 async fn run_summarization(&self, prompt: String, max_tokens: usize) -> Result<(String, bool)> {
449 let request = ChatRequest {
450 system: self.system_prompt.clone(),
451 messages: vec![Message::user(prompt)],
452 tools: None,
453 max_tokens: u32::try_from(max_tokens).unwrap_or(u32::MAX),
454 max_tokens_explicit: true,
455 session_id: None,
456 cached_content: None,
457 thinking: None,
458 tool_choice: None,
459 response_format: None,
460 };
461
462 let outcome = self
463 .provider
464 .chat(request)
465 .await
466 .context("Failed to call LLM for summarization")?;
467
468 match outcome {
469 ChatOutcome::Success(response) => {
470 let truncated = response.stop_reason == Some(StopReason::MaxTokens);
471 let text = response
472 .first_text()
473 .map(String::from)
474 .context("No text in summarization response")?;
475 Ok((text, truncated))
476 }
477 ChatOutcome::RateLimited => {
478 bail!("Rate limited during summarization")
479 }
480 ChatOutcome::InvalidRequest(msg) => {
481 bail!("Invalid request during summarization: {msg}")
482 }
483 ChatOutcome::ServerError(msg) => {
484 bail!("Server error during summarization: {msg}")
485 }
486 _ => {
489 bail!("Unrecognized provider outcome during summarization")
490 }
491 }
492 }
493}
494
495#[async_trait]
496impl<P: LlmProvider + ?Sized> ContextCompactor for LlmContextCompactor<P> {
497 async fn compact(&self, messages: &[Message]) -> Result<String> {
498 let mut prior_summaries: Vec<String> = Vec::new();
503 let mut fresh: Vec<&Message> = Vec::new();
504 for message in messages {
505 if let Some(text) = Self::extract_summary_text(&message.content) {
506 if !text.is_empty() {
507 prior_summaries.push(text);
508 }
509 } else {
510 fresh.push(message);
511 }
512 }
513
514 if fresh.is_empty() {
517 if prior_summaries.is_empty() {
518 return Ok(COMPACT_EMPTY_SUMMARY.to_string());
519 }
520 return Ok(prior_summaries.join("\n\n"));
521 }
522
523 let messages_text = Self::format_messages_for_summary(fresh.iter().copied());
524 let prompt = self.build_summary_prompt(&prior_summaries, &messages_text);
525
526 let budget = self.config.summary_max_tokens;
527 let (mut summary, truncated) = self.run_summarization(prompt.clone(), budget).await?;
528
529 if truncated {
530 log::warn!(
531 "compaction summary hit the max_tokens budget ({budget}); \
532 retrying with a larger budget to avoid silent context loss"
533 );
534 let (retry_summary, still_truncated) = self
535 .run_summarization(prompt, budget.saturating_mul(2))
536 .await?;
537 summary = retry_summary;
538 if still_truncated {
539 log::warn!(
540 "compaction summary still truncated after retry; appending a \
541 truncation marker so downstream context loss is visible"
542 );
543 summary.push_str(TRUNCATED_SUMMARY_MARKER);
544 }
545 }
546
547 Ok(summary)
548 }
549
    /// Estimates the token count of `messages` by delegating to
    /// `TokenEstimator::estimate_history`.
    fn estimate_tokens(&self, messages: &[Message]) -> usize {
        TokenEstimator::estimate_history(messages)
    }
553
554 fn needs_compaction(&self, messages: &[Message]) -> bool {
555 if !self.config.auto_compact {
556 return false;
557 }
558
559 if messages.len() < self.config.min_messages_for_compaction {
560 return false;
561 }
562
563 let estimated_tokens = self.estimate_tokens(messages);
564 estimated_tokens > self.config.threshold_tokens
565 }
566
567 async fn compact_history(&self, mut messages: Vec<Message>) -> Result<CompactionResult> {
568 let original_count = messages.len();
569 let original_tokens = self.estimate_tokens(&messages);
570
571 if messages.len() <= self.config.retain_recent {
573 return Ok(CompactionResult {
574 messages,
575 original_count,
576 new_count: original_count,
577 original_tokens,
578 new_tokens: original_tokens,
579 });
580 }
581
582 let mut split_point = messages.len().saturating_sub(self.config.retain_recent);
584 split_point = Self::split_point_preserves_tool_pairs_with_cap(
585 &messages,
586 split_point,
587 self.config.max_retained_tail_tokens,
588 );
589
590 let to_keep = messages.split_off(split_point);
593 let to_summarize = messages;
594
595 let summary = self.compact(&to_summarize).await?;
597
598 let mut new_messages = Vec::with_capacity(2 + to_keep.len());
600
601 new_messages.push(Message::user(format!("{SUMMARY_PREFIX}{summary}")));
603
604 if !to_keep.is_empty() {
609 new_messages.push(Message::assistant(SUMMARY_ACKNOWLEDGMENT));
610 }
611
612 new_messages.extend(to_keep);
620
621 let new_count = new_messages.len();
622 let new_tokens = self.estimate_tokens(&new_messages);
623
624 Ok(CompactionResult {
625 messages: new_messages,
626 original_count,
627 new_count,
628 original_tokens,
629 new_tokens,
630 })
631 }
632}
633
634#[cfg(test)]
635mod tests {
636 use super::*;
637 use crate::llm::{ChatResponse, StopReason, Usage};
638 use std::sync::Mutex;
639
    /// Test double for `LlmProvider` that records prompts and returns canned responses.
    struct MockProvider {
        /// Text returned as the response when `echo_input` is `false`.
        summary_response: String,
        /// Log of the user prompt of every request received, shared with the test.
        requests: Arc<Mutex<Vec<String>>>,
        /// When `true`, the response text is the user prompt of the request.
        echo_input: bool,
        /// Stop reason reported on every response.
        stop_reason: StopReason,
    }
649
650 impl MockProvider {
651 fn build(
652 summary: &str,
653 requests: Arc<Mutex<Vec<String>>>,
654 echo_input: bool,
655 stop_reason: StopReason,
656 ) -> Self {
657 Self {
658 summary_response: summary.to_string(),
659 requests,
660 echo_input,
661 stop_reason,
662 }
663 }
664
665 fn new(summary: &str) -> Self {
666 Self::build(
667 summary,
668 Arc::new(Mutex::new(Vec::new())),
669 false,
670 StopReason::EndTurn,
671 )
672 }
673
674 fn new_with_request_log(summary: &str, requests: Arc<Mutex<Vec<String>>>) -> Self {
675 Self::build(summary, requests, false, StopReason::EndTurn)
676 }
677
678 fn new_echo(requests: Arc<Mutex<Vec<String>>>) -> Self {
680 Self::build("", requests, true, StopReason::EndTurn)
681 }
682
683 fn new_truncating(summary: &str, requests: Arc<Mutex<Vec<String>>>) -> Self {
685 Self::build(summary, requests, false, StopReason::MaxTokens)
686 }
687
688 fn user_prompt_of(request: &ChatRequest) -> String {
689 request
690 .messages
691 .iter()
692 .find_map(|message| match &message.content {
693 Content::Text(text) => Some(text.clone()),
694 Content::Blocks(blocks) => {
695 let text = blocks
696 .iter()
697 .filter_map(|block| {
698 if let ContentBlock::Text { text } = block {
699 Some(text.as_str())
700 } else {
701 None
702 }
703 })
704 .collect::<Vec<_>>()
705 .join("\n");
706 if text.is_empty() { None } else { Some(text) }
707 }
708 })
709 .unwrap_or_default()
710 }
711 }
712
713 #[async_trait]
714 impl LlmProvider for MockProvider {
715 async fn chat(&self, request: ChatRequest) -> Result<ChatOutcome> {
716 let user_prompt = Self::user_prompt_of(&request);
717 if let Ok(mut entries) = self.requests.lock() {
718 entries.push(user_prompt.clone());
719 }
720 let text = if self.echo_input {
721 user_prompt
722 } else {
723 self.summary_response.clone()
724 };
725 Ok(ChatOutcome::Success(ChatResponse {
726 id: "test".to_string(),
727 content: vec![ContentBlock::Text { text }],
728 model: "mock".to_string(),
729 stop_reason: Some(self.stop_reason),
730 usage: Usage {
731 input_tokens: 100,
732 output_tokens: 50,
733 cached_input_tokens: 0,
734 cache_creation_input_tokens: 0,
735 },
736 }))
737 }
738
739 fn model(&self) -> &'static str {
740 "mock-model"
741 }
742
743 fn provider(&self) -> &'static str {
744 "mock"
745 }
746 }
747
748 #[test]
749 fn test_needs_compaction_below_threshold() {
750 let provider = Arc::new(MockProvider::new("summary"));
751 let config = CompactionConfig::default()
752 .with_threshold_tokens(10_000)
753 .with_min_messages(5);
754 let compactor = LlmContextCompactor::new(provider, config);
755
756 let messages = vec![
758 Message::user("Hello"),
759 Message::assistant("Hi"),
760 Message::user("How are you?"),
761 ];
762
763 assert!(!compactor.needs_compaction(&messages));
764 }
765
766 #[test]
767 fn test_needs_compaction_above_threshold() {
768 let provider = Arc::new(MockProvider::new("summary"));
769 let config = CompactionConfig::default()
770 .with_threshold_tokens(50) .with_min_messages(3);
772 let compactor = LlmContextCompactor::new(provider, config);
773
774 let messages = vec![
776 Message::user("Hello, this is a longer message to test compaction"),
777 Message::assistant(
778 "Hi there! This is also a longer response to help trigger compaction",
779 ),
780 Message::user("Great, let's continue with even more text here"),
781 Message::assistant("Absolutely, adding more content to ensure we exceed the threshold"),
782 ];
783
784 assert!(compactor.needs_compaction(&messages));
785 }
786
787 #[test]
788 fn test_needs_compaction_auto_disabled() {
789 let provider = Arc::new(MockProvider::new("summary"));
790 let config = CompactionConfig::default()
791 .with_threshold_tokens(10) .with_min_messages(1)
793 .with_auto_compact(false);
794 let compactor = LlmContextCompactor::new(provider, config);
795
796 let messages = vec![
797 Message::user("Hello, this is a longer message"),
798 Message::assistant("Response here"),
799 ];
800
801 assert!(!compactor.needs_compaction(&messages));
802 }
803
804 #[tokio::test]
805 async fn test_compact_history() -> Result<()> {
806 let provider = Arc::new(MockProvider::new(
807 "User asked about Rust programming. Assistant explained ownership, borrowing, and lifetimes.",
808 ));
809 let config = CompactionConfig::default()
810 .with_retain_recent(2)
811 .with_min_messages(3);
812 let compactor = LlmContextCompactor::new(provider, config);
813
814 let messages = vec![
816 Message::user(
817 "What is Rust? I've heard it's a systems programming language but I don't know much about it. Can you explain the key features and why people are excited about it?",
818 ),
819 Message::assistant(
820 "Rust is a systems programming language focused on safety, speed, and concurrency. It achieves memory safety without garbage collection through its ownership system. The key features include zero-cost abstractions, guaranteed memory safety, threads without data races, and minimal runtime.",
821 ),
822 Message::user(
823 "Tell me about ownership in detail. How does it work and what are the rules? I want to understand this core concept thoroughly.",
824 ),
825 Message::assistant(
826 "Ownership is Rust's central feature with three rules: each value has one owner, only one owner at a time, and the value is dropped when owner goes out of scope. This system prevents memory leaks, double frees, and dangling pointers at compile time.",
827 ),
828 Message::user("What about borrowing?"), Message::assistant("Borrowing allows references to data without taking ownership."), ];
831
832 let result = compactor.compact_history(messages).await?;
833
834 assert_eq!(result.new_count, 4);
836 assert_eq!(result.original_count, 6);
837
838 assert!(
840 result.new_tokens < result.original_tokens,
841 "Expected fewer tokens after compaction: new={} < original={}",
842 result.new_tokens,
843 result.original_tokens
844 );
845
846 if let Content::Text(text) = &result.messages[0].content {
848 assert!(text.contains("Previous conversation summary"));
849 }
850
851 Ok(())
852 }
853
854 #[tokio::test]
855 async fn test_compact_history_too_few_messages() -> Result<()> {
856 let provider = Arc::new(MockProvider::new("summary"));
857 let config = CompactionConfig::default().with_retain_recent(5);
858 let compactor = LlmContextCompactor::new(provider, config);
859
860 let messages = vec![
862 Message::user("Hello"),
863 Message::assistant("Hi"),
864 Message::user("Bye"),
865 ];
866
867 let result = compactor.compact_history(messages.clone()).await?;
868
869 assert_eq!(result.new_count, 3);
871 assert_eq!(result.messages.len(), 3);
872
873 Ok(())
874 }
875
876 #[test]
877 fn test_format_messages_for_summary() {
878 let messages = vec![Message::user("Hello"), Message::assistant("Hi there!")];
879
880 let formatted = LlmContextCompactor::<MockProvider>::format_messages_for_summary(&messages);
881
882 assert!(formatted.contains("User: Hello"));
883 assert!(formatted.contains("Assistant: Hi there!"));
884 }
885
886 #[test]
887 fn test_format_messages_for_summary_truncates_tool_results_unicode_safely() {
888 let long_unicode = "é".repeat(600);
889
890 let messages = vec![Message {
891 role: Role::Assistant,
892 content: Content::Blocks(vec![ContentBlock::ToolResult {
893 tool_use_id: "tool-1".to_string(),
894 content: long_unicode,
895 is_error: Some(false),
896 }]),
897 }];
898
899 let formatted = LlmContextCompactor::<MockProvider>::format_messages_for_summary(&messages);
900
901 assert!(formatted.contains("... (truncated)"));
902 }
903
904 #[tokio::test]
905 async fn test_compact_carries_prior_summary_into_request() -> Result<()> {
906 let requests = Arc::new(Mutex::new(Vec::new()));
911 let provider = Arc::new(MockProvider::new_with_request_log(
912 "Fresh summary",
913 requests.clone(),
914 ));
915 let config = CompactionConfig::default().with_min_messages(1);
916 let compactor = LlmContextCompactor::new(provider, config);
917
918 let messages = vec![
919 Message::user(format!("{SUMMARY_PREFIX}already compacted context")),
920 Message::assistant("Continue with the next task using this context."),
921 ];
922
923 let summary = compactor.compact(&messages).await?;
924
925 let recorded = requests
926 .lock()
927 .map_err(|_| anyhow::anyhow!("request log poisoned"))?;
928 assert_eq!(recorded.len(), 1);
929 assert_eq!(summary, "Fresh summary");
932 assert!(recorded[0].contains("Continue with the next task using this context."));
933 assert!(
934 recorded[0].contains("already compacted context"),
935 "prior summary must be carried into the summarization input"
936 );
937 drop(recorded);
938
939 Ok(())
940 }
941
942 #[tokio::test]
943 async fn test_compact_history_carries_prior_summary_in_candidate_payload() -> Result<()> {
944 let requests = Arc::new(Mutex::new(Vec::new()));
945 let provider = Arc::new(MockProvider::new_with_request_log(
946 "Fresh history summary",
947 requests.clone(),
948 ));
949 let config = CompactionConfig::default()
950 .with_retain_recent(2)
951 .with_min_messages(1);
952 let compactor = LlmContextCompactor::new(provider, config);
953
954 let messages = vec![
955 Message::user(format!("{SUMMARY_PREFIX}already compacted context")),
956 Message::assistant("Current turn content from the latest exchange."),
957 Message::assistant("Recent message that should stay."),
958 Message::user("Newest note that should stay."),
959 ];
960
961 let result = compactor.compact_history(messages).await?;
962
963 let recorded = requests
964 .lock()
965 .map_err(|_| anyhow::anyhow!("request log poisoned"))?;
966 assert_eq!(recorded.len(), 1);
967 assert!(recorded[0].contains("Current turn content from the latest exchange."));
968 assert!(
971 recorded[0].contains("already compacted context"),
972 "prior summary content must reach the summarizer"
973 );
974 drop(recorded);
975 assert_eq!(result.new_count, 4);
976
977 Ok(())
978 }
979
980 #[tokio::test]
981 async fn test_compact_history_carries_summaries_forward_when_window_has_only_summaries()
982 -> Result<()> {
983 let requests = Arc::new(Mutex::new(Vec::new()));
984 let provider = Arc::new(MockProvider::new_with_request_log(
985 "This summary should not be used",
986 requests.clone(),
987 ));
988 let config = CompactionConfig::default()
989 .with_retain_recent(2)
990 .with_min_messages(1);
991 let compactor = LlmContextCompactor::new(provider, config);
992
993 let messages = vec![
994 Message::user(format!("{SUMMARY_PREFIX}first prior compacted section")),
995 Message::assistant(format!("{SUMMARY_PREFIX}second prior compacted section")),
996 Message::user(format!("{SUMMARY_PREFIX}third prior compacted section")),
997 Message::assistant("final short note"),
998 ];
999
1000 let result = compactor.compact_history(messages).await?;
1001
1002 let recorded = requests
1006 .lock()
1007 .map_err(|_| anyhow::anyhow!("request log poisoned"))?;
1008 assert!(recorded.is_empty());
1009 drop(recorded);
1010 assert_eq!(result.new_count, 4);
1011 assert_eq!(result.messages.len(), 4);
1012
1013 if let Content::Text(text) = &result.messages[0].content {
1014 assert!(
1015 text.contains("first prior compacted section"),
1016 "first prior summary lost"
1017 );
1018 assert!(
1019 text.contains("second prior compacted section"),
1020 "second prior summary lost"
1021 );
1022 assert!(!text.contains(COMPACT_EMPTY_SUMMARY));
1023 } else {
1024 panic!("Expected summary text in first message");
1025 }
1026
1027 Ok(())
1028 }
1029
1030 #[tokio::test]
1031 async fn test_compact_history_preserves_tool_use_tool_result_pairs() -> Result<()> {
1032 let provider = Arc::new(MockProvider::new("Summary of earlier conversation."));
1033 let config = CompactionConfig::default()
1034 .with_retain_recent(2)
1035 .with_min_messages(3);
1036 let compactor = LlmContextCompactor::new(provider, config);
1037
1038 let messages = vec![
1042 Message::user("What files are in the project?"),
1044 Message::assistant("Let me check that for you."),
1046 Message {
1048 role: Role::Assistant,
1049 content: Content::Blocks(vec![ContentBlock::ToolUse {
1050 id: "tool_1".to_string(),
1051 name: "list_files".to_string(),
1052 input: serde_json::json!({}),
1053 thought_signature: None,
1054 }]),
1055 },
1056 Message {
1058 role: Role::User,
1059 content: Content::Blocks(vec![ContentBlock::ToolResult {
1060 tool_use_id: "tool_1".to_string(),
1061 content: "file1.rs\nfile2.rs".to_string(),
1062 is_error: None,
1063 }]),
1064 },
1065 Message::assistant("The project contains file1.rs and file2.rs."),
1067 ];
1068
1069 let result = compactor.compact_history(messages).await?;
1070
1071 assert_eq!(result.new_count, 5);
1075
1076 let kept_assistant = &result.messages[2];
1079 if let Content::Blocks(blocks) = &kept_assistant.content {
1080 assert!(
1081 blocks
1082 .iter()
1083 .any(|b| matches!(b, ContentBlock::ToolUse { .. })),
1084 "Expected assistant tool_use in kept messages"
1085 );
1086 } else {
1087 panic!("Expected Blocks content for assistant tool_use message");
1088 }
1089
1090 let kept_user = &result.messages[3];
1092 if let Content::Blocks(blocks) = &kept_user.content {
1093 assert!(
1094 blocks
1095 .iter()
1096 .any(|b| matches!(b, ContentBlock::ToolResult { .. })),
1097 "Expected user tool_result in kept messages"
1098 );
1099 } else {
1100 panic!("Expected Blocks content for user tool_result message");
1101 }
1102
1103 Ok(())
1104 }
1105
1106 #[tokio::test]
1107 async fn test_compact_history_split_skips_leading_orphan_after_summary_ack() -> Result<()> {
1108 let provider = Arc::new(MockProvider::new("Re-summary."));
1136 let config = CompactionConfig::default()
1137 .with_retain_recent(3)
1138 .with_min_messages(1);
1139 let compactor = LlmContextCompactor::new(provider, config);
1140
1141 let messages = vec![
1142 Message::user(format!("{SUMMARY_PREFIX}Old summary about toolu_X.")),
1143 Message::assistant(SUMMARY_ACKNOWLEDGMENT),
1144 Message {
1145 role: Role::User,
1146 content: Content::Blocks(vec![ContentBlock::ToolResult {
1147 tool_use_id: "toolu_X".to_string(),
1148 content: "result for X".to_string(),
1149 is_error: None,
1150 }]),
1151 },
1152 Message::assistant("Result interpreted."),
1153 Message::user("Now what?"),
1154 ];
1155
1156 let result = compactor.compact_history(messages).await?;
1157
1158 let mut seen_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
1159 for msg in &result.messages {
1160 if let Content::Blocks(blocks) = &msg.content {
1161 for block in blocks {
1162 match block {
1163 ContentBlock::ToolResult { tool_use_id, .. } => {
1164 assert!(
1165 seen_ids.contains(tool_use_id),
1166 "orphan tool_use_id {tool_use_id} survived split selection",
1167 );
1168 }
1169 ContentBlock::ToolUse { id, .. } => {
1170 seen_ids.insert(id.clone());
1171 }
1172 _ => {}
1173 }
1174 }
1175 }
1176 }
1177
1178 Ok(())
1179 }
1180
1181 #[tokio::test]
1182 async fn test_compact_history_keeps_tool_pair_when_immediate_prev_is_text_only() -> Result<()> {
1183 let provider = Arc::new(MockProvider::new("Boundary summary."));
1190 let config = CompactionConfig::default()
1191 .with_retain_recent(2)
1192 .with_min_messages(1);
1193 let compactor = LlmContextCompactor::new(provider, config);
1194
1195 let messages = vec![
1208 Message::user("first turn"),
1209 Message::assistant("text only"),
1210 Message {
1211 role: Role::User,
1212 content: Content::Blocks(vec![ContentBlock::ToolResult {
1213 tool_use_id: "toolu_Y".to_string(),
1214 content: "ancient result".to_string(),
1215 is_error: None,
1216 }]),
1217 },
1218 Message::assistant("then a reply"),
1219 Message::user("ok thanks"),
1220 ];
1221
1222 let result = compactor.compact_history(messages).await?;
1223
1224 let has_tool_result = result.messages.iter().any(|m| {
1228 matches!(
1229 &m.content,
1230 Content::Blocks(blocks)
1231 if blocks.iter().any(|b| matches!(b, ContentBlock::ToolResult { .. }))
1232 )
1233 });
1234 assert!(
1235 !has_tool_result,
1236 "orphan tool_result should have been pushed into to_summarize, not retained",
1237 );
1238
1239 Ok(())
1240 }
1241
1242 #[tokio::test]
1243 async fn test_compact_history_retained_tail_is_token_capped() -> Result<()> {
1244 let provider = Arc::new(MockProvider::new(
1245 "Project summary with a long context and technical context.",
1246 ));
1247 let config = CompactionConfig::default()
1248 .with_retain_recent(8)
1249 .with_min_messages(1)
1250 .with_threshold_tokens(1);
1251 let compactor = LlmContextCompactor::new(provider, config);
1252
1253 let mut messages = Vec::new();
1254
1255 messages.extend((0..6).map(|index| Message::user(format!("pre-compaction noise {index}"))));
1257
1258 messages.extend(
1260 (0..8).map(|index| Message::assistant(format!("kept-{index}: {}", "x".repeat(12_000)))),
1261 );
1262
1263 let result = compactor.compact_history(messages).await?;
1264
1265 let retained_tail = &result.messages[2..];
1267 assert!(retained_tail.len() < 8);
1268
1269 let mut latest_index = -1i32;
1270 let mut all_retained = true;
1271 for message in retained_tail {
1272 if let Content::Text(text) = &message.content {
1273 if let Some(number) = text.split(':').next().and_then(|prefix| {
1274 prefix
1275 .strip_prefix("kept-")
1276 .and_then(|rest| rest.parse::<i32>().ok())
1277 }) {
1278 if number >= 0 {
1279 latest_index = latest_index.max(number);
1280 }
1281 } else {
1282 all_retained = false;
1283 }
1284 } else {
1285 all_retained = false;
1286 }
1287 }
1288
1289 assert!(all_retained);
1290 assert_eq!(latest_index, 7);
1291 assert!(
1292 TokenEstimator::estimate_history(retained_tail)
1293 <= compactor.config().max_retained_tail_tokens
1294 );
1295 assert!(compactor.needs_compaction(&result.messages));
1296
1297 Ok(())
1298 }
1299
1300 #[tokio::test]
1301 async fn test_compact_history_skips_summary_ack_when_retained_tail_is_empty() -> Result<()> {
1302 let provider = Arc::new(MockProvider::new("Summary for oversized user turn."));
1303 let config = CompactionConfig::default()
1304 .with_retain_recent(1)
1305 .with_min_messages(1)
1306 .with_threshold_tokens(1);
1307 let compactor = LlmContextCompactor::new(provider, config);
1308
1309 let messages = vec![
1310 Message::assistant("Earlier assistant context."),
1311 Message::user(format!("oversized-user-turn: {}", "x".repeat(200_000))),
1312 ];
1313
1314 let result = compactor.compact_history(messages).await?;
1315
1316 assert_eq!(result.new_count, 1);
1317 assert_eq!(result.messages.len(), 1);
1318
1319 let only_message = &result.messages[0];
1320 assert_eq!(only_message.role, Role::User);
1321
1322 if let Content::Text(text) = &only_message.content {
1323 assert!(text.contains("Previous conversation summary"));
1324 assert!(!text.contains(SUMMARY_ACKNOWLEDGMENT));
1325 } else {
1326 panic!("Expected summary text when retained tail is empty");
1327 }
1328
1329 Ok(())
1330 }
1331
1332 fn message_contains(message: &Message, needle: &str) -> bool {
1333 match &message.content {
1334 Content::Text(text) => text.contains(needle),
1335 Content::Blocks(blocks) => blocks.iter().any(|block| match block {
1336 ContentBlock::Text { text } => text.contains(needle),
1337 _ => false,
1338 }),
1339 }
1340 }
1341
1342 #[tokio::test]
1343 async fn test_epoch_one_facts_survive_two_compactions() -> Result<()> {
1344 const EPOCH1_FACT: &str = "EPOCH1_FACT: the API key lives in config/secrets.toml";
1350
1351 let requests = Arc::new(Mutex::new(Vec::new()));
1352 let provider = Arc::new(MockProvider::new_echo(requests.clone()));
1353 let config = CompactionConfig::default()
1354 .with_retain_recent(2)
1355 .with_min_messages(1);
1356 let compactor = LlmContextCompactor::new(provider, config);
1357
1358 let epoch1 = vec![
1359 Message::user(EPOCH1_FACT),
1360 Message::assistant("Understood, noted the secrets path."),
1361 Message::user("Now add error handling to main.rs."),
1362 Message::assistant("Added error handling to main.rs."),
1363 Message::user("latest user message one"),
1364 Message::assistant("latest assistant message two"),
1365 ];
1366
1367 let first = compactor.compact_history(epoch1).await?;
1368 assert!(
1369 first
1370 .messages
1371 .iter()
1372 .any(|m| message_contains(m, "EPOCH1_FACT")),
1373 "epoch-1 fact must be captured in the first summary"
1374 );
1375
1376 let mut epoch2 = first.messages;
1378 epoch2.push(Message::user("Another later turn."));
1379 epoch2.push(Message::assistant("Reply to the later turn."));
1380 epoch2.push(Message::user("Final turn a."));
1381 epoch2.push(Message::assistant("Final turn b."));
1382
1383 let second = compactor.compact_history(epoch2).await?;
1384
1385 assert!(
1386 second
1387 .messages
1388 .iter()
1389 .any(|m| message_contains(m, "EPOCH1_FACT")),
1390 "epoch-1 fact must survive the second compaction"
1391 );
1392
1393 let recorded = requests
1396 .lock()
1397 .map_err(|_| anyhow::anyhow!("request log poisoned"))?;
1398 assert!(
1399 recorded.iter().any(|req| req.contains("EPOCH1_FACT")),
1400 "prior summary carrying the epoch-1 fact must reach the summarizer"
1401 );
1402 drop(recorded);
1403
1404 Ok(())
1405 }
1406
1407 #[tokio::test]
1408 async fn test_compact_history_long_tool_chain_respects_token_cap() -> Result<()> {
1409 let provider = Arc::new(MockProvider::new("Summary of the early tool chain."));
1415 let cap = 20_000;
1416 let config = CompactionConfig::default()
1419 .with_retain_recent(18)
1420 .with_min_messages(1)
1421 .with_threshold_tokens(1)
1422 .with_max_retained_tail_tokens(cap);
1423 let compactor = LlmContextCompactor::new(provider, config);
1424
1425 let mut messages = Vec::new();
1428 for i in 0..10 {
1429 messages.push(Message {
1430 role: Role::Assistant,
1431 content: Content::Blocks(vec![ContentBlock::ToolUse {
1432 id: format!("tool_{i}"),
1433 name: "run".to_string(),
1434 input: serde_json::json!({ "arg": "y".repeat(12_000) }),
1435 thought_signature: None,
1436 }]),
1437 });
1438 messages.push(Message {
1439 role: Role::User,
1440 content: Content::Blocks(vec![ContentBlock::ToolResult {
1441 tool_use_id: format!("tool_{i}"),
1442 content: format!("result-{i}: {}", "z".repeat(12_000)),
1443 is_error: None,
1444 }]),
1445 });
1446 }
1447
1448 let full_tokens = TokenEstimator::estimate_history(&messages);
1449 assert!(
1450 full_tokens > cap * 2,
1451 "test setup: full chain must far exceed the cap"
1452 );
1453
1454 let result = compactor.compact_history(messages).await?;
1455
1456 let retained_tail = &result.messages[2..];
1459
1460 let tail_tokens = TokenEstimator::estimate_history(retained_tail);
1461 assert!(
1464 tail_tokens <= cap + 8_000,
1465 "retained tail {tail_tokens} should be bounded by the cap {cap}, not the whole chain"
1466 );
1467 assert!(
1468 retained_tail.len() < 20,
1469 "compaction must have summarized part of the chain"
1470 );
1471
1472 Ok(())
1473 }
1474
1475 #[tokio::test]
1476 async fn test_compact_warns_and_marks_truncated_summary() -> Result<()> {
1477 let requests = Arc::new(Mutex::new(Vec::new()));
1482 let provider = Arc::new(MockProvider::new_truncating(
1483 "partial summary cut off mid-",
1484 requests.clone(),
1485 ));
1486 let config = CompactionConfig::default().with_min_messages(1);
1487 let compactor = LlmContextCompactor::new(provider, config);
1488
1489 let messages = vec![
1490 Message::user("Some content that needs summarizing."),
1491 Message::assistant("More content to summarize here."),
1492 ];
1493
1494 let summary = compactor.compact(&messages).await?;
1495
1496 assert!(
1497 summary.contains("[summary truncated"),
1498 "a persistently truncated summary must carry a truncation marker"
1499 );
1500
1501 let recorded = requests
1503 .lock()
1504 .map_err(|_| anyhow::anyhow!("request log poisoned"))?;
1505 assert_eq!(recorded.len(), 2, "truncation should trigger one retry");
1506 drop(recorded);
1507
1508 Ok(())
1509 }
1510}