Skip to main content

agent_sdk/context/
compactor.rs

1//! Context compaction implementation.
2
3use crate::llm::{ChatOutcome, ChatRequest, Content, ContentBlock, LlmProvider, Message, Role};
4use anyhow::{Context, Result, bail};
5use async_trait::async_trait;
6use std::fmt::Write;
7use std::sync::Arc;
8
9use super::config::CompactionConfig;
10use super::estimator::TokenEstimator;
11
/// Marker placed at the start of the synthetic user message that carries a
/// compaction summary; also used to recognise such messages later.
const SUMMARY_PREFIX: &str = "[Previous conversation summary]\n\n";

/// Default system prompt for the summarization request.
const COMPACTION_SYSTEM_PROMPT: &str = concat!(
    "You are a precise summarizer. Your task is to create concise but complete ",
    "summaries of conversations, preserving all technical details needed to continue the work.",
);

/// Default text placed before the rendered conversation in the summary prompt.
const COMPACTION_SUMMARY_PROMPT_PREFIX: &str = concat!(
    "Summarize this conversation concisely, preserving:\n",
    "- Key decisions and conclusions reached\n",
    "- Important file paths, code changes, and technical details\n",
    "- Current task context and what has been accomplished\n",
    "- Any pending items, errors encountered, or next steps\n",
    "\n",
    "Be specific about technical details (file names, function names, error messages) as these\n",
    "are critical for continuing the work.\n",
    "\n",
    "Conversation:\n",
);

/// Default text placed after the rendered conversation in the summary prompt.
const COMPACTION_SUMMARY_PROMPT_SUFFIX: &str =
    "Provide a concise summary (aim for 500-1000 words):";

/// Summary text used when there is nothing left to summarize.
const COMPACT_EMPTY_SUMMARY: &str = concat!(
    "No additional context was available to summarize; ",
    "the previous messages were already compacted.",
);

/// Synthetic assistant reply inserted after the summary message.
const SUMMARY_ACKNOWLEDGMENT: &str =
    "I understand the context from the summary. Let me continue from where we left off.";

/// Token budget for the retained tail of recent messages.
const MAX_RETAINED_TAIL_MESSAGE_TOKENS: usize = 20_000;

/// Maximum number of characters of a tool result rendered into the summary input.
const MAX_TOOL_RESULT_CHARS: usize = 500;
22
23/// Trait for context compaction strategies.
24///
25/// Implement this trait to provide custom compaction logic.
26#[async_trait]
27pub trait ContextCompactor: Send + Sync {
28    /// Compact a list of messages into a summary.
29    ///
30    /// # Errors
31    /// Returns an error if summarization fails.
32    async fn compact(&self, messages: &[Message]) -> Result<String>;
33
34    /// Estimate tokens for a message list.
35    fn estimate_tokens(&self, messages: &[Message]) -> usize;
36
37    /// Check if compaction is needed.
38    fn needs_compaction(&self, messages: &[Message]) -> bool;
39
40    /// Perform full compaction, returning new message history.
41    ///
42    /// # Errors
43    /// Returns an error if compaction fails.
44    async fn compact_history(&self, messages: Vec<Message>) -> Result<CompactionResult>;
45}
46
47/// Result of a compaction operation.
48#[derive(Debug, Clone)]
49pub struct CompactionResult {
50    /// The new compacted message history.
51    pub messages: Vec<Message>,
52    /// Number of messages before compaction.
53    pub original_count: usize,
54    /// Number of messages after compaction.
55    pub new_count: usize,
56    /// Estimated tokens before compaction.
57    pub original_tokens: usize,
58    /// Estimated tokens after compaction.
59    pub new_tokens: usize,
60}
61
62/// LLM-based context compactor.
63///
64/// Uses the LLM itself to summarize older messages into a compact form.
65///
66/// `P` is `?Sized` so callers can hold an `Arc<dyn LlmProvider>` —
67/// useful when the provider is resolved dynamically per-thread (e.g.
68/// inside `agent-server`'s daemon worker, where the same compactor
69/// type wraps whichever concrete provider the host's resolver picks).
70/// Concrete-type users (`Arc<AnthropicProvider>`, etc.) still work
71/// unchanged.
72pub struct LlmContextCompactor<P: LlmProvider + ?Sized> {
73    provider: Arc<P>,
74    config: CompactionConfig,
75    system_prompt: String,
76    summary_prompt_prefix: String,
77    summary_prompt_suffix: String,
78}
79
80impl<P: LlmProvider + ?Sized> LlmContextCompactor<P> {
81    /// Create a new LLM context compactor.
82    #[must_use]
83    pub fn new(provider: Arc<P>, config: CompactionConfig) -> Self {
84        Self {
85            provider,
86            config,
87            system_prompt: COMPACTION_SYSTEM_PROMPT.to_string(),
88            summary_prompt_prefix: COMPACTION_SUMMARY_PROMPT_PREFIX.to_string(),
89            summary_prompt_suffix: COMPACTION_SUMMARY_PROMPT_SUFFIX.to_string(),
90        }
91    }
92
93    /// Create with default configuration.
94    #[must_use]
95    pub fn with_defaults(provider: Arc<P>) -> Self {
96        Self::new(provider, CompactionConfig::default())
97    }
98
99    /// Get the configuration.
100    #[must_use]
101    pub const fn config(&self) -> &CompactionConfig {
102        &self.config
103    }
104
105    /// Override the prompts used for LLM-based summarization.
106    #[must_use]
107    pub fn with_prompts(
108        mut self,
109        system_prompt: impl Into<String>,
110        summary_prompt_prefix: impl Into<String>,
111        summary_prompt_suffix: impl Into<String>,
112    ) -> Self {
113        self.system_prompt = system_prompt.into();
114        self.summary_prompt_prefix = summary_prompt_prefix.into();
115        self.summary_prompt_suffix = summary_prompt_suffix.into();
116        self
117    }
118
119    /// Return true when a content object is a previously inserted compaction summary marker.
120    fn is_summary_message(content: &Content) -> bool {
121        match content {
122            Content::Text(text) => text.starts_with(SUMMARY_PREFIX),
123            Content::Blocks(blocks) => blocks.iter().any(|block| match block {
124                ContentBlock::Text { text } => text.starts_with(SUMMARY_PREFIX),
125                _ => false,
126            }),
127        }
128    }
129
130    /// Return true when a message contains a tool-use block.
131    fn has_tool_use(content: &Content) -> bool {
132        matches!(
133            content,
134            Content::Blocks(blocks)
135                if blocks
136                    .iter()
137                    .any(|block| matches!(block, ContentBlock::ToolUse { .. }))
138        )
139    }
140
141    /// Return true when a message contains a tool-result block.
142    fn has_tool_result(content: &Content) -> bool {
143        matches!(
144            content,
145            Content::Blocks(blocks)
146                if blocks
147                    .iter()
148                    .any(|block| matches!(block, ContentBlock::ToolResult { .. }))
149        )
150    }
151
152    /// Shift split point backwards until tool-use/result pairs are not split.
153    fn split_point_preserves_tool_pairs(messages: &[Message], mut split_point: usize) -> usize {
154        while split_point > 0 && split_point < messages.len() {
155            let prev = &messages[split_point - 1];
156            let next = &messages[split_point];
157
158            let crosses_tool_pair = (prev.role == Role::Assistant
159                && Self::has_tool_use(&prev.content)
160                && next.role == Role::User
161                && Self::has_tool_result(&next.content))
162                || (prev.role == Role::User
163                    && Self::has_tool_result(&prev.content)
164                    && next.role == Role::Assistant
165                    && Self::has_tool_use(&next.content));
166
167            if crosses_tool_pair {
168                split_point -= 1;
169                continue;
170            }
171
172            break;
173        }
174
175        split_point
176    }
177
178    /// Pick a split point that produces a self-consistent `to_keep`.
179    ///
180    /// `to_keep` is self-consistent (per Anthropic's API contract)
181    /// when every `tool_result` block it contains references a
182    /// `tool_use` block earlier in `to_keep`. The compactor inserts
183    /// a synthetic `[summary, summary_ack]` prefix in front of
184    /// `to_keep`, and that prefix has no `tool_use` blocks — so the
185    /// only path to a valid wire payload is for `to_keep` itself to
186    /// be self-contained.
187    ///
188    /// Three constraints, applied in order:
189    ///
190    /// 1. **Token cap (soft)** — push split forward to keep the
191    ///    retained tail under `max_tokens` of estimated content. The
192    ///    retained-tail cap is a soft hint; a tool chain that doesn't
193    ///    fit gets retained anyway because chain safety is hard.
194    /// 2. **Pair safety (hard)** — shift split backward to keep
195    ///    `assistant_with_tool_use` and the immediately following
196    ///    `user_with_tool_result` together. Catches the common case
197    ///    where the boundary lands inside a single tool turn.
198    /// 3. **Chain safety (hard)** — advance split forward past any
199    ///    leading `user_with_tool_result` whose `tool_use_id` isn't
200    ///    in the rest of `to_keep`. Catches the case pair-preservation
201    ///    can't see: when the message immediately before the original
202    ///    boundary is text-only (e.g. a `summary_ack` from a prior
203    ///    compaction), pair-preservation has nothing to anchor on
204    ///    and silently leaves the orphan in `to_keep[0]`. The wire
205    ///    payload would then start `[summary, summary_ack,
206    ///    user(orphan_tool_result), …]` — which Anthropic rejects
207    ///    with `messages.2.content.0: unexpected tool_use_id`. Step
208    ///    3 makes the split-point selection responsible for chain
209    ///    integrity instead of post-hoc stripping the output.
210    ///
211    /// Step 2 and step 3 can pull in opposite directions (step 2
212    /// shifts back, step 3 shifts forward), so the function applies
213    /// step 3 last: pair-safety puts the candidate as far back as
214    /// it needs to go, then chain-safety advances past any leading
215    /// orphan that survived because the immediate prev was text-only.
216    fn split_point_preserves_tool_pairs_with_cap(
217        messages: &[Message],
218        split_point: usize,
219        max_tokens: usize,
220    ) -> usize {
221        let cap_limit = Self::retain_tail_with_token_cap(messages, split_point, max_tokens);
222        let pair_safe = Self::split_point_preserves_tool_pairs(messages, cap_limit);
223        Self::split_point_skips_leading_orphan(messages, pair_safe)
224    }
225
226    /// Advance `split_point` forward until `to_keep[0]` doesn't
227    /// contain an orphan `tool_result` block — i.e. a `tool_result`
228    /// whose `tool_use_id` isn't satisfied by some `tool_use` block
229    /// in `to_keep`.
230    ///
231    /// Implements step 3 of `split_point_preserves_tool_pairs_with_cap`
232    /// (chain safety). Pair-preservation alone can't catch the
233    /// "synthetic `summary_ack` precedes an orphan" shape because it
234    /// only inspects the immediate prev/next pair; this helper
235    /// inspects whether `to_keep[0]`'s `tool_result` blocks point
236    /// anywhere `to_keep` will host a matching `tool_use`. When they
237    /// don't, the `tool_result` belongs in `to_summarize` (where it
238    /// gets text-ified into the summary prose), not in `to_keep`.
239    ///
240    /// Walks at most `messages.len()` steps because each iteration
241    /// advances `split_point` by at least 1.
242    fn split_point_skips_leading_orphan(messages: &[Message], mut split_point: usize) -> usize {
243        while split_point < messages.len() {
244            if Self::leading_message_has_orphan_tool_result(&messages[split_point..]) {
245                split_point = split_point.saturating_add(1);
246                continue;
247            }
248            break;
249        }
250        split_point
251    }
252
253    /// True when `to_keep[0]` is a `user` message whose `tool_result`
254    /// blocks reference at least one `tool_use_id` not present in
255    /// `to_keep`. The check is scoped to the first message because
256    /// well-formed Anthropic conversations always have `tool_use`
257    /// immediately before `tool_result` — an orphan deeper than
258    /// `to_keep[0]` would require the input itself to be malformed
259    /// upstream of compaction, which is out of scope here.
260    fn leading_message_has_orphan_tool_result(to_keep: &[Message]) -> bool {
261        let Some(first) = to_keep.first() else {
262            return false;
263        };
264        let Content::Blocks(blocks) = &first.content else {
265            return false;
266        };
267
268        // Pull the tool_result ids that appear in the first message.
269        // If there are none, the first message can't contribute an
270        // orphan and we're done early without scanning the tail.
271        let mut needed: Vec<&str> = Vec::new();
272        for block in blocks {
273            if let ContentBlock::ToolResult { tool_use_id, .. } = block {
274                needed.push(tool_use_id.as_str());
275            }
276        }
277        if needed.is_empty() {
278            return false;
279        }
280
281        // Build the set of tool_use ids `to_keep` will host.
282        let known_ids: std::collections::HashSet<&str> = to_keep
283            .iter()
284            .flat_map(|message| match &message.content {
285                Content::Blocks(blocks) => blocks
286                    .iter()
287                    .filter_map(|block| match block {
288                        ContentBlock::ToolUse { id, .. } => Some(id.as_str()),
289                        _ => None,
290                    })
291                    .collect::<Vec<_>>(),
292                Content::Text(_) => Vec::new(),
293            })
294            .collect();
295
296        needed.iter().any(|id| !known_ids.contains(id))
297    }
298
299    /// Keep most recent messages that fit within the retained-message token budget.
300    fn retain_tail_with_token_cap(messages: &[Message], start: usize, max_tokens: usize) -> usize {
301        if start >= messages.len() {
302            return messages.len();
303        }
304
305        if max_tokens == 0 {
306            return messages.len();
307        }
308
309        let mut used = 0usize;
310        let mut retained_start = messages.len();
311
312        for idx in (start..messages.len()).rev() {
313            let message_tokens = TokenEstimator::estimate_message(&messages[idx]);
314            if used + message_tokens > max_tokens {
315                break;
316            }
317
318            retained_start = idx;
319            used += message_tokens;
320        }
321
322        retained_start
323    }
324
325    /// Format messages for summarization.
326    fn format_messages_for_summary(messages: &[Message]) -> String {
327        let mut output = String::new();
328
329        for message in messages {
330            let role = match message.role {
331                Role::User => "User",
332                Role::Assistant => "Assistant",
333            };
334
335            let _ = write!(output, "{role}: ");
336
337            match &message.content {
338                Content::Text(text) => {
339                    let _ = writeln!(output, "{text}");
340                }
341                Content::Blocks(blocks) => {
342                    for block in blocks {
343                        match block {
344                            ContentBlock::Text { text } => {
345                                let _ = writeln!(output, "{text}");
346                            }
347                            ContentBlock::Thinking { thinking, .. } => {
348                                // Include thinking in summaries for context
349                                let _ = writeln!(output, "[Thinking: {thinking}]");
350                            }
351                            ContentBlock::RedactedThinking { .. } => {
352                                let _ = writeln!(output, "[Redacted thinking]");
353                            }
354                            ContentBlock::ToolUse { name, input, .. } => {
355                                let _ = writeln!(
356                                    output,
357                                    "[Called tool: {name} with input: {}]",
358                                    serde_json::to_string(input).unwrap_or_default()
359                                );
360                            }
361                            ContentBlock::ToolResult {
362                                content, is_error, ..
363                            } => {
364                                let status = if is_error.unwrap_or(false) {
365                                    "error"
366                                } else {
367                                    "success"
368                                };
369                                // Truncate long tool results (Unicode-safe; avoid slicing mid-codepoint)
370                                let truncated = if content.chars().count() > MAX_TOOL_RESULT_CHARS {
371                                    let prefix: String =
372                                        content.chars().take(MAX_TOOL_RESULT_CHARS).collect();
373                                    format!("{prefix}... (truncated)")
374                                } else {
375                                    content.clone()
376                                };
377                                let _ = writeln!(output, "[Tool result ({status}): {truncated}]");
378                            }
379                            ContentBlock::Image { source } => {
380                                let _ = writeln!(output, "[Image: {}]", source.media_type);
381                            }
382                            ContentBlock::Document { source } => {
383                                let _ = writeln!(output, "[Document: {}]", source.media_type);
384                            }
385                            // `ContentBlock` is `#[non_exhaustive]`; render an
386                            // unknown future block kind with a generic marker.
387                            _ => {
388                                let _ = writeln!(output, "[Unrecognized content block]");
389                            }
390                        }
391                    }
392                }
393            }
394            output.push('\n');
395        }
396
397        output
398    }
399
400    /// Build the summarization prompt.
401    fn build_summary_prompt(&self, messages_text: &str) -> String {
402        format!(
403            "{}{}{}",
404            self.summary_prompt_prefix, messages_text, self.summary_prompt_suffix
405        )
406    }
407}
408
409#[async_trait]
410impl<P: LlmProvider + ?Sized> ContextCompactor for LlmContextCompactor<P> {
411    async fn compact(&self, messages: &[Message]) -> Result<String> {
412        let messages_to_summarize: Vec<_> = messages
413            .iter()
414            .filter(|message| !Self::is_summary_message(&message.content))
415            .cloned()
416            .collect();
417
418        if messages_to_summarize.is_empty() {
419            return Ok(COMPACT_EMPTY_SUMMARY.to_string());
420        }
421
422        let messages_text = Self::format_messages_for_summary(&messages_to_summarize);
423        let prompt = self.build_summary_prompt(&messages_text);
424
425        let request = ChatRequest {
426            system: self.system_prompt.clone(),
427            messages: vec![Message::user(prompt)],
428            tools: None,
429            max_tokens: 2000,
430            max_tokens_explicit: true,
431            session_id: None,
432            cached_content: None,
433            thinking: None,
434            tool_choice: None,
435            response_format: None,
436        };
437
438        let outcome = self
439            .provider
440            .chat(request)
441            .await
442            .context("Failed to call LLM for summarization")?;
443
444        match outcome {
445            ChatOutcome::Success(response) => response
446                .first_text()
447                .map(String::from)
448                .context("No text in summarization response"),
449            ChatOutcome::RateLimited => {
450                bail!("Rate limited during summarization")
451            }
452            ChatOutcome::InvalidRequest(msg) => {
453                bail!("Invalid request during summarization: {msg}")
454            }
455            ChatOutcome::ServerError(msg) => {
456                bail!("Server error during summarization: {msg}")
457            }
458            // `ChatOutcome` is `#[non_exhaustive]`; an unrecognized outcome
459            // fails the summarization rather than returning an empty summary.
460            _ => {
461                bail!("Unrecognized provider outcome during summarization")
462            }
463        }
464    }
465
466    fn estimate_tokens(&self, messages: &[Message]) -> usize {
467        TokenEstimator::estimate_history(messages)
468    }
469
470    fn needs_compaction(&self, messages: &[Message]) -> bool {
471        if !self.config.auto_compact {
472            return false;
473        }
474
475        if messages.len() < self.config.min_messages_for_compaction {
476            return false;
477        }
478
479        let estimated_tokens = self.estimate_tokens(messages);
480        estimated_tokens > self.config.threshold_tokens
481    }
482
483    async fn compact_history(&self, messages: Vec<Message>) -> Result<CompactionResult> {
484        let original_count = messages.len();
485        let original_tokens = self.estimate_tokens(&messages);
486
487        // Ensure we have enough messages to compact
488        if messages.len() <= self.config.retain_recent {
489            return Ok(CompactionResult {
490                messages,
491                original_count,
492                new_count: original_count,
493                original_tokens,
494                new_tokens: original_tokens,
495            });
496        }
497
498        // Split messages: old messages to summarize, recent messages to keep
499        let mut split_point = messages.len().saturating_sub(self.config.retain_recent);
500        split_point = Self::split_point_preserves_tool_pairs_with_cap(
501            &messages,
502            split_point,
503            MAX_RETAINED_TAIL_MESSAGE_TOKENS,
504        );
505
506        let (to_summarize, to_keep) = messages.split_at(split_point);
507
508        // Summarize old messages
509        let summary = self.compact(to_summarize).await?;
510
511        // Build new message history
512        let mut new_messages = Vec::with_capacity(2 + to_keep.len());
513
514        // Add summary as a user message
515        new_messages.push(Message::user(format!("{SUMMARY_PREFIX}{summary}")));
516
517        // Add acknowledgment from assistant only when some recent tail remains.
518        // If compaction drops the entire retained tail due to the token cap, ending
519        // the request with this synthetic assistant message would act like assistant
520        // prefill and Anthropic rejects that shape.
521        if !to_keep.is_empty() {
522            new_messages.push(Message::assistant(SUMMARY_ACKNOWLEDGMENT));
523        }
524
525        // Add recent messages. `to_keep` is guaranteed self-consistent
526        // by `split_point_preserves_tool_pairs_with_cap` (steps 2 and
527        // 3): any orphan `tool_result` was either folded into the
528        // summary (split shifted forward) or paired with its
529        // `tool_use` inside `to_keep` (split shifted backward). No
530        // post-hoc rewriting of the assembled output is required.
531        new_messages.extend(to_keep.iter().cloned());
532
533        let new_count = new_messages.len();
534        let new_tokens = self.estimate_tokens(&new_messages);
535
536        Ok(CompactionResult {
537            messages: new_messages,
538            original_count,
539            new_count,
540            original_tokens,
541            new_tokens,
542        })
543    }
544}
545
546#[cfg(test)]
547mod tests {
548    use super::*;
549    use crate::llm::{ChatResponse, StopReason, Usage};
550    use std::sync::Mutex;
551
552    struct MockProvider {
553        summary_response: String,
554        requests: Option<Arc<Mutex<Vec<String>>>>,
555    }
556
557    impl MockProvider {
558        fn new(summary: &str) -> Self {
559            Self {
560                summary_response: summary.to_string(),
561                requests: None,
562            }
563        }
564
565        fn new_with_request_log(summary: &str, requests: Arc<Mutex<Vec<String>>>) -> Self {
566            Self {
567                summary_response: summary.to_string(),
568                requests: Some(requests),
569            }
570        }
571    }
572
573    #[async_trait]
574    impl LlmProvider for MockProvider {
575        async fn chat(&self, request: ChatRequest) -> Result<ChatOutcome> {
576            if let Some(requests) = &self.requests {
577                let mut entries = requests.lock().unwrap();
578                let user_prompt = request
579                    .messages
580                    .iter()
581                    .find_map(|message| match &message.content {
582                        Content::Text(text) => Some(text.clone()),
583                        Content::Blocks(blocks) => {
584                            let text = blocks
585                                .iter()
586                                .filter_map(|block| {
587                                    if let ContentBlock::Text { text } = block {
588                                        Some(text.as_str())
589                                    } else {
590                                        None
591                                    }
592                                })
593                                .collect::<Vec<_>>()
594                                .join("\n");
595                            if text.is_empty() { None } else { Some(text) }
596                        }
597                    })
598                    .unwrap_or_default();
599                entries.push(user_prompt);
600            }
601            Ok(ChatOutcome::Success(ChatResponse {
602                id: "test".to_string(),
603                content: vec![ContentBlock::Text {
604                    text: self.summary_response.clone(),
605                }],
606                model: "mock".to_string(),
607                stop_reason: Some(StopReason::EndTurn),
608                usage: Usage {
609                    input_tokens: 100,
610                    output_tokens: 50,
611                    cached_input_tokens: 0,
612                    cache_creation_input_tokens: 0,
613                },
614            }))
615        }
616
617        fn model(&self) -> &'static str {
618            "mock-model"
619        }
620
621        fn provider(&self) -> &'static str {
622            "mock"
623        }
624    }
625
626    #[test]
627    fn test_needs_compaction_below_threshold() {
628        let provider = Arc::new(MockProvider::new("summary"));
629        let config = CompactionConfig::default()
630            .with_threshold_tokens(10_000)
631            .with_min_messages(5);
632        let compactor = LlmContextCompactor::new(provider, config);
633
634        // Only 3 messages, below min_messages
635        let messages = vec![
636            Message::user("Hello"),
637            Message::assistant("Hi"),
638            Message::user("How are you?"),
639        ];
640
641        assert!(!compactor.needs_compaction(&messages));
642    }
643
644    #[test]
645    fn test_needs_compaction_above_threshold() {
646        let provider = Arc::new(MockProvider::new("summary"));
647        let config = CompactionConfig::default()
648            .with_threshold_tokens(50) // Very low threshold
649            .with_min_messages(3);
650        let compactor = LlmContextCompactor::new(provider, config);
651
652        // Messages that exceed threshold
653        let messages = vec![
654            Message::user("Hello, this is a longer message to test compaction"),
655            Message::assistant(
656                "Hi there! This is also a longer response to help trigger compaction",
657            ),
658            Message::user("Great, let's continue with even more text here"),
659            Message::assistant("Absolutely, adding more content to ensure we exceed the threshold"),
660        ];
661
662        assert!(compactor.needs_compaction(&messages));
663    }
664
665    #[test]
666    fn test_needs_compaction_auto_disabled() {
667        let provider = Arc::new(MockProvider::new("summary"));
668        let config = CompactionConfig::default()
669            .with_threshold_tokens(10) // Very low
670            .with_min_messages(1)
671            .with_auto_compact(false);
672        let compactor = LlmContextCompactor::new(provider, config);
673
674        let messages = vec![
675            Message::user("Hello, this is a longer message"),
676            Message::assistant("Response here"),
677        ];
678
679        assert!(!compactor.needs_compaction(&messages));
680    }
681
682    #[tokio::test]
683    async fn test_compact_history() -> Result<()> {
684        let provider = Arc::new(MockProvider::new(
685            "User asked about Rust programming. Assistant explained ownership, borrowing, and lifetimes.",
686        ));
687        let config = CompactionConfig::default()
688            .with_retain_recent(2)
689            .with_min_messages(3);
690        let compactor = LlmContextCompactor::new(provider, config);
691
692        // Use longer messages to ensure compaction actually reduces tokens
693        let messages = vec![
694            Message::user(
695                "What is Rust? I've heard it's a systems programming language but I don't know much about it. Can you explain the key features and why people are excited about it?",
696            ),
697            Message::assistant(
698                "Rust is a systems programming language focused on safety, speed, and concurrency. It achieves memory safety without garbage collection through its ownership system. The key features include zero-cost abstractions, guaranteed memory safety, threads without data races, and minimal runtime.",
699            ),
700            Message::user(
701                "Tell me about ownership in detail. How does it work and what are the rules? I want to understand this core concept thoroughly.",
702            ),
703            Message::assistant(
704                "Ownership is Rust's central feature with three rules: each value has one owner, only one owner at a time, and the value is dropped when owner goes out of scope. This system prevents memory leaks, double frees, and dangling pointers at compile time.",
705            ),
706            Message::user("What about borrowing?"), // Keep
707            Message::assistant("Borrowing allows references to data without taking ownership."), // Keep
708        ];
709
710        let result = compactor.compact_history(messages).await?;
711
712        // Should have: summary message + ack + 2 recent messages = 4
713        assert_eq!(result.new_count, 4);
714        assert_eq!(result.original_count, 6);
715
716        // With longer original messages, compaction should reduce tokens
717        assert!(
718            result.new_tokens < result.original_tokens,
719            "Expected fewer tokens after compaction: new={} < original={}",
720            result.new_tokens,
721            result.original_tokens
722        );
723
724        // First message should be the summary
725        if let Content::Text(text) = &result.messages[0].content {
726            assert!(text.contains("Previous conversation summary"));
727        }
728
729        Ok(())
730    }
731
732    #[tokio::test]
733    async fn test_compact_history_too_few_messages() -> Result<()> {
734        let provider = Arc::new(MockProvider::new("summary"));
735        let config = CompactionConfig::default().with_retain_recent(5);
736        let compactor = LlmContextCompactor::new(provider, config);
737
738        // Only 3 messages, less than retain_recent
739        let messages = vec![
740            Message::user("Hello"),
741            Message::assistant("Hi"),
742            Message::user("Bye"),
743        ];
744
745        let result = compactor.compact_history(messages.clone()).await?;
746
747        // Should return original messages unchanged
748        assert_eq!(result.new_count, 3);
749        assert_eq!(result.messages.len(), 3);
750
751        Ok(())
752    }
753
754    #[test]
755    fn test_format_messages_for_summary() {
756        let messages = vec![Message::user("Hello"), Message::assistant("Hi there!")];
757
758        let formatted = LlmContextCompactor::<MockProvider>::format_messages_for_summary(&messages);
759
760        assert!(formatted.contains("User: Hello"));
761        assert!(formatted.contains("Assistant: Hi there!"));
762    }
763
764    #[test]
765    fn test_format_messages_for_summary_truncates_tool_results_unicode_safely() {
766        let long_unicode = "é".repeat(600);
767
768        let messages = vec![Message {
769            role: Role::Assistant,
770            content: Content::Blocks(vec![ContentBlock::ToolResult {
771                tool_use_id: "tool-1".to_string(),
772                content: long_unicode,
773                is_error: Some(false),
774            }]),
775        }];
776
777        let formatted = LlmContextCompactor::<MockProvider>::format_messages_for_summary(&messages);
778
779        assert!(formatted.contains("... (truncated)"));
780    }
781
782    #[tokio::test]
783    async fn test_compact_filters_summary_messages() -> Result<()> {
784        let requests = Arc::new(Mutex::new(Vec::new()));
785        let provider = Arc::new(MockProvider::new_with_request_log(
786            "Fresh summary",
787            requests.clone(),
788        ));
789        let config = CompactionConfig::default().with_min_messages(1);
790        let compactor = LlmContextCompactor::new(provider, config);
791
792        let messages = vec![
793            Message::user(format!("{SUMMARY_PREFIX}already compacted context")),
794            Message::assistant("Continue with the next task using this context."),
795        ];
796
797        let summary = compactor.compact(&messages).await?;
798
799        {
800            let recorded = requests.lock().unwrap();
801            assert_eq!(recorded.len(), 1);
802            assert_eq!(summary, "Fresh summary");
803            assert!(recorded[0].contains("Continue with the next task using this context."));
804            assert!(!recorded[0].contains("already compacted context"));
805            drop(recorded);
806        }
807
808        Ok(())
809    }
810
811    #[tokio::test]
812    async fn test_compact_history_ignores_prior_summary_in_candidate_payload() -> Result<()> {
813        let requests = Arc::new(Mutex::new(Vec::new()));
814        let provider = Arc::new(MockProvider::new_with_request_log(
815            "Fresh history summary",
816            requests.clone(),
817        ));
818        let config = CompactionConfig::default()
819            .with_retain_recent(2)
820            .with_min_messages(1);
821        let compactor = LlmContextCompactor::new(provider, config);
822
823        let messages = vec![
824            Message::user(format!("{SUMMARY_PREFIX}already compacted context")),
825            Message::assistant("Current turn content from the latest exchange."),
826            Message::assistant("Recent message that should stay."),
827            Message::user("Newest note that should stay."),
828        ];
829
830        let result = compactor.compact_history(messages).await?;
831
832        {
833            let recorded = requests.lock().unwrap();
834            assert_eq!(recorded.len(), 1);
835            assert!(recorded[0].contains("Current turn content from the latest exchange."));
836            assert!(!recorded[0].contains("already compacted context"));
837            drop(recorded);
838        }
839        assert_eq!(result.new_count, 4);
840
841        Ok(())
842    }
843
844    #[tokio::test]
845    async fn test_compact_history_is_no_op_when_candidate_window_has_only_summaries() -> Result<()>
846    {
847        let requests = Arc::new(Mutex::new(Vec::new()));
848        let provider = Arc::new(MockProvider::new_with_request_log(
849            "This summary should not be used",
850            requests.clone(),
851        ));
852        let config = CompactionConfig::default()
853            .with_retain_recent(2)
854            .with_min_messages(1);
855        let compactor = LlmContextCompactor::new(provider, config);
856
857        let messages = vec![
858            Message::user(format!("{SUMMARY_PREFIX}first prior compacted section")),
859            Message::assistant(format!("{SUMMARY_PREFIX}second prior compacted section")),
860            Message::user(format!("{SUMMARY_PREFIX}third prior compacted section")),
861            Message::assistant("final short note"),
862        ];
863
864        let result = compactor.compact_history(messages).await?;
865
866        {
867            let recorded = requests.lock().unwrap();
868            assert!(recorded.is_empty());
869            drop(recorded);
870        }
871        assert_eq!(result.new_count, 4);
872        assert_eq!(result.messages.len(), 4);
873
874        if let Content::Text(text) = &result.messages[0].content {
875            assert!(text.contains(COMPACT_EMPTY_SUMMARY));
876        } else {
877            panic!("Expected summary text in first message");
878        }
879
880        Ok(())
881    }
882
883    #[tokio::test]
884    async fn test_compact_history_preserves_tool_use_tool_result_pairs() -> Result<()> {
885        let provider = Arc::new(MockProvider::new("Summary of earlier conversation."));
886        let config = CompactionConfig::default()
887            .with_retain_recent(2)
888            .with_min_messages(3);
889        let compactor = LlmContextCompactor::new(provider, config);
890
891        // Build a history where the split_point (len - retain_recent = 5 - 2 = 3)
892        // would land exactly on the user tool_result message at index 3,
893        // which would orphan it from its assistant tool_use at index 2.
894        let messages = vec![
895            // index 0: user
896            Message::user("What files are in the project?"),
897            // index 1: assistant text
898            Message::assistant("Let me check that for you."),
899            // index 2: assistant with tool_use
900            Message {
901                role: Role::Assistant,
902                content: Content::Blocks(vec![ContentBlock::ToolUse {
903                    id: "tool_1".to_string(),
904                    name: "list_files".to_string(),
905                    input: serde_json::json!({}),
906                    thought_signature: None,
907                }]),
908            },
909            // index 3: user with tool_result (naive split would land here)
910            Message {
911                role: Role::User,
912                content: Content::Blocks(vec![ContentBlock::ToolResult {
913                    tool_use_id: "tool_1".to_string(),
914                    content: "file1.rs\nfile2.rs".to_string(),
915                    is_error: None,
916                }]),
917            },
918            // index 4: assistant final response
919            Message::assistant("The project contains file1.rs and file2.rs."),
920        ];
921
922        let result = compactor.compact_history(messages).await?;
923
924        // The split_point should have been adjusted back from 3 to 2,
925        // so to_keep includes: [assistant tool_use, user tool_result, assistant response]
926        // Plus summary + ack = 5 total
927        assert_eq!(result.new_count, 5);
928
929        // Verify the kept messages include the tool_use/tool_result pair
930        // After summary + ack, the third message should be the assistant with tool_use
931        let kept_assistant = &result.messages[2];
932        if let Content::Blocks(blocks) = &kept_assistant.content {
933            assert!(
934                blocks
935                    .iter()
936                    .any(|b| matches!(b, ContentBlock::ToolUse { .. })),
937                "Expected assistant tool_use in kept messages"
938            );
939        } else {
940            panic!("Expected Blocks content for assistant tool_use message");
941        }
942
943        // The fourth message should be the user tool_result
944        let kept_user = &result.messages[3];
945        if let Content::Blocks(blocks) = &kept_user.content {
946            assert!(
947                blocks
948                    .iter()
949                    .any(|b| matches!(b, ContentBlock::ToolResult { .. })),
950                "Expected user tool_result in kept messages"
951            );
952        } else {
953            panic!("Expected Blocks content for user tool_result message");
954        }
955
956        Ok(())
957    }
958
959    #[tokio::test]
960    async fn test_compact_history_split_skips_leading_orphan_after_summary_ack() -> Result<()> {
961        // The user-visible bug at M7.5: a previously
962        // compacted history was re-compacted in a later turn. The
963        // first compaction left
964        // `[summary, summary_ack, user(tool_result toolu_X),
965        //  assistant(toolu_X reply), ...]`. On the second pass the
966        // default `split_point` (len - retain_recent = 5 - 3 = 2)
967        // would have made `to_keep[0] == user(tool_result toolu_X)`,
968        // and the synthetic `[summary, summary_ack, …]` prefix the
969        // compactor inserts in front of `to_keep` has no `tool_use`
970        // blocks — so the next request to Anthropic blew up with
971        // `messages.2.content.0: unexpected tool_use_id`.
972        //
973        // Pair-preservation alone can't fix this: it only inspects
974        // the immediate prev/next pair (here `summary_ack` vs
975        // `user(tool_result)`) and `summary_ack` is text-only, so the
976        // pair check sees no `tool_use` to anchor on and lets the
977        // orphan through. The chain-safety pass added in
978        // `split_point_preserves_tool_pairs_with_cap` step 3 walks
979        // the candidate forward past any leading orphan, so the
980        // `tool_result` lands in `to_summarize` and gets folded into
981        // the summary's prose where it's harmless.
982        //
983        // The assertion is structural, not block-counting: every
984        // surviving `tool_result` must reference a `tool_use` that
985        // appears earlier in the new message list. No
986        // post-compaction stripping is involved — the split point
987        // alone is responsible for chain integrity.
988        let provider = Arc::new(MockProvider::new("Re-summary."));
989        let config = CompactionConfig::default()
990            .with_retain_recent(3)
991            .with_min_messages(1);
992        let compactor = LlmContextCompactor::new(provider, config);
993
994        let messages = vec![
995            Message::user(format!("{SUMMARY_PREFIX}Old summary about toolu_X.")),
996            Message::assistant(SUMMARY_ACKNOWLEDGMENT),
997            Message {
998                role: Role::User,
999                content: Content::Blocks(vec![ContentBlock::ToolResult {
1000                    tool_use_id: "toolu_X".to_string(),
1001                    content: "result for X".to_string(),
1002                    is_error: None,
1003                }]),
1004            },
1005            Message::assistant("Result interpreted."),
1006            Message::user("Now what?"),
1007        ];
1008
1009        let result = compactor.compact_history(messages).await?;
1010
1011        let mut seen_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
1012        for msg in &result.messages {
1013            if let Content::Blocks(blocks) = &msg.content {
1014                for block in blocks {
1015                    match block {
1016                        ContentBlock::ToolResult { tool_use_id, .. } => {
1017                            assert!(
1018                                seen_ids.contains(tool_use_id),
1019                                "orphan tool_use_id {tool_use_id} survived split selection",
1020                            );
1021                        }
1022                        ContentBlock::ToolUse { id, .. } => {
1023                            seen_ids.insert(id.clone());
1024                        }
1025                        _ => {}
1026                    }
1027                }
1028            }
1029        }
1030
1031        Ok(())
1032    }
1033
1034    #[tokio::test]
1035    async fn test_compact_history_keeps_tool_pair_when_immediate_prev_is_text_only() -> Result<()> {
1036        // Tighter regression for the chain-safety boundary: even
1037        // when the message *before* the candidate split point is
1038        // text-only (so pair-preservation has nothing to anchor on),
1039        // chain-safety must shift the split forward past a leading
1040        // `user(tool_result)` whose `tool_use` would otherwise be
1041        // folded into the summary.
1042        let provider = Arc::new(MockProvider::new("Boundary summary."));
1043        let config = CompactionConfig::default()
1044            .with_retain_recent(2)
1045            .with_min_messages(1);
1046        let compactor = LlmContextCompactor::new(provider, config);
1047
1048        // Layout (5 messages, retain_recent=2 → initial split=3):
1049        //   0: user("first turn") — to_summarize
1050        //   1: assistant("text only") — to_summarize, immediate prev
1051        //   2: user(tool_result toolu_Y) — orphan in default to_keep
1052        //   3: assistant("then a reply")
1053        //   4: user("ok thanks")
1054        //
1055        // The corresponding `tool_use` for toolu_Y was lost long
1056        // ago — there's no `tool_use` anywhere in `messages`. With
1057        // pair-preservation alone, `to_keep` would start at index 3
1058        // (or 2 unshifted), leaving the orphan at the head and
1059        // tripping Anthropic.
1060        let messages = vec![
1061            Message::user("first turn"),
1062            Message::assistant("text only"),
1063            Message {
1064                role: Role::User,
1065                content: Content::Blocks(vec![ContentBlock::ToolResult {
1066                    tool_use_id: "toolu_Y".to_string(),
1067                    content: "ancient result".to_string(),
1068                    is_error: None,
1069                }]),
1070            },
1071            Message::assistant("then a reply"),
1072            Message::user("ok thanks"),
1073        ];
1074
1075        let result = compactor.compact_history(messages).await?;
1076
1077        // No tool_result block survives anywhere — the only one in
1078        // input was orphaned and the split-shift folded it into the
1079        // summary.
1080        let has_tool_result = result.messages.iter().any(|m| {
1081            matches!(
1082                &m.content,
1083                Content::Blocks(blocks)
1084                    if blocks.iter().any(|b| matches!(b, ContentBlock::ToolResult { .. }))
1085            )
1086        });
1087        assert!(
1088            !has_tool_result,
1089            "orphan tool_result should have been pushed into to_summarize, not retained",
1090        );
1091
1092        Ok(())
1093    }
1094
1095    #[tokio::test]
1096    async fn test_compact_history_retained_tail_is_token_capped() -> Result<()> {
1097        let provider = Arc::new(MockProvider::new(
1098            "Project summary with a long context and technical context.",
1099        ));
1100        let config = CompactionConfig::default()
1101            .with_retain_recent(8)
1102            .with_min_messages(1)
1103            .with_threshold_tokens(1);
1104        let compactor = LlmContextCompactor::new(provider, config);
1105
1106        let mut messages = Vec::new();
1107
1108        // Older messages that will be summarized away.
1109        messages.extend((0..6).map(|index| Message::user(format!("pre-compaction noise {index}"))));
1110
1111        // Newer long messages: intentionally large to force retained-tail truncation.
1112        messages.extend(
1113            (0..8).map(|index| Message::assistant(format!("kept-{index}: {}", "x".repeat(12_000)))),
1114        );
1115
1116        let result = compactor.compact_history(messages).await?;
1117
1118        // The retained tail should be token capped and therefore shorter than retain_recent.
1119        let retained_tail = &result.messages[2..];
1120        assert!(retained_tail.len() < 8);
1121
1122        let mut latest_index = -1i32;
1123        let mut all_retained = true;
1124        for message in retained_tail {
1125            if let Content::Text(text) = &message.content {
1126                if let Some(number) = text.split(':').next().and_then(|prefix| {
1127                    prefix
1128                        .strip_prefix("kept-")
1129                        .and_then(|rest| rest.parse::<i32>().ok())
1130                }) {
1131                    if number >= 0 {
1132                        latest_index = latest_index.max(number);
1133                    }
1134                } else {
1135                    all_retained = false;
1136                }
1137            } else {
1138                all_retained = false;
1139            }
1140        }
1141
1142        assert!(all_retained);
1143        assert_eq!(latest_index, 7);
1144        assert!(
1145            TokenEstimator::estimate_history(retained_tail) <= MAX_RETAINED_TAIL_MESSAGE_TOKENS
1146        );
1147        assert!(compactor.needs_compaction(&result.messages));
1148
1149        Ok(())
1150    }
1151
1152    #[tokio::test]
1153    async fn test_compact_history_skips_summary_ack_when_retained_tail_is_empty() -> Result<()> {
1154        let provider = Arc::new(MockProvider::new("Summary for oversized user turn."));
1155        let config = CompactionConfig::default()
1156            .with_retain_recent(1)
1157            .with_min_messages(1)
1158            .with_threshold_tokens(1);
1159        let compactor = LlmContextCompactor::new(provider, config);
1160
1161        let messages = vec![
1162            Message::assistant("Earlier assistant context."),
1163            Message::user(format!("oversized-user-turn: {}", "x".repeat(200_000))),
1164        ];
1165
1166        let result = compactor.compact_history(messages).await?;
1167
1168        assert_eq!(result.new_count, 1);
1169        assert_eq!(result.messages.len(), 1);
1170
1171        let only_message = &result.messages[0];
1172        assert_eq!(only_message.role, Role::User);
1173
1174        if let Content::Text(text) = &only_message.content {
1175            assert!(text.contains("Previous conversation summary"));
1176            assert!(!text.contains(SUMMARY_ACKNOWLEDGMENT));
1177        } else {
1178            panic!("Expected summary text when retained tail is empty");
1179        }
1180
1181        Ok(())
1182    }
1183}