Skip to main content

agent_sdk/context/
compactor.rs

1//! Context compaction implementation.
2
3use crate::llm::{
4    ChatOutcome, ChatRequest, Content, ContentBlock, LlmProvider, Message, Role, StopReason,
5};
6use anyhow::{Context, Result, bail};
7use async_trait::async_trait;
8use std::fmt::Write;
9use std::sync::Arc;
10
11use super::config::CompactionConfig;
12use super::estimator::TokenEstimator;
13
/// Marker prepended to the synthetic user message that carries a compaction
/// summary; stripping it is how a previously inserted summary is recognized.
const SUMMARY_PREFIX: &str = "[Previous conversation summary]\n\n";

/// Default system prompt for the summarization request.
const COMPACTION_SYSTEM_PROMPT: &str = concat!(
    "You are a precise summarizer. ",
    "Your task is to create concise but complete summaries of conversations, ",
    "preserving all technical details needed to continue the work.",
);

/// Default text placed before the formatted conversation in the summary prompt.
const COMPACTION_SUMMARY_PROMPT_PREFIX: &str = concat!(
    "Summarize this conversation concisely, preserving:\n",
    "- Key decisions and conclusions reached\n",
    "- Important file paths, code changes, and technical details\n",
    "- Current task context and what has been accomplished\n",
    "- Any pending items, errors encountered, or next steps\n",
    "\n",
    "Be specific about technical details (file names, function names, error messages) as these\n",
    "are critical for continuing the work.\n",
    "\n",
    "Conversation:\n",
);

/// Default text placed after the formatted conversation in the summary prompt.
const COMPACTION_SUMMARY_PROMPT_SUFFIX: &str =
    "Provide a concise summary (aim for 500-1000 words):";

/// Summary returned when there is neither a prior summary nor any fresh
/// message to summarize.
const COMPACT_EMPTY_SUMMARY: &str = concat!(
    "No additional context was available to summarize; ",
    "the previous messages were already compacted.",
);

/// Synthetic assistant reply inserted right after the summary message.
const SUMMARY_ACKNOWLEDGMENT: &str =
    "I understand the context from the summary. Let me continue from where we left off.";

/// Maximum number of characters of a tool result copied into the summary input.
const MAX_TOOL_RESULT_CHARS: usize = 500;

/// Suffix appended to a summary that is still truncated after the retry.
const TRUNCATED_SUMMARY_MARKER: &str =
    "\n\n[summary truncated: exceeded the configured summary_max_tokens budget]";
25
/// Trait for context compaction strategies.
///
/// Implement this trait to provide custom compaction logic. Implementors must
/// be `Send + Sync`; the async methods are declared through `async_trait`.
#[async_trait]
pub trait ContextCompactor: Send + Sync {
    /// Compact a list of messages into a single summary string.
    ///
    /// # Errors
    /// Returns an error if summarization fails.
    async fn compact(&self, messages: &[Message]) -> Result<String>;

    /// Estimate the number of tokens occupied by a message list.
    fn estimate_tokens(&self, messages: &[Message]) -> usize;

    /// Check whether the given history should be compacted.
    fn needs_compaction(&self, messages: &[Message]) -> bool;

    /// Perform full compaction, consuming the history and returning the new
    /// message history together with before/after statistics.
    ///
    /// # Errors
    /// Returns an error if compaction fails.
    async fn compact_history(&self, messages: Vec<Message>) -> Result<CompactionResult>;
}
49
/// Result of a compaction operation.
///
/// Bundles the rewritten history with message-count and estimated-token
/// figures taken before and after compaction.
#[derive(Debug, Clone)]
pub struct CompactionResult {
    /// The new compacted message history.
    pub messages: Vec<Message>,
    /// Number of messages before compaction.
    pub original_count: usize,
    /// Number of messages after compaction.
    pub new_count: usize,
    /// Estimated tokens before compaction.
    pub original_tokens: usize,
    /// Estimated tokens after compaction.
    pub new_tokens: usize,
}
64
/// LLM-based context compactor.
///
/// Uses the LLM itself to summarize older messages into a compact form.
///
/// `P` is `?Sized` so callers can hold an `Arc<dyn LlmProvider>` —
/// useful when the provider is resolved dynamically per-thread (e.g.
/// inside `agent-server`'s daemon worker, where the same compactor
/// type wraps whichever concrete provider the host's resolver picks).
/// Concrete-type users (`Arc<AnthropicProvider>`, etc.) still work
/// unchanged.
pub struct LlmContextCompactor<P: LlmProvider + ?Sized> {
    /// Provider that receives the summarization chat request.
    provider: Arc<P>,
    /// Compaction settings (thresholds, retained tail size, summary budget).
    config: CompactionConfig,
    /// System prompt sent with the summarization request.
    system_prompt: String,
    /// Text placed before the formatted conversation in the summary prompt.
    summary_prompt_prefix: String,
    /// Text placed after the formatted conversation in the summary prompt.
    summary_prompt_suffix: String,
}
82
83impl<P: LlmProvider + ?Sized> LlmContextCompactor<P> {
84    /// Create a new LLM context compactor.
85    #[must_use]
86    pub fn new(provider: Arc<P>, config: CompactionConfig) -> Self {
87        Self {
88            provider,
89            config,
90            system_prompt: COMPACTION_SYSTEM_PROMPT.to_string(),
91            summary_prompt_prefix: COMPACTION_SUMMARY_PROMPT_PREFIX.to_string(),
92            summary_prompt_suffix: COMPACTION_SUMMARY_PROMPT_SUFFIX.to_string(),
93        }
94    }
95
96    /// Create with default configuration.
97    #[must_use]
98    pub fn with_defaults(provider: Arc<P>) -> Self {
99        Self::new(provider, CompactionConfig::default())
100    }
101
102    /// Get the configuration.
103    #[must_use]
104    pub const fn config(&self) -> &CompactionConfig {
105        &self.config
106    }
107
108    /// Override the prompts used for LLM-based summarization.
109    #[must_use]
110    pub fn with_prompts(
111        mut self,
112        system_prompt: impl Into<String>,
113        summary_prompt_prefix: impl Into<String>,
114        summary_prompt_suffix: impl Into<String>,
115    ) -> Self {
116        self.system_prompt = system_prompt.into();
117        self.summary_prompt_prefix = summary_prompt_prefix.into();
118        self.summary_prompt_suffix = summary_prompt_suffix.into();
119        self
120    }
121
122    /// If `content` is a previously inserted compaction summary, return its
123    /// text with the `SUMMARY_PREFIX` marker stripped; otherwise `None`.
124    ///
125    /// Used to carry a prior summary's prose forward into the next compaction
126    /// instead of discarding it (which silently destroyed all pre-first-
127    /// compaction context). The marker is still a content-prefix sentinel
128    /// because `Message` lives in a foundation crate and cannot carry a
129    /// structural flag from here; the compactor itself is the only writer of
130    /// the prefix.
131    fn extract_summary_text(content: &Content) -> Option<String> {
132        match content {
133            Content::Text(text) => text.strip_prefix(SUMMARY_PREFIX).map(str::to_string),
134            Content::Blocks(blocks) => blocks.iter().find_map(|block| match block {
135                ContentBlock::Text { text } => {
136                    text.strip_prefix(SUMMARY_PREFIX).map(str::to_string)
137                }
138                _ => None,
139            }),
140        }
141    }
142
143    /// Return true when a message contains a tool-use block.
144    fn has_tool_use(content: &Content) -> bool {
145        matches!(
146            content,
147            Content::Blocks(blocks)
148                if blocks
149                    .iter()
150                    .any(|block| matches!(block, ContentBlock::ToolUse { .. }))
151        )
152    }
153
154    /// Return true when a message contains a tool-result block.
155    fn has_tool_result(content: &Content) -> bool {
156        matches!(
157            content,
158            Content::Blocks(blocks)
159                if blocks
160                    .iter()
161                    .any(|block| matches!(block, ContentBlock::ToolResult { .. }))
162        )
163    }
164
165    /// Shift split point backwards until a `tool_use`/`tool_result` pair is not
166    /// split.
167    ///
168    /// Only the `assistant(tool_use)` -> `user(tool_result)` boundary is
169    /// unsplittable: that is the single tool turn that must stay together for
170    /// the wire payload to be valid. Splitting at a `user(tool_result)` ->
171    /// `assistant(tool_use)` boundary is API-valid (the retained tail then
172    /// begins with an `assistant` `tool_use` followed by its own result), so
173    /// it is *not* treated as a pair. Treating it as a pair used to walk the
174    /// split backward through an entire unbroken tool chain — the dominant
175    /// shape of autonomous traces — defeating the retained-tail token cap and
176    /// summarizing almost nothing.
177    fn split_point_preserves_tool_pairs(messages: &[Message], mut split_point: usize) -> usize {
178        while split_point > 0 && split_point < messages.len() {
179            let prev = &messages[split_point - 1];
180            let next = &messages[split_point];
181
182            let crosses_tool_pair = prev.role == Role::Assistant
183                && Self::has_tool_use(&prev.content)
184                && next.role == Role::User
185                && Self::has_tool_result(&next.content);
186
187            if crosses_tool_pair {
188                split_point -= 1;
189                continue;
190            }
191
192            break;
193        }
194
195        split_point
196    }
197
198    /// Pick a split point that produces a self-consistent `to_keep`.
199    ///
200    /// `to_keep` is self-consistent (per Anthropic's API contract)
201    /// when every `tool_result` block it contains references a
202    /// `tool_use` block earlier in `to_keep`. The compactor inserts
203    /// a synthetic `[summary, summary_ack]` prefix in front of
204    /// `to_keep`, and that prefix has no `tool_use` blocks — so the
205    /// only path to a valid wire payload is for `to_keep` itself to
206    /// be self-contained.
207    ///
208    /// Three constraints, applied in order:
209    ///
210    /// 1. **Token cap (soft)** — push split forward to keep the
211    ///    retained tail under `max_tokens` of estimated content. The
212    ///    retained-tail cap is a soft hint; a tool chain that doesn't
213    ///    fit gets retained anyway because chain safety is hard.
214    /// 2. **Pair safety (hard)** — shift split backward to keep
215    ///    `assistant_with_tool_use` and the immediately following
216    ///    `user_with_tool_result` together. Catches the common case
217    ///    where the boundary lands inside a single tool turn.
218    /// 3. **Chain safety (hard)** — advance split forward past any
219    ///    leading `user_with_tool_result` whose `tool_use_id` isn't
220    ///    in the rest of `to_keep`. Catches the case pair-preservation
221    ///    can't see: when the message immediately before the original
222    ///    boundary is text-only (e.g. a `summary_ack` from a prior
223    ///    compaction), pair-preservation has nothing to anchor on
224    ///    and silently leaves the orphan in `to_keep[0]`. The wire
225    ///    payload would then start `[summary, summary_ack,
226    ///    user(orphan_tool_result), …]` — which Anthropic rejects
227    ///    with `messages.2.content.0: unexpected tool_use_id`. Step
228    ///    3 makes the split-point selection responsible for chain
229    ///    integrity instead of post-hoc stripping the output.
230    ///
231    /// Step 2 and step 3 can pull in opposite directions (step 2
232    /// shifts back, step 3 shifts forward), so the function applies
233    /// step 3 last: pair-safety puts the candidate as far back as
234    /// it needs to go, then chain-safety advances past any leading
235    /// orphan that survived because the immediate prev was text-only.
236    fn split_point_preserves_tool_pairs_with_cap(
237        messages: &[Message],
238        split_point: usize,
239        max_tokens: usize,
240    ) -> usize {
241        let cap_limit = Self::retain_tail_with_token_cap(messages, split_point, max_tokens);
242        let pair_safe = Self::split_point_preserves_tool_pairs(messages, cap_limit);
243        Self::split_point_skips_leading_orphan(messages, pair_safe)
244    }
245
246    /// Advance `split_point` forward until `to_keep[0]` doesn't
247    /// contain an orphan `tool_result` block — i.e. a `tool_result`
248    /// whose `tool_use_id` isn't satisfied by some `tool_use` block
249    /// in `to_keep`.
250    ///
251    /// Implements step 3 of `split_point_preserves_tool_pairs_with_cap`
252    /// (chain safety). Pair-preservation alone can't catch the
253    /// "synthetic `summary_ack` precedes an orphan" shape because it
254    /// only inspects the immediate prev/next pair; this helper
255    /// inspects whether `to_keep[0]`'s `tool_result` blocks point
256    /// anywhere `to_keep` will host a matching `tool_use`. When they
257    /// don't, the `tool_result` belongs in `to_summarize` (where it
258    /// gets text-ified into the summary prose), not in `to_keep`.
259    ///
260    /// Walks at most `messages.len()` steps because each iteration
261    /// advances `split_point` by at least 1.
262    fn split_point_skips_leading_orphan(messages: &[Message], mut split_point: usize) -> usize {
263        while split_point < messages.len() {
264            if Self::leading_message_has_orphan_tool_result(&messages[split_point..]) {
265                split_point = split_point.saturating_add(1);
266                continue;
267            }
268            break;
269        }
270        split_point
271    }
272
273    /// True when `to_keep[0]` is a `user` message whose `tool_result`
274    /// blocks reference at least one `tool_use_id` not present in
275    /// `to_keep`. The check is scoped to the first message because
276    /// well-formed Anthropic conversations always have `tool_use`
277    /// immediately before `tool_result` — an orphan deeper than
278    /// `to_keep[0]` would require the input itself to be malformed
279    /// upstream of compaction, which is out of scope here.
280    fn leading_message_has_orphan_tool_result(to_keep: &[Message]) -> bool {
281        let Some(first) = to_keep.first() else {
282            return false;
283        };
284        let Content::Blocks(blocks) = &first.content else {
285            return false;
286        };
287
288        // Pull the tool_result ids that appear in the first message.
289        // If there are none, the first message can't contribute an
290        // orphan and we're done early without scanning the tail.
291        let mut needed: Vec<&str> = Vec::new();
292        for block in blocks {
293            if let ContentBlock::ToolResult { tool_use_id, .. } = block {
294                needed.push(tool_use_id.as_str());
295            }
296        }
297        if needed.is_empty() {
298            return false;
299        }
300
301        // Build the set of tool_use ids `to_keep` will host.
302        let known_ids: std::collections::HashSet<&str> = to_keep
303            .iter()
304            .flat_map(|message| match &message.content {
305                Content::Blocks(blocks) => blocks
306                    .iter()
307                    .filter_map(|block| match block {
308                        ContentBlock::ToolUse { id, .. } => Some(id.as_str()),
309                        _ => None,
310                    })
311                    .collect::<Vec<_>>(),
312                Content::Text(_) => Vec::new(),
313            })
314            .collect();
315
316        needed.iter().any(|id| !known_ids.contains(id))
317    }
318
319    /// Keep most recent messages that fit within the retained-message token budget.
320    fn retain_tail_with_token_cap(messages: &[Message], start: usize, max_tokens: usize) -> usize {
321        if start >= messages.len() {
322            return messages.len();
323        }
324
325        if max_tokens == 0 {
326            return messages.len();
327        }
328
329        let mut used = 0usize;
330        let mut retained_start = messages.len();
331
332        for idx in (start..messages.len()).rev() {
333            let message_tokens = TokenEstimator::estimate_message(&messages[idx]);
334            if used + message_tokens > max_tokens {
335                break;
336            }
337
338            retained_start = idx;
339            used += message_tokens;
340        }
341
342        retained_start
343    }
344
345    /// Format messages for summarization.
346    ///
347    /// Borrows each message rather than taking a slice of owned values so the
348    /// caller can pass a filtered view (`Vec<&Message>`) without cloning.
349    fn format_messages_for_summary<'a>(messages: impl IntoIterator<Item = &'a Message>) -> String {
350        let mut output = String::new();
351
352        for message in messages {
353            let role = match message.role {
354                Role::User => "User",
355                Role::Assistant => "Assistant",
356            };
357
358            let _ = write!(output, "{role}: ");
359
360            match &message.content {
361                Content::Text(text) => {
362                    let _ = writeln!(output, "{text}");
363                }
364                Content::Blocks(blocks) => {
365                    for block in blocks {
366                        match block {
367                            ContentBlock::Text { text } => {
368                                let _ = writeln!(output, "{text}");
369                            }
370                            ContentBlock::Thinking { thinking, .. } => {
371                                // Include thinking in summaries for context
372                                let _ = writeln!(output, "[Thinking: {thinking}]");
373                            }
374                            ContentBlock::RedactedThinking { .. } => {
375                                let _ = writeln!(output, "[Redacted thinking]");
376                            }
377                            ContentBlock::ToolUse { name, input, .. } => {
378                                let _ = writeln!(
379                                    output,
380                                    "[Called tool: {name} with input: {}]",
381                                    serde_json::to_string(input).unwrap_or_default()
382                                );
383                            }
384                            ContentBlock::ToolResult {
385                                content, is_error, ..
386                            } => {
387                                let status = if is_error.unwrap_or(false) {
388                                    "error"
389                                } else {
390                                    "success"
391                                };
392                                // Truncate long tool results (Unicode-safe; avoid slicing mid-codepoint)
393                                let truncated = if content.chars().count() > MAX_TOOL_RESULT_CHARS {
394                                    let prefix: String =
395                                        content.chars().take(MAX_TOOL_RESULT_CHARS).collect();
396                                    format!("{prefix}... (truncated)")
397                                } else {
398                                    content.clone()
399                                };
400                                let _ = writeln!(output, "[Tool result ({status}): {truncated}]");
401                            }
402                            ContentBlock::Image { source } => {
403                                let _ = writeln!(output, "[Image: {}]", source.media_type);
404                            }
405                            ContentBlock::Document { source } => {
406                                let _ = writeln!(output, "[Document: {}]", source.media_type);
407                            }
408                            // `ContentBlock` is `#[non_exhaustive]`; render an
409                            // unknown future block kind with a generic marker.
410                            _ => {
411                                let _ = writeln!(output, "[Unrecognized content block]");
412                            }
413                        }
414                    }
415                }
416            }
417            output.push('\n');
418        }
419
420        output
421    }
422
423    /// Build the summarization prompt.
424    ///
425    /// When `prior_summaries` is non-empty (a re-compaction is folding earlier
426    /// summaries back in), their prose is prepended as a labeled section so the
427    /// model preserves and subsumes those facts in the new summary rather than
428    /// losing all pre-first-compaction context.
429    fn build_summary_prompt(&self, prior_summaries: &[String], messages_text: &str) -> String {
430        let base = format!(
431            "{}{}{}",
432            self.summary_prompt_prefix, messages_text, self.summary_prompt_suffix
433        );
434
435        if prior_summaries.is_empty() {
436            return base;
437        }
438
439        let prior = prior_summaries.join("\n\n");
440        format!(
441            "Previous summary of earlier conversation. Preserve every fact below \
442             in your new summary so no earlier context is lost:\n{prior}\n\n{base}"
443        )
444    }
445
446    /// Run a single summarization LLM call, returning the summary text and
447    /// whether the response was truncated by the `max_tokens` budget.
448    async fn run_summarization(&self, prompt: String, max_tokens: usize) -> Result<(String, bool)> {
449        let request = ChatRequest {
450            system: self.system_prompt.clone(),
451            messages: vec![Message::user(prompt)],
452            tools: None,
453            max_tokens: u32::try_from(max_tokens).unwrap_or(u32::MAX),
454            max_tokens_explicit: true,
455            session_id: None,
456            cached_content: None,
457            thinking: None,
458            tool_choice: None,
459            response_format: None,
460            cache: None,
461        };
462
463        let outcome = self
464            .provider
465            .chat(request)
466            .await
467            .context("Failed to call LLM for summarization")?;
468
469        match outcome {
470            ChatOutcome::Success(response) => {
471                let truncated = response.stop_reason == Some(StopReason::MaxTokens);
472                let text = response
473                    .first_text()
474                    .map(String::from)
475                    .context("No text in summarization response")?;
476                Ok((text, truncated))
477            }
478            ChatOutcome::RateLimited(_) => {
479                bail!("Rate limited during summarization")
480            }
481            ChatOutcome::InvalidRequest(msg) => {
482                bail!("Invalid request during summarization: {msg}")
483            }
484            ChatOutcome::ServerError(msg) => {
485                bail!("Server error during summarization: {msg}")
486            }
487            // `ChatOutcome` is `#[non_exhaustive]`; an unrecognized outcome
488            // fails the summarization rather than returning an empty summary.
489            _ => {
490                bail!("Unrecognized provider outcome during summarization")
491            }
492        }
493    }
494}
495
496#[async_trait]
497impl<P: LlmProvider + ?Sized> ContextCompactor for LlmContextCompactor<P> {
498    async fn compact(&self, messages: &[Message]) -> Result<String> {
499        // Separate prior compaction summaries (whose prose must be carried
500        // forward) from fresh messages (which still need summarizing). Prior
501        // summaries used to be filtered out and silently dropped, destroying
502        // all context from before the previous compaction.
503        let mut prior_summaries: Vec<String> = Vec::new();
504        let mut fresh: Vec<&Message> = Vec::new();
505        for message in messages {
506            if let Some(text) = Self::extract_summary_text(&message.content) {
507                if !text.is_empty() {
508                    prior_summaries.push(text);
509                }
510            } else {
511                fresh.push(message);
512            }
513        }
514
515        // Nothing fresh to summarize: carry prior summaries forward verbatim
516        // (no LLM call needed) rather than discarding them.
517        if fresh.is_empty() {
518            if prior_summaries.is_empty() {
519                return Ok(COMPACT_EMPTY_SUMMARY.to_string());
520            }
521            return Ok(prior_summaries.join("\n\n"));
522        }
523
524        let messages_text = Self::format_messages_for_summary(fresh.iter().copied());
525        let prompt = self.build_summary_prompt(&prior_summaries, &messages_text);
526
527        let budget = self.config.summary_max_tokens;
528        let (mut summary, truncated) = self.run_summarization(prompt.clone(), budget).await?;
529
530        if truncated {
531            log::warn!(
532                "compaction summary hit the max_tokens budget ({budget}); \
533                 retrying with a larger budget to avoid silent context loss"
534            );
535            let (retry_summary, still_truncated) = self
536                .run_summarization(prompt, budget.saturating_mul(2))
537                .await?;
538            summary = retry_summary;
539            if still_truncated {
540                log::warn!(
541                    "compaction summary still truncated after retry; appending a \
542                     truncation marker so downstream context loss is visible"
543                );
544                summary.push_str(TRUNCATED_SUMMARY_MARKER);
545            }
546        }
547
548        Ok(summary)
549    }
550
    /// Estimate the token count of `messages` by delegating to
    /// `TokenEstimator::estimate_history`.
    fn estimate_tokens(&self, messages: &[Message]) -> usize {
        TokenEstimator::estimate_history(messages)
    }
554
555    fn needs_compaction(&self, messages: &[Message]) -> bool {
556        if !self.config.auto_compact {
557            return false;
558        }
559
560        if messages.len() < self.config.min_messages_for_compaction {
561            return false;
562        }
563
564        let estimated_tokens = self.estimate_tokens(messages);
565        estimated_tokens > self.config.threshold_tokens
566    }
567
568    async fn compact_history(&self, mut messages: Vec<Message>) -> Result<CompactionResult> {
569        let original_count = messages.len();
570        let original_tokens = self.estimate_tokens(&messages);
571
572        // Ensure we have enough messages to compact
573        if messages.len() <= self.config.retain_recent {
574            return Ok(CompactionResult {
575                messages,
576                original_count,
577                new_count: original_count,
578                original_tokens,
579                new_tokens: original_tokens,
580            });
581        }
582
583        // Split messages: old messages to summarize, recent messages to keep
584        let mut split_point = messages.len().saturating_sub(self.config.retain_recent);
585        split_point = Self::split_point_preserves_tool_pairs_with_cap(
586            &messages,
587            split_point,
588            self.config.max_retained_tail_tokens,
589        );
590
591        // Move the retained tail out of `messages` so it doesn't have to be
592        // cloned: `messages` then holds exactly the slice to summarize.
593        let to_keep = messages.split_off(split_point);
594        let to_summarize = messages;
595
596        // Summarize old messages
597        let summary = self.compact(&to_summarize).await?;
598
599        // Build new message history
600        let mut new_messages = Vec::with_capacity(2 + to_keep.len());
601
602        // Add summary as a user message
603        new_messages.push(Message::user(format!("{SUMMARY_PREFIX}{summary}")));
604
605        // Add acknowledgment from assistant only when some recent tail remains.
606        // If compaction drops the entire retained tail due to the token cap, ending
607        // the request with this synthetic assistant message would act like assistant
608        // prefill and Anthropic rejects that shape.
609        if !to_keep.is_empty() {
610            new_messages.push(Message::assistant(SUMMARY_ACKNOWLEDGMENT));
611        }
612
613        // Add recent messages. `to_keep` is guaranteed self-consistent
614        // by `split_point_preserves_tool_pairs_with_cap` (steps 2 and
615        // 3): any orphan `tool_result` was either folded into the
616        // summary (split shifted forward) or paired with its
617        // `tool_use` inside `to_keep` (split shifted backward). No
618        // post-hoc rewriting of the assembled output is required.
619        // The tail is moved (not cloned) since `compact_history` owns it.
620        new_messages.extend(to_keep);
621
622        let new_count = new_messages.len();
623        let new_tokens = self.estimate_tokens(&new_messages);
624
625        Ok(CompactionResult {
626            messages: new_messages,
627            original_count,
628            new_count,
629            original_tokens,
630            new_tokens,
631        })
632    }
633}
634
635#[cfg(test)]
636mod tests {
637    use super::*;
638    use crate::llm::{ChatResponse, StopReason, Usage};
639    use std::sync::Mutex;
640
    /// Test double for `LlmProvider` that records the prompts it receives and
    /// answers with a canned (or echoed) summary.
    struct MockProvider {
        /// Text returned as the summary when `echo_input` is false.
        summary_response: String,
        /// Shared log of the user prompts received by `chat`, in call order.
        requests: Arc<Mutex<Vec<String>>>,
        /// When true, the summary echoes the received prompt (simulating an LLM
        /// that faithfully preserves its input — used to assert carry-forward).
        echo_input: bool,
        /// `stop_reason` returned by the mock; `MaxTokens` simulates truncation.
        stop_reason: StopReason,
    }
650
651    impl MockProvider {
652        fn build(
653            summary: &str,
654            requests: Arc<Mutex<Vec<String>>>,
655            echo_input: bool,
656            stop_reason: StopReason,
657        ) -> Self {
658            Self {
659                summary_response: summary.to_string(),
660                requests,
661                echo_input,
662                stop_reason,
663            }
664        }
665
666        fn new(summary: &str) -> Self {
667            Self::build(
668                summary,
669                Arc::new(Mutex::new(Vec::new())),
670                false,
671                StopReason::EndTurn,
672            )
673        }
674
675        fn new_with_request_log(summary: &str, requests: Arc<Mutex<Vec<String>>>) -> Self {
676            Self::build(summary, requests, false, StopReason::EndTurn)
677        }
678
679        /// A provider whose summary echoes the received prompt verbatim.
680        fn new_echo(requests: Arc<Mutex<Vec<String>>>) -> Self {
681            Self::build("", requests, true, StopReason::EndTurn)
682        }
683
684        /// A provider that always reports `MaxTokens` (a truncated summary).
685        fn new_truncating(summary: &str, requests: Arc<Mutex<Vec<String>>>) -> Self {
686            Self::build(summary, requests, false, StopReason::MaxTokens)
687        }
688
689        fn user_prompt_of(request: &ChatRequest) -> String {
690            request
691                .messages
692                .iter()
693                .find_map(|message| match &message.content {
694                    Content::Text(text) => Some(text.clone()),
695                    Content::Blocks(blocks) => {
696                        let text = blocks
697                            .iter()
698                            .filter_map(|block| {
699                                if let ContentBlock::Text { text } = block {
700                                    Some(text.as_str())
701                                } else {
702                                    None
703                                }
704                            })
705                            .collect::<Vec<_>>()
706                            .join("\n");
707                        if text.is_empty() { None } else { Some(text) }
708                    }
709                })
710                .unwrap_or_default()
711        }
712    }
713
714    #[async_trait]
715    impl LlmProvider for MockProvider {
716        async fn chat(&self, request: ChatRequest) -> Result<ChatOutcome> {
717            let user_prompt = Self::user_prompt_of(&request);
718            if let Ok(mut entries) = self.requests.lock() {
719                entries.push(user_prompt.clone());
720            }
721            let text = if self.echo_input {
722                user_prompt
723            } else {
724                self.summary_response.clone()
725            };
726            Ok(ChatOutcome::Success(ChatResponse {
727                id: "test".to_string(),
728                content: vec![ContentBlock::Text { text }],
729                model: "mock".to_string(),
730                stop_reason: Some(self.stop_reason),
731                usage: Usage {
732                    input_tokens: 100,
733                    output_tokens: 50,
734                    cached_input_tokens: 0,
735                    cache_creation_input_tokens: 0,
736                },
737            }))
738        }
739
740        fn model(&self) -> &'static str {
741            "mock-model"
742        }
743
744        fn provider(&self) -> &'static str {
745            "mock"
746        }
747    }
748
/// Three messages with `min_messages = 5` must not trigger compaction.
#[test]
fn test_needs_compaction_below_threshold() {
    // Only 3 messages, which is fewer than the configured min_messages.
    let history = vec![
        Message::user("Hello"),
        Message::assistant("Hi"),
        Message::user("How are you?"),
    ];

    let settings = CompactionConfig::default()
        .with_threshold_tokens(10_000)
        .with_min_messages(5);
    let compactor = LlmContextCompactor::new(Arc::new(MockProvider::new("summary")), settings);

    let wants_compaction = compactor.needs_compaction(&history);
    assert!(!wants_compaction);
}
766
/// Four longer messages against a 50-token threshold must trigger compaction.
#[test]
fn test_needs_compaction_above_threshold() {
    // Enough text that the estimate exceeds the deliberately tiny threshold.
    let history = vec![
        Message::user("Hello, this is a longer message to test compaction"),
        Message::assistant("Hi there! This is also a longer response to help trigger compaction"),
        Message::user("Great, let's continue with even more text here"),
        Message::assistant("Absolutely, adding more content to ensure we exceed the threshold"),
    ];

    let settings = CompactionConfig::default()
        .with_threshold_tokens(50)
        .with_min_messages(3);
    let compactor = LlmContextCompactor::new(Arc::new(MockProvider::new("summary")), settings);

    let wants_compaction = compactor.needs_compaction(&history);
    assert!(wants_compaction);
}
787
/// With `auto_compact` switched off, even a tiny threshold must not request
/// compaction.
#[test]
fn test_needs_compaction_auto_disabled() {
    let history = vec![
        Message::user("Hello, this is a longer message"),
        Message::assistant("Response here"),
    ];

    // Threshold and min_messages are set very low; auto-compaction is disabled.
    let settings = CompactionConfig::default()
        .with_threshold_tokens(10)
        .with_min_messages(1)
        .with_auto_compact(false);
    let compactor = LlmContextCompactor::new(Arc::new(MockProvider::new("summary")), settings);

    let wants_compaction = compactor.needs_compaction(&history);
    assert!(!wants_compaction);
}
804
805    #[tokio::test]
806    async fn test_compact_history() -> Result<()> {
807        let provider = Arc::new(MockProvider::new(
808            "User asked about Rust programming. Assistant explained ownership, borrowing, and lifetimes.",
809        ));
810        let config = CompactionConfig::default()
811            .with_retain_recent(2)
812            .with_min_messages(3);
813        let compactor = LlmContextCompactor::new(provider, config);
814
815        // Use longer messages to ensure compaction actually reduces tokens
816        let messages = vec![
817            Message::user(
818                "What is Rust? I've heard it's a systems programming language but I don't know much about it. Can you explain the key features and why people are excited about it?",
819            ),
820            Message::assistant(
821                "Rust is a systems programming language focused on safety, speed, and concurrency. It achieves memory safety without garbage collection through its ownership system. The key features include zero-cost abstractions, guaranteed memory safety, threads without data races, and minimal runtime.",
822            ),
823            Message::user(
824                "Tell me about ownership in detail. How does it work and what are the rules? I want to understand this core concept thoroughly.",
825            ),
826            Message::assistant(
827                "Ownership is Rust's central feature with three rules: each value has one owner, only one owner at a time, and the value is dropped when owner goes out of scope. This system prevents memory leaks, double frees, and dangling pointers at compile time.",
828            ),
829            Message::user("What about borrowing?"), // Keep
830            Message::assistant("Borrowing allows references to data without taking ownership."), // Keep
831        ];
832
833        let result = compactor.compact_history(messages).await?;
834
835        // Should have: summary message + ack + 2 recent messages = 4
836        assert_eq!(result.new_count, 4);
837        assert_eq!(result.original_count, 6);
838
839        // With longer original messages, compaction should reduce tokens
840        assert!(
841            result.new_tokens < result.original_tokens,
842            "Expected fewer tokens after compaction: new={} < original={}",
843            result.new_tokens,
844            result.original_tokens
845        );
846
847        // First message should be the summary
848        if let Content::Text(text) = &result.messages[0].content {
849            assert!(text.contains("Previous conversation summary"));
850        }
851
852        Ok(())
853    }
854
855    #[tokio::test]
856    async fn test_compact_history_too_few_messages() -> Result<()> {
857        let provider = Arc::new(MockProvider::new("summary"));
858        let config = CompactionConfig::default().with_retain_recent(5);
859        let compactor = LlmContextCompactor::new(provider, config);
860
861        // Only 3 messages, less than retain_recent
862        let messages = vec![
863            Message::user("Hello"),
864            Message::assistant("Hi"),
865            Message::user("Bye"),
866        ];
867
868        let result = compactor.compact_history(messages.clone()).await?;
869
870        // Should return original messages unchanged
871        assert_eq!(result.new_count, 3);
872        assert_eq!(result.messages.len(), 3);
873
874        Ok(())
875    }
876
/// Formatting a user/assistant exchange must label each line with its role.
#[test]
fn test_format_messages_for_summary() {
    let exchange = vec![Message::user("Hello"), Message::assistant("Hi there!")];

    let rendered = LlmContextCompactor::<MockProvider>::format_messages_for_summary(&exchange);

    for expected in ["User: Hello", "Assistant: Hi there!"] {
        assert!(rendered.contains(expected));
    }
}
886
/// A tool result made of 600 two-byte characters must be truncated without
/// panicking on a character boundary, and the truncation marker must appear.
#[test]
fn test_format_messages_for_summary_truncates_tool_results_unicode_safely() {
    // 600 copies of a multi-byte character.
    let oversized_result = ContentBlock::ToolResult {
        tool_use_id: "tool-1".to_string(),
        content: "é".repeat(600),
        is_error: Some(false),
    };
    let history = vec![Message {
        role: Role::Assistant,
        content: Content::Blocks(vec![oversized_result]),
    }];

    let rendered = LlmContextCompactor::<MockProvider>::format_messages_for_summary(&history);

    assert!(rendered.contains("... (truncated)"));
}
904
905    #[tokio::test]
906    async fn test_compact_carries_prior_summary_into_request() -> Result<()> {
907        // A prior compaction summary must be carried forward into the
908        // summarization input (not silently filtered out), so its facts are
909        // preserved across re-compaction. The fresh message is summarized as
910        // usual; the prior summary is included as a "Previous summary" section.
911        let requests = Arc::new(Mutex::new(Vec::new()));
912        let provider = Arc::new(MockProvider::new_with_request_log(
913            "Fresh summary",
914            requests.clone(),
915        ));
916        let config = CompactionConfig::default().with_min_messages(1);
917        let compactor = LlmContextCompactor::new(provider, config);
918
919        let messages = vec![
920            Message::user(format!("{SUMMARY_PREFIX}already compacted context")),
921            Message::assistant("Continue with the next task using this context."),
922        ];
923
924        let summary = compactor.compact(&messages).await?;
925
926        let recorded = requests
927            .lock()
928            .map_err(|_| anyhow::anyhow!("request log poisoned"))?;
929        assert_eq!(recorded.len(), 1);
930        // The new summary is the LLM's output; the prior summary lives in the
931        // request, where a real model subsumes it into the new summary.
932        assert_eq!(summary, "Fresh summary");
933        assert!(recorded[0].contains("Continue with the next task using this context."));
934        assert!(
935            recorded[0].contains("already compacted context"),
936            "prior summary must be carried into the summarization input"
937        );
938        drop(recorded);
939
940        Ok(())
941    }
942
943    #[tokio::test]
944    async fn test_compact_history_carries_prior_summary_in_candidate_payload() -> Result<()> {
945        let requests = Arc::new(Mutex::new(Vec::new()));
946        let provider = Arc::new(MockProvider::new_with_request_log(
947            "Fresh history summary",
948            requests.clone(),
949        ));
950        let config = CompactionConfig::default()
951            .with_retain_recent(2)
952            .with_min_messages(1);
953        let compactor = LlmContextCompactor::new(provider, config);
954
955        let messages = vec![
956            Message::user(format!("{SUMMARY_PREFIX}already compacted context")),
957            Message::assistant("Current turn content from the latest exchange."),
958            Message::assistant("Recent message that should stay."),
959            Message::user("Newest note that should stay."),
960        ];
961
962        let result = compactor.compact_history(messages).await?;
963
964        let recorded = requests
965            .lock()
966            .map_err(|_| anyhow::anyhow!("request log poisoned"))?;
967        assert_eq!(recorded.len(), 1);
968        assert!(recorded[0].contains("Current turn content from the latest exchange."));
969        // The prior summary is carried into the summarization input rather than
970        // being silently discarded.
971        assert!(
972            recorded[0].contains("already compacted context"),
973            "prior summary content must reach the summarizer"
974        );
975        drop(recorded);
976        assert_eq!(result.new_count, 4);
977
978        Ok(())
979    }
980
981    #[tokio::test]
982    async fn test_compact_history_carries_summaries_forward_when_window_has_only_summaries()
983    -> Result<()> {
984        let requests = Arc::new(Mutex::new(Vec::new()));
985        let provider = Arc::new(MockProvider::new_with_request_log(
986            "This summary should not be used",
987            requests.clone(),
988        ));
989        let config = CompactionConfig::default()
990            .with_retain_recent(2)
991            .with_min_messages(1);
992        let compactor = LlmContextCompactor::new(provider, config);
993
994        let messages = vec![
995            Message::user(format!("{SUMMARY_PREFIX}first prior compacted section")),
996            Message::assistant(format!("{SUMMARY_PREFIX}second prior compacted section")),
997            Message::user(format!("{SUMMARY_PREFIX}third prior compacted section")),
998            Message::assistant("final short note"),
999        ];
1000
1001        let result = compactor.compact_history(messages).await?;
1002
1003        // No fresh content in the candidate window -> no LLM call is made, but
1004        // the prior summaries must be carried forward verbatim, NOT replaced
1005        // with an empty-summary placeholder (which used to destroy context).
1006        let recorded = requests
1007            .lock()
1008            .map_err(|_| anyhow::anyhow!("request log poisoned"))?;
1009        assert!(recorded.is_empty());
1010        drop(recorded);
1011        assert_eq!(result.new_count, 4);
1012        assert_eq!(result.messages.len(), 4);
1013
1014        if let Content::Text(text) = &result.messages[0].content {
1015            assert!(
1016                text.contains("first prior compacted section"),
1017                "first prior summary lost"
1018            );
1019            assert!(
1020                text.contains("second prior compacted section"),
1021                "second prior summary lost"
1022            );
1023            assert!(!text.contains(COMPACT_EMPTY_SUMMARY));
1024        } else {
1025            panic!("Expected summary text in first message");
1026        }
1027
1028        Ok(())
1029    }
1030
1031    #[tokio::test]
1032    async fn test_compact_history_preserves_tool_use_tool_result_pairs() -> Result<()> {
1033        let provider = Arc::new(MockProvider::new("Summary of earlier conversation."));
1034        let config = CompactionConfig::default()
1035            .with_retain_recent(2)
1036            .with_min_messages(3);
1037        let compactor = LlmContextCompactor::new(provider, config);
1038
1039        // Build a history where the split_point (len - retain_recent = 5 - 2 = 3)
1040        // would land exactly on the user tool_result message at index 3,
1041        // which would orphan it from its assistant tool_use at index 2.
1042        let messages = vec![
1043            // index 0: user
1044            Message::user("What files are in the project?"),
1045            // index 1: assistant text
1046            Message::assistant("Let me check that for you."),
1047            // index 2: assistant with tool_use
1048            Message {
1049                role: Role::Assistant,
1050                content: Content::Blocks(vec![ContentBlock::ToolUse {
1051                    id: "tool_1".to_string(),
1052                    name: "list_files".to_string(),
1053                    input: serde_json::json!({}),
1054                    thought_signature: None,
1055                }]),
1056            },
1057            // index 3: user with tool_result (naive split would land here)
1058            Message {
1059                role: Role::User,
1060                content: Content::Blocks(vec![ContentBlock::ToolResult {
1061                    tool_use_id: "tool_1".to_string(),
1062                    content: "file1.rs\nfile2.rs".to_string(),
1063                    is_error: None,
1064                }]),
1065            },
1066            // index 4: assistant final response
1067            Message::assistant("The project contains file1.rs and file2.rs."),
1068        ];
1069
1070        let result = compactor.compact_history(messages).await?;
1071
1072        // The split_point should have been adjusted back from 3 to 2,
1073        // so to_keep includes: [assistant tool_use, user tool_result, assistant response]
1074        // Plus summary + ack = 5 total
1075        assert_eq!(result.new_count, 5);
1076
1077        // Verify the kept messages include the tool_use/tool_result pair
1078        // After summary + ack, the third message should be the assistant with tool_use
1079        let kept_assistant = &result.messages[2];
1080        if let Content::Blocks(blocks) = &kept_assistant.content {
1081            assert!(
1082                blocks
1083                    .iter()
1084                    .any(|b| matches!(b, ContentBlock::ToolUse { .. })),
1085                "Expected assistant tool_use in kept messages"
1086            );
1087        } else {
1088            panic!("Expected Blocks content for assistant tool_use message");
1089        }
1090
1091        // The fourth message should be the user tool_result
1092        let kept_user = &result.messages[3];
1093        if let Content::Blocks(blocks) = &kept_user.content {
1094            assert!(
1095                blocks
1096                    .iter()
1097                    .any(|b| matches!(b, ContentBlock::ToolResult { .. })),
1098                "Expected user tool_result in kept messages"
1099            );
1100        } else {
1101            panic!("Expected Blocks content for user tool_result message");
1102        }
1103
1104        Ok(())
1105    }
1106
1107    #[tokio::test]
1108    async fn test_compact_history_split_skips_leading_orphan_after_summary_ack() -> Result<()> {
1109        // The user-visible bug at M7.5: a previously
1110        // compacted history was re-compacted in a later turn. The
1111        // first compaction left
1112        // `[summary, summary_ack, user(tool_result toolu_X),
1113        //  assistant(toolu_X reply), ...]`. On the second pass the
1114        // default `split_point` (len - retain_recent = 5 - 3 = 2)
1115        // would have made `to_keep[0] == user(tool_result toolu_X)`,
1116        // and the synthetic `[summary, summary_ack, …]` prefix the
1117        // compactor inserts in front of `to_keep` has no `tool_use`
1118        // blocks — so the next request to Anthropic blew up with
1119        // `messages.2.content.0: unexpected tool_use_id`.
1120        //
1121        // Pair-preservation alone can't fix this: it only inspects
1122        // the immediate prev/next pair (here `summary_ack` vs
1123        // `user(tool_result)`) and `summary_ack` is text-only, so the
1124        // pair check sees no `tool_use` to anchor on and lets the
1125        // orphan through. The chain-safety pass added in
1126        // `split_point_preserves_tool_pairs_with_cap` step 3 walks
1127        // the candidate forward past any leading orphan, so the
1128        // `tool_result` lands in `to_summarize` and gets folded into
1129        // the summary's prose where it's harmless.
1130        //
1131        // The assertion is structural, not block-counting: every
1132        // surviving `tool_result` must reference a `tool_use` that
1133        // appears earlier in the new message list. No
1134        // post-compaction stripping is involved — the split point
1135        // alone is responsible for chain integrity.
1136        let provider = Arc::new(MockProvider::new("Re-summary."));
1137        let config = CompactionConfig::default()
1138            .with_retain_recent(3)
1139            .with_min_messages(1);
1140        let compactor = LlmContextCompactor::new(provider, config);
1141
1142        let messages = vec![
1143            Message::user(format!("{SUMMARY_PREFIX}Old summary about toolu_X.")),
1144            Message::assistant(SUMMARY_ACKNOWLEDGMENT),
1145            Message {
1146                role: Role::User,
1147                content: Content::Blocks(vec![ContentBlock::ToolResult {
1148                    tool_use_id: "toolu_X".to_string(),
1149                    content: "result for X".to_string(),
1150                    is_error: None,
1151                }]),
1152            },
1153            Message::assistant("Result interpreted."),
1154            Message::user("Now what?"),
1155        ];
1156
1157        let result = compactor.compact_history(messages).await?;
1158
1159        let mut seen_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
1160        for msg in &result.messages {
1161            if let Content::Blocks(blocks) = &msg.content {
1162                for block in blocks {
1163                    match block {
1164                        ContentBlock::ToolResult { tool_use_id, .. } => {
1165                            assert!(
1166                                seen_ids.contains(tool_use_id),
1167                                "orphan tool_use_id {tool_use_id} survived split selection",
1168                            );
1169                        }
1170                        ContentBlock::ToolUse { id, .. } => {
1171                            seen_ids.insert(id.clone());
1172                        }
1173                        _ => {}
1174                    }
1175                }
1176            }
1177        }
1178
1179        Ok(())
1180    }
1181
1182    #[tokio::test]
1183    async fn test_compact_history_keeps_tool_pair_when_immediate_prev_is_text_only() -> Result<()> {
1184        // Tighter regression for the chain-safety boundary: even
1185        // when the message *before* the candidate split point is
1186        // text-only (so pair-preservation has nothing to anchor on),
1187        // chain-safety must shift the split forward past a leading
1188        // `user(tool_result)` whose `tool_use` would otherwise be
1189        // folded into the summary.
1190        let provider = Arc::new(MockProvider::new("Boundary summary."));
1191        let config = CompactionConfig::default()
1192            .with_retain_recent(2)
1193            .with_min_messages(1);
1194        let compactor = LlmContextCompactor::new(provider, config);
1195
1196        // Layout (5 messages, retain_recent=2 → initial split=3):
1197        //   0: user("first turn") — to_summarize
1198        //   1: assistant("text only") — to_summarize, immediate prev
1199        //   2: user(tool_result toolu_Y) — orphan in default to_keep
1200        //   3: assistant("then a reply")
1201        //   4: user("ok thanks")
1202        //
1203        // The corresponding `tool_use` for toolu_Y was lost long
1204        // ago — there's no `tool_use` anywhere in `messages`. With
1205        // pair-preservation alone, `to_keep` would start at index 3
1206        // (or 2 unshifted), leaving the orphan at the head and
1207        // tripping Anthropic.
1208        let messages = vec![
1209            Message::user("first turn"),
1210            Message::assistant("text only"),
1211            Message {
1212                role: Role::User,
1213                content: Content::Blocks(vec![ContentBlock::ToolResult {
1214                    tool_use_id: "toolu_Y".to_string(),
1215                    content: "ancient result".to_string(),
1216                    is_error: None,
1217                }]),
1218            },
1219            Message::assistant("then a reply"),
1220            Message::user("ok thanks"),
1221        ];
1222
1223        let result = compactor.compact_history(messages).await?;
1224
1225        // No tool_result block survives anywhere — the only one in
1226        // input was orphaned and the split-shift folded it into the
1227        // summary.
1228        let has_tool_result = result.messages.iter().any(|m| {
1229            matches!(
1230                &m.content,
1231                Content::Blocks(blocks)
1232                    if blocks.iter().any(|b| matches!(b, ContentBlock::ToolResult { .. }))
1233            )
1234        });
1235        assert!(
1236            !has_tool_result,
1237            "orphan tool_result should have been pushed into to_summarize, not retained",
1238        );
1239
1240        Ok(())
1241    }
1242
1243    #[tokio::test]
1244    async fn test_compact_history_retained_tail_is_token_capped() -> Result<()> {
1245        let provider = Arc::new(MockProvider::new(
1246            "Project summary with a long context and technical context.",
1247        ));
1248        let config = CompactionConfig::default()
1249            .with_retain_recent(8)
1250            .with_min_messages(1)
1251            .with_threshold_tokens(1);
1252        let compactor = LlmContextCompactor::new(provider, config);
1253
1254        let mut messages = Vec::new();
1255
1256        // Older messages that will be summarized away.
1257        messages.extend((0..6).map(|index| Message::user(format!("pre-compaction noise {index}"))));
1258
1259        // Newer long messages: intentionally large to force retained-tail truncation.
1260        messages.extend(
1261            (0..8).map(|index| Message::assistant(format!("kept-{index}: {}", "x".repeat(12_000)))),
1262        );
1263
1264        let result = compactor.compact_history(messages).await?;
1265
1266        // The retained tail should be token capped and therefore shorter than retain_recent.
1267        let retained_tail = &result.messages[2..];
1268        assert!(retained_tail.len() < 8);
1269
1270        let mut latest_index = -1i32;
1271        let mut all_retained = true;
1272        for message in retained_tail {
1273            if let Content::Text(text) = &message.content {
1274                if let Some(number) = text.split(':').next().and_then(|prefix| {
1275                    prefix
1276                        .strip_prefix("kept-")
1277                        .and_then(|rest| rest.parse::<i32>().ok())
1278                }) {
1279                    if number >= 0 {
1280                        latest_index = latest_index.max(number);
1281                    }
1282                } else {
1283                    all_retained = false;
1284                }
1285            } else {
1286                all_retained = false;
1287            }
1288        }
1289
1290        assert!(all_retained);
1291        assert_eq!(latest_index, 7);
1292        assert!(
1293            TokenEstimator::estimate_history(retained_tail)
1294                <= compactor.config().max_retained_tail_tokens
1295        );
1296        assert!(compactor.needs_compaction(&result.messages));
1297
1298        Ok(())
1299    }
1300
1301    #[tokio::test]
1302    async fn test_compact_history_skips_summary_ack_when_retained_tail_is_empty() -> Result<()> {
1303        let provider = Arc::new(MockProvider::new("Summary for oversized user turn."));
1304        let config = CompactionConfig::default()
1305            .with_retain_recent(1)
1306            .with_min_messages(1)
1307            .with_threshold_tokens(1);
1308        let compactor = LlmContextCompactor::new(provider, config);
1309
1310        let messages = vec![
1311            Message::assistant("Earlier assistant context."),
1312            Message::user(format!("oversized-user-turn: {}", "x".repeat(200_000))),
1313        ];
1314
1315        let result = compactor.compact_history(messages).await?;
1316
1317        assert_eq!(result.new_count, 1);
1318        assert_eq!(result.messages.len(), 1);
1319
1320        let only_message = &result.messages[0];
1321        assert_eq!(only_message.role, Role::User);
1322
1323        if let Content::Text(text) = &only_message.content {
1324            assert!(text.contains("Previous conversation summary"));
1325            assert!(!text.contains(SUMMARY_ACKNOWLEDGMENT));
1326        } else {
1327            panic!("Expected summary text when retained tail is empty");
1328        }
1329
1330        Ok(())
1331    }
1332
1333    fn message_contains(message: &Message, needle: &str) -> bool {
1334        match &message.content {
1335            Content::Text(text) => text.contains(needle),
1336            Content::Blocks(blocks) => blocks.iter().any(|block| match block {
1337                ContentBlock::Text { text } => text.contains(needle),
1338                _ => false,
1339            }),
1340        }
1341    }
1342
1343    #[tokio::test]
1344    async fn test_epoch_one_facts_survive_two_compactions() -> Result<()> {
1345        // Regression for re-compaction data loss: a fact recorded before the
1346        // first compaction must still be present in the history after a second
1347        // compaction. The echo provider models a faithful LLM that preserves
1348        // its input; the bug was that the first summary (carrying the epoch-1
1349        // fact) was filtered out of the second summarization and dropped.
1350        const EPOCH1_FACT: &str = "EPOCH1_FACT: the API key lives in config/secrets.toml";
1351
1352        let requests = Arc::new(Mutex::new(Vec::new()));
1353        let provider = Arc::new(MockProvider::new_echo(requests.clone()));
1354        let config = CompactionConfig::default()
1355            .with_retain_recent(2)
1356            .with_min_messages(1);
1357        let compactor = LlmContextCompactor::new(provider, config);
1358
1359        let epoch1 = vec![
1360            Message::user(EPOCH1_FACT),
1361            Message::assistant("Understood, noted the secrets path."),
1362            Message::user("Now add error handling to main.rs."),
1363            Message::assistant("Added error handling to main.rs."),
1364            Message::user("latest user message one"),
1365            Message::assistant("latest assistant message two"),
1366        ];
1367
1368        let first = compactor.compact_history(epoch1).await?;
1369        assert!(
1370            first
1371                .messages
1372                .iter()
1373                .any(|m| message_contains(m, "EPOCH1_FACT")),
1374            "epoch-1 fact must be captured in the first summary"
1375        );
1376
1377        // Build the epoch-2 history on top of the first compaction's output.
1378        let mut epoch2 = first.messages;
1379        epoch2.push(Message::user("Another later turn."));
1380        epoch2.push(Message::assistant("Reply to the later turn."));
1381        epoch2.push(Message::user("Final turn a."));
1382        epoch2.push(Message::assistant("Final turn b."));
1383
1384        let second = compactor.compact_history(epoch2).await?;
1385
1386        assert!(
1387            second
1388                .messages
1389                .iter()
1390                .any(|m| message_contains(m, "EPOCH1_FACT")),
1391            "epoch-1 fact must survive the second compaction"
1392        );
1393
1394        // Sanity: the second compaction actually summarized (made an LLM call
1395        // on the prior summary), so this is a true re-compaction path.
1396        let recorded = requests
1397            .lock()
1398            .map_err(|_| anyhow::anyhow!("request log poisoned"))?;
1399        assert!(
1400            recorded.iter().any(|req| req.contains("EPOCH1_FACT")),
1401            "prior summary carrying the epoch-1 fact must reach the summarizer"
1402        );
1403        drop(recorded);
1404
1405        Ok(())
1406    }
1407
1408    #[tokio::test]
1409    async fn test_compact_history_long_tool_chain_respects_token_cap() -> Result<()> {
1410        // Regression for the pair-shift bug: in an unbroken tool chain, the
1411        // old second clause of `crosses_tool_pair` walked the split point back
1412        // through the entire chain, retaining everything and defeating the
1413        // token cap. With only the assistant(tool_use)->user(tool_result)
1414        // boundary unsplittable, the retained tail stays bounded near the cap.
1415        let provider = Arc::new(MockProvider::new("Summary of the early tool chain."));
1416        let cap = 20_000;
1417        // retain_recent asks to keep many messages, but the cap must override
1418        // it. retain_recent < message count so we don't hit the early return.
1419        let config = CompactionConfig::default()
1420            .with_retain_recent(18)
1421            .with_min_messages(1)
1422            .with_threshold_tokens(1)
1423            .with_max_retained_tail_tokens(cap);
1424        let compactor = LlmContextCompactor::new(provider, config);
1425
1426        // 10 alternating tool pairs (20 messages), each large enough that the
1427        // whole chain dwarfs the cap.
1428        let mut messages = Vec::new();
1429        for i in 0..10 {
1430            messages.push(Message {
1431                role: Role::Assistant,
1432                content: Content::Blocks(vec![ContentBlock::ToolUse {
1433                    id: format!("tool_{i}"),
1434                    name: "run".to_string(),
1435                    input: serde_json::json!({ "arg": "y".repeat(12_000) }),
1436                    thought_signature: None,
1437                }]),
1438            });
1439            messages.push(Message {
1440                role: Role::User,
1441                content: Content::Blocks(vec![ContentBlock::ToolResult {
1442                    tool_use_id: format!("tool_{i}"),
1443                    content: format!("result-{i}: {}", "z".repeat(12_000)),
1444                    is_error: None,
1445                }]),
1446            });
1447        }
1448
1449        let full_tokens = TokenEstimator::estimate_history(&messages);
1450        assert!(
1451            full_tokens > cap * 2,
1452            "test setup: full chain must far exceed the cap"
1453        );
1454
1455        let result = compactor.compact_history(messages).await?;
1456
1457        // The retained tail is non-empty, so the output is
1458        // [summary, ack, ...tail]; skip the synthetic summary + ack prefix.
1459        let retained_tail = &result.messages[2..];
1460
1461        let tail_tokens = TokenEstimator::estimate_history(retained_tail);
1462        // Bounded near the cap (soft cap allows one extra message from pair
1463        // preservation); crucially NOT the entire chain.
1464        assert!(
1465            tail_tokens <= cap + 8_000,
1466            "retained tail {tail_tokens} should be bounded by the cap {cap}, not the whole chain"
1467        );
1468        assert!(
1469            retained_tail.len() < 20,
1470            "compaction must have summarized part of the chain"
1471        );
1472
1473        Ok(())
1474    }
1475
1476    #[tokio::test]
1477    async fn test_compact_warns_and_marks_truncated_summary() -> Result<()> {
1478        // Regression for silent summary truncation: when the summarizer hits
1479        // MaxTokens, the compactor retries with a larger budget and, if still
1480        // truncated, appends a visible marker instead of silently accepting a
1481        // clipped summary.
1482        let requests = Arc::new(Mutex::new(Vec::new()));
1483        let provider = Arc::new(MockProvider::new_truncating(
1484            "partial summary cut off mid-",
1485            requests.clone(),
1486        ));
1487        let config = CompactionConfig::default().with_min_messages(1);
1488        let compactor = LlmContextCompactor::new(provider, config);
1489
1490        let messages = vec![
1491            Message::user("Some content that needs summarizing."),
1492            Message::assistant("More content to summarize here."),
1493        ];
1494
1495        let summary = compactor.compact(&messages).await?;
1496
1497        assert!(
1498            summary.contains("[summary truncated"),
1499            "a persistently truncated summary must carry a truncation marker"
1500        );
1501
1502        // The compactor retried once with a larger budget: two calls total.
1503        let recorded = requests
1504            .lock()
1505            .map_err(|_| anyhow::anyhow!("request log poisoned"))?;
1506        assert_eq!(recorded.len(), 2, "truncation should trigger one retry");
1507        drop(recorded);
1508
1509        Ok(())
1510    }
1511}