Skip to main content

deepstrike_core/context/
compression.rs

1use super::config::ContextConfig;
2use super::partitions::ContextPartitions;
3use super::pressure::PressureAction;
4use super::summarizer::Summarizer;
5use super::token_engine::ContextTokenEngine;
6use crate::types::message::{Content, ContentPart, Message};
7
8/// Compression result returned by every compactor.
9#[derive(Default)]
10pub struct CompressResult {
11    /// Tokens freed from the partition.
12    pub tokens_saved: u32,
13    /// Generated summary text if any.
14    pub summary: Option<String>,
15    /// Messages drained/archived from the context.
16    pub archived: Vec<Message>,
17    /// Cache-aware (W1-1 step 2 / DoD #4): the earliest history-message index this op rewrote or
18    /// removed — i.e. where it invalidates the prompt-cache prefix. `None` = prefix-safe (touched
19    /// nothing). The pipeline folds the minimum across stages and surfaces it on the observation.
20    pub prefix_invalidated_at: Option<usize>,
21}
22
23/// Compression strategy interface.
24pub trait Compressor: Send + Sync {
25    fn compress(
26        &self,
27        partitions: &mut ContextPartitions,
28        target_tokens: u32,
29        max_tokens: u32,
30        preserve_k: usize,
31        summarizer: &dyn Summarizer,
32        engine: &ContextTokenEngine,
33    ) -> CompressResult;
34}
35
36/// rho > snip_threshold: cap each oversized message at `per_msg_tokens`.
37pub struct SnipCompactor {
38    pub per_msg_ratio: f64,
39}
40
41impl Compressor for SnipCompactor {
42    fn compress(
43        &self,
44        partitions: &mut ContextPartitions,
45        _target_tokens: u32,
46        max_tokens: u32,
47        preserve_k: usize,
48        _summarizer: &dyn Summarizer,
49        engine: &ContextTokenEngine,
50    ) -> CompressResult {
51        let per_msg_limit = ((max_tokens as f64 * self.per_msg_ratio) as u32).max(50);
52        let mut saved = 0u32;
53        let partition = &mut partitions.history;
54        // Cache-prefix protection yields when there is no drop-fallback. An untouchable message —
55        // protected-from-snip (idx < preserve_k) AND inside the drop floor (idx ≥ len − preserve_k*2)
56        // — exists only when `len < preserve_k*3`. Below that threshold, disable protection so a
57        // forced/413 compaction can always cap the oldest messages and free space; above it, the
58        // prefix is droppable as a fallback, so we protect it (cache-aware).
59        let prefix_keep = prefix_keep_for(partition.messages.len(), preserve_k);
60        let indices =
61            oversized_text_message_indices(&partition.messages, per_msg_limit, prefix_keep, engine);
62
63        for &i in &indices {
64            let msg = &mut partition.messages[i];
65            let original_tokens = msg.token_count.unwrap_or_else(|| engine.count_message(msg));
66            if let Content::Text(ref t) = msg.content {
67                let head_limit = per_msg_limit / 2;
68                let tail_limit = per_msg_limit.saturating_sub(head_limit);
69                let head_text = engine.truncate(t, head_limit);
70
71                let chars: Vec<char> = t.chars().collect();
72                let mut low = head_text.chars().count();
73                let mut high = chars.len();
74                let mut suffix_start = chars.len();
75                while low <= high {
76                    let mid = (low + high) / 2;
77                    if mid >= chars.len() {
78                        break;
79                    }
80                    let candidate: String = chars[mid..].iter().collect();
81                    let tokens = engine.count(&candidate);
82                    if tokens <= tail_limit {
83                        suffix_start = mid;
84                        if mid == 0 {
85                            break;
86                        }
87                        high = mid - 1;
88                    } else {
89                        low = mid + 1;
90                    }
91                }
92                let tail_text: String = chars[suffix_start..].iter().collect();
93                let omitted = original_tokens
94                    .saturating_sub(head_limit)
95                    .saturating_sub(tail_limit);
96                msg.content = Content::Text(format!(
97                    "{}… [… {} tokens omitted …] …{}",
98                    head_text, omitted, tail_text
99                ));
100                msg.token_count = Some(per_msg_limit);
101                saved += original_tokens.saturating_sub(per_msg_limit);
102            }
103        }
104
105        partition.token_count = partition.token_count.saturating_sub(saved);
106
107        // Pure executor: snip caps oversized messages in place; it never archives or summarizes.
108        // Summary + compression-log attribution is the pipeline's job (under the *requested* action).
109        CompressResult {
110            tokens_saved: saved,
111            prefix_invalidated_at: indices.iter().min().copied(),
112            ..Default::default()
113        }
114    }
115}
116
/// How many of the oldest messages to protect from in-place rewrites (snip/excerpt) as the stable
/// prompt-cache prefix.
///
/// The protection **yields when there is no drop-fallback**: an untouchable message
/// (protected-from-snip `idx < preserve_k` AND inside the drop floor `idx ≥ len − preserve_k*2`)
/// exists only when `len < preserve_k*3`. Below that, return 0 so a forced/413 compaction can
/// always cap the oldest messages; at or above it, the prefix is droppable as a fallback, so
/// protect it.
///
/// Returns `preserve_k` when `len >= preserve_k * 3` (the multiplication saturates instead of
/// overflowing), otherwise `0`.
fn prefix_keep_for(len: usize, preserve_k: usize) -> usize {
    let fallback_threshold = preserve_k.saturating_mul(3);
    if len >= fallback_threshold {
        preserve_k
    } else {
        0
    }
}
133
134fn oversized_text_message_indices(
135    messages: &[Message],
136    per_msg_limit: u32,
137    prefix_keep: usize,
138    engine: &ContextTokenEngine,
139) -> Vec<usize> {
140    messages
141        .iter()
142        .enumerate()
143        .filter(|(i, msg)| {
144            // Cache-aware (W1-1 step 2): never snip the oldest `prefix_keep` messages — they are the
145            // stable prompt-cache prefix, and rewriting one invalidates the whole cache. Their tokens
146            // are reclaimed by a batched DropOldest instead (which breaks the prefix exactly once).
147            if *i < prefix_keep {
148                return false;
149            }
150            if !matches!(msg.content, Content::Text(_)) {
151                return false;
152            }
153            let toks = msg.token_count.unwrap_or_else(|| engine.count_message(msg));
154            toks > per_msg_limit && toks > 10
155        })
156        .map(|(i, _)| i)
157        .collect()
158}
159
/// Returns the current system time as text (the `Debug` rendering of `SystemTime::now()`).
fn utc_now() -> String {
    // In real use this should come from `ProviderResult.now_ms`;
    // it is simplified to a placeholder here.
    let now = std::time::SystemTime::now();
    format!("{:?}", now)
}
166
167/// Helper to extract key fields and info from JSON strings.
168fn extract_json_excerpt(output: &str) -> Option<String> {
169    let val: serde_json::Value = serde_json::from_str(output).ok()?;
170    match val {
171        serde_json::Value::Object(map) => {
172            let mut summary_parts = Vec::new();
173            let mut keys = Vec::new();
174            for (k, v) in &map {
175                keys.push(k.as_str());
176                if v.is_number() || v.is_boolean() {
177                    summary_parts.push(format!("{}: {}", k, v));
178                } else if let Some(s) = v.as_str() {
179                    if s.len() <= 50 {
180                        summary_parts.push(format!("{}: \"{}\"", k, s));
181                    }
182                }
183            }
184            Some(format!(
185                "JSON Keys: [{}]\nJSON Fields: {{{}}}",
186                keys.join(", "),
187                summary_parts.join(", ")
188            ))
189        }
190        serde_json::Value::Array(arr) => {
191            if arr.is_empty() {
192                return Some("JSON Array: []".to_string());
193            }
194            let mut headers = Vec::new();
195            if let Some(serde_json::Value::Object(first_map)) = arr.first() {
196                for k in first_map.keys() {
197                    headers.push(k.as_str());
198                }
199            }
200            let len = arr.len();
201            Some(format!(
202                "JSON Array: {} items. Keys: [{}]",
203                len,
204                headers.join(", ")
205            ))
206        }
207        _ => None,
208    }
209}
210
211/// Helper to keep a specific amount of head and tail tokens.
212fn excerpt_text(
213    text: &str,
214    head_tokens: u32,
215    tail_tokens: u32,
216    engine: &ContextTokenEngine,
217) -> String {
218    let total_tokens = engine.count(text);
219    if total_tokens <= head_tokens + tail_tokens {
220        return text.to_string();
221    }
222    let head = engine.truncate(text, head_tokens);
223
224    let chars: Vec<char> = text.chars().collect();
225    let mut low = head.chars().count();
226    let mut high = chars.len();
227    let mut suffix_start = chars.len();
228    while low <= high {
229        let mid = (low + high) / 2;
230        if mid >= chars.len() {
231            break;
232        }
233        let candidate: String = chars[mid..].iter().collect();
234        let tokens = engine.count(&candidate);
235        if tokens <= tail_tokens {
236            suffix_start = mid;
237            if mid == 0 {
238                break;
239            }
240            high = mid - 1;
241        } else {
242            low = mid + 1;
243        }
244    }
245    let tail: String = chars[suffix_start..].iter().collect();
246    let remaining = total_tokens
247        .saturating_sub(head_tokens)
248        .saturating_sub(tail_tokens);
249    format!("{}… [… {} tokens omitted …] …{}", head, remaining, tail)
250}
251
252/// Pure selection (W1-1 collapse): indices of history messages whose large (≥200-token) tool result
253/// a micro-compact would excerpt — the first tool-result part whose `call_id` is not in
254/// `preserved_refs`. The executor applies the excerpt. The cache-aware planner reuses this: tool
255/// results are interleaved mid/late history, so excerpting them is prefix-safe.
256fn excerptable_tool_result_indices(
257    messages: &[Message],
258    preserved_refs: &[String],
259    prefix_keep: usize,
260    engine: &ContextTokenEngine,
261) -> Vec<usize> {
262    messages
263        .iter()
264        .enumerate()
265        .filter_map(|(i, msg)| {
266            // Cache-aware (W1-1 step 2): protect the oldest `prefix_keep` messages from in-place
267            // excerpting (they are the stable prompt-cache prefix).
268            if i < prefix_keep {
269                return None;
270            }
271            let toks = msg.token_count.unwrap_or_else(|| engine.count_message(msg));
272            if toks < 200 {
273                return None;
274            }
275            let Content::Parts(parts) = &msg.content else {
276                return None;
277            };
278            let call_id = parts.iter().find_map(|p| match p {
279                ContentPart::ToolResult { call_id, .. } => Some(call_id.to_string()),
280                _ => None,
281            })?;
282            (!preserved_refs.contains(&call_id)).then_some(i)
283        })
284        .collect()
285}
286
287/// rho > micro_threshold: replace tool results with a compact excerpt. Selection via
288/// [`excerptable_tool_result_indices`]; this executor only applies the excerpt.
289pub struct MicroCompactor;
290
291impl Compressor for MicroCompactor {
292    fn compress(
293        &self,
294        partitions: &mut ContextPartitions,
295        _target_tokens: u32,
296        _max_tokens: u32,
297        preserve_k: usize,
298        _summarizer: &dyn Summarizer,
299        engine: &ContextTokenEngine,
300    ) -> CompressResult {
301        let find_tool_name = |call_id: &str, msgs: &[Message]| -> Option<String> {
302            for m in msgs {
303                for tc in &m.tool_calls {
304                    if tc.id == call_id {
305                        return Some(tc.name.to_string());
306                    }
307                }
308            }
309            None
310        };
311
312        // Selection lifted to a pure helper (excludes `preserved_refs` + the cache-prefix when it has
313        // a drop-fallback); the executor only applies the excerpt to the chosen tool-result messages.
314        let prefix_keep = prefix_keep_for(partitions.history.messages.len(), preserve_k);
315        let indices = excerptable_tool_result_indices(
316            &partitions.history.messages,
317            &partitions.task_state.preserved_refs,
318            prefix_keep,
319            engine,
320        );
321        let messages_clone = partitions.history.messages.clone();
322        let partition = &mut partitions.history;
323        let mut saved = 0u32;
324
325        for &i in &indices {
326            let msg = &mut partition.messages[i];
327            let original_tokens = msg.token_count.unwrap_or_else(|| engine.count_message(msg));
328            if let Content::Parts(ref mut parts) = msg.content {
329                let tool_result_index = parts
330                    .iter()
331                    .position(|p| matches!(p, ContentPart::ToolResult { .. }));
332                if let Some(idx) = tool_result_index {
333                    if let ContentPart::ToolResult {
334                        call_id,
335                        output,
336                        is_error: _,
337                    } = &mut parts[idx]
338                    {
339                        let tool_name = find_tool_name(call_id, &messages_clone)
340                            .unwrap_or_else(|| "unknown".to_string());
341
342                        let new_output = if original_tokens > 2000 {
343                            if let Some(json_excerpt) = extract_json_excerpt(output) {
344                                format!(
345                                    "[tool result: {} | {} | {} tokens]\n{}",
346                                    call_id, tool_name, original_tokens, json_excerpt
347                                )
348                            } else {
349                                let excerpt = excerpt_text(output, 30, 10, engine);
350                                format!(
351                                    "[tool result: {} | {} | {} tokens]\n{}",
352                                    call_id, tool_name, original_tokens, excerpt
353                                )
354                            }
355                        } else {
356                            let excerpt = excerpt_text(output, 150, 50, engine);
357                            format!(
358                                "[tool result: {} | {} | {} tokens]\n{}",
359                                call_id, tool_name, original_tokens, excerpt
360                            )
361                        };
362
363                        let new_tokens = engine.count(&new_output);
364                        msg.content = Content::Text(new_output);
365                        msg.token_count = Some(new_tokens);
366                        saved += original_tokens.saturating_sub(new_tokens);
367                    }
368                }
369            }
370        }
371
372        partition.token_count = partition.token_count.saturating_sub(saved);
373
374        // Pure executor: excerpts tool results in place; no archive, summary, or self-log.
375        CompressResult {
376            tokens_saved: saved,
377            prefix_invalidated_at: indices.iter().min().copied(),
378            ..Default::default()
379        }
380    }
381}
382
383/// Pure **selection** (W1-1 collapse): how many of the oldest history messages to drop to bring the
384/// partition under `target_tokens`, never crossing the preserve-recent floor (`keep` messages).
385/// Returns `(count, tokens_saved)`; the executor just drains `count` from the front. This is the
386/// decision the cache-aware planner reuses to "batch one big drop to target" rather than re-deriving
387/// the count inside the compactor.
388pub fn plan_drop_oldest(
389    messages: &[Message],
390    total_tokens: u32,
391    target_tokens: u32,
392    keep: usize,
393    engine: &ContextTokenEngine,
394) -> (usize, u32) {
395    let limit = messages.len().saturating_sub(keep);
396    let mut saved = 0u32;
397    let mut n = 0usize;
398    for (i, msg) in messages.iter().take(limit).enumerate() {
399        if total_tokens.saturating_sub(saved) <= target_tokens {
400            break;
401        }
402        saved += msg.token_count.unwrap_or_else(|| engine.count_message(msg));
403        n = i + 1;
404    }
405    (n, saved)
406}
407
408/// rho > collapse_threshold: drop oldest messages until within target. Selection via
409/// [`plan_drop_oldest`]; this executor only drains the chosen count.
410pub struct CollapseCompactor;
411
412impl Compressor for CollapseCompactor {
413    fn compress(
414        &self,
415        partitions: &mut ContextPartitions,
416        target_tokens: u32,
417        _max_tokens: u32,
418        preserve_k: usize,
419        _summarizer: &dyn Summarizer,
420        engine: &ContextTokenEngine,
421    ) -> CompressResult {
422        let partition = &mut partitions.history;
423        let keep = preserve_k * 2; // turns → messages (user + assistant per turn)
424        let (n, saved) =
425            plan_drop_oldest(&partition.messages, partition.token_count, target_tokens, keep, engine);
426
427        if n == 0 {
428            return CompressResult::default();
429        }
430
431        let archived: Vec<Message> = partition.messages.drain(..n).collect();
432        partition.token_count = partition.token_count.saturating_sub(saved);
433
434        // Pure executor: return the drained messages; the pipeline summarizes + logs once under the
435        // requested action. Dropping the oldest `n` breaks the cache prefix at index 0.
436        CompressResult {
437            tokens_saved: saved,
438            archived,
439            prefix_invalidated_at: Some(0),
440            ..Default::default()
441        }
442    }
443}
444
445/// rho > auto_threshold: collapse history entirely except last K turns, updating compression log.
446pub struct AutoCompactor;
447
448impl Compressor for AutoCompactor {
449    fn compress(
450        &self,
451        partitions: &mut ContextPartitions,
452        _target_tokens: u32,
453        _max_tokens: u32,
454        preserve_k: usize,
455        _summarizer: &dyn Summarizer,
456        engine: &ContextTokenEngine,
457    ) -> CompressResult {
458        let partition = &mut partitions.history;
459        if partition.messages.is_empty() {
460            return CompressResult::default();
461        }
462
463        let original_tokens = partition.token_count;
464        let keep = preserve_k * 2;
465        let limit = partition.messages.len().saturating_sub(keep);
466        let (archived, kept): (Vec<Message>, Vec<Message>) = if limit > 0 {
467            let archived_msgs = partition.messages.drain(..limit).collect();
468            let kept_msgs = partition.messages.drain(..).collect();
469            (archived_msgs, kept_msgs)
470        } else {
471            (vec![], partition.messages.drain(..).collect())
472        };
473
474        if archived.is_empty() {
475            partition.messages = kept;
476            return CompressResult::default();
477        }
478
479        partition.messages = kept;
480
481        let kept_tokens: u32 = partition
482            .messages
483            .iter()
484            .map(|m| m.token_count.unwrap_or_else(|| engine.count_message(m)))
485            .sum();
486        partition.token_count = kept_tokens;
487
488        // Pure executor: return the drained messages; the pipeline summarizes + logs once under the
489        // requested action. Auto-compact drops all but the last K turns → prefix break at index 0.
490        CompressResult {
491            tokens_saved: original_tokens.saturating_sub(kept_tokens),
492            archived,
493            prefix_invalidated_at: Some(0),
494            ..Default::default()
495        }
496    }
497}
498
499// ─── Cache-aware compaction (W1-1 step 2) ───────────────────────────────────────────────────────
500// Additive cost model: introduced + tested before it drives the cascade, so the behavior-changing
501// wiring (prefix-safe-first selection + batching, with golden updates) is a separate, reviewable step.
502
/// One fully-specified compaction step emitted by the cache-aware planner. All *selection* has
/// already happened (through the pure helpers); the executor applies the step mechanically.
#[derive(Debug, Clone, PartialEq)]
pub enum CompactionStep {
    /// Excerpt the tool results at these history-message indices. Usually prefix-safe: tool
    /// results sit mid/late in history, so the earliest touched index is rarely the cache prefix.
    Excerpt { msg_idx: Vec<usize> },
    /// Cap the oversized text messages at these indices to `per_msg_limit`.
    Snip { msg_idx: Vec<usize>, per_msg_limit: u32 },
    /// Drop the `count` oldest messages (the pipeline summarizes them). Breaks the prefix at 0.
    DropOldest { count: usize },
}

impl CompactionStep {
    /// Earliest history-message index this step rewrites or removes, i.e. where it invalidates
    /// the prompt-cache prefix. `None` means the step touches nothing and is prefix-safe.
    ///
    /// A lower index is a higher cache cost (Anthropic keys the cache off the first N messages),
    /// so the planner prefers `None` or a later index and escalates to a prefix-breaking drop only
    /// when the safe steps cannot free enough.
    pub fn invalidates_prefix_at(&self) -> Option<usize> {
        match self {
            CompactionStep::Excerpt { msg_idx } => msg_idx.iter().copied().min(),
            CompactionStep::Snip { msg_idx, .. } => msg_idx.iter().copied().min(),
            CompactionStep::DropOldest { count } => {
                if *count == 0 {
                    None
                } else {
                    Some(0)
                }
            }
        }
    }
}

/// Prompt-cache-invalidation index of a whole plan: the earliest break across its steps. An
/// earlier break invalidates everything after it, so the minimum dominates the cost. `None` means
/// the plan is entirely prefix-safe and preserves the prompt cache — the cache-aware planner's
/// goal whenever the safe steps can free enough.
pub fn plan_cache_cost(steps: &[CompactionStep]) -> Option<usize> {
    steps
        .iter()
        .filter_map(CompactionStep::invalidates_prefix_at)
        .min()
}
539
540/// Compression pipeline — operates on history partition but can reference full partitions.
541pub struct CompressionPipeline {
542    stages: Vec<(PressureAction, Box<dyn Compressor>)>,
543    preserve_recent_turns: usize,
544}
545
546impl CompressionPipeline {
547    pub fn new(config: &ContextConfig) -> Self {
548        Self {
549            preserve_recent_turns: config.preserve_recent_turns,
550            stages: vec![
551                (
552                    PressureAction::SnipCompact,
553                    Box::new(SnipCompactor {
554                        per_msg_ratio: config.snip_per_msg_ratio,
555                    }),
556                ),
557                (PressureAction::MicroCompact, Box::new(MicroCompactor)),
558                (PressureAction::ContextCollapse, Box::new(CollapseCompactor)),
559                (PressureAction::AutoCompact, Box::new(AutoCompactor)),
560            ],
561        }
562    }
563
564    pub fn compress(
565        &self,
566        partitions: &mut ContextPartitions,
567        action: PressureAction,
568        max_tokens: u32,
569        target_tokens: u32,
570        engine: &ContextTokenEngine,
571    ) -> (u32, Option<String>, Vec<Message>, Option<usize>) {
572        if action == PressureAction::None {
573            return (0, None, vec![], None);
574        }
575
576        let mut total_saved = 0;
577        let mut all_archived = vec![];
578        // Cache cost of the whole compaction = the earliest prefix-break across the stages that ran
579        // (an earlier break dominates). `None` = entirely prefix-safe.
580        let mut cache_at: Option<usize> = None;
581        let summarizer = super::summarizer::RuleSummarizer;
582
583        for (stage_action, compressor) in &self.stages {
584            if *stage_action <= action {
585                if partitions.total_tokens(engine) <= target_tokens {
586                    break;
587                }
588                let res = compressor.compress(
589                    partitions,
590                    target_tokens,
591                    max_tokens,
592                    self.preserve_recent_turns,
593                    &summarizer,
594                    engine,
595                );
596                total_saved += res.tokens_saved;
597                cache_at = [cache_at, res.prefix_invalidated_at].into_iter().flatten().min();
598                all_archived.extend(res.archived);
599            }
600        }
601
602        // Single decision point for summary + log attribution: whatever the cascade drained is
603        // summarized ONCE under the **requested** action and logged once. The compactors are pure
604        // executors that no longer self-attribute — so a `compress(AutoCompact)` whose draining
605        // happened in the Collapse stage is still labeled `auto_compact` (the C fix), and a
606        // `compress(ContextCollapse)` stays `context_collapse` (unchanged).
607        let summary = if all_archived.is_empty() {
608            None
609        } else {
610            let s = summarizer.summarize(&all_archived, action, target_tokens);
611            partitions.task_state.log_compression(action.label(), s.clone());
612            Some(s)
613        };
614
615        (total_saved, summary, all_archived, cache_at)
616    }
617}
618
619#[cfg(test)]
620mod tests {
621    use super::*;
622    use crate::context::config::ContextConfig;
623    use crate::context::partitions::ContextPartitions;
624    use crate::context::token_engine::ContextTokenEngine;
625    use crate::types::message::Message;
626
627    fn engine() -> ContextTokenEngine {
628        ContextTokenEngine::char_approx()
629    }
630    fn config() -> ContextConfig {
631        ContextConfig::default()
632    }
633    fn summarizer() -> super::super::summarizer::RuleSummarizer {
634        super::super::summarizer::RuleSummarizer
635    }
636    const MAX: u32 = 1_000;
637
638    #[test]
639    fn snip_compactor_truncates_oversized_messages() {
640        let cfg = ContextConfig {
641            snip_per_msg_ratio: 0.10,
642            ..Default::default()
643        };
644        let compactor = SnipCompactor {
645            per_msg_ratio: cfg.snip_per_msg_ratio,
646        };
647        let mut ctx = ContextPartitions::new(&cfg);
648        ctx.history.push(Message::user("a".repeat(800)), 200);
649        // preserve_k=0: exercise the truncation transform directly (no cache-prefix protection).
650        let result = compactor.compress(&mut ctx, 0, MAX, 0, &summarizer(), &engine());
651        assert!(result.tokens_saved > 0);
652        if let Content::Text(ref t) = ctx.history.messages[0].content {
653            assert!(t.contains("… [… 100 tokens omitted …] …"), "got: {t}");
654        }
655    }
656
657    #[test]
658    fn snip_compactor_leaves_small_messages_untouched() {
659        let cfg = ContextConfig {
660            snip_per_msg_ratio: 0.10,
661            ..Default::default()
662        };
663        let compactor = SnipCompactor {
664            per_msg_ratio: cfg.snip_per_msg_ratio,
665        };
666        let mut ctx = ContextPartitions::new(&cfg);
667        ctx.history.push(Message::user("short"), 5);
668        let result = compactor.compress(&mut ctx, 0, MAX, 2, &summarizer(), &engine());
669        assert_eq!(result.tokens_saved, 0);
670    }
671
672    #[test]
673    fn micro_compactor_replaces_tool_results_with_measured_placeholder() {
674        use crate::types::message::{ContentPart, Role};
675        use compact_str::CompactString;
676
677        let compactor = MicroCompactor;
678        let mut ctx = ContextPartitions::new(&config());
679        let parts = vec![ContentPart::ToolResult {
680            call_id: CompactString::new("c1"),
681            output: "a".repeat(1200),
682            is_error: false,
683        }];
684        let msg = Message {
685            role: Role::Tool,
686            content: Content::Parts(parts),
687            tool_calls: vec![],
688            token_count: Some(300),
689        };
690        ctx.history.messages.push(msg);
691        ctx.history.token_count = 300;
692
693        // preserve_k=0: exercise the excerpt transform directly (no cache-prefix protection).
694        let result = compactor.compress(&mut ctx, 0, MAX, 0, &summarizer(), &engine());
695        assert!(result.tokens_saved > 0);
696        let text = ctx.history.messages[0].content.as_text().unwrap();
697        assert!(
698            text.contains("[tool result: c1 | unknown | 300 tokens]"),
699            "got: {text}"
700        );
701    }
702
703    #[test]
704    fn collapse_compactor_drops_oldest_to_reach_target() {
705        let compactor = CollapseCompactor;
706        let mut ctx = ContextPartitions::new(&config());
707        for _ in 0..8 {
708            ctx.history.push(Message::user("msg"), 50);
709        }
710        let result = compactor.compress(&mut ctx, 250, MAX, 2, &summarizer(), &engine());
711        assert!(result.tokens_saved > 0);
712        assert!(ctx.history.messages.len() < 8);
713        // Pure executor: returns the drained messages; summary + log attribution is the pipeline's
714        // job (under the requested action), so the compactor itself no longer summarizes or logs.
715        assert!(!result.archived.is_empty(), "drained messages are returned to the pipeline");
716        assert!(result.summary.is_none(), "compactor no longer self-summarizes");
717        assert!(ctx.task_state.compression_log.is_empty(), "compactor no longer logs");
718    }
719
720    #[test]
721    fn rule_summarizer_formats_correctly() {
722        use crate::context::summarizer::RuleSummarizer;
723        use crate::types::message::{Content, Message, Role};
724        let summarizer = RuleSummarizer;
725        let mut messages = vec![];
726        messages.push(Message {
727            role: Role::User,
728            content: Content::Text("hello".to_string()),
729            tool_calls: vec![],
730            token_count: Some(5),
731        });
732        messages.push(Message {
733            role: Role::Assistant,
734            content: Content::Text("world".to_string()),
735            tool_calls: vec![],
736            token_count: Some(6),
737        });
738        let summary = summarizer.summarize(&messages, PressureAction::SnipCompact, 100);
739        assert!(summary.contains("[Compressed: snip_compact]"));
740        assert!(summary.contains("2 messages / 11 tokens archived"));
741        assert!(summary.contains("last assistant output: world"));
742    }
743
/// A tool result whose `call_id` is listed in `preserved_refs` must survive `MicroCompactor`
/// untouched.
#[test]
fn micro_compactor_preserves_refs_in_preserved_refs() {
    use crate::types::message::{ContentPart, Role};
    use compact_str::CompactString;

    let mut ctx = ContextPartitions::new(&config());
    ctx.task_state.preserved_refs = vec!["keep_me".to_string()];

    // Single large tool-result message whose call id matches the preserved ref. It is pushed
    // directly onto the message list, so the partition total is set by hand right after.
    ctx.history.messages.push(Message {
        role: Role::Tool,
        content: Content::Parts(vec![ContentPart::ToolResult {
            call_id: CompactString::new("keep_me"),
            output: "a".repeat(1200),
            is_error: false,
        }]),
        tool_calls: vec![],
        token_count: Some(300),
    });
    ctx.history.token_count = 300;

    let outcome = MicroCompactor.compress(&mut ctx, 0, MAX, 2, &summarizer(), &engine());

    // Since call_id "keep_me" is in preserved_refs, nothing is freed and the content stays
    // non-text (i.e. it was not swapped for a text placeholder).
    assert_eq!(outcome.tokens_saved, 0);
    assert!(
        ctx.history.messages[0].content.as_text().is_none(),
        "should not be replaced to text placeholder"
    );
}
776
/// `AutoCompactor` with `preserve_k = 2` leaves 4 of 10 messages in history and hands the
/// drained ones back without summarizing or logging.
#[test]
fn auto_compactor_merges_all_except_last_two_turns() {
    let mut ctx = ContextPartitions::new(&config());
    // Ten small user messages, 10 tokens each.
    (0..10).for_each(|i| {
        ctx.history.push(Message::user(format!("msg {i}")), 10);
    });

    let outcome = AutoCompactor.compress(&mut ctx, 0, MAX, 2, &summarizer(), &engine());

    assert!(outcome.tokens_saved > 0);
    // kept last 2 turns = 4 messages
    assert_eq!(ctx.history.messages.len(), 4);
    // Pure executor: the drained messages are returned; the pipeline summarizes + logs under the
    // requested action (see `baseline_auto_*` / `pipeline_attributes_summary_to_requested_action`).
    assert!(!outcome.archived.is_empty(), "drained messages returned to the pipeline");
    assert!(outcome.summary.is_none(), "compactor no longer self-summarizes");
    assert!(ctx.task_state.compression_log.is_empty(), "compactor no longer logs");
}
793
/// Pure selection helper (W1-1 collapse): `plan_drop_oldest` drops the fewest oldest messages
/// needed to reach the target and never goes below the preserve floor. This is the decision the
/// cache-aware planner reuses.
#[test]
fn plan_drop_oldest_respects_target_and_preserve_floor() {
    // Eight user messages at 50 tokens each (400 total).
    let mut msgs: Vec<Message> = Vec::with_capacity(8);
    for i in 0..8 {
        let mut message = Message::user(format!("m{i}"));
        message.token_count = Some(50);
        msgs.push(message);
    }

    // total=400, target=250, keep=2 → drop 3 oldest (150 saved) lands exactly at 250.
    let to_target = plan_drop_oldest(&msgs, 400, 250, 2, &engine());
    assert_eq!(to_target, (3, 150));

    // target=0 with keep=2 → drains down to the floor (len-keep = 6), never below it.
    let to_floor = plan_drop_oldest(&msgs, 400, 0, 2, &engine());
    assert_eq!(to_floor, (6, 300));

    // already under target → no drop.
    let already_under = plan_drop_oldest(&msgs, 400, 500, 2, &engine());
    assert_eq!(already_under, (0, 0));
}
812
/// `prefix_keep_for` protects the oldest `preserve_k` messages only when the history is large
/// enough that they remain droppable (len >= preserve_k*3); otherwise it yields 0 so that a
/// forced/413 compaction can still cap them.
#[test]
fn prefix_keep_yields_without_drop_fallback() {
    let at_threshold = prefix_keep_for(6, 2);
    assert_eq!(at_threshold, 2, "len 6 >= 6 → protect oldest 2");

    let just_below = prefix_keep_for(5, 2);
    assert_eq!(just_below, 0, "len 5 < 6 → would leave an untouchable message");

    let short_history = prefix_keep_for(3, 2);
    assert_eq!(short_history, 0);

    let empty_history = prefix_keep_for(0, 2);
    assert_eq!(empty_history, 0);
}
822
/// Per-step cache cost: Excerpt/Snip report the earliest touched message index, DropOldest
/// breaks the prefix at 0, and a step that selects nothing is prefix-safe.
#[test]
fn compaction_step_prefix_cost() {
    let excerpt = CompactionStep::Excerpt { msg_idx: vec![5, 8] };
    assert_eq!(excerpt.invalidates_prefix_at(), Some(5));

    let snip = CompactionStep::Snip { msg_idx: vec![3, 9], per_msg_limit: 50 };
    assert_eq!(snip.invalidates_prefix_at(), Some(3));

    let drop_four = CompactionStep::DropOldest { count: 4 };
    assert_eq!(drop_four.invalidates_prefix_at(), Some(0));

    let drop_none = CompactionStep::DropOldest { count: 0 };
    assert_eq!(drop_none.invalidates_prefix_at(), None);

    // An empty selection touches nothing → prefix-safe.
    let empty_excerpt = CompactionStep::Excerpt { msg_idx: vec![] };
    assert_eq!(empty_excerpt.invalidates_prefix_at(), None);
}
836
/// Cost of a plan = the earliest message any step touches (an earlier break dominates).
#[test]
fn plan_cache_cost_is_the_earliest_break() {
    // Two in-place steps that only touch late messages.
    let late = vec![
        CompactionStep::Excerpt { msg_idx: vec![6] },
        CompactionStep::Snip { msg_idx: vec![7], per_msg_limit: 50 },
    ];
    assert_eq!(plan_cache_cost(&late), Some(6));

    // Escalating to a DropOldest breaks the prefix at 0 — the whole plan's cost collapses to 0.
    let with_drop: Vec<CompactionStep> = late
        .iter()
        .cloned()
        .chain(std::iter::once(CompactionStep::DropOldest { count: 3 }))
        .collect();
    assert_eq!(plan_cache_cost(&with_drop), Some(0));

    // An empty plan preserves the cache entirely.
    assert_eq!(plan_cache_cost(&[]), None);
}
852
/// (a) DoD #4: the pipeline surfaces the earliest message any stage actually touched.
///
/// On the len=6 baseline (prefix_keep=2), a SnipCompact protects the oldest 2 and caps msgs 3,4,
/// so the cache break is at index 3, NOT the coarse 0. An AutoCompact drops the oldest, so the
/// break is at 0.
#[test]
fn pipeline_reports_accurate_prefix_invalidation() {
    let cfg = config();
    // Runs one action on a fresh baseline fixture and keeps only the fourth tuple element,
    // the reported cache-break index.
    let cache_break_for = |action: PressureAction| {
        let mut ctx = baseline_partitions();
        CompressionPipeline::new(&cfg)
            .compress(&mut ctx, action, MAX, 500, &engine())
            .3
    };

    let cache_at = cache_break_for(PressureAction::SnipCompact);
    assert_eq!(cache_at, Some(3), "snip protected the oldest 2 → earliest touch is msg 3");

    let cache_at2 = cache_break_for(PressureAction::AutoCompact);
    assert_eq!(cache_at2, Some(0), "dropping the oldest breaks the cache prefix at 0");
}
879
880    // ─── W1-1 characterization baseline ────────────────────────────────────────
881    // Locks the CURRENT compaction behavior (tokens_saved / archived count / summary)
882    // across all four pressure levels + the cascade, so the upcoming compactor→executor
883    // refactor (EvictionOp vocab + cache-aware planner) is provably behavior-preserving.
884    // These are golden-master pins: the values describe what the pipeline does TODAY, not
885    // an independent derivation. If a future change moves a number here, that is a behavior
886    // change and must be justified, not blindly re-pinned.
887
888    use crate::types::message::Role;
889    use compact_str::CompactString;
890
891    /// Deterministic fixture: 4 oversized text turns + 2 tool-result messages, explicit token
892    /// counts so the cascade math is reproducible under `char_approx`.
893    fn baseline_partitions() -> ContextPartitions {
894        let cfg = config();
895        let mut ctx = ContextPartitions::new(&cfg);
896        // Oversized text turns (trigger Snip / Collapse / Auto).
897        ctx.history.push(Message::user("u0 ".repeat(120)), 300);
898        ctx.history.push(Message::assistant("a0 ".repeat(120)), 300);
899        // Tool-result message (trigger Micro).
900        ctx.history.messages.push(Message {
901            role: Role::Tool,
902            content: Content::Parts(vec![ContentPart::ToolResult {
903                call_id: CompactString::new("call_1"),
904                output: serde_json::json!({"rows": 42, "ok": true, "name": "alpha"}).to_string()
905                    + &"-pad".repeat(400),
906                is_error: false,
907            }]),
908            tool_calls: vec![],
909            token_count: Some(400),
910        });
911        ctx.history.token_count += 400;
912        ctx.history.push(Message::user("u1 ".repeat(120)), 300);
913        ctx.history.push(Message::assistant("a1 ".repeat(120)), 300);
914        ctx.history.messages.push(Message {
915            role: Role::Tool,
916            content: Content::Parts(vec![ContentPart::ToolResult {
917                call_id: CompactString::new("call_2"),
918                output: "y".repeat(1600),
919                is_error: false,
920            }]),
921            tool_calls: vec![],
922            token_count: Some(400),
923        });
924        ctx.history.token_count += 400;
925        ctx
926    }
927
928    /// Run the pipeline on a fresh baseline fixture at one action level.
929    /// Returns `(before, saved, summary, archived_len, msgs_after, total_after)`.
930    fn run_baseline(action: PressureAction) -> (u32, u32, Option<String>, usize, usize, u32) {
931        let mut ctx = baseline_partitions();
932        let before = ctx.total_tokens(&engine());
933        let (saved, summary, archived, _cache_at) =
934            CompressionPipeline::new(&config()).compress(&mut ctx, action, MAX, 500, &engine());
935        let archived_len = archived.len();
936        let msgs_after = ctx.history.messages.len();
937        let total_after = ctx.total_tokens(&engine());
938        (before, saved, summary, archived_len, msgs_after, total_after)
939    }
940
/// Golden-master pin for `SnipCompact`.
///
/// SnipCompact runs only the Snip stage: it caps oversized text messages in place — EXCEPT the
/// oldest `preserve_recent_turns` (=2) messages, which are the stable cache prefix and are
/// protected from in-place rewrites (W1-1 step 2 cache-aware). So it caps the 2 non-prefix
/// oversized turns (idx 3,4), not all 4: 500 saved, was 1000 before prefix-protection. It never
/// archives or summarizes.
#[test]
fn baseline_snip_only_caps_text_no_archival() {
    let (tokens_before, tokens_saved, maybe_summary, archived_count, remaining_msgs, tokens_after) =
        run_baseline(PressureAction::SnipCompact);

    assert_eq!(tokens_before, 2001);
    assert_eq!(
        tokens_saved, 500,
        "2 non-prefix oversized turns × 250 (oldest 2 protected; was 1000)"
    );
    assert_eq!(archived_count, 0);
    assert!(maybe_summary.is_none());
    assert_eq!(remaining_msgs, 6, "snip mutates in place, drops no messages");
    assert_eq!(tokens_after, 1501);
}
956
/// Golden-master pin for `MicroCompact`.
///
/// MicroCompact runs Snip then Micro: snip caps the non-prefix oversized text (500); micro
/// excerpts the non-prefix tool results (362). The cache prefix (oldest 2) is protected from
/// both in-place ops. Still no archival/summary; messages stay in place.
#[test]
fn baseline_micro_excerpts_tool_results() {
    let (tokens_before, tokens_saved, maybe_summary, archived_count, remaining_msgs, tokens_after) =
        run_baseline(PressureAction::MicroCompact);

    assert_eq!(tokens_before, 2001);
    assert_eq!(
        tokens_saved, 862,
        "snip(500, prefix-protected) + excerpt(362); was 1362"
    );
    assert_eq!(archived_count, 0);
    assert!(maybe_summary.is_none());
    assert_eq!(remaining_msgs, 6);
    assert_eq!(tokens_after, 1139);
}
970
/// Golden-master pin for `ContextCollapse`.
///
/// ContextCollapse runs Snip→Micro→Collapse: the oldest messages are drained to `archived` with
/// a summary, down to the preserve-recent floor (4 msgs kept).
#[test]
fn baseline_collapse_drops_oldest_and_summarizes() {
    let (tokens_before, tokens_saved, maybe_summary, archived_count, remaining_msgs, tokens_after) =
        run_baseline(PressureAction::ContextCollapse);

    assert_eq!(tokens_before, 2001);
    assert_eq!(tokens_saved, 1462);
    assert_eq!(archived_count, 2, "drops the 2 oldest messages above the preserve floor");
    assert_eq!(remaining_msgs, 4, "preserve_recent_turns=2 → 4 messages kept");
    assert_eq!(tokens_after, 539);

    // The summary must be present and labelled with the collapse action.
    let summary = maybe_summary.expect("collapse summarizes archived messages");
    assert!(
        summary.contains("[Compressed: context_collapse]"),
        "summary routes the collapse action: {summary}"
    );
}
988
/// Golden-master pin for `AutoCompact`.
///
/// AutoCompact runs all 4 stages; on this fixture Snip→Micro→Collapse already hit the preserve
/// floor, so the Auto *stage* archives nothing extra. The token math is identical to Collapse,
/// but the summary is attributed to the **requested** action (auto_compact) — NOT silently
/// downgraded to context_collapse by whichever stage did the draining. This is the C fix:
/// op-label == summary/log label (node K04/K09 + the manager-level regression gate).
#[test]
fn baseline_auto_attributes_summary_to_auto_compact() {
    let (tokens_before, tokens_saved, maybe_summary, archived_count, remaining_msgs, tokens_after) =
        run_baseline(PressureAction::AutoCompact);

    assert_eq!(tokens_before, 2001);
    assert_eq!(tokens_saved, 1462);
    assert_eq!(archived_count, 2);
    assert_eq!(remaining_msgs, 4);
    assert_eq!(tokens_after, 539);

    let summary = maybe_summary.expect("auto-compact summarizes the archived messages");
    assert!(summary.contains("[Compressed: auto_compact]"), "got: {summary}");
}
1005
/// The cross-level contract the refactor must preserve: heavier pressure never frees less.
#[test]
fn baseline_saved_is_monotonic_in_action_level() {
    // Tokens saved (second tuple element) by a fresh baseline run at the given level.
    let saved_at = |action: PressureAction| run_baseline(action).1;

    let snip = saved_at(PressureAction::SnipCompact);
    let micro = saved_at(PressureAction::MicroCompact);
    let collapse = saved_at(PressureAction::ContextCollapse);
    let auto = saved_at(PressureAction::AutoCompact);

    assert!(snip <= micro, "{snip} <= {micro}");
    assert!(micro <= collapse, "{micro} <= {collapse}");
    assert!(collapse <= auto, "{collapse} <= {auto}");
}
1017
/// Isolates the cascade early-break: once snip alone reaches the target, the heavier stages
/// are skipped even though `AutoCompact` was requested.
#[test]
fn pipeline_stops_cascade_when_target_reached() {
    // preserve_recent_turns=0: no cache-prefix protection, so snip can cap the lone message.
    let cfg = ContextConfig {
        preserve_recent_turns: 0,
        snip_per_msg_ratio: 0.25,
        ..Default::default()
    };
    let pipeline = CompressionPipeline::new(&cfg);

    // A single 900-token user message is the whole history.
    let mut ctx = ContextPartitions::new(&cfg);
    ctx.history.push(Message::user("a".repeat(3600)), 900);

    let (tokens_saved, maybe_summary, archived_msgs, _cache_at) =
        pipeline.compress(&mut ctx, PressureAction::AutoCompact, 1_000, 500, &engine());

    assert!(tokens_saved > 0);
    assert!(
        maybe_summary.is_none(),
        "auto compactor should not run after snip reaches target"
    );
    assert!(archived_msgs.is_empty(), "heavier archival stages should not run");
    assert_eq!(ctx.history.messages.len(), 1);
    assert!(ctx.total_tokens(&engine()) <= 500);
}
1045}