everruns_core/capabilities/
compaction.rs

1//! Compaction Capability
2//!
3//! Configurable context compaction strategy. Users choose between native provider
4//! compaction (e.g., OpenAI /responses/compact) and our own strategies (observation
5//! masking, LLM summarization). See specs/compaction.md.
6//!
7//! Design decisions:
8//! - Strategy selection is per-agent/harness via `AgentCapabilityConfig`
9//! - Native and our own strategies coexist as first-class options
//! - The `auto` cascade: observation masking → native → summarization → aggressive trim
11//! - Proactive compaction at a configurable budget threshold, not just on error
12
13use super::{
14    Capability, CapabilityLocalization, CapabilityStatus, ModelViewContext, ModelViewProvider,
15};
16use crate::events::TokenUsage;
17use crate::message::{ContentPart, Message, MessageRole};
18use crate::message_filter::MessageFilterProvider;
19use serde::{Deserialize, Serialize};
20use std::collections::{HashMap, HashSet};
21use std::sync::Arc;
22
/// Capability ID for compaction.
///
/// Returned by `CompactionCapability::id` and matches the `ref` value shown in the
/// `CompactionConfig` JSON example (`{ "ref": "compaction", ... }`).
pub const COMPACTION_CAPABILITY_ID: &str = "compaction";
// NOTE(review): not referenced in the visible part of this file; presumably caps how many
// related recent read results are kept during masking — confirm at the use site.
const MAX_RELATED_RECENT_READ_RESULTS: usize = 4;
26
/// Compaction strategy selection.
///
/// Serialized in `snake_case` (`auto`, `native`, `observation_masking`,
/// `summarization`); the `Display` impl prints the same spellings.
/// The default variant is `Auto`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum CompactionStrategy {
    /// Cascade: observation masking → native → summarization → aggressive trim.
    #[default]
    Auto,
    /// Use provider's native compact endpoint only (e.g., OpenAI /responses/compact).
    Native,
    /// Strip old tool outputs, replace with one-line summaries.
    ObservationMasking,
    /// Use LLM to summarize older turns.
    Summarization,
}
41
42impl std::fmt::Display for CompactionStrategy {
43    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
44        match self {
45            Self::Auto => write!(f, "auto"),
46            Self::Native => write!(f, "native"),
47            Self::ObservationMasking => write!(f, "observation_masking"),
48            Self::Summarization => write!(f, "summarization"),
49        }
50    }
51}
52
/// Format for masked tool output summaries.
///
/// Serialized in `snake_case` (`one_line`, `head_tail`). The default variant is `OneLine`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum MaskingSummaryFormat {
    /// `[tool_name(args_truncated) → OK]`
    #[default]
    OneLine,
    /// Keep first and last 3 lines of output.
    HeadTail,
}
63
/// Observation masking settings.
///
/// Every field has a serde default, so an empty JSON object deserializes to the
/// same value as `ObservationMaskingConfig::default()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ObservationMaskingConfig {
    /// Number of recent tool outputs to keep verbatim.
    ///
    /// Defaults to the value of `default_keep_recent_tool_outputs()` (currently 2).
    #[serde(default = "default_keep_recent_tool_outputs")]
    pub keep_recent_tool_outputs: usize,

    /// Format for masked tool output summaries.
    ///
    /// Defaults to `MaskingSummaryFormat::OneLine`.
    #[serde(default)]
    pub summary_format: MaskingSummaryFormat,
}
75
76impl Default for ObservationMaskingConfig {
77    fn default() -> Self {
78        Self {
79            keep_recent_tool_outputs: default_keep_recent_tool_outputs(),
80            summary_format: MaskingSummaryFormat::default(),
81        }
82    }
83}
84
/// Serde default for `ObservationMaskingConfig::keep_recent_tool_outputs`.
fn default_keep_recent_tool_outputs() -> usize {
    // Lowered from 5 to 2 (EVE-224). With EVE-221 capping exec output at 16 KiB,
    // keeping 2 recent (~8K tokens) instead of 5 (~20K tokens) significantly reduces
    // stale exec output accumulation. Older tool results are masked to one-line summaries.
    const KEEP_RECENT_TOOL_OUTPUTS: usize = 2;
    KEEP_RECENT_TOOL_OUTPUTS
}
91
/// Cost-control masking settings.
///
/// Unlike proactive compaction, this is cost-oriented rather than
/// context-window-oriented: old bulky tool results should not stay verbatim in
/// every request just because the model still has room for them.
///
/// Every field has a serde default (see the `default_cost_control_*` functions),
/// so partial JSON objects are accepted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CostControlConfig {
    /// Enable low-cost tool-result masking before every LLM call.
    ///
    /// Defaults to `true`.
    #[serde(default = "default_cost_control_enabled")]
    pub enabled: bool,

    /// Number of most-recent tool results to always keep verbatim.
    ///
    /// Defaults to 2.
    #[serde(default = "default_cost_control_keep_recent_tool_results")]
    pub keep_recent_tool_results: usize,

    /// Start masking once this many tool results are present.
    ///
    /// Defaults to 4.
    #[serde(default = "default_cost_control_mask_after_tool_results")]
    pub mask_after_tool_results: usize,

    /// Start masking once aggregate live tool-result payload exceeds this many bytes.
    ///
    /// Defaults to `24 * 1024` bytes.
    #[serde(default = "default_cost_control_max_live_tool_result_bytes")]
    pub max_live_tool_result_bytes: usize,

    /// If cumulative/session usage is available, mask when uncached input exceeds this.
    ///
    /// Defaults to 100_000 tokens.
    #[serde(default = "default_cost_control_max_uncached_input_tokens")]
    pub max_uncached_input_tokens: u32,

    /// If cumulative/session usage is available, mask when cache read ratio falls below this.
    ///
    /// Defaults to 0.35.
    #[serde(default = "default_cost_control_min_cache_read_ratio")]
    pub min_cache_read_ratio: f32,
}
123
124impl Default for CostControlConfig {
125    fn default() -> Self {
126        Self {
127            enabled: default_cost_control_enabled(),
128            keep_recent_tool_results: default_cost_control_keep_recent_tool_results(),
129            mask_after_tool_results: default_cost_control_mask_after_tool_results(),
130            max_live_tool_result_bytes: default_cost_control_max_live_tool_result_bytes(),
131            max_uncached_input_tokens: default_cost_control_max_uncached_input_tokens(),
132            min_cache_read_ratio: default_cost_control_min_cache_read_ratio(),
133        }
134    }
135}
136
/// Serde default for `CostControlConfig::enabled`.
fn default_cost_control_enabled() -> bool {
    const ENABLED: bool = true;
    ENABLED
}

/// Serde default for `CostControlConfig::keep_recent_tool_results`.
fn default_cost_control_keep_recent_tool_results() -> usize {
    const KEEP_RECENT: usize = 2;
    KEEP_RECENT
}

/// Serde default for `CostControlConfig::mask_after_tool_results`.
fn default_cost_control_mask_after_tool_results() -> usize {
    const MASK_AFTER: usize = 4;
    MASK_AFTER
}

/// Serde default for `CostControlConfig::max_live_tool_result_bytes` (24 KiB).
fn default_cost_control_max_live_tool_result_bytes() -> usize {
    const KIB: usize = 1024;
    24 * KIB
}

/// Serde default for `CostControlConfig::max_uncached_input_tokens`.
fn default_cost_control_max_uncached_input_tokens() -> u32 {
    const MAX_UNCACHED_INPUT_TOKENS: u32 = 100_000;
    MAX_UNCACHED_INPUT_TOKENS
}

/// Serde default for `CostControlConfig::min_cache_read_ratio`.
fn default_cost_control_min_cache_read_ratio() -> f32 {
    const MIN_CACHE_READ_RATIO: f32 = 0.35;
    MIN_CACHE_READ_RATIO
}
160
/// Summarization settings.
///
/// Every field has a serde default, so an empty JSON object deserializes to the
/// same value as `SummarizationConfig::default()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SummarizationConfig {
    /// Model to use for summarization. None = same model as agent.
    #[serde(default)]
    pub model: Option<String>,

    /// What to preserve in summaries.
    ///
    /// Defaults to the list returned by `default_preserve()`.
    #[serde(default = "default_preserve")]
    pub preserve: Vec<String>,

    /// Custom instructions appended to summarization prompt.
    #[serde(default)]
    pub instructions: Option<String>,
}
176
177impl Default for SummarizationConfig {
178    fn default() -> Self {
179        Self {
180            model: None,
181            preserve: default_preserve(),
182            instructions: None,
183        }
184    }
185}
186
/// Serde default for `SummarizationConfig::preserve`: the categories a summary
/// should retain, in a fixed order.
fn default_preserve() -> Vec<String> {
    const CATEGORIES: [&str; 5] = [
        "decisions",
        "files_modified",
        "errors",
        "current_plan",
        "skill_instructions",
    ];
    CATEGORIES.iter().map(|name| String::from(*name)).collect()
}
196
/// Compaction capability configuration.
///
/// Configured per agent/harness via `AgentCapabilityConfig`:
/// ```json
/// { "ref": "compaction", "config": { "strategy": "auto", "proactive": true } }
/// ```
///
/// Every field has a serde default, so any subset of keys may be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompactionConfig {
    /// Which strategy to use.
    ///
    /// Defaults to `CompactionStrategy::Auto`.
    #[serde(default)]
    pub strategy: CompactionStrategy,

    /// Compact proactively at budget_percent, not just on RequestTooLarge.
    ///
    /// Defaults to `true`.
    #[serde(default = "default_proactive")]
    pub proactive: bool,

    /// Trigger proactive compaction at this fraction of context budget.
    ///
    /// Defaults to 0.85. `CompactionCapability::validate_config` rejects values
    /// outside `0.1..=1.0`.
    #[serde(default = "default_budget_percent")]
    pub budget_percent: f32,

    /// Observation masking settings.
    #[serde(default)]
    pub observation_masking: ObservationMaskingConfig,

    /// Summarization settings.
    #[serde(default)]
    pub summarization: SummarizationConfig,

    /// Hierarchical memory tier settings for hot/warm/cold management.
    #[serde(default)]
    pub memory_tiers: HierarchicalMemoryConfig,

    /// Always-on cost-oriented masking for stale tool results.
    #[serde(default)]
    pub cost_control: CostControlConfig,
}
233
234impl Default for CompactionConfig {
235    fn default() -> Self {
236        Self {
237            strategy: CompactionStrategy::default(),
238            proactive: default_proactive(),
239            budget_percent: default_budget_percent(),
240            observation_masking: ObservationMaskingConfig::default(),
241            summarization: SummarizationConfig::default(),
242            memory_tiers: HierarchicalMemoryConfig::default(),
243            cost_control: CostControlConfig::default(),
244        }
245    }
246}
247
/// Serde default for `CompactionConfig::proactive`.
fn default_proactive() -> bool {
    const PROACTIVE: bool = true;
    PROACTIVE
}

/// Serde default for `CompactionConfig::budget_percent`.
fn default_budget_percent() -> f32 {
    const BUDGET_PERCENT: f32 = 0.85;
    BUDGET_PERCENT
}
255
256impl CompactionConfig {
257    /// Parse from JSON value, falling back to defaults for invalid config.
258    pub fn from_json(value: &serde_json::Value) -> Self {
259        serde_json::from_value(value.clone()).unwrap_or_default()
260    }
261}
262
/// Compaction capability.
///
/// Unit struct that registers compaction with the capability system: it exposes
/// the capability metadata, a message filter provider, a model view provider,
/// the user-facing config schema, config validation, and localizations.
pub struct CompactionCapability;

impl Capability for CompactionCapability {
    /// Stable capability identifier (`COMPACTION_CAPABILITY_ID`).
    fn id(&self) -> &str {
        COMPACTION_CAPABILITY_ID
    }

    /// Human-readable capability name.
    fn name(&self) -> &str {
        "Compaction"
    }

    /// Long-form description of the capability and its strategies.
    fn description(&self) -> &str {
        r#"Configurable context compaction when conversations exceed LLM context windows.

Choose between native provider compaction (e.g., OpenAI /responses/compact), observation masking (strip old tool outputs), or LLM summarization. The `auto` strategy cascades through all available options."#
    }

    /// The capability is always reported as available.
    fn status(&self) -> CapabilityStatus {
        CapabilityStatus::Available
    }

    /// Icon identifier for this capability.
    fn icon(&self) -> Option<&str> {
        Some("shrink")
    }

    /// Category under which the capability is grouped.
    fn category(&self) -> Option<&str> {
        Some("Optimization")
    }

    /// Provides a fresh `CompactionFilterProvider`.
    fn message_filter_provider(&self) -> Option<Arc<dyn MessageFilterProvider>> {
        Some(Arc::new(CompactionFilterProvider))
    }

    /// Provides a fresh `CompactionModelViewProvider`.
    fn model_view_provider(&self) -> Option<Arc<dyn ModelViewProvider>> {
        Some(Arc::new(CompactionModelViewProvider))
    }

    /// Only the top-level knobs users meaningfully tune are exposed:
    /// `strategy`, `proactive`, and `budget_percent`. The nested
    /// `observation_masking` / `summarization` / `memory_tiers` /
    /// `cost_control` objects are advanced tuning with safe defaults and stay
    /// out of the schema, but `validate_config` still accepts them via the
    /// typed `CompactionConfig` parse.
    fn config_schema(&self) -> Option<serde_json::Value> {
        Some(serde_json::json!({
            "type": "object",
            "properties": {
                "strategy": {
                    "type": "string",
                    "title": "Strategy",
                    "description": "Compaction strategy used when the conversation approaches the context window.",
                    "oneOf": [
                        { "const": "auto", "title": "Automatic" },
                        { "const": "native", "title": "Provider-native" },
                        { "const": "observation_masking", "title": "Observation masking" },
                        { "const": "summarization", "title": "LLM summarization" }
                    ],
                    "default": "auto"
                },
                "proactive": {
                    "type": "boolean",
                    "title": "Proactive compaction",
                    "description": "Compact at the budget threshold instead of waiting for a request-too-large error.",
                    "default": true
                },
                "budget_percent": {
                    "type": "number",
                    "title": "Context budget threshold",
                    "description": "Fraction of the model context window at which proactive compaction triggers.",
                    "minimum": 0.1,
                    "maximum": 1.0,
                    // Keep in sync with `default_budget_percent()`; the f32
                    // value is not used directly to avoid noisy f32->f64
                    // widening in the serialized schema.
                    "default": 0.85
                }
            }
        }))
    }

    /// Validates a raw config value.
    ///
    /// `null` is accepted as "no config". Otherwise the value must deserialize
    /// into `CompactionConfig` and `budget_percent` must lie in `0.1..=1.0`
    /// (the same bounds the schema advertises). Errors are returned as
    /// human-readable strings.
    fn validate_config(&self, config: &serde_json::Value) -> Result<(), String> {
        if config.is_null() {
            return Ok(());
        }
        let typed: CompactionConfig = serde_json::from_value(config.clone())
            .map_err(|e| format!("invalid compaction config: {e}"))?;
        if !(0.1..=1.0).contains(&typed.budget_percent) {
            return Err(format!(
                "budget_percent must be between 0.1 and 1.0, got {}",
                typed.budget_percent
            ));
        }
        Ok(())
    }

    /// Localized metadata.
    ///
    /// The `en` entry only supplies a config description; the `uk` entry
    /// supplies a translated name, description, config description, and a
    /// schema overlay with translated titles, descriptions, and enum labels.
    fn localizations(&self) -> Vec<CapabilityLocalization> {
        vec![
            CapabilityLocalization {
                locale: "en",
                name: None,
                description: None,
                config_description: Some(
                    "Controls the compaction strategy, proactive triggering, and the \
                     context-budget threshold.",
                ),
                config_overlay: None,
            },
            CapabilityLocalization {
                locale: "uk",
                name: Some("Ущільнення контексту"),
                description: Some(
                    "Налаштовуване ущільнення контексту, коли розмова перевищує контекстне \
                     вікно LLM. Доступні стратегії: нативне ущільнення провайдера, маскування \
                     результатів інструментів і підсумовування через LLM; стратегія auto \
                     перебирає всі доступні варіанти.",
                ),
                config_description: Some(
                    "Визначає стратегію ущільнення контексту, проактивний запуск і поріг \
                     бюджету контексту.",
                ),
                config_overlay: Some(serde_json::json!({
                    "properties": {
                        "strategy": {
                            "title": "Стратегія",
                            "description": "Стратегія ущільнення, коли розмова наближається до межі контекстного вікна.",
                            "enum_labels": {
                                "auto": "Автоматично",
                                "native": "Нативна (провайдер)",
                                "observation_masking": "Маскування результатів інструментів",
                                "summarization": "Підсумовування через LLM"
                            }
                        },
                        "proactive": {
                            "title": "Проактивне ущільнення",
                            "description": "Ущільнювати контекст при досягненні порогу бюджету, а не лише після помилки про завеликий запит."
                        },
                        "budget_percent": {
                            "title": "Поріг бюджету контексту",
                            "description": "Частка контекстного вікна моделі, після якої запускається проактивне ущільнення."
                        }
                    }
                })),
            },
        ]
    }
}
410
411struct CompactionModelViewProvider;
412
413impl ModelViewProvider for CompactionModelViewProvider {
414    fn apply_model_view(
415        &self,
416        messages: Vec<Message>,
417        config: &serde_json::Value,
418        context: &ModelViewContext<'_>,
419    ) -> Vec<Message> {
420        let config = CompactionConfig::from_json(config);
421        let masking = build_model_view_messages_owned(messages, &config, context.prior_usage);
422        if masking.masked_count > 0 {
423            tracing::info!(
424                session_id = %context.session_id,
425                masked_count = masking.masked_count,
426                tool_result_bytes_before = masking.tool_result_bytes_before,
427                tool_result_bytes_after = masking.tool_result_bytes_after,
428                "CompactionCapability: masked stale tool results for model view"
429            );
430        }
431        masking.messages
432    }
433
434    fn priority(&self) -> i32 {
435        50
436    }
437}
438
439// ============================================================================
440// Message Filter Provider (proactive observation masking at message load time)
441// ============================================================================
442
/// Message filter provider registered by the compaction capability.
///
/// `apply_filters` is intentionally a no-op: the provider only signals that
/// compaction is active on the session. Per the note in its body, observation
/// masking itself is applied later, at LLM message construction time, before
/// the LLM call. Lower priority than infinity context (50 vs 100)
/// so it runs first — masking happens before trimming.
struct CompactionFilterProvider;

impl MessageFilterProvider for CompactionFilterProvider {
    /// Leaves the query untouched; both arguments are deliberately unused.
    fn apply_filters(
        &self,
        _query: &mut crate::message_filter::MessageQuery,
        _config: &serde_json::Value,
    ) {
        // The filter provider signals that compaction is active on this session.
        // Actual observation masking is applied at LLM message construction time
        // (in ReasonAtom) rather than at message query time, because masking
        // operates on LlmMessage format, not the storage Message format.
        //
        // The proactive compaction check in ReasonAtom reads the compaction config
        // and applies masking + budget checks before the LLM call.
    }

    /// Ordering priority of this filter provider.
    fn priority(&self) -> i32 {
        50 // Before infinity context (100)
    }
}
469
470// ============================================================================
471// Token Estimation
472// ============================================================================
473
474/// Estimate token count for an LLM message using char/4 approximation.
475///
476/// This is intentionally simple. More accurate estimation (tiktoken, etc.) can
477/// be swapped in later, but char/4 is sufficient for budget decisions.
478pub fn estimate_tokens(msg: &LlmMessage) -> usize {
479    let text_len = match &msg.content {
480        LlmMessageContent::Text(t) => t.len(),
481        LlmMessageContent::Parts(parts) => parts
482            .iter()
483            .map(|p| match p {
484                LlmContentPart::Text { text } => text.len(),
485                _ => 50, // images, etc. — rough estimate
486            })
487            .sum(),
488    };
489
490    // Add tool call overhead
491    let tool_call_len = msg
492        .tool_calls
493        .as_ref()
494        .map(|calls| {
495            calls
496                .iter()
497                .map(|tc| tc.name.len() + tc.arguments.to_string().len() + 20)
498                .sum::<usize>()
499        })
500        .unwrap_or(0);
501
502    (text_len + tool_call_len) / 4
503}
504
505/// Estimate total tokens for a slice of messages.
506pub fn estimate_total_tokens(messages: &[LlmMessage]) -> usize {
507    messages.iter().map(estimate_tokens).sum()
508}
509
510/// Check whether proactive compaction should trigger.
511///
512/// Returns `true` if the estimated tokens exceed `budget_percent` of the model's
513/// context window.
514pub fn should_compact_proactively(
515    messages: &[LlmMessage],
516    config: &CompactionConfig,
517    context_window_tokens: usize,
518) -> bool {
519    if !config.proactive {
520        return false;
521    }
522    let budget = (context_window_tokens as f32 * config.budget_percent) as usize;
523    let estimated = estimate_total_tokens(messages);
524    estimated > budget
525}
526
527// ============================================================================
528// Aggressive Trim (last resort in cascade)
529// ============================================================================
530
531/// Drop oldest messages to fit within a target token count.
532///
533/// Preserves the system prompt (index 0 if present), protected messages
534/// (e.g. `activate_skill` results and their tool call messages), and the
535/// most recent messages. This is the last resort — lossy, no recovery.
536pub fn aggressive_trim(
537    messages: &[LlmMessage],
538    target_tokens: usize,
539    has_system_prompt: bool,
540) -> Vec<LlmMessage> {
541    let mut result = Vec::new();
542    let mut token_budget = target_tokens;
543
544    // Always keep system prompt
545    let start_idx = if has_system_prompt && !messages.is_empty() {
546        let sys_tokens = estimate_tokens(&messages[0]);
547        if sys_tokens < token_budget {
548            result.push(messages[0].clone());
549            token_budget -= sys_tokens;
550        }
551        1
552    } else {
553        0
554    };
555
556    let conversation = &messages[start_idx..];
557
558    // Identify protected messages (skill tool results and their call messages).
559    // Reserve budget for them first so they are never dropped.
560    let mut protected_indices: std::collections::HashSet<usize> = conversation
561        .iter()
562        .enumerate()
563        .filter(|(_, m)| {
564            is_protected_tool_result(conversation, m) || is_protected_tool_call_message(m)
565        })
566        .map(|(i, _)| i)
567        .collect();
568
569    // Anchor the first conversation message (the original task / goal) by
570    // adding it to the protected set, so its budget is reserved before any
571    // non-protected message — like infinity context's head anchor, this is the
572    // eviction we most want to avoid once the window slides. Under extreme
573    // pressure (protected messages alone exceed the budget) the oldest
574    // protected messages, including this one, may still be dropped, matching how
575    // protected tool results are handled.
576    if !conversation.is_empty() {
577        protected_indices.insert(0);
578    }
579
580    let mut protected_budget: usize = 0;
581    for &idx in &protected_indices {
582        protected_budget += estimate_tokens(&conversation[idx]);
583    }
584
585    // If protected messages alone exceed the remaining budget, keep as many
586    // protected messages as possible (newest first) and skip non-protected.
587    if protected_budget > token_budget {
588        let mut protected_with_indices: Vec<(usize, LlmMessage)> = protected_indices
589            .iter()
590            .map(|&idx| (idx, conversation[idx].clone()))
591            .collect();
592        protected_with_indices.sort_by_key(|(i, _)| *i);
593
594        let mut remaining = token_budget;
595        let mut kept: Vec<(usize, LlmMessage)> = Vec::new();
596        for (idx, msg) in protected_with_indices.into_iter().rev() {
597            let t = estimate_tokens(&msg);
598            if t <= remaining {
599                kept.push((idx, msg));
600                remaining -= t;
601            }
602        }
603        kept.sort_by_key(|(i, _)| *i);
604        result.extend(kept.into_iter().map(|(_, m)| m));
605        return result;
606    }
607
608    token_budget -= protected_budget;
609
610    // Walk from newest to oldest, collecting non-protected messages that fit
611    let mut keep_from_end = Vec::new();
612    for (i, msg) in conversation.iter().enumerate().rev() {
613        if protected_indices.contains(&i) {
614            continue; // handled separately
615        }
616        let msg_tokens = estimate_tokens(msg);
617        if msg_tokens <= token_budget {
618            keep_from_end.push((i, msg.clone()));
619            token_budget -= msg_tokens;
620        } else {
621            break;
622        }
623    }
624
625    // Merge protected + kept messages in original order
626    let mut all_kept: Vec<(usize, LlmMessage)> = Vec::new();
627    for &idx in &protected_indices {
628        all_kept.push((idx, conversation[idx].clone()));
629    }
630    all_kept.extend(keep_from_end);
631    all_kept.sort_by_key(|(i, _)| *i);
632
633    result.extend(all_kept.into_iter().map(|(_, m)| m));
634    result
635}
636
637// ============================================================================
638// Session Compaction Metrics
639// ============================================================================
640
/// Per-session compaction metrics, stored as session metadata.
///
/// Starts at all-zero / empty via `Default`; updated through
/// `SessionCompactionMetrics::record`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SessionCompactionMetrics {
    /// Total number of compaction events in this session.
    pub compaction_count: u32,
    /// Total messages saved across all compactions.
    pub total_messages_saved: u64,
    /// Breakdown by strategy, keyed by strategy name.
    pub strategy_counts: HashMap<String, u32>,
    /// Total time spent compacting (ms).
    pub total_duration_ms: u64,
}
653
654impl SessionCompactionMetrics {
655    /// Record a completed compaction step.
656    pub fn record(
657        &mut self,
658        strategy_used: &str,
659        messages_before: usize,
660        messages_after: usize,
661        duration_ms: u64,
662    ) {
663        self.compaction_count += 1;
664        self.total_messages_saved += (messages_before.saturating_sub(messages_after)) as u64;
665        self.total_duration_ms += duration_ms;
666
667        for strategy in strategy_used.split('+') {
668            *self
669                .strategy_counts
670                .entry(strategy.to_string())
671                .or_insert(0) += 1;
672        }
673    }
674}
675
676// ============================================================================
677// Hierarchical Memory Tiers
678// ============================================================================
679
/// Memory tier for a message in the hierarchy.
///
/// Serialized in `snake_case` (`hot`, `warm`, `cold`). Tiers are assigned by
/// distance from the newest message (see `classify_memory_tiers`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum MemoryTier {
    /// Full verbatim text, always in context.
    Hot,
    /// Observation-masked (tool outputs replaced with summaries).
    Warm,
    /// Summarized to key facts. Queryable via `query_history` if Infinity Context enabled.
    Cold,
}
691
/// Configuration for hierarchical memory tiers.
///
/// Tier sizes are counted in messages from the newest message backwards: first
/// the hot window, then the warm window; anything older is cold.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HierarchicalMemoryConfig {
    /// Number of most recent messages to keep in the hot tier (full verbatim).
    ///
    /// Defaults to 20.
    #[serde(default = "default_hot_messages")]
    pub hot_messages: usize,
    /// Number of messages in the warm tier (observation-masked).
    ///
    /// Defaults to 100.
    #[serde(default = "default_warm_messages")]
    pub warm_messages: usize,
    // Everything older → cold tier (summarized / queryable)
}
703
704impl Default for HierarchicalMemoryConfig {
705    fn default() -> Self {
706        Self {
707            hot_messages: default_hot_messages(),
708            warm_messages: default_warm_messages(),
709        }
710    }
711}
712
/// Serde default for `HierarchicalMemoryConfig::hot_messages`.
fn default_hot_messages() -> usize {
    const HOT_MESSAGES: usize = 20;
    HOT_MESSAGES
}

/// Serde default for `HierarchicalMemoryConfig::warm_messages`.
fn default_warm_messages() -> usize {
    const WARM_MESSAGES: usize = 100;
    WARM_MESSAGES
}
720
721/// Classify messages into memory tiers based on position (newest-first).
722///
723/// Returns a vec of (tier, message) pairs in original order.
724pub fn classify_memory_tiers<'a>(
725    messages: &'a [LlmMessage],
726    config: &HierarchicalMemoryConfig,
727) -> Vec<(MemoryTier, &'a LlmMessage)> {
728    let len = messages.len();
729    messages
730        .iter()
731        .enumerate()
732        .map(|(i, msg)| {
733            let from_end = len - 1 - i;
734            let tier = if from_end < config.hot_messages {
735                MemoryTier::Hot
736            } else if from_end < config.hot_messages + config.warm_messages {
737                MemoryTier::Warm
738            } else {
739                MemoryTier::Cold
740            };
741            (tier, msg)
742        })
743        .collect()
744}
745
746/// Apply hierarchical memory: mask warm-tier tool outputs, summarize cold tier.
747///
748/// Returns the processed messages ready for LLM context. Cold-tier messages are
749/// replaced with a `[CONVERSATION_SUMMARY]` if a summary is provided.
750///
751/// Protected messages (e.g. `activate_skill` results) in cold/warm tiers are
752/// promoted to the output verbatim — they are never dropped or masked.
753pub fn apply_hierarchical_memory(
754    messages: &[LlmMessage],
755    config: &HierarchicalMemoryConfig,
756    masking_config: &ObservationMaskingConfig,
757    cold_summary: Option<&str>,
758) -> Vec<LlmMessage> {
759    let len = messages.len();
760    let hot_start = len.saturating_sub(config.hot_messages);
761    let warm_start = hot_start.saturating_sub(config.warm_messages);
762
763    let mut result = Vec::new();
764
765    // Cold tier: replace with summary if available, but rescue protected messages
766    if warm_start > 0 {
767        // Extract protected messages from cold tier before dropping
768        let cold_msgs = &messages[..warm_start];
769        let protected_cold: Vec<LlmMessage> = cold_msgs
770            .iter()
771            .filter(|m| is_protected_tool_result(cold_msgs, m) || is_protected_tool_call_message(m))
772            .cloned()
773            .collect();
774
775        if let Some(summary) = cold_summary {
776            result.push(build_summary_message(summary));
777        }
778
779        // Re-insert protected messages after the summary
780        result.extend(protected_cold);
781    }
782
783    // Warm tier: apply observation masking to tool outputs.
784    // Use the full message slice for protected-tool detection so that a tool
785    // result in warm tier whose assistant call is in cold tier is still recognized.
786    if warm_start < hot_start {
787        let warm_msgs = &messages[warm_start..hot_start];
788
789        // Pre-identify protected tool_call_ids using the full message list
790        let protected_call_ids: std::collections::HashSet<String> = warm_msgs
791            .iter()
792            .filter(|m| is_protected_tool_result(messages, m))
793            .filter_map(|m| m.tool_call_id.clone())
794            .collect();
795
796        let masked = apply_observation_masking_with_protected(
797            warm_msgs,
798            masking_config,
799            &protected_call_ids,
800        );
801        result.extend(masked.messages);
802    }
803
804    // Hot tier: verbatim
805    if hot_start < len {
806        result.extend_from_slice(&messages[hot_start..]);
807    }
808
809    result
810}
811
812// ============================================================================
813// Protected Tool Detection
814// ============================================================================
815
816use crate::driver_registry::{LlmContentPart, LlmMessage, LlmMessageContent, LlmMessageRole};
817
/// Tool names whose results must be protected from compaction.
///
/// Skill activation results contain durable behavioral instructions that silently
/// degrade agent behavior when masked, summarized, or trimmed. The agentskills.io
/// client implementation guide recommends exempting skill content from pruning.
///
/// Names are compared by exact string equality against tool call names.
///
/// See: specs/compaction.md (Tier 3: tool-aware masking), specs/skills-registry.md
const PROTECTED_TOOL_NAMES: &[&str] = &["activate_skill"];
826
827/// Check if a tool result message corresponds to a protected tool.
828///
829/// Looks up the tool_call_id in preceding assistant messages to find the tool name.
830/// Returns `true` if the tool name is in `PROTECTED_TOOL_NAMES`.
831fn is_protected_tool_result(messages: &[LlmMessage], tool_msg: &LlmMessage) -> bool {
832    if tool_msg.role != LlmMessageRole::Tool {
833        return false;
834    }
835    let tool_name = find_tool_call_name(messages, tool_msg);
836    PROTECTED_TOOL_NAMES.contains(&tool_name.as_str())
837}
838
839/// Check if an assistant message contains a tool call to a protected tool.
840///
841/// Returns `true` if any tool call in the message targets a protected tool name.
842fn is_protected_tool_call_message(msg: &LlmMessage) -> bool {
843    if msg.role != LlmMessageRole::Assistant {
844        return false;
845    }
846    msg.tool_calls.as_ref().is_some_and(|calls| {
847        calls
848            .iter()
849            .any(|tc| PROTECTED_TOOL_NAMES.contains(&tc.name.as_str()))
850    })
851}
852
853// ============================================================================
854// Observation Masking
855// ============================================================================
856
/// Result of applying observation masking to a message list.
///
/// Returned by `apply_observation_masking`; `messages` has the same length and
/// order as the input because masking replaces tool outputs in place.
#[derive(Debug)]
pub struct ObservationMaskingResult {
    /// The messages after masking (unmasked entries are clones of the input).
    pub messages: Vec<LlmMessage>,
    /// Number of tool outputs that were replaced by a summary.
    pub masked_count: usize,
}
865
866/// Apply observation masking: replace old tool outputs with one-line summaries.
867///
868/// Keeps the last `keep_recent_tool_outputs` tool results verbatim and replaces
869/// older ones with compact summaries. Message count is preserved (replace, not remove).
870///
871/// Protected tool results (e.g. `activate_skill`) are never masked — they contain
872/// durable behavioral instructions that must survive compaction.
873pub fn apply_observation_masking(
874    messages: &[LlmMessage],
875    config: &ObservationMaskingConfig,
876) -> ObservationMaskingResult {
877    apply_observation_masking_with_protected(messages, config, &std::collections::HashSet::new())
878}
879
/// Result of cost-control masking applied before provider serialization.
///
/// Produced by `apply_cost_control_masking` and the `build_model_view_messages*`
/// helpers. When no masking takes place, `masked_count` is zero and the two
/// byte counters are equal.
#[derive(Debug)]
pub struct CostControlMaskingResult {
    /// Messages after stale bulky tool results were replaced by summaries.
    pub messages: Vec<Message>,
    /// Number of tool-result messages that were masked.
    pub masked_count: usize,
    /// Tool-result payload bytes before masking.
    pub tool_result_bytes_before: usize,
    /// Tool-result payload bytes after masking.
    pub tool_result_bytes_after: usize,
}
892
893/// Build the bounded model-view messages from lossless stored messages.
894///
895/// Storage keeps full tool results. This helper defines the cheaper prompt
896/// view used for provider serialization when the compaction capability is
897/// configured.
898pub fn build_model_view_messages(
899    stored_messages: &[Message],
900    compaction_config: &CompactionConfig,
901    prior_usage: Option<&TokenUsage>,
902) -> CostControlMaskingResult {
903    apply_cost_control_masking(stored_messages, compaction_config, prior_usage)
904}
905
/// Build the bounded model-view messages from owned stored messages.
///
/// Takes the message list by value and hands it straight to the owned masking
/// routine, which avoids cloning the list when masking does not apply (the
/// input vector is returned as-is in that case).
pub fn build_model_view_messages_owned(
    stored_messages: Vec<Message>,
    compaction_config: &CompactionConfig,
    prior_usage: Option<&TokenUsage>,
) -> CostControlMaskingResult {
    apply_cost_control_masking_owned(stored_messages, compaction_config, prior_usage)
}
916
917/// Apply cheap, generic cost-control masking to stored messages.
918///
919/// This runs before converting messages to provider-specific LLM messages, so
920/// the llm.generation event can reflect the context actually sent. It is
921/// deliberately separate from observation masking: observation masking is part
922/// of the context-window compaction cascade, while this keeps stale tool output
923/// from being paid for repeatedly even when a large-context model still has
924/// room.
925pub fn apply_cost_control_masking(
926    messages: &[Message],
927    config: &CompactionConfig,
928    prior_usage: Option<&TokenUsage>,
929) -> CostControlMaskingResult {
930    apply_cost_control_masking_owned(messages.to_vec(), config, prior_usage)
931}
932
933fn apply_cost_control_masking_owned(
934    messages: Vec<Message>,
935    config: &CompactionConfig,
936    prior_usage: Option<&TokenUsage>,
937) -> CostControlMaskingResult {
938    let cost_config = &config.cost_control;
939    let tool_indices: Vec<usize> = messages
940        .iter()
941        .enumerate()
942        .filter(|(_, message)| {
943            message.role == MessageRole::ToolResult
944                && !is_protected_message_tool_result(&messages, message)
945        })
946        .map(|(index, _)| index)
947        .collect();
948    let tool_result_bytes_before = tool_indices
949        .iter()
950        .map(|index| message_tool_result_len(&messages[*index]))
951        .sum();
952
953    if !cost_config.enabled
954        || tool_indices.len() <= cost_config.keep_recent_tool_results
955        || !should_apply_cost_control_masking(
956            tool_indices.len(),
957            tool_result_bytes_before,
958            cost_config,
959            prior_usage,
960        )
961    {
962        return CostControlMaskingResult {
963            messages,
964            masked_count: 0,
965            tool_result_bytes_before,
966            tool_result_bytes_after: tool_result_bytes_before,
967        };
968    }
969
970    let keep_recent = cost_config.keep_recent_tool_results;
971    let to_mask_count = tool_indices.len().saturating_sub(keep_recent);
972    let related_recent_reads =
973        related_recent_paginated_read_results(&messages, &tool_indices, keep_recent);
974    let indices_to_mask: HashSet<usize> = tool_indices[..to_mask_count]
975        .iter()
976        .copied()
977        .filter(|index| !related_recent_reads.contains(index))
978        .collect();
979    let tool_names: std::collections::HashMap<usize, String> = indices_to_mask
980        .iter()
981        .map(|index| {
982            (
983                *index,
984                find_message_tool_call_name(&messages, &messages[*index]),
985            )
986        })
987        .collect();
988
989    let mut masked_count = 0;
990    let mut masked_messages = Vec::with_capacity(messages.len());
991    for (index, message) in messages.into_iter().enumerate() {
992        if let Some(tool_name) = tool_names.get(&index) {
993            masked_messages.push(mask_tool_result_message(&message, tool_name));
994            masked_count += 1;
995        } else {
996            masked_messages.push(message);
997        }
998    }
999
1000    let tool_result_bytes_after = masked_messages
1001        .iter()
1002        .filter(|message| message.role == MessageRole::ToolResult)
1003        .map(message_tool_result_len)
1004        .sum();
1005
1006    CostControlMaskingResult {
1007        messages: masked_messages,
1008        masked_count,
1009        tool_result_bytes_before,
1010        tool_result_bytes_after,
1011    }
1012}
1013
1014fn should_apply_cost_control_masking(
1015    tool_result_count: usize,
1016    tool_result_bytes: usize,
1017    config: &CostControlConfig,
1018    prior_usage: Option<&TokenUsage>,
1019) -> bool {
1020    if tool_result_count >= config.mask_after_tool_results {
1021        return true;
1022    }
1023    if tool_result_bytes >= config.max_live_tool_result_bytes {
1024        return true;
1025    }
1026    let Some(usage) = prior_usage else {
1027        return false;
1028    };
1029    let cache_read = usage.cache_read_tokens.unwrap_or(0);
1030    let uncached = usage.input_tokens.saturating_sub(cache_read);
1031    if uncached >= config.max_uncached_input_tokens {
1032        return true;
1033    }
1034    usage.input_tokens > 0
1035        && (cache_read as f32 / usage.input_tokens as f32) < config.min_cache_read_ratio
1036}
1037
1038fn is_protected_message_tool_result(messages: &[Message], tool_msg: &Message) -> bool {
1039    if tool_msg.role != MessageRole::ToolResult {
1040        return false;
1041    }
1042    let tool_name = find_message_tool_call_name(messages, tool_msg);
1043    PROTECTED_TOOL_NAMES.contains(&tool_name.as_str())
1044}
1045
/// Identity of a paginated file-read result, used to recognize older pages
/// that belong to the same read as a recently kept one.
///
/// Two results share a key when the same tool read the same path and reported
/// the same content hash.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct ReadResultKey {
    // Name of the read tool that produced the result.
    tool_name: String,
    // `path` field copied from the tool result payload.
    path: String,
    // `content_hash` field copied from the tool result payload.
    content_hash: String,
}
1052
1053fn related_recent_paginated_read_results(
1054    messages: &[Message],
1055    tool_indices: &[usize],
1056    keep_recent: usize,
1057) -> HashSet<usize> {
1058    if keep_recent == 0 || tool_indices.len() <= keep_recent {
1059        return HashSet::new();
1060    }
1061
1062    let keep_start = tool_indices.len().saturating_sub(keep_recent);
1063    let recent_keys: HashSet<ReadResultKey> = tool_indices[keep_start..]
1064        .iter()
1065        .filter_map(|index| paginated_read_result_key(messages, &messages[*index]))
1066        .collect();
1067    if recent_keys.is_empty() {
1068        return HashSet::new();
1069    }
1070
1071    let mut protected = HashSet::new();
1072    for index in tool_indices[..keep_start].iter().rev() {
1073        let Some(key) = paginated_read_result_key(messages, &messages[*index]) else {
1074            break;
1075        };
1076        if !recent_keys.contains(&key) {
1077            break;
1078        }
1079        protected.insert(*index);
1080        if protected.len() >= MAX_RELATED_RECENT_READ_RESULTS {
1081            break;
1082        }
1083    }
1084    protected
1085}
1086
1087fn paginated_read_result_key(messages: &[Message], tool_msg: &Message) -> Option<ReadResultKey> {
1088    let tool_name = find_message_tool_call_name(messages, tool_msg);
1089    if !is_read_file_tool_name(&tool_name) {
1090        return None;
1091    }
1092    let value = tool_msg.tool_result_content()?.result.as_ref()?;
1093    let object = value.as_object()?;
1094    object.get("lines_shown").and_then(|v| v.as_object())?;
1095    Some(ReadResultKey {
1096        tool_name,
1097        path: object.get("path")?.as_str()?.to_string(),
1098        content_hash: object.get("content_hash")?.as_str()?.to_string(),
1099    })
1100}
1101
/// Report whether `tool_name` is one of the known file-reading tools.
///
/// Matching is exact and case-sensitive.
fn is_read_file_tool_name(tool_name: &str) -> bool {
    const READ_FILE_TOOL_NAMES: [&str; 8] = [
        "read_file",
        "daytona_read_file",
        "sandbox_read_file",
        "e2b_read_file",
        "docker_read_file",
        "deno_read_file",
        "sprites_read_file",
        "read_github_file",
    ];
    READ_FILE_TOOL_NAMES.contains(&tool_name)
}
1115
1116fn find_message_tool_call_name(messages: &[Message], tool_msg: &Message) -> String {
1117    let Some(call_id) = tool_msg.tool_call_id() else {
1118        return "unknown_tool".to_string();
1119    };
1120
1121    for msg in messages.iter().rev() {
1122        if msg.role != MessageRole::Agent {
1123            continue;
1124        }
1125        for tool_call in msg.tool_calls() {
1126            if tool_call.id == call_id {
1127                return tool_call.name.clone();
1128            }
1129        }
1130    }
1131
1132    "unknown_tool".to_string()
1133}
1134
1135fn message_tool_result_len(message: &Message) -> usize {
1136    let Some(result) = message.tool_result_content() else {
1137        return 0;
1138    };
1139    result
1140        .result
1141        .as_ref()
1142        .map(estimate_json_value_len)
1143        .unwrap_or(0)
1144        + result.error.as_ref().map_or(0, String::len)
1145}
1146
1147fn mask_tool_result_message(message: &Message, tool_name: &str) -> Message {
1148    let Some(result) = message.tool_result_content() else {
1149        return message.clone();
1150    };
1151    let summary = summarize_tool_result(tool_name, result.result.as_ref(), result.error.as_ref());
1152    let was_error = result.error.is_some();
1153    let mut masked = message.clone();
1154    for part in &mut masked.content {
1155        if let ContentPart::ToolResult(tool_result) = part {
1156            if was_error {
1157                tool_result.result = None;
1158                tool_result.error = Some(summary);
1159            } else {
1160                tool_result.result = Some(serde_json::json!({
1161                    "masked": true,
1162                    "summary": summary,
1163                }));
1164                tool_result.error = None;
1165            }
1166            break;
1167        }
1168    }
1169    masked
1170}
1171
1172fn summarize_tool_result(
1173    tool_name: &str,
1174    result: Option<&serde_json::Value>,
1175    error: Option<&String>,
1176) -> String {
1177    if let Some(error) = error {
1178        return format!("[{tool_name} error: {}]", truncate_inline(error, 160));
1179    }
1180    let Some(value) = result else {
1181        return format!("[{tool_name} returned no result]");
1182    };
1183    let Some(object) = value.as_object() else {
1184        return format!(
1185            "[{tool_name} -> {}, {} bytes]",
1186            value_kind(value),
1187            estimate_json_value_len(value)
1188        );
1189    };
1190
1191    match tool_name {
1192        tool_name if is_read_file_tool_name(tool_name) => {
1193            summarize_read_file_result(tool_name, object, value)
1194        }
1195        "bash" | "daytona_exec" | "sandbox_exec" | "e2b_exec" | "docker_exec" | "deno_exec" => {
1196            summarize_exec_result(tool_name, object, value)
1197        }
1198        "list_directory" => summarize_list_directory_result(tool_name, object, value),
1199        "grep_files" => summarize_grep_files_result(tool_name, object, value),
1200        _ => summarize_generic_tool_result(tool_name, object, value),
1201    }
1202}
1203
1204fn summarize_read_file_result(
1205    tool_name: &str,
1206    object: &serde_json::Map<String, serde_json::Value>,
1207    value: &serde_json::Value,
1208) -> String {
1209    let path = object
1210        .get("path")
1211        .and_then(|v| v.as_str())
1212        .unwrap_or("(unknown path)");
1213    let lines = object.get("lines_shown").and_then(|v| v.as_object());
1214    let line_range = lines
1215        .and_then(|lines| {
1216            let start = lines.get("start")?.as_u64()?;
1217            let end = lines.get("end")?.as_u64()?;
1218            Some(format!(" lines {start}-{end}"))
1219        })
1220        .unwrap_or_default();
1221    let total_lines = object
1222        .get("total_lines")
1223        .and_then(|v| v.as_u64())
1224        .map(|lines| format!(", total_lines={lines}"))
1225        .unwrap_or_default();
1226    let next_offset = object
1227        .get("truncation")
1228        .and_then(|v| v.as_object())
1229        .and_then(|truncation| truncation.get("next_offset"))
1230        .and_then(|v| v.as_u64())
1231        .map(|offset| format!(", next_offset={offset}"))
1232        .unwrap_or_default();
1233    let hash = object
1234        .get("content_hash")
1235        .and_then(|v| v.as_str())
1236        .map(|hash| format!(", hash={hash}"))
1237        .unwrap_or_default();
1238    let truncated = object
1239        .get("truncated")
1240        .and_then(|v| v.as_bool())
1241        .unwrap_or(false);
1242
1243    format!(
1244        "[{tool_name} {path}{line_range}, {} bytes, truncated={truncated}{total_lines}{next_offset}{hash}]",
1245        estimate_json_value_len(value)
1246    )
1247}
1248
1249fn summarize_exec_result(
1250    tool_name: &str,
1251    object: &serde_json::Map<String, serde_json::Value>,
1252    value: &serde_json::Value,
1253) -> String {
1254    let exit = object
1255        .get("exit_code")
1256        .and_then(|v| v.as_i64())
1257        .map(|code| format!(" exit={code}"))
1258        .unwrap_or_default();
1259    let stdout_len = object
1260        .get("stdout")
1261        .and_then(|v| v.as_str())
1262        .map(|stdout| stdout.len())
1263        .unwrap_or(0);
1264    let stderr_len = object
1265        .get("stderr")
1266        .and_then(|v| v.as_str())
1267        .map(|stderr| stderr.len())
1268        .unwrap_or(0);
1269    let full_output = object
1270        .get("full_output")
1271        .and_then(|v| v.as_str())
1272        .map(|path| format!(", full_output={path}"))
1273        .unwrap_or_default();
1274    let total_lines = object
1275        .get("total_lines")
1276        .and_then(|v| v.as_u64())
1277        .map(|lines| format!(", total_lines={lines}"))
1278        .unwrap_or_default();
1279
1280    format!(
1281        "[{tool_name}{exit}, stdout={} bytes, stderr={} bytes, result={} bytes{full_output}{total_lines}]",
1282        stdout_len,
1283        stderr_len,
1284        estimate_json_value_len(value)
1285    )
1286}
1287
1288fn summarize_list_directory_result(
1289    tool_name: &str,
1290    object: &serde_json::Map<String, serde_json::Value>,
1291    value: &serde_json::Value,
1292) -> String {
1293    let path = object
1294        .get("path")
1295        .and_then(|v| v.as_str())
1296        .unwrap_or("(unknown path)");
1297    let count = object
1298        .get("count")
1299        .and_then(|v| v.as_u64())
1300        .or_else(|| {
1301            object
1302                .get("entries")
1303                .and_then(|v| v.as_array())
1304                .map(|v| v.len() as u64)
1305        })
1306        .unwrap_or(0);
1307    format!(
1308        "[{tool_name} {path}, {count} entries, {} bytes]",
1309        estimate_json_value_len(value)
1310    )
1311}
1312
1313fn summarize_grep_files_result(
1314    tool_name: &str,
1315    object: &serde_json::Map<String, serde_json::Value>,
1316    value: &serde_json::Value,
1317) -> String {
1318    let pattern = object
1319        .get("pattern")
1320        .and_then(|v| v.as_str())
1321        .map(|pattern| format!(" pattern={:?}", truncate_inline(pattern, 80)))
1322        .unwrap_or_default();
1323    let match_count = object
1324        .get("match_count")
1325        .and_then(|v| v.as_u64())
1326        .unwrap_or(0);
1327    format!(
1328        "[{tool_name}{pattern}, matches={match_count}, {} bytes]",
1329        estimate_json_value_len(value)
1330    )
1331}
1332
1333fn summarize_generic_tool_result(
1334    tool_name: &str,
1335    object: &serde_json::Map<String, serde_json::Value>,
1336    value: &serde_json::Value,
1337) -> String {
1338    let keys = object.keys().take(5).cloned().collect::<Vec<_>>().join(",");
1339    format!(
1340        "[{tool_name} result, {} bytes, keys={keys}]",
1341        estimate_json_value_len(value)
1342    )
1343}
1344
1345fn value_kind(value: &serde_json::Value) -> &'static str {
1346    match value {
1347        serde_json::Value::Null => "null",
1348        serde_json::Value::Bool(_) => "bool",
1349        serde_json::Value::Number(_) => "number",
1350        serde_json::Value::String(_) => "string",
1351        serde_json::Value::Array(_) => "array",
1352        serde_json::Value::Object(_) => "object",
1353    }
1354}
1355
1356fn estimate_json_value_len(value: &serde_json::Value) -> usize {
1357    let mut writer = CountingWriter::default();
1358    serde_json::to_writer(&mut writer, value)
1359        .map(|_| writer.bytes)
1360        .unwrap_or(0)
1361}
1362
/// `std::io::Write` sink that discards its input and only tallies its size.
#[derive(Default)]
struct CountingWriter {
    /// Total number of bytes accepted so far.
    bytes: usize,
}

impl std::io::Write for CountingWriter {
    /// Accept the whole buffer and add its length to the running total.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let accepted = buf.len();
        self.bytes += accepted;
        Ok(accepted)
    }

    /// Nothing is buffered, so flushing always succeeds.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
1378
/// Limit `text` to `max_chars` characters, appending `...` when it was cut.
///
/// Counts Unicode scalar values rather than bytes, so the cut never lands
/// inside a multi-byte character. Text that already fits is returned unchanged.
fn truncate_inline(text: &str, max_chars: usize) -> String {
    // The character at position `max_chars` exists only when the text is too
    // long; its byte offset is exactly where the kept prefix ends.
    match text.char_indices().nth(max_chars) {
        None => text.to_string(),
        Some((cut, _)) => {
            let mut truncated = String::with_capacity(cut + 3);
            truncated.push_str(&text[..cut]);
            truncated.push_str("...");
            truncated
        }
    }
}
1387
1388/// Like `apply_observation_masking`, but accepts additional pre-identified protected
1389/// tool_call_ids. This is needed when the message slice doesn't contain the
1390/// assistant tool-call message (e.g. warm tier where the call is in cold tier).
1391fn apply_observation_masking_with_protected(
1392    messages: &[LlmMessage],
1393    config: &ObservationMaskingConfig,
1394    extra_protected_call_ids: &std::collections::HashSet<String>,
1395) -> ObservationMaskingResult {
1396    // Separate protected vs maskable tool result indices
1397    let tool_indices: Vec<usize> = messages
1398        .iter()
1399        .enumerate()
1400        .filter(|(_, m)| {
1401            m.role == LlmMessageRole::Tool
1402                && !is_protected_tool_result(messages, m)
1403                && !m
1404                    .tool_call_id
1405                    .as_ref()
1406                    .is_some_and(|id| extra_protected_call_ids.contains(id))
1407        })
1408        .map(|(i, _)| i)
1409        .collect();
1410
1411    if tool_indices.len() <= config.keep_recent_tool_outputs {
1412        return ObservationMaskingResult {
1413            messages: messages.to_vec(),
1414            masked_count: 0,
1415        };
1416    }
1417
1418    let to_mask_count = tool_indices.len() - config.keep_recent_tool_outputs;
1419    let indices_to_mask: std::collections::HashSet<usize> =
1420        tool_indices[..to_mask_count].iter().copied().collect();
1421
1422    let mut result = Vec::with_capacity(messages.len());
1423    let mut masked_count = 0;
1424
1425    for (i, msg) in messages.iter().enumerate() {
1426        if indices_to_mask.contains(&i) {
1427            let tool_name = find_tool_call_name(messages, msg);
1428            let summary = match config.summary_format {
1429                MaskingSummaryFormat::OneLine => format_one_line_summary(&tool_name, &msg.content),
1430                MaskingSummaryFormat::HeadTail => format_head_tail_summary(&msg.content),
1431            };
1432            result.push(LlmMessage {
1433                role: LlmMessageRole::Tool,
1434                content: LlmMessageContent::Text(summary),
1435                tool_calls: msg.tool_calls.clone(),
1436                tool_call_id: msg.tool_call_id.clone(),
1437                phase: msg.phase,
1438                thinking: None,
1439                thinking_signature: None,
1440            });
1441            masked_count += 1;
1442        } else {
1443            result.push(msg.clone());
1444        }
1445    }
1446
1447    ObservationMaskingResult {
1448        messages: result,
1449        masked_count,
1450    }
1451}
1452
1453/// Find the tool name from a preceding assistant message that issued the tool call.
1454fn find_tool_call_name(messages: &[LlmMessage], tool_msg: &LlmMessage) -> String {
1455    let Some(ref call_id) = tool_msg.tool_call_id else {
1456        return "unknown_tool".to_string();
1457    };
1458
1459    for msg in messages.iter().rev() {
1460        if msg.role == LlmMessageRole::Assistant
1461            && let Some(ref tool_calls) = msg.tool_calls
1462        {
1463            for tc in tool_calls {
1464                if tc.id == *call_id {
1465                    return tc.name.clone();
1466                }
1467            }
1468        }
1469    }
1470
1471    "unknown_tool".to_string()
1472}
1473
1474fn extract_text(content: &LlmMessageContent) -> String {
1475    match content {
1476        LlmMessageContent::Text(t) => t.clone(),
1477        LlmMessageContent::Parts(parts) => parts
1478            .iter()
1479            .filter_map(|p| {
1480                if let LlmContentPart::Text { text } = p {
1481                    Some(text.clone())
1482                } else {
1483                    None
1484                }
1485            })
1486            .collect::<Vec<_>>()
1487            .join(" "),
1488    }
1489}
1490
1491fn format_one_line_summary(tool_name: &str, content: &LlmMessageContent) -> String {
1492    let text = extract_text(content);
1493    let line_count = text.lines().count();
1494    let byte_len = text.len();
1495
1496    if byte_len <= 100 {
1497        format!("[{tool_name} → {text}]")
1498    } else {
1499        format!("[{tool_name} → {line_count} lines, {byte_len} bytes]")
1500    }
1501}
1502
1503fn format_head_tail_summary(content: &LlmMessageContent) -> String {
1504    let text = extract_text(content);
1505    let lines: Vec<&str> = text.lines().collect();
1506
1507    if lines.len() <= 6 {
1508        return text;
1509    }
1510
1511    let head: Vec<&str> = lines[..3].to_vec();
1512    let tail: Vec<&str> = lines[lines.len() - 3..].to_vec();
1513
1514    format!(
1515        "{}\n... ({} lines omitted) ...\n{}",
1516        head.join("\n"),
1517        lines.len() - 6,
1518        tail.join("\n")
1519    )
1520}
1521
1522// ============================================================================
1523// Summarization
1524// ============================================================================
1525
1526/// Build the summarization system prompt.
1527pub fn build_summarization_prompt(config: &SummarizationConfig) -> String {
1528    let preserve_items = if config.preserve.is_empty() {
1529        default_preserve()
1530    } else {
1531        config.preserve.clone()
1532    };
1533
1534    let preserve_list = preserve_items
1535        .iter()
1536        .map(|item| format!("- {item}"))
1537        .collect::<Vec<_>>()
1538        .join("\n");
1539
1540    let custom_instructions = config
1541        .instructions
1542        .as_deref()
1543        .map(|instr| format!("\n- {instr}"))
1544        .unwrap_or_default();
1545
1546    format!(
1547        r#"<task>
1548Summarize the following conversation history. The summary replaces these
1549messages in the agent's context window — it must contain everything the
1550agent needs to continue working.
1551</task>
1552
1553<preserve>
1554{preserve_list}{custom_instructions}
1555</preserve>
1556
1557<format>
1558Produce a structured summary. Use sections. Be concise but complete.
1559Do not include tool output verbatim — reference files by path.
1560IMPORTANT: Any activate_skill tool results contain durable skill instructions.
1561Include them verbatim in a dedicated "Active Skills" section — do not summarize
1562or paraphrase skill instructions.
1563</format>"#
1564    )
1565}
1566
1567/// Format messages into a text block for the summarization prompt.
1568pub fn format_messages_for_summarization(messages: &[LlmMessage]) -> String {
1569    let mut parts = Vec::new();
1570    for msg in messages {
1571        let role = match msg.role {
1572            LlmMessageRole::System => "system",
1573            LlmMessageRole::User => "user",
1574            LlmMessageRole::Assistant => "assistant",
1575            LlmMessageRole::Tool => "tool",
1576        };
1577
1578        let content = extract_text(&msg.content);
1579
1580        // Protected tool results (skill instructions) are never truncated —
1581        // the summarizer must see the full text to reproduce them verbatim.
1582        let is_protected = is_protected_tool_result(messages, msg);
1583
1584        // Truncate very long messages to avoid blowing up the summarization prompt
1585        let truncated = if !is_protected && content.len() > 2000 {
1586            let safe_prefix = truncate_at_char_boundary(&content, 2000);
1587            format!(
1588                "{}... [truncated, {} chars total]",
1589                safe_prefix,
1590                content.len()
1591            )
1592        } else {
1593            content
1594        };
1595
1596        parts.push(format!("[{role}]: {truncated}"));
1597    }
1598    parts.join("\n\n")
1599}
1600
/// Longest prefix of `content` that is at most `max_bytes` bytes long and ends
/// on a UTF-8 character boundary.
///
/// Content that already fits is returned whole. Otherwise the cut point backs
/// off from `max_bytes` until it no longer splits a multi-byte character.
fn truncate_at_char_boundary(content: &str, max_bytes: usize) -> &str {
    if content.len() <= max_bytes {
        return content;
    }

    // Offset 0 is always a boundary, so the search cannot come up empty.
    let end = (0..=max_bytes)
        .rev()
        .find(|&offset| content.is_char_boundary(offset))
        .unwrap_or(0);
    &content[..end]
}
1617
1618/// Build a summary system message that replaces compacted messages in context.
1619pub fn build_summary_message(summary_text: &str) -> LlmMessage {
1620    LlmMessage {
1621        role: LlmMessageRole::System,
1622        content: LlmMessageContent::Text(format!(
1623            "[CONVERSATION_SUMMARY]\n{summary_text}\n[/CONVERSATION_SUMMARY]"
1624        )),
1625        tool_calls: None,
1626        tool_call_id: None,
1627        phase: None,
1628        thinking: None,
1629        thinking_signature: None,
1630    }
1631}
1632
1633// ============================================================================
1634// Compaction Step Tracking
1635// ============================================================================
1636
/// Record of a single compaction step in a cascade.
///
/// Serializable so that the steps taken during a compaction run can be
/// persisted or reported alongside other event data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompactionStep {
    /// Strategy used in this step.
    pub strategy: String,
    /// Message count after this step.
    pub messages_after: usize,
    /// Duration of this step in milliseconds.
    pub duration_ms: u64,
}
1647
1648// ============================================================================
1649// Tests
1650// ============================================================================
1651
1652#[cfg(test)]
1653mod tests {
1654    use super::*;
1655    use crate::tool_types::ToolCall;
1656    use serde_json::json;
1657
1658    fn make_user_msg(text: &str) -> LlmMessage {
1659        LlmMessage {
1660            role: LlmMessageRole::User,
1661            content: LlmMessageContent::Text(text.to_string()),
1662            tool_calls: None,
1663            tool_call_id: None,
1664            phase: None,
1665            thinking: None,
1666            thinking_signature: None,
1667        }
1668    }
1669
1670    fn make_assistant_msg(text: &str) -> LlmMessage {
1671        LlmMessage {
1672            role: LlmMessageRole::Assistant,
1673            content: LlmMessageContent::Text(text.to_string()),
1674            tool_calls: None,
1675            tool_call_id: None,
1676            phase: None,
1677            thinking: None,
1678            thinking_signature: None,
1679        }
1680    }
1681
1682    fn make_assistant_with_tool_call(call_id: &str, tool_name: &str) -> LlmMessage {
1683        LlmMessage {
1684            role: LlmMessageRole::Assistant,
1685            content: LlmMessageContent::Text(String::new()),
1686            tool_calls: Some(vec![ToolCall {
1687                id: call_id.to_string(),
1688                name: tool_name.to_string(),
1689                arguments: json!({"path": "src/main.rs"}),
1690            }]),
1691            tool_call_id: None,
1692            phase: None,
1693            thinking: None,
1694            thinking_signature: None,
1695        }
1696    }
1697
1698    fn make_tool_result(call_id: &str, output: &str) -> LlmMessage {
1699        LlmMessage {
1700            role: LlmMessageRole::Tool,
1701            content: LlmMessageContent::Text(output.to_string()),
1702            tool_calls: None,
1703            tool_call_id: Some(call_id.to_string()),
1704            phase: None,
1705            thinking: None,
1706            thinking_signature: None,
1707        }
1708    }
1709
1710    // ====================================================================
1711    // CompactionConfig tests
1712    // ====================================================================
1713
1714    #[test]
1715    fn test_capability_metadata() {
1716        let cap = CompactionCapability;
1717        assert_eq!(cap.id(), COMPACTION_CAPABILITY_ID);
1718        assert_eq!(cap.name(), "Compaction");
1719        assert_eq!(cap.status(), CapabilityStatus::Available);
1720        assert_eq!(cap.category(), Some("Optimization"));
1721        assert!(cap.tools().is_empty());
1722        assert!(cap.message_filter_provider().is_some());
1723    }
1724
1725    #[test]
1726    fn test_config_schema_and_validate_config() {
1727        let cap = CompactionCapability;
1728
1729        let schema = cap.config_schema().expect("config schema");
1730        assert_eq!(schema["type"], "object");
1731        // Only the simple knobs are exposed; advanced nested objects stay out.
1732        assert!(schema["properties"]["strategy"].is_object());
1733        assert!(schema["properties"]["proactive"].is_object());
1734        assert!(schema["properties"]["budget_percent"].is_object());
1735        assert!(schema["properties"].get("observation_masking").is_none());
1736        assert!(schema["properties"].get("cost_control").is_none());
1737
1738        // Null and valid configs are accepted.
1739        assert!(cap.validate_config(&serde_json::Value::Null).is_ok());
1740        assert!(
1741            cap.validate_config(&json!({
1742                "strategy": "native",
1743                "proactive": false,
1744                "budget_percent": 0.9
1745            }))
1746            .is_ok()
1747        );
1748        // Advanced nested fields are tolerated even though not in the schema.
1749        assert!(
1750            cap.validate_config(&json!({
1751                "strategy": "observation_masking",
1752                "observation_masking": { "keep_recent_tool_outputs": 4 },
1753                "cost_control": { "enabled": false }
1754            }))
1755            .is_ok()
1756        );
1757
1758        // Invalid values are rejected.
1759        assert!(cap.validate_config(&json!({"strategy": "bogus"})).is_err());
1760        let err = cap
1761            .validate_config(&json!({"budget_percent": 5.0}))
1762            .unwrap_err();
1763        assert!(err.contains("budget_percent"));
1764    }
1765
1766    #[test]
1767    fn test_localizations_resolve_uk() {
1768        let cap = CompactionCapability;
1769        assert_eq!(cap.localized_name(Some("uk-UA")), "Ущільнення контексту");
1770        assert!(cap.describe_schema(None).is_some());
1771    }
1772
1773    #[test]
1774    fn test_default_config() {
1775        let config = CompactionConfig::default();
1776        assert_eq!(config.strategy, CompactionStrategy::Auto);
1777        assert!(config.proactive);
1778        assert!((config.budget_percent - 0.85).abs() < f32::EPSILON);
1779        assert_eq!(config.observation_masking.keep_recent_tool_outputs, 2);
1780        assert_eq!(
1781            config.observation_masking.summary_format,
1782            MaskingSummaryFormat::OneLine
1783        );
1784        assert!(config.summarization.model.is_none());
1785        assert_eq!(config.summarization.preserve.len(), 5);
1786        assert!(config.summarization.instructions.is_none());
1787        assert!(config.cost_control.enabled);
1788        assert_eq!(config.cost_control.keep_recent_tool_results, 2);
1789    }
1790
1791    #[test]
1792    fn test_config_from_empty_json() {
1793        let config = CompactionConfig::from_json(&json!({}));
1794        assert_eq!(config.strategy, CompactionStrategy::Auto);
1795        assert!(config.proactive);
1796    }
1797
1798    #[test]
1799    fn test_config_native_only() {
1800        let config = CompactionConfig::from_json(&json!({"strategy": "native"}));
1801        assert_eq!(config.strategy, CompactionStrategy::Native);
1802        assert!(config.proactive);
1803    }
1804
1805    #[test]
1806    fn test_config_observation_masking_with_custom_settings() {
1807        let config = CompactionConfig::from_json(&json!({
1808            "strategy": "observation_masking",
1809            "proactive": false,
1810            "observation_masking": {
1811                "keep_recent_tool_outputs": 10,
1812                "summary_format": "head_tail"
1813            }
1814        }));
1815        assert_eq!(config.strategy, CompactionStrategy::ObservationMasking);
1816        assert!(!config.proactive);
1817        assert_eq!(config.observation_masking.keep_recent_tool_outputs, 10);
1818        assert_eq!(
1819            config.observation_masking.summary_format,
1820            MaskingSummaryFormat::HeadTail
1821        );
1822    }
1823
1824    #[test]
1825    fn test_config_cost_control_with_custom_settings() {
1826        let config = CompactionConfig::from_json(&json!({
1827            "cost_control": {
1828                "enabled": true,
1829                "keep_recent_tool_results": 1,
1830                "mask_after_tool_results": 2,
1831                "max_live_tool_result_bytes": 4096,
1832                "max_uncached_input_tokens": 50000,
1833                "min_cache_read_ratio": 0.5
1834            }
1835        }));
1836
1837        assert!(config.cost_control.enabled);
1838        assert_eq!(config.cost_control.keep_recent_tool_results, 1);
1839        assert_eq!(config.cost_control.mask_after_tool_results, 2);
1840        assert_eq!(config.cost_control.max_live_tool_result_bytes, 4096);
1841        assert_eq!(config.cost_control.max_uncached_input_tokens, 50000);
1842        assert!((config.cost_control.min_cache_read_ratio - 0.5).abs() < f32::EPSILON);
1843    }
1844
1845    #[test]
1846    fn test_config_summarization_with_custom_model() {
1847        let config = CompactionConfig::from_json(&json!({
1848            "strategy": "summarization",
1849            "summarization": {
1850                "model": "claude-haiku-4-5-20251001",
1851                "instructions": "Focus on API decisions",
1852                "preserve": ["decisions", "errors"]
1853            }
1854        }));
1855        assert_eq!(config.strategy, CompactionStrategy::Summarization);
1856        assert_eq!(
1857            config.summarization.model.as_deref(),
1858            Some("claude-haiku-4-5-20251001")
1859        );
1860        assert_eq!(
1861            config.summarization.instructions.as_deref(),
1862            Some("Focus on API decisions")
1863        );
1864        assert_eq!(config.summarization.preserve.len(), 2);
1865    }
1866
1867    fn make_message_tool_turn(
1868        call_id: &str,
1869        tool_name: &str,
1870        result: serde_json::Value,
1871    ) -> Vec<Message> {
1872        vec![
1873            Message::assistant_with_tools(
1874                "",
1875                vec![ToolCall {
1876                    id: call_id.to_string(),
1877                    name: tool_name.to_string(),
1878                    arguments: json!({"path": "/workspace/src/lib.rs"}),
1879                }],
1880            ),
1881            Message::tool_result(call_id, Some(result), None),
1882        ]
1883    }
1884
1885    #[test]
1886    fn test_cost_control_masks_old_read_file_results() {
1887        let mut messages = vec![Message::user("inspect files")];
1888        for index in 0..5 {
1889            messages.extend(make_message_tool_turn(
1890                &format!("call_{index}"),
1891                "read_file",
1892                json!({
1893                    "path": "/workspace/src/lib.rs",
1894                    "content": format!("{}{}", "line\n".repeat(400), index),
1895                    "total_lines": 900,
1896                    "lines_shown": {"start": 1, "end": 400},
1897                    "truncated": true,
1898                    "content_hash": format!("sha256:{index}"),
1899                    "truncation": {"truncated": true, "next_offset": 400, "reason": "line_cap"}
1900                }),
1901            ));
1902        }
1903
1904        let config = CompactionConfig::from_json(&json!({
1905            "cost_control": {
1906                "keep_recent_tool_results": 2,
1907                "mask_after_tool_results": 4
1908            }
1909        }));
1910        let result = apply_cost_control_masking(&messages, &config, None);
1911
1912        assert_eq!(result.masked_count, 3);
1913        assert!(result.tool_result_bytes_after < result.tool_result_bytes_before);
1914
1915        let first_tool = result.messages[2].tool_result_content().unwrap();
1916        let masked = first_tool.result.as_ref().unwrap();
1917        assert_eq!(masked["masked"], true);
1918        let summary = masked["summary"].as_str().unwrap();
1919        assert!(summary.contains("read_file"));
1920        assert!(summary.contains("/workspace/src/lib.rs"));
1921        assert!(summary.contains("lines 1-400"));
1922        assert!(summary.contains("next_offset=400"));
1923        assert!(!summary.contains("line\nline"));
1924
1925        let last_tool = result
1926            .messages
1927            .last()
1928            .unwrap()
1929            .tool_result_content()
1930            .unwrap();
1931        assert!(last_tool.result.as_ref().unwrap().get("content").is_some());
1932    }
1933
1934    #[test]
1935    fn test_cost_control_keeps_recent_paginated_read_group() {
1936        let mut messages = vec![Message::user("inspect saved output")];
1937        messages.extend(make_message_tool_turn(
1938            "call_bash",
1939            "bash",
1940            json!({
1941                "stdout": "old command output",
1942                "stderr": "",
1943                "exit_code": 0,
1944                "success": true
1945            }),
1946        ));
1947        messages.extend(make_message_tool_turn(
1948            "call_read_first",
1949            "read_file",
1950            json!({
1951                "path": "/workspace/outputs/call_123.stdout",
1952                "content": "first page\n".repeat(200),
1953                "total_lines": 400,
1954                "lines_shown": {"start": 1, "end": 200},
1955                "truncated": true,
1956                "content_hash": "sha256:same-output",
1957                "truncation": {"truncated": true, "next_offset": 200, "reason": "line_cap"}
1958            }),
1959        ));
1960        messages.extend(make_message_tool_turn(
1961            "call_read_second",
1962            "read_file",
1963            json!({
1964                "path": "/workspace/outputs/call_123.stdout",
1965                "content": "second page\n".repeat(200),
1966                "total_lines": 400,
1967                "lines_shown": {"start": 201, "end": 400},
1968                "truncated": false,
1969                "content_hash": "sha256:same-output"
1970            }),
1971        ));
1972
1973        let config = CompactionConfig::from_json(&json!({
1974            "cost_control": {
1975                "keep_recent_tool_results": 1,
1976                "mask_after_tool_results": 2
1977            }
1978        }));
1979        let result = build_model_view_messages(&messages, &config, None);
1980
1981        assert_eq!(result.masked_count, 1);
1982        let bash_result = result.messages[2].tool_result_content().unwrap();
1983        assert_eq!(bash_result.result.as_ref().unwrap()["masked"], true);
1984        let first_page = result.messages[4].tool_result_content().unwrap();
1985        assert!(first_page.result.as_ref().unwrap().get("content").is_some());
1986        let second_page = result.messages[6].tool_result_content().unwrap();
1987        assert!(
1988            second_page
1989                .result
1990                .as_ref()
1991                .unwrap()
1992                .get("content")
1993                .is_some()
1994        );
1995    }
1996
1997    #[test]
1998    fn test_model_view_masks_with_compaction_config() {
1999        let mut messages = vec![Message::user("inspect files repeatedly")];
2000        for index in 0..9 {
2001            messages.extend(make_message_tool_turn(
2002                &format!("call_{index}"),
2003                "read_file",
2004                json!({
2005                    "path": "/workspace/session_019e4c9dd1b17021af70ad3227361b16.jsonl",
2006                    "content": format!("{}{}", "large transcript line\n".repeat(1000), index),
2007                    "total_lines": 1000,
2008                    "lines_shown": {"start": 1, "end": 1000},
2009                    "truncated": false,
2010                    "content_hash": format!("sha256:{index}")
2011                }),
2012            ));
2013        }
2014
2015        let config = CompactionConfig::default();
2016        let result = build_model_view_messages(&messages, &config, None);
2017
2018        assert_eq!(result.masked_count, 7);
2019        assert!(result.tool_result_bytes_after < result.tool_result_bytes_before / 4);
2020        let first_tool = result.messages[2].tool_result_content().unwrap();
2021        let masked = first_tool.result.as_ref().unwrap();
2022        assert_eq!(masked["masked"], true);
2023        assert!(masked["summary"].as_str().unwrap().contains("read_file"));
2024        let last_tool = result
2025            .messages
2026            .last()
2027            .unwrap()
2028            .tool_result_content()
2029            .unwrap();
2030        assert!(last_tool.result.as_ref().unwrap().get("content").is_some());
2031    }
2032
2033    #[test]
2034    fn test_compaction_capability_contributes_model_view_provider() {
2035        let mut messages = vec![Message::user("inspect files repeatedly")];
2036        for index in 0..9 {
2037            messages.extend(make_message_tool_turn(
2038                &format!("call_{index}"),
2039                "read_file",
2040                json!({
2041                    "path": "/workspace/src/lib.rs",
2042                    "content": format!("{}{}", "large file line\n".repeat(1000), index),
2043                    "total_lines": 1000,
2044                    "lines_shown": {"start": 1, "end": 1000},
2045                    "truncated": false
2046                }),
2047            ));
2048        }
2049
2050        let capability = CompactionCapability;
2051        let provider = capability.model_view_provider().unwrap();
2052        let context = ModelViewContext {
2053            session_id: crate::typed_id::SessionId::new(),
2054            prior_usage: None,
2055        };
2056        let result = provider.apply_model_view(messages, &json!({}), &context);
2057
2058        let first_tool = result[2].tool_result_content().unwrap();
2059        assert_eq!(first_tool.result.as_ref().unwrap()["masked"], true);
2060        let last_tool = result.last().unwrap().tool_result_content().unwrap();
2061        assert!(last_tool.result.as_ref().unwrap().get("content").is_some());
2062    }
2063
2064    #[test]
2065    fn test_model_view_respects_disabled_cost_control_config() {
2066        let mut messages = vec![Message::user("inspect files repeatedly")];
2067        for index in 0..5 {
2068            messages.extend(make_message_tool_turn(
2069                &format!("call_{index}"),
2070                "read_file",
2071                json!({
2072                    "path": "/workspace/src/lib.rs",
2073                    "content": "line\n".repeat(400),
2074                    "total_lines": 400,
2075                    "lines_shown": {"start": 1, "end": 400},
2076                    "truncated": false
2077                }),
2078            ));
2079        }
2080
2081        let config = CompactionConfig::from_json(&json!({
2082            "cost_control": {
2083                "enabled": false,
2084                "keep_recent_tool_results": 1,
2085                "mask_after_tool_results": 2
2086            }
2087        }));
2088        let result = build_model_view_messages(&messages, &config, None);
2089
2090        assert_eq!(result.masked_count, 0);
2091        assert_eq!(
2092            result.tool_result_bytes_after,
2093            result.tool_result_bytes_before
2094        );
2095    }
2096
2097    #[test]
2098    fn test_cost_control_uses_prior_usage_signal() {
2099        let mut messages = vec![Message::user("run commands")];
2100        for index in 0..3 {
2101            messages.extend(make_message_tool_turn(
2102                &format!("call_{index}"),
2103                "bash",
2104                json!({
2105                    "stdout": "small output",
2106                    "stderr": "",
2107                    "exit_code": 0,
2108                    "success": true
2109                }),
2110            ));
2111        }
2112
2113        let config = CompactionConfig::from_json(&json!({
2114            "cost_control": {
2115                "keep_recent_tool_results": 1,
2116                "mask_after_tool_results": 99,
2117                "max_live_tool_result_bytes": 999999,
2118                "max_uncached_input_tokens": 1000
2119            }
2120        }));
2121        let usage = TokenUsage::with_cache(10_000, 100, Some(0), None);
2122        let result = apply_cost_control_masking(&messages, &config, Some(&usage));
2123
2124        assert_eq!(result.masked_count, 2);
2125        let first_tool = result.messages[2].tool_result_content().unwrap();
2126        let summary = first_tool.result.as_ref().unwrap()["summary"]
2127            .as_str()
2128            .unwrap();
2129        assert!(summary.contains("bash exit=0"));
2130    }
2131
2132    #[test]
2133    fn test_model_view_uses_provider_cache_signal_from_compaction_config() {
2134        let mut messages = vec![Message::user("run commands")];
2135        for index in 0..3 {
2136            messages.extend(make_message_tool_turn(
2137                &format!("call_{index}"),
2138                "bash",
2139                json!({
2140                    "stdout": "small output",
2141                    "stderr": "",
2142                    "exit_code": 0,
2143                    "success": true
2144                }),
2145            ));
2146        }
2147        let usage = TokenUsage::with_cache(150_000, 100, Some(0), None);
2148
2149        let config = CompactionConfig::default();
2150        let result = build_model_view_messages(&messages, &config, Some(&usage));
2151
2152        assert_eq!(result.masked_count, 1);
2153        let first_tool = result.messages[2].tool_result_content().unwrap();
2154        assert_eq!(first_tool.result.as_ref().unwrap()["masked"], true);
2155    }
2156
2157    #[test]
2158    fn test_config_falls_back_to_defaults_for_invalid_json() {
2159        let config = CompactionConfig::from_json(&json!({
2160            "strategy": "nonexistent_strategy",
2161            "budget_percent": "not-a-number"
2162        }));
2163        assert_eq!(config.strategy, CompactionStrategy::Auto);
2164        assert!(config.proactive);
2165    }
2166
2167    #[test]
2168    fn test_config_partial_override() {
2169        let config = CompactionConfig::from_json(&json!({
2170            "budget_percent": 0.7,
2171            "observation_masking": {
2172                "keep_recent_tool_outputs": 3
2173            }
2174        }));
2175        assert_eq!(config.strategy, CompactionStrategy::Auto);
2176        assert!(config.proactive);
2177        assert!((config.budget_percent - 0.7).abs() < f32::EPSILON);
2178        assert_eq!(config.observation_masking.keep_recent_tool_outputs, 3);
2179        assert_eq!(
2180            config.observation_masking.summary_format,
2181            MaskingSummaryFormat::OneLine
2182        );
2183    }
2184
2185    #[test]
2186    fn test_strategy_serialization_roundtrip() {
2187        for strategy in [
2188            CompactionStrategy::Auto,
2189            CompactionStrategy::Native,
2190            CompactionStrategy::ObservationMasking,
2191            CompactionStrategy::Summarization,
2192        ] {
2193            let json = serde_json::to_value(strategy).unwrap();
2194            let deserialized: CompactionStrategy = serde_json::from_value(json).unwrap();
2195            assert_eq!(strategy, deserialized);
2196        }
2197    }
2198
2199    #[test]
2200    fn test_strategy_display() {
2201        assert_eq!(CompactionStrategy::Auto.to_string(), "auto");
2202        assert_eq!(CompactionStrategy::Native.to_string(), "native");
2203        assert_eq!(
2204            CompactionStrategy::ObservationMasking.to_string(),
2205            "observation_masking"
2206        );
2207        assert_eq!(
2208            CompactionStrategy::Summarization.to_string(),
2209            "summarization"
2210        );
2211    }
2212
2213    #[test]
2214    fn test_masking_format_serialization_roundtrip() {
2215        for format in [
2216            MaskingSummaryFormat::OneLine,
2217            MaskingSummaryFormat::HeadTail,
2218        ] {
2219            let json = serde_json::to_value(format).unwrap();
2220            let deserialized: MaskingSummaryFormat = serde_json::from_value(json).unwrap();
2221            assert_eq!(format, deserialized);
2222        }
2223    }
2224
2225    #[test]
2226    fn test_budget_percent_boundary_values() {
2227        let config = CompactionConfig::from_json(&json!({"budget_percent": 0.1}));
2228        assert!((config.budget_percent - 0.1).abs() < f32::EPSILON);
2229
2230        let config = CompactionConfig::from_json(&json!({"budget_percent": 0.99}));
2231        assert!((config.budget_percent - 0.99).abs() < f32::EPSILON);
2232    }
2233
2234    #[test]
2235    fn test_keep_recent_tool_outputs_zero() {
2236        let config = CompactionConfig::from_json(&json!({
2237            "observation_masking": {"keep_recent_tool_outputs": 0}
2238        }));
2239        assert_eq!(config.observation_masking.keep_recent_tool_outputs, 0);
2240    }
2241
2242    // ====================================================================
2243    // Observation masking tests
2244    // ====================================================================
2245
2246    #[test]
2247    fn test_masking_no_tool_messages() {
2248        let messages = vec![make_user_msg("hello"), make_assistant_msg("hi")];
2249        let config = ObservationMaskingConfig::default();
2250        let result = apply_observation_masking(&messages, &config);
2251        assert_eq!(result.masked_count, 0);
2252        assert_eq!(result.messages.len(), 2);
2253    }
2254
2255    #[test]
2256    fn test_masking_fewer_than_keep_recent() {
2257        let messages = vec![
2258            make_user_msg("read file"),
2259            make_assistant_with_tool_call("call_1", "read_file"),
2260            make_tool_result("call_1", "file contents"),
2261            make_assistant_msg("done"),
2262        ];
2263        let config = ObservationMaskingConfig {
2264            keep_recent_tool_outputs: 5,
2265            summary_format: MaskingSummaryFormat::OneLine,
2266        };
2267        let result = apply_observation_masking(&messages, &config);
2268        assert_eq!(result.masked_count, 0);
2269    }
2270
2271    #[test]
2272    fn test_masking_masks_old_outputs() {
2273        let messages = vec![
2274            make_user_msg("start"),
2275            make_assistant_with_tool_call("call_1", "read_file"),
2276            make_tool_result(
2277                "call_1",
2278                "old file contents that are very long and should be masked by the observation masking strategy because it exceeds 100 chars",
2279            ),
2280            make_assistant_msg("got it"),
2281            make_user_msg("next"),
2282            make_assistant_with_tool_call("call_2", "search"),
2283            make_tool_result("call_2", "search results"),
2284            make_assistant_msg("found it"),
2285            make_user_msg("more"),
2286            make_assistant_with_tool_call("call_3", "bash"),
2287            make_tool_result("call_3", "command output"),
2288        ];
2289
2290        let config = ObservationMaskingConfig {
2291            keep_recent_tool_outputs: 2,
2292            summary_format: MaskingSummaryFormat::OneLine,
2293        };
2294        let result = apply_observation_masking(&messages, &config);
2295
2296        assert_eq!(result.masked_count, 1);
2297
2298        // First tool result should be masked
2299        let masked = &result.messages[2];
2300        assert_eq!(masked.role, LlmMessageRole::Tool);
2301        let text = extract_text(&masked.content);
2302        assert!(
2303            text.starts_with('['),
2304            "Expected masked summary, got: {text}"
2305        );
2306        assert!(text.contains("read_file"), "Expected tool name: {text}");
2307
2308        // Last 2 tool results should be verbatim
2309        assert_eq!(extract_text(&result.messages[6].content), "search results");
2310        assert_eq!(extract_text(&result.messages[10].content), "command output");
2311    }
2312
2313    #[test]
2314    fn test_masking_preserves_tool_call_id() {
2315        let messages = vec![
2316            make_assistant_with_tool_call("call_1", "read_file"),
2317            make_tool_result("call_1", "content"),
2318            make_assistant_with_tool_call("call_2", "bash"),
2319            make_tool_result("call_2", "output"),
2320        ];
2321
2322        let config = ObservationMaskingConfig {
2323            keep_recent_tool_outputs: 1,
2324            summary_format: MaskingSummaryFormat::OneLine,
2325        };
2326        let result = apply_observation_masking(&messages, &config);
2327        assert_eq!(result.messages[1].tool_call_id, Some("call_1".to_string()));
2328    }
2329
2330    #[test]
2331    fn test_masking_head_tail_format() {
2332        let long_output = (0..20)
2333            .map(|i| format!("line {i}"))
2334            .collect::<Vec<_>>()
2335            .join("\n");
2336
2337        let messages = vec![
2338            make_assistant_with_tool_call("call_1", "bash"),
2339            make_tool_result("call_1", &long_output),
2340            make_assistant_with_tool_call("call_2", "bash"),
2341            make_tool_result("call_2", "recent output"),
2342        ];
2343
2344        let config = ObservationMaskingConfig {
2345            keep_recent_tool_outputs: 1,
2346            summary_format: MaskingSummaryFormat::HeadTail,
2347        };
2348        let result = apply_observation_masking(&messages, &config);
2349
2350        let text = extract_text(&result.messages[1].content);
2351        assert!(text.contains("line 0"), "Should contain first lines");
2352        assert!(text.contains("line 19"), "Should contain last lines");
2353        assert!(text.contains("lines omitted"), "Should indicate omissions");
2354    }
2355
2356    #[test]
2357    fn test_masking_short_output_inline() {
2358        let messages = vec![
2359            make_assistant_with_tool_call("call_1", "get_time"),
2360            make_tool_result("call_1", "2024-01-01"),
2361            make_assistant_with_tool_call("call_2", "bash"),
2362            make_tool_result("call_2", "ok"),
2363        ];
2364
2365        let config = ObservationMaskingConfig {
2366            keep_recent_tool_outputs: 1,
2367            summary_format: MaskingSummaryFormat::OneLine,
2368        };
2369        let result = apply_observation_masking(&messages, &config);
2370        let text = extract_text(&result.messages[1].content);
2371        assert!(text.contains("2024-01-01"), "Short output included: {text}");
2372    }
2373
2374    #[test]
2375    fn test_masking_all_when_keep_zero() {
2376        let messages = vec![
2377            make_assistant_with_tool_call("call_1", "a"),
2378            make_tool_result("call_1", "output1"),
2379            make_assistant_with_tool_call("call_2", "b"),
2380            make_tool_result("call_2", "output2"),
2381        ];
2382
2383        let config = ObservationMaskingConfig {
2384            keep_recent_tool_outputs: 0,
2385            summary_format: MaskingSummaryFormat::OneLine,
2386        };
2387        let result = apply_observation_masking(&messages, &config);
2388        assert_eq!(result.masked_count, 2);
2389    }
2390
2391    #[test]
2392    fn test_masking_empty_messages() {
2393        let result = apply_observation_masking(&[], &ObservationMaskingConfig::default());
2394        assert_eq!(result.masked_count, 0);
2395        assert!(result.messages.is_empty());
2396    }
2397
2398    #[test]
2399    fn test_masking_preserves_message_count() {
2400        let messages = vec![
2401            make_user_msg("start"),
2402            make_assistant_with_tool_call("c1", "read_file"),
2403            make_tool_result("c1", "content 1"),
2404            make_assistant_msg("ok"),
2405            make_user_msg("next"),
2406            make_assistant_with_tool_call("c2", "bash"),
2407            make_tool_result("c2", "content 2"),
2408            make_assistant_msg("done"),
2409        ];
2410
2411        let config = ObservationMaskingConfig {
2412            keep_recent_tool_outputs: 1,
2413            summary_format: MaskingSummaryFormat::OneLine,
2414        };
2415        let result = apply_observation_masking(&messages, &config);
2416        assert_eq!(result.messages.len(), messages.len());
2417    }
2418
2419    #[test]
2420    fn test_masking_unknown_tool_call_id() {
2421        let messages = vec![
2422            make_tool_result("orphan", "some output"),
2423            make_assistant_with_tool_call("call_2", "bash"),
2424            make_tool_result("call_2", "recent"),
2425        ];
2426
2427        let config = ObservationMaskingConfig {
2428            keep_recent_tool_outputs: 1,
2429            summary_format: MaskingSummaryFormat::OneLine,
2430        };
2431        let result = apply_observation_masking(&messages, &config);
2432        assert_eq!(result.masked_count, 1);
2433        let text = extract_text(&result.messages[0].content);
2434        assert!(text.contains("unknown_tool"), "Fallback name: {text}");
2435    }
2436
2437    #[test]
2438    fn test_masking_many_tool_calls_keeps_exactly_n() {
2439        let mut messages = Vec::new();
2440        for i in 0..10 {
2441            let id = format!("call_{i}");
2442            messages.push(make_assistant_with_tool_call(&id, &format!("tool_{i}")));
2443            messages.push(make_tool_result(&id, &format!("output {i}")));
2444        }
2445
2446        let config = ObservationMaskingConfig {
2447            keep_recent_tool_outputs: 3,
2448            summary_format: MaskingSummaryFormat::OneLine,
2449        };
2450        let result = apply_observation_masking(&messages, &config);
2451        assert_eq!(result.masked_count, 7);
2452
2453        // Last 3 tool results at indices 15, 17, 19 should be verbatim
2454        assert_eq!(extract_text(&result.messages[15].content), "output 7");
2455        assert_eq!(extract_text(&result.messages[17].content), "output 8");
2456        assert_eq!(extract_text(&result.messages[19].content), "output 9");
2457    }
2458
2459    // ====================================================================
2460    // Summarization tests
2461    // ====================================================================
2462
2463    #[test]
2464    fn test_summarization_prompt_default() {
2465        let config = SummarizationConfig::default();
2466        let prompt = build_summarization_prompt(&config);
2467        assert!(prompt.contains("<task>"));
2468        assert!(prompt.contains("decisions"));
2469        assert!(prompt.contains("files_modified"));
2470        assert!(prompt.contains("errors"));
2471        assert!(prompt.contains("current_plan"));
2472    }
2473
2474    #[test]
2475    fn test_summarization_prompt_custom_instructions() {
2476        let config = SummarizationConfig {
2477            instructions: Some("Focus on API changes".to_string()),
2478            ..Default::default()
2479        };
2480        let prompt = build_summarization_prompt(&config);
2481        assert!(prompt.contains("Focus on API changes"));
2482    }
2483
2484    #[test]
2485    fn test_summarization_prompt_custom_preserve() {
2486        let config = SummarizationConfig {
2487            preserve: vec!["auth_tokens".to_string(), "database_schema".to_string()],
2488            ..Default::default()
2489        };
2490        let prompt = build_summarization_prompt(&config);
2491        assert!(prompt.contains("auth_tokens"));
2492        assert!(prompt.contains("database_schema"));
2493        assert!(!prompt.contains("decisions"));
2494    }
2495
2496    #[test]
2497    fn test_summarization_prompt_empty_preserve_uses_defaults() {
2498        let config = SummarizationConfig {
2499            preserve: vec![],
2500            ..Default::default()
2501        };
2502        let prompt = build_summarization_prompt(&config);
2503        assert!(prompt.contains("decisions"));
2504    }
2505
2506    #[test]
2507    fn test_format_messages_for_summarization() {
2508        let messages = vec![
2509            make_user_msg("What is 2+2?"),
2510            make_assistant_msg("The answer is 4."),
2511        ];
2512        let formatted = format_messages_for_summarization(&messages);
2513        assert!(formatted.contains("[user]: What is 2+2?"));
2514        assert!(formatted.contains("[assistant]: The answer is 4."));
2515    }
2516
2517    #[test]
2518    fn test_format_messages_truncates_long_content() {
2519        let long_content = "x".repeat(5000);
2520        let messages = vec![make_user_msg(&long_content)];
2521        let formatted = format_messages_for_summarization(&messages);
2522        assert!(formatted.contains("truncated"));
2523        assert!(formatted.len() < long_content.len());
2524    }
2525
2526    #[test]
2527    fn test_format_messages_truncates_utf8_without_panic() {
2528        let multibyte = "é".repeat(1001); // 2002 bytes, 1001 chars
2529        let messages = vec![make_user_msg(&multibyte)];
2530        let formatted = format_messages_for_summarization(&messages);
2531        assert!(formatted.contains("truncated"));
2532        assert!(formatted.contains("[truncated, 2002 chars total]"));
2533    }
2534
2535    #[test]
2536    fn test_build_summary_message() {
2537        let msg = build_summary_message("The user asked about APIs.");
2538        assert_eq!(msg.role, LlmMessageRole::System);
2539        let text = extract_text(&msg.content);
2540        assert!(text.contains("[CONVERSATION_SUMMARY]"));
2541        assert!(text.contains("The user asked about APIs."));
2542        assert!(text.contains("[/CONVERSATION_SUMMARY]"));
2543    }
2544
2545    // ====================================================================
2546    // Head-tail format edge cases
2547    // ====================================================================
2548
2549    #[test]
2550    fn test_head_tail_short_content_unchanged() {
2551        let content = LlmMessageContent::Text("line1\nline2\nline3".to_string());
2552        assert_eq!(format_head_tail_summary(&content), "line1\nline2\nline3");
2553    }
2554
2555    #[test]
2556    fn test_head_tail_exactly_six_lines() {
2557        let content = LlmMessageContent::Text("1\n2\n3\n4\n5\n6".to_string());
2558        assert_eq!(format_head_tail_summary(&content), "1\n2\n3\n4\n5\n6");
2559    }
2560
2561    #[test]
2562    fn test_head_tail_seven_lines() {
2563        let content = LlmMessageContent::Text("1\n2\n3\n4\n5\n6\n7".to_string());
2564        let result = format_head_tail_summary(&content);
2565        assert!(result.contains("1\n2\n3"));
2566        assert!(result.contains("5\n6\n7"));
2567        assert!(result.contains("1 lines omitted"));
2568    }
2569
2570    // ====================================================================
2571    // One-line format edge cases
2572    // ====================================================================
2573
2574    #[test]
2575    fn test_one_line_empty_output() {
2576        let result = format_one_line_summary("bash", &LlmMessageContent::Text(String::new()));
2577        assert_eq!(result, "[bash → ]");
2578    }
2579
2580    #[test]
2581    fn test_one_line_exactly_100_chars() {
2582        let text = "x".repeat(100);
2583        let result = format_one_line_summary("bash", &LlmMessageContent::Text(text.clone()));
2584        assert!(result.contains(&text));
2585    }
2586
2587    #[test]
2588    fn test_one_line_101_chars_summarized() {
2589        let text = "x".repeat(101);
2590        let result = format_one_line_summary("bash", &LlmMessageContent::Text(text));
2591        assert!(result.contains("lines"));
2592        assert!(result.contains("bytes"));
2593    }
2594
2595    #[test]
2596    fn test_one_line_multipart_content() {
2597        let content = LlmMessageContent::Parts(vec![
2598            LlmContentPart::Text {
2599                text: "part1".to_string(),
2600            },
2601            LlmContentPart::Text {
2602                text: "part2".to_string(),
2603            },
2604        ]);
2605        let result = format_one_line_summary("tool", &content);
2606        assert!(result.contains("part1"));
2607        assert!(result.contains("part2"));
2608    }
2609
2610    // ====================================================================
2611    // CompactionStep tests
2612    // ====================================================================
2613
2614    #[test]
2615    fn test_compaction_step_serialization() {
2616        let step = CompactionStep {
2617            strategy: "observation_masking".to_string(),
2618            messages_after: 42,
2619            duration_ms: 12,
2620        };
2621        let json = serde_json::to_value(&step).unwrap();
2622        assert_eq!(json["strategy"], "observation_masking");
2623        assert_eq!(json["messages_after"], 42);
2624        assert_eq!(json["duration_ms"], 12);
2625    }
2626
2627    // ====================================================================
2628    // Token estimation tests
2629    // ====================================================================
2630
2631    #[test]
2632    fn test_estimate_tokens_text() {
2633        let msg = make_user_msg("hello world"); // 11 chars → ~2 tokens
2634        let tokens = estimate_tokens(&msg);
2635        assert_eq!(tokens, 11 / 4);
2636    }
2637
2638    #[test]
2639    fn test_estimate_tokens_empty() {
2640        let msg = make_user_msg("");
2641        assert_eq!(estimate_tokens(&msg), 0);
2642    }
2643
2644    #[test]
2645    fn test_estimate_total_tokens() {
2646        let messages = vec![
2647            make_user_msg("a".repeat(400).as_str()),      // 100 tokens
2648            make_assistant_msg("b".repeat(200).as_str()), // 50 tokens
2649        ];
2650        assert_eq!(estimate_total_tokens(&messages), 150);
2651    }
2652
2653    #[test]
2654    fn test_estimate_tokens_with_tool_calls() {
2655        let msg = make_assistant_with_tool_call("call_1", "read_file");
2656        let tokens = estimate_tokens(&msg);
2657        assert!(tokens > 0, "Tool call should contribute tokens");
2658    }
2659
2660    // ====================================================================
2661    // Proactive compaction check tests
2662    // ====================================================================
2663
2664    #[test]
2665    fn test_should_compact_proactively_under_budget() {
2666        let messages = vec![make_user_msg("short")];
2667        let config = CompactionConfig::default(); // 85% budget
2668        assert!(!should_compact_proactively(&messages, &config, 128_000));
2669    }
2670
2671    #[test]
2672    fn test_should_compact_proactively_over_budget() {
2673        // Create messages that exceed 85% of 1000 tokens = 850 tokens
2674        let big_text = "x".repeat(4000); // ~1000 tokens
2675        let messages = vec![make_user_msg(&big_text)];
2676        let config = CompactionConfig::default();
2677        assert!(should_compact_proactively(&messages, &config, 1000));
2678    }
2679
2680    #[test]
2681    fn test_should_compact_proactively_disabled() {
2682        let big_text = "x".repeat(4000);
2683        let messages = vec![make_user_msg(&big_text)];
2684        let config = CompactionConfig {
2685            proactive: false,
2686            ..Default::default()
2687        };
2688        assert!(!should_compact_proactively(&messages, &config, 1000));
2689    }
2690
2691    // ====================================================================
2692    // Aggressive trim tests
2693    // ====================================================================
2694
2695    #[test]
2696    fn test_aggressive_trim_keeps_newest() {
2697        // Use big messages so budget matters
2698        let messages = vec![
2699            make_user_msg(&"s".repeat(400)),      // system: 100 tokens
2700            make_user_msg(&"a".repeat(400)),      // old: 100 tokens
2701            make_assistant_msg(&"b".repeat(400)), // old: 100 tokens
2702            make_user_msg(&"c".repeat(400)),      // recent: 100 tokens
2703            make_assistant_msg(&"d".repeat(400)), // recent: 100 tokens
2704        ];
2705        // Target: enough for system + 2 recent messages only (300 tokens)
2706        let target_tokens = 300;
2707        let result = aggressive_trim(&messages, target_tokens, true);
2708        assert!(
2709            result.len() < messages.len(),
2710            "Expected trim, got {} messages",
2711            result.len()
2712        );
2713        // Should keep system prompt (first)
2714        assert_eq!(result[0].role, LlmMessageRole::User);
2715    }
2716
2717    #[test]
2718    fn test_aggressive_trim_empty() {
2719        let result = aggressive_trim(&[], 100, false);
2720        assert!(result.is_empty());
2721    }
2722
2723    #[test]
2724    fn test_aggressive_trim_anchors_first_conversation_message() {
2725        // messages[0] = system prompt; messages[1] = the original task (old, big);
2726        // the rest are newer. Under a tight budget the task must still survive so
2727        // the model does not lose track of what it is doing.
2728        let messages = vec![
2729            make_user_msg("sys"),                 // system prompt (small)
2730            make_user_msg(&"TASK ".repeat(80)),   // the task: old + big
2731            make_assistant_msg(&"x".repeat(400)), // filler old
2732            make_user_msg(&"c".repeat(400)),      // recent
2733            make_assistant_msg(&"d".repeat(400)), // recent
2734        ];
2735        let result = aggressive_trim(&messages, 250, true);
2736
2737        assert!(
2738            result.len() < messages.len(),
2739            "expected a trim, got {} messages",
2740            result.len()
2741        );
2742        let kept_task = result.iter().any(|m| match &m.content {
2743            LlmMessageContent::Text(t) => t.contains("TASK"),
2744            _ => false,
2745        });
2746        assert!(kept_task, "the original task must be anchored, not dropped");
2747    }
2748
2749    #[test]
2750    fn test_aggressive_trim_everything_fits() {
2751        let messages = vec![make_user_msg("hi"), make_assistant_msg("hello")];
2752        let result = aggressive_trim(&messages, 100_000, false);
2753        assert_eq!(result.len(), 2);
2754    }
2755
2756    // ====================================================================
2757    // Session compaction metrics tests
2758    // ====================================================================
2759
2760    #[test]
2761    fn test_session_metrics_record() {
2762        let mut metrics = SessionCompactionMetrics::default();
2763        metrics.record("observation_masking+native", 100, 50, 200);
2764
2765        assert_eq!(metrics.compaction_count, 1);
2766        assert_eq!(metrics.total_messages_saved, 50);
2767        assert_eq!(metrics.total_duration_ms, 200);
2768        assert_eq!(metrics.strategy_counts["observation_masking"], 1);
2769        assert_eq!(metrics.strategy_counts["native"], 1);
2770    }
2771
2772    #[test]
2773    fn test_session_metrics_accumulate() {
2774        let mut metrics = SessionCompactionMetrics::default();
2775        metrics.record("observation_masking", 100, 80, 10);
2776        metrics.record("summarization", 80, 40, 500);
2777
2778        assert_eq!(metrics.compaction_count, 2);
2779        assert_eq!(metrics.total_messages_saved, 60);
2780        assert_eq!(metrics.total_duration_ms, 510);
2781        assert_eq!(metrics.strategy_counts["observation_masking"], 1);
2782        assert_eq!(metrics.strategy_counts["summarization"], 1);
2783    }
2784
2785    #[test]
2786    fn test_session_metrics_serialization() {
2787        let mut metrics = SessionCompactionMetrics::default();
2788        metrics.record("auto", 50, 30, 100);
2789        let json = serde_json::to_value(&metrics).unwrap();
2790        assert_eq!(json["compaction_count"], 1);
2791        assert_eq!(json["total_messages_saved"], 20);
2792    }
2793
2794    // ====================================================================
2795    // Hierarchical memory tier tests
2796    // ====================================================================
2797
2798    #[test]
2799    fn test_classify_memory_tiers_basic() {
2800        let messages: Vec<LlmMessage> = (0..30)
2801            .map(|i| make_user_msg(&format!("msg {i}")))
2802            .collect();
2803
2804        let config = HierarchicalMemoryConfig {
2805            hot_messages: 5,
2806            warm_messages: 10,
2807        };
2808
2809        let classified = classify_memory_tiers(&messages, &config);
2810        assert_eq!(classified.len(), 30);
2811
2812        // Last 5 = hot
2813        assert_eq!(classified[29].0, MemoryTier::Hot);
2814        assert_eq!(classified[25].0, MemoryTier::Hot);
2815
2816        // Next 10 = warm
2817        assert_eq!(classified[24].0, MemoryTier::Warm);
2818        assert_eq!(classified[15].0, MemoryTier::Warm);
2819
2820        // Rest = cold
2821        assert_eq!(classified[14].0, MemoryTier::Cold);
2822        assert_eq!(classified[0].0, MemoryTier::Cold);
2823    }
2824
2825    #[test]
2826    fn test_classify_memory_tiers_all_hot() {
2827        let messages: Vec<LlmMessage> =
2828            (0..3).map(|i| make_user_msg(&format!("msg {i}"))).collect();
2829
2830        let config = HierarchicalMemoryConfig::default(); // 20 hot
2831
2832        let classified = classify_memory_tiers(&messages, &config);
2833        assert!(classified.iter().all(|(tier, _)| *tier == MemoryTier::Hot));
2834    }
2835
2836    #[test]
2837    fn test_apply_hierarchical_memory_basic() {
2838        let mut messages = Vec::new();
2839
2840        // Cold: old tool interactions
2841        for i in 0..5 {
2842            let id = format!("old_{i}");
2843            messages.push(make_assistant_with_tool_call(&id, "read_file"));
2844            messages.push(make_tool_result(&id, &format!("old content {i}")));
2845        }
2846
2847        // Warm: mid tool interactions
2848        for i in 0..3 {
2849            let id = format!("mid_{i}");
2850            messages.push(make_assistant_with_tool_call(&id, "bash"));
2851            messages.push(make_tool_result(&id, &format!("mid output {i}")));
2852        }
2853
2854        // Hot: recent
2855        messages.push(make_user_msg("what now?"));
2856        messages.push(make_assistant_msg("let me check"));
2857
2858        let config = HierarchicalMemoryConfig {
2859            hot_messages: 2,
2860            warm_messages: 6,
2861        };
2862        let masking_config = ObservationMaskingConfig::default();
2863
2864        let result = apply_hierarchical_memory(
2865            &messages,
2866            &config,
2867            &masking_config,
2868            Some("Summary of old work"),
2869        );
2870
2871        // Should have: 1 summary + 6 warm messages + 2 hot messages
2872        assert!(result.len() <= 9);
2873        // First should be the summary
2874        let first_text = extract_text(&result[0].content);
2875        assert!(first_text.contains("CONVERSATION_SUMMARY"));
2876        // Last 2 should be hot (verbatim)
2877        let last = extract_text(&result[result.len() - 1].content);
2878        assert!(last.contains("let me check"));
2879    }
2880
2881    #[test]
2882    fn test_apply_hierarchical_memory_no_cold() {
2883        let messages = vec![make_user_msg("hello"), make_assistant_msg("hi")];
2884
2885        let config = HierarchicalMemoryConfig {
2886            hot_messages: 5,
2887            warm_messages: 5,
2888        };
2889
2890        let result = apply_hierarchical_memory(
2891            &messages,
2892            &config,
2893            &ObservationMaskingConfig::default(),
2894            None,
2895        );
2896        // All hot, no summary needed
2897        assert_eq!(result.len(), 2);
2898    }
2899
2900    #[test]
2901    fn test_memory_tier_config_from_json() {
2902        let config: HierarchicalMemoryConfig = serde_json::from_value(json!({
2903            "hot_messages": 10,
2904            "warm_messages": 50
2905        }))
2906        .unwrap();
2907        assert_eq!(config.hot_messages, 10);
2908        assert_eq!(config.warm_messages, 50);
2909    }
2910
2911    #[test]
2912    fn test_memory_tier_config_defaults() {
2913        let config = HierarchicalMemoryConfig::default();
2914        assert_eq!(config.hot_messages, 20);
2915        assert_eq!(config.warm_messages, 100);
2916    }
2917
2918    #[test]
2919    fn test_compaction_config_with_memory_tiers() {
2920        let config = CompactionConfig::from_json(&json!({
2921            "strategy": "auto",
2922            "memory_tiers": {
2923                "hot_messages": 15,
2924                "warm_messages": 80
2925            }
2926        }));
2927        assert_eq!(config.memory_tiers.hot_messages, 15);
2928        assert_eq!(config.memory_tiers.warm_messages, 80);
2929    }
2930
2931    #[test]
2932    fn test_memory_tier_serialization() {
2933        assert_eq!(serde_json::to_value(MemoryTier::Hot).unwrap(), json!("hot"));
2934        assert_eq!(
2935            serde_json::to_value(MemoryTier::Warm).unwrap(),
2936            json!("warm")
2937        );
2938        assert_eq!(
2939            serde_json::to_value(MemoryTier::Cold).unwrap(),
2940            json!("cold")
2941        );
2942    }
2943
2944    // ====================================================================
2945    // Skill content protection tests
2946    // ====================================================================
2947
2948    #[test]
2949    fn test_masking_skips_activate_skill_results() {
2950        // 3 tool results: activate_skill (protected), read_file, bash
2951        // With keep_recent=1, only read_file should be masked (activate_skill exempt)
2952        let messages = vec![
2953            make_assistant_with_tool_call("call_skill", "activate_skill"),
2954            make_tool_result(
2955                "call_skill",
2956                "You are a code review agent. Follow these instructions...",
2957            ),
2958            make_assistant_msg("Skill activated"),
2959            make_assistant_with_tool_call("call_read", "read_file"),
2960            make_tool_result(
2961                "call_read",
2962                "file contents that are long enough to be masked by observation masking because they exceed one hundred characters easily",
2963            ),
2964            make_assistant_msg("got it"),
2965            make_assistant_with_tool_call("call_bash", "bash"),
2966            make_tool_result("call_bash", "command output"),
2967        ];
2968
2969        let config = ObservationMaskingConfig {
2970            keep_recent_tool_outputs: 1,
2971            summary_format: MaskingSummaryFormat::OneLine,
2972        };
2973        let result = apply_observation_masking(&messages, &config);
2974
2975        // activate_skill result should be verbatim
2976        assert_eq!(
2977            extract_text(&result.messages[1].content),
2978            "You are a code review agent. Follow these instructions..."
2979        );
2980        // read_file result should be masked (it's the only maskable old one)
2981        assert!(extract_text(&result.messages[4].content).starts_with('['));
2982        // bash result should be verbatim (most recent maskable)
2983        assert_eq!(extract_text(&result.messages[7].content), "command output");
2984        assert_eq!(result.masked_count, 1);
2985    }
2986
2987    #[test]
2988    fn test_masking_all_activate_skill_exempt_from_count() {
2989        // 2 activate_skill results + 1 regular tool result
2990        // With keep_recent=0, only the regular one should be masked
2991        let messages = vec![
2992            make_assistant_with_tool_call("s1", "activate_skill"),
2993            make_tool_result("s1", "Skill 1 instructions"),
2994            make_assistant_with_tool_call("s2", "activate_skill"),
2995            make_tool_result("s2", "Skill 2 instructions"),
2996            make_assistant_with_tool_call("c1", "bash"),
2997            make_tool_result("c1", "output"),
2998        ];
2999
3000        let config = ObservationMaskingConfig {
3001            keep_recent_tool_outputs: 0,
3002            summary_format: MaskingSummaryFormat::OneLine,
3003        };
3004        let result = apply_observation_masking(&messages, &config);
3005
3006        assert_eq!(result.masked_count, 1);
3007        // Both skill results preserved
3008        assert_eq!(
3009            extract_text(&result.messages[1].content),
3010            "Skill 1 instructions"
3011        );
3012        assert_eq!(
3013            extract_text(&result.messages[3].content),
3014            "Skill 2 instructions"
3015        );
3016    }
3017
3018    #[test]
3019    fn test_aggressive_trim_preserves_skill_messages() {
3020        // Create messages where budget only fits ~2 messages, but skill messages
3021        // should always be preserved
3022        let messages = vec![
3023            make_user_msg(&"s".repeat(400)), // system: 100 tokens
3024            make_assistant_with_tool_call("skill1", "activate_skill"),
3025            make_tool_result("skill1", "Important skill instructions"),
3026            make_user_msg(&"a".repeat(400)),      // old: 100 tokens
3027            make_assistant_msg(&"b".repeat(400)), // old: 100 tokens
3028            make_user_msg(&"c".repeat(400)),      // recent: 100 tokens
3029            make_assistant_msg(&"d".repeat(400)), // recent: 100 tokens
3030        ];
3031
3032        // Budget for system + skill call + skill result + 1 recent = ~400 tokens
3033        // Should keep: system, skill call, skill result, and as many recent as fit
3034        let target_tokens = 400;
3035        let result = aggressive_trim(&messages, target_tokens, true);
3036
3037        // Verify skill messages are preserved
3038        let has_skill_result = result.iter().any(|m| {
3039            m.role == LlmMessageRole::Tool
3040                && extract_text(&m.content) == "Important skill instructions"
3041        });
3042        assert!(
3043            has_skill_result,
3044            "Skill tool result must survive aggressive trim"
3045        );
3046
3047        let has_skill_call = result.iter().any(|m| {
3048            m.tool_calls
3049                .as_ref()
3050                .is_some_and(|calls| calls.iter().any(|tc| tc.name == "activate_skill"))
3051        });
3052        assert!(
3053            has_skill_call,
3054            "Skill tool call must survive aggressive trim"
3055        );
3056    }
3057
3058    #[test]
3059    fn test_hierarchical_memory_rescues_skill_from_cold_tier() {
3060        let mut messages = Vec::new();
3061
3062        // Cold tier: old messages including a skill activation
3063        messages.push(make_assistant_with_tool_call("skill1", "activate_skill"));
3064        messages.push(make_tool_result(
3065            "skill1",
3066            "You must always validate input.",
3067        ));
3068        for i in 0..8 {
3069            let id = format!("old_{i}");
3070            messages.push(make_assistant_with_tool_call(&id, "read_file"));
3071            messages.push(make_tool_result(&id, &format!("old content {i}")));
3072        }
3073
3074        // Warm tier
3075        for i in 0..3 {
3076            let id = format!("mid_{i}");
3077            messages.push(make_assistant_with_tool_call(&id, "bash"));
3078            messages.push(make_tool_result(&id, &format!("mid output {i}")));
3079        }
3080
3081        // Hot tier
3082        messages.push(make_user_msg("what now?"));
3083        messages.push(make_assistant_msg("let me check"));
3084
3085        let config = HierarchicalMemoryConfig {
3086            hot_messages: 2,
3087            warm_messages: 6,
3088        };
3089        let masking_config = ObservationMaskingConfig::default();
3090
3091        let result = apply_hierarchical_memory(
3092            &messages,
3093            &config,
3094            &masking_config,
3095            Some("Summary of old work"),
3096        );
3097
3098        // The protected skill messages from cold tier should be rescued
3099        let has_skill_instructions = result
3100            .iter()
3101            .any(|m| extract_text(&m.content).contains("You must always validate input."));
3102        assert!(
3103            has_skill_instructions,
3104            "Skill instructions from cold tier must be rescued into output"
3105        );
3106
3107        // Summary should still be present
3108        assert!(extract_text(&result[0].content).contains("CONVERSATION_SUMMARY"));
3109    }
3110
3111    #[test]
3112    fn test_is_protected_tool_result_detection() {
3113        let messages = vec![
3114            make_assistant_with_tool_call("s1", "activate_skill"),
3115            make_tool_result("s1", "skill content"),
3116            make_assistant_with_tool_call("r1", "read_file"),
3117            make_tool_result("r1", "file content"),
3118        ];
3119
3120        // activate_skill result is protected
3121        assert!(is_protected_tool_result(&messages, &messages[1]));
3122        // read_file result is not
3123        assert!(!is_protected_tool_result(&messages, &messages[3]));
3124        // non-tool message is not
3125        assert!(!is_protected_tool_result(&messages, &messages[0]));
3126    }
3127
3128    #[test]
3129    fn test_is_protected_tool_call_message_detection() {
3130        let skill_call = make_assistant_with_tool_call("s1", "activate_skill");
3131        let regular_call = make_assistant_with_tool_call("r1", "read_file");
3132        let user_msg = make_user_msg("hello");
3133
3134        assert!(is_protected_tool_call_message(&skill_call));
3135        assert!(!is_protected_tool_call_message(&regular_call));
3136        assert!(!is_protected_tool_call_message(&user_msg));
3137    }
3138
3139    #[test]
3140    fn test_default_preserve_includes_skill_instructions() {
3141        let config = SummarizationConfig::default();
3142        assert!(
3143            config.preserve.contains(&"skill_instructions".to_string()),
3144            "Default preserve list must include skill_instructions"
3145        );
3146    }
3147
3148    #[test]
3149    fn test_summarization_prompt_mentions_skill_protection() {
3150        let config = SummarizationConfig::default();
3151        let prompt = build_summarization_prompt(&config);
3152        assert!(
3153            prompt.contains("activate_skill"),
3154            "Summarization prompt must instruct LLM to preserve skill content"
3155        );
3156    }
3157
3158    #[test]
3159    fn test_aggressive_trim_protected_exceed_budget() {
3160        // When protected messages alone exceed the budget, keep as many as
3161        // fit (newest first) and drop non-protected entirely.
3162        let messages = vec![
3163            make_user_msg(&"s".repeat(400)), // system ~100 tokens
3164            make_assistant_with_tool_call("skill1", "activate_skill"), // protected
3165            make_tool_result("skill1", &"x".repeat(800)), // protected ~200 tokens
3166            make_assistant_with_tool_call("skill2", "activate_skill"), // protected
3167            make_tool_result("skill2", &"y".repeat(800)), // protected ~200 tokens
3168            make_user_msg(&"z".repeat(400)), // non-protected
3169        ];
3170
3171        // Budget only fits system + ~1 protected pair
3172        let result = aggressive_trim(&messages, 200, true);
3173
3174        // Must not exceed budget — non-protected messages dropped
3175        let has_non_protected = result
3176            .iter()
3177            .any(|m| m.role == LlmMessageRole::User && extract_text(&m.content).contains('z'));
3178        assert!(
3179            !has_non_protected,
3180            "Non-protected messages must be dropped when protected exceed budget"
3181        );
3182    }
3183
3184    #[test]
3185    fn test_format_messages_no_truncate_protected_tool_result() {
3186        // Protected tool results should not be truncated at 2000 chars
3187        let long_instructions = "a".repeat(5000);
3188        let messages = vec![
3189            make_assistant_with_tool_call("s1", "activate_skill"),
3190            make_tool_result("s1", &long_instructions),
3191            make_assistant_with_tool_call("r1", "read_file"),
3192            make_tool_result("r1", &"b".repeat(5000)),
3193        ];
3194
3195        let formatted = format_messages_for_summarization(&messages);
3196
3197        // Skill result: full 5000-char content present, not truncated
3198        assert!(
3199            formatted.contains(&long_instructions),
3200            "Protected tool result must not be truncated"
3201        );
3202        // Regular result: should be truncated
3203        assert!(
3204            formatted.contains("[truncated, 5000 chars total]"),
3205            "Non-protected tool result should be truncated"
3206        );
3207    }
3208
3209    #[test]
3210    fn test_hierarchical_memory_cross_tier_boundary_protection() {
3211        // The activate_skill tool-call is in cold tier, but its tool-result
3212        // lands in warm tier. The result must still be protected from masking.
3213        let mut messages = Vec::new();
3214
3215        // Cold tier: skill call + filler to push result into warm tier
3216        messages.push(make_assistant_with_tool_call("skill1", "activate_skill"));
3217        for i in 0..9 {
3218            let id = format!("cold_{i}");
3219            messages.push(make_assistant_with_tool_call(&id, "read_file"));
3220            messages.push(make_tool_result(&id, &format!("cold content {i}")));
3221        }
3222
3223        // Warm tier starts here — skill result is first warm message
3224        messages.push(make_tool_result(
3225            "skill1",
3226            "Cross-tier skill instructions that must survive",
3227        ));
3228        for i in 0..2 {
3229            let id = format!("warm_{i}");
3230            messages.push(make_assistant_with_tool_call(&id, "bash"));
3231            messages.push(make_tool_result(&id, &format!("warm output {i}")));
3232        }
3233
3234        // Hot tier
3235        messages.push(make_user_msg("continue"));
3236        messages.push(make_assistant_msg("ok"));
3237
3238        let config = HierarchicalMemoryConfig {
3239            hot_messages: 2,
3240            warm_messages: 5, // skill result + 2 bash pairs
3241        };
3242        let masking_config = ObservationMaskingConfig {
3243            keep_recent_tool_outputs: 0,
3244            summary_format: MaskingSummaryFormat::OneLine,
3245        };
3246
3247        let result = apply_hierarchical_memory(&messages, &config, &masking_config, None);
3248
3249        let has_skill_instructions = result.iter().any(|m| {
3250            extract_text(&m.content).contains("Cross-tier skill instructions that must survive")
3251        });
3252        assert!(
3253            has_skill_instructions,
3254            "Skill result in warm tier with call in cold tier must be protected"
3255        );
3256    }
3257}
everruns_core/capabilities/compaction.rs

everruns_core/capabilities/
compaction.rs