Skip to main content

everruns_core/capabilities/
compaction.rs

1//! Compaction Capability
2//!
3//! Configurable context compaction strategy. Users choose between native provider
4//! compaction (e.g., OpenAI /responses/compact) and our own strategies (observation
5//! masking, LLM summarization). See specs/compaction.md.
6//!
7//! Design decisions:
8//! - Strategy selection is per-agent/harness via `AgentCapabilityConfig`
9//! - Native and our own strategies coexist as first-class options
//! - The `auto` cascade: observation masking → native → summarization → aggressive trim
11//! - Proactive compaction at a configurable budget threshold, not just on error
12
13use super::{Capability, CapabilityStatus, ModelViewContext, ModelViewProvider};
14use crate::events::TokenUsage;
15use crate::message::{ContentPart, Message, MessageRole};
16use crate::message_filter::MessageFilterProvider;
17use serde::{Deserialize, Serialize};
18use std::collections::HashMap;
19use std::sync::Arc;
20
/// Capability ID for compaction.
///
/// Returned by `CompactionCapability::id`; the config example on
/// `CompactionConfig` uses the same string as its `ref` value.
pub const COMPACTION_CAPABILITY_ID: &str = "compaction";
23
/// Compaction strategy selection.
///
/// Serialized in snake_case (`auto`, `native`, `observation_masking`,
/// `summarization`). `Auto` is the default variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum CompactionStrategy {
    /// Cascade: observation masking → native → summarization → aggressive trim.
    #[default]
    Auto,
    /// Use provider's native compact endpoint only (e.g., OpenAI /responses/compact).
    Native,
    /// Strip old tool outputs, replace with one-line summaries.
    ObservationMasking,
    /// Use LLM to summarize older turns.
    Summarization,
}
38
39impl std::fmt::Display for CompactionStrategy {
40    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
41        match self {
42            Self::Auto => write!(f, "auto"),
43            Self::Native => write!(f, "native"),
44            Self::ObservationMasking => write!(f, "observation_masking"),
45            Self::Summarization => write!(f, "summarization"),
46        }
47    }
48}
49
/// Format for masked tool output summaries.
///
/// Serialized in snake_case (`one_line`, `head_tail`). `OneLine` is the
/// default variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum MaskingSummaryFormat {
    /// `[tool_name(args_truncated) → OK]`
    #[default]
    OneLine,
    /// Keep first and last 3 lines of output.
    HeadTail,
}
60
/// Observation masking settings.
///
/// Every field carries a serde default, so omitted fields take the same
/// values that `Default::default()` produces.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ObservationMaskingConfig {
    /// Number of recent tool outputs to keep verbatim.
    #[serde(default = "default_keep_recent_tool_outputs")]
    pub keep_recent_tool_outputs: usize,

    /// Format for masked tool output summaries.
    #[serde(default)]
    pub summary_format: MaskingSummaryFormat,
}
72
73impl Default for ObservationMaskingConfig {
74    fn default() -> Self {
75        Self {
76            keep_recent_tool_outputs: default_keep_recent_tool_outputs(),
77            summary_format: MaskingSummaryFormat::default(),
78        }
79    }
80}
81
/// Serde default for `ObservationMaskingConfig::keep_recent_tool_outputs`.
fn default_keep_recent_tool_outputs() -> usize {
    // Lowered from 5 to 2 (EVE-224). With EVE-221 capping exec output at 16 KiB,
    // keeping 2 recent (~8K tokens) instead of 5 (~20K tokens) significantly reduces
    // stale exec output accumulation. Older tool results are masked to one-line summaries.
    const KEEP_RECENT_TOOL_OUTPUTS: usize = 2;
    KEEP_RECENT_TOOL_OUTPUTS
}
88
/// Cost-control masking settings.
///
/// Unlike proactive compaction, this is cost-oriented rather than
/// context-window-oriented: old bulky tool results should not stay verbatim in
/// every request just because the model still has room for them.
///
/// Every field carries a serde default, so omitted fields take the same
/// values that `Default::default()` produces.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CostControlConfig {
    /// Enable low-cost tool-result masking before every LLM call.
    #[serde(default = "default_cost_control_enabled")]
    pub enabled: bool,

    /// Number of most-recent tool results to always keep verbatim.
    #[serde(default = "default_cost_control_keep_recent_tool_results")]
    pub keep_recent_tool_results: usize,

    /// Start masking once this many tool results are present.
    #[serde(default = "default_cost_control_mask_after_tool_results")]
    pub mask_after_tool_results: usize,

    /// Start masking once aggregate live tool-result payload exceeds this many bytes.
    #[serde(default = "default_cost_control_max_live_tool_result_bytes")]
    pub max_live_tool_result_bytes: usize,

    /// If cumulative/session usage is available, mask when uncached input exceeds this.
    #[serde(default = "default_cost_control_max_uncached_input_tokens")]
    pub max_uncached_input_tokens: u32,

    /// If cumulative/session usage is available, mask when cache read ratio falls below this.
    #[serde(default = "default_cost_control_min_cache_read_ratio")]
    pub min_cache_read_ratio: f32,
}
120
121impl Default for CostControlConfig {
122    fn default() -> Self {
123        Self {
124            enabled: default_cost_control_enabled(),
125            keep_recent_tool_results: default_cost_control_keep_recent_tool_results(),
126            mask_after_tool_results: default_cost_control_mask_after_tool_results(),
127            max_live_tool_result_bytes: default_cost_control_max_live_tool_result_bytes(),
128            max_uncached_input_tokens: default_cost_control_max_uncached_input_tokens(),
129            min_cache_read_ratio: default_cost_control_min_cache_read_ratio(),
130        }
131    }
132}
133
/// Serde default for `CostControlConfig::enabled`.
fn default_cost_control_enabled() -> bool {
    const ENABLED_BY_DEFAULT: bool = true;
    ENABLED_BY_DEFAULT
}
137
/// Serde default for `CostControlConfig::keep_recent_tool_results`.
fn default_cost_control_keep_recent_tool_results() -> usize {
    const KEEP_RECENT: usize = 2;
    KEEP_RECENT
}
141
/// Serde default for `CostControlConfig::mask_after_tool_results`.
fn default_cost_control_mask_after_tool_results() -> usize {
    const MASK_AFTER: usize = 4;
    MASK_AFTER
}
145
/// Serde default for `CostControlConfig::max_live_tool_result_bytes` (24 KiB).
fn default_cost_control_max_live_tool_result_bytes() -> usize {
    const KIB: usize = 1024;
    24 * KIB
}
149
/// Serde default for `CostControlConfig::max_uncached_input_tokens`.
fn default_cost_control_max_uncached_input_tokens() -> u32 {
    const MAX_UNCACHED_INPUT_TOKENS: u32 = 100_000;
    MAX_UNCACHED_INPUT_TOKENS
}
153
/// Serde default for `CostControlConfig::min_cache_read_ratio`.
fn default_cost_control_min_cache_read_ratio() -> f32 {
    const MIN_CACHE_READ_RATIO: f32 = 0.35;
    MIN_CACHE_READ_RATIO
}
157
/// Summarization settings.
///
/// Every field carries a serde default, so omitted fields take the same
/// values that `Default::default()` produces.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SummarizationConfig {
    /// Model to use for summarization. None = same model as agent.
    #[serde(default)]
    pub model: Option<String>,

    /// What to preserve in summaries.
    #[serde(default = "default_preserve")]
    pub preserve: Vec<String>,

    /// Custom instructions appended to summarization prompt.
    #[serde(default)]
    pub instructions: Option<String>,
}
173
174impl Default for SummarizationConfig {
175    fn default() -> Self {
176        Self {
177            model: None,
178            preserve: default_preserve(),
179            instructions: None,
180        }
181    }
182}
183
/// Serde default for `SummarizationConfig::preserve`: the categories a summary should retain.
fn default_preserve() -> Vec<String> {
    const PRESERVED: [&str; 5] = [
        "decisions",
        "files_modified",
        "errors",
        "current_plan",
        "skill_instructions",
    ];
    PRESERVED.iter().map(|key| (*key).to_owned()).collect()
}
193
/// Compaction capability configuration.
///
/// Configured per agent/harness via `AgentCapabilityConfig`:
/// ```json
/// { "ref": "compaction", "config": { "strategy": "auto", "proactive": true } }
/// ```
///
/// Every field carries a serde default, so a partial (or empty) JSON object
/// is accepted and the omitted fields take their default values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompactionConfig {
    /// Which strategy to use.
    #[serde(default)]
    pub strategy: CompactionStrategy,

    /// Compact proactively at budget_percent, not just on RequestTooLarge.
    #[serde(default = "default_proactive")]
    pub proactive: bool,

    /// Trigger proactive compaction at this fraction of context budget.
    #[serde(default = "default_budget_percent")]
    pub budget_percent: f32,

    /// Observation masking settings.
    #[serde(default)]
    pub observation_masking: ObservationMaskingConfig,

    /// Summarization settings.
    #[serde(default)]
    pub summarization: SummarizationConfig,

    /// Hierarchical memory tier settings for hot/warm/cold management.
    #[serde(default)]
    pub memory_tiers: HierarchicalMemoryConfig,

    /// Always-on cost-oriented masking for stale tool results.
    #[serde(default)]
    pub cost_control: CostControlConfig,
}
230
231impl Default for CompactionConfig {
232    fn default() -> Self {
233        Self {
234            strategy: CompactionStrategy::default(),
235            proactive: default_proactive(),
236            budget_percent: default_budget_percent(),
237            observation_masking: ObservationMaskingConfig::default(),
238            summarization: SummarizationConfig::default(),
239            memory_tiers: HierarchicalMemoryConfig::default(),
240            cost_control: CostControlConfig::default(),
241        }
242    }
243}
244
/// Serde default for `CompactionConfig::proactive`.
fn default_proactive() -> bool {
    const PROACTIVE_BY_DEFAULT: bool = true;
    PROACTIVE_BY_DEFAULT
}
248
/// Serde default for `CompactionConfig::budget_percent` (a fraction, not a percentage).
fn default_budget_percent() -> f32 {
    const BUDGET_FRACTION: f32 = 0.85;
    BUDGET_FRACTION
}
252
253impl CompactionConfig {
254    /// Parse from JSON value, falling back to defaults for invalid config.
255    pub fn from_json(value: &serde_json::Value) -> Self {
256        serde_json::from_value(value.clone()).unwrap_or_default()
257    }
258}
259
/// Compaction capability.
///
/// Stateless unit struct; all behavior lives in its `Capability` impl, which
/// exposes a message filter provider and a model view provider.
pub struct CompactionCapability;
262
impl Capability for CompactionCapability {
    /// Stable identifier: `COMPACTION_CAPABILITY_ID`.
    fn id(&self) -> &str {
        COMPACTION_CAPABILITY_ID
    }

    /// Human-readable capability name.
    fn name(&self) -> &str {
        "Compaction"
    }

    /// Long-form description of the capability and its strategies.
    fn description(&self) -> &str {
        r#"Configurable context compaction when conversations exceed LLM context windows.

Choose between native provider compaction (e.g., OpenAI /responses/compact), observation masking (strip old tool outputs), or LLM summarization. The `auto` strategy cascades through all available options."#
    }

    /// The capability is always reported as available.
    fn status(&self) -> CapabilityStatus {
        CapabilityStatus::Available
    }

    /// Icon name for this capability.
    fn icon(&self) -> Option<&str> {
        Some("shrink")
    }

    /// Category label for this capability.
    fn category(&self) -> Option<&str> {
        Some("Optimization")
    }

    /// Supplies a fresh `CompactionFilterProvider` on every call.
    fn message_filter_provider(&self) -> Option<Arc<dyn MessageFilterProvider>> {
        Some(Arc::new(CompactionFilterProvider))
    }

    /// Supplies a fresh `CompactionModelViewProvider` on every call.
    fn model_view_provider(&self) -> Option<Arc<dyn ModelViewProvider>> {
        Some(Arc::new(CompactionModelViewProvider))
    }
}
298
/// Model-view provider that applies cost-control masking of stale tool
/// results to the messages handed to the model.
struct CompactionModelViewProvider;
300
301impl ModelViewProvider for CompactionModelViewProvider {
302    fn apply_model_view(
303        &self,
304        messages: Vec<Message>,
305        config: &serde_json::Value,
306        context: &ModelViewContext<'_>,
307    ) -> Vec<Message> {
308        let config = CompactionConfig::from_json(config);
309        let masking = build_model_view_messages_owned(messages, &config, context.prior_usage);
310        if masking.masked_count > 0 {
311            tracing::info!(
312                session_id = %context.session_id,
313                masked_count = masking.masked_count,
314                tool_result_bytes_before = masking.tool_result_bytes_before,
315                tool_result_bytes_after = masking.tool_result_bytes_after,
316                "CompactionCapability: masked stale tool results for model view"
317            );
318        }
319        masking.messages
320    }
321
322    fn priority(&self) -> i32 {
323        50
324    }
325}
326
327// ============================================================================
328// Message Filter Provider (proactive observation masking at message load time)
329// ============================================================================
330
/// Marker message-filter provider for the compaction capability.
///
/// Its `apply_filters` is intentionally a no-op: it does not modify the
/// message query. Observation masking itself is applied later, at LLM message
/// construction time (see the comment inside `apply_filters`).
///
/// Its priority value is 50, lower than infinity context's 100, so it is
/// ordered before infinity context.
struct CompactionFilterProvider;
337
impl MessageFilterProvider for CompactionFilterProvider {
    /// Leaves the query untouched; both parameters are deliberately unused.
    fn apply_filters(
        &self,
        _query: &mut crate::message_filter::MessageQuery,
        _config: &serde_json::Value,
    ) {
        // The filter provider signals that compaction is active on this session.
        // Actual observation masking is applied at LLM message construction time
        // (in ReasonAtom) rather than at message query time, because masking
        // operates on LlmMessage format, not the storage Message format.
        //
        // The proactive compaction check in ReasonAtom reads the compaction config
        // and applies masking + budget checks before the LLM call.
    }

    /// Ordering value for this provider within the filter chain.
    fn priority(&self) -> i32 {
        50 // Before infinity context (100)
    }
}
357
358// ============================================================================
359// Token Estimation
360// ============================================================================
361
362/// Estimate token count for an LLM message using char/4 approximation.
363///
364/// This is intentionally simple. More accurate estimation (tiktoken, etc.) can
365/// be swapped in later, but char/4 is sufficient for budget decisions.
366pub fn estimate_tokens(msg: &LlmMessage) -> usize {
367    let text_len = match &msg.content {
368        LlmMessageContent::Text(t) => t.len(),
369        LlmMessageContent::Parts(parts) => parts
370            .iter()
371            .map(|p| match p {
372                LlmContentPart::Text { text } => text.len(),
373                _ => 50, // images, etc. — rough estimate
374            })
375            .sum(),
376    };
377
378    // Add tool call overhead
379    let tool_call_len = msg
380        .tool_calls
381        .as_ref()
382        .map(|calls| {
383            calls
384                .iter()
385                .map(|tc| tc.name.len() + tc.arguments.to_string().len() + 20)
386                .sum::<usize>()
387        })
388        .unwrap_or(0);
389
390    (text_len + tool_call_len) / 4
391}
392
393/// Estimate total tokens for a slice of messages.
394pub fn estimate_total_tokens(messages: &[LlmMessage]) -> usize {
395    messages.iter().map(estimate_tokens).sum()
396}
397
398/// Check whether proactive compaction should trigger.
399///
400/// Returns `true` if the estimated tokens exceed `budget_percent` of the model's
401/// context window.
402pub fn should_compact_proactively(
403    messages: &[LlmMessage],
404    config: &CompactionConfig,
405    context_window_tokens: usize,
406) -> bool {
407    if !config.proactive {
408        return false;
409    }
410    let budget = (context_window_tokens as f32 * config.budget_percent) as usize;
411    let estimated = estimate_total_tokens(messages);
412    estimated > budget
413}
414
415// ============================================================================
416// Aggressive Trim (last resort in cascade)
417// ============================================================================
418
419/// Drop oldest messages to fit within a target token count.
420///
421/// Preserves the system prompt (index 0 if present), protected messages
422/// (e.g. `activate_skill` results and their tool call messages), and the
423/// most recent messages. This is the last resort — lossy, no recovery.
424pub fn aggressive_trim(
425    messages: &[LlmMessage],
426    target_tokens: usize,
427    has_system_prompt: bool,
428) -> Vec<LlmMessage> {
429    let mut result = Vec::new();
430    let mut token_budget = target_tokens;
431
432    // Always keep system prompt
433    let start_idx = if has_system_prompt && !messages.is_empty() {
434        let sys_tokens = estimate_tokens(&messages[0]);
435        if sys_tokens < token_budget {
436            result.push(messages[0].clone());
437            token_budget -= sys_tokens;
438        }
439        1
440    } else {
441        0
442    };
443
444    let conversation = &messages[start_idx..];
445
446    // Identify protected messages (skill tool results and their call messages).
447    // Reserve budget for them first so they are never dropped.
448    let protected_indices: std::collections::HashSet<usize> = conversation
449        .iter()
450        .enumerate()
451        .filter(|(_, m)| {
452            is_protected_tool_result(conversation, m) || is_protected_tool_call_message(m)
453        })
454        .map(|(i, _)| i)
455        .collect();
456
457    let mut protected_budget: usize = 0;
458    for &idx in &protected_indices {
459        protected_budget += estimate_tokens(&conversation[idx]);
460    }
461
462    // If protected messages alone exceed the remaining budget, keep as many
463    // protected messages as possible (newest first) and skip non-protected.
464    if protected_budget > token_budget {
465        let mut protected_with_indices: Vec<(usize, LlmMessage)> = protected_indices
466            .iter()
467            .map(|&idx| (idx, conversation[idx].clone()))
468            .collect();
469        protected_with_indices.sort_by_key(|(i, _)| *i);
470
471        let mut remaining = token_budget;
472        let mut kept: Vec<(usize, LlmMessage)> = Vec::new();
473        for (idx, msg) in protected_with_indices.into_iter().rev() {
474            let t = estimate_tokens(&msg);
475            if t <= remaining {
476                kept.push((idx, msg));
477                remaining -= t;
478            }
479        }
480        kept.sort_by_key(|(i, _)| *i);
481        result.extend(kept.into_iter().map(|(_, m)| m));
482        return result;
483    }
484
485    token_budget -= protected_budget;
486
487    // Walk from newest to oldest, collecting non-protected messages that fit
488    let mut keep_from_end = Vec::new();
489    for (i, msg) in conversation.iter().enumerate().rev() {
490        if protected_indices.contains(&i) {
491            continue; // handled separately
492        }
493        let msg_tokens = estimate_tokens(msg);
494        if msg_tokens <= token_budget {
495            keep_from_end.push((i, msg.clone()));
496            token_budget -= msg_tokens;
497        } else {
498            break;
499        }
500    }
501
502    // Merge protected + kept messages in original order
503    let mut all_kept: Vec<(usize, LlmMessage)> = Vec::new();
504    for &idx in &protected_indices {
505        all_kept.push((idx, conversation[idx].clone()));
506    }
507    all_kept.extend(keep_from_end);
508    all_kept.sort_by_key(|(i, _)| *i);
509
510    result.extend(all_kept.into_iter().map(|(_, m)| m));
511    result
512}
513
514// ============================================================================
515// Session Compaction Metrics
516// ============================================================================
517
/// Per-session compaction metrics, stored as session metadata.
///
/// All counters start at zero (`Default`) and are updated through `record`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SessionCompactionMetrics {
    /// Total number of compaction events in this session.
    pub compaction_count: u32,
    /// Total messages saved across all compactions.
    pub total_messages_saved: u64,
    /// Breakdown by strategy, keyed by strategy name.
    pub strategy_counts: HashMap<String, u32>,
    /// Total time spent compacting (ms).
    pub total_duration_ms: u64,
}
530
531impl SessionCompactionMetrics {
532    /// Record a completed compaction step.
533    pub fn record(
534        &mut self,
535        strategy_used: &str,
536        messages_before: usize,
537        messages_after: usize,
538        duration_ms: u64,
539    ) {
540        self.compaction_count += 1;
541        self.total_messages_saved += (messages_before.saturating_sub(messages_after)) as u64;
542        self.total_duration_ms += duration_ms;
543
544        for strategy in strategy_used.split('+') {
545            *self
546                .strategy_counts
547                .entry(strategy.to_string())
548                .or_insert(0) += 1;
549        }
550    }
551}
552
553// ============================================================================
554// Hierarchical Memory Tiers
555// ============================================================================
556
/// Memory tier for a message in the hierarchy.
///
/// Serialized in snake_case (`hot`, `warm`, `cold`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum MemoryTier {
    /// Full verbatim text, always in context.
    Hot,
    /// Observation-masked (tool outputs replaced with summaries).
    Warm,
    /// Summarized to key facts. Queryable via `query_history` if Infinity Context enabled.
    Cold,
}
568
/// Configuration for hierarchical memory tiers.
///
/// Tiers are counted from the newest message backwards: first the hot tier,
/// then the warm tier, and everything older is cold.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HierarchicalMemoryConfig {
    /// Number of most recent messages to keep in the hot tier (full verbatim).
    #[serde(default = "default_hot_messages")]
    pub hot_messages: usize,
    /// Number of messages in the warm tier (observation-masked).
    #[serde(default = "default_warm_messages")]
    pub warm_messages: usize,
    // Everything older → cold tier (summarized / queryable)
}
580
581impl Default for HierarchicalMemoryConfig {
582    fn default() -> Self {
583        Self {
584            hot_messages: default_hot_messages(),
585            warm_messages: default_warm_messages(),
586        }
587    }
588}
589
/// Serde default for `HierarchicalMemoryConfig::hot_messages`.
fn default_hot_messages() -> usize {
    const HOT_TIER_SIZE: usize = 20;
    HOT_TIER_SIZE
}
593
/// Serde default for `HierarchicalMemoryConfig::warm_messages`.
fn default_warm_messages() -> usize {
    const WARM_TIER_SIZE: usize = 100;
    WARM_TIER_SIZE
}
597
598/// Classify messages into memory tiers based on position (newest-first).
599///
600/// Returns a vec of (tier, message) pairs in original order.
601pub fn classify_memory_tiers<'a>(
602    messages: &'a [LlmMessage],
603    config: &HierarchicalMemoryConfig,
604) -> Vec<(MemoryTier, &'a LlmMessage)> {
605    let len = messages.len();
606    messages
607        .iter()
608        .enumerate()
609        .map(|(i, msg)| {
610            let from_end = len - 1 - i;
611            let tier = if from_end < config.hot_messages {
612                MemoryTier::Hot
613            } else if from_end < config.hot_messages + config.warm_messages {
614                MemoryTier::Warm
615            } else {
616                MemoryTier::Cold
617            };
618            (tier, msg)
619        })
620        .collect()
621}
622
623/// Apply hierarchical memory: mask warm-tier tool outputs, summarize cold tier.
624///
625/// Returns the processed messages ready for LLM context. Cold-tier messages are
626/// replaced with a `[CONVERSATION_SUMMARY]` if a summary is provided.
627///
628/// Protected messages (e.g. `activate_skill` results) in cold/warm tiers are
629/// promoted to the output verbatim — they are never dropped or masked.
630pub fn apply_hierarchical_memory(
631    messages: &[LlmMessage],
632    config: &HierarchicalMemoryConfig,
633    masking_config: &ObservationMaskingConfig,
634    cold_summary: Option<&str>,
635) -> Vec<LlmMessage> {
636    let len = messages.len();
637    let hot_start = len.saturating_sub(config.hot_messages);
638    let warm_start = hot_start.saturating_sub(config.warm_messages);
639
640    let mut result = Vec::new();
641
642    // Cold tier: replace with summary if available, but rescue protected messages
643    if warm_start > 0 {
644        // Extract protected messages from cold tier before dropping
645        let cold_msgs = &messages[..warm_start];
646        let protected_cold: Vec<LlmMessage> = cold_msgs
647            .iter()
648            .filter(|m| is_protected_tool_result(cold_msgs, m) || is_protected_tool_call_message(m))
649            .cloned()
650            .collect();
651
652        if let Some(summary) = cold_summary {
653            result.push(build_summary_message(summary));
654        }
655
656        // Re-insert protected messages after the summary
657        result.extend(protected_cold);
658    }
659
660    // Warm tier: apply observation masking to tool outputs.
661    // Use the full message slice for protected-tool detection so that a tool
662    // result in warm tier whose assistant call is in cold tier is still recognized.
663    if warm_start < hot_start {
664        let warm_msgs = &messages[warm_start..hot_start];
665
666        // Pre-identify protected tool_call_ids using the full message list
667        let protected_call_ids: std::collections::HashSet<String> = warm_msgs
668            .iter()
669            .filter(|m| is_protected_tool_result(messages, m))
670            .filter_map(|m| m.tool_call_id.clone())
671            .collect();
672
673        let masked = apply_observation_masking_with_protected(
674            warm_msgs,
675            masking_config,
676            &protected_call_ids,
677        );
678        result.extend(masked.messages);
679    }
680
681    // Hot tier: verbatim
682    if hot_start < len {
683        result.extend_from_slice(&messages[hot_start..]);
684    }
685
686    result
687}
688
689// ============================================================================
690// Protected Tool Detection
691// ============================================================================
692
693use crate::llm_driver_registry::{LlmContentPart, LlmMessage, LlmMessageContent, LlmMessageRole};
694
/// Tool names whose results must be protected from compaction.
///
/// Skill activation results contain durable behavioral instructions that silently
/// degrade agent behavior when masked, summarized, or trimmed. The agentskills.io
/// client implementation guide recommends exempting skill content from pruning.
///
/// Matching is by exact tool name.
///
/// See: specs/compaction.md (Tier 3: tool-aware masking), specs/skills-registry.md
const PROTECTED_TOOL_NAMES: &[&str] = &["activate_skill"];
703
704/// Check if a tool result message corresponds to a protected tool.
705///
706/// Looks up the tool_call_id in preceding assistant messages to find the tool name.
707/// Returns `true` if the tool name is in `PROTECTED_TOOL_NAMES`.
708fn is_protected_tool_result(messages: &[LlmMessage], tool_msg: &LlmMessage) -> bool {
709    if tool_msg.role != LlmMessageRole::Tool {
710        return false;
711    }
712    let tool_name = find_tool_call_name(messages, tool_msg);
713    PROTECTED_TOOL_NAMES.contains(&tool_name.as_str())
714}
715
716/// Check if an assistant message contains a tool call to a protected tool.
717///
718/// Returns `true` if any tool call in the message targets a protected tool name.
719fn is_protected_tool_call_message(msg: &LlmMessage) -> bool {
720    if msg.role != LlmMessageRole::Assistant {
721        return false;
722    }
723    msg.tool_calls.as_ref().is_some_and(|calls| {
724        calls
725            .iter()
726            .any(|tc| PROTECTED_TOOL_NAMES.contains(&tc.name.as_str()))
727    })
728}
729
730// ============================================================================
731// Observation Masking
732// ============================================================================
733
/// Result of applying observation masking to a message list.
#[derive(Debug)]
pub struct ObservationMaskingResult {
    /// The masked messages.
    pub messages: Vec<LlmMessage>,
    /// Number of tool outputs that were masked.
    pub masked_count: usize,
}
742
743/// Apply observation masking: replace old tool outputs with one-line summaries.
744///
745/// Keeps the last `keep_recent_tool_outputs` tool results verbatim and replaces
746/// older ones with compact summaries. Message count is preserved (replace, not remove).
747///
748/// Protected tool results (e.g. `activate_skill`) are never masked — they contain
749/// durable behavioral instructions that must survive compaction.
750pub fn apply_observation_masking(
751    messages: &[LlmMessage],
752    config: &ObservationMaskingConfig,
753) -> ObservationMaskingResult {
754    apply_observation_masking_with_protected(messages, config, &std::collections::HashSet::new())
755}
756
/// Result of cost-control masking applied before provider serialization.
///
/// When nothing was masked, `masked_count` is 0 and the two byte totals are
/// equal.
#[derive(Debug)]
pub struct CostControlMaskingResult {
    /// Messages after stale bulky tool results were replaced by summaries.
    pub messages: Vec<Message>,
    /// Number of tool-result messages that were masked.
    pub masked_count: usize,
    /// Tool-result payload bytes before masking.
    pub tool_result_bytes_before: usize,
    /// Tool-result payload bytes after masking.
    pub tool_result_bytes_after: usize,
}
769
770/// Build the bounded model-view messages from lossless stored messages.
771///
772/// Storage keeps full tool results. This helper defines the cheaper prompt
773/// view used for provider serialization when the compaction capability is
774/// configured.
775pub fn build_model_view_messages(
776    stored_messages: &[Message],
777    compaction_config: &CompactionConfig,
778    prior_usage: Option<&TokenUsage>,
779) -> CostControlMaskingResult {
780    apply_cost_control_masking(stored_messages, compaction_config, prior_usage)
781}
782
/// Build the bounded model-view messages from owned stored messages.
///
/// Takes ownership of the list and forwards it to the cost-control masking
/// pass. This avoids cloning the message list when masking does not apply.
pub fn build_model_view_messages_owned(
    stored_messages: Vec<Message>,
    compaction_config: &CompactionConfig,
    prior_usage: Option<&TokenUsage>,
) -> CostControlMaskingResult {
    apply_cost_control_masking_owned(stored_messages, compaction_config, prior_usage)
}
793
794/// Apply cheap, generic cost-control masking to stored messages.
795///
796/// This runs before converting messages to provider-specific LLM messages, so
797/// the llm.generation event can reflect the context actually sent. It is
798/// deliberately separate from observation masking: observation masking is part
799/// of the context-window compaction cascade, while this keeps stale tool output
800/// from being paid for repeatedly even when a large-context model still has
801/// room.
802pub fn apply_cost_control_masking(
803    messages: &[Message],
804    config: &CompactionConfig,
805    prior_usage: Option<&TokenUsage>,
806) -> CostControlMaskingResult {
807    apply_cost_control_masking_owned(messages.to_vec(), config, prior_usage)
808}
809
810fn apply_cost_control_masking_owned(
811    messages: Vec<Message>,
812    config: &CompactionConfig,
813    prior_usage: Option<&TokenUsage>,
814) -> CostControlMaskingResult {
815    let cost_config = &config.cost_control;
816    let tool_indices: Vec<usize> = messages
817        .iter()
818        .enumerate()
819        .filter(|(_, message)| {
820            message.role == MessageRole::ToolResult
821                && !is_protected_message_tool_result(&messages, message)
822        })
823        .map(|(index, _)| index)
824        .collect();
825    let tool_result_bytes_before = tool_indices
826        .iter()
827        .map(|index| message_tool_result_len(&messages[*index]))
828        .sum();
829
830    if !cost_config.enabled
831        || tool_indices.len() <= cost_config.keep_recent_tool_results
832        || !should_apply_cost_control_masking(
833            tool_indices.len(),
834            tool_result_bytes_before,
835            cost_config,
836            prior_usage,
837        )
838    {
839        return CostControlMaskingResult {
840            messages,
841            masked_count: 0,
842            tool_result_bytes_before,
843            tool_result_bytes_after: tool_result_bytes_before,
844        };
845    }
846
847    let keep_recent = cost_config.keep_recent_tool_results;
848    let to_mask_count = tool_indices.len().saturating_sub(keep_recent);
849    let indices_to_mask: std::collections::HashSet<usize> =
850        tool_indices[..to_mask_count].iter().copied().collect();
851    let tool_names: std::collections::HashMap<usize, String> = indices_to_mask
852        .iter()
853        .map(|index| {
854            (
855                *index,
856                find_message_tool_call_name(&messages, &messages[*index]),
857            )
858        })
859        .collect();
860
861    let mut masked_count = 0;
862    let mut masked_messages = Vec::with_capacity(messages.len());
863    for (index, message) in messages.into_iter().enumerate() {
864        if let Some(tool_name) = tool_names.get(&index) {
865            masked_messages.push(mask_tool_result_message(&message, tool_name));
866            masked_count += 1;
867        } else {
868            masked_messages.push(message);
869        }
870    }
871
872    let tool_result_bytes_after = masked_messages
873        .iter()
874        .filter(|message| message.role == MessageRole::ToolResult)
875        .map(message_tool_result_len)
876        .sum();
877
878    CostControlMaskingResult {
879        messages: masked_messages,
880        masked_count,
881        tool_result_bytes_before,
882        tool_result_bytes_after,
883    }
884}
885
886fn should_apply_cost_control_masking(
887    tool_result_count: usize,
888    tool_result_bytes: usize,
889    config: &CostControlConfig,
890    prior_usage: Option<&TokenUsage>,
891) -> bool {
892    if tool_result_count >= config.mask_after_tool_results {
893        return true;
894    }
895    if tool_result_bytes >= config.max_live_tool_result_bytes {
896        return true;
897    }
898    let Some(usage) = prior_usage else {
899        return false;
900    };
901    let cache_read = usage.cache_read_tokens.unwrap_or(0);
902    let uncached = usage.input_tokens.saturating_sub(cache_read);
903    if uncached >= config.max_uncached_input_tokens {
904        return true;
905    }
906    usage.input_tokens > 0
907        && (cache_read as f32 / usage.input_tokens as f32) < config.min_cache_read_ratio
908}
909
910fn is_protected_message_tool_result(messages: &[Message], tool_msg: &Message) -> bool {
911    if tool_msg.role != MessageRole::ToolResult {
912        return false;
913    }
914    let tool_name = find_message_tool_call_name(messages, tool_msg);
915    PROTECTED_TOOL_NAMES.contains(&tool_name.as_str())
916}
917
918fn find_message_tool_call_name(messages: &[Message], tool_msg: &Message) -> String {
919    let Some(call_id) = tool_msg.tool_call_id() else {
920        return "unknown_tool".to_string();
921    };
922
923    for msg in messages.iter().rev() {
924        if msg.role != MessageRole::Agent {
925            continue;
926        }
927        for tool_call in msg.tool_calls() {
928            if tool_call.id == call_id {
929                return tool_call.name.clone();
930            }
931        }
932    }
933
934    "unknown_tool".to_string()
935}
936
937fn message_tool_result_len(message: &Message) -> usize {
938    let Some(result) = message.tool_result_content() else {
939        return 0;
940    };
941    result
942        .result
943        .as_ref()
944        .map(estimate_json_value_len)
945        .unwrap_or(0)
946        + result.error.as_ref().map_or(0, String::len)
947}
948
949fn mask_tool_result_message(message: &Message, tool_name: &str) -> Message {
950    let Some(result) = message.tool_result_content() else {
951        return message.clone();
952    };
953    let summary = summarize_tool_result(tool_name, result.result.as_ref(), result.error.as_ref());
954    let was_error = result.error.is_some();
955    let mut masked = message.clone();
956    for part in &mut masked.content {
957        if let ContentPart::ToolResult(tool_result) = part {
958            if was_error {
959                tool_result.result = None;
960                tool_result.error = Some(summary);
961            } else {
962                tool_result.result = Some(serde_json::json!({
963                    "masked": true,
964                    "summary": summary,
965                }));
966                tool_result.error = None;
967            }
968            break;
969        }
970    }
971    masked
972}
973
974fn summarize_tool_result(
975    tool_name: &str,
976    result: Option<&serde_json::Value>,
977    error: Option<&String>,
978) -> String {
979    if let Some(error) = error {
980        return format!("[{tool_name} error: {}]", truncate_inline(error, 160));
981    }
982    let Some(value) = result else {
983        return format!("[{tool_name} returned no result]");
984    };
985    let Some(object) = value.as_object() else {
986        return format!(
987            "[{tool_name} -> {}, {} bytes]",
988            value_kind(value),
989            estimate_json_value_len(value)
990        );
991    };
992
993    match tool_name {
994        "read_file" | "daytona_read_file" | "sandbox_read_file" | "e2b_read_file"
995        | "docker_read_file" | "deno_read_file" | "sprites_read_file" | "read_github_file" => {
996            summarize_read_file_result(tool_name, object, value)
997        }
998        "bash" | "daytona_exec" | "sandbox_exec" | "e2b_exec" | "docker_exec" | "deno_exec" => {
999            summarize_exec_result(tool_name, object, value)
1000        }
1001        "list_directory" => summarize_list_directory_result(tool_name, object, value),
1002        "grep_files" => summarize_grep_files_result(tool_name, object, value),
1003        _ => summarize_generic_tool_result(tool_name, object, value),
1004    }
1005}
1006
1007fn summarize_read_file_result(
1008    tool_name: &str,
1009    object: &serde_json::Map<String, serde_json::Value>,
1010    value: &serde_json::Value,
1011) -> String {
1012    let path = object
1013        .get("path")
1014        .and_then(|v| v.as_str())
1015        .unwrap_or("(unknown path)");
1016    let lines = object.get("lines_shown").and_then(|v| v.as_object());
1017    let line_range = lines
1018        .and_then(|lines| {
1019            let start = lines.get("start")?.as_u64()?;
1020            let end = lines.get("end")?.as_u64()?;
1021            Some(format!(" lines {start}-{end}"))
1022        })
1023        .unwrap_or_default();
1024    let total_lines = object
1025        .get("total_lines")
1026        .and_then(|v| v.as_u64())
1027        .map(|lines| format!(", total_lines={lines}"))
1028        .unwrap_or_default();
1029    let next_offset = object
1030        .get("truncation")
1031        .and_then(|v| v.as_object())
1032        .and_then(|truncation| truncation.get("next_offset"))
1033        .and_then(|v| v.as_u64())
1034        .map(|offset| format!(", next_offset={offset}"))
1035        .unwrap_or_default();
1036    let hash = object
1037        .get("content_hash")
1038        .and_then(|v| v.as_str())
1039        .map(|hash| format!(", hash={hash}"))
1040        .unwrap_or_default();
1041    let truncated = object
1042        .get("truncated")
1043        .and_then(|v| v.as_bool())
1044        .unwrap_or(false);
1045
1046    format!(
1047        "[{tool_name} {path}{line_range}, {} bytes, truncated={truncated}{total_lines}{next_offset}{hash}]",
1048        estimate_json_value_len(value)
1049    )
1050}
1051
1052fn summarize_exec_result(
1053    tool_name: &str,
1054    object: &serde_json::Map<String, serde_json::Value>,
1055    value: &serde_json::Value,
1056) -> String {
1057    let exit = object
1058        .get("exit_code")
1059        .and_then(|v| v.as_i64())
1060        .map(|code| format!(" exit={code}"))
1061        .unwrap_or_default();
1062    let stdout_len = object
1063        .get("stdout")
1064        .and_then(|v| v.as_str())
1065        .map(|stdout| stdout.len())
1066        .unwrap_or(0);
1067    let stderr_len = object
1068        .get("stderr")
1069        .and_then(|v| v.as_str())
1070        .map(|stderr| stderr.len())
1071        .unwrap_or(0);
1072    let full_output = object
1073        .get("full_output")
1074        .and_then(|v| v.as_str())
1075        .map(|path| format!(", full_output={path}"))
1076        .unwrap_or_default();
1077    let total_lines = object
1078        .get("total_lines")
1079        .and_then(|v| v.as_u64())
1080        .map(|lines| format!(", total_lines={lines}"))
1081        .unwrap_or_default();
1082
1083    format!(
1084        "[{tool_name}{exit}, stdout={} bytes, stderr={} bytes, result={} bytes{full_output}{total_lines}]",
1085        stdout_len,
1086        stderr_len,
1087        estimate_json_value_len(value)
1088    )
1089}
1090
1091fn summarize_list_directory_result(
1092    tool_name: &str,
1093    object: &serde_json::Map<String, serde_json::Value>,
1094    value: &serde_json::Value,
1095) -> String {
1096    let path = object
1097        .get("path")
1098        .and_then(|v| v.as_str())
1099        .unwrap_or("(unknown path)");
1100    let count = object
1101        .get("count")
1102        .and_then(|v| v.as_u64())
1103        .or_else(|| {
1104            object
1105                .get("entries")
1106                .and_then(|v| v.as_array())
1107                .map(|v| v.len() as u64)
1108        })
1109        .unwrap_or(0);
1110    format!(
1111        "[{tool_name} {path}, {count} entries, {} bytes]",
1112        estimate_json_value_len(value)
1113    )
1114}
1115
1116fn summarize_grep_files_result(
1117    tool_name: &str,
1118    object: &serde_json::Map<String, serde_json::Value>,
1119    value: &serde_json::Value,
1120) -> String {
1121    let pattern = object
1122        .get("pattern")
1123        .and_then(|v| v.as_str())
1124        .map(|pattern| format!(" pattern={:?}", truncate_inline(pattern, 80)))
1125        .unwrap_or_default();
1126    let match_count = object
1127        .get("match_count")
1128        .and_then(|v| v.as_u64())
1129        .unwrap_or(0);
1130    format!(
1131        "[{tool_name}{pattern}, matches={match_count}, {} bytes]",
1132        estimate_json_value_len(value)
1133    )
1134}
1135
1136fn summarize_generic_tool_result(
1137    tool_name: &str,
1138    object: &serde_json::Map<String, serde_json::Value>,
1139    value: &serde_json::Value,
1140) -> String {
1141    let keys = object.keys().take(5).cloned().collect::<Vec<_>>().join(",");
1142    format!(
1143        "[{tool_name} result, {} bytes, keys={keys}]",
1144        estimate_json_value_len(value)
1145    )
1146}
1147
1148fn value_kind(value: &serde_json::Value) -> &'static str {
1149    match value {
1150        serde_json::Value::Null => "null",
1151        serde_json::Value::Bool(_) => "bool",
1152        serde_json::Value::Number(_) => "number",
1153        serde_json::Value::String(_) => "string",
1154        serde_json::Value::Array(_) => "array",
1155        serde_json::Value::Object(_) => "object",
1156    }
1157}
1158
1159fn estimate_json_value_len(value: &serde_json::Value) -> usize {
1160    let mut writer = CountingWriter::default();
1161    serde_json::to_writer(&mut writer, value)
1162        .map(|_| writer.bytes)
1163        .unwrap_or(0)
1164}
1165
/// `Write` sink that discards its input and only tallies how many bytes it
/// was given.
#[derive(Default)]
struct CountingWriter {
    /// Total number of bytes written so far.
    bytes: usize,
}

impl std::io::Write for CountingWriter {
    /// Accepts the whole buffer, adding its length to the running total.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let accepted = buf.len();
        self.bytes += accepted;
        Ok(accepted)
    }

    /// Nothing is buffered, so flushing always succeeds.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
1181
/// Limits `text` to `max_chars` characters (Unicode scalar values).
///
/// Text that already fits is returned unchanged; otherwise the first
/// `max_chars` characters are kept and `...` is appended.
fn truncate_inline(text: &str, max_chars: usize) -> String {
    // The byte offset of the character at index `max_chars` is exactly where
    // the text must be cut; if there is no such character the text fits.
    match text.char_indices().nth(max_chars) {
        None => text.to_string(),
        Some((cut, _)) => {
            let mut shortened = String::with_capacity(cut + 3);
            shortened.push_str(&text[..cut]);
            shortened.push_str("...");
            shortened
        }
    }
}
1190
1191/// Like `apply_observation_masking`, but accepts additional pre-identified protected
1192/// tool_call_ids. This is needed when the message slice doesn't contain the
1193/// assistant tool-call message (e.g. warm tier where the call is in cold tier).
1194fn apply_observation_masking_with_protected(
1195    messages: &[LlmMessage],
1196    config: &ObservationMaskingConfig,
1197    extra_protected_call_ids: &std::collections::HashSet<String>,
1198) -> ObservationMaskingResult {
1199    // Separate protected vs maskable tool result indices
1200    let tool_indices: Vec<usize> = messages
1201        .iter()
1202        .enumerate()
1203        .filter(|(_, m)| {
1204            m.role == LlmMessageRole::Tool
1205                && !is_protected_tool_result(messages, m)
1206                && !m
1207                    .tool_call_id
1208                    .as_ref()
1209                    .is_some_and(|id| extra_protected_call_ids.contains(id))
1210        })
1211        .map(|(i, _)| i)
1212        .collect();
1213
1214    if tool_indices.len() <= config.keep_recent_tool_outputs {
1215        return ObservationMaskingResult {
1216            messages: messages.to_vec(),
1217            masked_count: 0,
1218        };
1219    }
1220
1221    let to_mask_count = tool_indices.len() - config.keep_recent_tool_outputs;
1222    let indices_to_mask: std::collections::HashSet<usize> =
1223        tool_indices[..to_mask_count].iter().copied().collect();
1224
1225    let mut result = Vec::with_capacity(messages.len());
1226    let mut masked_count = 0;
1227
1228    for (i, msg) in messages.iter().enumerate() {
1229        if indices_to_mask.contains(&i) {
1230            let tool_name = find_tool_call_name(messages, msg);
1231            let summary = match config.summary_format {
1232                MaskingSummaryFormat::OneLine => format_one_line_summary(&tool_name, &msg.content),
1233                MaskingSummaryFormat::HeadTail => format_head_tail_summary(&msg.content),
1234            };
1235            result.push(LlmMessage {
1236                role: LlmMessageRole::Tool,
1237                content: LlmMessageContent::Text(summary),
1238                tool_calls: msg.tool_calls.clone(),
1239                tool_call_id: msg.tool_call_id.clone(),
1240                phase: msg.phase,
1241                thinking: None,
1242                thinking_signature: None,
1243            });
1244            masked_count += 1;
1245        } else {
1246            result.push(msg.clone());
1247        }
1248    }
1249
1250    ObservationMaskingResult {
1251        messages: result,
1252        masked_count,
1253    }
1254}
1255
1256/// Find the tool name from a preceding assistant message that issued the tool call.
1257fn find_tool_call_name(messages: &[LlmMessage], tool_msg: &LlmMessage) -> String {
1258    let Some(ref call_id) = tool_msg.tool_call_id else {
1259        return "unknown_tool".to_string();
1260    };
1261
1262    for msg in messages.iter().rev() {
1263        if msg.role == LlmMessageRole::Assistant
1264            && let Some(ref tool_calls) = msg.tool_calls
1265        {
1266            for tc in tool_calls {
1267                if tc.id == *call_id {
1268                    return tc.name.clone();
1269                }
1270            }
1271        }
1272    }
1273
1274    "unknown_tool".to_string()
1275}
1276
1277fn extract_text(content: &LlmMessageContent) -> String {
1278    match content {
1279        LlmMessageContent::Text(t) => t.clone(),
1280        LlmMessageContent::Parts(parts) => parts
1281            .iter()
1282            .filter_map(|p| {
1283                if let LlmContentPart::Text { text } = p {
1284                    Some(text.clone())
1285                } else {
1286                    None
1287                }
1288            })
1289            .collect::<Vec<_>>()
1290            .join(" "),
1291    }
1292}
1293
1294fn format_one_line_summary(tool_name: &str, content: &LlmMessageContent) -> String {
1295    let text = extract_text(content);
1296    let line_count = text.lines().count();
1297    let byte_len = text.len();
1298
1299    if byte_len <= 100 {
1300        format!("[{tool_name} → {text}]")
1301    } else {
1302        format!("[{tool_name} → {line_count} lines, {byte_len} bytes]")
1303    }
1304}
1305
1306fn format_head_tail_summary(content: &LlmMessageContent) -> String {
1307    let text = extract_text(content);
1308    let lines: Vec<&str> = text.lines().collect();
1309
1310    if lines.len() <= 6 {
1311        return text;
1312    }
1313
1314    let head: Vec<&str> = lines[..3].to_vec();
1315    let tail: Vec<&str> = lines[lines.len() - 3..].to_vec();
1316
1317    format!(
1318        "{}\n... ({} lines omitted) ...\n{}",
1319        head.join("\n"),
1320        lines.len() - 6,
1321        tail.join("\n")
1322    )
1323}
1324
1325// ============================================================================
1326// Summarization
1327// ============================================================================
1328
1329/// Build the summarization system prompt.
1330pub fn build_summarization_prompt(config: &SummarizationConfig) -> String {
1331    let preserve_items = if config.preserve.is_empty() {
1332        default_preserve()
1333    } else {
1334        config.preserve.clone()
1335    };
1336
1337    let preserve_list = preserve_items
1338        .iter()
1339        .map(|item| format!("- {item}"))
1340        .collect::<Vec<_>>()
1341        .join("\n");
1342
1343    let custom_instructions = config
1344        .instructions
1345        .as_deref()
1346        .map(|instr| format!("\n- {instr}"))
1347        .unwrap_or_default();
1348
1349    format!(
1350        r#"<task>
1351Summarize the following conversation history. The summary replaces these
1352messages in the agent's context window — it must contain everything the
1353agent needs to continue working.
1354</task>
1355
1356<preserve>
1357{preserve_list}{custom_instructions}
1358</preserve>
1359
1360<format>
1361Produce a structured summary. Use sections. Be concise but complete.
1362Do not include tool output verbatim — reference files by path.
1363IMPORTANT: Any activate_skill tool results contain durable skill instructions.
1364Include them verbatim in a dedicated "Active Skills" section — do not summarize
1365or paraphrase skill instructions.
1366</format>"#
1367    )
1368}
1369
1370/// Format messages into a text block for the summarization prompt.
1371pub fn format_messages_for_summarization(messages: &[LlmMessage]) -> String {
1372    let mut parts = Vec::new();
1373    for msg in messages {
1374        let role = match msg.role {
1375            LlmMessageRole::System => "system",
1376            LlmMessageRole::User => "user",
1377            LlmMessageRole::Assistant => "assistant",
1378            LlmMessageRole::Tool => "tool",
1379        };
1380
1381        let content = extract_text(&msg.content);
1382
1383        // Protected tool results (skill instructions) are never truncated —
1384        // the summarizer must see the full text to reproduce them verbatim.
1385        let is_protected = is_protected_tool_result(messages, msg);
1386
1387        // Truncate very long messages to avoid blowing up the summarization prompt
1388        let truncated = if !is_protected && content.len() > 2000 {
1389            let safe_prefix = truncate_at_char_boundary(&content, 2000);
1390            format!(
1391                "{}... [truncated, {} chars total]",
1392                safe_prefix,
1393                content.len()
1394            )
1395        } else {
1396            content
1397        };
1398
1399        parts.push(format!("[{role}]: {truncated}"));
1400    }
1401    parts.join("\n\n")
1402}
1403
/// Longest prefix of `content` that is at most `max_bytes` bytes long and
/// ends on a UTF-8 character boundary.
///
/// Content that already fits is returned whole. If `max_bytes` falls inside a
/// multi-byte character, the cut moves back to the start of that character.
fn truncate_at_char_boundary(content: &str, max_bytes: usize) -> &str {
    if max_bytes >= content.len() {
        return content;
    }

    // Offset 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&offset| content.is_char_boundary(offset))
        .unwrap_or(0);
    &content[..cut]
}
1420
1421/// Build a summary system message that replaces compacted messages in context.
1422pub fn build_summary_message(summary_text: &str) -> LlmMessage {
1423    LlmMessage {
1424        role: LlmMessageRole::System,
1425        content: LlmMessageContent::Text(format!(
1426            "[CONVERSATION_SUMMARY]\n{summary_text}\n[/CONVERSATION_SUMMARY]"
1427        )),
1428        tool_calls: None,
1429        tool_call_id: None,
1430        phase: None,
1431        thinking: None,
1432        thinking_signature: None,
1433    }
1434}
1435
1436// ============================================================================
1437// Compaction Step Tracking
1438// ============================================================================
1439
/// Record of a single compaction step in a cascade.
///
/// One entry describes one strategy application: which strategy ran, how many
/// messages remained afterwards, and how long the step took.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompactionStep {
    /// Strategy used in this step, stored as a string.
    pub strategy: String,
    /// Message count after this step.
    pub messages_after: usize,
    /// Duration of this step in milliseconds.
    pub duration_ms: u64,
}
1450
1451// ============================================================================
1452// Tests
1453// ============================================================================
1454
1455#[cfg(test)]
1456mod tests {
1457    use super::*;
1458    use crate::tool_types::ToolCall;
1459    use serde_json::json;
1460
1461    fn make_user_msg(text: &str) -> LlmMessage {
1462        LlmMessage {
1463            role: LlmMessageRole::User,
1464            content: LlmMessageContent::Text(text.to_string()),
1465            tool_calls: None,
1466            tool_call_id: None,
1467            phase: None,
1468            thinking: None,
1469            thinking_signature: None,
1470        }
1471    }
1472
1473    fn make_assistant_msg(text: &str) -> LlmMessage {
1474        LlmMessage {
1475            role: LlmMessageRole::Assistant,
1476            content: LlmMessageContent::Text(text.to_string()),
1477            tool_calls: None,
1478            tool_call_id: None,
1479            phase: None,
1480            thinking: None,
1481            thinking_signature: None,
1482        }
1483    }
1484
1485    fn make_assistant_with_tool_call(call_id: &str, tool_name: &str) -> LlmMessage {
1486        LlmMessage {
1487            role: LlmMessageRole::Assistant,
1488            content: LlmMessageContent::Text(String::new()),
1489            tool_calls: Some(vec![ToolCall {
1490                id: call_id.to_string(),
1491                name: tool_name.to_string(),
1492                arguments: json!({"path": "src/main.rs"}),
1493            }]),
1494            tool_call_id: None,
1495            phase: None,
1496            thinking: None,
1497            thinking_signature: None,
1498        }
1499    }
1500
1501    fn make_tool_result(call_id: &str, output: &str) -> LlmMessage {
1502        LlmMessage {
1503            role: LlmMessageRole::Tool,
1504            content: LlmMessageContent::Text(output.to_string()),
1505            tool_calls: None,
1506            tool_call_id: Some(call_id.to_string()),
1507            phase: None,
1508            thinking: None,
1509            thinking_signature: None,
1510        }
1511    }
1512
1513    // ====================================================================
1514    // CompactionConfig tests
1515    // ====================================================================
1516
1517    #[test]
1518    fn test_capability_metadata() {
1519        let cap = CompactionCapability;
1520        assert_eq!(cap.id(), COMPACTION_CAPABILITY_ID);
1521        assert_eq!(cap.name(), "Compaction");
1522        assert_eq!(cap.status(), CapabilityStatus::Available);
1523        assert_eq!(cap.category(), Some("Optimization"));
1524        assert!(cap.tools().is_empty());
1525        assert!(cap.message_filter_provider().is_some());
1526    }
1527
1528    #[test]
1529    fn test_default_config() {
1530        let config = CompactionConfig::default();
1531        assert_eq!(config.strategy, CompactionStrategy::Auto);
1532        assert!(config.proactive);
1533        assert!((config.budget_percent - 0.85).abs() < f32::EPSILON);
1534        assert_eq!(config.observation_masking.keep_recent_tool_outputs, 2);
1535        assert_eq!(
1536            config.observation_masking.summary_format,
1537            MaskingSummaryFormat::OneLine
1538        );
1539        assert!(config.summarization.model.is_none());
1540        assert_eq!(config.summarization.preserve.len(), 5);
1541        assert!(config.summarization.instructions.is_none());
1542        assert!(config.cost_control.enabled);
1543        assert_eq!(config.cost_control.keep_recent_tool_results, 2);
1544    }
1545
1546    #[test]
1547    fn test_config_from_empty_json() {
1548        let config = CompactionConfig::from_json(&json!({}));
1549        assert_eq!(config.strategy, CompactionStrategy::Auto);
1550        assert!(config.proactive);
1551    }
1552
1553    #[test]
1554    fn test_config_native_only() {
1555        let config = CompactionConfig::from_json(&json!({"strategy": "native"}));
1556        assert_eq!(config.strategy, CompactionStrategy::Native);
1557        assert!(config.proactive);
1558    }
1559
1560    #[test]
1561    fn test_config_observation_masking_with_custom_settings() {
1562        let config = CompactionConfig::from_json(&json!({
1563            "strategy": "observation_masking",
1564            "proactive": false,
1565            "observation_masking": {
1566                "keep_recent_tool_outputs": 10,
1567                "summary_format": "head_tail"
1568            }
1569        }));
1570        assert_eq!(config.strategy, CompactionStrategy::ObservationMasking);
1571        assert!(!config.proactive);
1572        assert_eq!(config.observation_masking.keep_recent_tool_outputs, 10);
1573        assert_eq!(
1574            config.observation_masking.summary_format,
1575            MaskingSummaryFormat::HeadTail
1576        );
1577    }
1578
1579    #[test]
1580    fn test_config_cost_control_with_custom_settings() {
1581        let config = CompactionConfig::from_json(&json!({
1582            "cost_control": {
1583                "enabled": true,
1584                "keep_recent_tool_results": 1,
1585                "mask_after_tool_results": 2,
1586                "max_live_tool_result_bytes": 4096,
1587                "max_uncached_input_tokens": 50000,
1588                "min_cache_read_ratio": 0.5
1589            }
1590        }));
1591
1592        assert!(config.cost_control.enabled);
1593        assert_eq!(config.cost_control.keep_recent_tool_results, 1);
1594        assert_eq!(config.cost_control.mask_after_tool_results, 2);
1595        assert_eq!(config.cost_control.max_live_tool_result_bytes, 4096);
1596        assert_eq!(config.cost_control.max_uncached_input_tokens, 50000);
1597        assert!((config.cost_control.min_cache_read_ratio - 0.5).abs() < f32::EPSILON);
1598    }
1599
1600    #[test]
1601    fn test_config_summarization_with_custom_model() {
1602        let config = CompactionConfig::from_json(&json!({
1603            "strategy": "summarization",
1604            "summarization": {
1605                "model": "claude-haiku-4-5-20251001",
1606                "instructions": "Focus on API decisions",
1607                "preserve": ["decisions", "errors"]
1608            }
1609        }));
1610        assert_eq!(config.strategy, CompactionStrategy::Summarization);
1611        assert_eq!(
1612            config.summarization.model.as_deref(),
1613            Some("claude-haiku-4-5-20251001")
1614        );
1615        assert_eq!(
1616            config.summarization.instructions.as_deref(),
1617            Some("Focus on API decisions")
1618        );
1619        assert_eq!(config.summarization.preserve.len(), 2);
1620    }
1621
1622    fn make_message_tool_turn(
1623        call_id: &str,
1624        tool_name: &str,
1625        result: serde_json::Value,
1626    ) -> Vec<Message> {
1627        vec![
1628            Message::assistant_with_tools(
1629                "",
1630                vec![ToolCall {
1631                    id: call_id.to_string(),
1632                    name: tool_name.to_string(),
1633                    arguments: json!({"path": "/workspace/src/lib.rs"}),
1634                }],
1635            ),
1636            Message::tool_result(call_id, Some(result), None),
1637        ]
1638    }
1639
1640    #[test]
1641    fn test_cost_control_masks_old_read_file_results() {
1642        let mut messages = vec![Message::user("inspect files")];
1643        for index in 0..5 {
1644            messages.extend(make_message_tool_turn(
1645                &format!("call_{index}"),
1646                "read_file",
1647                json!({
1648                    "path": "/workspace/src/lib.rs",
1649                    "content": format!("{}{}", "line\n".repeat(400), index),
1650                    "total_lines": 900,
1651                    "lines_shown": {"start": 1, "end": 400},
1652                    "truncated": true,
1653                    "content_hash": format!("sha256:{index}"),
1654                    "truncation": {"truncated": true, "next_offset": 400, "reason": "line_cap"}
1655                }),
1656            ));
1657        }
1658
1659        let config = CompactionConfig::from_json(&json!({
1660            "cost_control": {
1661                "keep_recent_tool_results": 2,
1662                "mask_after_tool_results": 4
1663            }
1664        }));
1665        let result = apply_cost_control_masking(&messages, &config, None);
1666
1667        assert_eq!(result.masked_count, 3);
1668        assert!(result.tool_result_bytes_after < result.tool_result_bytes_before);
1669
1670        let first_tool = result.messages[2].tool_result_content().unwrap();
1671        let masked = first_tool.result.as_ref().unwrap();
1672        assert_eq!(masked["masked"], true);
1673        let summary = masked["summary"].as_str().unwrap();
1674        assert!(summary.contains("read_file"));
1675        assert!(summary.contains("/workspace/src/lib.rs"));
1676        assert!(summary.contains("lines 1-400"));
1677        assert!(summary.contains("next_offset=400"));
1678        assert!(!summary.contains("line\nline"));
1679
1680        let last_tool = result
1681            .messages
1682            .last()
1683            .unwrap()
1684            .tool_result_content()
1685            .unwrap();
1686        assert!(last_tool.result.as_ref().unwrap().get("content").is_some());
1687    }
1688
1689    #[test]
1690    fn test_model_view_masks_with_compaction_config() {
1691        let mut messages = vec![Message::user("inspect files repeatedly")];
1692        for index in 0..9 {
1693            messages.extend(make_message_tool_turn(
1694                &format!("call_{index}"),
1695                "read_file",
1696                json!({
1697                    "path": "/workspace/session_019e4c9dd1b17021af70ad3227361b16.jsonl",
1698                    "content": format!("{}{}", "large transcript line\n".repeat(1000), index),
1699                    "total_lines": 1000,
1700                    "lines_shown": {"start": 1, "end": 1000},
1701                    "truncated": false,
1702                    "content_hash": format!("sha256:{index}")
1703                }),
1704            ));
1705        }
1706
1707        let config = CompactionConfig::default();
1708        let result = build_model_view_messages(&messages, &config, None);
1709
1710        assert_eq!(result.masked_count, 7);
1711        assert!(result.tool_result_bytes_after < result.tool_result_bytes_before / 4);
1712        let first_tool = result.messages[2].tool_result_content().unwrap();
1713        let masked = first_tool.result.as_ref().unwrap();
1714        assert_eq!(masked["masked"], true);
1715        assert!(masked["summary"].as_str().unwrap().contains("read_file"));
1716        let last_tool = result
1717            .messages
1718            .last()
1719            .unwrap()
1720            .tool_result_content()
1721            .unwrap();
1722        assert!(last_tool.result.as_ref().unwrap().get("content").is_some());
1723    }
1724
1725    #[test]
1726    fn test_compaction_capability_contributes_model_view_provider() {
1727        let mut messages = vec![Message::user("inspect files repeatedly")];
1728        for index in 0..9 {
1729            messages.extend(make_message_tool_turn(
1730                &format!("call_{index}"),
1731                "read_file",
1732                json!({
1733                    "path": "/workspace/src/lib.rs",
1734                    "content": format!("{}{}", "large file line\n".repeat(1000), index),
1735                    "total_lines": 1000,
1736                    "lines_shown": {"start": 1, "end": 1000},
1737                    "truncated": false
1738                }),
1739            ));
1740        }
1741
1742        let capability = CompactionCapability;
1743        let provider = capability.model_view_provider().unwrap();
1744        let context = ModelViewContext {
1745            session_id: crate::typed_id::SessionId::new(),
1746            prior_usage: None,
1747        };
1748        let result = provider.apply_model_view(messages, &json!({}), &context);
1749
1750        let first_tool = result[2].tool_result_content().unwrap();
1751        assert_eq!(first_tool.result.as_ref().unwrap()["masked"], true);
1752        let last_tool = result.last().unwrap().tool_result_content().unwrap();
1753        assert!(last_tool.result.as_ref().unwrap().get("content").is_some());
1754    }
1755
1756    #[test]
1757    fn test_model_view_respects_disabled_cost_control_config() {
1758        let mut messages = vec![Message::user("inspect files repeatedly")];
1759        for index in 0..5 {
1760            messages.extend(make_message_tool_turn(
1761                &format!("call_{index}"),
1762                "read_file",
1763                json!({
1764                    "path": "/workspace/src/lib.rs",
1765                    "content": "line\n".repeat(400),
1766                    "total_lines": 400,
1767                    "lines_shown": {"start": 1, "end": 400},
1768                    "truncated": false
1769                }),
1770            ));
1771        }
1772
1773        let config = CompactionConfig::from_json(&json!({
1774            "cost_control": {
1775                "enabled": false,
1776                "keep_recent_tool_results": 1,
1777                "mask_after_tool_results": 2
1778            }
1779        }));
1780        let result = build_model_view_messages(&messages, &config, None);
1781
1782        assert_eq!(result.masked_count, 0);
1783        assert_eq!(
1784            result.tool_result_bytes_after,
1785            result.tool_result_bytes_before
1786        );
1787    }
1788
1789    #[test]
1790    fn test_cost_control_uses_prior_usage_signal() {
1791        let mut messages = vec![Message::user("run commands")];
1792        for index in 0..3 {
1793            messages.extend(make_message_tool_turn(
1794                &format!("call_{index}"),
1795                "bash",
1796                json!({
1797                    "stdout": "small output",
1798                    "stderr": "",
1799                    "exit_code": 0,
1800                    "success": true
1801                }),
1802            ));
1803        }
1804
1805        let config = CompactionConfig::from_json(&json!({
1806            "cost_control": {
1807                "keep_recent_tool_results": 1,
1808                "mask_after_tool_results": 99,
1809                "max_live_tool_result_bytes": 999999,
1810                "max_uncached_input_tokens": 1000
1811            }
1812        }));
1813        let usage = TokenUsage::with_cache(10_000, 100, Some(0), None);
1814        let result = apply_cost_control_masking(&messages, &config, Some(&usage));
1815
1816        assert_eq!(result.masked_count, 2);
1817        let first_tool = result.messages[2].tool_result_content().unwrap();
1818        let summary = first_tool.result.as_ref().unwrap()["summary"]
1819            .as_str()
1820            .unwrap();
1821        assert!(summary.contains("bash exit=0"));
1822    }
1823
1824    #[test]
1825    fn test_model_view_uses_provider_cache_signal_from_compaction_config() {
1826        let mut messages = vec![Message::user("run commands")];
1827        for index in 0..3 {
1828            messages.extend(make_message_tool_turn(
1829                &format!("call_{index}"),
1830                "bash",
1831                json!({
1832                    "stdout": "small output",
1833                    "stderr": "",
1834                    "exit_code": 0,
1835                    "success": true
1836                }),
1837            ));
1838        }
1839        let usage = TokenUsage::with_cache(150_000, 100, Some(0), None);
1840
1841        let config = CompactionConfig::default();
1842        let result = build_model_view_messages(&messages, &config, Some(&usage));
1843
1844        assert_eq!(result.masked_count, 1);
1845        let first_tool = result.messages[2].tool_result_content().unwrap();
1846        assert_eq!(first_tool.result.as_ref().unwrap()["masked"], true);
1847    }
1848
1849    #[test]
1850    fn test_config_falls_back_to_defaults_for_invalid_json() {
1851        let config = CompactionConfig::from_json(&json!({
1852            "strategy": "nonexistent_strategy",
1853            "budget_percent": "not-a-number"
1854        }));
1855        assert_eq!(config.strategy, CompactionStrategy::Auto);
1856        assert!(config.proactive);
1857    }
1858
1859    #[test]
1860    fn test_config_partial_override() {
1861        let config = CompactionConfig::from_json(&json!({
1862            "budget_percent": 0.7,
1863            "observation_masking": {
1864                "keep_recent_tool_outputs": 3
1865            }
1866        }));
1867        assert_eq!(config.strategy, CompactionStrategy::Auto);
1868        assert!(config.proactive);
1869        assert!((config.budget_percent - 0.7).abs() < f32::EPSILON);
1870        assert_eq!(config.observation_masking.keep_recent_tool_outputs, 3);
1871        assert_eq!(
1872            config.observation_masking.summary_format,
1873            MaskingSummaryFormat::OneLine
1874        );
1875    }
1876
1877    #[test]
1878    fn test_strategy_serialization_roundtrip() {
1879        for strategy in [
1880            CompactionStrategy::Auto,
1881            CompactionStrategy::Native,
1882            CompactionStrategy::ObservationMasking,
1883            CompactionStrategy::Summarization,
1884        ] {
1885            let json = serde_json::to_value(strategy).unwrap();
1886            let deserialized: CompactionStrategy = serde_json::from_value(json).unwrap();
1887            assert_eq!(strategy, deserialized);
1888        }
1889    }
1890
1891    #[test]
1892    fn test_strategy_display() {
1893        assert_eq!(CompactionStrategy::Auto.to_string(), "auto");
1894        assert_eq!(CompactionStrategy::Native.to_string(), "native");
1895        assert_eq!(
1896            CompactionStrategy::ObservationMasking.to_string(),
1897            "observation_masking"
1898        );
1899        assert_eq!(
1900            CompactionStrategy::Summarization.to_string(),
1901            "summarization"
1902        );
1903    }
1904
1905    #[test]
1906    fn test_masking_format_serialization_roundtrip() {
1907        for format in [
1908            MaskingSummaryFormat::OneLine,
1909            MaskingSummaryFormat::HeadTail,
1910        ] {
1911            let json = serde_json::to_value(format).unwrap();
1912            let deserialized: MaskingSummaryFormat = serde_json::from_value(json).unwrap();
1913            assert_eq!(format, deserialized);
1914        }
1915    }
1916
1917    #[test]
1918    fn test_budget_percent_boundary_values() {
1919        let config = CompactionConfig::from_json(&json!({"budget_percent": 0.1}));
1920        assert!((config.budget_percent - 0.1).abs() < f32::EPSILON);
1921
1922        let config = CompactionConfig::from_json(&json!({"budget_percent": 0.99}));
1923        assert!((config.budget_percent - 0.99).abs() < f32::EPSILON);
1924    }
1925
1926    #[test]
1927    fn test_keep_recent_tool_outputs_zero() {
1928        let config = CompactionConfig::from_json(&json!({
1929            "observation_masking": {"keep_recent_tool_outputs": 0}
1930        }));
1931        assert_eq!(config.observation_masking.keep_recent_tool_outputs, 0);
1932    }
1933
1934    // ====================================================================
1935    // Observation masking tests
1936    // ====================================================================
1937
1938    #[test]
1939    fn test_masking_no_tool_messages() {
1940        let messages = vec![make_user_msg("hello"), make_assistant_msg("hi")];
1941        let config = ObservationMaskingConfig::default();
1942        let result = apply_observation_masking(&messages, &config);
1943        assert_eq!(result.masked_count, 0);
1944        assert_eq!(result.messages.len(), 2);
1945    }
1946
1947    #[test]
1948    fn test_masking_fewer_than_keep_recent() {
1949        let messages = vec![
1950            make_user_msg("read file"),
1951            make_assistant_with_tool_call("call_1", "read_file"),
1952            make_tool_result("call_1", "file contents"),
1953            make_assistant_msg("done"),
1954        ];
1955        let config = ObservationMaskingConfig {
1956            keep_recent_tool_outputs: 5,
1957            summary_format: MaskingSummaryFormat::OneLine,
1958        };
1959        let result = apply_observation_masking(&messages, &config);
1960        assert_eq!(result.masked_count, 0);
1961    }
1962
1963    #[test]
1964    fn test_masking_masks_old_outputs() {
1965        let messages = vec![
1966            make_user_msg("start"),
1967            make_assistant_with_tool_call("call_1", "read_file"),
1968            make_tool_result(
1969                "call_1",
1970                "old file contents that are very long and should be masked by the observation masking strategy because it exceeds 100 chars",
1971            ),
1972            make_assistant_msg("got it"),
1973            make_user_msg("next"),
1974            make_assistant_with_tool_call("call_2", "search"),
1975            make_tool_result("call_2", "search results"),
1976            make_assistant_msg("found it"),
1977            make_user_msg("more"),
1978            make_assistant_with_tool_call("call_3", "bash"),
1979            make_tool_result("call_3", "command output"),
1980        ];
1981
1982        let config = ObservationMaskingConfig {
1983            keep_recent_tool_outputs: 2,
1984            summary_format: MaskingSummaryFormat::OneLine,
1985        };
1986        let result = apply_observation_masking(&messages, &config);
1987
1988        assert_eq!(result.masked_count, 1);
1989
1990        // First tool result should be masked
1991        let masked = &result.messages[2];
1992        assert_eq!(masked.role, LlmMessageRole::Tool);
1993        let text = extract_text(&masked.content);
1994        assert!(
1995            text.starts_with('['),
1996            "Expected masked summary, got: {text}"
1997        );
1998        assert!(text.contains("read_file"), "Expected tool name: {text}");
1999
2000        // Last 2 tool results should be verbatim
2001        assert_eq!(extract_text(&result.messages[6].content), "search results");
2002        assert_eq!(extract_text(&result.messages[10].content), "command output");
2003    }
2004
2005    #[test]
2006    fn test_masking_preserves_tool_call_id() {
2007        let messages = vec![
2008            make_assistant_with_tool_call("call_1", "read_file"),
2009            make_tool_result("call_1", "content"),
2010            make_assistant_with_tool_call("call_2", "bash"),
2011            make_tool_result("call_2", "output"),
2012        ];
2013
2014        let config = ObservationMaskingConfig {
2015            keep_recent_tool_outputs: 1,
2016            summary_format: MaskingSummaryFormat::OneLine,
2017        };
2018        let result = apply_observation_masking(&messages, &config);
2019        assert_eq!(result.messages[1].tool_call_id, Some("call_1".to_string()));
2020    }
2021
2022    #[test]
2023    fn test_masking_head_tail_format() {
2024        let long_output = (0..20)
2025            .map(|i| format!("line {i}"))
2026            .collect::<Vec<_>>()
2027            .join("\n");
2028
2029        let messages = vec![
2030            make_assistant_with_tool_call("call_1", "bash"),
2031            make_tool_result("call_1", &long_output),
2032            make_assistant_with_tool_call("call_2", "bash"),
2033            make_tool_result("call_2", "recent output"),
2034        ];
2035
2036        let config = ObservationMaskingConfig {
2037            keep_recent_tool_outputs: 1,
2038            summary_format: MaskingSummaryFormat::HeadTail,
2039        };
2040        let result = apply_observation_masking(&messages, &config);
2041
2042        let text = extract_text(&result.messages[1].content);
2043        assert!(text.contains("line 0"), "Should contain first lines");
2044        assert!(text.contains("line 19"), "Should contain last lines");
2045        assert!(text.contains("lines omitted"), "Should indicate omissions");
2046    }
2047
2048    #[test]
2049    fn test_masking_short_output_inline() {
2050        let messages = vec![
2051            make_assistant_with_tool_call("call_1", "get_time"),
2052            make_tool_result("call_1", "2024-01-01"),
2053            make_assistant_with_tool_call("call_2", "bash"),
2054            make_tool_result("call_2", "ok"),
2055        ];
2056
2057        let config = ObservationMaskingConfig {
2058            keep_recent_tool_outputs: 1,
2059            summary_format: MaskingSummaryFormat::OneLine,
2060        };
2061        let result = apply_observation_masking(&messages, &config);
2062        let text = extract_text(&result.messages[1].content);
2063        assert!(text.contains("2024-01-01"), "Short output included: {text}");
2064    }
2065
2066    #[test]
2067    fn test_masking_all_when_keep_zero() {
2068        let messages = vec![
2069            make_assistant_with_tool_call("call_1", "a"),
2070            make_tool_result("call_1", "output1"),
2071            make_assistant_with_tool_call("call_2", "b"),
2072            make_tool_result("call_2", "output2"),
2073        ];
2074
2075        let config = ObservationMaskingConfig {
2076            keep_recent_tool_outputs: 0,
2077            summary_format: MaskingSummaryFormat::OneLine,
2078        };
2079        let result = apply_observation_masking(&messages, &config);
2080        assert_eq!(result.masked_count, 2);
2081    }
2082
2083    #[test]
2084    fn test_masking_empty_messages() {
2085        let result = apply_observation_masking(&[], &ObservationMaskingConfig::default());
2086        assert_eq!(result.masked_count, 0);
2087        assert!(result.messages.is_empty());
2088    }
2089
2090    #[test]
2091    fn test_masking_preserves_message_count() {
2092        let messages = vec![
2093            make_user_msg("start"),
2094            make_assistant_with_tool_call("c1", "read_file"),
2095            make_tool_result("c1", "content 1"),
2096            make_assistant_msg("ok"),
2097            make_user_msg("next"),
2098            make_assistant_with_tool_call("c2", "bash"),
2099            make_tool_result("c2", "content 2"),
2100            make_assistant_msg("done"),
2101        ];
2102
2103        let config = ObservationMaskingConfig {
2104            keep_recent_tool_outputs: 1,
2105            summary_format: MaskingSummaryFormat::OneLine,
2106        };
2107        let result = apply_observation_masking(&messages, &config);
2108        assert_eq!(result.messages.len(), messages.len());
2109    }
2110
2111    #[test]
2112    fn test_masking_unknown_tool_call_id() {
2113        let messages = vec![
2114            make_tool_result("orphan", "some output"),
2115            make_assistant_with_tool_call("call_2", "bash"),
2116            make_tool_result("call_2", "recent"),
2117        ];
2118
2119        let config = ObservationMaskingConfig {
2120            keep_recent_tool_outputs: 1,
2121            summary_format: MaskingSummaryFormat::OneLine,
2122        };
2123        let result = apply_observation_masking(&messages, &config);
2124        assert_eq!(result.masked_count, 1);
2125        let text = extract_text(&result.messages[0].content);
2126        assert!(text.contains("unknown_tool"), "Fallback name: {text}");
2127    }
2128
2129    #[test]
2130    fn test_masking_many_tool_calls_keeps_exactly_n() {
2131        let mut messages = Vec::new();
2132        for i in 0..10 {
2133            let id = format!("call_{i}");
2134            messages.push(make_assistant_with_tool_call(&id, &format!("tool_{i}")));
2135            messages.push(make_tool_result(&id, &format!("output {i}")));
2136        }
2137
2138        let config = ObservationMaskingConfig {
2139            keep_recent_tool_outputs: 3,
2140            summary_format: MaskingSummaryFormat::OneLine,
2141        };
2142        let result = apply_observation_masking(&messages, &config);
2143        assert_eq!(result.masked_count, 7);
2144
2145        // Last 3 tool results at indices 15, 17, 19 should be verbatim
2146        assert_eq!(extract_text(&result.messages[15].content), "output 7");
2147        assert_eq!(extract_text(&result.messages[17].content), "output 8");
2148        assert_eq!(extract_text(&result.messages[19].content), "output 9");
2149    }
2150
2151    // ====================================================================
2152    // Summarization tests
2153    // ====================================================================
2154
2155    #[test]
2156    fn test_summarization_prompt_default() {
2157        let config = SummarizationConfig::default();
2158        let prompt = build_summarization_prompt(&config);
2159        assert!(prompt.contains("<task>"));
2160        assert!(prompt.contains("decisions"));
2161        assert!(prompt.contains("files_modified"));
2162        assert!(prompt.contains("errors"));
2163        assert!(prompt.contains("current_plan"));
2164    }
2165
2166    #[test]
2167    fn test_summarization_prompt_custom_instructions() {
2168        let config = SummarizationConfig {
2169            instructions: Some("Focus on API changes".to_string()),
2170            ..Default::default()
2171        };
2172        let prompt = build_summarization_prompt(&config);
2173        assert!(prompt.contains("Focus on API changes"));
2174    }
2175
2176    #[test]
2177    fn test_summarization_prompt_custom_preserve() {
2178        let config = SummarizationConfig {
2179            preserve: vec!["auth_tokens".to_string(), "database_schema".to_string()],
2180            ..Default::default()
2181        };
2182        let prompt = build_summarization_prompt(&config);
2183        assert!(prompt.contains("auth_tokens"));
2184        assert!(prompt.contains("database_schema"));
2185        assert!(!prompt.contains("decisions"));
2186    }
2187
2188    #[test]
2189    fn test_summarization_prompt_empty_preserve_uses_defaults() {
2190        let config = SummarizationConfig {
2191            preserve: vec![],
2192            ..Default::default()
2193        };
2194        let prompt = build_summarization_prompt(&config);
2195        assert!(prompt.contains("decisions"));
2196    }
2197
2198    #[test]
2199    fn test_format_messages_for_summarization() {
2200        let messages = vec![
2201            make_user_msg("What is 2+2?"),
2202            make_assistant_msg("The answer is 4."),
2203        ];
2204        let formatted = format_messages_for_summarization(&messages);
2205        assert!(formatted.contains("[user]: What is 2+2?"));
2206        assert!(formatted.contains("[assistant]: The answer is 4."));
2207    }
2208
2209    #[test]
2210    fn test_format_messages_truncates_long_content() {
2211        let long_content = "x".repeat(5000);
2212        let messages = vec![make_user_msg(&long_content)];
2213        let formatted = format_messages_for_summarization(&messages);
2214        assert!(formatted.contains("truncated"));
2215        assert!(formatted.len() < long_content.len());
2216    }
2217
2218    #[test]
2219    fn test_format_messages_truncates_utf8_without_panic() {
2220        let multibyte = "é".repeat(1001); // 2002 bytes, 1001 chars
2221        let messages = vec![make_user_msg(&multibyte)];
2222        let formatted = format_messages_for_summarization(&messages);
2223        assert!(formatted.contains("truncated"));
2224        assert!(formatted.contains("[truncated, 2002 chars total]"));
2225    }
2226
2227    #[test]
2228    fn test_build_summary_message() {
2229        let msg = build_summary_message("The user asked about APIs.");
2230        assert_eq!(msg.role, LlmMessageRole::System);
2231        let text = extract_text(&msg.content);
2232        assert!(text.contains("[CONVERSATION_SUMMARY]"));
2233        assert!(text.contains("The user asked about APIs."));
2234        assert!(text.contains("[/CONVERSATION_SUMMARY]"));
2235    }
2236
2237    // ====================================================================
2238    // Head-tail format edge cases
2239    // ====================================================================
2240
2241    #[test]
2242    fn test_head_tail_short_content_unchanged() {
2243        let content = LlmMessageContent::Text("line1\nline2\nline3".to_string());
2244        assert_eq!(format_head_tail_summary(&content), "line1\nline2\nline3");
2245    }
2246
2247    #[test]
2248    fn test_head_tail_exactly_six_lines() {
2249        let content = LlmMessageContent::Text("1\n2\n3\n4\n5\n6".to_string());
2250        assert_eq!(format_head_tail_summary(&content), "1\n2\n3\n4\n5\n6");
2251    }
2252
2253    #[test]
2254    fn test_head_tail_seven_lines() {
2255        let content = LlmMessageContent::Text("1\n2\n3\n4\n5\n6\n7".to_string());
2256        let result = format_head_tail_summary(&content);
2257        assert!(result.contains("1\n2\n3"));
2258        assert!(result.contains("5\n6\n7"));
2259        assert!(result.contains("1 lines omitted"));
2260    }
2261
2262    // ====================================================================
2263    // One-line format edge cases
2264    // ====================================================================
2265
2266    #[test]
2267    fn test_one_line_empty_output() {
2268        let result = format_one_line_summary("bash", &LlmMessageContent::Text(String::new()));
2269        assert_eq!(result, "[bash → ]");
2270    }
2271
2272    #[test]
2273    fn test_one_line_exactly_100_chars() {
2274        let text = "x".repeat(100);
2275        let result = format_one_line_summary("bash", &LlmMessageContent::Text(text.clone()));
2276        assert!(result.contains(&text));
2277    }
2278
2279    #[test]
2280    fn test_one_line_101_chars_summarized() {
2281        let text = "x".repeat(101);
2282        let result = format_one_line_summary("bash", &LlmMessageContent::Text(text));
2283        assert!(result.contains("lines"));
2284        assert!(result.contains("bytes"));
2285    }
2286
2287    #[test]
2288    fn test_one_line_multipart_content() {
2289        let content = LlmMessageContent::Parts(vec![
2290            LlmContentPart::Text {
2291                text: "part1".to_string(),
2292            },
2293            LlmContentPart::Text {
2294                text: "part2".to_string(),
2295            },
2296        ]);
2297        let result = format_one_line_summary("tool", &content);
2298        assert!(result.contains("part1"));
2299        assert!(result.contains("part2"));
2300    }
2301
2302    // ====================================================================
2303    // CompactionStep tests
2304    // ====================================================================
2305
2306    #[test]
2307    fn test_compaction_step_serialization() {
2308        let step = CompactionStep {
2309            strategy: "observation_masking".to_string(),
2310            messages_after: 42,
2311            duration_ms: 12,
2312        };
2313        let json = serde_json::to_value(&step).unwrap();
2314        assert_eq!(json["strategy"], "observation_masking");
2315        assert_eq!(json["messages_after"], 42);
2316        assert_eq!(json["duration_ms"], 12);
2317    }
2318
2319    // ====================================================================
2320    // Token estimation tests
2321    // ====================================================================
2322
2323    #[test]
2324    fn test_estimate_tokens_text() {
2325        let msg = make_user_msg("hello world"); // 11 chars → ~2 tokens
2326        let tokens = estimate_tokens(&msg);
2327        assert_eq!(tokens, 11 / 4);
2328    }
2329
2330    #[test]
2331    fn test_estimate_tokens_empty() {
2332        let msg = make_user_msg("");
2333        assert_eq!(estimate_tokens(&msg), 0);
2334    }
2335
2336    #[test]
2337    fn test_estimate_total_tokens() {
2338        let messages = vec![
2339            make_user_msg("a".repeat(400).as_str()),      // 100 tokens
2340            make_assistant_msg("b".repeat(200).as_str()), // 50 tokens
2341        ];
2342        assert_eq!(estimate_total_tokens(&messages), 150);
2343    }
2344
2345    #[test]
2346    fn test_estimate_tokens_with_tool_calls() {
2347        let msg = make_assistant_with_tool_call("call_1", "read_file");
2348        let tokens = estimate_tokens(&msg);
2349        assert!(tokens > 0, "Tool call should contribute tokens");
2350    }
2351
2352    // ====================================================================
2353    // Proactive compaction check tests
2354    // ====================================================================
2355
2356    #[test]
2357    fn test_should_compact_proactively_under_budget() {
2358        let messages = vec![make_user_msg("short")];
2359        let config = CompactionConfig::default(); // 85% budget
2360        assert!(!should_compact_proactively(&messages, &config, 128_000));
2361    }
2362
2363    #[test]
2364    fn test_should_compact_proactively_over_budget() {
2365        // Create messages that exceed 85% of 1000 tokens = 850 tokens
2366        let big_text = "x".repeat(4000); // ~1000 tokens
2367        let messages = vec![make_user_msg(&big_text)];
2368        let config = CompactionConfig::default();
2369        assert!(should_compact_proactively(&messages, &config, 1000));
2370    }
2371
2372    #[test]
2373    fn test_should_compact_proactively_disabled() {
2374        let big_text = "x".repeat(4000);
2375        let messages = vec![make_user_msg(&big_text)];
2376        let config = CompactionConfig {
2377            proactive: false,
2378            ..Default::default()
2379        };
2380        assert!(!should_compact_proactively(&messages, &config, 1000));
2381    }
2382
2383    // ====================================================================
2384    // Aggressive trim tests
2385    // ====================================================================
2386
2387    #[test]
2388    fn test_aggressive_trim_keeps_newest() {
2389        // Use big messages so budget matters
2390        let messages = vec![
2391            make_user_msg(&"s".repeat(400)),      // system: 100 tokens
2392            make_user_msg(&"a".repeat(400)),      // old: 100 tokens
2393            make_assistant_msg(&"b".repeat(400)), // old: 100 tokens
2394            make_user_msg(&"c".repeat(400)),      // recent: 100 tokens
2395            make_assistant_msg(&"d".repeat(400)), // recent: 100 tokens
2396        ];
2397        // Target: enough for system + 2 recent messages only (300 tokens)
2398        let target_tokens = 300;
2399        let result = aggressive_trim(&messages, target_tokens, true);
2400        assert!(
2401            result.len() < messages.len(),
2402            "Expected trim, got {} messages",
2403            result.len()
2404        );
2405        // Should keep system prompt (first)
2406        assert_eq!(result[0].role, LlmMessageRole::User);
2407    }
2408
2409    #[test]
2410    fn test_aggressive_trim_empty() {
2411        let result = aggressive_trim(&[], 100, false);
2412        assert!(result.is_empty());
2413    }
2414
2415    #[test]
2416    fn test_aggressive_trim_everything_fits() {
2417        let messages = vec![make_user_msg("hi"), make_assistant_msg("hello")];
2418        let result = aggressive_trim(&messages, 100_000, false);
2419        assert_eq!(result.len(), 2);
2420    }
2421
2422    // ====================================================================
2423    // Session compaction metrics tests
2424    // ====================================================================
2425
2426    #[test]
2427    fn test_session_metrics_record() {
2428        let mut metrics = SessionCompactionMetrics::default();
2429        metrics.record("observation_masking+native", 100, 50, 200);
2430
2431        assert_eq!(metrics.compaction_count, 1);
2432        assert_eq!(metrics.total_messages_saved, 50);
2433        assert_eq!(metrics.total_duration_ms, 200);
2434        assert_eq!(metrics.strategy_counts["observation_masking"], 1);
2435        assert_eq!(metrics.strategy_counts["native"], 1);
2436    }
2437
2438    #[test]
2439    fn test_session_metrics_accumulate() {
2440        let mut metrics = SessionCompactionMetrics::default();
2441        metrics.record("observation_masking", 100, 80, 10);
2442        metrics.record("summarization", 80, 40, 500);
2443
2444        assert_eq!(metrics.compaction_count, 2);
2445        assert_eq!(metrics.total_messages_saved, 60);
2446        assert_eq!(metrics.total_duration_ms, 510);
2447        assert_eq!(metrics.strategy_counts["observation_masking"], 1);
2448        assert_eq!(metrics.strategy_counts["summarization"], 1);
2449    }
2450
2451    #[test]
2452    fn test_session_metrics_serialization() {
2453        let mut metrics = SessionCompactionMetrics::default();
2454        metrics.record("auto", 50, 30, 100);
2455        let json = serde_json::to_value(&metrics).unwrap();
2456        assert_eq!(json["compaction_count"], 1);
2457        assert_eq!(json["total_messages_saved"], 20);
2458    }
2459
2460    // ====================================================================
2461    // Hierarchical memory tier tests
2462    // ====================================================================
2463
2464    #[test]
2465    fn test_classify_memory_tiers_basic() {
2466        let messages: Vec<LlmMessage> = (0..30)
2467            .map(|i| make_user_msg(&format!("msg {i}")))
2468            .collect();
2469
2470        let config = HierarchicalMemoryConfig {
2471            hot_messages: 5,
2472            warm_messages: 10,
2473        };
2474
2475        let classified = classify_memory_tiers(&messages, &config);
2476        assert_eq!(classified.len(), 30);
2477
2478        // Last 5 = hot
2479        assert_eq!(classified[29].0, MemoryTier::Hot);
2480        assert_eq!(classified[25].0, MemoryTier::Hot);
2481
2482        // Next 10 = warm
2483        assert_eq!(classified[24].0, MemoryTier::Warm);
2484        assert_eq!(classified[15].0, MemoryTier::Warm);
2485
2486        // Rest = cold
2487        assert_eq!(classified[14].0, MemoryTier::Cold);
2488        assert_eq!(classified[0].0, MemoryTier::Cold);
2489    }
2490
2491    #[test]
2492    fn test_classify_memory_tiers_all_hot() {
2493        let messages: Vec<LlmMessage> =
2494            (0..3).map(|i| make_user_msg(&format!("msg {i}"))).collect();
2495
2496        let config = HierarchicalMemoryConfig::default(); // 20 hot
2497
2498        let classified = classify_memory_tiers(&messages, &config);
2499        assert!(classified.iter().all(|(tier, _)| *tier == MemoryTier::Hot));
2500    }
2501
2502    #[test]
2503    fn test_apply_hierarchical_memory_basic() {
2504        let mut messages = Vec::new();
2505
2506        // Cold: old tool interactions
2507        for i in 0..5 {
2508            let id = format!("old_{i}");
2509            messages.push(make_assistant_with_tool_call(&id, "read_file"));
2510            messages.push(make_tool_result(&id, &format!("old content {i}")));
2511        }
2512
2513        // Warm: mid tool interactions
2514        for i in 0..3 {
2515            let id = format!("mid_{i}");
2516            messages.push(make_assistant_with_tool_call(&id, "bash"));
2517            messages.push(make_tool_result(&id, &format!("mid output {i}")));
2518        }
2519
2520        // Hot: recent
2521        messages.push(make_user_msg("what now?"));
2522        messages.push(make_assistant_msg("let me check"));
2523
2524        let config = HierarchicalMemoryConfig {
2525            hot_messages: 2,
2526            warm_messages: 6,
2527        };
2528        let masking_config = ObservationMaskingConfig::default();
2529
2530        let result = apply_hierarchical_memory(
2531            &messages,
2532            &config,
2533            &masking_config,
2534            Some("Summary of old work"),
2535        );
2536
2537        // Should have: 1 summary + 6 warm messages + 2 hot messages
2538        assert!(result.len() <= 9);
2539        // First should be the summary
2540        let first_text = extract_text(&result[0].content);
2541        assert!(first_text.contains("CONVERSATION_SUMMARY"));
2542        // Last 2 should be hot (verbatim)
2543        let last = extract_text(&result[result.len() - 1].content);
2544        assert!(last.contains("let me check"));
2545    }
2546
2547    #[test]
2548    fn test_apply_hierarchical_memory_no_cold() {
2549        let messages = vec![make_user_msg("hello"), make_assistant_msg("hi")];
2550
2551        let config = HierarchicalMemoryConfig {
2552            hot_messages: 5,
2553            warm_messages: 5,
2554        };
2555
2556        let result = apply_hierarchical_memory(
2557            &messages,
2558            &config,
2559            &ObservationMaskingConfig::default(),
2560            None,
2561        );
2562        // All hot, no summary needed
2563        assert_eq!(result.len(), 2);
2564    }
2565
2566    #[test]
2567    fn test_memory_tier_config_from_json() {
2568        let config: HierarchicalMemoryConfig = serde_json::from_value(json!({
2569            "hot_messages": 10,
2570            "warm_messages": 50
2571        }))
2572        .unwrap();
2573        assert_eq!(config.hot_messages, 10);
2574        assert_eq!(config.warm_messages, 50);
2575    }
2576
2577    #[test]
2578    fn test_memory_tier_config_defaults() {
2579        let config = HierarchicalMemoryConfig::default();
2580        assert_eq!(config.hot_messages, 20);
2581        assert_eq!(config.warm_messages, 100);
2582    }
2583
2584    #[test]
2585    fn test_compaction_config_with_memory_tiers() {
2586        let config = CompactionConfig::from_json(&json!({
2587            "strategy": "auto",
2588            "memory_tiers": {
2589                "hot_messages": 15,
2590                "warm_messages": 80
2591            }
2592        }));
2593        assert_eq!(config.memory_tiers.hot_messages, 15);
2594        assert_eq!(config.memory_tiers.warm_messages, 80);
2595    }
2596
2597    #[test]
2598    fn test_memory_tier_serialization() {
2599        assert_eq!(serde_json::to_value(MemoryTier::Hot).unwrap(), json!("hot"));
2600        assert_eq!(
2601            serde_json::to_value(MemoryTier::Warm).unwrap(),
2602            json!("warm")
2603        );
2604        assert_eq!(
2605            serde_json::to_value(MemoryTier::Cold).unwrap(),
2606            json!("cold")
2607        );
2608    }
2609
2610    // ====================================================================
2611    // Skill content protection tests
2612    // ====================================================================
2613
2614    #[test]
2615    fn test_masking_skips_activate_skill_results() {
2616        // 3 tool results: activate_skill (protected), read_file, bash
2617        // With keep_recent=1, only read_file should be masked (activate_skill exempt)
2618        let messages = vec![
2619            make_assistant_with_tool_call("call_skill", "activate_skill"),
2620            make_tool_result(
2621                "call_skill",
2622                "You are a code review agent. Follow these instructions...",
2623            ),
2624            make_assistant_msg("Skill activated"),
2625            make_assistant_with_tool_call("call_read", "read_file"),
2626            make_tool_result(
2627                "call_read",
2628                "file contents that are long enough to be masked by observation masking because they exceed one hundred characters easily",
2629            ),
2630            make_assistant_msg("got it"),
2631            make_assistant_with_tool_call("call_bash", "bash"),
2632            make_tool_result("call_bash", "command output"),
2633        ];
2634
2635        let config = ObservationMaskingConfig {
2636            keep_recent_tool_outputs: 1,
2637            summary_format: MaskingSummaryFormat::OneLine,
2638        };
2639        let result = apply_observation_masking(&messages, &config);
2640
2641        // activate_skill result should be verbatim
2642        assert_eq!(
2643            extract_text(&result.messages[1].content),
2644            "You are a code review agent. Follow these instructions..."
2645        );
2646        // read_file result should be masked (it's the only maskable old one)
2647        assert!(extract_text(&result.messages[4].content).starts_with('['));
2648        // bash result should be verbatim (most recent maskable)
2649        assert_eq!(extract_text(&result.messages[7].content), "command output");
2650        assert_eq!(result.masked_count, 1);
2651    }
2652
2653    #[test]
2654    fn test_masking_all_activate_skill_exempt_from_count() {
2655        // 2 activate_skill results + 1 regular tool result
2656        // With keep_recent=0, only the regular one should be masked
2657        let messages = vec![
2658            make_assistant_with_tool_call("s1", "activate_skill"),
2659            make_tool_result("s1", "Skill 1 instructions"),
2660            make_assistant_with_tool_call("s2", "activate_skill"),
2661            make_tool_result("s2", "Skill 2 instructions"),
2662            make_assistant_with_tool_call("c1", "bash"),
2663            make_tool_result("c1", "output"),
2664        ];
2665
2666        let config = ObservationMaskingConfig {
2667            keep_recent_tool_outputs: 0,
2668            summary_format: MaskingSummaryFormat::OneLine,
2669        };
2670        let result = apply_observation_masking(&messages, &config);
2671
2672        assert_eq!(result.masked_count, 1);
2673        // Both skill results preserved
2674        assert_eq!(
2675            extract_text(&result.messages[1].content),
2676            "Skill 1 instructions"
2677        );
2678        assert_eq!(
2679            extract_text(&result.messages[3].content),
2680            "Skill 2 instructions"
2681        );
2682    }
2683
2684    #[test]
2685    fn test_aggressive_trim_preserves_skill_messages() {
2686        // Create messages where budget only fits ~2 messages, but skill messages
2687        // should always be preserved
2688        let messages = vec![
2689            make_user_msg(&"s".repeat(400)), // system: 100 tokens
2690            make_assistant_with_tool_call("skill1", "activate_skill"),
2691            make_tool_result("skill1", "Important skill instructions"),
2692            make_user_msg(&"a".repeat(400)),      // old: 100 tokens
2693            make_assistant_msg(&"b".repeat(400)), // old: 100 tokens
2694            make_user_msg(&"c".repeat(400)),      // recent: 100 tokens
2695            make_assistant_msg(&"d".repeat(400)), // recent: 100 tokens
2696        ];
2697
2698        // Budget for system + skill call + skill result + 1 recent = ~400 tokens
2699        // Should keep: system, skill call, skill result, and as many recent as fit
2700        let target_tokens = 400;
2701        let result = aggressive_trim(&messages, target_tokens, true);
2702
2703        // Verify skill messages are preserved
2704        let has_skill_result = result.iter().any(|m| {
2705            m.role == LlmMessageRole::Tool
2706                && extract_text(&m.content) == "Important skill instructions"
2707        });
2708        assert!(
2709            has_skill_result,
2710            "Skill tool result must survive aggressive trim"
2711        );
2712
2713        let has_skill_call = result.iter().any(|m| {
2714            m.tool_calls
2715                .as_ref()
2716                .is_some_and(|calls| calls.iter().any(|tc| tc.name == "activate_skill"))
2717        });
2718        assert!(
2719            has_skill_call,
2720            "Skill tool call must survive aggressive trim"
2721        );
2722    }
2723
2724    #[test]
2725    fn test_hierarchical_memory_rescues_skill_from_cold_tier() {
2726        let mut messages = Vec::new();
2727
2728        // Cold tier: old messages including a skill activation
2729        messages.push(make_assistant_with_tool_call("skill1", "activate_skill"));
2730        messages.push(make_tool_result(
2731            "skill1",
2732            "You must always validate input.",
2733        ));
2734        for i in 0..8 {
2735            let id = format!("old_{i}");
2736            messages.push(make_assistant_with_tool_call(&id, "read_file"));
2737            messages.push(make_tool_result(&id, &format!("old content {i}")));
2738        }
2739
2740        // Warm tier
2741        for i in 0..3 {
2742            let id = format!("mid_{i}");
2743            messages.push(make_assistant_with_tool_call(&id, "bash"));
2744            messages.push(make_tool_result(&id, &format!("mid output {i}")));
2745        }
2746
2747        // Hot tier
2748        messages.push(make_user_msg("what now?"));
2749        messages.push(make_assistant_msg("let me check"));
2750
2751        let config = HierarchicalMemoryConfig {
2752            hot_messages: 2,
2753            warm_messages: 6,
2754        };
2755        let masking_config = ObservationMaskingConfig::default();
2756
2757        let result = apply_hierarchical_memory(
2758            &messages,
2759            &config,
2760            &masking_config,
2761            Some("Summary of old work"),
2762        );
2763
2764        // The protected skill messages from cold tier should be rescued
2765        let has_skill_instructions = result
2766            .iter()
2767            .any(|m| extract_text(&m.content).contains("You must always validate input."));
2768        assert!(
2769            has_skill_instructions,
2770            "Skill instructions from cold tier must be rescued into output"
2771        );
2772
2773        // Summary should still be present
2774        assert!(extract_text(&result[0].content).contains("CONVERSATION_SUMMARY"));
2775    }
2776
2777    #[test]
2778    fn test_is_protected_tool_result_detection() {
2779        let messages = vec![
2780            make_assistant_with_tool_call("s1", "activate_skill"),
2781            make_tool_result("s1", "skill content"),
2782            make_assistant_with_tool_call("r1", "read_file"),
2783            make_tool_result("r1", "file content"),
2784        ];
2785
2786        // activate_skill result is protected
2787        assert!(is_protected_tool_result(&messages, &messages[1]));
2788        // read_file result is not
2789        assert!(!is_protected_tool_result(&messages, &messages[3]));
2790        // non-tool message is not
2791        assert!(!is_protected_tool_result(&messages, &messages[0]));
2792    }
2793
2794    #[test]
2795    fn test_is_protected_tool_call_message_detection() {
2796        let skill_call = make_assistant_with_tool_call("s1", "activate_skill");
2797        let regular_call = make_assistant_with_tool_call("r1", "read_file");
2798        let user_msg = make_user_msg("hello");
2799
2800        assert!(is_protected_tool_call_message(&skill_call));
2801        assert!(!is_protected_tool_call_message(&regular_call));
2802        assert!(!is_protected_tool_call_message(&user_msg));
2803    }
2804
2805    #[test]
2806    fn test_default_preserve_includes_skill_instructions() {
2807        let config = SummarizationConfig::default();
2808        assert!(
2809            config.preserve.contains(&"skill_instructions".to_string()),
2810            "Default preserve list must include skill_instructions"
2811        );
2812    }
2813
2814    #[test]
2815    fn test_summarization_prompt_mentions_skill_protection() {
2816        let config = SummarizationConfig::default();
2817        let prompt = build_summarization_prompt(&config);
2818        assert!(
2819            prompt.contains("activate_skill"),
2820            "Summarization prompt must instruct LLM to preserve skill content"
2821        );
2822    }
2823
2824    #[test]
2825    fn test_aggressive_trim_protected_exceed_budget() {
2826        // When protected messages alone exceed the budget, keep as many as
2827        // fit (newest first) and drop non-protected entirely.
2828        let messages = vec![
2829            make_user_msg(&"s".repeat(400)), // system ~100 tokens
2830            make_assistant_with_tool_call("skill1", "activate_skill"), // protected
2831            make_tool_result("skill1", &"x".repeat(800)), // protected ~200 tokens
2832            make_assistant_with_tool_call("skill2", "activate_skill"), // protected
2833            make_tool_result("skill2", &"y".repeat(800)), // protected ~200 tokens
2834            make_user_msg(&"z".repeat(400)), // non-protected
2835        ];
2836
2837        // Budget only fits system + ~1 protected pair
2838        let result = aggressive_trim(&messages, 200, true);
2839
2840        // Must not exceed budget — non-protected messages dropped
2841        let has_non_protected = result
2842            .iter()
2843            .any(|m| m.role == LlmMessageRole::User && extract_text(&m.content).contains('z'));
2844        assert!(
2845            !has_non_protected,
2846            "Non-protected messages must be dropped when protected exceed budget"
2847        );
2848    }
2849
2850    #[test]
2851    fn test_format_messages_no_truncate_protected_tool_result() {
2852        // Protected tool results should not be truncated at 2000 chars
2853        let long_instructions = "a".repeat(5000);
2854        let messages = vec![
2855            make_assistant_with_tool_call("s1", "activate_skill"),
2856            make_tool_result("s1", &long_instructions),
2857            make_assistant_with_tool_call("r1", "read_file"),
2858            make_tool_result("r1", &"b".repeat(5000)),
2859        ];
2860
2861        let formatted = format_messages_for_summarization(&messages);
2862
2863        // Skill result: full 5000-char content present, not truncated
2864        assert!(
2865            formatted.contains(&long_instructions),
2866            "Protected tool result must not be truncated"
2867        );
2868        // Regular result: should be truncated
2869        assert!(
2870            formatted.contains("[truncated, 5000 chars total]"),
2871            "Non-protected tool result should be truncated"
2872        );
2873    }
2874
2875    #[test]
2876    fn test_hierarchical_memory_cross_tier_boundary_protection() {
2877        // The activate_skill tool-call is in cold tier, but its tool-result
2878        // lands in warm tier. The result must still be protected from masking.
2879        let mut messages = Vec::new();
2880
2881        // Cold tier: skill call + filler to push result into warm tier
2882        messages.push(make_assistant_with_tool_call("skill1", "activate_skill"));
2883        for i in 0..9 {
2884            let id = format!("cold_{i}");
2885            messages.push(make_assistant_with_tool_call(&id, "read_file"));
2886            messages.push(make_tool_result(&id, &format!("cold content {i}")));
2887        }
2888
2889        // Warm tier starts here — skill result is first warm message
2890        messages.push(make_tool_result(
2891            "skill1",
2892            "Cross-tier skill instructions that must survive",
2893        ));
2894        for i in 0..2 {
2895            let id = format!("warm_{i}");
2896            messages.push(make_assistant_with_tool_call(&id, "bash"));
2897            messages.push(make_tool_result(&id, &format!("warm output {i}")));
2898        }
2899
2900        // Hot tier
2901        messages.push(make_user_msg("continue"));
2902        messages.push(make_assistant_msg("ok"));
2903
2904        let config = HierarchicalMemoryConfig {
2905            hot_messages: 2,
2906            warm_messages: 5, // skill result + 2 bash pairs
2907        };
2908        let masking_config = ObservationMaskingConfig {
2909            keep_recent_tool_outputs: 0,
2910            summary_format: MaskingSummaryFormat::OneLine,
2911        };
2912
2913        let result = apply_hierarchical_memory(&messages, &config, &masking_config, None);
2914
2915        let has_skill_instructions = result.iter().any(|m| {
2916            extract_text(&m.content).contains("Cross-tier skill instructions that must survive")
2917        });
2918        assert!(
2919            has_skill_instructions,
2920            "Skill result in warm tier with call in cold tier must be protected"
2921        );
2922    }
2923}