zeph-context 0.21.3

// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0

//! Context lifecycle state machine for the Zeph agent.
//!
//! [`ContextManager`] tracks per-session compaction state and token budgets.
//! It decides when soft (pruning) or hard (LLM summarization) compaction should fire,
//! and builds the memory router used for query-aware store selection.
//!
//! [`CompactionState`] is the core state machine — see its doc comment for the
//! full transition map.

use std::sync::Arc;

use zeph_config::{CompressionConfig, StoreRoutingConfig};

use crate::budget::ContextBudget;

/// Lifecycle state of the compaction subsystem within a single session.
///
/// Replaces four independent boolean/u8 fields with an explicit state machine that makes
/// invalid states unrepresentable (e.g., warned-without-exhausted).
///
/// # Transition map
///
/// ```text
/// Ready
///   → CompactedThisTurn { cooldown } when hard compaction succeeds (pruning or LLM)
///   → CompactedThisTurn { cooldown: 0 } when focus truncation, eviction, or proactive
///     compression fires (these callers do not want post-compaction cooldown)
///   → Exhausted { warned: false } when compaction is counterproductive (too few messages,
///     zero net freed tokens, or still above hard threshold after LLM compaction)
///
/// CompactedThisTurn { cooldown }
///   → Cooling { turns_remaining: cooldown } when cooldown > 0  (via advance_turn)
///   → Ready                                 when cooldown == 0 (via advance_turn)
///
/// Cooling { turns_remaining }
///   → Cooling { turns_remaining - 1 } decremented inside maybe_compact each turn
///   → Ready                           when turns_remaining reaches 0
///   NOTE: Exhausted is NOT reachable from Cooling — all exhaustion-setting sites in
///   summarization.rs are guarded by an early-return when in_cooldown is true.
///
/// Exhausted { warned: false }
///   → Exhausted { warned: true } after the user warning is sent (one-shot)
///
/// Exhausted { warned: true }  (terminal — no further transitions)
/// ```
///
/// Only the `CompactedThisTurn` transitions are implemented by [`CompactionState::advance_turn`];
/// every other edge in the map is driven by code outside this type.
///
/// `turns_since_last_hard_compaction` is a **metric counter**, not part of this state machine,
/// and remains a separate field on `ContextManager`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum CompactionState {
    /// Normal state — compaction may fire if context exceeds thresholds.
    Ready,
    /// Hard compaction (or focus truncation / eviction / proactive compression) ran this turn.
    /// No further compaction until `advance_turn()` is called at the next turn boundary.
    /// `cooldown` carries the number of cooling turns to enforce after this turn ends.
    CompactedThisTurn {
        /// Cooling turns to enforce after this turn ends.
        cooldown: u8,
    },
    /// Cooling down after a recent hard compaction. Hard tier is skipped; soft is still allowed.
    /// Counter decrements inside `maybe_compact` each turn until it reaches 0.
    Cooling {
        /// Remaining cooling turns before returning to `Ready`.
        turns_remaining: u8,
    },
    /// Compaction cannot reduce context further. No more attempts will be made.
    /// `warned` tracks whether the one-shot user warning has been sent.
    Exhausted {
        /// Whether the user has already been notified of context exhaustion.
        warned: bool,
    },
}

impl CompactionState {
    /// Reports whether the state is `CompactedThisTurn`, i.e. hard compaction (or a
    /// compaction-equivalent operation) already ran this turn.
    ///
    /// When `true`, `maybe_compact`, `maybe_proactive_compress`, and
    /// `maybe_soft_compact_mid_iteration` all skip execution (CRIT-03).
    #[must_use]
    pub fn is_compacted_this_turn(self) -> bool {
        matches!(self, Self::CompactedThisTurn { .. })
    }

    /// Reports whether the state is `Exhausted` (with or without the warning sent),
    /// meaning compaction is permanently disabled for this session.
    #[must_use]
    pub fn is_exhausted(self) -> bool {
        matches!(self, Self::Exhausted { .. })
    }

    /// Number of cooling turns still pending.
    ///
    /// Only `Cooling` carries a live counter; every other state reports `0`, including
    /// `CompactedThisTurn`, whose `cooldown` has not started counting yet.
    #[must_use]
    pub fn cooldown_remaining(self) -> u8 {
        match self {
            Self::Cooling { turns_remaining } => turns_remaining,
            Self::Ready | Self::CompactedThisTurn { .. } | Self::Exhausted { .. } => 0,
        }
    }

    /// Compute the state to carry into the next user turn.
    ///
    /// **Must be called exactly once per turn, before any compaction, eviction, or
    /// focus truncation can run.** This guarantees that `is_compacted_this_turn()`
    /// returns `false` when the sidequest check executes — preserving the invariant
    /// that the sidequest only sees same-turn compaction set by eviction which runs
    /// *after* this call.
    ///
    /// Transitions:
    /// - `CompactedThisTurn { cooldown: 0 }` → `Ready`
    /// - `CompactedThisTurn { cooldown: n }` → `Cooling { turns_remaining: n }`
    /// - All other states are returned unchanged.
    #[must_use]
    pub fn advance_turn(self) -> Self {
        // Only the "compacted this turn" marker expires at a turn boundary.
        let Self::CompactedThisTurn { cooldown } = self else {
            return self;
        };
        if cooldown == 0 {
            Self::Ready
        } else {
            Self::Cooling {
                turns_remaining: cooldown,
            }
        }
    }
}

/// Indicates which compaction tier applies for the current context size.
///
/// Produced by [`ContextManager::compaction_tier`], which checks the hard threshold first,
/// then the soft threshold, and otherwise reports `None`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionTier {
    /// Context is within budget — no compaction needed. Also reported when no budget is set.
    None,
    /// Soft tier: prune tool outputs + apply deferred summaries. No LLM call.
    Soft,
    /// Hard tier: full LLM-based summarization.
    Hard,
}

/// Per-session context lifecycle manager.
///
/// Holds the token budget, compaction lifecycle state, and routing configuration.
/// Callers in `zeph-core` drive the state machine via `advance_turn`, `compaction_tier`,
/// and related accessors; the assembler reads the budget via `build_router` and field access.
pub struct ContextManager {
    /// Token budget for this session. `None` until configured via `apply_budget_config`.
    pub budget: Option<ContextBudget>,
    /// Soft compaction threshold (default 0.60): prune tool outputs + apply deferred summaries.
    /// Expressed as a fraction of the budget's max tokens.
    pub soft_compaction_threshold: f32,
    /// Hard compaction threshold (default 0.90): full LLM-based summarization.
    /// Expressed as a fraction of the budget's max tokens.
    pub hard_compaction_threshold: f32,
    /// Number of recent messages preserved during hard compaction (default 6).
    pub compaction_preserve_tail: usize,
    /// Token count protected from pruning during soft compaction (default 40 000).
    pub prune_protect_tokens: usize,
    /// Compression configuration for proactive compression.
    pub compression: CompressionConfig,
    /// Routing configuration for query-aware memory routing.
    pub routing: StoreRoutingConfig,
    /// Resolved provider for LLM/hybrid routing. `None` when strategy is `Heuristic`
    /// or when the named provider could not be resolved from the pool.
    pub store_routing_provider: Option<Arc<zeph_llm::any::AnyProvider>>,
    /// Compaction lifecycle state. Replaces four independent boolean/u8 fields to make
    /// invalid states unrepresentable. See [`CompactionState`] for the full transition map.
    pub(crate) compaction: CompactionState,
    /// Number of cooling turns to enforce after a successful hard compaction (default 2).
    pub(crate) compaction_cooldown_turns: u8,
    /// Counts user-message turns since the last hard compaction event.
    /// `None` = no hard compaction has occurred yet in this session.
    /// `Some(n)` = n turns have elapsed since the last hard compaction.
    pub(crate) turns_since_last_hard_compaction: Option<u64>,
    /// Whether a proactive fidelity regrade has already fired this turn (INV-06).
    ///
    /// Reset to `false` by `advance_turn()` at each turn boundary.
    pub(crate) regraded_this_turn: bool,
}

impl ContextManager {
    /// Create a new `ContextManager` with default thresholds and no budget.
    ///
    /// Defaults: soft threshold `0.60`, hard threshold `0.90`, preserved tail of `6`
    /// messages, `40_000` prune-protected tokens, a cooldown of `2` turns, default
    /// compression and routing configs, no routing provider, and the compaction state
    /// machine in [`CompactionState::Ready`].
    #[must_use]
    pub fn new() -> Self {
        Self {
            budget: None,
            soft_compaction_threshold: 0.60,
            hard_compaction_threshold: 0.90,
            compaction_preserve_tail: 6,
            prune_protect_tokens: 40_000,
            compression: CompressionConfig::default(),
            routing: StoreRoutingConfig::default(),
            store_routing_provider: None,
            compaction: CompactionState::Ready,
            compaction_cooldown_turns: 2,
            // No hard compaction has happened yet, and no regrade has fired this turn.
            turns_since_last_hard_compaction: None,
            regraded_this_turn: false,
        }
    }

    /// Apply the token budget and compaction tunables resolved from configuration.
    ///
    /// Must be called once after config is resolved. Safe to call again when config reloads.
    ///
    /// A non-zero `budget_tokens` installs a fresh `ContextBudget` built from
    /// `budget_tokens` and `reserve_ratio`. A `budget_tokens` of `0` only logs a warning
    /// and leaves the `budget` field as it was. The remaining arguments overwrite the
    /// fields of the same name unconditionally.
    #[allow(clippy::too_many_arguments)] // function with many required inputs; a *Params struct would be more verbose without simplifying the call site
    pub fn apply_budget_config(
        &mut self,
        budget_tokens: usize,
        reserve_ratio: f32,
        hard_compaction_threshold: f32,
        compaction_preserve_tail: usize,
        prune_protect_tokens: usize,
        soft_compaction_threshold: f32,
        compaction_cooldown_turns: u8,
    ) {
        if budget_tokens == 0 {
            // NOTE(review): a budget installed by an earlier call survives a reload to 0 —
            // confirm this is intended given the wording of the warning.
            tracing::warn!("context budget is 0 — agent will have no token tracking");
        } else {
            self.budget = Some(ContextBudget::new(budget_tokens, reserve_ratio));
        }

        // Thresholds first, then the remaining tunables; the assignments are independent.
        self.soft_compaction_threshold = soft_compaction_threshold;
        self.hard_compaction_threshold = hard_compaction_threshold;
        self.compaction_preserve_tail = compaction_preserve_tail;
        self.prune_protect_tokens = prune_protect_tokens;
        self.compaction_cooldown_turns = compaction_cooldown_turns;
    }

    /// Reset compaction state for a new conversation.
    ///
    /// Clears cooldown, exhaustion, and turn counters so the new conversation starts
    /// with a clean compaction slate: the state machine returns to
    /// [`CompactionState::Ready`] and the turns-since-hard-compaction counter to `None`.
    ///
    /// The budget, thresholds, and the per-turn `regraded_this_turn` flag are not touched.
    pub fn reset_compaction(&mut self) {
        self.compaction = CompactionState::Ready;
        self.turns_since_last_hard_compaction = None;
    }

    /// Determine which compaction tier applies for the given token count.
    ///
    /// Compares the current cached token count against the configured thresholds to decide
    /// whether hard compaction, soft compaction, or no compaction should be triggered.
    /// This method is typically called by the context assembler during a turn to proactively
    /// compress older messages if token usage grows too large.
    ///
    /// - `Hard` when `cached_tokens > budget * hard_compaction_threshold` — triggers
    ///   aggressive summarization and compaction
    /// - `Soft` when `cached_tokens > budget * soft_compaction_threshold` — triggers
    ///   lighter compaction without full summarization
    /// - `None` otherwise (or when no budget is set) — no compaction needed
    ///
    /// # Parameters
    ///
    /// * `cached_tokens` — current token count in the cached context (e.g., message history)
    ///
    /// # Returns
    ///
    /// The compaction tier that should be applied (`Hard`, `Soft`, or `None`).
    ///
    /// # Examples
    ///
    /// ```no_run
    /// use zeph_context::manager::ContextManager;
    /// use zeph_context::budget::ContextBudget;
    ///
    /// let budget = ContextBudget::new(128_000, 0.15);
    /// let mut manager = ContextManager::new();
    /// manager.soft_compaction_threshold = 0.6;
    /// manager.hard_compaction_threshold = 0.8;
    /// manager.budget = Some(budget);
    ///
    /// // Check tier for 96k cached tokens (75% of 128k)
    /// let tier = manager.compaction_tier(96_000);
    /// // Returns Soft (75% is between 60% and 80%)
    /// ```
    #[allow(
        clippy::cast_precision_loss,
        clippy::cast_possible_truncation,
        clippy::cast_sign_loss
    )]
    pub fn compaction_tier(&self, cached_tokens: u64) -> CompactionTier {
        let Some(ref budget) = self.budget else {
            return CompactionTier::None;
        };
        let used = usize::try_from(cached_tokens).unwrap_or(usize::MAX);
        let max = budget.max_tokens();
        let hard = (max as f32 * self.hard_compaction_threshold) as usize;
        if used > hard {
            tracing::debug!(
                cached_tokens,
                hard_threshold = hard,
                "context budget check: Hard tier"
            );
            return CompactionTier::Hard;
        }
        let soft = (max as f32 * self.soft_compaction_threshold) as usize;
        if used > soft {
            tracing::debug!(
                cached_tokens,
                soft_threshold = soft,
                "context budget check: Soft tier"
            );
            return CompactionTier::Soft;
        }
        tracing::debug!(
            cached_tokens,
            soft_threshold = soft,
            "context budget check: None"
        );
        CompactionTier::None
    }

    /// Returns the current compaction lifecycle state.
    ///
    /// [`CompactionState`] is `Copy`, so the value is returned as a snapshot; later
    /// changes to the manager are not reflected in it.
    #[must_use]
    pub fn compaction_state(&self) -> CompactionState {
        self.compaction
    }

    /// Returns a mutable reference to the compaction lifecycle state.
    ///
    /// Lets callers perform a state transition in place. No validation against the
    /// transition map documented on [`CompactionState`] is performed here.
    pub fn compaction_state_mut(&mut self) -> &mut CompactionState {
        &mut self.compaction
    }

    /// Replaces the compaction lifecycle state.
    ///
    /// The new `state` is stored as-is; no validation against the transition map
    /// documented on [`CompactionState`] is performed here.
    pub fn set_compaction_state(&mut self, state: CompactionState) {
        self.compaction = state;
    }

    /// Returns the number of cooling turns enforced after a hard compaction.
    ///
    /// This is the configured value (default `2`, see `new`), not the live countdown;
    /// the live countdown is [`CompactionState::cooldown_remaining`].
    #[must_use]
    pub fn compaction_cooldown_turns(&self) -> u8 {
        self.compaction_cooldown_turns
    }

    /// Sets the number of cooling turns enforced after a hard compaction.
    ///
    /// Only the configured value is updated; the current [`CompactionState`] is left
    /// unchanged.
    pub fn set_compaction_cooldown_turns(&mut self, turns: u8) {
        self.compaction_cooldown_turns = turns;
    }

    /// Returns the number of user-message turns since the last hard compaction, if any.
    ///
    /// `None` means no hard compaction has occurred yet in this session (or the counter
    /// was cleared by `reset_compaction`).
    #[must_use]
    pub fn turns_since_last_hard_compaction(&self) -> Option<u64> {
        self.turns_since_last_hard_compaction
    }

    /// Returns a mutable reference to the turns-since-last-hard-compaction counter.
    ///
    /// Lets callers update the metric counter in place; this type does not increment
    /// it on its own.
    pub fn turns_since_last_hard_compaction_mut(&mut self) -> &mut Option<u64> {
        &mut self.turns_since_last_hard_compaction
    }

    /// Sets the turns-since-last-hard-compaction counter.
    ///
    /// Pass `None` to record that no hard compaction has occurred, or `Some(n)` for
    /// `n` elapsed turns.
    pub fn set_turns_since_last_hard_compaction(&mut self, value: Option<u64>) {
        self.turns_since_last_hard_compaction = value;
    }

    /// Advance to a new user turn: reset the per-turn regrade flag and step the
    /// compaction state machine.
    ///
    /// Sets `regraded_this_turn` to `false` and replaces the stored state with the result
    /// of [`CompactionState::advance_turn`], so callers do not need to advance the
    /// compaction state separately.
    ///
    /// # Examples
    ///
    /// ```
    /// use zeph_context::budget::ContextBudget;
    /// use zeph_context::manager::ContextManager;
    ///
    /// let mut cm = ContextManager::new();
    /// cm.budget = Some(ContextBudget::new(100_000, 0.1));
    /// cm.set_regraded_this_turn(true);
    /// // Already regraded this turn — a second regrade is refused.
    /// assert!(!cm.should_proactively_regrade(70_000, 0.6, false));
    /// cm.advance_turn();
    /// // regraded_this_turn is reset to false — proactive regrade is available again
    /// assert!(cm.should_proactively_regrade(70_000, 0.6, false));
    /// ```
    pub fn advance_turn(&mut self) {
        self.regraded_this_turn = false;
        self.compaction = self.compaction.advance_turn();
    }

    /// Mark that a proactive fidelity regrade has fired this turn (INV-06).
    ///
    /// Called by the caller after `should_proactively_regrade` returns `true` and the scorer
    /// has been applied. Prevents a second regrade in the same turn. The flag is cleared
    /// again by `advance_turn`.
    ///
    /// # Examples
    ///
    /// ```
    /// use zeph_context::manager::ContextManager;
    /// use zeph_context::budget::ContextBudget;
    ///
    /// let mut cm = ContextManager::new();
    /// cm.budget = Some(ContextBudget::new(100_000, 0.1));
    /// assert!(cm.should_proactively_regrade(70_000, 0.6, false));
    /// cm.set_regraded_this_turn(true);
    /// assert!(!cm.should_proactively_regrade(70_000, 0.6, false)); // guarded by regraded flag
    /// cm.advance_turn();
    /// assert!(cm.should_proactively_regrade(70_000, 0.6, false)); // resets after advance_turn
    /// ```
    pub fn set_regraded_this_turn(&mut self, value: bool) {
        self.regraded_this_turn = value;
    }

    /// Whether a proactive fidelity regrade should fire for the current context state.
    ///
    /// Returns `true` only when all of the following hold:
    /// 1. No regrade has fired this turn yet (`regraded_this_turn == false`).
    /// 2. The compaction subsystem is not exhausted.
    /// 3. If server compaction is active, budget usage is below 95%.
    /// 4. Budget usage exceeds `regrade_threshold`.
    ///
    /// # Parameters
    ///
    /// - `cached_tokens` — current token count in the message window.
    /// - `regrade_threshold` — fraction of max tokens at which regrade triggers (e.g. `0.6`).
    /// - `server_compaction_active` — whether Claude server-side compaction is in use.
    ///
    /// # Examples
    ///
    /// ```
    /// use zeph_context::manager::ContextManager;
    /// use zeph_context::budget::ContextBudget;
    ///
    /// let mut cm = ContextManager::new();
    /// cm.budget = Some(ContextBudget::new(100_000, 0.1));
    /// // At 70% budget with threshold 0.6 → should regrade.
    /// assert!(cm.should_proactively_regrade(70_000, 0.6, false));
    /// ```
    #[must_use]
    #[allow(clippy::cast_precision_loss)]
    pub fn should_proactively_regrade(
        &self,
        cached_tokens: u64,
        regrade_threshold: f32,
        server_compaction_active: bool,
    ) -> bool {
        if self.regraded_this_turn {
            return false;
        }
        if self.compaction.is_exhausted() {
            return false;
        }
        let Some(ref budget) = self.budget else {
            return false;
        };
        let max = budget.max_tokens() as f64;
        if max <= 0.0 {
            return false;
        }
        let ratio = cached_tokens as f64 / max;
        if server_compaction_active && ratio < 0.95 {
            return false;
        }
        ratio > f64::from(regrade_threshold)
    }

    /// Check if proactive compression should fire for the current turn.
    ///
    /// Returns `Some((threshold_tokens, max_summary_tokens))` when proactive compression
    /// should be triggered, `None` otherwise.
    ///
    /// - Will return `None` if compaction already happened this turn (CRIT-03 fix).
    /// - `CompressionStrategy::Proactive`: fires when `current_tokens` strictly exceeds the
    ///   configured `threshold_tokens`; the configured pair is returned unchanged.
    /// - `CompressionStrategy::Focus`: the threshold is the soft-compaction fraction of the
    ///   budget (same gate used by mid-iteration soft compaction), and `None` is returned
    ///   when no budget is set. The `max_summary_tokens` element (`threshold / 4`) is unused
    ///   on the Focus path — the auto-consolidation function uses
    ///   `FocusConfig.max_knowledge_tokens / 2` instead.
    /// - Any other strategy never fires.
    ///
    /// `current_tokens` values that do not fit in `usize` are treated as `usize::MAX`.
    #[must_use]
    pub fn should_proactively_compress(&self, current_tokens: u64) -> Option<(usize, usize)> {
        use zeph_config::CompressionStrategy;

        if self.compaction.is_compacted_this_turn() {
            return None;
        }
        let used = usize::try_from(current_tokens).unwrap_or(usize::MAX);

        match &self.compression.strategy {
            CompressionStrategy::Proactive {
                threshold_tokens,
                max_summary_tokens,
            } => (used > *threshold_tokens).then_some((*threshold_tokens, *max_summary_tokens)),
            CompressionStrategy::Focus => {
                // Focus fires at the soft-compaction threshold (same as tier machinery).
                let budget = self.budget.as_ref()?.max_tokens();
                #[allow(
                    clippy::cast_precision_loss,
                    clippy::cast_sign_loss,
                    clippy::cast_possible_truncation
                )]
                let threshold = (budget as f32 * self.soft_compaction_threshold) as usize;
                // NOTE: the second tuple element (max_summary_tokens) is a placeholder
                // on the Focus path — the auto-consolidation function ignores it and uses
                // FocusConfig.max_knowledge_tokens / 2 instead.
                (used > threshold).then_some((threshold, threshold / 4))
            }
            _ => None,
        }
    }
}

/// `Default` delegates to [`ContextManager::new`], so both produce the same initial state.
impl Default for ContextManager {
    /// Builds a manager with default thresholds and no budget via `ContextManager::new()`.
    fn default() -> Self {
        Self::new()
    }
}

// Unit tests for `ContextManager` and `CompactionState`.
//
// Unless a test overrides them, the thresholds are the `ContextManager::new()` defaults
// (soft 0.60, hard 0.90). Most tests use a 100_000-token budget, so token counts read
// directly as percentages of the budget.
#[cfg(test)]
mod tests {
    use super::*;
    use zeph_config::CompressionStrategy;

    // --- constructor defaults ---

    #[test]
    fn new_defaults() {
        let cm = ContextManager::new();
        assert!(cm.budget.is_none());
        assert!((cm.soft_compaction_threshold - 0.60).abs() < f32::EPSILON);
        assert!((cm.hard_compaction_threshold - 0.90).abs() < f32::EPSILON);
        assert_eq!(cm.compaction_preserve_tail, 6);
        assert_eq!(cm.prune_protect_tokens, 40_000);
        assert_eq!(cm.compaction, CompactionState::Ready);
    }

    // --- compaction_tier ---

    // Without a budget no tier can apply, regardless of token count.
    #[test]
    fn compaction_tier_no_budget() {
        let cm = ContextManager::new();
        assert_eq!(cm.compaction_tier(1_000_000), CompactionTier::None);
    }

    #[test]
    fn compaction_tier_below_soft() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        assert_eq!(cm.compaction_tier(50_000), CompactionTier::None);
    }

    #[test]
    fn compaction_tier_between_soft_and_hard() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        assert_eq!(cm.compaction_tier(75_000), CompactionTier::Soft);
    }

    #[test]
    fn compaction_tier_above_hard() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        assert_eq!(cm.compaction_tier(95_000), CompactionTier::Hard);
    }

    // --- should_proactively_compress: Proactive strategy ---

    #[test]
    fn proactive_compress_above_threshold_returns_params() {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Proactive {
            threshold_tokens: 80_000,
            max_summary_tokens: 4_000,
        };
        let result = cm.should_proactively_compress(90_000);
        assert_eq!(result, Some((80_000, 4_000)));
    }

    // CRIT-03: nothing may compress again once compaction ran this turn.
    #[test]
    fn proactive_compress_blocked_if_compacted_this_turn() {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Proactive {
            threshold_tokens: 80_000,
            max_summary_tokens: 4_000,
        };
        cm.compaction = CompactionState::CompactedThisTurn { cooldown: 0 };
        assert!(cm.should_proactively_compress(100_000).is_none());
    }

    // --- CompactionState predicates and transitions ---

    #[test]
    fn compaction_state_ready_is_not_compacted_this_turn() {
        assert!(!CompactionState::Ready.is_compacted_this_turn());
    }

    // The flag is set regardless of the cooldown carried by the variant.
    #[test]
    fn compaction_state_compacted_this_turn_flag() {
        assert!(CompactionState::CompactedThisTurn { cooldown: 2 }.is_compacted_this_turn());
        assert!(CompactionState::CompactedThisTurn { cooldown: 0 }.is_compacted_this_turn());
    }

    #[test]
    fn compaction_state_cooling_is_not_compacted_this_turn() {
        assert!(!CompactionState::Cooling { turns_remaining: 1 }.is_compacted_this_turn());
    }

    #[test]
    fn advance_turn_compacted_with_cooldown_enters_cooling() {
        let state = CompactionState::CompactedThisTurn { cooldown: 3 };
        assert_eq!(
            state.advance_turn(),
            CompactionState::Cooling { turns_remaining: 3 }
        );
    }

    #[test]
    fn advance_turn_compacted_zero_cooldown_returns_ready() {
        let state = CompactionState::CompactedThisTurn { cooldown: 0 };
        assert_eq!(state.advance_turn(), CompactionState::Ready);
    }

    // --- should_proactively_compress: Focus strategy ---

    #[test]
    fn should_proactively_compress_focus_fires_above_soft_threshold() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        cm.compression.strategy = CompressionStrategy::Focus;
        // Default soft threshold is 0.60 → 60_000 tokens.
        // 75_000 > 60_000 → should fire.
        let result = cm.should_proactively_compress(75_000);
        assert!(result.is_some(), "Focus must fire above soft threshold");
        let (threshold, _) = result.unwrap();
        assert_eq!(threshold, 60_000);
    }

    #[test]
    fn should_proactively_compress_focus_returns_none_below_threshold() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        cm.compression.strategy = CompressionStrategy::Focus;
        // 50_000 < 60_000 → should not fire.
        assert!(cm.should_proactively_compress(50_000).is_none());
    }

    #[test]
    fn should_proactively_compress_focus_returns_none_without_budget() {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Focus;
        // No budget set → cannot compute threshold → None.
        assert!(cm.should_proactively_compress(999_999).is_none());
    }

    // --- per-turn regrade flag and should_proactively_regrade ---

    // AC-07: regraded_this_turn resets to false after advance_turn().
    #[test]
    fn advance_turn_resets_regraded_this_turn() {
        let mut cm = ContextManager::new();
        cm.regraded_this_turn = true;
        cm.advance_turn();
        assert!(
            !cm.regraded_this_turn,
            "regraded_this_turn must reset after advance_turn"
        );
    }

    // AC-08: should_proactively_regrade returns false if already regraded this turn.
    #[test]
    fn regrade_blocked_if_already_regraded_this_turn() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        cm.regraded_this_turn = true;
        assert!(
            !cm.should_proactively_regrade(70_000, 0.6, false),
            "must not regrade twice in the same turn"
        );
    }

    #[test]
    fn regrade_fires_above_threshold() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        assert!(
            cm.should_proactively_regrade(70_000, 0.6, false),
            "must fire when budget ratio > threshold"
        );
    }

    #[test]
    fn regrade_does_not_fire_below_threshold() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        assert!(
            !cm.should_proactively_regrade(50_000, 0.6, false),
            "must not fire when budget ratio <= threshold"
        );
    }

    #[test]
    fn regrade_blocked_when_exhausted() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        cm.compaction = CompactionState::Exhausted { warned: false };
        assert!(
            !cm.should_proactively_regrade(80_000, 0.6, false),
            "must not fire when compaction is exhausted"
        );
    }

    // With server-side compaction active, the regrade only fires at >= 95% usage.
    #[test]
    fn regrade_blocked_by_server_compaction_at_sub_95() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        // 80% budget, server_compaction_active=true → ratio < 0.95 → blocked.
        assert!(
            !cm.should_proactively_regrade(80_000, 0.6, true),
            "must not fire with server compaction active below 95%"
        );
    }

    #[test]
    fn regrade_fires_with_server_compaction_at_95() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        // 96% budget, server_compaction_active=true → ratio >= 0.95 → fires.
        assert!(
            cm.should_proactively_regrade(96_000, 0.6, true),
            "must fire with server compaction active at >= 95%"
        );
    }
}