// zeph_context/manager.rs

// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0

//! Context lifecycle state machine for the Zeph agent.
//!
//! [`ContextManager`] tracks per-session compaction state and token budgets.
//! It decides when soft (pruning) or hard (LLM summarization) compaction should fire,
//! and builds the memory router used for query-aware store selection.
//!
//! [`CompactionState`] is the core state machine — see its doc comment for the
//! full transition map.

use std::sync::Arc;

use zeph_config::{CompressionConfig, StoreRoutingConfig};

use crate::budget::ContextBudget;

/// Lifecycle state of the compaction subsystem within a single session.
///
/// Replaces four independent boolean/u8 fields with an explicit state machine that makes
/// invalid states unrepresentable (e.g., warned-without-exhausted).
///
/// # Transition map
///
/// ```text
/// Ready
///   → CompactedThisTurn { cooldown } when hard compaction succeeds (pruning or LLM)
///   → CompactedThisTurn { cooldown: 0 } when focus truncation, eviction, or proactive
///     compression fires (these callers do not want post-compaction cooldown)
///   → Exhausted { warned: false } when compaction is counterproductive (too few messages,
///     zero net freed tokens, or still above hard threshold after LLM compaction)
///
/// CompactedThisTurn { cooldown }
///   → Cooling { turns_remaining: cooldown } when cooldown > 0  (via advance_turn)
///   → Ready                                 when cooldown == 0 (via advance_turn)
///
/// Cooling { turns_remaining }
///   → Cooling { turns_remaining - 1 } decremented inside maybe_compact each turn
///   → Ready                           when turns_remaining reaches 0
///   NOTE: Exhausted is NOT reachable from Cooling — all exhaustion-setting sites in
///   summarization.rs are guarded by an early-return when in_cooldown is true.
///
/// Exhausted { warned: false }
///   → Exhausted { warned: true } after the user warning is sent (one-shot)
///
/// Exhausted { warned: true }  (terminal — no further transitions)
/// ```
///
/// Only the `CompactedThisTurn` transitions are implemented on this type (see
/// [`CompactionState::advance_turn`]); every other edge is driven by the callers that own
/// the state.
///
/// "Terminal" refers to the per-turn transitions above. Starting a new conversation via
/// [`ContextManager::reset_compaction`] still puts the manager back into `Ready`.
///
/// `turns_since_last_hard_compaction` is a **metric counter**, not part of this state machine,
/// and remains a separate field on `ContextManager`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionState {
    /// Normal state — compaction may fire if context exceeds thresholds.
    Ready,
    /// Hard compaction (or focus truncation / eviction / proactive compression) ran this turn.
    /// No further compaction until `advance_turn()` is called at the next turn boundary.
    /// `cooldown` carries the number of cooling turns to enforce after this turn ends.
    CompactedThisTurn {
        /// Cooling turns to enforce after this turn ends.
        cooldown: u8,
    },
    /// Cooling down after a recent hard compaction. Hard tier is skipped; soft is still allowed.
    /// Counter decrements inside `maybe_compact` each turn until it reaches 0.
    Cooling {
        /// Remaining cooling turns before returning to `Ready`.
        turns_remaining: u8,
    },
    /// Compaction cannot reduce context further. No more attempts will be made.
    /// `warned` tracks whether the one-shot user warning has been sent.
    Exhausted {
        /// Whether the user has already been notified of context exhaustion.
        warned: bool,
    },
}

77impl CompactionState {
78    /// Whether hard compaction (or a compaction-equivalent operation) already ran this turn.
79    ///
80    /// When `true`, `maybe_compact`, `maybe_proactive_compress`, and
81    /// `maybe_soft_compact_mid_iteration` all skip execution (CRIT-03).
82    #[must_use]
83    pub fn is_compacted_this_turn(self) -> bool {
84        matches!(self, Self::CompactedThisTurn { .. })
85    }
86
87    /// Whether compaction is permanently disabled for this session.
88    #[must_use]
89    pub fn is_exhausted(self) -> bool {
90        matches!(self, Self::Exhausted { .. })
91    }
92
93    /// Remaining cooldown turns (0 when not in `Cooling` state).
94    #[must_use]
95    pub fn cooldown_remaining(self) -> u8 {
96        match self {
97            Self::Cooling { turns_remaining } => turns_remaining,
98            _ => 0,
99        }
100    }
101
102    /// Transition to the next-turn state at the start of each user turn.
103    ///
104    /// **Must be called exactly once per turn, before any compaction, eviction, or
105    /// focus truncation can run.** This guarantees that `is_compacted_this_turn()`
106    /// returns `false` when the sidequest check executes — preserving the invariant
107    /// that the sidequest only sees same-turn compaction set by eviction which runs
108    /// *after* this call.
109    ///
110    /// Transitions:
111    /// - `CompactedThisTurn { cooldown: 0 }` → `Ready`
112    /// - `CompactedThisTurn { cooldown: n }` → `Cooling { turns_remaining: n }`
113    /// - All other states are returned unchanged.
114    #[must_use]
115    pub fn advance_turn(self) -> Self {
116        match self {
117            Self::CompactedThisTurn { cooldown } if cooldown > 0 => Self::Cooling {
118                turns_remaining: cooldown,
119            },
120            Self::CompactedThisTurn { .. } => Self::Ready,
121            other => other,
122        }
123    }
124}
125
/// Indicates which compaction tier applies for the current context size.
///
/// Produced by `ContextManager::compaction_tier`, which compares the token count against
/// the soft and hard fractions of the budget using strict `>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionTier {
    /// Context is within budget — no compaction needed.
    None,
    /// Soft tier: prune tool outputs + apply deferred summaries. No LLM call.
    Soft,
    /// Hard tier: full LLM-based summarization.
    Hard,
}

137/// Per-session context lifecycle manager.
138///
139/// Holds the token budget, compaction lifecycle state, and routing configuration.
140/// Callers in `zeph-core` drive the state machine via `advance_turn`, `compaction_tier`,
141/// and related accessors; the assembler reads the budget via `build_router` and field access.
142pub struct ContextManager {
143    /// Token budget for this session. `None` until configured via `apply_budget_config`.
144    pub budget: Option<ContextBudget>,
145    /// Soft compaction threshold (default 0.70): prune tool outputs + apply deferred summaries.
146    pub soft_compaction_threshold: f32,
147    /// Hard compaction threshold (default 0.90): full LLM-based summarization.
148    pub hard_compaction_threshold: f32,
149    /// Number of recent messages preserved during hard compaction.
150    pub compaction_preserve_tail: usize,
151    /// Token count protected from pruning during soft compaction.
152    pub prune_protect_tokens: usize,
153    /// Compression configuration for proactive compression.
154    pub compression: CompressionConfig,
155    /// Routing configuration for query-aware memory routing.
156    pub routing: StoreRoutingConfig,
157    /// Resolved provider for LLM/hybrid routing. `None` when strategy is `Heuristic`
158    /// or when the named provider could not be resolved from the pool.
159    pub store_routing_provider: Option<Arc<zeph_llm::any::AnyProvider>>,
160    /// Compaction lifecycle state. Replaces four independent boolean/u8 fields to make
161    /// invalid states unrepresentable. See [`CompactionState`] for the full transition map.
162    pub compaction: CompactionState,
163    /// Number of cooling turns to enforce after a successful hard compaction.
164    pub compaction_cooldown_turns: u8,
165    /// Counts user-message turns since the last hard compaction event.
166    /// `None` = no hard compaction has occurred yet in this session.
167    /// `Some(n)` = n turns have elapsed since the last hard compaction.
168    pub turns_since_last_hard_compaction: Option<u64>,
169}
170
171impl ContextManager {
172    /// Create a new `ContextManager` with default thresholds and no budget.
173    #[must_use]
174    pub fn new() -> Self {
175        Self {
176            budget: None,
177            soft_compaction_threshold: 0.60,
178            hard_compaction_threshold: 0.90,
179            compaction_preserve_tail: 6,
180            prune_protect_tokens: 40_000,
181            compression: CompressionConfig::default(),
182            routing: StoreRoutingConfig::default(),
183            store_routing_provider: None,
184            compaction: CompactionState::Ready,
185            compaction_cooldown_turns: 2,
186            turns_since_last_hard_compaction: None,
187        }
188    }
189
190    /// Apply budget and compaction thresholds from config.
191    ///
192    /// Must be called once after config is resolved. Safe to call again when config reloads.
193    #[allow(clippy::too_many_arguments)] // function with many required inputs; a *Params struct would be more verbose without simplifying the call site
194    pub fn apply_budget_config(
195        &mut self,
196        budget_tokens: usize,
197        reserve_ratio: f32,
198        hard_compaction_threshold: f32,
199        compaction_preserve_tail: usize,
200        prune_protect_tokens: usize,
201        soft_compaction_threshold: f32,
202        compaction_cooldown_turns: u8,
203    ) {
204        if budget_tokens == 0 {
205            tracing::warn!("context budget is 0 — agent will have no token tracking");
206        }
207        if budget_tokens > 0 {
208            self.budget = Some(ContextBudget::new(budget_tokens, reserve_ratio));
209        }
210        self.hard_compaction_threshold = hard_compaction_threshold;
211        self.compaction_preserve_tail = compaction_preserve_tail;
212        self.prune_protect_tokens = prune_protect_tokens;
213        self.soft_compaction_threshold = soft_compaction_threshold;
214        self.compaction_cooldown_turns = compaction_cooldown_turns;
215    }
216
217    /// Reset compaction state for a new conversation.
218    ///
219    /// Clears cooldown, exhaustion, and turn counters so the new conversation starts
220    /// with a clean compaction slate.
221    pub fn reset_compaction(&mut self) {
222        self.compaction = CompactionState::Ready;
223        self.turns_since_last_hard_compaction = None;
224    }
225
226    /// Determine which compaction tier applies for the given token count.
227    ///
228    /// - `Hard` when `cached_tokens > budget * hard_compaction_threshold`
229    /// - `Soft` when `cached_tokens > budget * soft_compaction_threshold`
230    /// - `None` otherwise (or when no budget is set)
231    #[allow(
232        clippy::cast_precision_loss,
233        clippy::cast_possible_truncation,
234        clippy::cast_sign_loss
235    )]
236    pub fn compaction_tier(&self, cached_tokens: u64) -> CompactionTier {
237        let Some(ref budget) = self.budget else {
238            return CompactionTier::None;
239        };
240        let used = usize::try_from(cached_tokens).unwrap_or(usize::MAX);
241        let max = budget.max_tokens();
242        let hard = (max as f32 * self.hard_compaction_threshold) as usize;
243        if used > hard {
244            tracing::debug!(
245                cached_tokens,
246                hard_threshold = hard,
247                "context budget check: Hard tier"
248            );
249            return CompactionTier::Hard;
250        }
251        let soft = (max as f32 * self.soft_compaction_threshold) as usize;
252        if used > soft {
253            tracing::debug!(
254                cached_tokens,
255                soft_threshold = soft,
256                "context budget check: Soft tier"
257            );
258            return CompactionTier::Soft;
259        }
260        tracing::debug!(
261            cached_tokens,
262            soft_threshold = soft,
263            "context budget check: None"
264        );
265        CompactionTier::None
266    }
267
268    /// Check if proactive compression should fire for the current turn.
269    ///
270    /// Returns `Some((threshold_tokens, max_summary_tokens))` when proactive compression
271    /// should be triggered, `None` otherwise.
272    ///
273    /// For `CompressionStrategy::Focus`, the threshold is the soft-compaction fraction
274    /// of the budget (same gate used by mid-iteration soft compaction). The
275    /// `max_summary_tokens` element is unused on the Focus path — the auto-consolidation
276    /// function uses `FocusConfig.max_knowledge_tokens / 2` instead.
277    ///
278    /// Will return `None` if compaction already happened this turn (CRIT-03 fix).
279    #[must_use]
280    pub fn should_proactively_compress(&self, current_tokens: u64) -> Option<(usize, usize)> {
281        use zeph_config::CompressionStrategy;
282        if self.compaction.is_compacted_this_turn() {
283            return None;
284        }
285        match &self.compression.strategy {
286            CompressionStrategy::Proactive {
287                threshold_tokens,
288                max_summary_tokens,
289            } if usize::try_from(current_tokens).unwrap_or(usize::MAX) > *threshold_tokens => {
290                Some((*threshold_tokens, *max_summary_tokens))
291            }
292            CompressionStrategy::Focus => {
293                // Focus fires at the soft-compaction threshold (same as tier machinery).
294                let budget = self.budget.as_ref()?.max_tokens();
295                #[allow(
296                    clippy::cast_precision_loss,
297                    clippy::cast_sign_loss,
298                    clippy::cast_possible_truncation
299                )]
300                let threshold = (budget as f32 * self.soft_compaction_threshold) as usize;
301                if usize::try_from(current_tokens).unwrap_or(usize::MAX) > threshold {
302                    // NOTE: the second tuple element (max_summary_tokens) is a placeholder
303                    // on the Focus path — the auto-consolidation function ignores it and uses
304                    // FocusConfig.max_knowledge_tokens / 2 instead.
305                    Some((threshold, threshold / 4))
306                } else {
307                    None
308                }
309            }
310            _ => None,
311        }
312    }
313}
314
315impl Default for ContextManager {
316    fn default() -> Self {
317        Self::new()
318    }
319}
320
#[cfg(test)]
mod tests {
    use super::*;
    use zeph_config::CompressionStrategy;

    /// Manager with a 100 000-token budget and otherwise default settings.
    fn manager_with_budget() -> ContextManager {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        cm
    }

    #[test]
    fn new_defaults() {
        let cm = ContextManager::new();
        assert!(cm.budget.is_none());
        assert!((cm.soft_compaction_threshold - 0.60).abs() < f32::EPSILON);
        assert!((cm.hard_compaction_threshold - 0.90).abs() < f32::EPSILON);
        assert_eq!(cm.compaction_preserve_tail, 6);
        assert_eq!(cm.prune_protect_tokens, 40_000);
        assert_eq!(cm.compaction, CompactionState::Ready);
        assert_eq!(cm.compaction_cooldown_turns, 2);
        assert!(cm.turns_since_last_hard_compaction.is_none());
        assert!(cm.store_routing_provider.is_none());
    }

    #[test]
    fn default_matches_new() {
        let cm = ContextManager::default();
        assert!(cm.budget.is_none());
        assert_eq!(cm.compaction, CompactionState::Ready);
        assert_eq!(cm.compaction_cooldown_turns, 2);
    }

    #[test]
    fn apply_budget_config_sets_budget_and_thresholds() {
        let mut cm = ContextManager::new();
        cm.apply_budget_config(100_000, 0.1, 0.85, 8, 10_000, 0.50, 4);
        assert!(cm.budget.is_some());
        assert!((cm.hard_compaction_threshold - 0.85).abs() < f32::EPSILON);
        assert!((cm.soft_compaction_threshold - 0.50).abs() < f32::EPSILON);
        assert_eq!(cm.compaction_preserve_tail, 8);
        assert_eq!(cm.prune_protect_tokens, 10_000);
        assert_eq!(cm.compaction_cooldown_turns, 4);
    }

    #[test]
    fn apply_budget_config_zero_budget_leaves_budget_unset() {
        let mut cm = ContextManager::new();
        cm.apply_budget_config(0, 0.1, 0.85, 8, 10_000, 0.50, 4);
        assert!(cm.budget.is_none());
        // The remaining settings are applied regardless of the budget.
        assert_eq!(cm.compaction_preserve_tail, 8);
        assert_eq!(cm.compaction_cooldown_turns, 4);
    }

    #[test]
    fn reset_compaction_restores_clean_slate() {
        let mut cm = ContextManager::new();
        cm.compaction = CompactionState::Exhausted { warned: true };
        cm.turns_since_last_hard_compaction = Some(7);
        cm.reset_compaction();
        assert_eq!(cm.compaction, CompactionState::Ready);
        assert!(cm.turns_since_last_hard_compaction.is_none());
    }

    #[test]
    fn compaction_tier_no_budget() {
        let cm = ContextManager::new();
        assert_eq!(cm.compaction_tier(1_000_000), CompactionTier::None);
    }

    #[test]
    fn compaction_tier_below_soft() {
        let cm = manager_with_budget();
        assert_eq!(cm.compaction_tier(50_000), CompactionTier::None);
    }

    #[test]
    fn compaction_tier_between_soft_and_hard() {
        let cm = manager_with_budget();
        assert_eq!(cm.compaction_tier(75_000), CompactionTier::Soft);
    }

    #[test]
    fn compaction_tier_above_hard() {
        let cm = manager_with_budget();
        assert_eq!(cm.compaction_tier(95_000), CompactionTier::Hard);
    }

    #[test]
    fn compaction_tier_thresholds_are_strict() {
        let cm = manager_with_budget();
        // Hard threshold is 90_000 tokens: exactly at the threshold is still Soft.
        assert_eq!(cm.compaction_tier(90_000), CompactionTier::Soft);
        assert_eq!(cm.compaction_tier(90_001), CompactionTier::Hard);
        // Soft threshold is 60_000 tokens: exactly at the threshold is still None.
        assert_eq!(cm.compaction_tier(60_000), CompactionTier::None);
        assert_eq!(cm.compaction_tier(60_001), CompactionTier::Soft);
    }

    #[test]
    fn proactive_compress_above_threshold_returns_params() {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Proactive {
            threshold_tokens: 80_000,
            max_summary_tokens: 4_000,
        };
        let result = cm.should_proactively_compress(90_000);
        assert_eq!(result, Some((80_000, 4_000)));
    }

    #[test]
    fn proactive_compress_at_threshold_returns_none() {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Proactive {
            threshold_tokens: 80_000,
            max_summary_tokens: 4_000,
        };
        // The comparison is strict: exactly at the threshold must not fire.
        assert!(cm.should_proactively_compress(80_000).is_none());
    }

    #[test]
    fn proactive_compress_blocked_if_compacted_this_turn() {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Proactive {
            threshold_tokens: 80_000,
            max_summary_tokens: 4_000,
        };
        cm.compaction = CompactionState::CompactedThisTurn { cooldown: 0 };
        assert!(cm.should_proactively_compress(100_000).is_none());
    }

    #[test]
    fn compaction_state_ready_is_not_compacted_this_turn() {
        assert!(!CompactionState::Ready.is_compacted_this_turn());
    }

    #[test]
    fn compaction_state_compacted_this_turn_flag() {
        assert!(CompactionState::CompactedThisTurn { cooldown: 2 }.is_compacted_this_turn());
        assert!(CompactionState::CompactedThisTurn { cooldown: 0 }.is_compacted_this_turn());
    }

    #[test]
    fn compaction_state_cooling_is_not_compacted_this_turn() {
        assert!(!CompactionState::Cooling { turns_remaining: 1 }.is_compacted_this_turn());
    }

    #[test]
    fn compaction_state_is_exhausted_only_for_exhausted() {
        assert!(CompactionState::Exhausted { warned: false }.is_exhausted());
        assert!(CompactionState::Exhausted { warned: true }.is_exhausted());
        assert!(!CompactionState::Ready.is_exhausted());
        assert!(!CompactionState::Cooling { turns_remaining: 1 }.is_exhausted());
        assert!(!CompactionState::CompactedThisTurn { cooldown: 1 }.is_exhausted());
    }

    #[test]
    fn cooldown_remaining_reports_only_while_cooling() {
        assert_eq!(
            CompactionState::Cooling { turns_remaining: 3 }.cooldown_remaining(),
            3
        );
        assert_eq!(CompactionState::Ready.cooldown_remaining(), 0);
        assert_eq!(
            CompactionState::CompactedThisTurn { cooldown: 3 }.cooldown_remaining(),
            0
        );
        assert_eq!(
            CompactionState::Exhausted { warned: false }.cooldown_remaining(),
            0
        );
    }

    #[test]
    fn advance_turn_compacted_with_cooldown_enters_cooling() {
        let state = CompactionState::CompactedThisTurn { cooldown: 3 };
        assert_eq!(
            state.advance_turn(),
            CompactionState::Cooling { turns_remaining: 3 }
        );
    }

    #[test]
    fn advance_turn_compacted_zero_cooldown_returns_ready() {
        let state = CompactionState::CompactedThisTurn { cooldown: 0 };
        assert_eq!(state.advance_turn(), CompactionState::Ready);
    }

    #[test]
    fn advance_turn_leaves_other_states_unchanged() {
        for state in [
            CompactionState::Ready,
            CompactionState::Cooling { turns_remaining: 2 },
            CompactionState::Exhausted { warned: false },
            CompactionState::Exhausted { warned: true },
        ] {
            assert_eq!(state.advance_turn(), state);
        }
    }

    #[test]
    fn should_proactively_compress_focus_fires_above_soft_threshold() {
        let mut cm = manager_with_budget();
        cm.compression.strategy = CompressionStrategy::Focus;
        // Default soft threshold is 0.60 → 60_000 tokens.
        // 75_000 > 60_000 → should fire.
        let result = cm.should_proactively_compress(75_000);
        assert!(result.is_some(), "Focus must fire above soft threshold");
        let (threshold, _) = result.unwrap();
        assert_eq!(threshold, 60_000);
    }

    #[test]
    fn should_proactively_compress_focus_returns_none_below_threshold() {
        let mut cm = manager_with_budget();
        cm.compression.strategy = CompressionStrategy::Focus;
        // 50_000 < 60_000 → should not fire.
        assert!(cm.should_proactively_compress(50_000).is_none());
    }

    #[test]
    fn should_proactively_compress_focus_returns_none_without_budget() {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Focus;
        // No budget set → cannot compute threshold → None.
        assert!(cm.should_proactively_compress(999_999).is_none());
    }
}