Skip to main content

zeph_context/
manager.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Context lifecycle state machine for the Zeph agent.
5//!
6//! [`ContextManager`] tracks per-session compaction state and token budgets.
7//! It decides when soft (pruning) or hard (LLM summarization) compaction should fire,
8//! and builds the memory router used for query-aware store selection.
9//!
10//! [`CompactionState`] is the core state machine — see its doc comment for the
11//! full transition map.
12
13use std::sync::Arc;
14
15use zeph_config::{CompressionConfig, StoreRoutingConfig};
16
17use crate::budget::ContextBudget;
18
/// Compaction lifecycle for a single session, expressed as an explicit state machine.
///
/// One enum stands in for four independent boolean/`u8` fields so that nonsensical
/// combinations (e.g. warned-without-exhausted) simply cannot be constructed.
///
/// # Transition map
///
/// ```text
/// Ready
///   → CompactedThisTurn { cooldown }     hard compaction succeeded (pruning or LLM)
///   → CompactedThisTurn { cooldown: 0 }  focus truncation, eviction, or proactive compression
///                                        fired (these callers want no post-compaction cooldown)
///   → Exhausted { warned: false }        compaction is counterproductive (too few messages,
///                                        zero net freed tokens, or still above the hard
///                                        threshold after LLM compaction)
///
/// CompactedThisTurn { cooldown }         (both edges are taken by advance_turn)
///   → Cooling { turns_remaining: cooldown }  if cooldown > 0
///   → Ready                                  if cooldown == 0
///
/// Cooling { turns_remaining }
///   → Cooling { turns_remaining - 1 }    decremented inside maybe_compact each turn
///   → Ready                              once turns_remaining reaches 0
///   NOTE: Cooling never leads to Exhausted — every exhaustion-setting site in
///   summarization.rs returns early while in_cooldown is true.
///
/// Exhausted { warned: false }
///   → Exhausted { warned: true }         after the one-shot user warning has been sent
///
/// Exhausted { warned: true }             terminal, no outgoing transitions
/// ```
///
/// `turns_since_last_hard_compaction` is a **metric counter** kept as its own field on
/// `ContextManager`; it is deliberately not part of this state machine.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CompactionState {
    /// Idle: compaction is allowed to fire once the context crosses a threshold.
    Ready,
    /// A hard compaction (or focus truncation / eviction / proactive compression) already
    /// happened during the current turn. Nothing else compacts until `advance_turn()` runs
    /// at the next turn boundary; `cooldown` is the number of cooling turns to apply then.
    CompactedThisTurn {
        /// How many cooling turns follow once this turn is over.
        cooldown: u8,
    },
    /// Recently hard-compacted and still cooling. The hard tier is skipped while soft
    /// compaction stays available. `maybe_compact` counts this down once per turn.
    Cooling {
        /// Cooling turns left before the state goes back to `Ready`.
        turns_remaining: u8,
    },
    /// Compaction can no longer shrink the context, so no further attempts are made.
    /// `warned` records whether the one-shot user warning has gone out.
    Exhausted {
        /// `true` once the user has been told that the context is exhausted.
        warned: bool,
    },
}

impl CompactionState {
    /// Reports whether a hard compaction (or an equivalent operation) has already run
    /// during the current turn.
    ///
    /// `maybe_compact`, `maybe_proactive_compress`, and
    /// `maybe_soft_compact_mid_iteration` all bail out when this is `true` (CRIT-03).
    #[must_use]
    pub fn is_compacted_this_turn(self) -> bool {
        match self {
            Self::CompactedThisTurn { .. } => true,
            Self::Ready | Self::Cooling { .. } | Self::Exhausted { .. } => false,
        }
    }

    /// Reports whether compaction has been switched off for the rest of the session.
    #[must_use]
    pub fn is_exhausted(self) -> bool {
        match self {
            Self::Exhausted { .. } => true,
            Self::Ready | Self::CompactedThisTurn { .. } | Self::Cooling { .. } => false,
        }
    }

    /// Number of cooling turns still outstanding; `0` for every state except `Cooling`.
    #[must_use]
    pub fn cooldown_remaining(self) -> u8 {
        if let Self::Cooling { turns_remaining } = self {
            turns_remaining
        } else {
            0
        }
    }

    /// Computes the state to carry into the next user turn.
    ///
    /// **Call this exactly once per turn, ahead of any compaction, eviction, or focus
    /// truncation.** Doing so guarantees `is_compacted_this_turn()` is `false` when the
    /// sidequest check runs, so the sidequest only ever observes same-turn compaction set
    /// by eviction, which happens *after* this call.
    ///
    /// Transitions:
    /// - `CompactedThisTurn { cooldown: 0 }` → `Ready`
    /// - `CompactedThisTurn { cooldown: n }` → `Cooling { turns_remaining: n }`
    /// - every other state is handed back untouched.
    #[must_use]
    pub fn advance_turn(self) -> Self {
        match self {
            Self::CompactedThisTurn { cooldown: 0 } => Self::Ready,
            Self::CompactedThisTurn { cooldown } => Self::Cooling {
                turns_remaining: cooldown,
            },
            Self::Ready | Self::Cooling { .. } | Self::Exhausted { .. } => self,
        }
    }
}
125
/// The compaction tier that the current context size calls for.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CompactionTier {
    /// Still within budget; nothing needs to be compacted.
    None,
    /// Soft tier — prune tool outputs and apply deferred summaries, without any LLM call.
    Soft,
    /// Hard tier — run full LLM-based summarization.
    Hard,
}
136
137/// Per-session context lifecycle manager.
138///
139/// Holds the token budget, compaction lifecycle state, and routing configuration.
140/// Callers in `zeph-core` drive the state machine via `advance_turn`, `compaction_tier`,
141/// and related accessors; the assembler reads the budget via `build_router` and field access.
142pub struct ContextManager {
143    /// Token budget for this session. `None` until configured via `apply_budget_config`.
144    pub budget: Option<ContextBudget>,
145    /// Soft compaction threshold (default 0.70): prune tool outputs + apply deferred summaries.
146    pub soft_compaction_threshold: f32,
147    /// Hard compaction threshold (default 0.90): full LLM-based summarization.
148    pub hard_compaction_threshold: f32,
149    /// Number of recent messages preserved during hard compaction.
150    pub compaction_preserve_tail: usize,
151    /// Token count protected from pruning during soft compaction.
152    pub prune_protect_tokens: usize,
153    /// Compression configuration for proactive compression.
154    pub compression: CompressionConfig,
155    /// Routing configuration for query-aware memory routing.
156    pub routing: StoreRoutingConfig,
157    /// Resolved provider for LLM/hybrid routing. `None` when strategy is `Heuristic`
158    /// or when the named provider could not be resolved from the pool.
159    pub store_routing_provider: Option<Arc<zeph_llm::any::AnyProvider>>,
160    /// Compaction lifecycle state. Replaces four independent boolean/u8 fields to make
161    /// invalid states unrepresentable. See [`CompactionState`] for the full transition map.
162    pub compaction: CompactionState,
163    /// Number of cooling turns to enforce after a successful hard compaction.
164    pub compaction_cooldown_turns: u8,
165    /// Counts user-message turns since the last hard compaction event.
166    /// `None` = no hard compaction has occurred yet in this session.
167    /// `Some(n)` = n turns have elapsed since the last hard compaction.
168    pub turns_since_last_hard_compaction: Option<u64>,
169}
170
171impl ContextManager {
172    /// Create a new `ContextManager` with default thresholds and no budget.
173    #[must_use]
174    pub fn new() -> Self {
175        Self {
176            budget: None,
177            soft_compaction_threshold: 0.60,
178            hard_compaction_threshold: 0.90,
179            compaction_preserve_tail: 6,
180            prune_protect_tokens: 40_000,
181            compression: CompressionConfig::default(),
182            routing: StoreRoutingConfig::default(),
183            store_routing_provider: None,
184            compaction: CompactionState::Ready,
185            compaction_cooldown_turns: 2,
186            turns_since_last_hard_compaction: None,
187        }
188    }
189
190    /// Apply budget and compaction thresholds from config.
191    ///
192    /// Must be called once after config is resolved. Safe to call again when config reloads.
193    #[allow(clippy::too_many_arguments)]
194    pub fn apply_budget_config(
195        &mut self,
196        budget_tokens: usize,
197        reserve_ratio: f32,
198        hard_compaction_threshold: f32,
199        compaction_preserve_tail: usize,
200        prune_protect_tokens: usize,
201        soft_compaction_threshold: f32,
202        compaction_cooldown_turns: u8,
203    ) {
204        if budget_tokens == 0 {
205            tracing::warn!("context budget is 0 — agent will have no token tracking");
206        }
207        if budget_tokens > 0 {
208            self.budget = Some(ContextBudget::new(budget_tokens, reserve_ratio));
209        }
210        self.hard_compaction_threshold = hard_compaction_threshold;
211        self.compaction_preserve_tail = compaction_preserve_tail;
212        self.prune_protect_tokens = prune_protect_tokens;
213        self.soft_compaction_threshold = soft_compaction_threshold;
214        self.compaction_cooldown_turns = compaction_cooldown_turns;
215    }
216
217    /// Reset compaction state for a new conversation.
218    ///
219    /// Clears cooldown, exhaustion, and turn counters so the new conversation starts
220    /// with a clean compaction slate.
221    pub fn reset_compaction(&mut self) {
222        self.compaction = CompactionState::Ready;
223        self.turns_since_last_hard_compaction = None;
224    }
225
226    /// Determine which compaction tier applies for the given token count.
227    ///
228    /// - `Hard` when `cached_tokens > budget * hard_compaction_threshold`
229    /// - `Soft` when `cached_tokens > budget * soft_compaction_threshold`
230    /// - `None` otherwise (or when no budget is set)
231    #[allow(
232        clippy::cast_precision_loss,
233        clippy::cast_possible_truncation,
234        clippy::cast_sign_loss
235    )]
236    pub fn compaction_tier(&self, cached_tokens: u64) -> CompactionTier {
237        let Some(ref budget) = self.budget else {
238            return CompactionTier::None;
239        };
240        let used = usize::try_from(cached_tokens).unwrap_or(usize::MAX);
241        let max = budget.max_tokens();
242        let hard = (max as f32 * self.hard_compaction_threshold) as usize;
243        if used > hard {
244            tracing::debug!(
245                cached_tokens,
246                hard_threshold = hard,
247                "context budget check: Hard tier"
248            );
249            return CompactionTier::Hard;
250        }
251        let soft = (max as f32 * self.soft_compaction_threshold) as usize;
252        if used > soft {
253            tracing::debug!(
254                cached_tokens,
255                soft_threshold = soft,
256                "context budget check: Soft tier"
257            );
258            return CompactionTier::Soft;
259        }
260        tracing::debug!(
261            cached_tokens,
262            soft_threshold = soft,
263            "context budget check: None"
264        );
265        CompactionTier::None
266    }
267
268    /// Build a memory router from the current routing configuration.
269    ///
270    /// Returns a `Box<dyn AsyncMemoryRouter>` so callers can use `route_async()` for LLM-based
271    /// classification. `HeuristicRouter` implements `AsyncMemoryRouter` via a blanket impl that
272    /// delegates to the sync `route_with_confidence`.
273    pub fn build_router(&self) -> Box<dyn zeph_memory::AsyncMemoryRouter + Send + Sync> {
274        use zeph_config::StoreRoutingStrategy;
275        if !self.routing.enabled {
276            return Box::new(zeph_memory::HeuristicRouter);
277        }
278        let fallback = zeph_memory::router::parse_route_str(
279            &self.routing.fallback_route,
280            zeph_memory::MemoryRoute::Hybrid,
281        );
282        match self.routing.strategy {
283            StoreRoutingStrategy::Heuristic => Box::new(zeph_memory::HeuristicRouter),
284            StoreRoutingStrategy::Llm => {
285                let Some(provider) = self.store_routing_provider.clone() else {
286                    tracing::warn!(
287                        "store_routing: strategy=llm but no provider resolved; \
288                         falling back to heuristic"
289                    );
290                    return Box::new(zeph_memory::HeuristicRouter);
291                };
292                Box::new(zeph_memory::LlmRouter::new(provider, fallback))
293            }
294            StoreRoutingStrategy::Hybrid => {
295                let Some(provider) = self.store_routing_provider.clone() else {
296                    tracing::warn!(
297                        "store_routing: strategy=hybrid but no provider resolved; \
298                         falling back to heuristic"
299                    );
300                    return Box::new(zeph_memory::HeuristicRouter);
301                };
302                Box::new(zeph_memory::HybridRouter::new(
303                    provider,
304                    fallback,
305                    self.routing.confidence_threshold,
306                ))
307            }
308        }
309    }
310
311    /// Check if proactive compression should fire for the current turn.
312    ///
313    /// Returns `Some((threshold_tokens, max_summary_tokens))` when proactive compression
314    /// should be triggered, `None` otherwise.
315    ///
316    /// Will return `None` if compaction already happened this turn (CRIT-03 fix).
317    #[must_use]
318    pub fn should_proactively_compress(&self, current_tokens: u64) -> Option<(usize, usize)> {
319        use zeph_config::CompressionStrategy;
320        if self.compaction.is_compacted_this_turn() {
321            return None;
322        }
323        match &self.compression.strategy {
324            CompressionStrategy::Proactive {
325                threshold_tokens,
326                max_summary_tokens,
327            } if usize::try_from(current_tokens).unwrap_or(usize::MAX) > *threshold_tokens => {
328                Some((*threshold_tokens, *max_summary_tokens))
329            }
330            _ => None,
331        }
332    }
333}
334
335impl Default for ContextManager {
336    fn default() -> Self {
337        Self::new()
338    }
339}
340
#[cfg(test)]
mod tests {
    use super::*;
    use zeph_config::CompressionStrategy;

    /// Manager with default thresholds and a 100k-token budget.
    fn manager_with_budget() -> ContextManager {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        cm
    }

    /// Manager using the proactive strategy with an 80k threshold and 4k summary cap.
    fn manager_with_proactive_strategy() -> ContextManager {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Proactive {
            threshold_tokens: 80_000,
            max_summary_tokens: 4_000,
        };
        cm
    }

    #[test]
    fn new_defaults() {
        let cm = ContextManager::new();
        assert!(cm.budget.is_none());
        assert!((cm.soft_compaction_threshold - 0.60).abs() < f32::EPSILON);
        assert!((cm.hard_compaction_threshold - 0.90).abs() < f32::EPSILON);
        assert_eq!(cm.compaction_preserve_tail, 6);
        assert_eq!(cm.prune_protect_tokens, 40_000);
        assert_eq!(cm.compaction, CompactionState::Ready);
        assert_eq!(cm.compaction_cooldown_turns, 2);
        assert!(cm.turns_since_last_hard_compaction.is_none());
    }

    #[test]
    fn apply_budget_config_sets_fields() {
        let mut cm = ContextManager::new();
        cm.apply_budget_config(50_000, 0.1, 0.85, 4, 10_000, 0.5, 3);
        assert!(cm.budget.is_some());
        assert!((cm.hard_compaction_threshold - 0.85).abs() < f32::EPSILON);
        assert!((cm.soft_compaction_threshold - 0.5).abs() < f32::EPSILON);
        assert_eq!(cm.compaction_preserve_tail, 4);
        assert_eq!(cm.prune_protect_tokens, 10_000);
        assert_eq!(cm.compaction_cooldown_turns, 3);
    }

    #[test]
    fn apply_budget_config_zero_budget_on_fresh_manager_keeps_none() {
        let mut cm = ContextManager::new();
        cm.apply_budget_config(0, 0.1, 0.85, 4, 10_000, 0.5, 3);
        assert!(cm.budget.is_none());
        // The other settings are still applied.
        assert_eq!(cm.compaction_preserve_tail, 4);
        assert_eq!(cm.compaction_cooldown_turns, 3);
    }

    #[test]
    fn reset_compaction_clears_state_and_counter() {
        let mut cm = ContextManager::new();
        cm.compaction = CompactionState::Exhausted { warned: true };
        cm.turns_since_last_hard_compaction = Some(5);
        cm.reset_compaction();
        assert_eq!(cm.compaction, CompactionState::Ready);
        assert!(cm.turns_since_last_hard_compaction.is_none());
    }

    #[test]
    fn compaction_tier_no_budget() {
        let cm = ContextManager::new();
        assert_eq!(cm.compaction_tier(1_000_000), CompactionTier::None);
    }

    #[test]
    fn compaction_tier_below_soft() {
        let cm = manager_with_budget();
        assert_eq!(cm.compaction_tier(50_000), CompactionTier::None);
    }

    #[test]
    fn compaction_tier_between_soft_and_hard() {
        let cm = manager_with_budget();
        assert_eq!(cm.compaction_tier(75_000), CompactionTier::Soft);
    }

    #[test]
    fn compaction_tier_above_hard() {
        let cm = manager_with_budget();
        assert_eq!(cm.compaction_tier(95_000), CompactionTier::Hard);
    }

    #[test]
    fn proactive_compress_above_threshold_returns_params() {
        let cm = manager_with_proactive_strategy();
        let result = cm.should_proactively_compress(90_000);
        assert_eq!(result, Some((80_000, 4_000)));
    }

    #[test]
    fn proactive_compress_at_or_below_threshold_returns_none() {
        let cm = manager_with_proactive_strategy();
        // The comparison is strict, so the exact threshold does not trigger.
        assert!(cm.should_proactively_compress(80_000).is_none());
        assert!(cm.should_proactively_compress(10_000).is_none());
    }

    #[test]
    fn proactive_compress_blocked_if_compacted_this_turn() {
        let mut cm = manager_with_proactive_strategy();
        cm.compaction = CompactionState::CompactedThisTurn { cooldown: 0 };
        assert!(cm.should_proactively_compress(100_000).is_none());
    }

    #[test]
    fn compaction_state_ready_is_not_compacted_this_turn() {
        assert!(!CompactionState::Ready.is_compacted_this_turn());
    }

    #[test]
    fn compaction_state_compacted_this_turn_flag() {
        assert!(CompactionState::CompactedThisTurn { cooldown: 2 }.is_compacted_this_turn());
        assert!(CompactionState::CompactedThisTurn { cooldown: 0 }.is_compacted_this_turn());
    }

    #[test]
    fn compaction_state_cooling_is_not_compacted_this_turn() {
        assert!(!CompactionState::Cooling { turns_remaining: 1 }.is_compacted_this_turn());
    }

    #[test]
    fn compaction_state_is_exhausted_only_for_exhausted() {
        assert!(CompactionState::Exhausted { warned: false }.is_exhausted());
        assert!(CompactionState::Exhausted { warned: true }.is_exhausted());
        assert!(!CompactionState::Ready.is_exhausted());
        assert!(!CompactionState::CompactedThisTurn { cooldown: 1 }.is_exhausted());
        assert!(!CompactionState::Cooling { turns_remaining: 1 }.is_exhausted());
    }

    #[test]
    fn cooldown_remaining_is_zero_outside_cooling() {
        assert_eq!(
            CompactionState::Cooling { turns_remaining: 4 }.cooldown_remaining(),
            4
        );
        assert_eq!(CompactionState::Ready.cooldown_remaining(), 0);
        assert_eq!(
            CompactionState::CompactedThisTurn { cooldown: 4 }.cooldown_remaining(),
            0
        );
        assert_eq!(
            CompactionState::Exhausted { warned: false }.cooldown_remaining(),
            0
        );
    }

    #[test]
    fn advance_turn_compacted_with_cooldown_enters_cooling() {
        let state = CompactionState::CompactedThisTurn { cooldown: 3 };
        assert_eq!(
            state.advance_turn(),
            CompactionState::Cooling { turns_remaining: 3 }
        );
    }

    #[test]
    fn advance_turn_compacted_zero_cooldown_returns_ready() {
        let state = CompactionState::CompactedThisTurn { cooldown: 0 };
        assert_eq!(state.advance_turn(), CompactionState::Ready);
    }

    #[test]
    fn advance_turn_leaves_other_states_unchanged() {
        for state in [
            CompactionState::Ready,
            CompactionState::Cooling { turns_remaining: 2 },
            CompactionState::Exhausted { warned: false },
            CompactionState::Exhausted { warned: true },
        ] {
            assert_eq!(state.advance_turn(), state);
        }
    }
}
436}