Skip to main content

zeph_context/
manager.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Context lifecycle state machine for the Zeph agent.
5//!
6//! [`ContextManager`] tracks per-session compaction state and token budgets.
7//! It decides when soft (pruning) or hard (LLM summarization) compaction should fire,
8//! and builds the memory router used for query-aware store selection.
9//!
10//! [`CompactionState`] is the core state machine — see its doc comment for the
11//! full transition map.
12
13use std::sync::Arc;
14
15use zeph_config::{CompressionConfig, StoreRoutingConfig};
16
17use crate::budget::ContextBudget;
18
/// Per-session lifecycle of the compaction subsystem.
///
/// One explicit state machine stands in for four independent boolean/u8 fields, so that
/// invalid combinations (e.g., warned-without-exhausted) cannot be represented.
///
/// # Transition map
///
/// ```text
/// Ready
///   → CompactedThisTurn { cooldown } when hard compaction succeeds (pruning or LLM)
///   → CompactedThisTurn { cooldown: 0 } when focus truncation, eviction, or proactive
///     compression fires (these callers do not want post-compaction cooldown)
///   → Exhausted { warned: false } when compaction is counterproductive (too few messages,
///     zero net freed tokens, or still above hard threshold after LLM compaction)
///
/// CompactedThisTurn { cooldown }
///   → Cooling { turns_remaining: cooldown } when cooldown > 0  (via advance_turn)
///   → Ready                                 when cooldown == 0 (via advance_turn)
///
/// Cooling { turns_remaining }
///   → Cooling { turns_remaining - 1 } decremented inside maybe_compact each turn
///   → Ready                           when turns_remaining reaches 0
///   NOTE: Exhausted is NOT reachable from Cooling — all exhaustion-setting sites in
///   summarization.rs are guarded by an early-return when in_cooldown is true.
///
/// Exhausted { warned: false }
///   → Exhausted { warned: true } after the user warning is sent (one-shot)
///
/// Exhausted { warned: true }  (terminal — no further transitions)
/// ```
///
/// `turns_since_last_hard_compaction` is a **metric counter** that lives outside this state
/// machine as its own field on `ContextManager`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionState {
    /// Idle: compaction is allowed to fire once the context crosses a threshold.
    Ready,
    /// A hard compaction (or focus truncation / eviction / proactive compression) already
    /// ran during the current turn. Nothing else compacts until `advance_turn()` runs at the
    /// next turn boundary; `cooldown` is the number of cooling turns to apply afterwards.
    CompactedThisTurn {
        /// Number of cooling turns to enforce once the current turn is over.
        cooldown: u8,
    },
    /// Post-compaction cooldown. The hard tier is skipped while soft compaction stays allowed.
    /// The counter is decremented inside `maybe_compact` each turn until it hits 0.
    Cooling {
        /// Cooling turns left before the state returns to `Ready`.
        turns_remaining: u8,
    },
    /// Compaction can no longer shrink the context, so no further attempts are made.
    /// `warned` records whether the one-shot user warning has gone out.
    Exhausted {
        /// `true` once the user has been told about context exhaustion.
        warned: bool,
    },
}

impl CompactionState {
    /// Reports whether hard compaction (or an equivalent operation) has already run in the
    /// current turn.
    ///
    /// While this is `true`, `maybe_compact`, `maybe_proactive_compress`, and
    /// `maybe_soft_compact_mid_iteration` all skip execution (CRIT-03).
    #[must_use]
    pub fn is_compacted_this_turn(self) -> bool {
        matches!(self, CompactionState::CompactedThisTurn { .. })
    }

    /// Reports whether compaction has been permanently switched off for this session.
    #[must_use]
    pub fn is_exhausted(self) -> bool {
        matches!(self, CompactionState::Exhausted { .. })
    }

    /// Cooling turns still outstanding; 0 for every state other than `Cooling`.
    #[must_use]
    pub fn cooldown_remaining(self) -> u8 {
        if let Self::Cooling { turns_remaining } = self {
            turns_remaining
        } else {
            0
        }
    }

    /// Compute the state that applies at the start of the next user turn.
    ///
    /// **Must be called exactly once per turn, before any compaction, eviction, or
    /// focus truncation can run.** That ordering guarantees `is_compacted_this_turn()`
    /// is `false` when the sidequest check executes, preserving the invariant that the
    /// sidequest only observes same-turn compaction set by eviction, which runs *after*
    /// this call.
    ///
    /// Transitions:
    /// - `CompactedThisTurn { cooldown: 0 }` → `Ready`
    /// - `CompactedThisTurn { cooldown: n }` → `Cooling { turns_remaining: n }`
    /// - Every other state is handed back untouched.
    #[must_use]
    pub fn advance_turn(self) -> Self {
        match self {
            // No cooldown requested: compaction is immediately available again.
            Self::CompactedThisTurn { cooldown: 0 } => Self::Ready,
            Self::CompactedThisTurn { cooldown } => Self::Cooling {
                turns_remaining: cooldown,
            },
            Self::Ready | Self::Cooling { .. } | Self::Exhausted { .. } => self,
        }
    }
}
125
/// The compaction tier selected for the current context size.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionTier {
    /// No compaction required: the context fits within the budget.
    None,
    /// Soft tier — tool outputs are pruned and deferred summaries applied, without an LLM call.
    Soft,
    /// Hard tier — the context is summarized in full by the LLM.
    Hard,
}
136
137/// Per-session context lifecycle manager.
138///
139/// Holds the token budget, compaction lifecycle state, and routing configuration.
140/// Callers in `zeph-core` drive the state machine via `advance_turn`, `compaction_tier`,
141/// and related accessors; the assembler reads the budget via `build_router` and field access.
142pub struct ContextManager {
143    /// Token budget for this session. `None` until configured via `apply_budget_config`.
144    pub budget: Option<ContextBudget>,
145    /// Soft compaction threshold (default 0.70): prune tool outputs + apply deferred summaries.
146    pub soft_compaction_threshold: f32,
147    /// Hard compaction threshold (default 0.90): full LLM-based summarization.
148    pub hard_compaction_threshold: f32,
149    /// Number of recent messages preserved during hard compaction.
150    pub compaction_preserve_tail: usize,
151    /// Token count protected from pruning during soft compaction.
152    pub prune_protect_tokens: usize,
153    /// Compression configuration for proactive compression.
154    pub compression: CompressionConfig,
155    /// Routing configuration for query-aware memory routing.
156    pub routing: StoreRoutingConfig,
157    /// Resolved provider for LLM/hybrid routing. `None` when strategy is `Heuristic`
158    /// or when the named provider could not be resolved from the pool.
159    pub store_routing_provider: Option<Arc<zeph_llm::any::AnyProvider>>,
160    /// Compaction lifecycle state. Replaces four independent boolean/u8 fields to make
161    /// invalid states unrepresentable. See [`CompactionState`] for the full transition map.
162    pub compaction: CompactionState,
163    /// Number of cooling turns to enforce after a successful hard compaction.
164    pub compaction_cooldown_turns: u8,
165    /// Counts user-message turns since the last hard compaction event.
166    /// `None` = no hard compaction has occurred yet in this session.
167    /// `Some(n)` = n turns have elapsed since the last hard compaction.
168    pub turns_since_last_hard_compaction: Option<u64>,
169}
170
171impl ContextManager {
172    /// Create a new `ContextManager` with default thresholds and no budget.
173    #[must_use]
174    pub fn new() -> Self {
175        Self {
176            budget: None,
177            soft_compaction_threshold: 0.60,
178            hard_compaction_threshold: 0.90,
179            compaction_preserve_tail: 6,
180            prune_protect_tokens: 40_000,
181            compression: CompressionConfig::default(),
182            routing: StoreRoutingConfig::default(),
183            store_routing_provider: None,
184            compaction: CompactionState::Ready,
185            compaction_cooldown_turns: 2,
186            turns_since_last_hard_compaction: None,
187        }
188    }
189
190    /// Apply budget and compaction thresholds from config.
191    ///
192    /// Must be called once after config is resolved. Safe to call again when config reloads.
193    #[allow(clippy::too_many_arguments)]
194    pub fn apply_budget_config(
195        &mut self,
196        budget_tokens: usize,
197        reserve_ratio: f32,
198        hard_compaction_threshold: f32,
199        compaction_preserve_tail: usize,
200        prune_protect_tokens: usize,
201        soft_compaction_threshold: f32,
202        compaction_cooldown_turns: u8,
203    ) {
204        if budget_tokens == 0 {
205            tracing::warn!("context budget is 0 — agent will have no token tracking");
206        }
207        if budget_tokens > 0 {
208            self.budget = Some(ContextBudget::new(budget_tokens, reserve_ratio));
209        }
210        self.hard_compaction_threshold = hard_compaction_threshold;
211        self.compaction_preserve_tail = compaction_preserve_tail;
212        self.prune_protect_tokens = prune_protect_tokens;
213        self.soft_compaction_threshold = soft_compaction_threshold;
214        self.compaction_cooldown_turns = compaction_cooldown_turns;
215    }
216
217    /// Reset compaction state for a new conversation.
218    ///
219    /// Clears cooldown, exhaustion, and turn counters so the new conversation starts
220    /// with a clean compaction slate.
221    pub fn reset_compaction(&mut self) {
222        self.compaction = CompactionState::Ready;
223        self.turns_since_last_hard_compaction = None;
224    }
225
226    /// Determine which compaction tier applies for the given token count.
227    ///
228    /// - `Hard` when `cached_tokens > budget * hard_compaction_threshold`
229    /// - `Soft` when `cached_tokens > budget * soft_compaction_threshold`
230    /// - `None` otherwise (or when no budget is set)
231    #[allow(
232        clippy::cast_precision_loss,
233        clippy::cast_possible_truncation,
234        clippy::cast_sign_loss
235    )]
236    pub fn compaction_tier(&self, cached_tokens: u64) -> CompactionTier {
237        let Some(ref budget) = self.budget else {
238            return CompactionTier::None;
239        };
240        let used = usize::try_from(cached_tokens).unwrap_or(usize::MAX);
241        let max = budget.max_tokens();
242        let hard = (max as f32 * self.hard_compaction_threshold) as usize;
243        if used > hard {
244            tracing::debug!(
245                cached_tokens,
246                hard_threshold = hard,
247                "context budget check: Hard tier"
248            );
249            return CompactionTier::Hard;
250        }
251        let soft = (max as f32 * self.soft_compaction_threshold) as usize;
252        if used > soft {
253            tracing::debug!(
254                cached_tokens,
255                soft_threshold = soft,
256                "context budget check: Soft tier"
257            );
258            return CompactionTier::Soft;
259        }
260        tracing::debug!(
261            cached_tokens,
262            soft_threshold = soft,
263            "context budget check: None"
264        );
265        CompactionTier::None
266    }
267
268    /// Build a memory router from the current routing configuration.
269    ///
270    /// Returns a `Box<dyn AsyncMemoryRouter>` so callers can use `route_async()` for LLM-based
271    /// classification. `HeuristicRouter` implements `AsyncMemoryRouter` via a blanket impl that
272    /// delegates to the sync `route_with_confidence`.
273    pub fn build_router(&self) -> Box<dyn zeph_memory::AsyncMemoryRouter + Send + Sync> {
274        use zeph_config::StoreRoutingStrategy;
275        if !self.routing.enabled {
276            return Box::new(zeph_memory::HeuristicRouter);
277        }
278        let fallback = zeph_memory::router::parse_route_str(
279            &self.routing.fallback_route,
280            zeph_memory::MemoryRoute::Hybrid,
281        );
282        match self.routing.strategy {
283            StoreRoutingStrategy::Heuristic => Box::new(zeph_memory::HeuristicRouter),
284            StoreRoutingStrategy::Llm => {
285                let Some(provider) = self.store_routing_provider.clone() else {
286                    tracing::warn!(
287                        "store_routing: strategy=llm but no provider resolved; \
288                         falling back to heuristic"
289                    );
290                    return Box::new(zeph_memory::HeuristicRouter);
291                };
292                Box::new(zeph_memory::LlmRouter::new(provider, fallback))
293            }
294            StoreRoutingStrategy::Hybrid => {
295                let Some(provider) = self.store_routing_provider.clone() else {
296                    tracing::warn!(
297                        "store_routing: strategy=hybrid but no provider resolved; \
298                         falling back to heuristic"
299                    );
300                    return Box::new(zeph_memory::HeuristicRouter);
301                };
302                Box::new(zeph_memory::HybridRouter::new(
303                    provider,
304                    fallback,
305                    self.routing.confidence_threshold,
306                ))
307            }
308        }
309    }
310
311    /// Check if proactive compression should fire for the current turn.
312    ///
313    /// Returns `Some((threshold_tokens, max_summary_tokens))` when proactive compression
314    /// should be triggered, `None` otherwise.
315    ///
316    /// For `CompressionStrategy::Focus`, the threshold is the soft-compaction fraction
317    /// of the budget (same gate used by mid-iteration soft compaction). The
318    /// `max_summary_tokens` element is unused on the Focus path — the auto-consolidation
319    /// function uses `FocusConfig.max_knowledge_tokens / 2` instead.
320    ///
321    /// Will return `None` if compaction already happened this turn (CRIT-03 fix).
322    #[must_use]
323    pub fn should_proactively_compress(&self, current_tokens: u64) -> Option<(usize, usize)> {
324        use zeph_config::CompressionStrategy;
325        if self.compaction.is_compacted_this_turn() {
326            return None;
327        }
328        match &self.compression.strategy {
329            CompressionStrategy::Proactive {
330                threshold_tokens,
331                max_summary_tokens,
332            } if usize::try_from(current_tokens).unwrap_or(usize::MAX) > *threshold_tokens => {
333                Some((*threshold_tokens, *max_summary_tokens))
334            }
335            CompressionStrategy::Focus => {
336                // Focus fires at the soft-compaction threshold (same as tier machinery).
337                let budget = self.budget.as_ref()?.max_tokens();
338                #[allow(
339                    clippy::cast_precision_loss,
340                    clippy::cast_sign_loss,
341                    clippy::cast_possible_truncation
342                )]
343                let threshold = (budget as f32 * self.soft_compaction_threshold) as usize;
344                if usize::try_from(current_tokens).unwrap_or(usize::MAX) > threshold {
345                    // NOTE: the second tuple element (max_summary_tokens) is a placeholder
346                    // on the Focus path — the auto-consolidation function ignores it and uses
347                    // FocusConfig.max_knowledge_tokens / 2 instead.
348                    Some((threshold, threshold / 4))
349                } else {
350                    None
351                }
352            }
353            _ => None,
354        }
355    }
356}
357
358impl Default for ContextManager {
359    fn default() -> Self {
360        Self::new()
361    }
362}
363
#[cfg(test)]
mod tests {
    use super::*;
    use zeph_config::CompressionStrategy;

    /// Manager with a 100 000-token budget and otherwise default settings.
    fn manager_with_budget() -> ContextManager {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        cm
    }

    #[test]
    fn new_defaults() {
        let cm = ContextManager::new();
        assert!(cm.budget.is_none());
        assert!((cm.soft_compaction_threshold - 0.60).abs() < f32::EPSILON);
        assert!((cm.hard_compaction_threshold - 0.90).abs() < f32::EPSILON);
        assert_eq!(cm.compaction_preserve_tail, 6);
        assert_eq!(cm.prune_protect_tokens, 40_000);
        assert_eq!(cm.compaction, CompactionState::Ready);
        assert_eq!(cm.compaction_cooldown_turns, 2);
        assert!(cm.turns_since_last_hard_compaction.is_none());
    }

    #[test]
    fn default_matches_new() {
        let cm = ContextManager::default();
        assert!(cm.budget.is_none());
        assert!((cm.soft_compaction_threshold - 0.60).abs() < f32::EPSILON);
        assert!((cm.hard_compaction_threshold - 0.90).abs() < f32::EPSILON);
        assert_eq!(cm.compaction, CompactionState::Ready);
    }

    #[test]
    fn apply_budget_config_sets_budget_and_thresholds() {
        let mut cm = ContextManager::new();
        cm.apply_budget_config(100_000, 0.1, 0.85, 4, 10_000, 0.5, 3);
        assert!(cm.budget.is_some());
        assert!((cm.hard_compaction_threshold - 0.85).abs() < f32::EPSILON);
        assert!((cm.soft_compaction_threshold - 0.5).abs() < f32::EPSILON);
        assert_eq!(cm.compaction_preserve_tail, 4);
        assert_eq!(cm.prune_protect_tokens, 10_000);
        assert_eq!(cm.compaction_cooldown_turns, 3);
    }

    #[test]
    fn apply_budget_config_zero_budget_on_fresh_manager_leaves_budget_unset() {
        let mut cm = ContextManager::new();
        cm.apply_budget_config(0, 0.1, 0.85, 4, 10_000, 0.5, 3);
        assert!(cm.budget.is_none());
        // The remaining settings are still applied.
        assert_eq!(cm.compaction_cooldown_turns, 3);
    }

    #[test]
    fn reset_compaction_clears_state_and_counter() {
        let mut cm = ContextManager::new();
        cm.compaction = CompactionState::Exhausted { warned: true };
        cm.turns_since_last_hard_compaction = Some(5);
        cm.reset_compaction();
        assert_eq!(cm.compaction, CompactionState::Ready);
        assert!(cm.turns_since_last_hard_compaction.is_none());
    }

    #[test]
    fn compaction_tier_no_budget() {
        let cm = ContextManager::new();
        assert_eq!(cm.compaction_tier(1_000_000), CompactionTier::None);
    }

    #[test]
    fn compaction_tier_below_soft() {
        let cm = manager_with_budget();
        assert_eq!(cm.compaction_tier(50_000), CompactionTier::None);
    }

    #[test]
    fn compaction_tier_between_soft_and_hard() {
        let cm = manager_with_budget();
        assert_eq!(cm.compaction_tier(75_000), CompactionTier::Soft);
    }

    #[test]
    fn compaction_tier_above_hard() {
        let cm = manager_with_budget();
        assert_eq!(cm.compaction_tier(95_000), CompactionTier::Hard);
    }

    #[test]
    fn compaction_tier_thresholds_are_exclusive() {
        let cm = manager_with_budget();
        // Soft threshold: 100_000 * 0.60 = 60_000; exactly at the threshold does not fire.
        assert_eq!(cm.compaction_tier(60_000), CompactionTier::None);
        assert_eq!(cm.compaction_tier(60_001), CompactionTier::Soft);
        // Hard threshold: 100_000 * 0.90 = 90_000; exactly at the threshold stays Soft.
        assert_eq!(cm.compaction_tier(90_000), CompactionTier::Soft);
        assert_eq!(cm.compaction_tier(90_001), CompactionTier::Hard);
    }

    #[test]
    fn proactive_compress_above_threshold_returns_params() {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Proactive {
            threshold_tokens: 80_000,
            max_summary_tokens: 4_000,
        };
        let result = cm.should_proactively_compress(90_000);
        assert_eq!(result, Some((80_000, 4_000)));
    }

    #[test]
    fn proactive_compress_at_or_below_threshold_returns_none() {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Proactive {
            threshold_tokens: 80_000,
            max_summary_tokens: 4_000,
        };
        assert!(cm.should_proactively_compress(70_000).is_none());
        // The comparison is strict, so the exact threshold does not fire.
        assert!(cm.should_proactively_compress(80_000).is_none());
    }

    #[test]
    fn proactive_compress_blocked_if_compacted_this_turn() {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Proactive {
            threshold_tokens: 80_000,
            max_summary_tokens: 4_000,
        };
        cm.compaction = CompactionState::CompactedThisTurn { cooldown: 0 };
        assert!(cm.should_proactively_compress(100_000).is_none());
    }

    #[test]
    fn compaction_state_ready_is_not_compacted_this_turn() {
        assert!(!CompactionState::Ready.is_compacted_this_turn());
    }

    #[test]
    fn compaction_state_compacted_this_turn_flag() {
        assert!(CompactionState::CompactedThisTurn { cooldown: 2 }.is_compacted_this_turn());
        assert!(CompactionState::CompactedThisTurn { cooldown: 0 }.is_compacted_this_turn());
    }

    #[test]
    fn compaction_state_cooling_is_not_compacted_this_turn() {
        assert!(!CompactionState::Cooling { turns_remaining: 1 }.is_compacted_this_turn());
    }

    #[test]
    fn compaction_state_is_exhausted_only_for_exhausted() {
        assert!(CompactionState::Exhausted { warned: false }.is_exhausted());
        assert!(CompactionState::Exhausted { warned: true }.is_exhausted());
        assert!(!CompactionState::Ready.is_exhausted());
        assert!(!CompactionState::Cooling { turns_remaining: 1 }.is_exhausted());
        assert!(!CompactionState::CompactedThisTurn { cooldown: 1 }.is_exhausted());
    }

    #[test]
    fn cooldown_remaining_is_zero_outside_cooling() {
        assert_eq!(
            CompactionState::Cooling { turns_remaining: 2 }.cooldown_remaining(),
            2
        );
        assert_eq!(CompactionState::Ready.cooldown_remaining(), 0);
        assert_eq!(
            CompactionState::CompactedThisTurn { cooldown: 3 }.cooldown_remaining(),
            0
        );
        assert_eq!(
            CompactionState::Exhausted { warned: false }.cooldown_remaining(),
            0
        );
    }

    #[test]
    fn advance_turn_compacted_with_cooldown_enters_cooling() {
        let state = CompactionState::CompactedThisTurn { cooldown: 3 };
        assert_eq!(
            state.advance_turn(),
            CompactionState::Cooling { turns_remaining: 3 }
        );
    }

    #[test]
    fn advance_turn_compacted_zero_cooldown_returns_ready() {
        let state = CompactionState::CompactedThisTurn { cooldown: 0 };
        assert_eq!(state.advance_turn(), CompactionState::Ready);
    }

    #[test]
    fn advance_turn_leaves_other_states_unchanged() {
        for state in [
            CompactionState::Ready,
            CompactionState::Cooling { turns_remaining: 1 },
            CompactionState::Exhausted { warned: false },
            CompactionState::Exhausted { warned: true },
        ] {
            assert_eq!(state.advance_turn(), state);
        }
    }

    #[test]
    fn should_proactively_compress_focus_fires_above_soft_threshold() {
        let mut cm = manager_with_budget();
        cm.compression.strategy = CompressionStrategy::Focus;
        // Default soft threshold is 0.60 → 60_000 tokens.
        // 75_000 > 60_000 → should fire.
        let result = cm.should_proactively_compress(75_000);
        assert!(result.is_some(), "Focus must fire above soft threshold");
        let (threshold, _) = result.unwrap();
        assert_eq!(threshold, 60_000);
    }

    #[test]
    fn should_proactively_compress_focus_returns_none_below_threshold() {
        let mut cm = manager_with_budget();
        cm.compression.strategy = CompressionStrategy::Focus;
        // 50_000 < 60_000 → should not fire.
        assert!(cm.should_proactively_compress(50_000).is_none());
    }

    #[test]
    fn should_proactively_compress_focus_returns_none_without_budget() {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Focus;
        // No budget set → cannot compute threshold → None.
        assert!(cm.should_proactively_compress(999_999).is_none());
    }

    #[test]
    fn should_proactively_compress_focus_blocked_if_compacted_this_turn() {
        let mut cm = manager_with_budget();
        cm.compression.strategy = CompressionStrategy::Focus;
        cm.compaction = CompactionState::CompactedThisTurn { cooldown: 1 };
        assert!(cm.should_proactively_compress(75_000).is_none());
    }
}