zeph_context/manager.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Context lifecycle state machine for the Zeph agent.
5//!
6//! [`ContextManager`] tracks per-session compaction state and token budgets.
7//! It decides when soft (pruning) or hard (LLM summarization) compaction should fire,
8//! and builds the memory router used for query-aware store selection.
9//!
10//! [`CompactionState`] is the core state machine — see its doc comment for the
11//! full transition map.
12
13use std::sync::Arc;
14
15use zeph_config::{CompressionConfig, StoreRoutingConfig};
16
17use crate::budget::ContextBudget;
18
/// Lifecycle state of the compaction subsystem within a single session.
///
/// Replaces four independent boolean/u8 fields with an explicit state machine that makes
/// invalid states unrepresentable (e.g., warned-without-exhausted).
///
/// Only the exits from `CompactedThisTurn` are implemented on this type (see
/// [`CompactionState::advance_turn`]); every other transition listed below is applied by
/// code outside this type that assigns a new state.
///
/// # Transition map
///
/// ```text
/// Ready
///   → CompactedThisTurn { cooldown }     when hard compaction succeeds (pruning or LLM)
///   → CompactedThisTurn { cooldown: 0 }  when focus truncation, eviction, or proactive
///                                        compression fires (these callers do not want
///                                        post-compaction cooldown)
///   → Exhausted { warned: false }        when compaction is counterproductive (too few
///                                        messages, zero net freed tokens, or still above
///                                        hard threshold after LLM compaction)
///
/// CompactedThisTurn { cooldown }
///   → Cooling { turns_remaining: cooldown }  when cooldown > 0 (via advance_turn)
///   → Ready                                  when cooldown == 0 (via advance_turn)
///
/// Cooling { turns_remaining }
///   → Cooling { turns_remaining - 1 }    decremented inside maybe_compact each turn
///   → Ready                              when turns_remaining reaches 0
///   NOTE: Exhausted is NOT reachable from Cooling — all exhaustion-setting sites in
///   summarization.rs are guarded by an early-return when in_cooldown is true.
///
/// Exhausted { warned: false }
///   → Exhausted { warned: true }         after the user warning is sent (one-shot)
///
/// Exhausted { warned: true }             (terminal — no further transitions)
/// ```
///
/// `turns_since_last_hard_compaction` is a **metric counter**, not part of this state machine,
/// and remains a separate field on `ContextManager`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionState {
    /// Normal state — compaction may fire if context exceeds thresholds.
    Ready,
    /// Hard compaction (or focus truncation / eviction / proactive compression) ran this turn.
    /// No further compaction until `advance_turn()` is called at the next turn boundary.
    /// `cooldown` carries the number of cooling turns to enforce after this turn ends.
    CompactedThisTurn {
        /// Cooling turns to enforce after this turn ends. `0` means the next
        /// `advance_turn()` goes straight back to `Ready`.
        cooldown: u8,
    },
    /// Cooling down after a recent hard compaction. Hard tier is skipped; soft is still allowed.
    /// Counter decrements inside `maybe_compact` each turn until it reaches 0.
    /// `advance_turn()` leaves this state untouched.
    Cooling {
        /// Remaining cooling turns before returning to `Ready`.
        turns_remaining: u8,
    },
    /// Compaction cannot reduce context further. No more attempts will be made.
    /// `warned` tracks whether the one-shot user warning has been sent.
    Exhausted {
        /// Whether the user has already been notified of context exhaustion.
        warned: bool,
    },
}
76
77impl CompactionState {
78 /// Whether hard compaction (or a compaction-equivalent operation) already ran this turn.
79 ///
80 /// When `true`, `maybe_compact`, `maybe_proactive_compress`, and
81 /// `maybe_soft_compact_mid_iteration` all skip execution (CRIT-03).
82 #[must_use]
83 pub fn is_compacted_this_turn(self) -> bool {
84 matches!(self, Self::CompactedThisTurn { .. })
85 }
86
87 /// Whether compaction is permanently disabled for this session.
88 #[must_use]
89 pub fn is_exhausted(self) -> bool {
90 matches!(self, Self::Exhausted { .. })
91 }
92
93 /// Remaining cooldown turns (0 when not in `Cooling` state).
94 #[must_use]
95 pub fn cooldown_remaining(self) -> u8 {
96 match self {
97 Self::Cooling { turns_remaining } => turns_remaining,
98 _ => 0,
99 }
100 }
101
102 /// Transition to the next-turn state at the start of each user turn.
103 ///
104 /// **Must be called exactly once per turn, before any compaction, eviction, or
105 /// focus truncation can run.** This guarantees that `is_compacted_this_turn()`
106 /// returns `false` when the sidequest check executes — preserving the invariant
107 /// that the sidequest only sees same-turn compaction set by eviction which runs
108 /// *after* this call.
109 ///
110 /// Transitions:
111 /// - `CompactedThisTurn { cooldown: 0 }` → `Ready`
112 /// - `CompactedThisTurn { cooldown: n }` → `Cooling { turns_remaining: n }`
113 /// - All other states are returned unchanged.
114 #[must_use]
115 pub fn advance_turn(self) -> Self {
116 match self {
117 Self::CompactedThisTurn { cooldown } if cooldown > 0 => Self::Cooling {
118 turns_remaining: cooldown,
119 },
120 Self::CompactedThisTurn { .. } => Self::Ready,
121 other => other,
122 }
123 }
124}
125
/// Indicates which compaction tier applies for the current context size.
///
/// Produced by `ContextManager::compaction_tier`, which compares the token count
/// against the budget scaled by the soft and hard thresholds. Both comparisons are
/// strict (`>`), so a count exactly equal to a threshold stays in the lower tier.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionTier {
    /// Context is within budget — no compaction needed.
    /// Also the result when no budget is configured.
    None,
    /// Soft tier: prune tool outputs + apply deferred summaries. No LLM call.
    Soft,
    /// Hard tier: full LLM-based summarization.
    Hard,
}
136
/// Per-session context lifecycle manager.
///
/// Holds the token budget, compaction lifecycle state, and routing configuration.
/// Callers in `zeph-core` drive the state machine via `advance_turn`, `compaction_tier`,
/// and related accessors; the assembler reads the budget via `build_router` and field access.
pub struct ContextManager {
    /// Token budget for this session. `None` until configured via `apply_budget_config`.
    pub budget: Option<ContextBudget>,
    /// Soft compaction threshold (default 0.60): prune tool outputs + apply deferred summaries.
    /// Applied as a fraction of the budget's `max_tokens()`.
    pub soft_compaction_threshold: f32,
    /// Hard compaction threshold (default 0.90): full LLM-based summarization.
    /// Applied as a fraction of the budget's `max_tokens()`.
    pub hard_compaction_threshold: f32,
    /// Number of recent messages preserved during hard compaction (default 6).
    pub compaction_preserve_tail: usize,
    /// Token count protected from pruning during soft compaction (default 40 000).
    pub prune_protect_tokens: usize,
    /// Compression configuration for proactive compression.
    pub compression: CompressionConfig,
    /// Routing configuration for query-aware memory routing.
    pub routing: StoreRoutingConfig,
    /// Resolved provider for LLM/hybrid routing. `None` when strategy is `Heuristic`
    /// or when the named provider could not be resolved from the pool.
    pub store_routing_provider: Option<Arc<zeph_llm::any::AnyProvider>>,
    /// Compaction lifecycle state. Replaces four independent boolean/u8 fields to make
    /// invalid states unrepresentable. See [`CompactionState`] for the full transition map.
    pub compaction: CompactionState,
    /// Number of cooling turns to enforce after a successful hard compaction (default 2).
    pub compaction_cooldown_turns: u8,
    /// Counts user-message turns since the last hard compaction event.
    /// `None` = no hard compaction has occurred yet in this session.
    /// `Some(n)` = n turns have elapsed since the last hard compaction.
    pub turns_since_last_hard_compaction: Option<u64>,
}
170
171impl ContextManager {
172 /// Create a new `ContextManager` with default thresholds and no budget.
173 #[must_use]
174 pub fn new() -> Self {
175 Self {
176 budget: None,
177 soft_compaction_threshold: 0.60,
178 hard_compaction_threshold: 0.90,
179 compaction_preserve_tail: 6,
180 prune_protect_tokens: 40_000,
181 compression: CompressionConfig::default(),
182 routing: StoreRoutingConfig::default(),
183 store_routing_provider: None,
184 compaction: CompactionState::Ready,
185 compaction_cooldown_turns: 2,
186 turns_since_last_hard_compaction: None,
187 }
188 }
189
190 /// Apply budget and compaction thresholds from config.
191 ///
192 /// Must be called once after config is resolved. Safe to call again when config reloads.
193 #[allow(clippy::too_many_arguments)]
194 pub fn apply_budget_config(
195 &mut self,
196 budget_tokens: usize,
197 reserve_ratio: f32,
198 hard_compaction_threshold: f32,
199 compaction_preserve_tail: usize,
200 prune_protect_tokens: usize,
201 soft_compaction_threshold: f32,
202 compaction_cooldown_turns: u8,
203 ) {
204 if budget_tokens == 0 {
205 tracing::warn!("context budget is 0 — agent will have no token tracking");
206 }
207 if budget_tokens > 0 {
208 self.budget = Some(ContextBudget::new(budget_tokens, reserve_ratio));
209 }
210 self.hard_compaction_threshold = hard_compaction_threshold;
211 self.compaction_preserve_tail = compaction_preserve_tail;
212 self.prune_protect_tokens = prune_protect_tokens;
213 self.soft_compaction_threshold = soft_compaction_threshold;
214 self.compaction_cooldown_turns = compaction_cooldown_turns;
215 }
216
217 /// Reset compaction state for a new conversation.
218 ///
219 /// Clears cooldown, exhaustion, and turn counters so the new conversation starts
220 /// with a clean compaction slate.
221 pub fn reset_compaction(&mut self) {
222 self.compaction = CompactionState::Ready;
223 self.turns_since_last_hard_compaction = None;
224 }
225
226 /// Determine which compaction tier applies for the given token count.
227 ///
228 /// - `Hard` when `cached_tokens > budget * hard_compaction_threshold`
229 /// - `Soft` when `cached_tokens > budget * soft_compaction_threshold`
230 /// - `None` otherwise (or when no budget is set)
231 #[allow(
232 clippy::cast_precision_loss,
233 clippy::cast_possible_truncation,
234 clippy::cast_sign_loss
235 )]
236 pub fn compaction_tier(&self, cached_tokens: u64) -> CompactionTier {
237 let Some(ref budget) = self.budget else {
238 return CompactionTier::None;
239 };
240 let used = usize::try_from(cached_tokens).unwrap_or(usize::MAX);
241 let max = budget.max_tokens();
242 let hard = (max as f32 * self.hard_compaction_threshold) as usize;
243 if used > hard {
244 tracing::debug!(
245 cached_tokens,
246 hard_threshold = hard,
247 "context budget check: Hard tier"
248 );
249 return CompactionTier::Hard;
250 }
251 let soft = (max as f32 * self.soft_compaction_threshold) as usize;
252 if used > soft {
253 tracing::debug!(
254 cached_tokens,
255 soft_threshold = soft,
256 "context budget check: Soft tier"
257 );
258 return CompactionTier::Soft;
259 }
260 tracing::debug!(
261 cached_tokens,
262 soft_threshold = soft,
263 "context budget check: None"
264 );
265 CompactionTier::None
266 }
267
268 /// Build a memory router from the current routing configuration.
269 ///
270 /// Returns a `Box<dyn AsyncMemoryRouter>` so callers can use `route_async()` for LLM-based
271 /// classification. `HeuristicRouter` implements `AsyncMemoryRouter` via a blanket impl that
272 /// delegates to the sync `route_with_confidence`.
273 pub fn build_router(&self) -> Box<dyn zeph_memory::AsyncMemoryRouter + Send + Sync> {
274 use zeph_config::StoreRoutingStrategy;
275 if !self.routing.enabled {
276 return Box::new(zeph_memory::HeuristicRouter);
277 }
278 let fallback = zeph_memory::router::parse_route_str(
279 &self.routing.fallback_route,
280 zeph_memory::MemoryRoute::Hybrid,
281 );
282 match self.routing.strategy {
283 StoreRoutingStrategy::Heuristic => Box::new(zeph_memory::HeuristicRouter),
284 StoreRoutingStrategy::Llm => {
285 let Some(provider) = self.store_routing_provider.clone() else {
286 tracing::warn!(
287 "store_routing: strategy=llm but no provider resolved; \
288 falling back to heuristic"
289 );
290 return Box::new(zeph_memory::HeuristicRouter);
291 };
292 Box::new(zeph_memory::LlmRouter::new(provider, fallback))
293 }
294 StoreRoutingStrategy::Hybrid => {
295 let Some(provider) = self.store_routing_provider.clone() else {
296 tracing::warn!(
297 "store_routing: strategy=hybrid but no provider resolved; \
298 falling back to heuristic"
299 );
300 return Box::new(zeph_memory::HeuristicRouter);
301 };
302 Box::new(zeph_memory::HybridRouter::new(
303 provider,
304 fallback,
305 self.routing.confidence_threshold,
306 ))
307 }
308 }
309 }
310
311 /// Check if proactive compression should fire for the current turn.
312 ///
313 /// Returns `Some((threshold_tokens, max_summary_tokens))` when proactive compression
314 /// should be triggered, `None` otherwise.
315 ///
316 /// For `CompressionStrategy::Focus`, the threshold is the soft-compaction fraction
317 /// of the budget (same gate used by mid-iteration soft compaction). The
318 /// `max_summary_tokens` element is unused on the Focus path — the auto-consolidation
319 /// function uses `FocusConfig.max_knowledge_tokens / 2` instead.
320 ///
321 /// Will return `None` if compaction already happened this turn (CRIT-03 fix).
322 #[must_use]
323 pub fn should_proactively_compress(&self, current_tokens: u64) -> Option<(usize, usize)> {
324 use zeph_config::CompressionStrategy;
325 if self.compaction.is_compacted_this_turn() {
326 return None;
327 }
328 match &self.compression.strategy {
329 CompressionStrategy::Proactive {
330 threshold_tokens,
331 max_summary_tokens,
332 } if usize::try_from(current_tokens).unwrap_or(usize::MAX) > *threshold_tokens => {
333 Some((*threshold_tokens, *max_summary_tokens))
334 }
335 CompressionStrategy::Focus => {
336 // Focus fires at the soft-compaction threshold (same as tier machinery).
337 let budget = self.budget.as_ref()?.max_tokens();
338 #[allow(
339 clippy::cast_precision_loss,
340 clippy::cast_sign_loss,
341 clippy::cast_possible_truncation
342 )]
343 let threshold = (budget as f32 * self.soft_compaction_threshold) as usize;
344 if usize::try_from(current_tokens).unwrap_or(usize::MAX) > threshold {
345 // NOTE: the second tuple element (max_summary_tokens) is a placeholder
346 // on the Focus path — the auto-consolidation function ignores it and uses
347 // FocusConfig.max_knowledge_tokens / 2 instead.
348 Some((threshold, threshold / 4))
349 } else {
350 None
351 }
352 }
353 _ => None,
354 }
355 }
356}
357
/// `Default` delegates to [`ContextManager::new`], so both constructors yield the
/// same thresholds, an unset budget, and the `Ready` compaction state.
impl Default for ContextManager {
    /// Equivalent to [`ContextManager::new`].
    fn default() -> Self {
        Self::new()
    }
}
363
#[cfg(test)]
mod tests {
    use super::*;
    use zeph_config::CompressionStrategy;

    /// Budget size shared by every test that needs a configured budget.
    const BUDGET_TOKENS: usize = 100_000;

    /// A default manager with a 100 000-token budget (reserve ratio 0.1) installed.
    fn manager_with_budget() -> ContextManager {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(BUDGET_TOKENS, 0.1));
        cm
    }

    /// A default manager using the `Proactive` strategy with an 80 000-token threshold.
    fn proactive_manager() -> ContextManager {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Proactive {
            threshold_tokens: 80_000,
            max_summary_tokens: 4_000,
        };
        cm
    }

    /// A manager using the `Focus` strategy, with or without a budget.
    fn focus_manager(with_budget: bool) -> ContextManager {
        let mut cm = if with_budget {
            manager_with_budget()
        } else {
            ContextManager::new()
        };
        cm.compression.strategy = CompressionStrategy::Focus;
        cm
    }

    /// Float comparison within `f32::EPSILON`.
    fn close(actual: f32, expected: f32) -> bool {
        (actual - expected).abs() < f32::EPSILON
    }

    // --- ContextManager::new -------------------------------------------------

    #[test]
    fn new_defaults() {
        let cm = ContextManager::new();
        assert!(cm.budget.is_none());
        assert!(close(cm.soft_compaction_threshold, 0.60));
        assert!(close(cm.hard_compaction_threshold, 0.90));
        assert_eq!(cm.compaction_preserve_tail, 6);
        assert_eq!(cm.prune_protect_tokens, 40_000);
        assert_eq!(cm.compaction, CompactionState::Ready);
    }

    // --- compaction_tier -----------------------------------------------------

    #[test]
    fn compaction_tier_no_budget() {
        let tier = ContextManager::new().compaction_tier(1_000_000);
        assert_eq!(tier, CompactionTier::None);
    }

    #[test]
    fn compaction_tier_below_soft() {
        let tier = manager_with_budget().compaction_tier(50_000);
        assert_eq!(tier, CompactionTier::None);
    }

    #[test]
    fn compaction_tier_between_soft_and_hard() {
        let tier = manager_with_budget().compaction_tier(75_000);
        assert_eq!(tier, CompactionTier::Soft);
    }

    #[test]
    fn compaction_tier_above_hard() {
        let tier = manager_with_budget().compaction_tier(95_000);
        assert_eq!(tier, CompactionTier::Hard);
    }

    // --- should_proactively_compress: Proactive ------------------------------

    #[test]
    fn proactive_compress_above_threshold_returns_params() {
        let cm = proactive_manager();
        assert_eq!(
            cm.should_proactively_compress(90_000),
            Some((80_000, 4_000))
        );
    }

    #[test]
    fn proactive_compress_blocked_if_compacted_this_turn() {
        let mut cm = proactive_manager();
        cm.compaction = CompactionState::CompactedThisTurn { cooldown: 0 };
        assert!(cm.should_proactively_compress(100_000).is_none());
    }

    // --- CompactionState -----------------------------------------------------

    #[test]
    fn compaction_state_ready_is_not_compacted_this_turn() {
        let state = CompactionState::Ready;
        assert!(!state.is_compacted_this_turn());
    }

    #[test]
    fn compaction_state_compacted_this_turn_flag() {
        for cooldown in [2, 0] {
            assert!(CompactionState::CompactedThisTurn { cooldown }.is_compacted_this_turn());
        }
    }

    #[test]
    fn compaction_state_cooling_is_not_compacted_this_turn() {
        let state = CompactionState::Cooling { turns_remaining: 1 };
        assert!(!state.is_compacted_this_turn());
    }

    #[test]
    fn advance_turn_compacted_with_cooldown_enters_cooling() {
        let next = CompactionState::CompactedThisTurn { cooldown: 3 }.advance_turn();
        assert_eq!(next, CompactionState::Cooling { turns_remaining: 3 });
    }

    #[test]
    fn advance_turn_compacted_zero_cooldown_returns_ready() {
        let next = CompactionState::CompactedThisTurn { cooldown: 0 }.advance_turn();
        assert_eq!(next, CompactionState::Ready);
    }

    // --- should_proactively_compress: Focus ----------------------------------

    #[test]
    fn should_proactively_compress_focus_fires_above_soft_threshold() {
        let cm = focus_manager(true);
        // Default soft threshold is 0.60 → 60_000 tokens; 75_000 exceeds it.
        let (threshold, _) = cm
            .should_proactively_compress(75_000)
            .expect("Focus must fire above soft threshold");
        assert_eq!(threshold, 60_000);
    }

    #[test]
    fn should_proactively_compress_focus_returns_none_below_threshold() {
        let cm = focus_manager(true);
        // 50_000 is under the 60_000 gate, so nothing fires.
        assert!(cm.should_proactively_compress(50_000).is_none());
    }

    #[test]
    fn should_proactively_compress_focus_returns_none_without_budget() {
        let cm = focus_manager(false);
        // Without a budget the threshold cannot be computed.
        assert!(cm.should_proactively_compress(999_999).is_none());
    }
}
489}