zeph_context/manager.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Context lifecycle state machine for the Zeph agent.
5//!
6//! [`ContextManager`] tracks per-session compaction state and token budgets.
7//! It decides when soft (pruning) or hard (LLM summarization) compaction should fire,
8//! and builds the memory router used for query-aware store selection.
9//!
10//! [`CompactionState`] is the core state machine — see its doc comment for the
11//! full transition map.
12
13use std::sync::Arc;
14
15use zeph_config::{CompressionConfig, StoreRoutingConfig};
16
17use crate::budget::ContextBudget;
18
/// Compaction lifecycle for a single session, modelled as an explicit state machine.
///
/// One enum stands in for what used to be four independent boolean/u8 fields, so
/// combinations that make no sense (for example "warned" without "exhausted") cannot
/// be constructed at all.
///
/// # Transition map
///
/// ```text
/// Ready
///   → CompactedThisTurn { cooldown }      when hard compaction succeeds (pruning or LLM)
///   → CompactedThisTurn { cooldown: 0 }   when focus truncation, eviction, or proactive
///                                         compression fires (these callers do not want
///                                         post-compaction cooldown)
///   → Exhausted { warned: false }         when compaction is counterproductive (too few
///                                         messages, zero net freed tokens, or still above
///                                         hard threshold after LLM compaction)
///
/// CompactedThisTurn { cooldown }
///   → Cooling { turns_remaining: cooldown }   when cooldown > 0 (via advance_turn)
///   → Ready                                   when cooldown == 0 (via advance_turn)
///
/// Cooling { turns_remaining }
///   → Cooling { turns_remaining - 1 }     decremented inside maybe_compact each turn
///   → Ready                               when turns_remaining reaches 0
///   NOTE: Exhausted is NOT reachable from Cooling — all exhaustion-setting sites in
///   summarization.rs are guarded by an early-return when in_cooldown is true.
///
/// Exhausted { warned: false }
///   → Exhausted { warned: true }          after the user warning is sent (one-shot)
///
/// Exhausted { warned: true }              (terminal — no further transitions)
/// ```
///
/// `turns_since_last_hard_compaction` is a **metric counter**. It is deliberately kept
/// outside this state machine as a separate field on `ContextManager`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionState {
    /// Idle state: compaction is allowed to fire once the context crosses a threshold.
    Ready,
    /// A hard compaction (or focus truncation / eviction / proactive compression) has
    /// already run during the current turn. Nothing else compacts until `advance_turn()`
    /// is invoked at the next turn boundary.
    CompactedThisTurn {
        /// How many cooling turns must follow once the current turn is over.
        cooldown: u8,
    },
    /// Cooldown period following a recent hard compaction. The hard tier is skipped
    /// while the soft tier stays available. `maybe_compact` decrements the counter once
    /// per turn until it hits 0.
    Cooling {
        /// Cooling turns still to go before the state returns to `Ready`.
        turns_remaining: u8,
    },
    /// Compaction is unable to shrink the context any further; no more attempts are made.
    Exhausted {
        /// `true` once the one-shot context-exhaustion warning has been shown to the user.
        warned: bool,
    },
}

impl CompactionState {
    /// Reports whether hard compaction (or an equivalent operation) has already run in
    /// the current turn.
    ///
    /// While this is `true`, `maybe_compact`, `maybe_proactive_compress`, and
    /// `maybe_soft_compact_mid_iteration` all skip execution (CRIT-03).
    #[must_use]
    pub fn is_compacted_this_turn(self) -> bool {
        matches!(self, Self::CompactedThisTurn { cooldown: _ })
    }

    /// Reports whether compaction has been permanently switched off for this session.
    #[must_use]
    pub fn is_exhausted(self) -> bool {
        matches!(self, Self::Exhausted { warned: _ })
    }

    /// Number of cooling turns left; `0` in every state other than `Cooling`.
    #[must_use]
    pub fn cooldown_remaining(self) -> u8 {
        match self {
            Self::Cooling { turns_remaining } => turns_remaining,
            Self::Ready | Self::CompactedThisTurn { .. } | Self::Exhausted { .. } => 0,
        }
    }

    /// Computes the state to carry into the next user turn.
    ///
    /// **Must be called exactly once per turn, before any compaction, eviction, or
    /// focus truncation can run.** Doing so guarantees that `is_compacted_this_turn()`
    /// is `false` when the sidequest check executes, which preserves the invariant that
    /// the sidequest only observes same-turn compaction set by eviction running *after*
    /// this call.
    ///
    /// Transitions:
    /// - `CompactedThisTurn { cooldown: 0 }` → `Ready`
    /// - `CompactedThisTurn { cooldown: n }` → `Cooling { turns_remaining: n }`
    /// - Every other state is handed back untouched.
    #[must_use]
    pub fn advance_turn(self) -> Self {
        match self {
            // No cooldown requested: compaction is immediately available again.
            Self::CompactedThisTurn { cooldown: 0 } => Self::Ready,
            // A positive cooldown is carried over verbatim into the cooling counter.
            Self::CompactedThisTurn { cooldown } => Self::Cooling {
                turns_remaining: cooldown,
            },
            // Turn boundaries do not affect these states.
            Self::Ready | Self::Cooling { .. } | Self::Exhausted { .. } => self,
        }
    }
}
125
/// Compaction tier selected for the current context size.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionTier {
    /// No compaction required: the context still fits within the budget.
    None,
    /// Soft tier — tool outputs are pruned and deferred summaries applied, without
    /// any LLM call.
    Soft,
    /// Hard tier — the context is summarized in full by the LLM.
    Hard,
}
136
137/// Per-session context lifecycle manager.
138///
139/// Holds the token budget, compaction lifecycle state, and routing configuration.
140/// Callers in `zeph-core` drive the state machine via `advance_turn`, `compaction_tier`,
141/// and related accessors; the assembler reads the budget via `build_router` and field access.
142pub struct ContextManager {
143 /// Token budget for this session. `None` until configured via `apply_budget_config`.
144 pub budget: Option<ContextBudget>,
145 /// Soft compaction threshold (default 0.70): prune tool outputs + apply deferred summaries.
146 pub soft_compaction_threshold: f32,
147 /// Hard compaction threshold (default 0.90): full LLM-based summarization.
148 pub hard_compaction_threshold: f32,
149 /// Number of recent messages preserved during hard compaction.
150 pub compaction_preserve_tail: usize,
151 /// Token count protected from pruning during soft compaction.
152 pub prune_protect_tokens: usize,
153 /// Compression configuration for proactive compression.
154 pub compression: CompressionConfig,
155 /// Routing configuration for query-aware memory routing.
156 pub routing: StoreRoutingConfig,
157 /// Resolved provider for LLM/hybrid routing. `None` when strategy is `Heuristic`
158 /// or when the named provider could not be resolved from the pool.
159 pub store_routing_provider: Option<Arc<zeph_llm::any::AnyProvider>>,
160 /// Compaction lifecycle state. Replaces four independent boolean/u8 fields to make
161 /// invalid states unrepresentable. See [`CompactionState`] for the full transition map.
162 pub compaction: CompactionState,
163 /// Number of cooling turns to enforce after a successful hard compaction.
164 pub compaction_cooldown_turns: u8,
165 /// Counts user-message turns since the last hard compaction event.
166 /// `None` = no hard compaction has occurred yet in this session.
167 /// `Some(n)` = n turns have elapsed since the last hard compaction.
168 pub turns_since_last_hard_compaction: Option<u64>,
169}
170
171impl ContextManager {
172 /// Create a new `ContextManager` with default thresholds and no budget.
173 #[must_use]
174 pub fn new() -> Self {
175 Self {
176 budget: None,
177 soft_compaction_threshold: 0.60,
178 hard_compaction_threshold: 0.90,
179 compaction_preserve_tail: 6,
180 prune_protect_tokens: 40_000,
181 compression: CompressionConfig::default(),
182 routing: StoreRoutingConfig::default(),
183 store_routing_provider: None,
184 compaction: CompactionState::Ready,
185 compaction_cooldown_turns: 2,
186 turns_since_last_hard_compaction: None,
187 }
188 }
189
190 /// Apply budget and compaction thresholds from config.
191 ///
192 /// Must be called once after config is resolved. Safe to call again when config reloads.
193 #[allow(clippy::too_many_arguments)] // function with many required inputs; a *Params struct would be more verbose without simplifying the call site
194 pub fn apply_budget_config(
195 &mut self,
196 budget_tokens: usize,
197 reserve_ratio: f32,
198 hard_compaction_threshold: f32,
199 compaction_preserve_tail: usize,
200 prune_protect_tokens: usize,
201 soft_compaction_threshold: f32,
202 compaction_cooldown_turns: u8,
203 ) {
204 if budget_tokens == 0 {
205 tracing::warn!("context budget is 0 — agent will have no token tracking");
206 }
207 if budget_tokens > 0 {
208 self.budget = Some(ContextBudget::new(budget_tokens, reserve_ratio));
209 }
210 self.hard_compaction_threshold = hard_compaction_threshold;
211 self.compaction_preserve_tail = compaction_preserve_tail;
212 self.prune_protect_tokens = prune_protect_tokens;
213 self.soft_compaction_threshold = soft_compaction_threshold;
214 self.compaction_cooldown_turns = compaction_cooldown_turns;
215 }
216
217 /// Reset compaction state for a new conversation.
218 ///
219 /// Clears cooldown, exhaustion, and turn counters so the new conversation starts
220 /// with a clean compaction slate.
221 pub fn reset_compaction(&mut self) {
222 self.compaction = CompactionState::Ready;
223 self.turns_since_last_hard_compaction = None;
224 }
225
226 /// Determine which compaction tier applies for the given token count.
227 ///
228 /// - `Hard` when `cached_tokens > budget * hard_compaction_threshold`
229 /// - `Soft` when `cached_tokens > budget * soft_compaction_threshold`
230 /// - `None` otherwise (or when no budget is set)
231 #[allow(
232 clippy::cast_precision_loss,
233 clippy::cast_possible_truncation,
234 clippy::cast_sign_loss
235 )]
236 pub fn compaction_tier(&self, cached_tokens: u64) -> CompactionTier {
237 let Some(ref budget) = self.budget else {
238 return CompactionTier::None;
239 };
240 let used = usize::try_from(cached_tokens).unwrap_or(usize::MAX);
241 let max = budget.max_tokens();
242 let hard = (max as f32 * self.hard_compaction_threshold) as usize;
243 if used > hard {
244 tracing::debug!(
245 cached_tokens,
246 hard_threshold = hard,
247 "context budget check: Hard tier"
248 );
249 return CompactionTier::Hard;
250 }
251 let soft = (max as f32 * self.soft_compaction_threshold) as usize;
252 if used > soft {
253 tracing::debug!(
254 cached_tokens,
255 soft_threshold = soft,
256 "context budget check: Soft tier"
257 );
258 return CompactionTier::Soft;
259 }
260 tracing::debug!(
261 cached_tokens,
262 soft_threshold = soft,
263 "context budget check: None"
264 );
265 CompactionTier::None
266 }
267
268 /// Check if proactive compression should fire for the current turn.
269 ///
270 /// Returns `Some((threshold_tokens, max_summary_tokens))` when proactive compression
271 /// should be triggered, `None` otherwise.
272 ///
273 /// For `CompressionStrategy::Focus`, the threshold is the soft-compaction fraction
274 /// of the budget (same gate used by mid-iteration soft compaction). The
275 /// `max_summary_tokens` element is unused on the Focus path — the auto-consolidation
276 /// function uses `FocusConfig.max_knowledge_tokens / 2` instead.
277 ///
278 /// Will return `None` if compaction already happened this turn (CRIT-03 fix).
279 #[must_use]
280 pub fn should_proactively_compress(&self, current_tokens: u64) -> Option<(usize, usize)> {
281 use zeph_config::CompressionStrategy;
282 if self.compaction.is_compacted_this_turn() {
283 return None;
284 }
285 match &self.compression.strategy {
286 CompressionStrategy::Proactive {
287 threshold_tokens,
288 max_summary_tokens,
289 } if usize::try_from(current_tokens).unwrap_or(usize::MAX) > *threshold_tokens => {
290 Some((*threshold_tokens, *max_summary_tokens))
291 }
292 CompressionStrategy::Focus => {
293 // Focus fires at the soft-compaction threshold (same as tier machinery).
294 let budget = self.budget.as_ref()?.max_tokens();
295 #[allow(
296 clippy::cast_precision_loss,
297 clippy::cast_sign_loss,
298 clippy::cast_possible_truncation
299 )]
300 let threshold = (budget as f32 * self.soft_compaction_threshold) as usize;
301 if usize::try_from(current_tokens).unwrap_or(usize::MAX) > threshold {
302 // NOTE: the second tuple element (max_summary_tokens) is a placeholder
303 // on the Focus path — the auto-consolidation function ignores it and uses
304 // FocusConfig.max_knowledge_tokens / 2 instead.
305 Some((threshold, threshold / 4))
306 } else {
307 None
308 }
309 }
310 _ => None,
311 }
312 }
313}
314
315impl Default for ContextManager {
316 fn default() -> Self {
317 Self::new()
318 }
319}
320
#[cfg(test)]
mod tests {
    //! Unit tests for the compaction state machine and `ContextManager` threshold logic.

    use super::*;
    use zeph_config::CompressionStrategy;

    #[test]
    fn new_defaults() {
        let cm = ContextManager::new();
        assert!(cm.budget.is_none());
        assert!((cm.soft_compaction_threshold - 0.60).abs() < f32::EPSILON);
        assert!((cm.hard_compaction_threshold - 0.90).abs() < f32::EPSILON);
        assert_eq!(cm.compaction_preserve_tail, 6);
        assert_eq!(cm.prune_protect_tokens, 40_000);
        assert_eq!(cm.compaction, CompactionState::Ready);
        assert_eq!(cm.compaction_cooldown_turns, 2);
        assert!(cm.turns_since_last_hard_compaction.is_none());
    }

    #[test]
    fn compaction_tier_no_budget() {
        let cm = ContextManager::new();
        assert_eq!(cm.compaction_tier(1_000_000), CompactionTier::None);
    }

    #[test]
    fn compaction_tier_below_soft() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        assert_eq!(cm.compaction_tier(50_000), CompactionTier::None);
    }

    #[test]
    fn compaction_tier_between_soft_and_hard() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        assert_eq!(cm.compaction_tier(75_000), CompactionTier::Soft);
    }

    #[test]
    fn compaction_tier_above_hard() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        assert_eq!(cm.compaction_tier(95_000), CompactionTier::Hard);
    }

    #[test]
    fn compaction_tier_soft_threshold_is_exclusive() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        // Default soft threshold is 0.60 → 60_000 tokens; the comparison is strict.
        assert_eq!(cm.compaction_tier(60_000), CompactionTier::None);
        assert_eq!(cm.compaction_tier(60_001), CompactionTier::Soft);
    }

    #[test]
    fn apply_budget_config_sets_budget_and_thresholds() {
        let mut cm = ContextManager::new();
        cm.apply_budget_config(100_000, 0.1, 0.85, 4, 10_000, 0.50, 3);
        assert!(cm.budget.is_some());
        assert!((cm.hard_compaction_threshold - 0.85).abs() < f32::EPSILON);
        assert!((cm.soft_compaction_threshold - 0.50).abs() < f32::EPSILON);
        assert_eq!(cm.compaction_preserve_tail, 4);
        assert_eq!(cm.prune_protect_tokens, 10_000);
        assert_eq!(cm.compaction_cooldown_turns, 3);
    }

    #[test]
    fn apply_budget_config_zero_budget_on_fresh_manager_leaves_budget_unset() {
        let mut cm = ContextManager::new();
        cm.apply_budget_config(0, 0.1, 0.90, 6, 40_000, 0.60, 2);
        assert!(cm.budget.is_none());
        assert_eq!(cm.compaction_tier(1_000_000), CompactionTier::None);
    }

    #[test]
    fn reset_compaction_restores_clean_state() {
        let mut cm = ContextManager::new();
        cm.compaction = CompactionState::Exhausted { warned: true };
        cm.turns_since_last_hard_compaction = Some(3);
        cm.reset_compaction();
        assert_eq!(cm.compaction, CompactionState::Ready);
        assert!(cm.turns_since_last_hard_compaction.is_none());
    }

    #[test]
    fn proactive_compress_above_threshold_returns_params() {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Proactive {
            threshold_tokens: 80_000,
            max_summary_tokens: 4_000,
        };
        let result = cm.should_proactively_compress(90_000);
        assert_eq!(result, Some((80_000, 4_000)));
    }

    #[test]
    fn proactive_compress_at_threshold_returns_none() {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Proactive {
            threshold_tokens: 80_000,
            max_summary_tokens: 4_000,
        };
        // The gate is strictly "greater than", so the exact threshold does not fire.
        assert!(cm.should_proactively_compress(80_000).is_none());
    }

    #[test]
    fn proactive_compress_blocked_if_compacted_this_turn() {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Proactive {
            threshold_tokens: 80_000,
            max_summary_tokens: 4_000,
        };
        cm.compaction = CompactionState::CompactedThisTurn { cooldown: 0 };
        assert!(cm.should_proactively_compress(100_000).is_none());
    }

    #[test]
    fn compaction_state_ready_is_not_compacted_this_turn() {
        assert!(!CompactionState::Ready.is_compacted_this_turn());
    }

    #[test]
    fn compaction_state_compacted_this_turn_flag() {
        assert!(CompactionState::CompactedThisTurn { cooldown: 2 }.is_compacted_this_turn());
        assert!(CompactionState::CompactedThisTurn { cooldown: 0 }.is_compacted_this_turn());
    }

    #[test]
    fn compaction_state_cooling_is_not_compacted_this_turn() {
        assert!(!CompactionState::Cooling { turns_remaining: 1 }.is_compacted_this_turn());
    }

    #[test]
    fn compaction_state_is_exhausted_flag() {
        assert!(CompactionState::Exhausted { warned: false }.is_exhausted());
        assert!(CompactionState::Exhausted { warned: true }.is_exhausted());
        assert!(!CompactionState::Ready.is_exhausted());
        assert!(!CompactionState::Cooling { turns_remaining: 1 }.is_exhausted());
        assert!(!CompactionState::CompactedThisTurn { cooldown: 1 }.is_exhausted());
    }

    #[test]
    fn cooldown_remaining_is_nonzero_only_while_cooling() {
        assert_eq!(
            CompactionState::Cooling { turns_remaining: 3 }.cooldown_remaining(),
            3
        );
        assert_eq!(CompactionState::Ready.cooldown_remaining(), 0);
        assert_eq!(
            CompactionState::CompactedThisTurn { cooldown: 5 }.cooldown_remaining(),
            0
        );
        assert_eq!(
            CompactionState::Exhausted { warned: false }.cooldown_remaining(),
            0
        );
    }

    #[test]
    fn advance_turn_compacted_with_cooldown_enters_cooling() {
        let state = CompactionState::CompactedThisTurn { cooldown: 3 };
        assert_eq!(
            state.advance_turn(),
            CompactionState::Cooling { turns_remaining: 3 }
        );
    }

    #[test]
    fn advance_turn_compacted_zero_cooldown_returns_ready() {
        let state = CompactionState::CompactedThisTurn { cooldown: 0 };
        assert_eq!(state.advance_turn(), CompactionState::Ready);
    }

    #[test]
    fn advance_turn_leaves_other_states_unchanged() {
        for state in [
            CompactionState::Ready,
            CompactionState::Cooling { turns_remaining: 2 },
            CompactionState::Exhausted { warned: false },
            CompactionState::Exhausted { warned: true },
        ] {
            assert_eq!(state.advance_turn(), state);
        }
    }

    #[test]
    fn should_proactively_compress_focus_fires_above_soft_threshold() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        cm.compression.strategy = CompressionStrategy::Focus;
        // Default soft threshold is 0.60 → 60_000 tokens.
        // 75_000 > 60_000 → should fire.
        let result = cm.should_proactively_compress(75_000);
        assert!(result.is_some(), "Focus must fire above soft threshold");
        let (threshold, _) = result.unwrap();
        assert_eq!(threshold, 60_000);
    }

    #[test]
    fn should_proactively_compress_focus_returns_none_below_threshold() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        cm.compression.strategy = CompressionStrategy::Focus;
        // 50_000 < 60_000 → should not fire.
        assert!(cm.should_proactively_compress(50_000).is_none());
    }

    #[test]
    fn should_proactively_compress_focus_returns_none_without_budget() {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Focus;
        // No budget set → cannot compute threshold → None.
        assert!(cm.should_proactively_compress(999_999).is_none());
    }
}
446}