zeph_context/manager.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Context lifecycle state machine for the Zeph agent.
5//!
6//! [`ContextManager`] tracks per-session compaction state and token budgets.
7//! It decides when soft (pruning) or hard (LLM summarization) compaction should fire,
8//! and builds the memory router used for query-aware store selection.
9//!
10//! [`CompactionState`] is the core state machine — see its doc comment for the
11//! full transition map.
12
13use std::sync::Arc;
14
15use zeph_config::{CompressionConfig, StoreRoutingConfig};
16
17use crate::budget::ContextBudget;
18
/// Compaction lifecycle of a single session, expressed as an explicit state machine.
///
/// A single enum takes the place of four independent boolean/`u8` fields, so that
/// contradictory combinations (e.g. warned-without-exhausted) cannot be represented.
///
/// # Transition map
///
/// ```text
/// Ready
///   → CompactedThisTurn { cooldown }     hard compaction succeeded (pruning or LLM)
///   → CompactedThisTurn { cooldown: 0 }  focus truncation, eviction, or proactive compression
///                                        fired (those callers want no post-compaction cooldown)
///   → Exhausted { warned: false }        compaction is counterproductive: too few messages,
///                                        zero net freed tokens, or still above the hard
///                                        threshold after LLM compaction
///
/// CompactedThisTurn { cooldown }
///   → Cooling { turns_remaining: cooldown }  cooldown > 0   (via advance_turn)
///   → Ready                                  cooldown == 0  (via advance_turn)
///
/// Cooling { turns_remaining }
///   → Cooling { turns_remaining - 1 }    decremented inside maybe_compact each turn
///   → Ready                              once turns_remaining reaches 0
///   NOTE: Exhausted is NOT reachable from Cooling — every exhaustion-setting site in
///   summarization.rs is guarded by an early return while in_cooldown is true.
///
/// Exhausted { warned: false }
///   → Exhausted { warned: true }         after the one-shot user warning has been sent
///
/// Exhausted { warned: true }             terminal — no further transitions
/// ```
///
/// `turns_since_last_hard_compaction` is a **metric counter** rather than part of this
/// state machine; it stays a separate field on `ContextManager`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionState {
    /// Idle: compaction may fire whenever the context crosses a threshold.
    Ready,
    /// A hard compaction (or focus truncation / eviction / proactive compression) already ran
    /// during the current turn. Nothing else compacts until `advance_turn()` is invoked at the
    /// next turn boundary.
    CompactedThisTurn {
        /// How many cooling turns must follow once the current turn is over.
        cooldown: u8,
    },
    /// Recovering from a recent hard compaction: the hard tier is skipped while the soft tier
    /// stays available. The counter is decremented inside `maybe_compact` each turn.
    Cooling {
        /// Cooling turns left before the state goes back to `Ready`.
        turns_remaining: u8,
    },
    /// Compaction can no longer shrink the context, so no further attempts are made.
    Exhausted {
        /// `true` once the one-shot exhaustion warning has been shown to the user.
        warned: bool,
    },
}

impl CompactionState {
    /// Reports whether a hard compaction (or an equivalent operation) has already run in the
    /// current turn.
    ///
    /// While this is `true`, `maybe_compact`, `maybe_proactive_compress`, and
    /// `maybe_soft_compact_mid_iteration` all skip their work (CRIT-03).
    #[must_use]
    pub fn is_compacted_this_turn(self) -> bool {
        matches!(self, Self::CompactedThisTurn { .. })
    }

    /// Reports whether compaction has been permanently switched off for this session.
    #[must_use]
    pub fn is_exhausted(self) -> bool {
        matches!(self, Self::Exhausted { .. })
    }

    /// Number of cooling turns still outstanding; `0` for every state other than `Cooling`.
    #[must_use]
    pub fn cooldown_remaining(self) -> u8 {
        match self {
            Self::Cooling { turns_remaining } => turns_remaining,
            // A pending `CompactedThisTurn` cooldown has not started counting yet.
            Self::Ready | Self::CompactedThisTurn { .. } | Self::Exhausted { .. } => 0,
        }
    }

    /// Computes the state to use at the start of the next user turn.
    ///
    /// **Call exactly once per turn, before any compaction, eviction, or focus truncation
    /// gets a chance to run.** Doing so guarantees `is_compacted_this_turn()` is `false` when
    /// the sidequest check executes, preserving the invariant that the sidequest only ever
    /// observes same-turn compaction set by eviction, which runs *after* this call.
    ///
    /// Transitions:
    /// - `CompactedThisTurn { cooldown: 0 }` → `Ready`
    /// - `CompactedThisTurn { cooldown: n }` → `Cooling { turns_remaining: n }`
    /// - every other state is handed back unchanged.
    #[must_use]
    pub fn advance_turn(self) -> Self {
        match self {
            Self::CompactedThisTurn { cooldown: 0 } => Self::Ready,
            Self::CompactedThisTurn { cooldown } => Self::Cooling {
                turns_remaining: cooldown,
            },
            Self::Ready | Self::Cooling { .. } | Self::Exhausted { .. } => self,
        }
    }
}
125
/// Compaction tier selected for a given context size.
///
/// Produced by `ContextManager::compaction_tier`, which compares the cached token count
/// against the soft and hard thresholds of the configured budget.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionTier {
    /// The context fits the budget (or no budget is set); nothing needs compacting.
    None,
    /// Soft tier — tool outputs are pruned and deferred summaries applied, without an LLM call.
    Soft,
    /// Hard tier — the context is summarized in full by the LLM.
    Hard,
}
136
/// Per-session context lifecycle manager.
///
/// Holds the token budget, compaction lifecycle state, and routing configuration.
/// Callers in `zeph-core` drive the state machine via `advance_turn`, `compaction_tier`,
/// and related accessors; the assembler obtains the memory router via `build_router` and
/// reads the budget through field access.
pub struct ContextManager {
    /// Token budget for this session. `None` until `apply_budget_config` is called with a
    /// non-zero budget.
    pub budget: Option<ContextBudget>,
    /// Soft compaction threshold as a fraction of the budget (0.60 in `ContextManager::new`):
    /// prune tool outputs + apply deferred summaries.
    pub soft_compaction_threshold: f32,
    /// Hard compaction threshold as a fraction of the budget (0.90 in `ContextManager::new`):
    /// full LLM-based summarization.
    pub hard_compaction_threshold: f32,
    /// Number of recent messages preserved during hard compaction.
    pub compaction_preserve_tail: usize,
    /// Token count protected from pruning during soft compaction.
    pub prune_protect_tokens: usize,
    /// Compression configuration for proactive compression.
    pub compression: CompressionConfig,
    /// Routing configuration for query-aware memory routing.
    pub routing: StoreRoutingConfig,
    /// Resolved provider for LLM/hybrid routing. `None` when strategy is `Heuristic`
    /// or when the named provider could not be resolved from the pool.
    pub store_routing_provider: Option<Arc<zeph_llm::any::AnyProvider>>,
    /// Compaction lifecycle state. Replaces four independent boolean/u8 fields to make
    /// invalid states unrepresentable. See [`CompactionState`] for the full transition map.
    pub compaction: CompactionState,
    /// Number of cooling turns to enforce after a successful hard compaction
    /// (2 in `ContextManager::new`).
    pub compaction_cooldown_turns: u8,
    /// Counts user-message turns since the last hard compaction event.
    /// `None` = no hard compaction has occurred yet in this session.
    /// `Some(n)` = n turns have elapsed since the last hard compaction.
    pub turns_since_last_hard_compaction: Option<u64>,
}
170
171impl ContextManager {
172 /// Create a new `ContextManager` with default thresholds and no budget.
173 #[must_use]
174 pub fn new() -> Self {
175 Self {
176 budget: None,
177 soft_compaction_threshold: 0.60,
178 hard_compaction_threshold: 0.90,
179 compaction_preserve_tail: 6,
180 prune_protect_tokens: 40_000,
181 compression: CompressionConfig::default(),
182 routing: StoreRoutingConfig::default(),
183 store_routing_provider: None,
184 compaction: CompactionState::Ready,
185 compaction_cooldown_turns: 2,
186 turns_since_last_hard_compaction: None,
187 }
188 }
189
190 /// Apply budget and compaction thresholds from config.
191 ///
192 /// Must be called once after config is resolved. Safe to call again when config reloads.
193 #[allow(clippy::too_many_arguments)]
194 pub fn apply_budget_config(
195 &mut self,
196 budget_tokens: usize,
197 reserve_ratio: f32,
198 hard_compaction_threshold: f32,
199 compaction_preserve_tail: usize,
200 prune_protect_tokens: usize,
201 soft_compaction_threshold: f32,
202 compaction_cooldown_turns: u8,
203 ) {
204 if budget_tokens == 0 {
205 tracing::warn!("context budget is 0 — agent will have no token tracking");
206 }
207 if budget_tokens > 0 {
208 self.budget = Some(ContextBudget::new(budget_tokens, reserve_ratio));
209 }
210 self.hard_compaction_threshold = hard_compaction_threshold;
211 self.compaction_preserve_tail = compaction_preserve_tail;
212 self.prune_protect_tokens = prune_protect_tokens;
213 self.soft_compaction_threshold = soft_compaction_threshold;
214 self.compaction_cooldown_turns = compaction_cooldown_turns;
215 }
216
217 /// Reset compaction state for a new conversation.
218 ///
219 /// Clears cooldown, exhaustion, and turn counters so the new conversation starts
220 /// with a clean compaction slate.
221 pub fn reset_compaction(&mut self) {
222 self.compaction = CompactionState::Ready;
223 self.turns_since_last_hard_compaction = None;
224 }
225
226 /// Determine which compaction tier applies for the given token count.
227 ///
228 /// - `Hard` when `cached_tokens > budget * hard_compaction_threshold`
229 /// - `Soft` when `cached_tokens > budget * soft_compaction_threshold`
230 /// - `None` otherwise (or when no budget is set)
231 #[allow(
232 clippy::cast_precision_loss,
233 clippy::cast_possible_truncation,
234 clippy::cast_sign_loss
235 )]
236 pub fn compaction_tier(&self, cached_tokens: u64) -> CompactionTier {
237 let Some(ref budget) = self.budget else {
238 return CompactionTier::None;
239 };
240 let used = usize::try_from(cached_tokens).unwrap_or(usize::MAX);
241 let max = budget.max_tokens();
242 let hard = (max as f32 * self.hard_compaction_threshold) as usize;
243 if used > hard {
244 tracing::debug!(
245 cached_tokens,
246 hard_threshold = hard,
247 "context budget check: Hard tier"
248 );
249 return CompactionTier::Hard;
250 }
251 let soft = (max as f32 * self.soft_compaction_threshold) as usize;
252 if used > soft {
253 tracing::debug!(
254 cached_tokens,
255 soft_threshold = soft,
256 "context budget check: Soft tier"
257 );
258 return CompactionTier::Soft;
259 }
260 tracing::debug!(
261 cached_tokens,
262 soft_threshold = soft,
263 "context budget check: None"
264 );
265 CompactionTier::None
266 }
267
268 /// Build a memory router from the current routing configuration.
269 ///
270 /// Returns a `Box<dyn AsyncMemoryRouter>` so callers can use `route_async()` for LLM-based
271 /// classification. `HeuristicRouter` implements `AsyncMemoryRouter` via a blanket impl that
272 /// delegates to the sync `route_with_confidence`.
273 pub fn build_router(&self) -> Box<dyn zeph_memory::AsyncMemoryRouter + Send + Sync> {
274 use zeph_config::StoreRoutingStrategy;
275 if !self.routing.enabled {
276 return Box::new(zeph_memory::HeuristicRouter);
277 }
278 let fallback = zeph_memory::router::parse_route_str(
279 &self.routing.fallback_route,
280 zeph_memory::MemoryRoute::Hybrid,
281 );
282 match self.routing.strategy {
283 StoreRoutingStrategy::Heuristic => Box::new(zeph_memory::HeuristicRouter),
284 StoreRoutingStrategy::Llm => {
285 let Some(provider) = self.store_routing_provider.clone() else {
286 tracing::warn!(
287 "store_routing: strategy=llm but no provider resolved; \
288 falling back to heuristic"
289 );
290 return Box::new(zeph_memory::HeuristicRouter);
291 };
292 Box::new(zeph_memory::LlmRouter::new(provider, fallback))
293 }
294 StoreRoutingStrategy::Hybrid => {
295 let Some(provider) = self.store_routing_provider.clone() else {
296 tracing::warn!(
297 "store_routing: strategy=hybrid but no provider resolved; \
298 falling back to heuristic"
299 );
300 return Box::new(zeph_memory::HeuristicRouter);
301 };
302 Box::new(zeph_memory::HybridRouter::new(
303 provider,
304 fallback,
305 self.routing.confidence_threshold,
306 ))
307 }
308 }
309 }
310
311 /// Check if proactive compression should fire for the current turn.
312 ///
313 /// Returns `Some((threshold_tokens, max_summary_tokens))` when proactive compression
314 /// should be triggered, `None` otherwise.
315 ///
316 /// Will return `None` if compaction already happened this turn (CRIT-03 fix).
317 #[must_use]
318 pub fn should_proactively_compress(&self, current_tokens: u64) -> Option<(usize, usize)> {
319 use zeph_config::CompressionStrategy;
320 if self.compaction.is_compacted_this_turn() {
321 return None;
322 }
323 match &self.compression.strategy {
324 CompressionStrategy::Proactive {
325 threshold_tokens,
326 max_summary_tokens,
327 } if usize::try_from(current_tokens).unwrap_or(usize::MAX) > *threshold_tokens => {
328 Some((*threshold_tokens, *max_summary_tokens))
329 }
330 _ => None,
331 }
332 }
333}
334
impl Default for ContextManager {
    /// Equivalent to [`ContextManager::new`]: built-in thresholds and no budget.
    fn default() -> Self {
        Self::new()
    }
}
340
#[cfg(test)]
mod tests {
    use super::*;
    use zeph_config::CompressionStrategy;

    // --- ContextManager construction and configuration ---

    #[test]
    fn new_defaults() {
        let cm = ContextManager::new();
        assert!(cm.budget.is_none());
        assert!((cm.soft_compaction_threshold - 0.60).abs() < f32::EPSILON);
        assert!((cm.hard_compaction_threshold - 0.90).abs() < f32::EPSILON);
        assert_eq!(cm.compaction_preserve_tail, 6);
        assert_eq!(cm.prune_protect_tokens, 40_000);
        assert_eq!(cm.compaction, CompactionState::Ready);
    }

    #[test]
    fn default_matches_new() {
        let cm = ContextManager::default();
        assert!(cm.budget.is_none());
        assert_eq!(cm.compaction, CompactionState::Ready);
        assert_eq!(cm.compaction_cooldown_turns, 2);
        assert!(cm.turns_since_last_hard_compaction.is_none());
    }

    #[test]
    fn apply_budget_config_zero_budget_leaves_budget_unset() {
        let mut cm = ContextManager::new();
        cm.apply_budget_config(0, 0.1, 0.85, 4, 10_000, 0.55, 3);
        assert!(cm.budget.is_none());
        // Every other setting is still applied.
        assert!((cm.hard_compaction_threshold - 0.85).abs() < f32::EPSILON);
        assert!((cm.soft_compaction_threshold - 0.55).abs() < f32::EPSILON);
        assert_eq!(cm.compaction_preserve_tail, 4);
        assert_eq!(cm.prune_protect_tokens, 10_000);
        assert_eq!(cm.compaction_cooldown_turns, 3);
    }

    #[test]
    fn apply_budget_config_nonzero_budget_sets_budget() {
        let mut cm = ContextManager::new();
        cm.apply_budget_config(100_000, 0.1, 0.90, 6, 40_000, 0.60, 2);
        assert!(cm.budget.is_some());
    }

    #[test]
    fn reset_compaction_restores_ready_and_clears_counter() {
        let mut cm = ContextManager::new();
        cm.compaction = CompactionState::Exhausted { warned: true };
        cm.turns_since_last_hard_compaction = Some(5);
        cm.reset_compaction();
        assert_eq!(cm.compaction, CompactionState::Ready);
        assert!(cm.turns_since_last_hard_compaction.is_none());
    }

    // --- compaction_tier ---

    #[test]
    fn compaction_tier_no_budget() {
        let cm = ContextManager::new();
        assert_eq!(cm.compaction_tier(1_000_000), CompactionTier::None);
    }

    #[test]
    fn compaction_tier_below_soft() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        assert_eq!(cm.compaction_tier(50_000), CompactionTier::None);
    }

    #[test]
    fn compaction_tier_between_soft_and_hard() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        assert_eq!(cm.compaction_tier(75_000), CompactionTier::Soft);
    }

    #[test]
    fn compaction_tier_above_hard() {
        let mut cm = ContextManager::new();
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        assert_eq!(cm.compaction_tier(95_000), CompactionTier::Hard);
    }

    // --- should_proactively_compress ---

    #[test]
    fn proactive_compress_above_threshold_returns_params() {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Proactive {
            threshold_tokens: 80_000,
            max_summary_tokens: 4_000,
        };
        let result = cm.should_proactively_compress(90_000);
        assert_eq!(result, Some((80_000, 4_000)));
    }

    #[test]
    fn proactive_compress_at_or_below_threshold_returns_none() {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Proactive {
            threshold_tokens: 80_000,
            max_summary_tokens: 4_000,
        };
        // The comparison is strict: exactly at the threshold does not trigger.
        assert!(cm.should_proactively_compress(80_000).is_none());
        assert!(cm.should_proactively_compress(10_000).is_none());
    }

    #[test]
    fn proactive_compress_blocked_if_compacted_this_turn() {
        let mut cm = ContextManager::new();
        cm.compression.strategy = CompressionStrategy::Proactive {
            threshold_tokens: 80_000,
            max_summary_tokens: 4_000,
        };
        cm.compaction = CompactionState::CompactedThisTurn { cooldown: 0 };
        assert!(cm.should_proactively_compress(100_000).is_none());
    }

    // --- CompactionState predicates ---

    #[test]
    fn compaction_state_ready_is_not_compacted_this_turn() {
        assert!(!CompactionState::Ready.is_compacted_this_turn());
    }

    #[test]
    fn compaction_state_compacted_this_turn_flag() {
        assert!(CompactionState::CompactedThisTurn { cooldown: 2 }.is_compacted_this_turn());
        assert!(CompactionState::CompactedThisTurn { cooldown: 0 }.is_compacted_this_turn());
    }

    #[test]
    fn compaction_state_cooling_is_not_compacted_this_turn() {
        assert!(!CompactionState::Cooling { turns_remaining: 1 }.is_compacted_this_turn());
    }

    #[test]
    fn compaction_state_is_exhausted_only_for_exhausted() {
        assert!(CompactionState::Exhausted { warned: false }.is_exhausted());
        assert!(CompactionState::Exhausted { warned: true }.is_exhausted());
        assert!(!CompactionState::Ready.is_exhausted());
        assert!(!CompactionState::CompactedThisTurn { cooldown: 1 }.is_exhausted());
        assert!(!CompactionState::Cooling { turns_remaining: 1 }.is_exhausted());
    }

    #[test]
    fn cooldown_remaining_only_nonzero_while_cooling() {
        assert_eq!(
            CompactionState::Cooling { turns_remaining: 3 }.cooldown_remaining(),
            3
        );
        // A pending cooldown carried by `CompactedThisTurn` is not reported yet.
        assert_eq!(
            CompactionState::CompactedThisTurn { cooldown: 3 }.cooldown_remaining(),
            0
        );
        assert_eq!(CompactionState::Ready.cooldown_remaining(), 0);
        assert_eq!(
            CompactionState::Exhausted { warned: false }.cooldown_remaining(),
            0
        );
    }

    // --- CompactionState::advance_turn ---

    #[test]
    fn advance_turn_compacted_with_cooldown_enters_cooling() {
        let state = CompactionState::CompactedThisTurn { cooldown: 3 };
        assert_eq!(
            state.advance_turn(),
            CompactionState::Cooling { turns_remaining: 3 }
        );
    }

    #[test]
    fn advance_turn_compacted_zero_cooldown_returns_ready() {
        let state = CompactionState::CompactedThisTurn { cooldown: 0 };
        assert_eq!(state.advance_turn(), CompactionState::Ready);
    }

    #[test]
    fn advance_turn_leaves_other_states_unchanged() {
        for state in [
            CompactionState::Ready,
            CompactionState::Cooling { turns_remaining: 2 },
            CompactionState::Exhausted { warned: false },
            CompactionState::Exhausted { warned: true },
        ] {
            assert_eq!(state.advance_turn(), state);
        }
    }
}
436}