// deepstrike_core/context/pressure.rs

1use super::config::ContextConfig;
2use super::partitions::ContextPartitions;
3use super::token_engine::ContextTokenEngine;
4
/// The *pressure-level* vocabulary: what the pressure monitor recommends doing.
///
/// Keep this separate from [`crate::mm::EvictionOp`] (the *planner-op* vocabulary); the two must
/// **not** be merged. `PressureAction` is:
///
/// - the return value of [`PressureMonitor::recommend`] / `ContextManager::should_compress`,
/// - the `Ord`-keyed cascade selector inside the compression pipeline, and
/// - the canonical compaction wire label.
///
/// `EvictionOp`, by contrast, is what `plan_eviction` emits, and it carries per-op data that this
/// enum has no place for. The only bridge between the two layers is
/// `LoopStateMachine::execute_eviction_op`. See the layer-boundary note on
/// [`crate::mm::EvictionOp`].
///
/// The derived `Ord` follows the discriminants, which are written out here in declaration order
/// (`None` lowest, `AutoCompact` highest).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum PressureAction {
    None = 0,
    SnipCompact = 1,
    MicroCompact = 2,
    ContextCollapse = 3,
    AutoCompact = 4,
}
21
22impl PressureAction {
23    /// Canonical snake_case label — the single source for compaction-event action strings and the
24    /// `[Compressed: {label}]` summary marker (used by both the summarizer and the compaction log).
25    pub fn label(self) -> &'static str {
26        match self {
27            PressureAction::None => "none",
28            PressureAction::SnipCompact => "snip_compact",
29            PressureAction::MicroCompact => "micro_compact",
30            PressureAction::ContextCollapse => "context_collapse",
31            PressureAction::AutoCompact => "auto_compact",
32        }
33    }
34}
35
36/// Monitors rho = used_tokens / max_tokens and recommends compression actions.
37/// All thresholds come from `ContextConfig` — no hardcoded constants.
38pub struct PressureMonitor {
39    max_tokens: u32,
40    config: ContextConfig,
41}
42
43impl PressureMonitor {
44    pub fn new(max_tokens: u32, config: ContextConfig) -> Self {
45        Self { max_tokens, config }
46    }
47
48    pub fn max_tokens(&self) -> u32 {
49        self.max_tokens
50    }
51
52    /// Current pressure rho ∈ [0, +∞).
53    /// Uses provider-reported prompt tokens when available; otherwise estimates from partitions.
54    ///
55    /// This is the **raw** rho (full partition weight). Making rho paging-aware — i.e. subtracting
56    /// non-resident (`Collapsed`/`SpooledOut`/`PagedOut`) handle tokens so paging immediately relieves
57    /// pressure — is **not** a drop-in here: [`crate::context::manager::ContextManager::recompute_handle_residency`]
58    /// decides the Resident↔Collapsed projection from this very rho, so subtracting collapsed tokens
59    /// would drop rho below `collapse_threshold` and immediately un-collapse (oscillation). That needs
60    /// a deliberate split into *raw* rho (drives the collapse decision) vs *effective* rho (drives
61    /// further compaction/renewal), tracked as remaining W1-1 design work — see `ContextManager::rho`.
62    pub fn pressure(
63        &self,
64        partitions: &ContextPartitions,
65        engine: &ContextTokenEngine,
66        observed_prompt_tokens: Option<u32>,
67    ) -> f64 {
68        if self.max_tokens == 0 {
69            return 0.0;
70        }
71        match observed_prompt_tokens {
72            Some(tokens) => tokens as f64 / self.max_tokens as f64,
73            None => partitions.total_tokens(engine) as f64 / self.max_tokens as f64,
74        }
75    }
76
77    pub fn recommend(&self, rho: f64) -> PressureAction {
78        if rho > self.config.auto_threshold {
79            PressureAction::AutoCompact
80        } else if rho > self.config.collapse_threshold {
81            PressureAction::ContextCollapse
82        } else if rho > self.config.micro_threshold {
83            PressureAction::MicroCompact
84        } else if rho > self.config.snip_threshold {
85            PressureAction::SnipCompact
86        } else {
87            PressureAction::None
88        }
89    }
90}
91
#[cfg(test)]
mod tests {
    use super::*;
    use crate::context::config::ContextConfig;
    use crate::context::partitions::ContextPartitions;
    use crate::context::token_engine::ContextTokenEngine;
    use crate::types::message::Message;

    /// Token engine shared by the tests (character-approximation counting).
    fn engine() -> ContextTokenEngine {
        ContextTokenEngine::char_approx()
    }

    /// Default configuration; tests derive their probe values from it instead of hardcoding them.
    fn config() -> ContextConfig {
        ContextConfig::default()
    }

    #[test]
    fn thresholds_follow_config() {
        let cfg = config();
        let monitor = PressureMonitor::new(100, cfg.clone());
        // Probe just below the lowest threshold rather than at a fixed literal, so the test keeps
        // passing if the default `snip_threshold` is retuned.
        assert_eq!(
            monitor.recommend(cfg.snip_threshold - 0.01),
            PressureAction::None
        );
        assert_eq!(
            monitor.recommend(cfg.snip_threshold + 0.01),
            PressureAction::SnipCompact
        );
        assert_eq!(
            monitor.recommend(cfg.micro_threshold + 0.01),
            PressureAction::MicroCompact
        );
        assert_eq!(
            monitor.recommend(cfg.collapse_threshold + 0.01),
            PressureAction::ContextCollapse
        );
        assert_eq!(
            monitor.recommend(cfg.auto_threshold + 0.01),
            PressureAction::AutoCompact
        );
    }

    #[test]
    fn threshold_comparison_is_strict() {
        let cfg = config();
        let monitor = PressureMonitor::new(100, cfg.clone());
        // `recommend` uses `>`, so sitting exactly on the lowest threshold triggers nothing.
        assert_eq!(monitor.recommend(cfg.snip_threshold), PressureAction::None);
    }

    #[test]
    fn custom_thresholds_respected() {
        let cfg = ContextConfig {
            snip_threshold: 0.50,
            ..Default::default()
        };
        let monitor = PressureMonitor::new(100, cfg);
        assert_eq!(monitor.recommend(0.51), PressureAction::SnipCompact);
        assert_eq!(monitor.recommend(0.49), PressureAction::None);
    }

    #[test]
    fn pressure_calculation_uses_engine() {
        let cfg = config();
        let monitor = PressureMonitor::new(1_000, cfg.clone());
        let mut ctx = ContextPartitions::new(&cfg);
        let baseline = ctx.total_tokens(&engine()) as f64;
        ctx.history.push(Message::user("test"), 500);
        let rho = monitor.pressure(&ctx, &engine(), None);
        let expected = (baseline + 500.0) / 1_000.0;
        assert!((rho - expected).abs() < f64::EPSILON);
    }

    #[test]
    fn observed_prompt_tokens_take_precedence() {
        let cfg = config();
        let monitor = PressureMonitor::new(1_000, cfg.clone());
        let ctx = ContextPartitions::new(&cfg);
        // The provider-reported count is used as the numerator regardless of the partitions.
        let rho = monitor.pressure(&ctx, &engine(), Some(250));
        assert!((rho - 0.25).abs() < f64::EPSILON);
    }

    #[test]
    fn zero_budget_reports_no_pressure() {
        let cfg = config();
        let monitor = PressureMonitor::new(0, cfg.clone());
        let ctx = ContextPartitions::new(&cfg);
        // A zero `max_tokens` short-circuits to 0.0 instead of dividing by zero.
        assert_eq!(monitor.pressure(&ctx, &engine(), Some(500)), 0.0);
        assert_eq!(monitor.pressure(&ctx, &engine(), None), 0.0);
    }
}