Skip to main content

deepstrike_core/context/
config.rs

/// Tunable knobs for the context-compression pipeline.
///
/// The pressure thresholds and the `*_ratio` fields are fractions of
/// `max_tokens`; the helper methods on this type convert them to token counts
/// for a given `max_tokens`, so switching to a model with a different window
/// rescales those limits with no other configuration change. The remaining
/// fields are absolute quantities (message/turn counts, minutes, bytes,
/// tokens) or feature switches and do not scale.
///
/// Invariant: snip < micro < collapse < auto < renewal (strictly increasing).
#[derive(Debug, Clone)]
pub struct ContextConfig {
    // ── Pressure thresholds (fractions of max_tokens) ───────────────────────
    /// Lowest rung of the pressure ladder (snip stage).
    pub snip_threshold: f64,
    /// Second rung (micro stage); must exceed `snip_threshold`.
    pub micro_threshold: f64,
    /// Third rung (collapse stage); must exceed `micro_threshold`.
    pub collapse_threshold: f64,
    /// Fourth rung (auto stage); must exceed `collapse_threshold`.
    pub auto_threshold: f64,
    /// Highest rung (renewal stage); must exceed `auto_threshold`.
    pub renewal_threshold: f64,

    // ── Post-compression target ─────────────────────────────────────────────
    /// Rho to aim for once a compression pass has run.
    /// Has to stay below `snip_threshold`.
    pub target_after_compress: f64,

    // ── Per-compactor ratios ────────────────────────────────────────────────
    /// Largest share of max_tokens one message may take up after SnipCompact.
    /// Anything smaller than this is left alone.
    pub snip_per_msg_ratio: f64,

    // ── Renewal ─────────────────────────────────────────────────────────────
    /// Share of max_tokens, measured in history tokens, that survives a renewal.
    /// Once this budget is used up, renewal carries no further messages.
    pub carryover_ratio: f64,

    // ── Recovery / repair ───────────────────────────────────────────────────
    /// Upper bound, as a share of max_tokens, on a recovery/replay payload.
    pub recovery_content_ratio: f64,

    /// How many of the newest history messages rendering always keeps.
    pub preserve_recent_msgs: usize,

    /// How many of the newest turns (one user + one assistant message each)
    /// CollapseCompactor and AutoCompactor leave intact. Because a turn is
    /// 2 messages, `preserve_recent_turns * 2` messages are kept.
    /// Must be ≥ 1. Default: 2 (= 4 messages).
    pub preserve_recent_turns: usize,

    // ── Noise reduction ─────────────────────────────────────────────────────
    /// Whether the rendered system context contains the dashboard block.
    /// Off by default; meant to be switched on only in explicit agent-os mode.
    pub render_dashboard: bool,

    /// Whether internal control notes use the verbose form
    /// (e.g. "[SYSTEM] Transaction rollback: …").
    /// Off by default, in which case concise natural-language notes are used.
    pub verbose_control_notes: bool,

    /// Whether render replaces the *narration* text of OLD assistant turns with
    /// a short stub. "Old" means outside the `preserve_recent_msgs` window and
    /// carrying tool calls. The replacement is non-destructive: the full text
    /// remains in `partitions.history`. Rationale: the model's user-facing
    /// preamble ("Okay, let me… first X") is worthless once it has left the
    /// recent window, yet feeding it back verbatim every turn primes the model
    /// to keep producing the same preamble (an in-context repetition trap).
    /// Tool calls and pairing are not affected; current progress lives in the
    /// TASK STATE turn. On by default.
    pub collapse_assistant_narration: bool,

    // ── Layer 3: Time-based decay ───────────────────────────────────────────

    /// Idle time, in minutes, after which Micro-Compact (Layer 3) is triggered.
    /// Default is 60 minutes, on the assumption that the Prompt Cache has
    /// expired by then.
    pub micro_compact_idle_minutes: u32,

    /// How many recent tool results Micro-Compact leaves in place.
    pub preserved_tool_results: usize,

    // ── Layer 5: Auto-Compact buffer ────────────────────────────────────────

    /// Token headroom used to derive the Auto-Compact trigger (Layer 5):
    /// trigger threshold = max_tokens - autocompact_buffer.
    /// Default is 13K tokens (p99.99 of summarizer output length + safety margin).
    pub autocompact_buffer: u32,

    // ── Layer 1: Large-result spool ─────────────────────────────────────────

    /// Size in bytes beyond which a single tool result is spooled (Layer 1).
    /// The kernel then keeps just a preview in context and emits
    /// `LargeResultSpooled` so the SDK can persist the full content to disk.
    /// Default: 50 KiB. `0` disables spooling.
    pub spool_threshold_bytes: u32,

    /// Byte budget of the in-context preview for a spooled tool result.
    /// Default: 2 KiB.
    pub spool_preview_bytes: u32,
}
87
/// Stock idle period, in minutes, for `micro_compact_idle_minutes`.
///
/// Same value that `ContextConfig::default()` assigns to that field.
fn default_micro_compact_idle_minutes() -> u32 {
    const IDLE_MINUTES: u32 = 60;
    IDLE_MINUTES
}
91
/// Stock count of recent tool results for `preserved_tool_results`.
///
/// Same value that `ContextConfig::default()` assigns to that field.
fn default_preserved_tool_results() -> usize {
    const KEPT_RESULTS: usize = 5;
    KEPT_RESULTS
}
95
/// Stock token headroom for `autocompact_buffer` (13K tokens).
///
/// Same value that `ContextConfig::default()` assigns to that field.
fn default_autocompact_buffer() -> u32 {
    const BUFFER_TOKENS: u32 = 13 * 1_000;
    BUFFER_TOKENS
}
99
100impl Default for ContextConfig {
101    fn default() -> Self {
102        Self {
103            snip_threshold: 0.70,
104            micro_threshold: 0.80,
105            collapse_threshold: 0.90,
106            auto_threshold: 0.95,
107            renewal_threshold: 0.98,
108            target_after_compress: 0.65,
109            snip_per_msg_ratio: 0.05,
110            carryover_ratio: 0.05,
111            recovery_content_ratio: 0.25,
112            preserve_recent_msgs: 4,
113            preserve_recent_turns: 2,
114            render_dashboard: false,
115            verbose_control_notes: false,
116            collapse_assistant_narration: true,
117            micro_compact_idle_minutes: 60,
118            preserved_tool_results: 5,
119            autocompact_buffer: 13_000,
120            spool_threshold_bytes: 50 * 1024,
121            spool_preview_bytes: 2 * 1024,
122        }
123    }
124}
125
126impl ContextConfig {
127    /// Token budget to target after a compression pass.
128    pub fn target_tokens(&self, max_tokens: u32) -> u32 {
129        (max_tokens as f64 * self.target_after_compress) as u32
130    }
131
132    /// Per-message token cap used by SnipCompact.
133    /// Floor of 50 ensures very small context windows still get useful output.
134    pub fn snip_per_msg_tokens(&self, max_tokens: u32) -> u32 {
135        ((max_tokens as f64 * self.snip_per_msg_ratio) as u32).max(50)
136    }
137
138    /// Token budget for history carryover across renewal.
139    pub fn carryover_tokens(&self, max_tokens: u32) -> u32 {
140        ((max_tokens as f64 * self.carryover_ratio) as u32).max(100)
141    }
142
143    /// Token cap for a single recovery/replay payload.
144    pub fn recovery_content_tokens(&self, max_tokens: u32) -> u32 {
145        (max_tokens as f64 * self.recovery_content_ratio) as u32
146    }
147
148    /// Auto-Compact trigger threshold (Layer 5).
149    /// Returns `max_tokens - autocompact_buffer` (absolute value).
150    pub fn autocompact_threshold(&self, max_tokens: u32) -> u32 {
151        max_tokens.saturating_sub(self.autocompact_buffer)
152    }
153}
154
#[cfg(test)]
mod tests {
    use super::*;

    /// Both noise-producing render options are opt-in.
    #[test]
    fn noise_reduction_defaults_to_quiet() {
        let ContextConfig {
            render_dashboard,
            verbose_control_notes,
            ..
        } = ContextConfig::default();
        assert!(!render_dashboard, "dashboard should be off by default");
        assert!(!verbose_control_notes, "verbose notes should be off by default");
    }

    /// Each rung of the default pressure ladder is strictly above the previous one.
    #[test]
    fn default_thresholds_strictly_increasing() {
        let cfg = ContextConfig::default();
        let ladder = [
            cfg.snip_threshold,
            cfg.micro_threshold,
            cfg.collapse_threshold,
            cfg.auto_threshold,
            cfg.renewal_threshold,
        ];
        for pair in ladder.windows(2) {
            assert!(pair[0] < pair[1]);
        }
    }

    /// The post-compression target sits below the first threshold.
    #[test]
    fn target_after_compress_below_snip_threshold() {
        let cfg = ContextConfig::default();
        assert!(cfg.snip_threshold > cfg.target_after_compress);
    }

    /// A 25× larger window yields a roughly 25× larger per-message cap.
    #[test]
    fn derived_limits_scale_with_max_tokens() {
        let cfg = ContextConfig::default();
        let (small, large) = (8_000u32, 200_000u32);
        let cap_small = cfg.snip_per_msg_tokens(small) as f64;
        let cap_large = cfg.snip_per_msg_tokens(large) as f64;
        let ratio = cap_large / cap_small;
        assert!((ratio - 25.0).abs() < 1.0, "expected ~25×, got {ratio}");
    }

    /// Tiny windows fall back to the fixed minimum budgets.
    #[test]
    fn small_context_window_has_floor() {
        let cfg = ContextConfig::default();
        let tiny_window = 100;
        assert!(cfg.snip_per_msg_tokens(tiny_window) >= 50);
        assert!(cfg.carryover_tokens(tiny_window) >= 100);
    }
}