// deepstrike_core/context/config.rs
/// Tuning parameters for the context compression pipeline.
///
/// Most limits are expressed as fractions of `max_tokens`, so changing
/// `max_tokens` (e.g. switching model) rescales every derived limit
/// automatically with no other configuration change required.
///
/// A few fields are absolute rather than fractional and do NOT rescale:
/// the message/turn/tool-result counts, `micro_compact_idle_minutes`,
/// `autocompact_buffer` (tokens) and the two spool sizes (bytes).
///
/// Invariant: snip < micro < collapse < auto < renewal (strictly increasing).
/// All fields are public, so the type itself does not enforce this invariant;
/// whoever builds a non-default config is responsible for upholding it.
#[derive(Debug, Clone)]
pub struct ContextConfig {
    // ── Pressure thresholds ─────────────────────────────────────────────────
    /// Lowest pressure threshold (snip stage), as a fraction of `max_tokens`.
    pub snip_threshold: f64,
    /// Micro stage threshold. Must be > `snip_threshold`.
    pub micro_threshold: f64,
    /// Collapse stage threshold. Must be > `micro_threshold`.
    pub collapse_threshold: f64,
    /// Auto stage threshold. Must be > `collapse_threshold`.
    pub auto_threshold: f64,
    /// Renewal threshold; the highest of the five. Must be > `auto_threshold`.
    pub renewal_threshold: f64,

    // ── Post-compression target ──────────────────────────────────────────────
    /// Target rho after any compression pass. Must be < snip_threshold.
    pub target_after_compress: f64,

    // ── Per-compactor ratios ─────────────────────────────────────────────────
    /// Fraction of max_tokens any single message may occupy after SnipCompact.
    /// Messages smaller than this are never touched.
    pub snip_per_msg_ratio: f64,

    // ── Renewal ──────────────────────────────────────────────────────────────
    /// Fraction of max_tokens worth of history tokens to carry across renewal.
    /// Renewal stops carrying messages once this token budget is exhausted.
    pub carryover_ratio: f64,

    // ── Recovery / repair ────────────────────────────────────────────────────
    /// Maximum fraction of max_tokens a recovery/replay payload may occupy.
    pub recovery_content_ratio: f64,

    /// Recent history messages always kept during render (a message count,
    /// not a fraction).
    pub preserve_recent_msgs: usize,

    /// Number of most-recent turns (user+assistant pairs) preserved by
    /// CollapseCompactor and AutoCompactor. Each turn = 2 messages, so
    /// the actual message count kept is `preserve_recent_turns * 2`.
    /// Must be ≥ 1. Default: 2 (= 4 messages).
    pub preserve_recent_turns: usize,

    // ── Noise reduction ──────────────────────────────────────────────────────
    /// Include the dashboard block in the rendered system context.
    /// Defaults to false; enable only in explicit agent-os mode.
    pub render_dashboard: bool,

    /// Use verbose internal control notes (e.g. "[SYSTEM] Transaction rollback: …").
    /// Defaults to false; uses concise natural-language notes instead.
    pub verbose_control_notes: bool,

    /// Collapse the *narration* text of OLD assistant turns (those past the
    /// `preserve_recent_msgs` window that also carry tool calls) to a short
    /// stub at render time — non-destructively (the full text stays in
    /// `partitions.history`).
    ///
    /// The model's user-facing preamble (e.g. "OK, I'll … first do X") has no
    /// value once it has aged out of the recent window, but re-feeding it
    /// verbatim every turn primes the model to keep emitting the same preamble
    /// (an in-context repetition trap). Tool calls and pairing are untouched;
    /// current progress lives in the TASK STATE turn. Defaults to true.
    pub collapse_assistant_narration: bool,

    // ── Layer 3: Time-based decay ───────────────────────────────────────────
    /// Minutes of inactivity before triggering Micro-Compact (Layer 3).
    /// Defaults to 60 minutes — assumes Prompt Cache has expired by then.
    pub micro_compact_idle_minutes: u32,

    /// Number of recent tool results to preserve during Micro-Compact.
    pub preserved_tool_results: usize,

    // ── Layer 5: Auto-Compact buffer ────────────────────────────────────────
    /// Buffer size, in absolute tokens, for the Auto-Compact trigger (Layer 5).
    /// Trigger threshold = max_tokens - autocompact_buffer.
    /// Defaults to 13K tokens (p99.99 of summarizer output length + safety margin).
    pub autocompact_buffer: u32,

    // ── Layer 1: Large-result spool ─────────────────────────────────────────
    /// Byte size above which a single tool result is spooled (Layer 1): the kernel
    /// keeps only a preview in context and emits `LargeResultSpooled` for the SDK to
    /// persist the full content to disk. Default: 50 KiB. `0` disables spooling.
    pub spool_threshold_bytes: u32,

    /// Preview byte budget kept in context when a tool result is spooled. Default: 2 KiB.
    pub spool_preview_bytes: u32,
}
87
88fn default_micro_compact_idle_minutes() -> u32 {
89 60
90}
91
92fn default_preserved_tool_results() -> usize {
93 5
94}
95
96fn default_autocompact_buffer() -> u32 {
97 13_000
98}
99
100impl Default for ContextConfig {
101 fn default() -> Self {
102 Self {
103 snip_threshold: 0.70,
104 micro_threshold: 0.80,
105 collapse_threshold: 0.90,
106 auto_threshold: 0.95,
107 renewal_threshold: 0.98,
108 target_after_compress: 0.65,
109 snip_per_msg_ratio: 0.05,
110 carryover_ratio: 0.05,
111 recovery_content_ratio: 0.25,
112 preserve_recent_msgs: 4,
113 preserve_recent_turns: 2,
114 render_dashboard: false,
115 verbose_control_notes: false,
116 collapse_assistant_narration: true,
117 micro_compact_idle_minutes: 60,
118 preserved_tool_results: 5,
119 autocompact_buffer: 13_000,
120 spool_threshold_bytes: 50 * 1024,
121 spool_preview_bytes: 2 * 1024,
122 }
123 }
124}
125
126impl ContextConfig {
127 /// Token budget to target after a compression pass.
128 pub fn target_tokens(&self, max_tokens: u32) -> u32 {
129 (max_tokens as f64 * self.target_after_compress) as u32
130 }
131
132 /// Per-message token cap used by SnipCompact.
133 /// Floor of 50 ensures very small context windows still get useful output.
134 pub fn snip_per_msg_tokens(&self, max_tokens: u32) -> u32 {
135 ((max_tokens as f64 * self.snip_per_msg_ratio) as u32).max(50)
136 }
137
138 /// Token budget for history carryover across renewal.
139 pub fn carryover_tokens(&self, max_tokens: u32) -> u32 {
140 ((max_tokens as f64 * self.carryover_ratio) as u32).max(100)
141 }
142
143 /// Token cap for a single recovery/replay payload.
144 pub fn recovery_content_tokens(&self, max_tokens: u32) -> u32 {
145 (max_tokens as f64 * self.recovery_content_ratio) as u32
146 }
147
148 /// Auto-Compact trigger threshold (Layer 5).
149 /// Returns `max_tokens - autocompact_buffer` (absolute value).
150 pub fn autocompact_threshold(&self, max_tokens: u32) -> u32 {
151 max_tokens.saturating_sub(self.autocompact_buffer)
152 }
153}
154
#[cfg(test)]
mod tests {
    use super::*;

    /// Both noise-producing render options must be opt-in.
    #[test]
    fn noise_reduction_defaults_to_quiet() {
        let c = ContextConfig::default();
        assert!(!c.render_dashboard, "dashboard should be off by default");
        assert!(!c.verbose_control_notes, "verbose notes should be off by default");
    }

    /// Documented invariant: snip < micro < collapse < auto < renewal.
    #[test]
    fn default_thresholds_strictly_increasing() {
        let c = ContextConfig::default();
        assert!(c.snip_threshold < c.micro_threshold);
        assert!(c.micro_threshold < c.collapse_threshold);
        assert!(c.collapse_threshold < c.auto_threshold);
        assert!(c.auto_threshold < c.renewal_threshold);
    }

    /// The post-compression target must land below the first threshold.
    #[test]
    fn target_after_compress_below_snip_threshold() {
        let c = ContextConfig::default();
        assert!(c.target_after_compress < c.snip_threshold);
    }

    /// A 25× larger window should give a ~25× larger per-message cap.
    #[test]
    fn derived_limits_scale_with_max_tokens() {
        let c = ContextConfig::default();
        let small = 8_000u32;
        let large = 200_000u32;
        let ratio = c.snip_per_msg_tokens(large) as f64 / c.snip_per_msg_tokens(small) as f64;
        assert!((ratio - 25.0).abs() < 1.0, "expected ~25×, got {ratio}");
    }

    /// Floors keep tiny context windows usable.
    #[test]
    fn small_context_window_has_floor() {
        let c = ContextConfig::default();
        assert!(c.snip_per_msg_tokens(100) >= 50);
        assert!(c.carryover_tokens(100) >= 100);
    }

    /// Fraction-derived budgets for a 200K window under the default ratios.
    #[test]
    fn default_budgets_for_large_window() {
        let c = ContextConfig::default();
        assert_eq!(c.target_tokens(200_000), 130_000);
        assert_eq!(c.recovery_content_tokens(200_000), 50_000);
        assert_eq!(c.target_tokens(0), 0);
    }

    /// The Auto-Compact threshold is absolute and must never underflow.
    #[test]
    fn autocompact_threshold_subtracts_buffer_and_saturates() {
        let c = ContextConfig::default();
        assert_eq!(c.autocompact_threshold(200_000), 200_000 - c.autocompact_buffer);
        assert_eq!(c.autocompact_threshold(c.autocompact_buffer), 0);
        assert_eq!(c.autocompact_threshold(1_000), 0);
    }
}
196}