1use zeph_common::memory::TokenCounting;
12
/// Per-slot token budget produced by [`ContextBudget::allocate`] and
/// [`ContextBudget::allocate_with_opts`].
///
/// Every field is a number of tokens.
#[derive(Debug, Clone)]
pub struct BudgetAllocation {
    /// Token count measured for the system prompt.
    pub system_prompt: usize,
    /// Token count measured for the skills prompt.
    pub skills: usize,
    /// Tokens granted to summaries.
    pub summaries: usize,
    /// Tokens granted to semantic recall.
    pub semantic_recall: usize,
    /// Tokens granted to cross-session content.
    pub cross_session: usize,
    /// Tokens granted to code context.
    pub code_context: usize,
    /// Tokens granted to graph facts.
    pub graph_facts: usize,
    /// Tokens granted to recent history.
    pub recent_history: usize,
    /// Tokens held back as the response reserve.
    pub response_reserve: usize,
    /// Tokens granted to the session digest.
    pub session_digest: usize,
}

impl BudgetAllocation {
    /// Counts how many of `summaries`, `semantic_recall`, `cross_session`,
    /// `code_context` and `graph_facts` received a non-zero budget.
    ///
    /// The remaining fields (`system_prompt`, `skills`, `recent_history`,
    /// `response_reserve`, `session_digest`) are not part of the count.
    #[must_use]
    pub fn active_sources(&self) -> usize {
        let counted_slots = [
            self.summaries,
            self.semantic_recall,
            self.cross_session,
            self.code_context,
            self.graph_facts,
        ];

        // Each funded slot contributes exactly one to the total.
        counted_slots
            .iter()
            .map(|&tokens| usize::from(tokens > 0))
            .sum()
    }
}
57
58#[derive(Debug, Clone)]
64pub struct ContextBudget {
65 max_tokens: usize,
66 reserve_ratio: f32,
67 pub(crate) graph_enabled: bool,
69}
70
71impl ContextBudget {
72 #[must_use]
84 pub fn new(max_tokens: usize, reserve_ratio: f32) -> Self {
85 Self {
86 max_tokens,
87 reserve_ratio,
88 graph_enabled: false,
89 }
90 }
91
92 #[must_use]
97 pub fn with_graph_enabled(mut self, enabled: bool) -> Self {
98 self.graph_enabled = enabled;
99 self
100 }
101
102 #[must_use]
104 pub fn max_tokens(&self) -> usize {
105 self.max_tokens
106 }
107
108 #[must_use]
131 pub fn allocate(
132 &self,
133 system_prompt: &str,
134 skills_prompt: &str,
135 tc: &dyn TokenCounting,
136 graph_enabled: bool,
137 ) -> BudgetAllocation {
138 self.allocate_with_opts(system_prompt, skills_prompt, tc, graph_enabled, 0, false)
139 }
140
141 #[must_use]
149 #[allow(
150 clippy::cast_precision_loss,
151 clippy::cast_possible_truncation,
152 clippy::cast_sign_loss
153 )]
154 pub fn allocate_with_opts(
155 &self,
156 system_prompt: &str,
157 skills_prompt: &str,
158 tc: &dyn TokenCounting,
159 graph_enabled: bool,
160 digest_tokens: usize,
161 memory_first: bool,
162 ) -> BudgetAllocation {
163 if self.max_tokens == 0 {
164 return BudgetAllocation {
165 system_prompt: 0,
166 skills: 0,
167 summaries: 0,
168 semantic_recall: 0,
169 cross_session: 0,
170 code_context: 0,
171 graph_facts: 0,
172 recent_history: 0,
173 response_reserve: 0,
174 session_digest: 0,
175 };
176 }
177
178 let response_reserve = (self.max_tokens as f32 * self.reserve_ratio) as usize;
179 let mut available = self.max_tokens.saturating_sub(response_reserve);
180
181 let system_prompt_tokens = tc.count_tokens(system_prompt);
182 let skills_tokens = tc.count_tokens(skills_prompt);
183
184 available = available.saturating_sub(system_prompt_tokens + skills_tokens);
185
186 let session_digest = digest_tokens.min(available);
188 available = available.saturating_sub(session_digest);
189
190 let (summaries, semantic_recall, cross_session, code_context, graph_facts, recent_history) =
191 if memory_first {
192 if graph_enabled {
194 (
195 (available as f32 * 0.22) as usize,
196 (available as f32 * 0.22) as usize,
197 (available as f32 * 0.12) as usize,
198 (available as f32 * 0.38) as usize,
199 (available as f32 * 0.06) as usize,
200 0,
201 )
202 } else {
203 (
204 (available as f32 * 0.25) as usize,
205 (available as f32 * 0.25) as usize,
206 (available as f32 * 0.15) as usize,
207 (available as f32 * 0.35) as usize,
208 0,
209 0,
210 )
211 }
212 } else if graph_enabled {
213 (
215 (available as f32 * 0.07) as usize,
216 (available as f32 * 0.07) as usize,
217 (available as f32 * 0.03) as usize,
218 (available as f32 * 0.29) as usize,
219 (available as f32 * 0.04) as usize,
220 (available as f32 * 0.50) as usize,
221 )
222 } else {
223 (
224 (available as f32 * 0.08) as usize,
225 (available as f32 * 0.08) as usize,
226 (available as f32 * 0.04) as usize,
227 (available as f32 * 0.30) as usize,
228 0,
229 (available as f32 * 0.50) as usize,
230 )
231 };
232
233 BudgetAllocation {
234 system_prompt: system_prompt_tokens,
235 skills: skills_tokens,
236 summaries,
237 semantic_recall,
238 cross_session,
239 code_context,
240 graph_facts,
241 recent_history,
242 response_reserve,
243 session_digest,
244 }
245 }
246}
247
#[cfg(test)]
mod tests {
    #![allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]

    use super::*;

    /// Token counter that treats every whitespace-separated word as one token.
    struct NaiveTc;

    impl TokenCounting for NaiveTc {
        fn count_tokens(&self, text: &str) -> usize {
            text.split_whitespace().count()
        }

        fn count_tool_schema_tokens(&self, schema: &serde_json::Value) -> usize {
            let rendered = schema.to_string();
            rendered.split_whitespace().count()
        }
    }

    /// Expected slot size, computed with the same `f32 -> usize` cast as the code under test.
    fn share(available: f32, ratio: f32) -> usize {
        (available * ratio) as usize
    }

    /// A 10 000-token budget with a 20 % reserve, leaving 8 000 tokens for empty prompts.
    fn ten_k_budget() -> ContextBudget {
        ContextBudget::new(10_000, 0.20)
    }

    #[test]
    fn context_budget_max_tokens_accessor() {
        assert_eq!(ContextBudget::new(1000, 0.2).max_tokens(), 1000);
    }

    #[test]
    fn budget_allocation_basic() {
        let alloc =
            ContextBudget::new(1000, 0.20).allocate("system prompt", "skills prompt", &NaiveTc, false);

        assert_eq!(alloc.response_reserve, 200);
        assert!(alloc.system_prompt > 0);
        assert!(alloc.skills > 0);
        assert!(alloc.summaries > 0);
        assert!(alloc.semantic_recall > 0);
        assert!(alloc.recent_history > 0);
    }

    #[test]
    fn budget_allocation_zero_disables() {
        let alloc = ContextBudget::new(0, 0.20).allocate("test", "test", &NaiveTc, false);

        assert_eq!(alloc.system_prompt, 0);
        assert_eq!(alloc.skills, 0);
        assert_eq!(alloc.summaries, 0);
        assert_eq!(alloc.recent_history, 0);
    }

    #[test]
    fn budget_allocation_graph_disabled_no_graph_facts() {
        let alloc = ten_k_budget().allocate("", "", &NaiveTc, false);

        assert_eq!(alloc.graph_facts, 0);
        assert_eq!(alloc.summaries, share(8_000_f32, 0.08));
        assert_eq!(alloc.semantic_recall, share(8_000_f32, 0.08));
    }

    #[test]
    fn budget_allocation_graph_enabled_allocates_4_percent() {
        let alloc = ten_k_budget()
            .with_graph_enabled(true)
            .allocate("", "", &NaiveTc, true);

        assert!(alloc.graph_facts > 0);
        assert_eq!(alloc.summaries, share(8_000_f32, 0.07));
        assert_eq!(alloc.graph_facts, share(8_000_f32, 0.04));
    }

    #[test]
    fn budget_allocation_memory_first_zeroes_history() {
        let alloc = ten_k_budget().allocate_with_opts("", "", &NaiveTc, false, 0, true);

        assert_eq!(alloc.recent_history, 0);
        assert!(alloc.summaries > 0);
        assert!(alloc.semantic_recall > 0);
    }

    #[test]
    fn budget_allocation_memory_first_and_graph_enabled() {
        let alloc = ten_k_budget()
            .with_graph_enabled(true)
            .allocate_with_opts("", "", &NaiveTc, true, 0, true);
        let remaining = 8_000_f32;

        assert_eq!(
            alloc.recent_history, 0,
            "memory_first must zero recent_history"
        );
        assert_eq!(alloc.summaries, share(remaining, 0.22));
        assert_eq!(alloc.semantic_recall, share(remaining, 0.22));
        assert_eq!(alloc.cross_session, share(remaining, 0.12));
        assert_eq!(alloc.code_context, share(remaining, 0.38));
        assert_eq!(alloc.graph_facts, share(remaining, 0.06));
        assert_eq!(alloc.response_reserve, 2_000);
    }
}