1use zeph_memory::TokenCounter;
12
/// Per-slot token counts produced when a context window is divided up.
///
/// Every field is a number of tokens. The struct only carries the numbers; it
/// does not enforce that they add up to any particular total.
#[derive(Debug, Clone)]
pub struct BudgetAllocation {
    /// Tokens attributed to the system prompt.
    pub system_prompt: usize,
    /// Tokens attributed to the skills prompt.
    pub skills: usize,
    /// Tokens set aside for summaries.
    pub summaries: usize,
    /// Tokens set aside for semantic recall.
    pub semantic_recall: usize,
    /// Tokens set aside for cross-session content.
    pub cross_session: usize,
    /// Tokens set aside for code context.
    pub code_context: usize,
    /// Tokens set aside for graph facts.
    pub graph_facts: usize,
    /// Tokens set aside for recent history.
    pub recent_history: usize,
    /// Tokens held back for the response.
    pub response_reserve: usize,
    /// Tokens set aside for the session digest.
    pub session_digest: usize,
}

impl BudgetAllocation {
    /// Returns how many of `summaries`, `semantic_recall`, `cross_session`,
    /// `code_context` and `graph_facts` have a non-zero token count.
    ///
    /// The remaining fields (`system_prompt`, `skills`, `recent_history`,
    /// `response_reserve`, `session_digest`) are not considered.
    #[must_use]
    pub fn active_sources(&self) -> usize {
        let slots = [
            self.summaries,
            self.semantic_recall,
            self.cross_session,
            self.code_context,
            self.graph_facts,
        ];
        // Each slot contributes 1 when it holds any tokens, 0 otherwise.
        slots.iter().map(|&tokens| usize::from(tokens != 0)).sum()
    }
}
57
58#[derive(Debug, Clone)]
64pub struct ContextBudget {
65 max_tokens: usize,
66 reserve_ratio: f32,
67 pub(crate) graph_enabled: bool,
69}
70
71impl ContextBudget {
72 #[must_use]
84 pub fn new(max_tokens: usize, reserve_ratio: f32) -> Self {
85 Self {
86 max_tokens,
87 reserve_ratio,
88 graph_enabled: false,
89 }
90 }
91
92 #[must_use]
97 pub fn with_graph_enabled(mut self, enabled: bool) -> Self {
98 self.graph_enabled = enabled;
99 self
100 }
101
102 #[must_use]
104 pub fn max_tokens(&self) -> usize {
105 self.max_tokens
106 }
107
108 #[must_use]
124 pub fn allocate(
125 &self,
126 system_prompt: &str,
127 skills_prompt: &str,
128 tc: &TokenCounter,
129 graph_enabled: bool,
130 ) -> BudgetAllocation {
131 self.allocate_with_opts(system_prompt, skills_prompt, tc, graph_enabled, 0, false)
132 }
133
134 #[must_use]
142 #[allow(
143 clippy::cast_precision_loss,
144 clippy::cast_possible_truncation,
145 clippy::cast_sign_loss
146 )]
147 pub fn allocate_with_opts(
148 &self,
149 system_prompt: &str,
150 skills_prompt: &str,
151 tc: &TokenCounter,
152 graph_enabled: bool,
153 digest_tokens: usize,
154 memory_first: bool,
155 ) -> BudgetAllocation {
156 if self.max_tokens == 0 {
157 return BudgetAllocation {
158 system_prompt: 0,
159 skills: 0,
160 summaries: 0,
161 semantic_recall: 0,
162 cross_session: 0,
163 code_context: 0,
164 graph_facts: 0,
165 recent_history: 0,
166 response_reserve: 0,
167 session_digest: 0,
168 };
169 }
170
171 let response_reserve = (self.max_tokens as f32 * self.reserve_ratio) as usize;
172 let mut available = self.max_tokens.saturating_sub(response_reserve);
173
174 let system_prompt_tokens = tc.count_tokens(system_prompt);
175 let skills_tokens = tc.count_tokens(skills_prompt);
176
177 available = available.saturating_sub(system_prompt_tokens + skills_tokens);
178
179 let session_digest = digest_tokens.min(available);
181 available = available.saturating_sub(session_digest);
182
183 let (summaries, semantic_recall, cross_session, code_context, graph_facts, recent_history) =
184 if memory_first {
185 if graph_enabled {
187 (
188 (available as f32 * 0.22) as usize,
189 (available as f32 * 0.22) as usize,
190 (available as f32 * 0.12) as usize,
191 (available as f32 * 0.38) as usize,
192 (available as f32 * 0.06) as usize,
193 0,
194 )
195 } else {
196 (
197 (available as f32 * 0.25) as usize,
198 (available as f32 * 0.25) as usize,
199 (available as f32 * 0.15) as usize,
200 (available as f32 * 0.35) as usize,
201 0,
202 0,
203 )
204 }
205 } else if graph_enabled {
206 (
208 (available as f32 * 0.07) as usize,
209 (available as f32 * 0.07) as usize,
210 (available as f32 * 0.03) as usize,
211 (available as f32 * 0.29) as usize,
212 (available as f32 * 0.04) as usize,
213 (available as f32 * 0.50) as usize,
214 )
215 } else {
216 (
217 (available as f32 * 0.08) as usize,
218 (available as f32 * 0.08) as usize,
219 (available as f32 * 0.04) as usize,
220 (available as f32 * 0.30) as usize,
221 0,
222 (available as f32 * 0.50) as usize,
223 )
224 };
225
226 BudgetAllocation {
227 system_prompt: system_prompt_tokens,
228 skills: skills_tokens,
229 summaries,
230 semantic_recall,
231 cross_session,
232 code_context,
233 graph_facts,
234 recent_history,
235 response_reserve,
236 session_digest,
237 }
238 }
239}
240
#[cfg(test)]
mod tests {
    #![allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]

    use super::*;

    /// Expected slot size: `ratio` of `available`, truncated the same way the
    /// allocator truncates.
    fn share(available: f32, ratio: f32) -> usize {
        (available * ratio) as usize
    }

    #[test]
    fn context_budget_max_tokens_accessor() {
        assert_eq!(ContextBudget::new(1000, 0.2).max_tokens(), 1000);
    }

    #[test]
    fn budget_allocation_basic() {
        let counter = TokenCounter::new();
        let plan =
            ContextBudget::new(1000, 0.20).allocate("system prompt", "skills prompt", &counter, false);

        // 20% of a 1000-token window is held back for the response.
        assert_eq!(plan.response_reserve, 200);
        // Both prompts and the main slots must receive something.
        assert!(plan.system_prompt > 0);
        assert!(plan.skills > 0);
        assert!(plan.summaries > 0);
        assert!(plan.semantic_recall > 0);
        assert!(plan.recent_history > 0);
    }

    #[test]
    fn budget_allocation_zero_disables() {
        let counter = TokenCounter::new();
        let plan = ContextBudget::new(0, 0.20).allocate("test", "test", &counter, false);

        // A zero-sized window reports nothing, not even the prompt sizes.
        assert_eq!(plan.system_prompt, 0);
        assert_eq!(plan.skills, 0);
        assert_eq!(plan.summaries, 0);
        assert_eq!(plan.recent_history, 0);
    }

    #[test]
    fn budget_allocation_graph_disabled_no_graph_facts() {
        let counter = TokenCounter::new();
        let plan = ContextBudget::new(10_000, 0.20).allocate("", "", &counter, false);

        // 10_000 minus the 20% reserve leaves 8_000 tokens to split.
        assert_eq!(plan.graph_facts, 0);
        assert_eq!(plan.summaries, share(8_000_f32, 0.08));
        assert_eq!(plan.semantic_recall, share(8_000_f32, 0.08));
    }

    #[test]
    fn budget_allocation_graph_enabled_allocates_4_percent() {
        let counter = TokenCounter::new();
        let plan = ContextBudget::new(10_000, 0.20)
            .with_graph_enabled(true)
            .allocate("", "", &counter, true);

        assert!(plan.graph_facts > 0);
        assert_eq!(plan.summaries, share(8_000_f32, 0.07));
        assert_eq!(plan.graph_facts, share(8_000_f32, 0.04));
    }

    #[test]
    fn budget_allocation_memory_first_zeroes_history() {
        let counter = TokenCounter::new();
        let plan =
            ContextBudget::new(10_000, 0.20).allocate_with_opts("", "", &counter, false, 0, true);

        // Memory-first mode gives recent history nothing.
        assert_eq!(plan.recent_history, 0);
        assert!(plan.summaries > 0);
        assert!(plan.semantic_recall > 0);
    }
}