zeph_context/budget.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Token budget calculation for context assembly.
5//!
6//! [`ContextBudget`] tracks the maximum token count for a session and divides
7//! available tokens across context slots (summaries, semantic recall, code context, etc.).
8//! [`BudgetAllocation`] is the result of one budget split and is consumed by
9//! [`crate::assembler::ContextAssembler`].
10
11use zeph_common::memory::TokenCounting;
12
/// Per-slot token budget produced by [`ContextBudget::allocate`].
///
/// All fields are in tokens. Zero means the slot is disabled or budget-exhausted for this turn.
///
/// The derived [`Default`] value is the all-zero allocation, and the derived
/// [`PartialEq`]/[`Eq`] compare two allocations field by field.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct BudgetAllocation {
    /// Tokens consumed by the current system prompt.
    pub system_prompt: usize,
    /// Tokens consumed by the current skills prompt.
    pub skills: usize,
    /// Tokens allocated for past-conversation summaries.
    pub summaries: usize,
    /// Tokens allocated for semantic (vector) recall results.
    pub semantic_recall: usize,
    /// Tokens allocated for cross-session memory recall.
    pub cross_session: usize,
    /// Tokens allocated for code-index RAG context.
    pub code_context: usize,
    /// Tokens reserved for graph facts. Always present; 0 when graph-memory is disabled.
    pub graph_facts: usize,
    /// Tokens allocated for recent conversation history trim.
    pub recent_history: usize,
    /// Tokens reserved for the model response (not filled by context sources).
    pub response_reserve: usize,
    /// Tokens pre-reserved for the session digest block. Always present; 0 when digest is
    /// disabled or no digest exists for the current conversation.
    pub session_digest: usize,
}
40
41impl BudgetAllocation {
42 /// Count of context source slots with non-zero token budgets.
43 #[must_use]
44 pub fn active_sources(&self) -> usize {
45 [
46 self.summaries,
47 self.semantic_recall,
48 self.cross_session,
49 self.code_context,
50 self.graph_facts,
51 ]
52 .iter()
53 .filter(|&&t| t > 0)
54 .count()
55 }
56}
57
/// Per-session token budget.
///
/// Holds the size of the token window together with the share of it that is set
/// aside for the model response. [`ContextBudget::allocate`] and
/// [`ContextBudget::allocate_with_opts`] split the remainder across context slots
/// and return a [`BudgetAllocation`] for [`crate::assembler::ContextAssembler`].
#[derive(Debug, Clone)]
pub struct ContextBudget {
    /// Total size of the token window.
    max_tokens: usize,
    /// Fraction of `max_tokens` held back for the model response.
    reserve_ratio: f32,
    /// Graph-fact allocation flag, set through [`ContextBudget::with_graph_enabled`]
    /// and initialised to `false` by [`ContextBudget::new`].
    pub(crate) graph_enabled: bool,
}
70
71impl ContextBudget {
72 /// Create a new budget with `max_tokens` capacity and `reserve_ratio` fraction reserved
73 /// for the model response.
74 ///
75 /// # Examples
76 ///
77 /// ```
78 /// use zeph_context::budget::ContextBudget;
79 ///
80 /// let budget = ContextBudget::new(128_000, 0.15);
81 /// assert_eq!(budget.max_tokens(), 128_000);
82 /// ```
83 #[must_use]
84 pub fn new(max_tokens: usize, reserve_ratio: f32) -> Self {
85 Self {
86 max_tokens,
87 reserve_ratio,
88 graph_enabled: false,
89 }
90 }
91
92 /// Enable or disable graph fact allocation in the budget split.
93 ///
94 /// When enabled, 4% of available tokens are routed to the `graph_facts` slot, and the
95 /// `summaries`/`semantic_recall` slices are each reduced by 1%.
96 #[must_use]
97 pub fn with_graph_enabled(mut self, enabled: bool) -> Self {
98 self.graph_enabled = enabled;
99 self
100 }
101
102 /// Maximum token capacity for this session.
103 #[must_use]
104 pub fn max_tokens(&self) -> usize {
105 self.max_tokens
106 }
107
108 /// Allocate the budget across context slots for one turn.
109 ///
110 /// Equivalent to `allocate_with_opts(…, 0, false)`.
111 ///
112 /// # Examples
113 ///
114 /// # Examples
115 ///
116 /// ```no_run
117 /// use zeph_context::budget::ContextBudget;
118 ///
119 /// // Any type implementing `zeph_common::memory::TokenCounting` can be used.
120 /// # struct Tc;
121 /// # impl zeph_common::memory::TokenCounting for Tc {
122 /// # fn count_tokens(&self, t: &str) -> usize { t.split_whitespace().count() }
123 /// # fn count_tool_schema_tokens(&self, v: &serde_json::Value) -> usize { v.to_string().len() }
124 /// # }
125 /// let budget = ContextBudget::new(128_000, 0.15);
126 /// let tc = Tc;
127 /// let alloc = budget.allocate("system prompt", "skills prompt", &tc, false);
128 /// assert!(alloc.recent_history > 0);
129 /// ```
130 #[must_use]
131 pub fn allocate(
132 &self,
133 system_prompt: &str,
134 skills_prompt: &str,
135 tc: &dyn TokenCounting,
136 graph_enabled: bool,
137 ) -> BudgetAllocation {
138 self.allocate_with_opts(system_prompt, skills_prompt, tc, graph_enabled, 0, false)
139 }
140
141 /// Allocate context budget with optional digest pre-reservation and `MemoryFirst` mode.
142 ///
143 /// This method provides fine-grained control over token allocation by allowing callers
144 /// to pre-reserve tokens for session digests and toggle `MemoryFirst` mode. Use this
145 /// when digest blocks or memory-focused allocation is needed; otherwise, call
146 /// [`allocate`](Self::allocate) for simpler usage.
147 ///
148 /// # Parameters
149 ///
150 /// * `system_prompt` — the system prompt string; its token count is deducted from available tokens
151 /// * `skills_prompt` — the skills block; its token count is also deducted
152 /// * `tc` — a token counter implementing [`TokenCounting`] (e.g., the LLM provider)
153 /// * `graph_enabled` — when `true`, allocates 4% of available tokens to `graph_facts`
154 /// * `digest_tokens` — pre-counted tokens for the session digest block; deducted from
155 /// available tokens BEFORE percentage splits so it does not silently crowd out other slots
156 /// * `memory_first` — when `true`, sets `recent_history` to 0 and redistributes those
157 /// tokens across `summaries`, `semantic_recall`, and `cross_session`
158 ///
159 /// # Returns
160 ///
161 /// A [`BudgetAllocation`] with all context slots populated according to the budget strategy.
162 ///
163 /// # Examples
164 ///
165 /// ```no_run
166 /// use zeph_context::budget::ContextBudget;
167 /// # struct Tc;
168 /// # impl zeph_common::memory::TokenCounting for Tc {
169 /// # fn count_tokens(&self, t: &str) -> usize { t.split_whitespace().count() }
170 /// # fn count_tool_schema_tokens(&self, v: &serde_json::Value) -> usize { v.to_string().len() }
171 /// # }
172 ///
173 /// let budget = ContextBudget::new(128_000, 0.15);
174 /// let tc = Tc;
175 /// // Allocate with a pre-counted digest of 500 tokens in MemoryFirst mode
176 /// let alloc = budget.allocate_with_opts(
177 /// "system prompt",
178 /// "skills prompt",
179 /// &tc,
180 /// false,
181 /// 500, // digest_tokens
182 /// true, // memory_first
183 /// );
184 /// assert_eq!(alloc.recent_history, 0);
185 /// ```
186 #[must_use]
187 #[allow(
188 clippy::cast_precision_loss,
189 clippy::cast_possible_truncation,
190 clippy::cast_sign_loss
191 )]
192 pub fn allocate_with_opts(
193 &self,
194 system_prompt: &str,
195 skills_prompt: &str,
196 tc: &dyn TokenCounting,
197 graph_enabled: bool,
198 digest_tokens: usize,
199 memory_first: bool,
200 ) -> BudgetAllocation {
201 if self.max_tokens == 0 {
202 return BudgetAllocation {
203 system_prompt: 0,
204 skills: 0,
205 summaries: 0,
206 semantic_recall: 0,
207 cross_session: 0,
208 code_context: 0,
209 graph_facts: 0,
210 recent_history: 0,
211 response_reserve: 0,
212 session_digest: 0,
213 };
214 }
215
216 let response_reserve = (self.max_tokens as f32 * self.reserve_ratio) as usize;
217 let mut available = self.max_tokens.saturating_sub(response_reserve);
218
219 let system_prompt_tokens = tc.count_tokens(system_prompt);
220 let skills_tokens = tc.count_tokens(skills_prompt);
221
222 available = available.saturating_sub(system_prompt_tokens + skills_tokens);
223
224 // Deduct digest tokens BEFORE percentage splits so the budget allocator accounts for them.
225 let session_digest = digest_tokens.min(available);
226 available = available.saturating_sub(session_digest);
227
228 let (summaries, semantic_recall, cross_session, code_context, graph_facts, recent_history) =
229 if memory_first {
230 // MemoryFirst: no recent history, redistribute to memory slots.
231 if graph_enabled {
232 (
233 (available as f32 * 0.22) as usize,
234 (available as f32 * 0.22) as usize,
235 (available as f32 * 0.12) as usize,
236 (available as f32 * 0.38) as usize,
237 (available as f32 * 0.06) as usize,
238 0,
239 )
240 } else {
241 (
242 (available as f32 * 0.25) as usize,
243 (available as f32 * 0.25) as usize,
244 (available as f32 * 0.15) as usize,
245 (available as f32 * 0.35) as usize,
246 0,
247 0,
248 )
249 }
250 } else if graph_enabled {
251 // When graph is enabled: take 4% for graph facts, reduce other slices by 1% each.
252 (
253 (available as f32 * 0.07) as usize,
254 (available as f32 * 0.07) as usize,
255 (available as f32 * 0.03) as usize,
256 (available as f32 * 0.29) as usize,
257 (available as f32 * 0.04) as usize,
258 (available as f32 * 0.50) as usize,
259 )
260 } else {
261 (
262 (available as f32 * 0.08) as usize,
263 (available as f32 * 0.08) as usize,
264 (available as f32 * 0.04) as usize,
265 (available as f32 * 0.30) as usize,
266 0,
267 (available as f32 * 0.50) as usize,
268 )
269 };
270
271 BudgetAllocation {
272 system_prompt: system_prompt_tokens,
273 skills: skills_tokens,
274 summaries,
275 semantic_recall,
276 cross_session,
277 code_context,
278 graph_facts,
279 recent_history,
280 response_reserve,
281 session_digest,
282 }
283 }
284}
285
/// Unit tests for the budget split: slot fractions, zero budget, digest
/// pre-reservation, prompt deduction and the active-source count.
#[cfg(test)]
mod tests {
    #![allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]

    use super::*;

    /// Whitespace-splitting token counter; an empty string counts as zero tokens.
    struct NaiveTc;
    impl TokenCounting for NaiveTc {
        fn count_tokens(&self, text: &str) -> usize {
            text.split_whitespace().count()
        }
        fn count_tool_schema_tokens(&self, schema: &serde_json::Value) -> usize {
            schema.to_string().split_whitespace().count()
        }
    }

    #[test]
    fn context_budget_max_tokens_accessor() {
        let budget = ContextBudget::new(1000, 0.2);
        assert_eq!(budget.max_tokens(), 1000);
    }

    #[test]
    fn budget_allocation_basic() {
        let budget = ContextBudget::new(1000, 0.20);
        let tc = NaiveTc;
        let alloc = budget.allocate("system prompt", "skills prompt", &tc, false);
        assert_eq!(alloc.response_reserve, 200);
        assert!(alloc.system_prompt > 0);
        assert!(alloc.skills > 0);
        assert!(alloc.summaries > 0);
        assert!(alloc.semantic_recall > 0);
        assert!(alloc.recent_history > 0);
    }

    #[test]
    fn budget_allocation_zero_disables() {
        let tc = NaiveTc;
        let budget = ContextBudget::new(0, 0.20);
        let alloc = budget.allocate("test", "test", &tc, false);
        assert_eq!(alloc.system_prompt, 0);
        assert_eq!(alloc.skills, 0);
        assert_eq!(alloc.summaries, 0);
        assert_eq!(alloc.recent_history, 0);
    }

    #[test]
    fn budget_allocation_graph_disabled_no_graph_facts() {
        let tc = NaiveTc;
        let budget = ContextBudget::new(10_000, 0.20);
        let alloc = budget.allocate("", "", &tc, false);
        assert_eq!(alloc.graph_facts, 0);
        assert_eq!(alloc.summaries, (8_000_f32 * 0.08) as usize);
        assert_eq!(alloc.semantic_recall, (8_000_f32 * 0.08) as usize);
    }

    #[test]
    fn budget_allocation_graph_enabled_allocates_4_percent() {
        let tc = NaiveTc;
        let budget = ContextBudget::new(10_000, 0.20).with_graph_enabled(true);
        let alloc = budget.allocate("", "", &tc, true);
        assert!(alloc.graph_facts > 0);
        assert_eq!(alloc.summaries, (8_000_f32 * 0.07) as usize);
        assert_eq!(alloc.graph_facts, (8_000_f32 * 0.04) as usize);
    }

    #[test]
    fn budget_allocation_memory_first_zeroes_history() {
        let tc = NaiveTc;
        let budget = ContextBudget::new(10_000, 0.20);
        let alloc = budget.allocate_with_opts("", "", &tc, false, 0, true);
        assert_eq!(alloc.recent_history, 0);
        assert!(alloc.summaries > 0);
        assert!(alloc.semantic_recall > 0);
    }

    #[test]
    fn budget_allocation_memory_first_and_graph_enabled() {
        let tc = NaiveTc;
        // 10_000 max, 20% reserve → 8_000 available (empty prompts = 0 tokens).
        let budget = ContextBudget::new(10_000, 0.20).with_graph_enabled(true);
        let alloc = budget.allocate_with_opts("", "", &tc, true, 0, true);
        let available = 8_000_f32;
        assert_eq!(
            alloc.recent_history, 0,
            "memory_first must zero recent_history"
        );
        assert_eq!(alloc.summaries, (available * 0.22) as usize);
        assert_eq!(alloc.semantic_recall, (available * 0.22) as usize);
        assert_eq!(alloc.cross_session, (available * 0.12) as usize);
        assert_eq!(alloc.code_context, (available * 0.38) as usize);
        assert_eq!(alloc.graph_facts, (available * 0.06) as usize);
        assert_eq!(alloc.response_reserve, 2_000);
    }

    #[test]
    fn budget_allocation_digest_reserved_before_split() {
        let tc = NaiveTc;
        // 8_000 available minus a 500-token digest leaves 7_500 to split.
        let budget = ContextBudget::new(10_000, 0.20);
        let alloc = budget.allocate_with_opts("", "", &tc, false, 500, false);
        assert_eq!(alloc.session_digest, 500);
        assert_eq!(alloc.recent_history, 3_750);
        assert_eq!(alloc.summaries, (7_500_f32 * 0.08) as usize);
    }

    #[test]
    fn budget_allocation_digest_capped_at_available() {
        let tc = NaiveTc;
        // 1_000 max, 20% reserve → 800 available; an oversized digest takes all of it.
        let budget = ContextBudget::new(1_000, 0.20);
        let alloc = budget.allocate_with_opts("", "", &tc, false, 5_000, false);
        assert_eq!(alloc.session_digest, 800);
        assert_eq!(alloc.recent_history, 0);
        assert_eq!(alloc.summaries, 0);
        assert_eq!(alloc.active_sources(), 0);
    }

    #[test]
    fn budget_allocation_prompt_tokens_reduce_available() {
        let tc = NaiveTc;
        // 3 + 2 prompt tokens leave 7_995 available; half of that truncates to 3_997.
        let budget = ContextBudget::new(10_000, 0.20);
        let alloc = budget.allocate("a b c", "d e", &tc, false);
        assert_eq!(alloc.system_prompt, 3);
        assert_eq!(alloc.skills, 2);
        assert_eq!(alloc.recent_history, 3_997);
    }

    #[test]
    fn active_sources_counts_nonzero_retrieval_slots() {
        let tc = NaiveTc;
        let budget = ContextBudget::new(10_000, 0.20);
        assert_eq!(budget.allocate("", "", &tc, false).active_sources(), 4);
        assert_eq!(budget.allocate("", "", &tc, true).active_sources(), 5);

        let empty = ContextBudget::new(0, 0.20);
        assert_eq!(empty.allocate("", "", &tc, true).active_sources(), 0);
    }
}
380}