Skip to main content

zeph_context/
budget.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Token budget calculation for context assembly.
5//!
6//! [`ContextBudget`] tracks the maximum token count for a session and divides
7//! available tokens across context slots (summaries, semantic recall, code context, etc.).
8//! [`BudgetAllocation`] is the result of one budget split and is consumed by
9//! [`crate::assembler::ContextAssembler`].
10
11use zeph_common::memory::TokenCounting;
12
/// Token budget for every context slot, as returned by [`ContextBudget::allocate`].
///
/// Every field is measured in tokens. A value of zero means the slot is switched off or
/// had no budget left on this turn.
#[derive(Debug, Clone)]
pub struct BudgetAllocation {
    /// Token count of the current system prompt.
    pub system_prompt: usize,
    /// Token count of the current skills prompt.
    pub skills: usize,
    /// Budget for summaries of past conversation.
    pub summaries: usize,
    /// Budget for semantic (vector) recall results.
    pub semantic_recall: usize,
    /// Budget for cross-session memory recall.
    pub cross_session: usize,
    /// Budget for code-index RAG context.
    pub code_context: usize,
    /// Budget for graph facts. The field always exists; it is 0 when graph-memory is disabled.
    pub graph_facts: usize,
    /// Budget used when trimming recent conversation history.
    pub recent_history: usize,
    /// Tokens held back for the model response (never filled by context sources).
    pub response_reserve: usize,
    /// Tokens set aside up front for the session digest block. The field always exists; it
    /// is 0 when digest is disabled or the current conversation has no digest.
    pub session_digest: usize,
}

impl BudgetAllocation {
    /// Number of context source slots that received a non-zero budget.
    ///
    /// Only `summaries`, `semantic_recall`, `cross_session`, `code_context` and
    /// `graph_facts` are inspected; the remaining fields do not contribute to the count.
    #[must_use]
    pub fn active_sources(&self) -> usize {
        let source_slots = [
            self.summaries,
            self.semantic_recall,
            self.cross_session,
            self.code_context,
            self.graph_facts,
        ];
        source_slots
            .iter()
            .copied()
            .filter(|&tokens| tokens != 0)
            .count()
    }
}
57
58/// Token budget for a single agent session.
59///
60/// Tracks the maximum token window and divides it across context slots.
61/// Call [`ContextBudget::allocate`] or [`ContextBudget::allocate_with_opts`] to get a
62/// [`BudgetAllocation`] that can be fed to [`crate::assembler::ContextAssembler`].
63#[derive(Debug, Clone)]
64pub struct ContextBudget {
65    max_tokens: usize,
66    reserve_ratio: f32,
67    /// Whether graph-fact allocation is active. Toggles the 4% graph-facts slice.
68    pub(crate) graph_enabled: bool,
69}
70
71impl ContextBudget {
72    /// Create a new budget with `max_tokens` capacity and `reserve_ratio` fraction reserved
73    /// for the model response.
74    ///
75    /// # Examples
76    ///
77    /// ```
78    /// use zeph_context::budget::ContextBudget;
79    ///
80    /// let budget = ContextBudget::new(128_000, 0.15);
81    /// assert_eq!(budget.max_tokens(), 128_000);
82    /// ```
83    #[must_use]
84    pub fn new(max_tokens: usize, reserve_ratio: f32) -> Self {
85        Self {
86            max_tokens,
87            reserve_ratio,
88            graph_enabled: false,
89        }
90    }
91
92    /// Enable or disable graph fact allocation in the budget split.
93    ///
94    /// When enabled, 4% of available tokens are routed to the `graph_facts` slot, and the
95    /// `summaries`/`semantic_recall` slices are each reduced by 1%.
96    #[must_use]
97    pub fn with_graph_enabled(mut self, enabled: bool) -> Self {
98        self.graph_enabled = enabled;
99        self
100    }
101
102    /// Maximum token capacity for this session.
103    #[must_use]
104    pub fn max_tokens(&self) -> usize {
105        self.max_tokens
106    }
107
108    /// Allocate the budget across context slots for one turn.
109    ///
110    /// Equivalent to `allocate_with_opts(…, 0, false)`.
111    ///
112    /// # Examples
113    ///
114    /// # Examples
115    ///
116    /// ```no_run
117    /// use zeph_context::budget::ContextBudget;
118    ///
119    /// // Any type implementing `zeph_common::memory::TokenCounting` can be used.
120    /// # struct Tc;
121    /// # impl zeph_common::memory::TokenCounting for Tc {
122    /// #     fn count_tokens(&self, t: &str) -> usize { t.split_whitespace().count() }
123    /// #     fn count_tool_schema_tokens(&self, v: &serde_json::Value) -> usize { v.to_string().len() }
124    /// # }
125    /// let budget = ContextBudget::new(128_000, 0.15);
126    /// let tc = Tc;
127    /// let alloc = budget.allocate("system prompt", "skills prompt", &tc, false);
128    /// assert!(alloc.recent_history > 0);
129    /// ```
130    #[must_use]
131    pub fn allocate(
132        &self,
133        system_prompt: &str,
134        skills_prompt: &str,
135        tc: &dyn TokenCounting,
136        graph_enabled: bool,
137    ) -> BudgetAllocation {
138        self.allocate_with_opts(system_prompt, skills_prompt, tc, graph_enabled, 0, false)
139    }
140
141    /// Allocate context budget with optional digest pre-reservation and `MemoryFirst` mode.
142    ///
143    /// This method provides fine-grained control over token allocation by allowing callers
144    /// to pre-reserve tokens for session digests and toggle `MemoryFirst` mode. Use this
145    /// when digest blocks or memory-focused allocation is needed; otherwise, call
146    /// [`allocate`](Self::allocate) for simpler usage.
147    ///
148    /// # Parameters
149    ///
150    /// * `system_prompt` — the system prompt string; its token count is deducted from available tokens
151    /// * `skills_prompt` — the skills block; its token count is also deducted
152    /// * `tc` — a token counter implementing [`TokenCounting`] (e.g., the LLM provider)
153    /// * `graph_enabled` — when `true`, allocates 4% of available tokens to `graph_facts`
154    /// * `digest_tokens` — pre-counted tokens for the session digest block; deducted from
155    ///   available tokens BEFORE percentage splits so it does not silently crowd out other slots
156    /// * `memory_first` — when `true`, sets `recent_history` to 0 and redistributes those
157    ///   tokens across `summaries`, `semantic_recall`, and `cross_session`
158    ///
159    /// # Returns
160    ///
161    /// A [`BudgetAllocation`] with all context slots populated according to the budget strategy.
162    ///
163    /// # Examples
164    ///
165    /// ```no_run
166    /// use zeph_context::budget::ContextBudget;
167    /// # struct Tc;
168    /// # impl zeph_common::memory::TokenCounting for Tc {
169    /// #     fn count_tokens(&self, t: &str) -> usize { t.split_whitespace().count() }
170    /// #     fn count_tool_schema_tokens(&self, v: &serde_json::Value) -> usize { v.to_string().len() }
171    /// # }
172    ///
173    /// let budget = ContextBudget::new(128_000, 0.15);
174    /// let tc = Tc;
175    /// // Allocate with a pre-counted digest of 500 tokens in MemoryFirst mode
176    /// let alloc = budget.allocate_with_opts(
177    ///     "system prompt",
178    ///     "skills prompt",
179    ///     &tc,
180    ///     false,
181    ///     500,  // digest_tokens
182    ///     true, // memory_first
183    /// );
184    /// assert_eq!(alloc.recent_history, 0);
185    /// ```
186    #[must_use]
187    #[allow(
188        clippy::cast_precision_loss,
189        clippy::cast_possible_truncation,
190        clippy::cast_sign_loss
191    )]
192    pub fn allocate_with_opts(
193        &self,
194        system_prompt: &str,
195        skills_prompt: &str,
196        tc: &dyn TokenCounting,
197        graph_enabled: bool,
198        digest_tokens: usize,
199        memory_first: bool,
200    ) -> BudgetAllocation {
201        if self.max_tokens == 0 {
202            return BudgetAllocation {
203                system_prompt: 0,
204                skills: 0,
205                summaries: 0,
206                semantic_recall: 0,
207                cross_session: 0,
208                code_context: 0,
209                graph_facts: 0,
210                recent_history: 0,
211                response_reserve: 0,
212                session_digest: 0,
213            };
214        }
215
216        let response_reserve = (self.max_tokens as f32 * self.reserve_ratio) as usize;
217        let mut available = self.max_tokens.saturating_sub(response_reserve);
218
219        let system_prompt_tokens = tc.count_tokens(system_prompt);
220        let skills_tokens = tc.count_tokens(skills_prompt);
221
222        available = available.saturating_sub(system_prompt_tokens + skills_tokens);
223
224        // Deduct digest tokens BEFORE percentage splits so the budget allocator accounts for them.
225        let session_digest = digest_tokens.min(available);
226        available = available.saturating_sub(session_digest);
227
228        let (summaries, semantic_recall, cross_session, code_context, graph_facts, recent_history) =
229            if memory_first {
230                // MemoryFirst: no recent history, redistribute to memory slots.
231                if graph_enabled {
232                    (
233                        (available as f32 * 0.22) as usize,
234                        (available as f32 * 0.22) as usize,
235                        (available as f32 * 0.12) as usize,
236                        (available as f32 * 0.38) as usize,
237                        (available as f32 * 0.06) as usize,
238                        0,
239                    )
240                } else {
241                    (
242                        (available as f32 * 0.25) as usize,
243                        (available as f32 * 0.25) as usize,
244                        (available as f32 * 0.15) as usize,
245                        (available as f32 * 0.35) as usize,
246                        0,
247                        0,
248                    )
249                }
250            } else if graph_enabled {
251                // When graph is enabled: take 4% for graph facts, reduce other slices by 1% each.
252                (
253                    (available as f32 * 0.07) as usize,
254                    (available as f32 * 0.07) as usize,
255                    (available as f32 * 0.03) as usize,
256                    (available as f32 * 0.29) as usize,
257                    (available as f32 * 0.04) as usize,
258                    (available as f32 * 0.50) as usize,
259                )
260            } else {
261                (
262                    (available as f32 * 0.08) as usize,
263                    (available as f32 * 0.08) as usize,
264                    (available as f32 * 0.04) as usize,
265                    (available as f32 * 0.30) as usize,
266                    0,
267                    (available as f32 * 0.50) as usize,
268                )
269            };
270
271        BudgetAllocation {
272            system_prompt: system_prompt_tokens,
273            skills: skills_tokens,
274            summaries,
275            semantic_recall,
276            cross_session,
277            code_context,
278            graph_facts,
279            recent_history,
280            response_reserve,
281            session_digest,
282        }
283    }
284}
285
#[cfg(test)]
mod tests {
    // Expected values are recomputed with the same float-to-integer casts the allocator uses.
    #![allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]

    use super::*;

    /// Token counter for tests: one token per whitespace-separated word.
    struct NaiveTc;
    impl TokenCounting for NaiveTc {
        fn count_tokens(&self, text: &str) -> usize {
            text.split_whitespace().count()
        }
        fn count_tool_schema_tokens(&self, schema: &serde_json::Value) -> usize {
            schema.to_string().split_whitespace().count()
        }
    }

    #[test]
    fn context_budget_max_tokens_accessor() {
        let budget = ContextBudget::new(1000, 0.2);
        assert_eq!(budget.max_tokens(), 1000);
    }

    #[test]
    fn budget_allocation_basic() {
        let budget = ContextBudget::new(1000, 0.20);
        let tc = NaiveTc;
        let alloc = budget.allocate("system prompt", "skills prompt", &tc, false);
        assert_eq!(alloc.response_reserve, 200);
        assert!(alloc.system_prompt > 0);
        assert!(alloc.skills > 0);
        assert!(alloc.summaries > 0);
        assert!(alloc.semantic_recall > 0);
        assert!(alloc.recent_history > 0);
    }

    #[test]
    fn budget_allocation_deducts_prompt_tokens() {
        let tc = NaiveTc;
        // 1_000 max, 20% reserve → 800; two 2-word prompts → 796 left for the split.
        let budget = ContextBudget::new(1000, 0.20);
        let alloc = budget.allocate("system prompt", "skills prompt", &tc, false);
        assert_eq!(alloc.system_prompt, 2);
        assert_eq!(alloc.skills, 2);
        assert_eq!(alloc.recent_history, (796_f32 * 0.50) as usize);
        assert_eq!(alloc.code_context, (796_f32 * 0.30) as usize);
    }

    #[test]
    fn budget_allocation_zero_disables() {
        let tc = NaiveTc;
        let budget = ContextBudget::new(0, 0.20);
        let alloc = budget.allocate("test", "test", &tc, false);
        assert_eq!(alloc.system_prompt, 0);
        assert_eq!(alloc.skills, 0);
        assert_eq!(alloc.summaries, 0);
        assert_eq!(alloc.recent_history, 0);
        assert_eq!(alloc.response_reserve, 0);
        assert_eq!(alloc.session_digest, 0);
    }

    #[test]
    fn budget_allocation_graph_disabled_no_graph_facts() {
        let tc = NaiveTc;
        let budget = ContextBudget::new(10_000, 0.20);
        let alloc = budget.allocate("", "", &tc, false);
        assert_eq!(alloc.graph_facts, 0);
        assert_eq!(alloc.summaries, (8_000_f32 * 0.08) as usize);
        assert_eq!(alloc.semantic_recall, (8_000_f32 * 0.08) as usize);
    }

    #[test]
    fn budget_allocation_graph_enabled_allocates_4_percent() {
        let tc = NaiveTc;
        let budget = ContextBudget::new(10_000, 0.20).with_graph_enabled(true);
        let alloc = budget.allocate("", "", &tc, true);
        assert!(alloc.graph_facts > 0);
        assert_eq!(alloc.summaries, (8_000_f32 * 0.07) as usize);
        assert_eq!(alloc.graph_facts, (8_000_f32 * 0.04) as usize);
    }

    #[test]
    fn budget_allocation_memory_first_zeroes_history() {
        let tc = NaiveTc;
        let budget = ContextBudget::new(10_000, 0.20);
        let alloc = budget.allocate_with_opts("", "", &tc, false, 0, true);
        assert_eq!(alloc.recent_history, 0);
        assert!(alloc.summaries > 0);
        assert!(alloc.semantic_recall > 0);
    }

    #[test]
    fn budget_allocation_memory_first_and_graph_enabled() {
        let tc = NaiveTc;
        // 10_000 max, 20% reserve → 8_000 available (empty prompts = 0 tokens).
        let budget = ContextBudget::new(10_000, 0.20).with_graph_enabled(true);
        let alloc = budget.allocate_with_opts("", "", &tc, true, 0, true);
        let available = 8_000_f32;
        assert_eq!(
            alloc.recent_history, 0,
            "memory_first must zero recent_history"
        );
        assert_eq!(alloc.summaries, (available * 0.22) as usize);
        assert_eq!(alloc.semantic_recall, (available * 0.22) as usize);
        assert_eq!(alloc.cross_session, (available * 0.12) as usize);
        assert_eq!(alloc.code_context, (available * 0.38) as usize);
        assert_eq!(alloc.graph_facts, (available * 0.06) as usize);
        assert_eq!(alloc.response_reserve, 2_000);
    }

    #[test]
    fn budget_allocation_digest_reserved_before_split() {
        let tc = NaiveTc;
        // 8_000 available, 500 pre-reserved for the digest → 7_500 left for the split.
        let budget = ContextBudget::new(10_000, 0.20);
        let alloc = budget.allocate_with_opts("", "", &tc, false, 500, false);
        let available = 7_500_f32;
        assert_eq!(alloc.session_digest, 500);
        assert_eq!(alloc.summaries, (available * 0.08) as usize);
        assert_eq!(alloc.cross_session, (available * 0.04) as usize);
        assert_eq!(alloc.code_context, (available * 0.30) as usize);
        assert_eq!(alloc.recent_history, (available * 0.50) as usize);
    }

    #[test]
    fn budget_allocation_digest_capped_at_available() {
        let tc = NaiveTc;
        // The digest asks for more than the 8_000 tokens that remain after the reserve.
        let budget = ContextBudget::new(10_000, 0.20);
        let alloc = budget.allocate_with_opts("", "", &tc, false, 20_000, false);
        assert_eq!(alloc.session_digest, 8_000);
        assert_eq!(alloc.summaries, 0);
        assert_eq!(alloc.code_context, 0);
        assert_eq!(alloc.recent_history, 0);
        assert_eq!(alloc.active_sources(), 0);
    }

    #[test]
    fn active_sources_counts_nonzero_source_slots() {
        let tc = NaiveTc;
        let budget = ContextBudget::new(10_000, 0.20);
        // Without graph facts: summaries, semantic_recall, cross_session, code_context.
        assert_eq!(budget.allocate("", "", &tc, false).active_sources(), 4);
        // Graph facts add a fifth active source.
        assert_eq!(budget.allocate("", "", &tc, true).active_sources(), 5);
        // A zero-sized window leaves every slot empty.
        let empty = ContextBudget::new(0, 0.20).allocate("", "", &tc, true);
        assert_eq!(empty.active_sources(), 0);
    }
}