Skip to main content

zeph_context/
budget.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Token budget calculation for context assembly.
5//!
6//! [`ContextBudget`] tracks the maximum token count for a session and divides
7//! available tokens across context slots (summaries, semantic recall, code context, etc.).
8//! [`BudgetAllocation`] is the result of one budget split and is consumed by
9//! [`crate::assembler::ContextAssembler`].
10
11use zeph_common::memory::TokenCounting;
12
/// Outcome of one budget split, as returned by [`ContextBudget::allocate`].
///
/// Every field is a token count. A value of zero means the slot is switched off or has
/// no budget left for this turn.
#[derive(Debug, Clone)]
pub struct BudgetAllocation {
    /// Token count of the current system prompt.
    pub system_prompt: usize,
    /// Token count of the current skills prompt.
    pub skills: usize,
    /// Budget for summaries of past conversation.
    pub summaries: usize,
    /// Budget for semantic (vector) recall results.
    pub semantic_recall: usize,
    /// Budget for memory recalled from other sessions.
    pub cross_session: usize,
    /// Budget for code-index RAG context.
    pub code_context: usize,
    /// Budget for graph facts. The field always exists; it is 0 when graph-memory is disabled.
    pub graph_facts: usize,
    /// Budget for the trimmed recent conversation history.
    pub recent_history: usize,
    /// Tokens held back for the model response (never filled by context sources).
    pub response_reserve: usize,
    /// Tokens set aside up front for the session digest block. The field always exists; it is
    /// 0 when the digest is disabled or the current conversation has no digest.
    pub session_digest: usize,
}

impl BudgetAllocation {
    /// Number of context-source slots that received a non-zero budget.
    ///
    /// Only `summaries`, `semantic_recall`, `cross_session`, `code_context` and
    /// `graph_facts` are inspected; the remaining fields do not take part in the count.
    #[must_use]
    pub fn active_sources(&self) -> usize {
        let source_slots = [
            self.summaries,
            self.semantic_recall,
            self.cross_session,
            self.code_context,
            self.graph_facts,
        ];
        // Each funded slot contributes exactly 1 to the total.
        source_slots
            .iter()
            .map(|&tokens| usize::from(tokens > 0))
            .sum()
    }
}
57
58/// Token budget for a single agent session.
59///
60/// Tracks the maximum token window and divides it across context slots.
61/// Call [`ContextBudget::allocate`] or [`ContextBudget::allocate_with_opts`] to get a
62/// [`BudgetAllocation`] that can be fed to [`crate::assembler::ContextAssembler`].
63#[derive(Debug, Clone)]
64pub struct ContextBudget {
65    max_tokens: usize,
66    reserve_ratio: f32,
67    /// Whether graph-fact allocation is active. Toggles the 4% graph-facts slice.
68    pub(crate) graph_enabled: bool,
69}
70
71impl ContextBudget {
72    /// Create a new budget with `max_tokens` capacity and `reserve_ratio` fraction reserved
73    /// for the model response.
74    ///
75    /// # Examples
76    ///
77    /// ```
78    /// use zeph_context::budget::ContextBudget;
79    ///
80    /// let budget = ContextBudget::new(128_000, 0.15);
81    /// assert_eq!(budget.max_tokens(), 128_000);
82    /// ```
83    #[must_use]
84    pub fn new(max_tokens: usize, reserve_ratio: f32) -> Self {
85        Self {
86            max_tokens,
87            reserve_ratio,
88            graph_enabled: false,
89        }
90    }
91
92    /// Enable or disable graph fact allocation in the budget split.
93    ///
94    /// When enabled, 4% of available tokens are routed to the `graph_facts` slot, and the
95    /// `summaries`/`semantic_recall` slices are each reduced by 1%.
96    #[must_use]
97    pub fn with_graph_enabled(mut self, enabled: bool) -> Self {
98        self.graph_enabled = enabled;
99        self
100    }
101
102    /// Maximum token capacity for this session.
103    #[must_use]
104    pub fn max_tokens(&self) -> usize {
105        self.max_tokens
106    }
107
108    /// Allocate the budget across context slots for one turn.
109    ///
110    /// Equivalent to `allocate_with_opts(…, 0, false)`.
111    ///
112    /// # Examples
113    ///
114    /// # Examples
115    ///
116    /// ```no_run
117    /// use zeph_context::budget::ContextBudget;
118    ///
119    /// // Any type implementing `zeph_common::memory::TokenCounting` can be used.
120    /// # struct Tc;
121    /// # impl zeph_common::memory::TokenCounting for Tc {
122    /// #     fn count_tokens(&self, t: &str) -> usize { t.split_whitespace().count() }
123    /// #     fn count_tool_schema_tokens(&self, v: &serde_json::Value) -> usize { v.to_string().len() }
124    /// # }
125    /// let budget = ContextBudget::new(128_000, 0.15);
126    /// let tc = Tc;
127    /// let alloc = budget.allocate("system prompt", "skills prompt", &tc, false);
128    /// assert!(alloc.recent_history > 0);
129    /// ```
130    #[must_use]
131    pub fn allocate(
132        &self,
133        system_prompt: &str,
134        skills_prompt: &str,
135        tc: &dyn TokenCounting,
136        graph_enabled: bool,
137    ) -> BudgetAllocation {
138        self.allocate_with_opts(system_prompt, skills_prompt, tc, graph_enabled, 0, false)
139    }
140
141    /// Allocate context budget with optional digest pre-reservation and `MemoryFirst` mode.
142    ///
143    /// `digest_tokens` — pre-counted tokens for the session digest block; deducted from
144    /// available tokens BEFORE percentage splits so it does not silently crowd out other slots.
145    ///
146    /// `memory_first` — when `true`, sets `recent_history` to 0 and redistributes those
147    /// tokens across `summaries`, `semantic_recall`, and `cross_session`.
148    #[must_use]
149    #[allow(
150        clippy::cast_precision_loss,
151        clippy::cast_possible_truncation,
152        clippy::cast_sign_loss
153    )]
154    pub fn allocate_with_opts(
155        &self,
156        system_prompt: &str,
157        skills_prompt: &str,
158        tc: &dyn TokenCounting,
159        graph_enabled: bool,
160        digest_tokens: usize,
161        memory_first: bool,
162    ) -> BudgetAllocation {
163        if self.max_tokens == 0 {
164            return BudgetAllocation {
165                system_prompt: 0,
166                skills: 0,
167                summaries: 0,
168                semantic_recall: 0,
169                cross_session: 0,
170                code_context: 0,
171                graph_facts: 0,
172                recent_history: 0,
173                response_reserve: 0,
174                session_digest: 0,
175            };
176        }
177
178        let response_reserve = (self.max_tokens as f32 * self.reserve_ratio) as usize;
179        let mut available = self.max_tokens.saturating_sub(response_reserve);
180
181        let system_prompt_tokens = tc.count_tokens(system_prompt);
182        let skills_tokens = tc.count_tokens(skills_prompt);
183
184        available = available.saturating_sub(system_prompt_tokens + skills_tokens);
185
186        // Deduct digest tokens BEFORE percentage splits so the budget allocator accounts for them.
187        let session_digest = digest_tokens.min(available);
188        available = available.saturating_sub(session_digest);
189
190        let (summaries, semantic_recall, cross_session, code_context, graph_facts, recent_history) =
191            if memory_first {
192                // MemoryFirst: no recent history, redistribute to memory slots.
193                if graph_enabled {
194                    (
195                        (available as f32 * 0.22) as usize,
196                        (available as f32 * 0.22) as usize,
197                        (available as f32 * 0.12) as usize,
198                        (available as f32 * 0.38) as usize,
199                        (available as f32 * 0.06) as usize,
200                        0,
201                    )
202                } else {
203                    (
204                        (available as f32 * 0.25) as usize,
205                        (available as f32 * 0.25) as usize,
206                        (available as f32 * 0.15) as usize,
207                        (available as f32 * 0.35) as usize,
208                        0,
209                        0,
210                    )
211                }
212            } else if graph_enabled {
213                // When graph is enabled: take 4% for graph facts, reduce other slices by 1% each.
214                (
215                    (available as f32 * 0.07) as usize,
216                    (available as f32 * 0.07) as usize,
217                    (available as f32 * 0.03) as usize,
218                    (available as f32 * 0.29) as usize,
219                    (available as f32 * 0.04) as usize,
220                    (available as f32 * 0.50) as usize,
221                )
222            } else {
223                (
224                    (available as f32 * 0.08) as usize,
225                    (available as f32 * 0.08) as usize,
226                    (available as f32 * 0.04) as usize,
227                    (available as f32 * 0.30) as usize,
228                    0,
229                    (available as f32 * 0.50) as usize,
230                )
231            };
232
233        BudgetAllocation {
234            system_prompt: system_prompt_tokens,
235            skills: skills_tokens,
236            summaries,
237            semantic_recall,
238            cross_session,
239            code_context,
240            graph_facts,
241            recent_history,
242            response_reserve,
243            session_digest,
244        }
245    }
246}
247
#[cfg(test)]
mod tests {
    #![allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]

    use super::*;

    /// Test counter: one token per whitespace-separated word.
    struct NaiveTc;
    impl TokenCounting for NaiveTc {
        fn count_tokens(&self, text: &str) -> usize {
            text.split_whitespace().count()
        }
        fn count_tool_schema_tokens(&self, schema: &serde_json::Value) -> usize {
            schema.to_string().split_whitespace().count()
        }
    }

    /// 10_000-token window with a 20% reserve, i.e. 8_000 tokens to split when both
    /// prompts are empty.
    fn ten_k_budget() -> ContextBudget {
        ContextBudget::new(10_000, 0.20)
    }

    /// Tokens left to split for [`ten_k_budget`] with empty prompts and no digest.
    const SPLITTABLE: f32 = 8_000_f32;

    #[test]
    fn context_budget_max_tokens_accessor() {
        assert_eq!(ContextBudget::new(1000, 0.2).max_tokens(), 1000);
    }

    #[test]
    fn budget_allocation_basic() {
        let counter = NaiveTc;
        let result =
            ContextBudget::new(1000, 0.20).allocate("system prompt", "skills prompt", &counter, false);
        assert_eq!(result.response_reserve, 200);
        assert!(result.system_prompt > 0);
        assert!(result.skills > 0);
        assert!(result.summaries > 0);
        assert!(result.semantic_recall > 0);
        assert!(result.recent_history > 0);
    }

    #[test]
    fn budget_allocation_zero_disables() {
        let counter = NaiveTc;
        let result = ContextBudget::new(0, 0.20).allocate("test", "test", &counter, false);
        assert_eq!(result.system_prompt, 0);
        assert_eq!(result.skills, 0);
        assert_eq!(result.summaries, 0);
        assert_eq!(result.recent_history, 0);
    }

    #[test]
    fn budget_allocation_graph_disabled_no_graph_facts() {
        let counter = NaiveTc;
        let result = ten_k_budget().allocate("", "", &counter, false);
        let eight_percent = (SPLITTABLE * 0.08) as usize;
        assert_eq!(result.graph_facts, 0);
        assert_eq!(result.summaries, eight_percent);
        assert_eq!(result.semantic_recall, eight_percent);
    }

    #[test]
    fn budget_allocation_graph_enabled_allocates_4_percent() {
        let counter = NaiveTc;
        let result = ten_k_budget()
            .with_graph_enabled(true)
            .allocate("", "", &counter, true);
        assert!(result.graph_facts > 0);
        assert_eq!(result.summaries, (SPLITTABLE * 0.07) as usize);
        assert_eq!(result.graph_facts, (SPLITTABLE * 0.04) as usize);
    }

    #[test]
    fn budget_allocation_memory_first_zeroes_history() {
        let counter = NaiveTc;
        let result = ten_k_budget().allocate_with_opts("", "", &counter, false, 0, true);
        assert_eq!(result.recent_history, 0);
        assert!(result.summaries > 0);
        assert!(result.semantic_recall > 0);
    }

    #[test]
    fn budget_allocation_memory_first_and_graph_enabled() {
        let counter = NaiveTc;
        // Graph flag set on the budget and passed explicitly, MemoryFirst on, no digest.
        let result = ten_k_budget()
            .with_graph_enabled(true)
            .allocate_with_opts("", "", &counter, true, 0, true);
        assert_eq!(
            result.recent_history, 0,
            "memory_first must zero recent_history"
        );
        assert_eq!(result.summaries, (SPLITTABLE * 0.22) as usize);
        assert_eq!(result.semantic_recall, (SPLITTABLE * 0.22) as usize);
        assert_eq!(result.cross_session, (SPLITTABLE * 0.12) as usize);
        assert_eq!(result.code_context, (SPLITTABLE * 0.38) as usize);
        assert_eq!(result.graph_facts, (SPLITTABLE * 0.06) as usize);
        assert_eq!(result.response_reserve, 2_000);
    }
}