// zeph_context/budget.rs
// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0

//! Token budget calculation for context assembly.
//!
//! [`ContextBudget`] tracks the maximum token count for a session and divides
//! available tokens across context slots (summaries, semantic recall, code context, etc.).
//! [`BudgetAllocation`] is the result of one budget split and is consumed by
//! [`crate::assembler::ContextAssembler`].

use zeph_memory::TokenCounter;
/// Per-slot token budget produced by [`ContextBudget::allocate`].
///
/// All fields are in tokens. Zero means the slot is disabled or budget-exhausted for this turn.
///
/// `Default` yields the all-zero allocation (every slot disabled), matching the value
/// returned when the session budget itself is zero.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct BudgetAllocation {
    /// Tokens consumed by the current system prompt.
    pub system_prompt: usize,
    /// Tokens consumed by the current skills prompt.
    pub skills: usize,
    /// Tokens allocated for past-conversation summaries.
    pub summaries: usize,
    /// Tokens allocated for semantic (vector) recall results.
    pub semantic_recall: usize,
    /// Tokens allocated for cross-session memory recall.
    pub cross_session: usize,
    /// Tokens allocated for code-index RAG context.
    pub code_context: usize,
    /// Tokens reserved for graph facts. Always present; 0 when graph-memory is disabled.
    pub graph_facts: usize,
    /// Tokens allocated for recent conversation history trim.
    pub recent_history: usize,
    /// Tokens reserved for the model response (not filled by context sources).
    pub response_reserve: usize,
    /// Tokens pre-reserved for the session digest block. Always present; 0 when digest is
    /// disabled or no digest exists for the current conversation.
    pub session_digest: usize,
}
40
41impl BudgetAllocation {
42    /// Count of context source slots with non-zero token budgets.
43    #[must_use]
44    pub fn active_sources(&self) -> usize {
45        [
46            self.summaries,
47            self.semantic_recall,
48            self.cross_session,
49            self.code_context,
50            self.graph_facts,
51        ]
52        .iter()
53        .filter(|&&t| t > 0)
54        .count()
55    }
56}
57
/// Token budget for a single agent session.
///
/// Tracks the maximum token window and divides it across context slots.
/// Call [`ContextBudget::allocate`] or [`ContextBudget::allocate_with_opts`] to get a
/// [`BudgetAllocation`] that can be fed to [`crate::assembler::ContextAssembler`].
#[derive(Debug, Clone)]
pub struct ContextBudget {
    // Total token window for the session; 0 disables all allocation.
    max_tokens: usize,
    // Fraction of `max_tokens` held back for the model response (e.g. 0.15 = 15%).
    reserve_ratio: f32,
    /// Whether graph-fact allocation is active. Toggles the 4% graph-facts slice.
    pub(crate) graph_enabled: bool,
}
70
71impl ContextBudget {
72    /// Create a new budget with `max_tokens` capacity and `reserve_ratio` fraction reserved
73    /// for the model response.
74    ///
75    /// # Examples
76    ///
77    /// ```
78    /// use zeph_context::budget::ContextBudget;
79    ///
80    /// let budget = ContextBudget::new(128_000, 0.15);
81    /// assert_eq!(budget.max_tokens(), 128_000);
82    /// ```
83    #[must_use]
84    pub fn new(max_tokens: usize, reserve_ratio: f32) -> Self {
85        Self {
86            max_tokens,
87            reserve_ratio,
88            graph_enabled: false,
89        }
90    }
91
92    /// Enable or disable graph fact allocation in the budget split.
93    ///
94    /// When enabled, 4% of available tokens are routed to the `graph_facts` slot, and the
95    /// `summaries`/`semantic_recall` slices are each reduced by 1%.
96    #[must_use]
97    pub fn with_graph_enabled(mut self, enabled: bool) -> Self {
98        self.graph_enabled = enabled;
99        self
100    }
101
102    /// Maximum token capacity for this session.
103    #[must_use]
104    pub fn max_tokens(&self) -> usize {
105        self.max_tokens
106    }
107
108    /// Allocate the budget across context slots for one turn.
109    ///
110    /// Equivalent to `allocate_with_opts(…, 0, false)`.
111    ///
112    /// # Examples
113    ///
114    /// ```no_run
115    /// use zeph_context::budget::ContextBudget;
116    /// use zeph_memory::TokenCounter;
117    ///
118    /// let budget = ContextBudget::new(128_000, 0.15);
119    /// let tc = TokenCounter::new();
120    /// let alloc = budget.allocate("system prompt", "skills prompt", &tc, false);
121    /// assert!(alloc.recent_history > 0);
122    /// ```
123    #[must_use]
124    pub fn allocate(
125        &self,
126        system_prompt: &str,
127        skills_prompt: &str,
128        tc: &TokenCounter,
129        graph_enabled: bool,
130    ) -> BudgetAllocation {
131        self.allocate_with_opts(system_prompt, skills_prompt, tc, graph_enabled, 0, false)
132    }
133
134    /// Allocate context budget with optional digest pre-reservation and `MemoryFirst` mode.
135    ///
136    /// `digest_tokens` — pre-counted tokens for the session digest block; deducted from
137    /// available tokens BEFORE percentage splits so it does not silently crowd out other slots.
138    ///
139    /// `memory_first` — when `true`, sets `recent_history` to 0 and redistributes those
140    /// tokens across `summaries`, `semantic_recall`, and `cross_session`.
141    #[must_use]
142    #[allow(
143        clippy::cast_precision_loss,
144        clippy::cast_possible_truncation,
145        clippy::cast_sign_loss
146    )]
147    pub fn allocate_with_opts(
148        &self,
149        system_prompt: &str,
150        skills_prompt: &str,
151        tc: &TokenCounter,
152        graph_enabled: bool,
153        digest_tokens: usize,
154        memory_first: bool,
155    ) -> BudgetAllocation {
156        if self.max_tokens == 0 {
157            return BudgetAllocation {
158                system_prompt: 0,
159                skills: 0,
160                summaries: 0,
161                semantic_recall: 0,
162                cross_session: 0,
163                code_context: 0,
164                graph_facts: 0,
165                recent_history: 0,
166                response_reserve: 0,
167                session_digest: 0,
168            };
169        }
170
171        let response_reserve = (self.max_tokens as f32 * self.reserve_ratio) as usize;
172        let mut available = self.max_tokens.saturating_sub(response_reserve);
173
174        let system_prompt_tokens = tc.count_tokens(system_prompt);
175        let skills_tokens = tc.count_tokens(skills_prompt);
176
177        available = available.saturating_sub(system_prompt_tokens + skills_tokens);
178
179        // Deduct digest tokens BEFORE percentage splits so the budget allocator accounts for them.
180        let session_digest = digest_tokens.min(available);
181        available = available.saturating_sub(session_digest);
182
183        let (summaries, semantic_recall, cross_session, code_context, graph_facts, recent_history) =
184            if memory_first {
185                // MemoryFirst: no recent history, redistribute to memory slots.
186                if graph_enabled {
187                    (
188                        (available as f32 * 0.22) as usize,
189                        (available as f32 * 0.22) as usize,
190                        (available as f32 * 0.12) as usize,
191                        (available as f32 * 0.38) as usize,
192                        (available as f32 * 0.06) as usize,
193                        0,
194                    )
195                } else {
196                    (
197                        (available as f32 * 0.25) as usize,
198                        (available as f32 * 0.25) as usize,
199                        (available as f32 * 0.15) as usize,
200                        (available as f32 * 0.35) as usize,
201                        0,
202                        0,
203                    )
204                }
205            } else if graph_enabled {
206                // When graph is enabled: take 4% for graph facts, reduce other slices by 1% each.
207                (
208                    (available as f32 * 0.07) as usize,
209                    (available as f32 * 0.07) as usize,
210                    (available as f32 * 0.03) as usize,
211                    (available as f32 * 0.29) as usize,
212                    (available as f32 * 0.04) as usize,
213                    (available as f32 * 0.50) as usize,
214                )
215            } else {
216                (
217                    (available as f32 * 0.08) as usize,
218                    (available as f32 * 0.08) as usize,
219                    (available as f32 * 0.04) as usize,
220                    (available as f32 * 0.30) as usize,
221                    0,
222                    (available as f32 * 0.50) as usize,
223                )
224            };
225
226        BudgetAllocation {
227            system_prompt: system_prompt_tokens,
228            skills: skills_tokens,
229            summaries,
230            semantic_recall,
231            cross_session,
232            code_context,
233            graph_facts,
234            recent_history,
235            response_reserve,
236            session_digest,
237        }
238    }
239}
240
#[cfg(test)]
mod tests {
    #![allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]

    use super::*;

    #[test]
    fn context_budget_max_tokens_accessor() {
        assert_eq!(ContextBudget::new(1000, 0.2).max_tokens(), 1000);
    }

    #[test]
    fn budget_allocation_basic() {
        let tc = TokenCounter::new();
        let alloc =
            ContextBudget::new(1000, 0.20).allocate("system prompt", "skills prompt", &tc, false);

        // 20% of 1000 is reserved for the response; every populated slot is non-zero.
        assert_eq!(alloc.response_reserve, 200);
        for tokens in [
            alloc.system_prompt,
            alloc.skills,
            alloc.summaries,
            alloc.semantic_recall,
            alloc.recent_history,
        ] {
            assert!(tokens > 0);
        }
    }

    #[test]
    fn budget_allocation_zero_disables() {
        let tc = TokenCounter::new();
        let alloc = ContextBudget::new(0, 0.20).allocate("test", "test", &tc, false);

        // A zero-capacity budget produces an all-zero allocation.
        assert_eq!(alloc.system_prompt, 0);
        assert_eq!(alloc.skills, 0);
        assert_eq!(alloc.summaries, 0);
        assert_eq!(alloc.recent_history, 0);
    }

    #[test]
    fn budget_allocation_graph_disabled_no_graph_facts() {
        let tc = TokenCounter::new();
        let alloc = ContextBudget::new(10_000, 0.20).allocate("", "", &tc, false);

        // 8_000 tokens remain after the 20% reserve; 8% each go to summaries/recall.
        assert_eq!(alloc.graph_facts, 0);
        assert_eq!(alloc.summaries, (8_000_f32 * 0.08) as usize);
        assert_eq!(alloc.semantic_recall, (8_000_f32 * 0.08) as usize);
    }

    #[test]
    fn budget_allocation_graph_enabled_allocates_4_percent() {
        let tc = TokenCounter::new();
        let budget = ContextBudget::new(10_000, 0.20).with_graph_enabled(true);
        let alloc = budget.allocate("", "", &tc, true);

        assert!(alloc.graph_facts > 0);
        // Graph mode trims summaries to 7% and routes 4% to graph facts.
        assert_eq!(alloc.summaries, (8_000_f32 * 0.07) as usize);
        assert_eq!(alloc.graph_facts, (8_000_f32 * 0.04) as usize);
    }

    #[test]
    fn budget_allocation_memory_first_zeroes_history() {
        let tc = TokenCounter::new();
        let alloc =
            ContextBudget::new(10_000, 0.20).allocate_with_opts("", "", &tc, false, 0, true);

        // MemoryFirst drops recent history entirely and boosts the memory slots.
        assert_eq!(alloc.recent_history, 0);
        assert!(alloc.summaries > 0);
        assert!(alloc.semantic_recall > 0);
    }
}