use zeph_memory::TokenCounter;
/// Per-section token budgets produced by `ContextBudget::allocate` and
/// `ContextBudget::allocate_with_opts`.
///
/// Every field is a token count.
#[derive(Debug, Clone)]
pub struct BudgetAllocation {
    /// Tokens counted for the system prompt text.
    pub system_prompt: usize,
    /// Tokens counted for the skills prompt text.
    pub skills: usize,
    /// Tokens budgeted for summaries.
    pub summaries: usize,
    /// Tokens budgeted for semantic recall.
    pub semantic_recall: usize,
    /// Tokens budgeted for cross-session content.
    pub cross_session: usize,
    /// Tokens budgeted for code context.
    pub code_context: usize,
    /// Tokens budgeted for graph facts.
    pub graph_facts: usize,
    /// Tokens budgeted for recent history.
    pub recent_history: usize,
    /// Tokens held back for the response.
    pub response_reserve: usize,
    /// Tokens set aside for the session digest.
    pub session_digest: usize,
}

impl BudgetAllocation {
    /// Returns how many of `summaries`, `semantic_recall`, `cross_session`,
    /// `code_context` and `graph_facts` have a non-zero budget.
    ///
    /// The prompt, history, digest and response-reserve fields are not counted.
    #[must_use]
    pub fn active_sources(&self) -> usize {
        let source_budgets = [
            self.summaries,
            self.semantic_recall,
            self.cross_session,
            self.code_context,
            self.graph_facts,
        ];
        // Each funded source contributes exactly one to the total.
        source_budgets
            .iter()
            .map(|&tokens| usize::from(tokens != 0))
            .sum::<usize>()
    }
}
/// Splits a context window of `max_tokens` tokens into per-section budgets.
#[derive(Debug, Clone)]
pub struct ContextBudget {
    /// Total window size in tokens; `0` makes every allocation zero.
    max_tokens: usize,
    /// Fraction of `max_tokens` reported as `response_reserve`.
    reserve_ratio: f32,
    /// Flag stored by `with_graph_enabled`. The allocation methods of this type do
    /// not read it; they act on their own `graph_enabled` argument.
    pub(crate) graph_enabled: bool,
}

impl ContextBudget {
    /// Creates a budget for a window of `max_tokens` tokens, holding back
    /// `reserve_ratio` of it for the response.
    ///
    /// The stored graph flag starts out `false`.
    #[must_use]
    pub fn new(max_tokens: usize, reserve_ratio: f32) -> Self {
        Self {
            max_tokens,
            reserve_ratio,
            graph_enabled: false,
        }
    }

    /// Builder-style setter for the stored graph flag.
    #[must_use]
    pub fn with_graph_enabled(mut self, enabled: bool) -> Self {
        self.graph_enabled = enabled;
        self
    }

    /// Returns the total window size in tokens.
    #[must_use]
    pub fn max_tokens(&self) -> usize {
        self.max_tokens
    }

    /// Computes an allocation with no session digest and without memory-first mode.
    ///
    /// Equivalent to `allocate_with_opts(system_prompt, skills_prompt, tc,
    /// graph_enabled, 0, false)`.
    #[must_use]
    pub fn allocate(
        &self,
        system_prompt: &str,
        skills_prompt: &str,
        tc: &TokenCounter,
        graph_enabled: bool,
    ) -> BudgetAllocation {
        self.allocate_with_opts(system_prompt, skills_prompt, tc, graph_enabled, 0, false)
    }

    /// Computes the token budget for every context section.
    ///
    /// Steps, in order:
    /// 1. `response_reserve` is `max_tokens * reserve_ratio`, truncated and capped at
    ///    `max_tokens`.
    /// 2. The token counts of `system_prompt` and `skills_prompt` (measured with `tc`)
    ///    are deducted from what is left.
    /// 3. `session_digest` receives `digest_tokens`, capped at what is left.
    /// 4. The remainder is split by fixed ratios chosen from `memory_first` and
    ///    `graph_enabled`; each share is truncated towards zero.
    ///
    /// Every deduction saturates at zero, so oversized prompts leave nothing to
    /// distribute instead of underflowing.
    ///
    /// # Parameters
    /// - `graph_enabled`: when `false`, `graph_facts` is `0`. Only this argument is
    ///   consulted; the struct's stored flag is not read here.
    /// - `digest_tokens`: tokens requested for the session digest.
    /// - `memory_first`: when `true`, `recent_history` is `0` and the whole remainder
    ///   goes to the other sources.
    ///
    /// # Returns
    /// An all-zero allocation (prompt counts included) when `max_tokens` is `0`.
    #[must_use]
    // Budgets are derived through `f32` ratios; the lossy casts are intentional.
    #[allow(
        clippy::cast_precision_loss,
        clippy::cast_possible_truncation,
        clippy::cast_sign_loss
    )]
    pub fn allocate_with_opts(
        &self,
        system_prompt: &str,
        skills_prompt: &str,
        tc: &TokenCounter,
        graph_enabled: bool,
        digest_tokens: usize,
        memory_first: bool,
    ) -> BudgetAllocation {
        if self.max_tokens == 0 {
            // A zero window disables budgeting: nothing is counted or reserved.
            return BudgetAllocation {
                system_prompt: 0,
                skills: 0,
                summaries: 0,
                semantic_recall: 0,
                cross_session: 0,
                code_context: 0,
                graph_facts: 0,
                recent_history: 0,
                response_reserve: 0,
                session_digest: 0,
            };
        }

        // Float-to-int `as` casts saturate (NaN and negative values become 0), so only
        // the upper side needs clamping to keep the reserve inside the window.
        let response_reserve =
            ((self.max_tokens as f32 * self.reserve_ratio) as usize).min(self.max_tokens);

        let system_prompt_tokens = tc.count_tokens(system_prompt);
        let skills_tokens = tc.count_tokens(skills_prompt);

        // Subtract the two prompt costs one at a time so their sum cannot overflow.
        let after_prompts = self
            .max_tokens
            .saturating_sub(response_reserve)
            .saturating_sub(system_prompt_tokens)
            .saturating_sub(skills_tokens);

        let session_digest = digest_tokens.min(after_prompts);
        let available = after_prompts.saturating_sub(session_digest);

        let [summaries, semantic_recall, cross_session, code_context, graph_facts, recent_history] =
            Self::split_ratios(memory_first, graph_enabled);

        BudgetAllocation {
            system_prompt: system_prompt_tokens,
            skills: skills_tokens,
            summaries: Self::share_of(available, summaries),
            semantic_recall: Self::share_of(available, semantic_recall),
            cross_session: Self::share_of(available, cross_session),
            code_context: Self::share_of(available, code_context),
            graph_facts: Self::share_of(available, graph_facts),
            recent_history: Self::share_of(available, recent_history),
            response_reserve,
            session_digest,
        }
    }

    /// Ratio table for the distributable remainder, ordered as `[summaries,
    /// semantic_recall, cross_session, code_context, graph_facts, recent_history]`.
    fn split_ratios(memory_first: bool, graph_enabled: bool) -> [f32; 6] {
        match (memory_first, graph_enabled) {
            (true, true) => [0.22, 0.22, 0.12, 0.38, 0.06, 0.0],
            (true, false) => [0.25, 0.25, 0.15, 0.35, 0.0, 0.0],
            (false, true) => [0.07, 0.07, 0.03, 0.29, 0.04, 0.50],
            (false, false) => [0.08, 0.08, 0.04, 0.30, 0.0, 0.50],
        }
    }

    /// Returns `ratio` of `available`, truncated towards zero.
    // Same intentional lossy `f32` arithmetic as in `allocate_with_opts`.
    #[allow(
        clippy::cast_precision_loss,
        clippy::cast_possible_truncation,
        clippy::cast_sign_loss
    )]
    fn share_of(available: usize, ratio: f32) -> usize {
        (available as f32 * ratio) as usize
    }
}
#[cfg(test)]
mod tests {
    #![allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]

    use super::*;

    /// The accessor reports the limit handed to the constructor.
    #[test]
    fn context_budget_max_tokens_accessor() {
        assert_eq!(ContextBudget::new(1000, 0.2).max_tokens(), 1000);
    }

    /// A 1000-token window with a 20% reserve funds the prompts, the memory
    /// sources and the history.
    #[test]
    fn budget_allocation_basic() {
        let counter = TokenCounter::new();
        let allocation =
            ContextBudget::new(1000, 0.20).allocate("system prompt", "skills prompt", &counter, false);

        assert_eq!(allocation.response_reserve, 200);
        assert!(allocation.system_prompt > 0);
        assert!(allocation.skills > 0);
        assert!(allocation.summaries > 0);
        assert!(allocation.semantic_recall > 0);
        assert!(allocation.recent_history > 0);
    }

    /// A zero-token window yields zero for the prompts and the sources alike.
    #[test]
    fn budget_allocation_zero_disables() {
        let counter = TokenCounter::new();
        let allocation = ContextBudget::new(0, 0.20).allocate("test", "test", &counter, false);

        assert_eq!(allocation.system_prompt, 0);
        assert_eq!(allocation.skills, 0);
        assert_eq!(allocation.summaries, 0);
        assert_eq!(allocation.recent_history, 0);
    }

    /// Without the graph flag, graph facts get nothing and summaries / semantic
    /// recall each get 8% of the 8000 distributable tokens.
    #[test]
    fn budget_allocation_graph_disabled_no_graph_facts() {
        let counter = TokenCounter::new();
        let allocation = ContextBudget::new(10_000, 0.20).allocate("", "", &counter, false);
        let eight_percent = (8_000_f32 * 0.08) as usize;

        assert_eq!(allocation.graph_facts, 0);
        assert_eq!(allocation.summaries, eight_percent);
        assert_eq!(allocation.semantic_recall, eight_percent);
    }

    /// With the graph flag, graph facts get 4% and summaries drop to 7%.
    #[test]
    fn budget_allocation_graph_enabled_allocates_4_percent() {
        let counter = TokenCounter::new();
        let allocation = ContextBudget::new(10_000, 0.20)
            .with_graph_enabled(true)
            .allocate("", "", &counter, true);
        let seven_percent = (8_000_f32 * 0.07) as usize;
        let four_percent = (8_000_f32 * 0.04) as usize;

        assert!(allocation.graph_facts > 0);
        assert_eq!(allocation.summaries, seven_percent);
        assert_eq!(allocation.graph_facts, four_percent);
    }

    /// Memory-first mode gives recent history nothing while still funding
    /// summaries and semantic recall.
    #[test]
    fn budget_allocation_memory_first_zeroes_history() {
        let counter = TokenCounter::new();
        let allocation =
            ContextBudget::new(10_000, 0.20).allocate_with_opts("", "", &counter, false, 0, true);

        assert_eq!(allocation.recent_history, 0);
        assert!(allocation.summaries > 0);
        assert!(allocation.semantic_recall > 0);
    }
}