a3s_code_core/budget.rs
1//! Budget / cost / quota contract for cluster-grade hosts.
2//!
3//! The framework does not enforce budgets itself — it only defines the
4//! decision points and emits structured events. The host (e.g. 书安OS)
5//! implements [`BudgetGuard`] with whatever backend it likes
6//! (per-tenant counters in Redis, per-day USD caps in Postgres, etc.)
7//! and plugs it into [`SessionOptions::with_budget_guard`].
8//!
9//! Decision points wired today:
10//!
11//! 1. **Before each LLM call** — [`BudgetGuard::check_before_llm`].
12//! A `Deny` aborts the call; a `SoftLimit` lets the call proceed but
13//! triggers an [`AgentEvent::BudgetThresholdHit`] so in-session
14//! policy (hooks, custom prompts) can react.
15//! 2. **After each LLM call** — [`BudgetGuard::record_after_llm`].
16//! The host updates its running spend total with the actual usage.
17//! 3. **Before each tool call** — [`BudgetGuard::check_before_tool`].
18//! Same decision shape; useful for capping expensive tools per
19//! tenant.
20//!
21//! The default trait methods are no-ops returning [`BudgetDecision::Allow`]
22//! so existing code is unaffected until a host plugs in a real impl.
23//!
24//! See [`AgentEvent::BudgetThresholdHit`](crate::agent::AgentEvent::BudgetThresholdHit)
25//! for the event vocabulary triggered by `SoftLimit`.
26
27use crate::llm::TokenUsage;
28use async_trait::async_trait;
29
/// Outcome of a budget check.
///
/// The framework treats this purely as a decision — it never inspects
/// the carried strings except to forward them to [`AgentEvent`]s and to
/// the eventual error.
///
/// `PartialEq` is derived so hosts and tests can compare decisions with
/// `==` / `assert_eq!`. Because `SoftLimit` carries `f64` amounts, the
/// usual floating-point caveat applies: a decision holding `NaN` never
/// compares equal, not even to itself (which is also why `Eq` is not
/// derived).
///
/// [`AgentEvent`]: crate::agent::AgentEvent
#[derive(Debug, Clone, PartialEq)]
pub enum BudgetDecision {
    /// Operation proceeds normally. No event is emitted.
    Allow,
    /// Operation proceeds, but the framework emits a
    /// [`AgentEvent::BudgetThresholdHit { kind: "soft", .. }`]
    /// event before continuing. In-session hooks can react (e.g. trigger
    /// auto-compact, swap to a cheaper model on next turn).
    ///
    /// [`AgentEvent::BudgetThresholdHit { kind: "soft", .. }`]: crate::agent::AgentEvent::BudgetThresholdHit
    SoftLimit {
        /// Logical resource label ("llm_tokens", "usd_cost", "wall_time", ...).
        resource: String,
        /// Current consumed amount (units depend on `resource`).
        consumed: f64,
        /// Threshold that was crossed.
        limit: f64,
        /// Optional human-readable explanation for logs / UI.
        message: Option<String>,
    },
    /// Operation is refused. The framework returns
    /// [`CodeError::BudgetExhausted`](crate::error::CodeError::BudgetExhausted)
    /// from the LLM / tool entry point. The session itself stays open —
    /// callers can re-try later or after the host has re-allocated
    /// budget.
    Deny {
        /// Logical resource label that exhausted.
        resource: String,
        /// Human-readable reason surfaced in the error and in any
        /// emitted `BudgetThresholdHit { kind: "hard", .. }` event.
        reason: String,
    },
}
70
71/// Host-supplied budget / quota contract.
72///
73/// Implementations are typically wired up by a cluster control plane
74/// to enforce cross-session, cross-tenant cost limits. The framework
75/// itself ships only the no-op [`NoopBudgetGuard`].
76///
77/// All trait methods default to `Allow` / no-op so impls only need to
78/// override what they actually want to govern.
79#[async_trait]
80pub trait BudgetGuard: Send + Sync {
81 /// Called immediately before an LLM API call.
82 ///
83 /// `estimated_prompt_tokens` is a best-effort framework estimate
84 /// from the message history at call time; impls that want precise
85 /// accounting should use [`record_after_llm`](Self::record_after_llm)
86 /// instead of trusting the estimate.
87 async fn check_before_llm(
88 &self,
89 session_id: &str,
90 estimated_prompt_tokens: usize,
91 ) -> BudgetDecision {
92 let _ = (session_id, estimated_prompt_tokens);
93 BudgetDecision::Allow
94 }
95
96 /// Called after every successful LLM call with the actual usage
97 /// reported by the provider. Lets the impl keep its running spend
98 /// total in sync with reality.
99 ///
100 /// Failed LLM calls do not invoke this hook.
101 async fn record_after_llm(&self, session_id: &str, usage: &TokenUsage) {
102 let _ = (session_id, usage);
103 }
104
105 /// Called immediately before a tool invocation. The framework does
106 /// not pass tool arguments — impls that need argument-aware caps
107 /// must wrap the executor via a custom `ToolExecutor`.
108 async fn check_before_tool(&self, session_id: &str, tool_name: &str) -> BudgetDecision {
109 let _ = (session_id, tool_name);
110 BudgetDecision::Allow
111 }
112}
113
/// Guard that never restricts anything.
///
/// Serves as the stand-in when the host has not configured a guard of its
/// own. It is a zero-sized unit struct, so copying it is free.
#[derive(Clone, Copy, Debug, Default)]
pub struct NoopBudgetGuard;
118
119#[async_trait]
120impl BudgetGuard for NoopBudgetGuard {}
121
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Arc;

    /// The no-op guard must allow every check and accept usage records.
    #[tokio::test]
    async fn noop_allows_everything() {
        let guard = NoopBudgetGuard;
        assert!(matches!(
            guard.check_before_llm("s", 1000).await,
            BudgetDecision::Allow
        ));
        assert!(matches!(
            guard.check_before_tool("s", "bash").await,
            BudgetDecision::Allow
        ));
        // The record hook has no return value; just make sure it completes.
        guard.record_after_llm("s", &TokenUsage::default()).await;
    }

    /// Test double that denies every LLM call and counts how often each
    /// overridden hook was invoked. `check_before_tool` is intentionally not
    /// overridden so the trait default can be observed through it.
    #[derive(Debug, Default)]
    struct CountingGuard {
        llm_checks: AtomicUsize,
        records: AtomicUsize,
    }

    #[async_trait]
    impl BudgetGuard for CountingGuard {
        async fn check_before_llm(&self, _: &str, _: usize) -> BudgetDecision {
            self.llm_checks.fetch_add(1, Ordering::SeqCst);
            BudgetDecision::Deny {
                resource: "llm_tokens".to_string(),
                reason: "budget exhausted in test".to_string(),
            }
        }

        async fn record_after_llm(&self, _: &str, _: &TokenUsage) {
            self.records.fetch_add(1, Ordering::SeqCst);
        }
    }

    /// A host-supplied guard used through `Arc<dyn BudgetGuard>` can deny,
    /// its overridden hooks are the ones that run, and hooks it does not
    /// override fall back to the trait defaults.
    #[tokio::test]
    async fn custom_guard_can_deny() {
        // Keep a concrete handle next to the trait object so the counters
        // stay observable after dispatching through `dyn BudgetGuard`.
        let counting = Arc::new(CountingGuard::default());
        let guard: Arc<dyn BudgetGuard> = counting.clone();

        match guard.check_before_llm("s", 100).await {
            BudgetDecision::Deny { resource, reason } => {
                assert_eq!(resource, "llm_tokens");
                assert_eq!(reason, "budget exhausted in test");
            }
            other => panic!("expected Deny, got {other:?}"),
        }
        assert_eq!(counting.llm_checks.load(Ordering::SeqCst), 1);
        assert_eq!(counting.records.load(Ordering::SeqCst), 0);

        guard.record_after_llm("s", &TokenUsage::default()).await;
        assert_eq!(counting.records.load(Ordering::SeqCst), 1);
        // Recording usage must not be mistaken for another pre-call check.
        assert_eq!(counting.llm_checks.load(Ordering::SeqCst), 1);

        // Not overridden by `CountingGuard`, so the default `Allow` applies.
        assert!(matches!(
            guard.check_before_tool("s", "bash").await,
            BudgetDecision::Allow
        ));
    }
}
172}