Skip to main content

a3s_code_core/
budget.rs

1//! Budget / cost / quota contract for cluster-grade hosts.
2//!
3//! The framework does not enforce budgets itself — it only defines the
4//! decision points and emits structured events. The host (e.g. 书安OS)
5//! implements [`BudgetGuard`] with whatever backend it likes
6//! (per-tenant counters in Redis, per-day USD caps in Postgres, etc.)
7//! and plugs it into [`SessionOptions::with_budget_guard`].
8//!
9//! Decision points wired today:
10//!
11//! 1. **Before each LLM call** — [`BudgetGuard::check_before_llm`].
12//!    A `Deny` aborts the call; a `SoftLimit` lets the call proceed but
13//!    triggers an [`AgentEvent::BudgetThresholdHit`] so in-session
14//!    policy (hooks, custom prompts) can react.
15//! 2. **After each LLM call** — [`BudgetGuard::record_after_llm`].
16//!    The host updates its running spend total with the actual usage.
17//! 3. **Before each tool call** — [`BudgetGuard::check_before_tool`].
18//!    Same decision shape; useful for capping expensive tools per
19//!    tenant.
20//!
21//! The default trait methods are no-ops returning [`BudgetDecision::Allow`]
22//! so existing code is unaffected until a host plugs in a real impl.
23//!
24//! See [`AgentEvent::BudgetThresholdHit`](crate::agent::AgentEvent::BudgetThresholdHit)
25//! for the event vocabulary triggered by `SoftLimit`.
26
27use crate::llm::TokenUsage;
28use async_trait::async_trait;
29
/// Outcome of a budget check.
///
/// The framework treats this purely as a decision — it never inspects
/// the carried strings except to forward them to [`AgentEvent`]s and to
/// the eventual error.
///
/// Values compare structurally through [`PartialEq`], so callers and tests
/// can write `decision == BudgetDecision::Allow` or use `assert_eq!` rather
/// than pattern matching. `Eq` is intentionally not derived: `SoftLimit`
/// carries `f64` amounts, and a `NaN` amount never compares equal to itself.
///
/// [`AgentEvent`]: crate::agent::AgentEvent
#[derive(Debug, Clone, PartialEq)]
pub enum BudgetDecision {
    /// Operation proceeds normally. No event is emitted.
    Allow,
    /// Operation proceeds, but the framework emits a
    /// [`AgentEvent::BudgetThresholdHit { kind: "soft", .. }`]
    /// event before continuing. In-session hooks can react (e.g. trigger
    /// auto-compact, swap to a cheaper model on next turn).
    ///
    /// [`AgentEvent::BudgetThresholdHit { kind: "soft", .. }`]: crate::agent::AgentEvent::BudgetThresholdHit
    SoftLimit {
        /// Logical resource label ("llm_tokens", "usd_cost", "wall_time", ...).
        resource: String,
        /// Current consumed amount (units depend on `resource`).
        consumed: f64,
        /// Threshold that was crossed (same units as `consumed`).
        limit: f64,
        /// Optional human-readable explanation for logs / UI.
        message: Option<String>,
    },
    /// Operation is refused. The framework returns
    /// [`CodeError::BudgetExhausted`](crate::error::CodeError::BudgetExhausted)
    /// from the LLM / tool entry point. The session itself stays open —
    /// callers can re-try later or after the host has re-allocated
    /// budget.
    Deny {
        /// Logical resource label that was exhausted.
        resource: String,
        /// Human-readable reason surfaced in the error and in any
        /// emitted `BudgetThresholdHit { kind: "hard", .. }` event.
        reason: String,
    },
}
70
71/// Host-supplied budget / quota contract.
72///
73/// Implementations are typically wired up by a cluster control plane
74/// to enforce cross-session, cross-tenant cost limits. The framework
75/// itself ships only the no-op [`NoopBudgetGuard`].
76///
77/// All trait methods default to `Allow` / no-op so impls only need to
78/// override what they actually want to govern.
79#[async_trait]
80pub trait BudgetGuard: Send + Sync {
81    /// Called immediately before an LLM API call.
82    ///
83    /// `estimated_prompt_tokens` is a best-effort framework estimate
84    /// from the message history at call time; impls that want precise
85    /// accounting should use [`record_after_llm`](Self::record_after_llm)
86    /// instead of trusting the estimate.
87    async fn check_before_llm(
88        &self,
89        session_id: &str,
90        estimated_prompt_tokens: usize,
91    ) -> BudgetDecision {
92        let _ = (session_id, estimated_prompt_tokens);
93        BudgetDecision::Allow
94    }
95
96    /// Called after every successful LLM call with the actual usage
97    /// reported by the provider. Lets the impl keep its running spend
98    /// total in sync with reality.
99    ///
100    /// Failed LLM calls do not invoke this hook.
101    async fn record_after_llm(&self, session_id: &str, usage: &TokenUsage) {
102        let _ = (session_id, usage);
103    }
104
105    /// Called immediately before a tool invocation. The framework does
106    /// not pass tool arguments — impls that need argument-aware caps
107    /// must wrap the executor via a custom `ToolExecutor`.
108    async fn check_before_tool(&self, session_id: &str, tool_name: &str) -> BudgetDecision {
109        let _ = (session_id, tool_name);
110        BudgetDecision::Allow
111    }
112}
113
/// Guard that permits every operation.
///
/// This is the fallback used when the host has not configured a guard of
/// its own. It is a zero-sized unit struct, so it is free to copy around.
#[derive(Clone, Copy, Debug, Default)]
pub struct NoopBudgetGuard;
118
119#[async_trait]
120impl BudgetGuard for NoopBudgetGuard {}
121
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Arc;

    /// The shipped no-op guard must allow both decision points and accept
    /// usage records without panicking.
    #[tokio::test]
    async fn noop_allows_everything() {
        let guard = NoopBudgetGuard;
        assert!(matches!(
            guard.check_before_llm("s", 1000).await,
            BudgetDecision::Allow
        ));
        assert!(matches!(
            guard.check_before_tool("s", "bash").await,
            BudgetDecision::Allow
        ));
        // Recording has no observable result here; this only checks that
        // the default body can be awaited without panicking.
        guard.record_after_llm("s", &TokenUsage::default()).await;
    }

    /// Test guard that denies every LLM call and counts how often each
    /// overridden hook is invoked. `check_before_tool` is intentionally
    /// NOT overridden so the trait default can be exercised through it.
    #[derive(Debug, Default)]
    struct CountingGuard {
        llm_checks: AtomicUsize,
        records: AtomicUsize,
    }

    #[async_trait]
    impl BudgetGuard for CountingGuard {
        async fn check_before_llm(&self, _: &str, _: usize) -> BudgetDecision {
            self.llm_checks.fetch_add(1, Ordering::SeqCst);
            BudgetDecision::Deny {
                resource: "llm_tokens".to_string(),
                reason: "budget exhausted in test".to_string(),
            }
        }
        async fn record_after_llm(&self, _: &str, _: &TokenUsage) {
            self.records.fetch_add(1, Ordering::SeqCst);
        }
    }

    /// A custom guard used through `Arc<dyn BudgetGuard>` can refuse an LLM
    /// call, and the override (not the default body) is what actually runs.
    #[tokio::test]
    async fn custom_guard_can_deny() {
        // Keep a concrete handle so the counters stay reachable after the
        // guard has been erased to a trait object.
        let counting = Arc::new(CountingGuard::default());
        let guard: Arc<dyn BudgetGuard> = counting.clone();

        match guard.check_before_llm("s", 100).await {
            BudgetDecision::Deny { resource, reason } => {
                assert_eq!(resource, "llm_tokens");
                assert_eq!(reason, "budget exhausted in test");
            }
            other => panic!("expected Deny, got {other:?}"),
        }

        assert_eq!(counting.llm_checks.load(Ordering::SeqCst), 1);
        // Checking must not be confused with recording.
        assert_eq!(counting.records.load(Ordering::SeqCst), 0);
    }

    /// Overridden hooks are dispatched through the trait object, while
    /// hooks the guard did not override fall back to the trait defaults.
    #[tokio::test]
    async fn custom_guard_records_usage_and_inherits_defaults() {
        let counting = Arc::new(CountingGuard::default());
        let guard: Arc<dyn BudgetGuard> = counting.clone();

        guard.record_after_llm("s", &TokenUsage::default()).await;
        guard.record_after_llm("s", &TokenUsage::default()).await;
        assert_eq!(counting.records.load(Ordering::SeqCst), 2);
        assert_eq!(counting.llm_checks.load(Ordering::SeqCst), 0);

        // `check_before_tool` is not overridden by `CountingGuard`.
        assert!(matches!(
            guard.check_before_tool("s", "bash").await,
            BudgetDecision::Allow
        ));
    }
}