pub trait TokenBudget: Send + Sync {
// Required methods
fn try_reserve_tokens<'life0, 'async_trait>(
&'life0 self,
est: u64,
) -> Pin<Box<dyn Future<Output = Result<Option<TokenReservation>, BudgetError>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
fn record_usage<'life0, 'async_trait>(
&'life0 self,
reservation: TokenReservation,
prompt: u64,
completion: u64,
) -> Pin<Box<dyn Future<Output = ()> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
fn tokens_consumed<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = u64> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
}Expand description
Soft-cap on cumulative LLM token spend.
BudgetGuard constrains unit-cost work (rows, dispatches);
TokenBudget constrains tokens burned by a model call. A single
prompt is typically a handful of tokens; a multi-round tool-call
loop can be 10–100× larger.
Implementations are expected to be cheap and lock-free in the hot
path. `TokenBudget::try_reserve_tokens` is called before a
prompt is sent; `TokenBudget::record_usage` is called afterwards with
the observed totals from the provider, so that the next reservation
reflects reality.
Required Methods
fn try_reserve_tokens<'life0, 'async_trait>(
    &'life0 self,
    est: u64,
) -> Pin<Box<dyn Future<Output = Result<Option<TokenReservation>, BudgetError>> + Send + 'async_trait>>
where
    Self: 'async_trait,
    'life0: 'async_trait,
Optimistically reserve `est` tokens (the estimated prompt + completion total).
Returns `Ok(Some(reservation))` on success and `Ok(None)` on soft
denial.
fn record_usage<'life0, 'async_trait>(
    &'life0 self,
    reservation: TokenReservation,
    prompt: u64,
    completion: u64,
) -> Pin<Box<dyn Future<Output = ()> + Send + 'async_trait>>
where
    Self: 'async_trait,
    'life0: 'async_trait,
Record the actual prompt + completion token usage from a finished call. The implementation reconciles the supplied reservation against the observed usage.