llm_budget_window/
lib.rs

1//! # llm-budget-window
2//!
3//! Time-windowed token + USD budget for LLM calls.
4//!
5//! [`token-budget-pool`](https://crates.io/crates/token-budget-pool) caps
6//! total spend across concurrent tasks. This crate adds a time axis:
7//! cap spend per minute, per hour, per day, or any combination. Each
8//! recorded call is timestamped; older entries fall out of the window
9//! automatically.
10//!
11//! ## Quick example
12//!
13//! ```
14//! use std::time::Duration;
15//! use llm_budget_window::{BudgetWindows, Window, WindowBreached};
16//!
17//! let bw = BudgetWindows::new(vec![
18//!     Window::new("per_minute", Duration::from_secs(60))
19//!         .with_token_cap(50_000)
20//!         .with_usd_cap(1.0),
21//!     Window::new("per_hour", Duration::from_secs(3600))
22//!         .with_usd_cap(10.0),
23//! ]);
24//!
25//! // record consumption; returns an error if ANY window would breach
26//! bw.record(tokens(1000), usd(0.05)).unwrap();
27//!
28//! // for very cheap calls, both windows have plenty of room
29//! for _ in 0..50 {
30//!     let _ = bw.record(tokens(100), usd(0.001));
31//! }
32//!
33//! # fn tokens(n: u64) -> u64 { n }
34//! # fn usd(v: f64) -> f64 { v }
35//! ```
36//!
37//! ## Memory
38//!
39//! Each window keeps a `VecDeque` of (timestamp, tokens, usd) records.
40//! Old records age out on every `record()` and `snapshot()`. For very
41//! high call rates, configure only the windows you actually need: a
42//! 1-day window holds every call from the last 24h.
43
44#![deny(missing_docs)]
45
46mod error;
47mod window;
48
49pub use error::WindowBreached;
50pub use window::{BudgetWindows, Window, WindowSnapshot};
llm_budget_window/lib.rs

llm_budget_window/
lib.rs