Skip to main content

lean_ctx/proxy/
cost.rs

1//! Per-model proxy savings accounting.
2//!
3//! Headroom's per-model cost breakdown was the one metric a tester found clearer
4//! than lean-ctx's single flat number. This module buckets request-side savings
5//! by model and prices them with the shared [`ModelPricing`] table so `/status`
6//! can report estimated USD avoided per model.
7//!
8//! Honesty contract: token counts here are request-side *estimates* (the bytes
9//! the proxy removed before forwarding). They deliberately do NOT try to model
10//! re-reads the agent may perform later, so figures are conservative and
11//! labelled `estimated` in the output.
12
13use std::collections::HashMap;
14use std::sync::{Mutex, OnceLock};
15
16use serde::Serialize;
17
18use crate::core::gain::model_pricing::{ModelPricing, PricingMatchKind};
19
/// Running totals for a single model bucket.
///
/// A fresh bucket starts with every counter at zero (via `Default`).
#[derive(Clone, Default)]
struct ModelAccum {
    /// How many `record` calls were attributed to this bucket.
    requests: u64,
    /// Sum of the `tokens_saved` values recorded for this bucket.
    tokens_saved: u64,
    /// Sum of the `bytes_original` values recorded for this bucket.
    bytes_original: u64,
    /// Sum of the `bytes_compressed` values recorded for this bucket.
    bytes_compressed: u64,
}
27
28/// One model's aggregated, priced savings for the `/status` endpoint.
29#[derive(Debug, Clone, Serialize, PartialEq)]
30pub struct ModelStat {
31    pub model: String,
32    pub requests: u64,
33    pub tokens_saved: u64,
34    pub usd_saved: f64,
35    /// True when the price came from a fallback/heuristic match, not an exact one.
36    pub pricing_estimated: bool,
37}
38
39fn store() -> &'static Mutex<HashMap<String, ModelAccum>> {
40    static STORE: OnceLock<Mutex<HashMap<String, ModelAccum>>> = OnceLock::new();
41    STORE.get_or_init(|| Mutex::new(HashMap::new()))
42}
43
44/// Records one request's request-side savings against its model bucket.
45///
46/// `model` is taken from the request body (`None`/empty buckets under
47/// `"unknown"`). Recording never blocks request handling on poisoning.
48pub fn record(model: Option<&str>, tokens_saved: u64, bytes_original: u64, bytes_compressed: u64) {
49    let key = model
50        .map(str::trim)
51        .filter(|m| !m.is_empty())
52        .unwrap_or("unknown")
53        .to_string();
54
55    let mut map = store()
56        .lock()
57        .unwrap_or_else(std::sync::PoisonError::into_inner);
58    let acc = map.entry(key).or_default();
59    acc.requests += 1;
60    acc.tokens_saved += tokens_saved;
61    acc.bytes_original += bytes_original;
62    acc.bytes_compressed += bytes_compressed;
63}
64
65/// Returns per-model stats, priced and sorted by USD saved (descending).
66pub fn snapshot() -> Vec<ModelStat> {
67    let pricing = ModelPricing::load();
68    let map = store()
69        .lock()
70        .unwrap_or_else(std::sync::PoisonError::into_inner);
71
72    let mut stats: Vec<ModelStat> = map
73        .iter()
74        .map(|(model, acc)| {
75            let quote = pricing.quote(Some(model));
76            // Compression removes *input* tokens, so price against the input rate.
77            let usd_saved = acc.tokens_saved as f64 / 1_000_000.0 * quote.cost.input_per_m;
78            let pricing_estimated = !matches!(quote.match_kind, PricingMatchKind::Exact);
79            ModelStat {
80                model: model.clone(),
81                requests: acc.requests,
82                tokens_saved: acc.tokens_saved,
83                usd_saved,
84                pricing_estimated,
85            }
86        })
87        .collect();
88
89    stats.sort_by(|a, b| {
90        b.usd_saved
91            .partial_cmp(&a.usd_saved)
92            .unwrap_or(std::cmp::Ordering::Equal)
93    });
94    stats
95}
96
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE: the accumulator store is process-global and tests may run in
    // parallel, so shared buckets are checked with lower bounds, while the
    // uniquely named bucket can be checked exactly.

    /// `None` and whitespace-only model names must land in the `"unknown"` bucket.
    #[test]
    fn unknown_model_buckets_and_prices_without_panic() {
        record(None, 1000, 4000, 0);
        record(Some("   "), 500, 2000, 0);
        let stats = snapshot();
        let unknown = stats
            .iter()
            .find(|s| s.model == "unknown")
            .expect("blank/None models bucket under 'unknown'");
        assert!(unknown.requests >= 2);
        assert!(unknown.tokens_saved >= 1500);
        assert!(unknown.usd_saved.is_finite());
    }

    /// A model name with no exact pricing entry is still recorded and priced.
    ///
    /// Despite the test's name, only a non-negative estimate can be asserted
    /// here: the fallback rate for this made-up model is not known to the test.
    #[test]
    fn known_model_yields_positive_usd() {
        record(
            Some("claude-opus-4-8-zzz-cost-test"),
            2_000_000,
            8_000_000,
            100,
        );
        let stats = snapshot();
        let row = stats
            .iter()
            .find(|s| s.model.contains("opus-4-8-zzz-cost-test"))
            .expect("recorded model present");
        // Fallback pricing still produces a finite, non-negative estimate.
        assert!(row.usd_saved >= 0.0 && row.usd_saved.is_finite());
        assert_eq!(row.tokens_saved, 2_000_000);
        assert_eq!(row.requests, 1);
    }
}