// evolve_llm/cost.rs

//! In-memory token cost tracker. Purely observational — no enforcement.

use crate::TokenUsage;
use std::sync::atomic::{AtomicU64, Ordering};

/// Price table entry for one model.
///
/// Units: **micro-cents per million tokens**, where 1 cent = 10 000
/// micro-cents. Despite the `_per_token` suffix on the field names, the stored
/// values are per-million-token rates: `CostTracker::record` multiplies them by
/// raw token counts and `CostTracker::spent_micro_cents` divides the running
/// total by 1 000 000. Keeping the rate per million tokens lets cheap models be
/// priced with plain integers.
///
/// Worked example: $0.25 per million tokens = 25 cents per million
/// = 250 000 micro-cents per million, stored as `250_000`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Price {
    /// Input rate, in micro-cents per million input tokens.
    pub input_per_token: u64,
    /// Output rate, in micro-cents per million output tokens.
    pub output_per_token: u64,
}

impl Price {
    /// Haiku rates as encoded here: $0.25 per million input tokens and $1.25
    /// per million output tokens.
    ///
    /// NOTE(review): these figures are described as Claude Haiku 4.5 public
    /// pricing — verify against the provider's current price list.
    pub const HAIKU: Self = Self {
        // $0.25/M = 25 cents/M = 250_000 micro-cents per million tokens.
        input_per_token: 250_000,
        // $1.25/M = 125 cents/M = 1_250_000 micro-cents per million tokens.
        output_per_token: 1_250_000,
    };

    /// Ollama runs locally — free.
    pub const OLLAMA: Self = Self {
        input_per_token: 0,
        output_per_token: 0,
    };
}

/// Accumulates token usage and cost across calls.
///
/// Every counter is an atomic, so the tracker can be updated through a shared
/// reference.
///
/// Cost is reported in **micro-cents** (1 cent = 10 000 micro-cents). The
/// scaling factor is chosen so a single Haiku session (~1 000 tokens) costs on
/// the order of a few hundred micro-cents — integer math without losing
/// precision.
#[derive(Debug, Default)]
pub struct CostTracker {
    /// Total input tokens recorded so far.
    input_tokens: AtomicU64,
    /// Total output tokens recorded so far.
    output_tokens: AtomicU64,
    /// Running sum of `tokens * rate` products, where each rate is in
    /// micro-cents per million tokens. The un-divided product is kept here and
    /// the `/ 1_000_000` is applied on read, so small calls are not truncated
    /// to zero one at a time.
    scaled_cost_micro_cents: AtomicU64,
}

55impl CostTracker {
56    /// Fresh tracker at zero.
57    pub fn new() -> Self {
58        Self::default()
59    }
60
61    /// Record one call's usage under the given price table.
62    pub fn record(&self, usage: TokenUsage, price: Price) {
63        self.input_tokens
64            .fetch_add(usage.input as u64, Ordering::Relaxed);
65        self.output_tokens
66            .fetch_add(usage.output as u64, Ordering::Relaxed);
67        // price fields are micro-cents per million tokens.
68        // product = tokens * (mc/M) -> mc * tokens / M.
69        let product = (usage.input as u64).saturating_mul(price.input_per_token)
70            + (usage.output as u64).saturating_mul(price.output_per_token);
71        self.scaled_cost_micro_cents
72            .fetch_add(product, Ordering::Relaxed);
73    }
74
75    /// Total cost so far, in **micro-cents** (integer). Divide by 10 000 for cents.
76    pub fn spent_micro_cents(&self) -> u64 {
77        // Divide off the per-million scaling.
78        self.scaled_cost_micro_cents.load(Ordering::Relaxed) / 1_000_000
79    }
80
81    /// Total input tokens accumulated.
82    pub fn input_tokens(&self) -> u64 {
83        self.input_tokens.load(Ordering::Relaxed)
84    }
85
86    /// Total output tokens accumulated.
87    pub fn output_tokens(&self) -> u64 {
88        self.output_tokens.load(Ordering::Relaxed)
89    }
90
91    /// Emit a `tracing::info!` line summarizing accumulated cost.
92    pub fn log_session(&self) {
93        tracing::info!(
94            target: "evolve::cost",
95            input_tokens = self.input_tokens(),
96            output_tokens = self.output_tokens(),
97            micro_cents = self.spent_micro_cents(),
98            "evolve llm usage"
99        );
100    }
101}
102
#[cfg(test)]
mod tests {
    use super::*;

    /// Token counts and cost both accumulate across calls.
    #[test]
    fn record_twice_accumulates() {
        let tracker = CostTracker::new();
        tracker.record(
            TokenUsage {
                input: 100,
                output: 50,
            },
            Price::HAIKU,
        );
        tracker.record(
            TokenUsage {
                input: 200,
                output: 75,
            },
            Price::HAIKU,
        );
        assert_eq!(tracker.input_tokens(), 300);
        assert_eq!(tracker.output_tokens(), 125);
        // 300 * 250_000 + 125 * 1_250_000 = 231_250_000 scaled -> 231 mc.
        // Dividing per call instead would give 87 + 143 = 230, so this also
        // pins the "accumulate un-divided, divide on read" behavior.
        assert_eq!(tracker.spent_micro_cents(), 231);
    }

    /// One million tokens at the Haiku rates match the hand calculation.
    #[test]
    fn haiku_cost_math_matches_hand_calc() {
        let tracker = CostTracker::new();
        // 1_000_000 input tokens at $0.25/M = 25 cents = 250_000 micro-cents.
        tracker.record(
            TokenUsage {
                input: 1_000_000,
                output: 0,
            },
            Price::HAIKU,
        );
        assert_eq!(tracker.spent_micro_cents(), 250_000);

        let tracker2 = CostTracker::new();
        // 1_000_000 output tokens at $1.25/M = 125 cents = 1_250_000 micro-cents.
        tracker2.record(
            TokenUsage {
                input: 0,
                output: 1_000_000,
            },
            Price::HAIKU,
        );
        assert_eq!(tracker2.spent_micro_cents(), 1_250_000);
    }

    /// A realistic single call costs well under one cent.
    #[test]
    fn typical_challenger_call_cost_is_under_one_cent() {
        // Realistic: ~500 input + 200 output tokens per challenger generation.
        let tracker = CostTracker::new();
        tracker.record(
            TokenUsage {
                input: 500,
                output: 200,
            },
            Price::HAIKU,
        );
        // 500 * 250_000 / 1_000_000 = 125 mc input
        // 200 * 1_250_000 / 1_000_000 = 250 mc output
        // total = 375 micro-cents = 0.0375 cents = less than a penny.
        assert_eq!(tracker.spent_micro_cents(), 375);
    }

    /// Zero-priced usage still counts tokens but adds no cost.
    #[test]
    fn ollama_records_zero_cost() {
        let tracker = CostTracker::new();
        tracker.record(
            TokenUsage {
                input: 10_000,
                output: 10_000,
            },
            Price::OLLAMA,
        );
        assert_eq!(tracker.spent_micro_cents(), 0);
        assert_eq!(tracker.input_tokens(), 10_000);
        assert_eq!(tracker.output_tokens(), 10_000);
    }
}
184}