// evolve_llm/cost.rs
1//! In-memory token cost tracker. Purely observational — no enforcement.
2
3use crate::TokenUsage;
4use std::sync::atomic::{AtomicU64, Ordering};
5
/// Price table entry. Units: **micro-cents per million tokens**, where
/// 1 cent = 10 000 micro-cents.
///
/// Prices are stored per *million* tokens (the field names say "per token"
/// and are kept for API compatibility) so that even cheap models get integer
/// values. [`CostTracker`] multiplies raw token counts by these figures and
/// divides the accumulated product by 1 000 000 on read, so no precision is
/// lost to per-call rounding.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Price {
    /// Micro-cents per **million** input tokens.
    pub input_per_token: u64,
    /// Micro-cents per **million** output tokens.
    pub output_per_token: u64,
}

impl Price {
    /// Claude Haiku 4.5 public pricing: $0.25/M input, $1.25/M output.
    ///
    /// $0.25/M tokens = 25 cents/M = 250 000 micro-cents per million tokens.
    /// $1.25/M tokens = 125 cents/M = 1 250 000 micro-cents per million tokens.
    pub const HAIKU: Self = Self {
        input_per_token: 250_000,
        output_per_token: 1_250_000,
    };

    /// Ollama runs locally — free.
    pub const OLLAMA: Self = Self {
        input_per_token: 0,
        output_per_token: 0,
    };
}
40
/// Accumulates token usage and cost across calls. Thread-safe: each field is
/// an independent atomic bumped with `Relaxed` ordering (the counters need no
/// cross-field consistency, only eventual totals).
///
/// Cost is *reported* in **micro-cents** (1 cent = 10 000 micro-cents), but
/// internally the un-divided product `tokens * price` is accumulated — where
/// `Price` fields are micro-cents per million tokens — and the `/ 1_000_000`
/// scaling is applied only on read. Keeping the raw product avoids per-call
/// integer-division rounding loss.
#[derive(Debug, Default)]
pub struct CostTracker {
    /// Total input tokens recorded so far.
    input_tokens: AtomicU64,
    /// Total output tokens recorded so far.
    output_tokens: AtomicU64,
    /// Accumulated cost, scaled: `tokens * price.input_per_token / 1_000_000`.
    /// We keep the un-divided product here and divide on read.
    scaled_cost_micro_cents: AtomicU64,
}
54
55impl CostTracker {
56 /// Fresh tracker at zero.
57 pub fn new() -> Self {
58 Self::default()
59 }
60
61 /// Record one call's usage under the given price table.
62 pub fn record(&self, usage: TokenUsage, price: Price) {
63 self.input_tokens
64 .fetch_add(usage.input as u64, Ordering::Relaxed);
65 self.output_tokens
66 .fetch_add(usage.output as u64, Ordering::Relaxed);
67 // price fields are micro-cents per million tokens.
68 // product = tokens * (mc/M) -> mc * tokens / M.
69 let product = (usage.input as u64).saturating_mul(price.input_per_token)
70 + (usage.output as u64).saturating_mul(price.output_per_token);
71 self.scaled_cost_micro_cents
72 .fetch_add(product, Ordering::Relaxed);
73 }
74
75 /// Total cost so far, in **micro-cents** (integer). Divide by 10 000 for cents.
76 pub fn spent_micro_cents(&self) -> u64 {
77 // Divide off the per-million scaling.
78 self.scaled_cost_micro_cents.load(Ordering::Relaxed) / 1_000_000
79 }
80
81 /// Total input tokens accumulated.
82 pub fn input_tokens(&self) -> u64 {
83 self.input_tokens.load(Ordering::Relaxed)
84 }
85
86 /// Total output tokens accumulated.
87 pub fn output_tokens(&self) -> u64 {
88 self.output_tokens.load(Ordering::Relaxed)
89 }
90
91 /// Emit a `tracing::info!` line summarizing accumulated cost.
92 pub fn log_session(&self) {
93 tracing::info!(
94 target: "evolve::cost",
95 input_tokens = self.input_tokens(),
96 output_tokens = self.output_tokens(),
97 micro_cents = self.spent_micro_cents(),
98 "evolve llm usage"
99 );
100 }
101}
102
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn record_twice_accumulates() {
        let tracker = CostTracker::new();
        tracker.record(TokenUsage { input: 100, output: 50 }, Price::HAIKU);
        tracker.record(TokenUsage { input: 200, output: 75 }, Price::HAIKU);
        assert_eq!(tracker.input_tokens(), 300);
        assert_eq!(tracker.output_tokens(), 125);
    }

    #[test]
    fn haiku_cost_math_matches_hand_calc() {
        // Exactly one million input tokens: $0.25 = 25 cents = 250_000 micro-cents.
        let input_only = CostTracker::new();
        input_only.record(TokenUsage { input: 1_000_000, output: 0 }, Price::HAIKU);
        assert_eq!(input_only.spent_micro_cents(), 250_000);

        // Exactly one million output tokens: $1.25 = 125 cents = 1_250_000 micro-cents.
        let output_only = CostTracker::new();
        output_only.record(TokenUsage { input: 0, output: 1_000_000 }, Price::HAIKU);
        assert_eq!(output_only.spent_micro_cents(), 1_250_000);
    }

    #[test]
    fn typical_challenger_call_cost_is_under_one_cent() {
        // Realistic challenger generation: ~500 input + 200 output tokens.
        //   input:  500 * 250_000   / 1_000_000 = 125 micro-cents
        //   output: 200 * 1_250_000 / 1_000_000 = 250 micro-cents
        let tracker = CostTracker::new();
        tracker.record(TokenUsage { input: 500, output: 200 }, Price::HAIKU);
        // 375 micro-cents = 0.0375 cents — well under a penny.
        assert_eq!(tracker.spent_micro_cents(), 375);
    }

    #[test]
    fn ollama_records_zero_cost() {
        let tracker = CostTracker::new();
        tracker.record(TokenUsage { input: 10_000, output: 10_000 }, Price::OLLAMA);
        // Tokens are still counted even though the local model is free.
        assert_eq!(tracker.spent_micro_cents(), 0);
        assert_eq!(tracker.input_tokens(), 10_000);
        assert_eq!(tracker.output_tokens(), 10_000);
    }
}