1use std::collections::HashMap;
16
17use serde::{Deserialize, Serialize};
18
19use crate::agentlog::Record;
20use crate::diff::axes::{Axis, AxisStat, Flag};
21use crate::diff::bootstrap::{median, paired_ci};
22
/// Per-token price entry for one model.
///
/// `input` and `output` are the base rates. Every other rate is optional
/// (serde-defaulted to 0.0); `cost_of` treats a 0.0 optional rate as
/// "not priced separately" and falls back to the matching base rate.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct ModelPricing {
    /// Base price per input token.
    pub input: f64,
    /// Base price per output token.
    pub output: f64,
    /// Price per cache-read input token; 0.0 means "bill at `input`".
    #[serde(default)]
    pub cached_input: f64,
    /// Price per 5-minute cache-write token; 0.0 means "bill at `input`".
    #[serde(default)]
    pub cached_write_5m: f64,
    /// Price per 1-hour cache-write token; 0.0 means "bill at `input`".
    #[serde(default)]
    pub cached_write_1h: f64,
    /// Price per reasoning/thinking token; 0.0 means "bill at `output`".
    #[serde(default)]
    pub reasoning: f64,
    /// Multiplier applied to the total cost of batched requests
    /// (e.g. 0.5 halves the cost); 0.0 disables the discount.
    #[serde(default)]
    pub batch_discount: f64,
}
54
55impl ModelPricing {
56 pub fn simple(input: f64, output: f64) -> Self {
58 Self {
59 input,
60 output,
61 cached_input: 0.0,
62 cached_write_5m: 0.0,
63 cached_write_1h: 0.0,
64 reasoning: 0.0,
65 batch_discount: 0.0,
66 }
67 }
68}
69
/// Pricing table keyed by model name; dated snapshot names resolve to
/// their bare alias via `lookup_with_snapshot_fallback`.
pub type Pricing = HashMap<String, ModelPricing>;
72
73fn lookup_with_snapshot_fallback<'a>(
79 pricing: &'a Pricing,
80 model: &str,
81) -> Option<&'a ModelPricing> {
82 if let Some(p) = pricing.get(model) {
83 return Some(p);
84 }
85 if let Some(base) = strip_snapshot_tail(model) {
86 return pricing.get(base);
87 }
88 None
89}
90
/// Strip a trailing dated-snapshot suffix from a model name, returning the
/// bare alias, or `None` if no recognized suffix is present.
///
/// Two suffix shapes are recognized:
/// - `-YYYY-MM-DD` (OpenAI style), e.g. "gpt-5-2025-08-07" -> "gpt-5"
/// - `-YYYYMMDD` (Anthropic style), e.g. "claude-opus-4-7-20250219" -> "claude-opus-4-7"
///
/// All checks are on raw bytes; the slice point is always at an ASCII '-'
/// byte, so slicing is char-boundary safe for multi-byte names.
fn strip_snapshot_tail(model: &str) -> Option<&str> {
    fn all_digits(bytes: &[u8]) -> bool {
        bytes.iter().all(u8::is_ascii_digit)
    }
    let b = model.as_bytes();
    let n = b.len();
    // ISO form: "-YYYY-MM-DD" (11 bytes including the separating dash).
    if n > 11 && b[n - 11] == b'-' {
        let d = &b[n - 10..];
        if all_digits(&d[..4])
            && d[4] == b'-'
            && all_digits(&d[5..7])
            && d[7] == b'-'
            && all_digits(&d[8..])
        {
            return Some(&model[..n - 11]);
        }
    }
    // Compact form: "-YYYYMMDD" (9 bytes including the separating dash).
    if n > 9 && b[n - 9] == b'-' && all_digits(&b[n - 8..]) {
        return Some(&model[..n - 9]);
    }
    None
}
117
118pub(crate) fn cost_of(r: &Record, pricing: &Pricing) -> Option<f64> {
119 let model = r.payload.get("model")?.as_str()?;
120 let usage = r.payload.get("usage")?;
121 let input = usage.get("input_tokens")?.as_f64()?;
122 let output = usage.get("output_tokens")?.as_f64()?;
123 let cached_input = usage
124 .get("cached_input_tokens")
125 .and_then(|v| v.as_f64())
126 .unwrap_or(0.0);
127 let cached_write_5m = usage
128 .get("cached_write_5m_tokens")
129 .and_then(|v| v.as_f64())
130 .unwrap_or(0.0);
131 let cached_write_1h = usage
132 .get("cached_write_1h_tokens")
133 .and_then(|v| v.as_f64())
134 .unwrap_or(0.0);
135 let thinking = usage
136 .get("thinking_tokens")
137 .and_then(|v| v.as_f64())
138 .unwrap_or(0.0);
139 if !(input.is_finite()
143 && output.is_finite()
144 && cached_input.is_finite()
145 && cached_write_5m.is_finite()
146 && cached_write_1h.is_finite()
147 && thinking.is_finite())
148 {
149 return Some(0.0);
150 }
151 let Some(p) = lookup_with_snapshot_fallback(pricing, model) else {
152 return Some(0.0);
153 };
154 let cached_rate = if p.cached_input > 0.0 {
155 p.cached_input
156 } else {
157 p.input
158 };
159 let reasoning_rate = if p.reasoning > 0.0 {
160 p.reasoning
161 } else {
162 p.output
163 };
164 let write_5m_rate = if p.cached_write_5m > 0.0 {
166 p.cached_write_5m
167 } else {
168 p.input
169 };
170 let write_1h_rate = if p.cached_write_1h > 0.0 {
171 p.cached_write_1h
172 } else {
173 p.input
174 };
175 let mut cost = input * p.input
180 + cached_input * cached_rate
181 + cached_write_5m * write_5m_rate
182 + cached_write_1h * write_1h_rate
183 + output * p.output
184 + thinking * reasoning_rate;
185 let batch = r
189 .payload
190 .get("batch")
191 .and_then(|v| v.as_bool())
192 .unwrap_or(false);
193 if batch && p.batch_discount > 0.0 {
194 cost *= p.batch_discount;
195 }
196 Some(cost)
197}
198
199fn pair_is_priced(br: &Record, cr: &Record, pricing: &Pricing) -> bool {
205 fn model_in_table(r: &Record, pricing: &Pricing) -> bool {
206 r.payload
207 .get("model")
208 .and_then(|m| m.as_str())
209 .is_some_and(|m| lookup_with_snapshot_fallback(pricing, m).is_some())
210 }
211 model_in_table(br, pricing) && model_in_table(cr, pricing)
212}
213
214pub fn compute(pairs: &[(&Record, &Record)], pricing: &Pricing, seed: Option<u64>) -> AxisStat {
226 let mut b = Vec::with_capacity(pairs.len());
227 let mut c = Vec::with_capacity(pairs.len());
228 let mut priced_pairs = 0usize;
229 for (br, cr) in pairs {
230 if let (Some(bv), Some(cv)) = (cost_of(br, pricing), cost_of(cr, pricing)) {
231 b.push(bv);
232 c.push(cv);
233 if pair_is_priced(br, cr, pricing) {
234 priced_pairs += 1;
235 }
236 }
237 }
238 if b.is_empty() {
239 let mut stat = AxisStat::empty(Axis::Cost);
240 if !pairs.is_empty() {
244 stat.flags.push(Flag::NoPricing);
245 }
246 return stat;
247 }
248 let bm = median(&b);
249 let cm = median(&c);
250 let delta = cm - bm;
251 let ci = paired_ci(&b, &c, |bs, cs| median(cs) - median(bs), 0, seed);
252 let mut stat = AxisStat::new_value(Axis::Cost, bm, cm, delta, ci.low, ci.high, b.len());
253 if priced_pairs * 2 < pairs.len() {
258 stat.flags.push(Flag::NoPricing);
259 }
260 stat
261}
262
// Unit tests covering pricing lookup, cache/reasoning tiers, batch
// discounts, snapshot-name fallback, and NoPricing flagging.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agentlog::Kind;
    use crate::diff::axes::Severity;
    use serde_json::json;

    /// Minimal ChatResponse record with the given input/output token
    /// counts (thinking_tokens fixed at 0).
    fn response(model: &str, input: u64, output: u64) -> Record {
        Record::new(
            Kind::ChatResponse,
            json!({
                "model": model,
                "content": [],
                "stop_reason": "end_turn",
                "latency_ms": 0,
                "usage": {"input_tokens": input, "output_tokens": output, "thinking_tokens": 0},
            }),
            "2026-04-21T10:00:00Z",
            None,
        )
    }

    /// Like `response`, but with a caller-supplied `usage` object.
    fn response_with_usage(model: &str, usage: serde_json::Value) -> Record {
        Record::new(
            Kind::ChatResponse,
            json!({
                "model": model,
                "content": [],
                "stop_reason": "end_turn",
                "latency_ms": 0,
                "usage": usage,
            }),
            "2026-04-21T10:00:00Z",
            None,
        )
    }

    #[test]
    fn pricing_lookup_drives_cost() {
        let mut pricing = Pricing::new();
        pricing.insert("opus".to_string(), ModelPricing::simple(0.000015, 0.000075));
        pricing.insert(
            "haiku".to_string(),
            ModelPricing::simple(0.0000008, 0.000004),
        );
        // Baseline opus: 1000*0.000015 + 500*0.000075 = 0.0525 per record;
        // candidate haiku: 0.0028 per record, so delta < 0.
        let baseline: Vec<Record> = (0..10).map(|_| response("opus", 1000, 500)).collect();
        let candidate: Vec<Record> = (0..10).map(|_| response("haiku", 1000, 500)).collect();
        let pairs: Vec<(&Record, &Record)> = baseline.iter().zip(candidate.iter()).collect();
        let stat = compute(&pairs, &pricing, Some(1));
        assert!(stat.delta < 0.0);
        // Assumes AxisStat classifies a drop this large as Severe — see
        // severity thresholds in crate::diff::axes.
        assert_eq!(stat.severity, Severity::Severe);
    }

    #[test]
    fn unknown_model_costs_zero() {
        // Empty table: cost_of prices unknown models at 0.0 rather than
        // dropping the pair.
        let pricing = Pricing::new();
        let r = response("mystery", 1000, 500);
        let pairs = [(&r, &r)];
        let stat = compute(&pairs, &pricing, Some(1));
        assert_eq!(stat.baseline_median, 0.0);
    }

    #[test]
    fn no_pricing_flag_when_table_is_empty_but_pairs_exist() {
        // Zero priced pairs out of three -> NoPricing flag, zero delta.
        let pricing = Pricing::new();
        let r = response("mystery", 1000, 500);
        let pairs = [(&r, &r), (&r, &r), (&r, &r)];
        let stat = compute(&pairs, &pricing, Some(1));
        assert!(stat.flags.contains(&Flag::NoPricing));
        assert_eq!(stat.delta, 0.0);
    }

    #[test]
    fn no_pricing_flag_when_most_models_unpriced() {
        // Only 1 of 3 pairs is priced (1*2 < 3) -> NoPricing flag.
        let mut pricing = Pricing::new();
        pricing.insert("opus".to_string(), ModelPricing::simple(0.000015, 0.000075));
        let priced = response("opus", 1000, 500);
        let unpriced1 = response("sonnet-unlisted", 1000, 500);
        let unpriced2 = response("gpt-x-unlisted", 1000, 500);
        let pairs = [
            (&priced, &priced),
            (&unpriced1, &unpriced1),
            (&unpriced2, &unpriced2),
        ];
        let stat = compute(&pairs, &pricing, Some(1));
        assert!(stat.flags.contains(&Flag::NoPricing));
    }

    #[test]
    fn no_pricing_flag_absent_when_all_pairs_priced() {
        let mut pricing = Pricing::new();
        pricing.insert("opus".to_string(), ModelPricing::simple(0.000015, 0.000075));
        let r = response("opus", 1000, 500);
        let pairs = [(&r, &r), (&r, &r)];
        let stat = compute(&pairs, &pricing, Some(1));
        assert!(!stat.flags.contains(&Flag::NoPricing));
    }

    #[test]
    fn no_pricing_flag_absent_when_pairs_empty() {
        // No pairs at all is "nothing to price", not "pricing missing".
        let pricing = Pricing::new();
        let pairs: Vec<(&Record, &Record)> = Vec::new();
        let stat = compute(&pairs, &pricing, Some(1));
        assert!(!stat.flags.contains(&Flag::NoPricing));
        assert_eq!(stat.n, 0);
    }

    #[test]
    fn cached_input_tokens_billed_at_cheaper_rate() {
        let mut pricing = Pricing::new();
        pricing.insert(
            "opus".to_string(),
            ModelPricing {
                input: 0.000015,
                output: 0.000075,
                cached_input: 0.0000015,
                cached_write_5m: 0.0,
                cached_write_1h: 0.0,
                reasoning: 0.0,
                batch_discount: 0.0,
            },
        );
        let r = response_with_usage(
            "opus",
            json!({
                "input_tokens": 1000,
                "output_tokens": 500,
                "thinking_tokens": 0,
                "cached_input_tokens": 1000,
            }),
        );
        let pairs = [(&r, &r)];
        let stat = compute(&pairs, &pricing, Some(1));
        // 1000*0.000015 + 500*0.000075 + 1000*0.0000015 = 0.054
        assert!((stat.baseline_median - 0.054).abs() < 1e-9);
    }

    #[test]
    fn reasoning_tokens_billed_at_reasoning_rate() {
        let mut pricing = Pricing::new();
        pricing.insert(
            "gpt-5".to_string(),
            ModelPricing {
                input: 0.000010,
                output: 0.000040,
                cached_input: 0.0,
                cached_write_5m: 0.0,
                cached_write_1h: 0.0,
                reasoning: 0.000060,
                batch_discount: 0.0,
            },
        );
        let r = response_with_usage(
            "gpt-5",
            json!({
                "input_tokens": 100,
                "output_tokens": 100,
                "thinking_tokens": 500,
            }),
        );
        let pairs = [(&r, &r)];
        let stat = compute(&pairs, &pricing, Some(1));
        // 100*0.00001 + 100*0.00004 + 500*0.00006 = 0.035
        assert!((stat.baseline_median - 0.035).abs() < 1e-6);
    }

    #[test]
    fn anthropic_cache_write_tiers_are_billed_separately() {
        let mut pricing = Pricing::new();
        pricing.insert(
            "opus".to_string(),
            ModelPricing {
                input: 0.000015,
                output: 0.000075,
                cached_input: 0.0000015,
                cached_write_5m: 0.00001875,
                cached_write_1h: 0.00003,
                reasoning: 0.0,
                batch_discount: 0.0,
            },
        );
        let r = Record::new(
            Kind::ChatResponse,
            json!({
                "model": "opus",
                "content": [],
                "stop_reason": "end_turn",
                "latency_ms": 0,
                "usage": {
                    "input_tokens": 1000,
                    "output_tokens": 200,
                    "thinking_tokens": 0,
                    "cached_input_tokens": 500,
                    "cached_write_5m_tokens": 200,
                    "cached_write_1h_tokens": 100,
                },
            }),
            "2026-04-21T10:00:00Z",
            None,
        );
        let pairs = [(&r, &r)];
        let stat = compute(&pairs, &pricing, Some(1));
        // 0.015 + 0.015 + 0.00075 + 0.00375 + 0.003 = 0.0375
        assert!(
            (stat.baseline_median - 0.0375).abs() < 1e-6,
            "got {}",
            stat.baseline_median
        );
    }

    #[test]
    fn nan_usage_values_produce_zero_cost_not_phantom_inf() {
        // NOTE(review): serde_json cannot represent NaN, so `json!` stores
        // cached_input_tokens as null here; cost_of then takes the
        // missing-field 0.0 fallback rather than the is_finite guard.
        // Confirm whether this test is meant to exercise the guard itself.
        let mut pricing = Pricing::new();
        pricing.insert("m".to_string(), ModelPricing::simple(0.001, 0.002));
        let r = Record::new(
            Kind::ChatResponse,
            json!({
                "model": "m",
                "content": [],
                "stop_reason": "end_turn",
                "latency_ms": 0,
                "usage": {
                    "input_tokens": 100.0,
                    "output_tokens": 100.0,
                    "thinking_tokens": 0,
                    "cached_input_tokens": f64::NAN,
                },
            }),
            "2026-04-21T10:00:00Z",
            None,
        );
        let pairs = [(&r, &r)];
        let stat = compute(&pairs, &pricing, Some(1));
        assert!(stat.baseline_median.is_finite());
        assert_eq!(stat.severity, Severity::None);
    }

    #[test]
    fn batch_flag_applies_discount() {
        let mut pricing = Pricing::new();
        pricing.insert(
            "opus".to_string(),
            ModelPricing {
                input: 0.000015,
                output: 0.000075,
                cached_input: 0.0,
                cached_write_5m: 0.0,
                cached_write_1h: 0.0,
                reasoning: 0.0,
                batch_discount: 0.5,
            },
        );
        let batched = Record::new(
            Kind::ChatResponse,
            json!({
                "model": "opus",
                "content": [],
                "stop_reason": "end_turn",
                "latency_ms": 0,
                "batch": true,
                "usage": {"input_tokens": 1000, "output_tokens": 500, "thinking_tokens": 0},
            }),
            "2026-04-21T10:00:00Z",
            None,
        );
        let non_batched = response("opus", 1000, 500);
        let pairs_batched = [(&batched, &batched)];
        let pairs_normal = [(&non_batched, &non_batched)];
        let stat_b = compute(&pairs_batched, &pricing, Some(1));
        let stat_n = compute(&pairs_normal, &pricing, Some(1));
        // batch_discount is a multiplier: batched cost is exactly half.
        assert!((stat_b.baseline_median - stat_n.baseline_median * 0.5).abs() < 1e-9);
    }

    #[test]
    fn snapshot_tail_strips_iso_dates() {
        // Both OpenAI "-YYYY-MM-DD" and Anthropic "-YYYYMMDD" suffixes
        // strip; names without a date suffix return None.
        assert_eq!(strip_snapshot_tail("gpt-5-2025-08-07"), Some("gpt-5"));
        assert_eq!(
            strip_snapshot_tail("gpt-4o-mini-2024-07-18"),
            Some("gpt-4o-mini"),
        );
        assert_eq!(
            strip_snapshot_tail("claude-opus-4-7-20250219"),
            Some("claude-opus-4-7"),
        );
        assert_eq!(strip_snapshot_tail("gpt-5"), None);
        assert_eq!(strip_snapshot_tail("gpt-4o-mini"), None);
        assert_eq!(strip_snapshot_tail("o1"), None);
    }

    #[test]
    fn cost_resolves_dated_snapshot_to_bare_alias() {
        let mut pricing = Pricing::new();
        pricing.insert(
            "gpt-5".to_string(),
            ModelPricing {
                input: 0.000010,
                output: 0.000040,
                cached_input: 0.0,
                cached_write_5m: 0.0,
                cached_write_1h: 0.0,
                reasoning: 0.0,
                batch_discount: 0.0,
            },
        );
        // 100*0.00001 + 50*0.00004 = 0.003, priced via the "gpt-5" alias.
        let r = response("gpt-5-2025-08-07", 100, 50);
        let cost = cost_of(&r, &pricing).unwrap();
        assert!((cost - 0.003).abs() < 1e-9, "got {}", cost);
        let pairs = [(&r, &r)];
        let stat = compute(&pairs, &pricing, Some(42));
        assert!(
            !stat.flags.contains(&Flag::NoPricing),
            "pair_is_priced should accept dated snapshots"
        );
    }
}