codetether_agent/provider/pricing.rs
1//! Canonical per-million-token pricing for known LLM models.
2//!
3//! This is the **single source of truth** for model pricing. Every
4//! subsystem (TUI cost badge, cost guardrail, benchmark runner) must
5//! delegate here rather than maintaining its own table.
6//!
7//! Prices are best-effort retail USD per 1M tokens `(input, output)`.
8//! Unknown models fall back to a conservative default.
9
/// Return `(input_price_per_million, output_price_per_million)` in USD
/// for a given model identifier.
///
/// Matching is a case-insensitive (ASCII) substring search against an
/// ordered pattern table: the **first** row with any matching pattern
/// wins. Rows are therefore ordered most-specific-first — for example
/// `gpt-4o-mini` precedes `gpt-4o`, which precedes `gpt-4`, and `k1.5`
/// precedes the generic `kimi` family so that an id such as `kimi-k1.5`
/// resolves to the `k1.5` price rather than the family price.
///
/// Identifiers that match no row fall back to `(1.00, 3.00)`.
///
/// # Examples
///
/// ```rust
/// use codetether_agent::provider::pricing::pricing_for_model;
///
/// let (input, output) = pricing_for_model("claude-opus-4-7");
/// assert!(input > 0.0 && output > 0.0);
/// ```
pub fn pricing_for_model(model: &str) -> (f64, f64) {
    // Fallback for identifiers that match no row of the table.
    const DEFAULT_PRICE: (f64, f64) = (1.00, 3.00);

    // Ordered lookup table: (substring patterns, (input, output)).
    // Order matters — a more specific pattern must appear before any
    // broader pattern it also satisfies.
    const PRICE_TABLE: &[(&[&str], (f64, f64))] = &[
        (&["gpt-4o-mini"], (0.15, 0.60)),
        (&["gpt-4o"], (2.50, 10.00)),
        (&["gpt-4-turbo"], (10.00, 30.00)),
        (&["gpt-5"], (5.00, 15.00)),
        (&["gpt-4"], (30.00, 60.00)),
        (&["claude-3-5-sonnet", "claude-sonnet-4"], (3.00, 15.00)),
        (&["claude-3-5-haiku", "claude-haiku"], (0.80, 4.00)),
        // Anthropic Opus 4.x retail: $15 in / $75 out per million.
        (&["claude-opus"], (15.00, 75.00)),
        (&["gemini-2.5-pro", "gemini-2-pro"], (1.25, 10.00)),
        (&["gemini-1.5-pro"], (1.25, 5.00)),
        (&["gemini"], (0.075, 0.30)),
        (&["glm-5", "glm-4"], (0.50, 1.50)),
        // `k1.5` must precede `kimi`: ids like "kimi-k1.5" contain both,
        // and the version-specific price is the intended one.
        (&["k1.5"], (8.00, 8.00)),
        (&["kimi", "k1.6"], (6.00, 6.00)),
    ];

    let lowered = model.to_ascii_lowercase();

    PRICE_TABLE
        .iter()
        .find(|(patterns, _)| patterns.iter().any(|&pattern| lowered.contains(pattern)))
        .map_or(DEFAULT_PRICE, |&(_, price)| price)
}
59
/// Fraction of `input_price` billed for tokens served from the prompt
/// cache, chosen by provider family (case-insensitive substring match,
/// checked in this order):
///
/// * Anthropic / Bedrock Claude (`claude`, `anthropic`) — `0.10`
/// * OpenAI (`gpt-`, `codex`, `o1`, `o3`) — `0.50`
/// * Google Gemini (`gemini`) — `0.25`
/// * Z.AI GLM / Moonshot (`glm`, `kimi`, `k1.`) — `0.20` *(best-effort)*
///
/// Any other model gets `0.10`, the smallest multiplier in the table, so
/// cached reads for an unknown model are never over-counted.
pub fn cache_read_multiplier(model: &str) -> f64 {
    let lowered = model.to_ascii_lowercase();
    // True when the lowercased id contains at least one of `needles`.
    let mentions = |needles: &[&str]| needles.iter().any(|&needle| lowered.contains(needle));

    if mentions(&["claude", "anthropic"]) {
        return 0.10;
    }
    if mentions(&["gpt-", "codex", "o1", "o3"]) {
        return 0.50;
    }
    if mentions(&["gemini"]) {
        return 0.25;
    }
    if mentions(&["glm", "kimi", "k1."]) {
        return 0.20;
    }
    // Unknown provider family.
    0.10
}
84
/// Fraction of `input_price` billed for tokens written to the prompt
/// cache.
///
/// Anthropic/Bedrock models (ids containing `claude` or `anthropic`,
/// case-insensitively) carry a cache-write surcharge of `1.25`. Every
/// other provider (OpenAI, Gemini, GLM, ...) caches implicitly, with the
/// write already covered by the regular input price, so the multiplier
/// is `0.0`.
pub fn cache_write_multiplier(model: &str) -> f64 {
    let lowered = model.to_ascii_lowercase();
    let is_anthropic = ["claude", "anthropic"]
        .iter()
        .any(|&tag| lowered.contains(tag));

    if is_anthropic {
        1.25
    } else {
        0.0
    }
}
97
98/// Estimate the running cost (USD) of the current session, using the
99/// global [`crate::telemetry::TOKEN_USAGE`] counters and
100/// [`pricing_for_model`]. Applies provider-specific prompt-cache
101/// multipliers via [`cache_read_multiplier`] and [`cache_write_multiplier`].
102///
103/// Assumes `prompt_tokens` in the counter already excludes cached input —
104/// which is how [`crate::telemetry::AtomicTokenCounter::record_model_usage_with_cache`]
105/// is invoked from every provider adapter.
106pub fn session_cost_usd() -> f64 {
107 use crate::telemetry::TOKEN_USAGE;
108
109 TOKEN_USAGE
110 .model_snapshots()
111 .iter()
112 .map(|s| {
113 let (input_price, output_price) = pricing_for_model(&s.name);
114 let (cache_read, cache_write) = TOKEN_USAGE.cache_usage_for(&s.name);
115 let per_million = |n: u64, price: f64| (n as f64 / 1_000_000.0) * price;
116 per_million(s.prompt_tokens, input_price)
117 + per_million(s.completion_tokens, output_price)
118 + per_million(cache_read, input_price * cache_read_multiplier(&s.name))
119 + per_million(cache_write, input_price * cache_write_multiplier(&s.name))
120 })
121 .sum()
122}