codetether_agent/provider/
pricing.rs

1//! Canonical per-million-token pricing for known LLM models.
2//!
3//! This is the **single source of truth** for model pricing. Every
4//! subsystem (TUI cost badge, cost guardrail, benchmark runner) must
5//! delegate here rather than maintaining its own table.
6//!
7//! Prices are best-effort retail USD per 1M tokens `(input, output)`.
8//! Unknown models fall back to a conservative default.
9
/// Return `(input_price_per_million, output_price_per_million)` in USD
/// for a given model identifier.
///
/// Matching is a case-insensitive (ASCII) substring search against an
/// ordered rule table: the first rule with any matching pattern wins.
/// Because of that, a specific pattern must always be listed before a
/// broader pattern it overlaps with (e.g. `gpt-4o-mini` before `gpt-4o`,
/// `k1.5` before `kimi`). Identifiers that match no rule get the
/// default `(1.00, 3.00)`.
///
/// # Examples
///
/// ```rust
/// use codetether_agent::provider::pricing::pricing_for_model;
///
/// let (input, output) = pricing_for_model("claude-opus-4-7");
/// assert!(input > 0.0 && output > 0.0);
/// ```
pub fn pricing_for_model(model: &str) -> (f64, f64) {
    // Ordered `(patterns, (input, output))` rules; evaluated top to bottom.
    const RULES: &[(&[&str], (f64, f64))] = &[
        // ── Most specific patterns first ───────────────────────────
        (&["gpt-4o-mini"], (0.15, 0.60)),
        (&["gpt-4o"], (2.50, 10.00)),
        (&["gpt-4-turbo"], (10.00, 30.00)),
        (&["gpt-5"], (5.00, 15.00)),
        (&["gpt-4"], (30.00, 60.00)),
        (&["claude-3-5-sonnet", "claude-sonnet-4"], (3.00, 15.00)),
        (&["claude-3-5-haiku", "claude-haiku"], (0.80, 4.00)),
        // Anthropic Opus 4.x retail: $15 in / $75 out per million.
        (&["claude-opus"], (15.00, 75.00)),
        (&["gemini-2.5-pro", "gemini-2-pro"], (1.25, 10.00)),
        (&["gemini-1.5-pro"], (1.25, 5.00)),
        (&["gemini"], (0.075, 0.30)),
        (&["glm-5", "glm-4"], (0.50, 1.50)),
        // `k1.5` must precede the generic `kimi` rule; otherwise an
        // identifier such as `kimi-k1.5` is swallowed by `kimi` and the
        // k1.5 price is never applied.
        (&["k1.5"], (8.00, 8.00)),
        (&["kimi", "k1.6"], (6.00, 6.00)),
    ];
    // Fallback for identifiers that match no rule above.
    const DEFAULT_PRICING: (f64, f64) = (1.00, 3.00);

    let m = model.to_ascii_lowercase();
    RULES
        .iter()
        .find(|(patterns, _)| patterns.iter().any(|&p| m.contains(p)))
        .map_or(DEFAULT_PRICING, |&(_, price)| price)
}
59
/// Fraction of `input_price` charged for cache-read tokens.
///
/// The model identifier is lower-cased (ASCII) and checked for family
/// substrings in this order; the first family that matches decides:
///
/// * `claude` / `anthropic` — `0.10`
/// * `gpt-` / `codex` / `o1` / `o3` — `0.50`
/// * `gemini` — `0.25`
/// * `glm` / `kimi` / `k1.` — `0.20` *(best-effort default)*
///
/// Anything else gets `0.10`, the lowest multiplier in the table, so an
/// unknown model is never billed above any known family's cache-read rate.
pub fn cache_read_multiplier(model: &str) -> f64 {
    let lowered = model.to_ascii_lowercase();
    // True when the lower-cased identifier contains at least one needle.
    let mentions = |needles: &[&str]| needles.iter().any(|&needle| lowered.contains(needle));

    if mentions(&["claude", "anthropic"]) {
        return 0.10;
    }
    if mentions(&["gpt-", "codex", "o1", "o3"]) {
        return 0.50;
    }
    if mentions(&["gemini"]) {
        return 0.25;
    }
    if mentions(&["glm", "kimi", "k1."]) {
        return 0.20;
    }
    // No known family matched.
    0.10
}
84
/// Fraction of `input_price` charged for cache-write tokens.
///
/// Identifiers containing `claude` or `anthropic` (ASCII
/// case-insensitive) get the `1.25` cache-write surcharge. Every other
/// identifier gets `0.0`: providers that cache implicitly (OpenAI,
/// Gemini, GLM) bundle the write into the regular input price.
pub fn cache_write_multiplier(model: &str) -> f64 {
    let lowered = model.to_ascii_lowercase();
    let bills_cache_writes = ["claude", "anthropic"]
        .iter()
        .any(|&family| lowered.contains(family));

    if bills_cache_writes {
        1.25
    } else {
        0.0
    }
}
97
98/// Estimate the running cost (USD) of the current session, using the
99/// global [`crate::telemetry::TOKEN_USAGE`] counters and
100/// [`pricing_for_model`]. Applies provider-specific prompt-cache
101/// multipliers via [`cache_read_multiplier`] and [`cache_write_multiplier`].
102///
103/// Assumes `prompt_tokens` in the counter already excludes cached input —
104/// which is how [`crate::telemetry::AtomicTokenCounter::record_model_usage_with_cache`]
105/// is invoked from every provider adapter.
106pub fn session_cost_usd() -> f64 {
107    use crate::telemetry::TOKEN_USAGE;
108
109    TOKEN_USAGE
110        .model_snapshots()
111        .iter()
112        .map(|s| {
113            let (input_price, output_price) = pricing_for_model(&s.name);
114            let (cache_read, cache_write) = TOKEN_USAGE.cache_usage_for(&s.name);
115            let per_million = |n: u64, price: f64| (n as f64 / 1_000_000.0) * price;
116            per_million(s.prompt_tokens, input_price)
117                + per_million(s.completion_tokens, output_price)
118                + per_million(cache_read, input_price * cache_read_multiplier(&s.name))
119                + per_million(cache_write, input_price * cache_write_multiplier(&s.name))
120        })
121        .sum()
122}
codetether_agent/provider/pricing.rs

codetether_agent/provider/
pricing.rs