Skip to main content

agx_core/
pricing.rs

1//! Per-model pricing lookup. Converts a `Step`'s token counters into a USD
2//! cost estimate.
3//!
4//! Prices are hardcoded and WILL drift as providers change their rates. Each
5//! entry carries a `last_verified` date so maintainers can audit staleness
6//! without re-reading every source. See the comment at the top of the PRICES
7//! array for the source-of-truth pages.
8//!
9//! When a model name is unknown, `cost_usd` returns `None` rather than
10//! guessing — agx doesn't fabricate cost numbers.
11//!
12//! Cache pricing follows Anthropic's public model: cache reads are billed
13//! at ~10% of the input rate, cache creation at ~125% of the input rate.
//! OpenAI and Google structure caching differently; for those providers
//! agx bills `cache_read` at a per-model discounted rate and, because no
//! separate cache-create rate is recorded, bills `cache_create` at the
//! plain input rate until better data is available.
17//!
//! `ModelPricing::last_verified` is audited by a dedicated test but not
//! read at runtime, hence its field-level `#[allow(dead_code)]`. The only
//! other allows in this module cover the `u64` -> `f64` token-count casts
//! in `cost_usd`.
21
/// Price sheet for a single model, expressed in USD per one million tokens.
///
/// Kept deliberately flat so that supporting another model means adding
/// exactly one row to the pricing table.
#[derive(Clone, Copy, Debug)]
pub struct ModelPricing {
    /// Model identifier this row applies to.
    pub name: &'static str,
    /// USD charged per 1M input tokens.
    pub input_per_mtoken: f64,
    /// USD charged per 1M output tokens.
    pub output_per_mtoken: f64,
    /// Dedicated cache-read rate, for providers that publish one
    /// (e.g. Anthropic). `None` means cache-read tokens are billed at
    /// the input rate.
    pub cache_read_per_mtoken: Option<f64>,
    /// Dedicated cache-create rate, for providers that publish one.
    /// `None` means cache-create tokens are billed at the input rate.
    pub cache_create_per_mtoken: Option<f64>,
    /// When a human last checked this row against the provider's pricing
    /// page. Kept purely for auditing; nothing reads it at runtime.
    #[allow(dead_code)]
    pub last_verified: &'static str,
}
40
// Static pricing table: one `ModelPricing` row per supported model, grouped
// by provider. Every rate is USD per 1M tokens.
//
// Source pages at time of last verification:
//   Anthropic: https://www.anthropic.com/pricing
//   OpenAI:    https://platform.openai.com/docs/pricing
//   Google:    https://ai.google.dev/gemini-api/docs/pricing
//
// Rates below are ESTIMATES. Treat cost output as a ballpark until a
// maintainer verifies against the current pricing page.
//
// Rows with `cache_create_per_mtoken: None` have cache-create tokens billed
// at the row's input rate by `cost_usd`.
const PRICES: &[ModelPricing] = &[
    // --- Anthropic Claude 4.6 family ---
    // In each Anthropic row the cache-read rate is 10% of the input rate and
    // the cache-create rate is 125% of the input rate.
    ModelPricing {
        name: "claude-opus-4-6",
        input_per_mtoken: 15.0,
        output_per_mtoken: 75.0,
        cache_read_per_mtoken: Some(1.50),
        cache_create_per_mtoken: Some(18.75),
        last_verified: "2026-04-15 (estimate; unverified)",
    },
    ModelPricing {
        name: "claude-sonnet-4-6",
        input_per_mtoken: 3.0,
        output_per_mtoken: 15.0,
        cache_read_per_mtoken: Some(0.30),
        cache_create_per_mtoken: Some(3.75),
        last_verified: "2026-04-15 (estimate; unverified)",
    },
    ModelPricing {
        name: "claude-haiku-4-5",
        input_per_mtoken: 1.0,
        output_per_mtoken: 5.0,
        cache_read_per_mtoken: Some(0.10),
        cache_create_per_mtoken: Some(1.25),
        last_verified: "2026-04-15 (estimate; unverified)",
    },
    // --- OpenAI ---
    // Explicit cache-read rate per row; no separate cache-create rate.
    ModelPricing {
        name: "gpt-5",
        input_per_mtoken: 10.0,
        output_per_mtoken: 30.0,
        cache_read_per_mtoken: Some(2.50),
        cache_create_per_mtoken: None,
        last_verified: "2026-04-15 (estimate; unverified)",
    },
    ModelPricing {
        name: "gpt-5-mini",
        input_per_mtoken: 0.5,
        output_per_mtoken: 2.0,
        cache_read_per_mtoken: Some(0.10),
        cache_create_per_mtoken: None,
        last_verified: "2026-04-15 (estimate; unverified)",
    },
    // --- Google Gemini ---
    // Explicit cache-read rate per row; no separate cache-create rate.
    // NOTE(review): Google's published model ids appear to use a dot
    // (e.g. `gemini-2.5-pro`); `lookup` is an exact match, so confirm that
    // callers normalise names to this dashed form.
    ModelPricing {
        name: "gemini-2-5-pro",
        input_per_mtoken: 2.50,
        output_per_mtoken: 15.0,
        cache_read_per_mtoken: Some(0.625),
        cache_create_per_mtoken: None,
        last_verified: "2026-04-15 (estimate; unverified)",
    },
    ModelPricing {
        name: "gemini-2-5-flash",
        input_per_mtoken: 0.30,
        output_per_mtoken: 2.50,
        cache_read_per_mtoken: Some(0.075),
        cache_create_per_mtoken: None,
        last_verified: "2026-04-15 (estimate; unverified)",
    },
];
109
110/// Look up pricing for a given model name. Returns `None` when the model is
111/// not in the table. Uses case-insensitive exact match — no fuzzy matching,
112/// no family fallback (avoids silent wrong numbers for new variants).
113#[must_use]
114pub fn lookup(model: &str) -> Option<&'static ModelPricing> {
115    PRICES.iter().find(|p| p.name.eq_ignore_ascii_case(model))
116}
117
118/// Compute USD cost for a single step given its token counters and model.
119/// Returns `None` when the model is unknown OR when there are no non-zero
120/// token counters (nothing to cost).
121#[must_use]
122pub fn cost_usd(
123    model: Option<&str>,
124    tokens_in: Option<u64>,
125    tokens_out: Option<u64>,
126    cache_read: Option<u64>,
127    cache_create: Option<u64>,
128) -> Option<f64> {
129    let pricing = lookup(model?)?;
130    let has_any = [tokens_in, tokens_out, cache_read, cache_create]
131        .iter()
132        .any(|v| v.is_some_and(|n| n > 0));
133    if !has_any {
134        return None;
135    }
136    #[allow(clippy::cast_precision_loss)]
137    let t_in = tokens_in.unwrap_or(0) as f64;
138    #[allow(clippy::cast_precision_loss)]
139    let t_out = tokens_out.unwrap_or(0) as f64;
140    #[allow(clippy::cast_precision_loss)]
141    let t_cr = cache_read.unwrap_or(0) as f64;
142    #[allow(clippy::cast_precision_loss)]
143    let t_cc = cache_create.unwrap_or(0) as f64;
144    let cost = t_in * pricing.input_per_mtoken
145        + t_out * pricing.output_per_mtoken
146        + t_cr
147            * pricing
148                .cache_read_per_mtoken
149                .unwrap_or(pricing.input_per_mtoken)
150        + t_cc
151            * pricing
152                .cache_create_per_mtoken
153                .unwrap_or(pricing.input_per_mtoken);
154    Some(cost / 1_000_000.0)
155}
156
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used when comparing computed dollar amounts.
    const EPS: f64 = 1e-6;

    /// Asserts that `actual` is within `EPS` of `expected`.
    fn assert_close(actual: f64, expected: f64) {
        assert!(
            (actual - expected).abs() < EPS,
            "expected {expected}, got {actual}"
        );
    }

    /// Returns `true` when `s` begins with a `YYYY-MM-DD`-shaped prefix.
    /// Only the shape (digits and dashes) is checked, not calendar validity.
    fn starts_with_iso_date(s: &str) -> bool {
        let Some(head) = s.as_bytes().get(..10) else {
            return false;
        };
        head.iter().enumerate().all(|(i, b)| match i {
            4 | 7 => *b == b'-',
            _ => b.is_ascii_digit(),
        })
    }

    #[test]
    fn lookup_finds_known_model_case_insensitive() {
        for spelling in ["claude-opus-4-6", "Claude-Opus-4-6", "CLAUDE-OPUS-4-6"] {
            assert!(lookup(spelling).is_some(), "{spelling} should resolve");
        }
    }

    #[test]
    fn lookup_returns_none_for_unknown_model() {
        assert!(lookup("llama-99-ultra").is_none());
        assert!(lookup("").is_none());
    }

    #[test]
    fn cost_unknown_model_returns_none() {
        assert_eq!(
            cost_usd(Some("unknown"), Some(100), Some(50), None, None),
            None
        );
    }

    #[test]
    fn cost_none_model_returns_none() {
        assert_eq!(cost_usd(None, Some(100), Some(50), None, None), None);
    }

    #[test]
    fn cost_zero_tokens_returns_none() {
        // No non-zero counter → nothing to cost → None (not 0.0).
        // Matters because downstream code formats None as "—" and 0 as "$0.00".
        assert_eq!(
            cost_usd(Some("claude-opus-4-6"), Some(0), Some(0), Some(0), Some(0)),
            None
        );
        assert_eq!(
            cost_usd(Some("claude-opus-4-6"), None, None, None, None),
            None
        );
    }

    #[test]
    fn cost_ignores_zero_counters_when_another_is_non_zero() {
        // A zero counter next to a non-zero one must not suppress the result.
        // opus-4-6: 1M output at $75/Mtok = $75.
        let c = cost_usd(
            Some("claude-opus-4-6"),
            Some(0),
            Some(1_000_000),
            None,
            Some(0),
        )
        .unwrap();
        assert_close(c, 75.0);
    }

    #[test]
    fn cost_computes_input_plus_output() {
        // opus-4-6: $15/Mtok input, $75/Mtok output.
        // 1M input + 1M output = $15 + $75 = $90.
        let c = cost_usd(
            Some("claude-opus-4-6"),
            Some(1_000_000),
            Some(1_000_000),
            None,
            None,
        )
        .unwrap();
        assert_close(c, 90.0);
    }

    #[test]
    fn cost_sums_all_four_counters() {
        // sonnet-4-6, 1M of each counter:
        // $3 input + $15 output + $0.30 cache read + $3.75 cache create = $22.05.
        let c = cost_usd(
            Some("claude-sonnet-4-6"),
            Some(1_000_000),
            Some(1_000_000),
            Some(1_000_000),
            Some(1_000_000),
        )
        .unwrap();
        assert_close(c, 22.05);
    }

    #[test]
    fn cost_cache_read_uses_discounted_rate_when_provider_sets_one() {
        // opus-4-6 cache_read rate is $1.50/Mtok (10% of $15 input).
        // 1M cache_read → $1.50 alone.
        let c = cost_usd(Some("claude-opus-4-6"), None, None, Some(1_000_000), None).unwrap();
        assert_close(c, 1.50);
    }

    #[test]
    fn cost_falls_back_to_input_rate_when_cache_rate_missing() {
        // gpt-5 has no cache_create rate, so cache_create is billed at
        // input rate ($10/Mtok).
        let c = cost_usd(Some("gpt-5"), None, None, None, Some(1_000_000)).unwrap();
        assert_close(c, 10.0);
    }

    #[test]
    fn every_entry_has_last_verified_date() {
        // An emptiness check alone would accept placeholders such as "tbd";
        // require a leading YYYY-MM-DD so the field stays auditable as a date.
        // Trailing annotations like " (estimate; unverified)" remain allowed.
        for p in PRICES {
            assert!(
                !p.last_verified.is_empty(),
                "{} missing last_verified",
                p.name
            );
            assert!(
                starts_with_iso_date(p.last_verified),
                "{} last_verified must start with YYYY-MM-DD, got {:?}",
                p.name,
                p.last_verified
            );
        }
    }

    #[test]
    fn no_duplicate_model_names() {
        use std::collections::HashSet;
        let mut seen = HashSet::new();
        for p in PRICES {
            assert!(seen.insert(p.name), "duplicate pricing entry: {}", p.name);
        }
    }
}