Skip to main content

tt_shared/
pricing.rs

//! Pricing tables per model. Values are a **manually curated snapshot** taken
//! from provider pricing pages; they are NOT refreshed automatically.
//! `effective_at` records when each rate took effect, which lets us replay
//! historical telemetry against the correct rate. To refresh rates, edit
//! `data/pricing.toml` and append new entries — see `scripts/refresh-pricing.sh`
//! for the manual workflow. See also `docs/02-provider-adapter-guide.md`.
//!
//! Rates live in a versioned data file (`data/pricing.toml`) that is embedded at
//! build time and parsed once into a [`PricingCatalog`]. Provider adapters
//! delegate to [`catalog`] instead of hardcoding rate tables, so a price refresh
//! is a data edit with no Rust source change (a rebuild is still required,
//! because the file is embedded at compile time). The catalog keeps a per-model
//! price *history*, enabling [`PricingCatalog::at`] to price historical
//! telemetry against the rate that was in effect at request time.
14
15use std::collections::HashMap;
16use std::sync::OnceLock;
17
18use chrono::{DateTime, Utc};
19use serde::{Deserialize, Serialize};
20
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct ModelPricing {
23    /// USD per 1M input tokens.
24    pub input_per_million: f64,
25    /// USD per 1M output tokens.
26    pub output_per_million: f64,
27    /// USD per 1M cached input tokens (Anthropic 10%, OpenAI 10%, Gemini 10%).
28    pub cached_input_per_million: Option<f64>,
29    /// USD per 1M cache-creation (cache-write) input tokens. Anthropic charges
30    /// ~1.25× the base input rate for tokens written to the prompt cache.
31    /// `None` for providers with no documented write premium (cost path unchanged).
32    pub cache_write_per_million: Option<f64>,
33    /// When this pricing took effect (for historical replay).
34    pub effective_at: DateTime<Utc>,
35}
36
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct ModelInfo {
39    pub id: String,
40    pub provider: String,
41    pub capabilities: Vec<Capability>,
42    pub max_input_tokens: u64,
43    pub max_output_tokens: u64,
44}
45
46#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
47#[serde(rename_all = "snake_case")]
48pub enum Capability {
49    Text,
50    Vision,
51    Audio,
52    Tools,
53    JsonMode,
54    Streaming,
55    Reasoning,
56    PromptCaching,
57}
58
59/// Embedded versioned rate catalog. The source of truth for token rates;
60/// edited as data (`data/pricing.toml`), not Rust source.
61const PRICING_TOML: &str = include_str!("../data/pricing.toml");
62
63/// One row of the catalog as it appears in `pricing.toml`.
64#[derive(Debug, Deserialize)]
65struct RawEntry {
66    provider: String,
67    model: String,
68    input_per_million: f64,
69    output_per_million: f64,
70    #[serde(default)]
71    cached_input_per_million: Option<f64>,
72    #[serde(default)]
73    cache_write_per_million: Option<f64>,
74    effective_at: DateTime<Utc>,
75}
76
77#[derive(Debug, Deserialize)]
78struct RawCatalog {
79    #[serde(default)]
80    entry: Vec<RawEntry>,
81}
82
83/// In-memory pricing catalog: per `(provider, model)`, a price history sorted
84/// ascending by `effective_at`. Built once from the embedded TOML.
85#[derive(Debug)]
86pub struct PricingCatalog {
87    by_model: HashMap<(String, String), Vec<ModelPricing>>,
88}
89
90impl PricingCatalog {
91    /// Parse a catalog from TOML text. Used by [`catalog`] over the embedded
92    /// file; exposed for tests that want to parse a synthetic catalog.
93    pub fn parse(toml_text: &str) -> Result<Self, toml::de::Error> {
94        let raw: RawCatalog = toml::from_str(toml_text)?;
95        let mut by_model: HashMap<(String, String), Vec<ModelPricing>> = HashMap::new();
96        for e in raw.entry {
97            by_model
98                .entry((e.provider, e.model))
99                .or_default()
100                .push(ModelPricing {
101                    input_per_million: e.input_per_million,
102                    output_per_million: e.output_per_million,
103                    cached_input_per_million: e.cached_input_per_million,
104                    cache_write_per_million: e.cache_write_per_million,
105                    effective_at: e.effective_at,
106                });
107        }
108        // Sort each model's history ascending by effective_at so `latest` is
109        // the last element and `at` can scan from newest backward.
110        for history in by_model.values_mut() {
111            history.sort_by_key(|p| p.effective_at);
112        }
113        Ok(Self { by_model })
114    }
115
116    /// The current (most recently effective) rate for `(provider, model)`,
117    /// or `None` if the model is not in the catalog.
118    pub fn latest(&self, provider: &str, model: &str) -> Option<ModelPricing> {
119        self.by_model
120            .get(&(provider.to_string(), model.to_string()))?
121            .last()
122            .cloned()
123    }
124
125    /// The rate that was in effect at `at` for `(provider, model)` — the most
126    /// recent entry whose `effective_at <= at`. If `at` predates every known
127    /// entry, falls back to the earliest entry (best-effort historical replay
128    /// rather than reporting no price). `None` only when the model is unknown.
129    pub fn at(&self, provider: &str, model: &str, at: DateTime<Utc>) -> Option<ModelPricing> {
130        let history = self
131            .by_model
132            .get(&(provider.to_string(), model.to_string()))?;
133        history
134            .iter()
135            .rev()
136            .find(|p| p.effective_at <= at)
137            .or_else(|| history.first())
138            .cloned()
139    }
140
141    /// Every model's current rate for `provider`, as `(model, pricing)` pairs.
142    /// Order is unspecified. Used by adapters that build a model→rate map at
143    /// construction time (the OpenAI-compatible providers).
144    pub fn latest_for_provider(&self, provider: &str) -> Vec<(String, ModelPricing)> {
145        self.by_model
146            .iter()
147            .filter(|((p, _), _)| p == provider)
148            .filter_map(|((_, model), history)| history.last().map(|p| (model.clone(), p.clone())))
149            .collect()
150    }
151
152    /// Every `(provider, model)` pair in the catalog. Order is unspecified.
153    /// Pair with [`latest`](Self::latest) / [`at`](Self::at) to materialize a
154    /// full rate table (e.g. for the Plan replay engine).
155    pub fn pairs(&self) -> Vec<(String, String)> {
156        self.by_model.keys().cloned().collect()
157    }
158
159    /// Number of distinct `(provider, model)` pairs in the catalog.
160    pub fn len(&self) -> usize {
161        self.by_model.len()
162    }
163
164    /// Whether the catalog has no entries.
165    pub fn is_empty(&self) -> bool {
166        self.by_model.is_empty()
167    }
168
169    /// The newest `effective_at` across every entry in the catalog — i.e. the
170    /// date of the most recent manual rate snapshot. Returns `None` only when
171    /// the catalog is empty (a build-time error in practice, because the
172    /// embedded file is non-empty and the parse is guarded by a unit test).
173    ///
174    /// Use this as a freshness signal: if the returned date is far in the past
175    /// it means pricing.toml has not been updated in a while.
176    pub fn catalog_max_effective_at(&self) -> Option<DateTime<Utc>> {
177        self.by_model
178            .values()
179            .filter_map(|history| history.last().map(|p| p.effective_at))
180            .max()
181    }
182}
183
184/// The process-wide pricing catalog, parsed once from the embedded
185/// `data/pricing.toml`. Panics at first use only if that bundled file is
186/// malformed — which a unit test guards against, so it cannot reach a release.
187pub fn catalog() -> &'static PricingCatalog {
188    static CATALOG: OnceLock<PricingCatalog> = OnceLock::new();
189    CATALOG.get_or_init(|| {
190        PricingCatalog::parse(PRICING_TOML).expect("embedded data/pricing.toml must be valid")
191    })
192}
193
#[cfg(test)]
mod catalog_tests {
    use super::*;
    use chrono::TimeZone;

    /// Two-step history for `("p", "m")`, deliberately listed newest-first so
    /// tests using it also depend on `parse` sorting the entries.
    const DESCENDING_HISTORY: &str = r#"
        [[entry]]
        provider = "p"
        model = "m"
        input_per_million = 3.0
        output_per_million = 4.0
        effective_at = "2026-06-01T00:00:00Z"

        [[entry]]
        provider = "p"
        model = "m"
        input_per_million = 1.0
        output_per_million = 2.0
        effective_at = "2026-01-01T00:00:00Z"
    "#;

    #[test]
    fn embedded_catalog_parses_and_is_populated() {
        let c = catalog();
        assert!(!c.is_empty(), "embedded catalog should not be empty");
        // 36 models across 7 paid providers (32 at import + 4 current flagships
        // added in the 2026-05-31 verification: gpt-5.5-pro, gpt-5.4-mini,
        // gpt-5.4-pro, claude-opus-4-8).
        assert_eq!(
            c.len(),
            36,
            "unexpected catalog size — update if intentional"
        );
    }

    /// The embedded catalog must carry at least one `effective_at` date and it
    /// must be parseable (which `catalog_max_effective_at` returning `Some`
    /// proves). This test is NOT time-sensitive: we assert presence only, never
    /// a hardcoded "must be within N days of today", so it will never fail
    /// merely because time has passed.
    #[test]
    fn catalog_max_effective_at_is_present() {
        let c = catalog();
        let max_date = c
            .catalog_max_effective_at()
            .expect("non-empty catalog must have a max effective_at");
        // Sanity: the catalog was first created in 2026; the date must be at
        // least 2026-01-01 to confirm we aren't reading a zero/epoch value.
        let floor = Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap();
        assert!(
            max_date >= floor,
            "catalog_max_effective_at = {max_date} is older than expected floor {floor}"
        );
    }

    /// Staleness helper works on a synthetic catalog with known dates.
    #[test]
    fn catalog_max_effective_at_picks_newest() {
        let toml = r#"
            [[entry]]
            provider = "p"
            model = "m1"
            input_per_million = 1.0
            output_per_million = 2.0
            effective_at = "2026-03-01T00:00:00Z"

            [[entry]]
            provider = "p"
            model = "m2"
            input_per_million = 3.0
            output_per_million = 4.0
            effective_at = "2026-05-01T00:00:00Z"
        "#;
        let c = PricingCatalog::parse(toml).expect("valid");
        let max = c.catalog_max_effective_at().expect("present");
        assert_eq!(
            max,
            Utc.with_ymd_and_hms(2026, 5, 1, 0, 0, 0).unwrap(),
            "should return the newest effective_at across all models"
        );
    }

    /// Empty catalog returns None (not a panic).
    #[test]
    fn catalog_max_effective_at_empty_catalog() {
        let c = PricingCatalog::parse("").expect("empty TOML is valid");
        assert!(c.catalog_max_effective_at().is_none());
    }

    #[test]
    fn latest_returns_known_rates() {
        let c = catalog();
        let p = c.latest("openai", "gpt-4o").expect("gpt-4o present");
        assert_eq!(p.input_per_million, 2.50);
        assert_eq!(p.output_per_million, 10.00);
        assert_eq!(p.cached_input_per_million, Some(1.25));

        // A model whose cached rate is omitted in TOML → None, not 0.0.
        let g = c.latest("groq", "llama-3.1-8b-instant").expect("present");
        assert_eq!(g.cached_input_per_million, None);
    }

    /// Anthropic models must carry a cache_write_per_million at ~1.25× base input.
    /// Non-Anthropic models must have None (no write premium documented).
    #[test]
    fn anthropic_models_have_cache_write_rate() {
        let c = catalog();

        let haiku = c.latest("anthropic", "claude-haiku-4-5").expect("present");
        assert_eq!(
            haiku.cache_write_per_million,
            Some(1.25),
            "haiku write rate = 1.25× base input (1.00)"
        );

        let sonnet = c.latest("anthropic", "claude-sonnet-4-6").expect("present");
        assert_eq!(
            sonnet.cache_write_per_million,
            Some(3.75),
            "sonnet write rate = 1.25× base input (3.00)"
        );

        let opus = c.latest("anthropic", "claude-opus-4-7").expect("present");
        assert_eq!(
            opus.cache_write_per_million,
            Some(6.25),
            "opus write rate = 1.25× base input (5.00)"
        );

        // Non-Anthropic models have no documented write premium.
        let gpt4o = c.latest("openai", "gpt-4o").expect("gpt-4o present");
        assert_eq!(
            gpt4o.cache_write_per_million, None,
            "OpenAI has no cache-write premium"
        );

        let groq_llama = c.latest("groq", "llama-3.1-8b-instant").expect("present");
        assert_eq!(
            groq_llama.cache_write_per_million, None,
            "Groq has no cache-write premium"
        );
    }

    #[test]
    fn unknown_provider_or_model_is_none() {
        let c = catalog();
        assert!(c.latest("openai", "no-such-model").is_none());
        assert!(c.latest("no-such-provider", "gpt-4o").is_none());
    }

    #[test]
    fn at_selects_rate_effective_at_timestamp() {
        // Two-entry history: $1/$2 from 2026-01-01, $3/$4 from 2026-06-01.
        let toml = r#"
            [[entry]]
            provider = "p"
            model = "m"
            input_per_million = 1.0
            output_per_million = 2.0
            effective_at = "2026-01-01T00:00:00Z"

            [[entry]]
            provider = "p"
            model = "m"
            input_per_million = 3.0
            output_per_million = 4.0
            effective_at = "2026-06-01T00:00:00Z"
        "#;
        let c = PricingCatalog::parse(toml).expect("valid");

        // Before either entry → earliest (best-effort).
        let before = c
            .at("p", "m", Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap())
            .unwrap();
        assert_eq!(before.input_per_million, 1.0);

        // Between the two → first (older) rate.
        let mid = c
            .at("p", "m", Utc.with_ymd_and_hms(2026, 3, 1, 0, 0, 0).unwrap())
            .unwrap();
        assert_eq!(mid.input_per_million, 1.0);

        // After the second → newest rate.
        let after = c
            .at("p", "m", Utc.with_ymd_and_hms(2026, 9, 1, 0, 0, 0).unwrap())
            .unwrap();
        assert_eq!(after.input_per_million, 3.0);

        // `latest` is always the newest regardless of time.
        assert_eq!(c.latest("p", "m").unwrap().input_per_million, 3.0);
    }

    /// `parse` must sort each history itself. The entries here are listed
    /// newest-first, so this fails if the sort is ever removed — unlike the
    /// other synthetic histories, which are already ascending in file order.
    #[test]
    fn parse_sorts_history_regardless_of_file_order() {
        let c = PricingCatalog::parse(DESCENDING_HISTORY).expect("valid");
        assert_eq!(c.len(), 1, "both rows belong to the same (provider, model)");

        assert_eq!(c.latest("p", "m").unwrap().input_per_million, 3.0);

        let mid = c
            .at("p", "m", Utc.with_ymd_and_hms(2026, 3, 1, 0, 0, 0).unwrap())
            .unwrap();
        assert_eq!(mid.input_per_million, 1.0);

        let before = c
            .at("p", "m", Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap())
            .unwrap();
        assert_eq!(before.input_per_million, 1.0);

        assert_eq!(
            c.catalog_max_effective_at(),
            Some(Utc.with_ymd_and_hms(2026, 6, 1, 0, 0, 0).unwrap())
        );
    }

    /// A rate applies from its `effective_at` instant inclusive: a request
    /// stamped exactly at the changeover gets the new rate, one second earlier
    /// still gets the old one.
    #[test]
    fn at_boundary_is_inclusive() {
        let c = PricingCatalog::parse(DESCENDING_HISTORY).expect("valid");

        let exactly = c
            .at("p", "m", Utc.with_ymd_and_hms(2026, 6, 1, 0, 0, 0).unwrap())
            .unwrap();
        assert_eq!(exactly.input_per_million, 3.0);

        let just_before = c
            .at("p", "m", Utc.with_ymd_and_hms(2026, 5, 31, 23, 59, 59).unwrap())
            .unwrap();
        assert_eq!(just_before.input_per_million, 1.0);
    }

    /// `at` reports `None` for an unknown pair instead of falling back.
    #[test]
    fn at_unknown_model_is_none() {
        let c = PricingCatalog::parse(DESCENDING_HISTORY).expect("valid");
        let when = Utc.with_ymd_and_hms(2026, 9, 1, 0, 0, 0).unwrap();
        assert!(c.at("p", "other", when).is_none());
        assert!(c.at("other", "m", when).is_none());
    }

    /// `latest_for_provider` returns one current rate per model of the
    /// requested provider and nothing from other providers.
    #[test]
    fn latest_for_provider_filters_by_provider() {
        let toml = r#"
            [[entry]]
            provider = "a"
            model = "m1"
            input_per_million = 1.0
            output_per_million = 2.0
            effective_at = "2026-01-01T00:00:00Z"

            [[entry]]
            provider = "a"
            model = "m1"
            input_per_million = 3.0
            output_per_million = 4.0
            effective_at = "2026-06-01T00:00:00Z"

            [[entry]]
            provider = "b"
            model = "m2"
            input_per_million = 5.0
            output_per_million = 6.0
            effective_at = "2026-01-01T00:00:00Z"
        "#;
        let c = PricingCatalog::parse(toml).expect("valid");

        let rates = c.latest_for_provider("a");
        assert_eq!(rates.len(), 1);
        assert_eq!(rates[0].0, "m1");
        assert_eq!(rates[0].1.input_per_million, 3.0);

        assert!(c.latest_for_provider("no-such-provider").is_empty());

        let mut pairs = c.pairs();
        pairs.sort();
        assert_eq!(
            pairs,
            vec![
                ("a".to_string(), "m1".to_string()),
                ("b".to_string(), "m2".to_string()),
            ]
        );
    }

    /// An entry missing a required rate is a parse error, not a silent default.
    #[test]
    fn parse_rejects_entry_missing_required_field() {
        let toml = r#"
            [[entry]]
            provider = "p"
            model = "m"
            input_per_million = 1.0
            effective_at = "2026-01-01T00:00:00Z"
        "#;
        assert!(PricingCatalog::parse(toml).is_err());
    }
}