Skip to main content

tt_shared/
pricing.rs

1//! Pricing tables per model. Values are a **manually-curated snapshot** taken
2//! from provider pricing pages; they are NOT refreshed automatically.
3//! `effective_at` records when each rate took effect and lets us replay
4//! historical telemetry against the correct rate. To refresh rates, edit
5//! `data/pricing.toml` and append new entries — see `scripts/refresh-pricing.sh`
6//! for the manual workflow. See also `docs/02-provider-adapter-guide.md`.
7//!
8//! Rates live in a versioned data file (`data/pricing.toml`), embedded at build
9//! time and parsed once into a [`PricingCatalog`]. Provider adapters delegate
10//! to [`catalog`] instead of hardcoding rate tables, so a price refresh is a
11//! data edit — decoupled from a Rust release. The catalog keeps a per-model
12//! price *history*, enabling [`PricingCatalog::at`] to price historical
13//! telemetry against the rate that was in effect at request time.
14
15use std::collections::HashMap;
16use std::sync::OnceLock;
17
18use chrono::{DateTime, Utc};
19use serde::{Deserialize, Serialize};
20
/// The rates and prompt-cache metadata for one model at one point in its price
/// history. Every `*_per_million` field is USD per one million tokens; the
/// optional fields are `None` when the catalog row does not carry that rate.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelPricing {
    /// USD per 1M input tokens.
    pub input_per_million: f64,
    /// USD per 1M output tokens.
    pub output_per_million: f64,
    /// USD per 1M cached input tokens. Often ~10% of the input rate on
    /// Anthropic / OpenAI / Gemini models, but the discount is **not** uniform
    /// (the catalog's `gpt-4o` row, for example, is 50% of input), so always
    /// read the catalog value rather than assuming a fixed ratio. `None` when
    /// the row carries no cached-input rate.
    pub cached_input_per_million: Option<f64>,
    /// USD per 1M cache-creation (cache-write) input tokens. Anthropic charges
    /// ~1.25× the base input rate for tokens written to the prompt cache.
    /// `None` for providers with no documented write premium (cost path unchanged).
    pub cache_write_per_million: Option<f64>,
    /// USD per 1M batch (async) input tokens. Providers with a batch tier
    /// (OpenAI / Anthropic / Gemini) bill async requests at ~50% of standard
    /// input. `None` for providers with no batch tier.
    pub batch_input_per_million: Option<f64>,
    /// USD per 1M batch (async) output tokens (~50% of standard output).
    /// `None` for providers with no batch tier.
    pub batch_output_per_million: Option<f64>,
    /// USD per 1M input tokens under OpenAI's **Flex** service tier
    /// (`service_tier: "flex"`) — a synchronous-but-slower tier billed at Batch
    /// API rates (~50% of standard). `None` for models/providers with no Flex
    /// tier; **presence is the eligibility gate** (only models that carry a Flex
    /// rate may be opted into `service_tier=flex`). See
    /// developers.openai.com/api/docs/guides/flex-processing.
    pub flex_input_per_million: Option<f64>,
    /// USD per 1M output tokens under the Flex service tier (~50% of standard
    /// output). `None` when the model has no Flex tier.
    pub flex_output_per_million: Option<f64>,
    /// Provider minimum prefix length, in tokens, before a `cache_control`
    /// breakpoint actually caches (shorter prefixes silently don't cache).
    /// Anthropic varies this by model (2048–4096); `None` when not documented.
    pub prompt_cache_min_tokens: Option<u32>,
    /// When this pricing took effect (for historical replay).
    pub effective_at: DateTime<Utc>,
}
57
/// Which cache-write TTL tier a prompt-cache write was billed at.
///
/// Anthropic bills cache *writes* at a per-TTL premium over the base input rate:
/// the default 5-minute ephemeral tier is ~1.25× base input, and the opt-in
/// 1-hour tier (`cache_control: {"type": "ephemeral", "ttl": "1h"}`) is ~2×
/// (platform.claude.com/docs/en/build-with-claude/prompt-caching § Economics).
/// [`ModelPricing::cache_write_per_million`] is the 5-minute rate;
/// [`ModelPricing::cache_write_rate_per_million`] resolves either tier.
///
/// `Default` yields [`CacheWriteTier::FiveMin`], so callers that do not track
/// the TTL price writes at the 5-minute rate.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CacheWriteTier {
    /// The default ephemeral TTL — `cache_control` with no `ttl` field. ~1.25×.
    #[default]
    FiveMin,
    /// The opt-in 1-hour TTL — `cache_control` with `"ttl": "1h"`. ~2×.
    OneHour,
}
74
/// Ratio of the 1-hour cache-write rate to the base input rate (Anthropic's
/// documented 2× one-hour-TTL premium). The 5-minute rate is carried directly
/// in the catalog as `cache_write_per_million` (~1.25× base); the 1-hour rate
/// follows the same documented base-input relationship, so we derive it rather
/// than carrying a second column.
///
/// Note the multiplier is applied to `input_per_million`, not to the 5-minute
/// write rate.
const CACHE_WRITE_1H_MULTIPLIER: f64 = 2.0;
81
82impl ModelPricing {
83    /// USD per 1M cache-write (creation) tokens for the given TTL `tier`.
84    ///
85    /// - `FiveMin` → the catalog's [`cache_write_per_million`](Self::cache_write_per_million)
86    ///   (the 5-minute/1.25× rate Anthropic applies to bare `ephemeral` writes).
87    /// - `OneHour` → the documented 2× base-input rate, but **only when a 5-min
88    ///   write premium is documented** (i.e. the provider tiers cache writes at
89    ///   all). Providers with no write premium return `None` for both tiers so
90    ///   the caller falls back to the plain input rate, unchanged.
91    ///
92    /// Returns `None` when no write premium applies, so callers price the
93    /// remaining tokens at `input_per_million`.
94    #[must_use]
95    pub fn cache_write_rate_per_million(&self, tier: CacheWriteTier) -> Option<f64> {
96        match tier {
97            CacheWriteTier::FiveMin => self.cache_write_per_million,
98            // Only tier up when the provider documents a 5-min write premium;
99            // otherwise there is no premium to scale and we leave it absent.
100            CacheWriteTier::OneHour => self
101                .cache_write_per_million
102                .map(|_| self.input_per_million * CACHE_WRITE_1H_MULTIPLIER),
103        }
104    }
105
106    /// Whether this model is eligible for OpenAI's Flex service tier
107    /// (`service_tier: "flex"`). Eligibility is **catalog-driven**: a model is
108    /// flex-eligible iff it carries a Flex input rate. OpenAI lists Flex prices
109    /// only for supported models (gpt-5.x family); o3 / o4-mini are batch-only
110    /// "specialized models" and therefore carry no Flex rate.
111    #[must_use]
112    pub fn flex_eligible(&self) -> bool {
113        self.flex_input_per_million.is_some()
114    }
115
116    /// The Flex `(input, output)` per-million rates when this model is
117    /// flex-eligible, else `None`. Both are present together for an eligible
118    /// row (the catalog carries the pair); a missing output rate falls back to
119    /// the standard output rate so a partially-populated row stays priceable.
120    #[must_use]
121    pub fn flex_rates_per_million(&self) -> Option<(f64, f64)> {
122        let input = self.flex_input_per_million?;
123        let output = self
124            .flex_output_per_million
125            .unwrap_or(self.output_per_million);
126        Some((input, output))
127    }
128}
129
/// Descriptive metadata for one model: its identifier, provider, advertised
/// capabilities, and token limits.
///
/// NOTE(review): nothing visible in this file constructs or reads `ModelInfo`;
/// the field descriptions below are inferred from the field names and types —
/// confirm against the callers that populate it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ModelInfo {
    /// Model identifier (presumably the same string used as the catalog's
    /// `model` key — TODO confirm).
    pub id: String,
    /// Name of the provider that serves this model.
    pub provider: String,
    /// Capabilities the model advertises.
    pub capabilities: Vec<Capability>,
    /// Maximum input size, in tokens.
    pub max_input_tokens: u64,
    /// Maximum output size, in tokens.
    pub max_output_tokens: u64,
}
138
/// A feature a model can advertise in [`ModelInfo::capabilities`].
///
/// Variants (de)serialize in `snake_case` (so `JsonMode` is written as
/// `"json_mode"` and `PromptCaching` as `"prompt_caching"`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum Capability {
    Text,
    Vision,
    Audio,
    Tools,
    JsonMode,
    Streaming,
    Reasoning,
    PromptCaching,
}
151
/// Embedded versioned rate catalog. The source of truth for token rates;
/// edited as data (`data/pricing.toml`), not Rust source.
///
/// `include_str!` resolves the path relative to this source file and bakes the
/// text into the binary at compile time, so a rate edit still requires a
/// rebuild but no Rust code change.
const PRICING_TOML: &str = include_str!("../data/pricing.toml");
155
/// One row of the catalog as it appears in `pricing.toml`.
///
/// `provider`, `model`, the two base rates and `effective_at` are required;
/// every other rate is optional and deserializes to `None` when the row omits
/// it. [`PricingCatalog::parse`] copies the rate fields one-for-one into a
/// [`ModelPricing`] and uses `(provider, model)` as the lookup key.
#[derive(Debug, Deserialize)]
struct RawEntry {
    provider: String,
    model: String,
    input_per_million: f64,
    output_per_million: f64,
    #[serde(default)]
    cached_input_per_million: Option<f64>,
    #[serde(default)]
    cache_write_per_million: Option<f64>,
    #[serde(default)]
    batch_input_per_million: Option<f64>,
    #[serde(default)]
    batch_output_per_million: Option<f64>,
    #[serde(default)]
    flex_input_per_million: Option<f64>,
    #[serde(default)]
    flex_output_per_million: Option<f64>,
    #[serde(default)]
    prompt_cache_min_tokens: Option<u32>,
    effective_at: DateTime<Utc>,
}
179
/// The top-level shape of `pricing.toml`: an array of `[[entry]]` tables.
///
/// `entry` defaults to an empty list when the key is absent, so an empty
/// document parses successfully into an empty catalog.
#[derive(Debug, Deserialize)]
struct RawCatalog {
    #[serde(default)]
    entry: Vec<RawEntry>,
}
185
186/// In-memory pricing catalog: per `(provider, model)`, a price history sorted
187/// ascending by `effective_at`. Built once from the embedded TOML.
188#[derive(Debug)]
189pub struct PricingCatalog {
190    by_model: HashMap<(String, String), Vec<ModelPricing>>,
191}
192
193impl PricingCatalog {
194    /// Parse a catalog from TOML text. Used by [`catalog`] over the embedded
195    /// file; exposed for tests that want to parse a synthetic catalog.
196    pub fn parse(toml_text: &str) -> Result<Self, toml::de::Error> {
197        let raw: RawCatalog = toml::from_str(toml_text)?;
198        let mut by_model: HashMap<(String, String), Vec<ModelPricing>> = HashMap::new();
199        for e in raw.entry {
200            by_model
201                .entry((e.provider, e.model))
202                .or_default()
203                .push(ModelPricing {
204                    input_per_million: e.input_per_million,
205                    output_per_million: e.output_per_million,
206                    cached_input_per_million: e.cached_input_per_million,
207                    cache_write_per_million: e.cache_write_per_million,
208                    batch_input_per_million: e.batch_input_per_million,
209                    batch_output_per_million: e.batch_output_per_million,
210                    flex_input_per_million: e.flex_input_per_million,
211                    flex_output_per_million: e.flex_output_per_million,
212                    prompt_cache_min_tokens: e.prompt_cache_min_tokens,
213                    effective_at: e.effective_at,
214                });
215        }
216        // Sort each model's history ascending by effective_at so `latest` is
217        // the last element and `at` can scan from newest backward.
218        for history in by_model.values_mut() {
219            history.sort_by_key(|p| p.effective_at);
220        }
221        Ok(Self { by_model })
222    }
223
224    /// The current (most recently effective) rate for `(provider, model)`,
225    /// or `None` if the model is not in the catalog.
226    pub fn latest(&self, provider: &str, model: &str) -> Option<ModelPricing> {
227        self.by_model
228            .get(&(provider.to_string(), model.to_string()))?
229            .last()
230            .cloned()
231    }
232
233    /// The rate that was in effect at `at` for `(provider, model)` — the most
234    /// recent entry whose `effective_at <= at`. If `at` predates every known
235    /// entry, falls back to the earliest entry (best-effort historical replay
236    /// rather than reporting no price). `None` only when the model is unknown.
237    pub fn at(&self, provider: &str, model: &str, at: DateTime<Utc>) -> Option<ModelPricing> {
238        let history = self
239            .by_model
240            .get(&(provider.to_string(), model.to_string()))?;
241        history
242            .iter()
243            .rev()
244            .find(|p| p.effective_at <= at)
245            .or_else(|| history.first())
246            .cloned()
247    }
248
249    /// Every model's current rate for `provider`, as `(model, pricing)` pairs.
250    /// Order is unspecified. Used by adapters that build a model→rate map at
251    /// construction time (the OpenAI-compatible providers).
252    pub fn latest_for_provider(&self, provider: &str) -> Vec<(String, ModelPricing)> {
253        self.by_model
254            .iter()
255            .filter(|((p, _), _)| p == provider)
256            .filter_map(|((_, model), history)| history.last().map(|p| (model.clone(), p.clone())))
257            .collect()
258    }
259
260    /// Every `(provider, model)` pair in the catalog. Order is unspecified.
261    /// Pair with [`latest`](Self::latest) / [`at`](Self::at) to materialize a
262    /// full rate table (e.g. for the Plan replay engine).
263    pub fn pairs(&self) -> Vec<(String, String)> {
264        self.by_model.keys().cloned().collect()
265    }
266
267    /// Number of distinct `(provider, model)` pairs in the catalog.
268    pub fn len(&self) -> usize {
269        self.by_model.len()
270    }
271
272    /// Whether the catalog has no entries.
273    pub fn is_empty(&self) -> bool {
274        self.by_model.is_empty()
275    }
276
277    /// The newest `effective_at` across every entry in the catalog — i.e. the
278    /// date of the most recent manual rate snapshot. Returns `None` only when
279    /// the catalog is empty (a build-time error in practice, because the
280    /// embedded file is non-empty and the parse is guarded by a unit test).
281    ///
282    /// Use this as a freshness signal: if the returned date is far in the past
283    /// it means pricing.toml has not been updated in a while.
284    pub fn catalog_max_effective_at(&self) -> Option<DateTime<Utc>> {
285        self.by_model
286            .values()
287            .filter_map(|history| history.last().map(|p| p.effective_at))
288            .max()
289    }
290}
291
292/// The process-wide pricing catalog, parsed once from the embedded
293/// `data/pricing.toml`. Panics at first use only if that bundled file is
294/// malformed — which a unit test guards against, so it cannot reach a release.
295pub fn catalog() -> &'static PricingCatalog {
296    static CATALOG: OnceLock<PricingCatalog> = OnceLock::new();
297    CATALOG.get_or_init(|| {
298        PricingCatalog::parse(PRICING_TOML).expect("embedded data/pricing.toml must be valid")
299    })
300}
301
302/// Whether `newest` (the catalog's max `effective_at`) is more than `max_days`
303/// before `now`. An empty catalog (`None`) is treated as not stale.
304#[must_use]
305pub fn is_stale(newest: Option<DateTime<Utc>>, now: DateTime<Utc>, max_days: i64) -> bool {
306    match newest {
307        Some(d) => (now - d).num_days() > max_days,
308        None => false,
309    }
310}
311
// Unit tests for the pricing catalog. Tests that call `catalog()` assert exact
// values from the embedded `data/pricing.toml` and must be updated together
// with that file; tests that call `PricingCatalog::parse` use synthetic TOML
// and are independent of the shipped rates.
#[cfg(test)]
mod catalog_tests {
    use super::*;
    use chrono::TimeZone;

    /// `is_stale` against a 90-day limit: an empty catalog and a 10-day-old
    /// snapshot are fresh; a 100-day-old snapshot is stale.
    #[test]
    fn is_stale_thresholds() {
        use chrono::Duration;
        let now: DateTime<Utc> = "2026-06-05T00:00:00Z".parse().unwrap();
        assert!(!is_stale(None, now, 90)); // empty catalog: not stale
        assert!(!is_stale(Some(now - Duration::days(10)), now, 90));
        assert!(is_stale(Some(now - Duration::days(100)), now, 90));
    }

    /// The embedded file parses (this is the guard that keeps `catalog()` from
    /// panicking in a release) and holds the expected number of models.
    #[test]
    fn embedded_catalog_parses_and_is_populated() {
        let c = catalog();
        assert!(!c.is_empty(), "embedded catalog should not be empty");
        // 36 models across 7 paid providers (32 at import + 4 current flagships
        // added in the 2026-05-31 verification: gpt-5.5-pro, gpt-5.4-mini,
        // gpt-5.4-pro, claude-opus-4-8).
        assert_eq!(
            c.len(),
            36,
            "unexpected catalog size — update if intentional"
        );
    }

    /// The embedded catalog must carry at least one `effective_at` date and it
    /// must be parseable (which `catalog_max_effective_at` returning `Some`
    /// proves). This test is NOT time-sensitive: we assert presence only, never
    /// a hardcoded "must be within N days of today", so it will never fail
    /// merely because time has passed.
    #[test]
    fn catalog_max_effective_at_is_present() {
        let c = catalog();
        let max_date = c
            .catalog_max_effective_at()
            .expect("non-empty catalog must have a max effective_at");
        // Sanity: the catalog was first created in 2026; the date must be at
        // least 2026-01-01 to confirm we aren't reading a zero/epoch value.
        let floor = Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap();
        assert!(
            max_date >= floor,
            "catalog_max_effective_at = {max_date} is older than expected floor {floor}"
        );
    }

    /// The freshness signal (`catalog_max_effective_at`) picks the newest date
    /// across *different* models in a synthetic catalog with known dates.
    #[test]
    fn catalog_max_effective_at_picks_newest() {
        let toml = r#"
            [[entry]]
            provider = "p"
            model = "m1"
            input_per_million = 1.0
            output_per_million = 2.0
            effective_at = "2026-03-01T00:00:00Z"

            [[entry]]
            provider = "p"
            model = "m2"
            input_per_million = 3.0
            output_per_million = 4.0
            effective_at = "2026-05-01T00:00:00Z"
        "#;
        let c = PricingCatalog::parse(toml).expect("valid");
        let max = c.catalog_max_effective_at().expect("present");
        assert_eq!(
            max,
            Utc.with_ymd_and_hms(2026, 5, 1, 0, 0, 0).unwrap(),
            "should return the newest effective_at across all models"
        );
    }

    /// Empty catalog returns None (not a panic).
    #[test]
    fn catalog_max_effective_at_empty_catalog() {
        let c = PricingCatalog::parse("").expect("empty TOML is valid");
        assert!(c.catalog_max_effective_at().is_none());
    }

    /// `latest` returns the shipped rates for a known model, and an optional
    /// rate omitted from the TOML surfaces as `None`.
    #[test]
    fn latest_returns_known_rates() {
        let c = catalog();
        let p = c.latest("openai", "gpt-4o").expect("gpt-4o present");
        assert_eq!(p.input_per_million, 2.50);
        assert_eq!(p.output_per_million, 10.00);
        assert_eq!(p.cached_input_per_million, Some(1.25));

        // A model whose cached rate is omitted in TOML → None, not 0.0.
        let g = c.latest("groq", "llama-3.1-8b-instant").expect("present");
        assert_eq!(g.cached_input_per_million, None);
    }

    /// Anthropic models must carry a cache_write_per_million at ~1.25× base input.
    /// Non-Anthropic models must have None (no write premium documented).
    #[test]
    fn anthropic_models_have_cache_write_rate() {
        let c = catalog();

        let haiku = c.latest("anthropic", "claude-haiku-4-5").expect("present");
        assert_eq!(
            haiku.cache_write_per_million,
            Some(1.25),
            "haiku write rate = 1.25× base input (1.00)"
        );

        let sonnet = c.latest("anthropic", "claude-sonnet-4-6").expect("present");
        assert_eq!(
            sonnet.cache_write_per_million,
            Some(3.75),
            "sonnet write rate = 1.25× base input (3.00)"
        );

        let opus = c.latest("anthropic", "claude-opus-4-7").expect("present");
        assert_eq!(
            opus.cache_write_per_million,
            Some(6.25),
            "opus write rate = 1.25× base input (5.00)"
        );

        // Non-Anthropic models have no documented write premium.
        let gpt4o = c.latest("openai", "gpt-4o").expect("gpt-4o present");
        assert_eq!(
            gpt4o.cache_write_per_million, None,
            "OpenAI has no cache-write premium"
        );

        let groq_llama = c.latest("groq", "llama-3.1-8b-instant").expect("present");
        assert_eq!(
            groq_llama.cache_write_per_million, None,
            "Groq has no cache-write premium"
        );
    }

    /// The new schema fields (batch rates + prompt-cache minimum) parse and
    /// carry the documented values on the current Anthropic flagships, and are
    /// `None` for providers without a batch tier / documented cache minimum.
    #[test]
    fn batch_and_cache_min_fields_parse() {
        let c = catalog();

        // Anthropic batch = flat 50% of standard; cache minimum is model-specific.
        let opus = c.latest("anthropic", "claude-opus-4-8").expect("present");
        assert_eq!(opus.batch_input_per_million, Some(2.50), "50% of 5.00");
        assert_eq!(opus.batch_output_per_million, Some(12.50), "50% of 25.00");
        assert_eq!(opus.prompt_cache_min_tokens, Some(4096), "Opus 4.x: 4096");

        let sonnet = c.latest("anthropic", "claude-sonnet-4-6").expect("present");
        assert_eq!(sonnet.batch_input_per_million, Some(1.50));
        assert_eq!(sonnet.batch_output_per_million, Some(7.50));
        assert_eq!(
            sonnet.prompt_cache_min_tokens,
            Some(2048),
            "Sonnet 4.6: 2048"
        );

        // OpenAI flagship: batch tier present, 1024-token auto-cache minimum.
        let gpt = c.latest("openai", "gpt-5.5").expect("present");
        assert_eq!(gpt.batch_input_per_million, Some(2.50));
        assert_eq!(gpt.prompt_cache_min_tokens, Some(1024));

        // Gemini: batch present, cache minimum intentionally unset (None).
        let gem = c.latest("gemini", "gemini-3.1-pro").expect("present");
        assert_eq!(gem.batch_input_per_million, Some(1.00));
        assert_eq!(gem.prompt_cache_min_tokens, None);

        // A provider with no batch tier → both batch fields None.
        let groq = c.latest("groq", "llama-3.1-8b-instant").expect("present");
        assert_eq!(groq.batch_input_per_million, None);
        assert_eq!(groq.batch_output_per_million, None);
        assert_eq!(groq.prompt_cache_min_tokens, None);
    }

    /// `cache_write_rate_per_million` resolves the documented per-TTL premium:
    /// 5-min from the catalog column, 1-hour as 2× base input — but only when a
    /// 5-min write premium is documented (providers without one stay at None so
    /// the caller falls back to the plain input rate).
    #[test]
    fn cache_write_rate_resolves_per_ttl_tier() {
        let c = catalog();

        // Anthropic Sonnet 4.6: base input 3.00, 5-min write 3.75 (=1.25×).
        let sonnet = c.latest("anthropic", "claude-sonnet-4-6").expect("present");
        assert_eq!(
            sonnet.cache_write_rate_per_million(CacheWriteTier::FiveMin),
            Some(3.75),
            "5-min tier = catalog cache_write_per_million (1.25× input)"
        );
        assert_eq!(
            sonnet.cache_write_rate_per_million(CacheWriteTier::OneHour),
            Some(6.00),
            "1-hour tier = 2× base input (3.00)"
        );

        // Opus 4.8: base input 5.00 → 1-hour write 10.00.
        let opus = c.latest("anthropic", "claude-opus-4-8").expect("present");
        assert_eq!(
            opus.cache_write_rate_per_million(CacheWriteTier::FiveMin),
            Some(6.25)
        );
        assert_eq!(
            opus.cache_write_rate_per_million(CacheWriteTier::OneHour),
            Some(10.00),
            "1-hour tier = 2× base input (5.00)"
        );

        // A provider with no documented write premium: both tiers are None so
        // the caller prices these tokens at the plain input rate (unchanged).
        let groq = c.latest("groq", "llama-3.1-8b-instant").expect("present");
        assert_eq!(
            groq.cache_write_rate_per_million(CacheWriteTier::FiveMin),
            None
        );
        assert_eq!(
            groq.cache_write_rate_per_million(CacheWriteTier::OneHour),
            None,
            "no 5-min premium → no 1-hour premium either"
        );
    }

    /// Default tier is the 5-minute tier — Anthropic's default for a bare
    /// `cache_control: {"type": "ephemeral"}` (no `ttl`), which is the only
    /// breakpoint the gateway's Anthropic adapter emits.
    #[test]
    fn cache_write_tier_defaults_to_five_min() {
        assert_eq!(CacheWriteTier::default(), CacheWriteTier::FiveMin);
    }

    /// Flex eligibility is catalog-driven: the supported gpt-5.x models carry a
    /// Flex rate (== batch, 50% of standard) and report eligible; o3 / o4-mini
    /// are batch-only "specialized models" and carry no Flex rate, so they are
    /// NOT flex-eligible. Verified vs developers.openai.com Flex docs/pricing.
    #[test]
    fn flex_rates_and_eligibility_match_openai_docs() {
        let c = catalog();

        // gpt-5.5: standard $5/$30 → flex $2.50/$15 (== batch, 50% off).
        let gpt55 = c.latest("openai", "gpt-5.5").expect("present");
        assert!(gpt55.flex_eligible(), "gpt-5.5 is flex-eligible");
        assert_eq!(gpt55.flex_rates_per_million(), Some((2.50, 15.00)));
        assert_eq!(gpt55.flex_input_per_million, gpt55.batch_input_per_million);
        assert_eq!(
            gpt55.flex_output_per_million,
            gpt55.batch_output_per_million
        );

        // gpt-5.4: standard $2.50/$15 → flex $1.25/$7.50.
        let gpt54 = c.latest("openai", "gpt-5.4").expect("present");
        assert!(gpt54.flex_eligible());
        assert_eq!(gpt54.flex_rates_per_million(), Some((1.25, 7.50)));

        // o3 / o4-mini are batch-only → no flex rate → ineligible.
        let o3 = c.latest("openai", "o3").expect("present");
        assert!(!o3.flex_eligible(), "o3 is batch-only, not flex-eligible");
        assert_eq!(o3.flex_rates_per_million(), None);
        let o4 = c.latest("openai", "o4-mini").expect("present");
        assert!(!o4.flex_eligible());

        // A non-OpenAI model never carries a flex rate.
        let haiku = c.latest("anthropic", "claude-haiku-4-5").expect("present");
        assert!(!haiku.flex_eligible());
    }

    /// A lookup misses (returns `None`) when either half of the
    /// `(provider, model)` key is unknown, even if the other half exists.
    #[test]
    fn unknown_provider_or_model_is_none() {
        let c = catalog();
        assert!(c.latest("openai", "no-such-model").is_none());
        assert!(c.latest("no-such-provider", "gpt-4o").is_none());
    }

    /// `at` replays history on a synthetic two-entry model: timestamps before,
    /// between, and after the entries select the earliest, older, and newest
    /// rate respectively, while `latest` always returns the newest.
    #[test]
    fn at_selects_rate_effective_at_timestamp() {
        // Two-entry history: $1/$2 from 2026-01-01, $3/$4 from 2026-06-01.
        let toml = r#"
            [[entry]]
            provider = "p"
            model = "m"
            input_per_million = 1.0
            output_per_million = 2.0
            effective_at = "2026-01-01T00:00:00Z"

            [[entry]]
            provider = "p"
            model = "m"
            input_per_million = 3.0
            output_per_million = 4.0
            effective_at = "2026-06-01T00:00:00Z"
        "#;
        let c = PricingCatalog::parse(toml).expect("valid");

        // Before either entry → earliest (best-effort).
        let before = c
            .at("p", "m", Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap())
            .unwrap();
        assert_eq!(before.input_per_million, 1.0);

        // Between the two → first (older) rate.
        let mid = c
            .at("p", "m", Utc.with_ymd_and_hms(2026, 3, 1, 0, 0, 0).unwrap())
            .unwrap();
        assert_eq!(mid.input_per_million, 1.0);

        // After the second → newest rate.
        let after = c
            .at("p", "m", Utc.with_ymd_and_hms(2026, 9, 1, 0, 0, 0).unwrap())
            .unwrap();
        assert_eq!(after.input_per_million, 3.0);

        // `latest` is always the newest regardless of time.
        assert_eq!(c.latest("p", "m").unwrap().input_per_million, 3.0);
    }
}