tt_shared/pricing.rs
1//! Pricing tables per model. Values are a **manually-curated snapshot** taken
2//! from provider pricing pages; they are NOT refreshed automatically.
3//! `effective_at` records when each rate took effect and lets us replay
4//! historical telemetry against the correct rate. To refresh rates, edit
5//! `data/pricing.toml` and append new entries — see `scripts/refresh-pricing.sh`
6//! for the manual workflow. See also `docs/02-provider-adapter-guide.md`.
7//!
8//! Rates live in a versioned data file (`data/pricing.toml`), embedded at build
9//! time and parsed once into a [`PricingCatalog`]. Provider adapters delegate
10//! to [`catalog`] instead of hardcoding rate tables, so a price refresh is a
11//! data edit — decoupled from a Rust release. The catalog keeps a per-model
12//! price *history*, enabling [`PricingCatalog::at`] to price historical
13//! telemetry against the rate that was in effect at request time.
14
15use std::collections::HashMap;
16use std::sync::OnceLock;
17
18use chrono::{DateTime, Utc};
19use serde::{Deserialize, Serialize};
20
/// One dated rate row for a single model: the USD-per-million-token prices
/// that apply from [`effective_at`](Self::effective_at) onward. The catalog
/// keeps a list of these rows per model as that model's price history.
///
/// Optional rates are `None` when the row carries no such rate; callers are
/// expected to fall back to the standard input/output rate in that case.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelPricing {
    /// USD per 1M input tokens.
    pub input_per_million: f64,
    /// USD per 1M output tokens.
    pub output_per_million: f64,
    /// USD per 1M cached (cache-read) input tokens. The discount relative to
    /// `input_per_million` is provider- and model-specific — commonly ~10% of
    /// base input on Anthropic / OpenAI / Gemini, but not uniformly (the
    /// catalog's `gpt-4o` row, for example, is 50%) — so read this field
    /// rather than deriving it from the input rate.
    pub cached_input_per_million: Option<f64>,
    /// USD per 1M cache-creation (cache-write) input tokens. Anthropic charges
    /// ~1.25× the base input rate for tokens written to the prompt cache.
    /// `None` for providers with no documented write premium (cost path unchanged).
    pub cache_write_per_million: Option<f64>,
    /// USD per 1M batch (async) input tokens. Providers with a batch tier
    /// (OpenAI / Anthropic / Gemini) bill async requests at ~50% of standard
    /// input. `None` for providers with no batch tier.
    pub batch_input_per_million: Option<f64>,
    /// USD per 1M batch (async) output tokens (~50% of standard output).
    /// `None` for providers with no batch tier.
    pub batch_output_per_million: Option<f64>,
    /// USD per 1M input tokens under OpenAI's **Flex** service tier
    /// (`service_tier: "flex"`) — a synchronous-but-slower tier billed at Batch
    /// API rates (~50% of standard). `None` for models/providers with no Flex
    /// tier; **presence is the eligibility gate** (only models that carry a Flex
    /// rate may be opted into `service_tier=flex`). See
    /// developers.openai.com/api/docs/guides/flex-processing.
    pub flex_input_per_million: Option<f64>,
    /// USD per 1M output tokens under the Flex service tier (~50% of standard
    /// output). `None` when the model has no Flex tier.
    pub flex_output_per_million: Option<f64>,
    /// Provider minimum prefix length, in tokens, before a `cache_control`
    /// breakpoint actually caches (shorter prefixes silently don't cache).
    /// Anthropic varies this by model (2048–4096); `None` when not documented.
    pub prompt_cache_min_tokens: Option<u32>,
    /// When this pricing took effect (for historical replay). Rows of one
    /// model are ordered by this timestamp inside the catalog.
    pub effective_at: DateTime<Utc>,
}
57
/// Which cache-write TTL tier a prompt-cache write was billed at.
///
/// Anthropic bills cache *writes* at a per-TTL premium over the base input rate:
/// the default 5-minute ephemeral tier is ~1.25× base input, and the opt-in
/// 1-hour tier (`cache_control: {"type": "ephemeral", "ttl": "1h"}`) is ~2×
/// (platform.claude.com/docs/en/build-with-claude/prompt-caching § Economics).
/// [`ModelPricing::cache_write_per_million`] is the 5-minute rate;
/// [`ModelPricing::cache_write_rate_per_million`] resolves either tier.
///
/// `CacheWriteTier::default()` is [`FiveMin`](Self::FiveMin), matching a
/// `cache_control` block that carries no `ttl` field.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CacheWriteTier {
    /// The default ephemeral TTL — `cache_control` with no `ttl` field. ~1.25×.
    #[default]
    FiveMin,
    /// The opt-in 1-hour TTL — `cache_control` with `"ttl": "1h"`. ~2×.
    OneHour,
}
74
/// Ratio of the 1-hour cache-write rate to the base input rate (Anthropic's
/// documented 2× one-hour-TTL premium). The 5-minute rate is carried directly
/// in the catalog as `cache_write_per_million` (~1.25× base); the 1-hour rate
/// follows the same documented base-input relationship, so we derive it rather
/// than carrying a second column.
///
/// Applied to `input_per_million` (not to the 5-minute write rate) when
/// resolving [`CacheWriteTier::OneHour`].
const CACHE_WRITE_1H_MULTIPLIER: f64 = 2.0;
81
82impl ModelPricing {
83 /// USD per 1M cache-write (creation) tokens for the given TTL `tier`.
84 ///
85 /// - `FiveMin` → the catalog's [`cache_write_per_million`](Self::cache_write_per_million)
86 /// (the 5-minute/1.25× rate Anthropic applies to bare `ephemeral` writes).
87 /// - `OneHour` → the documented 2× base-input rate, but **only when a 5-min
88 /// write premium is documented** (i.e. the provider tiers cache writes at
89 /// all). Providers with no write premium return `None` for both tiers so
90 /// the caller falls back to the plain input rate, unchanged.
91 ///
92 /// Returns `None` when no write premium applies, so callers price the
93 /// remaining tokens at `input_per_million`.
94 #[must_use]
95 pub fn cache_write_rate_per_million(&self, tier: CacheWriteTier) -> Option<f64> {
96 match tier {
97 CacheWriteTier::FiveMin => self.cache_write_per_million,
98 // Only tier up when the provider documents a 5-min write premium;
99 // otherwise there is no premium to scale and we leave it absent.
100 CacheWriteTier::OneHour => self
101 .cache_write_per_million
102 .map(|_| self.input_per_million * CACHE_WRITE_1H_MULTIPLIER),
103 }
104 }
105
106 /// Whether this model is eligible for OpenAI's Flex service tier
107 /// (`service_tier: "flex"`). Eligibility is **catalog-driven**: a model is
108 /// flex-eligible iff it carries a Flex input rate. OpenAI lists Flex prices
109 /// only for supported models (gpt-5.x family); o3 / o4-mini are batch-only
110 /// "specialized models" and therefore carry no Flex rate.
111 #[must_use]
112 pub fn flex_eligible(&self) -> bool {
113 self.flex_input_per_million.is_some()
114 }
115
116 /// The Flex `(input, output)` per-million rates when this model is
117 /// flex-eligible, else `None`. Both are present together for an eligible
118 /// row (the catalog carries the pair); a missing output rate falls back to
119 /// the standard output rate so a partially-populated row stays priceable.
120 #[must_use]
121 pub fn flex_rates_per_million(&self) -> Option<(f64, f64)> {
122 let input = self.flex_input_per_million?;
123 let output = self
124 .flex_output_per_million
125 .unwrap_or(self.output_per_million);
126 Some((input, output))
127 }
128}
129
/// Descriptive metadata for a single model: its identifier, owning provider,
/// capability flags, and token limits. Carries no pricing data.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ModelInfo {
    /// Model identifier.
    // NOTE(review): presumably the same model string used as the catalog key
    // (e.g. "gpt-4o") — confirm against callers.
    pub id: String,
    /// Name of the provider that serves the model.
    // NOTE(review): presumably matches the catalog's provider key
    // (e.g. "openai") — confirm against callers.
    pub provider: String,
    /// Capabilities the model advertises.
    pub capabilities: Vec<Capability>,
    /// Maximum number of input tokens.
    pub max_input_tokens: u64,
    /// Maximum number of output tokens.
    pub max_output_tokens: u64,
}
138
/// A feature a model can advertise in [`ModelInfo::capabilities`].
///
/// Serialized in `snake_case` (e.g. `JsonMode` ⇄ `"json_mode"`,
/// `PromptCaching` ⇄ `"prompt_caching"`).
// NOTE(review): the per-variant descriptions below are inferred from the
// variant names only — confirm against the provider adapters.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum Capability {
    /// Text input/output.
    Text,
    /// Image (vision) input.
    Vision,
    /// Audio input and/or output.
    Audio,
    /// Tool / function calling.
    Tools,
    /// Structured JSON output mode.
    JsonMode,
    /// Streamed responses.
    Streaming,
    /// Reasoning ("thinking") support.
    Reasoning,
    /// Provider-side prompt caching.
    PromptCaching,
}
151
/// Embedded versioned rate catalog. The source of truth for token rates;
/// edited as data (`data/pricing.toml`), not Rust source.
///
/// `include_str!` reads the file at compile time (path resolved relative to
/// this source file), so the binary needs no pricing file at runtime and a
/// missing file is a build error rather than a runtime one.
const PRICING_TOML: &str = include_str!("../data/pricing.toml");
155
/// One row of the catalog as it appears in `pricing.toml`.
///
/// Mirrors [`ModelPricing`] field-for-field, plus the `provider` / `model`
/// pair that keys the row. Required columns are `provider`, `model`,
/// `input_per_million`, `output_per_million` and `effective_at`; every
/// `#[serde(default)]` column may be omitted from the TOML and then
/// deserializes to `None` (not `0.0`).
#[derive(Debug, Deserialize)]
struct RawEntry {
    // Catalog key: rows sharing the same (provider, model) form one history.
    provider: String,
    model: String,
    // Standard rates, USD per 1M tokens — always required.
    input_per_million: f64,
    output_per_million: f64,
    // Optional rates / limits; see the same-named `ModelPricing` fields.
    #[serde(default)]
    cached_input_per_million: Option<f64>,
    #[serde(default)]
    cache_write_per_million: Option<f64>,
    #[serde(default)]
    batch_input_per_million: Option<f64>,
    #[serde(default)]
    batch_output_per_million: Option<f64>,
    #[serde(default)]
    flex_input_per_million: Option<f64>,
    #[serde(default)]
    flex_output_per_million: Option<f64>,
    #[serde(default)]
    prompt_cache_min_tokens: Option<u32>,
    // Timestamp from which this row's rates apply.
    effective_at: DateTime<Utc>,
}
179
/// Top-level shape of `pricing.toml`: a list of rows under the `entry` key
/// (written as repeated `[[entry]]` tables in the file).
#[derive(Debug, Deserialize)]
struct RawCatalog {
    // `default` lets a document with no `entry` key (including an empty
    // document) deserialize to an empty list instead of failing.
    #[serde(default)]
    entry: Vec<RawEntry>,
}
185
/// In-memory pricing catalog: per `(provider, model)`, a price history sorted
/// ascending by `effective_at`. Built once from the embedded TOML.
///
/// The sort order is an invariant established by `PricingCatalog::parse`; the
/// lookup methods rely on it (the last element of a history is the newest).
#[derive(Debug)]
pub struct PricingCatalog {
    // Key is the owned (provider, model) pair; value is that model's rate
    // history, oldest first.
    by_model: HashMap<(String, String), Vec<ModelPricing>>,
}
192
193impl PricingCatalog {
194 /// Parse a catalog from TOML text. Used by [`catalog`] over the embedded
195 /// file; exposed for tests that want to parse a synthetic catalog.
196 pub fn parse(toml_text: &str) -> Result<Self, toml::de::Error> {
197 let raw: RawCatalog = toml::from_str(toml_text)?;
198 let mut by_model: HashMap<(String, String), Vec<ModelPricing>> = HashMap::new();
199 for e in raw.entry {
200 by_model
201 .entry((e.provider, e.model))
202 .or_default()
203 .push(ModelPricing {
204 input_per_million: e.input_per_million,
205 output_per_million: e.output_per_million,
206 cached_input_per_million: e.cached_input_per_million,
207 cache_write_per_million: e.cache_write_per_million,
208 batch_input_per_million: e.batch_input_per_million,
209 batch_output_per_million: e.batch_output_per_million,
210 flex_input_per_million: e.flex_input_per_million,
211 flex_output_per_million: e.flex_output_per_million,
212 prompt_cache_min_tokens: e.prompt_cache_min_tokens,
213 effective_at: e.effective_at,
214 });
215 }
216 // Sort each model's history ascending by effective_at so `latest` is
217 // the last element and `at` can scan from newest backward.
218 for history in by_model.values_mut() {
219 history.sort_by_key(|p| p.effective_at);
220 }
221 Ok(Self { by_model })
222 }
223
224 /// The current (most recently effective) rate for `(provider, model)`,
225 /// or `None` if the model is not in the catalog.
226 pub fn latest(&self, provider: &str, model: &str) -> Option<ModelPricing> {
227 self.by_model
228 .get(&(provider.to_string(), model.to_string()))?
229 .last()
230 .cloned()
231 }
232
233 /// The rate that was in effect at `at` for `(provider, model)` — the most
234 /// recent entry whose `effective_at <= at`. If `at` predates every known
235 /// entry, falls back to the earliest entry (best-effort historical replay
236 /// rather than reporting no price). `None` only when the model is unknown.
237 pub fn at(&self, provider: &str, model: &str, at: DateTime<Utc>) -> Option<ModelPricing> {
238 let history = self
239 .by_model
240 .get(&(provider.to_string(), model.to_string()))?;
241 history
242 .iter()
243 .rev()
244 .find(|p| p.effective_at <= at)
245 .or_else(|| history.first())
246 .cloned()
247 }
248
249 /// Every model's current rate for `provider`, as `(model, pricing)` pairs.
250 /// Order is unspecified. Used by adapters that build a model→rate map at
251 /// construction time (the OpenAI-compatible providers).
252 pub fn latest_for_provider(&self, provider: &str) -> Vec<(String, ModelPricing)> {
253 self.by_model
254 .iter()
255 .filter(|((p, _), _)| p == provider)
256 .filter_map(|((_, model), history)| history.last().map(|p| (model.clone(), p.clone())))
257 .collect()
258 }
259
260 /// Every `(provider, model)` pair in the catalog. Order is unspecified.
261 /// Pair with [`latest`](Self::latest) / [`at`](Self::at) to materialize a
262 /// full rate table (e.g. for the Plan replay engine).
263 pub fn pairs(&self) -> Vec<(String, String)> {
264 self.by_model.keys().cloned().collect()
265 }
266
267 /// Number of distinct `(provider, model)` pairs in the catalog.
268 pub fn len(&self) -> usize {
269 self.by_model.len()
270 }
271
272 /// Whether the catalog has no entries.
273 pub fn is_empty(&self) -> bool {
274 self.by_model.is_empty()
275 }
276
277 /// The newest `effective_at` across every entry in the catalog — i.e. the
278 /// date of the most recent manual rate snapshot. Returns `None` only when
279 /// the catalog is empty (a build-time error in practice, because the
280 /// embedded file is non-empty and the parse is guarded by a unit test).
281 ///
282 /// Use this as a freshness signal: if the returned date is far in the past
283 /// it means pricing.toml has not been updated in a while.
284 pub fn catalog_max_effective_at(&self) -> Option<DateTime<Utc>> {
285 self.by_model
286 .values()
287 .filter_map(|history| history.last().map(|p| p.effective_at))
288 .max()
289 }
290}
291
292/// The process-wide pricing catalog, parsed once from the embedded
293/// `data/pricing.toml`. Panics at first use only if that bundled file is
294/// malformed — which a unit test guards against, so it cannot reach a release.
295pub fn catalog() -> &'static PricingCatalog {
296 static CATALOG: OnceLock<PricingCatalog> = OnceLock::new();
297 CATALOG.get_or_init(|| {
298 PricingCatalog::parse(PRICING_TOML).expect("embedded data/pricing.toml must be valid")
299 })
300}
301
302/// Whether `newest` (the catalog's max `effective_at`) is more than `max_days`
303/// before `now`. An empty catalog (`None`) is treated as not stale.
304#[must_use]
305pub fn is_stale(newest: Option<DateTime<Utc>>, now: DateTime<Utc>, max_days: i64) -> bool {
306 match newest {
307 Some(d) => (now - d).num_days() > max_days,
308 None => false,
309 }
310}
311
#[cfg(test)]
mod catalog_tests {
    use super::*;
    use chrono::TimeZone;

    /// `is_stale` is false for an empty catalog and for a recent snapshot,
    /// and true once the snapshot is older than the threshold.
    #[test]
    fn is_stale_thresholds() {
        use chrono::Duration;
        let reference: DateTime<Utc> = "2026-06-05T00:00:00Z".parse().unwrap();
        let ten_days_old = reference - Duration::days(10);
        let hundred_days_old = reference - Duration::days(100);

        assert!(!is_stale(None, reference, 90)); // empty catalog: not stale
        assert!(!is_stale(Some(ten_days_old), reference, 90));
        assert!(is_stale(Some(hundred_days_old), reference, 90));
    }

    /// The bundled TOML parses and holds the expected number of models.
    #[test]
    fn embedded_catalog_parses_and_is_populated() {
        let embedded = catalog();
        assert!(!embedded.is_empty(), "embedded catalog should not be empty");
        // 36 models across 7 paid providers (32 at import + 4 current flagships
        // added in the 2026-05-31 verification: gpt-5.5-pro, gpt-5.4-mini,
        // gpt-5.4-pro, claude-opus-4-8).
        let model_count = embedded.len();
        assert_eq!(
            model_count, 36,
            "unexpected catalog size — update if intentional"
        );
    }

    /// The embedded catalog must expose a parseable newest `effective_at`
    /// (`catalog_max_effective_at` returning `Some` proves both). Deliberately
    /// NOT time-sensitive: only presence and a fixed lower bound are checked,
    /// never "within N days of today", so the test cannot start failing merely
    /// because time has passed.
    #[test]
    fn catalog_max_effective_at_is_present() {
        let embedded = catalog();
        let max_date = embedded
            .catalog_max_effective_at()
            .expect("non-empty catalog must have a max effective_at");
        // The catalog was first created in 2026, so anything before
        // 2026-01-01 would indicate a zero/epoch value being read.
        let floor = Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap();
        assert!(
            max_date >= floor,
            "catalog_max_effective_at = {max_date} is older than expected floor {floor}"
        );
    }

    /// On a synthetic two-model catalog the newest date across models wins.
    #[test]
    fn catalog_max_effective_at_picks_newest() {
        let two_models = r#"
            [[entry]]
            provider = "p"
            model = "m1"
            input_per_million = 1.0
            output_per_million = 2.0
            effective_at = "2026-03-01T00:00:00Z"

            [[entry]]
            provider = "p"
            model = "m2"
            input_per_million = 3.0
            output_per_million = 4.0
            effective_at = "2026-05-01T00:00:00Z"
        "#;
        let parsed = PricingCatalog::parse(two_models).expect("valid");
        let newest = parsed.catalog_max_effective_at().expect("present");
        let expected = Utc.with_ymd_and_hms(2026, 5, 1, 0, 0, 0).unwrap();
        assert_eq!(
            newest, expected,
            "should return the newest effective_at across all models"
        );
    }

    /// An empty document yields an empty catalog whose max date is `None`
    /// (no panic).
    #[test]
    fn catalog_max_effective_at_empty_catalog() {
        let empty = PricingCatalog::parse("").expect("empty TOML is valid");
        assert!(empty.catalog_max_effective_at().is_none());
    }

    /// Spot-check known rates, including an omitted optional column.
    #[test]
    fn latest_returns_known_rates() {
        let rates = catalog();

        let gpt4o = rates.latest("openai", "gpt-4o").expect("gpt-4o present");
        assert_eq!(gpt4o.input_per_million, 2.50);
        assert_eq!(gpt4o.output_per_million, 10.00);
        assert_eq!(gpt4o.cached_input_per_million, Some(1.25));

        // A cached rate omitted in the TOML must surface as None, not 0.0.
        let groq_llama = rates
            .latest("groq", "llama-3.1-8b-instant")
            .expect("present");
        assert_eq!(groq_llama.cached_input_per_million, None);
    }

    /// Anthropic rows carry `cache_write_per_million` at ~1.25× base input;
    /// rows of other providers carry none (no documented write premium).
    #[test]
    fn anthropic_models_have_cache_write_rate() {
        let rates = catalog();

        let anthropic_expectations = [
            (
                "claude-haiku-4-5",
                1.25,
                "haiku write rate = 1.25× base input (1.00)",
            ),
            (
                "claude-sonnet-4-6",
                3.75,
                "sonnet write rate = 1.25× base input (3.00)",
            ),
            (
                "claude-opus-4-7",
                6.25,
                "opus write rate = 1.25× base input (5.00)",
            ),
        ];
        for (model, write_rate, why) in anthropic_expectations {
            let pricing = rates.latest("anthropic", model).expect("present");
            assert_eq!(pricing.cache_write_per_million, Some(write_rate), "{why}");
        }

        // Providers without a documented write premium.
        let no_premium_expectations = [
            (
                "openai",
                "gpt-4o",
                "gpt-4o present",
                "OpenAI has no cache-write premium",
            ),
            (
                "groq",
                "llama-3.1-8b-instant",
                "present",
                "Groq has no cache-write premium",
            ),
        ];
        for (provider, model, missing_msg, why) in no_premium_expectations {
            let pricing = rates.latest(provider, model).expect(missing_msg);
            assert_eq!(pricing.cache_write_per_million, None, "{why}");
        }
    }

    /// Batch rates and the prompt-cache minimum parse with their documented
    /// values on current flagships, and stay `None` where a provider has no
    /// batch tier / no documented cache minimum.
    #[test]
    fn batch_and_cache_min_fields_parse() {
        let rates = catalog();

        // Anthropic: batch is a flat 50% of standard; the cache minimum is
        // model-specific.
        let opus = rates.latest("anthropic", "claude-opus-4-8").expect("present");
        assert_eq!(opus.batch_input_per_million, Some(2.50), "50% of 5.00");
        assert_eq!(opus.batch_output_per_million, Some(12.50), "50% of 25.00");
        assert_eq!(opus.prompt_cache_min_tokens, Some(4096), "Opus 4.x: 4096");

        let sonnet = rates
            .latest("anthropic", "claude-sonnet-4-6")
            .expect("present");
        assert_eq!(sonnet.batch_input_per_million, Some(1.50));
        assert_eq!(sonnet.batch_output_per_million, Some(7.50));
        assert_eq!(
            sonnet.prompt_cache_min_tokens,
            Some(2048),
            "Sonnet 4.6: 2048"
        );

        // OpenAI flagship: has a batch tier and a 1024-token auto-cache minimum.
        let gpt55 = rates.latest("openai", "gpt-5.5").expect("present");
        assert_eq!(gpt55.batch_input_per_million, Some(2.50));
        assert_eq!(gpt55.prompt_cache_min_tokens, Some(1024));

        // Gemini: batch tier present, cache minimum intentionally left unset.
        let gemini = rates.latest("gemini", "gemini-3.1-pro").expect("present");
        assert_eq!(gemini.batch_input_per_million, Some(1.00));
        assert_eq!(gemini.prompt_cache_min_tokens, None);

        // No batch tier at all → every one of these fields is None.
        let groq_llama = rates
            .latest("groq", "llama-3.1-8b-instant")
            .expect("present");
        assert_eq!(groq_llama.batch_input_per_million, None);
        assert_eq!(groq_llama.batch_output_per_million, None);
        assert_eq!(groq_llama.prompt_cache_min_tokens, None);
    }

    /// `cache_write_rate_per_million` resolves the per-TTL premium: the 5-min
    /// tier comes from the catalog column, the 1-hour tier is 2× base input —
    /// and both are `None` when no 5-min premium is documented, so the caller
    /// falls back to the plain input rate.
    #[test]
    fn cache_write_rate_resolves_per_ttl_tier() {
        let rates = catalog();

        // Sonnet 4.6: base input 3.00, 5-min write 3.75 (= 1.25×).
        let sonnet = rates
            .latest("anthropic", "claude-sonnet-4-6")
            .expect("present");
        let sonnet_five_min = sonnet.cache_write_rate_per_million(CacheWriteTier::FiveMin);
        let sonnet_one_hour = sonnet.cache_write_rate_per_million(CacheWriteTier::OneHour);
        assert_eq!(
            sonnet_five_min,
            Some(3.75),
            "5-min tier = catalog cache_write_per_million (1.25× input)"
        );
        assert_eq!(
            sonnet_one_hour,
            Some(6.00),
            "1-hour tier = 2× base input (3.00)"
        );

        // Opus 4.8: base input 5.00, so the 1-hour write rate is 10.00.
        let opus = rates.latest("anthropic", "claude-opus-4-8").expect("present");
        let opus_five_min = opus.cache_write_rate_per_million(CacheWriteTier::FiveMin);
        let opus_one_hour = opus.cache_write_rate_per_million(CacheWriteTier::OneHour);
        assert_eq!(opus_five_min, Some(6.25));
        assert_eq!(
            opus_one_hour,
            Some(10.00),
            "1-hour tier = 2× base input (5.00)"
        );

        // No documented write premium: neither tier resolves, so the caller
        // prices these tokens at the plain input rate (unchanged).
        let groq_llama = rates
            .latest("groq", "llama-3.1-8b-instant")
            .expect("present");
        let groq_five_min = groq_llama.cache_write_rate_per_million(CacheWriteTier::FiveMin);
        let groq_one_hour = groq_llama.cache_write_rate_per_million(CacheWriteTier::OneHour);
        assert_eq!(groq_five_min, None);
        assert_eq!(
            groq_one_hour, None,
            "no 5-min premium → no 1-hour premium either"
        );
    }

    /// The default tier is the 5-minute one — Anthropic's default for a bare
    /// `cache_control: {"type": "ephemeral"}` (no `ttl`), which is the only
    /// breakpoint the gateway's Anthropic adapter emits.
    #[test]
    fn cache_write_tier_defaults_to_five_min() {
        let default_tier = CacheWriteTier::default();
        assert_eq!(default_tier, CacheWriteTier::FiveMin);
    }

    /// Flex eligibility is catalog-driven: supported gpt-5.x models carry a
    /// Flex rate (== batch, 50% of standard) and report eligible, while
    /// o3 / o4-mini are batch-only "specialized models" with no Flex rate and
    /// are therefore NOT eligible. Verified vs developers.openai.com Flex
    /// docs/pricing.
    #[test]
    fn flex_rates_and_eligibility_match_openai_docs() {
        let rates = catalog();

        // gpt-5.5: standard $5/$30, flex $2.50/$15 (== batch, 50% off).
        let gpt55 = rates.latest("openai", "gpt-5.5").expect("present");
        assert!(gpt55.flex_eligible(), "gpt-5.5 is flex-eligible");
        assert_eq!(gpt55.flex_rates_per_million(), Some((2.50, 15.00)));
        assert_eq!(gpt55.flex_input_per_million, gpt55.batch_input_per_million);
        assert_eq!(
            gpt55.flex_output_per_million,
            gpt55.batch_output_per_million
        );

        // gpt-5.4: standard $2.50/$15, flex $1.25/$7.50.
        let gpt54 = rates.latest("openai", "gpt-5.4").expect("present");
        assert!(gpt54.flex_eligible());
        assert_eq!(gpt54.flex_rates_per_million(), Some((1.25, 7.50)));

        // o3 / o4-mini are batch-only: no flex rate, hence ineligible.
        let o3 = rates.latest("openai", "o3").expect("present");
        assert!(!o3.flex_eligible(), "o3 is batch-only, not flex-eligible");
        assert_eq!(o3.flex_rates_per_million(), None);
        let o4_mini = rates.latest("openai", "o4-mini").expect("present");
        assert!(!o4_mini.flex_eligible());

        // Non-OpenAI models never carry a flex rate.
        let haiku = rates
            .latest("anthropic", "claude-haiku-4-5")
            .expect("present");
        assert!(!haiku.flex_eligible());
    }

    /// Lookups miss cleanly when either half of the key is unknown.
    #[test]
    fn unknown_provider_or_model_is_none() {
        let rates = catalog();
        let unknown_model = rates.latest("openai", "no-such-model");
        let unknown_provider = rates.latest("no-such-provider", "gpt-4o");
        assert!(unknown_model.is_none());
        assert!(unknown_provider.is_none());
    }

    /// `at` picks the rate in effect at the given instant, falling back to
    /// the earliest entry for instants that predate the whole history.
    #[test]
    fn at_selects_rate_effective_at_timestamp() {
        // Two-entry history: $1/$2 from 2026-01-01, $3/$4 from 2026-06-01.
        let two_rates = r#"
            [[entry]]
            provider = "p"
            model = "m"
            input_per_million = 1.0
            output_per_million = 2.0
            effective_at = "2026-01-01T00:00:00Z"

            [[entry]]
            provider = "p"
            model = "m"
            input_per_million = 3.0
            output_per_million = 4.0
            effective_at = "2026-06-01T00:00:00Z"
        "#;
        let parsed = PricingCatalog::parse(two_rates).expect("valid");

        // Input rate in effect at midnight UTC on the first of the month.
        let input_rate_on = |year: i32, month: u32| -> f64 {
            let instant = Utc.with_ymd_and_hms(year, month, 1, 0, 0, 0).unwrap();
            parsed.at("p", "m", instant).unwrap().input_per_million
        };

        // Before either entry: earliest entry (best-effort).
        assert_eq!(input_rate_on(2025, 1), 1.0);

        // Between the two: the first (older) rate.
        assert_eq!(input_rate_on(2026, 3), 1.0);

        // After the second: the newest rate.
        assert_eq!(input_rate_on(2026, 9), 3.0);

        // `latest` is always the newest regardless of time.
        assert_eq!(parsed.latest("p", "m").unwrap().input_per_million, 3.0);
    }
}
625}