1use std::collections::HashMap;
16use std::sync::OnceLock;
17
18use chrono::{DateTime, Utc};
19use serde::{Deserialize, Serialize};
20
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct ModelPricing {
23 pub input_per_million: f64,
25 pub output_per_million: f64,
27 pub cached_input_per_million: Option<f64>,
29 pub cache_write_per_million: Option<f64>,
33 pub effective_at: DateTime<Utc>,
35}
36
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct ModelInfo {
39 pub id: String,
40 pub provider: String,
41 pub capabilities: Vec<Capability>,
42 pub max_input_tokens: u64,
43 pub max_output_tokens: u64,
44}
45
46#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
47#[serde(rename_all = "snake_case")]
48pub enum Capability {
49 Text,
50 Vision,
51 Audio,
52 Tools,
53 JsonMode,
54 Streaming,
55 Reasoning,
56 PromptCaching,
57}
58
/// Text of `../data/pricing.toml` (path relative to this source file),
/// embedded into the binary at compile time by `include_str!`.
const PRICING_TOML: &str = include_str!("../data/pricing.toml");
62
63#[derive(Debug, Deserialize)]
65struct RawEntry {
66 provider: String,
67 model: String,
68 input_per_million: f64,
69 output_per_million: f64,
70 #[serde(default)]
71 cached_input_per_million: Option<f64>,
72 #[serde(default)]
73 cache_write_per_million: Option<f64>,
74 effective_at: DateTime<Utc>,
75}
76
77#[derive(Debug, Deserialize)]
78struct RawCatalog {
79 #[serde(default)]
80 entry: Vec<RawEntry>,
81}
82
83#[derive(Debug)]
86pub struct PricingCatalog {
87 by_model: HashMap<(String, String), Vec<ModelPricing>>,
88}
89
90impl PricingCatalog {
91 pub fn parse(toml_text: &str) -> Result<Self, toml::de::Error> {
94 let raw: RawCatalog = toml::from_str(toml_text)?;
95 let mut by_model: HashMap<(String, String), Vec<ModelPricing>> = HashMap::new();
96 for e in raw.entry {
97 by_model
98 .entry((e.provider, e.model))
99 .or_default()
100 .push(ModelPricing {
101 input_per_million: e.input_per_million,
102 output_per_million: e.output_per_million,
103 cached_input_per_million: e.cached_input_per_million,
104 cache_write_per_million: e.cache_write_per_million,
105 effective_at: e.effective_at,
106 });
107 }
108 for history in by_model.values_mut() {
111 history.sort_by_key(|p| p.effective_at);
112 }
113 Ok(Self { by_model })
114 }
115
116 pub fn latest(&self, provider: &str, model: &str) -> Option<ModelPricing> {
119 self.by_model
120 .get(&(provider.to_string(), model.to_string()))?
121 .last()
122 .cloned()
123 }
124
125 pub fn at(&self, provider: &str, model: &str, at: DateTime<Utc>) -> Option<ModelPricing> {
130 let history = self
131 .by_model
132 .get(&(provider.to_string(), model.to_string()))?;
133 history
134 .iter()
135 .rev()
136 .find(|p| p.effective_at <= at)
137 .or_else(|| history.first())
138 .cloned()
139 }
140
141 pub fn latest_for_provider(&self, provider: &str) -> Vec<(String, ModelPricing)> {
145 self.by_model
146 .iter()
147 .filter(|((p, _), _)| p == provider)
148 .filter_map(|((_, model), history)| history.last().map(|p| (model.clone(), p.clone())))
149 .collect()
150 }
151
152 pub fn pairs(&self) -> Vec<(String, String)> {
156 self.by_model.keys().cloned().collect()
157 }
158
159 pub fn len(&self) -> usize {
161 self.by_model.len()
162 }
163
164 pub fn is_empty(&self) -> bool {
166 self.by_model.is_empty()
167 }
168
169 pub fn catalog_max_effective_at(&self) -> Option<DateTime<Utc>> {
177 self.by_model
178 .values()
179 .filter_map(|history| history.last().map(|p| p.effective_at))
180 .max()
181 }
182}
183
184pub fn catalog() -> &'static PricingCatalog {
188 static CATALOG: OnceLock<PricingCatalog> = OnceLock::new();
189 CATALOG.get_or_init(|| {
190 PricingCatalog::parse(PRICING_TOML).expect("embedded data/pricing.toml must be valid")
191 })
192}
193
#[cfg(test)]
mod catalog_tests {
    use super::*;
    use chrono::TimeZone;

    /// Midnight UTC on the given calendar date.
    fn utc_midnight(year: i32, month: u32, day: u32) -> DateTime<Utc> {
        Utc.with_ymd_and_hms(year, month, day, 0, 0, 0).unwrap()
    }

    /// The embedded catalog loads and has the expected number of
    /// `(provider, model)` pairs.
    #[test]
    fn embedded_catalog_parses_and_is_populated() {
        let embedded = catalog();
        assert!(!embedded.is_empty(), "embedded catalog should not be empty");
        // Pinned on purpose: adding or removing a model must be a conscious change.
        assert_eq!(
            embedded.len(),
            36,
            "unexpected catalog size — update if intentional"
        );
    }

    /// The embedded catalog's newest record is not older than a fixed floor date.
    #[test]
    fn catalog_max_effective_at_is_present() {
        let floor = utc_midnight(2026, 1, 1);
        let max_date = catalog()
            .catalog_max_effective_at()
            .expect("non-empty catalog must have a max effective_at");
        assert!(
            max_date >= floor,
            "catalog_max_effective_at = {max_date} is older than expected floor {floor}"
        );
    }

    /// With two models, the maximum is taken across models, not per model.
    #[test]
    fn catalog_max_effective_at_picks_newest() {
        let fixture = r#"
            [[entry]]
            provider = "p"
            model = "m1"
            input_per_million = 1.0
            output_per_million = 2.0
            effective_at = "2026-03-01T00:00:00Z"

            [[entry]]
            provider = "p"
            model = "m2"
            input_per_million = 3.0
            output_per_million = 4.0
            effective_at = "2026-05-01T00:00:00Z"
        "#;
        let parsed = PricingCatalog::parse(fixture).expect("valid");
        let newest = parsed.catalog_max_effective_at().expect("present");
        assert_eq!(
            newest,
            utc_midnight(2026, 5, 1),
            "should return the newest effective_at across all models"
        );
    }

    /// An empty document parses and has no maximum.
    #[test]
    fn catalog_max_effective_at_empty_catalog() {
        let empty = PricingCatalog::parse("").expect("empty TOML is valid");
        assert!(empty.catalog_max_effective_at().is_none());
    }

    /// Spot-checks known rates from the embedded data.
    #[test]
    fn latest_returns_known_rates() {
        let embedded = catalog();

        let gpt4o = embedded.latest("openai", "gpt-4o").expect("gpt-4o present");
        assert_eq!(gpt4o.input_per_million, 2.50);
        assert_eq!(gpt4o.output_per_million, 10.00);
        assert_eq!(gpt4o.cached_input_per_million, Some(1.25));

        let groq = embedded
            .latest("groq", "llama-3.1-8b-instant")
            .expect("present");
        assert_eq!(groq.cached_input_per_million, None);
    }

    /// Anthropic entries carry a cache-write rate; the OpenAI and Groq
    /// entries checked here do not.
    #[test]
    fn anthropic_models_have_cache_write_rate() {
        let embedded = catalog();

        let anthropic_expectations = [
            (
                "claude-haiku-4-5",
                1.25,
                "haiku write rate = 1.25× base input (1.00)",
            ),
            (
                "claude-sonnet-4-6",
                3.75,
                "sonnet write rate = 1.25× base input (3.00)",
            ),
            (
                "claude-opus-4-7",
                6.25,
                "opus write rate = 1.25× base input (5.00)",
            ),
        ];
        for (model, expected_rate, why) in anthropic_expectations {
            let pricing = embedded.latest("anthropic", model).expect("present");
            assert_eq!(pricing.cache_write_per_million, Some(expected_rate), "{why}");
        }

        let gpt4o = embedded.latest("openai", "gpt-4o").expect("gpt-4o present");
        assert_eq!(
            gpt4o.cache_write_per_million, None,
            "OpenAI has no cache-write premium"
        );

        let groq_llama = embedded
            .latest("groq", "llama-3.1-8b-instant")
            .expect("present");
        assert_eq!(
            groq_llama.cache_write_per_million, None,
            "Groq has no cache-write premium"
        );
    }

    /// Lookups miss when either half of the key is unknown.
    #[test]
    fn unknown_provider_or_model_is_none() {
        let embedded = catalog();
        assert!(embedded.latest("openai", "no-such-model").is_none());
        assert!(embedded.latest("no-such-provider", "gpt-4o").is_none());
    }

    /// `at` picks the record in force at a timestamp and falls back to the
    /// oldest record for timestamps before the history starts.
    #[test]
    fn at_selects_rate_effective_at_timestamp() {
        let fixture = r#"
            [[entry]]
            provider = "p"
            model = "m"
            input_per_million = 1.0
            output_per_million = 2.0
            effective_at = "2026-01-01T00:00:00Z"

            [[entry]]
            provider = "p"
            model = "m"
            input_per_million = 3.0
            output_per_million = 4.0
            effective_at = "2026-06-01T00:00:00Z"
        "#;
        let parsed = PricingCatalog::parse(fixture).expect("valid");
        let input_rate_on = |year, month, day| {
            parsed
                .at("p", "m", utc_midnight(year, month, day))
                .unwrap()
                .input_per_million
        };

        // Before the first record: falls back to the oldest rate.
        assert_eq!(input_rate_on(2025, 1, 1), 1.0);
        // Between the two records: the first rate is still in force.
        assert_eq!(input_rate_on(2026, 3, 1), 1.0);
        // After the second record: the newer rate applies.
        assert_eq!(input_rate_on(2026, 9, 1), 3.0);

        assert_eq!(parsed.latest("p", "m").unwrap().input_per_million, 3.0);
    }
}