1use agent_sdk_foundation::llm::Usage;
2
/// Provenance tag recorded on each [`ModelCapabilities`] entry through its
/// `source_status` field.
///
/// NOTE(review): the precise meaning of each variant is not spelled out in
/// code; the per-variant descriptions below are inferred from how the bundled
/// catalog uses them and should be confirmed.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SourceStatus {
    /// Presumably: figures taken directly from the provider's own published
    /// documentation.
    Official,
    /// Presumably: figures inferred from a related tier, reseller listing, or
    /// secondary guidance rather than quoted verbatim.
    Derived,
    /// Presumably: figures (or their absence) have not been checked against a
    /// first-party source.
    Unverified,
}
9
/// A single rate expressed in US dollars per one million tokens.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct PricePoint {
    /// USD charged for 1,000,000 tokens at this rate.
    pub usd_per_million_tokens: f64,
}

impl PricePoint {
    /// Wraps a USD-per-million-tokens rate.
    ///
    /// The value is stored as given; no validation is performed.
    #[must_use]
    pub const fn new(usd_per_million_tokens: f64) -> Self {
        Self {
            usd_per_million_tokens,
        }
    }

    /// Returns the USD cost of `tokens` tokens at this rate.
    ///
    /// The token count is first scaled to millions and then multiplied by the
    /// rate, so `0` tokens always costs `0.0` for a finite rate.
    #[must_use]
    pub fn estimate_cost_usd(self, tokens: u32) -> f64 {
        const TOKENS_PER_MILLION: f64 = 1_000_000.0;

        // u32 -> f64 is lossless, so `f64::from` is used instead of an `as` cast.
        let millions_of_tokens = f64::from(tokens) / TOKENS_PER_MILLION;
        millions_of_tokens * self.usd_per_million_tokens
    }
}
29
30#[derive(Debug, Clone, Copy, PartialEq)]
31pub struct Pricing {
32 pub input: Option<PricePoint>,
33 pub output: Option<PricePoint>,
34 pub cached_input: Option<PricePoint>,
35 pub notes: Option<&'static str>,
36}
37
38impl Pricing {
39 #[must_use]
40 pub const fn flat(input: f64, output: f64) -> Self {
41 Self {
42 input: Some(PricePoint::new(input)),
43 output: Some(PricePoint::new(output)),
44 cached_input: None,
45 notes: None,
46 }
47 }
48
49 #[must_use]
50 pub const fn flat_with_cached(input: f64, output: f64, cached_input: f64) -> Self {
51 Self {
52 input: Some(PricePoint::new(input)),
53 output: Some(PricePoint::new(output)),
54 cached_input: Some(PricePoint::new(cached_input)),
55 notes: None,
56 }
57 }
58
59 #[must_use]
60 pub const fn with_notes(mut self, notes: &'static str) -> Self {
61 self.notes = Some(notes);
62 self
63 }
64
65 #[must_use]
66 pub fn estimate_cost_usd(&self, usage: &Usage) -> Option<f64> {
67 let cached_input_tokens = usage.cached_input_tokens.min(usage.input_tokens);
68 let uncached_input_tokens = usage.input_tokens.saturating_sub(cached_input_tokens);
69
70 let input = match (self.input, self.cached_input) {
71 (Some(input), Some(cached_input)) => Some(
72 input.estimate_cost_usd(uncached_input_tokens)
73 + cached_input.estimate_cost_usd(cached_input_tokens),
74 ),
75 (Some(input), None) => Some(input.estimate_cost_usd(usage.input_tokens)),
76 (None, Some(cached_input)) => Some(cached_input.estimate_cost_usd(cached_input_tokens)),
77 (None, None) => None,
78 };
79 let output = self
80 .output
81 .map(|p| p.estimate_cost_usd(usage.output_tokens));
82 match (input, output) {
83 (Some(input), Some(output)) => Some(input + output),
84 (Some(input), None) => Some(input),
85 (None, Some(output)) => Some(output),
86 (None, None) => None,
87 }
88 }
89}
90
91#[derive(Debug, Clone, Copy, PartialEq)]
92pub struct ModelCapabilities {
93 pub provider: &'static str,
94 pub model_id: &'static str,
95 pub context_window: Option<u32>,
96 pub max_output_tokens: Option<u32>,
97 pub pricing: Option<Pricing>,
98 pub supports_thinking: bool,
99 pub supports_adaptive_thinking: bool,
100 pub source_url: &'static str,
101 pub source_status: SourceStatus,
102 pub notes: Option<&'static str>,
103}
104
105impl ModelCapabilities {
106 #[must_use]
107 pub fn estimate_cost_usd(&self, usage: &Usage) -> Option<f64> {
108 self.pricing
109 .as_ref()
110 .and_then(|p| p.estimate_cost_usd(usage))
111 }
112}
113
// Source URLs referenced by the `source_url` field of the catalog entries.

// Anthropic documentation.
const ANTHROPIC_MODELS_URL: &str =
    "https://docs.anthropic.com/en/docs/about-claude/models/all-models";

// OpenAI documentation.
const OPENAI_MODELS_URL: &str = "https://developers.openai.com/api/docs/models";
const OPENAI_PRICING_URL: &str = "https://developers.openai.com/api/docs/pricing";
const OPENAI_GPT54_URL: &str = "https://developers.openai.com/api/docs/models/gpt-5.4";
const OPENAI_GPT53_CODEX_URL: &str =
    "https://developers.openai.com/api/docs/models/gpt-5.3-codex";

// Google Gemini documentation.
const GOOGLE_MODELS_URL: &str = "https://ai.google.dev/gemini-api/docs/models";
const GOOGLE_PRICING_URL: &str = "https://ai.google.dev/gemini-api/docs/pricing";

// GLM (z.ai) sources.
const OPENROUTER_GLM51_URL: &str = "https://openrouter.ai/z-ai/glm-5.1";
const ZAI_GLM5_PRICING_URL: &str = "https://docs.z.ai/guides/overview/pricing";

// Kimi (Moonshot) sources.
const OPENROUTER_KIMI_K26_URL: &str = "https://openrouter.ai/moonshotai/kimi-k2.6";
const OPENROUTER_KIMI_K25_URL: &str = "https://openrouter.ai/moonshotai/kimi-k2.5";
const KIMI_K25_AA_URL: &str = "https://artificialanalysis.ai/models/kimi-k2-5";
const OPENROUTER_KIMI_K2_THINKING_URL: &str =
    "https://openrouter.ai/moonshotai/kimi-k2-thinking";

// DeepSeek sources.
const OPENROUTER_DEEPSEEK_V4_PRO_URL: &str = "https://openrouter.ai/deepseek/deepseek-v4-pro";
const OPENROUTER_DEEPSEEK_V4_FLASH_URL: &str =
    "https://openrouter.ai/deepseek/deepseek-v4-flash";
const DEEPSEEK_PRICING_URL: &str = "https://api-docs.deepseek.com/quick_start/pricing";

// MiniMax sources.
const MINIMAX_PRICING_URL: &str = "https://platform.minimax.io/docs/guides/pricing-paygo";
const OPENROUTER_MINIMAX_M25_URL: &str = "https://openrouter.ai/minimax/minimax-m2.5";
136
137const MODEL_CAPABILITIES: &[ModelCapabilities] = &[
138 ModelCapabilities {
140 provider: "anthropic",
141 model_id: "claude-fable-5",
142 context_window: Some(1_000_000),
143 max_output_tokens: Some(128_000),
144 pricing: Some(Pricing::flat(10.0, 50.0).with_notes("Anthropic Fable 5 official pricing: $10 input / $50 output per 1M tokens.")),
145 supports_thinking: true,
146 supports_adaptive_thinking: true,
147 source_url: ANTHROPIC_MODELS_URL,
148 source_status: SourceStatus::Official,
149 notes: Some("Fable 5 is adaptive-only: adaptive thinking is always on (applies even when `thinking` is unset) and `ThinkingMode::Enabled { budget_tokens }` is rejected by the Anthropic API. The SDK fails fast in validate_thinking_config. Raw chain of thought is never returned — thinking blocks arrive empty (the SDK requests thinking display=omitted). Safety classifiers may decline a request with stop_reason=refusal on an HTTP 200."),
150 },
151 ModelCapabilities {
152 provider: "anthropic",
153 model_id: "claude-opus-4-8",
154 context_window: Some(1_000_000),
155 max_output_tokens: Some(128_000),
156 pricing: Some(Pricing::flat(5.0, 25.0).with_notes("Anthropic Opus 4.8 pricing matches the Opus 4.6 tier ($5/$25 per 1M); verify exact current SKU mapping before billing-critical use.")),
157 supports_thinking: true,
158 supports_adaptive_thinking: true,
159 source_url: ANTHROPIC_MODELS_URL,
160 source_status: SourceStatus::Derived,
161 notes: Some("Opus 4.8 requires adaptive thinking — `ThinkingMode::Enabled { budget_tokens }` is rejected by the Anthropic API. The SDK fails fast in validate_thinking_config."),
162 },
163 ModelCapabilities {
164 provider: "anthropic",
165 model_id: "claude-opus-4-7",
166 context_window: Some(1_000_000),
167 max_output_tokens: Some(128_000),
168 pricing: Some(Pricing::flat(5.0, 25.0).with_notes("Anthropic Opus 4.7 pricing matches the Opus 4.6 tier ($5/$25 per 1M); verify exact current SKU mapping before billing-critical use.")),
169 supports_thinking: true,
170 supports_adaptive_thinking: true,
171 source_url: ANTHROPIC_MODELS_URL,
172 source_status: SourceStatus::Derived,
173 notes: Some("Opus 4.7 requires adaptive thinking — `ThinkingMode::Enabled { budget_tokens }` is rejected by the Anthropic API. The SDK fails fast in validate_thinking_config."),
174 },
175 ModelCapabilities {
176 provider: "anthropic",
177 model_id: "claude-opus-4-6",
178 context_window: Some(1_000_000),
179 max_output_tokens: Some(128_000),
180 pricing: Some(Pricing::flat(5.0, 25.0).with_notes("Anthropic Opus 4.6 pricing from bundled Claude API guidance; verify exact current SKU mapping before billing-critical use.")),
181 supports_thinking: true,
182 supports_adaptive_thinking: true,
183 source_url: ANTHROPIC_MODELS_URL,
184 source_status: SourceStatus::Derived,
185 notes: Some("Current Anthropic docs show this model alongside 200K/128K markers."),
186 },
187 ModelCapabilities {
188 provider: "anthropic",
189 model_id: "claude-sonnet-4-6",
190 context_window: Some(1_000_000),
191 max_output_tokens: Some(64_000),
192 pricing: Some(Pricing::flat(3.0, 15.0).with_notes("Anthropic Sonnet tier pricing; verify exact current SKU mapping before billing-critical use.")),
193 supports_thinking: true,
194 supports_adaptive_thinking: true,
195 source_url: ANTHROPIC_MODELS_URL,
196 source_status: SourceStatus::Derived,
197 notes: Some("Anthropic docs list Sonnet 4.6; user confirmed adaptive thinking support."),
198 },
199 ModelCapabilities {
200 provider: "anthropic",
201 model_id: "claude-sonnet-4-5-20250929",
202 context_window: Some(200_000),
203 max_output_tokens: Some(64_000),
204 pricing: Some(Pricing::flat(3.0, 15.0).with_notes("Anthropic Sonnet tier pricing; verify exact current SKU mapping before billing-critical use.")),
205 supports_thinking: true,
206 supports_adaptive_thinking: false,
207 source_url: ANTHROPIC_MODELS_URL,
208 source_status: SourceStatus::Derived,
209 notes: None,
210 },
211 ModelCapabilities {
212 provider: "anthropic",
213 model_id: "claude-haiku-4-5-20251001",
214 context_window: Some(200_000),
215 max_output_tokens: Some(64_000),
216 pricing: Some(Pricing::flat(1.0, 5.0).with_notes("Anthropic Haiku tier pricing; verify exact current SKU mapping before billing-critical use.")),
217 supports_thinking: true,
218 supports_adaptive_thinking: false,
219 source_url: ANTHROPIC_MODELS_URL,
220 source_status: SourceStatus::Derived,
221 notes: None,
222 },
223 ModelCapabilities {
224 provider: "anthropic",
225 model_id: "claude-sonnet-4-20250514",
226 context_window: Some(200_000),
227 max_output_tokens: Some(64_000),
228 pricing: Some(Pricing::flat(3.0, 15.0).with_notes("Anthropic Sonnet tier pricing; verify exact current SKU mapping before billing-critical use.")),
229 supports_thinking: true,
230 supports_adaptive_thinking: false,
231 source_url: ANTHROPIC_MODELS_URL,
232 source_status: SourceStatus::Derived,
233 notes: None,
234 },
235 ModelCapabilities {
236 provider: "anthropic",
237 model_id: "claude-opus-4-20250514",
238 context_window: Some(200_000),
239 max_output_tokens: Some(32_000),
240 pricing: Some(Pricing::flat(15.0, 75.0).with_notes("Anthropic Opus tier pricing; verify exact current SKU mapping before billing-critical use.")),
241 supports_thinking: true,
242 supports_adaptive_thinking: false,
243 source_url: ANTHROPIC_MODELS_URL,
244 source_status: SourceStatus::Derived,
245 notes: None,
246 },
247 ModelCapabilities {
248 provider: "anthropic",
249 model_id: "claude-3-5-sonnet-20241022",
250 context_window: Some(200_000),
251 max_output_tokens: Some(8_192),
252 pricing: Some(Pricing::flat(3.0, 15.0).with_notes("Anthropic Sonnet tier pricing; verify exact current SKU mapping before billing-critical use.")),
253 supports_thinking: true,
254 supports_adaptive_thinking: false,
255 source_url: ANTHROPIC_MODELS_URL,
256 source_status: SourceStatus::Derived,
257 notes: None,
258 },
259 ModelCapabilities {
260 provider: "anthropic",
261 model_id: "claude-3-5-haiku-20241022",
262 context_window: Some(200_000),
263 max_output_tokens: Some(8_192),
264 pricing: Some(Pricing::flat(1.0, 5.0).with_notes("Anthropic Haiku tier pricing; verify exact current SKU mapping before billing-critical use.")),
265 supports_thinking: true,
266 supports_adaptive_thinking: false,
267 source_url: ANTHROPIC_MODELS_URL,
268 source_status: SourceStatus::Derived,
269 notes: None,
270 },
271 ModelCapabilities {
273 provider: "openai",
274 model_id: "gpt-5.4",
275 context_window: Some(1_050_000),
276 max_output_tokens: Some(128_000),
277 pricing: Some(Pricing::flat_with_cached(2.50, 15.0, 0.25)),
278 supports_thinking: true,
279 supports_adaptive_thinking: false,
280 source_url: OPENAI_GPT54_URL,
281 source_status: SourceStatus::Official,
282 notes: Some("OpenAI model docs list 1.05M context, 128K max output, and reasoning.effort support."),
283 },
284 ModelCapabilities {
285 provider: "openai",
286 model_id: "gpt-5.3-codex",
287 context_window: Some(400_000),
288 max_output_tokens: Some(120_000),
289 pricing: Some(Pricing::flat_with_cached(1.50, 6.0, 0.375)),
290 supports_thinking: true,
291 supports_adaptive_thinking: false,
292 source_url: OPENAI_GPT53_CODEX_URL,
293 source_status: SourceStatus::Official,
294 notes: Some("OpenAI model docs list Chat Completions and Responses API support plus reasoning.effort levels."),
295 },
296 ModelCapabilities {
297 provider: "openai",
298 model_id: "gpt-5",
299 context_window: Some(400_000),
300 max_output_tokens: Some(128_000),
301 pricing: Some(Pricing::flat_with_cached(1.25, 10.0, 0.125)),
302 supports_thinking: false,
303 supports_adaptive_thinking: false,
304 source_url: OPENAI_PRICING_URL,
305 source_status: SourceStatus::Official,
306 notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
307 },
308 ModelCapabilities {
309 provider: "openai",
310 model_id: "gpt-5-mini",
311 context_window: Some(400_000),
312 max_output_tokens: Some(128_000),
313 pricing: Some(Pricing::flat_with_cached(0.125, 1.0, 0.0125)),
314 supports_thinking: false,
315 supports_adaptive_thinking: false,
316 source_url: OPENAI_PRICING_URL,
317 source_status: SourceStatus::Official,
318 notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
319 },
320 ModelCapabilities {
321 provider: "openai",
322 model_id: "gpt-5-nano",
323 context_window: Some(400_000),
324 max_output_tokens: Some(128_000),
325 pricing: Some(Pricing::flat_with_cached(0.025, 0.20, 0.0025)),
326 supports_thinking: false,
327 supports_adaptive_thinking: false,
328 source_url: OPENAI_PRICING_URL,
329 source_status: SourceStatus::Official,
330 notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
331 },
332 ModelCapabilities {
333 provider: "openai",
334 model_id: "gpt-5.2-instant",
335 context_window: Some(400_000),
336 max_output_tokens: Some(128_000),
337 pricing: None,
338 supports_thinking: false,
339 supports_adaptive_thinking: false,
340 source_url: OPENAI_MODELS_URL,
341 source_status: SourceStatus::Unverified,
342 notes: Some("Model exists in OpenAI docs, but pricing was not extracted from the official pricing page in this pass."),
343 },
344 ModelCapabilities {
345 provider: "openai",
346 model_id: "gpt-5.2-thinking",
347 context_window: Some(400_000),
348 max_output_tokens: Some(128_000),
349 pricing: None,
350 supports_thinking: true,
351 supports_adaptive_thinking: false,
352 source_url: OPENAI_MODELS_URL,
353 source_status: SourceStatus::Unverified,
354 notes: Some("Model exists in OpenAI docs, but pricing was not extracted from the official pricing page in this pass."),
355 },
356 ModelCapabilities {
357 provider: "openai",
358 model_id: "gpt-5.2-pro",
359 context_window: Some(400_000),
360 max_output_tokens: Some(128_000),
361 pricing: Some(Pricing::flat(10.50, 84.0)),
362 supports_thinking: false,
363 supports_adaptive_thinking: false,
364 source_url: OPENAI_PRICING_URL,
365 source_status: SourceStatus::Official,
366 notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
367 },
368 ModelCapabilities {
369 provider: "openai",
370 model_id: "gpt-5.2-codex",
371 context_window: Some(400_000),
372 max_output_tokens: Some(128_000),
373 pricing: None,
374 supports_thinking: false,
375 supports_adaptive_thinking: false,
376 source_url: OPENAI_MODELS_URL,
377 source_status: SourceStatus::Unverified,
378 notes: Some("Model presence confirmed from OpenAI docs; pricing not yet extracted in this pass."),
379 },
380 ModelCapabilities {
381 provider: "openai",
382 model_id: "o3",
383 context_window: Some(200_000),
384 max_output_tokens: Some(100_000),
385 pricing: Some(Pricing::flat(1.0, 4.0)),
386 supports_thinking: true,
387 supports_adaptive_thinking: false,
388 source_url: OPENAI_PRICING_URL,
389 source_status: SourceStatus::Official,
390 notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
391 },
392 ModelCapabilities {
393 provider: "openai",
394 model_id: "o3-mini",
395 context_window: Some(200_000),
396 max_output_tokens: Some(100_000),
397 pricing: Some(Pricing::flat(0.55, 2.20)),
398 supports_thinking: true,
399 supports_adaptive_thinking: false,
400 source_url: OPENAI_PRICING_URL,
401 source_status: SourceStatus::Official,
402 notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
403 },
404 ModelCapabilities {
405 provider: "openai",
406 model_id: "o4-mini",
407 context_window: Some(200_000),
408 max_output_tokens: Some(100_000),
409 pricing: Some(Pricing::flat(0.55, 2.20)),
410 supports_thinking: true,
411 supports_adaptive_thinking: false,
412 source_url: OPENAI_PRICING_URL,
413 source_status: SourceStatus::Official,
414 notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
415 },
416 ModelCapabilities {
417 provider: "openai",
418 model_id: "o1",
419 context_window: Some(200_000),
420 max_output_tokens: Some(100_000),
421 pricing: Some(Pricing::flat(7.50, 30.0)),
422 supports_thinking: true,
423 supports_adaptive_thinking: false,
424 source_url: OPENAI_PRICING_URL,
425 source_status: SourceStatus::Official,
426 notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
427 },
428 ModelCapabilities {
429 provider: "openai",
430 model_id: "o1-mini",
431 context_window: Some(200_000),
432 max_output_tokens: Some(100_000),
433 pricing: Some(Pricing::flat(0.55, 2.20)),
434 supports_thinking: true,
435 supports_adaptive_thinking: false,
436 source_url: OPENAI_PRICING_URL,
437 source_status: SourceStatus::Official,
438 notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
439 },
440 ModelCapabilities {
441 provider: "openai",
442 model_id: "gpt-4.1",
443 context_window: Some(1_000_000),
444 max_output_tokens: Some(16_384),
445 pricing: Some(Pricing::flat(1.0, 4.0)),
446 supports_thinking: false,
447 supports_adaptive_thinking: false,
448 source_url: OPENAI_PRICING_URL,
449 source_status: SourceStatus::Official,
450 notes: Some("Pricing verified from OpenAI pricing page. Context window from model family docs/notes."),
451 },
452 ModelCapabilities {
453 provider: "openai",
454 model_id: "gpt-4.1-mini",
455 context_window: Some(1_000_000),
456 max_output_tokens: Some(16_384),
457 pricing: Some(Pricing::flat(0.20, 0.80)),
458 supports_thinking: false,
459 supports_adaptive_thinking: false,
460 source_url: OPENAI_PRICING_URL,
461 source_status: SourceStatus::Official,
462 notes: Some("Pricing verified from OpenAI pricing page. Context window from model family docs/notes."),
463 },
464 ModelCapabilities {
465 provider: "openai",
466 model_id: "gpt-4.1-nano",
467 context_window: Some(1_000_000),
468 max_output_tokens: Some(16_384),
469 pricing: Some(Pricing::flat(0.05, 0.20)),
470 supports_thinking: false,
471 supports_adaptive_thinking: false,
472 source_url: OPENAI_PRICING_URL,
473 source_status: SourceStatus::Official,
474 notes: Some("Pricing verified from OpenAI pricing page. Context window from model family docs/notes."),
475 },
476 ModelCapabilities {
477 provider: "openai",
478 model_id: "gpt-4o",
479 context_window: Some(128_000),
480 max_output_tokens: Some(16_384),
481 pricing: Some(Pricing::flat(1.25, 5.0)),
482 supports_thinking: false,
483 supports_adaptive_thinking: false,
484 source_url: OPENAI_PRICING_URL,
485 source_status: SourceStatus::Official,
486 notes: Some("Pricing verified from OpenAI pricing page. Context/max output from existing runtime assumptions."),
487 },
488 ModelCapabilities {
489 provider: "openai",
490 model_id: "gpt-4o-mini",
491 context_window: Some(128_000),
492 max_output_tokens: Some(16_384),
493 pricing: Some(Pricing::flat(0.075, 0.30)),
494 supports_thinking: false,
495 supports_adaptive_thinking: false,
496 source_url: OPENAI_PRICING_URL,
497 source_status: SourceStatus::Official,
498 notes: Some("Pricing verified from OpenAI pricing page. Context/max output from existing runtime assumptions."),
499 },
500 ModelCapabilities {
502 provider: "gemini",
503 model_id: "gemini-3.1-pro-preview",
504 context_window: Some(1_048_576),
505 max_output_tokens: Some(65_536),
506 pricing: Some(Pricing::flat(2.0, 12.0).with_notes("Official pricing for prompts <= 200K tokens. For prompts > 200K, pricing increases to $4 input / $18 output per 1M tokens.")),
507 supports_thinking: true,
508 supports_adaptive_thinking: false,
509 source_url: GOOGLE_PRICING_URL,
510 source_status: SourceStatus::Official,
511 notes: Some("Pricing sourced from Gemini 3.1 Pro Preview docs."),
512 },
513 ModelCapabilities {
514 provider: "gemini",
515 model_id: "gemini-3.1-pro",
516 context_window: Some(1_048_576),
517 max_output_tokens: Some(65_536),
518 pricing: Some(Pricing::flat(2.0, 12.0).with_notes("Legacy alias retained for compatibility. For prompts > 200K, pricing increases to $4 input / $18 output per 1M tokens.")),
519 supports_thinking: true,
520 supports_adaptive_thinking: false,
521 source_url: GOOGLE_PRICING_URL,
522 source_status: SourceStatus::Derived,
523 notes: Some("Legacy Gemini 3.1 Pro alias retained for compatibility; prefer gemini-3.1-pro-preview."),
524 },
525 ModelCapabilities {
526 provider: "gemini",
527 model_id: "gemini-3.1-flash-lite-preview",
528 context_window: Some(1_048_576),
529 max_output_tokens: Some(65_536),
530 pricing: None,
531 supports_thinking: true,
532 supports_adaptive_thinking: false,
533 source_url: GOOGLE_MODELS_URL,
534 source_status: SourceStatus::Unverified,
535 notes: Some("Model presence confirmed from Google docs, but pricing was not extracted in this pass."),
536 },
537 ModelCapabilities {
538 provider: "gemini",
539 model_id: "gemini-3-flash-preview",
540 context_window: Some(1_048_576),
541 max_output_tokens: Some(65_536),
542 pricing: None,
543 supports_thinking: true,
544 supports_adaptive_thinking: false,
545 source_url: GOOGLE_MODELS_URL,
546 source_status: SourceStatus::Unverified,
547 notes: Some("Model presence confirmed from Google docs, but pricing was not extracted in this pass."),
548 },
549 ModelCapabilities {
550 provider: "gemini",
551 model_id: "gemini-3.0-flash",
552 context_window: Some(1_048_576),
553 max_output_tokens: Some(65_536),
554 pricing: None,
555 supports_thinking: true,
556 supports_adaptive_thinking: false,
557 source_url: GOOGLE_MODELS_URL,
558 source_status: SourceStatus::Derived,
559 notes: Some("Legacy Gemini 3.0 Flash model retained for compatibility; prefer gemini-3-flash-preview."),
560 },
561 ModelCapabilities {
562 provider: "gemini",
563 model_id: "gemini-3.0-pro",
564 context_window: Some(1_048_576),
565 max_output_tokens: Some(65_536),
566 pricing: None,
567 supports_thinking: true,
568 supports_adaptive_thinking: false,
569 source_url: GOOGLE_MODELS_URL,
570 source_status: SourceStatus::Unverified,
571 notes: Some("Model presence confirmed from Google docs, but pricing was not extracted in this pass."),
572 },
573 ModelCapabilities {
574 provider: "gemini",
575 model_id: "gemini-2.5-flash",
576 context_window: Some(1_000_000),
577 max_output_tokens: Some(65_536),
578 pricing: Some(Pricing::flat(0.30, 2.50).with_notes("Official text/image/video pricing. Audio input is priced separately at $1.00 / 1M tokens.")),
579 supports_thinking: true,
580 supports_adaptive_thinking: false,
581 source_url: GOOGLE_PRICING_URL,
582 source_status: SourceStatus::Official,
583 notes: Some("Official docs state output pricing includes thinking tokens."),
584 },
585 ModelCapabilities {
586 provider: "gemini",
587 model_id: "gemini-2.5-pro",
588 context_window: Some(1_000_000),
589 max_output_tokens: Some(65_536),
590 pricing: None,
591 supports_thinking: true,
592 supports_adaptive_thinking: false,
593 source_url: GOOGLE_MODELS_URL,
594 source_status: SourceStatus::Unverified,
595 notes: Some("Model presence confirmed from Google docs, but pricing was not extracted in this pass."),
596 },
597 ModelCapabilities {
598 provider: "gemini",
599 model_id: "gemini-2.0-flash",
600 context_window: Some(1_000_000),
601 max_output_tokens: Some(8_192),
602 pricing: Some(Pricing::flat(0.10, 0.40).with_notes("Official text/image/video pricing. Audio input is priced separately at $0.70 / 1M tokens.")),
603 supports_thinking: false,
604 supports_adaptive_thinking: false,
605 source_url: GOOGLE_PRICING_URL,
606 source_status: SourceStatus::Official,
607 notes: None,
608 },
609 ModelCapabilities {
610 provider: "gemini",
611 model_id: "gemini-2.0-flash-lite",
612 context_window: Some(1_000_000),
613 max_output_tokens: Some(8_192),
614 pricing: Some(Pricing::flat(0.075, 0.30)),
615 supports_thinking: false,
616 supports_adaptive_thinking: false,
617 source_url: GOOGLE_PRICING_URL,
618 source_status: SourceStatus::Official,
619 notes: None,
620 },
621 ModelCapabilities {
625 provider: "openai",
626 model_id: "z-ai/glm-5.1",
627 context_window: Some(202_752),
628 max_output_tokens: Some(131_072),
629 pricing: Some(Pricing::flat(0.98, 3.08).with_notes("OpenRouter rate for z-ai/glm-5.1: input $0.98/M, output $3.08/M.")),
630 supports_thinking: true,
631 supports_adaptive_thinking: false,
632 source_url: OPENROUTER_GLM51_URL,
633 source_status: SourceStatus::Derived,
634 notes: Some("GLM-5.1 (z.ai/Zhipu) via OpenRouter slug. Reasoning/thinking model; context 203K (=202,752). max_output 128K from z.ai GLM-5.1 docs, sized generously for hidden reasoning + answer. Released ~Apr 7, 2026."),
635 },
636 ModelCapabilities {
637 provider: "openai",
638 model_id: "glm-5",
639 context_window: Some(200_000),
640 max_output_tokens: Some(131_072),
641 pricing: Some(Pricing::flat(1.0, 3.2).with_notes("Native z.ai pricing: input $1.0/M, output $3.2/M (higher than the OpenRouter GLM-5 rate of $0.60/$1.92).")),
642 supports_thinking: true,
643 supports_adaptive_thinking: false,
644 source_url: ZAI_GLM5_PRICING_URL,
645 source_status: SourceStatus::Derived,
646 notes: Some("Native z.ai constructor model string `glm-5`. Reasoning/thinking model; 200K context, 128K (131072) max output per docs.z.ai/guides/llm/glm-5. Native pricing used for the native route. Released ~Feb 11, 2026."),
647 },
648 ModelCapabilities {
649 provider: "openai",
650 model_id: "moonshotai/kimi-k2.6",
651 context_window: Some(262_144),
652 max_output_tokens: Some(65_536),
653 pricing: Some(Pricing::flat(0.684, 3.42).with_notes("OpenRouter rate for moonshotai/kimi-k2.6: input $0.684/M, output $3.42/M.")),
654 supports_thinking: false,
655 supports_adaptive_thinking: false,
656 source_url: OPENROUTER_KIMI_K26_URL,
657 source_status: SourceStatus::Derived,
658 notes: Some("Exact OpenRouter slug (note the dot). Hybrid model marketed/used as a non-reasoning coding+multimodal model, so supports_thinking=false (use moonshotai/kimi-k2-thinking for the dedicated reasoning model). Context 262,144; 65536 is a generous app-side completion budget within the window."),
659 },
660 ModelCapabilities {
661 provider: "openai",
662 model_id: "moonshotai/kimi-k2.5",
663 context_window: Some(262_144),
664 max_output_tokens: Some(32_768),
665 pricing: Some(Pricing::flat(0.4, 1.9).with_notes("OpenRouter rate for moonshotai/kimi-k2.5: input $0.40/M, output $1.90/M.")),
666 supports_thinking: false,
667 supports_adaptive_thinking: false,
668 source_url: OPENROUTER_KIMI_K25_URL,
669 source_status: SourceStatus::Derived,
670 notes: Some("OpenRouter route for the model the native constructor names 'kimi-k2.5'. Treated as non-reasoning (visual-coding + agentic tool-calling) on OpenRouter. Context 262,144; 32768 is a generous app-side completion budget within the window."),
671 },
672 ModelCapabilities {
673 provider: "openai",
674 model_id: "kimi-k2.5",
675 context_window: Some(262_144),
676 max_output_tokens: Some(32_768),
677 pricing: Some(Pricing::flat(0.6, 3.0).with_notes("Native Moonshot estimate from Artificial Analysis (~$0.58 in / $3.00 out); input rounded up to $0.60 to stay conservative for budget reservation.")),
678 supports_thinking: false,
679 supports_adaptive_thinking: false,
680 source_url: KIMI_K25_AA_URL,
681 source_status: SourceStatus::Unverified,
682 notes: Some("Exact native model_id used by the native constructor (Moonshot platform.kimi.ai base_url). Native pricing not on the first-party table (only k2.6 is enumerated); figures derived from Artificial Analysis. Context 262,144; 32768 is a generous within-window completion budget."),
683 },
684 ModelCapabilities {
685 provider: "openai",
686 model_id: "kimi-k2-thinking",
687 context_window: Some(262_144),
688 max_output_tokens: Some(131_072),
689 pricing: Some(Pricing::flat(0.6, 2.5).with_notes("Cross-provider median for kimi-k2-thinking (OpenRouter/Artificial Analysis): input $0.60/M, output $2.50/M, used as a conservative native estimate.")),
690 supports_thinking: true,
691 supports_adaptive_thinking: false,
692 source_url: OPENROUTER_KIMI_K2_THINKING_URL,
693 source_status: SourceStatus::Unverified,
694 notes: Some("Exact native model_id used by the native constructor; a REASONING model (emits hidden chain-of-thought before the answer). Native Moonshot base_url. First-party pricing could not be isolated; figures are the cross-provider median. Context 262,144; max_output 131072 sized generously for reasoning tokens, within the window."),
695 },
696 ModelCapabilities {
697 provider: "openai",
698 model_id: "deepseek/deepseek-v4-pro",
699 context_window: Some(1_048_576),
700 max_output_tokens: Some(384_000),
701 pricing: Some(Pricing::flat(0.44, 0.87).with_notes("OpenRouter effective post-promo rate ($0.435 in rounded up to $0.44 / $0.87 out). Pre-promo regular rate was $1.74/$3.48.")),
702 supports_thinking: true,
703 supports_adaptive_thinking: false,
704 source_url: OPENROUTER_DEEPSEEK_V4_PRO_URL,
705 source_status: SourceStatus::Derived,
706 notes: Some("Primary model named in forge config; exact OpenRouter slug. Large MoE (1.6T total / 49B active), released 2026-04-24. Reasoning/thinking model; DeepSeek returns the answer in `content` and chain-of-thought in a separate `reasoning_content` field, which must be echoed back in subsequent thinking-mode turns or the API returns 400. Max output 384K (DeepSeek ceiling), sized generously for reasoning."),
707 },
708 ModelCapabilities {
709 provider: "openai",
710 model_id: "deepseek-v4-pro",
711 context_window: Some(1_048_576),
712 max_output_tokens: Some(384_000),
713 pricing: Some(Pricing::flat_with_cached(0.44, 0.87, 0.003_625).with_notes("Official DeepSeek pricing: input cache-MISS $0.435/M (rounded up to $0.44), cache-HIT $0.003625/M, output $0.87/M.")),
714 supports_thinking: true,
715 supports_adaptive_thinking: false,
716 source_url: DEEPSEEK_PRICING_URL,
717 source_status: SourceStatus::Derived,
718 notes: Some("Native DeepSeek API model id 'deepseek-v4-pro' (no vendor prefix). 1M context, 384K max output. Reasoning/thinking model; separate `reasoning_content` that must be echoed back in multi-turn thinking-mode requests or you get a 400. Legacy ids deepseek-reasoner/deepseek-chat now map to V4-FLASH, not Pro."),
719 },
720 ModelCapabilities {
721 provider: "openai",
722 model_id: "deepseek/deepseek-v4-flash",
723 context_window: Some(1_048_576),
724 max_output_tokens: Some(384_000),
725 pricing: Some(Pricing::flat(0.15, 0.28).with_notes("DeepSeek list rate rounded up ($0.14 in -> $0.15 / $0.28 out) used instead of OpenRouter's lower fluctuating effective rate so consumers never under-reserve budget.")),
726 supports_thinking: true,
727 supports_adaptive_thinking: false,
728 source_url: OPENROUTER_DEEPSEEK_V4_FLASH_URL,
729 source_status: SourceStatus::Derived,
730 notes: Some("Sibling V4 model (cheaper routing target). Efficiency MoE (284B total / 13B active), released 2026-04-24. Reasoning/thinking model with the same reasoning_content split + mandatory pass-back-or-400 behavior as V4 Pro. Max output 384K per DeepSeek docs."),
731 },
732 ModelCapabilities {
733 provider: "openai",
734 model_id: "deepseek-v4-flash",
735 context_window: Some(1_048_576),
736 max_output_tokens: Some(384_000),
737 pricing: Some(Pricing::flat_with_cached(0.14, 0.28, 0.002_8).with_notes("Official DeepSeek pricing: input cache-MISS $0.14/M, cache-HIT $0.0028/M, output $0.28/M.")),
738 supports_thinking: true,
739 supports_adaptive_thinking: false,
740 source_url: DEEPSEEK_PRICING_URL,
741 source_status: SourceStatus::Derived,
742 notes: Some("Native DeepSeek API model id 'deepseek-v4-flash'. 1M context, 384K max output. Reasoning/thinking model; same content/reasoning_content split and mandatory pass-back in thinking mode. Legacy aliases deepseek-chat/deepseek-reasoner now resolve to this Flash model."),
743 },
744 ModelCapabilities {
745 provider: "openai",
746 model_id: "MiniMax-M2.5",
747 context_window: Some(204_800),
748 max_output_tokens: Some(131_072),
749 pricing: Some(Pricing::flat_with_cached(0.3, 1.2, 0.03).with_notes("Native MiniMax first-party pricing: input $0.30/M, output $1.20/M, cache-read input $0.03/M (platform.minimax.io PAYG).")),
750 supports_thinking: true,
751 supports_adaptive_thinking: false,
752 source_url: MINIMAX_PRICING_URL,
753 source_status: SourceStatus::Derived,
754 notes: Some("Native agent-sdk constructor model string 'MiniMax-M2.5' (api.minimax.io, OpenAI-compatible). Reasoning/thinking model; emits chain-of-thought in <think>...</think> tags and supports interleaved thinking. Context 204,800; max_output 131072 sized generously for hidden reasoning + answer within the window."),
755 },
756 ModelCapabilities {
757 provider: "openai",
758 model_id: "minimax/minimax-m2.5",
759 context_window: Some(204_800),
760 max_output_tokens: Some(131_072),
761 pricing: Some(Pricing::flat(0.15, 1.15).with_notes("OpenRouter rate for minimax/minimax-m2.5: input $0.15/M, output $1.15/M (lower than MiniMax's $0.30/$1.20 first-party rate; OpenRouter prices can fluctuate, so reserve conservatively).")),
762 supports_thinking: true,
763 supports_adaptive_thinking: false,
764 source_url: OPENROUTER_MINIMAX_M25_URL,
765 source_status: SourceStatus::Derived,
766 notes: Some("OpenRouter slug 'minimax/minimax-m2.5' (same M2.5 weights as native). Reasoning/thinking model. Context 204,800; max_output 131072 sized generously for hidden reasoning tokens before the answer."),
767 },
768];
769
770#[must_use]
771pub fn get_model_capabilities(
772 provider: &str,
773 model_id: &str,
774) -> Option<&'static ModelCapabilities> {
775 MODEL_CAPABILITIES.iter().find(|caps| {
776 caps.provider.eq_ignore_ascii_case(provider) && caps.model_id.eq_ignore_ascii_case(model_id)
777 })
778}
779
780#[must_use]
781pub fn default_max_output_tokens(provider: &str, model_id: &str) -> Option<u32> {
782 get_model_capabilities(provider, model_id).and_then(|caps| caps.max_output_tokens)
783}
784
785#[must_use]
786pub const fn supported_model_capabilities() -> &'static [ModelCapabilities] {
787 MODEL_CAPABILITIES
788}
789
#[cfg(test)]
mod tests {
    use super::*;

    /// Model ids registered under the "openai" provider that must all resolve.
    const OPEN_MODEL_IDS: &[&str] = &[
        "z-ai/glm-5.1",
        "glm-5",
        "moonshotai/kimi-k2.6",
        "moonshotai/kimi-k2.5",
        "kimi-k2.5",
        "kimi-k2-thinking",
        "deepseek/deepseek-v4-pro",
        "deepseek-v4-pro",
        "deepseek/deepseek-v4-flash",
        "deepseek-v4-flash",
        "MiniMax-M2.5",
        "minimax/minimax-m2.5",
    ];

    /// Fetches a table entry the calling test requires, panicking when it is absent.
    #[track_caller]
    fn lookup(provider: &str, model_id: &str) -> &'static ModelCapabilities {
        get_model_capabilities(provider, model_id).unwrap()
    }

    /// Asserts that two USD amounts differ by less than `f64::EPSILON`.
    #[track_caller]
    fn assert_usd_eq(actual: f64, expected: f64) {
        assert!((actual - expected).abs() < f64::EPSILON);
    }

    // --- Anthropic entries -------------------------------------------------

    #[test]
    fn test_lookup_anthropic_fable_5() -> anyhow::Result<()> {
        use anyhow::Context;

        let fable = get_model_capabilities("anthropic", "claude-fable-5")
            .context("claude-fable-5 capabilities missing")?;
        assert_eq!(fable.context_window, Some(1_000_000));
        assert_eq!(fable.max_output_tokens, Some(128_000));
        assert!(fable.supports_thinking);
        assert!(fable.supports_adaptive_thinking);
        assert_eq!(fable.source_status, SourceStatus::Official);

        let pricing = fable.pricing.context("pricing missing")?;
        let input_price = pricing.input.context("input price missing")?;
        let output_price = pricing.output.context("output price missing")?;
        assert_usd_eq(input_price.usd_per_million_tokens, 10.0);
        assert_usd_eq(output_price.usd_per_million_tokens, 50.0);
        Ok(())
    }

    #[test]
    fn test_lookup_anthropic_opus_48() {
        let opus = lookup("anthropic", "claude-opus-4-8");
        assert_eq!(opus.context_window, Some(1_000_000));
        assert_eq!(opus.max_output_tokens, Some(128_000));
        assert!(opus.supports_thinking);
        assert!(opus.supports_adaptive_thinking);
    }

    #[test]
    fn test_lookup_anthropic_opus_46() {
        let opus = lookup("anthropic", "claude-opus-4-6");
        assert_eq!(opus.context_window, Some(1_000_000));
        assert_eq!(opus.max_output_tokens, Some(128_000));
        assert!(opus.supports_adaptive_thinking);
    }

    #[test]
    fn test_lookup_anthropic_sonnet_46() {
        let sonnet = lookup("anthropic", "claude-sonnet-4-6");
        assert_eq!(sonnet.context_window, Some(1_000_000));
        assert_eq!(sonnet.max_output_tokens, Some(64_000));
        assert!(sonnet.supports_adaptive_thinking);
    }

    #[test]
    fn test_lookup_anthropic_sonnet_45_disables_adaptive_thinking() {
        let sonnet = lookup("anthropic", "claude-sonnet-4-5-20250929");
        assert!(!sonnet.supports_adaptive_thinking);
    }

    // --- OpenAI entries ----------------------------------------------------

    #[test]
    fn test_lookup_openai_pricing() {
        let pricing = lookup("openai", "gpt-4o").pricing.unwrap();
        assert_usd_eq(pricing.input.unwrap().usd_per_million_tokens, 1.25);
        assert_usd_eq(pricing.output.unwrap().usd_per_million_tokens, 5.0);
    }

    #[test]
    fn test_lookup_openai_gpt54() {
        let gpt = lookup("openai", "gpt-5.4");
        assert_eq!(gpt.context_window, Some(1_050_000));
        assert_eq!(gpt.max_output_tokens, Some(128_000));
        assert!(gpt.supports_thinking);
        assert_eq!(gpt.source_status, SourceStatus::Official);
    }

    #[test]
    fn test_lookup_openai_gpt53_codex() {
        let codex = lookup("openai", "gpt-5.3-codex");
        assert_eq!(codex.context_window, Some(400_000));
        assert_eq!(codex.max_output_tokens, Some(120_000));
        assert!(codex.supports_thinking);
        assert_eq!(codex.source_status, SourceStatus::Official);
    }

    // --- Gemini entries ----------------------------------------------------

    #[test]
    fn test_lookup_gemini_preview_models() {
        let flash = lookup("gemini", "gemini-3-flash-preview");
        assert_eq!(flash.context_window, Some(1_048_576));
        assert!(flash.supports_thinking);

        let pro = lookup("gemini", "gemini-3.1-pro-preview");
        assert_eq!(pro.max_output_tokens, Some(65_536));
        assert!(pro.supports_thinking);
    }

    // --- Open-weight models registered under the "openai" provider ---------

    #[test]
    fn test_lookup_open_reasoning_models_resolve_with_thinking() {
        let deepseek = lookup("openai", "deepseek/deepseek-v4-pro");
        assert!(deepseek.supports_thinking);
        assert_eq!(deepseek.max_output_tokens, Some(384_000));
        let deepseek_pricing = deepseek.pricing.unwrap();
        assert!(deepseek_pricing.input.unwrap().usd_per_million_tokens > 0.0);
        assert!(deepseek_pricing.output.unwrap().usd_per_million_tokens > 0.0);

        let glm = lookup("openai", "z-ai/glm-5.1");
        assert!(glm.supports_thinking);
        assert_eq!(glm.max_output_tokens, Some(131_072));
        let glm_pricing = glm.pricing.unwrap();
        assert_usd_eq(glm_pricing.input.unwrap().usd_per_million_tokens, 0.98);
        assert_usd_eq(glm_pricing.output.unwrap().usd_per_million_tokens, 3.08);

        let kimi_thinking = lookup("openai", "kimi-k2-thinking");
        assert!(kimi_thinking.supports_thinking);
        assert_eq!(kimi_thinking.max_output_tokens, Some(131_072));
        let kimi_output_price = kimi_thinking.pricing.unwrap().output.unwrap();
        assert!(kimi_output_price.usd_per_million_tokens > 0.0);
    }

    #[test]
    fn test_lookup_open_non_reasoning_kimi_models() {
        let k26 = lookup("openai", "moonshotai/kimi-k2.6");
        assert!(!k26.supports_thinking);
        assert_eq!(k26.max_output_tokens, Some(65_536));
        let k26_input_price = k26.pricing.unwrap().input.unwrap();
        assert!(k26_input_price.usd_per_million_tokens > 0.0);

        let k25_native = lookup("openai", "kimi-k2.5");
        assert!(!k25_native.supports_thinking);
        assert_eq!(k25_native.max_output_tokens, Some(32_768));
    }

    #[test]
    fn test_lookup_all_open_models_resolve() {
        for &model_id in OPEN_MODEL_IDS {
            let Some(caps) = get_model_capabilities("openai", model_id) else {
                panic!("missing capabilities for {model_id}");
            };
            assert!(
                caps.pricing.is_some(),
                "pricing should be populated for {model_id}"
            );
            assert!(
                caps.max_output_tokens.is_some_and(|limit| limit > 0),
                "max_output_tokens should be non-zero for {model_id}"
            );
            assert!(
                caps.context_window.is_some_and(|window| window > 0),
                "context_window should be non-zero for {model_id}"
            );
        }
    }

    #[test]
    fn test_lookup_minimax_native_pricing() {
        let native = lookup("openai", "MiniMax-M2.5");
        assert!(native.supports_thinking);
        let pricing = native.pricing.unwrap();
        assert_usd_eq(pricing.input.unwrap().usd_per_million_tokens, 0.3);
        assert_usd_eq(pricing.output.unwrap().usd_per_million_tokens, 1.2);
        assert_usd_eq(pricing.cached_input.unwrap().usd_per_million_tokens, 0.03);
    }

    // --- Cost estimation ---------------------------------------------------

    #[test]
    fn test_estimate_cost_usd() {
        let usage = Usage {
            input_tokens: 2_000,
            output_tokens: 1_000,
            cached_input_tokens: 0,
            cache_creation_input_tokens: 0,
        };
        let cost = lookup("openai", "gpt-4o")
            .estimate_cost_usd(&usage)
            .unwrap();
        assert_usd_eq(cost, 0.0075);
    }

    #[test]
    fn test_estimate_cost_usd_with_cached_input() {
        // Half of the input tokens are reported as cached.
        let usage = Usage {
            input_tokens: 2_000,
            output_tokens: 1_000,
            cached_input_tokens: 1_000,
            cache_creation_input_tokens: 0,
        };
        let cost = lookup("openai", "gpt-5.4")
            .estimate_cost_usd(&usage)
            .unwrap();
        assert_usd_eq(cost, 0.01775);
    }
}