Skip to main content

agent_sdk_providers/
model_capabilities.rs

1use agent_sdk_foundation::llm::Usage;
2
/// Provenance label attached to a model's recorded capability and pricing data.
///
/// The type is a plain `Copy` tag; it derives `Hash` in addition to `Eq` so it
/// can be used as a key in `HashMap`/`HashSet` (for example, to group catalogue
/// entries by how trustworthy their figures are).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SourceStatus {
    /// Figures come from the provider's own published documentation.
    Official,
    /// Figures were inferred from related published data rather than stated
    /// for this exact model or route (presumably; confirm against the
    /// catalogue's usage).
    Derived,
    /// Figures have not been confirmed against an authoritative source.
    Unverified,
}
9
/// A token price expressed in US dollars per one million tokens.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct PricePoint {
    /// USD per 1M tokens.
    pub usd_per_million_tokens: f64,
}

impl PricePoint {
    /// Wraps a rate given in USD per one million tokens.
    #[must_use]
    pub const fn new(usd_per_million_tokens: f64) -> Self {
        Self {
            usd_per_million_tokens,
        }
    }

    /// Returns the USD cost of `tokens` tokens billed at this rate.
    #[must_use]
    pub fn estimate_cost_usd(self, tokens: u32) -> f64 {
        const TOKENS_PER_MILLION: f64 = 1_000_000.0;

        // Scale the count down to "millions of tokens" first, then apply the rate.
        let millions_of_tokens = f64::from(tokens) / TOKENS_PER_MILLION;
        millions_of_tokens * self.usd_per_million_tokens
    }
}
29
30#[derive(Debug, Clone, Copy, PartialEq)]
31pub struct Pricing {
32    pub input: Option<PricePoint>,
33    pub output: Option<PricePoint>,
34    pub cached_input: Option<PricePoint>,
35    pub notes: Option<&'static str>,
36}
37
38impl Pricing {
39    #[must_use]
40    pub const fn flat(input: f64, output: f64) -> Self {
41        Self {
42            input: Some(PricePoint::new(input)),
43            output: Some(PricePoint::new(output)),
44            cached_input: None,
45            notes: None,
46        }
47    }
48
49    #[must_use]
50    pub const fn flat_with_cached(input: f64, output: f64, cached_input: f64) -> Self {
51        Self {
52            input: Some(PricePoint::new(input)),
53            output: Some(PricePoint::new(output)),
54            cached_input: Some(PricePoint::new(cached_input)),
55            notes: None,
56        }
57    }
58
59    #[must_use]
60    pub const fn with_notes(mut self, notes: &'static str) -> Self {
61        self.notes = Some(notes);
62        self
63    }
64
65    #[must_use]
66    pub fn estimate_cost_usd(&self, usage: &Usage) -> Option<f64> {
67        let cached_input_tokens = usage.cached_input_tokens.min(usage.input_tokens);
68        let uncached_input_tokens = usage.input_tokens.saturating_sub(cached_input_tokens);
69
70        let input = match (self.input, self.cached_input) {
71            (Some(input), Some(cached_input)) => Some(
72                input.estimate_cost_usd(uncached_input_tokens)
73                    + cached_input.estimate_cost_usd(cached_input_tokens),
74            ),
75            (Some(input), None) => Some(input.estimate_cost_usd(usage.input_tokens)),
76            (None, Some(cached_input)) => Some(cached_input.estimate_cost_usd(cached_input_tokens)),
77            (None, None) => None,
78        };
79        let output = self
80            .output
81            .map(|p| p.estimate_cost_usd(usage.output_tokens));
82        match (input, output) {
83            (Some(input), Some(output)) => Some(input + output),
84            (Some(input), None) => Some(input),
85            (None, Some(output)) => Some(output),
86            (None, None) => None,
87        }
88    }
89}
90
91#[derive(Debug, Clone, Copy, PartialEq)]
92pub struct ModelCapabilities {
93    pub provider: &'static str,
94    pub model_id: &'static str,
95    pub context_window: Option<u32>,
96    pub max_output_tokens: Option<u32>,
97    pub pricing: Option<Pricing>,
98    pub supports_thinking: bool,
99    pub supports_adaptive_thinking: bool,
100    pub source_url: &'static str,
101    pub source_status: SourceStatus,
102    pub notes: Option<&'static str>,
103}
104
105impl ModelCapabilities {
106    #[must_use]
107    pub fn estimate_cost_usd(&self, usage: &Usage) -> Option<f64> {
108        self.pricing
109            .as_ref()
110            .and_then(|p| p.estimate_cost_usd(usage))
111    }
112}
113
// Source pages cited by the Anthropic entries.
const ANTHROPIC_MODELS_URL: &str =
    "https://docs.anthropic.com/en/docs/about-claude/models/all-models";

// Source pages cited by the OpenAI entries.
const OPENAI_MODELS_URL: &str = "https://developers.openai.com/api/docs/models";
const OPENAI_PRICING_URL: &str = "https://developers.openai.com/api/docs/pricing";
const OPENAI_GPT54_URL: &str = "https://developers.openai.com/api/docs/models/gpt-5.4";
const OPENAI_GPT53_CODEX_URL: &str =
    "https://developers.openai.com/api/docs/models/gpt-5.3-codex";

// Source pages cited by the Gemini entries.
const GOOGLE_MODELS_URL: &str = "https://ai.google.dev/gemini-api/docs/models";
const GOOGLE_PRICING_URL: &str = "https://ai.google.dev/gemini-api/docs/pricing";

// Source pages for the open-model routes. Every one of these models is reached
// through OpenAIProvider (provider() == "openai"), either by an OpenRouter slug
// or by the native z.ai / Moonshot / MiniMax base URL.
const OPENROUTER_GLM51_URL: &str = "https://openrouter.ai/z-ai/glm-5.1";
const ZAI_GLM5_PRICING_URL: &str = "https://docs.z.ai/guides/overview/pricing";
const OPENROUTER_KIMI_K26_URL: &str = "https://openrouter.ai/moonshotai/kimi-k2.6";
const OPENROUTER_KIMI_K25_URL: &str = "https://openrouter.ai/moonshotai/kimi-k2.5";
const KIMI_K25_AA_URL: &str = "https://artificialanalysis.ai/models/kimi-k2-5";
const OPENROUTER_KIMI_K2_THINKING_URL: &str =
    "https://openrouter.ai/moonshotai/kimi-k2-thinking";
const OPENROUTER_DEEPSEEK_V4_PRO_URL: &str =
    "https://openrouter.ai/deepseek/deepseek-v4-pro";
const OPENROUTER_DEEPSEEK_V4_FLASH_URL: &str =
    "https://openrouter.ai/deepseek/deepseek-v4-flash";
const DEEPSEEK_PRICING_URL: &str = "https://api-docs.deepseek.com/quick_start/pricing";
const MINIMAX_PRICING_URL: &str = "https://platform.minimax.io/docs/guides/pricing-paygo";
const OPENROUTER_MINIMAX_M25_URL: &str = "https://openrouter.ai/minimax/minimax-m2.5";
136
137const MODEL_CAPABILITIES: &[ModelCapabilities] = &[
138    // Anthropic
139    ModelCapabilities {
140        provider: "anthropic",
141        model_id: "claude-fable-5",
142        context_window: Some(1_000_000),
143        max_output_tokens: Some(128_000),
144        pricing: Some(Pricing::flat(10.0, 50.0).with_notes("Anthropic Fable 5 official pricing: $10 input / $50 output per 1M tokens.")),
145        supports_thinking: true,
146        supports_adaptive_thinking: true,
147        source_url: ANTHROPIC_MODELS_URL,
148        source_status: SourceStatus::Official,
149        notes: Some("Fable 5 is adaptive-only: adaptive thinking is always on (applies even when `thinking` is unset) and `ThinkingMode::Enabled { budget_tokens }` is rejected by the Anthropic API. The SDK fails fast in validate_thinking_config. Raw chain of thought is never returned — thinking blocks arrive empty (the SDK requests thinking display=omitted). Safety classifiers may decline a request with stop_reason=refusal on an HTTP 200."),
150    },
151    ModelCapabilities {
152        provider: "anthropic",
153        model_id: "claude-opus-4-8",
154        context_window: Some(1_000_000),
155        max_output_tokens: Some(128_000),
156        pricing: Some(Pricing::flat(5.0, 25.0).with_notes("Anthropic Opus 4.8 pricing matches the Opus 4.6 tier ($5/$25 per 1M); verify exact current SKU mapping before billing-critical use.")),
157        supports_thinking: true,
158        supports_adaptive_thinking: true,
159        source_url: ANTHROPIC_MODELS_URL,
160        source_status: SourceStatus::Derived,
161        notes: Some("Opus 4.8 requires adaptive thinking — `ThinkingMode::Enabled { budget_tokens }` is rejected by the Anthropic API. The SDK fails fast in validate_thinking_config."),
162    },
163    ModelCapabilities {
164        provider: "anthropic",
165        model_id: "claude-opus-4-7",
166        context_window: Some(1_000_000),
167        max_output_tokens: Some(128_000),
168        pricing: Some(Pricing::flat(5.0, 25.0).with_notes("Anthropic Opus 4.7 pricing matches the Opus 4.6 tier ($5/$25 per 1M); verify exact current SKU mapping before billing-critical use.")),
169        supports_thinking: true,
170        supports_adaptive_thinking: true,
171        source_url: ANTHROPIC_MODELS_URL,
172        source_status: SourceStatus::Derived,
173        notes: Some("Opus 4.7 requires adaptive thinking — `ThinkingMode::Enabled { budget_tokens }` is rejected by the Anthropic API. The SDK fails fast in validate_thinking_config."),
174    },
175    ModelCapabilities {
176        provider: "anthropic",
177        model_id: "claude-opus-4-6",
178        context_window: Some(1_000_000),
179        max_output_tokens: Some(128_000),
180        pricing: Some(Pricing::flat(5.0, 25.0).with_notes("Anthropic Opus 4.6 pricing from bundled Claude API guidance; verify exact current SKU mapping before billing-critical use.")),
181        supports_thinking: true,
182        supports_adaptive_thinking: true,
183        source_url: ANTHROPIC_MODELS_URL,
184        source_status: SourceStatus::Derived,
185        notes: Some("Current Anthropic docs show this model alongside 200K/128K markers."),
186    },
187    ModelCapabilities {
188        provider: "anthropic",
189        model_id: "claude-sonnet-4-6",
190        context_window: Some(1_000_000),
191        max_output_tokens: Some(64_000),
192        pricing: Some(Pricing::flat(3.0, 15.0).with_notes("Anthropic Sonnet tier pricing; verify exact current SKU mapping before billing-critical use.")),
193        supports_thinking: true,
194        supports_adaptive_thinking: true,
195        source_url: ANTHROPIC_MODELS_URL,
196        source_status: SourceStatus::Derived,
197        notes: Some("Anthropic docs list Sonnet 4.6; user confirmed adaptive thinking support."),
198    },
199    ModelCapabilities {
200        provider: "anthropic",
201        model_id: "claude-sonnet-4-5-20250929",
202        context_window: Some(200_000),
203        max_output_tokens: Some(64_000),
204        pricing: Some(Pricing::flat(3.0, 15.0).with_notes("Anthropic Sonnet tier pricing; verify exact current SKU mapping before billing-critical use.")),
205        supports_thinking: true,
206        supports_adaptive_thinking: false,
207        source_url: ANTHROPIC_MODELS_URL,
208        source_status: SourceStatus::Derived,
209        notes: None,
210    },
211    ModelCapabilities {
212        provider: "anthropic",
213        model_id: "claude-haiku-4-5-20251001",
214        context_window: Some(200_000),
215        max_output_tokens: Some(64_000),
216        pricing: Some(Pricing::flat(1.0, 5.0).with_notes("Anthropic Haiku tier pricing; verify exact current SKU mapping before billing-critical use.")),
217        supports_thinking: true,
218        supports_adaptive_thinking: false,
219        source_url: ANTHROPIC_MODELS_URL,
220        source_status: SourceStatus::Derived,
221        notes: None,
222    },
223    ModelCapabilities {
224        provider: "anthropic",
225        model_id: "claude-sonnet-4-20250514",
226        context_window: Some(200_000),
227        max_output_tokens: Some(64_000),
228        pricing: Some(Pricing::flat(3.0, 15.0).with_notes("Anthropic Sonnet tier pricing; verify exact current SKU mapping before billing-critical use.")),
229        supports_thinking: true,
230        supports_adaptive_thinking: false,
231        source_url: ANTHROPIC_MODELS_URL,
232        source_status: SourceStatus::Derived,
233        notes: None,
234    },
235    ModelCapabilities {
236        provider: "anthropic",
237        model_id: "claude-opus-4-20250514",
238        context_window: Some(200_000),
239        max_output_tokens: Some(32_000),
240        pricing: Some(Pricing::flat(15.0, 75.0).with_notes("Anthropic Opus tier pricing; verify exact current SKU mapping before billing-critical use.")),
241        supports_thinking: true,
242        supports_adaptive_thinking: false,
243        source_url: ANTHROPIC_MODELS_URL,
244        source_status: SourceStatus::Derived,
245        notes: None,
246    },
247    ModelCapabilities {
248        provider: "anthropic",
249        model_id: "claude-3-5-sonnet-20241022",
250        context_window: Some(200_000),
251        max_output_tokens: Some(8_192),
252        pricing: Some(Pricing::flat(3.0, 15.0).with_notes("Anthropic Sonnet tier pricing; verify exact current SKU mapping before billing-critical use.")),
253        supports_thinking: true,
254        supports_adaptive_thinking: false,
255        source_url: ANTHROPIC_MODELS_URL,
256        source_status: SourceStatus::Derived,
257        notes: None,
258    },
259    ModelCapabilities {
260        provider: "anthropic",
261        model_id: "claude-3-5-haiku-20241022",
262        context_window: Some(200_000),
263        max_output_tokens: Some(8_192),
264        pricing: Some(Pricing::flat(1.0, 5.0).with_notes("Anthropic Haiku tier pricing; verify exact current SKU mapping before billing-critical use.")),
265        supports_thinking: true,
266        supports_adaptive_thinking: false,
267        source_url: ANTHROPIC_MODELS_URL,
268        source_status: SourceStatus::Derived,
269        notes: None,
270    },
271    // OpenAI
272    ModelCapabilities {
273        provider: "openai",
274        model_id: "gpt-5.4",
275        context_window: Some(1_050_000),
276        max_output_tokens: Some(128_000),
277        pricing: Some(Pricing::flat_with_cached(2.50, 15.0, 0.25)),
278        supports_thinking: true,
279        supports_adaptive_thinking: false,
280        source_url: OPENAI_GPT54_URL,
281        source_status: SourceStatus::Official,
282        notes: Some("OpenAI model docs list 1.05M context, 128K max output, and reasoning.effort support."),
283    },
284    ModelCapabilities {
285        provider: "openai",
286        model_id: "gpt-5.3-codex",
287        context_window: Some(400_000),
288        max_output_tokens: Some(120_000),
289        pricing: Some(Pricing::flat_with_cached(1.50, 6.0, 0.375)),
290        supports_thinking: true,
291        supports_adaptive_thinking: false,
292        source_url: OPENAI_GPT53_CODEX_URL,
293        source_status: SourceStatus::Official,
294        notes: Some("OpenAI model docs list Chat Completions and Responses API support plus reasoning.effort levels."),
295    },
296    ModelCapabilities {
297        provider: "openai",
298        model_id: "gpt-5",
299        context_window: Some(400_000),
300        max_output_tokens: Some(128_000),
301        pricing: Some(Pricing::flat_with_cached(1.25, 10.0, 0.125)),
302        supports_thinking: false,
303        supports_adaptive_thinking: false,
304        source_url: OPENAI_PRICING_URL,
305        source_status: SourceStatus::Official,
306        notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
307    },
308    ModelCapabilities {
309        provider: "openai",
310        model_id: "gpt-5-mini",
311        context_window: Some(400_000),
312        max_output_tokens: Some(128_000),
313        pricing: Some(Pricing::flat_with_cached(0.125, 1.0, 0.0125)),
314        supports_thinking: false,
315        supports_adaptive_thinking: false,
316        source_url: OPENAI_PRICING_URL,
317        source_status: SourceStatus::Official,
318        notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
319    },
320    ModelCapabilities {
321        provider: "openai",
322        model_id: "gpt-5-nano",
323        context_window: Some(400_000),
324        max_output_tokens: Some(128_000),
325        pricing: Some(Pricing::flat_with_cached(0.025, 0.20, 0.0025)),
326        supports_thinking: false,
327        supports_adaptive_thinking: false,
328        source_url: OPENAI_PRICING_URL,
329        source_status: SourceStatus::Official,
330        notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
331    },
332    ModelCapabilities {
333        provider: "openai",
334        model_id: "gpt-5.2-instant",
335        context_window: Some(400_000),
336        max_output_tokens: Some(128_000),
337        pricing: None,
338        supports_thinking: false,
339        supports_adaptive_thinking: false,
340        source_url: OPENAI_MODELS_URL,
341        source_status: SourceStatus::Unverified,
342        notes: Some("Model exists in OpenAI docs, but pricing was not extracted from the official pricing page in this pass."),
343    },
344    ModelCapabilities {
345        provider: "openai",
346        model_id: "gpt-5.2-thinking",
347        context_window: Some(400_000),
348        max_output_tokens: Some(128_000),
349        pricing: None,
350        supports_thinking: true,
351        supports_adaptive_thinking: false,
352        source_url: OPENAI_MODELS_URL,
353        source_status: SourceStatus::Unverified,
354        notes: Some("Model exists in OpenAI docs, but pricing was not extracted from the official pricing page in this pass."),
355    },
356    ModelCapabilities {
357        provider: "openai",
358        model_id: "gpt-5.2-pro",
359        context_window: Some(400_000),
360        max_output_tokens: Some(128_000),
361        pricing: Some(Pricing::flat(10.50, 84.0)),
362        supports_thinking: false,
363        supports_adaptive_thinking: false,
364        source_url: OPENAI_PRICING_URL,
365        source_status: SourceStatus::Official,
366        notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
367    },
368    ModelCapabilities {
369        provider: "openai",
370        model_id: "gpt-5.2-codex",
371        context_window: Some(400_000),
372        max_output_tokens: Some(128_000),
373        pricing: None,
374        supports_thinking: false,
375        supports_adaptive_thinking: false,
376        source_url: OPENAI_MODELS_URL,
377        source_status: SourceStatus::Unverified,
378        notes: Some("Model presence confirmed from OpenAI docs; pricing not yet extracted in this pass."),
379    },
380    ModelCapabilities {
381        provider: "openai",
382        model_id: "o3",
383        context_window: Some(200_000),
384        max_output_tokens: Some(100_000),
385        pricing: Some(Pricing::flat(1.0, 4.0)),
386        supports_thinking: true,
387        supports_adaptive_thinking: false,
388        source_url: OPENAI_PRICING_URL,
389        source_status: SourceStatus::Official,
390        notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
391    },
392    ModelCapabilities {
393        provider: "openai",
394        model_id: "o3-mini",
395        context_window: Some(200_000),
396        max_output_tokens: Some(100_000),
397        pricing: Some(Pricing::flat(0.55, 2.20)),
398        supports_thinking: true,
399        supports_adaptive_thinking: false,
400        source_url: OPENAI_PRICING_URL,
401        source_status: SourceStatus::Official,
402        notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
403    },
404    ModelCapabilities {
405        provider: "openai",
406        model_id: "o4-mini",
407        context_window: Some(200_000),
408        max_output_tokens: Some(100_000),
409        pricing: Some(Pricing::flat(0.55, 2.20)),
410        supports_thinking: true,
411        supports_adaptive_thinking: false,
412        source_url: OPENAI_PRICING_URL,
413        source_status: SourceStatus::Official,
414        notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
415    },
416    ModelCapabilities {
417        provider: "openai",
418        model_id: "o1",
419        context_window: Some(200_000),
420        max_output_tokens: Some(100_000),
421        pricing: Some(Pricing::flat(7.50, 30.0)),
422        supports_thinking: true,
423        supports_adaptive_thinking: false,
424        source_url: OPENAI_PRICING_URL,
425        source_status: SourceStatus::Official,
426        notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
427    },
428    ModelCapabilities {
429        provider: "openai",
430        model_id: "o1-mini",
431        context_window: Some(200_000),
432        max_output_tokens: Some(100_000),
433        pricing: Some(Pricing::flat(0.55, 2.20)),
434        supports_thinking: true,
435        supports_adaptive_thinking: false,
436        source_url: OPENAI_PRICING_URL,
437        source_status: SourceStatus::Official,
438        notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
439    },
440    ModelCapabilities {
441        provider: "openai",
442        model_id: "gpt-4.1",
443        context_window: Some(1_000_000),
444        max_output_tokens: Some(16_384),
445        pricing: Some(Pricing::flat(1.0, 4.0)),
446        supports_thinking: false,
447        supports_adaptive_thinking: false,
448        source_url: OPENAI_PRICING_URL,
449        source_status: SourceStatus::Official,
450        notes: Some("Pricing verified from OpenAI pricing page. Context window from model family docs/notes."),
451    },
452    ModelCapabilities {
453        provider: "openai",
454        model_id: "gpt-4.1-mini",
455        context_window: Some(1_000_000),
456        max_output_tokens: Some(16_384),
457        pricing: Some(Pricing::flat(0.20, 0.80)),
458        supports_thinking: false,
459        supports_adaptive_thinking: false,
460        source_url: OPENAI_PRICING_URL,
461        source_status: SourceStatus::Official,
462        notes: Some("Pricing verified from OpenAI pricing page. Context window from model family docs/notes."),
463    },
464    ModelCapabilities {
465        provider: "openai",
466        model_id: "gpt-4.1-nano",
467        context_window: Some(1_000_000),
468        max_output_tokens: Some(16_384),
469        pricing: Some(Pricing::flat(0.05, 0.20)),
470        supports_thinking: false,
471        supports_adaptive_thinking: false,
472        source_url: OPENAI_PRICING_URL,
473        source_status: SourceStatus::Official,
474        notes: Some("Pricing verified from OpenAI pricing page. Context window from model family docs/notes."),
475    },
476    ModelCapabilities {
477        provider: "openai",
478        model_id: "gpt-4o",
479        context_window: Some(128_000),
480        max_output_tokens: Some(16_384),
481        pricing: Some(Pricing::flat(1.25, 5.0)),
482        supports_thinking: false,
483        supports_adaptive_thinking: false,
484        source_url: OPENAI_PRICING_URL,
485        source_status: SourceStatus::Official,
486        notes: Some("Pricing verified from OpenAI pricing page. Context/max output from existing runtime assumptions."),
487    },
488    ModelCapabilities {
489        provider: "openai",
490        model_id: "gpt-4o-mini",
491        context_window: Some(128_000),
492        max_output_tokens: Some(16_384),
493        pricing: Some(Pricing::flat(0.075, 0.30)),
494        supports_thinking: false,
495        supports_adaptive_thinking: false,
496        source_url: OPENAI_PRICING_URL,
497        source_status: SourceStatus::Official,
498        notes: Some("Pricing verified from OpenAI pricing page. Context/max output from existing runtime assumptions."),
499    },
500    // Gemini
501    ModelCapabilities {
502        provider: "gemini",
503        model_id: "gemini-3.1-pro-preview",
504        context_window: Some(1_048_576),
505        max_output_tokens: Some(65_536),
506        pricing: Some(Pricing::flat(2.0, 12.0).with_notes("Official pricing for prompts <= 200K tokens. For prompts > 200K, pricing increases to $4 input / $18 output per 1M tokens.")),
507        supports_thinking: true,
508        supports_adaptive_thinking: false,
509        source_url: GOOGLE_PRICING_URL,
510        source_status: SourceStatus::Official,
511        notes: Some("Pricing sourced from Gemini 3.1 Pro Preview docs."),
512    },
513    ModelCapabilities {
514        provider: "gemini",
515        model_id: "gemini-3.1-pro",
516        context_window: Some(1_048_576),
517        max_output_tokens: Some(65_536),
518        pricing: Some(Pricing::flat(2.0, 12.0).with_notes("Legacy alias retained for compatibility. For prompts > 200K, pricing increases to $4 input / $18 output per 1M tokens.")),
519        supports_thinking: true,
520        supports_adaptive_thinking: false,
521        source_url: GOOGLE_PRICING_URL,
522        source_status: SourceStatus::Derived,
523        notes: Some("Legacy Gemini 3.1 Pro alias retained for compatibility; prefer gemini-3.1-pro-preview."),
524    },
525    ModelCapabilities {
526        provider: "gemini",
527        model_id: "gemini-3.1-flash-lite-preview",
528        context_window: Some(1_048_576),
529        max_output_tokens: Some(65_536),
530        pricing: None,
531        supports_thinking: true,
532        supports_adaptive_thinking: false,
533        source_url: GOOGLE_MODELS_URL,
534        source_status: SourceStatus::Unverified,
535        notes: Some("Model presence confirmed from Google docs, but pricing was not extracted in this pass."),
536    },
537    ModelCapabilities {
538        provider: "gemini",
539        model_id: "gemini-3-flash-preview",
540        context_window: Some(1_048_576),
541        max_output_tokens: Some(65_536),
542        pricing: None,
543        supports_thinking: true,
544        supports_adaptive_thinking: false,
545        source_url: GOOGLE_MODELS_URL,
546        source_status: SourceStatus::Unverified,
547        notes: Some("Model presence confirmed from Google docs, but pricing was not extracted in this pass."),
548    },
549    ModelCapabilities {
550        provider: "gemini",
551        model_id: "gemini-3.0-flash",
552        context_window: Some(1_048_576),
553        max_output_tokens: Some(65_536),
554        pricing: None,
555        supports_thinking: true,
556        supports_adaptive_thinking: false,
557        source_url: GOOGLE_MODELS_URL,
558        source_status: SourceStatus::Derived,
559        notes: Some("Legacy Gemini 3.0 Flash model retained for compatibility; prefer gemini-3-flash-preview."),
560    },
561    ModelCapabilities {
562        provider: "gemini",
563        model_id: "gemini-3.0-pro",
564        context_window: Some(1_048_576),
565        max_output_tokens: Some(65_536),
566        pricing: None,
567        supports_thinking: true,
568        supports_adaptive_thinking: false,
569        source_url: GOOGLE_MODELS_URL,
570        source_status: SourceStatus::Unverified,
571        notes: Some("Model presence confirmed from Google docs, but pricing was not extracted in this pass."),
572    },
573    ModelCapabilities {
574        provider: "gemini",
575        model_id: "gemini-2.5-flash",
576        context_window: Some(1_000_000),
577        max_output_tokens: Some(65_536),
578        pricing: Some(Pricing::flat(0.30, 2.50).with_notes("Official text/image/video pricing. Audio input is priced separately at $1.00 / 1M tokens.")),
579        supports_thinking: true,
580        supports_adaptive_thinking: false,
581        source_url: GOOGLE_PRICING_URL,
582        source_status: SourceStatus::Official,
583        notes: Some("Official docs state output pricing includes thinking tokens."),
584    },
585    ModelCapabilities {
586        provider: "gemini",
587        model_id: "gemini-2.5-pro",
588        context_window: Some(1_000_000),
589        max_output_tokens: Some(65_536),
590        pricing: None,
591        supports_thinking: true,
592        supports_adaptive_thinking: false,
593        source_url: GOOGLE_MODELS_URL,
594        source_status: SourceStatus::Unverified,
595        notes: Some("Model presence confirmed from Google docs, but pricing was not extracted in this pass."),
596    },
597    ModelCapabilities {
598        provider: "gemini",
599        model_id: "gemini-2.0-flash",
600        context_window: Some(1_000_000),
601        max_output_tokens: Some(8_192),
602        pricing: Some(Pricing::flat(0.10, 0.40).with_notes("Official text/image/video pricing. Audio input is priced separately at $0.70 / 1M tokens.")),
603        supports_thinking: false,
604        supports_adaptive_thinking: false,
605        source_url: GOOGLE_PRICING_URL,
606        source_status: SourceStatus::Official,
607        notes: None,
608    },
609    ModelCapabilities {
610        provider: "gemini",
611        model_id: "gemini-2.0-flash-lite",
612        context_window: Some(1_000_000),
613        max_output_tokens: Some(8_192),
614        pricing: Some(Pricing::flat(0.075, 0.30)),
615        supports_thinking: false,
616        supports_adaptive_thinking: false,
617        source_url: GOOGLE_PRICING_URL,
618        source_status: SourceStatus::Official,
619        notes: None,
620    },
621    // Open models (z.ai / Moonshot / DeepSeek / MiniMax). All routed through
622    // OpenAIProvider, so provider == "openai" and the model_id is the exact
623    // string the caller passes (OpenRouter slug or native model id).
624    ModelCapabilities {
625        provider: "openai",
626        model_id: "z-ai/glm-5.1",
627        context_window: Some(202_752),
628        max_output_tokens: Some(131_072),
629        pricing: Some(Pricing::flat(0.98, 3.08).with_notes("OpenRouter rate for z-ai/glm-5.1: input $0.98/M, output $3.08/M.")),
630        supports_thinking: true,
631        supports_adaptive_thinking: false,
632        source_url: OPENROUTER_GLM51_URL,
633        source_status: SourceStatus::Derived,
634        notes: Some("GLM-5.1 (z.ai/Zhipu) via OpenRouter slug. Reasoning/thinking model; context 203K (=202,752). max_output 128K from z.ai GLM-5.1 docs, sized generously for hidden reasoning + answer. Released ~Apr 7, 2026."),
635    },
636    ModelCapabilities {
637        provider: "openai",
638        model_id: "glm-5",
639        context_window: Some(200_000),
640        max_output_tokens: Some(131_072),
641        pricing: Some(Pricing::flat(1.0, 3.2).with_notes("Native z.ai pricing: input $1.0/M, output $3.2/M (higher than the OpenRouter GLM-5 rate of $0.60/$1.92).")),
642        supports_thinking: true,
643        supports_adaptive_thinking: false,
644        source_url: ZAI_GLM5_PRICING_URL,
645        source_status: SourceStatus::Derived,
646        notes: Some("Native z.ai constructor model string `glm-5`. Reasoning/thinking model; 200K context, 128K (131072) max output per docs.z.ai/guides/llm/glm-5. Native pricing used for the native route. Released ~Feb 11, 2026."),
647    },
648    ModelCapabilities {
649        provider: "openai",
650        model_id: "moonshotai/kimi-k2.6",
651        context_window: Some(262_144),
652        max_output_tokens: Some(65_536),
653        pricing: Some(Pricing::flat(0.684, 3.42).with_notes("OpenRouter rate for moonshotai/kimi-k2.6: input $0.684/M, output $3.42/M.")),
654        supports_thinking: false,
655        supports_adaptive_thinking: false,
656        source_url: OPENROUTER_KIMI_K26_URL,
657        source_status: SourceStatus::Derived,
658        notes: Some("Exact OpenRouter slug (note the dot). Hybrid model marketed/used as a non-reasoning coding+multimodal model, so supports_thinking=false (use moonshotai/kimi-k2-thinking for the dedicated reasoning model). Context 262,144; 65536 is a generous app-side completion budget within the window."),
659    },
660    ModelCapabilities {
661        provider: "openai",
662        model_id: "moonshotai/kimi-k2.5",
663        context_window: Some(262_144),
664        max_output_tokens: Some(32_768),
665        pricing: Some(Pricing::flat(0.4, 1.9).with_notes("OpenRouter rate for moonshotai/kimi-k2.5: input $0.40/M, output $1.90/M.")),
666        supports_thinking: false,
667        supports_adaptive_thinking: false,
668        source_url: OPENROUTER_KIMI_K25_URL,
669        source_status: SourceStatus::Derived,
670        notes: Some("OpenRouter route for the model the native constructor names 'kimi-k2.5'. Treated as non-reasoning (visual-coding + agentic tool-calling) on OpenRouter. Context 262,144; 32768 is a generous app-side completion budget within the window."),
671    },
672    ModelCapabilities {
673        provider: "openai",
674        model_id: "kimi-k2.5",
675        context_window: Some(262_144),
676        max_output_tokens: Some(32_768),
677        pricing: Some(Pricing::flat(0.6, 3.0).with_notes("Native Moonshot estimate from Artificial Analysis (~$0.58 in / $3.00 out); input rounded up to $0.60 to stay conservative for budget reservation.")),
678        supports_thinking: false,
679        supports_adaptive_thinking: false,
680        source_url: KIMI_K25_AA_URL,
681        source_status: SourceStatus::Unverified,
682        notes: Some("Exact native model_id used by the native constructor (Moonshot platform.kimi.ai base_url). Native pricing not on the first-party table (only k2.6 is enumerated); figures derived from Artificial Analysis. Context 262,144; 32768 is a generous within-window completion budget."),
683    },
684    ModelCapabilities {
685        provider: "openai",
686        model_id: "kimi-k2-thinking",
687        context_window: Some(262_144),
688        max_output_tokens: Some(131_072),
689        pricing: Some(Pricing::flat(0.6, 2.5).with_notes("Cross-provider median for kimi-k2-thinking (OpenRouter/Artificial Analysis): input $0.60/M, output $2.50/M, used as a conservative native estimate.")),
690        supports_thinking: true,
691        supports_adaptive_thinking: false,
692        source_url: OPENROUTER_KIMI_K2_THINKING_URL,
693        source_status: SourceStatus::Unverified,
694        notes: Some("Exact native model_id used by the native constructor; a REASONING model (emits hidden chain-of-thought before the answer). Native Moonshot base_url. First-party pricing could not be isolated; figures are the cross-provider median. Context 262,144; max_output 131072 sized generously for reasoning tokens, within the window."),
695    },
696    ModelCapabilities {
697        provider: "openai",
698        model_id: "deepseek/deepseek-v4-pro",
699        context_window: Some(1_048_576),
700        max_output_tokens: Some(384_000),
701        pricing: Some(Pricing::flat(0.44, 0.87).with_notes("OpenRouter effective post-promo rate ($0.435 in rounded up to $0.44 / $0.87 out). Pre-promo regular rate was $1.74/$3.48.")),
702        supports_thinking: true,
703        supports_adaptive_thinking: false,
704        source_url: OPENROUTER_DEEPSEEK_V4_PRO_URL,
705        source_status: SourceStatus::Derived,
706        notes: Some("Primary model named in forge config; exact OpenRouter slug. Large MoE (1.6T total / 49B active), released 2026-04-24. Reasoning/thinking model; DeepSeek returns the answer in `content` and chain-of-thought in a separate `reasoning_content` field, which must be echoed back in subsequent thinking-mode turns or the API returns 400. Max output 384K (DeepSeek ceiling), sized generously for reasoning."),
707    },
708    ModelCapabilities {
709        provider: "openai",
710        model_id: "deepseek-v4-pro",
711        context_window: Some(1_048_576),
712        max_output_tokens: Some(384_000),
713        pricing: Some(Pricing::flat_with_cached(0.44, 0.87, 0.003_625).with_notes("Official DeepSeek pricing: input cache-MISS $0.435/M (rounded up to $0.44), cache-HIT $0.003625/M, output $0.87/M.")),
714        supports_thinking: true,
715        supports_adaptive_thinking: false,
716        source_url: DEEPSEEK_PRICING_URL,
717        source_status: SourceStatus::Derived,
718        notes: Some("Native DeepSeek API model id 'deepseek-v4-pro' (no vendor prefix). 1M context, 384K max output. Reasoning/thinking model; separate `reasoning_content` that must be echoed back in multi-turn thinking-mode requests or you get a 400. Legacy ids deepseek-reasoner/deepseek-chat now map to V4-FLASH, not Pro."),
719    },
720    ModelCapabilities {
721        provider: "openai",
722        model_id: "deepseek/deepseek-v4-flash",
723        context_window: Some(1_048_576),
724        max_output_tokens: Some(384_000),
725        pricing: Some(Pricing::flat(0.15, 0.28).with_notes("DeepSeek list rate rounded up ($0.14 in -> $0.15 / $0.28 out) used instead of OpenRouter's lower fluctuating effective rate so consumers never under-reserve budget.")),
726        supports_thinking: true,
727        supports_adaptive_thinking: false,
728        source_url: OPENROUTER_DEEPSEEK_V4_FLASH_URL,
729        source_status: SourceStatus::Derived,
730        notes: Some("Sibling V4 model (cheaper routing target). Efficiency MoE (284B total / 13B active), released 2026-04-24. Reasoning/thinking model with the same reasoning_content split + mandatory pass-back-or-400 behavior as V4 Pro. Max output 384K per DeepSeek docs."),
731    },
732    ModelCapabilities {
733        provider: "openai",
734        model_id: "deepseek-v4-flash",
735        context_window: Some(1_048_576),
736        max_output_tokens: Some(384_000),
737        pricing: Some(Pricing::flat_with_cached(0.14, 0.28, 0.002_8).with_notes("Official DeepSeek pricing: input cache-MISS $0.14/M, cache-HIT $0.0028/M, output $0.28/M.")),
738        supports_thinking: true,
739        supports_adaptive_thinking: false,
740        source_url: DEEPSEEK_PRICING_URL,
741        source_status: SourceStatus::Derived,
742        notes: Some("Native DeepSeek API model id 'deepseek-v4-flash'. 1M context, 384K max output. Reasoning/thinking model; same content/reasoning_content split and mandatory pass-back in thinking mode. Legacy aliases deepseek-chat/deepseek-reasoner now resolve to this Flash model."),
743    },
744    ModelCapabilities {
745        provider: "openai",
746        model_id: "MiniMax-M2.5",
747        context_window: Some(204_800),
748        max_output_tokens: Some(131_072),
749        pricing: Some(Pricing::flat_with_cached(0.3, 1.2, 0.03).with_notes("Native MiniMax first-party pricing: input $0.30/M, output $1.20/M, cache-read input $0.03/M (platform.minimax.io PAYG).")),
750        supports_thinking: true,
751        supports_adaptive_thinking: false,
752        source_url: MINIMAX_PRICING_URL,
753        source_status: SourceStatus::Derived,
754        notes: Some("Native agent-sdk constructor model string 'MiniMax-M2.5' (api.minimax.io, OpenAI-compatible). Reasoning/thinking model; emits chain-of-thought in <think>...</think> tags and supports interleaved thinking. Context 204,800; max_output 131072 sized generously for hidden reasoning + answer within the window."),
755    },
756    ModelCapabilities {
757        provider: "openai",
758        model_id: "minimax/minimax-m2.5",
759        context_window: Some(204_800),
760        max_output_tokens: Some(131_072),
761        pricing: Some(Pricing::flat(0.15, 1.15).with_notes("OpenRouter rate for minimax/minimax-m2.5: input $0.15/M, output $1.15/M (lower than MiniMax's $0.30/$1.20 first-party rate; OpenRouter prices can fluctuate, so reserve conservatively).")),
762        supports_thinking: true,
763        supports_adaptive_thinking: false,
764        source_url: OPENROUTER_MINIMAX_M25_URL,
765        source_status: SourceStatus::Derived,
766        notes: Some("OpenRouter slug 'minimax/minimax-m2.5' (same M2.5 weights as native). Reasoning/thinking model. Context 204,800; max_output 131072 sized generously for hidden reasoning tokens before the answer."),
767    },
768];
769
770#[must_use]
771pub fn get_model_capabilities(
772    provider: &str,
773    model_id: &str,
774) -> Option<&'static ModelCapabilities> {
775    MODEL_CAPABILITIES.iter().find(|caps| {
776        caps.provider.eq_ignore_ascii_case(provider) && caps.model_id.eq_ignore_ascii_case(model_id)
777    })
778}
779
780#[must_use]
781pub fn default_max_output_tokens(provider: &str, model_id: &str) -> Option<u32> {
782    get_model_capabilities(provider, model_id).and_then(|caps| caps.max_output_tokens)
783}
784
785#[must_use]
786pub const fn supported_model_capabilities() -> &'static [ModelCapabilities] {
787    MODEL_CAPABILITIES
788}
789
#[cfg(test)]
mod tests {
    use super::*;

    /// Resolves a registry entry, panicking with the offending key when it is absent
    /// so a failing test names the model that could not be found.
    fn lookup(provider: &str, model_id: &str) -> &'static ModelCapabilities {
        get_model_capabilities(provider, model_id)
            .unwrap_or_else(|| panic!("missing capabilities for {provider}/{model_id}"))
    }

    /// Returns the pricing of an entry, panicking with the model id when it is unset.
    fn pricing_of(caps: &ModelCapabilities) -> Pricing {
        caps.pricing
            .unwrap_or_else(|| panic!("pricing missing for {}", caps.model_id))
    }

    /// Extracts the USD-per-million rate of a price point, panicking with `label`
    /// when the price point is unset.
    fn usd_per_million(point: Option<PricePoint>, label: &str) -> f64 {
        point
            .unwrap_or_else(|| panic!("{label} price missing"))
            .usd_per_million_tokens
    }

    /// Asserts two floats agree within `f64::EPSILON`, reporting both on failure.
    fn assert_close(actual: f64, expected: f64, label: &str) {
        assert!(
            (actual - expected).abs() < f64::EPSILON,
            "{label}: expected {expected}, got {actual}"
        );
    }

    /// Asserts that a price point is set and carries the expected rate.
    fn assert_rate(point: Option<PricePoint>, expected: f64, label: &str) {
        assert_close(usd_per_million(point, label), expected, label);
    }

    #[test]
    fn test_lookup_anthropic_fable_5() {
        let caps = lookup("anthropic", "claude-fable-5");
        assert_eq!(caps.context_window, Some(1_000_000));
        assert_eq!(caps.max_output_tokens, Some(128_000));
        assert!(caps.supports_thinking);
        assert!(caps.supports_adaptive_thinking);
        assert_eq!(caps.source_status, SourceStatus::Official);
        let pricing = pricing_of(caps);
        assert_rate(pricing.input, 10.0, "claude-fable-5 input");
        assert_rate(pricing.output, 50.0, "claude-fable-5 output");
    }

    #[test]
    fn test_lookup_anthropic_opus_48() {
        let caps = lookup("anthropic", "claude-opus-4-8");
        assert_eq!(caps.context_window, Some(1_000_000));
        assert_eq!(caps.max_output_tokens, Some(128_000));
        assert!(caps.supports_thinking);
        assert!(caps.supports_adaptive_thinking);
    }

    #[test]
    fn test_lookup_anthropic_opus_46() {
        let caps = lookup("anthropic", "claude-opus-4-6");
        assert_eq!(caps.context_window, Some(1_000_000));
        assert_eq!(caps.max_output_tokens, Some(128_000));
        assert!(caps.supports_adaptive_thinking);
    }

    #[test]
    fn test_lookup_anthropic_sonnet_46() {
        let caps = lookup("anthropic", "claude-sonnet-4-6");
        assert_eq!(caps.context_window, Some(1_000_000));
        assert_eq!(caps.max_output_tokens, Some(64_000));
        assert!(caps.supports_adaptive_thinking);
    }

    #[test]
    fn test_lookup_anthropic_sonnet_45_disables_adaptive_thinking() {
        let caps = lookup("anthropic", "claude-sonnet-4-5-20250929");
        assert!(!caps.supports_adaptive_thinking);
    }

    #[test]
    fn test_lookup_openai_pricing() {
        let pricing = pricing_of(lookup("openai", "gpt-4o"));
        assert_rate(pricing.input, 1.25, "gpt-4o input");
        assert_rate(pricing.output, 5.0, "gpt-4o output");
    }

    #[test]
    fn test_lookup_openai_gpt54() {
        let caps = lookup("openai", "gpt-5.4");
        assert_eq!(caps.context_window, Some(1_050_000));
        assert_eq!(caps.max_output_tokens, Some(128_000));
        assert!(caps.supports_thinking);
        assert_eq!(caps.source_status, SourceStatus::Official);
    }

    #[test]
    fn test_lookup_openai_gpt53_codex() {
        let caps = lookup("openai", "gpt-5.3-codex");
        assert_eq!(caps.context_window, Some(400_000));
        assert_eq!(caps.max_output_tokens, Some(120_000));
        assert!(caps.supports_thinking);
        assert_eq!(caps.source_status, SourceStatus::Official);
    }

    #[test]
    fn test_lookup_gemini_preview_models() {
        let flash = lookup("gemini", "gemini-3-flash-preview");
        assert_eq!(flash.context_window, Some(1_048_576));
        assert!(flash.supports_thinking);

        let pro = lookup("gemini", "gemini-3.1-pro-preview");
        assert_eq!(pro.max_output_tokens, Some(65_536));
        assert!(pro.supports_thinking);
    }

    #[test]
    fn test_lookup_open_reasoning_models_resolve_with_thinking() {
        // DeepSeek V4 Pro via OpenRouter slug — reasoning model.
        let deepseek = lookup("openai", "deepseek/deepseek-v4-pro");
        assert!(deepseek.supports_thinking);
        assert_eq!(deepseek.max_output_tokens, Some(384_000));
        let deepseek_pricing = pricing_of(deepseek);
        assert!(usd_per_million(deepseek_pricing.input, "deepseek-v4-pro input") > 0.0);
        assert!(usd_per_million(deepseek_pricing.output, "deepseek-v4-pro output") > 0.0);

        // z.ai GLM-5.1 via OpenRouter slug — reasoning model.
        let glm = lookup("openai", "z-ai/glm-5.1");
        assert!(glm.supports_thinking);
        assert_eq!(glm.max_output_tokens, Some(131_072));
        let glm_pricing = pricing_of(glm);
        assert_rate(glm_pricing.input, 0.98, "glm-5.1 input");
        assert_rate(glm_pricing.output, 3.08, "glm-5.1 output");

        // Kimi K2 Thinking native — reasoning model.
        let kimi_thinking = lookup("openai", "kimi-k2-thinking");
        assert!(kimi_thinking.supports_thinking);
        assert_eq!(kimi_thinking.max_output_tokens, Some(131_072));
        let kimi_pricing = pricing_of(kimi_thinking);
        assert!(usd_per_million(kimi_pricing.output, "kimi-k2-thinking output") > 0.0);
    }

    #[test]
    fn test_lookup_open_non_reasoning_kimi_models() {
        // Kimi K2.6 / K2.5 are registered as non-reasoning coding models.
        let k26 = lookup("openai", "moonshotai/kimi-k2.6");
        assert!(!k26.supports_thinking);
        assert_eq!(k26.max_output_tokens, Some(65_536));
        assert!(usd_per_million(pricing_of(k26).input, "kimi-k2.6 input") > 0.0);

        // K2.5 is reachable through both the OpenRouter slug and the native id.
        let k25_openrouter = lookup("openai", "moonshotai/kimi-k2.5");
        assert!(!k25_openrouter.supports_thinking);
        assert_eq!(k25_openrouter.max_output_tokens, Some(32_768));

        let k25_native = lookup("openai", "kimi-k2.5");
        assert!(!k25_native.supports_thinking);
        assert_eq!(k25_native.max_output_tokens, Some(32_768));
    }

    #[test]
    fn test_lookup_all_open_models_resolve() {
        // Every model_id below is exactly how the consumer looks them up
        // (provider == "openai" for all open routes).
        for model_id in [
            "z-ai/glm-5.1",
            "glm-5",
            "moonshotai/kimi-k2.6",
            "moonshotai/kimi-k2.5",
            "kimi-k2.5",
            "kimi-k2-thinking",
            "deepseek/deepseek-v4-pro",
            "deepseek-v4-pro",
            "deepseek/deepseek-v4-flash",
            "deepseek-v4-flash",
            "MiniMax-M2.5",
            "minimax/minimax-m2.5",
        ] {
            let caps = lookup("openai", model_id);
            assert!(
                caps.pricing.is_some(),
                "pricing should be populated for {model_id}"
            );
            assert!(
                caps.max_output_tokens.is_some_and(|m| m > 0),
                "max_output_tokens should be non-zero for {model_id}"
            );
            assert!(
                caps.context_window.is_some_and(|c| c > 0),
                "context_window should be non-zero for {model_id}"
            );
        }
    }

    #[test]
    fn test_lookup_is_ascii_case_insensitive() {
        // Provider and model id are both matched ignoring ASCII case, so any
        // casing of the same key must resolve to the very same table entry.
        let canonical = lookup("openai", "deepseek-v4-pro");
        let shouted = lookup("OPENAI", "DEEPSEEK-V4-PRO");
        assert!(std::ptr::eq(canonical, shouted));
    }

    #[test]
    fn test_lookup_minimax_native_pricing() {
        let native = lookup("openai", "MiniMax-M2.5");
        assert!(native.supports_thinking);
        let pricing = pricing_of(native);
        assert_rate(pricing.input, 0.3, "MiniMax-M2.5 input");
        assert_rate(pricing.output, 1.2, "MiniMax-M2.5 output");
        // Cache-read is the first-party platform.minimax.io PAYG rate ($0.03/M),
        // not the ~$0.155/M that an earlier entry overstated by ~3-5x.
        assert_rate(pricing.cached_input, 0.03, "MiniMax-M2.5 cached input");
    }

    #[test]
    fn test_estimate_cost_usd() {
        let cost = lookup("openai", "gpt-4o")
            .estimate_cost_usd(&Usage {
                input_tokens: 2_000,
                output_tokens: 1_000,
                cached_input_tokens: 0,
                cache_creation_input_tokens: 0,
            })
            .expect("cost estimate should be available for gpt-4o");
        assert_close(cost, 0.0075, "gpt-4o estimated cost");
    }

    #[test]
    fn test_estimate_cost_usd_with_cached_input() {
        let cost = lookup("openai", "gpt-5.4")
            .estimate_cost_usd(&Usage {
                input_tokens: 2_000,
                output_tokens: 1_000,
                cached_input_tokens: 1_000,
                cache_creation_input_tokens: 0,
            })
            .expect("cost estimate should be available for gpt-5.4");
        assert_close(cost, 0.01775, "gpt-5.4 estimated cost");
    }
}