Skip to main content

agent_sdk_providers/
model_capabilities.rs

1use agent_sdk_foundation::llm::Usage;
2
/// Provenance marker recorded on each capability entry via
/// [`ModelCapabilities::source_status`].
///
/// NOTE(review): the per-variant meanings below are inferred from the variant
/// names and from how the capability table's notes use them — confirm before
/// relying on them.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SourceStatus {
    /// Presumably: figures taken from the provider's own documentation.
    Official,
    /// Presumably: figures inferred from related or third-party listings.
    Derived,
    /// Presumably: figures not yet confirmed against an authoritative source.
    Unverified,
}
9
/// A single token price, expressed in USD per one million tokens.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct PricePoint {
    /// USD per 1M tokens.
    pub usd_per_million_tokens: f64,
}

impl PricePoint {
    /// Wraps a USD-per-million-tokens rate. Usable in `const` contexts.
    #[must_use]
    pub const fn new(usd_per_million_tokens: f64) -> Self {
        Self { usd_per_million_tokens }
    }

    /// Returns the USD cost of `tokens` tokens at this rate.
    ///
    /// The token count is first scaled to millions and then multiplied by the
    /// rate, so zero tokens always cost `0.0`.
    #[must_use]
    pub fn estimate_cost_usd(self, tokens: u32) -> f64 {
        const TOKENS_PER_PRICING_UNIT: f64 = 1_000_000.0;

        let millions_of_tokens = f64::from(tokens) / TOKENS_PER_PRICING_UNIT;
        millions_of_tokens * self.usd_per_million_tokens
    }
}
29
30#[derive(Debug, Clone, Copy, PartialEq)]
31pub struct Pricing {
32    pub input: Option<PricePoint>,
33    pub output: Option<PricePoint>,
34    pub cached_input: Option<PricePoint>,
35    pub notes: Option<&'static str>,
36}
37
38impl Pricing {
39    #[must_use]
40    pub const fn flat(input: f64, output: f64) -> Self {
41        Self {
42            input: Some(PricePoint::new(input)),
43            output: Some(PricePoint::new(output)),
44            cached_input: None,
45            notes: None,
46        }
47    }
48
49    #[must_use]
50    pub const fn flat_with_cached(input: f64, output: f64, cached_input: f64) -> Self {
51        Self {
52            input: Some(PricePoint::new(input)),
53            output: Some(PricePoint::new(output)),
54            cached_input: Some(PricePoint::new(cached_input)),
55            notes: None,
56        }
57    }
58
59    #[must_use]
60    pub const fn with_notes(mut self, notes: &'static str) -> Self {
61        self.notes = Some(notes);
62        self
63    }
64
65    #[must_use]
66    pub fn estimate_cost_usd(&self, usage: &Usage) -> Option<f64> {
67        let cached_input_tokens = usage.cached_input_tokens.min(usage.input_tokens);
68        let uncached_input_tokens = usage.input_tokens.saturating_sub(cached_input_tokens);
69
70        let input = match (self.input, self.cached_input) {
71            (Some(input), Some(cached_input)) => Some(
72                input.estimate_cost_usd(uncached_input_tokens)
73                    + cached_input.estimate_cost_usd(cached_input_tokens),
74            ),
75            (Some(input), None) => Some(input.estimate_cost_usd(usage.input_tokens)),
76            (None, Some(cached_input)) => Some(cached_input.estimate_cost_usd(cached_input_tokens)),
77            (None, None) => None,
78        };
79        let output = self
80            .output
81            .map(|p| p.estimate_cost_usd(usage.output_tokens));
82        match (input, output) {
83            (Some(input), Some(output)) => Some(input + output),
84            (Some(input), None) => Some(input),
85            (None, Some(output)) => Some(output),
86            (None, None) => None,
87        }
88    }
89}
90
91#[derive(Debug, Clone, Copy, PartialEq)]
92pub struct ModelCapabilities {
93    pub provider: &'static str,
94    pub model_id: &'static str,
95    pub context_window: Option<u32>,
96    pub max_output_tokens: Option<u32>,
97    pub pricing: Option<Pricing>,
98    pub supports_thinking: bool,
99    pub supports_adaptive_thinking: bool,
100    pub source_url: &'static str,
101    pub source_status: SourceStatus,
102    pub notes: Option<&'static str>,
103}
104
105impl ModelCapabilities {
106    #[must_use]
107    pub fn estimate_cost_usd(&self, usage: &Usage) -> Option<f64> {
108        self.pricing
109            .as_ref()
110            .and_then(|p| p.estimate_cost_usd(usage))
111    }
112}
113
// Source pages cited by the capability table via `source_url`.

// Anthropic.
const ANTHROPIC_MODELS_URL: &str =
    "https://docs.anthropic.com/en/docs/about-claude/models/all-models";

// OpenAI: model index, pricing page, and per-model pages.
const OPENAI_MODELS_URL: &str = "https://developers.openai.com/api/docs/models";
const OPENAI_PRICING_URL: &str = "https://developers.openai.com/api/docs/pricing";
const OPENAI_GPT54_URL: &str = "https://developers.openai.com/api/docs/models/gpt-5.4";
const OPENAI_GPT53_CODEX_URL: &str = "https://developers.openai.com/api/docs/models/gpt-5.3-codex";

// Google Gemini: model index and pricing page.
const GOOGLE_MODELS_URL: &str = "https://ai.google.dev/gemini-api/docs/models";
const GOOGLE_PRICING_URL: &str = "https://ai.google.dev/gemini-api/docs/pricing";

// Open-model routes. Every one of these is reached through OpenAIProvider
// (provider() == "openai"), either by an OpenRouter slug or by the native
// z.ai / Moonshot / MiniMax base URL.
const OPENROUTER_GLM51_URL: &str = "https://openrouter.ai/z-ai/glm-5.1";
const ZAI_GLM5_PRICING_URL: &str = "https://docs.z.ai/guides/overview/pricing";
const OPENROUTER_KIMI_K26_URL: &str = "https://openrouter.ai/moonshotai/kimi-k2.6";
const OPENROUTER_KIMI_K25_URL: &str = "https://openrouter.ai/moonshotai/kimi-k2.5";
const KIMI_K25_AA_URL: &str = "https://artificialanalysis.ai/models/kimi-k2-5";
const OPENROUTER_KIMI_K2_THINKING_URL: &str = "https://openrouter.ai/moonshotai/kimi-k2-thinking";
const OPENROUTER_DEEPSEEK_V4_PRO_URL: &str = "https://openrouter.ai/deepseek/deepseek-v4-pro";
const OPENROUTER_DEEPSEEK_V4_FLASH_URL: &str = "https://openrouter.ai/deepseek/deepseek-v4-flash";
const DEEPSEEK_PRICING_URL: &str = "https://api-docs.deepseek.com/quick_start/pricing";
const MINIMAX_PRICING_URL: &str = "https://platform.minimax.io/docs/guides/pricing-paygo";
const OPENROUTER_MINIMAX_M25_URL: &str = "https://openrouter.ai/minimax/minimax-m2.5";
136
137const MODEL_CAPABILITIES: &[ModelCapabilities] = &[
138    // Anthropic
139    ModelCapabilities {
140        provider: "anthropic",
141        model_id: "claude-opus-4-8",
142        context_window: Some(1_000_000),
143        max_output_tokens: Some(128_000),
144        pricing: Some(Pricing::flat(5.0, 25.0).with_notes("Anthropic Opus 4.8 pricing matches the Opus 4.6 tier ($5/$25 per 1M); verify exact current SKU mapping before billing-critical use.")),
145        supports_thinking: true,
146        supports_adaptive_thinking: true,
147        source_url: ANTHROPIC_MODELS_URL,
148        source_status: SourceStatus::Derived,
149        notes: Some("Opus 4.8 requires adaptive thinking — `ThinkingMode::Enabled { budget_tokens }` is rejected by the Anthropic API. The SDK fails fast in validate_thinking_config."),
150    },
151    ModelCapabilities {
152        provider: "anthropic",
153        model_id: "claude-opus-4-7",
154        context_window: Some(1_000_000),
155        max_output_tokens: Some(128_000),
156        pricing: Some(Pricing::flat(5.0, 25.0).with_notes("Anthropic Opus 4.7 pricing matches the Opus 4.6 tier ($5/$25 per 1M); verify exact current SKU mapping before billing-critical use.")),
157        supports_thinking: true,
158        supports_adaptive_thinking: true,
159        source_url: ANTHROPIC_MODELS_URL,
160        source_status: SourceStatus::Derived,
161        notes: Some("Opus 4.7 requires adaptive thinking — `ThinkingMode::Enabled { budget_tokens }` is rejected by the Anthropic API. The SDK fails fast in validate_thinking_config."),
162    },
163    ModelCapabilities {
164        provider: "anthropic",
165        model_id: "claude-opus-4-6",
166        context_window: Some(1_000_000),
167        max_output_tokens: Some(128_000),
168        pricing: Some(Pricing::flat(5.0, 25.0).with_notes("Anthropic Opus 4.6 pricing from bundled Claude API guidance; verify exact current SKU mapping before billing-critical use.")),
169        supports_thinking: true,
170        supports_adaptive_thinking: true,
171        source_url: ANTHROPIC_MODELS_URL,
172        source_status: SourceStatus::Derived,
173        notes: Some("Current Anthropic docs show this model alongside 200K/128K markers."),
174    },
175    ModelCapabilities {
176        provider: "anthropic",
177        model_id: "claude-sonnet-4-6",
178        context_window: Some(1_000_000),
179        max_output_tokens: Some(64_000),
180        pricing: Some(Pricing::flat(3.0, 15.0).with_notes("Anthropic Sonnet tier pricing; verify exact current SKU mapping before billing-critical use.")),
181        supports_thinking: true,
182        supports_adaptive_thinking: true,
183        source_url: ANTHROPIC_MODELS_URL,
184        source_status: SourceStatus::Derived,
185        notes: Some("Anthropic docs list Sonnet 4.6; user confirmed adaptive thinking support."),
186    },
187    ModelCapabilities {
188        provider: "anthropic",
189        model_id: "claude-sonnet-4-5-20250929",
190        context_window: Some(200_000),
191        max_output_tokens: Some(64_000),
192        pricing: Some(Pricing::flat(3.0, 15.0).with_notes("Anthropic Sonnet tier pricing; verify exact current SKU mapping before billing-critical use.")),
193        supports_thinking: true,
194        supports_adaptive_thinking: false,
195        source_url: ANTHROPIC_MODELS_URL,
196        source_status: SourceStatus::Derived,
197        notes: None,
198    },
199    ModelCapabilities {
200        provider: "anthropic",
201        model_id: "claude-haiku-4-5-20251001",
202        context_window: Some(200_000),
203        max_output_tokens: Some(64_000),
204        pricing: Some(Pricing::flat(1.0, 5.0).with_notes("Anthropic Haiku tier pricing; verify exact current SKU mapping before billing-critical use.")),
205        supports_thinking: true,
206        supports_adaptive_thinking: false,
207        source_url: ANTHROPIC_MODELS_URL,
208        source_status: SourceStatus::Derived,
209        notes: None,
210    },
211    ModelCapabilities {
212        provider: "anthropic",
213        model_id: "claude-sonnet-4-20250514",
214        context_window: Some(200_000),
215        max_output_tokens: Some(64_000),
216        pricing: Some(Pricing::flat(3.0, 15.0).with_notes("Anthropic Sonnet tier pricing; verify exact current SKU mapping before billing-critical use.")),
217        supports_thinking: true,
218        supports_adaptive_thinking: false,
219        source_url: ANTHROPIC_MODELS_URL,
220        source_status: SourceStatus::Derived,
221        notes: None,
222    },
223    ModelCapabilities {
224        provider: "anthropic",
225        model_id: "claude-opus-4-20250514",
226        context_window: Some(200_000),
227        max_output_tokens: Some(32_000),
228        pricing: Some(Pricing::flat(15.0, 75.0).with_notes("Anthropic Opus tier pricing; verify exact current SKU mapping before billing-critical use.")),
229        supports_thinking: true,
230        supports_adaptive_thinking: false,
231        source_url: ANTHROPIC_MODELS_URL,
232        source_status: SourceStatus::Derived,
233        notes: None,
234    },
235    ModelCapabilities {
236        provider: "anthropic",
237        model_id: "claude-3-5-sonnet-20241022",
238        context_window: Some(200_000),
239        max_output_tokens: Some(8_192),
240        pricing: Some(Pricing::flat(3.0, 15.0).with_notes("Anthropic Sonnet tier pricing; verify exact current SKU mapping before billing-critical use.")),
241        supports_thinking: true,
242        supports_adaptive_thinking: false,
243        source_url: ANTHROPIC_MODELS_URL,
244        source_status: SourceStatus::Derived,
245        notes: None,
246    },
247    ModelCapabilities {
248        provider: "anthropic",
249        model_id: "claude-3-5-haiku-20241022",
250        context_window: Some(200_000),
251        max_output_tokens: Some(8_192),
252        pricing: Some(Pricing::flat(1.0, 5.0).with_notes("Anthropic Haiku tier pricing; verify exact current SKU mapping before billing-critical use.")),
253        supports_thinking: true,
254        supports_adaptive_thinking: false,
255        source_url: ANTHROPIC_MODELS_URL,
256        source_status: SourceStatus::Derived,
257        notes: None,
258    },
259    // OpenAI
260    ModelCapabilities {
261        provider: "openai",
262        model_id: "gpt-5.4",
263        context_window: Some(1_050_000),
264        max_output_tokens: Some(128_000),
265        pricing: Some(Pricing::flat_with_cached(2.50, 15.0, 0.25)),
266        supports_thinking: true,
267        supports_adaptive_thinking: false,
268        source_url: OPENAI_GPT54_URL,
269        source_status: SourceStatus::Official,
270        notes: Some("OpenAI model docs list 1.05M context, 128K max output, and reasoning.effort support."),
271    },
272    ModelCapabilities {
273        provider: "openai",
274        model_id: "gpt-5.3-codex",
275        context_window: Some(400_000),
276        max_output_tokens: Some(120_000),
277        pricing: Some(Pricing::flat_with_cached(1.50, 6.0, 0.375)),
278        supports_thinking: true,
279        supports_adaptive_thinking: false,
280        source_url: OPENAI_GPT53_CODEX_URL,
281        source_status: SourceStatus::Official,
282        notes: Some("OpenAI model docs list Chat Completions and Responses API support plus reasoning.effort levels."),
283    },
284    ModelCapabilities {
285        provider: "openai",
286        model_id: "gpt-5",
287        context_window: Some(400_000),
288        max_output_tokens: Some(128_000),
289        pricing: Some(Pricing::flat_with_cached(1.25, 10.0, 0.125)),
290        supports_thinking: false,
291        supports_adaptive_thinking: false,
292        source_url: OPENAI_PRICING_URL,
293        source_status: SourceStatus::Official,
294        notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
295    },
296    ModelCapabilities {
297        provider: "openai",
298        model_id: "gpt-5-mini",
299        context_window: Some(400_000),
300        max_output_tokens: Some(128_000),
301        pricing: Some(Pricing::flat_with_cached(0.125, 1.0, 0.0125)),
302        supports_thinking: false,
303        supports_adaptive_thinking: false,
304        source_url: OPENAI_PRICING_URL,
305        source_status: SourceStatus::Official,
306        notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
307    },
308    ModelCapabilities {
309        provider: "openai",
310        model_id: "gpt-5-nano",
311        context_window: Some(400_000),
312        max_output_tokens: Some(128_000),
313        pricing: Some(Pricing::flat_with_cached(0.025, 0.20, 0.0025)),
314        supports_thinking: false,
315        supports_adaptive_thinking: false,
316        source_url: OPENAI_PRICING_URL,
317        source_status: SourceStatus::Official,
318        notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
319    },
320    ModelCapabilities {
321        provider: "openai",
322        model_id: "gpt-5.2-instant",
323        context_window: Some(400_000),
324        max_output_tokens: Some(128_000),
325        pricing: None,
326        supports_thinking: false,
327        supports_adaptive_thinking: false,
328        source_url: OPENAI_MODELS_URL,
329        source_status: SourceStatus::Unverified,
330        notes: Some("Model exists in OpenAI docs, but pricing was not extracted from the official pricing page in this pass."),
331    },
332    ModelCapabilities {
333        provider: "openai",
334        model_id: "gpt-5.2-thinking",
335        context_window: Some(400_000),
336        max_output_tokens: Some(128_000),
337        pricing: None,
338        supports_thinking: true,
339        supports_adaptive_thinking: false,
340        source_url: OPENAI_MODELS_URL,
341        source_status: SourceStatus::Unverified,
342        notes: Some("Model exists in OpenAI docs, but pricing was not extracted from the official pricing page in this pass."),
343    },
344    ModelCapabilities {
345        provider: "openai",
346        model_id: "gpt-5.2-pro",
347        context_window: Some(400_000),
348        max_output_tokens: Some(128_000),
349        pricing: Some(Pricing::flat(10.50, 84.0)),
350        supports_thinking: false,
351        supports_adaptive_thinking: false,
352        source_url: OPENAI_PRICING_URL,
353        source_status: SourceStatus::Official,
354        notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
355    },
356    ModelCapabilities {
357        provider: "openai",
358        model_id: "gpt-5.2-codex",
359        context_window: Some(400_000),
360        max_output_tokens: Some(128_000),
361        pricing: None,
362        supports_thinking: false,
363        supports_adaptive_thinking: false,
364        source_url: OPENAI_MODELS_URL,
365        source_status: SourceStatus::Unverified,
366        notes: Some("Model presence confirmed from OpenAI docs; pricing not yet extracted in this pass."),
367    },
368    ModelCapabilities {
369        provider: "openai",
370        model_id: "o3",
371        context_window: Some(200_000),
372        max_output_tokens: Some(100_000),
373        pricing: Some(Pricing::flat(1.0, 4.0)),
374        supports_thinking: true,
375        supports_adaptive_thinking: false,
376        source_url: OPENAI_PRICING_URL,
377        source_status: SourceStatus::Official,
378        notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
379    },
380    ModelCapabilities {
381        provider: "openai",
382        model_id: "o3-mini",
383        context_window: Some(200_000),
384        max_output_tokens: Some(100_000),
385        pricing: Some(Pricing::flat(0.55, 2.20)),
386        supports_thinking: true,
387        supports_adaptive_thinking: false,
388        source_url: OPENAI_PRICING_URL,
389        source_status: SourceStatus::Official,
390        notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
391    },
392    ModelCapabilities {
393        provider: "openai",
394        model_id: "o4-mini",
395        context_window: Some(200_000),
396        max_output_tokens: Some(100_000),
397        pricing: Some(Pricing::flat(0.55, 2.20)),
398        supports_thinking: true,
399        supports_adaptive_thinking: false,
400        source_url: OPENAI_PRICING_URL,
401        source_status: SourceStatus::Official,
402        notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
403    },
404    ModelCapabilities {
405        provider: "openai",
406        model_id: "o1",
407        context_window: Some(200_000),
408        max_output_tokens: Some(100_000),
409        pricing: Some(Pricing::flat(7.50, 30.0)),
410        supports_thinking: true,
411        supports_adaptive_thinking: false,
412        source_url: OPENAI_PRICING_URL,
413        source_status: SourceStatus::Official,
414        notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
415    },
416    ModelCapabilities {
417        provider: "openai",
418        model_id: "o1-mini",
419        context_window: Some(200_000),
420        max_output_tokens: Some(100_000),
421        pricing: Some(Pricing::flat(0.55, 2.20)),
422        supports_thinking: true,
423        supports_adaptive_thinking: false,
424        source_url: OPENAI_PRICING_URL,
425        source_status: SourceStatus::Official,
426        notes: Some("Pricing verified from OpenAI pricing page. Context/max output still need clean extraction from models docs."),
427    },
428    ModelCapabilities {
429        provider: "openai",
430        model_id: "gpt-4.1",
431        context_window: Some(1_000_000),
432        max_output_tokens: Some(16_384),
433        pricing: Some(Pricing::flat(1.0, 4.0)),
434        supports_thinking: false,
435        supports_adaptive_thinking: false,
436        source_url: OPENAI_PRICING_URL,
437        source_status: SourceStatus::Official,
438        notes: Some("Pricing verified from OpenAI pricing page. Context window from model family docs/notes."),
439    },
440    ModelCapabilities {
441        provider: "openai",
442        model_id: "gpt-4.1-mini",
443        context_window: Some(1_000_000),
444        max_output_tokens: Some(16_384),
445        pricing: Some(Pricing::flat(0.20, 0.80)),
446        supports_thinking: false,
447        supports_adaptive_thinking: false,
448        source_url: OPENAI_PRICING_URL,
449        source_status: SourceStatus::Official,
450        notes: Some("Pricing verified from OpenAI pricing page. Context window from model family docs/notes."),
451    },
452    ModelCapabilities {
453        provider: "openai",
454        model_id: "gpt-4.1-nano",
455        context_window: Some(1_000_000),
456        max_output_tokens: Some(16_384),
457        pricing: Some(Pricing::flat(0.05, 0.20)),
458        supports_thinking: false,
459        supports_adaptive_thinking: false,
460        source_url: OPENAI_PRICING_URL,
461        source_status: SourceStatus::Official,
462        notes: Some("Pricing verified from OpenAI pricing page. Context window from model family docs/notes."),
463    },
464    ModelCapabilities {
465        provider: "openai",
466        model_id: "gpt-4o",
467        context_window: Some(128_000),
468        max_output_tokens: Some(16_384),
469        pricing: Some(Pricing::flat(1.25, 5.0)),
470        supports_thinking: false,
471        supports_adaptive_thinking: false,
472        source_url: OPENAI_PRICING_URL,
473        source_status: SourceStatus::Official,
474        notes: Some("Pricing verified from OpenAI pricing page. Context/max output from existing runtime assumptions."),
475    },
476    ModelCapabilities {
477        provider: "openai",
478        model_id: "gpt-4o-mini",
479        context_window: Some(128_000),
480        max_output_tokens: Some(16_384),
481        pricing: Some(Pricing::flat(0.075, 0.30)),
482        supports_thinking: false,
483        supports_adaptive_thinking: false,
484        source_url: OPENAI_PRICING_URL,
485        source_status: SourceStatus::Official,
486        notes: Some("Pricing verified from OpenAI pricing page. Context/max output from existing runtime assumptions."),
487    },
488    // Gemini
489    ModelCapabilities {
490        provider: "gemini",
491        model_id: "gemini-3.1-pro-preview",
492        context_window: Some(1_048_576),
493        max_output_tokens: Some(65_536),
494        pricing: Some(Pricing::flat(2.0, 12.0).with_notes("Official pricing for prompts <= 200K tokens. For prompts > 200K, pricing increases to $4 input / $18 output per 1M tokens.")),
495        supports_thinking: true,
496        supports_adaptive_thinking: false,
497        source_url: GOOGLE_PRICING_URL,
498        source_status: SourceStatus::Official,
499        notes: Some("Pricing sourced from Gemini 3.1 Pro Preview docs."),
500    },
501    ModelCapabilities {
502        provider: "gemini",
503        model_id: "gemini-3.1-pro",
504        context_window: Some(1_048_576),
505        max_output_tokens: Some(65_536),
506        pricing: Some(Pricing::flat(2.0, 12.0).with_notes("Legacy alias retained for compatibility. For prompts > 200K, pricing increases to $4 input / $18 output per 1M tokens.")),
507        supports_thinking: true,
508        supports_adaptive_thinking: false,
509        source_url: GOOGLE_PRICING_URL,
510        source_status: SourceStatus::Derived,
511        notes: Some("Legacy Gemini 3.1 Pro alias retained for compatibility; prefer gemini-3.1-pro-preview."),
512    },
513    ModelCapabilities {
514        provider: "gemini",
515        model_id: "gemini-3.1-flash-lite-preview",
516        context_window: Some(1_048_576),
517        max_output_tokens: Some(65_536),
518        pricing: None,
519        supports_thinking: true,
520        supports_adaptive_thinking: false,
521        source_url: GOOGLE_MODELS_URL,
522        source_status: SourceStatus::Unverified,
523        notes: Some("Model presence confirmed from Google docs, but pricing was not extracted in this pass."),
524    },
525    ModelCapabilities {
526        provider: "gemini",
527        model_id: "gemini-3-flash-preview",
528        context_window: Some(1_048_576),
529        max_output_tokens: Some(65_536),
530        pricing: None,
531        supports_thinking: true,
532        supports_adaptive_thinking: false,
533        source_url: GOOGLE_MODELS_URL,
534        source_status: SourceStatus::Unverified,
535        notes: Some("Model presence confirmed from Google docs, but pricing was not extracted in this pass."),
536    },
537    ModelCapabilities {
538        provider: "gemini",
539        model_id: "gemini-3.0-flash",
540        context_window: Some(1_048_576),
541        max_output_tokens: Some(65_536),
542        pricing: None,
543        supports_thinking: true,
544        supports_adaptive_thinking: false,
545        source_url: GOOGLE_MODELS_URL,
546        source_status: SourceStatus::Derived,
547        notes: Some("Legacy Gemini 3.0 Flash model retained for compatibility; prefer gemini-3-flash-preview."),
548    },
549    ModelCapabilities {
550        provider: "gemini",
551        model_id: "gemini-3.0-pro",
552        context_window: Some(1_048_576),
553        max_output_tokens: Some(65_536),
554        pricing: None,
555        supports_thinking: true,
556        supports_adaptive_thinking: false,
557        source_url: GOOGLE_MODELS_URL,
558        source_status: SourceStatus::Unverified,
559        notes: Some("Model presence confirmed from Google docs, but pricing was not extracted in this pass."),
560    },
561    ModelCapabilities {
562        provider: "gemini",
563        model_id: "gemini-2.5-flash",
564        context_window: Some(1_000_000),
565        max_output_tokens: Some(65_536),
566        pricing: Some(Pricing::flat(0.30, 2.50).with_notes("Official text/image/video pricing. Audio input is priced separately at $1.00 / 1M tokens.")),
567        supports_thinking: true,
568        supports_adaptive_thinking: false,
569        source_url: GOOGLE_PRICING_URL,
570        source_status: SourceStatus::Official,
571        notes: Some("Official docs state output pricing includes thinking tokens."),
572    },
573    ModelCapabilities {
574        provider: "gemini",
575        model_id: "gemini-2.5-pro",
576        context_window: Some(1_000_000),
577        max_output_tokens: Some(65_536),
578        pricing: None,
579        supports_thinking: true,
580        supports_adaptive_thinking: false,
581        source_url: GOOGLE_MODELS_URL,
582        source_status: SourceStatus::Unverified,
583        notes: Some("Model presence confirmed from Google docs, but pricing was not extracted in this pass."),
584    },
585    ModelCapabilities {
586        provider: "gemini",
587        model_id: "gemini-2.0-flash",
588        context_window: Some(1_000_000),
589        max_output_tokens: Some(8_192),
590        pricing: Some(Pricing::flat(0.10, 0.40).with_notes("Official text/image/video pricing. Audio input is priced separately at $0.70 / 1M tokens.")),
591        supports_thinking: false,
592        supports_adaptive_thinking: false,
593        source_url: GOOGLE_PRICING_URL,
594        source_status: SourceStatus::Official,
595        notes: None,
596    },
597    ModelCapabilities {
598        provider: "gemini",
599        model_id: "gemini-2.0-flash-lite",
600        context_window: Some(1_000_000),
601        max_output_tokens: Some(8_192),
602        pricing: Some(Pricing::flat(0.075, 0.30)),
603        supports_thinking: false,
604        supports_adaptive_thinking: false,
605        source_url: GOOGLE_PRICING_URL,
606        source_status: SourceStatus::Official,
607        notes: None,
608    },
609    // Open models (z.ai / Moonshot / DeepSeek / MiniMax). All routed through
610    // OpenAIProvider, so provider == "openai" and the model_id is the exact
611    // string the caller passes (OpenRouter slug or native model id).
612    ModelCapabilities {
613        provider: "openai",
614        model_id: "z-ai/glm-5.1",
615        context_window: Some(202_752),
616        max_output_tokens: Some(131_072),
617        pricing: Some(Pricing::flat(0.98, 3.08).with_notes("OpenRouter rate for z-ai/glm-5.1: input $0.98/M, output $3.08/M.")),
618        supports_thinking: true,
619        supports_adaptive_thinking: false,
620        source_url: OPENROUTER_GLM51_URL,
621        source_status: SourceStatus::Derived,
622        notes: Some("GLM-5.1 (z.ai/Zhipu) via OpenRouter slug. Reasoning/thinking model; context 203K (=202,752). max_output 128K from z.ai GLM-5.1 docs, sized generously for hidden reasoning + answer. Released ~Apr 7, 2026."),
623    },
624    ModelCapabilities {
625        provider: "openai",
626        model_id: "glm-5",
627        context_window: Some(200_000),
628        max_output_tokens: Some(131_072),
629        pricing: Some(Pricing::flat(1.0, 3.2).with_notes("Native z.ai pricing: input $1.0/M, output $3.2/M (higher than the OpenRouter GLM-5 rate of $0.60/$1.92).")),
630        supports_thinking: true,
631        supports_adaptive_thinking: false,
632        source_url: ZAI_GLM5_PRICING_URL,
633        source_status: SourceStatus::Derived,
634        notes: Some("Native z.ai constructor model string `glm-5`. Reasoning/thinking model; 200K context, 128K (131072) max output per docs.z.ai/guides/llm/glm-5. Native pricing used for the native route. Released ~Feb 11, 2026."),
635    },
636    ModelCapabilities {
637        provider: "openai",
638        model_id: "moonshotai/kimi-k2.6",
639        context_window: Some(262_144),
640        max_output_tokens: Some(65_536),
641        pricing: Some(Pricing::flat(0.684, 3.42).with_notes("OpenRouter rate for moonshotai/kimi-k2.6: input $0.684/M, output $3.42/M.")),
642        supports_thinking: false,
643        supports_adaptive_thinking: false,
644        source_url: OPENROUTER_KIMI_K26_URL,
645        source_status: SourceStatus::Derived,
646        notes: Some("Exact OpenRouter slug (note the dot). Hybrid model marketed/used as a non-reasoning coding+multimodal model, so supports_thinking=false (use moonshotai/kimi-k2-thinking for the dedicated reasoning model). Context 262,144; 65536 is a generous app-side completion budget within the window."),
647    },
648    ModelCapabilities {
649        provider: "openai",
650        model_id: "moonshotai/kimi-k2.5",
651        context_window: Some(262_144),
652        max_output_tokens: Some(32_768),
653        pricing: Some(Pricing::flat(0.4, 1.9).with_notes("OpenRouter rate for moonshotai/kimi-k2.5: input $0.40/M, output $1.90/M.")),
654        supports_thinking: false,
655        supports_adaptive_thinking: false,
656        source_url: OPENROUTER_KIMI_K25_URL,
657        source_status: SourceStatus::Derived,
658        notes: Some("OpenRouter route for the model the native constructor names 'kimi-k2.5'. Treated as non-reasoning (visual-coding + agentic tool-calling) on OpenRouter. Context 262,144; 32768 is a generous app-side completion budget within the window."),
659    },
660    ModelCapabilities {
661        provider: "openai",
662        model_id: "kimi-k2.5",
663        context_window: Some(262_144),
664        max_output_tokens: Some(32_768),
665        pricing: Some(Pricing::flat(0.6, 3.0).with_notes("Native Moonshot estimate from Artificial Analysis (~$0.58 in / $3.00 out); input rounded up to $0.60 to stay conservative for budget reservation.")),
666        supports_thinking: false,
667        supports_adaptive_thinking: false,
668        source_url: KIMI_K25_AA_URL,
669        source_status: SourceStatus::Unverified,
670        notes: Some("Exact native model_id used by the native constructor (Moonshot platform.kimi.ai base_url). Native pricing not on the first-party table (only k2.6 is enumerated); figures derived from Artificial Analysis. Context 262,144; 32768 is a generous within-window completion budget."),
671    },
672    ModelCapabilities {
673        provider: "openai",
674        model_id: "kimi-k2-thinking",
675        context_window: Some(262_144),
676        max_output_tokens: Some(131_072),
677        pricing: Some(Pricing::flat(0.6, 2.5).with_notes("Cross-provider median for kimi-k2-thinking (OpenRouter/Artificial Analysis): input $0.60/M, output $2.50/M, used as a conservative native estimate.")),
678        supports_thinking: true,
679        supports_adaptive_thinking: false,
680        source_url: OPENROUTER_KIMI_K2_THINKING_URL,
681        source_status: SourceStatus::Unverified,
682        notes: Some("Exact native model_id used by the native constructor; a REASONING model (emits hidden chain-of-thought before the answer). Native Moonshot base_url. First-party pricing could not be isolated; figures are the cross-provider median. Context 262,144; max_output 131072 sized generously for reasoning tokens, within the window."),
683    },
684    ModelCapabilities {
685        provider: "openai",
686        model_id: "deepseek/deepseek-v4-pro",
687        context_window: Some(1_048_576),
688        max_output_tokens: Some(384_000),
689        pricing: Some(Pricing::flat(0.44, 0.87).with_notes("OpenRouter effective post-promo rate ($0.435 in rounded up to $0.44 / $0.87 out). Pre-promo regular rate was $1.74/$3.48.")),
690        supports_thinking: true,
691        supports_adaptive_thinking: false,
692        source_url: OPENROUTER_DEEPSEEK_V4_PRO_URL,
693        source_status: SourceStatus::Derived,
694        notes: Some("Primary model named in forge config; exact OpenRouter slug. Large MoE (1.6T total / 49B active), released 2026-04-24. Reasoning/thinking model; DeepSeek returns the answer in `content` and chain-of-thought in a separate `reasoning_content` field, which must be echoed back in subsequent thinking-mode turns or the API returns 400. Max output 384K (DeepSeek ceiling), sized generously for reasoning."),
695    },
696    ModelCapabilities {
697        provider: "openai",
698        model_id: "deepseek-v4-pro",
699        context_window: Some(1_048_576),
700        max_output_tokens: Some(384_000),
701        pricing: Some(Pricing::flat_with_cached(0.44, 0.87, 0.003_625).with_notes("Official DeepSeek pricing: input cache-MISS $0.435/M (rounded up to $0.44), cache-HIT $0.003625/M, output $0.87/M.")),
702        supports_thinking: true,
703        supports_adaptive_thinking: false,
704        source_url: DEEPSEEK_PRICING_URL,
705        source_status: SourceStatus::Derived,
706        notes: Some("Native DeepSeek API model id 'deepseek-v4-pro' (no vendor prefix). 1M context, 384K max output. Reasoning/thinking model; separate `reasoning_content` that must be echoed back in multi-turn thinking-mode requests or you get a 400. Legacy ids deepseek-reasoner/deepseek-chat now map to V4-FLASH, not Pro."),
707    },
708    ModelCapabilities {
709        provider: "openai",
710        model_id: "deepseek/deepseek-v4-flash",
711        context_window: Some(1_048_576),
712        max_output_tokens: Some(384_000),
713        pricing: Some(Pricing::flat(0.15, 0.28).with_notes("DeepSeek list rate rounded up ($0.14 in -> $0.15 / $0.28 out) used instead of OpenRouter's lower fluctuating effective rate so consumers never under-reserve budget.")),
714        supports_thinking: true,
715        supports_adaptive_thinking: false,
716        source_url: OPENROUTER_DEEPSEEK_V4_FLASH_URL,
717        source_status: SourceStatus::Derived,
718        notes: Some("Sibling V4 model (cheaper routing target). Efficiency MoE (284B total / 13B active), released 2026-04-24. Reasoning/thinking model with the same reasoning_content split + mandatory pass-back-or-400 behavior as V4 Pro. Max output 384K per DeepSeek docs."),
719    },
720    ModelCapabilities {
721        provider: "openai",
722        model_id: "deepseek-v4-flash",
723        context_window: Some(1_048_576),
724        max_output_tokens: Some(384_000),
725        pricing: Some(Pricing::flat_with_cached(0.14, 0.28, 0.002_8).with_notes("Official DeepSeek pricing: input cache-MISS $0.14/M, cache-HIT $0.0028/M, output $0.28/M.")),
726        supports_thinking: true,
727        supports_adaptive_thinking: false,
728        source_url: DEEPSEEK_PRICING_URL,
729        source_status: SourceStatus::Derived,
730        notes: Some("Native DeepSeek API model id 'deepseek-v4-flash'. 1M context, 384K max output. Reasoning/thinking model; same content/reasoning_content split and mandatory pass-back in thinking mode. Legacy aliases deepseek-chat/deepseek-reasoner now resolve to this Flash model."),
731    },
732    ModelCapabilities {
733        provider: "openai",
734        model_id: "MiniMax-M2.5",
735        context_window: Some(204_800),
736        max_output_tokens: Some(131_072),
737        pricing: Some(Pricing::flat_with_cached(0.3, 1.2, 0.155).with_notes("Native MiniMax first-party pricing: input $0.30/M, output $1.20/M, cached input ~$0.155/M.")),
738        supports_thinking: true,
739        supports_adaptive_thinking: false,
740        source_url: MINIMAX_PRICING_URL,
741        source_status: SourceStatus::Derived,
742        notes: Some("Native agent-sdk constructor model string 'MiniMax-M2.5' (api.minimax.io, OpenAI-compatible). Reasoning/thinking model; emits chain-of-thought in <think>...</think> tags and supports interleaved thinking. Context 204,800; max_output 131072 sized generously for hidden reasoning + answer within the window."),
743    },
744    ModelCapabilities {
745        provider: "openai",
746        model_id: "minimax/minimax-m2.5",
747        context_window: Some(204_800),
748        max_output_tokens: Some(131_072),
749        pricing: Some(Pricing::flat(0.15, 1.15).with_notes("OpenRouter rate for minimax/minimax-m2.5: input $0.15/M, output $1.15/M (lower than MiniMax's $0.30/$1.20 first-party rate; OpenRouter prices can fluctuate, so reserve conservatively).")),
750        supports_thinking: true,
751        supports_adaptive_thinking: false,
752        source_url: OPENROUTER_MINIMAX_M25_URL,
753        source_status: SourceStatus::Derived,
754        notes: Some("OpenRouter slug 'minimax/minimax-m2.5' (same M2.5 weights as native). Reasoning/thinking model. Context 204,800; max_output 131072 sized generously for hidden reasoning tokens before the answer."),
755    },
756];
757
758#[must_use]
759pub fn get_model_capabilities(
760    provider: &str,
761    model_id: &str,
762) -> Option<&'static ModelCapabilities> {
763    MODEL_CAPABILITIES.iter().find(|caps| {
764        caps.provider.eq_ignore_ascii_case(provider) && caps.model_id.eq_ignore_ascii_case(model_id)
765    })
766}
767
768#[must_use]
769pub fn default_max_output_tokens(provider: &str, model_id: &str) -> Option<u32> {
770    get_model_capabilities(provider, model_id).and_then(|caps| caps.max_output_tokens)
771}
772
773#[must_use]
774pub const fn supported_model_capabilities() -> &'static [ModelCapabilities] {
775    MODEL_CAPABILITIES
776}
777
// Unit tests for the capability registry: lookups by provider/model id, the
// convenience accessors, and cost estimation from token usage.
#[cfg(test)]
mod tests {
    use super::*;

    /// Resolves a registry entry, failing the calling test with the missing
    /// provider/model pair rather than an anonymous `unwrap` panic.
    #[track_caller]
    fn caps_for(provider: &str, model_id: &str) -> &'static ModelCapabilities {
        // `let else` (not a closure) so `#[track_caller]` reports the test line.
        let Some(caps) = get_model_capabilities(provider, model_id) else {
            panic!("missing capabilities for {provider}/{model_id}");
        };
        caps
    }

    /// Returns the entry's input rate in USD per million tokens.
    #[track_caller]
    fn input_rate(caps: &ModelCapabilities) -> f64 {
        caps.pricing
            .expect("pricing should be populated")
            .input
            .expect("input price should be populated")
            .usd_per_million_tokens
    }

    /// Returns the entry's output rate in USD per million tokens.
    #[track_caller]
    fn output_rate(caps: &ModelCapabilities) -> f64 {
        caps.pricing
            .expect("pricing should be populated")
            .output
            .expect("output price should be populated")
            .usd_per_million_tokens
    }

    /// Asserts two `f64` values agree within an absolute `f64::EPSILON`,
    /// the tolerance used throughout these tests.
    #[track_caller]
    fn assert_close(actual: f64, expected: f64) {
        assert!(
            (actual - expected).abs() < f64::EPSILON,
            "expected {expected}, got {actual}"
        );
    }

    #[test]
    fn test_lookup_anthropic_opus_48() {
        let caps = caps_for("anthropic", "claude-opus-4-8");
        assert_eq!(caps.context_window, Some(1_000_000));
        assert_eq!(caps.max_output_tokens, Some(128_000));
        assert!(caps.supports_thinking);
        assert!(caps.supports_adaptive_thinking);
    }

    #[test]
    fn test_lookup_anthropic_opus_46() {
        let caps = caps_for("anthropic", "claude-opus-4-6");
        assert_eq!(caps.context_window, Some(1_000_000));
        assert_eq!(caps.max_output_tokens, Some(128_000));
        assert!(caps.supports_adaptive_thinking);
    }

    #[test]
    fn test_lookup_anthropic_sonnet_46() {
        let caps = caps_for("anthropic", "claude-sonnet-4-6");
        assert_eq!(caps.context_window, Some(1_000_000));
        assert_eq!(caps.max_output_tokens, Some(64_000));
        assert!(caps.supports_adaptive_thinking);
    }

    #[test]
    fn test_lookup_anthropic_sonnet_45_disables_adaptive_thinking() {
        let caps = caps_for("anthropic", "claude-sonnet-4-5-20250929");
        assert!(!caps.supports_adaptive_thinking);
    }

    #[test]
    fn test_lookup_openai_pricing() {
        let caps = caps_for("openai", "gpt-4o");
        assert_close(input_rate(caps), 1.25);
        assert_close(output_rate(caps), 5.0);
    }

    #[test]
    fn test_lookup_openai_gpt54() {
        let caps = caps_for("openai", "gpt-5.4");
        assert_eq!(caps.context_window, Some(1_050_000));
        assert_eq!(caps.max_output_tokens, Some(128_000));
        assert!(caps.supports_thinking);
        assert_eq!(caps.source_status, SourceStatus::Official);
    }

    #[test]
    fn test_lookup_openai_gpt53_codex() {
        let caps = caps_for("openai", "gpt-5.3-codex");
        assert_eq!(caps.context_window, Some(400_000));
        assert_eq!(caps.max_output_tokens, Some(120_000));
        assert!(caps.supports_thinking);
        assert_eq!(caps.source_status, SourceStatus::Official);
    }

    #[test]
    fn test_lookup_gemini_preview_models() {
        let flash = caps_for("gemini", "gemini-3-flash-preview");
        assert_eq!(flash.context_window, Some(1_048_576));
        assert!(flash.supports_thinking);

        let pro = caps_for("gemini", "gemini-3.1-pro-preview");
        assert_eq!(pro.max_output_tokens, Some(65_536));
        assert!(pro.supports_thinking);
    }

    #[test]
    fn test_lookup_open_reasoning_models_resolve_with_thinking() {
        // DeepSeek V4 Pro via OpenRouter slug — reasoning model.
        let deepseek = caps_for("openai", "deepseek/deepseek-v4-pro");
        assert!(deepseek.supports_thinking);
        assert_eq!(deepseek.max_output_tokens, Some(384_000));
        assert!(input_rate(deepseek) > 0.0);
        assert!(output_rate(deepseek) > 0.0);

        // z.ai GLM-5.1 via OpenRouter slug — reasoning model.
        let glm = caps_for("openai", "z-ai/glm-5.1");
        assert!(glm.supports_thinking);
        assert_eq!(glm.max_output_tokens, Some(131_072));
        assert_close(input_rate(glm), 0.98);
        assert_close(output_rate(glm), 3.08);

        // Kimi K2 Thinking native — reasoning model.
        let kimi_thinking = caps_for("openai", "kimi-k2-thinking");
        assert!(kimi_thinking.supports_thinking);
        assert_eq!(kimi_thinking.max_output_tokens, Some(131_072));
        assert!(output_rate(kimi_thinking) > 0.0);
    }

    #[test]
    fn test_lookup_open_non_reasoning_kimi_models() {
        // Kimi K2.6 / K2.5 are registered as non-reasoning coding models.
        let k26 = caps_for("openai", "moonshotai/kimi-k2.6");
        assert!(!k26.supports_thinking);
        assert_eq!(k26.max_output_tokens, Some(65_536));
        assert!(input_rate(k26) > 0.0);

        let k25_native = caps_for("openai", "kimi-k2.5");
        assert!(!k25_native.supports_thinking);
        assert_eq!(k25_native.max_output_tokens, Some(32_768));
    }

    #[test]
    fn test_lookup_all_open_models_resolve() {
        // Every model_id below is exactly how the consumer looks them up
        // (provider == "openai" for all open routes).
        for model_id in [
            "z-ai/glm-5.1",
            "glm-5",
            "moonshotai/kimi-k2.6",
            "moonshotai/kimi-k2.5",
            "kimi-k2.5",
            "kimi-k2-thinking",
            "deepseek/deepseek-v4-pro",
            "deepseek-v4-pro",
            "deepseek/deepseek-v4-flash",
            "deepseek-v4-flash",
            "MiniMax-M2.5",
            "minimax/minimax-m2.5",
        ] {
            let caps = caps_for("openai", model_id);
            assert!(
                caps.pricing.is_some(),
                "pricing should be populated for {model_id}"
            );
            assert!(
                caps.max_output_tokens.is_some_and(|m| m > 0),
                "max_output_tokens should be non-zero for {model_id}"
            );
            assert!(
                caps.context_window.is_some_and(|c| c > 0),
                "context_window should be non-zero for {model_id}"
            );
        }
    }

    #[test]
    fn test_lookup_minimax_native_pricing() {
        let native = caps_for("openai", "MiniMax-M2.5");
        assert!(native.supports_thinking);
        assert_close(input_rate(native), 0.3);
        assert_close(output_rate(native), 1.2);
    }

    #[test]
    fn test_lookup_is_ascii_case_insensitive() {
        // Provider and model id are both matched with `eq_ignore_ascii_case`,
        // so a case-folded query must land on the very same table entry.
        let exact = caps_for("openai", "MiniMax-M2.5");
        let folded = caps_for("OPENAI", "minimax-m2.5");
        assert!(std::ptr::eq(exact, folded));
    }

    #[test]
    fn test_lookup_unknown_pair_returns_none() {
        assert!(get_model_capabilities("openai", "no-such-model").is_none());
        assert!(get_model_capabilities("no-such-provider", "gpt-4o").is_none());
        assert_eq!(default_max_output_tokens("openai", "no-such-model"), None);
    }

    #[test]
    fn test_default_max_output_tokens_matches_registry_entry() {
        let expected = caps_for("openai", "kimi-k2.5").max_output_tokens;
        assert_eq!(expected, Some(32_768));
        assert_eq!(default_max_output_tokens("openai", "kimi-k2.5"), expected);
    }

    #[test]
    fn test_supported_model_capabilities_entries_resolve() {
        let all = supported_model_capabilities();
        assert!(!all.is_empty());
        for entry in all {
            assert!(
                get_model_capabilities(entry.provider, entry.model_id).is_some(),
                "{}/{} is listed but cannot be looked up",
                entry.provider,
                entry.model_id
            );
        }
    }

    #[test]
    fn test_estimate_cost_usd() {
        let usage = Usage {
            input_tokens: 2_000,
            output_tokens: 1_000,
            cached_input_tokens: 0,
            cache_creation_input_tokens: 0,
        };
        let cost = caps_for("openai", "gpt-4o")
            .estimate_cost_usd(&usage)
            .expect("gpt-4o should have enough pricing data for an estimate");
        assert_close(cost, 0.0075);
    }

    #[test]
    fn test_estimate_cost_usd_with_cached_input() {
        let usage = Usage {
            input_tokens: 2_000,
            output_tokens: 1_000,
            cached_input_tokens: 1_000,
            cache_creation_input_tokens: 0,
        };
        let cost = caps_for("openai", "gpt-5.4")
            .estimate_cost_usd(&usage)
            .expect("gpt-5.4 should have enough pricing data for an estimate");
        assert_close(cost, 0.01775);
    }
}