sparrow/config/
providers.rs

1use serde::{Deserialize, Serialize};
2
3use crate::provider::{LatencyClass, ModelCaps};
4
5// ─── Auth flow types ─────────────────────────────────────────────────────────
6
7/// Describes how a provider authenticates the client.
8#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
9#[serde(tag = "type", rename_all = "snake_case")]
10pub enum AuthFlow {
11    /// Standard API key — stored as PROVIDER_API_KEY or via `sparrow auth add`.
12    ApiKey,
13    /// RFC 8628 Device Authorization Grant — browser-less, best for CLIs.
14    /// `device_endpoint`: POST URL to start the flow.
15    /// `token_endpoint`:  POST URL to poll for the token.
16    /// `scope`:           Space-separated OAuth scopes to request.
17    /// `client_id_env`:   Env var name for the client id (optional override).
18    DeviceOAuth {
19        device_endpoint: String,
20        token_endpoint: String,
21        scope: String,
22        client_id_env: String,
23    },
24}
25
26impl Default for AuthFlow {
27    fn default() -> Self {
28        AuthFlow::ApiKey
29    }
30}
31
32/// Provider + model registry aligned with Hermes Agent (NousResearch/hermes-agent).
33/// Source: https://github.com/NousResearch/hermes-agent/tree/main/plugins/model-providers
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct ProviderDef {
36    pub id: String,
37    pub label: String,
38    pub adapter: String,
39    pub base_url: String,
40    pub api_key_env: Option<String>,
41    pub models: Vec<ModelDef>,
42    pub tags: Vec<String>,
43    pub notes: String,
44    /// Authentication method. Defaults to `ApiKey` for backwards-compat.
45    #[serde(default)]
46    pub auth_flow: AuthFlow,
47}
48
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct ModelDef {
51    pub name: String,
52    pub label: String,
53    pub tags: Vec<String>,
54    pub cost_input_per_mtok: f64,
55    pub cost_output_per_mtok: f64,
56    pub context_window: u64,
57    pub recommended: bool,
58}
59
60pub fn provider_registry() -> Vec<ProviderDef> {
61    vec![
62        // ─── Ollama Cloud (Hermes: ollama-cloud) ──────────────────────
63        ProviderDef {
64            id: "ollama-cloud".into(),
65            label: "Ollama Cloud".into(),
66            adapter: "openai-compatible".into(),
67            base_url: "http://localhost:11434/v1".into(),
68            api_key_env: None,
69            models: vec![
70                ModelDef {
71                    name: "qwen3.5:32b".into(),
72                    label: "Qwen 3.5 32B".into(),
73                    tags: vec!["free".into(), "code".into(), "tool_support".into()],
74                    cost_input_per_mtok: 0.0,
75                    cost_output_per_mtok: 0.0,
76                    context_window: 32768,
77                    recommended: true,
78                },
79                ModelDef {
80                    name: "llama4:latest".into(),
81                    label: "Llama 4".into(),
82                    tags: vec!["free".into(), "strong".into(), "tool_support".into()],
83                    cost_input_per_mtok: 0.0,
84                    cost_output_per_mtok: 0.0,
85                    context_window: 131072,
86                    recommended: false,
87                },
88            ],
89            tags: vec!["local".into(), "free".into()],
90            notes: "Ollama Cloud. For self-hosted Ollama use 'custom' provider.".into(),
91            auth_flow: AuthFlow::default(),
92        },
93        // ─── Anthropic ─────────────────────────────────────────────────
94        ProviderDef {
95            id: "anthropic".into(),
96            label: "Anthropic".into(),
97            adapter: "anthropic-messages".into(),
98            base_url: "https://api.anthropic.com".into(),
99            api_key_env: Some("ANTHROPIC_API_KEY".into()),
100            models: vec![
101                ModelDef {
102                    name: "claude-sonnet-4-6".into(),
103                    label: "Claude Sonnet 4".into(),
104                    tags: vec![
105                        "strong".into(),
106                        "code".into(),
107                        "vision".into(),
108                        "tool_support".into(),
109                    ],
110                    cost_input_per_mtok: 3.0,
111                    cost_output_per_mtok: 15.0,
112                    context_window: 200000,
113                    recommended: true,
114                },
115                ModelDef {
116                    name: "claude-opus-4-8".into(),
117                    label: "Claude Opus 4".into(),
118                    tags: vec![
119                        "strong".into(),
120                        "code".into(),
121                        "vision".into(),
122                        "tool_support".into(),
123                    ],
124                    cost_input_per_mtok: 15.0,
125                    cost_output_per_mtok: 75.0,
126                    context_window: 200000,
127                    recommended: false,
128                },
129                ModelDef {
130                    name: "claude-haiku-4-5".into(),
131                    label: "Claude Haiku 4".into(),
132                    tags: vec!["fast".into(), "cheap".into(), "tool_support".into()],
133                    cost_input_per_mtok: 1.0,
134                    cost_output_per_mtok: 5.0,
135                    context_window: 200000,
136                    recommended: false,
137                },
138            ],
139            tags: vec!["strong".into(), "code".into(), "vision".into()],
140            notes: "Best-in-class for complex code and reasoning.".into(),
141            auth_flow: AuthFlow::default(),
142        },
143        // ─── OpenAI Codex (Hermes: openai-codex) ───────────────────────
144        ProviderDef {
145            id: "openai-codex".into(),
146            label: "OpenAI Codex".into(),
147            adapter: "openai-compatible".into(),
148            base_url: "https://api.openai.com/v1".into(),
149            api_key_env: Some("OPENAI_API_KEY".into()),
150            models: vec![
151                ModelDef {
152                    name: "gpt-5".into(),
153                    label: "GPT-5".into(),
154                    tags: vec![
155                        "strong".into(),
156                        "code".into(),
157                        "vision".into(),
158                        "tool_support".into(),
159                    ],
160                    cost_input_per_mtok: 2.5,
161                    cost_output_per_mtok: 10.0,
162                    context_window: 128000,
163                    recommended: true,
164                },
165                ModelDef {
166                    name: "gpt-5-mini".into(),
167                    label: "GPT-5 Mini".into(),
168                    tags: vec![
169                        "cheap".into(),
170                        "fast".into(),
171                        "code".into(),
172                        "tool_support".into(),
173                    ],
174                    cost_input_per_mtok: 0.15,
175                    cost_output_per_mtok: 0.60,
176                    context_window: 128000,
177                    recommended: false,
178                },
179            ],
180            tags: vec!["strong".into(), "code".into(), "vision".into()],
181            notes: "OpenAI Codex — GPT models via OpenAI API.".into(),
182            auth_flow: AuthFlow::default(),
183        },
184        // ─── NVIDIA ────────────────────────────────────────────────────
185        ProviderDef {
186            id: "nvidia".into(),
187            label: "NVIDIA NIM".into(),
188            adapter: "openai-compatible".into(),
189            base_url: "https://integrate.api.nvidia.com/v1".into(),
190            api_key_env: Some("NVIDIA_API_KEY".into()),
191            models: vec![
192                ModelDef {
193                    name: "meta/llama-3.1-8b-instruct".into(),
194                    label: "Llama 3.1 8B Instruct".into(),
195                    tags: vec![
196                        "fast".into(),
197                        "free".into(),
198                        "tool_support".into(),
199                        "chat".into(),
200                    ],
201                    cost_input_per_mtok: 0.0,
202                    cost_output_per_mtok: 0.0,
203                    context_window: 131072,
204                    recommended: true,
205                },
206                ModelDef {
207                    name: "deepseek-ai/deepseek-v4-flash".into(),
208                    label: "DeepSeek V4 Flash".into(),
209                    tags: vec!["code".into(), "reasoning".into(), "free".into(), "tool_support".into()],
210                    cost_input_per_mtok: 0.0,
211                    cost_output_per_mtok: 0.0,
212                    context_window: 131072,
213                    recommended: false,
214                },
215                ModelDef {
216                    name: "stepfun-ai/step-3.5-flash".into(),
217                    label: "Step 3.5 Flash".into(),
218                    tags: vec!["fast".into(), "free".into(), "tool_support".into()],
219                    cost_input_per_mtok: 0.0,
220                    cost_output_per_mtok: 0.0,
221                    context_window: 131072,
222                    recommended: true,
223                },
224                ModelDef {
225                    name: "nvidia/llama-3.1-nemotron-nano-8b-v1".into(),
226                    label: "Nemotron Nano 8B".into(),
227                    tags: vec!["fast".into(), "code".into(), "free".into(), "tool_support".into()],
228                    cost_input_per_mtok: 0.0,
229                    cost_output_per_mtok: 0.0,
230                    context_window: 131072,
231                    recommended: false,
232                },
233                ModelDef {
234                    name: "openai/gpt-oss-20b".into(),
235                    label: "GPT-OSS 20B".into(),
236                    tags: vec!["fast".into(), "free".into(), "tool_support".into()],
237                    cost_input_per_mtok: 0.0,
238                    cost_output_per_mtok: 0.0,
239                    context_window: 131072,
240                    recommended: false,
241                },
242                ModelDef {
243                    name: "nvidia/nemotron-3-super-120b-a12b".into(),
244                    label: "Nemotron Super 120B".into(),
245                    tags: vec![
246                        "strong".into(),
247                        "reasoning".into(),
248                        "code".into(),
249                        "free".into(),
250                    ],
251                    cost_input_per_mtok: 0.0,
252                    cost_output_per_mtok: 0.0,
253                    context_window: 131072,
254                    recommended: true,
255                },
256            ],
257            tags: vec!["free".into(), "fast".into(), "strong".into(), "code".into()],
258            notes: "NVIDIA API Catalog / NIM — free tier with API key; discovery expands this list from /v1/models.".into(),
259            auth_flow: AuthFlow::default(),
260        },
261        // ─── OpenRouter ────────────────────────────────────────────────
262        ProviderDef {
263            id: "openrouter".into(),
264            label: "OpenRouter".into(),
265            adapter: "openai-compatible".into(),
266            base_url: "https://openrouter.ai/api/v1".into(),
267            api_key_env: Some("OPENROUTER_API_KEY".into()),
268            models: vec![ModelDef {
269                name: "openrouter/auto".into(),
270                label: "Auto (best for task)".into(),
271                tags: vec![
272                    "strong".into(),
273                    "code".into(),
274                    "vision".into(),
275                    "tool_support".into(),
276                ],
277                cost_input_per_mtok: 0.0,
278                cost_output_per_mtok: 0.0,
279                context_window: 200000,
280                recommended: true,
281            }],
282            tags: vec!["strong".into(), "multi".into()],
283            notes: "200+ models via one API — auto-routes to best model.".into(),
284            auth_flow: AuthFlow::default(),
285        },
286        // ─── DeepSeek ──────────────────────────────────────────────────
287        ProviderDef {
288            id: "deepseek".into(),
289            label: "DeepSeek".into(),
290            adapter: "openai-compatible".into(),
291            base_url: "https://api.deepseek.com/v1".into(),
292            api_key_env: Some("DEEPSEEK_API_KEY".into()),
293            models: vec![
294                ModelDef {
295                    name: "deepseek-chat".into(),
296                    label: "DeepSeek V3".into(),
297                    tags: vec!["cheap".into(), "code".into(), "tool_support".into()],
298                    cost_input_per_mtok: 0.27,
299                    cost_output_per_mtok: 1.1,
300                    context_window: 65536,
301                    recommended: true,
302                },
303                ModelDef {
304                    name: "deepseek-reasoner".into(),
305                    label: "DeepSeek R1".into(),
306                    tags: vec!["reasoning".into(), "strong".into()],
307                    cost_input_per_mtok: 0.55,
308                    cost_output_per_mtok: 2.19,
309                    context_window: 65536,
310                    recommended: false,
311                },
312            ],
313            tags: vec!["cheap".into(), "code".into(), "reasoning".into()],
314            notes: "DeepSeek — very competitive pricing, strong coding.".into(),
315            auth_flow: AuthFlow::default(),
316        },
317        // ─── Gemini ────────────────────────────────────────────────────
318        ProviderDef {
319            id: "gemini".into(),
320            label: "Google Gemini".into(),
321            adapter: "openai-compatible".into(),
322            base_url: "https://generativelanguage.googleapis.com/v1beta/openai".into(),
323            api_key_env: Some("GEMINI_API_KEY".into()),
324            models: vec![
325                ModelDef {
326                    name: "gemini-2.5-pro".into(),
327                    label: "Gemini 2.5 Pro".into(),
328                    tags: vec![
329                        "strong".into(),
330                        "code".into(),
331                        "vision".into(),
332                        "tool_support".into(),
333                    ],
334                    cost_input_per_mtok: 0.0,
335                    cost_output_per_mtok: 0.0,
336                    context_window: 1048576,
337                    recommended: true,
338                },
339                ModelDef {
340                    name: "gemini-2.5-flash".into(),
341                    label: "Gemini 2.5 Flash".into(),
342                    tags: vec![
343                        "fast".into(),
344                        "cheap".into(),
345                        "vision".into(),
346                        "tool_support".into(),
347                    ],
348                    cost_input_per_mtok: 0.0,
349                    cost_output_per_mtok: 0.0,
350                    context_window: 1048576,
351                    recommended: false,
352                },
353            ],
354            tags: vec!["strong".into(), "vision".into(), "free".into()],
355            notes: "Google Gemini — 1M context window, free tier.".into(),
356            auth_flow: AuthFlow::default(),
357        },
358        // ─── xAI ───────────────────────────────────────────────────────
359        ProviderDef {
360            id: "xai".into(),
361            label: "xAI (Grok)".into(),
362            adapter: "openai-compatible".into(),
363            base_url: "https://api.x.ai/v1".into(),
364            api_key_env: Some("XAI_API_KEY".into()),
365            models: vec![ModelDef {
366                name: "grok-3".into(),
367                label: "Grok 3".into(),
368                tags: vec!["strong".into(), "code".into(), "vision".into()],
369                cost_input_per_mtok: 3.0,
370                cost_output_per_mtok: 15.0,
371                context_window: 131072,
372                recommended: true,
373            }],
374            tags: vec!["strong".into(), "code".into()],
375            notes: "xAI Grok — strong reasoning and coding.".into(),
376            auth_flow: AuthFlow::default(),
377        },
378        // ─── HuggingFace ───────────────────────────────────────────────
379        ProviderDef {
380            id: "huggingface".into(),
381            label: "Hugging Face".into(),
382            adapter: "openai-compatible".into(),
383            base_url: "https://api-inference.huggingface.co/v1".into(),
384            api_key_env: Some("HF_TOKEN".into()),
385            models: vec![ModelDef {
386                name: "Qwen/Qwen3-235B-A22B".into(),
387                label: "Qwen 3 235B".into(),
388                tags: vec!["strong".into(), "code".into(), "tool_support".into()],
389                cost_input_per_mtok: 0.0,
390                cost_output_per_mtok: 0.0,
391                context_window: 32768,
392                recommended: true,
393            }],
394            tags: vec!["free".into()],
395            notes: "Hugging Face Serverless Inference — free tier, many models.".into(),
396            auth_flow: AuthFlow::default(),
397        },
398        // ─── Nous Portal ───────────────────────────────────────────────
399        ProviderDef {
400            id: "nous".into(),
401            label: "Nous Portal".into(),
402            adapter: "openai-compatible".into(),
403            base_url: "https://portal.nousresearch.com/api/v1".into(),
404            api_key_env: Some("NOUS_API_KEY".into()),
405            models: vec![ModelDef {
406                name: "hermes-3-70b".into(),
407                label: "Hermes 3 70B".into(),
408                tags: vec!["strong".into(), "code".into(), "tool_support".into()],
409                cost_input_per_mtok: 0.0,
410                cost_output_per_mtok: 0.0,
411                context_window: 32768,
412                recommended: true,
413            }],
414            tags: vec!["strong".into(), "code".into()],
415            notes: "Nous Portal — one sub for models + web search + image gen + TTS + browser."
416                .into(),
417            auth_flow: AuthFlow::default(),
418        },
419        // ─── NovitaAI ──────────────────────────────────────────────────
420        ProviderDef {
421            id: "novita".into(),
422            label: "NovitaAI".into(),
423            adapter: "openai-compatible".into(),
424            base_url: "https://api.novita.ai/v3/openai".into(),
425            api_key_env: Some("NOVITA_API_KEY".into()),
426            models: vec![ModelDef {
427                name: "deepseek/deepseek-r1".into(),
428                label: "DeepSeek R1".into(),
429                tags: vec!["reasoning".into(), "strong".into()],
430                cost_input_per_mtok: 0.0,
431                cost_output_per_mtok: 0.0,
432                context_window: 65536,
433                recommended: true,
434            }],
435            tags: vec!["cheap".into(), "reasoning".into()],
436            notes: "NovitaAI — AI-native cloud for Model API, Agent Sandbox, and GPU Cloud.".into(),
437            auth_flow: AuthFlow::default(),
438        },
439        // ─── Alibaba ───────────────────────────────────────────────────
440        ProviderDef {
441            id: "alibaba".into(),
442            label: "Alibaba Cloud".into(),
443            adapter: "openai-compatible".into(),
444            base_url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".into(),
445            api_key_env: Some("DASHSCOPE_API_KEY".into()),
446            models: vec![ModelDef {
447                name: "qwen-plus".into(),
448                label: "Qwen Plus".into(),
449                tags: vec!["strong".into(), "code".into(), "tool_support".into()],
450                cost_input_per_mtok: 0.0,
451                cost_output_per_mtok: 0.0,
452                context_window: 131072,
453                recommended: true,
454            }],
455            tags: vec!["cheap".into(), "code".into()],
456            notes: "Alibaba Cloud DashScope — Qwen models via OpenAI-compatible API.".into(),
457            auth_flow: AuthFlow::default(),
458        },
459        // ─── Bedrock (AWS) ─────────────────────────────────────────────
460        ProviderDef {
461            id: "bedrock".into(),
462            label: "AWS Bedrock".into(),
463            adapter: "bedrock".into(),
464            base_url: "https://bedrock-runtime.us-east-1.amazonaws.com".into(),
465            api_key_env: Some("AWS_ACCESS_KEY_ID".into()),
466            models: vec![ModelDef {
467                name: "anthropic.claude-sonnet-4-6".into(),
468                label: "Claude Sonnet 4".into(),
469                tags: vec![
470                    "strong".into(),
471                    "code".into(),
472                    "vision".into(),
473                    "tool_support".into(),
474                ],
475                cost_input_per_mtok: 3.0,
476                cost_output_per_mtok: 15.0,
477                context_window: 200000,
478                recommended: true,
479            }],
480            tags: vec!["strong".into(), "code".into()],
481            notes: "AWS Bedrock — managed foundation models. Requires AWS credentials.".into(),
482            auth_flow: AuthFlow::default(),
483        },
484        // ─── Kimi/Moonshot ─────────────────────────────────────────────
485        ProviderDef {
486            id: "kimi-coding".into(),
487            label: "Kimi Coding".into(),
488            adapter: "openai-compatible".into(),
489            base_url: "https://api.moonshot.cn/v1".into(),
490            api_key_env: Some("MOONSHOT_API_KEY".into()),
491            models: vec![ModelDef {
492                name: "moonshot-v1-auto".into(),
493                label: "Moonshot Auto".into(),
494                tags: vec!["strong".into(), "code".into(), "tool_support".into()],
495                cost_input_per_mtok: 0.0,
496                cost_output_per_mtok: 0.0,
497                context_window: 131072,
498                recommended: true,
499            }],
500            tags: vec!["code".into()],
501            notes: "Kimi/Moonshot — coding-focused models.".into(),
502            auth_flow: AuthFlow::default(),
503        },
504        // ─── MiniMax ───────────────────────────────────────────────────
505        ProviderDef {
506            id: "minimax".into(),
507            label: "MiniMax".into(),
508            adapter: "openai-compatible".into(),
509            base_url: "https://api.minimax.chat/v1".into(),
510            api_key_env: Some("MINIMAX_API_KEY".into()),
511            models: vec![ModelDef {
512                name: "abab7-chat".into(),
513                label: "ABAB 7".into(),
514                tags: vec!["strong".into(), "tool_support".into()],
515                cost_input_per_mtok: 0.0,
516                cost_output_per_mtok: 0.0,
517                context_window: 131072,
518                recommended: true,
519            }],
520            tags: vec!["strong".into()],
521            notes: "MiniMax — ABAB series models via OpenAI-compatible API.".into(),
522            auth_flow: AuthFlow::default(),
523        },
524        // ─── Xiaomi MiMo ───────────────────────────────────────────────
525        ProviderDef {
526            id: "xiaomi".into(),
527            label: "Xiaomi MiMo".into(),
528            adapter: "openai-compatible".into(),
529            base_url: "https://platform.xiaomimimo.com/v1".into(),
530            api_key_env: Some("XIAOMI_API_KEY".into()),
531            models: vec![ModelDef {
532                name: "mimo-v2".into(),
533                label: "MiMo V2".into(),
534                tags: vec!["strong".into(), "code".into()],
535                cost_input_per_mtok: 0.0,
536                cost_output_per_mtok: 0.0,
537                context_window: 32768,
538                recommended: true,
539            }],
540            tags: vec!["code".into()],
541            notes: "Xiaomi MiMo — coding models via Xiaomi platform.".into(),
542            auth_flow: AuthFlow::default(),
543        },
544        // ─── z.ai/GLM ──────────────────────────────────────────────────
545        ProviderDef {
546            id: "zai".into(),
547            label: "z.ai / GLM".into(),
548            adapter: "openai-compatible".into(),
549            base_url: "https://api.z.ai/v1".into(),
550            api_key_env: Some("ZAI_API_KEY".into()),
551            models: vec![ModelDef {
552                name: "glm-4-plus".into(),
553                label: "GLM-4 Plus".into(),
554                tags: vec!["strong".into(), "code".into(), "tool_support".into()],
555                cost_input_per_mtok: 0.0,
556                cost_output_per_mtok: 0.0,
557                context_window: 131072,
558                recommended: true,
559            }],
560            tags: vec!["code".into()],
561            notes: "z.ai / GLM — ChatGLM models via OpenAI-compatible API.".into(),
562            auth_flow: AuthFlow::default(),
563        },
564        // ─── GMI Cloud ─────────────────────────────────────────────────
565        ProviderDef {
566            id: "gmi".into(),
567            label: "GMI Cloud".into(),
568            adapter: "openai-compatible".into(),
569            base_url: "https://api.gmi.cloud/v1".into(),
570            api_key_env: Some("GMI_API_KEY".into()),
571            models: vec![ModelDef {
572                name: "llama-4-maverick".into(),
573                label: "Llama 4 Maverick".into(),
574                tags: vec!["strong".into(), "code".into()],
575                cost_input_per_mtok: 0.0,
576                cost_output_per_mtok: 0.0,
577                context_window: 131072,
578                recommended: true,
579            }],
580            tags: vec!["cheap".into()],
581            notes: "GMI Cloud — GPU cloud for open-source model inference.".into(),
582            auth_flow: AuthFlow::default(),
583        },
584        // ─── Arcee ─────────────────────────────────────────────────────
585        ProviderDef {
586            id: "arcee".into(),
587            label: "Arcee".into(),
588            adapter: "openai-compatible".into(),
589            base_url: "https://api.arcee.ai/v1".into(),
590            api_key_env: Some("ARCEE_API_KEY".into()),
591            models: vec![ModelDef {
592                name: "arcee-virtuoso-small".into(),
593                label: "Virtuoso Small".into(),
594                tags: vec!["code".into(), "tool_support".into()],
595                cost_input_per_mtok: 0.0,
596                cost_output_per_mtok: 0.0,
597                context_window: 32768,
598                recommended: true,
599            }],
600            tags: vec!["code".into()],
601            notes: "Arcee — specialized coding models.".into(),
602            auth_flow: AuthFlow::default(),
603        },
604        // ─── StepFun ───────────────────────────────────────────────────
605        ProviderDef {
606            id: "stepfun".into(),
607            label: "StepFun".into(),
608            adapter: "openai-compatible".into(),
609            base_url: "https://api.stepfun.com/v1".into(),
610            api_key_env: Some("STEPFUN_API_KEY".into()),
611            models: vec![ModelDef {
612                name: "step-2-16k".into(),
613                label: "Step-2 16K".into(),
614                tags: vec!["strong".into(), "tool_support".into()],
615                cost_input_per_mtok: 0.0,
616                cost_output_per_mtok: 0.0,
617                context_window: 16384,
618                recommended: true,
619            }],
620            tags: vec!["cheap".into()],
621            notes: "StepFun — Step series models via OpenAI-compatible API.".into(),
622            auth_flow: AuthFlow::default(),
623        },
624        // ─── Custom ────────────────────────────────────────────────────
625        ProviderDef {
626            id: "custom".into(),
627            label: "Custom Endpoint".into(),
628            adapter: "openai-compatible".into(),
629            base_url: "https://your-endpoint/v1".into(),
630            api_key_env: Some("CUSTOM_API_KEY".into()),
631            models: vec![ModelDef {
632                name: "custom-model".into(),
633                label: "Custom Model".into(),
634                tags: vec!["custom".into()],
635                cost_input_per_mtok: 0.0,
636                cost_output_per_mtok: 0.0,
637                context_window: 128000,
638                recommended: true,
639            }],
640            tags: vec!["custom".into()],
641            notes: "Bring your own OpenAI-compatible endpoint. Set base_url to your server.".into(),
642            auth_flow: AuthFlow::default(),
643        },
644        // ─── Azure Foundry ─────────────────────────────────────────────
645        ProviderDef {
646            id: "azure-foundry".into(),
647            label: "Azure AI Foundry".into(),
648            adapter: "openai-compatible".into(),
649            base_url: "https://<your-resource>.openai.azure.com/openai/v1".into(),
650            api_key_env: Some("AZURE_OPENAI_API_KEY".into()),
651            models: vec![ModelDef {
652                name: "gpt-5".into(),
653                label: "GPT-5 (Azure)".into(),
654                tags: vec!["strong".into(), "code".into(), "tool_support".into()],
655                cost_input_per_mtok: 0.0,
656                cost_output_per_mtok: 0.0,
657                context_window: 128000,
658                recommended: true,
659            }],
660            tags: vec!["strong".into(), "code".into()],
661            notes:
662                "Azure AI Foundry — OpenAI models on Azure. Set base_url with your resource name."
663                    .into(),
664            auth_flow: AuthFlow::default(),
665        },
666        // ─── Qwen OAuth ────────────────────────────────────────────────
667        ProviderDef {
668            id: "qwen-oauth".into(),
669            label: "Qwen (OAuth)".into(),
670            adapter: "openai-compatible".into(),
671            base_url: "https://dashscope.aliyuncs.com/compatible-mode/v1".into(),
672            api_key_env: None,
673            models: vec![ModelDef {
674                name: "qwen-plus".into(),
675                label: "Qwen Plus".into(),
676                tags: vec!["strong".into(), "code".into(), "tool_support".into()],
677                cost_input_per_mtok: 0.0,
678                cost_output_per_mtok: 0.0,
679                context_window: 131072,
680                recommended: true,
681            }],
682            tags: vec!["code".into()],
683            notes: "Qwen via OAuth login — no API key needed, login via browser.".into(),
684            auth_flow: AuthFlow::DeviceOAuth {
685                device_endpoint: "https://oauth.aliyun.com/device/code".into(),
686                token_endpoint:  "https://oauth.aliyun.com/device/token".into(),
687                scope: "openid profile".into(),
688                client_id_env: "QWEN_OAUTH_CLIENT_ID".into(),
689            },
690        },
691        // ─── OpenCode Go (paid subscription tier) ─────────────────────
692        ProviderDef {
693            id: "opencode-go".into(),
694            label: "OpenCode Go".into(),
695            adapter: "openai-compatible".into(),
696            base_url: "https://opencode.ai/zen/go/v1".into(),
697            api_key_env: Some("OPENCODE_GO_API_KEY".into()),
698            models: vec![
699                ModelDef {
700                    name: "claude-sonnet-4-6".into(),
701                    label: "Claude Sonnet 4.6 (Go)".into(),
702                    tags: vec![
703                        "strong".into(),
704                        "code".into(),
705                        "vision".into(),
706                        "tool_support".into(),
707                    ],
708                    cost_input_per_mtok: 3.0,
709                    cost_output_per_mtok: 15.0,
710                    context_window: 200000,
711                    recommended: true,
712                },
713                ModelDef {
714                    name: "claude-opus-4-8".into(),
715                    label: "Claude Opus 4 (Go)".into(),
716                    tags: vec![
717                        "strong".into(),
718                        "code".into(),
719                        "vision".into(),
720                        "tool_support".into(),
721                    ],
722                    cost_input_per_mtok: 15.0,
723                    cost_output_per_mtok: 75.0,
724                    context_window: 200000,
725                    recommended: false,
726                },
727                ModelDef {
728                    name: "gpt-5".into(),
729                    label: "GPT-5 (Go)".into(),
730                    tags: vec!["strong".into(), "code".into(), "tool_support".into()],
731                    cost_input_per_mtok: 5.0,
732                    cost_output_per_mtok: 20.0,
733                    context_window: 200000,
734                    recommended: false,
735                },
736                ModelDef {
737                    name: "gemini-2-5-pro".into(),
738                    label: "Gemini 2.5 Pro (Go)".into(),
739                    tags: vec![
740                        "strong".into(),
741                        "code".into(),
742                        "vision".into(),
743                        "tool_support".into(),
744                    ],
745                    cost_input_per_mtok: 2.5,
746                    cost_output_per_mtok: 15.0,
747                    context_window: 1000000,
748                    recommended: false,
749                },
750            ],
751            tags: vec!["code".into(), "strong".into(), "multi".into(), "paid".into()],
752            notes: "OpenCode Go subscription tier — full model library via go/v1 endpoint. Discovery expands the list from /v1/models.".into(),
753            auth_flow: AuthFlow::default(),
754        },
755        // ─── OpenCode Zen ──────────────────────────────────────────────
756        ProviderDef {
757            id: "opencode-zen".into(),
758            label: "OpenCode Zen".into(),
759            adapter: "openai-compatible".into(),
760            base_url: "https://opencode.ai/zen/v1".into(),
761            api_key_env: Some("OPENCODE_API_KEY".into()),
762            models: vec![
763                ModelDef {
764                    name: "claude-sonnet-4-6".into(),
765                    label: "Claude Sonnet 4.6 (via Zen)".into(),
766                    tags: vec![
767                        "strong".into(),
768                        "code".into(),
769                        "vision".into(),
770                        "tool_support".into(),
771                    ],
772                    cost_input_per_mtok: 3.0,
773                    cost_output_per_mtok: 15.0,
774                    context_window: 200000,
775                    recommended: true,
776                },
777                ModelDef {
778                    name: "qwen3.6-plus".into(),
779                    label: "Qwen 3.6 Plus (via Zen)".into(),
780                    tags: vec![
781                        "strong".into(),
782                        "code".into(),
783                        "tool_support".into(),
784                    ],
785                    cost_input_per_mtok: 1.0,
786                    cost_output_per_mtok: 3.0,
787                    context_window: 262144,
788                    recommended: false,
789                },
790                ModelDef {
791                    name: "gpt-5-codex".into(),
792                    label: "GPT-5 Codex (via Zen)".into(),
793                    tags: vec![
794                        "strong".into(),
795                        "code".into(),
796                        "tool_support".into(),
797                    ],
798                    cost_input_per_mtok: 2.5,
799                    cost_output_per_mtok: 10.0,
800                    context_window: 200000,
801                    recommended: false,
802                },
803            ],
804            tags: vec!["code".into(), "strong".into(), "multi".into()],
805            notes: "OpenCode Zen — curated gateway (Claude, GPT, Qwen, DeepSeek). Most models need credits or an OpenCode Go subscription. Discovery expands the list from /v1/models.".into(),
806            auth_flow: AuthFlow::default(),
807        },
808        // ─── KiloCode ──────────────────────────────────────────────────
809        ProviderDef {
810            id: "kilocode".into(),
811            label: "KiloCode".into(),
812            adapter: "openai-compatible".into(),
813            base_url: "https://api.kilocode.ai/v1".into(),
814            api_key_env: Some("KILOCODE_API_KEY".into()),
815            models: vec![ModelDef {
816                name: "kilocode".into(),
817                label: "KiloCode".into(),
818                tags: vec!["code".into(), "tool_support".into()],
819                cost_input_per_mtok: 0.0,
820                cost_output_per_mtok: 0.0,
821                context_window: 32768,
822                recommended: true,
823            }],
824            tags: vec!["code".into()],
825            notes: "KiloCode — coding-specialized models.".into(),
826            auth_flow: AuthFlow::default(),
827        },
828        // ─── Copilot (GitHub) ──────────────────────────────────────────
829        ProviderDef {
830            id: "copilot".into(),
831            label: "GitHub Copilot".into(),
832            adapter: "openai-compatible".into(),
833            base_url: "https://api.githubcopilot.com".into(),
834            api_key_env: Some("GITHUB_TOKEN".into()),
835            models: vec![ModelDef {
836                name: "copilot".into(),
837                label: "Copilot".into(),
838                tags: vec!["code".into(), "tool_support".into()],
839                cost_input_per_mtok: 0.0,
840                cost_output_per_mtok: 0.0,
841                context_window: 32768,
842                recommended: true,
843            }],
844            tags: vec!["code".into()],
845            notes: "GitHub Copilot — available with Copilot subscription.".into(),
846            auth_flow: AuthFlow::DeviceOAuth {
847                device_endpoint: "https://github.com/login/device/code".into(),
848                token_endpoint:  "https://github.com/login/oauth/access_token".into(),
849                scope: "read:user".into(),
850                client_id_env: "GITHUB_OAUTH_CLIENT_ID".into(),
851            },
852        },
853        // ─── Alibaba Coding Plan ───────────────────────────────────────
854        ProviderDef {
855            id: "alibaba-coding-plan".into(),
856            label: "Alibaba Coding Plan".into(),
857            adapter: "openai-compatible".into(),
858            base_url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".into(),
859            api_key_env: Some("DASHSCOPE_API_KEY".into()),
860            models: vec![ModelDef {
861                name: "qwen-coder-plus".into(),
862                label: "Qwen Coder Plus".into(),
863                tags: vec!["code".into(), "tool_support".into()],
864                cost_input_per_mtok: 0.0,
865                cost_output_per_mtok: 0.0,
866                context_window: 131072,
867                recommended: true,
868            }],
869            tags: vec!["code".into()],
870            notes: "Alibaba Coding Plan — Qwen Coder models for software development.".into(),
871            auth_flow: AuthFlow::default(),
872        },
873        // ═══ MERGED: additional providers not in Hermes but valid ═════
874
875        // ─── Ollama (local, self-hosted) ───────────────────────────────
876        ProviderDef {
877            id: "ollama".into(),
878            label: "Ollama (local)".into(),
879            adapter: "ollama".into(),
880            base_url: "http://localhost:11434/v1".into(),
881            api_key_env: Some("OLLAMA_HOST".into()),
882            models: vec![
883                ModelDef {
884                    name: "qwen3.5:32b".into(),
885                    label: "Qwen 3.5 32B".into(),
886                    tags: vec![
887                        "local".into(),
888                        "free".into(),
889                        "code".into(),
890                        "tool_support".into(),
891                    ],
892                    cost_input_per_mtok: 0.0,
893                    cost_output_per_mtok: 0.0,
894                    context_window: 32768,
895                    recommended: true,
896                },
897                ModelDef {
898                    name: "codellama:latest".into(),
899                    label: "CodeLlama".into(),
900                    tags: vec!["local".into(), "free".into(), "code".into()],
901                    cost_input_per_mtok: 0.0,
902                    cost_output_per_mtok: 0.0,
903                    context_window: 16384,
904                    recommended: false,
905                },
906                ModelDef {
907                    name: "mistral:latest".into(),
908                    label: "Mistral (local)".into(),
909                    tags: vec!["local".into(), "free".into(), "fast".into()],
910                    cost_input_per_mtok: 0.0,
911                    cost_output_per_mtok: 0.0,
912                    context_window: 32768,
913                    recommended: false,
914                },
915            ],
916            tags: vec!["local".into(), "free".into(), "offline".into()],
917            notes: "Self-hosted models via Ollama. Install: https://ollama.com".into(),
918            auth_flow: AuthFlow::default(),
919        },
920        // ─── Groq ──────────────────────────────────────────────────────
921        ProviderDef {
922            id: "groq".into(),
923            label: "Groq".into(),
924            adapter: "openai-compatible".into(),
925            base_url: "https://api.groq.com/openai/v1".into(),
926            api_key_env: Some("GROQ_API_KEY".into()),
927            models: vec![
928                ModelDef {
929                    name: "llama-4-scout-17b-16e".into(),
930                    label: "Llama 4 Scout".into(),
931                    tags: vec!["fast".into(), "cheap".into(), "tool_support".into()],
932                    cost_input_per_mtok: 0.0,
933                    cost_output_per_mtok: 0.0,
934                    context_window: 131072,
935                    recommended: true,
936                },
937                ModelDef {
938                    name: "deepseek-r1-distill-llama-70b".into(),
939                    label: "DeepSeek R1 70B".into(),
940                    tags: vec!["reasoning".into(), "strong".into()],
941                    cost_input_per_mtok: 0.0,
942                    cost_output_per_mtok: 0.0,
943                    context_window: 131072,
944                    recommended: false,
945                },
946            ],
947            tags: vec!["fast".into(), "free".into()],
948            notes: "Groq LPU — ultra-fast inference, free tier available.".into(),
949            auth_flow: AuthFlow::default(),
950        },
951        // ─── Together AI ───────────────────────────────────────────────
952        ProviderDef {
953            id: "together".into(),
954            label: "Together AI".into(),
955            adapter: "openai-compatible".into(),
956            base_url: "https://api.together.xyz/v1".into(),
957            api_key_env: Some("TOGETHER_API_KEY".into()),
958            models: vec![ModelDef {
959                name: "meta-llama/Llama-4-Maverick-17B-128E".into(),
960                label: "Llama 4 Maverick".into(),
961                tags: vec!["strong".into(), "code".into(), "tool_support".into()],
962                cost_input_per_mtok: 0.2,
963                cost_output_per_mtok: 0.2,
964                context_window: 131072,
965                recommended: true,
966            }],
967            tags: vec!["cheap".into(), "code".into()],
968            notes: "Together AI — open-source models at low cost.".into(),
969            auth_flow: AuthFlow::default(),
970        },
971        // ─── Cerebras ──────────────────────────────────────────────────
972        ProviderDef {
973            id: "cerebras".into(),
974            label: "Cerebras".into(),
975            adapter: "openai-compatible".into(),
976            base_url: "https://api.cerebras.ai/v1".into(),
977            api_key_env: Some("CEREBRAS_API_KEY".into()),
978            models: vec![ModelDef {
979                name: "llama-4-scout-17b-16e".into(),
980                label: "Llama 4 Scout".into(),
981                tags: vec!["fast".into(), "cheap".into(), "tool_support".into()],
982                cost_input_per_mtok: 0.0,
983                cost_output_per_mtok: 0.0,
984                context_window: 131072,
985                recommended: true,
986            }],
987            tags: vec!["fast".into(), "free".into()],
988            notes: "Cerebras Wafer-Scale — fastest inference available.".into(),
989            auth_flow: AuthFlow::default(),
990        },
991        // ─── Mistral ───────────────────────────────────────────────────
992        ProviderDef {
993            id: "mistral".into(),
994            label: "Mistral AI".into(),
995            adapter: "openai-compatible".into(),
996            base_url: "https://api.mistral.ai/v1".into(),
997            api_key_env: Some("MISTRAL_API_KEY".into()),
998            models: vec![
999                ModelDef {
1000                    name: "mistral-large-latest".into(),
1001                    label: "Mistral Large".into(),
1002                    tags: vec!["strong".into(), "code".into(), "tool_support".into()],
1003                    cost_input_per_mtok: 2.0,
1004                    cost_output_per_mtok: 6.0,
1005                    context_window: 131072,
1006                    recommended: true,
1007                },
1008                ModelDef {
1009                    name: "mistral-small-latest".into(),
1010                    label: "Mistral Small".into(),
1011                    tags: vec!["cheap".into(), "fast".into()],
1012                    cost_input_per_mtok: 0.2,
1013                    cost_output_per_mtok: 0.6,
1014                    context_window: 32768,
1015                    recommended: false,
1016                },
1017            ],
1018            tags: vec!["strong".into(), "code".into()],
1019            notes: "Mistral AI — strong European models.".into(),
1020            auth_flow: AuthFlow::default(),
1021        },
1022        // ─── Fireworks AI ──────────────────────────────────────────────
1023        ProviderDef {
1024            id: "fireworks".into(),
1025            label: "Fireworks AI".into(),
1026            adapter: "openai-compatible".into(),
1027            base_url: "https://api.fireworks.ai/inference/v1".into(),
1028            api_key_env: Some("FIREWORKS_API_KEY".into()),
1029            models: vec![ModelDef {
1030                name: "accounts/fireworks/models/llama-v4-maverick".into(),
1031                label: "Llama 4 Maverick".into(),
1032                tags: vec!["fast".into(), "code".into(), "tool_support".into()],
1033                cost_input_per_mtok: 0.2,
1034                cost_output_per_mtok: 0.2,
1035                context_window: 131072,
1036                recommended: true,
1037            }],
1038            tags: vec!["fast".into(), "code".into()],
1039            notes: "Fireworks AI — fast open-source model inference.".into(),
1040            auth_flow: AuthFlow::default(),
1041        },
1042        // ─── Perplexity ────────────────────────────────────────────────
1043        ProviderDef {
1044            id: "perplexity".into(),
1045            label: "Perplexity".into(),
1046            adapter: "openai-compatible".into(),
1047            base_url: "https://api.perplexity.ai".into(),
1048            api_key_env: Some("PERPLEXITY_API_KEY".into()),
1049            models: vec![
1050                ModelDef {
1051                    name: "sonar-pro".into(),
1052                    label: "Sonar Pro".into(),
1053                    tags: vec!["search".into(), "web".into(), "tool_support".into()],
1054                    cost_input_per_mtok: 3.0,
1055                    cost_output_per_mtok: 15.0,
1056                    context_window: 200000,
1057                    recommended: true,
1058                },
1059                ModelDef {
1060                    name: "sonar".into(),
1061                    label: "Sonar".into(),
1062                    tags: vec!["search".into(), "fast".into(), "web".into()],
1063                    cost_input_per_mtok: 1.0,
1064                    cost_output_per_mtok: 1.0,
1065                    context_window: 127000,
1066                    recommended: false,
1067                },
1068            ],
1069            tags: vec!["search".into(), "web".into()],
1070            notes: "Perplexity Sonar — live web/search-focused model routing.".into(),
1071            auth_flow: AuthFlow::default(),
1072        },
1073        // ─── Cohere ────────────────────────────────────────────────────
1074        ProviderDef {
1075            id: "cohere".into(),
1076            label: "Cohere".into(),
1077            adapter: "openai-compatible".into(),
1078            base_url: "https://api.cohere.com/compatibility/v1".into(),
1079            api_key_env: Some("COHERE_API_KEY".into()),
1080            models: vec![
1081                ModelDef {
1082                    name: "command-a-03-2025".into(),
1083                    label: "Command A".into(),
1084                    tags: vec!["strong".into(), "tool_support".into(), "enterprise".into()],
1085                    cost_input_per_mtok: 2.5,
1086                    cost_output_per_mtok: 10.0,
1087                    context_window: 256000,
1088                    recommended: true,
1089                },
1090                ModelDef {
1091                    name: "command-r7b-12-2024".into(),
1092                    label: "Command R7B".into(),
1093                    tags: vec!["fast".into(), "cheap".into(), "tool_support".into()],
1094                    cost_input_per_mtok: 0.15,
1095                    cost_output_per_mtok: 0.6,
1096                    context_window: 128000,
1097                    recommended: false,
1098                },
1099            ],
1100            tags: vec!["enterprise".into(), "tool_support".into()],
1101            notes: "Cohere Command models through the OpenAI-compatible endpoint.".into(),
1102            auth_flow: AuthFlow::default(),
1103        },
1104        // ─── Google (OAuth device flow) ────────────────────────────────
1105        ProviderDef {
1106            id: "google-oauth".into(),
1107            label: "Google (OAuth)".into(),
1108            adapter: "openai-compatible".into(),
1109            base_url: "https://generativelanguage.googleapis.com/v1beta/openai".into(),
1110            api_key_env: None,
1111            models: vec![ModelDef {
1112                name: "gemini-2.5-pro".into(),
1113                label: "Gemini 2.5 Pro".into(),
1114                tags: vec!["strong".into(), "vision".into(), "tool_support".into()],
1115                cost_input_per_mtok: 0.0,
1116                cost_output_per_mtok: 0.0,
1117                context_window: 1_000_000,
1118                recommended: true,
1119            }],
1120            tags: vec!["strong".into()],
1121            notes: "Google Gemini via OAuth device flow — no API key required.".into(),
1122            auth_flow: AuthFlow::DeviceOAuth {
1123                device_endpoint: "https://oauth2.googleapis.com/device/code".into(),
1124                token_endpoint:  "https://oauth2.googleapis.com/token".into(),
1125                scope: "openid https://www.googleapis.com/auth/generative-language".into(),
1126                client_id_env: "GOOGLE_OAUTH_CLIENT_ID".into(),
1127            },
1128        },
1129        // ─── Microsoft (OAuth device flow) ────────────────────────────
1130        ProviderDef {
1131            id: "microsoft-oauth".into(),
1132            label: "Microsoft (OAuth)".into(),
1133            adapter: "openai-compatible".into(),
1134            base_url: "https://api.cognitive.microsoft.com/openai/v1".into(),
1135            api_key_env: None,
1136            models: vec![ModelDef {
1137                name: "gpt-4o".into(),
1138                label: "GPT-4o (Azure)".into(),
1139                tags: vec!["strong".into(), "vision".into(), "tool_support".into()],
1140                cost_input_per_mtok: 0.0,
1141                cost_output_per_mtok: 0.0,
1142                context_window: 128000,
1143                recommended: true,
1144            }],
1145            tags: vec!["strong".into()],
1146            notes: "Microsoft Azure OpenAI via OAuth device flow.".into(),
1147            auth_flow: AuthFlow::DeviceOAuth {
1148                device_endpoint: "https://login.microsoftonline.com/common/oauth2/v2.0/devicecode".into(),
1149                token_endpoint:  "https://login.microsoftonline.com/common/oauth2/v2.0/token".into(),
1150                scope: "openid profile".into(),
1151                client_id_env: "MICROSOFT_OAUTH_CLIENT_ID".into(),
1152            },
1153        },
1154    ]
1155}
1156
1157pub fn find_provider(id: &str) -> Option<ProviderDef> {
1158    provider_registry().into_iter().find(|p| p.id == id)
1159}
1160
1161/// Return all providers that use an OAuth device flow (not a plain API key).
1162pub fn list_oauth_providers() -> Vec<ProviderDef> {
1163    provider_registry()
1164        .into_iter()
1165        .filter(|p| p.auth_flow != AuthFlow::ApiKey)
1166        .collect()
1167}
1168
1169pub fn find_model(provider_id: &str, model_name: &str) -> Option<ModelDef> {
1170    find_provider(provider_id).and_then(|p| p.models.into_iter().find(|m| m.name == model_name))
1171}
1172
1173pub fn default_models(provider_id: &str) -> Vec<String> {
1174    find_provider(provider_id)
1175        .map(|p| {
1176            let recommended: Vec<String> = p
1177                .models
1178                .iter()
1179                .filter(|m| m.recommended)
1180                .map(|m| m.name.clone())
1181                .collect();
1182            if recommended.is_empty() {
1183                p.models.into_iter().map(|m| m.name).collect()
1184            } else {
1185                recommended
1186            }
1187        })
1188        .unwrap_or_default()
1189}
1190
1191/// Infer capabilities for a model that is NOT in the static registry (e.g. a
1192/// model returned by live discovery). Heuristics on the model name let the
1193/// router distinguish a strong 120B coder from an 8B flash model instead of
1194/// treating every discovered model as identical default caps.
1195pub fn infer_caps_from_name(model_name: &str) -> ModelCaps {
1196    let n = model_name.to_ascii_lowercase();
1197
1198    let vision = n.contains("vision")
1199        || n.contains("-vl")
1200        || n.contains("vl-")
1201        || n.contains("multimodal")
1202        || n.contains("omni");
1203
1204    // Coding / tool-use signal
1205    let tools = n.contains("coder")
1206        || n.contains("code")
1207        || n.contains("instruct")
1208        || n.contains("chat")
1209        || n.contains("nemotron")
1210        || n.contains("qwen")
1211        || n.contains("llama")
1212        || n.contains("mistral")
1213        || n.contains("deepseek")
1214        || n.contains("gpt")
1215        || n.contains("glm");
1216
1217    // Size / strength signal → latency + context window
1218    let is_large = [
1219        "70b", "72b", "120b", "122b", "175b", "180b", "235b", "253b", "340b", "397b", "405b",
1220        "480b", "675b", "ultra", "-large", "super",
1221    ]
1222    .iter()
1223    .any(|t| n.contains(t));
1224    let is_small = n.contains("flash")
1225        || n.contains("nano")
1226        || n.contains("mini")
1227        || n.contains("lite")
1228        || n.contains("-small")
1229        || n.contains("1b")
1230        || n.contains("2b")
1231        || n.contains("3b")
1232        || n.contains("7b")
1233        || n.contains("8b")
1234        || n.contains("9b");
1235
1236    // Vendor-specific long-context families. These take precedence over the
1237    // generic size heuristic so a discovered "deepseek-v4-pro" (which the
1238    // vendor ships with 1M context) doesn't get clamped to the small default.
1239    // Order matters: most specific match first.
1240    let vendor_ctx: Option<u64> = if n.contains("gemini")
1241        && (n.contains("1.5") || n.contains("2.0") || n.contains("2.5") || n.contains("3."))
1242    {
1243        Some(2_000_000) // Gemini 1.5+ Pro family ships 1M–2M context
1244    } else if n.contains("deepseek-v4-pro")
1245        || n.contains("deepseek-v4.5")
1246        || n.contains("deepseek-v5")
1247        || (n.contains("deepseek") && n.contains("pro"))
1248    {
1249        Some(1_000_000) // DeepSeek V4 Pro and successors
1250    } else if n.contains("deepseek-v4")
1251        || n.contains("deepseek-r1")
1252        || n.contains("deepseek-reasoner")
1253    {
1254        Some(131_072) // DeepSeek V4 flash / R1: 128k
1255    } else if n.contains("deepseek-chat")
1256        || n.contains("deepseek-coder")
1257        || n.contains("deepseek-v3")
1258    {
1259        Some(65_536) // Older DeepSeek V3 chat/coder: 64k
1260    } else if n.contains("qwen3") && (n.contains("235b") || n.contains("max") || n.contains("plus"))
1261    {
1262        Some(1_000_000) // Qwen3 Max / Plus: 1M
1263    } else if n.contains("qwen") && (n.contains("turbo") || n.contains("plus") || n.contains("max"))
1264    {
1265        Some(1_000_000) // Qwen Turbo / Plus / Max
1266    } else if n.contains("qwen3") || n.contains("qwen2.5") {
1267        Some(131_072)
1268    } else if n.contains("gpt-4.1")
1269        || n.contains("gpt-5")
1270        || n.contains("o1")
1271        || n.contains("o3")
1272        || n.contains("o4")
1273    {
1274        Some(1_000_000) // GPT-4.1, GPT-5, o-series
1275    } else if n.contains("gpt-4o") || n.contains("gpt-4-turbo") {
1276        Some(128_000)
1277    } else if n.contains("claude-sonnet-4")
1278        || n.contains("claude-opus-4")
1279        || n.contains("claude-haiku-4")
1280    {
1281        Some(200_000)
1282    } else if n.contains("claude-3.5") || n.contains("claude-3-5") {
1283        Some(200_000)
1284    } else if n.contains("claude") {
1285        Some(200_000)
1286    } else if n.contains("llama-3.3") || n.contains("llama3.3") || n.contains("llama-4") {
1287        Some(128_000)
1288    } else if n.contains("mistral-large") || n.contains("mixtral") || n.contains("codestral") {
1289        Some(128_000)
1290    } else if n.contains("grok-3") || n.contains("grok-4") {
1291        Some(131_072)
1292    } else if n.contains("kimi-k2") || n.contains("moonshot-v1-128k") {
1293        Some(200_000)
1294    } else if n.contains("kimi") || n.contains("moonshot") {
1295        Some(128_000)
1296    } else if n.contains("minimax-m") || n.contains("minimax-text-01") {
1297        Some(1_000_000)
1298    } else if n.contains("step-3") || n.contains("stepfun") {
1299        Some(131_072)
1300    } else if n.contains("glm-4.6") || n.contains("glm-5") {
1301        Some(200_000)
1302    } else if n.contains("glm-4") || n.contains("glm-z1") {
1303        Some(131_072)
1304    } else if n.contains("longctx") || n.contains("long-ctx") || n.contains("1m") {
1305        Some(1_000_000)
1306    } else if n.contains("128k") {
1307        Some(128_000)
1308    } else if n.contains("200k") {
1309        Some(200_000)
1310    } else if n.contains("256k") {
1311        Some(262_144)
1312    } else if n.contains("512k") {
1313        Some(524_288)
1314    } else {
1315        None
1316    };
1317
1318    let (latency, context_window) = if let Some(ctx) = vendor_ctx {
1319        let lat = if is_small {
1320            LatencyClass::Fast
1321        } else if is_large || ctx >= 200_000 {
1322            LatencyClass::Slow
1323        } else {
1324            LatencyClass::Medium
1325        };
1326        (lat, ctx)
1327    } else if is_large {
1328        (LatencyClass::Slow, 131_072)
1329    } else if is_small {
1330        (LatencyClass::Fast, 32_768)
1331    } else {
1332        (LatencyClass::Medium, 65_536)
1333    };
1334
1335    // max_output scales with context but caps at 32k tokens to avoid
1336    // accidentally requesting more output than any current provider supports.
1337    let max_output = (context_window / 8).clamp(4_096, 32_000);
1338
1339    ModelCaps {
1340        context_window,
1341        max_output,
1342        tools,
1343        vision,
1344        cost_input_per_mtok: 0.0,
1345        cost_output_per_mtok: 0.0,
1346        latency,
1347    }
1348}
1349
1350pub fn model_caps(provider_id: &str, model_name: &str) -> ModelCaps {
1351    let Some(model) = find_model(provider_id, model_name) else {
1352        // Not in the static registry → infer from the model name (discovery path).
1353        return infer_caps_from_name(model_name);
1354    };
1355
1356    let latency = if model.tags.iter().any(|t| t == "fast") {
1357        LatencyClass::Fast
1358    } else if model.tags.iter().any(|t| t == "strong" || t == "reasoning") {
1359        LatencyClass::Slow
1360    } else {
1361        LatencyClass::Medium
1362    };
1363
1364    ModelCaps {
1365        context_window: model.context_window,
1366        max_output: model.context_window.min(32_000).max(4_096),
1367        tools: model
1368            .tags
1369            .iter()
1370            .any(|t| t == "tool_support" || t == "code"),
1371        vision: model.tags.iter().any(|t| t == "vision"),
1372        cost_input_per_mtok: model.cost_input_per_mtok,
1373        cost_output_per_mtok: model.cost_output_per_mtok,
1374        latency,
1375    }
1376}
1377
1378pub fn onboarding_providers() -> Vec<ProviderDef> {
1379    provider_registry()
1380}
sparrow/config/providers.rs

sparrow/config/
providers.rs