1use serde::{Deserialize, Serialize};
2
3use crate::provider::{LatencyClass, ModelCaps};
4
5#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
9#[serde(tag = "type", rename_all = "snake_case")]
10pub enum AuthFlow {
11 ApiKey,
13 DeviceOAuth {
19 device_endpoint: String,
20 token_endpoint: String,
21 scope: String,
22 client_id_env: String,
23 },
24}
25
26impl Default for AuthFlow {
27 fn default() -> Self {
28 AuthFlow::ApiKey
29 }
30}
31
32#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct ProviderDef {
36 pub id: String,
37 pub label: String,
38 pub adapter: String,
39 pub base_url: String,
40 pub api_key_env: Option<String>,
41 pub models: Vec<ModelDef>,
42 pub tags: Vec<String>,
43 pub notes: String,
44 #[serde(default)]
46 pub auth_flow: AuthFlow,
47}
48
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct ModelDef {
51 pub name: String,
52 pub label: String,
53 pub tags: Vec<String>,
54 pub cost_input_per_mtok: f64,
55 pub cost_output_per_mtok: f64,
56 pub context_window: u64,
57 pub recommended: bool,
58}
59
60pub fn provider_registry() -> Vec<ProviderDef> {
61 vec![
62 ProviderDef {
64 id: "ollama-cloud".into(),
65 label: "Ollama Cloud".into(),
66 adapter: "openai-compatible".into(),
67 base_url: "http://localhost:11434/v1".into(),
68 api_key_env: None,
69 models: vec![
70 ModelDef {
71 name: "qwen3.5:32b".into(),
72 label: "Qwen 3.5 32B".into(),
73 tags: vec!["free".into(), "code".into(), "tool_support".into()],
74 cost_input_per_mtok: 0.0,
75 cost_output_per_mtok: 0.0,
76 context_window: 32768,
77 recommended: true,
78 },
79 ModelDef {
80 name: "llama4:latest".into(),
81 label: "Llama 4".into(),
82 tags: vec!["free".into(), "strong".into(), "tool_support".into()],
83 cost_input_per_mtok: 0.0,
84 cost_output_per_mtok: 0.0,
85 context_window: 131072,
86 recommended: false,
87 },
88 ],
89 tags: vec!["local".into(), "free".into()],
90 notes: "Ollama Cloud. For self-hosted Ollama use 'custom' provider.".into(),
91 auth_flow: AuthFlow::default(),
92 },
93 ProviderDef {
95 id: "anthropic".into(),
96 label: "Anthropic".into(),
97 adapter: "anthropic-messages".into(),
98 base_url: "https://api.anthropic.com".into(),
99 api_key_env: Some("ANTHROPIC_API_KEY".into()),
100 models: vec![
101 ModelDef {
102 name: "claude-sonnet-4-6".into(),
103 label: "Claude Sonnet 4".into(),
104 tags: vec![
105 "strong".into(),
106 "code".into(),
107 "vision".into(),
108 "tool_support".into(),
109 ],
110 cost_input_per_mtok: 3.0,
111 cost_output_per_mtok: 15.0,
112 context_window: 200000,
113 recommended: true,
114 },
115 ModelDef {
116 name: "claude-opus-4-8".into(),
117 label: "Claude Opus 4".into(),
118 tags: vec![
119 "strong".into(),
120 "code".into(),
121 "vision".into(),
122 "tool_support".into(),
123 ],
124 cost_input_per_mtok: 15.0,
125 cost_output_per_mtok: 75.0,
126 context_window: 200000,
127 recommended: false,
128 },
129 ModelDef {
130 name: "claude-haiku-4-5".into(),
131 label: "Claude Haiku 4".into(),
132 tags: vec!["fast".into(), "cheap".into(), "tool_support".into()],
133 cost_input_per_mtok: 1.0,
134 cost_output_per_mtok: 5.0,
135 context_window: 200000,
136 recommended: false,
137 },
138 ],
139 tags: vec!["strong".into(), "code".into(), "vision".into()],
140 notes: "Best-in-class for complex code and reasoning.".into(),
141 auth_flow: AuthFlow::default(),
142 },
143 ProviderDef {
145 id: "openai-codex".into(),
146 label: "OpenAI Codex".into(),
147 adapter: "openai-compatible".into(),
148 base_url: "https://api.openai.com/v1".into(),
149 api_key_env: Some("OPENAI_API_KEY".into()),
150 models: vec![
151 ModelDef {
152 name: "gpt-5".into(),
153 label: "GPT-5".into(),
154 tags: vec![
155 "strong".into(),
156 "code".into(),
157 "vision".into(),
158 "tool_support".into(),
159 ],
160 cost_input_per_mtok: 2.5,
161 cost_output_per_mtok: 10.0,
162 context_window: 128000,
163 recommended: true,
164 },
165 ModelDef {
166 name: "gpt-5-mini".into(),
167 label: "GPT-5 Mini".into(),
168 tags: vec![
169 "cheap".into(),
170 "fast".into(),
171 "code".into(),
172 "tool_support".into(),
173 ],
174 cost_input_per_mtok: 0.15,
175 cost_output_per_mtok: 0.60,
176 context_window: 128000,
177 recommended: false,
178 },
179 ],
180 tags: vec!["strong".into(), "code".into(), "vision".into()],
181 notes: "OpenAI Codex — GPT models via OpenAI API.".into(),
182 auth_flow: AuthFlow::default(),
183 },
184 ProviderDef {
186 id: "nvidia".into(),
187 label: "NVIDIA NIM".into(),
188 adapter: "openai-compatible".into(),
189 base_url: "https://integrate.api.nvidia.com/v1".into(),
190 api_key_env: Some("NVIDIA_API_KEY".into()),
191 models: vec![
192 ModelDef {
193 name: "meta/llama-3.1-8b-instruct".into(),
194 label: "Llama 3.1 8B Instruct".into(),
195 tags: vec![
196 "fast".into(),
197 "free".into(),
198 "tool_support".into(),
199 "chat".into(),
200 ],
201 cost_input_per_mtok: 0.0,
202 cost_output_per_mtok: 0.0,
203 context_window: 131072,
204 recommended: true,
205 },
206 ModelDef {
207 name: "deepseek-ai/deepseek-v4-flash".into(),
208 label: "DeepSeek V4 Flash".into(),
209 tags: vec!["code".into(), "reasoning".into(), "free".into(), "tool_support".into()],
210 cost_input_per_mtok: 0.0,
211 cost_output_per_mtok: 0.0,
212 context_window: 131072,
213 recommended: false,
214 },
215 ModelDef {
216 name: "stepfun-ai/step-3.5-flash".into(),
217 label: "Step 3.5 Flash".into(),
218 tags: vec!["fast".into(), "free".into(), "tool_support".into()],
219 cost_input_per_mtok: 0.0,
220 cost_output_per_mtok: 0.0,
221 context_window: 131072,
222 recommended: true,
223 },
224 ModelDef {
225 name: "nvidia/llama-3.1-nemotron-nano-8b-v1".into(),
226 label: "Nemotron Nano 8B".into(),
227 tags: vec!["fast".into(), "code".into(), "free".into(), "tool_support".into()],
228 cost_input_per_mtok: 0.0,
229 cost_output_per_mtok: 0.0,
230 context_window: 131072,
231 recommended: false,
232 },
233 ModelDef {
234 name: "openai/gpt-oss-20b".into(),
235 label: "GPT-OSS 20B".into(),
236 tags: vec!["fast".into(), "free".into(), "tool_support".into()],
237 cost_input_per_mtok: 0.0,
238 cost_output_per_mtok: 0.0,
239 context_window: 131072,
240 recommended: false,
241 },
242 ModelDef {
243 name: "nvidia/nemotron-3-super-120b-a12b".into(),
244 label: "Nemotron Super 120B".into(),
245 tags: vec![
246 "strong".into(),
247 "reasoning".into(),
248 "code".into(),
249 "free".into(),
250 ],
251 cost_input_per_mtok: 0.0,
252 cost_output_per_mtok: 0.0,
253 context_window: 131072,
254 recommended: true,
255 },
256 ],
257 tags: vec!["free".into(), "fast".into(), "strong".into(), "code".into()],
258 notes: "NVIDIA API Catalog / NIM — free tier with API key; discovery expands this list from /v1/models.".into(),
259 auth_flow: AuthFlow::default(),
260 },
261 ProviderDef {
263 id: "openrouter".into(),
264 label: "OpenRouter".into(),
265 adapter: "openai-compatible".into(),
266 base_url: "https://openrouter.ai/api/v1".into(),
267 api_key_env: Some("OPENROUTER_API_KEY".into()),
268 models: vec![ModelDef {
269 name: "openrouter/auto".into(),
270 label: "Auto (best for task)".into(),
271 tags: vec![
272 "strong".into(),
273 "code".into(),
274 "vision".into(),
275 "tool_support".into(),
276 ],
277 cost_input_per_mtok: 0.0,
278 cost_output_per_mtok: 0.0,
279 context_window: 200000,
280 recommended: true,
281 }],
282 tags: vec!["strong".into(), "multi".into()],
283 notes: "200+ models via one API — auto-routes to best model.".into(),
284 auth_flow: AuthFlow::default(),
285 },
286 ProviderDef {
288 id: "deepseek".into(),
289 label: "DeepSeek".into(),
290 adapter: "openai-compatible".into(),
291 base_url: "https://api.deepseek.com/v1".into(),
292 api_key_env: Some("DEEPSEEK_API_KEY".into()),
293 models: vec![
294 ModelDef {
295 name: "deepseek-chat".into(),
296 label: "DeepSeek V3".into(),
297 tags: vec!["cheap".into(), "code".into(), "tool_support".into()],
298 cost_input_per_mtok: 0.27,
299 cost_output_per_mtok: 1.1,
300 context_window: 65536,
301 recommended: true,
302 },
303 ModelDef {
304 name: "deepseek-reasoner".into(),
305 label: "DeepSeek R1".into(),
306 tags: vec!["reasoning".into(), "strong".into()],
307 cost_input_per_mtok: 0.55,
308 cost_output_per_mtok: 2.19,
309 context_window: 65536,
310 recommended: false,
311 },
312 ],
313 tags: vec!["cheap".into(), "code".into(), "reasoning".into()],
314 notes: "DeepSeek — very competitive pricing, strong coding.".into(),
315 auth_flow: AuthFlow::default(),
316 },
317 ProviderDef {
319 id: "gemini".into(),
320 label: "Google Gemini".into(),
321 adapter: "openai-compatible".into(),
322 base_url: "https://generativelanguage.googleapis.com/v1beta/openai".into(),
323 api_key_env: Some("GEMINI_API_KEY".into()),
324 models: vec![
325 ModelDef {
326 name: "gemini-2.5-pro".into(),
327 label: "Gemini 2.5 Pro".into(),
328 tags: vec![
329 "strong".into(),
330 "code".into(),
331 "vision".into(),
332 "tool_support".into(),
333 ],
334 cost_input_per_mtok: 0.0,
335 cost_output_per_mtok: 0.0,
336 context_window: 1048576,
337 recommended: true,
338 },
339 ModelDef {
340 name: "gemini-2.5-flash".into(),
341 label: "Gemini 2.5 Flash".into(),
342 tags: vec![
343 "fast".into(),
344 "cheap".into(),
345 "vision".into(),
346 "tool_support".into(),
347 ],
348 cost_input_per_mtok: 0.0,
349 cost_output_per_mtok: 0.0,
350 context_window: 1048576,
351 recommended: false,
352 },
353 ],
354 tags: vec!["strong".into(), "vision".into(), "free".into()],
355 notes: "Google Gemini — 1M context window, free tier.".into(),
356 auth_flow: AuthFlow::default(),
357 },
358 ProviderDef {
360 id: "xai".into(),
361 label: "xAI (Grok)".into(),
362 adapter: "openai-compatible".into(),
363 base_url: "https://api.x.ai/v1".into(),
364 api_key_env: Some("XAI_API_KEY".into()),
365 models: vec![ModelDef {
366 name: "grok-3".into(),
367 label: "Grok 3".into(),
368 tags: vec!["strong".into(), "code".into(), "vision".into()],
369 cost_input_per_mtok: 3.0,
370 cost_output_per_mtok: 15.0,
371 context_window: 131072,
372 recommended: true,
373 }],
374 tags: vec!["strong".into(), "code".into()],
375 notes: "xAI Grok — strong reasoning and coding.".into(),
376 auth_flow: AuthFlow::default(),
377 },
378 ProviderDef {
380 id: "huggingface".into(),
381 label: "Hugging Face".into(),
382 adapter: "openai-compatible".into(),
383 base_url: "https://api-inference.huggingface.co/v1".into(),
384 api_key_env: Some("HF_TOKEN".into()),
385 models: vec![ModelDef {
386 name: "Qwen/Qwen3-235B-A22B".into(),
387 label: "Qwen 3 235B".into(),
388 tags: vec!["strong".into(), "code".into(), "tool_support".into()],
389 cost_input_per_mtok: 0.0,
390 cost_output_per_mtok: 0.0,
391 context_window: 32768,
392 recommended: true,
393 }],
394 tags: vec!["free".into()],
395 notes: "Hugging Face Serverless Inference — free tier, many models.".into(),
396 auth_flow: AuthFlow::default(),
397 },
398 ProviderDef {
400 id: "nous".into(),
401 label: "Nous Portal".into(),
402 adapter: "openai-compatible".into(),
403 base_url: "https://portal.nousresearch.com/api/v1".into(),
404 api_key_env: Some("NOUS_API_KEY".into()),
405 models: vec![ModelDef {
406 name: "hermes-3-70b".into(),
407 label: "Hermes 3 70B".into(),
408 tags: vec!["strong".into(), "code".into(), "tool_support".into()],
409 cost_input_per_mtok: 0.0,
410 cost_output_per_mtok: 0.0,
411 context_window: 32768,
412 recommended: true,
413 }],
414 tags: vec!["strong".into(), "code".into()],
415 notes: "Nous Portal — one sub for models + web search + image gen + TTS + browser."
416 .into(),
417 auth_flow: AuthFlow::default(),
418 },
419 ProviderDef {
421 id: "novita".into(),
422 label: "NovitaAI".into(),
423 adapter: "openai-compatible".into(),
424 base_url: "https://api.novita.ai/v3/openai".into(),
425 api_key_env: Some("NOVITA_API_KEY".into()),
426 models: vec![ModelDef {
427 name: "deepseek/deepseek-r1".into(),
428 label: "DeepSeek R1".into(),
429 tags: vec!["reasoning".into(), "strong".into()],
430 cost_input_per_mtok: 0.0,
431 cost_output_per_mtok: 0.0,
432 context_window: 65536,
433 recommended: true,
434 }],
435 tags: vec!["cheap".into(), "reasoning".into()],
436 notes: "NovitaAI — AI-native cloud for Model API, Agent Sandbox, and GPU Cloud.".into(),
437 auth_flow: AuthFlow::default(),
438 },
439 ProviderDef {
441 id: "alibaba".into(),
442 label: "Alibaba Cloud".into(),
443 adapter: "openai-compatible".into(),
444 base_url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".into(),
445 api_key_env: Some("DASHSCOPE_API_KEY".into()),
446 models: vec![ModelDef {
447 name: "qwen-plus".into(),
448 label: "Qwen Plus".into(),
449 tags: vec!["strong".into(), "code".into(), "tool_support".into()],
450 cost_input_per_mtok: 0.0,
451 cost_output_per_mtok: 0.0,
452 context_window: 131072,
453 recommended: true,
454 }],
455 tags: vec!["cheap".into(), "code".into()],
456 notes: "Alibaba Cloud DashScope — Qwen models via OpenAI-compatible API.".into(),
457 auth_flow: AuthFlow::default(),
458 },
459 ProviderDef {
461 id: "bedrock".into(),
462 label: "AWS Bedrock".into(),
463 adapter: "bedrock".into(),
464 base_url: "https://bedrock-runtime.us-east-1.amazonaws.com".into(),
465 api_key_env: Some("AWS_ACCESS_KEY_ID".into()),
466 models: vec![ModelDef {
467 name: "anthropic.claude-sonnet-4-6".into(),
468 label: "Claude Sonnet 4".into(),
469 tags: vec![
470 "strong".into(),
471 "code".into(),
472 "vision".into(),
473 "tool_support".into(),
474 ],
475 cost_input_per_mtok: 3.0,
476 cost_output_per_mtok: 15.0,
477 context_window: 200000,
478 recommended: true,
479 }],
480 tags: vec!["strong".into(), "code".into()],
481 notes: "AWS Bedrock — managed foundation models. Requires AWS credentials.".into(),
482 auth_flow: AuthFlow::default(),
483 },
484 ProviderDef {
486 id: "kimi-coding".into(),
487 label: "Kimi Coding".into(),
488 adapter: "openai-compatible".into(),
489 base_url: "https://api.moonshot.cn/v1".into(),
490 api_key_env: Some("MOONSHOT_API_KEY".into()),
491 models: vec![ModelDef {
492 name: "moonshot-v1-auto".into(),
493 label: "Moonshot Auto".into(),
494 tags: vec!["strong".into(), "code".into(), "tool_support".into()],
495 cost_input_per_mtok: 0.0,
496 cost_output_per_mtok: 0.0,
497 context_window: 131072,
498 recommended: true,
499 }],
500 tags: vec!["code".into()],
501 notes: "Kimi/Moonshot — coding-focused models.".into(),
502 auth_flow: AuthFlow::default(),
503 },
504 ProviderDef {
506 id: "minimax".into(),
507 label: "MiniMax".into(),
508 adapter: "openai-compatible".into(),
509 base_url: "https://api.minimax.chat/v1".into(),
510 api_key_env: Some("MINIMAX_API_KEY".into()),
511 models: vec![ModelDef {
512 name: "abab7-chat".into(),
513 label: "ABAB 7".into(),
514 tags: vec!["strong".into(), "tool_support".into()],
515 cost_input_per_mtok: 0.0,
516 cost_output_per_mtok: 0.0,
517 context_window: 131072,
518 recommended: true,
519 }],
520 tags: vec!["strong".into()],
521 notes: "MiniMax — ABAB series models via OpenAI-compatible API.".into(),
522 auth_flow: AuthFlow::default(),
523 },
524 ProviderDef {
526 id: "xiaomi".into(),
527 label: "Xiaomi MiMo".into(),
528 adapter: "openai-compatible".into(),
529 base_url: "https://platform.xiaomimimo.com/v1".into(),
530 api_key_env: Some("XIAOMI_API_KEY".into()),
531 models: vec![ModelDef {
532 name: "mimo-v2".into(),
533 label: "MiMo V2".into(),
534 tags: vec!["strong".into(), "code".into()],
535 cost_input_per_mtok: 0.0,
536 cost_output_per_mtok: 0.0,
537 context_window: 32768,
538 recommended: true,
539 }],
540 tags: vec!["code".into()],
541 notes: "Xiaomi MiMo — coding models via Xiaomi platform.".into(),
542 auth_flow: AuthFlow::default(),
543 },
544 ProviderDef {
546 id: "zai".into(),
547 label: "z.ai / GLM".into(),
548 adapter: "openai-compatible".into(),
549 base_url: "https://api.z.ai/v1".into(),
550 api_key_env: Some("ZAI_API_KEY".into()),
551 models: vec![ModelDef {
552 name: "glm-4-plus".into(),
553 label: "GLM-4 Plus".into(),
554 tags: vec!["strong".into(), "code".into(), "tool_support".into()],
555 cost_input_per_mtok: 0.0,
556 cost_output_per_mtok: 0.0,
557 context_window: 131072,
558 recommended: true,
559 }],
560 tags: vec!["code".into()],
561 notes: "z.ai / GLM — ChatGLM models via OpenAI-compatible API.".into(),
562 auth_flow: AuthFlow::default(),
563 },
564 ProviderDef {
566 id: "gmi".into(),
567 label: "GMI Cloud".into(),
568 adapter: "openai-compatible".into(),
569 base_url: "https://api.gmi.cloud/v1".into(),
570 api_key_env: Some("GMI_API_KEY".into()),
571 models: vec![ModelDef {
572 name: "llama-4-maverick".into(),
573 label: "Llama 4 Maverick".into(),
574 tags: vec!["strong".into(), "code".into()],
575 cost_input_per_mtok: 0.0,
576 cost_output_per_mtok: 0.0,
577 context_window: 131072,
578 recommended: true,
579 }],
580 tags: vec!["cheap".into()],
581 notes: "GMI Cloud — GPU cloud for open-source model inference.".into(),
582 auth_flow: AuthFlow::default(),
583 },
584 ProviderDef {
586 id: "arcee".into(),
587 label: "Arcee".into(),
588 adapter: "openai-compatible".into(),
589 base_url: "https://api.arcee.ai/v1".into(),
590 api_key_env: Some("ARCEE_API_KEY".into()),
591 models: vec![ModelDef {
592 name: "arcee-virtuoso-small".into(),
593 label: "Virtuoso Small".into(),
594 tags: vec!["code".into(), "tool_support".into()],
595 cost_input_per_mtok: 0.0,
596 cost_output_per_mtok: 0.0,
597 context_window: 32768,
598 recommended: true,
599 }],
600 tags: vec!["code".into()],
601 notes: "Arcee — specialized coding models.".into(),
602 auth_flow: AuthFlow::default(),
603 },
604 ProviderDef {
606 id: "stepfun".into(),
607 label: "StepFun".into(),
608 adapter: "openai-compatible".into(),
609 base_url: "https://api.stepfun.com/v1".into(),
610 api_key_env: Some("STEPFUN_API_KEY".into()),
611 models: vec![ModelDef {
612 name: "step-2-16k".into(),
613 label: "Step-2 16K".into(),
614 tags: vec!["strong".into(), "tool_support".into()],
615 cost_input_per_mtok: 0.0,
616 cost_output_per_mtok: 0.0,
617 context_window: 16384,
618 recommended: true,
619 }],
620 tags: vec!["cheap".into()],
621 notes: "StepFun — Step series models via OpenAI-compatible API.".into(),
622 auth_flow: AuthFlow::default(),
623 },
624 ProviderDef {
626 id: "custom".into(),
627 label: "Custom Endpoint".into(),
628 adapter: "openai-compatible".into(),
629 base_url: "https://your-endpoint/v1".into(),
630 api_key_env: Some("CUSTOM_API_KEY".into()),
631 models: vec![ModelDef {
632 name: "custom-model".into(),
633 label: "Custom Model".into(),
634 tags: vec!["custom".into()],
635 cost_input_per_mtok: 0.0,
636 cost_output_per_mtok: 0.0,
637 context_window: 128000,
638 recommended: true,
639 }],
640 tags: vec!["custom".into()],
641 notes: "Bring your own OpenAI-compatible endpoint. Set base_url to your server.".into(),
642 auth_flow: AuthFlow::default(),
643 },
644 ProviderDef {
646 id: "azure-foundry".into(),
647 label: "Azure AI Foundry".into(),
648 adapter: "openai-compatible".into(),
649 base_url: "https://<your-resource>.openai.azure.com/openai/v1".into(),
650 api_key_env: Some("AZURE_OPENAI_API_KEY".into()),
651 models: vec![ModelDef {
652 name: "gpt-5".into(),
653 label: "GPT-5 (Azure)".into(),
654 tags: vec!["strong".into(), "code".into(), "tool_support".into()],
655 cost_input_per_mtok: 0.0,
656 cost_output_per_mtok: 0.0,
657 context_window: 128000,
658 recommended: true,
659 }],
660 tags: vec!["strong".into(), "code".into()],
661 notes:
662 "Azure AI Foundry — OpenAI models on Azure. Set base_url with your resource name."
663 .into(),
664 auth_flow: AuthFlow::default(),
665 },
666 ProviderDef {
668 id: "qwen-oauth".into(),
669 label: "Qwen (OAuth)".into(),
670 adapter: "openai-compatible".into(),
671 base_url: "https://dashscope.aliyuncs.com/compatible-mode/v1".into(),
672 api_key_env: None,
673 models: vec![ModelDef {
674 name: "qwen-plus".into(),
675 label: "Qwen Plus".into(),
676 tags: vec!["strong".into(), "code".into(), "tool_support".into()],
677 cost_input_per_mtok: 0.0,
678 cost_output_per_mtok: 0.0,
679 context_window: 131072,
680 recommended: true,
681 }],
682 tags: vec!["code".into()],
683 notes: "Qwen via OAuth login — no API key needed, login via browser.".into(),
684 auth_flow: AuthFlow::DeviceOAuth {
685 device_endpoint: "https://oauth.aliyun.com/device/code".into(),
686 token_endpoint: "https://oauth.aliyun.com/device/token".into(),
687 scope: "openid profile".into(),
688 client_id_env: "QWEN_OAUTH_CLIENT_ID".into(),
689 },
690 },
691 ProviderDef {
693 id: "opencode-go".into(),
694 label: "OpenCode Go".into(),
695 adapter: "openai-compatible".into(),
696 base_url: "https://opencode.ai/zen/go/v1".into(),
697 api_key_env: Some("OPENCODE_GO_API_KEY".into()),
698 models: vec![
699 ModelDef {
700 name: "claude-sonnet-4-6".into(),
701 label: "Claude Sonnet 4.6 (Go)".into(),
702 tags: vec![
703 "strong".into(),
704 "code".into(),
705 "vision".into(),
706 "tool_support".into(),
707 ],
708 cost_input_per_mtok: 3.0,
709 cost_output_per_mtok: 15.0,
710 context_window: 200000,
711 recommended: true,
712 },
713 ModelDef {
714 name: "claude-opus-4-8".into(),
715 label: "Claude Opus 4 (Go)".into(),
716 tags: vec![
717 "strong".into(),
718 "code".into(),
719 "vision".into(),
720 "tool_support".into(),
721 ],
722 cost_input_per_mtok: 15.0,
723 cost_output_per_mtok: 75.0,
724 context_window: 200000,
725 recommended: false,
726 },
727 ModelDef {
728 name: "gpt-5".into(),
729 label: "GPT-5 (Go)".into(),
730 tags: vec!["strong".into(), "code".into(), "tool_support".into()],
731 cost_input_per_mtok: 5.0,
732 cost_output_per_mtok: 20.0,
733 context_window: 200000,
734 recommended: false,
735 },
736 ModelDef {
737 name: "gemini-2-5-pro".into(),
738 label: "Gemini 2.5 Pro (Go)".into(),
739 tags: vec![
740 "strong".into(),
741 "code".into(),
742 "vision".into(),
743 "tool_support".into(),
744 ],
745 cost_input_per_mtok: 2.5,
746 cost_output_per_mtok: 15.0,
747 context_window: 1000000,
748 recommended: false,
749 },
750 ],
751 tags: vec!["code".into(), "strong".into(), "multi".into(), "paid".into()],
752 notes: "OpenCode Go subscription tier — full model library via go/v1 endpoint. Discovery expands the list from /v1/models.".into(),
753 auth_flow: AuthFlow::default(),
754 },
755 ProviderDef {
757 id: "opencode-zen".into(),
758 label: "OpenCode Zen".into(),
759 adapter: "openai-compatible".into(),
760 base_url: "https://opencode.ai/zen/v1".into(),
761 api_key_env: Some("OPENCODE_API_KEY".into()),
762 models: vec![
763 ModelDef {
764 name: "claude-sonnet-4-6".into(),
765 label: "Claude Sonnet 4.6 (via Zen)".into(),
766 tags: vec![
767 "strong".into(),
768 "code".into(),
769 "vision".into(),
770 "tool_support".into(),
771 ],
772 cost_input_per_mtok: 3.0,
773 cost_output_per_mtok: 15.0,
774 context_window: 200000,
775 recommended: true,
776 },
777 ModelDef {
778 name: "qwen3.6-plus".into(),
779 label: "Qwen 3.6 Plus (via Zen)".into(),
780 tags: vec![
781 "strong".into(),
782 "code".into(),
783 "tool_support".into(),
784 ],
785 cost_input_per_mtok: 1.0,
786 cost_output_per_mtok: 3.0,
787 context_window: 262144,
788 recommended: false,
789 },
790 ModelDef {
791 name: "gpt-5-codex".into(),
792 label: "GPT-5 Codex (via Zen)".into(),
793 tags: vec![
794 "strong".into(),
795 "code".into(),
796 "tool_support".into(),
797 ],
798 cost_input_per_mtok: 2.5,
799 cost_output_per_mtok: 10.0,
800 context_window: 200000,
801 recommended: false,
802 },
803 ],
804 tags: vec!["code".into(), "strong".into(), "multi".into()],
805 notes: "OpenCode Zen — curated gateway (Claude, GPT, Qwen, DeepSeek). Most models need credits or an OpenCode Go subscription. Discovery expands the list from /v1/models.".into(),
806 auth_flow: AuthFlow::default(),
807 },
808 ProviderDef {
810 id: "kilocode".into(),
811 label: "KiloCode".into(),
812 adapter: "openai-compatible".into(),
813 base_url: "https://api.kilocode.ai/v1".into(),
814 api_key_env: Some("KILOCODE_API_KEY".into()),
815 models: vec![ModelDef {
816 name: "kilocode".into(),
817 label: "KiloCode".into(),
818 tags: vec!["code".into(), "tool_support".into()],
819 cost_input_per_mtok: 0.0,
820 cost_output_per_mtok: 0.0,
821 context_window: 32768,
822 recommended: true,
823 }],
824 tags: vec!["code".into()],
825 notes: "KiloCode — coding-specialized models.".into(),
826 auth_flow: AuthFlow::default(),
827 },
828 ProviderDef {
830 id: "copilot".into(),
831 label: "GitHub Copilot".into(),
832 adapter: "openai-compatible".into(),
833 base_url: "https://api.githubcopilot.com".into(),
834 api_key_env: Some("GITHUB_TOKEN".into()),
835 models: vec![ModelDef {
836 name: "copilot".into(),
837 label: "Copilot".into(),
838 tags: vec!["code".into(), "tool_support".into()],
839 cost_input_per_mtok: 0.0,
840 cost_output_per_mtok: 0.0,
841 context_window: 32768,
842 recommended: true,
843 }],
844 tags: vec!["code".into()],
845 notes: "GitHub Copilot — available with Copilot subscription.".into(),
846 auth_flow: AuthFlow::DeviceOAuth {
847 device_endpoint: "https://github.com/login/device/code".into(),
848 token_endpoint: "https://github.com/login/oauth/access_token".into(),
849 scope: "read:user".into(),
850 client_id_env: "GITHUB_OAUTH_CLIENT_ID".into(),
851 },
852 },
853 ProviderDef {
855 id: "alibaba-coding-plan".into(),
856 label: "Alibaba Coding Plan".into(),
857 adapter: "openai-compatible".into(),
858 base_url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".into(),
859 api_key_env: Some("DASHSCOPE_API_KEY".into()),
860 models: vec![ModelDef {
861 name: "qwen-coder-plus".into(),
862 label: "Qwen Coder Plus".into(),
863 tags: vec!["code".into(), "tool_support".into()],
864 cost_input_per_mtok: 0.0,
865 cost_output_per_mtok: 0.0,
866 context_window: 131072,
867 recommended: true,
868 }],
869 tags: vec!["code".into()],
870 notes: "Alibaba Coding Plan — Qwen Coder models for software development.".into(),
871 auth_flow: AuthFlow::default(),
872 },
873 ProviderDef {
877 id: "ollama".into(),
878 label: "Ollama (local)".into(),
879 adapter: "ollama".into(),
880 base_url: "http://localhost:11434/v1".into(),
881 api_key_env: Some("OLLAMA_HOST".into()),
882 models: vec![
883 ModelDef {
884 name: "qwen3.5:32b".into(),
885 label: "Qwen 3.5 32B".into(),
886 tags: vec![
887 "local".into(),
888 "free".into(),
889 "code".into(),
890 "tool_support".into(),
891 ],
892 cost_input_per_mtok: 0.0,
893 cost_output_per_mtok: 0.0,
894 context_window: 32768,
895 recommended: true,
896 },
897 ModelDef {
898 name: "codellama:latest".into(),
899 label: "CodeLlama".into(),
900 tags: vec!["local".into(), "free".into(), "code".into()],
901 cost_input_per_mtok: 0.0,
902 cost_output_per_mtok: 0.0,
903 context_window: 16384,
904 recommended: false,
905 },
906 ModelDef {
907 name: "mistral:latest".into(),
908 label: "Mistral (local)".into(),
909 tags: vec!["local".into(), "free".into(), "fast".into()],
910 cost_input_per_mtok: 0.0,
911 cost_output_per_mtok: 0.0,
912 context_window: 32768,
913 recommended: false,
914 },
915 ],
916 tags: vec!["local".into(), "free".into(), "offline".into()],
917 notes: "Self-hosted models via Ollama. Install: https://ollama.com".into(),
918 auth_flow: AuthFlow::default(),
919 },
920 ProviderDef {
922 id: "groq".into(),
923 label: "Groq".into(),
924 adapter: "openai-compatible".into(),
925 base_url: "https://api.groq.com/openai/v1".into(),
926 api_key_env: Some("GROQ_API_KEY".into()),
927 models: vec![
928 ModelDef {
929 name: "llama-4-scout-17b-16e".into(),
930 label: "Llama 4 Scout".into(),
931 tags: vec!["fast".into(), "cheap".into(), "tool_support".into()],
932 cost_input_per_mtok: 0.0,
933 cost_output_per_mtok: 0.0,
934 context_window: 131072,
935 recommended: true,
936 },
937 ModelDef {
938 name: "deepseek-r1-distill-llama-70b".into(),
939 label: "DeepSeek R1 70B".into(),
940 tags: vec!["reasoning".into(), "strong".into()],
941 cost_input_per_mtok: 0.0,
942 cost_output_per_mtok: 0.0,
943 context_window: 131072,
944 recommended: false,
945 },
946 ],
947 tags: vec!["fast".into(), "free".into()],
948 notes: "Groq LPU — ultra-fast inference, free tier available.".into(),
949 auth_flow: AuthFlow::default(),
950 },
951 ProviderDef {
953 id: "together".into(),
954 label: "Together AI".into(),
955 adapter: "openai-compatible".into(),
956 base_url: "https://api.together.xyz/v1".into(),
957 api_key_env: Some("TOGETHER_API_KEY".into()),
958 models: vec![ModelDef {
959 name: "meta-llama/Llama-4-Maverick-17B-128E".into(),
960 label: "Llama 4 Maverick".into(),
961 tags: vec!["strong".into(), "code".into(), "tool_support".into()],
962 cost_input_per_mtok: 0.2,
963 cost_output_per_mtok: 0.2,
964 context_window: 131072,
965 recommended: true,
966 }],
967 tags: vec!["cheap".into(), "code".into()],
968 notes: "Together AI — open-source models at low cost.".into(),
969 auth_flow: AuthFlow::default(),
970 },
971 ProviderDef {
973 id: "cerebras".into(),
974 label: "Cerebras".into(),
975 adapter: "openai-compatible".into(),
976 base_url: "https://api.cerebras.ai/v1".into(),
977 api_key_env: Some("CEREBRAS_API_KEY".into()),
978 models: vec![ModelDef {
979 name: "llama-4-scout-17b-16e".into(),
980 label: "Llama 4 Scout".into(),
981 tags: vec!["fast".into(), "cheap".into(), "tool_support".into()],
982 cost_input_per_mtok: 0.0,
983 cost_output_per_mtok: 0.0,
984 context_window: 131072,
985 recommended: true,
986 }],
987 tags: vec!["fast".into(), "free".into()],
988 notes: "Cerebras Wafer-Scale — fastest inference available.".into(),
989 auth_flow: AuthFlow::default(),
990 },
991 ProviderDef {
993 id: "mistral".into(),
994 label: "Mistral AI".into(),
995 adapter: "openai-compatible".into(),
996 base_url: "https://api.mistral.ai/v1".into(),
997 api_key_env: Some("MISTRAL_API_KEY".into()),
998 models: vec![
999 ModelDef {
1000 name: "mistral-large-latest".into(),
1001 label: "Mistral Large".into(),
1002 tags: vec!["strong".into(), "code".into(), "tool_support".into()],
1003 cost_input_per_mtok: 2.0,
1004 cost_output_per_mtok: 6.0,
1005 context_window: 131072,
1006 recommended: true,
1007 },
1008 ModelDef {
1009 name: "mistral-small-latest".into(),
1010 label: "Mistral Small".into(),
1011 tags: vec!["cheap".into(), "fast".into()],
1012 cost_input_per_mtok: 0.2,
1013 cost_output_per_mtok: 0.6,
1014 context_window: 32768,
1015 recommended: false,
1016 },
1017 ],
1018 tags: vec!["strong".into(), "code".into()],
1019 notes: "Mistral AI — strong European models.".into(),
1020 auth_flow: AuthFlow::default(),
1021 },
1022 ProviderDef {
1024 id: "fireworks".into(),
1025 label: "Fireworks AI".into(),
1026 adapter: "openai-compatible".into(),
1027 base_url: "https://api.fireworks.ai/inference/v1".into(),
1028 api_key_env: Some("FIREWORKS_API_KEY".into()),
1029 models: vec![ModelDef {
1030 name: "accounts/fireworks/models/llama-v4-maverick".into(),
1031 label: "Llama 4 Maverick".into(),
1032 tags: vec!["fast".into(), "code".into(), "tool_support".into()],
1033 cost_input_per_mtok: 0.2,
1034 cost_output_per_mtok: 0.2,
1035 context_window: 131072,
1036 recommended: true,
1037 }],
1038 tags: vec!["fast".into(), "code".into()],
1039 notes: "Fireworks AI — fast open-source model inference.".into(),
1040 auth_flow: AuthFlow::default(),
1041 },
1042 ProviderDef {
1044 id: "perplexity".into(),
1045 label: "Perplexity".into(),
1046 adapter: "openai-compatible".into(),
1047 base_url: "https://api.perplexity.ai".into(),
1048 api_key_env: Some("PERPLEXITY_API_KEY".into()),
1049 models: vec![
1050 ModelDef {
1051 name: "sonar-pro".into(),
1052 label: "Sonar Pro".into(),
1053 tags: vec!["search".into(), "web".into(), "tool_support".into()],
1054 cost_input_per_mtok: 3.0,
1055 cost_output_per_mtok: 15.0,
1056 context_window: 200000,
1057 recommended: true,
1058 },
1059 ModelDef {
1060 name: "sonar".into(),
1061 label: "Sonar".into(),
1062 tags: vec!["search".into(), "fast".into(), "web".into()],
1063 cost_input_per_mtok: 1.0,
1064 cost_output_per_mtok: 1.0,
1065 context_window: 127000,
1066 recommended: false,
1067 },
1068 ],
1069 tags: vec!["search".into(), "web".into()],
1070 notes: "Perplexity Sonar — live web/search-focused model routing.".into(),
1071 auth_flow: AuthFlow::default(),
1072 },
1073 ProviderDef {
1075 id: "cohere".into(),
1076 label: "Cohere".into(),
1077 adapter: "openai-compatible".into(),
1078 base_url: "https://api.cohere.com/compatibility/v1".into(),
1079 api_key_env: Some("COHERE_API_KEY".into()),
1080 models: vec![
1081 ModelDef {
1082 name: "command-a-03-2025".into(),
1083 label: "Command A".into(),
1084 tags: vec!["strong".into(), "tool_support".into(), "enterprise".into()],
1085 cost_input_per_mtok: 2.5,
1086 cost_output_per_mtok: 10.0,
1087 context_window: 256000,
1088 recommended: true,
1089 },
1090 ModelDef {
1091 name: "command-r7b-12-2024".into(),
1092 label: "Command R7B".into(),
1093 tags: vec!["fast".into(), "cheap".into(), "tool_support".into()],
1094 cost_input_per_mtok: 0.15,
1095 cost_output_per_mtok: 0.6,
1096 context_window: 128000,
1097 recommended: false,
1098 },
1099 ],
1100 tags: vec!["enterprise".into(), "tool_support".into()],
1101 notes: "Cohere Command models through the OpenAI-compatible endpoint.".into(),
1102 auth_flow: AuthFlow::default(),
1103 },
1104 ProviderDef {
1106 id: "google-oauth".into(),
1107 label: "Google (OAuth)".into(),
1108 adapter: "openai-compatible".into(),
1109 base_url: "https://generativelanguage.googleapis.com/v1beta/openai".into(),
1110 api_key_env: None,
1111 models: vec![ModelDef {
1112 name: "gemini-2.5-pro".into(),
1113 label: "Gemini 2.5 Pro".into(),
1114 tags: vec!["strong".into(), "vision".into(), "tool_support".into()],
1115 cost_input_per_mtok: 0.0,
1116 cost_output_per_mtok: 0.0,
1117 context_window: 1_000_000,
1118 recommended: true,
1119 }],
1120 tags: vec!["strong".into()],
1121 notes: "Google Gemini via OAuth device flow — no API key required.".into(),
1122 auth_flow: AuthFlow::DeviceOAuth {
1123 device_endpoint: "https://oauth2.googleapis.com/device/code".into(),
1124 token_endpoint: "https://oauth2.googleapis.com/token".into(),
1125 scope: "openid https://www.googleapis.com/auth/generative-language".into(),
1126 client_id_env: "GOOGLE_OAUTH_CLIENT_ID".into(),
1127 },
1128 },
1129 ProviderDef {
1131 id: "microsoft-oauth".into(),
1132 label: "Microsoft (OAuth)".into(),
1133 adapter: "openai-compatible".into(),
1134 base_url: "https://api.cognitive.microsoft.com/openai/v1".into(),
1135 api_key_env: None,
1136 models: vec![ModelDef {
1137 name: "gpt-4o".into(),
1138 label: "GPT-4o (Azure)".into(),
1139 tags: vec!["strong".into(), "vision".into(), "tool_support".into()],
1140 cost_input_per_mtok: 0.0,
1141 cost_output_per_mtok: 0.0,
1142 context_window: 128000,
1143 recommended: true,
1144 }],
1145 tags: vec!["strong".into()],
1146 notes: "Microsoft Azure OpenAI via OAuth device flow.".into(),
1147 auth_flow: AuthFlow::DeviceOAuth {
1148 device_endpoint: "https://login.microsoftonline.com/common/oauth2/v2.0/devicecode".into(),
1149 token_endpoint: "https://login.microsoftonline.com/common/oauth2/v2.0/token".into(),
1150 scope: "openid profile".into(),
1151 client_id_env: "MICROSOFT_OAUTH_CLIENT_ID".into(),
1152 },
1153 },
1154 ]
1155}
1156
1157pub fn find_provider(id: &str) -> Option<ProviderDef> {
1158 provider_registry().into_iter().find(|p| p.id == id)
1159}
1160
1161pub fn list_oauth_providers() -> Vec<ProviderDef> {
1163 provider_registry()
1164 .into_iter()
1165 .filter(|p| p.auth_flow != AuthFlow::ApiKey)
1166 .collect()
1167}
1168
1169pub fn find_model(provider_id: &str, model_name: &str) -> Option<ModelDef> {
1170 find_provider(provider_id).and_then(|p| p.models.into_iter().find(|m| m.name == model_name))
1171}
1172
1173pub fn default_models(provider_id: &str) -> Vec<String> {
1174 find_provider(provider_id)
1175 .map(|p| {
1176 let recommended: Vec<String> = p
1177 .models
1178 .iter()
1179 .filter(|m| m.recommended)
1180 .map(|m| m.name.clone())
1181 .collect();
1182 if recommended.is_empty() {
1183 p.models.into_iter().map(|m| m.name).collect()
1184 } else {
1185 recommended
1186 }
1187 })
1188 .unwrap_or_default()
1189}
1190
1191pub fn infer_caps_from_name(model_name: &str) -> ModelCaps {
1196 let n = model_name.to_ascii_lowercase();
1197
1198 let vision = n.contains("vision")
1199 || n.contains("-vl")
1200 || n.contains("vl-")
1201 || n.contains("multimodal")
1202 || n.contains("omni");
1203
1204 let tools = n.contains("coder")
1206 || n.contains("code")
1207 || n.contains("instruct")
1208 || n.contains("chat")
1209 || n.contains("nemotron")
1210 || n.contains("qwen")
1211 || n.contains("llama")
1212 || n.contains("mistral")
1213 || n.contains("deepseek")
1214 || n.contains("gpt")
1215 || n.contains("glm");
1216
1217 let is_large = [
1219 "70b", "72b", "120b", "122b", "175b", "180b", "235b", "253b", "340b", "397b", "405b",
1220 "480b", "675b", "ultra", "-large", "super",
1221 ]
1222 .iter()
1223 .any(|t| n.contains(t));
1224 let is_small = n.contains("flash")
1225 || n.contains("nano")
1226 || n.contains("mini")
1227 || n.contains("lite")
1228 || n.contains("-small")
1229 || n.contains("1b")
1230 || n.contains("2b")
1231 || n.contains("3b")
1232 || n.contains("7b")
1233 || n.contains("8b")
1234 || n.contains("9b");
1235
1236 let vendor_ctx: Option<u64> = if n.contains("gemini")
1241 && (n.contains("1.5") || n.contains("2.0") || n.contains("2.5") || n.contains("3."))
1242 {
1243 Some(2_000_000) } else if n.contains("deepseek-v4-pro")
1245 || n.contains("deepseek-v4.5")
1246 || n.contains("deepseek-v5")
1247 || (n.contains("deepseek") && n.contains("pro"))
1248 {
1249 Some(1_000_000) } else if n.contains("deepseek-v4")
1251 || n.contains("deepseek-r1")
1252 || n.contains("deepseek-reasoner")
1253 {
1254 Some(131_072) } else if n.contains("deepseek-chat")
1256 || n.contains("deepseek-coder")
1257 || n.contains("deepseek-v3")
1258 {
1259 Some(65_536) } else if n.contains("qwen3") && (n.contains("235b") || n.contains("max") || n.contains("plus"))
1261 {
1262 Some(1_000_000) } else if n.contains("qwen") && (n.contains("turbo") || n.contains("plus") || n.contains("max"))
1264 {
1265 Some(1_000_000) } else if n.contains("qwen3") || n.contains("qwen2.5") {
1267 Some(131_072)
1268 } else if n.contains("gpt-4.1")
1269 || n.contains("gpt-5")
1270 || n.contains("o1")
1271 || n.contains("o3")
1272 || n.contains("o4")
1273 {
1274 Some(1_000_000) } else if n.contains("gpt-4o") || n.contains("gpt-4-turbo") {
1276 Some(128_000)
1277 } else if n.contains("claude-sonnet-4")
1278 || n.contains("claude-opus-4")
1279 || n.contains("claude-haiku-4")
1280 {
1281 Some(200_000)
1282 } else if n.contains("claude-3.5") || n.contains("claude-3-5") {
1283 Some(200_000)
1284 } else if n.contains("claude") {
1285 Some(200_000)
1286 } else if n.contains("llama-3.3") || n.contains("llama3.3") || n.contains("llama-4") {
1287 Some(128_000)
1288 } else if n.contains("mistral-large") || n.contains("mixtral") || n.contains("codestral") {
1289 Some(128_000)
1290 } else if n.contains("grok-3") || n.contains("grok-4") {
1291 Some(131_072)
1292 } else if n.contains("kimi-k2") || n.contains("moonshot-v1-128k") {
1293 Some(200_000)
1294 } else if n.contains("kimi") || n.contains("moonshot") {
1295 Some(128_000)
1296 } else if n.contains("minimax-m") || n.contains("minimax-text-01") {
1297 Some(1_000_000)
1298 } else if n.contains("step-3") || n.contains("stepfun") {
1299 Some(131_072)
1300 } else if n.contains("glm-4.6") || n.contains("glm-5") {
1301 Some(200_000)
1302 } else if n.contains("glm-4") || n.contains("glm-z1") {
1303 Some(131_072)
1304 } else if n.contains("longctx") || n.contains("long-ctx") || n.contains("1m") {
1305 Some(1_000_000)
1306 } else if n.contains("128k") {
1307 Some(128_000)
1308 } else if n.contains("200k") {
1309 Some(200_000)
1310 } else if n.contains("256k") {
1311 Some(262_144)
1312 } else if n.contains("512k") {
1313 Some(524_288)
1314 } else {
1315 None
1316 };
1317
1318 let (latency, context_window) = if let Some(ctx) = vendor_ctx {
1319 let lat = if is_small {
1320 LatencyClass::Fast
1321 } else if is_large || ctx >= 200_000 {
1322 LatencyClass::Slow
1323 } else {
1324 LatencyClass::Medium
1325 };
1326 (lat, ctx)
1327 } else if is_large {
1328 (LatencyClass::Slow, 131_072)
1329 } else if is_small {
1330 (LatencyClass::Fast, 32_768)
1331 } else {
1332 (LatencyClass::Medium, 65_536)
1333 };
1334
1335 let max_output = (context_window / 8).clamp(4_096, 32_000);
1338
1339 ModelCaps {
1340 context_window,
1341 max_output,
1342 tools,
1343 vision,
1344 cost_input_per_mtok: 0.0,
1345 cost_output_per_mtok: 0.0,
1346 latency,
1347 }
1348}
1349
1350pub fn model_caps(provider_id: &str, model_name: &str) -> ModelCaps {
1351 let Some(model) = find_model(provider_id, model_name) else {
1352 return infer_caps_from_name(model_name);
1354 };
1355
1356 let latency = if model.tags.iter().any(|t| t == "fast") {
1357 LatencyClass::Fast
1358 } else if model.tags.iter().any(|t| t == "strong" || t == "reasoning") {
1359 LatencyClass::Slow
1360 } else {
1361 LatencyClass::Medium
1362 };
1363
1364 ModelCaps {
1365 context_window: model.context_window,
1366 max_output: model.context_window.min(32_000).max(4_096),
1367 tools: model
1368 .tags
1369 .iter()
1370 .any(|t| t == "tool_support" || t == "code"),
1371 vision: model.tags.iter().any(|t| t == "vision"),
1372 cost_input_per_mtok: model.cost_input_per_mtok,
1373 cost_output_per_mtok: model.cost_output_per_mtok,
1374 latency,
1375 }
1376}
1377
1378pub fn onboarding_providers() -> Vec<ProviderDef> {
1379 provider_registry()
1380}