// vtcode_config/models/model_id.rs
1use serde::{Deserialize, Serialize};
2
3mod as_str;
4mod capabilities;
5mod collection;
6mod defaults;
7mod description;
8mod display;
9mod format;
10mod openrouter;
11mod parse;
12mod provider;
13
14pub use capabilities::{
15 ModelCatalogEntry, ModelPricing, catalog_provider_keys, model_catalog_entry,
16 supported_models_for_provider,
17};
18
19/// Centralized enum for all supported model identifiers
20#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
21#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
22pub enum ModelId {
23 // Gemini models
24 /// Gemini 3.1 Pro Preview - Latest Gemini 3.1 Pro flagship
25 Gemini31ProPreview,
26 /// Gemini 3.1 Pro Preview Custom Tools - Optimized for custom tools & bash
27 Gemini31ProPreviewCustomTools,
28 /// Gemini 3.1 Flash Lite Preview - Most cost-efficient model, offering fastest performance for high-frequency, lightweight tasks
29 Gemini31FlashLitePreview,
30 /// Gemini 3.5 Flash - High-efficiency frontier model for fast inference
31 #[default]
32 Gemini35Flash,
33
34 // OpenAI models
35 /// GPT-5.5 - Next-gen OpenAI model dated release (2026-04-23)
36 GPT55,
37 /// GPT-5.4 - Mainline frontier GPT model for general-purpose and coding work
38 GPT54,
39 /// GPT-5.4 Pro - Higher-compute GPT-5.4 variant for difficult problems
40 GPT54Pro,
41 /// GPT-5.4 Nano - Lightweight GPT-5.4 variant optimized for speed and cost-efficiency
42 GPT54Nano,
43 /// GPT-5.4 Mini - Compact GPT-5.4 variant for cost-effective tasks with reduced reasoning overhead
44 GPT54Mini,
45 /// GPT-5.3 Codex - Code-focused GPT-5.3 variant optimized for agentic coding with reasoning effort support (low, medium, high, xhigh)
46 GPT53Codex,
47 /// GPT-OSS 20B - OpenAI's open-source 20B parameter model using harmony
48 OpenAIGptOss20b,
49 /// GPT-OSS 120B - OpenAI's open-source 120B parameter model using harmony
50 OpenAIGptOss120b,
51
52 // Anthropic models
53 /// Claude Opus 4.8 - Anthropic's most capable model for complex reasoning and agentic coding
54 ClaudeOpus48,
55 /// Claude Sonnet 4.6 - Balanced flagship Anthropic model in VT Code's conservative rollout
56 ClaudeSonnet46,
57 /// Claude Haiku 4.5 - Latest efficient Anthropic model (2025-10-15)
58 ClaudeHaiku45,
59 /// Claude Mythos Preview - Invitation-only Anthropic research preview for defensive cybersecurity workflows
60 ClaudeMythosPreview,
61 /// GitHub Copilot auto model selection
62 CopilotAuto,
63 /// GitHub Copilot GPT-5.2 Codex
64 CopilotGPT52Codex,
65 /// GitHub Copilot GPT-5.1 Codex Max
66 CopilotGPT51CodexMax,
67 /// GitHub Copilot GPT-5.4
68 CopilotGPT54,
69 /// GitHub Copilot GPT-5.4 Mini
70 CopilotGPT54Mini,
71 /// GitHub Copilot Claude Sonnet 4.6
72 CopilotClaudeSonnet46,
73
74 // DeepSeek models
75 /// DeepSeek V4 Pro - High-performance reasoning model with advanced thinking
76 DeepSeekV4Pro,
77 /// DeepSeek V4 Flash - Fast inference model for cost-effective reasoning
78 DeepSeekV4Flash,
79
80 // Mistral AI models
81 /// Mistral Large 3 - State-of-the-art open-weight general-purpose multimodal model
82 MistralLarge3,
83 // Hugging Face models
84 /// OpenAI GPT-OSS 20B via Hugging Face router
85 HuggingFaceOpenAIGptOss20b,
86 /// OpenAI GPT-OSS 120B via Hugging Face router
87 HuggingFaceOpenAIGptOss120b,
88 /// MiniMax M2.5 via Novita on Hugging Face router
89 HuggingFaceMinimaxM25Novita,
90 /// Z.AI GLM-5 via Novita on Hugging Face router
91 HuggingFaceGlm5Novita,
92 /// Z.AI GLM-5.1 via zai-org provider on Hugging Face router
93 HuggingFaceGlm51ZaiOrg,
94 /// Qwen3-Coder-Next via Novita inference provider on Hugging Face router
95 HuggingFaceQwen3CoderNextNovita,
96 /// Qwen3.5-397B-A17B via Together inference provider on Hugging Face router
97 HuggingFaceQwen35397BA17BTogether,
98 /// Kimi K2.6 via Novita on Hugging Face router
99 HuggingFaceKimiK26Novita,
100 /// DeepSeek V4 Flash via Novita on Hugging Face router
101 HuggingFaceDeepseekV4FlashNovita,
102 /// DeepSeek V4 Pro via Together on Hugging Face router
103 HuggingFaceDeepseekV4ProTogether,
104 /// Step 3.5 Flash via Hugging Face router
105 HuggingFaceStep35Flash,
106 /// Z.AI GLM-5.1 via DeepInfra inference provider on Hugging Face router
107 HuggingFaceGlm51Deepinfra,
108 /// MiniMax M2.7 via Novita on Hugging Face router
109 HuggingFaceMinimaxM27Novita,
110 /// DeepSeek V4 Pro via Novita on Hugging Face router
111 HuggingFaceDeepseekV4ProNovita,
112
113 // StepFun models
114 /// Step 3.7 Flash - StepFun's flagship multimodal reasoning model with tool calling
115 StepFun37Flash,
116
117 /// GLM-5 - Flagship Z.ai foundation model for complex systems
118 ZaiGlm5,
119 /// GLM-5.1 - Next-gen Z.ai foundation model with improved reasoning
120 ZaiGlm51,
121
122 // MiMo models
123 /// MiMo V2.5 Pro - Xiaomi's flagship reasoning model with 1M context
124 MiMoV25Pro,
125 /// MiMo V2.5 - Xiaomi's omni-modal model with full-modal understanding and 1M context
126 MiMoV25,
127
128 // Moonshot models
129 /// Kimi K2.6 - Moonshot.ai's latest 1T MoE flagship (32B active, MLA, MoonViT vision)
130 MoonshotKimiK26,
131
132 // OpenCode Zen models
133 /// GPT-5.4 - OpenCode Zen default flagship model
134 OpenCodeZenGPT54,
135 /// GPT-5.4 Mini - Lower-cost OpenCode Zen GPT option
136 OpenCodeZenGPT54Mini,
137 /// Claude Sonnet 4.6 - Anthropic-backed OpenCode Zen coding model
138 OpenCodeZenClaudeSonnet46,
139 /// GLM-5.1 - Z.AI model served through OpenCode Zen
140 OpenCodeZenGlm51,
141
142 // OpenCode Go models
143 /// GLM-5.1 - Z.AI model included with OpenCode Go
144 OpenCodeGoGlm51,
145 /// MiniMax M2.5 - OpenCode Go subscription model
146 OpenCodeGoMinimaxM25,
147 /// MiniMax M2.7 - Higher-tier OpenCode Go subscription model
148 OpenCodeGoMinimaxM27,
149
150 // Qwen models
151 /// Qwen 3.7 Max - Alibaba Cloud's flagship reasoning model with 131K context
152 Qwen37Max,
153 /// Qwen 3.6 Flash - Alibaba Cloud's fast inference model with 1M context
154 Qwen36Flash,
155 /// Qwen 3.6 Plus - Alibaba Cloud's balanced model with 131K context
156 Qwen36Plus,
157 /// DeepSeek V4 Flash via Qwen Cloud API
158 QwenDeepSeekV4Flash,
159 /// DeepSeek V4 Pro via Qwen Cloud API
160 QwenDeepSeekV4Pro,
161 /// GLM-5.1 via Qwen Cloud API
162 QwenGlm51,
163
164 // Ollama models
165 /// GPT-OSS 20B - Open-weight GPT-OSS 20B model served via Ollama locally
166 OllamaGptOss20b,
167 /// GPT-OSS 20B Cloud - Cloud-hosted GPT-OSS 20B served via Ollama Cloud
168 OllamaGptOss20bCloud,
169 /// GPT-OSS 120B Cloud - Cloud-hosted GPT-OSS 120B served via Ollama Cloud
170 OllamaGptOss120bCloud,
171 /// Qwen3 1.7B - Qwen3 1.7B model served via Ollama
172 OllamaQwen317b,
173 /// Qwen3 Coder Next - Coding-optimized Qwen3 Next model served via Ollama locally
174 OllamaQwen3CoderNext,
175 /// DeepSeek V4 Flash Cloud - Fast inference DeepSeek V4 Flash model via Ollama Cloud
176 OllamaDeepseekV4FlashCloud,
177 /// DeepSeek V4 Pro Cloud - High-performance DeepSeek V4 Pro model via Ollama Cloud
178 OllamaDeepseekV4ProCloud,
179 /// Qwen3 Next 80B Cloud - Next-generation Qwen3 80B via Ollama Cloud
180 OllamaQwen3Next80bCloud,
181 /// MiniMax-M2 Cloud - Cloud-hosted MiniMax-M2 model served via Ollama Cloud
182 OllamaMinimaxM2Cloud,
183 /// MiniMax-M2.7 Cloud - Cloud-hosted MiniMax-M2.7 model served via Ollama Cloud
184 OllamaMinimaxM27Cloud,
185 /// GLM-5 Cloud - Cloud-hosted GLM-5 model served via Ollama Cloud
186 OllamaGlm5Cloud,
187 /// GLM-5.1 Cloud - Cloud-hosted GLM-5.1 model served via Ollama Cloud
188 OllamaGlm51Cloud,
189 /// MiniMax-M2.5 Cloud - Cloud-hosted MiniMax-M2.5 model served via Ollama Cloud
190 OllamaMinimaxM25Cloud,
191 /// Gemini 3 Flash Preview Cloud - Google Gemini 3 Flash Preview via Ollama Cloud
192 OllamaGemini3FlashPreviewCloud,
193 /// Kimi K2.6 Cloud - Moonshot Kimi K2.6 via Ollama Cloud
194 OllamaKimiK26Cloud,
195 /// Nemotron 3 Super Cloud - NVIDIA Nemotron 3 Super 120B via Ollama Cloud
196 OllamaNemotron3SuperCloud,
197 /// Laguna XS.2 - Poolside's 33B MoE model (3B activated) for agentic coding via Ollama
198 OllamaLagunaXs2,
199
200 // llama.cpp models
201 /// Qwen 3.6 27B - Dense Qwen 3.6 local model served through llama.cpp
202 LlamaCppQwen3627b,
203 /// Qwen 3.6 35B A3B - MoE Qwen 3.6 local model served through llama.cpp
204 LlamaCppQwen3635bA3b,
205 /// Gemma 4 26B A4B - Desktop Gemma 4 MoE model served through llama.cpp
206 LlamaCppGemma426bA4b,
207 /// Gemma 4 E4B - Tiny-footprint Gemma 4 model served through llama.cpp
208 LlamaCppGemma4E4b,
209 /// GPT-OSS 20B - OpenAI open-weight model served through llama.cpp
210 LlamaCppGptOss20b,
211 /// Step 3.5 Flash - StepFun local model served through llama.cpp
212 LlamaCppStep35Flash,
213
214 // MiniMax models
215 /// MiniMax-M2.7 - Recursive self-improvement flagship with 204.8K context
216 MinimaxM27,
217 /// MiniMax-M2.5 - Latest MiniMax model with further improvements in reasoning and coding
218 MinimaxM25,
219
220 // OpenRouter models
221 /// Qwen3 32B - Dense 32B Qwen3 deployment
222 OpenRouterQwen332b,
223 /// Qwen3 30B A3B - Active-parameter 30B Qwen3 model
224 OpenRouterQwen330bA3b,
225 /// Qwen3 30B A3B Instruct 2507 - Instruction-tuned Qwen3 30B A3B
226 OpenRouterQwen330bA3bInstruct2507,
227 /// Qwen3 30B A3B Thinking 2507 - Deliberative Qwen3 30B A3B release
228 OpenRouterQwen330bA3bThinking2507,
229 /// Qwen3 14B - Lightweight Qwen3 14B model
230 OpenRouterQwen314b,
231 /// Qwen3 8B - Compact Qwen3 8B deployment
232 OpenRouterQwen38b,
233 /// Qwen3 Next 80B A3B Instruct - Next-generation Qwen3 instruction model
234 OpenRouterQwen3Next80bA3bInstruct,
235 /// Qwen3 Next 80B A3B Thinking - Next-generation Qwen3 reasoning release
236 OpenRouterQwen3Next80bA3bThinking,
237 /// Qwen3.5-397B-A17B - Native vision-language model with linear attention and sparse MoE, 1M context window
238 OpenRouterQwen35Plus0215,
239 /// Qwen3 Coder - Qwen3-based coding model tuned for IDE workflows
240 OpenRouterQwen3Coder,
241 /// Qwen3 Coder Plus - Premium Qwen3 coding model with long context
242 OpenRouterQwen3CoderPlus,
243 /// Qwen3 Coder Flash - Latency optimised Qwen3 coding model
244 OpenRouterQwen3CoderFlash,
245 /// Qwen3 Coder 30B A3B Instruct - Large Mixture-of-Experts coding deployment
246 OpenRouterQwen3Coder30bA3bInstruct,
247 /// Qwen3 Coder Next - Next-generation Qwen3 coding model with enhanced reasoning
248 OpenRouterQwen3CoderNext,
249 /// DeepSeek V4 Pro - High-performance reasoning model via OpenRouter
250 OpenRouterDeepSeekV4Pro,
251 /// DeepSeek V4 Flash - Fast inference model via OpenRouter
252 OpenRouterDeepSeekV4Flash,
253 /// DeepSeek R1 - DeepSeek R1 reasoning model with chain-of-thought
254 OpenRouterDeepSeekR1,
255 /// OpenAI gpt-oss-120b - Open-weight 120B reasoning model via OpenRouter
256 OpenRouterOpenAIGptOss120b,
257 /// OpenAI gpt-oss-120b:free - Open-weight 120B reasoning model free tier via OpenRouter
258 OpenRouterOpenAIGptOss120bFree,
259 /// OpenAI gpt-oss-20b - Open-weight 20B deployment via OpenRouter
260 OpenRouterOpenAIGptOss20b,
261 /// OpenAI GPT-5 - OpenAI GPT-5 model accessed through OpenRouter
262 OpenRouterOpenAIGpt5,
263 /// OpenAI GPT-5.5 - OpenAI GPT-5.5 model accessed through OpenRouter
264 OpenRouterOpenAIGpt55,
265 /// OpenAI GPT-5 Chat - Chat optimised GPT-5 endpoint without tool use
266 OpenRouterOpenAIGpt5Chat,
267
268 /// Gemini 3.1 Pro Preview - Google's latest Gemini 3.1 Pro model via OpenRouter
269 OpenRouterGoogleGemini31ProPreview,
270
271 /// Claude Sonnet 4.6 - Anthropic Claude Sonnet 4.6 listing
272 OpenRouterAnthropicClaudeSonnet46,
273 /// Claude Haiku 4.5 - Anthropic Claude Haiku 4.5 listing
274 OpenRouterAnthropicClaudeHaiku45,
275 /// Mistral Large 3 2512 - Mistral Large 3 2512 model via OpenRouter
276 OpenRouterMistralaiMistralLarge2512,
277 /// DeepSeek V3.1 Nex N1 - Nex AGI DeepSeek V3.1 Nex N1 model via OpenRouter
278 OpenRouterNexAgiDeepseekV31NexN1,
279 /// Step 3.5 Flash (free) - StepFun's most capable open-source reasoning model via OpenRouter
280 OpenRouterStepfunStep35FlashFree,
281 /// Nemotron 3 Super (free) - NVIDIA's 120B hybrid MoE model via OpenRouter
282 OpenRouterNvidiaNemotron3Super120bA12bFree,
283 /// GLM-5 - Z.AI GLM-5 flagship foundation model via OpenRouter
284 OpenRouterZaiGlm5,
285 /// GLM-5.1 - Z.AI GLM-5.1 next-gen foundation model via OpenRouter
286 OpenRouterZaiGlm51,
287 /// MiniMax-M2.5 - MiniMax flagship model via OpenRouter
288 OpenRouterMinimaxM25,
289 /// Kimi K2.6 - Moonshot AI's next-generation multimodal model via OpenRouter
290 OpenRouterMoonshotaiKimiK26,
291 /// Qwen3.7 Max - Alibaba's flagship Qwen3.7 model for coding and agentic workloads via OpenRouter
292 OpenRouterQwenQwen37Max,
293 /// Hy3 Preview - Tencent's high-efficiency MoE model for agentic workflows via OpenRouter
294 OpenRouterTencentHy3Preview,
295 /// Grok Build 0.1 - xAI's fast coding model for agentic software engineering via OpenRouter
296 OpenRouterXAiGrokBuild01,
297 /// MiMo-V2.5-Pro - Xiaomi's flagship agentic model for complex software engineering via OpenRouter
298 OpenRouterXiaomiMimoV25Pro,
299 /// Laguna XS.2 (free) - Poolside's efficient free coding agent model via OpenRouter
300 OpenRouterPoolsideLagunaXs2Free,
301 /// Laguna M.1 (free) - Poolside's flagship free coding agent model via OpenRouter
302 OpenRouterPoolsideLagunaM1Free,
303
304 // Poolside models
305 /// Laguna M.1 - Poolside's flagship MoE coding agent model
306 PoolsideLagunaM1,
307 /// Laguna XS.2 - Poolside's efficient MoE coding agent model
308 PoolsideLagunaXs2,
309}