1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
use serde::{Deserialize, Serialize};
mod as_str;
mod capabilities;
mod collection;
mod defaults;
mod description;
mod display;
mod format;
mod openrouter;
mod parse;
mod provider;
pub use capabilities::{
ModelCatalogEntry, ModelPricing, catalog_provider_keys, model_catalog_entry,
supported_models_for_provider,
};
/// Centralized enum for all supported model identifiers.
///
/// Variants are grouped by provider (see the `// ... models` section
/// comments below). With the serde derives here, each variant
/// serializes/deserializes as its variant name string; the `Default`
/// variant is [`ModelId::Gemini3FlashPreview`]. Parsing, display,
/// provider mapping, and capability lookups live in the sibling
/// modules of this file (`parse`, `display`, `provider`,
/// `capabilities`, ...).
///
/// NOTE(review): variant names intentionally use upper-case acronyms
/// (`GPT5`, not `Gpt5`, deviating from Rust naming convention) —
/// renaming would change the serde string forms, so leave as-is.
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ModelId {
// Gemini models
/// Gemini 3.1 Pro Preview - Latest Gemini 3.1 Pro flagship
Gemini31ProPreview,
/// Gemini 3.1 Pro Preview Custom Tools - Optimized for custom tools & bash
Gemini31ProPreviewCustomTools,
/// Gemini 3.1 Flash Lite Preview - Most cost-efficient model, offering fastest performance for high-frequency, lightweight tasks
Gemini31FlashLitePreview,
/// Gemini 3 Flash Preview - Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding
#[default]
Gemini3FlashPreview,
// OpenAI models
/// GPT-5 - Latest most capable OpenAI model (2025-08-07)
GPT5,
/// GPT-5.2 - Latest flagship general-purpose OpenAI model (2025-12-11)
GPT52,
/// GPT-5.2 Codex - Code-focused GPT-5.2 variant optimized for agentic coding with xhigh reasoning support
GPT52Codex,
/// GPT-5.4 - Mainline frontier GPT model for general-purpose and coding work
GPT54,
/// GPT-5.4 Pro - Higher-compute GPT-5.4 variant for difficult problems
GPT54Pro,
/// GPT-5.4 Nano - Lightweight GPT-5.4 variant optimized for speed and cost-efficiency
GPT54Nano,
/// GPT-5.4 Mini - Compact GPT-5.4 variant for cost-effective tasks with reduced reasoning overhead
GPT54Mini,
/// GPT-5.3 Codex - Code-focused GPT-5.3 variant optimized for agentic coding with reasoning effort support (low, medium, high, xhigh)
GPT53Codex,
/// GPT-5.1 Codex - Code-focused GPT-5.1 variant optimized for agentic coding
GPT51Codex,
/// GPT-5.1 Codex Max - Higher-compute GPT-5.1 Codex variant for longer-running engineering tasks
GPT51CodexMax,
/// GPT-5 Mini - Latest efficient OpenAI model (2025-08-07)
GPT5Mini,
/// GPT-5 Nano - Latest most cost-effective OpenAI model (2025-08-07)
GPT5Nano,
/// GPT-5 Codex - Code-focused GPT-5 variant optimized for agentic coding
GPT5Codex,
/// GPT-OSS 20B - OpenAI's open-source 20B parameter model using harmony
OpenAIGptOss20b,
/// GPT-OSS 120B - OpenAI's open-source 120B parameter model using harmony
OpenAIGptOss120b,
// Anthropic models
/// Claude Opus 4.6 - Next-gen flagship Anthropic model with extended thinking
ClaudeOpus46,
/// Claude Sonnet 4.6 - Balanced flagship Anthropic model with extended thinking
ClaudeSonnet46,
/// Claude Haiku 4.5 - Latest efficient Anthropic model (2025-10-15)
ClaudeHaiku45,
// GitHub Copilot models
/// GitHub Copilot auto model selection
CopilotAuto,
/// GitHub Copilot GPT-5.2 Codex
CopilotGPT52Codex,
/// GitHub Copilot GPT-5.1 Codex Max
CopilotGPT51CodexMax,
/// GitHub Copilot GPT-5.4
CopilotGPT54,
/// GitHub Copilot GPT-5.4 Mini
CopilotGPT54Mini,
/// GitHub Copilot Claude Sonnet 4.6
CopilotClaudeSonnet46,
// DeepSeek models
/// DeepSeek V3.2 Chat - Fast non-thinking mode
DeepSeekChat,
/// DeepSeek V3.2 Reasoner - Thinking mode with structured reasoning output
DeepSeekReasoner,
// Hugging Face models
/// DeepSeek V3.2 via Hugging Face router
HuggingFaceDeepseekV32,
/// OpenAI GPT-OSS 20B via Hugging Face router
HuggingFaceOpenAIGptOss20b,
/// OpenAI GPT-OSS 120B via Hugging Face router
HuggingFaceOpenAIGptOss120b,
/// DeepSeek V3.2 via Novita on Hugging Face router
HuggingFaceDeepseekV32Novita,
/// Xiaomi MiMo-V2-Flash via Novita on Hugging Face router
HuggingFaceXiaomiMimoV2FlashNovita,
/// MiniMax M2.5 via Novita on Hugging Face router
HuggingFaceMinimaxM25Novita,
/// Z.AI GLM-5 via Novita on Hugging Face router
HuggingFaceGlm5Novita,
/// Z.AI GLM-5.1 via zai-org provider on Hugging Face router
HuggingFaceGlm51ZaiOrg,
/// Qwen3-Coder-Next via Novita inference provider on Hugging Face router
HuggingFaceQwen3CoderNextNovita,
/// Qwen3.5-397B-A17B via Together inference provider on Hugging Face router
HuggingFaceQwen35397BA17BTogether,
/// Step 3.5 Flash via Hugging Face router
HuggingFaceStep35Flash,
// Z.ai models
/// GLM-5 - Flagship Z.ai foundation model for complex systems
ZaiGlm5,
/// GLM-5.1 - Next-gen Z.ai foundation model with improved reasoning
ZaiGlm51,
// Moonshot models
/// Kimi K2.5 - Moonshot.ai's flagship reasoning model
MoonshotKimiK25,
// Ollama models
/// GPT-OSS 20B - Open-weight GPT-OSS 20B model served via Ollama locally
OllamaGptOss20b,
/// GPT-OSS 20B Cloud - Cloud-hosted GPT-OSS 20B served via Ollama Cloud
OllamaGptOss20bCloud,
/// GPT-OSS 120B Cloud - Cloud-hosted GPT-OSS 120B served via Ollama Cloud
OllamaGptOss120bCloud,
/// Qwen3 1.7B - Qwen3 1.7B model served via Ollama
OllamaQwen317b,
/// Qwen3 Coder Next - Coding-optimized Qwen3 Next model served via Ollama locally
OllamaQwen3CoderNext,
/// DeepSeek V3.2 Cloud - DeepSeek V3.2 reasoning deployment via Ollama Cloud
OllamaDeepseekV32Cloud,
/// Qwen3 Next 80B Cloud - Next-generation Qwen3 80B via Ollama Cloud
OllamaQwen3Next80bCloud,
/// MiniMax-M2 Cloud - Cloud-hosted MiniMax-M2 model served via Ollama Cloud
OllamaMinimaxM2Cloud,
/// MiniMax-M2.7 Cloud - Cloud-hosted MiniMax-M2.7 model served via Ollama Cloud
OllamaMinimaxM27Cloud,
/// GLM-5 Cloud - Cloud-hosted GLM-5 model served via Ollama Cloud
OllamaGlm5Cloud,
/// GLM-5.1 Cloud - Cloud-hosted GLM-5.1 model served via Ollama Cloud
OllamaGlm51Cloud,
/// MiniMax-M2.5 Cloud - Cloud-hosted MiniMax-M2.5 model served via Ollama Cloud
OllamaMinimaxM25Cloud,
/// Gemini 3 Flash Preview Cloud - Google Gemini 3 Flash Preview via Ollama Cloud
OllamaGemini3FlashPreviewCloud,
/// Nemotron 3 Super Cloud - NVIDIA Nemotron 3 Super 120B via Ollama Cloud
OllamaNemotron3SuperCloud,
// MiniMax models
/// MiniMax-M2.7 - Recursive self-improvement flagship with 204.8K context
MinimaxM27,
/// MiniMax-M2.5 - Latest MiniMax model with further improvements in reasoning and coding
MinimaxM25,
// OpenRouter models
/// Qwen3 32B - Dense 32B Qwen3 deployment
OpenRouterQwen332b,
/// Qwen3 30B A3B - Active-parameter 30B Qwen3 model
OpenRouterQwen330bA3b,
/// Qwen3 30B A3B Instruct 2507 - Instruction-tuned Qwen3 30B A3B
OpenRouterQwen330bA3bInstruct2507,
/// Qwen3 30B A3B Thinking 2507 - Deliberative Qwen3 30B A3B release
OpenRouterQwen330bA3bThinking2507,
/// Qwen3 14B - Lightweight Qwen3 14B model
OpenRouterQwen314b,
/// Qwen3 8B - Compact Qwen3 8B deployment
OpenRouterQwen38b,
/// Qwen3 Next 80B A3B Instruct - Next-generation Qwen3 instruction model
OpenRouterQwen3Next80bA3bInstruct,
/// Qwen3 Next 80B A3B Thinking - Next-generation Qwen3 reasoning release
OpenRouterQwen3Next80bA3bThinking,
/// Qwen3.5-397B-A17B - Native vision-language model with linear attention and sparse MoE, 1M context window
// NOTE(review): doc comment describes Qwen3.5-397B-A17B but the variant
// name reads "Plus 02-15" — confirm which model this maps to against the
// string forms in the `parse`/`as_str`/`openrouter` modules.
OpenRouterQwen35Plus0215,
/// Qwen3 Coder - Qwen3-based coding model tuned for IDE workflows
OpenRouterQwen3Coder,
/// Qwen3 Coder Plus - Premium Qwen3 coding model with long context
OpenRouterQwen3CoderPlus,
/// Qwen3 Coder Flash - Latency optimised Qwen3 coding model
OpenRouterQwen3CoderFlash,
/// Qwen3 Coder 30B A3B Instruct - Large Mixture-of-Experts coding deployment
OpenRouterQwen3Coder30bA3bInstruct,
/// Qwen3 Coder Next - Next-generation Qwen3 coding model with enhanced reasoning
OpenRouterQwen3CoderNext,
/// DeepSeek V3.2 Chat - Official chat model via OpenRouter
OpenRouterDeepseekChat,
/// DeepSeek V3.2 - Standard model with thinking support via OpenRouter
OpenRouterDeepSeekV32,
/// DeepSeek V3.2 Reasoner - Thinking mode via OpenRouter
OpenRouterDeepseekReasoner,
/// DeepSeek V3.2 Speciale - Enhanced reasoning model (no tool-use)
OpenRouterDeepSeekV32Speciale,
/// DeepSeek V3.2 Exp - Experimental DeepSeek V3.2 listing
OpenRouterDeepSeekV32Exp,
/// DeepSeek Chat v3.1 - Advanced DeepSeek model via OpenRouter
OpenRouterDeepSeekChatV31,
/// DeepSeek R1 - DeepSeek R1 reasoning model with chain-of-thought
OpenRouterDeepSeekR1,
/// OpenAI gpt-oss-120b - Open-weight 120B reasoning model via OpenRouter
OpenRouterOpenAIGptOss120b,
/// OpenAI gpt-oss-120b:free - Open-weight 120B reasoning model free tier via OpenRouter
OpenRouterOpenAIGptOss120bFree,
/// OpenAI gpt-oss-20b - Open-weight 20B deployment via OpenRouter
OpenRouterOpenAIGptOss20b,
/// OpenAI GPT-5 - OpenAI GPT-5 model accessed through OpenRouter
OpenRouterOpenAIGpt5,
/// OpenAI GPT-5 Chat - Chat optimised GPT-5 endpoint without tool use
OpenRouterOpenAIGpt5Chat,
/// Gemini 3.1 Pro Preview - Google's latest Gemini 3.1 Pro model via OpenRouter
OpenRouterGoogleGemini31ProPreview,
/// Claude Sonnet 4.5 - Anthropic Claude Sonnet 4.5 listing
OpenRouterAnthropicClaudeSonnet45,
/// Claude Sonnet 4.6 - Anthropic Claude Sonnet 4.6 listing
OpenRouterAnthropicClaudeSonnet46,
/// Claude Haiku 4.5 - Anthropic Claude Haiku 4.5 listing
OpenRouterAnthropicClaudeHaiku45,
/// Mistral Large 3 2512 - Mistral Large 3 2512 model via OpenRouter
OpenRouterMistralaiMistralLarge2512,
/// DeepSeek V3.1 Nex N1 - Nex AGI DeepSeek V3.1 Nex N1 model via OpenRouter
OpenRouterNexAgiDeepseekV31NexN1,
/// Step 3.5 Flash (free) - StepFun's most capable open-source reasoning model via OpenRouter
OpenRouterStepfunStep35FlashFree,
/// Nemotron 3 Super (free) - NVIDIA's 120B hybrid MoE model via OpenRouter
OpenRouterNvidiaNemotron3Super120bA12bFree,
/// GLM-5 - Z.AI GLM-5 flagship foundation model via OpenRouter
OpenRouterZaiGlm5,
/// GLM-5.1 - Z.AI GLM-5.1 next-gen foundation model via OpenRouter
OpenRouterZaiGlm51,
/// MiniMax-M2.5 - MiniMax flagship model via OpenRouter
OpenRouterMinimaxM25,
}