// infiniloom_engine/tokenizer/models.rs
1//! Token model definitions for various LLM providers
2//!
3//! This module defines the supported LLM models and their tokenizer properties.
4
5/// Supported LLM models for token counting
6///
7/// Models are grouped by their tokenizer encoding family. Use [`TokenModel::from_model_name`]
8/// to parse user-friendly model names like "gpt-5.2", "o3", "claude-sonnet", etc.
/// Supported LLM models for token counting.
///
/// Models are grouped by their tokenizer encoding family. Use [`TokenModel::from_model_name`]
/// to parse user-friendly model names like "gpt-5.2", "o3", "claude-sonnet", etc.
///
/// OpenAI variants map to an exact tiktoken encoding (see [`TokenModel::uses_o200k`] and
/// [`TokenModel::uses_cl100k`]); all other vendors are estimated via a chars-per-token
/// ratio (see [`TokenModel::chars_per_token`]). Unit variants serialize by name via the
/// serde derives below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub enum TokenModel {
    // =========================================================================
    // OpenAI Models - o200k_base encoding (EXACT tokenization)
    // =========================================================================
    /// GPT-5.2 - Latest flagship model (Dec 2025), uses o200k_base
    Gpt52,
    /// GPT-5.2 Pro - Enhanced GPT-5.2 variant, uses o200k_base
    Gpt52Pro,
    /// GPT-5.1 - Previous flagship (Nov 2025), uses o200k_base
    Gpt51,
    /// GPT-5.1 Mini - Smaller GPT-5.1 variant, uses o200k_base
    Gpt51Mini,
    /// GPT-5.1 Codex - Code-specialized variant, uses o200k_base
    Gpt51Codex,
    /// GPT-5 - Original GPT-5 (Aug 2025), uses o200k_base
    Gpt5,
    /// GPT-5 Mini - Smaller GPT-5 variant, uses o200k_base
    Gpt5Mini,
    /// GPT-5 Nano - Smallest GPT-5 variant, uses o200k_base
    Gpt5Nano,
    /// O4 Mini - Latest reasoning model, uses o200k_base
    O4Mini,
    /// O3 - Reasoning model, uses o200k_base
    O3,
    /// O3 Mini - Smaller O3 variant, uses o200k_base
    O3Mini,
    /// O1 - Original reasoning model, uses o200k_base
    O1,
    /// O1 Mini - Smaller O1 variant, uses o200k_base
    O1Mini,
    /// O1 Preview - O1 preview version, uses o200k_base
    O1Preview,
    /// GPT-4o - Omni model, uses o200k_base encoding (most efficient)
    Gpt4o,
    /// GPT-4o Mini - Smaller GPT-4o variant, uses o200k_base encoding
    Gpt4oMini,

    // =========================================================================
    // OpenAI Models - cl100k_base encoding (EXACT tokenization, legacy)
    // =========================================================================
    /// GPT-4/GPT-4 Turbo - uses cl100k_base encoding (legacy)
    Gpt4,
    /// GPT-3.5-turbo - uses cl100k_base encoding (legacy)
    Gpt35Turbo,

    // =========================================================================
    // Anthropic Claude - Estimation (~3.5 chars/token)
    // =========================================================================
    /// Claude (all versions) - uses estimation based on ~3.5 chars/token
    Claude,

    // =========================================================================
    // Google Gemini - Estimation (~3.8 chars/token)
    // =========================================================================
    /// Gemini (all versions including 3, 2.5, 1.5) - estimation ~3.8 chars/token
    Gemini,

    // =========================================================================
    // Meta Llama - Estimation (~3.5 chars/token)
    // =========================================================================
    /// Llama 3/4 - estimation based on ~3.5 chars/token
    Llama,
    /// CodeLlama - more granular for code (~3.2 chars/token)
    CodeLlama,

    // =========================================================================
    // Mistral AI - Estimation (~3.5 chars/token)
    // =========================================================================
    /// Mistral (Large, Medium, Small, Codestral) - estimation ~3.5 chars/token
    Mistral,

    // =========================================================================
    // DeepSeek - Estimation (~3.5 chars/token)
    // =========================================================================
    /// DeepSeek (V3, R1, Coder) - estimation ~3.5 chars/token
    DeepSeek,

    // =========================================================================
    // Qwen (Alibaba) - Estimation (~3.5 chars/token)
    // =========================================================================
    /// Qwen (Qwen3, Qwen2.5) - estimation ~3.5 chars/token
    Qwen,

    // =========================================================================
    // Cohere - Estimation (~3.6 chars/token)
    // =========================================================================
    /// Cohere (Command R+, Command R) - estimation ~3.6 chars/token
    Cohere,

    // =========================================================================
    // xAI Grok - Estimation (~3.5 chars/token)
    // =========================================================================
    /// Grok (Grok 2, Grok 3) - estimation ~3.5 chars/token
    Grok,
}
105
106impl TokenModel {
107 /// Get human-readable name
108 pub fn name(&self) -> &'static str {
109 match self {
110 // OpenAI o200k_base models
111 Self::Gpt52 => "gpt-5.2",
112 Self::Gpt52Pro => "gpt-5.2-pro",
113 Self::Gpt51 => "gpt-5.1",
114 Self::Gpt51Mini => "gpt-5.1-mini",
115 Self::Gpt51Codex => "gpt-5.1-codex",
116 Self::Gpt5 => "gpt-5",
117 Self::Gpt5Mini => "gpt-5-mini",
118 Self::Gpt5Nano => "gpt-5-nano",
119 Self::O4Mini => "o4-mini",
120 Self::O3 => "o3",
121 Self::O3Mini => "o3-mini",
122 Self::O1 => "o1",
123 Self::O1Mini => "o1-mini",
124 Self::O1Preview => "o1-preview",
125 Self::Gpt4o => "gpt-4o",
126 Self::Gpt4oMini => "gpt-4o-mini",
127 // OpenAI cl100k_base models (legacy)
128 Self::Gpt4 => "gpt-4",
129 Self::Gpt35Turbo => "gpt-3.5-turbo",
130 // Other vendors
131 Self::Claude => "claude",
132 Self::Gemini => "gemini",
133 Self::Llama => "llama",
134 Self::CodeLlama => "codellama",
135 Self::Mistral => "mistral",
136 Self::DeepSeek => "deepseek",
137 Self::Qwen => "qwen",
138 Self::Cohere => "cohere",
139 Self::Grok => "grok",
140 }
141 }
142
143 /// Get average characters per token (for estimation fallback)
144 pub fn chars_per_token(&self) -> f32 {
145 match self {
146 // OpenAI o200k_base models - most efficient encoding (~4.0 chars/token)
147 Self::Gpt52
148 | Self::Gpt52Pro
149 | Self::Gpt51
150 | Self::Gpt51Mini
151 | Self::Gpt51Codex
152 | Self::Gpt5
153 | Self::Gpt5Mini
154 | Self::Gpt5Nano
155 | Self::O4Mini
156 | Self::O3
157 | Self::O3Mini
158 | Self::O1
159 | Self::O1Mini
160 | Self::O1Preview
161 | Self::Gpt4o
162 | Self::Gpt4oMini => 4.0,
163 // OpenAI cl100k_base models (legacy) - slightly less efficient
164 Self::Gpt4 | Self::Gpt35Turbo => 3.7,
165 // Anthropic Claude
166 Self::Claude => 3.5,
167 // Google Gemini - slightly more verbose
168 Self::Gemini => 3.8,
169 // Meta Llama
170 Self::Llama => 3.5,
171 Self::CodeLlama => 3.2, // Code-focused, more granular
172 // Mistral AI
173 Self::Mistral => 3.5,
174 // DeepSeek
175 Self::DeepSeek => 3.5,
176 // Qwen (Alibaba)
177 Self::Qwen => 3.5,
178 // Cohere - slightly more verbose
179 Self::Cohere => 3.6,
180 // xAI Grok
181 Self::Grok => 3.5,
182 }
183 }
184
185 /// Whether this model has an exact tokenizer available (via tiktoken)
186 pub fn has_exact_tokenizer(&self) -> bool {
187 matches!(
188 self,
189 // All OpenAI models have exact tokenizers
190 Self::Gpt52
191 | Self::Gpt52Pro
192 | Self::Gpt51
193 | Self::Gpt51Mini
194 | Self::Gpt51Codex
195 | Self::Gpt5
196 | Self::Gpt5Mini
197 | Self::Gpt5Nano
198 | Self::O4Mini
199 | Self::O3
200 | Self::O3Mini
201 | Self::O1
202 | Self::O1Mini
203 | Self::O1Preview
204 | Self::Gpt4o
205 | Self::Gpt4oMini
206 | Self::Gpt4
207 | Self::Gpt35Turbo
208 )
209 }
210
211 /// Whether this model uses the o200k_base encoding
212 pub fn uses_o200k(&self) -> bool {
213 matches!(
214 self,
215 Self::Gpt52
216 | Self::Gpt52Pro
217 | Self::Gpt51
218 | Self::Gpt51Mini
219 | Self::Gpt51Codex
220 | Self::Gpt5
221 | Self::Gpt5Mini
222 | Self::Gpt5Nano
223 | Self::O4Mini
224 | Self::O3
225 | Self::O3Mini
226 | Self::O1
227 | Self::O1Mini
228 | Self::O1Preview
229 | Self::Gpt4o
230 | Self::Gpt4oMini
231 )
232 }
233
234 /// Whether this model uses the cl100k_base encoding (legacy)
235 pub fn uses_cl100k(&self) -> bool {
236 matches!(self, Self::Gpt4 | Self::Gpt35Turbo)
237 }
238
239 /// Parse a model name string into a TokenModel
240 ///
241 /// Supports various formats:
242 /// - OpenAI: "gpt-5.2", "gpt-5.2-pro", "gpt-5.1", "gpt-5", "o3", "o1", "gpt-4o", etc.
243 /// - Claude: "claude", "claude-3", "claude-4", "claude-opus", "claude-sonnet", "claude-haiku"
244 /// - Gemini: "gemini", "gemini-pro", "gemini-flash", "gemini-2.5", "gemini-3"
245 /// - Llama: "llama", "llama-3", "llama-4", "codellama"
246 /// - Others: "mistral", "deepseek", "qwen", "cohere", "grok"
247 ///
248 /// # Examples
249 ///
250 /// ```
251 /// use infiniloom_engine::tokenizer::TokenModel;
252 ///
253 /// assert_eq!(TokenModel::from_model_name("gpt-5.2"), Some(TokenModel::Gpt52));
254 /// assert_eq!(TokenModel::from_model_name("o3"), Some(TokenModel::O3));
255 /// assert_eq!(TokenModel::from_model_name("claude-sonnet"), Some(TokenModel::Claude));
256 /// assert_eq!(TokenModel::from_model_name("unknown-model"), None);
257 /// ```
258 pub fn from_model_name(name: &str) -> Option<Self> {
259 let name_lower = name.to_lowercase();
260 let name_lower = name_lower.as_str();
261
262 match name_lower {
263 // =================================================================
264 // OpenAI GPT-5.2 family
265 // =================================================================
266 "gpt-5.2" | "gpt5.2" | "gpt-52" | "gpt52" => Some(Self::Gpt52),
267 "gpt-5.2-pro" | "gpt5.2-pro" | "gpt-52-pro" | "gpt52pro" => Some(Self::Gpt52Pro),
268 s if s.starts_with("gpt-5.2-") || s.starts_with("gpt5.2-") => Some(Self::Gpt52),
269
270 // =================================================================
271 // OpenAI GPT-5.1 family
272 // =================================================================
273 "gpt-5.1" | "gpt5.1" | "gpt-51" | "gpt51" => Some(Self::Gpt51),
274 "gpt-5.1-mini" | "gpt5.1-mini" | "gpt-51-mini" => Some(Self::Gpt51Mini),
275 "gpt-5.1-codex" | "gpt5.1-codex" | "gpt-51-codex" => Some(Self::Gpt51Codex),
276 s if s.starts_with("gpt-5.1-") || s.starts_with("gpt5.1-") => Some(Self::Gpt51),
277
278 // =================================================================
279 // OpenAI GPT-5 family
280 // =================================================================
281 "gpt-5" | "gpt5" => Some(Self::Gpt5),
282 "gpt-5-mini" | "gpt5-mini" => Some(Self::Gpt5Mini),
283 "gpt-5-nano" | "gpt5-nano" => Some(Self::Gpt5Nano),
284 s if s.starts_with("gpt-5-") || s.starts_with("gpt5-") => Some(Self::Gpt5),
285
286 // =================================================================
287 // OpenAI O-series reasoning models
288 // =================================================================
289 "o4-mini" | "o4mini" => Some(Self::O4Mini),
290 "o3" => Some(Self::O3),
291 "o3-mini" | "o3mini" => Some(Self::O3Mini),
292 s if s.starts_with("o3-") => Some(Self::O3),
293 "o1" => Some(Self::O1),
294 "o1-mini" | "o1mini" => Some(Self::O1Mini),
295 "o1-preview" | "o1preview" => Some(Self::O1Preview),
296 s if s.starts_with("o1-") => Some(Self::O1),
297
298 // =================================================================
299 // OpenAI GPT-4o family
300 // =================================================================
301 "gpt-4o" | "gpt4o" => Some(Self::Gpt4o),
302 "gpt-4o-mini" | "gpt4o-mini" | "gpt-4o-mini-2024-07-18" => Some(Self::Gpt4oMini),
303 s if s.starts_with("gpt-4o-") || s.starts_with("gpt4o-") => Some(Self::Gpt4o),
304
305 // =================================================================
306 // OpenAI GPT-4 family (legacy, cl100k_base)
307 // =================================================================
308 "gpt-4" | "gpt4" | "gpt-4-turbo" | "gpt4-turbo" | "gpt-4-turbo-preview" => {
309 Some(Self::Gpt4)
310 },
311 s if s.starts_with("gpt-4-") && !s.contains("4o") => Some(Self::Gpt4),
312
313 // =================================================================
314 // OpenAI GPT-3.5 family (legacy, cl100k_base)
315 // =================================================================
316 "gpt-3.5-turbo" | "gpt-35-turbo" | "gpt3.5-turbo" | "gpt35-turbo" | "gpt-3.5" => {
317 Some(Self::Gpt35Turbo)
318 },
319 s if s.starts_with("gpt-3.5-") || s.starts_with("gpt-35-") => Some(Self::Gpt35Turbo),
320
321 // =================================================================
322 // Anthropic Claude (all versions map to Claude)
323 // =================================================================
324 "claude" | "claude-3" | "claude-3.5" | "claude-4" | "claude-4.5" | "claude-opus"
325 | "claude-opus-4" | "claude-opus-4.5" | "claude-sonnet" | "claude-sonnet-4"
326 | "claude-sonnet-4.5" | "claude-haiku" | "claude-haiku-4" | "claude-haiku-4.5"
327 | "claude-instant" => Some(Self::Claude),
328 s if s.starts_with("claude") => Some(Self::Claude),
329
330 // =================================================================
331 // Google Gemini (all versions map to Gemini)
332 // =================================================================
333 "gemini" | "gemini-pro" | "gemini-flash" | "gemini-ultra" | "gemini-1.5"
334 | "gemini-1.5-pro" | "gemini-1.5-flash" | "gemini-2" | "gemini-2.5"
335 | "gemini-2.5-pro" | "gemini-2.5-flash" | "gemini-3" | "gemini-3-pro" => {
336 Some(Self::Gemini)
337 },
338 s if s.starts_with("gemini") => Some(Self::Gemini),
339
340 // =================================================================
341 // Meta Llama
342 // =================================================================
343 "llama" | "llama-2" | "llama-3" | "llama-3.1" | "llama-3.2" | "llama-4" | "llama2"
344 | "llama3" | "llama4" => Some(Self::Llama),
345 "codellama" | "code-llama" | "llama-code" => Some(Self::CodeLlama),
346 s if s.starts_with("llama") && !s.contains("code") => Some(Self::Llama),
347 s if s.contains("codellama") || s.contains("code-llama") => Some(Self::CodeLlama),
348
349 // =================================================================
350 // Mistral AI
351 // =================================================================
352 "mistral" | "mistral-large" | "mistral-large-3" | "mistral-medium"
353 | "mistral-medium-3" | "mistral-small" | "mistral-small-3" | "codestral"
354 | "devstral" | "ministral" => Some(Self::Mistral),
355 s if s.starts_with("mistral") || s.contains("stral") => Some(Self::Mistral),
356
357 // =================================================================
358 // DeepSeek
359 // =================================================================
360 "deepseek" | "deepseek-v3" | "deepseek-v3.2" | "deepseek-r1" | "deepseek-coder"
361 | "deepseek-chat" | "deepseek-reasoner" => Some(Self::DeepSeek),
362 s if s.starts_with("deepseek") => Some(Self::DeepSeek),
363
364 // =================================================================
365 // Qwen (Alibaba)
366 // =================================================================
367 "qwen" | "qwen2" | "qwen2.5" | "qwen3" | "qwen-72b" | "qwen-7b" | "qwen-coder" => {
368 Some(Self::Qwen)
369 },
370 s if s.starts_with("qwen") => Some(Self::Qwen),
371
372 // =================================================================
373 // Cohere
374 // =================================================================
375 "cohere" | "command-r" | "command-r-plus" | "command-r+" | "command" => {
376 Some(Self::Cohere)
377 },
378 s if s.starts_with("cohere") || s.starts_with("command") => Some(Self::Cohere),
379
380 // =================================================================
381 // xAI Grok
382 // =================================================================
383 "grok" | "grok-1" | "grok-2" | "grok-3" | "grok-beta" => Some(Self::Grok),
384 s if s.starts_with("grok") => Some(Self::Grok),
385
386 // Unknown model
387 _ => None,
388 }
389 }
390
391 /// Get all available models
392 pub fn all() -> &'static [Self] {
393 &[
394 Self::Gpt52,
395 Self::Gpt52Pro,
396 Self::Gpt51,
397 Self::Gpt51Mini,
398 Self::Gpt51Codex,
399 Self::Gpt5,
400 Self::Gpt5Mini,
401 Self::Gpt5Nano,
402 Self::O4Mini,
403 Self::O3,
404 Self::O3Mini,
405 Self::O1,
406 Self::O1Mini,
407 Self::O1Preview,
408 Self::Gpt4o,
409 Self::Gpt4oMini,
410 Self::Gpt4,
411 Self::Gpt35Turbo,
412 Self::Claude,
413 Self::Gemini,
414 Self::Llama,
415 Self::CodeLlama,
416 Self::Mistral,
417 Self::DeepSeek,
418 Self::Qwen,
419 Self::Cohere,
420 Self::Grok,
421 ]
422 }
423
424 /// Get the vendor/provider name for this model
425 pub fn vendor(&self) -> &'static str {
426 match self {
427 Self::Gpt52
428 | Self::Gpt52Pro
429 | Self::Gpt51
430 | Self::Gpt51Mini
431 | Self::Gpt51Codex
432 | Self::Gpt5
433 | Self::Gpt5Mini
434 | Self::Gpt5Nano
435 | Self::O4Mini
436 | Self::O3
437 | Self::O3Mini
438 | Self::O1
439 | Self::O1Mini
440 | Self::O1Preview
441 | Self::Gpt4o
442 | Self::Gpt4oMini
443 | Self::Gpt4
444 | Self::Gpt35Turbo => "OpenAI",
445 Self::Claude => "Anthropic",
446 Self::Gemini => "Google",
447 Self::Llama | Self::CodeLlama => "Meta",
448 Self::Mistral => "Mistral AI",
449 Self::DeepSeek => "DeepSeek",
450 Self::Qwen => "Alibaba",
451 Self::Cohere => "Cohere",
452 Self::Grok => "xAI",
453 }
454 }
455}