infiniloom_engine/tokenizer/models.rs
//! Token model definitions for various LLM providers
//!
//! This module defines the supported LLM models and their tokenizer properties.

/// Supported LLM models for token counting
///
/// Models are grouped by their tokenizer encoding family. Use [`TokenModel::from_model_name`]
/// to parse user-friendly model names like "gpt-5.2", "o3", "claude-sonnet", etc.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub enum TokenModel {
    // =========================================================================
    // OpenAI Models - o200k_base encoding (EXACT tokenization)
    // =========================================================================
    /// GPT-5.2 - Latest flagship model (Dec 2025), uses o200k_base
    Gpt52,
    /// GPT-5.2 Pro - Enhanced GPT-5.2 variant, uses o200k_base
    Gpt52Pro,
    /// GPT-5.1 - Previous flagship (Nov 2025), uses o200k_base
    Gpt51,
    /// GPT-5.1 Mini - Smaller GPT-5.1 variant, uses o200k_base
    Gpt51Mini,
    /// GPT-5.1 Codex - Code-specialized variant, uses o200k_base
    Gpt51Codex,
    /// GPT-5 - Original GPT-5 (Aug 2025), uses o200k_base
    Gpt5,
    /// GPT-5 Mini - Smaller GPT-5 variant, uses o200k_base
    Gpt5Mini,
    /// GPT-5 Nano - Smallest GPT-5 variant, uses o200k_base
    Gpt5Nano,
    /// O4 Mini - Latest reasoning model, uses o200k_base
    O4Mini,
    /// O3 - Reasoning model, uses o200k_base
    O3,
    /// O3 Mini - Smaller O3 variant, uses o200k_base
    O3Mini,
    /// O1 - Original reasoning model, uses o200k_base
    O1,
    /// O1 Mini - Smaller O1 variant, uses o200k_base
    O1Mini,
    /// O1 Preview - O1 preview version, uses o200k_base
    O1Preview,
    /// GPT-4o - Omni model, uses o200k_base encoding (most efficient)
    Gpt4o,
    /// GPT-4o Mini - Smaller GPT-4o variant, uses o200k_base encoding
    Gpt4oMini,

    // =========================================================================
    // OpenAI Models - cl100k_base encoding (EXACT tokenization, legacy)
    // =========================================================================
    /// GPT-4/GPT-4 Turbo - uses cl100k_base encoding (legacy)
    Gpt4,
    /// GPT-3.5-turbo - uses cl100k_base encoding (legacy)
    Gpt35Turbo,

    // =========================================================================
    // Anthropic Claude - Estimation (~3.5 chars/token)
    // =========================================================================
    /// Claude (all versions including 4.6) - uses estimation based on ~3.5 chars/token
    Claude,

    // =========================================================================
    // Google Gemini - Estimation (~3.8 chars/token)
    // =========================================================================
    /// Gemini (all versions including 3.1, 2.5, 1.5) - estimation ~3.8 chars/token
    Gemini,

    // =========================================================================
    // Meta Llama - Estimation (~3.5 chars/token)
    // =========================================================================
    /// Llama 3/4 - estimation based on ~3.5 chars/token
    Llama,
    /// CodeLlama - more granular for code (~3.2 chars/token)
    CodeLlama,

    // =========================================================================
    // Mistral AI - Estimation (~3.5 chars/token)
    // =========================================================================
    /// Mistral (Large, Medium, Small, Codestral) - estimation ~3.5 chars/token
    Mistral,

    // =========================================================================
    // DeepSeek - Estimation (~3.5 chars/token)
    // =========================================================================
    /// DeepSeek (V3, R1, Coder) - estimation ~3.5 chars/token
    DeepSeek,

    // =========================================================================
    // Qwen (Alibaba) - Estimation (~3.5 chars/token)
    // =========================================================================
    /// Qwen (Qwen3, Qwen2.5) - estimation ~3.5 chars/token
    Qwen,

    // =========================================================================
    // Cohere - Estimation (~3.6 chars/token)
    // =========================================================================
    /// Cohere (Command R+, Command R) - estimation ~3.6 chars/token
    Cohere,

    // =========================================================================
    // xAI Grok - Estimation (~3.5 chars/token)
    // =========================================================================
    /// Grok (Grok 2, 3, 4) - estimation ~3.5 chars/token
    Grok,
}

impl TokenModel {
    /// Get human-readable name
    pub fn name(&self) -> &'static str {
        match self {
            // OpenAI o200k_base models
            Self::Gpt52 => "gpt-5.2",
            Self::Gpt52Pro => "gpt-5.2-pro",
            Self::Gpt51 => "gpt-5.1",
            Self::Gpt51Mini => "gpt-5.1-mini",
            Self::Gpt51Codex => "gpt-5.1-codex",
            Self::Gpt5 => "gpt-5",
            Self::Gpt5Mini => "gpt-5-mini",
            Self::Gpt5Nano => "gpt-5-nano",
            Self::O4Mini => "o4-mini",
            Self::O3 => "o3",
            Self::O3Mini => "o3-mini",
            Self::O1 => "o1",
            Self::O1Mini => "o1-mini",
            Self::O1Preview => "o1-preview",
            Self::Gpt4o => "gpt-4o",
            Self::Gpt4oMini => "gpt-4o-mini",
            // OpenAI cl100k_base models (legacy)
            Self::Gpt4 => "gpt-4",
            Self::Gpt35Turbo => "gpt-3.5-turbo",
            // Other vendors
            Self::Claude => "claude",
            Self::Gemini => "gemini",
            Self::Llama => "llama",
            Self::CodeLlama => "codellama",
            Self::Mistral => "mistral",
            Self::DeepSeek => "deepseek",
            Self::Qwen => "qwen",
            Self::Cohere => "cohere",
            Self::Grok => "grok",
        }
    }

    /// Get average characters per token (for estimation fallback)
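    ///
    /// This ratio can back a rough character-count estimate when no exact
    /// tokenizer is available (an illustrative sketch; the crate's actual
    /// estimator may apply additional heuristics):
    ///
    /// ```
    /// use infiniloom_engine::tokenizer::TokenModel;
    ///
    /// let text = "some source code to be packed";
    /// let approx_tokens =
    ///     (text.len() as f32 / TokenModel::Claude.chars_per_token()).ceil() as u32;
    /// assert!(approx_tokens > 0);
    /// ```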
    pub fn chars_per_token(&self) -> f32 {
        match self {
            // OpenAI o200k_base models - most efficient encoding (~4.0 chars/token)
            Self::Gpt52
            | Self::Gpt52Pro
            | Self::Gpt51
            | Self::Gpt51Mini
            | Self::Gpt51Codex
            | Self::Gpt5
            | Self::Gpt5Mini
            | Self::Gpt5Nano
            | Self::O4Mini
            | Self::O3
            | Self::O3Mini
            | Self::O1
            | Self::O1Mini
            | Self::O1Preview
            | Self::Gpt4o
            | Self::Gpt4oMini => 4.0,
            // OpenAI cl100k_base models (legacy) - slightly less efficient
            Self::Gpt4 | Self::Gpt35Turbo => 3.7,
            // Anthropic Claude
            Self::Claude => 3.5,
            // Google Gemini - slightly more verbose
            Self::Gemini => 3.8,
            // Meta Llama
            Self::Llama => 3.5,
            Self::CodeLlama => 3.2, // Code-focused, more granular
            // Mistral AI
            Self::Mistral => 3.5,
            // DeepSeek
            Self::DeepSeek => 3.5,
            // Qwen (Alibaba)
            Self::Qwen => 3.5,
            // Cohere - slightly more verbose
            Self::Cohere => 3.6,
            // xAI Grok
            Self::Grok => 3.5,
        }
    }

    /// Whether this model has an exact tokenizer available (via tiktoken)
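    ///
    /// A quick illustration (OpenAI models tokenize exactly; other vendors
    /// fall back to character-based estimation):
    ///
    /// ```
    /// use infiniloom_engine::tokenizer::TokenModel;
    ///
    /// assert!(TokenModel::Gpt4o.has_exact_tokenizer());
    /// assert!(!TokenModel::Claude.has_exact_tokenizer());
    /// ```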
    pub fn has_exact_tokenizer(&self) -> bool {
        matches!(
            self,
            // All OpenAI models have exact tokenizers
            Self::Gpt52
                | Self::Gpt52Pro
                | Self::Gpt51
                | Self::Gpt51Mini
                | Self::Gpt51Codex
                | Self::Gpt5
                | Self::Gpt5Mini
                | Self::Gpt5Nano
                | Self::O4Mini
                | Self::O3
                | Self::O3Mini
                | Self::O1
                | Self::O1Mini
                | Self::O1Preview
                | Self::Gpt4o
                | Self::Gpt4oMini
                | Self::Gpt4
                | Self::Gpt35Turbo
        )
    }

    /// Whether this model uses the o200k_base encoding
    pub fn uses_o200k(&self) -> bool {
        matches!(
            self,
            Self::Gpt52
                | Self::Gpt52Pro
                | Self::Gpt51
                | Self::Gpt51Mini
                | Self::Gpt51Codex
                | Self::Gpt5
                | Self::Gpt5Mini
                | Self::Gpt5Nano
                | Self::O4Mini
                | Self::O3
                | Self::O3Mini
                | Self::O1
                | Self::O1Mini
                | Self::O1Preview
                | Self::Gpt4o
                | Self::Gpt4oMini
        )
    }

    /// Whether this model uses the cl100k_base encoding (legacy)
    pub fn uses_cl100k(&self) -> bool {
        matches!(self, Self::Gpt4 | Self::Gpt35Turbo)
    }

    /// Parse a model name string into a TokenModel
    ///
    /// Supports various formats:
    /// - OpenAI: "gpt-5.2", "gpt-5.2-pro", "gpt-5.1", "gpt-5", "o3", "o1", "gpt-4o", etc.
    /// - Claude: "claude", "claude-3", "claude-4", "claude-opus", "claude-sonnet", "claude-haiku"
    /// - Gemini: "gemini", "gemini-pro", "gemini-flash", "gemini-2.5", "gemini-3", "gemini-3.1"
    /// - Llama: "llama", "llama-3", "llama-4", "codellama"
    /// - Others: "mistral", "deepseek", "qwen", "cohere", "grok"
    ///
    /// # Examples
    ///
    /// ```
    /// use infiniloom_engine::tokenizer::TokenModel;
    ///
    /// assert_eq!(TokenModel::from_model_name("gpt-5.2"), Some(TokenModel::Gpt52));
    /// assert_eq!(TokenModel::from_model_name("o3"), Some(TokenModel::O3));
    /// assert_eq!(TokenModel::from_model_name("claude-sonnet"), Some(TokenModel::Claude));
    /// assert_eq!(TokenModel::from_model_name("unknown-model"), None);
    /// ```
    pub fn from_model_name(name: &str) -> Option<Self> {
        let name_lower = name.to_lowercase();
        let name_lower = name_lower.as_str();

        match name_lower {
            // =================================================================
            // OpenAI GPT-5.2 family
            // =================================================================
            "gpt-5.2" | "gpt5.2" | "gpt-52" | "gpt52" => Some(Self::Gpt52),
            "gpt-5.2-pro" | "gpt5.2-pro" | "gpt-52-pro" | "gpt52pro" => Some(Self::Gpt52Pro),
            s if s.starts_with("gpt-5.2-") || s.starts_with("gpt5.2-") => Some(Self::Gpt52),

            // =================================================================
            // OpenAI GPT-5.1 family
            // =================================================================
            "gpt-5.1" | "gpt5.1" | "gpt-51" | "gpt51" => Some(Self::Gpt51),
            "gpt-5.1-mini" | "gpt5.1-mini" | "gpt-51-mini" => Some(Self::Gpt51Mini),
            "gpt-5.1-codex" | "gpt5.1-codex" | "gpt-51-codex" => Some(Self::Gpt51Codex),
            s if s.starts_with("gpt-5.1-") || s.starts_with("gpt5.1-") => Some(Self::Gpt51),

            // =================================================================
            // OpenAI GPT-5 family
            // =================================================================
            "gpt-5" | "gpt5" => Some(Self::Gpt5),
            "gpt-5-mini" | "gpt5-mini" => Some(Self::Gpt5Mini),
            "gpt-5-nano" | "gpt5-nano" => Some(Self::Gpt5Nano),
            s if s.starts_with("gpt-5-") || s.starts_with("gpt5-") => Some(Self::Gpt5),

            // =================================================================
            // OpenAI O-series reasoning models
            // =================================================================
            "o4-mini" | "o4mini" => Some(Self::O4Mini),
            "o3" => Some(Self::O3),
            "o3-mini" | "o3mini" => Some(Self::O3Mini),
            s if s.starts_with("o3-") => Some(Self::O3),
            "o1" => Some(Self::O1),
            "o1-mini" | "o1mini" => Some(Self::O1Mini),
            "o1-preview" | "o1preview" => Some(Self::O1Preview),
            s if s.starts_with("o1-") => Some(Self::O1),

            // =================================================================
            // OpenAI GPT-4o family
            // =================================================================
            "gpt-4o" | "gpt4o" => Some(Self::Gpt4o),
            "gpt-4o-mini" | "gpt4o-mini" | "gpt-4o-mini-2024-07-18" => Some(Self::Gpt4oMini),
            s if s.starts_with("gpt-4o-") || s.starts_with("gpt4o-") => Some(Self::Gpt4o),

            // =================================================================
            // OpenAI GPT-4 family (legacy, cl100k_base)
            // =================================================================
            "gpt-4" | "gpt4" | "gpt-4-turbo" | "gpt4-turbo" | "gpt-4-turbo-preview" => {
                Some(Self::Gpt4)
            },
            s if s.starts_with("gpt-4-") && !s.contains("4o") => Some(Self::Gpt4),

            // =================================================================
            // OpenAI GPT-3.5 family (legacy, cl100k_base)
            // =================================================================
            "gpt-3.5-turbo" | "gpt-35-turbo" | "gpt3.5-turbo" | "gpt35-turbo" | "gpt-3.5" => {
                Some(Self::Gpt35Turbo)
            },
            s if s.starts_with("gpt-3.5-") || s.starts_with("gpt-35-") => Some(Self::Gpt35Turbo),

            // =================================================================
            // Anthropic Claude (all versions map to Claude)
            // =================================================================
            "claude" | "claude-3" | "claude-3.5" | "claude-4" | "claude-4.5" | "claude-4.6"
            | "claude-opus" | "claude-opus-4" | "claude-opus-4.5" | "claude-opus-4.6"
            | "claude-sonnet" | "claude-sonnet-4" | "claude-sonnet-4.5" | "claude-sonnet-4.6"
            | "claude-haiku" | "claude-haiku-4" | "claude-haiku-4.5" | "claude-instant" => {
                Some(Self::Claude)
            },
            s if s.starts_with("claude") => Some(Self::Claude),

            // =================================================================
            // Google Gemini (all versions map to Gemini)
            // =================================================================
            "gemini" | "gemini-pro" | "gemini-flash" | "gemini-ultra" | "gemini-1.5"
            | "gemini-1.5-pro" | "gemini-1.5-flash" | "gemini-2" | "gemini-2.5"
            | "gemini-2.5-pro" | "gemini-2.5-flash" | "gemini-3" | "gemini-3-pro"
            | "gemini-3.1" | "gemini-3.1-pro" | "gemini-3.1-flash" => Some(Self::Gemini),
            s if s.starts_with("gemini") => Some(Self::Gemini),

            // =================================================================
            // Meta Llama
            // =================================================================
            "llama" | "llama-2" | "llama-3" | "llama-3.1" | "llama-3.2" | "llama-4" | "llama2"
            | "llama3" | "llama4" => Some(Self::Llama),
            "codellama" | "code-llama" | "llama-code" => Some(Self::CodeLlama),
            s if s.starts_with("llama") && !s.contains("code") => Some(Self::Llama),
            s if s.contains("codellama") || s.contains("code-llama") => Some(Self::CodeLlama),

            // =================================================================
            // Mistral AI
            // =================================================================
            "mistral" | "mistral-large" | "mistral-large-3" | "mistral-medium"
            | "mistral-medium-3" | "mistral-small" | "mistral-small-3" | "codestral"
            | "devstral" | "ministral" => Some(Self::Mistral),
            s if s.starts_with("mistral") || s.contains("stral") => Some(Self::Mistral),

            // =================================================================
            // DeepSeek
            // =================================================================
            "deepseek" | "deepseek-v3" | "deepseek-v3.2" | "deepseek-r1" | "deepseek-coder"
            | "deepseek-chat" | "deepseek-reasoner" => Some(Self::DeepSeek),
            s if s.starts_with("deepseek") => Some(Self::DeepSeek),

            // =================================================================
            // Qwen (Alibaba)
            // =================================================================
            "qwen" | "qwen2" | "qwen2.5" | "qwen3" | "qwen-72b" | "qwen-7b" | "qwen-coder" => {
                Some(Self::Qwen)
            },
            s if s.starts_with("qwen") => Some(Self::Qwen),

            // =================================================================
            // Cohere
            // =================================================================
            "cohere" | "command-r" | "command-r-plus" | "command-r+" | "command" => {
                Some(Self::Cohere)
            },
            s if s.starts_with("cohere") || s.starts_with("command") => Some(Self::Cohere),

            // =================================================================
            // xAI Grok
            // =================================================================
            "grok" | "grok-1" | "grok-2" | "grok-3" | "grok-4" | "grok-beta" => Some(Self::Grok),
            s if s.starts_with("grok") => Some(Self::Grok),

            // Unknown model
            _ => None,
        }
    }

    /// Get all available models
    pub fn all() -> &'static [Self] {
        &[
            Self::Gpt52,
            Self::Gpt52Pro,
            Self::Gpt51,
            Self::Gpt51Mini,
            Self::Gpt51Codex,
            Self::Gpt5,
            Self::Gpt5Mini,
            Self::Gpt5Nano,
            Self::O4Mini,
            Self::O3,
            Self::O3Mini,
            Self::O1,
            Self::O1Mini,
            Self::O1Preview,
            Self::Gpt4o,
            Self::Gpt4oMini,
            Self::Gpt4,
            Self::Gpt35Turbo,
            Self::Claude,
            Self::Gemini,
            Self::Llama,
            Self::CodeLlama,
            Self::Mistral,
            Self::DeepSeek,
            Self::Qwen,
            Self::Cohere,
            Self::Grok,
        ]
    }

    /// Get approximate context window size for this model (in tokens)
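    ///
    /// For instance:
    ///
    /// ```
    /// use infiniloom_engine::tokenizer::TokenModel;
    ///
    /// assert_eq!(TokenModel::Claude.context_window(), 200_000);
    /// assert_eq!(TokenModel::Gemini.context_window(), 1_000_000);
    /// ```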
    pub fn context_window(&self) -> u32 {
        match self {
            // OpenAI GPT-5.x - 128K context
            Self::Gpt52
            | Self::Gpt52Pro
            | Self::Gpt51
            | Self::Gpt51Mini
            | Self::Gpt51Codex
            | Self::Gpt5
            | Self::Gpt5Mini
            | Self::Gpt5Nano => 128_000,
            // OpenAI O-series - O3/O4 have 200K context
            Self::O4Mini | Self::O3 | Self::O3Mini => 200_000,
            // OpenAI O1 series - 128K context
            Self::O1 | Self::O1Mini | Self::O1Preview => 128_000,
            // OpenAI GPT-4o - 128K
            Self::Gpt4o | Self::Gpt4oMini => 128_000,
            // Legacy OpenAI
            Self::Gpt4 => 128_000,
            Self::Gpt35Turbo => 16_000,
            // Anthropic Claude - 200K stable default
            // Note: 1M token context is available in beta (as of March 2026).
            // Users can upgrade to Claude 1M when it reaches general availability.
            // Current stable default remains 200K for compatibility.
            Self::Claude => 200_000,
            // Google Gemini - 1M+
            Self::Gemini => 1_000_000,
            // Meta Llama - 128K
            Self::Llama | Self::CodeLlama => 128_000,
            // Mistral - 128K
            Self::Mistral => 128_000,
            // DeepSeek - 128K
            Self::DeepSeek => 128_000,
            // Qwen - 128K
            Self::Qwen => 128_000,
            // Cohere - 128K
            Self::Cohere => 128_000,
            // Grok - 2M
            Self::Grok => 2_000_000,
        }
    }

    /// Get recommended default token budget for packing output
    /// (about 75% of the context window, capped at 500K)
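    ///
    /// For example (mirroring the unit tests below):
    ///
    /// ```
    /// use infiniloom_engine::tokenizer::TokenModel;
    ///
    /// // 200K * 0.75 = 150K, under the cap
    /// assert_eq!(TokenModel::Claude.default_budget(), 150_000);
    /// // 1M * 0.75 = 750K, capped at 500K
    /// assert_eq!(TokenModel::Gemini.default_budget(), 500_000);
    /// ```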
    pub fn default_budget(&self) -> u32 {
        let window = self.context_window();
        // Use ~75% of context to leave room for response
        let budget = (window as f64 * 0.75) as u32;
        // Cap at 500K to keep output manageable
        budget.min(500_000)
    }

    /// Get the vendor/provider name for this model
    pub fn vendor(&self) -> &'static str {
        match self {
            Self::Gpt52
            | Self::Gpt52Pro
            | Self::Gpt51
            | Self::Gpt51Mini
            | Self::Gpt51Codex
            | Self::Gpt5
            | Self::Gpt5Mini
            | Self::Gpt5Nano
            | Self::O4Mini
            | Self::O3
            | Self::O3Mini
            | Self::O1
            | Self::O1Mini
            | Self::O1Preview
            | Self::Gpt4o
            | Self::Gpt4oMini
            | Self::Gpt4
            | Self::Gpt35Turbo => "OpenAI",
            Self::Claude => "Anthropic",
            Self::Gemini => "Google",
            Self::Llama | Self::CodeLlama => "Meta",
            Self::Mistral => "Mistral AI",
            Self::DeepSeek => "DeepSeek",
            Self::Qwen => "Alibaba",
            Self::Cohere => "Cohere",
            Self::Grok => "xAI",
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_context_window_values() {
        assert_eq!(TokenModel::Gpt35Turbo.context_window(), 16_000);
        assert_eq!(TokenModel::Gpt4.context_window(), 128_000);
        assert_eq!(TokenModel::Claude.context_window(), 200_000);
        assert_eq!(TokenModel::Gemini.context_window(), 1_000_000);
        assert_eq!(TokenModel::Grok.context_window(), 2_000_000);
    }
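
    // Added coverage (a sketch, not part of the original suite): parsing is
    // case-insensitive, and dated or suffixed names fall through to the
    // prefix-based rules.
    #[test]
    fn test_from_model_name_normalization() {
        assert_eq!(TokenModel::from_model_name("GPT-4o"), Some(TokenModel::Gpt4o));
        assert_eq!(
            TokenModel::from_model_name("gpt-4o-2024-08-06"),
            Some(TokenModel::Gpt4o)
        );
        assert_eq!(
            TokenModel::from_model_name("claude-opus-4-20250514"),
            Some(TokenModel::Claude)
        );
    }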

    #[test]
    fn test_default_budget_cap() {
        // Gemini: 1M * 0.75 = 750K → capped to 500K
        assert_eq!(TokenModel::Gemini.default_budget(), 500_000);
        // Grok: 2M * 0.75 = 1.5M → capped to 500K
        assert_eq!(TokenModel::Grok.default_budget(), 500_000);
    }
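
    // Added consistency check (a sketch): models with an exact tiktoken
    // tokenizer use exactly one of the two encodings; estimation-only models
    // use neither.
    #[test]
    fn test_exact_tokenizer_matches_encoding_flags() {
        for model in TokenModel::all() {
            let o200k = model.uses_o200k();
            let cl100k = model.uses_cl100k();
            assert!(!(o200k && cl100k), "{} claims both encodings", model.name());
            assert_eq!(model.has_exact_tokenizer(), o200k || cl100k);
        }
    }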

    #[test]
    fn test_default_budget_no_cap() {
        // Gpt35Turbo: 16K * 0.75 = 12K (no cap)
        assert_eq!(TokenModel::Gpt35Turbo.default_budget(), 12_000);
        // Claude: 200K * 0.75 = 150K (no cap)
        assert_eq!(TokenModel::Claude.default_budget(), 150_000);
    }
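
    // Added round-trip check (a sketch): every canonical name returned by
    // `name()` should parse back to its own variant via `from_model_name()`.
    #[test]
    fn test_name_round_trips_through_from_model_name() {
        for model in TokenModel::all() {
            assert_eq!(TokenModel::from_model_name(model.name()), Some(*model));
        }
    }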
}