infiniloom_engine/tokenizer/
models.rs

1//! Token model definitions for various LLM providers
2//!
3//! This module defines the supported LLM models and their tokenizer properties.
4
/// Supported LLM models for token counting
///
/// Models are grouped by their tokenizer encoding family. Use [`TokenModel::from_model_name`]
/// to parse user-friendly model names like "gpt-5.2", "o3", "claude-sonnet", etc.
///
/// OpenAI variants have exact tokenization via tiktoken encodings
/// (o200k_base or cl100k_base); all other vendors are counted by a
/// chars-per-token estimate (see [`TokenModel::chars_per_token`]).
/// Variants are unit-only, `Copy`, hashable, and serde-serializable
/// (derived; default externally-tagged representation).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub enum TokenModel {
    // =========================================================================
    // OpenAI Models - o200k_base encoding (EXACT tokenization)
    // =========================================================================
    /// GPT-5.2 - Latest flagship model (Dec 2025), uses o200k_base
    Gpt52,
    /// GPT-5.2 Pro - Enhanced GPT-5.2 variant, uses o200k_base
    Gpt52Pro,
    /// GPT-5.1 - Previous flagship (Nov 2025), uses o200k_base
    Gpt51,
    /// GPT-5.1 Mini - Smaller GPT-5.1 variant, uses o200k_base
    Gpt51Mini,
    /// GPT-5.1 Codex - Code-specialized variant, uses o200k_base
    Gpt51Codex,
    /// GPT-5 - Original GPT-5 (Aug 2025), uses o200k_base
    Gpt5,
    /// GPT-5 Mini - Smaller GPT-5 variant, uses o200k_base
    Gpt5Mini,
    /// GPT-5 Nano - Smallest GPT-5 variant, uses o200k_base
    Gpt5Nano,
    /// O4 Mini - Latest reasoning model, uses o200k_base
    O4Mini,
    /// O3 - Reasoning model, uses o200k_base
    O3,
    /// O3 Mini - Smaller O3 variant, uses o200k_base
    O3Mini,
    /// O1 - Original reasoning model, uses o200k_base
    O1,
    /// O1 Mini - Smaller O1 variant, uses o200k_base
    O1Mini,
    /// O1 Preview - O1 preview version, uses o200k_base
    O1Preview,
    /// GPT-4o - Omni model, uses o200k_base encoding (most efficient)
    Gpt4o,
    /// GPT-4o Mini - Smaller GPT-4o variant, uses o200k_base encoding
    Gpt4oMini,

    // =========================================================================
    // OpenAI Models - cl100k_base encoding (EXACT tokenization, legacy)
    // =========================================================================
    /// GPT-4/GPT-4 Turbo - uses cl100k_base encoding (legacy)
    Gpt4,
    /// GPT-3.5-turbo - uses cl100k_base encoding (legacy)
    Gpt35Turbo,

    // =========================================================================
    // Anthropic Claude - Estimation (~3.5 chars/token)
    // =========================================================================
    /// Claude (all versions) - uses estimation based on ~3.5 chars/token
    Claude,

    // =========================================================================
    // Google Gemini - Estimation (~3.8 chars/token)
    // =========================================================================
    /// Gemini (all versions including 3, 2.5, 1.5) - estimation ~3.8 chars/token
    Gemini,

    // =========================================================================
    // Meta Llama - Estimation (~3.5 chars/token)
    // =========================================================================
    /// Llama 3/4 - estimation based on ~3.5 chars/token
    Llama,
    /// CodeLlama - more granular for code (~3.2 chars/token)
    CodeLlama,

    // =========================================================================
    // Mistral AI - Estimation (~3.5 chars/token)
    // =========================================================================
    /// Mistral (Large, Medium, Small, Codestral) - estimation ~3.5 chars/token
    Mistral,

    // =========================================================================
    // DeepSeek - Estimation (~3.5 chars/token)
    // =========================================================================
    /// DeepSeek (V3, R1, Coder) - estimation ~3.5 chars/token
    DeepSeek,

    // =========================================================================
    // Qwen (Alibaba) - Estimation (~3.5 chars/token)
    // =========================================================================
    /// Qwen (Qwen3, Qwen2.5) - estimation ~3.5 chars/token
    Qwen,

    // =========================================================================
    // Cohere - Estimation (~3.6 chars/token)
    // =========================================================================
    /// Cohere (Command R+, Command R) - estimation ~3.6 chars/token
    Cohere,

    // =========================================================================
    // xAI Grok - Estimation (~3.5 chars/token)
    // =========================================================================
    /// Grok (Grok 2, Grok 3) - estimation ~3.5 chars/token
    Grok,
}
105
106impl TokenModel {
107    /// Get human-readable name
108    pub fn name(&self) -> &'static str {
109        match self {
110            // OpenAI o200k_base models
111            Self::Gpt52 => "gpt-5.2",
112            Self::Gpt52Pro => "gpt-5.2-pro",
113            Self::Gpt51 => "gpt-5.1",
114            Self::Gpt51Mini => "gpt-5.1-mini",
115            Self::Gpt51Codex => "gpt-5.1-codex",
116            Self::Gpt5 => "gpt-5",
117            Self::Gpt5Mini => "gpt-5-mini",
118            Self::Gpt5Nano => "gpt-5-nano",
119            Self::O4Mini => "o4-mini",
120            Self::O3 => "o3",
121            Self::O3Mini => "o3-mini",
122            Self::O1 => "o1",
123            Self::O1Mini => "o1-mini",
124            Self::O1Preview => "o1-preview",
125            Self::Gpt4o => "gpt-4o",
126            Self::Gpt4oMini => "gpt-4o-mini",
127            // OpenAI cl100k_base models (legacy)
128            Self::Gpt4 => "gpt-4",
129            Self::Gpt35Turbo => "gpt-3.5-turbo",
130            // Other vendors
131            Self::Claude => "claude",
132            Self::Gemini => "gemini",
133            Self::Llama => "llama",
134            Self::CodeLlama => "codellama",
135            Self::Mistral => "mistral",
136            Self::DeepSeek => "deepseek",
137            Self::Qwen => "qwen",
138            Self::Cohere => "cohere",
139            Self::Grok => "grok",
140        }
141    }
142
143    /// Get average characters per token (for estimation fallback)
144    pub fn chars_per_token(&self) -> f32 {
145        match self {
146            // OpenAI o200k_base models - most efficient encoding (~4.0 chars/token)
147            Self::Gpt52
148            | Self::Gpt52Pro
149            | Self::Gpt51
150            | Self::Gpt51Mini
151            | Self::Gpt51Codex
152            | Self::Gpt5
153            | Self::Gpt5Mini
154            | Self::Gpt5Nano
155            | Self::O4Mini
156            | Self::O3
157            | Self::O3Mini
158            | Self::O1
159            | Self::O1Mini
160            | Self::O1Preview
161            | Self::Gpt4o
162            | Self::Gpt4oMini => 4.0,
163            // OpenAI cl100k_base models (legacy) - slightly less efficient
164            Self::Gpt4 | Self::Gpt35Turbo => 3.7,
165            // Anthropic Claude
166            Self::Claude => 3.5,
167            // Google Gemini - slightly more verbose
168            Self::Gemini => 3.8,
169            // Meta Llama
170            Self::Llama => 3.5,
171            Self::CodeLlama => 3.2, // Code-focused, more granular
172            // Mistral AI
173            Self::Mistral => 3.5,
174            // DeepSeek
175            Self::DeepSeek => 3.5,
176            // Qwen (Alibaba)
177            Self::Qwen => 3.5,
178            // Cohere - slightly more verbose
179            Self::Cohere => 3.6,
180            // xAI Grok
181            Self::Grok => 3.5,
182        }
183    }
184
185    /// Whether this model has an exact tokenizer available (via tiktoken)
186    pub fn has_exact_tokenizer(&self) -> bool {
187        matches!(
188            self,
189            // All OpenAI models have exact tokenizers
190            Self::Gpt52
191                | Self::Gpt52Pro
192                | Self::Gpt51
193                | Self::Gpt51Mini
194                | Self::Gpt51Codex
195                | Self::Gpt5
196                | Self::Gpt5Mini
197                | Self::Gpt5Nano
198                | Self::O4Mini
199                | Self::O3
200                | Self::O3Mini
201                | Self::O1
202                | Self::O1Mini
203                | Self::O1Preview
204                | Self::Gpt4o
205                | Self::Gpt4oMini
206                | Self::Gpt4
207                | Self::Gpt35Turbo
208        )
209    }
210
211    /// Whether this model uses the o200k_base encoding
212    pub fn uses_o200k(&self) -> bool {
213        matches!(
214            self,
215            Self::Gpt52
216                | Self::Gpt52Pro
217                | Self::Gpt51
218                | Self::Gpt51Mini
219                | Self::Gpt51Codex
220                | Self::Gpt5
221                | Self::Gpt5Mini
222                | Self::Gpt5Nano
223                | Self::O4Mini
224                | Self::O3
225                | Self::O3Mini
226                | Self::O1
227                | Self::O1Mini
228                | Self::O1Preview
229                | Self::Gpt4o
230                | Self::Gpt4oMini
231        )
232    }
233
234    /// Whether this model uses the cl100k_base encoding (legacy)
235    pub fn uses_cl100k(&self) -> bool {
236        matches!(self, Self::Gpt4 | Self::Gpt35Turbo)
237    }
238
239    /// Parse a model name string into a TokenModel
240    ///
241    /// Supports various formats:
242    /// - OpenAI: "gpt-5.2", "gpt-5.2-pro", "gpt-5.1", "gpt-5", "o3", "o1", "gpt-4o", etc.
243    /// - Claude: "claude", "claude-3", "claude-4", "claude-opus", "claude-sonnet", "claude-haiku"
244    /// - Gemini: "gemini", "gemini-pro", "gemini-flash", "gemini-2.5", "gemini-3"
245    /// - Llama: "llama", "llama-3", "llama-4", "codellama"
246    /// - Others: "mistral", "deepseek", "qwen", "cohere", "grok"
247    ///
248    /// # Examples
249    ///
250    /// ```
251    /// use infiniloom_engine::tokenizer::TokenModel;
252    ///
253    /// assert_eq!(TokenModel::from_model_name("gpt-5.2"), Some(TokenModel::Gpt52));
254    /// assert_eq!(TokenModel::from_model_name("o3"), Some(TokenModel::O3));
255    /// assert_eq!(TokenModel::from_model_name("claude-sonnet"), Some(TokenModel::Claude));
256    /// assert_eq!(TokenModel::from_model_name("unknown-model"), None);
257    /// ```
258    pub fn from_model_name(name: &str) -> Option<Self> {
259        let name_lower = name.to_lowercase();
260        let name_lower = name_lower.as_str();
261
262        match name_lower {
263            // =================================================================
264            // OpenAI GPT-5.2 family
265            // =================================================================
266            "gpt-5.2" | "gpt5.2" | "gpt-52" | "gpt52" => Some(Self::Gpt52),
267            "gpt-5.2-pro" | "gpt5.2-pro" | "gpt-52-pro" | "gpt52pro" => Some(Self::Gpt52Pro),
268            s if s.starts_with("gpt-5.2-") || s.starts_with("gpt5.2-") => Some(Self::Gpt52),
269
270            // =================================================================
271            // OpenAI GPT-5.1 family
272            // =================================================================
273            "gpt-5.1" | "gpt5.1" | "gpt-51" | "gpt51" => Some(Self::Gpt51),
274            "gpt-5.1-mini" | "gpt5.1-mini" | "gpt-51-mini" => Some(Self::Gpt51Mini),
275            "gpt-5.1-codex" | "gpt5.1-codex" | "gpt-51-codex" => Some(Self::Gpt51Codex),
276            s if s.starts_with("gpt-5.1-") || s.starts_with("gpt5.1-") => Some(Self::Gpt51),
277
278            // =================================================================
279            // OpenAI GPT-5 family
280            // =================================================================
281            "gpt-5" | "gpt5" => Some(Self::Gpt5),
282            "gpt-5-mini" | "gpt5-mini" => Some(Self::Gpt5Mini),
283            "gpt-5-nano" | "gpt5-nano" => Some(Self::Gpt5Nano),
284            s if s.starts_with("gpt-5-") || s.starts_with("gpt5-") => Some(Self::Gpt5),
285
286            // =================================================================
287            // OpenAI O-series reasoning models
288            // =================================================================
289            "o4-mini" | "o4mini" => Some(Self::O4Mini),
290            "o3" => Some(Self::O3),
291            "o3-mini" | "o3mini" => Some(Self::O3Mini),
292            s if s.starts_with("o3-") => Some(Self::O3),
293            "o1" => Some(Self::O1),
294            "o1-mini" | "o1mini" => Some(Self::O1Mini),
295            "o1-preview" | "o1preview" => Some(Self::O1Preview),
296            s if s.starts_with("o1-") => Some(Self::O1),
297
298            // =================================================================
299            // OpenAI GPT-4o family
300            // =================================================================
301            "gpt-4o" | "gpt4o" => Some(Self::Gpt4o),
302            "gpt-4o-mini" | "gpt4o-mini" | "gpt-4o-mini-2024-07-18" => Some(Self::Gpt4oMini),
303            s if s.starts_with("gpt-4o-") || s.starts_with("gpt4o-") => Some(Self::Gpt4o),
304
305            // =================================================================
306            // OpenAI GPT-4 family (legacy, cl100k_base)
307            // =================================================================
308            "gpt-4" | "gpt4" | "gpt-4-turbo" | "gpt4-turbo" | "gpt-4-turbo-preview" => {
309                Some(Self::Gpt4)
310            },
311            s if s.starts_with("gpt-4-") && !s.contains("4o") => Some(Self::Gpt4),
312
313            // =================================================================
314            // OpenAI GPT-3.5 family (legacy, cl100k_base)
315            // =================================================================
316            "gpt-3.5-turbo" | "gpt-35-turbo" | "gpt3.5-turbo" | "gpt35-turbo" | "gpt-3.5" => {
317                Some(Self::Gpt35Turbo)
318            },
319            s if s.starts_with("gpt-3.5-") || s.starts_with("gpt-35-") => Some(Self::Gpt35Turbo),
320
321            // =================================================================
322            // Anthropic Claude (all versions map to Claude)
323            // =================================================================
324            "claude" | "claude-3" | "claude-3.5" | "claude-4" | "claude-4.5" | "claude-opus"
325            | "claude-opus-4" | "claude-opus-4.5" | "claude-sonnet" | "claude-sonnet-4"
326            | "claude-sonnet-4.5" | "claude-haiku" | "claude-haiku-4" | "claude-haiku-4.5"
327            | "claude-instant" => Some(Self::Claude),
328            s if s.starts_with("claude") => Some(Self::Claude),
329
330            // =================================================================
331            // Google Gemini (all versions map to Gemini)
332            // =================================================================
333            "gemini" | "gemini-pro" | "gemini-flash" | "gemini-ultra" | "gemini-1.5"
334            | "gemini-1.5-pro" | "gemini-1.5-flash" | "gemini-2" | "gemini-2.5"
335            | "gemini-2.5-pro" | "gemini-2.5-flash" | "gemini-3" | "gemini-3-pro" => {
336                Some(Self::Gemini)
337            },
338            s if s.starts_with("gemini") => Some(Self::Gemini),
339
340            // =================================================================
341            // Meta Llama
342            // =================================================================
343            "llama" | "llama-2" | "llama-3" | "llama-3.1" | "llama-3.2" | "llama-4" | "llama2"
344            | "llama3" | "llama4" => Some(Self::Llama),
345            "codellama" | "code-llama" | "llama-code" => Some(Self::CodeLlama),
346            s if s.starts_with("llama") && !s.contains("code") => Some(Self::Llama),
347            s if s.contains("codellama") || s.contains("code-llama") => Some(Self::CodeLlama),
348
349            // =================================================================
350            // Mistral AI
351            // =================================================================
352            "mistral" | "mistral-large" | "mistral-large-3" | "mistral-medium"
353            | "mistral-medium-3" | "mistral-small" | "mistral-small-3" | "codestral"
354            | "devstral" | "ministral" => Some(Self::Mistral),
355            s if s.starts_with("mistral") || s.contains("stral") => Some(Self::Mistral),
356
357            // =================================================================
358            // DeepSeek
359            // =================================================================
360            "deepseek" | "deepseek-v3" | "deepseek-v3.2" | "deepseek-r1" | "deepseek-coder"
361            | "deepseek-chat" | "deepseek-reasoner" => Some(Self::DeepSeek),
362            s if s.starts_with("deepseek") => Some(Self::DeepSeek),
363
364            // =================================================================
365            // Qwen (Alibaba)
366            // =================================================================
367            "qwen" | "qwen2" | "qwen2.5" | "qwen3" | "qwen-72b" | "qwen-7b" | "qwen-coder" => {
368                Some(Self::Qwen)
369            },
370            s if s.starts_with("qwen") => Some(Self::Qwen),
371
372            // =================================================================
373            // Cohere
374            // =================================================================
375            "cohere" | "command-r" | "command-r-plus" | "command-r+" | "command" => {
376                Some(Self::Cohere)
377            },
378            s if s.starts_with("cohere") || s.starts_with("command") => Some(Self::Cohere),
379
380            // =================================================================
381            // xAI Grok
382            // =================================================================
383            "grok" | "grok-1" | "grok-2" | "grok-3" | "grok-beta" => Some(Self::Grok),
384            s if s.starts_with("grok") => Some(Self::Grok),
385
386            // Unknown model
387            _ => None,
388        }
389    }
390
391    /// Get all available models
392    pub fn all() -> &'static [Self] {
393        &[
394            Self::Gpt52,
395            Self::Gpt52Pro,
396            Self::Gpt51,
397            Self::Gpt51Mini,
398            Self::Gpt51Codex,
399            Self::Gpt5,
400            Self::Gpt5Mini,
401            Self::Gpt5Nano,
402            Self::O4Mini,
403            Self::O3,
404            Self::O3Mini,
405            Self::O1,
406            Self::O1Mini,
407            Self::O1Preview,
408            Self::Gpt4o,
409            Self::Gpt4oMini,
410            Self::Gpt4,
411            Self::Gpt35Turbo,
412            Self::Claude,
413            Self::Gemini,
414            Self::Llama,
415            Self::CodeLlama,
416            Self::Mistral,
417            Self::DeepSeek,
418            Self::Qwen,
419            Self::Cohere,
420            Self::Grok,
421        ]
422    }
423
424    /// Get the vendor/provider name for this model
425    pub fn vendor(&self) -> &'static str {
426        match self {
427            Self::Gpt52
428            | Self::Gpt52Pro
429            | Self::Gpt51
430            | Self::Gpt51Mini
431            | Self::Gpt51Codex
432            | Self::Gpt5
433            | Self::Gpt5Mini
434            | Self::Gpt5Nano
435            | Self::O4Mini
436            | Self::O3
437            | Self::O3Mini
438            | Self::O1
439            | Self::O1Mini
440            | Self::O1Preview
441            | Self::Gpt4o
442            | Self::Gpt4oMini
443            | Self::Gpt4
444            | Self::Gpt35Turbo => "OpenAI",
445            Self::Claude => "Anthropic",
446            Self::Gemini => "Google",
447            Self::Llama | Self::CodeLlama => "Meta",
448            Self::Mistral => "Mistral AI",
449            Self::DeepSeek => "DeepSeek",
450            Self::Qwen => "Alibaba",
451            Self::Cohere => "Cohere",
452            Self::Grok => "xAI",
453        }
454    }
455}