use dashmap::DashMap;
use std::hash::{Hash, Hasher};
use std::sync::OnceLock;
use tiktoken_rs::{cl100k_base, o200k_base, CoreBPE};

/// Language models whose token usage this module can count exactly or estimate.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub enum TokenModel {
    // OpenAI models tokenized with the o200k_base encoding.
    Gpt52,
    Gpt52Pro,
    Gpt51,
    Gpt51Mini,
    Gpt51Codex,
    Gpt5,
    Gpt5Mini,
    Gpt5Nano,
    O4Mini,
    O3,
    O3Mini,
    O1,
    O1Mini,
    O1Preview,
    Gpt4o,
    Gpt4oMini,

    // OpenAI models tokenized with the cl100k_base encoding.
    Gpt4,
    Gpt35Turbo,

    // Vendors counted by heuristic estimation only.
    Claude,

    Gemini,

    Llama,
    CodeLlama,

    Mistral,

    DeepSeek,

    Qwen,

    Cohere,

    Grok,
}

impl TokenModel {
    /// Canonical lowercase name for this model.
    pub fn name(&self) -> &'static str {
        match self {
            Self::Gpt52 => "gpt-5.2",
            Self::Gpt52Pro => "gpt-5.2-pro",
            Self::Gpt51 => "gpt-5.1",
            Self::Gpt51Mini => "gpt-5.1-mini",
            Self::Gpt51Codex => "gpt-5.1-codex",
            Self::Gpt5 => "gpt-5",
            Self::Gpt5Mini => "gpt-5-mini",
            Self::Gpt5Nano => "gpt-5-nano",
            Self::O4Mini => "o4-mini",
            Self::O3 => "o3",
            Self::O3Mini => "o3-mini",
            Self::O1 => "o1",
            Self::O1Mini => "o1-mini",
            Self::O1Preview => "o1-preview",
            Self::Gpt4o => "gpt-4o",
            Self::Gpt4oMini => "gpt-4o-mini",
            Self::Gpt4 => "gpt-4",
            Self::Gpt35Turbo => "gpt-3.5-turbo",
            Self::Claude => "claude",
            Self::Gemini => "gemini",
            Self::Llama => "llama",
            Self::CodeLlama => "codellama",
            Self::Mistral => "mistral",
            Self::DeepSeek => "deepseek",
            Self::Qwen => "qwen",
            Self::Cohere => "cohere",
            Self::Grok => "grok",
        }
    }

    /// Average characters per token, used when no exact tokenizer is available.
    pub fn chars_per_token(&self) -> f32 {
        match self {
            Self::Gpt52
            | Self::Gpt52Pro
            | Self::Gpt51
            | Self::Gpt51Mini
            | Self::Gpt51Codex
            | Self::Gpt5
            | Self::Gpt5Mini
            | Self::Gpt5Nano
            | Self::O4Mini
            | Self::O3
            | Self::O3Mini
            | Self::O1
            | Self::O1Mini
            | Self::O1Preview
            | Self::Gpt4o
            | Self::Gpt4oMini => 4.0,
            Self::Gpt4 | Self::Gpt35Turbo => 3.7,
            Self::Claude => 3.5,
            Self::Gemini => 3.8,
            Self::Llama => 3.5,
            Self::CodeLlama => 3.2,
            Self::Mistral => 3.5,
            Self::DeepSeek => 3.5,
            Self::Qwen => 3.5,
            Self::Cohere => 3.6,
            Self::Grok => 3.5,
        }
    }

    /// True when an exact `tiktoken` encoding exists for this model.
    pub fn has_exact_tokenizer(&self) -> bool {
        matches!(
            self,
            Self::Gpt52
                | Self::Gpt52Pro
                | Self::Gpt51
                | Self::Gpt51Mini
                | Self::Gpt51Codex
                | Self::Gpt5
                | Self::Gpt5Mini
                | Self::Gpt5Nano
                | Self::O4Mini
                | Self::O3
                | Self::O3Mini
                | Self::O1
                | Self::O1Mini
                | Self::O1Preview
                | Self::Gpt4o
                | Self::Gpt4oMini
                | Self::Gpt4
                | Self::Gpt35Turbo
        )
    }

    /// True for models tokenized with the o200k_base encoding.
    pub fn uses_o200k(&self) -> bool {
        matches!(
            self,
            Self::Gpt52
                | Self::Gpt52Pro
                | Self::Gpt51
                | Self::Gpt51Mini
                | Self::Gpt51Codex
                | Self::Gpt5
                | Self::Gpt5Mini
                | Self::Gpt5Nano
                | Self::O4Mini
                | Self::O3
                | Self::O3Mini
                | Self::O1
                | Self::O1Mini
                | Self::O1Preview
                | Self::Gpt4o
                | Self::Gpt4oMini
        )
    }

    /// True for models tokenized with the cl100k_base encoding.
    pub fn uses_cl100k(&self) -> bool {
        matches!(self, Self::Gpt4 | Self::Gpt35Turbo)
    }

    /// Resolves a model name string (case-insensitive) to a `TokenModel`,
    /// mapping versioned or dated variants onto their base model.
    pub fn from_model_name(name: &str) -> Option<Self> {
        let name = name.to_lowercase();

        match name.as_str() {
            "gpt-5.2" | "gpt5.2" | "gpt-52" | "gpt52" => Some(Self::Gpt52),
            "gpt-5.2-pro" | "gpt5.2-pro" | "gpt-52-pro" | "gpt52pro" => Some(Self::Gpt52Pro),
            s if s.starts_with("gpt-5.2-") || s.starts_with("gpt5.2-") => Some(Self::Gpt52),

            "gpt-5.1" | "gpt5.1" | "gpt-51" | "gpt51" => Some(Self::Gpt51),
            "gpt-5.1-mini" | "gpt5.1-mini" | "gpt-51-mini" => Some(Self::Gpt51Mini),
            "gpt-5.1-codex" | "gpt5.1-codex" | "gpt-51-codex" => Some(Self::Gpt51Codex),
            s if s.starts_with("gpt-5.1-") || s.starts_with("gpt5.1-") => Some(Self::Gpt51),

            "gpt-5" | "gpt5" => Some(Self::Gpt5),
            "gpt-5-mini" | "gpt5-mini" => Some(Self::Gpt5Mini),
            "gpt-5-nano" | "gpt5-nano" => Some(Self::Gpt5Nano),
            s if s.starts_with("gpt-5-") || s.starts_with("gpt5-") => Some(Self::Gpt5),

            "o4-mini" | "o4mini" => Some(Self::O4Mini),
            "o3" => Some(Self::O3),
            "o3-mini" | "o3mini" => Some(Self::O3Mini),
            s if s.starts_with("o3-") => Some(Self::O3),
            "o1" => Some(Self::O1),
            "o1-mini" | "o1mini" => Some(Self::O1Mini),
            "o1-preview" | "o1preview" => Some(Self::O1Preview),
            s if s.starts_with("o1-") => Some(Self::O1),

            "gpt-4o" | "gpt4o" => Some(Self::Gpt4o),
            "gpt-4o-mini" | "gpt4o-mini" | "gpt-4o-mini-2024-07-18" => Some(Self::Gpt4oMini),
            s if s.starts_with("gpt-4o-") || s.starts_with("gpt4o-") => Some(Self::Gpt4o),

            "gpt-4" | "gpt4" | "gpt-4-turbo" | "gpt4-turbo" | "gpt-4-turbo-preview" => {
                Some(Self::Gpt4)
            },
            s if s.starts_with("gpt-4-") && !s.contains("4o") => Some(Self::Gpt4),

            "gpt-3.5-turbo" | "gpt-35-turbo" | "gpt3.5-turbo" | "gpt35-turbo" | "gpt-3.5" => {
                Some(Self::Gpt35Turbo)
            },
            s if s.starts_with("gpt-3.5-") || s.starts_with("gpt-35-") => Some(Self::Gpt35Turbo),

            "claude" | "claude-3" | "claude-3.5" | "claude-4" | "claude-4.5" | "claude-opus"
            | "claude-opus-4" | "claude-opus-4.5" | "claude-sonnet" | "claude-sonnet-4"
            | "claude-sonnet-4.5" | "claude-haiku" | "claude-haiku-4" | "claude-haiku-4.5"
            | "claude-instant" => Some(Self::Claude),
            s if s.starts_with("claude") => Some(Self::Claude),

            "gemini" | "gemini-pro" | "gemini-flash" | "gemini-ultra" | "gemini-1.5"
            | "gemini-1.5-pro" | "gemini-1.5-flash" | "gemini-2" | "gemini-2.5"
            | "gemini-2.5-pro" | "gemini-2.5-flash" | "gemini-3" | "gemini-3-pro" => {
                Some(Self::Gemini)
            },
            s if s.starts_with("gemini") => Some(Self::Gemini),

            "llama" | "llama-2" | "llama-3" | "llama-3.1" | "llama-3.2" | "llama-4" | "llama2"
            | "llama3" | "llama4" => Some(Self::Llama),
            "codellama" | "code-llama" | "llama-code" => Some(Self::CodeLlama),
            s if s.starts_with("llama") && !s.contains("code") => Some(Self::Llama),
            s if s.contains("codellama") || s.contains("code-llama") => Some(Self::CodeLlama),

            "mistral" | "mistral-large" | "mistral-large-3" | "mistral-medium"
            | "mistral-medium-3" | "mistral-small" | "mistral-small-3" | "codestral"
            | "devstral" | "ministral" => Some(Self::Mistral),
            s if s.starts_with("mistral") || s.contains("stral") => Some(Self::Mistral),

            "deepseek" | "deepseek-v3" | "deepseek-v3.2" | "deepseek-r1" | "deepseek-coder"
            | "deepseek-chat" | "deepseek-reasoner" => Some(Self::DeepSeek),
            s if s.starts_with("deepseek") => Some(Self::DeepSeek),

            "qwen" | "qwen2" | "qwen2.5" | "qwen3" | "qwen-72b" | "qwen-7b" | "qwen-coder" => {
                Some(Self::Qwen)
            },
            s if s.starts_with("qwen") => Some(Self::Qwen),

            "cohere" | "command-r" | "command-r-plus" | "command-r+" | "command" => {
                Some(Self::Cohere)
            },
            s if s.starts_with("cohere") || s.starts_with("command") => Some(Self::Cohere),

            "grok" | "grok-1" | "grok-2" | "grok-3" | "grok-beta" => Some(Self::Grok),
            s if s.starts_with("grok") => Some(Self::Grok),

            _ => None,
        }
    }

    /// Every supported model, in declaration order.
    pub fn all() -> &'static [Self] {
        &[
            Self::Gpt52,
            Self::Gpt52Pro,
            Self::Gpt51,
            Self::Gpt51Mini,
            Self::Gpt51Codex,
            Self::Gpt5,
            Self::Gpt5Mini,
            Self::Gpt5Nano,
            Self::O4Mini,
            Self::O3,
            Self::O3Mini,
            Self::O1,
            Self::O1Mini,
            Self::O1Preview,
            Self::Gpt4o,
            Self::Gpt4oMini,
            Self::Gpt4,
            Self::Gpt35Turbo,
            Self::Claude,
            Self::Gemini,
            Self::Llama,
            Self::CodeLlama,
            Self::Mistral,
            Self::DeepSeek,
            Self::Qwen,
            Self::Cohere,
            Self::Grok,
        ]
    }

    /// The vendor that publishes this model.
    pub fn vendor(&self) -> &'static str {
        match self {
            Self::Gpt52
            | Self::Gpt52Pro
            | Self::Gpt51
            | Self::Gpt51Mini
            | Self::Gpt51Codex
            | Self::Gpt5
            | Self::Gpt5Mini
            | Self::Gpt5Nano
            | Self::O4Mini
            | Self::O3
            | Self::O3Mini
            | Self::O1
            | Self::O1Mini
            | Self::O1Preview
            | Self::Gpt4o
            | Self::Gpt4oMini
            | Self::Gpt4
            | Self::Gpt35Turbo => "OpenAI",
            Self::Claude => "Anthropic",
            Self::Gemini => "Google",
            Self::Llama | Self::CodeLlama => "Meta",
            Self::Mistral => "Mistral AI",
            Self::DeepSeek => "DeepSeek",
            Self::Qwen => "Alibaba",
            Self::Cohere => "Cohere",
            Self::Grok => "xAI",
        }
    }
}

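// Example: resolving a model string reported by an API (the dated suffix below
// is hypothetical). Prefix rules map versioned releases onto their base model:
//
//     let model = TokenModel::from_model_name("GPT-4o-2024-11-20");
//     assert_eq!(model, Some(TokenModel::Gpt4o));
//     assert_eq!(model.unwrap().vendor(), "OpenAI");
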
// Lazily-initialized BPE tokenizers, shared process-wide.
static GPT4O_TOKENIZER: OnceLock<CoreBPE> = OnceLock::new();
static GPT4_TOKENIZER: OnceLock<CoreBPE> = OnceLock::new();

// Global count cache keyed by content hash, model, and whether the count was
// exact; keying on exactness keeps `Tokenizer::new()` and
// `Tokenizer::estimation_only()` from overwriting each other's entries.
static TOKEN_CACHE: OnceLock<DashMap<(u64, TokenModel, bool), u32>> = OnceLock::new();

fn get_token_cache() -> &'static DashMap<(u64, TokenModel, bool), u32> {
    TOKEN_CACHE.get_or_init(DashMap::new)
}

fn hash_content(content: &str) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    let mut hasher = DefaultHasher::new();
    content.hash(&mut hasher);
    hasher.finish()
}

fn get_gpt4o_tokenizer() -> &'static CoreBPE {
    GPT4O_TOKENIZER.get_or_init(|| {
        o200k_base().expect("tiktoken o200k_base initialization failed - please report this bug")
    })
}

fn get_gpt4_tokenizer() -> &'static CoreBPE {
    GPT4_TOKENIZER.get_or_init(|| {
        cl100k_base().expect("tiktoken cl100k_base initialization failed - please report this bug")
    })
}

/// Token counter: exact `tiktoken` counts for OpenAI models, heuristic
/// estimates for every other vendor, with optional process-wide caching.
pub struct Tokenizer {
    /// Use exact tokenizers when the model supports one.
    use_exact: bool,
    /// Consult and populate the global count cache.
    use_cache: bool,
}

impl Default for Tokenizer {
    fn default() -> Self {
        Self::new()
    }
}

impl Tokenizer {
    /// Exact counting where possible, with caching.
    pub fn new() -> Self {
        Self { use_exact: true, use_cache: true }
    }

    /// Heuristic estimation for every model, even those with exact tokenizers.
    pub fn estimation_only() -> Self {
        Self { use_exact: false, use_cache: true }
    }

    /// Exact counting without touching the global cache.
    pub fn without_cache() -> Self {
        Self { use_exact: true, use_cache: false }
    }

    /// Counts the tokens in `text` for `model`, consulting the cache first.
    #[must_use]
    pub fn count(&self, text: &str, model: TokenModel) -> u32 {
        if text.is_empty() {
            return 0;
        }

        if self.use_cache {
            let cache = get_token_cache();
            let content_hash = hash_content(text);
            let key = (content_hash, model, self.use_exact);

            if let Some(count) = cache.get(&key) {
                return *count;
            }

            let count = self.count_uncached(text, model);
            cache.insert(key, count);
            count
        } else {
            self.count_uncached(text, model)
        }
    }

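    // A minimal usage sketch: repeated counts of the same text hit the global
    // cache, so only the first call pays for BPE encoding:
    //
    //     let tok = Tokenizer::new();
    //     let first = tok.count("fn main() {}", TokenModel::Gpt4o); // encodes
    //     let again = tok.count("fn main() {}", TokenModel::Gpt4o); // cache hit
    //     assert_eq!(first, again);
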
    fn count_uncached(&self, text: &str, model: TokenModel) -> u32 {
        if self.use_exact && model.has_exact_tokenizer() {
            self.count_exact(text, model)
        } else {
            self.estimate(text, model)
        }
    }

    fn count_exact(&self, text: &str, model: TokenModel) -> u32 {
        if model.uses_o200k() {
            let tokenizer = get_gpt4o_tokenizer();
            tokenizer.encode_ordinary(text).len() as u32
        } else if model.uses_cl100k() {
            let tokenizer = get_gpt4_tokenizer();
            tokenizer.encode_ordinary(text).len() as u32
        } else {
            self.estimate(text, model)
        }
    }

    fn estimate(&self, text: &str, model: TokenModel) -> u32 {
        if text.is_empty() {
            return 0;
        }

        let chars_per_token = model.chars_per_token();
        let len = text.len() as f32;

        let mut estimate = len / chars_per_token;

        // Runs of spaces and tabs compress into few tokens.
        let whitespace_count = text.chars().filter(|c| *c == ' ' || *c == '\t').count() as f32;
        estimate -= whitespace_count * 0.3;

        // Newlines usually tokenize separately.
        let newline_count = text.chars().filter(|c| *c == '\n').count() as f32;
        estimate += newline_count * 0.5;

        // Punctuation common in source code tends to split into extra tokens.
        let special_chars = text
            .chars()
            .filter(|c| {
                matches!(
                    c,
                    '{' | '}'
                        | '('
                        | ')'
                        | '['
                        | ']'
                        | ';'
                        | ':'
                        | ','
                        | '.'
                        | '='
                        | '+'
                        | '-'
                        | '*'
                        | '/'
                        | '<'
                        | '>'
                        | '!'
                        | '&'
                        | '|'
                        | '@'
                        | '#'
                        | '$'
                        | '%'
                        | '^'
                        | '~'
                        | '`'
                        | '"'
                        | '\''
                )
            })
            .count() as f32;

        if matches!(
            model,
            TokenModel::CodeLlama | TokenModel::Claude | TokenModel::DeepSeek | TokenModel::Mistral
        ) {
            estimate += special_chars * 0.3;
        }

        estimate.ceil().max(1.0) as u32
    }

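    // Worked example of the heuristic (values are illustrative): the 12-byte
    // string "let x = 42;\n" under Claude (3.5 chars/token, code-adjusted):
    //   base      12 / 3.5            ≈ 3.43
    //   spaces    3 * -0.3            = -0.90
    //   newlines  1 * +0.5            = +0.50
    //   specials  2 ('=', ';') * +0.3 = +0.60
    //   ceil(3.63)                    = 4 tokens
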
    /// Counts `text` under every tokenizer family at once.
    pub fn count_all(&self, text: &str) -> TokenCounts {
        TokenCounts {
            o200k: self.count(text, TokenModel::Gpt4o),
            cl100k: self.count(text, TokenModel::Gpt4),
            claude: self.count(text, TokenModel::Claude),
            gemini: self.count(text, TokenModel::Gemini),
            llama: self.count(text, TokenModel::Llama),
            mistral: self.count(text, TokenModel::Mistral),
            deepseek: self.count(text, TokenModel::DeepSeek),
            qwen: self.count(text, TokenModel::Qwen),
            cohere: self.count(text, TokenModel::Cohere),
            grok: self.count(text, TokenModel::Grok),
        }
    }

    /// The model family with the lowest token count for `text`.
    pub fn most_efficient_model(&self, text: &str) -> (TokenModel, u32) {
        let counts = self.count_all(text);
        let models = [
            (TokenModel::Gpt4o, counts.o200k),
            (TokenModel::Gpt4, counts.cl100k),
            (TokenModel::Claude, counts.claude),
            (TokenModel::Gemini, counts.gemini),
            (TokenModel::Llama, counts.llama),
            (TokenModel::Mistral, counts.mistral),
            (TokenModel::DeepSeek, counts.deepseek),
            (TokenModel::Qwen, counts.qwen),
            (TokenModel::Cohere, counts.cohere),
            (TokenModel::Grok, counts.grok),
        ];

        models
            .into_iter()
            .min_by_key(|(_, count)| *count)
            .unwrap_or((TokenModel::Claude, 0))
    }

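    // Sketch: pick the cheapest family for a prompt (`prompt` is a
    // hypothetical local). Counts for non-OpenAI vendors are heuristic, so
    // treat the winner as a rough guide only:
    //
    //     let (model, tokens) = tok.most_efficient_model(prompt);
    //     println!("{} would use ~{} tokens", model.name(), tokens);
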
    /// Truncates `text` to at most `budget` tokens for `model`, backing up to
    /// the nearest space or newline so no word is cut mid-way.
    pub fn truncate_to_budget<'a>(&self, text: &'a str, model: TokenModel, budget: u32) -> &'a str {
        let current = self.count(text, model);
        if current <= budget {
            return text;
        }

        // Binary-search the longest prefix whose count fits the budget.
        let mut low = 0usize;
        let mut high = text.len();

        while low < high {
            let mid_raw = (low + high).div_ceil(2);
            let mid = text.floor_char_boundary(mid_raw);

            if mid <= low {
                break;
            }

            let count = self.count(&text[..mid], model);

            if count <= budget {
                low = mid;
            } else {
                high = mid.saturating_sub(1);
            }
        }

        // Back up to the last space or newline to avoid splitting a word.
        let mut end = low;
        while end > 0 {
            let c = text.as_bytes().get(end - 1).copied().unwrap_or(0);
            if c == b' ' || c == b'\n' {
                break;
            }
            end -= 1;
        }

        if end > 0 {
            &text[..end]
        } else {
            // No whitespace in the prefix: fall back to the raw boundary.
            let low = text.floor_char_boundary(low);
            &text[..low]
        }
    }

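    // Sketch of the contract (the budget value is illustrative): the result
    // is a prefix of the input, fits the budget, and ends at a word boundary:
    //
    //     let tok = Tokenizer::new();
    //     let s = tok.truncate_to_budget("alpha beta gamma delta", TokenModel::Gpt4o, 2);
    //     assert!(tok.count(s, TokenModel::Gpt4o) <= 2);
    //     assert!("alpha beta gamma delta".starts_with(s));
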
    /// True when `text` costs more than `budget` tokens under `model`.
    pub fn exceeds_budget(&self, text: &str, model: TokenModel, budget: u32) -> bool {
        self.count(text, model) > budget
    }
}

/// Token counts for the same text under each tokenizer family.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct TokenCounts {
    pub o200k: u32,
    pub cl100k: u32,
    pub claude: u32,
    pub gemini: u32,
    pub llama: u32,
    pub mistral: u32,
    pub deepseek: u32,
    pub qwen: u32,
    pub cohere: u32,
    pub grok: u32,
}

impl TokenCounts {
    pub fn zero() -> Self {
        Self::default()
    }

    /// The count for `model`, routed to its tokenizer family's field.
    pub fn get(&self, model: TokenModel) -> u32 {
        match model {
            TokenModel::Gpt52
            | TokenModel::Gpt52Pro
            | TokenModel::Gpt51
            | TokenModel::Gpt51Mini
            | TokenModel::Gpt51Codex
            | TokenModel::Gpt5
            | TokenModel::Gpt5Mini
            | TokenModel::Gpt5Nano
            | TokenModel::O4Mini
            | TokenModel::O3
            | TokenModel::O3Mini
            | TokenModel::O1
            | TokenModel::O1Mini
            | TokenModel::O1Preview
            | TokenModel::Gpt4o
            | TokenModel::Gpt4oMini => self.o200k,
            TokenModel::Gpt4 | TokenModel::Gpt35Turbo => self.cl100k,
            TokenModel::Claude => self.claude,
            TokenModel::Gemini => self.gemini,
            TokenModel::Llama | TokenModel::CodeLlama => self.llama,
            TokenModel::Mistral => self.mistral,
            TokenModel::DeepSeek => self.deepseek,
            TokenModel::Qwen => self.qwen,
            TokenModel::Cohere => self.cohere,
            TokenModel::Grok => self.grok,
        }
    }

    /// Sets the count for `model`'s tokenizer family.
    pub fn set(&mut self, model: TokenModel, count: u32) {
        match model {
            TokenModel::Gpt52
            | TokenModel::Gpt52Pro
            | TokenModel::Gpt51
            | TokenModel::Gpt51Mini
            | TokenModel::Gpt51Codex
            | TokenModel::Gpt5
            | TokenModel::Gpt5Mini
            | TokenModel::Gpt5Nano
            | TokenModel::O4Mini
            | TokenModel::O3
            | TokenModel::O3Mini
            | TokenModel::O1
            | TokenModel::O1Mini
            | TokenModel::O1Preview
            | TokenModel::Gpt4o
            | TokenModel::Gpt4oMini => self.o200k = count,
            TokenModel::Gpt4 | TokenModel::Gpt35Turbo => self.cl100k = count,
            TokenModel::Claude => self.claude = count,
            TokenModel::Gemini => self.gemini = count,
            TokenModel::Llama | TokenModel::CodeLlama => self.llama = count,
            TokenModel::Mistral => self.mistral = count,
            TokenModel::DeepSeek => self.deepseek = count,
            TokenModel::Qwen => self.qwen = count,
            TokenModel::Cohere => self.cohere = count,
            TokenModel::Grok => self.grok = count,
        }
    }

    /// Sum across all families; a cross-tokenizer aggregate, not the cost
    /// under any single model.
    pub fn total(&self) -> u64 {
        self.o200k as u64
            + self.cl100k as u64
            + self.claude as u64
            + self.gemini as u64
            + self.llama as u64
            + self.mistral as u64
            + self.deepseek as u64
            + self.qwen as u64
            + self.cohere as u64
            + self.grok as u64
    }

    /// Adds `other`'s counts field-by-field.
    pub fn add(&mut self, other: &TokenCounts) {
        self.o200k += other.o200k;
        self.cl100k += other.cl100k;
        self.claude += other.claude;
        self.gemini += other.gemini;
        self.llama += other.llama;
        self.mistral += other.mistral;
        self.deepseek += other.deepseek;
        self.qwen += other.qwen;
        self.cohere += other.cohere;
        self.grok += other.grok;
    }

    /// The smallest count across all families.
    pub fn min(&self) -> u32 {
        [
            self.o200k,
            self.cl100k,
            self.claude,
            self.gemini,
            self.llama,
            self.mistral,
            self.deepseek,
            self.qwen,
            self.cohere,
            self.grok,
        ]
        .into_iter()
        .min()
        .unwrap_or(0)
    }

    /// The largest count across all families.
    pub fn max(&self) -> u32 {
        [
            self.o200k,
            self.cl100k,
            self.claude,
            self.gemini,
            self.llama,
            self.mistral,
            self.deepseek,
            self.qwen,
            self.cohere,
            self.grok,
        ]
        .into_iter()
        .max()
        .unwrap_or(0)
    }
}

impl std::ops::Add for TokenCounts {
    type Output = Self;

    fn add(self, rhs: Self) -> Self::Output {
        Self {
            o200k: self.o200k + rhs.o200k,
            cl100k: self.cl100k + rhs.cl100k,
            claude: self.claude + rhs.claude,
            gemini: self.gemini + rhs.gemini,
            llama: self.llama + rhs.llama,
            mistral: self.mistral + rhs.mistral,
            deepseek: self.deepseek + rhs.deepseek,
            qwen: self.qwen + rhs.qwen,
            cohere: self.cohere + rhs.cohere,
            grok: self.grok + rhs.grok,
        }
    }
}

impl std::iter::Sum for TokenCounts {
    fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
        iter.fold(Self::zero(), |acc, x| acc + x)
    }
}

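// Because `TokenCounts` implements `Add` and `Sum`, per-chunk counts aggregate
// directly (a sketch; `tok` and `chunks` are hypothetical locals):
//
//     let total: TokenCounts = chunks.iter().map(|c| tok.count_all(c)).sum();
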
/// Fast length-based estimate that skips both the cache and the exact
/// tokenizers; suitable for hot paths where a rough figure is enough.
pub fn quick_estimate(text: &str, model: TokenModel) -> u32 {
    if text.is_empty() {
        return 0;
    }
    let chars_per_token = model.chars_per_token();
    (text.len() as f32 / chars_per_token).ceil().max(1.0) as u32
}

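// For example, under any o200k-family model (4.0 chars/token) a 40-byte string
// estimates to ceil(40 / 4.0) = 10 tokens, regardless of content:
//
//     let approx = quick_estimate(&"x".repeat(40), TokenModel::Gpt4o);
//     assert_eq!(approx, 10);
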
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_exact_gpt4o_counting() {
        let tokenizer = Tokenizer::new();
        let text = "Hello, world!";
        let count = tokenizer.count(text, TokenModel::Gpt4o);

        assert!(count > 0);
        assert!(count < 10);
    }

    #[test]
    fn test_exact_gpt5_counting() {
        let tokenizer = Tokenizer::new();
        let text = "def hello():\n print('Hello, World!')\n";

        let count_52 = tokenizer.count(text, TokenModel::Gpt52);
        let count_51 = tokenizer.count(text, TokenModel::Gpt51);
        let count_5 = tokenizer.count(text, TokenModel::Gpt5);
        let count_4o = tokenizer.count(text, TokenModel::Gpt4o);

        assert_eq!(count_52, count_51);
        assert_eq!(count_51, count_5);
        assert_eq!(count_5, count_4o);
        assert!(count_52 > 5);
        assert!(count_52 < 30);
    }

    #[test]
    fn test_exact_o_series_counting() {
        let tokenizer = Tokenizer::new();
        let text = "Solve this math problem: 2 + 2 = ?";

        let count_o4 = tokenizer.count(text, TokenModel::O4Mini);
        let count_o3 = tokenizer.count(text, TokenModel::O3);
        let count_o1 = tokenizer.count(text, TokenModel::O1);
        let count_4o = tokenizer.count(text, TokenModel::Gpt4o);

        assert_eq!(count_o4, count_o3);
        assert_eq!(count_o3, count_o1);
        assert_eq!(count_o1, count_4o);
    }

    #[test]
    fn test_exact_gpt4_counting() {
        let tokenizer = Tokenizer::new();
        let text = "def hello():\n print('Hello, World!')\n";
        let count = tokenizer.count(text, TokenModel::Gpt4);

        assert!(count > 5);
        assert!(count < 30);
    }

    #[test]
    fn test_estimation_claude() {
        let tokenizer = Tokenizer::new();
        let text = "This is a test string for token estimation.";
        let count = tokenizer.count(text, TokenModel::Claude);

        assert!(count > 5);
        assert!(count < 30);
    }

    #[test]
    fn test_estimation_new_vendors() {
        let tokenizer = Tokenizer::new();
        let text = "This is a test string for new vendor token estimation.";

        let mistral = tokenizer.count(text, TokenModel::Mistral);
        let deepseek = tokenizer.count(text, TokenModel::DeepSeek);
        let qwen = tokenizer.count(text, TokenModel::Qwen);
        let cohere = tokenizer.count(text, TokenModel::Cohere);
        let grok = tokenizer.count(text, TokenModel::Grok);

        assert!(mistral > 5 && mistral < 50);
        assert!(deepseek > 5 && deepseek < 50);
        assert!(qwen > 5 && qwen < 50);
        assert!(cohere > 5 && cohere < 50);
        assert!(grok > 5 && grok < 50);
    }

    #[test]
    fn test_count_all() {
        let tokenizer = Tokenizer::new();
        let text = "function hello() { console.log('hello'); }";
        let counts = tokenizer.count_all(text);

        assert!(counts.o200k > 0);
        assert!(counts.cl100k > 0);
        assert!(counts.claude > 0);
        assert!(counts.gemini > 0);
        assert!(counts.llama > 0);
        assert!(counts.mistral > 0);
        assert!(counts.deepseek > 0);
        assert!(counts.qwen > 0);
        assert!(counts.cohere > 0);
        assert!(counts.grok > 0);
    }

    #[test]
    fn test_empty_string() {
        let tokenizer = Tokenizer::new();
        assert_eq!(tokenizer.count("", TokenModel::Claude), 0);
        assert_eq!(tokenizer.count("", TokenModel::Gpt4o), 0);
        assert_eq!(tokenizer.count("", TokenModel::Gpt52), 0);
        assert_eq!(tokenizer.count("", TokenModel::O3), 0);
    }

    #[test]
    fn test_truncate_to_budget() {
        let tokenizer = Tokenizer::new();
        let text = "This is a fairly long string that we want to truncate to fit within a smaller token budget for testing purposes.";

        let truncated = tokenizer.truncate_to_budget(text, TokenModel::Gpt4, 10);
        let count = tokenizer.count(truncated, TokenModel::Gpt4);

        assert!(count <= 10);
        assert!(truncated.len() < text.len());
    }

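    // Added check (follows from the cache key including `use_exact`): exact
    // and estimation-only tokenizers must not clobber each other's entries.
    #[test]
    fn test_cache_separates_counting_modes() {
        let exact = Tokenizer::new();
        let estimated = Tokenizer::estimation_only();
        let text = "Cache keying across counting modes.";

        let e = exact.count(text, TokenModel::Gpt4o);
        let a = estimated.count(text, TokenModel::Gpt4o);

        // Re-query both; each mode must be stable regardless of call order.
        assert_eq!(exact.count(text, TokenModel::Gpt4o), e);
        assert_eq!(estimated.count(text, TokenModel::Gpt4o), a);
    }
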
    #[test]
    fn test_quick_estimate() {
        let count = quick_estimate("Hello world", TokenModel::Claude);
        assert!(count > 0);
        assert!(count < 10);
    }

    #[test]
    fn test_token_counts_add() {
        let a = TokenCounts {
            o200k: 8,
            cl100k: 9,
            claude: 10,
            gemini: 8,
            llama: 10,
            mistral: 10,
            deepseek: 10,
            qwen: 10,
            cohere: 10,
            grok: 10,
        };
        let b = TokenCounts {
            o200k: 4,
            cl100k: 5,
            claude: 5,
            gemini: 4,
            llama: 5,
            mistral: 5,
            deepseek: 5,
            qwen: 5,
            cohere: 5,
            grok: 5,
        };
        let sum = a + b;

        assert_eq!(sum.o200k, 12);
        assert_eq!(sum.cl100k, 14);
        assert_eq!(sum.claude, 15);
    }

    #[test]
    fn test_token_counts_min_max() {
        let counts = TokenCounts {
            o200k: 100,
            cl100k: 110,
            claude: 95,
            gemini: 105,
            llama: 98,
            mistral: 97,
            deepseek: 96,
            qwen: 99,
            cohere: 102,
            grok: 101,
        };

        assert_eq!(counts.min(), 95);
        assert_eq!(counts.max(), 110);
    }

    #[test]
    fn test_most_efficient_model() {
        let tokenizer = Tokenizer::new();
        let text = "const x = 42;";
        let (_model, count) = tokenizer.most_efficient_model(text);

        assert!(count > 0);
    }

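    // Added check: estimation-only mode should land in the same ballpark as
    // the exact tokenizer for OpenAI models (bounds here are generous).
    #[test]
    fn test_estimation_only_mode() {
        let tokenizer = Tokenizer::estimation_only();
        let count = tokenizer.count("Hello, world! This is a test.", TokenModel::Gpt4o);

        assert!(count > 0);
        assert!(count < 30);
    }
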
    #[test]
    fn test_from_model_name_openai() {
        assert_eq!(TokenModel::from_model_name("gpt-5.2"), Some(TokenModel::Gpt52));
        assert_eq!(TokenModel::from_model_name("GPT-5.2"), Some(TokenModel::Gpt52));
        assert_eq!(TokenModel::from_model_name("gpt-5.2-pro"), Some(TokenModel::Gpt52Pro));
        assert_eq!(TokenModel::from_model_name("gpt-5.2-2025-12-11"), Some(TokenModel::Gpt52));

        assert_eq!(TokenModel::from_model_name("gpt-5.1"), Some(TokenModel::Gpt51));
        assert_eq!(TokenModel::from_model_name("gpt-5.1-mini"), Some(TokenModel::Gpt51Mini));
        assert_eq!(TokenModel::from_model_name("gpt-5.1-codex"), Some(TokenModel::Gpt51Codex));

        assert_eq!(TokenModel::from_model_name("gpt-5"), Some(TokenModel::Gpt5));
        assert_eq!(TokenModel::from_model_name("gpt-5-mini"), Some(TokenModel::Gpt5Mini));
        assert_eq!(TokenModel::from_model_name("gpt-5-nano"), Some(TokenModel::Gpt5Nano));

        assert_eq!(TokenModel::from_model_name("o4-mini"), Some(TokenModel::O4Mini));
        assert_eq!(TokenModel::from_model_name("o3"), Some(TokenModel::O3));
        assert_eq!(TokenModel::from_model_name("o3-mini"), Some(TokenModel::O3Mini));
        assert_eq!(TokenModel::from_model_name("o1"), Some(TokenModel::O1));
        assert_eq!(TokenModel::from_model_name("o1-mini"), Some(TokenModel::O1Mini));
        assert_eq!(TokenModel::from_model_name("o1-preview"), Some(TokenModel::O1Preview));

        assert_eq!(TokenModel::from_model_name("gpt-4o"), Some(TokenModel::Gpt4o));
        assert_eq!(TokenModel::from_model_name("gpt-4o-mini"), Some(TokenModel::Gpt4oMini));

        assert_eq!(TokenModel::from_model_name("gpt-4"), Some(TokenModel::Gpt4));
        assert_eq!(TokenModel::from_model_name("gpt-3.5-turbo"), Some(TokenModel::Gpt35Turbo));
    }

    #[test]
    fn test_from_model_name_other_vendors() {
        assert_eq!(TokenModel::from_model_name("claude"), Some(TokenModel::Claude));
        assert_eq!(TokenModel::from_model_name("claude-sonnet"), Some(TokenModel::Claude));
        assert_eq!(TokenModel::from_model_name("claude-opus-4.5"), Some(TokenModel::Claude));

        assert_eq!(TokenModel::from_model_name("gemini"), Some(TokenModel::Gemini));
        assert_eq!(TokenModel::from_model_name("gemini-2.5-pro"), Some(TokenModel::Gemini));

        assert_eq!(TokenModel::from_model_name("llama-4"), Some(TokenModel::Llama));
        assert_eq!(TokenModel::from_model_name("codellama"), Some(TokenModel::CodeLlama));

        assert_eq!(TokenModel::from_model_name("mistral"), Some(TokenModel::Mistral));
        assert_eq!(TokenModel::from_model_name("codestral"), Some(TokenModel::Mistral));

        assert_eq!(TokenModel::from_model_name("deepseek"), Some(TokenModel::DeepSeek));
        assert_eq!(TokenModel::from_model_name("deepseek-r1"), Some(TokenModel::DeepSeek));

        assert_eq!(TokenModel::from_model_name("qwen3"), Some(TokenModel::Qwen));

        assert_eq!(TokenModel::from_model_name("cohere"), Some(TokenModel::Cohere));
        assert_eq!(TokenModel::from_model_name("command-r+"), Some(TokenModel::Cohere));

        assert_eq!(TokenModel::from_model_name("grok-3"), Some(TokenModel::Grok));
    }

    #[test]
    fn test_from_model_name_unknown() {
        assert_eq!(TokenModel::from_model_name("unknown-model"), None);
        assert_eq!(TokenModel::from_model_name(""), None);
        assert_eq!(TokenModel::from_model_name("random"), None);
    }

    #[test]
    fn test_model_properties() {
        assert!(TokenModel::Gpt52.uses_o200k());
        assert!(TokenModel::O3.uses_o200k());
        assert!(TokenModel::Gpt4o.uses_o200k());
        assert!(!TokenModel::Gpt4.uses_o200k());
        assert!(!TokenModel::Claude.uses_o200k());

        assert!(TokenModel::Gpt4.uses_cl100k());
        assert!(TokenModel::Gpt35Turbo.uses_cl100k());
        assert!(!TokenModel::Gpt52.uses_cl100k());
        assert!(!TokenModel::Claude.uses_cl100k());

        assert!(TokenModel::Gpt52.has_exact_tokenizer());
        assert!(TokenModel::Gpt4.has_exact_tokenizer());
        assert!(!TokenModel::Claude.has_exact_tokenizer());
        assert!(!TokenModel::Mistral.has_exact_tokenizer());

        assert_eq!(TokenModel::Gpt52.vendor(), "OpenAI");
        assert_eq!(TokenModel::Claude.vendor(), "Anthropic");
        assert_eq!(TokenModel::Gemini.vendor(), "Google");
        assert_eq!(TokenModel::Llama.vendor(), "Meta");
        assert_eq!(TokenModel::Mistral.vendor(), "Mistral AI");
        assert_eq!(TokenModel::DeepSeek.vendor(), "DeepSeek");
        assert_eq!(TokenModel::Qwen.vendor(), "Alibaba");
        assert_eq!(TokenModel::Cohere.vendor(), "Cohere");
        assert_eq!(TokenModel::Grok.vendor(), "xAI");
    }

    #[test]
    fn test_all_models() {
        let all = TokenModel::all();
        assert_eq!(all.len(), 27);
        assert!(all.contains(&TokenModel::Gpt52));
        assert!(all.contains(&TokenModel::O3));
        assert!(all.contains(&TokenModel::Claude));
        assert!(all.contains(&TokenModel::Mistral));
    }

    #[test]
    fn test_tokenizer_caching() {
        let tokenizer = Tokenizer::new();
        let text = "This is a test string for caching verification.";

        let count1 = tokenizer.count(text, TokenModel::Gpt4o);
        let count2 = tokenizer.count(text, TokenModel::Gpt4o);

        assert_eq!(count1, count2);
        assert!(count1 > 0);

        let count_claude = tokenizer.count(text, TokenModel::Claude);
        assert!(count_claude > 0);
    }

    #[test]
    fn test_tokenizer_without_cache() {
        let tokenizer = Tokenizer::without_cache();
        let text = "Test text for uncached counting.";

        let count = tokenizer.count(text, TokenModel::Gpt4o);
        assert!(count > 0);
        assert!(count < 20);
    }

    #[test]
    fn test_all_models_return_nonzero_for_content() {
        let tokenizer = Tokenizer::new();
        let content = "fn main() { println!(\"Hello, world!\"); }";

        for model in TokenModel::all() {
            let count = tokenizer.count(content, *model);
            assert!(count > 0, "Model {:?} returned 0 tokens for non-empty content", model);
        }
    }

    #[test]
    fn test_unicode_content_handling() {
        let tokenizer = Tokenizer::new();

        let unicode_samples = [
            "Hello, 世界! 🌍",
            "Привет мир",
            "مرحبا بالعالم",
            "🦀🦀🦀 Rust 🦀🦀🦀",
            "const λ = (x) => x * 2;",
        ];

        for sample in unicode_samples {
            let count = tokenizer.count(sample, TokenModel::Gpt4o);
            assert!(count > 0, "Unicode sample '{}' returned 0 tokens", sample);

            let truncated = tokenizer.truncate_to_budget(sample, TokenModel::Gpt4o, 3);
            assert!(truncated.is_char_boundary(truncated.len()));
        }
    }

    #[test]
    fn test_very_long_content() {
        let tokenizer = Tokenizer::new();

        let long_content: String = (0..10000)
            .map(|i| format!("Line {}: some repeated content here\n", i))
            .collect();

        let count = tokenizer.count(&long_content, TokenModel::Claude);
        assert!(count > 1000, "Long content should have many tokens");

        let truncated = tokenizer.truncate_to_budget(&long_content, TokenModel::Claude, 100);
        let truncated_count = tokenizer.count(truncated, TokenModel::Claude);
        assert!(truncated_count <= 100, "Truncation should respect budget");
    }

    #[test]
    fn test_whitespace_only_content() {
        let tokenizer = Tokenizer::new();

        let whitespace_samples = [" ", "\t\t\t", "\n\n\n", " \t \n "];

        // Whitespace-only input must not panic; the exact count is unspecified.
        for sample in whitespace_samples {
            let _count = tokenizer.count(sample, TokenModel::Gpt4o);
        }
    }

    #[test]
    fn test_special_characters_heavy_code() {
        let tokenizer = Tokenizer::new();

        let code = r#"
        fn process<T: Clone + Debug>(items: &[T]) -> Result<Vec<T>, Error> {
            items.iter()
                .filter(|x| x.is_valid())
                .map(|x| x.clone())
                .collect::<Result<Vec<_>, _>>()
        }
        "#;

        let count = tokenizer.count(code, TokenModel::CodeLlama);
        assert!(count > 10, "Code content should have meaningful token count");

        let claude_count = tokenizer.count(code, TokenModel::Claude);
        assert!(claude_count > 10);
    }

    #[test]
    fn test_model_get_consistency() {
        let counts = TokenCounts {
            o200k: 100,
            cl100k: 110,
            claude: 95,
            gemini: 105,
            llama: 98,
            mistral: 97,
            deepseek: 96,
            qwen: 99,
            cohere: 102,
            grok: 101,
        };

        assert_eq!(counts.get(TokenModel::Gpt52), 100);
        assert_eq!(counts.get(TokenModel::Gpt4o), 100);
        assert_eq!(counts.get(TokenModel::O3), 100);

        assert_eq!(counts.get(TokenModel::Gpt4), 110);
        assert_eq!(counts.get(TokenModel::Gpt35Turbo), 110);

        assert_eq!(counts.get(TokenModel::Claude), 95);
        assert_eq!(counts.get(TokenModel::Gemini), 105);
        assert_eq!(counts.get(TokenModel::Llama), 98);
        assert_eq!(counts.get(TokenModel::CodeLlama), 98);
        assert_eq!(counts.get(TokenModel::Mistral), 97);
        assert_eq!(counts.get(TokenModel::DeepSeek), 96);
        assert_eq!(counts.get(TokenModel::Qwen), 99);
        assert_eq!(counts.get(TokenModel::Cohere), 102);
        assert_eq!(counts.get(TokenModel::Grok), 101);
    }

    #[test]
    fn test_budget_exactly_met() {
        let tokenizer = Tokenizer::new();
        let text = "Hello world!";
        let exact_budget = tokenizer.count(text, TokenModel::Gpt4o);

        let truncated = tokenizer.truncate_to_budget(text, TokenModel::Gpt4o, exact_budget);
        assert_eq!(truncated, text);
    }

    #[test]
    fn test_exceeds_budget_check() {
        let tokenizer = Tokenizer::new();
        let text = "A fairly long string that should have a decent number of tokens.";

        assert!(tokenizer.exceeds_budget(text, TokenModel::Claude, 1));
        assert!(!tokenizer.exceeds_budget(text, TokenModel::Claude, 1000));
        assert!(!tokenizer.exceeds_budget("", TokenModel::Claude, 0));
    }
}