token_count/tokenizers/
mod.rs1pub mod claude;
43pub mod google;
44pub mod openai;
45pub mod registry;
46
47use std::fmt;
48
/// A token count tagged as either an estimate or an exact figure.
///
/// Both variants carry the count itself as a `usize`; the variant only
/// records which kind of count it is.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TokenCount {
    /// A count flagged as an estimate.
    Estimated(usize),

    /// A count flagged as exact.
    Exact(usize),
}
58
59impl TokenCount {
60 pub fn value(&self) -> usize {
62 match self {
63 Self::Estimated(n) | Self::Exact(n) => *n,
64 }
65 }
66
67 pub fn is_estimated(&self) -> bool {
69 matches!(self, Self::Estimated(_))
70 }
71
72 pub fn is_exact(&self) -> bool {
74 matches!(self, Self::Exact(_))
75 }
76}
77
78impl fmt::Display for TokenCount {
79 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
80 match self {
81 Self::Estimated(n) => write!(f, "~{}", n),
82 Self::Exact(n) => write!(f, "{}", n),
83 }
84 }
85}
86
/// A single token, described by a numeric id paired with a piece of text.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct TokenDetail {
    /// Numeric id of the token.
    ///
    /// NOTE(review): presumably the id within the tokenizer's vocabulary;
    /// confirm against the tokenizer implementations.
    pub id: u32,

    /// Text associated with the token.
    ///
    /// NOTE(review): presumably the text this token decodes to; confirm
    /// against the tokenizer implementations.
    pub text: String,
}
95
96pub trait Tokenizer: Send + Sync {
98 fn count_tokens(&self, text: &str) -> anyhow::Result<usize>;
100
101 fn get_model_info(&self) -> ModelInfo;
103
104 fn encode_with_details(&self, _text: &str) -> anyhow::Result<Option<Vec<TokenDetail>>> {
109 Ok(None)
110 }
111}
112
/// Descriptive metadata about a model, as returned by
/// `Tokenizer::get_model_info`.
#[derive(Clone, Debug)]
pub struct ModelInfo {
    /// Name of the model.
    pub name: String,

    /// Name of the encoding.
    ///
    /// NOTE(review): presumably the tokenizer encoding the model uses;
    /// confirm against the tokenizer implementations.
    pub encoding: String,

    /// Size of the context window.
    ///
    /// NOTE(review): presumably measured in tokens; confirm the unit.
    pub context_window: usize,

    /// Free-form description of the model.
    pub description: String,
}
121
122impl fmt::Display for ModelInfo {
123 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
124 write!(f, "{} ({})", self.name, self.encoding)
125 }
126}
127
128#[derive(Debug, Clone)]
130pub struct TokenizationResult {
131 pub token_count: usize,
132 pub model_info: ModelInfo,
133 pub token_details: Option<Vec<TokenDetail>>,
135}
136
#[cfg(test)]
mod tests {
    use super::*;

    /// An estimated count renders with a leading `~`.
    #[test]
    fn test_token_count_display_estimated() {
        let rendered = TokenCount::Estimated(42).to_string();
        assert_eq!(rendered, "~42");
    }

    /// An exact count renders as the bare number.
    #[test]
    fn test_token_count_display_exact() {
        let rendered = TokenCount::Exact(42).to_string();
        assert_eq!(rendered, "42");
    }

    /// `value` returns the wrapped number for both variants.
    #[test]
    fn test_token_count_value() {
        let estimated = TokenCount::Estimated(42);
        let exact = TokenCount::Exact(42);
        assert_eq!(estimated.value(), 42);
        assert_eq!(exact.value(), 42);
    }

    /// `is_estimated` holds only for the `Estimated` variant.
    #[test]
    fn test_token_count_is_estimated() {
        let estimated = TokenCount::Estimated(42);
        let exact = TokenCount::Exact(42);
        assert!(estimated.is_estimated());
        assert!(!exact.is_estimated());
    }

    /// `is_exact` holds only for the `Exact` variant.
    #[test]
    fn test_token_count_is_exact() {
        let estimated = TokenCount::Estimated(42);
        let exact = TokenCount::Exact(42);
        assert!(!estimated.is_exact());
        assert!(exact.is_exact());
    }

    /// Equality compares the variant as well as the wrapped number.
    #[test]
    fn test_token_count_equality() {
        let estimated = TokenCount::Estimated(42);
        let exact = TokenCount::Exact(42);
        assert_eq!(estimated, TokenCount::Estimated(42));
        assert_eq!(exact, TokenCount::Exact(42));
        assert_ne!(estimated, exact);
    }
}