// token_count/tokenizers/google/mod.rs
mod models;
mod tokenizer;

pub use models::google_models;

use crate::error::TokenError;
use crate::tokenizers::registry::ModelConfig;
use crate::tokenizers::{ModelInfo, TokenDetail, Tokenizer};
use tokenizer::GeminiTokenizer;
13pub struct GoogleTokenizer {
15 gemini: GeminiTokenizer,
17
18 config: ModelConfig,
20}
21
22impl GoogleTokenizer {
23 pub fn new(config: ModelConfig) -> Result<Self, TokenError> {
32 let gemini = GeminiTokenizer::new(&config.name)?;
33 Ok(Self { gemini, config })
34 }
35}
36
37impl Tokenizer for GoogleTokenizer {
38 fn count_tokens(&self, text: &str) -> anyhow::Result<usize> {
39 self.gemini.count_tokens(text)
40 }
41
42 fn get_model_info(&self) -> ModelInfo {
43 ModelInfo {
44 name: self.config.name.clone(),
45 encoding: self.config.encoding.clone(),
46 context_window: self.config.context_window,
47 description: self.config.description.clone(),
48 }
49 }
50
51 fn encode_with_details(&self, text: &str) -> anyhow::Result<Option<Vec<TokenDetail>>> {
52 let token_details = self.gemini.compute_token_details(text)?;
53
54 let details: Vec<TokenDetail> =
55 token_details.into_iter().map(|(id, text)| TokenDetail { id, text }).collect();
56
57 Ok(Some(details))
58 }
59}
60
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tokenizers::Tokenizer;

    /// First registered Google model config; panics if the registry is empty.
    fn first_config() -> ModelConfig {
        google_models().into_iter().next().unwrap()
    }

    #[test]
    fn test_google_tokenizer_creation() {
        assert!(GoogleTokenizer::new(first_config()).is_ok());
    }

    #[test]
    fn test_tokenizer_trait_implementation() {
        let tokenizer = GoogleTokenizer::new(first_config()).unwrap();

        assert!(tokenizer.count_tokens("Hello").unwrap() > 0);

        assert_eq!(tokenizer.get_model_info().encoding, "gemini-gemma3");
    }
}