Skip to main content

token_count/tokenizers/google/
mod.rs

//! Tokenizer implementation for Google Gemini models.
2
3mod models;
4mod tokenizer;
5
6pub use models::google_models;
7
8use crate::error::TokenError;
9use crate::tokenizers::registry::ModelConfig;
10use crate::tokenizers::{ModelInfo, TokenDetail, Tokenizer};
11use tokenizer::GeminiTokenizer;
12
13/// Tokenizer for Google Gemini models
14pub struct GoogleTokenizer {
15    /// Underlying gemini-tokenizer wrapper
16    gemini: GeminiTokenizer,
17
18    /// Model configuration (name, context window, etc.)
19    config: ModelConfig,
20}
21
22impl GoogleTokenizer {
23    /// Create a new Google tokenizer
24    ///
25    /// # Arguments
26    /// * `config` - Model configuration
27    ///
28    /// # Returns
29    /// * `Ok(Self)` - Successfully created tokenizer
30    /// * `Err(TokenError::Tokenization)` - Failed to initialize
31    pub fn new(config: ModelConfig) -> Result<Self, TokenError> {
32        let gemini = GeminiTokenizer::new(&config.name)?;
33        Ok(Self { gemini, config })
34    }
35}
36
37impl Tokenizer for GoogleTokenizer {
38    fn count_tokens(&self, text: &str) -> anyhow::Result<usize> {
39        self.gemini.count_tokens(text)
40    }
41
42    fn get_model_info(&self) -> ModelInfo {
43        ModelInfo {
44            name: self.config.name.clone(),
45            encoding: self.config.encoding.clone(),
46            context_window: self.config.context_window,
47            description: self.config.description.clone(),
48        }
49    }
50
51    fn encode_with_details(&self, text: &str) -> anyhow::Result<Option<Vec<TokenDetail>>> {
52        let token_details = self.gemini.compute_token_details(text)?;
53
54        let details: Vec<TokenDetail> =
55            token_details.into_iter().map(|(id, text)| TokenDetail { id, text }).collect();
56
57        Ok(Some(details))
58    }
59}
60
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tokenizers::Tokenizer;

    /// Constructing a tokenizer from the first registered Google model succeeds.
    #[test]
    fn test_google_tokenizer_creation() {
        let mut models = google_models().into_iter();
        let config = models.next().unwrap();

        assert!(GoogleTokenizer::new(config).is_ok());
    }

    /// The `Tokenizer` trait methods work on a freshly built tokenizer.
    #[test]
    fn test_tokenizer_trait_implementation() {
        let first_model = google_models().into_iter().next().unwrap();
        let tokenizer = GoogleTokenizer::new(first_model).unwrap();

        // count_tokens: a non-empty input must yield at least one token.
        let token_count = tokenizer.count_tokens("Hello").unwrap();
        assert!(token_count > 0);

        // get_model_info: the encoding reported for the first model.
        let model_info = tokenizer.get_model_info();
        assert_eq!(model_info.encoding, "gemini-gemma3");
    }
}