// token_count/tokenizers/google/mod.rs
//! Tokenizer implementation for Google Gemini models

mod models;
mod tokenizer;

pub use models::google_models;

use crate::error::TokenError;
use crate::tokenizers::registry::ModelConfig;
use crate::tokenizers::{ModelInfo, Tokenizer};
use tokenizer::GeminiTokenizer;
13/// Tokenizer for Google Gemini models
14pub struct GoogleTokenizer {
15    /// Underlying gemini-tokenizer wrapper
16    gemini: GeminiTokenizer,
17
18    /// Model configuration (name, context window, etc.)
19    config: ModelConfig,
20}
21
22impl GoogleTokenizer {
23    /// Create a new Google tokenizer
24    ///
25    /// # Arguments
26    /// * `config` - Model configuration
27    ///
28    /// # Returns
29    /// * `Ok(Self)` - Successfully created tokenizer
30    /// * `Err(TokenError::Tokenization)` - Failed to initialize
31    pub fn new(config: ModelConfig) -> Result<Self, TokenError> {
32        let gemini = GeminiTokenizer::new(&config.name)?;
33        Ok(Self { gemini, config })
34    }
35}
36
37impl Tokenizer for GoogleTokenizer {
38    fn count_tokens(&self, text: &str) -> anyhow::Result<usize> {
39        self.gemini.count_tokens(text)
40    }
41
42    fn get_model_info(&self) -> ModelInfo {
43        ModelInfo {
44            name: self.config.name.clone(),
45            encoding: self.config.encoding.clone(),
46            context_window: self.config.context_window,
47            description: self.config.description.clone(),
48        }
49    }
50}
51
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tokenizers::Tokenizer;

    /// Fixture: the first registered Google model configuration.
    ///
    /// `expect` (rather than a bare `unwrap`) makes an empty registry fail
    /// with a readable message instead of an anonymous panic.
    fn first_config() -> ModelConfig {
        google_models()
            .into_iter()
            .next()
            .expect("google_models() should register at least one model")
    }

    #[test]
    fn test_google_tokenizer_creation() {
        // `expect` surfaces the underlying construction error on failure;
        // `assert!(result.is_ok())` would discard it and report nothing useful.
        GoogleTokenizer::new(first_config()).expect("failed to create GoogleTokenizer");
    }

    #[test]
    fn test_tokenizer_trait_implementation() {
        let tokenizer =
            GoogleTokenizer::new(first_config()).expect("failed to create GoogleTokenizer");

        // count_tokens: non-empty input must yield a positive token count.
        let count = tokenizer.count_tokens("Hello").expect("count_tokens failed");
        assert!(count > 0, "expected a positive token count, got {count}");

        // get_model_info: metadata must reflect the configured encoding.
        let info = tokenizer.get_model_info();
        assert_eq!(info.encoding, "gemini-gemma3");
    }
}