mod models;
mod tokenizer;
pub use models::google_models;
use crate::error::TokenError;
use crate::tokenizers::registry::ModelConfig;
use crate::tokenizers::{ModelInfo, TokenDetail, Tokenizer};
use tokenizer::GeminiTokenizer;
/// Tokenizer for Google (Gemini-family) models.
///
/// Pairs a [`GeminiTokenizer`] backend with the registry [`ModelConfig`] it
/// was built from, and exposes both through the crate-wide [`Tokenizer`]
/// trait (see the `impl Tokenizer for GoogleTokenizer` block below).
pub struct GoogleTokenizer {
// Backend that performs the actual token counting / encoding.
gemini: GeminiTokenizer,
// Registry entry describing the model (name, encoding, context window, description).
config: ModelConfig,
}
impl GoogleTokenizer {
/// Builds a tokenizer for the model described by `config`.
///
/// The underlying [`GeminiTokenizer`] is selected by `config.name`; the
/// config itself is retained so [`Tokenizer::get_model_info`] can report it.
///
/// # Errors
///
/// Returns a [`TokenError`] when no Gemini tokenizer can be constructed
/// for `config.name`.
pub fn new(config: ModelConfig) -> Result<Self, TokenError> {
    GeminiTokenizer::new(&config.name).map(|gemini| Self { gemini, config })
}
}
impl Tokenizer for GoogleTokenizer {
/// Counts tokens in `text` by delegating to the Gemini backend.
fn count_tokens(&self, text: &str) -> anyhow::Result<usize> {
    self.gemini.count_tokens(text)
}

/// Returns a [`ModelInfo`] snapshot cloned from the stored configuration.
fn get_model_info(&self) -> ModelInfo {
    let cfg = &self.config;
    ModelInfo {
        name: cfg.name.clone(),
        encoding: cfg.encoding.clone(),
        context_window: cfg.context_window,
        description: cfg.description.clone(),
    }
}

/// Encodes `text` and reports each token's id and text.
///
/// This backend always produces details, so the result is `Some(..)` on
/// success; errors from the Gemini backend are propagated via `?`.
fn encode_with_details(&self, text: &str) -> anyhow::Result<Option<Vec<TokenDetail>>> {
    let mut details = Vec::new();
    for (id, text) in self.gemini.compute_token_details(text)? {
        details.push(TokenDetail { id, text });
    }
    Ok(Some(details))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::tokenizers::Tokenizer;

// Picks the first registered Google model; tests assume at least one exists.
fn first_model_config() -> ModelConfig {
    google_models().into_iter().next().unwrap()
}

/// Constructing a tokenizer from a registered model config succeeds.
#[test]
fn test_google_tokenizer_creation() {
    assert!(GoogleTokenizer::new(first_model_config()).is_ok());
}

/// The trait methods work: a non-empty string yields tokens, and the
/// reported model info carries the expected encoding name.
#[test]
fn test_tokenizer_trait_implementation() {
    let tokenizer = GoogleTokenizer::new(first_model_config()).unwrap();
    assert!(tokenizer.count_tokens("Hello").unwrap() > 0);
    assert_eq!(tokenizer.get_model_info().encoding, "gemini-gemma3");
}
}