use crate::error::TokenError;
use anyhow::Result;
use gemini_tokenizer::LocalTokenizer;
/// Thin wrapper around `gemini_tokenizer::LocalTokenizer` that adapts its
/// initialization errors into this crate's `TokenError` and exposes
/// token-counting helpers for Gemini models.
pub struct GeminiTokenizer {
    // Underlying tokenizer, initialized once in `GeminiTokenizer::new`.
    tokenizer: LocalTokenizer,
}
impl GeminiTokenizer {
    /// Cap on how many `(token_id, token_text)` pairs
    /// [`Self::compute_token_details`] returns. Previously a bare `10`
    /// buried in the loop body.
    const MAX_TOKEN_DETAILS: usize = 10;

    /// Creates a tokenizer for the given Gemini model name
    /// (e.g. `"gemini-2.5-flash"`).
    ///
    /// # Errors
    ///
    /// Returns `TokenError::Tokenization` when the underlying
    /// `LocalTokenizer` fails to initialize (presumably an unknown model
    /// name or missing tokenizer data — confirm against the crate docs).
    ///
    /// NOTE(review): `new` reports `TokenError` while the other methods use
    /// `anyhow::Result`; unifying would change callers' signatures, so the
    /// inconsistency is left as-is.
    pub fn new(model_name: &str) -> Result<Self, TokenError> {
        let tokenizer = LocalTokenizer::new(model_name).map_err(|e| {
            TokenError::Tokenization(format!("Failed to initialize Gemini tokenizer: {}", e))
        })?;
        Ok(Self { tokenizer })
    }

    /// Counts the tokens in `text`.
    ///
    /// The options argument to `LocalTokenizer::count_tokens` is `None`,
    /// i.e. the crate's default counting behavior. The counting call itself
    /// is infallible here; the `Result` return is kept for interface
    /// stability.
    pub fn count_tokens(&self, text: &str) -> Result<usize> {
        let result = self.tokenizer.count_tokens(text, None);
        Ok(result.total_tokens)
    }

    /// Returns up to [`Self::MAX_TOKEN_DETAILS`] `(token_id, token_text)`
    /// pairs for `text`, in tokenization order.
    ///
    /// Token bytes are decoded lossily, since an individual token's bytes
    /// may not form valid UTF-8 on their own.
    pub fn compute_token_details(&self, text: &str) -> Result<Vec<(u32, String)>> {
        let result = self.tokenizer.compute_tokens(text);
        let details = result
            .tokens_info
            .iter()
            // Pair each token id with its raw bytes across all segments.
            .flat_map(|info| info.token_ids.iter().zip(&info.tokens))
            // Truncate to the detail cap (replaces the manual counter that
            // was misleadingly named `total_tokens`).
            .take(Self::MAX_TOKEN_DETAILS)
            .map(|(token_id, token_bytes)| {
                (*token_id, String::from_utf8_lossy(token_bytes).to_string())
            })
            .collect();
        Ok(details)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Model name exercised by every test case.
    const MODEL: &str = "gemini-2.5-flash";

    /// The tokenizer should initialize successfully for a known model;
    /// surface the error on stderr before failing to ease debugging.
    #[test]
    fn test_tokenizer_initialization() {
        let tokenizer = GeminiTokenizer::new(MODEL);
        if let Err(e) = &tokenizer {
            eprintln!("Tokenizer initialization error: {:?}", e);
        }
        assert!(tokenizer.is_ok());
    }

    /// A short greeting is expected to tokenize to exactly four tokens.
    #[test]
    fn test_count_tokens() {
        let tokenizer = GeminiTokenizer::new(MODEL).unwrap();
        let count = tokenizer.count_tokens("Hello, Gemini!").unwrap();
        assert_eq!(count, 4);
    }

    /// The empty string must produce zero tokens.
    #[test]
    fn test_empty_string() {
        let tokenizer = GeminiTokenizer::new(MODEL).unwrap();
        let count = tokenizer.count_tokens("").unwrap();
        assert_eq!(count, 0);
    }
}