use anyhow::Result;
/// Length-based token estimator.
///
/// No tokenizer model is loaded: every count is derived from the UTF-8 byte
/// length of the input at a fixed bytes-per-token ratio.
#[derive(Debug, Clone, Default)]
pub struct Tokenizer;

impl Tokenizer {
    /// Number of input bytes that the estimate counts as one token.
    const BYTES_PER_TOKEN: usize = 4;

    /// Flat number of bytes added for every chat message before dividing by
    /// [`Self::BYTES_PER_TOKEN`]; with the current values this amounts to one
    /// extra token per message (presumably standing in for message framing).
    const MESSAGE_OVERHEAD_BYTES: usize = 4;

    /// Creates a tokenizer.
    ///
    /// `_model_name` is accepted but not used: the same estimate is produced
    /// regardless of the model name passed in.
    pub fn new(_model_name: &str) -> Self {
        Self
    }

    /// Estimates the number of tokens in `text`.
    ///
    /// The result is `text.len()` divided by four, rounded up. Note that
    /// `str::len` is the UTF-8 *byte* length, not the number of characters.
    /// An empty string yields `0`.
    ///
    /// # Errors
    ///
    /// This implementation always returns `Ok`.
    pub fn count_tokens(&self, text: &str) -> Result<usize> {
        Ok(text.len().div_ceil(Self::BYTES_PER_TOKEN))
    }

    /// Estimates the number of tokens in a list of chat messages.
    ///
    /// Each message is a `(role, content)` pair. The byte lengths of both
    /// strings plus a flat four-byte overhead are summed over all messages,
    /// and the total is divided by four, rounded up. An empty slice yields `0`.
    ///
    /// # Errors
    ///
    /// This implementation always returns `Ok`.
    pub fn count_chat_tokens(&self, messages: &[(String, String)]) -> Result<usize> {
        // `str::len` reports bytes, so the running total is a byte count.
        let total_bytes: usize = messages
            .iter()
            .map(|(role, content)| role.len() + content.len() + Self::MESSAGE_OVERHEAD_BYTES)
            .sum();
        Ok(total_bytes.div_ceil(Self::BYTES_PER_TOKEN))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The estimate for a non-empty string must be positive and strictly
    /// smaller than the string's byte length.
    #[test]
    fn test_token_counting() {
        let sample = "Hello, world! This is a test message.";
        let estimate = Tokenizer::new("gpt-3.5-turbo")
            .count_tokens(sample)
            .unwrap();

        assert!(estimate > 0);
        assert!(estimate < sample.len());
    }

    /// A conversation with at least one message must produce a positive
    /// estimate.
    #[test]
    fn test_count_chat_tokens() {
        let conversation = [
            (String::from("user"), String::from("Hello")),
            (String::from("assistant"), String::from("Hi there")),
        ];
        let estimate = Tokenizer::new("any-model")
            .count_chat_tokens(&conversation)
            .unwrap();

        assert!(estimate > 0);
    }
}