// mermaid_cli/utils/tokenizer.rs
use anyhow::Result;

/// Dependency-free token estimator.
///
/// Counts are approximated from UTF-8 byte length; no model vocabulary is
/// consulted, so results are estimates rather than exact token counts.
#[derive(Debug, Clone, Default)]
pub struct Tokenizer;

impl Tokenizer {
    /// Number of bytes assumed to make up one token.
    const BYTES_PER_TOKEN: usize = 4;

    /// Fixed allowance, in bytes, added for every chat message on top of its
    /// role and content (presumably to cover chat framing; the source does not
    /// say which format this models).
    const MESSAGE_OVERHEAD_BYTES: usize = 4;

    /// Creates a tokenizer.
    ///
    /// `_model_name` is accepted but ignored: the same estimate is used
    /// regardless of which model is named.
    pub fn new(_model_name: &str) -> Self {
        Self
    }

    /// Estimates the number of tokens in `text`.
    ///
    /// The estimate is the byte length of `text` divided by four, rounded up,
    /// so an empty string yields `0`.
    ///
    /// # Errors
    ///
    /// The current implementation always returns `Ok`.
    pub fn count_tokens(&self, text: &str) -> Result<usize> {
        Ok(text.len().div_ceil(Self::BYTES_PER_TOKEN))
    }

    /// Estimates the number of tokens in a chat transcript.
    ///
    /// Each element of `messages` is a `(role, content)` pair. The byte
    /// lengths of both strings plus a fixed four-byte allowance per message
    /// are summed, and the total is divided by four, rounded up. An empty
    /// slice yields `0`.
    ///
    /// # Errors
    ///
    /// The current implementation always returns `Ok`.
    pub fn count_chat_tokens(&self, messages: &[(String, String)]) -> Result<usize> {
        // `str::len` is a byte count, not a character count.
        let total_bytes: usize = messages
            .iter()
            .map(|(role, content)| role.len() + content.len() + Self::MESSAGE_OVERHEAD_BYTES)
            .sum();
        Ok(total_bytes.div_ceil(Self::BYTES_PER_TOKEN))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A non-empty text gets a positive estimate that is smaller than its
    /// byte length.
    #[test]
    fn test_token_counting() {
        let tokenizer = Tokenizer::new("gpt-3.5-turbo");
        let text = "Hello, world! This is a test message.";
        let count = tokenizer.count_tokens(text).unwrap();
        assert!(count > 0);
        assert!(count < text.len());
    }

    /// A non-empty chat transcript gets a positive estimate.
    #[test]
    fn test_count_chat_tokens() {
        let tokenizer = Tokenizer::new("any-model");
        let messages = vec![
            ("user".to_string(), "Hello".to_string()),
            ("assistant".to_string(), "Hi there".to_string()),
        ];
        let count = tokenizer.count_chat_tokens(&messages).unwrap();
        assert!(count > 0);
    }

    /// Empty inputs are estimated as zero tokens.
    #[test]
    fn test_empty_inputs_count_as_zero() {
        let tokenizer = Tokenizer::new("any-model");
        assert_eq!(tokenizer.count_tokens("").unwrap(), 0);
        assert_eq!(tokenizer.count_chat_tokens(&[]).unwrap(), 0);
    }
}