use std::sync::OnceLock;
use tiktoken_rs::CoreBPE;
/// Lazily-initialized tokenizer instance. Starts empty and is populated on the
/// first call to `get_bpe`, which then hands out shared `'static` references.
static BPE: OnceLock<CoreBPE> = OnceLock::new();
/// Returns a reference to the shared `cl100k_base` tokenizer stored in `BPE`,
/// constructing it the first time this function is called.
///
/// # Panics
///
/// Panics with "Failed to initialize cl100k_base tokenizer" if
/// `tiktoken_rs::cl100k_base()` returns an error.
fn get_bpe() -> &'static CoreBPE {
    // Named builder so the initialization step reads separately from the lookup.
    fn build_tokenizer() -> CoreBPE {
        tiktoken_rs::cl100k_base().expect("Failed to initialize cl100k_base tokenizer")
    }

    BPE.get_or_init(build_tokenizer)
}
/// Returns the number of tokens `text` encodes to with the tokenizer provided
/// by `get_bpe`.
///
/// An empty string yields `0` without touching the tokenizer. Any other input
/// is encoded with `encode_with_special_tokens` and the length of the result
/// is returned.
///
/// NOTE(review): presumably special-token markers embedded in `text` are
/// counted as single special tokens rather than as ordinary text — confirm
/// against the tiktoken-rs documentation.
///
/// # Panics
///
/// Panics if the tokenizer cannot be initialized (see `get_bpe`).
pub fn estimate_tokens(text: &str) -> usize {
    match text {
        // Short-circuit so empty input never triggers tokenizer initialization.
        "" => 0,
        content => get_bpe().encode_with_special_tokens(content).len(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Empty input must produce a count of zero.
    #[test]
    fn test_estimate_tokens_empty() {
        let empty_count = estimate_tokens("");
        assert_eq!(empty_count, 0);
    }

    /// A two-word phrase must encode to at least two tokens.
    #[test]
    fn test_estimate_tokens_simple() {
        let token_count = estimate_tokens("hello world");
        assert!(
            2 <= token_count,
            "Expected at least 2 tokens, got {}",
            token_count
        );
    }
}