use crate::error::Result;
/// Abstraction over anything that can count the tokens in a piece of text.
///
/// The `Send + Sync` supertraits require every implementor to be safe to
/// move to, and share between, threads.
pub trait TokenizerTrait: Send + Sync {
/// Returns the number of tokens in `text`.
///
/// # Errors
///
/// Implementations may return an error when the text cannot be tokenized;
/// the exact conditions are implementation-specific.
fn count_tokens(&self, text: &str) -> Result<usize>;
}
/// Tokenizer backed by a `tiktoken_rs` byte-pair-encoding (BPE) encoder.
pub struct DefaultTokenizer {
// The underlying encoder; private, so it can only be set from within this module.
bpe: tiktoken_rs::CoreBPE,
}
impl DefaultTokenizer {
    /// Builds a tokenizer that uses the `cl100k_base` encoding from `tiktoken_rs`.
    ///
    /// # Panics
    ///
    /// Panics with the message "Failed to initialize default tokenizer" if
    /// `tiktoken_rs::cl100k_base()` returns an error.
    pub fn make() -> Self {
        let bpe = tiktoken_rs::cl100k_base().expect("Failed to initialize default tokenizer");
        Self { bpe }
    }
}
impl TokenizerTrait for DefaultTokenizer {
    /// Encodes `text` with the wrapped BPE encoder and returns how many tokens
    /// were produced.
    ///
    /// `encode_ordinary` is used, so the text is encoded without special-token
    /// handling. This implementation always returns `Ok`; the `Result` wrapper
    /// exists only to satisfy the trait signature.
    fn count_tokens(&self, text: &str) -> Result<usize> {
        let tokens = self.bpe.encode_ordinary(text);
        Ok(tokens.len())
    }
}