use crate::error::{AmbiError, Result};
/// Common interface for anything that can report how many tokens a piece of
/// text occupies.
///
/// The `Send + Sync` supertraits mean every implementor can be moved to and
/// shared between threads.
pub trait TokenizerTrait: Send + Sync {
/// Counts the tokens in `text`.
///
/// # Errors
///
/// The `Result` return type lets an implementation report failure; which
/// conditions (if any) produce an error is up to the implementor.
fn count_tokens(&self, text: &str) -> Result<usize>;
}
/// Tokenizer that wraps a `tiktoken_rs::CoreBPE` encoder.
pub struct DefaultTokenizer {
// The wrapped BPE encoder; private, so it is only reachable through this
// type's methods and trait implementations.
bpe: tiktoken_rs::CoreBPE,
}
impl DefaultTokenizer {
    /// Builds a tokenizer around the encoder returned by
    /// `tiktoken_rs::cl100k_base()`.
    ///
    /// # Errors
    ///
    /// Returns [`AmbiError::EngineError`] when `cl100k_base()` fails; the
    /// message embeds the underlying error's `Display` output.
    pub fn make() -> Result<Self> {
        let bpe = match tiktoken_rs::cl100k_base() {
            Ok(encoder) => encoder,
            Err(cause) => {
                let message = format!("Failed to init default tokenizer: {}", cause);
                return Err(AmbiError::EngineError(message));
            }
        };

        Ok(Self { bpe })
    }
}
impl TokenizerTrait for DefaultTokenizer {
    /// Encodes `text` with the wrapped encoder's `encode_ordinary` and
    /// returns how many tokens were produced.
    ///
    /// This implementation always returns `Ok`; the `Result` wrapper exists
    /// only to satisfy the trait signature.
    fn count_tokens(&self, text: &str) -> Result<usize> {
        let tokens = self.bpe.encode_ordinary(text);
        Ok(tokens.len())
    }
}