use std::io;
use std::path::Path;
use tokenizers::Tokenizer;
pub fn load_tokenizer(model_path: &Path) -> io::Result<Tokenizer> {
let tokenizer_path = if model_path
.file_name()
.is_some_and(|name| name == std::ffi::OsStr::new("tokenizer.json"))
{
model_path.to_path_buf()
} else if model_path.is_dir() {
model_path.join("tokenizer.json")
} else {
model_path
.parent()
.ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "Model path has no parent"))?
.join("tokenizer.json")
};
Tokenizer::from_file(&tokenizer_path).map_err(io::Error::other)
}
/// Loads a tokenizer via [`load_tokenizer`] and configures truncation on it.
///
/// `max_len` is stored as `TruncationParams::max_length`; every other
/// truncation setting keeps the value from `TruncationParams::default()`.
///
/// # Errors
///
/// Propagates any error from [`load_tokenizer`]. If applying the truncation
/// parameters fails, returns an [`io::Error::other`] whose message starts with
/// `"Failed to configure truncation: "`.
pub fn load_tokenizer_with_truncation(model_path: &Path, max_len: usize) -> io::Result<Tokenizer> {
    use tokenizers::TruncationParams;

    let mut tokenizer = load_tokenizer(model_path)?;

    let params = TruncationParams {
        max_length: max_len,
        ..Default::default()
    };

    if let Err(err) = tokenizer.with_truncation(Some(params)) {
        return Err(io::Error::other(format!(
            "Failed to configure truncation: {}",
            err
        )));
    }

    Ok(tokenizer)
}