#[cfg(feature = "hf-tokenizer")]
mod inner {
use std::convert::TryFrom;

use crate::error::{LlamaError, Result};
use crate::token::LlamaToken;
/// Thin wrapper around a [`tokenizers::Tokenizer`] that exchanges token ids
/// as [`LlamaToken`] values instead of raw `u32` ids.
pub struct HfTokenizer {
// The wrapped tokenizer; all methods on this type delegate to it.
inner: tokenizers::Tokenizer,
}
impl HfTokenizer {
pub fn from_file(path: impl AsRef<std::path::Path>) -> Result<Self> {
let inner = tokenizers::Tokenizer::from_file(path.as_ref())
.map_err(|e| LlamaError::Batch(format!("hf tokenizer: {e}")))?;
Ok(Self { inner })
}
pub fn encode(&self, text: &str, add_bos: bool) -> Result<Vec<LlamaToken>> {
let enc = self
.inner
.encode(text, add_bos)
.map_err(|e| LlamaError::Batch(format!("hf encode: {e}")))?;
Ok(enc.get_ids().iter().map(|&i| LlamaToken(i as i32)).collect())
}
pub fn decode(&self, tokens: &[LlamaToken]) -> Result<String> {
let ids: Vec<u32> = tokens.iter().map(|t| t.0 as u32).collect();
self.inner
.decode(&ids, false)
.map_err(|e| LlamaError::Batch(format!("hf decode: {e}")))
}
}
}
#[cfg(feature = "hf-tokenizer")]
pub use inner::HfTokenizer;