Trait kalosm_sample::Tokenizer
source · pub trait Tokenizer {
// Required methods
fn encode(&self, text: &str, add_special_tokens: bool) -> Result<Vec<u32>>;
fn decode(&self, ids: &[u32]) -> Result<Cow<'_, str>>;
fn get_all_tokens(&self) -> Result<Cow<'_, [u32]>>;
// Provided methods
fn encode_batch(
&self,
text: &[&str],
add_special_tokens: bool
) -> Result<Vec<Vec<u32>>> { ... }
fn decode_batch(&self, ids: &[&[u32]]) -> Result<Vec<Cow<'_, str>>> { ... }
}
Expand description
A tokenizer is a type that can encode a string into a list of token ids, decode a list of token ids back into a string, and list all of the token ids it knows about.
Required Methods§
source · fn encode(&self, text: &str, add_special_tokens: bool) -> Result<Vec<u32>>
Encode a string into a list of token ids.
source · fn decode(&self, ids: &[u32]) -> Result<Cow<'_, str>>
Decode a list of token ids into a string.
source · fn get_all_tokens(&self) -> Result<Cow<'_, [u32]>>
Get the ids of all possible tokens.