/// A type that can encode text into token ids and decode token ids back into text.
///
/// Implementors supply the three required methods; the batch methods are provided
/// by the trait (their bodies are elided in this listing).
pub trait Tokenizer {
    // Required methods
    /// Encode a string into a list of token ids.
    ///
    /// NOTE(review): `add_special_tokens` presumably toggles whether special tokens
    /// are added to the output; confirm against the implementations.
    fn encode(&self, text: &str, add_special_tokens: bool) -> Result<Vec<u32>>;
    /// Decode a list of token ids into a string.
    ///
    /// The string is returned as a `Cow`, so it may be either borrowed from `self`
    /// or owned.
    fn decode(&self, ids: &[u32]) -> Result<Cow<'_, str>>;
    /// Get the ids of all possible tokens.
    ///
    /// The ids are returned as a `Cow` slice, so they may be either borrowed from
    /// `self` or owned.
    fn get_all_tokens(&self) -> Result<Cow<'_, [u32]>>;

    // Provided methods
    /// Encode a list of strings into a list of token id lists.
    ///
    /// `add_special_tokens` has the same name and type as the parameter of
    /// [`Tokenizer::encode`].
    fn encode_batch(
        &self,
        text: &[&str],
        add_special_tokens: bool
    ) -> Result<Vec<Vec<u32>>> { ... }
    /// Decode a list of token id lists into a list of strings.
    fn decode_batch(&self, ids: &[&[u32]]) -> Result<Vec<Cow<'_, str>>> { ... }
}
Expand description

A tokenizer is a type that can encode a string into a list of token ids and decode a list of token ids back into a string.

Required Methods§

source

fn encode(&self, text: &str, add_special_tokens: bool) -> Result<Vec<u32>>

Encode a string into a list of token ids.

source

fn decode(&self, ids: &[u32]) -> Result<Cow<'_, str>>

Decode a list of token ids into a string.

source

fn get_all_tokens(&self) -> Result<Cow<'_, [u32]>>

Get the ids of all possible tokens.

Provided Methods§

source

fn encode_batch( &self, text: &[&str], add_special_tokens: bool ) -> Result<Vec<Vec<u32>>>

Encode a list of strings into a list of token id lists.

source

fn decode_batch(&self, ids: &[&[u32]]) -> Result<Vec<Cow<'_, str>>>

Decode a list of token id lists into a list of strings.

Implementations on Foreign Types§

source§

impl Tokenizer for Tokenizer

source§

fn encode(&self, text: &str, special_tokens: bool) -> Result<Vec<u32>>

source§

fn decode(&self, ids: &[u32]) -> Result<Cow<'_, str>>

source§

fn get_all_tokens(&self) -> Result<Cow<'_, [u32]>>

source§

impl<M, N, PT, PP, D> Tokenizer for TokenizerImpl<M, N, PT, PP, D>
where M: Model, N: Normalizer, PT: PreTokenizer, PP: PostProcessor, D: Decoder,

source§

fn encode(&self, text: &str, special_tokens: bool) -> Result<Vec<u32>>

source§

fn decode(&self, ids: &[u32]) -> Result<Cow<'_, str>>

source§

fn get_all_tokens(&self) -> Result<Cow<'_, [u32]>>

Implementors§