/// Tokenizer interface, generic over a vocabulary type `T` that implements `Vocab`.
///
/// Implementors must supply `vocab` and `tokenize`. All other methods are
/// provided by the trait; their bodies are elided (`{ ... }`) in this listing,
/// so the notes on them describe the signatures only and hedge anything that
/// is inferred from names.
pub trait Tokenizer<T: Vocab> {
    // Required methods
    /// Returns a borrowed `T`, the `Vocab` implementation this tokenizer is
    /// parameterized by.
    fn vocab(&self) -> &T;
    /// Takes a borrowed `text` and returns a vector of owned `String`s.
    /// NOTE(review): presumably one entry per token, in input order; confirm
    /// against the implementors.
    fn tokenize(&self, text: &str) -> Vec<String>;

    // Provided methods
    /// Batch counterpart of `tokenize`: takes a vector of texts and returns a
    /// vector of `Vec<String>`.
    /// NOTE(review): presumably one inner vector per input text, in the same
    /// order; body not shown, confirm.
    fn tokenize_list(&self, text_list: Vec<&str>) -> Vec<Vec<String>> { ... }
    /// Takes borrowed token strings and returns a vector of `i64` values.
    /// NOTE(review): presumably each token is looked up through `vocab()`;
    /// handling of unknown tokens is not visible here, confirm.
    fn convert_tokens_to_ids(&self, tokens: &Vec<String>) -> Vec<i64> { ... }
    /// Encodes `text_1`, and optionally a second text `text_2`, into a
    /// `TokenizedInput`.
    ///
    /// `max_len`, `truncation_strategy` and `stride` are passed by the caller.
    /// NOTE(review): presumably `max_len` caps the encoded length,
    /// `truncation_strategy` selects which sequence is shortened, and `stride`
    /// controls the overlap kept when truncating; body not shown, confirm.
    fn encode(
        &self,
        text_1: &str,
        text_2: Option<&str>,
        max_len: usize,
        truncation_strategy: &TruncationStrategy,
        stride: usize,
    ) -> TokenizedInput { ... }
    /// Batch form for single texts: takes a vector of texts plus the same
    /// `max_len` / `truncation_strategy` / `stride` arguments as `encode` and
    /// returns a vector of `TokenizedInput`.
    /// NOTE(review): presumably one output per input, in order; confirm.
    fn encode_list(
        &self,
        text_list: Vec<&str>,
        max_len: usize,
        truncation_strategy: &TruncationStrategy,
        stride: usize,
    ) -> Vec<TokenizedInput> { ... }
    /// Batch form for text pairs: each element of `text_list` is a
    /// `(&str, &str)` tuple. Returns a vector of `TokenizedInput`.
    /// NOTE(review): presumably each pair is encoded like `encode` with
    /// `text_2` set to the second element; confirm.
    fn encode_pair_list(
        &self,
        text_list: Vec<(&str, &str)>,
        max_len: usize,
        truncation_strategy: &TruncationStrategy,
        stride: usize,
    ) -> Vec<TokenizedInput> { ... }
    /// Takes ownership of one id sequence (`tokens_1`) and an optional second
    /// one (`tokens_2`) and returns a tuple of three vectors:
    /// `(Vec<i64>, Vec<i8>, Vec<i8>)`.
    /// NOTE(review): the meaning of the tuple elements is not visible here.
    /// Presumably they are the combined ids, per-position segment ids, and a
    /// special-token mask; confirm before relying on it.
    fn build_input_with_special_tokens(
        &self,
        tokens_1: Vec<i64>,
        tokens_2: Option<Vec<i64>>,
    ) -> (Vec<i64>, Vec<i8>, Vec<i8>) { ... }
}

Required Methods

Source

fn vocab(&self) -> &T

Source

fn tokenize(&self, text: &str) -> Vec<String>

Provided Methods

Source

fn tokenize_list(&self, text_list: Vec<&str>) -> Vec<Vec<String>>

Source

fn convert_tokens_to_ids(&self, tokens: &Vec<String>) -> Vec<i64>

Source

fn encode(&self, text_1: &str, text_2: Option<&str>, max_len: usize, truncation_strategy: &TruncationStrategy, stride: usize) -> TokenizedInput

Source

fn encode_list(&self, text_list: Vec<&str>, max_len: usize, truncation_strategy: &TruncationStrategy, stride: usize) -> Vec<TokenizedInput>

Source

fn encode_pair_list(&self, text_list: Vec<(&str, &str)>, max_len: usize, truncation_strategy: &TruncationStrategy, stride: usize) -> Vec<TokenizedInput>

Source

fn build_input_with_special_tokens(&self, tokens_1: Vec<i64>, tokens_2: Option<Vec<i64>>) -> (Vec<i64>, Vec<i8>, Vec<i8>)

Implementors