Trait Tokenizer
Source
pub trait Tokenizer<T: Vocab> {
// Required methods
fn vocab(&self) -> &T;
fn tokenize(&self, text: &str) -> Vec<String>;
// Provided methods
fn tokenize_list(&self, text_list: Vec<&str>) -> Vec<Vec<String>> { ... }
fn convert_tokens_to_ids(&self, tokens: &Vec<String>) -> Vec<i64> { ... }
fn encode(
&self,
text_1: &str,
text_2: Option<&str>,
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize,
) -> TokenizedInput { ... }
fn encode_list(
&self,
text_list: Vec<&str>,
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize,
) -> Vec<TokenizedInput> { ... }
fn encode_pair_list(
&self,
text_list: Vec<(&str, &str)>,
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize,
) -> Vec<TokenizedInput> { ... }
fn build_input_with_special_tokens(
&self,
tokens_1: Vec<i64>,
tokens_2: Option<Vec<i64>>,
) -> (Vec<i64>, Vec<i8>, Vec<i8>) { ... }
}