Trait rust_tokenizers::preprocessing::tokenizer::base_tokenizer::Tokenizer

pub trait Tokenizer<T: Vocab> {
    fn vocab(&self) -> &T;
    fn tokenize(&self, text: &str) -> Vec<String>;
    fn tokenize_list(&self, text_list: Vec<&str>) -> Vec<Vec<String>> { ... }
    fn convert_tokens_to_ids(&self, tokens: &Vec<String>) -> Vec<i64> { ... }
    fn encode(
        &self,
        text_1: &str,
        text_2: Option<&str>,
        max_len: usize,
        truncation_strategy: &TruncationStrategy,
        stride: usize
    ) -> TokenizedInput { ... }
    fn encode_list(
        &self,
        text_list: Vec<&str>,
        max_len: usize,
        truncation_strategy: &TruncationStrategy,
        stride: usize
    ) -> Vec<TokenizedInput> { ... }
    fn encode_pair_list(
        &self,
        text_list: Vec<(&str, &str)>,
        max_len: usize,
        truncation_strategy: &TruncationStrategy,
        stride: usize
    ) -> Vec<TokenizedInput> { ... }
    fn decode(
        &self,
        token_ids: Vec<i64>,
        skip_special_tokens: bool,
        clean_up_tokenization_spaces: bool
    ) -> String { ... }
    fn convert_tokens_to_string(&self, tokens: Vec<String>) -> String { ... }
    fn clean_up_tokenization(&self, input_string: String) -> String { ... }
    fn decode_list(
        &self,
        token_ids_list: Vec<Vec<i64>>,
        skip_special_tokens: bool,
        clean_up_tokenization_spaces: bool
    ) -> Vec<String> { ... }
    fn build_input_with_special_tokens(
        &self,
        tokens_1: Vec<i64>,
        tokens_2: Option<Vec<i64>>
    ) -> (Vec<i64>, Vec<i8>, Vec<i8>) { ... }
}
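
Only `vocab` and `tokenize` are required: the provided methods build batching, encoding with special tokens and truncation, and decoding on top of them. A minimal round-trip sketch, assuming a tokenizer value has already been constructed (see the implementors below for concrete types; the `Vocab` import path is an assumption and may differ across crate versions):

// Generic over any implementor; only methods from this trait are used.
use rust_tokenizers::preprocessing::tokenizer::base_tokenizer::Tokenizer;
use rust_tokenizers::preprocessing::vocab::base_vocab::Vocab; // path assumed

fn round_trip<V: Vocab, T: Tokenizer<V>>(tokenizer: &T, text: &str) -> String {
    let tokens: Vec<String> = tokenizer.tokenize(text);           // text -> tokens
    let ids: Vec<i64> = tokenizer.convert_tokens_to_ids(&tokens); // tokens -> ids
    tokenizer.decode(ids, true, true)                             // ids -> text
}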

Required methods

fn vocab(&self) -> &T

fn tokenize(&self, text: &str) -> Vec<String>
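
Implementors supply only these two methods: `vocab` exposes the underlying vocabulary and `tokenize` splits raw text into tokens; every provided method below is derived from them. A hypothetical whitespace tokenizer, as a sketch (the `WhitespaceTokenizer` type is invented for illustration):

use rust_tokenizers::preprocessing::tokenizer::base_tokenizer::Tokenizer;
use rust_tokenizers::preprocessing::vocab::base_vocab::Vocab; // path assumed

// Hypothetical implementor: splits on whitespace, defers lookups to a wrapped vocab.
struct WhitespaceTokenizer<V: Vocab> {
    vocab: V,
}

impl<V: Vocab> Tokenizer<V> for WhitespaceTokenizer<V> {
    fn vocab(&self) -> &V {
        &self.vocab
    }

    fn tokenize(&self, text: &str) -> Vec<String> {
        text.split_whitespace().map(str::to_owned).collect()
    }
}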


Provided methods

fn tokenize_list(&self, text_list: Vec<&str>) -> Vec<Vec<String>>

fn convert_tokens_to_ids(&self, tokens: &Vec<String>) -> Vec<i64>
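
`tokenize_list` applies `tokenize` to each text in a batch, and `convert_tokens_to_ids` looks tokens up in the vocabulary, so the two compose directly. A sketch, assuming a ready `tokenizer`:

let batches: Vec<Vec<String>> = tokenizer.tokenize_list(vec!["first text", "second text"]);
let ids: Vec<Vec<i64>> = batches
    .iter()
    .map(|tokens| tokenizer.convert_tokens_to_ids(tokens))
    .collect();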

fn encode(
    &self,
    text_1: &str,
    text_2: Option<&str>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> TokenizedInput
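
`encode` runs the full single-example pipeline: tokenize `text_1` (and `text_2`, when given, as a pair), add model-specific special tokens, and truncate to `max_len` following the given `TruncationStrategy`, with `stride` controlling the overlap retained for overflowing tokens. A sketch; the `LongestFirst` variant name and the `TokenizedInput` field name are assumptions to verify against this crate version:

// Encode a sentence pair to at most 128 ids.
let encoded: TokenizedInput = tokenizer.encode(
    "What is the capital of France?",
    Some("Paris is the capital of France."),
    128,
    &TruncationStrategy::LongestFirst, // assumed variant
    0,
);
let _ids = &encoded.token_ids; // field name assumed; see the TokenizedInput docs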

fn encode_list(
    &self,
    text_list: Vec<&str>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> Vec<TokenizedInput>

fn encode_pair_list(
    &self,
    text_list: Vec<(&str, &str)>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> Vec<TokenizedInput>
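
`encode_list` and `encode_pair_list` are the batch counterparts of `encode`, returning one `TokenizedInput` per text or per pair. A sketch, under the same assumptions as above:

let singles = tokenizer.encode_list(
    vec!["first text", "second text"],
    128,
    &TruncationStrategy::LongestFirst, // assumed variant
    0,
);
let pairs = tokenizer.encode_pair_list(
    vec![("question one?", "context one."), ("question two?", "context two.")],
    128,
    &TruncationStrategy::LongestFirst,
    0,
);
assert_eq!(singles.len(), 2);
assert_eq!(pairs.len(), 2);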

fn decode(
    &self,
    token_ids: Vec<i64>,
    skip_special_tokens: bool,
    clean_up_tokenization_spaces: bool
) -> String

fn convert_tokens_to_string(&self, tokens: Vec<String>) -> String

fn clean_up_tokenization(&self, input_string: String) -> String

fn decode_list(
    &self,
    token_ids_list: Vec<Vec<i64>>,
    skip_special_tokens: bool,
    clean_up_tokenization_spaces: bool
) -> Vec<String>
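
`decode` maps ids back to text: `skip_special_tokens` drops marker tokens such as separators, and `clean_up_tokenization_spaces` applies `clean_up_tokenization` to remove artifacts such as spaces before punctuation. `convert_tokens_to_string` performs the token-level step on its own, and `decode_list` handles a batch of id sequences. A round-trip sketch:

let tokens = tokenizer.tokenize("Hello, world!");
let ids = tokenizer.convert_tokens_to_ids(&tokens);

// One decoded string per id sequence; with a lossy vocabulary (lowercasing,
// unknown tokens) the round trip may not reproduce the input exactly.
let texts: Vec<String> = tokenizer.decode_list(vec![ids], true, true);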

fn build_input_with_special_tokens(
    &self,
    tokens_1: Vec<i64>,
    tokens_2: Option<Vec<i64>>
) -> (Vec<i64>, Vec<i8>, Vec<i8>)
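
This is the model-specific assembly step used by `encode`: it wraps one id sequence (or a pair) with the special tokens the model expects and returns three parallel vectors. Reading the tuple as (combined token ids, segment ids, special-token mask) is an inference from common usage and worth verifying against a concrete implementor. A sketch:

let ids_1 = tokenizer.convert_tokens_to_ids(&tokenizer.tokenize("first sequence"));
let ids_2 = tokenizer.convert_tokens_to_ids(&tokenizer.tokenize("second sequence"));

// Tuple interpretation (ids, segment ids, special-token mask) is assumed.
let (token_ids, segment_ids, special_tokens_mask) =
    tokenizer.build_input_with_special_tokens(ids_1, Some(ids_2));
assert_eq!(token_ids.len(), segment_ids.len());
assert_eq!(token_ids.len(), special_tokens_mask.len());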


Implementors

impl Tokenizer<BertVocab> for BertTokenizer

impl Tokenizer<Gpt2Vocab> for Gpt2Tokenizer

impl Tokenizer<OpenAiGptVocab> for CtrlTokenizer

impl Tokenizer<OpenAiGptVocab> for OpenAiGptTokenizer

impl Tokenizer<RobertaVocab> for RobertaTokenizer

impl<T: Vocab + Sync + Send> Tokenizer<T> for BaseTokenizer<T>
