Trait rust_tokenizers::preprocessing::tokenizer::base_tokenizer::MultiThreadedTokenizer

pub trait MultiThreadedTokenizer<T: Vocab> where
    Self: Sync + Send + Tokenizer<T>,
{
    fn vocab(&self) -> &T { ... }

    fn tokenize_list_with_offsets(
        &self,
        text_list: Vec<&str>
    ) -> Vec<(Vec<String>, Vec<Option<Offset>>, Vec<Vec<OffsetSize>>, Vec<Mask>)> { ... }

    fn tokenize_list(&self, text_list: Vec<&str>) -> Vec<Vec<String>> { ... }

    fn encode_list(
        &self,
        text_list: Vec<&str>,
        max_len: usize,
        truncation_strategy: &TruncationStrategy,
        stride: usize
    ) -> Vec<TokenizedInput> { ... }

    fn encode_pair_list(
        &self,
        text_list: Vec<(&str, &str)>,
        max_len: usize,
        truncation_strategy: &TruncationStrategy,
        stride: usize
    ) -> Vec<TokenizedInput> { ... }

    fn decode_list(
        &self,
        token_ids_list: Vec<Vec<i64>>,
        skip_special_tokens: bool,
        clean_up_tokenization_spaces: bool
    ) -> Vec<String> { ... }
}

Provided methods

fn vocab(&self) -> &T

fn tokenize_list_with_offsets(
    &self,
    text_list: Vec<&str>
) -> Vec<(Vec<String>, Vec<Option<Offset>>, Vec<Vec<OffsetSize>>, Vec<Mask>)>
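A minimal usage sketch: tokenize a batch of strings and inspect the returned tokens and offsets. The BertTokenizer import path, the "vocab.txt" path, and the two-argument from_file constructor are assumptions for illustration, not guaranteed by this page.

use rust_tokenizers::preprocessing::tokenizer::base_tokenizer::MultiThreadedTokenizer;
use rust_tokenizers::preprocessing::tokenizer::bert_tokenizer::BertTokenizer;

fn main() {
    // Assumed constructor and vocabulary path; adjust to your crate version.
    let tokenizer = BertTokenizer::from_file("vocab.txt", true);

    // Fully qualified call: Tokenizer<T> exposes a method of the same name,
    // so plain method syntax would be ambiguous.
    let results = MultiThreadedTokenizer::tokenize_list_with_offsets(
        &tokenizer,
        vec!["Hello, world!", "A second sentence."],
    );

    // One tuple per input string: (tokens, offsets, original positions, masks).
    for (tokens, offsets, _positions, _masks) in results {
        println!("{:?} ({} offsets)", tokens, offsets.len());
    }
}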

fn tokenize_list(&self, text_list: Vec<&str>) -> Vec<Vec<String>>
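A sketch of batched tokenization without offset information, under the same assumptions about the tokenizer constructor and vocabulary file as above.

use rust_tokenizers::preprocessing::tokenizer::base_tokenizer::MultiThreadedTokenizer;
use rust_tokenizers::preprocessing::tokenizer::bert_tokenizer::BertTokenizer;

fn main() {
    // Assumed constructor and vocabulary path.
    let tokenizer = BertTokenizer::from_file("vocab.txt", true);

    let tokens: Vec<Vec<String>> = MultiThreadedTokenizer::tokenize_list(
        &tokenizer,
        vec!["The quick brown fox", "jumps over the lazy dog"],
    );
    assert_eq!(tokens.len(), 2); // one Vec<String> per input string
}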

fn encode_list(
    &self,
    text_list: Vec<&str>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> Vec<TokenizedInput>
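A sketch of batched encoding with truncation. TruncationStrategy is imported from the same base_tokenizer module; the token_ids field on TokenizedInput is assumed from this crate version.

use rust_tokenizers::preprocessing::tokenizer::base_tokenizer::{
    MultiThreadedTokenizer, TruncationStrategy,
};
use rust_tokenizers::preprocessing::tokenizer::bert_tokenizer::BertTokenizer;

fn main() {
    // Assumed constructor and vocabulary path.
    let tokenizer = BertTokenizer::from_file("vocab.txt", true);

    // Encode a batch, truncating each sequence to at most 128 ids.
    let encoded = MultiThreadedTokenizer::encode_list(
        &tokenizer,
        vec!["First document to encode.", "Second document to encode."],
        128,
        &TruncationStrategy::LongestFirst,
        0, // stride for overflowing tokens
    );

    for input in &encoded {
        // `token_ids` field assumed on TokenizedInput.
        println!("{} ids", input.token_ids.len());
    }
}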

fn encode_pair_list(
    &self,
    text_list: Vec<(&str, &str)>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> Vec<TokenizedInput>
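The pair variant takes (text_a, text_b) tuples, e.g. for sentence-pair tasks. Same assumptions as above about the constructor and vocabulary file.

use rust_tokenizers::preprocessing::tokenizer::base_tokenizer::{
    MultiThreadedTokenizer, TruncationStrategy,
};
use rust_tokenizers::preprocessing::tokenizer::bert_tokenizer::BertTokenizer;

fn main() {
    // Assumed constructor and vocabulary path.
    let tokenizer = BertTokenizer::from_file("vocab.txt", true);

    // Each tuple is a (first sequence, second sequence) pair.
    let pairs = vec![
        ("What is the capital of France?", "Paris is the capital of France."),
        ("Is water wet?", "Water is a liquid at room temperature."),
    ];

    let encoded = MultiThreadedTokenizer::encode_pair_list(
        &tokenizer,
        pairs,
        128,
        &TruncationStrategy::LongestFirst,
        0,
    );
    assert_eq!(encoded.len(), 2); // one TokenizedInput per pair
}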

fn decode_list(
    &self,
    token_ids_list: Vec<Vec<i64>>,
    skip_special_tokens: bool,
    clean_up_tokenization_spaces: bool
) -> Vec<String>
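A round-trip sketch: encode a batch, then decode the resulting id lists back to strings. The token_ids field on TokenizedInput and the tokenizer construction are assumptions, as in the earlier examples.

use rust_tokenizers::preprocessing::tokenizer::base_tokenizer::{
    MultiThreadedTokenizer, TruncationStrategy,
};
use rust_tokenizers::preprocessing::tokenizer::bert_tokenizer::BertTokenizer;

fn main() {
    // Assumed constructor and vocabulary path.
    let tokenizer = BertTokenizer::from_file("vocab.txt", true);

    let encoded = MultiThreadedTokenizer::encode_list(
        &tokenizer,
        vec!["Hello, world!", "Goodbye, world!"],
        128,
        &TruncationStrategy::LongestFirst,
        0,
    );

    // Collect the id sequences (the `token_ids` field is assumed).
    let id_lists: Vec<Vec<i64>> = encoded.into_iter().map(|t| t.token_ids).collect();

    // Skip special tokens and clean up tokenization spaces while decoding.
    let decoded: Vec<String> = MultiThreadedTokenizer::decode_list(
        &tokenizer,
        id_lists,
        true,
        true,
    );
    println!("{:?}", decoded);
}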


Implementors

impl MultiThreadedTokenizer<AlbertVocab> for AlbertTokenizer

impl MultiThreadedTokenizer<BertVocab> for BertTokenizer

impl MultiThreadedTokenizer<MarianVocab> for MarianTokenizer

impl MultiThreadedTokenizer<SentencePieceVocab> for SentencePieceTokenizer

impl<T: Vocab + Sync + Send> MultiThreadedTokenizer<T> for BaseTokenizer<T>
