Trait `rust_transformers::preprocessing::tokenizer::base_tokenizer::Tokenizer`

pub trait Tokenizer<T: Vocab> {
    fn vocab(&self) -> &T;

    fn tokenize(&self, text: &str) -> Vec<String>;

    fn tokenize_list(&self, text_list: Vec<&str>) -> Vec<Vec<String>> { ... }

    fn convert_tokens_to_ids(&self, tokens: &Vec<String>) -> Vec<i64> { ... }

    fn encode(
        &self,
        text_1: &str,
        text_2: Option<&str>,
        max_len: usize,
        truncation_strategy: &TruncationStrategy,
        stride: usize
    ) -> TokenizedInput { ... }

    fn encode_list(
        &self,
        text_list: Vec<&str>,
        max_len: usize,
        truncation_strategy: &TruncationStrategy,
        stride: usize
    ) -> Vec<TokenizedInput> { ... }

    fn encode_pair_list(
        &self,
        text_list: Vec<(&str, &str)>,
        max_len: usize,
        truncation_strategy: &TruncationStrategy,
        stride: usize
    ) -> Vec<TokenizedInput> { ... }

    fn build_input_with_special_tokens(
        &self,
        tokens_1: Vec<i64>,
        tokens_2: Option<Vec<i64>>
    ) -> (Vec<i64>, Vec<i8>, Vec<i8>) { ... }
}

Required methods

fn vocab(&self) -> &T

fn tokenize(&self, text: &str) -> Vec<String>


Provided methods

fn tokenize_list(&self, text_list: Vec<&str>) -> Vec<Vec<String>>

fn convert_tokens_to_ids(&self, tokens: &Vec<String>) -> Vec<i64>

fn encode(
    &self,
    text_1: &str,
    text_2: Option<&str>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> TokenizedInput

fn encode_list(
    &self,
    text_list: Vec<&str>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> Vec<TokenizedInput>

fn encode_pair_list(
    &self,
    text_list: Vec<(&str, &str)>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> Vec<TokenizedInput>

fn build_input_with_special_tokens(
    &self,
    tokens_1: Vec<i64>,
    tokens_2: Option<Vec<i64>>
) -> (Vec<i64>, Vec<i8>, Vec<i8>)


Implementors

impl Tokenizer<BertVocab> for BertTokenizer

impl Tokenizer<Gpt2Vocab> for Gpt2Tokenizer

impl Tokenizer<OpenAiGptVocab> for CtrlTokenizer

impl Tokenizer<OpenAiGptVocab> for OpenAiGptTokenizer

impl Tokenizer<RobertaVocab> for RobertaTokenizer

impl<T: Vocab + Sync + Send> Tokenizer<T> for BaseTokenizer<T>
