Enum rust_bert::pipelines::common::TokenizerOption[][src]

pub enum TokenizerOption {
    Bert(BertTokenizer),
    Roberta(RobertaTokenizer),
    XLMRoberta(XLMRobertaTokenizer),
    Marian(MarianTokenizer),
    T5(T5Tokenizer),
    Albert(AlbertTokenizer),
    XLNet(XLNetTokenizer),
    GPT2(Gpt2Tokenizer),
    OpenAiGpt(OpenAiGptTokenizer),
    Reformer(ReformerTokenizer),
    ProphetNet(ProphetNetTokenizer),
}

Variants

Bert Tokenizer

Roberta Tokenizer

XLMRoberta Tokenizer

Marian Tokenizer

T5 Tokenizer

Albert Tokenizer

XLNet Tokenizer

GPT2 Tokenizer

OpenAI GPT Tokenizer

Reformer Tokenizer

ProphetNet Tokenizer

Implementations

impl TokenizerOption[src]

pub fn from_file(
    model_type: ModelType,
    vocab_path: &str,
    merges_path: Option<&str>,
    lower_case: bool,
    strip_accents: impl Into<Option<bool>>,
    add_prefix_space: impl Into<Option<bool>>
) -> Result<Self, RustBertError>
[src]

Interface method to load a tokenizer from file

pub fn model_type(&self) -> ModelType[src]

Returns the model type

pub fn encode_list(
    &self,
    text_list: &[&str],
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> Vec<TokenizedInput>
[src]

Interface method to encode a list of texts into tokenized inputs

pub fn encode_pair_list(
    &self,
    text_pair_list: &[(&str, &str)],
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> Vec<TokenizedInput>
[src]

Interface method for pair encoding

pub fn encode_pair(
    &self,
    text_1: &str,
    text_2: Option<&str>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> TokenizedInput
[src]

Interface method for pair encoding (single input)

pub fn tokenize(&self, text: &str) -> Vec<String>[src]

Interface method for tokenizing a single text into a list of tokens

pub fn tokenize_with_offsets(&self, text: &str) -> TokensWithOffsets[src]

Interface method for tokenizing a single text, returning the tokens together with their offsets

pub fn tokenize_list(&self, text: &[&str]) -> Vec<Vec<String>>[src]

Interface method for tokenizing a list of texts, returning one list of tokens per input text

pub fn decode(
    &self,
    token_ids: Vec<i64>,
    skip_special_tokens: bool,
    clean_up_tokenization_spaces: bool
) -> String
[src]

Interface method for decoding a sequence of token ids back into a string

pub fn build_input_with_special_tokens(
    &self,
    token_ids_with_offsets_1: TokenIdsWithOffsets,
    token_ids_with_offsets_2: Option<TokenIdsWithOffsets>
) -> TokenizedInput
[src]

Interface method to build input with special tokens

pub fn convert_tokens_to_ids<S, ST>(&self, tokens: S) -> Vec<i64> where
    S: AsRef<[ST]>,
    ST: AsRef<str>, 
[src]

Interface method to convert tokens to ids

pub fn get_unk_id(&self) -> i64[src]

Interface method to get the id of the unknown (`unk`) token

pub fn get_pad_id(&self) -> Option<i64>[src]

Interface method to get the id of the padding (`pad`) token, or `None` if no such id is available

pub fn get_sep_id(&self) -> Option<i64>[src]

Interface method to get the id of the separator (`sep`) token, or `None` if no such id is available

Auto Trait Implementations

Blanket Implementations

impl<T> Any for T where
    T: 'static + ?Sized
[src]

impl<T> Borrow<T> for T where
    T: ?Sized
[src]

impl<T> BorrowMut<T> for T where
    T: ?Sized
[src]

impl<T> From<T> for T[src]

impl<T> Instrument for T[src]

impl<T, U> Into<U> for T where
    U: From<T>, 
[src]

impl<T> Pointable for T

type Init = T

The type for initializers.

impl<T> Same<T> for T

type Output = T

Should always be Self

impl<T, U> TryFrom<U> for T where
    U: Into<T>, 
[src]

type Error = Infallible

The type returned in the event of a conversion error.

impl<T, U> TryInto<U> for T where
    U: TryFrom<T>, 
[src]

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

impl<V, T> VZip<V> for T where
    V: MultiLane<T>,