Struct rust_tokenizers::vocab::FNetVocab[−][src]

pub struct FNetVocab {
    pub values: HashMap<String, i64>,
    pub indices: HashMap<i64, String>,
    pub unknown_value: &'static str,
    pub special_values: HashMap<String, i64>,
    pub special_indices: HashMap<i64, String>,
}

Expand description

FNetVocab

Vocabulary for FNet tokenizer. Contains the following special values:

CLS token
SEP token
PAD token
MASK token

Expects a SentencePiece BPE protobuf file when created from file.

Fields

values: HashMap<String, i64>

A mapping of tokens as string to indices (i.e. the encoder base)

indices: HashMap<i64, String>

A mapping of token ids to strings (i.e. the decoder base)

unknown_value: &'static str

The string to use for unknown (out of vocabulary) tokens

special_values: HashMap<String, i64>

A mapping of special value tokens as strings to IDs (i.e. the encoder base for special values), special values typically include things like BOS/EOS markers, class markers, mask markers and padding markers

special_indices: HashMap<i64, String>

A mapping of special value tokens as IDs to strings (i.e. the decoder base for special values)

Implementations

[src]

impl FNetVocab

[src]

pub fn sep_value() -> &'static str

Returns the SEP token for FNet ([SEP])

[src]

pub fn cls_value() -> &'static str

Returns the CLS token for FNet ([CLS])

[src]

pub fn mask_value() -> &'static str

Returns the MASK token for FNet ([MASK])

[src]

pub fn pad_value() -> &'static str

Returns the PAD token for FNet (<pad>)

Trait Implementations

[src]

impl Clone for FNetVocab

[src]

fn clone(&self) -> FNetVocab

Returns a copy of the value. Read more

1.0.0[src]

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

[src]

impl Debug for FNetVocab

[src]

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

[src]

impl MultiThreadedTokenizer<FNetVocab> for FNetTokenizer

[src]

fn vocab(&self) -> &T

returns a reference to the tokenizer vocabulary

[src]

fn tokenize_list_with_offsets<S>(
 &self,
 text_list: &[S]
) -> Vec<TokensWithOffsets>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`
where
 S: AsRef<str> + Sync,

Tokenize a list of strings (with multithreading), where each corresponds to for example a sentence, returns a vector of TokensWithOffsets containing the tokens and their offset information. This calls tokenize_with_offsets on the list provided. Read more

[src]

fn tokenize_list<S>(&self, text_list: &[S]) -> Vec<Vec<String>>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`
where
S: AsRef<str> + Sync,

Multithreaded tokenization of a list of strings, returning tokens with offset information Read more

[src]

fn encode_list<S>(
 &self,
 text_list: &[S],
 max_len: usize,
 truncation_strategy: &TruncationStrategy,
 stride: usize
) -> Vec<TokenizedInput>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`
where
 S: AsRef<str> + Sync,

Multithreaded encoding of a sequence of string-like texts (tokenization followed by encoding). Not that in contrast with encode optional second text, each text provided is encoded independently. Read more

[src]

fn encode_pair_list<S>(
 &self,
 text_list: &[(S, S)],
 max_len: usize,
 truncation_strategy: &TruncationStrategy,
 stride: usize
) -> Vec<TokenizedInput>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`
where
 S: AsRef<str> + Sync,

Multithreaded ncoding of a sequence of string-like text pairs (tokenization followed by encoding). This combines with encode with the list processing of encode_list. Read more

[src]

fn decode_list(
 &self,
 token_ids_list: &[Vec<i64>],
 skip_special_tokens: bool,
 clean_up_tokenization_spaces: bool
) -> Vec<String>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`

Multithreaded conversion a list of sequence of ids (integer) into a string, using the tokenizer and vocabulary with options to remove special tokens and clean up tokenization spaces. This calls decode for each provided sequence of ids Read more

[src]

impl Tokenizer<FNetVocab> for FNetTokenizer

[src]

fn vocab(&self) -> &FNetVocab

returns a reference to the tokenizer vocabulary

[src]

fn tokenize_to_tokens(&self, text: TokenRef<'_>) -> Vec<Token>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`

Tokenize a TokenRef, returning a sequence of tokens Read more

[src]

fn convert_tokens_to_string(&self, tokens: Vec<String>) -> String

Converts a sequence of strings into a single string. This will clean-up artifacts from tokenization (for example sub ##word) and generate a single output string Read more

[src]

fn build_input_with_special_tokens(
 &self,
 tokens_ids_with_offsets_1: TokenIdsWithOffsets,
 tokens_ids_with_offsets_2: Option<TokenIdsWithOffsets>
) -> TokenIdsWithSpecialTokens

Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and adding special tokens. Read more

[src]

fn tokenize(&self, text: &str) -> Vec<String>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`

Tokenize a string, returns a vector of tokens as strings. Use tokenize_with_offsets or tokenize_to_tokens to return offset information. Read more

[src]

fn tokenize_with_offsets(&self, text: &str) -> TokensWithOffsets

Tokenize a string, returning tokens with offset information Read more

[src]

fn tokenize_list<S>(&self, text_list: &[S]) -> Vec<Vec<String>>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`
where
S: AsRef<str>,

Tokenize a list of strings, returning tokens with offset information Read more

[src]

fn tokenize_list_with_offsets<S>(
 &self,
 text_list: &[S]
) -> Vec<TokensWithOffsets>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`
where
 S: AsRef<str>,

Tokenize a list of strings, where each corresponds to for example a sentence, returns a vector of TokensWithOffsets containing the tokens and their offset information. This calls tokenize_with_offsets on the list provided. Read more

[src]

fn convert_tokens_to_ids<S>(&self, tokens: &[S]) -> Vec<i64>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`
where
S: AsRef<str>,

Convert a slice of string-like to a vector ot token indices Read more

[src]

fn encode(
 &self,
 text_1: &str,
 text_2: Option<&str>,
 max_len: usize,
 truncation_strategy: &TruncationStrategy,
 stride: usize
) -> TokenizedInput

Encode a string-like (tokenization followed by encoding) Read more

[src]

fn encode_list<S>(
 &self,
 text_list: &[S],
 max_len: usize,
 truncation_strategy: &TruncationStrategy,
 stride: usize
) -> Vec<TokenizedInput>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`
where
 S: AsRef<str>,

Encode a sequence of string-like texts (tokenization followed by encoding). Not that in contrast with encode optional second text, each text provided is encoded independently. Read more

[src]

fn encode_pair_list<S>(
 &self,
 text_list: &[(S, S)],
 max_len: usize,
 truncation_strategy: &TruncationStrategy,
 stride: usize
) -> Vec<TokenizedInput>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`
where
 S: AsRef<str>,

Encode a sequence of string-like text pairs (tokenization followed by encoding). This combines with encode with the list processing of encode_list. Read more

[src]

fn decode_to_vec(
 &self,
 token_ids: &[i64 ],
 skip_special_tokens: bool
) -> Vec<String>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`

Decode a sequence of token indices to a sequence of Strings, optionally skipping special indices Read more

[src]

fn decode(
    &self,
    token_ids: &[i64 ],
    skip_special_tokens: bool,
    clean_up_tokenization_spaces: bool
) -> String

Converts a sequence of ids (integer) into a string, using the tokenizer and vocabulary with options to remove special tokens and clean up tokenization spaces. Read more

[src]

fn clean_up_tokenization(&self, input_string: String) -> String

Cleans-up tokenization artifacts (for example whitespace before punctuation) Read more

[src]

fn decode_list(
 &self,
 token_ids_list: &[Vec<i64>],
 skip_special_tokens: bool,
 clean_up_tokenization_spaces: bool
) -> Vec<String>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`

Converts a list of sequence of ids (integer) into a string, using the tokenizer and vocabulary with options to remove special tokens and clean up tokenization spaces. This calls decode for each provided sequence of ids Read more

[src]

impl Vocab for FNetVocab

[src]

fn unknown_value() -> &'static str

Associative function returning the unknown value for the vocabulary

[src]

fn get_unknown_value(&self) -> &'static str

Returns the unknown value on an instance

[src]

fn values(&self) -> &HashMap<String, i64>

Return the map of token strings to IDs

[src]

fn indices(&self) -> &HashMap<i64, String>

Return the map of token IDs to strings

[src]

fn special_values(&self) -> &HashMap<String, i64>

Return the map of token strings to IDs

[src]

fn special_indices(&self) -> &HashMap<i64, String>

Return the map of token IDs to strings for special values

[src]

fn from_file(path: &str) -> Result<FNetVocab, TokenizerError>

Read a vocabulary from file Read more

[src]

fn token_to_id(&self, token: &str) -> i64

Converts a token to an id. Read more

[src]

fn id_to_token(&self, id: &i64) -> String

Converts an id to a token. Read more

[src]

fn read_vocab_file(path: &str) -> Result<HashMap<String, i64>, TokenizerError>

Read a Bert-style vocab.txt file (single column, one token per line) The from_file method should be preferred, and needs to be implemented by the specific vocabularies Read more

[src]

fn _token_to_id(
 &self,
 token: &str,
 values: &HashMap<String, i64>,
 special_values: &HashMap<String, i64>,
 unknown_value: &str
) -> i64

Converts a token to an id, provided a HashMap of values, a HashMap of special values and the unknown value token string representation. This is not meant to be directly used, the method token_to_id offers a more convenient interface for most vocabularies, but needs to be implemented by the specific vocabulary. Read more

[src]

fn _id_to_token(
 &self,
 id: &i64,
 indices: &HashMap<i64, String>,
 special_indices: &HashMap<i64, String>,
 unknown_value: &str
) -> String

Converts an id to a token, provided a HashMap of values, a HashMap of special values and the unknown value token string representation. This is not meant to be directly used, the method id_to_token offers a more convenient interface for most vocabularies, but needs to be implemented by the specific vocabulary. Read more

[src]

fn _register_as_special_value(
 token: &str,
 values: &HashMap<String, i64>,
 special_values: &mut HashMap<String, i64>
) -> Result<(), TokenizerError>

Register a token as a special value Read more

[src]

fn convert_tokens_to_ids(&self, tokens: &[&str ]) -> Vec<i64>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`

Converts a list of tokens to a list of indices. Read more

Auto Trait Implementations

impl RefUnwindSafe for FNetVocab

impl Send for FNetVocab

impl Sync for FNetVocab

impl Unpin for FNetVocab

impl UnwindSafe for FNetVocab

Blanket Implementations

[src]

impl<T> Any for T where
T: 'static + ?Sized,

[src]

pub fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

[src]

impl<T> Borrow<T> for T where
T: ?Sized,

[src]

pub fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

[src]

impl<T> BorrowMut<T> for T where
T: ?Sized,

[src]

pub fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

[src]

impl<T> From<T> for T

[src]

pub fn from(t: T) -> T

Performs the conversion.

[src]

impl<T, U> Into for T where
U: From<T>,

[src]

pub fn into(self) -> U

Performs the conversion.

impl<T> Pointable for T

pub const ALIGN: usize

The alignment of pointer.

type Init = T

The type for initializers.

pub unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a with the given initializer. Read more

pub unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more

pub unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more

pub unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more

[src]

impl<T> ToOwned for T where
T: Clone,

type Owned = T

The resulting type after obtaining ownership.

[src]

pub fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more

[src]

pub fn clone_into(&self, target: &mut T)

🔬 This is a nightly-only experimental API. (toowned_clone_into)

recently added

Uses borrowed data to replace owned data, usually by cloning. Read more

[src]

impl<T, U> TryFrom for T where
U: Into<T>,

type Error = Infallible

The type returned in the event of a conversion error.

[src]

pub fn try_from(value: U) -> Result<T, <T as TryFrom>::Error>

Performs the conversion.

[src]

impl<T, U> TryInto for T where
U: TryFrom<T>,

type Error = >::Error

The type returned in the event of a conversion error.

[src]

pub fn try_into(self) -> Result<U, >::Error>

Performs the conversion.

Struct rust_tokenizers::vocab::FNetVocab[−][src]

FNetVocab

Fields

Implementations

impl FNetVocab

pub fn sep_value() -> &'static str

pub fn cls_value() -> &'static str

pub fn mask_value() -> &'static str

pub fn pad_value() -> &'static str

Trait Implementations

impl Clone for FNetVocab

fn clone(&self) -> FNetVocab

fn clone_from(&mut self, source: &Self)

impl Debug for FNetVocab

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl MultiThreadedTokenizer<FNetVocab> for FNetTokenizer

fn vocab(&self) -> &T

fn tokenize_list_with_offsets<S>( &self, text_list: &[S]) -> Vec<TokensWithOffsets>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where A: Allocator, where S: AsRef<str> + Sync,

fn tokenize_list<S>(&self, text_list: &[S]) -> Vec<Vec<String>>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where A: Allocator, where S: AsRef<str> + Sync,

fn encode_list<S>( &self, text_list: &[S], max_len: usize, truncation_strategy: &TruncationStrategy, stride: usize) -> Vec<TokenizedInput>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where A: Allocator, where S: AsRef<str> + Sync,

fn encode_pair_list<S>( &self, text_list: &[(S, S)], max_len: usize, truncation_strategy: &TruncationStrategy, stride: usize) -> Vec<TokenizedInput>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where A: Allocator, where S: AsRef<str> + Sync,

fn decode_list( &self, token_ids_list: &[Vec<i64>], skip_special_tokens: bool, clean_up_tokenization_spaces: bool) -> Vec<String>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where A: Allocator,

impl Tokenizer<FNetVocab> for FNetTokenizer

fn vocab(&self) -> &FNetVocab

fn tokenize_to_tokens(&self, text: TokenRef<'_>) -> Vec<Token>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where A: Allocator,

fn convert_tokens_to_string(&self, tokens: Vec<String>) -> String

fn build_input_with_special_tokens( &self, tokens_ids_with_offsets_1: TokenIdsWithOffsets, tokens_ids_with_offsets_2: Option<TokenIdsWithOffsets>) -> TokenIdsWithSpecialTokens

fn tokenize(&self, text: &str) -> Vec<String>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where A: Allocator,

fn tokenize_with_offsets(&self, text: &str) -> TokensWithOffsets

fn tokenize_list<S>(&self, text_list: &[S]) -> Vec<Vec<String>>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where A: Allocator, where S: AsRef<str>,

fn tokenize_list_with_offsets<S>( &self, text_list: &[S]) -> Vec<TokensWithOffsets>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where A: Allocator, where S: AsRef<str>,

fn convert_tokens_to_ids<S>(&self, tokens: &[S]) -> Vec<i64>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where A: Allocator, where S: AsRef<str>,

fn encode( &self, text_1: &str, text_2: Option<&str>, max_len: usize, truncation_strategy: &TruncationStrategy, stride: usize) -> TokenizedInput

fn encode_list<S>( &self, text_list: &[S], max_len: usize, truncation_strategy: &TruncationStrategy, stride: usize) -> Vec<TokenizedInput>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where A: Allocator, where S: AsRef<str>,

fn encode_pair_list<S>( &self, text_list: &[(S, S)], max_len: usize, truncation_strategy: &TruncationStrategy, stride: usize) -> Vec<TokenizedInput>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where A: Allocator, where S: AsRef<str>,

fn decode_to_vec( &self, token_ids: &[i64], skip_special_tokens: bool) -> Vec<String>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where A: Allocator,

fn decode( &self, token_ids: &[i64], skip_special_tokens: bool, clean_up_tokenization_spaces: bool) -> String

fn clean_up_tokenization(&self, input_string: String) -> String

fn decode_list( &self, token_ids_list: &[Vec<i64>], skip_special_tokens: bool, clean_up_tokenization_spaces: bool) -> Vec<String>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where A: Allocator,

impl Vocab for FNetVocab

fn unknown_value() -> &'static str

fn get_unknown_value(&self) -> &'static str

fn values(&self) -> &HashMap<String, i64>

fn indices(&self) -> &HashMap<i64, String>

fn special_values(&self) -> &HashMap<String, i64>

fn special_indices(&self) -> &HashMap<i64, String>

fn from_file(path: &str) -> Result<FNetVocab, TokenizerError>

fn token_to_id(&self, token: &str) -> i64

fn id_to_token(&self, id: &i64) -> String

fn read_vocab_file(path: &str) -> Result<HashMap<String, i64>, TokenizerError>

fn _token_to_id( &self, token: &str, values: &HashMap<String, i64>, special_values: &HashMap<String, i64>, unknown_value: &str) -> i64

fn _id_to_token( &self, id: &i64, indices: &HashMap<i64, String>, special_indices: &HashMap<i64, String>, unknown_value: &str) -> String

fn _register_as_special_value( token: &str, values: &HashMap<String, i64>, special_values: &mut HashMap<String, i64>) -> Result<(), TokenizerError>

fn convert_tokens_to_ids(&self, tokens: &[&str]) -> Vec<i64>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where A: Allocator,

Auto Trait Implementations

impl RefUnwindSafe for FNetVocab

impl Send for FNetVocab

impl Sync for FNetVocab

impl Unpin for FNetVocab

impl UnwindSafe for FNetVocab

Blanket Implementations

impl<T> Any for T where T: 'static + ?Sized,

pub fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

pub fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

pub fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

pub fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

pub fn into(self) -> U

impl<T> Pointable for T

pub const ALIGN: usize

type Init = T

pub unsafe fn init(init: <T as Pointable>::Init) -> usize

pub unsafe fn deref<'a>(ptr: usize) -> &'a T

pub unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

pub unsafe fn drop(ptr: usize)

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn tokenize_list_with_offsets<S>(
&self,
text_list: &[S]
) -> Vec<TokensWithOffsets>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`
where
S: AsRef<str> + Sync,

fn tokenize_list<S>(&self, text_list: &[S]) -> Vec<Vec<String>>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`
where
S: AsRef<str> + Sync,

fn encode_list<S>(
&self,
text_list: &[S],
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> Vec<TokenizedInput>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`
where
S: AsRef<str> + Sync,

fn encode_pair_list<S>(
&self,
text_list: &[(S, S)],
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> Vec<TokenizedInput>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`
where
S: AsRef<str> + Sync,

fn decode_list(
&self,
token_ids_list: &[Vec<i64>],
skip_special_tokens: bool,
clean_up_tokenization_spaces: bool
) -> Vec<String>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`

fn tokenize_to_tokens(&self, text: TokenRef<'_>) -> Vec<Token>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`

fn build_input_with_special_tokens(
&self,
tokens_ids_with_offsets_1: TokenIdsWithOffsets,
tokens_ids_with_offsets_2: Option<TokenIdsWithOffsets>
) -> TokenIdsWithSpecialTokens

fn tokenize(&self, text: &str) -> Vec<String>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`

fn tokenize_list<S>(&self, text_list: &[S]) -> Vec<Vec<String>>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`
where
S: AsRef<str>,

fn tokenize_list_with_offsets<S>(
&self,
text_list: &[S]
) -> Vec<TokensWithOffsets>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`
where
S: AsRef<str>,

fn convert_tokens_to_ids<S>(&self, tokens: &[S]) -> Vec<i64>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`
where
S: AsRef<str>,

fn encode(
&self,
text_1: &str,
text_2: Option<&str>,
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> TokenizedInput

fn encode_list<S>(
&self,
text_list: &[S],
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> Vec<TokenizedInput>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`
where
S: AsRef<str>,

fn encode_pair_list<S>(
&self,
text_list: &[(S, S)],
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> Vec<TokenizedInput>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`
where
S: AsRef<str>,

fn decode_to_vec(
&self,
token_ids: &[i64 ],
skip_special_tokens: bool
) -> Vec<String>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`

fn decode(
&self,
token_ids: &[i64 ],
skip_special_tokens: bool,
clean_up_tokenization_spaces: bool
) -> String

fn decode_list(
&self,
token_ids_list: &[Vec<i64>],
skip_special_tokens: bool,
clean_up_tokenization_spaces: bool
) -> Vec<String>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`

fn _token_to_id(
&self,
token: &str,
values: &HashMap<String, i64>,
special_values: &HashMap<String, i64>,
unknown_value: &str
) -> i64

fn _id_to_token(
&self,
id: &i64,
indices: &HashMap<i64, String>,
special_indices: &HashMap<i64, String>,
unknown_value: &str
) -> String

fn _register_as_special_value(
token: &str,
values: &HashMap<String, i64>,
special_values: &mut HashMap<String, i64>
) -> Result<(), TokenizerError>

fn convert_tokens_to_ids(&self, tokens: &[&str ]) -> Vec<i64>ⓘ
Notable traits for Vec<u8, A>
`impl<A> Write for Vec<u8, A> where A: Allocator,`

impl<T> Any for T where
T: 'static + ?Sized,

impl<T> Borrow<T> for T where
T: ?Sized,

impl<T> BorrowMut<T> for T where
T: ?Sized,

impl<T, U> Into<U> for T where
U: From<T>,

impl<T> ToOwned for T where
T: Clone,

impl<T, U> TryFrom<U> for T where
U: Into<T>,

impl<T, U> TryInto<U> for T where
U: TryFrom<T>,