pub enum TokenizerOption {
Bert(BertTokenizer),
Deberta(DeBERTaTokenizer),
DebertaV2(DeBERTaV2Tokenizer),
Roberta(RobertaTokenizer),
XLMRoberta(XLMRobertaTokenizer),
Marian(MarianTokenizer),
T5(T5Tokenizer),
Albert(AlbertTokenizer),
XLNet(XLNetTokenizer),
GPT2(Gpt2Tokenizer),
OpenAiGpt(OpenAiGptTokenizer),
Reformer(ReformerTokenizer),
ProphetNet(ProphetNetTokenizer),
Pegasus(PegasusTokenizer),
MBart50(MBart50Tokenizer),
M2M100(M2M100Tokenizer),
FNet(FNetTokenizer),
Bart(RobertaTokenizer),
}
Variants
Bert(BertTokenizer)
Bert Tokenizer
Deberta(DeBERTaTokenizer)
DeBERTa Tokenizer
DebertaV2(DeBERTaV2Tokenizer)
DeBERTa V2 Tokenizer
Roberta(RobertaTokenizer)
Roberta Tokenizer
XLMRoberta(XLMRobertaTokenizer)
XLMRoberta Tokenizer
Marian(MarianTokenizer)
Marian Tokenizer
T5(T5Tokenizer)
T5 Tokenizer
Albert(AlbertTokenizer)
Albert Tokenizer
XLNet(XLNetTokenizer)
XLNet Tokenizer
GPT2(Gpt2Tokenizer)
GPT2 Tokenizer
OpenAiGpt(OpenAiGptTokenizer)
GPT Tokenizer
Reformer(ReformerTokenizer)
Reformer Tokenizer
ProphetNet(ProphetNetTokenizer)
ProphetNet Tokenizer
Pegasus(PegasusTokenizer)
Pegasus Tokenizer
MBart50(MBart50Tokenizer)
MBart50 Tokenizer
M2M100(M2M100Tokenizer)
M2M100 Tokenizer
FNet(FNetTokenizer)
FNet Tokenizer
Bart(RobertaTokenizer)
Bart Tokenizer
Implementations
sourceimpl TokenizerOption
impl TokenizerOption
sourcepub fn from_file(
model_type: ModelType,
vocab_path: &str,
merges_path: Option<&str>,
lower_case: bool,
strip_accents: impl Into<Option<bool>>,
add_prefix_space: impl Into<Option<bool>>
) -> Result<Self, RustBertError>
pub fn from_file(
model_type: ModelType,
vocab_path: &str,
merges_path: Option<&str>,
lower_case: bool,
strip_accents: impl Into<Option<bool>>,
add_prefix_space: impl Into<Option<bool>>
) -> Result<Self, RustBertError>
Interface method to load a tokenizer from file
sourcepub fn model_type(&self) -> ModelType
pub fn model_type(&self) -> ModelType
Returns the model type
sourcepub fn encode_list<S>(
&self,
text_list: &[S],
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> Vec<TokenizedInput> where
S: AsRef<str> + Sync,
pub fn encode_list<S>(
&self,
text_list: &[S],
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> Vec<TokenizedInput> where
S: AsRef<str> + Sync,
Interface method for encoding a list of texts
sourcepub fn encode_pair_list(
&self,
text_pair_list: &[(&str, &str)],
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> Vec<TokenizedInput>
pub fn encode_pair_list(
&self,
text_pair_list: &[(&str, &str)],
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> Vec<TokenizedInput>
Interface method for pair encoding
sourcepub fn encode_pair(
&self,
text_1: &str,
text_2: Option<&str>,
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> TokenizedInput
pub fn encode_pair(
&self,
text_1: &str,
text_2: Option<&str>,
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> TokenizedInput
Interface method for pair encoding (single input)
sourcepub fn tokenize(&self, text: &str) -> Vec<String>
pub fn tokenize(&self, text: &str) -> Vec<String>
Interface method for tokenization
sourcepub fn tokenize_with_offsets(&self, text: &str) -> TokensWithOffsets
pub fn tokenize_with_offsets(&self, text: &str) -> TokensWithOffsets
Interface method for tokenization with offsets
sourcepub fn tokenize_list<S>(&self, text: &[S]) -> Vec<Vec<String>> where
S: AsRef<str> + Sync,
pub fn tokenize_list<S>(&self, text: &[S]) -> Vec<Vec<String>> where
S: AsRef<str> + Sync,
Interface method for tokenization of a list of texts
sourcepub fn decode(
&self,
token_ids: &[i64],
skip_special_tokens: bool,
clean_up_tokenization_spaces: bool
) -> String
pub fn decode(
&self,
token_ids: &[i64],
skip_special_tokens: bool,
clean_up_tokenization_spaces: bool
) -> String
Interface method for decoding
sourcepub fn build_input_with_special_tokens(
&self,
token_ids_with_offsets_1: TokenIdsWithOffsets,
token_ids_with_offsets_2: Option<TokenIdsWithOffsets>
) -> TokenizedInput
pub fn build_input_with_special_tokens(
&self,
token_ids_with_offsets_1: TokenIdsWithOffsets,
token_ids_with_offsets_2: Option<TokenIdsWithOffsets>
) -> TokenizedInput
Interface method to build input with special tokens
sourcepub fn convert_tokens_to_ids<S>(&self, tokens: &[S]) -> Vec<i64> where
S: AsRef<str>,
pub fn convert_tokens_to_ids<S>(&self, tokens: &[S]) -> Vec<i64> where
S: AsRef<str>,
Interface method to convert tokens to ids
sourcepub fn get_unk_id(&self) -> i64
pub fn get_unk_id(&self) -> i64
Interface method to get the unknown (UNK) token id
sourcepub fn get_pad_id(&self) -> Option<i64>
pub fn get_pad_id(&self) -> Option<i64>
Interface method to get the padding (PAD) token id, if any
sourcepub fn get_sep_id(&self) -> Option<i64>
pub fn get_sep_id(&self) -> Option<i64>
Interface method to get the separator (SEP) token id, if any
sourcepub fn get_bos_id(&self) -> Option<i64>
pub fn get_bos_id(&self) -> Option<i64>
Interface method to get the beginning-of-sequence (BOS) token id, if any
sourcepub fn get_eos_id(&self) -> Option<i64>
pub fn get_eos_id(&self) -> Option<i64>
Interface method to get the end-of-sequence (EOS) token id, if any
Auto Trait Implementations
impl RefUnwindSafe for TokenizerOption
impl Send for TokenizerOption
impl Sync for TokenizerOption
impl Unpin for TokenizerOption
impl UnwindSafe for TokenizerOption
Blanket Implementations
sourceimpl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
const: unstable · sourcefn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
sourceimpl<T> Instrument for T
impl<T> Instrument for T
sourcefn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Instruments this type with the provided Span, returning an
Instrumented wrapper. Read more
sourcefn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
impl<T> Pointable for T
impl<T> Pointable for T
impl<V, T> VZip<V> for T where
V: MultiLane<T>,
impl<V, T> VZip<V> for T where
V: MultiLane<T>,
fn vzip(self) -> V
sourceimpl<T> WithSubscriber for T
impl<T> WithSubscriber for T
sourcefn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where
S: Into<Dispatch>,
fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where
S: Into<Dispatch>,
Attaches the provided Subscriber to this type, returning a
WithDispatch wrapper. Read more
sourcefn with_current_subscriber(self) -> WithDispatch<Self>
fn with_current_subscriber(self) -> WithDispatch<Self>
Attaches the current default Subscriber to this type, returning a
WithDispatch wrapper. Read more