[−][src]Enum rust_bert::pipelines::common::TokenizerOption
Variants
Bert(BertTokenizer) — Bert Tokenizer
Roberta(RobertaTokenizer) — Roberta Tokenizer
XLMRoberta(XLMRobertaTokenizer) — XLMRoberta Tokenizer
Marian(MarianTokenizer) — Marian Tokenizer
T5(T5Tokenizer) — T5 Tokenizer
Albert(AlbertTokenizer) — Albert Tokenizer
XLNet(XLNetTokenizer) — XLNet Tokenizer
GPT2(Gpt2Tokenizer) — GPT2 Tokenizer
OpenAiGpt(OpenAiGptTokenizer) — OpenAI GPT Tokenizer
Reformer(ReformerTokenizer) — Reformer Tokenizer
Implementations
impl TokenizerOption[src]
pub fn from_file(
model_type: ModelType,
vocab_path: &str,
merges_path: Option<&str>,
lower_case: bool,
strip_accents: impl Into<Option<bool>>,
add_prefix_space: impl Into<Option<bool>>
) -> Result<Self, RustBertError>[src]
model_type: ModelType,
vocab_path: &str,
merges_path: Option<&str>,
lower_case: bool,
strip_accents: impl Into<Option<bool>>,
add_prefix_space: impl Into<Option<bool>>
) -> Result<Self, RustBertError>
Interface method to load a tokenizer from file
pub fn model_type(&self) -> ModelType[src]
Returns the model type
pub fn encode_list(
&self,
text_list: &[&str],
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> Vec<TokenizedInput>[src]
&self,
text_list: &[&str],
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> Vec<TokenizedInput>
Interface method for encoding a list of texts
pub fn encode_pair_list(
&self,
text_pair_list: &[(&str, &str)],
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> Vec<TokenizedInput>[src]
&self,
text_pair_list: &[(&str, &str)],
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> Vec<TokenizedInput>
Interface method for pair encoding
pub fn tokenize(&self, text: &str) -> Vec<String>[src]
Interface method for tokenization
pub fn tokenize_list(&self, text: &[&str]) -> Vec<Vec<String>>[src]
Interface method for tokenization of a list of texts
pub fn decode(
&self,
token_ids: Vec<i64>,
skip_special_tokens: bool,
clean_up_tokenization_spaces: bool
) -> String[src]
&self,
token_ids: Vec<i64>,
skip_special_tokens: bool,
clean_up_tokenization_spaces: bool
) -> String
Interface method for decoding
pub fn build_input_with_special_tokens(
&self,
token_ids_with_offsets_1: TokenIdsWithOffsets,
token_ids_with_offsets_2: Option<TokenIdsWithOffsets>
) -> TokenizedInput[src]
&self,
token_ids_with_offsets_1: TokenIdsWithOffsets,
token_ids_with_offsets_2: Option<TokenIdsWithOffsets>
) -> TokenizedInput
Interface method to build input with special tokens
pub fn convert_tokens_to_ids<S, ST>(&self, tokens: S) -> Vec<i64> where
S: AsRef<[ST]>,
ST: AsRef<str>, [src]
S: AsRef<[ST]>,
ST: AsRef<str>,
Interface method to convert tokens to ids
pub fn get_unk_id(&self) -> i64[src]
Interface method returning the id of the unknown (unk) token
pub fn get_pad_id(&self) -> Option<i64>[src]
Interface method returning the id of the padding (pad) token, if any
pub fn get_sep_id(&self) -> Option<i64>[src]
Interface method returning the id of the separator (sep) token, if any
Auto Trait Implementations
impl !RefUnwindSafe for TokenizerOption[src]
impl Send for TokenizerOption[src]
impl Sync for TokenizerOption[src]
impl Unpin for TokenizerOption[src]
impl UnwindSafe for TokenizerOption[src]
Blanket Implementations
impl<T> Any for T where
T: 'static + ?Sized, [src]
T: 'static + ?Sized,
impl<T> Borrow<T> for T where
T: ?Sized, [src]
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized, [src]
T: ?Sized,
pub fn borrow_mut(&mut self) -> &mut T[src]
impl<T> From<T> for T[src]
impl<T, U> Into<U> for T where
U: From<T>, [src]
U: From<T>,
impl<T> Pointable for T
pub const ALIGN: usize
type Init = T
The type for initializers.
pub unsafe fn init(init: <T as Pointable>::Init) -> usize
pub unsafe fn deref<'a>(ptr: usize) -> &'a T
pub unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T
pub unsafe fn drop(ptr: usize)
impl<T> Same<T> for T
type Output = T
Should always be Self
impl<T, U> TryFrom<U> for T where
U: Into<T>, [src]
U: Into<T>,
type Error = Infallible
The type returned in the event of a conversion error.
pub fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>, [src]
U: TryFrom<T>,
type Error = <U as TryFrom<T>>::Error
The type returned in the event of a conversion error.
pub fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>[src]
impl<V, T> VZip<V> for T where
V: MultiLane<T>,
V: MultiLane<T>,