Enum rust_bert::pipelines::common::TokenizerOption [−][src]
Variants
Bert(BertTokenizer)
Bert Tokenizer
Roberta(RobertaTokenizer)
Roberta Tokenizer
XLMRoberta(XLMRobertaTokenizer)
XLMRoberta Tokenizer
Marian(MarianTokenizer)
Marian Tokenizer
T5(T5Tokenizer)
T5 Tokenizer
Albert(AlbertTokenizer)
Albert Tokenizer
XLNet(XLNetTokenizer)
XLNet Tokenizer
GPT2(Gpt2Tokenizer)
GPT2 Tokenizer
OpenAiGpt(OpenAiGptTokenizer)
OpenAI GPT Tokenizer
Reformer(ReformerTokenizer)
Reformer Tokenizer
ProphetNet(ProphetNetTokenizer)
ProphetNet Tokenizer
Implementations
impl TokenizerOption
[src]
pub fn from_file(
model_type: ModelType,
vocab_path: &str,
merges_path: Option<&str>,
lower_case: bool,
strip_accents: impl Into<Option<bool>>,
add_prefix_space: impl Into<Option<bool>>
) -> Result<Self, RustBertError>
[src]
model_type: ModelType,
vocab_path: &str,
merges_path: Option<&str>,
lower_case: bool,
strip_accents: impl Into<Option<bool>>,
add_prefix_space: impl Into<Option<bool>>
) -> Result<Self, RustBertError>
Interface method to load a tokenizer from file
pub fn model_type(&self) -> ModelType
[src]
Returns the model type
pub fn encode_list(
&self,
text_list: &[&str],
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> Vec<TokenizedInput>
[src]
&self,
text_list: &[&str],
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> Vec<TokenizedInput>
Interface method to encode a list of texts
pub fn encode_pair_list(
&self,
text_pair_list: &[(&str, &str)],
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> Vec<TokenizedInput>
[src]
&self,
text_pair_list: &[(&str, &str)],
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> Vec<TokenizedInput>
Interface method for encoding a list of text pairs
pub fn encode_pair(
&self,
text_1: &str,
text_2: Option<&str>,
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> TokenizedInput
[src]
&self,
text_1: &str,
text_2: Option<&str>,
max_len: usize,
truncation_strategy: &TruncationStrategy,
stride: usize
) -> TokenizedInput
Interface method for pair encoding (single input)
pub fn tokenize(&self, text: &str) -> Vec<String>
[src]
Interface method for tokenizing a text
pub fn tokenize_with_offsets(&self, text: &str) -> TokensWithOffsets
[src]
Interface method for tokenizing a text, returning the tokens together with their offsets
pub fn tokenize_list(&self, text: &[&str]) -> Vec<Vec<String>>
[src]
Interface method for tokenizing a list of texts
pub fn decode(
&self,
token_ids: Vec<i64>,
skip_special_tokens: bool,
clean_up_tokenization_spaces: bool
) -> String
[src]
&self,
token_ids: Vec<i64>,
skip_special_tokens: bool,
clean_up_tokenization_spaces: bool
) -> String
Interface method for decoding a sequence of token ids into a string
pub fn build_input_with_special_tokens(
&self,
token_ids_with_offsets_1: TokenIdsWithOffsets,
token_ids_with_offsets_2: Option<TokenIdsWithOffsets>
) -> TokenizedInput
[src]
&self,
token_ids_with_offsets_1: TokenIdsWithOffsets,
token_ids_with_offsets_2: Option<TokenIdsWithOffsets>
) -> TokenizedInput
Interface method to build input with special tokens
pub fn convert_tokens_to_ids<S, ST>(&self, tokens: S) -> Vec<i64> where
S: AsRef<[ST]>,
ST: AsRef<str>,
[src]
S: AsRef<[ST]>,
ST: AsRef<str>,
Interface method to convert tokens to ids
pub fn get_unk_id(&self) -> i64
[src]
Interface method to retrieve the id of the unknown (`unk`) token
pub fn get_pad_id(&self) -> Option<i64>
[src]
Interface method to retrieve the id of the padding (`pad`) token, if the tokenizer has one
pub fn get_sep_id(&self) -> Option<i64>
[src]
Interface method to retrieve the id of the separator (`sep`) token, if the tokenizer has one
Auto Trait Implementations
impl !RefUnwindSafe for TokenizerOption
impl Send for TokenizerOption
impl Sync for TokenizerOption
impl Unpin for TokenizerOption
impl UnwindSafe for TokenizerOption
Blanket Implementations
impl<T> Any for T where
T: 'static + ?Sized,
[src]
T: 'static + ?Sized,
impl<T> Borrow<T> for T where
T: ?Sized,
[src]
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
[src]
T: ?Sized,
pub fn borrow_mut(&mut self) -> &mut T
[src]
impl<T> From<T> for T
[src]
impl<T> Instrument for T
[src]
pub fn instrument(self, span: Span) -> Instrumented<Self>
[src]
pub fn in_current_span(self) -> Instrumented<Self>
[src]
impl<T, U> Into<U> for T where
U: From<T>,
[src]
U: From<T>,
impl<T> Pointable for T
pub const ALIGN: usize
type Init = T
The type for initializers.
pub unsafe fn init(init: <T as Pointable>::Init) -> usize
pub unsafe fn deref<'a>(ptr: usize) -> &'a T
pub unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T
pub unsafe fn drop(ptr: usize)
impl<T> Same<T> for T
type Output = T
Should always be Self
impl<T, U> TryFrom<U> for T where
U: Into<T>,
[src]
U: Into<T>,
type Error = Infallible
The type returned in the event of a conversion error.
pub fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>
[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>,
[src]
U: TryFrom<T>,
type Error = <U as TryFrom<T>>::Error
The type returned in the event of a conversion error.
pub fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>
[src]
impl<V, T> VZip<V> for T where
V: MultiLane<T>,
V: MultiLane<T>,