[−][src]Struct rust_tokenizers::preprocessing::tokenizer::sentence_piece_tokenizer::SentencePieceTokenizer
Implementations
impl SentencePieceTokenizer
[src]
pub fn from_file(
    path: &str,
    lower_case: bool
) -> Result<SentencePieceTokenizer, TokenizerError>
[src]
pub fn from_existing_vocab_and_model(
    vocab: SentencePieceVocab,
    model: SentencePieceModel,
    lower_case: bool
) -> SentencePieceTokenizer
[src]
Trait Implementations
impl MultiThreadedTokenizer<SentencePieceVocab> for SentencePieceTokenizer
[src]
fn vocab(&self) -> &SentencePieceVocab
[src]
fn tokenize_list_with_offsets(
    &self,
    text_list: Vec<&str>
) -> Vec<(Vec<String>, Vec<Option<Offset>>, Vec<Vec<OffsetSize>>, Vec<Mask>)>
[src]
fn tokenize_list(&self, text_list: Vec<&str>) -> Vec<Vec<String>>
[src]
fn encode_list(
    &self,
    text_list: Vec<&str>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> Vec<TokenizedInput>
[src]
fn encode_pair_list(
    &self,
    text_list: Vec<(&str, &str)>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> Vec<TokenizedInput>
[src]
fn decode_list(
    &self,
    token_ids_list: Vec<Vec<i64>>,
    skip_special_tokens: bool,
    clean_up_tokenization_spaces: bool
) -> Vec<String>
[src]
impl Tokenizer<SentencePieceVocab> for SentencePieceTokenizer
[src]
fn vocab(&self) -> &SentencePieceVocab
[src]
fn tokenize_to_tokens(&self, text: TokenRef<'_>) -> Vec<Token>
[src]
fn convert_tokens_to_string(&self, tokens: Vec<String>) -> String
[src]
fn tokenize(&self, text: &str) -> Vec<String>
[src]
fn tokenize_with_offsets(
    &self,
    text: &str
) -> (Vec<String>, Vec<Option<Offset>>, Vec<Vec<OffsetSize>>, Vec<Mask>)
[src]
fn tokenize_list(&self, text_list: Vec<&str>) -> Vec<Vec<String>>
[src]
fn tokenize_list_with_offsets(
    &self,
    text_list: Vec<&str>
) -> Vec<(Vec<String>, Vec<Option<Offset>>, Vec<Vec<OffsetSize>>, Vec<Mask>)>
[src]
fn convert_tokens_to_ids(&self, tokens: &Vec<String>) -> Vec<i64>
[src]
fn encode(
    &self,
    text_1: &str,
    text_2: Option<&str>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> TokenizedInput
[src]
fn encode_list(
    &self,
    text_list: Vec<&str>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> Vec<TokenizedInput>
[src]
fn encode_pair_list(
    &self,
    text_list: Vec<(&str, &str)>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> Vec<TokenizedInput>
[src]
fn decode_to_vec(
    &self,
    token_ids: Vec<i64>,
    skip_special_tokens: bool
) -> Vec<String>
[src]
fn decode(
    &self,
    token_ids: Vec<i64>,
    skip_special_tokens: bool,
    clean_up_tokenization_spaces: bool
) -> String
[src]
fn clean_up_tokenization(&self, input_string: String) -> String
[src]
fn decode_list(
    &self,
    token_ids_list: Vec<Vec<i64>>,
    skip_special_tokens: bool,
    clean_up_tokenization_spaces: bool
) -> Vec<String>
[src]
fn build_input_with_special_tokens(
    &self,
    tokens_1: Vec<i64>,
    tokens_2: Option<Vec<i64>>,
    offsets_1: Vec<Option<Offset>>,
    offsets_2: Option<Vec<Option<Offset>>>,
    original_offsets_1: Vec<Vec<OffsetSize>>,
    original_offsets_2: Option<Vec<Vec<OffsetSize>>>,
    mask: Vec<Mask>,
    mask_2: Option<Vec<Mask>>
) -> (Vec<i64>, Vec<i8>, Vec<i8>, Vec<Option<Offset>>, Vec<Vec<OffsetSize>>, Vec<Mask>)
[src]
Auto Trait Implementations
impl RefUnwindSafe for SentencePieceTokenizer
impl Send for SentencePieceTokenizer
impl Sync for SentencePieceTokenizer
impl Unpin for SentencePieceTokenizer
impl UnwindSafe for SentencePieceTokenizer
Blanket Implementations
impl<T> Any for T where
    T: 'static + ?Sized,
[src]
impl<T> Borrow<T> for T where
    T: ?Sized,
[src]
impl<T> BorrowMut<T> for T where
    T: ?Sized,
[src]
fn borrow_mut(&mut self) -> &mut T
[src]
impl<T> From<T> for T
[src]
impl<T, U> Into<U> for T where
    U: From<T>,
[src]
impl<T, U> TryFrom<U> for T where
    U: Into<T>,
[src]
type Error = Infallible
The type returned in the event of a conversion error.
fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>
[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>,
[src]
U: TryFrom<T>,