[−][src]Struct rust_tokenizers::preprocessing::tokenizer::sentence_piece_tokenizer::SentencePieceTokenizer
Implementations
impl SentencePieceTokenizer
[src]
pub fn from_file(
    path: &str,
    lower_case: bool
) -> Result<SentencePieceTokenizer, TokenizerError>
[src]
pub fn from_existing_vocab_and_model(
    vocab: SentencePieceVocab,
    model: SentencePieceModel,
    lower_case: bool
) -> SentencePieceTokenizer
[src]
Trait Implementations
impl MultiThreadedTokenizer<SentencePieceVocab> for SentencePieceTokenizer
[src]
fn vocab(&self) -> &SentencePieceVocab
[src]
fn tokenize_list_with_offsets(
    &self,
    text_list: Vec<&str>
) -> Vec<(Vec<String>, Vec<Option<Offset>>, Vec<Vec<OffsetSize>>, Vec<Mask>)>
[src]
fn tokenize_list(&self, text_list: Vec<&str>) -> Vec<Vec<String>>
[src]
fn encode_list(
    &self,
    text_list: Vec<&str>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> Vec<TokenizedInput>
[src]
fn encode_pair_list(
    &self,
    text_list: Vec<(&str, &str)>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> Vec<TokenizedInput>
[src]
fn decode_list(
    &self,
    token_ids_list: Vec<Vec<i64>>,
    skip_special_tokens: bool,
    clean_up_tokenization_spaces: bool
) -> Vec<String>
[src]
impl Tokenizer<SentencePieceVocab> for SentencePieceTokenizer
[src]
fn vocab(&self) -> &SentencePieceVocab
[src]
fn tokenize_to_tokens(&self, text: TokenRef<'_>) -> Vec<Token>
[src]
fn convert_tokens_to_string(&self, tokens: Vec<String>) -> String
[src]
fn tokenize(&self, text: &str) -> Vec<String>
[src]
fn tokenize_with_offsets(
    &self,
    text: &str
) -> (Vec<String>, Vec<Option<Offset>>, Vec<Vec<OffsetSize>>, Vec<Mask>)
[src]
fn tokenize_list(&self, text_list: Vec<&str>) -> Vec<Vec<String>>
[src]
fn tokenize_list_with_offsets(
    &self,
    text_list: Vec<&str>
) -> Vec<(Vec<String>, Vec<Option<Offset>>, Vec<Vec<OffsetSize>>, Vec<Mask>)>
[src]
fn convert_tokens_to_ids(&self, tokens: &Vec<String>) -> Vec<i64>
[src]
fn encode(
    &self,
    text_1: &str,
    text_2: Option<&str>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> TokenizedInput
[src]
fn encode_list(
    &self,
    text_list: Vec<&str>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> Vec<TokenizedInput>
[src]
fn encode_pair_list(
    &self,
    text_list: Vec<(&str, &str)>,
    max_len: usize,
    truncation_strategy: &TruncationStrategy,
    stride: usize
) -> Vec<TokenizedInput>
[src]
fn decode_to_vec(
    &self,
    token_ids: Vec<i64>,
    skip_special_tokens: bool
) -> Vec<String>
[src]
fn decode(
    &self,
    token_ids: Vec<i64>,
    skip_special_tokens: bool,
    clean_up_tokenization_spaces: bool
) -> String
[src]
fn clean_up_tokenization(&self, input_string: String) -> String
[src]
fn decode_list(
    &self,
    token_ids_list: Vec<Vec<i64>>,
    skip_special_tokens: bool,
    clean_up_tokenization_spaces: bool
) -> Vec<String>
[src]
fn build_input_with_special_tokens(
    &self,
    tokens_1: Vec<i64>,
    tokens_2: Option<Vec<i64>>,
    offsets_1: Vec<Option<Offset>>,
    offsets_2: Option<Vec<Option<Offset>>>,
    original_offsets_1: Vec<Vec<OffsetSize>>,
    original_offsets_2: Option<Vec<Vec<OffsetSize>>>,
    mask: Vec<Mask>,
    mask_2: Option<Vec<Mask>>
) -> (Vec<i64>, Vec<i8>, Vec<i8>, Vec<Option<Offset>>, Vec<Vec<OffsetSize>>, Vec<Mask>)
[src]
Auto Trait Implementations
impl RefUnwindSafe for SentencePieceTokenizer
impl Send for SentencePieceTokenizer
impl Sync for SentencePieceTokenizer
impl Unpin for SentencePieceTokenizer
impl UnwindSafe for SentencePieceTokenizer
Blanket Implementations
impl<T> Any for T where
    T: 'static + ?Sized,
[src]
impl<T> Borrow<T> for T where
    T: ?Sized,
[src]
impl<T> BorrowMut<T> for T where
    T: ?Sized,
[src]
fn borrow_mut(&mut self) -> &mut T
[src]
impl<T> From<T> for T
[src]
impl<T, U> Into<U> for T where
    U: From<T>,
[src]
impl<T, U> TryFrom<U> for T where
    U: Into<T>,
[src]
type Error = Infallible
The type returned in the event of a conversion error.
fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>
[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>,
[src]
U: TryFrom<T>,