pub struct Encoding { /* private fields */ }
Expand description
Represents the output of a Tokenizer.
Implementations§
Source§impl Encoding
impl Encoding
pub fn new( ids: Vec<u32>, type_ids: Vec<u32>, tokens: Vec<String>, words: Vec<Option<u32>>, offsets: Vec<Offsets>, special_tokens_mask: Vec<u32>, attention_mask: Vec<u32>, overflowing: Vec<Self>, sequence_ranges: HashMap<usize, Range<usize>>, ) -> Self
pub fn with_capacity(len: usize) -> Self
pub fn from_tokens(tokens: Vec<Token>, type_id: u32) -> Self
Sourcepub fn n_sequences(&self) -> usize
pub fn n_sequences(&self) -> usize
Return the number of sequences combined in this Encoding
Sourcepub fn set_sequence_id(&mut self, sequence_id: usize)
pub fn set_sequence_id(&mut self, sequence_id: usize)
Set the given sequence id for the whole range of tokens contained in this Encoding
pub fn get_tokens(&self) -> &[String]
pub fn get_word_ids(&self) -> &[Option<u32>]
pub fn get_word_ids_mut(&mut self) -> &mut [Option<u32>]
pub fn get_sequence_ids(&self) -> Vec<Option<usize>>
pub fn get_ids(&self) -> &[u32]
pub fn get_type_ids(&self) -> &[u32]
pub fn set_type_ids(&mut self, type_ids: Vec<u32>)
pub fn get_offsets(&self) -> &[Offsets] ⓘ
pub fn get_offsets_mut(&mut self) -> &mut [Offsets] ⓘ
pub fn get_special_tokens_mask(&self) -> &[u32]
pub fn get_attention_mask(&self) -> &[u32]
pub fn get_overflowing(&self) -> &Vec<Encoding>
pub fn set_overflowing(&mut self, overflowing: Vec<Encoding>)
pub fn get_overflowing_mut(&mut self) -> &mut Vec<Encoding>
pub fn take_overflowing(&mut self) -> Vec<Encoding>
Sourcepub fn token_to_sequence(&self, token: usize) -> Option<usize>
pub fn token_to_sequence(&self, token: usize) -> Option<usize>
Returns the index of the sequence containing the given token
Sourcepub fn word_to_tokens(
&self,
word: u32,
sequence_id: usize,
) -> Option<(usize, usize)>
pub fn word_to_tokens( &self, word: u32, sequence_id: usize, ) -> Option<(usize, usize)>
Get the encoded tokens corresponding to the word at the given index in the input sequence, with the form (start_token, end_token + 1)
Sourcepub fn word_to_chars(&self, word: u32, sequence_id: usize) -> Option<Offsets>
pub fn word_to_chars(&self, word: u32, sequence_id: usize) -> Option<Offsets>
Get the offsets of the word at the given index in the input sequence.
Sourcepub fn token_to_chars(&self, token: usize) -> Option<(usize, Offsets)>
pub fn token_to_chars(&self, token: usize) -> Option<(usize, Offsets)>
Get the offsets of the token at the given index.
Sourcepub fn token_to_word(&self, token: usize) -> Option<(usize, u32)>
pub fn token_to_word(&self, token: usize) -> Option<(usize, u32)>
Get the word that contains the token at the given index.
Sourcepub fn char_to_token(&self, pos: usize, sequence_id: usize) -> Option<usize>
pub fn char_to_token(&self, pos: usize, sequence_id: usize) -> Option<usize>
Get the token that contains the given char.
Sourcepub fn char_to_word(&self, pos: usize, sequence_id: usize) -> Option<u32>
pub fn char_to_word(&self, pos: usize, sequence_id: usize) -> Option<u32>
Get the word that contains the given char.
Sourcepub fn truncate(
&mut self,
max_len: usize,
stride: usize,
direction: TruncationDirection,
)
pub fn truncate( &mut self, max_len: usize, stride: usize, direction: TruncationDirection, )
Truncate the current Encoding.
Panics if stride >= max_len.
Sourcepub fn merge<I: IntoIterator<Item = Encoding>>(
encodings: I,
growing_offsets: bool,
) -> Self
pub fn merge<I: IntoIterator<Item = Encoding>>( encodings: I, growing_offsets: bool, ) -> Self
Merge all Encodings together
Sourcepub fn merge_with(&mut self, pair: Encoding, growing_offsets: bool)
pub fn merge_with(&mut self, pair: Encoding, growing_offsets: bool)
Merge ourself with the given Encoding. Happens in place.
pub fn pad( &mut self, target_length: usize, pad_id: u32, pad_type_id: u32, pad_token: &str, direction: PaddingDirection, )
Trait Implementations§
Source§impl<'de> Deserialize<'de> for Encoding
impl<'de> Deserialize<'de> for Encoding
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
Source§impl FromIterator<Encoding> for Encoding
impl FromIterator<Encoding> for Encoding
impl StructuralPartialEq for Encoding
Auto Trait Implementations§
impl Freeze for Encoding
impl RefUnwindSafe for Encoding
impl Send for Encoding
impl Sync for Encoding
impl Unpin for Encoding
impl UnwindSafe for Encoding
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more