pub struct Encoding { /* private fields */ }
Represents the output of a Tokenizer.
Implementations§
Source§impl Encoding
impl Encoding
pub fn new( ids: Vec<u32>, type_ids: Vec<u32>, tokens: Vec<String>, words: Vec<Option<u32>>, offsets: Vec<(usize, usize)>, special_tokens_mask: Vec<u32>, attention_mask: Vec<u32>, overflowing: Vec<Encoding>, sequence_ranges: AHashMap<usize, Range<usize>>, ) -> Encoding
pub fn with_capacity(len: usize) -> Encoding
pub fn from_tokens(tokens: Vec<Token>, type_id: u32) -> Encoding
Source pub fn n_sequences(&self) -> usize
pub fn n_sequences(&self) -> usize
Return the number of sequences combined in this Encoding
Source pub fn set_sequence_id(&mut self, sequence_id: usize)
pub fn set_sequence_id(&mut self, sequence_id: usize)
Set the given sequence id for the whole range of tokens contained in this Encoding
pub fn get_tokens(&self) -> &[String]
pub fn get_word_ids(&self) -> &[Option<u32>]
pub fn get_word_ids_mut(&mut self) -> &mut [Option<u32>]
pub fn get_sequence_ids(&self) -> Vec<Option<usize>>
pub fn get_ids(&self) -> &[u32]
pub fn get_type_ids(&self) -> &[u32]
pub fn set_type_ids(&mut self, type_ids: Vec<u32>)
pub fn get_offsets(&self) -> &[(usize, usize)]
pub fn get_offsets_mut(&mut self) -> &mut [(usize, usize)]
pub fn get_special_tokens_mask(&self) -> &[u32]
pub fn get_attention_mask(&self) -> &[u32]
pub fn get_overflowing(&self) -> &Vec<Encoding>
pub fn set_overflowing(&mut self, overflowing: Vec<Encoding>)
pub fn get_overflowing_mut(&mut self) -> &mut Vec<Encoding>
pub fn take_overflowing(&mut self) -> Vec<Encoding>
Source pub fn token_to_sequence(&self, token: usize) -> Option<usize>
pub fn token_to_sequence(&self, token: usize) -> Option<usize>
Returns the index of the sequence containing the given token
Source pub fn word_to_tokens(
&self,
word: u32,
sequence_id: usize,
) -> Option<(usize, usize)>
pub fn word_to_tokens( &self, word: u32, sequence_id: usize, ) -> Option<(usize, usize)>
Get the encoded tokens corresponding to the word at the given index in the input sequence, with the form (start_token, end_token + 1)
Source pub fn word_to_chars(
&self,
word: u32,
sequence_id: usize,
) -> Option<(usize, usize)>
pub fn word_to_chars( &self, word: u32, sequence_id: usize, ) -> Option<(usize, usize)>
Get the offsets of the word at the given index in the input sequence.
Source pub fn token_to_chars(&self, token: usize) -> Option<(usize, (usize, usize))>
pub fn token_to_chars(&self, token: usize) -> Option<(usize, (usize, usize))>
Get the offsets of the token at the given index.
Source pub fn token_to_word(&self, token: usize) -> Option<(usize, u32)>
pub fn token_to_word(&self, token: usize) -> Option<(usize, u32)>
Get the word that contains the token at the given index.
Source pub fn char_to_token(&self, pos: usize, sequence_id: usize) -> Option<usize>
pub fn char_to_token(&self, pos: usize, sequence_id: usize) -> Option<usize>
Get the token that contains the given char.
Source pub fn char_to_word(&self, pos: usize, sequence_id: usize) -> Option<u32>
pub fn char_to_word(&self, pos: usize, sequence_id: usize) -> Option<u32>
Get the word that contains the given char.
Source pub fn truncate(
&mut self,
max_len: usize,
stride: usize,
direction: TruncationDirection,
)
pub fn truncate( &mut self, max_len: usize, stride: usize, direction: TruncationDirection, )
Truncate the current Encoding.
Panics if stride >= max_len
Source pub fn merge<I>(encodings: I, growing_offsets: bool) -> Encoding
where
I: IntoIterator<Item = Encoding>,
pub fn merge<I>(encodings: I, growing_offsets: bool) -> Encoding
where
I: IntoIterator<Item = Encoding>,
Merge all Encodings together
Source pub fn merge_with(&mut self, pair: Encoding, growing_offsets: bool)
pub fn merge_with(&mut self, pair: Encoding, growing_offsets: bool)
Merge this Encoding with the given Encoding. Happens in place.
pub fn pad( &mut self, target_length: usize, pad_id: u32, pad_type_id: u32, pad_token: &str, direction: PaddingDirection, )
Trait Implementations§
Source§impl<'de> Deserialize<'de> for Encoding
impl<'de> Deserialize<'de> for Encoding
Source§fn deserialize<__D>(
__deserializer: __D,
) -> Result<Encoding, <__D as Deserializer<'de>>::Error>
where
__D: Deserializer<'de>,
fn deserialize<__D>(
__deserializer: __D,
) -> Result<Encoding, <__D as Deserializer<'de>>::Error>
where
__D: Deserializer<'de>,
Source§impl FromIterator<Encoding> for Encoding
impl FromIterator<Encoding> for Encoding
Source§impl Serialize for Encoding
impl Serialize for Encoding
Source§fn serialize<__S>(
&self,
__serializer: __S,
) -> Result<<__S as Serializer>::Ok, <__S as Serializer>::Error>
where
__S: Serializer,
fn serialize<__S>(
&self,
__serializer: __S,
) -> Result<<__S as Serializer>::Ok, <__S as Serializer>::Error>
where
__S: Serializer,
impl StructuralPartialEq for Encoding
Auto Trait Implementations§
impl Freeze for Encoding
impl RefUnwindSafe for Encoding
impl Send for Encoding
impl Sync for Encoding
impl Unpin for Encoding
impl UnsafeUnpin for Encoding
impl UnwindSafe for Encoding
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T
where
T: Clone,
impl<T> CloneToUninit for T
where
T: Clone,
Source§impl<T> ConfigSerializable for T
where
T: Serialize + for<'de> Deserialize<'de>,
impl<T> ConfigSerializable for T
where
T: Serialize + for<'de> Deserialize<'de>,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more