Struct Encoding

Source
pub struct Encoding { /* private fields */ }
Expand description

Represents the output of a Tokenizer.

Implementations§

Source§

impl Encoding

Source

pub fn new( ids: Vec<u32>, type_ids: Vec<u32>, tokens: Vec<String>, words: Vec<Option<u32>>, offsets: Vec<Offsets>, special_tokens_mask: Vec<u32>, attention_mask: Vec<u32>, overflowing: Vec<Self>, sequence_ranges: HashMap<usize, Range<usize>>, ) -> Self

Source

pub fn with_capacity(len: usize) -> Self

Source

pub fn from_tokens(tokens: Vec<Token>, type_id: u32) -> Self

Source

pub fn is_empty(&self) -> bool

Whether this Encoding is empty

Source

pub fn len(&self) -> usize

Return the total length of this Encoding

Source

pub fn n_sequences(&self) -> usize

Return the number of sequences combined in this Encoding

Source

pub fn set_sequence_id(&mut self, sequence_id: usize)

Set the given sequence id for the whole range of tokens contained in this Encoding

Source

pub fn get_tokens(&self) -> &[String]

Source

pub fn get_word_ids(&self) -> &[Option<u32>]

Source

pub fn get_word_ids_mut(&mut self) -> &mut [Option<u32>]

Source

pub fn get_sequence_ids(&self) -> Vec<Option<usize>>

Source

pub fn get_ids(&self) -> &[u32]

Source

pub fn get_type_ids(&self) -> &[u32]

Source

pub fn set_type_ids(&mut self, type_ids: Vec<u32>)

Source

pub fn get_offsets(&self) -> &[Offsets]

Source

pub fn get_offsets_mut(&mut self) -> &mut [Offsets]

Source

pub fn get_special_tokens_mask(&self) -> &[u32]

Source

pub fn get_attention_mask(&self) -> &[u32]

Source

pub fn get_overflowing(&self) -> &Vec<Encoding>

Source

pub fn set_overflowing(&mut self, overflowing: Vec<Encoding>)

Source

pub fn get_overflowing_mut(&mut self) -> &mut Vec<Encoding>

Source

pub fn take_overflowing(&mut self) -> Vec<Encoding>

Source

pub fn token_to_sequence(&self, token: usize) -> Option<usize>

Returns the index of the sequence containing the given token

Source

pub fn word_to_tokens( &self, word: u32, sequence_id: usize, ) -> Option<(usize, usize)>

Get the encoded tokens corresponding to the word at the given index in the input sequence, in the form (start_token, end_token + 1).

Source

pub fn word_to_chars(&self, word: u32, sequence_id: usize) -> Option<Offsets>

Get the offsets of the word at the given index in the input sequence.

Source

pub fn token_to_chars(&self, token: usize) -> Option<(usize, Offsets)>

Get the offsets of the token at the given index.

Source

pub fn token_to_word(&self, token: usize) -> Option<(usize, u32)>

Get the word that contains the token at the given index.

Source

pub fn char_to_token(&self, pos: usize, sequence_id: usize) -> Option<usize>

Get the token that contains the given char.

Source

pub fn char_to_word(&self, pos: usize, sequence_id: usize) -> Option<u32>

Get the word that contains the given char.

Source

pub fn truncate( &mut self, max_len: usize, stride: usize, direction: TruncationDirection, )

Truncate the current Encoding.

Panics if stride >= max_len

Source

pub fn merge<I: IntoIterator<Item = Encoding>>( encodings: I, growing_offsets: bool, ) -> Self

Merge all Encodings together

Source

pub fn merge_with(&mut self, pair: Encoding, growing_offsets: bool)

Merge this Encoding with the given Encoding. Happens in place.

Source

pub fn pad( &mut self, target_length: usize, pad_id: u32, pad_type_id: u32, pad_token: &str, direction: PaddingDirection, )

Trait Implementations§

Source§

impl Clone for Encoding

Source§

fn clone(&self) -> Encoding

Returns a copy of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for Encoding

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl Default for Encoding

Source§

fn default() -> Encoding

Returns the “default value” for a type. Read more
Source§

impl<'de> Deserialize<'de> for Encoding

Source§

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where __D: Deserializer<'de>,

Deserialize this value from the given Serde deserializer. Read more
Source§

impl FromIterator<(u32, String, (usize, usize), Option<u32>, u32)> for Encoding

Source§

fn from_iter<I: IntoIterator<Item = (u32, String, (usize, usize), Option<u32>, u32)>>( iter: I, ) -> Self

Creates a value from an iterator. Read more
Source§

impl FromIterator<Encoding> for Encoding

Source§

fn from_iter<I: IntoIterator<Item = Encoding>>(iter: I) -> Self

Creates a value from an iterator. Read more
Source§

impl PartialEq for Encoding

Source§

fn eq(&self, other: &Encoding) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · Source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
Source§

impl Serialize for Encoding

Source§

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error>
where __S: Serializer,

Serialize this value into the given Serde serializer. Read more
Source§

impl StructuralPartialEq for Encoding

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of the pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes an object with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,