/// Base vocabulary with an `[UNK]` unknown token, used as a pre-tokenization step for
/// BERT-class tokenizers. Expects a flat text vocabulary when created from file.
pub struct BaseVocab {
    /// A mapping of tokens as strings to indices (i.e. the encoder base).
    pub values: HashMap<String, i64>,
    /// A mapping of token ids to strings (i.e. the decoder base).
    pub indices: HashMap<i64, String>,
    /// Special tokens used by the vocabulary.
    pub special_token_map: SpecialTokenMap,
    /// A mapping of special value tokens as strings to IDs (i.e. the encoder base for
    /// special values). Special values typically include things like BOS/EOS markers,
    /// class markers, mask markers and padding markers.
    pub special_values: HashMap<String, i64>,
    /// A mapping of special value tokens as IDs to strings (i.e. the decoder base for
    /// special values).
    pub special_indices: HashMap<i64, String>,
}
Expand description

BaseVocab

Base vocabulary with [UNK] unknown token used as a pre-tokenization step for BERT-class tokenizers. Expects a flat text vocabulary when created from file.

Fields§

§values: HashMap<String, i64>

A mapping of tokens as string to indices (i.e. the encoder base)

§indices: HashMap<i64, String>

A mapping of token ids to strings (i.e. the decoder base)

§special_token_map: SpecialTokenMap

Special tokens used by the vocabulary

§special_values: HashMap<String, i64>

A mapping of special value tokens as strings to IDs (i.e. the encoder base for special values). Special values typically include things like BOS/EOS markers, class markers, mask markers and padding markers.

§special_indices: HashMap<i64, String>

A mapping of special value tokens as IDs to strings (i.e. the decoder base for special values)

Trait Implementations§

source§

impl Clone for BaseVocab

source§

fn clone(&self) -> BaseVocab

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl Debug for BaseVocab

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
source§

impl Vocab for BaseVocab

source§

fn get_unknown_value(&self) -> &str

Returns the unknown value on an instance
source§

fn values(&self) -> &HashMap<String, i64>

Return the map of token strings to IDs
source§

fn indices(&self) -> &HashMap<i64, String>

Return the map of token IDs to strings
source§

fn special_values(&self) -> &HashMap<String, i64>

Return the map of token strings to IDs for special values
source§

fn special_indices(&self) -> &HashMap<i64, String>

Return the map of token IDs to strings for special values
source§

fn values_mut(&mut self) -> &mut HashMap<String, i64>

Return a mutable reference to the map of token strings to IDs
source§

fn indices_mut(&mut self) -> &mut HashMap<i64, String>

Return a mutable reference to the map of token IDs to strings
source§

fn special_values_mut(&mut self) -> &mut HashMap<String, i64>

Return a mutable reference to the map of token strings to IDs for special values
source§

fn special_indices_mut(&mut self) -> &mut HashMap<i64, String>

Return a mutable reference to the map of token IDs to strings for special values
source§

fn from_file<P: AsRef<Path>>(path: P) -> Result<BaseVocab, TokenizerError>

Read a vocabulary from file Read more
source§

fn from_file_with_special_token_mapping<P: AsRef<Path>, S: AsRef<Path>>( path: P, special_token_mapping_path: S ) -> Result<Self, TokenizerError>

Read a vocabulary from file with special token mapping Read more
source§

fn from_values_and_special_token_map( values: HashMap<String, i64>, special_token_map: SpecialTokenMap ) -> Result<Self, TokenizerError> where Self: Sized,

source§

fn token_to_id(&self, token: &str) -> i64

Converts a token to an id. Read more
source§

fn id_to_token(&self, id: &i64) -> String

Converts an id to a token. Read more
source§

fn _token_to_id( &self, token: &str, values: &HashMap<String, i64>, special_values: &HashMap<String, i64>, unknown_value: &str ) -> i64

Converts a token to an id, provided a HashMap of values, a HashMap of special values and the unknown value token string representation. This is not meant to be directly used, the method token_to_id offers a more convenient interface for most vocabularies, but needs to be implemented by the specific vocabulary. Read more
source§

fn _id_to_token( &self, id: &i64, indices: &HashMap<i64, String>, special_indices: &HashMap<i64, String>, unknown_value: &str ) -> String

Converts an id to a token, provided a HashMap of indices, a HashMap of special indices and the unknown value token string representation. This is not meant to be directly used, the method id_to_token offers a more convenient interface for most vocabularies, but needs to be implemented by the specific vocabulary. Read more
source§

fn convert_tokens_to_ids(&self, tokens: &[&str]) -> Vec<i64>

Converts a list of tokens to a list of indices. Read more
source§

fn add_extra_ids(&mut self, num_extra_ids: i64)

Add extra token ids to the vocab Read more
source§

fn add_tokens(&mut self, tokens: &[&str])

Add arbitrary tokens to the vocabulary. Read more

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

§

impl<T> Pointable for T

§

const ALIGN: usize = _

The alignment of pointer.
§

type Init = T

The type for initializers.
§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes an object with the given initializer. Read more
§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
source§

impl<T> ToOwned for T where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.