[−][src]Struct rust_tokenizers::vocab::BpePairVocab
Byte pair Encoding Vocab
BPE vocab containing the merges (dictionary of pairs with their priority) used to merge pairs together. This vocabulary element is used on BPE tokenizers such as GPT2 or RoBERTa. This vocabulary is not meant to be used directly, but rather as part of a BPE Tokenizer.
Fields
values: HashMap<(String, String), i64>
Implementations
impl BpePairVocab
[src]
pub fn from_file(path: &str) -> Result<BpePairVocab, TokenizerError>
[src]
Create a new BpePairVocab from a flat file containing merges in the format `first element second element`.
The indices are implied by the line position of each pair in the merges file. The first line needs to be a header and is skipped.
Example
use rust_tokenizers::vocab::{BpePairVocab, Vocab};
let path = "path/to/file";
let bpe_vocab = BpePairVocab::from_file(path);
pub fn byte_pair_to_id(&self, byte_pair: &BpePairRef<'_>) -> Option<&i64>
[src]
Gets the id of a "byte pair" in the merges vocab. Returns an optional index for the pair if it is found in the vocabulary.
Example
use rust_tokenizers::vocab::{BpePairRef, BpePairVocab, Vocab};
let path = "path/to/file";
let bpe_vocab = BpePairVocab::from_file(path).unwrap();
let query = BpePairRef {
    byte_1: &"won".to_string(),
    byte_2: &"derful".to_string(),
};
let id = bpe_vocab.byte_pair_to_id(&query);
Trait Implementations
impl Clone for BpePairVocab
[src]
fn clone(&self) -> BpePairVocab
[src]
fn clone_from(&mut self, source: &Self)
1.0.0[src]
impl Debug for BpePairVocab
[src]
Auto Trait Implementations
impl RefUnwindSafe for BpePairVocab
impl Send for BpePairVocab
impl Sync for BpePairVocab
impl Unpin for BpePairVocab
impl UnwindSafe for BpePairVocab
Blanket Implementations
impl<T> Any for T where
T: 'static + ?Sized,
[src]
T: 'static + ?Sized,
impl<T> Borrow<T> for T where
T: ?Sized,
[src]
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
[src]
T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
[src]
impl<T> From<T> for T
[src]
impl<T, U> Into<U> for T where
U: From<T>,
[src]
U: From<T>,
impl<T> ToOwned for T where
T: Clone,
[src]
T: Clone,
type Owned = T
The resulting type after obtaining ownership.
fn to_owned(&self) -> T
[src]
fn clone_into(&self, target: &mut T)
[src]
impl<T, U> TryFrom<U> for T where
U: Into<T>,
[src]
U: Into<T>,
type Error = Infallible
The type returned in the event of a conversion error.
fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>
[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>,
[src]
U: TryFrom<T>,