Struct rust_tokenizers::vocab::BpePairVocab [−][src]
Byte pair Encoding Vocab
BPE vocab containing the merges (dictionary of pairs with their priority) used to merge pairs together. This vocabulary element is used on BPE tokenizers such as GPT2 or RoBERTa. This vocabulary is not meant to be used directly, but rather as part of a BPE Tokenizer.
Fields
values: HashMap<(String, String), i64>
Implementations
impl BpePairVocab
[src]
pub fn from_file(path: &str) -> Result<BpePairVocab, TokenizerError>
[src]
Create a new BpePairVocab
from a flat file containing merges in the format `first element second element`.
The indices are implied by the line position of each pair in the merges file. The first line needs to be a
header and is skipped.
Example
use rust_tokenizers::vocab::{BpePairVocab, Vocab};
let path = "path/to/file";
let bpe_vocab = BpePairVocab::from_file(path);
pub fn from_sentencepiece_file(
path: &str
) -> Result<BpePairVocab, TokenizerError>
[src]
path: &str
) -> Result<BpePairVocab, TokenizerError>
Create a new BpePairVocab
from a SentencePiece file containing a BPE model.
Example
use rust_tokenizers::vocab::{BpePairVocab, Vocab};
let path = "path/to/spiece.model";
let bpe_vocab = BpePairVocab::from_sentencepiece_file(path);
pub fn byte_pair_to_id(&self, byte_pair: &BpePairRef<'_>) -> Option<&i64>
[src]
Gets the id of a “byte pair” in the merges vocab. Returns an optional index for the pair if it is found in the vocabulary.
Example
use rust_tokenizers::vocab::{BpePairRef, BpePairVocab, Vocab};
let path = "path/to/file";
let bpe_vocab = BpePairVocab::from_file(path).unwrap();
let query = BpePairRef {
    byte_1: &"won".to_string(),
    byte_2: &"derful".to_string(),
};
let id = bpe_vocab.byte_pair_to_id(&query);
Trait Implementations
impl Clone for BpePairVocab
[src]
fn clone(&self) -> BpePairVocab
[src]
pub fn clone_from(&mut self, source: &Self)
1.0.0[src]
impl Debug for BpePairVocab
[src]
Auto Trait Implementations
impl RefUnwindSafe for BpePairVocab
impl Send for BpePairVocab
impl Sync for BpePairVocab
impl Unpin for BpePairVocab
impl UnwindSafe for BpePairVocab
Blanket Implementations
impl<T> Any for T where
T: 'static + ?Sized,
[src]
T: 'static + ?Sized,
impl<T> Borrow<T> for T where
T: ?Sized,
[src]
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
[src]
T: ?Sized,
pub fn borrow_mut(&mut self) -> &mut T
[src]
impl<T> From<T> for T
[src]
impl<T, U> Into<U> for T where
U: From<T>,
[src]
U: From<T>,
impl<T> Pointable for T
pub const ALIGN: usize
type Init = T
The type for initializers.
pub unsafe fn init(init: <T as Pointable>::Init) -> usize
pub unsafe fn deref<'a>(ptr: usize) -> &'a T
pub unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T
pub unsafe fn drop(ptr: usize)
impl<T> ToOwned for T where
T: Clone,
[src]
T: Clone,
type Owned = T
The resulting type after obtaining ownership.
pub fn to_owned(&self) -> T
[src]
pub fn clone_into(&self, target: &mut T)
[src]
impl<T, U> TryFrom<U> for T where
U: Into<T>,
[src]
U: Into<T>,
type Error = Infallible
The type returned in the event of a conversion error.
pub fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>
[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>,
[src]
U: TryFrom<T>,