[−][src]Struct tokenizers::models::bpe::BpeTrainer
Trains a BPE model from a mapping of words to their counts.
Examples
use std::collections::HashMap;
use tokenizers::tokenizer::Trainer;
use tokenizers::models::bpe::BpeTrainer;

let word_counts: HashMap<String, u32> = [
    (String::from("Hello"), 1),
    (String::from("World"), 1),
].iter().cloned().collect();
let trainer = BpeTrainer::default();
let (model, special_tokens) = trainer.train(word_counts).unwrap();
Methods
impl BpeTrainer
[src]
pub fn new(min_frequency: u32, vocab_size: usize) -> Self
[src]
pub fn builder() -> BpeTrainerBuilder
[src]
pub fn train(
&self,
word_counts: HashMap<String, u32>
) -> Result<(BPE, Vec<AddedToken>)>
[src]
Trait Implementations
impl Default for BpeTrainer
[src]
impl Trainer for BpeTrainer
[src]
fn train(
&self,
word_counts: HashMap<String, u32>
) -> Result<(Box<dyn Model>, Vec<AddedToken>)>
[src]
Trains a BPE model from the given word counts, returning the model together with a list of added tokens.
fn process_tokens(&self, words: &mut HashMap<String, u32>, tokens: Vec<String>)
[src]
Processes a batch of tokens, counting them into `words`.
fn should_show_progress(&self) -> bool
[src]
Returns whether progress should be shown.
Auto Trait Implementations
impl RefUnwindSafe for BpeTrainer
impl Send for BpeTrainer
impl Sync for BpeTrainer
impl Unpin for BpeTrainer
impl UnwindSafe for BpeTrainer
Blanket Implementations
impl<T> Any for T where
T: 'static + ?Sized,
[src]
impl<T> Borrow<T> for T where
T: ?Sized,
[src]
impl<T> BorrowMut<T> for T where
T: ?Sized,
[src]
fn borrow_mut(&mut self) -> &mut T
[src]
impl<T> From<T> for T
[src]
impl<T, U> Into<U> for T where
U: From<T>,
[src]
impl<T, U> TryFrom<U> for T where
U: Into<T>,
[src]
type Error = Infallible
The type returned in the event of a conversion error.
fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>
[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>,
[src]
type Error = <U as TryFrom<T>>::Error
The type returned in the event of a conversion error.
fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>
[src]
impl<V, T> VZip<V> for T where
V: MultiLane<T>,
V: MultiLane<T>,