Struct tokenizers::models::bpe::BpeTrainer[src]

pub struct BpeTrainer { /* fields omitted */ }

Trains a BPE model from a mapping of words to their counts.

Examples

use std::collections::HashMap;
use tokenizers::tokenizer::Trainer;
use tokenizers::models::bpe::BpeTrainer;

let word_counts: HashMap<String, u32> = [
    (String::from("Hello"), 1),
    (String::from("World"), 1),
].iter().cloned().collect();
let trainer = BpeTrainer::default();
let (model, special_tokens) = trainer.train(word_counts).unwrap();

Methods

impl BpeTrainer[src]

pub fn new(min_frequency: u32, vocab_size: usize) -> Self[src]

pub fn builder() -> BpeTrainerBuilder[src]

pub fn train(
    &self,
    word_counts: HashMap<String, u32>
) -> Result<(BPE, Vec<AddedToken>)>
[src]

Trait Implementations

impl Default for BpeTrainer[src]

impl Trainer for BpeTrainer[src]

fn train(
    &self,
    word_counts: HashMap<String, u32>
) -> Result<(Box<dyn Model>, Vec<AddedToken>)>
[src]

Train a BPE model

fn process_tokens(&self, words: &mut HashMap<String, u32>, tokens: Vec<String>)[src]

Process a batch of tokens, counting them in `words`

fn should_show_progress(&self) -> bool[src]

Whether progress should be shown

Auto Trait Implementations

Blanket Implementations

impl<T> Any for T where
    T: 'static + ?Sized
[src]

impl<T> Borrow<T> for T where
    T: ?Sized
[src]

impl<T> BorrowMut<T> for T where
    T: ?Sized
[src]

impl<T> From<T> for T[src]

impl<T, U> Into<U> for T where
    U: From<T>, 
[src]

impl<T, U> TryFrom<U> for T where
    U: Into<T>, 
[src]

type Error = Infallible

The type returned in the event of a conversion error.

impl<T, U> TryInto<U> for T where
    U: TryFrom<T>, 
[src]

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

impl<V, T> VZip<V> for T where
    V: MultiLane<T>,