[−][src]Trait tokenizers::tokenizer::Trainer

/// A `Trainer` has the responsibility to train a model. We feed it with
/// lines/sentences and it returns a `Model` when done.
pub trait Trainer: Sync {
    /// Whether we should show progress during the training.
    fn should_show_progress(&self) -> bool;
    /// The actual training method. Returns a new trained `Model` as well as a
    /// list of special tokens to be added directly to the tokenizer along
    /// with the model.
    fn train(
        &self, 
        words: HashMap<String, u32>
    ) -> Result<(Box<dyn Model>, Vec<AddedToken>)>;
    /// Process a bunch of tokens, counting them as relevant.
    fn process_tokens(
        &self, 
        words: &mut HashMap<String, u32>, 
        tokens: Vec<String>
    );
}

A Trainer has the responsibility to train a model. We feed it with lines/sentences and it returns a Model when done.

Required methods

`fn should_show_progress(&self) -> bool`

Whether we should show progress during the training.

`fn train( &self, words: HashMap<String, u32> ) -> Result<(Box<dyn Model>, Vec<AddedToken>)>`

The actual training method. This will return a new trained Model as well as a list of special_tokens to be added directly to the tokenizer along with the model.

`fn process_tokens(&self, words: &mut HashMap<String, u32>, tokens: Vec<String>)`

Process a bunch of tokens, counting them as relevant.

Loading content...

Implementors

`impl Trainer for BpeTrainer`[src]

`fn train( &self, word_counts: HashMap<String, u32> ) -> Result<(Box<dyn Model>, Vec<AddedToken>)>`[src]

Train a BPE model

`fn process_tokens(&self, words: &mut HashMap<String, u32>, tokens: Vec<String>)`[src]

Process a bunch of tokens, counting them

`fn should_show_progress(&self) -> bool`[src]

Whether we should show progress

`impl Trainer for WordPieceTrainer`[src]

`fn train( &self, word_counts: HashMap<String, u32> ) -> Result<(Box<dyn Model>, Vec<AddedToken>)>`[src]

`fn process_tokens(&self, words: &mut HashMap<String, u32>, tokens: Vec<String>)`[src]

`fn should_show_progress(&self) -> bool`[src]

Loading content...

[−][src]Trait tokenizers::tokenizer::Trainer

Required methods

fn should_show_progress(&self) -> bool

fn train( &self, words: HashMap<String, u32>) -> Result<(Box<dyn Model>, Vec<AddedToken>)>

fn process_tokens(&self, words: &mut HashMap<String, u32>, tokens: Vec<String>)

Implementors

impl Trainer for BpeTrainer[src]

fn train( &self, word_counts: HashMap<String, u32>) -> Result<(Box<dyn Model>, Vec<AddedToken>)>[src]

fn process_tokens(&self, words: &mut HashMap<String, u32>, tokens: Vec<String>)[src]

fn should_show_progress(&self) -> bool[src]

impl Trainer for WordPieceTrainer[src]

fn train( &self, word_counts: HashMap<String, u32>) -> Result<(Box<dyn Model>, Vec<AddedToken>)>[src]

fn process_tokens(&self, words: &mut HashMap<String, u32>, tokens: Vec<String>)[src]

fn should_show_progress(&self) -> bool[src]

`fn should_show_progress(&self) -> bool`

`fn train( &self, words: HashMap<String, u32> ) -> Result<(Box<dyn Model>, Vec<AddedToken>)>`

`fn process_tokens(&self, words: &mut HashMap<String, u32>, tokens: Vec<String>)`

`impl Trainer for BpeTrainer`[src]

`fn train( &self, word_counts: HashMap<String, u32> ) -> Result<(Box<dyn Model>, Vec<AddedToken>)>`[src]

`fn process_tokens(&self, words: &mut HashMap<String, u32>, tokens: Vec<String>)`[src]

`fn should_show_progress(&self) -> bool`[src]

`impl Trainer for WordPieceTrainer`[src]

`fn train( &self, word_counts: HashMap<String, u32> ) -> Result<(Box<dyn Model>, Vec<AddedToken>)>`[src]

`fn process_tokens(&self, words: &mut HashMap<String, u32>, tokens: Vec<String>)`[src]

`fn should_show_progress(&self) -> bool`[src]