pub struct Trainer<'a> { /* private fields */ }
Available on crate feature `train` only.
Trainer.
Examples
use std::fs::File;
use std::io::{prelude::*, BufReader, BufWriter};
use vaporetto::{Sentence, SolverType, Trainer};
let mut train_sents = vec![];
let f = BufReader::new(File::open("dataset-train.txt").unwrap());
for (i, line) in f.lines().enumerate() {
train_sents.push(Sentence::from_tokenized(&line.unwrap()).unwrap());
}
let dict: Vec<String> = vec![];
let mut trainer = Trainer::new(3, 3, 3, 3, dict, 0, &[]).unwrap();
for (i, s) in train_sents.iter().enumerate() {
trainer.add_example(&s);
}
let model = trainer.train(0.01, 1., SolverType::L1RegularizedL2LossSVC).unwrap();
let mut f = BufWriter::new(File::create("model.bin").unwrap());
model.write(&mut f).unwrap();
Implementations
impl<'a> Trainer<'a>
pub fn new(
char_window_size: u8,
char_ngram_size: u8,
type_window_size: u8,
type_ngram_size: u8,
dict_words: Vec<String>,
dict_word_max_len: u8,
tag_dictionary: &'a [Sentence<'a, '_>]
) -> Result<Self>
Creates a new trainer.
Arguments
- `char_window_size` - The character window size.
- `char_ngram_size` - The character n-gram length.
- `type_window_size` - The character type window size.
- `type_ngram_size` - The character type n-gram length.
- `dict_words` - A word dictionary.
- `dict_word_max_len` - Dictionary words longer than this value (measured in characters) will be grouped together.
- `tag_dictionary` - A tag dictionary. Words not included in the corpus are annotated with the tag specified here.
Errors
If invalid parameters are given, an error variant will be returned.
pub fn add_example<'b>(&mut self, sentence: &'a Sentence<'a, 'b>)
Adds a sentence to the trainer.
pub fn n_features(&self) -> usize
Returns the number of boundary features.