Struct vaporetto::Trainer

source ·
pub struct Trainer<'a> { /* private fields */ }
Available on crate feature train only.
Expand description

Trainer that collects example sentences and trains a model of word boundaries and tags.

Examples

use std::fs::File;
use std::io::{prelude::*, BufReader, BufWriter};

use vaporetto::{Sentence, SolverType, Trainer};

// Read the training corpus, parsing each line as one tokenized sentence.
let mut train_sents = vec![];
let f = BufReader::new(File::open("dataset-train.txt").unwrap());
for line in f.lines() {
    train_sents.push(Sentence::from_tokenized(&line.unwrap()).unwrap());
}

// This example uses an empty word dictionary and an empty tag dictionary.
let dict: Vec<String> = vec![];
let mut trainer = Trainer::new(3, 3, 3, 3, dict, 0, &[]).unwrap();
for s in &train_sents {
    trainer.add_example(s);
}

let model = trainer.train(0.01, 1., SolverType::L1RegularizedL2LossSVC).unwrap();
let mut f = BufWriter::new(File::create("model.bin").unwrap());
model.write(&mut f).unwrap();
// Flush explicitly: an error during the implicit flush on drop would be silently ignored.
f.flush().unwrap();

Implementations§

source§

impl<'a> Trainer<'a>

source

pub fn new( char_window_size: u8, char_ngram_size: u8, type_window_size: u8, type_ngram_size: u8, dict_words: Vec<String>, dict_word_max_len: u8, tag_dictionary: &'a [Sentence<'a, '_>] ) -> Result<Self>

Creates a new trainer.

Arguments
  • char_window_size - The character window size.
  • char_ngram_size - The character n-gram length.
  • type_window_size - The character type window size.
  • type_ngram_size - The character type n-gram length.
  • dict_words - A word dictionary.
  • dict_word_max_len - Dictionary words longer than this value will be grouped together, where the length is in characters.
  • tag_dictionary - A tag dictionary. Words not included in the corpus are annotated with the tag specified here.
Errors

If invalid parameters are given, an error variant will be returned.

source

pub fn add_example<'b>(&mut self, sentence: &'a Sentence<'a, 'b>)

Adds a sentence to the trainer.

source

pub fn train(self, epsilon: f64, cost: f64, solver: SolverType) -> Result<Model>

Trains word boundaries and tags.

Arguments
  • epsilon - The tolerance of the termination criterion.
  • cost - The parameter C.
  • solver - Solver type.
Errors

If the solver returns an error, that will be propagated.

source

pub fn n_features(&self) -> usize

Returns the number of boundary features.

Auto Trait Implementations§

§

impl<'a> RefUnwindSafe for Trainer<'a>

§

impl<'a> Send for Trainer<'a>

§

impl<'a> Sync for Trainer<'a>

§

impl<'a> Unpin for Trainer<'a>

§

impl<'a> UnwindSafe for Trainer<'a>

Blanket Implementations§

source§

impl<T> Any for T where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T where T: ?Sized,

const: unstable · source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T where T: ?Sized,

const: unstable · source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

const: unstable · source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T where U: From<T>,

const: unstable · source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T, U> TryFrom<U> for T where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
const: unstable · source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
const: unstable · source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.