Expand description
Module for training models.
Examples
use std::fs::File;
use vibrato::trainer::{Corpus, Trainer, TrainerConfig};
use vibrato::{SystemDictionaryBuilder, Tokenizer};
// Build the trainer configuration. The five definition files are opened in
// the order `from_readers` takes them: lexicon, character properties,
// unknown-word handler, feature templates, rewrite rules.
let config = TrainerConfig::from_readers(
    File::open("src/tests/resources/train_lex.csv")?,
    File::open("src/tests/resources/char.def")?,
    File::open("src/tests/resources/train_unk.def")?,
    File::open("src/tests/resources/feature.def")?,
    File::open("src/tests/resources/rewrite.def")?,
)?;

// Set up the trainer and tune it through its builder-style setters.
let trainer = Trainer::new(config)?
    .regularization_cost(0.01)
    .max_iter(300)
    .num_threads(20);

// Read the training corpus.
let corpus_file = File::open("src/tests/resources/corpus.txt")?;
let corpus = Corpus::from_reader(corpus_file)?;

// Run the training.
let mut model = trainer.train(corpus)?;

// In-memory buffers, one per output that `write_dictionary` produces.
let mut lexicon_buf = Vec::new();
let mut connector_buf = Vec::new();
let mut unk_handler_buf = Vec::new();
let mut user_lexicon_buf = Vec::new();
model.write_dictionary(
    &mut lexicon_buf,
    &mut connector_buf,
    &mut unk_handler_buf,
    &mut user_lexicon_buf,
)?;

// Build a system dictionary from the trained buffers. The character
// property file is opened a second time because the first handle was
// already handed over to the trainer configuration.
let dict = SystemDictionaryBuilder::from_readers(
    lexicon_buf.as_slice(),
    connector_buf.as_slice(),
    File::open("src/tests/resources/char.def")?,
    unk_handler_buf.as_slice(),
)?;

// Tokenize a sample sentence with the freshly trained dictionary.
let tokenizer = Tokenizer::new(dict);
let mut worker = tokenizer.new_worker();
worker.reset_sentence("外国人参政権");
worker.tokenize();
// Expected segmentation: 外国/人/参政/権
assert_eq!(worker.num_tokens(), 4);
Structs
- Representation of a corpus.
- Representation of a sentence.
- Tokenization model.
- Trainer of a morphological analyzer.
- Configuration for a trainer.
- Representation of a pair of a surface and features.