// Crate root: crate-level attributes, documentation, module declarations, and public re-exports.
#![cfg_attr(docsrs, feature(doc_cfg))]
//! # Vaporetto
//!
//! Vaporetto is a fast and lightweight tokenizer based on pointwise prediction.
//!
//! ## Examples
//!
//! ```no_run
//! use std::fs::File;
//! use std::io::{prelude::*, stdin, BufReader};
//!
//! use vaporetto::{Model, Predictor, Sentence};
//!
//! let mut f = BufReader::new(File::open("model.bin").unwrap());
//! let model = Model::read(&mut f).unwrap();
//! let mut predictor = Predictor::new(model);
//!
//! for line in stdin().lock().lines() {
//!     let s = Sentence::from_raw(line.unwrap()).unwrap();
//!     let s = predictor.predict(s);
//!     let toks = s.to_tokenized_string().unwrap();
//!     println!("{}", toks);
//! }
//! ```
//!
//! Training requires **crate feature** `train`. For more details, see [`Trainer`].
// `#[macro_use]` keeps any macros defined in `utils` in scope for the modules
// declared after it (macro scope is textual), so this declaration stays first.
#[macro_use]
mod utils;

// Modules that are always compiled, followed by the items re-exported from them.
mod model;
mod predictor;
mod sentence;

pub use model::Model;
pub use predictor::Predictor;
pub use sentence::{BoundaryType, CharacterType, Sentence};

// Re-exported only when the `multithreading` crate feature is enabled.
#[cfg(feature = "multithreading")]
pub use predictor::MultithreadPredictor;

// Modules and re-exports compiled only when the `train` crate feature is enabled.
#[cfg(feature = "train")]
mod feature;
#[cfg(feature = "train")]
mod trainer;
#[cfg(feature = "train")]
pub use trainer::{Dataset, Trainer};

// Module and re-export compiled only when the `kytea` crate feature is enabled.
#[cfg(feature = "kytea")]
mod kytea_model;
#[cfg(feature = "kytea")]
pub use kytea_model::KyteaModel;