1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#![cfg_attr(docsrs, feature(doc_cfg))]

//! # Vaporetto
//!
//! Vaporetto is a fast and lightweight tokenizer based on pointwise prediction.
//!
//! ## Examples
//!
//! ```no_run
//! use std::fs::File;
//! use std::io::{prelude::*, stdin, BufReader};
//!
//! use vaporetto::{Model, Predictor, Sentence};
//!
//! let mut f = BufReader::new(File::open("model.bin").unwrap());
//! let model = Model::read(&mut f).unwrap();
//! let mut predictor = Predictor::new(model);
//!
//! for line in stdin().lock().lines() {
//!     let s = Sentence::from_raw(line.unwrap()).unwrap();
//!     let s = predictor.predict(s);
//!     let toks = s.to_tokenized_string().unwrap();
//!     println!("{}", toks);
//! }
//! ```
//!
//! Training requires the **crate feature** `train`. For more details, see [`Trainer`].

// Internal helper module. `#[macro_use]` exposes any macros defined in
// `utils` to the modules declared after it, so this declaration must stay
// ahead of the other `mod` items.
#[macro_use]
mod utils;

// Modules that are always compiled, regardless of enabled crate features.
mod model;
mod predictor;
mod sentence;

// Modules compiled only when the `train` crate feature is enabled.
#[cfg(feature = "train")]
mod feature;
#[cfg(feature = "train")]
mod trainer;

// Module compiled only when the `kytea` crate feature is enabled.
#[cfg(feature = "kytea")]
mod kytea_model;

// Public API surface. Every module above is private, so the items below are
// reachable by downstream crates only through these crate-root re-exports.
pub use model::Model;
pub use predictor::Predictor;
pub use sentence::{BoundaryType, CharacterType, Sentence};

// NOTE(review): the crate enables `doc_cfg` for docs.rs builds, but the
// feature-gated re-exports below carry no `doc(cfg(...))` annotation in this
// file -- confirm the items are annotated where they are defined.

// Re-exported only when the `multithreading` crate feature is enabled.
#[cfg(feature = "multithreading")]
pub use predictor::MultithreadPredictor;

// Re-exported only when the `train` crate feature is enabled.
#[cfg(feature = "train")]
pub use trainer::{Dataset, Trainer};

// Re-exported only when the `kytea` crate feature is enabled.
#[cfg(feature = "kytea")]
pub use kytea_model::KyteaModel;