vaporetto 0.1.4

Vaporetto: a pointwise prediction based tokenizer


Vaporetto is a fast and lightweight tokenizer based on pointwise prediction.


use std::fs::File;
use std::io::{prelude::*, stdin, BufReader};

use vaporetto::{Model, Predictor, Sentence};

let mut f = BufReader::new(File::open("model.bin").unwrap());
let model = Model::read(&mut f).unwrap();
let mut predictor = Predictor::new(model);

for line in stdin().lock().lines() {
    let s = Sentence::from_raw(line.unwrap()).unwrap();
    let s = predictor.predict(s);
    let toks = s.to_tokenized_string().unwrap();
    println!("{}", toks);


Licensed under either of

 * Apache License, Version 2.0
 * MIT license

at your option.


Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.