pragmatic-segmenter 0.1.0

Rust port of pySBD v3.1.0.
Documentation
use std::error::Error;
use std::fs::File;
use std::io::{self, BufRead, BufReader};

use indicatif::{ProgressBar, ProgressStyle};
use rayon::prelude::*;
use serde_json::from_str;
use xz2::read::XzDecoder;

use pragmatic_segmenter::Segmenter;

fn main() -> Result<(), Box<dyn Error>> {
    let segmenter = Segmenter::new()?;

    let inputs = BufReader::new(XzDecoder::new(File::open("tests/test_huge/inputs.xz")?));
    let outputs = BufReader::new(XzDecoder::new(File::open("tests/test_huge/outputs.xz")?));
    let dataset: Vec<_> = inputs
        .lines()
        .zip(outputs.lines())
        .map(|(input, output)| {
            let input: String = from_str(&input?)?;
            let output: Vec<String> = from_str(&output?)?;

            Ok((input, output))
        })
        .collect::<io::Result<_>>()?;
    let count = dataset.len();
    let bar = ProgressBar::new(count as u64);
    bar.set_draw_delta(10);
    bar.set_style(ProgressStyle::default_bar().template(
        "
{percent}% {wide_bar} {pos:>5}/{len}
{elapsed} passed, Currrent speed: {per_sec}, {eta} left
",
    ));

    let good = dataset
        .par_iter()
        .map(|(input, expected)| {
            bar.inc(1);
            let actual: Vec<_> = segmenter.segment(&input).collect();
            actual == *expected
        })
        .filter(|&b| b)
        .count();
    bar.finish();

    assert_eq!(
        count,
        good,
        "Total={}, Good={}, Bad={}, ({:.3}%)",
        count,
        good,
        count - good,
        good as f64 / count as f64 * 100.0
    );

    Ok(())
}