topfew/
lib.rs

1use std::path::PathBuf;
2
3use rayon::iter::{IntoParallelIterator, ParallelIterator};
4
5mod chunks;
6mod counter;
7mod key_finder;
8
9pub use chunks::chunks;
10pub use counter::{Counter, KeyCount};
11pub use key_finder::KeyFinder;
12
13pub fn top_few_from_stream(
14    path: PathBuf,
15    kf: &KeyFinder,
16    num: usize,
17) -> anyhow::Result<Vec<KeyCount>> {
18    let total = chunks(path)?
19        .collect::<Vec<_>>()
20        .into_par_iter()
21        .map(|reader| {
22            let mut counter = Counter::new(None);
23            let mut s = String::new();
24            for ln in reader {
25                s.clear();
26                if let Ok(key) = kf.key(&ln, &mut s) {
27                    counter.add(key, 1)
28                }
29            }
30            counter
31        })
32        .reduce(|| Counter::new(Some(num)), |l, r| l.merge(r));
33
34    Ok(total.top())
35}