rsv_lib/io/
stats.rs

1use crate::args::Stats;
2use crate::utils::cli_result::CliResult;
3use crate::utils::column::Columns;
4use crate::utils::column_stats::ColumnStats;
5use crate::utils::column_type::ColumnTypes;
6use crate::utils::filename::new_file;
7use crate::utils::reader::IoReader;
8use rayon::prelude::*;
9use std::fs::File;
10use std::io::{BufWriter, Write};
11
12impl Stats {
13    pub fn io_run(&self) -> CliResult {
14        // read
15        let rows = IoReader::new().lines();
16
17        // too few rows
18        if rows.len() <= 1 - self.no_header as usize {
19            return Ok(());
20        }
21
22        // split rows
23        let n = self.row_field_count(&rows[0]);
24        let cols = Columns::new(&self.cols).total_col(n).parse();
25        let rows = rows
26            .par_iter()
27            .map(|r| self.split_row_to_vec(r))
28            .collect::<Vec<_>>();
29
30        // header
31        let names = match self.no_header {
32            true => cols.artificial_n_cols(rows[0].len()),
33            false => rows[0].iter().map(|&i| i.to_owned()).collect::<Vec<_>>(),
34        };
35
36        let rows = &rows[(1 - self.no_header as usize)..];
37
38        // column type
39        let typ = ColumnTypes::guess_from_io(rows, &cols);
40
41        // stats holder
42        let mut stat = ColumnStats::new(&typ, &names);
43        let chunks = rows.chunks(1000).collect::<Vec<_>>();
44        let r = chunks
45            .into_par_iter()
46            .map(|chunk| {
47                let mut s = stat.clone();
48                for r in chunk {
49                    s.parse_line_by_fields(r);
50                }
51                s
52            })
53            .collect::<Vec<_>>();
54        r.into_iter().fold(&mut stat, |s, b| {
55            s.merge(b);
56            s
57        });
58
59        stat.cal_unique_and_mean();
60
61        if self.export {
62            let out = new_file("stats.csv");
63            let mut wtr = BufWriter::new(File::create(&out)?);
64            wtr.write_all(stat.to_string().as_bytes())?;
65            println!("Saved to file: {}", out.display());
66        } else {
67            stat.print();
68            println!("Total rows: {}", stat.rows);
69        }
70
71        Ok(())
72    }
73}