rsv_lib/excel/
unique.rs

1use crate::args::Unique;
2use crate::utils::cli_result::CliResult;
3use crate::utils::column::Columns;
4use crate::utils::constants::COMMA;
5use crate::utils::excel::datatype_vec_to_string;
6use crate::utils::filename::new_path;
7use crate::utils::reader::ExcelReader;
8use crate::utils::writer::Writer;
9
10impl Unique {
11    pub fn excel_run(&self) -> CliResult {
12        let path = &self.path();
13        let all_cols = self.cols == "-1";
14
15        // wtr and rdr
16        let out = new_path(path, "-drop-duplicates").with_extension("csv");
17        let mut wtr = Writer::file_or_stdout(self.export, &out)?;
18        let mut rdr = ExcelReader::new(path, self.sheet)?;
19
20        // cols
21        let cols = if all_cols {
22            None
23        } else {
24            Some(Columns::new(&self.cols).total_col(rdr.column_n()).parse())
25        };
26
27        // header
28        if !self.no_header {
29            let Some(r) = rdr.next() else { return Ok(()) };
30            wtr.write_excel_line_unchecked(r, COMMA);
31        }
32
33        // read
34        match (self.keep_last, all_cols) {
35            (true, true) => keep_last_and_all_cols(&mut rdr, &mut wtr)?,
36            (true, false) => keep_last_and_partial_cols(&mut rdr, &mut wtr, cols.unwrap())?,
37            (false, true) => keep_first_and_all_cols(&mut rdr, &mut wtr)?,
38            (false, false) => keep_first_and_partial_cols(&mut rdr, &mut wtr, cols.unwrap())?,
39        };
40
41        if self.export {
42            println!("\nSaved to file: {}", out.display())
43        }
44
45        Ok(())
46    }
47}
48
49fn keep_first_and_all_cols(rdr: &mut ExcelReader, wtr: &mut Writer) -> CliResult {
50    let mut unique_holder = ahash::HashSet::default();
51    for r in rdr.iter().skip(rdr.next_called) {
52        let r = datatype_vec_to_string(r);
53        if !unique_holder.contains(&r) {
54            wtr.write_str_unchecked(&r);
55            unique_holder.insert(r);
56        }
57    }
58
59    Ok(())
60}
61
62fn keep_first_and_partial_cols(
63    rdr: &mut ExcelReader,
64    wtr: &mut Writer,
65    cols: Columns,
66) -> CliResult {
67    let mut unique_holder = ahash::HashSet::default();
68    for r in rdr.iter().skip(rdr.next_called) {
69        let p = cols.select_owned_string_from_excel_datatype(r);
70        if !unique_holder.contains(&p) {
71            wtr.write_excel_line_unchecked(r, COMMA);
72            unique_holder.insert(p);
73        }
74    }
75
76    Ok(())
77}
78
79fn keep_last_and_all_cols(rdr: &mut ExcelReader, wtr: &mut Writer) -> CliResult {
80    let mut unique_n = ahash::HashMap::default();
81
82    // first scan to locate record location
83    for r in rdr.iter().skip(rdr.next_called) {
84        let r = datatype_vec_to_string(r);
85        *unique_n.entry(r).or_insert(0) += 1;
86    }
87
88    // second scan
89    for r in rdr.iter().skip(rdr.next_called) {
90        let r = datatype_vec_to_string(r);
91        if unique_n[&r] == 1 {
92            wtr.write_str_unchecked(r);
93        } else {
94            *unique_n.entry(r).or_insert(0) -= 1;
95        }
96    }
97
98    Ok(())
99}
100
101fn keep_last_and_partial_cols(rdr: &mut ExcelReader, wtr: &mut Writer, cols: Columns) -> CliResult {
102    let mut unique_n = ahash::HashMap::default();
103
104    // first scan to locate record location
105    for r in rdr.iter().skip(rdr.next_called) {
106        let p = cols.select_owned_string_from_excel_datatype(r);
107        *unique_n.entry(p).or_insert(0) += 1;
108    }
109
110    // second scan
111    for r in rdr.iter().skip(rdr.next_called) {
112        let p = cols.select_owned_string_from_excel_datatype(r);
113        if unique_n[&p] == 1 {
114            wtr.write_excel_line_unchecked(r, COMMA);
115        } else {
116            *unique_n.entry(p).or_insert(0) -= 1;
117        }
118    }
119
120    Ok(())
121}