rsv_lib/io/
unique.rs

1use crate::args::Unique;
2use crate::utils::cli_result::CliResult;
3use crate::utils::column::Columns;
4use crate::utils::filename::new_file;
5use crate::utils::reader::IoReader;
6use crate::utils::writer::Writer;
7
8impl Unique {
9    pub fn io_run(&self) -> CliResult {
10        let all_cols = self.cols == "-1";
11
12        // wtr and rdr
13        let out = new_file("drop_duplicates.csv");
14        let mut wtr = Writer::file_or_stdout(self.export, &out)?;
15        let lines = IoReader::new().no_header(self.no_header).lines();
16
17        if lines.is_empty() {
18            return Ok(());
19        }
20
21        // cols
22        let cols = if all_cols {
23            None
24        } else {
25            let n = self.row_field_count(&lines[0]);
26            Some(Columns::new(&self.cols).total_col(n).parse())
27        };
28
29        // header
30        if !self.no_header {
31            wtr.write_str_unchecked(&lines[0]);
32        }
33
34        let lines = if self.no_header {
35            &lines[..]
36        } else {
37            &lines[1..]
38        };
39
40        // read
41        match (self.keep_last, all_cols) {
42            (true, true) => keep_last_and_all_cols(lines, &mut wtr)?,
43            (true, false) => keep_last_and_partial_cols(self, lines, &mut wtr, cols.unwrap())?,
44            (false, true) => keep_first_and_all_cols(lines, &mut wtr)?,
45            (false, false) => keep_first_and_partial_cols(self, lines, &mut wtr, cols.unwrap())?,
46        };
47
48        if self.export {
49            println!("\nSaved to file: {}", out.display())
50        }
51
52        Ok(())
53    }
54}
55
56fn keep_first_and_all_cols(rdr: &[String], wtr: &mut Writer) -> CliResult {
57    let mut unique_holder = ahash::HashSet::default();
58    for r in rdr {
59        if !unique_holder.contains(r) {
60            wtr.write_str_unchecked(r);
61            unique_holder.insert(r);
62        }
63    }
64
65    Ok(())
66}
67
68fn keep_first_and_partial_cols(
69    args: &Unique,
70    rdr: &[String],
71    wtr: &mut Writer,
72    cols: Columns,
73) -> CliResult {
74    let mut unique_holder = ahash::HashSet::default();
75    for r in rdr {
76        let segs = args.split_row_to_vec(r);
77        let p = cols.select_owned_string(&segs);
78        if !unique_holder.contains(&p) {
79            wtr.write_str_unchecked(r);
80            unique_holder.insert(p);
81        }
82    }
83
84    Ok(())
85}
86
87fn keep_last_and_all_cols(rdr: &[String], wtr: &mut Writer) -> CliResult {
88    let mut unique_n = ahash::HashMap::default();
89
90    // first scan to locate record location
91    for r in rdr {
92        *unique_n.entry(r).or_insert(0) += 1;
93    }
94
95    // second scan
96    for r in rdr {
97        if unique_n[&r] == 1 {
98            wtr.write_str_unchecked(r);
99        } else {
100            *unique_n.entry(r).or_insert(0) -= 1;
101        }
102    }
103
104    Ok(())
105}
106
107fn keep_last_and_partial_cols(
108    args: &Unique,
109    rdr: &[String],
110    wtr: &mut Writer,
111    cols: Columns,
112) -> CliResult {
113    let mut unique_n = ahash::HashMap::default();
114
115    // first scan to locate record location
116    for r in rdr {
117        let segs = args.split_row_to_vec(r);
118        let p = cols.select_owned_string(&segs);
119        *unique_n.entry(p).or_insert(0) += 1;
120    }
121
122    // second scan
123    for r in rdr {
124        let segs = args.split_row_to_vec(r);
125        let p = cols.select_owned_string(&segs);
126        if unique_n[&p] == 1 {
127            wtr.write_str_unchecked(r);
128        } else {
129            *unique_n.entry(p).or_insert(0) -= 1;
130        }
131    }
132
133    Ok(())
134}