1use crate::args::Unique;
2use crate::utils::cli_result::CliResult;
3use crate::utils::column::Columns;
4use crate::utils::filename::new_path;
5use crate::utils::writer::Writer;
6use ahash::HashMapExt;
7use std::fs::File;
8use std::io::{BufRead, BufReader, Lines};
9use std::path::Path;
10
11impl Unique {
12 pub fn csv_run(&self) -> CliResult {
13 let path = &self.path();
14 let all_cols = self.cols == "-1";
15
16 let cols = if all_cols {
18 None
19 } else {
20 Some(
21 Columns::new(&self.cols)
22 .total_col_of(path, self.sep, self.quote)
23 .parse(),
24 )
25 };
26
27 let out = new_path(path, "-drop-duplicates");
29 let mut wtr = Writer::file_or_stdout(self.export, &out)?;
30 let mut rdr = BufReader::new(File::open(path)?).lines();
31
32 if !self.no_header {
34 let Some(r) = rdr.next() else { return Ok(()) };
35 wtr.write_str_unchecked(&r?)
36 }
37
38 match (self.keep_last, all_cols) {
40 (true, true) => keep_last_and_all_cols(&mut rdr, &mut wtr, path, self.no_header)?,
41 (true, false) => {
42 keep_last_and_partial_cols(self, &mut rdr, &mut wtr, cols.unwrap(), path)?
43 }
44 (false, true) => keep_first_and_all_cols(&mut rdr, &mut wtr)?,
45 (false, false) => keep_first_and_partial_cols(&mut rdr, &mut wtr, cols.unwrap(), self)?,
46 };
47
48 if self.export {
49 println!("\nSaved to file: {}", out.display())
50 }
51
52 Ok(())
53 }
54}
55
56fn keep_first_and_all_cols(rdr: &mut Lines<BufReader<File>>, wtr: &mut Writer) -> CliResult {
57 let mut unique_holder = ahash::HashSet::default();
58 for r in rdr {
59 let r = r?;
60 if !unique_holder.contains(&r) {
61 wtr.write_str_unchecked(&r);
62 unique_holder.insert(r);
63 }
64 }
65
66 Ok(())
67}
68
69fn keep_first_and_partial_cols(
70 rdr: &mut Lines<BufReader<File>>,
71 wtr: &mut Writer,
72 cols: Columns,
73 args: &Unique,
74) -> CliResult {
75 let mut unique_holder = ahash::HashSet::default();
76 for r in rdr {
77 let r = r?;
78 let segs = args.split_row_to_vec(&r);
79 let p = cols.select_owned_string(&segs);
80 if !unique_holder.contains(&p) {
81 wtr.write_str_unchecked(&r);
82 unique_holder.insert(p);
83 }
84 }
85
86 Ok(())
87}
88
89fn keep_last_and_all_cols(
90 rdr: &mut Lines<BufReader<File>>,
91 wtr: &mut Writer,
92 path: &Path,
93 no_header: bool,
94) -> CliResult {
95 let mut unique_n = ahash::HashMap::default();
96
97 let rdr2 = BufReader::new(File::open(path)?).lines();
99 for r in rdr2.skip(1 - (no_header as usize)) {
100 let r = r?;
101 *unique_n.entry(r).or_insert(0) += 1;
102 }
103
104 for r in rdr {
106 let r = r?;
107 if unique_n[&r] == 1 {
108 wtr.write_str_unchecked(&r);
109 } else {
110 *unique_n.entry(r).or_insert(0) -= 1;
111 }
112 }
113
114 Ok(())
115}
116
117fn keep_last_and_partial_cols(
118 args: &Unique,
119 rdr: &mut Lines<BufReader<File>>,
120 wtr: &mut Writer,
121 cols: Columns,
122 path: &Path,
123) -> CliResult {
124 let mut unique_n = ahash::HashMap::new();
125
126 let rdr2 = BufReader::new(File::open(path)?).lines();
128 for r in rdr2.skip(1 - (args.no_header as usize)) {
129 let r = r?;
130 let segs = args.split_row_to_vec(&r);
131 let p = cols.select_owned_string(&segs);
132 *unique_n.entry(p).or_insert(0) += 1;
133 }
134
135 for r in rdr {
137 let r = r?;
138 let segs = args.split_row_to_vec(&r);
139 let p = cols.select_owned_string(&segs);
140 if unique_n[&p] == 1 {
141 wtr.write_str_unchecked(&r);
142 } else {
143 *unique_n.entry(p).or_insert(0) -= 1;
144 }
145 }
146
147 Ok(())
148}