rsv_lib/utils/
sort.rs

1use crate::utils::row_split::CsvRowSplitter;
2
3use super::{cli_result::CliResult, writer::Writer};
4use rayon::prelude::*;
5use std::{borrow::Cow, error::Error};
6
/// An ordered list of sort keys; the first entry is the primary key.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SortColumns(Vec<SortColumn>);

/// A single sort key: which column to sort on and how to compare it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SortColumn {
    /// Index of the field used as the sort key.
    col: usize,
    /// `true` for ascending order, `false` for descending order.
    ascending: bool,
    /// `true` when the field is compared as a number instead of as text.
    pub numeric: bool,
}
14
15impl SortColumns {
16    pub fn from(cols: &str) -> Result<Self, Box<dyn Error>> {
17        let mut r = SortColumns(vec![]);
18
19        for i in cols.split(',') {
20            let mut j = i.replace(' ', "");
21
22            (0..2).for_each(|_| {
23                if j.ends_with(['n', 'N', 'd', 'D']) {
24                    j.pop();
25                }
26            });
27
28            if j.is_empty() {
29                continue;
30            }
31
32            if let Ok(col) = j.parse::<usize>() {
33                r.0.push(SortColumn {
34                    col,
35                    ascending: !i.contains(['d', 'D']),
36                    numeric: i.contains(['n', 'N']),
37                });
38            } else {
39                let e = format!("column syntax error for <-c {i}>. Run <rsv sort -h> for help.");
40                return Err(e.into());
41            }
42        }
43
44        if r.0.is_empty() {
45            return Err("no column is specified.".into());
46        }
47
48        if r.0.len() > 2 {
49            return Err("sort by more than two columns is not supported.".into());
50        }
51
52        Ok(r)
53    }
54
55    fn col_at(&self, n: usize) -> usize {
56        self.0[n].col
57    }
58
59    fn ascending_at(&self, n: usize) -> bool {
60        self.0[n].ascending
61    }
62
63    fn numeric_at(&self, n: usize) -> bool {
64        self.0[n].numeric
65    }
66
67    pub fn sort_and_write(
68        &self,
69        lines: &Vec<String>,
70        sep: char,
71        quote: char,
72        wtr: &mut Writer,
73    ) -> CliResult {
74        match self.0.len() {
75            1 => match self.numeric_at(0) {
76                true => self.sort_numeric_column(lines, sep, quote, wtr),
77                false => self.sort_str_column(lines, sep, quote, wtr),
78            },
79            2 => match (self.numeric_at(0), self.numeric_at(1)) {
80                (true, true) => self.sort_numeric_numeric_columns(lines, sep, quote, wtr),
81                (true, false) => self.sort_numeric_str_columns(lines, sep, quote, wtr),
82                (false, true) => self.sort_str_numeric_columns(lines, sep, quote, wtr),
83                (false, false) => self.sort_str_str_columns(lines, sep, quote, wtr),
84            },
85            _ => {}
86        }
87
88        Ok(())
89    }
90
91    fn sort_str_column(&self, lines: &Vec<String>, sep: char, quote: char, wtr: &mut Writer) {
92        let c = self.col_at(0);
93        let mut r = lines
94            .par_iter()
95            .map(|i| {
96                (
97                    i,
98                    CsvRowSplitter::new(i, sep, quote)
99                        .nth(c)
100                        .unwrap_or_default(),
101                )
102            })
103            .collect::<Vec<_>>();
104        match self.ascending_at(0) {
105            true => r.sort_by(|&a, &b| a.1.cmp(b.1)),
106            false => r.sort_by(|&a, &b| b.1.cmp(a.1)),
107        }
108
109        r.iter().for_each(|(l, _)| wtr.write_str_unchecked(l));
110    }
111
112    fn sort_numeric_column(&self, lines: &Vec<String>, sep: char, quote: char, wtr: &mut Writer) {
113        let c = self.col_at(0);
114        let mut r = lines
115            .par_iter()
116            .map(|i| {
117                let f = CsvRowSplitter::new(i, sep, quote)
118                    .nth(c)
119                    .unwrap_or_default();
120                (i, f.parse::<f64>().unwrap_or_default())
121            })
122            .collect::<Vec<_>>();
123        match self.ascending_at(0) {
124            true => r.sort_by(|&a, &b| a.1.partial_cmp(&b.1).unwrap()),
125            false => r.sort_by(|&a, &b| b.1.partial_cmp(&a.1).unwrap()),
126        };
127
128        r.iter().for_each(|(l, _)| wtr.write_str_unchecked(l));
129    }
130
131    fn sort_str_str_columns(&self, lines: &Vec<String>, sep: char, quote: char, wtr: &mut Writer) {
132        let c1 = self.col_at(0);
133        let c2 = self.col_at(1);
134
135        let mut r = lines
136            .par_iter()
137            .map(|i| {
138                let f = CsvRowSplitter::new(i, sep, quote).collect::<Vec<_>>();
139                (i, f[c1], f[c2])
140            })
141            .collect::<Vec<_>>();
142        match (self.ascending_at(0), self.ascending_at(1)) {
143            (true, true) => r.sort_by(|&a, &b| a.1.cmp(b.1).then(a.2.cmp(b.2))),
144            (true, false) => r.sort_by(|&a, &b| a.1.cmp(b.1).then(b.2.cmp(a.2))),
145            (false, true) => r.sort_by(|&a, &b| b.1.cmp(a.1).then(a.2.cmp(b.2))),
146            (false, false) => r.sort_by(|&a, &b| b.1.cmp(a.1).then(b.2.cmp(a.2))),
147        }
148
149        r.iter().for_each(|(l, _, _)| wtr.write_str_unchecked(l));
150    }
151
152    fn sort_str_numeric_columns(
153        &self,
154        lines: &Vec<String>,
155        sep: char,
156        quote: char,
157        wtr: &mut Writer,
158    ) {
159        let c1 = self.col_at(0);
160        let c2 = self.col_at(1);
161
162        let mut r = lines
163            .par_iter()
164            .map(|i| {
165                let f = CsvRowSplitter::new(i, sep, quote).collect::<Vec<_>>();
166                (i, f[c1], f[c2].parse::<f64>().unwrap_or_default())
167            })
168            .collect::<Vec<_>>();
169        match (self.ascending_at(0), self.ascending_at(1)) {
170            (true, true) => r.sort_by(|&a, &b| a.1.cmp(b.1).then(a.2.partial_cmp(&b.2).unwrap())),
171            (true, false) => r.sort_by(|&a, &b| a.1.cmp(b.1).then(b.2.partial_cmp(&a.2).unwrap())),
172            (false, true) => r.sort_by(|&a, &b| b.1.cmp(a.1).then(a.2.partial_cmp(&b.2).unwrap())),
173            (false, false) => r.sort_by(|&a, &b| b.1.cmp(a.1).then(b.2.partial_cmp(&a.2).unwrap())),
174        }
175
176        r.iter().for_each(|(l, _, _)| wtr.write_str_unchecked(l));
177    }
178
179    fn sort_numeric_str_columns(
180        &self,
181        lines: &Vec<String>,
182        sep: char,
183        quote: char,
184        wtr: &mut Writer,
185    ) {
186        let c1 = self.col_at(0);
187        let c2 = self.col_at(1);
188
189        let mut r = lines
190            .par_iter()
191            .map(|i| {
192                let f = CsvRowSplitter::new(i, sep, quote).collect::<Vec<_>>();
193                (i, f[c1].parse::<f64>().unwrap_or_default(), f[c2])
194            })
195            .collect::<Vec<_>>();
196        match (self.ascending_at(0), self.ascending_at(1)) {
197            (true, true) => r.sort_by(|&a, &b| a.1.partial_cmp(&b.1).unwrap().then(a.2.cmp(b.2))),
198            (true, false) => r.sort_by(|&a, &b| a.1.partial_cmp(&b.1).unwrap().then(b.2.cmp(a.2))),
199            (false, true) => r.sort_by(|&a, &b| b.1.partial_cmp(&a.1).unwrap().then(a.2.cmp(b.2))),
200            (false, false) => r.sort_by(|&a, &b| b.1.partial_cmp(&a.1).unwrap().then(b.2.cmp(a.2))),
201        }
202
203        r.iter().for_each(|(l, _, _)| wtr.write_str_unchecked(l));
204    }
205
206    fn sort_numeric_numeric_columns(
207        &self,
208        lines: &Vec<String>,
209        sep: char,
210        quote: char,
211        wtr: &mut Writer,
212    ) {
213        let c1 = self.col_at(0);
214        let c2 = self.col_at(1);
215
216        let mut r = lines
217            .par_iter()
218            .map(|i| {
219                let f = CsvRowSplitter::new(i, sep, quote).collect::<Vec<_>>();
220                (
221                    i,
222                    f[c1].parse::<f64>().unwrap_or_default(),
223                    f[c2].parse::<f64>().unwrap_or_default(),
224                )
225            })
226            .collect::<Vec<_>>();
227
228        match (self.ascending_at(0), self.ascending_at(1)) {
229            (true, true) => r.sort_by(|&a, &b| {
230                a.1.partial_cmp(&b.1)
231                    .unwrap()
232                    .then(a.2.partial_cmp(&b.2).unwrap())
233            }),
234            (true, false) => r.sort_by(|&a, &b| {
235                a.1.partial_cmp(&b.1)
236                    .unwrap()
237                    .then(b.2.partial_cmp(&a.2).unwrap())
238            }),
239            (false, true) => r.sort_by(|&a, &b| {
240                b.1.partial_cmp(&a.1)
241                    .unwrap()
242                    .then(a.2.partial_cmp(&b.2).unwrap())
243            }),
244            (false, false) => r.sort_by(|&a, &b| {
245                b.1.partial_cmp(&a.1)
246                    .unwrap()
247                    .then(b.2.partial_cmp(&a.2).unwrap())
248            }),
249        }
250
251        r.iter().for_each(|(l, _, _)| wtr.write_str_unchecked(l));
252    }
253
254    pub fn sort_excel_and_write(
255        &self,
256        lines: &mut Vec<Vec<Cow<str>>>,
257        wtr: &mut Writer,
258    ) -> CliResult {
259        match self.0.len() {
260            1 => match self.numeric_at(0) {
261                true => self.sort_excel_numeric_column(lines, wtr),
262                false => self.sort_excel_str_column(lines, wtr),
263            },
264            2 => match (self.numeric_at(0), self.numeric_at(1)) {
265                (true, true) => self.sort_excel_numeric_numeric_columns(lines, wtr),
266                (true, false) => self.sort_excel_numeric_str_columns(lines, wtr),
267                (false, true) => self.sort_excel_str_numeric_columns(lines, wtr),
268                (false, false) => self.sort_excel_str_str_columns(lines, wtr),
269            },
270            _ => {}
271        }
272
273        Ok(())
274    }
275
276    fn sort_excel_str_column(&self, lines: &mut [Vec<Cow<str>>], wtr: &mut Writer) {
277        let c = self.col_at(0);
278        match self.ascending_at(0) {
279            true => lines.sort_by(|a, b| a[c].cmp(&b[c])),
280            false => lines.sort_by(|a, b| b[c].cmp(&a[c])),
281        }
282
283        lines.iter().for_each(|l| wtr.write_fields_unchecked(l));
284    }
285
286    fn sort_excel_numeric_column(&self, lines: &mut Vec<Vec<Cow<str>>>, wtr: &mut Writer) {
287        let c = self.col_at(0);
288        let mut r = lines
289            .par_iter()
290            .map(|i| (i, i[c].parse::<f64>().unwrap_or_default()))
291            .collect::<Vec<_>>();
292        match self.ascending_at(0) {
293            true => r.sort_by(|&a, &b| a.1.partial_cmp(&b.1).unwrap()),
294            false => r.sort_by(|&a, &b| b.1.partial_cmp(&a.1).unwrap()),
295        };
296
297        r.iter().for_each(|(l, _)| wtr.write_fields_unchecked(l));
298    }
299
300    fn sort_excel_str_str_columns(&self, lines: &mut [Vec<Cow<str>>], wtr: &mut Writer) {
301        let c1 = self.col_at(0);
302        let c2 = self.col_at(1);
303        match (self.ascending_at(0), self.ascending_at(1)) {
304            (true, true) => lines.sort_by(|a, b| a[c1].cmp(&b[c1]).then(a[c2].cmp(&b[c2]))),
305            (true, false) => lines.sort_by(|a, b| a[c1].cmp(&b[c1]).then(b[c2].cmp(&a[c2]))),
306            (false, true) => lines.sort_by(|a, b| b[c1].cmp(&a[c1]).then(a[c2].cmp(&b[c2]))),
307            (false, false) => lines.sort_by(|a, b| b[c1].cmp(&a[c1]).then(b[c2].cmp(&a[c2]))),
308        }
309
310        lines.iter().for_each(|l| wtr.write_fields_unchecked(l));
311    }
312
313    fn sort_excel_str_numeric_columns(&self, lines: &mut Vec<Vec<Cow<str>>>, wtr: &mut Writer) {
314        let c1 = self.col_at(0);
315        let c2 = self.col_at(1);
316
317        let mut r = lines
318            .par_iter()
319            .map(|i| (i, &i[c1], i[c2].parse::<f64>().unwrap_or_default()))
320            .collect::<Vec<_>>();
321        match (self.ascending_at(0), self.ascending_at(1)) {
322            (true, true) => r.sort_by(|&a, &b| a.1.cmp(b.1).then(a.2.partial_cmp(&b.2).unwrap())),
323            (true, false) => r.sort_by(|&a, &b| a.1.cmp(b.1).then(b.2.partial_cmp(&a.2).unwrap())),
324            (false, true) => r.sort_by(|&a, &b| b.1.cmp(a.1).then(a.2.partial_cmp(&b.2).unwrap())),
325            (false, false) => r.sort_by(|&a, &b| b.1.cmp(a.1).then(b.2.partial_cmp(&a.2).unwrap())),
326        }
327
328        r.iter().for_each(|(l, _, _)| wtr.write_fields_unchecked(l));
329    }
330
331    fn sort_excel_numeric_str_columns(&self, lines: &mut [Vec<Cow<str>>], wtr: &mut Writer) {
332        let c1 = self.col_at(0);
333        let c2 = self.col_at(1);
334
335        let mut r = lines
336            .par_iter()
337            .map(|i| (i, i[c1].parse::<f64>().unwrap_or_default(), &(i[c2])))
338            .collect::<Vec<_>>();
339        match (self.ascending_at(0), self.ascending_at(1)) {
340            (true, true) => r.sort_by(|&a, &b| a.1.partial_cmp(&b.1).unwrap().then(a.2.cmp(b.2))),
341            (true, false) => r.sort_by(|&a, &b| a.1.partial_cmp(&b.1).unwrap().then(b.2.cmp(a.2))),
342            (false, true) => r.sort_by(|&a, &b| b.1.partial_cmp(&a.1).unwrap().then(a.2.cmp(b.2))),
343            (false, false) => r.sort_by(|&a, &b| b.1.partial_cmp(&a.1).unwrap().then(b.2.cmp(a.2))),
344        }
345
346        r.iter().for_each(|(l, _, _)| wtr.write_fields_unchecked(l));
347    }
348
349    fn sort_excel_numeric_numeric_columns(&self, lines: &mut [Vec<Cow<str>>], wtr: &mut Writer) {
350        let c1 = self.col_at(0);
351        let c2 = self.col_at(1);
352
353        let mut r = lines
354            .par_iter()
355            .map(|i| {
356                (
357                    i,
358                    i[c1].parse::<f64>().unwrap_or_default(),
359                    i[c2].parse::<f64>().unwrap_or_default(),
360                )
361            })
362            .collect::<Vec<_>>();
363
364        match (self.ascending_at(0), self.ascending_at(1)) {
365            (true, true) => r.sort_by(|&a, &b| {
366                a.1.partial_cmp(&b.1)
367                    .unwrap()
368                    .then(a.2.partial_cmp(&b.2).unwrap())
369            }),
370            (true, false) => r.sort_by(|&a, &b| {
371                a.1.partial_cmp(&b.1)
372                    .unwrap()
373                    .then(b.2.partial_cmp(&a.2).unwrap())
374            }),
375            (false, true) => r.sort_by(|&a, &b| {
376                b.1.partial_cmp(&a.1)
377                    .unwrap()
378                    .then(a.2.partial_cmp(&b.2).unwrap())
379            }),
380            (false, false) => r.sort_by(|&a, &b| {
381                b.1.partial_cmp(&a.1)
382                    .unwrap()
383                    .then(b.2.partial_cmp(&a.2).unwrap())
384            }),
385        }
386
387        r.iter().for_each(|(l, _, _)| wtr.write_fields_unchecked(l));
388    }
389}