1use crate::utils::row_split::CsvRowSplitter;
2
3use super::{cli_result::CliResult, writer::Writer};
4use rayon::prelude::*;
5use std::{borrow::Cow, error::Error};
6
/// An ordered list of sort keys parsed from a column specification.
///
/// Values are built with [`SortColumns::from`], which only returns lists
/// holding one or two keys.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SortColumns(Vec<SortColumn>);

/// A single sort key: which field to sort on and how to compare it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SortColumn {
    /// Index of the field used as the key (used directly as a zero-based index).
    col: usize,
    /// `true` to sort ascending, `false` to sort descending.
    ascending: bool,
    /// `true` to compare the field as a parsed `f64` instead of as text.
    pub numeric: bool,
}
14
15impl SortColumns {
16 pub fn from(cols: &str) -> Result<Self, Box<dyn Error>> {
17 let mut r = SortColumns(vec![]);
18
19 for i in cols.split(',') {
20 let mut j = i.replace(' ', "");
21
22 (0..2).for_each(|_| {
23 if j.ends_with(['n', 'N', 'd', 'D']) {
24 j.pop();
25 }
26 });
27
28 if j.is_empty() {
29 continue;
30 }
31
32 if let Ok(col) = j.parse::<usize>() {
33 r.0.push(SortColumn {
34 col,
35 ascending: !i.contains(['d', 'D']),
36 numeric: i.contains(['n', 'N']),
37 });
38 } else {
39 let e = format!("column syntax error for <-c {i}>. Run <rsv sort -h> for help.");
40 return Err(e.into());
41 }
42 }
43
44 if r.0.is_empty() {
45 return Err("no column is specified.".into());
46 }
47
48 if r.0.len() > 2 {
49 return Err("sort by more than two columns is not supported.".into());
50 }
51
52 Ok(r)
53 }
54
55 fn col_at(&self, n: usize) -> usize {
56 self.0[n].col
57 }
58
59 fn ascending_at(&self, n: usize) -> bool {
60 self.0[n].ascending
61 }
62
63 fn numeric_at(&self, n: usize) -> bool {
64 self.0[n].numeric
65 }
66
67 pub fn sort_and_write(
68 &self,
69 lines: &Vec<String>,
70 sep: char,
71 quote: char,
72 wtr: &mut Writer,
73 ) -> CliResult {
74 match self.0.len() {
75 1 => match self.numeric_at(0) {
76 true => self.sort_numeric_column(lines, sep, quote, wtr),
77 false => self.sort_str_column(lines, sep, quote, wtr),
78 },
79 2 => match (self.numeric_at(0), self.numeric_at(1)) {
80 (true, true) => self.sort_numeric_numeric_columns(lines, sep, quote, wtr),
81 (true, false) => self.sort_numeric_str_columns(lines, sep, quote, wtr),
82 (false, true) => self.sort_str_numeric_columns(lines, sep, quote, wtr),
83 (false, false) => self.sort_str_str_columns(lines, sep, quote, wtr),
84 },
85 _ => {}
86 }
87
88 Ok(())
89 }
90
91 fn sort_str_column(&self, lines: &Vec<String>, sep: char, quote: char, wtr: &mut Writer) {
92 let c = self.col_at(0);
93 let mut r = lines
94 .par_iter()
95 .map(|i| {
96 (
97 i,
98 CsvRowSplitter::new(i, sep, quote)
99 .nth(c)
100 .unwrap_or_default(),
101 )
102 })
103 .collect::<Vec<_>>();
104 match self.ascending_at(0) {
105 true => r.sort_by(|&a, &b| a.1.cmp(b.1)),
106 false => r.sort_by(|&a, &b| b.1.cmp(a.1)),
107 }
108
109 r.iter().for_each(|(l, _)| wtr.write_str_unchecked(l));
110 }
111
112 fn sort_numeric_column(&self, lines: &Vec<String>, sep: char, quote: char, wtr: &mut Writer) {
113 let c = self.col_at(0);
114 let mut r = lines
115 .par_iter()
116 .map(|i| {
117 let f = CsvRowSplitter::new(i, sep, quote)
118 .nth(c)
119 .unwrap_or_default();
120 (i, f.parse::<f64>().unwrap_or_default())
121 })
122 .collect::<Vec<_>>();
123 match self.ascending_at(0) {
124 true => r.sort_by(|&a, &b| a.1.partial_cmp(&b.1).unwrap()),
125 false => r.sort_by(|&a, &b| b.1.partial_cmp(&a.1).unwrap()),
126 };
127
128 r.iter().for_each(|(l, _)| wtr.write_str_unchecked(l));
129 }
130
131 fn sort_str_str_columns(&self, lines: &Vec<String>, sep: char, quote: char, wtr: &mut Writer) {
132 let c1 = self.col_at(0);
133 let c2 = self.col_at(1);
134
135 let mut r = lines
136 .par_iter()
137 .map(|i| {
138 let f = CsvRowSplitter::new(i, sep, quote).collect::<Vec<_>>();
139 (i, f[c1], f[c2])
140 })
141 .collect::<Vec<_>>();
142 match (self.ascending_at(0), self.ascending_at(1)) {
143 (true, true) => r.sort_by(|&a, &b| a.1.cmp(b.1).then(a.2.cmp(b.2))),
144 (true, false) => r.sort_by(|&a, &b| a.1.cmp(b.1).then(b.2.cmp(a.2))),
145 (false, true) => r.sort_by(|&a, &b| b.1.cmp(a.1).then(a.2.cmp(b.2))),
146 (false, false) => r.sort_by(|&a, &b| b.1.cmp(a.1).then(b.2.cmp(a.2))),
147 }
148
149 r.iter().for_each(|(l, _, _)| wtr.write_str_unchecked(l));
150 }
151
152 fn sort_str_numeric_columns(
153 &self,
154 lines: &Vec<String>,
155 sep: char,
156 quote: char,
157 wtr: &mut Writer,
158 ) {
159 let c1 = self.col_at(0);
160 let c2 = self.col_at(1);
161
162 let mut r = lines
163 .par_iter()
164 .map(|i| {
165 let f = CsvRowSplitter::new(i, sep, quote).collect::<Vec<_>>();
166 (i, f[c1], f[c2].parse::<f64>().unwrap_or_default())
167 })
168 .collect::<Vec<_>>();
169 match (self.ascending_at(0), self.ascending_at(1)) {
170 (true, true) => r.sort_by(|&a, &b| a.1.cmp(b.1).then(a.2.partial_cmp(&b.2).unwrap())),
171 (true, false) => r.sort_by(|&a, &b| a.1.cmp(b.1).then(b.2.partial_cmp(&a.2).unwrap())),
172 (false, true) => r.sort_by(|&a, &b| b.1.cmp(a.1).then(a.2.partial_cmp(&b.2).unwrap())),
173 (false, false) => r.sort_by(|&a, &b| b.1.cmp(a.1).then(b.2.partial_cmp(&a.2).unwrap())),
174 }
175
176 r.iter().for_each(|(l, _, _)| wtr.write_str_unchecked(l));
177 }
178
179 fn sort_numeric_str_columns(
180 &self,
181 lines: &Vec<String>,
182 sep: char,
183 quote: char,
184 wtr: &mut Writer,
185 ) {
186 let c1 = self.col_at(0);
187 let c2 = self.col_at(1);
188
189 let mut r = lines
190 .par_iter()
191 .map(|i| {
192 let f = CsvRowSplitter::new(i, sep, quote).collect::<Vec<_>>();
193 (i, f[c1].parse::<f64>().unwrap_or_default(), f[c2])
194 })
195 .collect::<Vec<_>>();
196 match (self.ascending_at(0), self.ascending_at(1)) {
197 (true, true) => r.sort_by(|&a, &b| a.1.partial_cmp(&b.1).unwrap().then(a.2.cmp(b.2))),
198 (true, false) => r.sort_by(|&a, &b| a.1.partial_cmp(&b.1).unwrap().then(b.2.cmp(a.2))),
199 (false, true) => r.sort_by(|&a, &b| b.1.partial_cmp(&a.1).unwrap().then(a.2.cmp(b.2))),
200 (false, false) => r.sort_by(|&a, &b| b.1.partial_cmp(&a.1).unwrap().then(b.2.cmp(a.2))),
201 }
202
203 r.iter().for_each(|(l, _, _)| wtr.write_str_unchecked(l));
204 }
205
206 fn sort_numeric_numeric_columns(
207 &self,
208 lines: &Vec<String>,
209 sep: char,
210 quote: char,
211 wtr: &mut Writer,
212 ) {
213 let c1 = self.col_at(0);
214 let c2 = self.col_at(1);
215
216 let mut r = lines
217 .par_iter()
218 .map(|i| {
219 let f = CsvRowSplitter::new(i, sep, quote).collect::<Vec<_>>();
220 (
221 i,
222 f[c1].parse::<f64>().unwrap_or_default(),
223 f[c2].parse::<f64>().unwrap_or_default(),
224 )
225 })
226 .collect::<Vec<_>>();
227
228 match (self.ascending_at(0), self.ascending_at(1)) {
229 (true, true) => r.sort_by(|&a, &b| {
230 a.1.partial_cmp(&b.1)
231 .unwrap()
232 .then(a.2.partial_cmp(&b.2).unwrap())
233 }),
234 (true, false) => r.sort_by(|&a, &b| {
235 a.1.partial_cmp(&b.1)
236 .unwrap()
237 .then(b.2.partial_cmp(&a.2).unwrap())
238 }),
239 (false, true) => r.sort_by(|&a, &b| {
240 b.1.partial_cmp(&a.1)
241 .unwrap()
242 .then(a.2.partial_cmp(&b.2).unwrap())
243 }),
244 (false, false) => r.sort_by(|&a, &b| {
245 b.1.partial_cmp(&a.1)
246 .unwrap()
247 .then(b.2.partial_cmp(&a.2).unwrap())
248 }),
249 }
250
251 r.iter().for_each(|(l, _, _)| wtr.write_str_unchecked(l));
252 }
253
254 pub fn sort_excel_and_write(
255 &self,
256 lines: &mut Vec<Vec<Cow<str>>>,
257 wtr: &mut Writer,
258 ) -> CliResult {
259 match self.0.len() {
260 1 => match self.numeric_at(0) {
261 true => self.sort_excel_numeric_column(lines, wtr),
262 false => self.sort_excel_str_column(lines, wtr),
263 },
264 2 => match (self.numeric_at(0), self.numeric_at(1)) {
265 (true, true) => self.sort_excel_numeric_numeric_columns(lines, wtr),
266 (true, false) => self.sort_excel_numeric_str_columns(lines, wtr),
267 (false, true) => self.sort_excel_str_numeric_columns(lines, wtr),
268 (false, false) => self.sort_excel_str_str_columns(lines, wtr),
269 },
270 _ => {}
271 }
272
273 Ok(())
274 }
275
276 fn sort_excel_str_column(&self, lines: &mut [Vec<Cow<str>>], wtr: &mut Writer) {
277 let c = self.col_at(0);
278 match self.ascending_at(0) {
279 true => lines.sort_by(|a, b| a[c].cmp(&b[c])),
280 false => lines.sort_by(|a, b| b[c].cmp(&a[c])),
281 }
282
283 lines.iter().for_each(|l| wtr.write_fields_unchecked(l));
284 }
285
286 fn sort_excel_numeric_column(&self, lines: &mut Vec<Vec<Cow<str>>>, wtr: &mut Writer) {
287 let c = self.col_at(0);
288 let mut r = lines
289 .par_iter()
290 .map(|i| (i, i[c].parse::<f64>().unwrap_or_default()))
291 .collect::<Vec<_>>();
292 match self.ascending_at(0) {
293 true => r.sort_by(|&a, &b| a.1.partial_cmp(&b.1).unwrap()),
294 false => r.sort_by(|&a, &b| b.1.partial_cmp(&a.1).unwrap()),
295 };
296
297 r.iter().for_each(|(l, _)| wtr.write_fields_unchecked(l));
298 }
299
300 fn sort_excel_str_str_columns(&self, lines: &mut [Vec<Cow<str>>], wtr: &mut Writer) {
301 let c1 = self.col_at(0);
302 let c2 = self.col_at(1);
303 match (self.ascending_at(0), self.ascending_at(1)) {
304 (true, true) => lines.sort_by(|a, b| a[c1].cmp(&b[c1]).then(a[c2].cmp(&b[c2]))),
305 (true, false) => lines.sort_by(|a, b| a[c1].cmp(&b[c1]).then(b[c2].cmp(&a[c2]))),
306 (false, true) => lines.sort_by(|a, b| b[c1].cmp(&a[c1]).then(a[c2].cmp(&b[c2]))),
307 (false, false) => lines.sort_by(|a, b| b[c1].cmp(&a[c1]).then(b[c2].cmp(&a[c2]))),
308 }
309
310 lines.iter().for_each(|l| wtr.write_fields_unchecked(l));
311 }
312
313 fn sort_excel_str_numeric_columns(&self, lines: &mut Vec<Vec<Cow<str>>>, wtr: &mut Writer) {
314 let c1 = self.col_at(0);
315 let c2 = self.col_at(1);
316
317 let mut r = lines
318 .par_iter()
319 .map(|i| (i, &i[c1], i[c2].parse::<f64>().unwrap_or_default()))
320 .collect::<Vec<_>>();
321 match (self.ascending_at(0), self.ascending_at(1)) {
322 (true, true) => r.sort_by(|&a, &b| a.1.cmp(b.1).then(a.2.partial_cmp(&b.2).unwrap())),
323 (true, false) => r.sort_by(|&a, &b| a.1.cmp(b.1).then(b.2.partial_cmp(&a.2).unwrap())),
324 (false, true) => r.sort_by(|&a, &b| b.1.cmp(a.1).then(a.2.partial_cmp(&b.2).unwrap())),
325 (false, false) => r.sort_by(|&a, &b| b.1.cmp(a.1).then(b.2.partial_cmp(&a.2).unwrap())),
326 }
327
328 r.iter().for_each(|(l, _, _)| wtr.write_fields_unchecked(l));
329 }
330
331 fn sort_excel_numeric_str_columns(&self, lines: &mut [Vec<Cow<str>>], wtr: &mut Writer) {
332 let c1 = self.col_at(0);
333 let c2 = self.col_at(1);
334
335 let mut r = lines
336 .par_iter()
337 .map(|i| (i, i[c1].parse::<f64>().unwrap_or_default(), &(i[c2])))
338 .collect::<Vec<_>>();
339 match (self.ascending_at(0), self.ascending_at(1)) {
340 (true, true) => r.sort_by(|&a, &b| a.1.partial_cmp(&b.1).unwrap().then(a.2.cmp(b.2))),
341 (true, false) => r.sort_by(|&a, &b| a.1.partial_cmp(&b.1).unwrap().then(b.2.cmp(a.2))),
342 (false, true) => r.sort_by(|&a, &b| b.1.partial_cmp(&a.1).unwrap().then(a.2.cmp(b.2))),
343 (false, false) => r.sort_by(|&a, &b| b.1.partial_cmp(&a.1).unwrap().then(b.2.cmp(a.2))),
344 }
345
346 r.iter().for_each(|(l, _, _)| wtr.write_fields_unchecked(l));
347 }
348
349 fn sort_excel_numeric_numeric_columns(&self, lines: &mut [Vec<Cow<str>>], wtr: &mut Writer) {
350 let c1 = self.col_at(0);
351 let c2 = self.col_at(1);
352
353 let mut r = lines
354 .par_iter()
355 .map(|i| {
356 (
357 i,
358 i[c1].parse::<f64>().unwrap_or_default(),
359 i[c2].parse::<f64>().unwrap_or_default(),
360 )
361 })
362 .collect::<Vec<_>>();
363
364 match (self.ascending_at(0), self.ascending_at(1)) {
365 (true, true) => r.sort_by(|&a, &b| {
366 a.1.partial_cmp(&b.1)
367 .unwrap()
368 .then(a.2.partial_cmp(&b.2).unwrap())
369 }),
370 (true, false) => r.sort_by(|&a, &b| {
371 a.1.partial_cmp(&b.1)
372 .unwrap()
373 .then(b.2.partial_cmp(&a.2).unwrap())
374 }),
375 (false, true) => r.sort_by(|&a, &b| {
376 b.1.partial_cmp(&a.1)
377 .unwrap()
378 .then(a.2.partial_cmp(&b.2).unwrap())
379 }),
380 (false, false) => r.sort_by(|&a, &b| {
381 b.1.partial_cmp(&a.1)
382 .unwrap()
383 .then(b.2.partial_cmp(&a.2).unwrap())
384 }),
385 }
386
387 r.iter().for_each(|(l, _, _)| wtr.write_fields_unchecked(l));
388 }
389}