rsv_lib/utils/
column.rs

1use super::excel::write_datatype_to_string;
2use super::row_split::CsvRowSplitter;
3use crate::utils::util::werr_exit;
4use calamine::Data;
5use std::fs::File;
6use std::io::{BufRead, BufReader};
7use std::path::Path;
8
/// A set of selected column indices built from a textual column expression
/// such as `0,1,2,5`, `0-2,5`, `-1` or `-3--1`.
///
/// Negative indices count from the end and therefore need the total number
/// of columns, which is either supplied directly or counted from the first
/// line of a file.
#[derive(Debug)]
pub struct Columns<'a> {
    /// File whose first line is read to count columns when a negative index
    /// must be resolved and `total` has not been set.
    path: Option<&'a Path>,
    /// Field separator handed to the row splitter when counting columns.
    sep: char,
    /// Quote character handed to the row splitter when counting columns.
    quote: char,
    /// Selected column indices, in order of first appearance, without duplicates.
    pub cols: Vec<usize>,
    /// Largest index in `cols`; stays `0` until an expression has been parsed.
    pub max: usize,
    /// `true` when no explicit columns are selected.
    pub select_all: bool,
    /// The raw, unparsed column expression.
    raw: &'a str,
    /// Total number of columns, used to resolve negative indices.
    total: Option<usize>,
    /// Set to `true` once `parse` has been called.
    pub parsed: bool,
}
21
22fn parse_col_usize(col: &str) -> usize {
23    col.parse().unwrap_or_else(|_| {
24        werr_exit!(
25            "{}",
26            "Column syntax error: can be something like 0,1,2,5 or 0-2,5 or -1 or -3--1."
27        );
28    })
29}
30
31fn parse_i32(col: &str) -> i32 {
32    col.parse().unwrap_or_else(|_| {
33        werr_exit!(
34            "{}",
35            "Column syntax error: can be something like 0,1,2,5 or 0-2,5 or -1 or -3--1."
36        );
37    })
38}
39
/// Splits `source` around the `n`-th (0-based) occurrence of `pat`.
///
/// Returns the text before that occurrence and the text after it; the
/// occurrence itself is dropped.
///
/// # Panics
///
/// Panics if `pat` occurs fewer than `n + 1` times in `source`.
fn split_pat_at<'a>(source: &'a str, pat: &'a str, n: usize) -> (&'a str, &'a str) {
    let start = source
        .match_indices(pat)
        .map(|(offset, _)| offset)
        .nth(n)
        .unwrap();
    let end = start + pat.len();
    (&source[..start], &source[end..])
}
45
46impl<'a> Columns<'a> {
47    pub fn new(raw: &str) -> Columns {
48        Columns {
49            path: None,
50            sep: ',',
51            quote: '"',
52            cols: vec![],
53            max: 0,
54            select_all: true,
55            raw,
56            total: None,
57            parsed: false,
58        }
59    }
60
61    pub fn total_col(mut self, total: usize) -> Self {
62        self.total = Some(total);
63        self
64    }
65
66    pub fn total_col_of(mut self, path: &'a Path, sep: char, quote: char) -> Self {
67        self.path = Some(path);
68        self.sep = sep;
69        self.quote = quote;
70        self
71    }
72
73    pub fn parse(mut self) -> Self {
74        self.parsed = true;
75
76        if self.raw.is_empty() {
77            return self;
78        }
79
80        self.raw.split(',').for_each(|i| {
81            if !i.trim().is_empty() {
82                self.parse_col(i)
83            }
84        });
85        self.update_status();
86
87        self
88    }
89
90    fn parse_col(&mut self, col: &str) {
91        match (col.starts_with('-'), col.matches('-').count()) {
92            // positive
93            (true, 1) => {
94                let c = { self.true_col(col) };
95                self.push(c)
96            }
97            // col range
98            (true, _) => {
99                let (first, second) = split_pat_at(col, "-", 1);
100                let min = { self.true_col(first) };
101                let max = { self.true_col(second) };
102                self.push_range(min, max);
103            }
104            (false, 0) => {
105                let c = { self.true_col(col) };
106                self.push(c)
107            }
108            (false, _) => {
109                let (first, second) = split_pat_at(col, "-", 0);
110                let min = { self.true_col(first) };
111                let max = { self.true_col(second) };
112                self.push_range(min, max);
113            }
114        };
115    }
116
117    fn true_col(&mut self, col: &str) -> usize {
118        if col.starts_with('-') {
119            if self.total.is_none() {
120                let mut first_line = String::new();
121                let f = File::open(self.path.unwrap()).expect("unable to open file.");
122                BufReader::new(f)
123                    .read_line(&mut first_line)
124                    .expect("read error.");
125                self.total = Some(CsvRowSplitter::new(&first_line, self.sep, self.quote).count());
126            }
127            let i = (self.total.unwrap() as i32) + parse_i32(col);
128            if i < 0 {
129                werr_exit!("Column {} does not exist.", col);
130            }
131            i as usize
132        } else {
133            parse_col_usize(col)
134        }
135    }
136
137    fn push(&mut self, col: usize) {
138        if !self.cols.contains(&col) {
139            self.cols.push(col);
140        }
141    }
142
143    fn push_range(&mut self, min: usize, max: usize) {
144        if min > max {
145            werr_exit!("Min column is bigger than max column.");
146        }
147        for i in min..=max {
148            self.push(i)
149        }
150    }
151
152    fn update_status(&mut self) {
153        self.max = *self.cols.iter().max().unwrap();
154        self.select_all = self.cols.is_empty();
155    }
156
157    pub fn iter(&self) -> impl Iterator<Item = &usize> {
158        self.cols.iter()
159    }
160
161    pub fn artificial_cols_with_appended_n(&self) -> Vec<String> {
162        self.iter()
163            .map(|&i| format!("col{i}"))
164            .chain(std::iter::once("n".to_owned()))
165            .collect::<Vec<_>>()
166    }
167
168    pub fn artificial_n_cols(&self, n: usize) -> Vec<String> {
169        (0..n).map(|i| format!("col{i}")).collect::<Vec<_>>()
170    }
171
172    pub fn select_owned_string(&self, all: &[&str]) -> String {
173        self.iter().map(|&i| all[i]).collect::<Vec<_>>().join(",")
174    }
175
176    pub fn select_owned_string_from_excel_datatype(&self, all: &[Data]) -> String {
177        let mut o = String::new();
178        let mut col = self.cols.iter().peekable();
179        while let Some(&i) = col.next() {
180            write_datatype_to_string(&mut o, &all[i]);
181            if col.peek().is_some() {
182                o.push(',');
183            }
184        }
185
186        o
187    }
188
189    pub fn select_owned_vec_from_excel_datatype(&self, all: &[Data]) -> Vec<String> {
190        self.cols
191            .iter()
192            .map(|&i| all[i].to_string())
193            .chain(std::iter::once("n".to_owned()))
194            .collect::<Vec<_>>()
195    }
196
197    pub fn select_owned_vector_and_append_n(&self, all: &[&str]) -> Vec<String> {
198        self.cols
199            .iter()
200            .map(|&i| all[i].to_owned())
201            .chain(std::iter::once("n".to_owned()))
202            .collect::<Vec<_>>()
203    }
204
205    pub fn col_vec_or_length_of(&self, n: usize) -> Vec<usize> {
206        match self.select_all {
207            true => (0..n).collect::<Vec<_>>(),
208            false => self.cols.clone(),
209        }
210    }
211}