meza/
lib.rs

use std::error::Error;
use std::fs;
use std::fs::File;
use std::fs::OpenOptions;
use std::io::BufRead;
use std::io::BufReader;
use std::io::BufWriter;
use std::io::Write;
use std::path::Path;
9
/// One value of a table row.
///
/// `string` holds the cell's text; `float32` optionally holds a numeric value
/// for the same cell. Equality and ordering of cells (see the trait impls at
/// the bottom of this file) look at `string` only.
#[derive(Debug, Clone)]
pub struct Cell {
    /// Numeric value of the cell, if one has been assigned.
    pub float32: Option<f32>,
    /// Text of the cell.
    pub string: String,
}
15
16#[derive(Clone, Debug)]
17pub struct Meza<'a> {
18    pub averages: Vec<Option<f32>>,
19    pub columns: Vec<String>,
20    pub rows: Vec<&'a Vec<Cell>>,
21    pub column_types: Vec<ColumnType>,
22    pub data: Vec<Vec<Cell>>
23}
24
/// How the cells of a column are interpreted.
///
/// `Meza::sort` compares `Cell::string` for `String` columns and
/// `Cell::float32` for `Float32` columns.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ColumnType {
    /// Cells are treated as text.
    String,
    /// Cells are treated as `f32` numbers.
    Float32,
}
30
31impl<'a> Meza<'a> {
32
33    pub fn from_csv(csv_path: &str) -> Result<Meza<'a>, Box<dyn Error>> {
34        
35        let csv_open = File::open(csv_path)?;
36        
37        let csv_buffer = BufReader::new(csv_open);
38
39        let mut csv_lines = csv_buffer.lines();
40        
41        let column_line = csv_lines.next().ok_or("Empty CSV file")??;
42        
43        let columns: Vec<String> = column_line.split(',').map(|y| y.trim().to_string()).collect();
44        
45        let column_types: Vec<ColumnType> = vec![ColumnType::String; columns.len()];
46
47        let averages = vec![None; columns.len()];
48        
49        let mut data: Vec<Vec<Cell>> = Vec::new();
50
51        for line_result in csv_lines {
52            let line = line_result?;
53            let row: Vec<String> = line.split(',').map(|y| y.trim().to_string()).collect();
54            let cells = row
55                .iter()
56                .map(|x| Cell {
57                    float32: None,
58                    string: x.to_string(),
59                })
60                .collect();
61
62            data.push(cells);
63        }
64
65        Ok(Meza {
66            columns,
67            rows: Vec::new(),
68            column_types,
69            data,
70            averages,
71        })
72
73    }
74
75    pub fn to_csv(&self, csv_path: &str) -> Result<(), Box<dyn Error>> {
76        
77        if Path::new(csv_path).is_file() {
78            fs::remove_file(csv_path)?;
79        }
80
81        let mut csv_file = OpenOptions::new().write(true).create(true).open(csv_path)?;
82
83        let column_str = self.columns.join(",");
84        writeln!(csv_file, "{}", column_str)?;
85
86        if !self.data.is_empty() {
87            for row in &self.data {
88                let row_str = row.iter().map(|cell| &cell.string[..]).collect::<Vec<&str>>().join(",");
89                writeln!(csv_file, "{}", row_str)?;
90            }
91        } else {
92            for row in &self.rows {
93                let row_str = row.iter().map(|cell| &cell.string[..]).collect::<Vec<&str>>().join(",");
94                writeln!(csv_file, "{}", row_str)?;
95            }
96        }
97
98        Ok(())
99    }
100
101    pub fn update_csv(&self, csv_path: &str) -> Result<(), Box<dyn Error>> {
102        
103        if self.data.is_empty() && self.rows.is_empty() {
104            return Ok(());
105        }
106
107        let mut csv_file = OpenOptions::new().write(true).create(true).truncate(true).open(csv_path)?;
108
109        if !self.data.is_empty() {
110            for row in self.data.iter() {
111                let row_str = row.iter().map(|cell| &cell.string[..]).collect::<Vec<&str>>().join(",");
112                writeln!(csv_file, "{}", row_str)?;
113            }
114        } else {
115            for row in self.rows.iter() {
116                let row_str = row.iter().map(|cell| &cell.string[..]).collect::<Vec<&str>>().join(",");
117                writeln!(csv_file, "{}", row_str)?;
118            }
119        }
120
121        Ok(())
122
123    }
124
125    pub fn sort(&mut self, column: &str, order: bool) -> Result<(), Box<dyn Error>> {
126        
127        let i = self
128            .columns
129            .iter()
130            .position(|x| x == column)
131            .ok_or_else(|| Box::<dyn Error>::from("No column!"))?;
132
133        if self.rows.is_empty() {
134            match &self.column_types[i] {
135                ColumnType::String => self.data.sort_by(|a, b| a[i].string.cmp(&b[i].string)),
136                ColumnType::Float32 => self.data.sort_by(|a, b| a[i].float32.partial_cmp(&b[i].float32).unwrap()),
137            }
138        } else {
139            match &self.column_types[i] {
140                ColumnType::String => self.rows.sort_by(|a, b| a[i].string.cmp(&b[i].string)),
141                ColumnType::Float32 => self.rows.sort_by(|a, b| a[i].float32.partial_cmp(&b[i].float32).unwrap()),
142            }
143        }
144
145        if !order {
146            if self.rows.is_empty() {
147                self.data.reverse();
148            } else {
149                self.rows.reverse();
150            }
151        }
152
153        Ok(())
154    }
155
156    pub fn filter<F: Fn(&Vec<Cell>) -> bool>(&self, criteria: F) -> Meza {
157        
158        let mut meza = Meza {
159            columns: self.columns.clone(),
160            rows: vec![],
161            column_types: self.column_types.clone(),
162            data: vec![],
163            averages: vec![None; self.columns.len()],
164        };
165
166        if self.rows.is_empty() {
167            meza.data = self.data.iter().cloned().filter(|row| criteria(row)).collect();
168        } else {
169            meza.rows = self.rows.iter().cloned().filter(|row| criteria(row)).collect();
170        }
171
172        meza
173
174    }
175
176    pub fn is_empty(&self) -> bool {
177        self.rows.is_empty() && self.data.is_empty()
178    }
179
180    pub fn new(columns: Vec<&str>) -> Meza {
181        Meza {
182            columns: columns.iter().map(|x| x.to_string()).collect(),
183            rows: vec![],
184            column_types: vec![ColumnType::String; columns.len()],
185            data: vec![],
186            averages: vec![None; columns.len()],
187        }
188    }
189
190    pub fn column(&self, name: &str) -> Result<Vec<&Cell>, Box<dyn Error>> {
191        
192        let index = self
193            .columns
194            .iter()
195            .position(|x| x == name)
196            .ok_or_else(|| Box::<dyn Error>::from("No column!"))?;
197
198        let column = if self.rows.is_empty() {
199            self.data.iter().map(|row| &row[index]).collect()
200        } else {
201            self.rows.iter().map(|row| &row[index]).collect()
202        };
203
204        Ok(column)
205
206    }
207
208    pub fn dedup(&mut self) {
209
210        if self.rows.is_empty() {
211            self.data.sort();
212            self.data.dedup();
213        } else {
214            self.rows.sort();
215            self.rows.dedup();
216        }
217
218    }
219    
220    pub fn slice(&self, start: usize, end: usize) -> Meza {
221        let mut meza = Meza {
222            columns: self.columns.clone(),
223            data: vec![],
224            rows: vec![],
225            column_types: self.column_types.clone(),
226            averages: vec![None; self.columns.len()],
227        };
228
229        if self.rows.is_empty() {
230            meza.data = if end <= self.data.len() {
231                self.data[start..end].to_vec()
232            } else {
233                self.data.clone()
234            };
235        } else {
236            meza.rows = if end <= self.rows.len() {
237                self.rows[start..end].to_vec()
238            } else {
239                self.rows.clone()
240            };
241        }
242
243        meza
244    }
245
246    pub fn average(&mut self, column: &str) -> Result<f32, Box<dyn Error>> {
247        
248        let i = match self.columns.iter().position(|x| x == column) {
249            Some(i) => i,
250            None => Err("Column not found!")?,
251        };
252        
253        let mut sum = 0.0;
254
255        let mut count = 0;
256
257        if self.data.is_empty() {
258            for row in &self.rows {
259                if let Some(cell) = row.get(i) {
260                    match cell.float32 {
261                        Some(value) => {
262                            sum += f32::from(value);
263                            count += 1;
264                        }
265                        None => continue,
266                    }
267                }
268            }
269        } else {
270            for row in &self.data {
271                if let Some(cell) = row.get(i) {
272                    match cell.float32 {
273                        Some(value) => {
274                            sum += f32::from(value);
275                            count += 1;
276                        }
277                        None => continue,
278                    }
279                }
280            }
281        }
282        
283        let avg = if count > 0 {
284            sum / count as f32
285        } else {
286            0.0
287        };
288
289        self.averages[i] = Some(avg);
290
291        Ok(avg)
292
293    }
294    
295    pub fn variance(&mut self, column: &str) -> Result<f32, Box<dyn Error>> {
296        
297        let i = self
298            .columns
299            .iter()
300            .position(|x| x == column)
301            .expect("Column not found!");
302
303        let average = match self.averages[i] {
304            Some(avg) => avg,
305            None => self.average(column)?
306        };
307
308        let values: Vec<f32> = if self.rows.is_empty() {
309            self.data
310                .iter()
311                .filter_map(|row| row[i].float32)
312                .collect()
313        } else {
314            self.rows
315                .iter()
316                .filter_map(|row| row[i].float32)
317                .collect()
318        };
319
320        let squared_differences: Vec<f32> = values
321            .iter()
322            .map(|value| (value - average).powi(2))
323            .collect();
324
325        let sum_squared_differences: f32 = squared_differences.iter().sum();
326
327        let variance = sum_squared_differences / (values.len() as f32 - 1.0);
328
329        Ok(variance)
330
331    }
332
333}
334
335use std::cmp::Ordering;
336
337impl Ord for Cell {
338    fn cmp(&self, other: &Self) -> Ordering {
339        self.string.cmp(&other.string)
340    }
341}
342
343impl PartialOrd for Cell {
344    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
345        Some(self.cmp(other))
346    }
347}
348impl Eq for Cell {}
349
350impl PartialEq for Cell {
351    fn eq(&self, other: &Self) -> bool {
352        self.string == other.string
353    }
354}