use std::error::Error;
use std::fs;
use std::fs::File;
use std::fs::OpenOptions;
use std::io::BufRead;
use std::io::BufReader;
use std::io::BufWriter;
use std::io::Write;
use std::path::Path;

/// A single table cell.
///
/// The text form is always present; `float32` optionally carries a numeric
/// reading of the same cell. The comparison traits implemented for `Cell`
/// further down in this file look at `string` only.
#[derive(Clone, Debug)]
pub struct Cell {
    /// Numeric value of the cell, or `None` when the cell has no numeric value.
    pub float32: Option<f32>,
    /// Text of the cell.
    pub string: String,
}

16#[derive(Clone, Debug)]
17pub struct Meza<'a> {
18 pub averages: Vec<Option<f32>>,
19 pub columns: Vec<String>,
20 pub rows: Vec<&'a Vec<Cell>>,
21 pub column_types: Vec<ColumnType>,
22 pub data: Vec<Vec<Cell>>
23}
24
/// How the cells of a column are interpreted when the table is sorted.
#[derive(Clone, Debug)]
pub enum ColumnType {
    /// Compare cells by their text (`Cell::string`).
    String,
    /// Compare cells by their numeric value (`Cell::float32`).
    Float32,
}

31impl<'a> Meza<'a> {
32
33 pub fn from_csv(csv_path: &str) -> Result<Meza<'a>, Box<dyn Error>> {
34
35 let csv_open = File::open(csv_path)?;
36
37 let csv_buffer = BufReader::new(csv_open);
38
39 let mut csv_lines = csv_buffer.lines();
40
41 let column_line = csv_lines.next().ok_or("Empty CSV file")??;
42
43 let columns: Vec<String> = column_line.split(',').map(|y| y.trim().to_string()).collect();
44
45 let column_types: Vec<ColumnType> = vec![ColumnType::String; columns.len()];
46
47 let averages = vec![None; columns.len()];
48
49 let mut data: Vec<Vec<Cell>> = Vec::new();
50
51 for line_result in csv_lines {
52 let line = line_result?;
53 let row: Vec<String> = line.split(',').map(|y| y.trim().to_string()).collect();
54 let cells = row
55 .iter()
56 .map(|x| Cell {
57 float32: None,
58 string: x.to_string(),
59 })
60 .collect();
61
62 data.push(cells);
63 }
64
65 Ok(Meza {
66 columns,
67 rows: Vec::new(),
68 column_types,
69 data,
70 averages,
71 })
72
73 }
74
75 pub fn to_csv(&self, csv_path: &str) -> Result<(), Box<dyn Error>> {
76
77 if Path::new(csv_path).is_file() {
78 fs::remove_file(csv_path)?;
79 }
80
81 let mut csv_file = OpenOptions::new().write(true).create(true).open(csv_path)?;
82
83 let column_str = self.columns.join(",");
84 writeln!(csv_file, "{}", column_str)?;
85
86 if !self.data.is_empty() {
87 for row in &self.data {
88 let row_str = row.iter().map(|cell| &cell.string[..]).collect::<Vec<&str>>().join(",");
89 writeln!(csv_file, "{}", row_str)?;
90 }
91 } else {
92 for row in &self.rows {
93 let row_str = row.iter().map(|cell| &cell.string[..]).collect::<Vec<&str>>().join(",");
94 writeln!(csv_file, "{}", row_str)?;
95 }
96 }
97
98 Ok(())
99 }
100
101 pub fn update_csv(&self, csv_path: &str) -> Result<(), Box<dyn Error>> {
102
103 if self.data.is_empty() && self.rows.is_empty() {
104 return Ok(());
105 }
106
107 let mut csv_file = OpenOptions::new().write(true).create(true).truncate(true).open(csv_path)?;
108
109 if !self.data.is_empty() {
110 for row in self.data.iter() {
111 let row_str = row.iter().map(|cell| &cell.string[..]).collect::<Vec<&str>>().join(",");
112 writeln!(csv_file, "{}", row_str)?;
113 }
114 } else {
115 for row in self.rows.iter() {
116 let row_str = row.iter().map(|cell| &cell.string[..]).collect::<Vec<&str>>().join(",");
117 writeln!(csv_file, "{}", row_str)?;
118 }
119 }
120
121 Ok(())
122
123 }
124
125 pub fn sort(&mut self, column: &str, order: bool) -> Result<(), Box<dyn Error>> {
126
127 let i = self
128 .columns
129 .iter()
130 .position(|x| x == column)
131 .ok_or_else(|| Box::<dyn Error>::from("No column!"))?;
132
133 if self.rows.is_empty() {
134 match &self.column_types[i] {
135 ColumnType::String => self.data.sort_by(|a, b| a[i].string.cmp(&b[i].string)),
136 ColumnType::Float32 => self.data.sort_by(|a, b| a[i].float32.partial_cmp(&b[i].float32).unwrap()),
137 }
138 } else {
139 match &self.column_types[i] {
140 ColumnType::String => self.rows.sort_by(|a, b| a[i].string.cmp(&b[i].string)),
141 ColumnType::Float32 => self.rows.sort_by(|a, b| a[i].float32.partial_cmp(&b[i].float32).unwrap()),
142 }
143 }
144
145 if !order {
146 if self.rows.is_empty() {
147 self.data.reverse();
148 } else {
149 self.rows.reverse();
150 }
151 }
152
153 Ok(())
154 }
155
156 pub fn filter<F: Fn(&Vec<Cell>) -> bool>(&self, criteria: F) -> Meza {
157
158 let mut meza = Meza {
159 columns: self.columns.clone(),
160 rows: vec![],
161 column_types: self.column_types.clone(),
162 data: vec![],
163 averages: vec![None; self.columns.len()],
164 };
165
166 if self.rows.is_empty() {
167 meza.data = self.data.iter().cloned().filter(|row| criteria(row)).collect();
168 } else {
169 meza.rows = self.rows.iter().cloned().filter(|row| criteria(row)).collect();
170 }
171
172 meza
173
174 }
175
176 pub fn is_empty(&self) -> bool {
177 self.rows.is_empty() && self.data.is_empty()
178 }
179
180 pub fn new(columns: Vec<&str>) -> Meza {
181 Meza {
182 columns: columns.iter().map(|x| x.to_string()).collect(),
183 rows: vec![],
184 column_types: vec![ColumnType::String; columns.len()],
185 data: vec![],
186 averages: vec![None; columns.len()],
187 }
188 }
189
190 pub fn column(&self, name: &str) -> Result<Vec<&Cell>, Box<dyn Error>> {
191
192 let index = self
193 .columns
194 .iter()
195 .position(|x| x == name)
196 .ok_or_else(|| Box::<dyn Error>::from("No column!"))?;
197
198 let column = if self.rows.is_empty() {
199 self.data.iter().map(|row| &row[index]).collect()
200 } else {
201 self.rows.iter().map(|row| &row[index]).collect()
202 };
203
204 Ok(column)
205
206 }
207
208 pub fn dedup(&mut self) {
209
210 if self.rows.is_empty() {
211 self.data.sort();
212 self.data.dedup();
213 } else {
214 self.rows.sort();
215 self.rows.dedup();
216 }
217
218 }
219
220 pub fn slice(&self, start: usize, end: usize) -> Meza {
221 let mut meza = Meza {
222 columns: self.columns.clone(),
223 data: vec![],
224 rows: vec![],
225 column_types: self.column_types.clone(),
226 averages: vec![None; self.columns.len()],
227 };
228
229 if self.rows.is_empty() {
230 meza.data = if end <= self.data.len() {
231 self.data[start..end].to_vec()
232 } else {
233 self.data.clone()
234 };
235 } else {
236 meza.rows = if end <= self.rows.len() {
237 self.rows[start..end].to_vec()
238 } else {
239 self.rows.clone()
240 };
241 }
242
243 meza
244 }
245
246 pub fn average(&mut self, column: &str) -> Result<f32, Box<dyn Error>> {
247
248 let i = match self.columns.iter().position(|x| x == column) {
249 Some(i) => i,
250 None => Err("Column not found!")?,
251 };
252
253 let mut sum = 0.0;
254
255 let mut count = 0;
256
257 if self.data.is_empty() {
258 for row in &self.rows {
259 if let Some(cell) = row.get(i) {
260 match cell.float32 {
261 Some(value) => {
262 sum += f32::from(value);
263 count += 1;
264 }
265 None => continue,
266 }
267 }
268 }
269 } else {
270 for row in &self.data {
271 if let Some(cell) = row.get(i) {
272 match cell.float32 {
273 Some(value) => {
274 sum += f32::from(value);
275 count += 1;
276 }
277 None => continue,
278 }
279 }
280 }
281 }
282
283 let avg = if count > 0 {
284 sum / count as f32
285 } else {
286 0.0
287 };
288
289 self.averages[i] = Some(avg);
290
291 Ok(avg)
292
293 }
294
295 pub fn variance(&mut self, column: &str) -> Result<f32, Box<dyn Error>> {
296
297 let i = self
298 .columns
299 .iter()
300 .position(|x| x == column)
301 .expect("Column not found!");
302
303 let average = match self.averages[i] {
304 Some(avg) => avg,
305 None => self.average(column)?
306 };
307
308 let values: Vec<f32> = if self.rows.is_empty() {
309 self.data
310 .iter()
311 .filter_map(|row| row[i].float32)
312 .collect()
313 } else {
314 self.rows
315 .iter()
316 .filter_map(|row| row[i].float32)
317 .collect()
318 };
319
320 let squared_differences: Vec<f32> = values
321 .iter()
322 .map(|value| (value - average).powi(2))
323 .collect();
324
325 let sum_squared_differences: f32 = squared_differences.iter().sum();
326
327 let variance = sum_squared_differences / (values.len() as f32 - 1.0);
328
329 Ok(variance)
330
331 }
332
333}
334
335use std::cmp::Ordering;
336
337impl Ord for Cell {
338 fn cmp(&self, other: &Self) -> Ordering {
339 self.string.cmp(&other.string)
340 }
341}
342
343impl PartialOrd for Cell {
344 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
345 Some(self.cmp(other))
346 }
347}
348impl Eq for Cell {}
349
350impl PartialEq for Cell {
351 fn eq(&self, other: &Self) -> bool {
352 self.string == other.string
353 }
354}