datatroll/lib.rs
1//! datatroll is a robust and user-friendly Rust library for efficiently loading, manipulating,
2//! and exporting data stored in CSV files. Say goodbye to tedious hand-coding data parsing and
3//! welcome a streamlined workflow for wrangling your data with ease.
4//!
5//! ## Features:
6//! - **Versatile Data Loading:**
//!   - Read data from comma-separated CSV files, or from in-memory text, whose first row is the header.
//!   - Infer the type of every value (boolean, integer, float or string) while parsing.
//!   - Fill values that are missing at the end of a row with `Cell::Null`.
10//! - **Intuitive Data Manipulation:**
11//! - Insert new rows with custom values into your data.
12//! - Drop unwanted rows or columns to focus on relevant data.
13//! - Leverage powerful aggregations to calculate:
14//! - Mean, max, min, and median of numeric columns.
15//! - Mode (most frequent value) of categorical columns.
16//! - Variance of numeric columns.
17//! - Apply custom transformations to specific columns using lambda functions.
18//! - Supports Pagination
19//! - **Seamless Data Export:**
//!   - Write manipulated data back to a new comma-separated CSV file.
//!   - The header is stored as the first row of the sheet, so it is always written first.
22//!
23//! # Example:
24//! ```rust
25//! use datatroll::{Cell, Sheet};
26//!
27//! fn main() {
28//! // Read data from a CSV file
29//! let data = "id ,title , director, release date, review
30//!1, old, quintin, 2011, 3.5
31//!2, her, quintin, 2013, 4.2
32//!3, easy, scorces, 2005, 1.0
33//!4, hey, nolan, 1997, 4.7
34//!5, who, martin, 2017, 5.0";
35//! let mut sheet = Sheet::load_data_from_str(data);
36//!
37//! // drop all the rows in which the review is less than 4.0
38//! sheet.drop_rows("review", |c| {
39//! if let Cell::Float(r) = c {
40//! return *r < 4.0;
41//! }
42//! false
43//! });
44//!
45//! // calculate the variance of the review column
46//! let variance = sheet.variance("review").unwrap();
47//! println!("variance for review is: {variance}");
48//!
49//! // Write the transformed data to a new CSV file
50//! if let Err(err) = sheet.export("output.csv") {
51//! eprintln!("Error exporting data: {}", err);
52//! } else {
53//! println!("Data exported successfully to output.csv");
54//! }
55//! }
56//! ```
57
58use std::{
59 error::Error,
60 fs::{File, OpenOptions},
61 io::{BufReader, BufWriter, Read, Write},
62};
63
/// A single value held by a sheet.
///
/// The declaration order of the variants matters: the derived `PartialOrd` ranks values of
/// different variants by that order (`Null` < `String` < `Bool` < `Int` < `Float`) and only
/// compares the payloads when both values share the same variant.
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub enum Cell {
    /// An absent value.
    Null,
    /// Free-form text.
    String(String),
    /// A boolean flag.
    Bool(bool),
    /// A signed 64-bit integer.
    Int(i64),
    /// A 64-bit floating point number.
    Float(f64),
}
73
74/// Represents a 2D vector of cells, forming a sheet of data.
75#[derive(Debug, Default)]
76pub struct Sheet {
77 /// 2D vector of cells
78 pub data: Vec<Vec<Cell>>,
79}
80
81impl Sheet {
82 /// new_sheet initialize a Sheet
83 fn new_sheet() -> Self {
84 Self {
85 data: Vec::<Vec<Cell>>::new(),
86 }
87 }
88
89 /// Loads data from a CSV file into the Sheet's data structure.
90 ///
91 /// This function reads the content of a CSV file specified by `file_path` and populates
92 /// the Sheet's data structure accordingly. The file must have a ".csv" extension, and
93 /// its content should be in CSV (Comma-Separated Values) format.
94 ///
95 /// # Arguments
96 ///
97 /// * `file_path` - The path to the CSV file to load.
98 ///
99 /// # Errors
100 ///
101 /// Returns a `Result` indicating success or an error if the file cannot be opened,
102 /// read, or if the file format is unsupported.
103 ///
104 /// # Examples
105 ///
106 /// ```rust
107 /// use datatroll::Sheet;
108 ///
109 /// if let Err(err) = Sheet::load_data("input.csv") {
110 /// eprintln!("Error loading data: {}", err);
111 /// } else {
112 /// println!("Data loaded successfully from input.csv");
113 /// }
114 /// ```
115 pub fn load_data(file_path: &str) -> Result<Self, Box<dyn Error>> {
116 let mut sheet = Self::new_sheet();
117 // check for ext
118 if file_path.split('.').last() != Some("csv") {
119 return Err(Box::from(
120 "the provided file path is invalid, or of unsupported format",
121 ));
122 }
123
124 let f = File::open(file_path)?;
125 let mut reader = BufReader::new(f);
126 let mut data = String::new();
127
128 reader.read_to_string(&mut data)?;
129
130 data.lines().for_each(|line| {
131 let row: Vec<Cell> = line.split(',').map(|s| s.trim()).map(parse_token).collect();
132 sheet.data.push(row);
133 });
134
135 // if some column values are absent from a row, then fill it with a default Cell::Null
136 let col_len = sheet.data[0].len();
137 for i in 1..sheet.data.len() {
138 let row_len = sheet.data[i].len();
139 if row_len < col_len {
140 for _ in 0..col_len - row_len {
141 sheet.data[i].push(Cell::Null);
142 }
143 }
144 }
145
146 Ok(sheet)
147 }
148
149 pub fn load_data_from_str(data: &str) -> Self {
150 let mut sheet = Self::new_sheet();
151
152 data.lines().for_each(|line| {
153 let row: Vec<Cell> = line.split(',').map(|s| s.trim()).map(parse_token).collect();
154 sheet.data.push(row);
155 });
156
157 // if some column values are absent from a row, then fill it with a default Cell::Null
158 let col_len = sheet.data[0].len();
159 for i in 1..sheet.data.len() {
160 let row_len = sheet.data[i].len();
161 if row_len < col_len {
162 for _ in 0..col_len - row_len {
163 sheet.data[i].push(Cell::Null);
164 }
165 }
166 }
167
168 sheet
169 }
170
171 /// Exports the content of a Sheet to a CSV file.
172 ///
173 /// The function writes the content of the Sheet into a CSV file specified by `file_path`.
174 /// If the file already exists, it truncates the file and overwrites its content.
175 ///
176 /// # Arguments
177 ///
178 /// * `file_path` - The path to the CSV file.
179 ///
180 /// # Examples
181 ///
182 /// ```rust
183 /// let cell_string = Cell::String(String::from("Hello, Rust!"));
184 /// let cell_int = Cell::Int(42);
185 ///
186 /// let row1 = vec![cell_string, Cell::Bool(true), cell_int];
187 /// let row2 = vec![Cell::Null, Cell::Float(3.14), Cell::String(String::from("World"))];
188 ///
189 /// let sheet = Sheet { data: vec![row1, row2] };
190 ///
191 /// if let Err(err) = sheet.export("output.csv") {
192 /// eprintln!("Error exporting data: {}", err);
193 /// } else {
194 /// println!("Data exported successfully to output.csv");
195 /// }
196 /// ```
197 ///
198 /// # Errors
199 ///
200 /// Returns an `Result` indicating success or failure.
201 ///
202 pub fn export(&self, file_path: &str) -> Result<(), Box<dyn Error>> {
203 // check for ext
204 if file_path.split('.').last() != Some("csv") {
205 return Err(Box::from(
206 "the provided file path is invalid, or of unsupported format",
207 ));
208 }
209
210 let file = OpenOptions::new()
211 .write(true)
212 .truncate(true)
213 .create(true)
214 .open(file_path)?;
215
216 let mut buf_writer = BufWriter::new(file);
217
218 for row in &self.data {
219 for cell in row {
220 match cell {
221 Cell::Null => write!(buf_writer, ",")?,
222 Cell::String(s) => write!(buf_writer, "{},", s)?,
223 Cell::Bool(b) => write!(buf_writer, "{},", b)?,
224 Cell::Int(i) => write!(buf_writer, "{},", i)?,
225 Cell::Float(f) => write!(buf_writer, "{},", f)?,
226 }
227 }
228 writeln!(buf_writer)?; // Move to the next line after each row
229 }
230
231 buf_writer.flush()?; // Ensure any remaining data is written to the file
232 Ok(())
233 }
234
235 /// insert_row appends a row to the data sheet at the last position
236 ///
237 /// The function takes a comma seperated input string, trim the whitespace, parse it into a
238 /// vector oc Cell and then push it to the sheet.
239 ///
240 /// # Arguments
241 ///
242 /// * `input` - input string to be inserted.
243 ///
244 /// # Errors
245 ///
246 /// Returns a `Result` indicating success or an error if the input is of unvalid format
247 ///
248 /// # Examples
249 ///
250 /// ```rust
251 /// let row1 = vec![Cell::String("Hello, Rust!".to_string()), Cell::Bool(true), Cell::Int(42)];
252 /// let sheet = Sheet { data: vec![row1] };
253 ///
254 /// sheet.insert_row(",3.14,World")?;
255 ///
256 /// assert_eq!(sheet[0], row1);
257 /// assert_eq!(sheet[1], vec![Cell::Null, Cell::Float(3.14), Cell::String("World".to_string()]);
258 /// ```
259 pub fn insert_row(&mut self, input: &str) -> Result<(), Box<dyn Error>> {
260 let row: Vec<Cell> = input
261 .split(',')
262 .map(|s| s.trim())
263 .map(parse_token)
264 .collect();
265 if row.len() != self.data[0].len() {
266 return Err(Box::from("invalid input"));
267 }
268
269 self.data.push(row);
270 Ok(())
271 }
272
273 /// fill_col replace the value of a column in every row
274 ///
275 /// The function takes a column name and the value to be filled, and iterate through every row
276 /// and effectively replace its old cell values with the new value
277 ///
278 /// # Arguments
279 ///
280 /// * `column` - the column to be mutated
281 /// * `value` - the value which every row will be filled with
282 ///
283 /// # Errors
284 ///
285 /// Returns a `Result` indicating success or an error
286 ///
287 /// # Examples
288 ///
289 /// ```rust
290 /// let row1 = vec![Cell::String("greeting".to_string()), Cell::String("is_good".to_string()), Cell::String("count".to_string())];
291 /// let row2 = vec![Cell::String("Hello, Rust!".to_string()), Cell::Bool(false), Cell::Int(42)];
292 /// let row3 = vec![Cell::String("Hello, World!".to_string()), Cell::Bool(true), Cell::Int(145)];
293 /// let sheet = Sheet { data: vec![row1, row2, row3] };
294 ///
295 /// sheet.fill_col("greeting", Cell::Null)?;
296 ///
297 /// assert_eq!(sheet[1][0], Cell::Null);
298 /// assert_eq!(sheet[1][0], Cell::Null);
299 /// ```
300 pub fn fill_col(&mut self, column: &str, value: Cell) -> Result<(), Box<dyn Error>> {
301 let col_index = self.get_col_index(column).expect("column doesn't exist");
302 for i in 1..self.data.len() {
303 let cell = self.data[i]
304 .get_mut(col_index)
305 .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", col_index, i));
306
307 *cell = value.clone();
308 }
309
310 Ok(())
311 }
312
313 /// paginate takes part of a sheet with a fixed size and return it
314 ///
315 /// The function takes a page number and a page size, and slice the sheet and returns it as a page
316 /// of fixed size
317 ///
318 /// # Arguments
319 ///
320 /// * `page` - the number of the page
321 /// * `size` - number of rows for every page
322 ///
323 /// # Errors
324 ///
325 /// Returns a `Result` indicating success or an error
326 ///
327 /// # Examples
328 ///
329 /// ```rust
330 /// let row1 = vec![Cell::String("greeting".to_string()), Cell::String("is_good".to_string()), Cell::String("count".to_string())];
331 /// let row2 = vec![Cell::String("Hello, Rust!".to_string()), Cell::Bool(false), Cell::Int(42)];
332 /// let row3 = vec![Cell::String("Hello, World!".to_string()), Cell::Bool(true), Cell::Int(145)];
333 /// let row4 = vec![Cell::String("Hello, Dzair!".to_string()), Cell::Bool(true), Cell::Int(145)];
334 /// let row5 = vec![Cell::String("Hello, Africa!".to_string()), Cell::Bool(true), Cell::Int(145)];
335 /// let row6 = vec![Cell::String("Hello, Algeria!".to_string()), Cell::Bool(true), Cell::Int(145)];
336 /// let row7 = vec![Cell::String("Hello, Friday!".to_string()), Cell::Bool(true), Cell::Int(145)];
337 /// let sheet = Sheet { data: vec![row1, row2, row3, row4, row5, row6, row7] };
338 ///
339 /// let page = sheet.paginate(1, 2)?;
340 ///
341 /// assert_eq!(page[0][0], Cell::String("Hello, Rust!".to_string()));
342 /// assert_eq!(page[1][0], Cell::String("Hello, World!".to_string()));
343 /// ```
344 pub fn paginate(&self, page: usize, size: usize) -> Result<Vec<Vec<Cell>>, Box<dyn Error>> {
345 if page < 1 || size > 50 {
346 return Err(Box::from(
347 "page should more than or equal 1, size should 50 per page at max",
348 ));
349 }
350 if self.data.len() < size {
351 return Err(Box::from("page unavailabe"));
352 }
353
354 let mut res: Vec<Vec<Cell>> = Default::default();
355 let offset = ((page - 1) * size) + 1;
356
357 for i in offset..(offset + size) {
358 let row = self.data.get(i).unwrap_or_else(|| {
359 panic!(
360 "offset '{}' and amount '{}' are out of bounds",
361 offset, size
362 )
363 });
364 res.push(row.clone())
365 }
366
367 Ok(res)
368 }
369
370 /// Finds the first row in the table that matches a predicate applied to a specific column.
371 ///
372 /// # Panics
373 ///
374 /// Panics if the specified column doesn't exist or is absent for a row.
375 ///
376 /// # Examples
377 ///
378 /// ```rust
379 /// let mut sheet = Sheet::new_sheet();
380 /// sheet.load_data("test_data.csv").unwrap();
381 /// let first_matching_rows = sheet.find_rows("Age", |cell| cell.as_int() >= 30);
382 /// ```
383 ///
384 /// # Generics
385 ///
386 /// The `predicate` argument is a generic function that allows for flexible filtering criteria.
387 /// It accepts a reference to a `Cell` and returns a boolean indicating whether the row matches.
388 ///
389 /// # Returns
390 ///
391 /// An `Option<&Vec<Cell>>`:
392 /// - `Some(&row)` if a matching row is found, where `row` is a reference to the first matching row.
393 /// - `None` if no matching row is found.
394 pub fn find_first_row<F>(&self, column: &str, predicate: F) -> Option<&Vec<Cell>>
395 where
396 F: FnOnce(&Cell) -> bool + Copy,
397 {
398 let col_index = self.get_col_index(column).expect("column doesn't exist");
399
400 for i in 1..self.data.len() {
401 let cell = self.data[i]
402 .get(col_index)
403 .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", col_index, i));
404 if predicate(cell) {
405 return Some(&self.data[i]);
406 }
407 }
408
409 None
410 }
411
412 /// Finds rows in the table that match a predicate applied to a specific column.
413 ///
414 /// # Panics
415 ///
416 /// Panics if the specified column doesn't exist or is absent for a row.
417 ///
418 /// # Examples
419 ///
420 /// ```rust
421 /// let mut sheet = Sheet::new_sheet();
422 /// sheet.load_data("test_data.csv").unwrap();
423 /// let matching_rows = sheet.filter("Age", |cell| cell.as_int() >= 30);
424 /// ```
425 ///
426 /// # Generics
427 ///
428 /// The `predicate` argument is a generic function that allows for flexible filtering criteria.
429 /// It accepts a reference to a `Cell` and returns a boolean indicating whether the row matches.
430 ///
431 /// # Returns
432 ///
433 /// A vector of vectors, where each inner vector represents a row that matches the predicate.
434 pub fn filter<F>(&self, column: &str, predicate: F) -> Vec<Vec<Cell>>
435 where
436 F: FnOnce(&Cell) -> bool + Copy,
437 {
438 let col_index = self.get_col_index(column).expect("column doesn't exist");
439 let mut res: Vec<Vec<Cell>> = Default::default();
440
441 for i in 1..self.data.len() {
442 let cell = self.data[i]
443 .get(col_index)
444 .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", col_index, i));
445 if predicate(cell) {
446 res.push(self.data[i].clone());
447 }
448 }
449
450 res
451 }
452
453 /// The map function applies a given transformation to each column value of rows.
454 ///
455 /// # Errors
456 ///
457 /// Returns a `Result` indicating success or an error
458 ///
459 /// # Examples
460 ///
461 /// ```rust
462 /// use datatroll::{Sheet, Cell};
463 ///
464 ///let data = "id ,title , director, release date, review
465 ///1, old, quintin, 2011, 3.5
466 ///2, her, quintin, 2013, 4.2
467 ///3, easy, scorces, 2005, 1.0
468 ///4, hey, nolan, 1997, 4.7
469 ///5, who, martin, 2017, 5.0";
470 ///
471 /// let mut sheet = Sheet::load_data_from_str(data);
472 ///
473 /// let result = sheet.map("title", |c| match c {
474 /// Cell::String(s) => Cell::String(s.to_uppercase()),
475 /// _ => return c,
476 /// });
477 ///
478 /// assert!(result.is_ok());
479 /// ```
480 pub fn map<F>(&mut self, column: &str, transform: F) -> Result<(), String>
481 where
482 F: Fn(Cell) -> Cell,
483 {
484 match self.get_col_index(column) {
485 Some(i) => {
486 self.data
487 .iter_mut()
488 .for_each(|row| row[i] = transform(row[i].clone()));
489 Ok(())
490 }
491 None => Err(format!("could not find column '{column}'")),
492 }
493 }
494
495 /// Removes rows from the table based on a predicate applied to a specific column.
496 ///
497 /// # Panics
498 ///
499 /// Panics if the specified column doesn't exist.
500 ///
501 /// # Examples
502 ///
503 /// ```rust
504 /// let mut sheet = Sheet::new_sheet();
505 /// sheet.load_data("test_data.csv").unwrap();
506 /// sheet.drop_rows("Age", |cell| cell.as_int() >= 30); // Removes rows where age is 30 or older
507 /// ```
508 ///
509 /// # Generics
510 ///
511 /// The `predicate` argument is a generic function that allows for flexible filtering criteria.
512 /// It accepts a reference to a `Cell` and returns a boolean indicating whether to keep the row.
513 pub fn drop_rows<F>(&mut self, column: &str, predicate: F)
514 where
515 F: FnOnce(&Cell) -> bool + Copy,
516 {
517 let col_index = self.get_col_index(column).expect("column doesn't exist");
518 self.data.retain(|row| !predicate(&row[col_index]));
519 }
520
521 /// Removes a specified column from the table and returns the number of rows affected.
522 ///
523 /// # Panics
524 ///
525 /// Panics if the specified column doesn't exist.
526 ///
527 /// # Returns
528 ///
529 /// The number of rows that were modified by removing the column.
530 ///
531 /// # Examples
532 ///
533 /// ```rust
534 /// let mut sheet = Sheet::new_sheet();
535 /// sheet.load_data("test_data.csv").unwrap();
536 /// let rows_affected = sheet.drop_col("id") // Removes the "id" column and returns 5
537 /// ```
538 pub fn drop_col(&mut self, column: &str) -> i32 {
539 let col_index = self.get_col_index(column).expect("column doesn't exist");
540 let mut rows_affected = 0;
541 for i in 0..self.data.len() {
542 self.data[i].remove(col_index);
543 rows_affected += 1;
544 }
545
546 rows_affected
547 }
548
549 /// Calculates the mean (average) of a specified column.
550 ///
551 /// The mean is the sum of all values in a data set divided by the number of values.
552 ///
553 /// # Formula
554 ///
555 /// X̄ = (ΣX) / N
556 ///
557 /// Where:
558 /// - X̄ is the mean
559 /// - ΣX is the sum of all values in the column
560 /// - N is the number of values in the column
561 ///
562 /// # Errors
563 ///
564 /// Returns an error if:
565 ///
566 /// - The specified column doesn't exist.
567 /// - The specified column contains non-numeric values (i.e., not `i64` or `f64`).
568 ///
569 /// # Examples
570 ///
571 /// ```rust
572 /// let mut sheet = Sheet::new_sheet();
573 /// sheet.load_data("test_data.csv").unwrap();
574 /// let re_mean = sheet.mean("release year")?; // Returns the mean of the "Age" column
575 /// ```
576 ///
577 /// # Returns
578 ///
579 /// The mean of the specified column as an `f64`, or an error if one occurs.
580 pub fn mean(&self, column: &str) -> Result<f64, Box<dyn Error>> {
581 let index = self.get_col_index(column).expect("column doesn't exist");
582 let mut sum = 0_f64;
583
584 for i in 1..self.data.len() {
585 let val = match self.data[i]
586 .get(index)
587 .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", index, i))
588 {
589 Cell::Int(x) => *x as f64,
590 Cell::Float(f) => *f,
591 _ => return Err(Box::from("column value should be an i64 or a f64")),
592 };
593
594 sum += val
595 }
596
597 Ok(sum / ((self.data.len() - 1) as f64))
598 }
599
600 /// Calculates the variance of a specified column.
601 ///
602 /// Variance measures how far a set of numbers are spread out from their average value.
603 /// It is calculated as the average of the squared differences from the mean.
604 ///
605 /// # Formula
606 ///
607 /// Var(X) = E[(X - μ)²]
608 ///
609 /// Where:
610 /// - Var(X) is the variance
611 /// - E denotes the expected value (average)
612 /// - X is the random variable (the values in the column)
613 /// - μ is the mean of X
614 ///
615 /// # Errors
616 ///
617 /// Returns an error if:
618 ///
619 /// - The specified column doesn't exist.
620 /// - The specified column contains non-numeric values (i.e., not `i64` or `f64`).
621 ///
622 /// # Examples
623 ///
624 /// ```rust
625 /// let mut sheet = Sheet::new_sheet();
626 /// sheet.load_data("test_data.csv").unwrap();
627 /// let re_variance = sheet.variance("release year")?; // Returns the variance of the "release year" column
628 /// ```
629 ///
630 /// # Returns
631 ///
632 /// The variance of the specified column as an `f64`, or an error if one occurs.
633 pub fn variance(&self, column: &str) -> Result<f64, Box<dyn Error>> {
634 let mean = self.mean(column)?;
635
636 let index = self.get_col_index(column).expect("column doesn't exist");
637 let mut total_sum = 0_f64;
638 for i in 1..self.data.len() {
639 let val = match self.data[i]
640 .get(index)
641 .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", index, i))
642 {
643 Cell::Int(x) => *x as f64,
644 Cell::Float(f) => *f,
645 _ => return Err(Box::from("column value should be an i64 or a f64")),
646 };
647
648 total_sum += (val - mean).powf(2.0)
649 }
650
651 Ok(total_sum / (self.data.len() - 1) as f64)
652 }
653
654 /// Calculates the median value of a specified column.
655 ///
656 /// The median is the value that separates the higher half of a data set from the lower half.
657 /// In this case, it's the value that falls in the middle of the column when the data is sorted.
658 ///
659 /// # Panics
660 ///
661 /// Panics if:
662 ///
663 /// - The specified column doesn't exist.
664 /// - The specified column is absent for the middle row.
665 ///
666 /// # Examples
667 ///
668 /// ```rust
669 /// let mut sheet = Sheet::new_sheet();
670 /// sheet.load_data("test_data.csv").unwrap();
671 /// let median_id = sheet.median("id")?; // Returns a &Int(3)
672 /// ```
673 /// # Returns
674 ///
675 /// A reference to the `Cell` containing the median value of the specified column.
676 pub fn median(&self, column: &str) -> &Cell {
677 let col_index = self.get_col_index(column).expect("column doesn't exist");
678 let row_index = ((self.data.len() - 1) + 1) / 2;
679
680 self.data[row_index]
681 .get(col_index)
682 .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", col_index, row_index))
683 }
684
685 /// mode get the most frequent items of a column
686 ///
687 /// The function gets a vector of the most frequent items in a column, alongside their number of
688 /// occurences.
689 ///
690 /// # Arguments
691 ///
692 /// * `columnn` - the name of the column
693 ///
694 /// # Examples
695 ///
696 /// ```rust
697 /// let mut sheet = Sheet::new_sheet();
698 /// sheet.load_data("test_data.csv").unwrap();
699 ///
700 /// let multimodal = sheet.mode("director");
701 /// println!("mode: {:?}", multimodal) // mode: [(String("quintin"), 2), (String("martin"), 2)]
702 ///```
703 pub fn mode(&self, column: &str) -> Vec<(Cell, i32)> {
704 let col_index = self.get_col_index(column).expect("column doesn't exist");
705 let fq = self.build_frequency_table(col_index);
706 let mut max = 0;
707 let mut multi_mode: Vec<(Cell, i32)> = Vec::new();
708
709 for item in fq.iter() {
710 if max <= item.1 {
711 max = item.1;
712 multi_mode.push(item.clone());
713 }
714 }
715
716 multi_mode
717 }
718
719 /// Builds a frequency table for a specified column, counting the occurrences of each unique value.
720 ///
721 /// # Panics
722 ///
723 /// Panics if the specified column doesn't exist or is absent for a row.
724 ///
725 /// # Returns
726 ///
727 /// A vector of tuples `(Cell, i32)`, where:
728 /// - `Cell` is the unique value from the column.
729 /// - `i32` is the frequency (count) of that value in the column.
730 fn build_frequency_table(&self, col_index: usize) -> Vec<(Cell, i32)> {
731 let mut fq: Vec<(Cell, i32)> = Vec::new();
732
733 for i in 1..self.data.len() {
734 let cell = self.data[i]
735 .get(col_index)
736 .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", col_index, i));
737 if fq.is_empty() {
738 fq.push((cell.clone(), 1));
739 continue;
740 }
741
742 let index = fq.iter().position(|item| item.0 == *cell);
743 if let Some(idx) = index {
744 fq[idx].1 += 1;
745 } else if index.is_none() {
746 fq.push((cell.clone(), 1));
747 }
748 }
749
750 fq
751 }
752
753 /// Finds the maximum value of a specified column, specifically for `i64` values.
754 ///
755 /// # Errors
756 ///
757 /// Returns an error if:
758 ///
759 /// - The specified column doesn't exist.
760 /// - The specified column contains non-integer values (i.e., not `i64`).
761 ///
762 /// # Returns
763 ///
764 /// The maximum `i64` value in the specified column, or an error if one occurs.
765 pub fn max_int64(&self, column: &str) -> Result<i64, Box<dyn Error>> {
766 let index = self.get_col_index(column).expect("column doesn't exist");
767 let mut max = 0_i64;
768
769 for i in 1..self.data.len() {
770 let row_val = match self.data[i]
771 .get(index)
772 .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", index, i))
773 {
774 Cell::Int(x) => *x,
775 _ => return Err(Box::from("max_int64 should only works on int values")),
776 };
777
778 if max < row_val {
779 max = row_val;
780 }
781 }
782
783 Ok(max)
784 }
785
786 /// Finds the maximum value of a specified column, working with both `f64` and `i64` values.
787 ///
788 /// # Errors
789 ///
790 /// Returns an error if:
791 ///
792 /// - The specified column doesn't exist.
793 /// - The specified column contains non-numeric values (i.e., not `f64` or `i64`).
794 ///
795 /// # Returns
796 ///
797 /// The maximum value in the specified column, either an `f64` or an `i64` cast to `f64`, or an error if one occurs.
798 pub fn max_float64(&self, column: &str) -> Result<f64, Box<dyn Error>> {
799 let index = self.get_col_index(column).expect("column doesn't exist");
800 let mut max = 0_f64;
801
802 for i in 1..self.data.len() {
803 let row_val = match self.data[i]
804 .get(index)
805 .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", index, i))
806 {
807 Cell::Float(f) => *f,
808 Cell::Int(i) => *i as f64,
809 _ => {
810 return Err(Box::from(
811 "max_float64 should only works on float and int values",
812 ))
813 }
814 };
815
816 if max < row_val {
817 max = row_val;
818 }
819 }
820
821 Ok(max)
822 }
823
824 /// Finds the minimum value of a specified column, specifically for `i64` values.
825 ///
826 /// # Errors
827 ///
828 /// Returns an error if:
829 ///
830 /// - The specified column doesn't exist.
831 /// - The specified column contains non-integer values (i.e., not `i64`).
832 ///
833 /// # Returns
834 ///
835 /// The minimum `i64` value in the specified column, or an error if one occurs.
836 pub fn min_int64(&self, column: &str) -> Result<i64, Box<dyn Error>> {
837 let index = self.get_col_index(column).expect("column doesn't exist");
838 let mut min = 0_i64;
839
840 for i in 1..self.data.len() {
841 let row_val = match self.data[i]
842 .get(index)
843 .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", index, i))
844 {
845 Cell::Int(x) => *x,
846 _ => return Err(Box::from("min_int64 should only works on int values")),
847 };
848
849 if i == 1 {
850 min = row_val;
851 continue;
852 }
853
854 if min > row_val {
855 min = row_val;
856 }
857 }
858
859 Ok(min)
860 }
861
862 /// Finds the minimum value of a specified column, working with both `f64` and `i64` values.
863 ///
864 /// # Errors
865 ///
866 /// Returns an error if:
867 ///
868 /// - The specified column doesn't exist.
869 /// - The specified column contains non-numeric values (i.e., not `f64` or `i64`).
870 ///
871 /// # Returns
872 ///
873 /// The minimum value in the specified column, either an `f64` or an `i64` cast to `f64`, or an error if one occurs.
874 pub fn min_float64(&self, column: &str) -> Result<f64, Box<dyn Error>> {
875 let index = self.get_col_index(column).expect("column doesn't exist");
876 let mut min = 0_f64;
877
878 for i in 1..self.data.len() {
879 let row_val = match self.data[i]
880 .get(index)
881 .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", index, i))
882 {
883 Cell::Float(f) => *f,
884 Cell::Int(i) => *i as f64,
885 _ => {
886 return Err(Box::from(
887 "min_float64 should only works on float and int values",
888 ))
889 }
890 };
891
892 if i == 1 {
893 min = row_val;
894 continue;
895 }
896
897 if min > row_val {
898 min = row_val;
899 }
900 }
901
902 Ok(min)
903 }
904
905 /// Prints general information about the sheet to the standard output in a formatted manner.
906 ///
907 /// This includes:
908 ///
909 /// - The first 5 rows of the sheet.
910 /// - A separator line.
911 /// - The last 5 rows of the sheet.
912 /// - The total number of rows and columns
913 pub fn describe(&self) {
914 println!("[");
915 for i in 0..5 {
916 print!("\t(");
917 self.data[i].iter().for_each(|cell| match cell {
918 Cell::String(s) => print!("{s},"),
919 Cell::Bool(b) => print!("{b},"),
920 Cell::Int(x) => print!("{x},"),
921 Cell::Float(f) => print!("{f},"),
922 Cell::Null => print!(" ,"),
923 });
924 println!(")");
925 }
926
927 let col_len = self.data[0].len();
928 for _ in 0..col_len * 10 {
929 print!("-");
930 }
931 println!();
932
933 let len = self.data.len();
934 for i in len - 5..len {
935 print!("\t(");
936 self.data[i].iter().for_each(|cell| match cell {
937 Cell::String(s) => print!("{s},"),
938 Cell::Bool(b) => print!("{b},"),
939 Cell::Int(x) => print!("{x},"),
940 Cell::Float(f) => print!("{f},"),
941 Cell::Null => print!("NULL,"),
942 });
943 println!(")");
944 }
945 println!("]");
946
947 println!(
948 "
949 number of rows: {len}
950 number of columns: {col_len}"
951 )
952 }
953
954 /// Prints the entire sheet to the standard output in a formatted manner.
955 ///
956 /// Each row is enclosed in parentheses and separated by commas, providing a visual representation of the sheet's structure and content.
957 pub fn pretty_print(&self) {
958 println!("[");
959 self.data.iter().for_each(|row| {
960 print!("\t(");
961 row.iter().for_each(|cell| match cell {
962 Cell::String(s) => print!("{s},"),
963 Cell::Bool(b) => print!("{b},"),
964 Cell::Int(x) => print!("{x},"),
965 Cell::Float(f) => print!("{f},"),
966 Cell::Null => print!(" ,"),
967 });
968 println!(")");
969 });
970 println!("]");
971 }
972
973 /// get_col_index returns the index of a given column, and None otherwise
974 fn get_col_index(&self, column: &str) -> Option<usize> {
975 for i in 0..self.data[0].len() {
976 if let Cell::String(colname) = &self.data[0][i] {
977 if colname == column {
978 return Some(i);
979 }
980 };
981 }
982
983 None
984 }
985}
986
987/// Parses a string token into the appropriate Cell type.
988///
989/// # Behavior
990///
991/// - Returns `Cell::Bool(true)` for the token "true".
992/// - Returns `Cell::Bool(false)` for the token "false".
993/// - Returns `Cell::Int(i64)` if the token can be parsed as an integer.
994/// - Returns `Cell::Float(f64)` if the token can be parsed as a floating-point number.
995/// - Returns `Cell::Null` if the token is empty.
996/// - Returns `Cell::String(token.to_string())` for any other string value.
997fn parse_token(token: &str) -> Cell {
998 if token == "true" {
999 return Cell::Bool(true);
1000 }
1001
1002 if token == "false" {
1003 return Cell::Bool(false);
1004 }
1005
1006 if let Ok(i) = token.parse::<i64>() {
1007 return Cell::Int(i);
1008 }
1009
1010 if let Ok(f) = token.parse::<f64>() {
1011 return Cell::Float(f);
1012 }
1013
1014 if token.is_empty() {
1015 return Cell::Null;
1016 }
1017
1018 Cell::String(token.to_string())
1019}
1020
1021#[cfg(test)]
1022mod tests;