datatroll/
lib.rs

1//! datatroll is a robust and user-friendly Rust library for efficiently loading, manipulating,
2//! and exporting data stored in CSV files. Say goodbye to tedious hand-coding data parsing and
3//! welcome a streamlined workflow for wrangling your data with ease.
4//!
5//! ## Features:
6//! - **Versatile Data Loading:**
7//!   - Read data from CSV files with configurable separators and headers.
8//!   - Specify data types for each column, ensuring type safety and efficient processing.
9//!   - Handle missing values with graceful error handling.
10//! - **Intuitive Data Manipulation:**
11//!     - Insert new rows with custom values into your data.
12//!     - Drop unwanted rows or columns to focus on relevant data.
13//!     - Leverage powerful aggregations to calculate:
14//!         - Mean, max, min, and median of numeric columns.
15//!         - Mode (most frequent value) of categorical columns.
16//!         - Variance of numeric columns.
17//!     - Apply custom transformations to specific columns using lambda functions.
18//!     - Supports Pagination
19//! - **Seamless Data Export:**
20//!     - Write manipulated data back to a new CSV file, retaining original format or specifying your own.
21//!     - Customize output with options like separator selection and header inclusion.
22//!
23//! # Example:
24//! ```rust
25//! use datatroll::{Cell, Sheet};
26//!
27//! fn main() {
28//!     // Read data from a CSV file
29//!     let data = "id ,title , director, release date, review
30//!1, old, quintin, 2011, 3.5
31//!2, her, quintin, 2013, 4.2
32//!3, easy, scorces, 2005, 1.0
33//!4, hey, nolan, 1997, 4.7
34//!5, who, martin, 2017, 5.0";
35//!     let mut sheet = Sheet::load_data_from_str(data);
36//!
37//!     // drop all the rows in which the review is less than 4.0
38//!     sheet.drop_rows("review", |c| {
39//!         if let Cell::Float(r) = c {
40//!             return *r < 4.0;
41//!         }
42//!         false
43//!     });
44//!
45//!     // calculate the variance of the review column
46//!     let variance = sheet.variance("review").unwrap();
47//!     println!("variance for review is: {variance}");
48//!     
49//!     // Write the transformed data to a new CSV file
50//!     if let Err(err) = sheet.export("output.csv") {
51//!         eprintln!("Error exporting data: {}", err);
52//!     } else {
53//!         println!("Data exported successfully to output.csv");
54//!     }
55//! }
56//! ```
57
58use std::{
59    error::Error,
60    fs::{File, OpenOptions},
61    io::{BufReader, BufWriter, Read, Write},
62};
63
64/// Represents different types of data that can be stored in a cell.
65#[derive(Debug, Clone, PartialEq, PartialOrd)]
66pub enum Cell {
67    Null,
68    String(String),
69    Bool(bool),
70    Int(i64),
71    Float(f64),
72}
73
74/// Represents a 2D vector of cells, forming a sheet of data.
75#[derive(Debug, Default)]
76pub struct Sheet {
77    /// 2D vector of cells
78    pub data: Vec<Vec<Cell>>,
79}
80
81impl Sheet {
82    /// new_sheet initialize a Sheet
83    fn new_sheet() -> Self {
84        Self {
85            data: Vec::<Vec<Cell>>::new(),
86        }
87    }
88
89    /// Loads data from a CSV file into the Sheet's data structure.
90    ///
91    /// This function reads the content of a CSV file specified by `file_path` and populates
92    /// the Sheet's data structure accordingly. The file must have a ".csv" extension, and
93    /// its content should be in CSV (Comma-Separated Values) format.
94    ///
95    /// # Arguments
96    ///
97    /// * `file_path` - The path to the CSV file to load.
98    ///
99    /// # Errors
100    ///
101    /// Returns a `Result` indicating success or an error if the file cannot be opened,
102    /// read, or if the file format is unsupported.
103    ///
104    /// # Examples
105    ///
106    /// ```rust
107    /// use datatroll::Sheet;
108    ///
109    /// if let Err(err) = Sheet::load_data("input.csv") {
110    ///     eprintln!("Error loading data: {}", err);
111    /// } else {
112    ///     println!("Data loaded successfully from input.csv");
113    /// }
114    /// ```
115    pub fn load_data(file_path: &str) -> Result<Self, Box<dyn Error>> {
116        let mut sheet = Self::new_sheet();
117        // check for ext
118        if file_path.split('.').last() != Some("csv") {
119            return Err(Box::from(
120                "the provided file path is invalid, or of unsupported format",
121            ));
122        }
123
124        let f = File::open(file_path)?;
125        let mut reader = BufReader::new(f);
126        let mut data = String::new();
127
128        reader.read_to_string(&mut data)?;
129
130        data.lines().for_each(|line| {
131            let row: Vec<Cell> = line.split(',').map(|s| s.trim()).map(parse_token).collect();
132            sheet.data.push(row);
133        });
134
135        // if some column values are absent from a row, then fill it with a default Cell::Null
136        let col_len = sheet.data[0].len();
137        for i in 1..sheet.data.len() {
138            let row_len = sheet.data[i].len();
139            if row_len < col_len {
140                for _ in 0..col_len - row_len {
141                    sheet.data[i].push(Cell::Null);
142                }
143            }
144        }
145
146        Ok(sheet)
147    }
148
149    pub fn load_data_from_str(data: &str) -> Self {
150        let mut sheet = Self::new_sheet();
151
152        data.lines().for_each(|line| {
153            let row: Vec<Cell> = line.split(',').map(|s| s.trim()).map(parse_token).collect();
154            sheet.data.push(row);
155        });
156
157        // if some column values are absent from a row, then fill it with a default Cell::Null
158        let col_len = sheet.data[0].len();
159        for i in 1..sheet.data.len() {
160            let row_len = sheet.data[i].len();
161            if row_len < col_len {
162                for _ in 0..col_len - row_len {
163                    sheet.data[i].push(Cell::Null);
164                }
165            }
166        }
167
168        sheet
169    }
170
171    /// Exports the content of a Sheet to a CSV file.
172    ///
173    /// The function writes the content of the Sheet into a CSV file specified by `file_path`.
174    /// If the file already exists, it truncates the file and overwrites its content.
175    ///
176    /// # Arguments
177    ///
178    /// * `file_path` - The path to the CSV file.
179    ///
180    /// # Examples
181    ///
182    /// ```rust
183    /// let cell_string = Cell::String(String::from("Hello, Rust!"));
184    /// let cell_int = Cell::Int(42);
185    ///
186    /// let row1 = vec![cell_string, Cell::Bool(true), cell_int];
187    /// let row2 = vec![Cell::Null, Cell::Float(3.14), Cell::String(String::from("World"))];
188    ///
189    /// let sheet = Sheet { data: vec![row1, row2] };
190    ///
191    /// if let Err(err) = sheet.export("output.csv") {
192    ///     eprintln!("Error exporting data: {}", err);
193    /// } else {
194    ///     println!("Data exported successfully to output.csv");
195    /// }
196    /// ```
197    ///
198    /// # Errors
199    ///
200    /// Returns an `Result` indicating success or failure.
201    ///
202    pub fn export(&self, file_path: &str) -> Result<(), Box<dyn Error>> {
203        // check for ext
204        if file_path.split('.').last() != Some("csv") {
205            return Err(Box::from(
206                "the provided file path is invalid, or of unsupported format",
207            ));
208        }
209
210        let file = OpenOptions::new()
211            .write(true)
212            .truncate(true)
213            .create(true)
214            .open(file_path)?;
215
216        let mut buf_writer = BufWriter::new(file);
217
218        for row in &self.data {
219            for cell in row {
220                match cell {
221                    Cell::Null => write!(buf_writer, ",")?,
222                    Cell::String(s) => write!(buf_writer, "{},", s)?,
223                    Cell::Bool(b) => write!(buf_writer, "{},", b)?,
224                    Cell::Int(i) => write!(buf_writer, "{},", i)?,
225                    Cell::Float(f) => write!(buf_writer, "{},", f)?,
226                }
227            }
228            writeln!(buf_writer)?; // Move to the next line after each row
229        }
230
231        buf_writer.flush()?; // Ensure any remaining data is written to the file
232        Ok(())
233    }
234
235    /// insert_row appends a row to the data sheet at the last position
236    ///
237    /// The function takes a comma seperated input string, trim the whitespace, parse it into a
238    /// vector oc Cell and then push it to the sheet.
239    ///
240    /// # Arguments
241    ///
242    /// * `input` - input string to be inserted.
243    ///
244    /// # Errors
245    ///
246    /// Returns a `Result` indicating success or an error if the input is of unvalid format
247    ///
248    /// # Examples
249    ///
250    /// ```rust
251    /// let row1 = vec![Cell::String("Hello, Rust!".to_string()), Cell::Bool(true), Cell::Int(42)];
252    /// let sheet = Sheet { data: vec![row1] };
253    ///
254    /// sheet.insert_row(",3.14,World")?;
255    ///
256    /// assert_eq!(sheet[0], row1);
257    /// assert_eq!(sheet[1], vec![Cell::Null, Cell::Float(3.14), Cell::String("World".to_string()]);
258    /// ```
259    pub fn insert_row(&mut self, input: &str) -> Result<(), Box<dyn Error>> {
260        let row: Vec<Cell> = input
261            .split(',')
262            .map(|s| s.trim())
263            .map(parse_token)
264            .collect();
265        if row.len() != self.data[0].len() {
266            return Err(Box::from("invalid input"));
267        }
268
269        self.data.push(row);
270        Ok(())
271    }
272
273    /// fill_col replace the value of a column in every row
274    ///
275    /// The function takes a column name and the value to be filled, and iterate through every row
276    /// and effectively replace its old cell values with the new value
277    ///
278    /// # Arguments
279    ///
280    /// * `column` - the column to be mutated
281    /// * `value` - the value which every row will be filled with
282    ///
283    /// # Errors
284    ///
285    /// Returns a `Result` indicating success or an error
286    ///
287    /// # Examples
288    ///
289    /// ```rust
290    /// let row1 = vec![Cell::String("greeting".to_string()), Cell::String("is_good".to_string()), Cell::String("count".to_string())];
291    /// let row2 = vec![Cell::String("Hello, Rust!".to_string()), Cell::Bool(false), Cell::Int(42)];
292    /// let row3 = vec![Cell::String("Hello, World!".to_string()), Cell::Bool(true), Cell::Int(145)];
293    /// let sheet = Sheet { data: vec![row1, row2, row3] };
294    ///
295    /// sheet.fill_col("greeting", Cell::Null)?;
296    ///
297    /// assert_eq!(sheet[1][0], Cell::Null);
298    /// assert_eq!(sheet[1][0], Cell::Null);
299    /// ```
300    pub fn fill_col(&mut self, column: &str, value: Cell) -> Result<(), Box<dyn Error>> {
301        let col_index = self.get_col_index(column).expect("column doesn't exist");
302        for i in 1..self.data.len() {
303            let cell = self.data[i]
304                .get_mut(col_index)
305                .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", col_index, i));
306
307            *cell = value.clone();
308        }
309
310        Ok(())
311    }
312
313    /// paginate takes part of a sheet with a fixed size and return it
314    ///
315    /// The function takes a page number and a page size, and slice the sheet and returns it as a page
316    /// of fixed size
317    ///
318    /// # Arguments
319    ///
320    /// * `page` - the number of the page
321    /// * `size` - number of rows for every page
322    ///
323    /// # Errors
324    ///
325    /// Returns a `Result` indicating success or an error
326    ///
327    /// # Examples
328    ///
329    /// ```rust
330    /// let row1 = vec![Cell::String("greeting".to_string()), Cell::String("is_good".to_string()), Cell::String("count".to_string())];
331    /// let row2 = vec![Cell::String("Hello, Rust!".to_string()), Cell::Bool(false), Cell::Int(42)];
332    /// let row3 = vec![Cell::String("Hello, World!".to_string()), Cell::Bool(true), Cell::Int(145)];
333    /// let row4 = vec![Cell::String("Hello, Dzair!".to_string()), Cell::Bool(true), Cell::Int(145)];
334    /// let row5 = vec![Cell::String("Hello, Africa!".to_string()), Cell::Bool(true), Cell::Int(145)];
335    /// let row6 = vec![Cell::String("Hello, Algeria!".to_string()), Cell::Bool(true), Cell::Int(145)];
336    /// let row7 = vec![Cell::String("Hello, Friday!".to_string()), Cell::Bool(true), Cell::Int(145)];
337    /// let sheet = Sheet { data: vec![row1, row2, row3, row4, row5, row6, row7] };
338    ///
339    /// let page = sheet.paginate(1, 2)?;
340    ///
341    /// assert_eq!(page[0][0], Cell::String("Hello, Rust!".to_string()));
342    /// assert_eq!(page[1][0], Cell::String("Hello, World!".to_string()));
343    /// ```
344    pub fn paginate(&self, page: usize, size: usize) -> Result<Vec<Vec<Cell>>, Box<dyn Error>> {
345        if page < 1 || size > 50 {
346            return Err(Box::from(
347                "page should more than or equal 1, size should 50 per page at max",
348            ));
349        }
350        if self.data.len() < size {
351            return Err(Box::from("page unavailabe"));
352        }
353
354        let mut res: Vec<Vec<Cell>> = Default::default();
355        let offset = ((page - 1) * size) + 1;
356
357        for i in offset..(offset + size) {
358            let row = self.data.get(i).unwrap_or_else(|| {
359                panic!(
360                    "offset '{}' and amount '{}' are out of bounds",
361                    offset, size
362                )
363            });
364            res.push(row.clone())
365        }
366
367        Ok(res)
368    }
369
370    /// Finds the first row in the table that matches a predicate applied to a specific column.
371    ///
372    /// # Panics
373    ///
374    /// Panics if the specified column doesn't exist or is absent for a row.
375    ///
376    /// # Examples
377    ///
378    /// ```rust
379    /// let mut sheet = Sheet::new_sheet();
380    /// sheet.load_data("test_data.csv").unwrap();
381    /// let first_matching_rows = sheet.find_rows("Age", |cell| cell.as_int() >= 30);
382    /// ```
383    ///
384    /// # Generics
385    ///
386    /// The `predicate` argument is a generic function that allows for flexible filtering criteria.
387    /// It accepts a reference to a `Cell` and returns a boolean indicating whether the row matches.
388    ///
389    /// # Returns
390    ///
391    /// An `Option<&Vec<Cell>>`:
392    /// - `Some(&row)` if a matching row is found, where `row` is a reference to the first matching row.
393    /// - `None` if no matching row is found.
394    pub fn find_first_row<F>(&self, column: &str, predicate: F) -> Option<&Vec<Cell>>
395    where
396        F: FnOnce(&Cell) -> bool + Copy,
397    {
398        let col_index = self.get_col_index(column).expect("column doesn't exist");
399
400        for i in 1..self.data.len() {
401            let cell = self.data[i]
402                .get(col_index)
403                .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", col_index, i));
404            if predicate(cell) {
405                return Some(&self.data[i]);
406            }
407        }
408
409        None
410    }
411
412    /// Finds rows in the table that match a predicate applied to a specific column.
413    ///
414    /// # Panics
415    ///
416    /// Panics if the specified column doesn't exist or is absent for a row.
417    ///
418    /// # Examples
419    ///
420    /// ```rust
421    /// let mut sheet = Sheet::new_sheet();
422    /// sheet.load_data("test_data.csv").unwrap();
423    /// let matching_rows = sheet.filter("Age", |cell| cell.as_int() >= 30);
424    /// ```
425    ///
426    /// # Generics
427    ///
428    /// The `predicate` argument is a generic function that allows for flexible filtering criteria.
429    /// It accepts a reference to a `Cell` and returns a boolean indicating whether the row matches.
430    ///
431    /// # Returns
432    ///
433    /// A vector of vectors, where each inner vector represents a row that matches the predicate.
434    pub fn filter<F>(&self, column: &str, predicate: F) -> Vec<Vec<Cell>>
435    where
436        F: FnOnce(&Cell) -> bool + Copy,
437    {
438        let col_index = self.get_col_index(column).expect("column doesn't exist");
439        let mut res: Vec<Vec<Cell>> = Default::default();
440
441        for i in 1..self.data.len() {
442            let cell = self.data[i]
443                .get(col_index)
444                .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", col_index, i));
445            if predicate(cell) {
446                res.push(self.data[i].clone());
447            }
448        }
449
450        res
451    }
452
453    /// The map function applies a given transformation to each column value of rows.
454    ///
455    /// # Errors
456    ///
457    /// Returns a `Result` indicating success or an error
458    ///
459    /// # Examples
460    ///
461    /// ```rust
462    /// use datatroll::{Sheet, Cell};
463    ///
464    ///let data = "id ,title , director, release date, review
465    ///1, old, quintin, 2011, 3.5
466    ///2, her, quintin, 2013, 4.2
467    ///3, easy, scorces, 2005, 1.0
468    ///4, hey, nolan, 1997, 4.7
469    ///5, who, martin, 2017, 5.0";
470    ///
471    /// let mut sheet = Sheet::load_data_from_str(data);
472    ///
473    /// let result = sheet.map("title", |c| match c {
474    ///     Cell::String(s) => Cell::String(s.to_uppercase()),
475    ///     _ => return c,
476    /// });
477    /// 
478    /// assert!(result.is_ok());
479    /// ```
480    pub fn map<F>(&mut self, column: &str, transform: F) -> Result<(), String>
481    where
482        F: Fn(Cell) -> Cell,
483    {
484        match self.get_col_index(column) {
485            Some(i) => {
486                self.data
487                    .iter_mut()
488                    .for_each(|row| row[i] = transform(row[i].clone()));
489                Ok(())
490            }
491            None => Err(format!("could not find column '{column}'")),
492        }
493    }
494
495    /// Removes rows from the table based on a predicate applied to a specific column.
496    ///
497    /// # Panics
498    ///
499    /// Panics if the specified column doesn't exist.
500    ///
501    /// # Examples
502    ///
503    /// ```rust
504    /// let mut sheet = Sheet::new_sheet();
505    /// sheet.load_data("test_data.csv").unwrap();
506    /// sheet.drop_rows("Age", |cell| cell.as_int() >= 30); // Removes rows where age is 30 or older
507    /// ```
508    ///
509    /// # Generics
510    ///
511    /// The `predicate` argument is a generic function that allows for flexible filtering criteria.
512    /// It accepts a reference to a `Cell` and returns a boolean indicating whether to keep the row.
513    pub fn drop_rows<F>(&mut self, column: &str, predicate: F)
514    where
515        F: FnOnce(&Cell) -> bool + Copy,
516    {
517        let col_index = self.get_col_index(column).expect("column doesn't exist");
518        self.data.retain(|row| !predicate(&row[col_index]));
519    }
520
521    /// Removes a specified column from the table and returns the number of rows affected.
522    ///
523    /// # Panics
524    ///
525    /// Panics if the specified column doesn't exist.
526    ///
527    /// # Returns
528    ///
529    /// The number of rows that were modified by removing the column.
530    ///
531    /// # Examples
532    ///
533    /// ```rust
534    /// let mut sheet = Sheet::new_sheet();
535    /// sheet.load_data("test_data.csv").unwrap();
536    /// let rows_affected = sheet.drop_col("id") // Removes the "id" column and returns 5
537    /// ```
538    pub fn drop_col(&mut self, column: &str) -> i32 {
539        let col_index = self.get_col_index(column).expect("column doesn't exist");
540        let mut rows_affected = 0;
541        for i in 0..self.data.len() {
542            self.data[i].remove(col_index);
543            rows_affected += 1;
544        }
545
546        rows_affected
547    }
548
549    /// Calculates the mean (average) of a specified column.
550    ///
551    /// The mean is the sum of all values in a data set divided by the number of values.
552    ///
553    /// # Formula
554    ///
555    /// X̄ = (ΣX) / N
556    ///
557    /// Where:
558    /// - X̄ is the mean
559    /// - ΣX is the sum of all values in the column
560    /// - N is the number of values in the column
561    ///
562    /// # Errors
563    ///
564    /// Returns an error if:
565    ///
566    /// - The specified column doesn't exist.
567    /// - The specified column contains non-numeric values (i.e., not `i64` or `f64`).
568    ///
569    /// # Examples
570    ///
571    /// ```rust
572    /// let mut sheet = Sheet::new_sheet();
573    /// sheet.load_data("test_data.csv").unwrap();
574    /// let re_mean = sheet.mean("release year")?; // Returns the mean of the "Age" column
575    /// ```
576    ///
577    /// # Returns
578    ///
579    /// The mean of the specified column as an `f64`, or an error if one occurs.
580    pub fn mean(&self, column: &str) -> Result<f64, Box<dyn Error>> {
581        let index = self.get_col_index(column).expect("column doesn't exist");
582        let mut sum = 0_f64;
583
584        for i in 1..self.data.len() {
585            let val = match self.data[i]
586                .get(index)
587                .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", index, i))
588            {
589                Cell::Int(x) => *x as f64,
590                Cell::Float(f) => *f,
591                _ => return Err(Box::from("column value should be an i64 or a f64")),
592            };
593
594            sum += val
595        }
596
597        Ok(sum / ((self.data.len() - 1) as f64))
598    }
599
600    /// Calculates the variance of a specified column.
601    ///
602    /// Variance measures how far a set of numbers are spread out from their average value.
603    /// It is calculated as the average of the squared differences from the mean.
604    ///
605    /// # Formula
606    ///
607    /// Var(X) = E[(X - μ)²]
608    ///
609    /// Where:
610    /// - Var(X) is the variance
611    /// - E denotes the expected value (average)
612    /// - X is the random variable (the values in the column)
613    /// - μ is the mean of X
614    ///
615    /// # Errors
616    ///
617    /// Returns an error if:
618    ///
619    /// - The specified column doesn't exist.
620    /// - The specified column contains non-numeric values (i.e., not `i64` or `f64`).
621    ///
622    /// # Examples
623    ///
624    /// ```rust
625    /// let mut sheet = Sheet::new_sheet();
626    /// sheet.load_data("test_data.csv").unwrap();
627    /// let re_variance = sheet.variance("release year")?; // Returns the variance of the "release year" column
628    /// ```
629    ///
630    /// # Returns
631    ///
632    /// The variance of the specified column as an `f64`, or an error if one occurs.
633    pub fn variance(&self, column: &str) -> Result<f64, Box<dyn Error>> {
634        let mean = self.mean(column)?;
635
636        let index = self.get_col_index(column).expect("column doesn't exist");
637        let mut total_sum = 0_f64;
638        for i in 1..self.data.len() {
639            let val = match self.data[i]
640                .get(index)
641                .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", index, i))
642            {
643                Cell::Int(x) => *x as f64,
644                Cell::Float(f) => *f,
645                _ => return Err(Box::from("column value should be an i64 or a f64")),
646            };
647
648            total_sum += (val - mean).powf(2.0)
649        }
650
651        Ok(total_sum / (self.data.len() - 1) as f64)
652    }
653
654    /// Calculates the median value of a specified column.
655    ///
656    /// The median is the value that separates the higher half of a data set from the lower half.
657    /// In this case, it's the value that falls in the middle of the column when the data is sorted.
658    ///
659    /// # Panics
660    ///
661    /// Panics if:
662    ///
663    /// - The specified column doesn't exist.
664    /// - The specified column is absent for the middle row.
665    ///
666    /// # Examples
667    ///
668    /// ```rust
669    /// let mut sheet = Sheet::new_sheet();
670    /// sheet.load_data("test_data.csv").unwrap();
671    /// let median_id = sheet.median("id")?; // Returns a &Int(3)
672    /// ```
673    /// # Returns
674    ///
675    /// A reference to the `Cell` containing the median value of the specified column.
676    pub fn median(&self, column: &str) -> &Cell {
677        let col_index = self.get_col_index(column).expect("column doesn't exist");
678        let row_index = ((self.data.len() - 1) + 1) / 2;
679
680        self.data[row_index]
681            .get(col_index)
682            .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", col_index, row_index))
683    }
684
685    /// mode get the most frequent items of a column
686    ///
687    /// The function gets a vector of the most frequent items in a column, alongside their number of
688    /// occurences.
689    ///
690    /// # Arguments
691    ///
692    /// * `columnn` - the name of the column
693    ///
694    /// # Examples
695    ///
696    /// ```rust
697    /// let mut sheet = Sheet::new_sheet();
698    /// sheet.load_data("test_data.csv").unwrap();
699    ///
700    /// let multimodal = sheet.mode("director");
701    /// println!("mode: {:?}", multimodal) // mode: [(String("quintin"), 2), (String("martin"), 2)]
702    ///```
703    pub fn mode(&self, column: &str) -> Vec<(Cell, i32)> {
704        let col_index = self.get_col_index(column).expect("column doesn't exist");
705        let fq = self.build_frequency_table(col_index);
706        let mut max = 0;
707        let mut multi_mode: Vec<(Cell, i32)> = Vec::new();
708
709        for item in fq.iter() {
710            if max <= item.1 {
711                max = item.1;
712                multi_mode.push(item.clone());
713            }
714        }
715
716        multi_mode
717    }
718
719    /// Builds a frequency table for a specified column, counting the occurrences of each unique value.
720    ///
721    /// # Panics
722    ///
723    /// Panics if the specified column doesn't exist or is absent for a row.
724    ///
725    /// # Returns
726    ///
727    /// A vector of tuples `(Cell, i32)`, where:
728    /// - `Cell` is the unique value from the column.
729    /// - `i32` is the frequency (count) of that value in the column.
730    fn build_frequency_table(&self, col_index: usize) -> Vec<(Cell, i32)> {
731        let mut fq: Vec<(Cell, i32)> = Vec::new();
732
733        for i in 1..self.data.len() {
734            let cell = self.data[i]
735                .get(col_index)
736                .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", col_index, i));
737            if fq.is_empty() {
738                fq.push((cell.clone(), 1));
739                continue;
740            }
741
742            let index = fq.iter().position(|item| item.0 == *cell);
743            if let Some(idx) = index {
744                fq[idx].1 += 1;
745            } else if index.is_none() {
746                fq.push((cell.clone(), 1));
747            }
748        }
749
750        fq
751    }
752
753    /// Finds the maximum value of a specified column, specifically for `i64` values.
754    ///
755    /// # Errors
756    ///
757    /// Returns an error if:
758    ///
759    /// - The specified column doesn't exist.
760    /// - The specified column contains non-integer values (i.e., not `i64`).
761    ///
762    /// # Returns
763    ///
764    /// The maximum `i64` value in the specified column, or an error if one occurs.
765    pub fn max_int64(&self, column: &str) -> Result<i64, Box<dyn Error>> {
766        let index = self.get_col_index(column).expect("column doesn't exist");
767        let mut max = 0_i64;
768
769        for i in 1..self.data.len() {
770            let row_val = match self.data[i]
771                .get(index)
772                .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", index, i))
773            {
774                Cell::Int(x) => *x,
775                _ => return Err(Box::from("max_int64 should only works on int values")),
776            };
777
778            if max < row_val {
779                max = row_val;
780            }
781        }
782
783        Ok(max)
784    }
785
786    /// Finds the maximum value of a specified column, working with both `f64` and `i64` values.
787    ///
788    /// # Errors
789    ///
790    /// Returns an error if:
791    ///
792    /// - The specified column doesn't exist.
793    /// - The specified column contains non-numeric values (i.e., not `f64` or `i64`).
794    ///
795    /// # Returns
796    ///
797    /// The maximum value in the specified column, either an `f64` or an `i64` cast to `f64`, or an error if one occurs.
798    pub fn max_float64(&self, column: &str) -> Result<f64, Box<dyn Error>> {
799        let index = self.get_col_index(column).expect("column doesn't exist");
800        let mut max = 0_f64;
801
802        for i in 1..self.data.len() {
803            let row_val = match self.data[i]
804                .get(index)
805                .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", index, i))
806            {
807                Cell::Float(f) => *f,
808                Cell::Int(i) => *i as f64,
809                _ => {
810                    return Err(Box::from(
811                        "max_float64 should only works on float and int values",
812                    ))
813                }
814            };
815
816            if max < row_val {
817                max = row_val;
818            }
819        }
820
821        Ok(max)
822    }
823
824    /// Finds the minimum value of a specified column, specifically for `i64` values.
825    ///
826    /// # Errors
827    ///
828    /// Returns an error if:
829    ///
830    /// - The specified column doesn't exist.
831    /// - The specified column contains non-integer values (i.e., not `i64`).
832    ///
833    /// # Returns
834    ///
835    /// The minimum `i64` value in the specified column, or an error if one occurs.
836    pub fn min_int64(&self, column: &str) -> Result<i64, Box<dyn Error>> {
837        let index = self.get_col_index(column).expect("column doesn't exist");
838        let mut min = 0_i64;
839
840        for i in 1..self.data.len() {
841            let row_val = match self.data[i]
842                .get(index)
843                .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", index, i))
844            {
845                Cell::Int(x) => *x,
846                _ => return Err(Box::from("min_int64 should only works on int values")),
847            };
848
849            if i == 1 {
850                min = row_val;
851                continue;
852            }
853
854            if min > row_val {
855                min = row_val;
856            }
857        }
858
859        Ok(min)
860    }
861
862    /// Finds the minimum value of a specified column, working with both `f64` and `i64` values.
863    ///
864    /// # Errors
865    ///
866    /// Returns an error if:
867    ///
868    /// - The specified column doesn't exist.
869    /// - The specified column contains non-numeric values (i.e., not `f64` or `i64`).
870    ///
871    /// # Returns
872    ///
873    /// The minimum value in the specified column, either an `f64` or an `i64` cast to `f64`, or an error if one occurs.
874    pub fn min_float64(&self, column: &str) -> Result<f64, Box<dyn Error>> {
875        let index = self.get_col_index(column).expect("column doesn't exist");
876        let mut min = 0_f64;
877
878        for i in 1..self.data.len() {
879            let row_val = match self.data[i]
880                .get(index)
881                .unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", index, i))
882            {
883                Cell::Float(f) => *f,
884                Cell::Int(i) => *i as f64,
885                _ => {
886                    return Err(Box::from(
887                        "min_float64 should only works on float and int values",
888                    ))
889                }
890            };
891
892            if i == 1 {
893                min = row_val;
894                continue;
895            }
896
897            if min > row_val {
898                min = row_val;
899            }
900        }
901
902        Ok(min)
903    }
904
905    /// Prints general information about the sheet to the standard output in a formatted manner.
906    ///
907    /// This includes:
908    ///
909    /// - The first 5 rows of the sheet.
910    /// - A separator line.
911    /// - The last 5 rows of the sheet.
912    /// - The total number of rows and columns
913    pub fn describe(&self) {
914        println!("[");
915        for i in 0..5 {
916            print!("\t(");
917            self.data[i].iter().for_each(|cell| match cell {
918                Cell::String(s) => print!("{s},"),
919                Cell::Bool(b) => print!("{b},"),
920                Cell::Int(x) => print!("{x},"),
921                Cell::Float(f) => print!("{f},"),
922                Cell::Null => print!(" ,"),
923            });
924            println!(")");
925        }
926
927        let col_len = self.data[0].len();
928        for _ in 0..col_len * 10 {
929            print!("-");
930        }
931        println!();
932
933        let len = self.data.len();
934        for i in len - 5..len {
935            print!("\t(");
936            self.data[i].iter().for_each(|cell| match cell {
937                Cell::String(s) => print!("{s},"),
938                Cell::Bool(b) => print!("{b},"),
939                Cell::Int(x) => print!("{x},"),
940                Cell::Float(f) => print!("{f},"),
941                Cell::Null => print!("NULL,"),
942            });
943            println!(")");
944        }
945        println!("]");
946
947        println!(
948            "
949            number of rows: {len}
950            number of columns: {col_len}"
951        )
952    }
953
954    /// Prints the entire sheet to the standard output in a formatted manner.
955    ///
956    /// Each row is enclosed in parentheses and separated by commas, providing a visual representation of the sheet's structure and content.
957    pub fn pretty_print(&self) {
958        println!("[");
959        self.data.iter().for_each(|row| {
960            print!("\t(");
961            row.iter().for_each(|cell| match cell {
962                Cell::String(s) => print!("{s},"),
963                Cell::Bool(b) => print!("{b},"),
964                Cell::Int(x) => print!("{x},"),
965                Cell::Float(f) => print!("{f},"),
966                Cell::Null => print!(" ,"),
967            });
968            println!(")");
969        });
970        println!("]");
971    }
972
973    /// get_col_index returns the index of a given column, and None otherwise
974    fn get_col_index(&self, column: &str) -> Option<usize> {
975        for i in 0..self.data[0].len() {
976            if let Cell::String(colname) = &self.data[0][i] {
977                if colname == column {
978                    return Some(i);
979                }
980            };
981        }
982
983        None
984    }
985}
986
987/// Parses a string token into the appropriate Cell type.
988///
989/// # Behavior
990///
991/// - Returns `Cell::Bool(true)` for the token "true".
992/// - Returns `Cell::Bool(false)` for the token "false".
993/// - Returns `Cell::Int(i64)` if the token can be parsed as an integer.
994/// - Returns `Cell::Float(f64)` if the token can be parsed as a floating-point number.
995/// - Returns `Cell::Null` if the token is empty.
996/// - Returns `Cell::String(token.to_string())` for any other string value.
997fn parse_token(token: &str) -> Cell {
998    if token == "true" {
999        return Cell::Bool(true);
1000    }
1001
1002    if token == "false" {
1003        return Cell::Bool(false);
1004    }
1005
1006    if let Ok(i) = token.parse::<i64>() {
1007        return Cell::Int(i);
1008    }
1009
1010    if let Ok(f) = token.parse::<f64>() {
1011        return Cell::Float(f);
1012    }
1013
1014    if token.is_empty() {
1015        return Cell::Null;
1016    }
1017
1018    Cell::String(token.to_string())
1019}
1020
1021#[cfg(test)]
1022mod tests;
datatroll/lib.rs

datatroll/
lib.rs