use std::{
error::Error,
fs::{File, OpenOptions},
io::{BufReader, BufWriter, Read, Write},
};
/// A single typed value stored in a sheet.
///
/// The derived `PartialOrd` compares the variant first, in declaration order
/// (`Null` < `String` < `Bool` < `Int` < `Float`), and only compares payloads
/// when both sides are the same variant. `Int` and `Float` values are therefore
/// not ordered numerically against each other by the derived comparison.
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub enum Cell {
/// A missing value: produced for empty tokens and used to pad short rows.
Null,
/// Any token that is not recognised as a boolean or a number.
String(String),
/// A boolean, parsed from the exact tokens `true` / `false`.
Bool(bool),
/// A signed 64-bit integer.
Int(i64),
/// A 64-bit floating point number.
Float(f64),
}
/// An in-memory table of `Cell` values, stored row by row.
#[derive(Debug, Default)]
pub struct Sheet {
/// The rows of the table. The methods implemented on `Sheet` in this file
/// look up column names in row 0 and treat the following rows as data.
pub data: Vec<Vec<Cell>>,
}
impl Sheet {
fn new_sheet() -> Self {
Self {
data: Vec::<Vec<Cell>>::new(),
}
}
pub fn load_data(file_path: &str) -> Result<Self, Box<dyn Error>> {
let mut sheet = Self::new_sheet();
if file_path.split('.').last() != Some("csv") {
return Err(Box::from(
"the provided file path is invalid, or of unsupported format",
));
}
let f = File::open(file_path)?;
let mut reader = BufReader::new(f);
let mut data = String::new();
reader.read_to_string(&mut data)?;
data.lines().for_each(|line| {
let row: Vec<Cell> = line.split(',').map(|s| s.trim()).map(parse_token).collect();
sheet.data.push(row);
});
let col_len = sheet.data[0].len();
for i in 1..sheet.data.len() {
let row_len = sheet.data[i].len();
if row_len < col_len {
for _ in 0..col_len - row_len {
sheet.data[i].push(Cell::Null);
}
}
}
Ok(sheet)
}
pub fn load_data_from_str(data: &str) -> Self {
let mut sheet = Self::new_sheet();
data.lines().for_each(|line| {
let row: Vec<Cell> = line.split(',').map(|s| s.trim()).map(parse_token).collect();
sheet.data.push(row);
});
let col_len = sheet.data[0].len();
for i in 1..sheet.data.len() {
let row_len = sheet.data[i].len();
if row_len < col_len {
for _ in 0..col_len - row_len {
sheet.data[i].push(Cell::Null);
}
}
}
sheet
}
pub fn export(&self, file_path: &str) -> Result<(), Box<dyn Error>> {
if file_path.split('.').last() != Some("csv") {
return Err(Box::from(
"the provided file path is invalid, or of unsupported format",
));
}
let file = OpenOptions::new()
.write(true)
.truncate(true)
.create(true)
.open(file_path)?;
let mut buf_writer = BufWriter::new(file);
for row in &self.data {
for cell in row {
match cell {
Cell::Null => write!(buf_writer, ",")?,
Cell::String(s) => write!(buf_writer, "{},", s)?,
Cell::Bool(b) => write!(buf_writer, "{},", b)?,
Cell::Int(i) => write!(buf_writer, "{},", i)?,
Cell::Float(f) => write!(buf_writer, "{},", f)?,
}
}
writeln!(buf_writer)?; }
buf_writer.flush()?; Ok(())
}
pub fn insert_row(&mut self, input: &str) -> Result<(), Box<dyn Error>> {
let row: Vec<Cell> = input
.split(',')
.map(|s| s.trim())
.map(parse_token)
.collect();
if row.len() != self.data[0].len() {
return Err(Box::from("invalid input"));
}
self.data.push(row);
Ok(())
}
pub fn fill_col(&mut self, column: &str, value: Cell) -> Result<(), Box<dyn Error>> {
let col_index = self.get_col_index(column).expect("column doesn't exist");
for i in 1..self.data.len() {
let cell = self.data[i]
.get_mut(col_index)
.unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", col_index, i));
*cell = value.clone();
}
Ok(())
}
pub fn paginate(&self, page: usize, size: usize) -> Result<Vec<Vec<Cell>>, Box<dyn Error>> {
if page < 1 || size > 50 {
return Err(Box::from(
"page should more than or equal 1, size should 50 per page at max",
));
}
if self.data.len() < size {
return Err(Box::from("page unavailabe"));
}
let mut res: Vec<Vec<Cell>> = Default::default();
let offset = ((page - 1) * size) + 1;
for i in offset..(offset + size) {
let row = self.data.get(i).unwrap_or_else(|| {
panic!(
"offset '{}' and amount '{}' are out of bounds",
offset, size
)
});
res.push(row.clone())
}
Ok(res)
}
pub fn find_first_row<F>(&self, column: &str, predicate: F) -> Option<&Vec<Cell>>
where
F: FnOnce(&Cell) -> bool + Copy,
{
let col_index = self.get_col_index(column).expect("column doesn't exist");
for i in 1..self.data.len() {
let cell = self.data[i]
.get(col_index)
.unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", col_index, i));
if predicate(cell) {
return Some(&self.data[i]);
}
}
None
}
pub fn filter<F>(&self, column: &str, predicate: F) -> Vec<Vec<Cell>>
where
F: FnOnce(&Cell) -> bool + Copy,
{
let col_index = self.get_col_index(column).expect("column doesn't exist");
let mut res: Vec<Vec<Cell>> = Default::default();
for i in 1..self.data.len() {
let cell = self.data[i]
.get(col_index)
.unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", col_index, i));
if predicate(cell) {
res.push(self.data[i].clone());
}
}
res
}
pub fn map<F>(&mut self, column: &str, transform: F) -> Result<(), String>
where
F: Fn(Cell) -> Cell,
{
match self.get_col_index(column) {
Some(i) => {
self.data
.iter_mut()
.for_each(|row| row[i] = transform(row[i].clone()));
Ok(())
}
None => Err(format!("could not find column '{column}'")),
}
}
pub fn drop_rows<F>(&mut self, column: &str, predicate: F)
where
F: FnOnce(&Cell) -> bool + Copy,
{
let col_index = self.get_col_index(column).expect("column doesn't exist");
self.data.retain(|row| !predicate(&row[col_index]));
}
pub fn drop_col(&mut self, column: &str) -> i32 {
let col_index = self.get_col_index(column).expect("column doesn't exist");
let mut rows_affected = 0;
for i in 0..self.data.len() {
self.data[i].remove(col_index);
rows_affected += 1;
}
rows_affected
}
pub fn mean(&self, column: &str) -> Result<f64, Box<dyn Error>> {
let index = self.get_col_index(column).expect("column doesn't exist");
let mut sum = 0_f64;
for i in 1..self.data.len() {
let val = match self.data[i]
.get(index)
.unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", index, i))
{
Cell::Int(x) => *x as f64,
Cell::Float(f) => *f,
_ => return Err(Box::from("column value should be an i64 or a f64")),
};
sum += val
}
Ok(sum / ((self.data.len() - 1) as f64))
}
pub fn variance(&self, column: &str) -> Result<f64, Box<dyn Error>> {
let mean = self.mean(column)?;
let index = self.get_col_index(column).expect("column doesn't exist");
let mut total_sum = 0_f64;
for i in 1..self.data.len() {
let val = match self.data[i]
.get(index)
.unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", index, i))
{
Cell::Int(x) => *x as f64,
Cell::Float(f) => *f,
_ => return Err(Box::from("column value should be an i64 or a f64")),
};
total_sum += (val - mean).powf(2.0)
}
Ok(total_sum / (self.data.len() - 1) as f64)
}
pub fn median(&self, column: &str) -> &Cell {
let col_index = self.get_col_index(column).expect("column doesn't exist");
let row_index = ((self.data.len() - 1) + 1) / 2;
self.data[row_index]
.get(col_index)
.unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", col_index, row_index))
}
pub fn mode(&self, column: &str) -> Vec<(Cell, i32)> {
let col_index = self.get_col_index(column).expect("column doesn't exist");
let fq = self.build_frequency_table(col_index);
let mut max = 0;
let mut multi_mode: Vec<(Cell, i32)> = Vec::new();
for item in fq.iter() {
if max <= item.1 {
max = item.1;
multi_mode.push(item.clone());
}
}
multi_mode
}
fn build_frequency_table(&self, col_index: usize) -> Vec<(Cell, i32)> {
let mut fq: Vec<(Cell, i32)> = Vec::new();
for i in 1..self.data.len() {
let cell = self.data[i]
.get(col_index)
.unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", col_index, i));
if fq.is_empty() {
fq.push((cell.clone(), 1));
continue;
}
let index = fq.iter().position(|item| item.0 == *cell);
if let Some(idx) = index {
fq[idx].1 += 1;
} else if index.is_none() {
fq.push((cell.clone(), 1));
}
}
fq
}
pub fn max_int64(&self, column: &str) -> Result<i64, Box<dyn Error>> {
let index = self.get_col_index(column).expect("column doesn't exist");
let mut max = 0_i64;
for i in 1..self.data.len() {
let row_val = match self.data[i]
.get(index)
.unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", index, i))
{
Cell::Int(x) => *x,
_ => return Err(Box::from("max_int64 should only works on int values")),
};
if max < row_val {
max = row_val;
}
}
Ok(max)
}
pub fn max_float64(&self, column: &str) -> Result<f64, Box<dyn Error>> {
let index = self.get_col_index(column).expect("column doesn't exist");
let mut max = 0_f64;
for i in 1..self.data.len() {
let row_val = match self.data[i]
.get(index)
.unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", index, i))
{
Cell::Float(f) => *f,
Cell::Int(i) => *i as f64,
_ => {
return Err(Box::from(
"max_float64 should only works on float and int values",
))
}
};
if max < row_val {
max = row_val;
}
}
Ok(max)
}
pub fn min_int64(&self, column: &str) -> Result<i64, Box<dyn Error>> {
let index = self.get_col_index(column).expect("column doesn't exist");
let mut min = 0_i64;
for i in 1..self.data.len() {
let row_val = match self.data[i]
.get(index)
.unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", index, i))
{
Cell::Int(x) => *x,
_ => return Err(Box::from("min_int64 should only works on int values")),
};
if i == 1 {
min = row_val;
continue;
}
if min > row_val {
min = row_val;
}
}
Ok(min)
}
pub fn min_float64(&self, column: &str) -> Result<f64, Box<dyn Error>> {
let index = self.get_col_index(column).expect("column doesn't exist");
let mut min = 0_f64;
for i in 1..self.data.len() {
let row_val = match self.data[i]
.get(index)
.unwrap_or_else(|| panic!("column '{}' is absent for row '{}'", index, i))
{
Cell::Float(f) => *f,
Cell::Int(i) => *i as f64,
_ => {
return Err(Box::from(
"min_float64 should only works on float and int values",
))
}
};
if i == 1 {
min = row_val;
continue;
}
if min > row_val {
min = row_val;
}
}
Ok(min)
}
pub fn describe(&self) {
println!("[");
for i in 0..5 {
print!("\t(");
self.data[i].iter().for_each(|cell| match cell {
Cell::String(s) => print!("{s},"),
Cell::Bool(b) => print!("{b},"),
Cell::Int(x) => print!("{x},"),
Cell::Float(f) => print!("{f},"),
Cell::Null => print!(" ,"),
});
println!(")");
}
let col_len = self.data[0].len();
for _ in 0..col_len * 10 {
print!("-");
}
println!();
let len = self.data.len();
for i in len - 5..len {
print!("\t(");
self.data[i].iter().for_each(|cell| match cell {
Cell::String(s) => print!("{s},"),
Cell::Bool(b) => print!("{b},"),
Cell::Int(x) => print!("{x},"),
Cell::Float(f) => print!("{f},"),
Cell::Null => print!("NULL,"),
});
println!(")");
}
println!("]");
println!(
"
number of rows: {len}
number of columns: {col_len}"
)
}
pub fn pretty_print(&self) {
println!("[");
self.data.iter().for_each(|row| {
print!("\t(");
row.iter().for_each(|cell| match cell {
Cell::String(s) => print!("{s},"),
Cell::Bool(b) => print!("{b},"),
Cell::Int(x) => print!("{x},"),
Cell::Float(f) => print!("{f},"),
Cell::Null => print!(" ,"),
});
println!(")");
});
println!("]");
}
fn get_col_index(&self, column: &str) -> Option<usize> {
for i in 0..self.data[0].len() {
if let Cell::String(colname) = &self.data[0][i] {
if colname == column {
return Some(i);
}
};
}
None
}
}
/// Converts one already-trimmed CSV token into a typed [`Cell`].
///
/// The empty token becomes `Null` and the exact words `true` / `false` become
/// `Bool`. Anything else is tried as an `i64`, then as an `f64`, and is kept as
/// a `String` when neither parse succeeds.
fn parse_token(token: &str) -> Cell {
    match token {
        // An empty token can never parse as a number, so it is safe to decide it first.
        "" => Cell::Null,
        "true" => Cell::Bool(true),
        "false" => Cell::Bool(false),
        other => other
            .parse::<i64>()
            .map(Cell::Int)
            .or_else(|_| other.parse::<f64>().map(Cell::Float))
            .unwrap_or_else(|_| Cell::String(other.to_string())),
    }
}
#[cfg(test)]
mod tests;