use crate::core::{data_value::DataValue, error::Result, index::IndexTrait};
use std::collections::HashMap;
use std::time::Duration;
/// Selects one of the two dimensions of a tabular structure.
///
/// The discriminants are fixed explicitly: `Row` is `0` and `Column` is `1`.
/// Within this file the type is taken by `DataFrameOps::dropna`,
/// `DataFrameOps::apply` and `DataFrameAdvancedOps::concat`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Axis {
    /// The row dimension (discriminant `0`).
    Row = 0,
    /// The column dimension (discriminant `1`).
    Column = 1,
}
/// Policy argument accepted by `DataFrameOps::dropna`.
///
/// NOTE(review): the variants presumably mirror pandas' `how="any"` /
/// `how="all"`; the actual behavior is defined by implementors of the trait
/// and is not visible in this file.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DropNaHow {
    /// Presumably: drop when at least one value is missing.
    Any,
    /// Presumably: drop only when every value is missing.
    All,
}
/// Optional strategy argument accepted by `DataFrameOps::fillna`.
///
/// NOTE(review): how each strategy is applied is decided by implementors of
/// the trait; the descriptions below are inferred from the variant names.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FillMethod {
    /// Presumably: propagate the previous valid value forward.
    Forward,
    /// Presumably: propagate the next valid value backward.
    Backward,
    /// Presumably: derive the value from its neighbours.
    Interpolate,
}
/// Join flavour passed as the `how` argument of `DataFrameAdvancedOps::merge`.
///
/// NOTE(review): the variants are named after the usual relational joins; the
/// matching rules themselves live in the trait implementations.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum JoinType {
    /// Inner join.
    Inner,
    /// Left join.
    Left,
    /// Right join.
    Right,
    /// Outer join.
    Outer,
    /// Cross join.
    Cross,
}
/// Summary record returned by `DataFrameOps::info`.
///
/// All fields are public and filled in by the implementor of `info`.
#[derive(Clone, Debug)]
pub struct DataFrameInfo {
    /// Row count.
    pub rows: usize,
    /// Column count.
    pub columns: usize,
    /// String-to-string map describing the columns.
    /// NOTE(review): presumably column name -> dtype name; confirm with implementors.
    pub column_info: HashMap<String, String>,
    /// Memory footprint; the unit is not specified in this file.
    pub memory_usage: usize,
    /// String-to-count map.
    /// NOTE(review): presumably column name -> number of non-null values; confirm.
    pub non_null_counts: HashMap<String, usize>,
}
/// Names an aggregation; a slice of these is accepted by `GroupByOps::agg`.
///
/// The type is not `Copy` because `Custom` owns a `String`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum AggFunc {
    /// Sum.
    Sum,
    /// Mean.
    Mean,
    /// Median.
    Median,
    /// Standard deviation (by name).
    Std,
    /// Variance (by name).
    Var,
    /// Minimum.
    Min,
    /// Maximum.
    Max,
    /// Count.
    Count,
    /// Number of unique values (by name).
    Nunique,
    /// First value.
    First,
    /// Last value.
    Last,
    /// An aggregation identified by an arbitrary string.
    /// NOTE(review): how the string is resolved is not visible in this file.
    Custom(String),
}
pub trait DataFrameOps {
type Output: DataFrameOps;
type Error: std::error::Error + Send + Sync + 'static;
fn select(&self, columns: &[&str]) -> Result<Self::Output, Self::Error>;
fn drop(&self, columns: &[&str]) -> Result<Self::Output, Self::Error>;
fn rename(&self, mapping: &HashMap<String, String>) -> Result<Self::Output, Self::Error>;
fn filter<F>(&self, predicate: F) -> Result<Self::Output, Self::Error>
where
F: Fn(&DataValue) -> bool;
fn head(&self, n: usize) -> Result<Self::Output, Self::Error>;
fn tail(&self, n: usize) -> Result<Self::Output, Self::Error>;
fn sample(&self, n: usize, random_state: Option<u64>) -> Result<Self::Output, Self::Error>;
fn sort_values(&self, by: &[&str], ascending: &[bool]) -> Result<Self::Output, Self::Error>;
fn sort_index(&self) -> Result<Self::Output, Self::Error>;
fn shape(&self) -> (usize, usize);
fn columns(&self) -> Vec<String>;
fn dtypes(&self) -> HashMap<String, String>;
fn info(&self) -> DataFrameInfo;
fn dropna(&self, axis: Option<Axis>, how: DropNaHow) -> Result<Self::Output, Self::Error>;
fn fillna(&self, value: &DataValue, method: Option<FillMethod>) -> Result<Self::Output, Self::Error>;
fn isna(&self) -> Result<Self::Output, Self::Error>;
fn map<F>(&self, func: F) -> Result<Self::Output, Self::Error>
where
F: Fn(&DataValue) -> DataValue;
fn apply<F>(&self, func: F, axis: Axis) -> Result<Self::Output, Self::Error>
where
F: Fn(&crate::series::Series) -> DataValue;
}
pub trait DataFrameAdvancedOps: DataFrameOps {
fn merge(&self, other: &Self, on: &[&str], how: JoinType) -> Result<Self::Output, Self::Error>;
fn concat(&self, others: &[&Self], axis: Axis) -> Result<Self::Output, Self::Error>;
fn pivot(&self, index: &[&str], columns: &[&str], values: &[&str]) -> Result<Self::Output, Self::Error>;
fn melt(&self, id_vars: &[&str], value_vars: &[&str]) -> Result<Self::Output, Self::Error>;
fn stack(&self, level: Option<usize>) -> Result<Self::Output, Self::Error>;
fn unstack(&self, level: Option<usize>) -> Result<Self::Output, Self::Error>;
fn rolling(&self, window: usize) -> Result<RollingWindow<Self>, Self::Error>;
fn expanding(&self) -> Result<ExpandingWindow<Self>, Self::Error>;
fn resample(&self, freq: &str) -> Result<Resampler<Self>, Self::Error>;
fn shift(&self, periods: i64) -> Result<Self::Output, Self::Error>;
fn set_index(&self, keys: &[&str]) -> Result<Self::Output, Self::Error>;
fn reset_index(&self, drop: bool) -> Result<Self::Output, Self::Error>;
fn reindex(&self, index: &dyn IndexTrait) -> Result<Self::Output, Self::Error>;
}
pub trait GroupByOps<T: DataFrameOps> {
type GroupByResult: DataFrameOps;
type Error: std::error::Error;
fn sum(&self) -> Result<Self::GroupByResult, Self::Error>;
fn mean(&self) -> Result<Self::GroupByResult, Self::Error>;
fn median(&self) -> Result<Self::GroupByResult, Self::Error>;
fn std(&self) -> Result<Self::GroupByResult, Self::Error>;
fn var(&self) -> Result<Self::GroupByResult, Self::Error>;
fn min(&self) -> Result<Self::GroupByResult, Self::Error>;
fn max(&self) -> Result<Self::GroupByResult, Self::Error>;
fn count(&self) -> Result<Self::GroupByResult, Self::Error>;
fn nunique(&self) -> Result<Self::GroupByResult, Self::Error>;
fn agg(&self, funcs: &[AggFunc]) -> Result<Self::GroupByResult, Self::Error>;
fn transform<F>(&self, func: F) -> Result<T, Self::Error>
where
F: Fn(&T) -> T;
fn apply<F>(&self, func: F) -> Result<Self::GroupByResult, Self::Error>
where
F: Fn(&T) -> DataValue;
fn filter<F>(&self, func: F) -> Result<T, Self::Error>
where
F: Fn(&T) -> bool;
fn groups(&self) -> GroupIterator<T>;
fn get_group(&self, key: &GroupKey) -> Result<T, Self::Error>;
fn describe(&self) -> Result<Self::GroupByResult, Self::Error>;
fn quantile(&self, q: f64) -> Result<Self::GroupByResult, Self::Error>;
}
/// Positional, label-based, mask-based and expression-based access.
///
/// Sub-table lookups return `Self::Output`; scalar lookups return a
/// `DataValue`. Failures are reported as `Self::Error`.
///
/// The summaries below describe the intent suggested by each method's name
/// and signature (the names appear to follow pandas). The actual behavior is
/// defined by implementors and is not visible in this file.
pub trait IndexingOps {
    /// Type produced by sub-table lookups.
    type Output;
    /// Error type reported by the fallible methods.
    type Error: std::error::Error;

    /// Positional lookup with a row indexer and a column indexer.
    fn iloc(
        &self,
        row_indexer: &RowIndexer,
        col_indexer: &ColIndexer,
    ) -> Result<Self::Output, Self::Error>;

    /// Positional lookup of one value at (`row`, `col`).
    fn iloc_scalar(&self, row: usize, col: usize) -> Result<DataValue, Self::Error>;

    /// Label-based lookup with a row indexer and a column indexer.
    fn loc(
        &self,
        row_indexer: &LabelIndexer,
        col_indexer: &LabelIndexer,
    ) -> Result<Self::Output, Self::Error>;

    /// Label-based lookup of one value.
    fn loc_scalar(&self, row_label: &str, col_label: &str) -> Result<DataValue, Self::Error>;

    /// Lookup driven by a `BooleanMask`.
    fn mask(&self, mask: &BooleanMask) -> Result<Self::Output, Self::Error>;

    /// Lookup driven by a per-`DataValue` predicate.
    fn where_condition<F: Fn(&DataValue) -> bool>(
        &self,
        condition: F,
    ) -> Result<Self::Output, Self::Error>;

    /// Lookup driven by a textual `expression`.
    /// NOTE(review): the expression grammar is not defined in this file.
    fn query(&self, expression: &str) -> Result<Self::Output, Self::Error>;

    /// Evaluation of a textual `expression` into a `Series`.
    /// NOTE(review): the expression grammar is not defined in this file.
    fn eval(&self, expression: &str) -> Result<crate::series::Series, Self::Error>;

    /// Label-based access to one value.
    fn at(&self, row_label: &str, col_label: &str) -> Result<DataValue, Self::Error>;

    /// Positional access to one value.
    fn iat(&self, row: usize, col: usize) -> Result<DataValue, Self::Error>;
}
/// Pairs a value of type `T` with a window size.
///
/// This is the success type of `DataFrameAdvancedOps::rolling`. Both fields
/// are private to this module.
#[derive(Clone, Debug)]
pub struct RollingWindow<T> {
    dataframe: T,
    window_size: usize,
}

impl<T> RollingWindow<T> {
    /// Wraps `dataframe` together with `window_size`.
    ///
    /// No validation is performed on either argument.
    pub fn new(dataframe: T, window_size: usize) -> Self {
        RollingWindow {
            dataframe,
            window_size,
        }
    }
}
/// Wraps a value of type `T`.
///
/// This is the success type of `DataFrameAdvancedOps::expanding`. The single
/// field is private to this module.
#[derive(Clone, Debug)]
pub struct ExpandingWindow<T> {
    dataframe: T,
}

impl<T> ExpandingWindow<T> {
    /// Wraps `dataframe` without inspecting it.
    pub fn new(dataframe: T) -> Self {
        ExpandingWindow { dataframe }
    }
}
/// Pairs a value of type `T` with an owned frequency string.
///
/// This is the success type of `DataFrameAdvancedOps::resample`. Both fields
/// are private to this module.
#[derive(Clone, Debug)]
pub struct Resampler<T> {
    dataframe: T,
    frequency: String,
}

impl<T> Resampler<T> {
    /// Wraps `dataframe` together with `frequency`.
    ///
    /// The frequency string is stored as given; it is not parsed or validated here.
    pub fn new(dataframe: T, frequency: String) -> Self {
        Resampler {
            dataframe,
            frequency,
        }
    }
}
/// Iterator over `(GroupKey, T)` pairs held in a vector.
///
/// `current` is the position of the next pair to yield. Pairs are yielded in
/// vector order and the vector itself is never modified.
pub struct GroupIterator<T> {
    groups: Vec<(GroupKey, T)>,
    current: usize,
}

// Each pair is cloned out of the backing vector, so `T` must be `Clone`.
// Without this bound the `.clone()` call on `(GroupKey, T)` does not compile.
impl<T: Clone> Iterator for GroupIterator<T> {
    type Item = (GroupKey, T);

    /// Returns a clone of the pair at `current` and advances the cursor, or
    /// `None` once every pair has been yielded.
    fn next(&mut self) -> Option<Self::Item> {
        // Checked access: an out-of-range cursor yields `None` rather than panicking.
        let item = self.groups.get(self.current).cloned()?;
        self.current += 1;
        Some(item)
    }

    /// Reports the exact number of pairs still to be yielded.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.groups.len().saturating_sub(self.current);
        (remaining, Some(remaining))
    }
}
/// Identifies one group by an ordered list of `DataValue`s.
///
/// Used as the key type in `GroupIterator` items and in
/// `GroupByOps::get_group`.
///
/// NOTE(review): the `Eq` and `Hash` derives require `DataValue` to implement
/// both traits; confirm this holds for every `DataValue` variant.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct GroupKey {
    /// The values that make up the key, in order.
    pub values: Vec<DataValue>,
}
/// Positional row selector accepted by `IndexingOps::iloc`.
///
/// NOTE(review): bounds handling and slice conventions (for example whether
/// `end` is exclusive) are decided by implementors of `iloc`.
#[derive(Clone, Debug)]
pub enum RowIndexer {
    /// One row position.
    Single(usize),
    /// A list of row positions.
    Multiple(Vec<usize>),
    /// A range with optional `start`, `end` and `step`.
    Slice {
        start: Option<usize>,
        end: Option<usize>,
        step: Option<usize>,
    },
    /// A list of boolean flags.
    Mask(Vec<bool>),
}
/// Positional column selector accepted by `IndexingOps::iloc`.
///
/// NOTE(review): bounds handling and slice conventions are decided by
/// implementors of `iloc`.
#[derive(Clone, Debug)]
pub enum ColIndexer {
    /// One column position.
    Single(usize),
    /// A list of column positions.
    Multiple(Vec<usize>),
    /// A range with optional `start`, `end` and `step`.
    Slice {
        start: Option<usize>,
        end: Option<usize>,
        step: Option<usize>,
    },
    /// No restriction on columns.
    All,
}
/// Label-based selector accepted for both rows and columns by
/// `IndexingOps::loc`.
///
/// NOTE(review): whether slice ends are inclusive is decided by implementors
/// of `loc`.
#[derive(Clone, Debug)]
pub enum LabelIndexer {
    /// One label.
    Single(String),
    /// A list of labels.
    Multiple(Vec<String>),
    /// A label range with optional `start` and `end`.
    Slice {
        start: Option<String>,
        end: Option<String>,
    },
    /// No restriction.
    All,
}
/// A list of boolean flags, accepted by `IndexingOps::mask`.
#[derive(Clone, Debug)]
pub struct BooleanMask {
    /// The flags, in order.
    pub mask: Vec<bool>,
}

impl BooleanMask {
    /// Wraps `mask` as given.
    pub fn new(mask: Vec<bool>) -> Self {
        BooleanMask { mask }
    }

    /// Number of flags in the mask.
    pub fn len(&self) -> usize {
        self.mask.len()
    }

    /// `true` when the mask holds no flags.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Number of flags that are `true`.
    pub fn count_true(&self) -> usize {
        self.mask.iter().copied().filter(|flag| *flag).count()
    }

    /// Number of flags that are `false`.
    pub fn count_false(&self) -> usize {
        // Every flag is either true or false, so the two counts partition the mask.
        self.len() - self.count_true()
    }
}