use polars::frame::DataFrame;
use polars::prelude::*;
use polars::series::Series;
/// Extension methods for transforming, normalising, splitting and indexing a polars `DataFrame`.
///
/// This file implements the trait both for `DataFrame` and for `Result<DataFrame, PolarsError>`.
pub trait DataFrameTransformer {
/// Applies `unary_function` to every column named in `columns` and returns the resulting frame.
///
/// The receiver is not modified; the result is a new `DataFrame`.
fn transform_cols(
&self,
columns: &[&str],
unary_function: impl Fn(&Series) -> Series,
) -> Result<DataFrame, PolarsError>;
/// Splits the rows into two frames, returned as `(first, second)`.
///
/// `train_size` is the fraction of rows that goes into the first frame; the row count is
/// multiplied by it and truncated to an integer to find the split position.
fn split(&self, train_size: f64) -> Result<(DataFrame, DataFrame), PolarsError>;
/// Returns a frame in which each column named in `columns` has had its mean subtracted and
/// has been divided by its standard deviation.
fn z_norm_cols(&self, columns: &[&str]) -> Result<DataFrame, PolarsError>;
/// Returns a frame in which each column named in `columns` has been rescaled as
/// `(value - min) / (max - min)`.
fn min_max_norm_cols(&self, columns: &[&str]) -> Result<DataFrame, PolarsError>;
/// Returns a clone of the column stored at position `index`.
fn get_col_by_index(&self, index: usize) -> Result<Series, PolarsError>;
/// Returns the rows of `df` whose positions are listed in `indices`.
///
/// This is an associated function (it takes no `self`), so it is called as
/// `Type::select_rows(&df, indices)`.
fn select_rows(df: &DataFrame, indices: Vec<usize>) -> Result<DataFrame, PolarsError>;
}
/// Borrows the frame held by `result`, or produces an owned error describing the stored failure.
///
/// The stored `PolarsError` is only reachable through a shared reference, so it cannot be moved
/// out. It is re-created as a `PolarsError::ComputeError` carrying the original error's display
/// text, which lets callers propagate the failure with `?` instead of panicking.
fn frame_or_error(result: &Result<DataFrame, PolarsError>) -> Result<&DataFrame, PolarsError> {
    result
        .as_ref()
        .map_err(|err| PolarsError::ComputeError(err.to_string().into()))
}

/// Lets a `Result<DataFrame, PolarsError>` be used directly as a transformer.
///
/// Every `&self` method forwards to the `DataFrame` implementation when the receiver is `Ok`.
/// When the receiver is `Err`, the method returns an `Err` describing that failure rather than
/// panicking.
impl DataFrameTransformer for Result<DataFrame, PolarsError> {
    /// Forwards to `DataFrame::transform_cols` on the contained frame.
    ///
    /// # Errors
    /// Returns an error if the receiver is `Err`, or if the forwarded call fails.
    fn transform_cols(
        &self,
        columns: &[&str],
        unary_function: impl Fn(&Series) -> Series,
    ) -> Result<DataFrame, PolarsError> {
        frame_or_error(self)?.transform_cols(columns, unary_function)
    }

    /// Forwards to `DataFrame::split` on the contained frame.
    ///
    /// # Errors
    /// Returns an error if the receiver is `Err`, or if the forwarded call fails.
    fn split(&self, train_size: f64) -> Result<(DataFrame, DataFrame), PolarsError> {
        frame_or_error(self)?.split(train_size)
    }

    /// Forwards to `DataFrame::z_norm_cols` on the contained frame.
    ///
    /// # Errors
    /// Returns an error if the receiver is `Err`, or if the forwarded call fails.
    fn z_norm_cols(&self, columns: &[&str]) -> Result<DataFrame, PolarsError> {
        frame_or_error(self)?.z_norm_cols(columns)
    }

    /// Forwards to `DataFrame::min_max_norm_cols` on the contained frame.
    ///
    /// # Errors
    /// Returns an error if the receiver is `Err`, or if the forwarded call fails.
    fn min_max_norm_cols(&self, columns: &[&str]) -> Result<DataFrame, PolarsError> {
        frame_or_error(self)?.min_max_norm_cols(columns)
    }

    /// Forwards to `DataFrame::get_col_by_index` on the contained frame.
    ///
    /// # Errors
    /// Returns an error if the receiver is `Err`, or if the forwarded call fails.
    fn get_col_by_index(&self, index: usize) -> Result<Series, PolarsError> {
        frame_or_error(self)?.get_col_by_index(index)
    }

    /// Delegates to the `DataFrame` implementation; no receiver is involved.
    fn select_rows(df: &DataFrame, indices: Vec<usize>) -> Result<DataFrame, PolarsError> {
        <DataFrame as DataFrameTransformer>::select_rows(df, indices)
    }
}
impl DataFrameTransformer for DataFrame {
fn transform_cols(
&self,
columns: &[&str],
unary_function: impl Fn(&Series) -> Series,
) -> Result<DataFrame, PolarsError> {
let mut df: DataFrame = self.clone();
for col in columns {
let series: &Series = self.column(col)?;
let transformed_series: Series = unary_function(series);
df.with_column(transformed_series)?;
}
Ok(df)
}
fn split(&self, train_size: f64) -> Result<(DataFrame, DataFrame), PolarsError> {
let num_rows: usize = self.height();
let train_num_rows: i64 = (num_rows as f64 * train_size) as i64;
let train: DataFrame = self.slice(0, train_num_rows as usize);
let test: DataFrame = self.slice(train_num_rows, num_rows);
Ok((train, test))
}
fn z_norm_cols(&self, columns: &[&str]) -> Result<DataFrame, PolarsError> {
let mut df: DataFrame = self.clone();
for col in columns {
let series: &Series = self.column(col)?;
let mean: f64 = series.mean().unwrap();
let std: f64 = if let AnyValue::Float64(value) = series.std_as_series(0).get(0).unwrap()
{
value
} else {
panic!("Standard deviation is not F64");
};
let transformed_series: Series = (series - mean) / std;
df.with_column(transformed_series)?;
}
Ok(df)
}
fn min_max_norm_cols(&self, columns: &[&str]) -> Result<DataFrame, PolarsError> {
let mut df: DataFrame = self.clone();
for col in columns {
let series: &Series = self.column(col)?;
let min: f64 = series.min().unwrap();
let max: f64 = series.max().unwrap();
let transformed_series: Series = (series - min) / (max - min);
df.with_column(transformed_series)?;
}
Ok(df)
}
fn get_col_by_index(&self, index: usize) -> Result<Series, PolarsError> {
let series: &[Series] = self.get_columns();
Ok(series[index].clone())
}
fn select_rows(df: &DataFrame, indices: Vec<usize>) -> Result<DataFrame, PolarsError> {
let mut mask: Vec<bool> = vec![false; df.height()];
for index in indices {
mask[index] = true;
}
let chunked_array_mask: ChunkedArray<BooleanType> = ChunkedArray::new("mask", mask).into();
let selected_df: DataFrame = df.filter(&chunked_array_mask)?;
Ok(selected_df)
}
}
/// Helpers for loading data files into polars frames.
pub mod data_loader_util {
    use polars::frame::DataFrame;
    use polars::prelude::{CsvReader, PolarsError, SerReader};
    use std::path::Path;

    /// Reads the CSV file at `path` into a `DataFrame`, with the reader's `has_header`
    /// option set to `true`.
    ///
    /// # Errors
    /// Returns the `PolarsError` produced when the file cannot be opened or when reading it
    /// into a frame fails; nothing in this function panics on bad input.
    pub fn load_csv(path: &Path) -> Result<DataFrame, PolarsError> {
        CsvReader::from_path(path)?.has_header(true).finish()
    }
}
/// Factories for the closures accepted by `transform_cols`.
pub mod transformer_functions {
    use polars::prelude::*;
    use polars::series::Series;

    /// Applies `op` to every non-null value of an `f64` series and returns the new series.
    ///
    /// Null entries stay null because `op` is only invoked through `Option::map`.
    ///
    /// # Panics
    /// Panics with "series was not an f64 dtype" when `series` cannot be viewed as `f64`.
    fn map_f64_values(series: &Series, op: impl Fn(f64) -> f64) -> Series {
        series
            .f64()
            .expect("series was not an f64 dtype")
            .apply(|slot| slot.map(&op))
            .into()
    }

    /// Returns a transformer that hands back a clone of its input series.
    pub fn identity() -> impl Fn(&Series) -> Series {
        move |input: &Series| input.clone()
    }

    /// Returns a transformer that raises every value of an `f64` series to `power`.
    ///
    /// # Panics
    /// The returned closure panics when it is given a series that is not of `f64` dtype.
    pub fn power(power: f64) -> impl Fn(&Series) -> Series {
        move |input: &Series| map_f64_values(input, |value| value.powf(power))
    }

    /// Returns a transformer that takes the logarithm in `base` of every value of an `f64`
    /// series.
    ///
    /// # Panics
    /// The returned closure panics when it is given a series that is not of `f64` dtype.
    pub fn log(base: f64) -> impl Fn(&Series) -> Series {
        move |input: &Series| map_f64_values(input, |value| value.log(base))
    }
}