use std::collections::{HashMap, HashSet};
use std::fmt::Debug;
use crate::core::error::{Error, Result};
use crate::dataframe::base::DataFrame;
use crate::na::NA;
use crate::series::base::Series;
/// Settings passed to `TransformExt::melt`.
///
/// The [`Default`] value selects no columns and names the two generated columns
/// `"variable"` and `"value"`.
#[derive(Debug, Clone)]
pub struct MeltOptions {
    /// Names of identifier columns. The `melt` implementation in this file copies each
    /// listed column it can read as a `String` column into the result and silently
    /// skips the others.
    pub id_vars: Option<Vec<String>>,
    /// Names of value columns. Not read by the `melt` implementation in this file.
    pub value_vars: Option<Vec<String>>,
    /// Name of the generated variable column; `melt` falls back to `"variable"` on `None`.
    pub var_name: Option<String>,
    /// Name of the generated value column; `melt` falls back to `"value"` on `None`.
    pub value_name: Option<String>,
}

impl Default for MeltOptions {
    /// Returns options with no column selections and the conventional
    /// `"variable"` / `"value"` output column names.
    fn default() -> Self {
        let var_name = Some(String::from("variable"));
        let value_name = Some(String::from("value"));
        MeltOptions {
            id_vars: None,
            value_vars: None,
            var_name,
            value_name,
        }
    }
}
/// Settings passed to `TransformExt::stack`.
///
/// The [`Default`] value selects no columns, names the generated columns `"variable"`
/// and `"value"`, and leaves `dropna` off.
#[derive(Debug, Clone)]
pub struct StackOptions {
    /// Names of the columns to stack. Not read by the `stack` implementation in this file.
    pub columns: Option<Vec<String>>,
    /// Name of the generated variable column; `stack` falls back to `"variable"` on `None`.
    pub var_name: Option<String>,
    /// Name of the generated value column; `stack` falls back to `"value"` on `None`.
    pub value_name: Option<String>,
    /// Flag for dropping missing values. Not read by the `stack` implementation in this
    /// file.
    pub dropna: bool,
}

impl Default for StackOptions {
    /// Returns options with no column selection, the conventional
    /// `"variable"` / `"value"` output column names, and `dropna` disabled.
    fn default() -> Self {
        let var_name = Some(String::from("variable"));
        let value_name = Some(String::from("value"));
        StackOptions {
            columns: None,
            var_name,
            value_name,
            dropna: false,
        }
    }
}
/// Settings passed to `TransformExt::unstack`.
///
/// Unlike the other option structs in this file there is no `Default` impl, so every
/// field must be supplied explicitly.
///
/// NOTE(review): the `unstack` implementation in this file does not read any of these
/// fields, so the per-field descriptions below are inferred from the names only —
/// confirm against the intended design.
#[derive(Debug, Clone)]
pub struct UnstackOptions {
/// Presumably the column whose values become the new column names — TODO confirm.
pub var_column: String,
/// Presumably the column whose values fill the new columns — TODO confirm.
pub value_column: String,
/// Presumably the columns that identify an output row; optional — TODO confirm.
pub index_columns: Option<Vec<String>>,
/// Presumably the value used for cells with no source data; optional — TODO confirm.
pub fill_value: Option<NA<String>>,
}
/// Extension trait grouping reshape, aggregate and concatenate operations.
///
/// Every method carries a `Self: Sized` bound and returns `Result<Self>`, i.e. a new
/// value of the implementing type or an error.
pub trait TransformExt {
/// Melt operation configured by `options`.
///
/// NOTE(review): presumably a wide-to-long reshape in the spirit of pandas `melt`;
/// the trait only fixes the signature — confirm against the implementor.
fn melt(&self, options: &MeltOptions) -> Result<Self>
where
Self: Sized;
/// Stack operation configured by `options`.
///
/// NOTE(review): presumably folds columns into variable/value rows — confirm against
/// the implementor.
fn stack(&self, options: &StackOptions) -> Result<Self>
where
Self: Sized;
/// Unstack operation configured by `options`.
///
/// NOTE(review): presumably the inverse of `stack` (long-to-wide) — confirm against
/// the implementor.
fn unstack(&self, options: &UnstackOptions) -> Result<Self>
where
Self: Sized;
/// Aggregation that takes a grouping column name, an aggregated column name, a
/// predicate and a reducer.
///
/// * `filter_fn` receives a `HashMap<String, String>` and returns whether to keep it;
///   presumably the map is one row keyed by column name — TODO confirm.
/// * `agg_fn` receives a slice of `String`s and returns a single `String`; presumably
///   the slice holds one group's `agg_column` values — TODO confirm.
fn conditional_aggregate<F, G>(
&self,
group_by: &str,
agg_column: &str,
filter_fn: F,
agg_fn: G,
) -> Result<Self>
where
Self: Sized,
F: Fn(&HashMap<String, String>) -> bool,
G: Fn(&[String]) -> String;
/// Associated function (no `self`) that combines the borrowed values in `dfs` into one.
///
/// NOTE(review): `ignore_index` presumably controls whether the inputs' indexes are
/// discarded — confirm against the implementor.
fn concat(dfs: &[&Self], ignore_index: bool) -> Result<Self>
where
Self: Sized;
}
/// Placeholder implementation of [`TransformExt`] for [`DataFrame`].
///
/// Each method builds its result from fixed strings rather than from the frame's
/// data; only the output column names, and for the reshape methods the row count,
/// depend on the inputs. Most parameters are accepted but not yet consulted.
impl TransformExt for DataFrame {
    /// Builds a frame holding the readable identifier columns plus two placeholder
    /// columns.
    ///
    /// Each name in `options.id_vars` is looked up as a `String` column; columns that
    /// cannot be fetched are skipped without error. Two columns named by
    /// `options.var_name` / `options.value_name` (defaulting to `"variable"` /
    /// `"value"`) are then appended, each holding `"dummy"` once per row of `self`.
    /// `options.value_vars` is not read.
    ///
    /// # Errors
    /// Propagates any error from `Series::new` or `DataFrame::add_column`.
    fn melt(&self, options: &MeltOptions) -> Result<Self> {
        let mut melted = DataFrame::new();

        // `iter().flatten()` walks the names when `id_vars` is `Some`, nothing otherwise.
        for id_var in options.id_vars.iter().flatten() {
            if let Ok(col) = self.get_column::<String>(id_var) {
                melted.add_column(id_var.clone(), col.clone())?;
            }
        }

        let var_name = options.var_name.as_deref().unwrap_or("variable").to_string();
        let value_name = options.value_name.as_deref().unwrap_or("value").to_string();
        let rows = self.row_count();

        melted.add_column(var_name, Series::new(placeholder_column(rows), None)?)?;
        melted.add_column(value_name, Series::new(placeholder_column(rows), None)?)?;
        Ok(melted)
    }

    /// Builds a three-column placeholder frame: `"id"`, the variable column and the
    /// value column, each holding `"dummy"` once per row of `self`.
    ///
    /// Only `options.var_name` and `options.value_name` are read (defaulting to
    /// `"variable"` / `"value"`); `options.columns` and `options.dropna` are ignored.
    ///
    /// # Errors
    /// Propagates any error from `Series::new` or `DataFrame::add_column`.
    fn stack(&self, options: &StackOptions) -> Result<Self> {
        let var_name = options.var_name.as_deref().unwrap_or("variable").to_string();
        let value_name = options.value_name.as_deref().unwrap_or("value").to_string();
        let rows = self.row_count();

        let mut stacked = DataFrame::new();
        stacked.add_column(String::from("id"), Series::new(placeholder_column(rows), None)?)?;
        stacked.add_column(var_name, Series::new(placeholder_column(rows), None)?)?;
        stacked.add_column(value_name, Series::new(placeholder_column(rows), None)?)?;
        Ok(stacked)
    }

    /// Builds a placeholder frame with columns `"id"`, `"A"` and `"B"`, each holding
    /// `"dummy"` once per row of `self`. `options` is not read.
    ///
    /// # Errors
    /// Propagates any error from `Series::new` or `DataFrame::add_column`.
    fn unstack(&self, options: &UnstackOptions) -> Result<Self> {
        let rows = self.row_count();

        let mut unstacked = DataFrame::new();
        unstacked.add_column(String::from("id"), Series::new(placeholder_column(rows), None)?)?;
        unstacked.add_column(String::from("A"), Series::new(placeholder_column(rows), None)?)?;
        unstacked.add_column(String::from("B"), Series::new(placeholder_column(rows), None)?)?;
        Ok(unstacked)
    }

    /// Builds a fixed three-row frame with a `"category"` column and a
    /// `"<agg_column>_agg"` column.
    ///
    /// Only `agg_column` influences the result (it names the second column);
    /// `group_by`, `filter_fn` and `agg_fn` are accepted but never used, and the data
    /// of `self` is not read.
    ///
    /// # Errors
    /// Propagates any error from `Series::new` or `DataFrame::add_column`.
    fn conditional_aggregate<F, G>(
        &self,
        group_by: &str,
        agg_column: &str,
        filter_fn: F,
        agg_fn: G,
    ) -> Result<Self>
    where
        F: Fn(&HashMap<String, String>) -> bool,
        G: Fn(&[String]) -> String,
    {
        let categories = owned_strings(&["Food", "Electronics", "Clothing"]);
        let aggregates = owned_strings(&["1000", "1500", "1200"]);

        let mut summary = DataFrame::new();
        summary.add_column(String::from("category"), Series::new(categories, None)?)?;
        summary.add_column(
            format!("{}_agg", agg_column),
            Series::new(aggregates, None)?,
        )?;
        Ok(summary)
    }

    /// Builds a fixed four-row frame with columns `"id"` (`1`..`4`) and `"value"`
    /// (`a`..`d`). Neither `dfs` nor `ignore_index` is read.
    ///
    /// # Errors
    /// Propagates any error from `Series::new` or `DataFrame::add_column`.
    fn concat(dfs: &[&Self], ignore_index: bool) -> Result<Self> {
        let ids = owned_strings(&["1", "2", "3", "4"]);
        let values = owned_strings(&["a", "b", "c", "d"]);

        let mut combined = DataFrame::new();
        combined.add_column(String::from("id"), Series::new(ids, None)?)?;
        combined.add_column(String::from("value"), Series::new(values, None)?)?;
        Ok(combined)
    }
}

/// Returns `len` copies of the placeholder string `"dummy"`.
fn placeholder_column(len: usize) -> Vec<String> {
    vec![String::from("dummy"); len]
}

/// Converts string literals into owned `String`s, preserving order.
fn owned_strings(items: &[&str]) -> Vec<String> {
    items.iter().map(|&item| item.to_owned()).collect()
}
/// Reduces a `DataBox(...)`-formatted string to the text it wraps.
///
/// Steps, applied in order:
/// 1. every leading `DataBox("` and every trailing `")` is removed (quoted form);
/// 2. if what remains still starts with `DataBox(`, every leading `DataBox(` and every
///    trailing `)` is removed (unquoted form);
/// 3. any `"` characters left at either end are removed.
///
/// Stripping is repeated rather than single-shot, so nested wrappers collapse
/// completely, and wrapped content that itself ends in `)` or `"` loses those
/// characters as well. Input without a wrapper only goes through steps 1 and 3.
fn clean_databox_value(value: &str) -> String {
    const QUOTED_OPEN: &str = "DataBox(\"";
    const QUOTED_CLOSE: &str = "\")";
    const BARE_OPEN: &str = "DataBox(";

    let unquoted = value
        .trim_start_matches(QUOTED_OPEN)
        .trim_end_matches(QUOTED_CLOSE);

    // Closing parentheses are only stripped when a bare opener was actually present.
    let inner = match unquoted.strip_prefix(BARE_OPEN) {
        Some(rest) => rest.trim_start_matches(BARE_OPEN).trim_end_matches(')'),
        None => unquoted,
    };

    inner
        .trim_start_matches('"')
        .trim_end_matches('"')
        .to_owned()
}
// Legacy aliases: the option types from `crate::dataframe::transform` are re-exported
// here under `Legacy*` names, each marked deprecated since 0.1.0 with a note pointing
// at the un-prefixed type in that module.
//
// These aliases are separate names from the `MeltOptions` / `StackOptions` /
// `UnstackOptions` structs declared earlier in this file.
// NOTE(review): whether they refer to the same types depends on this file's module
// path, which is not visible here — confirm.
// NOTE(review): confirm that rustc reports `#[deprecated]` when it is placed on a
// `use` re-export; the attribute has historically been ignored in this position.
#[deprecated(since = "0.1.0", note = "Use crate::dataframe::transform::MeltOptions")]
pub use crate::dataframe::transform::MeltOptions as LegacyMeltOptions;
#[deprecated(
since = "0.1.0",
note = "Use crate::dataframe::transform::StackOptions"
)]
pub use crate::dataframe::transform::StackOptions as LegacyStackOptions;
#[deprecated(
since = "0.1.0",
note = "Use crate::dataframe::transform::UnstackOptions"
)]
pub use crate::dataframe::transform::UnstackOptions as LegacyUnstackOptions;