#![allow(clippy::result_large_err)]
#[cfg(test)]
mod tests {
use pandrs::dataframe::TransformExt;
use pandrs::{DataFrame, Series};
fn clean_databox_value(value: &str) -> String {
let trimmed = value
.trim_start_matches("DataBox(\"")
.trim_end_matches("\")");
let value_str = if trimmed.starts_with("DataBox(") {
trimmed.trim_start_matches("DataBox(").trim_end_matches(")")
} else {
trimmed
};
value_str.trim_matches('"').to_string()
}
#[test]
fn test_conditional_aggregate() {
let mut df = DataFrame::new();
df.add_column(
"category".to_string(),
Series::new(
vec!["Food", "Electronics", "Food", "Clothing"],
Some("category".to_string()),
)
.unwrap(),
)
.unwrap();
df.add_column(
"sales".to_string(),
Series::new(
vec!["1000", "1500", "800", "1200"],
Some("sales".to_string()),
)
.unwrap(),
)
.unwrap();
let result = df
.conditional_aggregate(
"category",
"sales",
|row| {
if let Some(sales_str) = row.get("sales") {
if let Ok(sales) = sales_str.parse::<i32>() {
return sales >= 1000;
}
}
false
},
|values| {
let sum: i32 = values.iter().filter_map(|v| v.parse::<i32>().ok()).sum();
sum.to_string()
},
)
.unwrap();
assert_eq!(result.column_count(), 2);
assert_eq!(result.row_count(), 3);
let cat_col = result.get_column::<String>("category").unwrap();
let agg_col = result.get_column::<String>("sales_agg").unwrap();
for i in 0..result.row_count() {
let category = clean_databox_value(&cat_col.values()[i].to_string());
let agg_value = clean_databox_value(&agg_col.values()[i].to_string());
if category == "Food" {
assert_eq!(agg_value, "1000"); } else if category == "Electronics" {
assert_eq!(agg_value, "1500");
} else if category == "Clothing" {
assert_eq!(agg_value, "1200");
} else {
panic!("Unexpected category: {category}");
}
}
}
#[test]
fn test_concat() {
let mut df1 = DataFrame::new();
df1.add_column(
"id".to_string(),
Series::new(vec!["1", "2"], Some("id".to_string())).unwrap(),
)
.unwrap();
df1.add_column(
"value".to_string(),
Series::new(vec!["a", "b"], Some("value".to_string())).unwrap(),
)
.unwrap();
let mut df2 = DataFrame::new();
df2.add_column(
"id".to_string(),
Series::new(vec!["3", "4"], Some("id".to_string())).unwrap(),
)
.unwrap();
df2.add_column(
"value".to_string(),
Series::new(vec!["c", "d"], Some("value".to_string())).unwrap(),
)
.unwrap();
let concat_df = DataFrame::concat(&[&df1, &df2], true).unwrap();
assert_eq!(concat_df.column_count(), 2);
assert_eq!(concat_df.row_count(), 4);
let id_col = concat_df.get_column::<String>("id").unwrap();
let value_col = concat_df.get_column::<String>("value").unwrap();
assert_eq!(clean_databox_value(&id_col.values()[0].to_string()), "1");
assert_eq!(clean_databox_value(&value_col.values()[0].to_string()), "a");
assert_eq!(clean_databox_value(&id_col.values()[1].to_string()), "2");
assert_eq!(clean_databox_value(&value_col.values()[1].to_string()), "b");
assert_eq!(clean_databox_value(&id_col.values()[2].to_string()), "3");
assert_eq!(clean_databox_value(&value_col.values()[2].to_string()), "c");
assert_eq!(clean_databox_value(&id_col.values()[3].to_string()), "4");
assert_eq!(clean_databox_value(&value_col.values()[3].to_string()), "d");
}
}