#![allow(clippy::result_large_err)]
use pandrs::dataframe::apply::Axis;
use pandrs::dataframe::ApplyExt;
use pandrs::DataFrame;
use std::collections::HashMap;
/// Applies a first-element closure along `Axis::Column` to a two-column frame and
/// checks the length and the name of the returned result.
#[test]
fn test_dataframe_apply() {
    // Build a frame with two string columns of three values each.
    let mut df = DataFrame::new();
    for (name, values) in [("col1", ["1", "2", "3"]), ("col2", ["4", "5", "6"])] {
        let data = values.iter().copied().map(String::from).collect();
        let series = pandrs::Series::new(data, Some(name.to_string())).unwrap();
        df.add_column(name.to_string(), series).unwrap();
    }

    let result_name = "first_elem";
    let result = df
        .apply(
            |column| column.get(0).unwrap().clone(),
            Axis::Column,
            Some(result_name.to_string()),
        )
        .unwrap();

    // One entry per column, carrying the requested name.
    assert_eq!(result.len(), 2);
    assert_eq!(result.name().unwrap(), result_name);
}
/// Runs `applymap` with a "parse as i32 (0 on failure), double, stringify" closure and
/// checks that the resulting frame keeps the column names and the 3x2 shape.
#[test]
fn test_dataframe_applymap() {
    // Build a frame with two string columns of three values each.
    let mut df = DataFrame::new();
    for (name, values) in [("col1", ["1", "2", "3"]), ("col2", ["4", "5", "6"])] {
        let data = values.iter().copied().map(String::from).collect();
        let series = pandrs::Series::new(data, Some(name.to_string())).unwrap();
        df.add_column(name.to_string(), series).unwrap();
    }

    let result = df
        .applymap(|cell| {
            let parsed = cell.parse::<i32>().unwrap_or(0);
            (parsed * 2).to_string()
        })
        .unwrap();

    // Column names are carried over unchanged.
    assert_eq!(result.column_names(), df.column_names());
    assert!(result.contains_column("col1"));
    assert!(result.contains_column("col2"));

    // Shape is unchanged: three rows, two columns.
    assert_eq!(result.row_count(), 3);
    assert_eq!(result.column_count(), 2);
}
/// Calls `mask` with the predicate "parses to an i32 >= 2" and the value `"X"`, then
/// checks that the result has the same shape and column names as the input frame.
#[test]
fn test_dataframe_mask() {
    // Build a frame with two string columns of three values each.
    let mut df = DataFrame::new();
    for (name, values) in [("col1", ["1", "2", "3"]), ("col2", ["4", "5", "6"])] {
        let data = values.iter().copied().map(String::from).collect();
        let series = pandrs::Series::new(data, Some(name.to_string())).unwrap();
        df.add_column(name.to_string(), series).unwrap();
    }

    let result = df
        .mask(|cell| cell.parse::<i32>().unwrap_or(0) >= 2, "X")
        .unwrap();

    // Shape matches the source frame.
    assert_eq!(result.row_count(), df.row_count());
    assert_eq!(result.column_count(), df.column_count());

    // Both original columns are still present.
    for name in ["col1", "col2"] {
        assert!(result.contains_column(name));
    }
}
/// Calls `where_func` with the predicate "parses to an i32 >= 3" and the value `"X"`,
/// then checks that the result has the same shape and column names as the input frame.
#[test]
fn test_dataframe_where_func() {
    // Build a frame with two string columns of three values each.
    let mut df = DataFrame::new();
    for (name, values) in [("col1", ["1", "2", "3"]), ("col2", ["4", "5", "6"])] {
        let data = values.iter().copied().map(String::from).collect();
        let series = pandrs::Series::new(data, Some(name.to_string())).unwrap();
        df.add_column(name.to_string(), series).unwrap();
    }

    let result = df
        .where_func(|cell| cell.parse::<i32>().unwrap_or(0) >= 3, "X")
        .unwrap();

    // Shape matches the source frame.
    assert_eq!(result.row_count(), df.row_count());
    assert_eq!(result.column_count(), df.column_count());

    // Both original columns are still present.
    for name in ["col1", "col2"] {
        assert!(result.contains_column(name));
    }
}
/// Calls `replace` with the mapping `a -> X`, `c -> Y` and checks that the result has
/// the same shape and column names as the input frame.
#[test]
fn test_dataframe_replace() {
    // Build a frame with two string columns of three letters each.
    let mut df = DataFrame::new();
    for (name, values) in [("col1", ["a", "b", "c"]), ("col2", ["b", "c", "d"])] {
        let data = values.iter().copied().map(String::from).collect();
        let series = pandrs::Series::new(data, Some(name.to_string())).unwrap();
        df.add_column(name.to_string(), series).unwrap();
    }

    // Old value -> new value pairs handed to `replace`.
    let replace_map: HashMap<String, String> = [("a", "X"), ("c", "Y")]
        .iter()
        .map(|&(from, to)| (from.to_string(), to.to_string()))
        .collect();

    let result = df.replace(&replace_map).unwrap();

    // Shape matches the source frame.
    assert_eq!(result.row_count(), df.row_count());
    assert_eq!(result.column_count(), df.column_count());

    // Both original columns are still present.
    for name in ["col1", "col2"] {
        assert!(result.contains_column(name));
    }
}
/// Checks the per-row flags returned by `duplicated` over all columns for both the
/// `"first"` and `"last"` keep options, on a frame whose rows 0 and 2 are identical.
#[test]
fn test_dataframe_duplicated() {
    // Rows 0 and 2 are both ("a", "1"); rows 1 and 3 are unique.
    let mut df = DataFrame::new();
    for (name, values) in [
        ("col1", ["a", "b", "a", "c"]),
        ("col2", ["1", "2", "1", "3"]),
    ] {
        let data = values.iter().copied().map(String::from).collect();
        let series = pandrs::Series::new(data, Some(name.to_string())).unwrap();
        df.add_column(name.to_string(), series).unwrap();
    }

    // Keep option paired with the flag expected for each row.
    let cases = [
        ("first", [false, false, true, false]),
        ("last", [true, false, false, false]),
    ];
    for (keep, expected) in cases {
        let flags = df.duplicated(None, Some(keep)).unwrap();
        assert_eq!(flags.len(), 4);
        for (row, want) in expected.iter().enumerate() {
            assert_eq!(*flags.get(row).unwrap(), *want);
        }
    }
}
/// Checks that `drop_duplicates` over all columns leaves three rows and both columns,
/// for the `"first"` and the `"last"` keep option alike.
#[test]
fn test_dataframe_drop_duplicates() {
    // Rows 0 and 2 are both ("a", "1"); rows 1 and 3 are unique.
    let mut df = DataFrame::new();
    for (name, values) in [
        ("col1", ["a", "b", "a", "c"]),
        ("col2", ["1", "2", "1", "3"]),
    ] {
        let data = values.iter().copied().map(String::from).collect();
        let series = pandrs::Series::new(data, Some(name.to_string())).unwrap();
        df.add_column(name.to_string(), series).unwrap();
    }

    // The same expectations hold for both keep options.
    for keep in ["first", "last"] {
        let deduped = df.drop_duplicates(None, Some(keep)).unwrap();

        // Four input rows with one duplicate pair leave three rows.
        assert_eq!(deduped.row_count(), 3);
        assert_eq!(deduped.column_count(), df.column_count());
        assert!(deduped.contains_column("col1"));
        assert!(deduped.contains_column("col2"));
    }
}
/// Checks the flags returned by `duplicated` when only `col1` is passed as the subset
/// with the `"first"` keep option: `col1` repeats "a" in rows 0 and 2, while `col2`
/// holds four distinct values.
#[test]
fn test_duplicated_with_subset() {
    let mut df = DataFrame::new();
    for (name, values) in [
        ("col1", ["a", "b", "a", "c"]),
        ("col2", ["1", "2", "3", "4"]),
    ] {
        let data = values.iter().copied().map(String::from).collect();
        let series = pandrs::Series::new(data, Some(name.to_string())).unwrap();
        df.add_column(name.to_string(), series).unwrap();
    }

    // Only `col1` is handed to `duplicated` as the subset.
    let key_columns = ["col1".to_string()];
    let flags = df.duplicated(Some(&key_columns), Some("first")).unwrap();

    assert_eq!(flags.len(), 4);
    let expected = [false, false, true, false];
    for (row, want) in expected.iter().enumerate() {
        assert_eq!(*flags.get(row).unwrap(), *want);
    }
}