use csv_processor::frame::DataFrameError;
use csv_processor::*;
/// A frame built with `DataFrame::empty()` must report zero rows and zero columns.
#[test]
fn test_empty_dataframe_shape() {
    let frame = DataFrame::empty();
    let (row_count, column_count) = frame.shape();
    assert_eq!(row_count, 0);
    assert_eq!(column_count, 0);
}
/// Loading `samples/sample.csv` is expected to yield a `(10, 8)` shape.
#[test]
fn test_load_df_from_file() {
    let loaded = DataFrame::from_csv("samples/sample.csv").unwrap();
    let expected_shape = (10, 8);
    assert_eq!(loaded.shape(), expected_shape);
}
/// Loading a CSV from a path that does not exist must be reported as an error.
///
/// The returned `Result` is inspected directly rather than relying on
/// `#[should_panic]`, so the test cannot pass because of an unrelated panic.
#[test]
fn test_load_df_from_wrong_path() {
    let result = DataFrame::from_csv("samples/not_exists.csv");
    assert!(
        result.is_err(),
        "expected an error when loading a non-existent file"
    );
}
/// Two headers paired with two columns of three values each give a `(3, 2)` frame.
#[test]
fn test_headers_and_cols_correct_shape() {
    let column_a: Vec<String> = ["a1", "a2", "a3"].iter().map(|s| s.to_string()).collect();
    let column_b: Vec<String> = ["b1", "b2", "b3"].iter().map(|s| s.to_string()).collect();
    let names = vec![String::from("a"), String::from("b")];

    let frame = DataFrame::new(Some(names), vec![column_a, column_b]).unwrap();

    assert_eq!(frame.shape(), (3, 2));
}
/// Without headers, two columns of three values each still give a `(3, 2)` frame.
#[test]
fn test_no_headers_correct_shape() {
    let data: Vec<Vec<String>> = vec![
        vec!["a1".to_owned(), "a2".to_owned(), "a3".to_owned()],
        vec!["b1".to_owned(), "b2".to_owned(), "b3".to_owned()],
    ];
    let frame = DataFrame::new(None, data).unwrap();
    let (row_count, column_count) = frame.shape();
    assert_eq!((row_count, column_count), (3, 2));
}
/// Supplying two headers for three columns must be rejected with an error.
///
/// The returned `Result` is inspected directly rather than relying on
/// `#[should_panic]`, so the test cannot pass because of an unrelated panic.
#[test]
fn test_headers_and_cols_wrong_shape() {
    let headers = vec!["a".to_string(), "b".to_string()];
    let cols = vec![
        vec!["a1".to_string(), "a2".to_string(), "a3".to_string()],
        vec!["b1".to_string(), "b2".to_string(), "b3".to_string()],
        vec!["c1".to_string(), "c2".to_string(), "c3".to_string()],
    ];
    let result = DataFrame::new(Some(headers), cols);
    assert!(
        result.is_err(),
        "expected an error when the header count differs from the column count"
    );
}
/// Columns of unequal length (three values vs. two) must be rejected with an error.
///
/// The returned `Result` is inspected directly rather than relying on
/// `#[should_panic]`, so the test cannot pass because of an unrelated panic.
#[test]
fn test_cols_wrong_shape() {
    let headers = vec!["a".to_string(), "b".to_string()];
    let cols = vec![
        vec!["a1".to_string(), "a2".to_string(), "a3".to_string()],
        vec!["b1".to_string(), "b2".to_string()],
    ];
    let result = DataFrame::new(Some(headers), cols);
    assert!(
        result.is_err(),
        "expected an error when columns have different lengths"
    );
}
/// The `Display` rendering of a frame must mention both a header name and a cell value.
#[test]
fn test_dataframe_display_output() {
    let names = vec![String::from("name"), String::from("age")];
    let data = vec![
        vec![String::from("Alice"), String::from("Bob")],
        vec![String::from("25"), String::from("30")],
    ];
    let frame = DataFrame::new(Some(names), data).unwrap();

    // `to_string` goes through the same `Display` implementation as `format!("{}", ..)`.
    let rendered = frame.to_string();
    for expected in ["name", "Alice"].iter() {
        assert!(rendered.contains(*expected));
    }
}
/// Every in-range index of a two-column frame must resolve to a column.
#[test]
fn test_get_column_valid_index() {
    let data = vec![
        vec![String::from("1"), String::from("2")],
        vec![String::from("a"), String::from("b")],
    ];
    let frame = DataFrame::new(None, data).unwrap();
    for index in 0..2 {
        assert!(frame.get_column(index).is_some());
    }
}
/// Asking a single-column frame for index 5 must yield `None`.
#[test]
fn test_get_column_invalid_index() {
    let only_column = vec!["1".to_owned(), "2".to_owned()];
    let frame = DataFrame::new(None, vec![only_column]).unwrap();
    let out_of_range = frame.get_column(5);
    assert!(out_of_range.is_none());
}
/// An `i64` column holding 1, 2, 3 is expected to sum to 6.0 and average 2.0.
#[test]
fn test_integer_column_statistics() {
    let values: Vec<i64> = vec![1, 2, 3];
    let frame = DataFrame::new(None, vec![values]).unwrap();
    let col = frame.get_column(0).unwrap();
    let (total, average) = (col.sum(), col.mean());
    assert_eq!(total, Some(6.0));
    assert_eq!(average, Some(2.0));
}
/// A column mixing numeric-looking strings with plain text is expected to have no sum.
#[test]
fn test_mixed_type_column() {
    let raw: Vec<String> = ["1", "hello", "3"].iter().map(|s| s.to_string()).collect();
    let frame = DataFrame::from_values(None, vec![raw]).unwrap();
    let col = frame.get_column(0).unwrap();
    assert!(col.sum().is_none());
}
/// A column with one `None` among four entries keeps its full length and counts one null.
#[test]
fn test_column_with_nulls() {
    let with_gap = vec![Some(1), Some(2), None, Some(4)];
    let frame = DataFrame::new(None, vec![with_gap]).unwrap();
    assert_eq!(frame.shape(), (4, 1));

    let col = frame.get_column(0).unwrap();
    assert_eq!(col.len(), 4);
    assert_eq!(col.null_count(), 1);
}
/// A float column built from strings that contains a `"NaN"` entry.
///
/// The expected total of `4.0` (1.5 + 2.5) means the `NaN` entry must not
/// contribute to the sum.
#[test]
fn test_float_column_with_nan() {
    let raw: Vec<String> = ["1.5", "NaN", "2.5"].iter().map(|s| s.to_string()).collect();
    let frame = DataFrame::from_values(None, vec![raw]).unwrap();
    let total = frame.get_column(0).unwrap().sum();
    assert_eq!(total, Some(4.0));
}
/// A boolean column built from strings is expected to sum its `true` entries.
///
/// With values true, false, true the expected sum is `2.0`.
#[test]
fn test_boolean_column_operations() {
    let raw: Vec<String> = ["true", "false", "true"]
        .iter()
        .map(|s| s.to_string())
        .collect();
    let frame = DataFrame::from_values(None, vec![raw]).unwrap();
    let total = frame.get_column(0).unwrap().sum();
    assert_eq!(total, Some(2.0));
}
/// A frame holding exactly one value has a `(1, 1)` shape.
#[test]
fn test_single_row_dataframe() {
    let frame = DataFrame::new(None, vec![vec![String::from("single")]]).unwrap();
    let expected_shape = (1, 1);
    assert_eq!(frame.shape(), expected_shape);
}
/// Two columns that hold no values are expected to give a `(0, 2)` shape.
#[test]
fn test_empty_columns() {
    let data: Vec<Vec<String>> = vec![Vec::new(), Vec::new()];
    let frame = DataFrame::new(None, data).unwrap();
    let (row_count, column_count) = frame.shape();
    assert_eq!(row_count, 0);
    assert_eq!(column_count, 2);
}
/// A frame built directly from `i64` values exposes a column whose sum is 6.0.
#[test]
fn test_typed_integer_column() {
    let numbers: Vec<i64> = vec![1, 2, 3];
    let frame = DataFrame::new(None, vec![numbers]).unwrap();
    let total = frame.get_column(0).unwrap().sum();
    assert_eq!(total, Some(6.0));
}
/// A frame built directly from float values exposes a column whose sum is 7.5.
#[test]
fn test_typed_float_column() {
    let data = vec![vec![1.5, 2.5, 3.5]];
    let frame = DataFrame::new(None, data).unwrap();
    let total = frame.get_column(0).unwrap().sum();
    assert_eq!(total, Some(7.5));
}
/// A frame built directly from `bool` values sums its `true` entries.
///
/// With values true, false, true the expected sum is `2.0`.
#[test]
fn test_typed_boolean_column() {
    let data = vec![vec![true, false, true]];
    let frame = DataFrame::new(None, data).unwrap();
    let total = frame.get_column(0).unwrap().sum();
    assert_eq!(total, Some(2.0));
}
/// Four boxed `i64` columns of three values each give a `(3, 4)` frame via `from_columns`.
///
/// The first column (1, 2, 3) is also expected to have a mean of 2.0.
#[test]
fn test_create_df_from_homog_columns() {
    let raw: Vec<Vec<i64>> = vec![
        vec![1, 2, 3],
        vec![4, 5, 6],
        vec![7, 8, 9],
        vec![10, 11, 12],
    ];
    // Each `Vec<i64>` is converted into a boxed column through its `Into` impl.
    let columns: Vec<Box<dyn ColumnArray>> = raw.into_iter().map(Into::into).collect();

    let frame = DataFrame::from_columns(None, columns).unwrap();

    assert_eq!(frame.shape(), (3, 4));
    let first = frame.get_column(0).unwrap();
    assert_eq!(first.mean().unwrap(), 2.0);
}
/// Columns of different element types can be combined into one frame via `from_columns`.
///
/// Integer, float, string, bool and nullable-bool columns (three entries each)
/// are expected to give a `(3, 5)` shape, with per-column statistics still
/// available afterwards.
#[test]
fn test_create_df_from_heter_columns() {
    let ints: Vec<i64> = vec![1, 2, 3];
    let floats = vec![4.0, 5.0, 6.0];
    let texts = vec![String::from("7"), String::from("8"), String::from("9")];
    let flags = vec![true, false, true];
    let nullable_flags = vec![Some(true), Some(false), None];

    let mut columns: Vec<Box<dyn ColumnArray>> = Vec::with_capacity(5);
    columns.push(ints.into());
    columns.push(floats.into());
    columns.push(texts.into());
    columns.push(flags.into());
    columns.push(nullable_flags.into());

    let frame = DataFrame::from_columns(None, columns).unwrap();
    assert_eq!(frame.shape(), (3, 5));

    let int_col = frame.get_column(0).unwrap();
    assert_eq!(int_col.mean().unwrap(), 2.0);

    let flag_col = frame.get_column(3).unwrap();
    assert_eq!(flag_col.sum().unwrap(), 2.0);

    let nullable_col = frame.get_column(4).unwrap();
    assert_eq!(nullable_col.null_count(), 1);
}
/// Parses a small CSV with a text, an integer and a float column.
///
/// Checks the shape, the header names, that the text column has no sum, and
/// the sum/mean of the numeric columns.
#[test]
fn test_from_strings_basic_csv() {
    let input = "name,age,salary\nAlice,28,75000.5\nBob,35,65000\nCharlie,42,82000";
    let frame = DataFrame::from_strings(input).unwrap();

    assert_eq!(frame.shape(), (3, 3));
    assert_eq!(frame.headers(), &["name", "age", "salary"]);

    let names = frame.get_column(0).unwrap();
    assert!(names.sum().is_none());

    // 28 + 35 + 42 = 105, mean 35.
    let ages = frame.get_column(1).unwrap();
    let (age_total, age_average) = (ages.sum(), ages.mean());
    assert_eq!(age_total, Some(105.0));
    assert_eq!(age_average, Some(35.0));

    // 75000.5 + 65000 + 82000 = 222000.5.
    let salaries = frame.get_column(2).unwrap();
    assert_eq!(salaries.sum(), Some(222000.5));
}
/// Empty CSV fields are expected to be counted as nulls.
///
/// The `name` and `active` columns each contain exactly one empty field.
#[test]
fn test_from_strings_with_nulls() {
    let input = "id,name,active\n1,Alice,true\n2,,false\n3,Charlie,";
    let frame = DataFrame::from_strings(input).unwrap();
    assert_eq!(frame.shape(), (3, 3));

    // Columns 1 (`name`) and 2 (`active`) each have a single missing value.
    for column_index in 1..=2 {
        let col = frame.get_column(column_index).unwrap();
        assert_eq!(col.null_count(), 1);
    }
}
/// Parses a CSV whose first column holds booleans and checks its sum and mean.
///
/// With values true, false, true the expected sum is `2.0` and the expected
/// mean is two thirds. The mean is compared within a small tolerance because
/// two thirds is not exactly representable in binary floating point, so an
/// exact comparison would depend on how the library rounds its result.
#[test]
fn test_from_strings_boolean_column() {
    let csv_content = "flag,status\ntrue,active\nfalse,inactive\ntrue,active";
    let df = DataFrame::from_strings(csv_content).unwrap();
    assert_eq!(df.shape(), (3, 2));

    let flag_col = df.get_column(0).unwrap();
    assert_eq!(flag_col.sum(), Some(2.0));

    let mean = flag_col.mean().expect("boolean column should have a mean");
    let expected_mean = 2.0 / 3.0;
    assert!(
        (mean - expected_mean).abs() < 1e-12,
        "mean was {}, expected approximately {}",
        mean,
        expected_mean
    );
}
/// Parses a CSV with integer, float, text and boolean columns and checks each column's sum.
///
/// The float column total (95.5 + 87.2 + 91.8) is compared within a small
/// tolerance: 87.2 and 91.8 are not exactly representable in binary floating
/// point, so an exact comparison would depend on the order of summation.
#[test]
fn test_from_strings_mixed_types() {
    let csv_content =
        "id,score,name,passed\n1,95.5,Alice,true\n2,87.2,Bob,false\n3,91.8,Charlie,true";
    let df = DataFrame::from_strings(csv_content).unwrap();
    assert_eq!(df.shape(), (3, 4));

    let id_col = df.get_column(0).unwrap();
    assert_eq!(id_col.sum(), Some(6.0));

    let score_col = df.get_column(1).unwrap();
    let score_sum = score_col
        .sum()
        .expect("numeric score column should have a sum");
    assert!(
        (score_sum - 274.5).abs() < 1e-9,
        "score sum was {}, expected approximately 274.5",
        score_sum
    );

    let name_col = df.get_column(2).unwrap();
    assert_eq!(name_col.sum(), None);

    // Two of the three `passed` values are true.
    let passed_col = df.get_column(3).unwrap();
    assert_eq!(passed_col.sum(), Some(2.0));
}
/// A header line followed by one data row gives a `(1, 2)` frame with both header names.
#[test]
fn test_from_strings_single_row() {
    let frame = DataFrame::from_strings("col1,col2\nvalue1,42").unwrap();
    let (row_count, column_count) = frame.shape();
    assert_eq!((row_count, column_count), (1, 2));
    assert_eq!(frame.headers(), &["col1", "col2"]);
}
/// A header line with no data rows.
///
/// The frame is expected to report a `(0, 0)` shape while still exposing the
/// three header names.
#[test]
fn test_from_strings_headers_only() {
    let frame = DataFrame::from_strings("col1,col2,col3").unwrap();
    let expected_shape = (0, 0);
    assert_eq!(frame.shape(), expected_shape);
    assert_eq!(frame.headers(), &["col1", "col2", "col3"]);
}
/// A data row with more fields than the header must make parsing fail.
///
/// Two error variants are accepted: `RowLengthMismatch`, whose fields are then
/// checked (index 2, expected 2, actual 3), or `CsvError`, which is accepted
/// without further checks. Anything else fails the test.
#[test]
fn test_from_strings_malformed_csv() {
    let input = "col1,col2\nvalue1,value2\nextra_value,missing,column";
    let outcome = DataFrame::from_strings(input);
    assert!(outcome.is_err());

    match outcome {
        Err(DataFrameError::CsvError(_)) => {}
        Err(DataFrameError::RowLengthMismatch {
            index,
            expected,
            actual,
        }) => {
            assert_eq!((index, expected, actual), (2, 2, 3));
        }
        other => panic!("Expected RowLengthMismatch or CsvError, got: {:?}", other),
    }
}
/// Parsing an empty string may either succeed or fail with `CsvError`.
///
/// On success the frame must have a `(0, 0)` shape. Any other outcome fails
/// the test.
#[test]
fn test_from_strings_empty_content() {
    let outcome = DataFrame::from_strings("");
    match outcome {
        Err(DataFrameError::CsvError(_)) => {}
        Ok(frame) => assert_eq!(frame.shape(), (0, 0)),
        other => panic!("Expected success or CsvError, got: {:?}", other),
    }
}