use pandrs::error::Result;
use pandrs::{BooleanColumn, Column, Float64Column, Int64Column, OptimizedDataFrame, StringColumn};
mod optimized_io_test_utils;
#[cfg(not(feature = "parquet"))]
use crate::optimized_io_test_utils::ParquetCompression;
#[cfg(feature = "parquet")]
use pandrs::optimized::split_dataframe::io::ParquetCompression;
#[allow(unused_imports)]
use crate::optimized_io_test_utils::{ExcelExt, ParquetExt};
use std::fs;
use std::fs::File;
use std::io::Write;
use tempfile::tempdir;
/// Round-trips a small three-column frame (`id`, `name`, `score`) through a CSV
/// file and checks the reloaded shape, the column names, and the first and last
/// `id` values.
///
/// # Errors
///
/// Propagates any error returned by the `pandrs` calls (`add_column`, `to_csv`,
/// `from_csv`, `column`, `get`), which fails the test.
#[test]
// Silences clippy's `result_large_err` lint for the crate's `Result` return type.
#[allow(clippy::result_large_err)]
fn test_optimized_csv_io() -> Result<()> {
    // The guard is held until the explicit `drop` at the end so the directory
    // outlives every read and write below.
    let dir = tempdir().expect("Failed to create temporary directory");
    let file_path = dir.path().join("test_data.csv");

    // Build the frame that gets written out.
    let mut df = OptimizedDataFrame::new();
    df.add_column("id", Column::Int64(Int64Column::new(vec![1, 2, 3, 4, 5])))?;
    let names: Vec<String> = ["Alice", "Bob", "Charlie", "Dave", "Eve"]
        .iter()
        .map(|&name| String::from(name))
        .collect();
    df.add_column("name", Column::String(StringColumn::new(names)))?;
    df.add_column(
        "score",
        Column::Float64(Float64Column::new(vec![85.5, 92.0, 78.3, 90.1, 88.7])),
    )?;

    // NOTE(review): the `true` passed to both calls is presumably the
    // "has header row" flag; confirm against the `pandrs` API.
    df.to_csv(&file_path, true)?;
    let loaded_df = OptimizedDataFrame::from_csv(&file_path, true)?;

    // Shape and column names must survive the round trip.
    assert_eq!(loaded_df.row_count(), 5);
    assert_eq!(loaded_df.column_count(), 3);
    for expected in ["id", "name", "score"] {
        assert!(
            loaded_df.contains_column(expected),
            "column `{expected}` missing after CSV round trip"
        );
    }

    // The `id` column must come back as Int64 with its values intact.
    let id_view = loaded_df.column("id")?;
    let int_col = id_view
        .as_int64()
        .expect("Could not get ID column as Int64");
    assert_eq!(int_col.get(0)?, Some(1));
    assert_eq!(int_col.get(4)?, Some(5));

    drop(dir);
    Ok(())
}
/// Loads a three-line, header-less CSV file and checks that at least one row
/// comes back.
///
/// The expected column count and the line count are derived from the raw text
/// and printed next to the loaded frame's figures for diagnostics only; the one
/// hard assertion is that the loaded frame is not empty.
#[test]
#[allow(clippy::result_large_err)]
fn test_optimized_csv_without_header() -> Result<()> {
    let dir = tempdir().expect("Failed to create temporary directory");
    let file_path = dir.path().join("test_no_header.csv");

    let csv_content = "1,Alice,85.5\n2,Bob,92.0\n3,Charlie,78.3\n";
    fs::write(&file_path, csv_content).expect("Failed to write CSV file");

    // `false` is passed where the other CSV tests pass `true`.
    // NOTE(review): presumably the "has header" flag; confirm against the API.
    let loaded_df = OptimizedDataFrame::from_csv(&file_path, false)?;

    // Figures taken straight from the raw text, for comparison in the log.
    let first_line = csv_content.lines().next().unwrap();
    let expected_cols = first_line.split(',').count();
    let lines_count = csv_content.lines().count();

    println!("Expected column count calculated from CSV content: {expected_cols}");
    println!("Actual column count: {}", loaded_df.column_count());
    println!("Loaded row count: {}", loaded_df.row_count());
    println!("Column names: {:?}", loaded_df.column_names());
    println!("Line count in CSV: {lines_count}");

    assert!(loaded_df.row_count() > 0);

    drop(dir);
    Ok(())
}
/// Writes a frame with no columns to CSV, reads it back, and checks that the
/// result holds at most one row.
#[test]
#[allow(clippy::result_large_err)]
fn test_optimized_csv_empty_dataframe() -> Result<()> {
    let dir = tempdir().expect("Failed to create temporary directory");
    let file_path = dir.path().join("test_empty.csv");

    // Round-trip a freshly constructed frame with nothing added to it.
    let empty_df = OptimizedDataFrame::new();
    empty_df.to_csv(&file_path, true)?;
    let loaded_df = OptimizedDataFrame::from_csv(&file_path, true)?;

    // The bound is intentionally `<= 1` rather than `== 0`.
    assert!(loaded_df.row_count() <= 1);

    drop(dir);
    Ok(())
}
/// Writes a four-column frame (`id`, `name`, `score`, `active`) to the
/// `TestSheet` sheet of an `.xlsx` file and reads it back.
///
/// Only loose properties are checked: five rows, at least one column, and a
/// first column that is readable as one of the four supported column types.
#[test]
#[allow(clippy::result_large_err)]
fn test_excel_io() -> Result<()> {
    let dir = tempdir()?;
    let excel_path = dir.path().join("test_data.xlsx");

    // Assemble the frame to export, one column per supported type.
    let mut df = OptimizedDataFrame::new();
    df.add_column("id", Column::Int64(Int64Column::new(vec![1, 2, 3, 4, 5])))?;
    let names: Vec<String> = ["Alice", "Bob", "Charlie", "Dave", "Eve"]
        .iter()
        .map(|&name| String::from(name))
        .collect();
    df.add_column("name", Column::String(StringColumn::new(names)))?;
    df.add_column(
        "score",
        Column::Float64(Float64Column::new(vec![85.5, 92.0, 78.3, 90.1, 88.7])),
    )?;
    df.add_column(
        "active",
        Column::Boolean(BooleanColumn::new(vec![true, false, true, false, true])),
    )?;

    // NOTE(review): the trailing `false` on `to_excel` and the `true, 0, None`
    // on `from_excel` are presumably index/header/skip/column-selection
    // options; confirm against `ExcelExt`.
    df.to_excel(&excel_path, Some("TestSheet"), false)?;
    let loaded_df = OptimizedDataFrame::from_excel(&excel_path, Some("TestSheet"), true, 0, None)?;

    assert_eq!(loaded_df.row_count(), 5);
    assert!(loaded_df.column_count() > 0);

    let column_names = loaded_df.column_names();
    println!("Loaded columns from Excel: {column_names:?}");
    assert!(!column_names.is_empty());

    // Probe the first column: it must be viewable as some known column type.
    if let Some(first_name) = column_names.first() {
        let column_view = loaded_df.column(first_name)?;
        assert!(
            column_view.as_int64().is_some()
                || column_view.as_float64().is_some()
                || column_view.as_string().is_some()
                || column_view.as_boolean().is_some()
        );
    }

    drop(dir);
    Ok(())
}
/// Writes a four-column frame (`id`, `name`, `score`, `active`) to a
/// Snappy-compressed Parquet file and reads it back.
///
/// Only loose properties are checked: five rows, at least one column, and a
/// first column that is readable as one of the four supported column types.
#[test]
#[allow(clippy::result_large_err)]
fn test_parquet_io() -> Result<()> {
    let dir = tempdir()?;
    let parquet_path = dir.path().join("test_data.parquet");

    // Assemble the frame to export, one column per supported type.
    let mut df = OptimizedDataFrame::new();

    let ids = Column::Int64(Int64Column::new(vec![1, 2, 3, 4, 5]));
    df.add_column("id", ids)?;

    let names = Column::String(StringColumn::new(vec![
        String::from("Alice"),
        String::from("Bob"),
        String::from("Charlie"),
        String::from("Dave"),
        String::from("Eve"),
    ]));
    df.add_column("name", names)?;

    let scores = Column::Float64(Float64Column::new(vec![85.5, 92.0, 78.3, 90.1, 88.7]));
    df.add_column("score", scores)?;

    let flags = Column::Boolean(BooleanColumn::new(vec![true, false, true, false, true]));
    df.add_column("active", flags)?;

    // Write with Snappy compression, then load the file back.
    df.to_parquet(&parquet_path, Some(ParquetCompression::Snappy))?;
    let loaded_df = OptimizedDataFrame::from_parquet(&parquet_path)?;

    assert_eq!(loaded_df.row_count(), 5);
    assert!(loaded_df.column_count() > 0);

    let column_names = loaded_df.column_names();
    println!("Loaded columns from Parquet: {column_names:?}");
    assert!(!column_names.is_empty());

    // Probe the first column: it must be viewable as some known column type.
    if let Some(first_name) = column_names.first() {
        let column_view = loaded_df.column(first_name)?;
        assert!(
            column_view.as_int64().is_some()
                || column_view.as_float64().is_some()
                || column_view.as_string().is_some()
                || column_view.as_boolean().is_some()
        );
    }

    drop(dir);
    Ok(())
}
/// Chains the two formats: writes a five-row CSV fixture by hand, loads it,
/// saves the result as a Gzip-compressed Parquet file, and loads that back.
///
/// After each load the frame must report five rows and at least one column;
/// the first column of the final frame must be readable as one of the four
/// supported column types.
///
/// # Errors
///
/// Propagates any I/O error from creating or writing the fixture and any error
/// returned by the `pandrs` calls, which fails the test.
#[test]
// Silences clippy's `result_large_err` lint for the crate's `Result` return type.
#[allow(clippy::result_large_err)]
fn test_csv_parquet_integration() -> Result<()> {
    let dir = tempdir()?;
    let csv_path = dir.path().join("test_data.csv");
    let parquet_path = dir.path().join("test_data.parquet");

    // Write the fixture inside its own scope so the handle is flushed and
    // closed before the file is read back.
    {
        let mut file = File::create(&csv_path)?;
        for row in [
            "id,value,name,active",
            "1,1.1,Alice,true",
            "2,2.2,Bob,false",
            "3,3.3,Charlie,true",
            "4,4.4,Dave,false",
            "5,5.5,Eve,true",
        ] {
            writeln!(file, "{row}")?;
        }
        file.flush()?;
    }

    // CSV -> frame.
    let loaded_df = OptimizedDataFrame::from_csv(&csv_path, true)?;
    assert_eq!(loaded_df.row_count(), 5);
    assert!(loaded_df.column_count() > 0);

    // Frame -> Parquet -> frame.
    loaded_df.to_parquet(&parquet_path, Some(ParquetCompression::Gzip))?;
    let loaded_df2 = OptimizedDataFrame::from_parquet(&parquet_path)?;
    assert_eq!(loaded_df2.row_count(), 5);
    assert!(loaded_df2.column_count() > 0);

    // The assertion above guarantees a first column exists, so it is probed
    // without a further guard.
    let column_name = loaded_df2.column_names()[0].clone();
    let column_view = loaded_df2.column(&column_name)?;
    assert!(
        column_view.as_int64().is_some()
            || column_view.as_float64().is_some()
            || column_view.as_string().is_some()
            || column_view.as_boolean().is_some()
    );

    drop(dir);
    Ok(())
}