#![allow(clippy::result_large_err)]
use pandrs::{DataFrame, PandRSError, Series};
use std::time::Instant;
#[test]
fn test_large_dataframe_100k_rows() -> Result<(), PandRSError> {
let start = Instant::now();
let mut df = DataFrame::new();
let data: Vec<i32> = (0..100_000).collect();
let series = Series::new(data, Some("col1".to_string()))?;
df.add_column("col1".to_string(), series)?;
let elapsed = start.elapsed();
println!("Created 100K row DataFrame in {:?}", elapsed);
assert_eq!(df.row_count(), 100_000);
assert!(elapsed.as_secs() < 10, "Should complete within 10 seconds");
Ok(())
}
#[test]
fn test_large_dataframe_1m_rows() -> Result<(), PandRSError> {
let start = Instant::now();
let mut df = DataFrame::new();
let data: Vec<i32> = (0..1_000_000).collect();
let series = Series::new(data, Some("col1".to_string()))?;
df.add_column("col1".to_string(), series)?;
let elapsed = start.elapsed();
println!("Created 1M row DataFrame in {:?}", elapsed);
assert_eq!(df.row_count(), 1_000_000);
assert!(elapsed.as_secs() < 30, "Should complete within 30 seconds");
Ok(())
}
#[test]
fn test_wide_dataframe_100_columns() -> Result<(), PandRSError> {
let start = Instant::now();
let mut df = DataFrame::new();
for i in 0..100 {
let col_name = format!("col_{}", i);
let data: Vec<i32> = (0..1000).map(|j| i * 1000 + j).collect();
let series = Series::new(data, Some(col_name.clone()))?;
df.add_column(col_name, series)?;
}
let elapsed = start.elapsed();
println!("Created wide DataFrame (100 cols) in {:?}", elapsed);
assert_eq!(df.column_count(), 100);
assert_eq!(df.row_count(), 1000);
assert!(elapsed.as_secs() < 10, "Should complete within 10 seconds");
Ok(())
}
#[test]
fn test_memory_repeated_values() -> Result<(), PandRSError> {
let mut df = DataFrame::new();
let data = vec!["repeated_value"; 100_000];
let series = Series::new(data, Some("col1".to_string()))?;
df.add_column("col1".to_string(), series)?;
assert_eq!(df.row_count(), 100_000);
Ok(())
}
#[test]
fn test_string_operations_large_dataset() -> Result<(), PandRSError> {
let start = Instant::now();
let mut df = DataFrame::new();
// Use string data so the column matches the test name; the Series::new call
// with &str values mirrors the pattern used in test_memory_repeated_values.
// Only column creation is timed here; no string operations are applied yet.
let data: Vec<&str> = (0..10_000)
.map(|i| if i % 2 == 0 { "even_value" } else { "odd_value" })
.collect();
let series = Series::new(data, Some("values".to_string()))?;
df.add_column("values".to_string(), series)?;
let elapsed = start.elapsed();
println!("Created 10K-row string DataFrame in {:?}", elapsed);
assert_eq!(df.row_count(), 10_000);
Ok(())
}
#[test]
fn test_sequential_access_performance() -> Result<(), PandRSError> {
let mut df = DataFrame::new();
let data: Vec<i32> = (0..50_000).collect();
let series = Series::new(data, Some("col1".to_string()))?;
df.add_column("col1".to_string(), series)?;
let start = Instant::now();
// NOTE: placeholder — only row_count() and an empty index loop are timed;
// no element access goes through the DataFrame here.
let _row_count = df.row_count();
for _ in 0..df.row_count() {}
let elapsed = start.elapsed();
println!("Sequential index loop over 50K rows (placeholder): {:?}", elapsed);
assert!(elapsed.as_secs() < 5, "Sequential access should be fast");
Ok(())
}
#[test]
fn test_concatenation_performance() -> Result<(), PandRSError> {
let mut dfs = Vec::new();
for i in 0..100 {
let mut df = DataFrame::new();
let data: Vec<i32> = (i * 100..(i + 1) * 100).collect();
let series = Series::new(data, Some("col1".to_string()))?;
df.add_column("col1".to_string(), series)?;
dfs.push(df);
}
let start = Instant::now();
// NOTE: placeholder — no actual concatenation is performed; the loop only
// visits each DataFrame's row count. A Vec-level concatenation sketch follows
// this test.
let _result = DataFrame::new();
for df in &dfs {
let _rows = df.row_count();
}
let elapsed = start.elapsed();
println!("Visited 100 DataFrames (concatenation placeholder) in {:?}", elapsed);
assert!(elapsed.as_secs() < 5, "Concatenation should be fast");
Ok(())
}
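// A minimal sketch of an end-to-end concatenation measurement, assuming the
// concatenation is done manually at the Vec level (a pandrs concat API, if any,
// is not exercised in this file): the per-chunk data is appended into one Vec
// and loaded into a single DataFrame through the known add_column path.
#[test]
fn test_concatenation_vec_sketch() -> Result<(), PandRSError> {
let chunks: Vec<Vec<i32>> = (0..100).map(|i| (i * 100..(i + 1) * 100).collect()).collect();
let start = Instant::now();
let mut combined: Vec<i32> = Vec::with_capacity(100 * 100);
for chunk in &chunks {
combined.extend_from_slice(chunk);
}
let mut result = DataFrame::new();
let series = Series::new(combined, Some("col1".to_string()))?;
result.add_column("col1".to_string(), series)?;
let elapsed = start.elapsed();
println!("Manually concatenated 100 chunks into one DataFrame in {:?}", elapsed);
assert_eq!(result.row_count(), 10_000);
assert!(elapsed.as_secs() < 5, "Manual concatenation should be fast");
Ok(())
}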
#[test]
fn test_numeric_aggregation_performance() -> Result<(), PandRSError> {
let mut df = DataFrame::new();
let data: Vec<f64> = (0..100_000).map(|i| i as f64).collect();
let series = Series::new(data, Some("numbers".to_string()))?;
df.add_column("numbers".to_string(), series)?;
let start = Instant::now();
// NOTE: placeholder — only row_count() is timed; no aggregation is computed
// here. A Vec-level aggregation sketch follows this test.
let _row_count = df.row_count();
let elapsed = start.elapsed();
println!("Aggregation placeholder over 100K rows: {:?}", elapsed);
assert!(elapsed.as_secs() < 2, "Aggregations should be fast");
Ok(())
}
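// A minimal sketch of the aggregations the placeholder above is meant to cover,
// computed directly on the source Vec<f64> with std iterators rather than
// through any pandrs aggregation API (which is not exercised in this file).
#[test]
fn test_numeric_aggregation_vec_sketch() -> Result<(), PandRSError> {
let data: Vec<f64> = (0..100_000).map(|i| i as f64).collect();
let start = Instant::now();
let sum: f64 = data.iter().sum();
let mean = sum / data.len() as f64;
let min = data.iter().copied().fold(f64::INFINITY, f64::min);
let max = data.iter().copied().fold(f64::NEG_INFINITY, f64::max);
let elapsed = start.elapsed();
println!("Vec-level sum/mean/min/max over 100K rows: {:?}", elapsed);
assert!((sum - 4_999_950_000.0).abs() < 1e-6);
assert!((mean - 49_999.5).abs() < 1e-9);
assert_eq!(min, 0.0);
assert_eq!(max, 99_999.0);
assert!(elapsed.as_secs() < 2, "Aggregations should be fast");
Ok(())
}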
#[test]
fn test_groupby_many_groups() -> Result<(), PandRSError> {
let mut df = DataFrame::new();
let groups: Vec<i32> = (0..50_000).map(|i| i % 1000).collect();
let values: Vec<f64> = (0..50_000).map(|i| (i as f64) * 1.5).collect();
df.add_column(
"group".to_string(),
Series::new(groups, Some("group".to_string()))?,
)?;
df.add_column(
"value".to_string(),
Series::new(values, Some("value".to_string()))?,
)?;
let start = Instant::now();
// NOTE: placeholder — only row_count() is timed; no group-by is performed
// here. A HashMap-based group-by sketch follows this test.
let _row_count = df.row_count();
let elapsed = start.elapsed();
println!("GroupBy placeholder (1000 groups, 50K rows): {:?}", elapsed);
assert!(
elapsed.as_secs() < 5,
"GroupBy should complete reasonably fast"
);
Ok(())
}
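// A minimal sketch of the group-by the placeholder above is meant to time:
// a HashMap-based per-group sum over the raw key/value Vecs. This stands in
// for a pandrs group-by API, which is not exercised in this file.
#[test]
fn test_groupby_hashmap_sketch() -> Result<(), PandRSError> {
use std::collections::HashMap;
let groups: Vec<i32> = (0..50_000).map(|i| i % 1000).collect();
let values: Vec<f64> = (0..50_000).map(|i| (i as f64) * 1.5).collect();
let start = Instant::now();
let mut sums: HashMap<i32, f64> = HashMap::new();
for (key, value) in groups.iter().zip(values.iter()) {
*sums.entry(*key).or_insert(0.0) += *value;
}
let elapsed = start.elapsed();
println!("HashMap group-by (1000 groups, 50K rows): {:?}", elapsed);
assert_eq!(sums.len(), 1000);
assert!(elapsed.as_secs() < 5, "Group-by should complete reasonably fast");
Ok(())
}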
#[test]
fn test_sorting_performance() -> Result<(), PandRSError> {
let mut df = DataFrame::new();
let data: Vec<i32> = (0..50_000).rev().collect();
let series = Series::new(data, Some("col1".to_string()))?;
df.add_column("col1".to_string(), series)?;
let start = Instant::now();
// NOTE: placeholder — only row_count() is timed; the column is never sorted
// here. A Vec-level sort sketch follows this test.
let _row_count = df.row_count();
let elapsed = start.elapsed();
println!("Sort placeholder over 50K reversed rows: {:?}", elapsed);
assert!(elapsed.as_secs() < 5, "Sorting should be efficient");
Ok(())
}
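// A minimal sketch of an end-to-end sort measurement: the reversed data is
// sorted at the Vec level with std's sort_unstable, then loaded through the
// known add_column path. A pandrs sort API, if any, is not exercised here.
#[test]
fn test_sorting_vec_sketch() -> Result<(), PandRSError> {
let mut data: Vec<i32> = (0..50_000).rev().collect();
let start = Instant::now();
data.sort_unstable();
let elapsed = start.elapsed();
println!("Sorted 50K reversed i32 values in {:?}", elapsed);
assert_eq!(data.first(), Some(&0));
assert_eq!(data.last(), Some(&49_999));
let mut df = DataFrame::new();
let series = Series::new(data, Some("col1".to_string()))?;
df.add_column("col1".to_string(), series)?;
assert_eq!(df.row_count(), 50_000);
assert!(elapsed.as_secs() < 5, "Sorting should be efficient");
Ok(())
}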
#[test]
fn test_join_performance() -> Result<(), PandRSError> {
let mut df_left = DataFrame::new();
let left_keys: Vec<i32> = (0..10_000).collect();
let left_values: Vec<f64> = (0..10_000).map(|i| i as f64 * 2.0).collect();
df_left.add_column(
"key".to_string(),
Series::new(left_keys, Some("key".to_string()))?,
)?;
df_left.add_column(
"left_val".to_string(),
Series::new(left_values, Some("left_val".to_string()))?,
)?;
let mut df_right = DataFrame::new();
let right_keys: Vec<i32> = (0..10_000).collect();
let right_values: Vec<f64> = (0..10_000).map(|i| i as f64 * 3.0).collect();
df_right.add_column(
"key".to_string(),
Series::new(right_keys, Some("key".to_string()))?,
)?;
df_right.add_column(
"right_val".to_string(),
Series::new(right_values, Some("right_val".to_string()))?,
)?;
let start = Instant::now();
// NOTE: placeholder — only the two row counts are read; no join is performed
// here. A HashMap-based hash-join sketch follows this test.
let _left_rows = df_left.row_count();
let _right_rows = df_right.row_count();
let elapsed = start.elapsed();
println!("Join placeholder over 2x10K rows: {:?}", elapsed);
assert!(
elapsed.as_secs() < 10,
"Join should complete reasonably fast"
);
Ok(())
}
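// A minimal sketch of the inner join the placeholder above is meant to time:
// a HashMap-based hash join (build on the right side, probe from the left)
// over the raw key/value Vecs. This stands in for a pandrs join API, which is
// not exercised in this file.
#[test]
fn test_join_hashmap_sketch() -> Result<(), PandRSError> {
use std::collections::HashMap;
let left_keys: Vec<i32> = (0..10_000).collect();
let left_values: Vec<f64> = (0..10_000).map(|i| i as f64 * 2.0).collect();
let right_keys: Vec<i32> = (0..10_000).collect();
let right_values: Vec<f64> = (0..10_000).map(|i| i as f64 * 3.0).collect();
let start = Instant::now();
// Build phase: index the right side by key.
let right_index: HashMap<i32, f64> = right_keys.iter().copied().zip(right_values.iter().copied()).collect();
// Probe phase: look up each left key and emit the matched triple.
let joined: Vec<(i32, f64, f64)> = left_keys
.iter()
.zip(left_values.iter())
.filter_map(|(k, lv)| right_index.get(k).map(|rv| (*k, *lv, *rv)))
.collect();
let elapsed = start.elapsed();
println!("Hash join of 2x10K rows: {:?}", elapsed);
assert_eq!(joined.len(), 10_000);
assert!(elapsed.as_secs() < 10, "Join should complete reasonably fast");
Ok(())
}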
#[test]
fn test_memory_alternating_values() -> Result<(), PandRSError> {
let mut df = DataFrame::new();
let data: Vec<i64> = (0..10_000)
.map(|i| {
if i % 2 == 0 {
i as i64
} else {
(i as i64) * 1_000_000
}
})
.collect();
let series = Series::new(data, Some("mixed".to_string()))?;
df.add_column("mixed".to_string(), series)?;
assert_eq!(df.row_count(), 10_000);
Ok(())
}
#[test]
fn test_iteration_performance() -> Result<(), PandRSError> {
let mut df = DataFrame::new();
let data: Vec<i32> = (0..100_000).collect();
let series = Series::new(data, Some("col1".to_string()))?;
df.add_column("col1".to_string(), series)?;
let start = Instant::now();
// NOTE: placeholder — this times an empty loop over the row indices; the
// values themselves are not touched. A value-iteration sketch follows this test.
for _i in 0..df.row_count() {}
let elapsed = start.elapsed();
println!("Index loop over 100K rows (placeholder): {:?}", elapsed);
assert!(elapsed.as_secs() < 5, "Iteration should be efficient");
Ok(())
}
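// A minimal sketch of timing iteration over actual values rather than an empty
// index loop. std::hint::black_box keeps the compiler from optimising the loop
// away; the data stays in a plain Vec because no pandrs row accessor is
// exercised in this file.
#[test]
fn test_iteration_values_sketch() -> Result<(), PandRSError> {
let data: Vec<i32> = (0..100_000).collect();
let start = Instant::now();
let mut checksum: i64 = 0;
for value in &data {
checksum += i64::from(*value);
}
std::hint::black_box(checksum);
let elapsed = start.elapsed();
println!("Iterated 100K values in {:?}", elapsed);
assert_eq!(checksum, 4_999_950_000);
assert!(elapsed.as_secs() < 5, "Iteration should be efficient");
Ok(())
}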
#[test]
fn test_filter_performance() -> Result<(), PandRSError> {
let mut df = DataFrame::new();
let data: Vec<i32> = (0..100_000).collect();
let series = Series::new(data, Some("col1".to_string()))?;
df.add_column("col1".to_string(), series)?;
let start = Instant::now();
// NOTE: placeholder — only row_count() is timed; no filter is applied here.
// A Vec-level filter sketch follows this test.
let _row_count = df.row_count();
let elapsed = start.elapsed();
println!("Filter placeholder over 100K rows: {:?}", elapsed);
assert!(elapsed.as_secs() < 3, "Filtering should be fast");
Ok(())
}
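// A minimal sketch of the filter the placeholder above is meant to time,
// applied at the Vec level with std iterators and then reloaded through the
// known add_column path; a pandrs filter API is not exercised here.
#[test]
fn test_filter_vec_sketch() -> Result<(), PandRSError> {
let data: Vec<i32> = (0..100_000).collect();
let start = Instant::now();
let filtered: Vec<i32> = data.iter().copied().filter(|v| v % 2 == 0).collect();
let elapsed = start.elapsed();
println!("Filtered 100K values down to {} in {:?}", filtered.len(), elapsed);
assert_eq!(filtered.len(), 50_000);
let mut df = DataFrame::new();
let series = Series::new(filtered, Some("col1".to_string()))?;
df.add_column("col1".to_string(), series)?;
assert_eq!(df.row_count(), 50_000);
assert!(elapsed.as_secs() < 3, "Filtering should be fast");
Ok(())
}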
#[test]
fn test_simd_numeric_operations() -> Result<(), PandRSError> {
let mut df = DataFrame::new();
let data: Vec<f64> = (0..100_000).map(|i| i as f64).collect();
let series = Series::new(data, Some("numbers".to_string()))?;
df.add_column("numbers".to_string(), series)?;
let start = Instant::now();
// NOTE: placeholder — only row_count() is timed; no SIMD (or any numeric)
// kernel runs here. An auto-vectorisation-friendly sum sketch follows this test.
let _row_count = df.row_count();
let elapsed = start.elapsed();
println!("SIMD placeholder over 100K f64 values: {:?}", elapsed);
assert!(
elapsed.as_millis() < 500,
"SIMD operations should be very fast"
);
Ok(())
}
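// A minimal sketch of an auto-vectorisation-friendly kernel: a straight sum
// over a contiguous f64 slice, which LLVM can typically vectorise in release
// builds. Explicit SIMD (std::simd or platform intrinsics) and any pandrs
// SIMD path are deliberately not used here.
#[test]
fn test_simd_friendly_sum_sketch() -> Result<(), PandRSError> {
let data: Vec<f64> = (0..100_000).map(|i| i as f64).collect();
let start = Instant::now();
let sum: f64 = data.iter().sum();
let elapsed = start.elapsed();
println!("Summed 100K f64 values in {:?}", elapsed);
assert!((sum - 4_999_950_000.0).abs() < 1e-6);
assert!(elapsed.as_millis() < 500, "A contiguous f64 sum should be very fast");
Ok(())
}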
#[test]
fn test_parallel_processing_simulation() -> Result<(), PandRSError> {
let mut dfs = Vec::new();
for i in 0..10 {
let mut df = DataFrame::new();
let data: Vec<i32> = (i * 10_000..(i + 1) * 10_000).collect();
let series = Series::new(data, Some("col1".to_string()))?;
df.add_column("col1".to_string(), series)?;
dfs.push(df);
}
let start = Instant::now();
// NOTE: the "parallel" processing is simulated — the DataFrames are visited
// sequentially and only their row counts are read. A std::thread-based sketch
// follows this test.
for df in &dfs {
let _rows = df.row_count();
}
let elapsed = start.elapsed();
println!("Sequentially visited 10 DataFrames (100K total rows) in {:?}", elapsed);
assert!(
elapsed.as_secs() < 10,
"Parallel processing should be efficient"
);
Ok(())
}
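// A minimal sketch of genuinely parallel processing using std::thread::scope:
// each chunk of raw i32 data is summed on its own thread. Plain Vecs are used
// because it is not verified here that pandrs DataFrames are Send.
#[test]
fn test_parallel_chunk_sum_sketch() -> Result<(), PandRSError> {
let chunks: Vec<Vec<i32>> = (0..10).map(|i| (i * 10_000..(i + 1) * 10_000).collect()).collect();
let start = Instant::now();
let total: i64 = std::thread::scope(|scope| {
// Spawn one worker per chunk, then join them all and add up the partial sums.
let handles: Vec<_> = chunks
.iter()
.map(|chunk| scope.spawn(move || chunk.iter().map(|&v| i64::from(v)).sum::<i64>()))
.collect();
handles.into_iter().map(|h| h.join().expect("worker thread panicked")).sum::<i64>()
});
let elapsed = start.elapsed();
println!("Parallel sum of 10 chunks (100K total rows) in {:?}", elapsed);
assert_eq!(total, 4_999_950_000);
assert!(elapsed.as_secs() < 10, "Parallel processing should be efficient");
Ok(())
}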