use crate::column::{Column, ColumnTrait, ColumnType};
use crate::error::{Error, Result};
use crate::optimized::dataframe::OptimizedDataFrame;
use crate::optimized::jit::simd::{
simd_max_f64, simd_max_i64, simd_mean_f64, simd_mean_i64, simd_min_f64, simd_min_i64,
simd_sum_f64, simd_sum_i64,
};
impl OptimizedDataFrame {
pub fn sum_direct(&self, column_name: &str) -> Result<f64> {
let column_view = self.column(column_name)?;
let column = column_view.column();
match column {
Column::Float64(col) => Ok(col.sum()),
Column::Int64(col) => Ok(col.sum() as f64),
Column::String(_) => Err(Error::ColumnTypeMismatch {
name: column_name.to_string(),
expected: ColumnType::Float64,
found: ColumnType::String,
}),
Column::Boolean(_) => Err(Error::ColumnTypeMismatch {
name: column_name.to_string(),
expected: ColumnType::Float64,
found: ColumnType::Boolean,
}),
}
}
pub fn mean_direct(&self, column_name: &str) -> Result<f64> {
let column_view = self.column(column_name)?;
let column = column_view.column();
match column {
Column::Float64(col) => col.mean().ok_or(Error::EmptyDataFrame(format!(
"Column '{}' is empty",
column_name
))),
Column::Int64(col) => col.mean().ok_or(Error::EmptyDataFrame(format!(
"Column '{}' is empty",
column_name
))),
Column::String(_) => Err(Error::ColumnTypeMismatch {
name: column_name.to_string(),
expected: ColumnType::Float64,
found: ColumnType::String,
}),
Column::Boolean(_) => Err(Error::ColumnTypeMismatch {
name: column_name.to_string(),
expected: ColumnType::Float64,
found: ColumnType::Boolean,
}),
}
}
pub fn max_direct(&self, column_name: &str) -> Result<f64> {
let column_view = self.column(column_name)?;
let column = column_view.column();
match column {
Column::Float64(col) => col.max().ok_or(Error::EmptyDataFrame(format!(
"Column '{}' is empty",
column_name
))),
Column::Int64(col) => {
col.max()
.map(|v| v as f64)
.ok_or(Error::EmptyDataFrame(format!(
"Column '{}' is empty",
column_name
)))
}
Column::String(_) => Err(Error::ColumnTypeMismatch {
name: column_name.to_string(),
expected: ColumnType::Float64,
found: ColumnType::String,
}),
Column::Boolean(_) => Err(Error::ColumnTypeMismatch {
name: column_name.to_string(),
expected: ColumnType::Float64,
found: ColumnType::Boolean,
}),
}
}
pub fn min_direct(&self, column_name: &str) -> Result<f64> {
let column_view = self.column(column_name)?;
let column = column_view.column();
match column {
Column::Float64(col) => col.min().ok_or(Error::EmptyDataFrame(format!(
"Column '{}' is empty",
column_name
))),
Column::Int64(col) => {
col.min()
.map(|v| v as f64)
.ok_or(Error::EmptyDataFrame(format!(
"Column '{}' is empty",
column_name
)))
}
Column::String(_) => Err(Error::ColumnTypeMismatch {
name: column_name.to_string(),
expected: ColumnType::Float64,
found: ColumnType::String,
}),
Column::Boolean(_) => Err(Error::ColumnTypeMismatch {
name: column_name.to_string(),
expected: ColumnType::Float64,
found: ColumnType::Boolean,
}),
}
}
pub fn count_direct(&self, column_name: &str) -> Result<usize> {
let column_view = self.column(column_name)?;
let column = column_view.column();
match column {
Column::Float64(col) => Ok(col.len()),
Column::Int64(col) => Ok(col.len()),
Column::String(col) => Ok(col.len()),
Column::Boolean(col) => Ok(col.len()),
}
}
pub fn sum_simd(&self, column_name: &str) -> Result<f64> {
let column_view = self.column(column_name)?;
let column = column_view.column();
match column {
Column::Float64(col) => {
if col.null_mask.is_none() {
Ok(simd_sum_f64(&col.data))
} else {
Ok(col.sum()) }
}
Column::Int64(col) => {
if col.null_mask.is_none() {
Ok(simd_sum_i64(&col.data) as f64)
} else {
Ok(col.sum() as f64)
}
}
Column::String(_) => Err(Error::ColumnTypeMismatch {
name: column_name.to_string(),
expected: ColumnType::Float64,
found: ColumnType::String,
}),
Column::Boolean(_) => Err(Error::ColumnTypeMismatch {
name: column_name.to_string(),
expected: ColumnType::Float64,
found: ColumnType::Boolean,
}),
}
}
pub fn mean_simd(&self, column_name: &str) -> Result<f64> {
let column_view = self.column(column_name)?;
let column = column_view.column();
match column {
Column::Float64(col) => {
if col.null_mask.is_none() {
if col.data.is_empty() {
Err(Error::EmptyDataFrame(format!(
"Column '{}' is empty",
column_name
)))
} else {
Ok(simd_mean_f64(&col.data))
}
} else {
col.mean().ok_or(Error::EmptyDataFrame(format!(
"Column '{}' is empty",
column_name
)))
}
}
Column::Int64(col) => {
if col.null_mask.is_none() {
if col.data.is_empty() {
Err(Error::EmptyDataFrame(format!(
"Column '{}' is empty",
column_name
)))
} else {
Ok(simd_mean_i64(&col.data) as f64)
}
} else {
col.mean().ok_or(Error::EmptyDataFrame(format!(
"Column '{}' is empty",
column_name
)))
}
}
Column::String(_) => Err(Error::ColumnTypeMismatch {
name: column_name.to_string(),
expected: ColumnType::Float64,
found: ColumnType::String,
}),
Column::Boolean(_) => Err(Error::ColumnTypeMismatch {
name: column_name.to_string(),
expected: ColumnType::Float64,
found: ColumnType::Boolean,
}),
}
}
pub fn max_simd(&self, column_name: &str) -> Result<f64> {
let column_view = self.column(column_name)?;
let column = column_view.column();
match column {
Column::Float64(col) => {
if col.null_mask.is_none() {
if col.data.is_empty() {
Err(Error::EmptyDataFrame(format!(
"Column '{}' is empty",
column_name
)))
} else {
Ok(simd_max_f64(&col.data))
}
} else {
col.max().ok_or(Error::EmptyDataFrame(format!(
"Column '{}' is empty",
column_name
)))
}
}
Column::Int64(col) => {
if col.null_mask.is_none() {
if col.data.is_empty() {
Err(Error::EmptyDataFrame(format!(
"Column '{}' is empty",
column_name
)))
} else {
Ok(simd_max_i64(&col.data) as f64)
}
} else {
col.max()
.map(|v| v as f64)
.ok_or(Error::EmptyDataFrame(format!(
"Column '{}' is empty",
column_name
)))
}
}
Column::String(_) => Err(Error::ColumnTypeMismatch {
name: column_name.to_string(),
expected: ColumnType::Float64,
found: ColumnType::String,
}),
Column::Boolean(_) => Err(Error::ColumnTypeMismatch {
name: column_name.to_string(),
expected: ColumnType::Float64,
found: ColumnType::Boolean,
}),
}
}
pub fn min_simd(&self, column_name: &str) -> Result<f64> {
let column_view = self.column(column_name)?;
let column = column_view.column();
match column {
Column::Float64(col) => {
if col.null_mask.is_none() {
if col.data.is_empty() {
Err(Error::EmptyDataFrame(format!(
"Column '{}' is empty",
column_name
)))
} else {
Ok(simd_min_f64(&col.data))
}
} else {
col.min().ok_or(Error::EmptyDataFrame(format!(
"Column '{}' is empty",
column_name
)))
}
}
Column::Int64(col) => {
if col.null_mask.is_none() {
if col.data.is_empty() {
Err(Error::EmptyDataFrame(format!(
"Column '{}' is empty",
column_name
)))
} else {
Ok(simd_min_i64(&col.data) as f64)
}
} else {
col.min()
.map(|v| v as f64)
.ok_or(Error::EmptyDataFrame(format!(
"Column '{}' is empty",
column_name
)))
}
}
Column::String(_) => Err(Error::ColumnTypeMismatch {
name: column_name.to_string(),
expected: ColumnType::Float64,
found: ColumnType::String,
}),
Column::Boolean(_) => Err(Error::ColumnTypeMismatch {
name: column_name.to_string(),
expected: ColumnType::Float64,
found: ColumnType::Boolean,
}),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::column::{Float64Column, Int64Column};
use crate::series::Series;
fn create_test_dataframe() -> OptimizedDataFrame {
let mut df = OptimizedDataFrame::new();
let float_data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let float_column = Float64Column::new(float_data.clone());
df.add_column("float_col".to_string(), Column::Float64(float_column))
.expect("operation should succeed");
let int_data = vec![10, 20, 30, 40, 50];
let int_column = Int64Column::new(int_data.clone());
df.add_column("int_col".to_string(), Column::Int64(int_column))
.expect("operation should succeed");
df
}
#[test]
fn test_sum_direct() {
let df = create_test_dataframe();
let result = df
.sum_direct("float_col")
.expect("operation should succeed");
assert_eq!(result, 15.0);
let result = df.sum_direct("int_col").expect("operation should succeed");
assert_eq!(result, 150.0);
}
#[test]
fn test_mean_direct() {
let df = create_test_dataframe();
let result = df
.mean_direct("float_col")
.expect("operation should succeed");
assert_eq!(result, 3.0);
let result = df.mean_direct("int_col").expect("operation should succeed");
assert_eq!(result, 30.0);
}
#[test]
fn test_max_direct() {
let df = create_test_dataframe();
let result = df
.max_direct("float_col")
.expect("operation should succeed");
assert_eq!(result, 5.0);
let result = df.max_direct("int_col").expect("operation should succeed");
assert_eq!(result, 50.0);
}
#[test]
fn test_min_direct() {
let df = create_test_dataframe();
let result = df
.min_direct("float_col")
.expect("operation should succeed");
assert_eq!(result, 1.0);
let result = df.min_direct("int_col").expect("operation should succeed");
assert_eq!(result, 10.0);
}
#[test]
fn test_count_direct() {
let df = create_test_dataframe();
let result = df
.count_direct("float_col")
.expect("operation should succeed");
assert_eq!(result, 5);
let result = df
.count_direct("int_col")
.expect("operation should succeed");
assert_eq!(result, 5);
}
#[test]
fn test_invalid_column() {
let df = create_test_dataframe();
let result = df.sum_direct("nonexistent");
assert!(result.is_err());
}
#[test]
fn test_sum_simd() {
let df = create_test_dataframe();
let result = df.sum_simd("float_col").expect("operation should succeed");
assert_eq!(result, 15.0);
let result = df.sum_simd("int_col").expect("operation should succeed");
assert_eq!(result, 150.0);
}
#[test]
fn test_mean_simd() {
let df = create_test_dataframe();
let result = df.mean_simd("float_col").expect("operation should succeed");
assert_eq!(result, 3.0);
let result = df.mean_simd("int_col").expect("operation should succeed");
assert_eq!(result, 30.0);
}
#[test]
fn test_max_simd() {
let df = create_test_dataframe();
let result = df.max_simd("float_col").expect("operation should succeed");
assert_eq!(result, 5.0);
let result = df.max_simd("int_col").expect("operation should succeed");
assert_eq!(result, 50.0);
}
#[test]
fn test_min_simd() {
let df = create_test_dataframe();
let result = df.min_simd("float_col").expect("operation should succeed");
assert_eq!(result, 1.0);
let result = df.min_simd("int_col").expect("operation should succeed");
assert_eq!(result, 10.0);
}
#[test]
fn test_simd_vs_direct_consistency() {
let df = create_test_dataframe();
assert_eq!(
df.sum_direct("float_col")
.expect("operation should succeed"),
df.sum_simd("float_col").expect("operation should succeed")
);
assert_eq!(
df.mean_direct("float_col")
.expect("operation should succeed"),
df.mean_simd("float_col").expect("operation should succeed")
);
assert_eq!(
df.max_direct("float_col")
.expect("operation should succeed"),
df.max_simd("float_col").expect("operation should succeed")
);
assert_eq!(
df.min_direct("float_col")
.expect("operation should succeed"),
df.min_simd("float_col").expect("operation should succeed")
);
assert_eq!(
df.sum_direct("int_col").expect("operation should succeed"),
df.sum_simd("int_col").expect("operation should succeed")
);
assert_eq!(
df.mean_direct("int_col").expect("operation should succeed"),
df.mean_simd("int_col").expect("operation should succeed")
);
assert_eq!(
df.max_direct("int_col").expect("operation should succeed"),
df.max_simd("int_col").expect("operation should succeed")
);
assert_eq!(
df.min_direct("int_col").expect("operation should succeed"),
df.min_simd("int_col").expect("operation should succeed")
);
}
#[test]
fn test_simd_performance_with_large_dataset() {
let mut df = OptimizedDataFrame::new();
let large_float_data: Vec<f64> = (1..=10000).map(|i| i as f64 * 0.1).collect();
let large_int_data: Vec<i64> = (1..=10000).map(|i| i * 10).collect();
let float_column = Float64Column::new(large_float_data.clone());
let int_column = Int64Column::new(large_int_data.clone());
df.add_column("large_float".to_string(), Column::Float64(float_column))
.expect("operation should succeed");
df.add_column("large_int".to_string(), Column::Int64(int_column))
.expect("operation should succeed");
let sum_result = df
.sum_simd("large_float")
.expect("operation should succeed");
let expected_sum: f64 = large_float_data.iter().sum();
assert!((sum_result - expected_sum).abs() < 1e-10);
let mean_result = df
.mean_simd("large_float")
.expect("operation should succeed");
let expected_mean = expected_sum / large_float_data.len() as f64;
assert!((mean_result - expected_mean).abs() < 1e-10);
assert_eq!(
df.sum_direct("large_float")
.expect("operation should succeed"),
df.sum_simd("large_float")
.expect("operation should succeed")
);
assert_eq!(
df.mean_direct("large_float")
.expect("operation should succeed"),
df.mean_simd("large_float")
.expect("operation should succeed")
);
assert_eq!(
df.max_direct("large_int")
.expect("operation should succeed"),
df.max_simd("large_int").expect("operation should succeed")
);
assert_eq!(
df.min_direct("large_int")
.expect("operation should succeed"),
df.min_simd("large_int").expect("operation should succeed")
);
}
}