1use crate::column::{Column, ColumnTrait, ColumnType};
15use crate::error::{Error, Result};
16use crate::optimized::dataframe::OptimizedDataFrame;
17use crate::optimized::jit::simd::{
18 simd_max_f64, simd_max_i64, simd_mean_f64, simd_mean_i64, simd_min_f64, simd_min_i64,
19 simd_sum_f64, simd_sum_i64,
20};
21
22impl OptimizedDataFrame {
24 pub fn sum_direct(&self, column_name: &str) -> Result<f64> {
31 let column_view = self.column(column_name)?;
32 let column = column_view.column();
33
34 match column {
35 Column::Float64(col) => Ok(col.sum()),
36 Column::Int64(col) => Ok(col.sum() as f64),
37 Column::String(_) => Err(Error::ColumnTypeMismatch {
38 name: column_name.to_string(),
39 expected: ColumnType::Float64,
40 found: ColumnType::String,
41 }),
42 Column::Boolean(_) => Err(Error::ColumnTypeMismatch {
43 name: column_name.to_string(),
44 expected: ColumnType::Float64,
45 found: ColumnType::Boolean,
46 }),
47 }
48 }
49
50 pub fn mean_direct(&self, column_name: &str) -> Result<f64> {
52 let column_view = self.column(column_name)?;
53 let column = column_view.column();
54
55 match column {
56 Column::Float64(col) => col.mean().ok_or(Error::EmptyDataFrame(format!(
57 "Column '{}' is empty",
58 column_name
59 ))),
60 Column::Int64(col) => col.mean().ok_or(Error::EmptyDataFrame(format!(
61 "Column '{}' is empty",
62 column_name
63 ))),
64 Column::String(_) => Err(Error::ColumnTypeMismatch {
65 name: column_name.to_string(),
66 expected: ColumnType::Float64,
67 found: ColumnType::String,
68 }),
69 Column::Boolean(_) => Err(Error::ColumnTypeMismatch {
70 name: column_name.to_string(),
71 expected: ColumnType::Float64,
72 found: ColumnType::Boolean,
73 }),
74 }
75 }
76
77 pub fn max_direct(&self, column_name: &str) -> Result<f64> {
79 let column_view = self.column(column_name)?;
80 let column = column_view.column();
81
82 match column {
83 Column::Float64(col) => col.max().ok_or(Error::EmptyDataFrame(format!(
84 "Column '{}' is empty",
85 column_name
86 ))),
87 Column::Int64(col) => {
88 col.max()
89 .map(|v| v as f64)
90 .ok_or(Error::EmptyDataFrame(format!(
91 "Column '{}' is empty",
92 column_name
93 )))
94 }
95 Column::String(_) => Err(Error::ColumnTypeMismatch {
96 name: column_name.to_string(),
97 expected: ColumnType::Float64,
98 found: ColumnType::String,
99 }),
100 Column::Boolean(_) => Err(Error::ColumnTypeMismatch {
101 name: column_name.to_string(),
102 expected: ColumnType::Float64,
103 found: ColumnType::Boolean,
104 }),
105 }
106 }
107
108 pub fn min_direct(&self, column_name: &str) -> Result<f64> {
110 let column_view = self.column(column_name)?;
111 let column = column_view.column();
112
113 match column {
114 Column::Float64(col) => col.min().ok_or(Error::EmptyDataFrame(format!(
115 "Column '{}' is empty",
116 column_name
117 ))),
118 Column::Int64(col) => {
119 col.min()
120 .map(|v| v as f64)
121 .ok_or(Error::EmptyDataFrame(format!(
122 "Column '{}' is empty",
123 column_name
124 )))
125 }
126 Column::String(_) => Err(Error::ColumnTypeMismatch {
127 name: column_name.to_string(),
128 expected: ColumnType::Float64,
129 found: ColumnType::String,
130 }),
131 Column::Boolean(_) => Err(Error::ColumnTypeMismatch {
132 name: column_name.to_string(),
133 expected: ColumnType::Float64,
134 found: ColumnType::Boolean,
135 }),
136 }
137 }
138
139 pub fn count_direct(&self, column_name: &str) -> Result<usize> {
141 let column_view = self.column(column_name)?;
142 let column = column_view.column();
143
144 match column {
146 Column::Float64(col) => Ok(col.len()),
147 Column::Int64(col) => Ok(col.len()),
148 Column::String(col) => Ok(col.len()),
149 Column::Boolean(col) => Ok(col.len()),
150 }
151 }
152
153 pub fn sum_simd(&self, column_name: &str) -> Result<f64> {
163 let column_view = self.column(column_name)?;
164 let column = column_view.column();
165
166 match column {
167 Column::Float64(col) => {
168 if col.null_mask.is_none() {
170 Ok(simd_sum_f64(&col.data))
171 } else {
172 Ok(col.sum()) }
174 }
175 Column::Int64(col) => {
176 if col.null_mask.is_none() {
177 Ok(simd_sum_i64(&col.data) as f64)
178 } else {
179 Ok(col.sum() as f64)
180 }
181 }
182 Column::String(_) => Err(Error::ColumnTypeMismatch {
183 name: column_name.to_string(),
184 expected: ColumnType::Float64,
185 found: ColumnType::String,
186 }),
187 Column::Boolean(_) => Err(Error::ColumnTypeMismatch {
188 name: column_name.to_string(),
189 expected: ColumnType::Float64,
190 found: ColumnType::Boolean,
191 }),
192 }
193 }
194
195 pub fn mean_simd(&self, column_name: &str) -> Result<f64> {
197 let column_view = self.column(column_name)?;
198 let column = column_view.column();
199
200 match column {
201 Column::Float64(col) => {
202 if col.null_mask.is_none() {
203 if col.data.is_empty() {
204 Err(Error::EmptyDataFrame(format!(
205 "Column '{}' is empty",
206 column_name
207 )))
208 } else {
209 Ok(simd_mean_f64(&col.data))
210 }
211 } else {
212 col.mean().ok_or(Error::EmptyDataFrame(format!(
213 "Column '{}' is empty",
214 column_name
215 )))
216 }
217 }
218 Column::Int64(col) => {
219 if col.null_mask.is_none() {
220 if col.data.is_empty() {
221 Err(Error::EmptyDataFrame(format!(
222 "Column '{}' is empty",
223 column_name
224 )))
225 } else {
226 Ok(simd_mean_i64(&col.data) as f64)
227 }
228 } else {
229 col.mean().ok_or(Error::EmptyDataFrame(format!(
230 "Column '{}' is empty",
231 column_name
232 )))
233 }
234 }
235 Column::String(_) => Err(Error::ColumnTypeMismatch {
236 name: column_name.to_string(),
237 expected: ColumnType::Float64,
238 found: ColumnType::String,
239 }),
240 Column::Boolean(_) => Err(Error::ColumnTypeMismatch {
241 name: column_name.to_string(),
242 expected: ColumnType::Float64,
243 found: ColumnType::Boolean,
244 }),
245 }
246 }
247
248 pub fn max_simd(&self, column_name: &str) -> Result<f64> {
250 let column_view = self.column(column_name)?;
251 let column = column_view.column();
252
253 match column {
254 Column::Float64(col) => {
255 if col.null_mask.is_none() {
256 if col.data.is_empty() {
257 Err(Error::EmptyDataFrame(format!(
258 "Column '{}' is empty",
259 column_name
260 )))
261 } else {
262 Ok(simd_max_f64(&col.data))
263 }
264 } else {
265 col.max().ok_or(Error::EmptyDataFrame(format!(
266 "Column '{}' is empty",
267 column_name
268 )))
269 }
270 }
271 Column::Int64(col) => {
272 if col.null_mask.is_none() {
273 if col.data.is_empty() {
274 Err(Error::EmptyDataFrame(format!(
275 "Column '{}' is empty",
276 column_name
277 )))
278 } else {
279 Ok(simd_max_i64(&col.data) as f64)
280 }
281 } else {
282 col.max()
283 .map(|v| v as f64)
284 .ok_or(Error::EmptyDataFrame(format!(
285 "Column '{}' is empty",
286 column_name
287 )))
288 }
289 }
290 Column::String(_) => Err(Error::ColumnTypeMismatch {
291 name: column_name.to_string(),
292 expected: ColumnType::Float64,
293 found: ColumnType::String,
294 }),
295 Column::Boolean(_) => Err(Error::ColumnTypeMismatch {
296 name: column_name.to_string(),
297 expected: ColumnType::Float64,
298 found: ColumnType::Boolean,
299 }),
300 }
301 }
302
303 pub fn min_simd(&self, column_name: &str) -> Result<f64> {
305 let column_view = self.column(column_name)?;
306 let column = column_view.column();
307
308 match column {
309 Column::Float64(col) => {
310 if col.null_mask.is_none() {
311 if col.data.is_empty() {
312 Err(Error::EmptyDataFrame(format!(
313 "Column '{}' is empty",
314 column_name
315 )))
316 } else {
317 Ok(simd_min_f64(&col.data))
318 }
319 } else {
320 col.min().ok_or(Error::EmptyDataFrame(format!(
321 "Column '{}' is empty",
322 column_name
323 )))
324 }
325 }
326 Column::Int64(col) => {
327 if col.null_mask.is_none() {
328 if col.data.is_empty() {
329 Err(Error::EmptyDataFrame(format!(
330 "Column '{}' is empty",
331 column_name
332 )))
333 } else {
334 Ok(simd_min_i64(&col.data) as f64)
335 }
336 } else {
337 col.min()
338 .map(|v| v as f64)
339 .ok_or(Error::EmptyDataFrame(format!(
340 "Column '{}' is empty",
341 column_name
342 )))
343 }
344 }
345 Column::String(_) => Err(Error::ColumnTypeMismatch {
346 name: column_name.to_string(),
347 expected: ColumnType::Float64,
348 found: ColumnType::String,
349 }),
350 Column::Boolean(_) => Err(Error::ColumnTypeMismatch {
351 name: column_name.to_string(),
352 expected: ColumnType::Float64,
353 found: ColumnType::Boolean,
354 }),
355 }
356 }
357}
358
#[cfg(test)]
mod tests {
    use super::*;
    use crate::column::{Float64Column, Int64Column};
    use crate::series::Series;

    /// Asserts that two floats agree to within a relative tolerance.
    ///
    /// Used wherever two summation strategies are compared: adding the same
    /// floats in a different order can change the last bits of the result, so
    /// bit-exact equality is not a valid expectation there.
    fn assert_close(actual: f64, expected: f64) {
        const REL_TOL: f64 = 1e-9;
        // Clamp the scale to 1.0 so values near zero get an absolute tolerance.
        let scale = actual.abs().max(expected.abs()).max(1.0);
        assert!(
            (actual - expected).abs() <= REL_TOL * scale,
            "expected {}, got {}",
            expected,
            actual
        );
    }

    /// Builds a frame with a five-element `Float64` column (`float_col`) and a
    /// five-element `Int64` column (`int_col`).
    fn create_test_dataframe() -> OptimizedDataFrame {
        let mut df = OptimizedDataFrame::new();

        let float_column = Float64Column::new(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        df.add_column("float_col".to_string(), Column::Float64(float_column))
            .unwrap();

        let int_column = Int64Column::new(vec![10, 20, 30, 40, 50]);
        df.add_column("int_col".to_string(), Column::Int64(int_column))
            .unwrap();

        df
    }

    #[test]
    fn test_sum_direct() {
        let df = create_test_dataframe();

        assert_eq!(df.sum_direct("float_col").unwrap(), 15.0);
        assert_eq!(df.sum_direct("int_col").unwrap(), 150.0);
    }

    #[test]
    fn test_mean_direct() {
        let df = create_test_dataframe();

        assert_eq!(df.mean_direct("float_col").unwrap(), 3.0);
        assert_eq!(df.mean_direct("int_col").unwrap(), 30.0);
    }

    #[test]
    fn test_max_direct() {
        let df = create_test_dataframe();

        assert_eq!(df.max_direct("float_col").unwrap(), 5.0);
        assert_eq!(df.max_direct("int_col").unwrap(), 50.0);
    }

    #[test]
    fn test_min_direct() {
        let df = create_test_dataframe();

        assert_eq!(df.min_direct("float_col").unwrap(), 1.0);
        assert_eq!(df.min_direct("int_col").unwrap(), 10.0);
    }

    #[test]
    fn test_count_direct() {
        let df = create_test_dataframe();

        assert_eq!(df.count_direct("float_col").unwrap(), 5);
        assert_eq!(df.count_direct("int_col").unwrap(), 5);
    }

    #[test]
    fn test_invalid_column() {
        let df = create_test_dataframe();

        let result = df.sum_direct("nonexistent");
        assert!(result.is_err());
    }

    #[test]
    fn test_sum_simd() {
        let df = create_test_dataframe();

        assert_eq!(df.sum_simd("float_col").unwrap(), 15.0);
        assert_eq!(df.sum_simd("int_col").unwrap(), 150.0);
    }

    #[test]
    fn test_mean_simd() {
        let df = create_test_dataframe();

        assert_eq!(df.mean_simd("float_col").unwrap(), 3.0);
        assert_eq!(df.mean_simd("int_col").unwrap(), 30.0);
    }

    #[test]
    fn test_max_simd() {
        let df = create_test_dataframe();

        assert_eq!(df.max_simd("float_col").unwrap(), 5.0);
        assert_eq!(df.max_simd("int_col").unwrap(), 50.0);
    }

    #[test]
    fn test_min_simd() {
        let df = create_test_dataframe();

        assert_eq!(df.min_simd("float_col").unwrap(), 1.0);
        assert_eq!(df.min_simd("int_col").unwrap(), 10.0);
    }

    #[test]
    fn test_simd_vs_direct_consistency() {
        let df = create_test_dataframe();

        // The fixture holds small whole numbers, so every partial sum is
        // exactly representable and exact equality is valid here.
        for name in ["float_col", "int_col"] {
            assert_eq!(df.sum_direct(name).unwrap(), df.sum_simd(name).unwrap());
            assert_eq!(df.mean_direct(name).unwrap(), df.mean_simd(name).unwrap());
            assert_eq!(df.max_direct(name).unwrap(), df.max_simd(name).unwrap());
            assert_eq!(df.min_direct(name).unwrap(), df.min_simd(name).unwrap());
        }
    }

    #[test]
    fn test_simd_performance_with_large_dataset() {
        let mut df = OptimizedDataFrame::new();

        let large_float_data: Vec<f64> = (1..=10000).map(|i| i as f64 * 0.1).collect();
        let large_int_data: Vec<i64> = (1..=10000).map(|i| i * 10).collect();

        // Compute the reference values first so the vectors can be moved into
        // the columns without cloning.
        let expected_sum: f64 = large_float_data.iter().sum();
        let expected_mean = expected_sum / large_float_data.len() as f64;

        let float_column = Float64Column::new(large_float_data);
        let int_column = Int64Column::new(large_int_data);

        df.add_column("large_float".to_string(), Column::Float64(float_column))
            .unwrap();
        df.add_column("large_int".to_string(), Column::Int64(int_column))
            .unwrap();

        // The sum is about 5e6, where one ULP is already ~9e-10, so these
        // comparisons use a relative tolerance rather than a fixed epsilon.
        assert_close(df.sum_simd("large_float").unwrap(), expected_sum);
        assert_close(df.mean_simd("large_float").unwrap(), expected_mean);

        // Scalar and SIMD reductions may add in different orders.
        assert_close(
            df.sum_simd("large_float").unwrap(),
            df.sum_direct("large_float").unwrap(),
        );
        assert_close(
            df.mean_simd("large_float").unwrap(),
            df.mean_direct("large_float").unwrap(),
        );

        // Integer extrema are exact, so equality is the right check.
        assert_eq!(
            df.max_direct("large_int").unwrap(),
            df.max_simd("large_int").unwrap()
        );
        assert_eq!(
            df.min_direct("large_int").unwrap(),
            df.min_simd("large_int").unwrap()
        );
    }
}