//! Performance-oriented utilities for feature selection: SIMD statistics kernels,
//! rayon-parallel selectors, streaming (memory-efficient) selection, cache-friendly
//! matrix layouts, and a lightweight performance profiler.

use rayon::prelude::*;
use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
use sklears_core::error::Result as SklResult;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::sync::{Arc, Mutex};

type Result<T> = SklResult<T>;

/// SIMD-accelerated statistical kernels with scalar fallbacks.
pub struct SIMDStats;

impl SIMDStats {
    /// Pearson correlation computed with AVX2/FMA intrinsics, four `f64` lanes at a time.
    ///
    /// # Safety
    /// The caller must verify that the CPU supports AVX2 and FMA (for example with
    /// `is_x86_feature_detected!`), as `correlation_auto` does before dispatching here.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "avx2,fma")]
    pub unsafe fn correlation_simd(x: ArrayView1<f64>, y: ArrayView1<f64>) -> f64 {
        if x.len() != y.len() || x.len() < 4 {
            return Self::correlation_fallback(x, y);
        }

        let n = x.len();
        let chunks = n / 4;
        let x_ptr = x.as_ptr();
        let y_ptr = y.as_ptr();

        // Vector accumulators for the five running sums of the product-moment formula.
        let mut vec_sum_x = _mm256_setzero_pd();
        let mut vec_sum_y = _mm256_setzero_pd();
        let mut vec_sum_x2 = _mm256_setzero_pd();
        let mut vec_sum_y2 = _mm256_setzero_pd();
        let mut vec_sum_xy = _mm256_setzero_pd();

        for i in 0..chunks {
            let offset = i * 4;
            let vec_x = _mm256_loadu_pd(x_ptr.add(offset));
            let vec_y = _mm256_loadu_pd(y_ptr.add(offset));

            vec_sum_x = _mm256_add_pd(vec_sum_x, vec_x);
            vec_sum_y = _mm256_add_pd(vec_sum_y, vec_y);
            vec_sum_x2 = _mm256_fmadd_pd(vec_x, vec_x, vec_sum_x2);
            vec_sum_y2 = _mm256_fmadd_pd(vec_y, vec_y, vec_sum_y2);
            vec_sum_xy = _mm256_fmadd_pd(vec_x, vec_y, vec_sum_xy);
        }

        // Horizontal reduction of each vector accumulator.
        let mut temp = [0.0; 4];
        _mm256_storeu_pd(temp.as_mut_ptr(), vec_sum_x);
        let mut sum_x: f64 = temp.iter().sum();

        _mm256_storeu_pd(temp.as_mut_ptr(), vec_sum_y);
        let mut sum_y: f64 = temp.iter().sum();

        _mm256_storeu_pd(temp.as_mut_ptr(), vec_sum_x2);
        let mut sum_x2: f64 = temp.iter().sum();

        _mm256_storeu_pd(temp.as_mut_ptr(), vec_sum_y2);
        let mut sum_y2: f64 = temp.iter().sum();

        _mm256_storeu_pd(temp.as_mut_ptr(), vec_sum_xy);
        let mut sum_xy: f64 = temp.iter().sum();

        // Scalar tail for the remaining n % 4 elements.
        for i in (chunks * 4)..n {
            let xi = *x_ptr.add(i);
            let yi = *y_ptr.add(i);
            sum_x += xi;
            sum_y += yi;
            sum_x2 += xi * xi;
            sum_y2 += yi * yi;
            sum_xy += xi * yi;
        }

        let n_f64 = n as f64;
        let numerator = n_f64 * sum_xy - sum_x * sum_y;
        let denominator =
            ((n_f64 * sum_x2 - sum_x * sum_x) * (n_f64 * sum_y2 - sum_y * sum_y)).sqrt();

        if denominator.abs() < 1e-10 {
            0.0
        } else {
            numerator / denominator
        }
    }

    pub fn correlation_fallback(x: ArrayView1<f64>, y: ArrayView1<f64>) -> f64 {
        if x.len() != y.len() || x.len() < 2 {
            return 0.0;
        }

        let mean_x = x.mean().unwrap_or(0.0);
        let mean_y = y.mean().unwrap_or(0.0);

        let mut sum_xy = 0.0;
        let mut sum_x2 = 0.0;
        let mut sum_y2 = 0.0;

        for i in 0..x.len() {
            let dx = x[i] - mean_x;
            let dy = y[i] - mean_y;
            sum_xy += dx * dy;
            sum_x2 += dx * dx;
            sum_y2 += dy * dy;
        }

        let denom = (sum_x2 * sum_y2).sqrt();
        if denom < 1e-10 {
            0.0
        } else {
            sum_xy / denom
        }
    }

    /// Correlation with automatic dispatch: the AVX2/FMA kernel is used only when the
    /// CPU supports it and the input is long enough to amortise the setup cost.
    pub fn correlation_auto(x: ArrayView1<f64>, y: ArrayView1<f64>) -> f64 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            if is_x86_feature_detected!("avx2") && is_x86_feature_detected!("fma") && x.len() >= 16
            {
                unsafe { Self::correlation_simd(x, y) }
            } else {
                Self::correlation_fallback(x, y)
            }
        }
        #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
        {
            Self::correlation_fallback(x, y)
        }
    }

    /// Sample variance (ddof = 1) computed with AVX2/FMA intrinsics.
    ///
    /// # Safety
    /// The caller must verify AVX2 and FMA support, as `variance_auto` does.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "avx2,fma")]
    pub unsafe fn variance_simd(x: ArrayView1<f64>) -> f64 {
        if x.len() < 4 {
            return Self::variance_fallback(x);
        }

        let n = x.len();
        let x_ptr = x.as_ptr();
        let chunks = n / 4;

        // First pass: vectorised sum to obtain the mean.
        let mut vec_sum = _mm256_setzero_pd();

        for i in 0..chunks {
            let offset = i * 4;
            let vec_x = _mm256_loadu_pd(x_ptr.add(offset));
            vec_sum = _mm256_add_pd(vec_sum, vec_x);
        }

        let mut temp = [0.0; 4];
        _mm256_storeu_pd(temp.as_mut_ptr(), vec_sum);
        let mut sum = temp.iter().sum::<f64>();

        for i in (chunks * 4)..n {
            sum += *x_ptr.add(i);
        }

        let mean = sum / n as f64;

        // Second pass: vectorised sum of squared deviations from the mean.
        let vec_mean = _mm256_set1_pd(mean);
        let mut vec_sum_sq_diff = _mm256_setzero_pd();

        for i in 0..chunks {
            let offset = i * 4;
            let vec_x = _mm256_loadu_pd(x_ptr.add(offset));
            let vec_diff = _mm256_sub_pd(vec_x, vec_mean);
            vec_sum_sq_diff = _mm256_fmadd_pd(vec_diff, vec_diff, vec_sum_sq_diff);
        }

        _mm256_storeu_pd(temp.as_mut_ptr(), vec_sum_sq_diff);
        let mut sum_sq_diff = temp.iter().sum::<f64>();

        for i in (chunks * 4)..n {
            let diff = *x_ptr.add(i) - mean;
            sum_sq_diff += diff * diff;
        }

        sum_sq_diff / (n - 1) as f64
    }

    /// Scalar sample variance (ddof = 1).
    pub fn variance_fallback(x: ArrayView1<f64>) -> f64 {
        x.var(1.0)
    }

    /// Variance with automatic dispatch between the AVX2/FMA kernel and the scalar path.
    pub fn variance_auto(x: ArrayView1<f64>) -> f64 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            if is_x86_feature_detected!("avx2") && is_x86_feature_detected!("fma") && x.len() >= 16
            {
                unsafe { Self::variance_simd(x) }
            } else {
                Self::variance_fallback(x)
            }
        }
        #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
        {
            Self::variance_fallback(x)
        }
    }

    /// Chi-square statistic, sum((observed - expected)^2 / expected), using AVX2 intrinsics.
    ///
    /// # Safety
    /// The caller must verify AVX2 support, as `chi_square_auto` does.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "avx2")]
    pub unsafe fn chi_square_simd(observed: ArrayView1<f64>, expected: ArrayView1<f64>) -> f64 {
        if observed.len() != expected.len() || observed.len() < 4 {
            return Self::chi_square_fallback(observed, expected);
        }

        let n = observed.len();
        let chunks = n / 4;
        let obs_ptr = observed.as_ptr();
        let exp_ptr = expected.as_ptr();

        let mut vec_chi_sq = _mm256_setzero_pd();
        // Clamp expected counts away from zero to avoid division by zero.
        let eps = _mm256_set1_pd(1e-10);

        for i in 0..chunks {
            let offset = i * 4;
            let vec_obs = _mm256_loadu_pd(obs_ptr.add(offset));
            let vec_exp = _mm256_loadu_pd(exp_ptr.add(offset));

            let vec_exp_safe = _mm256_max_pd(vec_exp, eps);

            let vec_diff = _mm256_sub_pd(vec_obs, vec_exp_safe);
            let vec_diff_sq = _mm256_mul_pd(vec_diff, vec_diff);
            let vec_chi_sq_contrib = _mm256_div_pd(vec_diff_sq, vec_exp_safe);

            vec_chi_sq = _mm256_add_pd(vec_chi_sq, vec_chi_sq_contrib);
        }

        let mut temp = [0.0; 4];
        _mm256_storeu_pd(temp.as_mut_ptr(), vec_chi_sq);
        let mut chi_sq = temp.iter().sum::<f64>();

        // Scalar tail for the remaining elements.
        for i in (chunks * 4)..n {
            let obs = *obs_ptr.add(i);
            let exp = (*exp_ptr.add(i)).max(1e-10);
            let diff = obs - exp;
            chi_sq += (diff * diff) / exp;
        }

        chi_sq
    }

    pub fn chi_square_fallback(observed: ArrayView1<f64>, expected: ArrayView1<f64>) -> f64 {
        if observed.len() != expected.len() {
            return 0.0;
        }

        let mut chi_sq = 0.0;
        for i in 0..observed.len() {
            let obs = observed[i];
            let exp = expected[i].max(1e-10);
            let diff = obs - exp;
            chi_sq += (diff * diff) / exp;
        }

        chi_sq
    }

    pub fn chi_square_auto(observed: ArrayView1<f64>, expected: ArrayView1<f64>) -> f64 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            if is_x86_feature_detected!("avx2") && observed.len() >= 16 {
                unsafe { Self::chi_square_simd(observed, expected) }
            } else {
                Self::chi_square_fallback(observed, expected)
            }
        }
        #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
        {
            Self::chi_square_fallback(observed, expected)
        }
    }
}

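// A minimal usage sketch for the statistics kernels above; `simd_stats_usage_sketch`
// is an illustrative helper added here, not part of the original API. Callers go
// through the `*_auto` entry points, which fall back to the scalar implementations
// when AVX2/FMA is unavailable or the input is short.
#[allow(dead_code)]
fn simd_stats_usage_sketch() {
    let x: Array1<f64> = Array1::from_vec((0..32).map(|v| v as f64).collect());
    let y = x.mapv(|v| 2.0 * v + 1.0);

    // Perfectly linear data, so the correlation should be ~1 on either code path.
    let r = SIMDStats::correlation_auto(x.view(), y.view());
    debug_assert!((r - 1.0).abs() < 1e-9);

    // Sample variance (ddof = 1), consistent with `variance_fallback`.
    let var = SIMDStats::variance_auto(x.view());
    debug_assert!(var > 0.0);
}
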
/// Parallel (rayon-based) feature selection routines built on top of `SIMDStats`.
pub struct ParallelSelector;

impl ParallelSelector {
    /// Selects features whose absolute correlation with the target exceeds `threshold`.
    #[allow(non_snake_case)]
    pub fn parallel_correlation_selection(
        X: ArrayView2<f64>,
        y: ArrayView1<f64>,
        threshold: f64,
        chunk_size: Option<usize>,
    ) -> Result<Array1<bool>> {
        let n_features = X.ncols();
        let chunk_size = chunk_size.unwrap_or_else(|| (n_features / num_cpus::get()).max(1));

        let correlations: Vec<f64> = (0..n_features)
            .into_par_iter()
            .chunks(chunk_size)
            .flat_map(|chunk| {
                chunk
                    .into_iter()
                    .map(|feature_idx| {
                        let feature_data = X.column(feature_idx);
                        SIMDStats::correlation_auto(feature_data, y).abs()
                    })
                    .collect::<Vec<f64>>()
            })
            .collect();

        let selection_vec: Vec<bool> = correlations
            .into_par_iter()
            .map(|corr| corr > threshold)
            .collect();
        let selection = Array1::from_vec(selection_vec);

        Ok(selection)
    }

    /// Selects features whose sample variance exceeds `threshold`.
    #[allow(non_snake_case)]
    pub fn parallel_variance_selection(
        X: ArrayView2<f64>,
        threshold: f64,
        chunk_size: Option<usize>,
    ) -> Result<Array1<bool>> {
        let n_features = X.ncols();
        let chunk_size = chunk_size.unwrap_or_else(|| (n_features / num_cpus::get()).max(1));

        let variances: Vec<f64> = (0..n_features)
            .into_par_iter()
            .chunks(chunk_size)
            .flat_map(|chunk| {
                chunk
                    .into_iter()
                    .map(|feature_idx| {
                        let feature_data = X.column(feature_idx);
                        SIMDStats::variance_auto(feature_data)
                    })
                    .collect::<Vec<f64>>()
            })
            .collect();

        let selection_vec: Vec<bool> = variances
            .into_par_iter()
            .map(|var| var > threshold)
            .collect();
        let selection = Array1::from_vec(selection_vec);

        Ok(selection)
    }

    /// Selects the `k` features with the highest (approximate) mutual information scores.
    #[allow(non_snake_case)]
    pub fn parallel_mutual_info_selection(
        X: ArrayView2<f64>,
        y: ArrayView1<f64>,
        k: usize,
        chunk_size: Option<usize>,
    ) -> Result<Array1<bool>> {
        let n_features = X.ncols();
        let chunk_size = chunk_size.unwrap_or_else(|| (n_features / num_cpus::get()).max(1));

        let mi_scores: Vec<(usize, f64)> = (0..n_features)
            .into_par_iter()
            .chunks(chunk_size)
            .flat_map(|chunk| {
                chunk
                    .into_iter()
                    .map(|feature_idx| {
                        let feature_data = X.column(feature_idx);
                        let mi_score = Self::estimate_mutual_information(feature_data, y);
                        (feature_idx, mi_score)
                    })
                    .collect::<Vec<(usize, f64)>>()
            })
            .collect();

        // Keep the k highest-scoring features.
        let mut sorted_scores = mi_scores;
        sorted_scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());

        let mut selection = Array1::from_elem(n_features, false);
        for (feature_idx, _) in sorted_scores.into_iter().take(k) {
            selection[feature_idx] = true;
        }

        Ok(selection)
    }

    /// Selects the `k` features with the highest chi-square scores.
    #[allow(non_snake_case)]
    pub fn parallel_chi_square_selection(
        X: ArrayView2<f64>,
        y: ArrayView1<f64>,
        k: usize,
        chunk_size: Option<usize>,
    ) -> Result<Array1<bool>> {
        let n_features = X.ncols();
        let chunk_size = chunk_size.unwrap_or_else(|| (n_features / num_cpus::get()).max(1));

        let class_counts = Self::compute_class_counts(y);
        let total_samples = y.len() as f64;

        let chi_scores: Vec<(usize, f64)> = (0..n_features)
            .into_par_iter()
            .chunks(chunk_size)
            .flat_map(|chunk| {
                chunk
                    .into_iter()
                    .map(|feature_idx| {
                        let feature_data = X.column(feature_idx);
                        let chi_score = Self::compute_chi_square_score(
                            feature_data,
                            y,
                            &class_counts,
                            total_samples,
                        );
                        (feature_idx, chi_score)
                    })
                    .collect::<Vec<(usize, f64)>>()
            })
            .collect();

        // Keep the k highest-scoring features.
        let mut sorted_scores = chi_scores;
        sorted_scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());

        let mut selection = Array1::from_elem(n_features, false);
        for (feature_idx, _) in sorted_scores.into_iter().take(k) {
            selection[feature_idx] = true;
        }

        Ok(selection)
    }

    /// Recursive feature elimination: repeatedly drops the `step` fraction of the
    /// lowest-importance features until `n_features_to_select` remain.
    #[allow(non_snake_case)]
    pub fn parallel_recursive_elimination(
        X: ArrayView2<f64>,
        y: ArrayView1<f64>,
        n_features_to_select: usize,
        step: f64,
    ) -> Result<Array1<bool>> {
        let mut current_features: Vec<usize> = (0..X.ncols()).collect();
        let mut selection = Array1::from_elem(X.ncols(), true);

        while current_features.len() > n_features_to_select {
            // Remove at least one feature per round, but never drop below the target count.
            let n_to_remove = ((current_features.len() as f64 * step).ceil() as usize)
                .max(1)
                .min(current_features.len() - n_features_to_select);

            let importances: Vec<f64> = current_features
                .par_iter()
                .map(|&feature_idx| {
                    let feature_data = X.column(feature_idx);
                    SIMDStats::correlation_auto(feature_data, y).abs()
                })
                .collect();

            let mut indexed_importances: Vec<(usize, f64)> = current_features
                .iter()
                .zip(importances.iter())
                .map(|(&idx, &imp)| (idx, imp))
                .collect();

            // Sort ascending so the least important features come first.
            indexed_importances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());

            for i in 0..n_to_remove {
                if let Some((feature_idx, _)) = indexed_importances.get(i) {
                    selection[*feature_idx] = false;
                    current_features.retain(|&x| x != *feature_idx);
                }
            }
        }

        Ok(selection)
    }

    /// Cheap proxy for mutual information: the absolute Pearson correlation.
    fn estimate_mutual_information(x: ArrayView1<f64>, y: ArrayView1<f64>) -> f64 {
        SIMDStats::correlation_auto(x, y).abs()
    }

    fn compute_class_counts(y: ArrayView1<f64>) -> Vec<(f64, usize)> {
        let mut counts = std::collections::HashMap::new();
        for &label in y.iter() {
            *counts.entry(label as i32).or_insert(0) += 1;
        }
        counts.into_iter().map(|(k, v)| (k as f64, v)).collect()
    }

    /// Cheap proxy for the chi-square score: the absolute Pearson correlation.
    /// The class counts and sample total are accepted for API symmetry but unused.
    fn compute_chi_square_score(
        feature: ArrayView1<f64>,
        y: ArrayView1<f64>,
        _class_counts: &[(f64, usize)],
        _total_samples: f64,
    ) -> f64 {
        SIMDStats::correlation_auto(feature, y).abs()
    }
}

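// Sketch of how the boolean masks produced by `ParallelSelector` might be applied
// to materialise the reduced feature matrix. `apply_selection_sketch` is a
// hypothetical helper added here for illustration only.
#[allow(dead_code)]
fn apply_selection_sketch(x: ArrayView2<f64>, mask: &Array1<bool>) -> Array2<f64> {
    let keep: Vec<usize> = mask
        .iter()
        .enumerate()
        .filter_map(|(idx, &selected)| if selected { Some(idx) } else { None })
        .collect();

    let mut reduced = Array2::<f64>::zeros((x.nrows(), keep.len()));
    for (new_col, &old_col) in keep.iter().enumerate() {
        // Copy the retained column into its new position.
        reduced.column_mut(new_col).assign(&x.column(old_col));
    }
    reduced
}
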
pub struct MemoryEfficientSelector {
    chunk_size: usize,
    use_memory_mapping: bool,
}

impl MemoryEfficientSelector {
    pub fn new(chunk_size: usize, use_memory_mapping: bool) -> Self {
        Self {
            chunk_size,
            use_memory_mapping,
        }
    }

    /// Single-pass correlation-based selection that processes the data in row chunks,
    /// keeping only running moments per feature instead of the full matrix.
    #[allow(non_snake_case)]
    pub fn streaming_correlation_selection(
        &self,
        X: ArrayView2<f64>,
        y: ArrayView1<f64>,
        threshold: f64,
    ) -> Result<Array1<bool>> {
        let n_features = X.ncols();
        let n_samples = X.nrows();

        let mut feature_stats: Vec<StreamingStats> =
            (0..n_features).map(|_| StreamingStats::new()).collect();

        let mut y_stats = StreamingStats::new();

        // Accumulate running sums chunk by chunk.
        for chunk_start in (0..n_samples).step_by(self.chunk_size) {
            let chunk_end = (chunk_start + self.chunk_size).min(n_samples);

            for i in chunk_start..chunk_end {
                let y_val = y[i];
                y_stats.update(y_val);

                for j in 0..n_features {
                    let x_val = X[[i, j]];
                    feature_stats[j].update(x_val);
                    feature_stats[j].update_covariance(x_val, y_val);
                }
            }
        }

        // Convert the accumulated moments into correlations and threshold them.
        let mut selection = Array1::from_elem(n_features, false);
        let y_variance = y_stats.variance();

        for (j, stats) in feature_stats.iter().enumerate() {
            let x_variance = stats.variance();
            let covariance = stats.covariance_xy();

            let correlation = if x_variance > 1e-10 && y_variance > 1e-10 {
                covariance / (x_variance.sqrt() * y_variance.sqrt())
            } else {
                0.0
            };

            selection[j] = correlation.abs() > threshold;
        }

        Ok(selection)
    }

    /// Single-pass variance thresholding over row chunks.
    #[allow(non_snake_case)]
    pub fn streaming_variance_selection(
        &self,
        X: ArrayView2<f64>,
        threshold: f64,
    ) -> Result<Array1<bool>> {
        let n_features = X.ncols();
        let n_samples = X.nrows();

        let mut feature_stats: Vec<StreamingStats> =
            (0..n_features).map(|_| StreamingStats::new()).collect();

        for chunk_start in (0..n_samples).step_by(self.chunk_size) {
            let chunk_end = (chunk_start + self.chunk_size).min(n_samples);

            for i in chunk_start..chunk_end {
                for j in 0..n_features {
                    let x_val = X[[i, j]];
                    feature_stats[j].update(x_val);
                }
            }
        }

        let mut selection = Array1::from_elem(n_features, false);
        for (j, stats) in feature_stats.iter().enumerate() {
            selection[j] = stats.variance() > threshold;
        }

        Ok(selection)
    }
}

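// Sketch of configuring the memory-efficient selector for data processed in row
// chunks. `streaming_selection_sketch` is illustrative only; the chunk size of 1024
// and the 0.3 threshold are arbitrary example values.
#[allow(dead_code)]
fn streaming_selection_sketch(x: ArrayView2<f64>, y: ArrayView1<f64>) -> Result<Array1<bool>> {
    // One pass over the data in blocks of 1024 rows; no memory mapping.
    let selector = MemoryEfficientSelector::new(1024, false);
    selector.streaming_correlation_selection(x, y, 0.3)
}
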
#[derive(Debug, Clone)]
struct StreamingStats {
    count: usize,
    sum: f64,
    sum_sq: f64,
    sum_xy: f64,
    sum_y: f64,
    sum_y_sq: f64,
}

impl StreamingStats {
    fn new() -> Self {
        Self {
            count: 0,
            sum: 0.0,
            sum_sq: 0.0,
            sum_xy: 0.0,
            sum_y: 0.0,
            sum_y_sq: 0.0,
        }
    }

    fn update(&mut self, value: f64) {
        self.count += 1;
        self.sum += value;
        self.sum_sq += value * value;
    }

    fn update_covariance(&mut self, x: f64, y: f64) {
        self.sum_xy += x * y;
        self.sum_y += y;
        self.sum_y_sq += y * y;
    }

    fn mean(&self) -> f64 {
        if self.count > 0 {
            self.sum / self.count as f64
        } else {
            0.0
        }
    }

    fn variance(&self) -> f64 {
        if self.count > 1 {
            let n = self.count as f64;
            let mean = self.mean();
            (self.sum_sq - n * mean * mean) / (n - 1.0)
        } else {
            0.0
        }
    }

    fn covariance_xy(&self) -> f64 {
        if self.count > 1 {
            let n = self.count as f64;
            let mean_x = self.sum / n;
            let mean_y = self.sum_y / n;
            (self.sum_xy - n * mean_x * mean_y) / (n - 1.0)
        } else {
            0.0
        }
    }
}

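// Sketch of how the streaming moments combine into a Pearson correlation, mirroring
// the computation in `streaming_correlation_selection`:
//     r = cov(x, y) / (sqrt(var(x)) * sqrt(var(y)))
// `streaming_correlation_sketch` is illustrative only and assumes the target variance
// was accumulated separately (as done with the dedicated `y_stats` tracker above).
#[allow(dead_code)]
fn streaming_correlation_sketch(stats: &StreamingStats, y_variance: f64) -> f64 {
    let x_variance = stats.variance();
    let covariance = stats.covariance_xy();
    if x_variance > 1e-10 && y_variance > 1e-10 {
        covariance / (x_variance.sqrt() * y_variance.sqrt())
    } else {
        0.0
    }
}
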
pub struct CacheFriendlyMatrix<T> {
    data: Vec<T>,
    n_rows: usize,
    n_cols: usize,
    layout: MatrixLayout,
}

#[derive(Debug, Clone, Copy)]
pub enum MatrixLayout {
    RowMajor,
    ColumnMajor,
    Blocked { block_size: usize },
}

impl<T: Clone> CacheFriendlyMatrix<T> {
    pub fn new(data: Vec<T>, n_rows: usize, n_cols: usize, layout: MatrixLayout) -> Self {
        assert_eq!(data.len(), n_rows * n_cols);

        Self {
            data,
            n_rows,
            n_cols,
            layout,
        }
    }

    pub fn from_array2(array: Array2<T>, layout: MatrixLayout) -> Self {
        let (n_rows, n_cols) = array.dim();
        let data = match layout {
            MatrixLayout::RowMajor => array.into_raw_vec(),
            MatrixLayout::ColumnMajor => {
                let mut col_major_data = Vec::with_capacity(n_rows * n_cols);
                for col in 0..n_cols {
                    for row in 0..n_rows {
                        col_major_data.push(array[[row, col]].clone());
                    }
                }
                col_major_data
            }
            MatrixLayout::Blocked { block_size } => Self::create_blocked_layout(array, block_size),
        };

        Self {
            data,
            layout,
            n_rows,
            n_cols,
        }
    }

    fn create_blocked_layout(array: Array2<T>, block_size: usize) -> Vec<T> {
        let (n_rows, n_cols) = array.dim();
        let mut blocked_data = Vec::with_capacity(n_rows * n_cols);

        let row_blocks = (n_rows + block_size - 1) / block_size;
        let col_blocks = (n_cols + block_size - 1) / block_size;

        for row_block in 0..row_blocks {
            for col_block in 0..col_blocks {
                let row_start = row_block * block_size;
                let row_end = (row_start + block_size).min(n_rows);
                let col_start = col_block * block_size;
                let col_end = (col_start + block_size).min(n_cols);

                for row in row_start..row_end {
                    for col in col_start..col_end {
                        blocked_data.push(array[[row, col]].clone());
                    }
                }
            }
        }

        blocked_data
    }

    pub fn get(&self, row: usize, col: usize) -> Option<&T> {
        if row >= self.n_rows || col >= self.n_cols {
            return None;
        }

        let index = match self.layout {
            MatrixLayout::RowMajor => row * self.n_cols + col,
            MatrixLayout::ColumnMajor => col * self.n_rows + row,
            MatrixLayout::Blocked { block_size } => self.blocked_index(row, col, block_size),
        };

        self.data.get(index)
    }

    fn blocked_index(&self, row: usize, col: usize, block_size: usize) -> usize {
        let row_block = row / block_size;
        let col_block = col / block_size;
        let row_in_block = row % block_size;
        let col_in_block = col % block_size;

        // Edge blocks may be smaller than `block_size`, so compute their actual extents
        // to stay consistent with how `create_blocked_layout` packs the data.
        let rows_in_block = block_size.min(self.n_rows - row_block * block_size);
        let cols_in_block = block_size.min(self.n_cols - col_block * block_size);

        // Full row bands above this block, then the earlier (full-width) blocks in this
        // band, then the row-major position inside the block itself.
        row_block * block_size * self.n_cols
            + rows_in_block * (col_block * block_size)
            + row_in_block * cols_in_block
            + col_in_block
    }

    pub fn column_iter(&self, col: usize) -> ColumnIterator<'_, T> {
        ColumnIterator::new(self, col)
    }
}

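// Sketch contrasting the layouts: the logical (row, col) view is identical, only the
// physical order of `data` changes. `layout_equivalence_sketch` is an illustrative
// helper added here, not part of the original API.
#[allow(dead_code)]
fn layout_equivalence_sketch() {
    let row_major_data = vec![1, 2, 3, 4, 5, 6];
    let row_major = CacheFriendlyMatrix::new(row_major_data, 2, 3, MatrixLayout::RowMajor);

    // The same 2x3 matrix, stored column by column.
    let col_major_data = vec![1, 4, 2, 5, 3, 6];
    let col_major = CacheFriendlyMatrix::new(col_major_data, 2, 3, MatrixLayout::ColumnMajor);

    for row in 0..2 {
        for col in 0..3 {
            // Both layouts answer `get` with the same logical element.
            debug_assert_eq!(row_major.get(row, col), col_major.get(row, col));
        }
    }
}
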
pub struct ColumnIterator<'a, T> {
    matrix: &'a CacheFriendlyMatrix<T>,
    col: usize,
    current_row: usize,
}

impl<'a, T> ColumnIterator<'a, T> {
    fn new(matrix: &'a CacheFriendlyMatrix<T>, col: usize) -> Self {
        Self {
            matrix,
            col,
            current_row: 0,
        }
    }
}

impl<'a, T: Clone> Iterator for ColumnIterator<'a, T> {
    type Item = &'a T;

    fn next(&mut self) -> Option<Self::Item> {
        if self.current_row < self.matrix.n_rows {
            let item = self.matrix.get(self.current_row, self.col);
            self.current_row += 1;
            item
        } else {
            None
        }
    }
}

pub struct PerformanceProfiler {
    timings: Arc<Mutex<Vec<(String, std::time::Duration)>>>,
    memory_usage: Arc<Mutex<Vec<(String, usize)>>>,
}

impl Default for PerformanceProfiler {
    fn default() -> Self {
        Self::new()
    }
}

impl PerformanceProfiler {
    pub fn new() -> Self {
        Self {
            timings: Arc::new(Mutex::new(Vec::new())),
            memory_usage: Arc::new(Mutex::new(Vec::new())),
        }
    }

    pub fn time_operation<F, R>(&self, name: &str, operation: F) -> R
    where
        F: FnOnce() -> R,
    {
        let start = std::time::Instant::now();
        let result = operation();
        let duration = start.elapsed();

        if let Ok(mut timings) = self.timings.lock() {
            timings.push((name.to_string(), duration));
        }

        result
    }

    pub fn record_memory_usage(&self, name: &str, bytes: usize) {
        if let Ok(mut memory) = self.memory_usage.lock() {
            memory.push((name.to_string(), bytes));
        }
    }

    pub fn get_report(&self) -> PerformanceReport {
        let timings = self.timings.lock().unwrap().clone();
        let memory_usage = self.memory_usage.lock().unwrap().clone();

        PerformanceReport {
            timings: timings.clone(),
            memory_usage: memory_usage.clone(),
            total_time: timings.iter().map(|(_, duration)| *duration).sum(),
            peak_memory: memory_usage
                .iter()
                .map(|(_, bytes)| *bytes)
                .max()
                .unwrap_or(0),
        }
    }
}

#[derive(Debug, Clone)]
pub struct PerformanceReport {
    pub timings: Vec<(String, std::time::Duration)>,
    pub memory_usage: Vec<(String, usize)>,
    pub total_time: std::time::Duration,
    pub peak_memory: usize,
}

impl PerformanceReport {
    pub fn print_summary(&self) {
        println!("=== Performance Report ===");
        println!("Total execution time: {:?}", self.total_time);
        println!("Peak memory usage: {} bytes", self.peak_memory);

        println!("\nOperation timings:");
        for (name, duration) in &self.timings {
            println!("  {}: {:?}", name, duration);
        }

        println!("\nMemory usage:");
        for (name, bytes) in &self.memory_usage {
            println!("  {}: {} bytes", name, bytes);
        }
    }
}

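// Sketch of wiring the profiler around one of the selectors above. The helper name,
// the operation labels, and the 0.5 threshold are arbitrary illustrative choices.
#[allow(dead_code)]
fn profiled_selection_sketch(x: ArrayView2<f64>, y: ArrayView1<f64>) -> Result<Array1<bool>> {
    let profiler = PerformanceProfiler::new();

    let selection = profiler.time_operation("correlation_selection", || {
        ParallelSelector::parallel_correlation_selection(x, y, 0.5, None)
    })?;

    // Record an approximate footprint for the input matrix.
    profiler.record_memory_usage("input_matrix", x.len() * std::mem::size_of::<f64>());
    profiler.get_report().print_summary();

    Ok(selection)
}
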
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_simd_correlation() {
        let x = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let y = array![2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0];

        let correlation_simd = SIMDStats::correlation_auto(x.view(), y.view());
        let correlation_fallback = SIMDStats::correlation_fallback(x.view(), y.view());

        assert!((correlation_simd - correlation_fallback).abs() < 1e-10);
        // y is an exact linear function of x, so the correlation is 1.
        assert!((correlation_simd - 1.0).abs() < 1e-10);
    }

    #[test]
    #[allow(non_snake_case)]
    fn test_parallel_correlation_selection() -> Result<()> {
        let X = array![
            [1.0, 2.0, 3.0],
            [2.0, 4.0, 6.0],
            [3.0, 6.0, 9.0],
            [4.0, 8.0, 12.0],
        ];
        let y = array![1.0, 2.0, 3.0, 4.0];

        let selection =
            ParallelSelector::parallel_correlation_selection(X.view(), y.view(), 0.5, Some(2))?;

        assert_eq!(selection.len(), 3);
        assert!(selection.iter().any(|&x| x));

        Ok(())
    }

    #[test]
    #[allow(non_snake_case)]
    fn test_memory_efficient_selection() -> Result<()> {
        let X = array![
            [1.0, 2.0, 3.0],
            [2.0, 4.0, 6.0],
            [3.0, 6.0, 9.0],
            [4.0, 8.0, 12.0],
        ];
        let y = array![1.0, 2.0, 3.0, 4.0];

        let selector = MemoryEfficientSelector::new(2, false);
        let selection = selector.streaming_correlation_selection(X.view(), y.view(), 0.5)?;

        assert_eq!(selection.len(), 3);

        Ok(())
    }

    #[test]
    fn test_cache_friendly_matrix() {
        let data = vec![1, 2, 3, 4, 5, 6];
        let matrix = CacheFriendlyMatrix::new(data, 2, 3, MatrixLayout::RowMajor);

        assert_eq!(matrix.get(0, 0), Some(&1));
        assert_eq!(matrix.get(0, 1), Some(&2));
        assert_eq!(matrix.get(1, 2), Some(&6));
        // Out-of-bounds access returns None rather than panicking.
        assert_eq!(matrix.get(2, 0), None);
    }

    #[test]
    fn test_performance_profiler() {
        let profiler = PerformanceProfiler::new();

        let result = profiler.time_operation("test_operation", || {
            std::thread::sleep(std::time::Duration::from_millis(10));
            42
        });

        assert_eq!(result, 42);

        profiler.record_memory_usage("test_memory", 1024);

        let report = profiler.get_report();
        assert_eq!(report.timings.len(), 1);
        assert_eq!(report.memory_usage.len(), 1);
        assert!(report.total_time >= std::time::Duration::from_millis(10));
    }
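
    // Added sketch test: the variance and chi-square auto-dispatch paths should agree
    // with their scalar fallbacks regardless of which kernel actually runs.
    #[test]
    fn test_variance_and_chi_square_agree_with_fallback() {
        let x: Array1<f64> = Array1::from_vec((1..=20).map(|v| v as f64).collect());
        let expected = Array1::from_elem(20, 10.5);

        let var_auto = SIMDStats::variance_auto(x.view());
        let var_fallback = SIMDStats::variance_fallback(x.view());
        assert!((var_auto - var_fallback).abs() < 1e-9);

        let chi_auto = SIMDStats::chi_square_auto(x.view(), expected.view());
        let chi_fallback = SIMDStats::chi_square_fallback(x.view(), expected.view());
        assert!((chi_auto - chi_fallback).abs() < 1e-9);
    }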
}