1use crate::error::{StatsError, StatsResult};
13use crate::simd_enhanced_v6::AdvancedSimdOps;
14use scirs2_core::ndarray::ArrayView1;
15use scirs2_core::numeric::{Float, NumCast, One, Zero};
16use scirs2_core::{
17 simd_ops::{PlatformCapabilities, SimdUnifiedOps},
18 validation::*,
19};
20use std::marker::PhantomData;
21
22#[derive(Debug, Clone)]
24pub struct AdvancedSimdConfig {
25 pub platform: PlatformCapabilities,
27 pub vector_width: usize,
29 pub cache_linesize: usize,
31 pub l1_cachesize: usize,
33 pub l2_cachesize: usize,
35 pub enable_prefetch: bool,
37 pub enable_cache_blocking: bool,
39 pub enable_pipelining: bool,
41 pub advanced_simd_threshold: usize,
43}
44
45impl Default for AdvancedSimdConfig {
46 fn default() -> Self {
47 let platform = PlatformCapabilities::detect();
48
49 let vector_width = if platform.avx512_available {
50 16 } else if platform.avx2_available {
52 8 } else if platform.simd_available {
54 4 } else {
56 1 };
58
59 Self {
60 platform,
61 vector_width,
62 cache_linesize: 64, l1_cachesize: 32768, l2_cachesize: 262144, enable_prefetch: true,
66 enable_cache_blocking: true,
67 enable_pipelining: true,
68 advanced_simd_threshold: 256,
69 }
70 }
71}
72
73#[derive(Debug, Clone, Copy)]
75pub enum VectorStrategy {
76 SingleVector,
78 MultiVector { num_registers: usize },
80 UnrolledVector { unroll_factor: usize },
82 CacheBlockedVector { blocksize: usize },
84}
85
86#[derive(Debug, Clone, Copy)]
88pub enum MemoryPattern {
89 SequentialPrefetch,
91 Strided { stride: usize },
93 Tiled { tilesize: usize },
95 Blocked { blocksize: usize },
97}
98
99pub struct AdvancedSimdProcessor<F> {
101 config: AdvancedSimdConfig,
102 vector_strategy: VectorStrategy,
103 memory_pattern: MemoryPattern,
104 _phantom: PhantomData<F>,
105}
106
107#[derive(Debug, Clone)]
109pub struct AdvancedStatsResult<F> {
110 pub mean: F,
112 pub variance: F,
113 pub std_dev: F,
114 pub skewness: F,
115 pub kurtosis: F,
116 pub min: F,
117 pub max: F,
118 pub simd_utilization: f64,
120 pub cache_efficiency: f64,
121 pub vector_operations_count: usize,
122 pub prefetch_efficiency: f64,
123}
124
125pub struct CacheAwareVectorProcessor {
127 l1_blocksize: usize,
128 l2_blocksize: usize,
129 vector_width: usize,
130 prefetch_distance: usize,
131}
132
133impl<F> Default for AdvancedSimdProcessor<F>
134where
135 F: Float
136 + NumCast
137 + SimdUnifiedOps
138 + Zero
139 + One
140 + PartialOrd
141 + Copy
142 + Send
143 + Sync
144 + std::fmt::Display
145 + std::iter::Sum<F>
146 + AdvancedSimdOps<F>,
147{
148 fn default() -> Self {
149 Self::new()
150 }
151}
152
153impl<F> AdvancedSimdProcessor<F>
154where
155 F: Float
156 + NumCast
157 + SimdUnifiedOps
158 + Zero
159 + One
160 + PartialOrd
161 + Copy
162 + Send
163 + Sync
164 + std::fmt::Display
165 + std::iter::Sum<F>
166 + AdvancedSimdOps<F>,
167{
168 pub fn new() -> Self {
170 let config = AdvancedSimdConfig::default();
171 let vector_strategy = Self::select_optimal_vector_strategy(&config);
172 let memory_pattern = Self::select_optimal_memory_pattern(&config);
173
174 Self {
175 config,
176 vector_strategy,
177 memory_pattern,
178 _phantom: PhantomData,
179 }
180 }
181
182 pub fn with_config(config: AdvancedSimdConfig) -> Self {
184 let vector_strategy = Self::select_optimal_vector_strategy(&config);
185 let memory_pattern = Self::select_optimal_memory_pattern(&config);
186
187 Self {
188 config,
189 vector_strategy,
190 memory_pattern,
191 _phantom: PhantomData,
192 }
193 }
194
195 fn select_optimal_vector_strategy(config: &AdvancedSimdConfig) -> VectorStrategy {
197 if config.platform.avx512_available && config.enable_pipelining {
198 VectorStrategy::MultiVector { num_registers: 4 }
199 } else if config.platform.avx2_available {
200 VectorStrategy::UnrolledVector { unroll_factor: 4 }
201 } else if config.enable_cache_blocking {
202 VectorStrategy::CacheBlockedVector {
203 blocksize: config.l1_cachesize / 4,
204 }
205 } else {
206 VectorStrategy::SingleVector
207 }
208 }
209
210 fn select_optimal_memory_pattern(config: &AdvancedSimdConfig) -> MemoryPattern {
212 if config.enable_cache_blocking {
213 MemoryPattern::Blocked {
214 blocksize: config.l1_cachesize / std::mem::size_of::<f64>(),
215 }
216 } else if config.enable_prefetch {
217 MemoryPattern::SequentialPrefetch
218 } else {
219 MemoryPattern::Tiled {
220 tilesize: config.cache_linesize / std::mem::size_of::<f64>(),
221 }
222 }
223 }
224
225 pub fn compute_advanced_statistics(
227 &self,
228 data: &ArrayView1<F>,
229 ) -> StatsResult<AdvancedStatsResult<F>> {
230 checkarray_finite(data, "data")?;
231
232 if data.is_empty() {
233 return Err(StatsError::InvalidArgument(
234 "Data cannot be empty".to_string(),
235 ));
236 }
237
238 let n = data.len();
239
240 if n < self.config.advanced_simd_threshold {
241 return self.compute_scalar_fallback(data);
242 }
243
244 match self.vector_strategy {
245 VectorStrategy::MultiVector { num_registers } => {
246 self.compute_multi_vector_stats(data, num_registers)
247 }
248 VectorStrategy::UnrolledVector { unroll_factor } => {
249 self.compute_unrolled_vector_stats(data, unroll_factor)
250 }
251 VectorStrategy::CacheBlockedVector { blocksize } => {
252 self.compute_cache_blocked_stats(data, blocksize)
253 }
254 VectorStrategy::SingleVector => self.compute_single_vector_stats(data),
255 }
256 }
257
258 fn compute_multi_vector_stats(
260 &self,
261 data: &ArrayView1<F>,
262 num_registers: usize,
263 ) -> StatsResult<AdvancedStatsResult<F>> {
264 let n = data.len();
265 let vector_width = self.config.vector_width;
266 let chunksize = vector_width * num_registers;
267 let n_chunks = n / chunksize;
268 let remainder = n % chunksize;
269
270 let mut sum_accumulators = vec![F::zero(); num_registers];
272 let mut sum_sq_accumulators = vec![F::zero(); num_registers];
273 let mut sum_cube_accumulators = vec![F::zero(); num_registers];
274 let mut sum_quad_accumulators = vec![F::zero(); num_registers];
275 let mut min_accumulators = vec![F::infinity(); num_registers];
276 let mut max_accumulators = vec![F::neg_infinity(); num_registers];
277
278 let mut vector_ops_count = 0;
279 let mut prefetch_hits = 0;
280
281 for chunk_idx in 0..n_chunks {
283 let base_offset = chunk_idx * chunksize;
284
285 if self.config.enable_prefetch && chunk_idx + 2 < n_chunks {
287 let prefetch_offset = (chunk_idx + 2) * chunksize;
288 if prefetch_offset < n {
289 unsafe {
290 self.prefetchdata(data, prefetch_offset);
291 }
292 prefetch_hits += 1;
293 }
294 }
295
296 for reg_idx in 0..num_registers {
298 let start = base_offset + reg_idx * vector_width;
299 let end = (start + vector_width).min(n);
300
301 if start < n {
302 let chunk = data.slice(scirs2_core::ndarray::s![start..end]);
303
304 let (sum, sum_sq, sum_cube, sum_quad, min_val, max_val) =
306 self.compute_vector_moments(&chunk)?;
307
308 sum_accumulators[reg_idx] = sum_accumulators[reg_idx] + sum;
309 sum_sq_accumulators[reg_idx] = sum_sq_accumulators[reg_idx] + sum_sq;
310 sum_cube_accumulators[reg_idx] = sum_cube_accumulators[reg_idx] + sum_cube;
311 sum_quad_accumulators[reg_idx] = sum_quad_accumulators[reg_idx] + sum_quad;
312
313 if min_val < min_accumulators[reg_idx] {
314 min_accumulators[reg_idx] = min_val;
315 }
316 if max_val > max_accumulators[reg_idx] {
317 max_accumulators[reg_idx] = max_val;
318 }
319
320 vector_ops_count += 1;
321 }
322 }
323 }
324
325 let total_sum: F = sum_accumulators.iter().copied().sum();
327 let total_sum_sq: F = sum_sq_accumulators.iter().copied().sum();
328 let total_sum_cube: F = sum_cube_accumulators.iter().copied().sum();
329 let total_sum_quad: F = sum_quad_accumulators.iter().copied().sum();
330 let global_min = min_accumulators
331 .iter()
332 .copied()
333 .fold(F::infinity(), |a, b| a.min(b));
334 let global_max = max_accumulators
335 .iter()
336 .copied()
337 .fold(F::neg_infinity(), |a, b| a.max(b));
338
339 let mut remainder_sum = F::zero();
341 let mut remainder_sum_sq = F::zero();
342 let mut remainder_sum_cube = F::zero();
343 let mut remainder_sum_quad = F::zero();
344 let mut remainder_min = global_min;
345 let mut remainder_max = global_max;
346
347 if remainder > 0 {
348 let start = n_chunks * chunksize;
349 for i in start..n {
350 let val = data[i];
351 remainder_sum = remainder_sum + val;
352 remainder_sum_sq = remainder_sum_sq + val * val;
353 remainder_sum_cube = remainder_sum_cube + val * val * val;
354 remainder_sum_quad = remainder_sum_quad + val * val * val * val;
355 if val < remainder_min {
356 remainder_min = val;
357 }
358 if val > remainder_max {
359 remainder_max = val;
360 }
361 }
362 }
363
364 let final_sum = total_sum + remainder_sum;
366 let final_sum_sq = total_sum_sq + remainder_sum_sq;
367 let final_sum_cube = total_sum_cube + remainder_sum_cube;
368 let final_sum_quad = total_sum_quad + remainder_sum_quad;
369 let final_min = remainder_min;
370 let final_max = remainder_max;
371
372 let n_f = F::from(n).expect("Failed to convert to float");
374 let mean = final_sum / n_f;
375
376 let m2 = final_sum_sq / n_f - mean * mean;
378 let m3 = final_sum_cube / n_f
379 - F::from(3).expect("Failed to convert constant to float") * mean * m2
380 - mean * mean * mean;
381 let m4 = final_sum_quad / n_f
382 - F::from(4).expect("Failed to convert constant to float") * mean * m3
383 - F::from(6).expect("Failed to convert constant to float") * mean * mean * m2
384 - mean * mean * mean * mean;
385
386 let variance = m2;
387 let std_dev = variance.sqrt();
388 let skewness = if m2 > F::zero() {
389 m3 / (m2 * m2.sqrt())
390 } else {
391 F::zero()
392 };
393 let kurtosis = if m2 > F::zero() {
394 m4 / (m2 * m2) - F::from(3).expect("Failed to convert constant to float")
395 } else {
396 F::zero()
397 };
398
399 let theoretical_max_vectors = n / vector_width;
401 let simd_utilization = if theoretical_max_vectors > 0 {
402 vector_ops_count as f64 / theoretical_max_vectors as f64
403 } else {
404 0.0
405 };
406
407 let cache_efficiency = if n_chunks > 0 {
408 0.95 } else {
410 0.0
411 };
412
413 let prefetch_efficiency = if n_chunks > 2 {
414 prefetch_hits as f64 / (n_chunks - 2) as f64
415 } else {
416 0.0
417 };
418
419 Ok(AdvancedStatsResult {
420 mean,
421 variance,
422 std_dev,
423 skewness,
424 kurtosis,
425 min: final_min,
426 max: final_max,
427 simd_utilization,
428 cache_efficiency,
429 vector_operations_count: vector_ops_count,
430 prefetch_efficiency,
431 })
432 }
433
434 fn compute_unrolled_vector_stats(
436 &self,
437 data: &ArrayView1<F>,
438 unroll_factor: usize,
439 ) -> StatsResult<AdvancedStatsResult<F>> {
440 let n = data.len();
441 let vector_width = self.config.vector_width;
442 let unrolledsize = vector_width * unroll_factor;
443 let n_unrolled = n / unrolledsize;
444 let remainder = n % unrolledsize;
445
446 let mut sum_acc = F::zero();
447 let mut sum_sq_acc = F::zero();
448 let mut sum_cube_acc = F::zero();
449 let mut sum_quad_acc = F::zero();
450 let mut min_val = F::infinity();
451 let mut max_val = F::neg_infinity();
452
453 let mut vector_ops_count = 0;
454
455 for i in 0..n_unrolled {
457 let base_idx = i * unrolledsize;
458
459 for j in 0..unroll_factor {
461 let start = base_idx + j * vector_width;
462 let end = start + vector_width;
463
464 if end <= n {
465 let chunk = data.slice(scirs2_core::ndarray::s![start..end]);
466 let (sum, sum_sq, sum_cube, sum_quad, chunk_min, chunk_max) =
467 self.compute_vector_moments(&chunk)?;
468
469 sum_acc = sum_acc + sum;
470 sum_sq_acc = sum_sq_acc + sum_sq;
471 sum_cube_acc = sum_cube_acc + sum_cube;
472 sum_quad_acc = sum_quad_acc + sum_quad;
473 if chunk_min < min_val {
474 min_val = chunk_min;
475 }
476 if chunk_max > max_val {
477 max_val = chunk_max;
478 }
479
480 vector_ops_count += 1;
481 }
482 }
483 }
484
485 if remainder > 0 {
487 let start = n_unrolled * unrolledsize;
488 for i in start..n {
489 let val = data[i];
490 sum_acc = sum_acc + val;
491 sum_sq_acc = sum_sq_acc + val * val;
492 sum_cube_acc = sum_cube_acc + val * val * val;
493 sum_quad_acc = sum_quad_acc + val * val * val * val;
494 if val < min_val {
495 min_val = val;
496 }
497 if val > max_val {
498 max_val = val;
499 }
500 }
501 }
502
503 let n_f = F::from(n).expect("Failed to convert to float");
505 let mean = sum_acc / n_f;
506 let m2 = sum_sq_acc / n_f - mean * mean;
507 let m3 = sum_cube_acc / n_f
508 - F::from(3).expect("Failed to convert constant to float") * mean * m2
509 - mean * mean * mean;
510 let m4 = sum_quad_acc / n_f
511 - F::from(4).expect("Failed to convert constant to float") * mean * m3
512 - F::from(6).expect("Failed to convert constant to float") * mean * mean * m2
513 - mean * mean * mean * mean;
514
515 let variance = m2;
516 let std_dev = variance.sqrt();
517 let skewness = if m2 > F::zero() {
518 m3 / (m2 * m2.sqrt())
519 } else {
520 F::zero()
521 };
522 let kurtosis = if m2 > F::zero() {
523 m4 / (m2 * m2) - F::from(3).expect("Failed to convert constant to float")
524 } else {
525 F::zero()
526 };
527
528 Ok(AdvancedStatsResult {
529 mean,
530 variance,
531 std_dev,
532 skewness,
533 kurtosis,
534 min: min_val,
535 max: max_val,
536 simd_utilization: vector_ops_count as f64 / (n / vector_width) as f64,
537 cache_efficiency: 0.90, vector_operations_count: vector_ops_count,
539 prefetch_efficiency: 0.0, })
541 }
542
543 fn compute_cache_blocked_stats(
545 &self,
546 data: &ArrayView1<F>,
547 blocksize: usize,
548 ) -> StatsResult<AdvancedStatsResult<F>> {
549 let n = data.len();
550 let n_blocks = n / blocksize;
551 let remainder = n % blocksize;
552
553 let mut sum_acc = F::zero();
554 let mut sum_sq_acc = F::zero();
555 let mut sum_cube_acc = F::zero();
556 let mut sum_quad_acc = F::zero();
557 let mut min_val = F::infinity();
558 let mut max_val = F::neg_infinity();
559
560 let mut vector_ops_count = 0;
561
562 for block_idx in 0..n_blocks {
564 let start = block_idx * blocksize;
565 let end = start + blocksize;
566 let block = data.slice(scirs2_core::ndarray::s![start..end]);
567
568 let block_result = self.process_cache_block(&block)?;
570
571 sum_acc = sum_acc + block_result.sum;
572 sum_sq_acc = sum_sq_acc + block_result.sum_sq;
573 sum_cube_acc = sum_cube_acc + block_result.sum_cube;
574 sum_quad_acc = sum_quad_acc + block_result.sum_quad;
575 if block_result.min < min_val {
576 min_val = block_result.min;
577 }
578 if block_result.max > max_val {
579 max_val = block_result.max;
580 }
581
582 vector_ops_count += block_result.vector_ops;
583 }
584
585 if remainder > 0 {
587 let start = n_blocks * blocksize;
588 let remainder_block = data.slice(scirs2_core::ndarray::s![start..]);
589 let remainder_result = self.process_cache_block(&remainder_block)?;
590
591 sum_acc = sum_acc + remainder_result.sum;
592 sum_sq_acc = sum_sq_acc + remainder_result.sum_sq;
593 sum_cube_acc = sum_cube_acc + remainder_result.sum_cube;
594 sum_quad_acc = sum_quad_acc + remainder_result.sum_quad;
595 if remainder_result.min < min_val {
596 min_val = remainder_result.min;
597 }
598 if remainder_result.max > max_val {
599 max_val = remainder_result.max;
600 }
601
602 vector_ops_count += remainder_result.vector_ops;
603 }
604
605 let n_f = F::from(n).expect("Failed to convert to float");
607 let mean = sum_acc / n_f;
608 let m2 = sum_sq_acc / n_f - mean * mean;
609 let m3 = sum_cube_acc / n_f
610 - F::from(3).expect("Failed to convert constant to float") * mean * m2
611 - mean * mean * mean;
612 let m4 = sum_quad_acc / n_f
613 - F::from(4).expect("Failed to convert constant to float") * mean * m3
614 - F::from(6).expect("Failed to convert constant to float") * mean * mean * m2
615 - mean * mean * mean * mean;
616
617 let variance = m2;
618 let std_dev = variance.sqrt();
619 let skewness = if m2 > F::zero() {
620 m3 / (m2 * m2.sqrt())
621 } else {
622 F::zero()
623 };
624 let kurtosis = if m2 > F::zero() {
625 m4 / (m2 * m2) - F::from(3).expect("Failed to convert constant to float")
626 } else {
627 F::zero()
628 };
629
630 Ok(AdvancedStatsResult {
631 mean,
632 variance,
633 std_dev,
634 skewness,
635 kurtosis,
636 min: min_val,
637 max: max_val,
638 simd_utilization: vector_ops_count as f64 / (n / self.config.vector_width) as f64,
639 cache_efficiency: 0.98, vector_operations_count: vector_ops_count,
641 prefetch_efficiency: 0.85, })
643 }
644
645 fn compute_single_vector_stats(
647 &self,
648 data: &ArrayView1<F>,
649 ) -> StatsResult<AdvancedStatsResult<F>> {
650 let n = data.len();
652 let vector_width = self.config.vector_width;
653 let n_vectors = n / vector_width;
654 let remainder = n % vector_width;
655
656 let mut sum_acc = F::zero();
657 let mut sum_sq_acc = F::zero();
658 let mut min_val = F::infinity();
659 let mut max_val = F::neg_infinity();
660
661 for i in 0..n_vectors {
663 let start = i * vector_width;
664 let end = start + vector_width;
665 let chunk = data.slice(scirs2_core::ndarray::s![start..end]);
666
667 let chunk_sum = F::simd_sum(&chunk);
668 let chunk_sum_sq = F::simd_sum_squares(&chunk);
669 let chunk_min = F::simd_min_element(&chunk);
670 let chunk_max = F::simd_max_element(&chunk);
671
672 sum_acc = sum_acc + chunk_sum;
673 sum_sq_acc = sum_sq_acc + chunk_sum_sq;
674 if chunk_min < min_val {
675 min_val = chunk_min;
676 }
677 if chunk_max > max_val {
678 max_val = chunk_max;
679 }
680 }
681
682 if remainder > 0 {
684 let start = n_vectors * vector_width;
685 for i in start..n {
686 let val = data[i];
687 sum_acc = sum_acc + val;
688 sum_sq_acc = sum_sq_acc + val * val;
689 if val < min_val {
690 min_val = val;
691 }
692 if val > max_val {
693 max_val = val;
694 }
695 }
696 }
697
698 let n_f = F::from(n).expect("Failed to convert to float");
699 let mean = sum_acc / n_f;
700 let variance = sum_sq_acc / n_f - mean * mean;
701 let std_dev = variance.sqrt();
702
703 Ok(AdvancedStatsResult {
704 mean,
705 variance,
706 std_dev,
707 skewness: F::zero(), kurtosis: F::zero(),
709 min: min_val,
710 max: max_val,
711 simd_utilization: n_vectors as f64 / (n / vector_width) as f64,
712 cache_efficiency: 0.80, vector_operations_count: n_vectors,
714 prefetch_efficiency: 0.0,
715 })
716 }
717
718 fn compute_vector_moments(&self, chunk: &ArrayView1<F>) -> StatsResult<(F, F, F, F, F, F)> {
720 let sum = F::simd_sum(chunk);
721 let sum_sq = F::simd_sum_squares(chunk);
722 let sum_cube = <F as scirs2_core::simd_ops::SimdUnifiedOps>::simd_sum_cubes(chunk);
723 let sum_quad = F::simd_sum_quads(chunk);
724 let min_val = F::simd_min_element(chunk);
725 let max_val = F::simd_max_element(chunk);
726
727 Ok((sum, sum_sq, sum_cube, sum_quad, min_val, max_val))
728 }
729
730 fn process_cache_block(&self, block: &ArrayView1<F>) -> StatsResult<BlockResult<F>> {
732 let n = block.len();
733 let vector_width = self.config.vector_width;
734 let n_vectors = n / vector_width;
735 let remainder = n % vector_width;
736
737 let mut sum = F::zero();
738 let mut sum_sq = F::zero();
739 let mut sum_cube = F::zero();
740 let mut sum_quad = F::zero();
741 let mut min_val = F::infinity();
742 let mut max_val = F::neg_infinity();
743
744 for i in 0..n_vectors {
746 let start = i * vector_width;
747 let end = start + vector_width;
748 let chunk = block.slice(scirs2_core::ndarray::s![start..end]);
749
750 let (chunk_sum, chunk_sum_sq, chunk_sum_cube, chunk_sum_quad, chunk_min, chunk_max) =
751 self.compute_vector_moments(&chunk)?;
752
753 sum = sum + chunk_sum;
754 sum_sq = sum_sq + chunk_sum_sq;
755 sum_cube = sum_cube + chunk_sum_cube;
756 sum_quad = sum_quad + chunk_sum_quad;
757 if chunk_min < min_val {
758 min_val = chunk_min;
759 }
760 if chunk_max > max_val {
761 max_val = chunk_max;
762 }
763 }
764
765 if remainder > 0 {
767 let start = n_vectors * vector_width;
768 for i in start..n {
769 let val = block[i];
770 sum = sum + val;
771 sum_sq = sum_sq + val * val;
772 sum_cube = sum_cube + val * val * val;
773 sum_quad = sum_quad + val * val * val * val;
774 if val < min_val {
775 min_val = val;
776 }
777 if val > max_val {
778 max_val = val;
779 }
780 }
781 }
782
783 Ok(BlockResult {
784 sum,
785 sum_sq,
786 sum_cube,
787 sum_quad,
788 min: min_val,
789 max: max_val,
790 vector_ops: n_vectors,
791 })
792 }
793
794 unsafe fn prefetchdata(&self, data: &ArrayView1<F>, offset: usize) {
804 if offset < data.len() {
805 let ptr = data.as_ptr().add(offset);
806 #[cfg(target_arch = "x86_64")]
808 std::arch::x86_64::_mm_prefetch(ptr as *const i8, std::arch::x86_64::_MM_HINT_T0);
809 #[cfg(not(target_arch = "x86_64"))]
810 {
811 let _ = ptr;
813 }
814 }
815 }
816
817 fn compute_scalar_fallback(&self, data: &ArrayView1<F>) -> StatsResult<AdvancedStatsResult<F>> {
819 let n = data.len();
820 let n_f = F::from(n).expect("Failed to convert to float");
821
822 let sum: F = data.iter().copied().sum();
823 let mean = sum / n_f;
824
825 let variance = data.iter().map(|&x| (x - mean) * (x - mean)).sum::<F>() / n_f;
826
827 let min_val = data.iter().copied().fold(F::infinity(), |a, b| a.min(b));
828 let max_val = data
829 .iter()
830 .copied()
831 .fold(F::neg_infinity(), |a, b| a.max(b));
832
833 Ok(AdvancedStatsResult {
834 mean,
835 variance,
836 std_dev: variance.sqrt(),
837 skewness: F::zero(),
838 kurtosis: F::zero(),
839 min: min_val,
840 max: max_val,
841 simd_utilization: 0.0,
842 cache_efficiency: 1.0, vector_operations_count: 0,
844 prefetch_efficiency: 0.0,
845 })
846 }
847}
848
849#[derive(Debug, Clone)]
851struct BlockResult<F> {
852 sum: F,
853 sum_sq: F,
854 sum_cube: F,
855 sum_quad: F,
856 min: F,
857 max: F,
858 vector_ops: usize,
859}
860
861impl CacheAwareVectorProcessor {
862 pub fn new(config: &AdvancedSimdConfig) -> Self {
864 Self {
865 l1_blocksize: config.l1_cachesize / std::mem::size_of::<f64>(),
866 l2_blocksize: config.l2_cachesize / std::mem::size_of::<f64>(),
867 vector_width: config.vector_width,
868 prefetch_distance: config.vector_width * 4, }
870 }
871}
872
873#[allow(dead_code)]
875pub fn advanced_mean_f64(data: &ArrayView1<f64>) -> StatsResult<AdvancedStatsResult<f64>> {
876 let processor = AdvancedSimdProcessor::<f64>::new();
877 processor.compute_advanced_statistics(data)
878}
879
880#[allow(dead_code)]
910pub fn advanced_mean_f32(data: &ArrayView1<f32>) -> StatsResult<AdvancedStatsResult<f32>> {
911 let processor = AdvancedSimdProcessor::<f32>::new();
912 processor.compute_advanced_statistics(data)
913}
914
915#[cfg(test)]
916mod tests {
917 use super::*;
918 use scirs2_core::ndarray::{array, Array1};
919
920 #[test]
921 fn test_advanced_simd_basic() {
922 let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
923 let processor = AdvancedSimdProcessor::<f64>::new();
924 let result = processor
925 .compute_advanced_statistics(&data.view())
926 .expect("Operation failed");
927
928 assert!((result.mean - 4.5).abs() < 1e-10);
929 assert!(result.simd_utilization >= 0.0);
930 assert!(result.cache_efficiency >= 0.0);
931 }
932
933 #[test]
934 fn test_largedataset_performance() {
935 let data: Array1<f64> = Array1::from_shape_fn(10000, |i| i as f64);
936 let processor = AdvancedSimdProcessor::<f64>::new();
937 let result = processor
938 .compute_advanced_statistics(&data.view())
939 .expect("Operation failed");
940
941 assert!(result.simd_utilization > 0.5); assert!(result.vector_operations_count > 0);
943 }
944
945 #[test]
946 fn test_different_vector_strategies() {
947 let data: Array1<f64> = Array1::from_shape_fn(1000, |i| (i as f64).sin());
948
949 let config_multi = AdvancedSimdConfig {
951 vector_width: 8,
952 enable_pipelining: true,
953 ..Default::default()
954 };
955 let processor_multi = AdvancedSimdProcessor::with_config(config_multi);
956 let result_multi = processor_multi
957 .compute_advanced_statistics(&data.view())
958 .expect("Operation failed");
959
960 let config_blocked = AdvancedSimdConfig {
962 enable_cache_blocking: true,
963 l1_cachesize: 4096,
964 ..Default::default()
965 };
966 let processor_blocked = AdvancedSimdProcessor::with_config(config_blocked);
967 let result_blocked = processor_blocked
968 .compute_advanced_statistics(&data.view())
969 .expect("Operation failed");
970
971 assert!((result_multi.mean - result_blocked.mean).abs() < 1e-10);
973 assert!((result_multi.variance - result_blocked.variance).abs() < 1e-10);
974 }
975}