use crate::error::{StatsError, StatsResult};
use crate::error_standardization::ErrorMessages;
use scirs2_core::ndarray::{s, Array2, ArrayBase, Data, Ix1};
use scirs2_core::numeric::{Float, NumCast, Zero};
use scirs2_core::{
    parallel_ops::*,
    simd_ops::{PlatformCapabilities, SimdUnifiedOps},
};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::time::Instant;

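/// Configuration for the advanced SIMD optimizer, covering adaptive
/// optimization, accuracy targets, and memory/threading constraints.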
#[derive(Debug, Clone)]
pub struct AdvancedSimdConfig {
    pub enable_adaptive_optimization: bool,
    pub enable_predictive_selection: bool,
    pub enable_hardware_specialization: bool,
    pub enable_performance_learning: bool,
    pub target_accuracy: AccuracyLevel,
    pub performance_preference: PerformancePreference,
    pub memory_constraints: MemoryConstraints,
    pub threading_preferences: ThreadingPreferences,
}

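/// Target numerical accuracy, ordered from fastest (`Fast`) to most
/// precise (`Reference`).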
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum AccuracyLevel {
    Fast,
    Balanced,
    Precise,
    Reference,
}

#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PerformancePreference {
    MaxSpeed,
    SpeedBiased,
    Balanced,
    AccuracyBiased,
    MaxAccuracy,
}

#[derive(Debug, Clone)]
pub struct MemoryConstraints {
    pub max_working_set_bytes: usize,
    pub max_cache_usage_percent: f64,
    pub enable_memory_mapping: bool,
    pub prefer_in_place: bool,
}

#[derive(Debug, Clone)]
pub struct ThreadingPreferences {
    pub max_threads: Option<usize>,
    pub min_work_per_thread: usize,
    pub enable_numa_optimization: bool,
    pub affinity_strategy: AffinityStrategy,
}

#[derive(Debug, Clone, Copy, PartialEq)]
pub enum AffinityStrategy {
    None,
    PerformanceCores,
    EfficiencyCores,
    Spread,
    Compact,
}

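/// Adaptive SIMD optimizer: profiles the host hardware, selects an
/// algorithm per operation signature, and caches measured performance
/// so later calls can reuse the best-known choice.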
pub struct AdvancedSimdOptimizer {
    config: AdvancedSimdConfig,
    performance_cache: Arc<RwLock<HashMap<String, PerformanceProfile>>>,
    hardware_profile: HardwareProfile,
    algorithm_selector: AlgorithmSelector,
    memory_manager: AdvancedMemoryManager,
}

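/// Cached measurement recording which algorithm performed best for a
/// particular operation signature.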
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceProfile {
    pub operation_signature: OperationSignature,
    pub performance_metrics: PerformanceMetrics,
    pub optimal_algorithm: AlgorithmChoice,
    pub last_updated: std::time::SystemTime,
    pub measurement_count: usize,
}

#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub struct OperationSignature {
    pub operation_type: String,
    pub size_bucket: SizeBucket,
    pub data_type: String,
    pub data_characteristics: DataCharacteristics,
}

#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub enum SizeBucket {
    Tiny,
    Small,
    Medium,
    Large,
    Huge,
}

#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub struct DataCharacteristics {
    pub memory_layout: MemoryLayout,
    pub distribution_pattern: DistributionPattern,
    pub sparsity_level: SparsityLevel,
    pub numerical_range: NumericalRange,
}

#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub enum MemoryLayout {
    Contiguous,
    Strided(usize),
    Fragmented,
}

#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub enum DistributionPattern {
    Uniform,
    Normal,
    Exponential,
    Power,
    Bimodal,
    Unknown,
}

#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub enum SparsityLevel {
    Dense,
    Moderate,
    Sparse,
    VerySparse,
}

#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub enum NumericalRange {
    SmallNumbers,
    Normal,
    LargeNumbers,
    Mixed,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceMetrics {
    pub execution_time_ns: f64,
    pub memory_bandwidth_utilization: f64,
    pub cache_efficiency: f64,
    pub simd_efficiency: f64,
    pub accuracy_score: f64,
    pub energy_efficiency: f64,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AlgorithmChoice {
    Scalar {
        algorithm: ScalarAlgorithm,
    },
    Simd {
        instruction_set: SimdInstructionSet,
        algorithm: SimdAlgorithm,
        vector_width: usize,
    },
    Hybrid {
        algorithms: Vec<AlgorithmChoice>,
        thresholds: Vec<usize>,
    },
    ParallelSimd {
        simd_choice: Box<AlgorithmChoice>,
        thread_count: usize,
        work_stealing: bool,
    },
}

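/// Scalar summation variants. `Kahan` and `Compensated` reduce
/// floating-point rounding error at the cost of extra additions,
/// `Pairwise` bounds error growth by recursive halving, and `Welford`
/// is the streaming update used for variance.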
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ScalarAlgorithm {
    Standard,
    Kahan,
    Pairwise,
    Compensated,
    Welford,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum SimdInstructionSet {
    SSE2,
    SSE3,
    SSE41,
    SSE42,
    AVX,
    AVX2,
    AVX512F,
    AVX512DQ,
    NEON,
    SVE,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum SimdAlgorithm {
    Vectorized,
    Unrolled,
    Prefetched,
    Interleaved,
    FusedMultiplyAdd,
    BranchFree,
}

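/// Static description of the host CPU used to guide algorithm selection.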
#[derive(Debug, Clone)]
pub struct HardwareProfile {
    pub architecture: CpuArchitecture,
    pub simd_capabilities: Vec<SimdInstructionSet>,
    pub cache_hierarchy: CacheHierarchy,
    pub memory_subsystem: MemorySubsystem,
    pub thermal_profile: ThermalProfile,
    pub power_profile: PowerProfile,
}

#[derive(Debug, Clone, PartialEq)]
pub enum CpuArchitecture {
    X86_64,
    AArch64,
    RISCV64,
    WASM32,
}

#[derive(Debug, Clone)]
pub struct CacheHierarchy {
    pub l1data_kb: usize,
    pub l1_instruction_kb: usize,
    pub l2_kb: usize,
    pub l3_kb: usize,
    pub cache_linesize: usize,
    pub associativity: HashMap<String, usize>,
}

#[derive(Debug, Clone)]
pub struct MemorySubsystem {
    pub memory_channels: usize,
    pub memory_bandwidth_gbps: f64,
    pub memory_latency_ns: f64,
    pub numa_nodes: usize,
    pub memory_controller_count: usize,
}

#[derive(Debug, Clone)]
pub struct ThermalProfile {
    pub thermal_design_power: f64,
    pub max_junction_temperature: f64,
    pub thermal_throttling_threshold: f64,
    pub cooling_solution: CoolingSolution,
}

#[derive(Debug, Clone)]
pub enum CoolingSolution {
    Passive,
    ActiveAir,
    Liquid,
    Custom,
}

#[derive(Debug, Clone)]
pub struct PowerProfile {
    pub base_power_watts: f64,
    pub max_power_watts: f64,
    pub power_efficiency_curve: Vec<(f64, f64)>,
    pub voltage_frequency_curve: Vec<(f64, f64)>,
}

pub struct AlgorithmSelector {
    decision_tree: DecisionTree,
    performance_predictor: PerformancePredictor,
    cost_model: CostModel,
}

#[derive(Debug, Clone)]
pub struct DecisionTree {
    nodes: Vec<DecisionNode>,
}

#[derive(Debug, Clone)]
pub struct DecisionNode {
    pub condition: SelectionCondition,
    pub true_branch: Option<usize>,
    pub false_branch: Option<usize>,
    pub algorithm: Option<AlgorithmChoice>,
}

#[derive(Debug, Clone)]
pub enum SelectionCondition {
    DataSizeThreshold(usize),
    CacheEfficiencyThreshold(f64),
    AccuracyRequirement(AccuracyLevel),
    MemoryConstraint(usize),
    ThermalConstraint(f64),
    PowerConstraint(f64),
    Complex(
        Box<SelectionCondition>,
        LogicalOperator,
        Box<SelectionCondition>,
    ),
}

#[derive(Debug, Clone)]
pub enum LogicalOperator {
    And,
    Or,
    Not,
}

pub struct PerformancePredictor {
    models: HashMap<String, PredictionModel>,
}

#[derive(Debug, Clone)]
pub struct PredictionModel {
    pub model_type: ModelType,
    pub coefficients: Vec<f64>,
    pub feature_weights: HashMap<String, f64>,
    pub accuracy: f64,
}

#[derive(Debug, Clone)]
pub enum ModelType {
    Linear,
    Polynomial(usize),
    ExponentialDecay,
    PowerLaw,
    NeuralNetwork,
}

pub struct CostModel {
    computation_cost_weights: HashMap<String, f64>,
    memory_cost_weights: HashMap<String, f64>,
    energy_cost_weights: HashMap<String, f64>,
}

pub struct AdvancedMemoryManager {
    allocation_strategy: AllocationStrategy,
    cache_optimizer: CacheOptimizer,
    numa_manager: NumaManager,
}

#[derive(Debug, Clone)]
pub enum AllocationStrategy {
    Standard,
    Aligned(usize),
    Interleaved,
    LocalFirst,
    HugePage,
}

pub struct CacheOptimizer {
    prefetch_strategy: PrefetchStrategy,
    blocking_strategy: BlockingStrategy,
}

#[derive(Debug, Clone)]
pub enum PrefetchStrategy {
    None,
    Sequential,
    Stride(usize),
    Adaptive,
}

#[derive(Debug, Clone)]
pub enum BlockingStrategy {
    None,
    L1Blocking(usize),
    L2Blocking(usize),
    L3Blocking(usize),
    Adaptive,
}

pub struct NumaManager {
    node_topology: Vec<NumaNode>,
    allocation_policy: NumaAllocationPolicy,
}

#[derive(Debug, Clone)]
pub struct NumaNode {
    pub node_id: usize,
    pub cpu_cores: Vec<usize>,
    pub memorysize_gb: f64,
    pub memory_bandwidth_gbps: f64,
    pub inter_node_latency_ns: HashMap<usize, f64>,
}

#[derive(Debug, Clone)]
pub enum NumaAllocationPolicy {
    Default,
    LocalOnly,
    Interleaved,
    Preferred(usize),
}

impl AdvancedSimdOptimizer {
    pub fn new(config: AdvancedSimdConfig) -> Self {
        let hardware_profile = Self::detect_hardware_profile();
        let algorithm_selector = Self::build_algorithm_selector(&hardware_profile);
        let memory_manager = Self::create_memory_manager(&config, &hardware_profile);

        Self {
            config,
            performance_cache: Arc::new(RwLock::new(HashMap::new())),
            hardware_profile,
            algorithm_selector,
            memory_manager,
        }
    }

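    /// Computes the arithmetic mean of `x`, dispatching to a scalar, SIMD,
    /// or parallel-SIMD path based on the input's size and characteristics.
    /// When performance learning is enabled, the measured result is cached
    /// under the operation's signature for reuse.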
    pub fn advanced_mean<F, D>(&self, x: &ArrayBase<D, Ix1>) -> StatsResult<F>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + 'static
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        if x.is_empty() {
            return Err(ErrorMessages::empty_array("x"));
        }

        let characteristics = self.analyzedata_characteristics(x);

        let signature = OperationSignature {
            operation_type: "mean".to_string(),
            size_bucket: Self::categorizesize(x.len()),
            data_type: std::any::type_name::<F>().to_string(),
            data_characteristics: characteristics,
        };

        // Fast path: reuse a previously learned algorithm choice.
        if let Some(profile) = self.get_cached_performance(&signature) {
            return self.execute_cached_algorithm(x, &profile.optimal_algorithm);
        }

        let algorithm = self.select_optimal_algorithm(&signature, x)?;
        let (result, metrics) = self.execute_with_monitoring(x, &algorithm)?;

        if self.config.enable_performance_learning {
            self.cache_performance_profile(signature, algorithm, metrics);
        }

        Ok(result)
    }

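    /// Computes the variance of `x` with `ddof` delta degrees of freedom
    /// (`ddof = 1` gives the unbiased sample variance).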
    pub fn advanced_variance<F, D>(&self, x: &ArrayBase<D, Ix1>, ddof: usize) -> StatsResult<F>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + 'static
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        if x.is_empty() {
            return Err(ErrorMessages::empty_array("x"));
        }

        let n = x.len();
        if n <= ddof {
            return Err(ErrorMessages::insufficientdata(
                "variance calculation",
                ddof + 1,
                n,
            ));
        }

        let characteristics = self.analyzedata_characteristics(x);
        let signature = OperationSignature {
            operation_type: "variance".to_string(),
            size_bucket: Self::categorizesize(n),
            data_type: std::any::type_name::<F>().to_string(),
            data_characteristics: characteristics,
        };

        if let Some(profile) = self.get_cached_performance(&signature) {
            return self.execute_variance_cached(x, ddof, &profile.optimal_algorithm);
        }

        let algorithm = self.select_variance_algorithm(&signature, x, ddof)?;
        let (result, metrics) = self.execute_variance_with_monitoring(x, ddof, &algorithm)?;

        if self.config.enable_performance_learning {
            self.cache_performance_profile(signature, algorithm, metrics);
        }

        Ok(result)
    }

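    /// Computes the Pearson correlation coefficient between `x` and `y`.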
    pub fn advanced_correlation<F, D1, D2>(
        &self,
        x: &ArrayBase<D1, Ix1>,
        y: &ArrayBase<D2, Ix1>,
    ) -> StatsResult<F>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + 'static
            + std::iter::Sum<F>
            + std::fmt::Display,
        D1: Data<Elem = F>,
        D2: Data<Elem = F>,
    {
        if x.is_empty() {
            return Err(ErrorMessages::empty_array("x"));
        }
        if y.is_empty() {
            return Err(ErrorMessages::empty_array("y"));
        }
        if x.len() != y.len() {
            return Err(ErrorMessages::length_mismatch("x", x.len(), "y", y.len()));
        }

        let characteristics = self.analyze_bivariate_characteristics(x, y);
        let signature = OperationSignature {
            operation_type: "correlation".to_string(),
            size_bucket: Self::categorizesize(x.len()),
            data_type: std::any::type_name::<F>().to_string(),
            data_characteristics: characteristics,
        };

        if let Some(profile) = self.get_cached_performance(&signature) {
            return self.execute_correlation_cached(x, y, &profile.optimal_algorithm);
        }

        let algorithm = self.select_correlation_algorithm(&signature, x, y)?;
        let (result, metrics) = self.execute_correlation_with_monitoring(x, y, &algorithm)?;

        if self.config.enable_performance_learning {
            self.cache_performance_profile(signature, algorithm, metrics);
        }

        Ok(result)
    }

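    /// Multiplies `a` (m x k) by `b` (k x n), returning the m x n product.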
    pub fn advanced_matrix_multiply<F>(
        &self,
        a: &Array2<F>,
        b: &Array2<F>,
    ) -> StatsResult<Array2<F>>
    where
        F: Float + NumCast + SimdUnifiedOps + Copy + Send + Sync + 'static + std::fmt::Display,
    {
        if a.ncols() != b.nrows() {
            return Err(StatsError::dimension_mismatch(format!(
                "Matrix multiplication requires A.cols == B.rows, got {} != {}",
                a.ncols(),
                b.nrows()
            )));
        }

        let characteristics = Self::analyze_matrix_characteristics(a, b);
        let signature = OperationSignature {
            operation_type: "matrix_multiply".to_string(),
            size_bucket: Self::categorize_matrixsize(a.nrows() * a.ncols()),
            data_type: std::any::type_name::<F>().to_string(),
            data_characteristics: characteristics,
        };

        let algorithm = Self::select_matrix_algorithm(&signature, a, b)?;

        self.execute_matrix_multiply(a, b, &algorithm)
    }

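    /// Applies each requested operation to every array in `data`, choosing
    /// a batch execution strategy from the batch size and average length.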
    pub fn advanced_batch_statistics<F, D>(
        &self,
        data: &[ArrayBase<D, Ix1>],
        operations: &[BatchOperation],
    ) -> StatsResult<BatchResults<F>>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + 'static
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        if data.is_empty() {
            return Err(ErrorMessages::empty_array("data"));
        }

        let batchsize = data.len();
        let avg_arraysize = data.iter().map(|arr| arr.len()).sum::<usize>() / batchsize;

        let strategy = self.select_batch_strategy(batchsize, avg_arraysize, operations)?;

        self.execute_batch_operations(data, operations, &strategy)
    }

    fn detect_hardware_profile() -> HardwareProfile {
        let _capabilities = PlatformCapabilities::detect();

        // Aside from the compile-time architecture check, the values below
        // are representative defaults rather than runtime-detected figures.
        HardwareProfile {
            architecture: if cfg!(target_arch = "x86_64") {
                CpuArchitecture::X86_64
            } else if cfg!(target_arch = "aarch64") {
                CpuArchitecture::AArch64
            } else if cfg!(target_arch = "riscv64") {
                CpuArchitecture::RISCV64
            } else {
                CpuArchitecture::WASM32
            },
            simd_capabilities: vec![
                SimdInstructionSet::SSE2,
                SimdInstructionSet::SSE42,
                SimdInstructionSet::AVX2,
            ],
            cache_hierarchy: CacheHierarchy {
                l1data_kb: 32,
                l1_instruction_kb: 32,
                l2_kb: 256,
                l3_kb: 8192,
                cache_linesize: 64,
                associativity: [
                    ("L1".to_string(), 8),
                    ("L2".to_string(), 8),
                    ("L3".to_string(), 16),
                ]
                .iter()
                .cloned()
                .collect(),
            },
            memory_subsystem: MemorySubsystem {
                memory_channels: 2,
                memory_bandwidth_gbps: 50.0,
                memory_latency_ns: 100.0,
                numa_nodes: 1,
                memory_controller_count: 1,
            },
            thermal_profile: ThermalProfile {
                thermal_design_power: 65.0,
                max_junction_temperature: 100.0,
                thermal_throttling_threshold: 85.0,
                cooling_solution: CoolingSolution::ActiveAir,
            },
            power_profile: PowerProfile {
                base_power_watts: 15.0,
                max_power_watts: 65.0,
                power_efficiency_curve: vec![(0.1, 0.8), (0.5, 0.9), (1.0, 0.85)],
                voltage_frequency_curve: vec![(1.0, 2.4), (1.2, 3.2), (1.35, 3.8)],
            },
        }
    }

    fn build_algorithm_selector(_hardware: &HardwareProfile) -> AlgorithmSelector {
        let nodes = vec![
            DecisionNode {
                condition: SelectionCondition::DataSizeThreshold(1000),
                true_branch: Some(1),
                false_branch: Some(2),
                algorithm: None,
            },
            DecisionNode {
                condition: SelectionCondition::CacheEfficiencyThreshold(0.8),
                true_branch: None,
                false_branch: None,
                algorithm: Some(AlgorithmChoice::Simd {
                    instruction_set: SimdInstructionSet::AVX2,
                    algorithm: SimdAlgorithm::Vectorized,
                    vector_width: 4,
                }),
            },
            DecisionNode {
                condition: SelectionCondition::AccuracyRequirement(AccuracyLevel::Precise),
                true_branch: None,
                false_branch: None,
                algorithm: Some(AlgorithmChoice::Scalar {
                    algorithm: ScalarAlgorithm::Kahan,
                }),
            },
        ];

        let decision_tree = DecisionTree { nodes };

        AlgorithmSelector {
            decision_tree,
            performance_predictor: PerformancePredictor {
                models: HashMap::new(),
            },
            cost_model: CostModel {
                computation_cost_weights: HashMap::new(),
                memory_cost_weights: HashMap::new(),
                energy_cost_weights: HashMap::new(),
            },
        }
    }

    fn create_memory_manager(
        config: &AdvancedSimdConfig,
        hardware: &HardwareProfile,
    ) -> AdvancedMemoryManager {
        let allocation_strategy = if config.memory_constraints.prefer_in_place {
            AllocationStrategy::Standard
        } else {
            // Align to the typical 64-byte cache line for SIMD loads.
            AllocationStrategy::Aligned(64)
        };

        let cache_optimizer = CacheOptimizer {
            prefetch_strategy: PrefetchStrategy::Adaptive,
            blocking_strategy: BlockingStrategy::Adaptive,
        };

        let numa_manager = NumaManager {
            node_topology: vec![NumaNode {
                node_id: 0,
                cpu_cores: (0..num_cpus::get()).collect(),
                memorysize_gb: 16.0,
                memory_bandwidth_gbps: hardware.memory_subsystem.memory_bandwidth_gbps,
                inter_node_latency_ns: HashMap::new(),
            }],
            allocation_policy: NumaAllocationPolicy::Default,
        };

        AdvancedMemoryManager {
            allocation_strategy,
            cache_optimizer,
            numa_manager,
        }
    }

    fn analyzedata_characteristics<F, D>(&self, x: &ArrayBase<D, Ix1>) -> DataCharacteristics
    where
        F: Float + Copy + std::fmt::Display,
        D: Data<Elem = F>,
    {
        let memory_layout = if x.as_slice().is_some() {
            MemoryLayout::Contiguous
        } else {
            MemoryLayout::Strided(1)
        };

        let zero_count = x.iter().filter(|&&val| val == F::zero()).count();
        let sparsity_ratio = zero_count as f64 / x.len() as f64;
        let sparsity_level = match sparsity_ratio {
            r if r < 0.05 => SparsityLevel::Dense,
            r if r < 0.5 => SparsityLevel::Moderate,
            r if r < 0.95 => SparsityLevel::Sparse,
            _ => SparsityLevel::VerySparse,
        };

        let (min_abs, max_abs) =
            x.iter()
                .fold((F::infinity(), F::zero()), |(min_abs, max_abs), &val| {
                    let abs_val = val.abs();
                    (min_abs.min(abs_val), max_abs.max(abs_val))
                });

        let numerical_range = if max_abs < F::from(1e-6).unwrap() {
            NumericalRange::SmallNumbers
        } else if min_abs > F::from(1e6).unwrap() {
            NumericalRange::LargeNumbers
        } else if max_abs / min_abs > F::from(1e12).unwrap() {
            NumericalRange::Mixed
        } else {
            NumericalRange::Normal
        };

        DataCharacteristics {
            memory_layout,
            distribution_pattern: DistributionPattern::Unknown,
            sparsity_level,
            numerical_range,
        }
    }

    fn analyze_bivariate_characteristics<F, D1, D2>(
        &self,
        x: &ArrayBase<D1, Ix1>,
        _y: &ArrayBase<D2, Ix1>,
    ) -> DataCharacteristics
    where
        F: Float + Copy + std::fmt::Display,
        D1: Data<Elem = F>,
        D2: Data<Elem = F>,
    {
        // Currently characterizes only `x`; `y` is assumed to be similar.
        self.analyzedata_characteristics(x)
    }

    fn analyze_matrix_characteristics<F>(_a: &Array2<F>, _b: &Array2<F>) -> DataCharacteristics
    where
        F: Float + Copy + std::fmt::Display,
    {
        // Placeholder: dense, contiguous defaults until matrix analysis lands.
        DataCharacteristics {
            memory_layout: MemoryLayout::Contiguous,
            distribution_pattern: DistributionPattern::Unknown,
            sparsity_level: SparsityLevel::Dense,
            numerical_range: NumericalRange::Normal,
        }
    }

    fn categorizesize(size: usize) -> SizeBucket {
        match size {
            s if s < 64 => SizeBucket::Tiny,
            s if s < 1024 => SizeBucket::Small,
            s if s < 65536 => SizeBucket::Medium,
            s if s < 1048576 => SizeBucket::Large,
            _ => SizeBucket::Huge,
        }
    }

    fn categorize_matrixsize(total_elements: usize) -> SizeBucket {
        Self::categorizesize(total_elements)
    }

    fn get_cached_performance(&self, signature: &OperationSignature) -> Option<PerformanceProfile> {
        if !self.config.enable_performance_learning {
            return None;
        }

        let cache = self.performance_cache.read().ok()?;
        let key = format!("{:?}", signature);
        cache.get(&key).cloned()
    }

    fn cache_performance_profile(
        &self,
        signature: OperationSignature,
        algorithm: AlgorithmChoice,
        metrics: PerformanceMetrics,
    ) {
        if !self.config.enable_performance_learning {
            return;
        }

        let profile = PerformanceProfile {
            operation_signature: signature.clone(),
            performance_metrics: metrics,
            optimal_algorithm: algorithm,
            last_updated: std::time::SystemTime::now(),
            measurement_count: 1,
        };

        if let Ok(mut cache) = self.performance_cache.write() {
            let key = format!("{:?}", signature);
            cache.insert(key, profile);
        }
    }

    fn select_optimal_algorithm<F, D>(
        &self,
        signature: &OperationSignature,
        _x: &ArrayBase<D, Ix1>,
    ) -> StatsResult<AlgorithmChoice>
    where
        F: Float + Copy,
        D: Data<Elem = F>,
    {
        let algorithm = match (&signature.size_bucket, &self.config.target_accuracy) {
            (SizeBucket::Tiny, _) => AlgorithmChoice::Scalar {
                algorithm: ScalarAlgorithm::Standard,
            },
            (SizeBucket::Small, AccuracyLevel::Precise) => AlgorithmChoice::Scalar {
                algorithm: ScalarAlgorithm::Kahan,
            },
            (SizeBucket::Medium | SizeBucket::Large, AccuracyLevel::Fast) => {
                AlgorithmChoice::Simd {
                    instruction_set: SimdInstructionSet::AVX2,
                    algorithm: SimdAlgorithm::Vectorized,
                    vector_width: 4,
                }
            }
            (SizeBucket::Huge, _) => AlgorithmChoice::ParallelSimd {
                simd_choice: Box::new(AlgorithmChoice::Simd {
                    instruction_set: SimdInstructionSet::AVX2,
                    algorithm: SimdAlgorithm::Vectorized,
                    vector_width: 4,
                }),
                thread_count: num_cpus::get(),
                work_stealing: true,
            },
            _ => AlgorithmChoice::Simd {
                instruction_set: SimdInstructionSet::AVX2,
                algorithm: SimdAlgorithm::Vectorized,
                vector_width: 4,
            },
        };

        Ok(algorithm)
    }

    fn select_variance_algorithm<F, D>(
        &self,
        signature: &OperationSignature,
        _x: &ArrayBase<D, Ix1>,
        _ddof: usize,
    ) -> StatsResult<AlgorithmChoice>
    where
        F: Float + Copy,
        D: Data<Elem = F>,
    {
        let algorithm = match (&signature.size_bucket, &self.config.target_accuracy) {
            (SizeBucket::Tiny | SizeBucket::Small, _) => AlgorithmChoice::Scalar {
                algorithm: ScalarAlgorithm::Welford,
            },
            (_, AccuracyLevel::Precise | AccuracyLevel::Reference) => AlgorithmChoice::Scalar {
                algorithm: ScalarAlgorithm::Welford,
            },
            (SizeBucket::Medium | SizeBucket::Large, _) => AlgorithmChoice::Simd {
                instruction_set: SimdInstructionSet::AVX2,
                algorithm: SimdAlgorithm::Vectorized,
                vector_width: 4,
            },
            (SizeBucket::Huge, _) => AlgorithmChoice::ParallelSimd {
                simd_choice: Box::new(AlgorithmChoice::Scalar {
                    algorithm: ScalarAlgorithm::Welford,
                }),
                thread_count: num_cpus::get(),
                work_stealing: true,
            },
        };

        Ok(algorithm)
    }

    fn select_correlation_algorithm<F, D1, D2>(
        &self,
        signature: &OperationSignature,
        _x: &ArrayBase<D1, Ix1>,
        _y: &ArrayBase<D2, Ix1>,
    ) -> StatsResult<AlgorithmChoice>
    where
        F: Float + Copy,
        D1: Data<Elem = F>,
        D2: Data<Elem = F>,
    {
        let algorithm = match &signature.size_bucket {
            SizeBucket::Tiny => AlgorithmChoice::Scalar {
                algorithm: ScalarAlgorithm::Standard,
            },
            SizeBucket::Small => AlgorithmChoice::Scalar {
                algorithm: ScalarAlgorithm::Compensated,
            },
            SizeBucket::Medium | SizeBucket::Large => AlgorithmChoice::Simd {
                instruction_set: SimdInstructionSet::AVX2,
                algorithm: SimdAlgorithm::FusedMultiplyAdd,
                vector_width: 4,
            },
            SizeBucket::Huge => AlgorithmChoice::ParallelSimd {
                simd_choice: Box::new(AlgorithmChoice::Simd {
                    instruction_set: SimdInstructionSet::AVX2,
                    algorithm: SimdAlgorithm::FusedMultiplyAdd,
                    vector_width: 4,
                }),
                thread_count: num_cpus::get(),
                work_stealing: false,
            },
        };

        Ok(algorithm)
    }

    fn select_matrix_algorithm<F>(
        _signature: &OperationSignature,
        a: &Array2<F>,
        b: &Array2<F>,
    ) -> StatsResult<AlgorithmChoice>
    where
        F: Float + Copy + std::fmt::Display,
    {
        // Total multiply-accumulate count for an m x k by k x n product.
        let total_ops = a.nrows() * a.ncols() * b.ncols();

        let algorithm = if total_ops < 1000 {
            AlgorithmChoice::Scalar {
                algorithm: ScalarAlgorithm::Standard,
            }
        } else if total_ops < 1_000_000 {
            AlgorithmChoice::Simd {
                instruction_set: SimdInstructionSet::AVX2,
                algorithm: SimdAlgorithm::Vectorized,
                vector_width: 4,
            }
        } else {
            AlgorithmChoice::ParallelSimd {
                simd_choice: Box::new(AlgorithmChoice::Simd {
                    instruction_set: SimdInstructionSet::AVX2,
                    algorithm: SimdAlgorithm::Vectorized,
                    vector_width: 4,
                }),
                thread_count: num_cpus::get(),
                work_stealing: true,
            }
        };

        Ok(algorithm)
    }

    fn execute_cached_algorithm<F, D>(
        &self,
        x: &ArrayBase<D, Ix1>,
        algorithm: &AlgorithmChoice,
    ) -> StatsResult<F>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        self.execute_mean_algorithm(x, algorithm)
    }

    fn execute_with_monitoring<F, D>(
        &self,
        x: &ArrayBase<D, Ix1>,
        algorithm: &AlgorithmChoice,
    ) -> StatsResult<(F, PerformanceMetrics)>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        let start_time = Instant::now();
        let result = self.execute_mean_algorithm(x, algorithm)?;
        let execution_time = start_time.elapsed();

        // Only execution time is actually measured; the remaining metrics
        // are placeholder estimates until hardware counters are wired in.
        let metrics = PerformanceMetrics {
            execution_time_ns: execution_time.as_nanos() as f64,
            memory_bandwidth_utilization: 0.8,
            cache_efficiency: 0.9,
            simd_efficiency: 0.85,
            accuracy_score: 0.99,
            energy_efficiency: 1e9,
        };

        Ok((result, metrics))
    }

    fn execute_mean_algorithm<F, D>(
        &self,
        x: &ArrayBase<D, Ix1>,
        algorithm: &AlgorithmChoice,
    ) -> StatsResult<F>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        match algorithm {
            AlgorithmChoice::Scalar { algorithm } => self.execute_scalar_mean(x, algorithm),
            AlgorithmChoice::Simd { .. } => {
                Ok(F::simd_sum(&x.view()) / F::from(x.len()).unwrap())
            }
            AlgorithmChoice::ParallelSimd {
                simd_choice,
                thread_count,
                ..
            } => self.execute_parallel_mean(x, simd_choice, *thread_count),
            // Hybrid dispatch is not implemented yet; fall back to plain SIMD.
            AlgorithmChoice::Hybrid { .. } => {
                Ok(F::simd_sum(&x.view()) / F::from(x.len()).unwrap())
            }
        }
    }

    fn execute_scalar_mean<F, D>(
        &self,
        x: &ArrayBase<D, Ix1>,
        algorithm: &ScalarAlgorithm,
    ) -> StatsResult<F>
    where
        F: Float + NumCast + Copy + std::fmt::Display,
        D: Data<Elem = F>,
    {
        let result = match algorithm {
            ScalarAlgorithm::Standard => x.iter().fold(F::zero(), |acc, &val| acc + val),
            ScalarAlgorithm::Kahan => self.kahan_sum(x),
            ScalarAlgorithm::Pairwise => self.pairwise_sum(x),
            ScalarAlgorithm::Compensated => self.compensated_sum(x),
            // Welford's update offers no advantage for a plain mean, so a
            // straightforward sum is used here.
            ScalarAlgorithm::Welford => x.iter().fold(F::zero(), |acc, &val| acc + val),
        };

        Ok(result / F::from(x.len()).unwrap())
    }

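    /// Kahan (compensated) summation: carries the running rounding error in
    /// `c` and folds it back into the next addition, keeping the accumulated
    /// error nearly independent of the number of terms.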
    fn kahan_sum<F, D>(&self, x: &ArrayBase<D, Ix1>) -> F
    where
        F: Float + Copy + std::fmt::Display,
        D: Data<Elem = F>,
    {
        let mut sum = F::zero();
        let mut c = F::zero();

        for &value in x.iter() {
            let y = value - c;
            let t = sum + y;
            c = (t - sum) - y;
            sum = t;
        }

        sum
    }

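    /// Pairwise summation: recursively splits the input and sums the halves,
    /// bounding rounding-error growth to O(log n).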
    fn pairwise_sum<F, D>(&self, x: &ArrayBase<D, Ix1>) -> F
    where
        F: Float + Copy + std::fmt::Display,
        D: Data<Elem = F>,
    {
        if x.len() <= 16 {
            return x.iter().fold(F::zero(), |acc, &val| acc + val);
        }

        let mid = x.len() / 2;
        let left_sum = self.pairwise_sum(&x.slice(s![..mid]));
        let right_sum = self.pairwise_sum(&x.slice(s![mid..]));
        left_sum + right_sum
    }

    fn compensated_sum<F, D>(&self, x: &ArrayBase<D, Ix1>) -> F
    where
        F: Float + Copy + std::fmt::Display,
        D: Data<Elem = F>,
    {
        // Kahan summation currently serves as the compensated variant.
        self.kahan_sum(x)
    }

    fn execute_parallel_mean<F, D>(
        &self,
        x: &ArrayBase<D, Ix1>,
        simd_choice: &AlgorithmChoice,
        thread_count: usize,
    ) -> StatsResult<F>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        // Parallel overhead dominates for small inputs; run the SIMD path
        // single-threaded instead.
        if x.len() < 1000 {
            return self.execute_mean_algorithm(x, simd_choice);
        }

        let chunksize = x.len().div_ceil(thread_count);

        let sum: F = x
            .axis_chunks_iter(scirs2_core::ndarray::Axis(0), chunksize)
            .into_par_iter()
            .map(|chunk| F::simd_sum(&chunk))
            .sum();

        Ok(sum / F::from(x.len()).unwrap())
    }

    fn execute_variance_cached<F, D>(
        &self,
        x: &ArrayBase<D, Ix1>,
        ddof: usize,
        algorithm: &AlgorithmChoice,
    ) -> StatsResult<F>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        // Two-pass variance: compute the mean with the chosen algorithm,
        // then accumulate squared deviations.
        let mean = self.execute_mean_algorithm(x, algorithm)?;
        let sum_sq_dev = x
            .iter()
            .map(|&val| {
                let dev = val - mean;
                dev * dev
            })
            .fold(F::zero(), |acc, val| acc + val);

        Ok(sum_sq_dev / F::from(x.len() - ddof).unwrap())
    }

    fn execute_variance_with_monitoring<F, D>(
        &self,
        x: &ArrayBase<D, Ix1>,
        ddof: usize,
        algorithm: &AlgorithmChoice,
    ) -> StatsResult<(F, PerformanceMetrics)>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        let start_time = Instant::now();
        let result = self.execute_variance_cached(x, ddof, algorithm)?;
        let execution_time = start_time.elapsed();

        let metrics = PerformanceMetrics {
            execution_time_ns: execution_time.as_nanos() as f64,
            memory_bandwidth_utilization: 0.8,
            cache_efficiency: 0.9,
            simd_efficiency: 0.85,
            accuracy_score: 0.99,
            energy_efficiency: 1e9,
        };

        Ok((result, metrics))
    }

    fn execute_correlation_cached<F, D1, D2>(
        &self,
        x: &ArrayBase<D1, Ix1>,
        y: &ArrayBase<D2, Ix1>,
        algorithm: &AlgorithmChoice,
    ) -> StatsResult<F>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + std::iter::Sum<F>
            + std::fmt::Display,
        D1: Data<Elem = F>,
        D2: Data<Elem = F>,
    {
        let mean_x = self.execute_mean_algorithm(x, algorithm)?;
        let mean_y = self.execute_mean_algorithm(y, algorithm)?;

        let mut sum_xy = F::zero();
        let mut sum_x2 = F::zero();
        let mut sum_y2 = F::zero();

        for (x_val, y_val) in x.iter().zip(y.iter()) {
            let x_dev = *x_val - mean_x;
            let y_dev = *y_val - mean_y;

            sum_xy = sum_xy + x_dev * y_dev;
            sum_x2 = sum_x2 + x_dev * x_dev;
            sum_y2 = sum_y2 + y_dev * y_dev;
        }

        // Pearson r = sum_xy / sqrt(sum_x2 * sum_y2); return 0 when either
        // series is constant to avoid dividing by zero.
        let denominator = (sum_x2 * sum_y2).sqrt();
        if denominator == F::zero() {
            Ok(F::zero())
        } else {
            Ok(sum_xy / denominator)
        }
    }

    fn execute_correlation_with_monitoring<F, D1, D2>(
        &self,
        x: &ArrayBase<D1, Ix1>,
        y: &ArrayBase<D2, Ix1>,
        algorithm: &AlgorithmChoice,
    ) -> StatsResult<(F, PerformanceMetrics)>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + std::iter::Sum<F>
            + std::fmt::Display,
        D1: Data<Elem = F>,
        D2: Data<Elem = F>,
    {
        let start_time = Instant::now();
        let result = self.execute_correlation_cached(x, y, algorithm)?;
        let execution_time = start_time.elapsed();

        let metrics = PerformanceMetrics {
            execution_time_ns: execution_time.as_nanos() as f64,
            memory_bandwidth_utilization: 0.8,
            cache_efficiency: 0.9,
            simd_efficiency: 0.85,
            accuracy_score: 0.99,
            energy_efficiency: 1e9,
        };

        Ok((result, metrics))
    }

    fn execute_matrix_multiply<F>(
        &self,
        a: &Array2<F>,
        b: &Array2<F>,
        _algorithm: &AlgorithmChoice,
    ) -> StatsResult<Array2<F>>
    where
        F: Float + NumCast + SimdUnifiedOps + Copy + Send + Sync + Zero + std::fmt::Display,
    {
        let (m, k) = (a.nrows(), a.ncols());
        let n = b.ncols();
        let mut c = Array2::zeros((m, n));

        // Naive i-j-l triple loop; the selected algorithm is not yet used
        // to dispatch a blocked or SIMD kernel.
        for i in 0..m {
            for j in 0..n {
                let mut sum = F::zero();
                for l in 0..k {
                    sum = sum + a[[i, l]] * b[[l, j]];
                }
                c[[i, j]] = sum;
            }
        }

        Ok(c)
    }

    fn select_batch_strategy(
        &self,
        batchsize: usize,
        avg_arraysize: usize,
        _operations: &[BatchOperation],
    ) -> StatsResult<BatchStrategy> {
        if batchsize < 10 {
            Ok(BatchStrategy::Sequential)
        } else if avg_arraysize > 1000 {
            Ok(BatchStrategy::ParallelArrays)
        } else {
            Ok(BatchStrategy::ParallelOperations)
        }
    }

    fn execute_batch_operations<F, D>(
        &self,
        data: &[ArrayBase<D, Ix1>],
        operations: &[BatchOperation],
        _strategy: &BatchStrategy,
    ) -> StatsResult<BatchResults<F>>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + 'static
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        let mut results = BatchResults {
            means: Vec::new(),
            variances: Vec::new(),
            correlations: Vec::new(),
        };

        // Only Mean and Variance are handled per-array for now; pairwise
        // operations such as Correlation need a separate batch path.
        for array in data {
            if operations.contains(&BatchOperation::Mean) {
                let mean = self.advanced_mean(array)?;
                results.means.push(mean);
            }
            if operations.contains(&BatchOperation::Variance) {
                let variance = self.advanced_variance(array, 1)?;
                results.variances.push(variance);
            }
        }

        Ok(results)
    }
}

#[derive(Debug, Clone, PartialEq)]
pub enum BatchOperation {
    Mean,
    Variance,
    StandardDeviation,
    Correlation,
    Covariance,
}

#[derive(Debug, Clone)]
pub enum BatchStrategy {
    Sequential,
    ParallelArrays,
    ParallelOperations,
    Hybrid,
}

#[derive(Debug, Clone)]
pub struct BatchResults<F> {
    pub means: Vec<F>,
    pub variances: Vec<F>,
    pub correlations: Vec<F>,
}

impl Default for AdvancedSimdConfig {
    fn default() -> Self {
        Self {
            enable_adaptive_optimization: true,
            enable_predictive_selection: true,
            enable_hardware_specialization: true,
            enable_performance_learning: true,
            target_accuracy: AccuracyLevel::Balanced,
            performance_preference: PerformancePreference::Balanced,
            memory_constraints: MemoryConstraints {
                max_working_set_bytes: 1_073_741_824, // 1 GiB
                max_cache_usage_percent: 0.8,
                enable_memory_mapping: true,
                prefer_in_place: false,
            },
            threading_preferences: ThreadingPreferences {
                max_threads: None, // no explicit cap
                min_work_per_thread: 1000,
                enable_numa_optimization: true,
                affinity_strategy: AffinityStrategy::Spread,
            },
        }
    }
}

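/// Builds an [`AdvancedSimdOptimizer`] from an optional configuration,
/// falling back to [`AdvancedSimdConfig::default`].
///
/// Illustrative usage (not compiled as a doctest):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
///
/// let optimizer = create_advanced_simd_optimizer(None);
/// let data = array![1.0f64, 2.0, 3.0];
/// let mean = optimizer.advanced_mean(&data.view()).unwrap();
/// assert!((mean - 2.0).abs() < 1e-12);
/// ```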
#[allow(dead_code)]
pub fn create_advanced_simd_optimizer(
    config: Option<AdvancedSimdConfig>,
) -> AdvancedSimdOptimizer {
    AdvancedSimdOptimizer::new(config.unwrap_or_default())
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_advanced_simd_optimizer_creation() {
        let config = AdvancedSimdConfig::default();
        let optimizer = AdvancedSimdOptimizer::new(config);

        assert!(matches!(
            optimizer.hardware_profile.architecture,
            CpuArchitecture::X86_64 | CpuArchitecture::AArch64 | CpuArchitecture::WASM32
        ));
    }

    #[test]
    fn test_advanced_mean_calculation() {
        let config = AdvancedSimdConfig::default();
        let optimizer = AdvancedSimdOptimizer::new(config);

        let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let result = optimizer.advanced_mean(&data.view()).unwrap();

        assert!((result - 3.0).abs() < 1e-10);
    }

    #[test]
    fn test_advanced_variance_calculation() {
        let config = AdvancedSimdConfig::default();
        let optimizer = AdvancedSimdOptimizer::new(config);

        let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let result = optimizer.advanced_variance(&data.view(), 1).unwrap();

        assert!((result - 2.5).abs() < 1e-10);
    }

    #[test]
    fn test_advanced_correlation_calculation() {
        let config = AdvancedSimdConfig::default();
        let optimizer = AdvancedSimdOptimizer::new(config);

        let x = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let y = array![5.0, 4.0, 3.0, 2.0, 1.0];
        let result = optimizer
            .advanced_correlation(&x.view(), &y.view())
            .unwrap();

        assert!((result - (-1.0)).abs() < 1e-10);
    }

    #[test]
    fn testdata_characteristics_analysis() {
        let config = AdvancedSimdConfig::default();
        let optimizer = AdvancedSimdOptimizer::new(config);

        let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let characteristics = optimizer.analyzedata_characteristics(&data.view());

        assert!(matches!(
            characteristics.memory_layout,
            MemoryLayout::Contiguous
        ));
        assert!(matches!(
            characteristics.sparsity_level,
            SparsityLevel::Dense
        ));
    }

    #[test]
    fn testsize_categorization() {
        assert!(matches!(
            AdvancedSimdOptimizer::categorizesize(50),
            SizeBucket::Tiny
        ));
        assert!(matches!(
            AdvancedSimdOptimizer::categorizesize(500),
            SizeBucket::Small
        ));
        assert!(matches!(
            AdvancedSimdOptimizer::categorizesize(50000),
            SizeBucket::Medium
        ));
    }

    #[test]
    fn test_kahan_summation() {
        let config = AdvancedSimdConfig::default();
        let optimizer = AdvancedSimdOptimizer::new(config);

        // Naive summation loses the 1.0 between the two large terms;
        // Kahan summation recovers it.
        let data = array![1e10, 1.0, -1e10];
        let result = optimizer.kahan_sum(&data.view());

        assert!((result - 1.0).abs() < 1e-10);

        let challengingdata = array![1e8, 1.0, 1e8, -1e8, -1e8];
        let challenging_result = optimizer.kahan_sum(&challengingdata.view());
        assert!((challenging_result - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_batch_operations() {
        let config = AdvancedSimdConfig::default();
        let optimizer = AdvancedSimdOptimizer::new(config);

        let data1 = array![1.0, 2.0, 3.0];
        let data2 = array![4.0, 5.0, 6.0];
        let data_arrays = vec![data1.view(), data2.view()];

        let operations = vec![BatchOperation::Mean, BatchOperation::Variance];
        let results = optimizer
            .advanced_batch_statistics(&data_arrays, &operations)
            .unwrap();

        assert_eq!(results.means.len(), 2);
        assert_eq!(results.variances.len(), 2);
        assert!((results.means[0] - 2.0).abs() < 1e-10);
        assert!((results.means[1] - 5.0).abs() < 1e-10);
    }
}