use crate::error::{StatsError, StatsResult};
use crate::error_standardization::ErrorMessages;
use scirs2_core::ndarray::{s, Array2, ArrayBase, Data, Ix1};
use scirs2_core::numeric::{Float, NumCast, Zero};
use scirs2_core::{
    parallel_ops::*,
    simd_ops::{PlatformCapabilities, SimdUnifiedOps},
};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::time::Instant;

/// Configuration for the advanced SIMD optimizer.
#[derive(Debug, Clone)]
pub struct AdvancedSimdConfig {
    /// Adapt algorithm choices to observed runtime behavior.
    pub enable_adaptive_optimization: bool,
    /// Predict the best algorithm before execution.
    pub enable_predictive_selection: bool,
    /// Specialize code paths for the detected hardware.
    pub enable_hardware_specialization: bool,
    /// Record performance profiles and reuse them on later calls.
    pub enable_performance_learning: bool,
    /// Desired numerical accuracy.
    pub target_accuracy: AccuracyLevel,
    /// Preferred trade-off between speed and accuracy.
    pub performance_preference: PerformancePreference,
    /// Memory usage limits.
    pub memory_constraints: MemoryConstraints,
    /// Threading behavior.
    pub threading_preferences: ThreadingPreferences,
}

/// Numerical accuracy levels, from fastest to most precise.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum AccuracyLevel {
    Fast,
    Balanced,
    Precise,
    Reference,
}

/// How strongly to bias algorithm selection toward speed or accuracy.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PerformancePreference {
    MaxSpeed,
    SpeedBiased,
    Balanced,
    AccuracyBiased,
    MaxAccuracy,
}

/// Limits on memory usage during optimization.
#[derive(Debug, Clone)]
pub struct MemoryConstraints {
    /// Maximum working-set size in bytes.
    pub max_working_set_bytes: usize,
    /// Maximum fraction of the cache to use (0.0..=1.0).
    pub max_cache_usage_percent: f64,
    /// Allow memory-mapped buffers.
    pub enable_memory_mapping: bool,
    /// Prefer in-place operations over allocating scratch space.
    pub prefer_in_place: bool,
}

/// Threading preferences for parallel execution.
#[derive(Debug, Clone)]
pub struct ThreadingPreferences {
    /// Maximum number of threads (`None` = use all available cores).
    pub max_threads: Option<usize>,
    /// Minimum number of elements per thread before parallelizing.
    pub min_work_per_thread: usize,
    /// Enable NUMA-aware scheduling.
    pub enable_numa_optimization: bool,
    /// CPU affinity strategy for worker threads.
    pub affinity_strategy: AffinityStrategy,
}

/// CPU affinity strategies for worker threads.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum AffinityStrategy {
    None,
    PerformanceCores,
    EfficiencyCores,
    Spread,
    Compact,
}

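/// Adaptive SIMD optimizer that picks an algorithm per operation based on
/// data characteristics, the detected hardware profile, and cached
/// performance profiles.
///
/// A minimal usage sketch (illustrative only, marked `ignore`; assumes the
/// crate paths shown are how this module is reached):
///
/// ```ignore
/// let config = AdvancedSimdConfig {
///     target_accuracy: AccuracyLevel::Precise,
///     ..AdvancedSimdConfig::default()
/// };
/// let optimizer = AdvancedSimdOptimizer::new(config);
/// let data = scirs2_core::ndarray::array![1.0_f64, 2.0, 3.0, 4.0];
/// let mean = optimizer.advanced_mean(&data.view())?;
/// ```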
pub struct AdvancedSimdOptimizer {
    config: AdvancedSimdConfig,
    performance_cache: Arc<RwLock<HashMap<String, PerformanceProfile>>>,
    hardware_profile: HardwareProfile,
    algorithm_selector: AlgorithmSelector,
    memory_manager: AdvancedMemoryManager,
}

/// Cached performance profile for a specific operation signature.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceProfile {
    pub operation_signature: OperationSignature,
    pub performance_metrics: PerformanceMetrics,
    pub optimal_algorithm: AlgorithmChoice,
    pub last_updated: std::time::SystemTime,
    pub measurement_count: usize,
}

/// Key identifying an operation for performance caching.
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub struct OperationSignature {
    pub operation_type: String,
    pub size_bucket: SizeBucket,
    pub data_type: String,
    pub data_characteristics: DataCharacteristics,
}

/// Coarse input-size buckets used for algorithm selection.
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub enum SizeBucket {
    Tiny,
    Small,
    Medium,
    Large,
    Huge,
}

/// Observable characteristics of the input data.
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub struct DataCharacteristics {
    pub memory_layout: MemoryLayout,
    pub distribution_pattern: DistributionPattern,
    pub sparsity_level: SparsityLevel,
    pub numerical_range: NumericalRange,
}

/// Memory layout of the input array.
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub enum MemoryLayout {
    Contiguous,
    Strided(usize),
    Fragmented,
}

/// Statistical distribution pattern of the data, when known.
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub enum DistributionPattern {
    Uniform,
    Normal,
    Exponential,
    Power,
    Bimodal,
    Unknown,
}

/// Fraction of zero elements in the data.
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub enum SparsityLevel {
    Dense,
    Moderate,
    Sparse,
    VerySparse,
}

/// Magnitude range of the data values.
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub enum NumericalRange {
    SmallNumbers,
    Normal,
    LargeNumbers,
    Mixed,
}

/// Performance metrics recorded for one execution.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceMetrics {
    pub execution_time_ns: f64,
    pub memory_bandwidth_utilization: f64,
    pub cache_efficiency: f64,
    pub simd_efficiency: f64,
    pub accuracy_score: f64,
    pub energy_efficiency: f64,
}

/// An executable algorithm choice.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AlgorithmChoice {
    Scalar {
        algorithm: ScalarAlgorithm,
    },
    Simd {
        instruction_set: SimdInstructionSet,
        algorithm: SimdAlgorithm,
        vector_width: usize,
    },
    Hybrid {
        algorithms: Vec<AlgorithmChoice>,
        thresholds: Vec<usize>,
    },
    ParallelSimd {
        simd_choice: Box<AlgorithmChoice>,
        thread_count: usize,
        work_stealing: bool,
    },
}

/// Scalar summation algorithms.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ScalarAlgorithm {
    Standard,
    Kahan,
    Pairwise,
    Compensated,
    Welford,
}

/// SIMD instruction sets that may be targeted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum SimdInstructionSet {
    SSE2,
    SSE3,
    SSE41,
    SSE42,
    AVX,
    AVX2,
    AVX512F,
    AVX512DQ,
    NEON,
    SVE,
}

/// SIMD kernel variants.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum SimdAlgorithm {
    Vectorized,
    Unrolled,
    Prefetched,
    Interleaved,
    FusedMultiplyAdd,
    BranchFree,
}

/// Detected hardware profile used for algorithm selection.
#[derive(Debug, Clone)]
pub struct HardwareProfile {
    pub architecture: CpuArchitecture,
    pub simd_capabilities: Vec<SimdInstructionSet>,
    pub cache_hierarchy: CacheHierarchy,
    pub memory_subsystem: MemorySubsystem,
    pub thermal_profile: ThermalProfile,
    pub power_profile: PowerProfile,
}

/// CPU architectures recognized by hardware detection.
#[derive(Debug, Clone, PartialEq)]
pub enum CpuArchitecture {
    X86_64,
    AArch64,
    RISCV64,
    WASM32,
}

/// Cache hierarchy description.
#[derive(Debug, Clone)]
pub struct CacheHierarchy {
    pub l1_data_kb: usize,
    pub l1_instruction_kb: usize,
    pub l2_kb: usize,
    pub l3_kb: usize,
    pub cache_line_size: usize,
    pub associativity: HashMap<String, usize>,
}

/// Memory subsystem description.
#[derive(Debug, Clone)]
pub struct MemorySubsystem {
    pub memory_channels: usize,
    pub memory_bandwidth_gbps: f64,
    pub memory_latency_ns: f64,
    pub numa_nodes: usize,
    pub memory_controller_count: usize,
}

/// Thermal envelope of the CPU.
#[derive(Debug, Clone)]
pub struct ThermalProfile {
    pub thermal_design_power: f64,
    pub max_junction_temperature: f64,
    pub thermal_throttling_threshold: f64,
    pub cooling_solution: CoolingSolution,
}

/// Cooling solution categories.
#[derive(Debug, Clone)]
pub enum CoolingSolution {
    Passive,
    ActiveAir,
    Liquid,
    Custom,
}

/// Power envelope of the CPU.
#[derive(Debug, Clone)]
pub struct PowerProfile {
    pub base_power_watts: f64,
    pub max_power_watts: f64,
    pub power_efficiency_curve: Vec<(f64, f64)>,
    pub voltage_frequency_curve: Vec<(f64, f64)>,
}

/// Selects algorithms using a decision tree, a performance predictor,
/// and a cost model.
pub struct AlgorithmSelector {
    decision_tree: DecisionTree,
    performance_predictor: PerformancePredictor,
    cost_model: CostModel,
}

/// Decision tree for algorithm selection.
#[derive(Debug, Clone)]
pub struct DecisionTree {
    nodes: Vec<DecisionNode>,
}

/// A node in the decision tree: either an internal condition with two
/// branch indices, or a leaf holding an algorithm choice.
#[derive(Debug, Clone)]
pub struct DecisionNode {
    pub condition: SelectionCondition,
    pub true_branch: Option<usize>,
    pub false_branch: Option<usize>,
    pub algorithm: Option<AlgorithmChoice>,
}

/// Conditions evaluated at decision-tree nodes.
#[derive(Debug, Clone)]
pub enum SelectionCondition {
    DataSizeThreshold(usize),
    CacheEfficiencyThreshold(f64),
    AccuracyRequirement(AccuracyLevel),
    MemoryConstraint(usize),
    ThermalConstraint(f64),
    PowerConstraint(f64),
    Complex(
        Box<SelectionCondition>,
        LogicalOperator,
        Box<SelectionCondition>,
    ),
}

/// Logical operators for composite selection conditions.
#[derive(Debug, Clone)]
pub enum LogicalOperator {
    And,
    Or,
    Not,
}

/// Predicts operation performance from learned models.
pub struct PerformancePredictor {
    models: HashMap<String, PredictionModel>,
}

/// A fitted performance-prediction model.
#[derive(Debug, Clone)]
pub struct PredictionModel {
    pub model_type: ModelType,
    pub coefficients: Vec<f64>,
    pub feature_weights: HashMap<String, f64>,
    pub accuracy: f64,
}

/// Model families available for performance prediction.
#[derive(Debug, Clone)]
pub enum ModelType {
    Linear,
    Polynomial(usize),
    ExponentialDecay,
    PowerLaw,
    NeuralNetwork,
}

/// Weights for estimating computation, memory, and energy costs.
pub struct CostModel {
    computation_cost_weights: HashMap<String, f64>,
    memory_cost_weights: HashMap<String, f64>,
    energy_cost_weights: HashMap<String, f64>,
}

/// Memory manager handling allocation, cache, and NUMA strategies.
pub struct AdvancedMemoryManager {
    allocation_strategy: AllocationStrategy,
    cache_optimizer: CacheOptimizer,
    numa_manager: NumaManager,
}

/// Memory allocation strategies.
#[derive(Debug, Clone)]
pub enum AllocationStrategy {
    Standard,
    Aligned(usize),
    Interleaved,
    LocalFirst,
    HugePage,
}

/// Cache optimization strategies.
pub struct CacheOptimizer {
    prefetch_strategy: PrefetchStrategy,
    blocking_strategy: BlockingStrategy,
}

/// Prefetch strategies.
#[derive(Debug, Clone)]
pub enum PrefetchStrategy {
    None,
    Sequential,
    Stride(usize),
    Adaptive,
}

/// Cache-blocking strategies keyed to the targeted cache level.
#[derive(Debug, Clone)]
pub enum BlockingStrategy {
    None,
    L1Blocking(usize),
    L2Blocking(usize),
    L3Blocking(usize),
    Adaptive,
}

/// NUMA topology and allocation policy manager.
pub struct NumaManager {
    node_topology: Vec<NumaNode>,
    allocation_policy: NumaAllocationPolicy,
}

/// Description of one NUMA node.
#[derive(Debug, Clone)]
pub struct NumaNode {
    pub node_id: usize,
    pub cpu_cores: Vec<usize>,
    pub memory_size_gb: f64,
    pub memory_bandwidth_gbps: f64,
    pub inter_node_latency_ns: HashMap<usize, f64>,
}

/// NUMA memory allocation policies.
#[derive(Debug, Clone)]
pub enum NumaAllocationPolicy {
    Default,
    LocalOnly,
    Interleaved,
    Preferred(usize),
}

impl AdvancedSimdOptimizer {
    /// Creates a new optimizer, detecting the hardware profile and building
    /// the algorithm selector and memory manager from the given config.
    pub fn new(config: AdvancedSimdConfig) -> Self {
        let hardware_profile = Self::detect_hardware_profile();
        let algorithm_selector = Self::build_algorithm_selector(&hardware_profile);
        let memory_manager = Self::create_memory_manager(&config, &hardware_profile);

        Self {
            config,
            performance_cache: Arc::new(RwLock::new(HashMap::new())),
            hardware_profile,
            algorithm_selector,
            memory_manager,
        }
    }

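    /// Computes the arithmetic mean with an adaptively selected algorithm.
    ///
    /// A minimal sketch of the intended call pattern (illustrative, not a
    /// doctest):
    ///
    /// ```ignore
    /// let optimizer = AdvancedSimdOptimizer::new(AdvancedSimdConfig::default());
    /// let data = scirs2_core::ndarray::array![1.0_f64, 2.0, 3.0, 4.0];
    /// let mean = optimizer.advanced_mean(&data.view())?; // 2.5
    /// ```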
    pub fn advanced_mean<F, D>(&self, x: &ArrayBase<D, Ix1>) -> StatsResult<F>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + 'static
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        if x.is_empty() {
            return Err(ErrorMessages::empty_array("x"));
        }

        let characteristics = self.analyze_data_characteristics(x);

        let signature = OperationSignature {
            operation_type: "mean".to_string(),
            size_bucket: Self::categorize_size(x.len()),
            data_type: std::any::type_name::<F>().to_string(),
            data_characteristics: characteristics,
        };

        // Reuse a previously learned algorithm choice when available.
        if let Some(profile) = self.get_cached_performance(&signature) {
            return self.execute_cached_algorithm(x, &profile.optimal_algorithm);
        }

        let algorithm = self.select_optimal_algorithm(&signature, x)?;

        let (result, metrics) = self.execute_with_monitoring(x, &algorithm)?;

        if self.config.enable_performance_learning {
            self.cache_performance_profile(signature, algorithm, metrics);
        }

        Ok(result)
    }

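    /// Computes the variance with `ddof` delta degrees of freedom
    /// (`ddof = 1` gives the unbiased sample variance).
    ///
    /// Illustrative sketch (not a doctest):
    ///
    /// ```ignore
    /// let optimizer = AdvancedSimdOptimizer::new(AdvancedSimdConfig::default());
    /// let data = scirs2_core::ndarray::array![1.0_f64, 2.0, 3.0, 4.0, 5.0];
    /// let var = optimizer.advanced_variance(&data.view(), 1)?; // 2.5
    /// ```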
    pub fn advanced_variance<F, D>(&self, x: &ArrayBase<D, Ix1>, ddof: usize) -> StatsResult<F>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + 'static
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        if x.is_empty() {
            return Err(ErrorMessages::empty_array("x"));
        }

        let n = x.len();
        if n <= ddof {
            return Err(ErrorMessages::insufficientdata(
                "variance calculation",
                ddof + 1,
                n,
            ));
        }

        let characteristics = self.analyze_data_characteristics(x);
        let signature = OperationSignature {
            operation_type: "variance".to_string(),
            size_bucket: Self::categorize_size(n),
            data_type: std::any::type_name::<F>().to_string(),
            data_characteristics: characteristics,
        };

        if let Some(profile) = self.get_cached_performance(&signature) {
            return self.execute_variance_cached(x, ddof, &profile.optimal_algorithm);
        }

        let algorithm = self.select_variance_algorithm(&signature, x, ddof)?;
        let (result, metrics) = self.execute_variance_with_monitoring(x, ddof, &algorithm)?;

        if self.config.enable_performance_learning {
            self.cache_performance_profile(signature, algorithm, metrics);
        }

        Ok(result)
    }

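    /// Computes the Pearson correlation coefficient between `x` and `y`.
    ///
    /// Illustrative sketch (not a doctest):
    ///
    /// ```ignore
    /// let optimizer = AdvancedSimdOptimizer::new(AdvancedSimdConfig::default());
    /// let x = scirs2_core::ndarray::array![1.0_f64, 2.0, 3.0];
    /// let y = scirs2_core::ndarray::array![2.0_f64, 4.0, 6.0];
    /// let r = optimizer.advanced_correlation(&x.view(), &y.view())?; // 1.0
    /// ```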
    pub fn advanced_correlation<F, D1, D2>(
        &self,
        x: &ArrayBase<D1, Ix1>,
        y: &ArrayBase<D2, Ix1>,
    ) -> StatsResult<F>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + 'static
            + std::iter::Sum<F>
            + std::fmt::Display,
        D1: Data<Elem = F>,
        D2: Data<Elem = F>,
    {
        if x.is_empty() {
            return Err(ErrorMessages::empty_array("x"));
        }
        if y.is_empty() {
            return Err(ErrorMessages::empty_array("y"));
        }
        if x.len() != y.len() {
            return Err(ErrorMessages::length_mismatch("x", x.len(), "y", y.len()));
        }

        let characteristics = self.analyze_bivariate_characteristics(x, y);
        let signature = OperationSignature {
            operation_type: "correlation".to_string(),
            size_bucket: Self::categorize_size(x.len()),
            data_type: std::any::type_name::<F>().to_string(),
            data_characteristics: characteristics,
        };

        if let Some(profile) = self.get_cached_performance(&signature) {
            return self.execute_correlation_cached(x, y, &profile.optimal_algorithm);
        }

        let algorithm = self.select_correlation_algorithm(&signature, x, y)?;
        let (result, metrics) = self.execute_correlation_with_monitoring(x, y, &algorithm)?;

        if self.config.enable_performance_learning {
            self.cache_performance_profile(signature, algorithm, metrics);
        }

        Ok(result)
    }

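    /// Multiplies two matrices, selecting scalar, SIMD, or parallel-SIMD
    /// execution from the estimated operation count.
    ///
    /// Illustrative sketch (not a doctest):
    ///
    /// ```ignore
    /// let a = scirs2_core::ndarray::array![[1.0_f64, 2.0], [3.0, 4.0]];
    /// let b = scirs2_core::ndarray::array![[5.0_f64, 6.0], [7.0, 8.0]];
    /// let c = optimizer.advanced_matrix_multiply(&a, &b)?; // [[19, 22], [43, 50]]
    /// ```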
    pub fn advanced_matrix_multiply<F>(
        &self,
        a: &Array2<F>,
        b: &Array2<F>,
    ) -> StatsResult<Array2<F>>
    where
        F: Float + NumCast + SimdUnifiedOps + Copy + Send + Sync + 'static + std::fmt::Display,
    {
        if a.ncols() != b.nrows() {
            return Err(StatsError::dimension_mismatch(format!(
                "Matrix multiplication requires A.cols == B.rows, got {} != {}",
                a.ncols(),
                b.nrows()
            )));
        }

        let characteristics = Self::analyze_matrix_characteristics(a, b);
        let signature = OperationSignature {
            operation_type: "matrix_multiply".to_string(),
            size_bucket: Self::categorize_matrix_size(a.nrows() * a.ncols()),
            data_type: std::any::type_name::<F>().to_string(),
            data_characteristics: characteristics,
        };

        let algorithm = Self::select_matrix_algorithm(&signature, a, b)?;

        self.execute_matrix_multiply(a, b, &algorithm)
    }

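    /// Computes the requested statistics for each array in a batch, choosing
    /// a batch execution strategy from the batch shape.
    ///
    /// Illustrative sketch (not a doctest; `a`, `b`, and `optimizer` stand in
    /// for values built as in the examples above):
    ///
    /// ```ignore
    /// let arrays = vec![a.view(), b.view()];
    /// let ops = vec![BatchOperation::Mean, BatchOperation::Variance];
    /// let results = optimizer.advanced_batch_statistics(&arrays, &ops)?;
    /// ```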
    pub fn advanced_batch_statistics<F, D>(
        &self,
        data: &[ArrayBase<D, Ix1>],
        operations: &[BatchOperation],
    ) -> StatsResult<BatchResults<F>>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + 'static
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        if data.is_empty() {
            return Err(ErrorMessages::empty_array("data"));
        }

        let batch_size = data.len();
        let avg_array_size = data.iter().map(|arr| arr.len()).sum::<usize>() / batch_size;

        let strategy = self.select_batch_strategy(batch_size, avg_array_size, operations)?;

        self.execute_batch_operations(data, operations, &strategy)
    }

    /// Detects the hardware profile. SIMD capability detection is delegated
    /// to `PlatformCapabilities`; the cache, memory, thermal, and power
    /// figures below are representative defaults, not measured values.
    fn detect_hardware_profile() -> HardwareProfile {
        let _capabilities = PlatformCapabilities::detect();

        HardwareProfile {
            architecture: if cfg!(target_arch = "x86_64") {
                CpuArchitecture::X86_64
            } else if cfg!(target_arch = "aarch64") {
                CpuArchitecture::AArch64
            } else if cfg!(target_arch = "riscv64") {
                CpuArchitecture::RISCV64
            } else {
                CpuArchitecture::WASM32
            },
            simd_capabilities: vec![
                SimdInstructionSet::SSE2,
                SimdInstructionSet::SSE42,
                SimdInstructionSet::AVX2,
            ],
            cache_hierarchy: CacheHierarchy {
                l1_data_kb: 32,
                l1_instruction_kb: 32,
                l2_kb: 256,
                l3_kb: 8192,
                cache_line_size: 64,
                associativity: [
                    ("L1".to_string(), 8),
                    ("L2".to_string(), 8),
                    ("L3".to_string(), 16),
                ]
                .iter()
                .cloned()
                .collect(),
            },
            memory_subsystem: MemorySubsystem {
                memory_channels: 2,
                memory_bandwidth_gbps: 50.0,
                memory_latency_ns: 100.0,
                numa_nodes: 1,
                memory_controller_count: 1,
            },
            thermal_profile: ThermalProfile {
                thermal_design_power: 65.0,
                max_junction_temperature: 100.0,
                thermal_throttling_threshold: 85.0,
                cooling_solution: CoolingSolution::ActiveAir,
            },
            power_profile: PowerProfile {
                base_power_watts: 15.0,
                max_power_watts: 65.0,
                power_efficiency_curve: vec![(0.1, 0.8), (0.5, 0.9), (1.0, 0.85)],
                voltage_frequency_curve: vec![(1.0, 2.4), (1.2, 3.2), (1.35, 3.8)],
            },
        }
    }

    /// Builds the default decision tree, predictor, and cost model.
    fn build_algorithm_selector(_hardware: &HardwareProfile) -> AlgorithmSelector {
        let nodes = vec![
            DecisionNode {
                condition: SelectionCondition::DataSizeThreshold(1000),
                true_branch: Some(1),
                false_branch: Some(2),
                algorithm: None,
            },
            DecisionNode {
                condition: SelectionCondition::CacheEfficiencyThreshold(0.8),
                true_branch: None,
                false_branch: None,
                algorithm: Some(AlgorithmChoice::Simd {
                    instruction_set: SimdInstructionSet::AVX2,
                    algorithm: SimdAlgorithm::Vectorized,
                    vector_width: 4,
                }),
            },
            DecisionNode {
                condition: SelectionCondition::AccuracyRequirement(AccuracyLevel::Precise),
                true_branch: None,
                false_branch: None,
                algorithm: Some(AlgorithmChoice::Scalar {
                    algorithm: ScalarAlgorithm::Kahan,
                }),
            },
        ];

        let decision_tree = DecisionTree { nodes };

        AlgorithmSelector {
            decision_tree,
            performance_predictor: PerformancePredictor {
                models: HashMap::new(),
            },
            cost_model: CostModel {
                computation_cost_weights: HashMap::new(),
                memory_cost_weights: HashMap::new(),
                energy_cost_weights: HashMap::new(),
            },
        }
    }

    fn create_memory_manager(
        config: &AdvancedSimdConfig,
        hardware: &HardwareProfile,
    ) -> AdvancedMemoryManager {
        let allocation_strategy = if config.memory_constraints.prefer_in_place {
            AllocationStrategy::Standard
        } else {
            // Align to the cache line for SIMD-friendly loads.
            AllocationStrategy::Aligned(64)
        };

        let cache_optimizer = CacheOptimizer {
            prefetch_strategy: PrefetchStrategy::Adaptive,
            blocking_strategy: BlockingStrategy::Adaptive,
        };

        let numa_manager = NumaManager {
            node_topology: vec![NumaNode {
                node_id: 0,
                cpu_cores: (0..num_cpus::get()).collect(),
                memory_size_gb: 16.0,
                memory_bandwidth_gbps: hardware.memory_subsystem.memory_bandwidth_gbps,
                inter_node_latency_ns: HashMap::new(),
            }],
            allocation_policy: NumaAllocationPolicy::Default,
        };

        AdvancedMemoryManager {
            allocation_strategy,
            cache_optimizer,
            numa_manager,
        }
    }

    fn analyze_data_characteristics<F, D>(&self, x: &ArrayBase<D, Ix1>) -> DataCharacteristics
    where
        F: Float + Copy + std::fmt::Display,
        D: Data<Elem = F>,
    {
        let memory_layout = if x.as_slice().is_some() {
            MemoryLayout::Contiguous
        } else {
            MemoryLayout::Strided(1)
        };

        let zero_count = x.iter().filter(|&&val| val == F::zero()).count();
        let sparsity_ratio = zero_count as f64 / x.len() as f64;
        let sparsity_level = match sparsity_ratio {
            r if r < 0.05 => SparsityLevel::Dense,
            r if r < 0.5 => SparsityLevel::Moderate,
            r if r < 0.95 => SparsityLevel::Sparse,
            _ => SparsityLevel::VerySparse,
        };

        let (min_abs, max_abs) =
            x.iter()
                .fold((F::infinity(), F::zero()), |(min_abs, max_abs), &val| {
                    let abs_val = val.abs();
                    (min_abs.min(abs_val), max_abs.max(abs_val))
                });

        let numerical_range = if max_abs
            < F::from(1e-6).expect("Failed to convert constant to float")
        {
            NumericalRange::SmallNumbers
        } else if min_abs > F::from(1e6).expect("Failed to convert constant to float") {
            NumericalRange::LargeNumbers
        } else if max_abs / min_abs > F::from(1e12).expect("Failed to convert constant to float") {
            NumericalRange::Mixed
        } else {
            NumericalRange::Normal
        };

        DataCharacteristics {
            memory_layout,
            // Distribution detection is not implemented yet.
            distribution_pattern: DistributionPattern::Unknown,
            sparsity_level,
            numerical_range,
        }
    }

    fn analyze_bivariate_characteristics<F, D1, D2>(
        &self,
        x: &ArrayBase<D1, Ix1>,
        _y: &ArrayBase<D2, Ix1>,
    ) -> DataCharacteristics
    where
        F: Float + Copy + std::fmt::Display,
        D1: Data<Elem = F>,
        D2: Data<Elem = F>,
    {
        // Characterize the pair by the first array only; `y` is assumed to
        // have similar characteristics.
        self.analyze_data_characteristics(x)
    }

    fn analyze_matrix_characteristics<F>(_a: &Array2<F>, _b: &Array2<F>) -> DataCharacteristics
    where
        F: Float + Copy + std::fmt::Display,
    {
        // Owned matrices are contiguous; assume dense, well-scaled data.
        DataCharacteristics {
            memory_layout: MemoryLayout::Contiguous,
            distribution_pattern: DistributionPattern::Unknown,
            sparsity_level: SparsityLevel::Dense,
            numerical_range: NumericalRange::Normal,
        }
    }

    fn categorize_size(size: usize) -> SizeBucket {
        match size {
            s if s < 64 => SizeBucket::Tiny,
            s if s < 1024 => SizeBucket::Small,
            s if s < 65536 => SizeBucket::Medium,
            s if s < 1048576 => SizeBucket::Large,
            _ => SizeBucket::Huge,
        }
    }

    fn categorize_matrix_size(total_elements: usize) -> SizeBucket {
        Self::categorize_size(total_elements)
    }

    fn get_cached_performance(&self, signature: &OperationSignature) -> Option<PerformanceProfile> {
        if !self.config.enable_performance_learning {
            return None;
        }

        let cache = self.performance_cache.read().ok()?;
        let key = format!("{:?}", signature);
        cache.get(&key).cloned()
    }

    fn cache_performance_profile(
        &self,
        signature: OperationSignature,
        algorithm: AlgorithmChoice,
        metrics: PerformanceMetrics,
    ) {
        if !self.config.enable_performance_learning {
            return;
        }

        let profile = PerformanceProfile {
            operation_signature: signature.clone(),
            performance_metrics: metrics,
            optimal_algorithm: algorithm,
            last_updated: std::time::SystemTime::now(),
            measurement_count: 1,
        };

        if let Ok(mut cache) = self.performance_cache.write() {
            let key = format!("{:?}", signature);
            cache.insert(key, profile);
        }
    }

    fn select_optimal_algorithm<F, D>(
        &self,
        signature: &OperationSignature,
        _x: &ArrayBase<D, Ix1>,
    ) -> StatsResult<AlgorithmChoice>
    where
        F: Float + Copy,
        D: Data<Elem = F>,
    {
        let algorithm = match (&signature.size_bucket, &self.config.target_accuracy) {
            (SizeBucket::Tiny, _) => AlgorithmChoice::Scalar {
                algorithm: ScalarAlgorithm::Standard,
            },
            (SizeBucket::Small, AccuracyLevel::Precise) => AlgorithmChoice::Scalar {
                algorithm: ScalarAlgorithm::Kahan,
            },
            (SizeBucket::Medium | SizeBucket::Large, AccuracyLevel::Fast) => {
                AlgorithmChoice::Simd {
                    instruction_set: SimdInstructionSet::AVX2,
                    algorithm: SimdAlgorithm::Vectorized,
                    vector_width: 4,
                }
            }
            (SizeBucket::Huge, _) => AlgorithmChoice::ParallelSimd {
                simd_choice: Box::new(AlgorithmChoice::Simd {
                    instruction_set: SimdInstructionSet::AVX2,
                    algorithm: SimdAlgorithm::Vectorized,
                    vector_width: 4,
                }),
                thread_count: num_cpus::get(),
                work_stealing: true,
            },
            _ => AlgorithmChoice::Simd {
                instruction_set: SimdInstructionSet::AVX2,
                algorithm: SimdAlgorithm::Vectorized,
                vector_width: 4,
            },
        };

        Ok(algorithm)
    }

    fn select_variance_algorithm<F, D>(
        &self,
        signature: &OperationSignature,
        _x: &ArrayBase<D, Ix1>,
        _ddof: usize,
    ) -> StatsResult<AlgorithmChoice>
    where
        F: Float + Copy,
        D: Data<Elem = F>,
    {
        let algorithm = match (&signature.size_bucket, &self.config.target_accuracy) {
            (SizeBucket::Tiny | SizeBucket::Small, _) => AlgorithmChoice::Scalar {
                algorithm: ScalarAlgorithm::Welford,
            },
            (_, AccuracyLevel::Precise | AccuracyLevel::Reference) => AlgorithmChoice::Scalar {
                algorithm: ScalarAlgorithm::Welford,
            },
            (SizeBucket::Medium | SizeBucket::Large, _) => AlgorithmChoice::Simd {
                instruction_set: SimdInstructionSet::AVX2,
                algorithm: SimdAlgorithm::Vectorized,
                vector_width: 4,
            },
            (SizeBucket::Huge, _) => AlgorithmChoice::ParallelSimd {
                simd_choice: Box::new(AlgorithmChoice::Scalar {
                    algorithm: ScalarAlgorithm::Welford,
                }),
                thread_count: num_cpus::get(),
                work_stealing: true,
            },
        };

        Ok(algorithm)
    }

    fn select_correlation_algorithm<F, D1, D2>(
        &self,
        signature: &OperationSignature,
        _x: &ArrayBase<D1, Ix1>,
        _y: &ArrayBase<D2, Ix1>,
    ) -> StatsResult<AlgorithmChoice>
    where
        F: Float + Copy,
        D1: Data<Elem = F>,
        D2: Data<Elem = F>,
    {
        let algorithm = match &signature.size_bucket {
            SizeBucket::Tiny => AlgorithmChoice::Scalar {
                algorithm: ScalarAlgorithm::Standard,
            },
            SizeBucket::Small => AlgorithmChoice::Scalar {
                algorithm: ScalarAlgorithm::Compensated,
            },
            SizeBucket::Medium | SizeBucket::Large => AlgorithmChoice::Simd {
                instruction_set: SimdInstructionSet::AVX2,
                algorithm: SimdAlgorithm::FusedMultiplyAdd,
                vector_width: 4,
            },
            SizeBucket::Huge => AlgorithmChoice::ParallelSimd {
                simd_choice: Box::new(AlgorithmChoice::Simd {
                    instruction_set: SimdInstructionSet::AVX2,
                    algorithm: SimdAlgorithm::FusedMultiplyAdd,
                    vector_width: 4,
                }),
                thread_count: num_cpus::get(),
                work_stealing: false,
            },
        };

        Ok(algorithm)
    }

    fn select_matrix_algorithm<F>(
        _signature: &OperationSignature,
        a: &Array2<F>,
        b: &Array2<F>,
    ) -> StatsResult<AlgorithmChoice>
    where
        F: Float + Copy + std::fmt::Display,
    {
        let total_ops = a.nrows() * a.ncols() * b.ncols();

        let algorithm = if total_ops < 1000 {
            AlgorithmChoice::Scalar {
                algorithm: ScalarAlgorithm::Standard,
            }
        } else if total_ops < 1_000_000 {
            AlgorithmChoice::Simd {
                instruction_set: SimdInstructionSet::AVX2,
                algorithm: SimdAlgorithm::Vectorized,
                vector_width: 4,
            }
        } else {
            AlgorithmChoice::ParallelSimd {
                simd_choice: Box::new(AlgorithmChoice::Simd {
                    instruction_set: SimdInstructionSet::AVX2,
                    algorithm: SimdAlgorithm::Vectorized,
                    vector_width: 4,
                }),
                thread_count: num_cpus::get(),
                work_stealing: true,
            }
        };

        Ok(algorithm)
    }

    fn execute_cached_algorithm<F, D>(
        &self,
        x: &ArrayBase<D, Ix1>,
        algorithm: &AlgorithmChoice,
    ) -> StatsResult<F>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        self.execute_mean_algorithm(x, algorithm)
    }

    fn execute_with_monitoring<F, D>(
        &self,
        x: &ArrayBase<D, Ix1>,
        algorithm: &AlgorithmChoice,
    ) -> StatsResult<(F, PerformanceMetrics)>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        let start_time = Instant::now();
        let result = self.execute_mean_algorithm(x, algorithm)?;
        let execution_time = start_time.elapsed();

        // Only execution time is measured; the remaining metrics are
        // placeholder estimates until hardware counters are wired in.
        let metrics = PerformanceMetrics {
            execution_time_ns: execution_time.as_nanos() as f64,
            memory_bandwidth_utilization: 0.8,
            cache_efficiency: 0.9,
            simd_efficiency: 0.85,
            accuracy_score: 0.99,
            energy_efficiency: 1e9,
        };

        Ok((result, metrics))
    }

    fn execute_mean_algorithm<F, D>(
        &self,
        x: &ArrayBase<D, Ix1>,
        algorithm: &AlgorithmChoice,
    ) -> StatsResult<F>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        match algorithm {
            AlgorithmChoice::Scalar { algorithm } => self.execute_scalar_mean(x, algorithm),
            AlgorithmChoice::Simd { .. } => {
                Ok(F::simd_sum(&x.view()) / F::from(x.len()).expect("Operation failed"))
            }
            AlgorithmChoice::ParallelSimd {
                simd_choice,
                thread_count,
                ..
            } => self.execute_parallel_mean(x, simd_choice, *thread_count),
            AlgorithmChoice::Hybrid { .. } => {
                Ok(F::simd_sum(&x.view()) / F::from(x.len()).expect("Operation failed"))
            }
        }
    }

    fn execute_scalar_mean<F, D>(
        &self,
        x: &ArrayBase<D, Ix1>,
        algorithm: &ScalarAlgorithm,
    ) -> StatsResult<F>
    where
        F: Float + NumCast + Copy + std::fmt::Display,
        D: Data<Elem = F>,
    {
        let result = match algorithm {
            ScalarAlgorithm::Standard => x.iter().fold(F::zero(), |acc, &val| acc + val),
            ScalarAlgorithm::Kahan => self.kahan_sum(x),
            ScalarAlgorithm::Pairwise => self.pairwise_sum(x),
            ScalarAlgorithm::Compensated => self.compensated_sum(x),
            ScalarAlgorithm::Welford => {
                // For a mean alone, Welford's update reduces to a plain sum.
                x.iter().fold(F::zero(), |acc, &val| acc + val)
            }
        };

        Ok(result / F::from(x.len()).expect("Operation failed"))
    }

    /// Kahan (compensated) summation: tracks the running rounding error in
    /// `c` and feeds it back, keeping the total error O(1) instead of O(n).
    fn kahan_sum<F, D>(&self, x: &ArrayBase<D, Ix1>) -> F
    where
        F: Float + Copy + std::fmt::Display,
        D: Data<Elem = F>,
    {
        let mut sum = F::zero();
        let mut c = F::zero();

        for &value in x.iter() {
            let y = value - c;
            let t = sum + y;
            c = (t - sum) - y;
            sum = t;
        }

        sum
    }

    /// Pairwise summation: splits the array recursively, bounding the
    /// rounding error at O(log n) while staying cache-friendly.
    fn pairwise_sum<F, D>(&self, x: &ArrayBase<D, Ix1>) -> F
    where
        F: Float + Copy + std::fmt::Display,
        D: Data<Elem = F>,
    {
        if x.len() <= 16 {
            return x.iter().fold(F::zero(), |acc, &val| acc + val);
        }

        let mid = x.len() / 2;
        let left_sum = self.pairwise_sum(&x.slice(s![..mid]));
        let right_sum = self.pairwise_sum(&x.slice(s![mid..]));
        left_sum + right_sum
    }

    fn compensated_sum<F, D>(&self, x: &ArrayBase<D, Ix1>) -> F
    where
        F: Float + Copy + std::fmt::Display,
        D: Data<Elem = F>,
    {
        // Kahan summation is the compensated scheme used here.
        self.kahan_sum(x)
    }

    fn execute_parallel_mean<F, D>(
        &self,
        x: &ArrayBase<D, Ix1>,
        simd_choice: &AlgorithmChoice,
        thread_count: usize,
    ) -> StatsResult<F>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        // Below this size the threading overhead outweighs the gain.
        if x.len() < 1000 {
            return self.execute_mean_algorithm(x, simd_choice);
        }

        let chunk_size = x.len().div_ceil(thread_count);

        let sum: F = x
            .axis_chunks_iter(scirs2_core::ndarray::Axis(0), chunk_size)
            .into_par_iter()
            .map(|chunk| F::simd_sum(&chunk))
            .sum();

        Ok(sum / F::from(x.len()).expect("Operation failed"))
    }

    fn execute_variance_cached<F, D>(
        &self,
        x: &ArrayBase<D, Ix1>,
        ddof: usize,
        algorithm: &AlgorithmChoice,
    ) -> StatsResult<F>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        // Two-pass variance: compute the mean, then the sum of squared
        // deviations from it.
        let mean = self.execute_mean_algorithm(x, algorithm)?;
        let sum_sq_dev = x
            .iter()
            .map(|&val| {
                let dev = val - mean;
                dev * dev
            })
            .fold(F::zero(), |acc, val| acc + val);

        Ok(sum_sq_dev / F::from(x.len() - ddof).expect("Operation failed"))
    }

    fn execute_variance_with_monitoring<F, D>(
        &self,
        x: &ArrayBase<D, Ix1>,
        ddof: usize,
        algorithm: &AlgorithmChoice,
    ) -> StatsResult<(F, PerformanceMetrics)>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        let start_time = Instant::now();
        let result = self.execute_variance_cached(x, ddof, algorithm)?;
        let execution_time = start_time.elapsed();

        let metrics = PerformanceMetrics {
            execution_time_ns: execution_time.as_nanos() as f64,
            memory_bandwidth_utilization: 0.8,
            cache_efficiency: 0.9,
            simd_efficiency: 0.85,
            accuracy_score: 0.99,
            energy_efficiency: 1e9,
        };

        Ok((result, metrics))
    }

    fn execute_correlation_cached<F, D1, D2>(
        &self,
        x: &ArrayBase<D1, Ix1>,
        y: &ArrayBase<D2, Ix1>,
        algorithm: &AlgorithmChoice,
    ) -> StatsResult<F>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + std::iter::Sum<F>
            + std::fmt::Display,
        D1: Data<Elem = F>,
        D2: Data<Elem = F>,
    {
        let mean_x = self.execute_mean_algorithm(x, algorithm)?;
        let mean_y = self.execute_mean_algorithm(y, algorithm)?;

        let mut sum_xy = F::zero();
        let mut sum_x2 = F::zero();
        let mut sum_y2 = F::zero();

        for (x_val, y_val) in x.iter().zip(y.iter()) {
            let x_dev = *x_val - mean_x;
            let y_dev = *y_val - mean_y;

            sum_xy = sum_xy + x_dev * y_dev;
            sum_x2 = sum_x2 + x_dev * x_dev;
            sum_y2 = sum_y2 + y_dev * y_dev;
        }

        // Guard against zero variance in either input.
        let denominator = (sum_x2 * sum_y2).sqrt();
        if denominator == F::zero() {
            Ok(F::zero())
        } else {
            Ok(sum_xy / denominator)
        }
    }

    fn execute_correlation_with_monitoring<F, D1, D2>(
        &self,
        x: &ArrayBase<D1, Ix1>,
        y: &ArrayBase<D2, Ix1>,
        algorithm: &AlgorithmChoice,
    ) -> StatsResult<(F, PerformanceMetrics)>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + std::iter::Sum<F>
            + std::fmt::Display,
        D1: Data<Elem = F>,
        D2: Data<Elem = F>,
    {
        let start_time = Instant::now();
        let result = self.execute_correlation_cached(x, y, algorithm)?;
        let execution_time = start_time.elapsed();

        let metrics = PerformanceMetrics {
            execution_time_ns: execution_time.as_nanos() as f64,
            memory_bandwidth_utilization: 0.8,
            cache_efficiency: 0.9,
            simd_efficiency: 0.85,
            accuracy_score: 0.99,
            energy_efficiency: 1e9,
        };

        Ok((result, metrics))
    }

    fn execute_matrix_multiply<F>(
        &self,
        a: &Array2<F>,
        b: &Array2<F>,
        _algorithm: &AlgorithmChoice,
    ) -> StatsResult<Array2<F>>
    where
        F: Float + NumCast + SimdUnifiedOps + Copy + Send + Sync + Zero + std::fmt::Display,
    {
        let (m, k) = (a.nrows(), a.ncols());
        let n = b.ncols();
        let mut c = Array2::zeros((m, n));

        // Naive triple loop; the selected algorithm is not yet dispatched.
        for i in 0..m {
            for j in 0..n {
                let mut sum = F::zero();
                for l in 0..k {
                    sum = sum + a[[i, l]] * b[[l, j]];
                }
                c[[i, j]] = sum;
            }
        }

        Ok(c)
    }

    fn select_batch_strategy(
        &self,
        batch_size: usize,
        avg_array_size: usize,
        _operations: &[BatchOperation],
    ) -> StatsResult<BatchStrategy> {
        if batch_size < 10 {
            Ok(BatchStrategy::Sequential)
        } else if avg_array_size > 1000 {
            Ok(BatchStrategy::ParallelArrays)
        } else {
            Ok(BatchStrategy::ParallelOperations)
        }
    }

    fn execute_batch_operations<F, D>(
        &self,
        data: &[ArrayBase<D, Ix1>],
        operations: &[BatchOperation],
        _strategy: &BatchStrategy,
    ) -> StatsResult<BatchResults<F>>
    where
        F: Float
            + NumCast
            + SimdUnifiedOps
            + Copy
            + Send
            + Sync
            + 'static
            + std::iter::Sum<F>
            + std::fmt::Display,
        D: Data<Elem = F>,
    {
        let mut results = BatchResults {
            means: Vec::new(),
            variances: Vec::new(),
            correlations: Vec::new(),
        };

        for array in data {
            if operations.contains(&BatchOperation::Mean) {
                let mean = self.advanced_mean(array)?;
                results.means.push(mean);
            }
            if operations.contains(&BatchOperation::Variance) {
                let variance = self.advanced_variance(array, 1)?;
                results.variances.push(variance);
            }
        }

        Ok(results)
    }
}

/// Statistics that can be requested in a batch.
#[derive(Debug, Clone, PartialEq)]
pub enum BatchOperation {
    Mean,
    Variance,
    StandardDeviation,
    Correlation,
    Covariance,
}

/// Execution strategies for batch processing.
#[derive(Debug, Clone)]
pub enum BatchStrategy {
    Sequential,
    ParallelArrays,
    ParallelOperations,
    Hybrid,
}

/// Results of a batch computation, in input order.
#[derive(Debug, Clone)]
pub struct BatchResults<F> {
    pub means: Vec<F>,
    pub variances: Vec<F>,
    pub correlations: Vec<F>,
}

impl Default for AdvancedSimdConfig {
    fn default() -> Self {
        Self {
            enable_adaptive_optimization: true,
            enable_predictive_selection: true,
            enable_hardware_specialization: true,
            enable_performance_learning: true,
            target_accuracy: AccuracyLevel::Balanced,
            performance_preference: PerformancePreference::Balanced,
            memory_constraints: MemoryConstraints {
                max_working_set_bytes: 1_073_741_824, // 1 GiB
                max_cache_usage_percent: 0.8,
                enable_memory_mapping: true,
                prefer_in_place: false,
            },
            threading_preferences: ThreadingPreferences {
                max_threads: None, // use all available cores
                min_work_per_thread: 1000,
                enable_numa_optimization: true,
                affinity_strategy: AffinityStrategy::Spread,
            },
        }
    }
}

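/// Convenience constructor: builds an optimizer from an optional config,
/// falling back to `AdvancedSimdConfig::default()`.
///
/// Illustrative sketch (not a doctest):
///
/// ```ignore
/// let optimizer = create_advanced_simd_optimizer(None);
/// let tuned = create_advanced_simd_optimizer(Some(AdvancedSimdConfig {
///     target_accuracy: AccuracyLevel::Fast,
///     ..AdvancedSimdConfig::default()
/// }));
/// ```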
#[allow(dead_code)]
pub fn create_advanced_simd_optimizer(
    config: Option<AdvancedSimdConfig>,
) -> AdvancedSimdOptimizer {
    AdvancedSimdOptimizer::new(config.unwrap_or_default())
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_advanced_simd_optimizer_creation() {
        let config = AdvancedSimdConfig::default();
        let optimizer = AdvancedSimdOptimizer::new(config);

        assert!(matches!(
            optimizer.hardware_profile.architecture,
            CpuArchitecture::X86_64
                | CpuArchitecture::AArch64
                | CpuArchitecture::RISCV64
                | CpuArchitecture::WASM32
        ));
    }

    #[test]
    fn test_advanced_mean_calculation() {
        let config = AdvancedSimdConfig::default();
        let optimizer = AdvancedSimdOptimizer::new(config);

        let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let result = optimizer
            .advanced_mean(&data.view())
            .expect("Operation failed");

        assert!((result - 3.0).abs() < 1e-10);
    }

    #[test]
    fn test_advanced_variance_calculation() {
        let config = AdvancedSimdConfig::default();
        let optimizer = AdvancedSimdOptimizer::new(config);

        let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let result = optimizer
            .advanced_variance(&data.view(), 1)
            .expect("Operation failed");

        // Sample variance (ddof = 1) of 1..=5 is 2.5.
        assert!((result - 2.5).abs() < 1e-10);
    }

    #[test]
    fn test_advanced_correlation_calculation() {
        let config = AdvancedSimdConfig::default();
        let optimizer = AdvancedSimdOptimizer::new(config);

        let x = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let y = array![5.0, 4.0, 3.0, 2.0, 1.0];
        let result = optimizer
            .advanced_correlation(&x.view(), &y.view())
            .expect("Operation failed");

        // Perfectly anti-correlated data gives r = -1.
        assert!((result - (-1.0)).abs() < 1e-10);
    }

    #[test]
    fn test_data_characteristics_analysis() {
        let config = AdvancedSimdConfig::default();
        let optimizer = AdvancedSimdOptimizer::new(config);

        let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let characteristics = optimizer.analyze_data_characteristics(&data.view());

        assert!(matches!(
            characteristics.memory_layout,
            MemoryLayout::Contiguous
        ));
        assert!(matches!(
            characteristics.sparsity_level,
            SparsityLevel::Dense
        ));
    }

    #[test]
    fn test_size_categorization() {
        assert!(matches!(
            AdvancedSimdOptimizer::categorize_size(50),
            SizeBucket::Tiny
        ));
        assert!(matches!(
            AdvancedSimdOptimizer::categorize_size(500),
            SizeBucket::Small
        ));
        assert!(matches!(
            AdvancedSimdOptimizer::categorize_size(50000),
            SizeBucket::Medium
        ));
    }

    #[test]
    fn test_kahan_summation() {
        let config = AdvancedSimdConfig::default();
        let optimizer = AdvancedSimdOptimizer::new(config);

        // Naive summation loses the 1.0 between the two large terms.
        let data = array![1e10, 1.0, -1e10];
        let result = optimizer.kahan_sum(&data.view());

        assert!((result - 1.0).abs() < 1e-10);

        let challenging_data = array![1e8, 1.0, 1e8, -1e8, -1e8];
        let challenging_result = optimizer.kahan_sum(&challenging_data.view());
        assert!((challenging_result - 1.0).abs() < 1e-10);
    }
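
    // Added check (not in the original test set): pairwise_sum recurses on
    // halves past its 16-element base case, so exercise it on 100 elements
    // and compare with the closed form 1 + 2 + ... + 100 = 5050. Assumes
    // scirs2_core::ndarray re-exports Array1, as it does array! and Array2.
    #[test]
    fn test_pairwise_summation() {
        let config = AdvancedSimdConfig::default();
        let optimizer = AdvancedSimdOptimizer::new(config);

        let data = scirs2_core::ndarray::Array1::from_vec((1..=100).map(f64::from).collect());
        let result = optimizer.pairwise_sum(&data.view());

        assert!((result - 5050.0).abs() < 1e-10);
    }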

    #[test]
    fn test_batch_operations() {
        let config = AdvancedSimdConfig::default();
        let optimizer = AdvancedSimdOptimizer::new(config);

        let data1 = array![1.0, 2.0, 3.0];
        let data2 = array![4.0, 5.0, 6.0];
        let data_arrays = vec![data1.view(), data2.view()];

        let operations = vec![BatchOperation::Mean, BatchOperation::Variance];
        let results = optimizer
            .advanced_batch_statistics(&data_arrays, &operations)
            .expect("Operation failed");

        assert_eq!(results.means.len(), 2);
        assert_eq!(results.variances.len(), 2);
        assert!((results.means[0] - 2.0).abs() < 1e-10);
        assert!((results.means[1] - 5.0).abs() < 1e-10);
    }
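
    // Added check (not in the original test set): the naive matrix multiply
    // should reproduce a hand-computed 2x2 product and reject mismatched
    // shapes.
    #[test]
    fn test_advanced_matrix_multiply() {
        let config = AdvancedSimdConfig::default();
        let optimizer = AdvancedSimdOptimizer::new(config);

        let a = array![[1.0, 2.0], [3.0, 4.0]];
        let b = array![[5.0, 6.0], [7.0, 8.0]];
        let c = optimizer
            .advanced_matrix_multiply(&a, &b)
            .expect("Operation failed");

        assert!((c[[0, 0]] - 19.0).abs() < 1e-10);
        assert!((c[[0, 1]] - 22.0).abs() < 1e-10);
        assert!((c[[1, 0]] - 43.0).abs() < 1e-10);
        assert!((c[[1, 1]] - 50.0).abs() < 1e-10);

        // A is 2x2, so a 3-row B must be rejected.
        let bad = array![[1.0], [2.0], [3.0]];
        assert!(optimizer.advanced_matrix_multiply(&a, &bad).is_err());
    }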
}