1use crate::error::StatsResult;
9use scirs2_core::ndarray::{ArrayView1, ArrayView2};
10use scirs2_core::numeric::{Float, NumCast};
11use scirs2_core::simd_ops::SimdUnifiedOps;
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14use std::sync::{Arc, Mutex};
15use std::time::{Duration, Instant};
16
/// Tuning knobs for [`AdaptiveSimdOptimizer`].
///
/// In the code visible in this file only `enable_profiling`, `min_simdsize`,
/// `cachesize` and `optimization_level` are read; the other fields are stored
/// but not consulted here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AdaptiveSimdConfig {
    /// Hardware auto-detection switch (not read by the code visible in this file).
    pub auto_detect_hardware: bool,
    /// When `false`, performance metrics are not recorded in the optimizer's caches.
    pub enable_profiling: bool,
    /// Element-count threshold: strategies scored for smaller inputs have their score halved.
    pub min_simdsize: usize,
    /// Upper bound on the number of entries kept in the strategy cache.
    pub cachesize: usize,
    /// Benchmark sample count (not read by the code visible in this file).
    pub benchmark_samples: usize,
    /// Hybrid processing switch (not read by the code visible in this file).
    pub enable_hybrid_processing: bool,
    /// Alignment requirement setting (not read by the code visible in this file).
    pub alignment_requirements: SimdAlignment,
    /// `Aggressive` and `Extreme` add an extra "aggressive" candidate per instruction set.
    pub optimization_level: OptimizationLevel,
    /// Adaptive vectorization switch (not read by the code visible in this file).
    pub adaptive_vectorization: bool,
    /// Memory-bandwidth optimization switch (not read by the code visible in this file).
    pub memory_bandwidth_optimization: bool,
}
41
/// Default configuration: profiling on, aggressive optimization level,
/// a 1000-entry strategy cache and a 64-element minimum SIMD size.
impl Default for AdaptiveSimdConfig {
    fn default() -> Self {
        Self {
            auto_detect_hardware: true,
            enable_profiling: true,
            // Inputs with fewer elements than this are penalised during strategy scoring.
            min_simdsize: 64,
            // Maximum number of cached strategies.
            cachesize: 1000,
            benchmark_samples: 10,
            enable_hybrid_processing: false,
            alignment_requirements: SimdAlignment::Optimal,
            // Enables generation of the "aggressive" candidate strategies.
            optimization_level: OptimizationLevel::Aggressive,
            adaptive_vectorization: true,
            memory_bandwidth_optimization: true,
        }
    }
}
58
/// Alignment requirement carried in [`AdaptiveSimdConfig::alignment_requirements`].
///
/// No code visible in this file branches on the variant, so the exact
/// semantics of each level are defined elsewhere (or not yet implemented).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum SimdAlignment {
    /// No alignment requirement.
    None,
    /// Basic alignment level.
    Basic,
    /// Optimal alignment level (the configuration default).
    Optimal,
    /// Caller-chosen alignment value; presumably in bytes — TODO confirm.
    Custom(usize),
}
71
/// How hard the optimizer should push when generating candidate strategies.
///
/// In this file the only observable effect is that `Aggressive` and `Extreme`
/// add an extra "aggressive" candidate (unroll factor 4, dynamic alignment)
/// for every instruction set.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum OptimizationLevel {
    /// Only conservative candidates are generated.
    Conservative,
    /// Treated like `Conservative` by the candidate generator in this file.
    Balanced,
    /// Adds aggressive candidates (the configuration default).
    Aggressive,
    /// Treated like `Aggressive` by the candidate generator in this file.
    Extreme,
}
84
/// Description of the machine the optimizer believes it is running on.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HardwareCapabilities {
    /// Instruction sets considered available; one or two candidate strategies are built per entry.
    pub simd_instructions: Vec<SimdInstructionSet>,
    /// Native vector width; presumably in bits (256 is reported alongside AVX2) — TODO confirm.
    pub vector_width: usize,
    /// Number of SIMD execution units.
    pub simd_units: usize,
    /// Cache sizes and geometry.
    pub cache_info: CacheHierarchy,
    /// Peak memory bandwidth; used as the divisor for a GB/s figure when computing utilization.
    pub memory_bandwidth: f64,
    /// CPU architecture family.
    pub cpu_architecture: CpuArchitecture,
    /// Whether a GPU is available.
    pub gpu_available: bool,
    /// GPU details, when a GPU is available.
    pub gpu_capabilities: Option<GpuCapabilities>,
}
105
/// SIMD instruction-set families the optimizer can target.
///
/// The candidate generator in this file assumes 256-bit vectors for
/// `AVX`/`AVX2`, 512-bit for `AVX512` and 128-bit for every other variant.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum SimdInstructionSet {
    /// x86 SSE.
    SSE,
    /// x86 SSE2.
    SSE2,
    /// x86 SSE3.
    SSE3,
    /// x86 SSE4.1.
    SSE41,
    /// x86 SSE4.2.
    SSE42,
    /// x86 AVX.
    AVX,
    /// x86 AVX2.
    AVX2,
    /// x86 AVX-512.
    AVX512,
    /// ARM NEON.
    NEON,
    /// ARM SVE.
    SVE,
}
130
/// Sizes and geometry of the CPU cache levels.
///
/// Sizes are presumably in bytes (the built-in defaults are written as
/// `32 * 1024`, `256 * 1024`, `8 * 1024 * 1024`) — TODO confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheHierarchy {
    /// L1 cache size.
    pub l1size: usize,
    /// L2 cache size.
    pub l2size: usize,
    /// L3 cache size.
    pub l3size: usize,
    /// Cache line size.
    pub cache_linesize: usize,
    /// Associativity values; presumably one entry per cache level (L1, L2, L3) — TODO confirm.
    pub associativity: Vec<usize>,
}
145
/// CPU architecture family recorded in [`HardwareCapabilities`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CpuArchitecture {
    /// 32-bit x86.
    X86,
    /// 64-bit x86.
    X86_64,
    /// 32-bit ARM.
    ARM,
    /// 64-bit ARM.
    ARM64,
    /// RISC-V.
    RISCV,
    /// Any other architecture, identified by a free-form name.
    Other(String),
}
162
/// Optional GPU description attached to [`HardwareCapabilities`].
///
/// No code visible in this file reads these fields; units are not
/// established here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GpuCapabilities {
    /// Number of compute units.
    pub compute_units: usize,
    /// Amount of GPU memory (unit not established in this file).
    pub gpu_memory: usize,
    /// GPU memory bandwidth (unit not established in this file).
    pub gpu_bandwidth: f64,
    /// Names of the supported compute APIs.
    pub compute_apis: Vec<String>,
}
175
/// A concrete execution plan handed to the user-supplied operation closure.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimdStrategy {
    /// Human-readable identifier; also used as part of the performance-cache key.
    pub name: String,
    /// Instruction set the strategy targets.
    pub instruction_set: SimdInstructionSet,
    /// Vector width in bits (it is divided by `elementsize * 8` to obtain the lane count).
    pub vector_width: usize,
    /// Memory access pattern the strategy is tuned for.
    pub memory_pattern: MemoryAccessPattern,
    /// How loads/stores should treat alignment.
    pub alignment: AlignmentStrategy,
    /// Loop unroll factor.
    pub unroll_factor: usize,
    /// Prefetching approach.
    pub prefetch_strategy: PrefetchStrategy,
    /// Estimated speedup; used as the base value when strategies are scored.
    pub expected_speedup: f64,
}
196
/// How an operation walks through memory.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MemoryAccessPattern {
    /// Contiguous, in-order access.
    Sequential,
    /// Fixed-stride access; for matrices the stride is taken from the view's first-axis stride.
    Strided { stride: usize },
    /// Unpredictable access.
    Random,
    /// Block-wise access with the given block size.
    Blocked { blocksize: usize },
    /// Two-dimensional tiling with the given tile dimensions.
    Tiled { tilesize: (usize, usize) },
}
211
/// How a strategy deals with data alignment.
///
/// The variants are only selected, never interpreted, by the code visible in
/// this file; their effect is up to the operation closure receiving the strategy.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AlignmentStrategy {
    /// Chosen for conservative candidates when the data address is a multiple of 32.
    ForceAlign,
    /// Chosen for conservative candidates on misaligned data and for all fallbacks.
    UnalignedLoads,
    /// Chosen for aggressive candidates.
    DynamicAlign,
    /// Not selected anywhere in the code visible in this file.
    CopyAlign,
}
224
/// Prefetching approach attached to a [`SimdStrategy`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PrefetchStrategy {
    /// No prefetching.
    None,
    /// Software prefetching with the given look-ahead distance (unit not established in this file).
    Software { distance: usize },
    /// Rely on hardware prefetching.
    Hardware,
    /// Adaptive prefetching.
    Adaptive,
}
237
/// Metrics reported for one execution of an operation.
///
/// Apart from `execution_time`, the values are estimates derived from the
/// data characteristics and the strategy rather than hardware counters.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimdPerformanceMetrics {
    /// Wall-clock time of the operation closure.
    pub execution_time: Duration,
    /// Elements processed per second.
    pub throughput: f64,
    /// Estimated fraction of peak memory bandwidth used, capped at 1.0.
    pub bandwidth_utilization: f64,
    /// Estimated cache hit rate; a constant chosen per access pattern, not a measurement.
    pub cache_hit_rate: f64,
    /// Estimated SIMD lane utilization, capped at 1.0.
    pub simd_efficiency: f64,
    /// Energy efficiency; always `None` in the code visible in this file.
    pub energy_efficiency: Option<f64>,
}
254
/// Outcome of an optimized operation: the value plus how it was obtained.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimdOptimizationResult<T> {
    /// Value returned by the operation closure.
    pub result: T,
    /// Strategy that produced `result` (the fallback strategy if the primary one failed).
    pub strategy_used: SimdStrategy,
    /// Metrics for the run that produced `result`.
    pub metrics: SimdPerformanceMetrics,
    /// Always `true` in the code visible in this file; failures are returned as `Err` instead.
    pub success: bool,
    /// Present only when the primary strategy failed and the fallback was used.
    pub fallback_info: Option<FallbackInfo>,
}
269
/// Explains why and how a fallback strategy was used.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FallbackInfo {
    /// Human-readable reason, naming the strategy that failed.
    pub reason: String,
    /// Label of the fallback that was applied.
    pub fallback_strategy: String,
    /// Fixed estimate set by the fallback paths (0.5 for vectors, 0.6 for matrices);
    /// its exact interpretation is not established in this file.
    pub performance_impact: f64,
}
280
/// Properties of the input data that drive strategy selection.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataCharacteristics {
    /// Number of elements.
    pub size: usize,
    /// Size of one element in bytes (`size_of` of the element type).
    pub elementsize: usize,
    /// Data address modulo 32; `0` means the data starts on a 32-byte boundary.
    pub alignment: usize,
    /// Detected memory access pattern.
    pub access_pattern: MemoryAccessPattern,
    /// Locality estimate; the analyzers in this file only produce 1.0 or 0.5.
    pub locality_score: f64,
    /// Fraction of elements equal to zero, when computed (matrix analysis only).
    pub sparsity: Option<f64>,
    /// Summary of the element values.
    pub value_distribution: ValueDistribution,
}
299
/// Summary of the values contained in the analysed data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValueDistribution {
    /// `(min, max)` of the values; vector analysis ignores NaN and infinite
    /// elements, matrix analysis fills in the placeholder `(0.0, 1.0)`.
    pub value_range: (f64, f64),
    /// Whether a NaN or infinite element was seen (vector analysis only).
    pub has_special_values: bool,
    /// Clustering summary; placeholder values in the code visible in this file.
    pub clustering: ClusteringInfo,
}
310
/// Clustering summary of the data values.
///
/// The analyzers in this file always fill in the placeholder
/// `cluster_count = 1`, `density = 1.0`, `separation = 0.0`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClusteringInfo {
    /// Number of clusters.
    pub cluster_count: usize,
    /// Cluster density.
    pub density: f64,
    /// Separation between clusters.
    pub separation: f64,
}
321
/// Selects a [`SimdStrategy`] for an operation, runs it, and records metrics.
///
/// All caches sit behind `Arc<Mutex<..>>`, so they are updated through `&self`.
pub struct AdaptiveSimdOptimizer {
    /// Configuration supplied at construction.
    config: AdaptiveSimdConfig,
    /// Hardware description obtained once at construction.
    hardware_capabilities: HardwareCapabilities,
    /// Previously selected strategies, keyed by a string derived from the
    /// operation name and the data characteristics.
    strategy_cache: Arc<Mutex<HashMap<String, SimdStrategy>>>,
    /// Most recent metrics per `"{operation}_{strategy name}"` key.
    performance_cache: Arc<Mutex<HashMap<String, SimdPerformanceMetrics>>>,
    /// Every recorded metrics sample per `"{operation}_{strategy name}"` key.
    benchmark_results: Arc<Mutex<HashMap<String, Vec<SimdPerformanceMetrics>>>>,
}
330
331impl AdaptiveSimdOptimizer {
332 pub fn new(config: AdaptiveSimdConfig) -> StatsResult<Self> {
334 let hardware_capabilities = Self::detect_hardware_capabilities()?;
335
336 Ok(Self {
337 config,
338 hardware_capabilities,
339 strategy_cache: Arc::new(Mutex::new(HashMap::new())),
340 performance_cache: Arc::new(Mutex::new(HashMap::new())),
341 benchmark_results: Arc::new(Mutex::new(HashMap::new())),
342 })
343 }
344
345 pub fn default() -> StatsResult<Self> {
347 Self::new(AdaptiveSimdConfig::default())
348 }
349
350 pub fn optimize_vector_operation<F, T>(
352 &self,
353 operation_name: &str,
354 data: ArrayView1<F>,
355 operation: impl Fn(&ArrayView1<F>, &SimdStrategy) -> StatsResult<T> + Send + Sync,
356 ) -> StatsResult<SimdOptimizationResult<T>>
357 where
358 F: Float + NumCast + SimdUnifiedOps + Send + Sync + std::fmt::Display,
359 T: Send + Sync + std::fmt::Display,
360 {
361 let data_characteristics = self.analyzedata_characteristics(&data)?;
362
363 let strategy = self.select_optimal_strategy(operation_name, &data_characteristics)?;
365
366 let start_time = Instant::now();
368 let result = operation(&data, &strategy);
369 let execution_time = start_time.elapsed();
370
371 match result {
372 Ok(value) => {
373 let metrics = self.calculate_performance_metrics(
374 &data_characteristics,
375 &strategy,
376 execution_time,
377 )?;
378
379 self.update_performance_cache(operation_name, &strategy, &metrics);
381
382 Ok(SimdOptimizationResult {
383 result: value,
384 strategy_used: strategy,
385 metrics,
386 success: true,
387 fallback_info: None,
388 })
389 }
390 Err(_e) => {
391 self.try_fallback_strategy(operation_name, data, operation, &strategy)
393 }
394 }
395 }
396
397 pub fn optimize_matrix_operation<F, T>(
399 &self,
400 operation_name: &str,
401 data: ArrayView2<F>,
402 operation: impl Fn(&ArrayView2<F>, &SimdStrategy) -> StatsResult<T> + Send + Sync,
403 ) -> StatsResult<SimdOptimizationResult<T>>
404 where
405 F: Float + NumCast + SimdUnifiedOps + Send + Sync + std::fmt::Display,
406 T: Send + Sync + std::fmt::Display,
407 {
408 let data_characteristics = self.analyze_matrix_characteristics(&data)?;
409 let strategy =
410 self.select_optimal_matrix_strategy(operation_name, &data_characteristics)?;
411
412 let start_time = Instant::now();
413 let result = operation(&data, &strategy);
414 let execution_time = start_time.elapsed();
415
416 match result {
417 Ok(value) => {
418 let metrics = self.calculate_matrix_performance_metrics(
419 &data_characteristics,
420 &strategy,
421 execution_time,
422 )?;
423
424 self.update_performance_cache(operation_name, &strategy, &metrics);
425
426 Ok(SimdOptimizationResult {
427 result: value,
428 strategy_used: strategy,
429 metrics,
430 success: true,
431 fallback_info: None,
432 })
433 }
434 Err(_e) => {
435 self.try_matrix_fallback_strategy(operation_name, data, operation, &strategy)
437 }
438 }
439 }
440
441 fn detect_hardware_capabilities() -> StatsResult<HardwareCapabilities> {
443 let capabilities = HardwareCapabilities {
445 simd_instructions: vec![
446 SimdInstructionSet::SSE2,
447 SimdInstructionSet::AVX,
448 SimdInstructionSet::AVX2,
449 ],
450 vector_width: 256, simd_units: 2,
452 cache_info: CacheHierarchy {
453 l1size: 32 * 1024, l2size: 256 * 1024, l3size: 8 * 1024 * 1024, cache_linesize: 64,
457 associativity: vec![8, 8, 16],
458 },
459 memory_bandwidth: 50.0, cpu_architecture: CpuArchitecture::X86_64,
461 gpu_available: false,
462 gpu_capabilities: None,
463 };
464
465 Ok(capabilities)
466 }
467
468 fn analyzedata_characteristics<F>(
470 &self,
471 data: &ArrayView1<F>,
472 ) -> StatsResult<DataCharacteristics>
473 where
474 F: Float + NumCast + std::fmt::Display,
475 {
476 let size = data.len();
477 let elementsize = std::mem::size_of::<F>();
478
479 let alignment = (data.as_ptr() as usize) % 32; let mut min_val = F::infinity();
484 let mut max_val = F::neg_infinity();
485 let mut has_special = false;
486
487 for &value in data.iter() {
488 if value.is_nan() || value.is_infinite() {
489 has_special = true;
490 } else {
491 if value < min_val {
492 min_val = value;
493 }
494 if value > max_val {
495 max_val = value;
496 }
497 }
498 }
499
500 let value_distribution = ValueDistribution {
501 value_range: (
502 min_val.to_f64().unwrap_or(0.0),
503 max_val.to_f64().unwrap_or(0.0),
504 ),
505 has_special_values: has_special,
506 clustering: ClusteringInfo {
507 cluster_count: 1, density: 1.0,
509 separation: 0.0,
510 },
511 };
512
513 Ok(DataCharacteristics {
514 size,
515 elementsize,
516 alignment,
517 access_pattern: MemoryAccessPattern::Sequential,
518 locality_score: 1.0, sparsity: None,
520 value_distribution,
521 })
522 }
523
524 fn analyze_matrix_characteristics<F>(
526 &self,
527 data: &ArrayView2<F>,
528 ) -> StatsResult<DataCharacteristics>
529 where
530 F: Float + NumCast + std::fmt::Display,
531 {
532 let size = data.len();
533 let elementsize = std::mem::size_of::<F>();
534
535 let access_pattern = if data.is_standard_layout() {
537 MemoryAccessPattern::Sequential
538 } else {
539 MemoryAccessPattern::Strided {
540 stride: data.strides()[0] as usize,
541 }
542 };
543
544 let zero_count = data.iter().filter(|&&x| x == F::zero()).count();
546 let sparsity = if size > 0 {
547 Some(zero_count as f64 / size as f64)
548 } else {
549 None
550 };
551
552 Ok(DataCharacteristics {
553 size,
554 elementsize,
555 alignment: (data.as_ptr() as usize) % 32,
556 access_pattern,
557 locality_score: if data.is_standard_layout() { 1.0 } else { 0.5 },
558 sparsity,
559 value_distribution: ValueDistribution {
560 value_range: (0.0, 1.0), has_special_values: false,
562 clustering: ClusteringInfo {
563 cluster_count: 1,
564 density: 1.0,
565 separation: 0.0,
566 },
567 },
568 })
569 }
570
571 fn select_optimal_strategy(
573 &self,
574 operation_name: &str,
575 characteristics: &DataCharacteristics,
576 ) -> StatsResult<SimdStrategy> {
577 let cache_key = format!(
578 "{}_{}_{}",
579 operation_name, characteristics.size, characteristics.elementsize
580 );
581
582 if let Ok(cache) = self.strategy_cache.lock() {
584 if let Some(strategy) = cache.get(&cache_key) {
585 return Ok(strategy.clone());
586 }
587 }
588
589 let candidates = self.generate_candidate_strategies(characteristics)?;
591
592 let best_strategy = self.evaluate_strategies(&candidates, characteristics)?;
594
595 if let Ok(mut cache) = self.strategy_cache.lock() {
597 cache.insert(cache_key, best_strategy.clone());
598
599 if cache.len() > self.config.cachesize {
601 let oldest_key = cache.keys().next().cloned();
602 if let Some(key) = oldest_key {
603 cache.remove(&key);
604 }
605 }
606 }
607
608 Ok(best_strategy)
609 }
610
611 fn select_optimal_matrix_strategy(
613 &self,
614 operation_name: &str,
615 characteristics: &DataCharacteristics,
616 ) -> StatsResult<SimdStrategy> {
617 let mut strategy = self.select_optimal_strategy(operation_name, characteristics)?;
619
620 if characteristics.size > 1000000 {
622 strategy.memory_pattern = MemoryAccessPattern::Tiled { tilesize: (64, 64) };
624 strategy.prefetch_strategy = PrefetchStrategy::Software { distance: 8 };
625 } else if matches!(
626 characteristics.access_pattern,
627 MemoryAccessPattern::Strided { .. }
628 ) {
629 strategy.memory_pattern = MemoryAccessPattern::Blocked { blocksize: 256 };
630 }
631
632 Ok(strategy)
633 }
634
635 fn generate_candidate_strategies(
637 &self,
638 characteristics: &DataCharacteristics,
639 ) -> StatsResult<Vec<SimdStrategy>> {
640 let mut candidates = Vec::new();
641
642 for instruction_set in &self.hardware_capabilities.simd_instructions {
644 let vector_width = match instruction_set {
645 SimdInstructionSet::SSE | SimdInstructionSet::SSE2 => 128,
646 SimdInstructionSet::AVX | SimdInstructionSet::AVX2 => 256,
647 SimdInstructionSet::AVX512 => 512,
648 SimdInstructionSet::NEON => 128,
649 _ => 128,
650 };
651
652 candidates.push(SimdStrategy {
654 name: format!("{:?}_conservative", instruction_set),
655 instruction_set: instruction_set.clone(),
656 vector_width,
657 memory_pattern: characteristics.access_pattern.clone(),
658 alignment: if characteristics.alignment == 0 {
659 AlignmentStrategy::ForceAlign
660 } else {
661 AlignmentStrategy::UnalignedLoads
662 },
663 unroll_factor: 2,
664 prefetch_strategy: PrefetchStrategy::None,
665 expected_speedup: 2.0,
666 });
667
668 if matches!(
670 self.config.optimization_level,
671 OptimizationLevel::Aggressive | OptimizationLevel::Extreme
672 ) {
673 candidates.push(SimdStrategy {
674 name: format!("{:?}_aggressive", instruction_set),
675 instruction_set: instruction_set.clone(),
676 vector_width,
677 memory_pattern: characteristics.access_pattern.clone(),
678 alignment: AlignmentStrategy::DynamicAlign,
679 unroll_factor: 4,
680 prefetch_strategy: if characteristics.size > 10000 {
681 PrefetchStrategy::Software { distance: 4 }
682 } else {
683 PrefetchStrategy::None
684 },
685 expected_speedup: 4.0,
686 });
687 }
688 }
689
690 Ok(candidates)
691 }
692
693 fn evaluate_strategies(
695 &self,
696 candidates: &[SimdStrategy],
697 characteristics: &DataCharacteristics,
698 ) -> StatsResult<SimdStrategy> {
699 let mut best_strategy = candidates[0].clone();
700 let mut best_score = 0.0;
701
702 for strategy in candidates {
703 let score = self.calculate_strategy_score(strategy, characteristics);
704 if score > best_score {
705 best_score = score;
706 best_strategy = strategy.clone();
707 }
708 }
709
710 Ok(best_strategy)
711 }
712
713 fn calculate_strategy_score(
715 &self,
716 strategy: &SimdStrategy,
717 characteristics: &DataCharacteristics,
718 ) -> f64 {
719 let mut score = strategy.expected_speedup;
720
721 if characteristics.size < self.config.min_simdsize {
723 score *= 0.5; }
725
726 if characteristics.alignment == 0
728 && matches!(strategy.alignment, AlignmentStrategy::ForceAlign)
729 {
730 score *= 1.2;
731 }
732
733 match &characteristics.access_pattern {
735 MemoryAccessPattern::Sequential => score *= 1.0,
736 MemoryAccessPattern::Strided { .. } => score *= 0.8,
737 MemoryAccessPattern::Random => score *= 0.5,
738 _ => score *= 0.7,
739 }
740
741 if self
743 .hardware_capabilities
744 .simd_instructions
745 .contains(&strategy.instruction_set)
746 {
747 score *= 1.5;
748 }
749
750 score
751 }
752
753 fn calculate_performance_metrics(
755 &self,
756 characteristics: &DataCharacteristics,
757 strategy: &SimdStrategy,
758 execution_time: Duration,
759 ) -> StatsResult<SimdPerformanceMetrics> {
760 let throughput = characteristics.size as f64 / execution_time.as_secs_f64();
761
762 let bytes_processed = characteristics.size * characteristics.elementsize;
764 let bandwidth_used = bytes_processed as f64 / execution_time.as_secs_f64() / 1e9; let bandwidth_utilization = bandwidth_used / self.hardware_capabilities.memory_bandwidth;
766
767 let theoretical_max = strategy.vector_width / (characteristics.elementsize * 8); let actual_vectors = characteristics.size / theoretical_max;
770 let simd_efficiency = if actual_vectors > 0 {
771 characteristics.size as f64 / (actual_vectors * theoretical_max) as f64
772 } else {
773 0.0
774 };
775
776 Ok(SimdPerformanceMetrics {
777 execution_time,
778 throughput,
779 bandwidth_utilization: bandwidth_utilization.min(1.0),
780 cache_hit_rate: 0.9, simd_efficiency: simd_efficiency.min(1.0),
782 energy_efficiency: None, })
784 }
785
786 fn calculate_matrix_performance_metrics(
788 &self,
789 characteristics: &DataCharacteristics,
790 strategy: &SimdStrategy,
791 execution_time: Duration,
792 ) -> StatsResult<SimdPerformanceMetrics> {
793 let mut metrics =
795 self.calculate_performance_metrics(characteristics, strategy, execution_time)?;
796
797 metrics.cache_hit_rate = match &characteristics.access_pattern {
799 MemoryAccessPattern::Sequential => 0.95,
800 MemoryAccessPattern::Strided { .. } => 0.8,
801 MemoryAccessPattern::Tiled { .. } => 0.9,
802 _ => 0.7,
803 };
804
805 Ok(metrics)
806 }
807
808 fn try_fallback_strategy<F, T>(
810 &self,
811 _operation_name: &str,
812 data: ArrayView1<F>,
813 operation: impl Fn(&ArrayView1<F>, &SimdStrategy) -> StatsResult<T> + Send + Sync,
814 failed_strategy: &SimdStrategy,
815 ) -> StatsResult<SimdOptimizationResult<T>>
816 where
817 F: Float + NumCast + SimdUnifiedOps + Send + Sync + std::fmt::Display,
818 T: Send + Sync + std::fmt::Display,
819 {
820 let fallback_strategy = SimdStrategy {
822 name: "fallback_conservative".to_string(),
823 instruction_set: SimdInstructionSet::SSE2, vector_width: 128,
825 memory_pattern: MemoryAccessPattern::Sequential,
826 alignment: AlignmentStrategy::UnalignedLoads,
827 unroll_factor: 1,
828 prefetch_strategy: PrefetchStrategy::None,
829 expected_speedup: 1.0,
830 };
831
832 let start_time = Instant::now();
833 match operation(&data, &fallback_strategy) {
834 Ok(result) => {
835 let execution_time = start_time.elapsed();
836 let characteristics = self.analyzedata_characteristics(&data)?;
837 let metrics = self.calculate_performance_metrics(
838 &characteristics,
839 &fallback_strategy,
840 execution_time,
841 )?;
842
843 Ok(SimdOptimizationResult {
844 result,
845 strategy_used: fallback_strategy,
846 metrics,
847 success: true,
848 fallback_info: Some(FallbackInfo {
849 reason: format!("Primary _strategy '{}' failed", failed_strategy.name),
850 fallback_strategy: "conservative_sse2".to_string(),
851 performance_impact: 0.5, }),
853 })
854 }
855 Err(e) => Err(e),
856 }
857 }
858
859 fn try_matrix_fallback_strategy<F, T>(
861 &self,
862 _operation_name: &str,
863 data: ArrayView2<F>,
864 operation: impl Fn(&ArrayView2<F>, &SimdStrategy) -> StatsResult<T> + Send + Sync,
865 failed_strategy: &SimdStrategy,
866 ) -> StatsResult<SimdOptimizationResult<T>>
867 where
868 F: Float + NumCast + SimdUnifiedOps + Send + Sync + std::fmt::Display,
869 T: Send + Sync + std::fmt::Display,
870 {
871 let fallback_strategy = SimdStrategy {
873 name: "matrix_fallback_conservative".to_string(),
874 instruction_set: SimdInstructionSet::SSE2,
875 vector_width: 128,
876 memory_pattern: MemoryAccessPattern::Sequential,
877 alignment: AlignmentStrategy::UnalignedLoads,
878 unroll_factor: 1,
879 prefetch_strategy: PrefetchStrategy::None,
880 expected_speedup: 1.0,
881 };
882
883 let start_time = Instant::now();
884 match operation(&data, &fallback_strategy) {
885 Ok(result) => {
886 let execution_time = start_time.elapsed();
887 let characteristics = self.analyze_matrix_characteristics(&data)?;
888 let metrics = self.calculate_matrix_performance_metrics(
889 &characteristics,
890 &fallback_strategy,
891 execution_time,
892 )?;
893
894 Ok(SimdOptimizationResult {
895 result,
896 strategy_used: fallback_strategy,
897 metrics,
898 success: true,
899 fallback_info: Some(FallbackInfo {
900 reason: format!(
901 "Primary matrix _strategy '{}' failed",
902 failed_strategy.name
903 ),
904 fallback_strategy: "conservative_matrix_sse2".to_string(),
905 performance_impact: 0.6,
906 }),
907 })
908 }
909 Err(e) => Err(e),
910 }
911 }
912
913 fn update_performance_cache(
915 &self,
916 operation_name: &str,
917 strategy: &SimdStrategy,
918 metrics: &SimdPerformanceMetrics,
919 ) {
920 if !self.config.enable_profiling {
921 return;
922 }
923
924 let cache_key = format!("{}_{}", operation_name, strategy.name);
925
926 if let Ok(mut cache) = self.performance_cache.lock() {
927 cache.insert(cache_key.clone(), metrics.clone());
928 }
929
930 if let Ok(mut benchmarks) = self.benchmark_results.lock() {
932 benchmarks
933 .entry(cache_key)
934 .or_insert_with(Vec::new)
935 .push(metrics.clone());
936 }
937 }
938
939 pub fn get_performance_statistics(&self) -> PerformanceStatistics {
941 let cache = self.performance_cache.lock().unwrap();
942 let _benchmarks = self.benchmark_results.lock().unwrap();
943
944 let total_operations = cache.len();
945 let avg_speedup = if !cache.is_empty() {
946 cache.values().map(|m| m.simd_efficiency).sum::<f64>() / cache.len() as f64
947 } else {
948 0.0
949 };
950
951 let best_strategies: Vec<(String, f64)> = cache
952 .iter()
953 .map(|(name, metrics)| (name.clone(), metrics.simd_efficiency))
954 .collect();
955
956 PerformanceStatistics {
957 total_operations,
958 average_speedup: avg_speedup,
959 best_strategies,
960 hardware_utilization: self.calculate_hardware_utilization(&cache),
961 }
962 }
963
964 fn calculate_hardware_utilization(
966 &self,
967 cache: &HashMap<String, SimdPerformanceMetrics>,
968 ) -> HardwareUtilization {
969 let avg_bandwidth = if !cache.is_empty() {
970 cache.values().map(|m| m.bandwidth_utilization).sum::<f64>() / cache.len() as f64
971 } else {
972 0.0
973 };
974
975 let avg_cache_hit_rate = if !cache.is_empty() {
976 cache.values().map(|m| m.cache_hit_rate).sum::<f64>() / cache.len() as f64
977 } else {
978 0.0
979 };
980
981 HardwareUtilization {
982 simd_utilization: 0.8, memory_bandwidth_utilization: avg_bandwidth,
984 cache_efficiency: avg_cache_hit_rate,
985 energy_efficiency: None,
986 }
987 }
988}
989
/// Aggregate view of the optimizer's performance cache.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceStatistics {
    /// Number of `(operation, strategy)` entries in the performance cache.
    pub total_operations: usize,
    /// Mean `simd_efficiency` over the cached entries (0.0 when empty);
    /// despite the name it is not a measured speedup ratio.
    pub average_speedup: f64,
    /// `(cache key, simd_efficiency)` for every cached entry.
    pub best_strategies: Vec<(String, f64)>,
    /// Averaged utilization figures.
    pub hardware_utilization: HardwareUtilization,
}
1002
/// Averaged hardware utilization estimates.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HardwareUtilization {
    /// SIMD utilization; a fixed 0.8 in the code visible in this file.
    pub simd_utilization: f64,
    /// Mean `bandwidth_utilization` over the cached metrics.
    pub memory_bandwidth_utilization: f64,
    /// Mean `cache_hit_rate` over the cached metrics.
    pub cache_efficiency: f64,
    /// Energy efficiency; always `None` in the code visible in this file.
    pub energy_efficiency: Option<f64>,
}
1015
1016#[allow(dead_code)]
1018pub fn create_adaptive_simd_optimizer() -> StatsResult<AdaptiveSimdOptimizer> {
1019 AdaptiveSimdOptimizer::default()
1020}
1021
1022#[allow(dead_code)]
1023pub fn optimize_simd_operation<F, T>(
1024 operation_name: &str,
1025 data: ArrayView1<F>,
1026 operation: impl Fn(&ArrayView1<F>, &SimdStrategy) -> StatsResult<T> + Send + Sync,
1027) -> StatsResult<SimdOptimizationResult<T>>
1028where
1029 F: Float + NumCast + SimdUnifiedOps + Send + Sync + std::fmt::Display,
1030 T: Send + Sync + std::fmt::Display,
1031{
1032 let optimizer = AdaptiveSimdOptimizer::default()?;
1033 optimizer.optimize_vector_operation(operation_name, data, operation)
1034}
1035
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    /// 1000 aligned, sequential 8-byte elements; shared by several tests.
    fn sample_characteristics() -> DataCharacteristics {
        let clustering = ClusteringInfo {
            cluster_count: 1,
            density: 1.0,
            separation: 0.0,
        };
        let value_distribution = ValueDistribution {
            value_range: (0.0, 1.0),
            has_special_values: false,
            clustering,
        };
        DataCharacteristics {
            size: 1000,
            elementsize: 8,
            alignment: 0,
            access_pattern: MemoryAccessPattern::Sequential,
            locality_score: 1.0,
            sparsity: None,
            value_distribution,
        }
    }

    /// Default optimizer; construction is expected to succeed.
    fn default_optimizer() -> AdaptiveSimdOptimizer {
        AdaptiveSimdOptimizer::default().unwrap()
    }

    #[test]
    fn test_adaptive_simd_config() {
        let config = AdaptiveSimdConfig::default();
        assert!(config.auto_detect_hardware);
        assert!(config.enable_profiling);
        assert!(config.min_simdsize > 0);
    }

    #[test]
    fn test_hardware_detection() {
        let capabilities = AdaptiveSimdOptimizer::detect_hardware_capabilities().unwrap();
        assert!(!capabilities.simd_instructions.is_empty());
        assert!(capabilities.vector_width > 0);
    }

    #[test]
    fn testdata_characteristics_analysis() {
        let optimizer = default_optimizer();
        let data = array![1.0f64, 2.0, 3.0, 4.0, 5.0];

        let characteristics = optimizer
            .analyzedata_characteristics(&data.view())
            .unwrap();
        assert_eq!(characteristics.size, 5);
        assert_eq!(characteristics.elementsize, 8);
    }

    #[test]
    fn test_strategy_generation() {
        let strategies = default_optimizer()
            .generate_candidate_strategies(&sample_characteristics())
            .unwrap();
        assert!(!strategies.is_empty());
    }

    #[test]
    fn test_strategy_selection() {
        let strategy = default_optimizer()
            .select_optimal_strategy("test_op", &sample_characteristics())
            .unwrap();
        assert!(!strategy.name.is_empty());
        assert!(strategy.expected_speedup > 0.0);
    }

    #[test]
    fn test_performance_metrics_calculation() {
        let strategy = SimdStrategy {
            name: "test_strategy".to_string(),
            instruction_set: SimdInstructionSet::AVX2,
            vector_width: 256,
            memory_pattern: MemoryAccessPattern::Sequential,
            alignment: AlignmentStrategy::ForceAlign,
            unroll_factor: 2,
            prefetch_strategy: PrefetchStrategy::None,
            expected_speedup: 2.0,
        };

        let metrics = default_optimizer()
            .calculate_performance_metrics(
                &sample_characteristics(),
                &strategy,
                Duration::from_millis(10),
            )
            .unwrap();

        assert!(metrics.throughput > 0.0);
        assert!(metrics.simd_efficiency >= 0.0 && metrics.simd_efficiency <= 1.0);
    }
}