1use crate::error::{SpatialError, SpatialResult};
65use scirs2_core::ndarray::{Array1, Array2, ArrayView2};
66use statrs::statistics::Statistics;
67use std::collections::{HashMap, VecDeque};
68use std::sync::Arc;
69use std::time::{Duration, Instant};
70#[cfg(feature = "async")]
71use tokio::sync::RwLock;
72
/// Selector that chooses a spatial algorithm for a dataset and a
/// [`SelectionContext`].
///
/// Constructed with `AdaptiveAlgorithmSelector::new` and configured through
/// the `with_*` builder methods, which set the boolean feature flags below.
#[derive(Debug)]
pub struct AdaptiveAlgorithmSelector {
    /// Active selection strategies; the `with_*` builders append to this list.
    strategies: Vec<SelectionStrategy>,
    /// When true, `execute_with_feedback` records each run in `performance_history`.
    performance_learning: bool,
    /// Flag set by `with_resource_awareness`.
    resource_awareness: bool,
    /// When true, `execute_with_feedback` updates the quality predictor.
    quality_optimization: bool,
    /// When true, candidate generation adds an ensemble candidate.
    ensemble_methods: bool,
    /// Per-algorithm execution records, shared behind an async read-write lock.
    performance_history: Arc<RwLock<PerformanceHistory>>,
    /// Pattern-analysis state; not read anywhere in the visible code.
    #[allow(dead_code)]
    pattern_analyzer: PatternAnalyzer,
    /// Snapshot of system resources, refreshed by `update_resource_monitor`.
    resource_monitor: ResourceMonitor,
    /// State used to track predicted versus observed quality.
    quality_predictor: QualityPredictor,
    /// Cache of previous selections, shared behind an async read-write lock.
    selection_cache: Arc<RwLock<SelectionCache>>,
}
98
/// Identifier of a selection strategy stored in
/// `AdaptiveAlgorithmSelector::strategies`.
#[derive(Debug, Clone)]
pub enum SelectionStrategy {
    /// Part of the default strategy list created by `new`.
    PatternBased,
    /// Part of the default list; also pushed by `with_performance_learning`.
    HistoryBased,
    /// Part of the default list; also pushed by `with_resource_awareness`.
    ResourceAware,
    /// Pushed by `with_quality_optimization`.
    QualityOptimized,
    /// Pushed by `with_ensemble_methods`.
    EnsembleBased,
    /// A combination of nested strategies (not constructed in the visible code).
    Hybrid(Vec<SelectionStrategy>),
}
115
/// Caller-supplied priorities and constraints that steer algorithm selection.
#[derive(Debug, Clone)]
pub struct SelectionContext {
    /// Weight of predicted accuracy in the fitness score; the builder clamps it
    /// to `[0, 1]`. Values above 0.8 add `QuantumClustering` as a candidate.
    pub accuracy_priority: f64,
    /// Weight of the execution-time score in the fitness score; the builder
    /// clamps it to `[0, 1]`.
    pub speed_priority: f64,
    /// Upper bound on predicted memory usage (the builder names its argument
    /// `bytes`); predictions above it get a memory score of zero.
    pub memory_constraint: usize,
    /// When true, candidates with a predicted execution time above 1.0 get a
    /// time score of zero.
    pub real_time_requirement: bool,
    /// Weight of the energy score in the fitness score. Values above 0.8 add
    /// `NeuromorphicClustering` as a candidate.
    pub energy_efficiency: f64,
    /// Not read in the visible portion of this file.
    pub quality_tolerance: f64,
    /// Free-form named preferences; not read in the visible portion of this file.
    pub user_preferences: HashMap<String, f64>,
    /// Description of the hardware/environment the algorithm will run in.
    pub environmental_constraints: EnvironmentalConstraints,
}
136
/// Hardware and environment limits attached to a [`SelectionContext`].
#[derive(Debug, Clone)]
pub struct EnvironmentalConstraints {
    /// Number of usable cores; defaults to `num_cpus::get()` and is emitted as
    /// the `n_jobs` resource parameter.
    pub available_cores: usize,
    /// Available memory; defaults to 8_000_000_000 (presumably bytes).
    pub available_memory: usize,
    /// Whether a GPU may be used; defaults to `false`.
    pub gpu_available: bool,
    /// Optional network bandwidth (units not stated in this file).
    pub network_bandwidth: Option<f64>,
    /// Optional power budget (units not stated in this file).
    pub power_budget: Option<f64>,
    /// Optional thermal budget (units not stated in this file).
    pub thermal_budget: Option<f64>,
}
153
/// Result of a selection: the chosen algorithm plus supporting information.
#[derive(Debug, Clone)]
pub struct AlgorithmSelection {
    /// The algorithm that won the fitness comparison.
    pub algorithm: SelectedAlgorithm,
    /// Parameters generated for the chosen algorithm.
    pub parameters: AlgorithmParameters,
    /// Predicted cost and accuracy of running the chosen algorithm.
    pub performance_prediction: PerformancePrediction,
    /// Confidence copied from the winning candidate's evaluation.
    pub confidence: f64,
    /// Explanation of how the choice was made.
    pub reasoning: SelectionReasoning,
    /// Alternative selections; `select_best_candidate` currently leaves this empty.
    pub alternatives: Vec<AlgorithmSelection>,
}
170
/// Algorithms the selector can choose between.
///
/// Derives `Eq` and `Hash` because it is used as a `HashMap` key (for example
/// in `PerformanceHistory::records`).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum SelectedAlgorithm {
    /// Always offered as a candidate.
    KMeans,
    /// Always offered as a candidate.
    DBScan,
    /// Has a performance baseline but is not offered by candidate generation.
    HierarchicalClustering,
    /// Always offered as a candidate.
    KDTreeNN,
    /// Has a performance baseline but is not offered by candidate generation.
    BallTreeNN,
    /// Offered when `accuracy_priority > 0.8`.
    QuantumClustering,
    /// Offered when `energy_efficiency > 0.8`.
    NeuromorphicClustering,
    /// Offered for `Large` and `Huge` datasets.
    TensorCoreAccelerated,
    /// Offered for `Huge` datasets.
    DistributedProcessing,
    /// A combination of member algorithms; offered when ensemble methods are enabled.
    Ensemble(Vec<SelectedAlgorithm>),
}
195
/// Named parameters produced by `generate_parameters` for a chosen algorithm.
#[derive(Debug, Clone)]
pub struct AlgorithmParameters {
    /// Algorithm-specific settings such as `n_clusters`, `eps` or `leaf_size`.
    pub core_params: HashMap<String, ParameterValue>,
    /// Optimization settings; `generate_parameters` currently leaves this empty.
    pub optimization_params: HashMap<String, ParameterValue>,
    /// Resource settings; `generate_parameters` stores `n_jobs` here.
    pub resource_params: HashMap<String, ParameterValue>,
}
206
/// Dynamically typed value stored in an [`AlgorithmParameters`] map.
#[derive(Debug, Clone)]
pub enum ParameterValue {
    /// Signed integer value.
    Integer(i64),
    /// Floating-point value.
    Float(f64),
    /// Boolean flag.
    Boolean(bool),
    /// Text value.
    String(String),
    /// List of floating-point values.
    Array(Vec<f64>),
}
216
/// Predicted cost and quality of running an algorithm, produced by
/// `predict_performance`.
#[derive(Debug, Clone)]
pub struct PerformancePrediction {
    /// Predicted execution time (presumably seconds; the real-time check
    /// compares it against 1.0).
    pub execution_time: f64,
    /// Predicted memory usage (presumably bytes; compared against
    /// `SelectionContext::memory_constraint`).
    pub memory_usage: usize,
    /// Predicted accuracy score.
    pub accuracy_score: f64,
    /// Predicted energy use; currently computed as `execution_time * 50.0`.
    pub energy_consumption: f64,
    /// Lower/upper bounds around the point predictions above.
    pub confidence_intervals: ConfidenceIntervals,
}
231
/// `(lower, upper)` bounds attached to a [`PerformancePrediction`].
#[derive(Debug, Clone)]
pub struct ConfidenceIntervals {
    /// Execution-time bounds; `predict_performance` uses 80% and 120% of the estimate.
    pub execution_time_range: (f64, f64),
    /// Memory bounds; `predict_performance` uses 90% and 110% of the estimate.
    pub memory_usage_range: (usize, usize),
    /// Accuracy bounds; `predict_performance` uses 95% of the estimate and the
    /// estimate capped at 1.0.
    pub accuracy_range: (f64, f64),
}
242
/// Human-readable explanation attached to an [`AlgorithmSelection`].
#[derive(Debug, Clone)]
pub struct SelectionReasoning {
    /// Names of the main factors behind the decision.
    pub primary_factors: Vec<String>,
    /// Weight given to each named factor; `select_best_candidate` leaves it empty.
    pub decision_weights: HashMap<String, f64>,
    /// Number of alternatives considered; `select_best_candidate` sets it to 1.
    pub alternatives_considered: usize,
    /// Names of the factors that contribute to the reported confidence.
    pub confidence_factors: Vec<String>,
}
255
/// Accumulated performance observations, keyed by algorithm.
#[derive(Debug)]
pub struct PerformanceHistory {
    /// Execution records per algorithm; `update_performance_history` keeps at
    /// most 1000 entries per algorithm, dropping the oldest first.
    records: HashMap<SelectedAlgorithm, VecDeque<PerformanceRecord>>,
    /// Scores per data pattern; not read in the visible code.
    #[allow(dead_code)]
    pattern_performance: HashMap<DataPattern, Vec<(SelectedAlgorithm, f64)>>,
    /// Trend summary per algorithm; not read in the visible code.
    #[allow(dead_code)]
    trends: HashMap<SelectedAlgorithm, PerformanceTrend>,
}
268
/// One observed execution stored in [`PerformanceHistory`].
#[derive(Debug, Clone)]
pub struct PerformanceRecord {
    /// Categorised properties of the dataset the algorithm ran on.
    pub data_characteristics: DataCharacteristics,
    /// Measured execution time, copied from the actual-performance record
    /// (measured there in seconds via `Instant::elapsed`).
    pub execution_time: f64,
    /// Memory usage reported by the algorithm result.
    pub memory_usage: usize,
    /// Accuracy reported by the algorithm result.
    pub accuracy: f64,
    /// Energy consumed; `execute_with_feedback` currently reports 0.0.
    pub energy_consumed: f64,
    /// Moment the record was created.
    pub timestamp: Instant,
    /// Selection context; `update_performance_history` currently stores a
    /// default context rather than the one used for the selection.
    pub context: SelectionContext,
}
287
/// Categorised summary of a dataset, produced by `analyzedata_characteristics`.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct DataCharacteristics {
    /// Bucket for the number of points (rows).
    pub size_category: SizeCategory,
    /// Bucket for the number of dimensions (columns).
    pub dimensionality_category: DimensionalityCategory,
    /// Bucket for the estimated density.
    pub density_category: DensityCategory,
    /// Bucket for the estimated clustering tendency.
    pub clustering_tendency: ClusteringTendencyCategory,
    /// Bucket for the estimated noise level.
    pub noise_level: NoiseLevel,
    /// Estimated distribution family of the columns.
    pub distribution_type: DistributionType,
}
304
/// Dataset size bucket, assigned from the number of points.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum SizeCategory {
    /// 0 to 99 points.
    Tiny,
    /// 100 to 999 points.
    Small,
    /// 1_000 to 99_999 points.
    Medium,
    /// 100_000 to 999_999 points.
    Large,
    /// 1_000_000 points or more.
    Huge,
}
314
/// Dimensionality bucket, assigned from the number of columns.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum DimensionalityCategory {
    /// 1 to 3 dimensions.
    Low,
    /// 4 to 20 dimensions.
    Medium,
    /// 21 to 100 dimensions.
    High,
    /// Any other dimension count (more than 100, and also 0).
    VeryHigh,
}
323
/// Density bucket, assigned from the estimate returned by `estimatedata_density`.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum DensityCategory {
    /// Estimate below 0.3.
    Sparse,
    /// Estimate from 0.3 up to (but excluding) 0.7.
    Medium,
    /// Estimate of 0.7 or above.
    Dense,
}
331
/// Clustering-tendency bucket, assigned from the statistic returned by
/// `estimate_clustering_tendency`.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum ClusteringTendencyCategory {
    /// Statistic of 0.7 or above.
    Random,
    /// Statistic from 0.3 up to (but excluding) 0.7.
    Structured,
    /// Statistic below 0.3.
    HighlyStructured,
}
339
/// Noise bucket, assigned from the estimate returned by `estimate_noise_level`.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum NoiseLevel {
    /// Estimate below 0.3.
    Low,
    /// Estimate from 0.3 up to (but excluding) 0.7.
    Medium,
    /// Estimate of 0.7 or above; reduces predicted accuracy by 10%.
    High,
}
347
/// Distribution family reported by `estimate_distribution_type`.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum DistributionType {
    /// More than half of the columns look uniform.
    Uniform,
    /// More than half of the columns look Gaussian (and the uniform test did not win).
    Gaussian,
    /// Fallback when neither the uniform nor the Gaussian test wins.
    Multimodal,
    /// Not produced by the visible estimator.
    Skewed,
    /// Returned when there are fewer than 10 points.
    Unknown,
}
357
/// Key type for `PerformanceHistory::pattern_performance`: data characteristics
/// combined with a hash of the selection context.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct DataPattern {
    /// Categorised properties of the dataset.
    pub characteristics: DataCharacteristics,
    /// Hash identifying the selection context.
    pub context_hash: u64,
}
364
/// Trend summary stored per algorithm in `PerformanceHistory::trends`.
///
/// NOTE(review): nothing in the visible code computes these values, so value
/// ranges and units are not established here.
#[derive(Debug, Clone)]
pub struct PerformanceTrend {
    /// Direction of the trend.
    pub trend_direction: TrendDirection,
    /// Magnitude of the trend.
    pub trend_strength: f64,
    /// Size of the most recent change.
    pub recent_change: f64,
    /// Stability score.
    pub stability_score: f64,
}
377
/// Direction of a [`PerformanceTrend`].
#[derive(Debug, Clone)]
pub enum TrendDirection {
    /// Performance is getting better.
    Improving,
    /// Performance is not changing noticeably.
    Stable,
    /// Performance is getting worse.
    Degrading,
}
385
/// Pattern-analysis state held by the selector.
///
/// All fields are marked `dead_code`: `new` initialises them empty and nothing
/// in the visible code reads them.
#[derive(Debug)]
pub struct PatternAnalyzer {
    /// Pattern models keyed by name.
    #[allow(dead_code)]
    pattern_models: HashMap<String, PatternModel>,
    /// Registered feature extractors.
    #[allow(dead_code)]
    feature_extractors: Vec<FeatureExtractor>,
    /// Cached data characteristics keyed by a `u64` hash.
    #[allow(dead_code)]
    pattern_cache: HashMap<u64, DataCharacteristics>,
}
399
/// A model stored in `PatternAnalyzer::pattern_models`.
#[derive(Debug)]
pub struct PatternModel {
    /// Kind of model.
    pub model_type: PatternModelType,
    /// Model parameters (interpretation depends on the model type).
    pub parameters: Vec<f64>,
    /// Accuracy recorded for the model.
    pub accuracy: f64,
    /// Moment the model was last updated.
    pub last_update: Instant,
}
412
/// Kind of a [`PatternModel`].
#[derive(Debug)]
pub enum PatternModelType {
    /// Model based on statistical analysis.
    StatisticalAnalysis,
    /// Model based on machine learning.
    MachineLearning,
    /// Model based on heuristic rules.
    HeuristicRules,
}
420
/// A named function that turns a 2-D dataset into a feature vector.
#[derive(Debug)]
pub struct FeatureExtractor {
    /// Name of the extractor (for example `"basic_stats"`).
    pub name: String,
    /// Plain function pointer that computes the features for a dataset view.
    pub compute_features: fn(&ArrayView2<'_, f64>) -> Vec<f64>,
}
429
/// Snapshot of system resources kept by the selector.
///
/// `update_resource_monitor` currently writes fixed placeholder values into
/// `cpu_usage` and `memory_usage` and refreshes `last_update`; the remaining
/// fields are only initialised by `new`.
#[derive(Debug)]
pub struct ResourceMonitor {
    /// CPU usage (the placeholder value is 0.5).
    cpu_usage: f64,
    /// Memory usage (the placeholder value is 4_000_000_000, presumably bytes).
    memory_usage: usize,
    /// GPU state.
    #[allow(dead_code)]
    gpu_status: GpuStatus,
    /// Network state.
    #[allow(dead_code)]
    network_status: NetworkStatus,
    /// Power consumption (units not stated in this file).
    #[allow(dead_code)]
    power_consumption: f64,
    /// Temperature (units not stated in this file).
    #[allow(dead_code)]
    temperature: f64,
    /// Intended refresh interval; initialised to one second.
    #[allow(dead_code)]
    update_interval: Duration,
    /// Moment of the last refresh.
    last_update: Instant,
}
455
/// GPU portion of a [`ResourceMonitor`] snapshot.
#[derive(Debug)]
pub struct GpuStatus {
    /// Whether a GPU is available.
    pub available: bool,
    /// GPU utilization (scale not stated in this file).
    pub utilization: f64,
    /// GPU memory in use (presumably bytes).
    pub memory_usage: usize,
    /// GPU temperature (units not stated in this file).
    pub temperature: f64,
}
468
/// Network portion of a [`ResourceMonitor`] snapshot (units are not stated in
/// this file).
#[derive(Debug)]
pub struct NetworkStatus {
    /// Available bandwidth.
    pub bandwidth: f64,
    /// Network latency.
    pub latency: f64,
    /// Packet-loss measure.
    pub packet_loss: f64,
}
479
/// State used to compare predicted and observed result quality.
#[derive(Debug)]
pub struct QualityPredictor {
    /// Quality model per algorithm; not read in the visible code.
    #[allow(dead_code)]
    quality_models: HashMap<SelectedAlgorithm, QualityModel>,
    /// Cross-validation results per algorithm; not read in the visible code.
    #[allow(dead_code)]
    cv_results: HashMap<SelectedAlgorithm, Vec<f64>>,
    /// Queue of quality measurements, initialised empty by `new`.
    quality_history: VecDeque<QualityMeasurement>,
}
492
/// Model stored per algorithm in `QualityPredictor::quality_models`.
///
/// NOTE(review): the field names suggest a linear regression model; nothing in
/// the visible code fits or evaluates it, so confirm before relying on that.
#[derive(Debug)]
pub struct QualityModel {
    /// Model coefficients.
    pub coefficients: Vec<f64>,
    /// Model intercept.
    pub intercept: f64,
    /// Goodness-of-fit value.
    pub r_squared: f64,
    /// Number of samples the model was trained on.
    pub training_size: usize,
}
505
/// One predicted-versus-actual quality observation.
#[derive(Debug, Clone)]
pub struct QualityMeasurement {
    /// Algorithm the measurement belongs to.
    pub algorithm: SelectedAlgorithm,
    /// Categorised properties of the dataset.
    pub data_characteristics: DataCharacteristics,
    /// Quality that was predicted before execution.
    pub predicted_quality: f64,
    /// Quality that was actually observed.
    pub actual_quality: f64,
    /// Prediction error; `update_quality_predictor` computes the absolute
    /// difference between predicted and actual accuracy.
    pub prediction_error: f64,
    /// Moment the measurement was taken.
    pub timestamp: Instant,
}
522
/// Bounded cache of previous selections.
#[derive(Debug)]
pub struct SelectionCache {
    /// Cached selections keyed by data/context/time-bucket hash.
    cache: HashMap<CacheKey, CachedSelection>,
    /// Hit counter; initialised to 0 and not updated in the visible code.
    #[allow(dead_code)]
    hit_count: u64,
    /// Miss counter; initialised to 0 and not updated in the visible code.
    #[allow(dead_code)]
    miss_count: u64,
    /// Maximum number of entries (1000 by default); `cache_selection` evicts
    /// the entry with the oldest timestamp once this is exceeded.
    max_size: usize,
}
537
/// Lookup key of the [`SelectionCache`], produced by `compute_cache_key`.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct CacheKey {
    /// Hash of the dataset shape and a sample of its rows.
    pub data_hash: u64,
    /// Hash of selected fields of the selection context.
    pub context_hash: u64,
    /// Time-bucket component of the key.
    pub time_bucket: u64,
}
548
/// Entry stored in the [`SelectionCache`].
#[derive(Debug, Clone)]
pub struct CachedSelection {
    /// The cached selection.
    pub selection: AlgorithmSelection,
    /// Creation time; entries older than 300 seconds are ignored by
    /// `check_cache`, and the oldest entry is evicted first.
    pub timestamp: Instant,
    /// Usage counter; initialised to 1 by `cache_selection`.
    pub use_count: u64,
    /// Success rate; initialised to 1.0 by `cache_selection`.
    pub success_rate: f64,
}
561
562impl Default for SelectionContext {
563 fn default() -> Self {
564 Self::new()
565 }
566}
567
568impl SelectionContext {
569 pub fn new() -> Self {
571 Self {
572 accuracy_priority: 0.7,
573 speed_priority: 0.7,
574 memory_constraint: usize::MAX,
575 real_time_requirement: false,
576 energy_efficiency: 0.5,
577 quality_tolerance: 0.1,
578 user_preferences: HashMap::new(),
579 environmental_constraints: EnvironmentalConstraints {
580 available_cores: num_cpus::get(),
581 available_memory: 8_000_000_000, gpu_available: false,
583 network_bandwidth: None,
584 power_budget: None,
585 thermal_budget: None,
586 },
587 }
588 }
589
590 pub fn with_accuracy_priority(mut self, priority: f64) -> Self {
592 self.accuracy_priority = priority.clamp(0.0, 1.0);
593 self
594 }
595
596 pub fn with_speed_priority(mut self, priority: f64) -> Self {
598 self.speed_priority = priority.clamp(0.0, 1.0);
599 self
600 }
601
602 pub fn with_memory_constraint(mut self, bytes: usize) -> Self {
604 self.memory_constraint = bytes;
605 self
606 }
607
608 pub fn with_real_time_requirement(mut self, required: bool) -> Self {
610 self.real_time_requirement = required;
611 self
612 }
613}
614
615impl Default for AdaptiveAlgorithmSelector {
616 fn default() -> Self {
617 Self::new()
618 }
619}
620
621impl AdaptiveAlgorithmSelector {
622 pub fn new() -> Self {
624 Self {
625 strategies: vec![
626 SelectionStrategy::PatternBased,
627 SelectionStrategy::HistoryBased,
628 SelectionStrategy::ResourceAware,
629 ],
630 performance_learning: false,
631 resource_awareness: false,
632 quality_optimization: false,
633 ensemble_methods: false,
634 performance_history: Arc::new(RwLock::new(PerformanceHistory {
635 records: HashMap::new(),
636 pattern_performance: HashMap::new(),
637 trends: HashMap::new(),
638 })),
639 pattern_analyzer: PatternAnalyzer {
640 pattern_models: HashMap::new(),
641 feature_extractors: Vec::new(),
642 pattern_cache: HashMap::new(),
643 },
644 resource_monitor: ResourceMonitor {
645 cpu_usage: 0.0,
646 memory_usage: 0,
647 gpu_status: GpuStatus {
648 available: false,
649 utilization: 0.0,
650 memory_usage: 0,
651 temperature: 0.0,
652 },
653 network_status: NetworkStatus {
654 bandwidth: 0.0,
655 latency: 0.0,
656 packet_loss: 0.0,
657 },
658 power_consumption: 0.0,
659 temperature: 0.0,
660 update_interval: Duration::from_secs(1),
661 last_update: Instant::now(),
662 },
663 quality_predictor: QualityPredictor {
664 quality_models: HashMap::new(),
665 cv_results: HashMap::new(),
666 quality_history: VecDeque::new(),
667 },
668 selection_cache: Arc::new(RwLock::new(SelectionCache {
669 cache: HashMap::new(),
670 hit_count: 0,
671 miss_count: 0,
672 max_size: 1000,
673 })),
674 }
675 }
676
677 pub fn with_performance_learning(mut self, enabled: bool) -> Self {
679 self.performance_learning = enabled;
680 if enabled {
681 self.strategies.push(SelectionStrategy::HistoryBased);
682 }
683 self
684 }
685
686 pub fn with_resource_awareness(mut self, enabled: bool) -> Self {
688 self.resource_awareness = enabled;
689 if enabled {
690 self.strategies.push(SelectionStrategy::ResourceAware);
691 }
692 self
693 }
694
695 pub fn with_quality_optimization(mut self, enabled: bool) -> Self {
697 self.quality_optimization = enabled;
698 if enabled {
699 self.strategies.push(SelectionStrategy::QualityOptimized);
700 }
701 self
702 }
703
704 pub fn with_ensemble_methods(mut self, enabled: bool) -> Self {
706 self.ensemble_methods = enabled;
707 if enabled {
708 self.strategies.push(SelectionStrategy::EnsembleBased);
709 }
710 self
711 }
712
713 pub async fn select_optimal_algorithm(
715 &mut self,
716 data: &ArrayView2<'_, f64>,
717 context: &SelectionContext,
718 ) -> SpatialResult<AlgorithmSelection> {
719 if let Some(cached) = self.check_cache(data, context).await? {
721 return Ok(cached.selection);
722 }
723
724 let data_characteristics = self.analyzedata_characteristics(data)?;
726
727 self.update_resource_monitor().await?;
729
730 let candidates = self
732 .generate_candidate_algorithms(&data_characteristics, context)
733 .await?;
734
735 let mut evaluations = Vec::new();
737 for candidate in candidates {
738 let evaluation = self
739 .evaluate_candidate(&candidate, &data_characteristics, context)
740 .await?;
741 evaluations.push(evaluation);
742 }
743
744 let best_selection = self.select_best_candidate(evaluations, context)?;
746
747 self.cache_selection(data, context, &best_selection).await?;
749
750 Ok(best_selection)
751 }
752
753 pub async fn execute_with_feedback(
755 &mut self,
756 selection: &AlgorithmSelection,
757 data: &ArrayView2<'_, f64>,
758 ) -> SpatialResult<ExecutionResult> {
759 let start_time = Instant::now();
760
761 let algorithm_result = self.execute_algorithm(selection, data).await?;
763
764 let execution_time = start_time.elapsed().as_secs_f64();
765
766 let actual_performance = ActualPerformance {
768 execution_time,
769 memory_usage: algorithm_result.memory_usage,
770 accuracy: algorithm_result.accuracy,
771 energy_consumed: 0.0, };
773
774 if self.performance_learning {
776 self.update_performance_history(selection, data, &actual_performance)
777 .await?;
778 }
779
780 if self.quality_optimization {
782 self.update_quality_predictor(selection, &actual_performance)
783 .await?;
784 }
785
786 Ok(ExecutionResult {
787 algorithm_result,
788 actual_performance: actual_performance.clone(),
789 selection_accuracy: self.calculate_selection_accuracy(selection, &actual_performance),
790 })
791 }
792
793 #[allow(dead_code)]
795 fn default_feature_extractors(&self) -> Vec<FeatureExtractor> {
796 vec![
797 FeatureExtractor {
798 name: "basic_stats".to_string(),
799 compute_features: |data| {
800 let (n_points, n_dims) = data.dim();
801 vec![n_points as f64, n_dims as f64]
802 },
803 },
804 FeatureExtractor {
805 name: "distribution_stats".to_string(),
806 compute_features: |data| {
807 let (_, n_dims) = data.dim();
808 let mut features = Vec::new();
809
810 for dim in 0..n_dims {
811 let column = data.column(dim);
812 let mean = column.to_owned().mean();
813 let std = (column.mapv(|x| (x - mean).powi(2)).mean()).sqrt();
814 features.push(mean);
815 features.push(std);
816 }
817
818 features
819 },
820 },
821 ]
822 }
823
824 async fn check_cache(
826 &self,
827 data: &ArrayView2<'_, f64>,
828 context: &SelectionContext,
829 ) -> SpatialResult<Option<CachedSelection>> {
830 let cache_key = self.compute_cache_key(data, context);
831 let cache = self.selection_cache.read().await;
832
833 if let Some(cached) = cache.cache.get(&cache_key) {
834 if cached.timestamp.elapsed() < Duration::from_secs(300) {
836 return Ok(Some(cached.clone()));
838 }
839 }
840
841 Ok(None)
842 }
843
844 fn compute_cache_key(
846 &self,
847 data: &ArrayView2<'_, f64>,
848 context: &SelectionContext,
849 ) -> CacheKey {
850 use std::collections::hash_map::DefaultHasher;
851 use std::hash::{Hash, Hasher};
852
853 let mut data_hasher = DefaultHasher::new();
854 let (n_points, n_dims) = data.dim();
855 n_points.hash(&mut data_hasher);
856 n_dims.hash(&mut data_hasher);
857
858 for (i, point) in data.outer_iter().enumerate() {
860 if i % (n_points / 10 + 1) == 0 {
861 for &coord in point.iter() {
863 (coord as i64).hash(&mut data_hasher);
864 }
865 }
866 }
867
868 let data_hash = data_hasher.finish();
869
870 let mut context_hasher = DefaultHasher::new();
871 context
872 .accuracy_priority
873 .to_bits()
874 .hash(&mut context_hasher);
875 context.speed_priority.to_bits().hash(&mut context_hasher);
876 context.memory_constraint.hash(&mut context_hasher);
877 context.real_time_requirement.hash(&mut context_hasher);
878
879 let context_hash = context_hasher.finish();
880
881 let time_bucket = Instant::now().elapsed().as_secs() / 300; CacheKey {
884 data_hash,
885 context_hash,
886 time_bucket,
887 }
888 }
889
890 fn analyzedata_characteristics(
892 &mut self,
893 data: &ArrayView2<'_, f64>,
894 ) -> SpatialResult<DataCharacteristics> {
895 let (n_points, n_dims) = data.dim();
896
897 let size_category = match n_points {
899 0..=99 => SizeCategory::Tiny,
900 100..=999 => SizeCategory::Small,
901 1000..=99_999 => SizeCategory::Medium,
902 100_000..=999_999 => SizeCategory::Large,
903 _ => SizeCategory::Huge,
904 };
905
906 let dimensionality_category = match n_dims {
908 1..=3 => DimensionalityCategory::Low,
909 4..=20 => DimensionalityCategory::Medium,
910 21..=100 => DimensionalityCategory::High,
911 _ => DimensionalityCategory::VeryHigh,
912 };
913
914 let density = self.estimatedata_density(data)?;
916 let density_category = if density < 0.3 {
917 DensityCategory::Sparse
918 } else if density < 0.7 {
919 DensityCategory::Medium
920 } else {
921 DensityCategory::Dense
922 };
923
924 let clustering_tendency = self.estimate_clustering_tendency(data)?;
926 let clustering_tendency_category = if clustering_tendency < 0.3 {
927 ClusteringTendencyCategory::HighlyStructured
928 } else if clustering_tendency < 0.7 {
929 ClusteringTendencyCategory::Structured
930 } else {
931 ClusteringTendencyCategory::Random
932 };
933
934 let noise_level = self.estimate_noise_level(data)?;
936 let noise_level_category = if noise_level < 0.3 {
937 NoiseLevel::Low
938 } else if noise_level < 0.7 {
939 NoiseLevel::Medium
940 } else {
941 NoiseLevel::High
942 };
943
944 let distribution_type = self.estimate_distribution_type(data)?;
946
947 Ok(DataCharacteristics {
948 size_category,
949 dimensionality_category,
950 density_category,
951 clustering_tendency: clustering_tendency_category,
952 noise_level: noise_level_category,
953 distribution_type,
954 })
955 }
956
957 fn estimatedata_density(&self, data: &ArrayView2<'_, f64>) -> SpatialResult<f64> {
959 let (n_points_, n_dims) = data.dim();
960
961 if n_points_ < 2 {
962 return Ok(0.0);
963 }
964
965 let sample_size = n_points_.min(100);
966 let mut total_inverse_distance = 0.0;
967 let mut count = 0;
968
969 for i in 0..sample_size {
970 let mut nearest_distance = f64::INFINITY;
971
972 for j in 0..n_points_ {
973 if i != j {
974 let dist: f64 = data
975 .row(i)
976 .iter()
977 .zip(data.row(j).iter())
978 .map(|(&a, &b)| (a - b).powi(2))
979 .sum::<f64>()
980 .sqrt();
981
982 if dist < nearest_distance {
983 nearest_distance = dist;
984 }
985 }
986 }
987
988 if nearest_distance > 0.0 && nearest_distance.is_finite() {
989 total_inverse_distance += 1.0 / nearest_distance;
990 count += 1;
991 }
992 }
993
994 Ok(if count > 0 {
995 (total_inverse_distance / count as f64).min(1.0)
996 } else {
997 0.0
998 })
999 }
1000
1001 fn estimate_clustering_tendency(&self, data: &ArrayView2<'_, f64>) -> SpatialResult<f64> {
1003 let (n_points, n_dims) = data.dim();
1004
1005 if n_points < 10 {
1006 return Ok(0.5);
1007 }
1008
1009 let sample_size = n_points.min(20);
1010 let mut real_distances = Vec::new();
1011 let mut random_distances = Vec::new();
1012
1013 for i in 0..sample_size {
1015 let mut min_dist = f64::INFINITY;
1016 for j in 0..n_points {
1017 if i != j {
1018 let dist: f64 = data
1019 .row(i)
1020 .iter()
1021 .zip(data.row(j).iter())
1022 .map(|(&a, &b)| (a - b).powi(2))
1023 .sum::<f64>()
1024 .sqrt();
1025 min_dist = min_dist.min(dist);
1026 }
1027 }
1028 real_distances.push(min_dist);
1029 }
1030
1031 let bounds = self.getdata_bounds(data);
1033 for _ in 0..sample_size {
1034 let random_point: Array1<f64> = Array1::from_shape_fn(n_dims, |i| {
1035 scirs2_core::random::random::<f64>() * (bounds[i].1 - bounds[i].0) + bounds[i].0
1036 });
1037
1038 let mut min_dist = f64::INFINITY;
1039 for j in 0..n_points {
1040 let dist: f64 = random_point
1041 .iter()
1042 .zip(data.row(j).iter())
1043 .map(|(&a, &b)| (a - b).powi(2))
1044 .sum::<f64>()
1045 .sqrt();
1046 min_dist = min_dist.min(dist);
1047 }
1048 random_distances.push(min_dist);
1049 }
1050
1051 let sum_random: f64 = random_distances.iter().sum();
1052 let sum_real: f64 = real_distances.iter().sum();
1053 let hopkins = sum_random / (sum_random + sum_real);
1054
1055 Ok(hopkins)
1056 }
1057
1058 fn estimate_noise_level(&self, data: &ArrayView2<'_, f64>) -> SpatialResult<f64> {
1060 let (n_points_, n_dims) = data.dim();
1061
1062 if n_points_ < 10 {
1063 return Ok(0.0);
1064 }
1065
1066 let sample_size = n_points_.min(50);
1068 let k = 5; let mut outlier_scores = Vec::new();
1071
1072 for i in 0..sample_size {
1073 let mut distances = Vec::new();
1074
1075 for j in 0..n_points_ {
1076 if i != j {
1077 let dist: f64 = data
1078 .row(i)
1079 .iter()
1080 .zip(data.row(j).iter())
1081 .map(|(&a, &b)| (a - b).powi(2))
1082 .sum::<f64>()
1083 .sqrt();
1084 distances.push(dist);
1085 }
1086 }
1087
1088 distances.sort_by(|a, b| a.partial_cmp(b).expect("Operation failed"));
1089
1090 if distances.len() >= k {
1091 let k_distance = distances[k - 1];
1092 let local_density = k as f64 / k_distance;
1093 outlier_scores.push(1.0 / local_density);
1094 }
1095 }
1096
1097 if outlier_scores.is_empty() {
1098 Ok(0.0)
1099 } else {
1100 let mean_score = outlier_scores.iter().sum::<f64>() / outlier_scores.len() as f64;
1101 let variance = outlier_scores
1102 .iter()
1103 .map(|&score| (score - mean_score).powi(2))
1104 .sum::<f64>()
1105 / outlier_scores.len() as f64;
1106
1107 Ok((variance.sqrt() / mean_score).min(1.0))
1108 }
1109 }
1110
1111 fn estimate_distribution_type(
1113 &self,
1114 data: &ArrayView2<'_, f64>,
1115 ) -> SpatialResult<DistributionType> {
1116 let (n_points, n_dims) = data.dim();
1117
1118 if n_points < 10 {
1119 return Ok(DistributionType::Unknown);
1120 }
1121
1122 let mut uniform_count = 0;
1124 let mut gaussian_count = 0;
1125
1126 for dim in 0..n_dims {
1127 let column = data.column(dim);
1128 let mean = column.to_owned().mean();
1129 let std = (column.mapv(|x| (x - mean).powi(2)).mean()).sqrt();
1130
1131 if std < 1e-6 {
1132 continue; }
1134
1135 let min_val = column.fold(f64::INFINITY, |a, &b| a.min(b));
1137 let max_val = column.fold(f64::NEG_INFINITY, |a, &b| a.max(b));
1138 let range = max_val - min_val;
1139
1140 let expected_std_uniform = range / (12.0_f64).sqrt();
1141 if (std - expected_std_uniform).abs() / expected_std_uniform < 0.2 {
1142 uniform_count += 1;
1143 }
1144
1145 let normalized: Vec<f64> = column.iter().map(|&x| (x - mean) / std).collect();
1147 let skewness =
1148 normalized.iter().map(|&x| x.powi(3)).sum::<f64>() / normalized.len() as f64;
1149 let kurtosis =
1150 normalized.iter().map(|&x| x.powi(4)).sum::<f64>() / normalized.len() as f64;
1151
1152 if skewness.abs() < 0.5 && (kurtosis - 3.0).abs() < 1.0 {
1153 gaussian_count += 1;
1154 }
1155 }
1156
1157 if uniform_count > n_dims / 2 {
1158 Ok(DistributionType::Uniform)
1159 } else if gaussian_count > n_dims / 2 {
1160 Ok(DistributionType::Gaussian)
1161 } else {
1162 Ok(DistributionType::Multimodal)
1164 }
1165 }
1166
1167 fn getdata_bounds(&self, data: &ArrayView2<'_, f64>) -> Vec<(f64, f64)> {
1169 let (_, n_dims) = data.dim();
1170 let mut bounds = Vec::new();
1171
1172 for dim in 0..n_dims {
1173 let column = data.column(dim);
1174 let min_val = column.fold(f64::INFINITY, |a, &b| a.min(b));
1175 let max_val = column.fold(f64::NEG_INFINITY, |a, &b| a.max(b));
1176 bounds.push((min_val, max_val));
1177 }
1178
1179 bounds
1180 }
1181
1182 async fn update_resource_monitor(&mut self) -> SpatialResult<()> {
1184 self.resource_monitor.cpu_usage = 0.5; self.resource_monitor.memory_usage = 4_000_000_000; self.resource_monitor.last_update = Instant::now();
1188 Ok(())
1189 }
1190
1191 async fn generate_candidate_algorithms(
1193 &self,
1194 characteristics: &DataCharacteristics,
1195 context: &SelectionContext,
1196 ) -> SpatialResult<Vec<SelectedAlgorithm>> {
1197 let mut candidates = Vec::new();
1198
1199 candidates.push(SelectedAlgorithm::KMeans);
1201 candidates.push(SelectedAlgorithm::DBScan);
1202 candidates.push(SelectedAlgorithm::KDTreeNN);
1203
1204 match characteristics.size_category {
1206 SizeCategory::Huge => {
1207 candidates.push(SelectedAlgorithm::DistributedProcessing);
1208 candidates.push(SelectedAlgorithm::TensorCoreAccelerated);
1209 }
1210 SizeCategory::Large => {
1211 candidates.push(SelectedAlgorithm::TensorCoreAccelerated);
1212 }
1213 _ => {}
1214 }
1215
1216 if context.accuracy_priority > 0.8 {
1218 candidates.push(SelectedAlgorithm::QuantumClustering);
1219 }
1220
1221 if context.energy_efficiency > 0.8 {
1222 candidates.push(SelectedAlgorithm::NeuromorphicClustering);
1223 }
1224
1225 if self.ensemble_methods {
1227 candidates.push(SelectedAlgorithm::Ensemble(vec![
1228 SelectedAlgorithm::KMeans,
1229 SelectedAlgorithm::DBScan,
1230 ]));
1231 }
1232
1233 Ok(candidates)
1234 }
1235
1236 async fn evaluate_candidate(
1238 &self,
1239 candidate: &SelectedAlgorithm,
1240 characteristics: &DataCharacteristics,
1241 context: &SelectionContext,
1242 ) -> SpatialResult<AlgorithmEvaluation> {
1243 let performance_prediction = self
1245 .predict_performance(candidate, characteristics, context)
1246 .await?;
1247
1248 let fitness_score = self.calculate_fitness_score(&performance_prediction, context);
1250
1251 let parameters = self.generate_parameters(candidate, characteristics, context)?;
1253
1254 Ok(AlgorithmEvaluation {
1255 algorithm: candidate.clone(),
1256 parameters,
1257 performance_prediction,
1258 fitness_score,
1259 confidence: 0.8, })
1261 }
1262
1263 async fn predict_performance(
1265 &self,
1266 algorithm: &SelectedAlgorithm,
1267 characteristics: &DataCharacteristics,
1268 context: &SelectionContext,
1269 ) -> SpatialResult<PerformancePrediction> {
1270 let (base_time, base_memory, base_accuracy) = match algorithm {
1272 SelectedAlgorithm::KMeans => (1.0, 1000000, 0.8),
1273 SelectedAlgorithm::DBScan => (2.0, 1500000, 0.85),
1274 SelectedAlgorithm::KDTreeNN => (0.5, 800000, 0.9),
1275 SelectedAlgorithm::QuantumClustering => (3.0, 2000000, 0.95),
1276 SelectedAlgorithm::NeuromorphicClustering => (1.5, 1200000, 0.88),
1277 SelectedAlgorithm::TensorCoreAccelerated => (0.3, 3000000, 0.9),
1278 SelectedAlgorithm::DistributedProcessing => (0.8, 5000000, 0.92),
1279 SelectedAlgorithm::HierarchicalClustering => (5.0, 2500000, 0.9),
1280 SelectedAlgorithm::BallTreeNN => (0.7, 1000000, 0.88),
1281 SelectedAlgorithm::Ensemble(_) => (2.5, 3000000, 0.95),
1282 };
1283
1284 let size_factor = match characteristics.size_category {
1286 SizeCategory::Tiny => 0.1,
1287 SizeCategory::Small => 0.5,
1288 SizeCategory::Medium => 1.0,
1289 SizeCategory::Large => 3.0,
1290 SizeCategory::Huge => 10.0,
1291 };
1292
1293 let dim_factor = match characteristics.dimensionality_category {
1294 DimensionalityCategory::Low => 0.8,
1295 DimensionalityCategory::Medium => 1.0,
1296 DimensionalityCategory::High => 1.5,
1297 DimensionalityCategory::VeryHigh => 2.5,
1298 };
1299
1300 let execution_time = base_time * size_factor * dim_factor;
1301 let memory_usage = (base_memory as f64 * size_factor * dim_factor) as usize;
1302 let accuracy_score = base_accuracy
1303 * (if characteristics.noise_level == NoiseLevel::High {
1304 0.9
1305 } else {
1306 1.0
1307 });
1308
1309 Ok(PerformancePrediction {
1310 execution_time,
1311 memory_usage,
1312 accuracy_score,
1313 energy_consumption: execution_time * 50.0, confidence_intervals: ConfidenceIntervals {
1315 execution_time_range: (execution_time * 0.8, execution_time * 1.2),
1316 memory_usage_range: (
1317 (memory_usage as f64 * 0.9) as usize,
1318 (memory_usage as f64 * 1.1) as usize,
1319 ),
1320 accuracy_range: (accuracy_score * 0.95, accuracy_score.min(1.0)),
1321 },
1322 })
1323 }
1324
1325 fn calculate_fitness_score(
1327 &self,
1328 prediction: &PerformancePrediction,
1329 context: &SelectionContext,
1330 ) -> f64 {
1331 let time_score = if context.real_time_requirement && prediction.execution_time > 1.0 {
1332 0.0
1333 } else {
1334 1.0 / (1.0 + prediction.execution_time)
1335 };
1336
1337 let memory_score = if prediction.memory_usage > context.memory_constraint {
1338 0.0
1339 } else {
1340 1.0 - (prediction.memory_usage as f64) / (context.memory_constraint as f64)
1341 };
1342
1343 let accuracy_score = prediction.accuracy_score;
1344
1345 let energy_score = 1.0 / (1.0 + prediction.energy_consumption / 100.0);
1346
1347 context.speed_priority * time_score
1349 + context.accuracy_priority * accuracy_score
1350 + 0.2 * memory_score
1351 + context.energy_efficiency * energy_score
1352 }
1353
1354 fn generate_parameters(
1356 &self,
1357 algorithm: &SelectedAlgorithm,
1358 characteristics: &DataCharacteristics,
1359 context: &SelectionContext,
1360 ) -> SpatialResult<AlgorithmParameters> {
1361 let mut core_params = HashMap::new();
1362 let optimization_params = HashMap::new();
1363 let mut resource_params = HashMap::new();
1364
1365 match algorithm {
1366 SelectedAlgorithm::KMeans => {
1367 let k = match characteristics.clustering_tendency {
1368 ClusteringTendencyCategory::HighlyStructured => 3,
1369 ClusteringTendencyCategory::Structured => 5,
1370 ClusteringTendencyCategory::Random => 2,
1371 };
1372 core_params.insert("n_clusters".to_string(), ParameterValue::Integer(k));
1373 core_params.insert("max_iter".to_string(), ParameterValue::Integer(300));
1374 core_params.insert("tol".to_string(), ParameterValue::Float(1e-4));
1375 }
1376 SelectedAlgorithm::DBScan => {
1377 let eps = match characteristics.density_category {
1378 DensityCategory::Dense => 0.3,
1379 DensityCategory::Medium => 0.5,
1380 DensityCategory::Sparse => 1.0,
1381 };
1382 core_params.insert("eps".to_string(), ParameterValue::Float(eps));
1383 core_params.insert("min_samples".to_string(), ParameterValue::Integer(5));
1384 }
1385 SelectedAlgorithm::KDTreeNN => {
1386 core_params.insert("leaf_size".to_string(), ParameterValue::Integer(30));
1387 }
1388 _ => {
1389 core_params.insert("tolerance".to_string(), ParameterValue::Float(1e-6));
1391 }
1392 }
1393
1394 resource_params.insert(
1396 "n_jobs".to_string(),
1397 ParameterValue::Integer(context.environmental_constraints.available_cores as i64),
1398 );
1399
1400 Ok(AlgorithmParameters {
1401 core_params,
1402 optimization_params,
1403 resource_params,
1404 })
1405 }
1406
1407 fn select_best_candidate(
1409 &self,
1410 evaluations: Vec<AlgorithmEvaluation>,
1411 _context: &SelectionContext,
1412 ) -> SpatialResult<AlgorithmSelection> {
1413 let best_evaluation = evaluations
1414 .into_iter()
1415 .max_by(|a, b| {
1416 a.fitness_score
1417 .partial_cmp(&b.fitness_score)
1418 .expect("Operation failed")
1419 })
1420 .ok_or_else(|| SpatialError::InvalidInput("No candidate algorithms".to_string()))?;
1421
1422 Ok(AlgorithmSelection {
1423 algorithm: best_evaluation.algorithm,
1424 parameters: best_evaluation.parameters,
1425 performance_prediction: best_evaluation.performance_prediction,
1426 confidence: best_evaluation.confidence,
1427 reasoning: SelectionReasoning {
1428 primary_factors: vec!["fitness_score".to_string()],
1429 decision_weights: HashMap::new(),
1430 alternatives_considered: 1,
1431 confidence_factors: vec!["historical_performance".to_string()],
1432 },
1433 alternatives: Vec::new(),
1434 })
1435 }
1436
1437 async fn cache_selection(
1439 &self,
1440 data: &ArrayView2<'_, f64>,
1441 context: &SelectionContext,
1442 selection: &AlgorithmSelection,
1443 ) -> SpatialResult<()> {
1444 let cache_key = self.compute_cache_key(data, context);
1445 let cached_selection = CachedSelection {
1446 selection: selection.clone(),
1447 timestamp: Instant::now(),
1448 use_count: 1,
1449 success_rate: 1.0,
1450 };
1451
1452 let mut cache = self.selection_cache.write().await;
1453 cache.cache.insert(cache_key, cached_selection);
1454
1455 if cache.cache.len() > cache.max_size {
1457 let oldest_key = cache
1459 .cache
1460 .iter()
1461 .min_by_key(|(_, v)| v.timestamp)
1462 .map(|(k, _)| k.clone());
1463
1464 if let Some(key) = oldest_key {
1465 cache.cache.remove(&key);
1466 }
1467 }
1468
1469 Ok(())
1470 }
1471
1472 async fn execute_algorithm(
1474 &self,
1475 _selection: &AlgorithmSelection,
1476 data: &ArrayView2<'_, f64>,
1477 ) -> SpatialResult<AlgorithmResult> {
1478 #[cfg(feature = "async")]
1480 tokio::time::sleep(Duration::from_millis(100)).await;
1481
1482 Ok(AlgorithmResult {
1483 resultdata: data.to_owned(),
1484 memory_usage: 1000000,
1485 accuracy: 0.85,
1486 execution_details: HashMap::new(),
1487 })
1488 }
1489
1490 async fn update_performance_history(
1492 &mut self,
1493 selection: &AlgorithmSelection,
1494 data: &ArrayView2<'_, f64>,
1495 actual_performance: &ActualPerformance,
1496 ) -> SpatialResult<()> {
1497 let data_characteristics = self.analyzedata_characteristics(data)?;
1498
1499 let record = PerformanceRecord {
1500 data_characteristics,
1501 execution_time: actual_performance.execution_time,
1502 memory_usage: actual_performance.memory_usage,
1503 accuracy: actual_performance.accuracy,
1504 energy_consumed: actual_performance.energy_consumed,
1505 timestamp: Instant::now(),
1506 context: SelectionContext::new(), };
1508
1509 let mut history = self.performance_history.write().await;
1510 history
1511 .records
1512 .entry(selection.algorithm.clone())
1513 .or_insert_with(VecDeque::new)
1514 .push_back(record);
1515
1516 if let Some(algorithm_history) = history.records.get_mut(&selection.algorithm) {
1518 if algorithm_history.len() > 1000 {
1519 algorithm_history.pop_front();
1520 }
1521 }
1522
1523 Ok(())
1524 }
1525
1526 async fn update_quality_predictor(
1528 &mut self,
1529 selection: &AlgorithmSelection,
1530 actual_performance: &ActualPerformance,
1531 ) -> SpatialResult<()> {
1532 let predicted_accuracy = selection.performance_prediction.accuracy_score;
1533 let actual_accuracy = actual_performance.accuracy;
1534 let prediction_error = (predicted_accuracy - actual_accuracy).abs();
1535
1536 let measurement = QualityMeasurement {
1537 algorithm: selection.algorithm.clone(),
1538 data_characteristics: DataCharacteristics {
1539 size_category: SizeCategory::Medium,
1540 dimensionality_category: DimensionalityCategory::Medium,
1541 density_category: DensityCategory::Medium,
1542 clustering_tendency: ClusteringTendencyCategory::Structured,
1543 noise_level: NoiseLevel::Medium,
1544 distribution_type: DistributionType::Gaussian,
1545 },
1546 predicted_quality: predicted_accuracy,
1547 actual_quality: actual_accuracy,
1548 prediction_error,
1549 timestamp: Instant::now(),
1550 };
1551
1552 self.quality_predictor
1553 .quality_history
1554 .push_back(measurement);
1555
1556 if self.quality_predictor.quality_history.len() > 10000 {
1558 self.quality_predictor.quality_history.pop_front();
1559 }
1560
1561 Ok(())
1562 }
1563
1564 fn calculate_selection_accuracy(
1566 &self,
1567 selection: &AlgorithmSelection,
1568 actual_performance: &ActualPerformance,
1569 ) -> f64 {
1570 let time_accuracy = 1.0
1571 - (selection.performance_prediction.execution_time - actual_performance.execution_time)
1572 .abs()
1573 / selection
1574 .performance_prediction
1575 .execution_time
1576 .max(actual_performance.execution_time);
1577
1578 let accuracy_accuracy = 1.0
1579 - (selection.performance_prediction.accuracy_score - actual_performance.accuracy).abs();
1580
1581 (time_accuracy + accuracy_accuracy) / 2.0
1582 }
1583}
1584
/// One candidate algorithm together with the data used to rank it.
#[derive(Debug, Clone)]
pub struct AlgorithmEvaluation {
    /// The candidate algorithm.
    pub algorithm: SelectedAlgorithm,
    /// Parameters the candidate would run with.
    pub parameters: AlgorithmParameters,
    /// Predicted performance for the candidate.
    pub performance_prediction: PerformancePrediction,
    /// Ranking score; `select_best_candidate` keeps the candidate with the
    /// highest value.
    pub fitness_score: f64,
    /// Confidence value copied into the resulting `AlgorithmSelection`.
    pub confidence: f64,
}
1594
/// Output produced by running a selected algorithm.
#[derive(Debug)]
pub struct AlgorithmResult {
    /// Result array; the placeholder `execute_algorithm` fills it with a copy
    /// of its input data.
    pub resultdata: Array2<f64>,
    /// Memory used by the run (presumably bytes — TODO confirm the unit).
    pub memory_usage: usize,
    /// Accuracy achieved by the run.
    pub accuracy: f64,
    /// Free-form key/value details about the execution.
    pub execution_details: HashMap<String, String>,
}
1603
/// Performance figures observed for an executed algorithm, used to compare
/// against the prediction made at selection time.
#[derive(Debug, Clone)]
pub struct ActualPerformance {
    /// Observed execution time (presumably seconds — TODO confirm the unit).
    pub execution_time: f64,
    /// Observed memory usage (presumably bytes — TODO confirm the unit).
    pub memory_usage: usize,
    /// Observed accuracy; compared with the predicted `accuracy_score`.
    pub accuracy: f64,
    /// Energy consumed by the run (unit not established in this part of the
    /// file — TODO confirm).
    pub energy_consumed: f64,
}
1612
/// Bundles an algorithm's output with the observed performance and a score
/// for how well the selection's prediction held up.
#[derive(Debug)]
pub struct ExecutionResult {
    /// Output of the executed algorithm.
    pub algorithm_result: AlgorithmResult,
    /// Performance observed during execution.
    pub actual_performance: ActualPerformance,
    /// Prediction-versus-reality score (presumably the value returned by
    /// `calculate_selection_accuracy` — confirm against the producer).
    pub selection_accuracy: f64,
}
1620
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    /// Four corner points of the unit square, shared by the data-driven tests.
    fn unit_square() -> Array2<f64> {
        array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]
    }

    /// The builder-style setters on `SelectionContext` store what they are given.
    #[test]
    fn test_selection_context() {
        let ctx = SelectionContext::new()
            .with_accuracy_priority(0.9)
            .with_speed_priority(0.7)
            .with_real_time_requirement(true);

        assert_eq!(ctx.accuracy_priority, 0.9);
        assert_eq!(ctx.speed_priority, 0.7);
        assert!(ctx.real_time_requirement);
    }

    /// The builder-style setters on the selector switch the matching flags on.
    #[test]
    fn test_adaptive_selector_creation() {
        let built = AdaptiveAlgorithmSelector::new()
            .with_performance_learning(true)
            .with_resource_awareness(true)
            .with_quality_optimization(true);

        assert!(built.performance_learning);
        assert!(built.resource_awareness);
        assert!(built.quality_optimization);
    }

    /// A four-point, two-column dataset is categorised as tiny and
    /// low-dimensional.
    #[test]
    fn testdata_characteristics() {
        let mut selector = AdaptiveAlgorithmSelector::new();
        let points = unit_square();

        let analysis = selector.analyzedata_characteristics(&points.view());
        assert!(analysis.is_ok());

        let summary = analysis.expect("Operation failed");
        assert_eq!(summary.size_category, SizeCategory::Tiny);
        assert_eq!(summary.dimensionality_category, DimensionalityCategory::Low);
    }

    /// Selection on a small dataset succeeds, picks one of the expected
    /// algorithms, and reports a positive confidence.
    #[cfg(feature = "async")]
    #[tokio::test]
    async fn test_algorithm_selection() {
        let mut selector = AdaptiveAlgorithmSelector::new();
        let ctx = SelectionContext::new();
        let points = unit_square();

        let outcome = selector
            .select_optimal_algorithm(&points.view(), &ctx)
            .await;
        assert!(outcome.is_ok());

        let chosen = outcome.expect("Operation failed");
        let is_expected_algorithm = matches!(
            chosen.algorithm,
            SelectedAlgorithm::KMeans | SelectedAlgorithm::DBScan | SelectedAlgorithm::KDTreeNN
        );
        assert!(is_expected_algorithm);
        assert!(chosen.confidence > 0.0);
    }

    /// Executing a selection with feedback succeeds and yields a selection
    /// accuracy inside `[0, 1]`.
    #[cfg(feature = "async")]
    #[tokio::test]
    async fn test_execution_with_feedback() {
        let mut selector = AdaptiveAlgorithmSelector::new().with_performance_learning(true);
        let ctx = SelectionContext::new();
        let points = unit_square();

        let chosen = selector
            .select_optimal_algorithm(&points.view(), &ctx)
            .await
            .expect("Operation failed");

        let run = selector
            .execute_with_feedback(&chosen, &points.view())
            .await;
        assert!(run.is_ok());

        let feedback = run.expect("Operation failed");
        assert!((0.0..=1.0).contains(&feedback.selection_accuracy));
    }
}