use crate::error::{SpatialError, SpatialResult};
use scirs2_core::ndarray::{Array1, Array2, ArrayView2};
use statrs::statistics::Statistics;
use std::collections::{HashMap, VecDeque};
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::RwLock;

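/// Adaptive algorithm selector that chooses a spatial algorithm and its parameters
/// based on data characteristics, past performance, resource availability, and
/// quality goals. Selections are cached and can be refined through execution feedback.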
#[derive(Debug)]
pub struct AdaptiveAlgorithmSelector {
    strategies: Vec<SelectionStrategy>,
    performance_learning: bool,
    resource_awareness: bool,
    quality_optimization: bool,
    ensemble_methods: bool,
    performance_history: Arc<RwLock<PerformanceHistory>>,
    #[allow(dead_code)]
    pattern_analyzer: PatternAnalyzer,
    resource_monitor: ResourceMonitor,
    quality_predictor: QualityPredictor,
    selection_cache: Arc<RwLock<SelectionCache>>,
}

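/// Strategies that can drive algorithm selection; several strategies may be
/// combined, including as an explicit `Hybrid` composition.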
#[derive(Debug, Clone)]
pub enum SelectionStrategy {
    PatternBased,
    HistoryBased,
    ResourceAware,
    QualityOptimized,
    EnsembleBased,
    Hybrid(Vec<SelectionStrategy>),
}

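/// User- and environment-supplied preferences that guide a selection, such as
/// accuracy/speed priorities, memory limits, and real-time requirements.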
#[derive(Debug, Clone)]
pub struct SelectionContext {
    pub accuracy_priority: f64,
    pub speed_priority: f64,
    pub memory_constraint: usize,
    pub real_time_requirement: bool,
    pub energy_efficiency: f64,
    pub quality_tolerance: f64,
    pub user_preferences: HashMap<String, f64>,
    pub environmental_constraints: EnvironmentalConstraints,
}

#[derive(Debug, Clone)]
pub struct EnvironmentalConstraints {
    pub available_cores: usize,
    pub available_memory: usize,
    pub gpu_available: bool,
    pub network_bandwidth: Option<f64>,
    pub power_budget: Option<f64>,
    pub thermal_budget: Option<f64>,
}

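/// The outcome of a selection: the chosen algorithm, its parameters, a
/// performance prediction, and the reasoning behind the choice.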
#[derive(Debug, Clone)]
pub struct AlgorithmSelection {
    pub algorithm: SelectedAlgorithm,
    pub parameters: AlgorithmParameters,
    pub performance_prediction: PerformancePrediction,
    pub confidence: f64,
    pub reasoning: SelectionReasoning,
    pub alternatives: Vec<AlgorithmSelection>,
}

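/// Spatial algorithms the selector can choose from, including an ensemble of
/// several algorithms.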
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum SelectedAlgorithm {
    KMeans,
    DBScan,
    HierarchicalClustering,
    KDTreeNN,
    BallTreeNN,
    QuantumClustering,
    NeuromorphicClustering,
    TensorCoreAccelerated,
    DistributedProcessing,
    Ensemble(Vec<SelectedAlgorithm>),
}

#[derive(Debug, Clone)]
pub struct AlgorithmParameters {
    pub core_params: HashMap<String, ParameterValue>,
    pub optimization_params: HashMap<String, ParameterValue>,
    pub resource_params: HashMap<String, ParameterValue>,
}

#[derive(Debug, Clone)]
pub enum ParameterValue {
    Integer(i64),
    Float(f64),
    Boolean(bool),
    String(String),
    Array(Vec<f64>),
}

#[derive(Debug, Clone)]
pub struct PerformancePrediction {
    pub execution_time: f64,
    pub memory_usage: usize,
    pub accuracy_score: f64,
    pub energy_consumption: f64,
    pub confidence_intervals: ConfidenceIntervals,
}

#[derive(Debug, Clone)]
pub struct ConfidenceIntervals {
    pub execution_time_range: (f64, f64),
    pub memory_usage_range: (usize, usize),
    pub accuracy_range: (f64, f64),
}

#[derive(Debug, Clone)]
pub struct SelectionReasoning {
    pub primary_factors: Vec<String>,
    pub decision_weights: HashMap<String, f64>,
    pub alternatives_considered: usize,
    pub confidence_factors: Vec<String>,
}

#[derive(Debug)]
pub struct PerformanceHistory {
    records: HashMap<SelectedAlgorithm, VecDeque<PerformanceRecord>>,
    #[allow(dead_code)]
    pattern_performance: HashMap<DataPattern, Vec<(SelectedAlgorithm, f64)>>,
    #[allow(dead_code)]
    trends: HashMap<SelectedAlgorithm, PerformanceTrend>,
}

#[derive(Debug, Clone)]
pub struct PerformanceRecord {
    pub data_characteristics: DataCharacteristics,
    pub execution_time: f64,
    pub memory_usage: usize,
    pub accuracy: f64,
    pub energy_consumed: f64,
    pub timestamp: Instant,
    pub context: SelectionContext,
}

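/// Categorical summary of a dataset (size, dimensionality, density, clustering
/// tendency, noise level, and distribution type) used to match algorithms to data.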
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct DataCharacteristics {
    pub size_category: SizeCategory,
    pub dimensionality_category: DimensionalityCategory,
    pub density_category: DensityCategory,
    pub clustering_tendency: ClusteringTendencyCategory,
    pub noise_level: NoiseLevel,
    pub distribution_type: DistributionType,
}

#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum SizeCategory {
    Tiny,
    Small,
    Medium,
    Large,
    Huge,
}

#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum DimensionalityCategory {
    Low,
    Medium,
    High,
    VeryHigh,
}

#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum DensityCategory {
    Sparse,
    Medium,
    Dense,
}

#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum ClusteringTendencyCategory {
    Random,
    Structured,
    HighlyStructured,
}

#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum NoiseLevel {
    Low,
    Medium,
    High,
}

#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum DistributionType {
    Uniform,
    Gaussian,
    Multimodal,
    Skewed,
    Unknown,
}

#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct DataPattern {
    pub characteristics: DataCharacteristics,
    pub context_hash: u64,
}

#[derive(Debug, Clone)]
pub struct PerformanceTrend {
    pub trend_direction: TrendDirection,
    pub trend_strength: f64,
    pub recent_change: f64,
    pub stability_score: f64,
}

#[derive(Debug, Clone)]
pub enum TrendDirection {
    Improving,
    Stable,
    Degrading,
}

#[derive(Debug)]
pub struct PatternAnalyzer {
    #[allow(dead_code)]
    pattern_models: HashMap<String, PatternModel>,
    #[allow(dead_code)]
    feature_extractors: Vec<FeatureExtractor>,
    #[allow(dead_code)]
    pattern_cache: HashMap<u64, DataCharacteristics>,
}

#[derive(Debug)]
pub struct PatternModel {
    pub model_type: PatternModelType,
    pub parameters: Vec<f64>,
    pub accuracy: f64,
    pub last_update: Instant,
}

#[derive(Debug)]
pub enum PatternModelType {
    StatisticalAnalysis,
    MachineLearning,
    HeuristicRules,
}

#[derive(Debug)]
pub struct FeatureExtractor {
    pub name: String,
    pub compute_features: fn(&ArrayView2<'_, f64>) -> Vec<f64>,
}

#[derive(Debug)]
pub struct ResourceMonitor {
    cpu_usage: f64,
    memory_usage: usize,
    #[allow(dead_code)]
    gpu_status: GpuStatus,
    #[allow(dead_code)]
    network_status: NetworkStatus,
    #[allow(dead_code)]
    power_consumption: f64,
    #[allow(dead_code)]
    temperature: f64,
    #[allow(dead_code)]
    update_interval: Duration,
    last_update: Instant,
}

#[derive(Debug)]
pub struct GpuStatus {
    pub available: bool,
    pub utilization: f64,
    pub memory_usage: usize,
    pub temperature: f64,
}

#[derive(Debug)]
pub struct NetworkStatus {
    pub bandwidth: f64,
    pub latency: f64,
    pub packet_loss: f64,
}

#[derive(Debug)]
pub struct QualityPredictor {
    #[allow(dead_code)]
    quality_models: HashMap<SelectedAlgorithm, QualityModel>,
    #[allow(dead_code)]
    cv_results: HashMap<SelectedAlgorithm, Vec<f64>>,
    quality_history: VecDeque<QualityMeasurement>,
}

#[derive(Debug)]
pub struct QualityModel {
    pub coefficients: Vec<f64>,
    pub intercept: f64,
    pub r_squared: f64,
    pub training_size: usize,
}

#[derive(Debug, Clone)]
pub struct QualityMeasurement {
    pub algorithm: SelectedAlgorithm,
    pub data_characteristics: DataCharacteristics,
    pub predicted_quality: f64,
    pub actual_quality: f64,
    pub prediction_error: f64,
    pub timestamp: Instant,
}

#[derive(Debug)]
pub struct SelectionCache {
    cache: HashMap<CacheKey, CachedSelection>,
    #[allow(dead_code)]
    hit_count: u64,
    #[allow(dead_code)]
    miss_count: u64,
    max_size: usize,
}

#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct CacheKey {
    pub data_hash: u64,
    pub context_hash: u64,
    pub time_bucket: u64,
}

#[derive(Debug, Clone)]
pub struct CachedSelection {
    pub selection: AlgorithmSelection,
    pub timestamp: Instant,
    pub use_count: u64,
    pub success_rate: f64,
}

impl Default for SelectionContext {
    fn default() -> Self {
        Self::new()
    }
}

impl SelectionContext {
    pub fn new() -> Self {
        Self {
            accuracy_priority: 0.7,
            speed_priority: 0.7,
            memory_constraint: usize::MAX,
            real_time_requirement: false,
            energy_efficiency: 0.5,
            quality_tolerance: 0.1,
            user_preferences: HashMap::new(),
            environmental_constraints: EnvironmentalConstraints {
                available_cores: num_cpus::get(),
                available_memory: 8_000_000_000,
                gpu_available: false,
                network_bandwidth: None,
                power_budget: None,
                thermal_budget: None,
            },
        }
    }

    pub fn with_accuracy_priority(mut self, priority: f64) -> Self {
        self.accuracy_priority = priority.clamp(0.0, 1.0);
        self
    }

    pub fn with_speed_priority(mut self, priority: f64) -> Self {
        self.speed_priority = priority.clamp(0.0, 1.0);
        self
    }

    pub fn with_memory_constraint(mut self, bytes: usize) -> Self {
        self.memory_constraint = bytes;
        self
    }

    pub fn with_real_time_requirement(mut self, required: bool) -> Self {
        self.real_time_requirement = required;
        self
    }
}

impl Default for AdaptiveAlgorithmSelector {
    fn default() -> Self {
        Self::new()
    }
}

impl AdaptiveAlgorithmSelector {
    pub fn new() -> Self {
        Self {
            strategies: vec![
                SelectionStrategy::PatternBased,
                SelectionStrategy::HistoryBased,
                SelectionStrategy::ResourceAware,
            ],
            performance_learning: false,
            resource_awareness: false,
            quality_optimization: false,
            ensemble_methods: false,
            performance_history: Arc::new(RwLock::new(PerformanceHistory {
                records: HashMap::new(),
                pattern_performance: HashMap::new(),
                trends: HashMap::new(),
            })),
            pattern_analyzer: PatternAnalyzer {
                pattern_models: HashMap::new(),
                feature_extractors: Vec::new(),
                pattern_cache: HashMap::new(),
            },
            resource_monitor: ResourceMonitor {
                cpu_usage: 0.0,
                memory_usage: 0,
                gpu_status: GpuStatus {
                    available: false,
                    utilization: 0.0,
                    memory_usage: 0,
                    temperature: 0.0,
                },
                network_status: NetworkStatus {
                    bandwidth: 0.0,
                    latency: 0.0,
                    packet_loss: 0.0,
                },
                power_consumption: 0.0,
                temperature: 0.0,
                update_interval: Duration::from_secs(1),
                last_update: Instant::now(),
            },
            quality_predictor: QualityPredictor {
                quality_models: HashMap::new(),
                cv_results: HashMap::new(),
                quality_history: VecDeque::new(),
            },
            selection_cache: Arc::new(RwLock::new(SelectionCache {
                cache: HashMap::new(),
                hit_count: 0,
                miss_count: 0,
                max_size: 1000,
            })),
        }
    }

    pub fn with_performance_learning(mut self, enabled: bool) -> Self {
        self.performance_learning = enabled;
        if enabled {
            self.strategies.push(SelectionStrategy::HistoryBased);
        }
        self
    }

    pub fn with_resource_awareness(mut self, enabled: bool) -> Self {
        self.resource_awareness = enabled;
        if enabled {
            self.strategies.push(SelectionStrategy::ResourceAware);
        }
        self
    }

    pub fn with_quality_optimization(mut self, enabled: bool) -> Self {
        self.quality_optimization = enabled;
        if enabled {
            self.strategies.push(SelectionStrategy::QualityOptimized);
        }
        self
    }

    pub fn with_ensemble_methods(mut self, enabled: bool) -> Self {
        self.ensemble_methods = enabled;
        if enabled {
            self.strategies.push(SelectionStrategy::EnsembleBased);
        }
        self
    }

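    /// Select the most suitable algorithm for `data` under the given `context`.
    ///
    /// The selection pipeline checks the selection cache, analyzes data
    /// characteristics, refreshes the resource monitor, generates candidate
    /// algorithms, evaluates each candidate, and caches the winning selection.
    ///
    /// # Example
    ///
    /// ```ignore
    /// // Sketch of typical usage (mirrors the unit tests below).
    /// let mut selector = AdaptiveAlgorithmSelector::new();
    /// let context = SelectionContext::new().with_accuracy_priority(0.9);
    /// let data = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
    /// let selection = selector
    ///     .select_optimal_algorithm(&data.view(), &context)
    ///     .await?;
    /// println!("chose {:?}", selection.algorithm);
    /// ```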
    pub async fn select_optimal_algorithm(
        &mut self,
        data: &ArrayView2<'_, f64>,
        context: &SelectionContext,
    ) -> SpatialResult<AlgorithmSelection> {
        if let Some(cached) = self.check_cache(data, context).await? {
            return Ok(cached.selection);
        }

        let data_characteristics = self.analyzedata_characteristics(data)?;

        self.update_resource_monitor().await?;

        let candidates = self
            .generate_candidate_algorithms(&data_characteristics, context)
            .await?;

        let mut evaluations = Vec::new();
        for candidate in candidates {
            let evaluation = self
                .evaluate_candidate(&candidate, &data_characteristics, context)
                .await?;
            evaluations.push(evaluation);
        }

        let best_selection = self.select_best_candidate(evaluations, context)?;

        self.cache_selection(data, context, &best_selection).await?;

        Ok(best_selection)
    }

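    /// Execute the selected algorithm and feed the measured performance back
    /// into the performance history and quality predictor (when those learning
    /// features are enabled), returning the result together with a measure of
    /// how accurate the selection's prediction was.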
    pub async fn execute_with_feedback(
        &mut self,
        selection: &AlgorithmSelection,
        data: &ArrayView2<'_, f64>,
    ) -> SpatialResult<ExecutionResult> {
        let start_time = Instant::now();

        let algorithm_result = self.execute_algorithm(selection, data).await?;

        let execution_time = start_time.elapsed().as_secs_f64();

        let actual_performance = ActualPerformance {
            execution_time,
            memory_usage: algorithm_result.memory_usage,
            accuracy: algorithm_result.accuracy,
            energy_consumed: 0.0,
        };

        if self.performance_learning {
            self.update_performance_history(selection, data, &actual_performance)
                .await?;
        }

        if self.quality_optimization {
            self.update_quality_predictor(selection, &actual_performance)
                .await?;
        }

        Ok(ExecutionResult {
            algorithm_result,
            actual_performance: actual_performance.clone(),
            selection_accuracy: self.calculate_selection_accuracy(selection, &actual_performance),
        })
    }

    #[allow(dead_code)]
    fn default_feature_extractors(&self) -> Vec<FeatureExtractor> {
        vec![
            FeatureExtractor {
                name: "basic_stats".to_string(),
                compute_features: |data| {
                    let (n_points, n_dims) = data.dim();
                    vec![n_points as f64, n_dims as f64]
                },
            },
            FeatureExtractor {
                name: "distribution_stats".to_string(),
                compute_features: |data| {
                    let (_, n_dims) = data.dim();
                    let mut features = Vec::new();

                    for dim in 0..n_dims {
                        let column = data.column(dim);
                        // Compute mean and (population) standard deviation explicitly.
                        let mean = column.sum() / column.len() as f64;
                        let std = (column.mapv(|x| (x - mean).powi(2)).sum()
                            / column.len() as f64)
                            .sqrt();
                        features.push(mean);
                        features.push(std);
                    }

                    features
                },
            },
        ]
    }

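    /// Look up a previously cached selection for this data/context combination;
    /// cached entries are considered valid for five minutes.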
    async fn check_cache(
        &self,
        data: &ArrayView2<'_, f64>,
        context: &SelectionContext,
    ) -> SpatialResult<Option<CachedSelection>> {
        let cache_key = self.compute_cache_key(data, context);
        let cache = self.selection_cache.read().await;

        if let Some(cached) = cache.cache.get(&cache_key) {
            if cached.timestamp.elapsed() < Duration::from_secs(300) {
                return Ok(Some(cached.clone()));
            }
        }

        Ok(None)
    }

    fn compute_cache_key(
        &self,
        data: &ArrayView2<'_, f64>,
        context: &SelectionContext,
    ) -> CacheKey {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut data_hasher = DefaultHasher::new();
        let (n_points, n_dims) = data.dim();
        n_points.hash(&mut data_hasher);
        n_dims.hash(&mut data_hasher);

        for (i, point) in data.outer_iter().enumerate() {
            if i % (n_points / 10 + 1) == 0 {
                for &coord in point.iter() {
                    (coord as i64).hash(&mut data_hasher);
                }
            }
        }

        let data_hash = data_hasher.finish();

        let mut context_hasher = DefaultHasher::new();
        context
            .accuracy_priority
            .to_bits()
            .hash(&mut context_hasher);
        context.speed_priority.to_bits().hash(&mut context_hasher);
        context.memory_constraint.hash(&mut context_hasher);
        context.real_time_requirement.hash(&mut context_hasher);

        let context_hash = context_hasher.finish();

        // Bucket wall-clock time into 5-minute windows so cache keys expire over time.
        // (`Instant::now().elapsed()` is always ~0 and cannot be used for this.)
        let time_bucket = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|d| d.as_secs())
            .unwrap_or(0)
            / 300;

        CacheKey {
            data_hash,
            context_hash,
            time_bucket,
        }
    }

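    /// Derive categorical data characteristics (size, dimensionality, density,
    /// clustering tendency, noise level, distribution) from the raw point data.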
    fn analyzedata_characteristics(
        &mut self,
        data: &ArrayView2<'_, f64>,
    ) -> SpatialResult<DataCharacteristics> {
        let (n_points, n_dims) = data.dim();

        let size_category = match n_points {
            0..=99 => SizeCategory::Tiny,
            100..=999 => SizeCategory::Small,
            1000..=99_999 => SizeCategory::Medium,
            100_000..=999_999 => SizeCategory::Large,
            _ => SizeCategory::Huge,
        };

        let dimensionality_category = match n_dims {
            1..=3 => DimensionalityCategory::Low,
            4..=20 => DimensionalityCategory::Medium,
            21..=100 => DimensionalityCategory::High,
            _ => DimensionalityCategory::VeryHigh,
        };

        let density = self.estimatedata_density(data)?;
        let density_category = if density < 0.3 {
            DensityCategory::Sparse
        } else if density < 0.7 {
            DensityCategory::Medium
        } else {
            DensityCategory::Dense
        };

        // The Hopkins-style statistic returned by `estimate_clustering_tendency` is
        // close to 1.0 for clustered data and close to 0.5 for random data, so
        // higher values indicate more structure.
        let clustering_tendency = self.estimate_clustering_tendency(data)?;
        let clustering_tendency_category = if clustering_tendency > 0.75 {
            ClusteringTendencyCategory::HighlyStructured
        } else if clustering_tendency > 0.6 {
            ClusteringTendencyCategory::Structured
        } else {
            ClusteringTendencyCategory::Random
        };

        let noise_level = self.estimate_noise_level(data)?;
        let noise_level_category = if noise_level < 0.3 {
            NoiseLevel::Low
        } else if noise_level < 0.7 {
            NoiseLevel::Medium
        } else {
            NoiseLevel::High
        };

        let distribution_type = self.estimate_distribution_type(data)?;

        Ok(DataCharacteristics {
            size_category,
            dimensionality_category,
            density_category,
            clustering_tendency: clustering_tendency_category,
            noise_level: noise_level_category,
            distribution_type,
        })
    }

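    /// Estimate relative data density from the mean inverse nearest-neighbor
    /// distance over a sample of points, clamped to `[0, 1]`.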
    fn estimatedata_density(&self, data: &ArrayView2<'_, f64>) -> SpatialResult<f64> {
        let (n_points, _n_dims) = data.dim();

        if n_points < 2 {
            return Ok(0.0);
        }

        let sample_size = n_points.min(100);
        let mut total_inverse_distance = 0.0;
        let mut count = 0;

        for i in 0..sample_size {
            let mut nearest_distance = f64::INFINITY;

            for j in 0..n_points {
                if i != j {
                    let dist: f64 = data
                        .row(i)
                        .iter()
                        .zip(data.row(j).iter())
                        .map(|(&a, &b)| (a - b).powi(2))
                        .sum::<f64>()
                        .sqrt();

                    if dist < nearest_distance {
                        nearest_distance = dist;
                    }
                }
            }

            if nearest_distance > 0.0 && nearest_distance.is_finite() {
                total_inverse_distance += 1.0 / nearest_distance;
                count += 1;
            }
        }

        Ok(if count > 0 {
            (total_inverse_distance / count as f64).min(1.0)
        } else {
            0.0
        })
    }

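    /// Estimate clustering tendency via a simplified Hopkins statistic: compare
    /// nearest-neighbor distances of real points against those of uniformly
    /// sampled random points within the data bounds. Values near 1.0 indicate
    /// clustered data; values near 0.5 indicate random data.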
    fn estimate_clustering_tendency(&self, data: &ArrayView2<'_, f64>) -> SpatialResult<f64> {
        let (n_points, n_dims) = data.dim();

        if n_points < 10 {
            return Ok(0.5);
        }

        let sample_size = n_points.min(20);
        let mut real_distances = Vec::new();
        let mut random_distances = Vec::new();

        for i in 0..sample_size {
            let mut min_dist = f64::INFINITY;
            for j in 0..n_points {
                if i != j {
                    let dist: f64 = data
                        .row(i)
                        .iter()
                        .zip(data.row(j).iter())
                        .map(|(&a, &b)| (a - b).powi(2))
                        .sum::<f64>()
                        .sqrt();
                    min_dist = min_dist.min(dist);
                }
            }
            real_distances.push(min_dist);
        }

        let bounds = self.getdata_bounds(data);
        for _ in 0..sample_size {
            let random_point: Array1<f64> = Array1::from_shape_fn(n_dims, |i| {
                scirs2_core::random::random::<f64>() * (bounds[i].1 - bounds[i].0) + bounds[i].0
            });

            let mut min_dist = f64::INFINITY;
            for j in 0..n_points {
                let dist: f64 = random_point
                    .iter()
                    .zip(data.row(j).iter())
                    .map(|(&a, &b)| (a - b).powi(2))
                    .sum::<f64>()
                    .sqrt();
                min_dist = min_dist.min(dist);
            }
            random_distances.push(min_dist);
        }

        let sum_random: f64 = random_distances.iter().sum();
        let sum_real: f64 = real_distances.iter().sum();
        let hopkins = sum_random / (sum_random + sum_real);

        Ok(hopkins)
    }

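    /// Estimate the noise level from the spread of k-nearest-neighbor based
    /// local density scores (coefficient of variation, clamped to `[0, 1]`).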
    fn estimate_noise_level(&self, data: &ArrayView2<'_, f64>) -> SpatialResult<f64> {
        let (n_points, _n_dims) = data.dim();

        if n_points < 10 {
            return Ok(0.0);
        }

        let sample_size = n_points.min(50);
        let k = 5;
        let mut outlier_scores = Vec::new();

        for i in 0..sample_size {
            let mut distances = Vec::new();

            for j in 0..n_points {
                if i != j {
                    let dist: f64 = data
                        .row(i)
                        .iter()
                        .zip(data.row(j).iter())
                        .map(|(&a, &b)| (a - b).powi(2))
                        .sum::<f64>()
                        .sqrt();
                    distances.push(dist);
                }
            }

            distances.sort_by(|a, b| a.partial_cmp(b).unwrap());

            if distances.len() >= k {
                let k_distance = distances[k - 1];
                let local_density = k as f64 / k_distance;
                outlier_scores.push(1.0 / local_density);
            }
        }

        if outlier_scores.is_empty() {
            Ok(0.0)
        } else {
            let mean_score = outlier_scores.iter().sum::<f64>() / outlier_scores.len() as f64;
            let variance = outlier_scores
                .iter()
                .map(|&score| (score - mean_score).powi(2))
                .sum::<f64>()
                / outlier_scores.len() as f64;

            Ok((variance.sqrt() / mean_score).min(1.0))
        }
    }

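    /// Classify the per-dimension distribution as uniform, Gaussian, or
    /// multimodal using simple range, skewness, and kurtosis heuristics.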
    fn estimate_distribution_type(
        &self,
        data: &ArrayView2<'_, f64>,
    ) -> SpatialResult<DistributionType> {
        let (n_points, n_dims) = data.dim();

        if n_points < 10 {
            return Ok(DistributionType::Unknown);
        }

        let mut uniform_count = 0;
        let mut gaussian_count = 0;

        for dim in 0..n_dims {
            let column = data.column(dim);
            // Compute mean and (population) standard deviation explicitly.
            let mean = column.sum() / column.len() as f64;
            let std = (column.mapv(|x| (x - mean).powi(2)).sum() / column.len() as f64).sqrt();

            if std < 1e-6 {
                continue;
            }

            let min_val = column.fold(f64::INFINITY, |a, &b| a.min(b));
            let max_val = column.fold(f64::NEG_INFINITY, |a, &b| a.max(b));
            let range = max_val - min_val;

            let expected_std_uniform = range / (12.0_f64).sqrt();
            if (std - expected_std_uniform).abs() / expected_std_uniform < 0.2 {
                uniform_count += 1;
            }

            let normalized: Vec<f64> = column.iter().map(|&x| (x - mean) / std).collect();
            let skewness =
                normalized.iter().map(|&x| x.powi(3)).sum::<f64>() / normalized.len() as f64;
            let kurtosis =
                normalized.iter().map(|&x| x.powi(4)).sum::<f64>() / normalized.len() as f64;

            if skewness.abs() < 0.5 && (kurtosis - 3.0).abs() < 1.0 {
                gaussian_count += 1;
            }
        }

        if uniform_count > n_dims / 2 {
            Ok(DistributionType::Uniform)
        } else if gaussian_count > n_dims / 2 {
            Ok(DistributionType::Gaussian)
        } else {
            Ok(DistributionType::Multimodal)
        }
    }

    fn getdata_bounds(&self, data: &ArrayView2<'_, f64>) -> Vec<(f64, f64)> {
        let (_, n_dims) = data.dim();
        let mut bounds = Vec::new();

        for dim in 0..n_dims {
            let column = data.column(dim);
            let min_val = column.fold(f64::INFINITY, |a, &b| a.min(b));
            let max_val = column.fold(f64::NEG_INFINITY, |a, &b| a.max(b));
            bounds.push((min_val, max_val));
        }

        bounds
    }

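    /// Refresh the resource monitor. The current implementation records fixed
    /// placeholder values for CPU and memory usage rather than querying the OS.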
    async fn update_resource_monitor(&mut self) -> SpatialResult<()> {
        self.resource_monitor.cpu_usage = 0.5;
        self.resource_monitor.memory_usage = 4_000_000_000;
        self.resource_monitor.last_update = Instant::now();
        Ok(())
    }

    async fn generate_candidate_algorithms(
        &self,
        characteristics: &DataCharacteristics,
        context: &SelectionContext,
    ) -> SpatialResult<Vec<SelectedAlgorithm>> {
        let mut candidates = Vec::new();

        candidates.push(SelectedAlgorithm::KMeans);
        candidates.push(SelectedAlgorithm::DBScan);
        candidates.push(SelectedAlgorithm::KDTreeNN);

        match characteristics.size_category {
            SizeCategory::Huge => {
                candidates.push(SelectedAlgorithm::DistributedProcessing);
                candidates.push(SelectedAlgorithm::TensorCoreAccelerated);
            }
            SizeCategory::Large => {
                candidates.push(SelectedAlgorithm::TensorCoreAccelerated);
            }
            _ => {}
        }

        if context.accuracy_priority > 0.8 {
            candidates.push(SelectedAlgorithm::QuantumClustering);
        }

        if context.energy_efficiency > 0.8 {
            candidates.push(SelectedAlgorithm::NeuromorphicClustering);
        }

        if self.ensemble_methods {
            candidates.push(SelectedAlgorithm::Ensemble(vec![
                SelectedAlgorithm::KMeans,
                SelectedAlgorithm::DBScan,
            ]));
        }

        Ok(candidates)
    }

    async fn evaluate_candidate(
        &self,
        candidate: &SelectedAlgorithm,
        characteristics: &DataCharacteristics,
        context: &SelectionContext,
    ) -> SpatialResult<AlgorithmEvaluation> {
        let performance_prediction = self
            .predict_performance(candidate, characteristics, context)
            .await?;

        let fitness_score = self.calculate_fitness_score(&performance_prediction, context);

        let parameters = self.generate_parameters(candidate, characteristics, context)?;

        Ok(AlgorithmEvaluation {
            algorithm: candidate.clone(),
            parameters,
            performance_prediction,
            fitness_score,
            confidence: 0.8,
        })
    }

    async fn predict_performance(
        &self,
        algorithm: &SelectedAlgorithm,
        characteristics: &DataCharacteristics,
        _context: &SelectionContext,
    ) -> SpatialResult<PerformancePrediction> {
        let (base_time, base_memory, base_accuracy) = match algorithm {
            SelectedAlgorithm::KMeans => (1.0, 1_000_000, 0.8),
            SelectedAlgorithm::DBScan => (2.0, 1_500_000, 0.85),
            SelectedAlgorithm::KDTreeNN => (0.5, 800_000, 0.9),
            SelectedAlgorithm::QuantumClustering => (3.0, 2_000_000, 0.95),
            SelectedAlgorithm::NeuromorphicClustering => (1.5, 1_200_000, 0.88),
            SelectedAlgorithm::TensorCoreAccelerated => (0.3, 3_000_000, 0.9),
            SelectedAlgorithm::DistributedProcessing => (0.8, 5_000_000, 0.92),
            SelectedAlgorithm::HierarchicalClustering => (5.0, 2_500_000, 0.9),
            SelectedAlgorithm::BallTreeNN => (0.7, 1_000_000, 0.88),
            SelectedAlgorithm::Ensemble(_) => (2.5, 3_000_000, 0.95),
        };

        let size_factor = match characteristics.size_category {
            SizeCategory::Tiny => 0.1,
            SizeCategory::Small => 0.5,
            SizeCategory::Medium => 1.0,
            SizeCategory::Large => 3.0,
            SizeCategory::Huge => 10.0,
        };

        let dim_factor = match characteristics.dimensionality_category {
            DimensionalityCategory::Low => 0.8,
            DimensionalityCategory::Medium => 1.0,
            DimensionalityCategory::High => 1.5,
            DimensionalityCategory::VeryHigh => 2.5,
        };

        let execution_time = base_time * size_factor * dim_factor;
        let memory_usage = (base_memory as f64 * size_factor * dim_factor) as usize;
        let accuracy_score = base_accuracy
            * (if characteristics.noise_level == NoiseLevel::High {
                0.9
            } else {
                1.0
            });

        Ok(PerformancePrediction {
            execution_time,
            memory_usage,
            accuracy_score,
            energy_consumption: execution_time * 50.0,
            confidence_intervals: ConfidenceIntervals {
                execution_time_range: (execution_time * 0.8, execution_time * 1.2),
                memory_usage_range: (
                    (memory_usage as f64 * 0.9) as usize,
                    (memory_usage as f64 * 1.1) as usize,
                ),
                accuracy_range: (accuracy_score * 0.95, accuracy_score.min(1.0)),
            },
        })
    }

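    /// Combine predicted time, accuracy, memory, and energy into a single
    /// fitness score weighted by the priorities in the selection context.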
    fn calculate_fitness_score(
        &self,
        prediction: &PerformancePrediction,
        context: &SelectionContext,
    ) -> f64 {
        let time_score = if context.real_time_requirement && prediction.execution_time > 1.0 {
            0.0
        } else {
            1.0 / (1.0 + prediction.execution_time)
        };

        let memory_score = if prediction.memory_usage > context.memory_constraint {
            0.0
        } else {
            1.0 - (prediction.memory_usage as f64) / (context.memory_constraint as f64)
        };

        let accuracy_score = prediction.accuracy_score;

        let energy_score = 1.0 / (1.0 + prediction.energy_consumption / 100.0);

        context.speed_priority * time_score
            + context.accuracy_priority * accuracy_score
            + 0.2 * memory_score
            + context.energy_efficiency * energy_score
    }

    fn generate_parameters(
        &self,
        algorithm: &SelectedAlgorithm,
        characteristics: &DataCharacteristics,
        context: &SelectionContext,
    ) -> SpatialResult<AlgorithmParameters> {
        let mut core_params = HashMap::new();
        let optimization_params = HashMap::new();
        let mut resource_params = HashMap::new();

        match algorithm {
            SelectedAlgorithm::KMeans => {
                let k = match characteristics.clustering_tendency {
                    ClusteringTendencyCategory::HighlyStructured => 3,
                    ClusteringTendencyCategory::Structured => 5,
                    ClusteringTendencyCategory::Random => 2,
                };
                core_params.insert("n_clusters".to_string(), ParameterValue::Integer(k));
                core_params.insert("max_iter".to_string(), ParameterValue::Integer(300));
                core_params.insert("tol".to_string(), ParameterValue::Float(1e-4));
            }
            SelectedAlgorithm::DBScan => {
                let eps = match characteristics.density_category {
                    DensityCategory::Dense => 0.3,
                    DensityCategory::Medium => 0.5,
                    DensityCategory::Sparse => 1.0,
                };
                core_params.insert("eps".to_string(), ParameterValue::Float(eps));
                core_params.insert("min_samples".to_string(), ParameterValue::Integer(5));
            }
            SelectedAlgorithm::KDTreeNN => {
                core_params.insert("leaf_size".to_string(), ParameterValue::Integer(30));
            }
            _ => {
                core_params.insert("tolerance".to_string(), ParameterValue::Float(1e-6));
            }
        }

        resource_params.insert(
            "n_jobs".to_string(),
            ParameterValue::Integer(context.environmental_constraints.available_cores as i64),
        );

        Ok(AlgorithmParameters {
            core_params,
            optimization_params,
            resource_params,
        })
    }

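    /// Pick the evaluation with the highest fitness score and wrap it in an
    /// `AlgorithmSelection`, or return an error when no candidates were produced.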
    fn select_best_candidate(
        &self,
        evaluations: Vec<AlgorithmEvaluation>,
        _context: &SelectionContext,
    ) -> SpatialResult<AlgorithmSelection> {
        let best_evaluation = evaluations
            .into_iter()
            .max_by(|a, b| a.fitness_score.partial_cmp(&b.fitness_score).unwrap())
            .ok_or_else(|| SpatialError::InvalidInput("No candidate algorithms".to_string()))?;

        Ok(AlgorithmSelection {
            algorithm: best_evaluation.algorithm,
            parameters: best_evaluation.parameters,
            performance_prediction: best_evaluation.performance_prediction,
            confidence: best_evaluation.confidence,
            reasoning: SelectionReasoning {
                primary_factors: vec!["fitness_score".to_string()],
                decision_weights: HashMap::new(),
                alternatives_considered: 1,
                confidence_factors: vec!["historical_performance".to_string()],
            },
            alternatives: Vec::new(),
        })
    }

    async fn cache_selection(
        &self,
        data: &ArrayView2<'_, f64>,
        context: &SelectionContext,
        selection: &AlgorithmSelection,
    ) -> SpatialResult<()> {
        let cache_key = self.compute_cache_key(data, context);
        let cached_selection = CachedSelection {
            selection: selection.clone(),
            timestamp: Instant::now(),
            use_count: 1,
            success_rate: 1.0,
        };

        let mut cache = self.selection_cache.write().await;
        cache.cache.insert(cache_key, cached_selection);

        if cache.cache.len() > cache.max_size {
            let oldest_key = cache
                .cache
                .iter()
                .min_by_key(|(_, v)| v.timestamp)
                .map(|(k, _)| k.clone());

            if let Some(key) = oldest_key {
                cache.cache.remove(&key);
            }
        }

        Ok(())
    }

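    /// Placeholder execution: simulates work with a short sleep and returns the
    /// input data along with fixed resource and accuracy figures.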
    async fn execute_algorithm(
        &self,
        _selection: &AlgorithmSelection,
        data: &ArrayView2<'_, f64>,
    ) -> SpatialResult<AlgorithmResult> {
        tokio::time::sleep(Duration::from_millis(100)).await;

        Ok(AlgorithmResult {
            resultdata: data.to_owned(),
            memory_usage: 1_000_000,
            accuracy: 0.85,
            execution_details: HashMap::new(),
        })
    }

    async fn update_performance_history(
        &mut self,
        selection: &AlgorithmSelection,
        data: &ArrayView2<'_, f64>,
        actual_performance: &ActualPerformance,
    ) -> SpatialResult<()> {
        let data_characteristics = self.analyzedata_characteristics(data)?;

        let record = PerformanceRecord {
            data_characteristics,
            execution_time: actual_performance.execution_time,
            memory_usage: actual_performance.memory_usage,
            accuracy: actual_performance.accuracy,
            energy_consumed: actual_performance.energy_consumed,
            timestamp: Instant::now(),
            context: SelectionContext::new(),
        };

        let mut history = self.performance_history.write().await;
        history
            .records
            .entry(selection.algorithm.clone())
            .or_insert_with(VecDeque::new)
            .push_back(record);

        if let Some(algorithm_history) = history.records.get_mut(&selection.algorithm) {
            if algorithm_history.len() > 1000 {
                algorithm_history.pop_front();
            }
        }

        Ok(())
    }

    async fn update_quality_predictor(
        &mut self,
        selection: &AlgorithmSelection,
        actual_performance: &ActualPerformance,
    ) -> SpatialResult<()> {
        let predicted_accuracy = selection.performance_prediction.accuracy_score;
        let actual_accuracy = actual_performance.accuracy;
        let prediction_error = (predicted_accuracy - actual_accuracy).abs();

        let measurement = QualityMeasurement {
            algorithm: selection.algorithm.clone(),
            data_characteristics: DataCharacteristics {
                size_category: SizeCategory::Medium,
                dimensionality_category: DimensionalityCategory::Medium,
                density_category: DensityCategory::Medium,
                clustering_tendency: ClusteringTendencyCategory::Structured,
                noise_level: NoiseLevel::Medium,
                distribution_type: DistributionType::Gaussian,
            },
            predicted_quality: predicted_accuracy,
            actual_quality: actual_accuracy,
            prediction_error,
            timestamp: Instant::now(),
        };

        self.quality_predictor
            .quality_history
            .push_back(measurement);

        if self.quality_predictor.quality_history.len() > 10000 {
            self.quality_predictor.quality_history.pop_front();
        }

        Ok(())
    }

    fn calculate_selection_accuracy(
        &self,
        selection: &AlgorithmSelection,
        actual_performance: &ActualPerformance,
    ) -> f64 {
        let time_accuracy = 1.0
            - (selection.performance_prediction.execution_time - actual_performance.execution_time)
                .abs()
                / selection
                    .performance_prediction
                    .execution_time
                    .max(actual_performance.execution_time);

        let accuracy_accuracy = 1.0
            - (selection.performance_prediction.accuracy_score - actual_performance.accuracy).abs();

        (time_accuracy + accuracy_accuracy) / 2.0
    }
}

#[derive(Debug, Clone)]
pub struct AlgorithmEvaluation {
    pub algorithm: SelectedAlgorithm,
    pub parameters: AlgorithmParameters,
    pub performance_prediction: PerformancePrediction,
    pub fitness_score: f64,
    pub confidence: f64,
}

#[derive(Debug)]
pub struct AlgorithmResult {
    pub resultdata: Array2<f64>,
    pub memory_usage: usize,
    pub accuracy: f64,
    pub execution_details: HashMap<String, String>,
}

#[derive(Debug, Clone)]
pub struct ActualPerformance {
    pub execution_time: f64,
    pub memory_usage: usize,
    pub accuracy: f64,
    pub energy_consumed: f64,
}

#[derive(Debug)]
pub struct ExecutionResult {
    pub algorithm_result: AlgorithmResult,
    pub actual_performance: ActualPerformance,
    pub selection_accuracy: f64,
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_selection_context() {
        let context = SelectionContext::new()
            .with_accuracy_priority(0.9)
            .with_speed_priority(0.7)
            .with_real_time_requirement(true);

        assert_eq!(context.accuracy_priority, 0.9);
        assert_eq!(context.speed_priority, 0.7);
        assert!(context.real_time_requirement);
    }

    #[test]
    fn test_adaptive_selector_creation() {
        let selector = AdaptiveAlgorithmSelector::new()
            .with_performance_learning(true)
            .with_resource_awareness(true)
            .with_quality_optimization(true);

        assert!(selector.performance_learning);
        assert!(selector.resource_awareness);
        assert!(selector.quality_optimization);
    }

    #[test]
    fn testdata_characteristics() {
        let selector = AdaptiveAlgorithmSelector::new();
        let data = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];

        let mut selector_mut = selector;
        let characteristics = selector_mut.analyzedata_characteristics(&data.view());
        assert!(characteristics.is_ok());

        let chars = characteristics.unwrap();
        assert_eq!(chars.size_category, SizeCategory::Tiny);
        assert_eq!(chars.dimensionality_category, DimensionalityCategory::Low);
    }

    #[tokio::test]
    async fn test_algorithm_selection() {
        let mut selector = AdaptiveAlgorithmSelector::new();
        let context = SelectionContext::new();
        let data = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];

        let result = selector
            .select_optimal_algorithm(&data.view(), &context)
            .await;
        assert!(result.is_ok());

        let selection = result.unwrap();
        assert!(matches!(
            selection.algorithm,
            SelectedAlgorithm::KMeans | SelectedAlgorithm::DBScan | SelectedAlgorithm::KDTreeNN
        ));
        assert!(selection.confidence > 0.0);
    }

    #[tokio::test]
    async fn test_execution_with_feedback() {
        let mut selector = AdaptiveAlgorithmSelector::new().with_performance_learning(true);

        let context = SelectionContext::new();
        let data = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];

        let selection = selector
            .select_optimal_algorithm(&data.view(), &context)
            .await
            .unwrap();
        let execution_result = selector
            .execute_with_feedback(&selection, &data.view())
            .await;

        assert!(execution_result.is_ok());
        let result = execution_result.unwrap();
        assert!(result.selection_accuracy >= 0.0 && result.selection_accuracy <= 1.0);
    }
}