1use std::collections::{HashMap, VecDeque};
9use std::hash::Hash;
10use std::sync::{Arc, RwLock};
11use std::thread;
12use std::time::{Duration, Instant};
13
14use scirs2_core::random::{thread_rng, Rng};
15
16use sklears_core::error::{Result as SklResult, SklearsError};
17
/// One recorded execution of an operation: the data it ran on, the measured
/// metrics, the algorithm variant used, and the hardware it ran under.
#[derive(Debug, Clone)]
pub struct PerformanceProfile {
    /// Identifier of the profiled operation; used as the grouping key.
    pub operation_id: String,
    /// Shape/layout summary of the input data for this run.
    pub data_characteristics: DataCharacteristics,
    /// Measured runtime/memory/throughput metrics.
    pub metrics: ExecutionMetrics,
    /// Name of the algorithm variant that produced these metrics.
    pub algorithm_variant: String,
    /// Optimization level the variant ran with.
    pub optimization_level: OptimizationLevel,
    /// Hardware snapshot at profiling time.
    pub hardware_context: HardwareContext,
    /// When the profile was captured.
    pub timestamp: Instant,
}
36
/// Summary of a dataset's shape and layout, used as a lookup key for cached
/// algorithm choices. Fractional quantities (sparsity, cache friendliness)
/// are stored as integers scaled by 1000 so the type can derive `Eq`/`Hash`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DataCharacteristics {
    /// Number of rows/samples.
    pub n_samples: usize,
    /// Number of columns/features.
    pub n_features: usize,
    /// Sparsity fraction scaled by 1000 (0..=1000 encodes 0.0..=1.0).
    pub sparsity_scaled: u32,
    /// Size of one element in bytes.
    pub dtype_size: usize,
    /// Physical layout of the data in memory.
    pub memory_layout: MemoryLayout,
    /// Cache-friendliness fraction scaled by 1000.
    pub cache_friendliness_scaled: u32,
}
53
54impl DataCharacteristics {
55 #[must_use]
57 pub fn sparsity(&self) -> f64 {
58 f64::from(self.sparsity_scaled) / 1000.0
59 }
60
61 pub fn set_sparsity(&mut self, sparsity: f64) {
63 self.sparsity_scaled = (sparsity * 1000.0).round() as u32;
64 }
65
66 #[must_use]
68 pub fn cache_friendliness(&self) -> f64 {
69 f64::from(self.cache_friendliness_scaled) / 1000.0
70 }
71
72 pub fn set_cache_friendliness(&mut self, cache_friendliness: f64) {
74 self.cache_friendliness_scaled = (cache_friendliness * 1000.0).round() as u32;
75 }
76}
77
/// Physical arrangement of a dataset in memory.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MemoryLayout {
    /// Rows contiguous (C order).
    RowMajor,
    /// Columns contiguous (Fortran order).
    ColumnMajor,
    /// Row and column data interleaved.
    Interleaved,
    /// Some other, caller-defined layout.
    Custom,
}
90
/// Measured (or predicted) performance numbers for one operation run.
#[derive(Debug, Clone)]
pub struct ExecutionMetrics {
    /// Wall-clock duration of the run.
    pub execution_time: Duration,
    /// CPU time consumed.
    pub cpu_time: Duration,
    /// Total bytes allocated.
    pub memory_allocated: usize,
    /// Peak resident memory in bytes.
    pub peak_memory: usize,
    /// Observed cache-miss count.
    pub cache_misses: usize,
    /// Number of SIMD operations executed.
    pub simd_operations: usize,
    /// Parallel efficiency in `[0.0, 1.0]` (1.0 = perfect scaling).
    pub parallel_efficiency: f64,
    /// Memory bandwidth figure; units not established in this file
    /// (placeholder 0.5 is used for predictions) — TODO confirm with callers.
    pub memory_bandwidth: f64,
    /// Floating-point throughput in FLOP/s.
    pub flops_per_second: f64,
}
113
/// How aggressively an operation is optimized/compiled; ordered from no
/// optimization to the most aggressive (and most failure-prone) level.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum OptimizationLevel {
    /// No optimization.
    None,
    /// Basic optimizations only.
    Basic,
    /// Advanced optimizations.
    Advanced,
    /// Everything enabled, including wide SIMD targets.
    Aggressive,
}
126
/// Snapshot of the host hardware used when interpreting profiles.
#[derive(Debug, Clone)]
pub struct HardwareContext {
    /// Available logical CPU cores.
    pub cpu_cores: usize,
    /// Cache sizes in bytes, ordered by level (L1, L2, L3, ...).
    pub cache_sizes: Vec<usize>,
    /// SIMD instruction sets detected on the CPU.
    pub simd_features: Vec<SimdFeature>,
    /// Memory bandwidth; `detect_hardware_context` fills in a fixed typical
    /// value (25.6), presumably GB/s — TODO confirm.
    pub memory_bandwidth: f64,
    /// CPU frequency; fixed typical value (3000.0), presumably MHz — TODO confirm.
    pub cpu_frequency: f64,
}
141
/// SIMD instruction-set extensions the host CPU may support.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SimdFeature {
    /// x86 SSE.
    SSE,
    /// x86 SSE2.
    SSE2,
    /// x86 SSE3.
    SSE3,
    /// x86 SSE4.1.
    SSE4_1,
    /// x86 SSE4.2.
    SSE4_2,
    /// x86 AVX (256-bit).
    AVX,
    /// x86 AVX2.
    AVX2,
    /// x86 AVX-512 Foundation.
    AVX512F,
    /// ARM NEON.
    NEON,
}
164
/// Collects performance profiles per operation and derives optimization
/// strategies, cached algorithm choices, and learned performance predictors.
#[derive(Debug)]
pub struct ProfileGuidedOptimizer {
    /// Bounded history of profiles keyed by operation id.
    profiles: Arc<RwLock<HashMap<String, VecDeque<PerformanceProfile>>>>,
    /// Best-known strategy per operation id.
    strategies: Arc<RwLock<HashMap<String, OptimizationStrategy>>>,
    /// Algorithm choice memoized by exact data characteristics.
    algorithm_cache: Arc<RwLock<HashMap<DataCharacteristics, String>>>,
    /// One learned predictor per operation id.
    predictors: Arc<RwLock<HashMap<String, Box<dyn PerformancePredictor + Send + Sync>>>>,
    /// Tuning knobs (history bounds, thresholds, intervals).
    config: OptimizerConfig,
    /// Hardware snapshot taken at construction.
    hardware_context: HardwareContext,
}
181
/// A concrete plan for executing an operation: which algorithm, at what
/// optimization level, with which layout/parallelism/cache settings.
#[derive(Debug, Clone)]
pub struct OptimizationStrategy {
    /// Name of the algorithm variant to run.
    pub preferred_algorithm: String,
    /// Optimization level to compile/run with.
    pub optimization_level: OptimizationLevel,
    /// Preferred in-memory data layout.
    pub memory_layout: MemoryLayout,
    /// Parallelization scheme to use.
    pub parallel_strategy: ParallelStrategy,
    /// Cache-blocking and prefetch hints.
    pub cache_hints: CacheOptimizationHints,
    /// Confidence in this strategy, in `[0.0, 1.0]`.
    pub confidence: f64,
}
198
/// How an operation's work is parallelized.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ParallelStrategy {
    /// Single-threaded, scalar execution.
    Serial,
    /// Multi-threaded execution.
    ThreadParallel,
    /// SIMD-vectorized, single-threaded execution.
    Vectorized,
    /// Threads plus SIMD.
    Hybrid,
    /// GPU offload (never selected by the heuristics in this file).
    GPU,
}
208
/// Hints for cache-aware execution of an operation.
#[derive(Debug, Clone)]
pub struct CacheOptimizationHints {
    /// Blocking/tile size in elements.
    pub block_size: usize,
    /// Whether software prefetching is expected to pay off.
    pub use_prefetch: bool,
    /// Expected memory access pattern.
    pub access_pattern: AccessPattern,
    /// Whether cache-friendly algorithm variants should be preferred.
    pub cache_friendly_algorithms: bool,
}
221
/// Dominant memory access pattern of an operation.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum AccessPattern {
    /// Contiguous, in-order accesses.
    Sequential,
    /// Unpredictable accesses.
    Random,
    /// Fixed-stride accesses.
    Strided,
    /// Tile/block-wise accesses.
    Blocked,
}
230
/// Tuning knobs for [`ProfileGuidedOptimizer`].
#[derive(Debug, Clone)]
pub struct OptimizerConfig {
    /// Cap on stored profiles per operation; older entries are evicted.
    pub max_profiles_per_operation: usize,
    /// Minimum profile count before a strategy is (re)optimized.
    pub min_profiles_for_optimization: usize,
    /// Minimum confidence for acting on a strategy (not enforced in this file
    /// — presumably consumed by callers; confirm).
    pub confidence_threshold: f64,
    /// Minimum relative improvement worth switching for (not enforced here).
    pub improvement_threshold: f64,
    /// Whether strategies adapt as new profiles arrive (not read in this file).
    pub adaptive_optimization: bool,
    /// Interval between profiling passes (not read in this file).
    pub profile_interval: Duration,
}
247
/// Models that estimate how an operation will perform on given data and
/// learn from observed profiles.
pub trait PerformancePredictor: Send + Sync + std::fmt::Debug {
    /// Estimates wall-clock execution time for data with these characteristics.
    fn predict_execution_time(&self, characteristics: &DataCharacteristics) -> SklResult<Duration>;

    /// Estimates memory usage in bytes.
    fn predict_memory_usage(&self, characteristics: &DataCharacteristics) -> SklResult<usize>;

    /// Incorporates a newly observed profile into the model.
    fn update(&mut self, profile: &PerformanceProfile) -> SklResult<()>;

    /// Self-reported prediction accuracy in `[0.0, 1.0]`.
    fn accuracy(&self) -> f64;
}
262
/// Linear-regression performance predictor trained online from observed
/// profiles via batch gradient descent.
#[derive(Debug)]
pub struct MLPerformancePredictor {
    /// `(characteristics, measured metrics)` pairs collected via `update`.
    training_data: Vec<(DataCharacteristics, ExecutionMetrics)>,
    /// One weight per feature produced by `extract_features`.
    weights: Vec<f64>,
    /// Last computed accuracy proxy (see `train`).
    accuracy: f64,
    /// Total profiles seen; retraining happens every 50.
    training_samples: usize,
}
275
276impl MLPerformancePredictor {
277 #[must_use]
279 pub fn new() -> Self {
280 Self {
281 training_data: Vec::new(),
282 weights: vec![1.0; 10], accuracy: 0.0,
284 training_samples: 0,
285 }
286 }
287
288 fn extract_features(&self, characteristics: &DataCharacteristics) -> Vec<f64> {
290 vec![
291 characteristics.n_samples as f64,
292 characteristics.n_features as f64,
293 characteristics.sparsity(),
294 characteristics.dtype_size as f64,
295 characteristics.cache_friendliness(),
296 (characteristics.n_samples * characteristics.n_features) as f64, (characteristics.n_samples as f64).log2(),
298 (characteristics.n_features as f64).log2(),
299 characteristics.sparsity() * characteristics.n_features as f64,
300 characteristics.cache_friendliness() * characteristics.n_samples as f64,
301 ]
302 }
303
304 fn train(&mut self) -> SklResult<()> {
306 if self.training_data.len() < 10 {
307 return Ok(()); }
309
310 let learning_rate = 0.001;
312 let epochs = 100;
313
314 for _ in 0..epochs {
315 let mut gradients = vec![0.0; self.weights.len()];
316 let mut total_error = 0.0;
317
318 for (characteristics, metrics) in &self.training_data {
319 let features = self.extract_features(characteristics);
320 let predicted = features
321 .iter()
322 .zip(&self.weights)
323 .map(|(f, w)| f * w)
324 .sum::<f64>();
325
326 let actual = metrics.execution_time.as_secs_f64();
327 let error = predicted - actual;
328 total_error += error * error;
329
330 for (i, feature) in features.iter().enumerate() {
331 gradients[i] += error * feature;
332 }
333 }
334
335 for (weight, gradient) in self.weights.iter_mut().zip(&gradients) {
337 *weight -= learning_rate * gradient / self.training_data.len() as f64;
338 }
339
340 let mse = total_error / self.training_data.len() as f64;
342 self.accuracy = (1.0 - mse).max(0.0).min(1.0);
343 }
344
345 Ok(())
346 }
347}
348
349impl PerformancePredictor for MLPerformancePredictor {
350 fn predict_execution_time(&self, characteristics: &DataCharacteristics) -> SklResult<Duration> {
351 let features = self.extract_features(characteristics);
352 let prediction = features
353 .iter()
354 .zip(&self.weights)
355 .map(|(f, w)| f * w)
356 .sum::<f64>()
357 .max(0.0);
358
359 Ok(Duration::from_secs_f64(prediction))
360 }
361
362 fn predict_memory_usage(&self, characteristics: &DataCharacteristics) -> SklResult<usize> {
363 let base_memory =
365 characteristics.n_samples * characteristics.n_features * characteristics.dtype_size;
366 let overhead_factor = 1.0 + (1.0 - characteristics.sparsity()) * 0.5;
367 Ok((base_memory as f64 * overhead_factor) as usize)
368 }
369
370 fn update(&mut self, profile: &PerformanceProfile) -> SklResult<()> {
371 self.training_data.push((
372 profile.data_characteristics.clone(),
373 profile.metrics.clone(),
374 ));
375
376 self.training_samples += 1;
377
378 if self.training_samples % 50 == 0 {
380 self.train()?;
381 }
382
383 Ok(())
384 }
385
386 fn accuracy(&self) -> f64 {
387 self.accuracy
388 }
389}
390
391impl Default for MLPerformancePredictor {
392 fn default() -> Self {
393 Self::new()
394 }
395}
396
397impl ProfileGuidedOptimizer {
398 pub fn new(config: OptimizerConfig) -> SklResult<Self> {
400 let hardware_context = Self::detect_hardware_context();
401
402 Ok(Self {
403 profiles: Arc::new(RwLock::new(HashMap::new())),
404 strategies: Arc::new(RwLock::new(HashMap::new())),
405 algorithm_cache: Arc::new(RwLock::new(HashMap::new())),
406 predictors: Arc::new(RwLock::new(HashMap::new())),
407 config,
408 hardware_context,
409 })
410 }
411
412 fn detect_hardware_context() -> HardwareContext {
414 let cpu_cores = thread::available_parallelism()
415 .map(std::num::NonZero::get)
416 .unwrap_or(1);
417
418 HardwareContext {
421 cpu_cores,
422 cache_sizes: vec![32768, 262_144, 8_388_608], simd_features: Self::detect_simd_features(),
424 memory_bandwidth: 25.6, cpu_frequency: 3000.0, }
427 }
428
    /// Probes CPU SIMD capabilities at runtime.
    ///
    /// On x86_64 each instruction set is tested individually; on aarch64
    /// NEON is assumed present (it is mandatory on that architecture).
    /// Other targets report an empty set.
    fn detect_simd_features() -> Vec<SimdFeature> {
        let mut features = Vec::new();

        #[cfg(target_arch = "x86_64")]
        {
            if is_x86_feature_detected!("sse") {
                features.push(SimdFeature::SSE);
            }
            if is_x86_feature_detected!("sse2") {
                features.push(SimdFeature::SSE2);
            }
            if is_x86_feature_detected!("sse3") {
                features.push(SimdFeature::SSE3);
            }
            if is_x86_feature_detected!("sse4.1") {
                features.push(SimdFeature::SSE4_1);
            }
            if is_x86_feature_detected!("sse4.2") {
                features.push(SimdFeature::SSE4_2);
            }
            if is_x86_feature_detected!("avx") {
                features.push(SimdFeature::AVX);
            }
            if is_x86_feature_detected!("avx2") {
                features.push(SimdFeature::AVX2);
            }
            if is_x86_feature_detected!("avx512f") {
                features.push(SimdFeature::AVX512F);
            }
        }

        #[cfg(target_arch = "aarch64")]
        {
            features.push(SimdFeature::NEON);
        }

        features
    }
468
469 pub fn add_profile(&self, profile: PerformanceProfile) -> SklResult<()> {
471 let mut profiles = self.profiles.write().map_err(|_| {
472 SklearsError::InvalidInput("Failed to acquire profiles lock".to_string())
473 })?;
474
475 let operation_profiles = profiles
476 .entry(profile.operation_id.clone())
477 .or_insert_with(VecDeque::new);
478
479 operation_profiles.push_back(profile.clone());
480
481 while operation_profiles.len() > self.config.max_profiles_per_operation {
483 operation_profiles.pop_front();
484 }
485
486 if let Ok(mut predictors) = self.predictors.write() {
488 if let Some(predictor) = predictors.get_mut(&profile.operation_id) {
489 let _ = predictor.update(&profile);
490 } else {
491 let mut new_predictor = Box::new(MLPerformancePredictor::new());
492 let _ = new_predictor.update(&profile);
493 predictors.insert(profile.operation_id.clone(), new_predictor);
494 }
495 }
496
497 if operation_profiles.len() >= self.config.min_profiles_for_optimization {
499 self.optimize_strategy(&profile.operation_id)?;
500 }
501
502 Ok(())
503 }
504
505 pub fn get_strategy(
507 &self,
508 operation_id: &str,
509 characteristics: &DataCharacteristics,
510 ) -> SklResult<OptimizationStrategy> {
511 if let Ok(cache) = self.algorithm_cache.read() {
513 if let Some(cached_algorithm) = cache.get(characteristics) {
514 if let Ok(strategies) = self.strategies.read() {
515 if let Some(strategy) = strategies.get(operation_id) {
516 let mut cached_strategy = strategy.clone();
517 cached_strategy.preferred_algorithm = cached_algorithm.clone();
518 return Ok(cached_strategy);
519 }
520 }
521 }
522 }
523
524 self.generate_strategy(operation_id, characteristics)
526 }
527
528 fn generate_strategy(
530 &self,
531 operation_id: &str,
532 characteristics: &DataCharacteristics,
533 ) -> SklResult<OptimizationStrategy> {
534 let preferred_algorithm = self.select_algorithm(operation_id, characteristics)?;
535 let optimization_level = self.select_optimization_level(characteristics);
536 let memory_layout = self.select_memory_layout(characteristics);
537 let parallel_strategy = self.select_parallel_strategy(characteristics);
538 let cache_hints = self.generate_cache_hints(characteristics);
539
540 let confidence = self.calculate_confidence(operation_id, characteristics);
541
542 Ok(OptimizationStrategy {
543 preferred_algorithm,
544 optimization_level,
545 memory_layout,
546 parallel_strategy,
547 cache_hints,
548 confidence,
549 })
550 }
551
552 fn select_algorithm(
554 &self,
555 operation_id: &str,
556 characteristics: &DataCharacteristics,
557 ) -> SklResult<String> {
558 if let Ok(profiles) = self.profiles.read() {
559 if let Some(operation_profiles) = profiles.get(operation_id) {
560 let mut best_algorithm = "default".to_string();
562 let mut best_score = f64::INFINITY;
563
564 for profile in operation_profiles {
565 if self.characteristics_similar(&profile.data_characteristics, characteristics)
566 {
567 let score = profile.metrics.execution_time.as_secs_f64();
568 if score < best_score {
569 best_score = score;
570 best_algorithm = profile.algorithm_variant.clone();
571 }
572 }
573 }
574
575 return Ok(best_algorithm);
576 }
577 }
578
579 Ok(self.heuristic_algorithm_selection(characteristics))
581 }
582
583 fn characteristics_similar(&self, a: &DataCharacteristics, b: &DataCharacteristics) -> bool {
585 let size_ratio = (a.n_samples * a.n_features) as f64 / (b.n_samples * b.n_features) as f64;
586 let sparsity_diff = (a.sparsity() - b.sparsity()).abs();
587
588 (0.5..=2.0).contains(&size_ratio) && sparsity_diff < 0.3
589 }
590
591 fn heuristic_algorithm_selection(&self, characteristics: &DataCharacteristics) -> String {
593 let data_size = characteristics.n_samples * characteristics.n_features;
594
595 if characteristics.sparsity() > 0.7 {
596 "sparse_optimized".to_string()
597 } else if data_size < 10000 {
598 "small_data_optimized".to_string()
599 } else if data_size > 1_000_000 {
600 "large_data_optimized".to_string()
601 } else {
602 "general_purpose".to_string()
603 }
604 }
605
606 fn select_optimization_level(
608 &self,
609 characteristics: &DataCharacteristics,
610 ) -> OptimizationLevel {
611 let data_size = characteristics.n_samples * characteristics.n_features;
612
613 if data_size > 1_000_000 {
614 OptimizationLevel::Aggressive
615 } else if data_size > 100_000 {
616 OptimizationLevel::Advanced
617 } else if data_size > 10000 {
618 OptimizationLevel::Basic
619 } else {
620 OptimizationLevel::None
621 }
622 }
623
624 fn select_memory_layout(&self, characteristics: &DataCharacteristics) -> MemoryLayout {
626 if characteristics.n_features > characteristics.n_samples {
627 MemoryLayout::ColumnMajor
628 } else {
629 MemoryLayout::RowMajor
630 }
631 }
632
633 fn select_parallel_strategy(&self, characteristics: &DataCharacteristics) -> ParallelStrategy {
635 let data_size = characteristics.n_samples * characteristics.n_features;
636
637 if self
638 .hardware_context
639 .simd_features
640 .contains(&SimdFeature::AVX2)
641 && data_size > 100_000
642 {
643 ParallelStrategy::Hybrid
644 } else if self.hardware_context.cpu_cores > 1 && data_size > 50000 {
645 ParallelStrategy::ThreadParallel
646 } else if self.hardware_context.simd_features.len() > 2 {
647 ParallelStrategy::Vectorized
648 } else {
649 ParallelStrategy::Serial
650 }
651 }
652
653 fn generate_cache_hints(
655 &self,
656 characteristics: &DataCharacteristics,
657 ) -> CacheOptimizationHints {
658 let block_size = if self.hardware_context.cache_sizes.len() > 1 {
659 (self.hardware_context.cache_sizes[1] / characteristics.dtype_size).min(1024)
660 } else {
661 256
662 };
663
664 CacheOptimizationHints {
666 block_size,
667 use_prefetch: characteristics.n_samples > 10000,
668 access_pattern: if characteristics.cache_friendliness() > 0.7 {
669 AccessPattern::Sequential
670 } else {
671 AccessPattern::Blocked
672 },
673 cache_friendly_algorithms: characteristics.cache_friendliness() > 0.5,
674 }
675 }
676
677 fn calculate_confidence(
679 &self,
680 operation_id: &str,
681 characteristics: &DataCharacteristics,
682 ) -> f64 {
683 if let Ok(profiles) = self.profiles.read() {
684 if let Some(operation_profiles) = profiles.get(operation_id) {
685 let similar_profiles = operation_profiles
686 .iter()
687 .filter(|p| {
688 self.characteristics_similar(&p.data_characteristics, characteristics)
689 })
690 .count();
691
692 return (similar_profiles as f64 / 10.0).min(1.0);
693 }
694 }
695
696 0.1 }
698
699 fn optimize_strategy(&self, operation_id: &str) -> SklResult<()> {
701 if let Ok(profiles) = self.profiles.read() {
702 if let Some(operation_profiles) = profiles.get(operation_id) {
703 if operation_profiles.len() < self.config.min_profiles_for_optimization {
704 return Ok(());
705 }
706
707 let mut algorithm_performance: HashMap<String, Vec<f64>> = HashMap::new();
709
710 for profile in operation_profiles {
711 let score = profile.metrics.execution_time.as_secs_f64();
712 algorithm_performance
713 .entry(profile.algorithm_variant.clone())
714 .or_default()
715 .push(score);
716 }
717
718 let mut best_algorithm = "default".to_string();
720 let mut best_average = f64::INFINITY;
721
722 for (algorithm, scores) in &algorithm_performance {
723 if scores.len() >= 3 {
724 let average: f64 = scores.iter().sum::<f64>() / scores.len() as f64;
726 if average < best_average {
727 best_average = average;
728 best_algorithm = algorithm.clone();
729 }
730 }
731 }
732
733 if let Ok(mut strategies) = self.strategies.write() {
735 let strategy =
736 strategies
737 .entry(operation_id.to_string())
738 .or_insert_with(|| OptimizationStrategy {
739 preferred_algorithm: best_algorithm.clone(),
740 optimization_level: OptimizationLevel::Basic,
741 memory_layout: MemoryLayout::RowMajor,
742 parallel_strategy: ParallelStrategy::Serial,
743 cache_hints: CacheOptimizationHints {
744 block_size: 256,
745 use_prefetch: false,
746 access_pattern: AccessPattern::Sequential,
747 cache_friendly_algorithms: true,
748 },
749 confidence: 0.5,
750 });
751
752 strategy.preferred_algorithm = best_algorithm;
753 strategy.confidence = (algorithm_performance.len() as f64 / 5.0).min(1.0);
754 }
755 }
756 }
757
758 Ok(())
759 }
760
761 pub fn predict_performance(
763 &self,
764 operation_id: &str,
765 characteristics: &DataCharacteristics,
766 ) -> SklResult<ExecutionMetrics> {
767 if let Ok(predictors) = self.predictors.read() {
768 if let Some(predictor) = predictors.get(operation_id) {
769 let execution_time = predictor.predict_execution_time(characteristics)?;
770 let memory_usage = predictor.predict_memory_usage(characteristics)?;
771
772 return Ok(ExecutionMetrics {
773 execution_time,
774 cpu_time: execution_time,
775 memory_allocated: memory_usage,
776 peak_memory: memory_usage,
777 cache_misses: 0,
778 simd_operations: 0,
779 parallel_efficiency: 1.0,
780 memory_bandwidth: 0.5,
781 flops_per_second: 1e9,
782 });
783 }
784 }
785
786 let data_size = characteristics.n_samples * characteristics.n_features;
788 let estimated_time = Duration::from_millis((data_size / 10000).max(1) as u64);
789 let estimated_memory = data_size * characteristics.dtype_size;
790
791 Ok(ExecutionMetrics {
792 execution_time: estimated_time,
793 cpu_time: estimated_time,
794 memory_allocated: estimated_memory,
795 peak_memory: estimated_memory,
796 cache_misses: 0,
797 simd_operations: 0,
798 parallel_efficiency: 1.0,
799 memory_bandwidth: 0.5,
800 flops_per_second: 1e9,
801 })
802 }
803
804 #[must_use]
806 pub fn get_optimization_stats(&self) -> OptimizationStats {
807 let mut stats = OptimizationStats {
808 total_operations: 0,
809 optimized_operations: 0,
810 average_confidence: 0.0,
811 total_profiles: 0,
812 predictor_accuracy: 0.0,
813 };
814
815 if let Ok(profiles) = self.profiles.read() {
816 stats.total_operations = profiles.len();
817 stats.total_profiles = profiles.values().map(std::collections::VecDeque::len).sum();
818 }
819
820 if let Ok(strategies) = self.strategies.read() {
821 stats.optimized_operations = strategies.len();
822 stats.average_confidence = strategies.values().map(|s| s.confidence).sum::<f64>()
823 / strategies.len().max(1) as f64;
824 }
825
826 if let Ok(predictors) = self.predictors.read() {
827 stats.predictor_accuracy = predictors.values().map(|p| p.accuracy()).sum::<f64>()
828 / predictors.len().max(1) as f64;
829 }
830
831 stats
832 }
833}
834
/// Aggregate view of a [`ProfileGuidedOptimizer`]'s state.
#[derive(Debug, Clone)]
pub struct OptimizationStats {
    /// Number of distinct operations with recorded profiles.
    pub total_operations: usize,
    /// Number of operations that have a stored strategy.
    pub optimized_operations: usize,
    /// Mean confidence across stored strategies.
    pub average_confidence: f64,
    /// Total profiles across all operations.
    pub total_profiles: usize,
    /// Mean self-reported accuracy across predictors.
    pub predictor_accuracy: f64,
}
849
/// Defaults: keep up to 1000 profiles per operation, start optimizing after
/// 10, and re-profile every 60 seconds.
impl Default for OptimizerConfig {
    fn default() -> Self {
        Self {
            max_profiles_per_operation: 1000,
            min_profiles_for_optimization: 10,
            confidence_threshold: 0.7,
            improvement_threshold: 0.1,
            adaptive_optimization: true,
            profile_interval: Duration::from_secs(60),
        }
    }
}
862
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh optimizer starts with no recorded operations or strategies.
    #[test]
    fn test_optimizer_creation() {
        let config = OptimizerConfig::default();
        let optimizer = ProfileGuidedOptimizer::new(config).expect("operation should succeed");

        let stats = optimizer.get_optimization_stats();
        assert_eq!(stats.total_operations, 0);
        assert_eq!(stats.optimized_operations, 0);
    }

    /// Round-trips the scaled sparsity / cache-friendliness accessors.
    #[test]
    fn test_data_characteristics() {
        let mut characteristics = DataCharacteristics {
            n_samples: 1000,
            n_features: 50,
            sparsity_scaled: 100, // decodes to 0.1
            dtype_size: 8,
            memory_layout: MemoryLayout::RowMajor,
            cache_friendliness_scaled: 800, // decodes to 0.8
        };

        assert_eq!(characteristics.n_samples, 1000);
        assert_eq!(characteristics.n_features, 50);
        assert_eq!(characteristics.sparsity(), 0.1);
        assert_eq!(characteristics.cache_friendliness(), 0.8);

        characteristics.set_sparsity(0.5);
        assert_eq!(characteristics.sparsity(), 0.5);
    }

    /// Constructs a full profile and checks the identifying fields.
    #[test]
    fn test_performance_profile() {
        let profile = PerformanceProfile {
            operation_id: "test_op".to_string(),
            data_characteristics: DataCharacteristics {
                n_samples: 100,
                n_features: 10,
                sparsity_scaled: 0, // fully dense
                dtype_size: 8,
                memory_layout: MemoryLayout::RowMajor,
                cache_friendliness_scaled: 1000, // maximally cache friendly
            },
            metrics: ExecutionMetrics {
                execution_time: Duration::from_millis(100),
                cpu_time: Duration::from_millis(100),
                memory_allocated: 8000,
                peak_memory: 8000,
                cache_misses: 0,
                simd_operations: 100,
                parallel_efficiency: 1.0,
                memory_bandwidth: 0.5,
                flops_per_second: 1e6,
            },
            algorithm_variant: "test_algo".to_string(),
            optimization_level: OptimizationLevel::Basic,
            hardware_context: HardwareContext {
                cpu_cores: 4,
                cache_sizes: vec![32768, 262144],
                simd_features: vec![SimdFeature::SSE2],
                memory_bandwidth: 25.6,
                cpu_frequency: 3000.0,
            },
            timestamp: Instant::now(),
        };

        assert_eq!(profile.operation_id, "test_op");
        assert_eq!(profile.algorithm_variant, "test_algo");
    }

    /// An untrained predictor reports zero accuracy and still produces a
    /// non-negative execution-time prediction.
    #[test]
    fn test_ml_predictor() {
        let mut predictor = MLPerformancePredictor::new();
        assert_eq!(predictor.accuracy(), 0.0);

        let characteristics = DataCharacteristics {
            n_samples: 100,
            n_features: 10,
            sparsity_scaled: 0, // fully dense
            dtype_size: 8,
            memory_layout: MemoryLayout::RowMajor,
            cache_friendliness_scaled: 1000, // maximally cache friendly
        };

        let prediction = predictor
            .predict_execution_time(&characteristics)
            .unwrap_or_default();
        assert!(prediction.as_secs_f64() >= 0.0);
    }

    /// Smoke-checks that strategy structs carry their fields through.
    #[test]
    fn test_optimization_strategy() {
        let strategy = OptimizationStrategy {
            preferred_algorithm: "test_algo".to_string(),
            optimization_level: OptimizationLevel::Advanced,
            memory_layout: MemoryLayout::ColumnMajor,
            parallel_strategy: ParallelStrategy::Hybrid,
            cache_hints: CacheOptimizationHints {
                block_size: 512,
                use_prefetch: true,
                access_pattern: AccessPattern::Blocked,
                cache_friendly_algorithms: true,
            },
            confidence: 0.9,
        };

        assert_eq!(strategy.preferred_algorithm, "test_algo");
        assert_eq!(strategy.optimization_level, OptimizationLevel::Advanced);
        assert_eq!(strategy.confidence, 0.9);
    }

    /// Detection must not panic; the feature set is hardware-dependent, so
    /// nothing is asserted about its contents.
    #[test]
    fn test_simd_feature_detection() {
        let features = ProfileGuidedOptimizer::detect_simd_features();
        println!("Detected SIMD features: {:?}", features);
    }
}
985
/// Simulated JIT layer: caches "compiled" algorithm variants per
/// operation/strategy and tracks compilation statistics.
#[derive(Debug)]
pub struct RuntimeOptimizer {
    /// Compiled variants keyed by `generate_variant_key` output.
    compiled_variants: Arc<RwLock<HashMap<String, CompiledVariant>>>,
    /// Running compilation/cache counters.
    compilation_stats: Arc<RwLock<CompilationStats>>,
    /// JIT behavior knobs.
    config: RuntimeOptimizerConfig,
}
996
/// A cached, (simulated) compiled variant of an operation.
#[derive(Debug, Clone)]
pub struct CompiledVariant {
    /// Unique-ish identifier for this compilation.
    pub variant_id: String,
    /// Optimization level the variant was compiled at.
    pub optimization_level: OptimizationLevel,
    /// SIMD feature sets the variant targets.
    pub target_features: Vec<SimdFeature>,
    /// When compilation finished (used for oldest-first eviction).
    pub compiled_at: Instant,
    /// Observed performance, once available.
    pub performance_profile: Option<PerformanceProfile>,
    /// Whether compilation succeeded; `false` also marks variants flagged
    /// for PGO recompilation.
    pub compilation_successful: bool,
}
1013
/// Tuning knobs for [`RuntimeOptimizer`].
#[derive(Debug, Clone)]
pub struct RuntimeOptimizerConfig {
    /// Whether to (simulated-)JIT-compile on cache miss.
    pub enable_jit: bool,
    /// Maximum cached variants before eviction.
    pub max_variants: usize,
    /// Compilation timeout (not enforced in this file).
    pub compilation_timeout: Duration,
    /// Minimum relative improvement that triggers PGO recompilation.
    pub min_improvement: f64,
    /// Whether profile-guided recompilation is allowed.
    pub enable_pgo_recompilation: bool,
}
1028
/// Running counters for the simulated JIT.
#[derive(Debug, Clone)]
pub struct CompilationStats {
    /// Compilations attempted.
    pub total_compilations: usize,
    /// Compilations that succeeded.
    pub successful_compilations: usize,
    /// Total time spent compiling.
    pub total_compilation_time: Duration,
    /// Mean time per compilation.
    pub average_compilation_time: Duration,
    /// Variant-cache hits.
    pub cache_hits: usize,
    /// Variant-cache misses.
    pub cache_misses: usize,
}
1045
1046impl RuntimeOptimizer {
1047 #[must_use]
1049 pub fn new(config: RuntimeOptimizerConfig) -> Self {
1050 Self {
1051 compiled_variants: Arc::new(RwLock::new(HashMap::new())),
1052 compilation_stats: Arc::new(RwLock::new(CompilationStats {
1053 total_compilations: 0,
1054 successful_compilations: 0,
1055 total_compilation_time: Duration::from_secs(0),
1056 average_compilation_time: Duration::from_secs(0),
1057 cache_hits: 0,
1058 cache_misses: 0,
1059 })),
1060 config,
1061 }
1062 }
1063
1064 pub fn get_optimized_variant(
1066 &self,
1067 operation_id: &str,
1068 characteristics: &DataCharacteristics,
1069 strategy: &OptimizationStrategy,
1070 ) -> SklResult<String> {
1071 let variant_key = self.generate_variant_key(operation_id, characteristics, strategy);
1072
1073 if let Ok(variants) = self.compiled_variants.read() {
1075 if let Some(variant) = variants.get(&variant_key) {
1076 if variant.compilation_successful {
1077 self.update_cache_stats(true);
1078 return Ok(variant.variant_id.clone());
1079 }
1080 }
1081 }
1082
1083 self.update_cache_stats(false);
1084
1085 if self.config.enable_jit {
1087 self.compile_variant(operation_id, characteristics, strategy)
1088 } else {
1089 Ok(strategy.preferred_algorithm.clone())
1090 }
1091 }
1092
1093 fn compile_variant(
1095 &self,
1096 operation_id: &str,
1097 characteristics: &DataCharacteristics,
1098 strategy: &OptimizationStrategy,
1099 ) -> SklResult<String> {
1100 let start_time = Instant::now();
1101 let variant_key = self.generate_variant_key(operation_id, characteristics, strategy);
1102
1103 let compilation_successful = self.simulate_compilation(strategy);
1105 let compilation_time = start_time.elapsed();
1106
1107 let variant = CompiledVariant {
1108 variant_id: format!("{}_{}", operation_id, compilation_time.as_nanos()),
1109 optimization_level: strategy.optimization_level,
1110 target_features: self.select_target_features(strategy),
1111 compiled_at: Instant::now(),
1112 performance_profile: None,
1113 compilation_successful,
1114 };
1115
1116 self.update_compilation_stats(compilation_time, compilation_successful);
1118
1119 if let Ok(mut variants) = self.compiled_variants.write() {
1121 if variants.len() >= self.config.max_variants {
1123 self.evict_old_variants(&mut variants);
1124 }
1125 variants.insert(variant_key, variant.clone());
1126 }
1127
1128 Ok(variant.variant_id)
1129 }
1130
    /// Builds a cache key from the operation id plus hashes of the data
    /// characteristics and the strategy's optimization/parallel settings.
    ///
    /// Enums that don't implement `Hash` are folded in via their `Debug`
    /// rendering. `DefaultHasher` output is only stable within a process,
    /// so keys must not be persisted across runs.
    fn generate_variant_key(
        &self,
        operation_id: &str,
        characteristics: &DataCharacteristics,
        strategy: &OptimizationStrategy,
    ) -> String {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::Hasher;

        let mut hasher = DefaultHasher::new();
        operation_id.hash(&mut hasher);
        characteristics.hash(&mut hasher);
        format!("{:?}", strategy.optimization_level).hash(&mut hasher);
        format!("{:?}", strategy.parallel_strategy).hash(&mut hasher);

        format!("{}_{:x}", operation_id, hasher.finish())
    }
1149
1150 fn simulate_compilation(&self, strategy: &OptimizationStrategy) -> bool {
1152 match strategy.optimization_level {
1154 OptimizationLevel::None => true,
1155 OptimizationLevel::Basic => thread_rng().random::<f64>() > 0.1, OptimizationLevel::Advanced => thread_rng().random::<f64>() > 0.2, OptimizationLevel::Aggressive => thread_rng().random::<f64>() > 0.3, }
1159 }
1160
1161 fn select_target_features(&self, strategy: &OptimizationStrategy) -> Vec<SimdFeature> {
1163 let mut features = Vec::new();
1164
1165 match strategy.optimization_level {
1166 OptimizationLevel::None => {}
1167 OptimizationLevel::Basic => {
1168 features.push(SimdFeature::SSE2);
1169 }
1170 OptimizationLevel::Advanced => {
1171 features.extend_from_slice(&[SimdFeature::SSE2, SimdFeature::AVX]);
1172 }
1173 OptimizationLevel::Aggressive => {
1174 features.extend_from_slice(&[
1175 SimdFeature::SSE2,
1176 SimdFeature::AVX,
1177 SimdFeature::AVX2,
1178 SimdFeature::AVX512F,
1179 ]);
1180 }
1181 }
1182
1183 features
1184 }
1185
1186 fn update_cache_stats(&self, hit: bool) {
1188 if let Ok(mut stats) = self.compilation_stats.write() {
1189 if hit {
1190 stats.cache_hits += 1;
1191 } else {
1192 stats.cache_misses += 1;
1193 }
1194 }
1195 }
1196
1197 fn update_compilation_stats(&self, compilation_time: Duration, successful: bool) {
1199 if let Ok(mut stats) = self.compilation_stats.write() {
1200 stats.total_compilations += 1;
1201 if successful {
1202 stats.successful_compilations += 1;
1203 }
1204 stats.total_compilation_time += compilation_time;
1205 stats.average_compilation_time =
1206 stats.total_compilation_time / stats.total_compilations as u32;
1207 }
1208 }
1209
1210 fn evict_old_variants(&self, variants: &mut HashMap<String, CompiledVariant>) {
1212 if let Some((oldest_key, _)) = variants
1214 .iter()
1215 .min_by_key(|(_, variant)| variant.compiled_at)
1216 .map(|(k, v)| (k.clone(), v.clone()))
1217 {
1218 variants.remove(&oldest_key);
1219 }
1220 }
1221
1222 pub fn get_compilation_stats(&self) -> SklResult<CompilationStats> {
1224 self.compilation_stats
1225 .read()
1226 .map(|stats| stats.clone())
1227 .map_err(|_| SklearsError::InvalidInput("Failed to read compilation stats".to_string()))
1228 }
1229
1230 pub fn trigger_pgo_recompilation(
1232 &self,
1233 operation_id: &str,
1234 performance_profiles: &[PerformanceProfile],
1235 ) -> SklResult<()> {
1236 if !self.config.enable_pgo_recompilation {
1237 return Ok(());
1238 }
1239
1240 let avg_performance = performance_profiles
1242 .iter()
1243 .map(|p| p.metrics.execution_time.as_secs_f64())
1244 .sum::<f64>()
1245 / performance_profiles.len() as f64;
1246
1247 if let Ok(mut variants) = self.compiled_variants.write() {
1249 for (key, variant) in variants.iter_mut() {
1250 if key.starts_with(operation_id) {
1251 if let Some(ref profile) = variant.performance_profile {
1252 let improvement_potential =
1253 profile.metrics.execution_time.as_secs_f64() / avg_performance;
1254 if improvement_potential > (1.0 + self.config.min_improvement) {
1255 variant.compilation_successful = false;
1257 }
1258 }
1259 }
1260 }
1261 }
1262
1263 Ok(())
1264 }
1265}
1266
/// Defaults: JIT on, 100 cached variants, 30 s timeout, 10% improvement
/// threshold, PGO recompilation enabled.
impl Default for RuntimeOptimizerConfig {
    fn default() -> Self {
        Self {
            enable_jit: true,
            max_variants: 100,
            compilation_timeout: Duration::from_secs(30),
            min_improvement: 0.1,
            enable_pgo_recompilation: true,
        }
    }
}
1278
/// Accuracy-weighted ensemble over several [`PerformancePredictor`]s.
#[derive(Debug)]
pub struct EnsemblePerformancePredictor {
    /// Member predictors; index-aligned with `weights`.
    predictors: Vec<Box<dyn PerformancePredictor + Send + Sync>>,
    /// Per-member weights, re-normalized after each update.
    weights: Vec<f64>,
    /// Accuracy-weighted combined accuracy of the members.
    ensemble_accuracy: f64,
}
1289
/// `Default` builds the standard three-member ensemble via `new`.
impl Default for EnsemblePerformancePredictor {
    fn default() -> Self {
        Self::new()
    }
}
1295
1296impl EnsemblePerformancePredictor {
1297 #[must_use]
1299 pub fn new() -> Self {
1300 let predictors: Vec<Box<dyn PerformancePredictor + Send + Sync>> = vec![
1301 Box::new(MLPerformancePredictor::new()),
1302 Box::new(HeuristicPredictor::new()),
1303 Box::new(PolynomialPredictor::new()),
1304 ];
1305
1306 let weights = vec![1.0 / predictors.len() as f64; predictors.len()];
1307
1308 Self {
1309 predictors,
1310 weights,
1311 ensemble_accuracy: 0.0,
1312 }
1313 }
1314
1315 fn update_weights(&mut self) {
1317 let total_accuracy: f64 = self.predictors.iter().map(|p| p.accuracy()).sum();
1318
1319 if total_accuracy > 0.0 {
1320 for (i, predictor) in self.predictors.iter().enumerate() {
1321 self.weights[i] = predictor.accuracy() / total_accuracy;
1322 }
1323 }
1324
1325 self.ensemble_accuracy = self
1327 .predictors
1328 .iter()
1329 .enumerate()
1330 .map(|(i, p)| p.accuracy() * self.weights[i])
1331 .sum();
1332 }
1333}
1334
1335impl PerformancePredictor for EnsemblePerformancePredictor {
1336 fn predict_execution_time(&self, characteristics: &DataCharacteristics) -> SklResult<Duration> {
1337 let mut weighted_prediction = 0.0;
1338
1339 for (i, predictor) in self.predictors.iter().enumerate() {
1340 let prediction = predictor
1341 .predict_execution_time(characteristics)?
1342 .as_secs_f64();
1343 weighted_prediction += prediction * self.weights[i];
1344 }
1345
1346 Ok(Duration::from_secs_f64(weighted_prediction.max(0.0)))
1347 }
1348
1349 fn predict_memory_usage(&self, characteristics: &DataCharacteristics) -> SklResult<usize> {
1350 let mut weighted_prediction = 0.0;
1351
1352 for (i, predictor) in self.predictors.iter().enumerate() {
1353 let prediction = predictor.predict_memory_usage(characteristics)? as f64;
1354 weighted_prediction += prediction * self.weights[i];
1355 }
1356
1357 Ok(weighted_prediction.max(0.0) as usize)
1358 }
1359
1360 fn update(&mut self, profile: &PerformanceProfile) -> SklResult<()> {
1361 for predictor in &mut self.predictors {
1362 predictor.update(profile)?;
1363 }
1364
1365 self.update_weights();
1366 Ok(())
1367 }
1368
1369 fn accuracy(&self) -> f64 {
1370 self.ensemble_accuracy
1371 }
1372}
1373
/// Rule-of-thumb predictor that estimates cost from data size, sparsity,
/// and cache friendliness without any learned state.
#[derive(Debug)]
pub struct HeuristicPredictor {
    /// Fixed self-reported accuracy (0.6); never updated from profiles.
    accuracy: f64,
}
1379
impl Default for HeuristicPredictor {
    /// Equivalent to [`HeuristicPredictor::new`].
    fn default() -> Self {
        Self::new()
    }
}
1385
1386impl HeuristicPredictor {
1387 #[must_use]
1388 pub fn new() -> Self {
1389 Self { accuracy: 0.6 } }
1391}
1392
1393impl PerformancePredictor for HeuristicPredictor {
1394 fn predict_execution_time(&self, characteristics: &DataCharacteristics) -> SklResult<Duration> {
1395 let base_time = (characteristics.n_samples * characteristics.n_features) as f64;
1396 let sparsity_factor = 1.0 - characteristics.sparsity() * 0.5;
1397 let cache_factor = 1.0 + (1.0 - characteristics.cache_friendliness()) * 0.3;
1398
1399 let estimated_time = base_time * sparsity_factor * cache_factor / 1e6; Ok(Duration::from_secs_f64(estimated_time.max(0.001)))
1401 }
1402
1403 fn predict_memory_usage(&self, characteristics: &DataCharacteristics) -> SklResult<usize> {
1404 let base_memory =
1405 characteristics.n_samples * characteristics.n_features * characteristics.dtype_size;
1406 let overhead = (base_memory as f64 * 0.2) as usize; Ok(base_memory + overhead)
1408 }
1409
1410 fn update(&mut self, _profile: &PerformanceProfile) -> SklResult<()> {
1411 Ok(())
1413 }
1414
1415 fn accuracy(&self) -> f64 {
1416 self.accuracy
1417 }
1418}
1419
/// Predictor that fits a degree-2 polynomial regression over the data
/// characteristics using batch gradient descent.
#[derive(Debug)]
pub struct PolynomialPredictor {
    /// One coefficient per polynomial feature (see `polynomial_features`).
    coefficients: Vec<f64>,
    /// Accuracy estimate derived from training MSE (`1 / (1 + mse)`).
    accuracy: f64,
    /// Accumulated (feature vector, observed seconds) training pairs.
    /// NOTE(review): grows without bound as profiles arrive — confirm an
    /// upper limit is enforced elsewhere.
    training_data: Vec<(Vec<f64>, f64)>,
}
1427
impl Default for PolynomialPredictor {
    /// Equivalent to [`PolynomialPredictor::new`].
    fn default() -> Self {
        Self::new()
    }
}
1433
1434impl PolynomialPredictor {
1435 #[must_use]
1436 pub fn new() -> Self {
1437 Self {
1438 coefficients: vec![1.0; 15], accuracy: 0.0,
1440 training_data: Vec::new(),
1441 }
1442 }
1443
1444 fn polynomial_features(&self, characteristics: &DataCharacteristics) -> Vec<f64> {
1445 let n_samples = characteristics.n_samples as f64;
1446 let n_features = characteristics.n_features as f64;
1447 let sparsity = characteristics.sparsity();
1448 let cache_friendliness = characteristics.cache_friendliness();
1449
1450 vec![
1451 1.0, n_samples,
1453 n_features,
1454 sparsity,
1455 cache_friendliness,
1456 n_samples * n_features, n_samples * sparsity,
1458 n_features * sparsity,
1459 n_samples * cache_friendliness,
1460 n_features * cache_friendliness,
1461 sparsity * cache_friendliness,
1462 n_samples.powi(2), n_features.powi(2),
1464 sparsity.powi(2),
1465 cache_friendliness.powi(2),
1466 ]
1467 }
1468}
1469
1470impl PerformancePredictor for PolynomialPredictor {
1471 fn predict_execution_time(&self, characteristics: &DataCharacteristics) -> SklResult<Duration> {
1472 let features = self.polynomial_features(characteristics);
1473 let prediction = features
1474 .iter()
1475 .zip(&self.coefficients)
1476 .map(|(f, c)| f * c)
1477 .sum::<f64>()
1478 .max(0.001);
1479
1480 Ok(Duration::from_secs_f64(prediction))
1481 }
1482
1483 fn predict_memory_usage(&self, characteristics: &DataCharacteristics) -> SklResult<usize> {
1484 let base_memory =
1485 characteristics.n_samples * characteristics.n_features * characteristics.dtype_size;
1486 Ok(base_memory)
1487 }
1488
1489 fn update(&mut self, profile: &PerformanceProfile) -> SklResult<()> {
1490 let features = self.polynomial_features(&profile.data_characteristics);
1491 let target = profile.metrics.execution_time.as_secs_f64();
1492
1493 self.training_data.push((features, target));
1494
1495 if self.training_data.len() % 20 == 0 {
1497 self.train_polynomial_regression()?;
1498 }
1499
1500 Ok(())
1501 }
1502
1503 fn accuracy(&self) -> f64 {
1504 self.accuracy
1505 }
1506}
1507
1508impl PolynomialPredictor {
1509 fn train_polynomial_regression(&mut self) -> SklResult<()> {
1510 if self.training_data.len() < 10 {
1511 return Ok(());
1512 }
1513
1514 let n = self.training_data.len();
1516 let p = self.coefficients.len();
1517
1518 let mut x_matrix = vec![vec![0.0; p]; n];
1520 let mut y_vector = vec![0.0; n];
1521
1522 for (i, (features, target)) in self.training_data.iter().enumerate() {
1523 for (j, &feature) in features.iter().enumerate() {
1524 x_matrix[i][j] = feature;
1525 }
1526 y_vector[i] = *target;
1527 }
1528
1529 let learning_rate = 0.0001;
1532 let epochs = 50;
1533
1534 for _ in 0..epochs {
1535 let mut gradients = vec![0.0; p];
1536 let mut total_error = 0.0;
1537
1538 for i in 0..n {
1539 let prediction: f64 = x_matrix[i]
1540 .iter()
1541 .zip(&self.coefficients)
1542 .map(|(x, c)| x * c)
1543 .sum();
1544
1545 let error = prediction - y_vector[i];
1546 total_error += error * error;
1547
1548 for j in 0..p {
1549 gradients[j] += error * x_matrix[i][j];
1550 }
1551 }
1552
1553 for (coeff, grad) in self.coefficients.iter_mut().zip(&gradients) {
1555 *coeff -= learning_rate * grad / n as f64;
1556 }
1557
1558 let mse = total_error / n as f64;
1560 self.accuracy = (1.0 / (1.0 + mse)).min(1.0);
1561 }
1562
1563 Ok(())
1564 }
1565}