use std::collections::{HashMap, VecDeque};
use std::hash::Hash;
use std::sync::{Arc, RwLock};
use std::thread;
use std::time::{Duration, Instant};

use scirs2_core::random::{thread_rng, Rng};

use sklears_core::error::{Result as SklResult, SklearsError};

/// A single recorded observation of how one operation performed on one input.
#[derive(Debug, Clone)]
pub struct PerformanceProfile {
    pub operation_id: String,
    pub data_characteristics: DataCharacteristics,
    pub metrics: ExecutionMetrics,
    pub algorithm_variant: String,
    pub optimization_level: OptimizationLevel,
    pub hardware_context: HardwareContext,
    pub timestamp: Instant,
}

/// Shape and layout properties of an input, used as a cache key for algorithm
/// selection. Fractional quantities are stored scaled by 1000 so the struct
/// can derive `Eq` and `Hash`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DataCharacteristics {
    pub n_samples: usize,
    pub n_features: usize,
    pub sparsity_scaled: u32,
    pub dtype_size: usize,
    pub memory_layout: MemoryLayout,
    pub cache_friendliness_scaled: u32,
}

impl DataCharacteristics {
    /// Sparsity in `[0, 1]`, reconstructed from the scaled representation.
    #[must_use]
    pub fn sparsity(&self) -> f64 {
        f64::from(self.sparsity_scaled) / 1000.0
    }

    pub fn set_sparsity(&mut self, sparsity: f64) {
        self.sparsity_scaled = (sparsity * 1000.0).round() as u32;
    }

    /// Cache friendliness in `[0, 1]`, reconstructed from the scaled representation.
    #[must_use]
    pub fn cache_friendliness(&self) -> f64 {
        f64::from(self.cache_friendliness_scaled) / 1000.0
    }

    pub fn set_cache_friendliness(&mut self, cache_friendliness: f64) {
        self.cache_friendliness_scaled = (cache_friendliness * 1000.0).round() as u32;
    }
}

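// A minimal sketch of how the `*_scaled` fields are intended to be used: they
// store thousandths so that `DataCharacteristics` can derive `Eq` and `Hash`
// (needed for the algorithm cache key) without holding an `f64`. The field
// values below are illustrative only.
//
// let mut c = DataCharacteristics {
//     n_samples: 10,
//     n_features: 4,
//     sparsity_scaled: 0,
//     dtype_size: 8,
//     memory_layout: MemoryLayout::RowMajor,
//     cache_friendliness_scaled: 0,
// };
// c.set_sparsity(0.25);
// assert_eq!(c.sparsity_scaled, 250);
// assert_eq!(c.sparsity(), 0.25);
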
/// Physical layout of the input data in memory.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MemoryLayout {
    RowMajor,
    ColumnMajor,
    Interleaved,
    Custom,
}

/// Measured (or predicted) resource usage for a single execution.
#[derive(Debug, Clone)]
pub struct ExecutionMetrics {
    pub execution_time: Duration,
    pub cpu_time: Duration,
    pub memory_allocated: usize,
    pub peak_memory: usize,
    pub cache_misses: usize,
    pub simd_operations: usize,
    pub parallel_efficiency: f64,
    pub memory_bandwidth: f64,
    pub flops_per_second: f64,
}

/// How aggressively an operation should be specialized.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum OptimizationLevel {
    None,
    Basic,
    Advanced,
    Aggressive,
}

/// Description of the host hardware used when interpreting profiles.
#[derive(Debug, Clone)]
pub struct HardwareContext {
    pub cpu_cores: usize,
    pub cache_sizes: Vec<usize>,
    pub simd_features: Vec<SimdFeature>,
    pub memory_bandwidth: f64,
    pub cpu_frequency: f64,
}

/// SIMD instruction-set extensions that may be available on the host.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SimdFeature {
    SSE,
    SSE2,
    SSE3,
    SSE4_1,
    SSE4_2,
    AVX,
    AVX2,
    AVX512F,
    NEON,
}

/// Profile-guided optimizer: collects `PerformanceProfile`s per operation and
/// uses them to pick algorithms, layouts, and parallelization strategies.
#[derive(Debug)]
pub struct ProfileGuidedOptimizer {
    profiles: Arc<RwLock<HashMap<String, VecDeque<PerformanceProfile>>>>,
    strategies: Arc<RwLock<HashMap<String, OptimizationStrategy>>>,
    algorithm_cache: Arc<RwLock<HashMap<DataCharacteristics, String>>>,
    predictors: Arc<RwLock<HashMap<String, Box<dyn PerformancePredictor + Send + Sync>>>>,
    config: OptimizerConfig,
    hardware_context: HardwareContext,
}

/// The optimizer's recommendation for how to execute an operation.
#[derive(Debug, Clone)]
pub struct OptimizationStrategy {
    pub preferred_algorithm: String,
    pub optimization_level: OptimizationLevel,
    pub memory_layout: MemoryLayout,
    pub parallel_strategy: ParallelStrategy,
    pub cache_hints: CacheOptimizationHints,
    pub confidence: f64,
}

/// How work should be distributed across execution units.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ParallelStrategy {
    Serial,
    ThreadParallel,
    Vectorized,
    Hybrid,
    GPU,
}

/// Hints for cache-aware execution of the selected algorithm.
#[derive(Debug, Clone)]
pub struct CacheOptimizationHints {
    pub block_size: usize,
    pub use_prefetch: bool,
    pub access_pattern: AccessPattern,
    pub cache_friendly_algorithms: bool,
}

/// Expected memory access pattern of the workload.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum AccessPattern {
    Sequential,
    Random,
    Strided,
    Blocked,
}

/// Tuning knobs for `ProfileGuidedOptimizer`.
#[derive(Debug, Clone)]
pub struct OptimizerConfig {
    pub max_profiles_per_operation: usize,
    pub min_profiles_for_optimization: usize,
    pub confidence_threshold: f64,
    pub improvement_threshold: f64,
    pub adaptive_optimization: bool,
    pub profile_interval: Duration,
}

/// A model that estimates execution cost from data characteristics and learns
/// from newly recorded profiles.
pub trait PerformancePredictor: Send + Sync + std::fmt::Debug {
    fn predict_execution_time(&self, characteristics: &DataCharacteristics) -> SklResult<Duration>;

    fn predict_memory_usage(&self, characteristics: &DataCharacteristics) -> SklResult<usize>;

    fn update(&mut self, profile: &PerformanceProfile) -> SklResult<()>;

    fn accuracy(&self) -> f64;
}

/// Linear-regression predictor trained online with gradient descent.
#[derive(Debug)]
pub struct MLPerformancePredictor {
    training_data: Vec<(DataCharacteristics, ExecutionMetrics)>,
    weights: Vec<f64>,
    accuracy: f64,
    training_samples: usize,
}

impl MLPerformancePredictor {
    #[must_use]
    pub fn new() -> Self {
        Self {
            training_data: Vec::new(),
            weights: vec![1.0; 10],
            accuracy: 0.0,
            training_samples: 0,
        }
    }

    fn extract_features(&self, characteristics: &DataCharacteristics) -> Vec<f64> {
        vec![
            characteristics.n_samples as f64,
            characteristics.n_features as f64,
            characteristics.sparsity(),
            characteristics.dtype_size as f64,
            characteristics.cache_friendliness(),
            (characteristics.n_samples * characteristics.n_features) as f64,
            (characteristics.n_samples as f64).log2(),
            (characteristics.n_features as f64).log2(),
            characteristics.sparsity() * characteristics.n_features as f64,
            characteristics.cache_friendliness() * characteristics.n_samples as f64,
        ]
    }

    fn train(&mut self) -> SklResult<()> {
        if self.training_data.len() < 10 {
            return Ok(());
        }

        let learning_rate = 0.001;
        let epochs = 100;

        for _ in 0..epochs {
            let mut gradients = vec![0.0; self.weights.len()];
            let mut total_error = 0.0;

            for (characteristics, metrics) in &self.training_data {
                let features = self.extract_features(characteristics);
                let predicted = features
                    .iter()
                    .zip(&self.weights)
                    .map(|(f, w)| f * w)
                    .sum::<f64>();

                let actual = metrics.execution_time.as_secs_f64();
                let error = predicted - actual;
                total_error += error * error;

                for (i, feature) in features.iter().enumerate() {
                    gradients[i] += error * feature;
                }
            }

            for (weight, gradient) in self.weights.iter_mut().zip(&gradients) {
                *weight -= learning_rate * gradient / self.training_data.len() as f64;
            }

            let mse = total_error / self.training_data.len() as f64;
            self.accuracy = (1.0 - mse).max(0.0).min(1.0);
        }

        Ok(())
    }
}

impl PerformancePredictor for MLPerformancePredictor {
    fn predict_execution_time(&self, characteristics: &DataCharacteristics) -> SklResult<Duration> {
        let features = self.extract_features(characteristics);
        let prediction = features
            .iter()
            .zip(&self.weights)
            .map(|(f, w)| f * w)
            .sum::<f64>()
            .max(0.0);

        Ok(Duration::from_secs_f64(prediction))
    }

    fn predict_memory_usage(&self, characteristics: &DataCharacteristics) -> SklResult<usize> {
        let base_memory =
            characteristics.n_samples * characteristics.n_features * characteristics.dtype_size;
        let overhead_factor = 1.0 + (1.0 - characteristics.sparsity()) * 0.5;
        Ok((base_memory as f64 * overhead_factor) as usize)
    }

    fn update(&mut self, profile: &PerformanceProfile) -> SklResult<()> {
        self.training_data.push((
            profile.data_characteristics.clone(),
            profile.metrics.clone(),
        ));

        self.training_samples += 1;

        if self.training_samples % 50 == 0 {
            self.train()?;
        }

        Ok(())
    }

    fn accuracy(&self) -> f64 {
        self.accuracy
    }
}

impl Default for MLPerformancePredictor {
    fn default() -> Self {
        Self::new()
    }
}

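// Prediction is a plain linear model over the ten features returned by
// `extract_features`: predicted_seconds = sum(w_i * f_i). Training (in `train`)
// runs batch gradient descent on squared error, so with learning rate lr and n
// stored samples each step is roughly w_i <- w_i - lr * sum(err * f_i) / n.
// This is a sketch of the update rule as implemented above, not a tuning guide.
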
impl ProfileGuidedOptimizer {
    pub fn new(config: OptimizerConfig) -> SklResult<Self> {
        let hardware_context = Self::detect_hardware_context();

        Ok(Self {
            profiles: Arc::new(RwLock::new(HashMap::new())),
            strategies: Arc::new(RwLock::new(HashMap::new())),
            algorithm_cache: Arc::new(RwLock::new(HashMap::new())),
            predictors: Arc::new(RwLock::new(HashMap::new())),
            config,
            hardware_context,
        })
    }

    fn detect_hardware_context() -> HardwareContext {
        let cpu_cores = thread::available_parallelism()
            .map(std::num::NonZero::get)
            .unwrap_or(1);

        HardwareContext {
            cpu_cores,
            // Fixed fallback estimates (L1/L2/L3 cache sizes in bytes); only the
            // core count and SIMD features are actually probed at runtime.
            cache_sizes: vec![32768, 262_144, 8_388_608],
            simd_features: Self::detect_simd_features(),
            memory_bandwidth: 25.6,
            cpu_frequency: 3000.0,
        }
    }

    fn detect_simd_features() -> Vec<SimdFeature> {
        let mut features = Vec::new();

        #[cfg(target_arch = "x86_64")]
        {
            if is_x86_feature_detected!("sse") {
                features.push(SimdFeature::SSE);
            }
            if is_x86_feature_detected!("sse2") {
                features.push(SimdFeature::SSE2);
            }
            if is_x86_feature_detected!("sse3") {
                features.push(SimdFeature::SSE3);
            }
            if is_x86_feature_detected!("sse4.1") {
                features.push(SimdFeature::SSE4_1);
            }
            if is_x86_feature_detected!("sse4.2") {
                features.push(SimdFeature::SSE4_2);
            }
            if is_x86_feature_detected!("avx") {
                features.push(SimdFeature::AVX);
            }
            if is_x86_feature_detected!("avx2") {
                features.push(SimdFeature::AVX2);
            }
            if is_x86_feature_detected!("avx512f") {
                features.push(SimdFeature::AVX512F);
            }
        }

        #[cfg(target_arch = "aarch64")]
        {
            features.push(SimdFeature::NEON);
        }

        features
    }

    pub fn add_profile(&self, profile: PerformanceProfile) -> SklResult<()> {
        let mut profiles = self.profiles.write().map_err(|_| {
            SklearsError::InvalidInput("Failed to acquire profiles lock".to_string())
        })?;

        let operation_profiles = profiles
            .entry(profile.operation_id.clone())
            .or_insert_with(VecDeque::new);

        operation_profiles.push_back(profile.clone());

        while operation_profiles.len() > self.config.max_profiles_per_operation {
            operation_profiles.pop_front();
        }

        let should_optimize =
            operation_profiles.len() >= self.config.min_profiles_for_optimization;

        // Release the write lock before calling back into methods that take
        // their own locks on `self.profiles` (the std `RwLock` is not reentrant).
        drop(profiles);

        if let Ok(mut predictors) = self.predictors.write() {
            if let Some(predictor) = predictors.get_mut(&profile.operation_id) {
                let _ = predictor.update(&profile);
            } else {
                let mut new_predictor = Box::new(MLPerformancePredictor::new());
                let _ = new_predictor.update(&profile);
                predictors.insert(profile.operation_id.clone(), new_predictor);
            }
        }

        if should_optimize {
            self.optimize_strategy(&profile.operation_id)?;
        }

        Ok(())
    }

    pub fn get_strategy(
        &self,
        operation_id: &str,
        characteristics: &DataCharacteristics,
    ) -> SklResult<OptimizationStrategy> {
        if let Ok(cache) = self.algorithm_cache.read() {
            if let Some(cached_algorithm) = cache.get(characteristics) {
                if let Ok(strategies) = self.strategies.read() {
                    if let Some(strategy) = strategies.get(operation_id) {
                        let mut cached_strategy = strategy.clone();
                        cached_strategy.preferred_algorithm = cached_algorithm.clone();
                        return Ok(cached_strategy);
                    }
                }
            }
        }

        self.generate_strategy(operation_id, characteristics)
    }

    fn generate_strategy(
        &self,
        operation_id: &str,
        characteristics: &DataCharacteristics,
    ) -> SklResult<OptimizationStrategy> {
        let preferred_algorithm = self.select_algorithm(operation_id, characteristics)?;
        let optimization_level = self.select_optimization_level(characteristics);
        let memory_layout = self.select_memory_layout(characteristics);
        let parallel_strategy = self.select_parallel_strategy(characteristics);
        let cache_hints = self.generate_cache_hints(characteristics);

        let confidence = self.calculate_confidence(operation_id, characteristics);

        Ok(OptimizationStrategy {
            preferred_algorithm,
            optimization_level,
            memory_layout,
            parallel_strategy,
            cache_hints,
            confidence,
        })
    }

    fn select_algorithm(
        &self,
        operation_id: &str,
        characteristics: &DataCharacteristics,
    ) -> SklResult<String> {
        if let Ok(profiles) = self.profiles.read() {
            if let Some(operation_profiles) = profiles.get(operation_id) {
                let mut best_algorithm = "default".to_string();
                let mut best_score = f64::INFINITY;

                for profile in operation_profiles {
                    if self.characteristics_similar(&profile.data_characteristics, characteristics)
                    {
                        let score = profile.metrics.execution_time.as_secs_f64();
                        if score < best_score {
                            best_score = score;
                            best_algorithm = profile.algorithm_variant.clone();
                        }
                    }
                }

                return Ok(best_algorithm);
            }
        }

        Ok(self.heuristic_algorithm_selection(characteristics))
    }

    fn characteristics_similar(&self, a: &DataCharacteristics, b: &DataCharacteristics) -> bool {
        let size_ratio = (a.n_samples * a.n_features) as f64 / (b.n_samples * b.n_features) as f64;
        let sparsity_diff = (a.sparsity() - b.sparsity()).abs();

        (0.5..=2.0).contains(&size_ratio) && sparsity_diff < 0.3
    }

    fn heuristic_algorithm_selection(&self, characteristics: &DataCharacteristics) -> String {
        let data_size = characteristics.n_samples * characteristics.n_features;

        if characteristics.sparsity() > 0.7 {
            "sparse_optimized".to_string()
        } else if data_size < 10000 {
            "small_data_optimized".to_string()
        } else if data_size > 1_000_000 {
            "large_data_optimized".to_string()
        } else {
            "general_purpose".to_string()
        }
    }

    fn select_optimization_level(
        &self,
        characteristics: &DataCharacteristics,
    ) -> OptimizationLevel {
        let data_size = characteristics.n_samples * characteristics.n_features;

        if data_size > 1_000_000 {
            OptimizationLevel::Aggressive
        } else if data_size > 100_000 {
            OptimizationLevel::Advanced
        } else if data_size > 10000 {
            OptimizationLevel::Basic
        } else {
            OptimizationLevel::None
        }
    }

    fn select_memory_layout(&self, characteristics: &DataCharacteristics) -> MemoryLayout {
        if characteristics.n_features > characteristics.n_samples {
            MemoryLayout::ColumnMajor
        } else {
            MemoryLayout::RowMajor
        }
    }

    fn select_parallel_strategy(&self, characteristics: &DataCharacteristics) -> ParallelStrategy {
        let data_size = characteristics.n_samples * characteristics.n_features;

        if self
            .hardware_context
            .simd_features
            .contains(&SimdFeature::AVX2)
            && data_size > 100_000
        {
            ParallelStrategy::Hybrid
        } else if self.hardware_context.cpu_cores > 1 && data_size > 50000 {
            ParallelStrategy::ThreadParallel
        } else if self.hardware_context.simd_features.len() > 2 {
            ParallelStrategy::Vectorized
        } else {
            ParallelStrategy::Serial
        }
    }

    fn generate_cache_hints(
        &self,
        characteristics: &DataCharacteristics,
    ) -> CacheOptimizationHints {
        let block_size = if self.hardware_context.cache_sizes.len() > 1 {
            (self.hardware_context.cache_sizes[1] / characteristics.dtype_size).min(1024)
        } else {
            256
        };

        CacheOptimizationHints {
            block_size,
            use_prefetch: characteristics.n_samples > 10000,
            access_pattern: if characteristics.cache_friendliness() > 0.7 {
                AccessPattern::Sequential
            } else {
                AccessPattern::Blocked
            },
            cache_friendly_algorithms: characteristics.cache_friendliness() > 0.5,
        }
    }

    fn calculate_confidence(
        &self,
        operation_id: &str,
        characteristics: &DataCharacteristics,
    ) -> f64 {
        if let Ok(profiles) = self.profiles.read() {
            if let Some(operation_profiles) = profiles.get(operation_id) {
                let similar_profiles = operation_profiles
                    .iter()
                    .filter(|p| {
                        self.characteristics_similar(&p.data_characteristics, characteristics)
                    })
                    .count();

                return (similar_profiles as f64 / 10.0).min(1.0);
            }
        }

        // Low default confidence when no profiles have been recorded yet.
        0.1
    }

    fn optimize_strategy(&self, operation_id: &str) -> SklResult<()> {
        if let Ok(profiles) = self.profiles.read() {
            if let Some(operation_profiles) = profiles.get(operation_id) {
                if operation_profiles.len() < self.config.min_profiles_for_optimization {
                    return Ok(());
                }

                let mut algorithm_performance: HashMap<String, Vec<f64>> = HashMap::new();

                for profile in operation_profiles {
                    let score = profile.metrics.execution_time.as_secs_f64();
                    algorithm_performance
                        .entry(profile.algorithm_variant.clone())
                        .or_default()
                        .push(score);
                }

                let mut best_algorithm = "default".to_string();
                let mut best_average = f64::INFINITY;

                for (algorithm, scores) in &algorithm_performance {
                    if scores.len() >= 3 {
                        let average: f64 = scores.iter().sum::<f64>() / scores.len() as f64;
                        if average < best_average {
                            best_average = average;
                            best_algorithm = algorithm.clone();
                        }
                    }
                }

                if let Ok(mut strategies) = self.strategies.write() {
                    let strategy =
                        strategies
                            .entry(operation_id.to_string())
                            .or_insert_with(|| OptimizationStrategy {
                                preferred_algorithm: best_algorithm.clone(),
                                optimization_level: OptimizationLevel::Basic,
                                memory_layout: MemoryLayout::RowMajor,
                                parallel_strategy: ParallelStrategy::Serial,
                                cache_hints: CacheOptimizationHints {
                                    block_size: 256,
                                    use_prefetch: false,
                                    access_pattern: AccessPattern::Sequential,
                                    cache_friendly_algorithms: true,
                                },
                                confidence: 0.5,
                            });

                    strategy.preferred_algorithm = best_algorithm;
                    strategy.confidence = (algorithm_performance.len() as f64 / 5.0).min(1.0);
                }
            }
        }

        Ok(())
    }

    pub fn predict_performance(
        &self,
        operation_id: &str,
        characteristics: &DataCharacteristics,
    ) -> SklResult<ExecutionMetrics> {
        if let Ok(predictors) = self.predictors.read() {
            if let Some(predictor) = predictors.get(operation_id) {
                let execution_time = predictor.predict_execution_time(characteristics)?;
                let memory_usage = predictor.predict_memory_usage(characteristics)?;

                return Ok(ExecutionMetrics {
                    execution_time,
                    cpu_time: execution_time,
                    memory_allocated: memory_usage,
                    peak_memory: memory_usage,
                    cache_misses: 0,
                    simd_operations: 0,
                    parallel_efficiency: 1.0,
                    memory_bandwidth: 0.5,
                    flops_per_second: 1e9,
                });
            }
        }

        let data_size = characteristics.n_samples * characteristics.n_features;
        let estimated_time = Duration::from_millis((data_size / 10000).max(1) as u64);
        let estimated_memory = data_size * characteristics.dtype_size;

        Ok(ExecutionMetrics {
            execution_time: estimated_time,
            cpu_time: estimated_time,
            memory_allocated: estimated_memory,
            peak_memory: estimated_memory,
            cache_misses: 0,
            simd_operations: 0,
            parallel_efficiency: 1.0,
            memory_bandwidth: 0.5,
            flops_per_second: 1e9,
        })
    }

    #[must_use]
    pub fn get_optimization_stats(&self) -> OptimizationStats {
        let mut stats = OptimizationStats {
            total_operations: 0,
            optimized_operations: 0,
            average_confidence: 0.0,
            total_profiles: 0,
            predictor_accuracy: 0.0,
        };

        if let Ok(profiles) = self.profiles.read() {
            stats.total_operations = profiles.len();
            stats.total_profiles = profiles.values().map(std::collections::VecDeque::len).sum();
        }

        if let Ok(strategies) = self.strategies.read() {
            stats.optimized_operations = strategies.len();
            stats.average_confidence = strategies.values().map(|s| s.confidence).sum::<f64>()
                / strategies.len().max(1) as f64;
        }

        if let Ok(predictors) = self.predictors.read() {
            stats.predictor_accuracy = predictors.values().map(|p| p.accuracy()).sum::<f64>()
                / predictors.len().max(1) as f64;
        }

        stats
    }
}

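// Typical usage (sketch): record observed runs, then ask for a strategy and a
// performance estimate for similar data. The operation id "gemm_f64" is purely
// illustrative.
//
// let optimizer = ProfileGuidedOptimizer::new(OptimizerConfig::default())?;
// optimizer.add_profile(profile)?; // one `PerformanceProfile` per measured run
// let strategy = optimizer.get_strategy("gemm_f64", &characteristics)?;
// let estimate = optimizer.predict_performance("gemm_f64", &characteristics)?;
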
/// Summary counters describing the optimizer's current state.
#[derive(Debug, Clone)]
pub struct OptimizationStats {
    pub total_operations: usize,
    pub optimized_operations: usize,
    pub average_confidence: f64,
    pub total_profiles: usize,
    pub predictor_accuracy: f64,
}

impl Default for OptimizerConfig {
    fn default() -> Self {
        Self {
            max_profiles_per_operation: 1000,
            min_profiles_for_optimization: 10,
            confidence_threshold: 0.7,
            improvement_threshold: 0.1,
            adaptive_optimization: true,
            profile_interval: Duration::from_secs(60),
        }
    }
}

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_optimizer_creation() {
        let config = OptimizerConfig::default();
        let optimizer = ProfileGuidedOptimizer::new(config).unwrap();

        let stats = optimizer.get_optimization_stats();
        assert_eq!(stats.total_operations, 0);
        assert_eq!(stats.optimized_operations, 0);
    }

    #[test]
    fn test_data_characteristics() {
        let mut characteristics = DataCharacteristics {
            n_samples: 1000,
            n_features: 50,
            sparsity_scaled: 100,
            dtype_size: 8,
            memory_layout: MemoryLayout::RowMajor,
            cache_friendliness_scaled: 800,
        };

        assert_eq!(characteristics.n_samples, 1000);
        assert_eq!(characteristics.n_features, 50);
        assert_eq!(characteristics.sparsity(), 0.1);
        assert_eq!(characteristics.cache_friendliness(), 0.8);

        characteristics.set_sparsity(0.5);
        assert_eq!(characteristics.sparsity(), 0.5);
    }

    #[test]
    fn test_performance_profile() {
        let profile = PerformanceProfile {
            operation_id: "test_op".to_string(),
            data_characteristics: DataCharacteristics {
                n_samples: 100,
                n_features: 10,
                sparsity_scaled: 0,
                dtype_size: 8,
                memory_layout: MemoryLayout::RowMajor,
                cache_friendliness_scaled: 1000,
            },
            metrics: ExecutionMetrics {
                execution_time: Duration::from_millis(100),
                cpu_time: Duration::from_millis(100),
                memory_allocated: 8000,
                peak_memory: 8000,
                cache_misses: 0,
                simd_operations: 100,
                parallel_efficiency: 1.0,
                memory_bandwidth: 0.5,
                flops_per_second: 1e6,
            },
            algorithm_variant: "test_algo".to_string(),
            optimization_level: OptimizationLevel::Basic,
            hardware_context: HardwareContext {
                cpu_cores: 4,
                cache_sizes: vec![32768, 262144],
                simd_features: vec![SimdFeature::SSE2],
                memory_bandwidth: 25.6,
                cpu_frequency: 3000.0,
            },
            timestamp: Instant::now(),
        };

        assert_eq!(profile.operation_id, "test_op");
        assert_eq!(profile.algorithm_variant, "test_algo");
    }

    #[test]
    fn test_ml_predictor() {
        let predictor = MLPerformancePredictor::new();
        assert_eq!(predictor.accuracy(), 0.0);

        let characteristics = DataCharacteristics {
            n_samples: 100,
            n_features: 10,
            sparsity_scaled: 0,
            dtype_size: 8,
            memory_layout: MemoryLayout::RowMajor,
            cache_friendliness_scaled: 1000,
        };

        let prediction = predictor.predict_execution_time(&characteristics).unwrap();
        assert!(prediction.as_secs_f64() >= 0.0);
    }

    #[test]
    fn test_optimization_strategy() {
        let strategy = OptimizationStrategy {
            preferred_algorithm: "test_algo".to_string(),
            optimization_level: OptimizationLevel::Advanced,
            memory_layout: MemoryLayout::ColumnMajor,
            parallel_strategy: ParallelStrategy::Hybrid,
            cache_hints: CacheOptimizationHints {
                block_size: 512,
                use_prefetch: true,
                access_pattern: AccessPattern::Blocked,
                cache_friendly_algorithms: true,
            },
            confidence: 0.9,
        };

        assert_eq!(strategy.preferred_algorithm, "test_algo");
        assert_eq!(strategy.optimization_level, OptimizationLevel::Advanced);
        assert_eq!(strategy.confidence, 0.9);
    }

    #[test]
    fn test_simd_feature_detection() {
        let features = ProfileGuidedOptimizer::detect_simd_features();
        // SSE2 is part of the x86_64 baseline, so it must always be reported there.
        #[cfg(target_arch = "x86_64")]
        assert!(features.contains(&SimdFeature::SSE2));
        println!("Detected SIMD features: {:?}", features);
    }
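
    // The following tests are additional usage sketches: they exercise the
    // profile -> strategy round trip and the heuristic predictor. Operation
    // ids and metric values are illustrative only.

    #[test]
    fn test_profile_to_strategy_round_trip() {
        let optimizer = ProfileGuidedOptimizer::new(OptimizerConfig::default()).unwrap();

        let characteristics = DataCharacteristics {
            n_samples: 1000,
            n_features: 50,
            sparsity_scaled: 100,
            dtype_size: 8,
            memory_layout: MemoryLayout::RowMajor,
            cache_friendliness_scaled: 800,
        };

        let profile = PerformanceProfile {
            operation_id: "example_op".to_string(),
            data_characteristics: characteristics.clone(),
            metrics: ExecutionMetrics {
                execution_time: Duration::from_millis(5),
                cpu_time: Duration::from_millis(5),
                memory_allocated: 400_000,
                peak_memory: 400_000,
                cache_misses: 0,
                simd_operations: 0,
                parallel_efficiency: 1.0,
                memory_bandwidth: 0.5,
                flops_per_second: 1e9,
            },
            algorithm_variant: "general_purpose".to_string(),
            optimization_level: OptimizationLevel::Basic,
            hardware_context: ProfileGuidedOptimizer::detect_hardware_context(),
            timestamp: Instant::now(),
        };

        optimizer.add_profile(profile).unwrap();

        // One recorded run is enough for a strategy; more profiles only raise
        // the reported confidence.
        let strategy = optimizer
            .get_strategy("example_op", &characteristics)
            .unwrap();
        assert_eq!(strategy.preferred_algorithm, "general_purpose");
        assert!(strategy.confidence > 0.0 && strategy.confidence <= 1.0);
    }

    #[test]
    fn test_heuristic_predictor_scales_with_size() {
        let predictor = HeuristicPredictor::new();

        let small = DataCharacteristics {
            n_samples: 100,
            n_features: 10,
            sparsity_scaled: 0,
            dtype_size: 8,
            memory_layout: MemoryLayout::RowMajor,
            cache_friendliness_scaled: 1000,
        };
        let mut large = small.clone();
        large.n_samples = 100_000;

        let t_small = predictor.predict_execution_time(&small).unwrap();
        let t_large = predictor.predict_execution_time(&large).unwrap();
        assert!(t_large >= t_small);
    }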
}

/// Runtime (JIT-style) optimizer that caches compiled variants per
/// operation/strategy combination.
#[derive(Debug)]
pub struct RuntimeOptimizer {
    compiled_variants: Arc<RwLock<HashMap<String, CompiledVariant>>>,
    compilation_stats: Arc<RwLock<CompilationStats>>,
    config: RuntimeOptimizerConfig,
}

/// A cached, specialized implementation of an operation.
#[derive(Debug, Clone)]
pub struct CompiledVariant {
    pub variant_id: String,
    pub optimization_level: OptimizationLevel,
    pub target_features: Vec<SimdFeature>,
    pub compiled_at: Instant,
    pub performance_profile: Option<PerformanceProfile>,
    pub compilation_successful: bool,
}

/// Tuning knobs for `RuntimeOptimizer`.
#[derive(Debug, Clone)]
pub struct RuntimeOptimizerConfig {
    pub enable_jit: bool,
    pub max_variants: usize,
    pub compilation_timeout: Duration,
    pub min_improvement: f64,
    pub enable_pgo_recompilation: bool,
}

/// Counters describing compilation and cache behavior.
#[derive(Debug, Clone)]
pub struct CompilationStats {
    pub total_compilations: usize,
    pub successful_compilations: usize,
    pub total_compilation_time: Duration,
    pub average_compilation_time: Duration,
    pub cache_hits: usize,
    pub cache_misses: usize,
}

impl RuntimeOptimizer {
    #[must_use]
    pub fn new(config: RuntimeOptimizerConfig) -> Self {
        Self {
            compiled_variants: Arc::new(RwLock::new(HashMap::new())),
            compilation_stats: Arc::new(RwLock::new(CompilationStats {
                total_compilations: 0,
                successful_compilations: 0,
                total_compilation_time: Duration::from_secs(0),
                average_compilation_time: Duration::from_secs(0),
                cache_hits: 0,
                cache_misses: 0,
            })),
            config,
        }
    }

    pub fn get_optimized_variant(
        &self,
        operation_id: &str,
        characteristics: &DataCharacteristics,
        strategy: &OptimizationStrategy,
    ) -> SklResult<String> {
        let variant_key = self.generate_variant_key(operation_id, characteristics, strategy);

        if let Ok(variants) = self.compiled_variants.read() {
            if let Some(variant) = variants.get(&variant_key) {
                if variant.compilation_successful {
                    self.update_cache_stats(true);
                    return Ok(variant.variant_id.clone());
                }
            }
        }

        self.update_cache_stats(false);

        if self.config.enable_jit {
            self.compile_variant(operation_id, characteristics, strategy)
        } else {
            Ok(strategy.preferred_algorithm.clone())
        }
    }

    fn compile_variant(
        &self,
        operation_id: &str,
        characteristics: &DataCharacteristics,
        strategy: &OptimizationStrategy,
    ) -> SklResult<String> {
        let start_time = Instant::now();
        let variant_key = self.generate_variant_key(operation_id, characteristics, strategy);

        let compilation_successful = self.simulate_compilation(strategy);
        let compilation_time = start_time.elapsed();

        let variant = CompiledVariant {
            variant_id: format!("{}_{}", operation_id, compilation_time.as_nanos()),
            optimization_level: strategy.optimization_level,
            target_features: self.select_target_features(strategy),
            compiled_at: Instant::now(),
            performance_profile: None,
            compilation_successful,
        };

        self.update_compilation_stats(compilation_time, compilation_successful);

        if let Ok(mut variants) = self.compiled_variants.write() {
            if variants.len() >= self.config.max_variants {
                self.evict_old_variants(&mut variants);
            }
            variants.insert(variant_key, variant.clone());
        }

        Ok(variant.variant_id)
    }

    fn generate_variant_key(
        &self,
        operation_id: &str,
        characteristics: &DataCharacteristics,
        strategy: &OptimizationStrategy,
    ) -> String {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::Hasher;

        let mut hasher = DefaultHasher::new();
        operation_id.hash(&mut hasher);
        characteristics.hash(&mut hasher);
        format!("{:?}", strategy.optimization_level).hash(&mut hasher);
        format!("{:?}", strategy.parallel_strategy).hash(&mut hasher);

        format!("{}_{:x}", operation_id, hasher.finish())
    }

    fn simulate_compilation(&self, strategy: &OptimizationStrategy) -> bool {
        // Placeholder: higher optimization levels are given a higher simulated
        // chance of compilation failure.
        match strategy.optimization_level {
            OptimizationLevel::None => true,
            OptimizationLevel::Basic => thread_rng().gen::<f64>() > 0.1,
            OptimizationLevel::Advanced => thread_rng().gen::<f64>() > 0.2,
            OptimizationLevel::Aggressive => thread_rng().gen::<f64>() > 0.3,
        }
    }

    fn select_target_features(&self, strategy: &OptimizationStrategy) -> Vec<SimdFeature> {
        let mut features = Vec::new();

        match strategy.optimization_level {
            OptimizationLevel::None => {}
            OptimizationLevel::Basic => {
                features.push(SimdFeature::SSE2);
            }
            OptimizationLevel::Advanced => {
                features.extend_from_slice(&[SimdFeature::SSE2, SimdFeature::AVX]);
            }
            OptimizationLevel::Aggressive => {
                features.extend_from_slice(&[
                    SimdFeature::SSE2,
                    SimdFeature::AVX,
                    SimdFeature::AVX2,
                    SimdFeature::AVX512F,
                ]);
            }
        }

        features
    }

    fn update_cache_stats(&self, hit: bool) {
        if let Ok(mut stats) = self.compilation_stats.write() {
            if hit {
                stats.cache_hits += 1;
            } else {
                stats.cache_misses += 1;
            }
        }
    }

    fn update_compilation_stats(&self, compilation_time: Duration, successful: bool) {
        if let Ok(mut stats) = self.compilation_stats.write() {
            stats.total_compilations += 1;
            if successful {
                stats.successful_compilations += 1;
            }
            stats.total_compilation_time += compilation_time;
            stats.average_compilation_time =
                stats.total_compilation_time / stats.total_compilations as u32;
        }
    }

    fn evict_old_variants(&self, variants: &mut HashMap<String, CompiledVariant>) {
        if let Some(oldest_key) = variants
            .iter()
            .min_by_key(|(_, variant)| variant.compiled_at)
            .map(|(key, _)| key.clone())
        {
            variants.remove(&oldest_key);
        }
    }

    pub fn get_compilation_stats(&self) -> SklResult<CompilationStats> {
        self.compilation_stats
            .read()
            .map(|stats| stats.clone())
            .map_err(|_| SklearsError::InvalidInput("Failed to read compilation stats".to_string()))
    }

    pub fn trigger_pgo_recompilation(
        &self,
        operation_id: &str,
        performance_profiles: &[PerformanceProfile],
    ) -> SklResult<()> {
        if !self.config.enable_pgo_recompilation {
            return Ok(());
        }

        if performance_profiles.is_empty() {
            return Ok(());
        }

        let avg_performance = performance_profiles
            .iter()
            .map(|p| p.metrics.execution_time.as_secs_f64())
            .sum::<f64>()
            / performance_profiles.len() as f64;

        if let Ok(mut variants) = self.compiled_variants.write() {
            for (key, variant) in variants.iter_mut() {
                if key.starts_with(operation_id) {
                    if let Some(ref profile) = variant.performance_profile {
                        let improvement_potential =
                            profile.metrics.execution_time.as_secs_f64() / avg_performance;
                        if improvement_potential > (1.0 + self.config.min_improvement) {
                            // Mark the variant stale so the next lookup recompiles it.
                            variant.compilation_successful = false;
                        }
                    }
                }
            }
        }

        Ok(())
    }
}

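// Usage sketch: resolve (or "compile") a variant id for a given strategy. The
// operation id and strategy here are illustrative; a strategy would normally
// come from `ProfileGuidedOptimizer::get_strategy`.
//
// let runtime = RuntimeOptimizer::new(RuntimeOptimizerConfig::default());
// let variant_id = runtime.get_optimized_variant("gemm_f64", &characteristics, &strategy)?;
// let stats = runtime.get_compilation_stats()?;
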
impl Default for RuntimeOptimizerConfig {
    fn default() -> Self {
        Self {
            enable_jit: true,
            max_variants: 100,
            compilation_timeout: Duration::from_secs(30),
            min_improvement: 0.1,
            enable_pgo_recompilation: true,
        }
    }
}

/// Combines several predictors and weights them by their reported accuracy.
#[derive(Debug)]
pub struct EnsemblePerformancePredictor {
    predictors: Vec<Box<dyn PerformancePredictor + Send + Sync>>,
    weights: Vec<f64>,
    ensemble_accuracy: f64,
}

impl Default for EnsemblePerformancePredictor {
    fn default() -> Self {
        Self::new()
    }
}

impl EnsemblePerformancePredictor {
    #[must_use]
    pub fn new() -> Self {
        let predictors: Vec<Box<dyn PerformancePredictor + Send + Sync>> = vec![
            Box::new(MLPerformancePredictor::new()),
            Box::new(HeuristicPredictor::new()),
            Box::new(PolynomialPredictor::new()),
        ];

        let weights = vec![1.0 / predictors.len() as f64; predictors.len()];

        Self {
            predictors,
            weights,
            ensemble_accuracy: 0.0,
        }
    }

    fn update_weights(&mut self) {
        let total_accuracy: f64 = self.predictors.iter().map(|p| p.accuracy()).sum();

        if total_accuracy > 0.0 {
            for (i, predictor) in self.predictors.iter().enumerate() {
                self.weights[i] = predictor.accuracy() / total_accuracy;
            }
        }

        self.ensemble_accuracy = self
            .predictors
            .iter()
            .enumerate()
            .map(|(i, p)| p.accuracy() * self.weights[i])
            .sum();
    }
}

impl PerformancePredictor for EnsemblePerformancePredictor {
    fn predict_execution_time(&self, characteristics: &DataCharacteristics) -> SklResult<Duration> {
        let mut weighted_prediction = 0.0;

        for (i, predictor) in self.predictors.iter().enumerate() {
            let prediction = predictor
                .predict_execution_time(characteristics)?
                .as_secs_f64();
            weighted_prediction += prediction * self.weights[i];
        }

        Ok(Duration::from_secs_f64(weighted_prediction.max(0.0)))
    }

    fn predict_memory_usage(&self, characteristics: &DataCharacteristics) -> SklResult<usize> {
        let mut weighted_prediction = 0.0;

        for (i, predictor) in self.predictors.iter().enumerate() {
            let prediction = predictor.predict_memory_usage(characteristics)? as f64;
            weighted_prediction += prediction * self.weights[i];
        }

        Ok(weighted_prediction.max(0.0) as usize)
    }

    fn update(&mut self, profile: &PerformanceProfile) -> SklResult<()> {
        for predictor in &mut self.predictors {
            predictor.update(profile)?;
        }

        self.update_weights();
        Ok(())
    }

    fn accuracy(&self) -> f64 {
        self.ensemble_accuracy
    }
}

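// Weighting sketch: `update_weights` normalizes each member's reported
// accuracy, so member accuracies of (0.2, 0.6, 0.2) yield ensemble weights of
// (0.2, 0.6, 0.2) and an ensemble accuracy of 0.2*0.2 + 0.6*0.6 + 0.2*0.2 = 0.44.
// Before the first update every member carries an equal weight of 1/3.
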
/// Closed-form heuristic predictor; it never learns from new profiles.
#[derive(Debug)]
pub struct HeuristicPredictor {
    accuracy: f64,
}

impl Default for HeuristicPredictor {
    fn default() -> Self {
        Self::new()
    }
}

impl HeuristicPredictor {
    #[must_use]
    pub fn new() -> Self {
        // Fixed nominal accuracy reported for the heuristic model.
        Self { accuracy: 0.6 }
    }
}

impl PerformancePredictor for HeuristicPredictor {
    fn predict_execution_time(&self, characteristics: &DataCharacteristics) -> SklResult<Duration> {
        let base_time = (characteristics.n_samples * characteristics.n_features) as f64;
        let sparsity_factor = 1.0 - characteristics.sparsity() * 0.5;
        let cache_factor = 1.0 + (1.0 - characteristics.cache_friendliness()) * 0.3;

        let estimated_time = base_time * sparsity_factor * cache_factor / 1e6;
        Ok(Duration::from_secs_f64(estimated_time.max(0.001)))
    }

    fn predict_memory_usage(&self, characteristics: &DataCharacteristics) -> SklResult<usize> {
        let base_memory =
            characteristics.n_samples * characteristics.n_features * characteristics.dtype_size;
        let overhead = (base_memory as f64 * 0.2) as usize;
        Ok(base_memory + overhead)
    }

    fn update(&mut self, _profile: &PerformanceProfile) -> SklResult<()> {
        Ok(())
    }

    fn accuracy(&self) -> f64 {
        self.accuracy
    }
}

/// Second-order polynomial regression predictor trained online.
#[derive(Debug)]
pub struct PolynomialPredictor {
    coefficients: Vec<f64>,
    accuracy: f64,
    training_data: Vec<(Vec<f64>, f64)>,
}

impl Default for PolynomialPredictor {
    fn default() -> Self {
        Self::new()
    }
}

impl PolynomialPredictor {
    #[must_use]
    pub fn new() -> Self {
        Self {
            // One coefficient per entry returned by `polynomial_features`.
            coefficients: vec![1.0; 15],
            accuracy: 0.0,
            training_data: Vec::new(),
        }
    }

    fn polynomial_features(&self, characteristics: &DataCharacteristics) -> Vec<f64> {
        let n_samples = characteristics.n_samples as f64;
        let n_features = characteristics.n_features as f64;
        let sparsity = characteristics.sparsity();
        let cache_friendliness = characteristics.cache_friendliness();

        vec![
            1.0,
            n_samples,
            n_features,
            sparsity,
            cache_friendliness,
            n_samples * n_features,
            n_samples * sparsity,
            n_features * sparsity,
            n_samples * cache_friendliness,
            n_features * cache_friendliness,
            sparsity * cache_friendliness,
            n_samples.powi(2),
            n_features.powi(2),
            sparsity.powi(2),
            cache_friendliness.powi(2),
        ]
    }
}

impl PerformancePredictor for PolynomialPredictor {
    fn predict_execution_time(&self, characteristics: &DataCharacteristics) -> SklResult<Duration> {
        let features = self.polynomial_features(characteristics);
        let prediction = features
            .iter()
            .zip(&self.coefficients)
            .map(|(f, c)| f * c)
            .sum::<f64>()
            .max(0.001);

        Ok(Duration::from_secs_f64(prediction))
    }

    fn predict_memory_usage(&self, characteristics: &DataCharacteristics) -> SklResult<usize> {
        let base_memory =
            characteristics.n_samples * characteristics.n_features * characteristics.dtype_size;
        Ok(base_memory)
    }

    fn update(&mut self, profile: &PerformanceProfile) -> SklResult<()> {
        let features = self.polynomial_features(&profile.data_characteristics);
        let target = profile.metrics.execution_time.as_secs_f64();

        self.training_data.push((features, target));

        if self.training_data.len() % 20 == 0 {
            self.train_polynomial_regression()?;
        }

        Ok(())
    }

    fn accuracy(&self) -> f64 {
        self.accuracy
    }
}

impl PolynomialPredictor {
    fn train_polynomial_regression(&mut self) -> SklResult<()> {
        if self.training_data.len() < 10 {
            return Ok(());
        }

        let n = self.training_data.len();
        let p = self.coefficients.len();

        let mut x_matrix = vec![vec![0.0; p]; n];
        let mut y_vector = vec![0.0; n];

        for (i, (features, target)) in self.training_data.iter().enumerate() {
            for (j, &feature) in features.iter().enumerate() {
                x_matrix[i][j] = feature;
            }
            y_vector[i] = *target;
        }

        let learning_rate = 0.0001;
        let epochs = 50;

        for _ in 0..epochs {
            let mut gradients = vec![0.0; p];
            let mut total_error = 0.0;

            for i in 0..n {
                let prediction: f64 = x_matrix[i]
                    .iter()
                    .zip(&self.coefficients)
                    .map(|(x, c)| x * c)
                    .sum();

                let error = prediction - y_vector[i];
                total_error += error * error;

                for j in 0..p {
                    gradients[j] += error * x_matrix[i][j];
                }
            }

            for (coeff, grad) in self.coefficients.iter_mut().zip(&gradients) {
                *coeff -= learning_rate * grad / n as f64;
            }

            let mse = total_error / n as f64;
            self.accuracy = (1.0 / (1.0 + mse)).min(1.0);
        }

        Ok(())
    }
}
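
// Additional sketch-level tests for the predictor types defined after the main
// `tests` module; the data values are illustrative only.
#[cfg(test)]
mod predictor_tests {
    use super::*;

    #[test]
    fn test_ensemble_prediction_is_finite_and_nonnegative() {
        let predictor = EnsemblePerformancePredictor::new();
        let characteristics = DataCharacteristics {
            n_samples: 500,
            n_features: 20,
            sparsity_scaled: 250,
            dtype_size: 8,
            memory_layout: MemoryLayout::ColumnMajor,
            cache_friendliness_scaled: 500,
        };

        let time = predictor.predict_execution_time(&characteristics).unwrap();
        let memory = predictor.predict_memory_usage(&characteristics).unwrap();

        assert!(time.as_secs_f64().is_finite());
        assert!(time.as_secs_f64() >= 0.0);
        assert!(memory > 0);
    }

    #[test]
    fn test_polynomial_predictor_has_one_coefficient_per_feature() {
        let predictor = PolynomialPredictor::new();
        let characteristics = DataCharacteristics {
            n_samples: 100,
            n_features: 10,
            sparsity_scaled: 0,
            dtype_size: 8,
            memory_layout: MemoryLayout::RowMajor,
            cache_friendliness_scaled: 1000,
        };

        let features = predictor.polynomial_features(&characteristics);
        assert_eq!(features.len(), predictor.coefficients.len());
    }
}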