1use std::collections::{HashMap, VecDeque};
9use std::hash::Hash;
10use std::sync::{Arc, RwLock};
11use std::thread;
12use std::time::{Duration, Instant};
13
14use scirs2_core::random::{thread_rng, Rng};
15
16use sklears_core::error::{Result as SklResult, SklearsError};
17
/// One recorded execution of an operation: the data it ran on, the measured
/// metrics, the algorithm variant used, and the hardware it ran under.
#[derive(Debug, Clone)]
pub struct PerformanceProfile {
    /// Identifier of the profiled operation; used as the grouping key.
    pub operation_id: String,
    /// Shape/layout summary of the input data for this run.
    pub data_characteristics: DataCharacteristics,
    /// Measured runtime/memory/throughput metrics.
    pub metrics: ExecutionMetrics,
    /// Name of the algorithm variant that produced these metrics.
    pub algorithm_variant: String,
    /// Optimization level the variant ran with.
    pub optimization_level: OptimizationLevel,
    /// Hardware snapshot at profiling time.
    pub hardware_context: HardwareContext,
    /// When the profile was captured.
    pub timestamp: Instant,
}
36
/// Summary of a dataset's shape and layout, used as a lookup key for cached
/// algorithm choices. Fractional quantities (sparsity, cache friendliness)
/// are stored as integers scaled by 1000 so the type can derive `Eq`/`Hash`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DataCharacteristics {
    /// Number of rows/samples.
    pub n_samples: usize,
    /// Number of columns/features.
    pub n_features: usize,
    /// Sparsity fraction scaled by 1000 (0..=1000 encodes 0.0..=1.0).
    pub sparsity_scaled: u32,
    /// Size of one element in bytes.
    pub dtype_size: usize,
    /// Physical layout of the data in memory.
    pub memory_layout: MemoryLayout,
    /// Cache-friendliness fraction scaled by 1000.
    pub cache_friendliness_scaled: u32,
}
53
54impl DataCharacteristics {
55 #[must_use]
57 pub fn sparsity(&self) -> f64 {
58 f64::from(self.sparsity_scaled) / 1000.0
59 }
60
61 pub fn set_sparsity(&mut self, sparsity: f64) {
63 self.sparsity_scaled = (sparsity * 1000.0).round() as u32;
64 }
65
66 #[must_use]
68 pub fn cache_friendliness(&self) -> f64 {
69 f64::from(self.cache_friendliness_scaled) / 1000.0
70 }
71
72 pub fn set_cache_friendliness(&mut self, cache_friendliness: f64) {
74 self.cache_friendliness_scaled = (cache_friendliness * 1000.0).round() as u32;
75 }
76}
77
/// Physical arrangement of a dataset in memory.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MemoryLayout {
    /// Rows contiguous (C order).
    RowMajor,
    /// Columns contiguous (Fortran order).
    ColumnMajor,
    /// Row and column data interleaved.
    Interleaved,
    /// Some other, caller-defined layout.
    Custom,
}
90
/// Measured (or predicted) performance numbers for one operation run.
#[derive(Debug, Clone)]
pub struct ExecutionMetrics {
    /// Wall-clock duration of the run.
    pub execution_time: Duration,
    /// CPU time consumed.
    pub cpu_time: Duration,
    /// Total bytes allocated.
    pub memory_allocated: usize,
    /// Peak resident memory in bytes.
    pub peak_memory: usize,
    /// Observed cache-miss count.
    pub cache_misses: usize,
    /// Number of SIMD operations executed.
    pub simd_operations: usize,
    /// Parallel efficiency in `[0.0, 1.0]` (1.0 = perfect scaling).
    pub parallel_efficiency: f64,
    /// Memory bandwidth figure; units not established in this file
    /// (placeholder 0.5 is used for predictions) — TODO confirm with callers.
    pub memory_bandwidth: f64,
    /// Floating-point throughput in FLOP/s.
    pub flops_per_second: f64,
}
113
/// How aggressively an operation is optimized/compiled; ordered from no
/// optimization to the most aggressive (and most failure-prone) level.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum OptimizationLevel {
    /// No optimization.
    None,
    /// Basic optimizations only.
    Basic,
    /// Advanced optimizations.
    Advanced,
    /// Everything enabled, including wide SIMD targets.
    Aggressive,
}
126
/// Snapshot of the host hardware used when interpreting profiles.
#[derive(Debug, Clone)]
pub struct HardwareContext {
    /// Available logical CPU cores.
    pub cpu_cores: usize,
    /// Cache sizes in bytes, ordered by level (L1, L2, L3, ...).
    pub cache_sizes: Vec<usize>,
    /// SIMD instruction sets detected on the CPU.
    pub simd_features: Vec<SimdFeature>,
    /// Memory bandwidth; `detect_hardware_context` fills in a fixed typical
    /// value (25.6), presumably GB/s — TODO confirm.
    pub memory_bandwidth: f64,
    /// CPU frequency; fixed typical value (3000.0), presumably MHz — TODO confirm.
    pub cpu_frequency: f64,
}
141
/// SIMD instruction-set extensions the host CPU may support.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SimdFeature {
    /// x86 SSE.
    SSE,
    /// x86 SSE2.
    SSE2,
    /// x86 SSE3.
    SSE3,
    /// x86 SSE4.1.
    SSE4_1,
    /// x86 SSE4.2.
    SSE4_2,
    /// x86 AVX (256-bit).
    AVX,
    /// x86 AVX2.
    AVX2,
    /// x86 AVX-512 Foundation.
    AVX512F,
    /// ARM NEON.
    NEON,
}
164
/// Collects performance profiles per operation and derives optimization
/// strategies, cached algorithm choices, and learned performance predictors.
#[derive(Debug)]
pub struct ProfileGuidedOptimizer {
    /// Bounded history of profiles keyed by operation id.
    profiles: Arc<RwLock<HashMap<String, VecDeque<PerformanceProfile>>>>,
    /// Best-known strategy per operation id.
    strategies: Arc<RwLock<HashMap<String, OptimizationStrategy>>>,
    /// Algorithm choice memoized by exact data characteristics.
    algorithm_cache: Arc<RwLock<HashMap<DataCharacteristics, String>>>,
    /// One learned predictor per operation id.
    predictors: Arc<RwLock<HashMap<String, Box<dyn PerformancePredictor + Send + Sync>>>>,
    /// Tuning knobs (history bounds, thresholds, intervals).
    config: OptimizerConfig,
    /// Hardware snapshot taken at construction.
    hardware_context: HardwareContext,
}
181
/// A concrete plan for executing an operation: which algorithm, at what
/// optimization level, with which layout/parallelism/cache settings.
#[derive(Debug, Clone)]
pub struct OptimizationStrategy {
    /// Name of the algorithm variant to run.
    pub preferred_algorithm: String,
    /// Optimization level to compile/run with.
    pub optimization_level: OptimizationLevel,
    /// Preferred in-memory data layout.
    pub memory_layout: MemoryLayout,
    /// Parallelization scheme to use.
    pub parallel_strategy: ParallelStrategy,
    /// Cache-blocking and prefetch hints.
    pub cache_hints: CacheOptimizationHints,
    /// Confidence in this strategy, in `[0.0, 1.0]`.
    pub confidence: f64,
}
198
/// How an operation's work is parallelized.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ParallelStrategy {
    /// Single-threaded, scalar execution.
    Serial,
    /// Multi-threaded execution.
    ThreadParallel,
    /// SIMD-vectorized, single-threaded execution.
    Vectorized,
    /// Threads plus SIMD.
    Hybrid,
    /// GPU offload (never selected by the heuristics in this file).
    GPU,
}
208
/// Hints for cache-aware execution of an operation.
#[derive(Debug, Clone)]
pub struct CacheOptimizationHints {
    /// Blocking/tile size in elements.
    pub block_size: usize,
    /// Whether software prefetching is expected to pay off.
    pub use_prefetch: bool,
    /// Expected memory access pattern.
    pub access_pattern: AccessPattern,
    /// Whether cache-friendly algorithm variants should be preferred.
    pub cache_friendly_algorithms: bool,
}
221
/// Dominant memory access pattern of an operation.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum AccessPattern {
    /// Contiguous, in-order accesses.
    Sequential,
    /// Unpredictable accesses.
    Random,
    /// Fixed-stride accesses.
    Strided,
    /// Tile/block-wise accesses.
    Blocked,
}
230
/// Tuning knobs for [`ProfileGuidedOptimizer`].
#[derive(Debug, Clone)]
pub struct OptimizerConfig {
    /// Cap on stored profiles per operation; older entries are evicted.
    pub max_profiles_per_operation: usize,
    /// Minimum profile count before a strategy is (re)optimized.
    pub min_profiles_for_optimization: usize,
    /// Minimum confidence for acting on a strategy (not enforced in this file
    /// — presumably consumed by callers; confirm).
    pub confidence_threshold: f64,
    /// Minimum relative improvement worth switching for (not enforced here).
    pub improvement_threshold: f64,
    /// Whether strategies adapt as new profiles arrive (not read in this file).
    pub adaptive_optimization: bool,
    /// Interval between profiling passes (not read in this file).
    pub profile_interval: Duration,
}
247
/// Models that estimate how an operation will perform on given data and
/// learn from observed profiles.
pub trait PerformancePredictor: Send + Sync + std::fmt::Debug {
    /// Estimates wall-clock execution time for data with these characteristics.
    fn predict_execution_time(&self, characteristics: &DataCharacteristics) -> SklResult<Duration>;

    /// Estimates memory usage in bytes.
    fn predict_memory_usage(&self, characteristics: &DataCharacteristics) -> SklResult<usize>;

    /// Incorporates a newly observed profile into the model.
    fn update(&mut self, profile: &PerformanceProfile) -> SklResult<()>;

    /// Self-reported prediction accuracy in `[0.0, 1.0]`.
    fn accuracy(&self) -> f64;
}
262
/// Linear-regression performance predictor trained online from observed
/// profiles via batch gradient descent.
#[derive(Debug)]
pub struct MLPerformancePredictor {
    /// `(characteristics, measured metrics)` pairs collected via `update`.
    training_data: Vec<(DataCharacteristics, ExecutionMetrics)>,
    /// One weight per feature produced by `extract_features`.
    weights: Vec<f64>,
    /// Last computed accuracy proxy (see `train`).
    accuracy: f64,
    /// Total profiles seen; retraining happens every 50.
    training_samples: usize,
}
275
276impl MLPerformancePredictor {
277 #[must_use]
279 pub fn new() -> Self {
280 Self {
281 training_data: Vec::new(),
282 weights: vec![1.0; 10], accuracy: 0.0,
284 training_samples: 0,
285 }
286 }
287
288 fn extract_features(&self, characteristics: &DataCharacteristics) -> Vec<f64> {
290 vec![
291 characteristics.n_samples as f64,
292 characteristics.n_features as f64,
293 characteristics.sparsity(),
294 characteristics.dtype_size as f64,
295 characteristics.cache_friendliness(),
296 (characteristics.n_samples * characteristics.n_features) as f64, (characteristics.n_samples as f64).log2(),
298 (characteristics.n_features as f64).log2(),
299 characteristics.sparsity() * characteristics.n_features as f64,
300 characteristics.cache_friendliness() * characteristics.n_samples as f64,
301 ]
302 }
303
304 fn train(&mut self) -> SklResult<()> {
306 if self.training_data.len() < 10 {
307 return Ok(()); }
309
310 let learning_rate = 0.001;
312 let epochs = 100;
313
314 for _ in 0..epochs {
315 let mut gradients = vec![0.0; self.weights.len()];
316 let mut total_error = 0.0;
317
318 for (characteristics, metrics) in &self.training_data {
319 let features = self.extract_features(characteristics);
320 let predicted = features
321 .iter()
322 .zip(&self.weights)
323 .map(|(f, w)| f * w)
324 .sum::<f64>();
325
326 let actual = metrics.execution_time.as_secs_f64();
327 let error = predicted - actual;
328 total_error += error * error;
329
330 for (i, feature) in features.iter().enumerate() {
331 gradients[i] += error * feature;
332 }
333 }
334
335 for (weight, gradient) in self.weights.iter_mut().zip(&gradients) {
337 *weight -= learning_rate * gradient / self.training_data.len() as f64;
338 }
339
340 let mse = total_error / self.training_data.len() as f64;
342 self.accuracy = (1.0 - mse).max(0.0).min(1.0);
343 }
344
345 Ok(())
346 }
347}
348
349impl PerformancePredictor for MLPerformancePredictor {
350 fn predict_execution_time(&self, characteristics: &DataCharacteristics) -> SklResult<Duration> {
351 let features = self.extract_features(characteristics);
352 let prediction = features
353 .iter()
354 .zip(&self.weights)
355 .map(|(f, w)| f * w)
356 .sum::<f64>()
357 .max(0.0);
358
359 Ok(Duration::from_secs_f64(prediction))
360 }
361
362 fn predict_memory_usage(&self, characteristics: &DataCharacteristics) -> SklResult<usize> {
363 let base_memory =
365 characteristics.n_samples * characteristics.n_features * characteristics.dtype_size;
366 let overhead_factor = 1.0 + (1.0 - characteristics.sparsity()) * 0.5;
367 Ok((base_memory as f64 * overhead_factor) as usize)
368 }
369
370 fn update(&mut self, profile: &PerformanceProfile) -> SklResult<()> {
371 self.training_data.push((
372 profile.data_characteristics.clone(),
373 profile.metrics.clone(),
374 ));
375
376 self.training_samples += 1;
377
378 if self.training_samples % 50 == 0 {
380 self.train()?;
381 }
382
383 Ok(())
384 }
385
386 fn accuracy(&self) -> f64 {
387 self.accuracy
388 }
389}
390
391impl Default for MLPerformancePredictor {
392 fn default() -> Self {
393 Self::new()
394 }
395}
396
397impl ProfileGuidedOptimizer {
398 pub fn new(config: OptimizerConfig) -> SklResult<Self> {
400 let hardware_context = Self::detect_hardware_context();
401
402 Ok(Self {
403 profiles: Arc::new(RwLock::new(HashMap::new())),
404 strategies: Arc::new(RwLock::new(HashMap::new())),
405 algorithm_cache: Arc::new(RwLock::new(HashMap::new())),
406 predictors: Arc::new(RwLock::new(HashMap::new())),
407 config,
408 hardware_context,
409 })
410 }
411
412 fn detect_hardware_context() -> HardwareContext {
414 let cpu_cores = thread::available_parallelism()
415 .map(std::num::NonZero::get)
416 .unwrap_or(1);
417
418 HardwareContext {
421 cpu_cores,
422 cache_sizes: vec![32768, 262_144, 8_388_608], simd_features: Self::detect_simd_features(),
424 memory_bandwidth: 25.6, cpu_frequency: 3000.0, }
427 }
428
    /// Probes CPU SIMD capabilities at runtime.
    ///
    /// On x86_64 each instruction set is tested individually; on aarch64
    /// NEON is assumed present (it is mandatory on that architecture).
    /// Other targets report an empty set.
    fn detect_simd_features() -> Vec<SimdFeature> {
        let mut features = Vec::new();

        #[cfg(target_arch = "x86_64")]
        {
            if is_x86_feature_detected!("sse") {
                features.push(SimdFeature::SSE);
            }
            if is_x86_feature_detected!("sse2") {
                features.push(SimdFeature::SSE2);
            }
            if is_x86_feature_detected!("sse3") {
                features.push(SimdFeature::SSE3);
            }
            if is_x86_feature_detected!("sse4.1") {
                features.push(SimdFeature::SSE4_1);
            }
            if is_x86_feature_detected!("sse4.2") {
                features.push(SimdFeature::SSE4_2);
            }
            if is_x86_feature_detected!("avx") {
                features.push(SimdFeature::AVX);
            }
            if is_x86_feature_detected!("avx2") {
                features.push(SimdFeature::AVX2);
            }
            if is_x86_feature_detected!("avx512f") {
                features.push(SimdFeature::AVX512F);
            }
        }

        #[cfg(target_arch = "aarch64")]
        {
            features.push(SimdFeature::NEON);
        }

        features
    }
468
469 pub fn add_profile(&self, profile: PerformanceProfile) -> SklResult<()> {
471 let mut profiles = self.profiles.write().map_err(|_| {
472 SklearsError::InvalidInput("Failed to acquire profiles lock".to_string())
473 })?;
474
475 let operation_profiles = profiles
476 .entry(profile.operation_id.clone())
477 .or_insert_with(VecDeque::new);
478
479 operation_profiles.push_back(profile.clone());
480
481 while operation_profiles.len() > self.config.max_profiles_per_operation {
483 operation_profiles.pop_front();
484 }
485
486 if let Ok(mut predictors) = self.predictors.write() {
488 if let Some(predictor) = predictors.get_mut(&profile.operation_id) {
489 let _ = predictor.update(&profile);
490 } else {
491 let mut new_predictor = Box::new(MLPerformancePredictor::new());
492 let _ = new_predictor.update(&profile);
493 predictors.insert(profile.operation_id.clone(), new_predictor);
494 }
495 }
496
497 if operation_profiles.len() >= self.config.min_profiles_for_optimization {
499 self.optimize_strategy(&profile.operation_id)?;
500 }
501
502 Ok(())
503 }
504
505 pub fn get_strategy(
507 &self,
508 operation_id: &str,
509 characteristics: &DataCharacteristics,
510 ) -> SklResult<OptimizationStrategy> {
511 if let Ok(cache) = self.algorithm_cache.read() {
513 if let Some(cached_algorithm) = cache.get(characteristics) {
514 if let Ok(strategies) = self.strategies.read() {
515 if let Some(strategy) = strategies.get(operation_id) {
516 let mut cached_strategy = strategy.clone();
517 cached_strategy.preferred_algorithm = cached_algorithm.clone();
518 return Ok(cached_strategy);
519 }
520 }
521 }
522 }
523
524 self.generate_strategy(operation_id, characteristics)
526 }
527
528 fn generate_strategy(
530 &self,
531 operation_id: &str,
532 characteristics: &DataCharacteristics,
533 ) -> SklResult<OptimizationStrategy> {
534 let preferred_algorithm = self.select_algorithm(operation_id, characteristics)?;
535 let optimization_level = self.select_optimization_level(characteristics);
536 let memory_layout = self.select_memory_layout(characteristics);
537 let parallel_strategy = self.select_parallel_strategy(characteristics);
538 let cache_hints = self.generate_cache_hints(characteristics);
539
540 let confidence = self.calculate_confidence(operation_id, characteristics);
541
542 Ok(OptimizationStrategy {
543 preferred_algorithm,
544 optimization_level,
545 memory_layout,
546 parallel_strategy,
547 cache_hints,
548 confidence,
549 })
550 }
551
552 fn select_algorithm(
554 &self,
555 operation_id: &str,
556 characteristics: &DataCharacteristics,
557 ) -> SklResult<String> {
558 if let Ok(profiles) = self.profiles.read() {
559 if let Some(operation_profiles) = profiles.get(operation_id) {
560 let mut best_algorithm = "default".to_string();
562 let mut best_score = f64::INFINITY;
563
564 for profile in operation_profiles {
565 if self.characteristics_similar(&profile.data_characteristics, characteristics)
566 {
567 let score = profile.metrics.execution_time.as_secs_f64();
568 if score < best_score {
569 best_score = score;
570 best_algorithm = profile.algorithm_variant.clone();
571 }
572 }
573 }
574
575 return Ok(best_algorithm);
576 }
577 }
578
579 Ok(self.heuristic_algorithm_selection(characteristics))
581 }
582
583 fn characteristics_similar(&self, a: &DataCharacteristics, b: &DataCharacteristics) -> bool {
585 let size_ratio = (a.n_samples * a.n_features) as f64 / (b.n_samples * b.n_features) as f64;
586 let sparsity_diff = (a.sparsity() - b.sparsity()).abs();
587
588 (0.5..=2.0).contains(&size_ratio) && sparsity_diff < 0.3
589 }
590
591 fn heuristic_algorithm_selection(&self, characteristics: &DataCharacteristics) -> String {
593 let data_size = characteristics.n_samples * characteristics.n_features;
594
595 if characteristics.sparsity() > 0.7 {
596 "sparse_optimized".to_string()
597 } else if data_size < 10000 {
598 "small_data_optimized".to_string()
599 } else if data_size > 1_000_000 {
600 "large_data_optimized".to_string()
601 } else {
602 "general_purpose".to_string()
603 }
604 }
605
606 fn select_optimization_level(
608 &self,
609 characteristics: &DataCharacteristics,
610 ) -> OptimizationLevel {
611 let data_size = characteristics.n_samples * characteristics.n_features;
612
613 if data_size > 1_000_000 {
614 OptimizationLevel::Aggressive
615 } else if data_size > 100_000 {
616 OptimizationLevel::Advanced
617 } else if data_size > 10000 {
618 OptimizationLevel::Basic
619 } else {
620 OptimizationLevel::None
621 }
622 }
623
624 fn select_memory_layout(&self, characteristics: &DataCharacteristics) -> MemoryLayout {
626 if characteristics.n_features > characteristics.n_samples {
627 MemoryLayout::ColumnMajor
628 } else {
629 MemoryLayout::RowMajor
630 }
631 }
632
633 fn select_parallel_strategy(&self, characteristics: &DataCharacteristics) -> ParallelStrategy {
635 let data_size = characteristics.n_samples * characteristics.n_features;
636
637 if self
638 .hardware_context
639 .simd_features
640 .contains(&SimdFeature::AVX2)
641 && data_size > 100_000
642 {
643 ParallelStrategy::Hybrid
644 } else if self.hardware_context.cpu_cores > 1 && data_size > 50000 {
645 ParallelStrategy::ThreadParallel
646 } else if self.hardware_context.simd_features.len() > 2 {
647 ParallelStrategy::Vectorized
648 } else {
649 ParallelStrategy::Serial
650 }
651 }
652
653 fn generate_cache_hints(
655 &self,
656 characteristics: &DataCharacteristics,
657 ) -> CacheOptimizationHints {
658 let block_size = if self.hardware_context.cache_sizes.len() > 1 {
659 (self.hardware_context.cache_sizes[1] / characteristics.dtype_size).min(1024)
660 } else {
661 256
662 };
663
664 CacheOptimizationHints {
666 block_size,
667 use_prefetch: characteristics.n_samples > 10000,
668 access_pattern: if characteristics.cache_friendliness() > 0.7 {
669 AccessPattern::Sequential
670 } else {
671 AccessPattern::Blocked
672 },
673 cache_friendly_algorithms: characteristics.cache_friendliness() > 0.5,
674 }
675 }
676
677 fn calculate_confidence(
679 &self,
680 operation_id: &str,
681 characteristics: &DataCharacteristics,
682 ) -> f64 {
683 if let Ok(profiles) = self.profiles.read() {
684 if let Some(operation_profiles) = profiles.get(operation_id) {
685 let similar_profiles = operation_profiles
686 .iter()
687 .filter(|p| {
688 self.characteristics_similar(&p.data_characteristics, characteristics)
689 })
690 .count();
691
692 return (similar_profiles as f64 / 10.0).min(1.0);
693 }
694 }
695
696 0.1 }
698
699 fn optimize_strategy(&self, operation_id: &str) -> SklResult<()> {
701 if let Ok(profiles) = self.profiles.read() {
702 if let Some(operation_profiles) = profiles.get(operation_id) {
703 if operation_profiles.len() < self.config.min_profiles_for_optimization {
704 return Ok(());
705 }
706
707 let mut algorithm_performance: HashMap<String, Vec<f64>> = HashMap::new();
709
710 for profile in operation_profiles {
711 let score = profile.metrics.execution_time.as_secs_f64();
712 algorithm_performance
713 .entry(profile.algorithm_variant.clone())
714 .or_default()
715 .push(score);
716 }
717
718 let mut best_algorithm = "default".to_string();
720 let mut best_average = f64::INFINITY;
721
722 for (algorithm, scores) in &algorithm_performance {
723 if scores.len() >= 3 {
724 let average: f64 = scores.iter().sum::<f64>() / scores.len() as f64;
726 if average < best_average {
727 best_average = average;
728 best_algorithm = algorithm.clone();
729 }
730 }
731 }
732
733 if let Ok(mut strategies) = self.strategies.write() {
735 let strategy =
736 strategies
737 .entry(operation_id.to_string())
738 .or_insert_with(|| OptimizationStrategy {
739 preferred_algorithm: best_algorithm.clone(),
740 optimization_level: OptimizationLevel::Basic,
741 memory_layout: MemoryLayout::RowMajor,
742 parallel_strategy: ParallelStrategy::Serial,
743 cache_hints: CacheOptimizationHints {
744 block_size: 256,
745 use_prefetch: false,
746 access_pattern: AccessPattern::Sequential,
747 cache_friendly_algorithms: true,
748 },
749 confidence: 0.5,
750 });
751
752 strategy.preferred_algorithm = best_algorithm;
753 strategy.confidence = (algorithm_performance.len() as f64 / 5.0).min(1.0);
754 }
755 }
756 }
757
758 Ok(())
759 }
760
761 pub fn predict_performance(
763 &self,
764 operation_id: &str,
765 characteristics: &DataCharacteristics,
766 ) -> SklResult<ExecutionMetrics> {
767 if let Ok(predictors) = self.predictors.read() {
768 if let Some(predictor) = predictors.get(operation_id) {
769 let execution_time = predictor.predict_execution_time(characteristics)?;
770 let memory_usage = predictor.predict_memory_usage(characteristics)?;
771
772 return Ok(ExecutionMetrics {
773 execution_time,
774 cpu_time: execution_time,
775 memory_allocated: memory_usage,
776 peak_memory: memory_usage,
777 cache_misses: 0,
778 simd_operations: 0,
779 parallel_efficiency: 1.0,
780 memory_bandwidth: 0.5,
781 flops_per_second: 1e9,
782 });
783 }
784 }
785
786 let data_size = characteristics.n_samples * characteristics.n_features;
788 let estimated_time = Duration::from_millis((data_size / 10000).max(1) as u64);
789 let estimated_memory = data_size * characteristics.dtype_size;
790
791 Ok(ExecutionMetrics {
792 execution_time: estimated_time,
793 cpu_time: estimated_time,
794 memory_allocated: estimated_memory,
795 peak_memory: estimated_memory,
796 cache_misses: 0,
797 simd_operations: 0,
798 parallel_efficiency: 1.0,
799 memory_bandwidth: 0.5,
800 flops_per_second: 1e9,
801 })
802 }
803
804 #[must_use]
806 pub fn get_optimization_stats(&self) -> OptimizationStats {
807 let mut stats = OptimizationStats {
808 total_operations: 0,
809 optimized_operations: 0,
810 average_confidence: 0.0,
811 total_profiles: 0,
812 predictor_accuracy: 0.0,
813 };
814
815 if let Ok(profiles) = self.profiles.read() {
816 stats.total_operations = profiles.len();
817 stats.total_profiles = profiles.values().map(std::collections::VecDeque::len).sum();
818 }
819
820 if let Ok(strategies) = self.strategies.read() {
821 stats.optimized_operations = strategies.len();
822 stats.average_confidence = strategies.values().map(|s| s.confidence).sum::<f64>()
823 / strategies.len().max(1) as f64;
824 }
825
826 if let Ok(predictors) = self.predictors.read() {
827 stats.predictor_accuracy = predictors.values().map(|p| p.accuracy()).sum::<f64>()
828 / predictors.len().max(1) as f64;
829 }
830
831 stats
832 }
833}
834
/// Aggregate view of a [`ProfileGuidedOptimizer`]'s state.
#[derive(Debug, Clone)]
pub struct OptimizationStats {
    /// Number of distinct operations with recorded profiles.
    pub total_operations: usize,
    /// Number of operations that have a stored strategy.
    pub optimized_operations: usize,
    /// Mean confidence across stored strategies.
    pub average_confidence: f64,
    /// Total profiles across all operations.
    pub total_profiles: usize,
    /// Mean self-reported accuracy across predictors.
    pub predictor_accuracy: f64,
}
849
/// Defaults: keep up to 1000 profiles per operation, start optimizing after
/// 10, and re-profile every 60 seconds.
impl Default for OptimizerConfig {
    fn default() -> Self {
        Self {
            max_profiles_per_operation: 1000,
            min_profiles_for_optimization: 10,
            confidence_threshold: 0.7,
            improvement_threshold: 0.1,
            adaptive_optimization: true,
            profile_interval: Duration::from_secs(60),
        }
    }
}
862
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh optimizer starts with no recorded operations or strategies.
    #[test]
    fn test_optimizer_creation() {
        let config = OptimizerConfig::default();
        let optimizer = ProfileGuidedOptimizer::new(config).expect("operation should succeed");

        let stats = optimizer.get_optimization_stats();
        assert_eq!(stats.total_operations, 0);
        assert_eq!(stats.optimized_operations, 0);
    }

    /// Round-trips the scaled sparsity / cache-friendliness accessors.
    #[test]
    fn test_data_characteristics() {
        let mut characteristics = DataCharacteristics {
            n_samples: 1000,
            n_features: 50,
            sparsity_scaled: 100, // decodes to 0.1
            dtype_size: 8,
            memory_layout: MemoryLayout::RowMajor,
            cache_friendliness_scaled: 800, // decodes to 0.8
        };

        assert_eq!(characteristics.n_samples, 1000);
        assert_eq!(characteristics.n_features, 50);
        assert_eq!(characteristics.sparsity(), 0.1);
        assert_eq!(characteristics.cache_friendliness(), 0.8);

        characteristics.set_sparsity(0.5);
        assert_eq!(characteristics.sparsity(), 0.5);
    }

    /// Constructs a full profile and checks the identifying fields.
    #[test]
    fn test_performance_profile() {
        let profile = PerformanceProfile {
            operation_id: "test_op".to_string(),
            data_characteristics: DataCharacteristics {
                n_samples: 100,
                n_features: 10,
                sparsity_scaled: 0, // fully dense
                dtype_size: 8,
                memory_layout: MemoryLayout::RowMajor,
                cache_friendliness_scaled: 1000, // maximally cache friendly
            },
            metrics: ExecutionMetrics {
                execution_time: Duration::from_millis(100),
                cpu_time: Duration::from_millis(100),
                memory_allocated: 8000,
                peak_memory: 8000,
                cache_misses: 0,
                simd_operations: 100,
                parallel_efficiency: 1.0,
                memory_bandwidth: 0.5,
                flops_per_second: 1e6,
            },
            algorithm_variant: "test_algo".to_string(),
            optimization_level: OptimizationLevel::Basic,
            hardware_context: HardwareContext {
                cpu_cores: 4,
                cache_sizes: vec![32768, 262144],
                simd_features: vec![SimdFeature::SSE2],
                memory_bandwidth: 25.6,
                cpu_frequency: 3000.0,
            },
            timestamp: Instant::now(),
        };

        assert_eq!(profile.operation_id, "test_op");
        assert_eq!(profile.algorithm_variant, "test_algo");
    }

    /// An untrained predictor reports zero accuracy and still produces a
    /// non-negative execution-time prediction.
    #[test]
    fn test_ml_predictor() {
        let mut predictor = MLPerformancePredictor::new();
        assert_eq!(predictor.accuracy(), 0.0);

        let characteristics = DataCharacteristics {
            n_samples: 100,
            n_features: 10,
            sparsity_scaled: 0, // fully dense
            dtype_size: 8,
            memory_layout: MemoryLayout::RowMajor,
            cache_friendliness_scaled: 1000, // maximally cache friendly
        };

        let prediction = predictor
            .predict_execution_time(&characteristics)
            .unwrap_or_default();
        assert!(prediction.as_secs_f64() >= 0.0);
    }

    /// Smoke-checks that strategy structs carry their fields through.
    #[test]
    fn test_optimization_strategy() {
        let strategy = OptimizationStrategy {
            preferred_algorithm: "test_algo".to_string(),
            optimization_level: OptimizationLevel::Advanced,
            memory_layout: MemoryLayout::ColumnMajor,
            parallel_strategy: ParallelStrategy::Hybrid,
            cache_hints: CacheOptimizationHints {
                block_size: 512,
                use_prefetch: true,
                access_pattern: AccessPattern::Blocked,
                cache_friendly_algorithms: true,
            },
            confidence: 0.9,
        };

        assert_eq!(strategy.preferred_algorithm, "test_algo");
        assert_eq!(strategy.optimization_level, OptimizationLevel::Advanced);
        assert_eq!(strategy.confidence, 0.9);
    }

    /// Detection must not panic; the feature set is hardware-dependent, so
    /// nothing is asserted about its contents.
    #[test]
    fn test_simd_feature_detection() {
        let features = ProfileGuidedOptimizer::detect_simd_features();
        println!("Detected SIMD features: {:?}", features);
    }
}
985
/// Simulated JIT layer: caches "compiled" algorithm variants per
/// operation/strategy and tracks compilation statistics.
#[derive(Debug)]
pub struct RuntimeOptimizer {
    /// Compiled variants keyed by `generate_variant_key` output.
    compiled_variants: Arc<RwLock<HashMap<String, CompiledVariant>>>,
    /// Running compilation/cache counters.
    compilation_stats: Arc<RwLock<CompilationStats>>,
    /// JIT behavior knobs.
    config: RuntimeOptimizerConfig,
}
996
/// A cached, (simulated) compiled variant of an operation.
#[derive(Debug, Clone)]
pub struct CompiledVariant {
    /// Unique-ish identifier for this compilation.
    pub variant_id: String,
    /// Optimization level the variant was compiled at.
    pub optimization_level: OptimizationLevel,
    /// SIMD feature sets the variant targets.
    pub target_features: Vec<SimdFeature>,
    /// When compilation finished (used for oldest-first eviction).
    pub compiled_at: Instant,
    /// Observed performance, once available.
    pub performance_profile: Option<PerformanceProfile>,
    /// Whether compilation succeeded; `false` also marks variants flagged
    /// for PGO recompilation.
    pub compilation_successful: bool,
}
1013
/// Tuning knobs for [`RuntimeOptimizer`].
#[derive(Debug, Clone)]
pub struct RuntimeOptimizerConfig {
    /// Whether to (simulated-)JIT-compile on cache miss.
    pub enable_jit: bool,
    /// Maximum cached variants before eviction.
    pub max_variants: usize,
    /// Compilation timeout (not enforced in this file).
    pub compilation_timeout: Duration,
    /// Minimum relative improvement that triggers PGO recompilation.
    pub min_improvement: f64,
    /// Whether profile-guided recompilation is allowed.
    pub enable_pgo_recompilation: bool,
}
1028
/// Running counters for the simulated JIT.
#[derive(Debug, Clone)]
pub struct CompilationStats {
    /// Compilations attempted.
    pub total_compilations: usize,
    /// Compilations that succeeded.
    pub successful_compilations: usize,
    /// Total time spent compiling.
    pub total_compilation_time: Duration,
    /// Mean time per compilation.
    pub average_compilation_time: Duration,
    /// Variant-cache hits.
    pub cache_hits: usize,
    /// Variant-cache misses.
    pub cache_misses: usize,
}
1045
1046impl RuntimeOptimizer {
1047 #[must_use]
1049 pub fn new(config: RuntimeOptimizerConfig) -> Self {
1050 Self {
1051 compiled_variants: Arc::new(RwLock::new(HashMap::new())),
1052 compilation_stats: Arc::new(RwLock::new(CompilationStats {
1053 total_compilations: 0,
1054 successful_compilations: 0,
1055 total_compilation_time: Duration::from_secs(0),
1056 average_compilation_time: Duration::from_secs(0),
1057 cache_hits: 0,
1058 cache_misses: 0,
1059 })),
1060 config,
1061 }
1062 }
1063
1064 pub fn get_optimized_variant(
1066 &self,
1067 operation_id: &str,
1068 characteristics: &DataCharacteristics,
1069 strategy: &OptimizationStrategy,
1070 ) -> SklResult<String> {
1071 let variant_key = self.generate_variant_key(operation_id, characteristics, strategy);
1072
1073 if let Ok(variants) = self.compiled_variants.read() {
1075 if let Some(variant) = variants.get(&variant_key) {
1076 if variant.compilation_successful {
1077 self.update_cache_stats(true);
1078 return Ok(variant.variant_id.clone());
1079 }
1080 }
1081 }
1082
1083 self.update_cache_stats(false);
1084
1085 if self.config.enable_jit {
1087 self.compile_variant(operation_id, characteristics, strategy)
1088 } else {
1089 Ok(strategy.preferred_algorithm.clone())
1090 }
1091 }
1092
1093 fn compile_variant(
1095 &self,
1096 operation_id: &str,
1097 characteristics: &DataCharacteristics,
1098 strategy: &OptimizationStrategy,
1099 ) -> SklResult<String> {
1100 let start_time = Instant::now();
1101 let variant_key = self.generate_variant_key(operation_id, characteristics, strategy);
1102
1103 let compilation_successful = self.simulate_compilation(strategy);
1105 let compilation_time = start_time.elapsed();
1106
1107 let variant = CompiledVariant {
1108 variant_id: format!("{}_{}", operation_id, compilation_time.as_nanos()),
1109 optimization_level: strategy.optimization_level,
1110 target_features: self.select_target_features(strategy),
1111 compiled_at: Instant::now(),
1112 performance_profile: None,
1113 compilation_successful,
1114 };
1115
1116 self.update_compilation_stats(compilation_time, compilation_successful);
1118
1119 if let Ok(mut variants) = self.compiled_variants.write() {
1121 if variants.len() >= self.config.max_variants {
1123 self.evict_old_variants(&mut variants);
1124 }
1125 variants.insert(variant_key, variant.clone());
1126 }
1127
1128 Ok(variant.variant_id)
1129 }
1130
    /// Builds a cache key from the operation id plus hashes of the data
    /// characteristics and the strategy's optimization/parallel settings.
    ///
    /// Enums that don't implement `Hash` are folded in via their `Debug`
    /// rendering. `DefaultHasher` output is only stable within a process,
    /// so keys must not be persisted across runs.
    fn generate_variant_key(
        &self,
        operation_id: &str,
        characteristics: &DataCharacteristics,
        strategy: &OptimizationStrategy,
    ) -> String {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::Hasher;

        let mut hasher = DefaultHasher::new();
        operation_id.hash(&mut hasher);
        characteristics.hash(&mut hasher);
        format!("{:?}", strategy.optimization_level).hash(&mut hasher);
        format!("{:?}", strategy.parallel_strategy).hash(&mut hasher);

        format!("{}_{:x}", operation_id, hasher.finish())
    }
1149
1150 fn simulate_compilation(&self, strategy: &OptimizationStrategy) -> bool {
1152 match strategy.optimization_level {
1154 OptimizationLevel::None => true,
1155 OptimizationLevel::Basic => thread_rng().random::<f64>() > 0.1, OptimizationLevel::Advanced => thread_rng().random::<f64>() > 0.2, OptimizationLevel::Aggressive => thread_rng().random::<f64>() > 0.3, }
1159 }
1160
1161 fn select_target_features(&self, strategy: &OptimizationStrategy) -> Vec<SimdFeature> {
1163 let mut features = Vec::new();
1164
1165 match strategy.optimization_level {
1166 OptimizationLevel::None => {}
1167 OptimizationLevel::Basic => {
1168 features.push(SimdFeature::SSE2);
1169 }
1170 OptimizationLevel::Advanced => {
1171 features.extend_from_slice(&[SimdFeature::SSE2, SimdFeature::AVX]);
1172 }
1173 OptimizationLevel::Aggressive => {
1174 features.extend_from_slice(&[
1175 SimdFeature::SSE2,
1176 SimdFeature::AVX,
1177 SimdFeature::AVX2,
1178 SimdFeature::AVX512F,
1179 ]);
1180 }
1181 }
1182
1183 features
1184 }
1185
1186 fn update_cache_stats(&self, hit: bool) {
1188 if let Ok(mut stats) = self.compilation_stats.write() {
1189 if hit {
1190 stats.cache_hits += 1;
1191 } else {
1192 stats.cache_misses += 1;
1193 }
1194 }
1195 }
1196
1197 fn update_compilation_stats(&self, compilation_time: Duration, successful: bool) {
1199 if let Ok(mut stats) = self.compilation_stats.write() {
1200 stats.total_compilations += 1;
1201 if successful {
1202 stats.successful_compilations += 1;
1203 }
1204 stats.total_compilation_time += compilation_time;
1205 stats.average_compilation_time =
1206 stats.total_compilation_time / stats.total_compilations as u32;
1207 }
1208 }
1209
1210 fn evict_old_variants(&self, variants: &mut HashMap<String, CompiledVariant>) {
1212 if let Some((oldest_key, _)) = variants
1214 .iter()
1215 .min_by_key(|(_, variant)| variant.compiled_at)
1216 .map(|(k, v)| (k.clone(), v.clone()))
1217 {
1218 variants.remove(&oldest_key);
1219 }
1220 }
1221
1222 pub fn get_compilation_stats(&self) -> SklResult<CompilationStats> {
1224 self.compilation_stats
1225 .read()
1226 .map(|stats| stats.clone())
1227 .map_err(|_| SklearsError::InvalidInput("Failed to read compilation stats".to_string()))
1228 }
1229
1230 pub fn trigger_pgo_recompilation(
1232 &self,
1233 operation_id: &str,
1234 performance_profiles: &[PerformanceProfile],
1235 ) -> SklResult<()> {
1236 if !self.config.enable_pgo_recompilation {
1237 return Ok(());
1238 }
1239
1240 let avg_performance = performance_profiles
1242 .iter()
1243 .map(|p| p.metrics.execution_time.as_secs_f64())
1244 .sum::<f64>()
1245 / performance_profiles.len() as f64;
1246
1247 if let Ok(mut variants) = self.compiled_variants.write() {
1249 for (key, variant) in variants.iter_mut() {
1250 if key.starts_with(operation_id) {
1251 if let Some(ref profile) = variant.performance_profile {
1252 let improvement_potential =
1253 profile.metrics.execution_time.as_secs_f64() / avg_performance;
1254 if improvement_potential > (1.0 + self.config.min_improvement) {
1255 variant.compilation_successful = false;
1257 }
1258 }
1259 }
1260 }
1261 }
1262
1263 Ok(())
1264 }
1265}
1266
/// Defaults: JIT on, 100 cached variants, 30 s timeout, 10% improvement
/// threshold, PGO recompilation enabled.
impl Default for RuntimeOptimizerConfig {
    fn default() -> Self {
        Self {
            enable_jit: true,
            max_variants: 100,
            compilation_timeout: Duration::from_secs(30),
            min_improvement: 0.1,
            enable_pgo_recompilation: true,
        }
    }
}
1278
/// Accuracy-weighted ensemble over several [`PerformancePredictor`]s.
#[derive(Debug)]
pub struct EnsemblePerformancePredictor {
    /// Member predictors; index-aligned with `weights`.
    predictors: Vec<Box<dyn PerformancePredictor + Send + Sync>>,
    /// Per-member weights, re-normalized after each update.
    weights: Vec<f64>,
    /// Accuracy-weighted combined accuracy of the members.
    ensemble_accuracy: f64,
}
1289
/// `Default` builds the standard three-member ensemble via `new`.
impl Default for EnsemblePerformancePredictor {
    fn default() -> Self {
        Self::new()
    }
}
1295
1296impl EnsemblePerformancePredictor {
1297 #[must_use]
1299 pub fn new() -> Self {
1300 let predictors: Vec<Box<dyn PerformancePredictor + Send + Sync>> = vec![
1301 Box::new(MLPerformancePredictor::new()),
1302 Box::new(HeuristicPredictor::new()),
1303 Box::new(PolynomialPredictor::new()),
1304 ];
1305
1306 let weights = vec![1.0 / predictors.len() as f64; predictors.len()];
1307
1308 Self {
1309 predictors,
1310 weights,
1311 ensemble_accuracy: 0.0,
1312 }
1313 }
1314
1315 fn update_weights(&mut self) {
1317 let total_accuracy: f64 = self.predictors.iter().map(|p| p.accuracy()).sum();
1318
1319 if total_accuracy > 0.0 {
1320 for (i, predictor) in self.predictors.iter().enumerate() {
1321 self.weights[i] = predictor.accuracy() / total_accuracy;
1322 }
1323 }
1324
1325 self.ensemble_accuracy = self
1327 .predictors
1328 .iter()
1329 .enumerate()
1330 .map(|(i, p)| p.accuracy() * self.weights[i])
1331 .sum();
1332 }
1333}
1334
1335impl PerformancePredictor for EnsemblePerformancePredictor {
1336 fn predict_execution_time(&self, characteristics: &DataCharacteristics) -> SklResult<Duration> {
1337 let mut weighted_prediction = 0.0;
1338
1339 for (i, predictor) in self.predictors.iter().enumerate() {
1340 let prediction = predictor
1341 .predict_execution_time(characteristics)?
1342 .as_secs_f64();
1343 weighted_prediction += prediction * self.weights[i];
1344 }
1345
1346 Ok(Duration::from_secs_f64(weighted_prediction.max(0.0)))
1347 }
1348
1349 fn predict_memory_usage(&self, characteristics: &DataCharacteristics) -> SklResult<usize> {
1350 let mut weighted_prediction = 0.0;
1351
1352 for (i, predictor) in self.predictors.iter().enumerate() {
1353 let prediction = predictor.predict_memory_usage(characteristics)? as f64;
1354 weighted_prediction += prediction * self.weights[i];
1355 }
1356
1357 Ok(weighted_prediction.max(0.0) as usize)
1358 }
1359
1360 fn update(&mut self, profile: &PerformanceProfile) -> SklResult<()> {
1361 for predictor in &mut self.predictors {
1362 predictor.update(profile)?;
1363 }
1364
1365 self.update_weights();
1366 Ok(())
1367 }
1368
1369 fn accuracy(&self) -> f64 {
1370 self.ensemble_accuracy
1371 }
1372}
1373
/// Rule-of-thumb predictor that estimates cost from data size, sparsity,
/// and cache friendliness without any learned state.
#[derive(Debug)]
pub struct HeuristicPredictor {
    /// Fixed self-reported accuracy (0.6); never updated from profiles.
    accuracy: f64,
}
1379
impl Default for HeuristicPredictor {
    /// Equivalent to [`HeuristicPredictor::new`].
    fn default() -> Self {
        Self::new()
    }
}
1385
1386impl HeuristicPredictor {
1387 #[must_use]
1388 pub fn new() -> Self {
1389 Self { accuracy: 0.6 } }
1391}
1392
1393impl PerformancePredictor for HeuristicPredictor {
1394 fn predict_execution_time(&self, characteristics: &DataCharacteristics) -> SklResult<Duration> {
1395 let base_time = (characteristics.n_samples * characteristics.n_features) as f64;
1396 let sparsity_factor = 1.0 - characteristics.sparsity() * 0.5;
1397 let cache_factor = 1.0 + (1.0 - characteristics.cache_friendliness()) * 0.3;
1398
1399 let estimated_time = base_time * sparsity_factor * cache_factor / 1e6; Ok(Duration::from_secs_f64(estimated_time.max(0.001)))
1401 }
1402
1403 fn predict_memory_usage(&self, characteristics: &DataCharacteristics) -> SklResult<usize> {
1404 let base_memory =
1405 characteristics.n_samples * characteristics.n_features * characteristics.dtype_size;
1406 let overhead = (base_memory as f64 * 0.2) as usize; Ok(base_memory + overhead)
1408 }
1409
1410 fn update(&mut self, _profile: &PerformanceProfile) -> SklResult<()> {
1411 Ok(())
1413 }
1414
1415 fn accuracy(&self) -> f64 {
1416 self.accuracy
1417 }
1418}
1419
/// Predictor that fits a degree-2 polynomial regression over the data
/// characteristics using batch gradient descent.
#[derive(Debug)]
pub struct PolynomialPredictor {
    /// One coefficient per polynomial feature (see `polynomial_features`).
    coefficients: Vec<f64>,
    /// Accuracy estimate derived from training MSE (`1 / (1 + mse)`).
    accuracy: f64,
    /// Accumulated (feature vector, observed seconds) training pairs.
    /// NOTE(review): grows without bound as profiles arrive — confirm an
    /// upper limit is enforced elsewhere.
    training_data: Vec<(Vec<f64>, f64)>,
}
1427
impl Default for PolynomialPredictor {
    /// Equivalent to [`PolynomialPredictor::new`].
    fn default() -> Self {
        Self::new()
    }
}
1433
1434impl PolynomialPredictor {
1435 #[must_use]
1436 pub fn new() -> Self {
1437 Self {
1438 coefficients: vec![1.0; 15], accuracy: 0.0,
1440 training_data: Vec::new(),
1441 }
1442 }
1443
1444 fn polynomial_features(&self, characteristics: &DataCharacteristics) -> Vec<f64> {
1445 let n_samples = characteristics.n_samples as f64;
1446 let n_features = characteristics.n_features as f64;
1447 let sparsity = characteristics.sparsity();
1448 let cache_friendliness = characteristics.cache_friendliness();
1449
1450 vec![
1451 1.0, n_samples,
1453 n_features,
1454 sparsity,
1455 cache_friendliness,
1456 n_samples * n_features, n_samples * sparsity,
1458 n_features * sparsity,
1459 n_samples * cache_friendliness,
1460 n_features * cache_friendliness,
1461 sparsity * cache_friendliness,
1462 n_samples.powi(2), n_features.powi(2),
1464 sparsity.powi(2),
1465 cache_friendliness.powi(2),
1466 ]
1467 }
1468}
1469
1470impl PerformancePredictor for PolynomialPredictor {
1471 fn predict_execution_time(&self, characteristics: &DataCharacteristics) -> SklResult<Duration> {
1472 let features = self.polynomial_features(characteristics);
1473 let prediction = features
1474 .iter()
1475 .zip(&self.coefficients)
1476 .map(|(f, c)| f * c)
1477 .sum::<f64>()
1478 .max(0.001);
1479
1480 Ok(Duration::from_secs_f64(prediction))
1481 }
1482
1483 fn predict_memory_usage(&self, characteristics: &DataCharacteristics) -> SklResult<usize> {
1484 let base_memory =
1485 characteristics.n_samples * characteristics.n_features * characteristics.dtype_size;
1486 Ok(base_memory)
1487 }
1488
1489 fn update(&mut self, profile: &PerformanceProfile) -> SklResult<()> {
1490 let features = self.polynomial_features(&profile.data_characteristics);
1491 let target = profile.metrics.execution_time.as_secs_f64();
1492
1493 self.training_data.push((features, target));
1494
1495 if self.training_data.len() % 20 == 0 {
1497 self.train_polynomial_regression()?;
1498 }
1499
1500 Ok(())
1501 }
1502
1503 fn accuracy(&self) -> f64 {
1504 self.accuracy
1505 }
1506}
1507
1508impl PolynomialPredictor {
1509 fn train_polynomial_regression(&mut self) -> SklResult<()> {
1510 if self.training_data.len() < 10 {
1511 return Ok(());
1512 }
1513
1514 let n = self.training_data.len();
1516 let p = self.coefficients.len();
1517
1518 let mut x_matrix = vec![vec![0.0; p]; n];
1520 let mut y_vector = vec![0.0; n];
1521
1522 for (i, (features, target)) in self.training_data.iter().enumerate() {
1523 for (j, &feature) in features.iter().enumerate() {
1524 x_matrix[i][j] = feature;
1525 }
1526 y_vector[i] = *target;
1527 }
1528
1529 let learning_rate = 0.0001;
1532 let epochs = 50;
1533
1534 for _ in 0..epochs {
1535 let mut gradients = vec![0.0; p];
1536 let mut total_error = 0.0;
1537
1538 for i in 0..n {
1539 let prediction: f64 = x_matrix[i]
1540 .iter()
1541 .zip(&self.coefficients)
1542 .map(|(x, c)| x * c)
1543 .sum();
1544
1545 let error = prediction - y_vector[i];
1546 total_error += error * error;
1547
1548 for j in 0..p {
1549 gradients[j] += error * x_matrix[i][j];
1550 }
1551 }
1552
1553 for (coeff, grad) in self.coefficients.iter_mut().zip(&gradients) {
1555 *coeff -= learning_rate * grad / n as f64;
1556 }
1557
1558 let mse = total_error / n as f64;
1560 self.accuracy = (1.0 / (1.0 + mse)).min(1.0);
1561 }
1562
1563 Ok(())
1564 }
1565}