Skip to main content

scirs2_transform/
optimization_config.rs

1//! Optimization configuration and auto-tuning system
2//!
3//! This module provides intelligent configuration systems that automatically
4//! choose optimal settings for transformations based on data characteristics
5//! and system resources.
6
7use scirs2_core::Rng;
8#[cfg(feature = "distributed")]
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11
12use crate::error::{Result, TransformError};
13use crate::utils::ProcessingStrategy;
14
/// System resource information used to drive optimization decisions.
///
/// Populated by [`SystemResources::detect`]; fields fall back to
/// conservative defaults when a probe is unavailable on the platform.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "distributed", derive(Serialize, Deserialize))]
pub struct SystemResources {
    /// Available memory in MB
    pub memory_mb: usize,
    /// Number of CPU cores
    pub cpu_cores: usize,
    /// Whether GPU is available
    pub has_gpu: bool,
    /// Whether SIMD instructions are available
    pub has_simd: bool,
    /// L3 cache size in KB (affects chunk sizes)
    pub l3_cache_kb: usize,
}
30
31impl SystemResources {
32    /// Detect system resources automatically
33    pub fn detect() -> Self {
34        SystemResources {
35            memory_mb: Self::detect_memory_mb(),
36            cpu_cores: num_cpus::get(),
37            has_gpu: Self::detect_gpu(),
38            has_simd: Self::detect_simd(),
39            l3_cache_kb: Self::detect_l3_cache_kb(),
40        }
41    }
42
43    /// Detect available memory
44    fn detect_memory_mb() -> usize {
45        // Simplified detection - in practice, use system APIs
46        #[cfg(target_os = "linux")]
47        {
48            if let Ok(meminfo) = std::fs::read_to_string("/proc/meminfo") {
49                for line in meminfo.lines() {
50                    if line.starts_with("MemAvailable:") {
51                        if let Some(kb_str) = line.split_whitespace().nth(1) {
52                            if let Ok(kb) = kb_str.parse::<usize>() {
53                                return kb / 1024; // Convert to MB
54                            }
55                        }
56                    }
57                }
58            }
59        }
60
61        // Fallback: assume 8GB
62        8 * 1024
63    }
64
65    /// Detect GPU availability
66    fn detect_gpu() -> bool {
67        // Simplified detection
68        #[cfg(feature = "gpu")]
69        {
70            // In practice, check for CUDA or OpenCL
71            true
72        }
73        #[cfg(not(feature = "gpu"))]
74        {
75            false
76        }
77    }
78
79    /// Detect SIMD support
80    fn detect_simd() -> bool {
81        #[cfg(feature = "simd")]
82        {
83            true
84        }
85        #[cfg(not(feature = "simd"))]
86        {
87            false
88        }
89    }
90
91    /// Detect L3 cache size
92    fn detect_l3_cache_kb() -> usize {
93        // Simplified - in practice, use CPUID or /sys/devices/system/cpu
94        8 * 1024 // Assume 8MB L3 cache
95    }
96
97    /// Get conservative memory limit for transformations (80% of available)
98    pub fn safe_memory_mb(&self) -> usize {
99        (self.memory_mb as f64 * 0.8) as usize
100    }
101
102    /// Get optimal chunk size based on cache size
103    pub fn optimal_chunk_size(&self, elementsize: usize) -> usize {
104        // Target 50% of L3 cache
105        let target_bytes = (self.l3_cache_kb * 1024) / 2;
106        (target_bytes / elementsize).max(1000) // At least 1000 elements
107    }
108}
109
/// Data characteristics for optimization decisions.
///
/// Produced by [`DataCharacteristics::analyze`] from a 2-D f64 array.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "distributed", derive(Serialize, Deserialize))]
pub struct DataCharacteristics {
    /// Number of samples
    pub n_samples: usize,
    /// Number of features
    pub nfeatures: usize,
    /// Data sparsity (0.0 = dense, 1.0 = all zeros)
    pub sparsity: f64,
    /// Data range (max - min) over finite values; 0.0 if none are finite
    pub data_range: f64,
    /// Outlier ratio (fraction of sampled values outside Tukey fences)
    pub outlier_ratio: f64,
    /// Whether data has missing values (non-finite entries: NaN/inf)
    pub has_missing: bool,
    /// Estimated memory footprint in MB
    pub memory_footprint_mb: f64,
    /// Data type size (e.g., 8 for f64)
    pub elementsize: usize,
}
131
132impl DataCharacteristics {
133    /// Analyze data characteristics from array view
134    pub fn analyze(data: &scirs2_core::ndarray::ArrayView2<f64>) -> Result<Self> {
135        let (n_samples, nfeatures) = data.dim();
136
137        if n_samples == 0 || nfeatures == 0 {
138            return Err(TransformError::InvalidInput("Empty _data".to_string()));
139        }
140
141        // Calculate sparsity
142        let zeros = data.iter().filter(|&&x| x == 0.0).count();
143        let sparsity = zeros as f64 / data.len() as f64;
144
145        // Calculate _data range
146        let mut min_val = f64::INFINITY;
147        let mut max_val = f64::NEG_INFINITY;
148        let mut finite_count = 0;
149        let mut missing_count = 0;
150
151        for &val in data.iter() {
152            if val.is_finite() {
153                min_val = min_val.min(val);
154                max_val = max_val.max(val);
155                finite_count += 1;
156            } else {
157                missing_count += 1;
158            }
159        }
160
161        let data_range = if finite_count > 0 {
162            max_val - min_val
163        } else {
164            0.0
165        };
166        let has_missing = missing_count > 0;
167
168        // Estimate outlier ratio using IQR method (simplified)
169        let outlier_ratio = if n_samples > 10 {
170            let mut sample_values: Vec<f64> = data.iter()
171                .filter(|&&x| x.is_finite())
172                .take(1000) // Sample for efficiency
173                .copied()
174                .collect();
175
176            if sample_values.len() >= 4 {
177                sample_values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
178                let n = sample_values.len();
179                let q1 = sample_values[n / 4];
180                let q3 = sample_values[3 * n / 4];
181                let iqr = q3 - q1;
182
183                if iqr > 0.0 {
184                    let lower_bound = q1 - 1.5 * iqr;
185                    let upper_bound = q3 + 1.5 * iqr;
186                    let outliers = sample_values
187                        .iter()
188                        .filter(|&&x| x < lower_bound || x > upper_bound)
189                        .count();
190                    outliers as f64 / sample_values.len() as f64
191                } else {
192                    0.0
193                }
194            } else {
195                0.0
196            }
197        } else {
198            0.0
199        };
200
201        let memory_footprint_mb =
202            (n_samples * nfeatures * std::mem::size_of::<f64>()) as f64 / (1024.0 * 1024.0);
203
204        Ok(DataCharacteristics {
205            n_samples,
206            nfeatures,
207            sparsity,
208            data_range,
209            outlier_ratio,
210            has_missing,
211            memory_footprint_mb,
212            elementsize: std::mem::size_of::<f64>(),
213        })
214    }
215
216    /// Check if data is considered "large"
217    pub fn is_large_dataset(&self) -> bool {
218        self.n_samples > 100_000 || self.nfeatures > 10_000 || self.memory_footprint_mb > 1000.0
219    }
220
221    /// Check if data is considered "wide" (more features than samples)
222    pub fn is_wide_dataset(&self) -> bool {
223        self.nfeatures > self.n_samples
224    }
225
226    /// Check if data is sparse
227    pub fn is_sparse(&self) -> bool {
228        self.sparsity > 0.5
229    }
230
231    /// Check if data has significant outliers
232    pub fn has_outliers(&self) -> bool {
233        self.outlier_ratio > 0.05 // More than 5% outliers
234    }
235}
236
/// Optimization configuration for a specific transformation.
///
/// Built by the `for_*` constructors from data characteristics and
/// system resources, or by [`AutoTuner::optimize_for_transformation`].
#[derive(Debug, Clone)]
#[cfg_attr(feature = "distributed", derive(Serialize, Deserialize))]
pub struct OptimizationConfig {
    /// Processing strategy to use
    pub processing_strategy: ProcessingStrategy,
    /// Memory limit in MB
    pub memory_limit_mb: usize,
    /// Whether to use robust statistics
    pub use_robust: bool,
    /// Whether to use parallel processing
    pub use_parallel: bool,
    /// Whether to use SIMD acceleration
    pub use_simd: bool,
    /// Whether to use GPU acceleration
    pub use_gpu: bool,
    /// Chunk size for batch processing
    pub chunk_size: usize,
    /// Number of threads to use
    pub num_threads: usize,
    /// Additional algorithm-specific parameters (numeric values only)
    pub algorithm_params: HashMap<String, f64>,
}
260
261impl OptimizationConfig {
262    /// Create optimization config for standardization
263    pub fn for_standardization(datachars: &DataCharacteristics, system: &SystemResources) -> Self {
264        let use_robust = datachars.has_outliers();
265        let use_parallel = datachars.n_samples > 10_000 && system.cpu_cores > 1;
266        let use_simd = system.has_simd && datachars.nfeatures > 100;
267        let use_gpu = system.has_gpu && datachars.memory_footprint_mb > 100.0;
268
269        let processing_strategy = if datachars.memory_footprint_mb > system.safe_memory_mb() as f64
270        {
271            ProcessingStrategy::OutOfCore {
272                chunk_size: system.optimal_chunk_size(datachars.elementsize),
273            }
274        } else if use_parallel {
275            ProcessingStrategy::Parallel
276        } else if use_simd {
277            ProcessingStrategy::Simd
278        } else {
279            ProcessingStrategy::Standard
280        };
281
282        OptimizationConfig {
283            processing_strategy,
284            memory_limit_mb: system.safe_memory_mb(),
285            use_robust,
286            use_parallel,
287            use_simd,
288            use_gpu,
289            chunk_size: system.optimal_chunk_size(datachars.elementsize),
290            num_threads: if use_parallel { system.cpu_cores } else { 1 },
291            algorithm_params: HashMap::new(),
292        }
293    }
294
295    /// Create optimization config for PCA
296    pub fn for_pca(
297        datachars: &DataCharacteristics,
298        system: &SystemResources,
299        n_components: usize,
300    ) -> Self {
301        let use_randomized = datachars.is_large_dataset();
302        let use_parallel = datachars.n_samples > 1_000 && system.cpu_cores > 1;
303        let use_gpu = system.has_gpu && datachars.memory_footprint_mb > 500.0;
304
305        // PCA memory requirements are higher due to covariance matrix
306        let memory_multiplier = if datachars.nfeatures > datachars.n_samples {
307            3.0
308        } else {
309            2.0
310        };
311        let estimated_memory = datachars.memory_footprint_mb * memory_multiplier;
312
313        let processing_strategy = if estimated_memory > system.safe_memory_mb() as f64 {
314            ProcessingStrategy::OutOfCore {
315                chunk_size: (system.safe_memory_mb() * 1024 * 1024)
316                    / (datachars.nfeatures * datachars.elementsize),
317            }
318        } else if use_parallel {
319            ProcessingStrategy::Parallel
320        } else {
321            ProcessingStrategy::Standard
322        };
323
324        let mut algorithm_params = HashMap::new();
325        algorithm_params.insert(
326            "use_randomized".to_string(),
327            if use_randomized { 1.0 } else { 0.0 },
328        );
329        algorithm_params.insert("n_components".to_string(), n_components as f64);
330
331        OptimizationConfig {
332            processing_strategy,
333            memory_limit_mb: system.safe_memory_mb(),
334            use_robust: false, // PCA doesn't typically use robust statistics
335            use_parallel,
336            use_simd: system.has_simd,
337            use_gpu,
338            chunk_size: system.optimal_chunk_size(datachars.elementsize),
339            num_threads: if use_parallel { system.cpu_cores } else { 1 },
340            algorithm_params,
341        }
342    }
343
344    /// Create optimization config for polynomial features
345    pub fn for_polynomial_features(
346        datachars: &DataCharacteristics,
347        system: &SystemResources,
348        degree: usize,
349    ) -> Result<Self> {
350        // Polynomial features can explode in size
351        let estimated_output_features =
352            Self::estimate_polynomial_features(datachars.nfeatures, degree)?;
353        let estimated_memory = datachars.n_samples as f64
354            * estimated_output_features as f64
355            * datachars.elementsize as f64
356            / (1024.0 * 1024.0);
357
358        if estimated_memory > system.memory_mb as f64 * 0.9 {
359            return Err(TransformError::MemoryError(format!(
360                "Polynomial features would require {estimated_memory:.1} MB, but only {} MB available",
361                system.memory_mb
362            )));
363        }
364
365        let use_parallel = datachars.n_samples > 1_000 && system.cpu_cores > 1;
366        let use_simd = system.has_simd && estimated_output_features > 100;
367
368        let processing_strategy = if estimated_memory > system.safe_memory_mb() as f64 {
369            ProcessingStrategy::OutOfCore {
370                chunk_size: (system.safe_memory_mb() * 1024 * 1024)
371                    / (estimated_output_features * datachars.elementsize),
372            }
373        } else if use_parallel {
374            ProcessingStrategy::Parallel
375        } else if use_simd {
376            ProcessingStrategy::Simd
377        } else {
378            ProcessingStrategy::Standard
379        };
380
381        let mut algorithm_params = HashMap::new();
382        algorithm_params.insert("degree".to_string(), degree as f64);
383        algorithm_params.insert(
384            "estimated_output_features".to_string(),
385            estimated_output_features as f64,
386        );
387
388        Ok(OptimizationConfig {
389            processing_strategy,
390            memory_limit_mb: system.safe_memory_mb(),
391            use_robust: false,
392            use_parallel,
393            use_simd,
394            use_gpu: false, // Polynomial features typically don't benefit from GPU
395            chunk_size: system.optimal_chunk_size(datachars.elementsize),
396            num_threads: if use_parallel { system.cpu_cores } else { 1 },
397            algorithm_params,
398        })
399    }
400
401    /// Estimate number of polynomial features
402    fn estimate_polynomial_features(nfeatures: usize, degree: usize) -> Result<usize> {
403        if degree == 0 {
404            return Err(TransformError::InvalidInput(
405                "Degree must be at least 1".to_string(),
406            ));
407        }
408
409        let mut total_features = 1; // bias term
410
411        for d in 1..=degree {
412            // Multinomial coefficient: (nfeatures + d - 1)! / (d! * (nfeatures - 1)!)
413            let mut coeff = 1;
414            for i in 0..d {
415                coeff = coeff * (nfeatures + d - 1 - i) / (i + 1);
416
417                // Check for overflow
418                if coeff > 1_000_000 {
419                    return Err(TransformError::ComputationError(
420                        "Too many polynomial _features would be generated".to_string(),
421                    ));
422                }
423            }
424            total_features += coeff;
425        }
426
427        Ok(total_features)
428    }
429
430    /// Get estimated execution time for this configuration
431    pub fn estimated_execution_time(&self, datachars: &DataCharacteristics) -> std::time::Duration {
432        use std::time::Duration;
433
434        let base_ops = datachars.n_samples as u64 * datachars.nfeatures as u64;
435
436        let ops_per_second = match self.processing_strategy {
437            ProcessingStrategy::Parallel => {
438                1_000_000_000 * self.num_threads as u64 // 1 billion ops/second per thread
439            }
440            ProcessingStrategy::Simd => {
441                2_000_000_000 // 2 billion ops/second with SIMD
442            }
443            ProcessingStrategy::OutOfCore { .. } => {
444                100_000_000 // 100 million ops/second (I/O bound)
445            }
446            ProcessingStrategy::Standard => {
447                500_000_000 // 500 million ops/second
448            }
449        };
450
451        let time_ns = (base_ops * 1_000_000_000) / ops_per_second;
452        Duration::from_nanos(time_ns.max(1000)) // At least 1 microsecond
453    }
454}
455
/// Auto-tuning system for optimization configurations.
pub struct AutoTuner {
    /// System resources detected when the tuner was created
    system: SystemResources,
    /// Performance history for different configurations, keyed by
    /// transformation name
    performance_history: HashMap<String, Vec<PerformanceRecord>>,
}
463
/// Performance record for auto-tuning.
///
/// Fields are currently write-only (hence `dead_code` allows); they are
/// retained for future learning logic.
#[derive(Debug, Clone)]
struct PerformanceRecord {
    /// Debug-format string of the configuration that was executed
    #[allow(dead_code)]
    config_hash: String,
    /// Wall-clock duration of the transformation
    #[allow(dead_code)]
    execution_time: std::time::Duration,
    /// Memory used during the run, in MB
    #[allow(dead_code)]
    memory_used_mb: f64,
    /// Whether the transformation completed successfully
    #[allow(dead_code)]
    success: bool,
    /// Characteristics of the data the configuration was applied to
    #[allow(dead_code)]
    data_characteristics: DataCharacteristics,
}
478
impl Default for AutoTuner {
    /// Equivalent to [`AutoTuner::new`]: detects system resources and
    /// starts with an empty performance history.
    fn default() -> Self {
        Self::new()
    }
}
484
485impl AutoTuner {
486    /// Create a new auto-tuner
487    pub fn new() -> Self {
488        AutoTuner {
489            system: SystemResources::detect(),
490            performance_history: HashMap::new(),
491        }
492    }
493
494    /// Get optimal configuration for a specific transformation
495    pub fn optimize_for_transformation(
496        &self,
497        transformation: &str,
498        datachars: &DataCharacteristics,
499        params: &HashMap<String, f64>,
500    ) -> Result<OptimizationConfig> {
501        match transformation {
502            "standardization" => Ok(OptimizationConfig::for_standardization(
503                datachars,
504                &self.system,
505            )),
506            "pca" => {
507                let n_components = params.get("n_components").unwrap_or(&5.0) as &f64;
508                Ok(OptimizationConfig::for_pca(
509                    datachars,
510                    &self.system,
511                    *n_components as usize,
512                ))
513            }
514            "polynomial" => {
515                let degree = params.get("degree").unwrap_or(&2.0) as &f64;
516                OptimizationConfig::for_polynomial_features(
517                    datachars,
518                    &self.system,
519                    *degree as usize,
520                )
521            }
522            _ => {
523                // Default configuration
524                Ok(OptimizationConfig {
525                    processing_strategy: if datachars.is_large_dataset() {
526                        ProcessingStrategy::Parallel
527                    } else {
528                        ProcessingStrategy::Standard
529                    },
530                    memory_limit_mb: self.system.safe_memory_mb(),
531                    use_robust: datachars.has_outliers(),
532                    use_parallel: datachars.n_samples > 10_000,
533                    use_simd: self.system.has_simd,
534                    use_gpu: self.system.has_gpu && datachars.memory_footprint_mb > 100.0,
535                    chunk_size: self.system.optimal_chunk_size(datachars.elementsize),
536                    num_threads: self.system.cpu_cores,
537                    algorithm_params: HashMap::new(),
538                })
539            }
540        }
541    }
542
543    /// Record performance for learning
544    pub fn record_performance(
545        &mut self,
546        transformation: &str,
547        config: &OptimizationConfig,
548        execution_time: std::time::Duration,
549        memory_used_mb: f64,
550        success: bool,
551        datachars: DataCharacteristics,
552    ) {
553        let config_hash = format!("{config:?}"); // Simplified hash
554
555        let record = PerformanceRecord {
556            config_hash: config_hash.clone(),
557            execution_time,
558            memory_used_mb,
559            success,
560            data_characteristics: datachars,
561        };
562
563        self.performance_history
564            .entry(transformation.to_string())
565            .or_default()
566            .push(record);
567
568        // Keep only recent records (last 100)
569        let records = self
570            .performance_history
571            .get_mut(transformation)
572            .expect("Operation failed");
573        if records.len() > 100 {
574            records.remove(0);
575        }
576    }
577
578    /// Get system resources
579    pub fn system_resources(&self) -> &SystemResources {
580        &self.system
581    }
582
583    /// Generate optimization report
584    pub fn generate_report(&self, datachars: &DataCharacteristics) -> OptimizationReport {
585        let recommendations = vec![
586            self.get_recommendation_for_transformation("standardization", datachars),
587            self.get_recommendation_for_transformation("pca", datachars),
588            self.get_recommendation_for_transformation("polynomial", datachars),
589        ];
590
591        OptimizationReport {
592            system_info: self.system.clone(),
593            data_info: datachars.clone(),
594            recommendations,
595            estimated_total_memory_mb: datachars.memory_footprint_mb * 2.0, // Conservative estimate
596        }
597    }
598
599    fn get_recommendation_for_transformation(
600        &self,
601        transformation: &str,
602        datachars: &DataCharacteristics,
603    ) -> TransformationRecommendation {
604        let config = self
605            .optimize_for_transformation(transformation, datachars, &HashMap::new())
606            .unwrap_or_else(|_| OptimizationConfig {
607                processing_strategy: ProcessingStrategy::Standard,
608                memory_limit_mb: self.system.safe_memory_mb(),
609                use_robust: false,
610                use_parallel: false,
611                use_simd: false,
612                use_gpu: false,
613                chunk_size: 1000,
614                num_threads: 1,
615                algorithm_params: HashMap::new(),
616            });
617
618        let estimated_time = config.estimated_execution_time(datachars);
619
620        TransformationRecommendation {
621            transformation: transformation.to_string(),
622            config,
623            estimated_time,
624            confidence: 0.8, // Placeholder
625            reason: format!(
626                "Optimized for {} samples, {} features",
627                datachars.n_samples, datachars.nfeatures
628            ),
629        }
630    }
631}
632
/// Optimization report produced by [`AutoTuner::generate_report`].
#[derive(Debug, Clone)]
pub struct OptimizationReport {
    /// System information
    pub system_info: SystemResources,
    /// Data characteristics
    pub data_info: DataCharacteristics,
    /// Recommendations for different transformations
    pub recommendations: Vec<TransformationRecommendation>,
    /// Estimated total memory usage (conservative: 2x the data footprint)
    pub estimated_total_memory_mb: f64,
}
645
/// Recommendation for a specific transformation.
#[derive(Debug, Clone)]
pub struct TransformationRecommendation {
    /// Transformation name (e.g. "standardization", "pca", "polynomial")
    pub transformation: String,
    /// Recommended configuration
    pub config: OptimizationConfig,
    /// Estimated execution time
    pub estimated_time: std::time::Duration,
    /// Confidence in recommendation (0.0 to 1.0); currently a fixed
    /// placeholder value
    pub confidence: f64,
    /// Human-readable reason
    pub reason: String,
}
660
661impl OptimizationReport {
662    /// Print a human-readable report
663    pub fn print_report(&self) {
664        println!("=== Optimization Report ===");
665        println!("System Resources:");
666        println!("  Memory: {} MB", self.system_info.memory_mb);
667        println!("  CPU Cores: {}", self.system_info.cpu_cores);
668        println!("  GPU Available: {}", self.system_info.has_gpu);
669        println!("  SIMD Available: {}", self.system_info.has_simd);
670        println!();
671
672        println!("Data Characteristics:");
673        println!("  Samples: {}", self.data_info.n_samples);
674        println!("  Features: {}", self.data_info.nfeatures);
675        println!(
676            "  Memory Footprint: {:.1} MB",
677            self.data_info.memory_footprint_mb
678        );
679        println!("  Sparsity: {:.1}%", self.data_info.sparsity * 100.0);
680        println!("  Has Outliers: {}", self.data_info.has_outliers());
681        println!();
682
683        println!("Recommendations:");
684        for rec in &self.recommendations {
685            println!("  {}:", rec.transformation);
686            println!("    Strategy: {:?}", rec.config.processing_strategy);
687            println!(
688                "    Estimated Time: {:.2}s",
689                rec.estimated_time.as_secs_f64()
690            );
691            println!("    Use Parallel: {}", rec.config.use_parallel);
692            println!("    Use SIMD: {}", rec.config.use_simd);
693            println!("    Use GPU: {}", rec.config.use_gpu);
694            println!("    Reason: {}", rec.reason);
695            println!();
696        }
697    }
698}
699
/// ✅ Advanced MODE: Intelligent Dynamic Configuration Optimizer
/// Provides real-time optimization of transformation parameters based on
/// live performance metrics and adaptive learning from historical patterns.
pub struct AdvancedConfigOptimizer {
    /// Historical performance data for different configurations, keyed by
    /// transformation name
    performance_history: HashMap<String, Vec<PerformanceMetric>>,
    /// Real-time system monitoring
    system_monitor: SystemMonitor,
    /// Machine learning model for configuration prediction
    config_predictor: ConfigurationPredictor,
    /// Adaptive parameter tuning engine
    adaptive_tuner: AdaptiveParameterTuner,
}
713
/// ✅ Advanced MODE: Performance metrics for configuration optimization
#[derive(Debug, Clone)]
pub struct PerformanceMetric {
    /// Configuration hash for identification
    #[allow(dead_code)]
    config_hash: u64,
    /// Execution time in microseconds
    execution_time_us: u64,
    /// Memory usage in bytes
    memory_usage_bytes: usize,
    /// Cache hit rate (expected in [0, 1] — consumed directly as a score)
    cache_hit_rate: f64,
    /// CPU utilization (treated as a fraction; clamped to at most 1.0 by
    /// the reward computation)
    cpu_utilization: f64,
    /// Accuracy/quality score of the transformation
    quality_score: f64,
    /// Timestamp of measurement
    #[allow(dead_code)]
    timestamp: std::time::Instant,
}
734
/// ✅ Advanced MODE: Real-time system performance monitoring
///
/// Refreshed via `update_metrics` before each optimization pass.
pub struct SystemMonitor {
    /// Current CPU load average
    cpu_load: f64,
    /// Available memory in bytes
    available_memory_bytes: usize,
    /// Cache miss rate
    cache_miss_rate: f64,
    /// I/O wait percentage
    io_wait_percent: f64,
    /// Temperature information (for thermal throttling)
    cpu_temperature_celsius: f64,
}
748
/// ✅ Advanced MODE: ML-based configuration prediction
pub struct ConfigurationPredictor {
    /// Feature weights for different data characteristics
    #[allow(dead_code)]
    feature_weights: HashMap<String, f64>,
    /// Learning rate for online updates
    #[allow(dead_code)]
    learning_rate: f64,
    /// Prediction confidence threshold
    confidence_threshold: f64,
    /// Training sample count (drives periodic retraining every 100 samples)
    sample_count: usize,
}
762
/// ✅ Advanced MODE: Adaptive parameter tuning with reinforcement learning
pub struct AdaptiveParameterTuner {
    /// Q-learning table for parameter optimization
    q_table: HashMap<(String, String), f64>, // (state, action) -> reward
    /// Exploration rate (epsilon)
    exploration_rate: f64,
    /// Learning rate for Q-learning
    learning_rate: f64,
    /// Discount factor for future rewards
    #[allow(dead_code)]
    discount_factor: f64,
    /// Current state representation
    current_state: String,
}
777
impl Default for AdvancedConfigOptimizer {
    /// Equivalent to [`AdvancedConfigOptimizer::new`].
    fn default() -> Self {
        Self::new()
    }
}
783
784impl AdvancedConfigOptimizer {
    /// ✅ Advanced MODE: Create new advanced-intelligent configuration optimizer
    ///
    /// Starts with an empty performance history; the monitor, predictor,
    /// and tuner are constructed with their own defaults.
    pub fn new() -> Self {
        AdvancedConfigOptimizer {
            performance_history: HashMap::new(),
            system_monitor: SystemMonitor::new(),
            config_predictor: ConfigurationPredictor::new(),
            adaptive_tuner: AdaptiveParameterTuner::new(),
        }
    }
794
    /// ✅ Advanced MODE: Intelligently optimize configuration in real-time
    ///
    /// Pipeline: refresh system metrics → build a state key → ML
    /// prediction → adaptive (Q-learning) tuning → validation against
    /// current system constraints.
    ///
    /// # Errors
    /// Propagates failures from metric collection, prediction, tuning,
    /// or validation.
    pub fn advanced_optimize_config(
        &mut self,
        datachars: &DataCharacteristics,
        transformation_type: &str,
        user_params: &HashMap<String, f64>,
    ) -> Result<OptimizationConfig> {
        // Update real-time system metrics
        self.system_monitor.update_metrics()?;

        // Generate state representation for ML models
        let current_state = self.generate_state_representation(datachars, &self.system_monitor);

        // Use ML predictor to suggest initial configuration
        let predicted_config = self.config_predictor.predict_optimal_config(
            &current_state,
            transformation_type,
            user_params,
        )?;

        // Apply adaptive parameter tuning (may adjust the predicted config)
        let tuned_config = self.adaptive_tuner.tune_parameters(
            predicted_config,
            &current_state,
            transformation_type,
        )?;

        // Validate configuration against system constraints (memory, etc.)
        let validated_config =
            self.validate_and_adjust_config(tuned_config, &self.system_monitor)?;

        Ok(validated_config)
    }
828
    /// ✅ Advanced MODE: Learn from transformation performance feedback
    ///
    /// Records the measurement, feeds it to the predictor, converts it
    /// into a scalar reward for the Q-learning tuner, and retrains the
    /// models every 100 accumulated samples.
    pub fn learn_from_performance(
        &mut self,
        config: &OptimizationConfig,
        performance: PerformanceMetric,
        transformation_type: &str,
    ) -> Result<()> {
        let config_hash = self.compute_config_hash(config);

        // Store performance history
        self.performance_history
            .entry(transformation_type.to_string())
            .or_default()
            .push(performance.clone());

        // Update ML predictor with the new observation
        self.config_predictor.update_from_feedback(&performance)?;

        // Update adaptive tuner with reward signal
        let reward = self.compute_reward_signal(&performance);
        self.adaptive_tuner.update_q_values(config_hash, reward)?;

        // Trigger online learning once every 100 samples.
        // NOTE(review): assumes `update_from_feedback` increments
        // `sample_count` — confirm in ConfigurationPredictor.
        if self.config_predictor.sample_count.is_multiple_of(100) {
            self.retrain_models()?;
        }

        Ok(())
    }
858
    /// Generate state representation for ML models.
    ///
    /// Encodes the key data characteristics plus the current CPU load into
    /// a single string key used by the predictor and the Q-table.
    fn generate_state_representation(
        &self,
        datachars: &DataCharacteristics,
        system_monitor: &SystemMonitor,
    ) -> String {
        format!(
            "samples:{}_features:{}_memory:{:.2}_cpu:{:.2}_sparsity:{:.3}",
            datachars.n_samples,
            datachars.nfeatures,
            datachars.memory_footprint_mb,
            system_monitor.cpu_load,
            datachars.sparsity,
        )
    }
874
875    /// Compute configuration hash for identification
876    fn compute_config_hash(&self, config: &OptimizationConfig) -> u64 {
877        use std::collections::hash_map::DefaultHasher;
878        use std::hash::{Hash, Hasher};
879
880        let mut hasher = DefaultHasher::new();
881        config.memory_limit_mb.hash(&mut hasher);
882        config.use_parallel.hash(&mut hasher);
883        config.use_simd.hash(&mut hasher);
884        config.use_gpu.hash(&mut hasher);
885        config.chunk_size.hash(&mut hasher);
886        config.num_threads.hash(&mut hasher);
887
888        hasher.finish()
889    }
890
891    /// Compute reward signal from performance metrics
892    fn compute_reward_signal(&self, performance: &PerformanceMetric) -> f64 {
893        // Multi-objective reward function
894        let time_score = 1.0 / (1.0 + performance.execution_time_us as f64 / 1_000_000.0);
895        let memory_score = 1.0 / (1.0 + performance.memory_usage_bytes as f64 / 1_000_000_000.0);
896        let cache_score = performance.cache_hit_rate;
897        let cpu_score = 1.0 - performance.cpu_utilization.min(1.0);
898        let quality_score = performance.quality_score;
899
900        // Weighted combination
901        0.3 * time_score
902            + 0.2 * memory_score
903            + 0.2 * cache_score
904            + 0.1 * cpu_score
905            + 0.2 * quality_score
906    }
907
908    /// Validate and adjust configuration based on current system state
909    fn validate_and_adjust_config(
910        &self,
911        mut config: OptimizationConfig,
912        system_monitor: &SystemMonitor,
913    ) -> Result<OptimizationConfig> {
914        // Adjust based on available memory
915        let available_mb = system_monitor.available_memory_bytes / (1024 * 1024);
916        config.memory_limit_mb = config.memory_limit_mb.min(available_mb * 80 / 100); // 80% safety margin
917
918        // Adjust parallelism based on CPU load
919        if system_monitor.cpu_load > 0.8 {
920            config.num_threads = (config.num_threads / 2).max(1);
921        }
922
923        // Disable GPU if thermal throttling detected
924        if system_monitor.cpu_temperature_celsius > 85.0 {
925            config.use_gpu = false;
926        }
927
928        // Adjust chunk size based on cache miss rate
929        if system_monitor.cache_miss_rate > 0.1 {
930            config.chunk_size = (config.chunk_size as f64 * 0.8) as usize;
931        }
932
933        Ok(config)
934    }
935
    /// Retrain ML models with accumulated data
    ///
    /// Invoked periodically from the feedback path (every 100 samples):
    /// refreshes the supervised predictor from the recorded history and
    /// anneals the RL tuner's exploration rate.
    fn retrain_models(&mut self) -> Result<()> {
        // Retrain configuration predictor
        self.config_predictor
            .retrain_with_history(&self.performance_history)?;

        // Update adaptive tuner exploration rate
        self.adaptive_tuner.decay_exploration_rate();

        Ok(())
    }
947}
948
949impl Default for SystemMonitor {
950    fn default() -> Self {
951        Self::new()
952    }
953}
954
955impl SystemMonitor {
956    /// Create new system monitor
957    pub fn new() -> Self {
958        SystemMonitor {
959            cpu_load: 0.0,
960            available_memory_bytes: 0,
961            cache_miss_rate: 0.0,
962            io_wait_percent: 0.0,
963            cpu_temperature_celsius: 50.0,
964        }
965    }
966
967    /// ✅ Advanced MODE: Update real-time system metrics
968    pub fn update_metrics(&mut self) -> Result<()> {
969        // In production, these would read from actual system APIs
970        self.cpu_load = self.read_cpu_load()?;
971        self.available_memory_bytes = self.read_available_memory()?;
972        self.cache_miss_rate = self.read_cache_miss_rate()?;
973        self.io_wait_percent = self.read_io_wait()?;
974        self.cpu_temperature_celsius = self.read_cpu_temperature()?;
975
976        Ok(())
977    }
978
979    fn read_cpu_load(&self) -> Result<f64> {
980        // Simplified implementation - in practice, read from /proc/loadavg or similar
981        Ok(0.5) // Placeholder
982    }
983
984    fn read_available_memory(&self) -> Result<usize> {
985        // Simplified implementation - in practice, read from /proc/meminfo
986        Ok(8 * 1024 * 1024 * 1024) // 8GB placeholder
987    }
988
989    fn read_cache_miss_rate(&self) -> Result<f64> {
990        // Simplified implementation - in practice, read from perf counters
991        Ok(0.05) // 5% cache miss rate placeholder
992    }
993
994    fn read_io_wait(&self) -> Result<f64> {
995        // Simplified implementation - in practice, read from /proc/stat
996        Ok(0.02) // 2% I/O wait placeholder
997    }
998
999    fn read_cpu_temperature(&self) -> Result<f64> {
1000        // Simplified implementation - in practice, read from thermal zones
1001        Ok(55.0) // 55°C placeholder
1002    }
1003}
1004
1005impl Default for ConfigurationPredictor {
1006    fn default() -> Self {
1007        Self::new()
1008    }
1009}
1010
1011impl ConfigurationPredictor {
1012    /// Create new configuration predictor
1013    pub fn new() -> Self {
1014        let mut feature_weights = HashMap::new();
1015        feature_weights.insert("n_samples".to_string(), 0.3);
1016        feature_weights.insert("nfeatures".to_string(), 0.25);
1017        feature_weights.insert("memory_footprint".to_string(), 0.2);
1018        feature_weights.insert("sparsity".to_string(), 0.15);
1019        feature_weights.insert("cpu_load".to_string(), 0.1);
1020
1021        ConfigurationPredictor {
1022            feature_weights,
1023            learning_rate: 0.01,
1024            confidence_threshold: 0.8,
1025            sample_count: 0,
1026        }
1027    }
1028
1029    /// Predict optimal configuration using ML model
1030    pub fn predict_optimal_config(
1031        &self,
1032        state: &str,
1033        _transformation_type: &str,
1034        _user_params: &HashMap<String, f64>,
1035    ) -> Result<OptimizationConfig> {
1036        // Extract features from state
1037        let features = self.extract_features(state)?;
1038
1039        // Predict configuration parameters using weighted features
1040        let predicted_memory_limit = self.predict_memory_limit(&features);
1041        let predicted_parallelism = self.predict_parallelism(&features);
1042        let predicted_simd_usage = self.predict_simd_usage(&features);
1043
1044        // Create base configuration
1045        let strategy = if predicted_memory_limit < 1000 {
1046            ProcessingStrategy::OutOfCore { chunk_size: 1024 }
1047        } else if predicted_parallelism {
1048            ProcessingStrategy::Parallel
1049        } else if predicted_simd_usage {
1050            ProcessingStrategy::Simd
1051        } else {
1052            ProcessingStrategy::Standard
1053        };
1054
1055        Ok(OptimizationConfig {
1056            processing_strategy: strategy,
1057            memory_limit_mb: predicted_memory_limit,
1058            use_robust: false,
1059            use_parallel: predicted_parallelism,
1060            use_simd: predicted_simd_usage,
1061            use_gpu: features.get("memory_footprint").unwrap_or(&0.0) > &100.0,
1062            chunk_size: if predicted_memory_limit < 1000 {
1063                512
1064            } else {
1065                2048
1066            },
1067            num_threads: if predicted_parallelism { 4 } else { 1 },
1068            algorithm_params: HashMap::new(),
1069        })
1070    }
1071
1072    /// Extract numerical features from state string
1073    fn extract_features(&self, state: &str) -> Result<HashMap<String, f64>> {
1074        let mut features = HashMap::new();
1075
1076        for part in state.split('_') {
1077            if let Some((key, value)) = part.split_once(':') {
1078                if let Ok(val) = value.parse::<f64>() {
1079                    features.insert(key.to_string(), val);
1080                }
1081            }
1082        }
1083
1084        Ok(features)
1085    }
1086
1087    fn predict_memory_limit(&self, features: &HashMap<String, f64>) -> usize {
1088        let memory_footprint = features.get("memory_footprint").unwrap_or(&100.0);
1089        (memory_footprint * 1.5) as usize
1090    }
1091
1092    fn predict_parallelism(&self, features: &HashMap<String, f64>) -> bool {
1093        let samples = features.get("samples").unwrap_or(&1000.0);
1094        let cpu_load = features.get("cpu").unwrap_or(&0.5);
1095        samples > &5000.0 && cpu_load < &0.7
1096    }
1097
1098    fn predict_simd_usage(&self, features: &HashMap<String, f64>) -> bool {
1099        let features_count = features.get("features").unwrap_or(&10.0);
1100        features_count > &50.0
1101    }
1102
1103    /// Update model from performance feedback
1104    pub fn update_from_feedback(&mut self, performance: &PerformanceMetric) -> Result<()> {
1105        self.sample_count += 1;
1106        // In practice, this would update model weights based on _performance
1107        Ok(())
1108    }
1109
1110    /// Retrain model with historical data
1111    pub fn retrain_with_history(
1112        &mut self,
1113        history: &HashMap<String, Vec<PerformanceMetric>>,
1114    ) -> Result<()> {
1115        // In practice, this would perform full model retraining
1116        self.confidence_threshold = (self.confidence_threshold + 0.01).min(0.95);
1117        Ok(())
1118    }
1119}
1120
1121impl Default for AdaptiveParameterTuner {
1122    fn default() -> Self {
1123        Self::new()
1124    }
1125}
1126
1127impl AdaptiveParameterTuner {
1128    /// Create new adaptive parameter tuner
1129    pub fn new() -> Self {
1130        AdaptiveParameterTuner {
1131            q_table: HashMap::new(),
1132            exploration_rate: 0.1,
1133            learning_rate: 0.1,
1134            discount_factor: 0.9,
1135            current_state: String::new(),
1136        }
1137    }
1138
1139    /// Tune parameters using reinforcement learning
1140    pub fn tune_parameters(
1141        &mut self,
1142        mut config: OptimizationConfig,
1143        state: &str,
1144        _transformation_type: &str,
1145    ) -> Result<OptimizationConfig> {
1146        self.current_state = state.to_string();
1147
1148        // Apply epsilon-greedy policy for parameter exploration
1149        if scirs2_core::random::rng().random_range(0.0..1.0) < self.exploration_rate {
1150            // Explore: randomly adjust parameters
1151            config = self.explore_parameters(config)?;
1152        } else {
1153            // Exploit: use best known parameters from Q-table
1154            config = self.exploit_best_parameters(config, state)?;
1155        }
1156
1157        Ok(config)
1158    }
1159
1160    /// Explore by randomly adjusting parameters
1161    fn explore_parameters(&self, mut config: OptimizationConfig) -> Result<OptimizationConfig> {
1162        let mut rng = scirs2_core::random::rng();
1163
1164        // Randomly adjust memory limit (±20%)
1165        let memory_factor = rng.random_range(0.8..1.2);
1166        config.memory_limit_mb = (config.memory_limit_mb as f64 * memory_factor) as usize;
1167
1168        // Randomly toggle parallelism
1169        if rng.random_range(0.0..1.0) < 0.3 {
1170            config.use_parallel = !config.use_parallel;
1171        }
1172
1173        // Randomly adjust chunk size (±50%)
1174        let chunk_factor = rng.random_range(0.5..1.5);
1175        config.chunk_size = (config.chunk_size as f64 * chunk_factor) as usize;
1176
1177        Ok(config)
1178    }
1179
1180    /// Exploit best known parameters from Q-table
1181    fn exploit_best_parameters(
1182        &self,
1183        config: OptimizationConfig,
1184        state: &str,
1185    ) -> Result<OptimizationConfig> {
1186        // Find best action for current state from Q-table
1187        let _best_action = self.find_best_action(state);
1188
1189        // In practice, this would apply the best known parameter adjustments
1190        // For now, return the original config
1191        Ok(config)
1192    }
1193
1194    /// Find best action for given state
1195    fn find_best_action(&self, state: &str) -> String {
1196        let mut best_action = "default".to_string();
1197        let mut best_value = f64::NEG_INFINITY;
1198
1199        for ((s, action), &value) in &self.q_table {
1200            if s == state && value > best_value {
1201                best_value = value;
1202                best_action = action.clone();
1203            }
1204        }
1205
1206        best_action
1207    }
1208
1209    /// Update Q-values based on reward
1210    pub fn update_q_values(&mut self, confighash: u64, reward: f64) -> Result<()> {
1211        let state_action = (self.current_state.clone(), "current_action".to_string());
1212
1213        // Q-learning update rule
1214        let old_value = self.q_table.get(&state_action).unwrap_or(&0.0);
1215        let new_value = old_value + self.learning_rate * (reward - old_value);
1216
1217        self.q_table.insert(state_action, new_value);
1218
1219        Ok(())
1220    }
1221
1222    /// Decay exploration rate over time
1223    pub fn decay_exploration_rate(&mut self) {
1224        self.exploration_rate = (self.exploration_rate * 0.995).max(0.01);
1225    }
1226}
1227
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    // Detection should always find at least one core and some memory, and the
    // "safe" budget must be strictly below the detected total.
    #[test]
    fn test_system_resources_detection() {
        let resources = SystemResources::detect();
        assert!(resources.cpu_cores > 0);
        assert!(resources.memory_mb > 0);
        assert!(resources.safe_memory_mb() < resources.memory_mb);
    }

    // A 100x10 dense matrix should report its exact shape, a positive memory
    // footprint, and not be classified as a large dataset.
    #[test]
    fn test_data_characteristics_analysis() {
        let data = Array2::from_shape_vec((100, 10), (0..1000).map(|x| x as f64).collect())
            .expect("Operation failed");
        let chars = DataCharacteristics::analyze(&data.view()).expect("Operation failed");

        assert_eq!(chars.n_samples, 100);
        assert_eq!(chars.nfeatures, 10);
        assert!(chars.memory_footprint_mb > 0.0);
        assert!(!chars.is_large_dataset());
    }

    // Standardization config must always receive a positive memory budget.
    #[test]
    fn test_optimization_config_for_standardization() {
        let data = Array2::ones((1000, 50));
        let chars = DataCharacteristics::analyze(&data.view()).expect("Operation failed");
        let system = SystemResources::detect();

        let config = OptimizationConfig::for_standardization(&chars, &system);
        assert!(config.memory_limit_mb > 0);
    }

    // PCA config should carry the requested component count through to the
    // algorithm parameters map.
    #[test]
    fn test_optimization_config_for_pca() {
        let data = Array2::ones((500, 20));
        let chars = DataCharacteristics::analyze(&data.view()).expect("Operation failed");
        let system = SystemResources::detect();

        let config = OptimizationConfig::for_pca(&chars, &system, 10);
        assert_eq!(config.algorithm_params.get("n_components"), Some(&10.0));
    }

    // Small degree/feature combinations succeed; combinatorially explosive
    // ones (degree 10 over 100 features) must return an error, not overflow.
    #[test]
    fn test_polynomial_features_estimation() {
        // Test polynomial feature estimation
        let result = OptimizationConfig::estimate_polynomial_features(5, 2);
        assert!(result.is_ok());

        // Should handle large degrees gracefully
        let result = OptimizationConfig::estimate_polynomial_features(100, 10);
        assert!(result.is_err());
    }

    // End-to-end smoke test: optimizing a standardization run yields a usable
    // config and a non-empty recommendation report.
    #[test]
    fn test_auto_tuner() {
        let tuner = AutoTuner::new();
        let data = Array2::ones((100, 10));
        let chars = DataCharacteristics::analyze(&data.view()).expect("Operation failed");

        let config = tuner
            .optimize_for_transformation("standardization", &chars, &HashMap::new())
            .expect("Operation failed");
        assert!(config.memory_limit_mb > 0);

        let report = tuner.generate_report(&chars);
        assert!(!report.recommendations.is_empty());
    }

    // is_large_dataset() should flip when sample count and footprint shrink
    // below the large-data thresholds.
    #[test]
    fn test_large_dataset_detection() {
        let mut chars = DataCharacteristics {
            n_samples: 200_000,
            nfeatures: 1000,
            sparsity: 0.1,
            data_range: 100.0,
            outlier_ratio: 0.02,
            has_missing: false,
            memory_footprint_mb: 1500.0,
            elementsize: 8,
        };

        assert!(chars.is_large_dataset());

        chars.n_samples = 1000;
        chars.memory_footprint_mb = 10.0;
        assert!(!chars.is_large_dataset());
    }
}