1use scirs2_core::Rng;
8#[cfg(feature = "distributed")]
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11
12use crate::error::{Result, TransformError};
13use crate::utils::ProcessingStrategy;
14
/// Snapshot of the hardware resources available to the transformation pipeline.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "distributed", derive(Serialize, Deserialize))]
pub struct SystemResources {
    /// Available system memory in megabytes.
    pub memory_mb: usize,
    /// Number of logical CPU cores.
    pub cpu_cores: usize,
    /// Whether a GPU backend is compiled in (`gpu` feature).
    pub has_gpu: bool,
    /// Whether SIMD support is compiled in (`simd` feature).
    pub has_simd: bool,
    /// Estimated L3 cache size in kilobytes.
    pub l3_cache_kb: usize,
}
30
31impl SystemResources {
32 pub fn detect() -> Self {
34 SystemResources {
35 memory_mb: Self::detect_memory_mb(),
36 cpu_cores: num_cpus::get(),
37 has_gpu: Self::detect_gpu(),
38 has_simd: Self::detect_simd(),
39 l3_cache_kb: Self::detect_l3_cache_kb(),
40 }
41 }
42
43 fn detect_memory_mb() -> usize {
45 #[cfg(target_os = "linux")]
47 {
48 if let Ok(meminfo) = std::fs::read_to_string("/proc/meminfo") {
49 for line in meminfo.lines() {
50 if line.starts_with("MemAvailable:") {
51 if let Some(kb_str) = line.split_whitespace().nth(1) {
52 if let Ok(kb) = kb_str.parse::<usize>() {
53 return kb / 1024; }
55 }
56 }
57 }
58 }
59 }
60
61 8 * 1024
63 }
64
65 fn detect_gpu() -> bool {
67 #[cfg(feature = "gpu")]
69 {
70 true
72 }
73 #[cfg(not(feature = "gpu"))]
74 {
75 false
76 }
77 }
78
79 fn detect_simd() -> bool {
81 #[cfg(feature = "simd")]
82 {
83 true
84 }
85 #[cfg(not(feature = "simd"))]
86 {
87 false
88 }
89 }
90
91 fn detect_l3_cache_kb() -> usize {
93 8 * 1024 }
96
97 pub fn safe_memory_mb(&self) -> usize {
99 (self.memory_mb as f64 * 0.8) as usize
100 }
101
102 pub fn optimal_chunk_size(&self, elementsize: usize) -> usize {
104 let target_bytes = (self.l3_cache_kb * 1024) / 2;
106 (target_bytes / elementsize).max(1000) }
108}
109
/// Summary statistics of a dataset used to pick processing strategies.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "distributed", derive(Serialize, Deserialize))]
pub struct DataCharacteristics {
    /// Number of rows (samples).
    pub n_samples: usize,
    /// Number of columns (features).
    pub nfeatures: usize,
    /// Fraction of entries that are exactly zero, in [0, 1].
    pub sparsity: f64,
    /// max - min over finite values (0.0 when none are finite).
    pub data_range: f64,
    /// Fraction of sampled values flagged as outliers by the IQR rule.
    pub outlier_ratio: f64,
    /// True when any entry is NaN or infinite.
    pub has_missing: bool,
    /// Estimated in-memory size of the data in MB.
    pub memory_footprint_mb: f64,
    /// Size of one element in bytes (size_of::<f64>()).
    pub elementsize: usize,
}
131
132impl DataCharacteristics {
133 pub fn analyze(data: &scirs2_core::ndarray::ArrayView2<f64>) -> Result<Self> {
135 let (n_samples, nfeatures) = data.dim();
136
137 if n_samples == 0 || nfeatures == 0 {
138 return Err(TransformError::InvalidInput("Empty _data".to_string()));
139 }
140
141 let zeros = data.iter().filter(|&&x| x == 0.0).count();
143 let sparsity = zeros as f64 / data.len() as f64;
144
145 let mut min_val = f64::INFINITY;
147 let mut max_val = f64::NEG_INFINITY;
148 let mut finite_count = 0;
149 let mut missing_count = 0;
150
151 for &val in data.iter() {
152 if val.is_finite() {
153 min_val = min_val.min(val);
154 max_val = max_val.max(val);
155 finite_count += 1;
156 } else {
157 missing_count += 1;
158 }
159 }
160
161 let data_range = if finite_count > 0 {
162 max_val - min_val
163 } else {
164 0.0
165 };
166 let has_missing = missing_count > 0;
167
168 let outlier_ratio = if n_samples > 10 {
170 let mut sample_values: Vec<f64> = data.iter()
171 .filter(|&&x| x.is_finite())
172 .take(1000) .copied()
174 .collect();
175
176 if sample_values.len() >= 4 {
177 sample_values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
178 let n = sample_values.len();
179 let q1 = sample_values[n / 4];
180 let q3 = sample_values[3 * n / 4];
181 let iqr = q3 - q1;
182
183 if iqr > 0.0 {
184 let lower_bound = q1 - 1.5 * iqr;
185 let upper_bound = q3 + 1.5 * iqr;
186 let outliers = sample_values
187 .iter()
188 .filter(|&&x| x < lower_bound || x > upper_bound)
189 .count();
190 outliers as f64 / sample_values.len() as f64
191 } else {
192 0.0
193 }
194 } else {
195 0.0
196 }
197 } else {
198 0.0
199 };
200
201 let memory_footprint_mb =
202 (n_samples * nfeatures * std::mem::size_of::<f64>()) as f64 / (1024.0 * 1024.0);
203
204 Ok(DataCharacteristics {
205 n_samples,
206 nfeatures,
207 sparsity,
208 data_range,
209 outlier_ratio,
210 has_missing,
211 memory_footprint_mb,
212 elementsize: std::mem::size_of::<f64>(),
213 })
214 }
215
216 pub fn is_large_dataset(&self) -> bool {
218 self.n_samples > 100_000 || self.nfeatures > 10_000 || self.memory_footprint_mb > 1000.0
219 }
220
221 pub fn is_wide_dataset(&self) -> bool {
223 self.nfeatures > self.n_samples
224 }
225
226 pub fn is_sparse(&self) -> bool {
228 self.sparsity > 0.5
229 }
230
231 pub fn has_outliers(&self) -> bool {
233 self.outlier_ratio > 0.05 }
235}
236
/// Concrete execution plan for a transformation, produced by the tuners.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "distributed", derive(Serialize, Deserialize))]
pub struct OptimizationConfig {
    /// How the work is scheduled (standard / SIMD / parallel / out-of-core).
    pub processing_strategy: ProcessingStrategy,
    /// Upper bound on memory the transformation should use, in MB.
    pub memory_limit_mb: usize,
    /// Prefer outlier-robust statistics (median/IQR) over mean/std.
    pub use_robust: bool,
    /// Enable multi-threaded execution.
    pub use_parallel: bool,
    /// Enable SIMD kernels.
    pub use_simd: bool,
    /// Offload to GPU when available.
    pub use_gpu: bool,
    /// Elements per chunk for chunked/out-of-core processing.
    pub chunk_size: usize,
    /// Worker thread count (1 when `use_parallel` is false).
    pub num_threads: usize,
    /// Free-form numeric parameters for specific algorithms.
    pub algorithm_params: HashMap<String, f64>,
}
260
261impl OptimizationConfig {
262 pub fn for_standardization(datachars: &DataCharacteristics, system: &SystemResources) -> Self {
264 let use_robust = datachars.has_outliers();
265 let use_parallel = datachars.n_samples > 10_000 && system.cpu_cores > 1;
266 let use_simd = system.has_simd && datachars.nfeatures > 100;
267 let use_gpu = system.has_gpu && datachars.memory_footprint_mb > 100.0;
268
269 let processing_strategy = if datachars.memory_footprint_mb > system.safe_memory_mb() as f64
270 {
271 ProcessingStrategy::OutOfCore {
272 chunk_size: system.optimal_chunk_size(datachars.elementsize),
273 }
274 } else if use_parallel {
275 ProcessingStrategy::Parallel
276 } else if use_simd {
277 ProcessingStrategy::Simd
278 } else {
279 ProcessingStrategy::Standard
280 };
281
282 OptimizationConfig {
283 processing_strategy,
284 memory_limit_mb: system.safe_memory_mb(),
285 use_robust,
286 use_parallel,
287 use_simd,
288 use_gpu,
289 chunk_size: system.optimal_chunk_size(datachars.elementsize),
290 num_threads: if use_parallel { system.cpu_cores } else { 1 },
291 algorithm_params: HashMap::new(),
292 }
293 }
294
295 pub fn for_pca(
297 datachars: &DataCharacteristics,
298 system: &SystemResources,
299 n_components: usize,
300 ) -> Self {
301 let use_randomized = datachars.is_large_dataset();
302 let use_parallel = datachars.n_samples > 1_000 && system.cpu_cores > 1;
303 let use_gpu = system.has_gpu && datachars.memory_footprint_mb > 500.0;
304
305 let memory_multiplier = if datachars.nfeatures > datachars.n_samples {
307 3.0
308 } else {
309 2.0
310 };
311 let estimated_memory = datachars.memory_footprint_mb * memory_multiplier;
312
313 let processing_strategy = if estimated_memory > system.safe_memory_mb() as f64 {
314 ProcessingStrategy::OutOfCore {
315 chunk_size: (system.safe_memory_mb() * 1024 * 1024)
316 / (datachars.nfeatures * datachars.elementsize),
317 }
318 } else if use_parallel {
319 ProcessingStrategy::Parallel
320 } else {
321 ProcessingStrategy::Standard
322 };
323
324 let mut algorithm_params = HashMap::new();
325 algorithm_params.insert(
326 "use_randomized".to_string(),
327 if use_randomized { 1.0 } else { 0.0 },
328 );
329 algorithm_params.insert("n_components".to_string(), n_components as f64);
330
331 OptimizationConfig {
332 processing_strategy,
333 memory_limit_mb: system.safe_memory_mb(),
334 use_robust: false, use_parallel,
336 use_simd: system.has_simd,
337 use_gpu,
338 chunk_size: system.optimal_chunk_size(datachars.elementsize),
339 num_threads: if use_parallel { system.cpu_cores } else { 1 },
340 algorithm_params,
341 }
342 }
343
344 pub fn for_polynomial_features(
346 datachars: &DataCharacteristics,
347 system: &SystemResources,
348 degree: usize,
349 ) -> Result<Self> {
350 let estimated_output_features =
352 Self::estimate_polynomial_features(datachars.nfeatures, degree)?;
353 let estimated_memory = datachars.n_samples as f64
354 * estimated_output_features as f64
355 * datachars.elementsize as f64
356 / (1024.0 * 1024.0);
357
358 if estimated_memory > system.memory_mb as f64 * 0.9 {
359 return Err(TransformError::MemoryError(format!(
360 "Polynomial features would require {estimated_memory:.1} MB, but only {} MB available",
361 system.memory_mb
362 )));
363 }
364
365 let use_parallel = datachars.n_samples > 1_000 && system.cpu_cores > 1;
366 let use_simd = system.has_simd && estimated_output_features > 100;
367
368 let processing_strategy = if estimated_memory > system.safe_memory_mb() as f64 {
369 ProcessingStrategy::OutOfCore {
370 chunk_size: (system.safe_memory_mb() * 1024 * 1024)
371 / (estimated_output_features * datachars.elementsize),
372 }
373 } else if use_parallel {
374 ProcessingStrategy::Parallel
375 } else if use_simd {
376 ProcessingStrategy::Simd
377 } else {
378 ProcessingStrategy::Standard
379 };
380
381 let mut algorithm_params = HashMap::new();
382 algorithm_params.insert("degree".to_string(), degree as f64);
383 algorithm_params.insert(
384 "estimated_output_features".to_string(),
385 estimated_output_features as f64,
386 );
387
388 Ok(OptimizationConfig {
389 processing_strategy,
390 memory_limit_mb: system.safe_memory_mb(),
391 use_robust: false,
392 use_parallel,
393 use_simd,
394 use_gpu: false, chunk_size: system.optimal_chunk_size(datachars.elementsize),
396 num_threads: if use_parallel { system.cpu_cores } else { 1 },
397 algorithm_params,
398 })
399 }
400
401 fn estimate_polynomial_features(nfeatures: usize, degree: usize) -> Result<usize> {
403 if degree == 0 {
404 return Err(TransformError::InvalidInput(
405 "Degree must be at least 1".to_string(),
406 ));
407 }
408
409 let mut total_features = 1; for d in 1..=degree {
412 let mut coeff = 1;
414 for i in 0..d {
415 coeff = coeff * (nfeatures + d - 1 - i) / (i + 1);
416
417 if coeff > 1_000_000 {
419 return Err(TransformError::ComputationError(
420 "Too many polynomial _features would be generated".to_string(),
421 ));
422 }
423 }
424 total_features += coeff;
425 }
426
427 Ok(total_features)
428 }
429
430 pub fn estimated_execution_time(&self, datachars: &DataCharacteristics) -> std::time::Duration {
432 use std::time::Duration;
433
434 let base_ops = datachars.n_samples as u64 * datachars.nfeatures as u64;
435
436 let ops_per_second = match self.processing_strategy {
437 ProcessingStrategy::Parallel => {
438 1_000_000_000 * self.num_threads as u64 }
440 ProcessingStrategy::Simd => {
441 2_000_000_000 }
443 ProcessingStrategy::OutOfCore { .. } => {
444 100_000_000 }
446 ProcessingStrategy::Standard => {
447 500_000_000 }
449 };
450
451 let time_ns = (base_ops * 1_000_000_000) / ops_per_second;
452 Duration::from_nanos(time_ns.max(1000)) }
454}
455
/// Automatic performance tuner: detects system resources once at
/// construction and hands out per-transformation optimization configs.
pub struct AutoTuner {
    // Detected hardware capabilities.
    system: SystemResources,
    // Recent performance records keyed by transformation name
    // (capped at 100 entries each by `record_performance`).
    performance_history: HashMap<String, Vec<PerformanceRecord>>,
}
463
/// One recorded transformation run. Fields are retained for future
/// learning logic and are not read yet (hence the `dead_code` allows).
#[derive(Debug, Clone)]
struct PerformanceRecord {
    // Debug-format hash of the config that was used.
    #[allow(dead_code)]
    config_hash: String,
    // Wall-clock duration of the run.
    #[allow(dead_code)]
    execution_time: std::time::Duration,
    // Peak memory used, in MB.
    #[allow(dead_code)]
    memory_used_mb: f64,
    // Whether the run completed successfully.
    #[allow(dead_code)]
    success: bool,
    // Snapshot of the input data's characteristics.
    #[allow(dead_code)]
    data_characteristics: DataCharacteristics,
}
478
479impl Default for AutoTuner {
480 fn default() -> Self {
481 Self::new()
482 }
483}
484
485impl AutoTuner {
486 pub fn new() -> Self {
488 AutoTuner {
489 system: SystemResources::detect(),
490 performance_history: HashMap::new(),
491 }
492 }
493
494 pub fn optimize_for_transformation(
496 &self,
497 transformation: &str,
498 datachars: &DataCharacteristics,
499 params: &HashMap<String, f64>,
500 ) -> Result<OptimizationConfig> {
501 match transformation {
502 "standardization" => Ok(OptimizationConfig::for_standardization(
503 datachars,
504 &self.system,
505 )),
506 "pca" => {
507 let n_components = params.get("n_components").unwrap_or(&5.0) as &f64;
508 Ok(OptimizationConfig::for_pca(
509 datachars,
510 &self.system,
511 *n_components as usize,
512 ))
513 }
514 "polynomial" => {
515 let degree = params.get("degree").unwrap_or(&2.0) as &f64;
516 OptimizationConfig::for_polynomial_features(
517 datachars,
518 &self.system,
519 *degree as usize,
520 )
521 }
522 _ => {
523 Ok(OptimizationConfig {
525 processing_strategy: if datachars.is_large_dataset() {
526 ProcessingStrategy::Parallel
527 } else {
528 ProcessingStrategy::Standard
529 },
530 memory_limit_mb: self.system.safe_memory_mb(),
531 use_robust: datachars.has_outliers(),
532 use_parallel: datachars.n_samples > 10_000,
533 use_simd: self.system.has_simd,
534 use_gpu: self.system.has_gpu && datachars.memory_footprint_mb > 100.0,
535 chunk_size: self.system.optimal_chunk_size(datachars.elementsize),
536 num_threads: self.system.cpu_cores,
537 algorithm_params: HashMap::new(),
538 })
539 }
540 }
541 }
542
543 pub fn record_performance(
545 &mut self,
546 transformation: &str,
547 config: &OptimizationConfig,
548 execution_time: std::time::Duration,
549 memory_used_mb: f64,
550 success: bool,
551 datachars: DataCharacteristics,
552 ) {
553 let config_hash = format!("{config:?}"); let record = PerformanceRecord {
556 config_hash: config_hash.clone(),
557 execution_time,
558 memory_used_mb,
559 success,
560 data_characteristics: datachars,
561 };
562
563 self.performance_history
564 .entry(transformation.to_string())
565 .or_default()
566 .push(record);
567
568 let records = self
570 .performance_history
571 .get_mut(transformation)
572 .expect("Operation failed");
573 if records.len() > 100 {
574 records.remove(0);
575 }
576 }
577
578 pub fn system_resources(&self) -> &SystemResources {
580 &self.system
581 }
582
583 pub fn generate_report(&self, datachars: &DataCharacteristics) -> OptimizationReport {
585 let recommendations = vec![
586 self.get_recommendation_for_transformation("standardization", datachars),
587 self.get_recommendation_for_transformation("pca", datachars),
588 self.get_recommendation_for_transformation("polynomial", datachars),
589 ];
590
591 OptimizationReport {
592 system_info: self.system.clone(),
593 data_info: datachars.clone(),
594 recommendations,
595 estimated_total_memory_mb: datachars.memory_footprint_mb * 2.0, }
597 }
598
599 fn get_recommendation_for_transformation(
600 &self,
601 transformation: &str,
602 datachars: &DataCharacteristics,
603 ) -> TransformationRecommendation {
604 let config = self
605 .optimize_for_transformation(transformation, datachars, &HashMap::new())
606 .unwrap_or_else(|_| OptimizationConfig {
607 processing_strategy: ProcessingStrategy::Standard,
608 memory_limit_mb: self.system.safe_memory_mb(),
609 use_robust: false,
610 use_parallel: false,
611 use_simd: false,
612 use_gpu: false,
613 chunk_size: 1000,
614 num_threads: 1,
615 algorithm_params: HashMap::new(),
616 });
617
618 let estimated_time = config.estimated_execution_time(datachars);
619
620 TransformationRecommendation {
621 transformation: transformation.to_string(),
622 config,
623 estimated_time,
624 confidence: 0.8, reason: format!(
626 "Optimized for {} samples, {} features",
627 datachars.n_samples, datachars.nfeatures
628 ),
629 }
630 }
631}
632
/// Full optimization report: system snapshot, data summary, and one
/// recommendation per supported transformation.
#[derive(Debug, Clone)]
pub struct OptimizationReport {
    /// Detected hardware resources.
    pub system_info: SystemResources,
    /// Analyzed characteristics of the input data.
    pub data_info: DataCharacteristics,
    /// Per-transformation recommendations.
    pub recommendations: Vec<TransformationRecommendation>,
    /// Rough total memory requirement estimate, in MB.
    pub estimated_total_memory_mb: f64,
}
645
/// Recommended configuration for a single named transformation.
#[derive(Debug, Clone)]
pub struct TransformationRecommendation {
    /// Transformation name (e.g. "pca").
    pub transformation: String,
    /// Suggested execution configuration.
    pub config: OptimizationConfig,
    /// Predicted execution time.
    pub estimated_time: std::time::Duration,
    /// Heuristic confidence in [0, 1].
    pub confidence: f64,
    /// Human-readable justification.
    pub reason: String,
}
660
impl OptimizationReport {
    /// Pretty-print the report to stdout: system resources, data
    /// characteristics, then one section per recommendation.
    pub fn print_report(&self) {
        println!("=== Optimization Report ===");
        println!("System Resources:");
        println!(" Memory: {} MB", self.system_info.memory_mb);
        println!(" CPU Cores: {}", self.system_info.cpu_cores);
        println!(" GPU Available: {}", self.system_info.has_gpu);
        println!(" SIMD Available: {}", self.system_info.has_simd);
        println!();

        println!("Data Characteristics:");
        println!(" Samples: {}", self.data_info.n_samples);
        println!(" Features: {}", self.data_info.nfeatures);
        println!(
            " Memory Footprint: {:.1} MB",
            self.data_info.memory_footprint_mb
        );
        println!(" Sparsity: {:.1}%", self.data_info.sparsity * 100.0);
        println!(" Has Outliers: {}", self.data_info.has_outliers());
        println!();

        // One section per recommendation, in the order generated.
        println!("Recommendations:");
        for rec in &self.recommendations {
            println!(" {}:", rec.transformation);
            println!(" Strategy: {:?}", rec.config.processing_strategy);
            println!(
                " Estimated Time: {:.2}s",
                rec.estimated_time.as_secs_f64()
            );
            println!(" Use Parallel: {}", rec.config.use_parallel);
            println!(" Use SIMD: {}", rec.config.use_simd);
            println!(" Use GPU: {}", rec.config.use_gpu);
            println!(" Reason: {}", rec.reason);
            println!();
        }
    }
}
699
/// Advanced self-tuning optimizer combining live system monitoring, a
/// learned configuration predictor, and a Q-learning parameter tuner.
pub struct AdvancedConfigOptimizer {
    // Observed performance metrics keyed by transformation name.
    performance_history: HashMap<String, Vec<PerformanceMetric>>,
    // Live system telemetry (CPU, memory, cache, thermals).
    system_monitor: SystemMonitor,
    // Predictor mapping encoded state strings to configurations.
    config_predictor: ConfigurationPredictor,
    // Epsilon-greedy reinforcement-learning tuner.
    adaptive_tuner: AdaptiveParameterTuner,
}
713
/// Measured outcome of one transformation run, used as learning feedback.
#[derive(Debug, Clone)]
pub struct PerformanceMetric {
    // Hash of the config that produced this run (reserved for future use).
    #[allow(dead_code)]
    config_hash: u64,
    // Wall-clock execution time in microseconds.
    execution_time_us: u64,
    // Peak memory usage in bytes.
    memory_usage_bytes: usize,
    // Cache hit rate in [0, 1].
    cache_hit_rate: f64,
    // CPU utilization in [0, 1].
    cpu_utilization: f64,
    // Task-specific result-quality score in [0, 1].
    quality_score: f64,
    // When the metric was captured (reserved for future use).
    #[allow(dead_code)]
    timestamp: std::time::Instant,
}
734
/// Live system telemetry consumed when validating a proposed config.
pub struct SystemMonitor {
    // Normalized CPU load in [0, 1].
    cpu_load: f64,
    // Currently available memory in bytes.
    available_memory_bytes: usize,
    // Cache miss rate in [0, 1].
    cache_miss_rate: f64,
    // Fraction of time spent waiting on I/O.
    io_wait_percent: f64,
    // CPU temperature in degrees Celsius.
    cpu_temperature_celsius: f64,
}
748
/// Lightweight predictor from encoded state features to configurations.
pub struct ConfigurationPredictor {
    // Per-feature weights (reserved for a future learned model).
    #[allow(dead_code)]
    feature_weights: HashMap<String, f64>,
    // Step size for future weight updates.
    #[allow(dead_code)]
    learning_rate: f64,
    // Minimum confidence before trusting a prediction.
    confidence_threshold: f64,
    // Number of feedback samples observed so far.
    sample_count: usize,
}
762
/// Epsilon-greedy Q-learning tuner over (state, action) pairs.
pub struct AdaptiveParameterTuner {
    // Q-values keyed by (state, action).
    q_table: HashMap<(String, String), f64>,
    // Probability of taking a random exploratory step.
    exploration_rate: f64,
    // Q-update step size.
    learning_rate: f64,
    // Future-reward discount (reserved; no multi-step updates yet).
    #[allow(dead_code)]
    discount_factor: f64,
    // State string from the most recent `tune_parameters` call.
    current_state: String,
}
777
778impl Default for AdvancedConfigOptimizer {
779 fn default() -> Self {
780 Self::new()
781 }
782}
783
784impl AdvancedConfigOptimizer {
785 pub fn new() -> Self {
787 AdvancedConfigOptimizer {
788 performance_history: HashMap::new(),
789 system_monitor: SystemMonitor::new(),
790 config_predictor: ConfigurationPredictor::new(),
791 adaptive_tuner: AdaptiveParameterTuner::new(),
792 }
793 }
794
795 pub fn advanced_optimize_config(
797 &mut self,
798 datachars: &DataCharacteristics,
799 transformation_type: &str,
800 user_params: &HashMap<String, f64>,
801 ) -> Result<OptimizationConfig> {
802 self.system_monitor.update_metrics()?;
804
805 let current_state = self.generate_state_representation(datachars, &self.system_monitor);
807
808 let predicted_config = self.config_predictor.predict_optimal_config(
810 ¤t_state,
811 transformation_type,
812 user_params,
813 )?;
814
815 let tuned_config = self.adaptive_tuner.tune_parameters(
817 predicted_config,
818 ¤t_state,
819 transformation_type,
820 )?;
821
822 let validated_config =
824 self.validate_and_adjust_config(tuned_config, &self.system_monitor)?;
825
826 Ok(validated_config)
827 }
828
829 pub fn learn_from_performance(
831 &mut self,
832 config: &OptimizationConfig,
833 performance: PerformanceMetric,
834 transformation_type: &str,
835 ) -> Result<()> {
836 let config_hash = self.compute_config_hash(config);
837
838 self.performance_history
840 .entry(transformation_type.to_string())
841 .or_default()
842 .push(performance.clone());
843
844 self.config_predictor.update_from_feedback(&performance)?;
846
847 let reward = self.compute_reward_signal(&performance);
849 self.adaptive_tuner.update_q_values(config_hash, reward)?;
850
851 if self.config_predictor.sample_count.is_multiple_of(100) {
853 self.retrain_models()?;
854 }
855
856 Ok(())
857 }
858
859 fn generate_state_representation(
861 &self,
862 datachars: &DataCharacteristics,
863 system_monitor: &SystemMonitor,
864 ) -> String {
865 format!(
866 "samples:{}_features:{}_memory:{:.2}_cpu:{:.2}_sparsity:{:.3}",
867 datachars.n_samples,
868 datachars.nfeatures,
869 datachars.memory_footprint_mb,
870 system_monitor.cpu_load,
871 datachars.sparsity,
872 )
873 }
874
875 fn compute_config_hash(&self, config: &OptimizationConfig) -> u64 {
877 use std::collections::hash_map::DefaultHasher;
878 use std::hash::{Hash, Hasher};
879
880 let mut hasher = DefaultHasher::new();
881 config.memory_limit_mb.hash(&mut hasher);
882 config.use_parallel.hash(&mut hasher);
883 config.use_simd.hash(&mut hasher);
884 config.use_gpu.hash(&mut hasher);
885 config.chunk_size.hash(&mut hasher);
886 config.num_threads.hash(&mut hasher);
887
888 hasher.finish()
889 }
890
891 fn compute_reward_signal(&self, performance: &PerformanceMetric) -> f64 {
893 let time_score = 1.0 / (1.0 + performance.execution_time_us as f64 / 1_000_000.0);
895 let memory_score = 1.0 / (1.0 + performance.memory_usage_bytes as f64 / 1_000_000_000.0);
896 let cache_score = performance.cache_hit_rate;
897 let cpu_score = 1.0 - performance.cpu_utilization.min(1.0);
898 let quality_score = performance.quality_score;
899
900 0.3 * time_score
902 + 0.2 * memory_score
903 + 0.2 * cache_score
904 + 0.1 * cpu_score
905 + 0.2 * quality_score
906 }
907
908 fn validate_and_adjust_config(
910 &self,
911 mut config: OptimizationConfig,
912 system_monitor: &SystemMonitor,
913 ) -> Result<OptimizationConfig> {
914 let available_mb = system_monitor.available_memory_bytes / (1024 * 1024);
916 config.memory_limit_mb = config.memory_limit_mb.min(available_mb * 80 / 100); if system_monitor.cpu_load > 0.8 {
920 config.num_threads = (config.num_threads / 2).max(1);
921 }
922
923 if system_monitor.cpu_temperature_celsius > 85.0 {
925 config.use_gpu = false;
926 }
927
928 if system_monitor.cache_miss_rate > 0.1 {
930 config.chunk_size = (config.chunk_size as f64 * 0.8) as usize;
931 }
932
933 Ok(config)
934 }
935
936 fn retrain_models(&mut self) -> Result<()> {
938 self.config_predictor
940 .retrain_with_history(&self.performance_history)?;
941
942 self.adaptive_tuner.decay_exploration_rate();
944
945 Ok(())
946 }
947}
948
949impl Default for SystemMonitor {
950 fn default() -> Self {
951 Self::new()
952 }
953}
954
impl SystemMonitor {
    /// Monitor starting from neutral/unknown readings; call
    /// [`SystemMonitor::update_metrics`] before relying on the values.
    pub fn new() -> Self {
        SystemMonitor {
            cpu_load: 0.0,
            available_memory_bytes: 0,
            cache_miss_rate: 0.0,
            io_wait_percent: 0.0,
            cpu_temperature_celsius: 50.0,
        }
    }

    /// Refresh every metric from its reader.
    pub fn update_metrics(&mut self) -> Result<()> {
        self.cpu_load = self.read_cpu_load()?;
        self.available_memory_bytes = self.read_available_memory()?;
        self.cache_miss_rate = self.read_cache_miss_rate()?;
        self.io_wait_percent = self.read_io_wait()?;
        self.cpu_temperature_celsius = self.read_cpu_temperature()?;

        Ok(())
    }

    // The readers below are placeholders returning plausible constants;
    // real platform probes can be dropped in without changing any caller.

    /// Normalized CPU load in [0, 1]; placeholder 50%.
    fn read_cpu_load(&self) -> Result<f64> {
        Ok(0.5)
    }

    /// Available memory in bytes; placeholder 8 GiB.
    fn read_available_memory(&self) -> Result<usize> {
        Ok(8 * 1024 * 1024 * 1024)
    }

    /// Cache miss rate in [0, 1]; placeholder 5%.
    fn read_cache_miss_rate(&self) -> Result<f64> {
        Ok(0.05)
    }

    /// I/O wait fraction; placeholder 2%.
    fn read_io_wait(&self) -> Result<f64> {
        Ok(0.02)
    }

    /// CPU temperature in Celsius; placeholder 55.
    fn read_cpu_temperature(&self) -> Result<f64> {
        Ok(55.0)
    }
}
1004
1005impl Default for ConfigurationPredictor {
1006 fn default() -> Self {
1007 Self::new()
1008 }
1009}
1010
1011impl ConfigurationPredictor {
1012 pub fn new() -> Self {
1014 let mut feature_weights = HashMap::new();
1015 feature_weights.insert("n_samples".to_string(), 0.3);
1016 feature_weights.insert("nfeatures".to_string(), 0.25);
1017 feature_weights.insert("memory_footprint".to_string(), 0.2);
1018 feature_weights.insert("sparsity".to_string(), 0.15);
1019 feature_weights.insert("cpu_load".to_string(), 0.1);
1020
1021 ConfigurationPredictor {
1022 feature_weights,
1023 learning_rate: 0.01,
1024 confidence_threshold: 0.8,
1025 sample_count: 0,
1026 }
1027 }
1028
1029 pub fn predict_optimal_config(
1031 &self,
1032 state: &str,
1033 _transformation_type: &str,
1034 _user_params: &HashMap<String, f64>,
1035 ) -> Result<OptimizationConfig> {
1036 let features = self.extract_features(state)?;
1038
1039 let predicted_memory_limit = self.predict_memory_limit(&features);
1041 let predicted_parallelism = self.predict_parallelism(&features);
1042 let predicted_simd_usage = self.predict_simd_usage(&features);
1043
1044 let strategy = if predicted_memory_limit < 1000 {
1046 ProcessingStrategy::OutOfCore { chunk_size: 1024 }
1047 } else if predicted_parallelism {
1048 ProcessingStrategy::Parallel
1049 } else if predicted_simd_usage {
1050 ProcessingStrategy::Simd
1051 } else {
1052 ProcessingStrategy::Standard
1053 };
1054
1055 Ok(OptimizationConfig {
1056 processing_strategy: strategy,
1057 memory_limit_mb: predicted_memory_limit,
1058 use_robust: false,
1059 use_parallel: predicted_parallelism,
1060 use_simd: predicted_simd_usage,
1061 use_gpu: features.get("memory_footprint").unwrap_or(&0.0) > &100.0,
1062 chunk_size: if predicted_memory_limit < 1000 {
1063 512
1064 } else {
1065 2048
1066 },
1067 num_threads: if predicted_parallelism { 4 } else { 1 },
1068 algorithm_params: HashMap::new(),
1069 })
1070 }
1071
1072 fn extract_features(&self, state: &str) -> Result<HashMap<String, f64>> {
1074 let mut features = HashMap::new();
1075
1076 for part in state.split('_') {
1077 if let Some((key, value)) = part.split_once(':') {
1078 if let Ok(val) = value.parse::<f64>() {
1079 features.insert(key.to_string(), val);
1080 }
1081 }
1082 }
1083
1084 Ok(features)
1085 }
1086
1087 fn predict_memory_limit(&self, features: &HashMap<String, f64>) -> usize {
1088 let memory_footprint = features.get("memory_footprint").unwrap_or(&100.0);
1089 (memory_footprint * 1.5) as usize
1090 }
1091
1092 fn predict_parallelism(&self, features: &HashMap<String, f64>) -> bool {
1093 let samples = features.get("samples").unwrap_or(&1000.0);
1094 let cpu_load = features.get("cpu").unwrap_or(&0.5);
1095 samples > &5000.0 && cpu_load < &0.7
1096 }
1097
1098 fn predict_simd_usage(&self, features: &HashMap<String, f64>) -> bool {
1099 let features_count = features.get("features").unwrap_or(&10.0);
1100 features_count > &50.0
1101 }
1102
1103 pub fn update_from_feedback(&mut self, performance: &PerformanceMetric) -> Result<()> {
1105 self.sample_count += 1;
1106 Ok(())
1108 }
1109
1110 pub fn retrain_with_history(
1112 &mut self,
1113 history: &HashMap<String, Vec<PerformanceMetric>>,
1114 ) -> Result<()> {
1115 self.confidence_threshold = (self.confidence_threshold + 0.01).min(0.95);
1117 Ok(())
1118 }
1119}
1120
1121impl Default for AdaptiveParameterTuner {
1122 fn default() -> Self {
1123 Self::new()
1124 }
1125}
1126
1127impl AdaptiveParameterTuner {
1128 pub fn new() -> Self {
1130 AdaptiveParameterTuner {
1131 q_table: HashMap::new(),
1132 exploration_rate: 0.1,
1133 learning_rate: 0.1,
1134 discount_factor: 0.9,
1135 current_state: String::new(),
1136 }
1137 }
1138
1139 pub fn tune_parameters(
1141 &mut self,
1142 mut config: OptimizationConfig,
1143 state: &str,
1144 _transformation_type: &str,
1145 ) -> Result<OptimizationConfig> {
1146 self.current_state = state.to_string();
1147
1148 if scirs2_core::random::rng().random_range(0.0..1.0) < self.exploration_rate {
1150 config = self.explore_parameters(config)?;
1152 } else {
1153 config = self.exploit_best_parameters(config, state)?;
1155 }
1156
1157 Ok(config)
1158 }
1159
1160 fn explore_parameters(&self, mut config: OptimizationConfig) -> Result<OptimizationConfig> {
1162 let mut rng = scirs2_core::random::rng();
1163
1164 let memory_factor = rng.random_range(0.8..1.2);
1166 config.memory_limit_mb = (config.memory_limit_mb as f64 * memory_factor) as usize;
1167
1168 if rng.random_range(0.0..1.0) < 0.3 {
1170 config.use_parallel = !config.use_parallel;
1171 }
1172
1173 let chunk_factor = rng.random_range(0.5..1.5);
1175 config.chunk_size = (config.chunk_size as f64 * chunk_factor) as usize;
1176
1177 Ok(config)
1178 }
1179
1180 fn exploit_best_parameters(
1182 &self,
1183 config: OptimizationConfig,
1184 state: &str,
1185 ) -> Result<OptimizationConfig> {
1186 let _best_action = self.find_best_action(state);
1188
1189 Ok(config)
1192 }
1193
1194 fn find_best_action(&self, state: &str) -> String {
1196 let mut best_action = "default".to_string();
1197 let mut best_value = f64::NEG_INFINITY;
1198
1199 for ((s, action), &value) in &self.q_table {
1200 if s == state && value > best_value {
1201 best_value = value;
1202 best_action = action.clone();
1203 }
1204 }
1205
1206 best_action
1207 }
1208
1209 pub fn update_q_values(&mut self, confighash: u64, reward: f64) -> Result<()> {
1211 let state_action = (self.current_state.clone(), "current_action".to_string());
1212
1213 let old_value = self.q_table.get(&state_action).unwrap_or(&0.0);
1215 let new_value = old_value + self.learning_rate * (reward - old_value);
1216
1217 self.q_table.insert(state_action, new_value);
1218
1219 Ok(())
1220 }
1221
1222 pub fn decay_exploration_rate(&mut self) {
1224 self.exploration_rate = (self.exploration_rate * 0.995).max(0.01);
1225 }
1226}
1227
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    // Hardware detection sanity: positive counts and a safe budget
    // strictly below total memory (80% rule).
    #[test]
    fn test_system_resources_detection() {
        let resources = SystemResources::detect();
        assert!(resources.cpu_cores > 0);
        assert!(resources.memory_mb > 0);
        assert!(resources.safe_memory_mb() < resources.memory_mb);
    }

    // analyze() reports exact dimensions and a positive footprint for a
    // small dense matrix, which must not classify as "large".
    #[test]
    fn test_data_characteristics_analysis() {
        let data = Array2::from_shape_vec((100, 10), (0..1000).map(|x| x as f64).collect())
            .expect("Operation failed");
        let chars = DataCharacteristics::analyze(&data.view()).expect("Operation failed");

        assert_eq!(chars.n_samples, 100);
        assert_eq!(chars.nfeatures, 10);
        assert!(chars.memory_footprint_mb > 0.0);
        assert!(!chars.is_large_dataset());
    }

    // Standardization config should always carry a positive memory limit.
    #[test]
    fn test_optimization_config_for_standardization() {
        let data = Array2::ones((1000, 50));
        let chars = DataCharacteristics::analyze(&data.view()).expect("Operation failed");
        let system = SystemResources::detect();

        let config = OptimizationConfig::for_standardization(&chars, &system);
        assert!(config.memory_limit_mb > 0);
    }

    // The PCA config records the requested component count in its params.
    #[test]
    fn test_optimization_config_for_pca() {
        let data = Array2::ones((500, 20));
        let chars = DataCharacteristics::analyze(&data.view()).expect("Operation failed");
        let system = SystemResources::detect();

        let config = OptimizationConfig::for_pca(&chars, &system, 10);
        assert_eq!(config.algorithm_params.get("n_components"), Some(&10.0));
    }

    // Small expansions succeed; 100 features at degree 10 must trip the
    // 1,000,000-feature explosion guard.
    #[test]
    fn test_polynomial_features_estimation() {
        let result = OptimizationConfig::estimate_polynomial_features(5, 2);
        assert!(result.is_ok());

        let result = OptimizationConfig::estimate_polynomial_features(100, 10);
        assert!(result.is_err());
    }

    // The tuner produces a usable config and a non-empty report.
    #[test]
    fn test_auto_tuner() {
        let tuner = AutoTuner::new();
        let data = Array2::ones((100, 10));
        let chars = DataCharacteristics::analyze(&data.view()).expect("Operation failed");

        let config = tuner
            .optimize_for_transformation("standardization", &chars, &HashMap::new())
            .expect("Operation failed");
        assert!(config.memory_limit_mb > 0);

        let report = tuner.generate_report(&chars);
        assert!(!report.recommendations.is_empty());
    }

    // is_large_dataset() flips once all three size thresholds are undercut.
    #[test]
    fn test_large_dataset_detection() {
        let mut chars = DataCharacteristics {
            n_samples: 200_000,
            nfeatures: 1000,
            sparsity: 0.1,
            data_range: 100.0,
            outlier_ratio: 0.02,
            has_missing: false,
            memory_footprint_mb: 1500.0,
            elementsize: 8,
        };

        assert!(chars.is_large_dataset());

        chars.n_samples = 1000;
        chars.memory_footprint_mb = 10.0;
        assert!(!chars.is_large_dataset());
    }
}