1use scirs2_core::Rng;
8#[cfg(feature = "distributed")]
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11
12use crate::error::{Result, TransformError};
13use crate::utils::ProcessingStrategy;
14use scirs2_core::random::RngExt;
15
/// Snapshot of the hardware resources available to the current process.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "distributed", derive(Serialize, Deserialize))]
pub struct SystemResources {
    /// Available system memory in megabytes.
    pub memory_mb: usize,
    /// Number of logical CPU cores.
    pub cpu_cores: usize,
    /// Whether a GPU backend was compiled in (`gpu` feature flag).
    pub has_gpu: bool,
    /// Whether SIMD acceleration was compiled in (`simd` feature flag).
    pub has_simd: bool,
    /// L3 cache size in kilobytes (currently a fixed estimate, not probed).
    pub l3_cache_kb: usize,
}
31
32impl SystemResources {
33 pub fn detect() -> Self {
35 SystemResources {
36 memory_mb: Self::detect_memory_mb(),
37 cpu_cores: num_cpus::get(),
38 has_gpu: Self::detect_gpu(),
39 has_simd: Self::detect_simd(),
40 l3_cache_kb: Self::detect_l3_cache_kb(),
41 }
42 }
43
44 fn detect_memory_mb() -> usize {
46 #[cfg(target_os = "linux")]
48 {
49 if let Ok(meminfo) = std::fs::read_to_string("/proc/meminfo") {
50 for line in meminfo.lines() {
51 if line.starts_with("MemAvailable:") {
52 if let Some(kb_str) = line.split_whitespace().nth(1) {
53 if let Ok(kb) = kb_str.parse::<usize>() {
54 return kb / 1024; }
56 }
57 }
58 }
59 }
60 }
61
62 8 * 1024
64 }
65
66 fn detect_gpu() -> bool {
68 #[cfg(feature = "gpu")]
70 {
71 true
73 }
74 #[cfg(not(feature = "gpu"))]
75 {
76 false
77 }
78 }
79
80 fn detect_simd() -> bool {
82 #[cfg(feature = "simd")]
83 {
84 true
85 }
86 #[cfg(not(feature = "simd"))]
87 {
88 false
89 }
90 }
91
92 fn detect_l3_cache_kb() -> usize {
94 8 * 1024 }
97
98 pub fn safe_memory_mb(&self) -> usize {
100 (self.memory_mb as f64 * 0.8) as usize
101 }
102
103 pub fn optimal_chunk_size(&self, elementsize: usize) -> usize {
105 let target_bytes = (self.l3_cache_kb * 1024) / 2;
107 (target_bytes / elementsize).max(1000) }
109}
110
/// Summary statistics of a dataset, used to choose processing strategies.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "distributed", derive(Serialize, Deserialize))]
pub struct DataCharacteristics {
    /// Number of rows (samples).
    pub n_samples: usize,
    /// Number of columns (features).
    pub nfeatures: usize,
    /// Fraction of entries that are exactly zero.
    pub sparsity: f64,
    /// max - min over the finite values (0.0 when no finite values exist).
    pub data_range: f64,
    /// Fraction of sampled values outside the 1.5*IQR fences.
    pub outlier_ratio: f64,
    /// Whether any non-finite (NaN/inf) values were seen.
    pub has_missing: bool,
    /// Estimated in-memory size of the data in megabytes.
    pub memory_footprint_mb: f64,
    /// Size in bytes of a single element (8 for f64).
    pub elementsize: usize,
}
132
133impl DataCharacteristics {
134 pub fn analyze(data: &scirs2_core::ndarray::ArrayView2<f64>) -> Result<Self> {
136 let (n_samples, nfeatures) = data.dim();
137
138 if n_samples == 0 || nfeatures == 0 {
139 return Err(TransformError::InvalidInput("Empty _data".to_string()));
140 }
141
142 let zeros = data.iter().filter(|&&x| x == 0.0).count();
144 let sparsity = zeros as f64 / data.len() as f64;
145
146 let mut min_val = f64::INFINITY;
148 let mut max_val = f64::NEG_INFINITY;
149 let mut finite_count = 0;
150 let mut missing_count = 0;
151
152 for &val in data.iter() {
153 if val.is_finite() {
154 min_val = min_val.min(val);
155 max_val = max_val.max(val);
156 finite_count += 1;
157 } else {
158 missing_count += 1;
159 }
160 }
161
162 let data_range = if finite_count > 0 {
163 max_val - min_val
164 } else {
165 0.0
166 };
167 let has_missing = missing_count > 0;
168
169 let outlier_ratio = if n_samples > 10 {
171 let mut sample_values: Vec<f64> = data.iter()
172 .filter(|&&x| x.is_finite())
173 .take(1000) .copied()
175 .collect();
176
177 if sample_values.len() >= 4 {
178 sample_values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
179 let n = sample_values.len();
180 let q1 = sample_values[n / 4];
181 let q3 = sample_values[3 * n / 4];
182 let iqr = q3 - q1;
183
184 if iqr > 0.0 {
185 let lower_bound = q1 - 1.5 * iqr;
186 let upper_bound = q3 + 1.5 * iqr;
187 let outliers = sample_values
188 .iter()
189 .filter(|&&x| x < lower_bound || x > upper_bound)
190 .count();
191 outliers as f64 / sample_values.len() as f64
192 } else {
193 0.0
194 }
195 } else {
196 0.0
197 }
198 } else {
199 0.0
200 };
201
202 let memory_footprint_mb =
203 (n_samples * nfeatures * std::mem::size_of::<f64>()) as f64 / (1024.0 * 1024.0);
204
205 Ok(DataCharacteristics {
206 n_samples,
207 nfeatures,
208 sparsity,
209 data_range,
210 outlier_ratio,
211 has_missing,
212 memory_footprint_mb,
213 elementsize: std::mem::size_of::<f64>(),
214 })
215 }
216
217 pub fn is_large_dataset(&self) -> bool {
219 self.n_samples > 100_000 || self.nfeatures > 10_000 || self.memory_footprint_mb > 1000.0
220 }
221
222 pub fn is_wide_dataset(&self) -> bool {
224 self.nfeatures > self.n_samples
225 }
226
227 pub fn is_sparse(&self) -> bool {
229 self.sparsity > 0.5
230 }
231
232 pub fn has_outliers(&self) -> bool {
234 self.outlier_ratio > 0.05 }
236}
237
/// Concrete knob settings for running a transformation: strategy, memory
/// budget, parallel/SIMD/GPU toggles, and algorithm-specific parameters.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "distributed", derive(Serialize, Deserialize))]
pub struct OptimizationConfig {
    /// How the data should be processed (standard/SIMD/parallel/out-of-core).
    pub processing_strategy: ProcessingStrategy,
    /// Upper bound on memory use, in megabytes.
    pub memory_limit_mb: usize,
    /// Use robust (outlier-resistant) statistics.
    pub use_robust: bool,
    /// Enable multi-threaded execution.
    pub use_parallel: bool,
    /// Enable SIMD acceleration.
    pub use_simd: bool,
    /// Enable GPU acceleration.
    pub use_gpu: bool,
    /// Number of elements per processing chunk.
    pub chunk_size: usize,
    /// Number of worker threads to use.
    pub num_threads: usize,
    /// Free-form numeric parameters keyed by name (e.g. "degree").
    pub algorithm_params: HashMap<String, f64>,
}
261
262impl OptimizationConfig {
263 pub fn for_standardization(datachars: &DataCharacteristics, system: &SystemResources) -> Self {
265 let use_robust = datachars.has_outliers();
266 let use_parallel = datachars.n_samples > 10_000 && system.cpu_cores > 1;
267 let use_simd = system.has_simd && datachars.nfeatures > 100;
268 let use_gpu = system.has_gpu && datachars.memory_footprint_mb > 100.0;
269
270 let processing_strategy = if datachars.memory_footprint_mb > system.safe_memory_mb() as f64
271 {
272 ProcessingStrategy::OutOfCore {
273 chunk_size: system.optimal_chunk_size(datachars.elementsize),
274 }
275 } else if use_parallel {
276 ProcessingStrategy::Parallel
277 } else if use_simd {
278 ProcessingStrategy::Simd
279 } else {
280 ProcessingStrategy::Standard
281 };
282
283 OptimizationConfig {
284 processing_strategy,
285 memory_limit_mb: system.safe_memory_mb(),
286 use_robust,
287 use_parallel,
288 use_simd,
289 use_gpu,
290 chunk_size: system.optimal_chunk_size(datachars.elementsize),
291 num_threads: if use_parallel { system.cpu_cores } else { 1 },
292 algorithm_params: HashMap::new(),
293 }
294 }
295
296 pub fn for_pca(
298 datachars: &DataCharacteristics,
299 system: &SystemResources,
300 n_components: usize,
301 ) -> Self {
302 let use_randomized = datachars.is_large_dataset();
303 let use_parallel = datachars.n_samples > 1_000 && system.cpu_cores > 1;
304 let use_gpu = system.has_gpu && datachars.memory_footprint_mb > 500.0;
305
306 let memory_multiplier = if datachars.nfeatures > datachars.n_samples {
308 3.0
309 } else {
310 2.0
311 };
312 let estimated_memory = datachars.memory_footprint_mb * memory_multiplier;
313
314 let processing_strategy = if estimated_memory > system.safe_memory_mb() as f64 {
315 ProcessingStrategy::OutOfCore {
316 chunk_size: (system.safe_memory_mb() * 1024 * 1024)
317 / (datachars.nfeatures * datachars.elementsize),
318 }
319 } else if use_parallel {
320 ProcessingStrategy::Parallel
321 } else {
322 ProcessingStrategy::Standard
323 };
324
325 let mut algorithm_params = HashMap::new();
326 algorithm_params.insert(
327 "use_randomized".to_string(),
328 if use_randomized { 1.0 } else { 0.0 },
329 );
330 algorithm_params.insert("n_components".to_string(), n_components as f64);
331
332 OptimizationConfig {
333 processing_strategy,
334 memory_limit_mb: system.safe_memory_mb(),
335 use_robust: false, use_parallel,
337 use_simd: system.has_simd,
338 use_gpu,
339 chunk_size: system.optimal_chunk_size(datachars.elementsize),
340 num_threads: if use_parallel { system.cpu_cores } else { 1 },
341 algorithm_params,
342 }
343 }
344
345 pub fn for_polynomial_features(
347 datachars: &DataCharacteristics,
348 system: &SystemResources,
349 degree: usize,
350 ) -> Result<Self> {
351 let estimated_output_features =
353 Self::estimate_polynomial_features(datachars.nfeatures, degree)?;
354 let estimated_memory = datachars.n_samples as f64
355 * estimated_output_features as f64
356 * datachars.elementsize as f64
357 / (1024.0 * 1024.0);
358
359 if estimated_memory > system.memory_mb as f64 * 0.9 {
360 return Err(TransformError::MemoryError(format!(
361 "Polynomial features would require {estimated_memory:.1} MB, but only {} MB available",
362 system.memory_mb
363 )));
364 }
365
366 let use_parallel = datachars.n_samples > 1_000 && system.cpu_cores > 1;
367 let use_simd = system.has_simd && estimated_output_features > 100;
368
369 let processing_strategy = if estimated_memory > system.safe_memory_mb() as f64 {
370 ProcessingStrategy::OutOfCore {
371 chunk_size: (system.safe_memory_mb() * 1024 * 1024)
372 / (estimated_output_features * datachars.elementsize),
373 }
374 } else if use_parallel {
375 ProcessingStrategy::Parallel
376 } else if use_simd {
377 ProcessingStrategy::Simd
378 } else {
379 ProcessingStrategy::Standard
380 };
381
382 let mut algorithm_params = HashMap::new();
383 algorithm_params.insert("degree".to_string(), degree as f64);
384 algorithm_params.insert(
385 "estimated_output_features".to_string(),
386 estimated_output_features as f64,
387 );
388
389 Ok(OptimizationConfig {
390 processing_strategy,
391 memory_limit_mb: system.safe_memory_mb(),
392 use_robust: false,
393 use_parallel,
394 use_simd,
395 use_gpu: false, chunk_size: system.optimal_chunk_size(datachars.elementsize),
397 num_threads: if use_parallel { system.cpu_cores } else { 1 },
398 algorithm_params,
399 })
400 }
401
402 fn estimate_polynomial_features(nfeatures: usize, degree: usize) -> Result<usize> {
404 if degree == 0 {
405 return Err(TransformError::InvalidInput(
406 "Degree must be at least 1".to_string(),
407 ));
408 }
409
410 let mut total_features = 1; for d in 1..=degree {
413 let mut coeff = 1;
415 for i in 0..d {
416 coeff = coeff * (nfeatures + d - 1 - i) / (i + 1);
417
418 if coeff > 1_000_000 {
420 return Err(TransformError::ComputationError(
421 "Too many polynomial _features would be generated".to_string(),
422 ));
423 }
424 }
425 total_features += coeff;
426 }
427
428 Ok(total_features)
429 }
430
431 pub fn estimated_execution_time(&self, datachars: &DataCharacteristics) -> std::time::Duration {
433 use std::time::Duration;
434
435 let base_ops = datachars.n_samples as u64 * datachars.nfeatures as u64;
436
437 let ops_per_second = match self.processing_strategy {
438 ProcessingStrategy::Parallel => {
439 1_000_000_000 * self.num_threads as u64 }
441 ProcessingStrategy::Simd => {
442 2_000_000_000 }
444 ProcessingStrategy::OutOfCore { .. } => {
445 100_000_000 }
447 ProcessingStrategy::Standard => {
448 500_000_000 }
450 };
451
452 let time_ns = (base_ops * 1_000_000_000) / ops_per_second;
453 Duration::from_nanos(time_ns.max(1000)) }
455}
456
/// Chooses optimized configurations per transformation and keeps a bounded
/// history of past runs for future tuning.
pub struct AutoTuner {
    /// Detected resources of the current machine.
    system: SystemResources,
    /// Past execution records, keyed by transformation name.
    performance_history: HashMap<String, Vec<PerformanceRecord>>,
}
464
/// One recorded transformation run. Currently write-only (hence the
/// `dead_code` allows); fields are retained for future tuning logic.
#[derive(Debug, Clone)]
struct PerformanceRecord {
    /// Debug-format hash of the configuration that was used.
    #[allow(dead_code)]
    config_hash: String,
    /// Wall-clock duration of the run.
    #[allow(dead_code)]
    execution_time: std::time::Duration,
    /// Memory used by the run, in MB.
    #[allow(dead_code)]
    memory_used_mb: f64,
    /// Whether the run completed successfully.
    #[allow(dead_code)]
    success: bool,
    /// Characteristics of the data that was processed.
    #[allow(dead_code)]
    data_characteristics: DataCharacteristics,
}
479
480impl Default for AutoTuner {
481 fn default() -> Self {
482 Self::new()
483 }
484}
485
486impl AutoTuner {
487 pub fn new() -> Self {
489 AutoTuner {
490 system: SystemResources::detect(),
491 performance_history: HashMap::new(),
492 }
493 }
494
495 pub fn optimize_for_transformation(
497 &self,
498 transformation: &str,
499 datachars: &DataCharacteristics,
500 params: &HashMap<String, f64>,
501 ) -> Result<OptimizationConfig> {
502 match transformation {
503 "standardization" => Ok(OptimizationConfig::for_standardization(
504 datachars,
505 &self.system,
506 )),
507 "pca" => {
508 let n_components = params.get("n_components").unwrap_or(&5.0) as &f64;
509 Ok(OptimizationConfig::for_pca(
510 datachars,
511 &self.system,
512 *n_components as usize,
513 ))
514 }
515 "polynomial" => {
516 let degree = params.get("degree").unwrap_or(&2.0) as &f64;
517 OptimizationConfig::for_polynomial_features(
518 datachars,
519 &self.system,
520 *degree as usize,
521 )
522 }
523 _ => {
524 Ok(OptimizationConfig {
526 processing_strategy: if datachars.is_large_dataset() {
527 ProcessingStrategy::Parallel
528 } else {
529 ProcessingStrategy::Standard
530 },
531 memory_limit_mb: self.system.safe_memory_mb(),
532 use_robust: datachars.has_outliers(),
533 use_parallel: datachars.n_samples > 10_000,
534 use_simd: self.system.has_simd,
535 use_gpu: self.system.has_gpu && datachars.memory_footprint_mb > 100.0,
536 chunk_size: self.system.optimal_chunk_size(datachars.elementsize),
537 num_threads: self.system.cpu_cores,
538 algorithm_params: HashMap::new(),
539 })
540 }
541 }
542 }
543
544 pub fn record_performance(
546 &mut self,
547 transformation: &str,
548 config: &OptimizationConfig,
549 execution_time: std::time::Duration,
550 memory_used_mb: f64,
551 success: bool,
552 datachars: DataCharacteristics,
553 ) {
554 let config_hash = format!("{config:?}"); let record = PerformanceRecord {
557 config_hash: config_hash.clone(),
558 execution_time,
559 memory_used_mb,
560 success,
561 data_characteristics: datachars,
562 };
563
564 self.performance_history
565 .entry(transformation.to_string())
566 .or_default()
567 .push(record);
568
569 let records = self
571 .performance_history
572 .get_mut(transformation)
573 .expect("Operation failed");
574 if records.len() > 100 {
575 records.remove(0);
576 }
577 }
578
579 pub fn system_resources(&self) -> &SystemResources {
581 &self.system
582 }
583
584 pub fn generate_report(&self, datachars: &DataCharacteristics) -> OptimizationReport {
586 let recommendations = vec![
587 self.get_recommendation_for_transformation("standardization", datachars),
588 self.get_recommendation_for_transformation("pca", datachars),
589 self.get_recommendation_for_transformation("polynomial", datachars),
590 ];
591
592 OptimizationReport {
593 system_info: self.system.clone(),
594 data_info: datachars.clone(),
595 recommendations,
596 estimated_total_memory_mb: datachars.memory_footprint_mb * 2.0, }
598 }
599
600 fn get_recommendation_for_transformation(
601 &self,
602 transformation: &str,
603 datachars: &DataCharacteristics,
604 ) -> TransformationRecommendation {
605 let config = self
606 .optimize_for_transformation(transformation, datachars, &HashMap::new())
607 .unwrap_or_else(|_| OptimizationConfig {
608 processing_strategy: ProcessingStrategy::Standard,
609 memory_limit_mb: self.system.safe_memory_mb(),
610 use_robust: false,
611 use_parallel: false,
612 use_simd: false,
613 use_gpu: false,
614 chunk_size: 1000,
615 num_threads: 1,
616 algorithm_params: HashMap::new(),
617 });
618
619 let estimated_time = config.estimated_execution_time(datachars);
620
621 TransformationRecommendation {
622 transformation: transformation.to_string(),
623 config,
624 estimated_time,
625 confidence: 0.8, reason: format!(
627 "Optimized for {} samples, {} features",
628 datachars.n_samples, datachars.nfeatures
629 ),
630 }
631 }
632}
633
/// Full output of [`AutoTuner::generate_report`]: system snapshot, data
/// summary, and per-transformation recommendations.
#[derive(Debug, Clone)]
pub struct OptimizationReport {
    /// Detected system resources at report time.
    pub system_info: SystemResources,
    /// Characteristics of the analyzed dataset.
    pub data_info: DataCharacteristics,
    /// One recommendation per supported transformation.
    pub recommendations: Vec<TransformationRecommendation>,
    /// Rough total memory estimate (data footprint x2), in MB.
    pub estimated_total_memory_mb: f64,
}
646
/// A recommended configuration for a single transformation.
#[derive(Debug, Clone)]
pub struct TransformationRecommendation {
    /// Transformation name (e.g. "pca").
    pub transformation: String,
    /// The recommended configuration.
    pub config: OptimizationConfig,
    /// Rough execution time estimate for this configuration.
    pub estimated_time: std::time::Duration,
    /// Heuristic confidence in [0, 1] (currently a fixed value).
    pub confidence: f64,
    /// Human-readable justification for the recommendation.
    pub reason: String,
}
661
662impl OptimizationReport {
663 pub fn print_report(&self) {
665 println!("=== Optimization Report ===");
666 println!("System Resources:");
667 println!(" Memory: {} MB", self.system_info.memory_mb);
668 println!(" CPU Cores: {}", self.system_info.cpu_cores);
669 println!(" GPU Available: {}", self.system_info.has_gpu);
670 println!(" SIMD Available: {}", self.system_info.has_simd);
671 println!();
672
673 println!("Data Characteristics:");
674 println!(" Samples: {}", self.data_info.n_samples);
675 println!(" Features: {}", self.data_info.nfeatures);
676 println!(
677 " Memory Footprint: {:.1} MB",
678 self.data_info.memory_footprint_mb
679 );
680 println!(" Sparsity: {:.1}%", self.data_info.sparsity * 100.0);
681 println!(" Has Outliers: {}", self.data_info.has_outliers());
682 println!();
683
684 println!("Recommendations:");
685 for rec in &self.recommendations {
686 println!(" {}:", rec.transformation);
687 println!(" Strategy: {:?}", rec.config.processing_strategy);
688 println!(
689 " Estimated Time: {:.2}s",
690 rec.estimated_time.as_secs_f64()
691 );
692 println!(" Use Parallel: {}", rec.config.use_parallel);
693 println!(" Use SIMD: {}", rec.config.use_simd);
694 println!(" Use GPU: {}", rec.config.use_gpu);
695 println!(" Reason: {}", rec.reason);
696 println!();
697 }
698 }
699}
700
/// Learning-based configuration optimizer: combines live system metrics, a
/// feature-based predictor, and a Q-table parameter tuner.
pub struct AdvancedConfigOptimizer {
    /// Observed performance metrics keyed by transformation name.
    performance_history: HashMap<String, Vec<PerformanceMetric>>,
    /// Live system load/memory/temperature readings.
    system_monitor: SystemMonitor,
    /// Predicts a starting configuration from an encoded state string.
    config_predictor: ConfigurationPredictor,
    /// Q-learning tuner that perturbs predicted configurations.
    adaptive_tuner: AdaptiveParameterTuner,
}
714
/// Measured outcome of one transformation run, used as learning feedback.
#[derive(Debug, Clone)]
pub struct PerformanceMetric {
    /// Hash of the configuration that produced this run.
    #[allow(dead_code)]
    config_hash: u64,
    /// Wall-clock execution time in microseconds.
    execution_time_us: u64,
    /// Memory used during the run, in bytes.
    memory_usage_bytes: usize,
    /// Cache hit rate in [0, 1].
    cache_hit_rate: f64,
    /// CPU utilization in [0, 1].
    cpu_utilization: f64,
    /// Result-quality score in [0, 1].
    quality_score: f64,
    /// When the metric was captured.
    #[allow(dead_code)]
    timestamp: std::time::Instant,
}
735
/// Cached system-health readings, refreshed via `update_metrics`.
/// NOTE(review): the current readers are stubs returning fixed values.
pub struct SystemMonitor {
    /// CPU load fraction in [0, 1].
    cpu_load: f64,
    /// Currently available memory, in bytes.
    available_memory_bytes: usize,
    /// Cache miss rate in [0, 1].
    cache_miss_rate: f64,
    /// I/O wait fraction in [0, 1].
    io_wait_percent: f64,
    /// CPU temperature in degrees Celsius.
    cpu_temperature_celsius: f64,
}
749
/// Predicts a starting configuration from encoded data/system features.
pub struct ConfigurationPredictor {
    /// Per-feature weights (reserved for a future learned model).
    #[allow(dead_code)]
    feature_weights: HashMap<String, f64>,
    /// Learning rate for future weight updates.
    #[allow(dead_code)]
    learning_rate: f64,
    /// Confidence threshold, nudged upward during retraining.
    confidence_threshold: f64,
    /// Number of feedback samples seen so far.
    sample_count: usize,
}
763
/// Epsilon-greedy Q-learning tuner over (state, action) pairs.
pub struct AdaptiveParameterTuner {
    /// Q-values keyed by (state string, action name).
    q_table: HashMap<(String, String), f64>,
    /// Probability of exploring (random perturbation) vs exploiting.
    exploration_rate: f64,
    /// Step size for Q-value updates.
    learning_rate: f64,
    /// Discount factor (reserved; updates are currently single-step).
    discount_factor: f64,
    /// Most recent state string seen by `tune_parameters`.
    current_state: String,
}
778
779impl Default for AdvancedConfigOptimizer {
780 fn default() -> Self {
781 Self::new()
782 }
783}
784
785impl AdvancedConfigOptimizer {
786 pub fn new() -> Self {
788 AdvancedConfigOptimizer {
789 performance_history: HashMap::new(),
790 system_monitor: SystemMonitor::new(),
791 config_predictor: ConfigurationPredictor::new(),
792 adaptive_tuner: AdaptiveParameterTuner::new(),
793 }
794 }
795
796 pub fn advanced_optimize_config(
798 &mut self,
799 datachars: &DataCharacteristics,
800 transformation_type: &str,
801 user_params: &HashMap<String, f64>,
802 ) -> Result<OptimizationConfig> {
803 self.system_monitor.update_metrics()?;
805
806 let current_state = self.generate_state_representation(datachars, &self.system_monitor);
808
809 let predicted_config = self.config_predictor.predict_optimal_config(
811 ¤t_state,
812 transformation_type,
813 user_params,
814 )?;
815
816 let tuned_config = self.adaptive_tuner.tune_parameters(
818 predicted_config,
819 ¤t_state,
820 transformation_type,
821 )?;
822
823 let validated_config =
825 self.validate_and_adjust_config(tuned_config, &self.system_monitor)?;
826
827 Ok(validated_config)
828 }
829
830 pub fn learn_from_performance(
832 &mut self,
833 config: &OptimizationConfig,
834 performance: PerformanceMetric,
835 transformation_type: &str,
836 ) -> Result<()> {
837 let config_hash = self.compute_config_hash(config);
838
839 self.performance_history
841 .entry(transformation_type.to_string())
842 .or_default()
843 .push(performance.clone());
844
845 self.config_predictor.update_from_feedback(&performance)?;
847
848 let reward = self.compute_reward_signal(&performance);
850 self.adaptive_tuner.update_q_values(config_hash, reward)?;
851
852 if self.config_predictor.sample_count.is_multiple_of(100) {
854 self.retrain_models()?;
855 }
856
857 Ok(())
858 }
859
860 fn generate_state_representation(
862 &self,
863 datachars: &DataCharacteristics,
864 system_monitor: &SystemMonitor,
865 ) -> String {
866 format!(
867 "samples:{}_features:{}_memory:{:.2}_cpu:{:.2}_sparsity:{:.3}",
868 datachars.n_samples,
869 datachars.nfeatures,
870 datachars.memory_footprint_mb,
871 system_monitor.cpu_load,
872 datachars.sparsity,
873 )
874 }
875
876 fn compute_config_hash(&self, config: &OptimizationConfig) -> u64 {
878 use std::collections::hash_map::DefaultHasher;
879 use std::hash::{Hash, Hasher};
880
881 let mut hasher = DefaultHasher::new();
882 config.memory_limit_mb.hash(&mut hasher);
883 config.use_parallel.hash(&mut hasher);
884 config.use_simd.hash(&mut hasher);
885 config.use_gpu.hash(&mut hasher);
886 config.chunk_size.hash(&mut hasher);
887 config.num_threads.hash(&mut hasher);
888
889 hasher.finish()
890 }
891
892 fn compute_reward_signal(&self, performance: &PerformanceMetric) -> f64 {
894 let time_score = 1.0 / (1.0 + performance.execution_time_us as f64 / 1_000_000.0);
896 let memory_score = 1.0 / (1.0 + performance.memory_usage_bytes as f64 / 1_000_000_000.0);
897 let cache_score = performance.cache_hit_rate;
898 let cpu_score = 1.0 - performance.cpu_utilization.min(1.0);
899 let quality_score = performance.quality_score;
900
901 0.3 * time_score
903 + 0.2 * memory_score
904 + 0.2 * cache_score
905 + 0.1 * cpu_score
906 + 0.2 * quality_score
907 }
908
909 fn validate_and_adjust_config(
911 &self,
912 mut config: OptimizationConfig,
913 system_monitor: &SystemMonitor,
914 ) -> Result<OptimizationConfig> {
915 let available_mb = system_monitor.available_memory_bytes / (1024 * 1024);
917 config.memory_limit_mb = config.memory_limit_mb.min(available_mb * 80 / 100); if system_monitor.cpu_load > 0.8 {
921 config.num_threads = (config.num_threads / 2).max(1);
922 }
923
924 if system_monitor.cpu_temperature_celsius > 85.0 {
926 config.use_gpu = false;
927 }
928
929 if system_monitor.cache_miss_rate > 0.1 {
931 config.chunk_size = (config.chunk_size as f64 * 0.8) as usize;
932 }
933
934 Ok(config)
935 }
936
937 fn retrain_models(&mut self) -> Result<()> {
939 self.config_predictor
941 .retrain_with_history(&self.performance_history)?;
942
943 self.adaptive_tuner.decay_exploration_rate();
945
946 Ok(())
947 }
948}
949
950impl Default for SystemMonitor {
951 fn default() -> Self {
952 Self::new()
953 }
954}
955
956impl SystemMonitor {
957 pub fn new() -> Self {
959 SystemMonitor {
960 cpu_load: 0.0,
961 available_memory_bytes: 0,
962 cache_miss_rate: 0.0,
963 io_wait_percent: 0.0,
964 cpu_temperature_celsius: 50.0,
965 }
966 }
967
968 pub fn update_metrics(&mut self) -> Result<()> {
970 self.cpu_load = self.read_cpu_load()?;
972 self.available_memory_bytes = self.read_available_memory()?;
973 self.cache_miss_rate = self.read_cache_miss_rate()?;
974 self.io_wait_percent = self.read_io_wait()?;
975 self.cpu_temperature_celsius = self.read_cpu_temperature()?;
976
977 Ok(())
978 }
979
980 fn read_cpu_load(&self) -> Result<f64> {
981 Ok(0.5) }
984
985 fn read_available_memory(&self) -> Result<usize> {
986 Ok(8 * 1024 * 1024 * 1024) }
989
990 fn read_cache_miss_rate(&self) -> Result<f64> {
991 Ok(0.05) }
994
995 fn read_io_wait(&self) -> Result<f64> {
996 Ok(0.02) }
999
1000 fn read_cpu_temperature(&self) -> Result<f64> {
1001 Ok(55.0) }
1004}
1005
1006impl Default for ConfigurationPredictor {
1007 fn default() -> Self {
1008 Self::new()
1009 }
1010}
1011
1012impl ConfigurationPredictor {
1013 pub fn new() -> Self {
1015 let mut feature_weights = HashMap::new();
1016 feature_weights.insert("n_samples".to_string(), 0.3);
1017 feature_weights.insert("nfeatures".to_string(), 0.25);
1018 feature_weights.insert("memory_footprint".to_string(), 0.2);
1019 feature_weights.insert("sparsity".to_string(), 0.15);
1020 feature_weights.insert("cpu_load".to_string(), 0.1);
1021
1022 ConfigurationPredictor {
1023 feature_weights,
1024 learning_rate: 0.01,
1025 confidence_threshold: 0.8,
1026 sample_count: 0,
1027 }
1028 }
1029
1030 pub fn predict_optimal_config(
1032 &self,
1033 state: &str,
1034 _transformation_type: &str,
1035 _user_params: &HashMap<String, f64>,
1036 ) -> Result<OptimizationConfig> {
1037 let features = self.extract_features(state)?;
1039
1040 let predicted_memory_limit = self.predict_memory_limit(&features);
1042 let predicted_parallelism = self.predict_parallelism(&features);
1043 let predicted_simd_usage = self.predict_simd_usage(&features);
1044
1045 let strategy = if predicted_memory_limit < 1000 {
1047 ProcessingStrategy::OutOfCore { chunk_size: 1024 }
1048 } else if predicted_parallelism {
1049 ProcessingStrategy::Parallel
1050 } else if predicted_simd_usage {
1051 ProcessingStrategy::Simd
1052 } else {
1053 ProcessingStrategy::Standard
1054 };
1055
1056 Ok(OptimizationConfig {
1057 processing_strategy: strategy,
1058 memory_limit_mb: predicted_memory_limit,
1059 use_robust: false,
1060 use_parallel: predicted_parallelism,
1061 use_simd: predicted_simd_usage,
1062 use_gpu: features.get("memory_footprint").unwrap_or(&0.0) > &100.0,
1063 chunk_size: if predicted_memory_limit < 1000 {
1064 512
1065 } else {
1066 2048
1067 },
1068 num_threads: if predicted_parallelism { 4 } else { 1 },
1069 algorithm_params: HashMap::new(),
1070 })
1071 }
1072
1073 fn extract_features(&self, state: &str) -> Result<HashMap<String, f64>> {
1075 let mut features = HashMap::new();
1076
1077 for part in state.split('_') {
1078 if let Some((key, value)) = part.split_once(':') {
1079 if let Ok(val) = value.parse::<f64>() {
1080 features.insert(key.to_string(), val);
1081 }
1082 }
1083 }
1084
1085 Ok(features)
1086 }
1087
1088 fn predict_memory_limit(&self, features: &HashMap<String, f64>) -> usize {
1089 let memory_footprint = features.get("memory_footprint").unwrap_or(&100.0);
1090 (memory_footprint * 1.5) as usize
1091 }
1092
1093 fn predict_parallelism(&self, features: &HashMap<String, f64>) -> bool {
1094 let samples = features.get("samples").unwrap_or(&1000.0);
1095 let cpu_load = features.get("cpu").unwrap_or(&0.5);
1096 samples > &5000.0 && cpu_load < &0.7
1097 }
1098
1099 fn predict_simd_usage(&self, features: &HashMap<String, f64>) -> bool {
1100 let features_count = features.get("features").unwrap_or(&10.0);
1101 features_count > &50.0
1102 }
1103
1104 pub fn update_from_feedback(&mut self, performance: &PerformanceMetric) -> Result<()> {
1106 self.sample_count += 1;
1107 Ok(())
1109 }
1110
1111 pub fn retrain_with_history(
1113 &mut self,
1114 history: &HashMap<String, Vec<PerformanceMetric>>,
1115 ) -> Result<()> {
1116 self.confidence_threshold = (self.confidence_threshold + 0.01).min(0.95);
1118 Ok(())
1119 }
1120}
1121
1122impl Default for AdaptiveParameterTuner {
1123 fn default() -> Self {
1124 Self::new()
1125 }
1126}
1127
1128impl AdaptiveParameterTuner {
1129 pub fn new() -> Self {
1131 AdaptiveParameterTuner {
1132 q_table: HashMap::new(),
1133 exploration_rate: 0.1,
1134 learning_rate: 0.1,
1135 discount_factor: 0.9,
1136 current_state: String::new(),
1137 }
1138 }
1139
1140 pub fn tune_parameters(
1142 &mut self,
1143 mut config: OptimizationConfig,
1144 state: &str,
1145 _transformation_type: &str,
1146 ) -> Result<OptimizationConfig> {
1147 self.current_state = state.to_string();
1148
1149 if scirs2_core::random::rng().random_range(0.0..1.0) < self.exploration_rate {
1151 config = self.explore_parameters(config)?;
1153 } else {
1154 config = self.exploit_best_parameters(config, state)?;
1156 }
1157
1158 Ok(config)
1159 }
1160
1161 fn explore_parameters(&self, mut config: OptimizationConfig) -> Result<OptimizationConfig> {
1163 let mut rng = scirs2_core::random::rng();
1164
1165 let memory_factor = rng.random_range(0.8..1.2);
1167 config.memory_limit_mb = (config.memory_limit_mb as f64 * memory_factor) as usize;
1168
1169 if rng.random_range(0.0..1.0) < 0.3 {
1171 config.use_parallel = !config.use_parallel;
1172 }
1173
1174 let chunk_factor = rng.random_range(0.5..1.5);
1176 config.chunk_size = (config.chunk_size as f64 * chunk_factor) as usize;
1177
1178 Ok(config)
1179 }
1180
1181 fn exploit_best_parameters(
1183 &self,
1184 config: OptimizationConfig,
1185 state: &str,
1186 ) -> Result<OptimizationConfig> {
1187 let _best_action = self.find_best_action(state);
1189
1190 Ok(config)
1193 }
1194
1195 fn find_best_action(&self, state: &str) -> String {
1197 let mut best_action = "default".to_string();
1198 let mut best_value = f64::NEG_INFINITY;
1199
1200 for ((s, action), &value) in &self.q_table {
1201 if s == state && value > best_value {
1202 best_value = value;
1203 best_action = action.clone();
1204 }
1205 }
1206
1207 best_action
1208 }
1209
1210 pub fn update_q_values(&mut self, confighash: u64, reward: f64) -> Result<()> {
1212 let state_action = (self.current_state.clone(), "current_action".to_string());
1213
1214 let old_value = self.q_table.get(&state_action).unwrap_or(&0.0);
1216 let new_value = old_value + self.learning_rate * (reward - old_value);
1217
1218 self.q_table.insert(state_action, new_value);
1219
1220 Ok(())
1221 }
1222
1223 pub fn decay_exploration_rate(&mut self) {
1225 self.exploration_rate = (self.exploration_rate * 0.995).max(0.01);
1226 }
1227}
1228
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    // Detection should find at least one core and some memory, and the safe
    // budget (80%) must be strictly below the total.
    #[test]
    fn test_system_resources_detection() {
        let resources = SystemResources::detect();
        assert!(resources.cpu_cores > 0);
        assert!(resources.memory_mb > 0);
        assert!(resources.safe_memory_mb() < resources.memory_mb);
    }

    // analyze() must report the matrix shape and a positive footprint for a
    // small dense matrix, which should not count as "large".
    #[test]
    fn test_data_characteristics_analysis() {
        let data = Array2::from_shape_vec((100, 10), (0..1000).map(|x| x as f64).collect())
            .expect("Operation failed");
        let chars = DataCharacteristics::analyze(&data.view()).expect("Operation failed");

        assert_eq!(chars.n_samples, 100);
        assert_eq!(chars.nfeatures, 10);
        assert!(chars.memory_footprint_mb > 0.0);
        assert!(!chars.is_large_dataset());
    }

    // Standardization config should always carry a positive memory budget.
    #[test]
    fn test_optimization_config_for_standardization() {
        let data = Array2::ones((1000, 50));
        let chars = DataCharacteristics::analyze(&data.view()).expect("Operation failed");
        let system = SystemResources::detect();

        let config = OptimizationConfig::for_standardization(&chars, &system);
        assert!(config.memory_limit_mb > 0);
    }

    // PCA config must record the requested component count in its params.
    #[test]
    fn test_optimization_config_for_pca() {
        let data = Array2::ones((500, 20));
        let chars = DataCharacteristics::analyze(&data.view()).expect("Operation failed");
        let system = SystemResources::detect();

        let config = OptimizationConfig::for_pca(&chars, &system, 10);
        assert_eq!(config.algorithm_params.get("n_components"), Some(&10.0));
    }

    // Small expansions succeed; a degree-10 expansion of 100 features blows
    // past the 1,000,000-feature cap and must error.
    #[test]
    fn test_polynomial_features_estimation() {
        let result = OptimizationConfig::estimate_polynomial_features(5, 2);
        assert!(result.is_ok());

        let result = OptimizationConfig::estimate_polynomial_features(100, 10);
        assert!(result.is_err());
    }

    // End-to-end: the tuner should produce a usable standardization config
    // and a report with at least one recommendation.
    #[test]
    fn test_auto_tuner() {
        let tuner = AutoTuner::new();
        let data = Array2::ones((100, 10));
        let chars = DataCharacteristics::analyze(&data.view()).expect("Operation failed");

        let config = tuner
            .optimize_for_transformation("standardization", &chars, &HashMap::new())
            .expect("Operation failed");
        assert!(config.memory_limit_mb > 0);

        let report = tuner.generate_report(&chars);
        assert!(!report.recommendations.is_empty());
    }

    // is_large_dataset() must flip when the sample count and footprint drop
    // below the thresholds (100k samples / 10k features / 1000 MB).
    #[test]
    fn test_large_dataset_detection() {
        let mut chars = DataCharacteristics {
            n_samples: 200_000,
            nfeatures: 1000,
            sparsity: 0.1,
            data_range: 100.0,
            outlier_ratio: 0.02,
            has_missing: false,
            memory_footprint_mb: 1500.0,
            elementsize: 8,
        };

        assert!(chars.is_large_dataset());

        chars.n_samples = 1000;
        chars.memory_footprint_mb = 10.0;
        assert!(!chars.is_large_dataset());
    }
}