use scirs2_core::Rng;
#[cfg(feature = "distributed")]
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use crate::error::{Result, TransformError};
use crate::utils::ProcessingStrategy;
use scirs2_core::random::RngExt;
/// Snapshot of the hardware resources available to this process.
///
/// Produced by [`SystemResources::detect`] and consumed by the optimizers
/// below to choose processing strategies and memory limits.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "distributed", derive(Serialize, Deserialize))]
pub struct SystemResources {
/// Available system memory in megabytes (best-effort estimate).
pub memory_mb: usize,
/// Number of logical CPU cores.
pub cpu_cores: usize,
/// Whether GPU support is compiled into this build.
pub has_gpu: bool,
/// Whether SIMD support is compiled into this build.
pub has_simd: bool,
/// L3 cache size in kilobytes (currently a fixed 8 MB estimate).
pub l3_cache_kb: usize,
}
impl SystemResources {
    /// Probe the host and assemble a resource snapshot.
    pub fn detect() -> Self {
        SystemResources {
            memory_mb: Self::detect_memory_mb(),
            cpu_cores: num_cpus::get(),
            has_gpu: Self::detect_gpu(),
            has_simd: Self::detect_simd(),
            l3_cache_kb: Self::detect_l3_cache_kb(),
        }
    }

    /// Best-effort available-memory probe, in megabytes.
    ///
    /// On Linux this parses the `MemAvailable` entry of `/proc/meminfo`;
    /// everywhere else (or on any read/parse failure) it falls back to 8 GB.
    fn detect_memory_mb() -> usize {
        #[cfg(target_os = "linux")]
        {
            if let Ok(meminfo) = std::fs::read_to_string("/proc/meminfo") {
                let available_mb = meminfo.lines().find_map(|line| {
                    if !line.starts_with("MemAvailable:") {
                        return None;
                    }
                    // Line format: "MemAvailable:  12345678 kB".
                    let kb: usize = line.split_whitespace().nth(1)?.parse().ok()?;
                    Some(kb / 1024)
                });
                if let Some(mb) = available_mb {
                    return mb;
                }
            }
        }
        // Conservative default when no probe is possible.
        8 * 1024
    }

    /// GPU availability is a compile-time property of this build.
    fn detect_gpu() -> bool {
        cfg!(feature = "gpu")
    }

    /// SIMD availability is a compile-time property of this build.
    fn detect_simd() -> bool {
        cfg!(feature = "simd")
    }

    /// Fixed 8 MB estimate; no runtime cache probing is performed.
    fn detect_l3_cache_kb() -> usize {
        8 * 1024
    }

    /// Memory budget leaving 20% headroom for the rest of the system.
    pub fn safe_memory_mb(&self) -> usize {
        (self.memory_mb as f64 * 0.8) as usize
    }

    /// Chunk size (in elements) targeting half the L3 cache,
    /// with a floor of 1000 elements.
    pub fn optimal_chunk_size(&self, elementsize: usize) -> usize {
        let half_l3_bytes = (self.l3_cache_kb * 1024) / 2;
        (half_l3_bytes / elementsize).max(1000)
    }
}
/// Summary statistics of a 2-D `f64` dataset, computed by
/// [`DataCharacteristics::analyze`].
#[derive(Debug, Clone)]
#[cfg_attr(feature = "distributed", derive(Serialize, Deserialize))]
pub struct DataCharacteristics {
/// Number of rows (samples).
pub n_samples: usize,
/// Number of columns (features).
pub nfeatures: usize,
/// Fraction of elements that are exactly 0.0.
pub sparsity: f64,
/// max - min over the finite elements (0.0 when there are none).
pub data_range: f64,
/// Fraction of sampled values falling outside the 1.5*IQR fences.
pub outlier_ratio: f64,
/// True when any element is non-finite (NaN or +/- infinity).
pub has_missing: bool,
/// Dense in-memory footprint of the data in megabytes.
pub memory_footprint_mb: f64,
/// Size of one element in bytes (`size_of::<f64>()`).
pub elementsize: usize,
}
impl DataCharacteristics {
    /// Analyze a 2-D view and collect the summary statistics used by the
    /// configuration optimizers.
    ///
    /// # Errors
    /// Returns `TransformError::InvalidInput` when the view has zero rows
    /// or zero columns.
    pub fn analyze(data: &scirs2_core::ndarray::ArrayView2<f64>) -> Result<Self> {
        let (n_samples, nfeatures) = data.dim();
        if n_samples == 0 || nfeatures == 0 {
            return Err(TransformError::InvalidInput("Empty data".to_string()));
        }
        // Single pass over the data: zero count, finite min/max, and the
        // finite / non-finite tallies (previously this took two passes).
        let mut zeros = 0usize;
        let mut min_val = f64::INFINITY;
        let mut max_val = f64::NEG_INFINITY;
        let mut finite_count = 0usize;
        let mut missing_count = 0usize;
        for &val in data.iter() {
            if val == 0.0 {
                zeros += 1;
            }
            if val.is_finite() {
                min_val = min_val.min(val);
                max_val = max_val.max(val);
                finite_count += 1;
            } else {
                // NaN and +/- infinity are counted as missing values.
                missing_count += 1;
            }
        }
        let sparsity = zeros as f64 / data.len() as f64;
        let data_range = if finite_count > 0 {
            max_val - min_val
        } else {
            0.0
        };
        let has_missing = missing_count > 0;
        let outlier_ratio = Self::estimate_outlier_ratio(data, n_samples);
        let memory_footprint_mb =
            (n_samples * nfeatures * std::mem::size_of::<f64>()) as f64 / (1024.0 * 1024.0);
        Ok(DataCharacteristics {
            n_samples,
            nfeatures,
            sparsity,
            data_range,
            outlier_ratio,
            has_missing,
            memory_footprint_mb,
            elementsize: std::mem::size_of::<f64>(),
        })
    }

    /// IQR-based outlier ratio over the first 1000 finite elements.
    ///
    /// Returns 0.0 for tiny datasets (<= 10 samples), for fewer than 4
    /// finite samples, or when the IQR is degenerate (zero spread).
    fn estimate_outlier_ratio(
        data: &scirs2_core::ndarray::ArrayView2<f64>,
        n_samples: usize,
    ) -> f64 {
        if n_samples <= 10 {
            return 0.0;
        }
        let mut sample_values: Vec<f64> = data
            .iter()
            .filter(|&&x| x.is_finite())
            .take(1000)
            .copied()
            .collect();
        if sample_values.len() < 4 {
            return 0.0;
        }
        // All sampled values are finite, so total_cmp is a plain numeric order.
        sample_values.sort_by(|a, b| a.total_cmp(b));
        let n = sample_values.len();
        let q1 = sample_values[n / 4];
        let q3 = sample_values[3 * n / 4];
        let iqr = q3 - q1;
        if iqr <= 0.0 {
            return 0.0;
        }
        let lower_bound = q1 - 1.5 * iqr;
        let upper_bound = q3 + 1.5 * iqr;
        let outliers = sample_values
            .iter()
            .filter(|&&x| x < lower_bound || x > upper_bound)
            .count();
        outliers as f64 / n as f64
    }

    /// Heuristic: large by row count, column count, or raw footprint.
    pub fn is_large_dataset(&self) -> bool {
        self.n_samples > 100_000 || self.nfeatures > 10_000 || self.memory_footprint_mb > 1000.0
    }

    /// More features than samples.
    pub fn is_wide_dataset(&self) -> bool {
        self.nfeatures > self.n_samples
    }

    /// More than half of the elements are exactly zero.
    pub fn is_sparse(&self) -> bool {
        self.sparsity > 0.5
    }

    /// More than 5% of sampled values fall outside the IQR fences.
    pub fn has_outliers(&self) -> bool {
        self.outlier_ratio > 0.05
    }
}
/// Concrete execution plan for a transformation, chosen from the data
/// characteristics and available system resources.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "distributed", derive(Serialize, Deserialize))]
pub struct OptimizationConfig {
/// How the work should be executed (standard/parallel/SIMD/out-of-core).
pub processing_strategy: ProcessingStrategy,
/// Upper memory budget in megabytes.
pub memory_limit_mb: usize,
/// Use outlier-robust statistics (e.g. for standardization).
pub use_robust: bool,
/// Enable multi-threaded execution.
pub use_parallel: bool,
/// Enable SIMD kernels.
pub use_simd: bool,
/// Enable GPU offload.
pub use_gpu: bool,
/// Chunk size in elements for chunked/out-of-core processing.
pub chunk_size: usize,
/// Number of worker threads (1 when `use_parallel` is false).
pub num_threads: usize,
/// Free-form numeric parameters (booleans encoded as 0.0/1.0).
pub algorithm_params: HashMap<String, f64>,
}
impl OptimizationConfig {
    /// Build a configuration tuned for standardization (mean/std scaling).
    pub fn for_standardization(datachars: &DataCharacteristics, system: &SystemResources) -> Self {
        let use_robust = datachars.has_outliers();
        let use_parallel = datachars.n_samples > 10_000 && system.cpu_cores > 1;
        let use_simd = system.has_simd && datachars.nfeatures > 100;
        let use_gpu = system.has_gpu && datachars.memory_footprint_mb > 100.0;
        // Out-of-core when the data does not fit in the safe memory budget.
        let processing_strategy = if datachars.memory_footprint_mb > system.safe_memory_mb() as f64
        {
            ProcessingStrategy::OutOfCore {
                chunk_size: system.optimal_chunk_size(datachars.elementsize),
            }
        } else if use_parallel {
            ProcessingStrategy::Parallel
        } else if use_simd {
            ProcessingStrategy::Simd
        } else {
            ProcessingStrategy::Standard
        };
        OptimizationConfig {
            processing_strategy,
            memory_limit_mb: system.safe_memory_mb(),
            use_robust,
            use_parallel,
            use_simd,
            use_gpu,
            chunk_size: system.optimal_chunk_size(datachars.elementsize),
            num_threads: if use_parallel { system.cpu_cores } else { 1 },
            algorithm_params: HashMap::new(),
        }
    }

    /// Build a configuration tuned for PCA.
    ///
    /// Wide datasets (features > samples) get a larger memory multiplier
    /// because the working set scales with the feature count.
    pub fn for_pca(
        datachars: &DataCharacteristics,
        system: &SystemResources,
        n_components: usize,
    ) -> Self {
        let use_randomized = datachars.is_large_dataset();
        let use_parallel = datachars.n_samples > 1_000 && system.cpu_cores > 1;
        let use_gpu = system.has_gpu && datachars.memory_footprint_mb > 500.0;
        let memory_multiplier = if datachars.nfeatures > datachars.n_samples {
            3.0
        } else {
            2.0
        };
        let estimated_memory = datachars.memory_footprint_mb * memory_multiplier;
        let processing_strategy = if estimated_memory > system.safe_memory_mb() as f64 {
            ProcessingStrategy::OutOfCore {
                // .max(1) guards the divisor against a degenerate zero
                // feature count / element size.
                chunk_size: (system.safe_memory_mb() * 1024 * 1024)
                    / (datachars.nfeatures * datachars.elementsize).max(1),
            }
        } else if use_parallel {
            ProcessingStrategy::Parallel
        } else {
            ProcessingStrategy::Standard
        };
        let mut algorithm_params = HashMap::new();
        // Booleans are encoded as 0.0/1.0 in the f64 parameter map.
        algorithm_params.insert(
            "use_randomized".to_string(),
            if use_randomized { 1.0 } else { 0.0 },
        );
        algorithm_params.insert("n_components".to_string(), n_components as f64);
        OptimizationConfig {
            processing_strategy,
            memory_limit_mb: system.safe_memory_mb(),
            use_robust: false,
            use_parallel,
            use_simd: system.has_simd,
            use_gpu,
            chunk_size: system.optimal_chunk_size(datachars.elementsize),
            num_threads: if use_parallel { system.cpu_cores } else { 1 },
            algorithm_params,
        }
    }

    /// Build a configuration for polynomial feature expansion of `degree`.
    ///
    /// # Errors
    /// Returns an error when the expansion would not fit in 90% of system
    /// memory, or when the estimated feature count overflows / exceeds the
    /// 1M feature cap.
    pub fn for_polynomial_features(
        datachars: &DataCharacteristics,
        system: &SystemResources,
        degree: usize,
    ) -> Result<Self> {
        let estimated_output_features =
            Self::estimate_polynomial_features(datachars.nfeatures, degree)?;
        let estimated_memory = datachars.n_samples as f64
            * estimated_output_features as f64
            * datachars.elementsize as f64
            / (1024.0 * 1024.0);
        if estimated_memory > system.memory_mb as f64 * 0.9 {
            return Err(TransformError::MemoryError(format!(
                "Polynomial features would require {estimated_memory:.1} MB, but only {} MB available",
                system.memory_mb
            )));
        }
        let use_parallel = datachars.n_samples > 1_000 && system.cpu_cores > 1;
        let use_simd = system.has_simd && estimated_output_features > 100;
        let processing_strategy = if estimated_memory > system.safe_memory_mb() as f64 {
            ProcessingStrategy::OutOfCore {
                // .max(1) guards the divisor (estimated features >= 1 already,
                // but elementsize comes from the caller's characteristics).
                chunk_size: (system.safe_memory_mb() * 1024 * 1024)
                    / (estimated_output_features * datachars.elementsize).max(1),
            }
        } else if use_parallel {
            ProcessingStrategy::Parallel
        } else if use_simd {
            ProcessingStrategy::Simd
        } else {
            ProcessingStrategy::Standard
        };
        let mut algorithm_params = HashMap::new();
        algorithm_params.insert("degree".to_string(), degree as f64);
        algorithm_params.insert(
            "estimated_output_features".to_string(),
            estimated_output_features as f64,
        );
        Ok(OptimizationConfig {
            processing_strategy,
            memory_limit_mb: system.safe_memory_mb(),
            use_robust: false,
            use_parallel,
            use_simd,
            use_gpu: false,
            chunk_size: system.optimal_chunk_size(datachars.elementsize),
            num_threads: if use_parallel { system.cpu_cores } else { 1 },
            algorithm_params,
        })
    }

    /// Count output features of a degree-`degree` polynomial expansion:
    /// 1 (bias) + sum over d of C(nfeatures + d - 1, d).
    ///
    /// # Errors
    /// `InvalidInput` for degree 0; `ComputationError` when any term
    /// overflows `usize` or exceeds 1,000,000 features.
    fn estimate_polynomial_features(nfeatures: usize, degree: usize) -> Result<usize> {
        if degree == 0 {
            return Err(TransformError::InvalidInput(
                "Degree must be at least 1".to_string(),
            ));
        }
        let mut total_features = 1;
        for d in 1..=degree {
            // Incrementally compute C(nfeatures + d - 1, d); each partial
            // product is itself a binomial coefficient, so the division is
            // exact. checked_mul catches usize overflow before the cap check.
            let mut coeff: usize = 1;
            for i in 0..d {
                coeff = coeff
                    .checked_mul(nfeatures + d - 1 - i)
                    .ok_or_else(|| {
                        TransformError::ComputationError(
                            "Too many polynomial features would be generated".to_string(),
                        )
                    })?
                    / (i + 1);
                if coeff > 1_000_000 {
                    return Err(TransformError::ComputationError(
                        "Too many polynomial features would be generated".to_string(),
                    ));
                }
            }
            total_features += coeff;
        }
        Ok(total_features)
    }

    /// Rough execution-time estimate from an element-count / throughput
    /// model. Throughputs are coarse constants (elements/second) per
    /// strategy; the result is clamped to at least 1 microsecond.
    pub fn estimated_execution_time(&self, datachars: &DataCharacteristics) -> std::time::Duration {
        use std::time::Duration;
        let base_ops = datachars.n_samples as u64 * datachars.nfeatures as u64;
        let ops_per_second: u64 = match self.processing_strategy {
            ProcessingStrategy::Parallel => 1_000_000_000 * self.num_threads as u64,
            ProcessingStrategy::Simd => 2_000_000_000,
            ProcessingStrategy::OutOfCore { .. } => 100_000_000,
            ProcessingStrategy::Standard => 500_000_000,
        };
        // Widen to u128: `base_ops * 1e9` overflows u64 once base_ops
        // exceeds ~1.8e10 elements (e.g. 2M samples x 10k features).
        let time_ns = (base_ops as u128 * 1_000_000_000) / ops_per_second as u128;
        let time_ns = u64::try_from(time_ns).unwrap_or(u64::MAX);
        Duration::from_nanos(time_ns.max(1000))
    }
}
/// Picks optimized configurations per transformation name and records past
/// performance (bounded history) for future reference.
pub struct AutoTuner {
/// Host resources, detected once at construction time.
system: SystemResources,
/// Per-transformation performance records, capped at 100 entries each.
performance_history: HashMap<String, Vec<PerformanceRecord>>,
}
/// One recorded execution of a transformation. Fields are currently
/// write-only (hence the dead_code allowances) and kept for future tuning.
#[derive(Debug, Clone)]
struct PerformanceRecord {
/// Debug-format string of the config used (cheap stand-in for a hash).
#[allow(dead_code)]
config_hash: String,
/// Wall-clock time of the run.
#[allow(dead_code)]
execution_time: std::time::Duration,
/// Peak memory used during the run, in megabytes.
#[allow(dead_code)]
memory_used_mb: f64,
/// Whether the run completed successfully.
#[allow(dead_code)]
success: bool,
/// Characteristics of the data the run processed.
#[allow(dead_code)]
data_characteristics: DataCharacteristics,
}
impl Default for AutoTuner {
/// Same as [`AutoTuner::new`]: probes system resources eagerly.
fn default() -> Self {
Self::new()
}
}
impl AutoTuner {
    /// Create a tuner, probing system resources once up front.
    pub fn new() -> Self {
        AutoTuner {
            system: SystemResources::detect(),
            performance_history: HashMap::new(),
        }
    }

    /// Pick an [`OptimizationConfig`] for the named transformation.
    ///
    /// Recognized names: `"standardization"`, `"pca"` (param
    /// `"n_components"`, default 5), `"polynomial"` (param `"degree"`,
    /// default 2). Any other name gets a generic heuristic configuration.
    ///
    /// # Errors
    /// Propagates errors from the polynomial feature estimator (e.g. the
    /// expansion would be too large for memory).
    pub fn optimize_for_transformation(
        &self,
        transformation: &str,
        datachars: &DataCharacteristics,
        params: &HashMap<String, f64>,
    ) -> Result<OptimizationConfig> {
        match transformation {
            "standardization" => Ok(OptimizationConfig::for_standardization(
                datachars,
                &self.system,
            )),
            "pca" => {
                // `copied()` replaces the original no-op `as &f64` cast.
                let n_components = params.get("n_components").copied().unwrap_or(5.0);
                Ok(OptimizationConfig::for_pca(
                    datachars,
                    &self.system,
                    n_components as usize,
                ))
            }
            "polynomial" => {
                let degree = params.get("degree").copied().unwrap_or(2.0);
                OptimizationConfig::for_polynomial_features(
                    datachars,
                    &self.system,
                    degree as usize,
                )
            }
            _ => Ok(OptimizationConfig {
                processing_strategy: if datachars.is_large_dataset() {
                    ProcessingStrategy::Parallel
                } else {
                    ProcessingStrategy::Standard
                },
                memory_limit_mb: self.system.safe_memory_mb(),
                use_robust: datachars.has_outliers(),
                use_parallel: datachars.n_samples > 10_000,
                use_simd: self.system.has_simd,
                use_gpu: self.system.has_gpu && datachars.memory_footprint_mb > 100.0,
                chunk_size: self.system.optimal_chunk_size(datachars.elementsize),
                num_threads: self.system.cpu_cores,
                algorithm_params: HashMap::new(),
            }),
        }
    }

    /// Append a performance record, keeping at most 100 per transformation
    /// (oldest entries are dropped first).
    pub fn record_performance(
        &mut self,
        transformation: &str,
        config: &OptimizationConfig,
        execution_time: std::time::Duration,
        memory_used_mb: f64,
        success: bool,
        datachars: DataCharacteristics,
    ) {
        // Debug formatting of the whole config doubles as a cheap hash key.
        let config_hash = format!("{config:?}");
        let record = PerformanceRecord {
            config_hash,
            execution_time,
            memory_used_mb,
            success,
            data_characteristics: datachars,
        };
        // Single map lookup: reuse the entry handle instead of re-fetching
        // with get_mut() + expect() as before.
        let records = self
            .performance_history
            .entry(transformation.to_string())
            .or_default();
        records.push(record);
        if records.len() > 100 {
            records.remove(0);
        }
    }

    /// Borrow the detected system resources.
    pub fn system_resources(&self) -> &SystemResources {
        &self.system
    }

    /// Produce recommendations for the three built-in transformations.
    pub fn generate_report(&self, datachars: &DataCharacteristics) -> OptimizationReport {
        let recommendations = vec![
            self.get_recommendation_for_transformation("standardization", datachars),
            self.get_recommendation_for_transformation("pca", datachars),
            self.get_recommendation_for_transformation("polynomial", datachars),
        ];
        OptimizationReport {
            system_info: self.system.clone(),
            data_info: datachars.clone(),
            recommendations,
            // Rough allowance: input plus one working copy.
            estimated_total_memory_mb: datachars.memory_footprint_mb * 2.0,
        }
    }

    /// Build a recommendation, falling back to a conservative default
    /// config when optimization fails (e.g. polynomial expansion too big).
    fn get_recommendation_for_transformation(
        &self,
        transformation: &str,
        datachars: &DataCharacteristics,
    ) -> TransformationRecommendation {
        let config = self
            .optimize_for_transformation(transformation, datachars, &HashMap::new())
            .unwrap_or_else(|_| OptimizationConfig {
                processing_strategy: ProcessingStrategy::Standard,
                memory_limit_mb: self.system.safe_memory_mb(),
                use_robust: false,
                use_parallel: false,
                use_simd: false,
                use_gpu: false,
                chunk_size: 1000,
                num_threads: 1,
                algorithm_params: HashMap::new(),
            });
        let estimated_time = config.estimated_execution_time(datachars);
        TransformationRecommendation {
            transformation: transformation.to_string(),
            config,
            estimated_time,
            // Static placeholder confidence; not derived from history yet.
            confidence: 0.8,
            reason: format!(
                "Optimized for {} samples, {} features",
                datachars.n_samples, datachars.nfeatures
            ),
        }
    }
}
/// Bundle of system info, data info, and per-transformation
/// recommendations, produced by [`AutoTuner::generate_report`].
#[derive(Debug, Clone)]
pub struct OptimizationReport {
/// Resources of the machine the report was generated on.
pub system_info: SystemResources,
/// Characteristics of the analyzed dataset.
pub data_info: DataCharacteristics,
/// One recommendation per built-in transformation.
pub recommendations: Vec<TransformationRecommendation>,
/// Rough total memory allowance (2x the data footprint).
pub estimated_total_memory_mb: f64,
}
/// Recommended configuration for a single named transformation.
#[derive(Debug, Clone)]
pub struct TransformationRecommendation {
/// Transformation name (e.g. "standardization", "pca", "polynomial").
pub transformation: String,
/// The recommended execution configuration.
pub config: OptimizationConfig,
/// Estimated execution time from the throughput model.
pub estimated_time: std::time::Duration,
/// Confidence in the recommendation (currently a fixed placeholder).
pub confidence: f64,
/// Human-readable justification.
pub reason: String,
}
impl OptimizationReport {
pub fn print_report(&self) {
println!("=== Optimization Report ===");
println!("System Resources:");
println!(" Memory: {} MB", self.system_info.memory_mb);
println!(" CPU Cores: {}", self.system_info.cpu_cores);
println!(" GPU Available: {}", self.system_info.has_gpu);
println!(" SIMD Available: {}", self.system_info.has_simd);
println!();
println!("Data Characteristics:");
println!(" Samples: {}", self.data_info.n_samples);
println!(" Features: {}", self.data_info.nfeatures);
println!(
" Memory Footprint: {:.1} MB",
self.data_info.memory_footprint_mb
);
println!(" Sparsity: {:.1}%", self.data_info.sparsity * 100.0);
println!(" Has Outliers: {}", self.data_info.has_outliers());
println!();
println!("Recommendations:");
for rec in &self.recommendations {
println!(" {}:", rec.transformation);
println!(" Strategy: {:?}", rec.config.processing_strategy);
println!(
" Estimated Time: {:.2}s",
rec.estimated_time.as_secs_f64()
);
println!(" Use Parallel: {}", rec.config.use_parallel);
println!(" Use SIMD: {}", rec.config.use_simd);
println!(" Use GPU: {}", rec.config.use_gpu);
println!(" Reason: {}", rec.reason);
println!();
}
}
}
/// Learning-based configuration optimizer combining a predictor, an
/// adaptive (Q-learning) tuner, and live system monitoring.
pub struct AdvancedConfigOptimizer {
/// Per-transformation metric history used for retraining.
performance_history: HashMap<String, Vec<PerformanceMetric>>,
/// Live system load/memory/thermal readings.
system_monitor: SystemMonitor,
/// Predicts a baseline config from the encoded state string.
config_predictor: ConfigurationPredictor,
/// Explores/exploits parameter tweaks on top of the prediction.
adaptive_tuner: AdaptiveParameterTuner,
}
/// Measured outcome of one transformation run, fed back into the optimizer.
#[derive(Debug, Clone)]
pub struct PerformanceMetric {
/// Hash of the configuration that produced this run.
#[allow(dead_code)]
config_hash: u64,
/// Wall-clock execution time in microseconds.
execution_time_us: u64,
/// Peak memory used, in bytes.
memory_usage_bytes: usize,
/// Cache hit rate in [0, 1].
cache_hit_rate: f64,
/// CPU utilization in [0, 1].
cpu_utilization: f64,
/// Task-specific quality score in [0, 1].
quality_score: f64,
/// When the metric was captured.
#[allow(dead_code)]
timestamp: std::time::Instant,
}
/// Live system metrics, refreshed by [`SystemMonitor::update_metrics`].
/// The current readers are placeholder stubs returning fixed values.
pub struct SystemMonitor {
/// CPU load in [0, 1].
cpu_load: f64,
/// Available memory in bytes.
available_memory_bytes: usize,
/// Cache miss rate in [0, 1].
cache_miss_rate: f64,
/// Fraction of time spent waiting on I/O.
io_wait_percent: f64,
/// CPU temperature in degrees Celsius.
cpu_temperature_celsius: f64,
}
/// Predicts a baseline [`OptimizationConfig`] from an encoded state string.
pub struct ConfigurationPredictor {
/// Relative importance of each state feature (not yet used in prediction).
#[allow(dead_code)]
feature_weights: HashMap<String, f64>,
/// Step size for future weight updates (not yet used).
#[allow(dead_code)]
learning_rate: f64,
/// Minimum confidence for trusting predictions; nudged up on retrain.
confidence_threshold: f64,
/// Number of feedback samples seen so far.
sample_count: usize,
}
/// Q-learning based parameter tuner with epsilon-greedy exploration.
pub struct AdaptiveParameterTuner {
/// Q-values keyed by (state, action).
q_table: HashMap<(String, String), f64>,
/// Probability of exploring random parameter perturbations.
exploration_rate: f64,
/// Step size for Q-value updates.
learning_rate: f64,
/// Future-reward discount (not yet used in the update rule).
#[allow(dead_code)]
discount_factor: f64,
/// State string of the most recent `tune_parameters` call.
current_state: String,
}
impl Default for AdvancedConfigOptimizer {
/// Same as [`AdvancedConfigOptimizer::new`].
fn default() -> Self {
Self::new()
}
}
impl AdvancedConfigOptimizer {
    /// Create an optimizer with empty history and fresh sub-components.
    pub fn new() -> Self {
        AdvancedConfigOptimizer {
            performance_history: HashMap::new(),
            system_monitor: SystemMonitor::new(),
            config_predictor: ConfigurationPredictor::new(),
            adaptive_tuner: AdaptiveParameterTuner::new(),
        }
    }

    /// Full optimization pipeline: refresh system metrics, predict a config,
    /// tune it via the adaptive tuner, then clamp it to current system load.
    ///
    /// # Errors
    /// Propagates failures from metric collection, prediction, or tuning.
    pub fn advanced_optimize_config(
        &mut self,
        datachars: &DataCharacteristics,
        transformation_type: &str,
        user_params: &HashMap<String, f64>,
    ) -> Result<OptimizationConfig> {
        self.system_monitor.update_metrics()?;
        let current_state = self.generate_state_representation(datachars, &self.system_monitor);
        let predicted_config = self.config_predictor.predict_optimal_config(
            &current_state,
            transformation_type,
            user_params,
        )?;
        let tuned_config = self.adaptive_tuner.tune_parameters(
            predicted_config,
            &current_state,
            transformation_type,
        )?;
        let validated_config =
            self.validate_and_adjust_config(tuned_config, &self.system_monitor)?;
        Ok(validated_config)
    }

    /// Feed back measured performance: store history, update the predictor,
    /// reward the tuner, and retrain every 100 feedback samples.
    pub fn learn_from_performance(
        &mut self,
        config: &OptimizationConfig,
        performance: PerformanceMetric,
        transformation_type: &str,
    ) -> Result<()> {
        let config_hash = self.compute_config_hash(config);
        self.performance_history
            .entry(transformation_type.to_string())
            .or_default()
            .push(performance.clone());
        self.config_predictor.update_from_feedback(&performance)?;
        let reward = self.compute_reward_signal(&performance);
        self.adaptive_tuner.update_q_values(config_hash, reward)?;
        if self.config_predictor.sample_count.is_multiple_of(100) {
            self.retrain_models()?;
        }
        Ok(())
    }

    /// Encode data + system state as a "key:value" string joined by '_'
    /// (parsed back by `ConfigurationPredictor::extract_features`).
    fn generate_state_representation(
        &self,
        datachars: &DataCharacteristics,
        system_monitor: &SystemMonitor,
    ) -> String {
        format!(
            "samples:{}_features:{}_memory:{:.2}_cpu:{:.2}_sparsity:{:.3}",
            datachars.n_samples,
            datachars.nfeatures,
            datachars.memory_footprint_mb,
            system_monitor.cpu_load,
            datachars.sparsity,
        )
    }

    /// Hash the discrete fields of a config (strategy and float params are
    /// deliberately excluded — f64 does not implement Hash).
    fn compute_config_hash(&self, config: &OptimizationConfig) -> u64 {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};
        let mut hasher = DefaultHasher::new();
        config.memory_limit_mb.hash(&mut hasher);
        config.use_parallel.hash(&mut hasher);
        config.use_simd.hash(&mut hasher);
        config.use_gpu.hash(&mut hasher);
        config.chunk_size.hash(&mut hasher);
        config.num_threads.hash(&mut hasher);
        hasher.finish()
    }

    /// Scalar reward in (0, 1]: weighted blend of time, memory, cache,
    /// CPU-headroom, and quality scores (weights sum to 1.0).
    fn compute_reward_signal(&self, performance: &PerformanceMetric) -> f64 {
        let time_score = 1.0 / (1.0 + performance.execution_time_us as f64 / 1_000_000.0);
        let memory_score = 1.0 / (1.0 + performance.memory_usage_bytes as f64 / 1_000_000_000.0);
        let cache_score = performance.cache_hit_rate;
        let cpu_score = 1.0 - performance.cpu_utilization.min(1.0);
        let quality_score = performance.quality_score;
        0.3 * time_score
            + 0.2 * memory_score
            + 0.2 * cache_score
            + 0.1 * cpu_score
            + 0.2 * quality_score
    }

    /// Clamp a tuned config to current system conditions: memory to 80% of
    /// available, halve threads under high load, disable GPU when hot, and
    /// shrink chunks by 20% when cache misses are elevated.
    fn validate_and_adjust_config(
        &self,
        mut config: OptimizationConfig,
        system_monitor: &SystemMonitor,
    ) -> Result<OptimizationConfig> {
        let available_mb = system_monitor.available_memory_bytes / (1024 * 1024);
        config.memory_limit_mb = config.memory_limit_mb.min(available_mb * 80 / 100);
        if system_monitor.cpu_load > 0.8 {
            config.num_threads = (config.num_threads / 2).max(1);
        }
        if system_monitor.cpu_temperature_celsius > 85.0 {
            config.use_gpu = false;
        }
        if system_monitor.cache_miss_rate > 0.1 {
            config.chunk_size = (config.chunk_size as f64 * 0.8) as usize;
        }
        Ok(config)
    }

    /// Periodic retraining hook: refresh the predictor from history and
    /// decay the tuner's exploration rate.
    fn retrain_models(&mut self) -> Result<()> {
        self.config_predictor
            .retrain_with_history(&self.performance_history)?;
        self.adaptive_tuner.decay_exploration_rate();
        Ok(())
    }
}
impl Default for SystemMonitor {
/// Same as [`SystemMonitor::new`].
fn default() -> Self {
Self::new()
}
}
impl SystemMonitor {
    /// Start with neutral defaults; call
    /// [`SystemMonitor::update_metrics`] to populate real readings.
    pub fn new() -> Self {
        SystemMonitor {
            cpu_load: 0.0,
            available_memory_bytes: 0,
            cache_miss_rate: 0.0,
            io_wait_percent: 0.0,
            cpu_temperature_celsius: 50.0,
        }
    }

    /// Refresh every metric from its reader, failing on the first error.
    pub fn update_metrics(&mut self) -> Result<()> {
        self.cpu_load = self.read_cpu_load()?;
        self.available_memory_bytes = self.read_available_memory()?;
        self.cache_miss_rate = self.read_cache_miss_rate()?;
        self.io_wait_percent = self.read_io_wait()?;
        self.cpu_temperature_celsius = self.read_cpu_temperature()?;
        Ok(())
    }

    // NOTE(review): the readers below are placeholder stubs returning fixed
    // values; real probes (e.g. /proc/stat, perf counters) are not wired up.

    /// Stub: 50% CPU load.
    fn read_cpu_load(&self) -> Result<f64> {
        Ok(0.5)
    }

    /// Stub: 8 GB available.
    fn read_available_memory(&self) -> Result<usize> {
        Ok(8 * 1024 * 1024 * 1024)
    }

    /// Stub: 5% cache miss rate.
    fn read_cache_miss_rate(&self) -> Result<f64> {
        Ok(0.05)
    }

    /// Stub: 2% I/O wait.
    fn read_io_wait(&self) -> Result<f64> {
        Ok(0.02)
    }

    /// Stub: 55 degrees Celsius.
    fn read_cpu_temperature(&self) -> Result<f64> {
        Ok(55.0)
    }
}
impl Default for ConfigurationPredictor {
/// Same as [`ConfigurationPredictor::new`].
fn default() -> Self {
Self::new()
}
}
impl ConfigurationPredictor {
    /// Build a predictor with hand-tuned initial feature weights.
    pub fn new() -> Self {
        let mut feature_weights = HashMap::new();
        feature_weights.insert("n_samples".to_string(), 0.3);
        feature_weights.insert("nfeatures".to_string(), 0.25);
        feature_weights.insert("memory_footprint".to_string(), 0.2);
        feature_weights.insert("sparsity".to_string(), 0.15);
        feature_weights.insert("cpu_load".to_string(), 0.1);
        ConfigurationPredictor {
            feature_weights,
            learning_rate: 0.01,
            confidence_threshold: 0.8,
            sample_count: 0,
        }
    }

    /// Predict a baseline configuration from the encoded state string
    /// produced by `generate_state_representation`.
    ///
    /// # Errors
    /// Currently infallible in practice (feature extraction is lenient),
    /// but kept fallible for future model-backed implementations.
    pub fn predict_optimal_config(
        &self,
        state: &str,
        _transformation_type: &str,
        _user_params: &HashMap<String, f64>,
    ) -> Result<OptimizationConfig> {
        let features = self.extract_features(state)?;
        let predicted_memory_limit = self.predict_memory_limit(&features);
        let predicted_parallelism = self.predict_parallelism(&features);
        let predicted_simd_usage = self.predict_simd_usage(&features);
        let strategy = if predicted_memory_limit < 1000 {
            ProcessingStrategy::OutOfCore { chunk_size: 1024 }
        } else if predicted_parallelism {
            ProcessingStrategy::Parallel
        } else if predicted_simd_usage {
            ProcessingStrategy::Simd
        } else {
            ProcessingStrategy::Standard
        };
        Ok(OptimizationConfig {
            processing_strategy: strategy,
            memory_limit_mb: predicted_memory_limit,
            use_robust: false,
            use_parallel: predicted_parallelism,
            use_simd: predicted_simd_usage,
            // Fix: the state string encodes this value under "memory"
            // (see generate_state_representation); the previous
            // "memory_footprint" key never existed, so the lookup always
            // fell back to the default.
            use_gpu: features.get("memory").copied().unwrap_or(0.0) > 100.0,
            chunk_size: if predicted_memory_limit < 1000 {
                512
            } else {
                2048
            },
            num_threads: if predicted_parallelism { 4 } else { 1 },
            algorithm_params: HashMap::new(),
        })
    }

    /// Parse "key:value" pairs (joined by '_') back into a feature map.
    /// Unparseable values are silently skipped.
    fn extract_features(&self, state: &str) -> Result<HashMap<String, f64>> {
        let mut features = HashMap::new();
        for part in state.split('_') {
            if let Some((key, value)) = part.split_once(':') {
                if let Ok(val) = value.parse::<f64>() {
                    features.insert(key.to_string(), val);
                }
            }
        }
        Ok(features)
    }

    /// Memory limit heuristic: 1.5x the data footprint (MB).
    fn predict_memory_limit(&self, features: &HashMap<String, f64>) -> usize {
        // Key "memory" matches the state string; the previous
        // "memory_footprint" key always missed and defaulted to 100.
        let memory_footprint = features.get("memory").copied().unwrap_or(100.0);
        (memory_footprint * 1.5) as usize
    }

    /// Parallelize when the dataset is big and the CPU is not saturated.
    fn predict_parallelism(&self, features: &HashMap<String, f64>) -> bool {
        let samples = features.get("samples").copied().unwrap_or(1000.0);
        let cpu_load = features.get("cpu").copied().unwrap_or(0.5);
        samples > 5000.0 && cpu_load < 0.7
    }

    /// Use SIMD when there are enough features per row to vectorize.
    fn predict_simd_usage(&self, features: &HashMap<String, f64>) -> bool {
        let features_count = features.get("features").copied().unwrap_or(10.0);
        features_count > 50.0
    }

    /// Record one feedback sample. Currently only counts samples; weight
    /// updates are not implemented yet.
    pub fn update_from_feedback(&mut self, _performance: &PerformanceMetric) -> Result<()> {
        self.sample_count += 1;
        Ok(())
    }

    /// Placeholder retraining: nudges the confidence threshold up toward
    /// 0.95. The history is not yet consumed.
    pub fn retrain_with_history(
        &mut self,
        _history: &HashMap<String, Vec<PerformanceMetric>>,
    ) -> Result<()> {
        self.confidence_threshold = (self.confidence_threshold + 0.01).min(0.95);
        Ok(())
    }
}
impl Default for AdaptiveParameterTuner {
/// Same as [`AdaptiveParameterTuner::new`].
fn default() -> Self {
Self::new()
}
}
impl AdaptiveParameterTuner {
    /// Create a tuner with standard Q-learning hyperparameters
    /// (epsilon = 0.1, alpha = 0.1, gamma = 0.9).
    pub fn new() -> Self {
        AdaptiveParameterTuner {
            q_table: HashMap::new(),
            exploration_rate: 0.1,
            learning_rate: 0.1,
            discount_factor: 0.9,
            current_state: String::new(),
        }
    }

    /// Epsilon-greedy tuning pass over a predicted configuration.
    /// Remembers `state` so the next `update_q_values` call can credit it.
    pub fn tune_parameters(
        &mut self,
        mut config: OptimizationConfig,
        state: &str,
        _transformation_type: &str,
    ) -> Result<OptimizationConfig> {
        self.current_state = state.to_string();
        if scirs2_core::random::rng().random_range(0.0..1.0) < self.exploration_rate {
            config = self.explore_parameters(config)?;
        } else {
            config = self.exploit_best_parameters(config, state)?;
        }
        Ok(config)
    }

    /// Random perturbation: +/-20% memory, 30% chance to flip parallelism,
    /// and +/-50% chunk size.
    fn explore_parameters(&self, mut config: OptimizationConfig) -> Result<OptimizationConfig> {
        let mut rng = scirs2_core::random::rng();
        let memory_factor = rng.random_range(0.8..1.2);
        config.memory_limit_mb = (config.memory_limit_mb as f64 * memory_factor) as usize;
        if rng.random_range(0.0..1.0) < 0.3 {
            config.use_parallel = !config.use_parallel;
        }
        let chunk_factor = rng.random_range(0.5..1.5);
        config.chunk_size = (config.chunk_size as f64 * chunk_factor) as usize;
        Ok(config)
    }

    /// Exploitation pass. The best action is looked up but not yet applied
    /// to the config — this is a placeholder for a real policy.
    fn exploit_best_parameters(
        &self,
        config: OptimizationConfig,
        state: &str,
    ) -> Result<OptimizationConfig> {
        let _best_action = self.find_best_action(state);
        Ok(config)
    }

    /// Highest-valued action recorded for `state`, or "default" when none.
    fn find_best_action(&self, state: &str) -> String {
        let mut best_action = "default".to_string();
        let mut best_value = f64::NEG_INFINITY;
        for ((s, action), &value) in &self.q_table {
            if s == state && value > best_value {
                best_value = value;
                best_action = action.clone();
            }
        }
        best_action
    }

    /// Q-value update for the most recent state:
    /// Q += alpha * (reward - Q).
    ///
    /// NOTE(review): the config hash is currently unused — all updates go
    /// to a single "current_action" slot per state.
    pub fn update_q_values(&mut self, _confighash: u64, reward: f64) -> Result<()> {
        let state_action = (self.current_state.clone(), "current_action".to_string());
        // Single lookup via the entry API (previously get() + insert()).
        let value = self.q_table.entry(state_action).or_insert(0.0);
        *value += self.learning_rate * (reward - *value);
        Ok(())
    }

    /// Multiplicative epsilon decay with a 0.01 floor.
    pub fn decay_exploration_rate(&mut self) {
        self.exploration_rate = (self.exploration_rate * 0.995).max(0.01);
    }
}
#[cfg(test)]
mod tests {
use super::*;
use scirs2_core::ndarray::Array2;
// Detection should always find at least one core and some memory, and the
// safe budget must be strictly below the total.
#[test]
fn test_system_resources_detection() {
let resources = SystemResources::detect();
assert!(resources.cpu_cores > 0);
assert!(resources.memory_mb > 0);
assert!(resources.safe_memory_mb() < resources.memory_mb);
}
// analyze() on a small dense matrix: dimensions are echoed back, the
// footprint is positive, and 100x10 is well below the "large" thresholds.
#[test]
fn test_data_characteristics_analysis() {
let data = Array2::from_shape_vec((100, 10), (0..1000).map(|x| x as f64).collect())
.expect("Operation failed");
let chars = DataCharacteristics::analyze(&data.view()).expect("Operation failed");
assert_eq!(chars.n_samples, 100);
assert_eq!(chars.nfeatures, 10);
assert!(chars.memory_footprint_mb > 0.0);
assert!(!chars.is_large_dataset());
}
// The standardization config always carries a positive memory budget.
#[test]
fn test_optimization_config_for_standardization() {
let data = Array2::ones((1000, 50));
let chars = DataCharacteristics::analyze(&data.view()).expect("Operation failed");
let system = SystemResources::detect();
let config = OptimizationConfig::for_standardization(&chars, &system);
assert!(config.memory_limit_mb > 0);
}
// PCA config must record the requested component count in its params.
#[test]
fn test_optimization_config_for_pca() {
let data = Array2::ones((500, 20));
let chars = DataCharacteristics::analyze(&data.view()).expect("Operation failed");
let system = SystemResources::detect();
let config = OptimizationConfig::for_pca(&chars, &system, 10);
assert_eq!(config.algorithm_params.get("n_components"), Some(&10.0));
}
// Small expansions succeed; a degree-10 expansion of 100 features blows
// past the 1M-feature cap and must error.
#[test]
fn test_polynomial_features_estimation() {
let result = OptimizationConfig::estimate_polynomial_features(5, 2);
assert!(result.is_ok());
let result = OptimizationConfig::estimate_polynomial_features(100, 10);
assert!(result.is_err());
}
// End-to-end: the tuner yields a usable config and a non-empty report.
#[test]
fn test_auto_tuner() {
let tuner = AutoTuner::new();
let data = Array2::ones((100, 10));
let chars = DataCharacteristics::analyze(&data.view()).expect("Operation failed");
let config = tuner
.optimize_for_transformation("standardization", &chars, &HashMap::new())
.expect("Operation failed");
assert!(config.memory_limit_mb > 0);
let report = tuner.generate_report(&chars);
assert!(!report.recommendations.is_empty());
}
// is_large_dataset() flips when samples and footprint drop below the
// thresholds (100k samples / 10k features / 1000 MB).
#[test]
fn test_large_dataset_detection() {
let mut chars = DataCharacteristics {
n_samples: 200_000,
nfeatures: 1000,
sparsity: 0.1,
data_range: 100.0,
outlier_ratio: 0.02,
has_missing: false,
memory_footprint_mb: 1500.0,
elementsize: 8,
};
assert!(chars.is_large_dataset());
chars.n_samples = 1000;
chars.memory_footprint_mb = 10.0;
assert!(!chars.is_large_dataset());
}
}