scirs2_transform/auto_feature_engineering.rs

//! Automated feature engineering with meta-learning
//!
//! This module provides automated feature engineering capabilities that use
//! meta-learning to select optimal transformations for given datasets.
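//!
//! # Example
//!
//! A minimal usage sketch (not compiled as a doctest); it assumes this module is
//! re-exported as `scirs2_transform::auto_feature_engineering` and that the
//! `array!` macro is available through `scirs2_core::ndarray`:
//!
//! ```ignore
//! use scirs2_core::ndarray::array;
//! use scirs2_transform::auto_feature_engineering::AutoFeatureEngineer;
//!
//! let x = array![[1.0, 200.0], [2.0, 180.0], [3.0, 220.0], [4.0, 190.0]];
//! let engineer = AutoFeatureEngineer::new()?;
//!
//! // Summarize the dataset, then ask for recommended transformations.
//! let meta = engineer.extract_meta_features(&x.view())?;
//! let recommendations = engineer.recommend_transformations(&x.view())?;
//! println!("{} features, {} recommendations", meta.n_features, recommendations.len());
//! ```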

use crate::error::{Result, TransformError};
use scirs2_core::ndarray::{Array1, ArrayStatCompat, ArrayView1, ArrayView2};
use scirs2_core::random::seq::SliceRandom;
use scirs2_core::validation::check_not_empty;
use std::collections::HashMap;

#[cfg(feature = "auto-feature-engineering")]
use std::collections::VecDeque;

use statrs::statistics::Statistics;
#[cfg(feature = "auto-feature-engineering")]
use tch::{nn, Device, Tensor};
/// Meta-features extracted from datasets for transformation selection
#[derive(Debug, Clone)]
pub struct DatasetMetaFeatures {
    /// Number of samples
    pub n_samples: usize,
    /// Number of features
    pub n_features: usize,
    /// Sparsity ratio (fraction of zero values)
    pub sparsity: f64,
    /// Mean of feature correlations
    pub mean_correlation: f64,
    /// Standard deviation of feature correlations
    pub std_correlation: f64,
    /// Mean skewness across features
    pub mean_skewness: f64,
    /// Mean kurtosis across features
    pub mean_kurtosis: f64,
    /// Fraction of missing (NaN) values
    pub missing_ratio: f64,
    /// Dispersion of feature variances (coefficient of variation)
    pub variance_ratio: f64,
    /// Fraction of outlier values (IQR method)
    pub outlier_ratio: f64,
    /// Whether the dataset has missing values
    pub has_missing: bool,
}

/// Available transformation types for automated selection
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TransformationType {
    /// Standardization (Z-score normalization)
    StandardScaler,
    /// Min-max scaling
    MinMaxScaler,
    /// Robust scaling using median and IQR
    RobustScaler,
    /// Power transformation (Box-Cox/Yeo-Johnson)
    PowerTransformer,
    /// Polynomial feature generation
    PolynomialFeatures,
    /// Principal Component Analysis
    PCA,
    /// Feature selection based on variance
    VarianceThreshold,
    /// Quantile transformation
    QuantileTransformer,
    /// Binary encoding for categorical features
    BinaryEncoder,
    /// Target encoding
    TargetEncoder,
}

/// Configuration for a transformation with its parameters
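///
/// A config is a plain value; a small illustrative sketch (the parameter name
/// matches the PCA default used elsewhere in this module):
///
/// ```ignore
/// use std::collections::HashMap;
///
/// let mut params = HashMap::new();
/// params.insert("n_components".to_string(), 0.95);
/// let config = TransformationConfig {
///     transformation_type: TransformationType::PCA,
///     parameters: params,
///     expected_performance: 0.8,
/// };
/// ```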
#[derive(Debug, Clone)]
pub struct TransformationConfig {
    /// Type of transformation to apply
    pub transformation_type: TransformationType,
    /// Parameters for the transformation
    pub parameters: HashMap<String, f64>,
    /// Expected performance score for this transformation
    pub expected_performance: f64,
}

/// Meta-learning model for transformation selection
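///
/// Only compiled with the `auto-feature-engineering` feature. A rough usage
/// sketch (`historical_data` and `meta_features` are illustrative placeholders):
///
/// ```ignore
/// let mut model = MetaLearningModel::new()?;
/// model.train(historical_data)?; // Vec<(DatasetMetaFeatures, Vec<TransformationConfig>)>
/// let configs = model.predict_transformations(&meta_features)?;
/// ```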
#[cfg(feature = "auto-feature-engineering")]
pub struct MetaLearningModel {
    /// Neural network for predicting transformation performance
    model: nn::Sequential,
    /// Variable store for parameters
    vs: nn::VarStore,
    /// Device for computation (CPU/GPU)
    device: Device,
    /// Training data cache
    training_cache: Vec<(DatasetMetaFeatures, Vec<TransformationConfig>)>,
}

#[cfg(feature = "auto-feature-engineering")]
impl MetaLearningModel {
    /// Create a new meta-learning model
    pub fn new() -> Result<Self> {
        let device = Device::cuda_if_available();
        let vs = nn::VarStore::new(device);
        let root = vs.root();

        // Build neural network architecture
        let model = nn::seq()
            .add(nn::linear(&root / "layer1", 10, 64, Default::default()))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(&root / "layer2", 64, 32, Default::default()))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(&root / "layer3", 32, 16, Default::default()))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(&root / "output", 16, 10, Default::default()))
            .add_fn(|xs| xs.softmax(-1, tch::Kind::Float));

        Ok(MetaLearningModel {
            model,
            vs,
            device,
            training_cache: Vec::new(),
        })
    }

    /// Train the meta-learning model on historical transformation performance data
    pub fn train(
        &mut self,
        training_data: Vec<(DatasetMetaFeatures, Vec<TransformationConfig>)>,
    ) -> Result<()> {
        self.training_cache.extend(training_data.clone());

        // Convert training data to tensors
        let (input_features, target_scores) = self.prepare_training_data(&training_data)?;

        // Training loop - placeholder implementation
        // Note: Full training requires proper optimizer setup in tch 0.20
        // This is a simplified version for compilation compatibility

        for epoch in 0..100 {
            let predicted = input_features.apply(&self.model);
            let loss = predicted.mse_loss(&target_scores, tch::Reduction::Mean);

            if epoch % 20 == 0 {
                println!("Epoch {epoch}: Loss = {:.4}", loss.double_value(&[]));
            }

            // TODO: Implement proper optimizer when tch API is stabilized
        }

        Ok(())
    }

    /// Predict optimal transformations for a given dataset
    pub fn predict_transformations(
        &self,
        meta_features: &DatasetMetaFeatures,
    ) -> Result<Vec<TransformationConfig>> {
        let input_tensor = self.meta_features_to_tensor(meta_features)?;
        let prediction = input_tensor.apply(&self.model);

        // Convert prediction to transformation recommendations
        self.tensor_to_transformations(&prediction)
    }

    fn prepare_training_data(
        &self,
        training_data: &[(DatasetMetaFeatures, Vec<TransformationConfig>)],
    ) -> Result<(Tensor, Tensor)> {
        if training_data.is_empty() {
            return Err(TransformError::InvalidInput(
                "Training data cannot be empty".to_string(),
            ));
        }

        let n_samples = training_data.len();
        let mut input_features = Vec::with_capacity(n_samples * 10);
        let mut target_scores = Vec::with_capacity(n_samples * 10);

        for (meta_features, transformations) in training_data {
            // Normalize feature values for better training stability
            let features = vec![
                (meta_features.n_samples as f64).ln().max(0.0), // Log-scale for sample count
                (meta_features.n_features as f64).ln().max(0.0), // Log-scale for feature count
                meta_features.sparsity.clamp(0.0, 1.0),         // Clamp to [0, 1]
                meta_features.mean_correlation.clamp(-1.0, 1.0), // Clamp to [-1, 1]
                meta_features.std_correlation.max(0.0),         // Non-negative
                meta_features.mean_skewness.clamp(-10.0, 10.0), // Reasonable bounds
                meta_features.mean_kurtosis.clamp(-10.0, 10.0), // Reasonable bounds
                meta_features.missing_ratio.clamp(0.0, 1.0),    // Clamp to [0, 1]
                meta_features.variance_ratio.max(0.0),          // Non-negative
                meta_features.outlier_ratio.clamp(0.0, 1.0),    // Clamp to [0, 1]
            ];

            // Validate all features are finite
            if features.iter().any(|&f| !f.is_finite()) {
                return Err(TransformError::ComputationError(
                    "Non-finite values detected in meta-features".to_string(),
                ));
            }

            input_features.extend(features);

            // Create target vector (transformation type scores)
            let mut scores = vec![0.0f64; 10]; // Number of transformation types
            for config in transformations {
                let idx = self.transformation_type_to_index(&config.transformation_type);
                let performance = config.expected_performance.clamp(0.0, 1.0); // Clamp to [0, 1]
                scores[idx] = scores[idx].max(performance); // Take max if multiple configs for same type
            }
            target_scores.extend(scores);
        }

        let input_tensor = Tensor::from_slice(&input_features)
            .reshape(&[n_samples as i64, 10])
            .to_device(self.device);
        let target_tensor = Tensor::from_slice(&target_scores)
            .reshape(&[n_samples as i64, 10])
            .to_device(self.device);

        Ok((input_tensor, target_tensor))
    }

    fn meta_features_to_tensor(&self, meta_features: &DatasetMetaFeatures) -> Result<Tensor> {
        // Apply same normalization as in training data preparation
        let features = vec![
            (meta_features.n_samples as f64).ln().max(0.0),
            (meta_features.n_features as f64).ln().max(0.0),
            meta_features.sparsity.clamp(0.0, 1.0),
            meta_features.mean_correlation.clamp(-1.0, 1.0),
            meta_features.std_correlation.max(0.0),
            meta_features.mean_skewness.clamp(-10.0, 10.0),
            meta_features.mean_kurtosis.clamp(-10.0, 10.0),
            meta_features.missing_ratio.clamp(0.0, 1.0),
            meta_features.variance_ratio.max(0.0),
            meta_features.outlier_ratio.clamp(0.0, 1.0),
        ];

        // Validate all features are finite
        if features.iter().any(|&f| !f.is_finite()) {
            return Err(TransformError::ComputationError(
                "Non-finite values detected in meta-features".to_string(),
            ));
        }

        Ok(Tensor::from_slice(&features)
            .reshape(&[1, 10])
            .to_device(self.device))
    }

    fn tensor_to_transformations(&self, prediction: &Tensor) -> Result<Vec<TransformationConfig>> {
        // Flatten the [1, 10] prediction to 1-D so it can be converted into a Vec<f64>
        let flat = prediction.flatten(0, -1);
        let scores: Vec<f64> = (&flat).try_into().map_err(|e| {
            TransformError::ComputationError(format!("Failed to extract tensor data: {:?}", e))
        })?;

        if scores.len() != 10 {
            return Err(TransformError::ComputationError(format!(
                "Expected 10 prediction scores, got {}",
                scores.len()
            )));
        }

        let mut transformations = Vec::new();

        // Use adaptive threshold based on score distribution
        let max_score = scores.iter().fold(0.0f64, |a, &b| a.max(b));
        let mean_score = scores.iter().sum::<f64>() / scores.len() as f64;
        let threshold = (max_score * 0.7 + mean_score * 0.3).max(0.3); // Adaptive threshold

        for (i, &score) in scores.iter().enumerate() {
            if score > threshold && score.is_finite() {
                let transformation_type = self.index_to_transformation_type(i);
                let config = TransformationConfig {
                    transformation_type: transformation_type.clone(),
                    parameters: self.get_default_parameters_for_type(&transformation_type),
                    expected_performance: score.clamp(0.0, 1.0), // Clamp to valid range
                };
                transformations.push(config);
            }
        }

        // If no transformations meet the threshold, take the top 3
        if transformations.is_empty() {
            let mut score_indices: Vec<(usize, f64)> = scores
                .iter()
                .enumerate()
                .filter(|(_, &score)| score.is_finite())
                .map(|(i, &score)| (i, score))
                .collect();

            score_indices
                .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));

            for (i, score) in score_indices.into_iter().take(3) {
                let transformation_type = self.index_to_transformation_type(i);
                let config = TransformationConfig {
                    transformation_type: transformation_type.clone(),
                    parameters: self.get_default_parameters_for_type(&transformation_type),
                    expected_performance: score.clamp(0.0, 1.0),
                };
                transformations.push(config);
            }
        }

        // Sort by expected performance
        transformations.sort_by(|a, b| {
            b.expected_performance
                .partial_cmp(&a.expected_performance)
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        Ok(transformations)
    }

    fn transformation_type_to_index(&self, t_type: &TransformationType) -> usize {
        match t_type {
            TransformationType::StandardScaler => 0,
            TransformationType::MinMaxScaler => 1,
            TransformationType::RobustScaler => 2,
            TransformationType::PowerTransformer => 3,
            TransformationType::PolynomialFeatures => 4,
            TransformationType::PCA => 5,
            TransformationType::VarianceThreshold => 6,
            TransformationType::QuantileTransformer => 7,
            TransformationType::BinaryEncoder => 8,
            TransformationType::TargetEncoder => 9,
        }
    }

    fn index_to_transformation_type(&self, index: usize) -> TransformationType {
        match index {
            0 => TransformationType::StandardScaler,
            1 => TransformationType::MinMaxScaler,
            2 => TransformationType::RobustScaler,
            3 => TransformationType::PowerTransformer,
            4 => TransformationType::PolynomialFeatures,
            5 => TransformationType::PCA,
            6 => TransformationType::VarianceThreshold,
            7 => TransformationType::QuantileTransformer,
            8 => TransformationType::BinaryEncoder,
            9 => TransformationType::TargetEncoder,
            _ => TransformationType::StandardScaler,
        }
    }

    fn get_default_parameters_for_type(&self, t_type: &TransformationType) -> HashMap<String, f64> {
        let mut params = HashMap::new();
        match t_type {
            TransformationType::PCA => {
                params.insert("n_components".to_string(), 0.95); // Keep 95% variance
            }
            TransformationType::PolynomialFeatures => {
                params.insert("degree".to_string(), 2.0);
                params.insert("include_bias".to_string(), 0.0);
            }
            TransformationType::VarianceThreshold => {
                params.insert("threshold".to_string(), 0.01);
            }
            _ => {} // Use defaults for other transformations
        }
        params
    }
}

/// Automated feature engineering pipeline
pub struct AutoFeatureEngineer {
    #[cfg(feature = "auto-feature-engineering")]
    meta_model: MetaLearningModel,
    /// Historical transformation performance data
    #[cfg(feature = "auto-feature-engineering")]
    transformation_history: Vec<(DatasetMetaFeatures, Vec<TransformationConfig>, f64)>,
}

impl AutoFeatureEngineer {
    /// Expose pearson_correlation as a public method for external use
    #[allow(dead_code)]
    pub fn pearson_correlation(&self, x: &ArrayView1<f64>, y: &ArrayView1<f64>) -> Result<f64> {
        self.pearson_correlation_internal(x, y)
    }

    /// Create a new automated feature engineer
    pub fn new() -> Result<Self> {
        #[cfg(feature = "auto-feature-engineering")]
        let meta_model = MetaLearningModel::new()?;

        Ok(AutoFeatureEngineer {
            #[cfg(feature = "auto-feature-engineering")]
            meta_model,
            #[cfg(feature = "auto-feature-engineering")]
            transformation_history: Vec::new(),
        })
    }

    /// Extract meta-features from a dataset
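    ///
    /// A rough sketch of the expected call pattern (`engineer` and `data` are
    /// placeholders; the input must have at least 2 samples and 1 feature):
    ///
    /// ```ignore
    /// let meta = engineer.extract_meta_features(&data.view())?;
    /// assert!(meta.sparsity >= 0.0 && meta.sparsity <= 1.0);
    /// ```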
    pub fn extract_meta_features(&self, x: &ArrayView2<f64>) -> Result<DatasetMetaFeatures> {
        check_not_empty(x, "x")?;

        // Check finite values
        for &val in x.iter() {
            if !val.is_finite() {
                return Err(crate::error::TransformError::DataValidationError(
                    "Data contains non-finite values".to_string(),
                ));
            }
        }

        let (n_samples, n_features) = x.dim();

        if n_samples < 2 || n_features < 1 {
            return Err(TransformError::InvalidInput(
                "Dataset must have at least 2 samples and 1 feature".to_string(),
            ));
        }

        // Calculate sparsity
        let zeros = x.iter().filter(|&&val| val == 0.0).count();
        let sparsity = zeros as f64 / (n_samples * n_features) as f64;

        // Calculate correlation statistics (mean and standard deviation)
        let correlations = self.compute_feature_correlations(x)?;
        let (mean_correlation, std_correlation) = if correlations.is_empty() {
            (0.0, 0.0)
        } else {
            let mean = correlations.sum() / correlations.len() as f64;
            let variance = correlations
                .iter()
                .map(|&c| (c - mean).powi(2))
                .sum::<f64>()
                / correlations.len() as f64;
            (mean, variance.sqrt())
        };

        // Calculate skewness and kurtosis
        let (mean_skewness, mean_kurtosis) = self.compute_distribution_stats(x)?;

        // Calculate missing values (assuming NaN represents missing)
        let missing_count = x.iter().filter(|val| val.is_nan()).count();
        let missing_ratio = missing_count as f64 / (n_samples * n_features) as f64;
        let has_missing = missing_count > 0;

        // Calculate variance statistics with better numerical stability
        let variances: Array1<f64> = x.var_axis(scirs2_core::ndarray::Axis(0), 0.0);
        let finite_variances: Vec<f64> = variances
            .iter()
            .filter(|&&v| v.is_finite() && v >= 0.0)
            .copied()
            .collect();

        let variance_ratio = if finite_variances.is_empty() {
            0.0
        } else {
            let mean_var = finite_variances.iter().sum::<f64>() / finite_variances.len() as f64;
            if mean_var < f64::EPSILON {
                0.0
            } else {
                let var_of_vars = finite_variances
                    .iter()
                    .map(|&v| (v - mean_var).powi(2))
                    .sum::<f64>()
                    / finite_variances.len() as f64;
                (var_of_vars.sqrt() / mean_var).min(100.0) // Cap at reasonable value
            }
        };

        // Calculate outlier ratio (using IQR method)
        let outlier_ratio = self.compute_outlier_ratio(x)?;

        Ok(DatasetMetaFeatures {
            n_samples,
            n_features,
            sparsity,
            mean_correlation,
            std_correlation,
            mean_skewness,
            mean_kurtosis,
            missing_ratio,
            variance_ratio,
            outlier_ratio,
            has_missing,
        })
    }

    /// Recommend optimal transformations for a dataset
    #[cfg(feature = "auto-feature-engineering")]
    pub fn recommend_transformations(
        &self,
        x: &ArrayView2<f64>,
    ) -> Result<Vec<TransformationConfig>> {
        let meta_features = self.extract_meta_features(x)?;
        self.meta_model.predict_transformations(&meta_features)
    }

    /// Recommend optimal transformations for a dataset (fallback implementation)
    #[cfg(not(feature = "auto-feature-engineering"))]
    pub fn recommend_transformations(
        &self,
        x: &ArrayView2<f64>,
    ) -> Result<Vec<TransformationConfig>> {
        // Fall back to rule-based recommendations
        self.rule_based_recommendations(x)
    }

    /// Rule-based transformation recommendations (fallback)
    fn rule_based_recommendations(&self, x: &ArrayView2<f64>) -> Result<Vec<TransformationConfig>> {
        let meta_features = self.extract_meta_features(x)?;
        let mut recommendations = Vec::new();

        // Rule 1: High skewness -> Power transformation
        if meta_features.mean_skewness.abs() > 1.0 {
            recommendations.push(TransformationConfig {
                transformation_type: TransformationType::PowerTransformer,
                parameters: HashMap::new(),
                expected_performance: 0.8,
            });
        }

        // Rule 2: High dimensionality -> PCA
        if meta_features.n_features > 100 {
            let mut params = HashMap::new();
            params.insert("n_components".to_string(), 0.95);
            recommendations.push(TransformationConfig {
                transformation_type: TransformationType::PCA,
                parameters: params,
                expected_performance: 0.75,
            });
        }

        // Rule 3: Different scales -> StandardScaler
        if meta_features.variance_ratio > 1.0 {
            recommendations.push(TransformationConfig {
                transformation_type: TransformationType::StandardScaler,
                parameters: HashMap::new(),
                expected_performance: 0.9,
            });
        }

        // Rule 4: High outlier ratio -> RobustScaler
        if meta_features.outlier_ratio > 0.1 {
            recommendations.push(TransformationConfig {
                transformation_type: TransformationType::RobustScaler,
                parameters: HashMap::new(),
                expected_performance: 0.85,
            });
        }

        // Sort by expected performance
        recommendations.sort_by(|a, b| {
            b.expected_performance
                .partial_cmp(&a.expected_performance)
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        Ok(recommendations)
    }

    /// Train the meta-learning model with new data
    #[cfg(feature = "auto-feature-engineering")]
    pub fn update_model(
        &mut self,
        meta_features: DatasetMetaFeatures,
        transformations: Vec<TransformationConfig>,
        performance: f64,
    ) -> Result<()> {
        self.transformation_history.push((
            meta_features.clone(),
            transformations.clone(),
            performance,
        ));

        // Retrain every 10 new examples
        if self.transformation_history.len() % 10 == 0 {
            let training_data: Vec<_> = self
                .transformation_history
                .iter()
                .map(|(meta, trans, _perf)| (meta.clone(), trans.clone()))
                .collect();
            self.meta_model.train(training_data)?;
        }

        Ok(())
    }

    fn compute_feature_correlations(&self, x: &ArrayView2<f64>) -> Result<Array1<f64>> {
        let n_features = x.ncols();

        if n_features < 2 {
            return Ok(Array1::zeros(0));
        }

        let mut correlations = Vec::with_capacity((n_features * (n_features - 1)) / 2);

        for i in 0..n_features {
            for j in i + 1..n_features {
                let col_i = x.column(i);
                let col_j = x.column(j);
                let correlation = self.pearson_correlation_internal(&col_i, &col_j)?;
                correlations.push(correlation);
            }
        }

        Ok(Array1::from_vec(correlations))
    }

    fn pearson_correlation_internal(
        &self,
        x: &ArrayView1<f64>,
        y: &ArrayView1<f64>,
    ) -> Result<f64> {
        if x.len() != y.len() {
            return Err(TransformError::InvalidInput(
                "Arrays must have the same length for correlation calculation".to_string(),
            ));
        }

        if x.len() < 2 {
            return Ok(0.0);
        }

        let mean_x = x.mean_or(0.0);
        let mean_y = y.mean_or(0.0);

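        // Pearson correlation: r = Σ(x_i - x̄)(y_i - ȳ) / sqrt(Σ(x_i - x̄)² · Σ(y_i - ȳ)²)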
        let numerator: f64 = x
            .iter()
            .zip(y.iter())
            .map(|(&xi, &yi)| (xi - mean_x) * (yi - mean_y))
            .sum();

        let sum_sq_x: f64 = x.iter().map(|&xi| (xi - mean_x).powi(2)).sum();
        let sum_sq_y: f64 = y.iter().map(|&yi| (yi - mean_y).powi(2)).sum();

        let denominator = (sum_sq_x * sum_sq_y).sqrt();

        if denominator < f64::EPSILON {
            Ok(0.0)
        } else {
            let correlation = numerator / denominator;
            // Clamp to valid correlation range due to numerical precision
            Ok(correlation.clamp(-1.0, 1.0))
        }
    }

    fn compute_distribution_stats(&self, x: &ArrayView2<f64>) -> Result<(f64, f64)> {
        let mut skewness_values = Vec::new();
        let mut kurtosis_values = Vec::new();

        for col in x.columns() {
            // Filter out non-finite values
            let finite_values: Vec<f64> = col
                .iter()
                .filter(|&&val| val.is_finite())
                .copied()
                .collect();

            if finite_values.len() < 3 {
                continue; // Need at least 3 values for meaningful skewness/kurtosis
            }

            let n = finite_values.len() as f64;
            let mean = finite_values.iter().sum::<f64>() / n;

            // Calculate variance using more numerically stable method
            let variance = finite_values
                .iter()
                .map(|&val| (val - mean).powi(2))
                .sum::<f64>()
                / (n - 1.0); // Sample variance

            let std = variance.sqrt();

            if std > f64::EPSILON * 1000.0 {
                // More robust threshold
                // Sample skewness with bias correction
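                // m3 is the third standardized moment; the bias-corrected sample
                // skewness is G1 = m3 * sqrt(n(n - 1)) / (n - 2).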
                let m3: f64 = finite_values
                    .iter()
                    .map(|&val| ((val - mean) / std).powi(3))
                    .sum::<f64>()
                    / n;

                let skew = if n > 2.0 {
                    m3 * (n * (n - 1.0)).sqrt() / (n - 2.0) // Bias-corrected skewness
                } else {
                    m3
                };

                // Sample kurtosis with bias correction
                let m4: f64 = finite_values
                    .iter()
                    .map(|&val| ((val - mean) / std).powi(4))
                    .sum::<f64>()
                    / n;

                let kurt = if n > 3.0 {
                    // Bias-corrected excess kurtosis
                    let numerator = (n - 1.0) * ((n + 1.0) * m4 - 3.0 * (n - 1.0));
                    let denominator = (n - 2.0) * (n - 3.0);
                    numerator / denominator
                } else {
                    m4 - 3.0 // Simple excess kurtosis
                };

                // Clamp to reasonable ranges to avoid extreme outliers
                skewness_values.push(skew.clamp(-20.0, 20.0));
                kurtosis_values.push(kurt.clamp(-20.0, 20.0));
            }
        }

        let mean_skewness = if skewness_values.is_empty() {
            0.0
        } else {
            skewness_values.iter().sum::<f64>() / skewness_values.len() as f64
        };

        let mean_kurtosis = if kurtosis_values.is_empty() {
            0.0
        } else {
            kurtosis_values.iter().sum::<f64>() / kurtosis_values.len() as f64
        };

        Ok((mean_skewness, mean_kurtosis))
    }

    fn compute_outlier_ratio(&self, x: &ArrayView2<f64>) -> Result<f64> {
        let mut total_outliers = 0;
        let mut total_values = 0;

        for col in x.columns() {
            let mut sorted_col: Vec<f64> = col
                .iter()
                .filter(|&&val| val.is_finite())
                .copied()
                .collect();

            if sorted_col.is_empty() {
                continue;
            }

            sorted_col.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

            let n = sorted_col.len();
            if n < 4 {
                continue;
            }

            // Approximate quartiles by index position
            let q1_idx = (n as f64 * 0.25) as usize;
            let q3_idx = (n as f64 * 0.75) as usize;
            let q1 = sorted_col[q1_idx.min(n - 1)];
            let q3 = sorted_col[q3_idx.min(n - 1)];

            let iqr = q3 - q1;

            // Avoid division by zero or very small IQR
            if iqr < f64::EPSILON {
                continue;
            }

            let lower_bound = q1 - 1.5 * iqr;
            let upper_bound = q3 + 1.5 * iqr;

            let outliers = col
                .iter()
                .filter(|&&val| val.is_finite() && (val < lower_bound || val > upper_bound))
                .count();

            total_outliers += outliers;
            total_values += col.len();
        }

        if total_values == 0 {
            Ok(0.0)
        } else {
            Ok(total_outliers as f64 / total_values as f64)
        }
    }
}

/// Advanced meta-learning system with deep learning and reinforcement learning
#[cfg(feature = "auto-feature-engineering")]
pub struct AdvancedMetaLearningSystem {
    /// Deep neural network for meta-learning
    deep_model: nn::Sequential,
    /// Transformer model for sequence-based recommendations
    transformer_model: nn::Sequential,
    /// Reinforcement learning agent for transformation selection
    rl_agent: Option<RLAgent>,
    /// Device for computation
    device: Device,
    /// Historical performance database
    performance_db: Vec<PerformanceRecord>,
    /// Multi-objective optimization weights
    optimization_weights: FeatureOptimizationWeights,
    /// Transfer learning cache
    transfer_cache: HashMap<String, Tensor>,
}

/// Reinforcement learning agent for transformation selection
#[cfg(feature = "auto-feature-engineering")]
pub struct RLAgent {
    /// Q-network for value estimation
    q_network: nn::Sequential,
    /// Target network for stable training
    target_network: nn::Sequential,
    /// Experience replay buffer
    replay_buffer: VecDeque<Experience>,
    /// Epsilon for exploration
    epsilon: f64,
    /// Learning rate
    learning_rate: f64,
    /// Discount factor
    gamma: f64,
}

/// Experience tuple for reinforcement learning
#[cfg(feature = "auto-feature-engineering")]
#[derive(Debug, Clone)]
pub struct Experience {
    /// State representation (meta-features)
    state: Vec<f64>,
    /// Action taken (transformation choice)
    action: usize,
    /// Reward received (performance improvement)
    reward: f64,
    /// Next state
    next_state: Vec<f64>,
    /// Whether episode terminated
    done: bool,
}

/// Performance record for historical analysis
#[cfg(feature = "auto-feature-engineering")]
#[derive(Debug, Clone)]
pub struct PerformanceRecord {
    /// Dataset meta-features
    meta_features: DatasetMetaFeatures,
    /// Applied transformations
    transformations: Vec<TransformationConfig>,
    /// Performance metrics
    metrics: PerformanceMetrics,
    /// Computational cost
    computational_cost: f64,
    /// Timestamp
    timestamp: u64,
}

/// Multi-objective optimization weights
#[cfg(feature = "auto-feature-engineering")]
#[derive(Debug, Clone)]
pub struct FeatureOptimizationWeights {
    /// Weight for prediction performance
    performance_weight: f64,
    /// Weight for computational efficiency
    efficiency_weight: f64,
    /// Weight for model interpretability
    interpretability_weight: f64,
    /// Weight for robustness
    robustness_weight: f64,
}

#[cfg(feature = "auto-feature-engineering")]
impl Default for FeatureOptimizationWeights {
    fn default() -> Self {
        FeatureOptimizationWeights {
            performance_weight: 0.5,
            efficiency_weight: 0.3,
            interpretability_weight: 0.1,
            robustness_weight: 0.1,
        }
    }
}

/// Performance metrics for multi-objective optimization
#[cfg(feature = "auto-feature-engineering")]
#[derive(Debug, Clone)]
pub struct PerformanceMetrics {
    /// Prediction accuracy/score
    accuracy: f64,
    /// Training time in seconds
    training_time: f64,
    /// Memory usage in MB
    memory_usage: f64,
    /// Model complexity score
    complexity_score: f64,
    /// Cross-validation score
    cv_score: f64,
}

/// Enhanced meta-features with advanced statistical measures
#[cfg(feature = "auto-feature-engineering")]
#[derive(Debug, Clone)]
pub struct EnhancedMetaFeatures {
    /// Base meta-features
    pub base_features: DatasetMetaFeatures,
    /// Estimated intrinsic dimension
    pub manifold_dimension: f64,
    /// Hopkins statistic for clustering tendency
    pub clustering_tendency: f64,
    /// Average mutual information between features
    pub mutual_information_mean: f64,
    /// Differential entropy estimate
    pub entropy_estimate: f64,
    /// Condition number estimate
    pub condition_number: f64,
    /// Volume ratio (convex hull to bounding box)
    pub volume_ratio: f64,
    /// Autocorrelation coefficient
    pub autocorrelation: f64,
    /// Trend strength
    pub trend_strength: f64,
    /// Feature connectivity
    pub connectivity: f64,
    /// Feature clustering coefficient
    pub clustering_coefficient: f64,
}

/// Multi-objective recommendation with performance trade-offs
#[cfg(feature = "auto-feature-engineering")]
#[derive(Debug, Clone)]
pub struct MultiObjectiveRecommendation {
    /// Transformation configuration
    pub transformation: TransformationConfig,
    /// Expected performance score
    pub performance_score: f64,
    /// Computational efficiency score
    pub efficiency_score: f64,
    /// Interpretability score
    pub interpretability_score: f64,
    /// Robustness score
    pub robustness_score: f64,
    /// Overall multi-objective score
    pub overall_score: f64,
}

#[cfg(feature = "auto-feature-engineering")]
impl AdvancedMetaLearningSystem {
    /// Create a new advanced meta-learning system
    pub fn new() -> Result<Self> {
        let device = Device::cuda_if_available();
        let vs = nn::VarStore::new(device);
        let root = vs.root();

        // Build deep neural network with advanced architecture
        let deep_model = nn::seq()
            .add(nn::linear(
                &root / "deep_layer1",
                20,
                128,
                Default::default(),
            ))
            .add_fn(|xs| xs.relu())
            .add_fn(|xs| xs.dropout(0.3, false))
            .add(nn::linear(
                &root / "deep_layer2",
                128,
                256,
                Default::default(),
            ))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(
                &root / "deep_layer3",
                256,
                128,
                Default::default(),
            ))
            .add_fn(|xs| xs.relu())
            .add_fn(|xs| xs.dropout(0.3, false))
            .add(nn::linear(
                &root / "deep_layer4",
                128,
                64,
                Default::default(),
            ))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(
                &root / "deep_output",
                64,
                20,
                Default::default(),
            ))
            .add_fn(|xs| xs.softmax(-1, tch::Kind::Float));

        // Build transformer model for sequence-based recommendations
        let transformer_model = nn::seq()
            .add(nn::linear(&root / "trans_embed", 20, 256, Default::default()))
            // Note: Actual transformer layers would be implemented here
            .add(nn::linear(&root / "trans_layer1", 256, 256, Default::default()))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(&root / "trans_layer2", 256, 128, Default::default()))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(&root / "trans_output", 128, 20, Default::default()))
            .add_fn(|xs| xs.softmax(-1, tch::Kind::Float));

        Ok(AdvancedMetaLearningSystem {
            deep_model,
            transformer_model,
            rl_agent: None,
            device,
            performance_db: Vec::new(),
            optimization_weights: FeatureOptimizationWeights::default(),
            transfer_cache: HashMap::new(),
        })
    }

    /// Initialize reinforcement learning agent
    pub fn initialize_rl_agent(&mut self) -> Result<()> {
        let vs = nn::VarStore::new(self.device);
        let root = vs.root();

        // Q-network architecture
        let q_network = nn::seq()
            .add(nn::linear(&root / "q_layer1", 20, 128, Default::default()))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(&root / "q_layer2", 128, 256, Default::default()))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(&root / "q_layer3", 256, 128, Default::default()))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(&root / "q_output", 128, 20, Default::default())); // 20 possible transformations

        // Target network (copy of Q-network)
        let target_vs = nn::VarStore::new(self.device);
        let target_root = target_vs.root();
        let target_network = nn::seq()
            .add(nn::linear(
                &target_root / "target_layer1",
                20,
                128,
                Default::default(),
            ))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(
                &target_root / "target_layer2",
                128,
                256,
                Default::default(),
            ))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(
                &target_root / "target_layer3",
                256,
                128,
                Default::default(),
            ))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(
                &target_root / "target_output",
                128,
                20,
                Default::default(),
            ));

        self.rl_agent = Some(RLAgent {
            q_network,
            target_network,
            replay_buffer: VecDeque::with_capacity(10000),
            epsilon: 0.1,
            learning_rate: 0.001,
            gamma: 0.99,
        });

        Ok(())
    }

    /// Enhanced meta-feature extraction with advanced statistical measures
    pub fn extract_enhanced_meta_features(
        &self,
        x: &ArrayView2<f64>,
    ) -> Result<EnhancedMetaFeatures> {
        let auto_engineer = AutoFeatureEngineer::new()?;
        let base_features = auto_engineer.extract_meta_features(x)?;

        // Extract additional advanced meta-features

        // Topological features
        let manifold_dimension = self.estimate_intrinsic_dimension(x)?;
        let clustering_tendency = self.hopkins_statistic(x)?;

        // Information-theoretic features
        let mutual_information_mean = self.average_mutual_information(x)?;
        let entropy_estimate = self.differential_entropy_estimate(x)?;

        // Geometric features
        let condition_number = self.estimate_condition_number(x)?;
        let volume_ratio = self.estimate_volume_ratio(x)?;

        // Temporal features (if applicable)
        let autocorrelation = self.estimate_autocorrelation(x)?;
        let trend_strength = self.estimate_trend_strength(x)?;

        // Network/graph features
        let connectivity = self.estimate_feature_connectivity(x)?;
        let clustering_coefficient = self.feature_clustering_coefficient(x)?;

        Ok(EnhancedMetaFeatures {
            base_features,
            manifold_dimension,
            clustering_tendency,
            mutual_information_mean,
            entropy_estimate,
            condition_number,
            volume_ratio,
            autocorrelation,
            trend_strength,
            connectivity,
            clustering_coefficient,
        })
    }

    /// Multi-objective transformation recommendation
    pub fn recommend_multi_objective_transformations(
        &self,
        meta_features: &EnhancedMetaFeatures,
    ) -> Result<Vec<MultiObjectiveRecommendation>> {
        // Get base recommendations from deep model
        let deep_input = self.enhanced_meta_features_to_tensor(meta_features)?;
        let deep_predictions = deep_input.apply(&self.deep_model);

        // Get sequence-based recommendations from transformer
        let transformer_predictions = deep_input.apply(&self.transformer_model);

        // Combine predictions using ensemble weighting
        let ensemble_predictions = (&deep_predictions * 0.6) + (&transformer_predictions * 0.4);

        // Apply reinforcement learning if available
        let final_predictions = if let Some(ref rl_agent) = self.rl_agent {
            let rl_q_values = deep_input.apply(&rl_agent.q_network);
            let rl_softmax = rl_q_values.softmax(-1, tch::Kind::Float);
            (&ensemble_predictions * 0.7) + (&rl_softmax * 0.3)
        } else {
            ensemble_predictions
        };

        // Convert to multi-objective recommendations
        self.tensor_to_multi_objective_recommendations(&final_predictions, meta_features)
    }

    /// Transfer learning from similar datasets
    pub fn apply_transfer_learning(
        &mut self,
        target_meta_features: &EnhancedMetaFeatures,
    ) -> Result<Vec<TransformationConfig>> {
        // Find similar datasets in performance database
        let similar_records = self.find_similar_datasets(target_meta_features, 5)?;

        if similar_records.is_empty() {
            return self.fallback_recommendations(target_meta_features);
        }

        // Extract successful transformations from similar datasets
        let mut transformation_votes: HashMap<TransformationType, (f64, usize)> = HashMap::new();

        for record in &similar_records {
            let similarity =
                self.compute_dataset_similarity(target_meta_features, &record.meta_features)?;

            for transformation in &record.transformations {
                let performance_score = record.metrics.accuracy * similarity;
                let entry = transformation_votes
                    .entry(transformation.transformation_type.clone())
                    .or_insert((0.0, 0));
                entry.0 += performance_score;
                entry.1 += 1;
            }
        }

        // Rank transformations by weighted performance
        let mut ranked_transformations: Vec<_> = transformation_votes
            .into_iter()
            .map(|(t_type, (total_score, count))| (t_type, total_score / count as f64))
            .collect();

        ranked_transformations
            .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));

        // Convert to transformation configs
        let mut recommendations = Vec::new();
        for (t_type, score) in ranked_transformations.into_iter().take(5) {
            recommendations.push(TransformationConfig {
                transformation_type: t_type.clone(),
                parameters: self
                    .get_optimized_parameters_for_type(&t_type, target_meta_features)?,
                expected_performance: score.clamp(0.0, 1.0),
            });
        }

        Ok(recommendations)
    }

    // Helper methods for advanced meta-feature extraction
    fn estimate_intrinsic_dimension(&self, x: &ArrayView2<f64>) -> Result<f64> {
        // Simplified intrinsic dimension estimation using correlation dimension
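        // The correlation dimension D is based on C(r) ~ r^D, so D ≈ ln C(r) / ln r,
        // where C(r) is the fraction of sampled point pairs closer than radius r.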
        let n_samples = x.nrows();
        if n_samples < 10 {
            return Ok(1.0);
        }

        // Sample random points and compute distances
        use scirs2_core::random::Rng;
        let mut rng = scirs2_core::random::rng();
        let sample_size = 100.min(n_samples);
        let mut distances = Vec::new();

        for _ in 0..sample_size {
            let i = rng.gen_range(0..n_samples);
            let j = rng.gen_range(0..n_samples);
            if i != j {
                let dist = self.euclidean_distance(&x.row(i), &x.row(j));
                distances.push(dist);
            }
        }

        // Estimate dimension using correlation dimension approach
        distances.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

        if distances.is_empty() || distances[0] == 0.0 {
            return Ok(1.0);
        }

        // Count pairs within different distance thresholds
        let thresholds = [0.1, 0.2, 0.5, 1.0];
        let mut dimension_estimates = Vec::new();

        for &threshold in &thresholds {
            let count = distances.iter().filter(|&&d| d < threshold).count();
            if count > 1 {
                let correlation_sum = (count as f64).ln();
                let threshold_ln = threshold.ln();
                if threshold_ln != 0.0 {
                    dimension_estimates.push(correlation_sum / threshold_ln);
                }
            }
        }

        let avg_dimension = if dimension_estimates.is_empty() {
            1.0
        } else {
            dimension_estimates.iter().sum::<f64>() / dimension_estimates.len() as f64
        };

        Ok(avg_dimension.clamp(1.0, x.ncols() as f64))
    }

    fn hopkins_statistic(&self, x: &ArrayView2<f64>) -> Result<f64> {
        // Hopkins statistic for clustering tendency
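        // H = Σu / (Σu + Σw), where u are nearest-neighbor distances from uniformly
        // random points to the data and w are nearest-neighbor distances between data
        // points; H ≈ 0.5 suggests random structure, values near 1.0 suggest clustering.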
        let (n_samples, n_features) = x.dim();
        if n_samples < 10 {
            return Ok(0.5); // Neutral value
        }

        use scirs2_core::random::Rng;
        let mut rng = scirs2_core::random::rng();
        let sample_size = 10.min(n_samples / 2);

        // Generate random points in the data space
        let mut min_vals = vec![f64::INFINITY; n_features];
        let mut max_vals = vec![f64::NEG_INFINITY; n_features];

        for row in x.rows() {
            for (j, &val) in row.iter().enumerate() {
                min_vals[j] = min_vals[j].min(val);
                max_vals[j] = max_vals[j].max(val);
            }
        }

        let mut u_distances = Vec::new();
        let mut w_distances = Vec::new();

        // Sample random points and compute distances
        for _ in 0..sample_size {
            // Random point in data space
            let mut random_point = vec![0.0; n_features];
            for j in 0..n_features {
                random_point[j] = rng.gen_range(min_vals[j]..=max_vals[j]);
            }

            // Find nearest neighbor distance for random point
            let mut min_dist_u = f64::INFINITY;
            for row in x.rows() {
                let dist = self.euclidean_distance_vec(&random_point, &row.to_vec());
                min_dist_u = min_dist_u.min(dist);
            }
            u_distances.push(min_dist_u);

            // Random data point
            let random_idx = rng.gen_range(0..n_samples);
            let data_point = x.row(random_idx).to_vec();

            // Find nearest neighbor distance for data point
            let mut min_dist_w = f64::INFINITY;
            for (i, row) in x.rows().into_iter().enumerate() {
                if i != random_idx {
                    let dist = self.euclidean_distance_vec(&data_point, &row.to_vec());
                    min_dist_w = min_dist_w.min(dist);
                }
            }
            w_distances.push(min_dist_w);
        }

        let sum_u: f64 = u_distances.iter().sum();
        let sum_w: f64 = w_distances.iter().sum();

        if sum_u + sum_w == 0.0 {
            Ok(0.5)
        } else {
            Ok(sum_u / (sum_u + sum_w))
        }
    }

    fn average_mutual_information(&self, x: &ArrayView2<f64>) -> Result<f64> {
        let (_, n_features) = x.dim();
        if n_features < 2 {
            return Ok(0.0);
        }

        let mut mi_sum = 0.0;
        let mut pair_count = 0;

        // Sample pairs of features to avoid O(n²) complexity
        use scirs2_core::random::Rng;
        let mut rng = scirs2_core::random::rng();
        let max_pairs = 50.min((n_features * (n_features - 1)) / 2);

        for _ in 0..max_pairs {
            let i = rng.gen_range(0..n_features);
            let j = rng.gen_range(0..n_features);
            if i != j {
                let mi = self.estimate_mutual_information(&x.column(i), &x.column(j))?;
                mi_sum += mi;
                pair_count += 1;
            }
        }

        Ok(if pair_count > 0 {
            mi_sum / pair_count as f64
        } else {
            0.0
        })
    }

    fn estimate_mutual_information(
        &self,
        x: &scirs2_core::ndarray::ArrayView1<f64>,
        y: &scirs2_core::ndarray::ArrayView1<f64>,
    ) -> Result<f64> {
        // Simplified MI estimation using binning
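        // I(X; Y) = Σ_{x,y} p(x, y) · ln( p(x, y) / (p(x) p(y)) ), estimated here from
        // quantile-based histograms of the two columns.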
        let n_bins = 10;
        let x_bins = self.create_bins(x, n_bins);
        let y_bins = self.create_bins(y, n_bins);

        // Create joint histogram
        let mut joint_hist = vec![vec![0; n_bins]; n_bins];
        let mut x_hist = vec![0; n_bins];
        let mut y_hist = vec![0; n_bins];

        for (&xi, &yi) in x.iter().zip(y.iter()) {
            if xi.is_finite() && yi.is_finite() {
                let x_bin = self.find_bin(xi, &x_bins).min(n_bins - 1);
                let y_bin = self.find_bin(yi, &y_bins).min(n_bins - 1);
                joint_hist[x_bin][y_bin] += 1;
                x_hist[x_bin] += 1;
                y_hist[y_bin] += 1;
            }
        }

        let total = x.len() as f64;
        let mut mi = 0.0;

        for i in 0..n_bins {
            for j in 0..n_bins {
                let p_xy = joint_hist[i][j] as f64 / total;
                let p_x = x_hist[i] as f64 / total;
                let p_y = y_hist[j] as f64 / total;

                if p_xy > 0.0 && p_x > 0.0 && p_y > 0.0 {
                    mi += p_xy * (p_xy / (p_x * p_y)).ln();
                }
            }
        }

        Ok(mi.max(0.0))
    }

    fn differential_entropy_estimate(&self, x: &ArrayView2<f64>) -> Result<f64> {
        // Simplified differential entropy estimate
        let (n_samples, n_features) = x.dim();
        if n_samples < 2 {
            return Ok(0.0);
        }

        let mut entropy_sum = 0.0;
        for col in x.columns() {
            let variance = col.variance();
            if variance > 0.0 {
                // Gaussian entropy: 0.5 * log(2πeσ²)
                entropy_sum +=
                    0.5 * (2.0 * std::f64::consts::PI * std::f64::consts::E * variance).ln();
            }
        }

        Ok(entropy_sum / n_features as f64)
    }

    fn estimate_condition_number(&self, x: &ArrayView2<f64>) -> Result<f64> {
        // Simplified condition number estimation
        let (n_samples, n_features) = x.dim();
        if n_samples < n_features || n_features < 2 {
            return Ok(1.0);
        }

        // Use the average absolute pairwise correlation as a proxy
        let mut corr_sum = 0.0;
        let mut corr_count = 0;

        for i in 0..n_features {
            for j in (i + 1)..n_features {
                let col_i = x.column(i);
                let col_j = x.column(j);
                if let Ok(corr) = self.quick_correlation(&col_i, &col_j) {
                    corr_sum += corr.abs();
                    corr_count += 1;
                }
            }
        }

        let avg_correlation = if corr_count > 0 {
            corr_sum / corr_count as f64
        } else {
            0.0
        };

        // Approximate condition number based on correlation
        Ok(if avg_correlation > 0.9 {
            100.0 // High condition number
        } else if avg_correlation > 0.7 {
            10.0 // Medium condition number
        } else {
            1.0 // Low condition number
        })
    }

    // Additional helper methods
    fn euclidean_distance(
        &self,
        a: &scirs2_core::ndarray::ArrayView1<f64>,
        b: &scirs2_core::ndarray::ArrayView1<f64>,
    ) -> f64 {
        a.iter()
            .zip(b.iter())
            .map(|(&ai, &bi)| (ai - bi).powi(2))
            .sum::<f64>()
            .sqrt()
    }

    fn euclidean_distance_vec(&self, a: &[f64], b: &[f64]) -> f64 {
        a.iter()
            .zip(b.iter())
            .map(|(&ai, &bi)| (ai - bi).powi(2))
            .sum::<f64>()
            .sqrt()
    }

    fn create_bins(&self, data: &scirs2_core::ndarray::ArrayView1<f64>, n_bins: usize) -> Vec<f64> {
        let mut sorted: Vec<f64> = data.iter().filter(|&&x| x.is_finite()).copied().collect();
        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

        if sorted.is_empty() {
            return vec![0.0; n_bins + 1];
        }

        let mut bins = Vec::new();
        for i in 0..=n_bins {
            let idx = (i * (sorted.len() - 1)) / n_bins;
            bins.push(sorted[idx]);
        }
        bins
    }

    fn find_bin(&self, value: f64, bins: &[f64]) -> usize {
        for (i, &bin_edge) in bins.iter().enumerate().take(bins.len() - 1) {
            if value <= bin_edge {
                return i;
            }
        }
        bins.len() - 2
    }

1472    /// Estimate volume ratio (convex hull to bounding box)
1473    fn estimate_volume_ratio(&self, x: &ArrayView2<f64>) -> Result<f64> {
1474        let (n_samples, n_features) = x.dim();
1475        if n_samples < 4 || n_features < 2 {
1476            return Ok(1.0); // Default for insufficient data
1477        }
1478
1479        // For high-dimensional data, use sampling approach
1480        let sample_size = 1000.min(n_samples);
1481
1482        let mut rng = scirs2_core::random::rng();
1483        let mut all_indices: Vec<usize> = (0..n_samples).collect();
1484        all_indices.shuffle(&mut rng);
1485        let indices: Vec<usize> = all_indices.into_iter().take(sample_size).collect();
1486
        // Track per-feature minima and maxima over the sampled rows
1488        let mut min_vals = vec![f64::INFINITY; n_features];
1489        let mut max_vals = vec![f64::NEG_INFINITY; n_features];
1490
1491        for &idx in &indices {
1492            let row = x.row(idx);
1493            for (j, &val) in row.iter().enumerate() {
1494                if val.is_finite() {
1495                    min_vals[j] = min_vals[j].min(val);
1496                    max_vals[j] = max_vals[j].max(val);
1497                }
1498            }
1499        }
1500
1501        // Calculate bounding box volume
1502        let mut box_volume = 1.0;
1503        for j in 0..n_features {
1504            let range = max_vals[j] - min_vals[j];
1505            if range > f64::EPSILON {
1506                box_volume *= range;
1507            } else {
1508                return Ok(0.0); // Degenerate case
1509            }
1510        }
1511
1512        // Estimate convex hull volume using sampling (simplified approach)
1513        // For a proper implementation, you'd use a convex hull algorithm
1514        // Here we estimate using variance-based approximation
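        // i.e. ratio ≈ Π_j σ_j / Π_j (max_j - min_j), later clamped to [0, 1]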
1515        let mut variance_product = 1.0;
1516        for j in 0..n_features {
1517            let col_values: Vec<f64> = indices
1518                .iter()
1519                .map(|&idx| x[[idx, j]])
1520                .filter(|&val| val.is_finite())
1521                .collect();
1522
1523            if col_values.len() > 1 {
1524                let mean = col_values.iter().sum::<f64>() / col_values.len() as f64;
1525                let variance = col_values
1526                    .iter()
1527                    .map(|&val| (val - mean).powi(2))
1528                    .sum::<f64>()
1529                    / (col_values.len() - 1) as f64;
1530                variance_product *= variance.sqrt();
1531            }
1532        }
1533
1534        // Approximate volume ratio
1535        if box_volume > f64::EPSILON {
1536            let ratio = (variance_product / box_volume).min(1.0).max(0.0);
1537            Ok(ratio)
1538        } else {
1539            Ok(0.0)
1540        }
1541    }
1542
1543    /// Estimate autocorrelation for time-like patterns
1544    fn estimate_autocorrelation(&self, x: &ArrayView2<f64>) -> Result<f64> {
1545        let (n_samples, n_features) = x.dim();
1546        if n_samples < 3 {
1547            return Ok(0.0);
1548        }
1549
1550        let mut autocorr_sum = 0.0;
1551        let mut feature_count = 0;
1552
1553        // Calculate autocorrelation for each feature
1554        for j in 0..n_features {
1555            let col = x.column(j);
1556            let values: Vec<f64> = col
1557                .iter()
1558                .filter(|&&val| val.is_finite())
1559                .copied()
1560                .collect();
1561
1562            if values.len() < 3 {
1563                continue;
1564            }
1565
1566            // Calculate lag-1 autocorrelation
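            // r1 = Σ_t (x_t - x̄)(x_{t+1} - x̄) / Σ_t (x_t - x̄)²
            // (a small worked example lives in the sketch test module after this impl)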
1567            let mean = values.iter().sum::<f64>() / values.len() as f64;
1568            let mut numerator = 0.0;
1569            let mut denominator = 0.0;
1570
1571            for i in 0..values.len() - 1 {
1572                numerator += (values[i] - mean) * (values[i + 1] - mean);
1573            }
1574
1575            for &val in &values {
1576                denominator += (val - mean).powi(2);
1577            }
1578
1579            if denominator > f64::EPSILON {
1580                autocorr_sum += numerator / denominator;
1581                feature_count += 1;
1582            }
1583        }
1584
1585        if feature_count > 0 {
1586            Ok((autocorr_sum / feature_count as f64).abs())
1587        } else {
1588            Ok(0.0)
1589        }
1590    }
1591
1592    /// Estimate trend strength in the data
1593    fn estimate_trend_strength(&self, x: &ArrayView2<f64>) -> Result<f64> {
1594        let (n_samples, n_features) = x.dim();
1595        if n_samples < 5 {
1596            return Ok(0.0);
1597        }
1598
1599        let mut trend_sum = 0.0;
1600        let mut feature_count = 0;
1601
1602        // Calculate trend strength for each feature
1603        for j in 0..n_features {
1604            let col = x.column(j);
1605            let values: Vec<(f64, f64)> = col
1606                .iter()
1607                .enumerate()
1608                .filter(|(_, val)| val.is_finite())
1609                .map(|(i, val)| (i as f64, *val))
1610                .collect();
1611
1612            if values.len() < 5 {
1613                continue;
1614            }
1615
1616            // Calculate linear trend using least squares
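            // slope = (n·Σxy - Σx·Σy) / (n·Σx² - (Σx)²), intercept = (Σy - slope·Σx) / n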
1617            let n = values.len() as f64;
1618            let sum_x: f64 = values.iter().map(|(x, _)| x).sum();
1619            let sum_y: f64 = values.iter().map(|(_, y)| y).sum();
1620            let sum_xy: f64 = values.iter().map(|(x, y)| x * y).sum();
1621            let sum_x2: f64 = values.iter().map(|(x, _)| x * x).sum();
1622
1623            let denominator = n * sum_x2 - sum_x * sum_x;
1624            if denominator.abs() > f64::EPSILON {
1625                let slope = (n * sum_xy - sum_x * sum_y) / denominator;
1626                let intercept = (sum_y - slope * sum_x) / n;
1627
1628                // Calculate R-squared to measure trend strength
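                // R² = 1 - SS_res / SS_tot, floored at 0 below so features with
                // no linear trend do not drag the average negative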
1629                let y_mean = sum_y / n;
1630                let mut ss_tot = 0.0;
1631                let mut ss_res = 0.0;
1632
1633                for (x_val, y_val) in &values {
1634                    let y_pred = slope * x_val + intercept;
1635                    ss_tot += (y_val - y_mean).powi(2);
1636                    ss_res += (y_val - y_pred).powi(2);
1637                }
1638
1639                if ss_tot > f64::EPSILON {
1640                    let r_squared = 1.0 - (ss_res / ss_tot);
1641                    trend_sum += r_squared.max(0.0);
1642                    feature_count += 1;
1643                }
1644            }
1645        }
1646
1647        if feature_count > 0 {
1648            Ok(trend_sum / feature_count as f64)
1649        } else {
1650            Ok(0.0)
1651        }
1652    }
1653
1654    /// Estimate feature connectivity (correlation-based)
1655    fn estimate_feature_connectivity(&self, x: &ArrayView2<f64>) -> Result<f64> {
1656        let (_, n_features) = x.dim();
1657        if n_features < 2 {
1658            return Ok(0.0);
1659        }
1660
1661        let mut strong_connections = 0;
1662        let mut total_connections = 0;
1663        let threshold = 0.5; // Threshold for "strong" connection
1664
1665        // Sample pairs to avoid O(n²) complexity for large feature sets
1666        let max_pairs = 100.min((n_features * (n_features - 1)) / 2);
1667        use scirs2_core::random::Rng;
1668        let mut rng = scirs2_core::random::rng();
1669
1670        for _ in 0..max_pairs {
1671            let i = rng.gen_range(0..n_features);
1672            let j = rng.gen_range(0..n_features);
1673
1674            if i != j {
1675                let col_i = x.column(i);
1676                let col_j = x.column(j);
1677
1678                if let Ok(corr) = self.quick_correlation(&col_i, &col_j) {
1679                    if corr.abs() > threshold {
1680                        strong_connections += 1;
1681                    }
1682                    total_connections += 1;
1683                }
1684            }
1685        }
1686
1687        if total_connections > 0 {
1688            Ok(strong_connections as f64 / total_connections as f64)
1689        } else {
1690            Ok(0.0)
1691        }
1692    }
1693
    /// Quick correlation calculation without full validation
    ///
    /// Pearson r = Σ(x_i - x̄)(y_i - ȳ) / sqrt(Σ(x_i - x̄)² · Σ(y_i - ȳ)²),
    /// computed only over pairs where both values are finite.
    fn quick_correlation(
        &self,
        x: &scirs2_core::ndarray::ArrayView1<f64>,
        y: &scirs2_core::ndarray::ArrayView1<f64>,
    ) -> Result<f64> {
        if x.len() != y.len() || x.len() < 2 {
            return Ok(0.0);
        }

        // Keep only pairs where both values are finite so NaN/inf cannot poison the means
        let pairs: Vec<(f64, f64)> = x
            .iter()
            .zip(y.iter())
            .filter(|(xi, yi)| xi.is_finite() && yi.is_finite())
            .map(|(&xi, &yi)| (xi, yi))
            .collect();

        if pairs.len() < 2 {
            return Ok(0.0);
        }

        let n = pairs.len() as f64;
        let mean_x = pairs.iter().map(|(xi, _)| *xi).sum::<f64>() / n;
        let mean_y = pairs.iter().map(|(_, yi)| *yi).sum::<f64>() / n;

        let mut numerator = 0.0;
        let mut sum_sq_x = 0.0;
        let mut sum_sq_y = 0.0;

        for &(xi, yi) in &pairs {
            let diff_x = xi - mean_x;
            let diff_y = yi - mean_y;
            numerator += diff_x * diff_y;
            sum_sq_x += diff_x * diff_x;
            sum_sq_y += diff_y * diff_y;
        }

        let denominator = (sum_sq_x * sum_sq_y).sqrt();

        if denominator < f64::EPSILON {
            Ok(0.0)
        } else {
            Ok((numerator / denominator).clamp(-1.0, 1.0))
        }
    }
1731
1732    /// Calculate feature clustering coefficient
1733    fn feature_clustering_coefficient(&self, x: &ArrayView2<f64>) -> Result<f64> {
1734        let (_, n_features) = x.dim();
1735        if n_features < 3 {
1736            return Ok(0.0);
1737        }
1738
1739        // Build correlation adjacency matrix (sampled)
1740        let sample_size = 20.min(n_features);
1741
1742        let mut rng = scirs2_core::random::rng();
1743        let mut all_features: Vec<usize> = (0..n_features).collect();
1744        all_features.shuffle(&mut rng);
1745        let sampled_features: Vec<usize> = all_features.into_iter().take(sample_size).collect();
1746
1747        let threshold = 0.5;
1748        let mut adjacency = vec![vec![false; sample_size]; sample_size];
1749
1750        // Build adjacency matrix
1751        for (i, &feat_i) in sampled_features.iter().enumerate() {
1752            for (j, &feat_j) in sampled_features.iter().enumerate() {
1753                if i != j {
1754                    let col_i = x.column(feat_i);
1755                    let col_j = x.column(feat_j);
1756
1757                    if let Ok(corr) = self.quick_correlation(&col_i, &col_j) {
1758                        adjacency[i][j] = corr.abs() > threshold;
1759                    }
1760                }
1761            }
1762        }
1763
1764        // Calculate clustering coefficient
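        // Local coefficient of node i: (edges among its neighbors) / (k_i·(k_i - 1)/2),
        // averaged over all sampled nodes with at least two neighbors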
1765        let mut total_coefficient = 0.0;
1766        let mut node_count = 0;
1767
1768        for i in 0..sample_size {
1769            // Find neighbors of node i
1770            let neighbors: Vec<usize> = (0..sample_size).filter(|&j| adjacency[i][j]).collect();
1771
1772            if neighbors.len() >= 2 {
1773                // Count edges between neighbors
1774                let mut edges_between_neighbors = 0;
1775                let mut possible_edges = 0;
1776
1777                for (ni, &neighbor_i) in neighbors.iter().enumerate() {
1778                    for &neighbor_j in neighbors.iter().skip(ni + 1) {
1779                        possible_edges += 1;
1780                        if adjacency[neighbor_i][neighbor_j] {
1781                            edges_between_neighbors += 1;
1782                        }
1783                    }
1784                }
1785
1786                if possible_edges > 0 {
1787                    total_coefficient += edges_between_neighbors as f64 / possible_edges as f64;
1788                    node_count += 1;
1789                }
1790            }
1791        }
1792
1793        if node_count > 0 {
1794            Ok(total_coefficient / node_count as f64)
1795        } else {
1796            Ok(0.0)
1797        }
1798    }
1799
1800    /// Convert enhanced meta-features to tensor for neural network input
1801    fn enhanced_meta_features_to_tensor(&self, features: &EnhancedMetaFeatures) -> Result<Tensor> {
1802        // Create feature vector with proper normalization
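        // 20 entries total: the 10 base meta-features followed by 10 enhanced ones.
        // Counts, the manifold dimension, and the condition number are log-scaled;
        // ratios are clamped to their natural ranges, matching the [1, 20] reshape below.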
1803        let feature_vec = vec![
1804            // Base features (normalized)
1805            (features.base_features.n_samples as f64).ln().max(0.0),
1806            (features.base_features.n_features as f64).ln().max(0.0),
1807            features.base_features.sparsity.max(0.0).min(1.0),
1808            features.base_features.mean_correlation.max(-1.0).min(1.0),
1809            features.base_features.std_correlation.max(0.0),
1810            features.base_features.mean_skewness.max(-10.0).min(10.0),
1811            features.base_features.mean_kurtosis.max(-10.0).min(10.0),
1812            features.base_features.missing_ratio.max(0.0).min(1.0),
1813            features.base_features.variance_ratio.max(0.0),
1814            features.base_features.outlier_ratio.max(0.0).min(1.0),
1815            // Enhanced features (normalized)
1816            features
1817                .manifold_dimension
1818                .max(1.0)
1819                .min(features.base_features.n_features as f64)
1820                .ln(),
1821            features.clustering_tendency.max(0.0).min(1.0),
1822            features.mutual_information_mean.max(0.0),
1823            features.entropy_estimate.max(0.0),
1824            (features.condition_number.max(1.0)).ln(),
1825            features.volume_ratio.max(0.0).min(1.0),
1826            features.autocorrelation.max(-1.0).min(1.0),
1827            features.trend_strength.max(0.0).min(1.0),
1828            features.connectivity.max(0.0).min(1.0),
1829            features.clustering_coefficient.max(0.0).min(1.0),
1830        ];
1831
1832        // Validate all features are finite
1833        if feature_vec.iter().any(|&f| !f.is_finite()) {
1834            return Err(TransformError::ComputationError(
1835                "Non-finite values in enhanced meta-features".to_string(),
1836            ));
1837        }
1838
1839        Ok(Tensor::f_from_slice(&feature_vec)?
1840            .reshape(&[1, 20])
1841            .to_device(self.device))
1842    }
1843
1844    /// Convert tensor predictions to multi-objective recommendations
1845    fn tensor_to_multi_objective_recommendations(
1846        &self,
1847        tensor: &Tensor,
1848        features: &EnhancedMetaFeatures,
1849    ) -> Result<Vec<MultiObjectiveRecommendation>> {
1850        let scores: Vec<f64> = tensor.try_into().map_err(|e| {
1851            TransformError::ComputationError(format!("Failed to extract tensor data: {:?}", e))
1852        })?;
1853
1854        if scores.len() != 20 {
1855            return Err(TransformError::ComputationError(format!(
1856                "Expected 20 prediction scores, got {}",
1857                scores.len()
1858            )));
1859        }
1860
1861        let mut recommendations = Vec::new();
1862
1863        // Map scores to transformations (first 10 are transformation types)
1864        let transformation_types = [
1865            TransformationType::StandardScaler,
1866            TransformationType::MinMaxScaler,
1867            TransformationType::RobustScaler,
1868            TransformationType::PowerTransformer,
1869            TransformationType::PolynomialFeatures,
1870            TransformationType::PCA,
1871            TransformationType::VarianceThreshold,
1872            TransformationType::QuantileTransformer,
1873            TransformationType::BinaryEncoder,
1874            TransformationType::TargetEncoder,
1875        ];
1876
1877        for (i, t_type) in transformation_types.iter().enumerate() {
1878            if i < scores.len() && scores[i].is_finite() && scores[i] > 0.3 {
1879                // Calculate multi-objective scores
1880                let performance_score = scores[i].max(0.0).min(1.0);
1881
1882                // Estimate efficiency based on data characteristics
1883                let efficiency_score = self.estimate_efficiency_score(t_type, features)?;
1884
1885                // Estimate interpretability
1886                let interpretability_score = self.estimate_interpretability_score(t_type);
1887
1888                // Estimate robustness
1889                let robustness_score = self.estimate_robustness_score(t_type, features);
1890
1891                // Calculate overall score using default weights
1892                let weights = FeatureOptimizationWeights::default();
1893                let overall_score = performance_score * weights.performance_weight
1894                    + efficiency_score * weights.efficiency_weight
1895                    + interpretability_score * weights.interpretability_weight
1896                    + robustness_score * weights.robustness_weight;
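                // (Hypothetical example: weights (0.4, 0.2, 0.2, 0.2) with scores
                // (0.8, 0.6, 0.9, 0.7) give 0.32 + 0.12 + 0.18 + 0.14 = 0.76.)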
1897
1898                recommendations.push(MultiObjectiveRecommendation {
1899                    transformation: TransformationConfig {
1900                        transformation_type: t_type.clone(),
1901                        parameters: self.get_optimized_parameters_for_type(t_type, features)?,
1902                        expected_performance: performance_score,
1903                    },
1904                    performance_score,
1905                    efficiency_score,
1906                    interpretability_score,
1907                    robustness_score,
1908                    overall_score,
1909                });
1910            }
1911        }
1912
1913        // Sort by overall score
1914        recommendations.sort_by(|a, b| {
1915            b.overall_score
1916                .partial_cmp(&a.overall_score)
1917                .unwrap_or(std::cmp::Ordering::Equal)
1918        });
1919
1920        Ok(recommendations)
1921    }
1922
1923    /// Find similar datasets from performance database
1924    fn find_similar_datasets(
1925        &self,
1926        target: &EnhancedMetaFeatures,
1927        k: usize,
1928    ) -> Result<Vec<PerformanceRecord>> {
1929        if self.performance_db.is_empty() {
1930            return Ok(vec![]);
1931        }
1932
1933        let mut similarities: Vec<(usize, f64)> = Vec::new();
1934
1935        for (i, record) in self.performance_db.iter().enumerate() {
1936            let similarity = self.compute_dataset_similarity(target, &record.meta_features)?;
1937            similarities.push((i, similarity));
1938        }
1939
1940        // Sort by similarity and take top k
1941        similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
1942
1943        let mut similar_records = Vec::new();
1944        for (idx, _similarity) in similarities.iter().take(k) {
1945            similar_records.push(self.performance_db[*idx].clone());
1946        }
1947
1948        Ok(similar_records)
1949    }
1950
1951    /// Compute similarity between two datasets using enhanced meta-features
1952    fn compute_dataset_similarity(
1953        &self,
1954        a: &EnhancedMetaFeatures,
1955        b: &DatasetMetaFeatures,
1956    ) -> Result<f64> {
1957        // Compare base features only (since b doesn't have enhanced features)
1958        let features_a = &a.base_features;
1959
1960        // Normalize features for comparison
        let scale_similarity = |val_a: f64, val_b: f64, max_val: f64| -> f64 {
            if max_val > 0.0 {
                // Linear falloff on the given scale, clamped so very different
                // values contribute zero rather than a negative similarity
                (1.0 - (val_a - val_b).abs() / max_val).max(0.0)
            } else if (val_a - val_b).abs() < f64::EPSILON {
                1.0
            } else {
                0.0
            }
        };
1972
1973        // Calculate similarity for each dimension
1974        let similarities = vec![
1975            scale_similarity(
1976                (features_a.n_samples as f64).ln(),
1977                (b.n_samples as f64).ln(),
1978                20.0, // Reasonable scale for log(samples)
1979            ),
1980            scale_similarity(
1981                (features_a.n_features as f64).ln(),
1982                (b.n_features as f64).ln(),
1983                15.0, // Reasonable scale for log(features)
1984            ),
1985            scale_similarity(features_a.sparsity, b.sparsity, 1.0),
1986            scale_similarity(features_a.mean_correlation, b.mean_correlation, 2.0),
1987            scale_similarity(features_a.std_correlation, b.std_correlation, 1.0),
1988            scale_similarity(features_a.mean_skewness, b.mean_skewness, 20.0),
1989            scale_similarity(features_a.mean_kurtosis, b.mean_kurtosis, 20.0),
1990            scale_similarity(features_a.missing_ratio, b.missing_ratio, 1.0),
1991            scale_similarity(features_a.variance_ratio, b.variance_ratio, 10.0),
1992            scale_similarity(features_a.outlier_ratio, b.outlier_ratio, 1.0),
1993        ];
1994
1995        // Weighted average (give more weight to important characteristics)
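        // The weights below sum to 1.0, so the weighted similarity is already on
        // a [0, 1] scale before the final clamp.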
1996        let weights = vec![0.15, 0.15, 0.1, 0.15, 0.05, 0.1, 0.1, 0.05, 0.1, 0.05];
1997        let weighted_similarity = similarities
1998            .iter()
1999            .zip(weights.iter())
2000            .map(|(sim, weight)| sim * weight)
2001            .sum::<f64>();
2002
2003        Ok(weighted_similarity.max(0.0).min(1.0))
2004    }
2005
2006    /// Fallback recommendations when meta-learning fails
2007    fn fallback_recommendations(
2008        &self,
2009        features: &EnhancedMetaFeatures,
2010    ) -> Result<Vec<TransformationConfig>> {
2011        let mut recommendations = Vec::new();
2012        let base_features = &features.base_features;
2013
2014        // Rule-based recommendations
2015
2016        // 1. Always recommend StandardScaler for most datasets
2017        recommendations.push(TransformationConfig {
2018            transformation_type: TransformationType::StandardScaler,
2019            parameters: HashMap::new(),
2020            expected_performance: 0.8,
2021        });
2022
2023        // 2. High dimensionality -> PCA
2024        if base_features.n_features > 100 || base_features.n_features > base_features.n_samples {
2025            let mut params = HashMap::new();
2026            params.insert("n_components".to_string(), 0.95); // Keep 95% variance
2027            recommendations.push(TransformationConfig {
2028                transformation_type: TransformationType::PCA,
2029                parameters: params,
2030                expected_performance: 0.75,
2031            });
2032        }
2033
2034        // 3. High outlier ratio -> RobustScaler
2035        if base_features.outlier_ratio > 0.1 {
2036            recommendations.push(TransformationConfig {
2037                transformation_type: TransformationType::RobustScaler,
2038                parameters: HashMap::new(),
2039                expected_performance: 0.85,
2040            });
2041        }
2042
2043        // 4. High skewness -> PowerTransformer
2044        if base_features.mean_skewness.abs() > 1.5 {
2045            recommendations.push(TransformationConfig {
2046                transformation_type: TransformationType::PowerTransformer,
2047                parameters: HashMap::new(),
2048                expected_performance: 0.8,
2049            });
2050        }
2051
2052        // 5. Low variance features -> VarianceThreshold
2053        if base_features.variance_ratio < 0.1 {
2054            let mut params = HashMap::new();
2055            params.insert("threshold".to_string(), 0.01);
2056            recommendations.push(TransformationConfig {
2057                transformation_type: TransformationType::VarianceThreshold,
2058                parameters: params,
2059                expected_performance: 0.7,
2060            });
2061        }
2062
2063        // Sort by expected performance
2064        recommendations.sort_by(|a, b| {
2065            b.expected_performance
2066                .partial_cmp(&a.expected_performance)
2067                .unwrap_or(std::cmp::Ordering::Equal)
2068        });
2069
2070        Ok(recommendations.into_iter().take(3).collect()) // Return top 3
2071    }
2072
2073    /// Get optimized parameters for a transformation type
2074    fn get_optimized_parameters_for_type(
2075        &self,
2076        t_type: &TransformationType,
2077        features: &EnhancedMetaFeatures,
2078    ) -> Result<HashMap<String, f64>> {
2079        let mut params = HashMap::new();
2080        let base_features = &features.base_features;
2081
2082        match t_type {
2083            TransformationType::PCA => {
2084                // Adaptive n_components based on data characteristics
2085                let variance_threshold = if base_features.n_features > 1000 {
2086                    0.99
2087                } else {
2088                    0.95
2089                };
2090                params.insert("variance_threshold".to_string(), variance_threshold);
2091
2092                // Estimate reasonable number of components
2093                let max_components = base_features.n_features.min(base_features.n_samples);
2094                let estimated_components = if base_features.n_features > base_features.n_samples {
2095                    (base_features.n_samples as f64 * 0.8) as usize
2096                } else {
2097                    (max_components as f64 * variance_threshold) as usize
2098                };
2099                params.insert(
2100                    "n_components".to_string(),
2101                    estimated_components.max(1) as f64,
2102                );
2103            }
2104
2105            TransformationType::PolynomialFeatures => {
2106                // Adaptive degree based on dataset size
2107                let degree = if base_features.n_features > 50 { 2 } else { 3 };
2108                params.insert("degree".to_string(), degree as f64);
2109                params.insert("include_bias".to_string(), 1.0);
2110                params.insert(
2111                    "interaction_only".to_string(),
2112                    if base_features.n_features > 20 {
2113                        1.0
2114                    } else {
2115                        0.0
2116                    },
2117                );
2118            }
2119
2120            TransformationType::VarianceThreshold => {
2121                // Adaptive threshold based on data characteristics
2122                let threshold = if base_features.variance_ratio < 0.01 {
2123                    0.001
2124                } else {
2125                    0.01
2126                };
2127                params.insert("threshold".to_string(), threshold);
2128            }
2129
2130            TransformationType::PowerTransformer => {
2131                // Choose method based on data characteristics
2132                let method = if base_features.has_missing || base_features.outlier_ratio > 0.2 {
2133                    "yeo-johnson" // Can handle zeros and negative values
2134                } else {
2135                    "box-cox" // More powerful but requires positive values
2136                };
2137                params.insert(
2138                    "method".to_string(),
2139                    if method == "yeo-johnson" { 1.0 } else { 0.0 },
2140                );
2141                params.insert("standardize".to_string(), 1.0);
2142            }
2143
2144            TransformationType::QuantileTransformer => {
2145                // Adaptive number of quantiles
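                // e.g. 500 samples -> 500 / 10 = 50 quantiles; small datasets are
                // floored at 10 and very large ones capped at 1000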
2146                let n_quantiles = (base_features.n_samples / 10).max(10).min(1000);
2147                params.insert("n_quantiles".to_string(), n_quantiles as f64);
2148                params.insert("output_distribution".to_string(), 0.0); // 0 = uniform, 1 = normal
2149            }
2150
2151            _ => {
2152                // Default parameters for other transformations
2153            }
2154        }
2155
2156        Ok(params)
2157    }
2158
2159    /// Estimate efficiency score for a transformation
2160    fn estimate_efficiency_score(
2161        &self,
2162        t_type: &TransformationType,
2163        features: &EnhancedMetaFeatures,
2164    ) -> Result<f64> {
2165        let base_features = &features.base_features;
2166        let data_size_factor = (base_features.n_samples * base_features.n_features) as f64;
2167        let log_size = data_size_factor.ln();
2168
2169        let score = match t_type {
2170            TransformationType::StandardScaler | TransformationType::MinMaxScaler => {
2171                1.0 - (log_size / 25.0).min(0.3) // Very efficient, slight penalty for large data
2172            }
2173            TransformationType::RobustScaler => {
2174                0.9 - (log_size / 20.0).min(0.3) // Slightly less efficient due to median computation
2175            }
2176            TransformationType::PCA => {
2177                let complexity_penalty = if base_features.n_features > base_features.n_samples {
2178                    0.5 // Expensive for wide datasets
2179                } else {
2180                    0.3
2181                };
2182                0.7 - complexity_penalty - (log_size / 30.0).min(0.2)
2183            }
2184            TransformationType::PolynomialFeatures => {
2185                let feature_penalty = (base_features.n_features as f64 / 100.0).min(0.5);
2186                0.5 - feature_penalty - (log_size / 15.0).min(0.3)
2187            }
2188            TransformationType::PowerTransformer => 0.8 - (log_size / 25.0).min(0.2),
2189            _ => 0.7, // Default efficiency
2190        };
2191
2192        Ok(score.max(0.1).min(1.0))
2193    }
2194
2195    /// Estimate interpretability score for a transformation
2196    fn estimate_interpretability_score(&self, t_type: &TransformationType) -> f64 {
2197        match t_type {
2198            TransformationType::StandardScaler | TransformationType::MinMaxScaler => 0.9,
2199            TransformationType::RobustScaler => 0.85,
2200            TransformationType::VarianceThreshold => 0.95,
2201            TransformationType::QuantileTransformer => 0.6,
2202            TransformationType::PowerTransformer => 0.7,
2203            TransformationType::PCA => 0.4, // Loses original feature meaning
2204            TransformationType::PolynomialFeatures => 0.3, // Creates many new features
2205            TransformationType::BinaryEncoder | TransformationType::TargetEncoder => 0.5,
2206        }
2207    }
2208
2209    /// Estimate robustness score for a transformation
2210    fn estimate_robustness_score(
2211        &self,
2212        t_type: &TransformationType,
2213        features: &EnhancedMetaFeatures,
2214    ) -> f64 {
2215        let base_features = &features.base_features;
2216
2217        let base_score = match t_type {
2218            TransformationType::RobustScaler => 0.95,
2219            TransformationType::QuantileTransformer => 0.9,
2220            TransformationType::StandardScaler => 0.7,
2221            TransformationType::MinMaxScaler => 0.6,
2222            TransformationType::PowerTransformer => 0.8,
2223            TransformationType::PCA => 0.7,
2224            TransformationType::PolynomialFeatures => 0.7,
2225            TransformationType::VarianceThreshold => 0.75,
2226            TransformationType::BinaryEncoder => 0.65,
2227            TransformationType::TargetEncoder => 0.6,
2228        };
2229
2230        // Adjust based on data characteristics
2231        let outlier_penalty = if base_features.outlier_ratio > 0.1 {
2232            match t_type {
2233                TransformationType::RobustScaler | TransformationType::QuantileTransformer => 0.0,
2234                _ => 0.2,
2235            }
2236        } else {
2237            0.0
2238        };
2239
2240        let missing_penalty = if base_features.has_missing { 0.1 } else { 0.0 };
2241
2242        let score: f64 = base_score - outlier_penalty - missing_penalty;
2243        score.max(0.1f64).min(1.0f64)
2244    }
2245}
2246
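// A minimal sketch (not part of the original module logic): it re-derives the
// lag-1 autocorrelation formula used by `estimate_autocorrelation` above on a
// tiny, hand-checkable vector. The helper name `lag1_autocorr` is illustrative
// only; the private methods on the meta-learning system are not exercised here.
#[cfg(test)]
mod autocorrelation_sketch {
    fn lag1_autocorr(values: &[f64]) -> f64 {
        let mean = values.iter().sum::<f64>() / values.len() as f64;
        let numerator: f64 = values
            .windows(2)
            .map(|w| (w[0] - mean) * (w[1] - mean))
            .sum();
        let denominator: f64 = values.iter().map(|v| (v - mean).powi(2)).sum();
        if denominator > f64::EPSILON {
            numerator / denominator
        } else {
            0.0
        }
    }

    #[test]
    fn ramp_has_positive_lag1_autocorrelation() {
        // For [1, 2, 3, 4, 5]: mean = 3, numerator = 4, denominator = 10 -> r1 = 0.4
        let r1 = lag1_autocorr(&[1.0, 2.0, 3.0, 4.0, 5.0]);
        assert!((r1 - 0.4).abs() < 1e-12);
    }
}
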
2247// Stub implementations when auto-feature-engineering is not enabled
2248/// Advanced meta-learning system for feature engineering (placeholder)
2249#[cfg(not(feature = "auto-feature-engineering"))]
2250pub struct AdvancedMetaLearningSystem;
2251
2252/// Enhanced meta-features for advanced analysis (placeholder)
2253#[cfg(not(feature = "auto-feature-engineering"))]
2254pub struct EnhancedMetaFeatures;
2255
2256/// Multi-objective recommendation system (placeholder)
2257#[cfg(not(feature = "auto-feature-engineering"))]
2258pub struct MultiObjectiveRecommendation;