scirs2_transform/
auto_feature_engineering.rs

//! Automated feature engineering with meta-learning
//!
//! This module provides automated feature engineering capabilities that use
//! meta-learning to select optimal transformations for given datasets.
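//!
//! A minimal usage sketch (illustrative only; the crate path shown here is an
//! assumption and the snippet is not compiled as a doctest):
//!
//! ```ignore
//! use scirs2_core::ndarray::array;
//! use scirs2_transform::auto_feature_engineering::AutoFeatureEngineer;
//!
//! let x = array![[1.0, 10.0], [2.0, 20.0], [3.0, 35.0], [4.0, 70.0]];
//! let engineer = AutoFeatureEngineer::new().expect("engineer");
//!
//! // Inspect dataset statistics, then ask for ranked transformation suggestions.
//! let meta = engineer.extract_meta_features(&x.view()).expect("meta-features");
//! let recommendations = engineer.recommend_transformations(&x.view()).expect("recommendations");
//! for config in &recommendations {
//!     println!("{:?} -> {:.2}", config.transformation_type, config.expected_performance);
//! }
//! ```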

use crate::error::{Result, TransformError};
use scirs2_core::ndarray::{Array1, ArrayView1, ArrayView2};
use scirs2_core::random::seq::SliceRandom;
use scirs2_core::validation::check_not_empty;
use std::collections::HashMap;

#[cfg(feature = "auto-feature-engineering")]
use std::collections::VecDeque;

use statrs::statistics::Statistics;
#[cfg(feature = "auto-feature-engineering")]
use tch::{nn, Device, Tensor};

/// Meta-features extracted from datasets for transformation selection
#[derive(Debug, Clone)]
pub struct DatasetMetaFeatures {
    /// Number of samples
    pub n_samples: usize,
    /// Number of features
    pub n_features: usize,
    /// Sparsity ratio (fraction of zero values)
    pub sparsity: f64,
    /// Mean of pairwise feature correlations
    pub mean_correlation: f64,
    /// Standard deviation of pairwise feature correlations
    pub std_correlation: f64,
    /// Mean skewness across features
    pub mean_skewness: f64,
    /// Mean excess kurtosis across features
    pub mean_kurtosis: f64,
    /// Fraction of missing (NaN) values
    pub missing_ratio: f64,
    /// Dispersion of per-feature variances (std of variances / mean variance)
    pub variance_ratio: f64,
    /// Fraction of values flagged as outliers (IQR rule)
    pub outlier_ratio: f64,
    /// Whether the dataset has missing values
    pub has_missing: bool,
}

/// Available transformation types for automated selection
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TransformationType {
    /// Standardization (Z-score normalization)
    StandardScaler,
    /// Min-max scaling
    MinMaxScaler,
    /// Robust scaling using median and IQR
    RobustScaler,
    /// Power transformation (Box-Cox/Yeo-Johnson)
    PowerTransformer,
    /// Polynomial feature generation
    PolynomialFeatures,
    /// Principal Component Analysis
    PCA,
    /// Feature selection based on variance
    VarianceThreshold,
    /// Quantile transformation
    QuantileTransformer,
    /// Binary encoding for categorical features
    BinaryEncoder,
    /// Target encoding
    TargetEncoder,
}

/// Configuration for a transformation with its parameters
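///
/// A hand-built configuration, as a quick illustration (the values are
/// arbitrary; not compiled as a doctest):
///
/// ```ignore
/// use std::collections::HashMap;
///
/// let mut parameters = HashMap::new();
/// parameters.insert("degree".to_string(), 2.0);
/// let config = TransformationConfig {
///     transformation_type: TransformationType::PolynomialFeatures,
///     parameters,
///     expected_performance: 0.8,
/// };
/// assert_eq!(config.parameters["degree"], 2.0);
/// ```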
#[derive(Debug, Clone)]
pub struct TransformationConfig {
    /// Type of transformation to apply
    pub transformation_type: TransformationType,
    /// Parameters for the transformation
    pub parameters: HashMap<String, f64>,
    /// Expected performance score for this transformation
    pub expected_performance: f64,
}

/// Meta-learning model for transformation selection
#[cfg(feature = "auto-feature-engineering")]
pub struct MetaLearningModel {
    /// Neural network for predicting transformation performance
    model: nn::Sequential,
    /// Variable store for parameters
    vs: nn::VarStore,
    /// Device for computation (CPU/GPU)
    device: Device,
    /// Training data cache
    training_cache: Vec<(DatasetMetaFeatures, Vec<TransformationConfig>)>,
}

#[cfg(feature = "auto-feature-engineering")]
impl MetaLearningModel {
    /// Create a new meta-learning model
    pub fn new() -> Result<Self> {
        let device = Device::cuda_if_available();
        let vs = nn::VarStore::new(device);
        let root = vs.root();

        // Build neural network architecture
        let model = nn::seq()
            .add(nn::linear(&root / "layer1", 10, 64, Default::default()))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(&root / "layer2", 64, 32, Default::default()))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(&root / "layer3", 32, 16, Default::default()))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(&root / "output", 16, 10, Default::default()))
            .add_fn(|xs| xs.softmax(-1, tch::Kind::Float));

        Ok(MetaLearningModel {
            model,
            vs,
            device,
            training_cache: Vec::new(),
        })
    }

    /// Train the meta-learning model on historical transformation performance data
    pub fn train(
        &mut self,
        training_data: Vec<(DatasetMetaFeatures, Vec<TransformationConfig>)>,
    ) -> Result<()> {
        self.training_cache.extend(training_data.clone());

        // Convert training data to tensors
        let (input_features, target_scores) = self.prepare_training_data(&training_data)?;

        // Training loop - placeholder implementation.
        // Note: full training requires proper optimizer setup in tch 0.20;
        // this is a simplified version kept for compilation compatibility.

        for epoch in 0..100 {
            let predicted = input_features.apply(&self.model);
            let loss = predicted.mse_loss(&target_scores, tch::Reduction::Mean);

            if epoch % 20 == 0 {
                println!("Epoch {epoch}: Loss = {:.4}", loss.double_value(&[]));
            }

            // TODO: Implement proper optimizer when tch API is stabilized
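            //
            // A possible shape for that step, left as a hedged sketch in
            // comments rather than live code (assumes tch's `nn::Adam` /
            // `Optimizer` API; the optimizer would be built once, before
            // this loop, from `self.vs`):
            //
            //   let mut opt = nn::Adam::default().build(&self.vs, 1e-3)
            //       .map_err(|e| TransformError::ComputationError(e.to_string()))?;
            //   ...
            //   opt.backward_step(&loss);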
        }

        Ok(())
    }

    /// Predict optimal transformations for a given dataset
    pub fn predict_transformations(
        &self,
        meta_features: &DatasetMetaFeatures,
    ) -> Result<Vec<TransformationConfig>> {
        let input_tensor = self.meta_features_to_tensor(meta_features)?;
        let prediction = input_tensor.apply(&self.model);

        // Convert prediction to transformation recommendations
        self.tensor_to_transformations(&prediction)
    }

    fn prepare_training_data(
        &self,
        training_data: &[(DatasetMetaFeatures, Vec<TransformationConfig>)],
    ) -> Result<(Tensor, Tensor)> {
        if training_data.is_empty() {
            return Err(TransformError::InvalidInput(
                "Training data cannot be empty".to_string(),
            ));
        }

        let n_samples = training_data.len();
        let mut input_features = Vec::with_capacity(n_samples * 10);
        let mut target_scores = Vec::with_capacity(n_samples * 10);

        for (meta_features, transformations) in training_data {
            // Normalize feature values for better training stability
            let features = vec![
                (meta_features.n_samples as f64).ln().max(0.0), // Log-scale for sample count
                (meta_features.n_features as f64).ln().max(0.0), // Log-scale for feature count
                meta_features.sparsity.clamp(0.0, 1.0),         // Clamp to [0, 1]
                meta_features.mean_correlation.clamp(-1.0, 1.0), // Clamp to [-1, 1]
                meta_features.std_correlation.max(0.0),         // Non-negative
                meta_features.mean_skewness.clamp(-10.0, 10.0), // Reasonable bounds
                meta_features.mean_kurtosis.clamp(-10.0, 10.0), // Reasonable bounds
                meta_features.missing_ratio.clamp(0.0, 1.0),    // Clamp to [0, 1]
                meta_features.variance_ratio.max(0.0),          // Non-negative
                meta_features.outlier_ratio.clamp(0.0, 1.0),    // Clamp to [0, 1]
            ];

            // Validate all features are finite
            if features.iter().any(|&f| !f.is_finite()) {
                return Err(TransformError::ComputationError(
                    "Non-finite values detected in meta-features".to_string(),
                ));
            }

            input_features.extend(features);

            // Create target vector (transformation type scores)
            let mut scores = vec![0.0f64; 10]; // Number of transformation types
            for config in transformations {
                let idx = self.transformation_type_to_index(&config.transformation_type);
                let performance = config.expected_performance.clamp(0.0, 1.0); // Clamp to [0, 1]
                scores[idx] = scores[idx].max(performance); // Take max if multiple configs for same type
            }
            target_scores.extend(scores);
        }

        let input_tensor = Tensor::from_slice(&input_features)
            .reshape(&[n_samples as i64, 10])
            .to_device(self.device);
        let target_tensor = Tensor::from_slice(&target_scores)
            .reshape(&[n_samples as i64, 10])
            .to_device(self.device);

        Ok((input_tensor, target_tensor))
    }

    fn meta_features_to_tensor(&self, meta_features: &DatasetMetaFeatures) -> Result<Tensor> {
        // Apply the same normalization as in training data preparation
        let features = vec![
            (meta_features.n_samples as f64).ln().max(0.0),
            (meta_features.n_features as f64).ln().max(0.0),
            meta_features.sparsity.clamp(0.0, 1.0),
            meta_features.mean_correlation.clamp(-1.0, 1.0),
            meta_features.std_correlation.max(0.0),
            meta_features.mean_skewness.clamp(-10.0, 10.0),
            meta_features.mean_kurtosis.clamp(-10.0, 10.0),
            meta_features.missing_ratio.clamp(0.0, 1.0),
            meta_features.variance_ratio.max(0.0),
            meta_features.outlier_ratio.clamp(0.0, 1.0),
        ];

        // Validate all features are finite
        if features.iter().any(|&f| !f.is_finite()) {
            return Err(TransformError::ComputationError(
                "Non-finite values detected in meta-features".to_string(),
            ));
        }

        Ok(Tensor::from_slice(&features)
            .reshape(&[1, 10])
            .to_device(self.device))
    }

    fn tensor_to_transformations(&self, prediction: &Tensor) -> Result<Vec<TransformationConfig>> {
        let scores: Vec<f64> = prediction.try_into().map_err(|e| {
            TransformError::ComputationError(format!("Failed to extract tensor data: {:?}", e))
        })?;

        if scores.len() != 10 {
            return Err(TransformError::ComputationError(format!(
                "Expected 10 prediction scores, got {}",
                scores.len()
            )));
        }

        let mut transformations = Vec::new();

        // Use an adaptive threshold based on the score distribution
        let max_score = scores.iter().fold(0.0f64, |a, &b| a.max(b));
        let mean_score = scores.iter().sum::<f64>() / scores.len() as f64;
        let threshold = (max_score * 0.7 + mean_score * 0.3).max(0.3); // Adaptive threshold
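        // Worked example with illustrative numbers: if max_score = 0.8 and
        // mean_score = 0.3, then threshold = 0.8 * 0.7 + 0.3 * 0.3 = 0.65, so
        // only transformations scoring above 0.65 survive this first pass.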

        for (i, &score) in scores.iter().enumerate() {
            if score > threshold && score.is_finite() {
                let transformation_type = self.index_to_transformation_type(i);
                let config = TransformationConfig {
                    transformation_type: transformation_type.clone(),
                    parameters: self.get_default_parameters_for_type(&transformation_type),
                    expected_performance: score.clamp(0.0, 1.0), // Clamp to valid range
                };
                transformations.push(config);
            }
        }

        // If no transformations meet the threshold, take the top 3
        if transformations.is_empty() {
            let mut score_indices: Vec<(usize, f64)> = scores
                .iter()
                .enumerate()
                .filter(|(_, &score)| score.is_finite())
                .map(|(i, &score)| (i, score))
                .collect();

            score_indices
                .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));

            for (i, score) in score_indices.into_iter().take(3) {
                let transformation_type = self.index_to_transformation_type(i);
                let config = TransformationConfig {
                    transformation_type: transformation_type.clone(),
                    parameters: self.get_default_parameters_for_type(&transformation_type),
                    expected_performance: score.clamp(0.0, 1.0),
                };
                transformations.push(config);
            }
        }

        // Sort by expected performance
        transformations.sort_by(|a, b| {
            b.expected_performance
                .partial_cmp(&a.expected_performance)
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        Ok(transformations)
    }

    fn transformation_type_to_index(&self, t_type: &TransformationType) -> usize {
        match t_type {
            TransformationType::StandardScaler => 0,
            TransformationType::MinMaxScaler => 1,
            TransformationType::RobustScaler => 2,
            TransformationType::PowerTransformer => 3,
            TransformationType::PolynomialFeatures => 4,
            TransformationType::PCA => 5,
            TransformationType::VarianceThreshold => 6,
            TransformationType::QuantileTransformer => 7,
            TransformationType::BinaryEncoder => 8,
            TransformationType::TargetEncoder => 9,
        }
    }

    fn index_to_transformation_type(&self, index: usize) -> TransformationType {
        match index {
            0 => TransformationType::StandardScaler,
            1 => TransformationType::MinMaxScaler,
            2 => TransformationType::RobustScaler,
            3 => TransformationType::PowerTransformer,
            4 => TransformationType::PolynomialFeatures,
            5 => TransformationType::PCA,
            6 => TransformationType::VarianceThreshold,
            7 => TransformationType::QuantileTransformer,
            8 => TransformationType::BinaryEncoder,
            9 => TransformationType::TargetEncoder,
            _ => TransformationType::StandardScaler,
        }
    }

    fn get_default_parameters_for_type(&self, t_type: &TransformationType) -> HashMap<String, f64> {
        let mut params = HashMap::new();
        match t_type {
            TransformationType::PCA => {
                params.insert("n_components".to_string(), 0.95); // Keep 95% of variance
            }
            TransformationType::PolynomialFeatures => {
                params.insert("degree".to_string(), 2.0);
                params.insert("include_bias".to_string(), 0.0);
            }
            TransformationType::VarianceThreshold => {
                params.insert("threshold".to_string(), 0.01);
            }
            _ => {} // Use defaults for other transformations
        }
        params
    }
}

/// Automated feature engineering pipeline
pub struct AutoFeatureEngineer {
    #[cfg(feature = "auto-feature-engineering")]
    meta_model: MetaLearningModel,
    /// Historical transformation performance data
    #[cfg(feature = "auto-feature-engineering")]
    transformation_history: Vec<(DatasetMetaFeatures, Vec<TransformationConfig>, f64)>,
}

impl AutoFeatureEngineer {
    /// Expose Pearson correlation as a public method for external use
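    ///
    /// A small usage sketch (illustrative; not compiled as a doctest):
    ///
    /// ```ignore
    /// use scirs2_core::ndarray::array;
    ///
    /// let x = array![1.0, 2.0, 3.0, 4.0];
    /// let y = array![2.0, 4.0, 6.0, 8.0];
    /// let engineer = AutoFeatureEngineer::new().expect("engineer");
    /// let r = engineer.pearson_correlation(&x.view(), &y.view()).expect("correlation");
    /// assert!((r - 1.0).abs() < 1e-12); // perfectly linear relationship
    /// ```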
    #[allow(dead_code)]
    pub fn pearson_correlation(&self, x: &ArrayView1<f64>, y: &ArrayView1<f64>) -> Result<f64> {
        self.pearson_correlation_internal(x, y)
    }

    /// Create a new automated feature engineer
    pub fn new() -> Result<Self> {
        #[cfg(feature = "auto-feature-engineering")]
        let meta_model = MetaLearningModel::new()?;

        Ok(AutoFeatureEngineer {
            #[cfg(feature = "auto-feature-engineering")]
            meta_model,
            #[cfg(feature = "auto-feature-engineering")]
            transformation_history: Vec::new(),
        })
    }

    /// Extract meta-features from a dataset
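    ///
    /// A minimal sketch of the call (illustrative; not compiled as a doctest):
    ///
    /// ```ignore
    /// use scirs2_core::ndarray::array;
    ///
    /// let x = array![[1.0, 10.0], [2.0, 20.0], [3.0, 35.0]];
    /// let engineer = AutoFeatureEngineer::new().expect("engineer");
    /// let meta = engineer.extract_meta_features(&x.view()).expect("meta-features");
    /// assert_eq!(meta.n_samples, 3);
    /// assert_eq!(meta.n_features, 2);
    /// ```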
    pub fn extract_meta_features(&self, x: &ArrayView2<f64>) -> Result<DatasetMetaFeatures> {
        check_not_empty(x, "x")?;

        // Check finite values
        for &val in x.iter() {
            if !val.is_finite() {
                return Err(crate::error::TransformError::DataValidationError(
                    "Data contains non-finite values".to_string(),
                ));
            }
        }

        let (n_samples, n_features) = x.dim();

        if n_samples < 2 || n_features < 1 {
            return Err(TransformError::InvalidInput(
                "Dataset must have at least 2 samples and 1 feature".to_string(),
            ));
        }

        // Calculate sparsity
        let zeros = x.iter().filter(|&&val| val == 0.0).count();
        let sparsity = zeros as f64 / (n_samples * n_features) as f64;

        // Calculate correlation statistics
        let correlations = self.compute_feature_correlations(x)?;
        let mean_correlation = if correlations.is_empty() {
            0.0
        } else {
            correlations.iter().sum::<f64>() / correlations.len() as f64
        };
        let std_correlation = if correlations.len() < 2 {
            0.0
        } else {
            let var = correlations
                .iter()
                .map(|&c| (c - mean_correlation).powi(2))
                .sum::<f64>()
                / (correlations.len() - 1) as f64;
            var.sqrt()
        };

        // Calculate skewness and kurtosis
        let (mean_skewness, mean_kurtosis) = self.compute_distribution_stats(x)?;

        // Calculate missing values (assuming NaN represents missing).
        // Note: the finite-value check above rejects NaN, so these are 0/false
        // for any input that passes validation.
        let missing_count = x.iter().filter(|val| val.is_nan()).count();
        let missing_ratio = missing_count as f64 / (n_samples * n_features) as f64;
        let has_missing = missing_count > 0;

        // Calculate variance statistics with better numerical stability
        let variances: Array1<f64> = x.var_axis(scirs2_core::ndarray::Axis(0), 0.0);
        let finite_variances: Vec<f64> = variances
            .iter()
            .filter(|&&v| v.is_finite() && v >= 0.0)
            .copied()
            .collect();

        let variance_ratio = if finite_variances.is_empty() {
            0.0
        } else {
            let mean_var = finite_variances.iter().sum::<f64>() / finite_variances.len() as f64;
            if mean_var < f64::EPSILON {
                0.0
            } else {
                let var_of_vars = finite_variances
                    .iter()
                    .map(|&v| (v - mean_var).powi(2))
                    .sum::<f64>()
                    / finite_variances.len() as f64;
                (var_of_vars.sqrt() / mean_var).min(100.0) // Cap at a reasonable value
            }
        };

        // Calculate outlier ratio (using the IQR method)
        let outlier_ratio = self.compute_outlier_ratio(x)?;

        Ok(DatasetMetaFeatures {
            n_samples,
            n_features,
            sparsity,
            mean_correlation,
            std_correlation,
            mean_skewness,
            mean_kurtosis,
            missing_ratio,
            variance_ratio,
            outlier_ratio,
            has_missing,
        })
    }

    /// Recommend optimal transformations for a dataset
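    ///
    /// Intended call pattern, sketched (illustrative; not compiled as a doctest):
    ///
    /// ```ignore
    /// let engineer = AutoFeatureEngineer::new().expect("engineer");
    /// let recommendations = engineer.recommend_transformations(&x.view()).expect("recommendations");
    /// // Recommendations are sorted by expected performance, best first.
    /// if let Some(best) = recommendations.first() {
    ///     println!("best: {:?} ({:.2})", best.transformation_type, best.expected_performance);
    /// }
    /// ```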
    #[cfg(feature = "auto-feature-engineering")]
    pub fn recommend_transformations(
        &self,
        x: &ArrayView2<f64>,
    ) -> Result<Vec<TransformationConfig>> {
        let meta_features = self.extract_meta_features(x)?;
        self.meta_model.predict_transformations(&meta_features)
    }

    /// Recommend optimal transformations for a dataset (fallback implementation)
    #[cfg(not(feature = "auto-feature-engineering"))]
    pub fn recommend_transformations(
        &self,
        x: &ArrayView2<f64>,
    ) -> Result<Vec<TransformationConfig>> {
        // Fall back to rule-based recommendations
        self.rule_based_recommendations(x)
    }

    /// Rule-based transformation recommendations (fallback)
    fn rule_based_recommendations(&self, x: &ArrayView2<f64>) -> Result<Vec<TransformationConfig>> {
        let meta_features = self.extract_meta_features(x)?;
        let mut recommendations = Vec::new();

        // Rule 1: High skewness -> power transformation
        if meta_features.mean_skewness.abs() > 1.0 {
            recommendations.push(TransformationConfig {
                transformation_type: TransformationType::PowerTransformer,
                parameters: HashMap::new(),
                expected_performance: 0.8,
            });
        }

        // Rule 2: High dimensionality -> PCA
        if meta_features.n_features > 100 {
            let mut params = HashMap::new();
            params.insert("n_components".to_string(), 0.95);
            recommendations.push(TransformationConfig {
                transformation_type: TransformationType::PCA,
                parameters: params,
                expected_performance: 0.75,
            });
        }

        // Rule 3: Widely differing scales -> StandardScaler
        if meta_features.variance_ratio > 1.0 {
            recommendations.push(TransformationConfig {
                transformation_type: TransformationType::StandardScaler,
                parameters: HashMap::new(),
                expected_performance: 0.9,
            });
        }

        // Rule 4: High outlier ratio -> RobustScaler
        if meta_features.outlier_ratio > 0.1 {
            recommendations.push(TransformationConfig {
                transformation_type: TransformationType::RobustScaler,
                parameters: HashMap::new(),
                expected_performance: 0.85,
            });
        }

        // Sort by expected performance
        recommendations.sort_by(|a, b| {
            b.expected_performance
                .partial_cmp(&a.expected_performance)
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        Ok(recommendations)
    }

    /// Train the meta-learning model with new data
    #[cfg(feature = "auto-feature-engineering")]
    pub fn update_model(
        &mut self,
        meta_features: DatasetMetaFeatures,
        transformations: Vec<TransformationConfig>,
        performance: f64,
    ) -> Result<()> {
        self.transformation_history.push((
            meta_features.clone(),
            transformations.clone(),
            performance,
        ));

        // Retrain every 10 new examples
        if self.transformation_history.len() % 10 == 0 {
            let training_data: Vec<_> = self
                .transformation_history
                .iter()
                .map(|(meta, trans, _perf)| (meta.clone(), trans.clone()))
                .collect();
            self.meta_model.train(training_data)?;
        }

        Ok(())
    }

    fn compute_feature_correlations(&self, x: &ArrayView2<f64>) -> Result<Array1<f64>> {
        let n_features = x.ncols();

        if n_features < 2 {
            return Ok(Array1::zeros(0));
        }

        let mut correlations = Vec::with_capacity((n_features * (n_features - 1)) / 2);

        for i in 0..n_features {
            for j in i + 1..n_features {
                let col_i = x.column(i);
                let col_j = x.column(j);
                let correlation = self.pearson_correlation_internal(&col_i, &col_j)?;
                correlations.push(correlation);
            }
        }

        Ok(Array1::from_vec(correlations))
    }

    fn pearson_correlation_internal(
        &self,
        x: &ArrayView1<f64>,
        y: &ArrayView1<f64>,
    ) -> Result<f64> {
        if x.len() != y.len() {
            return Err(TransformError::InvalidInput(
                "Arrays must have the same length for correlation calculation".to_string(),
            ));
        }

        if x.len() < 2 {
            return Ok(0.0);
        }

        let mean_x = x.mean().ok_or_else(|| {
            TransformError::ComputationError("Failed to compute mean of x".to_string())
        })?;
        let mean_y = y.mean().ok_or_else(|| {
            TransformError::ComputationError("Failed to compute mean of y".to_string())
        })?;

        let numerator: f64 = x
            .iter()
            .zip(y.iter())
            .map(|(&xi, &yi)| (xi - mean_x) * (yi - mean_y))
            .sum();

        let sum_sq_x: f64 = x.iter().map(|&xi| (xi - mean_x).powi(2)).sum();
        let sum_sq_y: f64 = y.iter().map(|&yi| (yi - mean_y).powi(2)).sum();

        let denominator = (sum_sq_x * sum_sq_y).sqrt();

        if denominator < f64::EPSILON {
            Ok(0.0)
        } else {
            let correlation = numerator / denominator;
            // Clamp to the valid correlation range to guard against numerical precision issues
            Ok(correlation.clamp(-1.0, 1.0))
        }
    }

    fn compute_distribution_stats(&self, x: &ArrayView2<f64>) -> Result<(f64, f64)> {
        let mut skewness_values = Vec::new();
        let mut kurtosis_values = Vec::new();

        for col in x.columns() {
            // Filter out non-finite values
            let finite_values: Vec<f64> = col
                .iter()
                .filter(|&&val| val.is_finite())
                .copied()
                .collect();

            if finite_values.len() < 3 {
                continue; // Need at least 3 values for meaningful skewness/kurtosis
            }

            let n = finite_values.len() as f64;
            let mean = finite_values.iter().sum::<f64>() / n;

            // Calculate variance using a more numerically stable method
            let variance = finite_values
                .iter()
                .map(|&val| (val - mean).powi(2))
                .sum::<f64>()
                / (n - 1.0); // Sample variance

            let std = variance.sqrt();

            if std > f64::EPSILON * 1000.0 {
                // More robust threshold
                // Sample skewness with bias correction
                let m3: f64 = finite_values
                    .iter()
                    .map(|&val| ((val - mean) / std).powi(3))
                    .sum::<f64>()
                    / n;

                let skew = if n > 2.0 {
                    m3 * (n * (n - 1.0)).sqrt() / (n - 2.0) // Bias-corrected skewness
                } else {
                    m3
                };

                // Sample kurtosis with bias correction
                let m4: f64 = finite_values
                    .iter()
                    .map(|&val| ((val - mean) / std).powi(4))
                    .sum::<f64>()
                    / n;

                let kurt = if n > 3.0 {
                    // Bias-corrected excess kurtosis
                    let numerator = (n - 1.0) * ((n + 1.0) * m4 - 3.0 * (n - 1.0));
                    let denominator = (n - 2.0) * (n - 3.0);
                    numerator / denominator
                } else {
                    m4 - 3.0 // Simple excess kurtosis
                };

                // Clamp to reasonable ranges to avoid extreme outliers
                skewness_values.push(skew.clamp(-20.0, 20.0));
                kurtosis_values.push(kurt.clamp(-20.0, 20.0));
            }
        }

        let mean_skewness = if skewness_values.is_empty() {
            0.0
        } else {
            skewness_values.iter().sum::<f64>() / skewness_values.len() as f64
        };

        let mean_kurtosis = if kurtosis_values.is_empty() {
            0.0
        } else {
            kurtosis_values.iter().sum::<f64>() / kurtosis_values.len() as f64
        };

        Ok((mean_skewness, mean_kurtosis))
    }

    fn compute_outlier_ratio(&self, x: &ArrayView2<f64>) -> Result<f64> {
        let mut total_outliers = 0;
        let mut total_values = 0;

        for col in x.columns() {
            let mut sorted_col: Vec<f64> = col
                .iter()
                .filter(|&&val| val.is_finite())
                .copied()
                .collect();

            if sorted_col.is_empty() {
                continue;
            }

            sorted_col.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

            let n = sorted_col.len();
            if n < 4 {
                continue;
            }

            // Approximate quartiles by index
            let q1_idx = (n as f64 * 0.25) as usize;
            let q3_idx = (n as f64 * 0.75) as usize;
            let q1 = sorted_col[q1_idx.min(n - 1)];
            let q3 = sorted_col[q3_idx.min(n - 1)];

            let iqr = q3 - q1;

            // Avoid division by zero or a vanishingly small IQR
            if iqr < f64::EPSILON {
                continue;
            }

            let lower_bound = q1 - 1.5 * iqr;
            let upper_bound = q3 + 1.5 * iqr;

            let outliers = col
                .iter()
                .filter(|&&val| val.is_finite() && (val < lower_bound || val > upper_bound))
                .count();

            total_outliers += outliers;
            total_values += col.len();
        }

        if total_values == 0 {
            Ok(0.0)
        } else {
            Ok(total_outliers as f64 / total_values as f64)
        }
    }
}

/// Advanced meta-learning system with deep learning and reinforcement learning
#[cfg(feature = "auto-feature-engineering")]
pub struct AdvancedMetaLearningSystem {
    /// Deep neural network for meta-learning
    deep_model: nn::Sequential,
    /// Transformer model for sequence-based recommendations
    transformer_model: nn::Sequential,
    /// Reinforcement learning agent for transformation selection
    rl_agent: Option<RLAgent>,
    /// Device for computation
    device: Device,
    /// Historical performance database
    performance_db: Vec<PerformanceRecord>,
    /// Multi-objective optimization weights
    optimization_weights: FeatureOptimizationWeights,
    /// Transfer learning cache
    transfer_cache: HashMap<String, Tensor>,
}

/// Reinforcement learning agent for transformation selection
#[cfg(feature = "auto-feature-engineering")]
pub struct RLAgent {
    /// Q-network for value estimation
    q_network: nn::Sequential,
    /// Target network for stable training
    target_network: nn::Sequential,
    /// Experience replay buffer
    replay_buffer: VecDeque<Experience>,
    /// Epsilon for exploration
    epsilon: f64,
    /// Learning rate
    learning_rate: f64,
    /// Discount factor
    gamma: f64,
}

/// Experience tuple for reinforcement learning
#[cfg(feature = "auto-feature-engineering")]
#[derive(Debug, Clone)]
pub struct Experience {
    /// State representation (meta-features)
    state: Vec<f64>,
    /// Action taken (transformation choice)
    action: usize,
    /// Reward received (performance improvement)
    reward: f64,
    /// Next state
    next_state: Vec<f64>,
    /// Whether the episode terminated
    done: bool,
}

/// Performance record for historical analysis
#[cfg(feature = "auto-feature-engineering")]
#[derive(Debug, Clone)]
pub struct PerformanceRecord {
    /// Dataset meta-features
    meta_features: DatasetMetaFeatures,
    /// Applied transformations
    transformations: Vec<TransformationConfig>,
    /// Performance metrics
    metrics: PerformanceMetrics,
    /// Computational cost
    computational_cost: f64,
    /// Timestamp
    timestamp: u64,
}

/// Multi-objective optimization weights
#[cfg(feature = "auto-feature-engineering")]
#[derive(Debug, Clone)]
pub struct FeatureOptimizationWeights {
    /// Weight for prediction performance
    performance_weight: f64,
    /// Weight for computational efficiency
    efficiency_weight: f64,
    /// Weight for model interpretability
    interpretability_weight: f64,
    /// Weight for robustness
    robustness_weight: f64,
}

#[cfg(feature = "auto-feature-engineering")]
impl Default for FeatureOptimizationWeights {
    fn default() -> Self {
        FeatureOptimizationWeights {
            performance_weight: 0.5,
            efficiency_weight: 0.3,
            interpretability_weight: 0.1,
            robustness_weight: 0.1,
        }
    }
}

/// Performance metrics for multi-objective optimization
#[cfg(feature = "auto-feature-engineering")]
#[derive(Debug, Clone)]
pub struct PerformanceMetrics {
    /// Prediction accuracy/score
    accuracy: f64,
    /// Training time in seconds
    training_time: f64,
    /// Memory usage in MB
    memory_usage: f64,
    /// Model complexity score
    complexity_score: f64,
    /// Cross-validation score
    cv_score: f64,
}

/// Enhanced meta-features with advanced statistical measures
#[cfg(feature = "auto-feature-engineering")]
#[derive(Debug, Clone)]
pub struct EnhancedMetaFeatures {
    /// Base meta-features
    pub base_features: DatasetMetaFeatures,
    /// Estimated intrinsic dimension
    pub manifold_dimension: f64,
    /// Hopkins statistic for clustering tendency
    pub clustering_tendency: f64,
    /// Average mutual information between features
    pub mutual_information_mean: f64,
    /// Differential entropy estimate
    pub entropy_estimate: f64,
    /// Condition number estimate
    pub condition_number: f64,
    /// Volume ratio (convex hull to bounding box)
    pub volume_ratio: f64,
    /// Autocorrelation coefficient
    pub autocorrelation: f64,
    /// Trend strength
    pub trend_strength: f64,
    /// Feature connectivity
    pub connectivity: f64,
    /// Feature clustering coefficient
    pub clustering_coefficient: f64,
}

/// Multi-objective recommendation with performance trade-offs
#[cfg(feature = "auto-feature-engineering")]
#[derive(Debug, Clone)]
pub struct MultiObjectiveRecommendation {
    /// Transformation configuration
    pub transformation: TransformationConfig,
    /// Expected performance score
    pub performance_score: f64,
    /// Computational efficiency score
    pub efficiency_score: f64,
    /// Interpretability score
    pub interpretability_score: f64,
    /// Robustness score
    pub robustness_score: f64,
    /// Overall multi-objective score
    pub overall_score: f64,
}

#[cfg(feature = "auto-feature-engineering")]
impl AdvancedMetaLearningSystem {
    /// Create a new advanced meta-learning system
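    ///
    /// Intended setup, sketched (illustrative only; requires the
    /// `auto-feature-engineering` feature and a working `tch` backend):
    ///
    /// ```ignore
    /// let mut system = AdvancedMetaLearningSystem::new().expect("system");
    /// system.initialize_rl_agent().expect("rl agent");
    /// ```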
    pub fn new() -> Result<Self> {
        let device = Device::cuda_if_available();
        let vs = nn::VarStore::new(device);
        let root = vs.root();

        // Build deep neural network with advanced architecture
        let deep_model = nn::seq()
            .add(nn::linear(
                &root / "deep_layer1",
                20,
                128,
                Default::default(),
            ))
            .add_fn(|xs| xs.relu())
            .add_fn(|xs| xs.dropout(0.3, false))
            .add(nn::linear(
                &root / "deep_layer2",
                128,
                256,
                Default::default(),
            ))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(
                &root / "deep_layer3",
                256,
                128,
                Default::default(),
            ))
            .add_fn(|xs| xs.relu())
            .add_fn(|xs| xs.dropout(0.3, false))
            .add(nn::linear(
                &root / "deep_layer4",
                128,
                64,
                Default::default(),
            ))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(
                &root / "deep_output",
                64,
                20,
                Default::default(),
            ))
            .add_fn(|xs| xs.softmax(-1, tch::Kind::Float));

        // Build transformer model for sequence-based recommendations
        let transformer_model = nn::seq()
            .add(nn::linear(&root / "trans_embed", 20, 256, Default::default()))
            // Note: actual transformer layers would be implemented here
            .add(nn::linear(&root / "trans_layer1", 256, 256, Default::default()))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(&root / "trans_layer2", 256, 128, Default::default()))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(&root / "trans_output", 128, 20, Default::default()))
            .add_fn(|xs| xs.softmax(-1, tch::Kind::Float));

        Ok(AdvancedMetaLearningSystem {
            deep_model,
            transformer_model,
            rl_agent: None,
            device,
            performance_db: Vec::new(),
            optimization_weights: FeatureOptimizationWeights::default(),
            transfer_cache: HashMap::new(),
        })
    }

    /// Initialize the reinforcement learning agent
    pub fn initialize_rl_agent(&mut self) -> Result<()> {
        let vs = nn::VarStore::new(self.device);
        let root = vs.root();

        // Q-network architecture
        let q_network = nn::seq()
            .add(nn::linear(&root / "q_layer1", 20, 128, Default::default()))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(&root / "q_layer2", 128, 256, Default::default()))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(&root / "q_layer3", 256, 128, Default::default()))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(&root / "q_output", 128, 20, Default::default())); // 20 possible transformations

        // Target network (copy of the Q-network)
        let target_vs = nn::VarStore::new(self.device);
        let target_root = target_vs.root();
        let target_network = nn::seq()
            .add(nn::linear(
                &target_root / "target_layer1",
                20,
                128,
                Default::default(),
            ))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(
                &target_root / "target_layer2",
                128,
                256,
                Default::default(),
            ))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(
                &target_root / "target_layer3",
                256,
                128,
                Default::default(),
            ))
            .add_fn(|xs| xs.relu())
            .add(nn::linear(
                &target_root / "target_output",
                128,
                20,
                Default::default(),
            ));

        self.rl_agent = Some(RLAgent {
            q_network,
            target_network,
            replay_buffer: VecDeque::with_capacity(10000),
            epsilon: 0.1,
            learning_rate: 0.001,
            gamma: 0.99,
        });

        Ok(())
    }

    /// Enhanced meta-feature extraction with advanced statistical measures
    pub fn extract_enhanced_meta_features(
        &self,
        x: &ArrayView2<f64>,
    ) -> Result<EnhancedMetaFeatures> {
        let auto_engineer = AutoFeatureEngineer::new()?;
        let base_features = auto_engineer.extract_meta_features(x)?;

        // Extract additional advanced meta-features

        // Topological features
        let manifold_dimension = self.estimate_intrinsic_dimension(x)?;
        let clustering_tendency = self.hopkins_statistic(x)?;

        // Information-theoretic features
        let mutual_information_mean = self.average_mutual_information(x)?;
        let entropy_estimate = self.differential_entropy_estimate(x)?;

        // Geometric features
        let condition_number = self.estimate_condition_number(x)?;
        let volume_ratio = self.estimate_volume_ratio(x)?;

        // Temporal features (if applicable)
        let autocorrelation = self.estimate_autocorrelation(x)?;
        let trend_strength = self.estimate_trend_strength(x)?;

        // Network/graph features
        let connectivity = self.estimate_feature_connectivity(x)?;
        let clustering_coefficient = self.feature_clustering_coefficient(x)?;

        Ok(EnhancedMetaFeatures {
            base_features,
            manifold_dimension,
            clustering_tendency,
            mutual_information_mean,
            entropy_estimate,
            condition_number,
            volume_ratio,
            autocorrelation,
            trend_strength,
            connectivity,
            clustering_coefficient,
        })
    }

    /// Multi-objective transformation recommendation
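    ///
    /// Sketch of the call, combining the extraction step above with the
    /// ensemble prediction (illustrative; not compiled as a doctest):
    ///
    /// ```ignore
    /// let system = AdvancedMetaLearningSystem::new().expect("system");
    /// let enhanced = system.extract_enhanced_meta_features(&x.view()).expect("meta-features");
    /// let ranked = system.recommend_multi_objective_transformations(&enhanced).expect("ranked");
    /// for rec in &ranked {
    ///     println!("{:?}: overall {:.2}", rec.transformation.transformation_type, rec.overall_score);
    /// }
    /// ```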
    pub fn recommend_multi_objective_transformations(
        &self,
        meta_features: &EnhancedMetaFeatures,
    ) -> Result<Vec<MultiObjectiveRecommendation>> {
        // Get base recommendations from the deep model
        let deep_input = self.enhanced_meta_features_to_tensor(meta_features)?;
        let deep_predictions = deep_input.apply(&self.deep_model);

        // Get sequence-based recommendations from the transformer
        let transformer_predictions = deep_input.apply(&self.transformer_model);

        // Combine predictions using ensemble weighting
        let ensemble_predictions = (&deep_predictions * 0.6) + (&transformer_predictions * 0.4);

        // Apply reinforcement learning if available
        let final_predictions = if let Some(ref rl_agent) = self.rl_agent {
            let rl_q_values = deep_input.apply(&rl_agent.q_network);
            let rl_softmax = rl_q_values.softmax(-1, tch::Kind::Float);
            (&ensemble_predictions * 0.7) + (&rl_softmax * 0.3)
        } else {
            ensemble_predictions
        };

        // Convert to multi-objective recommendations
        self.tensor_to_multi_objective_recommendations(&final_predictions, meta_features)
    }

    /// Transfer learning from similar datasets
    pub fn apply_transfer_learning(
        &mut self,
        target_meta_features: &EnhancedMetaFeatures,
    ) -> Result<Vec<TransformationConfig>> {
        // Find similar datasets in the performance database
        let similar_records = self.find_similar_datasets(target_meta_features, 5)?;

        if similar_records.is_empty() {
            return self.fallback_recommendations(target_meta_features);
        }

        // Extract successful transformations from similar datasets
        let mut transformation_votes: HashMap<TransformationType, (f64, usize)> = HashMap::new();

        for record in &similar_records {
            let similarity =
                self.compute_dataset_similarity(target_meta_features, &record.meta_features)?;

            for transformation in &record.transformations {
                let performance_score = record.metrics.accuracy * similarity;
                let entry = transformation_votes
                    .entry(transformation.transformation_type.clone())
                    .or_insert((0.0, 0));
                entry.0 += performance_score;
                entry.1 += 1;
            }
        }

        // Rank transformations by weighted performance
        let mut ranked_transformations: Vec<_> = transformation_votes
            .into_iter()
            .map(|(t_type, (total_score, count))| (t_type, total_score / count as f64))
            .collect();

        ranked_transformations
            .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));

        // Convert to transformation configs
        let mut recommendations = Vec::new();
        for (t_type, score) in ranked_transformations.into_iter().take(5) {
            recommendations.push(TransformationConfig {
                transformation_type: t_type.clone(),
                parameters: self
                    .get_optimized_parameters_for_type(&t_type, target_meta_features)?,
                expected_performance: score.clamp(0.0, 1.0),
            });
        }

        Ok(recommendations)
    }

    // Helper methods for advanced meta-feature extraction
    fn estimate_intrinsic_dimension(&self, x: &ArrayView2<f64>) -> Result<f64> {
        // Simplified intrinsic dimension estimation using the correlation dimension
        let n_samples = x.nrows();
        if n_samples < 10 {
            return Ok(1.0);
        }

        // Sample random point pairs and compute distances
        use scirs2_core::random::Rng;
        let mut rng = scirs2_core::random::rng();
        let sample_size = 100.min(n_samples);
        let mut distances = Vec::new();

        for _ in 0..sample_size {
            let i = rng.gen_range(0..n_samples);
            let j = rng.gen_range(0..n_samples);
            if i != j {
                let dist = self.euclidean_distance(&x.row(i), &x.row(j));
                distances.push(dist);
            }
        }

        // Estimate the dimension using a correlation-dimension approach
        distances.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

        if distances.is_empty() || distances[0] == 0.0 {
            return Ok(1.0);
        }

        // Count pairs within different distance thresholds
        let thresholds = [0.1, 0.2, 0.5, 1.0];
        let mut dimension_estimates = Vec::new();

        for &threshold in &thresholds {
            let count = distances.iter().filter(|&&d| d < threshold).count();
            if count > 1 {
                let correlation_sum = (count as f64).ln();
                let threshold_ln = threshold.ln();
                if threshold_ln != 0.0 {
                    dimension_estimates.push(correlation_sum / threshold_ln);
                }
            }
        }

        let avg_dimension = if dimension_estimates.is_empty() {
            1.0
        } else {
            dimension_estimates.iter().sum::<f64>() / dimension_estimates.len() as f64
        };

        Ok(avg_dimension.clamp(1.0, x.ncols() as f64))
    }

    fn hopkins_statistic(&self, x: &ArrayView2<f64>) -> Result<f64> {
        // Hopkins statistic for clustering tendency
        let (n_samples, n_features) = x.dim();
        if n_samples < 10 {
            return Ok(0.5); // Neutral value
        }

        use scirs2_core::random::Rng;
        let mut rng = scirs2_core::random::rng();
        let sample_size = 10.min(n_samples / 2);

        // Determine the bounding box of the data space
        let mut min_vals = vec![f64::INFINITY; n_features];
        let mut max_vals = vec![f64::NEG_INFINITY; n_features];

        for row in x.rows() {
            for (j, &val) in row.iter().enumerate() {
                min_vals[j] = min_vals[j].min(val);
                max_vals[j] = max_vals[j].max(val);
            }
        }

        let mut u_distances = Vec::new();
        let mut w_distances = Vec::new();

        // Sample random points and compute nearest-neighbour distances
        for _ in 0..sample_size {
            // Random point in the data space
            let mut random_point = vec![0.0; n_features];
            for j in 0..n_features {
                random_point[j] = rng.gen_range(min_vals[j]..=max_vals[j]);
            }

            // Find the nearest-neighbour distance for the random point
            let mut min_dist_u = f64::INFINITY;
            for row in x.rows() {
                let dist = self.euclidean_distance_vec(&random_point, &row.to_vec());
                min_dist_u = min_dist_u.min(dist);
            }
            u_distances.push(min_dist_u);

            // Random data point
            let random_idx = rng.gen_range(0..n_samples);
            let data_point = x.row(random_idx).to_vec();

            // Find the nearest-neighbour distance for the data point
            let mut min_dist_w = f64::INFINITY;
            for (i, row) in x.rows().into_iter().enumerate() {
                if i != random_idx {
                    let dist = self.euclidean_distance_vec(&data_point, &row.to_vec());
                    min_dist_w = min_dist_w.min(dist);
                }
            }
            w_distances.push(min_dist_w);
        }

        let sum_u: f64 = u_distances.iter().sum();
        let sum_w: f64 = w_distances.iter().sum();

        if sum_u + sum_w == 0.0 {
            Ok(0.5)
        } else {
            Ok(sum_u / (sum_u + sum_w))
        }
    }

    fn average_mutual_information(&self, x: &ArrayView2<f64>) -> Result<f64> {
        let (_, n_features) = x.dim();
        if n_features < 2 {
            return Ok(0.0);
        }

        let mut mi_sum = 0.0;
        let mut pair_count = 0;

        // Sample pairs of features to avoid O(n²) complexity
        use scirs2_core::random::Rng;
        let mut rng = scirs2_core::random::rng();
        let max_pairs = 50.min((n_features * (n_features - 1)) / 2);

        for _ in 0..max_pairs {
            let i = rng.gen_range(0..n_features);
            let j = rng.gen_range(0..n_features);
            if i != j {
                let mi = self.estimate_mutual_information(&x.column(i), &x.column(j))?;
                mi_sum += mi;
                pair_count += 1;
            }
        }

        Ok(if pair_count > 0 {
            mi_sum / pair_count as f64
        } else {
            0.0
        })
    }

    fn estimate_mutual_information(
        &self,
        x: &scirs2_core::ndarray::ArrayView1<f64>,
        y: &scirs2_core::ndarray::ArrayView1<f64>,
    ) -> Result<f64> {
        // Simplified MI estimation using binning
        let n_bins = 10;
        let x_bins = self.create_bins(x, n_bins);
        let y_bins = self.create_bins(y, n_bins);

        // Create joint and marginal histograms
        let mut joint_hist = vec![vec![0; n_bins]; n_bins];
        let mut x_hist = vec![0; n_bins];
        let mut y_hist = vec![0; n_bins];

        for (&xi, &yi) in x.iter().zip(y.iter()) {
            if xi.is_finite() && yi.is_finite() {
                let x_bin = self.find_bin(xi, &x_bins).min(n_bins - 1);
                let y_bin = self.find_bin(yi, &y_bins).min(n_bins - 1);
                joint_hist[x_bin][y_bin] += 1;
                x_hist[x_bin] += 1;
                y_hist[y_bin] += 1;
            }
        }

        let total = x.len() as f64;
        let mut mi = 0.0;

        for i in 0..n_bins {
            for j in 0..n_bins {
                let p_xy = joint_hist[i][j] as f64 / total;
                let p_x = x_hist[i] as f64 / total;
                let p_y = y_hist[j] as f64 / total;

                if p_xy > 0.0 && p_x > 0.0 && p_y > 0.0 {
                    mi += p_xy * (p_xy / (p_x * p_y)).ln();
                }
            }
        }

        Ok(mi.max(0.0))
    }

    fn differential_entropy_estimate(&self, x: &ArrayView2<f64>) -> Result<f64> {
        // Simplified differential entropy estimate
        let (n_samples, n_features) = x.dim();
        if n_samples < 2 {
            return Ok(0.0);
        }

        let mut entropy_sum = 0.0;
        for col in x.columns() {
            let variance = col.variance();
            if variance > 0.0 {
                // Gaussian entropy: 0.5 * ln(2πeσ²)
                entropy_sum +=
                    0.5 * (2.0 * std::f64::consts::PI * std::f64::consts::E * variance).ln();
            }
        }

        Ok(entropy_sum / n_features as f64)
    }

    fn estimate_condition_number(&self, x: &ArrayView2<f64>) -> Result<f64> {
        // Simplified condition number estimation
        let (n_samples, n_features) = x.dim();
        if n_samples < n_features || n_features < 2 {
            return Ok(1.0);
        }

        // Compute the average absolute pairwise correlation
        let mut corr_sum = 0.0;
        let mut corr_count = 0;

        for i in 0..n_features {
            for j in (i + 1)..n_features {
                let col_i = x.column(i);
                let col_j = x.column(j);
                if let Ok(corr) = self.quick_correlation(&col_i, &col_j) {
                    corr_sum += corr.abs();
                    corr_count += 1;
                }
            }
        }

        let avg_correlation = if corr_count > 0 {
            corr_sum / corr_count as f64
        } else {
            0.0
        };

        // Approximate the condition number from the average correlation
        Ok(if avg_correlation > 0.9 {
            100.0 // High condition number
        } else if avg_correlation > 0.7 {
            10.0 // Medium condition number
        } else {
            1.0 // Low condition number
        })
    }

    // Additional helper methods
    fn euclidean_distance(
        &self,
        a: &scirs2_core::ndarray::ArrayView1<f64>,
        b: &scirs2_core::ndarray::ArrayView1<f64>,
    ) -> f64 {
        a.iter()
            .zip(b.iter())
            .map(|(&ai, &bi)| (ai - bi).powi(2))
            .sum::<f64>()
            .sqrt()
    }

    fn euclidean_distance_vec(&self, a: &[f64], b: &[f64]) -> f64 {
        a.iter()
            .zip(b.iter())
            .map(|(&ai, &bi)| (ai - bi).powi(2))
            .sum::<f64>()
            .sqrt()
    }

    fn create_bins(&self, data: &scirs2_core::ndarray::ArrayView1<f64>, n_bins: usize) -> Vec<f64> {
        let mut sorted: Vec<f64> = data.iter().filter(|&&x| x.is_finite()).copied().collect();
        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

        if sorted.is_empty() {
            return vec![0.0; n_bins + 1];
        }

        let mut bins = Vec::new();
        for i in 0..=n_bins {
            let idx = (i * (sorted.len() - 1)) / n_bins;
            bins.push(sorted[idx]);
        }
        bins
    }

    fn find_bin(&self, value: f64, bins: &[f64]) -> usize {
        for (i, &bin_edge) in bins.iter().enumerate().take(bins.len() - 1) {
            if value <= bin_edge {
                return i;
            }
        }
        bins.len() - 2
    }
1475
1476    /// Estimate volume ratio (convex hull to bounding box)
1477    fn estimate_volume_ratio(&self, x: &ArrayView2<f64>) -> Result<f64> {
1478        let (n_samples, n_features) = x.dim();
1479        if n_samples < 4 || n_features < 2 {
1480            return Ok(1.0); // Default for insufficient data
1481        }
1482
1483        // For large datasets, estimate from a random subsample of rows (at most 1000)
1484        let sample_size = 1000.min(n_samples);
1485
1486        let mut rng = scirs2_core::random::rng();
1487        let mut all_indices: Vec<usize> = (0..n_samples).collect();
1488        all_indices.shuffle(&mut rng);
1489        let indices: Vec<usize> = all_indices.into_iter().take(sample_size).collect();
1490
1491        // Find per-feature minimum and maximum values (bounding box extents)
1492        let mut min_vals = vec![f64::INFINITY; n_features];
1493        let mut max_vals = vec![f64::NEG_INFINITY; n_features];
1494
1495        for &idx in &indices {
1496            let row = x.row(idx);
1497            for (j, &val) in row.iter().enumerate() {
1498                if val.is_finite() {
1499                    min_vals[j] = min_vals[j].min(val);
1500                    max_vals[j] = max_vals[j].max(val);
1501                }
1502            }
1503        }
1504
1505        // Calculate bounding box volume
1506        let mut box_volume = 1.0;
1507        for j in 0..n_features {
1508            let range = max_vals[j] - min_vals[j];
1509            if range > f64::EPSILON {
1510                box_volume *= range;
1511            } else {
1512                return Ok(0.0); // Degenerate case
1513            }
1514        }
1515
1516        // Estimate the convex hull volume with a simplified, sampling-based proxy.
1517        // A full implementation would run a convex hull algorithm; here the product of
1518        // per-feature standard deviations serves as a variance-based approximation.
1519        let mut variance_product = 1.0;
1520        for j in 0..n_features {
1521            let col_values: Vec<f64> = indices
1522                .iter()
1523                .map(|&idx| x[[idx, j]])
1524                .filter(|&val| val.is_finite())
1525                .collect();
1526
1527            if col_values.len() > 1 {
1528                let mean = col_values.iter().sum::<f64>() / col_values.len() as f64;
1529                let variance = col_values
1530                    .iter()
1531                    .map(|&val| (val - mean).powi(2))
1532                    .sum::<f64>()
1533                    / (col_values.len() - 1) as f64;
1534                variance_product *= variance.sqrt();
1535            }
1536        }
1537
1538        // Approximate volume ratio
1539        if box_volume > f64::EPSILON {
1540            let ratio = (variance_product / box_volume).min(1.0).max(0.0);
1541            Ok(ratio)
1542        } else {
1543            Ok(0.0)
1544        }
1545    }
1546
1547    /// Estimate autocorrelation for time-like patterns
1548    fn estimate_autocorrelation(&self, x: &ArrayView2<f64>) -> Result<f64> {
1549        let (n_samples, n_features) = x.dim();
1550        if n_samples < 3 {
1551            return Ok(0.0);
1552        }
1553
1554        let mut autocorr_sum = 0.0;
1555        let mut feature_count = 0;
1556
1557        // Calculate autocorrelation for each feature
1558        for j in 0..n_features {
1559            let col = x.column(j);
1560            let values: Vec<f64> = col
1561                .iter()
1562                .filter(|&&val| val.is_finite())
1563                .copied()
1564                .collect();
1565
1566            if values.len() < 3 {
1567                continue;
1568            }
1569
1570            // Calculate lag-1 autocorrelation
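            // Lag-1 autocorrelation: r1 = Σ_t (x_t - x̄)(x_{t+1} - x̄) / Σ_t (x_t - x̄)²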
1571            let mean = values.iter().sum::<f64>() / values.len() as f64;
1572            let mut numerator = 0.0;
1573            let mut denominator = 0.0;
1574
1575            for i in 0..values.len() - 1 {
1576                numerator += (values[i] - mean) * (values[i + 1] - mean);
1577            }
1578
1579            for &val in &values {
1580                denominator += (val - mean).powi(2);
1581            }
1582
1583            if denominator > f64::EPSILON {
1584                autocorr_sum += numerator / denominator;
1585                feature_count += 1;
1586            }
1587        }
1588
1589        if feature_count > 0 {
1590            Ok((autocorr_sum / feature_count as f64).abs())
1591        } else {
1592            Ok(0.0)
1593        }
1594    }
1595
1596    /// Estimate trend strength in the data
1597    fn estimate_trend_strength(&self, x: &ArrayView2<f64>) -> Result<f64> {
1598        let (n_samples, n_features) = x.dim();
1599        if n_samples < 5 {
1600            return Ok(0.0);
1601        }
1602
1603        let mut trend_sum = 0.0;
1604        let mut feature_count = 0;
1605
1606        // Calculate trend strength for each feature
1607        for j in 0..n_features {
1608            let col = x.column(j);
1609            let values: Vec<(f64, f64)> = col
1610                .iter()
1611                .enumerate()
1612                .filter(|(_, val)| val.is_finite())
1613                .map(|(i, val)| (i as f64, *val))
1614                .collect();
1615
1616            if values.len() < 5 {
1617                continue;
1618            }
1619
1620            // Calculate linear trend using least squares
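            // Closed-form least squares: slope = (n·Σxy - Σx·Σy) / (n·Σx² - (Σx)²);
            // the R² of this fit is used below as the per-feature trend strength.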
1621            let n = values.len() as f64;
1622            let sum_x: f64 = values.iter().map(|(x, _)| x).sum();
1623            let sum_y: f64 = values.iter().map(|(_, y)| y).sum();
1624            let sum_xy: f64 = values.iter().map(|(x, y)| x * y).sum();
1625            let sum_x2: f64 = values.iter().map(|(x, _)| x * x).sum();
1626
1627            let denominator = n * sum_x2 - sum_x * sum_x;
1628            if denominator.abs() > f64::EPSILON {
1629                let slope = (n * sum_xy - sum_x * sum_y) / denominator;
1630                let intercept = (sum_y - slope * sum_x) / n;
1631
1632                // Calculate R-squared to measure trend strength
1633                let y_mean = sum_y / n;
1634                let mut ss_tot = 0.0;
1635                let mut ss_res = 0.0;
1636
1637                for (x_val, y_val) in &values {
1638                    let y_pred = slope * x_val + intercept;
1639                    ss_tot += (y_val - y_mean).powi(2);
1640                    ss_res += (y_val - y_pred).powi(2);
1641                }
1642
1643                if ss_tot > f64::EPSILON {
1644                    let r_squared = 1.0 - (ss_res / ss_tot);
1645                    trend_sum += r_squared.max(0.0);
1646                    feature_count += 1;
1647                }
1648            }
1649        }
1650
1651        if feature_count > 0 {
1652            Ok(trend_sum / feature_count as f64)
1653        } else {
1654            Ok(0.0)
1655        }
1656    }
1657
1658    /// Estimate feature connectivity (correlation-based)
1659    fn estimate_feature_connectivity(&self, x: &ArrayView2<f64>) -> Result<f64> {
1660        let (_, n_features) = x.dim();
1661        if n_features < 2 {
1662            return Ok(0.0);
1663        }
1664
1665        let mut strong_connections = 0;
1666        let mut total_connections = 0;
1667        let threshold = 0.5; // Threshold for "strong" connection
1668
1669        // Sample pairs to avoid O(n²) complexity for large feature sets
1670        let max_pairs = 100.min((n_features * (n_features - 1)) / 2);
1671        use scirs2_core::random::Rng;
1672        let mut rng = scirs2_core::random::rng();
1673
1674        for _ in 0..max_pairs {
1675            let i = rng.gen_range(0..n_features);
1676            let j = rng.gen_range(0..n_features);
1677
1678            if i != j {
1679                let col_i = x.column(i);
1680                let col_j = x.column(j);
1681
1682                if let Ok(corr) = self.quick_correlation(&col_i, &col_j) {
1683                    if corr.abs() > threshold {
1684                        strong_connections += 1;
1685                    }
1686                    total_connections += 1;
1687                }
1688            }
1689        }
1690
1691        if total_connections > 0 {
1692            Ok(strong_connections as f64 / total_connections as f64)
1693        } else {
1694            Ok(0.0)
1695        }
1696    }
1697
1698    /// Quick correlation calculation without full validation
1699    fn quick_correlation(
1700        &self,
1701        x: &scirs2_core::ndarray::ArrayView1<f64>,
1702        y: &scirs2_core::ndarray::ArrayView1<f64>,
1703    ) -> Result<f64> {
1704        if x.len() != y.len() || x.len() < 2 {
1705            return Ok(0.0);
1706        }
1707
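        // Pearson correlation: r = Σ(x_i - x̄)(y_i - ȳ) / sqrt(Σ(x_i - x̄)² · Σ(y_i - ȳ)²),
        // with non-finite pairs skipped in the sums and the result clipped to [-1, 1].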
1708        let n = x.len() as f64;
1709        let mean_x = x.iter().sum::<f64>() / n;
1710        let mean_y = y.iter().sum::<f64>() / n;
1711
1712        let mut numerator = 0.0;
1713        let mut sum_sq_x = 0.0;
1714        let mut sum_sq_y = 0.0;
1715
1716        for (&xi, &yi) in x.iter().zip(y.iter()) {
1717            if xi.is_finite() && yi.is_finite() {
1718                let diff_x = xi - mean_x;
1719                let diff_y = yi - mean_y;
1720                numerator += diff_x * diff_y;
1721                sum_sq_x += diff_x * diff_x;
1722                sum_sq_y += diff_y * diff_y;
1723            }
1724        }
1725
1726        let denominator = (sum_sq_x * sum_sq_y).sqrt();
1727
1728        if denominator < f64::EPSILON {
1729            Ok(0.0)
1730        } else {
1731            let correlation = numerator / denominator;
1732            Ok(correlation.max(-1.0).min(1.0))
1733        }
1734    }
1735
1736    /// Calculate feature clustering coefficient
1737    fn feature_clustering_coefficient(&self, x: &ArrayView2<f64>) -> Result<f64> {
1738        let (_, n_features) = x.dim();
1739        if n_features < 3 {
1740            return Ok(0.0);
1741        }
1742
1743        // Build correlation adjacency matrix (sampled)
1744        let sample_size = 20.min(n_features);
1745
1746        let mut rng = scirs2_core::random::rng();
1747        let mut all_features: Vec<usize> = (0..n_features).collect();
1748        all_features.shuffle(&mut rng);
1749        let sampled_features: Vec<usize> = all_features.into_iter().take(sample_size).collect();
1750
1751        let threshold = 0.5;
1752        let mut adjacency = vec![vec![false; sample_size]; sample_size];
1753
1754        // Build adjacency matrix
1755        for (i, &feat_i) in sampled_features.iter().enumerate() {
1756            for (j, &feat_j) in sampled_features.iter().enumerate() {
1757                if i != j {
1758                    let col_i = x.column(feat_i);
1759                    let col_j = x.column(feat_j);
1760
1761                    if let Ok(corr) = self.quick_correlation(&col_i, &col_j) {
1762                        adjacency[i][j] = corr.abs() > threshold;
1763                    }
1764                }
1765            }
1766        }
1767
1768        // Calculate clustering coefficient
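        // Local clustering coefficient of a node with k ≥ 2 neighbors:
        // C_i = (edges among its neighbors) / (k·(k - 1)/2); the result is the mean over such nodes.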
1769        let mut total_coefficient = 0.0;
1770        let mut node_count = 0;
1771
1772        for i in 0..sample_size {
1773            // Find neighbors of node i
1774            let neighbors: Vec<usize> = (0..sample_size).filter(|&j| adjacency[i][j]).collect();
1775
1776            if neighbors.len() >= 2 {
1777                // Count edges between neighbors
1778                let mut edges_between_neighbors = 0;
1779                let mut possible_edges = 0;
1780
1781                for (ni, &neighbor_i) in neighbors.iter().enumerate() {
1782                    for &neighbor_j in neighbors.iter().skip(ni + 1) {
1783                        possible_edges += 1;
1784                        if adjacency[neighbor_i][neighbor_j] {
1785                            edges_between_neighbors += 1;
1786                        }
1787                    }
1788                }
1789
1790                if possible_edges > 0 {
1791                    total_coefficient += edges_between_neighbors as f64 / possible_edges as f64;
1792                    node_count += 1;
1793                }
1794            }
1795        }
1796
1797        if node_count > 0 {
1798            Ok(total_coefficient / node_count as f64)
1799        } else {
1800            Ok(0.0)
1801        }
1802    }
1803
1804    /// Convert enhanced meta-features to tensor for neural network input
1805    fn enhanced_meta_features_to_tensor(&self, features: &EnhancedMetaFeatures) -> Result<Tensor> {
1806        // Create feature vector with proper normalization
1807        let feature_vec = vec![
1808            // Base features (normalized)
1809            (features.base_features.n_samples as f64).ln().max(0.0),
1810            (features.base_features.n_features as f64).ln().max(0.0),
1811            features.base_features.sparsity.max(0.0).min(1.0),
1812            features.base_features.mean_correlation.max(-1.0).min(1.0),
1813            features.base_features.std_correlation.max(0.0),
1814            features.base_features.mean_skewness.max(-10.0).min(10.0),
1815            features.base_features.mean_kurtosis.max(-10.0).min(10.0),
1816            features.base_features.missing_ratio.max(0.0).min(1.0),
1817            features.base_features.variance_ratio.max(0.0),
1818            features.base_features.outlier_ratio.max(0.0).min(1.0),
1819            // Enhanced features (normalized)
1820            features
1821                .manifold_dimension
1822                .max(1.0)
1823                .min(features.base_features.n_features as f64)
1824                .ln(),
1825            features.clustering_tendency.max(0.0).min(1.0),
1826            features.mutual_information_mean.max(0.0),
1827            features.entropy_estimate.max(0.0),
1828            (features.condition_number.max(1.0)).ln(),
1829            features.volume_ratio.max(0.0).min(1.0),
1830            features.autocorrelation.max(-1.0).min(1.0),
1831            features.trend_strength.max(0.0).min(1.0),
1832            features.connectivity.max(0.0).min(1.0),
1833            features.clustering_coefficient.max(0.0).min(1.0),
1834        ];
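        // The ordering and length of this vector must stay in sync with the network's expected
        // input (see the reshape to [1, 20] below).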
1835
1836        // Validate all features are finite
1837        if feature_vec.iter().any(|&f| !f.is_finite()) {
1838            return Err(TransformError::ComputationError(
1839                "Non-finite values in enhanced meta-features".to_string(),
1840            ));
1841        }
1842
1843        Ok(Tensor::f_from_slice(&feature_vec)?
1844            .reshape(&[1, 20])
1845            .to_device(self.device))
1846    }
1847
1848    /// Convert tensor predictions to multi-objective recommendations
1849    fn tensor_to_multi_objective_recommendations(
1850        &self,
1851        tensor: &Tensor,
1852        features: &EnhancedMetaFeatures,
1853    ) -> Result<Vec<MultiObjectiveRecommendation>> {
1854        let scores: Vec<f64> = tensor.try_into().map_err(|e| {
1855            TransformError::ComputationError(format!("Failed to extract tensor data: {:?}", e))
1856        })?;
1857
1858        if scores.len() != 20 {
1859            return Err(TransformError::ComputationError(format!(
1860                "Expected 20 prediction scores, got {}",
1861                scores.len()
1862            )));
1863        }
1864
1865        let mut recommendations = Vec::new();
1866
1867        // Map scores to transformations (first 10 are transformation types)
1868        let transformation_types = [
1869            TransformationType::StandardScaler,
1870            TransformationType::MinMaxScaler,
1871            TransformationType::RobustScaler,
1872            TransformationType::PowerTransformer,
1873            TransformationType::PolynomialFeatures,
1874            TransformationType::PCA,
1875            TransformationType::VarianceThreshold,
1876            TransformationType::QuantileTransformer,
1877            TransformationType::BinaryEncoder,
1878            TransformationType::TargetEncoder,
1879        ];
1880
1881        for (i, t_type) in transformation_types.iter().enumerate() {
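            // Only keep transformations whose predicted score is finite and above a 0.3 threshold.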
1882            if i < scores.len() && scores[i].is_finite() && scores[i] > 0.3 {
1883                // Calculate multi-objective scores
1884                let performance_score = scores[i].clamp(0.0, 1.0);
1885
1886                // Estimate efficiency based on data characteristics
1887                let efficiency_score = self.estimate_efficiency_score(t_type, features)?;
1888
1889                // Estimate interpretability
1890                let interpretability_score = self.estimate_interpretability_score(t_type);
1891
1892                // Estimate robustness
1893                let robustness_score = self.estimate_robustness_score(t_type, features);
1894
1895                // Calculate overall score using default weights
1896                let weights = FeatureOptimizationWeights::default();
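                // overall = w_perf·performance + w_eff·efficiency
                //         + w_interp·interpretability + w_robust·robustness (default weights)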
1897                let overall_score = performance_score * weights.performance_weight
1898                    + efficiency_score * weights.efficiency_weight
1899                    + interpretability_score * weights.interpretability_weight
1900                    + robustness_score * weights.robustness_weight;
1901
1902                recommendations.push(MultiObjectiveRecommendation {
1903                    transformation: TransformationConfig {
1904                        transformation_type: t_type.clone(),
1905                        parameters: self.get_optimized_parameters_for_type(t_type, features)?,
1906                        expected_performance: performance_score,
1907                    },
1908                    performance_score,
1909                    efficiency_score,
1910                    interpretability_score,
1911                    robustness_score,
1912                    overall_score,
1913                });
1914            }
1915        }
1916
1917        // Sort by overall score
1918        recommendations.sort_by(|a, b| {
1919            b.overall_score
1920                .partial_cmp(&a.overall_score)
1921                .unwrap_or(std::cmp::Ordering::Equal)
1922        });
1923
1924        Ok(recommendations)
1925    }
1926
1927    /// Find the k most similar datasets in the performance database
1928    fn find_similar_datasets(
1929        &self,
1930        target: &EnhancedMetaFeatures,
1931        k: usize,
1932    ) -> Result<Vec<PerformanceRecord>> {
1933        if self.performance_db.is_empty() {
1934            return Ok(vec![]);
1935        }
1936
1937        let mut similarities: Vec<(usize, f64)> = Vec::new();
1938
1939        for (i, record) in self.performance_db.iter().enumerate() {
1940            let similarity = self.compute_dataset_similarity(target, &record.meta_features)?;
1941            similarities.push((i, similarity));
1942        }
1943
1944        // Sort by similarity and take top k
1945        similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
1946
1947        let mut similar_records = Vec::new();
1948        for (idx, _similarity) in similarities.iter().take(k) {
1949            similar_records.push(self.performance_db[*idx].clone());
1950        }
1951
1952        Ok(similar_records)
1953    }
1954
1955    /// Compute similarity between two datasets using enhanced meta-features
1956    fn compute_dataset_similarity(
1957        &self,
1958        a: &EnhancedMetaFeatures,
1959        b: &DatasetMetaFeatures,
1960    ) -> Result<f64> {
1961        // Compare base features only (since b doesn't have enhanced features)
1962        let features_a = &a.base_features;
1963
1964        // Normalize features for comparison
1965        let scale_similarity = |val_a: f64, val_b: f64, max_val: f64| -> f64 {
1966            if max_val > 0.0 {
1967                1.0 - (val_a - val_b).abs() / max_val
1968            } else if (val_a - val_b).abs() < f64::EPSILON {
1969                1.0
1970            } else {
1971                0.0
1972            }
1973        };
1976
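        // Each dimension contributes s = 1 - |a - b| / scale (this can go below zero for very
        // dissimilar values); the weighted sum at the end is clamped back into [0, 1].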
1977        // Calculate similarity for each dimension
1978        let similarities = vec![
1979            scale_similarity(
1980                (features_a.n_samples as f64).ln(),
1981                (b.n_samples as f64).ln(),
1982                20.0, // Reasonable scale for log(samples)
1983            ),
1984            scale_similarity(
1985                (features_a.n_features as f64).ln(),
1986                (b.n_features as f64).ln(),
1987                15.0, // Reasonable scale for log(features)
1988            ),
1989            scale_similarity(features_a.sparsity, b.sparsity, 1.0),
1990            scale_similarity(features_a.mean_correlation, b.mean_correlation, 2.0),
1991            scale_similarity(features_a.std_correlation, b.std_correlation, 1.0),
1992            scale_similarity(features_a.mean_skewness, b.mean_skewness, 20.0),
1993            scale_similarity(features_a.mean_kurtosis, b.mean_kurtosis, 20.0),
1994            scale_similarity(features_a.missing_ratio, b.missing_ratio, 1.0),
1995            scale_similarity(features_a.variance_ratio, b.variance_ratio, 10.0),
1996            scale_similarity(features_a.outlier_ratio, b.outlier_ratio, 1.0),
1997        ];
1998
1999        // Weighted average (give more weight to important characteristics)
2000        let weights = vec![0.15, 0.15, 0.1, 0.15, 0.05, 0.1, 0.1, 0.05, 0.1, 0.05];
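        // The weights sum to 1.0, so the weighted similarity is already on a [0, 1]-comparable
        // scale before the final clamp.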
2001        let weighted_similarity = similarities
2002            .iter()
2003            .zip(weights.iter())
2004            .map(|(sim, weight)| sim * weight)
2005            .sum::<f64>();
2006
2007        Ok(weighted_similarity.max(0.0).min(1.0))
2008    }
2009
2010    /// Fallback recommendations when meta-learning fails
2011    fn fallback_recommendations(
2012        &self,
2013        features: &EnhancedMetaFeatures,
2014    ) -> Result<Vec<TransformationConfig>> {
2015        let mut recommendations = Vec::new();
2016        let base_features = &features.base_features;
2017
2018        // Rule-based recommendations
2019
2020        // 1. Always recommend StandardScaler for most datasets
2021        recommendations.push(TransformationConfig {
2022            transformation_type: TransformationType::StandardScaler,
2023            parameters: HashMap::new(),
2024            expected_performance: 0.8,
2025        });
2026
2027        // 2. High dimensionality -> PCA
2028        if base_features.n_features > 100 || base_features.n_features > base_features.n_samples {
2029            let mut params = HashMap::new();
2030            params.insert("n_components".to_string(), 0.95); // Keep 95% variance
2031            recommendations.push(TransformationConfig {
2032                transformation_type: TransformationType::PCA,
2033                parameters: params,
2034                expected_performance: 0.75,
2035            });
2036        }
2037
2038        // 3. High outlier ratio -> RobustScaler
2039        if base_features.outlier_ratio > 0.1 {
2040            recommendations.push(TransformationConfig {
2041                transformation_type: TransformationType::RobustScaler,
2042                parameters: HashMap::new(),
2043                expected_performance: 0.85,
2044            });
2045        }
2046
2047        // 4. High skewness -> PowerTransformer
2048        if base_features.mean_skewness.abs() > 1.5 {
2049            recommendations.push(TransformationConfig {
2050                transformation_type: TransformationType::PowerTransformer,
2051                parameters: HashMap::new(),
2052                expected_performance: 0.8,
2053            });
2054        }
2055
2056        // 5. Low variance features -> VarianceThreshold
2057        if base_features.variance_ratio < 0.1 {
2058            let mut params = HashMap::new();
2059            params.insert("threshold".to_string(), 0.01);
2060            recommendations.push(TransformationConfig {
2061                transformation_type: TransformationType::VarianceThreshold,
2062                parameters: params,
2063                expected_performance: 0.7,
2064            });
2065        }
2066
2067        // Sort by expected performance
2068        recommendations.sort_by(|a, b| {
2069            b.expected_performance
2070                .partial_cmp(&a.expected_performance)
2071                .unwrap_or(std::cmp::Ordering::Equal)
2072        });
2073
2074        Ok(recommendations.into_iter().take(3).collect()) // Return top 3
2075    }
2076
2077    /// Get optimized parameters for a transformation type
2078    fn get_optimized_parameters_for_type(
2079        &self,
2080        t_type: &TransformationType,
2081        features: &EnhancedMetaFeatures,
2082    ) -> Result<HashMap<String, f64>> {
2083        let mut params = HashMap::new();
2084        let base_features = &features.base_features;
2085
2086        match t_type {
2087            TransformationType::PCA => {
2088                // Adaptive n_components based on data characteristics
2089                let variance_threshold = if base_features.n_features > 1000 {
2090                    0.99
2091                } else {
2092                    0.95
2093                };
2094                params.insert("variance_threshold".to_string(), variance_threshold);
2095
2096                // Estimate reasonable number of components
2097                let max_components = base_features.n_features.min(base_features.n_samples);
2098                let estimated_components = if base_features.n_features > base_features.n_samples {
2099                    (base_features.n_samples as f64 * 0.8) as usize
2100                } else {
2101                    (max_components as f64 * variance_threshold) as usize
2102                };
2103                params.insert(
2104                    "n_components".to_string(),
2105                    estimated_components.max(1) as f64,
2106                );
2107            }
2108
2109            TransformationType::PolynomialFeatures => {
2110                // Adaptive degree based on dataset size
2111                let degree = if base_features.n_features > 50 { 2 } else { 3 };
2112                params.insert("degree".to_string(), degree as f64);
2113                params.insert("include_bias".to_string(), 1.0);
2114                params.insert(
2115                    "interaction_only".to_string(),
2116                    if base_features.n_features > 20 {
2117                        1.0
2118                    } else {
2119                        0.0
2120                    },
2121                );
2122            }
2123
2124            TransformationType::VarianceThreshold => {
2125                // Adaptive threshold based on data characteristics
2126                let threshold = if base_features.variance_ratio < 0.01 {
2127                    0.001
2128                } else {
2129                    0.01
2130                };
2131                params.insert("threshold".to_string(), threshold);
2132            }
2133
2134            TransformationType::PowerTransformer => {
2135                // Choose method based on data characteristics
2136                let method = if base_features.has_missing || base_features.outlier_ratio > 0.2 {
2137                    "yeo-johnson" // Can handle zeros and negative values
2138                } else {
2139                    "box-cox" // More powerful but requires positive values
2140                };
2141                params.insert(
2142                    "method".to_string(),
2143                    if method == "yeo-johnson" { 1.0 } else { 0.0 },
2144                );
2145                params.insert("standardize".to_string(), 1.0);
2146            }
2147
2148            TransformationType::QuantileTransformer => {
2149                // Adaptive number of quantiles
2150                let n_quantiles = (base_features.n_samples / 10).clamp(10, 1000);
2151                params.insert("n_quantiles".to_string(), n_quantiles as f64);
2152                params.insert("output_distribution".to_string(), 0.0); // 0 = uniform, 1 = normal
2153            }
2154
2155            _ => {
2156                // Default parameters for other transformations
2157            }
2158        }
2159
2160        Ok(params)
2161    }
2162
2163    /// Estimate efficiency score for a transformation
2164    fn estimate_efficiency_score(
2165        &self,
2166        t_type: &TransformationType,
2167        features: &EnhancedMetaFeatures,
2168    ) -> Result<f64> {
2169        let base_features = &features.base_features;
2170        let data_size_factor = (base_features.n_samples * base_features.n_features) as f64;
2171        let log_size = data_size_factor.ln();
2172
2173        let score = match t_type {
2174            TransformationType::StandardScaler | TransformationType::MinMaxScaler => {
2175                1.0 - (log_size / 25.0).min(0.3) // Very efficient, slight penalty for large data
2176            }
2177            TransformationType::RobustScaler => {
2178                0.9 - (log_size / 20.0).min(0.3) // Slightly less efficient due to median computation
2179            }
2180            TransformationType::PCA => {
2181                let complexity_penalty = if base_features.n_features > base_features.n_samples {
2182                    0.5 // Expensive for wide datasets
2183                } else {
2184                    0.3
2185                };
2186                0.7 - complexity_penalty - (log_size / 30.0).min(0.2)
2187            }
2188            TransformationType::PolynomialFeatures => {
2189                let feature_penalty = (base_features.n_features as f64 / 100.0).min(0.5);
2190                0.5 - feature_penalty - (log_size / 15.0).min(0.3)
2191            }
2192            TransformationType::PowerTransformer => 0.8 - (log_size / 25.0).min(0.2),
2193            _ => 0.7, // Default efficiency
2194        };
2195
2196        Ok(score.clamp(0.1, 1.0))
2197    }
2198
2199    /// Estimate interpretability score for a transformation
2200    fn estimate_interpretability_score(&self, t_type: &TransformationType) -> f64 {
2201        match t_type {
2202            TransformationType::StandardScaler | TransformationType::MinMaxScaler => 0.9,
2203            TransformationType::RobustScaler => 0.85,
2204            TransformationType::VarianceThreshold => 0.95,
2205            TransformationType::QuantileTransformer => 0.6,
2206            TransformationType::PowerTransformer => 0.7,
2207            TransformationType::PCA => 0.4, // Loses original feature meaning
2208            TransformationType::PolynomialFeatures => 0.3, // Creates many new features
2209            TransformationType::BinaryEncoder | TransformationType::TargetEncoder => 0.5,
2210        }
2211    }
2212
2213    /// Estimate robustness score for a transformation
2214    fn estimate_robustness_score(
2215        &self,
2216        t_type: &TransformationType,
2217        features: &EnhancedMetaFeatures,
2218    ) -> f64 {
2219        let base_features = &features.base_features;
2220
2221        let base_score = match t_type {
2222            TransformationType::RobustScaler => 0.95,
2223            TransformationType::QuantileTransformer => 0.9,
2224            TransformationType::StandardScaler => 0.7,
2225            TransformationType::MinMaxScaler => 0.6,
2226            TransformationType::PowerTransformer => 0.8,
2227            TransformationType::PCA => 0.7,
2228            TransformationType::PolynomialFeatures => 0.7,
2229            TransformationType::VarianceThreshold => 0.75,
2230            TransformationType::BinaryEncoder => 0.65,
2231            TransformationType::TargetEncoder => 0.6,
2232        };
2233
2234        // Adjust based on data characteristics
2235        let outlier_penalty = if base_features.outlier_ratio > 0.1 {
2236            match t_type {
2237                TransformationType::RobustScaler | TransformationType::QuantileTransformer => 0.0,
2238                _ => 0.2,
2239            }
2240        } else {
2241            0.0
2242        };
2243
2244        let missing_penalty = if base_features.has_missing { 0.1 } else { 0.0 };
2245
2246        let score: f64 = base_score - outlier_penalty - missing_penalty;
2247        score.clamp(0.1, 1.0)
2248    }
2249}
2250
2251// Stub implementations when auto-feature-engineering is not enabled
2252/// Advanced meta-learning system for feature engineering (placeholder)
2253#[cfg(not(feature = "auto-feature-engineering"))]
2254pub struct AdvancedMetaLearningSystem;
2255
2256/// Enhanced meta-features for advanced analysis (placeholder)
2257#[cfg(not(feature = "auto-feature-engineering"))]
2258pub struct EnhancedMetaFeatures;
2259
2260/// Multi-objective recommendation system (placeholder)
2261#[cfg(not(feature = "auto-feature-engineering"))]
2262pub struct MultiObjectiveRecommendation;