sklears_feature_selection/
benchmark.rs

1//! Benchmarking framework for feature selection methods
2//!
3//! This module provides comprehensive benchmarking capabilities to evaluate and compare
4//! feature selection methods against reference implementations and standard datasets.
5//! All implementations follow the SciRS2 policy using scirs2-core for numerical computations.
6
7use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
8use scirs2_core::random::{thread_rng, Rng};
9use sklears_core::error::{Result as SklResult, SklearsError};
10type Result<T> = SklResult<T>;
11use std::collections::HashMap;
12use std::time::{Duration, Instant};
13use thiserror::Error;
14
15#[derive(Debug, Error)]
16pub enum BenchmarkError {
17    #[error("Invalid dataset configuration")]
18    InvalidDataset,
19    #[error("Method execution failed: {0}")]
20    MethodExecutionFailed(String),
21    #[error("Benchmark configuration error: {0}")]
22    ConfigurationError(String),
23    #[error("Statistical analysis failed")]
24    StatisticalAnalysisFailed,
25    #[error("Comparison baseline not found")]
26    BaselineNotFound,
27}
28
29impl From<BenchmarkError> for SklearsError {
30    fn from(err: BenchmarkError) -> Self {
31        SklearsError::FitError(format!("Benchmark error: {}", err))
32    }
33}
34
35/// Comprehensive benchmarking suite for feature selection methods
36#[derive(Debug)]
37pub struct FeatureSelectionBenchmark {
38    datasets: Vec<BenchmarkDataset>,
39    methods: HashMap<String, Box<dyn BenchmarkableMethod>>,
40    config: BenchmarkConfig,
41    results: Vec<BenchmarkResult>,
42}
43
44impl Default for FeatureSelectionBenchmark {
45    fn default() -> Self {
46        Self::new()
47    }
48}
49
50impl FeatureSelectionBenchmark {
51    /// Create a new benchmarking suite
52    pub fn new() -> Self {
53        Self {
54            datasets: Vec::new(),
55            methods: HashMap::new(),
56            config: BenchmarkConfig::default(),
57            results: Vec::new(),
58        }
59    }
60
61    /// Configure the benchmark suite
62    pub fn with_config(mut self, config: BenchmarkConfig) -> Self {
63        self.config = config;
64        self
65    }
66
67    /// Add a dataset to the benchmark suite
68    pub fn add_dataset(&mut self, dataset: BenchmarkDataset) -> Result<()> {
69        if dataset.X.is_empty() || dataset.y.is_empty() {
70            return Err(BenchmarkError::InvalidDataset.into());
71        }
72        self.datasets.push(dataset);
73        Ok(())
74    }
75
76    /// Add multiple standard datasets for benchmarking
77    pub fn add_standard_datasets(&mut self) -> Result<()> {
78        // Add synthetic datasets with known properties
79        self.add_dataset(self.create_synthetic_linear_dataset(500, 50, 10)?)?;
80        self.add_dataset(self.create_synthetic_nonlinear_dataset(300, 100, 15)?)?;
81        self.add_dataset(self.create_high_dimensional_dataset(200, 1000, 5)?)?;
82        self.add_dataset(self.create_correlated_features_dataset(400, 80, 20)?)?;
83        self.add_dataset(self.create_noisy_dataset(600, 60, 12)?)?;
84
85        Ok(())
86    }
87
88    /// Register a benchmarkable method
89    pub fn register_method(&mut self, name: String, method: Box<dyn BenchmarkableMethod>) {
90        self.methods.insert(name, method);
91    }
92
93    /// Run comprehensive benchmark suite
94    pub fn run_benchmark(&mut self) -> Result<BenchmarkSuiteResults> {
95        if self.datasets.is_empty() {
96            return Err(BenchmarkError::ConfigurationError("No datasets added".to_string()).into());
97        }
98
99        if self.methods.is_empty() {
100            return Err(
101                BenchmarkError::ConfigurationError("No methods registered".to_string()).into(),
102            );
103        }
104
105        let mut all_results = Vec::new();
106        let start_time = Instant::now();
107
108        println!(
109            "Starting benchmark suite with {} datasets and {} methods...",
110            self.datasets.len(),
111            self.methods.len()
112        );
113
114        // Run benchmarks for each dataset-method combination
115        for (dataset_idx, dataset) in self.datasets.iter().enumerate() {
116            println!(
117                "Benchmarking dataset {}/{}: {}",
118                dataset_idx + 1,
119                self.datasets.len(),
120                dataset.name
121            );
122
123            for (method_name, method) in self.methods.iter() {
124                println!("  Running method: {}", method_name);
125
126                // Run multiple iterations if configured
127                let mut method_results = Vec::new();
128
129                for iteration in 0..self.config.n_iterations {
130                    if self.config.verbose && iteration % 10 == 0 {
131                        println!(
132                            "    Iteration {}/{}",
133                            iteration + 1,
134                            self.config.n_iterations
135                        );
136                    }
137
138                    let result = self.run_single_benchmark(
139                        method.as_ref(),
140                        method_name,
141                        dataset,
142                        iteration,
143                    )?;
144
145                    method_results.push(result);
146                }
147
148                // Aggregate results for this method on this dataset
149                let aggregated_result = self.aggregate_method_results(&method_results)?;
150                all_results.push(aggregated_result);
151            }
152        }
153
154        let total_duration = start_time.elapsed();
155
156        // Perform statistical analysis
157        let statistical_analysis = self.perform_statistical_analysis(&all_results)?;
158
159        // Generate rankings
160        let rankings = self.generate_rankings(&all_results)?;
161
162        // Create comprehensive results
163        let suite_results = BenchmarkSuiteResults {
164            individual_results: all_results,
165            statistical_analysis,
166            rankings,
167            execution_summary: ExecutionSummary {
168                total_duration,
169                n_datasets: self.datasets.len(),
170                n_methods: self.methods.len(),
171                n_total_runs: self.datasets.len() * self.methods.len() * self.config.n_iterations,
172            },
173            configuration: self.config.clone(),
174        };
175
176        self.results
177            .extend(suite_results.individual_results.clone());
178
179        println!(
180            "Benchmark suite completed in {:.2}s",
181            total_duration.as_secs_f64()
182        );
183
184        Ok(suite_results)
185    }
186
187    /// Run a single benchmark iteration
188    #[allow(non_snake_case)]
189    fn run_single_benchmark(
190        &self,
191        method: &dyn BenchmarkableMethod,
192        method_name: &str,
193        dataset: &BenchmarkDataset,
194        iteration: usize,
195    ) -> Result<BenchmarkResult> {
196        let start_time = Instant::now();
197
198        // Split data if cross-validation is configured
199        let (train_indices, test_indices) = if self.config.use_cross_validation {
200            self.create_cv_split(dataset.X.nrows(), iteration % self.config.cv_folds)?
201        } else {
202            self.create_holdout_split(dataset.X.nrows())?
203        };
204
205        // Extract training and test data
206        let X_train = self.extract_samples(dataset.X.view(), &train_indices);
207        let y_train = self.extract_targets(dataset.y.view(), &train_indices);
208        let X_test = self.extract_samples(dataset.X.view(), &test_indices);
209        let y_test = self.extract_targets(dataset.y.view(), &test_indices);
210
211        // Measure memory usage before method execution
212        let memory_before = self.get_memory_usage();
213
214        // Execute feature selection method
215        let selected_features = match method.select_features(X_train.view(), y_train.view()) {
216            Ok(features) => features,
217            Err(e) => return Err(BenchmarkError::MethodExecutionFailed(format!("{:?}", e)).into()),
218        };
219
220        let selection_time = start_time.elapsed();
221
222        // Measure memory usage after method execution
223        let memory_after = self.get_memory_usage();
224        let memory_delta = memory_after.saturating_sub(memory_before);
225
226        // Evaluate selected features
227        let evaluation_start = Instant::now();
228        let evaluation_metrics = self.evaluate_feature_selection(
229            X_train.view(),
230            y_train.view(),
231            X_test.view(),
232            y_test.view(),
233            &selected_features,
234            &dataset.ground_truth,
235        )?;
236        let evaluation_time = evaluation_start.elapsed();
237
238        // Compute feature quality metrics
239        let feature_metrics =
240            self.compute_feature_metrics(dataset.X.view(), dataset.y.view(), &selected_features)?;
241
242        Ok(BenchmarkResult {
243            method_name: method_name.to_string(),
244            dataset_name: dataset.name.clone(),
245            iteration,
246            selected_features: selected_features.clone(),
247            n_features_selected: selected_features.len(),
248            n_features_total: dataset.X.ncols(),
249            selection_time,
250            evaluation_time,
251            memory_usage: memory_delta,
252            evaluation_metrics,
253            feature_metrics,
254            dataset_properties: dataset.properties.clone(),
255        })
256    }
257
258    /// Create synthetic linear dataset with known relevant features
259    fn create_synthetic_linear_dataset(
260        &self,
261        n_samples: usize,
262        n_features: usize,
263        n_relevant: usize,
264    ) -> Result<BenchmarkDataset> {
265        let mut X = Array2::zeros((n_samples, n_features));
266        let mut y = Array1::zeros(n_samples);
267
268        // Generate relevant features with linear relationship to target
269        for i in 0..n_samples {
270            let mut target = 0.0;
271
272            for j in 0..n_relevant {
273                let value = thread_rng().gen::<f64>() * 10.0 - 5.0; // Range [-5, 5]
274                X[[i, j]] = value;
275                target += value * (j + 1) as f64; // Different coefficients for each feature
276            }
277
278            // Add noise features
279            for j in n_relevant..n_features {
280                X[[i, j]] = thread_rng().gen::<f64>() * 10.0 - 5.0;
281            }
282
283            // Add noise to target
284            target += (thread_rng().gen::<f64>() - 0.5) * 2.0;
285            y[i] = target;
286        }
287
288        let ground_truth = Some((0..n_relevant).collect());
289        let properties = DatasetProperties {
290            task_type: TaskType::Regression,
291            n_informative_features: n_relevant,
292            noise_level: 0.1,
293            correlation_structure: CorrelationStructure::Linear,
294            feature_types: vec![FeatureType::Continuous; n_features],
295        };
296
297        Ok(BenchmarkDataset {
298            name: format!("SyntheticLinear_{}x{}", n_samples, n_features),
299            X,
300            y,
301            ground_truth,
302            properties,
303        })
304    }
305
306    /// Create synthetic nonlinear dataset
307    fn create_synthetic_nonlinear_dataset(
308        &self,
309        n_samples: usize,
310        n_features: usize,
311        n_relevant: usize,
312    ) -> Result<BenchmarkDataset> {
313        let mut X = Array2::zeros((n_samples, n_features));
314        let mut y = Array1::zeros(n_samples);
315
316        for i in 0..n_samples {
317            let mut target = 0.0;
318
319            // Generate relevant features with nonlinear relationship
320            for j in 0..n_relevant {
321                let value = thread_rng().gen::<f64>() * 6.0 - 3.0; // Range [-3, 3]
322                X[[i, j]] = value;
323
324                // Nonlinear relationships: quadratic, sine, exponential
325                match j % 3 {
326                    0 => target += value * value,
327                    1 => target += (value * std::f64::consts::PI / 2.0).sin() * 2.0,
328                    2 => target += (value.abs()).ln().max(-5.0),
329                    _ => unreachable!(),
330                }
331            }
332
333            // Noise features
334            for j in n_relevant..n_features {
335                X[[i, j]] = thread_rng().gen::<f64>() * 6.0 - 3.0;
336            }
337
338            y[i] = target + (thread_rng().gen::<f64>() - 0.5) * 0.5;
339        }
340
341        let ground_truth = Some((0..n_relevant).collect());
342        let properties = DatasetProperties {
343            task_type: TaskType::Regression,
344            n_informative_features: n_relevant,
345            noise_level: 0.15,
346            correlation_structure: CorrelationStructure::Nonlinear,
347            feature_types: vec![FeatureType::Continuous; n_features],
348        };
349
350        Ok(BenchmarkDataset {
351            name: format!("SyntheticNonlinear_{}x{}", n_samples, n_features),
352            X,
353            y,
354            ground_truth,
355            properties,
356        })
357    }
358
359    /// Create high-dimensional dataset (p >> n scenario)
360    fn create_high_dimensional_dataset(
361        &self,
362        n_samples: usize,
363        n_features: usize,
364        n_relevant: usize,
365    ) -> Result<BenchmarkDataset> {
366        let mut X = Array2::zeros((n_samples, n_features));
367        let mut y = Array1::zeros(n_samples);
368
369        for i in 0..n_samples {
370            let mut target = 0.0;
371
372            // Sparse relevant features
373            for j in 0..n_relevant {
374                let value = thread_rng().gen::<f64>() * 4.0 - 2.0;
375                X[[i, j]] = value;
376                target += value * (1.0 / (j + 1) as f64); // Decreasing importance
377            }
378
379            // Many noise features
380            for j in n_relevant..n_features {
381                X[[i, j]] = thread_rng().gen::<f64>() * 4.0 - 2.0;
382            }
383
384            y[i] = if target > 0.0 { 1.0 } else { 0.0 }; // Binary classification
385        }
386
387        let ground_truth = Some((0..n_relevant).collect());
388        let properties = DatasetProperties {
389            task_type: TaskType::BinaryClassification,
390            n_informative_features: n_relevant,
391            noise_level: 0.2,
392            correlation_structure: CorrelationStructure::Sparse,
393            feature_types: vec![FeatureType::Continuous; n_features],
394        };
395
396        Ok(BenchmarkDataset {
397            name: format!("HighDimensional_{}x{}", n_samples, n_features),
398            X,
399            y,
400            ground_truth,
401            properties,
402        })
403    }
404
405    /// Create dataset with correlated features
406    fn create_correlated_features_dataset(
407        &self,
408        n_samples: usize,
409        n_features: usize,
410        n_relevant: usize,
411    ) -> Result<BenchmarkDataset> {
412        let mut X = Array2::zeros((n_samples, n_features));
413        let mut y = Array1::zeros(n_samples);
414
415        for i in 0..n_samples {
416            let mut target = 0.0;
417
418            // Generate base relevant features
419            let base_features: Vec<f64> = (0..n_relevant)
420                .map(|_| thread_rng().gen::<f64>() * 4.0 - 2.0)
421                .collect();
422
423            for j in 0..n_relevant {
424                X[[i, j]] = base_features[j];
425                target += base_features[j] * (j + 1) as f64;
426            }
427
428            // Add correlated copies and linear combinations
429            let mut feature_idx = n_relevant;
430            for j in 0..n_relevant.min((n_features - n_relevant) / 3) {
431                if feature_idx < n_features {
432                    // Highly correlated copy with noise
433                    X[[i, feature_idx]] =
434                        base_features[j] + (thread_rng().gen::<f64>() - 0.5) * 0.2;
435                    feature_idx += 1;
436                }
437
438                if feature_idx < n_features && j + 1 < base_features.len() {
439                    // Linear combination of two features
440                    X[[i, feature_idx]] = 0.5 * base_features[j] + 0.3 * base_features[j + 1];
441                    feature_idx += 1;
442                }
443            }
444
445            // Fill remaining with noise
446            for j in feature_idx..n_features {
447                X[[i, j]] = thread_rng().gen::<f64>() * 4.0 - 2.0;
448            }
449
450            y[i] = target + (thread_rng().gen::<f64>() - 0.5) * 1.0;
451        }
452
453        let ground_truth = Some((0..n_relevant).collect());
454        let properties = DatasetProperties {
455            task_type: TaskType::Regression,
456            n_informative_features: n_relevant,
457            noise_level: 0.25,
458            correlation_structure: CorrelationStructure::HighlyCorrelated,
459            feature_types: vec![FeatureType::Continuous; n_features],
460        };
461
462        Ok(BenchmarkDataset {
463            name: format!("Correlated_{}x{}", n_samples, n_features),
464            X,
465            y,
466            ground_truth,
467            properties,
468        })
469    }
470
471    /// Create noisy dataset
472    fn create_noisy_dataset(
473        &self,
474        n_samples: usize,
475        n_features: usize,
476        n_relevant: usize,
477    ) -> Result<BenchmarkDataset> {
478        let mut X = Array2::zeros((n_samples, n_features));
479        let mut y = Array1::zeros(n_samples);
480
481        for i in 0..n_samples {
482            let mut target = 0.0;
483
484            for j in 0..n_relevant {
485                let value = thread_rng().gen::<f64>() * 8.0 - 4.0;
486                X[[i, j]] = value;
487                target += value * (j + 1) as f64;
488            }
489
490            // Noise features
491            for j in n_relevant..n_features {
492                X[[i, j]] = thread_rng().gen::<f64>() * 8.0 - 4.0;
493            }
494
495            // Heavy noise on target
496            let noise = (thread_rng().gen::<f64>() - 0.5) * target.abs() * 0.5;
497            y[i] = target + noise;
498        }
499
500        let ground_truth = Some((0..n_relevant).collect());
501        let properties = DatasetProperties {
502            task_type: TaskType::Regression,
503            n_informative_features: n_relevant,
504            noise_level: 0.5,
505            correlation_structure: CorrelationStructure::Noisy,
506            feature_types: vec![FeatureType::Continuous; n_features],
507        };
508
509        Ok(BenchmarkDataset {
510            name: format!("Noisy_{}x{}", n_samples, n_features),
511            X,
512            y,
513            ground_truth,
514            properties,
515        })
516    }
517
518    // Helper methods
519    fn create_cv_split(&self, n_samples: usize, fold: usize) -> Result<(Vec<usize>, Vec<usize>)> {
520        let fold_size = n_samples / self.config.cv_folds;
521        let start = (fold % self.config.cv_folds) * fold_size;
522        let end = if fold % self.config.cv_folds == self.config.cv_folds - 1 {
523            n_samples
524        } else {
525            start + fold_size
526        };
527
528        let test_indices: Vec<usize> = (start..end).collect();
529        let train_indices: Vec<usize> = (0..start).chain(end..n_samples).collect();
530
531        Ok((train_indices, test_indices))
532    }
533
534    fn create_holdout_split(&self, n_samples: usize) -> Result<(Vec<usize>, Vec<usize>)> {
535        let n_test = (n_samples as f64 * self.config.test_size) as usize;
536        let n_train = n_samples - n_test;
537
538        // Simple sequential split for deterministic behavior
539        let train_indices: Vec<usize> = (0..n_train).collect();
540        let test_indices: Vec<usize> = (n_train..n_samples).collect();
541
542        Ok((train_indices, test_indices))
543    }
544
545    fn extract_samples(&self, X: ArrayView2<f64>, indices: &[usize]) -> Array2<f64> {
546        let mut samples = Array2::zeros((indices.len(), X.ncols()));
547        for (i, &idx) in indices.iter().enumerate() {
548            samples.row_mut(i).assign(&X.row(idx));
549        }
550        samples
551    }
552
553    fn extract_targets(&self, y: ArrayView1<f64>, indices: &[usize]) -> Array1<f64> {
554        let mut targets = Array1::zeros(indices.len());
555        for (i, &idx) in indices.iter().enumerate() {
556            targets[i] = y[idx];
557        }
558        targets
559    }
560
561    fn get_memory_usage(&self) -> usize {
562        // Simplified memory tracking - in a real implementation, this would
563        // use system calls or memory profiling tools
564        0
565    }
566
567    fn evaluate_feature_selection(
568        &self,
569        X_train: ArrayView2<f64>,
570        y_train: ArrayView1<f64>,
571        _X_test: ArrayView2<f64>,
572        _y_test: ArrayView1<f64>,
573        selected_features: &[usize],
574        ground_truth: &Option<Vec<usize>>,
575    ) -> Result<EvaluationMetrics> {
576        let mut metrics = EvaluationMetrics::default();
577
578        // Compute relevance score (correlation with target)
579        if !selected_features.is_empty() {
580            let mut total_relevance = 0.0;
581            for &feature_idx in selected_features {
582                if feature_idx < X_train.ncols() {
583                    let correlation =
584                        self.compute_correlation(X_train.column(feature_idx), y_train);
585                    total_relevance += correlation.abs();
586                }
587            }
588            metrics.relevance_score = total_relevance / selected_features.len() as f64;
589        }
590
591        // Compute redundancy (average pairwise correlation among selected features)
592        if selected_features.len() > 1 {
593            let mut total_correlation = 0.0;
594            let mut pair_count = 0;
595
596            for i in 0..selected_features.len() {
597                for j in (i + 1)..selected_features.len() {
598                    if selected_features[i] < X_train.ncols()
599                        && selected_features[j] < X_train.ncols()
600                    {
601                        let correlation = self.compute_correlation(
602                            X_train.column(selected_features[i]),
603                            X_train.column(selected_features[j]),
604                        );
605                        total_correlation += correlation.abs();
606                        pair_count += 1;
607                    }
608                }
609            }
610
611            if pair_count > 0 {
612                metrics.redundancy_score = total_correlation / pair_count as f64;
613            }
614        }
615
616        // Compute ground truth metrics if available
617        if let Some(true_features) = ground_truth {
618            let selected_set: std::collections::HashSet<_> = selected_features.iter().collect();
619            let true_set: std::collections::HashSet<_> = true_features.iter().collect();
620
621            let intersection = selected_set.intersection(&true_set).count() as f64;
622            let union = selected_set.union(&true_set).count() as f64;
623
624            if union > 0.0 {
625                metrics.jaccard_score = Some(intersection / union);
626            }
627
628            if !true_features.is_empty() {
629                metrics.precision = Some(intersection / selected_features.len() as f64);
630                metrics.recall = Some(intersection / true_features.len() as f64);
631
632                if let (Some(p), Some(r)) = (metrics.precision, metrics.recall) {
633                    if p + r > 0.0 {
634                        metrics.f1_score = Some(2.0 * p * r / (p + r));
635                    }
636                }
637            }
638        }
639
640        // Simplified predictive performance (correlation-based)
641        metrics.predictive_score = metrics.relevance_score * (1.0 - metrics.redundancy_score * 0.5);
642
643        Ok(metrics)
644    }
645
646    fn compute_feature_metrics(
647        &self,
648        X: ArrayView2<f64>,
649        y: ArrayView1<f64>,
650        selected_features: &[usize],
651    ) -> Result<FeatureMetrics> {
652        let mut metrics = FeatureMetrics::default();
653
654        if selected_features.is_empty() {
655            return Ok(metrics);
656        }
657
658        // Selection ratio
659        metrics.selection_ratio = selected_features.len() as f64 / X.ncols() as f64;
660
661        // Feature importance distribution
662        let mut importances = Vec::new();
663        for &feature_idx in selected_features {
664            if feature_idx < X.ncols() {
665                let importance = self.compute_correlation(X.column(feature_idx), y).abs();
666                importances.push(importance);
667            }
668        }
669
670        if !importances.is_empty() {
671            importances.sort_by(|a, b| b.partial_cmp(a).unwrap());
672            metrics.importance_distribution = ImportanceDistribution {
673                mean: importances.iter().sum::<f64>() / importances.len() as f64,
674                std: {
675                    let mean = importances.iter().sum::<f64>() / importances.len() as f64;
676                    let variance = importances.iter().map(|x| (x - mean).powi(2)).sum::<f64>()
677                        / importances.len() as f64;
678                    variance.sqrt()
679                },
680                max: importances[0],
681                min: importances[importances.len() - 1],
682            };
683        }
684
685        Ok(metrics)
686    }
687
688    fn compute_correlation(&self, x: ArrayView1<f64>, y: ArrayView1<f64>) -> f64 {
689        let n = x.len() as f64;
690        if n < 2.0 {
691            return 0.0;
692        }
693
694        let mean_x = x.mean().unwrap_or(0.0);
695        let mean_y = y.mean().unwrap_or(0.0);
696
697        let mut sum_xy = 0.0;
698        let mut sum_x2 = 0.0;
699        let mut sum_y2 = 0.0;
700
701        for i in 0..x.len() {
702            let dx = x[i] - mean_x;
703            let dy = y[i] - mean_y;
704            sum_xy += dx * dy;
705            sum_x2 += dx * dx;
706            sum_y2 += dy * dy;
707        }
708
709        let denom = (sum_x2 * sum_y2).sqrt();
710        if denom < 1e-10 {
711            0.0
712        } else {
713            sum_xy / denom
714        }
715    }
716
717    fn aggregate_method_results(&self, results: &[BenchmarkResult]) -> Result<BenchmarkResult> {
718        if results.is_empty() {
719            return Err(BenchmarkError::StatisticalAnalysisFailed.into());
720        }
721
722        let first = &results[0];
723
724        // Aggregate numerical metrics
725        let aggregate_evaluation_metrics = EvaluationMetrics {
726            relevance_score: results
727                .iter()
728                .map(|r| r.evaluation_metrics.relevance_score)
729                .sum::<f64>()
730                / results.len() as f64,
731            redundancy_score: results
732                .iter()
733                .map(|r| r.evaluation_metrics.redundancy_score)
734                .sum::<f64>()
735                / results.len() as f64,
736            predictive_score: results
737                .iter()
738                .map(|r| r.evaluation_metrics.predictive_score)
739                .sum::<f64>()
740                / results.len() as f64,
741            jaccard_score: if results
742                .iter()
743                .all(|r| r.evaluation_metrics.jaccard_score.is_some())
744            {
745                Some(
746                    results
747                        .iter()
748                        .map(|r| r.evaluation_metrics.jaccard_score.unwrap())
749                        .sum::<f64>()
750                        / results.len() as f64,
751                )
752            } else {
753                None
754            },
755            precision: if results
756                .iter()
757                .all(|r| r.evaluation_metrics.precision.is_some())
758            {
759                Some(
760                    results
761                        .iter()
762                        .map(|r| r.evaluation_metrics.precision.unwrap())
763                        .sum::<f64>()
764                        / results.len() as f64,
765                )
766            } else {
767                None
768            },
769            recall: if results
770                .iter()
771                .all(|r| r.evaluation_metrics.recall.is_some())
772            {
773                Some(
774                    results
775                        .iter()
776                        .map(|r| r.evaluation_metrics.recall.unwrap())
777                        .sum::<f64>()
778                        / results.len() as f64,
779                )
780            } else {
781                None
782            },
783            f1_score: if results
784                .iter()
785                .all(|r| r.evaluation_metrics.f1_score.is_some())
786            {
787                Some(
788                    results
789                        .iter()
790                        .map(|r| r.evaluation_metrics.f1_score.unwrap())
791                        .sum::<f64>()
792                        / results.len() as f64,
793                )
794            } else {
795                None
796            },
797        };
798
799        // Take most frequent feature selection
800        let most_common_features = first.selected_features.clone(); // Simplified
801
802        Ok(BenchmarkResult {
803            method_name: first.method_name.clone(),
804            dataset_name: first.dataset_name.clone(),
805            iteration: 999, // Special marker for aggregated result
806            selected_features: most_common_features,
807            n_features_selected: results.iter().map(|r| r.n_features_selected).sum::<usize>()
808                / results.len(),
809            n_features_total: first.n_features_total,
810            selection_time: Duration::from_nanos(
811                (results
812                    .iter()
813                    .map(|r| r.selection_time.as_nanos())
814                    .sum::<u128>()
815                    / results.len() as u128) as u64,
816            ),
817            evaluation_time: Duration::from_nanos(
818                (results
819                    .iter()
820                    .map(|r| r.evaluation_time.as_nanos())
821                    .sum::<u128>()
822                    / results.len() as u128) as u64,
823            ),
824            memory_usage: results.iter().map(|r| r.memory_usage).sum::<usize>() / results.len(),
825            evaluation_metrics: aggregate_evaluation_metrics,
826            feature_metrics: first.feature_metrics.clone(), // Simplified
827            dataset_properties: first.dataset_properties.clone(),
828        })
829    }
830
831    fn perform_statistical_analysis(
832        &self,
833        results: &[BenchmarkResult],
834    ) -> Result<StatisticalAnalysis> {
835        // Group results by method and dataset
836        let mut method_performances: HashMap<String, Vec<f64>> = HashMap::new();
837        let mut dataset_difficulties: HashMap<String, Vec<f64>> = HashMap::new();
838
839        for result in results {
840            method_performances
841                .entry(result.method_name.clone())
842                .or_default()
843                .push(result.evaluation_metrics.predictive_score);
844
845            dataset_difficulties
846                .entry(result.dataset_name.clone())
847                .or_default()
848                .push(result.evaluation_metrics.predictive_score);
849        }
850
851        // Compute method rankings
852        let mut method_rankings = Vec::new();
853        for (method, scores) in method_performances {
854            let mean_score = scores.iter().sum::<f64>() / scores.len() as f64;
855            let std_score = {
856                let variance = scores.iter().map(|s| (s - mean_score).powi(2)).sum::<f64>()
857                    / scores.len() as f64;
858                variance.sqrt()
859            };
860
861            method_rankings.push(MethodRanking {
862                method_name: method,
863                mean_score,
864                std_score,
865                scores,
866            });
867        }
868
869        method_rankings.sort_by(|a, b| b.mean_score.partial_cmp(&a.mean_score).unwrap());
870
871        // Compute dataset difficulties
872        let mut dataset_rankings = Vec::new();
873        for (dataset, scores) in dataset_difficulties {
874            let mean_score = scores.iter().sum::<f64>() / scores.len() as f64;
875
876            dataset_rankings.push(DatasetRanking {
877                dataset_name: dataset,
878                difficulty_score: 1.0 - mean_score, // Higher score = more difficult
879                mean_performance: mean_score,
880            });
881        }
882
883        dataset_rankings
884            .sort_by(|a, b| b.difficulty_score.partial_cmp(&a.difficulty_score).unwrap());
885
886        let overall_best_method = method_rankings.first().map(|r| r.method_name.clone());
887
888        Ok(StatisticalAnalysis {
889            method_rankings,
890            dataset_rankings,
891            overall_best_method,
892            performance_summary: self.compute_performance_summary(results)?,
893        })
894    }
895
896    fn compute_performance_summary(
897        &self,
898        results: &[BenchmarkResult],
899    ) -> Result<PerformanceSummary> {
900        if results.is_empty() {
901            return Err(BenchmarkError::StatisticalAnalysisFailed.into());
902        }
903
904        let scores: Vec<f64> = results
905            .iter()
906            .map(|r| r.evaluation_metrics.predictive_score)
907            .collect();
908        let mean_score = scores.iter().sum::<f64>() / scores.len() as f64;
909
910        let std_score = {
911            let variance =
912                scores.iter().map(|s| (s - mean_score).powi(2)).sum::<f64>() / scores.len() as f64;
913            variance.sqrt()
914        };
915
916        let min_score = scores.iter().fold(f64::INFINITY, |acc, &x| acc.min(x));
917        let max_score = scores.iter().fold(f64::NEG_INFINITY, |acc, &x| acc.max(x));
918
919        let total_time: Duration = results
920            .iter()
921            .map(|r| r.selection_time + r.evaluation_time)
922            .sum();
923        let avg_features_selected = results.iter().map(|r| r.n_features_selected).sum::<usize>()
924            as f64
925            / results.len() as f64;
926
927        Ok(PerformanceSummary {
928            mean_score,
929            std_score,
930            min_score,
931            max_score,
932            total_execution_time: total_time,
933            avg_features_selected,
934        })
935    }
936
937    fn generate_rankings(&self, results: &[BenchmarkResult]) -> Result<Vec<MethodRanking>> {
938        let mut method_scores: HashMap<String, Vec<f64>> = HashMap::new();
939
940        for result in results {
941            method_scores
942                .entry(result.method_name.clone())
943                .or_default()
944                .push(result.evaluation_metrics.predictive_score);
945        }
946
947        let mut rankings = Vec::new();
948        for (method, scores) in method_scores {
949            let mean_score = scores.iter().sum::<f64>() / scores.len() as f64;
950            let std_score = {
951                let variance = scores.iter().map(|s| (s - mean_score).powi(2)).sum::<f64>()
952                    / scores.len() as f64;
953                variance.sqrt()
954            };
955
956            rankings.push(MethodRanking {
957                method_name: method,
958                mean_score,
959                std_score,
960                scores,
961            });
962        }
963
964        rankings.sort_by(|a, b| b.mean_score.partial_cmp(&a.mean_score).unwrap());
965        Ok(rankings)
966    }
967}
968
969/// Trait for benchmarkable feature selection methods
970pub trait BenchmarkableMethod: std::fmt::Debug + Send + Sync {
971    fn select_features(
972        &self,
973        X: ArrayView2<f64>,
974        y: ArrayView1<f64>,
975    ) -> std::result::Result<Vec<usize>, BenchmarkError>;
976    fn method_name(&self) -> &str;
977    fn method_params(&self) -> HashMap<String, String>;
978}
979
980// Data structures for benchmarking
981
982#[derive(Debug, Clone)]
983pub struct BenchmarkDataset {
984    pub name: String,
985    pub X: Array2<f64>,
986    pub y: Array1<f64>,
987    pub ground_truth: Option<Vec<usize>>,
988    pub properties: DatasetProperties,
989}
990
991#[derive(Debug, Clone)]
992pub struct DatasetProperties {
993    pub task_type: TaskType,
994    pub n_informative_features: usize,
995    pub noise_level: f64,
996    pub correlation_structure: CorrelationStructure,
997    pub feature_types: Vec<FeatureType>,
998}
999
1000#[derive(Debug, Clone, PartialEq)]
1001pub enum TaskType {
1002    /// BinaryClassification
1003    BinaryClassification,
1004    /// MultiClassification
1005    MultiClassification,
1006    /// Regression
1007    Regression,
1008    /// MultiLabel
1009    MultiLabel,
1010}
1011
1012#[derive(Debug, Clone, PartialEq)]
1013pub enum CorrelationStructure {
1014    /// Linear
1015    Linear,
1016    /// Nonlinear
1017    Nonlinear,
1018    /// Sparse
1019    Sparse,
1020    /// HighlyCorrelated
1021    HighlyCorrelated,
1022    /// Noisy
1023    Noisy,
1024}
1025
1026#[derive(Debug, Clone, PartialEq)]
1027pub enum FeatureType {
1028    /// Continuous
1029    Continuous,
1030    /// Discrete
1031    Discrete,
1032    /// Binary
1033    Binary,
1034    /// Categorical
1035    Categorical,
1036}
1037
1038#[derive(Debug, Clone)]
1039pub struct BenchmarkConfig {
1040    pub n_iterations: usize,
1041    pub use_cross_validation: bool,
1042    pub cv_folds: usize,
1043    pub test_size: f64,
1044    pub timeout_seconds: Option<u64>,
1045    pub memory_limit_mb: Option<usize>,
1046    pub verbose: bool,
1047    pub random_seed: Option<u64>,
1048}
1049
1050impl Default for BenchmarkConfig {
1051    fn default() -> Self {
1052        Self {
1053            n_iterations: 5,
1054            use_cross_validation: true,
1055            cv_folds: 3,
1056            test_size: 0.3,
1057            timeout_seconds: Some(300),  // 5 minutes
1058            memory_limit_mb: Some(2048), // 2 GB
1059            verbose: true,
1060            random_seed: Some(42),
1061        }
1062    }
1063}
1064
1065#[derive(Debug, Clone)]
1066pub struct BenchmarkResult {
1067    pub method_name: String,
1068    pub dataset_name: String,
1069    pub iteration: usize,
1070    pub selected_features: Vec<usize>,
1071    pub n_features_selected: usize,
1072    pub n_features_total: usize,
1073    pub selection_time: Duration,
1074    pub evaluation_time: Duration,
1075    pub memory_usage: usize,
1076    pub evaluation_metrics: EvaluationMetrics,
1077    pub feature_metrics: FeatureMetrics,
1078    pub dataset_properties: DatasetProperties,
1079}
1080
1081#[derive(Debug, Clone, Default)]
1082pub struct EvaluationMetrics {
1083    pub relevance_score: f64,
1084    pub redundancy_score: f64,
1085    pub predictive_score: f64,
1086    pub jaccard_score: Option<f64>,
1087    pub precision: Option<f64>,
1088    pub recall: Option<f64>,
1089    pub f1_score: Option<f64>,
1090}
1091
1092#[derive(Debug, Clone, Default)]
1093pub struct FeatureMetrics {
1094    pub selection_ratio: f64,
1095    pub importance_distribution: ImportanceDistribution,
1096}
1097
1098#[derive(Debug, Clone, Default)]
1099pub struct ImportanceDistribution {
1100    pub mean: f64,
1101    pub std: f64,
1102    pub max: f64,
1103    pub min: f64,
1104}
1105
1106#[derive(Debug, Clone)]
1107pub struct BenchmarkSuiteResults {
1108    pub individual_results: Vec<BenchmarkResult>,
1109    pub statistical_analysis: StatisticalAnalysis,
1110    pub rankings: Vec<MethodRanking>,
1111    pub execution_summary: ExecutionSummary,
1112    pub configuration: BenchmarkConfig,
1113}
1114
1115impl BenchmarkSuiteResults {
1116    /// Generate comprehensive benchmark report
1117    pub fn report(&self) -> String {
1118        let mut report = String::new();
1119
1120        report.push_str(
1121            "╔═══════════════════════════════════════════════════════════════════════════╗\n",
1122        );
1123        report.push_str(
1124            "║                    Feature Selection Benchmark Report                    ║\n",
1125        );
1126        report.push_str(
1127            "╚═══════════════════════════════════════════════════════════════════════════╝\n\n",
1128        );
1129
1130        // Execution summary
1131        report.push_str("=== Execution Summary ===\n");
1132        report.push_str(&format!(
1133            "Total Duration: {:.2}s\n",
1134            self.execution_summary.total_duration.as_secs_f64()
1135        ));
1136        report.push_str(&format!(
1137            "Datasets: {}\n",
1138            self.execution_summary.n_datasets
1139        ));
1140        report.push_str(&format!("Methods: {}\n", self.execution_summary.n_methods));
1141        report.push_str(&format!(
1142            "Total Runs: {}\n",
1143            self.execution_summary.n_total_runs
1144        ));
1145
1146        // Method rankings
1147        if !self.rankings.is_empty() {
1148            report.push_str("\n=== Method Rankings ===\n");
1149            for (i, ranking) in self.rankings.iter().take(10).enumerate() {
1150                report.push_str(&format!(
1151                    "{}. {} - Score: {:.4} ± {:.4}\n",
1152                    i + 1,
1153                    ranking.method_name,
1154                    ranking.mean_score,
1155                    ranking.std_score
1156                ));
1157            }
1158        }
1159
1160        // Best method details
1161        if let Some(best_method) = &self.statistical_analysis.overall_best_method {
1162            report.push_str(&format!("\n=== Best Method: {} ===\n", best_method));
1163
1164            if let Some(best_ranking) = self.rankings.first() {
1165                report.push_str(&format!(
1166                    "Mean Performance: {:.4}\n",
1167                    best_ranking.mean_score
1168                ));
1169                report.push_str(&format!(
1170                    "Consistency (StdDev): {:.4}\n",
1171                    best_ranking.std_score
1172                ));
1173                report.push_str(&format!("Runs: {}\n", best_ranking.scores.len()));
1174            }
1175        }
1176
1177        // Dataset difficulty analysis
1178        if !self.statistical_analysis.dataset_rankings.is_empty() {
1179            report.push_str("\n=== Dataset Difficulty Ranking ===\n");
1180            for (i, dataset) in self
1181                .statistical_analysis
1182                .dataset_rankings
1183                .iter()
1184                .take(5)
1185                .enumerate()
1186            {
1187                report.push_str(&format!(
1188                    "{}. {} - Difficulty: {:.4} (Avg Performance: {:.4})\n",
1189                    i + 1,
1190                    dataset.dataset_name,
1191                    dataset.difficulty_score,
1192                    dataset.mean_performance
1193                ));
1194            }
1195        }
1196
1197        // Performance summary
1198        report.push_str("\n=== Overall Performance Summary ===\n");
1199        let summary = &self.statistical_analysis.performance_summary;
1200        report.push_str(&format!(
1201            "Mean Score: {:.4} ± {:.4}\n",
1202            summary.mean_score, summary.std_score
1203        ));
1204        report.push_str(&format!(
1205            "Score Range: [{:.4}, {:.4}]\n",
1206            summary.min_score, summary.max_score
1207        ));
1208        report.push_str(&format!(
1209            "Total Time: {:.2}s\n",
1210            summary.total_execution_time.as_secs_f64()
1211        ));
1212        report.push_str(&format!(
1213            "Avg Features Selected: {:.1}\n",
1214            summary.avg_features_selected
1215        ));
1216
1217        report
1218    }
1219
1220    /// Export results to CSV format (simplified)
1221    pub fn to_csv(&self) -> String {
1222        let mut csv = String::new();
1223
1224        csv.push_str("Method,Dataset,Iteration,FeaturesSelected,SelectionTime,RelevanceScore,RedundancyScore,PredictiveScore\n");
1225
1226        for result in &self.individual_results {
1227            csv.push_str(&format!(
1228                "{},{},{},{},{:.6},{:.4},{:.4},{:.4}\n",
1229                result.method_name,
1230                result.dataset_name,
1231                result.iteration,
1232                result.n_features_selected,
1233                result.selection_time.as_secs_f64(),
1234                result.evaluation_metrics.relevance_score,
1235                result.evaluation_metrics.redundancy_score,
1236                result.evaluation_metrics.predictive_score
1237            ));
1238        }
1239
1240        csv
1241    }
1242}
1243
1244#[derive(Debug, Clone)]
1245pub struct StatisticalAnalysis {
1246    pub method_rankings: Vec<MethodRanking>,
1247    pub dataset_rankings: Vec<DatasetRanking>,
1248    pub overall_best_method: Option<String>,
1249    pub performance_summary: PerformanceSummary,
1250}
1251
1252#[derive(Debug, Clone)]
1253pub struct MethodRanking {
1254    pub method_name: String,
1255    pub mean_score: f64,
1256    pub std_score: f64,
1257    pub scores: Vec<f64>,
1258}
1259
1260#[derive(Debug, Clone)]
1261pub struct DatasetRanking {
1262    pub dataset_name: String,
1263    pub difficulty_score: f64,
1264    pub mean_performance: f64,
1265}
1266
1267#[derive(Debug, Clone)]
1268pub struct PerformanceSummary {
1269    pub mean_score: f64,
1270    pub std_score: f64,
1271    pub min_score: f64,
1272    pub max_score: f64,
1273    pub total_execution_time: Duration,
1274    pub avg_features_selected: f64,
1275}
1276
1277#[derive(Debug, Clone)]
1278pub struct ExecutionSummary {
1279    pub total_duration: Duration,
1280    pub n_datasets: usize,
1281    pub n_methods: usize,
1282    pub n_total_runs: usize,
1283}
1284
1285// Example benchmarkable method implementations
1286#[derive(Debug)]
1287pub struct UnivariateFilterMethod {
1288    k: usize,
1289}
1290
1291impl UnivariateFilterMethod {
1292    pub fn new(k: usize) -> Self {
1293        Self { k }
1294    }
1295}
1296
1297impl BenchmarkableMethod for UnivariateFilterMethod {
1298    fn select_features(
1299        &self,
1300        X: ArrayView2<f64>,
1301        y: ArrayView1<f64>,
1302    ) -> std::result::Result<Vec<usize>, BenchmarkError> {
1303        let mut correlations: Vec<(usize, f64)> = Vec::new();
1304
1305        for i in 0..X.ncols() {
1306            let correlation = self.compute_correlation(X.column(i), y);
1307            correlations.push((i, correlation.abs()));
1308        }
1309
1310        correlations.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
1311
1312        Ok(correlations
1313            .into_iter()
1314            .take(self.k)
1315            .map(|(idx, _)| idx)
1316            .collect())
1317    }
1318
1319    fn method_name(&self) -> &str {
1320        "UnivariateFilter"
1321    }
1322
1323    fn method_params(&self) -> HashMap<String, String> {
1324        let mut params = HashMap::new();
1325        params.insert("k".to_string(), self.k.to_string());
1326        params
1327    }
1328}
1329
1330impl UnivariateFilterMethod {
1331    fn compute_correlation(&self, x: ArrayView1<f64>, y: ArrayView1<f64>) -> f64 {
1332        let n = x.len() as f64;
1333        if n < 2.0 {
1334            return 0.0;
1335        }
1336
1337        let mean_x = x.mean().unwrap_or(0.0);
1338        let mean_y = y.mean().unwrap_or(0.0);
1339
1340        let mut sum_xy = 0.0;
1341        let mut sum_x2 = 0.0;
1342        let mut sum_y2 = 0.0;
1343
1344        for i in 0..x.len() {
1345            let dx = x[i] - mean_x;
1346            let dy = y[i] - mean_y;
1347            sum_xy += dx * dy;
1348            sum_x2 += dx * dx;
1349            sum_y2 += dy * dy;
1350        }
1351
1352        let denom = (sum_x2 * sum_y2).sqrt();
1353        if denom < 1e-10 {
1354            0.0
1355        } else {
1356            sum_xy / denom
1357        }
1358    }
1359}
1360
1361#[derive(Debug)]
1362pub struct RandomSelectionMethod {
1363    k: usize,
1364}
1365
1366impl RandomSelectionMethod {
1367    pub fn new(k: usize) -> Self {
1368        Self { k }
1369    }
1370}
1371
1372impl BenchmarkableMethod for RandomSelectionMethod {
1373    fn select_features(
1374        &self,
1375        X: ArrayView2<f64>,
1376        _y: ArrayView1<f64>,
1377    ) -> std::result::Result<Vec<usize>, BenchmarkError> {
1378        let mut features: Vec<usize> = (0..X.ncols()).collect();
1379
1380        // Simple random shuffling
1381        for i in (1..features.len()).rev() {
1382            let j = (thread_rng().gen::<f64>() * (i + 1) as f64) as usize;
1383            features.swap(i, j);
1384        }
1385
1386        Ok(features.into_iter().take(self.k.min(X.ncols())).collect())
1387    }
1388
1389    fn method_name(&self) -> &str {
1390        "RandomSelection"
1391    }
1392
1393    fn method_params(&self) -> HashMap<String, String> {
1394        let mut params = HashMap::new();
1395        params.insert("k".to_string(), self.k.to_string());
1396        params
1397    }
1398}
1399
1400#[allow(non_snake_case)]
1401#[cfg(test)]
1402mod tests {
1403    use super::*;
1404    use scirs2_core::ndarray::array;
1405
1406    #[test]
1407    fn test_synthetic_dataset_creation() {
1408        let benchmark = FeatureSelectionBenchmark::new();
1409
1410        let dataset = benchmark
1411            .create_synthetic_linear_dataset(100, 20, 5)
1412            .unwrap();
1413
1414        assert_eq!(dataset.X.nrows(), 100);
1415        assert_eq!(dataset.X.ncols(), 20);
1416        assert_eq!(dataset.y.len(), 100);
1417        assert_eq!(dataset.ground_truth.as_ref().unwrap().len(), 5);
1418        assert_eq!(dataset.properties.n_informative_features, 5);
1419    }
1420
1421    #[test]
1422    #[allow(non_snake_case)]
1423    fn test_benchmarkable_method() {
1424        let method = UnivariateFilterMethod::new(5);
1425
1426        let X = array![
1427            [1.0, 2.0, 3.0, 0.1],
1428            [2.0, 3.0, 4.0, 0.2],
1429            [3.0, 4.0, 5.0, 0.3],
1430            [4.0, 5.0, 6.0, 0.4],
1431        ];
1432        let y = array![1.0, 2.0, 3.0, 4.0];
1433
1434        let selected = method.select_features(X.view(), y.view()).unwrap();
1435
1436        assert!(!selected.is_empty());
1437        assert!(selected.len() <= 4); // Can't select more than available
1438        assert!(selected.iter().all(|&idx| idx < X.ncols()));
1439    }
1440
1441    #[test]
1442    #[allow(non_snake_case)]
1443    fn test_benchmark_execution() {
1444        let mut benchmark = FeatureSelectionBenchmark::new();
1445        benchmark.config.n_iterations = 1; // Fast test
1446        benchmark.config.verbose = false;
1447
1448        // Add simple dataset
1449        let X = array![
1450            [1.0, 2.0, 0.1, 0.2],
1451            [2.0, 3.0, 0.2, 0.3],
1452            [3.0, 4.0, 0.3, 0.4],
1453            [4.0, 5.0, 0.4, 0.5],
1454            [5.0, 6.0, 0.5, 0.6],
1455        ];
1456        let y = array![1.0, 2.0, 3.0, 4.0, 5.0];
1457
1458        let dataset = BenchmarkDataset {
1459            name: "TestDataset".to_string(),
1460            X,
1461            y,
1462            ground_truth: Some(vec![0, 1]),
1463            properties: DatasetProperties {
1464                task_type: TaskType::Regression,
1465                n_informative_features: 2,
1466                noise_level: 0.1,
1467                correlation_structure: CorrelationStructure::Linear,
1468                feature_types: vec![FeatureType::Continuous; 4],
1469            },
1470        };
1471
1472        benchmark.add_dataset(dataset).unwrap();
1473        benchmark.register_method(
1474            "UnivariateFilt".to_string(),
1475            Box::new(UnivariateFilterMethod::new(2)),
1476        );
1477        benchmark.register_method(
1478            "Random".to_string(),
1479            Box::new(RandomSelectionMethod::new(2)),
1480        );
1481
1482        let results = benchmark.run_benchmark().unwrap();
1483
1484        assert_eq!(results.individual_results.len(), 2); // 2 methods
1485        assert_eq!(results.rankings.len(), 2);
1486        assert!(results.statistical_analysis.overall_best_method.is_some());
1487
1488        let report = results.report();
1489        assert!(report.contains("Benchmark Report"));
1490        assert!(report.contains("Method Rankings"));
1491
1492        let csv = results.to_csv();
1493        assert!(csv.contains("Method,Dataset"));
1494    }
1495
1496    #[test]
1497    #[allow(non_snake_case)]
1498    fn test_evaluation_metrics() {
1499        let benchmark = FeatureSelectionBenchmark::new();
1500
1501        let X = array![[1.0, 2.0, 0.1], [2.0, 3.0, 0.2], [3.0, 4.0, 0.3],];
1502        let y = array![1.0, 2.0, 3.0];
1503
1504        let selected_features = vec![0, 1];
1505        let ground_truth = Some(vec![0, 1]);
1506
1507        let metrics = benchmark
1508            .evaluate_feature_selection(
1509                X.view(),
1510                y.view(),
1511                X.view(),
1512                y.view(),
1513                &selected_features,
1514                &ground_truth,
1515            )
1516            .unwrap();
1517
1518        assert!(metrics.relevance_score >= 0.0);
1519        assert!(metrics.redundancy_score >= 0.0);
1520        assert!(metrics.predictive_score >= 0.0);
1521        assert!(metrics.jaccard_score.is_some());
1522        assert!(metrics.precision.is_some());
1523        assert!(metrics.recall.is_some());
1524        assert!(metrics.f1_score.is_some());
1525    }
1526}