sklears_feature_selection/evaluation/
feature_visualization.rs

1//! Feature set visualization utilities for feature selection analysis
2//!
3//! This module provides comprehensive text-based visualization capabilities for feature selection
4//! analysis, including stability plots, importance visualizations, and selection frequency analysis.
5//! All implementations follow the SciRS2 policy.
6
7use scirs2_core::ndarray::ArrayView2;
8use sklears_core::error::{Result as SklResult, SklearsError};
9type Result<T> = SklResult<T>;
10
11impl From<VisualizationError> for SklearsError {
12    fn from(err: VisualizationError) -> Self {
13        SklearsError::FitError(format!("Visualization error: {}", err))
14    }
15}
16use std::collections::HashMap;
17use thiserror::Error;
18
19#[derive(Debug, Error)]
20pub enum VisualizationError {
21    #[error("Empty data provided for visualization")]
22    EmptyData,
23    #[error("Invalid feature indices")]
24    InvalidFeatureIndices,
25    #[error("Mismatched data dimensions")]
26    DimensionMismatch,
27    #[error("Invalid visualization parameters")]
28    InvalidParameters,
29}
30
31/// Text-based feature importance visualization
32#[derive(Debug, Clone)]
33pub struct FeatureImportancePlots;
34
35impl FeatureImportancePlots {
36    /// Create horizontal bar chart for feature importance
37    pub fn horizontal_bar_chart(
38        feature_indices: &[usize],
39        importance_scores: &[f64],
40        feature_names: Option<&[String]>,
41        max_width: usize,
42        title: &str,
43    ) -> Result<String> {
44        if feature_indices.len() != importance_scores.len() {
45            return Err(VisualizationError::DimensionMismatch.into());
46        }
47
48        if feature_indices.is_empty() {
49            return Err(VisualizationError::EmptyData.into());
50        }
51
52        let mut chart = String::new();
53
54        // Title
55        chart.push_str(&format!("=== {} ===\n\n", title));
56
57        // Find max importance for scaling
58        let max_importance = importance_scores
59            .iter()
60            .fold(0.0f64, |acc, &x| acc.max(x.abs()));
61
62        if max_importance <= 0.0 {
63            chart.push_str("All features have zero importance\n");
64            return Ok(chart);
65        }
66
67        // Create sorted indices by importance
68        let mut sorted_indices: Vec<usize> = (0..feature_indices.len()).collect();
69        sorted_indices.sort_by(|&a, &b| {
70            importance_scores[b]
71                .abs()
72                .partial_cmp(&importance_scores[a].abs())
73                .unwrap()
74        });
75
76        // Draw bars
77        for &idx in &sorted_indices {
78            let feature_idx = feature_indices[idx];
79            let importance = importance_scores[idx];
80            let normalized_importance = importance / max_importance;
81
82            let bar_width = ((normalized_importance.abs() * max_width as f64) as usize).max(1);
83            let bar_char = if importance >= 0.0 { '█' } else { '▓' };
84
85            let feature_name = if let Some(names) = feature_names {
86                if idx < names.len() {
87                    names[idx].clone()
88                } else {
89                    format!("Feature_{}", feature_idx)
90                }
91            } else {
92                format!("Feature_{}", feature_idx)
93            };
94
95            let bar = bar_char.to_string().repeat(bar_width);
96            chart.push_str(&format!(
97                "{:>15} |{:<width$} {:>8.4}\n",
98                feature_name,
99                bar,
100                importance,
101                width = max_width + 2
102            ));
103        }
104
105        // Legend
106        chart.push_str(&format!("\n{:>15} |{}\n", "", "-".repeat(max_width + 2)));
107        chart.push_str(&format!(
108            "{:>15} |{:>width$}\n",
109            "",
110            max_importance,
111            width = max_width + 10
112        ));
113        chart.push_str("Legend: █ = positive importance, ▓ = negative importance\n");
114
115        Ok(chart)
116    }
117
118    /// Create vertical bar chart for feature importance
119    pub fn vertical_bar_chart(
120        feature_indices: &[usize],
121        importance_scores: &[f64],
122        feature_names: Option<&[String]>,
123        max_height: usize,
124        title: &str,
125    ) -> Result<String> {
126        if feature_indices.len() != importance_scores.len() {
127            return Err(VisualizationError::DimensionMismatch.into());
128        }
129
130        if feature_indices.is_empty() {
131            return Err(VisualizationError::EmptyData.into());
132        }
133
134        let mut chart = String::new();
135
136        // Title
137        chart.push_str(&format!("=== {} ===\n\n", title));
138
139        // Find max importance for scaling
140        let max_importance = importance_scores
141            .iter()
142            .fold(0.0f64, |acc, &x| acc.max(x.abs()));
143
144        if max_importance <= 0.0 {
145            chart.push_str("All features have zero importance\n");
146            return Ok(chart);
147        }
148
149        // Take top features to display (limit for readability)
150        let max_features = 15.min(feature_indices.len());
151        let mut sorted_indices: Vec<usize> = (0..feature_indices.len()).collect();
152        sorted_indices.sort_by(|&a, &b| {
153            importance_scores[b]
154                .abs()
155                .partial_cmp(&importance_scores[a].abs())
156                .unwrap()
157        });
158        sorted_indices.truncate(max_features);
159
160        // Create bars from top to bottom
161        for row in (0..max_height).rev() {
162            let threshold = (row + 1) as f64 / max_height as f64;
163
164            for &idx in &sorted_indices {
165                let importance = importance_scores[idx];
166                let normalized_importance = importance.abs() / max_importance;
167
168                if normalized_importance >= threshold {
169                    let bar_char = if importance >= 0.0 { '█' } else { '▓' };
170                    chart.push_str(&format!("{} ", bar_char));
171                } else {
172                    chart.push_str("  ");
173                }
174            }
175            chart.push('\n');
176        }
177
178        // Draw base line
179        chart.push_str(&"-".repeat(sorted_indices.len() * 2));
180        chart.push('\n');
181
182        // Feature labels
183        for &idx in &sorted_indices {
184            let feature_idx = feature_indices[idx];
185            let label = if let Some(names) = feature_names {
186                if idx < names.len() {
187                    format!("{}", names[idx].chars().next().unwrap_or('F'))
188                } else {
189                    format!("{}", feature_idx % 10)
190                }
191            } else {
192                format!("{}", feature_idx % 10)
193            };
194            chart.push_str(&format!("{} ", label));
195        }
196        chart.push('\n');
197
198        // Feature importance values
199        for &idx in &sorted_indices {
200            let importance = importance_scores[idx];
201            chart.push_str(&format!("{:.1} ", importance));
202        }
203        chart.push('\n');
204
205        chart.push_str("Legend: █ = positive importance, ▓ = negative importance\n");
206
207        Ok(chart)
208    }
209}
210
211/// Stability visualization for feature selection consistency
212#[derive(Debug, Clone)]
213pub struct StabilityPlots;
214
215impl StabilityPlots {
216    /// Create stability frequency plot
217    pub fn stability_frequency_plot(
218        feature_selections: &[Vec<usize>],
219        total_features: usize,
220        feature_names: Option<&[String]>,
221        title: &str,
222    ) -> Result<String> {
223        if feature_selections.is_empty() {
224            return Err(VisualizationError::EmptyData.into());
225        }
226
227        let mut plot = String::new();
228
229        // Title
230        plot.push_str(&format!("=== {} ===\n\n", title));
231        plot.push_str(&format!("Total candidate features: {}\n\n", total_features));
232
233        // Count feature frequencies
234        let mut feature_counts: HashMap<usize, usize> = HashMap::new();
235        let total_selections = feature_selections.len();
236
237        for selection in feature_selections {
238            for &feature_idx in selection {
239                *feature_counts.entry(feature_idx).or_insert(0) += 1;
240            }
241        }
242
243        if feature_counts.is_empty() {
244            plot.push_str("No features were selected in any iteration\n");
245            return Ok(plot);
246        }
247
248        // Sort features by frequency
249        let mut sorted_features: Vec<(usize, usize)> = feature_counts.into_iter().collect();
250        sorted_features.sort_by(|a, b| b.1.cmp(&a.1));
251
252        // Display top features (limit for readability)
253        let max_features = 20.min(sorted_features.len());
254        let max_width = 50;
255
256        plot.push_str(&format!(
257            "Selection Frequency (out of {} iterations):\n\n",
258            total_selections
259        ));
260
261        for (feature_idx, count) in sorted_features.iter().take(max_features) {
262            let frequency = *count as f64 / total_selections as f64;
263            let bar_width = (frequency * max_width as f64) as usize;
264            let bar = "█".repeat(bar_width);
265
266            let feature_name = if let Some(names) = feature_names {
267                if *feature_idx < names.len() {
268                    names[*feature_idx].clone()
269                } else {
270                    format!("Feature_{}", feature_idx)
271                }
272            } else {
273                format!("Feature_{}", feature_idx)
274            };
275
276            plot.push_str(&format!(
277                "{:>15} |{:<width$} {:>3}/{:<3} ({:>5.1}%)\n",
278                feature_name,
279                bar,
280                count,
281                total_selections,
282                frequency * 100.0,
283                width = max_width + 2
284            ));
285        }
286
287        if sorted_features.len() > max_features {
288            plot.push_str(&format!(
289                "... and {} more features\n",
290                sorted_features.len() - max_features
291            ));
292        }
293
294        // Summary statistics
295        let high_stability_count = sorted_features
296            .iter()
297            .filter(|(_, count)| *count as f64 / total_selections as f64 >= 0.8)
298            .count();
299
300        plot.push_str("\nStability Summary:\n");
301        plot.push_str(&format!(
302            "  High stability features (≥80%): {}\n",
303            high_stability_count
304        ));
305        plot.push_str(&format!(
306            "  Total unique features selected: {}\n",
307            sorted_features.len()
308        ));
309        plot.push_str(&format!(
310            "  Average features per selection: {:.1}\n",
311            feature_selections.iter().map(|s| s.len()).sum::<usize>() as f64
312                / total_selections as f64
313        ));
314
315        Ok(plot)
316    }
317
318    /// Create stability heatmap showing feature co-occurrence
319    pub fn feature_cooccurrence_heatmap(
320        feature_selections: &[Vec<usize>],
321        top_n_features: usize,
322        feature_names: Option<&[String]>,
323        title: &str,
324    ) -> Result<String> {
325        if feature_selections.is_empty() {
326            return Err(VisualizationError::EmptyData.into());
327        }
328
329        let mut heatmap = String::new();
330
331        // Title
332        heatmap.push_str(&format!("=== {} ===\n\n", title));
333
334        // Get most frequently selected features
335        let mut feature_counts: HashMap<usize, usize> = HashMap::new();
336        for selection in feature_selections {
337            for &feature_idx in selection {
338                *feature_counts.entry(feature_idx).or_insert(0) += 1;
339            }
340        }
341
342        let mut sorted_features: Vec<(usize, usize)> = feature_counts.into_iter().collect();
343        sorted_features.sort_by(|a, b| b.1.cmp(&a.1));
344
345        let n_features = top_n_features.min(sorted_features.len()).min(10); // Limit for readability
346        let top_features: Vec<usize> = sorted_features
347            .iter()
348            .take(n_features)
349            .map(|(idx, _)| *idx)
350            .collect();
351
352        if top_features.is_empty() {
353            heatmap.push_str("No features to display\n");
354            return Ok(heatmap);
355        }
356
357        // Compute co-occurrence matrix
358        let mut cooccurrence_matrix = vec![vec![0; n_features]; n_features];
359
360        for selection in feature_selections {
361            let selection_set: std::collections::HashSet<usize> =
362                selection.iter().cloned().collect();
363
364            for i in 0..n_features {
365                for j in 0..n_features {
366                    if selection_set.contains(&top_features[i])
367                        && selection_set.contains(&top_features[j])
368                    {
369                        cooccurrence_matrix[i][j] += 1;
370                    }
371                }
372            }
373        }
374
375        // Create header
376        heatmap.push_str("       ");
377        for j in 0..n_features {
378            heatmap.push_str(&format!("{:>4}", j));
379        }
380        heatmap.push('\n');
381
382        // Create heatmap rows
383        for i in 0..n_features {
384            let feature_name = if let Some(names) = feature_names {
385                if top_features[i] < names.len() {
386                    format!(
387                        "{:>6}",
388                        names[top_features[i]].chars().take(6).collect::<String>()
389                    )
390                } else {
391                    format!("F_{:>3}", top_features[i])
392                }
393            } else {
394                format!("F_{:>3}", top_features[i])
395            };
396
397            heatmap.push_str(&format!("{} ", feature_name));
398
399            for j in 0..n_features {
400                let cooccurrence = cooccurrence_matrix[i][j];
401                let intensity = cooccurrence as f64 / feature_selections.len() as f64;
402                let symbol = match intensity {
403                    x if x >= 0.9 => "██",
404                    x if x >= 0.7 => "▓▓",
405                    x if x >= 0.5 => "▒▒",
406                    x if x >= 0.3 => "░░",
407                    x if x >= 0.1 => "··",
408                    _ => "  ",
409                };
410                heatmap.push_str(symbol);
411            }
412            heatmap.push('\n');
413        }
414
415        // Legend
416        heatmap
417            .push_str("\nIntensity: ██ ≥90%  ▓▓ ≥70%  ▒▒ ≥50%  ░░ ≥30%  ·· ≥10%  [space] <10%\n");
418
419        // Feature mapping
420        heatmap.push_str("\nFeature Mapping:\n");
421        for (i, &feature_idx) in top_features.iter().enumerate() {
422            let feature_name = if let Some(names) = feature_names {
423                if feature_idx < names.len() {
424                    &names[feature_idx]
425                } else {
426                    "Unknown"
427                }
428            } else {
429                "Unknown"
430            };
431            heatmap.push_str(&format!(
432                "  {}: Feature_{} ({})\n",
433                i, feature_idx, feature_name
434            ));
435        }
436
437        Ok(heatmap)
438    }
439}
440
441/// Redundancy heatmaps for feature correlation analysis
442#[derive(Debug, Clone)]
443pub struct RedundancyHeatmaps;
444
445impl RedundancyHeatmaps {
446    /// Create correlation heatmap
447    pub fn correlation_heatmap(
448        correlation_matrix: ArrayView2<f64>,
449        feature_indices: &[usize],
450        feature_names: Option<&[String]>,
451        title: &str,
452    ) -> Result<String> {
453        let n_features = correlation_matrix.nrows();
454        if n_features != correlation_matrix.ncols() {
455            return Err(VisualizationError::DimensionMismatch.into());
456        }
457
458        if n_features != feature_indices.len() {
459            return Err(VisualizationError::DimensionMismatch.into());
460        }
461
462        let mut heatmap = String::new();
463
464        // Title
465        heatmap.push_str(&format!("=== {} ===\n\n", title));
466
467        // Limit size for readability
468        let display_size = n_features.min(12);
469
470        if display_size == 0 {
471            heatmap.push_str("No features to display\n");
472            return Ok(heatmap);
473        }
474
475        // Create header
476        heatmap.push_str("       ");
477        for j in 0..display_size {
478            heatmap.push_str(&format!("{:>4}", j));
479        }
480        heatmap.push('\n');
481
482        // Create heatmap rows
483        for i in 0..display_size {
484            let feature_name = if let Some(names) = feature_names {
485                if i < names.len() {
486                    format!("{:>6}", names[i].chars().take(6).collect::<String>())
487                } else {
488                    format!("F_{:>3}", feature_indices[i])
489                }
490            } else {
491                format!("F_{:>3}", feature_indices[i])
492            };
493
494            heatmap.push_str(&format!("{} ", feature_name));
495
496            for j in 0..display_size {
497                let correlation = correlation_matrix[[i, j]].abs();
498                let symbol = match correlation {
499                    x if x >= 0.9 => "██",
500                    x if x >= 0.7 => "▓▓",
501                    x if x >= 0.5 => "▒▒",
502                    x if x >= 0.3 => "░░",
503                    x if x >= 0.1 => "··",
504                    _ => "  ",
505                };
506                heatmap.push_str(symbol);
507            }
508            heatmap.push('\n');
509        }
510
511        if n_features > display_size {
512            heatmap.push_str(&format!(
513                "... and {} more features (truncated for display)\n",
514                n_features - display_size
515            ));
516        }
517
518        // Legend
519        heatmap
520            .push_str("\nCorrelation: ██ ≥0.9  ▓▓ ≥0.7  ▒▒ ≥0.5  ░░ ≥0.3  ·· ≥0.1  [space] <0.1\n");
521
522        // Feature mapping
523        heatmap.push_str("\nFeature Mapping:\n");
524        for i in 0..display_size {
525            let feature_name = if let Some(names) = feature_names {
526                if i < names.len() {
527                    &names[i]
528                } else {
529                    "Unknown"
530                }
531            } else {
532                "Unknown"
533            };
534            heatmap.push_str(&format!(
535                "  {}: Feature_{} ({})\n",
536                i, feature_indices[i], feature_name
537            ));
538        }
539
540        Ok(heatmap)
541    }
542
543    /// Create redundancy summary visualization
544    pub fn redundancy_summary(
545        highly_correlated_pairs: &[(usize, usize, f64)],
546        feature_names: Option<&[String]>,
547        title: &str,
548    ) -> Result<String> {
549        let mut summary = String::new();
550
551        // Title
552        summary.push_str(&format!("=== {} ===\n\n", title));
553
554        if highly_correlated_pairs.is_empty() {
555            summary.push_str("No highly correlated feature pairs found\n");
556            return Ok(summary);
557        }
558
559        summary.push_str(&format!(
560            "Highly Correlated Feature Pairs ({} pairs):\n\n",
561            highly_correlated_pairs.len()
562        ));
563
564        let max_pairs = 15.min(highly_correlated_pairs.len());
565
566        for (i, &(feat1, feat2, corr)) in highly_correlated_pairs.iter().take(max_pairs).enumerate()
567        {
568            let name1 = if let Some(names) = feature_names {
569                if feat1 < names.len() {
570                    &names[feat1]
571                } else {
572                    "Unknown"
573                }
574            } else {
575                "Unknown"
576            };
577
578            let name2 = if let Some(names) = feature_names {
579                if feat2 < names.len() {
580                    &names[feat2]
581                } else {
582                    "Unknown"
583                }
584            } else {
585                "Unknown"
586            };
587
588            // Create visual correlation strength bar
589            let bar_length = (corr.abs() * 20.0) as usize;
590            let bar = "█".repeat(bar_length);
591
592            summary.push_str(&format!(
593                "{:>2}. Feature_{:>3} ({:<10}) ↔ Feature_{:>3} ({:<10}) |{:<20} {:>6.3}\n",
594                i + 1,
595                feat1,
596                name1.chars().take(10).collect::<String>(),
597                feat2,
598                name2.chars().take(10).collect::<String>(),
599                bar,
600                corr
601            ));
602        }
603
604        if highly_correlated_pairs.len() > max_pairs {
605            summary.push_str(&format!(
606                "... and {} more pairs\n",
607                highly_correlated_pairs.len() - max_pairs
608            ));
609        }
610
611        // Statistics
612        let avg_correlation = highly_correlated_pairs
613            .iter()
614            .map(|(_, _, corr)| corr.abs())
615            .sum::<f64>()
616            / highly_correlated_pairs.len() as f64;
617        let max_correlation = highly_correlated_pairs
618            .iter()
619            .map(|(_, _, corr)| corr.abs())
620            .fold(0.0, f64::max);
621
622        summary.push_str("\nRedundancy Statistics:\n");
623        summary.push_str(&format!("  Average correlation: {:.3}\n", avg_correlation));
624        summary.push_str(&format!("  Maximum correlation: {:.3}\n", max_correlation));
625        summary.push_str(&format!(
626            "  Total redundant pairs: {}\n",
627            highly_correlated_pairs.len()
628        ));
629
630        Ok(summary)
631    }
632}
633
634/// Selection frequency charts
635#[derive(Debug, Clone)]
636pub struct SelectionFrequencyCharts;
637
638impl SelectionFrequencyCharts {
639    /// Create feature selection frequency histogram
640    pub fn frequency_histogram(
641        feature_frequencies: &[(usize, f64)],
642        feature_names: Option<&[String]>,
643        title: &str,
644    ) -> Result<String> {
645        if feature_frequencies.is_empty() {
646            return Err(VisualizationError::EmptyData.into());
647        }
648
649        let mut histogram = String::new();
650
651        // Title
652        histogram.push_str(&format!("=== {} ===\n\n", title));
653
654        // Sort by frequency (descending)
655        let mut sorted_frequencies = feature_frequencies.to_vec();
656        sorted_frequencies.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
657
658        let max_features = 20.min(sorted_frequencies.len());
659        let max_width = 40;
660
661        histogram.push_str("Feature Selection Frequencies:\n\n");
662
663        for (feature_idx, frequency) in sorted_frequencies.iter().take(max_features) {
664            let bar_width = (*frequency * max_width as f64) as usize;
665            let bar = "█".repeat(bar_width.max(1));
666
667            let feature_name = if let Some(names) = feature_names {
668                if *feature_idx < names.len() {
669                    names[*feature_idx].clone()
670                } else {
671                    format!("Feature_{}", feature_idx)
672                }
673            } else {
674                format!("Feature_{}", feature_idx)
675            };
676
677            histogram.push_str(&format!(
678                "{:>15} |{:<width$} {:>6.1}%\n",
679                feature_name,
680                bar,
681                frequency * 100.0,
682                width = max_width + 2
683            ));
684        }
685
686        if sorted_frequencies.len() > max_features {
687            histogram.push_str(&format!(
688                "... and {} more features\n",
689                sorted_frequencies.len() - max_features
690            ));
691        }
692
693        // Statistics
694        let frequencies: Vec<f64> = sorted_frequencies.iter().map(|(_, f)| *f).collect();
695        let avg_frequency = frequencies.iter().sum::<f64>() / frequencies.len() as f64;
696        let std_frequency = {
697            let variance = frequencies
698                .iter()
699                .map(|f| (f - avg_frequency).powi(2))
700                .sum::<f64>()
701                / frequencies.len() as f64;
702            variance.sqrt()
703        };
704
705        histogram.push_str("\nFrequency Statistics:\n");
706        histogram.push_str(&format!(
707            "  Average frequency: {:.1}%\n",
708            avg_frequency * 100.0
709        ));
710        histogram.push_str(&format!(
711            "  Std deviation:     {:.1}%\n",
712            std_frequency * 100.0
713        ));
714        histogram.push_str(&format!(
715            "  Total features:    {}\n",
716            sorted_frequencies.len()
717        ));
718
719        Ok(histogram)
720    }
721}
722
723/// Comprehensive feature set visualization
724#[derive(Debug, Clone)]
725pub struct FeatureSetVisualization;
726
727impl FeatureSetVisualization {
728    /// Create comprehensive feature selection report with visualizations
729    pub fn comprehensive_report(
730        feature_indices: &[usize],
731        importance_scores: &[f64],
732        stability_data: Option<&[Vec<usize>]>,
733        correlation_matrix: Option<ArrayView2<f64>>,
734        feature_names: Option<&[String]>,
735        title: &str,
736    ) -> Result<String> {
737        let mut report = String::new();
738
739        // Main title
740        report.push_str(
741            "╔═══════════════════════════════════════════════════════════════════════════╗\n",
742        );
743        report.push_str(&format!("║ {:<73} ║\n", title));
744        report.push_str(
745            "╚═══════════════════════════════════════════════════════════════════════════╝\n\n",
746        );
747
748        // Feature importance visualization
749        if !importance_scores.is_empty() {
750            let importance_plot = FeatureImportancePlots::horizontal_bar_chart(
751                feature_indices,
752                importance_scores,
753                feature_names,
754                50,
755                "Feature Importance Scores",
756            )?;
757            report.push_str(&importance_plot);
758            report.push('\n');
759        }
760
761        // Stability analysis
762        if let Some(stability_data) = stability_data {
763            let stability_plot = StabilityPlots::stability_frequency_plot(
764                stability_data,
765                feature_indices.len(),
766                feature_names,
767                "Feature Selection Stability",
768            )?;
769            report.push_str(&stability_plot);
770            report.push('\n');
771
772            // Co-occurrence heatmap
773            let cooccurrence_plot = StabilityPlots::feature_cooccurrence_heatmap(
774                stability_data,
775                10,
776                feature_names,
777                "Feature Co-occurrence Matrix",
778            )?;
779            report.push_str(&cooccurrence_plot);
780            report.push('\n');
781        }
782
783        // Correlation analysis
784        if let Some(corr_matrix) = correlation_matrix {
785            let correlation_plot = RedundancyHeatmaps::correlation_heatmap(
786                corr_matrix,
787                feature_indices,
788                feature_names,
789                "Feature Correlation Matrix",
790            )?;
791            report.push_str(&correlation_plot);
792            report.push('\n');
793        }
794
795        // Summary statistics
796        report.push_str("=== Feature Selection Summary ===\n\n");
797        report.push_str(&format!(
798            "Total features selected: {}\n",
799            feature_indices.len()
800        ));
801
802        if !importance_scores.is_empty() {
803            let avg_importance =
804                importance_scores.iter().sum::<f64>() / importance_scores.len() as f64;
805            let max_importance = importance_scores
806                .iter()
807                .fold(0.0f64, |acc, &x| acc.max(x.abs()));
808            report.push_str(&format!("Average importance: {:.4}\n", avg_importance));
809            report.push_str(&format!("Maximum importance: {:.4}\n", max_importance));
810        }
811
812        if let Some(stability_data) = stability_data {
813            let avg_selected = stability_data.iter().map(|s| s.len()).sum::<usize>() as f64
814                / stability_data.len() as f64;
815            report.push_str(&format!(
816                "Average features per iteration: {:.1}\n",
817                avg_selected
818            ));
819            report.push_str(&format!("Stability iterations: {}\n", stability_data.len()));
820        }
821
822        Ok(report)
823    }
824
825    /// Create quick feature summary visualization
826    pub fn quick_summary(
827        feature_indices: &[usize],
828        importance_scores: &[f64],
829        feature_names: Option<&[String]>,
830    ) -> Result<String> {
831        let mut summary = String::new();
832
833        summary.push_str("=== Quick Feature Selection Summary ===\n\n");
834
835        if feature_indices.is_empty() {
836            summary.push_str("No features selected\n");
837            return Ok(summary);
838        }
839
840        // Top 10 features
841        let mut indexed_scores: Vec<(usize, f64)> = feature_indices
842            .iter()
843            .zip(importance_scores.iter())
844            .map(|(&idx, &score)| (idx, score))
845            .collect();
846        indexed_scores.sort_by(|a, b| b.1.abs().partial_cmp(&a.1.abs()).unwrap());
847
848        let top_n = 10.min(indexed_scores.len());
849        summary.push_str(&format!("Top {} Selected Features:\n", top_n));
850        summary.push_str("─────────────────────────────────────────────────\n");
851
852        for (i, (feature_idx, importance)) in indexed_scores.iter().take(top_n).enumerate() {
853            let feature_name = if let Some(names) = feature_names {
854                if *feature_idx < names.len() {
855                    names[*feature_idx].clone()
856                } else {
857                    format!("Feature_{}", feature_idx)
858                }
859            } else {
860                format!("Feature_{}", feature_idx)
861            };
862
863            let bar_length = ((importance.abs() / indexed_scores[0].1.abs()) * 20.0) as usize;
864            let bar = "█".repeat(bar_length.max(1));
865
866            summary.push_str(&format!(
867                "{:>2}. {:>15} |{:<20} {:>8.4}\n",
868                i + 1,
869                feature_name,
870                bar,
871                importance
872            ));
873        }
874
875        // Basic statistics
876        summary.push_str("\nSelection Statistics:\n");
877        summary.push_str(&format!("  Total features: {}\n", feature_indices.len()));
878
879        if !importance_scores.is_empty() {
880            let positive_count = importance_scores.iter().filter(|&&x| x > 0.0).count();
881            let negative_count = importance_scores.iter().filter(|&&x| x < 0.0).count();
882            let zero_count = importance_scores.iter().filter(|&&x| x == 0.0).count();
883
884            summary.push_str(&format!("  Positive importance: {}\n", positive_count));
885            summary.push_str(&format!("  Negative importance: {}\n", negative_count));
886            summary.push_str(&format!("  Zero importance: {}\n", zero_count));
887        }
888
889        Ok(summary)
890    }
891}
892
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    //! Smoke tests for the text-based visualization helpers.
    //!
    //! Each test renders a small, hand-written input and checks that the
    //! returned text contains the fragments a reader would expect (title,
    //! feature labels, formatted values).

    use super::*;
    use scirs2_core::ndarray::array;

    /// Horizontal and vertical importance charts render and carry their titles.
    #[test]
    fn test_feature_importance_plots() {
        let feature_indices = vec![0, 1, 2, 3, 4];
        // Includes a negative score so the sign handling path is exercised.
        let importance_scores = vec![0.8, 0.6, -0.4, 0.9, 0.2];
        let feature_names = vec![
            "Feature_A".to_string(),
            "Feature_B".to_string(),
            "Feature_C".to_string(),
            "Feature_D".to_string(),
            "Feature_E".to_string(),
        ];

        let chart = FeatureImportancePlots::horizontal_bar_chart(
            &feature_indices,
            &importance_scores,
            Some(&feature_names),
            30,
            "Test Importance",
        )
        .unwrap();

        assert!(chart.contains("Test Importance"));
        assert!(chart.contains("Feature_A"));
        assert!(chart.contains("0.8"));

        let vertical_chart = FeatureImportancePlots::vertical_bar_chart(
            &feature_indices,
            &importance_scores,
            Some(&feature_names),
            10,
            "Test Vertical",
        )
        .unwrap();

        // Inspect the vertical chart itself; the previous version discarded it
        // and re-asserted on the horizontal chart, so the vertical output was
        // never actually checked.
        assert!(vertical_chart.contains("Test Vertical"));
    }

    /// Stability frequency plot and co-occurrence heatmap render from repeated selections.
    #[test]
    fn test_stability_plots() {
        // Four selection runs; feature 2 is chosen in every run.
        let feature_selections = vec![vec![0, 1, 2], vec![1, 2, 3], vec![0, 2, 4], vec![1, 2, 5]];

        let plot = StabilityPlots::stability_frequency_plot(
            &feature_selections,
            6,
            None,
            "Test Stability",
        )
        .unwrap();

        assert!(plot.contains("Stability"));
        // No names were supplied, so the generated "Feature_<idx>" labels are expected.
        assert!(plot.contains("Feature_"));

        let heatmap = StabilityPlots::feature_cooccurrence_heatmap(
            &feature_selections,
            5,
            None,
            "Test Cooccurrence",
        )
        .unwrap();

        assert!(heatmap.contains("Cooccurrence"));
    }

    /// Correlation heatmap and redundancy summary render from a 3x3 correlation matrix.
    #[test]
    fn test_redundancy_heatmaps() {
        let correlation_matrix = array![[1.0, 0.8, 0.2], [0.8, 1.0, 0.3], [0.2, 0.3, 1.0],];
        let feature_indices = vec![0, 1, 2];

        let heatmap = RedundancyHeatmaps::correlation_heatmap(
            correlation_matrix.view(),
            &feature_indices,
            None,
            "Test Correlation",
        )
        .unwrap();

        assert!(heatmap.contains("Correlation"));

        // (feature_a, feature_b, correlation) triples.
        let pairs = vec![(0, 1, 0.8), (1, 2, 0.3)];
        let summary = RedundancyHeatmaps::redundancy_summary(&pairs, None, "Test Summary").unwrap();

        assert!(summary.contains("Correlated"));
        assert!(summary.contains("0.8"));
    }

    /// The comprehensive report and the quick summary include their main sections.
    #[test]
    fn test_comprehensive_visualization() {
        let feature_indices = vec![0, 1, 2];
        let importance_scores = vec![0.8, 0.6, 0.4];
        let stability_data = vec![vec![0, 1], vec![1, 2], vec![0, 2]];
        let correlation_matrix = array![[1.0, 0.5, 0.2], [0.5, 1.0, 0.3], [0.2, 0.3, 1.0],];

        let report = FeatureSetVisualization::comprehensive_report(
            &feature_indices,
            &importance_scores,
            Some(&stability_data),
            Some(correlation_matrix.view()),
            None,
            "Test Comprehensive Report",
        )
        .unwrap();

        assert!(report.contains("Comprehensive Report"));
        assert!(report.contains("Feature Importance"));
        assert!(report.contains("Stability"));
        assert!(report.contains("Summary"));

        let quick_summary =
            FeatureSetVisualization::quick_summary(&feature_indices, &importance_scores, None)
                .unwrap();

        assert!(quick_summary.contains("Quick"));
        assert!(quick_summary.contains("Top"));
    }

    /// Frequency histogram shows percentages and default feature labels.
    #[test]
    fn test_selection_frequency_charts() {
        // (feature_index, selection_frequency) pairs.
        let frequencies = vec![(0, 0.9), (1, 0.7), (2, 0.5), (3, 0.3)];

        let histogram =
            SelectionFrequencyCharts::frequency_histogram(&frequencies, None, "Test Frequencies")
                .unwrap();

        assert!(histogram.contains("Frequencies"));
        assert!(histogram.contains("90.0%"));
        assert!(histogram.contains("Feature_0"));
    }
}