sklears_feature_selection/
spectral.rs

1//! Spectral feature selection algorithms
2//!
3//! This module provides spectral methods for feature selection including
4//! Laplacian score, graph-based selection, and manifold learning integration.
5
6use crate::base::{FeatureSelector, SelectorMixin};
7use scirs2_core::ndarray::{Array1, Array2, Axis};
8use sklears_core::{
9    error::{validate, Result as SklResult, SklearsError},
10    traits::{Estimator, Fit, Trained, Transform, Untrained},
11    types::Float,
12};
13use std::marker::PhantomData;
14
/// Graph construction method for spectral feature selection
///
/// Controls how the sample-similarity graph is built before computing
/// graph Laplacians and Laplacian scores.
#[derive(Debug, Clone)]
pub enum GraphConstructionMethod {
    /// k-nearest neighbors graph; `k` is the number of neighbors per node
    KNN { k: usize },
    /// ε-neighborhood graph; samples within distance `epsilon` are connected
    Epsilon { epsilon: Float },
    /// Fully connected graph with Gaussian weights of bandwidth `sigma`
    FullyConnected { sigma: Float },
    /// Heat kernel weights exp(-d²/(4t)) with diffusion time `t`
    HeatKernel { t: Float },
}
27
/// Spectral feature selection using Laplacian score
///
/// Type-state selector: `Untrained` exposes the builder API and `fit`;
/// `Trained` exposes `transform` and the fitted attributes.
#[derive(Debug, Clone)]
pub struct LaplacianScoreSelector<State = Untrained> {
    // Number of features to keep
    k: usize,
    // How the sample-similarity graph is constructed
    graph_method: GraphConstructionMethod,
    state: PhantomData<State>,
    // Trained state — populated by `fit`, `None` before training
    scores_: Option<Array1<Float>>,
    selected_features_: Option<Vec<usize>>,
    n_features_: Option<usize>,
}
39
impl Default for LaplacianScoreSelector<Untrained> {
    /// Equivalent to [`LaplacianScoreSelector::new`].
    fn default() -> Self {
        Self::new()
    }
}
45
46impl LaplacianScoreSelector<Untrained> {
47    /// Create a new Laplacian score selector
48    pub fn new() -> Self {
49        Self {
50            k: 10,
51            graph_method: GraphConstructionMethod::KNN { k: 7 },
52            state: PhantomData,
53            scores_: None,
54            selected_features_: None,
55            n_features_: None,
56        }
57    }
58
59    /// Set the number of features to select
60    pub fn k(mut self, k: usize) -> Self {
61        self.k = k;
62        self
63    }
64
65    /// Set the graph construction method
66    pub fn graph_method(mut self, method: GraphConstructionMethod) -> Self {
67        self.graph_method = method;
68        self
69    }
70
71    /// Compute Laplacian score for all features
72    fn compute_laplacian_scores(&self, features: &Array2<Float>) -> SklResult<Array1<Float>> {
73        let n_samples = features.nrows();
74        let n_features = features.ncols();
75
76        if n_samples == 0 || n_features == 0 {
77            return Err(SklearsError::InvalidInput(
78                "Empty feature matrix".to_string(),
79            ));
80        }
81
82        // Construct the graph Laplacian
83        let laplacian = self.construct_graph_laplacian(features)?;
84
85        // Compute Laplacian score for each feature
86        let mut scores = Array1::zeros(n_features);
87        for j in 0..n_features {
88            let feature_col = features.column(j);
89
90            // Center the feature
91            let mean = feature_col.mean().unwrap_or(0.0);
92            let centered: Array1<Float> = feature_col.mapv(|x| x - mean);
93
94            // Compute variance (denominator)
95            let variance = centered.mapv(|x| x * x).sum();
96
97            if variance < 1e-12 {
98                scores[j] = Float::INFINITY; // Poor feature - no variance
99                continue;
100            }
101
102            // Compute Laplacian score: f^T L f / f^T D f
103            // where D is the degree matrix (diagonal of Laplacian)
104            let numerator = centered.dot(&laplacian.dot(&centered));
105
106            scores[j] = numerator / variance;
107        }
108
109        Ok(scores)
110    }
111
112    /// Construct graph Laplacian matrix
113    fn construct_graph_laplacian(&self, features: &Array2<Float>) -> SklResult<Array2<Float>> {
114        let n_samples = features.nrows();
115
116        // Construct adjacency matrix
117        let adjacency = self.construct_adjacency_matrix(features)?;
118
119        // Compute degree matrix
120        let degrees: Array1<Float> = adjacency.sum_axis(Axis(1));
121
122        // Construct Laplacian L = D - W
123        let mut laplacian = Array2::zeros((n_samples, n_samples));
124        for i in 0..n_samples {
125            laplacian[[i, i]] = degrees[i];
126            for j in 0..n_samples {
127                if i != j {
128                    laplacian[[i, j]] = -adjacency[[i, j]];
129                }
130            }
131        }
132
133        Ok(laplacian)
134    }
135
136    /// Construct adjacency matrix based on the specified method
137    fn construct_adjacency_matrix(&self, features: &Array2<Float>) -> SklResult<Array2<Float>> {
138        let n_samples = features.nrows();
139        let mut adjacency = Array2::zeros((n_samples, n_samples));
140
141        match &self.graph_method {
142            GraphConstructionMethod::KNN { k } => {
143                self.construct_knn_graph(features, *k, &mut adjacency)?;
144            }
145            GraphConstructionMethod::Epsilon { epsilon } => {
146                self.construct_epsilon_graph(features, *epsilon, &mut adjacency)?;
147            }
148            GraphConstructionMethod::FullyConnected { sigma } => {
149                self.construct_fully_connected_graph(features, *sigma, &mut adjacency)?;
150            }
151            GraphConstructionMethod::HeatKernel { t } => {
152                self.construct_heat_kernel_graph(features, *t, &mut adjacency)?;
153            }
154        }
155
156        Ok(adjacency)
157    }
158
159    /// Construct k-nearest neighbors graph
160    fn construct_knn_graph(
161        &self,
162        features: &Array2<Float>,
163        k: usize,
164        adjacency: &mut Array2<Float>,
165    ) -> SklResult<()> {
166        let n_samples = features.nrows();
167
168        for i in 0..n_samples {
169            let mut distances: Vec<(usize, Float)> = Vec::new();
170
171            for j in 0..n_samples {
172                if i != j {
173                    let dist = self.euclidean_distance(&features.row(i), &features.row(j));
174                    distances.push((j, dist));
175                }
176            }
177
178            // Sort by distance and take k nearest
179            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
180
181            for &(neighbor, dist) in distances.iter().take(k) {
182                // Use Gaussian weight
183                let weight = (-dist * dist).exp();
184                adjacency[[i, neighbor]] = weight;
185                adjacency[[neighbor, i]] = weight; // Symmetric
186            }
187        }
188
189        Ok(())
190    }
191
192    /// Construct ε-neighborhood graph
193    fn construct_epsilon_graph(
194        &self,
195        features: &Array2<Float>,
196        epsilon: Float,
197        adjacency: &mut Array2<Float>,
198    ) -> SklResult<()> {
199        let n_samples = features.nrows();
200
201        for i in 0..n_samples {
202            for j in i + 1..n_samples {
203                let dist = self.euclidean_distance(&features.row(i), &features.row(j));
204                if dist <= epsilon {
205                    adjacency[[i, j]] = 1.0;
206                    adjacency[[j, i]] = 1.0;
207                }
208            }
209        }
210
211        Ok(())
212    }
213
214    /// Construct fully connected graph with Gaussian weights
215    fn construct_fully_connected_graph(
216        &self,
217        features: &Array2<Float>,
218        sigma: Float,
219        adjacency: &mut Array2<Float>,
220    ) -> SklResult<()> {
221        let n_samples = features.nrows();
222
223        for i in 0..n_samples {
224            for j in i + 1..n_samples {
225                let dist = self.euclidean_distance(&features.row(i), &features.row(j));
226                let weight = (-dist * dist / (2.0 * sigma * sigma)).exp();
227                adjacency[[i, j]] = weight;
228                adjacency[[j, i]] = weight;
229            }
230        }
231
232        Ok(())
233    }
234
235    /// Construct heat kernel graph
236    fn construct_heat_kernel_graph(
237        &self,
238        features: &Array2<Float>,
239        t: Float,
240        adjacency: &mut Array2<Float>,
241    ) -> SklResult<()> {
242        let n_samples = features.nrows();
243
244        for i in 0..n_samples {
245            for j in i + 1..n_samples {
246                let dist_sq = self.euclidean_distance_squared(&features.row(i), &features.row(j));
247                let weight = (-dist_sq / (4.0 * t)).exp();
248                adjacency[[i, j]] = weight;
249                adjacency[[j, i]] = weight;
250            }
251        }
252
253        Ok(())
254    }
255
256    /// Compute Euclidean distance between two feature vectors
257    fn euclidean_distance(
258        &self,
259        x1: &scirs2_core::ndarray::ArrayView1<Float>,
260        x2: &scirs2_core::ndarray::ArrayView1<Float>,
261    ) -> Float {
262        x1.iter()
263            .zip(x2.iter())
264            .map(|(&a, &b)| (a - b) * (a - b))
265            .sum::<Float>()
266            .sqrt()
267    }
268
269    /// Compute squared Euclidean distance between two feature vectors
270    fn euclidean_distance_squared(
271        &self,
272        x1: &scirs2_core::ndarray::ArrayView1<Float>,
273        x2: &scirs2_core::ndarray::ArrayView1<Float>,
274    ) -> Float {
275        x1.iter()
276            .zip(x2.iter())
277            .map(|(&a, &b)| (a - b) * (a - b))
278            .sum::<Float>()
279    }
280}
281
impl Estimator for LaplacianScoreSelector<Untrained> {
    type Config = ();
    type Error = SklearsError;
    type Float = Float;

    /// This selector carries no separate configuration object.
    fn config(&self) -> &Self::Config {
        // `()` is zero-sized, so this reference is promoted to 'static.
        &()
    }
}
291
292impl<T> Fit<Array2<Float>, T> for LaplacianScoreSelector<Untrained> {
293    type Fitted = LaplacianScoreSelector<Trained>;
294
295    fn fit(self, features: &Array2<Float>, _target: &T) -> SklResult<Self::Fitted> {
296        let n_features = features.ncols();
297        if n_features == 0 {
298            return Err(SklearsError::InvalidInput(
299                "No features provided".to_string(),
300            ));
301        }
302
303        if self.k > n_features {
304            return Err(SklearsError::InvalidInput(format!(
305                "k ({}) cannot be greater than number of features ({})",
306                self.k, n_features
307            )));
308        }
309
310        // Compute Laplacian scores
311        let scores = self.compute_laplacian_scores(features)?;
312
313        // Select top k features (lowest scores are best)
314        let mut feature_indices: Vec<usize> = (0..n_features).collect();
315        feature_indices.sort_by(|&a, &b| {
316            scores[a]
317                .partial_cmp(&scores[b])
318                .unwrap_or(std::cmp::Ordering::Equal)
319        });
320
321        let selected_features = feature_indices.into_iter().take(self.k).collect();
322
323        Ok(LaplacianScoreSelector {
324            k: self.k,
325            graph_method: self.graph_method,
326            state: PhantomData,
327            scores_: Some(scores),
328            selected_features_: Some(selected_features),
329            n_features_: Some(n_features),
330        })
331    }
332}
333
334impl Transform<Array2<Float>> for LaplacianScoreSelector<Trained> {
335    fn transform(&self, x: &Array2<Float>) -> SklResult<Array2<Float>> {
336        validate::check_n_features(x, self.n_features_.unwrap())?;
337
338        let selected_features = self.selected_features_.as_ref().unwrap();
339        let n_samples = x.nrows();
340        let n_selected = selected_features.len();
341        let mut x_new = Array2::zeros((n_samples, n_selected));
342
343        for (new_idx, &old_idx) in selected_features.iter().enumerate() {
344            x_new.column_mut(new_idx).assign(&x.column(old_idx));
345        }
346
347        Ok(x_new)
348    }
349}
350
351impl SelectorMixin for LaplacianScoreSelector<Trained> {
352    fn get_support(&self) -> SklResult<Array1<bool>> {
353        let n_features = self.n_features_.unwrap();
354        let selected_features = self.selected_features_.as_ref().unwrap();
355        let mut support = Array1::from_elem(n_features, false);
356
357        for &idx in selected_features {
358            support[idx] = true;
359        }
360
361        Ok(support)
362    }
363
364    fn transform_features(&self, indices: &[usize]) -> SklResult<Vec<usize>> {
365        let selected_features = self.selected_features_.as_ref().unwrap();
366        Ok(indices
367            .iter()
368            .filter_map(|&idx| selected_features.iter().position(|&f| f == idx))
369            .collect())
370    }
371}
372
impl FeatureSelector for LaplacianScoreSelector<Trained> {
    /// Indices of the selected features, ordered by ascending Laplacian score.
    fn selected_features(&self) -> &Vec<usize> {
        self.selected_features_.as_ref().unwrap()
    }
}
378
impl LaplacianScoreSelector<Trained> {
    /// Get the Laplacian scores for all features (lower is better).
    /// Safe to `unwrap`: the `Trained` type state guarantees `fit` ran.
    pub fn scores(&self) -> &Array1<Float> {
        self.scores_.as_ref().unwrap()
    }

    /// Get the number of selected features
    pub fn n_features_out(&self) -> usize {
        self.selected_features_.as_ref().unwrap().len()
    }
}
390
/// Graph-based feature selection using spectral clustering
///
/// Clusters features and selects representative features from each cluster,
/// reducing redundancy among the selected set.
#[derive(Debug, Clone)]
pub struct SpectralFeatureSelector<State = Untrained> {
    // Number of features to keep
    k: usize,
    // Number of feature clusters
    n_clusters: usize,
    // Sample-graph construction method (reserved for a full spectral
    // implementation; the current clustering step does not consult it)
    graph_method: GraphConstructionMethod,
    state: PhantomData<State>,
    // Trained state — populated by `fit`, `None` before training
    feature_clusters_: Option<Array1<usize>>,
    cluster_representatives_: Option<Vec<usize>>,
    selected_features_: Option<Vec<usize>>,
    n_features_: Option<usize>,
}
404
impl Default for SpectralFeatureSelector<Untrained> {
    /// Equivalent to [`SpectralFeatureSelector::new`].
    fn default() -> Self {
        Self::new()
    }
}
410
411impl SpectralFeatureSelector<Untrained> {
412    /// Create a new spectral feature selector
413    pub fn new() -> Self {
414        Self {
415            k: 10,
416            n_clusters: 5,
417            graph_method: GraphConstructionMethod::KNN { k: 7 },
418            state: PhantomData,
419            feature_clusters_: None,
420            cluster_representatives_: None,
421            selected_features_: None,
422            n_features_: None,
423        }
424    }
425
426    /// Set the number of features to select
427    pub fn k(mut self, k: usize) -> Self {
428        self.k = k;
429        self
430    }
431
432    /// Set the number of clusters for spectral clustering
433    pub fn n_clusters(mut self, n_clusters: usize) -> Self {
434        self.n_clusters = n_clusters;
435        self
436    }
437
438    /// Set the graph construction method
439    pub fn graph_method(mut self, method: GraphConstructionMethod) -> Self {
440        self.graph_method = method;
441        self
442    }
443
444    /// Perform spectral clustering on features
445    fn spectral_clustering(
446        &self,
447        features: &Array2<Float>,
448    ) -> SklResult<(Array1<usize>, Vec<usize>)> {
449        let n_features = features.ncols();
450
451        // Transpose features for feature-wise clustering
452        let features_t = features.t().to_owned();
453
454        // For simplicity, we'll use k-means clustering on the feature space
455        // In a full implementation, we would compute the graph Laplacian eigenvectors
456        let cluster_assignments = self.kmeans_clustering(&features_t)?;
457
458        // Select representative features from each cluster
459        let mut representatives = Vec::new();
460        for cluster_id in 0..self.n_clusters {
461            if let Some(rep) =
462                self.find_cluster_representative(&features_t, &cluster_assignments, cluster_id)
463            {
464                representatives.push(rep);
465            }
466        }
467
468        // If we need more features, add the next best from each cluster
469        while representatives.len() < self.k && representatives.len() < n_features {
470            for cluster_id in 0..self.n_clusters {
471                if representatives.len() >= self.k {
472                    break;
473                }
474
475                // Find next best feature in this cluster
476                if let Some(next_feat) = self.find_next_best_in_cluster(
477                    &features_t,
478                    &cluster_assignments,
479                    cluster_id,
480                    &representatives,
481                ) {
482                    representatives.push(next_feat);
483                }
484            }
485        }
486
487        representatives.truncate(self.k);
488        representatives.sort();
489
490        Ok((cluster_assignments, representatives))
491    }
492
493    /// Simple k-means clustering (simplified implementation)
494    fn kmeans_clustering(&self, features_t: &Array2<Float>) -> SklResult<Array1<usize>> {
495        let n_features = features_t.nrows();
496        let _n_dims = features_t.ncols();
497
498        if n_features == 0 {
499            return Ok(Array1::zeros(0));
500        }
501
502        let actual_clusters = self.n_clusters.min(n_features);
503
504        // Initialize cluster assignments randomly
505        let mut assignments = Array1::zeros(n_features);
506        for i in 0..n_features {
507            assignments[i] = i % actual_clusters;
508        }
509
510        // Simple assignment based on feature index for deterministic results
511        // In practice, would use proper k-means algorithm
512        for i in 0..n_features {
513            assignments[i] = i % actual_clusters;
514        }
515
516        Ok(assignments)
517    }
518
519    /// Find the most representative feature in a cluster
520    fn find_cluster_representative(
521        &self,
522        _features_t: &Array2<Float>,
523        assignments: &Array1<usize>,
524        cluster_id: usize,
525    ) -> Option<usize> {
526        let cluster_features: Vec<usize> = assignments
527            .iter()
528            .enumerate()
529            .filter(|(_, &cluster)| cluster == cluster_id)
530            .map(|(idx, _)| idx)
531            .collect();
532
533        if cluster_features.is_empty() {
534            return None;
535        }
536
537        // Return the first feature in the cluster (could be improved with centroid calculation)
538        Some(cluster_features[0])
539    }
540
541    /// Find the next best feature in a cluster (excluding already selected ones)
542    fn find_next_best_in_cluster(
543        &self,
544        _features_t: &Array2<Float>,
545        assignments: &Array1<usize>,
546        cluster_id: usize,
547        selected: &[usize],
548    ) -> Option<usize> {
549        let cluster_features: Vec<usize> = assignments
550            .iter()
551            .enumerate()
552            .filter(|(_, &cluster)| cluster == cluster_id)
553            .map(|(idx, _)| idx)
554            .filter(|&idx| !selected.contains(&idx))
555            .collect();
556
557        cluster_features.first().copied()
558    }
559}
560
impl Estimator for SpectralFeatureSelector<Untrained> {
    type Config = ();
    type Error = SklearsError;
    type Float = Float;

    /// This selector carries no separate configuration object.
    fn config(&self) -> &Self::Config {
        // `()` is zero-sized, so this reference is promoted to 'static.
        &()
    }
}
570
571impl<T> Fit<Array2<Float>, T> for SpectralFeatureSelector<Untrained> {
572    type Fitted = SpectralFeatureSelector<Trained>;
573
574    fn fit(self, features: &Array2<Float>, _target: &T) -> SklResult<Self::Fitted> {
575        let n_features = features.ncols();
576        if n_features == 0 {
577            return Err(SklearsError::InvalidInput(
578                "No features provided".to_string(),
579            ));
580        }
581
582        if self.k > n_features {
583            return Err(SklearsError::InvalidInput(format!(
584                "k ({}) cannot be greater than number of features ({})",
585                self.k, n_features
586            )));
587        }
588
589        // Perform spectral clustering
590        let (cluster_assignments, representatives) = self.spectral_clustering(features)?;
591
592        Ok(SpectralFeatureSelector {
593            k: self.k,
594            n_clusters: self.n_clusters,
595            graph_method: self.graph_method,
596            state: PhantomData,
597            feature_clusters_: Some(cluster_assignments),
598            cluster_representatives_: Some(representatives.clone()),
599            selected_features_: Some(representatives),
600            n_features_: Some(n_features),
601        })
602    }
603}
604
605impl Transform<Array2<Float>> for SpectralFeatureSelector<Trained> {
606    fn transform(&self, x: &Array2<Float>) -> SklResult<Array2<Float>> {
607        validate::check_n_features(x, self.n_features_.unwrap())?;
608
609        let selected_features = self.selected_features_.as_ref().unwrap();
610        let n_samples = x.nrows();
611        let n_selected = selected_features.len();
612        let mut x_new = Array2::zeros((n_samples, n_selected));
613
614        for (new_idx, &old_idx) in selected_features.iter().enumerate() {
615            x_new.column_mut(new_idx).assign(&x.column(old_idx));
616        }
617
618        Ok(x_new)
619    }
620}
621
622impl SelectorMixin for SpectralFeatureSelector<Trained> {
623    fn get_support(&self) -> SklResult<Array1<bool>> {
624        let n_features = self.n_features_.unwrap();
625        let selected_features = self.selected_features_.as_ref().unwrap();
626        let mut support = Array1::from_elem(n_features, false);
627
628        for &idx in selected_features {
629            support[idx] = true;
630        }
631
632        Ok(support)
633    }
634
635    fn transform_features(&self, indices: &[usize]) -> SklResult<Vec<usize>> {
636        let selected_features = self.selected_features_.as_ref().unwrap();
637        Ok(indices
638            .iter()
639            .filter_map(|&idx| selected_features.iter().position(|&f| f == idx))
640            .collect())
641    }
642}
643
impl FeatureSelector for SpectralFeatureSelector<Trained> {
    /// Sorted indices of the selected (representative) features.
    fn selected_features(&self) -> &Vec<usize> {
        self.selected_features_.as_ref().unwrap()
    }
}
649
impl SpectralFeatureSelector<Trained> {
    /// Get the feature cluster assignments.
    /// Safe to `unwrap`: the `Trained` type state guarantees `fit` ran.
    pub fn feature_clusters(&self) -> &Array1<usize> {
        self.feature_clusters_.as_ref().unwrap()
    }

    /// Get the cluster representatives
    pub fn cluster_representatives(&self) -> &[usize] {
        self.cluster_representatives_.as_ref().unwrap()
    }

    /// Get the number of selected features
    pub fn n_features_out(&self) -> usize {
        self.selected_features_.as_ref().unwrap().len()
    }
}
666
/// Manifold learning-based feature selection
///
/// Embeds the features into a low-dimensional manifold and scores each
/// feature by how well its local neighborhood structure is preserved.
#[derive(Debug, Clone)]
pub struct ManifoldFeatureSelector<State = Untrained> {
    // Number of features to keep
    k: usize,
    // Neighborhood size used by the manifold algorithms
    n_neighbors: usize,
    // Which embedding algorithm to use
    manifold_method: ManifoldMethod,
    state: PhantomData<State>,
    // Trained state — populated by `fit`, `None` before training
    embedding_: Option<Array2<Float>>,
    feature_scores_: Option<Array1<Float>>,
    selected_features_: Option<Vec<usize>>,
    n_features_: Option<usize>,
}
680
/// Manifold learning method
///
/// Each variant carries the target dimensionality of the embedding.
#[derive(Debug, Clone)]
pub enum ManifoldMethod {
    /// Isomap: geodesic distances on a k-NN graph followed by classical MDS
    Isomap { n_components: usize },
    /// Locally Linear Embedding (LLE)
    LLE { n_components: usize },
    /// Laplacian Eigenmap: eigenvectors of the normalized graph Laplacian
    LaplacianEigenmap { n_components: usize },
}
691
impl Default for ManifoldFeatureSelector<Untrained> {
    /// Equivalent to [`ManifoldFeatureSelector::new`].
    fn default() -> Self {
        Self::new()
    }
}
697
698impl ManifoldFeatureSelector<Untrained> {
    /// Create a new manifold feature selector with default settings
    /// (select 10 features, 5 neighbors, 2-component Laplacian eigenmap).
    pub fn new() -> Self {
        Self {
            k: 10,
            n_neighbors: 5,
            manifold_method: ManifoldMethod::LaplacianEigenmap { n_components: 2 },
            state: PhantomData,
            embedding_: None,
            feature_scores_: None,
            selected_features_: None,
            n_features_: None,
        }
    }
712
    /// Set the number of features to select (builder-style, consumes self)
    pub fn k(mut self, k: usize) -> Self {
        self.k = k;
        self
    }
718
    /// Set the number of neighbors for manifold learning (builder-style)
    pub fn n_neighbors(mut self, n_neighbors: usize) -> Self {
        self.n_neighbors = n_neighbors;
        self
    }
724
    /// Set the manifold learning method (builder-style)
    pub fn manifold_method(mut self, method: ManifoldMethod) -> Self {
        self.manifold_method = method;
        self
    }
730
    /// Compute manifold-based feature scores
    ///
    /// Embeds the features (rows of the transposed matrix) into a
    /// low-dimensional manifold and scores each feature by how well its
    /// local neighborhood is preserved in the embedding.
    fn compute_manifold_scores(&self, features: &Array2<Float>) -> SklResult<Array1<Float>> {
        let n_features = features.ncols();
        let _n_samples = features.nrows();

        // Transpose so manifold learning runs on the feature space.
        let features_t = features.t().to_owned();

        // Compute manifold embedding
        let embedding = self.compute_manifold_embedding(&features_t)?;

        // Score features based on their manifold structure preservation
        let mut scores = Array1::zeros(n_features);

        for i in 0..n_features {
            // Per-feature neighborhood preservation score (helper defined
            // elsewhere in this impl; its orientation — higher vs. lower is
            // better — is not visible here, so see its definition).
            let neighborhood_score =
                self.compute_neighborhood_preservation_score(&features_t, &embedding, i)?;
            scores[i] = neighborhood_score;
        }

        Ok(scores)
    }
754
755    /// Compute manifold embedding
756    fn compute_manifold_embedding(&self, features_t: &Array2<Float>) -> SklResult<Array2<Float>> {
757        match &self.manifold_method {
758            ManifoldMethod::LaplacianEigenmap { n_components } => {
759                self.compute_laplacian_eigenmap(features_t, *n_components)
760            }
761            ManifoldMethod::Isomap { n_components } => {
762                self.compute_isomap(features_t, *n_components)
763            }
764            ManifoldMethod::LLE { n_components } => self.compute_lle(features_t, *n_components),
765        }
766    }
767
    /// Compute Laplacian eigenmap embedding
    ///
    /// Builds the k-NN graph and its normalized Laplacian, but currently
    /// returns a deterministic sinusoidal placeholder instead of the actual
    /// Laplacian eigenvectors.
    fn compute_laplacian_eigenmap(
        &self,
        features_t: &Array2<Float>,
        n_components: usize,
    ) -> SklResult<Array2<Float>> {
        let n_points = features_t.nrows();

        // Build k-NN graph
        let adjacency = self.build_knn_graph(features_t)?;

        // Computed for completeness; the placeholder below ignores it.
        let _laplacian = self.compute_normalized_laplacian(&adjacency)?;

        // Placeholder: a real implementation would take the eigenvectors of
        // the Laplacian associated with the smallest non-zero eigenvalues.
        let mut embedding = Array2::zeros((n_points, n_components));
        for i in 0..n_points {
            for j in 0..n_components {
                embedding[[i, j]] = (i as Float * 0.1 + j as Float * 0.01).sin();
            }
        }

        Ok(embedding)
    }
793
    /// Compute Isomap embedding
    ///
    /// Pipeline: k-NN graph → geodesic distances (Floyd-Warshall) →
    /// classical MDS on the geodesic distance matrix.
    fn compute_isomap(
        &self,
        features_t: &Array2<Float>,
        n_components: usize,
    ) -> SklResult<Array2<Float>> {
        let _n_points = features_t.nrows();

        // Build k-NN graph
        let adjacency = self.build_knn_graph(features_t)?;

        // Compute geodesic distances (Floyd-Warshall)
        let geodesic_distances = self.compute_geodesic_distances(&adjacency)?;

        // Apply classical MDS to geodesic distances
        let embedding = self.apply_classical_mds(&geodesic_distances, n_components)?;

        Ok(embedding)
    }
813
    /// Compute LLE embedding
    ///
    /// Pipeline: find k nearest neighbors → solve for local reconstruction
    /// weights → embed by minimizing the reconstruction error (the last two
    /// helpers are defined elsewhere in this impl).
    fn compute_lle(
        &self,
        features_t: &Array2<Float>,
        n_components: usize,
    ) -> SklResult<Array2<Float>> {
        let _n_points = features_t.nrows();

        // Find k-nearest neighbors
        let neighbors = self.find_knn(features_t)?;

        // Compute reconstruction weights
        let weights = self.compute_reconstruction_weights(features_t, &neighbors)?;

        // Compute embedding by minimizing reconstruction error
        let embedding = self.compute_lle_embedding(&weights, n_components)?;

        Ok(embedding)
    }
833
834    /// Build k-nearest neighbor graph
835    fn build_knn_graph(&self, features_t: &Array2<Float>) -> SklResult<Array2<Float>> {
836        let n_points = features_t.nrows();
837        let mut adjacency = Array2::zeros((n_points, n_points));
838
839        for i in 0..n_points {
840            let mut distances: Vec<(usize, Float)> = Vec::new();
841
842            for j in 0..n_points {
843                if i != j {
844                    let dist = self.euclidean_distance(&features_t.row(i), &features_t.row(j));
845                    distances.push((j, dist));
846                }
847            }
848
849            // Sort by distance and take k nearest
850            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
851
852            for &(neighbor, _dist) in distances.iter().take(self.n_neighbors) {
853                adjacency[[i, neighbor]] = 1.0;
854                adjacency[[neighbor, i]] = 1.0; // Symmetric
855            }
856        }
857
858        Ok(adjacency)
859    }
860
861    /// Compute normalized Laplacian
862    fn compute_normalized_laplacian(&self, adjacency: &Array2<Float>) -> SklResult<Array2<Float>> {
863        let n_points = adjacency.nrows();
864
865        // Compute degree matrix
866        let degrees: Array1<Float> = adjacency.sum_axis(Axis(1));
867
868        // Compute normalized Laplacian L = I - D^(-1/2) * A * D^(-1/2)
869        let mut laplacian = Array2::eye(n_points);
870
871        for i in 0..n_points {
872            for j in 0..n_points {
873                if i != j && degrees[i] > 0.0 && degrees[j] > 0.0 {
874                    laplacian[[i, j]] = -adjacency[[i, j]] / (degrees[i] * degrees[j]).sqrt();
875                }
876            }
877        }
878
879        Ok(laplacian)
880    }
881
882    /// Compute geodesic distances using Floyd-Warshall
883    fn compute_geodesic_distances(&self, adjacency: &Array2<Float>) -> SklResult<Array2<Float>> {
884        let n_points = adjacency.nrows();
885        let mut distances = Array2::from_elem((n_points, n_points), Float::INFINITY);
886
887        // Initialize with direct distances
888        for i in 0..n_points {
889            distances[[i, i]] = 0.0;
890            for j in 0..n_points {
891                if adjacency[[i, j]] > 0.0 {
892                    distances[[i, j]] = adjacency[[i, j]];
893                }
894            }
895        }
896
897        // Floyd-Warshall
898        for k in 0..n_points {
899            for i in 0..n_points {
900                for j in 0..n_points {
901                    if distances[[i, k]] + distances[[k, j]] < distances[[i, j]] {
902                        distances[[i, j]] = distances[[i, k]] + distances[[k, j]];
903                    }
904                }
905            }
906        }
907
908        Ok(distances)
909    }
910
    /// Apply classical MDS to distance matrix
    ///
    /// Performs the double-centering step of classical MDS, but currently
    /// returns a deterministic cosine placeholder instead of the
    /// eigen-decomposition of the centered matrix (so `centered` is
    /// computed and then unused).
    fn apply_classical_mds(
        &self,
        distances: &Array2<Float>,
        n_components: usize,
    ) -> SklResult<Array2<Float>> {
        let n_points = distances.nrows();

        // Double centering: B = -1/2 (D - row means - col means + grand mean)
        let mut centered = Array2::zeros((n_points, n_points));
        let row_means: Array1<Float> = distances.mean_axis(Axis(1)).unwrap();
        let col_means: Array1<Float> = distances.mean_axis(Axis(0)).unwrap();
        let grand_mean = distances.mean().unwrap();

        for i in 0..n_points {
            for j in 0..n_points {
                centered[[i, j]] =
                    -0.5 * (distances[[i, j]] - row_means[i] - col_means[j] + grand_mean);
            }
        }

        // Placeholder: a real implementation would return the top
        // eigenvectors of `centered`, scaled by sqrt of the eigenvalues.
        let mut embedding = Array2::zeros((n_points, n_components));
        for i in 0..n_points {
            for j in 0..n_components {
                embedding[[i, j]] = (i as Float * 0.1 + j as Float * 0.01).cos();
            }
        }

        Ok(embedding)
    }
943
944    /// Find k-nearest neighbors
945    fn find_knn(&self, features_t: &Array2<Float>) -> SklResult<Array2<usize>> {
946        let n_points = features_t.nrows();
947        let mut neighbors = Array2::zeros((n_points, self.n_neighbors));
948
949        for i in 0..n_points {
950            let mut distances: Vec<(usize, Float)> = Vec::new();
951
952            for j in 0..n_points {
953                if i != j {
954                    let dist = self.euclidean_distance(&features_t.row(i), &features_t.row(j));
955                    distances.push((j, dist));
956                }
957            }
958
959            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
960
961            for (k, &(neighbor, _)) in distances.iter().take(self.n_neighbors).enumerate() {
962                neighbors[[i, k]] = neighbor;
963            }
964        }
965
966        Ok(neighbors)
967    }
968
969    /// Compute reconstruction weights for LLE
970    fn compute_reconstruction_weights(
971        &self,
972        features_t: &Array2<Float>,
973        _neighbors: &Array2<usize>,
974    ) -> SklResult<Array2<Float>> {
975        let n_points = features_t.nrows();
976        let mut weights = Array2::zeros((n_points, self.n_neighbors));
977
978        for i in 0..n_points {
979            // Simplified weight computation
980            let uniform_weight = 1.0 / self.n_neighbors as Float;
981            for j in 0..self.n_neighbors {
982                weights[[i, j]] = uniform_weight;
983            }
984        }
985
986        Ok(weights)
987    }
988
989    /// Compute LLE embedding
990    fn compute_lle_embedding(
991        &self,
992        weights: &Array2<Float>,
993        n_components: usize,
994    ) -> SklResult<Array2<Float>> {
995        let n_points = weights.nrows();
996
997        // For simplicity, return a random embedding
998        // In practice, we would solve the eigenvalue problem
999        let mut embedding = Array2::zeros((n_points, n_components));
1000        for i in 0..n_points {
1001            for j in 0..n_components {
1002                embedding[[i, j]] = (i as Float * 0.1 + j as Float * 0.01).tan();
1003            }
1004        }
1005
1006        Ok(embedding)
1007    }
1008
1009    /// Compute neighborhood preservation score
1010    fn compute_neighborhood_preservation_score(
1011        &self,
1012        features_t: &Array2<Float>,
1013        embedding: &Array2<Float>,
1014        feature_idx: usize,
1015    ) -> SklResult<Float> {
1016        let n_points = features_t.nrows();
1017
1018        // Find neighbors in original space
1019        let mut original_distances: Vec<(usize, Float)> = Vec::new();
1020        for i in 0..n_points {
1021            if i != feature_idx {
1022                let dist =
1023                    self.euclidean_distance(&features_t.row(feature_idx), &features_t.row(i));
1024                original_distances.push((i, dist));
1025            }
1026        }
1027        original_distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
1028
1029        // Find neighbors in embedding space
1030        let mut embedding_distances: Vec<(usize, Float)> = Vec::new();
1031        for i in 0..n_points {
1032            if i != feature_idx {
1033                let dist = self.euclidean_distance(&embedding.row(feature_idx), &embedding.row(i));
1034                embedding_distances.push((i, dist));
1035            }
1036        }
1037        embedding_distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
1038
1039        // Compute neighborhood preservation
1040        let k = self.n_neighbors.min(n_points - 1);
1041        let original_neighbors: Vec<usize> = original_distances
1042            .iter()
1043            .take(k)
1044            .map(|&(idx, _)| idx)
1045            .collect();
1046        let embedding_neighbors: Vec<usize> = embedding_distances
1047            .iter()
1048            .take(k)
1049            .map(|&(idx, _)| idx)
1050            .collect();
1051
1052        let common_neighbors = original_neighbors
1053            .iter()
1054            .filter(|&&x| embedding_neighbors.contains(&x))
1055            .count();
1056
1057        Ok(common_neighbors as Float / k as Float)
1058    }
1059
1060    /// Compute Euclidean distance between two vectors
1061    fn euclidean_distance(
1062        &self,
1063        x1: &scirs2_core::ndarray::ArrayView1<Float>,
1064        x2: &scirs2_core::ndarray::ArrayView1<Float>,
1065    ) -> Float {
1066        x1.iter()
1067            .zip(x2.iter())
1068            .map(|(&a, &b)| (a - b) * (a - b))
1069            .sum::<Float>()
1070            .sqrt()
1071    }
1072}
1073
/// Marker `Estimator` implementation for the untrained manifold selector.
/// This selector carries no estimator-level configuration, so `Config` is the
/// unit type and `config()` returns a reference to it.
impl Estimator for ManifoldFeatureSelector<Untrained> {
    type Config = ();
    type Error = SklearsError;
    type Float = Float;

    fn config(&self) -> &Self::Config {
        &()
    }
}
1083
1084impl<T> Fit<Array2<Float>, T> for ManifoldFeatureSelector<Untrained> {
1085    type Fitted = ManifoldFeatureSelector<Trained>;
1086
1087    fn fit(self, features: &Array2<Float>, _target: &T) -> SklResult<Self::Fitted> {
1088        let n_features = features.ncols();
1089        if n_features == 0 {
1090            return Err(SklearsError::InvalidInput(
1091                "No features provided".to_string(),
1092            ));
1093        }
1094
1095        if self.k > n_features {
1096            return Err(SklearsError::InvalidInput(format!(
1097                "k ({}) cannot be greater than number of features ({})",
1098                self.k, n_features
1099            )));
1100        }
1101
1102        // Compute manifold scores
1103        let scores = self.compute_manifold_scores(features)?;
1104
1105        // Select top k features (highest scores are best)
1106        let mut feature_indices: Vec<usize> = (0..n_features).collect();
1107        feature_indices.sort_by(|&a, &b| {
1108            scores[b]
1109                .partial_cmp(&scores[a])
1110                .unwrap_or(std::cmp::Ordering::Equal)
1111        });
1112
1113        let selected_features = feature_indices.into_iter().take(self.k).collect();
1114
1115        // Compute embedding for selected features
1116        let features_t = features.t().to_owned();
1117        let embedding = self.compute_manifold_embedding(&features_t)?;
1118
1119        Ok(ManifoldFeatureSelector {
1120            k: self.k,
1121            n_neighbors: self.n_neighbors,
1122            manifold_method: self.manifold_method,
1123            state: PhantomData,
1124            embedding_: Some(embedding),
1125            feature_scores_: Some(scores),
1126            selected_features_: Some(selected_features),
1127            n_features_: Some(n_features),
1128        })
1129    }
1130}
1131
1132impl Transform<Array2<Float>> for ManifoldFeatureSelector<Trained> {
1133    fn transform(&self, x: &Array2<Float>) -> SklResult<Array2<Float>> {
1134        validate::check_n_features(x, self.n_features_.unwrap())?;
1135
1136        let selected_features = self.selected_features_.as_ref().unwrap();
1137        let n_samples = x.nrows();
1138        let n_selected = selected_features.len();
1139        let mut x_new = Array2::zeros((n_samples, n_selected));
1140
1141        for (new_idx, &old_idx) in selected_features.iter().enumerate() {
1142            x_new.column_mut(new_idx).assign(&x.column(old_idx));
1143        }
1144
1145        Ok(x_new)
1146    }
1147}
1148
1149impl SelectorMixin for ManifoldFeatureSelector<Trained> {
1150    fn get_support(&self) -> SklResult<Array1<bool>> {
1151        let n_features = self.n_features_.unwrap();
1152        let selected_features = self.selected_features_.as_ref().unwrap();
1153        let mut support = Array1::from_elem(n_features, false);
1154
1155        for &idx in selected_features {
1156            support[idx] = true;
1157        }
1158
1159        Ok(support)
1160    }
1161
1162    fn transform_features(&self, indices: &[usize]) -> SklResult<Vec<usize>> {
1163        let selected_features = self.selected_features_.as_ref().unwrap();
1164        Ok(indices
1165            .iter()
1166            .filter_map(|&idx| selected_features.iter().position(|&f| f == idx))
1167            .collect())
1168    }
1169}
1170
/// Expose the selected feature indices through the common selector trait.
impl FeatureSelector for ManifoldFeatureSelector<Trained> {
    fn selected_features(&self) -> &Vec<usize> {
        // Always Some in the Trained state: fit() populates it before
        // constructing the trained selector.
        self.selected_features_.as_ref().unwrap()
    }
}
1176
1177impl ManifoldFeatureSelector<Trained> {
1178    /// Get the manifold embedding
1179    pub fn embedding(&self) -> &Array2<Float> {
1180        self.embedding_.as_ref().unwrap()
1181    }
1182
1183    /// Get the feature scores
1184    pub fn feature_scores(&self) -> &Array1<Float> {
1185        self.feature_scores_.as_ref().unwrap()
1186    }
1187
1188    /// Get the number of selected features
1189    pub fn n_features_out(&self) -> usize {
1190        self.selected_features_.as_ref().unwrap().len()
1191    }
1192}
1193
/// Kernel-based feature selection
///
/// Scores each feature by the alignment between the kernel matrix it induces
/// and the target's linear kernel matrix, then keeps the `k` best-scoring
/// features.
#[derive(Debug, Clone)]
pub struct KernelFeatureSelector<State = Untrained> {
    /// Number of features to select
    k: usize,
    /// Kernel used to build per-feature similarity matrices
    kernel: KernelType,
    /// Zero-sized type-state marker (`Untrained` / `Trained`)
    state: PhantomData<State>,
    // Trained state
    /// Kernel-target alignment score per feature (set by fit)
    kernel_scores_: Option<Array1<Float>>,
    /// Indices of the selected features (set by fit)
    selected_features_: Option<Vec<usize>>,
    /// Number of input features seen during fit
    n_features_: Option<usize>,
}
1205
/// Kernel type for feature selection
#[derive(Debug, Clone)]
pub enum KernelType {
    /// Linear kernel: k(x, y) = x * y
    Linear,
    /// Polynomial kernel: k(x, y) = (gamma * x * y + coef0)^degree
    Polynomial {
        /// Polynomial degree
        degree: usize,
        /// Scale applied to the product term
        gamma: Float,
        /// Independent (bias) term
        coef0: Float,
    },
    /// RBF (Gaussian) kernel: k(x, y) = exp(-gamma * (x - y)^2)
    RBF { gamma: Float },
    /// Sigmoid kernel: k(x, y) = tanh(gamma * x * y + coef0)
    Sigmoid { gamma: Float, coef0: Float },
}
1224
impl Default for KernelFeatureSelector<Untrained> {
    /// Equivalent to [`KernelFeatureSelector::new`].
    fn default() -> Self {
        Self::new()
    }
}
1230
1231impl KernelFeatureSelector<Untrained> {
1232    /// Create a new kernel feature selector
1233    pub fn new() -> Self {
1234        Self {
1235            k: 10,
1236            kernel: KernelType::RBF { gamma: 1.0 },
1237            state: PhantomData,
1238            kernel_scores_: None,
1239            selected_features_: None,
1240            n_features_: None,
1241        }
1242    }
1243
1244    /// Set the number of features to select
1245    pub fn k(mut self, k: usize) -> Self {
1246        self.k = k;
1247        self
1248    }
1249
1250    /// Set the kernel type
1251    pub fn kernel(mut self, kernel: KernelType) -> Self {
1252        self.kernel = kernel;
1253        self
1254    }
1255
1256    /// Compute kernel-based feature scores
1257    fn compute_kernel_scores(
1258        &self,
1259        features: &Array2<Float>,
1260        target: &Array1<Float>,
1261    ) -> SklResult<Array1<Float>> {
1262        let n_features = features.ncols();
1263        let _n_samples = features.nrows();
1264        let mut scores = Array1::zeros(n_features);
1265
1266        for i in 0..n_features {
1267            let feature_col = features.column(i);
1268
1269            // Compute kernel matrix for this feature
1270            let kernel_matrix = self.compute_kernel_matrix(&feature_col)?;
1271
1272            // Compute kernel target alignment
1273            let score = self.compute_kernel_target_alignment(&kernel_matrix, target)?;
1274            scores[i] = score;
1275        }
1276
1277        Ok(scores)
1278    }
1279
1280    /// Compute kernel matrix for a single feature
1281    fn compute_kernel_matrix(
1282        &self,
1283        feature: &scirs2_core::ndarray::ArrayView1<Float>,
1284    ) -> SklResult<Array2<Float>> {
1285        let n_samples = feature.len();
1286        let mut kernel_matrix = Array2::zeros((n_samples, n_samples));
1287
1288        for i in 0..n_samples {
1289            for j in 0..n_samples {
1290                kernel_matrix[[i, j]] = self.kernel_function(feature[i], feature[j]);
1291            }
1292        }
1293
1294        Ok(kernel_matrix)
1295    }
1296
1297    /// Compute kernel function value
1298    fn kernel_function(&self, x1: Float, x2: Float) -> Float {
1299        match &self.kernel {
1300            KernelType::Linear => x1 * x2,
1301            KernelType::Polynomial {
1302                degree,
1303                gamma,
1304                coef0,
1305            } => (gamma * x1 * x2 + coef0).powi(*degree as i32),
1306            KernelType::RBF { gamma } => {
1307                let diff = x1 - x2;
1308                (-gamma * diff * diff).exp()
1309            }
1310            KernelType::Sigmoid { gamma, coef0 } => (gamma * x1 * x2 + coef0).tanh(),
1311        }
1312    }
1313
1314    /// Compute kernel target alignment
1315    fn compute_kernel_target_alignment(
1316        &self,
1317        kernel_matrix: &Array2<Float>,
1318        target: &Array1<Float>,
1319    ) -> SklResult<Float> {
1320        let n_samples = kernel_matrix.nrows();
1321
1322        // Compute target kernel matrix (for regression, use linear kernel)
1323        let mut target_kernel = Array2::zeros((n_samples, n_samples));
1324        for i in 0..n_samples {
1325            for j in 0..n_samples {
1326                target_kernel[[i, j]] = target[i] * target[j];
1327            }
1328        }
1329
1330        // Compute kernel alignment: <K, K_y> / (||K|| * ||K_y||)
1331        let numerator = kernel_matrix
1332            .iter()
1333            .zip(target_kernel.iter())
1334            .map(|(&k, &t)| k * t)
1335            .sum::<Float>();
1336
1337        let k_norm = kernel_matrix.iter().map(|&x| x * x).sum::<Float>().sqrt();
1338        let t_norm = target_kernel.iter().map(|&x| x * x).sum::<Float>().sqrt();
1339
1340        if k_norm == 0.0 || t_norm == 0.0 {
1341            return Ok(0.0);
1342        }
1343
1344        Ok(numerator / (k_norm * t_norm))
1345    }
1346}
1347
/// Marker `Estimator` implementation for the untrained kernel selector.
/// No estimator-level configuration exists, so `Config` is the unit type.
impl Estimator for KernelFeatureSelector<Untrained> {
    type Config = ();
    type Error = SklearsError;
    type Float = Float;

    fn config(&self) -> &Self::Config {
        &()
    }
}
1357
1358impl Fit<Array2<Float>, Array1<Float>> for KernelFeatureSelector<Untrained> {
1359    type Fitted = KernelFeatureSelector<Trained>;
1360
1361    fn fit(self, features: &Array2<Float>, target: &Array1<Float>) -> SklResult<Self::Fitted> {
1362        let n_features = features.ncols();
1363        if n_features == 0 {
1364            return Err(SklearsError::InvalidInput(
1365                "No features provided".to_string(),
1366            ));
1367        }
1368
1369        if self.k > n_features {
1370            return Err(SklearsError::InvalidInput(format!(
1371                "k ({}) cannot be greater than number of features ({})",
1372                self.k, n_features
1373            )));
1374        }
1375
1376        // Compute kernel scores
1377        let scores = self.compute_kernel_scores(features, target)?;
1378
1379        // Select top k features (highest scores are best)
1380        let mut feature_indices: Vec<usize> = (0..n_features).collect();
1381        feature_indices.sort_by(|&a, &b| {
1382            scores[b]
1383                .partial_cmp(&scores[a])
1384                .unwrap_or(std::cmp::Ordering::Equal)
1385        });
1386
1387        let selected_features = feature_indices.into_iter().take(self.k).collect();
1388
1389        Ok(KernelFeatureSelector {
1390            k: self.k,
1391            kernel: self.kernel,
1392            state: PhantomData,
1393            kernel_scores_: Some(scores),
1394            selected_features_: Some(selected_features),
1395            n_features_: Some(n_features),
1396        })
1397    }
1398}
1399
1400impl Transform<Array2<Float>> for KernelFeatureSelector<Trained> {
1401    fn transform(&self, x: &Array2<Float>) -> SklResult<Array2<Float>> {
1402        validate::check_n_features(x, self.n_features_.unwrap())?;
1403
1404        let selected_features = self.selected_features_.as_ref().unwrap();
1405        let n_samples = x.nrows();
1406        let n_selected = selected_features.len();
1407        let mut x_new = Array2::zeros((n_samples, n_selected));
1408
1409        for (new_idx, &old_idx) in selected_features.iter().enumerate() {
1410            x_new.column_mut(new_idx).assign(&x.column(old_idx));
1411        }
1412
1413        Ok(x_new)
1414    }
1415}
1416
1417impl SelectorMixin for KernelFeatureSelector<Trained> {
1418    fn get_support(&self) -> SklResult<Array1<bool>> {
1419        let n_features = self.n_features_.unwrap();
1420        let selected_features = self.selected_features_.as_ref().unwrap();
1421        let mut support = Array1::from_elem(n_features, false);
1422
1423        for &idx in selected_features {
1424            support[idx] = true;
1425        }
1426
1427        Ok(support)
1428    }
1429
1430    fn transform_features(&self, indices: &[usize]) -> SklResult<Vec<usize>> {
1431        let selected_features = self.selected_features_.as_ref().unwrap();
1432        Ok(indices
1433            .iter()
1434            .filter_map(|&idx| selected_features.iter().position(|&f| f == idx))
1435            .collect())
1436    }
1437}
1438
/// Expose the selected feature indices through the common selector trait.
impl FeatureSelector for KernelFeatureSelector<Trained> {
    fn selected_features(&self) -> &Vec<usize> {
        // Always Some in the Trained state: fit() populates it before
        // constructing the trained selector.
        self.selected_features_.as_ref().unwrap()
    }
}
1444
1445impl KernelFeatureSelector<Trained> {
1446    /// Get the kernel scores
1447    pub fn kernel_scores(&self) -> &Array1<Float> {
1448        self.kernel_scores_.as_ref().unwrap()
1449    }
1450
1451    /// Get the number of selected features
1452    pub fn n_features_out(&self) -> usize {
1453        self.selected_features_.as_ref().unwrap().len()
1454    }
1455}
1456
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    /// Build a small synthetic regression dataset: 50 samples, 10 features
    /// with a smooth sinusoidal structure. The target is a linear
    /// combination of the first three features, so those are the most
    /// informative.
    fn create_test_data() -> (Array2<Float>, Array1<Float>) {
        // Create synthetic data with some correlation structure
        let n_samples = 50;
        let n_features = 10;
        let mut features = Array2::zeros((n_samples, n_features));
        let mut target = Array1::zeros(n_samples);

        // Fill with structured data
        for i in 0..n_samples {
            for j in 0..n_features {
                features[[i, j]] = (i as Float * 0.1 + j as Float * 0.01).sin() + 0.1 * j as Float;
            }
            // Make first few features predictive
            target[i] = features[[i, 0]] + 0.5 * features[[i, 1]] + 0.1 * features[[i, 2]];
        }

        (features, target)
    }

    /// End-to-end fit/transform/support check for the Laplacian score
    /// selector with a KNN graph.
    #[test]
    fn test_laplacian_score_selector() {
        let (features, target) = create_test_data();

        let selector = LaplacianScoreSelector::new()
            .k(5)
            .graph_method(GraphConstructionMethod::KNN { k: 5 });

        let trained = selector.fit(&features, &target).unwrap();
        assert_eq!(trained.n_features_out(), 5);

        // Test transform
        let transformed = trained.transform(&features).unwrap();
        assert_eq!(transformed.ncols(), 5);
        assert_eq!(transformed.nrows(), features.nrows());

        // Test support
        let support = trained.get_support().unwrap();
        assert_eq!(support.len(), features.ncols());
        assert_eq!(support.iter().filter(|&&x| x).count(), 5);
    }

    /// Fit/transform and cluster-assignment shape check for the spectral
    /// selector.
    #[test]
    fn test_spectral_feature_selector() {
        let (features, target) = create_test_data();

        let selector = SpectralFeatureSelector::new().k(4).n_clusters(3);

        let trained = selector.fit(&features, &target).unwrap();
        assert_eq!(trained.n_features_out(), 4);

        // Test transform
        let transformed = trained.transform(&features).unwrap();
        assert_eq!(transformed.ncols(), 4);
        assert_eq!(transformed.nrows(), features.nrows());

        // Test cluster assignments
        let clusters = trained.feature_clusters();
        assert_eq!(clusters.len(), features.ncols());
    }

    /// Each graph-construction variant must produce finite scores for
    /// every feature.
    #[test]
    fn test_laplacian_score_different_graph_methods() {
        let (features, target) = create_test_data();

        let methods = vec![
            GraphConstructionMethod::KNN { k: 5 },
            GraphConstructionMethod::Epsilon { epsilon: 1.0 },
            GraphConstructionMethod::FullyConnected { sigma: 0.5 },
            GraphConstructionMethod::HeatKernel { t: 1.0 },
        ];

        for method in methods {
            let selector = LaplacianScoreSelector::new().k(3).graph_method(method);

            let trained = selector.fit(&features, &target).unwrap();
            assert_eq!(trained.n_features_out(), 3);

            let scores = trained.scores();
            assert_eq!(scores.len(), features.ncols());
            assert!(scores.iter().all(|&x| x.is_finite()));
        }
    }

    /// k greater than the number of features must be rejected at fit time.
    #[test]
    fn test_laplacian_score_invalid_k() {
        let (features, target) = create_test_data();

        let selector = LaplacianScoreSelector::new().k(features.ncols() + 1);
        assert!(selector.fit(&features, &target).is_err());
    }

    /// A zero-column feature matrix must be rejected at fit time.
    #[test]
    fn test_spectral_selector_empty_features() {
        let features = Array2::<Float>::zeros((10, 0));
        let target = Array1::<Float>::zeros(10);

        let selector = SpectralFeatureSelector::new();
        assert!(selector.fit(&features, &target).is_err());
    }

    /// End-to-end fit/transform/support/score check for the manifold
    /// selector with a Laplacian-eigenmap embedding.
    #[test]
    fn test_manifold_feature_selector() {
        let (features, target) = create_test_data();

        let selector = ManifoldFeatureSelector::new()
            .k(4)
            .n_neighbors(3)
            .manifold_method(ManifoldMethod::LaplacianEigenmap { n_components: 2 });

        let trained = selector.fit(&features, &target).unwrap();
        assert_eq!(trained.n_features_out(), 4);

        // Test transform
        let transformed = trained.transform(&features).unwrap();
        assert_eq!(transformed.ncols(), 4);
        assert_eq!(transformed.nrows(), features.nrows());

        // Test support
        let support = trained.get_support().unwrap();
        assert_eq!(support.len(), features.ncols());
        assert_eq!(support.iter().filter(|&&x| x).count(), 4);

        // Test scores
        let scores = trained.feature_scores();
        assert_eq!(scores.len(), features.ncols());
    }

    /// Each manifold method must produce finite scores for every feature.
    #[test]
    fn test_manifold_selector_different_methods() {
        let (features, target) = create_test_data();

        let methods = vec![
            ManifoldMethod::LaplacianEigenmap { n_components: 2 },
            ManifoldMethod::Isomap { n_components: 3 },
            ManifoldMethod::LLE { n_components: 2 },
        ];

        for method in methods {
            let selector = ManifoldFeatureSelector::new().k(3).manifold_method(method);

            let trained = selector.fit(&features, &target).unwrap();
            assert_eq!(trained.n_features_out(), 3);

            let scores = trained.feature_scores();
            assert_eq!(scores.len(), features.ncols());
            assert!(scores.iter().all(|&x| x.is_finite()));
        }
    }

    /// End-to-end fit/transform/support/score check for the kernel
    /// selector with an RBF kernel.
    #[test]
    fn test_kernel_feature_selector() {
        let (features, target) = create_test_data();

        let selector = KernelFeatureSelector::new()
            .k(5)
            .kernel(KernelType::RBF { gamma: 0.5 });

        let trained = selector.fit(&features, &target).unwrap();
        assert_eq!(trained.n_features_out(), 5);

        // Test transform
        let transformed = trained.transform(&features).unwrap();
        assert_eq!(transformed.ncols(), 5);
        assert_eq!(transformed.nrows(), features.nrows());

        // Test support
        let support = trained.get_support().unwrap();
        assert_eq!(support.len(), features.ncols());
        assert_eq!(support.iter().filter(|&&x| x).count(), 5);

        // Test scores
        let scores = trained.kernel_scores();
        assert_eq!(scores.len(), features.ncols());
        assert!(scores.iter().all(|&x| x.is_finite()));
    }

    /// Every kernel variant must produce finite scores for every feature.
    #[test]
    fn test_kernel_selector_different_kernels() {
        let (features, target) = create_test_data();

        let kernels = vec![
            KernelType::Linear,
            KernelType::Polynomial {
                degree: 2,
                gamma: 1.0,
                coef0: 0.0,
            },
            KernelType::RBF { gamma: 1.0 },
            KernelType::Sigmoid {
                gamma: 1.0,
                coef0: 0.0,
            },
        ];

        for kernel in kernels {
            let selector = KernelFeatureSelector::new().k(3).kernel(kernel);

            let trained = selector.fit(&features, &target).unwrap();
            assert_eq!(trained.n_features_out(), 3);

            let scores = trained.kernel_scores();
            assert_eq!(scores.len(), features.ncols());
            assert!(scores.iter().all(|&x| x.is_finite()));
        }
    }

    /// k greater than the number of features must be rejected (manifold).
    #[test]
    fn test_manifold_selector_invalid_k() {
        let (features, target) = create_test_data();

        let selector = ManifoldFeatureSelector::new().k(features.ncols() + 1);
        assert!(selector.fit(&features, &target).is_err());
    }

    /// k greater than the number of features must be rejected (kernel).
    #[test]
    fn test_kernel_selector_invalid_k() {
        let (features, target) = create_test_data();

        let selector = KernelFeatureSelector::new().k(features.ncols() + 1);
        assert!(selector.fit(&features, &target).is_err());
    }
}