sklears_feature_selection/
ml_based.rs

//! Machine learning-based feature selection methods
//!
//! This module provides advanced feature selection methods using machine learning techniques,
//! including neural networks, attention mechanisms, reinforcement learning, and meta-learning.

use crate::base::SelectorMixin;
use scirs2_core::ndarray::{s, Array1, Array2, Axis};

use scirs2_core::rand_prelude::IndexedRandom;
use scirs2_core::random::{rngs::StdRng, thread_rng, Rng, SeedableRng};
use sklears_core::{
    error::{validate, Result as SklResult, SklearsError},
    traits::{Estimator, Fit, Trained, Transform, Untrained},
    types::Float,
};
use std::collections::HashMap;
use std::marker::PhantomData;

/// Neural network-based feature selector.
///
/// Trains a small feed-forward network and scores each feature by the
/// magnitude of its trained input-layer weights.
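///
/// A minimal usage sketch (illustrative only, not compiled as a doctest; the
/// data values and parameter settings are placeholders, and the `array!`
/// macro is assumed to be re-exported by `scirs2_core::ndarray`):
///
/// ```ignore
/// use scirs2_core::ndarray::{array, Array1, Array2};
///
/// let x: Array2<f64> = array![[0.1, 1.0, 5.0], [0.2, 0.9, 4.8], [0.3, 1.1, 5.2]];
/// let y: Array1<f64> = array![0.0, 1.0, 0.0];
///
/// let selector = NeuralFeatureSelector::new()
///     .hidden_layers(vec![16, 8])
///     .epochs(50)
///     .k(Some(2))
///     .random_state(Some(42));
///
/// let fitted = selector.fit(&x, &y)?;
/// let x_selected = fitted.transform(&x)?; // keeps the 2 most important columns
/// ```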
#[derive(Debug, Clone)]
pub struct NeuralFeatureSelector<State = Untrained> {
    /// Sizes of the hidden layers
    hidden_layers: Vec<usize>,
    /// Learning rate for gradient descent
    learning_rate: f64,
    /// Number of training epochs
    epochs: usize,
    /// L1 regularization parameter for sparsity
    l1_reg: f64,
    /// L2 regularization parameter
    l2_reg: f64,
    /// Number of top features to select
    k: Option<usize>,
    /// Threshold for feature importance
    importance_threshold: f64,
    /// Random seed for reproducibility
    random_state: Option<u64>,
    state: PhantomData<State>,
    // Trained state
    weights_: Option<Vec<Array2<Float>>>,
    biases_: Option<Vec<Array1<Float>>>,
    feature_importances_: Option<Array1<Float>>,
    selected_features_: Option<Vec<usize>>,
}

impl NeuralFeatureSelector<Untrained> {
    pub fn new() -> Self {
        Self {
            hidden_layers: vec![64, 32],
            learning_rate: 0.001,
            epochs: 100,
            l1_reg: 0.01,
            l2_reg: 0.01,
            k: None,
            importance_threshold: 0.01,
            random_state: None,
            state: PhantomData,
            weights_: None,
            biases_: None,
            feature_importances_: None,
            selected_features_: None,
        }
    }

    pub fn hidden_layers(mut self, layers: Vec<usize>) -> Self {
        self.hidden_layers = layers;
        self
    }

    pub fn learning_rate(mut self, lr: f64) -> Self {
        self.learning_rate = lr;
        self
    }

    pub fn epochs(mut self, epochs: usize) -> Self {
        self.epochs = epochs;
        self
    }

    pub fn l1_reg(mut self, l1_reg: f64) -> Self {
        self.l1_reg = l1_reg;
        self
    }

    pub fn l2_reg(mut self, l2_reg: f64) -> Self {
        self.l2_reg = l2_reg;
        self
    }

    pub fn k(mut self, k: Option<usize>) -> Self {
        self.k = k;
        self
    }

    pub fn importance_threshold(mut self, threshold: f64) -> Self {
        self.importance_threshold = threshold;
        self
    }

    pub fn random_state(mut self, seed: Option<u64>) -> Self {
        self.random_state = seed;
        self
    }
}

impl Default for NeuralFeatureSelector<Untrained> {
    fn default() -> Self {
        Self::new()
    }
}

impl Estimator for NeuralFeatureSelector<Untrained> {
    type Config = ();
    type Error = SklearsError;
    type Float = f64;

    fn config(&self) -> &Self::Config {
        &()
    }
}

impl Fit<Array2<Float>, Array1<Float>> for NeuralFeatureSelector<Untrained> {
    type Fitted = NeuralFeatureSelector<Trained>;

    fn fit(self, x: &Array2<Float>, y: &Array1<Float>) -> SklResult<Self::Fitted> {
        validate::check_consistent_length(x, y)?;

        let mut rng = match self.random_state {
            Some(seed) => StdRng::seed_from_u64(seed),
            None => StdRng::from_rng(&mut thread_rng()),
        };

        let (n_samples, n_features) = x.dim();

        // Initialize network architecture
        let mut layer_sizes = vec![n_features];
        layer_sizes.extend(&self.hidden_layers);
        layer_sizes.push(1); // Output layer for regression/binary classification

        // Initialize weights and biases
        let mut weights = Vec::new();
        let mut biases = Vec::new();

        for i in 0..(layer_sizes.len() - 1) {
            let input_size = layer_sizes[i];
            let output_size = layer_sizes[i + 1];

            // Glorot/Xavier-style initialization: uniform in [-scale, scale]
            let scale = (2.0 / (input_size + output_size) as f64).sqrt();
            let mut weight_matrix = Array2::zeros((input_size, output_size));
            for elem in weight_matrix.iter_mut() {
                *elem = rng.gen::<f64>() * 2.0 * scale - scale;
            }
            weights.push(weight_matrix);

            let mut bias_vector = Array1::zeros(output_size);
            for elem in bias_vector.iter_mut() {
                *elem = rng.gen::<f64>() * 0.1 - 0.05;
            }
            biases.push(bias_vector);
        }

        // Training loop
        for epoch in 0..self.epochs {
            let mut total_loss = 0.0;

            // Forward and backward pass for each sample
            for i in 0..n_samples {
                let input = x.row(i).to_owned();
                let target = y[i];

                // Forward pass
                let (activations, z_values) = self.forward_pass(&input, &weights, &biases);
                let prediction = activations.last().unwrap()[0];

                // Compute loss (MSE for regression)
                let loss = 0.5 * (prediction - target).powi(2);
                total_loss += loss;

                // Backward pass
                let gradients =
                    self.backward_pass(&input, target, &activations, &z_values, &weights);

                // Update weights and biases
                self.update_parameters(&mut weights, &mut biases, &gradients.0, &gradients.1);
            }

            // Early stopping once the loss is effectively zero (checked every 10 epochs)
            if epoch % 10 == 0 {
                let avg_loss = total_loss / n_samples as f64;
                if avg_loss < 1e-6 {
                    break;
                }
            }
        }

        // Compute feature importances from the input-layer weights
        let input_weights = &weights[0];
        let mut feature_importances = Array1::zeros(n_features);

        for i in 0..n_features {
            // Importance of a feature is the sum of its absolute outgoing weights
            let importance: f64 = input_weights.row(i).iter().map(|&w| w.abs()).sum();
            feature_importances[i] = importance;
        }

        // Normalize importances
        let importance_sum: f64 = feature_importances.sum();
        if importance_sum > 0.0 {
            feature_importances /= importance_sum;
        }

        // Select features based on importance
        let selected_features = self.select_features_by_importance(&feature_importances);

        Ok(NeuralFeatureSelector {
            hidden_layers: self.hidden_layers,
            learning_rate: self.learning_rate,
            epochs: self.epochs,
            l1_reg: self.l1_reg,
            l2_reg: self.l2_reg,
            k: self.k,
            importance_threshold: self.importance_threshold,
            random_state: self.random_state,
            state: PhantomData,
            weights_: Some(weights),
            biases_: Some(biases),
            feature_importances_: Some(feature_importances),
            selected_features_: Some(selected_features),
        })
    }
}

impl NeuralFeatureSelector<Untrained> {
    fn forward_pass(
        &self,
        input: &Array1<Float>,
        weights: &[Array2<Float>],
        biases: &[Array1<Float>],
    ) -> (Vec<Array1<Float>>, Vec<Array1<Float>>) {
        let mut activations = vec![input.clone()];
        let mut z_values = Vec::new();

        let mut current_activation = input.clone();

        for (i, (w, b)) in weights.iter().zip(biases.iter()).enumerate() {
            // Linear transformation: z = W^T * a + b
            let z = w.t().dot(&current_activation) + b;
            z_values.push(z.clone());

            // Activation function (ReLU for hidden layers, linear for output)
            let activation = if i < weights.len() - 1 {
                z.mapv(|x| x.max(0.0)) // ReLU
            } else {
                z // Linear for output
            };

            activations.push(activation.clone());
            current_activation = activation;
        }

        (activations, z_values)
    }

    fn backward_pass(
        &self,
        _input: &Array1<Float>,
        target: Float,
        activations: &[Array1<Float>],
        z_values: &[Array1<Float>],
        weights: &[Array2<Float>],
    ) -> (Vec<Array2<Float>>, Vec<Array1<Float>>) {
        let n_layers = weights.len();
        // Allocate each gradient buffer with its own layer's dimensions
        // (not layer 0's dimensions for every layer)
        let mut weight_gradients: Vec<Array2<Float>> =
            weights.iter().map(|w| Array2::zeros(w.dim())).collect();
        let mut bias_gradients: Vec<Array1<Float>> =
            z_values.iter().map(|z| Array1::zeros(z.len())).collect();

        // Output layer error: derivative of 0.5 * (prediction - target)^2
        let output_error = activations.last().unwrap()[0] - target;
        let mut delta = Array1::from_vec(vec![output_error]);

        // Backward propagation
        for i in (0..n_layers).rev() {
            // Compute gradients for current layer
            let prev_activation = &activations[i];

            // Weight gradients: dW = a_prev * delta^T
            for j in 0..weight_gradients[i].nrows() {
                for k in 0..weight_gradients[i].ncols() {
                    weight_gradients[i][[j, k]] = prev_activation[j] * delta[k];
                }
            }

            // Bias gradients: db = delta
            bias_gradients[i] = delta.clone();

            // Propagate the error to the previous layer, gating by the ReLU
            // derivative of that layer's pre-activation (1 if z > 0, else 0)
            if i > 0 {
                let new_delta = weights[i].dot(&delta);
                let z_prev = &z_values[i - 1];
                delta = Array1::from_shape_fn(new_delta.len(), |j| {
                    if z_prev[j] > 0.0 {
                        new_delta[j]
                    } else {
                        0.0
                    }
                });
            }
        }

        (weight_gradients, bias_gradients)
    }

    fn update_parameters(
        &self,
        weights: &mut [Array2<Float>],
        biases: &mut [Array1<Float>],
        weight_gradients: &[Array2<Float>],
        bias_gradients: &[Array1<Float>],
    ) {
        for i in 0..weights.len() {
            // Update weights with L1 and L2 regularization:
            // w -= lr * (grad + l1 * sign(w) + l2 * w)
            for j in 0..weights[i].nrows() {
                for k in 0..weights[i].ncols() {
                    let grad = weight_gradients[i][[j, k]];
                    let l1_penalty = self.l1_reg * weights[i][[j, k]].signum();
                    let l2_penalty = self.l2_reg * weights[i][[j, k]];

                    weights[i][[j, k]] -= self.learning_rate * (grad + l1_penalty + l2_penalty);
                }
            }

            // Update biases
            for j in 0..biases[i].len() {
                biases[i][j] -= self.learning_rate * bias_gradients[i][j];
            }
        }
    }

    fn select_features_by_importance(&self, importances: &Array1<Float>) -> Vec<usize> {
        let mut feature_indices: Vec<(usize, Float)> = importances
            .indexed_iter()
            .map(|(i, &importance)| (i, importance))
            .collect();

        feature_indices.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());

        let selected: Vec<usize> = if let Some(k) = self.k {
            feature_indices
                .iter()
                .take(k.min(feature_indices.len()))
                .map(|(i, _)| *i)
                .collect()
        } else {
            feature_indices
                .iter()
                .filter(|(_, importance)| *importance >= self.importance_threshold)
                .map(|(i, _)| *i)
                .collect()
        };

        let mut selected_sorted = selected;
        selected_sorted.sort();
        selected_sorted
    }
}

impl Transform<Array2<Float>> for NeuralFeatureSelector<Trained> {
    fn transform(&self, x: &Array2<Float>) -> SklResult<Array2<Float>> {
        let selected_features = self.selected_features_.as_ref().unwrap();
        if selected_features.is_empty() {
            return Err(SklearsError::InvalidInput(
                "No features were selected".to_string(),
            ));
        }

        let selected_indices: Vec<usize> = selected_features.to_vec();
        Ok(x.select(Axis(1), &selected_indices))
    }
}

impl SelectorMixin for NeuralFeatureSelector<Trained> {
    fn get_support(&self) -> SklResult<Array1<bool>> {
        let selected_features = self.selected_features_.as_ref().unwrap();
        let n_features = self.feature_importances_.as_ref().unwrap().len();
        let mut support = Array1::from_elem(n_features, false);
        for &idx in selected_features {
            if idx < n_features {
                support[idx] = true;
            }
        }
        Ok(support)
    }

    fn transform_features(&self, indices: &[usize]) -> SklResult<Vec<usize>> {
        let selected_features = self.selected_features_.as_ref().unwrap();
        Ok(indices
            .iter()
            .filter_map(|&idx| selected_features.iter().position(|&f| f == idx))
            .collect())
    }
}

/// Attention-based feature selector.
///
/// Uses attention-style weights to identify important features.
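///
/// A minimal usage sketch (illustrative only, not compiled as a doctest;
/// `x` and `y` stand for any dense feature matrix and target vector):
///
/// ```ignore
/// let selector = AttentionFeatureSelector::new()
///     .n_heads(4)
///     .attention_dim(32)
///     .temperature(0.5)
///     .k(Some(10))
///     .random_state(Some(0));
///
/// let fitted = selector.fit(&x, &y)?;
/// let x_selected = fitted.transform(&x)?;
/// ```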
#[derive(Debug, Clone)]
pub struct AttentionFeatureSelector<State = Untrained> {
    /// Dimension of attention space
    attention_dim: usize,
    /// Number of attention heads
    n_heads: usize,
    /// Learning rate
    learning_rate: f64,
    /// Number of training epochs
    epochs: usize,
    /// Temperature parameter for attention softmax
    temperature: f64,
    /// Number of top features to select
    k: Option<usize>,
    /// Threshold for attention weights
    attention_threshold: f64,
    /// Random seed
    random_state: Option<u64>,
    state: PhantomData<State>,
    // Trained state
    attention_weights_: Option<Array2<Float>>,
    feature_attention_: Option<Array1<Float>>,
    selected_features_: Option<Vec<usize>>,
}

impl AttentionFeatureSelector<Untrained> {
    pub fn new() -> Self {
        Self {
            attention_dim: 64,
            n_heads: 8,
            learning_rate: 0.001,
            epochs: 50,
            temperature: 1.0,
            k: None,
            attention_threshold: 0.01,
            random_state: None,
            state: PhantomData,
            attention_weights_: None,
            feature_attention_: None,
            selected_features_: None,
        }
    }

    pub fn attention_dim(mut self, dim: usize) -> Self {
        self.attention_dim = dim;
        self
    }

    pub fn n_heads(mut self, n_heads: usize) -> Self {
        self.n_heads = n_heads;
        self
    }

    pub fn learning_rate(mut self, lr: f64) -> Self {
        self.learning_rate = lr;
        self
    }

    pub fn epochs(mut self, epochs: usize) -> Self {
        self.epochs = epochs;
        self
    }

    pub fn temperature(mut self, temperature: f64) -> Self {
        self.temperature = temperature;
        self
    }

    pub fn k(mut self, k: Option<usize>) -> Self {
        self.k = k;
        self
    }

    pub fn attention_threshold(mut self, threshold: f64) -> Self {
        self.attention_threshold = threshold;
        self
    }

    pub fn random_state(mut self, seed: Option<u64>) -> Self {
        self.random_state = seed;
        self
    }
}

impl Default for AttentionFeatureSelector<Untrained> {
    fn default() -> Self {
        Self::new()
    }
}

impl Estimator for AttentionFeatureSelector<Untrained> {
    type Config = ();
    type Error = SklearsError;
    type Float = f64;

    fn config(&self) -> &Self::Config {
        &()
    }
}

impl Fit<Array2<Float>, Array1<Float>> for AttentionFeatureSelector<Untrained> {
    type Fitted = AttentionFeatureSelector<Trained>;

    fn fit(self, x: &Array2<Float>, y: &Array1<Float>) -> SklResult<Self::Fitted> {
        validate::check_consistent_length(x, y)?;

        let mut rng = match self.random_state {
            Some(seed) => StdRng::seed_from_u64(seed),
            None => StdRng::from_rng(&mut thread_rng()),
        };

        let (n_samples, n_features) = x.dim();

        // Each head operates on a slice of size attention_dim / n_heads
        // (attention_dim is assumed to be divisible by n_heads)
        let head_dim = self.attention_dim / self.n_heads;

        // Query, Key, Value projection matrices (shared across heads)
        let mut query_weights = Array2::zeros((n_features, self.attention_dim));
        let mut key_weights = Array2::zeros((n_features, self.attention_dim));
        let mut value_weights = Array2::zeros((n_features, self.attention_dim));

        // Initialize with small random values
        for w in [&mut query_weights, &mut key_weights, &mut value_weights] {
            for elem in w.iter_mut() {
                *elem = rng.gen::<f64>() * 0.02 - 0.01;
            }
        }

        // Illustrative multi-head attention pass. In this simplified
        // implementation the Q/K/V projections are fixed after initialization
        // (they are not updated during the loop), and each head produces a
        // single query-key score shared by all features, so the softmax below
        // is uniform. The attention actually used for selection comes from
        // the heuristic that follows this loop.
        for _epoch in 0..self.epochs {
            let mut total_attention = Array1::zeros(n_features);

            for i in 0..n_samples {
                let input = x.row(i);

                // Compute queries, keys, values
                let queries = input.dot(&query_weights);
                let keys = input.dot(&key_weights);
                let values = input.dot(&value_weights);

                // Multi-head attention
                let mut head_attentions = Vec::new();

                for head in 0..self.n_heads {
                    let start_idx = head * head_dim;
                    let end_idx = (head + 1) * head_dim;

                    let q_head = queries.slice(s![start_idx..end_idx]);
                    let k_head = keys.slice(s![start_idx..end_idx]);
                    let _v_head = values.slice(s![start_idx..end_idx]);

                    // Scaled dot-product score, shared by every feature
                    let score = q_head.dot(&k_head) / (head_dim as f64).sqrt();
                    let attention_scores = Array1::from_elem(n_features, score);

                    // Apply softmax with temperature
                    let softmax_scores =
                        softmax_with_temperature(&attention_scores, self.temperature);
                    head_attentions.push(softmax_scores);
                }

                // Average attention across heads
                let mut avg_attention = Array1::zeros(n_features);
                for head_attention in &head_attentions {
                    avg_attention += head_attention;
                }
                avg_attention /= self.n_heads as f64;

                total_attention += &avg_attention;
            }

            // Average attention across samples
            total_attention /= n_samples as f64;
        }

        // Final attention computation (simplified heuristic)
        let mut feature_attention = Array1::zeros(n_features);
        for i in 0..n_samples {
            let input = x.row(i);

            // Score each feature by its magnitude times a crude per-sample
            // correlation proxy with the target
            for j in 0..n_features {
                let feature_magnitude = input[j].abs();
                let correlation = compute_simple_correlation(input[j], y[i]);
                feature_attention[j] += feature_magnitude * correlation.abs();
            }
        }

        // Normalize attention weights
        let attention_sum = feature_attention.sum();
        if attention_sum > 0.0 {
            feature_attention /= attention_sum;
        }

        // Select features based on attention weights
        let selected_features = self.select_features_by_attention(&feature_attention);

        Ok(AttentionFeatureSelector {
            attention_dim: self.attention_dim,
            n_heads: self.n_heads,
            learning_rate: self.learning_rate,
            epochs: self.epochs,
            temperature: self.temperature,
            k: self.k,
            attention_threshold: self.attention_threshold,
            random_state: self.random_state,
            state: PhantomData,
            attention_weights_: Some(query_weights), // Store one of the weight matrices
            feature_attention_: Some(feature_attention),
            selected_features_: Some(selected_features),
        })
    }
}

impl AttentionFeatureSelector<Untrained> {
    fn select_features_by_attention(&self, attention: &Array1<Float>) -> Vec<usize> {
        let mut feature_indices: Vec<(usize, Float)> =
            attention.indexed_iter().map(|(i, &att)| (i, att)).collect();

        feature_indices.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());

        let selected: Vec<usize> = if let Some(k) = self.k {
            feature_indices
                .iter()
                .take(k.min(feature_indices.len()))
                .map(|(i, _)| *i)
                .collect()
        } else {
            feature_indices
                .iter()
                .filter(|(_, att)| *att >= self.attention_threshold)
                .map(|(i, _)| *i)
                .collect()
        };

        let mut selected_sorted = selected;
        selected_sorted.sort();
        selected_sorted
    }
}

impl Transform<Array2<Float>> for AttentionFeatureSelector<Trained> {
    fn transform(&self, x: &Array2<Float>) -> SklResult<Array2<Float>> {
        let selected_features = self.selected_features_.as_ref().unwrap();
        if selected_features.is_empty() {
            return Err(SklearsError::InvalidInput(
                "No features were selected".to_string(),
            ));
        }

        let selected_indices: Vec<usize> = selected_features.to_vec();
        Ok(x.select(Axis(1), &selected_indices))
    }
}

impl SelectorMixin for AttentionFeatureSelector<Trained> {
    fn get_support(&self) -> SklResult<Array1<bool>> {
        let selected_features = self.selected_features_.as_ref().unwrap();
        let n_features = self.feature_attention_.as_ref().unwrap().len();
        let mut support = Array1::from_elem(n_features, false);
        for &idx in selected_features {
            if idx < n_features {
                support[idx] = true;
            }
        }
        Ok(support)
    }

    fn transform_features(&self, indices: &[usize]) -> SklResult<Vec<usize>> {
        let selected_features = self.selected_features_.as_ref().unwrap();
        Ok(indices
            .iter()
            .filter_map(|&idx| selected_features.iter().position(|&f| f == idx))
            .collect())
    }
}

/// Reinforcement learning-based feature selector.
///
/// Uses Q-learning to treat feature selection as a sequential decision
/// problem: each action adds one unselected feature to the current subset.
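///
/// A minimal usage sketch (illustrative only, not compiled as a doctest;
/// `x` and `y` stand for any dense feature matrix and target vector):
///
/// ```ignore
/// // Select exactly 5 features via Q-learning over "add a feature" actions.
/// let selector = RLFeatureSelector::new(5)
///     .episodes(200)
///     .epsilon(0.5)
///     .random_state(Some(7));
///
/// let fitted = selector.fit(&x, &y)?;
/// let support = fitted.get_support()?; // boolean mask over the original columns
/// ```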
#[derive(Debug, Clone)]
pub struct RLFeatureSelector<State = Untrained> {
    /// Q-learning parameters
    learning_rate: f64,
    discount_factor: f64,
    epsilon: f64,
    epsilon_decay: f64,
    /// Number of episodes for training
    episodes: usize,
    /// Maximum steps per episode
    max_steps: usize,
    /// Number of features to select
    k: usize,
    /// Random seed
    random_state: Option<u64>,
    state: PhantomData<State>,
    // Trained state
    q_table_: Option<HashMap<String, Array1<Float>>>,
    selected_features_: Option<Vec<usize>>,
    n_features_: Option<usize>,
}

impl RLFeatureSelector<Untrained> {
    pub fn new(k: usize) -> Self {
        Self {
            learning_rate: 0.1,
            discount_factor: 0.9,
            epsilon: 1.0,
            epsilon_decay: 0.995,
            episodes: 100,
            max_steps: 20,
            k,
            random_state: None,
            state: PhantomData,
            q_table_: None,
            selected_features_: None,
            n_features_: None,
        }
    }

    pub fn learning_rate(mut self, lr: f64) -> Self {
        self.learning_rate = lr;
        self
    }

    pub fn discount_factor(mut self, gamma: f64) -> Self {
        self.discount_factor = gamma;
        self
    }

    pub fn epsilon(mut self, epsilon: f64) -> Self {
        self.epsilon = epsilon;
        self
    }

    pub fn episodes(mut self, episodes: usize) -> Self {
        self.episodes = episodes;
        self
    }

    pub fn max_steps(mut self, max_steps: usize) -> Self {
        self.max_steps = max_steps;
        self
    }

    pub fn random_state(mut self, seed: Option<u64>) -> Self {
        self.random_state = seed;
        self
    }
}

impl Estimator for RLFeatureSelector<Untrained> {
    type Config = ();
    type Error = SklearsError;
    type Float = f64;

    fn config(&self) -> &Self::Config {
        &()
    }
}

impl Fit<Array2<Float>, Array1<Float>> for RLFeatureSelector<Untrained> {
    type Fitted = RLFeatureSelector<Trained>;

    fn fit(self, x: &Array2<Float>, y: &Array1<Float>) -> SklResult<Self::Fitted> {
        validate::check_consistent_length(x, y)?;

        let mut rng = match self.random_state {
            Some(seed) => StdRng::seed_from_u64(seed),
            None => StdRng::from_rng(&mut thread_rng()),
        };

        let (_, n_features) = x.dim();
        let mut q_table: HashMap<String, Array1<Float>> = HashMap::new();
        let mut current_epsilon = self.epsilon;

        // Q-learning training
        for _episode in 0..self.episodes {
            let mut selected_features = Vec::new();
            let mut state = encode_state(&selected_features, n_features);

            for _step in 0..self.max_steps {
                if selected_features.len() >= self.k {
                    break;
                }

                // Get available actions (unselected features)
                let available_actions: Vec<usize> = (0..n_features)
                    .filter(|&i| !selected_features.contains(&i))
                    .collect();

                if available_actions.is_empty() {
                    break;
                }

                // Epsilon-greedy action selection
                let action = if rng.gen::<f64>() < current_epsilon {
                    // Random action
                    *available_actions.choose(&mut rng).unwrap()
                } else {
                    // Greedy action (highest Q-value)
                    let q_values = q_table
                        .entry(state.clone())
                        .or_insert_with(|| Array1::zeros(n_features));

                    let best_action = available_actions
                        .iter()
                        .max_by(|&&a, &&b| q_values[a].partial_cmp(&q_values[b]).unwrap())
                        .unwrap();
                    *best_action
                };

                // Take action
                selected_features.push(action);
                let new_state = encode_state(&selected_features, n_features);

                // Compute reward
                let reward = self.compute_reward(x, y, &selected_features);

                // Q-learning update:
                // Q(s, a) += lr * (r + gamma * max_a' Q(s', a') - Q(s, a))
                let current_q = q_table
                    .entry(state.clone())
                    .or_insert_with(|| Array1::zeros(n_features))[action];

                let next_q_values = q_table
                    .entry(new_state.clone())
                    .or_insert_with(|| Array1::zeros(n_features));
                let max_next_q = next_q_values
                    .iter()
                    .fold(f64::NEG_INFINITY, |a, &b| a.max(b));

                let target_q = reward + self.discount_factor * max_next_q;
                let updated_q = current_q + self.learning_rate * (target_q - current_q);

                q_table
                    .entry(state.clone())
                    .or_insert_with(|| Array1::zeros(n_features))[action] = updated_q;

                state = new_state;
            }

            // Decay epsilon
            current_epsilon *= self.epsilon_decay;
        }

        // Extract the greedy feature selection from the learned Q-table
        let mut best_features = Vec::new();
        let mut current_state = encode_state(&best_features, n_features);

        for _ in 0..self.k {
            let available_actions: Vec<usize> = (0..n_features)
                .filter(|&i| !best_features.contains(&i))
                .collect();

            if available_actions.is_empty() {
                break;
            }

            let default_q_values = Array1::zeros(n_features);
            let q_values = q_table.get(&current_state).unwrap_or(&default_q_values);

            let best_action = available_actions
                .iter()
                .max_by(|&&a, &&b| q_values[a].partial_cmp(&q_values[b]).unwrap())
                .unwrap();

            best_features.push(*best_action);
            current_state = encode_state(&best_features, n_features);
        }

        best_features.sort();

        Ok(RLFeatureSelector {
            learning_rate: self.learning_rate,
            discount_factor: self.discount_factor,
            epsilon: self.epsilon,
            epsilon_decay: self.epsilon_decay,
            episodes: self.episodes,
            max_steps: self.max_steps,
            k: self.k,
            random_state: self.random_state,
            state: PhantomData,
            q_table_: Some(q_table),
            selected_features_: Some(best_features),
            n_features_: Some(n_features),
        })
    }
}

impl RLFeatureSelector<Untrained> {
    fn compute_reward(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
        selected_features: &[usize],
    ) -> f64 {
        if selected_features.is_empty() {
            return 0.0;
        }

        // Simple reward: mean absolute correlation of the selected features
        // with the target
        let mut reward = 0.0;
        for &feature_idx in selected_features {
            let feature = x.column(feature_idx);
            let correlation = compute_simple_correlation_array(&feature.to_owned(), y);
            reward += correlation.abs();
        }

        // Normalize by the number of features to encourage small subsets
        reward / selected_features.len() as f64
    }
}

impl Transform<Array2<Float>> for RLFeatureSelector<Trained> {
    fn transform(&self, x: &Array2<Float>) -> SklResult<Array2<Float>> {
        let selected_features = self.selected_features_.as_ref().unwrap();
        if selected_features.is_empty() {
            return Err(SklearsError::InvalidInput(
                "No features were selected".to_string(),
            ));
        }

        let selected_indices: Vec<usize> = selected_features.to_vec();
        Ok(x.select(Axis(1), &selected_indices))
    }
}

impl SelectorMixin for RLFeatureSelector<Trained> {
    fn get_support(&self) -> SklResult<Array1<bool>> {
        let selected_features = self.selected_features_.as_ref().unwrap();
        let n_features = self.n_features_.unwrap();
        let mut support = Array1::from_elem(n_features, false);
        for &idx in selected_features {
            if idx < n_features {
                support[idx] = true;
            }
        }
        Ok(support)
    }

    fn transform_features(&self, indices: &[usize]) -> SklResult<Vec<usize>> {
        let selected_features = self.selected_features_.as_ref().unwrap();
        Ok(indices
            .iter()
            .filter_map(|&idx| selected_features.iter().position(|&f| f == idx))
            .collect())
    }
}

/// Meta-learning feature selector.
///
/// Learns a weighted ensemble over several base feature selection methods.
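///
/// A minimal usage sketch (illustrative only, not compiled as a doctest;
/// `x` and `y` stand for any dense feature matrix and target vector):
///
/// ```ignore
/// let selector = MetaLearningFeatureSelector::new()
///     .base_methods(vec!["correlation".to_string(), "f_test".to_string()])
///     .meta_epochs(25)
///     .k(Some(8));
///
/// let fitted = selector.fit(&x, &y)?;
/// let x_selected = fitted.transform(&x)?;
/// ```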
#[derive(Debug, Clone)]
pub struct MetaLearningFeatureSelector<State = Untrained> {
    /// Base selectors to use in meta-learning
    base_methods: Vec<String>,
    /// Meta-model parameters
    meta_learning_rate: f64,
    meta_epochs: usize,
    /// Number of top features to select
    k: Option<usize>,
    /// Random seed
    random_state: Option<u64>,
    state: PhantomData<State>,
    // Trained state
    meta_weights_: Option<Array1<Float>>,
    base_selections_: Option<Vec<Vec<usize>>>,
    selected_features_: Option<Vec<usize>>,
}

impl MetaLearningFeatureSelector<Untrained> {
    pub fn new() -> Self {
        Self {
            base_methods: vec![
                "correlation".to_string(),
                "mutual_info".to_string(),
                "f_test".to_string(),
                "chi2".to_string(),
            ],
            meta_learning_rate: 0.01,
            meta_epochs: 50,
            k: None,
            random_state: None,
            state: PhantomData,
            meta_weights_: None,
            base_selections_: None,
            selected_features_: None,
        }
    }

    pub fn base_methods(mut self, methods: Vec<String>) -> Self {
        self.base_methods = methods;
        self
    }

    pub fn meta_learning_rate(mut self, lr: f64) -> Self {
        self.meta_learning_rate = lr;
        self
    }

    pub fn meta_epochs(mut self, epochs: usize) -> Self {
        self.meta_epochs = epochs;
        self
    }

    pub fn k(mut self, k: Option<usize>) -> Self {
        self.k = k;
        self
    }

    pub fn random_state(mut self, seed: Option<u64>) -> Self {
        self.random_state = seed;
        self
    }
}

impl Default for MetaLearningFeatureSelector<Untrained> {
    fn default() -> Self {
        Self::new()
    }
}

impl Estimator for MetaLearningFeatureSelector<Untrained> {
    type Config = ();
    type Error = SklearsError;
    type Float = f64;

    fn config(&self) -> &Self::Config {
        &()
    }
}

impl Fit<Array2<Float>, Array1<Float>> for MetaLearningFeatureSelector<Untrained> {
    type Fitted = MetaLearningFeatureSelector<Trained>;

    fn fit(self, x: &Array2<Float>, y: &Array1<Float>) -> SklResult<Self::Fitted> {
        validate::check_consistent_length(x, y)?;

        let _rng = match self.random_state {
            Some(seed) => StdRng::seed_from_u64(seed),
            None => StdRng::from_rng(&mut thread_rng()),
        };

        let (_, n_features) = x.dim();

        // Apply base feature selection methods
        let mut base_selections = Vec::new();
        let mut base_scores = Vec::new();

        for method in &self.base_methods {
            let (selection, score) = match method.as_str() {
                "correlation" => apply_correlation_selection(x, y, self.k),
                "mutual_info" => apply_mutual_info_selection(x, y, self.k),
                "f_test" => apply_f_test_selection(x, y, self.k),
                "chi2" => apply_chi2_selection(x, y, self.k),
                // Unknown method names contribute an empty selection
                _ => (Vec::new(), 0.0),
            };
            base_selections.push(selection);
            base_scores.push(score);
        }

        // Initialize meta-weights uniformly
        let mut meta_weights = Array1::from_vec(vec![
            1.0 / self.base_methods.len() as f64;
            self.base_methods.len()
        ]);

        // Meta-learning: optimize weights based on performance
        for _epoch in 0..self.meta_epochs {
            // Compute weighted ensemble selection
            let ensemble_selection =
                self.compute_ensemble_selection(&base_selections, &meta_weights, n_features);

            // Compute ensemble performance
            let ensemble_score = evaluate_feature_selection(x, y, &ensemble_selection);

            // Update meta-weights by gradient ascent: methods that beat the
            // ensemble gain weight, methods that lag lose weight
            for i in 0..meta_weights.len() {
                let individual_score = base_scores[i];
                let gradient = individual_score - ensemble_score;
                meta_weights[i] += self.meta_learning_rate * gradient;
            }

            // Normalize weights
            let weight_sum = meta_weights.sum();
            if weight_sum > 0.0 {
                meta_weights /= weight_sum;
            }
        }

        // Final ensemble selection
        let selected_features =
            self.compute_ensemble_selection(&base_selections, &meta_weights, n_features);

        Ok(MetaLearningFeatureSelector {
            base_methods: self.base_methods,
            meta_learning_rate: self.meta_learning_rate,
            meta_epochs: self.meta_epochs,
            k: self.k,
            random_state: self.random_state,
            state: PhantomData,
            meta_weights_: Some(meta_weights),
            base_selections_: Some(base_selections),
            selected_features_: Some(selected_features),
        })
    }
}

impl MetaLearningFeatureSelector<Untrained> {
    fn compute_ensemble_selection(
        &self,
        base_selections: &[Vec<usize>],
        weights: &Array1<Float>,
        n_features: usize,
    ) -> Vec<usize> {
        let mut feature_scores = Array1::zeros(n_features);

        // Weight the contributions from each base method
        for (i, selection) in base_selections.iter().enumerate() {
            let weight = weights[i];
            for &feature_idx in selection {
                if feature_idx < n_features {
                    feature_scores[feature_idx] += weight;
                }
            }
        }

        // Select top features
        let mut feature_indices: Vec<(usize, Float)> = feature_scores
            .indexed_iter()
            .map(|(i, &score)| (i, score))
            .collect();

        feature_indices.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());

        let selected: Vec<usize> = if let Some(k) = self.k {
            feature_indices
                .iter()
                .take(k.min(feature_indices.len()))
                .filter(|(_, score)| *score > 0.0)
                .map(|(i, _)| *i)
                .collect()
        } else {
            feature_indices
                .iter()
                .filter(|(_, score)| *score > 0.1) // Threshold for selection
                .map(|(i, _)| *i)
                .collect()
        };

        let mut selected_sorted = selected;
        selected_sorted.sort();
        selected_sorted
    }
}

impl Transform<Array2<Float>> for MetaLearningFeatureSelector<Trained> {
    fn transform(&self, x: &Array2<Float>) -> SklResult<Array2<Float>> {
        let selected_features = self.selected_features_.as_ref().unwrap();
        if selected_features.is_empty() {
            return Err(SklearsError::InvalidInput(
                "No features were selected".to_string(),
            ));
        }

        let selected_indices: Vec<usize> = selected_features.to_vec();
        Ok(x.select(Axis(1), &selected_indices))
    }
}

impl SelectorMixin for MetaLearningFeatureSelector<Trained> {
    fn get_support(&self) -> SklResult<Array1<bool>> {
        let selected_features = self.selected_features_.as_ref().unwrap();
        // The total feature count is not stored on this selector, so infer it
        // from the largest feature index seen
        let n_features = if let Some(ref selections) = self.base_selections_ {
            selections.iter().flatten().max().unwrap_or(&0) + 1
        } else {
            selected_features.iter().max().unwrap_or(&0) + 1
        };

        let mut support = Array1::from_elem(n_features, false);
        for &idx in selected_features {
            if idx < n_features {
                support[idx] = true;
            }
        }
        Ok(support)
    }

    fn transform_features(&self, indices: &[usize]) -> SklResult<Vec<usize>> {
        let selected_features = self.selected_features_.as_ref().unwrap();
        Ok(indices
            .iter()
            .filter_map(|&idx| selected_features.iter().position(|&f| f == idx))
            .collect())
    }
}

// Helper functions

fn softmax_with_temperature(scores: &Array1<Float>, temperature: f64) -> Array1<Float> {
    let max_score = scores.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
    let exp_scores: Array1<Float> = scores.mapv(|x| ((x - max_score) / temperature).exp());
    let sum_exp = exp_scores.sum();
    exp_scores / sum_exp
}
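
// Worked example for `softmax_with_temperature` (illustrative): scores
// [1.0, 2.0, 3.0] at temperature 1.0 map to roughly [0.090, 0.245, 0.665];
// a higher temperature flattens the distribution, a lower one sharpens it.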

fn compute_simple_correlation(x: Float, y: Float) -> Float {
    // Crude per-sample proxy (the raw product); a proper implementation would
    // accumulate a Pearson correlation across samples
    x * y
}

pub fn compute_simple_correlation_array(x: &Array1<Float>, y: &Array1<Float>) -> Float {
    let n = x.len().min(y.len());
    if n < 2 {
        return 0.0;
    }

    let x_mean = x.iter().take(n).sum::<Float>() / n as Float;
    let y_mean = y.iter().take(n).sum::<Float>() / n as Float;

    let mut numerator = 0.0;
    let mut x_var = 0.0;
    let mut y_var = 0.0;

    for i in 0..n {
        let x_i = x[i] - x_mean;
        let y_i = y[i] - y_mean;
        numerator += x_i * y_i;
        x_var += x_i * x_i;
        y_var += y_i * y_i;
    }

    let denominator = (x_var * y_var).sqrt();
    if denominator.abs() < 1e-10 {
        0.0
    } else {
        numerator / denominator
    }
}

fn encode_state(selected_features: &[usize], n_features: usize) -> String {
    let mut state = vec![false; n_features];
    for &idx in selected_features {
        if idx < n_features {
            state[idx] = true;
        }
    }
    state.iter().map(|&b| if b { '1' } else { '0' }).collect()
}
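
// Example (illustrative): `encode_state(&[0, 2], 4)` returns "1010".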

// Simplified base method implementations
fn apply_correlation_selection(
    x: &Array2<Float>,
    y: &Array1<Float>,
    k: Option<usize>,
) -> (Vec<usize>, f64) {
    let (_, n_features) = x.dim();
    let mut correlations = Vec::new();

    for j in 0..n_features {
        let feature = x.column(j);
        let corr = compute_simple_correlation_array(&feature.to_owned(), y);
        correlations.push((j, corr.abs()));
    }

    correlations.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());

    // Default to the top quarter of features, but keep at least one to avoid
    // an empty selection and a divide-by-zero in the score below
    let selection_size = k.unwrap_or((n_features / 4).max(1)).min(n_features);
    let selection: Vec<usize> = correlations
        .iter()
        .take(selection_size)
        .map(|(i, _)| *i)
        .collect();
    let score = correlations
        .iter()
        .take(selection_size)
        .map(|(_, corr)| corr)
        .sum::<f64>()
        / selection_size as f64;

    (selection, score)
}

fn apply_mutual_info_selection(
    x: &Array2<Float>,
    y: &Array1<Float>,
    k: Option<usize>,
) -> (Vec<usize>, f64) {
    // Simplified mutual information - in practice would use proper MI estimation
    apply_correlation_selection(x, y, k)
}

fn apply_f_test_selection(
    x: &Array2<Float>,
    y: &Array1<Float>,
    k: Option<usize>,
) -> (Vec<usize>, f64) {
    // Simplified F-test - in practice would use a proper F-statistic
    apply_correlation_selection(x, y, k)
}

fn apply_chi2_selection(
    x: &Array2<Float>,
    y: &Array1<Float>,
    k: Option<usize>,
) -> (Vec<usize>, f64) {
    // Simplified chi-squared test - in practice would use a proper chi-squared statistic
    apply_correlation_selection(x, y, k)
}

fn evaluate_feature_selection(x: &Array2<Float>, y: &Array1<Float>, features: &[usize]) -> f64 {
    if features.is_empty() {
        return 0.0;
    }

    // Simple evaluation: average absolute correlation of the selected features
    // with the target
    let mut total_corr = 0.0;
    for &feature_idx in features {
        let feature = x.column(feature_idx);
        let corr = compute_simple_correlation_array(&feature.to_owned(), y);
        total_corr += corr.abs();
    }

    total_corr / features.len() as f64
}
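
// A minimal sanity-test sketch for the helpers above (illustrative; assumes
// the `array!` macro is re-exported by scirs2_core::ndarray).
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn softmax_sums_to_one_and_preserves_order() {
        let scores = array![1.0, 2.0, 3.0];
        let probs = softmax_with_temperature(&scores, 1.0);
        assert!((probs.sum() - 1.0).abs() < 1e-12);
        // Higher scores receive more probability mass.
        assert!(probs[2] > probs[1] && probs[1] > probs[0]);
    }

    #[test]
    fn correlation_of_perfectly_linear_data_is_one() {
        let x = array![1.0, 2.0, 3.0, 4.0];
        let y = array![2.0, 4.0, 6.0, 8.0];
        let corr = compute_simple_correlation_array(&x, &y);
        assert!((corr - 1.0).abs() < 1e-12);
    }

    #[test]
    fn encode_state_produces_a_bitmask() {
        assert_eq!(encode_state(&[0, 2], 4), "1010");
    }
}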