use super::{BinaryGA, Budget, PerturbativeMetaheuristic, SearchSpace};
/// Counts how many entries of `mask` are `true`.
///
/// Only compiled for test builds.
#[inline]
#[cfg(test)]
fn count_selected(mask: &[bool]) -> usize {
    // Each `true` contributes 1 and each `false` contributes 0.
    mask.iter().map(|&flag| usize::from(flag)).sum()
}
/// Returns the positions of the `true` entries of `mask`, in ascending order.
#[inline]
fn selected_indices(mask: &[bool]) -> Vec<usize> {
    let mut picked = Vec::new();
    for (position, &is_on) in mask.iter().enumerate() {
        if is_on {
            picked.push(position);
        }
    }
    picked
}
/// Objective that [`FeatureSelector::select`] minimizes for each candidate mask.
///
/// In the formulas below, `accuracy` and `n_selected` are the two values
/// returned by the caller-supplied evaluator for that mask.
#[derive(Debug, Clone, Copy)]
pub enum SelectionCriterion {
/// Objective is `-accuracy`; the number of selected features is ignored.
MaxAccuracy,
/// Objective is `n_selected`; the accuracy is ignored.
MinFeatures,
/// Objective is `-accuracy + alpha * n_selected`.
MaxAccuracyMinFeatures {
/// Weight of the per-feature penalty.
alpha: f64,
},
/// Objective is `-accuracy` while `n_selected <= max_features`; masks that
/// exceed the limit are scored `1000 + n_selected` instead.
MaxAccuracyWithLimit {
/// Largest number of selected features that is not penalized.
max_features: usize,
},
/// Objective is `-accuracy + 2 * n_selected / n_samples`.
AIC {
/// Divisor of the feature-count penalty.
n_samples: usize,
},
/// Objective is `-accuracy + ln(n_samples) * n_selected / n_samples`.
BIC {
/// Used both inside the logarithm and as the divisor of the penalty.
n_samples: usize,
},
}
/// Outcome of a feature-selection run, as produced by [`FeatureSelector::select`].
#[derive(Debug, Clone)]
pub struct FeatureSelectionResult {
/// Indices of the entries of `mask` that are `true`.
pub selected_indices: Vec<usize>,
/// Number of selected features (length of `selected_indices`).
pub n_selected: usize,
/// Negated objective value of the best solution, so larger is better.
pub score: f64,
/// First value returned by the evaluator for the final mask;
/// `select` always fills this with `Some`.
pub accuracy: Option<f64>,
/// Best solution as a boolean mask (solution entries greater than 0.5 are `true`).
pub mask: Vec<bool>,
/// Evaluation count reported by the optimizer.
pub evaluations: usize,
}
/// Builder-style configuration for selecting a feature subset with a binary
/// genetic algorithm.
#[derive(Debug, Clone)]
pub struct FeatureSelector {
// Dimension of the binary search space (one bit per candidate feature).
n_features: usize,
// How a candidate's evaluator output is turned into the minimized objective.
criterion: SelectionCriterion,
// Population size handed to the genetic algorithm.
population_size: usize,
// Mutation probability handed to the genetic algorithm.
mutation_prob: f64,
// Optional seed; forwarded to the genetic algorithm only when set.
seed: Option<u64>,
}
impl FeatureSelector {
    /// Creates a selector for `n_features` candidate features.
    ///
    /// Defaults: criterion `MaxAccuracyMinFeatures { alpha: 0.01 }`, population
    /// size 50, mutation probability `1 / n_features`, and no seed.
    #[must_use]
    pub fn new(n_features: usize) -> Self {
        Self {
            n_features,
            criterion: SelectionCriterion::MaxAccuracyMinFeatures { alpha: 0.01 },
            population_size: 50,
            // 1/n mutation rate; `.max(1)` keeps the rate finite when
            // `n_features` is zero instead of dividing by zero.
            mutation_prob: 1.0 / n_features.max(1) as f64,
            seed: None,
        }
    }

    /// Replaces the selection criterion.
    #[must_use]
    pub fn with_criterion(mut self, criterion: SelectionCriterion) -> Self {
        self.criterion = criterion;
        self
    }

    /// Sets the population size; values below 10 are raised to 10.
    #[must_use]
    pub fn with_population_size(mut self, size: usize) -> Self {
        self.population_size = size.max(10);
        self
    }

    /// Sets the mutation probability, clamped to `[0.001, 0.5]`.
    #[must_use]
    pub fn with_mutation_prob(mut self, prob: f64) -> Self {
        self.mutation_prob = prob.clamp(0.001, 0.5);
        self
    }

    /// Fixes the seed forwarded to the genetic algorithm.
    #[must_use]
    pub fn with_seed(mut self, seed: u64) -> Self {
        self.seed = Some(seed);
        self
    }

    /// Converts a real-valued bit vector into a boolean mask (`> 0.5` means selected).
    fn decode_mask(bits: &[f64]) -> Vec<bool> {
        bits.iter().map(|&b| b > 0.5).collect()
    }

    /// Maps an evaluator result to the value the optimizer minimizes under `criterion`.
    fn objective_value(criterion: SelectionCriterion, accuracy: f64, n_selected: usize) -> f64 {
        match criterion {
            SelectionCriterion::MaxAccuracy => -accuracy,
            SelectionCriterion::MinFeatures => n_selected as f64,
            SelectionCriterion::MaxAccuracyMinFeatures { alpha } => {
                -accuracy + alpha * n_selected as f64
            }
            SelectionCriterion::MaxAccuracyWithLimit { max_features } => {
                if n_selected > max_features {
                    // Large constant so any over-limit mask scores worse
                    // than any mask within the limit.
                    1000.0 + n_selected as f64
                } else {
                    -accuracy
                }
            }
            SelectionCriterion::AIC { n_samples } => {
                // `.max(1)` avoids an infinite or NaN penalty for `n_samples == 0`.
                let n = n_samples.max(1) as f64;
                -accuracy + 2.0 * n_selected as f64 / n
            }
            SelectionCriterion::BIC { n_samples } => {
                // `.max(1)` avoids `ln(0)` and a division by zero for `n_samples == 0`.
                let n = n_samples.max(1) as f64;
                -accuracy + n.ln() * n_selected as f64 / n
            }
        }
    }

    /// Runs the genetic algorithm and returns the best feature subset found.
    ///
    /// `evaluator` receives a boolean mask with one entry per feature and
    /// returns `(accuracy, n_selected)` for it. Those values are combined
    /// according to the configured criterion into the objective that the
    /// optimizer minimizes within `budget`.
    ///
    /// The returned `score` is the negated best objective value. `accuracy`
    /// comes from one additional `evaluator` call on the final mask, made
    /// here after the optimizer has finished.
    pub fn select<F>(&mut self, evaluator: F, budget: Budget) -> FeatureSelectionResult
    where
        F: Fn(&[bool]) -> (f64, usize),
    {
        let space = SearchSpace::binary(self.n_features);
        let criterion = self.criterion;
        let objective = |bits: &[f64]| -> f64 {
            let mask = Self::decode_mask(bits);
            let (accuracy, n_selected) = evaluator(&mask);
            Self::objective_value(criterion, accuracy, n_selected)
        };

        let mut ga = BinaryGA::default()
            .with_population_size(self.population_size)
            .with_mutation_prob(self.mutation_prob);
        if let Some(seed) = self.seed {
            ga = ga.with_seed(seed);
        }

        let result = ga.optimize(&objective, &space, budget);

        let mask = Self::decode_mask(&result.solution);
        let sel_indices = selected_indices(&mask);
        let n_selected = sel_indices.len();
        // Re-evaluate the winning mask so the raw accuracy can be reported
        // alongside the criterion-based score.
        let (accuracy, _) = evaluator(&mask);

        FeatureSelectionResult {
            selected_indices: sel_indices,
            n_selected,
            score: -result.objective_value,
            accuracy: Some(accuracy),
            mask,
            evaluations: result.evaluations,
        }
    }
}
/// Convenience wrapper: builds a [`FeatureSelector`] with its default settings
/// for `n_features` features and runs a single selection with `evaluator`
/// under `budget`.
pub fn select_features<F>(n_features: usize, evaluator: F, budget: Budget) -> FeatureSelectionResult
where
    F: Fn(&[bool]) -> (f64, usize),
{
    FeatureSelector::new(n_features).select(evaluator, budget)
}
/// Ranks features by leave-one-out importance.
///
/// `evaluator` is first called with every feature enabled to obtain a baseline
/// accuracy, then once per feature with only that feature disabled. A
/// feature's importance is `baseline - accuracy_without_it`.
///
/// Returns `(feature_index, importance)` pairs sorted by descending
/// importance. Equal importances keep ascending index order (the sort is
/// stable), and NaN importances are placed last.
pub fn rank_features<F>(n_features: usize, evaluator: F) -> Vec<(usize, f64)>
where
    F: Fn(&[bool]) -> (f64, usize),
{
    use std::cmp::Ordering;

    // One mask is reused: each feature is switched off for its own
    // evaluation and restored afterwards, avoiding a clone per feature.
    let mut mask = vec![true; n_features];
    let (baseline_acc, _) = evaluator(&mask);

    let mut importance: Vec<(usize, f64)> = Vec::with_capacity(n_features);
    for i in 0..n_features {
        mask[i] = false;
        let (acc, _) = evaluator(&mask);
        mask[i] = true;
        importance.push((i, baseline_acc - acc));
    }

    // NaN is ordered explicitly so the comparator is a consistent total
    // order; `partial_cmp` alone would treat NaN as equal to everything,
    // which `sort_by` does not support.
    importance.sort_by(|a, b| match (a.1.is_nan(), b.1.is_nan()) {
        (true, true) => Ordering::Equal,
        (true, false) => Ordering::Greater,
        (false, true) => Ordering::Less,
        (false, false) => b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal),
    });
    importance
}
// Unit tests live in the file named by `#[path]` and are compiled only for test builds.
#[cfg(test)]
#[path = "feature_selection_tests.rs"]
mod tests;