Skip to main content

sklears_model_selection/
evolutionary.rs

1//! Evolutionary algorithms for hyperparameter optimization
2//!
3//! This module implements genetic algorithms and other evolutionary approaches for
4//! hyperparameter optimization. These methods are particularly useful for:
5//! - Non-differentiable objective functions
6//! - Mixed parameter types (categorical, continuous, integer)
7//! - Multi-modal optimization landscapes
8//! - When gradient information is not available
9
10use crate::{CrossValidator, ParameterValue};
11use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Dim, OwnedRepr};
12use scirs2_core::rand_prelude::IndexedRandom;
13use scirs2_core::random::prelude::*;
14use scirs2_core::random::SeedableRng;
15use sklears_core::{
16    error::Result,
17    traits::{Fit, Predict, Score},
18    types::Float,
19};
20
21/// Individual in the genetic algorithm population
22#[derive(Debug, Clone)]
23pub struct Individual {
24    /// Parameter values (chromosome)
25    pub parameters: Vec<ParameterValue>,
26    /// Fitness score (higher is better)
27    pub fitness: Option<f64>,
28    /// Multi-objective fitness scores
29    pub multi_fitness: Option<Vec<f64>>,
30    /// Age of the individual (generations since creation)
31    pub age: usize,
32    /// Domination rank (for multi-objective optimization)
33    pub rank: Option<usize>,
34    /// Crowding distance (for multi-objective optimization)
35    pub crowding_distance: Option<f64>,
36}
37
38impl Individual {
39    /// Create a new individual with given parameters
40    pub fn new(parameters: Vec<ParameterValue>) -> Self {
41        Self {
42            parameters,
43            fitness: None,
44            multi_fitness: None,
45            age: 0,
46            rank: None,
47            crowding_distance: None,
48        }
49    }
50
51    /// Increment the age of the individual
52    pub fn age_increment(&mut self) {
53        self.age += 1;
54    }
55}
56
57/// Configuration for genetic algorithm
58#[derive(Debug, Clone)]
59pub struct GeneticAlgorithmConfig {
60    /// Population size
61    pub population_size: usize,
62    /// Number of generations to run
63    pub n_generations: usize,
64    /// Crossover probability
65    pub crossover_rate: f64,
66    /// Mutation probability
67    pub mutation_rate: f64,
68    /// Elite size (top individuals to preserve)
69    pub elite_size: usize,
70    /// Tournament size for selection
71    pub tournament_size: usize,
72    /// Random state for reproducibility
73    pub random_state: Option<u64>,
74}
75
76impl Default for GeneticAlgorithmConfig {
77    fn default() -> Self {
78        Self {
79            population_size: 50,
80            n_generations: 100,
81            crossover_rate: 0.8,
82            mutation_rate: 0.1,
83            elite_size: 5,
84            tournament_size: 3,
85            random_state: None,
86        }
87    }
88}
89
90/// Parameter space definition for evolutionary optimization
91#[derive(Debug, Clone)]
92pub struct ParameterSpace {
93    /// Parameter definitions
94    pub parameters: Vec<ParameterDef>,
95}
96
97/// Definition of a single parameter
98#[derive(Debug, Clone)]
99pub enum ParameterDef {
100    /// Continuous parameter with bounds
101    Continuous { name: String, low: f64, high: f64 },
102    /// Integer parameter with bounds
103    Integer { name: String, low: i64, high: i64 },
104    /// Categorical parameter with choices
105    Categorical { name: String, choices: Vec<String> },
106    /// Boolean parameter
107    Boolean { name: String },
108}
109
110impl ParameterSpace {
111    /// Create a new parameter space
112    pub fn new() -> Self {
113        Self {
114            parameters: Vec::new(),
115        }
116    }
117
118    /// Add a continuous parameter
119    pub fn add_continuous(mut self, name: &str, low: f64, high: f64) -> Self {
120        self.parameters.push(ParameterDef::Continuous {
121            name: name.to_string(),
122            low,
123            high,
124        });
125        self
126    }
127
128    /// Add an integer parameter
129    pub fn add_integer(mut self, name: &str, low: i64, high: i64) -> Self {
130        self.parameters.push(ParameterDef::Integer {
131            name: name.to_string(),
132            low,
133            high,
134        });
135        self
136    }
137
138    /// Add a categorical parameter
139    pub fn add_categorical(mut self, name: &str, choices: Vec<&str>) -> Self {
140        self.parameters.push(ParameterDef::Categorical {
141            name: name.to_string(),
142            choices: choices.iter().map(|s| s.to_string()).collect(),
143        });
144        self
145    }
146
147    /// Add a boolean parameter
148    pub fn add_boolean(mut self, name: &str) -> Self {
149        self.parameters.push(ParameterDef::Boolean {
150            name: name.to_string(),
151        });
152        self
153    }
154
155    /// Sample parameter values
156    pub fn sample(&self, rng: &mut StdRng) -> Vec<ParameterValue> {
157        let mut parameters = Vec::new();
158
159        for param_def in &self.parameters {
160            let value = match param_def {
161                ParameterDef::Continuous { low, high, .. } => {
162                    ParameterValue::Float(rng.random_range(*low..*high))
163                }
164                ParameterDef::Integer { low, high, .. } => {
165                    ParameterValue::Integer(rng.random_range(*low..*high))
166                }
167                ParameterDef::Categorical { choices, .. } => {
168                    let choice = choices
169                        .as_slice()
170                        .choose(rng)
171                        .expect("operation should succeed");
172                    ParameterValue::String(choice.clone())
173                }
174                ParameterDef::Boolean { .. } => ParameterValue::Boolean(rng.random_bool(0.5)),
175            };
176            parameters.push(value);
177        }
178
179        parameters
180    }
181
182    /// Generate a random individual
183    pub fn random_individual(&self, rng: &mut StdRng) -> Individual {
184        let mut parameters = Vec::new();
185
186        for param_def in &self.parameters {
187            let value = match param_def {
188                ParameterDef::Continuous { low, high, .. } => {
189                    ParameterValue::Float(rng.random_range(*low..*high))
190                }
191                ParameterDef::Integer { low, high, .. } => {
192                    ParameterValue::Integer(rng.random_range(*low..*high))
193                }
194                ParameterDef::Categorical { choices, .. } => {
195                    let choice = choices
196                        .as_slice()
197                        .choose(rng)
198                        .expect("operation should succeed");
199                    ParameterValue::String(choice.clone())
200                }
201                ParameterDef::Boolean { .. } => ParameterValue::Boolean(rng.random_bool(0.5)),
202            };
203            parameters.push(value);
204        }
205
206        Individual::new(parameters)
207    }
208
209    /// Mutate an individual
210    pub fn mutate(&self, individual: &mut Individual, rng: &mut StdRng, mutation_rate: f64) {
211        for (i, param_def) in self.parameters.iter().enumerate() {
212            if rng.random_bool(mutation_rate) {
213                match param_def {
214                    ParameterDef::Continuous { low, high, .. } => {
215                        // Gaussian mutation
216                        if let ParameterValue::Float(ref mut val) = individual.parameters[i] {
217                            let range = high - low;
218                            let stddev = range * 0.1; // 10% of range as std deviation
219                            let noise: f64 = rng.random_range(-stddev..stddev);
220                            *val = (*val + noise).clamp(*low, *high);
221                        }
222                    }
223                    ParameterDef::Integer { low, high, .. } => {
224                        // Random integer in range
225                        individual.parameters[i] =
226                            ParameterValue::Integer(rng.random_range(*low..*high));
227                    }
228                    ParameterDef::Categorical { choices, .. } => {
229                        // Random choice
230                        let choice = choices
231                            .as_slice()
232                            .choose(rng)
233                            .expect("operation should succeed");
234                        individual.parameters[i] = ParameterValue::String(choice.clone());
235                    }
236                    ParameterDef::Boolean { .. } => {
237                        // Flip boolean
238                        if let ParameterValue::Boolean(ref mut val) = individual.parameters[i] {
239                            *val = !*val;
240                        }
241                    }
242                }
243            }
244        }
245        individual.fitness = None; // Reset fitness after mutation
246    }
247
248    /// Perform crossover between two individuals
249    pub fn crossover(
250        &self,
251        parent1: &Individual,
252        parent2: &Individual,
253        rng: &mut StdRng,
254    ) -> (Individual, Individual) {
255        let mut child1_params = Vec::new();
256        let mut child2_params = Vec::new();
257
258        for i in 0..self.parameters.len() {
259            match &self.parameters[i] {
260                ParameterDef::Continuous { .. } => {
261                    // Arithmetic crossover for continuous parameters
262                    if let (ParameterValue::Float(val1), ParameterValue::Float(val2)) =
263                        (&parent1.parameters[i], &parent2.parameters[i])
264                    {
265                        let alpha = rng.random_range(0.0..1.0);
266                        let child1_val = alpha * val1 + (1.0 - alpha) * val2;
267                        let child2_val = alpha * val2 + (1.0 - alpha) * val1;
268                        child1_params.push(ParameterValue::Float(child1_val));
269                        child2_params.push(ParameterValue::Float(child2_val));
270                    }
271                }
272                _ => {
273                    // Uniform crossover for discrete parameters
274                    if rng.random_bool(0.5) {
275                        child1_params.push(parent1.parameters[i].clone());
276                        child2_params.push(parent2.parameters[i].clone());
277                    } else {
278                        child1_params.push(parent2.parameters[i].clone());
279                        child2_params.push(parent1.parameters[i].clone());
280                    }
281                }
282            }
283        }
284
285        (
286            Individual::new(child1_params),
287            Individual::new(child2_params),
288        )
289    }
290}
291
292impl Default for ParameterSpace {
293    fn default() -> Self {
294        Self::new()
295    }
296}
297
298/// Genetic Algorithm for hyperparameter optimization
299pub struct GeneticAlgorithmCV<E, F, C> {
300    estimator: E,
301    parameter_space: ParameterSpace,
302    cv: C,
303    config: GeneticAlgorithmConfig,
304    _phantom: std::marker::PhantomData<F>,
305}
306
307impl<E, F, C> GeneticAlgorithmCV<E, F, C>
308where
309    E: Clone,
310    F: Clone,
311    E: Fit<Array2<Float>, Array1<Float>, Fitted = F>,
312    F: Predict<Array2<Float>, Array1<Float>>,
313    F: Score<Array2<Float>, Array1<Float>, Float = f64>,
314    C: CrossValidator,
315{
316    /// Create a new genetic algorithm optimizer
317    pub fn new(estimator: E, parameter_space: ParameterSpace, cv: C) -> Self {
318        Self {
319            estimator,
320            parameter_space,
321            cv,
322            config: GeneticAlgorithmConfig::default(),
323            _phantom: std::marker::PhantomData,
324        }
325    }
326
327    /// Set the configuration
328    pub fn config(mut self, config: GeneticAlgorithmConfig) -> Self {
329        self.config = config;
330        self
331    }
332
333    /// Evaluate the fitness of an individual
334    fn evaluate_fitness(
335        &self,
336        individual: &Individual,
337        x: &Array2<Float>,
338        y: &Array1<Float>,
339    ) -> Result<f64> {
340        // Configure estimator with parameters
341        let configured_estimator =
342            self.configure_estimator(&self.estimator, &individual.parameters)?;
343
344        // Perform cross-validation
345        let splits = self.cv.split(x.nrows(), None);
346        let mut scores = Vec::new();
347
348        for (train_idx, test_idx) in splits {
349            let x_train = x.select(scirs2_core::ndarray::Axis(0), &train_idx);
350            let y_train = y.select(scirs2_core::ndarray::Axis(0), &train_idx);
351            let x_test = x.select(scirs2_core::ndarray::Axis(0), &test_idx);
352            let y_test = y.select(scirs2_core::ndarray::Axis(0), &test_idx);
353
354            let fitted = configured_estimator.clone().fit(&x_train, &y_train)?;
355            let score = fitted.score(&x_test, &y_test)?;
356            scores.push(score);
357        }
358
359        // Return mean cross-validation score
360        Ok(scores.iter().sum::<f64>() / scores.len() as f64)
361    }
362
363    /// Configure estimator with parameters (placeholder - needs specific implementation)
364    fn configure_estimator(&self, estimator: &E, _parameters: &[ParameterValue]) -> Result<E> {
365        // This is a placeholder - in a real implementation, you would need a way to
366        // configure the estimator with the given parameters
367        Ok(estimator.clone())
368    }
369
370    /// Tournament selection
371    fn tournament_selection<'a>(
372        &self,
373        population: &'a [Individual],
374        rng: &mut StdRng,
375    ) -> &'a Individual {
376        let mut tournament: Vec<&Individual> = population
377            .choose_multiple(rng, self.config.tournament_size)
378            .collect();
379
380        tournament.sort_by(|a, b| {
381            b.fitness
382                .unwrap_or(f64::NEG_INFINITY)
383                .partial_cmp(&a.fitness.unwrap_or(f64::NEG_INFINITY))
384                .expect("operation should succeed")
385        });
386
387        tournament[0]
388    }
389
390    /// Run the genetic algorithm optimization
391    pub fn fit(&mut self, x: &Array2<Float>, y: &Array1<Float>) -> Result<GeneticAlgorithmResult> {
392        let mut rng = match self.config.random_state {
393            Some(seed) => StdRng::seed_from_u64(seed),
394            None => StdRng::seed_from_u64(42),
395        };
396
397        // Initialize population
398        let mut population: Vec<Individual> = (0..self.config.population_size)
399            .map(|_| self.parameter_space.random_individual(&mut rng))
400            .collect();
401
402        // Evaluate initial population
403        for individual in &mut population {
404            let fitness = self.evaluate_fitness(individual, x, y)?;
405            individual.fitness = Some(fitness);
406        }
407
408        let mut best_scores = Vec::new();
409        let mut best_individual = population[0].clone();
410
411        // Evolution loop
412        for generation in 0..self.config.n_generations {
413            // Sort population by fitness
414            population.sort_by(|a, b| {
415                b.fitness
416                    .unwrap_or(f64::NEG_INFINITY)
417                    .partial_cmp(&a.fitness.unwrap_or(f64::NEG_INFINITY))
418                    .expect("operation should succeed")
419            });
420
421            // Track best individual
422            if population[0].fitness.unwrap_or(f64::NEG_INFINITY)
423                > best_individual.fitness.unwrap_or(f64::NEG_INFINITY)
424            {
425                best_individual = population[0].clone();
426            }
427
428            best_scores.push(best_individual.fitness.unwrap_or(f64::NEG_INFINITY));
429
430            // Create new generation
431            let mut new_population = Vec::new();
432
433            // Elitism: preserve best individuals
434            for individual in population.iter().take(self.config.elite_size) {
435                new_population.push(individual.clone());
436            }
437
438            // Generate offspring
439            while new_population.len() < self.config.population_size {
440                // Selection
441                let parent1 = self.tournament_selection(&population, &mut rng);
442                let parent2 = self.tournament_selection(&population, &mut rng);
443
444                // Crossover
445                let (mut child1, mut child2) = if rng.random_bool(self.config.crossover_rate) {
446                    self.parameter_space.crossover(parent1, parent2, &mut rng)
447                } else {
448                    (parent1.clone(), parent2.clone())
449                };
450
451                // Mutation
452                self.parameter_space
453                    .mutate(&mut child1, &mut rng, self.config.mutation_rate);
454                self.parameter_space
455                    .mutate(&mut child2, &mut rng, self.config.mutation_rate);
456
457                // Evaluate offspring
458                if child1.fitness.is_none() {
459                    let fitness = self.evaluate_fitness(&child1, x, y)?;
460                    child1.fitness = Some(fitness);
461                }
462                if child2.fitness.is_none() {
463                    let fitness = self.evaluate_fitness(&child2, x, y)?;
464                    child2.fitness = Some(fitness);
465                }
466
467                new_population.push(child1);
468                if new_population.len() < self.config.population_size {
469                    new_population.push(child2);
470                }
471            }
472
473            // Age population
474            for individual in &mut new_population {
475                individual.age_increment();
476            }
477
478            population = new_population;
479
480            // Optional: early stopping could be implemented here
481            println!(
482                "Generation {}: Best score = {:.6}",
483                generation, best_scores[generation]
484            );
485        }
486
487        Ok(GeneticAlgorithmResult {
488            best_parameters: best_individual.parameters,
489            best_score: best_individual.fitness.unwrap_or(f64::NEG_INFINITY),
490            score_history: best_scores,
491            n_evaluations: self.config.n_generations * self.config.population_size,
492        })
493    }
494}
495
496/// Result of genetic algorithm optimization
497#[derive(Debug, Clone)]
498pub struct GeneticAlgorithmResult {
499    /// Best parameters found
500    pub best_parameters: Vec<ParameterValue>,
501    /// Best score achieved
502    pub best_score: f64,
503    /// History of best scores per generation
504    pub score_history: Vec<f64>,
505    /// Total number of evaluations performed
506    pub n_evaluations: usize,
507}
508
509/// Multi-objective optimization using NSGA-II algorithm
510///
511/// This implements the Non-dominated Sorting Genetic Algorithm II (NSGA-II)
512/// for multi-objective optimization problems where multiple conflicting objectives
513/// need to be optimized simultaneously.
514pub struct MultiObjectiveGA<E, F, C> {
515    estimator: E,
516    parameter_space: ParameterSpace,
517    cv: C,
518    config: GeneticAlgorithmConfig,
519    objective_functions:
520        Vec<Box<dyn Fn(&F, &Array2<Float>, &Array1<Float>) -> Result<f64> + Send + Sync>>,
521    _phantom: std::marker::PhantomData<F>,
522}
523
524impl<E, F, C> MultiObjectiveGA<E, F, C>
525where
526    E: Clone,
527    F: Clone,
528    E: Fit<Array2<Float>, Array1<Float>, Fitted = F>,
529    F: Predict<Array2<Float>, Array1<Float>>,
530    F: Score<Array2<Float>, Array1<Float>, Float = f64>,
531    C: CrossValidator,
532{
533    /// Create a new multi-objective genetic algorithm optimizer
534    pub fn new(estimator: E, parameter_space: ParameterSpace, cv: C) -> Self {
535        Self {
536            estimator,
537            parameter_space,
538            cv,
539            config: GeneticAlgorithmConfig::default(),
540            objective_functions: Vec::new(),
541            _phantom: std::marker::PhantomData,
542        }
543    }
544
545    /// Set the configuration
546    pub fn config(mut self, config: GeneticAlgorithmConfig) -> Self {
547        self.config = config;
548        self
549    }
550
551    /// Add an objective function
552    pub fn add_objective<FObj>(mut self, objective: FObj) -> Self
553    where
554        FObj: Fn(&F, &Array2<Float>, &Array1<Float>) -> Result<f64> + Send + Sync + 'static,
555    {
556        self.objective_functions.push(Box::new(objective));
557        self
558    }
559
560    /// Check if individual A dominates individual B
561    fn dominates(a: &Individual, b: &Individual) -> bool {
562        if let (Some(fitness_a), Some(fitness_b)) = (&a.multi_fitness, &b.multi_fitness) {
563            let mut a_better = false;
564            let mut a_worse = false;
565
566            for (fa, fb) in fitness_a.iter().zip(fitness_b.iter()) {
567                if fa > fb {
568                    a_better = true;
569                } else if fa < fb {
570                    a_worse = true;
571                }
572            }
573
574            a_better && !a_worse
575        } else {
576            false
577        }
578    }
579
580    /// Perform non-dominated sorting
581    fn non_dominated_sort(population: &mut [Individual]) -> Vec<Vec<usize>> {
582        let n = population.len();
583        let mut fronts = Vec::new();
584        let mut domination_count = vec![0; n];
585        let mut dominated_solutions = vec![Vec::new(); n];
586
587        // Calculate domination relationships
588        for i in 0..n {
589            for j in 0..n {
590                if i != j {
591                    if Self::dominates(&population[i], &population[j]) {
592                        dominated_solutions[i].push(j);
593                    } else if Self::dominates(&population[j], &population[i]) {
594                        domination_count[i] += 1;
595                    }
596                }
597            }
598        }
599
600        // First front
601        let mut current_front = Vec::new();
602        for i in 0..n {
603            if domination_count[i] == 0 {
604                population[i].rank = Some(0);
605                current_front.push(i);
606            }
607        }
608        fronts.push(current_front.clone());
609
610        // Subsequent fronts
611        let mut front_num = 0;
612        while !current_front.is_empty() {
613            let mut next_front = Vec::new();
614            for &i in &current_front {
615                for &j in &dominated_solutions[i] {
616                    domination_count[j] -= 1;
617                    if domination_count[j] == 0 {
618                        population[j].rank = Some(front_num + 1);
619                        next_front.push(j);
620                    }
621                }
622            }
623            front_num += 1;
624            current_front = next_front.clone();
625            if !next_front.is_empty() {
626                fronts.push(next_front);
627            }
628        }
629
630        fronts
631    }
632
633    /// Calculate crowding distance for diversity preservation
634    fn calculate_crowding_distance(population: &mut [Individual], front: &[usize]) {
635        let front_size = front.len();
636        if front_size <= 2 {
637            for &idx in front {
638                population[idx].crowding_distance = Some(f64::INFINITY);
639            }
640            return;
641        }
642
643        // Initialize crowding distance
644        for &idx in front {
645            population[idx].crowding_distance = Some(0.0);
646        }
647
648        if let Some(n_objectives) = population[front[0]].multi_fitness.as_ref().map(|f| f.len()) {
649            for obj in 0..n_objectives {
650                // Sort by objective
651                let mut front_sorted = front.to_vec();
652                front_sorted.sort_by(|&a, &b| {
653                    let fitness_a = &population[a]
654                        .multi_fitness
655                        .as_ref()
656                        .expect("operation should succeed")[obj];
657                    let fitness_b = &population[b]
658                        .multi_fitness
659                        .as_ref()
660                        .expect("operation should succeed")[obj];
661                    fitness_a
662                        .partial_cmp(fitness_b)
663                        .unwrap_or(std::cmp::Ordering::Equal)
664                });
665
666                // Set boundary points to infinity
667                population[front_sorted[0]].crowding_distance = Some(f64::INFINITY);
668                population[front_sorted[front_size - 1]].crowding_distance = Some(f64::INFINITY);
669
670                // Calculate crowding distance for internal points
671                let obj_min = population[front_sorted[0]]
672                    .multi_fitness
673                    .as_ref()
674                    .expect("operation should succeed")[obj];
675                let obj_max = population[front_sorted[front_size - 1]]
676                    .multi_fitness
677                    .as_ref()
678                    .expect("operation should succeed")[obj];
679                let obj_range = obj_max - obj_min;
680
681                if obj_range > 0.0 {
682                    for i in 1..front_size - 1 {
683                        let current_idx = front_sorted[i];
684                        let prev_fitness = population[front_sorted[i - 1]]
685                            .multi_fitness
686                            .as_ref()
687                            .expect("operation should succeed")[obj];
688                        let next_fitness = population[front_sorted[i + 1]]
689                            .multi_fitness
690                            .as_ref()
691                            .expect("operation should succeed")[obj];
692
693                        let distance = population[current_idx].crowding_distance.unwrap_or(0.0);
694                        population[current_idx].crowding_distance =
695                            Some(distance + (next_fitness - prev_fitness) / obj_range);
696                    }
697                }
698            }
699        }
700    }
701
702    /// Evaluate multi-objective fitness
703    fn evaluate_multi_fitness(
704        &self,
705        individual: &Individual,
706        x: &Array2<Float>,
707        y: &Array1<Float>,
708    ) -> Result<Vec<f64>> {
709        let configured_estimator =
710            self.configure_estimator(&self.estimator, &individual.parameters)?;
711        let fitted = configured_estimator.fit(x, y)?;
712
713        let mut objectives = Vec::new();
714        for objective_fn in &self.objective_functions {
715            let score = objective_fn(&fitted, x, y)?;
716            objectives.push(score);
717        }
718
719        Ok(objectives)
720    }
721
722    /// Configure estimator with parameters (placeholder)
723    fn configure_estimator(&self, estimator: &E, _parameters: &[ParameterValue]) -> Result<E> {
724        Ok(estimator.clone())
725    }
726
727    /// Run multi-objective optimization
728    pub fn fit(&mut self, x: &Array2<Float>, y: &Array1<Float>) -> Result<MultiObjectiveResult> {
729        if self.objective_functions.is_empty() {
730            return Err(sklears_core::error::SklearsError::InvalidParameter {
731                name: "objective_functions".to_string(),
732                reason: "At least one objective function must be specified".to_string(),
733            });
734        }
735
736        let mut rng = match self.config.random_state {
737            Some(seed) => StdRng::seed_from_u64(seed),
738            None => StdRng::seed_from_u64(42),
739        };
740
741        // Initialize population
742        let mut population: Vec<Individual> = (0..self.config.population_size)
743            .map(|_| self.parameter_space.random_individual(&mut rng))
744            .collect();
745
746        // Evaluate initial population
747        for individual in &mut population {
748            let multi_fitness = self.evaluate_multi_fitness(individual, x, y)?;
749            individual.multi_fitness = Some(multi_fitness);
750        }
751
752        let mut pareto_history = Vec::new();
753
754        // Evolution loop
755        for generation in 0..self.config.n_generations {
756            // Non-dominated sorting
757            let fronts = Self::non_dominated_sort(&mut population);
758
759            // Calculate crowding distance for each front
760            for front in &fronts {
761                Self::calculate_crowding_distance(&mut population, front);
762            }
763
764            // Store Pareto front
765            let pareto_front: Vec<Individual> = fronts[0]
766                .iter()
767                .map(|&idx| population[idx].clone())
768                .collect();
769            pareto_history.push(pareto_front);
770
771            // Generate offspring
772            let mut offspring = Vec::new();
773            while offspring.len() < self.config.population_size {
774                // Tournament selection based on rank and crowding distance
775                let parent1 = self.nsga_tournament_selection(&population, &mut rng);
776                let parent2 = self.nsga_tournament_selection(&population, &mut rng);
777
778                // Crossover
779                let (mut child1, mut child2) = if rng.random_bool(self.config.crossover_rate) {
780                    self.parameter_space.crossover(parent1, parent2, &mut rng)
781                } else {
782                    (parent1.clone(), parent2.clone())
783                };
784
785                // Mutation
786                self.parameter_space
787                    .mutate(&mut child1, &mut rng, self.config.mutation_rate);
788                self.parameter_space
789                    .mutate(&mut child2, &mut rng, self.config.mutation_rate);
790
791                // Evaluate offspring
792                if child1.multi_fitness.is_none() {
793                    let multi_fitness = self.evaluate_multi_fitness(&child1, x, y)?;
794                    child1.multi_fitness = Some(multi_fitness);
795                }
796                if child2.multi_fitness.is_none() {
797                    let multi_fitness = self.evaluate_multi_fitness(&child2, x, y)?;
798                    child2.multi_fitness = Some(multi_fitness);
799                }
800
801                offspring.push(child1);
802                if offspring.len() < self.config.population_size {
803                    offspring.push(child2);
804                }
805            }
806
807            // Combine population and offspring
808            population.extend(offspring);
809
810            // Environmental selection
811            let fronts = Self::non_dominated_sort(&mut population);
812            let mut new_population = Vec::new();
813
814            // Add fronts until population size is reached
815            for front in &fronts {
816                if new_population.len() + front.len() <= self.config.population_size {
817                    for &idx in front {
818                        new_population.push(population[idx].clone());
819                    }
820                } else {
821                    // Calculate crowding distance and select best individuals
822                    Self::calculate_crowding_distance(&mut population, front);
823                    let mut front_individuals: Vec<_> =
824                        front.iter().map(|&idx| &population[idx]).collect();
825
826                    // Sort by crowding distance (descending)
827                    front_individuals.sort_by(|a, b| {
828                        b.crowding_distance
829                            .unwrap_or(0.0)
830                            .partial_cmp(&a.crowding_distance.unwrap_or(0.0))
831                            .unwrap_or(std::cmp::Ordering::Equal)
832                    });
833
834                    let remaining_slots = self.config.population_size - new_population.len();
835                    for individual in front_individuals.into_iter().take(remaining_slots) {
836                        new_population.push(individual.clone());
837                    }
838                    break;
839                }
840            }
841
842            population = new_population;
843
844            println!(
845                "Generation {}: Pareto front size = {}",
846                generation,
847                pareto_history
848                    .last()
849                    .expect("operation should succeed")
850                    .len()
851            );
852        }
853
854        // Final non-dominated sorting
855        let fronts = Self::non_dominated_sort(&mut population);
856        let final_pareto_front: Vec<Individual> = fronts[0]
857            .iter()
858            .map(|&idx| population[idx].clone())
859            .collect();
860
861        Ok(MultiObjectiveResult {
862            pareto_front: final_pareto_front,
863            pareto_history,
864            n_evaluations: self.config.n_generations * self.config.population_size * 2, // Including offspring
865        })
866    }
867
868    /// NSGA-II tournament selection
869    fn nsga_tournament_selection<'a>(
870        &self,
871        population: &'a [Individual],
872        rng: &mut StdRng,
873    ) -> &'a Individual {
874        let tournament: Vec<&Individual> = population
875            .choose_multiple(rng, self.config.tournament_size)
876            .collect();
877
878        // Select best individual based on rank and crowding distance
879        tournament
880            .into_iter()
881            .min_by(|a, b| {
882                // First compare rank
883                let rank_cmp = a
884                    .rank
885                    .unwrap_or(usize::MAX)
886                    .cmp(&b.rank.unwrap_or(usize::MAX));
887                if rank_cmp != std::cmp::Ordering::Equal {
888                    return rank_cmp;
889                }
890
891                // If ranks are equal, compare crowding distance (higher is better)
892                b.crowding_distance
893                    .unwrap_or(0.0)
894                    .partial_cmp(&a.crowding_distance.unwrap_or(0.0))
895                    .unwrap_or(std::cmp::Ordering::Equal)
896            })
897            .expect("operation should succeed")
898    }
899}
900
901/// Result of multi-objective optimization
902#[derive(Debug, Clone)]
903pub struct MultiObjectiveResult {
904    /// Final Pareto front
905    pub pareto_front: Vec<Individual>,
906    /// History of Pareto fronts for each generation
907    pub pareto_history: Vec<Vec<Individual>>,
908    /// Total number of evaluations performed
909    pub n_evaluations: usize,
910}
911
912impl MultiObjectiveResult {
913    /// Get the hypervolume of the Pareto front
914    pub fn hypervolume(&self, reference_point: &[f64]) -> f64 {
915        if self.pareto_front.is_empty() {
916            return 0.0;
917        }
918
919        // Simple hypervolume calculation for 2D case
920        if reference_point.len() == 2 {
921            let mut points: Vec<(f64, f64)> = self
922                .pareto_front
923                .iter()
924                .filter_map(|ind| {
925                    if let Some(fitness) = &ind.multi_fitness {
926                        if fitness.len() >= 2 {
927                            Some((fitness[0], fitness[1]))
928                        } else {
929                            None
930                        }
931                    } else {
932                        None
933                    }
934                })
935                .collect();
936
937            if points.is_empty() {
938                return 0.0;
939            }
940
941            // Sort by first objective
942            points.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
943
944            let mut hv = 0.0;
945            let mut prev_x = reference_point[0];
946
947            for (x, y) in points {
948                if x > prev_x && y > reference_point[1] {
949                    hv += (x - prev_x) * (y - reference_point[1]);
950                    prev_x = x;
951                }
952            }
953
954            hv
955        } else {
956            // For higher dimensions, return 0 (would need more complex algorithm)
957            0.0
958        }
959    }
960}
961
962/// Cross-validation wrapper for evolutionary search
963#[derive(Debug, Clone)]
964pub struct EvolutionarySearchCV<E, S> {
965    estimator: E,
966    parameter_space: ParameterSpace,
967    scorer: Box<S>,
968    cv_folds: usize,
969    population_size: usize,
970    n_generations: usize,
971    crossover_rate: f64,
972    mutation_rate: f64,
973    random_state: Option<u64>,
974}
975
976impl<E, S> EvolutionarySearchCV<E, S>
977where
978    E: Clone
979        + Fit<
980            ArrayBase<OwnedRepr<Float>, Dim<[usize; 2]>, Float>,
981            ArrayBase<OwnedRepr<Float>, Dim<[usize; 1]>, Float>,
982        > + Predict<
983            ArrayBase<OwnedRepr<Float>, Dim<[usize; 2]>, Float>,
984            ArrayBase<OwnedRepr<Float>, Dim<[usize; 1]>, Float>,
985        >,
986    S: Fn(
987        &E::Fitted,
988        &ArrayBase<OwnedRepr<Float>, Dim<[usize; 2]>, Float>,
989        &ArrayBase<OwnedRepr<Float>, Dim<[usize; 1]>, Float>,
990    ) -> Result<f64>,
991{
992    /// Create a new evolutionary search with cross-validation
993    pub fn new(
994        estimator: E,
995        parameter_space: ParameterSpace,
996        scorer: Box<S>,
997        cv_folds: usize,
998    ) -> Self {
999        Self {
1000            estimator,
1001            parameter_space,
1002            scorer,
1003            cv_folds,
1004            population_size: 50,
1005            n_generations: 100,
1006            crossover_rate: 0.8,
1007            mutation_rate: 0.1,
1008            random_state: None,
1009        }
1010    }
1011
1012    /// Set population size
1013    pub fn set_population_size(&mut self, population_size: usize) {
1014        self.population_size = population_size;
1015    }
1016
1017    /// Set number of generations
1018    pub fn set_n_generations(&mut self, n_generations: usize) {
1019        self.n_generations = n_generations;
1020    }
1021
1022    /// Set random state for reproducibility
1023    pub fn set_random_state(&mut self, seed: u64) {
1024        self.random_state = Some(seed);
1025    }
1026
1027    /// Set crossover rate
1028    pub fn set_crossover_rate(&mut self, rate: f64) {
1029        self.crossover_rate = rate.clamp(0.0, 1.0);
1030    }
1031
1032    /// Set mutation rate
1033    pub fn set_mutation_rate(&mut self, rate: f64) {
1034        self.mutation_rate = rate.clamp(0.0, 1.0);
1035    }
1036
1037    /// Perform evolutionary search with cross-validation
1038    pub fn fit(&self, x: &Array2<Float>, y: &Array1<Float>) -> Result<EvolutionarySearchResult> {
1039        let mut rng = match self.random_state {
1040            Some(seed) => StdRng::seed_from_u64(seed),
1041            None => StdRng::seed_from_u64(42),
1042        };
1043
1044        // Initialize population
1045        let mut population: Vec<Individual> = (0..self.population_size)
1046            .map(|_| {
1047                let params = self.parameter_space.sample(&mut rng);
1048                Individual::new(params)
1049            })
1050            .collect();
1051
1052        // Evaluate initial population
1053        for individual in &mut population {
1054            individual.fitness = Some(self.evaluate_individual(individual, x, y)?);
1055        }
1056
1057        let mut best_individual = population[0].clone();
1058        let mut best_fitness = population[0].fitness.expect("operation should succeed");
1059
1060        // Evolution loop
1061        for _generation in 0..self.n_generations {
1062            // Selection and reproduction
1063            let mut new_population = Vec::new();
1064
1065            // Elitism: keep best individuals
1066            let elite_size = (self.population_size as f64 * 0.1) as usize;
1067            population.sort_by(|a, b| {
1068                b.fitness
1069                    .partial_cmp(&a.fitness)
1070                    .expect("operation should succeed")
1071            });
1072            new_population.extend_from_slice(&population[..elite_size]);
1073
1074            // Generate offspring
1075            while new_population.len() < self.population_size {
1076                // Tournament selection
1077                let parent1 = self.tournament_selection(&population, &mut rng);
1078                let parent2 = self.tournament_selection(&population, &mut rng);
1079
1080                // Crossover
1081                let (mut child1, mut child2) = if rng.random::<f64>() < self.crossover_rate {
1082                    self.parameter_space.crossover(parent1, parent2, &mut rng)
1083                } else {
1084                    (parent1.clone(), parent2.clone())
1085                };
1086
1087                // Mutation
1088                if rng.random::<f64>() < self.mutation_rate {
1089                    self.parameter_space.mutate(&mut child1, &mut rng, 0.1);
1090                }
1091                if rng.random::<f64>() < self.mutation_rate {
1092                    self.parameter_space.mutate(&mut child2, &mut rng, 0.1);
1093                }
1094
1095                // Evaluate offspring
1096                child1.fitness = Some(self.evaluate_individual(&child1, x, y)?);
1097                child2.fitness = Some(self.evaluate_individual(&child2, x, y)?);
1098
1099                new_population.push(child1);
1100                if new_population.len() < self.population_size {
1101                    new_population.push(child2);
1102                }
1103            }
1104
1105            population = new_population;
1106
1107            // Update best individual
1108            for individual in &population {
1109                let fitness = individual.fitness.expect("operation should succeed");
1110                if fitness > best_fitness {
1111                    best_fitness = fitness;
1112                    best_individual = individual.clone();
1113                }
1114            }
1115        }
1116
1117        Ok(EvolutionarySearchResult {
1118            best_params: best_individual.parameters,
1119            best_score: best_fitness,
1120            n_generations: self.n_generations,
1121            population_size: self.population_size,
1122        })
1123    }
1124
1125    /// Evaluate individual using cross-validation
1126    fn evaluate_individual(
1127        &self,
1128        _individual: &Individual,
1129        x: &Array2<Float>,
1130        y: &Array1<Float>,
1131    ) -> Result<f64> {
1132        // Simple train-test split for evaluation (placeholder for full CV)
1133        let n_samples = x.nrows();
1134        let train_size = (n_samples as f64 * 0.8) as usize;
1135
1136        let x_train_view = x.slice(scirs2_core::ndarray::s![..train_size, ..]);
1137        let y_train_view = y.slice(scirs2_core::ndarray::s![..train_size]);
1138        let x_test_view = x.slice(scirs2_core::ndarray::s![train_size.., ..]);
1139        let y_test_view = y.slice(scirs2_core::ndarray::s![train_size..]);
1140
1141        let x_train = Array2::from_shape_vec(
1142            (x_train_view.nrows(), x_train_view.ncols()),
1143            x_train_view.iter().copied().collect(),
1144        )?;
1145        let y_train = Array1::from_vec(y_train_view.iter().copied().collect());
1146        let x_test = Array2::from_shape_vec(
1147            (x_test_view.nrows(), x_test_view.ncols()),
1148            x_test_view.iter().copied().collect(),
1149        )?;
1150        let y_test = Array1::from_vec(y_test_view.iter().copied().collect());
1151
1152        // Configure estimator with parameters (simplified)
1153        let estimator = self.estimator.clone();
1154        let fitted = estimator.fit(&x_train, &y_train)?;
1155
1156        (self.scorer)(&fitted, &x_test, &y_test)
1157    }
1158
1159    /// Tournament selection
1160    fn tournament_selection<'a>(
1161        &self,
1162        population: &'a [Individual],
1163        rng: &mut StdRng,
1164    ) -> &'a Individual {
1165        let tournament_size = 3;
1166        let tournament: Vec<&Individual> =
1167            population.choose_multiple(rng, tournament_size).collect();
1168
1169        tournament
1170            .into_iter()
1171            .max_by(|a, b| {
1172                a.fitness
1173                    .partial_cmp(&b.fitness)
1174                    .expect("operation should succeed")
1175            })
1176            .expect("operation should succeed")
1177    }
1178}
1179
1180/// Result of evolutionary search
1181#[derive(Debug, Clone)]
1182pub struct EvolutionarySearchResult {
1183    pub best_params: Vec<ParameterValue>,
1184    pub best_score: f64,
1185    pub n_generations: usize,
1186    pub population_size: usize,
1187}
1188
1189#[allow(non_snake_case)]
1190#[cfg(test)]
1191mod tests {
1192    use super::*;
1193
1194    #[test]
1195    fn test_parameter_space_creation() {
1196        let param_space = ParameterSpace::new()
1197            .add_continuous("alpha", 0.1, 10.0)
1198            .add_integer("max_depth", 1, 20)
1199            .add_categorical("kernel", vec!["linear", "rbf", "poly"])
1200            .add_boolean("fit_intercept");
1201
1202        assert_eq!(param_space.parameters.len(), 4);
1203    }
1204
1205    #[test]
1206    fn test_random_individual_generation() {
1207        let param_space = ParameterSpace::new()
1208            .add_continuous("alpha", 0.1, 10.0)
1209            .add_integer("max_depth", 1, 20)
1210            .add_categorical("kernel", vec!["linear", "rbf"])
1211            .add_boolean("fit_intercept");
1212
1213        let mut rng = StdRng::seed_from_u64(42);
1214        let individual = param_space.random_individual(&mut rng);
1215
1216        assert_eq!(individual.parameters.len(), 4);
1217        assert!(matches!(individual.parameters[0], ParameterValue::Float(_)));
1218        assert!(matches!(
1219            individual.parameters[1],
1220            ParameterValue::Integer(_)
1221        ));
1222        assert!(matches!(
1223            individual.parameters[2],
1224            ParameterValue::String(_)
1225        ));
1226        assert!(matches!(
1227            individual.parameters[3],
1228            ParameterValue::Boolean(_)
1229        ));
1230    }
1231
1232    #[test]
1233    fn test_crossover() {
1234        let param_space = ParameterSpace::new()
1235            .add_continuous("alpha", 0.1, 10.0)
1236            .add_categorical("kernel", vec!["linear", "rbf"]);
1237
1238        let parent1 = Individual::new(vec![
1239            ParameterValue::Float(1.0),
1240            ParameterValue::String("linear".to_string()),
1241        ]);
1242        let parent2 = Individual::new(vec![
1243            ParameterValue::Float(5.0),
1244            ParameterValue::String("rbf".to_string()),
1245        ]);
1246
1247        let mut rng = StdRng::seed_from_u64(42);
1248        let (child1, child2) = param_space.crossover(&parent1, &parent2, &mut rng);
1249
1250        assert_eq!(child1.parameters.len(), 2);
1251        assert_eq!(child2.parameters.len(), 2);
1252    }
1253
1254    #[test]
1255    fn test_mutation() {
1256        let param_space = ParameterSpace::new()
1257            .add_continuous("alpha", 0.1, 10.0)
1258            .add_boolean("fit_intercept");
1259
1260        let mut individual = Individual::new(vec![
1261            ParameterValue::Float(5.0),
1262            ParameterValue::Boolean(true),
1263        ]);
1264
1265        let mut rng = StdRng::seed_from_u64(42);
1266        param_space.mutate(&mut individual, &mut rng, 1.0); // 100% mutation rate
1267
1268        // Values should have changed (though specific values depend on RNG)
1269        assert_eq!(individual.parameters.len(), 2);
1270        assert!(individual.fitness.is_none()); // Fitness should be reset
1271    }
1272
1273    #[test]
1274    fn test_genetic_algorithm_config() {
1275        let config = GeneticAlgorithmConfig {
1276            population_size: 30,
1277            n_generations: 50,
1278            crossover_rate: 0.9,
1279            mutation_rate: 0.05,
1280            elite_size: 3,
1281            tournament_size: 5,
1282            random_state: Some(123),
1283        };
1284
1285        assert_eq!(config.population_size, 30);
1286        assert_eq!(config.n_generations, 50);
1287        assert_eq!(config.crossover_rate, 0.9);
1288        assert_eq!(config.mutation_rate, 0.05);
1289        assert_eq!(config.elite_size, 3);
1290        assert_eq!(config.tournament_size, 5);
1291        assert_eq!(config.random_state, Some(123));
1292    }
1293}