sklears_svm/hyperparameter_optimization/
evolutionary_optimization.rs1use std::time::Instant;
4
5#[cfg(feature = "parallel")]
6use rayon::prelude::*;
7use scirs2_core::ndarray::{Array1, Array2};
8use scirs2_core::random::Random;
9
10use crate::kernels::KernelType;
11use crate::svc::SVC;
12use sklears_core::error::{Result, SklearsError};
13use sklears_core::traits::{Fit, Predict};
14
15use super::{
16 OptimizationConfig, OptimizationResult, ParameterSet, ParameterSpec, ScoringMetric, SearchSpace,
17};
18
19type DMatrix<T> = Array2<T>;
21type DVector<T> = Array1<T>;
22
23#[derive(Debug, Clone)]
25pub enum SelectionMethod {
26 Tournament { size: usize },
27 RouletteWheel,
28 RankBased,
29}
30
31#[derive(Debug, Clone)]
33pub struct Individual {
34 pub params: ParameterSet,
35 pub fitness: f64,
36}
37
38impl Individual {
39 pub fn new(params: ParameterSet) -> Self {
40 Self {
41 params,
42 fitness: -f64::INFINITY,
43 }
44 }
45}
46
47pub struct EvolutionaryOptimizationCV {
49 config: OptimizationConfig,
50 search_space: SearchSpace,
51 rng: Random<scirs2_core::random::rngs::StdRng>,
52 population_size: usize,
53 selection_method: SelectionMethod,
54 mutation_rate: f64,
55 crossover_rate: f64,
56 elite_ratio: f64,
57}
58
59impl EvolutionaryOptimizationCV {
60 pub fn new(config: OptimizationConfig, search_space: SearchSpace) -> Self {
62 let rng = if let Some(seed) = config.random_state {
63 Random::seed(seed)
64 } else {
65 Random::seed(42) };
67
68 Self {
69 config,
70 search_space,
71 rng,
72 population_size: 50,
73 selection_method: SelectionMethod::Tournament { size: 3 },
74 mutation_rate: 0.1,
75 crossover_rate: 0.8,
76 elite_ratio: 0.1,
77 }
78 }
79
80 pub fn population_size(mut self, size: usize) -> Self {
82 self.population_size = size;
83 self
84 }
85
86 pub fn selection_method(mut self, method: SelectionMethod) -> Self {
88 self.selection_method = method;
89 self
90 }
91
92 pub fn mutation_rate(mut self, rate: f64) -> Self {
94 self.mutation_rate = rate;
95 self
96 }
97
98 pub fn crossover_rate(mut self, rate: f64) -> Self {
100 self.crossover_rate = rate;
101 self
102 }
103
104 pub fn elite_ratio(mut self, ratio: f64) -> Self {
106 self.elite_ratio = ratio;
107 self
108 }
109
110 pub fn fit(&mut self, x: &DMatrix<f64>, y: &DVector<f64>) -> Result<OptimizationResult> {
112 let start_time = Instant::now();
113
114 if self.config.verbose {
115 println!(
116 "Evolutionary optimization with {} generations, population size {}",
117 self.config.n_iterations, self.population_size
118 );
119 }
120
121 let mut population = self.initialize_population()?;
123
124 self.evaluate_population(&mut population, x, y)?;
126
127 let mut best_individual = population[0].clone();
128 let mut cv_results = Vec::new();
129 let mut score_history = Vec::new();
130 let mut generations_without_improvement = 0;
131
132 for generation in 0..self.config.n_iterations {
134 population.sort_by(|a, b| {
136 b.fitness
137 .partial_cmp(&a.fitness)
138 .unwrap_or(std::cmp::Ordering::Equal)
139 });
140
141 if population[0].fitness > best_individual.fitness {
143 best_individual = population[0].clone();
144 generations_without_improvement = 0;
145 } else {
146 generations_without_improvement += 1;
147 }
148
149 for ind in &population {
151 cv_results.push((ind.params.clone(), ind.fitness));
152 }
153 score_history.push(best_individual.fitness);
154
155 if self.config.verbose && (generation + 1) % 10 == 0 {
156 println!(
157 "Generation {}/{}: Best score {:.6}",
158 generation + 1,
159 self.config.n_iterations,
160 best_individual.fitness
161 );
162 }
163
164 if let Some(patience) = self.config.early_stopping_patience {
166 if generations_without_improvement >= patience {
167 if self.config.verbose {
168 println!("Early stopping at generation {}", generation + 1);
169 }
170 break;
171 }
172 }
173
174 let mut next_generation = Vec::new();
176
177 let n_elite = (self.population_size as f64 * self.elite_ratio) as usize;
179 for individual in population.iter().take(n_elite.min(population.len())) {
180 next_generation.push(individual.clone());
181 }
182
183 while next_generation.len() < self.population_size {
185 let parent1 = self.select_individual(&population)?;
187 let parent2 = self.select_individual(&population)?;
188
189 use scirs2_core::random::essentials::Uniform;
191 let dist = Uniform::new(0.0, 1.0).map_err(|e| {
192 SklearsError::InvalidInput(format!(
193 "Failed to create uniform distribution: {}",
194 e
195 ))
196 })?;
197 let offspring = if self.rng.sample(dist) < self.crossover_rate {
198 self.crossover(&parent1.params, &parent2.params)?
199 } else {
200 parent1.params.clone()
201 };
202
203 let dist = Uniform::new(0.0, 1.0).map_err(|e| {
205 SklearsError::InvalidInput(format!(
206 "Failed to create uniform distribution: {}",
207 e
208 ))
209 })?;
210 let mutated = if self.rng.sample(dist) < self.mutation_rate {
211 self.mutate(&offspring)?
212 } else {
213 offspring
214 };
215
216 next_generation.push(Individual::new(mutated));
217 }
218
219 self.evaluate_population(&mut next_generation, x, y)?;
221 population = next_generation;
222 }
223
224 population.sort_by(|a, b| {
226 b.fitness
227 .partial_cmp(&a.fitness)
228 .unwrap_or(std::cmp::Ordering::Equal)
229 });
230 let best_individual = population[0].clone();
231
232 if self.config.verbose {
233 println!("Best score: {:.6}", best_individual.fitness);
234 println!("Best params: {:?}", best_individual.params);
235 }
236
237 Ok(OptimizationResult {
238 best_params: best_individual.params,
239 best_score: best_individual.fitness,
240 cv_results,
241 n_iterations: score_history.len(),
242 optimization_time: start_time.elapsed().as_secs_f64(),
243 score_history,
244 })
245 }
246
247 fn initialize_population(&mut self) -> Result<Vec<Individual>> {
249 let mut population = Vec::with_capacity(self.population_size);
250
251 let c_spec = self.search_space.c.clone();
253 let kernel_spec = self.search_space.kernel.clone();
254 let tol_spec = self.search_space.tol.clone();
255 let max_iter_spec = self.search_space.max_iter.clone();
256
257 for _ in 0..self.population_size {
258 let c = self.sample_value(&c_spec)?;
259
260 let kernel = if let Some(ref spec) = kernel_spec {
261 self.sample_kernel(spec)?
262 } else {
263 KernelType::Rbf { gamma: 1.0 }
264 };
265
266 let tol = if let Some(ref spec) = tol_spec {
267 self.sample_value(spec)?
268 } else {
269 1e-3
270 };
271
272 let max_iter = if let Some(ref spec) = max_iter_spec {
273 self.sample_value(spec)? as usize
274 } else {
275 1000
276 };
277
278 population.push(Individual::new(ParameterSet {
279 c,
280 kernel,
281 tol,
282 max_iter,
283 }));
284 }
285
286 Ok(population)
287 }
288
289 fn evaluate_population(
291 &self,
292 population: &mut [Individual],
293 x: &DMatrix<f64>,
294 y: &DVector<f64>,
295 ) -> Result<()> {
296 #[cfg(feature = "parallel")]
297 if self.config.n_jobs.is_some() {
298 let fitnesses: Vec<f64> = population
300 .par_iter()
301 .map(|ind| {
302 self.evaluate_params(&ind.params, x, y)
303 .unwrap_or(-f64::INFINITY)
304 })
305 .collect();
306
307 for (ind, fitness) in population.iter_mut().zip(fitnesses.iter()) {
308 ind.fitness = *fitness;
309 }
310 } else {
311 for ind in population.iter_mut() {
313 ind.fitness = self.evaluate_params(&ind.params, x, y)?;
314 }
315 }
316
317 #[cfg(not(feature = "parallel"))]
318 {
319 for ind in population.iter_mut() {
321 ind.fitness = self.evaluate_params(&ind.params, x, y)?;
322 }
323 }
324
325 Ok(())
326 }
327
328 fn select_individual(&mut self, population: &[Individual]) -> Result<Individual> {
330 match &self.selection_method {
331 SelectionMethod::Tournament { size } => self.tournament_selection(population, *size),
332 SelectionMethod::RouletteWheel => self.roulette_wheel_selection(population),
333 SelectionMethod::RankBased => self.rank_based_selection(population),
334 }
335 }
336
337 fn tournament_selection(
339 &mut self,
340 population: &[Individual],
341 tournament_size: usize,
342 ) -> Result<Individual> {
343 use scirs2_core::random::essentials::Uniform;
344 let dist = Uniform::new(0, population.len()).map_err(|e| {
345 SklearsError::InvalidInput(format!("Failed to create uniform distribution: {}", e))
346 })?;
347
348 let mut best_idx = self.rng.sample(dist);
349 let mut best_fitness = population[best_idx].fitness;
350
351 for _ in 1..tournament_size {
352 let idx = self.rng.sample(dist);
353 if population[idx].fitness > best_fitness {
354 best_idx = idx;
355 best_fitness = population[idx].fitness;
356 }
357 }
358
359 Ok(population[best_idx].clone())
360 }
361
362 fn roulette_wheel_selection(&mut self, population: &[Individual]) -> Result<Individual> {
364 let min_fitness = population
366 .iter()
367 .map(|ind| ind.fitness)
368 .min_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
369 .unwrap_or(0.0);
370 let offset = if min_fitness < 0.0 {
371 -min_fitness + 1.0
372 } else {
373 0.0
374 };
375
376 let total_fitness: f64 = population.iter().map(|ind| ind.fitness + offset).sum();
377
378 if total_fitness <= 0.0 {
379 use scirs2_core::random::essentials::Uniform;
381 let dist = Uniform::new(0, population.len()).map_err(|e| {
382 SklearsError::InvalidInput(format!("Failed to create uniform distribution: {}", e))
383 })?;
384 let idx = self.rng.sample(dist);
385 return Ok(population[idx].clone());
386 }
387
388 use scirs2_core::random::essentials::Uniform;
389 let dist = Uniform::new(0.0, total_fitness).map_err(|e| {
390 SklearsError::InvalidInput(format!("Failed to create uniform distribution: {}", e))
391 })?;
392 let mut spin = self.rng.sample(dist);
393
394 for ind in population {
395 spin -= ind.fitness + offset;
396 if spin <= 0.0 {
397 return Ok(ind.clone());
398 }
399 }
400
401 Ok(population[population.len() - 1].clone())
403 }
404
405 fn rank_based_selection(&mut self, population: &[Individual]) -> Result<Individual> {
407 let total_rank = population.len() * (population.len() + 1) / 2;
409
410 use scirs2_core::random::essentials::Uniform;
411 let dist = Uniform::new(0.0, total_rank as f64).map_err(|e| {
412 SklearsError::InvalidInput(format!("Failed to create uniform distribution: {}", e))
413 })?;
414 let mut spin = self.rng.sample(dist);
415
416 for (i, ind) in population.iter().enumerate() {
417 let rank = population.len() - i; spin -= rank as f64;
419 if spin <= 0.0 {
420 return Ok(ind.clone());
421 }
422 }
423
424 Ok(population[population.len() - 1].clone())
426 }
427
428 fn crossover(
430 &mut self,
431 parent1: &ParameterSet,
432 parent2: &ParameterSet,
433 ) -> Result<ParameterSet> {
434 use scirs2_core::random::essentials::Uniform;
435 let dist = Uniform::new(0.0, 1.0).map_err(|e| {
436 SklearsError::InvalidInput(format!("Failed to create uniform distribution: {}", e))
437 })?;
438
439 let c = if self.rng.sample(dist) < 0.5 {
441 parent1.c
442 } else {
443 parent2.c
444 };
445
446 let kernel = if self.rng.sample(dist) < 0.5 {
447 parent1.kernel.clone()
448 } else {
449 parent2.kernel.clone()
450 };
451
452 let tol = if self.rng.sample(dist) < 0.5 {
453 parent1.tol
454 } else {
455 parent2.tol
456 };
457
458 let max_iter = if self.rng.sample(dist) < 0.5 {
459 parent1.max_iter
460 } else {
461 parent2.max_iter
462 };
463
464 Ok(ParameterSet {
465 c,
466 kernel,
467 tol,
468 max_iter,
469 })
470 }
471
472 fn mutate(&mut self, params: &ParameterSet) -> Result<ParameterSet> {
474 use scirs2_core::random::essentials::Uniform;
475 let dist = Uniform::new(0.0, 1.0).map_err(|e| {
476 SklearsError::InvalidInput(format!("Failed to create uniform distribution: {}", e))
477 })?;
478
479 let c_spec = self.search_space.c.clone();
481 let kernel_spec = self.search_space.kernel.clone();
482 let tol_spec = self.search_space.tol.clone();
483 let max_iter_spec = self.search_space.max_iter.clone();
484
485 let c = if self.rng.sample(dist) < 0.2 {
487 self.sample_value(&c_spec)?
488 } else {
489 params.c
490 };
491
492 let kernel = if self.rng.sample(dist) < 0.2 {
493 if let Some(ref spec) = kernel_spec {
494 self.sample_kernel(spec)?
495 } else {
496 params.kernel.clone()
497 }
498 } else {
499 params.kernel.clone()
500 };
501
502 let tol = if self.rng.sample(dist) < 0.2 {
503 if let Some(ref spec) = tol_spec {
504 self.sample_value(spec)?
505 } else {
506 params.tol
507 }
508 } else {
509 params.tol
510 };
511
512 let max_iter = if self.rng.sample(dist) < 0.2 {
513 if let Some(ref spec) = max_iter_spec {
514 self.sample_value(spec)? as usize
515 } else {
516 params.max_iter
517 }
518 } else {
519 params.max_iter
520 };
521
522 Ok(ParameterSet {
523 c,
524 kernel,
525 tol,
526 max_iter,
527 })
528 }
529
530 fn sample_value(&mut self, spec: &ParameterSpec) -> Result<f64> {
532 match spec {
533 ParameterSpec::Fixed(value) => Ok(*value),
534 ParameterSpec::Uniform { min, max } => {
535 use scirs2_core::random::essentials::Uniform;
536 let dist = Uniform::new(*min, *max).map_err(|e| {
537 SklearsError::InvalidInput(format!(
538 "Failed to create uniform distribution: {}",
539 e
540 ))
541 })?;
542 Ok(self.rng.sample(dist))
543 }
544 ParameterSpec::LogUniform { min, max } => {
545 use scirs2_core::random::essentials::Uniform;
546 let log_min = min.ln();
547 let log_max = max.ln();
548 let dist = Uniform::new(log_min, log_max).map_err(|e| {
549 SklearsError::InvalidInput(format!(
550 "Failed to create uniform distribution: {}",
551 e
552 ))
553 })?;
554 let log_val = self.rng.sample(dist);
555 Ok(log_val.exp())
556 }
557 ParameterSpec::Choice(choices) => {
558 if choices.is_empty() {
559 return Err(SklearsError::InvalidInput("Empty choice list".to_string()));
560 }
561 use scirs2_core::random::essentials::Uniform;
562 let dist = Uniform::new(0, choices.len()).map_err(|e| {
563 SklearsError::InvalidInput(format!(
564 "Failed to create uniform distribution: {}",
565 e
566 ))
567 })?;
568 let idx = self.rng.sample(dist);
569 Ok(choices[idx])
570 }
571 ParameterSpec::KernelChoice(_) => Err(SklearsError::InvalidInput(
572 "Use sample_kernel for kernel specs".to_string(),
573 )),
574 }
575 }
576
577 fn sample_kernel(&mut self, spec: &ParameterSpec) -> Result<KernelType> {
579 match spec {
580 ParameterSpec::KernelChoice(kernels) => {
581 if kernels.is_empty() {
582 return Err(SklearsError::InvalidInput(
583 "Empty kernel choice list".to_string(),
584 ));
585 }
586 use scirs2_core::random::essentials::Uniform;
587 let dist = Uniform::new(0, kernels.len()).map_err(|e| {
588 SklearsError::InvalidInput(format!(
589 "Failed to create uniform distribution: {}",
590 e
591 ))
592 })?;
593 let idx = self.rng.sample(dist);
594 Ok(kernels[idx].clone())
595 }
596 _ => Err(SklearsError::InvalidInput(
597 "Invalid kernel specification".to_string(),
598 )),
599 }
600 }
601
602 fn evaluate_params(
604 &self,
605 params: &ParameterSet,
606 x: &DMatrix<f64>,
607 y: &DVector<f64>,
608 ) -> Result<f64> {
609 let scores = self.cross_validate(params, x, y)?;
610 Ok(scores.iter().sum::<f64>() / scores.len() as f64)
611 }
612
613 fn cross_validate(
615 &self,
616 params: &ParameterSet,
617 x: &DMatrix<f64>,
618 y: &DVector<f64>,
619 ) -> Result<Vec<f64>> {
620 let n_samples = x.nrows();
621 let fold_size = n_samples / self.config.cv_folds;
622 let mut scores = Vec::new();
623
624 for fold in 0..self.config.cv_folds {
625 let start_idx = fold * fold_size;
626 let end_idx = if fold == self.config.cv_folds - 1 {
627 n_samples
628 } else {
629 (fold + 1) * fold_size
630 };
631
632 let mut x_train_data = Vec::new();
634 let mut y_train_vals = Vec::new();
635 let mut x_test_data = Vec::new();
636 let mut y_test_vals = Vec::new();
637
638 for i in 0..n_samples {
639 if i >= start_idx && i < end_idx {
640 for j in 0..x.ncols() {
642 x_test_data.push(x[[i, j]]);
643 }
644 y_test_vals.push(y[i]);
645 } else {
646 for j in 0..x.ncols() {
648 x_train_data.push(x[[i, j]]);
649 }
650 y_train_vals.push(y[i]);
651 }
652 }
653
654 let n_train = y_train_vals.len();
655 let n_test = y_test_vals.len();
656 let n_features = x.ncols();
657
658 let x_train = Array2::from_shape_vec((n_train, n_features), x_train_data)?;
659 let y_train = Array1::from_vec(y_train_vals);
660 let x_test = Array2::from_shape_vec((n_test, n_features), x_test_data)?;
661 let y_test = Array1::from_vec(y_test_vals);
662
663 let svm = SVC::new()
665 .c(params.c)
666 .kernel(params.kernel.clone())
667 .tol(params.tol)
668 .max_iter(params.max_iter);
669
670 let fitted_svm = svm.fit(&x_train, &y_train)?;
671 let y_pred = fitted_svm.predict(&x_test)?;
672
673 let score = self.calculate_score(&y_test, &y_pred)?;
674 scores.push(score);
675 }
676
677 Ok(scores)
678 }
679
680 fn calculate_score(&self, y_true: &DVector<f64>, y_pred: &DVector<f64>) -> Result<f64> {
682 match self.config.scoring {
683 ScoringMetric::Accuracy => {
684 let correct = y_true
685 .iter()
686 .zip(y_pred.iter())
687 .map(|(&t, &p)| if (t - p).abs() < 0.5 { 1.0 } else { 0.0 })
688 .sum::<f64>();
689 Ok(correct / y_true.len() as f64)
690 }
691 ScoringMetric::MeanSquaredError => {
692 let mse = y_true
693 .iter()
694 .zip(y_pred.iter())
695 .map(|(&t, &p)| (t - p).powi(2))
696 .sum::<f64>()
697 / y_true.len() as f64;
698 Ok(-mse) }
700 ScoringMetric::MeanAbsoluteError => {
701 let mae = y_true
702 .iter()
703 .zip(y_pred.iter())
704 .map(|(&t, &p)| (t - p).abs())
705 .sum::<f64>()
706 / y_true.len() as f64;
707 Ok(-mae) }
709 _ => {
710 let correct = y_true
712 .iter()
713 .zip(y_pred.iter())
714 .map(|(&t, &p)| if (t - p).abs() < 0.5 { 1.0 } else { 0.0 })
715 .sum::<f64>();
716 Ok(correct / y_true.len() as f64)
717 }
718 }
719 }
720}