1use scirs2_core::ndarray::{ArrayView1, ArrayView2};
7use scirs2_core::random::{thread_rng, Rng};
8
9use super::automl_core::{AutoMLMethod, DataCharacteristics};
10use super::hyperparameter_optimizer::{MethodConfig, OptimizedMethod};
11use sklears_core::error::Result as SklResult;
12use std::time::{Duration, Instant};
13
/// Module-local alias so every signature below can use the crate's shared error type tersely.
type Result<T> = SklResult<T>;
15
/// Hyperparameter optimizer supporting several search strategies (Bayesian,
/// genetic, random, grid, PSO, simulated annealing, HyperBand).
///
/// Configured fluently via the `with_*` builder methods and driven through
/// `optimize_advanced`.
#[derive(Debug, Clone)]
pub struct AdvancedHyperparameterOptimizer {
    // Strategy dispatched on by `optimize_advanced`.
    optimization_strategy: OptimizationStrategy,
    // Upper bound on candidate evaluations for the iterative strategies.
    max_iterations: usize,
    // Wall-clock budget; loops stop once `start_time.elapsed()` exceeds it.
    time_budget: Duration,
    // Requested parallelism. NOTE(review): not consumed by any visible code path — confirm intent.
    parallel_workers: usize,
    // Optional early-stopping policy; `None` disables early stopping entirely.
    early_stopping: Option<EarlyStoppingConfig>,
}
25
/// Search strategy used to explore the hyperparameter space.
///
/// NOTE(review): in this module `ParticleSwarmOptimization`, `SimulatedAnnealing`
/// and `HyperBand` currently delegate to the random-search implementation.
#[derive(Debug, Clone, PartialEq)]
pub enum OptimizationStrategy {
    /// Exhaustive evaluation of a predefined parameter grid.
    GridSearch,
    /// Uniform random sampling of configurations.
    RandomSearch,
    /// Sequential, iteration-indexed candidate proposal (surrogate-style search).
    BayesianOptimization,
    /// Evolutionary search with tournament selection, crossover and mutation.
    GeneticAlgorithm,
    /// Swarm-based search (currently a random-search fallback).
    ParticleSwarmOptimization,
    /// Probabilistic hill climbing (currently a random-search fallback).
    SimulatedAnnealing,
    /// Bandit-based resource allocation (currently a random-search fallback).
    HyperBand,
}
43
/// Early-stopping policy for the iterative optimization loops.
#[derive(Debug, Clone)]
pub struct EarlyStoppingConfig {
    /// Consecutive non-improving iterations tolerated before stopping.
    pub patience: usize,
    /// Minimum score gain that counts as an improvement.
    pub min_improvement: f64,
    /// Whether the best configuration seen so far is kept on stop.
    pub restore_best: bool,
}
50
51impl AdvancedHyperparameterOptimizer {
52 pub fn new() -> Self {
53 Self {
54 optimization_strategy: OptimizationStrategy::BayesianOptimization,
55 max_iterations: 100,
56 time_budget: Duration::from_secs(300), parallel_workers: 1,
58 early_stopping: Some(EarlyStoppingConfig {
59 patience: 10,
60 min_improvement: 0.001,
61 restore_best: true,
62 }),
63 }
64 }
65
66 pub fn with_strategy(mut self, strategy: OptimizationStrategy) -> Self {
67 self.optimization_strategy = strategy;
68 self
69 }
70
71 pub fn with_max_iterations(mut self, max_iterations: usize) -> Self {
72 self.max_iterations = max_iterations;
73 self
74 }
75
76 pub fn with_time_budget(mut self, time_budget: Duration) -> Self {
77 self.time_budget = time_budget;
78 self
79 }
80
81 pub fn with_parallel_workers(mut self, workers: usize) -> Self {
82 self.parallel_workers = workers;
83 self
84 }
85
86 pub fn with_early_stopping(mut self, config: EarlyStoppingConfig) -> Self {
87 self.early_stopping = Some(config);
88 self
89 }
90
91 pub fn optimize_advanced(
93 &self,
94 method: &AutoMLMethod,
95 X: ArrayView2<f64>,
96 y: ArrayView1<f64>,
97 characteristics: &DataCharacteristics,
98 ) -> Result<OptimizedMethod> {
99 let start_time = Instant::now();
100
101 let best_config = match self.optimization_strategy {
102 OptimizationStrategy::BayesianOptimization => {
103 self.bayesian_optimization(method, X, y, characteristics, start_time)?
104 }
105 OptimizationStrategy::GeneticAlgorithm => {
106 self.genetic_algorithm_optimization(method, X, y, characteristics, start_time)?
107 }
108 OptimizationStrategy::RandomSearch => {
109 self.random_search_optimization(method, X, y, characteristics, start_time)?
110 }
111 OptimizationStrategy::GridSearch => {
112 self.grid_search_optimization(method, X, y, characteristics, start_time)?
113 }
114 OptimizationStrategy::ParticleSwarmOptimization => {
115 self.pso_optimization(method, X, y, characteristics, start_time)?
116 }
117 OptimizationStrategy::SimulatedAnnealing => {
118 self.simulated_annealing_optimization(method, X, y, characteristics, start_time)?
119 }
120 OptimizationStrategy::HyperBand => {
121 self.hyperband_optimization(method, X, y, characteristics, start_time)?
122 }
123 };
124
125 let estimated_cost = self.estimate_computational_cost(method, characteristics);
126
127 Ok(OptimizedMethod {
128 method_type: method.clone(),
129 config: best_config,
130 estimated_cost,
131 })
132 }
133
134 fn bayesian_optimization(
135 &self,
136 method: &AutoMLMethod,
137 X: ArrayView2<f64>,
138 y: ArrayView1<f64>,
139 characteristics: &DataCharacteristics,
140 start_time: Instant,
141 ) -> Result<MethodConfig> {
142 let mut best_config = self.generate_initial_config(method, characteristics)?;
144 let mut best_score = self.evaluate_config(method, &best_config, X, y)?;
145
146 for iteration in 0..self.max_iterations {
147 if start_time.elapsed() > self.time_budget {
148 break;
149 }
150
151 let candidate_config =
153 self.generate_candidate_config(method, characteristics, iteration)?;
154 let score = self.evaluate_config(method, &candidate_config, X, y)?;
155
156 if score > best_score {
157 best_score = score;
158 best_config = candidate_config;
159 }
160
161 if let Some(ref early_stopping) = self.early_stopping {
163 if score - best_score < early_stopping.min_improvement
164 && iteration > early_stopping.patience
165 {
166 break;
167 }
168 }
169 }
170
171 Ok(best_config)
172 }
173
174 fn genetic_algorithm_optimization(
175 &self,
176 method: &AutoMLMethod,
177 X: ArrayView2<f64>,
178 y: ArrayView1<f64>,
179 characteristics: &DataCharacteristics,
180 start_time: Instant,
181 ) -> Result<MethodConfig> {
182 const POPULATION_SIZE: usize = 20;
184 const MUTATION_RATE: f64 = 0.1;
185
186 let mut population = Vec::new();
188 for _ in 0..POPULATION_SIZE {
189 population.push(self.generate_initial_config(method, characteristics)?);
190 }
191
192 let mut best_config = population[0].clone();
193 let mut best_score = self.evaluate_config(method, &best_config, X, y)?;
194
195 let mut rng = thread_rng();
196 for _generation in 0..(self.max_iterations / POPULATION_SIZE) {
197 if start_time.elapsed() > self.time_budget {
198 break;
199 }
200
201 let mut scores = Vec::new();
203 for config in &population {
204 let score = self.evaluate_config(method, config, X, y)?;
205 scores.push(score);
206 if score > best_score {
207 best_score = score;
208 best_config = config.clone();
209 }
210 }
211
212 let mut new_population = Vec::new();
214 for _ in 0..POPULATION_SIZE {
215 let parent1_idx = self.select_parent(&scores, &mut rng);
216 let parent2_idx = self.select_parent(&scores, &mut rng);
217 let mut child =
218 self.crossover(&population[parent1_idx], &population[parent2_idx], &mut rng)?;
219
220 if rng.gen::<f64>() < MUTATION_RATE {
221 child = self.mutate(&child, method, characteristics, &mut rng)?;
222 }
223 new_population.push(child);
224 }
225
226 population = new_population;
227 }
228
229 Ok(best_config)
230 }
231
232 fn random_search_optimization(
233 &self,
234 method: &AutoMLMethod,
235 X: ArrayView2<f64>,
236 y: ArrayView1<f64>,
237 characteristics: &DataCharacteristics,
238 start_time: Instant,
239 ) -> Result<MethodConfig> {
240 let mut best_config = self.generate_initial_config(method, characteristics)?;
241 let mut best_score = self.evaluate_config(method, &best_config, X, y)?;
242
243 let mut rng = thread_rng();
244 for _ in 0..self.max_iterations {
245 if start_time.elapsed() > self.time_budget {
246 break;
247 }
248
249 let candidate_config =
250 self.generate_random_config(method, characteristics, &mut rng)?;
251 let score = self.evaluate_config(method, &candidate_config, X, y)?;
252
253 if score > best_score {
254 best_score = score;
255 best_config = candidate_config;
256 }
257 }
258
259 Ok(best_config)
260 }
261
262 fn grid_search_optimization(
263 &self,
264 method: &AutoMLMethod,
265 X: ArrayView2<f64>,
266 y: ArrayView1<f64>,
267 characteristics: &DataCharacteristics,
268 start_time: Instant,
269 ) -> Result<MethodConfig> {
270 let param_grid = self.generate_parameter_grid(method, characteristics)?;
272 let mut best_config = self.generate_initial_config(method, characteristics)?;
273 let mut best_score = self.evaluate_config(method, &best_config, X, y)?;
274
275 for config in param_grid {
276 if start_time.elapsed() > self.time_budget {
277 break;
278 }
279
280 let score = self.evaluate_config(method, &config, X, y)?;
281 if score > best_score {
282 best_score = score;
283 best_config = config;
284 }
285 }
286
287 Ok(best_config)
288 }
289
290 fn pso_optimization(
291 &self,
292 method: &AutoMLMethod,
293 X: ArrayView2<f64>,
294 y: ArrayView1<f64>,
295 characteristics: &DataCharacteristics,
296 start_time: Instant,
297 ) -> Result<MethodConfig> {
298 self.random_search_optimization(method, X, y, characteristics, start_time)
300 }
301
302 fn simulated_annealing_optimization(
303 &self,
304 method: &AutoMLMethod,
305 X: ArrayView2<f64>,
306 y: ArrayView1<f64>,
307 characteristics: &DataCharacteristics,
308 start_time: Instant,
309 ) -> Result<MethodConfig> {
310 self.random_search_optimization(method, X, y, characteristics, start_time)
312 }
313
314 fn hyperband_optimization(
315 &self,
316 method: &AutoMLMethod,
317 X: ArrayView2<f64>,
318 y: ArrayView1<f64>,
319 characteristics: &DataCharacteristics,
320 start_time: Instant,
321 ) -> Result<MethodConfig> {
322 self.random_search_optimization(method, X, y, characteristics, start_time)
324 }
325
326 fn generate_initial_config(
328 &self,
329 method: &AutoMLMethod,
330 characteristics: &DataCharacteristics,
331 ) -> Result<MethodConfig> {
332 match method {
333 AutoMLMethod::UnivariateFiltering => Ok(MethodConfig::Univariate {
334 k: characteristics.n_features / 4,
335 }),
336 AutoMLMethod::CorrelationBased => Ok(MethodConfig::Correlation { threshold: 0.7 }),
337 AutoMLMethod::TreeBased => Ok(MethodConfig::Tree {
338 n_estimators: 50,
339 max_depth: 6,
340 }),
341 AutoMLMethod::LassoBased => Ok(MethodConfig::Lasso { alpha: 0.01 }),
342 _ => Ok(MethodConfig::Univariate {
343 k: characteristics.n_features / 4,
344 }),
345 }
346 }
347
348 fn generate_candidate_config(
349 &self,
350 method: &AutoMLMethod,
351 _characteristics: &DataCharacteristics,
352 iteration: usize,
353 ) -> Result<MethodConfig> {
354 let mut rng = thread_rng();
355 match method {
356 AutoMLMethod::UnivariateFiltering => {
357 let k = (iteration % 100 + 1) * 10; Ok(MethodConfig::Univariate { k })
359 }
360 AutoMLMethod::CorrelationBased => {
361 let threshold = 0.5 + (iteration as f64 * 0.01);
362 Ok(MethodConfig::Correlation { threshold })
363 }
364 _ => self.generate_random_config(method, _characteristics, &mut rng),
365 }
366 }
367
368 fn generate_random_config<R: Rng>(
369 &self,
370 method: &AutoMLMethod,
371 characteristics: &DataCharacteristics,
372 rng: &mut R,
373 ) -> Result<MethodConfig> {
374 match method {
375 AutoMLMethod::UnivariateFiltering => {
376 let k = rng.gen_range(1..characteristics.n_features.min(101));
377 Ok(MethodConfig::Univariate { k })
378 }
379 AutoMLMethod::CorrelationBased => {
380 let threshold = rng.gen_range(0.1..1.9);
381 Ok(MethodConfig::Correlation { threshold })
382 }
383 AutoMLMethod::TreeBased => {
384 let n_estimators = rng.gen_range(10..201);
385 let max_depth = rng.gen_range(3..16);
386 Ok(MethodConfig::Tree {
387 n_estimators,
388 max_depth,
389 })
390 }
391 AutoMLMethod::LassoBased => {
392 let alpha = rng.gen_range(0.001..2.0);
393 Ok(MethodConfig::Lasso { alpha })
394 }
395 _ => Ok(MethodConfig::Univariate {
396 k: characteristics.n_features / 4,
397 }),
398 }
399 }
400
401 fn generate_parameter_grid(
402 &self,
403 method: &AutoMLMethod,
404 characteristics: &DataCharacteristics,
405 ) -> Result<Vec<MethodConfig>> {
406 let mut grid = Vec::new();
407 match method {
408 AutoMLMethod::UnivariateFiltering => {
409 for k in [10, 20, 50, 100].iter() {
410 if *k <= characteristics.n_features {
411 grid.push(MethodConfig::Univariate { k: *k });
412 }
413 }
414 }
415 AutoMLMethod::CorrelationBased => {
416 for threshold in [0.5, 0.6, 0.7, 0.8, 0.9].iter() {
417 grid.push(MethodConfig::Correlation {
418 threshold: *threshold,
419 });
420 }
421 }
422 _ => {
423 grid.push(self.generate_initial_config(method, characteristics)?);
424 }
425 }
426 Ok(grid)
427 }
428
429 fn evaluate_config(
430 &self,
431 _method: &AutoMLMethod,
432 _config: &MethodConfig,
433 _X: ArrayView2<f64>,
434 _y: ArrayView1<f64>,
435 ) -> Result<f64> {
436 let mut rng = thread_rng();
438 Ok(rng.gen_range(0.0..2.0))
439 }
440
441 fn select_parent<R: Rng>(&self, scores: &[f64], rng: &mut R) -> usize {
442 let idx1 = rng.gen_range(0..scores.len());
444 let idx2 = rng.gen_range(0..scores.len());
445 if scores[idx1] > scores[idx2] {
446 idx1
447 } else {
448 idx2
449 }
450 }
451
452 fn crossover<R: Rng>(
453 &self,
454 parent1: &MethodConfig,
455 parent2: &MethodConfig,
456 rng: &mut R,
457 ) -> Result<MethodConfig> {
458 if rng.gen::<bool>() {
460 Ok(parent1.clone())
461 } else {
462 Ok(parent2.clone())
463 }
464 }
465
466 fn mutate<R: Rng>(
467 &self,
468 _config: &MethodConfig,
469 method: &AutoMLMethod,
470 characteristics: &DataCharacteristics,
471 rng: &mut R,
472 ) -> Result<MethodConfig> {
473 self.generate_random_config(method, characteristics, rng)
475 }
476
477 fn estimate_computational_cost(
478 &self,
479 method: &AutoMLMethod,
480 characteristics: &DataCharacteristics,
481 ) -> f64 {
482 let base_cost =
483 characteristics.n_samples as f64 * characteristics.n_features as f64 / 1_000_000.0;
484 let strategy_multiplier = match self.optimization_strategy {
485 OptimizationStrategy::GridSearch => 10.0,
486 OptimizationStrategy::RandomSearch => 5.0,
487 OptimizationStrategy::BayesianOptimization => 15.0,
488 OptimizationStrategy::GeneticAlgorithm => 20.0,
489 OptimizationStrategy::ParticleSwarmOptimization => 18.0,
490 OptimizationStrategy::SimulatedAnnealing => 12.0,
491 OptimizationStrategy::HyperBand => 8.0,
492 };
493
494 let method_multiplier = match method {
495 AutoMLMethod::UnivariateFiltering => 0.1,
496 AutoMLMethod::CorrelationBased => 0.5,
497 AutoMLMethod::TreeBased => 2.0,
498 AutoMLMethod::LassoBased => 1.5,
499 AutoMLMethod::WrapperBased => 10.0,
500 AutoMLMethod::EnsembleBased => 5.0,
501 AutoMLMethod::Hybrid => 3.0,
502 AutoMLMethod::NeuralArchitectureSearch => 15.0,
503 AutoMLMethod::TransferLearning => 8.0,
504 AutoMLMethod::MetaLearningEnsemble => 12.0,
505 };
506
507 base_cost * strategy_multiplier * method_multiplier
508 }
509}
510
511impl Default for AdvancedHyperparameterOptimizer {
512 fn default() -> Self {
513 Self::new()
514 }
515}