1use crate::{amacp::AMacPConfig, novograd::NovoGradConfig};
23use scirs2_core::random::*; use serde::{Deserialize, Serialize};
26use std::collections::HashMap;
27use std::time::{Duration, Instant};
28use trustformers_core::errors::{Result, TrustformersError};
29
/// Search ranges for optimizer hyperparameters.
///
/// Continuous parameters are inclusive `(min, max)` ranges; `batch_sizes`
/// lists the discrete candidates. `custom_params` carries extra named ranges
/// (e.g. warmup steps) keyed by parameter name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HyperparameterSpace {
    /// Learning-rate range; sampled log-uniformly when `log_scale_lr` is set.
    pub learning_rate: (f32, f32),
    /// Range for the first-moment (momentum) decay coefficient.
    pub beta1: (f32, f32),
    /// Range for the second-moment decay coefficient.
    pub beta2: (f32, f32),
    /// Range for the weight-decay coefficient.
    pub weight_decay: (f32, f32),
    /// Range for the numerical-stability epsilon.
    pub epsilon: (f32, f32),
    /// Discrete batch-size candidates to choose from.
    pub batch_sizes: Vec<usize>,
    /// Sample the learning rate on a log scale instead of linearly.
    pub log_scale_lr: bool,
    /// Additional named parameter ranges (e.g. "warmup_steps").
    pub custom_params: HashMap<String, (f32, f32)>,
}
50
51impl Default for HyperparameterSpace {
52 fn default() -> Self {
53 Self {
54 learning_rate: (1e-5, 1e-1),
55 beta1: (0.8, 0.999),
56 beta2: (0.9, 0.9999),
57 weight_decay: (0.0, 1e-1),
58 epsilon: (1e-10, 1e-6),
59 batch_sizes: vec![16, 32, 64, 128, 256],
60 log_scale_lr: true,
61 custom_params: HashMap::new(),
62 }
63 }
64}
65
66impl HyperparameterSpace {
67 pub fn for_transformers() -> Self {
69 Self {
70 learning_rate: (1e-5, 5e-3),
71 beta1: (0.85, 0.95),
72 beta2: (0.95, 0.999),
73 weight_decay: (1e-3, 1e-1),
74 epsilon: (1e-8, 1e-6),
75 batch_sizes: vec![32, 64, 128, 256],
76 log_scale_lr: true,
77 custom_params: [
78 ("warmup_steps".to_string(), (1000.0, 10000.0)),
79 ("max_grad_norm".to_string(), (0.5, 2.0)),
80 ]
81 .into_iter()
82 .collect(),
83 }
84 }
85
86 pub fn for_vision() -> Self {
88 Self {
89 learning_rate: (1e-4, 1e-1),
90 beta1: (0.9, 0.99),
91 beta2: (0.999, 0.9999),
92 weight_decay: (1e-5, 1e-2),
93 epsilon: (1e-8, 1e-6),
94 batch_sizes: vec![16, 32, 64, 128],
95 log_scale_lr: true,
96 custom_params: HashMap::new(),
97 }
98 }
99
100 pub fn for_scientific_computing() -> Self {
102 Self {
103 learning_rate: (1e-6, 1e-2),
104 beta1: (0.95, 0.999),
105 beta2: (0.999, 0.9999),
106 weight_decay: (0.0, 1e-4),
107 epsilon: (1e-12, 1e-8),
108 batch_sizes: vec![32, 64, 128],
109 log_scale_lr: true,
110 custom_params: [("precision_threshold".to_string(), (1e-8, 1e-6))]
111 .into_iter()
112 .collect(),
113 }
114 }
115}
116
/// One concrete hyperparameter configuration drawn from a
/// [`HyperparameterSpace`], optionally annotated with evaluation results.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HyperparameterSample {
    // Sampled optimizer settings.
    pub learning_rate: f32,
    pub beta1: f32,
    pub beta2: f32,
    pub weight_decay: f32,
    pub epsilon: f32,
    pub batch_size: usize,
    // Sampled values for any custom parameters in the space.
    pub custom_params: HashMap<String, f32>,
    // Evaluation results; `None` until the sample has been evaluated.
    pub performance_score: Option<f32>,
    pub training_time: Option<f32>,
    pub memory_usage: Option<usize>,
}
134
/// Description of the training task being optimized for: its scale, budget,
/// convergence target, and the metric the tuner should track.
#[derive(Debug, Clone)]
pub struct OptimizationTask {
    /// Human-readable task name (used in progress output).
    pub name: String,
    /// Parameter count of the model.
    pub model_size: usize,
    /// Number of training examples.
    pub dataset_size: usize,
    /// Training budget per trial.
    pub max_epochs: usize,
    /// Loss-improvement threshold considered "converged".
    pub convergence_threshold: f32,
    /// Name of the metric to optimize (e.g. "loss", "perplexity").
    pub target_metric: String,
    /// Category of task, see [`TaskType`].
    pub task_type: TaskType,
}
146
/// Broad category of machine-learning task a tuning run targets.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TaskType {
    Classification,
    Regression,
    LanguageModeling,
    ComputerVision,
    ScientificComputing,
    Reinforcement,
}
156
/// Metrics produced by evaluating one hyperparameter configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceMetrics {
    /// Loss at the end of training.
    pub final_loss: f32,
    /// Epoch at which training converged.
    pub convergence_epoch: usize,
    /// Wall-clock training duration.
    pub training_time: Duration,
    /// Peak memory usage in bytes.
    pub memory_peak: usize,
    /// Training-stability estimate in [0, 1].
    pub stability_score: f32,
    /// Samples processed per unit time.
    pub throughput: f32,
    /// Variance of gradient norms over training.
    pub gradient_norm_variance: f32,
    /// Single scalar combining the above, used as the optimization target.
    pub composite_score: f32,
}
169
/// TPE-style (Tree-structured Parzen Estimator-inspired) Bayesian optimizer:
/// partitions observed samples into "good" and "poor" around a running
/// median threshold and proposes new points near good ones.
#[derive(Debug)]
pub struct BayesianOptimizer {
    /// Search space to sample from.
    space: HyperparameterSpace,
    /// All evaluated samples, in evaluation order.
    samples: Vec<HyperparameterSample>,
    /// Samples scoring above `performance_threshold`, kept sorted best-first.
    good_samples: Vec<HyperparameterSample>,
    /// Samples at or below the threshold.
    poor_samples: Vec<HyperparameterSample>,
    /// Running median of observed scores; split point for good/poor.
    performance_threshold: f32,
    #[allow(dead_code)]
    exploration_factor: f32,
    /// Number of purely random trials before TPE sampling kicks in.
    n_startup_trials: usize,
    /// Fraction of all samples retained in the "good" set.
    gamma: f32,
}
183
184impl BayesianOptimizer {
185 pub fn new(space: HyperparameterSpace) -> Self {
186 Self {
187 space,
188 samples: Vec::new(),
189 good_samples: Vec::new(),
190 poor_samples: Vec::new(),
191 performance_threshold: 0.0,
192 exploration_factor: 0.25,
193 n_startup_trials: 20,
194 gamma: 0.25,
195 }
196 }
197
198 pub fn suggest(&mut self) -> HyperparameterSample {
200 if self.samples.len() < self.n_startup_trials {
201 self.random_sample()
203 } else {
204 self.tpe_sample()
206 }
207 }
208
209 pub fn update(&mut self, mut sample: HyperparameterSample, performance: f32) {
211 sample.performance_score = Some(performance);
212
213 let mut performances: Vec<f32> =
215 self.samples.iter().filter_map(|s| s.performance_score).collect();
216 performances.push(performance);
217 performances.sort_by(|a, b| a.partial_cmp(b).unwrap());
218
219 if !performances.is_empty() {
220 self.performance_threshold = performances[performances.len() / 2];
221 }
222
223 if performance > self.performance_threshold {
225 self.good_samples.push(sample.clone());
226 } else {
227 self.poor_samples.push(sample.clone());
228 }
229
230 self.samples.push(sample);
231
232 if self.good_samples.len() > 1 {
234 self.good_samples.sort_by(|a, b| {
235 b.performance_score
236 .unwrap_or(0.0)
237 .partial_cmp(&a.performance_score.unwrap_or(0.0))
238 .unwrap()
239 });
240 let keep_count = ((self.samples.len() as f32 * self.gamma).ceil() as usize).max(1);
241 self.good_samples.truncate(keep_count);
242 }
243 }
244
245 fn random_sample(&self) -> HyperparameterSample {
246 let mut rng = thread_rng();
248
249 let learning_rate = if self.space.log_scale_lr {
250 let log_min = self.space.learning_rate.0.ln();
251 let log_max = self.space.learning_rate.1.ln();
252 (rng.random::<f32>() * (log_max - log_min) + log_min).exp()
253 } else {
254 rng.gen_range(self.space.learning_rate.0..=self.space.learning_rate.1)
255 };
256
257 HyperparameterSample {
258 learning_rate,
259 beta1: rng.gen_range(self.space.beta1.0..=self.space.beta1.1),
260 beta2: rng.gen_range(self.space.beta2.0..=self.space.beta2.1),
261 weight_decay: rng.gen_range(self.space.weight_decay.0..=self.space.weight_decay.1),
262 epsilon: rng.gen_range(self.space.epsilon.0..=self.space.epsilon.1),
263 batch_size: {
264 let idx = rng.gen_range(0..self.space.batch_sizes.len());
265 self.space.batch_sizes[idx]
266 },
267 custom_params: self
268 .space
269 .custom_params
270 .iter()
271 .map(|(k, &(min, max))| (k.clone(), rng.gen_range(min..=max)))
272 .collect(),
273 performance_score: None,
274 training_time: None,
275 memory_usage: None,
276 }
277 }
278
279 fn tpe_sample(&self) -> HyperparameterSample {
280 let mut rng = thread_rng();
284
285 if self.good_samples.is_empty() {
286 return self.random_sample();
287 }
288
289 let idx = rng.gen_range(0..self.good_samples.len());
291 let good_sample = &self.good_samples[idx];
292 let noise_factor = 0.1;
293
294 let learning_rate = if self.space.log_scale_lr {
295 let log_lr = good_sample.learning_rate.ln();
296 let noise = rng.gen_range(-noise_factor..=noise_factor);
297 (log_lr + noise)
298 .exp()
299 .clamp(self.space.learning_rate.0, self.space.learning_rate.1)
300 } else {
301 let noise = rng.gen_range(-noise_factor..=noise_factor)
302 * (self.space.learning_rate.1 - self.space.learning_rate.0);
303 (good_sample.learning_rate + noise)
304 .clamp(self.space.learning_rate.0, self.space.learning_rate.1)
305 };
306
307 HyperparameterSample {
308 learning_rate,
309 beta1: (good_sample.beta1 + rng.gen_range(-0.01..=0.01))
310 .clamp(self.space.beta1.0, self.space.beta1.1),
311 beta2: (good_sample.beta2 + rng.gen_range(-0.001..=0.001))
312 .clamp(self.space.beta2.0, self.space.beta2.1),
313 weight_decay: (good_sample.weight_decay
314 + rng.gen_range(-noise_factor..=noise_factor)
315 * (self.space.weight_decay.1 - self.space.weight_decay.0))
316 .clamp(self.space.weight_decay.0, self.space.weight_decay.1),
317 epsilon: good_sample.epsilon,
318 batch_size: good_sample.batch_size,
319 custom_params: good_sample.custom_params.clone(),
320 performance_score: None,
321 training_time: None,
322 memory_usage: None,
323 }
324 }
325
326 pub fn get_best(&self) -> Option<&HyperparameterSample> {
328 self.samples.iter().filter(|s| s.performance_score.is_some()).max_by(|a, b| {
329 a.performance_score.unwrap().partial_cmp(&b.performance_score.unwrap()).unwrap()
330 })
331 }
332}
333
/// Wraps a [`BayesianOptimizer`] to optimize a weighted combination of
/// several objectives, tracking the best-scoring samples as a "Pareto front".
#[derive(Debug)]
pub struct MultiObjectiveOptimizer {
    /// Underlying single-objective optimizer fed the weighted score.
    bayesian_opt: BayesianOptimizer,
    #[allow(dead_code)]
    objectives: Vec<String>,
    /// One weight per objective, applied in a fixed order (loss,
    /// convergence speed, stability, training time).
    weights: Vec<f32>,
    /// Non-dominated samples; refreshed after every update.
    pareto_front: Vec<HyperparameterSample>,
}
343
344impl MultiObjectiveOptimizer {
345 pub fn new(space: HyperparameterSpace, objectives: Vec<String>, weights: Vec<f32>) -> Self {
346 assert_eq!(
347 objectives.len(),
348 weights.len(),
349 "Objectives and weights must have same length"
350 );
351
352 Self {
353 bayesian_opt: BayesianOptimizer::new(space),
354 objectives,
355 weights,
356 pareto_front: Vec::new(),
357 }
358 }
359
360 pub fn update_multi_objective(
362 &mut self,
363 sample: HyperparameterSample,
364 metrics: &PerformanceMetrics,
365 ) {
366 let mut weighted_score = 0.0;
368 weighted_score += self.weights[0] * (1.0 / (1.0 + metrics.final_loss)); weighted_score += self.weights[1] * (1.0 / (1.0 + metrics.convergence_epoch as f32)); if self.weights.len() > 2 {
371 weighted_score += self.weights[2] * metrics.stability_score; }
373 if self.weights.len() > 3 {
374 weighted_score += self.weights[3] * (1.0 / (1.0 + metrics.training_time.as_secs_f32()));
375 }
377
378 self.bayesian_opt.update(sample, weighted_score);
379 self.update_pareto_front();
380 }
381
382 fn update_pareto_front(&mut self) {
383 self.pareto_front.clear();
385
386 for sample in &self.bayesian_opt.samples {
387 if let Some(sample_score) = sample.performance_score {
388 let mut is_dominated = false;
389
390 for other in &self.bayesian_opt.samples {
391 if let Some(other_score) = other.performance_score {
392 if other_score > sample_score {
393 is_dominated = true;
394 break;
395 }
396 }
397 }
398
399 if !is_dominated {
400 self.pareto_front.push(sample.clone());
401 }
402 }
403 }
404 }
405}
406
/// Top-level driver that runs a budgeted trial loop over a search space for
/// a given optimizer and task, tracking the best configuration found.
#[derive(Debug)]
pub struct HyperparameterTuner {
    /// Which optimizer family is being tuned (informational).
    optimizer_type: OptimizerType,
    /// The search space; cloned into the inner optimizers.
    search_space: HyperparameterSpace,
    /// Single-objective optimizer used by default.
    bayesian_opt: BayesianOptimizer,
    /// Multi-objective wrapper; used instead when enabled.
    multi_objective_opt: Option<MultiObjectiveOptimizer>,
    /// Description of the training task.
    task: OptimizationTask,
    /// Trial budget.
    max_trials: usize,
    /// Trials consumed so far.
    current_trial: usize,
    /// Best configuration seen, with its composite score attached.
    best_config: Option<HyperparameterSample>,
    /// Every (configuration, metrics) pair evaluated, in order.
    optimization_history: Vec<(HyperparameterSample, PerformanceMetrics)>,
}
420
/// Optimizer families the tuner can target.
#[derive(Debug, Clone)]
pub enum OptimizerType {
    Adam,
    AdamW,
    AMacP,
    NovoGrad,
    AveragedAdam,
    Lion,
    LAMB,
}
431
impl HyperparameterTuner {
    /// Creates a tuner for `optimizer_type` over `search_space`, budgeted to
    /// `max_trials` evaluations of `task`.
    pub fn new(
        optimizer_type: OptimizerType,
        search_space: HyperparameterSpace,
        task: OptimizationTask,
        max_trials: usize,
    ) -> Self {
        let bayesian_opt = BayesianOptimizer::new(search_space.clone());

        Self {
            optimizer_type,
            search_space,
            bayesian_opt,
            multi_objective_opt: None,
            task,
            max_trials,
            current_trial: 0,
            best_config: None,
            optimization_history: Vec::new(),
        }
    }

    /// Switches subsequent updates to weighted multi-objective scoring.
    /// Panics (via `MultiObjectiveOptimizer::new`) if lengths mismatch.
    pub fn enable_multi_objective(&mut self, objectives: Vec<String>, weights: Vec<f32>) {
        self.multi_objective_opt = Some(MultiObjectiveOptimizer::new(
            self.search_space.clone(),
            objectives,
            weights,
        ));
    }

    /// Returns the next configuration to try, or `None` once the trial
    /// budget is exhausted. Consumes one trial from the budget.
    pub fn suggest_next(&mut self) -> Option<HyperparameterSample> {
        if self.current_trial >= self.max_trials {
            return None;
        }

        self.current_trial += 1;
        Some(self.bayesian_opt.suggest())
    }

    /// Evaluates `config` (via the simulated trainer), feeds the result to
    /// the active optimizer, updates the best-so-far, and records history.
    pub fn evaluate_config(&mut self, config: HyperparameterSample) -> Result<PerformanceMetrics> {
        let _start_time = Instant::now();

        let metrics = self.simulate_training(&config)?;

        if let Some(ref mut multi_opt) = self.multi_objective_opt {
            multi_opt.update_multi_objective(config.clone(), &metrics);
        } else {
            self.bayesian_opt.update(config.clone(), metrics.composite_score);
        }

        // Stored best_config always has performance_score set (assigned just
        // below), so unwrap_or(0.0) is purely defensive.
        if self.best_config.is_none()
            || metrics.composite_score
                > self.best_config.as_ref().unwrap().performance_score.unwrap_or(0.0)
        {
            let mut best_config = config.clone();
            best_config.performance_score = Some(metrics.composite_score);
            self.best_config = Some(best_config);
        }

        self.optimization_history.push((config, metrics.clone()));
        Ok(metrics)
    }

    /// Synthetic stand-in for a real training run: derives heuristic quality
    /// factors from the configuration, adds noise, and produces plausible
    /// metrics. The constants here are tuning knobs, not measured values.
    fn simulate_training(&self, config: &HyperparameterSample) -> Result<PerformanceMetrics> {
        let mut rng = thread_rng();

        // Penalize learning rates outside a heuristically "safe" band.
        let lr_factor = if config.learning_rate > 1e-2 {
            0.7_f64
        } else if config.learning_rate < 1e-5 {
            0.8_f64
        } else {
            1.0_f64
        };

        // Penalties for extreme momentum / variance-decay settings.
        let momentum_factor = if config.beta1 > 0.95 { 0.9_f64 } else { 1.0_f64 };
        let variance_factor = if config.beta2 < 0.99 { 0.85_f64 } else { 1.0_f64 };

        let regularization_factor = if config.weight_decay > 1e-2 { 0.8_f64 } else { 1.0_f64 };

        let base_performance = 0.8_f64;
        let noise = rng.gen_range(-0.1_f64..=0.1_f64);
        // Loss is floored at 0.01 so the composite score's 1/loss term stays
        // finite.
        let final_loss = (1.0_f64
            - base_performance
                * lr_factor
                * momentum_factor
                * variance_factor
                * regularization_factor
            + noise)
            .max(0.01_f64);

        let convergence_epoch = (50.0 / lr_factor) as usize;
        let training_time = Duration::from_secs((convergence_epoch as f32 * 0.1) as u64);
        let memory_peak = (config.batch_size * 1024 * 1024) + rng.gen_range(0..1024 * 1024);

        let stability_score = momentum_factor * variance_factor;
        let throughput =
            (config.batch_size as f32) / (training_time.as_secs_f32() / convergence_epoch as f32);
        let gradient_norm_variance = rng.gen_range(0.01..=0.5);

        // Fixed-weight blend: 40% loss, 30% convergence speed, 20% stability,
        // 10% (capped) throughput.
        let composite_score = (1.0_f64 / final_loss) * 0.4_f64
            + (1.0_f64 / convergence_epoch as f64) * 0.3_f64
            + stability_score * 0.2_f64
            + (throughput as f64 / 1000.0_f64).min(1.0_f64) * 0.1_f64;

        Ok(PerformanceMetrics {
            final_loss: final_loss as f32,
            convergence_epoch,
            training_time,
            memory_peak,
            stability_score: stability_score as f32,
            throughput,
            gradient_norm_variance,
            composite_score: composite_score as f32,
        })
    }

    /// Runs the full trial loop (with early stop at score > 0.95), prints a
    /// summary, and returns the best configuration found.
    ///
    /// # Errors
    /// Returns an `InvalidConfiguration` error if no trial produced a valid
    /// configuration.
    pub fn optimize(&mut self) -> Result<HyperparameterSample> {
        println!(
            "š Starting hyperparameter optimization for {:?}",
            self.optimizer_type
        );
        println!(
            "š Task: {} (max {} trials)",
            self.task.name, self.max_trials
        );

        // NOTE(review): trial_results duplicates optimization_history and is
        // never read after the loop — candidate for removal.
        let mut trial_results = Vec::new();

        while let Some(config) = self.suggest_next() {
            println!("\nš Trial {}/{}", self.current_trial, self.max_trials);
            println!(
                " LR: {:.2e}, βā: {:.3}, βā: {:.4}, WD: {:.2e}",
                config.learning_rate, config.beta1, config.beta2, config.weight_decay
            );

            let metrics = self.evaluate_config(config.clone())?;
            trial_results.push((config, metrics.clone()));

            println!(
                " š Score: {:.4}, Loss: {:.4}, Epochs: {}, Time: {:.1}s",
                metrics.composite_score,
                metrics.final_loss,
                metrics.convergence_epoch,
                metrics.training_time.as_secs_f32()
            );

            // Early stop once a configuration is good enough.
            if metrics.composite_score > 0.95 {
                println!("šÆ Early stopping - excellent configuration found!");
                break;
            }
        }

        self.print_optimization_summary();

        self.best_config.clone().ok_or_else(|| {
            TrustformersError::new(trustformers_core::errors::ErrorKind::InvalidConfiguration {
                field: "hyperparameter_optimization".to_string(),
                reason: "No valid configuration found".to_string(),
            })
        })
    }

    /// Prints the best configuration and aggregate score statistics.
    fn print_optimization_summary(&self) {
        println!("\nš Hyperparameter Optimization Summary");
        println!("=====================================");

        if let Some(ref best) = self.best_config {
            println!("š Best Configuration Found:");
            println!(" Learning Rate: {:.2e}", best.learning_rate);
            println!(" Beta1: {:.4}", best.beta1);
            println!(" Beta2: {:.4}", best.beta2);
            println!(" Weight Decay: {:.2e}", best.weight_decay);
            println!(" Batch Size: {}", best.batch_size);
            println!(
                " Performance Score: {:.4}",
                best.performance_score.unwrap_or(0.0)
            );
        }

        println!("\nš Optimization Statistics:");
        println!(" Total Trials: {}", self.optimization_history.len());

        if !self.optimization_history.is_empty() {
            let scores: Vec<f32> =
                self.optimization_history.iter().map(|(_, m)| m.composite_score).collect();
            let avg_score = scores.iter().sum::<f32>() / scores.len() as f32;
            let max_score = scores.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));
            let min_score = scores.iter().fold(f32::INFINITY, |a, &b| a.min(b));

            println!(" Average Score: {:.4}", avg_score);
            println!(" Score Range: {:.4} - {:.4}", min_score, max_score);
            // NOTE(review): relative improvement divides by min_score, which
            // produces inf/NaN if a score of 0 ever occurs — benign for the
            // current simulator (scores > 0) but worth confirming.
            println!(
                " Improvement: {:.1}%",
                ((max_score - min_score) / min_score * 100.0).max(0.0)
            );
        }
    }

    /// Full evaluation history, in trial order.
    pub fn get_history(&self) -> &[(HyperparameterSample, PerformanceMetrics)] {
        &self.optimization_history
    }

    /// Pareto front from the multi-objective optimizer, if enabled.
    pub fn get_pareto_front(&self) -> Option<&[HyperparameterSample]> {
        self.multi_objective_opt.as_ref().map(|opt| opt.pareto_front.as_slice())
    }
}
654
/// Convenience entry points that run a full tuning pass and translate the
/// winning sample into an optimizer-specific config struct.
impl HyperparameterTuner {
    /// Tunes AMacP for a transformer language-modeling task and returns the
    /// best configuration merged over [`AMacPConfig::for_transformers`]
    /// defaults.
    ///
    /// # Errors
    /// Propagates any error from [`HyperparameterTuner::optimize`].
    pub fn optimize_amacp_for_transformers(max_trials: usize) -> Result<AMacPConfig> {
        let space = HyperparameterSpace::for_transformers();
        let task = OptimizationTask {
            name: "Transformer Language Modeling".to_string(),
            model_size: 125_000_000, // ~GPT-2-small scale
            dataset_size: 1_000_000,
            max_epochs: 100,
            convergence_threshold: 0.01,
            target_metric: "perplexity".to_string(),
            task_type: TaskType::LanguageModeling,
        };

        let mut tuner = HyperparameterTuner::new(OptimizerType::AMacP, space, task, max_trials);

        let best_config = tuner.optimize()?;

        // Tuned fields override the transformer preset; the rest keep their
        // preset values.
        Ok(AMacPConfig {
            learning_rate: best_config.learning_rate,
            beta1: best_config.beta1,
            beta2: best_config.beta2,
            weight_decay: best_config.weight_decay,
            epsilon: best_config.epsilon,
            ..AMacPConfig::for_transformers()
        })
    }

    /// Tunes NovoGrad for a large-LM task and returns the best configuration
    /// merged over [`NovoGradConfig::for_large_language_models`] defaults.
    ///
    /// # Errors
    /// Propagates any error from [`HyperparameterTuner::optimize`].
    pub fn optimize_novograd_for_llms(max_trials: usize) -> Result<NovoGradConfig> {
        let space = HyperparameterSpace::for_transformers();
        let task = OptimizationTask {
            name: "Large Language Model Training".to_string(),
            model_size: 1_000_000_000, // ~1B-parameter scale
            dataset_size: 10_000_000,
            max_epochs: 50,
            convergence_threshold: 0.005,
            target_metric: "loss".to_string(),
            task_type: TaskType::LanguageModeling,
        };

        let mut tuner = HyperparameterTuner::new(OptimizerType::NovoGrad, space, task, max_trials);

        let best_config = tuner.optimize()?;

        Ok(NovoGradConfig {
            learning_rate: best_config.learning_rate,
            beta1: best_config.beta1,
            beta2: best_config.beta2,
            weight_decay: best_config.weight_decay,
            epsilon: best_config.epsilon,
            ..NovoGradConfig::for_large_language_models()
        })
    }
}
711
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_hyperparameter_space_creation() {
        // Default space exposes the documented LR range on a log scale.
        let default_space = HyperparameterSpace::default();
        assert_eq!(default_space.learning_rate, (1e-5, 1e-1));
        assert!(default_space.log_scale_lr);

        // Transformer preset carries its warmup custom parameter.
        let llm_space = HyperparameterSpace::for_transformers();
        assert!(llm_space.custom_params.contains_key("warmup_steps"));
    }

    #[test]
    fn test_bayesian_optimizer_suggestion() {
        let mut opt = BayesianOptimizer::new(HyperparameterSpace::default());

        // Suggested values must fall inside the default ranges.
        let candidate = opt.suggest();
        assert!(candidate.learning_rate >= 1e-5 && candidate.learning_rate <= 1e-1);
        assert!(candidate.beta1 >= 0.8 && candidate.beta1 <= 0.999);
    }

    #[test]
    fn test_bayesian_optimizer_update() {
        let mut opt = BayesianOptimizer::new(HyperparameterSpace::default());

        let candidate = opt.suggest();
        opt.update(candidate, 0.85);

        // One sample recorded, and it is retrievable as the best.
        assert_eq!(opt.samples.len(), 1);
        assert!(opt.get_best().is_some());
    }

    #[test]
    fn test_hyperparameter_tuner_creation() {
        let task = OptimizationTask {
            name: "Test Task".to_string(),
            model_size: 1000,
            dataset_size: 10000,
            max_epochs: 10,
            convergence_threshold: 0.01,
            target_metric: "accuracy".to_string(),
            task_type: TaskType::Classification,
        };
        let tuner =
            HyperparameterTuner::new(OptimizerType::Adam, HyperparameterSpace::for_vision(), task, 50);

        // Fresh tuner: full budget remaining, no trials consumed.
        assert_eq!(tuner.max_trials, 50);
        assert_eq!(tuner.current_trial, 0);
    }

    #[test]
    fn test_multi_objective_optimizer() {
        let mut opt = MultiObjectiveOptimizer::new(
            HyperparameterSpace::default(),
            vec!["accuracy".to_string(), "speed".to_string()],
            vec![0.7, 0.3],
        );

        let candidate = HyperparameterSample {
            learning_rate: 1e-3,
            beta1: 0.9,
            beta2: 0.999,
            weight_decay: 1e-4,
            epsilon: 1e-8,
            batch_size: 64,
            custom_params: HashMap::new(),
            performance_score: None,
            training_time: None,
            memory_usage: None,
        };
        let observed = PerformanceMetrics {
            final_loss: 0.1,
            convergence_epoch: 25,
            training_time: Duration::from_secs(120),
            memory_peak: 1024 * 1024,
            stability_score: 0.9,
            throughput: 1000.0,
            gradient_norm_variance: 0.1,
            composite_score: 0.85,
        };

        // A single update must populate the Pareto front.
        opt.update_multi_objective(candidate, &observed);
        assert!(!opt.pareto_front.is_empty());
    }

    #[test]
    fn test_performance_metrics_calculation() {
        let task = OptimizationTask {
            name: "Test".to_string(),
            model_size: 1000,
            dataset_size: 1000,
            max_epochs: 10,
            convergence_threshold: 0.01,
            target_metric: "loss".to_string(),
            task_type: TaskType::Regression,
        };
        let tuner =
            HyperparameterTuner::new(OptimizerType::Adam, HyperparameterSpace::default(), task, 10);

        let candidate = HyperparameterSample {
            learning_rate: 1e-3,
            beta1: 0.9,
            beta2: 0.999,
            weight_decay: 0.0,
            epsilon: 1e-8,
            batch_size: 32,
            custom_params: HashMap::new(),
            performance_score: None,
            training_time: None,
            memory_usage: None,
        };

        let simulated = tuner.simulate_training(&candidate);
        assert!(simulated.is_ok());

        // Simulated metrics obey basic sanity invariants.
        let simulated = simulated.unwrap();
        assert!(simulated.final_loss >= 0.0);
        assert!(simulated.convergence_epoch > 0);
        assert!(simulated.composite_score > 0.0);
    }

    #[test]
    fn test_convenience_optimization_functions() {
        // Both convenience entry points succeed on a small trial budget.
        assert!(HyperparameterTuner::optimize_amacp_for_transformers(5).is_ok());
        assert!(HyperparameterTuner::optimize_novograd_for_llms(5).is_ok());
    }
}
850}