use crate::error::{OptimizeError, OptimizeResult};
use crate::result::OptimizeResults;
use ndarray::{Array1, Array2, ArrayView1};
use rand::{rng, Rng};
use statrs::statistics::Statistics;
use std::collections::HashMap;

type Result<T> = std::result::Result<T, OptimizeError>;

pub mod adaptive_nas_system;
pub mod adaptive_transformer_enhancement;
pub mod few_shot_learning_enhancement;
pub mod learned_hyperparameter_tuner;
pub mod meta_learning_optimizer;
pub mod neural_adaptive_optimizer;

#[allow(ambiguous_glob_reexports)]
pub use adaptive_nas_system::*;
#[allow(ambiguous_glob_reexports)]
pub use adaptive_transformer_enhancement::*;
#[allow(ambiguous_glob_reexports)]
pub use few_shot_learning_enhancement::*;
#[allow(ambiguous_glob_reexports)]
pub use learned_hyperparameter_tuner::*;
#[allow(ambiguous_glob_reexports)]
pub use meta_learning_optimizer::*;
#[allow(ambiguous_glob_reexports)]
pub use neural_adaptive_optimizer::*;

/// Configuration for learned optimization via meta-learning.
#[derive(Debug, Clone)]
pub struct LearnedOptimizationConfig {
    /// Number of meta-training episodes
    pub meta_training_episodes: usize,
    /// Learning rate for the outer (meta) updates
    pub meta_learning_rate: f64,
    /// Number of inner-loop adaptation steps per task
    pub inner_steps: usize,
    /// Learning rate for the inner-loop updates
    pub inner_learning_rate: f64,
    /// Batch size used during meta-training
    pub batch_size: usize,
    /// Maximum number of problem parameters supported
    pub max_parameters: usize,
    /// Whether to use a transformer-based optimizer architecture
    pub use_transformer: bool,
    /// Hidden layer size of the optimizer network
    pub hidden_size: usize,
    /// Number of attention heads (when the transformer is enabled)
    pub num_heads: usize,
    /// Whether to enable few-shot adaptation to new problems
    pub few_shot_adaptation: bool,
    /// Temperature controlling exploration during optimization
    pub exploration_temperature: f64,
}

impl Default for LearnedOptimizationConfig {
    fn default() -> Self {
        Self {
            meta_training_episodes: 10000,
            meta_learning_rate: 0.001,
            inner_steps: 10,
            inner_learning_rate: 0.01,
            batch_size: 32,
            max_parameters: 1000,
            use_transformer: true,
            hidden_size: 256,
            num_heads: 8,
            few_shot_adaptation: true,
            exploration_temperature: 1.0,
        }
    }
}

/// Description of an optimization problem presented to the learned optimizer.
#[derive(Debug, Clone)]
pub struct OptimizationProblem {
    /// Human-readable problem name
    pub name: String,
    /// Number of parameters to optimize
    pub dimension: usize,
    /// Problem class label (e.g. "quadratic")
    pub problem_class: String,
    /// Additional numeric metadata (e.g. "complexity", "sparsity")
    pub metadata: HashMap<String, f64>,
    /// Maximum number of objective evaluations allowed
    pub max_evaluations: usize,
    /// Target accuracy for considering the problem solved
    pub target_accuracy: f64,
}

/// A single task used during meta-training.
#[derive(Debug, Clone)]
pub struct TrainingTask {
    /// The optimization problem for this task
    pub problem: OptimizationProblem,
    /// Distribution from which initial parameters are drawn
    pub initial_distribution: ParameterDistribution,
    /// Known optimum, if available, for evaluating progress
    pub true_optimum: Option<Array1<f64>>,
    /// Weight reflecting the difficulty of the task
    pub difficulty_weight: f64,
}

/// Distribution used to sample initial parameters for a task.
#[derive(Debug, Clone)]
pub enum ParameterDistribution {
    /// Uniform distribution on `[low, high]`
    Uniform { low: f64, high: f64 },
    /// Normal distribution with the given mean and standard deviation
    Normal { mean: f64, std: f64 },
    /// Explicit set of sample points
    Custom { samples: Vec<Array1<f64>> },
}

/// State of the meta-optimizer across episodes.
#[derive(Debug, Clone)]
pub struct MetaOptimizerState {
    /// Meta-level parameters
    pub meta_params: Array1<f64>,
    /// Weights of the optimizer network
    pub network_weights: Array2<f64>,
    /// Performance achieved in past episodes
    pub performance_history: Vec<f64>,
    /// Aggregated adaptation statistics
    pub adaptation_stats: AdaptationStatistics,
    /// Current episode index
    pub episode: usize,
}

/// Aggregate statistics describing how well the optimizer adapts.
#[derive(Debug, Clone)]
pub struct AdaptationStatistics {
    /// Average convergence rate across tasks
    pub avg_convergence_rate: f64,
    /// Fraction of tasks solved to the target accuracy
    pub success_rate: f64,
    /// Average number of objective evaluations used
    pub avg_evaluations: f64,
    /// Efficiency of transferring knowledge to new problems
    pub transfer_efficiency: f64,
    /// Fraction of steps spent exploring rather than exploiting
    pub exploration_ratio: f64,
}

impl Default for AdaptationStatistics {
    fn default() -> Self {
        Self {
            avg_convergence_rate: 0.0,
            success_rate: 0.0,
            avg_evaluations: 0.0,
            transfer_efficiency: 0.0,
            exploration_ratio: 0.5,
        }
    }
}

/// Common interface for learned (meta-trained) optimizers.
pub trait LearnedOptimizer {
    /// Meta-train the optimizer on a collection of training tasks.
    fn meta_train(&mut self, training_tasks: &[TrainingTask]) -> Result<()>;

    /// Adapt the optimizer to a specific problem before optimizing it.
    fn adapt_to_problem(
        &mut self,
        problem: &OptimizationProblem,
        initial_params: &ArrayView1<f64>,
    ) -> Result<()>;

    /// Run the learned optimizer on an objective function.
    fn optimize<F>(
        &mut self,
        objective: F,
        initial_params: &ArrayView1<f64>,
    ) -> OptimizeResult<OptimizeResults<f64>>
    where
        F: Fn(&ArrayView1<f64>) -> f64;

    /// Access the current meta-optimizer state.
    fn get_state(&self) -> &MetaOptimizerState;

    /// Reset the optimizer's internal state.
    fn reset(&mut self);
}

/// Neural network that produces optimization updates.
#[derive(Debug, Clone)]
pub struct OptimizationNetwork {
    /// Input embedding matrix
    pub input_embedding: Array2<f64>,
    /// Hidden layer weight matrices
    pub hidden_layers: Vec<Array2<f64>>,
    /// Output projection matrix
    pub output_layer: Array2<f64>,
    /// Optional self-attention weight matrices
    pub attention_weights: Option<Vec<Array2<f64>>>,
    /// Layer normalization parameters, one per hidden layer
    pub layer_norms: Vec<LayerNorm>,
    /// Activation function used throughout the network
    pub activation: ActivationType,
}

/// Layer normalization parameters.
#[derive(Debug, Clone)]
pub struct LayerNorm {
    /// Scale parameters
    pub gamma: Array1<f64>,
    /// Shift parameters
    pub beta: Array1<f64>,
    /// Small constant added to the variance for numerical stability
    pub epsilon: f64,
}

/// Supported activation functions.
#[derive(Debug, Clone, Copy)]
pub enum ActivationType {
    ReLU,
    GELU,
    Swish,
    Tanh,
    LeakyReLU,
}

impl ActivationType {
    /// Apply the activation function to a scalar input.
    pub fn apply(&self, x: f64) -> f64 {
        match self {
            ActivationType::ReLU => x.max(0.0),
            ActivationType::GELU => {
                // Tanh approximation of GELU; 0.7978845608 ≈ sqrt(2/pi).
                x * 0.5 * (1.0 + (x * 0.7978845608 * (1.0 + 0.044715 * x * x)).tanh())
            }
            ActivationType::Swish => x / (1.0 + (-x).exp()),
            ActivationType::Tanh => x.tanh(),
            ActivationType::LeakyReLU => {
                if x > 0.0 {
                    x
                } else {
                    0.01 * x
                }
            }
        }
    }

    /// Derivative of the activation function with respect to its input.
    pub fn derivative(&self, x: f64) -> f64 {
        match self {
            ActivationType::ReLU => {
                if x > 0.0 {
                    1.0
                } else {
                    0.0
                }
            }
            ActivationType::GELU => {
                // Derivative of the tanh approximation used in `apply`.
                let tanh_arg = x * 0.7978845608 * (1.0 + 0.044715 * x * x);
                let tanh_val = tanh_arg.tanh();
                0.5 * (1.0 + tanh_val)
                    + x * 0.5
                        * (1.0 - tanh_val * tanh_val)
                        * 0.7978845608
                        * (1.0 + 0.134145 * x * x)
            }
            ActivationType::Swish => {
                let sigmoid = 1.0 / (1.0 + (-x).exp());
                sigmoid * (1.0 + x * (1.0 - sigmoid))
            }
            ActivationType::Tanh => {
                let t = x.tanh();
                1.0 - t * t
            }
            ActivationType::LeakyReLU => {
                if x > 0.0 {
                    1.0
                } else {
                    0.01
                }
            }
        }
    }
}

impl OptimizationNetwork {
    /// Create a new network with randomly initialized weights.
    pub fn new(
        input_size: usize,
        hidden_sizes: Vec<usize>,
        output_size: usize,
        use_attention: bool,
        activation: ActivationType,
    ) -> Self {
        let mut hidden_layers = Vec::new();
        let mut layer_norms = Vec::new();

        // Hidden layers, with random weights scaled by sqrt(2 / fan_in)
        let mut prev_size = input_size;
        for &hidden_size in &hidden_sizes {
            let weights = Array2::from_shape_fn((hidden_size, prev_size), |_| {
                rand::rng().random_range(-0.5..0.5) * (2.0 / prev_size as f64).sqrt()
            });
            hidden_layers.push(weights);

            layer_norms.push(LayerNorm {
                gamma: Array1::ones(hidden_size),
                beta: Array1::zeros(hidden_size),
                epsilon: 1e-6,
            });

            prev_size = hidden_size;
        }

        // Input embedding maps the raw input to the first hidden width
        let input_embedding = Array2::from_shape_fn((hidden_sizes[0], input_size), |_| {
            rand::rng().random_range(-0.5..0.5) * (2.0 / input_size as f64).sqrt()
        });

        let output_layer = Array2::from_shape_fn((output_size, prev_size), |_| {
            rand::rng().random_range(-0.5..0.5) * (2.0 / prev_size as f64).sqrt()
        });

        // Optional single self-attention weight matrix
        let attention_weights = if use_attention {
            Some(vec![Array2::from_shape_fn((prev_size, prev_size), |_| {
                rand::rng().random_range(-0.5..0.5) * (2.0 / prev_size as f64).sqrt()
            })])
        } else {
            None
        };

        Self {
            input_embedding,
            hidden_layers,
            output_layer,
            attention_weights,
            layer_norms,
            activation,
        }
    }

    /// Forward pass through the network.
    pub fn forward(&self, input: &ArrayView1<f64>) -> Array1<f64> {
        // Input embedding
        let mut current = Array1::zeros(self.input_embedding.nrows());
        for i in 0..current.len() {
            for j in 0..input.len().min(self.input_embedding.ncols()) {
                current[i] += self.input_embedding[[i, j]] * input[j];
            }
        }

        current.mapv_inplace(|x| self.activation.apply(x));

        for (layer_idx, layer) in self.hidden_layers.iter().enumerate() {
            let mut next = Array1::zeros(layer.nrows());

            // Dense layer (indices are clamped so mismatched widths do not panic)
            for i in 0..next.len() {
                for j in 0..current.len().min(layer.ncols()) {
                    next[i] += layer[[i, j]] * current[j];
                }
            }

            // Layer normalization
            if layer_idx < self.layer_norms.len() {
                let layer_norm = &self.layer_norms[layer_idx];
                let mean = next.mean().unwrap_or(0.0);
                let var = next.view().variance();
                let std = (var + layer_norm.epsilon).sqrt();

                for i in 0..next.len() {
                    if i < layer_norm.gamma.len() && i < layer_norm.beta.len() {
                        next[i] = layer_norm.gamma[i] * (next[i] - mean) / std + layer_norm.beta[i];
                    }
                }
            }

            // Optional self-attention with a residual connection
            if let Some(ref attention) = self.attention_weights {
                if !attention.is_empty() {
                    let attn_weights = &attention[0];
                    let mut attended: Array1<f64> = Array1::zeros(next.len());

                    for i in 0..attended.len() {
                        for j in 0..next.len().min(attn_weights.ncols()) {
                            attended[i] += attn_weights[[i, j]] * next[j];
                        }
                    }

                    next = &next + &attended;
                }
            }

            next.mapv_inplace(|x| self.activation.apply(x));
            current = next;
        }

        // Output projection
        let mut output = Array1::zeros(self.output_layer.nrows());
        for i in 0..output.len() {
            for j in 0..current.len().min(self.output_layer.ncols()) {
                output[i] += self.output_layer[[i, j]] * current[j];
            }
        }

        output
    }
}

/// Encodes optimization problems into fixed-size feature embeddings.
#[derive(Debug, Clone)]
pub struct ProblemEncoder {
    /// Encoder weights for dimensionality-based features
    pub dim_encoder: Array2<f64>,
    /// Encoder weights for gradient-based features
    pub gradient_encoder: Array2<f64>,
    /// Encoder weights for curvature (Hessian) features
    pub hessian_encoder: Array2<f64>,
    /// Size of the produced problem embedding
    pub embedding_size: usize,
}

impl ProblemEncoder {
    /// Create a new encoder with randomly initialized weights.
    pub fn new(embedding_size: usize) -> Self {
        // Each feature extractor below produces a fixed vector of 10 features
        let dim = 10;
        Self {
            dim_encoder: Array2::from_shape_fn((embedding_size, dim), |_| {
                rand::rng().random_range(-0.5..0.5) * 0.1
            }),
            gradient_encoder: Array2::from_shape_fn((embedding_size, dim), |_| {
                rand::rng().random_range(-0.5..0.5) * 0.1
            }),
            hessian_encoder: Array2::from_shape_fn((embedding_size, dim), |_| {
                rand::rng().random_range(-0.5..0.5) * 0.1
            }),
            embedding_size,
        }
    }

    /// Encode a problem and the current iterate into a feature embedding.
    pub fn encode_problem<F>(
        &self,
        objective: &F,
        current_params: &ArrayView1<f64>,
        problem: &OptimizationProblem,
    ) -> Array1<f64>
    where
        F: Fn(&ArrayView1<f64>) -> f64,
    {
        let mut embedding = Array1::zeros(self.embedding_size);

        let dim_features = self.compute_dimensionality_features(current_params, problem);
        let grad_features = self.compute_gradient_features(objective, current_params);
        let hessian_features = self.compute_hessian_features(objective, current_params);

        // Linear combination of the three feature groups
        for i in 0..self.embedding_size {
            for j in 0..dim_features.len().min(self.dim_encoder.ncols()) {
                embedding[i] += self.dim_encoder[[i, j]] * dim_features[j];
            }
            for j in 0..grad_features.len().min(self.gradient_encoder.ncols()) {
                embedding[i] += self.gradient_encoder[[i, j]] * grad_features[j];
            }
            for j in 0..hessian_features.len().min(self.hessian_encoder.ncols()) {
                embedding[i] += self.hessian_encoder[[i, j]] * hessian_features[j];
            }
        }

        embedding
    }

    /// Features derived from problem metadata and simple parameter statistics.
    fn compute_dimensionality_features(
        &self,
        params: &ArrayView1<f64>,
        problem: &OptimizationProblem,
    ) -> Array1<f64> {
        let mut features = Array1::zeros(10);

        features[0] = (params.len() as f64).ln();
        features[1] = params.view().variance();
        features[2] = params.mean().unwrap_or(0.0);
        features[3] = params.iter().map(|&x| x.abs()).sum::<f64>() / params.len() as f64;
        features[4] = (params.iter().map(|&x| x * x).sum::<f64>()).sqrt();
        features[5] = problem.dimension as f64 / 1000.0;
        features[6] = problem.max_evaluations as f64 / 10000.0;
        features[7] = problem.target_accuracy.ln().abs();
        if let Some(&complexity) = problem.metadata.get("complexity") {
            features[8] = complexity.tanh();
        }
        if let Some(&sparsity) = problem.metadata.get("sparsity") {
            features[9] = sparsity;
        }

        features
    }

    /// Features derived from a finite-difference gradient estimate.
    fn compute_gradient_features<F>(&self, objective: &F, params: &ArrayView1<f64>) -> Array1<f64>
    where
        F: Fn(&ArrayView1<f64>) -> f64,
    {
        let mut features = Array1::zeros(10);
        let h = 1e-6;
        let f0 = objective(params);

        let mut gradient_norm = 0.0;
        let mut gradient_components = Vec::new();

        // Forward-difference gradient over at most the first 20 coordinates
        for i in 0..params.len().min(20) {
            let mut params_plus = params.to_owned();
            params_plus[i] += h;
            let f_plus = objective(&params_plus.view());
            let grad_i = (f_plus - f0) / h;
            gradient_components.push(grad_i);
            gradient_norm += grad_i * grad_i;
        }

        gradient_norm = gradient_norm.sqrt();

        features[0] = gradient_norm.ln().tanh();
        features[1] = f0.abs().ln().tanh();
        if !gradient_components.is_empty() {
            let grad_mean =
                gradient_components.iter().sum::<f64>() / gradient_components.len() as f64;
            let grad_var = gradient_components
                .iter()
                .map(|&g| (g - grad_mean).powi(2))
                .sum::<f64>()
                / gradient_components.len() as f64;

            features[2] = grad_mean.tanh();
            features[3] = grad_var.sqrt().tanh();
            features[4] = gradient_components
                .iter()
                .map(|&g| g.abs())
                .max_by(|a, b| a.partial_cmp(b).unwrap())
                .unwrap_or(0.0)
                .tanh();
            features[5] = gradient_components
                .iter()
                .map(|&g| g.abs())
                .min_by(|a, b| a.partial_cmp(b).unwrap())
                .unwrap_or(0.0)
                .tanh();
        }

        features
    }

    /// Features derived from finite-difference estimates of the Hessian diagonal.
    fn compute_hessian_features<F>(&self, objective: &F, params: &ArrayView1<f64>) -> Array1<f64>
    where
        F: Fn(&ArrayView1<f64>) -> f64,
    {
        let mut features = Array1::zeros(10);
        let h = 1e-4;
        let f0 = objective(params);

        // Central second differences for at most the first 5 diagonal entries
        for i in 0..params.len().min(5) {
            let mut params_plus = params.to_owned();
            let mut params_minus = params.to_owned();

            params_plus[i] += h;
            params_minus[i] -= h;

            let f_plus = objective(&params_plus.view());
            let f_minus = objective(&params_minus.view());

            let hessian_ii = (f_plus - 2.0 * f0 + f_minus) / (h * h);

            if i < features.len() {
                features[i] = hessian_ii.tanh();
            }
        }

        features
    }
}

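/// Convenience entry point: builds a `MetaLearningOptimizer` from the given
/// configuration (or the defaults) and runs it on the objective starting from
/// `initial_params`.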
#[allow(dead_code)]
pub fn learned_optimize<F>(
    objective: F,
    initial_params: &ArrayView1<f64>,
    config: Option<LearnedOptimizationConfig>,
) -> OptimizeResult<OptimizeResults<f64>>
where
    F: Fn(&ArrayView1<f64>) -> f64,
{
    let config = config.unwrap_or_default();

    let mut optimizer = MetaLearningOptimizer::new(config);

    optimizer.optimize(objective, initial_params)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_learned_optimization_config() {
        let config = LearnedOptimizationConfig::default();
        assert_eq!(config.meta_training_episodes, 10000);
        assert_eq!(config.hidden_size, 256);
        assert!(config.use_transformer);
    }

    #[test]
    fn test_optimization_network_creation() {
        let network = OptimizationNetwork::new(10, vec![32, 32], 5, true, ActivationType::GELU);

        assert_eq!(network.hidden_layers.len(), 2);
        assert_eq!(network.layer_norms.len(), 2);
        assert!(network.attention_weights.is_some());
    }

    #[test]
    fn test_network_forward_pass() {
        let network = OptimizationNetwork::new(5, vec![10], 3, false, ActivationType::ReLU);

        let input = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let output = network.forward(&input.view());

        assert_eq!(output.len(), 3);
        assert!(output.iter().all(|&x| x.is_finite()));
    }
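
    // Added sketch: the forward pass clamps its index loops with `.min(...)`,
    // so inputs whose length differs from the configured `input_size` should
    // still produce a finite output. This is an assumption-based check of that
    // clamping behaviour, not part of the original suite.
    #[test]
    fn test_network_forward_pass_mismatched_input_length() {
        let network = OptimizationNetwork::new(5, vec![10], 3, false, ActivationType::Tanh);

        // Shorter than input_size: only the first 3 embedding columns are used.
        let short_input = Array1::from(vec![0.5, -0.5, 1.0]);
        let short_output = network.forward(&short_input.view());
        assert_eq!(short_output.len(), 3);
        assert!(short_output.iter().all(|&x| x.is_finite()));

        // Longer than input_size: entries beyond the embedding width are ignored.
        let long_input = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]);
        let long_output = network.forward(&long_input.view());
        assert_eq!(long_output.len(), 3);
        assert!(long_output.iter().all(|&x| x.is_finite()));
    }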

    #[test]
    fn test_activation_functions() {
        assert_eq!(ActivationType::ReLU.apply(-1.0), 0.0);
        assert_eq!(ActivationType::ReLU.apply(1.0), 1.0);
        assert!(ActivationType::GELU.apply(0.0).abs() < 0.1);
        assert!(ActivationType::Swish.apply(0.0).abs() < 0.1);
    }
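
    // Added sketch: cross-check `derivative` against a central finite
    // difference of `apply`. Both GELU methods use the same tanh
    // approximation, so they should agree closely away from the ReLU /
    // LeakyReLU kink at zero. The tolerance and sample points are assumptions,
    // not part of the original test suite.
    #[test]
    fn test_activation_derivatives_match_finite_differences() {
        let h = 1e-5;
        let activations = [
            ActivationType::ReLU,
            ActivationType::GELU,
            ActivationType::Swish,
            ActivationType::Tanh,
            ActivationType::LeakyReLU,
        ];

        for act in activations {
            for &x in &[-1.5, -0.5, 0.5, 2.0] {
                let numeric = (act.apply(x + h) - act.apply(x - h)) / (2.0 * h);
                let analytic = act.derivative(x);
                assert!(
                    (numeric - analytic).abs() < 1e-4,
                    "derivative mismatch: {} vs {} at x = {}",
                    numeric,
                    analytic,
                    x
                );
            }
        }
    }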

    #[test]
    fn test_problem_encoder() {
        let encoder = ProblemEncoder::new(32);
        let params = Array1::from(vec![1.0, 2.0]);
        let objective = |x: &ArrayView1<f64>| x[0].powi(2) + x[1].powi(2);

        let problem = OptimizationProblem {
            name: "test".to_string(),
            dimension: 2,
            problem_class: "quadratic".to_string(),
            metadata: HashMap::new(),
            max_evaluations: 1000,
            target_accuracy: 1e-6,
        };

        let embedding = encoder.encode_problem(&objective, &params.view(), &problem);
        assert_eq!(embedding.len(), 32);
        assert!(embedding.iter().all(|&x| x.is_finite()));
    }

    #[test]
    fn test_basic_learned_optimization() {
        let objective = |x: &ArrayView1<f64>| x[0].powi(2) + x[1].powi(2);
        let initial = Array1::from(vec![2.0, 2.0]);

        let config = LearnedOptimizationConfig {
            meta_training_episodes: 10,
            inner_steps: 5,
            ..Default::default()
        };

        let result = learned_optimize(objective, &initial.view(), Some(config)).unwrap();

        assert!(result.fun >= 0.0);
        assert_eq!(result.x.len(), 2);
    }
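
    // Added sketch: pin the `AdaptationStatistics::default()` values defined
    // above, mirroring the existing default-config test.
    #[test]
    fn test_adaptation_statistics_default() {
        let stats = AdaptationStatistics::default();
        assert_eq!(stats.avg_convergence_rate, 0.0);
        assert_eq!(stats.success_rate, 0.0);
        assert_eq!(stats.avg_evaluations, 0.0);
        assert_eq!(stats.transfer_efficiency, 0.0);
        assert_eq!(stats.exploration_ratio, 0.5);
    }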
}

#[allow(dead_code)]
pub fn placeholder() {}