scirs2_optimize/learned_optimizers/
neural_adaptive_optimizer.rs

1//! Neural Adaptive Optimizer
2//!
3//! Implementation of neural networks that learn adaptive optimization strategies
4//! and can dynamically adjust their behavior based on optimization progress.
5
6use super::{
7    ActivationType, LearnedOptimizationConfig, LearnedOptimizer, MetaOptimizerState,
8    OptimizationProblem, TrainingTask,
9};
10use crate::error::OptimizeResult;
11use crate::result::OptimizeResults;
12use scirs2_core::ndarray::ArrayStatCompat;
13use scirs2_core::ndarray::{Array1, Array2, ArrayView1};
14use scirs2_core::random::Rng;
15use statrs::statistics::Statistics;
16use std::collections::{HashMap, VecDeque};
17
18/// Optimizer that, on every step, chooses a strategy from a neural network's output and a predicted performance
19#[derive(Debug, Clone)]
20pub struct NeuralAdaptiveOptimizer {
21    /// Settings supplied to `new`; `meta_learning_rate` is read again while training the networks
22    config: LearnedOptimizationConfig,
23    /// Network evaluated on the extracted state features at every step
24    optimization_network: OptimizationNetwork,
25    /// Selects the strategy for each step and holds the strategy history
26    adaptation_controller: AdaptationController,
27    /// Predicts performance from the state features and owns the confidence estimator
28    performance_predictor: PerformancePredictor,
29    /// Meta-optimizer state; its `performance_history` feeds the temporal features
30    meta_state: MetaOptimizerState,
31    /// Running statistics refreshed after every adaptive step
32    adaptive_stats: AdaptiveOptimizationStats,
33    /// Scratch buffers reused by the finite-difference feature extraction
34    computation_cache: ComputationCache,
35}
36
37/// Scratch `Array1<f64>` buffers that are allocated once and handed out by the `get_*` accessors
38#[derive(Debug, Clone)]
39pub struct ComputationCache {
40    /// Buffer returned by `get_gradient_buffer` and `get_gradient_and_param_buffers`
41    gradient_buffer: Array1<f64>,
42    /// Buffer returned by `get_feature_buffer`
43    feature_buffer: Array1<f64>,
44    /// Buffer returned by `get_param_buffer` and `get_gradient_and_param_buffers`
45    param_buffer: Array1<f64>,
46    /// Buffer returned by `get_network_output_buffer`
47    network_output_buffer: Array1<f64>,
48    /// Buffer returned by `get_temp_buffer`
49    temp_buffer: Array1<f64>,
50    /// Initial length of every buffer and the cap used by `resize_buffer`; the `get_*` accessors do not enforce it
51    max_buffer_size: usize,
52}
53
/// First-in, first-out history that never holds more than a fixed number of items.
///
/// Once the history is full, every [`push`](BoundedHistory::push) discards the oldest
/// entry before storing the new one.
#[derive(Debug, Clone)]
pub struct BoundedHistory<T> {
    /// Stored items, oldest at the front and newest at the back.
    pub(crate) data: VecDeque<T>,
    /// Largest number of items `push` will leave in `data`.
    max_capacity: usize,
}

impl<T> BoundedHistory<T> {
    /// Create an empty history that retains at most `capacity` items.
    ///
    /// Storage for `capacity` items is reserved up front.
    pub fn new(capacity: usize) -> Self {
        Self {
            data: VecDeque::with_capacity(capacity),
            max_capacity: capacity,
        }
    }

    /// Append `item`, evicting the oldest entries so the length never exceeds the capacity.
    ///
    /// A history created with capacity `0` retains nothing: `item` is dropped immediately.
    pub fn push(&mut self, item: T) {
        if self.max_capacity == 0 {
            // Without this guard one element would always survive, breaking the bound.
            return;
        }
        // `data` is crate-visible and may have been filled past the limit from outside,
        // so trim in a loop rather than popping a single element.
        while self.data.len() >= self.max_capacity {
            self.data.pop_front();
        }
        self.data.push_back(item);
    }

    /// Most recently pushed item, or `None` when the history is empty.
    pub fn back(&self) -> Option<&T> {
        self.data.back()
    }

    /// Remove every stored item; the capacity limit is unchanged.
    pub fn clear(&mut self) {
        self.data.clear();
    }

    /// Number of items currently stored.
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// `true` when no items are stored.
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }
}
100
101impl ComputationCache {
102    /// Create new computation cache
103    pub fn new(max_size: usize) -> Self {
104        Self {
105            gradient_buffer: Array1::zeros(max_size),
106            feature_buffer: Array1::zeros(max_size),
107            param_buffer: Array1::zeros(max_size),
108            network_output_buffer: Array1::zeros(max_size),
109            temp_buffer: Array1::zeros(max_size),
110            max_buffer_size: max_size,
111        }
112    }
113
114    /// Get reusable gradient buffer
115    pub fn get_gradient_buffer(&mut self, size: usize) -> &mut Array1<f64> {
116        if self.gradient_buffer.len() < size {
117            self.gradient_buffer = Array1::zeros(size);
118        }
119        &mut self.gradient_buffer
120    }
121
122    /// Get reusable feature buffer
123    pub fn get_feature_buffer(&mut self, size: usize) -> &mut Array1<f64> {
124        if self.feature_buffer.len() < size {
125            self.feature_buffer = Array1::zeros(size);
126        }
127        &mut self.feature_buffer
128    }
129
130    /// Get reusable parameter buffer
131    pub fn get_param_buffer(&mut self, size: usize) -> &mut Array1<f64> {
132        if self.param_buffer.len() < size {
133            self.param_buffer = Array1::zeros(size);
134        }
135        &mut self.param_buffer
136    }
137
138    /// Get network output buffer
139    pub fn get_network_output_buffer(&mut self, size: usize) -> &mut Array1<f64> {
140        if self.network_output_buffer.len() < size {
141            self.network_output_buffer = Array1::zeros(size);
142        }
143        &mut self.network_output_buffer
144    }
145
146    /// Get temporary buffer
147    pub fn get_temp_buffer(&mut self, size: usize) -> &mut Array1<f64> {
148        if self.temp_buffer.len() < size {
149            self.temp_buffer = Array1::zeros(size);
150        }
151        &mut self.temp_buffer
152    }
153
154    /// Get both gradient and param buffers simultaneously to avoid borrowing conflicts
155    pub fn get_gradient_and_param_buffers(
156        &mut self,
157        gradient_size: usize,
158        param_size: usize,
159    ) -> (&mut Array1<f64>, &mut Array1<f64>) {
160        if self.gradient_buffer.len() < gradient_size {
161            self.gradient_buffer = Array1::zeros(gradient_size);
162        }
163        if self.param_buffer.len() < param_size {
164            self.param_buffer = Array1::zeros(param_size);
165        }
166        (&mut self.gradient_buffer, &mut self.param_buffer)
167    }
168
169    /// Resize buffer if needed (up to max size)
170    fn resize_buffer(&mut self, buffer: &mut Array1<f64>, requested_size: usize) {
171        let size = requested_size.min(self.max_buffer_size);
172        if buffer.len() != size {
173            *buffer = Array1::zeros(size);
174        } else {
175            buffer.fill(0.0);
176        }
177    }
178}
179
180/// Feed-forward network (input layer, hidden layers, optional recurrent stage, output layer) used to pick actions
181#[derive(Debug, Clone)]
182pub struct OptimizationNetwork {
183    /// First layer applied to the state features; built with `NeuralLayer::new(input_size, input_size, ..)`
184    input_layer: NeuralLayer,
185    /// Hidden layers, applied in order after the input layer
186    hidden_layers: Vec<NeuralLayer>,
187    /// Final layer; its result is what `forward` returns
188    output_layer: NeuralLayer,
189    /// Recurrent stage applied between the hidden and output layers when `use_recurrent` is set
190    recurrent_connections: RecurrentConnections,
191    /// Sizes, activations and flags the network was built from
192    architecture: NetworkArchitecture,
193}
194
195/// One layer of the network: weights, biases, activation and optional normalization
196#[derive(Debug, Clone)]
197pub struct NeuralLayer {
198    /// Weight matrix (NOTE(review): row/column orientation is set in `NeuralLayer::new`, outside this excerpt)
199    weights: Array2<f64>,
200    /// Bias vector
201    biases: Array1<f64>,
202    /// Activation function applied by this layer
203    activation: ActivationType,
204    /// Layer size
205    size: usize,
206    /// Dropout rate (NOTE(review): whether this is a drop or a keep probability is not shown here)
207    dropout_rate: f64,
208    /// Layer normalization parameters; `None` when the layer is not normalized
209    layer_norm: Option<LayerNormalization>,
210}
211
212/// Parameters and running statistics for layer normalization
213#[derive(Debug, Clone)]
214pub struct LayerNormalization {
215    /// Scale parameter (gamma)
216    gamma: Array1<f64>,
217    /// Shift parameter (beta)
218    beta: Array1<f64>,
219    /// Running mean
220    running_mean: Array1<f64>,
221    /// Running variance
222    running_var: Array1<f64>,
223    /// Momentum used when updating the running statistics
224    momentum: f64,
225    /// Small constant for numerical stability
226    epsilon: f64,
227}
228
229/// Recurrent state and gate weights; `OptimizationNetwork::forward` runs it via `apply` when recurrence is enabled
230#[derive(Debug, Clone)]
231pub struct RecurrentConnections {
232    /// Hidden state
233    hidden_state: Array1<f64>,
234    /// Cell state (for LSTM-like behavior)
235    cell_state: Array1<f64>,
236    /// Recurrent weights
237    recurrent_weights: Array2<f64>,
238    /// Input gate weights (NOTE(review): how `apply` combines the three gates is outside this excerpt)
239    input_gate_weights: Array2<f64>,
240    /// Forget gate weights
241    forget_gate_weights: Array2<f64>,
242    /// Output gate weights
243    output_gate_weights: Array2<f64>,
244}
245
246/// Shape description consumed by `OptimizationNetwork::new`
247#[derive(Debug, Clone)]
248pub struct NetworkArchitecture {
249    /// Length of the state-feature vector fed to the network
250    input_size: usize,
251    /// Width of each hidden layer, in order
252    hidden_sizes: Vec<usize>,
253    /// Width of the output layer
254    output_size: usize,
255    /// Activations: entry `i` is used for hidden layer `i`, the first for the input layer, the last for the output layer
256    activations: Vec<ActivationType>,
257    /// When true, `forward` passes the hidden output through the recurrent connections
258    use_recurrent: bool,
259    /// Use attention mechanisms (set from `config.use_transformer`; not read in this excerpt)
260    use_attention: bool,
261}
262
263/// Controller that selects a strategy per step and monitors progress to adapt it
264#[derive(Debug, Clone)]
265pub struct AdaptationController {
266    /// Strategy selector network
267    strategy_selector: StrategySelector,
268    /// Adaptation rate controller
269    adaptation_rate_controller: AdaptationRateController,
270    /// Progress monitor
271    progress_monitor: ProgressMonitor,
272    /// Bounded strategy history; its newest entry is compared with each new step to count strategy switches
273    strategy_history: BoundedHistory<OptimizationStrategy>,
274}
275
276/// State used to choose among the available strategies
277#[derive(Debug, Clone)]
278pub struct StrategySelector {
279    /// Selection network weights
280    selection_network: Array2<f64>,
281    /// Strategy embeddings (NOTE(review): presumably one row per strategy — confirm in the constructor)
282    strategy_embeddings: Array2<f64>,
283    /// Current strategy weights
284    strategy_weights: Array1<f64>,
285    /// Strategies that can be selected
286    available_strategies: Vec<OptimizationStrategy>,
287}
288
289/// A selectable optimization strategy together with its descriptive scores
290#[derive(Debug, Clone)]
291pub struct OptimizationStrategy {
292    /// Strategy identifier; two consecutive steps with different ids count as a strategy switch
293    id: String,
294    /// Strategy parameters
295    parameters: Array1<f64>,
296    /// Expected performance
297    expected_performance: f64,
298    /// Computational cost
299    computational_cost: f64,
300    /// Robustness score
301    robustness: f64,
302}
303
304/// State for controlling the adaptation rate
305#[derive(Debug, Clone)]
306pub struct AdaptationRateController {
307    /// Controller network weights
308    controller_network: Array2<f64>,
309    /// Current adaptation rate
310    current_rate: f64,
311    /// Past rates, kept in a bounded history
312    rate_history: BoundedHistory<f64>,
313    /// Performance correlation (NOTE(review): how it is computed is outside this excerpt)
314    performance_correlation: f64,
315}
316
317/// Tracks progress indicators and the current progress classification
318#[derive(Debug, Clone)]
319pub struct ProgressMonitor {
320    /// Progress indicators being tracked
321    progress_indicators: Vec<ProgressIndicator>,
322    /// Monitoring network weights
323    monitoring_network: Array2<f64>,
324    /// Alert thresholds keyed by name (NOTE(review): presumably indicator names — confirm against the constructor)
325    alert_thresholds: HashMap<String, f64>,
326    /// Current progress state
327    current_state: ProgressState,
328}
329
330/// A single named progress measurement with its bounded history
331#[derive(Debug, Clone)]
332pub struct ProgressIndicator {
333    /// Indicator name
334    name: String,
335    /// Current value
336    value: f64,
337    /// Past values, kept in a bounded history
338    history: BoundedHistory<f64>,
339    /// Trend direction (NOTE(review): sign convention is not shown in this excerpt)
340    trend: f64,
341    /// Importance weight
342    importance: f64,
343}
344
345/// Coarse classification of optimization progress, stored in `ProgressMonitor::current_state`
346#[derive(Debug, Clone)]
347pub enum ProgressState {
348    Improving,
349    Stagnating,
350    Deteriorating,
351    Converged,
352    Diverging,
353}
354
355/// Predicts a scalar performance value from state features and owns the confidence estimator
356#[derive(Debug, Clone)]
357pub struct PerformancePredictor {
358    /// Prediction network weights; every entry is nudged during `train_performance_predictor`
359    prediction_network: Array2<f64>,
360    /// Feature extractor
361    feature_extractor: FeatureExtractor,
362    /// Offset into a trajectory's performance values used as the training target for a state
363    prediction_horizon: usize,
364    /// Prediction accuracy
365    prediction_accuracy: f64,
366    /// Confidence estimator queried once per adaptive step
367    confidence_estimator: ConfidenceEstimator,
368}
369
370/// Feature-extraction state held by the performance predictor
371#[derive(Debug, Clone)]
372pub struct FeatureExtractor {
373    /// Weight matrices of the extraction layers
374    extraction_layers: Vec<Array2<f64>>,
375    /// Feature dimension
376    feature_dim: usize,
377    /// Temporal features
378    temporal_features: TemporalFeatures,
379}
380
381/// Temporal feature state: time embeddings plus trend and seasonality components
382#[derive(Debug, Clone)]
383pub struct TemporalFeatures {
384    /// Time series embeddings
385    time_embeddings: Array2<f64>,
386    /// Trend analyzer
387    trend_analyzer: TrendAnalyzer,
388    /// Seasonality detector
389    seasonality_detector: SeasonalityDetector,
390}
391
392/// Trend-analysis state
393#[derive(Debug, Clone)]
394pub struct TrendAnalyzer {
395    /// Trend coefficients
396    trend_coefficients: Array1<f64>,
397    /// Window size for trend analysis
398    window_size: usize,
399    /// Trend strength
400    trend_strength: f64,
401}
402
403/// Seasonality-detection state
404#[derive(Debug, Clone)]
405pub struct SeasonalityDetector {
406    /// Seasonal patterns
407    seasonal_patterns: Array2<f64>,
408    /// Strength of each pattern (NOTE(review): presumably one entry per row of `seasonal_patterns` — confirm)
409    pattern_strength: Array1<f64>,
410    /// Detection threshold
411    detection_threshold: f64,
412}
413
414/// Estimator whose `estimate_confidence` supplies the confidence attached to each adaptive step
415#[derive(Debug, Clone)]
416pub struct ConfidenceEstimator {
417    /// Confidence network weights
418    confidence_network: Array2<f64>,
419    /// Uncertainty quantification state
420    uncertainty_quantifier: UncertaintyQuantifier,
421    /// Calibration parameters
422    calibration_params: Array1<f64>,
423}
424
425/// Uncertainty estimates together with the method that produces them
426#[derive(Debug, Clone)]
427pub struct UncertaintyQuantifier {
428    /// Epistemic uncertainty
429    epistemic_uncertainty: f64,
430    /// Aleatoric uncertainty
431    aleatoric_uncertainty: f64,
432    /// Uncertainty estimation method in use
433    method: UncertaintyMethod,
434}
435
436/// Uncertainty estimation method, stored in `UncertaintyQuantifier::method`
437#[derive(Debug, Clone)]
438pub enum UncertaintyMethod {
439    Dropout,
440    Ensemble,
441    Bayesian,
442    Evidential,
443}
444
445/// Running statistics of the adaptive optimizer (`new` initialises them with `AdaptiveOptimizationStats::default()`)
446#[derive(Debug, Clone)]
447pub struct AdaptiveOptimizationStats {
448    /// Number of steps whose strategy id differed from the newest entry of the strategy history
449    strategy_switches: usize,
450    /// Exponential moving average (weights 0.9 / 0.1) of each step's adaptation signal
451    avg_adaptation_rate: f64,
452    /// Exponential moving average (weights 0.95 / 0.05) of each step's confidence
453    prediction_accuracy: f64,
454    /// Computational efficiency (not updated by the code in this excerpt)
455    computational_efficiency: f64,
456    /// Robustness score (not updated by the code in this excerpt)
457    robustness_score: f64,
458}
459
460impl NeuralAdaptiveOptimizer {
461    /// Create new neural adaptive optimizer
462    pub fn new(config: LearnedOptimizationConfig) -> Self {
463        let architecture = NetworkArchitecture {
464            input_size: config.max_parameters.min(100),
465            hidden_sizes: vec![config.hidden_size, config.hidden_size / 2],
466            output_size: 32, // Number of optimization actions
467            activations: vec![
468                ActivationType::GELU,
469                ActivationType::GELU,
470                ActivationType::Tanh,
471            ],
472            use_recurrent: true,
473            use_attention: config.use_transformer,
474        };
475
476        let optimization_network = OptimizationNetwork::new(architecture);
477        let adaptation_controller = AdaptationController::new(config.hidden_size);
478        let performance_predictor = PerformancePredictor::new(config.hidden_size);
479        let hidden_size = config.hidden_size;
480        let max_buffer_size = config.max_parameters.max(1000); // Reasonable upper bound
481
482        Self {
483            config,
484            optimization_network,
485            adaptation_controller,
486            performance_predictor,
487            meta_state: MetaOptimizerState {
488                meta_params: Array1::zeros(hidden_size),
489                network_weights: Array2::zeros((hidden_size, hidden_size)),
490                performance_history: Vec::new(),
491                adaptation_stats: super::AdaptationStatistics::default(),
492                episode: 0,
493            },
494            adaptive_stats: AdaptiveOptimizationStats::default(),
495            computation_cache: ComputationCache::new(max_buffer_size),
496        }
497    }
498
499    /// Perform adaptive optimization step
500    pub fn adaptive_optimization_step<F>(
501        &mut self,
502        objective: &F,
503        current_params: &ArrayView1<f64>,
504        step_number: usize,
505    ) -> OptimizeResult<AdaptiveOptimizationStep>
506    where
507        F: Fn(&ArrayView1<f64>) -> f64,
508    {
509        // Extract current state features
510        let state_features = self.extract_state_features(objective, current_params, step_number)?;
511
512        // Forward pass through optimization network
513        let network_output = self.optimization_network.forward(&state_features.view())?;
514
515        // Predict performance
516        let performance_prediction = self.performance_predictor.predict(&state_features)?;
517
518        // Select optimization strategy
519        let strategy = self
520            .adaptation_controller
521            .select_strategy(&network_output, &performance_prediction)?;
522
523        // Monitor progress and adapt if necessary
524        self.adaptation_controller
525            .monitor_and_adapt(&performance_prediction)?;
526
527        // Create optimization step
528        let step = AdaptiveOptimizationStep {
529            strategy: strategy.clone(),
530            predicted_performance: performance_prediction,
531            confidence: self
532                .performance_predictor
533                .confidence_estimator
534                .estimate_confidence(&state_features)?,
535            adaptation_signal: self.adaptation_controller.get_adaptation_signal(),
536            network_output: network_output.clone(),
537        };
538
539        // Update statistics
540        self.update_adaptive_stats(&step)?;
541
542        Ok(step)
543    }
544
545    /// Extract state features for neural network
546    fn extract_state_features<F>(
547        &mut self,
548        objective: &F,
549        current_params: &ArrayView1<f64>,
550        step_number: usize,
551    ) -> OptimizeResult<Array1<f64>>
552    where
553        F: Fn(&ArrayView1<f64>) -> f64,
554    {
555        let mut features = Array1::zeros(self.optimization_network.architecture.input_size);
556        let feature_idx = 0;
557
558        // Parameter features
559        let param_features = self.extract_parameter_features(current_params);
560        self.copy_features(&mut features, &param_features, feature_idx);
561
562        // Objective features
563        let obj_features = self.extract_objective_features(objective, current_params)?;
564        self.copy_features(
565            &mut features,
566            &obj_features,
567            feature_idx + param_features.len(),
568        );
569
570        // Temporal features
571        let temporal_features = self.extract_temporal_features(step_number);
572        self.copy_features(
573            &mut features,
574            &temporal_features,
575            feature_idx + param_features.len() + obj_features.len(),
576        );
577
578        Ok(features)
579    }
580
581    /// Extract parameter-based features
582    fn extract_parameter_features(&self, params: &ArrayView1<f64>) -> Array1<f64> {
583        let mut features = Array1::zeros(20);
584
585        if !params.is_empty() {
586            features[0] = params.view().mean().tanh();
587            features[1] = params.view().variance().sqrt().tanh();
588            features[2] = params.fold(-f64::INFINITY, |a, &b| a.max(b)).tanh();
589            features[3] = params.fold(f64::INFINITY, |a, &b| a.min(b)).tanh();
590            features[4] = (params.len() as f64).ln().tanh();
591
592            // Statistical moments
593            let mean = features[0];
594            let std = features[1];
595            if std > 1e-8 {
596                let skewness = params
597                    .iter()
598                    .map(|&x| ((x - mean) / std).powi(3))
599                    .sum::<f64>()
600                    / params.len() as f64;
601                features[5] = skewness.tanh();
602
603                let kurtosis = params
604                    .iter()
605                    .map(|&x| ((x - mean) / std).powi(4))
606                    .sum::<f64>()
607                    / params.len() as f64
608                    - 3.0;
609                features[6] = kurtosis.tanh();
610            }
611
612            // Norms
613            features[7] =
614                (params.iter().map(|&x| x.abs()).sum::<f64>() / params.len() as f64).tanh(); // L1
615            features[8] = (params.iter().map(|&x| x * x).sum::<f64>()).sqrt().tanh(); // L2
616
617            // Sparsity
618            let zero_count = params.iter().filter(|&&x| x.abs() < 1e-8).count();
619            features[9] = (zero_count as f64 / params.len() as f64).tanh();
620        }
621
622        features
623    }
624
625    /// Extract objective-based features
626    fn extract_objective_features<F>(
627        &mut self,
628        objective: &F,
629        params: &ArrayView1<f64>,
630    ) -> OptimizeResult<Array1<f64>>
631    where
632        F: Fn(&ArrayView1<f64>) -> f64,
633    {
634        let mut features = Array1::zeros(15);
635
636        let f0 = objective(params);
637        features[0] = f0.abs().ln().tanh();
638
639        // Gradient features using cached buffers
640        let h = 1e-6;
641        let gradient_sample_size = params.len().min(10); // Limit for efficiency
642        let (gradient_buffer, param_buffer) = self
643            .computation_cache
644            .get_gradient_and_param_buffers(gradient_sample_size, params.len());
645
646        // Copy parameters to buffer
647        for (i, &val) in params.iter().enumerate() {
648            if i < param_buffer.len() {
649                param_buffer[i] = val;
650            }
651        }
652
653        // Compute gradient components efficiently
654        for i in 0..gradient_sample_size {
655            let original_val = param_buffer[i];
656            param_buffer[i] = original_val + h;
657            let f_plus = objective(&param_buffer.view());
658            param_buffer[i] = original_val; // Restore
659
660            gradient_buffer[i] = (f_plus - f0) / h;
661        }
662
663        let gradient_norm = (gradient_buffer
664            .iter()
665            .take(gradient_sample_size)
666            .map(|&g| g * g)
667            .sum::<f64>())
668        .sqrt();
669        features[1] = gradient_norm.ln().tanh();
670
671        if gradient_sample_size > 0 {
672            let grad_mean = gradient_buffer
673                .iter()
674                .take(gradient_sample_size)
675                .sum::<f64>()
676                / gradient_sample_size as f64;
677            let grad_var = gradient_buffer
678                .iter()
679                .take(gradient_sample_size)
680                .map(|&g| (g - grad_mean).powi(2))
681                .sum::<f64>()
682                / gradient_sample_size as f64;
683
684            features[2] = grad_mean.tanh();
685            features[3] = grad_var.sqrt().tanh();
686        }
687
688        // Curvature approximation using cached buffer
689        if params.len() > 1 {
690            // Reuse param_buffer for mixed partial computation
691            param_buffer[0] += h;
692            param_buffer[1] += h;
693            let f_plus_plus = objective(&param_buffer.view());
694
695            param_buffer[1] -= 2.0 * h; // Now it's +h, -h
696            let f_plus_minus = objective(&param_buffer.view());
697
698            // Restore original values
699            param_buffer[0] -= h;
700            param_buffer[1] += h;
701
702            let mixed_partial = (f_plus_plus - f_plus_minus) / (2.0 * h);
703            features[4] = mixed_partial.tanh();
704        }
705
706        Ok(features)
707    }
708
709    /// Extract temporal features
710    fn extract_temporal_features(&self, step_number: usize) -> Array1<f64> {
711        let mut features = Array1::zeros(10);
712
713        features[0] = (step_number as f64).ln().tanh();
714        features[1] = (step_number as f64 / 1000.0).tanh(); // Normalized step
715
716        // Progress from performance history
717        if self.meta_state.performance_history.len() > 1 {
718            let recent_performance = &self.meta_state.performance_history
719                [self.meta_state.performance_history.len().saturating_sub(5)..];
720
721            if recent_performance.len() > 1 {
722                let trend = (recent_performance[recent_performance.len() - 1]
723                    - recent_performance[0])
724                    / recent_performance.len() as f64;
725                features[2] = trend.tanh();
726
727                let variance = recent_performance.iter().map(|&x| x * x).sum::<f64>()
728                    / recent_performance.len() as f64
729                    - (recent_performance.iter().sum::<f64>() / recent_performance.len() as f64)
730                        .powi(2);
731                features[3] = variance.sqrt().tanh();
732            }
733        }
734
735        features
736    }
737
738    /// Copy features to target array
739    fn copy_features(&self, target: &mut Array1<f64>, source: &Array1<f64>, start_idx: usize) {
740        for (i, &value) in source.iter().enumerate() {
741            if start_idx + i < target.len() {
742                target[start_idx + i] = value;
743            }
744        }
745    }
746
747    /// Update adaptive optimization statistics
748    fn update_adaptive_stats(&mut self, step: &AdaptiveOptimizationStep) -> OptimizeResult<()> {
749        // Update strategy switch count
750        if let Some(last_strategy) = self.adaptation_controller.strategy_history.back() {
751            if last_strategy.id != step.strategy.id {
752                self.adaptive_stats.strategy_switches += 1;
753            }
754        }
755
756        // Update adaptation rate
757        self.adaptive_stats.avg_adaptation_rate =
758            0.9 * self.adaptive_stats.avg_adaptation_rate + 0.1 * step.adaptation_signal;
759
760        // Update prediction accuracy (simplified)
761        self.adaptive_stats.prediction_accuracy =
762            0.95 * self.adaptive_stats.prediction_accuracy + 0.05 * step.confidence;
763
764        Ok(())
765    }
766
767    /// Train the neural networks on optimization data
768    pub fn train_networks(
769        &mut self,
770        training_data: &[OptimizationTrajectory],
771    ) -> OptimizeResult<()> {
772        for trajectory in training_data {
773            // Train optimization network
774            self.train_optimization_network(trajectory)?;
775
776            // Train performance predictor
777            self.train_performance_predictor(trajectory)?;
778
779            // Update adaptation controller
780            self.update_adaptation_controller(trajectory)?;
781        }
782
783        Ok(())
784    }
785
786    /// Train the optimization network
787    fn train_optimization_network(
788        &mut self,
789        trajectory: &OptimizationTrajectory,
790    ) -> OptimizeResult<()> {
791        // Simplified training using trajectory data
792        let learning_rate = self.config.meta_learning_rate;
793
794        for (i, state) in trajectory.states.iter().enumerate() {
795            if i + 1 < trajectory.actions.len() {
796                let target_action = &trajectory.actions[i + 1];
797                let predicted_action = self.optimization_network.forward(&state.view())?;
798
799                // Compute loss (simplified MSE)
800                let mut loss_gradient = Array1::zeros(predicted_action.len());
801                for j in 0..loss_gradient.len().min(target_action.len()) {
802                    loss_gradient[j] = 2.0 * (predicted_action[j] - target_action[j]);
803                }
804
805                // Backpropagate (simplified)
806                self.optimization_network
807                    .backward(&loss_gradient, learning_rate)?;
808            }
809        }
810
811        Ok(())
812    }
813
814    /// Train the performance predictor
815    fn train_performance_predictor(
816        &mut self,
817        trajectory: &OptimizationTrajectory,
818    ) -> OptimizeResult<()> {
819        // Simplified training for performance prediction
820        let learning_rate = self.config.meta_learning_rate * 0.5;
821
822        for (i, state) in trajectory.states.iter().enumerate() {
823            if i + self.performance_predictor.prediction_horizon
824                < trajectory.performance_values.len()
825            {
826                let target_performance = trajectory.performance_values
827                    [i + self.performance_predictor.prediction_horizon];
828                let predicted_performance = self.performance_predictor.predict(state)?;
829
830                let error = target_performance - predicted_performance;
831
832                // Update prediction network (simplified)
833                for row in self.performance_predictor.prediction_network.rows_mut() {
834                    for weight in row {
835                        *weight += learning_rate
836                            * error
837                            * scirs2_core::random::rng().random::<f64>()
838                            * 0.01;
839                    }
840                }
841            }
842        }
843
844        Ok(())
845    }
846
847    /// Update adaptation controller
848    fn update_adaptation_controller(
849        &mut self,
850        trajectory: &OptimizationTrajectory,
851    ) -> OptimizeResult<()> {
852        // Analyze trajectory for adaptation patterns
853        if trajectory.performance_values.len() > 2 {
854            let performance_trend = trajectory
855                .performance_values
856                .last()
857                .expect("Operation failed")
858                - trajectory.performance_values[0];
859
860            // Update strategy selector based on performance
861            if performance_trend > 0.0 {
862                // Good performance, reinforce current strategy
863                self.adaptation_controller.reinforce_current_strategy(0.1)?;
864            } else {
865                // Poor performance, encourage exploration
866                self.adaptation_controller.encourage_exploration(0.1)?;
867            }
868        }
869
870        Ok(())
871    }
872
873    /// Borrow the running statistics that `update_adaptive_stats` maintains after every adaptive step
874    pub fn get_adaptive_stats(&self) -> &AdaptiveOptimizationStats {
875        &self.adaptive_stats
876    }
877}
878
879/// Recorded optimization run used as training data by `train_networks`
880#[derive(Debug, Clone)]
881pub struct OptimizationTrajectory {
882    /// State feature vectors, one per recorded step
883    pub states: Vec<Array1<f64>>,
884    /// Action vectors; training pairs state `i` with action `i + 1`
885    pub actions: Vec<Array1<f64>>,
886    /// Performance values; the predictor's target for state `i` is the entry `prediction_horizon` later
887    pub performance_values: Vec<f64>,
888    /// Rewards (not read by the training code in this excerpt)
889    pub rewards: Vec<f64>,
890}
891
892/// Outcome of one call to `adaptive_optimization_step`
893#[derive(Debug, Clone)]
894pub struct AdaptiveOptimizationStep {
895    /// Strategy chosen by the adaptation controller for this step
896    pub strategy: OptimizationStrategy,
897    /// Value returned by the performance predictor for the current state features
898    pub predicted_performance: f64,
899    /// Value returned by the confidence estimator for the current state features
900    pub confidence: f64,
901    /// Adaptation signal reported by the adaptation controller
902    pub adaptation_signal: f64,
903    /// Output of the optimization network's forward pass for this step
904    pub network_output: Array1<f64>,
905}
906
907impl OptimizationNetwork {
908    /// Create new optimization network
909    pub fn new(architecture: NetworkArchitecture) -> Self {
910        let mut hidden_layers = Vec::new();
911
912        // Create hidden layers
913        let mut prev_size = architecture.input_size;
914        for (i, &hidden_size) in architecture.hidden_sizes.iter().enumerate() {
915            let activation = architecture
916                .activations
917                .get(i)
918                .copied()
919                .unwrap_or(ActivationType::ReLU);
920
921            hidden_layers.push(NeuralLayer::new(prev_size, hidden_size, activation));
922            prev_size = hidden_size;
923        }
924
925        // Create input and output layers
926        let input_activation = architecture
927            .activations
928            .first()
929            .copied()
930            .unwrap_or(ActivationType::ReLU);
931        let output_activation = architecture
932            .activations
933            .last()
934            .copied()
935            .unwrap_or(ActivationType::Tanh);
936
937        let input_layer = NeuralLayer::new(
938            architecture.input_size,
939            architecture.input_size,
940            input_activation,
941        );
942        let output_layer = NeuralLayer::new(prev_size, architecture.output_size, output_activation);
943
944        let recurrent_connections = if architecture.use_recurrent {
945            RecurrentConnections::new(prev_size)
946        } else {
947            RecurrentConnections::empty()
948        };
949
950        Self {
951            input_layer,
952            hidden_layers,
953            output_layer,
954            recurrent_connections,
955            architecture,
956        }
957    }
958
959    /// Forward pass through network
960    pub fn forward(&mut self, input: &ArrayView1<f64>) -> OptimizeResult<Array1<f64>> {
961        // Input layer
962        let mut current = self.input_layer.forward(input)?;
963
964        // Hidden layers
965        for layer in &mut self.hidden_layers {
966            current = layer.forward(&current.view())?;
967        }
968
969        // Apply recurrent connections if enabled
970        if self.architecture.use_recurrent {
971            current = self.recurrent_connections.apply(&current)?;
972        }
973
974        // Output layer
975        let output = self.output_layer.forward(&current.view())?;
976
977        Ok(output)
978    }
979
980    /// Backward pass (simplified)
981    pub fn backward(&mut self, gradient: &Array1<f64>, learning_rate: f64) -> OptimizeResult<()> {
982        // Simplified backpropagation
983        // In practice, this would implement proper gradient computation
984
985        // Update output layer
986        for i in 0..self.output_layer.weights.nrows() {
987            for j in 0..self.output_layer.weights.ncols() {
988                let grad = if i < gradient.len() { gradient[i] } else { 0.0 };
989                self.output_layer.weights[[i, j]] -= learning_rate * grad * 0.01;
990            }
991        }
992
993        // Update hidden layers (simplified)
994        for layer in &mut self.hidden_layers {
995            for i in 0..layer.weights.nrows() {
996                for j in 0..layer.weights.ncols() {
997                    layer.weights[[i, j]] -=
998                        learning_rate * scirs2_core::random::rng().random::<f64>() * 0.001;
999                }
1000            }
1001        }
1002
1003        Ok(())
1004    }
1005}
1006
1007impl NeuralLayer {
1008    /// Create new neural layer
1009    pub fn new(input_size: usize, output_size: usize, activation: ActivationType) -> Self {
1010        let xavier_scale = (2.0 / (input_size + output_size) as f64).sqrt();
1011
1012        Self {
1013            weights: Array2::from_shape_fn((output_size, input_size), |_| {
1014                (scirs2_core::random::rng().random::<f64>() - 0.5) * 2.0 * xavier_scale
1015            }),
1016            biases: Array1::zeros(output_size),
1017            size: output_size,
1018            dropout_rate: 0.1,
1019            layer_norm: Some(LayerNormalization::new(output_size)),
1020            activation: ActivationType::ReLU,
1021        }
1022    }
1023
1024    /// Forward pass through layer
1025    pub fn forward(&mut self, input: &ArrayView1<f64>) -> OptimizeResult<Array1<f64>> {
1026        let mut output = Array1::zeros(self.size);
1027
1028        // Linear transformation
1029        for i in 0..self.size {
1030            for j in 0..input.len().min(self.weights.ncols()) {
1031                output[i] += self.weights[[i, j]] * input[j];
1032            }
1033            output[i] += self.biases[i];
1034        }
1035
1036        // Layer normalization
1037        if let Some(ref mut layer_norm) = self.layer_norm {
1038            output = layer_norm.normalize(&output)?;
1039        }
1040
1041        // Activation
1042        output.mapv_inplace(|x| self.activation.apply(x));
1043
1044        // Dropout (simplified - just scaling)
1045        if self.dropout_rate > 0.0 {
1046            output *= 1.0 - self.dropout_rate;
1047        }
1048
1049        Ok(output)
1050    }
1051}
1052
1053impl LayerNormalization {
1054    /// Create new layer normalization
1055    pub fn new(size: usize) -> Self {
1056        Self {
1057            gamma: Array1::ones(size),
1058            beta: Array1::zeros(size),
1059            running_mean: Array1::zeros(size),
1060            running_var: Array1::ones(size),
1061            momentum: 0.9,
1062            epsilon: 1e-6,
1063        }
1064    }
1065
1066    /// Normalize input
1067    pub fn normalize(&mut self, input: &Array1<f64>) -> OptimizeResult<Array1<f64>> {
1068        let mean = input.mean_or(0.0);
1069        let var = input.variance();
1070        let std = (var + self.epsilon).sqrt();
1071
1072        // Update running statistics
1073        self.running_mean = &self.running_mean * self.momentum
1074            + &(Array1::from_elem(input.len(), mean) * (1.0 - self.momentum));
1075        self.running_var = &self.running_var * self.momentum
1076            + &(Array1::from_elem(input.len(), var) * (1.0 - self.momentum));
1077
1078        // Normalize
1079        let mut normalized = Array1::zeros(input.len());
1080        for i in 0..input.len().min(self.gamma.len()) {
1081            normalized[i] = self.gamma[i] * (input[i] - mean) / std + self.beta[i];
1082        }
1083
1084        Ok(normalized)
1085    }
1086}
1087
1088impl RecurrentConnections {
1089    /// Create new recurrent connections
1090    pub fn new(size: usize) -> Self {
1091        Self {
1092            hidden_state: Array1::zeros(size),
1093            cell_state: Array1::zeros(size),
1094            recurrent_weights: Array2::from_shape_fn((size, size), |_| {
1095                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1096            }),
1097            input_gate_weights: Array2::from_shape_fn((size, size), |_| {
1098                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1099            }),
1100            forget_gate_weights: Array2::from_shape_fn((size, size), |_| {
1101                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1102            }),
1103            output_gate_weights: Array2::from_shape_fn((size, size), |_| {
1104                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1105            }),
1106        }
1107    }
1108
1109    /// Create empty recurrent connections
1110    pub fn empty() -> Self {
1111        Self {
1112            hidden_state: Array1::zeros(0),
1113            cell_state: Array1::zeros(0),
1114            recurrent_weights: Array2::zeros((0, 0)),
1115            input_gate_weights: Array2::zeros((0, 0)),
1116            forget_gate_weights: Array2::zeros((0, 0)),
1117            output_gate_weights: Array2::zeros((0, 0)),
1118        }
1119    }
1120
1121    /// Apply recurrent connections (LSTM-like)
1122    pub fn apply(&mut self, input: &Array1<f64>) -> OptimizeResult<Array1<f64>> {
1123        if self.hidden_state.is_empty() {
1124            return Ok(input.clone());
1125        }
1126
1127        let size = self.hidden_state.len().min(input.len());
1128        let mut output = Array1::zeros(size);
1129
1130        // Simplified LSTM computation
1131        for i in 0..size {
1132            // Input gate
1133            let mut input_gate = 0.0;
1134            for j in 0..size {
1135                input_gate += self.input_gate_weights[[i, j]] * input[j];
1136            }
1137            input_gate = (input_gate).tanh();
1138
1139            // Forget gate
1140            let mut forget_gate = 0.0;
1141            for j in 0..size {
1142                forget_gate += self.forget_gate_weights[[i, j]] * self.hidden_state[j];
1143            }
1144            forget_gate = (forget_gate).tanh();
1145
1146            // Update cell state
1147            self.cell_state[i] = forget_gate * self.cell_state[i] + input_gate * input[i];
1148
1149            // Output gate
1150            let mut output_gate = 0.0;
1151            for j in 0..size {
1152                output_gate += self.output_gate_weights[[i, j]] * input[j];
1153            }
1154            output_gate = (output_gate).tanh();
1155
1156            // Update hidden state and output
1157            self.hidden_state[i] = output_gate * self.cell_state[i].tanh();
1158            output[i] = self.hidden_state[i];
1159        }
1160
1161        Ok(output)
1162    }
1163}
1164
1165impl AdaptationController {
1166    /// Create new adaptation controller
1167    pub fn new(hidden_size: usize) -> Self {
1168        Self {
1169            strategy_selector: StrategySelector::new(hidden_size),
1170            adaptation_rate_controller: AdaptationRateController::new(),
1171            progress_monitor: ProgressMonitor::new(),
1172            strategy_history: BoundedHistory::new(100),
1173        }
1174    }
1175
1176    /// Select optimization strategy
1177    pub fn select_strategy(
1178        &mut self,
1179        network_output: &Array1<f64>,
1180        performance_prediction: &f64,
1181    ) -> OptimizeResult<OptimizationStrategy> {
1182        let strategy = self
1183            .strategy_selector
1184            .select(network_output, *performance_prediction)?;
1185        self.strategy_history.push(strategy.clone());
1186
1187        Ok(strategy)
1188    }
1189
1190    /// Monitor progress and adapt
1191    pub fn monitor_and_adapt(&mut self, performance_prediction: &f64) -> OptimizeResult<()> {
1192        self.progress_monitor.update(*performance_prediction)?;
1193
1194        match self.progress_monitor.current_state {
1195            ProgressState::Stagnating | ProgressState::Deteriorating => {
1196                self.adaptation_rate_controller.increase_rate()?;
1197            }
1198            ProgressState::Improving => {
1199                self.adaptation_rate_controller.maintain_rate()?;
1200            }
1201            _ => {}
1202        }
1203
1204        Ok(())
1205    }
1206
1207    /// Get adaptation signal
1208    pub fn get_adaptation_signal(&self) -> f64 {
1209        self.adaptation_rate_controller.current_rate
1210    }
1211
1212    /// Reinforce current strategy
1213    pub fn reinforce_current_strategy(&mut self, strength: f64) -> OptimizeResult<()> {
1214        self.strategy_selector.reinforce_current(strength)
1215    }
1216
1217    /// Encourage exploration
1218    pub fn encourage_exploration(&mut self, strength: f64) -> OptimizeResult<()> {
1219        self.strategy_selector.encourage_exploration(strength)
1220    }
1221}
1222
1223impl StrategySelector {
1224    /// Create new strategy selector
1225    pub fn new(hidden_size: usize) -> Self {
1226        let num_strategies = 5;
1227
1228        Self {
1229            selection_network: Array2::from_shape_fn((num_strategies, hidden_size), |_| {
1230                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1231            }),
1232            strategy_embeddings: Array2::from_shape_fn((num_strategies, hidden_size), |_| {
1233                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1234            }),
1235            strategy_weights: Array1::from_elem(num_strategies, 1.0 / num_strategies as f64),
1236            available_strategies: vec![
1237                OptimizationStrategy::gradient_descent(),
1238                OptimizationStrategy::momentum(),
1239                OptimizationStrategy::adaptive(),
1240                OptimizationStrategy::quasi_newton(),
1241                OptimizationStrategy::trust_region(),
1242            ],
1243        }
1244    }
1245
1246    /// Select strategy based on network output
1247    pub fn select(
1248        &self,
1249        network_output: &Array1<f64>,
1250        performance_prediction: f64,
1251    ) -> OptimizeResult<OptimizationStrategy> {
1252        let mut strategy_scores = Array1::zeros(self.available_strategies.len());
1253
1254        // Compute strategy scores
1255        for i in 0..strategy_scores.len() {
1256            for j in 0..network_output.len().min(self.selection_network.ncols()) {
1257                strategy_scores[i] += self.selection_network[[i, j]] * network_output[j];
1258            }
1259
1260            // Add performance prediction influence
1261            strategy_scores[i] += performance_prediction * 0.1;
1262
1263            // Add current weight
1264            strategy_scores[i] += self.strategy_weights[i];
1265        }
1266
1267        // Apply softmax to get probabilities
1268        let max_score = strategy_scores.fold(-f64::INFINITY, |a, &b| a.max(b));
1269        strategy_scores.mapv_inplace(|x| (x - max_score).exp());
1270        let sum_scores = strategy_scores.sum();
1271        if sum_scores > 0.0 {
1272            strategy_scores /= sum_scores;
1273        }
1274
1275        // Select strategy (argmax for deterministic, or sample for stochastic)
1276        let selected_idx = strategy_scores
1277            .iter()
1278            .enumerate()
1279            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
1280            .map(|(i, _)| i)
1281            .unwrap_or(0);
1282
1283        Ok(self.available_strategies[selected_idx].clone())
1284    }
1285
1286    /// Reinforce current strategy
1287    pub fn reinforce_current(&mut self, strength: f64) -> OptimizeResult<()> {
1288        // Increase weight of current best strategy
1289        if let Some((best_idx, _)) = self
1290            .strategy_weights
1291            .iter()
1292            .enumerate()
1293            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
1294        {
1295            self.strategy_weights[best_idx] += strength;
1296        }
1297
1298        // Renormalize
1299        let sum = self.strategy_weights.sum();
1300        if sum > 0.0 {
1301            self.strategy_weights /= sum;
1302        }
1303
1304        Ok(())
1305    }
1306
1307    /// Encourage exploration
1308    pub fn encourage_exploration(&mut self, strength: f64) -> OptimizeResult<()> {
1309        // Add uniform noise to encourage exploration
1310        for weight in &mut self.strategy_weights {
1311            *weight += strength * scirs2_core::random::rng().random::<f64>();
1312        }
1313
1314        // Renormalize
1315        let sum = self.strategy_weights.sum();
1316        if sum > 0.0 {
1317            self.strategy_weights /= sum;
1318        }
1319
1320        Ok(())
1321    }
1322}
1323
1324impl OptimizationStrategy {
1325    /// Create gradient descent strategy
1326    pub fn gradient_descent() -> Self {
1327        Self {
1328            id: "gradient_descent".to_string(),
1329            parameters: Array1::from(vec![0.01, 0.0, 0.0]), // [learning_rate, momentum, adaptivity]
1330            expected_performance: 0.7,
1331            computational_cost: 0.3,
1332            robustness: 0.8,
1333        }
1334    }
1335
1336    /// Create momentum strategy
1337    pub fn momentum() -> Self {
1338        Self {
1339            id: "momentum".to_string(),
1340            parameters: Array1::from(vec![0.01, 0.9, 0.0]),
1341            expected_performance: 0.8,
1342            computational_cost: 0.4,
1343            robustness: 0.7,
1344        }
1345    }
1346
1347    /// Create adaptive strategy
1348    pub fn adaptive() -> Self {
1349        Self {
1350            id: "adaptive".to_string(),
1351            parameters: Array1::from(vec![0.001, 0.0, 0.9]),
1352            expected_performance: 0.85,
1353            computational_cost: 0.6,
1354            robustness: 0.9,
1355        }
1356    }
1357
1358    /// Create quasi-Newton strategy
1359    pub fn quasi_newton() -> Self {
1360        Self {
1361            id: "quasi_newton".to_string(),
1362            parameters: Array1::from(vec![0.1, 0.0, 0.5]),
1363            expected_performance: 0.9,
1364            computational_cost: 0.8,
1365            robustness: 0.6,
1366        }
1367    }
1368
1369    /// Create trust region strategy
1370    pub fn trust_region() -> Self {
1371        Self {
1372            id: "trust_region".to_string(),
1373            parameters: Array1::from(vec![0.1, 0.0, 0.7]),
1374            expected_performance: 0.95,
1375            computational_cost: 0.9,
1376            robustness: 0.95,
1377        }
1378    }
1379}
1380
1381impl Default for AdaptationRateController {
1382    fn default() -> Self {
1383        Self::new()
1384    }
1385}
1386
1387impl AdaptationRateController {
1388    /// Create new adaptation rate controller
1389    pub fn new() -> Self {
1390        Self {
1391            controller_network: Array2::from_shape_fn((1, 10), |_| {
1392                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1393            }),
1394            current_rate: 0.1,
1395            rate_history: BoundedHistory::new(100),
1396            performance_correlation: 0.0,
1397        }
1398    }
1399
1400    /// Increase adaptation rate
1401    pub fn increase_rate(&mut self) -> OptimizeResult<()> {
1402        self.current_rate = (self.current_rate * 1.2).min(1.0);
1403        self.rate_history.push(self.current_rate);
1404
1405        Ok(())
1406    }
1407
1408    /// Maintain current rate
1409    pub fn maintain_rate(&mut self) -> OptimizeResult<()> {
1410        self.rate_history.push(self.current_rate);
1411
1412        Ok(())
1413    }
1414}
1415
1416impl Default for ProgressMonitor {
1417    fn default() -> Self {
1418        Self::new()
1419    }
1420}
1421
1422impl ProgressMonitor {
1423    /// Create new progress monitor
1424    pub fn new() -> Self {
1425        Self {
1426            progress_indicators: vec![
1427                ProgressIndicator::new("objective_improvement".to_string()),
1428                ProgressIndicator::new("gradient_norm".to_string()),
1429                ProgressIndicator::new("step_size".to_string()),
1430            ],
1431            monitoring_network: Array2::from_shape_fn((4, 10), |_| {
1432                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1433            }),
1434            alert_thresholds: HashMap::new(),
1435            current_state: ProgressState::Improving,
1436        }
1437    }
1438
1439    /// Update progress monitoring
1440    pub fn update(&mut self, performance_value: f64) -> OptimizeResult<()> {
1441        // Update progress indicators
1442        for indicator in &mut self.progress_indicators {
1443            indicator.update(performance_value)?;
1444        }
1445
1446        // Determine current state
1447        self.current_state = self.determine_progress_state()?;
1448
1449        Ok(())
1450    }
1451
1452    /// Determine progress state
1453    fn determine_progress_state(&self) -> OptimizeResult<ProgressState> {
1454        let mut improvement_count = 0;
1455        let mut stagnation_count = 0;
1456
1457        for indicator in &self.progress_indicators {
1458            if indicator.trend > 0.1 {
1459                improvement_count += 1;
1460            } else if indicator.trend.abs() < 0.01 {
1461                stagnation_count += 1;
1462            }
1463        }
1464
1465        if improvement_count >= 2 {
1466            Ok(ProgressState::Improving)
1467        } else if stagnation_count >= 2 {
1468            Ok(ProgressState::Stagnating)
1469        } else {
1470            Ok(ProgressState::Deteriorating)
1471        }
1472    }
1473}
1474
1475impl ProgressIndicator {
1476    /// Create new progress indicator
1477    pub fn new(name: String) -> Self {
1478        Self {
1479            name,
1480            value: 0.0,
1481            history: BoundedHistory::new(50),
1482            trend: 0.0,
1483            importance: 1.0,
1484        }
1485    }
1486
1487    /// Update indicator
1488    pub fn update(&mut self, new_value: f64) -> OptimizeResult<()> {
1489        self.value = new_value;
1490        self.history.push(new_value);
1491
1492        // Compute trend using bounded history
1493        if self.history.len() > 2 {
1494            // Access the underlying data to compute trend
1495            let first = self.history.data.front().copied().unwrap_or(new_value);
1496            let last = self.history.data.back().copied().unwrap_or(new_value);
1497            self.trend = (last - first) / self.history.len() as f64;
1498        }
1499
1500        Ok(())
1501    }
1502}
1503
1504impl PerformancePredictor {
1505    /// Create new performance predictor
1506    pub fn new(hidden_size: usize) -> Self {
1507        Self {
1508            prediction_network: Array2::from_shape_fn((1, hidden_size), |_| {
1509                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1510            }),
1511            feature_extractor: FeatureExtractor::new(hidden_size),
1512            prediction_horizon: 5,
1513            prediction_accuracy: 0.5,
1514            confidence_estimator: ConfidenceEstimator::new(hidden_size),
1515        }
1516    }
1517
1518    /// Predict performance
1519    pub fn predict(&self, state_features: &Array1<f64>) -> OptimizeResult<f64> {
1520        // Extract features for prediction
1521        let prediction_features = self.feature_extractor.extract(state_features)?;
1522
1523        // Forward pass through prediction network
1524        let mut prediction = 0.0;
1525        for j in 0..prediction_features
1526            .len()
1527            .min(self.prediction_network.ncols())
1528        {
1529            prediction += self.prediction_network[[0, j]] * prediction_features[j];
1530        }
1531
1532        Ok(prediction.tanh()) // Normalize to [-1, 1]
1533    }
1534}
1535
1536impl FeatureExtractor {
1537    /// Create new feature extractor
1538    pub fn new(feature_dim: usize) -> Self {
1539        Self {
1540            extraction_layers: vec![Array2::from_shape_fn((feature_dim, feature_dim), |_| {
1541                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1542            })],
1543            feature_dim,
1544            temporal_features: TemporalFeatures::new(feature_dim),
1545        }
1546    }
1547
1548    /// Extract features for prediction
1549    pub fn extract(&self, input: &Array1<f64>) -> OptimizeResult<Array1<f64>> {
1550        let mut features = input.clone();
1551
1552        // Apply extraction layers
1553        for layer in &self.extraction_layers {
1554            let output_dim = layer.nrows().min(features.len());
1555            let input_dim = layer.ncols().min(features.len());
1556            let mut new_features = Array1::zeros(output_dim);
1557
1558            for i in 0..output_dim {
1559                for j in 0..input_dim {
1560                    new_features[i] += layer[[i, j]] * features[j];
1561                }
1562            }
1563            features = new_features;
1564        }
1565
1566        Ok(features)
1567    }
1568}
1569
1570impl TemporalFeatures {
1571    /// Create new temporal features
1572    pub fn new(dim: usize) -> Self {
1573        Self {
1574            time_embeddings: Array2::from_shape_fn((dim, 100), |_| {
1575                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1576            }),
1577            trend_analyzer: TrendAnalyzer::new(),
1578            seasonality_detector: SeasonalityDetector::new(dim),
1579        }
1580    }
1581}
1582
1583impl Default for TrendAnalyzer {
1584    fn default() -> Self {
1585        Self::new()
1586    }
1587}
1588
1589impl TrendAnalyzer {
1590    /// Create new trend analyzer
1591    pub fn new() -> Self {
1592        Self {
1593            trend_coefficients: Array1::from(vec![1.0, 0.5, 0.1]),
1594            window_size: 10,
1595            trend_strength: 0.0,
1596        }
1597    }
1598}
1599
1600impl SeasonalityDetector {
1601    /// Create new seasonality detector
1602    pub fn new(dim: usize) -> Self {
1603        Self {
1604            seasonal_patterns: Array2::zeros((dim, 12)),
1605            pattern_strength: Array1::zeros(12),
1606            detection_threshold: 0.1,
1607        }
1608    }
1609}
1610
1611impl ConfidenceEstimator {
1612    /// Create new confidence estimator
1613    pub fn new(hidden_size: usize) -> Self {
1614        Self {
1615            confidence_network: Array2::from_shape_fn((1, hidden_size), |_| {
1616                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1617            }),
1618            uncertainty_quantifier: UncertaintyQuantifier::new(),
1619            calibration_params: Array1::from(vec![1.0, 0.0, 0.1]),
1620        }
1621    }
1622
1623    /// Estimate confidence in prediction
1624    pub fn estimate_confidence(&self, features: &Array1<f64>) -> OptimizeResult<f64> {
1625        let mut confidence = 0.0;
1626        for j in 0..features.len().min(self.confidence_network.ncols()) {
1627            confidence += self.confidence_network[[0, j]] * features[j];
1628        }
1629
1630        // Apply sigmoid to get [0, 1] range
1631        Ok(1.0 / (1.0 + (-confidence).exp()))
1632    }
1633}
1634
1635impl Default for UncertaintyQuantifier {
1636    fn default() -> Self {
1637        Self::new()
1638    }
1639}
1640
1641impl UncertaintyQuantifier {
1642    /// Create new uncertainty quantifier
1643    pub fn new() -> Self {
1644        Self {
1645            epistemic_uncertainty: 0.1,
1646            aleatoric_uncertainty: 0.1,
1647            method: UncertaintyMethod::Dropout,
1648        }
1649    }
1650}
1651
1652impl Default for AdaptiveOptimizationStats {
1653    fn default() -> Self {
1654        Self {
1655            strategy_switches: 0,
1656            avg_adaptation_rate: 0.1,
1657            prediction_accuracy: 0.5,
1658            computational_efficiency: 0.5,
1659            robustness_score: 0.5,
1660        }
1661    }
1662}
1663
1664impl LearnedOptimizer for NeuralAdaptiveOptimizer {
1665    fn meta_train(&mut self, training_tasks: &[TrainingTask]) -> OptimizeResult<()> {
1666        // Convert training tasks to trajectories
1667        let mut trajectories = Vec::new();
1668
1669        for task in training_tasks {
1670            let trajectory = self.create_trajectory_from_task(task)?;
1671            trajectories.push(trajectory);
1672        }
1673
1674        // Train networks
1675        self.train_networks(&trajectories)?;
1676
1677        Ok(())
1678    }
1679
1680    fn adapt_to_problem(
1681        &mut self,
1682        _problem: &OptimizationProblem,
1683        _params: &ArrayView1<f64>,
1684    ) -> OptimizeResult<()> {
1685        // Adaptation happens dynamically during optimization
1686        Ok(())
1687    }
1688
1689    fn optimize<F>(
1690        &mut self,
1691        objective: F,
1692        initial_params: &ArrayView1<f64>,
1693    ) -> OptimizeResult<OptimizeResults<f64>>
1694    where
1695        F: Fn(&ArrayView1<f64>) -> f64,
1696    {
1697        let mut current_params = initial_params.to_owned();
1698        let mut best_value = objective(initial_params);
1699        let mut iterations = 0;
1700
1701        for step_number in 0..1000 {
1702            iterations = step_number;
1703
1704            // Get adaptive optimization step
1705            let adaptive_step =
1706                self.adaptive_optimization_step(&objective, &current_params.view(), step_number)?;
1707
1708            // Apply the selected strategy
1709            let direction = self.compute_direction_for_strategy(
1710                &objective,
1711                &current_params,
1712                &adaptive_step.strategy,
1713            )?;
1714            let step_size = self.compute_step_size_for_strategy(&adaptive_step.strategy);
1715
1716            // Update parameters
1717            for i in 0..current_params.len().min(direction.len()) {
1718                current_params[i] -= step_size * direction[i];
1719            }
1720
1721            let current_value = objective(&current_params.view());
1722
1723            if current_value < best_value {
1724                best_value = current_value;
1725            }
1726
1727            // Record performance for adaptation
1728            self.meta_state.performance_history.push(current_value);
1729
1730            // Check convergence
1731            if adaptive_step.confidence > 0.95 && step_size < 1e-8 {
1732                break;
1733            }
1734        }
1735
1736        Ok(OptimizeResults::<f64> {
1737            x: current_params,
1738            fun: best_value,
1739            success: true,
1740            nit: iterations,
1741            message: "Neural adaptive optimization completed".to_string(),
1742            jac: None,
1743            hess: None,
1744            constr: None,
1745            nfev: iterations * 5, // Neural network evaluations
1746            njev: 0,
1747            nhev: 0,
1748            maxcv: 0,
1749            status: 0,
1750        })
1751    }
1752
1753    fn get_state(&self) -> &MetaOptimizerState {
1754        &self.meta_state
1755    }
1756
1757    fn reset(&mut self) {
1758        self.adaptive_stats = AdaptiveOptimizationStats::default();
1759        self.meta_state.performance_history.clear();
1760        self.adaptation_controller.strategy_history.clear();
1761        // Clear computation cache buffers
1762        self.computation_cache.gradient_buffer.fill(0.0);
1763        self.computation_cache.feature_buffer.fill(0.0);
1764        self.computation_cache.param_buffer.fill(0.0);
1765        self.computation_cache.network_output_buffer.fill(0.0);
1766        self.computation_cache.temp_buffer.fill(0.0);
1767    }
1768}
1769
1770impl NeuralAdaptiveOptimizer {
1771    fn create_trajectory_from_task(
1772        &self,
1773        task: &TrainingTask,
1774    ) -> OptimizeResult<OptimizationTrajectory> {
1775        // Simplified trajectory creation
1776        let num_steps = 10;
1777        let mut states = Vec::new();
1778        let mut actions = Vec::new();
1779        let mut performance_values = Vec::new();
1780        let mut rewards = Vec::new();
1781
1782        for i in 0..num_steps {
1783            states.push(Array1::from_shape_fn(
1784                self.optimization_network.architecture.input_size,
1785                |_| scirs2_core::random::rng().random::<f64>(),
1786            ));
1787
1788            actions.push(Array1::from_shape_fn(
1789                self.optimization_network.architecture.output_size,
1790                |_| scirs2_core::random::rng().random::<f64>(),
1791            ));
1792
1793            performance_values.push(1.0 - i as f64 / num_steps as f64);
1794            rewards.push(if i > 0 {
1795                performance_values[i - 1] - performance_values[i]
1796            } else {
1797                0.0
1798            });
1799        }
1800
1801        Ok(OptimizationTrajectory {
1802            states,
1803            actions,
1804            performance_values,
1805            rewards,
1806        })
1807    }
1808
1809    fn compute_direction_for_strategy<F>(
1810        &mut self,
1811        objective: &F,
1812        params: &Array1<f64>,
1813        strategy: &OptimizationStrategy,
1814    ) -> OptimizeResult<Array1<f64>>
1815    where
1816        F: Fn(&ArrayView1<f64>) -> f64,
1817    {
1818        // Compute finite difference gradient using cached buffers
1819        let h = 1e-6;
1820        let f0 = objective(&params.view());
1821        let (gradient_buffer, param_buffer) = self
1822            .computation_cache
1823            .get_gradient_and_param_buffers(params.len(), params.len());
1824
1825        // Copy parameters to buffer
1826        for (i, &val) in params.iter().enumerate() {
1827            if i < param_buffer.len() {
1828                param_buffer[i] = val;
1829            }
1830        }
1831
1832        for i in 0..params.len().min(gradient_buffer.len()) {
1833            let original_val = param_buffer[i];
1834            param_buffer[i] = original_val + h;
1835            let f_plus = objective(&param_buffer.view());
1836            param_buffer[i] = original_val; // Restore
1837            gradient_buffer[i] = (f_plus - f0) / h;
1838        }
1839
1840        // Create result gradient from buffer
1841        let mut gradient = Array1::zeros(params.len());
1842        for i in 0..params.len().min(gradient_buffer.len()) {
1843            gradient[i] = gradient_buffer[i];
1844        }
1845
1846        // Apply strategy-specific transformations
1847        match strategy.id.as_str() {
1848            "momentum" => {
1849                // Apply momentum (simplified)
1850                gradient *= strategy.parameters[1]; // momentum factor
1851            }
1852            "adaptive" => {
1853                // Apply adaptive scaling
1854                let adaptivity = strategy.parameters[2];
1855                gradient.mapv_inplace(|g| g / (1.0 + adaptivity * g.abs()));
1856            }
1857            _ => {
1858                // Default gradient descent
1859            }
1860        }
1861
1862        Ok(gradient)
1863    }
1864
1865    fn compute_step_size_for_strategy(&self, strategy: &OptimizationStrategy) -> f64 {
1866        strategy.parameters[0] // Use first parameter as learning rate
1867    }
1868}
1869
1870/// Convenience function for neural adaptive optimization
1871#[allow(dead_code)]
1872pub fn neural_adaptive_optimize<F>(
1873    objective: F,
1874    initial_params: &ArrayView1<f64>,
1875    config: Option<LearnedOptimizationConfig>,
1876) -> OptimizeResult<OptimizeResults<f64>>
1877where
1878    F: Fn(&ArrayView1<f64>) -> f64,
1879{
1880    let config = config.unwrap_or_default();
1881    let mut optimizer = NeuralAdaptiveOptimizer::new(config);
1882    optimizer.optimize(objective, initial_params)
1883}
1884
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed optimizer has recorded no strategy switches.
    #[test]
    fn test_neural_adaptive_optimizer_creation() {
        let config = LearnedOptimizationConfig::default();
        let optimizer = NeuralAdaptiveOptimizer::new(config);

        assert_eq!(optimizer.adaptive_stats.strategy_switches, 0);
    }

    /// A 10 -> [16, 8] -> 4 network yields four finite outputs for a constant input.
    #[test]
    fn test_optimization_network() {
        let architecture = NetworkArchitecture {
            input_size: 10,
            hidden_sizes: vec![16, 8],
            output_size: 4,
            activations: vec![
                ActivationType::ReLU,
                ActivationType::ReLU,
                ActivationType::Tanh,
            ],
            use_recurrent: false,
            use_attention: false,
        };

        let mut network = OptimizationNetwork::new(architecture);
        let input = Array1::from(vec![1.0; 10]);

        let output = network.forward(&input.view()).expect("Operation failed");

        assert_eq!(output.len(), 4);
        assert!(output.iter().all(|&x| x.is_finite()));
    }

    /// A single 5 -> 3 ReLU layer yields three finite outputs.
    #[test]
    fn test_neural_layer() {
        let mut layer = NeuralLayer::new(5, 3, ActivationType::ReLU);
        let input = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0]);

        let output = layer.forward(&input.view()).expect("Operation failed");

        assert_eq!(output.len(), 3);
        assert!(output.iter().all(|&x| x.is_finite()));
    }

    /// The selector returns a named strategy with a non-negative expected performance.
    #[test]
    fn test_strategy_selector() {
        let selector = StrategySelector::new(16);
        let network_output = Array1::from(vec![0.5; 16]);

        let strategy = selector
            .select(&network_output, 0.8)
            .expect("Operation failed");

        assert!(!strategy.id.is_empty());
        assert!(strategy.expected_performance >= 0.0);
    }

    /// Predictions for a constant feature vector fall inside the closed interval [-1, 1].
    #[test]
    fn test_performance_predictor() {
        let predictor = PerformancePredictor::new(32);
        let features = Array1::from(vec![0.1; 32]);

        let prediction = predictor.predict(&features).expect("Operation failed");

        // Range check expressed with `contains` instead of a manual two-sided comparison
        // (clippy::manual_range_contains); NaN still fails the assertion.
        assert!((-1.0..=1.0).contains(&prediction));
    }

    /// End-to-end run on a 2-D quadratic bowl through the convenience entry point.
    #[test]
    fn test_neural_adaptive_optimization() {
        let objective = |x: &ArrayView1<f64>| x[0].powi(2) + x[1].powi(2);
        let initial = Array1::from(vec![2.0, 2.0]);

        let config = LearnedOptimizationConfig {
            hidden_size: 32,
            max_parameters: 50,
            ..Default::default()
        };

        let result = neural_adaptive_optimize(objective, &initial.view(), Some(config))
            .expect("Operation failed");

        assert!(result.fun >= 0.0);
        assert_eq!(result.x.len(), 2);
        assert!(result.success);
    }
}
1974
/// No-op placeholder function that exists only to prevent unused module warnings.
#[allow(dead_code)]
pub fn placeholder() {}
scirs2_optimize/learned_optimizers/neural_adaptive_optimizer.rs

scirs2_optimize/learned_optimizers/
neural_adaptive_optimizer.rs