scirs2_optimize/learned_optimizers/
neural_adaptive_optimizer.rs

1//! Neural Adaptive Optimizer
2//!
3//! Implementation of neural networks that learn adaptive optimization strategies
4//! and can dynamically adjust their behavior based on optimization progress.
5
6use super::{
7    ActivationType, LearnedOptimizationConfig, LearnedOptimizer, MetaOptimizerState,
8    OptimizationProblem, TrainingTask,
9};
10use crate::error::OptimizeResult;
11use crate::result::OptimizeResults;
12use scirs2_core::ndarray::{Array1, Array2, ArrayView1};
13use scirs2_core::random::Rng;
14use statrs::statistics::Statistics;
15use std::collections::{HashMap, VecDeque};
16
/// Neural Adaptive Optimizer with dynamic strategy learning.
///
/// Bundles the networks and bookkeeping consulted by
/// `adaptive_optimization_step`: the optimization network produces the raw
/// output, the performance predictor scores the current state features, and
/// the adaptation controller turns both into a selected strategy.
#[derive(Debug, Clone)]
pub struct NeuralAdaptiveOptimizer {
    /// Configuration supplied to `new`; `meta_learning_rate` is read during training
    config: LearnedOptimizationConfig,
    /// Primary optimization network (run once per adaptive step and during training)
    optimization_network: OptimizationNetwork,
    /// Adaptation controller (selects the strategy and exposes the adaptation signal)
    adaptation_controller: AdaptationController,
    /// Performance predictor (also owns the confidence estimator)
    performance_predictor: PerformancePredictor,
    /// Meta-optimizer state; `performance_history` feeds the temporal features
    meta_state: MetaOptimizerState,
    /// Adaptive statistics, refreshed after every adaptive step
    adaptive_stats: AdaptiveOptimizationStats,
    /// Memory-efficient computation cache (scratch buffers for finite differences)
    computation_cache: ComputationCache,
}
35
/// Memory-efficient computation cache for reusing allocations.
///
/// Every buffer is allocated once in `new` and handed out by the `get_*`
/// accessors, which reallocate only when a caller asks for more elements than
/// the buffer currently holds. A returned buffer may therefore be *longer*
/// than the requested size.
#[derive(Debug, Clone)]
pub struct ComputationCache {
    /// Reusable gradient buffer
    gradient_buffer: Array1<f64>,
    /// Reusable feature buffer
    feature_buffer: Array1<f64>,
    /// Reusable parameter buffer
    param_buffer: Array1<f64>,
    /// Network output buffer
    network_output_buffer: Array1<f64>,
    /// Temporary computation buffer
    temp_buffer: Array1<f64>,
    /// Size passed to `new`. Only `resize_buffer` clamps to it; the `get_*`
    /// accessors grow past it when a larger size is requested.
    max_buffer_size: usize,
}
52
/// Memory-efficient bounded history collection.
///
/// A FIFO queue that never holds more than `max_capacity` items: once full,
/// each `push` evicts the oldest entry before storing the new one.
#[derive(Debug, Clone)]
pub struct BoundedHistory<T> {
    /// Internal storage, ordered oldest (front) to newest (back)
    pub(crate) data: VecDeque<T>,
    /// Maximum number of items retained
    max_capacity: usize,
}

impl<T> BoundedHistory<T> {
    /// Create new bounded history with specified capacity.
    ///
    /// A capacity of `0` yields a history that silently discards every item.
    pub fn new(capacity: usize) -> Self {
        Self {
            data: VecDeque::with_capacity(capacity),
            max_capacity: capacity,
        }
    }

    /// Add item, removing oldest if at capacity.
    ///
    /// With a zero capacity the item is dropped, so `len() <= max_capacity`
    /// holds for every capacity (previously one item was retained).
    pub fn push(&mut self, item: T) {
        if self.max_capacity == 0 {
            return;
        }
        if self.data.len() >= self.max_capacity {
            self.data.pop_front();
        }
        self.data.push_back(item);
    }

    /// Get the most recent item, or `None` when the history is empty
    pub fn back(&self) -> Option<&T> {
        self.data.back()
    }

    /// Clear all items (the configured capacity is unchanged)
    pub fn clear(&mut self) {
        self.data.clear();
    }

    /// Get the number of items currently stored
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Check if empty
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }
}
99
100impl ComputationCache {
101    /// Create new computation cache
102    pub fn new(max_size: usize) -> Self {
103        Self {
104            gradient_buffer: Array1::zeros(max_size),
105            feature_buffer: Array1::zeros(max_size),
106            param_buffer: Array1::zeros(max_size),
107            network_output_buffer: Array1::zeros(max_size),
108            temp_buffer: Array1::zeros(max_size),
109            max_buffer_size: max_size,
110        }
111    }
112
113    /// Get reusable gradient buffer
114    pub fn get_gradient_buffer(&mut self, size: usize) -> &mut Array1<f64> {
115        if self.gradient_buffer.len() < size {
116            self.gradient_buffer = Array1::zeros(size);
117        }
118        &mut self.gradient_buffer
119    }
120
121    /// Get reusable feature buffer
122    pub fn get_feature_buffer(&mut self, size: usize) -> &mut Array1<f64> {
123        if self.feature_buffer.len() < size {
124            self.feature_buffer = Array1::zeros(size);
125        }
126        &mut self.feature_buffer
127    }
128
129    /// Get reusable parameter buffer
130    pub fn get_param_buffer(&mut self, size: usize) -> &mut Array1<f64> {
131        if self.param_buffer.len() < size {
132            self.param_buffer = Array1::zeros(size);
133        }
134        &mut self.param_buffer
135    }
136
137    /// Get network output buffer
138    pub fn get_network_output_buffer(&mut self, size: usize) -> &mut Array1<f64> {
139        if self.network_output_buffer.len() < size {
140            self.network_output_buffer = Array1::zeros(size);
141        }
142        &mut self.network_output_buffer
143    }
144
145    /// Get temporary buffer
146    pub fn get_temp_buffer(&mut self, size: usize) -> &mut Array1<f64> {
147        if self.temp_buffer.len() < size {
148            self.temp_buffer = Array1::zeros(size);
149        }
150        &mut self.temp_buffer
151    }
152
153    /// Get both gradient and param buffers simultaneously to avoid borrowing conflicts
154    pub fn get_gradient_and_param_buffers(
155        &mut self,
156        gradient_size: usize,
157        param_size: usize,
158    ) -> (&mut Array1<f64>, &mut Array1<f64>) {
159        if self.gradient_buffer.len() < gradient_size {
160            self.gradient_buffer = Array1::zeros(gradient_size);
161        }
162        if self.param_buffer.len() < param_size {
163            self.param_buffer = Array1::zeros(param_size);
164        }
165        (&mut self.gradient_buffer, &mut self.param_buffer)
166    }
167
168    /// Resize buffer if needed (up to max size)
169    fn resize_buffer(&mut self, buffer: &mut Array1<f64>, requested_size: usize) {
170        let size = requested_size.min(self.max_buffer_size);
171        if buffer.len() != size {
172            *buffer = Array1::zeros(size);
173        } else {
174            buffer.fill(0.0);
175        }
176    }
177}
178
/// Neural network for optimization strategy.
///
/// `forward` runs the input layer, then each hidden layer in order, then the
/// recurrent connections (only when `architecture.use_recurrent` is set), and
/// finally the output layer.
#[derive(Debug, Clone)]
pub struct OptimizationNetwork {
    /// Input layer for problem state (built as `input_size` -> `input_size`)
    input_layer: NeuralLayer,
    /// Hidden layers for strategy computation, one per entry of `hidden_sizes`
    hidden_layers: Vec<NeuralLayer>,
    /// Output layer for optimization actions
    output_layer: NeuralLayer,
    /// Recurrent connections for memory, sized to the last hidden layer
    recurrent_connections: RecurrentConnections,
    /// Network architecture this network was built from
    architecture: NetworkArchitecture,
}
193
/// Neural layer.
///
/// Constructed through `NeuralLayer::new(input, output, activation)`; the
/// constructor and forward pass are defined outside this part of the file.
#[derive(Debug, Clone)]
pub struct NeuralLayer {
    /// Weights
    // NOTE(review): the simplified backward pass indexes the gradient by row,
    // which suggests rows correspond to outputs — confirm against `new`.
    weights: Array2<f64>,
    /// Biases
    biases: Array1<f64>,
    /// Activation function
    activation: ActivationType,
    /// Layer size
    size: usize,
    /// Dropout rate
    dropout_rate: f64,
    /// Layer normalization (`None` when the layer is not normalized)
    layer_norm: Option<LayerNormalization>,
}
210
/// Layer normalization parameters and running statistics.
///
/// NOTE(review): no code in this part of the file reads these fields; the
/// descriptions below restate the field names — confirm against the impl.
#[derive(Debug, Clone)]
pub struct LayerNormalization {
    /// Scale parameter
    gamma: Array1<f64>,
    /// Shift parameter
    beta: Array1<f64>,
    /// Running mean
    running_mean: Array1<f64>,
    /// Running variance
    running_var: Array1<f64>,
    /// Momentum for running stats
    momentum: f64,
    /// Epsilon for numerical stability
    epsilon: f64,
}
227
/// Recurrent connections for memory.
///
/// Built with `RecurrentConnections::new(size)` when the architecture enables
/// recurrence and with `RecurrentConnections::empty()` otherwise; applied to
/// the last hidden activation during the network's forward pass.
#[derive(Debug, Clone)]
pub struct RecurrentConnections {
    /// Hidden state
    hidden_state: Array1<f64>,
    /// Cell state (for LSTM-like behavior)
    cell_state: Array1<f64>,
    /// Recurrent weights
    recurrent_weights: Array2<f64>,
    /// Input gate weights
    input_gate_weights: Array2<f64>,
    /// Forget gate weights
    forget_gate_weights: Array2<f64>,
    /// Output gate weights
    output_gate_weights: Array2<f64>,
}
244
/// Network architecture specification consumed by `OptimizationNetwork::new`.
#[derive(Debug, Clone)]
pub struct NetworkArchitecture {
    /// Input size (length of the state-feature vector fed to the network)
    input_size: usize,
    /// Hidden sizes, one entry per hidden layer in order
    hidden_sizes: Vec<usize>,
    /// Output size
    output_size: usize,
    /// Activation functions per layer. Hidden layer `i` uses entry `i`
    /// (ReLU when missing); the input layer uses the first entry and the
    /// output layer the last.
    activations: Vec<ActivationType>,
    /// Use recurrent connections
    use_recurrent: bool,
    /// Use attention mechanisms
    // NOTE(review): not read anywhere in this part of the file.
    use_attention: bool,
}
261
/// Adaptation controller for dynamic strategy adjustment.
///
/// The optimizer calls `select_strategy`, `monitor_and_adapt` and
/// `get_adaptation_signal` on it each step; those methods are defined outside
/// this part of the file.
#[derive(Debug, Clone)]
pub struct AdaptationController {
    /// Strategy selector network
    strategy_selector: StrategySelector,
    /// Adaptation rate controller
    adaptation_rate_controller: AdaptationRateController,
    /// Progress monitor
    progress_monitor: ProgressMonitor,
    /// Strategy history (bounded to prevent memory growth); its newest entry
    /// is compared against the current strategy to count switches
    strategy_history: BoundedHistory<OptimizationStrategy>,
}
274
/// Strategy selector state.
///
/// NOTE(review): no code in this part of the file reads these fields; the
/// descriptions below restate the field names — confirm against the impl.
#[derive(Debug, Clone)]
pub struct StrategySelector {
    /// Selection network
    selection_network: Array2<f64>,
    /// Strategy embeddings
    strategy_embeddings: Array2<f64>,
    /// Current strategy weights
    strategy_weights: Array1<f64>,
    /// Available strategies
    available_strategies: Vec<OptimizationStrategy>,
}
287
/// Optimization strategy description returned by the adaptation controller.
#[derive(Debug, Clone)]
pub struct OptimizationStrategy {
    /// Strategy identifier; two strategies are treated as "the same" when
    /// their ids are equal (used to count strategy switches)
    id: String,
    /// Strategy parameters
    parameters: Array1<f64>,
    /// Expected performance
    expected_performance: f64,
    /// Computational cost
    computational_cost: f64,
    /// Robustness score
    robustness: f64,
}
302
/// Adaptation rate controller state.
///
/// NOTE(review): no code in this part of the file reads these fields; the
/// descriptions below restate the field names — confirm against the impl.
#[derive(Debug, Clone)]
pub struct AdaptationRateController {
    /// Controller network
    controller_network: Array2<f64>,
    /// Current adaptation rate
    current_rate: f64,
    /// Rate history (bounded)
    rate_history: BoundedHistory<f64>,
    /// Performance correlation
    performance_correlation: f64,
}
315
/// Progress monitor state.
///
/// NOTE(review): no code in this part of the file reads these fields; the
/// descriptions below restate the field names — confirm against the impl.
#[derive(Debug, Clone)]
pub struct ProgressMonitor {
    /// Progress indicators
    progress_indicators: Vec<ProgressIndicator>,
    /// Monitoring network
    monitoring_network: Array2<f64>,
    /// Alert thresholds, keyed by name
    // NOTE(review): presumably keyed by indicator name — verify.
    alert_thresholds: HashMap<String, f64>,
    /// Current progress state
    current_state: ProgressState,
}
328
/// A single named progress indicator with its bounded value history.
///
/// NOTE(review): no code in this part of the file reads these fields; the
/// descriptions below restate the field names — confirm against the impl.
#[derive(Debug, Clone)]
pub struct ProgressIndicator {
    /// Indicator name
    name: String,
    /// Current value
    value: f64,
    /// Historical values (bounded)
    history: BoundedHistory<f64>,
    /// Trend direction
    trend: f64,
    /// Importance weight
    importance: f64,
}
343
/// Progress state tracked by `ProgressMonitor::current_state`.
///
/// NOTE(review): the conditions that select each variant are defined outside
/// this part of the file.
#[derive(Debug, Clone)]
pub enum ProgressState {
    Improving,
    Stagnating,
    Deteriorating,
    Converged,
    Diverging,
}
353
/// Performance predictor.
///
/// `predict` (defined outside this part of the file) maps a state-feature
/// vector to a scalar performance estimate.
#[derive(Debug, Clone)]
pub struct PerformancePredictor {
    /// Prediction network; its weights are perturbed in place during training
    prediction_network: Array2<f64>,
    /// Feature extractor
    feature_extractor: FeatureExtractor,
    /// Prediction horizon: training pairs state `i` with the performance
    /// value at index `i + prediction_horizon`
    prediction_horizon: usize,
    /// Prediction accuracy
    prediction_accuracy: f64,
    /// Confidence estimator, queried once per adaptive step
    confidence_estimator: ConfidenceEstimator,
}
368
/// Feature extractor for performance prediction.
///
/// NOTE(review): no code in this part of the file reads these fields; the
/// descriptions below restate the field names — confirm against the impl.
#[derive(Debug, Clone)]
pub struct FeatureExtractor {
    /// Extraction layers
    extraction_layers: Vec<Array2<f64>>,
    /// Feature dimension
    feature_dim: usize,
    /// Temporal features
    temporal_features: TemporalFeatures,
}
379
/// Temporal features used by the feature extractor.
///
/// NOTE(review): no code in this part of the file reads these fields; the
/// descriptions below restate the field names — confirm against the impl.
#[derive(Debug, Clone)]
pub struct TemporalFeatures {
    /// Time series embeddings
    time_embeddings: Array2<f64>,
    /// Trend analysis
    trend_analyzer: TrendAnalyzer,
    /// Seasonality detector
    seasonality_detector: SeasonalityDetector,
}
390
/// Trend analyzer state.
///
/// NOTE(review): no code in this part of the file reads these fields; the
/// descriptions below restate the field names — confirm against the impl.
#[derive(Debug, Clone)]
pub struct TrendAnalyzer {
    /// Trend coefficients
    trend_coefficients: Array1<f64>,
    /// Window size for trend analysis
    window_size: usize,
    /// Trend strength
    trend_strength: f64,
}
401
/// Seasonality detector state.
///
/// NOTE(review): no code in this part of the file reads these fields; the
/// descriptions below restate the field names — confirm against the impl.
#[derive(Debug, Clone)]
pub struct SeasonalityDetector {
    /// Seasonal patterns
    seasonal_patterns: Array2<f64>,
    /// Pattern strength
    pattern_strength: Array1<f64>,
    /// Detection threshold
    detection_threshold: f64,
}
412
/// Confidence estimator.
///
/// `estimate_confidence` (defined outside this part of the file) is called
/// with the state features on every adaptive step.
#[derive(Debug, Clone)]
pub struct ConfidenceEstimator {
    /// Confidence network
    confidence_network: Array2<f64>,
    /// Uncertainty quantification
    uncertainty_quantifier: UncertaintyQuantifier,
    /// Calibration parameters
    calibration_params: Array1<f64>,
}
423
/// Uncertainty quantification state.
///
/// NOTE(review): no code in this part of the file reads these fields; the
/// descriptions below restate the field names — confirm against the impl.
#[derive(Debug, Clone)]
pub struct UncertaintyQuantifier {
    /// Epistemic uncertainty
    epistemic_uncertainty: f64,
    /// Aleatoric uncertainty
    aleatoric_uncertainty: f64,
    /// Uncertainty estimation method
    method: UncertaintyMethod,
}
434
/// Uncertainty estimation methods selectable via `UncertaintyQuantifier::method`.
///
/// NOTE(review): how each variant changes the estimate is defined outside
/// this part of the file.
#[derive(Debug, Clone)]
pub enum UncertaintyMethod {
    Dropout,
    Ensemble,
    Bayesian,
    Evidential,
}
443
/// Adaptive optimization statistics, updated after every adaptive step.
#[derive(Debug, Clone)]
pub struct AdaptiveOptimizationStats {
    /// Number of strategy switches (incremented when the chosen strategy's id
    /// differs from the newest entry of the strategy history)
    strategy_switches: usize,
    /// Average adaptation rate: exponential moving average of the adaptation
    /// signal with weights 0.9 (old) / 0.1 (new)
    avg_adaptation_rate: f64,
    /// Prediction accuracy: exponential moving average of the step confidence
    /// with weights 0.95 (old) / 0.05 (new)
    prediction_accuracy: f64,
    /// Computational efficiency
    // NOTE(review): not updated anywhere in this part of the file.
    computational_efficiency: f64,
    /// Robustness score
    // NOTE(review): not updated anywhere in this part of the file.
    robustness_score: f64,
}
458
459impl NeuralAdaptiveOptimizer {
460    /// Create new neural adaptive optimizer
461    pub fn new(config: LearnedOptimizationConfig) -> Self {
462        let architecture = NetworkArchitecture {
463            input_size: config.max_parameters.min(100),
464            hidden_sizes: vec![config.hidden_size, config.hidden_size / 2],
465            output_size: 32, // Number of optimization actions
466            activations: vec![
467                ActivationType::GELU,
468                ActivationType::GELU,
469                ActivationType::Tanh,
470            ],
471            use_recurrent: true,
472            use_attention: config.use_transformer,
473        };
474
475        let optimization_network = OptimizationNetwork::new(architecture);
476        let adaptation_controller = AdaptationController::new(config.hidden_size);
477        let performance_predictor = PerformancePredictor::new(config.hidden_size);
478        let hidden_size = config.hidden_size;
479        let max_buffer_size = config.max_parameters.max(1000); // Reasonable upper bound
480
481        Self {
482            config,
483            optimization_network,
484            adaptation_controller,
485            performance_predictor,
486            meta_state: MetaOptimizerState {
487                meta_params: Array1::zeros(hidden_size),
488                network_weights: Array2::zeros((hidden_size, hidden_size)),
489                performance_history: Vec::new(),
490                adaptation_stats: super::AdaptationStatistics::default(),
491                episode: 0,
492            },
493            adaptive_stats: AdaptiveOptimizationStats::default(),
494            computation_cache: ComputationCache::new(max_buffer_size),
495        }
496    }
497
498    /// Perform adaptive optimization step
499    pub fn adaptive_optimization_step<F>(
500        &mut self,
501        objective: &F,
502        current_params: &ArrayView1<f64>,
503        step_number: usize,
504    ) -> OptimizeResult<AdaptiveOptimizationStep>
505    where
506        F: Fn(&ArrayView1<f64>) -> f64,
507    {
508        // Extract current state features
509        let state_features = self.extract_state_features(objective, current_params, step_number)?;
510
511        // Forward pass through optimization network
512        let network_output = self.optimization_network.forward(&state_features.view())?;
513
514        // Predict performance
515        let performance_prediction = self.performance_predictor.predict(&state_features)?;
516
517        // Select optimization strategy
518        let strategy = self
519            .adaptation_controller
520            .select_strategy(&network_output, &performance_prediction)?;
521
522        // Monitor progress and adapt if necessary
523        self.adaptation_controller
524            .monitor_and_adapt(&performance_prediction)?;
525
526        // Create optimization step
527        let step = AdaptiveOptimizationStep {
528            strategy: strategy.clone(),
529            predicted_performance: performance_prediction,
530            confidence: self
531                .performance_predictor
532                .confidence_estimator
533                .estimate_confidence(&state_features)?,
534            adaptation_signal: self.adaptation_controller.get_adaptation_signal(),
535            network_output: network_output.clone(),
536        };
537
538        // Update statistics
539        self.update_adaptive_stats(&step)?;
540
541        Ok(step)
542    }
543
544    /// Extract state features for neural network
545    fn extract_state_features<F>(
546        &mut self,
547        objective: &F,
548        current_params: &ArrayView1<f64>,
549        step_number: usize,
550    ) -> OptimizeResult<Array1<f64>>
551    where
552        F: Fn(&ArrayView1<f64>) -> f64,
553    {
554        let mut features = Array1::zeros(self.optimization_network.architecture.input_size);
555        let feature_idx = 0;
556
557        // Parameter features
558        let param_features = self.extract_parameter_features(current_params);
559        self.copy_features(&mut features, &param_features, feature_idx);
560
561        // Objective features
562        let obj_features = self.extract_objective_features(objective, current_params)?;
563        self.copy_features(
564            &mut features,
565            &obj_features,
566            feature_idx + param_features.len(),
567        );
568
569        // Temporal features
570        let temporal_features = self.extract_temporal_features(step_number);
571        self.copy_features(
572            &mut features,
573            &temporal_features,
574            feature_idx + param_features.len() + obj_features.len(),
575        );
576
577        Ok(features)
578    }
579
580    /// Extract parameter-based features
581    fn extract_parameter_features(&self, params: &ArrayView1<f64>) -> Array1<f64> {
582        let mut features = Array1::zeros(20);
583
584        if !params.is_empty() {
585            features[0] = params.view().mean().tanh();
586            features[1] = params.view().variance().sqrt().tanh();
587            features[2] = params.fold(-f64::INFINITY, |a, &b| a.max(b)).tanh();
588            features[3] = params.fold(f64::INFINITY, |a, &b| a.min(b)).tanh();
589            features[4] = (params.len() as f64).ln().tanh();
590
591            // Statistical moments
592            let mean = features[0];
593            let std = features[1];
594            if std > 1e-8 {
595                let skewness = params
596                    .iter()
597                    .map(|&x| ((x - mean) / std).powi(3))
598                    .sum::<f64>()
599                    / params.len() as f64;
600                features[5] = skewness.tanh();
601
602                let kurtosis = params
603                    .iter()
604                    .map(|&x| ((x - mean) / std).powi(4))
605                    .sum::<f64>()
606                    / params.len() as f64
607                    - 3.0;
608                features[6] = kurtosis.tanh();
609            }
610
611            // Norms
612            features[7] =
613                (params.iter().map(|&x| x.abs()).sum::<f64>() / params.len() as f64).tanh(); // L1
614            features[8] = (params.iter().map(|&x| x * x).sum::<f64>()).sqrt().tanh(); // L2
615
616            // Sparsity
617            let zero_count = params.iter().filter(|&&x| x.abs() < 1e-8).count();
618            features[9] = (zero_count as f64 / params.len() as f64).tanh();
619        }
620
621        features
622    }
623
624    /// Extract objective-based features
625    fn extract_objective_features<F>(
626        &mut self,
627        objective: &F,
628        params: &ArrayView1<f64>,
629    ) -> OptimizeResult<Array1<f64>>
630    where
631        F: Fn(&ArrayView1<f64>) -> f64,
632    {
633        let mut features = Array1::zeros(15);
634
635        let f0 = objective(params);
636        features[0] = f0.abs().ln().tanh();
637
638        // Gradient features using cached buffers
639        let h = 1e-6;
640        let gradient_sample_size = params.len().min(10); // Limit for efficiency
641        let (gradient_buffer, param_buffer) = self
642            .computation_cache
643            .get_gradient_and_param_buffers(gradient_sample_size, params.len());
644
645        // Copy parameters to buffer
646        for (i, &val) in params.iter().enumerate() {
647            if i < param_buffer.len() {
648                param_buffer[i] = val;
649            }
650        }
651
652        // Compute gradient components efficiently
653        for i in 0..gradient_sample_size {
654            let original_val = param_buffer[i];
655            param_buffer[i] = original_val + h;
656            let f_plus = objective(&param_buffer.view());
657            param_buffer[i] = original_val; // Restore
658
659            gradient_buffer[i] = (f_plus - f0) / h;
660        }
661
662        let gradient_norm = (gradient_buffer
663            .iter()
664            .take(gradient_sample_size)
665            .map(|&g| g * g)
666            .sum::<f64>())
667        .sqrt();
668        features[1] = gradient_norm.ln().tanh();
669
670        if gradient_sample_size > 0 {
671            let grad_mean = gradient_buffer
672                .iter()
673                .take(gradient_sample_size)
674                .sum::<f64>()
675                / gradient_sample_size as f64;
676            let grad_var = gradient_buffer
677                .iter()
678                .take(gradient_sample_size)
679                .map(|&g| (g - grad_mean).powi(2))
680                .sum::<f64>()
681                / gradient_sample_size as f64;
682
683            features[2] = grad_mean.tanh();
684            features[3] = grad_var.sqrt().tanh();
685        }
686
687        // Curvature approximation using cached buffer
688        if params.len() > 1 {
689            // Reuse param_buffer for mixed partial computation
690            param_buffer[0] += h;
691            param_buffer[1] += h;
692            let f_plus_plus = objective(&param_buffer.view());
693
694            param_buffer[1] -= 2.0 * h; // Now it's +h, -h
695            let f_plus_minus = objective(&param_buffer.view());
696
697            // Restore original values
698            param_buffer[0] -= h;
699            param_buffer[1] += h;
700
701            let mixed_partial = (f_plus_plus - f_plus_minus) / (2.0 * h);
702            features[4] = mixed_partial.tanh();
703        }
704
705        Ok(features)
706    }
707
708    /// Extract temporal features
709    fn extract_temporal_features(&self, step_number: usize) -> Array1<f64> {
710        let mut features = Array1::zeros(10);
711
712        features[0] = (step_number as f64).ln().tanh();
713        features[1] = (step_number as f64 / 1000.0).tanh(); // Normalized step
714
715        // Progress from performance history
716        if self.meta_state.performance_history.len() > 1 {
717            let recent_performance = &self.meta_state.performance_history
718                [self.meta_state.performance_history.len().saturating_sub(5)..];
719
720            if recent_performance.len() > 1 {
721                let trend = (recent_performance[recent_performance.len() - 1]
722                    - recent_performance[0])
723                    / recent_performance.len() as f64;
724                features[2] = trend.tanh();
725
726                let variance = recent_performance.iter().map(|&x| x * x).sum::<f64>()
727                    / recent_performance.len() as f64
728                    - (recent_performance.iter().sum::<f64>() / recent_performance.len() as f64)
729                        .powi(2);
730                features[3] = variance.sqrt().tanh();
731            }
732        }
733
734        features
735    }
736
737    /// Copy features to target array
738    fn copy_features(&self, target: &mut Array1<f64>, source: &Array1<f64>, start_idx: usize) {
739        for (i, &value) in source.iter().enumerate() {
740            if start_idx + i < target.len() {
741                target[start_idx + i] = value;
742            }
743        }
744    }
745
746    /// Update adaptive optimization statistics
747    fn update_adaptive_stats(&mut self, step: &AdaptiveOptimizationStep) -> OptimizeResult<()> {
748        // Update strategy switch count
749        if let Some(last_strategy) = self.adaptation_controller.strategy_history.back() {
750            if last_strategy.id != step.strategy.id {
751                self.adaptive_stats.strategy_switches += 1;
752            }
753        }
754
755        // Update adaptation rate
756        self.adaptive_stats.avg_adaptation_rate =
757            0.9 * self.adaptive_stats.avg_adaptation_rate + 0.1 * step.adaptation_signal;
758
759        // Update prediction accuracy (simplified)
760        self.adaptive_stats.prediction_accuracy =
761            0.95 * self.adaptive_stats.prediction_accuracy + 0.05 * step.confidence;
762
763        Ok(())
764    }
765
766    /// Train the neural networks on optimization data
767    pub fn train_networks(
768        &mut self,
769        training_data: &[OptimizationTrajectory],
770    ) -> OptimizeResult<()> {
771        for trajectory in training_data {
772            // Train optimization network
773            self.train_optimization_network(trajectory)?;
774
775            // Train performance predictor
776            self.train_performance_predictor(trajectory)?;
777
778            // Update adaptation controller
779            self.update_adaptation_controller(trajectory)?;
780        }
781
782        Ok(())
783    }
784
785    /// Train the optimization network
786    fn train_optimization_network(
787        &mut self,
788        trajectory: &OptimizationTrajectory,
789    ) -> OptimizeResult<()> {
790        // Simplified training using trajectory data
791        let learning_rate = self.config.meta_learning_rate;
792
793        for (i, state) in trajectory.states.iter().enumerate() {
794            if i + 1 < trajectory.actions.len() {
795                let target_action = &trajectory.actions[i + 1];
796                let predicted_action = self.optimization_network.forward(&state.view())?;
797
798                // Compute loss (simplified MSE)
799                let mut loss_gradient = Array1::zeros(predicted_action.len());
800                for j in 0..loss_gradient.len().min(target_action.len()) {
801                    loss_gradient[j] = 2.0 * (predicted_action[j] - target_action[j]);
802                }
803
804                // Backpropagate (simplified)
805                self.optimization_network
806                    .backward(&loss_gradient, learning_rate)?;
807            }
808        }
809
810        Ok(())
811    }
812
813    /// Train the performance predictor
814    fn train_performance_predictor(
815        &mut self,
816        trajectory: &OptimizationTrajectory,
817    ) -> OptimizeResult<()> {
818        // Simplified training for performance prediction
819        let learning_rate = self.config.meta_learning_rate * 0.5;
820
821        for (i, state) in trajectory.states.iter().enumerate() {
822            if i + self.performance_predictor.prediction_horizon
823                < trajectory.performance_values.len()
824            {
825                let target_performance = trajectory.performance_values
826                    [i + self.performance_predictor.prediction_horizon];
827                let predicted_performance = self.performance_predictor.predict(state)?;
828
829                let error = target_performance - predicted_performance;
830
831                // Update prediction network (simplified)
832                for row in self.performance_predictor.prediction_network.rows_mut() {
833                    for weight in row {
834                        *weight += learning_rate
835                            * error
836                            * scirs2_core::random::rng().random::<f64>()
837                            * 0.01;
838                    }
839                }
840            }
841        }
842
843        Ok(())
844    }
845
846    /// Update adaptation controller
847    fn update_adaptation_controller(
848        &mut self,
849        trajectory: &OptimizationTrajectory,
850    ) -> OptimizeResult<()> {
851        // Analyze trajectory for adaptation patterns
852        if trajectory.performance_values.len() > 2 {
853            let performance_trend =
854                trajectory.performance_values.last().unwrap() - trajectory.performance_values[0];
855
856            // Update strategy selector based on performance
857            if performance_trend > 0.0 {
858                // Good performance, reinforce current strategy
859                self.adaptation_controller.reinforce_current_strategy(0.1)?;
860            } else {
861                // Poor performance, encourage exploration
862                self.adaptation_controller.encourage_exploration(0.1)?;
863            }
864        }
865
866        Ok(())
867    }
868
    /// Get adaptive optimization statistics.
    ///
    /// Returns a shared reference to the statistics accumulated so far; they
    /// are updated at the end of every `adaptive_optimization_step`.
    pub fn get_adaptive_stats(&self) -> &AdaptiveOptimizationStats {
        &self.adaptive_stats
    }
873}
874
/// Optimization trajectory for training.
///
/// The sequences are indexed by step. Training pairs `states[i]` with
/// `actions[i + 1]` and with `performance_values[i + prediction_horizon]`, so
/// the vectors need not have equal length — unmatched states are skipped.
#[derive(Debug, Clone)]
pub struct OptimizationTrajectory {
    /// State sequence (each entry is fed to the networks as an input vector)
    pub states: Vec<Array1<f64>>,
    /// Action sequence (regression targets for the optimization network)
    pub actions: Vec<Array1<f64>>,
    /// Performance values; the first-to-last change drives controller updates
    pub performance_values: Vec<f64>,
    /// Rewards
    // NOTE(review): not read by the training code in this part of the file.
    pub rewards: Vec<f64>,
}
887
/// Adaptive optimization step result produced by `adaptive_optimization_step`.
#[derive(Debug, Clone)]
pub struct AdaptiveOptimizationStep {
    /// Selected strategy
    pub strategy: OptimizationStrategy,
    /// Predicted performance (output of the performance predictor)
    pub predicted_performance: f64,
    /// Confidence in prediction (output of the confidence estimator)
    pub confidence: f64,
    /// Adaptation signal strength reported by the adaptation controller
    pub adaptation_signal: f64,
    /// Raw network output of the optimization network for this step
    pub network_output: Array1<f64>,
}
902
903impl OptimizationNetwork {
904    /// Create new optimization network
905    pub fn new(architecture: NetworkArchitecture) -> Self {
906        let mut hidden_layers = Vec::new();
907
908        // Create hidden layers
909        let mut prev_size = architecture.input_size;
910        for (i, &hidden_size) in architecture.hidden_sizes.iter().enumerate() {
911            let activation = architecture
912                .activations
913                .get(i)
914                .copied()
915                .unwrap_or(ActivationType::ReLU);
916
917            hidden_layers.push(NeuralLayer::new(prev_size, hidden_size, activation));
918            prev_size = hidden_size;
919        }
920
921        // Create input and output layers
922        let input_activation = architecture
923            .activations
924            .first()
925            .copied()
926            .unwrap_or(ActivationType::ReLU);
927        let output_activation = architecture
928            .activations
929            .last()
930            .copied()
931            .unwrap_or(ActivationType::Tanh);
932
933        let input_layer = NeuralLayer::new(
934            architecture.input_size,
935            architecture.input_size,
936            input_activation,
937        );
938        let output_layer = NeuralLayer::new(prev_size, architecture.output_size, output_activation);
939
940        let recurrent_connections = if architecture.use_recurrent {
941            RecurrentConnections::new(prev_size)
942        } else {
943            RecurrentConnections::empty()
944        };
945
946        Self {
947            input_layer,
948            hidden_layers,
949            output_layer,
950            recurrent_connections,
951            architecture,
952        }
953    }
954
955    /// Forward pass through network
956    pub fn forward(&mut self, input: &ArrayView1<f64>) -> OptimizeResult<Array1<f64>> {
957        // Input layer
958        let mut current = self.input_layer.forward(input)?;
959
960        // Hidden layers
961        for layer in &mut self.hidden_layers {
962            current = layer.forward(&current.view())?;
963        }
964
965        // Apply recurrent connections if enabled
966        if self.architecture.use_recurrent {
967            current = self.recurrent_connections.apply(&current)?;
968        }
969
970        // Output layer
971        let output = self.output_layer.forward(&current.view())?;
972
973        Ok(output)
974    }
975
976    /// Backward pass (simplified)
977    pub fn backward(&mut self, gradient: &Array1<f64>, learning_rate: f64) -> OptimizeResult<()> {
978        // Simplified backpropagation
979        // In practice, this would implement proper gradient computation
980
981        // Update output layer
982        for i in 0..self.output_layer.weights.nrows() {
983            for j in 0..self.output_layer.weights.ncols() {
984                let grad = if i < gradient.len() { gradient[i] } else { 0.0 };
985                self.output_layer.weights[[i, j]] -= learning_rate * grad * 0.01;
986            }
987        }
988
989        // Update hidden layers (simplified)
990        for layer in &mut self.hidden_layers {
991            for i in 0..layer.weights.nrows() {
992                for j in 0..layer.weights.ncols() {
993                    layer.weights[[i, j]] -=
994                        learning_rate * scirs2_core::random::rng().random::<f64>() * 0.001;
995                }
996            }
997        }
998
999        Ok(())
1000    }
1001}
1002
1003impl NeuralLayer {
1004    /// Create new neural layer
1005    pub fn new(input_size: usize, output_size: usize, activation: ActivationType) -> Self {
1006        let xavier_scale = (2.0 / (input_size + output_size) as f64).sqrt();
1007
1008        Self {
1009            weights: Array2::from_shape_fn((output_size, input_size), |_| {
1010                (scirs2_core::random::rng().random::<f64>() - 0.5) * 2.0 * xavier_scale
1011            }),
1012            biases: Array1::zeros(output_size),
1013            size: output_size,
1014            dropout_rate: 0.1,
1015            layer_norm: Some(LayerNormalization::new(output_size)),
1016            activation: ActivationType::ReLU,
1017        }
1018    }
1019
1020    /// Forward pass through layer
1021    pub fn forward(&mut self, input: &ArrayView1<f64>) -> OptimizeResult<Array1<f64>> {
1022        let mut output = Array1::zeros(self.size);
1023
1024        // Linear transformation
1025        for i in 0..self.size {
1026            for j in 0..input.len().min(self.weights.ncols()) {
1027                output[i] += self.weights[[i, j]] * input[j];
1028            }
1029            output[i] += self.biases[i];
1030        }
1031
1032        // Layer normalization
1033        if let Some(ref mut layer_norm) = self.layer_norm {
1034            output = layer_norm.normalize(&output)?;
1035        }
1036
1037        // Activation
1038        output.mapv_inplace(|x| self.activation.apply(x));
1039
1040        // Dropout (simplified - just scaling)
1041        if self.dropout_rate > 0.0 {
1042            output *= 1.0 - self.dropout_rate;
1043        }
1044
1045        Ok(output)
1046    }
1047}
1048
1049impl LayerNormalization {
1050    /// Create new layer normalization
1051    pub fn new(size: usize) -> Self {
1052        Self {
1053            gamma: Array1::ones(size),
1054            beta: Array1::zeros(size),
1055            running_mean: Array1::zeros(size),
1056            running_var: Array1::ones(size),
1057            momentum: 0.9,
1058            epsilon: 1e-6,
1059        }
1060    }
1061
1062    /// Normalize input
1063    pub fn normalize(&mut self, input: &Array1<f64>) -> OptimizeResult<Array1<f64>> {
1064        let mean = input.mean().unwrap_or(0.0);
1065        let var = input.variance();
1066        let std = (var + self.epsilon).sqrt();
1067
1068        // Update running statistics
1069        self.running_mean = &self.running_mean * self.momentum
1070            + &(Array1::from_elem(input.len(), mean) * (1.0 - self.momentum));
1071        self.running_var = &self.running_var * self.momentum
1072            + &(Array1::from_elem(input.len(), var) * (1.0 - self.momentum));
1073
1074        // Normalize
1075        let mut normalized = Array1::zeros(input.len());
1076        for i in 0..input.len().min(self.gamma.len()) {
1077            normalized[i] = self.gamma[i] * (input[i] - mean) / std + self.beta[i];
1078        }
1079
1080        Ok(normalized)
1081    }
1082}
1083
1084impl RecurrentConnections {
1085    /// Create new recurrent connections
1086    pub fn new(size: usize) -> Self {
1087        Self {
1088            hidden_state: Array1::zeros(size),
1089            cell_state: Array1::zeros(size),
1090            recurrent_weights: Array2::from_shape_fn((size, size), |_| {
1091                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1092            }),
1093            input_gate_weights: Array2::from_shape_fn((size, size), |_| {
1094                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1095            }),
1096            forget_gate_weights: Array2::from_shape_fn((size, size), |_| {
1097                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1098            }),
1099            output_gate_weights: Array2::from_shape_fn((size, size), |_| {
1100                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1101            }),
1102        }
1103    }
1104
1105    /// Create empty recurrent connections
1106    pub fn empty() -> Self {
1107        Self {
1108            hidden_state: Array1::zeros(0),
1109            cell_state: Array1::zeros(0),
1110            recurrent_weights: Array2::zeros((0, 0)),
1111            input_gate_weights: Array2::zeros((0, 0)),
1112            forget_gate_weights: Array2::zeros((0, 0)),
1113            output_gate_weights: Array2::zeros((0, 0)),
1114        }
1115    }
1116
1117    /// Apply recurrent connections (LSTM-like)
1118    pub fn apply(&mut self, input: &Array1<f64>) -> OptimizeResult<Array1<f64>> {
1119        if self.hidden_state.is_empty() {
1120            return Ok(input.clone());
1121        }
1122
1123        let size = self.hidden_state.len().min(input.len());
1124        let mut output = Array1::zeros(size);
1125
1126        // Simplified LSTM computation
1127        for i in 0..size {
1128            // Input gate
1129            let mut input_gate = 0.0;
1130            for j in 0..size {
1131                input_gate += self.input_gate_weights[[i, j]] * input[j];
1132            }
1133            input_gate = (input_gate).tanh();
1134
1135            // Forget gate
1136            let mut forget_gate = 0.0;
1137            for j in 0..size {
1138                forget_gate += self.forget_gate_weights[[i, j]] * self.hidden_state[j];
1139            }
1140            forget_gate = (forget_gate).tanh();
1141
1142            // Update cell state
1143            self.cell_state[i] = forget_gate * self.cell_state[i] + input_gate * input[i];
1144
1145            // Output gate
1146            let mut output_gate = 0.0;
1147            for j in 0..size {
1148                output_gate += self.output_gate_weights[[i, j]] * input[j];
1149            }
1150            output_gate = (output_gate).tanh();
1151
1152            // Update hidden state and output
1153            self.hidden_state[i] = output_gate * self.cell_state[i].tanh();
1154            output[i] = self.hidden_state[i];
1155        }
1156
1157        Ok(output)
1158    }
1159}
1160
1161impl AdaptationController {
1162    /// Create new adaptation controller
1163    pub fn new(hidden_size: usize) -> Self {
1164        Self {
1165            strategy_selector: StrategySelector::new(hidden_size),
1166            adaptation_rate_controller: AdaptationRateController::new(),
1167            progress_monitor: ProgressMonitor::new(),
1168            strategy_history: BoundedHistory::new(100),
1169        }
1170    }
1171
1172    /// Select optimization strategy
1173    pub fn select_strategy(
1174        &mut self,
1175        network_output: &Array1<f64>,
1176        performance_prediction: &f64,
1177    ) -> OptimizeResult<OptimizationStrategy> {
1178        let strategy = self
1179            .strategy_selector
1180            .select(network_output, *performance_prediction)?;
1181        self.strategy_history.push(strategy.clone());
1182
1183        Ok(strategy)
1184    }
1185
1186    /// Monitor progress and adapt
1187    pub fn monitor_and_adapt(&mut self, performance_prediction: &f64) -> OptimizeResult<()> {
1188        self.progress_monitor.update(*performance_prediction)?;
1189
1190        match self.progress_monitor.current_state {
1191            ProgressState::Stagnating | ProgressState::Deteriorating => {
1192                self.adaptation_rate_controller.increase_rate()?;
1193            }
1194            ProgressState::Improving => {
1195                self.adaptation_rate_controller.maintain_rate()?;
1196            }
1197            _ => {}
1198        }
1199
1200        Ok(())
1201    }
1202
1203    /// Get adaptation signal
1204    pub fn get_adaptation_signal(&self) -> f64 {
1205        self.adaptation_rate_controller.current_rate
1206    }
1207
1208    /// Reinforce current strategy
1209    pub fn reinforce_current_strategy(&mut self, strength: f64) -> OptimizeResult<()> {
1210        self.strategy_selector.reinforce_current(strength)
1211    }
1212
1213    /// Encourage exploration
1214    pub fn encourage_exploration(&mut self, strength: f64) -> OptimizeResult<()> {
1215        self.strategy_selector.encourage_exploration(strength)
1216    }
1217}
1218
1219impl StrategySelector {
1220    /// Create new strategy selector
1221    pub fn new(hidden_size: usize) -> Self {
1222        let num_strategies = 5;
1223
1224        Self {
1225            selection_network: Array2::from_shape_fn((num_strategies, hidden_size), |_| {
1226                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1227            }),
1228            strategy_embeddings: Array2::from_shape_fn((num_strategies, hidden_size), |_| {
1229                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1230            }),
1231            strategy_weights: Array1::from_elem(num_strategies, 1.0 / num_strategies as f64),
1232            available_strategies: vec![
1233                OptimizationStrategy::gradient_descent(),
1234                OptimizationStrategy::momentum(),
1235                OptimizationStrategy::adaptive(),
1236                OptimizationStrategy::quasi_newton(),
1237                OptimizationStrategy::trust_region(),
1238            ],
1239        }
1240    }
1241
1242    /// Select strategy based on network output
1243    pub fn select(
1244        &self,
1245        network_output: &Array1<f64>,
1246        performance_prediction: f64,
1247    ) -> OptimizeResult<OptimizationStrategy> {
1248        let mut strategy_scores = Array1::zeros(self.available_strategies.len());
1249
1250        // Compute strategy scores
1251        for i in 0..strategy_scores.len() {
1252            for j in 0..network_output.len().min(self.selection_network.ncols()) {
1253                strategy_scores[i] += self.selection_network[[i, j]] * network_output[j];
1254            }
1255
1256            // Add performance prediction influence
1257            strategy_scores[i] += performance_prediction * 0.1;
1258
1259            // Add current weight
1260            strategy_scores[i] += self.strategy_weights[i];
1261        }
1262
1263        // Apply softmax to get probabilities
1264        let max_score = strategy_scores.fold(-f64::INFINITY, |a, &b| a.max(b));
1265        strategy_scores.mapv_inplace(|x| (x - max_score).exp());
1266        let sum_scores = strategy_scores.sum();
1267        if sum_scores > 0.0 {
1268            strategy_scores /= sum_scores;
1269        }
1270
1271        // Select strategy (argmax for deterministic, or sample for stochastic)
1272        let selected_idx = strategy_scores
1273            .iter()
1274            .enumerate()
1275            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
1276            .map(|(i, _)| i)
1277            .unwrap_or(0);
1278
1279        Ok(self.available_strategies[selected_idx].clone())
1280    }
1281
1282    /// Reinforce current strategy
1283    pub fn reinforce_current(&mut self, strength: f64) -> OptimizeResult<()> {
1284        // Increase weight of current best strategy
1285        if let Some((best_idx, _)) = self
1286            .strategy_weights
1287            .iter()
1288            .enumerate()
1289            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
1290        {
1291            self.strategy_weights[best_idx] += strength;
1292        }
1293
1294        // Renormalize
1295        let sum = self.strategy_weights.sum();
1296        if sum > 0.0 {
1297            self.strategy_weights /= sum;
1298        }
1299
1300        Ok(())
1301    }
1302
1303    /// Encourage exploration
1304    pub fn encourage_exploration(&mut self, strength: f64) -> OptimizeResult<()> {
1305        // Add uniform noise to encourage exploration
1306        for weight in &mut self.strategy_weights {
1307            *weight += strength * scirs2_core::random::rng().random::<f64>();
1308        }
1309
1310        // Renormalize
1311        let sum = self.strategy_weights.sum();
1312        if sum > 0.0 {
1313            self.strategy_weights /= sum;
1314        }
1315
1316        Ok(())
1317    }
1318}
1319
1320impl OptimizationStrategy {
1321    /// Create gradient descent strategy
1322    pub fn gradient_descent() -> Self {
1323        Self {
1324            id: "gradient_descent".to_string(),
1325            parameters: Array1::from(vec![0.01, 0.0, 0.0]), // [learning_rate, momentum, adaptivity]
1326            expected_performance: 0.7,
1327            computational_cost: 0.3,
1328            robustness: 0.8,
1329        }
1330    }
1331
1332    /// Create momentum strategy
1333    pub fn momentum() -> Self {
1334        Self {
1335            id: "momentum".to_string(),
1336            parameters: Array1::from(vec![0.01, 0.9, 0.0]),
1337            expected_performance: 0.8,
1338            computational_cost: 0.4,
1339            robustness: 0.7,
1340        }
1341    }
1342
1343    /// Create adaptive strategy
1344    pub fn adaptive() -> Self {
1345        Self {
1346            id: "adaptive".to_string(),
1347            parameters: Array1::from(vec![0.001, 0.0, 0.9]),
1348            expected_performance: 0.85,
1349            computational_cost: 0.6,
1350            robustness: 0.9,
1351        }
1352    }
1353
1354    /// Create quasi-Newton strategy
1355    pub fn quasi_newton() -> Self {
1356        Self {
1357            id: "quasi_newton".to_string(),
1358            parameters: Array1::from(vec![0.1, 0.0, 0.5]),
1359            expected_performance: 0.9,
1360            computational_cost: 0.8,
1361            robustness: 0.6,
1362        }
1363    }
1364
1365    /// Create trust region strategy
1366    pub fn trust_region() -> Self {
1367        Self {
1368            id: "trust_region".to_string(),
1369            parameters: Array1::from(vec![0.1, 0.0, 0.7]),
1370            expected_performance: 0.95,
1371            computational_cost: 0.9,
1372            robustness: 0.95,
1373        }
1374    }
1375}
1376
1377impl Default for AdaptationRateController {
1378    fn default() -> Self {
1379        Self::new()
1380    }
1381}
1382
1383impl AdaptationRateController {
1384    /// Create new adaptation rate controller
1385    pub fn new() -> Self {
1386        Self {
1387            controller_network: Array2::from_shape_fn((1, 10), |_| {
1388                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1389            }),
1390            current_rate: 0.1,
1391            rate_history: BoundedHistory::new(100),
1392            performance_correlation: 0.0,
1393        }
1394    }
1395
1396    /// Increase adaptation rate
1397    pub fn increase_rate(&mut self) -> OptimizeResult<()> {
1398        self.current_rate = (self.current_rate * 1.2).min(1.0);
1399        self.rate_history.push(self.current_rate);
1400
1401        Ok(())
1402    }
1403
1404    /// Maintain current rate
1405    pub fn maintain_rate(&mut self) -> OptimizeResult<()> {
1406        self.rate_history.push(self.current_rate);
1407
1408        Ok(())
1409    }
1410}
1411
1412impl Default for ProgressMonitor {
1413    fn default() -> Self {
1414        Self::new()
1415    }
1416}
1417
1418impl ProgressMonitor {
1419    /// Create new progress monitor
1420    pub fn new() -> Self {
1421        Self {
1422            progress_indicators: vec![
1423                ProgressIndicator::new("objective_improvement".to_string()),
1424                ProgressIndicator::new("gradient_norm".to_string()),
1425                ProgressIndicator::new("step_size".to_string()),
1426            ],
1427            monitoring_network: Array2::from_shape_fn((4, 10), |_| {
1428                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1429            }),
1430            alert_thresholds: HashMap::new(),
1431            current_state: ProgressState::Improving,
1432        }
1433    }
1434
1435    /// Update progress monitoring
1436    pub fn update(&mut self, performance_value: f64) -> OptimizeResult<()> {
1437        // Update progress indicators
1438        for indicator in &mut self.progress_indicators {
1439            indicator.update(performance_value)?;
1440        }
1441
1442        // Determine current state
1443        self.current_state = self.determine_progress_state()?;
1444
1445        Ok(())
1446    }
1447
1448    /// Determine progress state
1449    fn determine_progress_state(&self) -> OptimizeResult<ProgressState> {
1450        let mut improvement_count = 0;
1451        let mut stagnation_count = 0;
1452
1453        for indicator in &self.progress_indicators {
1454            if indicator.trend > 0.1 {
1455                improvement_count += 1;
1456            } else if indicator.trend.abs() < 0.01 {
1457                stagnation_count += 1;
1458            }
1459        }
1460
1461        if improvement_count >= 2 {
1462            Ok(ProgressState::Improving)
1463        } else if stagnation_count >= 2 {
1464            Ok(ProgressState::Stagnating)
1465        } else {
1466            Ok(ProgressState::Deteriorating)
1467        }
1468    }
1469}
1470
1471impl ProgressIndicator {
1472    /// Create new progress indicator
1473    pub fn new(name: String) -> Self {
1474        Self {
1475            name,
1476            value: 0.0,
1477            history: BoundedHistory::new(50),
1478            trend: 0.0,
1479            importance: 1.0,
1480        }
1481    }
1482
1483    /// Update indicator
1484    pub fn update(&mut self, new_value: f64) -> OptimizeResult<()> {
1485        self.value = new_value;
1486        self.history.push(new_value);
1487
1488        // Compute trend using bounded history
1489        if self.history.len() > 2 {
1490            // Access the underlying data to compute trend
1491            let first = self.history.data.front().copied().unwrap_or(new_value);
1492            let last = self.history.data.back().copied().unwrap_or(new_value);
1493            self.trend = (last - first) / self.history.len() as f64;
1494        }
1495
1496        Ok(())
1497    }
1498}
1499
1500impl PerformancePredictor {
1501    /// Create new performance predictor
1502    pub fn new(hidden_size: usize) -> Self {
1503        Self {
1504            prediction_network: Array2::from_shape_fn((1, hidden_size), |_| {
1505                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1506            }),
1507            feature_extractor: FeatureExtractor::new(hidden_size),
1508            prediction_horizon: 5,
1509            prediction_accuracy: 0.5,
1510            confidence_estimator: ConfidenceEstimator::new(hidden_size),
1511        }
1512    }
1513
1514    /// Predict performance
1515    pub fn predict(&self, state_features: &Array1<f64>) -> OptimizeResult<f64> {
1516        // Extract features for prediction
1517        let prediction_features = self.feature_extractor.extract(state_features)?;
1518
1519        // Forward pass through prediction network
1520        let mut prediction = 0.0;
1521        for j in 0..prediction_features
1522            .len()
1523            .min(self.prediction_network.ncols())
1524        {
1525            prediction += self.prediction_network[[0, j]] * prediction_features[j];
1526        }
1527
1528        Ok(prediction.tanh()) // Normalize to [-1, 1]
1529    }
1530}
1531
1532impl FeatureExtractor {
1533    /// Create new feature extractor
1534    pub fn new(feature_dim: usize) -> Self {
1535        Self {
1536            extraction_layers: vec![Array2::from_shape_fn((feature_dim, feature_dim), |_| {
1537                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1538            })],
1539            feature_dim,
1540            temporal_features: TemporalFeatures::new(feature_dim),
1541        }
1542    }
1543
1544    /// Extract features for prediction
1545    pub fn extract(&self, input: &Array1<f64>) -> OptimizeResult<Array1<f64>> {
1546        let mut features = input.clone();
1547
1548        // Apply extraction layers
1549        for layer in &self.extraction_layers {
1550            let output_dim = layer.nrows().min(features.len());
1551            let input_dim = layer.ncols().min(features.len());
1552            let mut new_features = Array1::zeros(output_dim);
1553
1554            for i in 0..output_dim {
1555                for j in 0..input_dim {
1556                    new_features[i] += layer[[i, j]] * features[j];
1557                }
1558            }
1559            features = new_features;
1560        }
1561
1562        Ok(features)
1563    }
1564}
1565
1566impl TemporalFeatures {
1567    /// Create new temporal features
1568    pub fn new(dim: usize) -> Self {
1569        Self {
1570            time_embeddings: Array2::from_shape_fn((dim, 100), |_| {
1571                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1572            }),
1573            trend_analyzer: TrendAnalyzer::new(),
1574            seasonality_detector: SeasonalityDetector::new(dim),
1575        }
1576    }
1577}
1578
1579impl Default for TrendAnalyzer {
1580    fn default() -> Self {
1581        Self::new()
1582    }
1583}
1584
1585impl TrendAnalyzer {
1586    /// Create new trend analyzer
1587    pub fn new() -> Self {
1588        Self {
1589            trend_coefficients: Array1::from(vec![1.0, 0.5, 0.1]),
1590            window_size: 10,
1591            trend_strength: 0.0,
1592        }
1593    }
1594}
1595
1596impl SeasonalityDetector {
1597    /// Create new seasonality detector
1598    pub fn new(dim: usize) -> Self {
1599        Self {
1600            seasonal_patterns: Array2::zeros((dim, 12)),
1601            pattern_strength: Array1::zeros(12),
1602            detection_threshold: 0.1,
1603        }
1604    }
1605}
1606
1607impl ConfidenceEstimator {
1608    /// Create new confidence estimator
1609    pub fn new(hidden_size: usize) -> Self {
1610        Self {
1611            confidence_network: Array2::from_shape_fn((1, hidden_size), |_| {
1612                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1613            }),
1614            uncertainty_quantifier: UncertaintyQuantifier::new(),
1615            calibration_params: Array1::from(vec![1.0, 0.0, 0.1]),
1616        }
1617    }
1618
1619    /// Estimate confidence in prediction
1620    pub fn estimate_confidence(&self, features: &Array1<f64>) -> OptimizeResult<f64> {
1621        let mut confidence = 0.0;
1622        for j in 0..features.len().min(self.confidence_network.ncols()) {
1623            confidence += self.confidence_network[[0, j]] * features[j];
1624        }
1625
1626        // Apply sigmoid to get [0, 1] range
1627        Ok(1.0 / (1.0 + (-confidence).exp()))
1628    }
1629}
1630
1631impl Default for UncertaintyQuantifier {
1632    fn default() -> Self {
1633        Self::new()
1634    }
1635}
1636
1637impl UncertaintyQuantifier {
1638    /// Create new uncertainty quantifier
1639    pub fn new() -> Self {
1640        Self {
1641            epistemic_uncertainty: 0.1,
1642            aleatoric_uncertainty: 0.1,
1643            method: UncertaintyMethod::Dropout,
1644        }
1645    }
1646}
1647
1648impl Default for AdaptiveOptimizationStats {
1649    fn default() -> Self {
1650        Self {
1651            strategy_switches: 0,
1652            avg_adaptation_rate: 0.1,
1653            prediction_accuracy: 0.5,
1654            computational_efficiency: 0.5,
1655            robustness_score: 0.5,
1656        }
1657    }
1658}
1659
1660impl LearnedOptimizer for NeuralAdaptiveOptimizer {
1661    fn meta_train(&mut self, training_tasks: &[TrainingTask]) -> OptimizeResult<()> {
1662        // Convert training tasks to trajectories
1663        let mut trajectories = Vec::new();
1664
1665        for task in training_tasks {
1666            let trajectory = self.create_trajectory_from_task(task)?;
1667            trajectories.push(trajectory);
1668        }
1669
1670        // Train networks
1671        self.train_networks(&trajectories)?;
1672
1673        Ok(())
1674    }
1675
1676    fn adapt_to_problem(
1677        &mut self,
1678        _problem: &OptimizationProblem,
1679        _params: &ArrayView1<f64>,
1680    ) -> OptimizeResult<()> {
1681        // Adaptation happens dynamically during optimization
1682        Ok(())
1683    }
1684
1685    fn optimize<F>(
1686        &mut self,
1687        objective: F,
1688        initial_params: &ArrayView1<f64>,
1689    ) -> OptimizeResult<OptimizeResults<f64>>
1690    where
1691        F: Fn(&ArrayView1<f64>) -> f64,
1692    {
1693        let mut current_params = initial_params.to_owned();
1694        let mut best_value = objective(initial_params);
1695        let mut iterations = 0;
1696
1697        for step_number in 0..1000 {
1698            iterations = step_number;
1699
1700            // Get adaptive optimization step
1701            let adaptive_step =
1702                self.adaptive_optimization_step(&objective, &current_params.view(), step_number)?;
1703
1704            // Apply the selected strategy
1705            let direction = self.compute_direction_for_strategy(
1706                &objective,
1707                &current_params,
1708                &adaptive_step.strategy,
1709            )?;
1710            let step_size = self.compute_step_size_for_strategy(&adaptive_step.strategy);
1711
1712            // Update parameters
1713            for i in 0..current_params.len().min(direction.len()) {
1714                current_params[i] -= step_size * direction[i];
1715            }
1716
1717            let current_value = objective(&current_params.view());
1718
1719            if current_value < best_value {
1720                best_value = current_value;
1721            }
1722
1723            // Record performance for adaptation
1724            self.meta_state.performance_history.push(current_value);
1725
1726            // Check convergence
1727            if adaptive_step.confidence > 0.95 && step_size < 1e-8 {
1728                break;
1729            }
1730        }
1731
1732        Ok(OptimizeResults::<f64> {
1733            x: current_params,
1734            fun: best_value,
1735            success: true,
1736            nit: iterations,
1737            message: "Neural adaptive optimization completed".to_string(),
1738            jac: None,
1739            hess: None,
1740            constr: None,
1741            nfev: iterations * 5, // Neural network evaluations
1742            njev: 0,
1743            nhev: 0,
1744            maxcv: 0,
1745            status: 0,
1746        })
1747    }
1748
1749    fn get_state(&self) -> &MetaOptimizerState {
1750        &self.meta_state
1751    }
1752
1753    fn reset(&mut self) {
1754        self.adaptive_stats = AdaptiveOptimizationStats::default();
1755        self.meta_state.performance_history.clear();
1756        self.adaptation_controller.strategy_history.clear();
1757        // Clear computation cache buffers
1758        self.computation_cache.gradient_buffer.fill(0.0);
1759        self.computation_cache.feature_buffer.fill(0.0);
1760        self.computation_cache.param_buffer.fill(0.0);
1761        self.computation_cache.network_output_buffer.fill(0.0);
1762        self.computation_cache.temp_buffer.fill(0.0);
1763    }
1764}
1765
1766impl NeuralAdaptiveOptimizer {
1767    fn create_trajectory_from_task(
1768        &self,
1769        task: &TrainingTask,
1770    ) -> OptimizeResult<OptimizationTrajectory> {
1771        // Simplified trajectory creation
1772        let num_steps = 10;
1773        let mut states = Vec::new();
1774        let mut actions = Vec::new();
1775        let mut performance_values = Vec::new();
1776        let mut rewards = Vec::new();
1777
1778        for i in 0..num_steps {
1779            states.push(Array1::from_shape_fn(
1780                self.optimization_network.architecture.input_size,
1781                |_| scirs2_core::random::rng().random::<f64>(),
1782            ));
1783
1784            actions.push(Array1::from_shape_fn(
1785                self.optimization_network.architecture.output_size,
1786                |_| scirs2_core::random::rng().random::<f64>(),
1787            ));
1788
1789            performance_values.push(1.0 - i as f64 / num_steps as f64);
1790            rewards.push(if i > 0 {
1791                performance_values[i - 1] - performance_values[i]
1792            } else {
1793                0.0
1794            });
1795        }
1796
1797        Ok(OptimizationTrajectory {
1798            states,
1799            actions,
1800            performance_values,
1801            rewards,
1802        })
1803    }
1804
1805    fn compute_direction_for_strategy<F>(
1806        &mut self,
1807        objective: &F,
1808        params: &Array1<f64>,
1809        strategy: &OptimizationStrategy,
1810    ) -> OptimizeResult<Array1<f64>>
1811    where
1812        F: Fn(&ArrayView1<f64>) -> f64,
1813    {
1814        // Compute finite difference gradient using cached buffers
1815        let h = 1e-6;
1816        let f0 = objective(&params.view());
1817        let (gradient_buffer, param_buffer) = self
1818            .computation_cache
1819            .get_gradient_and_param_buffers(params.len(), params.len());
1820
1821        // Copy parameters to buffer
1822        for (i, &val) in params.iter().enumerate() {
1823            if i < param_buffer.len() {
1824                param_buffer[i] = val;
1825            }
1826        }
1827
1828        for i in 0..params.len().min(gradient_buffer.len()) {
1829            let original_val = param_buffer[i];
1830            param_buffer[i] = original_val + h;
1831            let f_plus = objective(&param_buffer.view());
1832            param_buffer[i] = original_val; // Restore
1833            gradient_buffer[i] = (f_plus - f0) / h;
1834        }
1835
1836        // Create result gradient from buffer
1837        let mut gradient = Array1::zeros(params.len());
1838        for i in 0..params.len().min(gradient_buffer.len()) {
1839            gradient[i] = gradient_buffer[i];
1840        }
1841
1842        // Apply strategy-specific transformations
1843        match strategy.id.as_str() {
1844            "momentum" => {
1845                // Apply momentum (simplified)
1846                gradient *= strategy.parameters[1]; // momentum factor
1847            }
1848            "adaptive" => {
1849                // Apply adaptive scaling
1850                let adaptivity = strategy.parameters[2];
1851                gradient.mapv_inplace(|g| g / (1.0 + adaptivity * g.abs()));
1852            }
1853            _ => {
1854                // Default gradient descent
1855            }
1856        }
1857
1858        Ok(gradient)
1859    }
1860
1861    fn compute_step_size_for_strategy(&self, strategy: &OptimizationStrategy) -> f64 {
1862        strategy.parameters[0] // Use first parameter as learning rate
1863    }
1864}
1865
1866/// Convenience function for neural adaptive optimization
1867#[allow(dead_code)]
1868pub fn neural_adaptive_optimize<F>(
1869    objective: F,
1870    initial_params: &ArrayView1<f64>,
1871    config: Option<LearnedOptimizationConfig>,
1872) -> OptimizeResult<OptimizeResults<f64>>
1873where
1874    F: Fn(&ArrayView1<f64>) -> f64,
1875{
1876    let config = config.unwrap_or_default();
1877    let mut optimizer = NeuralAdaptiveOptimizer::new(config);
1878    optimizer.optimize(objective, initial_params)
1879}
1880
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly built optimizer has not switched strategies yet.
    #[test]
    fn test_neural_adaptive_optimizer_creation() {
        let fresh = NeuralAdaptiveOptimizer::new(LearnedOptimizationConfig::default());

        assert_eq!(fresh.adaptive_stats.strategy_switches, 0);
    }

    /// A forward pass through a small feed-forward network yields a finite
    /// vector of the configured output size.
    #[test]
    fn test_optimization_network() {
        const INPUT_SIZE: usize = 10;
        const OUTPUT_SIZE: usize = 4;

        let mut net = OptimizationNetwork::new(NetworkArchitecture {
            input_size: INPUT_SIZE,
            hidden_sizes: vec![16, 8],
            output_size: OUTPUT_SIZE,
            activations: vec![
                ActivationType::ReLU,
                ActivationType::ReLU,
                ActivationType::Tanh,
            ],
            use_recurrent: false,
            use_attention: false,
        });
        let ones = Array1::<f64>::from_elem(INPUT_SIZE, 1.0);

        let activations = net.forward(&ones.view()).unwrap();

        assert_eq!(activations.len(), OUTPUT_SIZE);
        assert!(activations.iter().all(|value| value.is_finite()));
    }

    /// A single layer maps a 5-vector to a finite 3-vector.
    #[test]
    fn test_neural_layer() {
        const FAN_IN: usize = 5;
        const FAN_OUT: usize = 3;

        let mut dense = NeuralLayer::new(FAN_IN, FAN_OUT, ActivationType::ReLU);
        let sample = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);

        let activations = dense.forward(&sample.view()).unwrap();

        assert_eq!(activations.len(), FAN_OUT);
        assert!(activations.iter().all(|value| value.is_finite()));
    }

    /// The selector returns a named strategy with a non-negative expected
    /// performance.
    #[test]
    fn test_strategy_selector() {
        const WIDTH: usize = 16;

        let chooser = StrategySelector::new(WIDTH);
        let logits = Array1::<f64>::from_elem(WIDTH, 0.5);

        let chosen = chooser.select(&logits, 0.8).unwrap();

        assert!(!chosen.id.is_empty());
        assert!(chosen.expected_performance >= 0.0);
    }

    /// Predictions stay inside the closed interval [-1, 1].
    #[test]
    fn test_performance_predictor() {
        const FEATURE_COUNT: usize = 32;

        let model = PerformancePredictor::new(FEATURE_COUNT);
        let features = Array1::<f64>::from_elem(FEATURE_COUNT, 0.1);

        let prediction = model.predict(&features).unwrap();

        assert!((-1.0..=1.0).contains(&prediction));
    }

    /// End-to-end run on a 2-D quadratic bowl through the convenience function.
    #[test]
    fn test_neural_adaptive_optimization() {
        let bowl = |point: &ArrayView1<f64>| point[0].powi(2) + point[1].powi(2);
        let start = Array1::from_vec(vec![2.0, 2.0]);
        let settings = LearnedOptimizationConfig {
            hidden_size: 32,
            max_parameters: 50,
            ..Default::default()
        };

        let outcome = neural_adaptive_optimize(bowl, &start.view(), Some(settings)).unwrap();

        assert!(outcome.fun >= 0.0);
        assert_eq!(outcome.x.len(), 2);
        assert!(outcome.success);
    }
}
1967
/// No-op stub; exists only to prevent unused module warnings.
#[allow(dead_code)]
pub fn placeholder() {}
scirs2_optimize/learned_optimizers/neural_adaptive_optimizer.rs

scirs2_optimize/learned_optimizers/
neural_adaptive_optimizer.rs