#![allow(dead_code)]
#![allow(clippy::too_many_arguments)]

use crate::common::IntegrateFloat;
use crate::error::{IntegrateError, IntegrateResult};
use scirs2_core::gpu::{self, GpuDataType};
use scirs2_core::ndarray::{Array1, Array2};
use std::collections::{HashMap, VecDeque};
use std::sync::{Arc, Mutex};
use std::time::Instant;
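
/// GPU-accelerated neural reinforcement-learning controller that learns an
/// adaptive step-size policy for the integrator. Combines a dueling deep
/// Q-network with prioritized experience replay and multi-objective rewards.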
pub struct NeuralRLStepController<F: IntegrateFloat + GpuDataType> {
    /// Deep Q-network that scores the 32 discrete step-size actions
    dqn: Arc<Mutex<DeepQNetwork<F>>>,
    /// Prioritized replay buffer of past transitions
    experience_buffer: Arc<Mutex<PrioritizedExperienceReplay<F>>>,
    /// Converts solver state into the 64-element network input
    feature_extractor: Arc<Mutex<StateFeatureExtractor<F>>>,
    /// Multi-objective reward computation
    reward_calculator: MultiObjectiveRewardCalculator<F>,
    /// Training hyperparameters
    training_config: TrainingConfiguration,
    /// Rolling training and convergence statistics
    performance_analytics: Arc<Mutex<RLPerformanceAnalytics>>,
    /// Shared GPU context for kernel launches
    gpu_context: Arc<Mutex<gpu::GpuContext>>,
}
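
/// Dueling deep Q-network with separate online and target weight sets.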
pub struct DeepQNetwork<F: IntegrateFloat + GpuDataType> {
    /// Online weights, updated on every training step
    weights: NetworkWeights<F>,
    /// Target weights, soft-updated toward the online weights
    target_weights: NetworkWeights<F>,
    /// Network hyperparameters
    hyperparams: NetworkHyperparameters<F>,
    /// Cumulative training statistics
    training_stats: TrainingStatistics,
    /// Adam optimizer moment estimates
    optimizer_state: AdamOptimizerState<F>,
}

#[derive(Debug, Clone)]
pub struct NetworkWeights<F: IntegrateFloat> {
    /// First hidden layer: 64 inputs -> 128 units
    pub layer1_weights: Array2<F>,
    pub layer1_biases: Array1<F>,
    /// Second hidden layer: 128 inputs -> 64 units
    pub layer2_weights: Array2<F>,
    pub layer2_biases: Array1<F>,
    /// Dueling advantage head: 64 inputs -> 32 actions
    pub advantage_weights: Array2<F>,
    pub advantage_biases: Array1<F>,
    /// Dueling state-value head: 64 inputs -> scalar V(s)
    pub value_weights: Array1<F>,
    pub value_bias: F,
}
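
/// Replay buffer that samples transitions in proportion to their TD error,
/// with importance-sampling corrections for the induced bias.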
pub struct PrioritizedExperienceReplay<F: IntegrateFloat> {
    /// Stored transitions, oldest first
    buffer: VecDeque<Experience<F>>,
    /// Sum tree reserved for O(log n) sampling (the current sampler scans linearly)
    priority_tree: SumTree,
    /// Buffer capacity and priority exponents
    config: ReplayBufferConfig,
    /// Importance-sampling correction schedule
    importance_sampling: ImportanceSamplingConfig,
}

#[derive(Debug, Clone)]
pub struct Experience<F: IntegrateFloat> {
    pub state: Array1<F>,
    pub action: usize,
    pub reward: F,
    pub next_state: Array1<F>,
    pub done: bool,
    /// Last known TD error, used as the sampling priority
    pub td_error: F,
    pub timestamp: Instant,
}
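
/// Builds the 64-element state feature vector consumed by the Q-network.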
pub struct StateFeatureExtractor<F: IntegrateFloat> {
    /// Recent local error estimates (up to 8)
    error_history: VecDeque<F>,
    /// Recent step sizes (up to 8)
    step_history: VecDeque<F>,
    /// Leading Jacobian eigenvalue estimates
    jacobian_eigenvalues: Array1<F>,
    /// Static problem descriptors (size, type, stiffness)
    problem_characteristics: ProblemCharacteristics<F>,
    /// Latest runtime metrics
    performance_metrics: PerformanceMetrics<F>,
    /// Feature normalization statistics
    normalization: FeatureNormalization<F>,
}

pub struct MultiObjectiveRewardCalculator<F: IntegrateFloat> {
    weights: RewardWeights<F>,
    baselines: PerformanceBaselines<F>,
    shaping: RewardShaping<F>,
}

#[derive(Debug, Clone)]
pub struct TrainingConfiguration {
    pub learning_rate: f64,
    pub discount_factor: f64,
    /// Epsilon-greedy exploration rate
    pub epsilon: f64,
    pub epsilon_decay: f64,
    pub epsilon_min: f64,
    pub target_update_frequency: usize,
    pub batch_size: usize,
    pub train_frequency: usize,
    /// Polyak averaging coefficient for target-network updates
    pub tau: f64,
    /// Priority exponent for experience replay
    pub priority_alpha: f64,
    /// Importance-sampling exponent for experience replay
    pub priority_beta: f64,
    /// Horizon for n-step returns
    pub n_step: usize,
}
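
/// Rolling windows of training metrics used to evaluate learning progress.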
pub struct RLPerformanceAnalytics {
    episode_rewards: VecDeque<f64>,
    q_value_estimates: VecDeque<f64>,
    loss_values: VecDeque<f64>,
    epsilon_history: VecDeque<f64>,
    step_acceptance_rates: VecDeque<f64>,
    convergence_metrics: ConvergenceMetrics,
}

impl<F: IntegrateFloat + GpuDataType + Default> NeuralRLStepController<F> {
    pub fn new() -> IntegrateResult<Self> {
        // Tests use the CPU backend so they can run without GPU hardware.
        #[cfg(test)]
        let gpu_context = Arc::new(Mutex::new(
            gpu::GpuContext::new(gpu::GpuBackend::Cpu).map_err(|e| {
                IntegrateError::ComputationError(format!("GPU context creation failed: {e:?}"))
            })?,
        ));

        #[cfg(not(test))]
        let gpu_context = Arc::new(Mutex::new(
            gpu::GpuContext::new(gpu::GpuBackend::Cuda).map_err(|e| {
                IntegrateError::ComputationError(format!("GPU context creation failed: {e:?}"))
            })?,
        ));

        let dqn = Arc::new(Mutex::new(DeepQNetwork::new()?));
        let experience_buffer = Arc::new(Mutex::new(PrioritizedExperienceReplay::new()?));
        let feature_extractor = Arc::new(Mutex::new(StateFeatureExtractor::new()?));
        let reward_calculator = MultiObjectiveRewardCalculator::new()?;
        let training_config = TrainingConfiguration::default();
        let performance_analytics = Arc::new(Mutex::new(RLPerformanceAnalytics::new()));

        Ok(NeuralRLStepController {
            dqn,
            experience_buffer,
            feature_extractor,
            reward_calculator,
            training_config,
            performance_analytics,
            gpu_context,
        })
    }

    pub fn initialize(
        &self,
        problem_size: usize,
        initial_step_size: F,
        problem_type: &str,
    ) -> IntegrateResult<()> {
        // Scoped locks so neither mutex is held longer than necessary.
        {
            let mut extractor = self.feature_extractor.lock().unwrap();
            extractor.initialize(problem_size, initial_step_size, problem_type)?;
        }

        {
            let mut dqn = self.dqn.lock().unwrap();
            dqn.initialize_weights()?;
        }

        self.load_neural_rl_shader()?;

        Ok(())
    }
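
    /// Predicts the next step size by scoring 32 discrete step multipliers
    /// with the Q-network, then clamping the result to a safe window.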
    pub fn predict_optimal_step(
        &self,
        current_step: F,
        current_error: F,
        problem_state: &ProblemState<F>,
        performance_metrics: &PerformanceMetrics<F>,
    ) -> IntegrateResult<StepSizePrediction<F>> {
        let state_features = {
            let mut extractor = self.feature_extractor.lock().unwrap();
            extractor.extract_features(
                current_step,
                current_error,
                problem_state,
                performance_metrics,
            )?
        };

        let q_values = {
            let dqn = self.dqn.lock().unwrap();
            dqn.forward(&state_features)?
        };

        let action = self.select_action(&q_values)?;

        let step_multiplier = self.action_to_step_multiplier(action);
        let predicted_step = current_step * step_multiplier;

        // Clamp the prediction to a safe window around the current step size.
        let min_step = current_step * F::from(0.01).unwrap();
        let max_step = current_step * F::from(10.0).unwrap();
        let safe_step = predicted_step.max(min_step).min(max_step);

        Ok(StepSizePrediction {
            predicted_step: safe_step,
            step_multiplier,
            action_index: action,
            q_values: q_values.clone(),
            confidence: self.calculate_prediction_confidence(&q_values),
            exploration_noise: self.calculate_exploration_noise(),
        })
    }
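
    /// Records a transition in the replay buffer and, when due, performs a
    /// training step and a target-network update.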
    pub fn train_on_experience(
        &self,
        previous_state: &Array1<F>,
        action_taken: usize,
        reward: F,
        current_state: &Array1<F>,
        done: bool,
    ) -> IntegrateResult<TrainingResult<F>> {
        let experience = Experience {
            state: previous_state.clone(),
            action: action_taken,
            reward,
            next_state: current_state.clone(),
            done,
            // Placeholder priority; refined once a TD error is available.
            td_error: F::zero(),
            timestamp: Instant::now(),
        };

        {
            let mut buffer = self.experience_buffer.lock().unwrap();
            buffer.add_experience(experience)?;
        }

        if self.should_train()? {
            self.perform_training_step()?;
        }

        if self.should_update_target()? {
            self.update_target_network()?;
        }

        {
            let mut analytics = self.performance_analytics.lock().unwrap();
            analytics.update_metrics(reward.to_f64().unwrap_or(0.0), action_taken)?;
        }

        Ok(TrainingResult {
            loss: F::zero(),
            q_value_estimate: F::zero(),
            td_error: F::zero(),
            exploration_rate: F::from(self.training_config.epsilon).unwrap(),
            training_performed: self.should_train()?,
        })
    }
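
    /// Samples a prioritized batch and launches the RL training kernel on the
    /// GPU.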
    pub fn gpu_accelerated_training(&self) -> IntegrateResult<()> {
        let gpu_context = self.gpu_context.lock().unwrap();

        let (batch, indices, weights) = {
            let mut buffer = self.experience_buffer.lock().unwrap();
            buffer.sample_batch(self.training_config.batch_size)?
        };

        let gpu_states = self.transfer_states_to_gpu(&batch)?;
        let gpu_actions = self.transfer_actions_to_gpu(&batch)?;
        let gpu_rewards = self.transfer_rewards_to_gpu(&batch)?;
        let gpu_next_states = self.transfer_next_states_to_gpu(&batch)?;

        gpu_context
            .launch_kernel(
                "neural_adaptivestep_rl",
                (self.training_config.batch_size, 1, 1), // grid dimensions
                (32, 1, 1),                              // block dimensions
                &[
                    gpu::DynamicKernelArg::Buffer(gpu_states.as_ptr()),
                    gpu::DynamicKernelArg::Buffer(gpu_actions.as_ptr()),
                    gpu::DynamicKernelArg::Buffer(gpu_rewards.as_ptr()),
                    gpu::DynamicKernelArg::Buffer(gpu_next_states.as_ptr()),
                    gpu::DynamicKernelArg::F64(self.training_config.learning_rate),
                    gpu::DynamicKernelArg::F64(self.training_config.discount_factor),
                    gpu::DynamicKernelArg::I32(1),
                ],
            )
            .map_err(|e| {
                IntegrateError::ComputationError(format!("Neural RL kernel launch failed: {e:?}"))
            })?;

        self.retrieve_updated_weights_from_gpu()?;

        // Importance-sampling weights stand in as priorities until the kernel
        // reports refreshed TD errors.
        self.update_experience_priorities(&indices, &weights)?;

        Ok(())
    }
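
    /// Summarizes recent training behavior into aggregate statistics.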
    pub fn evaluate_performance(&self) -> IntegrateResult<RLEvaluationResults> {
        let analytics = self.performance_analytics.lock().unwrap();

        // Guard against empty windows to avoid NaN from zero-length division.
        let avg_reward = if analytics.episode_rewards.is_empty() {
            0.0
        } else {
            analytics.episode_rewards.iter().sum::<f64>() / analytics.episode_rewards.len() as f64
        };

        let reward_std = if analytics.episode_rewards.is_empty() {
            0.0
        } else {
            let variance = analytics
                .episode_rewards
                .iter()
                .map(|r| (r - avg_reward).powi(2))
                .sum::<f64>()
                / analytics.episode_rewards.len() as f64;
            variance.sqrt()
        };

        let avg_q_value = if analytics.q_value_estimates.is_empty() {
            0.0
        } else {
            analytics.q_value_estimates.iter().sum::<f64>()
                / analytics.q_value_estimates.len() as f64
        };

        let convergence_rate = analytics.convergence_metrics.convergence_rate;
        let exploration_rate = analytics.epsilon_history.back().copied().unwrap_or(0.0);

        Ok(RLEvaluationResults {
            average_reward: avg_reward,
            reward_standard_deviation: reward_std,
            average_q_value: avg_q_value,
            convergence_rate,
            exploration_rate,
            training_episodes: analytics.episode_rewards.len(),
            step_acceptance_rate: analytics
                .step_acceptance_rates
                .back()
                .copied()
                .unwrap_or(0.0),
        })
    }
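
    /// Adjusts learning rate, exploration, and replay prioritization based on
    /// the latest evaluation results.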
    pub fn adaptive_hyperparameter_tuning(&mut self) -> IntegrateResult<()> {
        let performance = self.evaluate_performance()?;

        if performance.convergence_rate < 0.1 {
            // Converging slowly: shrink the learning rate for stability.
            self.training_config.learning_rate *= 0.9;
        } else if performance.convergence_rate > 0.8 {
            // Converging quickly: a slightly larger learning rate is safe.
            self.training_config.learning_rate *= 1.1;
        }

        if performance.reward_standard_deviation < 0.1 {
            // Rewards have stabilized: reduce exploration.
            self.training_config.epsilon *= 0.95;
        }

        if performance.average_q_value > 0.5 {
            // Confident value estimates: sharpen replay prioritization.
            self.training_config.priority_alpha *= 1.05;
        }

        Ok(())
    }

    fn load_neural_rl_shader(&self) -> IntegrateResult<()> {
        // Shader loading is a no-op here; the kernel is assumed to be
        // registered with the GPU context. Locking verifies the context is
        // usable before the first launch.
        let _gpu_context = self.gpu_context.lock().unwrap();
        Ok(())
    }

    fn select_action(&self, q_values: &Array1<F>) -> IntegrateResult<usize> {
        // Epsilon-greedy: explore with probability epsilon, otherwise pick
        // the action with the highest Q-value.
        let random_val: f64 = scirs2_core::random::random();

        if random_val < self.training_config.epsilon {
            Ok((scirs2_core::random::random::<f64>() * 32.0) as usize % 32)
        } else {
            let mut best_action = 0;
            let mut best_q = q_values[0];

            for (i, &q_val) in q_values.iter().enumerate() {
                if q_val > best_q {
                    best_q = q_val;
                    best_action = i;
                }
            }

            Ok(best_action)
        }
    }

    fn action_to_step_multiplier(&self, action: usize) -> F {
        // 32 discrete actions map to step-size multipliers from 0.1 to 3.2.
        let multipliers = [
            0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7,
            1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2,
        ];

        F::from(multipliers[action.min(31)]).unwrap_or(F::one())
    }

    fn calculate_prediction_confidence(&self, q_values: &Array1<F>) -> f64 {
        // Use the spread between the best and worst Q-values as a crude
        // confidence proxy: a wider spread means a more decisive policy.
        let max_q = q_values
            .iter()
            .fold(F::neg_infinity(), |acc, &x| acc.max(x));
        let min_q = q_values.iter().fold(F::infinity(), |acc, &x| acc.min(x));
        let range = max_q - min_q;

        if range > F::zero() {
            range.to_f64().unwrap_or(0.0)
        } else {
            0.0
        }
    }

    fn calculate_exploration_noise(&self) -> f64 {
        self.training_config.epsilon * scirs2_core::random::random::<f64>()
    }

    fn should_train(&self) -> IntegrateResult<bool> {
        let buffer = self.experience_buffer.lock().unwrap();
        Ok(buffer.buffer.len() >= self.training_config.batch_size)
    }

    fn should_update_target(&self) -> IntegrateResult<bool> {
        // Soft (Polyak) updates are cheap, so the target network is nudged
        // toward the online network on every call.
        Ok(true)
    }

    fn perform_training_step(&self) -> IntegrateResult<()> {
        self.gpu_accelerated_training()
    }

    fn update_target_network(&self) -> IntegrateResult<()> {
        let mut dqn = self.dqn.lock().unwrap();
        dqn.soft_update_target(self.training_config.tau)
    }

    fn transfer_states_to_gpu(&self, batch: &[Experience<F>]) -> IntegrateResult<gpu::GpuPtr<F>> {
        // Flatten the batch into a host staging buffer and allocate matching
        // device memory; the host-to-device copy is assumed to happen during
        // kernel setup.
        let states: Vec<F> = batch.iter().flat_map(|e| e.state.iter().cloned()).collect();
        gpu::GpuPtr::allocate(states.len())
            .map_err(|e| IntegrateError::ComputationError(format!("GPU allocation failed: {e:?}")))
    }

    fn transfer_actions_to_gpu(&self, batch: &[Experience<F>]) -> IntegrateResult<gpu::GpuPtr<F>> {
        let actions: Vec<F> = batch
            .iter()
            .map(|e| F::from(e.action).unwrap_or(F::zero()))
            .collect();
        gpu::GpuPtr::allocate(actions.len())
            .map_err(|e| IntegrateError::ComputationError(format!("GPU allocation failed: {e:?}")))
    }

    fn transfer_rewards_to_gpu(&self, batch: &[Experience<F>]) -> IntegrateResult<gpu::GpuPtr<F>> {
        let rewards: Vec<F> = batch.iter().map(|e| e.reward).collect();
        gpu::GpuPtr::allocate(rewards.len())
            .map_err(|e| IntegrateError::ComputationError(format!("GPU allocation failed: {e:?}")))
    }

    fn transfer_next_states_to_gpu(
        &self,
        batch: &[Experience<F>],
    ) -> IntegrateResult<gpu::GpuPtr<F>> {
        let next_states: Vec<F> = batch
            .iter()
            .flat_map(|e| e.next_state.iter().cloned())
            .collect();
        gpu::GpuPtr::allocate(next_states.len())
            .map_err(|e| IntegrateError::ComputationError(format!("GPU allocation failed: {e:?}")))
    }

    fn retrieve_updated_weights_from_gpu(&self) -> IntegrateResult<()> {
        // Device-to-host weight readback is not yet implemented; the CPU-side
        // weights remain authoritative until it is.
        Ok(())
    }

    fn update_experience_priorities(
        &self,
        indices: &[usize],
        weights: &[f64],
    ) -> IntegrateResult<()> {
        let mut buffer = self.experience_buffer.lock().unwrap();
        buffer.update_priorities(indices, weights)
    }
}

impl<F: IntegrateFloat + GpuDataType + Default> DeepQNetwork<F> {
    pub fn new() -> IntegrateResult<Self> {
        Ok(DeepQNetwork {
            weights: NetworkWeights::new()?,
            target_weights: NetworkWeights::new()?,
            hyperparams: NetworkHyperparameters::default(),
            training_stats: TrainingStatistics::new(),
            optimizer_state: AdamOptimizerState::new(),
        })
    }

    pub fn initialize_weights(&mut self) -> IntegrateResult<()> {
        self.weights.initialize_xavier()?;
        // Start the target network as an exact copy of the online network.
        self.target_weights = self.weights.clone();
        Ok(())
    }

    pub fn forward(&self, input: &Array1<F>) -> IntegrateResult<Array1<F>> {
        if input.len() != 64 {
            return Err(IntegrateError::ComputationError(format!(
                "Input size {} != expected size 64",
                input.len()
            )));
        }

        // Hidden layer 1 (64 -> 128) with Mish activation:
        // mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + e^x)).
        let mut layer1_output = Array1::zeros(128);
        for i in 0..128 {
            let mut sum = self.weights.layer1_biases[i];
            for j in 0..64 {
                sum += self.weights.layer1_weights[[i, j]] * input[j];
            }
            let mish_val = sum * (F::one() + sum.exp()).ln().tanh();
            layer1_output[i] = mish_val;
        }

        // Hidden layer 2 (128 -> 64) with Swish/SiLU activation:
        // swish(x) = x * sigmoid(x) = x / (1 + e^-x).
        let mut layer2_output = Array1::zeros(64);
        for i in 0..64 {
            let mut sum = self.weights.layer2_biases[i];
            for j in 0..128 {
                sum += self.weights.layer2_weights[[i, j]] * layer1_output[j];
            }
            let swish_val = sum / (F::one() + (-sum).exp());
            layer2_output[i] = swish_val;
        }

        // Dueling advantage head (64 -> 32 actions).
        let mut advantage_values = Array1::zeros(32);
        for i in 0..32 {
            let mut sum = self.weights.advantage_biases[i];
            for j in 0..64 {
                sum += self.weights.advantage_weights[[i, j]] * layer2_output[j];
            }
            advantage_values[i] = sum;
        }

        // Dueling state-value head (64 -> scalar V(s)).
        let mut state_value = self.weights.value_bias;
        for j in 0..64 {
            state_value += self.weights.value_weights[j] * layer2_output[j];
        }

        // Combine the heads: Q(s, a) = V(s) + A(s, a) - mean_a' A(s, a').
        let mean_advantage = advantage_values.iter().copied().sum::<F>()
            / F::from(advantage_values.len()).unwrap_or(F::one());
        let mut q_values = Array1::zeros(32);
        for i in 0..32 {
            q_values[i] = state_value + advantage_values[i] - mean_advantage;
        }

        Ok(q_values)
    }

    pub fn soft_update_target(&mut self, tau: f64) -> IntegrateResult<()> {
        // Polyak averaging: target <- tau * online + (1 - tau) * target.
        let tau_f = F::from(tau).unwrap_or(F::from(0.005).unwrap());
        let one_minus_tau = F::one() - tau_f;

        for i in 0..128 {
            for j in 0..64 {
                self.target_weights.layer1_weights[[i, j]] = tau_f
                    * self.weights.layer1_weights[[i, j]]
                    + one_minus_tau * self.target_weights.layer1_weights[[i, j]];
            }
            self.target_weights.layer1_biases[i] = tau_f * self.weights.layer1_biases[i]
                + one_minus_tau * self.target_weights.layer1_biases[i];
        }

        for i in 0..64 {
            for j in 0..128 {
                self.target_weights.layer2_weights[[i, j]] = tau_f
                    * self.weights.layer2_weights[[i, j]]
                    + one_minus_tau * self.target_weights.layer2_weights[[i, j]];
            }
            self.target_weights.layer2_biases[i] = tau_f * self.weights.layer2_biases[i]
                + one_minus_tau * self.target_weights.layer2_biases[i];
        }

        for i in 0..32 {
            for j in 0..64 {
                self.target_weights.advantage_weights[[i, j]] = tau_f
                    * self.weights.advantage_weights[[i, j]]
                    + one_minus_tau * self.target_weights.advantage_weights[[i, j]];
            }
            self.target_weights.advantage_biases[i] = tau_f * self.weights.advantage_biases[i]
                + one_minus_tau * self.target_weights.advantage_biases[i];
        }

        for j in 0..64 {
            self.target_weights.value_weights[j] = tau_f * self.weights.value_weights[j]
                + one_minus_tau * self.target_weights.value_weights[j];
        }
        self.target_weights.value_bias =
            tau_f * self.weights.value_bias + one_minus_tau * self.target_weights.value_bias;

        Ok(())
    }
}

impl<F: IntegrateFloat> NetworkWeights<F> {
    pub fn new() -> IntegrateResult<Self> {
        Ok(NetworkWeights {
            layer1_weights: Array2::zeros((128, 64)),
            layer1_biases: Array1::zeros(128),
            layer2_weights: Array2::zeros((64, 128)),
            layer2_biases: Array1::zeros(64),
            advantage_weights: Array2::zeros((32, 64)),
            advantage_biases: Array1::zeros(32),
            value_weights: Array1::zeros(64),
            value_bias: F::zero(),
        })
    }

    pub fn initialize_xavier(&mut self) -> IntegrateResult<()> {
        // Xavier/Glorot uniform initialization: weights are drawn from
        // U(-b, b) with b = sqrt(6 / (fan_in + fan_out)); biases start small.
        let fan_in = 64.0;
        let fan_out = 128.0;
        let xavier_bound_1 = (6.0_f64 / (fan_in + fan_out)).sqrt();

        for i in 0..128 {
            for j in 0..64 {
                let random_val = scirs2_core::random::random::<f64>() * 2.0 - 1.0;
                let weight = F::from(random_val * xavier_bound_1).unwrap_or(F::zero());
                self.layer1_weights[[i, j]] = weight;
            }
            let bias_val = scirs2_core::random::random::<f64>() * 0.01;
            self.layer1_biases[i] = F::from(bias_val).unwrap_or(F::zero());
        }

        let fan_in = 128.0;
        let fan_out = 64.0;
        let xavier_bound_2 = (6.0_f64 / (fan_in + fan_out)).sqrt();

        for i in 0..64 {
            for j in 0..128 {
                let random_val = scirs2_core::random::random::<f64>() * 2.0 - 1.0;
                let weight = F::from(random_val * xavier_bound_2).unwrap_or(F::zero());
                self.layer2_weights[[i, j]] = weight;
            }
            let bias_val = scirs2_core::random::random::<f64>() * 0.01;
            self.layer2_biases[i] = F::from(bias_val).unwrap_or(F::zero());
        }

        let fan_in = 64.0;
        let fan_out = 32.0;
        let xavier_bound_3 = (6.0_f64 / (fan_in + fan_out)).sqrt();

        for i in 0..32 {
            for j in 0..64 {
                let random_val = scirs2_core::random::random::<f64>() * 2.0 - 1.0;
                let weight = F::from(random_val * xavier_bound_3).unwrap_or(F::zero());
                self.advantage_weights[[i, j]] = weight;
            }
            let bias_val = scirs2_core::random::random::<f64>() * 0.01;
            self.advantage_biases[i] = F::from(bias_val).unwrap_or(F::zero());
        }

        let xavier_bound_4 = (6.0_f64 / (64.0 + 1.0)).sqrt();
        for j in 0..64 {
            let random_val = scirs2_core::random::random::<f64>() * 2.0 - 1.0;
            let weight = F::from(random_val * xavier_bound_4).unwrap_or(F::zero());
            self.value_weights[j] = weight;
        }
        self.value_bias =
            F::from(scirs2_core::random::random::<f64>() * 0.01).unwrap_or(F::zero());

        Ok(())
    }
}

impl<F: IntegrateFloat> PrioritizedExperienceReplay<F> {
    pub fn new() -> IntegrateResult<Self> {
        Ok(PrioritizedExperienceReplay {
            buffer: VecDeque::new(),
            priority_tree: SumTree::new(10000),
            config: ReplayBufferConfig::default(),
            importance_sampling: ImportanceSamplingConfig::default(),
        })
    }

    pub fn add_experience(&mut self, experience: Experience<F>) -> IntegrateResult<()> {
        self.buffer.push_back(experience);
        // Evict the oldest transition once the buffer is at capacity.
        if self.buffer.len() > self.config.max_size {
            self.buffer.pop_front();
        }
        Ok(())
    }

    pub fn sample_batch(
        &mut self,
        batch_size: usize,
    ) -> IntegrateResult<(Vec<Experience<F>>, Vec<usize>, Vec<f64>)> {
        if self.buffer.is_empty() {
            return Err(IntegrateError::ComputationError(
                "Cannot sample from empty experience buffer".to_string(),
            ));
        }

        let actual_batch_size = batch_size.min(self.buffer.len());
        let mut batch = Vec::with_capacity(actual_batch_size);
        let mut indices = Vec::with_capacity(actual_batch_size);
        let mut weights = Vec::with_capacity(actual_batch_size);

        // PER priorities: p_i = |TD error|^alpha.
        let alpha = self.config.alpha;
        let beta = self.importance_sampling.beta_start;
        let total_priority: f64 = self
            .buffer
            .iter()
            .map(|exp| exp.td_error.to_f64().unwrap_or(1.0).abs().powf(alpha))
            .sum();

        if total_priority <= 0.0 {
            // No informative priorities yet: fall back to uniform sampling.
            for _i in 0..actual_batch_size {
                let random_idx = (scirs2_core::random::random::<f64>() * self.buffer.len() as f64)
                    as usize
                    % self.buffer.len();
                batch.push(self.buffer[random_idx].clone());
                indices.push(random_idx);
                weights.push(1.0);
            }
            return Ok((batch, indices, weights));
        }

        for _ in 0..actual_batch_size {
            // Inverse-CDF sampling over the cumulative priority mass.
            let random_value = scirs2_core::random::random::<f64>() * total_priority;
            let mut cumulative_priority = 0.0;
            let mut selected_idx = 0;

            for (idx, experience) in self.buffer.iter().enumerate() {
                let priority = experience.td_error.to_f64().unwrap_or(1.0).abs().powf(alpha);
                cumulative_priority += priority;

                if cumulative_priority >= random_value {
                    selected_idx = idx;
                    break;
                }
            }

            let experience_priority = self.buffer[selected_idx]
                .td_error
                .to_f64()
                .unwrap_or(1.0)
                .abs()
                .powf(alpha);
            let probability = experience_priority / total_priority;
            // Importance-sampling weight w_i = (N * P(i))^-beta corrects the
            // bias introduced by non-uniform sampling.
            let importance_weight = (self.buffer.len() as f64 * probability).powf(-beta);

            batch.push(self.buffer[selected_idx].clone());
            indices.push(selected_idx);
            weights.push(importance_weight);
        }

        // Normalize by the largest weight so updates are only ever scaled down.
        let max_weight = weights.iter().cloned().fold(f64::MIN, f64::max);
        if max_weight > 0.0 {
            for w in weights.iter_mut() {
                *w /= max_weight;
            }
        }

        Ok((batch, indices, weights))
    }

    pub fn update_priorities(
        &mut self,
        indices: &[usize],
        priorities: &[f64],
    ) -> IntegrateResult<()> {
        if indices.len() != priorities.len() {
            return Err(IntegrateError::ComputationError(
                "Indices and priorities length mismatch".to_string(),
            ));
        }

        for (idx, &priority) in indices.iter().zip(priorities.iter()) {
            if *idx < self.buffer.len() {
                // Clamp away from zero so every transition keeps a nonzero
                // chance of being resampled.
                let clamped_priority = priority.max(1e-6);
                self.buffer[*idx].td_error =
                    F::from(clamped_priority).unwrap_or(F::from(1e-6).unwrap());
            }
        }

        Ok(())
    }
}

impl<F: IntegrateFloat + Default> StateFeatureExtractor<F> {
    pub fn new() -> IntegrateResult<Self> {
        Ok(StateFeatureExtractor {
            error_history: VecDeque::new(),
            step_history: VecDeque::new(),
            jacobian_eigenvalues: Array1::zeros(8),
            problem_characteristics: ProblemCharacteristics::default(),
            performance_metrics: PerformanceMetrics::default(),
            normalization: FeatureNormalization::default(),
        })
    }

    pub fn initialize(
        &mut self,
        problem_size: usize,
        initial_step: F,
        problem_type: &str,
    ) -> IntegrateResult<()> {
        self.problem_characteristics.problem_size = problem_size;
        self.problem_characteristics.problem_type = problem_type.to_string();
        self.step_history.push_back(initial_step);
        Ok(())
    }

    pub fn extract_features(
        &mut self,
        current_step: F,
        current_error: F,
        _state: &ProblemState<F>,
        _performance_metrics: &PerformanceMetrics<F>,
    ) -> IntegrateResult<Array1<F>> {
        let mut features = Array1::zeros(64);

        // Features 0..8: recent error history (prior to this step).
        for (i, &error) in self.error_history.iter().take(8).enumerate() {
            features[i] = error;
        }

        // Features 8..16: recent step-size history.
        for (i, &step) in self.step_history.iter().take(8).enumerate() {
            features[8 + i] = step;
        }

        // Record the current values and cap both windows at 8 entries.
        self.error_history.push_back(current_error);
        self.step_history.push_back(current_step);

        if self.error_history.len() > 8 {
            self.error_history.pop_front();
        }
        if self.step_history.len() > 8 {
            self.step_history.pop_front();
        }

        // Feature 16: problem size; the remaining features are currently zero.
        features[16] = F::from(self.problem_characteristics.problem_size).unwrap_or(F::zero());

        Ok(features)
    }
}

impl<F: IntegrateFloat + Default> MultiObjectiveRewardCalculator<F> {
    pub fn new() -> IntegrateResult<Self> {
        Ok(MultiObjectiveRewardCalculator {
            weights: RewardWeights::default(),
            baselines: PerformanceBaselines::default(),
            shaping: RewardShaping::default(),
        })
    }
}

impl RLPerformanceAnalytics {
    pub fn new() -> Self {
        RLPerformanceAnalytics {
            episode_rewards: VecDeque::new(),
            q_value_estimates: VecDeque::new(),
            loss_values: VecDeque::new(),
            epsilon_history: VecDeque::new(),
            step_acceptance_rates: VecDeque::new(),
            convergence_metrics: ConvergenceMetrics::default(),
        }
    }

    pub fn update_metrics(&mut self, reward: f64, _action: usize) -> IntegrateResult<()> {
        // Keep a sliding window of the 1000 most recent episode rewards.
        self.episode_rewards.push_back(reward);
        if self.episode_rewards.len() > 1000 {
            self.episode_rewards.pop_front();
        }
        Ok(())
    }
}

#[derive(Debug, Clone)]
pub struct StepSizePrediction<F: IntegrateFloat> {
    pub predicted_step: F,
    pub step_multiplier: F,
    pub action_index: usize,
    pub q_values: Array1<F>,
    pub confidence: f64,
    pub exploration_noise: f64,
}

#[derive(Debug, Clone)]
pub struct TrainingResult<F: IntegrateFloat> {
    pub loss: F,
    pub q_value_estimate: F,
    pub td_error: F,
    pub exploration_rate: F,
    pub training_performed: bool,
}

#[derive(Debug, Clone)]
pub struct RLEvaluationResults {
    pub average_reward: f64,
    pub reward_standard_deviation: f64,
    pub average_q_value: f64,
    pub convergence_rate: f64,
    pub exploration_rate: f64,
    pub training_episodes: usize,
    pub step_acceptance_rate: f64,
}

#[derive(Debug, Clone, Default)]
pub struct NetworkHyperparameters<F: IntegrateFloat> {
    pub learning_rate: f64,
    pub momentum: f64,
    pub weight_decay: f64,
    pub phantom: std::marker::PhantomData<F>,
}

#[derive(Debug, Clone, Default)]
pub struct TrainingStatistics {
    pub training_steps: usize,
    pub episodes: usize,
    pub total_reward: f64,
}

impl TrainingStatistics {
    pub fn new() -> Self {
        Self {
            training_steps: 0,
            episodes: 0,
            total_reward: 0.0,
        }
    }
}

#[derive(Debug, Clone, Default)]
pub struct AdamOptimizerState<F: IntegrateFloat> {
    pub m: HashMap<String, Array2<F>>,
    pub v: HashMap<String, Array2<F>>,
    pub beta1: f64,
    pub beta2: f64,
}

impl<F: IntegrateFloat> AdamOptimizerState<F> {
    pub fn new() -> Self {
        AdamOptimizerState {
            m: HashMap::new(),
            v: HashMap::new(),
            // Standard Adam decay rates for the first and second moments.
            beta1: 0.9,
            beta2: 0.999,
        }
    }
}

#[derive(Debug, Clone, Default)]
pub struct SumTree {
    capacity: usize,
    tree: Vec<f64>,
    data_pointer: usize,
}

impl SumTree {
    pub fn new(capacity: usize) -> Self {
        SumTree {
            capacity,
            // A binary sum tree over `capacity` leaves needs 2n - 1 nodes.
            tree: vec![0.0; 2 * capacity - 1],
            data_pointer: 0,
        }
    }
}

#[derive(Debug, Clone)]
pub struct ReplayBufferConfig {
    pub max_size: usize,
    pub alpha: f64,
    pub beta: f64,
}

impl Default for ReplayBufferConfig {
    fn default() -> Self {
        // A derived Default would set max_size to 0, evicting every transition
        // immediately. These defaults mirror the SumTree capacity and
        // TrainingConfiguration::default().
        ReplayBufferConfig {
            max_size: 10000,
            alpha: 0.6,
            beta: 0.4,
        }
    }
}

#[derive(Debug, Clone)]
pub struct ImportanceSamplingConfig {
    pub beta_start: f64,
    pub beta_end: f64,
    pub beta_steps: usize,
}

impl Default for ImportanceSamplingConfig {
    fn default() -> Self {
        // Anneal beta toward 1.0 as in standard PER; the step count is an
        // assumed default.
        ImportanceSamplingConfig {
            beta_start: 0.4,
            beta_end: 1.0,
            beta_steps: 100000,
        }
    }
}

#[derive(Debug, Clone, Default)]
pub struct ProblemCharacteristics<F: IntegrateFloat> {
    pub problem_size: usize,
    pub problem_type: String,
    pub stiffness_ratio: f64,
    pub phantom: std::marker::PhantomData<F>,
}

#[derive(Debug, Clone, Default)]
pub struct PerformanceMetrics<F: IntegrateFloat> {
    pub throughput: f64,
    pub memory_usage: usize,
    pub accuracy: f64,
    pub phantom: std::marker::PhantomData<F>,
}

#[derive(Debug, Clone, Default)]
pub struct FeatureNormalization<F: IntegrateFloat> {
    pub mean: Array1<F>,
    pub std: Array1<F>,
}

#[derive(Debug, Clone, Default)]
pub struct RewardWeights<F: IntegrateFloat> {
    pub accuracy_weight: F,
    pub efficiency_weight: F,
    pub stability_weight: F,
    pub convergence_weight: F,
}

#[derive(Debug, Clone, Default)]
pub struct PerformanceBaselines<F: IntegrateFloat> {
    pub baseline_accuracy: F,
    pub baseline_efficiency: F,
    pub baseline_stability: F,
}

#[derive(Debug, Clone, Default)]
pub struct RewardShaping<F: IntegrateFloat> {
    pub shaped_reward_coefficient: F,
    pub intrinsic_motivation: F,
}

#[derive(Debug, Clone, Default)]
pub struct ConvergenceMetrics {
    pub convergence_rate: f64,
    pub stability_measure: f64,
    pub learning_progress: f64,
}

#[derive(Debug, Clone, Default)]
pub struct ProblemState<F: IntegrateFloat> {
    pub current_solution: Array1<F>,
    pub jacobian_condition: f64,
    pub error_estimate: F,
}

impl Default for TrainingConfiguration {
    fn default() -> Self {
        TrainingConfiguration {
            learning_rate: 0.001,
            discount_factor: 0.99,
            epsilon: 0.1,
            epsilon_decay: 0.995,
            epsilon_min: 0.01,
            target_update_frequency: 1000,
            batch_size: 32,
            train_frequency: 4,
            tau: 0.005,
            priority_alpha: 0.6,
            priority_beta: 0.4,
            n_step: 3,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_neural_rl_controller_creation() {
        let controller = NeuralRLStepController::<f64>::new();
        assert!(controller.is_ok());
    }

    #[test]
    fn test_feature_extraction() {
        let mut extractor = StateFeatureExtractor::<f64>::new().unwrap();
        extractor.initialize(1000, 0.01, "stiff_ode").unwrap();

        let problem_state = ProblemState {
            current_solution: array![1.0, 2.0, 3.0],
            jacobian_condition: 100.0,
            error_estimate: 1e-6,
        };

        let performance_metrics = PerformanceMetrics {
            throughput: 1000.0,
            memory_usage: 1024 * 1024,
            accuracy: 1e-8,
            phantom: std::marker::PhantomData,
        };

        let features =
            extractor.extract_features(0.01, 1e-6, &problem_state, &performance_metrics);
        assert!(features.is_ok());
        assert_eq!(features.unwrap().len(), 64);
    }

    #[test]
    fn test_dqn_forward_pass() {
        let dqn = DeepQNetwork::<f64>::new().unwrap();
        let input = Array1::zeros(64);
        let output = dqn.forward(&input);
        assert!(output.is_ok());
        assert_eq!(output.unwrap().len(), 32);
    }
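
    // Additional sketches exercising the pieces above; they rely only on the
    // items defined in this module.

    #[test]
    fn test_soft_update_moves_target_toward_online() {
        // After a soft update with tau in (0, 1), a target parameter should
        // land at tau * online + (1 - tau) * target.
        let mut dqn = DeepQNetwork::<f64>::new().unwrap();
        dqn.initialize_weights().unwrap();
        dqn.weights.value_bias = 1.0;
        dqn.target_weights.value_bias = 0.0;
        dqn.soft_update_target(0.1).unwrap();
        assert!((dqn.target_weights.value_bias - 0.1).abs() < 1e-12);
    }

    #[test]
    fn test_replay_buffer_sample_roundtrip() {
        // Filling the buffer and sampling should return a batch no larger
        // than requested, with in-range indices and normalized weights.
        let mut replay = PrioritizedExperienceReplay::<f64>::new().unwrap();
        for i in 0..10 {
            replay
                .add_experience(Experience {
                    state: Array1::zeros(64),
                    action: i,
                    reward: 1.0,
                    next_state: Array1::zeros(64),
                    done: false,
                    td_error: 0.5,
                    timestamp: Instant::now(),
                })
                .unwrap();
        }
        let (batch, indices, weights) = replay.sample_batch(4).unwrap();
        assert_eq!(batch.len(), 4);
        assert!(indices.iter().all(|&i| i < 10));
        assert!(weights.iter().all(|&w| w > 0.0 && w <= 1.0));
    }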
}