#![allow(dead_code)]
#![allow(clippy::too_many_arguments)]

use crate::common::IntegrateFloat;
use crate::error::{IntegrateError, IntegrateResult};
use scirs2_core::gpu::{self, GpuDataType};
use scirs2_core::ndarray::{Array1, Array2};
use std::collections::{HashMap, VecDeque};
use std::sync::{Arc, Mutex};
use std::time::Instant;

/// Neural reinforcement-learning step-size controller built around a dueling
/// deep Q-network with prioritized experience replay.
pub struct NeuralRLStepController<F: IntegrateFloat + GpuDataType> {
    /// Dueling DQN holding the online and target weights.
    dqn: Arc<Mutex<DeepQNetwork<F>>>,
    /// Prioritized replay buffer of step-control transitions.
    experience_buffer: Arc<Mutex<PrioritizedExperienceReplay<F>>>,
    /// Builds the 64-dimensional state feature vector.
    feature_extractor: Arc<Mutex<StateFeatureExtractor<F>>>,
    /// Combines accuracy, efficiency, and stability into a scalar reward.
    reward_calculator: MultiObjectiveRewardCalculator<F>,
    /// DQN training hyperparameters.
    training_config: TrainingConfiguration,
    /// Rolling reward, loss, and convergence statistics.
    performance_analytics: Arc<Mutex<RLPerformanceAnalytics>>,
    /// GPU context for batched training kernels.
    gpu_context: Arc<Mutex<gpu::GpuContext>>,
}

/// Dueling deep Q-network with separate advantage and value heads.
pub struct DeepQNetwork<F: IntegrateFloat + GpuDataType> {
    /// Online network weights.
    weights: NetworkWeights<F>,
    /// Target network weights, tracked by Polyak averaging.
    target_weights: NetworkWeights<F>,
    /// Network hyperparameters.
    hyperparams: NetworkHyperparameters<F>,
    /// Running training statistics.
    training_stats: TrainingStatistics,
    /// Adam optimizer moment estimates.
    optimizer_state: AdamOptimizerState<F>,
}

/// Weight tensors for the dueling architecture: 64 inputs -> 128 -> 64 hidden
/// units, then a 32-way advantage head and a scalar value head.
#[derive(Debug, Clone)]
pub struct NetworkWeights<F: IntegrateFloat> {
    pub layer1_weights: Array2<F>,
    pub layer1_biases: Array1<F>,
    pub layer2_weights: Array2<F>,
    pub layer2_biases: Array1<F>,
    pub advantage_weights: Array2<F>,
    pub advantage_biases: Array1<F>,
    pub value_weights: Array1<F>,
    pub value_bias: F,
}

/// Prioritized experience replay with proportional (TD-error) sampling.
pub struct PrioritizedExperienceReplay<F: IntegrateFloat> {
    buffer: VecDeque<Experience<F>>,
    priority_tree: SumTree,
    config: ReplayBufferConfig,
    importance_sampling: ImportanceSamplingConfig,
}

/// A single (state, action, reward, next-state) transition with its
/// TD-error-derived priority.
#[derive(Debug, Clone)]
pub struct Experience<F: IntegrateFloat> {
    pub state: Array1<F>,
    pub action: usize,
    pub reward: F,
    pub next_state: Array1<F>,
    pub done: bool,
    pub tderror: F,
    pub timestamp: Instant,
}

/// Builds the 64-dimensional state representation fed to the DQN.
pub struct StateFeatureExtractor<F: IntegrateFloat> {
    error_history: VecDeque<F>,
    step_history: VecDeque<F>,
    jacobian_eigenvalues: Array1<F>,
    problem_characteristics: ProblemCharacteristics<F>,
    performance_metrics: PerformanceMetrics<F>,
    normalization: FeatureNormalization<F>,
}

/// Weighs accuracy, efficiency, and stability objectives into one reward.
pub struct MultiObjectiveRewardCalculator<F: IntegrateFloat> {
    weights: RewardWeights<F>,
    baselines: PerformanceBaselines<F>,
    shaping: RewardShaping<F>,
}

#[derive(Debug, Clone)]
pub struct TrainingConfiguration {
    pub learning_rate: f64,
    pub discount_factor: f64,
    pub epsilon: f64,
    pub epsilon_decay: f64,
    pub epsilon_min: f64,
    pub target_update_frequency: usize,
    pub batch_size: usize,
    pub train_frequency: usize,
    pub tau: f64,
    pub priority_alpha: f64,
    pub priority_beta: f64,
    pub nstep: usize,
}

pub struct RLPerformanceAnalytics {
    episode_rewards: VecDeque<f64>,
    q_value_estimates: VecDeque<f64>,
    loss_values: VecDeque<f64>,
    epsilon_history: VecDeque<f64>,
    step_acceptance_rates: VecDeque<f64>,
    convergence_metrics: ConvergenceMetrics,
}

impl<F: IntegrateFloat + GpuDataType + Default> NeuralRLStepController<F> {
    pub fn new() -> IntegrateResult<Self> {
        // In tests, fall back to the CPU backend so no GPU is required.
        #[cfg(test)]
        let gpu_context = Arc::new(Mutex::new(
            gpu::GpuContext::new(gpu::GpuBackend::Cpu).map_err(|e| {
                IntegrateError::ComputationError(format!("GPU context creation failed: {e:?}"))
            })?,
        ));

        #[cfg(not(test))]
        let gpu_context = Arc::new(Mutex::new(
            gpu::GpuContext::new(gpu::GpuBackend::Cuda).map_err(|e| {
                IntegrateError::ComputationError(format!("GPU context creation failed: {e:?}"))
            })?,
        ));

        let dqn = Arc::new(Mutex::new(DeepQNetwork::new()?));
        let experience_buffer = Arc::new(Mutex::new(PrioritizedExperienceReplay::new()?));
        let feature_extractor = Arc::new(Mutex::new(StateFeatureExtractor::new()?));
        let reward_calculator = MultiObjectiveRewardCalculator::new()?;
        let training_config = TrainingConfiguration::default();
        let performance_analytics = Arc::new(Mutex::new(RLPerformanceAnalytics::new()));

        Ok(NeuralRLStepController {
            dqn,
            experience_buffer,
            feature_extractor,
            reward_calculator,
            training_config,
            performance_analytics,
            gpu_context,
        })
    }

    pub fn initialize(
        &self,
        problem_size: usize,
        initialstep_size: F,
        problem_type: &str,
    ) -> IntegrateResult<()> {
        {
            let mut extractor = self.feature_extractor.lock().expect("Operation failed");
            extractor.initialize(problem_size, initialstep_size, problem_type)?;
        }

        {
            let mut dqn = self.dqn.lock().expect("Operation failed");
            dqn.initialize_weights()?;
        }

        self.load_neural_rl_shader()?;

        Ok(())
    }

    pub fn predict_optimalstep(
        &self,
        currentstep: F,
        currenterror: F,
        problem_state: &ProblemState<F>,
        performance_metrics: &PerformanceMetrics<F>,
    ) -> IntegrateResult<StepSizePrediction<F>> {
        let state_features = {
            let mut extractor = self.feature_extractor.lock().expect("Operation failed");
            extractor.extract_features(
                currentstep,
                currenterror,
                problem_state,
                performance_metrics,
            )?
        };

        let q_values = {
            let dqn = self.dqn.lock().expect("Operation failed");
            dqn.forward(&state_features)?
        };

        let action = self.select_action(&q_values)?;

        let step_multiplier = self.action_tostep_multiplier(action);
        let predictedstep = currentstep * step_multiplier;

        // Clamp the prediction to a safe window around the current step size.
        let minstep = currentstep * F::from(0.01).expect("Failed to convert constant to float");
        let maxstep = currentstep * F::from(10.0).expect("Failed to convert constant to float");
        let safestep = predictedstep.max(minstep).min(maxstep);

        Ok(StepSizePrediction {
            predictedstep: safestep,
            step_multiplier,
            action_index: action,
            q_values: q_values.clone(),
            confidence: self.calculate_prediction_confidence(&q_values),
            exploration_noise: self.calculate_exploration_noise(),
        })
    }

    pub fn train_on_experience(
        &self,
        previous_state: &Array1<F>,
        action_taken: usize,
        reward: F,
        current_state: &Array1<F>,
        done: bool,
    ) -> IntegrateResult<TrainingResult<F>> {
        let experience = Experience {
            state: previous_state.clone(),
            action: action_taken,
            reward,
            next_state: current_state.clone(),
            done,
            // The TD error is unknown until the next training pass.
            tderror: F::zero(),
            timestamp: Instant::now(),
        };

        {
            let mut buffer = self.experience_buffer.lock().expect("Operation failed");
            buffer.add_experience(experience)?;
        }

        if self.should_train()? {
            self.perform_trainingstep()?;
        }

        if self.should_update_target()? {
            self.update_target_network()?;
        }

        {
            let mut analytics = self.performance_analytics.lock().expect("Operation failed");
            analytics.update_metrics(reward.to_f64().unwrap_or(0.0), action_taken)?;
        }

        // Loss and TD-error statistics come from the GPU training path and
        // are reported as zero placeholders here.
        Ok(TrainingResult {
            loss: F::zero(),
            q_value_estimate: F::zero(),
            tderror: F::zero(),
            exploration_rate: F::from(self.training_config.epsilon)
                .expect("Failed to convert to float"),
            training_performed: self.should_train()?,
        })
    }

    pub fn gpu_accelerated_training(&self) -> IntegrateResult<()> {
        let gpu_context = self.gpu_context.lock().expect("Operation failed");

        let (batch, indices, weights) = {
            let mut buffer = self.experience_buffer.lock().expect("Operation failed");
            buffer.sample_batch(self.training_config.batch_size)?
        };

        let gpu_states = self.transfer_states_to_gpu(&batch)?;
        let gpu_actions = self.transfer_actions_to_gpu(&batch)?;
        let gpu_rewards = self.transfer_rewards_to_gpu(&batch)?;
        let gpu_next_states = self.transfer_next_states_to_gpu(&batch)?;

        gpu_context
            .launch_kernel(
                "neural_adaptivestep_rl",
                (self.training_config.batch_size, 1, 1),
                (32, 1, 1),
                &[
                    gpu::DynamicKernelArg::Buffer(gpu_states.as_ptr()),
                    gpu::DynamicKernelArg::Buffer(gpu_actions.as_ptr()),
                    gpu::DynamicKernelArg::Buffer(gpu_rewards.as_ptr()),
                    gpu::DynamicKernelArg::Buffer(gpu_next_states.as_ptr()),
                    gpu::DynamicKernelArg::F64(self.training_config.learning_rate),
                    gpu::DynamicKernelArg::F64(self.training_config.discount_factor),
                    gpu::DynamicKernelArg::I32(1),
                ],
            )
            .map_err(|e| {
                IntegrateError::ComputationError(format!("Neural RL kernel launch failed: {e:?}"))
            })?;

        self.retrieve_updated_weights_from_gpu()?;

        self.update_experience_priorities(&indices, &weights)?;

        Ok(())
    }

    pub fn evaluate_performance(&self) -> IntegrateResult<RLEvaluationResults> {
        let analytics = self.performance_analytics.lock().expect("Operation failed");

        // Guard against empty histories to avoid NaN averages.
        let avg_reward = if analytics.episode_rewards.is_empty() {
            0.0
        } else {
            analytics.episode_rewards.iter().sum::<f64>() / analytics.episode_rewards.len() as f64
        };

        let reward_std = if analytics.episode_rewards.is_empty() {
            0.0
        } else {
            let variance = analytics
                .episode_rewards
                .iter()
                .map(|r| (r - avg_reward).powi(2))
                .sum::<f64>()
                / analytics.episode_rewards.len() as f64;
            variance.sqrt()
        };

        let avg_q_value = if analytics.q_value_estimates.is_empty() {
            0.0
        } else {
            analytics.q_value_estimates.iter().sum::<f64>()
                / analytics.q_value_estimates.len() as f64
        };

        let convergence_rate = analytics.convergence_metrics.convergence_rate;
        let exploration_rate = analytics.epsilon_history.back().copied().unwrap_or(0.0);

        Ok(RLEvaluationResults {
            average_reward: avg_reward,
            reward_standard_deviation: reward_std,
            average_q_value: avg_q_value,
            convergence_rate,
            exploration_rate,
            training_episodes: analytics.episode_rewards.len(),
            step_acceptance_rate: analytics
                .step_acceptance_rates
                .back()
                .copied()
                .unwrap_or(0.0),
        })
    }

    pub fn adaptive_hyperparameter_tuning(&mut self) -> IntegrateResult<()> {
        let performance = self.evaluate_performance()?;

        // Slow learning down when convergence stalls; speed it up when stable.
        if performance.convergence_rate < 0.1 {
            self.training_config.learning_rate *= 0.9;
        } else if performance.convergence_rate > 0.8 {
            self.training_config.learning_rate *= 1.1;
        }

        // Low reward variance suggests exploitation is safe: decay epsilon.
        if performance.reward_standard_deviation < 0.1 {
            self.training_config.epsilon *= 0.95;
        }

        // Sharpen prioritization once Q-value estimates look healthy.
        if performance.average_q_value > 0.5 {
            self.training_config.priority_alpha *= 1.05;
        }

        Ok(())
    }

    fn load_neural_rl_shader(&self) -> IntegrateResult<()> {
        // Shader compilation is not implemented yet; acquiring the lock
        // verifies that the GPU context is available.
        let _gpu_context = self.gpu_context.lock().expect("Operation failed");
        Ok(())
    }

    fn select_action(&self, qvalues: &Array1<F>) -> IntegrateResult<usize> {
        let random_val: f64 = scirs2_core::random::random();

        if random_val < self.training_config.epsilon {
            // Explore: pick one of the 32 discrete actions uniformly at random.
            Ok((scirs2_core::random::random::<f64>() * 32.0) as usize % 32)
        } else {
            // Exploit: greedy action with the highest Q-value.
            let mut best_action = 0;
            let mut best_q = qvalues[0];

            for (i, &q_val) in qvalues.iter().enumerate() {
                if q_val > best_q {
                    best_q = q_val;
                    best_action = i;
                }
            }

            Ok(best_action)
        }
    }

    fn action_tostep_multiplier(&self, action: usize) -> F {
        // 32 discrete actions mapped to step multipliers from 0.1 to 3.2.
        let multipliers = [
            0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7,
            1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2,
        ];

        F::from(multipliers[action.min(31)]).unwrap_or(F::one())
    }

    fn calculate_prediction_confidence(&self, qvalues: &Array1<F>) -> f64 {
        // Use the spread of the Q-values as a crude confidence proxy.
        let max_q = qvalues.iter().fold(F::neg_infinity(), |acc, &x| acc.max(x));
        let min_q = qvalues.iter().fold(F::infinity(), |acc, &x| acc.min(x));
        let range = max_q - min_q;

        if range > F::zero() {
            range.to_f64().unwrap_or(0.0)
        } else {
            0.0
        }
    }

    fn calculate_exploration_noise(&self) -> f64 {
        self.training_config.epsilon * scirs2_core::random::random::<f64>()
    }

    fn should_train(&self) -> IntegrateResult<bool> {
        let buffer = self.experience_buffer.lock().expect("Operation failed");
        Ok(buffer.buffer.len() >= self.training_config.batch_size)
    }

    fn should_update_target(&self) -> IntegrateResult<bool> {
        // Soft updates are applied every step; `target_update_frequency` is
        // reserved for a future hard-update schedule.
        Ok(true)
    }

    fn perform_trainingstep(&self) -> IntegrateResult<()> {
        self.gpu_accelerated_training()
    }

    fn update_target_network(&self) -> IntegrateResult<()> {
        let mut dqn = self.dqn.lock().expect("Operation failed");
        dqn.soft_update_target(self.training_config.tau)
    }

    fn transfer_states_to_gpu(&self, batch: &[Experience<F>]) -> IntegrateResult<gpu::GpuPtr<F>> {
        // NOTE: these transfer helpers currently only allocate device memory
        // sized to the batch; the host-to-device copy is not yet implemented.
        let states: Vec<F> = batch.iter().flat_map(|e| e.state.iter().cloned()).collect();
        gpu::GpuPtr::allocate(states.len())
            .map_err(|e| IntegrateError::ComputationError(format!("GPU allocation failed: {e:?}")))
    }

    fn transfer_actions_to_gpu(&self, batch: &[Experience<F>]) -> IntegrateResult<gpu::GpuPtr<F>> {
        let actions: Vec<F> = batch
            .iter()
            .map(|e| F::from(e.action).unwrap_or(F::zero()))
            .collect();
        gpu::GpuPtr::allocate(actions.len())
            .map_err(|e| IntegrateError::ComputationError(format!("GPU allocation failed: {e:?}")))
    }

    fn transfer_rewards_to_gpu(&self, batch: &[Experience<F>]) -> IntegrateResult<gpu::GpuPtr<F>> {
        let rewards: Vec<F> = batch.iter().map(|e| e.reward).collect();
        gpu::GpuPtr::allocate(rewards.len())
            .map_err(|e| IntegrateError::ComputationError(format!("GPU allocation failed: {e:?}")))
    }

    fn transfer_next_states_to_gpu(
        &self,
        batch: &[Experience<F>],
    ) -> IntegrateResult<gpu::GpuPtr<F>> {
        let next_states: Vec<F> = batch
            .iter()
            .flat_map(|e| e.next_state.iter().cloned())
            .collect();
        gpu::GpuPtr::allocate(next_states.len())
            .map_err(|e| IntegrateError::ComputationError(format!("GPU allocation failed: {e:?}")))
    }

    fn retrieve_updated_weights_from_gpu(&self) -> IntegrateResult<()> {
        // Weight read-back from the device is not yet implemented.
        Ok(())
    }

    fn update_experience_priorities(
        &self,
        indices: &[usize],
        weights: &[f64],
    ) -> IntegrateResult<()> {
        let mut buffer = self.experience_buffer.lock().expect("Operation failed");
        buffer.update_priorities(indices, weights)
    }
}
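
// Illustrative usage sketch: how a solver's adaptive-step loop might drive the
// controller. The surrounding solver logic is an assumption for illustration;
// only the controller API calls are from this module.
#[allow(dead_code)]
fn example_step_control_loop() -> IntegrateResult<()> {
    let controller = NeuralRLStepController::<f64>::new()?;
    controller.initialize(100, 0.01, "stiff_ode")?;

    // Placeholder problem state and metrics; a real solver would fill these in.
    let problem_state = ProblemState {
        current_solution: Array1::zeros(100),
        jacobian_condition: 1.0,
        error_estimate: 1e-6,
    };
    let metrics = PerformanceMetrics::<f64>::default();

    // Ask the DQN for the next step size given the current error estimate.
    let prediction = controller.predict_optimalstep(0.01, 1e-6, &problem_state, &metrics)?;

    // After taking the step, feed the observed transition back for training.
    // The 64-element feature vectors and unit reward are placeholders.
    let features = Array1::<f64>::zeros(64);
    let _outcome = controller.train_on_experience(
        &features,
        prediction.action_index,
        1.0,
        &features,
        false,
    )?;

    Ok(())
}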

impl<F: IntegrateFloat + GpuDataType + Default> DeepQNetwork<F> {
    pub fn new() -> IntegrateResult<Self> {
        Ok(DeepQNetwork {
            weights: NetworkWeights::new()?,
            target_weights: NetworkWeights::new()?,
            hyperparams: NetworkHyperparameters::default(),
            training_stats: TrainingStatistics::new(),
            optimizer_state: AdamOptimizerState::new(),
        })
    }

    pub fn initialize_weights(&mut self) -> IntegrateResult<()> {
        self.weights.initialize_xavier()?;
        self.target_weights = self.weights.clone();
        Ok(())
    }

    pub fn forward(&self, input: &Array1<F>) -> IntegrateResult<Array1<F>> {
        if input.len() != 64 {
            return Err(IntegrateError::ComputationError(format!(
                "Input size {} != expected size 64",
                input.len()
            )));
        }

        // Hidden layer 1 (64 -> 128) with Mish activation:
        // mish(x) = x * tanh(ln(1 + e^x)).
        let mut layer1_output = Array1::zeros(128);
        for i in 0..128 {
            let mut sum = self.weights.layer1_biases[i];
            for j in 0..64 {
                sum += self.weights.layer1_weights[[i, j]] * input[j];
            }
            let mish_val = sum * (F::one() + sum.exp()).ln().tanh();
            layer1_output[i] = mish_val;
        }

        // Hidden layer 2 (128 -> 64) with Swish activation:
        // swish(x) = x * sigmoid(x) = x / (1 + e^-x).
        let mut layer2_output = Array1::zeros(64);
        for i in 0..64 {
            let mut sum = self.weights.layer2_biases[i];
            for j in 0..128 {
                sum += self.weights.layer2_weights[[i, j]] * layer1_output[j];
            }
            let swish_val = sum / (F::one() + (-sum).exp());
            layer2_output[i] = swish_val;
        }

        // Advantage head (64 -> 32 actions).
        let mut advantage_values = Array1::zeros(32);
        for i in 0..32 {
            let mut sum = self.weights.advantage_biases[i];
            for j in 0..64 {
                sum += self.weights.advantage_weights[[i, j]] * layer2_output[j];
            }
            advantage_values[i] = sum;
        }

        // Value head (64 -> scalar).
        let mut state_value = self.weights.value_bias;
        for j in 0..64 {
            state_value += self.weights.value_weights[j] * layer2_output[j];
        }

        // Dueling aggregation: Q(s, a) = V(s) + A(s, a) - mean_a A(s, a).
        let mean_advantage = advantage_values.iter().copied().sum::<F>()
            / F::from(advantage_values.len()).unwrap_or(F::one());
        let mut q_values = Array1::zeros(32);
        for i in 0..32 {
            q_values[i] = state_value + advantage_values[i] - mean_advantage;
        }

        Ok(q_values)
    }

    /// Polyak (soft) update: target <- tau * online + (1 - tau) * target.
    pub fn soft_update_target(&mut self, tau: f64) -> IntegrateResult<()> {
        let tau_f =
            F::from(tau).unwrap_or(F::from(0.005).expect("Failed to convert constant to float"));
        let one_minus_tau = F::one() - tau_f;

        for i in 0..128 {
            for j in 0..64 {
                self.target_weights.layer1_weights[[i, j]] = tau_f
                    * self.weights.layer1_weights[[i, j]]
                    + one_minus_tau * self.target_weights.layer1_weights[[i, j]];
            }
            self.target_weights.layer1_biases[i] = tau_f * self.weights.layer1_biases[i]
                + one_minus_tau * self.target_weights.layer1_biases[i];
        }

        for i in 0..64 {
            for j in 0..128 {
                self.target_weights.layer2_weights[[i, j]] = tau_f
                    * self.weights.layer2_weights[[i, j]]
                    + one_minus_tau * self.target_weights.layer2_weights[[i, j]];
            }
            self.target_weights.layer2_biases[i] = tau_f * self.weights.layer2_biases[i]
                + one_minus_tau * self.target_weights.layer2_biases[i];
        }

        for i in 0..32 {
            for j in 0..64 {
                self.target_weights.advantage_weights[[i, j]] = tau_f
                    * self.weights.advantage_weights[[i, j]]
                    + one_minus_tau * self.target_weights.advantage_weights[[i, j]];
            }
            self.target_weights.advantage_biases[i] = tau_f * self.weights.advantage_biases[i]
                + one_minus_tau * self.target_weights.advantage_biases[i];
        }

        for j in 0..64 {
            self.target_weights.value_weights[j] = tau_f * self.weights.value_weights[j]
                + one_minus_tau * self.target_weights.value_weights[j];
        }
        self.target_weights.value_bias =
            tau_f * self.weights.value_bias + one_minus_tau * self.target_weights.value_bias;

        Ok(())
    }
}

impl<F: IntegrateFloat> NetworkWeights<F> {
    pub fn new() -> IntegrateResult<Self> {
        Ok(NetworkWeights {
            layer1_weights: Array2::zeros((128, 64)),
            layer1_biases: Array1::zeros(128),
            layer2_weights: Array2::zeros((64, 128)),
            layer2_biases: Array1::zeros(64),
            advantage_weights: Array2::zeros((32, 64)),
            advantage_biases: Array1::zeros(32),
            value_weights: Array1::zeros(64),
            value_bias: F::zero(),
        })
    }

    /// Xavier/Glorot uniform initialization: weights drawn from [-b, b]
    /// with b = sqrt(6 / (fan_in + fan_out)).
    pub fn initialize_xavier(&mut self) -> IntegrateResult<()> {
        let fan_in = 64.0;
        let fan_out = 128.0;
        let xavier_bound_1 = (6.0_f64 / (fan_in + fan_out)).sqrt();

        for i in 0..128 {
            for j in 0..64 {
                let random_val = scirs2_core::random::random::<f64>() * 2.0 - 1.0;
                let weight = F::from(random_val * xavier_bound_1).unwrap_or(F::zero());
                self.layer1_weights[[i, j]] = weight;
            }
            let bias_val = scirs2_core::random::random::<f64>() * 0.01;
            self.layer1_biases[i] = F::from(bias_val).unwrap_or(F::zero());
        }

        let fan_in = 128.0;
        let fan_out = 64.0;
        let xavier_bound_2 = (6.0_f64 / (fan_in + fan_out)).sqrt();

        for i in 0..64 {
            for j in 0..128 {
                let random_val = scirs2_core::random::random::<f64>() * 2.0 - 1.0;
                let weight = F::from(random_val * xavier_bound_2).unwrap_or(F::zero());
                self.layer2_weights[[i, j]] = weight;
            }
            let bias_val = scirs2_core::random::random::<f64>() * 0.01;
            self.layer2_biases[i] = F::from(bias_val).unwrap_or(F::zero());
        }

        let fan_in = 64.0;
        let fan_out = 32.0;
        let xavier_bound_3 = (6.0_f64 / (fan_in + fan_out)).sqrt();

        for i in 0..32 {
            for j in 0..64 {
                let random_val = scirs2_core::random::random::<f64>() * 2.0 - 1.0;
                let weight = F::from(random_val * xavier_bound_3).unwrap_or(F::zero());
                self.advantage_weights[[i, j]] = weight;
            }
            let bias_val = scirs2_core::random::random::<f64>() * 0.01;
            self.advantage_biases[i] = F::from(bias_val).unwrap_or(F::zero());
        }

        let xavier_bound_4 = (6.0_f64 / (64.0 + 1.0)).sqrt();
        for j in 0..64 {
            let random_val = scirs2_core::random::random::<f64>() * 2.0 - 1.0;
            let weight = F::from(random_val * xavier_bound_4).unwrap_or(F::zero());
            self.value_weights[j] = weight;
        }
        self.value_bias =
            F::from(scirs2_core::random::random::<f64>() * 0.01).unwrap_or(F::zero());

        Ok(())
    }
}

impl<F: IntegrateFloat> PrioritizedExperienceReplay<F> {
    pub fn new() -> IntegrateResult<Self> {
        Ok(PrioritizedExperienceReplay {
            buffer: VecDeque::new(),
            priority_tree: SumTree::new(10000),
            config: ReplayBufferConfig::default(),
            importance_sampling: ImportanceSamplingConfig::default(),
        })
    }

    pub fn add_experience(&mut self, experience: Experience<F>) -> IntegrateResult<()> {
        self.buffer.push_back(experience);
        // Evict the oldest experience once the buffer is at capacity.
        if self.buffer.len() > self.config.max_size {
            self.buffer.pop_front();
        }
        Ok(())
    }

    pub fn sample_batch(
        &mut self,
        batch_size: usize,
    ) -> IntegrateResult<(Vec<Experience<F>>, Vec<usize>, Vec<f64>)> {
        if self.buffer.is_empty() {
            return Err(IntegrateError::ComputationError(
                "Cannot sample from empty experience buffer".to_string(),
            ));
        }

        let actual_batch_size = batch_size.min(self.buffer.len());
        let mut batch = Vec::with_capacity(actual_batch_size);
        let mut indices = Vec::with_capacity(actual_batch_size);
        let mut weights = Vec::with_capacity(actual_batch_size);

        // Priorities are the TD errors raised to the prioritization exponent.
        let total_priority: f64 = self
            .buffer
            .iter()
            .map(|exp| {
                exp.tderror
                    .to_f64()
                    .unwrap_or(1.0)
                    .powf(self.importance_sampling.beta_start)
            })
            .sum();

        if total_priority <= 0.0 {
            // Fall back to uniform sampling when no priorities are set yet.
            for _ in 0..actual_batch_size {
                let random_idx = (scirs2_core::random::random::<f64>() * self.buffer.len() as f64)
                    as usize
                    % self.buffer.len();
                batch.push(self.buffer[random_idx].clone());
                indices.push(random_idx);
                weights.push(1.0);
            }
            return Ok((batch, indices, weights));
        }

        for _ in 0..actual_batch_size {
            // Roulette-wheel selection proportional to priority.
            let random_value = scirs2_core::random::random::<f64>() * total_priority;
            let mut cumulative_priority = 0.0;
            let mut selected_idx = 0;

            for (idx, experience) in self.buffer.iter().enumerate() {
                let priority = experience
                    .tderror
                    .to_f64()
                    .unwrap_or(1.0)
                    .powf(self.importance_sampling.beta_start);
                cumulative_priority += priority;

                if cumulative_priority >= random_value {
                    selected_idx = idx;
                    break;
                }
            }

            // Importance-sampling weight correcting for the non-uniform draw.
            let experience_priority = self.buffer[selected_idx]
                .tderror
                .to_f64()
                .unwrap_or(1.0)
                .powf(self.importance_sampling.beta_start);
            let probability = experience_priority / total_priority;
            let max_weight = (1.0 / (self.buffer.len() as f64 * probability))
                .powf(self.importance_sampling.beta_start);
            let importance_weight =
                max_weight / total_priority.powf(self.importance_sampling.beta_start);

            batch.push(self.buffer[selected_idx].clone());
            indices.push(selected_idx);
            weights.push(importance_weight);
        }

        Ok((batch, indices, weights))
    }

    pub fn update_priorities(
        &mut self,
        indices: &[usize],
        priorities: &[f64],
    ) -> IntegrateResult<()> {
        if indices.len() != priorities.len() {
            return Err(IntegrateError::ComputationError(
                "Indices and priorities length mismatch".to_string(),
            ));
        }

        for (idx, &priority) in indices.iter().zip(priorities.iter()) {
            if *idx < self.buffer.len() {
                // Keep priorities strictly positive so sampling never stalls.
                let clamped_priority = priority.max(1e-6);
                self.buffer[*idx].tderror = F::from(clamped_priority)
                    .unwrap_or(F::from(1e-6).expect("Failed to convert constant to float"));
            }
        }

        Ok(())
    }
}

impl<F: IntegrateFloat + Default> StateFeatureExtractor<F> {
    pub fn new() -> IntegrateResult<Self> {
        Ok(StateFeatureExtractor {
            error_history: VecDeque::new(),
            step_history: VecDeque::new(),
            jacobian_eigenvalues: Array1::zeros(8),
            problem_characteristics: ProblemCharacteristics::default(),
            performance_metrics: PerformanceMetrics::default(),
            normalization: FeatureNormalization::default(),
        })
    }

    pub fn initialize(
        &mut self,
        problem_size: usize,
        initialstep: F,
        problem_type: &str,
    ) -> IntegrateResult<()> {
        self.problem_characteristics.problem_size = problem_size;
        self.problem_characteristics.problem_type = problem_type.to_string();
        self.step_history.push_back(initialstep);
        Ok(())
    }

    pub fn extract_features(
        &mut self,
        currentstep: F,
        currenterror: F,
        _state: &ProblemState<F>,
        _performance_metrics: &PerformanceMetrics<F>,
    ) -> IntegrateResult<Array1<F>> {
        let mut features = Array1::zeros(64);

        // Features 0..8: recent error history.
        for (i, &error) in self.error_history.iter().take(8).enumerate() {
            features[i] = error;
        }

        // Features 8..16: recent step-size history.
        for (i, &step) in self.step_history.iter().take(8).enumerate() {
            features[8 + i] = step;
        }

        // Record the current values and keep only the last 8 entries.
        self.error_history.push_back(currenterror);
        self.step_history.push_back(currentstep);

        if self.error_history.len() > 8 {
            self.error_history.pop_front();
        }
        if self.step_history.len() > 8 {
            self.step_history.pop_front();
        }

        // Feature 16: problem size; the remaining slots are currently unused.
        features[16] = F::from(self.problem_characteristics.problem_size).unwrap_or(F::zero());

        Ok(features)
    }
}

impl<F: IntegrateFloat + Default> MultiObjectiveRewardCalculator<F> {
    pub fn new() -> IntegrateResult<Self> {
        Ok(MultiObjectiveRewardCalculator {
            weights: RewardWeights::default(),
            baselines: PerformanceBaselines::default(),
            shaping: RewardShaping::default(),
        })
    }
}

impl RLPerformanceAnalytics {
    pub fn new() -> Self {
        RLPerformanceAnalytics {
            episode_rewards: VecDeque::new(),
            q_value_estimates: VecDeque::new(),
            loss_values: VecDeque::new(),
            epsilon_history: VecDeque::new(),
            step_acceptance_rates: VecDeque::new(),
            convergence_metrics: ConvergenceMetrics::default(),
        }
    }

    pub fn update_metrics(&mut self, reward: f64, _action: usize) -> IntegrateResult<()> {
        self.episode_rewards.push_back(reward);
        // Keep a rolling window of the most recent 1000 episode rewards.
        if self.episode_rewards.len() > 1000 {
            self.episode_rewards.pop_front();
        }
        Ok(())
    }
}

/// Output of a single step-size prediction.
#[derive(Debug, Clone)]
pub struct StepSizePrediction<F: IntegrateFloat> {
    pub predictedstep: F,
    pub step_multiplier: F,
    pub action_index: usize,
    pub q_values: Array1<F>,
    pub confidence: f64,
    pub exploration_noise: f64,
}

/// Summary of a single training update.
#[derive(Debug, Clone)]
pub struct TrainingResult<F: IntegrateFloat> {
    pub loss: F,
    pub q_value_estimate: F,
    pub tderror: F,
    pub exploration_rate: F,
    pub training_performed: bool,
}

/// Aggregate statistics reported by `evaluate_performance`.
#[derive(Debug, Clone)]
pub struct RLEvaluationResults {
    pub average_reward: f64,
    pub reward_standard_deviation: f64,
    pub average_q_value: f64,
    pub convergence_rate: f64,
    pub exploration_rate: f64,
    pub training_episodes: usize,
    pub step_acceptance_rate: f64,
}

#[derive(Debug, Clone, Default)]
pub struct NetworkHyperparameters<F: IntegrateFloat> {
    pub learning_rate: f64,
    pub momentum: f64,
    pub weight_decay: f64,
    pub phantom: std::marker::PhantomData<F>,
}

#[derive(Debug, Clone, Default)]
pub struct TrainingStatistics {
    pub trainingsteps: usize,
    pub episodes: usize,
    pub total_reward: f64,
}

impl TrainingStatistics {
    pub fn new() -> Self {
        Self {
            trainingsteps: 0,
            episodes: 0,
            total_reward: 0.0,
        }
    }
}

#[derive(Debug, Clone, Default)]
pub struct AdamOptimizerState<F: IntegrateFloat> {
    pub m: HashMap<String, Array2<F>>,
    pub v: HashMap<String, Array2<F>>,
    pub beta1: f64,
    pub beta2: f64,
}

impl<F: IntegrateFloat + Default> AdamOptimizerState<F> {
    pub fn new() -> Self {
        Default::default()
    }
}

/// Binary sum tree backing proportional priority sampling.
#[derive(Debug, Clone, Default)]
pub struct SumTree {
    capacity: usize,
    tree: Vec<f64>,
    data_pointer: usize,
}

impl SumTree {
    pub fn new(capacity: usize) -> Self {
        SumTree {
            capacity,
            tree: vec![0.0; 2 * capacity - 1],
            data_pointer: 0,
        }
    }
}
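
// `SumTree` is allocated by `PrioritizedExperienceReplay::new`, but the
// sampling above still uses a linear scan. Below is a minimal sketch of the
// standard sum-tree operations it is meant to support; the leaf layout
// (leaf `i` at tree index `capacity - 1 + i`) is an assumption and is not
// yet wired into `sample_batch`.
impl SumTree {
    /// Set the priority of leaf `index` and propagate the change to the root.
    pub fn update(&mut self, index: usize, priority: f64) {
        let mut tree_idx = self.capacity - 1 + index;
        let delta = priority - self.tree[tree_idx];
        self.tree[tree_idx] = priority;
        // Walk the parents up to the root, adding the priority delta.
        while tree_idx > 0 {
            tree_idx = (tree_idx - 1) / 2;
            self.tree[tree_idx] += delta;
        }
    }

    /// Total stored priority (the value at the root).
    pub fn total(&self) -> f64 {
        self.tree[0]
    }
}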

#[derive(Debug, Clone)]
pub struct ReplayBufferConfig {
    pub max_size: usize,
    pub alpha: f64,
    pub beta: f64,
}

// A derived Default would set `max_size` to 0, causing `add_experience` to
// evict every entry immediately; mirror the SumTree capacity and the priority
// exponents from `TrainingConfiguration` instead.
impl Default for ReplayBufferConfig {
    fn default() -> Self {
        ReplayBufferConfig {
            max_size: 10000,
            alpha: 0.6,
            beta: 0.4,
        }
    }
}

#[derive(Debug, Clone)]
pub struct ImportanceSamplingConfig {
    pub beta_start: f64,
    pub beta_end: f64,
    pub betasteps: usize,
}

// With a derived Default, `beta_start` would be 0.0 and every priority would
// collapse to 1.0; start from a conventional beta-annealing schedule instead
// (beta_start matches `priority_beta` in `TrainingConfiguration`).
impl Default for ImportanceSamplingConfig {
    fn default() -> Self {
        ImportanceSamplingConfig {
            beta_start: 0.4,
            beta_end: 1.0,
            betasteps: 100_000,
        }
    }
}

#[derive(Debug, Clone, Default)]
pub struct ProblemCharacteristics<F: IntegrateFloat> {
    pub problem_size: usize,
    pub problem_type: String,
    pub stiffness_ratio: f64,
    pub phantom: std::marker::PhantomData<F>,
}

#[derive(Debug, Clone, Default)]
pub struct PerformanceMetrics<F: IntegrateFloat> {
    pub throughput: f64,
    pub memory_usage: usize,
    pub accuracy: f64,
    pub phantom: std::marker::PhantomData<F>,
}

#[derive(Debug, Clone, Default)]
pub struct FeatureNormalization<F: IntegrateFloat> {
    pub mean: Array1<F>,
    pub std: Array1<F>,
}

#[derive(Debug, Clone, Default)]
pub struct RewardWeights<F: IntegrateFloat> {
    pub accuracy_weight: F,
    pub efficiency_weight: F,
    pub stability_weight: F,
    pub convergence_weight: F,
}

#[derive(Debug, Clone, Default)]
pub struct PerformanceBaselines<F: IntegrateFloat> {
    pub baseline_accuracy: F,
    pub baseline_efficiency: F,
    pub baseline_stability: F,
}

#[derive(Debug, Clone, Default)]
pub struct RewardShaping<F: IntegrateFloat> {
    pub shaped_reward_coefficient: F,
    pub intrinsic_motivation: F,
}

#[derive(Debug, Clone, Default)]
pub struct ConvergenceMetrics {
    pub convergence_rate: f64,
    pub stability_measure: f64,
    pub learning_progress: f64,
}

#[derive(Debug, Clone, Default)]
pub struct ProblemState<F: IntegrateFloat> {
    pub current_solution: Array1<F>,
    pub jacobian_condition: f64,
    pub error_estimate: F,
}

impl Default for TrainingConfiguration {
    fn default() -> Self {
        TrainingConfiguration {
            learning_rate: 0.001,
            discount_factor: 0.99,
            epsilon: 0.1,
            epsilon_decay: 0.995,
            epsilon_min: 0.01,
            target_update_frequency: 1000,
            batch_size: 32,
            train_frequency: 4,
            tau: 0.005,
            priority_alpha: 0.6,
            priority_beta: 0.4,
            nstep: 3,
        }
    }
}
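
// Illustrative: override selected hyperparameters while keeping the remaining
// defaults via struct-update syntax (the overridden values are arbitrary).
#[allow(dead_code)]
fn example_custom_training_config() -> TrainingConfiguration {
    TrainingConfiguration {
        learning_rate: 0.0005,
        batch_size: 64,
        ..TrainingConfiguration::default()
    }
}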

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_neural_rl_controller_creation() {
        let controller = NeuralRLStepController::<f64>::new();
        assert!(controller.is_ok());
    }

    #[test]
    fn test_feature_extraction() {
        let mut extractor = StateFeatureExtractor::<f64>::new().expect("Operation failed");
        extractor
            .initialize(1000, 0.01, "stiff_ode")
            .expect("Operation failed");

        let problem_state = ProblemState {
            current_solution: array![1.0, 2.0, 3.0],
            jacobian_condition: 100.0,
            error_estimate: 1e-6,
        };

        let performance_metrics = PerformanceMetrics {
            throughput: 1000.0,
            memory_usage: 1024 * 1024,
            accuracy: 1e-8,
            phantom: std::marker::PhantomData,
        };

        let features =
            extractor.extract_features(0.01, 1e-6, &problem_state, &performance_metrics);
        assert!(features.is_ok());
        assert_eq!(features.expect("Operation failed").len(), 64);
    }

    #[test]
    fn test_dqn_forward_pass() {
        let dqn = DeepQNetwork::<f64>::new().expect("Operation failed");
        let input = Array1::zeros(64);
        let output = dqn.forward(&input);
        assert!(output.is_ok());
        assert_eq!(output.expect("Operation failed").len(), 32);
    }
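
    // Sketch: a soft update should move each target weight toward its online
    // counterpart by the factor tau (Polyak averaging).
    #[test]
    fn test_soft_target_update() {
        let mut dqn = DeepQNetwork::<f64>::new().expect("Operation failed");
        dqn.weights.layer1_weights[[0, 0]] = 1.0;
        dqn.target_weights.layer1_weights[[0, 0]] = 0.0;
        dqn.soft_update_target(0.1).expect("Operation failed");
        assert!((dqn.target_weights.layer1_weights[[0, 0]] - 0.1).abs() < 1e-12);
    }

    // Sketch: once the buffer holds experiences, sampling should return a
    // batch, indices, and importance weights of the requested size.
    #[test]
    fn test_replay_buffer_sampling() {
        let mut buffer = PrioritizedExperienceReplay::<f64>::new().expect("Operation failed");
        for i in 0..10 {
            buffer
                .add_experience(Experience {
                    state: Array1::zeros(64),
                    action: i,
                    reward: 1.0,
                    next_state: Array1::zeros(64),
                    done: false,
                    tderror: 1.0,
                    timestamp: Instant::now(),
                })
                .expect("Operation failed");
        }
        let (batch, indices, weights) = buffer.sample_batch(4).expect("Operation failed");
        assert_eq!(batch.len(), 4);
        assert_eq!(indices.len(), 4);
        assert_eq!(weights.len(), 4);
    }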
}