//! Neural network optimizers
//!
//! This module provides various optimization algorithms for neural networks
//! such as SGD, Adam, RMSProp, Adagrad, etc.
//! The optimizers in this module are wrappers around the implementations in
//! the `scirs2-optim` crate, adapted to work with the neural network API.
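//!
//! # Example
//!
//! A minimal usage sketch. The `SGD::new` constructor shown here is an
//! assumption made for illustration, not a confirmed signature from this
//! crate, so the example is not compiled as a doc test:
//!
//! ```ignore
//! use scirs2_core::ndarray::{Array, IxDyn};
//!
//! // Hypothetical constructor: plain SGD with learning rate 0.01
//! let mut opt = SGD::new(0.01f64);
//!
//! // One parameter array and its gradient, flattened to `IxDyn`
//! let mut params = vec![Array::<f64, _>::zeros(IxDyn(&[4]))];
//! let grads = vec![Array::<f64, _>::ones(IxDyn(&[4]))];
//!
//! // One optimizer step: `params` is updated in place
//! opt.update(&mut params, &grads).unwrap();
//! ```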
// (`Result` and `ParamLayer` paths assume the crate's `error` and `layers` modules)
use crate::error::Result;
use crate::layers::ParamLayer;
use num_traits::{Float, NumAssign};
use scirs2_core::ndarray::{Array, ScalarOperand};
use std::fmt::Debug;
// Re-export scirs2-optim's optimizers when the feature is enabled
// Note: scirs2_optim crate may not exist yet - commented out for compilation
// #[cfg(feature = "optim")]
// pub use scirs2_optim::optimizers as optim_optimizers;
/// Trait for neural network optimizers
pub trait Optimizer<F: Float + Debug + ScalarOperand> {
    /// Update the parameter arrays in place from the matching gradient arrays
    fn update(
        &mut self,
        params: &mut [Array<F, scirs2_core::ndarray::IxDyn>],
        grads: &[Array<F, scirs2_core::ndarray::IxDyn>],
    ) -> Result<()>;

    /// Return the current learning rate
    fn get_learning_rate(&self) -> F;

    /// Set a new learning rate
    fn set_learning_rate(&mut self, lr: F);
}
/// Extension trait for optimizers that can work with model layers
///
/// A minimal sketch: it assumes `ParamLayer` exposes `get_parameters`,
/// `get_gradients`, and `set_parameters` accessors; those names are
/// assumptions, not confirmed API.
pub trait LayerOptimizer<F: Float + Debug + ScalarOperand>: Optimizer<F> {
    /// Update a layer's parameters in place from its stored gradients
    fn update_layer(&mut self, layer: &mut dyn ParamLayer<F>) -> Result<()>;
}

/// Blanket implementation for all optimizers
impl<F, O> LayerOptimizer<F> for O
where
    F: Float + Debug + ScalarOperand,
    O: Optimizer<F>,
{
    fn update_layer(&mut self, layer: &mut dyn ParamLayer<F>) -> Result<()> {
        // Copy the layer's parameters and gradients, run one optimizer step,
        // then write the updated parameters back (accessor names assumed).
        let mut params: Vec<Array<F, scirs2_core::ndarray::IxDyn>> =
            layer.get_parameters().into_iter().cloned().collect();
        let grads: Vec<Array<F, scirs2_core::ndarray::IxDyn>> =
            layer.get_gradients().into_iter().cloned().collect();
        self.update(&mut params, &grads)?;
        layer.set_parameters(params)
    }
}

/// Implementation for trait objects
impl<F: Float + Debug + ScalarOperand> Optimizer<F> for Box<dyn Optimizer<F>> {
    fn update(
        &mut self,
        params: &mut [Array<F, scirs2_core::ndarray::IxDyn>],
        grads: &[Array<F, scirs2_core::ndarray::IxDyn>],
    ) -> Result<()> {
        (**self).update(params, grads)
    }

    fn get_learning_rate(&self) -> F {
        (**self).get_learning_rate()
    }

    fn set_learning_rate(&mut self, lr: F) {
        (**self).set_learning_rate(lr);
    }
}
// Standard optimizer implementations
// (submodule names are assumed to mirror the re-exported types)
mod adagrad;
mod adam;
mod adamw;
mod momentum;
mod radam;
mod rmsprop;
mod sgd;

// Re-export standard implementations
pub use adagrad::Adagrad;
pub use adam::Adam;
pub use adamw::AdamW;
pub use momentum::MomentumOptimizer;
pub use radam::RAdam;
pub use rmsprop::RMSprop;
pub use sgd::SGD;
// Traits are already defined in this module
// Converters between scirs2-optim optimizers and neural network optimizers
// Note: wrappers module commented out due to missing scirs2_optim crate
// mod wrappers {
//     use super::*;
//     use crate::NeuralError;
//     use scirs2_core::ndarray::Dimension;
//     use scirs2_optim::optimizers as optim;
//     /// Wrapper for optim optimizers to conform to our Optimizer trait
//     pub struct OptimOptimizerWrapper<F, D, O>
//     where
//         F: Float + Debug + ScalarOperand,
//         D: Dimension,
//         O: optim::Optimizer<F, D>,
//     {
//         optimizer: O,
//         phantom: std::marker::PhantomData<(F, D)>,
//     }
//
//     impl<F, D, O> OptimOptimizerWrapper<F, D, O>
//     where
//         F: Float + Debug + ScalarOperand,
//         D: Dimension,
//         O: optim::Optimizer<F, D>,
//     {
//         pub fn new(optimizer: O) -> Self {
//             Self {
//                 optimizer,
//                 phantom: std::marker::PhantomData,
//             }
//         }
//
//         pub fn inner(&self) -> &O {
//             &self.optimizer
//         }
//
//         pub fn inner_mut(&mut self) -> &mut O {
//             &mut self.optimizer
//         }
//     }
//     impl<F, D, O> super::Optimizer<F> for OptimOptimizerWrapper<F, D, O>
//     where
//         F: Float + Debug + ScalarOperand,
//         D: Dimension,
//         O: optim::Optimizer<F, D>,
//     {
//         fn update(
//             &mut self,
//             params: &mut [Array<F, scirs2_core::ndarray::IxDyn>],
//             grads: &[Array<F, scirs2_core::ndarray::IxDyn>],
//         ) -> Result<()> {
//             if params.len() != grads.len() {
//                 return Err(NeuralError::TrainingError(format!(
//                     "Number of parameter arrays ({}) does not match number of gradient arrays ({})",
//                     params.len(),
//                     grads.len()
//                 )));
//             }
//
//             // Convert to refs for optim library
//             let params_refs: Vec<&Array<F, scirs2_core::ndarray::IxDyn>> = params.iter().collect();
//             let grads_refs: Vec<&Array<F, scirs2_core::ndarray::IxDyn>> = grads.iter().collect();
//
//             // Use the optim library's step_list method
//             let updated_params = self
//                 .optimizer
//                 .step_list(&params_refs, &grads_refs)
//                 .map_err(|e| NeuralError::TrainingError(format!("Optimizer error: {}", e)))?;
//
//             // Update the original params with the updated values
//             for (i, param) in params.iter_mut().enumerate() {
//                 *param = updated_params[i].clone();
//             }
//
//             Ok(())
//         }
//
//         fn get_learning_rate(&self) -> F {
//             self.optimizer.get_learning_rate()
//         }
//
//         fn set_learning_rate(&mut self, lr: F) {
//             self.optimizer.set_learning_rate(lr);
//         }
//     }
//     /// Helper function to create wrapped SGD optimizer
//     pub fn wrap_sgd<F: Float + Debug + ScalarOperand>(
//         lr: F,
//         momentum: F,
//         weight_decay: F,
//     ) -> OptimOptimizerWrapper<F, scirs2_core::ndarray::IxDyn, optim::SGD<F>> {
//         let sgd = optim::SGD::new_with_config(lr, momentum, weight_decay);
//         OptimOptimizerWrapper::new(sgd)
//     }
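//
//     // Usage sketch (hypothetical values, for illustration only):
//     // a wrapped SGD with lr = 0.01, momentum = 0.9, and no weight decay.
//     //
//     //     let opt = wrap_sgd(0.01f32, 0.9, 0.0);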
//     /// Helper function to create wrapped Adam optimizer
//     pub fn wrap_adam<F: Float + Debug + ScalarOperand>(
//         lr: F,
//         beta1: F,
//         beta2: F,
//         epsilon: F,
//         weight_decay: F,
//     ) -> OptimOptimizerWrapper<F, scirs2_core::ndarray::IxDyn, optim::Adam<F>> {
//         let adam = optim::Adam::new_with_config(lr, beta1, beta2, epsilon, weight_decay);
//         OptimOptimizerWrapper::new(adam)
//     }
//     /// Helper function to create wrapped AdamW optimizer
//     pub fn wrap_adamw<F: Float + Debug + ScalarOperand>(
//         lr: F,
//         beta1: F,
//         beta2: F,
//         epsilon: F,
//         weight_decay: F,
//     ) -> OptimOptimizerWrapper<F, scirs2_core::ndarray::IxDyn, optim::AdamW<F>> {
//         let adamw = optim::AdamW::new_with_config(lr, beta1, beta2, epsilon, weight_decay);
//         OptimOptimizerWrapper::new(adamw)
//     }
//     /// Helper function to create wrapped RAdam optimizer
//     pub fn wrap_radam<F: Float + Debug + ScalarOperand>(
//         lr: F,
//         beta1: F,
//         beta2: F,
//         epsilon: F,
//         weight_decay: F,
//     ) -> OptimOptimizerWrapper<F, scirs2_core::ndarray::IxDyn, optim::RAdam<F>> {
//         let radam = optim::RAdam::new_with_config(lr, beta1, beta2, epsilon, weight_decay);
//         OptimOptimizerWrapper::new(radam)
//     }
// }
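
// The tests below are a minimal sketch of how the `Optimizer` trait is meant
// to be used; `FixedStep` is a hypothetical toy optimizer written only for
// illustration and is not part of the crate's public API.
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::{Array, IxDyn};

    /// Toy optimizer: plain gradient descent with a fixed learning rate
    struct FixedStep {
        lr: f64,
    }

    impl Optimizer<f64> for FixedStep {
        fn update(
            &mut self,
            params: &mut [Array<f64, IxDyn>],
            grads: &[Array<f64, IxDyn>],
        ) -> Result<()> {
            // p <- p - lr * g for every parameter/gradient pair
            for (p, g) in params.iter_mut().zip(grads) {
                *p = &*p - &(g * self.lr);
            }
            Ok(())
        }

        fn get_learning_rate(&self) -> f64 {
            self.lr
        }

        fn set_learning_rate(&mut self, lr: f64) {
            self.lr = lr;
        }
    }

    #[test]
    fn fixed_step_moves_against_the_gradient() {
        let mut opt = FixedStep { lr: 0.1 };
        let mut params = vec![Array::<f64, _>::ones(IxDyn(&[2]))];
        let grads = vec![Array::<f64, _>::ones(IxDyn(&[2]))];
        opt.update(&mut params, &grads).unwrap();
        // Each of the two entries moves from 1.0 to 0.9
        assert!((params[0].sum() - 1.8).abs() < 1e-12);
    }

    #[test]
    fn works_through_a_trait_object() {
        let mut opt: Box<dyn Optimizer<f64>> = Box::new(FixedStep { lr: 0.5 });
        opt.set_learning_rate(0.25);
        assert!((opt.get_learning_rate() - 0.25).abs() < f64::EPSILON);
    }
}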