use crate::error::OptimizeError;
use crate::stochastic::{StochasticMethod, StochasticOptions};
use scirs2_core::ndarray::{s, Array1, ScalarOperand};
use scirs2_core::numeric::Float;
use std::collections::HashMap;
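/// Named collection of 1-D parameter arrays and their gradients, kept in
/// parallel vectors so the whole set can be flattened into one vector and
/// written back.
///
/// A minimal usage sketch (marked `ignore` because it depends on crate-level
/// types):
///
/// ```ignore
/// let mut params = NeuralParameters::<f64>::new();
/// params.add_parameter("layer1".to_string(), Array1::from_vec(vec![1.0, 2.0]));
/// assert_eq!(params.total_parameters(), 2);
/// let flat = params.flatten_parameters();
/// params.update_from_flat(&flat); // round-trips the same values
/// ```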
#[derive(Debug, Clone)]
pub struct NeuralParameters<F: Float + ScalarOperand> {
    /// One 1-D array per named parameter group, in insertion order.
    pub parameters: Vec<Array1<F>>,
    /// Gradients, kept index-aligned with `parameters`.
    pub gradients: Vec<Array1<F>>,
    /// Human-readable name for each parameter group.
    pub names: Vec<String>,
}
impl<F: Float + ScalarOperand> Default for NeuralParameters<F> {
    fn default() -> Self {
        Self {
            parameters: Vec::new(),
            gradients: Vec::new(),
            names: Vec::new(),
        }
    }
}
impl<F: Float + ScalarOperand> NeuralParameters<F> {
    pub fn new() -> Self {
        Self::default()
    }

    /// Registers a named parameter array together with a zero-initialized
    /// gradient of the same shape.
    pub fn add_parameter(&mut self, name: String, param: Array1<F>) {
        self.names.push(name);
        self.gradients.push(Array1::zeros(param.raw_dim()));
        self.parameters.push(param);
    }

    /// Total number of scalar parameters across all groups.
    pub fn total_parameters(&self) -> usize {
        self.parameters.iter().map(|p| p.len()).sum()
    }

    /// Concatenates all parameter groups into one flat vector, in insertion
    /// order.
    pub fn flatten_parameters(&self) -> Array1<F> {
        let total_len = self.total_parameters();
        let mut flat = Array1::zeros(total_len);
        let mut offset = 0;
        for param in &self.parameters {
            let len = param.len();
            flat.slice_mut(s![offset..offset + len]).assign(param);
            offset += len;
        }
        flat
    }

    /// Concatenates all gradients into one flat vector, using the same
    /// layout as [`flatten_parameters`](Self::flatten_parameters).
    pub fn flatten_gradients(&self) -> Array1<F> {
        let total_len = self.total_parameters();
        let mut flat = Array1::zeros(total_len);
        let mut offset = 0;
        for grad in &self.gradients {
            let len = grad.len();
            flat.slice_mut(s![offset..offset + len]).assign(grad);
            offset += len;
        }
        flat
    }

    /// Writes a flat vector back into the per-group parameters. Assumes
    /// `flat_params` has exactly `total_parameters()` elements.
    pub fn update_from_flat(&mut self, flat_params: &Array1<F>) {
        let mut offset = 0;
        for param in &mut self.parameters {
            let len = param.len();
            param.assign(&flat_params.slice(s![offset..offset + len]));
            offset += len;
        }
    }

    /// Writes a flat gradient vector back into the per-group gradients,
    /// mirroring [`update_from_flat`](Self::update_from_flat).
    pub fn update_gradients_from_flat(&mut self, flat_grads: &Array1<F>) {
        let mut offset = 0;
        for grad in &mut self.gradients {
            let len = grad.len();
            grad.assign(&flat_grads.slice(s![offset..offset + len]));
            offset += len;
        }
    }
}
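/// Stateful first-order optimizer (SGD, momentum, Adam, AdamW, RMSProp) that
/// updates a [`NeuralParameters`] set in place and keeps its per-parameter
/// moment buffers between steps.
///
/// A minimal sketch of a single step (illustrative values; marked `ignore`
/// because it depends on crate-level types):
///
/// ```ignore
/// let mut opt = NeuralOptimizer::adam(0.001_f64, 100);
/// let mut params = NeuralParameters::new();
/// params.add_parameter("w".to_string(), Array1::from_vec(vec![1.0, 2.0]));
/// params.gradients[0] = Array1::from_vec(vec![0.1, 0.2]);
/// opt.step(&mut params).expect("step failed");
/// ```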
pub struct NeuralOptimizer<F: Float + ScalarOperand> {
    method: StochasticMethod,
    options: StochasticOptions,
    /// Velocity buffers for SGD with momentum, keyed by parameter index.
    momentum_buffers: HashMap<String, Array1<F>>,
    /// First-moment (mean) estimates for Adam/AdamW.
    first_moment: HashMap<String, Array1<F>>,
    /// Second-moment estimates for Adam/AdamW and RMSProp.
    second_moment: HashMap<String, Array1<F>>,
    /// Number of `step` calls so far; used for Adam/AdamW bias correction.
    step_count: usize,
}
impl<F: Float + ScalarOperand> NeuralOptimizer<F>
where
F: 'static + Send + Sync,
{
    /// Creates an optimizer for the given method and options.
    pub fn new(method: StochasticMethod, options: StochasticOptions) -> Self {
        Self {
            method,
            options,
            momentum_buffers: HashMap::new(),
            first_moment: HashMap::new(),
            second_moment: HashMap::new(),
            step_count: 0,
        }
    }

    /// Plain SGD with a constant learning rate and no gradient clipping.
    pub fn sgd(learning_rate: F, max_iter: usize) -> Self {
        let options = StochasticOptions {
            learning_rate: learning_rate.to_f64().unwrap_or(0.01),
            max_iter,
            batch_size: None,
            tol: 1e-6,
            adaptive_lr: false,
            lr_decay: 0.99,
            lr_schedule: crate::stochastic::LearningRateSchedule::Constant,
            gradient_clip: None,
            early_stopping_patience: None,
        };
        Self::new(StochasticMethod::SGD, options)
    }

    /// Adam with a constant learning rate and gradient clipping at a global
    /// norm of 1.0.
    pub fn adam(learning_rate: F, max_iter: usize) -> Self {
        let options = StochasticOptions {
            learning_rate: learning_rate.to_f64().unwrap_or(0.001),
            max_iter,
            batch_size: None,
            tol: 1e-6,
            adaptive_lr: false,
            lr_decay: 0.99,
            lr_schedule: crate::stochastic::LearningRateSchedule::Constant,
            gradient_clip: Some(1.0),
            early_stopping_patience: None,
        };
        Self::new(StochasticMethod::Adam, options)
    }

    /// AdamW (Adam with decoupled weight decay), otherwise configured like
    /// [`adam`](Self::adam).
    pub fn adamw(learning_rate: F, max_iter: usize) -> Self {
        let options = StochasticOptions {
            learning_rate: learning_rate.to_f64().unwrap_or(0.001),
            max_iter,
            batch_size: None,
            tol: 1e-6,
            adaptive_lr: false,
            lr_decay: 0.99,
            lr_schedule: crate::stochastic::LearningRateSchedule::Constant,
            gradient_clip: Some(1.0),
            early_stopping_patience: None,
        };
        Self::new(StochasticMethod::AdamW, options)
    }
    /// Applies one optimization step in place, dispatching on the configured
    /// method. Increments the internal step counter used for bias correction.
    pub fn step(&mut self, params: &mut NeuralParameters<F>) -> Result<(), OptimizeError> {
        self.step_count += 1;
        match self.method {
            StochasticMethod::SGD => self.sgd_step(params),
            StochasticMethod::Momentum => self.momentum_step(params),
            StochasticMethod::Adam => self.adam_step(params),
            StochasticMethod::AdamW => self.adamw_step(params),
            StochasticMethod::RMSProp => self.rmsprop_step(params),
        }
    }
    fn sgd_step(&self, params: &mut NeuralParameters<F>) -> Result<(), OptimizeError> {
        let lr = F::from(self.options.learning_rate)
            .unwrap_or_else(|| F::from(0.01).expect("Failed to convert constant to float"));
        // theta <- theta - lr * g
        for (param, grad) in params.parameters.iter_mut().zip(params.gradients.iter()) {
            *param = param.clone() - &(grad.clone() * lr);
        }
        Ok(())
    }
    fn momentum_step(&mut self, params: &mut NeuralParameters<F>) -> Result<(), OptimizeError> {
        let lr = F::from(self.options.learning_rate)
            .unwrap_or_else(|| F::from(0.01).expect("Failed to convert constant to float"));
        // Momentum coefficient is fixed at the conventional 0.9.
        let momentum = F::from(0.9).expect("Failed to convert constant to float");
        for (i, (param, grad)) in params
            .parameters
            .iter_mut()
            .zip(params.gradients.iter())
            .enumerate()
        {
            // Velocity buffers are keyed by position, so they persist across
            // steps as long as the parameter order is stable.
            let param_name = format!("param_{}", i);
            let momentum_buffer = self
                .momentum_buffers
                .entry(param_name)
                .or_insert_with(|| Array1::zeros(param.raw_dim()));
            // v <- momentum * v + g; theta <- theta - lr * v
            *momentum_buffer = momentum_buffer.clone() * momentum + grad;
            *param = param.clone() - &(momentum_buffer.clone() * lr);
        }
        Ok(())
    }
    fn adam_step(&mut self, params: &mut NeuralParameters<F>) -> Result<(), OptimizeError> {
        let lr = F::from(self.options.learning_rate)
            .unwrap_or_else(|| F::from(0.001).expect("Failed to convert constant to float"));
        let beta1 = F::from(0.9).expect("Failed to convert constant to float");
        let beta2 = F::from(0.999).expect("Failed to convert constant to float");
        let epsilon = F::from(1e-8).expect("Failed to convert constant to float");
        for (i, (param, grad)) in params
            .parameters
            .iter_mut()
            .zip(params.gradients.iter())
            .enumerate()
        {
            // Moment buffers are keyed by position, so they persist across
            // steps as long as the parameter order is stable.
            let param_name = format!("param_{}", i);
            let m = self
                .first_moment
                .entry(param_name.clone())
                .or_insert_with(|| Array1::zeros(param.raw_dim()));
            let v = self
                .second_moment
                .entry(param_name)
                .or_insert_with(|| Array1::zeros(param.raw_dim()));
            // Biased moment updates:
            // m <- beta1 * m + (1 - beta1) * g
            *m = m.clone() * beta1 + &(grad.clone() * (F::one() - beta1));
            // v <- beta2 * v + (1 - beta2) * g^2
            let grad_squared = grad.mapv(|x| x * x);
            *v = v.clone() * beta2 + &(grad_squared * (F::one() - beta2));
            // Bias correction, then the update
            // theta <- theta - lr * m_hat / (sqrt(v_hat) + eps).
            let step_f = F::from(self.step_count).expect("Failed to convert to float");
            let m_hat = m.clone() / (F::one() - beta1.powf(step_f));
            let v_hat = v.clone() / (F::one() - beta2.powf(step_f));
            let denominator = v_hat.mapv(|x| x.sqrt()) + epsilon;
            let update = m_hat / denominator * lr;
            *param = param.clone() - &update;
        }
        Ok(())
    }
    fn adamw_step(&mut self, params: &mut NeuralParameters<F>) -> Result<(), OptimizeError> {
        let lr = F::from(self.options.learning_rate)
            .unwrap_or_else(|| F::from(0.001).expect("Failed to convert constant to float"));
        let beta1 = F::from(0.9).expect("Failed to convert constant to float");
        let beta2 = F::from(0.999).expect("Failed to convert constant to float");
        let epsilon = F::from(1e-8).expect("Failed to convert constant to float");
        let weight_decay = F::from(0.01).expect("Failed to convert constant to float");
        for (i, (param, grad)) in params
            .parameters
            .iter_mut()
            .zip(params.gradients.iter())
            .enumerate()
        {
            let param_name = format!("param_{}", i);
            let m = self
                .first_moment
                .entry(param_name.clone())
                .or_insert_with(|| Array1::zeros(param.raw_dim()));
            let v = self
                .second_moment
                .entry(param_name)
                .or_insert_with(|| Array1::zeros(param.raw_dim()));
            *m = m.clone() * beta1 + &(grad.clone() * (F::one() - beta1));
            let grad_squared = grad.mapv(|x| x * x);
            *v = v.clone() * beta2 + &(grad_squared * (F::one() - beta2));
            let step_f = F::from(self.step_count).expect("Failed to convert to float");
            let m_hat = m.clone() / (F::one() - beta1.powf(step_f));
            let v_hat = v.clone() / (F::one() - beta2.powf(step_f));
            let denominator = v_hat.mapv(|x| x.sqrt()) + epsilon;
            let adam_update = m_hat / denominator;
            // Decoupled weight decay (AdamW): the decay term is applied to
            // the parameters directly instead of being folded into the
            // gradient.
            let weight_decay_update = param.clone() * weight_decay;
            let total_update = (adam_update + weight_decay_update) * lr;
            *param = param.clone() - &total_update;
        }
        Ok(())
    }
    fn rmsprop_step(&mut self, params: &mut NeuralParameters<F>) -> Result<(), OptimizeError> {
        let lr = F::from(self.options.learning_rate)
            .unwrap_or_else(|| F::from(0.001).expect("Failed to convert constant to float"));
        let alpha = F::from(0.99).expect("Failed to convert constant to float");
        let epsilon = F::from(1e-8).expect("Failed to convert constant to float");
        for (i, (param, grad)) in params
            .parameters
            .iter_mut()
            .zip(params.gradients.iter())
            .enumerate()
        {
            let param_name = format!("param_{}", i);
            let v = self
                .second_moment
                .entry(param_name)
                .or_insert_with(|| Array1::zeros(param.raw_dim()));
            // v <- alpha * v + (1 - alpha) * g^2
            let grad_squared = grad.mapv(|x| x * x);
            *v = v.clone() * alpha + &(grad_squared * (F::one() - alpha));
            // theta <- theta - lr * g / (sqrt(v) + eps)
            let denominator = v.mapv(|x| x.sqrt()) + epsilon;
            let update = grad.clone() / denominator * lr;
            *param = param.clone() - &update;
        }
        Ok(())
    }
    /// Current learning rate.
    pub fn get_learning_rate(&self) -> f64 {
        self.options.learning_rate
    }

    /// Overrides the learning rate used by subsequent steps.
    pub fn set_learning_rate(&mut self, lr: f64) {
        self.options.learning_rate = lr;
    }

    /// Clears all moment/momentum buffers and resets the step counter.
    pub fn reset(&mut self) {
        self.momentum_buffers.clear();
        self.first_moment.clear();
        self.second_moment.clear();
        self.step_count = 0;
    }

    /// Human-readable name of the configured method.
    pub fn method_name(&self) -> &'static str {
        match self.method {
            StochasticMethod::SGD => "SGD",
            StochasticMethod::Momentum => "SGD with Momentum",
            StochasticMethod::Adam => "Adam",
            StochasticMethod::AdamW => "AdamW",
            StochasticMethod::RMSProp => "RMSprop",
        }
    }
}
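/// Thin training loop around a [`NeuralOptimizer`] that records the loss
/// history, applies optional gradient clipping, and tracks an early-stopping
/// counter.
///
/// A minimal sketch of a training loop (the `loss_fn`/`grad_fn` closures are
/// placeholders; marked `ignore` because it depends on crate-level types):
///
/// ```ignore
/// let mut trainer = NeuralTrainer::new(NeuralOptimizer::sgd(0.1_f64, 100))
///     .with_early_stopping(5);
/// for _ in 0..100 {
///     trainer.train_epoch(&mut params, &mut loss_fn, &mut grad_fn)?;
///     if trainer.should_stop_early() {
///         break;
///     }
/// }
/// ```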
pub struct NeuralTrainer<F: Float + ScalarOperand> {
    optimizer: NeuralOptimizer<F>,
    /// One recorded loss per call to `train_epoch`.
    loss_history: Vec<F>,
    /// Epochs without improvement to tolerate before stopping, if enabled.
    early_stopping_patience: Option<usize>,
    best_loss: Option<F>,
    patience_counter: usize,
}
impl<F: Float + ScalarOperand> NeuralTrainer<F>
where
F: 'static + Send + Sync + std::fmt::Display,
{
    pub fn new(optimizer: NeuralOptimizer<F>) -> Self {
        Self {
            optimizer,
            loss_history: Vec::new(),
            early_stopping_patience: None,
            best_loss: None,
            patience_counter: 0,
        }
    }

    /// Enables early stopping after `patience` consecutive epochs without
    /// improvement in the loss.
    pub fn with_early_stopping(mut self, patience: usize) -> Self {
        self.early_stopping_patience = Some(patience);
        self
    }
    /// Runs one training step: computes gradients, optionally clips them,
    /// applies the optimizer update, and records the loss. Note that the
    /// loss is evaluated *after* the update, so `loss_history` stores
    /// post-step losses.
    pub fn train_epoch<LossFn, GradFn>(
        &mut self,
        params: &mut NeuralParameters<F>,
        loss_fn: &mut LossFn,
        grad_fn: &mut GradFn,
    ) -> Result<F, OptimizeError>
    where
        LossFn: FnMut(&NeuralParameters<F>) -> F,
        GradFn: FnMut(&NeuralParameters<F>) -> Vec<Array1<F>>,
    {
        let gradients = grad_fn(params);
        params.gradients = gradients;
        if let Some(max_norm) = self.optimizer.options.gradient_clip {
            self.clip_gradients(params, max_norm);
        }
        self.optimizer.step(params)?;
        let loss = loss_fn(params);
        self.loss_history.push(loss);
        // Early-stopping bookkeeping: reset the patience counter on
        // improvement, otherwise count the epoch against the budget.
        if self.early_stopping_patience.is_some() {
            if let Some(best_loss) = self.best_loss {
                if loss < best_loss {
                    self.best_loss = Some(loss);
                    self.patience_counter = 0;
                } else {
                    self.patience_counter += 1;
                }
            } else {
                self.best_loss = Some(loss);
            }
        }
        Ok(loss)
    }
    /// Returns `true` once the loss has failed to improve for at least
    /// `patience` consecutive epochs.
    pub fn should_stop_early(&self) -> bool {
        if let Some(patience) = self.early_stopping_patience {
            self.patience_counter >= patience
        } else {
            false
        }
    }

    /// Losses recorded so far, one entry per `train_epoch` call.
    pub fn loss_history(&self) -> &[F] {
        &self.loss_history
    }

    pub fn learning_rate(&self) -> f64 {
        self.optimizer.get_learning_rate()
    }

    pub fn set_learning_rate(&mut self, lr: f64) {
        self.optimizer.set_learning_rate(lr);
    }

    /// Scales all gradients by a common factor so their global L2 norm does
    /// not exceed `max_norm`.
    fn clip_gradients(&self, params: &mut NeuralParameters<F>, max_norm: f64) {
        let max_norm_f = F::from(max_norm).expect("Failed to convert to float");
        let mut total_norm_sq = F::zero();
        for grad in &params.gradients {
            total_norm_sq = total_norm_sq + grad.mapv(|x| x * x).sum();
        }
        let total_norm = total_norm_sq.sqrt();
        if total_norm > max_norm_f {
            let scale = max_norm_f / total_norm;
            for grad in &mut params.gradients {
                grad.mapv_inplace(|x| x * scale);
            }
        }
    }
}
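/// Convenience constructors that pair each method with a default budget of
/// 1000 iterations.
///
/// ```ignore
/// let opt = optimizers::adam::<f64>(0.001);
/// assert_eq!(opt.method_name(), "Adam");
/// ```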
pub mod optimizers {
    use super::*;

    pub fn sgd<F>(learning_rate: F) -> NeuralOptimizer<F>
    where
        F: Float + ScalarOperand + 'static + Send + Sync,
    {
        NeuralOptimizer::sgd(learning_rate, 1000)
    }

    pub fn adam<F>(learning_rate: F) -> NeuralOptimizer<F>
    where
        F: Float + ScalarOperand + 'static + Send + Sync,
    {
        NeuralOptimizer::adam(learning_rate, 1000)
    }

    pub fn adamw<F>(learning_rate: F) -> NeuralOptimizer<F>
    where
        F: Float + ScalarOperand + 'static + Send + Sync,
    {
        NeuralOptimizer::adamw(learning_rate, 1000)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;

    #[test]
    fn test_neural_parameters() {
        let mut params = NeuralParameters::<f64>::new();
        params.add_parameter("layer1".to_string(), Array1::from_vec(vec![1.0, 2.0, 3.0]));
        params.add_parameter("layer2".to_string(), Array1::from_vec(vec![4.0, 5.0]));
        assert_eq!(params.total_parameters(), 5);
        let flat = params.flatten_parameters();
        assert_eq!(
            flat.as_slice().expect("Operation failed"),
            &[1.0, 2.0, 3.0, 4.0, 5.0]
        );
        let new_flat = Array1::from_vec(vec![6.0, 7.0, 8.0, 9.0, 10.0]);
        params.update_from_flat(&new_flat);
        assert_eq!(
            params.parameters[0].as_slice().expect("Operation failed"),
            &[6.0, 7.0, 8.0]
        );
        assert_eq!(
            params.parameters[1].as_slice().expect("Operation failed"),
            &[9.0, 10.0]
        );
    }

    #[test]
    fn test_sgd_optimizer() {
        let mut optimizer = NeuralOptimizer::sgd(0.1, 100);
        let mut params = NeuralParameters::<f64>::new();
        params.add_parameter("test".to_string(), Array1::from_vec(vec![1.0, 2.0]));
        params.gradients[0] = Array1::from_vec(vec![0.5, 1.0]);
        optimizer.step(&mut params).expect("Operation failed");
        let expected = [1.0 - 0.1 * 0.5, 2.0 - 0.1 * 1.0];
        assert_abs_diff_eq!(params.parameters[0][0], expected[0], epsilon = 1e-10);
        assert_abs_diff_eq!(params.parameters[0][1], expected[1], epsilon = 1e-10);
    }

    #[test]
    fn test_adam_optimizer() {
        let mut optimizer = NeuralOptimizer::adam(0.001, 100);
        let mut params = NeuralParameters::<f64>::new();
        params.add_parameter("test".to_string(), Array1::from_vec(vec![1.0, 2.0]));
        params.gradients[0] = Array1::from_vec(vec![0.1, 0.2]);
        let original_params = params.parameters[0].clone();
        optimizer.step(&mut params).expect("Operation failed");
        assert_ne!(params.parameters[0][0], original_params[0]);
        assert_ne!(params.parameters[0][1], original_params[1]);
        assert!(params.parameters[0][0] < original_params[0]);
        assert!(params.parameters[0][1] < original_params[1]);
    }

    #[test]
    fn test_neural_trainer() {
        let optimizer = NeuralOptimizer::sgd(0.1, 100);
        let mut trainer = NeuralTrainer::new(optimizer).with_early_stopping(5);
        let mut params = NeuralParameters::<f64>::new();
        params.add_parameter("test".to_string(), Array1::from_vec(vec![1.0]));
        params.gradients[0] = Array1::from_vec(vec![1.0]);
        let mut loss_fn = |p: &NeuralParameters<f64>| p.parameters[0][0] * p.parameters[0][0];
        let mut grad_fn =
            |p: &NeuralParameters<f64>| vec![Array1::from_vec(vec![2.0 * p.parameters[0][0]])];
        let loss = trainer
            .train_epoch(&mut params, &mut loss_fn, &mut grad_fn)
            .expect("Operation failed");
        assert_eq!(trainer.loss_history().len(), 1);
        assert_eq!(trainer.loss_history()[0], loss);
    }
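    // A small additional sketch of the early-stopping bookkeeping: with a
    // zero learning rate the loss never improves on the first recorded best,
    // so the patience budget (2 here, chosen for illustration) is exhausted.
    #[test]
    fn test_early_stopping_counter() {
        let optimizer = NeuralOptimizer::sgd(0.0, 100);
        let mut trainer = NeuralTrainer::new(optimizer).with_early_stopping(2);
        let mut params = NeuralParameters::<f64>::new();
        params.add_parameter("test".to_string(), Array1::from_vec(vec![1.0]));
        let mut loss_fn = |_: &NeuralParameters<f64>| 1.0;
        let mut grad_fn = |_: &NeuralParameters<f64>| vec![Array1::from_vec(vec![0.0])];
        for _ in 0..3 {
            trainer
                .train_epoch(&mut params, &mut loss_fn, &mut grad_fn)
                .expect("train_epoch failed");
        }
        assert!(trainer.should_stop_early());
    }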
    #[test]
    fn test_optimizer_convenience_functions() {
        let sgd_opt = optimizers::sgd(0.01);
        assert_eq!(sgd_opt.method_name(), "SGD");
        let adam_opt = optimizers::adam(0.001);
        assert_eq!(adam_opt.method_name(), "Adam");
        let adamw_opt = optimizers::adamw(0.001);
        assert_eq!(adamw_opt.method_name(), "AdamW");
    }
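    // A small sketch exercising global-norm gradient clipping through
    // `train_epoch`: the gradient [3, 4] has norm 5, so clipping to a max
    // norm of 1.0 (set directly on the module-private options here, for
    // illustration) rescales it to [0.6, 0.8] before the SGD update.
    #[test]
    fn test_gradient_clipping() {
        let mut optimizer = NeuralOptimizer::sgd(0.1, 100);
        optimizer.options.gradient_clip = Some(1.0);
        let mut trainer = NeuralTrainer::new(optimizer);
        let mut params = NeuralParameters::<f64>::new();
        params.add_parameter("test".to_string(), Array1::from_vec(vec![3.0, 4.0]));
        let mut loss_fn = |p: &NeuralParameters<f64>| p.parameters[0].mapv(|x| x * x).sum();
        let mut grad_fn = |_: &NeuralParameters<f64>| vec![Array1::from_vec(vec![3.0, 4.0])];
        trainer
            .train_epoch(&mut params, &mut loss_fn, &mut grad_fn)
            .expect("train_epoch failed");
        assert_abs_diff_eq!(params.parameters[0][0], 3.0 - 0.1 * 0.6, epsilon = 1e-10);
        assert_abs_diff_eq!(params.parameters[0][1], 4.0 - 0.1 * 0.8, epsilon = 1e-10);
    }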
}