#[cfg(feature = "alloc")]
use alloc::{vec, vec::Vec};
use serde::{Deserialize, Serialize};
use super::layer::Layer;
/// Optimization algorithm selector, carrying each algorithm's own hyperparameters.
///
/// [`Default`] yields [`OptimizerType::SGD`]. [`OptimizerState::new`] inspects the
/// variant to decide which per-layer buffers to allocate.
#[derive(Debug, Clone, Serialize, Deserialize)]
// `SGD` is spelled as an all-caps acronym, which this lint would otherwise flag.
#[allow(clippy::upper_case_acronyms)]
pub enum OptimizerType {
/// Carries no hyperparameters; `OptimizerState::new` allocates no buffers for it.
SGD,
/// Momentum variant; `OptimizerState::new` allocates the velocity buffers for it.
Momentum {
/// NOTE(review): presumably the factor applied to the accumulated velocity —
/// the update rule is not visible in this part of the file; confirm there.
coefficient: f32,
},
/// Adam variant, configured by an [`AdamConfig`]; `OptimizerState::new`
/// allocates the `m` and `v` buffers for it.
Adam(AdamConfig),
}
impl Default for OptimizerType {
fn default() -> Self {
Self::SGD
}
}
/// Hyperparameters carried by [`OptimizerType::Adam`].
///
/// The [`Default`] impl supplies `beta1 = 0.9`, `beta2 = 0.999`, `epsilon = 1e-8`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AdamConfig {
/// NOTE(review): presumably the decay rate of the first-moment estimate
/// (conventional Adam naming) — the update step is not shown here; confirm.
pub beta1: f32,
/// NOTE(review): presumably the decay rate of the second-moment estimate —
/// confirm against the update step.
pub beta2: f32,
/// NOTE(review): presumably the small constant guarding the division in the
/// update — confirm against the update step.
pub epsilon: f32,
}
impl Default for AdamConfig {
fn default() -> Self {
Self {
beta1: 0.9,
beta2: 0.999,
epsilon: 1e-8,
}
}
}
/// Per-layer optimizer buffers plus a step counter.
///
/// The derived [`Default`] leaves every buffer empty and `t` at 0.
/// [`OptimizerState::new`] fills only the buffers the chosen [`OptimizerType`]
/// uses; each filled buffer holds one zero-initialized inner `Vec` per layer,
/// with the same length as that layer's `weights` (or `biases`).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct OptimizerState {
/// Weight-sized buffers allocated by `new` for `OptimizerType::Momentum`; empty otherwise.
pub weight_velocities: Vec<Vec<f32>>,
/// Bias-sized buffers allocated by `new` for `OptimizerType::Momentum`; empty otherwise.
pub bias_velocities: Vec<Vec<f32>>,
/// Weight-sized buffers allocated by `new` for `OptimizerType::Adam`; empty otherwise.
/// NOTE(review): presumably Adam's first-moment estimates — confirm against the update step.
pub weight_m: Vec<Vec<f32>>,
/// Bias-sized counterpart of `weight_m`.
pub bias_m: Vec<Vec<f32>>,
/// Weight-sized buffers allocated by `new` for `OptimizerType::Adam`; empty otherwise.
/// NOTE(review): presumably Adam's second-moment estimates — confirm against the update step.
pub weight_v: Vec<Vec<f32>>,
/// Bias-sized counterpart of `weight_v`.
pub bias_v: Vec<Vec<f32>>,
/// Counter that starts at 0 and is not modified by `new`.
/// NOTE(review): presumably the number of update steps taken — confirm where it is incremented.
pub t: u64,
}
impl OptimizerState {
    /// Builds the initial state for `optimizer` over the given `layers`.
    ///
    /// Only the buffers the selected variant uses are allocated; every other
    /// buffer stays empty and `t` starts at 0:
    ///
    /// * `SGD` — nothing is allocated.
    /// * `Momentum` — `weight_velocities` and `bias_velocities`.
    /// * `Adam` — `weight_m`, `bias_m`, `weight_v` and `bias_v`.
    ///
    /// Each allocated buffer has one zero-filled inner `Vec` per layer, as long
    /// as that layer's `weights` (respectively `biases`).
    #[must_use]
    pub fn new(layers: &[Layer], optimizer: &OptimizerType) -> Self {
        // One zeroed vector per layer, matching the layer's weight count.
        let weight_zeros = || -> Vec<Vec<f32>> {
            layers
                .iter()
                .map(|layer| vec![0.0; layer.weights.len()])
                .collect()
        };
        // One zeroed vector per layer, matching the layer's bias count.
        let bias_zeros = || -> Vec<Vec<f32>> {
            layers
                .iter()
                .map(|layer| vec![0.0; layer.biases.len()])
                .collect()
        };

        match optimizer {
            OptimizerType::SGD => Self::default(),
            OptimizerType::Momentum { .. } => Self {
                weight_velocities: weight_zeros(),
                bias_velocities: bias_zeros(),
                ..Self::default()
            },
            OptimizerType::Adam(_) => Self {
                weight_m: weight_zeros(),
                bias_m: bias_zeros(),
                weight_v: weight_zeros(),
                bias_v: bias_zeros(),
                ..Self::default()
            },
        }
    }
}