use crate::activation::Function;
use crate::linalg::Matrix;
use crate::optim::Optimizer;
use crate::Float;
use rayon::prelude::IntoParallelRefMutIterator;
use rayon::prelude::*;
pub struct Adam<T: Float> {
learning_rate: T,
beta1: T,
beta2: T,
epsilon: T,
m: Vec<Matrix<T>>,
v: Vec<Matrix<T>>,
t: Vec<usize>,
}
impl<T: Float> Adam<T> {
pub fn new(learning_rate: T, architecture: &Vec<Box<dyn Function<T>>>) -> Self {
let beta1 = T::from_f64(0.9);
let beta2 = T::from_f64(0.999);
let epsilon = T::from_f64(1e-8);
Self::full_new(learning_rate, beta1, beta2, epsilon, architecture)
}
pub fn full_new(
learning_rate: T,
beta1: T,
beta2: T,
epsilon: T,
architecture: &Vec<Box<dyn Function<T>>>,
) -> Self {
let mut m = vec![];
let mut v = vec![];
for lay in architecture {
if lay.is_linear() {
let shape = lay.get_data().unwrap().shape();
m.push(Matrix::zeros(shape));
v.push(Matrix::zeros(shape));
}
}
let t = vec![0; m.len()];
Self {
learning_rate,
beta1,
beta2,
epsilon,
m,
v,
t: t,
}
}
}
impl<T: Float> Optimizer<T> for Adam<T> {
fn step(&mut self, id: usize, weights: &mut Matrix<T>, gradients: &Matrix<T>) {
self.t[id] += 1;
let t = self.t[id];
self.m[id]
.data
.par_iter_mut()
.enumerate()
.for_each(|(i, x)| {
let prev_x = *x;
*x = self.beta1 * prev_x + (T::one() - self.beta1) * gradients.data[i];
});
self.v[id]
.data
.par_iter_mut()
.enumerate()
.for_each(|(i, x)| {
let prev_x = *x;
*x = self.beta2 * prev_x
+ (T::one() - self.beta2) * gradients.data[i].powf(T::from_usize(2));
});
weights.data.par_iter_mut().enumerate().for_each(|(i, x)| {
let m_hat = self.m[id].data[i] / (T::one() - self.beta1.powf(T::from_usize(t)));
let v_hat = self.v[id].data[i] / (T::one() - self.beta2.powf(T::from_usize(t)));
*x += self.learning_rate * m_hat / (v_hat.sqrt() + self.epsilon)
});
}
fn change_learning_rate(&mut self, new_learning_rate: T) {
self.learning_rate = new_learning_rate;
}
}
#[cfg(test)]
mod tests {
use crate::activation::{Function, Sigmoid};
use crate::linalg::Matrix;
use crate::loss::SSE;
use crate::nn::{Linear, Sequential};
use crate::optim::Adam;
use crate::{matrix, DataType};
#[test]
fn learn_with_adam() {
let input = matrix![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
let output = matrix![[0.0], [1.0], [1.0], [0.0]];
let layers: Vec<Box<dyn Function<f32>>> = vec![
Box::new(Linear::new(2, 2, true)),
Box::new(Sigmoid::new()),
Box::new(Linear::new(2, 1, true)),
Box::new(Sigmoid::new()),
];
let mut optim = Adam::new(0.02f32, &layers);
let mut model = Sequential::new(layers);
let loss = SSE::new(DataType::f32());
let mut loss_num = 100f32;
println!("{}", model.forward(input.clone()));
for i in 0..10000 {
if loss_num < 0.001 {
println!("i:{i} LOSS:{loss_num}");
break;
}
loss_num = model.train(input.clone(), output.clone(), &mut optim, &loss);
if i % 1000 == 0 {
println!("{loss_num}");
}
}
println!("{}", model.forward(input));
}
}