use crate::network::{matrix::Matrix, activations::{Activation, Activations}, input::Input};
use super::{layers::Layer, distributions::Distributions};
use rayon::prelude::ParallelIterator;
use serde::{Deserialize, Serialize};
/// Fully connected layer that carries its own Adam-style optimiser state.
///
/// The struct is (de)serialised through serde, so the field names and their
/// order are part of the persisted format.
#[derive(Serialize, Deserialize)]
pub struct Dense{
/// Weight matrix; `forward` multiplies it with the transposed input.
pub weights: Matrix,
/// Bias matrix added to the weighted input in `forward`.
pub biases: Matrix,
/// Activated output stored by the most recent `forward` call (starts as an empty 0x0 matrix).
pub data: Matrix,
/// Mean of the squared back-propagated errors from the most recent `backward` call (starts at 1.0).
loss: f32,
/// Activation applied to the layer output.
pub activation_fn: Activations,
/// Learning rate supplied at construction; consumed by `backward`.
learning_rate: f32,
/// Decay factor of the first-moment running averages (`m_weights`, `m_biases`).
beta1: f32,
/// Decay factor of the second-moment running averages (`v_weights`, `v_biases`).
beta2: f32,
/// Small constant added to the square-rooted second moment before dividing.
epsilon: f32,
/// Step counter used as the exponent of the bias-correction terms; starts at 1 and is incremented by every `backward` call.
time: usize,
/// Running average of the weight gradient.
m_weights: Matrix,
/// Running average of the squared weight gradient.
v_weights: Matrix,
/// Running average of the bias gradient.
m_biases: Matrix,
/// Running average of the squared bias gradient.
v_biases: Matrix
}
impl Dense {
    /// Builds a dense layer with randomly initialised weights and biases and
    /// zeroed optimiser state.
    ///
    /// * `layers` - second dimension handed to `Matrix::new_random` for the
    ///   weights; also forwarded to the Xavier distribution.
    /// * `layer_cols_before` - first dimension handed to `Matrix::new_random`
    ///   for both the weights and the biases.
    /// * `activation` - activation stored on the layer; it also selects the
    ///   initialisation distribution (He for RELU / LEAKYRELU / SOFTMAX,
    ///   Xavier for TANH / SIGMOID).
    /// * `learning_rate` - stored for use during `backward`.
    /// * `seed` - forwarded unchanged to `Matrix::new_random`.
    /// * `input_size` - forwarded to the chosen distribution.
    pub fn new(
        layers: usize,
        layer_cols_before: usize,
        activation: Activations,
        learning_rate: f32,
        seed: &Option<String>,
        input_size: usize,
    ) -> Dense {
        let init: Distributions = match activation {
            Activations::TANH | Activations::SIGMOID => Distributions::Xavier(input_size, layers),
            Activations::RELU | Activations::LEAKYRELU | Activations::SOFTMAX => {
                Distributions::He(input_size)
            }
        };

        // The random draws happen weights-first, biases-second; keep that
        // order because the generator behind `new_random` may be stateful.
        let weights = Matrix::new_random(layer_cols_before, layers, seed, &init);
        let biases = Matrix::new_random(layer_cols_before, 1, seed, &init);

        // Moment estimates mirror the shapes of the parameters they track.
        let m_weights = Matrix::new_empty(layer_cols_before, layers);
        let v_weights = Matrix::new_empty(layer_cols_before, layers);
        let m_biases = Matrix::new_empty(layer_cols_before, 1);
        let v_biases = Matrix::new_empty(layer_cols_before, 1);

        let mut layer = Dense {
            weights,
            biases,
            data: Matrix::new_empty(0, 0),
            loss: 1.0,
            activation_fn: activation,
            learning_rate,
            // Placeholders; the real hyper-parameters are filled in below.
            beta1: 0.0,
            beta2: 0.0,
            epsilon: 0.0,
            time: 1,
            m_weights,
            v_weights,
            m_biases,
            v_biases,
        };

        let (beta1, beta2) = layer.get_betas();
        layer.beta1 = beta1;
        layer.beta2 = beta2;
        layer.epsilon = layer.get_epsilon();
        layer
    }

    /// Returns the `(beta1, beta2)` decay factors used for the moment estimates.
    fn get_betas(&self) -> (f32, f32) {
        (0.9, 0.999)
    }

    /// Returns the constant added to the denominator of the update step.
    fn get_epsilon(&self) -> f32 {
        1e-10
    }
}
#[typetag::serde]
impl Layer for Dense {
    /// Returns a boxed copy of the output stored by the last `forward` call.
    fn get_data(&self) -> Box<dyn Input> {
        Box::new(self.data.clone())
    }

    /// Computes `activation(weights * inputs^T + biases)`, stores the result
    /// in `self.data` and returns its transpose.
    fn forward(&mut self, inputs: &Box<dyn Input>) -> Box<dyn Input> {
        let inputs_t = Matrix::from(inputs.to_param().to_param_2d()).transpose();
        let pre_activation = self.weights.clone() * &inputs_t + &self.biases;
        self.data = self.activation_fn.apply_fn(pre_activation);
        Box::new(self.data.clone().transpose())
    }

    /// Runs one optimisation step and returns the error to hand to the
    /// previous layer.
    ///
    /// * `gradients` - activation gradients of this layer; combined with
    ///   `errors` through `Matrix::dot_multiply` (presumably element-wise -
    ///   confirm against `Matrix`).
    /// * `errors` - error signal arriving at this layer.
    /// * `data` - the input this layer received; its transpose is multiplied
    ///   with the combined gradient to obtain the weight gradient.
    ///
    /// Side effects: updates `loss` (mean squared propagated error), the four
    /// moment estimates, `weights`, `biases`, and advances `time` by one.
    ///
    /// Returns `weights^T * errors`, computed with the weights as they were
    /// before this step.
    fn backward(&mut self, gradients: Box<dyn Input>, errors: Box<dyn Input>, data: Box<dyn Input>) -> Box<dyn Input> {
        let mut gradients_mat = Matrix::from(gradients.to_param_2d());
        let mut errors_mat = Matrix::from(errors.to_param_2d());
        let data_mat = Matrix::from(data.to_param_2d());

        // Keep the gradient unscaled here. Scaling it by the learning rate
        // before the moment estimates makes the factor cancel in
        // m_hat / sqrt(v_hat), leaving the learning rate without effect.
        gradients_mat = gradients_mat.dot_multiply(&errors_mat);
        errors_mat = self.weights.clone().transpose() * &errors_mat;

        // Mean squared value of the error that is propagated further back.
        let propagated = errors_mat.to_param();
        let squared_sum = propagated
            .iter()
            .fold(0.0_f32, |acc, error| acc + error.powi(2));
        self.loss = squared_sum / propagated.len() as f32;

        // Exponential moving averages of the gradient and its square.
        let weight_gradient = gradients_mat.clone() * &(data_mat.clone().transpose());
        self.m_weights = self.m_weights.clone() * self.beta1 + &(weight_gradient.clone() * (1.0 - self.beta1));
        self.v_weights = self.v_weights.clone() * self.beta2 + &((weight_gradient ^ 2) * (1.0 - self.beta2));
        self.m_biases = self.m_biases.clone() * self.beta1 + &(gradients_mat.clone() * (1.0 - self.beta1));
        self.v_biases = self.v_biases.clone() * self.beta2 + &((gradients_mat ^ 2) * (1.0 - self.beta2));

        // Bias correction must use the index of the current update (1 for the
        // first one). `time` starts at 1, so read it before advancing it.
        // `max(1)` guards against a zero counter, which would make the
        // correction terms zero and the division below blow up.
        // The cast only truncates after more than i32::MAX updates.
        let step = self.time.max(1) as i32;
        let first_moment_correction = 1.0 - self.beta1.powi(step);
        let second_moment_correction = 1.0 - self.beta2.powi(step);
        self.time += 1;

        let m_weights_hat = self.m_weights.clone() / first_moment_correction;
        let v_weights_hat = self.v_weights.clone() / second_moment_correction;
        let m_bias_hat = self.m_biases.clone() / first_moment_correction;
        let v_bias_hat = self.v_biases.clone() / second_moment_correction;

        // The learning rate scales the normalised step, as in the Adam update rule.
        let weights_update = (m_weights_hat / &(v_weights_hat.sqrt() + self.epsilon)) * self.learning_rate;
        let bias_update = (m_bias_hat / &(v_bias_hat.sqrt() + self.epsilon)) * self.learning_rate;

        // NOTE(review): the step is added, not subtracted; this is only
        // correct if callers pass errors that already point in the descent
        // direction (e.g. target - output) - confirm against the caller.
        self.biases = self.biases.clone() + &bias_update;
        self.weights = self.weights.clone() + &weights_update;

        Box::new(errors_mat)
    }

    /// Applies the activation function to the stored output and returns it.
    ///
    /// NOTE(review): this re-applies the activation itself rather than a
    /// derivative; verify that this is what callers of `update_gradient`
    /// expect.
    fn update_gradient(&self) -> Box<dyn Input> {
        Box::new(self.activation_fn.apply_fn(self.data.clone()))
    }

    /// Returns a copy of this layer's activation, always wrapped in `Some`.
    fn get_activation(&self) -> Option<Activations> {
        Some(self.activation_fn.clone())
    }

    /// Returns `(weights.columns, 1, 1)`.
    fn shape(&self) -> (usize, usize, usize) {
        (self.weights.columns, 1, 1)
    }

    /// Returns the loss recorded by the most recent `backward` call
    /// (1.0 before the first one).
    fn get_loss(&self) -> f32 {
        self.loss
    }
}