// brique/layers.rs

1use crate::activation::*;
2use crate::matrix::*;
3use crate::optimizer::Optimizer;
4
// Fudge factor added to the Adam denominator to avoid division by zero.
// NOTE(review): `10E-8` evaluates to 1.0e-7, not the canonical Adam epsilon
// of 1e-8 — confirm whether this was intentional before changing it, as the
// CSV test fixtures were presumably generated with the current value.
const EPSILON: f64 = 10E-8;
// note : we have directly the transpose of weights (hence the _t)
// height -> number of inputs
// width -> number of neurons in the layer
/// A fully-connected layer: affine transform (`input · W_t + b`) with an
/// optional ReLU activation.
#[derive(Clone)]
pub struct Layer {
    /// Transposed weight matrix: height = number of inputs, width = neurons.
    pub weights_t: Matrix,
    /// Bias row vector (1 x number of neurons).
    pub biases: Matrix,
    /// When true, ReLU is applied after the affine transform.
    pub relu: bool,
    /// Activation cached by the last training-mode forward pass.
    pub output: Matrix,

    // for adam optimizer — moment estimates, lazily created on first update.
    // (NOTE(review): `biase` spelling kept as-is; fields are public API.)
    pub first_moment_weight: Option<Matrix>,
    pub first_moment_biase: Option<Matrix>,
    pub second_moment_weight: Option<Matrix>,
    pub second_moment_biase: Option<Matrix>,
}
22
23impl Layer {
24    pub fn init(input_size: u32, size: u32, relu: bool) -> Layer {
25        Layer {
26            weights_t: Matrix::init_rand(input_size.try_into().unwrap(), size.try_into().unwrap()),
27            biases: Matrix::init_zero(1, size.try_into().unwrap()),
28            relu,
29            output: Matrix::init_zero(0, 0),
30            first_moment_weight: None,
31            first_moment_biase: None,
32            second_moment_weight: None,
33            second_moment_biase: None,
34        }
35    }
36
37    pub fn init_with_data(weights_t: Matrix, biases: Matrix, relu: bool) -> Layer {
38        Layer {
39            weights_t,
40            biases,
41            relu,
42            output: Matrix::init_zero(0, 0),
43            first_moment_weight: None,
44            first_moment_biase: None,
45            second_moment_weight: None,
46            second_moment_biase: None,
47        }
48    }
49
50    #[allow(dead_code)]
51    pub fn init_test(size: u32, relu: bool, weights_t: Matrix) -> Layer {
52        Layer {
53            weights_t,
54            biases: Matrix::init_zero(1, size.try_into().unwrap()),
55            relu,
56            output: Matrix::init_zero(0, 0),
57            first_moment_weight: None,
58            first_moment_biase: None,
59            second_moment_weight: None,
60            second_moment_biase: None,
61        }
62    }
63
64    pub fn forward(&mut self, input: &Matrix, predict: bool) -> Matrix {
65        let mut tmp_output = input.dot(&self.weights_t);
66        tmp_output = tmp_output.add_1d_matrix_to_all_rows(&self.biases);
67
68        if self.relu {
69            tmp_output = self.relu(&tmp_output);
70        }
71
72        if !predict {
73            self.output = tmp_output.clone();
74        }
75
76        tmp_output
77    }
78
79    // d_w(i) = input(i) * d_output(i)
80    // can be rewritten
81    // d_w(i) = output(i-1) * d_output(i) -> if i > 0
82    // d_b(i) = sum of the rows of d_output(i)
83    // d_output(i) = d_output(i+1) * d_w(i+1)
84    pub fn backprop(
85        &mut self,
86        d_z: &Matrix,
87        z_minus_1: &Matrix,
88        previous_layer_relu: bool,
89        lambda: f64,
90        optimizer: &Optimizer,
91        iteration: i32,
92        is_input_layer: bool,
93        debug: bool,
94        debug_array_d_weights: &mut Option<Vec<Matrix>>,
95        debug_array_d_biaises: &mut Option<Vec<Matrix>>,
96        debug_array_d_outputs: &mut Option<Vec<Matrix>>,
97    ) -> Matrix {
98        let d_w: Matrix = z_minus_1
99            .t()
100            .dot(d_z)
101            .add_two_matrices(&self.weights_t.mult(lambda));
102        let d_b: Matrix = d_z.sum_rows();
103
104        if debug {
105            debug_array_d_outputs
106                .get_or_insert_with(|| Vec::new())
107                .push(d_z.clone());
108            debug_array_d_weights
109                .get_or_insert_with(|| Vec::new())
110                .push(d_w.clone());
111            debug_array_d_biaises
112                .get_or_insert_with(|| Vec::new())
113                .push(d_b.clone());
114        }
115
116        let mut new_d_z = d_z.dot(&self.weights_t.t());
117        if !is_input_layer {
118            if previous_layer_relu {
119                new_d_z.compute_d_relu_inplace(z_minus_1);
120            }
121        }
122
123        self.update_weigths(d_w, optimizer, iteration);
124        self.update_biases(d_b, optimizer, iteration);
125
126        new_d_z
127    }
128
129    //implementing ReLu for this project
130    fn relu(&self, input: &Matrix) -> Matrix {
131        let mut output: Matrix = Matrix::init_zero(input.height, input.width);
132        for r in 0..input.height {
133            for c in 0..input.width {
134                output.set(relu(input.get(r, c)), r, c);
135            }
136        }
137        output
138    }
139
140    pub fn update_weigths(&mut self, input: Matrix, optimizer: &Optimizer, iteration: i32) {
141        match optimizer {
142            Optimizer::SGD { learning_step } => {
143                self.weights_t = self
144                    .weights_t
145                    .add_two_matrices(&input.mult(*learning_step * -1.0));
146            }
147
148            Optimizer::Adam {
149                learning_step,
150                beta1,
151                beta2,
152            } => {
153                let mut corrected_first_moment: Matrix =
154                    self.compute_corrected_first_moment_weights(input.clone(), *beta1, iteration);
155                let mut corrected_second_moment: Matrix =
156                    self.compute_corrected_second_moment_weights(input, *beta2, iteration);
157
158                // W(t+1) = W(t) - learning_step * (first_moment / (Sqrt(second_moment) + epsilon))
159                corrected_second_moment.sqrt_inplace();
160                corrected_second_moment.add_inplace(EPSILON);
161                corrected_first_moment.div_two_matrices_inplace(&corrected_second_moment);
162
163                self.weights_t = self
164                    .weights_t
165                    .add_two_matrices(&corrected_first_moment.mult(*learning_step * -1.0));
166            }
167        }
168    }
169
170    pub fn update_biases(&mut self, input: Matrix, optimizer: &Optimizer, iteration: i32) {
171        match optimizer {
172            Optimizer::SGD { learning_step } => {
173                self.biases = self
174                    .biases
175                    .add_two_matrices(&input.mult(*learning_step * -1.0));
176            }
177
178            Optimizer::Adam {
179                learning_step,
180                beta1,
181                beta2,
182            } => {
183                let mut corrected_first_moment: Matrix =
184                    self.compute_corrected_first_moment_biases(input.clone(), *beta1, iteration);
185                let mut corrected_second_moment: Matrix =
186                    self.compute_corrected_second_moment_biases(input, *beta2, iteration);
187
188                // B(t+1) = B(t) - learning_step * (first_moment / (Sqrt(second_moment) + epsilon))
189                corrected_second_moment.sqrt_inplace();
190                corrected_second_moment.add_inplace(EPSILON);
191                corrected_first_moment.div_two_matrices_inplace(&corrected_second_moment);
192
193                self.biases = self
194                    .biases
195                    .add_two_matrices(&corrected_first_moment.mult(*learning_step * -1.0));
196            }
197        }
198    }
199
200    fn compute_corrected_first_moment_weights(
201        &mut self,
202        mut input: Matrix,
203        beta1: f64,
204        iteration: i32,
205    ) -> Matrix {
206        // first_moment (t+1) = momentum(t) * beta1 + gradient(t+1) * (1 - beta1)
207        input.mult_inplace(1.0 - beta1);
208        self.first_moment_weight
209            .get_or_insert(Matrix::init_zero(
210                self.weights_t.height,
211                self.weights_t.width,
212            ))
213            .mult_inplace(beta1);
214        self.first_moment_weight
215            .get_or_insert(Matrix::init_zero(
216                self.weights_t.height,
217                self.weights_t.width,
218            ))
219            .add_two_matrices_inplace(&input);
220
221        match &self.first_moment_weight {
222            Some(first_moment) => first_moment.div(1.0 - (beta1.powi(iteration))),
223            None => panic!("Weight first_moment should be initalized at this point"),
224        }
225    }
226
227    fn compute_corrected_first_moment_biases(
228        &mut self,
229        mut input: Matrix,
230        beta1: f64,
231        iteration: i32,
232    ) -> Matrix {
233        // first_moment (t+1) = momentum(t) * beta1 + gradient(t+1) * (1 - beta1)
234        input.mult_inplace(1.0 - beta1);
235        self.first_moment_biase
236            .get_or_insert(Matrix::init_zero(self.biases.height, self.biases.width))
237            .mult_inplace(beta1);
238        self.first_moment_biase
239            .get_or_insert(Matrix::init_zero(self.biases.height, self.biases.width))
240            .add_two_matrices_inplace(&input);
241
242        match &self.first_moment_biase {
243            Some(first_moment) => first_moment.div(1.0 - (beta1.powi(iteration))),
244            None => panic!("Biase first_moment should be initalized at this point"),
245        }
246    }
247
248    fn compute_corrected_second_moment_weights(
249        &mut self,
250        mut input: Matrix,
251        beta2: f64,
252        iteration: i32,
253    ) -> Matrix {
254        // second_moment (t+1) = second_momentu(t) * beta2 + gradient(t+1)² * (1 - beta1)
255        input.pow_inplace(2);
256        input.mult_inplace(1.0 - beta2);
257        self.second_moment_weight
258            .get_or_insert(Matrix::init_zero(
259                self.weights_t.height,
260                self.weights_t.width,
261            ))
262            .mult_inplace(beta2);
263        self.second_moment_weight
264            .get_or_insert(Matrix::init_zero(
265                self.weights_t.height,
266                self.weights_t.width,
267            ))
268            .add_two_matrices_inplace(&input);
269
270        match &self.second_moment_weight {
271            Some(second_moment) => second_moment.div(1.0 - (beta2.powi(iteration))),
272            None => panic!("Weight velocity should be initalized at this point"),
273        }
274    }
275
276    fn compute_corrected_second_moment_biases(
277        &mut self,
278        mut input: Matrix,
279        beta2: f64,
280        iteration: i32,
281    ) -> Matrix {
282        // second_moment (t+1) = second_momentu(t) * beta2 + gradient(t+1)² * (1 - beta1)
283        input.pow_inplace(2);
284        input.mult_inplace(1.0 - beta2);
285        self.second_moment_biase
286            .get_or_insert(Matrix::init_zero(self.biases.height, self.biases.width))
287            .mult_inplace(beta2);
288        self.second_moment_biase
289            .get_or_insert(Matrix::init_zero(self.biases.height, self.biases.width))
290            .add_two_matrices_inplace(&input);
291
292        match &self.second_moment_biase {
293            Some(second_moment) => second_moment.div(1.0 - (beta2.powi(iteration))),
294            None => panic!("Biase velocity should be initalized at this point"),
295        }
296    }
297}
298
//unit test adam optimizer
#[cfg(test)]
mod tests {
    use crate::{optimizer::Optimizer, parse_test_csv::parse_test_csv};

    use super::Layer;

    /// Checks one Adam update of weights and biases against reference values
    /// pre-computed in `tests/test_data/adam_test.csv`.
    ///
    /// NOTE(review): the fixture indices appear to mean [0] weights,
    /// [1] biases, [2] d_w, [3] d_b, [4..=7] seeded moment estimates, and
    /// [16]/[17] the expected results — confirm against the fixture generator.
    #[test]
    fn test_adam_optimizer() {
        let test_data = parse_test_csv("tests/test_data/adam_test.csv".to_string());
        let mut layer = Layer::init_with_data(test_data[0].clone(), test_data[1].clone(), true);

        // Seed the Adam state so the update under test is not the first step.
        layer.first_moment_weight = Some(test_data[4].clone());
        layer.first_moment_biase = Some(test_data[5].clone());
        layer.second_moment_weight = Some(test_data[6].clone());
        layer.second_moment_biase = Some(test_data[7].clone());

        let adam = Optimizer::Adam {
            learning_step: 0.001,
            beta1: 0.9,
            beta2: 0.999,
        };
        let iteration = 7;

        layer.update_weigths(test_data[2].clone(), &adam, iteration);
        layer.update_biases(test_data[3].clone(), &adam, iteration);

        // `is_equal(_, 10)` presumably compares with 10-digit precision —
        // verify against the Matrix implementation.
        assert!(
            layer.weights_t.is_equal(&test_data[16], 10),
            "Adam test : the updated weights don't have the expected values"
        );
        assert!(
            layer.biases.is_equal(&test_data[17], 10),
            "Adam test : the updated biases don't have the expected values"
        );
    }
}