cogent/layer.rs

use crate::activations::Activation;

use arrayfire::{constant, gt, matmul, mul, randu, sum, Array, Dim4, MatProp};

/// A dense layer.
pub struct DenseLayer {
    /// Activation function applied to this layer's output.
    pub activation: Activation,
    /// Bias column vector: `[size, 1, 1, 1]`.
    pub biases: Array<f32>,
    /// Weight matrix: `[size, from, 1, 1]`.
    pub weights: Array<f32>,
}
impl DenseLayer {
    /// Constructs a new `DenseLayer` with biases drawn uniformly from [-1, 1)
    /// and weights drawn uniformly from [-1, 1) then scaled by 1/√from.
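    ///
    /// A minimal usage sketch; the `cogent::...` paths and the
    /// `Activation::Sigmoid` variant are assumed here for illustration:
    /// ```no_run
    /// use cogent::activations::Activation;
    /// use cogent::layer::DenseLayer;
    ///
    /// // 784 inputs feeding 128 neurons.
    /// let layer = DenseLayer::new(784, 128, Activation::Sigmoid);
    /// assert_eq!(layer.weights.dims().get(), &[128, 784, 1, 1]);
    /// assert_eq!(layer.biases.dims().get(), &[128, 1, 1, 1]);
    /// ```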
    pub fn new(from: u64, size: u64, activation: Activation) -> DenseLayer {
        if size == 0 {
            panic!("All dense layer sizes must be >0.");
        }
        DenseLayer {
            activation,
            biases: (randu::<f32>(Dim4::new(&[size, 1, 1, 1])) * 2f32) - 1f32,
            weights: ((randu::<f32>(Dim4::new(&[size, from, 1, 1])) * 2f32) - 1f32)
                / (from as f32).sqrt(),
        }
    }
    /// Constructs a new `DenseLayer` using a given value for all weights and biases.
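    ///
    /// A minimal sketch (`cogent::...` paths and `Activation::Sigmoid` assumed):
    /// ```no_run
    /// # use cogent::{activations::Activation, layer::DenseLayer};
    /// // 3 inputs -> 2 neurons, every weight and bias set to 0.5.
    /// let layer = DenseLayer::new_constant(3, 2, Activation::Sigmoid, 0.5);
    /// ```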
    pub fn new_constant(from: u64, size: u64, activation: Activation, val: f32) -> DenseLayer {
        if size == 0 {
            panic!("All dense layer sizes must be >0.");
        }
        DenseLayer {
            activation,
            biases: constant(val, Dim4::new(&[size, 1, 1, 1])),
            weights: constant(val, Dim4::new(&[size, from, 1, 1])),
        }
    }
    /// Forward propagates a batch.
    ///
    /// `a` is `[from, batch_size]` and `ones` must be a `[1, batch_size]` row of
    /// ones (used to broadcast the biases). Returns `(a^l, z^l)`.
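    ///
    /// A minimal sketch of a single forward pass (an ArrayFire backend and the
    /// `cogent::...` paths are assumed):
    /// ```no_run
    /// # use cogent::{activations::Activation, layer::DenseLayer};
    /// use arrayfire::{constant, randu, Dim4};
    ///
    /// let layer = DenseLayer::new(3, 2, Activation::Sigmoid);
    /// let batch = 4;
    /// let a = randu::<f32>(Dim4::new(&[3, batch, 1, 1]));
    /// let ones = constant(1f32, Dim4::new(&[1, batch, 1, 1]));
    /// let (activation, z) = layer.forepropagate(&a, &ones);
    /// assert_eq!(activation.dims().get(), &[2, batch, 1, 1]);
    /// assert_eq!(z.dims().get(), &[2, batch, 1, 1]);
    /// ```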
    pub fn forepropagate(&self, a: &Array<f32>, ones: &Array<f32>) -> (Array<f32>, Array<f32>) {
        let weighted_inputs: Array<f32> = matmul(&self.weights, a, MatProp::NONE, MatProp::NONE);

        // Batched `arrayfire::add` is very slow, so we instead broadcast the
        // biases across the batch by multiplying with a row of ones.
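        // e.g. for 2 outputs and a batch of 3:
        //   [b0]                  [b0 b0 b0]
        //   [b1] matmul [1 1 1] = [b1 b1 b1]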
        let bias_matrix: Array<f32> = matmul(&self.biases, ones, MatProp::NONE, MatProp::NONE);

        // z = w·a + b
        let input = weighted_inputs + bias_matrix;

        // a = σ(z)
        let activation = self.activation.run(&input);

        (activation, input)
    }
    // TODO name `from_error` better
    // TODO We only need `training_set_length` if `l2` is `Some(..)`; how can we best pass `training_set_length`?
    /// Backpropagates, updating this layer's weights and biases in the process.
    ///
    /// Returns `∂C/∂a^{l-1}`, the partial error to feed into the previous layer.
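    ///
    /// A minimal sketch of one SGD step; the cost gradient below is a stand-in
    /// for illustration only (an ArrayFire backend and the `cogent::...` paths
    /// are assumed):
    /// ```no_run
    /// # use cogent::{activations::Activation, layer::DenseLayer};
    /// use arrayfire::{constant, Dim4};
    ///
    /// let mut layer = DenseLayer::new(3, 2, Activation::Sigmoid);
    /// let a = constant(0.5f32, Dim4::new(&[3, 4, 1, 1]));
    /// let ones = constant(1f32, Dim4::new(&[1, 4, 1, 1]));
    /// let (activation, z) = layer.forepropagate(&a, &ones);
    /// // Pretend ∂C/∂a = activation (e.g. quadratic cost with zero targets).
    /// let next_error = layer.backpropagate(&activation, &z, &a, 0.1, None, 4);
    /// assert_eq!(next_error.dims().get(), &[3, 4, 1, 1]);
    /// ```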
    pub fn backpropagate(
        &mut self,
        partial_error: &Array<f32>, // ∂C/∂a: ∇_a C at the output layer, or (w^{l+1})^T δ^{l+1} otherwise
        z: &Array<f32>,             // z^l (weighted input of this layer)
        a: &Array<f32>,             // a^{l-1} (activation of the previous layer)
        learning_rate: f32,
        l2: Option<f32>,
        training_set_length: usize,
    ) -> Array<f32> {
        // ∂C/∂z = ∂a/∂z ⊙ ∂C/∂a (elementwise product)
        // (∂C/∂z = δ)
        let error = self.activation.derivative(z) * partial_error;

        // ∂C/∂b = ∂C/∂z, summed over the batch (dim 1): [size, 1]
        let bias_error = sum(&error, 1);

        // ∂C/∂w = δ matmul a^T: [size, batch] x [batch, from] = [size, from]
        let weight_error = matmul(&error, a, MatProp::NONE, MatProp::TRANS);

        // ∂C/∂a^{l-1} = w^T matmul ∂C/∂z
        let nxt_partial_error = matmul(&self.weights, &error, MatProp::TRANS, MatProp::NONE);

        // Number of examples in the batch
        let batch_len = z.dims().get()[1] as f32;

        // TODO Figure out best way to do weight and bias updates
        // L2-regularised step: w ← (1 − η·λ/n)·w − (η/m)·∂C/∂w
        // Plain step:          w ← w − (η/m)·∂C/∂w
        // (η = learning rate, λ = L2 coefficient, n = training-set size, m = batch size)
        if let Some(lambda) = l2 {
            self.weights = ((1f32 - (learning_rate * lambda / training_set_length as f32))
                * &self.weights)
                - (learning_rate * weight_error / batch_len);
        } else {
            self.weights = &self.weights - (learning_rate * weight_error / batch_len);
        }

        // = old biases - avg bias errors: b ← b − (η/m)·∂C/∂b
        self.biases = &self.biases - (learning_rate * bias_error / batch_len);

        // ∂C/∂a^{l-1}
        nxt_partial_error
    }
}
/// A dropout layer.
pub struct DropoutLayer {
    /// Probability that a given unit is dropped.
    pub p: f32,
    /// Mask produced by the most recent forward pass.
    mask: Array<f32>,
}
impl DropoutLayer {
    /// Constructs a new `DropoutLayer` that drops each unit with probability `p`.
    pub fn new(p: f32) -> DropoutLayer {
        DropoutLayer {
            p,
            mask: Array::<f32>::new_empty(Dim4::new(&[1, 1, 1, 1])),
        }
    }
    /// Forward propagates, creating a fresh mask to fit the given data.
    ///
    /// Each unit (row) is kept with probability `1 - p`, and the same mask is
    /// applied across the whole batch.
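    ///
    /// A minimal sketch (an ArrayFire backend and the `cogent::...` path assumed):
    /// ```no_run
    /// # use cogent::layer::DropoutLayer;
    /// use arrayfire::{constant, Dim4};
    ///
    /// let mut dropout = DropoutLayer::new(0.5);
    /// let z = constant(1f32, Dim4::new(&[8, 4, 1, 1]));
    /// let ones = constant(1f32, Dim4::new(&[1, 4, 1, 1]));
    /// let masked = dropout.forepropagate(&z, &ones);
    /// // Each of the 8 rows is now either all zeros or all ones across the batch.
    /// assert_eq!(masked.dims().get(), &[8, 4, 1, 1]);
    /// ```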
    pub fn forepropagate(&mut self, z: &Array<f32>, ones: &Array<f32>) -> Array<f32> {
        // Sets mask dimensions: one entry per unit (row) of `z`
        let z_dims = z.dims();
        let z_dim_arr = z_dims.get();
        let mask_dims = Dim4::new(&[z_dim_arr[0], 1, 1, 1]);
        // TODO Look into using `tile`
        // Updates mask: 1 where `randu > p` (unit kept), 0 otherwise (unit dropped),
        // broadcast across the batch via matmul with a row of ones
        self.mask = matmul(
            &gt(&randu::<f32>(mask_dims), &self.p, false).cast::<f32>(),
            ones,
            MatProp::NONE,
            MatProp::NONE,
        );
        // Applies mask
        mul(z, &self.mask, false)
    }
    /// Backpropagates using the mask from the most recent `forepropagate` call
    /// (a dropout layer cannot be backpropagated without first forward propagating).
    pub fn backpropagate(&self, partial_error: &Array<f32>) -> Array<f32> {
        // Dropped units pass no gradient back
        mul(partial_error, &self.mask, false)
    }
}
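
// A smoke-test sketch; it assumes an ArrayFire backend is installed and that
// `Activation::Sigmoid` exists in this crate. With all weights and biases fixed
// at 0.5 and inputs of 1, each weighted input is 3 * 0.5 + 0.5 = 2.
#[cfg(test)]
mod tests {
    use super::*;
    use arrayfire::{abs, constant, sum_all, Dim4};

    #[test]
    fn dense_constant_forepropagate() {
        let layer = DenseLayer::new_constant(3, 2, Activation::Sigmoid, 0.5);
        let a = constant(1f32, Dim4::new(&[3, 4, 1, 1]));
        let ones = constant(1f32, Dim4::new(&[1, 4, 1, 1]));
        let (_activation, z) = layer.forepropagate(&a, &ones);
        // Every element of z should be 2.0.
        let expected = constant(2f32, Dim4::new(&[2, 4, 1, 1]));
        let (abs_err, _) = sum_all(&abs(&(z - expected)));
        assert!(abs_err < 1e-5);
    }
}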