// elara_math/nn.rs

use crate::mse;
use crate::Tensor;
use elara_log::prelude::*;
use ndarray::prelude::*;
use ndarray_rand::rand_distr::Uniform;
use ndarray_rand::RandomExt;
use std::fmt::Debug;
use std::iter::zip;

const DEBUGGING_GUIDE: &str = r#"
This is a short debugging guide for common issues encountered when using elara-math.
If you run into an unexpected error, consider checking the following:

1) Learning rate: do not set this too high, or training will diverge; do not set it
too low, or training may stall with little loss reduction (for example, by getting
stuck in a local minimum).

2) Training data shapes: ensure that the input data and the target data have the
same number of samples.

3) Model layer shapes: ensure that each layer's input size matches the previous
layer's output size.

4) Epoch count: do not set this too high, or the model may overfit; do not set it
too low, or the model may underfit.
"#;

/// A general trait for a layer of a neural network
pub trait Layer {
    /// Return references to the layer's trainable parameters
    fn parameters(&self) -> Vec<&Tensor>;

    /// Compute the layer's forward pass
    fn forward(&self, x: &Tensor) -> Tensor;

    /// Reset the gradients of all of the layer's parameters
    fn zero_grad(&self) {
        for p in self.parameters() {
            p.zero_grad();
        }
    }

    /// Return the layer's (input size, output size) pair
    fn shape(&self) -> (usize, usize);
}

/// A 2D densely-connected linear layer
pub struct Linear {
    pub weights: Tensor,
    pub biases: Tensor,
    pub activation: Activations,
    input_dim: usize,
    output_dim: usize,
}

impl Linear {
    /// Create a new linear layer with randomly-initialized weights and biases
    pub fn new(input_dim: usize, output_dim: usize, activation: Activations) -> Linear {
        let weights = Array2::random((input_dim, output_dim), Uniform::new(0.0, 1.0));
        let biases = Array2::random((1, output_dim), Uniform::new(0.0, 0.1));
        Linear {
            weights: Tensor::new(weights),
            biases: Tensor::new(biases),
            activation,
            input_dim,
            output_dim,
        }
    }
}

impl Debug for dyn Layer {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Layer({}, {})", self.shape().0, self.shape().1)
    }
}

impl Layer for Linear {
    fn parameters(&self) -> Vec<&Tensor> {
        vec![&self.weights, &self.biases]
    }

    fn forward(&self, train_data: &Tensor) -> Tensor {
        // Affine transform (x·W + b) followed by the layer's activation function
        let out = &train_data.matmul(&self.weights) + &self.biases;
        match &self.activation {
            Activations::ReLU => out.relu(),
            Activations::Sigmoid => out.sigmoid(),
            Activations::None => out,
        }
    }

    fn shape(&self) -> (usize, usize) {
        (self.input_dim, self.output_dim)
    }
}
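// A minimal usage sketch for `Linear` (hedged; the sizes below are illustrative
// assumptions). `Linear::new(3, 2, ...)` builds a (3, 2) weight matrix and a (1, 2)
// bias row, so `forward` maps a (batch, 3) input to a (batch, 2) output via
// `x.matmul(&weights) + biases` followed by the activation.
//
//     let layer = Linear::new(3, 2, Activations::ReLU);
//     let x = Tensor::new(Array2::<f64>::zeros((4, 3))); // batch of 4 samples, 3 features each
//     let out = layer.forward(&x);                       // shape (4, 2)
//     assert_eq!(layer.shape(), (3, 2));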

/// Common activation functions
pub enum Activations {
    /// Rectified linear unit: max(0, x)
    ReLU,
    /// Logistic sigmoid: 1 / (1 + e^(-x))
    Sigmoid,
    /// No activation (identity)
    None,
}

/// Common optimizers
pub enum Optimizers {
    /// Stochastic gradient descent: one parameter update per training sample
    SGD,
    /// Batch gradient descent: one parameter update per epoch over the full dataset
    BGD,
    /// No optimizer configured
    None,
}

/// A neural network model with a Keras-inspired API
pub struct Model {
    pub layers: Vec<Box<dyn Layer>>,
    pub optimizer: Optimizers,
}

impl Model {
    /// Create a new model with no layers and no optimizer
    pub fn new() -> Model {
        Model {
            layers: vec![],
            optimizer: Optimizers::None,
        }
    }

    /// Add a layer to a model
    pub fn add_layer(&mut self, layer: Linear) {
        self.layers.push(Box::new(layer))
    }

    /// Compute the forward pass of a model
    pub fn forward(&self, x: &Tensor) -> Tensor {
        let mut x = x.clone();
        for layer in self.layers.iter() {
            x = layer.forward(&x);
        }
        x
    }

    /// Get the weights and biases of a model
    pub fn parameters(&self) -> Vec<&Tensor> {
        self.layers
            .iter()
            .flat_map(|layer| layer.parameters())
            .collect()
    }

    /// Update every parameter from its gradient using the learning rate `lr`
    fn update(&self, lr: f64) {
        for t in self.parameters().iter() {
            t.update(lr);
        }
    }

    /// Reset the gradients of every parameter in the model
    fn zero_grad(&self) {
        for t in self.parameters().iter() {
            t.zero_grad();
        }
    }

    /// Configure a model with an optimizer
    pub fn compile(&mut self, optimizer: Optimizers) {
        self.optimizer = optimizer;
    }

    /// Train a model on inputs `x` and targets `y` for the given number of
    /// epochs and learning rate
    pub fn fit(&mut self, x: &Tensor, y: &Tensor, epochs: usize, lr: f64, debug: bool) {
        // Run checks to make sure the model and input data are valid
        if debug {
            info!("{}", DEBUGGING_GUIDE);
        }

        if self.layers.is_empty() {
            error!("[elara-math] The model does not contain any layers and cannot be trained.")
        }

        if let Optimizers::None = self.optimizer {
            error!("[elara-math] The model was not configured with an optimizer and cannot be trained.")
        }

        // Check that each layer's output size matches the next layer's input size
        for (idx, (layer, layer_next)) in self
            .layers
            .iter()
            .zip(self.layers[1..].iter())
            .enumerate()
        {
            if layer.shape().1 != layer_next.shape().0 {
                error!("[elara-math] Layer #{} was configured with an output size of {}, while layer #{} was configured with an input size of {}. This is invalid; both should match.", idx + 1, layer.shape().1, idx + 2, layer_next.shape().0);
            }
        }

        for epoch in 0..(epochs + 1) {
            match self.optimizer {
                // Batch gradient descent: one update per epoch over the whole dataset
                Optimizers::BGD => {
                    let out = self.forward(x);
                    let loss = mse(&out, y);
                    if debug {
                        info!("Epoch {}, loss {:?}", epoch, loss);
                    }
                    loss.backward();
                    self.update(lr);
                    self.zero_grad();
                }
                // Stochastic gradient descent: one update per training sample
                Optimizers::SGD => {
                    let mut counter = 0;
                    for (x_el, y_el) in zip(x.clone(), y.clone()) {
                        if counter > x.shape().0 {
                            break;
                        }
                        let out = self.forward(&x_el);
                        let loss = mse(&out, &y_el);
                        if debug {
                            info!("Epoch {}, sample {}, loss {:?}", epoch, counter, loss);
                        }
                        loss.backward();
                        self.update(lr);
                        self.zero_grad();
                        counter += 1;
                    }
                }
                _ => unreachable!(),
            }
        }
    }

    /// Make predictions from a model
    pub fn predict(&self, x: &Tensor) -> Tensor {
        self.forward(x)
    }
}