use crate::mse;
use crate::Tensor;
use elara_log::prelude::*;
use ndarray::prelude::*;
use ndarray_rand::rand_distr::Uniform;
use ndarray_rand::RandomExt;
use std::fmt::Debug;
use std::iter::zip;

const DEBUGGING_GUIDE: &str = r#"
This is a debugging guide for resolving common issues encountered when using elara-math.
If you run into unexpected behaviour, consider checking the following:

1) Learning rate: Do not set this too high, or training will diverge; do not set it
too low either, or the loss will decrease very little and training may stall in a
local minimum.

2) Training data shapes: Ensure that the training data has the same length as the
test data.

3) Model layer shapes: Ensure that each of the model's layers has an input shape
matching the previous layer's output shape.

4) Epoch count: Do not set this too high, or it will cause overfitting; do not set
it too low, or it will cause underfitting.
"#;

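/// A neural network layer: a component that exposes its trainable parameters,
/// computes a forward pass, and reports its (input, output) shape.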
pub trait Layer {
    fn parameters(&self) -> Vec<&Tensor>;

    fn forward(&self, x: &Tensor) -> Tensor;

    fn zero_grad(&self) {
        for p in self.parameters() {
            p.zero_grad();
        }
    }

    fn shape(&self) -> (usize, usize);
}

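/// A fully-connected (dense) layer with a weight matrix, a bias row vector,
/// and an optional activation function.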
pub struct Linear {
    pub weights: Tensor,
    pub biases: Tensor,
    pub activation: Activations,
    input_dim: usize,
    output_dim: usize,
}

impl Linear {
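    /// Creates a new dense layer with `input_dim` inputs and `output_dim` outputs.
    /// Weights are initialized uniformly in [0, 1) and biases uniformly in [0, 0.1).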
    pub fn new(input_dim: usize, output_dim: usize, activation: Activations) -> Linear {
        let weights = Array2::random((input_dim, output_dim), Uniform::new(0.0, 1.0));
        let biases = Array2::random((1, output_dim), Uniform::new(0.0, 0.1));
        Linear {
            weights: Tensor::new(weights),
            biases: Tensor::new(biases),
            activation,
            input_dim,
            output_dim,
        }
    }
}

impl Debug for dyn Layer {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Layer({}, {})", self.shape().0, self.shape().1)
    }
}

impl Layer for Linear {
    fn parameters(&self) -> Vec<&Tensor> {
        vec![&self.weights, &self.biases]
    }

    fn forward(&self, train_data: &Tensor) -> Tensor {
        let out = &train_data.matmul(&self.weights) + &self.biases;
        match &self.activation {
            Activations::ReLU => out.relu(),
            Activations::Sigmoid => out.sigmoid(),
            Activations::None => out,
        }
    }

    fn shape(&self) -> (usize, usize) {
        (self.input_dim, self.output_dim)
    }
}

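/// Activation functions that a [`Linear`] layer can apply to its output.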
pub enum Activations {
    ReLU,
    Sigmoid,
    None,
}

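/// Optimizers available for training: `SGD` updates parameters once per training
/// sample, `BGD` updates once per epoch over the full batch, and `None` marks a
/// model that has not been compiled yet.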
pub enum Optimizers {
    SGD,
    BGD,
    None,
}

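/// A sequential neural network: an ordered stack of layers trained with the
/// optimizer selected via [`Model::compile`].
///
/// A minimal usage sketch (illustrative only; the shapes, hyperparameters, and
/// the construction of `Tensor` inputs from `ndarray`'s `array!` macro are
/// assumptions rather than a verified doctest):
///
/// ```ignore
/// let x = Tensor::new(array![[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]);
/// let y = Tensor::new(array![[0.0], [1.0], [1.0], [0.0]]);
///
/// let mut model = Model::new();
/// model.add_layer(Linear::new(2, 4, Activations::ReLU));
/// model.add_layer(Linear::new(4, 1, Activations::Sigmoid));
/// model.compile(Optimizers::BGD);
/// model.fit(&x, &y, 1000, 0.1, false);
/// let prediction = model.predict(&x);
/// ```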
pub struct Model {
    pub layers: Vec<Box<dyn Layer>>,
    pub optimizer: Optimizers,
}

impl Model {
    pub fn new() -> Model {
        Model {
            layers: vec![],
            optimizer: Optimizers::None,
        }
    }

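    /// Appends a layer to the end of the model.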
    pub fn add_layer(&mut self, layer: Linear) {
        self.layers.push(Box::new(layer))
    }

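    /// Runs a forward pass through every layer in sequence.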
    pub fn forward(&self, x: &Tensor) -> Tensor {
        let mut x = x.clone();
        for layer in self.layers.iter() {
            x = layer.forward(&x);
        }
        x
    }

    pub fn parameters(&self) -> Vec<&Tensor> {
        self.layers
            .iter()
            .flat_map(|layer| layer.parameters())
            .collect()
    }

    fn update(&self, lr: f64) {
        for t in self.parameters().iter() {
            t.update(lr);
        }
    }

    fn zero_grad(&self) {
        for t in self.parameters().iter() {
            t.zero_grad();
        }
    }

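    /// Sets the optimizer that [`Model::fit`] will use for training.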
    pub fn compile(&mut self, optimizer: Optimizers) {
        self.optimizer = optimizer;
    }

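    /// Trains the model on inputs `x` and targets `y` for `epochs` epochs at
    /// learning rate `lr`, using the optimizer set by [`Model::compile`].
    /// When `debug` is true, the debugging guide and per-step losses are logged.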
    pub fn fit(&mut self, x: &Tensor, y: &Tensor, epochs: usize, lr: f64, debug: bool) {
        if debug {
            info!("{}", DEBUGGING_GUIDE);
        }

        if self.layers.is_empty() {
            error!("[elara-math] The model does not contain any layers and cannot be trained.");
            return;
        }

        if let Optimizers::None = self.optimizer {
            error!("[elara-math] The model was not configured with an optimizer and cannot be trained.");
            return;
        }

        for (idx, (layer, layer_next)) in self
            .layers
            .iter()
            .zip(self.layers[1..].iter())
            .enumerate()
        {
            if layer.shape().1 != layer_next.shape().0 {
                error!("[elara-math] Layer #{} was configured with an output size of {}, while layer #{} was configured with an input size of {}. This is invalid, both should match.", idx + 1, layer.shape().1, idx + 2, layer_next.shape().0);
                return;
            }
        }

        // Epoch indices run from 0 through `epochs` inclusive.
        for epoch in 0..(epochs + 1) {
            match self.optimizer {
                // Batch gradient descent: one parameter update per epoch,
                // computed over the full training set.
                Optimizers::BGD => {
                    let out = self.forward(x);
                    let loss = mse(&out, y);
                    if debug {
                        info!("Epoch {}, loss {:?}", epoch, loss);
                    }
                    loss.backward();
                    self.update(lr);
                    self.zero_grad();
                }
                // Stochastic gradient descent: one parameter update per training sample.
                Optimizers::SGD => {
                    let mut counter = 0;
                    for (x_el, y_el) in zip(x.clone(), y.clone()) {
                        if counter > x.shape().0 {
                            break;
                        }
                        let out = self.forward(&x_el);
                        let loss = mse(&out, &y_el);
                        if debug {
                            info!("Epoch {}, sample {}, loss {:?}", epoch, counter, loss);
                        }
                        loss.backward();
                        self.update(lr);
                        self.zero_grad();
                        counter += 1;
                    }
                }
                // Optimizers::None is rejected with an early return above.
                _ => unreachable!(),
            }
        }
    }

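    /// Runs the trained model on `x`; equivalent to [`Model::forward`].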
    pub fn predict(&self, x: &Tensor) -> Tensor {
        self.forward(x)
    }
}