//! juggernaut/nn.rs — the `NeuralNetwork` implementation.

1use nl::NeuralLayer;
2use sample::Sample;
3use matrix::Matrix;
4use matrix::MatrixTrait;
5use cost::CostFunction;
6use cost::squared_error::SquaredError;
7use utils::sample_input_to_matrix;
8use utils::sample_output_to_matrix;
9use rand::Rng;
10use rand;
11
/// Represents a Neural Network with layers, inputs and outputs
pub struct NeuralNetwork {
    /// Layers in forward order (input layer first, output layer last).
    layers: Vec<NeuralLayer>,
    /// Cost function used to compute the training error (squared error by default).
    cost_function: Box<CostFunction>,
    /// When true, samples are shuffled before every training epoch.
    shuffle_data: bool,
    /// Optional callback invoked with the averaged error after each epoch.
    on_error_fn: Option<Box<Fn(f64)>>,
    /// Optional callback invoked with a reference to the network after each epoch.
    on_epoch_fn: Option<Box<Fn(&NeuralNetwork)>>,
}
20
21impl NeuralNetwork {
22    pub fn new() -> NeuralNetwork {
23        NeuralNetwork {
24            layers: vec![],
25            cost_function: Box::new(SquaredError::new()),
26            shuffle_data: true,
27            on_error_fn: None,
28            on_epoch_fn: None,
29        }
30    }
31
32    /// To set shuffle data flag
33    /// Enabling this option shuffles data before each iteration
34    pub fn set_shuffle_data(&mut self, enable: bool) {
35        self.shuffle_data = enable;
36    }
37
38    /// To set a cost function for the network
39    pub fn set_cost_function<T>(&mut self, cost_function: T)
40    where
41        T: 'static + CostFunction,
42    {
43        self.cost_function = Box::new(cost_function);
44    }
45
46    /// To add a callback function and receive the errors of the network during training process
47    /// Please note that there is another function that basically calcualtes the error value
48    pub fn on_error<FN>(&mut self, callback_fn: FN)
49    where
50        FN: 'static + Fn(f64),
51    {
52        self.on_error_fn = Some(Box::new(callback_fn));
53    }
54
55    /// To add a callback function to get called after each epoch
56    pub fn on_epoch<FN>(&mut self, callback_fn: FN)
57    where
58        FN: 'static + Fn(&NeuralNetwork),
59    {
60        self.on_epoch_fn = Some(Box::new(callback_fn));
61    }
62
63    /// To emit the `on_error` callback
64    fn emit_on_error(&self, err: f64) {
65        match self.on_error_fn {
66            Some(ref err_fn) => err_fn(err),
67            None => (),
68        }
69    }
70
71    /// To emit the `on_epoch` callback
72    fn emit_on_epoch(&self) {
73        match self.on_epoch_fn {
74            Some(ref epoch_fn) => epoch_fn(&self),
75            None => (),
76        }
77    }
78
79    /// To add a new layer to the network
80    ///
81    /// Example:
82    ///
83    /// ```
84    /// # #[macro_use] extern crate juggernaut;
85    /// # fn main() {
86    /// use juggernaut::sample::Sample;
87    /// use juggernaut::nl::NeuralLayer;
88    /// use juggernaut::nn::NeuralNetwork;
89    /// use juggernaut::activation::Activation;
90    /// use juggernaut::activation::Sigmoid;
91    ///
92    /// let mut test = NeuralNetwork::new();
93    ///
94    /// // 1st layer = 4 neurons - 2 inputs
95    /// let nl1 = NeuralLayer::new(4, 2, Sigmoid::new());
96    ///
97    /// test.add_layer(nl1);
98    /// # }
99    /// ```
100    pub fn add_layer(&mut self, layer: NeuralLayer) {
101        if self.layers.len() > 0 {
102            let prev_layer_neurons = self.layers[self.layers.len() - 1].neurons();
103
104            if prev_layer_neurons != layer.inputs() {
105                panic!(
106                    "New layer should have enough inputs. \
107                     Expected {}, got {}",
108                    prev_layer_neurons,
109                    layer.inputs()
110                );
111            }
112        }
113
114        self.layers.push(layer);
115    }
116
117    /// To get the layers of the network
118    pub fn get_layers(&self) -> &Vec<NeuralLayer> {
119        &self.layers
120    }
121
122    /// This is the forward method of the network which calculates the random weights
123    /// and multiplies the inputs of given samples to the weights matrix. Thinks.
124    pub fn forward(&self, sample: &Sample) -> Vec<Matrix> {
125        if self.layers.len() == 0 {
126            panic!("Neural network doesn't have any layers.");
127        }
128
129        let mut weights: Vec<Matrix> = vec![];
130
131        let mut prev_weight: Matrix = Matrix::zero(0, 0);
132
133        for (i, layer) in self.layers.iter().enumerate() {
134            // TODO: this part is ridiculously complicated, needs refactoring.
135            // and the reason is Rust's lifetime. clean this part, please.
136            //
137            let transposed_bias = layer.biases().transpose();
138
139            if i > 0 {
140                let mult: Matrix = prev_weight
141                    .dot(&layer.weights().transpose())
142                    .map(&|n, _, j| n + (1f64 * transposed_bias.get(0, j)))
143                    .map_row(&|n| layer.activation.calc(n));
144
145                if i != self.layers.len() - 1 {
146                    prev_weight = mult.clone();
147                }
148
149                weights.push(mult);
150
151            } else {
152                // first layer (first iteration)
153                let samples_input: Matrix = sample_input_to_matrix(&sample);
154
155                let mult: Matrix = samples_input
156                    .dot(&layer.weights().transpose())
157                    .map(&|n, _, j| n + (1f64 * transposed_bias.get(0, j)))
158                    .map_row(&|n| layer.activation.calc(n));
159
160                if self.layers.len() > 1 {
161                    // more than one layer
162                    // storing the result for the next iteration
163                    prev_weight = mult.clone();
164                }
165
166                weights.push(mult);
167            }
168        }
169
170        weights
171    }
172
173    /// Use this function to evaluate a trained neural network
174    ///
175    /// This function simply passes the given sample to the `forward` function and returns the
176    /// output of last layer
177    pub fn evaluate(&self, sample: &Sample) -> Matrix {
178        let forward: Vec<Matrix> = self.forward(sample);
179
180        // TODO (afshinm): is this correct to clone here?
181        forward.last().unwrap().clone()
182    }
183
184    /// This function calculates the error rate of network during training and
185    /// calls the `on_error_fn` if it is available
186    fn error(&self, prediction: &Matrix, target: &Matrix) -> f64 {
187        let err = self.cost_function.calc(prediction, target);
188
189        err
190    }
191
192    /// To train the network. It calls the forward pass and updates the weights using
193    /// backpropagation
194    pub fn train(&mut self, mut samples: Vec<Sample>, epochs: i32, learning_rate: f64) {
195        for _ in 0..epochs {
196            let mut mut_samples = samples.as_mut_slice();
197
198            // shuffle data if it's enabled
199            if self.shuffle_data {
200                rand::thread_rng().shuffle(&mut mut_samples);
201            }
202
203            let mut error_value = vec![];
204
205            for sample in mut_samples.iter() {
206
207                let mut output: Vec<Matrix> = self.forward(&sample);
208
209                // because we are backpropagating
210                output.reverse();
211
212                //let mut error: Matrix = Matrix::zero(0, 0);
213                let mut delta: Matrix = Matrix::zero(0, 0);
214
215                for (i, layer) in output.iter().enumerate() {
216                    // because of `reverse`
217                    let index: usize = self.layers.len() - 1 - i;
218
219                    // because it is different when we want to calculate error for each layer for
220                    // the output layer it is:
221                    //
222                    //      y - output_layer
223                    //
224                    // but for other layers it is:
225                    //
226                    //      output_delta.dot(weights_1)
227                    //
228                    let error = if i == 0 {
229                        //last layer (output)
230                        let samples_outputs = sample_output_to_matrix(&sample);
231
232                        // this is:
233                        //
234                        //     y - last_layer_of_forward
235                        //
236                        // where `last_layer_of_forward` is `layer` because of i == 0 condition
237
238                        let error = Matrix::generate(
239                            samples_outputs.rows(),
240                            samples_outputs.cols(),
241                            &|m, n| samples_outputs.get(m, n) - layer.get(m, n),
242                        );
243
244                        // calculating error of this iteration
245                        error_value.push(self.error(&layer, &samples_outputs));
246
247                        error
248                    } else {
249                        // this is:
250                        //
251                        //     delta_of_previous_layer.dot(layer)
252                        //
253                        delta.dot(&self.layers[index + 1].weights().clone())
254                    };
255
256                    let forward_derivative: Matrix =
257                        layer.map_row(&|n| self.layers[index].activation.derivative(n));
258
259                    delta = Matrix::generate(layer.rows(), layer.cols(), &|m, n| {
260                        error.get(m, n) * forward_derivative.get(m, n)
261                    });
262
263                    let biases = self.layers[index].biases().clone();
264
265                    self.layers[index].set_biases(biases.map(&|n, i, j| {
266                        n + (delta.get(j, i) * learning_rate)
267                    }));
268
269                    let mut prev_layer: Matrix = sample_input_to_matrix(&sample);
270
271                    if i != output.len() - 1 {
272                        // TODO (afshinm): is this necessary to clone here?
273                        prev_layer = output[i + 1].clone();
274                    }
275
276                    // updating weights of this layer
277                    let syn: Matrix = delta.map(&|n, _, _| n * learning_rate).transpose().dot(
278                        &prev_layer,
279                    );
280
281                    // forward output and network layers are the same, with a reversed order
282                    // TODO (afshinm): is this necessary to clone here?
283                    let this_layer_weights: &Matrix = &self.layers[index].weights().clone();
284
285                    // finally, set the new weights
286                    self.layers[index].set_weights(Matrix::generate(
287                        this_layer_weights.rows(),
288                        this_layer_weights.cols(),
289                        &|m, n| syn.get(m, n) + this_layer_weights.get(m, n),
290                    ));
291                }
292            }
293
294            self.emit_on_error(
295                error_value.iter().fold(0f64, |n, sum| sum + n) / mut_samples.len() as f64,
296            );
297
298            // call on_epoch callback
299            self.emit_on_epoch();
300        }
301    }
302}
303
#[cfg(test)]
mod tests {
    use activation::Sigmoid;
    use activation::SoftMax;
    use activation::HyperbolicTangent;
    use sample::Sample;
    use nl::NeuralLayer;
    use nn::NeuralNetwork;
    use matrix::MatrixTrait;
    use cost::cross_entropy::CrossEntropy;

    // Verifies that layers added to the network are exposed via `get_layers`.
    #[test]
    fn get_layers_test() {
        let mut test = NeuralNetwork::new();

        let layers = vec![NeuralLayer::new(1, 2, Sigmoid::new())];

        for layer in layers {
            test.add_layer(layer);
        }

        let get_layers = test.get_layers();

        assert_eq!(get_layers.len(), 1);
    }

    // A single-layer forward pass returns one activation matrix.
    #[test]
    fn forward_test() {
        let dataset = vec![Sample::new(vec![1f64, 0f64], vec![0f64])];

        let mut test = NeuralNetwork::new();

        let sig_activation = Sigmoid::new();
        // 1st layer = 1 neuron - 2 inputs
        test.add_layer(NeuralLayer::new(1, 2, sig_activation));

        let forward = test.forward(&dataset[0]);
        assert_eq!(forward.len(), 1);
    }

    // A two-layer forward pass returns one activation matrix per layer.
    #[test]
    fn forward_test_2layers() {
        let dataset = vec![Sample::new(vec![1f64, 0f64], vec![0f64])];

        let mut test = NeuralNetwork::new();

        let sig_activation = Sigmoid::new();

        // 1st layer = 3 neurons - 2 inputs
        test.add_layer(NeuralLayer::new(3, 2, sig_activation));
        // 2nd layer = 1 neuron - 3 inputs
        test.add_layer(NeuralLayer::new(1, 3, sig_activation));

        let forward = test.forward(&dataset[0]);

        assert_eq!(forward.len(), 2);
    }

    // Smoke test: training a single-layer network must not panic.
    #[test]
    fn train_test() {
        let dataset = vec![Sample::new(vec![1f64, 0f64], vec![0f64])];

        let mut test = NeuralNetwork::new();

        let sig_activation = Sigmoid::new();

        // 1st layer = 1 neuron - 2 inputs
        test.add_layer(NeuralLayer::new(1, 2, sig_activation));

        test.train(dataset, 10, 0.1f64);
    }

    // Smoke test: training a two-layer network must not panic.
    #[test]
    fn train_test_2layers() {
        let dataset = vec![
            Sample::new(vec![1f64, 0f64], vec![0f64]),
            Sample::new(vec![1f64, 1f64], vec![1f64]),
            Sample::new(vec![1f64, 1f64], vec![1f64]),
        ];

        let mut test = NeuralNetwork::new();

        let sig_activation = Sigmoid::new();

        // 1st layer = 2 neurons - 2 inputs
        test.add_layer(NeuralLayer::new(2, 2, sig_activation));
        // 2nd layer = 1 neuron - 2 inputs
        test.add_layer(NeuralLayer::new(1, 2, sig_activation));

        let forward = test.forward(&dataset[1]);

        test.train(dataset, 100, 0.1f64);

        assert_eq!(forward.len(), 2);
    }


    // Trains a two-layer network, then checks that `evaluate` (the "think"
    // step) yields a 1x1 output matrix.
    #[test]
    fn train_test_2layers_think() {
        let dataset = vec![
            Sample::new(vec![0f64, 0f64, 1f64], vec![0f64]),
            Sample::new(vec![0f64, 1f64, 1f64], vec![0f64]),
            Sample::new(vec![1f64, 0f64, 1f64], vec![1f64]),
            Sample::new(vec![1f64, 1f64, 1f64], vec![1f64]),
        ];

        let mut test = NeuralNetwork::new();

        let sig_activation = Sigmoid::new();

        // 1st layer = 2 neurons - 3 inputs
        test.add_layer(NeuralLayer::new(2, 3, sig_activation));
        // 2nd layer = 1 neuron - 2 inputs
        test.add_layer(NeuralLayer::new(1, 2, sig_activation));

        test.train(dataset, 5, 0.1f64);

        let think = test.evaluate(&Sample::predict(vec![1f64, 0f64, 1f64]));

        assert_eq!(think.rows(), 1);
        assert_eq!(think.cols(), 1);
    }

    // The `on_error` callback should report a positive error during training.
    #[test]
    fn error_function_test() {
        let dataset = vec![
            Sample::new(vec![0f64, 0f64, 1f64], vec![0f64]),
            Sample::new(vec![0f64, 1f64, 1f64], vec![0f64]),
            Sample::new(vec![1f64, 0f64, 1f64], vec![1f64]),
            Sample::new(vec![1f64, 1f64, 1f64], vec![1f64]),
        ];

        let mut test = NeuralNetwork::new();

        // error should be more than 0
        test.on_error(|err| {
            assert!(err > 0f64);
        });

        let sig_activation = Sigmoid::new();

        // 1st layer = 2 neurons - 3 inputs
        test.add_layer(NeuralLayer::new(2, 3, sig_activation));
        // 2nd layer = 1 neuron - 2 inputs
        test.add_layer(NeuralLayer::new(1, 2, sig_activation));

        test.train(dataset, 5, 0.1f64);
    }

    // The `on_epoch` callback receives the network and can inspect its layers.
    #[test]
    fn on_epoch_test() {
        let dataset = vec![
            Sample::new(vec![0f64, 0f64, 1f64], vec![0f64]),
            Sample::new(vec![0f64, 1f64, 1f64], vec![0f64]),
            Sample::new(vec![1f64, 0f64, 1f64], vec![1f64]),
            Sample::new(vec![1f64, 1f64, 1f64], vec![1f64]),
        ];

        let mut test = NeuralNetwork::new();

        // TODO (afshinm): this test is not complete.
        // it should count the number of calls of the closure as well
        test.on_epoch(|this| {
            assert_eq!(3, this.layers[0].weights().cols());
            assert_eq!(2, this.layers[0].weights().rows());

            assert_eq!(2, this.layers[1].weights().cols());
            assert_eq!(1, this.layers[1].weights().rows());
        });

        let sig_activation = Sigmoid::new();

        // 1st layer = 2 neurons - 3 inputs
        test.add_layer(NeuralLayer::new(2, 3, sig_activation));
        // 2nd layer = 1 neuron - 2 inputs
        test.add_layer(NeuralLayer::new(1, 2, sig_activation));

        test.train(dataset, 5, 0.1f64);
    }

    // Layers with different activation functions can be mixed in one network.
    #[test]
    fn network_with_two_activations() {
        let dataset = vec![
            Sample::new(vec![0f64, 0f64, 1f64], vec![0f64]),
            Sample::new(vec![0f64, 1f64, 1f64], vec![0f64]),
            Sample::new(vec![1f64, 0f64, 1f64], vec![1f64]),
            Sample::new(vec![1f64, 1f64, 1f64], vec![1f64]),
        ];

        let mut test = NeuralNetwork::new();

        // 1st layer = 2 neurons - 3 inputs
        test.add_layer(NeuralLayer::new(2, 3, Sigmoid::new()));
        // 2nd layer = 1 neuron - 2 inputs
        test.add_layer(NeuralLayer::new(1, 2, HyperbolicTangent::new()));

        test.train(dataset, 5, 0.1f64);

        let think = test.evaluate(&Sample::predict(vec![1f64, 0f64, 1f64]));

        assert_eq!(think.rows(), 1);
        assert_eq!(think.cols(), 1);
    }


    // Three-layer network (two hidden): train and evaluate end to end.
    #[test]
    fn two_hidden_layers() {
        let dataset = vec![
            Sample::new(vec![0f64, 0f64, 1f64], vec![0f64]),
            Sample::new(vec![0f64, 1f64, 1f64], vec![0f64]),
            Sample::new(vec![1f64, 0f64, 1f64], vec![1f64]),
            Sample::new(vec![1f64, 1f64, 1f64], vec![1f64]),
        ];

        let mut test = NeuralNetwork::new();

        // 1st layer = 2 neurons - 3 inputs
        test.add_layer(NeuralLayer::new(2, 3, Sigmoid::new()));
        // 2nd layer = 4 neurons - 2 inputs
        test.add_layer(NeuralLayer::new(4, 2, Sigmoid::new()));
        // 3rd layer = 1 neuron - 4 inputs
        test.add_layer(NeuralLayer::new(1, 4, Sigmoid::new()));

        test.train(dataset, 1, 0.1f64);

        let think = test.evaluate(&Sample::predict(vec![1f64, 0f64, 1f64]));

        assert_eq!(think.rows(), 1);
        assert_eq!(think.cols(), 1);
    }

    // NOTE(review): despite the name, this builds only two layers; what it
    // actually exercises is training with the CrossEntropy cost function.
    #[test]
    fn three_hidden_layers() {
        let dataset = vec![
            Sample::new(vec![0f64, 0f64, 1f64], vec![0f64]),
            Sample::new(vec![0f64, 1f64, 1f64], vec![0f64]),
            Sample::new(vec![1f64, 0f64, 1f64], vec![1f64]),
            Sample::new(vec![1f64, 1f64, 1f64], vec![1f64]),
        ];

        let mut test = NeuralNetwork::new();

        // 1st layer = 2 neurons - 3 inputs
        test.add_layer(NeuralLayer::new(2, 3, Sigmoid::new()));
        // 2nd layer = 1 neuron - 2 inputs
        test.add_layer(NeuralLayer::new(1, 2, Sigmoid::new()));

        test.set_cost_function(CrossEntropy);

        test.train(dataset, 5, 0.1f64);

        let think = test.evaluate(&Sample::predict(vec![1f64, 0f64, 1f64]));

        assert_eq!(think.rows(), 1);
        assert_eq!(think.cols(), 1);
    }

    // Multiclass output: SoftMax output layer with CrossEntropy cost.
    #[test]
    fn train_test_multiclass() {
        let dataset = vec![
            Sample::new(vec![1f64, 0f64, 2f64], vec![0f64, 1f64]),
            Sample::new(vec![1f64, 1f64, 5f64], vec![1f64, 0f64]),
        ];

        let mut test = NeuralNetwork::new();

        let sig_activation = Sigmoid::new();
        test.set_cost_function(CrossEntropy);

        // 1st layer = 3 neurons - 3 inputs
        test.add_layer(NeuralLayer::new(3, 3, sig_activation));
        // 2nd layer = 2 neurons - 3 inputs
        test.add_layer(NeuralLayer::new(2, 3, SoftMax::new()));

        test.train(dataset, 5, 0.01f64);
    }

    // Smoke test: training with shuffling explicitly enabled must not panic.
    #[test]
    fn shuffle_data() {
        let dataset = vec![
            Sample::new(vec![1f64, 0f64, 2f64], vec![0f64, 1f64]),
            Sample::new(vec![1f64, 1f64, 5f64], vec![1f64, 0f64]),
        ];

        let mut test = NeuralNetwork::new();

        test.set_shuffle_data(true);

        // 1st layer = 3 neurons - 3 inputs
        test.add_layer(NeuralLayer::new(3, 3, Sigmoid::new()));
        // 2nd layer = 2 neurons - 3 inputs
        test.add_layer(NeuralLayer::new(2, 3, SoftMax::new()));

        test.train(dataset, 5, 0.01f64);
    }
}