//! An implementation of a neural network that can be trained with input and target data
//! and afterwards be used to predict results for new inputs.

use std::vec::Vec;

use rand::distributions::Range;
use ndarray_rand::RandomExt;
use ndarray::prelude::*;
use ndarray::{Shape};
use itertools::{multizip, Itertools};

use learn_config::{LearnConfig};
use error_stats::{ErrorStats};
use traits::{
	Prophet,
	Disciple
};
use activation_fn::{BaseFn, DerivedFn};

/// A fully connected layer within a neural net.
/// 
/// The layer consists of a weights matrix and a delta-weights matrix with equal dimensions,
/// as well as an outputs vector and a gradients vector, where the gradients vector holds
/// one additional entry for the bias neuron.
/// 
/// The values stored within the n-th column of the two weights matrices represent
/// the weights from the n-th input neuron to all neurons of the next neuron layer.
/// 
/// Conversely, the values stored within the n-th row of the two weights matrices are
/// interpreted as the weights of all incoming connections to the n-th neuron in the next
/// neuron layer.
/// 
/// For predicting, only the weights matrix and the outputs vector are required.
/// 
/// The outputs and gradients vectors are frequently used intermediate buffers
/// that are kept around to speed up computation.
/// 
/// The data structure is organized in a way that it mainly represents the connections
/// between two adjacent neuron layers.
/// It owns its output but needs a reference to its input.
/// This design was chosen to be the most modular one and makes it possible to avoid
/// copy overhead in all currently developed situations.
/// 
/// Besides that, this design allows heap memory allocations to be avoided completely
/// after the objects have been set up initially.
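/// 
/// For illustration, a layer connecting 2 input neurons to 3 output neurons (as created
/// by ```FullyConnectedLayer::random(2, 3)```) stores a 3x3 weights matrix (3 rows,
/// 2 + 1 bias columns), a 3x3 delta-weights matrix, an outputs vector of length 3 and
/// a gradients vector of length 4 (3 neurons + 1 bias entry).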
#[derive(Debug, Clone, PartialEq)]
struct FullyConnectedLayer {
	weights:       Array2<f32>,
	delta_weights: Array2<f32>,
	outputs:       Array1<f32>,
	gradients:     Array1<f32>
}

/// A neural net.
/// 
/// Can be trained with testing data and afterwards be used to predict results.
/// 
/// Neural nets in this implementation consist of several stacked neural layers
/// and organize the data flow between them.
/// 
/// For example, when the user calls ```predict``` on a ```NeuralNet```, this
/// object pipes the input data through all of its owned layers and returns
/// the output of the last layer back to the user.
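/// 
/// A minimal usage sketch of ```predict``` on an untrained net (assuming the same
/// ```prelude``` re-exports as in the ```NeuralNet::new``` example):
/// 
/// ```
/// use prophet::prelude::*;
///
/// let config  = LearnConfig::new(0.15, 0.4, ActivationFn::tanh());
/// let mut net = NeuralNet::new(config, &[2, 3, 1]);
///
/// // the returned slice has one element per neuron of the output layer (here: 1)
/// let output = net.predict(&[0.5, -0.5]);
/// assert_eq!(output.len(), 1);
/// ```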
pub struct NeuralNet {
	/// the layers within this ```NeuralNet```
	layers: Vec<FullyConnectedLayer>,

	/// the config that handles all the parameters to tune the learning process
	pub config: LearnConfig,

	/// holds error stats for the user to query the current learning state of the network
	error_stats: ErrorStats
}

impl FullyConnectedLayer {
	/// Creates a FullyConnectedLayer with randomized weights.
	/// 
	/// Implicitly creates weights for the bias neuron,
	/// so the dimensions of the weights matrix are equal to
	/// (outputs) x (inputs + 1).
	/// 
	/// The weights are randomized within the open interval (0,1).
	/// This excludes 0.0 and 1.0 as weights.
	/// Other optional intervals may come with a future update!
	fn random(inputs: Ix, outputs: Ix) -> Self {
		assert!(inputs >= 1 && outputs >= 1);

		let inputs          = inputs  + 1; // implicitly add bias!
		let count_gradients = outputs + 1;
		let shape           = Shape::from(Dim([outputs, inputs]));

		FullyConnectedLayer{
			weights:       Array2::random(shape, Range::new(0.0, 1.0)),
			delta_weights: Array2::default(shape),
			outputs:       Array1::default(outputs),
			gradients:     Array1::zeros(count_gradients)
		}
	}

	fn count_rows(&self) -> Ix { let (rows, _) = self.weights.dim(); rows }
	fn count_columns(&self) -> Ix { let (_, cols) = self.weights.dim(); cols }
	fn count_outputs(&self) -> Ix { self.outputs.dim() }
	fn count_gradients(&self) -> Ix { self.gradients.dim() }

	/// Returns this layer's output as readable slice.
	fn output_as_slice(&self) -> &[f32] {
		self.outputs.as_slice().unwrap()
	}

	/// Takes input slice and performs a feed forward procedure
	/// using the given activation function.
	/// Output of this operation will be stored within this layer
	/// and be returned as readable slice.
	/// 
	/// Expects:
	///  - input with n elements
	/// Requires:
	///  - weight matrix with m rows and (n+1) columns
	/// Asserts:
	///  - output with m elements
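	/// 
	/// In other words, with an implicit bias input of ```1.0``` appended to ```input```,
	/// each output neuron ```j``` is computed as
	/// 
	/// ```text
	/// outputs[j] = activation_fn( Σ_i  weights[j][i] * input_with_bias[i] )
	/// ```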
	fn feed_forward<'a>(
		&'a mut self,
		input: &[f32],
		activation_fn: BaseFn<f32>
	)
		-> &'a [f32]
	{
		debug_assert_eq!(self.count_rows(), self.count_outputs());
		debug_assert_eq!(self.count_columns(), input.len() + 1);

		multizip((self.outputs.iter_mut(), self.weights.outer_iter()))
			.foreach(|(output, weights_row)| {
				*output = activation_fn(
					multizip((weights_row.iter(), input.iter().chain(&[1.0])))
						.map(|(w, i)| w*i)
						.sum()
				) 
			});

		self.output_as_slice()
	}

	/// Used internally in the output layer to initialize gradients for the back propagation phase.
	/// The gradient of the bias neuron is not computed here; it is expected to still be
	/// zero from object initialization.
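	/// 
	/// For each output neuron ```j``` this computes
	/// 
	/// ```text
	/// gradients[j] = (target_values[j] - outputs[j]) * act_fn_dx(outputs[j])
	/// ```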
	fn calculate_output_gradients(&mut self, target_values: &[f32], act_fn_dx: DerivedFn<f32>) -> &Self {
		debug_assert_eq!(self.count_outputs(), target_values.len());
		debug_assert_eq!(self.count_gradients(), target_values.len() + 1); // no calculation for bias!

		multizip((self.gradients.iter_mut(), target_values.iter(), self.outputs.iter()))
			.foreach(|(gradient, target, &output)| {
				*gradient = (target - output) * act_fn_dx(output)
			});

		// gradient of bias should be set equal to zero during object initialization already.
		self
	}

	/// Sets all gradient values in this layer to zero.
	/// This is required as an initialization step before gradients are
	/// accumulated into this layer during back propagation.
	fn reset_gradients(&mut self) {
		self.gradients.fill(0.0);
		debug_assert!(self.gradients.iter().all(|&g| g == 0.0));
	}

	/// Multiplies every gradient of this layer with the derived activation function
	/// of the corresponding output (the bias entry uses an output of ```1.0```).
	fn apply_activation(&mut self, act_fn_dx: DerivedFn<f32>) {
		debug_assert_eq!(self.count_gradients(), self.count_outputs() + 1);

		multizip((self.gradients.iter_mut(), self.outputs.iter().chain(&[1.0])))
			.foreach(|(gradient, &output)| *gradient *= act_fn_dx(output));
	}

	/// Back-propagates gradients from the given layer (the previous one in back propagation
	/// order, i.e. the next one during feed forward) into this layer using the derived
	/// activation function.
	/// This also computes the gradient for the bias neuron.
	/// Returns readable reference to self to allow chaining.
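	/// 
	/// For each neuron ```i``` of this layer (including the bias entry) this computes
	/// 
	/// ```text
	/// gradients[i] = act_fn_dx(outputs_with_bias[i]) * Σ_j  prev.weights[j][i] * prev.gradients[j]
	/// ```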
	fn propagate_gradients(&mut self, prev: &FullyConnectedLayer, act_fn_dx: DerivedFn<f32>) -> &Self {
		debug_assert_eq!(prev.count_rows(), prev.count_gradients() - 1);
		debug_assert_eq!(prev.count_columns(), self.count_gradients());

		self.reset_gradients();

		multizip((prev.weights.outer_iter(), prev.gradients.iter()))
			.foreach(|(prev_weights_row, prev_gradient)| {
				multizip((self.gradients.iter_mut(), prev_weights_row.iter()))
					.foreach(|(gradient, weight)| {
						*gradient += weight * prev_gradient
					})
			});

		self.apply_activation(act_fn_dx);
		self // for chaining in a fold expression
	}

	/// Updates the connection weights of this layer.
	/// This operation is usually used after successful computation of gradients.
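	/// 
	/// For every connection this computes (with an implicit bias input of ```1.0```
	/// appended to ```prev_outputs```)
	/// 
	/// ```text
	/// delta_weight = train_rate * prev_output * gradient  +  learning_momentum * delta_weight
	/// weight      += delta_weight
	/// ```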
	fn update_weights(&mut self,
	                  prev_outputs: &[f32],
	                  train_rate: f32,
	                  learning_momentum: f32) -> &[f32]
	{
		debug_assert_eq!(prev_outputs.len() + 1, self.count_columns());
		debug_assert_eq!(self.count_gradients(), self.count_rows() + 1);

		multizip((self.weights.outer_iter_mut(), self.delta_weights.outer_iter_mut(), self.gradients.iter()))
			.foreach(|(mut weights_row, mut delta_weights_row, gradient)| {
				multizip((prev_outputs.iter().chain(&[1.0]), weights_row.iter_mut(), delta_weights_row.iter_mut()))
					.foreach(|(prev_output, weight, delta_weight)| {
						*delta_weight =
							// Individual input, magnified by the gradient and train rate
							train_rate * prev_output * gradient
							// Also add momentum which is a fraction of the previous delta weight
							+ learning_momentum * *delta_weight;
						*weight += *delta_weight;
					})
			});

		self.output_as_slice()
	}
}



impl NeuralNet {
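	/// Creates a ```NeuralNet``` from the given learn config and an already
	/// constructed stack of layers.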
	fn from_vec(
		learn_config: LearnConfig,
		layers: Vec<FullyConnectedLayer>
	)
		-> Self
	{
		NeuralNet{
			layers: layers,
			config: learn_config,
			error_stats: ErrorStats::default()
		}
	}

	/// Creates a new instance of a ```NeuralNet```.
	/// 
	///  - ```layer_sizes``` define the count of neurons (without bias) per neural layer.
	///  - ```learning_rate``` and ```learning_momentum``` describe the acceleration and momentum
	/// with which the created neural net will be learning. These values can be changed later during
	/// the lifetime of the object if needed.
	///  - ```act_fn``` represents the pair of activation function and its derivative used throughout the
	/// neural net layers.
	/// 
	/// Weights between the neural layers are initialized to random values within the open interval ```(0, 1)```.
	/// 
	/// # Examples
	///
	/// ```
	/// use prophet::prelude::*;
	///
	/// let config  = LearnConfig::new(
	/// 	0.15,                // learning_rate
	/// 	0.4,                 // learning_momentum
	/// 	ActivationFn::tanh() // activation function + derivative
	/// );
	/// let mut net = NeuralNet::new(config, &[2, 4, 3, 1]);
	/// // layer_sizes: - input layer which expects two values
	/// //              - two hidden layers with 4 and 3 neurons
	/// //              - output layer with one neuron
	/// 
	/// // now train the neural net how to be an XOR-operator
	/// let f = -1.0; // represents false
	/// let t =  1.0; // represents true
	/// for _ in 0..1000 {
	/// 	net.train(&[f, f], &[f]);
	/// 	net.train(&[f, t], &[t]);
	/// 	net.train(&[t, f], &[t]);
	/// 	net.train(&[t, t], &[f]);
	/// }
	/// // now check if the neural net has successfully learned it by checking how close
	/// // the latest ```avg_error``` is to ```0.0```:
	/// assert!(net.latest_error_stats().avg_error() < 0.05);
	/// ```
	pub fn new(
		config: LearnConfig,
		layer_sizes: &[Ix]
	)
		-> Self
	{
		let buffer = layer_sizes
			.iter()
			.tuple_windows::<(_, _)>()
			.map(|(&inputs, &outputs)| FullyConnectedLayer::random(inputs, outputs))
			.collect::<Vec<FullyConnectedLayer>>();
		NeuralNet::from_vec(config, buffer)
	}

	fn output_layer(&self) -> &FullyConnectedLayer {
		self.layers.last().unwrap()
	}

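	/// Computes the root of the mean squared distance between the output layer's
	/// current outputs and the given target values.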
	fn overall_net_error(&self, target_values: &[f32]) -> f32 {
		let outputs = self.output_layer().output_as_slice();
		let sum = multizip((outputs.iter(), target_values))
			.map(|(output, target)| { let dx = target - output; dx*dx })
			.sum::<f32>();
		(sum / outputs.len() as f32).sqrt()
	}

	/// Returns the ```ErrorStats``` that were generated by the latest call to ```train```.
	/// 
	/// Returns a default constructed ```ErrorStats``` object when this neural net
	/// was never trained before.
	pub fn latest_error_stats(&self) -> ErrorStats {
		self.error_stats
	}

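	/// Back-propagates gradients for the given target values through all layers,
	/// starting at the output layer.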
	fn propagate_gradients(&mut self, target_values: &[f32]) {
		let act_fn_dx = self.config.act_fn.derived_fn(); // fetched up front: `self.layers` is mutably borrowed below

		if let Some((last, tail)) = self.layers.split_last_mut() {
			tail.iter_mut()
				.rev()
				.fold(last.calculate_output_gradients(target_values, act_fn_dx),
				      |prev, layer| layer.propagate_gradients(prev, act_fn_dx));
		}
	}

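	/// Updates the weights of all layers, front to back, using the gradients
	/// computed during back propagation.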
	fn update_weights(&mut self, input: &[f32]) {
		let learn_rate     = self.config.learn_rate();
		let learn_momentum = self.config.learn_momentum();

		self.layers
			.iter_mut()
			.fold(input, |prev_output, layer| layer.update_weights(prev_output, learn_rate, learn_momentum));
	}

	fn update_error_stats(&mut self, target_values: &[f32]) -> ErrorStats {
		let latest_error = self.overall_net_error(target_values);
		self.error_stats.update(latest_error);
		self.error_stats
	}
}

impl Prophet for NeuralNet {
	type Elem = f32;

	fn predict<'b, 'a: 'b>(&'a mut self, input: &'b [Self::Elem]) -> &'b [Self::Elem] {
		let act_fn = self.config.act_fn.base_fn(); // fetched up front: the fold below mutably borrows `self.layers`
		self.layers
			.iter_mut()
			.fold(input, |out, layer| layer.feed_forward(out, act_fn))
	}
}

impl Disciple for NeuralNet {
	type Elem = f32;

	fn train(&mut self, input: &[Self::Elem], target_values: &[Self::Elem]) -> ErrorStats {
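		// Feed the input forward to fill the output buffers, back-propagate the
		// error for the given targets, update all weights with the resulting
		// gradients and finally refresh the error statistics for the user to query.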
		self.predict(input);
		self.propagate_gradients(target_values);
		self.update_weights(input);
		self.update_error_stats(target_values)
	}
}

#[cfg(test)]
mod tests {
	use learn_config::{
		LearnConfig
	};
	use traits::*;
	use activation_fn::{
		ActivationFn
	};
	use super::{
		NeuralNet
	};

	#[test]
	fn train_xor() {
		let config  = LearnConfig::new(0.15, 0.4, ActivationFn::<f32>::tanh());
		let mut net = NeuralNet::new(config, &[2, 4, 3, 1]);
		let t =  1.0;
		let f = -1.0;
		let print = false;
		for _ in 0..200 {
			if print {
				println!("(f,f) => {}"  , net.train(&[f, f], &[f]));
				println!("(f,t) => {}"  , net.train(&[f, t], &[t]));
				println!("(t,f) => {}"  , net.train(&[t, f], &[t]));
				println!("(t,t) => {}\n", net.train(&[t, t], &[f]));
			}
			else {
				net.train(&[f, f], &[f]);
				net.train(&[f, t], &[t]);
				net.train(&[t, f], &[t]);
				net.train(&[t, t], &[f]);
			}
		}
		assert!(net.latest_error_stats().avg_error() < 0.05);
	}

	#[test]
	fn train_constant() {
		let config  = LearnConfig::new(0.25, 0.5, ActivationFn::<f32>::identity());
		let mut net = NeuralNet::new(config, &[1, 1]);
		let mut vx = vec![0.0; 1];
		let print = false;
		for _ in 0..100 {
			for &x in &[0.0, 0.2, 0.4, 0.6, 0.8, 1.0] {
				vx[0] = x;
				if print {
					println!("{} => {}", x, net.train(vx.as_slice(), &[1.0]));
				}
				else {
					net.train(vx.as_slice(), &[1.0]);
				}
			}
		}
		assert!(net.latest_error_stats().avg_error() < 0.05);
	}

	#[test]
	fn train_and() {
		let config  = LearnConfig::new(0.15, 0.5, ActivationFn::<f32>::tanh());
		let mut net = NeuralNet::new(config, &[2, 1]);
		let f = -1.0;
		let t =  1.0;
		let print = false;
		for _ in 0..200 {
			if print {
				println!("(f, f) => {}"  , net.train(&[f, f], &[f]));
				println!("(f, t) => {}"  , net.train(&[f, t], &[f]));
				println!("(t, f) => {}"  , net.train(&[t, f], &[f]));
				println!("(t, t) => {}\n", net.train(&[t, t], &[t]));
			}
			else {
				net.train(&[f, f], &[f]);
				net.train(&[f, t], &[f]);
				net.train(&[t, f], &[f]);
				net.train(&[t, t], &[t]);
			}
		}
		assert!(net.latest_error_stats().avg_error() < 0.05);
	}

	#[test]
	fn train_triple_add() {
		use rand::*;
		let config  = LearnConfig::new(0.25, 0.5, ActivationFn::<f32>::identity());
		let mut net = NeuralNet::new(config, &[3, 1]);
		let mut gen = thread_rng();
		let print   = false;
		for _ in 0..200 {
			let a = gen.next_f32();
			let b = gen.next_f32();
			let c = gen.next_f32();
			if print {
				println!("{} + {} + {} (= {}) => {}", a, b, c, a+b+c, net.train(&[a, b, c], &[a+b+c]));
			}
			else {
				net.train(&[a, b, c], &[a+b+c]);
			}
		}
		assert!(net.latest_error_stats().avg_error() < 0.05);
	}
}


#[cfg(all(feature = "bench", test))]
mod bench {
	use super::*;
	use test::{Bencher, black_box};

	use traits::*;
	use learn_config::{LearnConfig};
	use activation_fn::{ActivationFn};
	use super::{NeuralNet};

	#[bench]
	fn bench_giant(bencher: &mut Bencher) {
		let config  = LearnConfig::new(0.25, 0.5, ActivationFn::<f32>::tanh());
		let mut net = NeuralNet::new(config, &[2, 1000, 1000, 1]);
		let f = -1.0;
		let t =  1.0;

		bencher.iter(|| {
			net.train(&[f, f], &[f]);
			net.train(&[f, t], &[f]);
			net.train(&[t, f], &[f]);
			net.train(&[t, t], &[t]);
		});
	}
}