use linalg::{Matrix, MatrixSlice, BaseMatrix, BaseMatrixMut};
use learning::{LearningResult, SupModel};
use learning::error::{Error, ErrorKind};
use learning::toolkit::activ_fn;
use learning::toolkit::activ_fn::ActivationFunc;
use learning::toolkit::cost_fn;
use learning::toolkit::cost_fn::CostFunc;
use learning::toolkit::regularization::Regularization;
use learning::optim::{Optimizable, OptimAlgorithm};
use learning::optim::grad_desc::StochasticGD;
use rand::thread_rng;
use rand::distributions::{Sample, range};
/// Neural network model.
///
/// Pairs the underlying network (layers + weights + criterion) with the
/// optimization algorithm used to train it.
#[derive(Debug)]
pub struct NeuralNet<'a, T, A>
where T: Criterion,
A: OptimAlgorithm<BaseNeuralNet<'a, T>>
{
// The network itself: layer sizes, flattened weights and the criterion.
base: BaseNeuralNet<'a, T>,
// Optimization algorithm applied to `base` during training.
alg: A,
}
impl<'a, T, A> SupModel<Matrix<f64>, Matrix<f64>> for NeuralNet<'a, T, A>
where T: Criterion,
A: OptimAlgorithm<BaseNeuralNet<'a, T>>
{
/// Predicts targets by forward propagating `inputs` through the
/// network; returns an error if the input column count does not match
/// the first layer size (see `BaseNeuralNet::forward_prop`).
fn predict(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
self.base.forward_prop(inputs)
}
/// Trains the model on the given inputs and targets: runs the
/// optimization algorithm starting from the current weights and stores
/// the optimized weights back into the network.
fn train(&mut self, inputs: &Matrix<f64>, targets: &Matrix<f64>) -> LearningResult<()> {
let optimal_w = self.alg.optimize(&self.base, &self.base.weights, inputs, targets);
self.base.weights = optimal_w;
Ok(())
}
}
impl<'a> NeuralNet<'a, BCECriterion, StochasticGD> {
    /// Creates a network with the default configuration: the binary
    /// cross entropy criterion trained by stochastic gradient descent
    /// with default settings.
    pub fn default(layer_sizes: &[usize]) -> NeuralNet<BCECriterion, StochasticGD> {
        let base = BaseNeuralNet::default(layer_sizes);
        let alg = StochasticGD::default();
        NeuralNet {
            base: base,
            alg: alg,
        }
    }
}
impl<'a, T, A> NeuralNet<'a, T, A>
    where T: Criterion,
          A: OptimAlgorithm<BaseNeuralNet<'a, T>>
{
    /// Constructs a network from the layer sizes, a training criterion
    /// and an optimization algorithm. Weights are randomly initialized.
    pub fn new(layer_sizes: &'a [usize], criterion: T, alg: A) -> NeuralNet<'a, T, A> {
        let base_net = BaseNeuralNet::new(layer_sizes, criterion);
        NeuralNet {
            base: base_net,
            alg: alg,
        }
    }

    /// Returns a view of the weight matrix connecting layer `idx` to
    /// layer `idx + 1` (bias row included).
    pub fn get_net_weights(&self, idx: usize) -> MatrixSlice<f64> {
        let all_weights = &self.base.weights[..];
        self.base.get_layer_weights(all_weights, idx)
    }
}
/// Base representation of a neural network.
#[derive(Debug)]
pub struct BaseNeuralNet<'a, T: Criterion> {
// Sizes of every layer, input and output layers included.
layer_sizes: &'a [usize],
// All layer weight matrices flattened into one vector, layer by layer,
// each block row-major with shape (layer_sizes[l] + 1) x layer_sizes[l + 1].
weights: Vec<f64>,
// Activation/cost criterion (optionally with regularization).
criterion: T,
}
impl<'a> BaseNeuralNet<'a, BCECriterion> {
    /// Builds a base network driven by the default (unregularized)
    /// binary cross entropy criterion.
    fn default(layer_sizes: &[usize]) -> BaseNeuralNet<BCECriterion> {
        let criterion = BCECriterion::default();
        BaseNeuralNet::new(layer_sizes, criterion)
    }
}
impl<'a, T: Criterion> BaseNeuralNet<'a, T> {
    /// Creates a base network with the given layer sizes and criterion,
    /// randomly initializing the flattened weight vector.
    fn new(layer_sizes: &[usize], criterion: T) -> BaseNeuralNet<T> {
        BaseNeuralNet {
            layer_sizes: layer_sizes,
            weights: BaseNeuralNet::<T>::create_weights(layer_sizes),
            criterion: criterion,
        }
    }

    /// Initializes the flattened weight vector.
    ///
    /// Each adjacent layer pair contributes `(l_in + 1) * l_out` weights
    /// (the +1 row is for the bias unit), sampled uniformly from
    /// `[-eps, eps]` with `eps = sqrt(6 / (l_in + l_out))`
    /// (Xavier/Glorot-style interval).
    fn create_weights(layer_sizes: &[usize]) -> Vec<f64> {
        let mut between = range::Range::new(0f64, 1f64);
        let mut rng = thread_rng();
        layer_sizes.windows(2)
            .flat_map(|w| {
                // Input dimension including the bias unit.
                let l_in = w[0] + 1;
                let l_out = w[1];
                let eps_init = (6f64 / (l_in + l_out) as f64).sqrt();
                (0..l_in * l_out)
                    .map(|_i| (between.sample(&mut rng) * 2f64 * eps_init) - eps_init)
                    .collect::<Vec<_>>()
            })
            .collect()
    }

    /// Returns a view into `weights` for the matrix connecting layer
    /// `idx` to layer `idx + 1`, with shape
    /// `(layer_sizes[idx] + 1) x layer_sizes[idx + 1]` — the extra row
    /// holds the bias weights.
    fn get_layer_weights(&self, weights: &[f64], idx: usize) -> MatrixSlice<f64> {
        debug_assert!(idx < self.layer_sizes.len() - 1);
        // Sanity check (debug builds only): the flattened vector has
        // exactly the size the layer sizes imply.
        let mut full_size = 0usize;
        for l in 0..self.layer_sizes.len() - 1 {
            full_size += (self.layer_sizes[l] + 1) * self.layer_sizes[l + 1];
        }
        debug_assert_eq!(full_size, weights.len());
        // Offset of this layer's block within the flattened vector.
        let mut start = 0usize;
        for l in 0..idx {
            start += (self.layer_sizes[l] + 1) * self.layer_sizes[l + 1]
        }
        // SAFETY: `start + rows * cols` lies within `weights` whenever
        // the slice has the full size computed above (checked by the
        // debug asserts); the row stride equals the column count, so the
        // view is contiguous inside the slice.
        unsafe {
            MatrixSlice::from_raw_parts(weights.as_ptr().offset(start as isize),
                                        self.layer_sizes[idx] + 1,
                                        self.layer_sizes[idx + 1],
                                        self.layer_sizes[idx + 1])
        }
    }

    /// Layer weights taken from the network's own stored weight vector.
    fn get_net_weights(&self, idx: usize) -> MatrixSlice<f64> {
        self.get_layer_weights(&self.weights[..], idx)
    }

    /// Layer weights for layer `idx` with the bias row stripped off.
    /// Used by the regularization terms, which must not touch biases.
    fn get_non_bias_weights(&self, weights: &[f64], idx: usize) -> MatrixSlice<f64> {
        let layer_weights = self.get_layer_weights(weights, idx);
        layer_weights.reslice([1, 0], layer_weights.rows() - 1, layer_weights.cols())
    }

    /// Computes the cost and its gradient with respect to `weights` via
    /// forward and backward propagation.
    ///
    /// Returns `(cost, gradient)` where `gradient` is flattened in the
    /// same layer-by-layer row-major layout as `weights`.
    ///
    /// # Panics
    ///
    /// Panics if `inputs.cols()` does not match the first layer size.
    fn compute_grad(&self,
                    weights: &[f64],
                    inputs: &Matrix<f64>,
                    targets: &Matrix<f64>)
                    -> (f64, Vec<f64>) {
        assert_eq!(inputs.cols(), self.layer_sizes[0]);

        let mut forward_weights = Vec::with_capacity(self.layer_sizes.len() - 1);
        let mut activations = Vec::with_capacity(self.layer_sizes.len());

        // Prepend a column of ones for the bias unit.
        let net_data = Matrix::ones(inputs.rows(), 1).hcat(inputs);
        activations.push(net_data.clone());

        // Forward pass: record the pre-activations (z) and the
        // bias-augmented activations (a) of every layer.
        {
            let mut z = net_data * self.get_layer_weights(weights, 0);
            forward_weights.push(z.clone());
            for l in 1..self.layer_sizes.len() - 1 {
                let mut a = self.criterion.activate(z.clone());
                let ones = Matrix::ones(a.rows(), 1);
                a = ones.hcat(&a);
                z = &a * self.get_layer_weights(weights, l);
                activations.push(a);
                forward_weights.push(z.clone());
            }
            activations.push(self.criterion.activate(z));
        }

        // Backward pass: deltas are pushed in reverse layer order.
        let mut deltas = Vec::with_capacity(self.layer_sizes.len() - 1);
        {
            // Output-layer delta from the cost gradient.
            let z = forward_weights[self.layer_sizes.len() - 2].clone();
            let g = self.criterion.grad_activ(z);
            let mut delta = self.criterion
                .cost_grad(&activations[self.layer_sizes.len() - 1], targets)
                .elemul(&g);
            deltas.push(delta.clone());
            // Propagate backwards through the hidden layers.
            for l in (1..self.layer_sizes.len() - 1).rev() {
                let mut z = forward_weights[l - 1].clone();
                let ones = Matrix::ones(z.rows(), 1);
                z = ones.hcat(&z);
                let g = self.criterion.grad_activ(z);
                delta = (delta * Matrix::from(self.get_layer_weights(weights, l)).transpose())
                    .elemul(&g);
                // Drop the bias column: no delta flows to bias units.
                let non_bias_cols = &(1..delta.cols()).collect::<Vec<usize>>()[..];
                delta = delta.select_cols(non_bias_cols);
                deltas.push(delta.clone());
            }
        }

        let mut gradients = Vec::with_capacity(weights.len());
        // The delta terms are stored in reverse layer order.
        for (l, activ_item) in activations.iter().take(self.layer_sizes.len() - 1).enumerate() {
            // BUGFIX: the gradient must be a^T * delta, which has the
            // same (l_in + 1) x l_out shape and row-major layout as the
            // layer's weight block. The previous delta^T * a produced
            // the transpose, corrupting the flattened gradient and
            // mismatching the regularization term below for any
            // non-square layer.
            let mut g = activ_item.transpose() * &deltas[self.layer_sizes.len() - 2 - l];
            if self.criterion.is_regularized() {
                // Regularize everything except the bias row: pad the
                // regularization gradient with a zero row on top.
                let non_bias_weights = self.get_non_bias_weights(weights, l);
                let zeros = Matrix::zeros(1, non_bias_weights.cols());
                g += zeros.vcat(&self.criterion.reg_cost_grad(non_bias_weights));
            }
            // Average over the batch and append in weight-vector layout.
            gradients.append(&mut (g / inputs.rows() as f64).into_vec());
        }

        let mut cost = self.criterion.cost(&activations[activations.len() - 1], targets);
        if self.criterion.is_regularized() {
            for i in 0..self.layer_sizes.len() - 1 {
                cost += self.criterion.reg_cost(self.get_non_bias_weights(weights, i));
            }
        }
        (cost, gradients)
    }

    /// Forward propagates `inputs` through the network using the stored
    /// weights, returning the output-layer activations.
    ///
    /// # Errors
    ///
    /// Returns `ErrorKind::InvalidData` if the input column count does
    /// not match the first layer size.
    fn forward_prop(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
        if inputs.cols() != self.layer_sizes[0] {
            Err(Error::new(ErrorKind::InvalidData,
                           "The input data dimensions must match the first layer."))
        } else {
            // Bias column prepended, then alternate weight product and
            // activation through every layer.
            let net_data = Matrix::ones(inputs.rows(), 1).hcat(inputs);
            let mut z = net_data * self.get_net_weights(0);
            let mut a = self.criterion.activate(z.clone());
            for l in 1..self.layer_sizes.len() - 1 {
                let ones = Matrix::ones(a.rows(), 1);
                a = ones.hcat(&a);
                z = a * self.get_net_weights(l);
                a = self.criterion.activate(z.clone());
            }
            Ok(a)
        }
    }
}
impl<'a, T: Criterion> Optimizable for BaseNeuralNet<'a, T> {
type Inputs = Matrix<f64>;
type Targets = Matrix<f64>;
/// Computes the cost and gradient at `params` by delegating to the
/// network's inherent `compute_grad`.
fn compute_grad(&self,
params: &[f64],
inputs: &Matrix<f64>,
targets: &Matrix<f64>)
-> (f64, Vec<f64>) {
self.compute_grad(params, inputs, targets)
}
}
/// Training criterion for a neural network.
///
/// Bundles an activation function with a cost function, plus an
/// optional regularization scheme (none by default).
pub trait Criterion {
/// The activation function used by the network layers.
type ActFunc: ActivationFunc;
/// The cost function used to score the network outputs.
type Cost: CostFunc<Matrix<f64>>;
/// Applies the activation function elementwise to the matrix.
fn activate(&self, mat: Matrix<f64>) -> Matrix<f64> {
mat.apply(&Self::ActFunc::func)
}
/// Applies the activation function's gradient elementwise.
fn grad_activ(&self, mat: Matrix<f64>) -> Matrix<f64> {
mat.apply(&Self::ActFunc::func_grad)
}
/// Computes the cost of `outputs` against `targets`.
fn cost(&self, outputs: &Matrix<f64>, targets: &Matrix<f64>) -> f64 {
Self::Cost::cost(outputs, targets)
}
/// Computes the gradient of the cost with respect to `outputs`.
fn cost_grad(&self, outputs: &Matrix<f64>, targets: &Matrix<f64>) -> Matrix<f64> {
Self::Cost::grad_cost(outputs, targets)
}
/// Returns the regularization scheme; `Regularization::None` unless
/// the implementor overrides this.
fn regularization(&self) -> Regularization<f64> {
Regularization::None
}
/// Whether this criterion applies any regularization.
fn is_regularized(&self) -> bool {
match self.regularization() {
Regularization::None => false,
_ => true,
}
}
/// Regularization cost of the given (non-bias) weights.
fn reg_cost(&self, reg_weights: MatrixSlice<f64>) -> f64 {
self.regularization().reg_cost(reg_weights)
}
/// Regularization gradient of the given (non-bias) weights.
fn reg_cost_grad(&self, reg_weights: MatrixSlice<f64>) -> Matrix<f64> {
self.regularization().reg_grad(reg_weights)
}
}
/// Criterion pairing the sigmoid activation with the cross entropy cost
/// (see the `Criterion` impl below).
#[derive(Clone, Copy, Debug)]
pub struct BCECriterion {
// Regularization applied to the non-bias weights.
regularization: Regularization<f64>,
}
impl Criterion for BCECriterion {
type ActFunc = activ_fn::Sigmoid;
type Cost = cost_fn::CrossEntropyError;
/// Returns the regularization stored on this criterion.
fn regularization(&self) -> Regularization<f64> {
self.regularization
}
}
impl Default for BCECriterion {
fn default() -> Self {
BCECriterion { regularization: Regularization::None }
}
}
impl BCECriterion {
    /// Constructs a binary cross entropy criterion carrying the
    /// supplied regularization.
    pub fn new(regularization: Regularization<f64>) -> Self {
        let criterion = BCECriterion { regularization: regularization };
        criterion
    }
}
/// Criterion pairing the linear (identity) activation with the mean
/// squared error cost (see the `Criterion` impl below).
#[derive(Clone, Copy, Debug)]
pub struct MSECriterion {
// Regularization applied to the non-bias weights.
regularization: Regularization<f64>,
}
impl Criterion for MSECriterion {
type ActFunc = activ_fn::Linear;
type Cost = cost_fn::MeanSqError;
/// Returns the regularization stored on this criterion.
fn regularization(&self) -> Regularization<f64> {
self.regularization
}
}
impl Default for MSECriterion {
fn default() -> Self {
MSECriterion { regularization: Regularization::None }
}
}
impl MSECriterion {
    /// Constructs a mean squared error criterion carrying the supplied
    /// regularization.
    pub fn new(regularization: Regularization<f64>) -> Self {
        let criterion = MSECriterion { regularization: regularization };
        criterion
    }
}