// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init module ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//! Initialization functions for layers' parameters.
//!
//! These initializers define a way to set the initial random weights of neuronika's layers.
//!
//! # Using an initializer
//!
//! You can freely access any learnable component of any layer, since their visibility is public,
//! and pass it, by reference, to the initialization function of your choice.
//!
//! ```
//! use neuronika::nn;
//! use neuronika::nn::init::{calculate_gain, xavier_normal};
//!
//! let mut lin = nn::Linear::new(10, 10);
//!
//! xavier_normal(&lin.weight, calculate_gain("relu"));
//! ```
use super::Learnable;
use ndarray::{Dimension, Ix2};
use rand::thread_rng;
use rand_distr::{Distribution, Normal, Uniform};

/// Returns the recommended gain value for the given non-linearity function.
///
/// Supported non-linearities are:
/// * linear
/// * sigmoid
/// * tanh
/// * relu
/// * leaky_relu
///
/// # Arguments
///
/// `non_linearity` - a non-linearity function's name.
///
/// # Panics
///
/// If `non_linearity` is not among those listed above.
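///
/// # Examples
///
/// A minimal sketch of the returned values, taken straight from the supported cases:
///
/// ```
/// use neuronika::nn::init::calculate_gain;
///
/// assert_eq!(calculate_gain("linear"), 1.0);
/// assert_eq!(calculate_gain("relu"), 2.0_f32.sqrt());
/// ```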
pub fn calculate_gain(non_linearity: &str) -> f32 {
    match non_linearity {
        "linear" | "sigmoid" => 1.0,
        "tanh" => 5.0 / 3.0,
        "relu" => 2.0_f32.sqrt(),
        "leaky_relu" => (2.0 / (1.0 + 0.01_f32.powi(2))).sqrt(),
        _ => panic!("error: unsupported nonlinearity: {}", non_linearity),
    }
}

/// Returns the *fan_in* and the *fan_out*.
///
/// For *MLPs*, *fan_in* and *fan_out* are respectively the number of inputs to and outputs from
/// a hidden unit of the layer. For *CNNs*, however, the number of input feature maps and the
/// size of the receptive field must be taken into account.
///
/// # Arguments
///
/// `param` - differentiable variable for which the *fan_in* and the *fan_out* must be
/// calculated.
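///
/// # Examples
///
/// A minimal sketch, reusing the square `Linear` layer from the module-level example, so that
/// *fan_in* and *fan_out* coincide whatever the weight's orientation:
///
/// ```
/// use neuronika::nn;
/// use neuronika::nn::init::calculate_fan_in_fan_out;
///
/// let lin = nn::Linear::new(10, 10);
///
/// // fan_in is read from the second axis of the weight, fan_out from the first.
/// let (fan_in, fan_out) = calculate_fan_in_fan_out(&lin.weight);
/// assert_eq!((fan_in, fan_out), (10.0, 10.0));
/// ```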
pub fn calculate_fan_in_fan_out<D: Dimension>(param: &Learnable<D>) -> (f32, f32) {
    let data = param.data();
    let shape = data.shape();

    let num_input_fmaps = shape[1];
    let num_output_fmaps = shape[0];

    let (fan_in, fan_out) = {
        if shape.len() > 2 {
            // Number of elements in the kernel, i.e. the size of the receptive field.
            let numel = shape.iter().skip(2).product::<usize>();
            (num_input_fmaps * numel, num_output_fmaps * numel)
        } else {
            (num_input_fmaps, num_output_fmaps)
        }
    };

    (fan_in as f32, fan_out as f32)
}

/// Fills the differentiable leaf variable with a constant value.
///
/// # Arguments
///
/// * `param` - differentiable variable to initialize.
///
/// * `value` - value to fill the variable with.
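///
/// # Examples
///
/// A minimal sketch, reusing the `Linear` layer from the module-level example:
///
/// ```
/// use neuronika::nn;
/// use neuronika::nn::init::constant;
///
/// let lin = nn::Linear::new(10, 10);
///
/// // Fill every element of the weight with 0.5.
/// constant(&lin.weight, 0.5);
/// ```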
pub fn constant<D: Dimension>(param: &Learnable<D>, value: f32) {
    param.data_mut().map_inplace(|el| *el = value);
}

/// Fills the differentiable leaf variable with zeros.
///
/// # Arguments
///
/// `param` - differentiable variable to initialize.
pub fn zeros<D: Dimension>(param: &Learnable<D>) {
    param.data_mut().map_inplace(|el| *el = 0.);
}

/// Fills the differentiable leaf variable with ones.
///
/// # Arguments
///
/// `param` - differentiable variable to initialize.
pub fn ones<D: Dimension>(param: &Learnable<D>) {
    param.data_mut().map_inplace(|el| *el = 1.0);
}

/// Fills the matrix differentiable leaf variable with the identity matrix.
///
/// Preserves the identity of the inputs in Linear layers, where as
/// many inputs are preserved as possible.
///
/// # Arguments
///
/// `param` - differentiable variable to initialize.
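///
/// # Examples
///
/// A minimal sketch, reusing the `Linear` layer from the module-level example, whose weight is
/// a matrix:
///
/// ```
/// use neuronika::nn;
/// use neuronika::nn::init::eye;
///
/// let lin = nn::Linear::new(10, 10);
///
/// // Turn the weight into the identity matrix.
/// eye(&lin.weight);
/// ```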
pub fn eye(param: &Learnable<Ix2>) {
    for ((x, y), el) in param.data_mut().indexed_iter_mut() {
        if x == y {
            *el = 1.
        } else {
            *el = 0.
        }
    }
}

/// Fills the {3, 4, 5}-dimensional differentiable leaf variable with the Dirac delta function.
///
/// Preserves the identity of the inputs in convolutional layers, where as many input channels
/// are preserved as possible. In case of `groups > 1`, each group of channels preserves
/// identity.
///
/// # Arguments
///
/// * `param` - differentiable variable to initialize.
///
/// * `groups` - number of groups.
///
/// # Panics
///
/// If the differentiable variable is not {3, 4, 5}-dimensional or the number of output
/// channels is not divisible by `groups`. The number of output channels is equal to the length
/// of the first axis of `param`'s data.
pub fn dirac<D: Dimension>(param: &Learnable<D>, groups: usize) {
    let mut data = param.data_mut();
    let shape = data.shape().to_vec();
    let no_dim = shape.len();

    if !(3..=5).contains(&no_dim) {
        panic!("error: only 3, 4 and 5 dimensional parameters are supported.");
    }
    assert_eq!(
        shape[0].rem_euclid(groups),
        0,
        "error: output channels must be divisible by groups."
    );
    let out_channels_per_groups = shape[0] / groups;
    let min_dim = out_channels_per_groups.min(shape[1]);

    for g in 0..groups {
        for d in 0..min_dim {
            let mut index = D::zeros(no_dim);
            index[0] = g * out_channels_per_groups + d;
            index[1] = d;
            index
                .slice_mut()
                .iter_mut()
                .skip(2)
                .zip(shape.iter().skip(2))
                .for_each(|(el, sh)| *el = sh / 2);
            data[index] = 1.
        }
    }
}

/// Fills the differentiable leaf variable with elements drawn from the uniform distribution
/// *U(low, high)*.
///
/// # Arguments
///
/// * `param` - differentiable variable to initialize.
///
/// * `low` - lower bound of the uniform distribution.
///
/// * `high` - upper bound of the uniform distribution.
///
/// # Panics
///
/// If `low` >= `high`.
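///
/// # Examples
///
/// A minimal sketch, reusing the `Linear` layer from the module-level example:
///
/// ```
/// use neuronika::nn;
/// use neuronika::nn::init::uniform;
///
/// let lin = nn::Linear::new(10, 10);
///
/// // Draw every element of the weight from U(-0.1, 0.1).
/// uniform(&lin.weight, -0.1, 0.1);
/// ```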
pub fn uniform<D: Dimension>(param: &Learnable<D>, low: f32, high: f32) {
    let unif_dstr = Uniform::new(low, high);
    let mut t_rng = thread_rng();
    param
        .data_mut()
        .map_inplace(|el| *el = unif_dstr.sample(&mut t_rng));
}

/// Fills the differentiable leaf variable with elements drawn from the normal distribution
/// *N(mean, std^2)*.
///
/// # Arguments
///
/// * `param` - differentiable variable to initialize.
///
/// * `mean` - mean of the normal distribution.
///
/// * `std` - standard deviation of the normal distribution.
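///
/// # Examples
///
/// A minimal sketch, reusing the `Linear` layer from the module-level example:
///
/// ```
/// use neuronika::nn;
/// use neuronika::nn::init::normal;
///
/// let lin = nn::Linear::new(10, 10);
///
/// // Draw every element of the weight from N(0, 0.01), i.e. mean 0 and standard deviation 0.1.
/// normal(&lin.weight, 0., 0.1);
/// ```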
pub fn normal<D: Dimension>(param: &Learnable<D>, mean: f32, std: f32) {
    let norm_dstr = Normal::new(mean, std).unwrap();
    let mut t_rng = thread_rng();
    param
        .data_mut()
        .map_inplace(|el| *el = norm_dstr.sample(&mut t_rng));
}

/// Fills the differentiable leaf variable with values according to the method described in
/// [Understanding the difficulty of training deep feedforward
/// neural networks](http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf) - Glorot, X. &
/// Bengio, Y. (2010), using a uniform distribution.
///
/// # Arguments
///
/// * `param` - differentiable variable to initialize.
///
/// * `gain` - scaling factor. See also [`calculate_gain`](function@calculate_gain).
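///
/// # Examples
///
/// A minimal sketch, reusing the `Linear` layer and the gain helper from the module-level
/// example:
///
/// ```
/// use neuronika::nn;
/// use neuronika::nn::init::{calculate_gain, xavier_uniform};
///
/// let lin = nn::Linear::new(10, 10);
///
/// xavier_uniform(&lin.weight, calculate_gain("relu"));
/// ```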
pub fn xavier_uniform<D: Dimension>(param: &Learnable<D>, gain: f32) {
    let (fan_in, fan_out) = calculate_fan_in_fan_out(param);
    let std = gain * (2. / (fan_in + fan_out)).sqrt();
    let a = 3.0_f32.sqrt() * std;
    let unif_distr = Uniform::new(-a, a);
    let mut t_rng = thread_rng();
    param
        .data_mut()
        .map_inplace(|el| *el = unif_distr.sample(&mut t_rng));
}

/// Fills the differentiable leaf variable with values according to the method described in
/// [Understanding the difficulty of training deep feedforward
/// neural networks](http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf) - Glorot, X. &
/// Bengio, Y. (2010), using a normal distribution.
///
/// Also known as **Glorot initialization**.
///
/// # Arguments
///
/// * `param` - differentiable variable to initialize.
///
/// * `gain` - scaling factor. See also [`calculate_gain`](function@calculate_gain).
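///
/// # Examples
///
/// A minimal sketch, mirroring the module-level example:
///
/// ```
/// use neuronika::nn;
/// use neuronika::nn::init::{calculate_gain, xavier_normal};
///
/// let lin = nn::Linear::new(10, 10);
///
/// xavier_normal(&lin.weight, calculate_gain("tanh"));
/// ```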
pub fn xavier_normal<D: Dimension>(param: &Learnable<D>, gain: f32) {
    let (fan_in, fan_out) = calculate_fan_in_fan_out(param);
    let std = gain * (2. / (fan_in + fan_out)).sqrt();
    let norm_distr = Normal::new(0., std).unwrap();
    let mut t_rng = thread_rng();
    param
        .data_mut()
        .map_inplace(|el| *el = norm_distr.sample(&mut t_rng));
}