nevermind_neu/layers/fc_layer.rs

use std::collections::HashMap;

use ndarray::{indices, Zip};

use log::debug;

use rand::{thread_rng, Rng, ThreadRng};

use std::ops::{Deref, DerefMut};

use super::abstract_layer::{AbstractLayer, LayerBackwardResult, LayerForwardResult};
use crate::cpu_params::*;
use crate::util::*;

use crate::util::{Variant, WithParams};
/// Fully-connected layer
#[derive(Clone)]
pub struct FcLayer<T: Fn(f32) -> f32 + Clone, TD: Fn(f32) -> f32 + Clone> {
    pub lr_params: CpuParams,
    /// Number of neurons in this layer
    size: usize,
    /// Fraction of neurons zeroed on each forward pass
    dropout: f32,
    /// L2 regularization coefficient
    l2_regul: f32,
    /// L1 regularization coefficient
    l1_regul: f32,
    pub activation: Activation<T, TD>,
    rng: ThreadRng,
}

impl<T, TD> AbstractLayer for FcLayer<T, TD>
where
    T: Fn(f32) -> f32 + Sync + Clone + 'static,
    TD: Fn(f32) -> f32 + Sync + Clone + 'static,
{
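    /// Forward pass: for every row of the input batch computes
    /// `out = activation(W · x + bias)`. A contiguous window of
    /// `size * dropout` neurons, chosen at a random offset once per call
    /// and shared by all batch rows, is zeroed instead.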
    fn forward(&mut self, input: ParamsBlob) -> LayerForwardResult {
        let inp_m = input[0].get_2d_buf_t(TypeBuffer::Output);
        let inp_m = inp_m.borrow();
        let inp_m = inp_m.deref();

        let out_m = self.lr_params.get_2d_buf_t(TypeBuffer::Output);
        let mut out_m = out_m.borrow_mut();
        let out_m = out_m.deref_mut();

        let ws = self.lr_params.get_2d_buf_t(TypeBuffer::Weights);
        let ws = ws.borrow();
        let ws = ws.deref();

        let bias_out = self.lr_params.get_1d_buf_t(TypeBuffer::Bias);
        let bias_out = bias_out.borrow();
        let bias_out = bias_out.deref();

        // Pick one contiguous dropout window [dropout_n, dropout_y) of
        // `size * dropout` neurons, shared by every row of this batch
        let dropout_len = (self.size as f32 * self.dropout) as usize;
        let dropout_n = self.rng.gen_range(0, self.size - dropout_len);
        let dropout_y = dropout_n + dropout_len;

        // For each batch row: out = activation(W · x + bias)
        Zip::from(inp_m.rows())
            .and(out_m.rows_mut())
            .par_for_each(|inp_b, out_b| {
                let mul_res = ws.dot(&inp_b);

                let mut counter_neu = 0;

                // For each neuron: apply bias and activation, zeroing the
                // neuron instead if it falls inside the dropout window
                Zip::from(out_b)
                    .and(&mul_res)
                    .and(bias_out)
                    .for_each(|out_el, in_row, bias_el| {
                        if counter_neu >= dropout_n && counter_neu < dropout_y {
                            *out_el = 0.0;
                        } else {
                            *out_el = (self.activation.func)(in_row + bias_el);
                        }
                        counter_neu += 1;
                    });
            });

        debug!("[ok] FcLayer forward()");

        Ok(vec![self.lr_params.clone()])
    }

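    /// Backward pass. First backpropagates neuron gradients from the next
    /// layer (`err = deriv(output) * (next_ws^T · next_err)`), then averages
    /// them over the batch into per-weight and per-bias gradients,
    /// subtracting the L2/L1 penalty terms when their coefficients are
    /// non-zero.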
    fn backward(&mut self, prev_input: ParamsBlob, next_input: ParamsBlob) -> LayerBackwardResult {
        let next_err_vals = next_input[0].get_2d_buf_t(TypeBuffer::NeuGrad);
        let next_err_vals = next_err_vals.borrow();
        let next_err_vals = next_err_vals.deref();

        let next_ws = next_input[0].get_2d_buf_t(TypeBuffer::Weights);
        let next_ws = next_ws.borrow();
        let next_ws = next_ws.deref();

        let self_err_vals = self.lr_params.get_2d_buf_t(TypeBuffer::NeuGrad);
        let mut self_err_vals = self_err_vals.borrow_mut();
        let self_err_vals = self_err_vals.deref_mut();

        let self_output = self.lr_params.get_2d_buf_t(TypeBuffer::Output);
        let self_output = self_output.borrow();
        let self_output = self_output.deref();

        let self_bias_grad = self.lr_params.get_1d_buf_t(TypeBuffer::BiasGrad);
        let mut self_bias_grad = self_bias_grad.borrow_mut();
        let self_bias_grad = self_bias_grad.deref_mut();

        // Bias is only read here, so an immutable borrow suffices
        let self_bias = self.lr_params.get_1d_buf_t(TypeBuffer::Bias);
        let self_bias = self_bias.borrow();
        let self_bias = self_bias.deref();

        // Backpropagate neuron gradients for each batch row:
        // err = deriv(output) * (next_ws^T · next_err)
        Zip::from(self_err_vals.rows_mut())
            .and(next_err_vals.rows())
            .and(self_output.rows())
            .par_for_each(|err_val_r, next_err_val_r, output_r| {
                let mul_res = next_ws.t().dot(&next_err_val_r);

                Zip::from(err_val_r).and(output_r).and(&mul_res).for_each(
                    |err_val, output, col| {
                        *err_val = (self.activation.func_deriv)(*output) * col;
                    },
                );
            });

        debug!("[FcLayer] neuron gradients computed");

        // Per-weight gradient, using the previous layer's output
        let prev_input = prev_input[0].get_2d_buf_t(TypeBuffer::Output);
        let prev_input = prev_input.borrow();
        let prev_input = prev_input.deref();

        let ws = self.lr_params.get_2d_buf_t(TypeBuffer::Weights);
        let ws = ws.borrow();
        let ws = ws.deref();

        let ws_grad = self.lr_params.get_2d_buf_t(TypeBuffer::WeightsGrad);
        let mut ws_grad = ws_grad.borrow_mut();
        let ws_grad = ws_grad.deref_mut();

        // For each weight (self_neu_idx, prev_neu_idx): average
        // prev_output * err over the batch, then subtract the
        // regularization penalties
        let ws_idxs = indices(ws_grad.dim());
        Zip::from(ws_grad)
            .and(ws)
            .and(ws_idxs)
            .par_for_each(|val_ws_grad, val_ws, ws_idx| {
                let self_neu_idx = ws_idx.0;
                let prev_neu_idx = ws_idx.1;

                let mut avg = 0.0;

                Zip::from(prev_input.column(prev_neu_idx))
                    .and(self_err_vals.column(self_neu_idx))
                    .for_each(|prev_val, err_val| {
                        avg += prev_val * err_val;
                    });

                avg /= prev_input.column(prev_neu_idx).len() as f32;

                let mut l2_penalty = 0.0;
                if self.l2_regul != 0.0 {
                    l2_penalty = self.l2_regul * val_ws;
                }

                let mut l1_penalty = 0.0;
                if self.l1_regul != 0.0 {
                    l1_penalty = self.l1_regul * sign(*val_ws);
                }

                *val_ws_grad = avg - l2_penalty - l1_penalty;
            });

        // Per-bias gradient: mean of the neuron gradient over the batch,
        // minus the regularization penalties
        Zip::from(self_err_vals.columns())
            .and(self_bias_grad)
            .and(self_bias)
            .for_each(|err_vals, bias_grad, bias| {
                let grad = err_vals.mean().unwrap();

                let mut l2_penalty = 0.0;
                if self.l2_regul != 0.0 {
                    l2_penalty = self.l2_regul * *bias;
                }

                let mut l1_penalty = 0.0;
                if self.l1_regul != 0.0 {
                    l1_penalty = self.l1_regul * sign(*bias);
                }

                *bias_grad = grad - l2_penalty - l1_penalty;
            });

        debug!("[ok] FcLayer backward()");

        Ok(vec![self.lr_params.clone()])
    }

    fn cpu_params(&self) -> Option<CpuParams> {
        Some(self.lr_params.clone())
    }

    fn set_cpu_params(&mut self, lp: CpuParams) {
        self.lr_params = lp;
    }

    fn layer_type(&self) -> &str {
        "FcLayer"
    }

    /// Careful: this method overwrites the weights and all other params.
    fn set_input_shape(&mut self, sh: &[usize]) {
        self.lr_params = CpuParams::new_with_bias(self.size, sh[0]);
    }

    fn size(&self) -> usize {
        self.size
    }

    fn copy_layer(&self) -> Box<dyn AbstractLayer> {
        let mut copy_l = Box::new(FcLayer::new(self.size, self.activation.clone()));
        copy_l.set_cpu_params(self.lr_params.copy());
        copy_l
    }

    fn clone_layer(&self) -> Box<dyn AbstractLayer> {
        Box::new(self.clone())
    }
}

impl<T, TD> FcLayer<T, TD>
where
    T: Fn(f32) -> f32 + Clone,
    TD: Fn(f32) -> f32 + Clone,
{
    pub fn new(size: usize, activation: Activation<T, TD>) -> Self {
        Self {
            size,
            dropout: 0.0,
            lr_params: CpuParams::empty(),
            activation,
            l2_regul: 0.0,
            l1_regul: 0.0,
            rng: thread_rng(),
        }
    }

    pub fn new_box(size: usize, activation: Activation<T, TD>) -> Box<Self> {
        Box::new(FcLayer::new(size, activation))
    }

    pub fn dropout(mut self, val: f32) -> Self {
        self.dropout = val;
        self
    }

    pub fn set_dropout(&mut self, val: f32) {
        self.dropout = val;
    }

    pub fn l2_regularization(mut self, coef: f32) -> Self {
        self.l2_regul = coef;
        self
    }

    pub fn set_l2_regularization(&mut self, coef: f32) {
        self.l2_regul = coef;
    }

    pub fn l1_regularization(mut self, coef: f32) -> Self {
        self.l1_regul = coef;
        self
    }

    pub fn set_l1_regularization(&mut self, coef: f32) {
        self.l1_regul = coef;
    }
}
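
// Usage sketch (hypothetical, not part of the original file): how the
// builder-style API above composes, assuming `Activation` can be built
// from the `name` / `func` / `func_deriv` fields this layer reads; the
// crate may instead ship ready-made activations. Note that `func_deriv`
// receives the activation *output*, so the sigmoid derivative is written
// in terms of y.
//
//     let sigmoid = Activation {
//         name: "sigmoid".to_owned(),
//         func: |x: f32| 1.0 / (1.0 + (-x).exp()),
//         func_deriv: |y: f32| y * (1.0 - y),
//     };
//     let mut layer = FcLayer::new(64, sigmoid)
//         .dropout(0.1)            // zero ~10% of neurons per forward pass
//         .l2_regularization(1e-4)
//         .l1_regularization(1e-5);
//     layer.set_input_shape(&[128]); // allocate weights for 128 inputs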

impl<T, TD> WithParams for FcLayer<T, TD>
where
    T: Fn(f32) -> f32 + Sync + Clone + 'static,
    TD: Fn(f32) -> f32 + Sync + Clone + 'static,
{
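    /// Serializes this layer's hyperparameters (size, activation name,
    /// L2/L1 coefficients, dropout) into a string-keyed map.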
    fn cfg(&self) -> HashMap<String, Variant> {
        let mut cfg: HashMap<String, Variant> = HashMap::new();

        cfg.insert("size".to_owned(), Variant::Int(self.size as i32));
        // cfg.insert("prev_size".to_owned(), Variant::Int(self.prev_size as i32));
        cfg.insert(
            "activation".to_owned(),
            Variant::String(self.activation.name.clone()),
        );
        cfg.insert("l2_regul".to_owned(), Variant::Float(self.l2_regul));
        cfg.insert("l1_regul".to_owned(), Variant::Float(self.l1_regul));
        cfg.insert("dropout".to_owned(), Variant::Float(self.dropout));

        cfg
    }

    /// Applies hyperparameters from `cfg`; a positive `size` entry resets
    /// the layer's parameters.
    fn set_cfg(&mut self, cfg: &HashMap<String, Variant>) {
        let mut size: usize = 0;

        if let Some(var_size) = cfg.get("size") {
            if let Variant::Int(var_size) = var_size {
                size = *var_size as usize;
            }
        }

        if size > 0 {
            self.size = size;
            self.lr_params = CpuParams::empty();
        }

        if let Some(dropout) = cfg.get("dropout") {
            if let Variant::Float(dropout) = dropout {
                self.dropout = *dropout;
            }
        }

        if let Some(l1_regul) = cfg.get("l1_regul") {
            if let Variant::Float(l1_regul) = l1_regul {
                self.l1_regul = *l1_regul;
            }
        }

        if let Some(l2_regul) = cfg.get("l2_regul") {
            if let Variant::Float(l2_regul) = l2_regul {
                self.l2_regul = *l2_regul;
            }
        }
    }
}
331}