nevermind_neu/layers/
softmax_loss_layer.rs

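//! Softmax output layer with a cross-entropy-style gradient, used as the
//! final (loss) layer of a network built from `AbstractLayer` items.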
use std::collections::HashMap;
use std::ops::{Deref, DerefMut};
use std::sync::atomic::{AtomicU32, Ordering};

use ndarray::{Axis, Zip, indices};

use log::debug;

use crate::cpu_params::*;
use crate::layers::*;
use crate::util::*;

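/// Final network layer: applies its weight matrix to the incoming
/// activations and turns each batch row into a probability distribution
/// with a numerically stable softmax.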
#[derive(Default, Clone)]
pub struct SoftmaxLossLayer {
    /// Number of output classes.
    pub size: usize,
    /// Weights, activations and gradients for this layer.
    pub lr_params: CpuParams,
    /// Latest metric values (currently just "accuracy"), keyed by name.
    metrics: HashMap<String, f64>,
}

impl AbstractLayer for SoftmaxLossLayer {
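    /// Forward pass: for every sample in the batch, computes
    /// `out = softmax(weights · input)` using the standard max-shift for
    /// numerical stability.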
    fn forward(&mut self, input: ParamsBlob) -> LayerForwardResult {
        let inp_m = input[0].get_2d_buf_t(TypeBuffer::Output);
        let inp_m = inp_m.borrow();
        let inp_m = inp_m.deref();

        let out_m = self.lr_params.get_2d_buf_t(TypeBuffer::Output);
        let mut out_m = out_m.borrow_mut();
        let out_m = out_m.deref_mut();

        let ws = self.lr_params.get_2d_buf_t(TypeBuffer::Weights);
        let ws = ws.borrow();
        let ws = ws.deref();

        Zip::from(inp_m.rows())
            .and(out_m.rows_mut())
            .par_for_each(|inp_b, out_b| {
                // For each sample in the batch: out = softmax(weights · input).
                let mut logits = ws.dot(&inp_b);

                // Standard max-shift for numerical stability: exp(v - max)
                // cannot overflow, and the shift cancels in the ratio below.
                let max_logit = array_helpers::max(&logits);
                logits.mapv_inplace(|v| (v - max_logit).exp());
                let exp_sum = logits.sum();

                Zip::from(out_b).and(&logits).for_each(|out_el, exp_v| {
                    // Normalize each exponentiated logit into a probability.
                    *out_el = exp_v / exp_sum;
                });
            });

        debug!("[ok] SoftmaxLossLayer forward()");

        Ok(vec![self.lr_params.clone()])
    }

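    /// Backward pass for the output layer. For softmax combined with a
    /// cross-entropy loss, the gradient w.r.t. the logits is
    /// `dL/dz_i = p_i - y_i`; this layer stores the negated form
    /// `y_i - p_i` in the `NeuGrad` buffer (the crate's sign convention,
    /// presumably so the update step can be applied additively). It also
    /// records the batch accuracy as a metric.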
    fn backward_output(
        &mut self,
        prev_input: ParamsBlob,
        expected_vec: Array2D,
    ) -> LayerBackwardResult {
        let prev_input = prev_input[0].get_2d_buf_t(TypeBuffer::Output);
        let prev_input = prev_input.borrow();
        let prev_input = prev_input.deref();

        let self_neu_grad = self.lr_params.get_2d_buf_t(TypeBuffer::NeuGrad);
        let mut self_neu_grad = self_neu_grad.borrow_mut();
        let self_neu_grad = self_neu_grad.deref_mut();

        // The layer's own output is only read here, so an immutable
        // borrow is sufficient.
        let self_output = self.lr_params.get_2d_buf_t(TypeBuffer::Output);
        let self_output = self_output.borrow();
        let self_output = self_output.deref();

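        // One parallel pass over the batch does two jobs: it writes the
        // per-logit gradient into `NeuGrad` and counts correct argmax
        // predictions for the accuracy metric.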
        let match_cnt = AtomicU32::new(0);
        let batch_len = self_output.len_of(Axis(0)) as f64;

        Zip::from(self_neu_grad.rows_mut())
            .and(self_output.rows())
            .and(expected_vec.rows())
            .par_for_each(|err_val_b, out_b, expected_b| {
                // For each sample in the batch.
                let mut out_idx: Option<usize> = None;
                let mut out_max_val = f32::NEG_INFINITY;
                let mut expected_idx: Option<usize> = None;

                Zip::indexed(err_val_b).and(out_b).and(expected_b).for_each(
                    |idx, err_val, output, expected| {
                        // Predicted class: argmax of the softmax output.
                        if *output > out_max_val {
                            out_max_val = *output;
                            out_idx = Some(idx);
                        }

                        // Per-logit gradient, stored as the descent
                        // direction (target - output).
                        if *expected == 1.0 {
                            expected_idx = Some(idx);
                            *err_val = 1.0 - *output;
                        } else {
                            *err_val = -*output;
                        }
                    },
                );

                if expected_idx.is_some() && expected_idx == out_idx {
                    match_cnt.fetch_add(1, Ordering::Relaxed);
                }
            });

        // The parallel loop has joined, so a relaxed load observes all counts.
        let accuracy = match_cnt.load(Ordering::Relaxed) as f64 / batch_len;
        self.metrics.insert("accuracy".to_string(), accuracy);

        let ws_grad = self.lr_params.get_2d_buf_t(TypeBuffer::WeightsGrad);
        let mut ws_grad = ws_grad.borrow_mut();
        let ws_grad = ws_grad.deref_mut();

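        // Weight gradient: dW[i][j] is the batch average of
        // prev_input[b][j] * neu_grad[b][i], i.e. the outer product of the
        // incoming activations and the logit gradient, averaged over samples.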
        let ws_idxs = indices(ws_grad.dim());
        Zip::from(ws_grad)
            .and(ws_idxs)
            .for_each(|val_ws_grad, (self_neu_idx, prev_neu_idx)| {
                let mut avg = 0.0;

                Zip::from(prev_input.column(prev_neu_idx))
                    .and(self_neu_grad.column(self_neu_idx))
                    .for_each(|prev_val, err_val| {
                        avg += prev_val * err_val;
                    });

                *val_ws_grad = avg / prev_input.column(prev_neu_idx).len() as f32;
            });

        debug!("[ok] SoftmaxLossLayer backward()");

        Ok(vec![self.lr_params.clone()])
    }

    fn layer_type(&self) -> &str {
        "SoftmaxLossLayer"
    }

    fn cpu_params(&self) -> Option<CpuParams> {
        Some(self.lr_params.clone())
    }

    fn set_cpu_params(&mut self, lp: CpuParams) {
        self.lr_params = lp;
    }

    fn size(&self) -> usize {
        self.size
    }

    fn metrics(&self) -> Option<&HashMap<String, f64>> {
        Some(&self.metrics)
    }

    // Only the weight buffer is trained; its gradient lives in WeightsGrad.
    fn trainable_bufs(&self) -> TrainableBufsIds {
        (
            &[TypeBuffer::Weights as i32],
            &[TypeBuffer::WeightsGrad as i32],
        )
    }

    fn serializable_bufs(&self) -> &[i32] {
        &[TypeBuffer::Weights as i32]
    }

    fn copy_layer(&self) -> Box<dyn AbstractLayer> {
        let mut copy_l = SoftmaxLossLayer::new(self.size);
        copy_l.set_cpu_params(self.lr_params.copy());
        Box::new(copy_l)
    }

    fn clone_layer(&self) -> Box<dyn AbstractLayer> {
        Box::new(self.clone())
    }

    // Allocates parameter buffers for `size` neurons fed by `sh[0]` inputs.
    fn set_input_shape(&mut self, sh: &[usize]) {
        self.lr_params = CpuParams::new(self.size, sh[0]);
    }
}

impl SoftmaxLossLayer {
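    /// Creates a softmax layer with `size` output classes. A minimal usage
    /// sketch (illustrative only; in practice the layer is wired into a
    /// network that drives `set_input_shape`, `forward`, and
    /// `backward_output`):
    ///
    /// ```ignore
    /// let mut layer = SoftmaxLossLayer::new(10); // 10 classes
    /// layer.set_input_shape(&[128]);             // previous layer width
    /// assert_eq!(layer.size(), 10);
    /// ```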
    pub fn new(size: usize) -> Self {
        Self {
            size,
            lr_params: CpuParams::empty(),
            metrics: HashMap::new(),
        }
    }

    pub fn new_box(size: usize) -> Box<Self> {
        Box::new(SoftmaxLossLayer::new(size))
    }
}

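/// Only `size` goes into the serializable config; `set_cfg` drops the old
/// parameter buffers so they can be rebuilt (via `set_input_shape`) when the
/// layer is wired back into a network.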
impl WithParams for SoftmaxLossLayer {
    fn cfg(&self) -> HashMap<String, Variant> {
        let mut cfg = HashMap::new();

        cfg.insert("size".to_owned(), Variant::Int(self.size as i32));

        cfg
    }

    fn set_cfg(&mut self, cfg: &HashMap<String, Variant>) {
        let mut size = 0;

        if let Some(Variant::Int(var_size)) = cfg.get("size") {
            size = *var_size as usize;
        }

        if size > 0 {
            self.size = size;
            self.lr_params = CpuParams::empty();
        }
    }
}
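
#[cfg(test)]
mod tests {
    use ndarray::Array1;

    // Standalone sanity check of the stabilized softmax used in forward():
    // subtracting the max logit before exponentiating avoids f32 overflow,
    // and the shift cancels in the normalization, so the probabilities still
    // sum to one. This is a minimal sketch of the math only; it does not
    // exercise the layer's buffer plumbing.
    #[test]
    fn stabilized_softmax_sums_to_one() {
        let logits = Array1::from(vec![1000.0_f32, 1001.0, 1002.0]);

        // Naive exp() of these logits would overflow to infinity.
        assert!(1000.0_f32.exp().is_infinite());

        let max_logit = logits.fold(f32::NEG_INFINITY, |m, &v| m.max(v));
        let exps = logits.mapv(|v| (v - max_logit).exp());
        let sum = exps.sum();
        let probs = exps / sum;

        assert!((probs.sum() - 1.0).abs() < 1e-6);
    }
}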