// nevermind_neu/layers/softmax_loss_layer.rs

use std::{
    ops::{Deref, DerefMut},
    sync::{
        atomic::{AtomicU32, Ordering},
    },
};

use std::collections::HashMap;
use std::f32::consts::E;

use ndarray::{Array1, Axis, Zip, indices};

use log::{debug, info, warn};

use crate::cpu_params::*;
use crate::layers::*;
use crate::util::*;
/// Output (classification) layer: applies a weight matrix to its input and
/// a numerically stable softmax over the result, yielding per-class
/// probabilities. Also records an "accuracy" metric during the backward pass.
#[derive(Default, Clone)]
pub struct SoftmaxLossLayer {
    /// Number of output neurons (classes).
    pub size: usize,
    /// CPU-side parameter buffers (weights, output, gradients), accessed via
    /// `get_2d_buf_t(TypeBuffer::…)`.
    pub lr_params: CpuParams,
    /// Metrics from the most recent backward pass; "accuracy" is written by
    /// `backward_output`.
    metrics: HashMap<String, f64>,
}
25
26impl AbstractLayer for SoftmaxLossLayer {
27 fn forward(&mut self, input: ParamsBlob) -> LayerForwardResult {
28 let inp_m = input[0].get_2d_buf_t(TypeBuffer::Output);
29 let inp_m = inp_m.borrow();
30 let inp_m = inp_m.deref();
31
32 let out_m = self.lr_params.get_2d_buf_t(TypeBuffer::Output);
33 let mut out_m = out_m.borrow_mut();
34 let out_m = out_m.deref_mut();
35
36 let ws = self.lr_params.get_2d_buf_t(TypeBuffer::Weights);
37 let ws = ws.borrow();
38 let ws = ws.deref();
39
40 Zip::from(inp_m.rows())
41 .and(out_m.rows_mut())
42 .par_for_each(|inp_b, out_b| {
43 let mut mul_res = ws.dot(&inp_b);
45
46 let e_rows_max = array_helpers::max(&mul_res);
48 mul_res = mul_res - e_rows_max;
49 mul_res = mul_res.mapv_into(|v| E.powf(v));
50 let sum_rows = mul_res.sum();
51
52 Zip::from(out_b).and(&mul_res).for_each(|out_el, in_e| {
53 *out_el = in_e / sum_rows;
55 });
56 });
57
58 debug!("[ok] SoftmaxLossLayer forward()");
59
60 Ok(vec![self.lr_params.clone()])
61 }
62
63 fn backward_output(
64 &mut self,
65 prev_input: ParamsBlob,
66 expected_vec: Array2D,
67 ) -> LayerBackwardResult {
68 let prev_input = &prev_input[0].get_2d_buf_t(TypeBuffer::Output);
69 let prev_input = prev_input.borrow();
70 let prev_input = prev_input.deref();
71
72 let self_neu_grad = self.lr_params.get_2d_buf_t(TypeBuffer::NeuGrad);
73 let mut self_neu_grad = self_neu_grad.borrow_mut();
74 let self_neu_grad = self_neu_grad.deref_mut();
75
76 let self_output = self.lr_params.get_2d_buf_t(TypeBuffer::Output);
77 let mut self_output = self_output.borrow_mut();
78 let self_output = self_output.deref_mut();
79
80 let match_cnt = AtomicU32::new(0);
81 let batch_len = self_output.len_of(Axis(0)) as f64;
82
83 Zip::from(self_neu_grad.rows_mut())
84 .and(self_output.rows())
85 .and(expected_vec.rows())
86 .par_for_each(|err_val_b, out_b, expected_b| {
87 let (mut out_idx, mut out_max_val) = (-1, 0.0);
89 let mut idx = 0;
90 let mut b_expected_idx = -1;
91
92 Zip::from(err_val_b).and(out_b).and(expected_b).for_each(
93 |err_val, output, expected| {
94 if *output > out_max_val {
95 out_max_val = *output;
96 out_idx = idx;
97 }
98
99 if *expected == 1.0 {
100 b_expected_idx = idx;
101 *err_val = 1.0 - *output;
102 } else {
103 *err_val = (-1.0) * *output;
104 }
105
106 idx += 1;
107 },
108 );
109
110 if b_expected_idx == out_idx {
111 match_cnt.fetch_add(1, Ordering::Relaxed);
112 }
113 });
114
115 let accuracy = match_cnt.load(Ordering::SeqCst) as f64 / batch_len;
116 self.metrics.insert("accuracy".to_string(), accuracy);
117
118 let ws_grad = self.lr_params.get_2d_buf_t(TypeBuffer::WeightsGrad);
119 let mut ws_grad = ws_grad.borrow_mut();
120 let ws_grad = ws_grad.deref_mut();
121
122 let ws_idxs = indices(ws_grad.dim());
124 Zip::from(ws_grad)
125 .and(ws_idxs)
126 .for_each(|val_ws_grad, ws_idx| {
127 let self_neu_idx = ws_idx.0;
128 let prev_neu_idx = ws_idx.1;
129
130 let mut avg = 0.0;
131
132 Zip::from(prev_input.column(prev_neu_idx))
133 .and(self_neu_grad.column(self_neu_idx))
134 .for_each(|prev_val, err_val| {
135 avg += prev_val * err_val;
136 });
137
138 avg = avg / prev_input.column(prev_neu_idx).len() as f32;
139
140 *val_ws_grad = avg;
141 });
142
143 debug!("[ok] SoftmaxLossLayer backward()");
144
145 Ok(vec![self.lr_params.clone()])
146 }
147
148 fn layer_type(&self) -> &str {
149 "SoftmaxLossLayer"
150 }
151
152 fn cpu_params(&self) -> Option<CpuParams> {
153 Some(self.lr_params.clone())
154 }
155
156 fn set_cpu_params(&mut self, lp: CpuParams) {
157 self.lr_params = lp;
158 }
159
160 fn size(&self) -> usize {
161 self.size
162 }
163
164 fn metrics(&self) -> Option<&HashMap<String, f64>> {
165 Some(&self.metrics)
166 }
167
168 fn trainable_bufs(&self) -> TrainableBufsIds {
169 (
170 &[TypeBuffer::Weights as i32],
171 &[TypeBuffer::WeightsGrad as i32],
172 )
173 }
174
175 fn serializable_bufs(&self) -> &[i32] {
176 &[TypeBuffer::Weights as i32]
177 }
178
179 fn copy_layer(&self) -> Box<dyn AbstractLayer> {
180 let mut copy_l = SoftmaxLossLayer::new(self.size);
181 copy_l.set_cpu_params(self.lr_params.copy());
182 Box::new(copy_l)
183 }
184
185 fn clone_layer(&self) -> Box<dyn AbstractLayer> {
186 Box::new(self.clone())
187 }
188
189 fn set_input_shape(&mut self, sh: &[usize]) {
190 self.lr_params = CpuParams::new(self.size, sh[0]);
191 }
192}
193
194impl SoftmaxLossLayer {
195 pub fn new(size: usize) -> Self {
196 Self {
197 size,
198 lr_params: CpuParams::empty(),
199 metrics: HashMap::new(),
200 }
201 }
202
203 pub fn new_box(size: usize) -> Box<Self> {
204 Box::new(SoftmaxLossLayer::new(size))
205 }
206}
207
208impl WithParams for SoftmaxLossLayer {
209 fn cfg(&self) -> HashMap<String, Variant> {
210 let mut cfg = HashMap::new();
211
212 cfg.insert("size".to_owned(), Variant::Int(self.size as i32));
213
214 cfg
215 }
216
217 fn set_cfg(&mut self, cfg: &HashMap<String, Variant>) {
218 let mut size = 0;
219
220 if let Some(var_size) = cfg.get("size") {
221 if let Variant::Int(var_size) = var_size {
222 size = *var_size as usize;
223 }
224 }
225
226 if size > 0 {
227 self.size = size;
228 self.lr_params = CpuParams::empty();
229 }
230 }
231}