nevermind_neu/layers/euclidean_loss_layer_ocl.rs

use crate::layers::*;
use crate::cpu_params::*;
use crate::ocl::*;
use crate::util::*;

use log::{debug, warn};

use ocl::MemFlags;
use ocl::{Buffer, Context, Device, Kernel, Program, Queue};

use std::{
    collections::HashMap,
    ops::{Deref, DerefMut},
};

// Forward kernel: fully connected pass, one work-item per (neuron, batch sample).
// Computes out = activation(ws * in + bias); activation() is provided by the
// activation source prepended in init_ocl().
static EUCLIDEAN_LOSS_KERNEL_FWD: &str = r#"
    __kernel void euclidean_loss(
                __private int const batch_size,
                __private int const prev_shape,
                __private int const self_shape,
                __global const float *in,
                __global const float *bias,
                __global const float *ws,
                __global float *out)
    {
        uint const idx = get_global_id(0);
        __private uint const real_idx = idx % self_shape;
        __private uint const batch_idx = idx / self_shape;

        __private float sum = 0.0f;

        for (int j = 0; j < prev_shape; ++j) {
            sum += ws[real_idx * prev_shape + j] * in[j + prev_shape * batch_idx];
        }

        out[idx] = activation(sum + bias[real_idx]);
    }
"#;

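// A minimal CPU reference of what the forward kernel above computes, kept as a
// hedged sketch for readability (it is not used by the OCL path): for every batch
// sample b and neuron n, out[b * self_shape + n] =
// activation(sum_j ws[n * prev_shape + j] * in[b * prev_shape + j] + bias[n]).
// The `act` closure stands in for whichever activation() the prepended OCL source defines.
#[allow(dead_code)]
fn euclidean_loss_forward_reference(
    batch_size: usize,
    prev_shape: usize,
    self_shape: usize,
    input: &[f32], // batch_size * prev_shape
    bias: &[f32],  // self_shape
    ws: &[f32],    // self_shape * prev_shape
    act: impl Fn(f32) -> f32,
) -> Vec<f32> {
    let mut out = vec![0.0f32; batch_size * self_shape];
    for b in 0..batch_size {
        for n in 0..self_shape {
            let mut sum = 0.0f32;
            for j in 0..prev_shape {
                sum += ws[n * prev_shape + j] * input[b * prev_shape + j];
            }
            out[b * self_shape + n] = act(sum + bias[n]);
        }
    }
    out
}
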
// Backward kernel: one work-item per output neuron. Fills the per-neuron error
// gradient for every batch sample, then the batch-averaged weight gradients.
static EUCLIDEAN_LOSS_KERNEL_BWD: &str = r#"
    __kernel void euclidean_loss_grad(
                __private int const batch_size,
                __private int const prev_shape,
                __private int const self_shape,
                __global const float *self_out,
                __global const float *prev_out,
                __global const float *labels,
                __global float *neu_grad, // per-neuron error gradient
                __global float *ws_grad)
    {
        uint const idx = get_global_id(0);

        for (int i = 0; i < batch_size; ++i) {
            __private int inner_idx = i * self_shape + idx;
            neu_grad[inner_idx] = (labels[inner_idx] - self_out[inner_idx]) * deriv(self_out[inner_idx]);
        }

        for (int i = 0; i < prev_shape; ++i) {
            __private float avg_grad = 0.0f;

            for (int j = 0; j < batch_size; ++j) {
                avg_grad += neu_grad[j * self_shape + idx] * prev_out[j * prev_shape + i];
            }

            avg_grad = avg_grad / batch_size;

            ws_grad[idx * prev_shape + i] = avg_grad;
        }
    }
"#;

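// A hedged CPU sketch of the backward kernel above (again, not used by the OCL
// path): the per-neuron gradient is (label - out) * deriv(out), and each weight
// gradient is that value multiplied by the previous layer's output, averaged over
// the batch. The `deriv` closure stands in for whichever deriv() the prepended
// OCL source defines.
#[allow(dead_code)]
fn euclidean_loss_backward_reference(
    batch_size: usize,
    prev_shape: usize,
    self_shape: usize,
    self_out: &[f32], // batch_size * self_shape
    prev_out: &[f32], // batch_size * prev_shape
    labels: &[f32],   // batch_size * self_shape
    deriv: impl Fn(f32) -> f32,
) -> (Vec<f32>, Vec<f32>) {
    let mut neu_grad = vec![0.0f32; batch_size * self_shape];
    let mut ws_grad = vec![0.0f32; self_shape * prev_shape];

    for b in 0..batch_size {
        for n in 0..self_shape {
            let i = b * self_shape + n;
            neu_grad[i] = (labels[i] - self_out[i]) * deriv(self_out[i]);
        }
    }

    for n in 0..self_shape {
        for j in 0..prev_shape {
            let mut avg = 0.0f32;
            for b in 0..batch_size {
                avg += neu_grad[b * self_shape + n] * prev_out[b * prev_shape + j];
            }
            ws_grad[n * prev_shape + j] = avg / batch_size as f32;
        }
    }

    (neu_grad, ws_grad)
}
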
pub struct EuclideanLossLayerOcl {
    cpu_params: CpuParams,
    ocl_params: OclParams,
    size: usize,
    batch_size: usize,
    ocl_queue: Option<Queue>,
    ocl_kernel: Option<Kernel>,
    ocl_kernel_grad: Option<Kernel>,
    ocl_act_func: OclActivationFunc,
}

impl EuclideanLossLayerOcl {
    pub fn new(size: usize) -> Self {
        Self {
            cpu_params: CpuParams::empty(),
            ocl_params: OclParams::empty(),
            size,
            batch_size: 1,
            ocl_queue: None,
            ocl_kernel: None,
            ocl_kernel_grad: None,
            ocl_act_func: OclActivationFunc::Raw,
        }
    }

    pub fn new_with_activation(size: usize, act: OclActivationFunc) -> Self {
        let mut out = EuclideanLossLayerOcl::new(size);
        out.set_activation_function(act);
        out
    }

    /// Must be called before init_ocl(); changing the activation afterwards has
    /// no effect on the already built kernels.
    pub fn set_activation_function(&mut self, act: OclActivationFunc) {
        if self.ocl_kernel.is_some() {
            warn!("Setting OCL activation function while the kernel is already built");
        }

        self.ocl_act_func = act;
    }
}
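
// A hedged construction sketch; the `ctx`, `device`, and `queue` values are assumed
// to come from the crate's usual OpenCL setup, and the sizes are arbitrary:
//
//     let mut layer = EuclideanLossLayerOcl::new_with_activation(10, OclActivationFunc::Sigmoid);
//     layer.init_ocl(&ctx, device, queue.clone()).expect("init_ocl failed");
//     layer.set_input_shape(&[128]);
//     layer.set_batch_size(32);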

impl AbstractLayer for EuclideanLossLayerOcl {
    fn layer_type(&self) -> &str {
        "EuclideanLossLayerOcl"
    }

    fn size(&self) -> usize {
        self.size
    }

    fn set_batch_size(&mut self, batch_size: usize) {
        self.batch_size = batch_size;

        // The forward kernel runs one work-item per output value, so its global
        // work size is size * batch_size; the backward kernel stays at size.
        self.ocl_kernel
            .as_mut()
            .unwrap()
            .set_default_global_work_size(ocl::SpatialDims::One(self.size * self.batch_size));

        self.ocl_kernel
            .as_mut()
            .unwrap()
            .set_arg("batch_size", batch_size as i32)
            .expect("[euc_ocl] Failed to set batch_size arg");
        self.ocl_kernel_grad
            .as_mut()
            .unwrap()
            .set_arg("batch_size", batch_size as i32)
            .expect("[euc_ocl] Failed to set batch_size arg");

        self.ocl_params
            .fit_to_batch_size_ocl(
                self.size,
                batch_size,
                self.ocl_queue.as_ref().unwrap().clone(),
            )
            .expect("[euc_ocl] Failed to fit to batch size");

        self.cpu_params.fit_to_batch_size(batch_size);
    }

    fn cpu_params(&self) -> Option<CpuParams> {
        Some(self.cpu_params.clone())
    }

    fn set_cpu_params(&mut self, lp: CpuParams) {
        self.cpu_params = lp;
    }

    fn trainable_bufs(&self) -> TrainableBufsIds {
        (
            &[TypeBuffer::Weights as i32, TypeBuffer::Bias as i32],
            &[TypeBuffer::WeightsGrad as i32, TypeBuffer::NeuGrad as i32],
        )
    }

    fn set_input_shape(&mut self, sh: &[usize]) {
        let kern = self.ocl_kernel.as_mut().unwrap();
        kern.set_arg("prev_shape", sh[0] as i32)
            .expect("[euc_ocl] Failed to set prev_shape arg");

        let kern_grad = self.ocl_kernel_grad.as_mut().unwrap();
        kern_grad
            .set_arg("prev_shape", sh[0] as i32)
            .expect("[euc_ocl] Failed to set prev_shape arg");

        let queue = self.ocl_queue.as_ref().unwrap();
        // Buffer routine
        self.ocl_params =
            init_ocl_params(queue.clone(), self.size, sh, true).expect("Buffer create failure");

        self.cpu_params = CpuParams::new(self.size, sh[0]);
    }

    // Deep-copies layer memory (weights, output, ...)
    fn copy_layer(&self) -> Box<dyn AbstractLayer> {
        panic!("Do not copy OCL layers!");
    }

    // Copies only the Rc handles
    fn clone_layer(&self) -> Box<dyn AbstractLayer> {
        panic!("Do not copy OCL layers!");
    }
}

impl AbstractLayerOcl for EuclideanLossLayerOcl {
    fn init_ocl(
        &mut self,
        ocl_ctx: &Context,
        device: Device,
        queue: Queue,
    ) -> Result<(), Box<dyn std::error::Error>> {
        let fwd_act = match self.ocl_act_func {
            OclActivationFunc::Sigmoid => OCL_ACTIVATION_SIGMOID,
            OclActivationFunc::Tanh => OCL_ACTIVATION_TANH,
            OclActivationFunc::ReLU => OCL_ACTIVATION_RELU,
            OclActivationFunc::Raw => OCL_ACTIVATION_RAW,
            OclActivationFunc::LeakyReLU => OCL_ACTIVATION_LEAKY_RELU,
            _ => todo!(),
        };

        let bwd_act = match self.ocl_act_func {
            OclActivationFunc::Sigmoid => OCL_ACTIVATION_SIGMOID_DERIV,
            OclActivationFunc::Tanh => OCL_ACTIVATION_TANH_DERIV,
            OclActivationFunc::ReLU => OCL_ACTIVATION_RELU_DERIV,
            OclActivationFunc::LeakyReLU => OCL_ACTIVATION_LEAKY_RELU_DERIV,
            OclActivationFunc::Raw => OCL_ACTIVATION_RAW_DERIV,
            _ => todo!(),
        };

        // The activation sources define the activation()/deriv() functions the
        // kernels call, so they are prepended to the kernel sources here.
        let program_fwd = [fwd_act, EUCLIDEAN_LOSS_KERNEL_FWD].join("\n");
        let program_bwd = [bwd_act, EUCLIDEAN_LOSS_KERNEL_BWD].join("\n");

        let program = Program::builder()
            .devices(device)
            .src(program_fwd)
            .build(ocl_ctx)?;
        let program_grad = Program::builder()
            .devices(device)
            .src(program_bwd)
            .build(ocl_ctx)?;

        let kern_fwd = Kernel::builder()
            .name("euclidean_loss")
            .program(&program)
            .queue(queue.clone())
            .global_work_size(self.size * self.batch_size)
            .arg_named("batch_size", 0i32)
            .arg_named("prev_shape", 0i32)
            .arg_named("self_shape", self.size as i32)
            .arg_named("in", None::<&Buffer<f32>>)
            .arg_named("bias", None::<&Buffer<f32>>)
            .arg_named("ws", None::<&Buffer<f32>>)
            .arg_named("out", None::<&Buffer<f32>>)
            .build()?;

        let kern_bwd = Kernel::builder()
            .name("euclidean_loss_grad")
            .program(&program_grad)
            .queue(queue.clone())
            .global_work_size(self.size)
            .arg_named("batch_size", self.batch_size as i32)
            .arg_named("prev_shape", 0i32)
            .arg_named("self_shape", self.size as i32)
            .arg_named("self_out", None::<&Buffer<f32>>)
            .arg_named("prev_out", None::<&Buffer<f32>>)
            .arg_named("labels", None::<&Buffer<f32>>)
            .arg_named("neu_grad", None::<&Buffer<f32>>)
            .arg_named("ws_grad", None::<&Buffer<f32>>)
            .build()?;

        self.ocl_queue = Some(queue);
        self.ocl_kernel = Some(kern_fwd);
        self.ocl_kernel_grad = Some(kern_bwd);

        Ok(())
    }
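
    // Note: the buffer arguments ("in", "bias", "ws", "out", "self_out", ...) are
    // registered above as None placeholders and are bound to concrete buffers on
    // every call in forward_ocl() / backward_output_ocl(), while the scalar
    // arguments ("batch_size", "prev_shape") are updated later by
    // set_batch_size() / set_input_shape().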

    fn forward_ocl(&mut self, params: OclParamsBlob) -> LayerOclResult {
        let prev_params = params.first().unwrap();
        let prev_output = prev_params.get_buf_t(TypeBuffer::Output);
        let prev_output = prev_output.0.borrow();

        let self_ws = self.ocl_params.get_buf_t(TypeBuffer::Weights);
        let self_ws = self_ws.0.borrow();

        let self_output = self.ocl_params.get_buf_t(TypeBuffer::Output);
        let self_output = self_output.0.borrow();

        let self_bias = self.ocl_params.get_buf_t(TypeBuffer::Bias);
        let self_bias = self_bias.0.borrow();

        let self_kern = self.ocl_kernel.as_mut().unwrap();

        self_kern
            .set_arg("in", prev_output.deref())
            .expect("[euc_ocl] Setting param IN failure");
        self_kern
            .set_arg("bias", self_bias.deref())
            .expect("[euc_ocl] Setting param BIAS failure");
        self_kern
            .set_arg("ws", self_ws.deref())
            .expect("[euc_ocl] Setting param WS failure");
        self_kern
            .set_arg("out", self_output.deref())
            .expect("[euc_ocl] Setting param OUT failure");

        unsafe {
            self_kern
                .enq()
                .expect("[euc_ocl] Enqueue forward kernel failure");
        }

        debug!("[euc_ocl] forward");

        Ok(vec![self.ocl_params.clone()])
    }

    fn backward_output_ocl(
        &mut self,
        prev_input: OclParamsBlob,
        expected: Array2D,
    ) -> LayerOclResult {
        let ocl_queue = self.ocl_queue.as_ref().unwrap();

        let lbl_buf = Buffer::builder()
            .queue(ocl_queue.clone())
            .flags(MemFlags::new().read_only())
            .len(expected.len())
            .copy_host_slice(expected.as_slice().unwrap())
            .build()
            .expect("[euc_ocl] Couldn't create label buffer");

        let self_out = self.ocl_params.get_buf_t(TypeBuffer::Output);
        let self_out = self_out.0.borrow();

        let self_neu_grad = self.ocl_params.get_buf_t(TypeBuffer::NeuGrad);
        let self_neu_grad = self_neu_grad.0.borrow_mut();

        let self_ws_grad = self.ocl_params.get_buf_t(TypeBuffer::WeightsGrad);
        let self_ws_grad = self_ws_grad.0.borrow_mut();

        let prev_out = prev_input.first().unwrap().get_buf_t(TypeBuffer::Output);
        let prev_out = prev_out.0.borrow();

        let self_kern = self.ocl_kernel_grad.as_ref().unwrap();

        self_kern
            .set_arg("self_out", self_out.deref())
            .expect("[euc_ocl] Setting param SELF_OUT failure");
        self_kern
            .set_arg("prev_out", prev_out.deref())
            .expect("[euc_ocl] Setting param PREV_OUT failure");
        self_kern
            .set_arg("labels", &lbl_buf)
            .expect("[euc_ocl] Setting param LABELS failure");
        self_kern
            .set_arg("neu_grad", self_neu_grad.deref())
            .expect("[euc_ocl] Setting param NEU_GRAD failure");
        self_kern
            .set_arg("ws_grad", self_ws_grad.deref())
            .expect("[euc_ocl] Setting param WS_GRAD failure");

        unsafe {
            self_kern
                .enq()
                .expect("[euc_ocl] Enqueue backward kernel failure");
        }

        Ok(vec![self.ocl_params.clone()])
    }

    fn ocl_params(&self) -> Option<OclParams> {
        Some(self.ocl_params.clone())
    }

    fn set_ocl_params(&mut self, params: OclParams) {
        self.ocl_params = params;
    }

    fn copy_layer_ocl(&self) -> Box<dyn AbstractLayerOcl> {
        todo!()
    }

    fn clone_layer_ocl(&self) -> Box<dyn AbstractLayerOcl> {
        Box::new(self.clone())
    }
}

impl Default for EuclideanLossLayerOcl {
    fn default() -> Self {
        Self {
            cpu_params: CpuParams::empty(),
            ocl_params: OclParams::empty(),
            size: 0,
            batch_size: 1,
            ocl_queue: None,
            ocl_kernel: None,
            ocl_kernel_grad: None,
            ocl_act_func: OclActivationFunc::Raw,
        }
    }
}

impl Clone for EuclideanLossLayerOcl {
    fn clone(&self) -> Self {
        let queue = self
            .ocl_queue
            .as_ref()
            .expect("[euc_ocl] Cannot clone the layer before init_ocl()");

        Self {
            cpu_params: self.cpu_params.clone(),
            ocl_params: self.ocl_params.clone(),
            size: self.size,
            batch_size: self.batch_size,
            ocl_kernel: None,
            ocl_kernel_grad: None,
            ocl_act_func: self.ocl_act_func.clone(),
            ocl_queue: Some(queue.clone()),
        }
    }

    fn clone_from(&mut self, _source: &Self) {
        todo!()
    }
}

impl WithParams for EuclideanLossLayerOcl {
    fn cfg(&self) -> HashMap<String, Variant> {
        let mut out = HashMap::new();

        out.insert("size".to_string(), Variant::Int(self.size as i32));
        out.insert(
            "activation".to_string(),
            Variant::String(self.ocl_act_func.to_string()),
        );

        out
    }

    fn set_cfg(&mut self, args: &HashMap<String, Variant>) {
        if let Some(Variant::Int(size)) = args.get("size") {
            self.size = *size as usize;
        }

        if let Some(Variant::String(act_f)) = args.get("activation") {
            if let Ok(cvt) = OclActivationFunc::try_from(act_f.as_str()) {
                self.ocl_act_func = cvt;
            }
        }
    }
}
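
// A hedged configuration sketch via the WithParams interface. The accepted
// activation strings are whatever OclActivationFunc::try_from() recognises;
// "sigmoid" below is an assumption:
//
//     let mut cfg = HashMap::new();
//     cfg.insert("size".to_string(), Variant::Int(16));
//     cfg.insert("activation".to_string(), Variant::String("sigmoid".to_string()));
//     layer.set_cfg(&cfg);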