use crate::layers::*;
use crate::cpu_params::*;
use crate::ocl::*;
use crate::util::*;

use log::{debug, warn};

use ocl::MemFlags;
use ocl::{Buffer, Context, Device, Kernel, Program, Queue};

use std::{
    collections::HashMap,
    ops::{Deref, DerefMut},
};

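// Forward-pass kernel source. Each work-item computes one output neuron for one
// batch sample: a dense dot product over the previous layer's outputs plus bias,
// passed through `activation()`, which comes from the activation source prepended
// to this kernel in `init_ocl`.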
static EUCLIDEAN_LOSS_KERNEL_FWD: &'static str = r#"
    __kernel void euclidean_loss(
        __private int const batch_size,
        __private int const prev_shape,
        __private int const self_shape,
        __global const float *in,
        __global const float *bias,
        __global const float *ws,
        __global float *out)
    {
        uint const idx = get_global_id(0);
        __private uint const real_idx = idx % self_shape;
        __private uint const batch_idx = idx / self_shape;

        __private float sum = 0.0f;

        for (int j = 0; j < prev_shape; ++j) {
            sum += ws[real_idx * prev_shape + j] * in[j + prev_shape * batch_idx];
        }

        out[idx] = activation(sum + bias[real_idx]);
    }
"#;

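// Backward-pass kernel source. Each work-item handles one output neuron: it first
// writes the per-sample neuron error gradients, (label - out) * deriv(out), and then
// accumulates the corresponding weight gradients averaged over the batch.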
static EUCLIDEAN_LOSS_KERNEL_BWD: &'static str = r#"
    __kernel void euclidean_loss_grad(
        __private int const batch_size,
        __private int const prev_shape,
        __private int const self_shape,
        __global const float *self_out,
        __global const float *prev_out,
        __global const float *labels,
        __global float *neu_grad, // per-sample neuron error gradients
        __global float *ws_grad)
    {
        uint const idx = get_global_id(0);

        for (int i = 0; i < batch_size; ++i) {
            __private int inner_idx = i * self_shape + idx;
            neu_grad[inner_idx] = (labels[inner_idx] - self_out[inner_idx]) * deriv(self_out[inner_idx]);
        }

        for (int i = 0; i < prev_shape; ++i) {
            __private float avg_grad = 0.0f;

            for (int j = 0; j < batch_size; ++j) {
                avg_grad += neu_grad[j * self_shape + idx] * prev_out[j * prev_shape + i];
            }

            avg_grad = avg_grad / batch_size;

            ws_grad[idx * prev_shape + i] = avg_grad;
        }
    }
"#;

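/// Output (loss) layer with a Euclidean objective, executed on an OpenCL device.
/// Holds the compiled forward/backward kernels together with the queue they are
/// enqueued on; the kernels are only available after `init_ocl` has been called.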
pub struct EuclideanLossLayerOcl {
    cpu_params: CpuParams,
    ocl_params: OclParams,
    size: usize,
    batch_size: usize,
    ocl_queue: Option<Queue>,
    ocl_kernel: Option<Kernel>,
    ocl_kernel_grad: Option<Kernel>,
    ocl_act_func: OclActivationFunc,
}

impl EuclideanLossLayerOcl {
    pub fn new(size: usize) -> Self {
        Self {
            cpu_params: CpuParams::empty(),
            ocl_params: OclParams::empty(),
            size,
            batch_size: 1,
            ocl_queue: None,
            ocl_kernel: None,
            ocl_kernel_grad: None,
            ocl_act_func: OclActivationFunc::Raw,
        }
    }

    pub fn new_with_activation(size: usize, act: OclActivationFunc) -> Self {
        let mut out = EuclideanLossLayerOcl::new(size);
        out.set_activation_function(act);
        out
    }

    pub fn set_activation_function(&mut self, act: OclActivationFunc) {
        if self.ocl_kernel.is_some() {
            warn!("Setting OCL activation function after the kernel has already been built");
        }

        self.ocl_act_func = act;
    }
}

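// Call-order note: `init_ocl` must run before `set_input_shape` and `set_batch_size`,
// since both unwrap the built kernels. A minimal construction sketch (how the
// `Context`, `Device` and `Queue` are obtained is outside this file and assumed to be
// handled by the caller):
//
//     let mut layer = EuclideanLossLayerOcl::new_with_activation(10, OclActivationFunc::Sigmoid);
//     layer.init_ocl(&ctx, device, queue.clone())?;
//     layer.set_input_shape(&[128]);
//     layer.set_batch_size(32);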
impl AbstractLayer for EuclideanLossLayerOcl {
    fn layer_type(&self) -> &str {
        "EuclideanLossLayerOcl"
    }

    fn size(&self) -> usize {
        self.size
    }

    fn set_batch_size(&mut self, batch_size: usize) {
        self.batch_size = batch_size;

        self.ocl_kernel
            .as_mut()
            .unwrap()
            .set_default_global_work_size(ocl::SpatialDims::One(self.size * self.batch_size));

        self.ocl_kernel
            .as_mut()
            .unwrap()
            .set_arg("batch_size", batch_size as i32)
            .expect("[euc_ocl] Failed to set batch_size arg");
        self.ocl_kernel_grad
            .as_mut()
            .unwrap()
            .set_arg("batch_size", batch_size as i32)
            .expect("[euc_ocl] Failed to set batch_size arg");

        self.ocl_params
            .fit_to_batch_size_ocl(
                self.size,
                batch_size,
                self.ocl_queue.as_ref().unwrap().clone(),
            )
            .expect("[euc_ocl] Failed to fit to batch size");

        self.cpu_params.fit_to_batch_size(batch_size);
    }

    fn cpu_params(&self) -> Option<CpuParams> {
        Some(self.cpu_params.clone())
    }

    fn set_cpu_params(&mut self, lp: CpuParams) {
        self.cpu_params = lp;
    }

    fn trainable_bufs(&self) -> TrainableBufsIds {
        (
            &[TypeBuffer::Weights as i32, TypeBuffer::Bias as i32],
            &[TypeBuffer::WeightsGrad as i32, TypeBuffer::NeuGrad as i32],
        )
    }

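    // Receives the previous layer's shape: updates the `prev_shape` argument on both
    // kernels and (re)allocates this layer's OCL and CPU parameter buffers.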
    fn set_input_shape(&mut self, sh: &[usize]) {
        let kern = self.ocl_kernel.as_mut().unwrap();
        kern.set_arg("prev_shape", sh[0] as i32)
            .expect("[euc_ocl] Failed to set prev_shape arg");

        let kern_grad = self.ocl_kernel_grad.as_mut().unwrap();
        kern_grad
            .set_arg("prev_shape", sh[0] as i32)
            .expect("[euc_ocl] Failed to set prev_shape arg");

        let queue = self.ocl_queue.as_ref().unwrap();
        self.ocl_params =
            init_ocl_params(queue.clone(), self.size, sh, true).expect("[euc_ocl] Failed to create parameter buffers");

        self.cpu_params = CpuParams::new(self.size, sh[0]);
    }

    fn copy_layer(&self) -> Box<dyn AbstractLayer> {
        panic!("Do not copy OCL layers!");
    }

    fn clone_layer(&self) -> Box<dyn AbstractLayer> {
        panic!("Do not copy OCL layers!");
    }
}

impl AbstractLayerOcl for EuclideanLossLayerOcl {
    fn init_ocl(
        &mut self,
        ocl_ctx: &Context,
        device: Device,
        queue: Queue,
    ) -> Result<(), Box<dyn std::error::Error>> {
        let fwd_act = match self.ocl_act_func {
            OclActivationFunc::Sigmoid => OCL_ACTIVATION_SIGMOID,
            OclActivationFunc::Tanh => OCL_ACTIVATION_TANH,
            OclActivationFunc::ReLU => OCL_ACTIVATION_RELU,
            OclActivationFunc::Raw => OCL_ACTIVATION_RAW,
            OclActivationFunc::LeakyReLU => OCL_ACTIVATION_LEAKY_RELU,
            _ => todo!(),
        };

        let bwd_act = match self.ocl_act_func {
            OclActivationFunc::Sigmoid => OCL_ACTIVATION_SIGMOID_DERIV,
            OclActivationFunc::Tanh => OCL_ACTIVATION_TANH_DERIV,
            OclActivationFunc::ReLU => OCL_ACTIVATION_RELU_DERIV,
            OclActivationFunc::LeakyReLU => OCL_ACTIVATION_LEAKY_RELU_DERIV,
            OclActivationFunc::Raw => OCL_ACTIVATION_RAW_DERIV,
            _ => todo!(),
        };

        let program_fwd = [fwd_act, EUCLIDEAN_LOSS_KERNEL_FWD].join("\n");
        let program_bwd = [bwd_act, EUCLIDEAN_LOSS_KERNEL_BWD].join("\n");

        let program = Program::builder()
            .devices(device)
            .src(program_fwd)
            .build(&ocl_ctx)?;
        let program_grad = Program::builder()
            .devices(device)
            .src(program_bwd)
            .build(&ocl_ctx)?;

        let kern_fwd = Kernel::builder()
            .name("euclidean_loss")
            .program(&program)
            .queue(queue.clone())
            .global_work_size(self.size * self.batch_size)
            .arg_named("batch_size", 0 as i32)
            .arg_named("prev_shape", 0 as i32)
            .arg_named("self_shape", self.size as i32)
            .arg_named("in", None::<&Buffer<f32>>)
            .arg_named("bias", None::<&Buffer<f32>>)
            .arg_named("ws", None::<&Buffer<f32>>)
            .arg_named("out", None::<&Buffer<f32>>)
            .build()?;

        let kern_bwd = Kernel::builder()
            .name("euclidean_loss_grad")
            .program(&program_grad)
            .queue(queue.clone())
            .global_work_size(self.size)
            .arg_named("batch_size", self.batch_size as i32)
            .arg_named("prev_shape", 0 as i32)
            .arg_named("self_shape", self.size as i32)
            .arg_named("self_out", None::<&Buffer<f32>>)
            .arg_named("prev_out", None::<&Buffer<f32>>)
            .arg_named("labels", None::<&Buffer<f32>>)
            .arg_named("neu_grad", None::<&Buffer<f32>>)
            .arg_named("ws_grad", None::<&Buffer<f32>>)
            .build()?;

        self.ocl_queue = Some(queue);
        self.ocl_kernel = Some(kern_fwd);
        self.ocl_kernel_grad = Some(kern_bwd);

        Ok(())
    }

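    // Forward pass: binds the previous layer's output together with this layer's
    // weights, bias and output buffers to the prebuilt kernel and enqueues it.
    // The result stays on the device inside `ocl_params`.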
    fn forward_ocl(&mut self, params: OclParamsBlob) -> LayerOclResult {
        let prev_params = params.first().unwrap();
        let prev_output = prev_params.get_buf_t(TypeBuffer::Output);
        let prev_output = prev_output.0.borrow();

        let self_ws = self.ocl_params.get_buf_t(TypeBuffer::Weights);
        let self_ws = self_ws.0.borrow();

        let self_output = self.ocl_params.get_buf_t(TypeBuffer::Output);
        let self_output = self_output.0.borrow();

        let self_bias = self.ocl_params.get_buf_t(TypeBuffer::Bias);
        let self_bias = self_bias.0.borrow();

        let self_kern = self.ocl_kernel.as_mut().unwrap();

        self_kern
            .set_arg("in", prev_output.deref())
            .expect("[euc_ocl] Failed to set IN arg");
        self_kern
            .set_arg("bias", self_bias.deref())
            .expect("[euc_ocl] Failed to set BIAS arg");
        self_kern
            .set_arg("ws", self_ws.deref())
            .expect("[euc_ocl] Failed to set WS arg");
        self_kern
            .set_arg("out", self_output.deref())
            .expect("[euc_ocl] Failed to set OUT arg");

        unsafe {
            self_kern
                .enq()
                .expect("[euc_ocl] Failed to enqueue forward kernel");
        }

        debug!("[euc_ocl] forward");

        Ok(vec![self.ocl_params.clone()])
    }

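    // Backward pass for the output layer: uploads the expected labels into a fresh
    // read-only buffer, binds outputs and gradient buffers, and enqueues the gradient
    // kernel, which fills the NeuGrad and WeightsGrad buffers on the device.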
    fn backward_output_ocl(
        &mut self,
        prev_input: OclParamsBlob,
        expected: Array2D,
    ) -> LayerOclResult {
        let ocl_queue = self.ocl_queue.as_ref().unwrap();

        let lbl_buf = Buffer::builder()
            .queue(ocl_queue.clone())
            .flags(MemFlags::new().read_only())
            .len(expected.len())
            .copy_host_slice(expected.as_slice().unwrap())
            .build()
            .expect("[euc_ocl] Failed to create label buffer");

        let self_out = self.ocl_params.get_buf_t(TypeBuffer::Output);
        let self_out = self_out.0.borrow();

        let self_neu_grad = self.ocl_params.get_buf_t(TypeBuffer::NeuGrad);
        let self_neu_grad = self_neu_grad.0.borrow_mut();

        let self_ws_grad = self.ocl_params.get_buf_t(TypeBuffer::WeightsGrad);
        let self_ws_grad = self_ws_grad.0.borrow_mut();

        let prev_out = prev_input.first().unwrap().get_buf_t(TypeBuffer::Output);
        let prev_out = prev_out.0.borrow();

        let self_kern = self.ocl_kernel_grad.as_ref().unwrap();

        self_kern
            .set_arg("self_out", self_out.deref())
            .expect("[euc_ocl] Failed to set SELF_OUT arg");
        self_kern
            .set_arg("prev_out", prev_out.deref())
            .expect("[euc_ocl] Failed to set PREV_OUT arg");
        self_kern
            .set_arg("labels", &lbl_buf)
            .expect("[euc_ocl] Failed to set LABELS arg");
        self_kern
            .set_arg("neu_grad", self_neu_grad.deref())
            .expect("[euc_ocl] Failed to set NEU_GRAD arg");
        self_kern
            .set_arg("ws_grad", self_ws_grad.deref())
            .expect("[euc_ocl] Failed to set WS_GRAD arg");

        unsafe {
            self_kern
                .enq()
                .expect("[euc_ocl] Failed to enqueue backward kernel");
        }

        Ok(vec![self.ocl_params.clone()])
    }

    fn ocl_params(&self) -> Option<OclParams> {
        Some(self.ocl_params.clone())
    }

    fn set_ocl_params(&mut self, params: OclParams) {
        self.ocl_params = params;
    }

    fn copy_layer_ocl(&self) -> Box<dyn AbstractLayerOcl> {
        todo!()
    }

    fn clone_layer_ocl(&self) -> Box<dyn AbstractLayerOcl> {
        Box::new(self.clone())
    }
}

impl Default for EuclideanLossLayerOcl {
    fn default() -> Self {
        Self {
            cpu_params: CpuParams::empty(),
            ocl_params: OclParams::empty(),
            size: 0,
            batch_size: 1,
            ocl_queue: None,
            ocl_kernel: None,
            ocl_kernel_grad: None,
            ocl_act_func: OclActivationFunc::Raw,
        }
    }
}

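// Cloning keeps the queue and parameters but drops the built kernels, so a cloned
// layer must go through `init_ocl` again before use. Cloning a layer that has no
// queue yet (i.e. before `init_ocl`) panics on the `unwrap` below.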
impl Clone for EuclideanLossLayerOcl {
    fn clone(&self) -> Self {
        let queue = self.ocl_queue.as_ref().unwrap();

        Self {
            cpu_params: self.cpu_params.clone(),
            ocl_params: self.ocl_params.clone(),
            size: self.size,
            batch_size: self.batch_size,
            ocl_kernel: None,
            ocl_kernel_grad: None,
            ocl_act_func: self.ocl_act_func.clone(),
            ocl_queue: Some(queue.clone()),
        }
    }

    fn clone_from(&mut self, _source: &Self) {
        todo!()
    }
}

impl WithParams for EuclideanLossLayerOcl {
    fn cfg(&self) -> HashMap<String, Variant> {
        let mut out = HashMap::new();

        out.insert("size".to_string(), Variant::Int(self.size as i32));
        out.insert(
            "activation".to_string(),
            Variant::String(self.ocl_act_func.to_string()),
        );

        out
    }

    fn set_cfg(&mut self, args: &HashMap<String, Variant>) {
        if let Some(size) = args.get("size") {
            if let Variant::Int(size) = size {
                self.size = *size as usize;
            }
        }

        if let Some(act_f) = args.get("activation") {
            if let Variant::String(act_f) = act_f {
                let cvt_res = OclActivationFunc::try_from(act_f.as_str());
                if let Ok(cvt) = cvt_res {
                    self.ocl_act_func = cvt;
                }
            }
        }
    }
}