1use std::mem::size_of;
10use std::ptr::null_mut;
11use std::sync::Arc;
12use std::sync::Mutex;
13use crate::Backend;
14use crate::BackendArray;
15use crate::Error;
16use crate::Result;
17use crate::mutex_lock;
18
19pub use opencl3::context::Context;
20pub use opencl3::device::Device;
21pub use opencl3::device::CL_DEVICE_TYPE_ACCELERATOR;
22pub use opencl3::device::CL_DEVICE_TYPE_ALL;
23pub use opencl3::device::CL_DEVICE_TYPE_CPU;
24pub use opencl3::device::CL_DEVICE_TYPE_CUSTOM;
25pub use opencl3::device::CL_DEVICE_TYPE_DEFAULT;
26pub use opencl3::device::CL_DEVICE_TYPE_GPU;
27pub use opencl3::device::cl_device_id;
28pub use opencl3::error_codes::ClError;
29pub use opencl3::platform::Platform;
30pub use opencl3::platform::get_platforms;
31
32use cl3::info_type::InfoType;
33use opencl3::command_queue::CommandQueue;
34use opencl3::device::CL_DEVICE_MAX_WORK_GROUP_SIZE;
35use opencl3::device::get_device_info;
36use opencl3::event::Event;
37use opencl3::kernel::ExecuteKernel;
38use opencl3::kernel::Kernel;
39use opencl3::memory::Buffer;
40use opencl3::memory::ClMem;
41use opencl3::memory::cl_mem;
42use opencl3::memory::CL_MEM_READ_WRITE;
43use opencl3::program::Program;
44use opencl3::types::CL_TRUE;
45
46const SOURCE: &'static str = include_str!("opencl.cl");
47
#[derive(Debug)]
// Handle to a device-side array of f32 values.
pub struct ClBackendArray
{
    // Lock-protected OpenCL buffer; the Arc lets several handles alias the
    // same device memory (aliasing is detected elsewhere via Arc::ptr_eq).
    buffer: Arc<Mutex<Buffer<f32>>>,
    // Number of f32 elements held by the buffer.
    len: usize,
}
57
// Mutable backend state guarded by the single Mutex in ClBackend.
struct ClInnerBackend
{
    // OpenCL context all buffers and the queue belong to.
    context: Context,
    // Command queue used for every enqueue in this backend.
    command_queue: CommandQueue,
    // Program built from the embedded opencl.cl source; kernels are created
    // from it by name per operation.
    program: Program,
    // Work-group size used for 1-D dispatches (device max work-group size).
    group_size_for_1d: usize,
    // Edge of the square work group used for 2-D dispatches
    // (floor(sqrt(group_size_for_1d))).
    group_size_for_2d: usize,
}
66
// OpenCL implementation of the Backend trait. All device state lives behind
// one Mutex, so the backend itself can be shared between threads.
pub struct ClBackend
{
    inner: Mutex<ClInnerBackend>,
}
72
// Chooses OpenCL work sizes for an n x m problem.
//
// Returns `(local_n, local_m, global_n, global_m)`. Vectors (one dimension
// equal to 1) get a 1-D layout using the full 1-D group size; matrix
// multiplication gets a square group with a halved edge and the problem
// shrunk by 4 per dimension (each work item covers a 4x4 tile); everything
// else gets the plain square 2-D group. Global sizes are always rounded up
// to a multiple of the local size.
fn preferred_work_sizes(n: usize, m: usize, group_size_for_1d: usize, group_size_for_2d: usize, is_mul: bool) -> (usize, usize, usize, usize)
{
    // Rounds `x` up to the next multiple of `step`.
    let round_up = |x: usize, step: usize| ((x + step - 1) / step) * step;
    if is_mul {
        let half = (group_size_for_2d + 1) / 2;
        let gn = round_up((n + 3) / 4, half);
        let gm = round_up((m + 3) / 4, half);
        (half, half, gn, gm)
    } else if m == 1 {
        (group_size_for_1d, 1, round_up(n, group_size_for_1d), 1)
    } else if n == 1 {
        (1, group_size_for_1d, 1, round_up(m, group_size_for_1d))
    } else {
        (group_size_for_2d, group_size_for_2d, round_up(n, group_size_for_2d), round_up(m, group_size_for_2d))
    }
}
91
92impl ClBackend
93{
94 pub fn new() -> Result<ClBackend>
96 {
97 let platforms = match get_platforms() {
98 Ok(tmp_platforms) => tmp_platforms,
99 Err(err) => return Err(Error::OpenCl(err)),
100 };
101 if platforms.is_empty() {
102 return Err(Error::NoPlatform);
103 }
104 let device_ids = match platforms[0].get_devices(CL_DEVICE_TYPE_DEFAULT) {
105 Ok(tmp_device_ids) => tmp_device_ids,
106 Err(err) => return Err(Error::OpenCl(err)),
107 };
108 if device_ids.is_empty() {
109 return Err(Error::NoDevice);
110 }
111 let device = Device::new(device_ids[0]);
112 let context = match Context::from_device(&device) {
113 Ok(tmp_context) => tmp_context,
114 Err(err) => return Err(Error::OpenCl(err)),
115 };
116 Self::new_with_context(context)
117 }
118
119 pub fn new_with_context(context: Context) -> Result<ClBackend>
121 {
122 let command_queue = match CommandQueue::create_default_with_properties(&context, 0, 0) {
123 Ok(tmp_command_queue) => tmp_command_queue,
124 Err(err) => return Err(Error::OpenCl(err)),
125 };
126 let program = match Program::create_and_build_from_source(&context, SOURCE, "") {
127 Ok(tmp_program) => tmp_program,
128 Err(msg) => return Err(Error::Compilation(msg)),
129 };
130 let group_size_for_1d = match get_device_info(context.default_device(), CL_DEVICE_MAX_WORK_GROUP_SIZE) {
131 Ok(InfoType::Size(tmp_group_size_for_1d)) => tmp_group_size_for_1d,
132 _ => return Err(Error::InvalidDeviceInfoType),
133 };
134 let group_size_for_2d = (group_size_for_1d as f64).sqrt().floor() as usize;
135 let inner = ClInnerBackend {
136 context,
137 command_queue,
138 program,
139 group_size_for_1d,
140 group_size_for_2d,
141 };
142 Ok(ClBackend { inner: Mutex::new(inner), })
143 }
144
    // Validates two OpenCL arrays with `f`, creates the named kernel, lets `g`
    // enqueue it over the raw cl_mem handles of `a` (input) and `b` (output),
    // then blocks until the returned event completes.
    fn check_and_enqueue_nd_range2<F, G>(&self, kernel_name: &str, a: &BackendArray, b: &BackendArray, f: F, g: G) -> Result<()>
        where F: FnOnce(&ClBackendArray, &ClBackendArray) -> Result<()>,
            G: FnOnce(&ClInnerBackend, &Kernel, cl_mem, cl_mem) -> Result<Event>
    {
        #[allow(unreachable_patterns)]
        match (a, b) {
            (BackendArray::OpenCl(a2), BackendArray::OpenCl(b2)) => {
                f(a2, b2)?;
                let inner_g = mutex_lock(&self.inner)?;
                let kernel = match Kernel::create(&inner_g.program, kernel_name) {
                    Ok(tmp_kernel) => tmp_kernel,
                    Err(err) => return Err(Error::OpenCl(err)),
                };
                // When both handles alias the same buffer, lock it only once —
                // relocking the same std Mutex would deadlock or panic — and
                // hand the same buffer to the kernel as input and output.
                let event = if !Arc::ptr_eq(&a2.buffer, &b2.buffer) {
                    let a_buffer_g = mutex_lock(&a2.buffer)?;
                    let mut b_buffer_g = mutex_lock(&b2.buffer)?;
                    g(&*inner_g, &kernel, a_buffer_g.get(), b_buffer_g.get_mut())?
                } else {
                    let mut a_buffer_g = mutex_lock(&a2.buffer)?;
                    g(&*inner_g, &kernel, a_buffer_g.get(), a_buffer_g.get_mut())?
                };
                // Block until the kernel finishes so both buffers are safe to
                // touch again as soon as this method returns.
                match event.wait() {
                    Ok(()) => (),
                    Err(err) => return Err(Error::OpenCl(err)),
                }
                Ok(())
            },
            _ => Err(Error::InvalidBackendArray),
        }
    }
175
    // Three-array variant of check_and_enqueue_nd_range2: validates with `f`,
    // then lets `g` enqueue the named kernel over `a`, `b` (inputs) and `c`
    // (output) and waits for completion. Every aliasing combination of the
    // three Arc-shared buffers is handled so no Mutex is ever locked twice.
    fn check_and_enqueue_nd_range3<F, G>(&self, kernel_name: &str, a: &BackendArray, b: &BackendArray, c: &BackendArray, f: F, g: G) -> Result<()>
        where F: FnOnce(&ClBackendArray, &ClBackendArray, &ClBackendArray) -> Result<()>,
            G: FnOnce(&ClInnerBackend, &Kernel, cl_mem, cl_mem, cl_mem) -> Result<Event>
    {
        #[allow(unreachable_patterns)]
        match (a, b, c) {
            (BackendArray::OpenCl(a2), BackendArray::OpenCl(b2), BackendArray::OpenCl(c2)) => {
                f(a2, b2, c2)?;
                let inner_g = mutex_lock(&self.inner)?;
                let kernel = match Kernel::create(&inner_g.program, kernel_name) {
                    Ok(tmp_kernel) => tmp_kernel,
                    Err(err) => return Err(Error::OpenCl(err)),
                };
                // The three booleans are (a==b, a==c, b==c) by buffer identity;
                // each arm locks every distinct buffer exactly once.
                let event = match (Arc::ptr_eq(&a2.buffer, &b2.buffer), Arc::ptr_eq(&a2.buffer, &c2.buffer), Arc::ptr_eq(&b2.buffer, &c2.buffer)) {
                    // All three distinct.
                    (false, false, false) => {
                        let a_buffer_g = mutex_lock(&a2.buffer)?;
                        let b_buffer_g = mutex_lock(&b2.buffer)?;
                        let mut c_buffer_g = mutex_lock(&c2.buffer)?;
                        g(&*inner_g, &kernel, a_buffer_g.get(), b_buffer_g.get(), c_buffer_g.get_mut())?
                    },
                    // a == b, c distinct.
                    (true, false, false) => {
                        let a_buffer_g = mutex_lock(&a2.buffer)?;
                        let mut c_buffer_g = mutex_lock(&c2.buffer)?;
                        g(&*inner_g, &kernel, a_buffer_g.get(), a_buffer_g.get(), c_buffer_g.get_mut())?
                    },
                    // a == c, b distinct.
                    (false, true, false) => {
                        let mut a_buffer_g = mutex_lock(&a2.buffer)?;
                        let b_buffer_g = mutex_lock(&b2.buffer)?;
                        g(&*inner_g, &kernel, a_buffer_g.get(), b_buffer_g.get(), a_buffer_g.get_mut())?
                    },
                    // b == c, a distinct.
                    (false, false, true) => {
                        let a_buffer_g = mutex_lock(&a2.buffer)?;
                        let mut b_buffer_g = mutex_lock(&b2.buffer)?;
                        g(&*inner_g, &kernel, a_buffer_g.get(), b_buffer_g.get(), b_buffer_g.get_mut())?
                    },
                    // a == b == c (the remaining reachable combination).
                    _ => {
                        let mut a_buffer_g = mutex_lock(&a2.buffer)?;
                        g(&*inner_g, &kernel, a_buffer_g.get(), a_buffer_g.get(), a_buffer_g.get_mut())?
                    },
                };
                // Block until the kernel finishes before releasing the guards.
                match event.wait() {
                    Ok(()) => (),
                    Err(err) => return Err(Error::OpenCl(err)),
                }
                Ok(())
            },
            _ => Err(Error::InvalidBackendArray),
        }
    }
225
    // Enqueues the named unary kernel over an n x m matrix after checking
    // that both arrays hold exactly n*m elements.
    fn check_and_enqueue_nd_range_for_fun(&self, kernel_name: &str, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    {
        self.check_and_enqueue_nd_range2(kernel_name, a, b, |a2, b2| {
            if a2.len != n * m {
                return Err(Error::BackendArrayElemCount(a2.len, n * m));
            }
            if b2.len != n * m {
                return Err(Error::BackendArrayElemCount(b2.len, n * m));
            }
            Ok(())
        }, |inner, kernel, a_mem, b_mem| {
            // Kernels take the matrix dimensions as 64-bit values.
            let n2 = n as u64;
            let m2 = m as u64;
            let (n3, m3, n4, m4) = preferred_work_sizes(n, m, inner.group_size_for_1d, inner.group_size_for_2d, false);
            // SAFETY: the argument order (input, output, n, m) must match the
            // kernel signature in opencl.cl — confirm against that source.
            unsafe {
                let res = ExecuteKernel::new(kernel)
                    .set_arg(&a_mem)
                    .set_arg(&b_mem)
                    .set_arg(&n2)
                    .set_arg(&m2)
                    .set_local_work_sizes(&[n3, m3])
                    .set_global_work_sizes(&[n4, m4])
                    .enqueue_nd_range(&inner.command_queue);
                match res {
                    Ok(event) => Ok(event),
                    Err(err) => Err(Error::OpenCl(err)),
                }
            }
        })
    }
256
    // Enqueues the named element-wise binary kernel c = a (op) b over n x m
    // matrices; all three arrays must hold exactly n*m elements.
    fn check_and_enqueue_nd_range_for_op(&self, kernel_name: &str, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    {
        self.check_and_enqueue_nd_range3(kernel_name, a, b, c, |a2, b2, c2| {
            if a2.len != n * m {
                return Err(Error::BackendArrayElemCount(a2.len, n * m));
            }
            if b2.len != n * m {
                return Err(Error::BackendArrayElemCount(b2.len, n * m));
            }
            if c2.len != n * m {
                return Err(Error::BackendArrayElemCount(c2.len, n * m));
            }
            Ok(())
        }, |inner, kernel, a_mem, b_mem, c_mem| {
            let n2 = n as u64;
            let m2 = m as u64;
            let (n3, m3, n4, m4) = preferred_work_sizes(n, m, inner.group_size_for_1d, inner.group_size_for_2d, false);
            // SAFETY: argument order (a, b, c, n, m) must match the kernel
            // signature in opencl.cl — confirm against that source.
            unsafe {
                let res = ExecuteKernel::new(kernel)
                    .set_arg(&a_mem)
                    .set_arg(&b_mem)
                    .set_arg(&c_mem)
                    .set_arg(&n2)
                    .set_arg(&m2)
                    .set_local_work_sizes(&[n3, m3])
                    .set_global_work_sizes(&[n4, m4])
                    .enqueue_nd_range(&inner.command_queue);
                match res {
                    Ok(event) => Ok(event),
                    Err(err) => Err(Error::OpenCl(err)),
                }
            }
        })
    }
291
    // Enqueues the named matrix-product kernel c = a * b where a is n x l,
    // b is l x m and c is n x m.
    fn check_and_enqueue_nd_range_for_mul(&self, kernel_name: &str, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize, l: usize) -> Result<()>
    {
        self.check_and_enqueue_nd_range3(kernel_name, a, b, c, |a2, b2, c2| {
            if a2.len != n * l {
                return Err(Error::BackendArrayElemCount(a2.len, n * l));
            }
            if b2.len != l * m {
                return Err(Error::BackendArrayElemCount(b2.len, l * m));
            }
            if c2.len != n * m {
                return Err(Error::BackendArrayElemCount(c2.len, n * m));
            }
            Ok(())
        }, |inner, kernel, a_mem, b_mem, c_mem| {
            let n2 = n as u64;
            let m2 = m as u64;
            let l2 = l as u64;
            // is_mul = true: each work item covers a 4x4 tile, so the global
            // sizes are computed over n/4 x m/4 with a halved group edge.
            let (n3, m3, n4, m4) = preferred_work_sizes(n, m, inner.group_size_for_1d, inner.group_size_for_2d, true);
            // SAFETY: argument order (a, b, c, two local scratch buffers, n,
            // m, l) must match the kernel signature in opencl.cl. The two
            // local buffers hold n3*m3*4 floats each — presumably per-group
            // tiles of a and b; confirm against the kernel source.
            unsafe {
                let res = ExecuteKernel::new(kernel)
                    .set_arg(&a_mem)
                    .set_arg(&b_mem)
                    .set_arg(&c_mem)
                    .set_arg_local_buffer(n3 * m3 * 4 * size_of::<f32>())
                    .set_arg_local_buffer(n3 * m3 * 4 * size_of::<f32>())
                    .set_arg(&n2)
                    .set_arg(&m2)
                    .set_arg(&l2)
                    .set_local_work_sizes(&[n3, m3])
                    .set_global_work_sizes(&[n4, m4])
                    .enqueue_nd_range(&inner.command_queue);
                match res {
                    Ok(event) => Ok(event),
                    Err(err) => Err(Error::OpenCl(err)),
                }
            }
        })
    }
330
    // Enqueues the named kernel c = a (op) b where b is an f32 scalar passed
    // by value; a and c must hold exactly n*m elements.
    fn check_and_enqueue_nd_range_for_scalar(&self, kernel_name: &str, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    {
        self.check_and_enqueue_nd_range2(kernel_name, a, c, |a2, c2| {
            if a2.len != n * m {
                return Err(Error::BackendArrayElemCount(a2.len, n * m));
            }
            if c2.len != n * m {
                return Err(Error::BackendArrayElemCount(c2.len, n * m));
            }
            Ok(())
        }, |inner, kernel, a_mem, c_mem| {
            let n2 = n as u64;
            let m2 = m as u64;
            let (n3, m3, n4, m4) = preferred_work_sizes(n, m, inner.group_size_for_1d, inner.group_size_for_2d, false);
            // SAFETY: argument order (a, scalar b, c, n, m) must match the
            // kernel signature in opencl.cl — confirm against that source.
            unsafe {
                let res = ExecuteKernel::new(kernel)
                    .set_arg(&a_mem)
                    .set_arg(&b)
                    .set_arg(&c_mem)
                    .set_arg(&n2)
                    .set_arg(&m2)
                    .set_local_work_sizes(&[n3, m3])
                    .set_global_work_sizes(&[n4, m4])
                    .enqueue_nd_range(&inner.command_queue);
                match res {
                    Ok(event) => Ok(event),
                    Err(err) => Err(Error::OpenCl(err)),
                }
            }
        })
    }
362
    // Like check_and_enqueue_nd_range_for_fun, but additionally passes one
    // local (work-group) scratch buffer of n3*m3 floats to the kernel — used
    // by kernels such as softmax that reduce within a work group; confirm the
    // exact use against opencl.cl.
    fn check_and_enqueue_nd_range_for_fun_and_tiles(&self, kernel_name: &str, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    {
        self.check_and_enqueue_nd_range2(kernel_name, a, b, |a2, b2| {
            if a2.len != n * m {
                return Err(Error::BackendArrayElemCount(a2.len, n * m));
            }
            if b2.len != n * m {
                return Err(Error::BackendArrayElemCount(b2.len, n * m));
            }
            Ok(())
        }, |inner, kernel, a_mem, b_mem| {
            let n2 = n as u64;
            let m2 = m as u64;
            let (n3, m3, n4, m4) = preferred_work_sizes(n, m, inner.group_size_for_1d, inner.group_size_for_2d, false);
            // SAFETY: argument order (input, output, local scratch, n, m)
            // must match the kernel signature in opencl.cl.
            unsafe {
                let res = ExecuteKernel::new(kernel)
                    .set_arg(&a_mem)
                    .set_arg(&b_mem)
                    .set_arg_local_buffer(n3 * m3 *size_of::<f32>())
                    .set_arg(&n2)
                    .set_arg(&m2)
                    .set_local_work_sizes(&[n3, m3])
                    .set_global_work_sizes(&[n4, m4])
                    .enqueue_nd_range(&inner.command_queue);
                match res {
                    Ok(event) => Ok(event),
                    Err(err) => Err(Error::OpenCl(err)),
                }
            }
        })
    }
394
    // Enqueues the named kernel that repeats an n-element column vector `a`
    // across the m columns of the n x m output `b`.
    fn check_and_enqueue_nd_range_for_repeat_col(&self, kernel_name: &str, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    {
        self.check_and_enqueue_nd_range2(kernel_name, a, b, |a2, b2| {
            // The input is a vector of n elements, not an n x m matrix.
            if a2.len != n {
                return Err(Error::BackendArrayElemCount(a2.len, n));
            }
            if b2.len != n * m {
                return Err(Error::BackendArrayElemCount(b2.len, n * m));
            }
            Ok(())
        }, |inner, kernel, a_mem, b_mem| {
            let n2 = n as u64;
            let m2 = m as u64;
            let (n3, m3, n4, m4) = preferred_work_sizes(n, m, inner.group_size_for_1d, inner.group_size_for_2d, false);
            // SAFETY: argument order (input, output, n, m) must match the
            // kernel signature in opencl.cl.
            unsafe {
                let res = ExecuteKernel::new(kernel)
                    .set_arg(&a_mem)
                    .set_arg(&b_mem)
                    .set_arg(&n2)
                    .set_arg(&m2)
                    .set_local_work_sizes(&[n3, m3])
                    .set_global_work_sizes(&[n4, m4])
                    .enqueue_nd_range(&inner.command_queue);
                match res {
                    Ok(event) => Ok(event),
                    Err(err) => Err(Error::OpenCl(err)),
                }
            }
        })
    }
425
    // Enqueues the named kernel that repeats an m-element row vector `a`
    // across the n rows of the n x m output `b`.
    fn check_and_enqueue_nd_range_for_repeat_row(&self, kernel_name: &str, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    {
        self.check_and_enqueue_nd_range2(kernel_name, a, b, |a2, b2| {
            // The input is a vector of m elements, not an n x m matrix.
            if a2.len != m {
                return Err(Error::BackendArrayElemCount(a2.len, m));
            }
            if b2.len != n * m {
                return Err(Error::BackendArrayElemCount(b2.len, n * m));
            }
            Ok(())
        }, |inner, kernel, a_mem, b_mem| {
            let n2 = n as u64;
            let m2 = m as u64;
            let (n3, m3, n4, m4) = preferred_work_sizes(n, m, inner.group_size_for_1d, inner.group_size_for_2d, false);
            // SAFETY: argument order (input, output, n, m) must match the
            // kernel signature in opencl.cl.
            unsafe {
                let res = ExecuteKernel::new(kernel)
                    .set_arg(&a_mem)
                    .set_arg(&b_mem)
                    .set_arg(&n2)
                    .set_arg(&m2)
                    .set_local_work_sizes(&[n3, m3])
                    .set_global_work_sizes(&[n4, m4])
                    .enqueue_nd_range(&inner.command_queue);
                match res {
                    Ok(event) => Ok(event),
                    Err(err) => Err(Error::OpenCl(err)),
                }
            }
        })
    }
456}
457
458impl Backend for ClBackend
459{
    // Human-readable name of this backend.
    fn name(&self) -> &'static str
    { "OpenCL" }

    // This backend never uses cuBLAS.
    fn has_cublas(&self) -> bool
    { false }
465
466 unsafe fn alloc(&self, n: usize) -> Result<BackendArray>
467 {
468 let inner_g = mutex_lock(&self.inner)?;
469 let buffer: Buffer<f32> = match Buffer::create(&inner_g.context, CL_MEM_READ_WRITE, n, null_mut()) {
470 Ok(tmp_buffer) => tmp_buffer,
471 Err(err) => return Err(Error::OpenCl(err)),
472 };
473 let cl_array = ClBackendArray { buffer: Arc::new(Mutex::new(buffer)), len: n, };
474 Ok(BackendArray::OpenCl(cl_array))
475 }
476
477 fn alloc_and_store_zeros(&self, n: usize) -> Result<BackendArray>
478 {
479 let inner_g = mutex_lock(&self.inner)?;
480 let mut buffer: Buffer<f32> = match unsafe { Buffer::create(&inner_g.context, CL_MEM_READ_WRITE, n, null_mut()) } {
481 Ok(tmp_buffer) => tmp_buffer,
482 Err(err) => return Err(Error::OpenCl(err)),
483 };
484 let event = match unsafe { inner_g.command_queue.enqueue_fill_buffer(&mut buffer, &[0.0f32], 0, n * size_of::<f32>(), &[]) } {
485 Ok(tmp_event) => tmp_event,
486 Err(err) => return Err(Error::OpenCl(err)),
487 };
488 match event.wait() {
489 Ok(()) => (),
490 Err(err) => return Err(Error::OpenCl(err)),
491 }
492 let cl_array = ClBackendArray { buffer: Arc::new(Mutex::new(buffer)), len: n, };
493 Ok(BackendArray::OpenCl(cl_array))
494 }
495
496 fn alloc_and_store(&self, elems: &[f32]) -> Result<BackendArray>
497 {
498 let inner_g = mutex_lock(&self.inner)?;
499 let mut buffer: Buffer<f32> = match unsafe { Buffer::create(&inner_g.context, CL_MEM_READ_WRITE, elems.len(), null_mut()) } {
500 Ok(tmp_buffer) => tmp_buffer,
501 Err(err) => return Err(Error::OpenCl(err)),
502 };
503 let event = match unsafe { inner_g.command_queue.enqueue_write_buffer(&mut buffer, CL_TRUE, 0, elems, &[]) } {
504 Ok(tmp_event) => tmp_event,
505 Err(err) => return Err(Error::OpenCl(err)),
506 };
507 match event.wait() {
508 Ok(()) => (),
509 Err(err) => return Err(Error::OpenCl(err)),
510 }
511 let cl_array = ClBackendArray { buffer: Arc::new(Mutex::new(buffer)), len: elems.len(), };
512 Ok(BackendArray::OpenCl(cl_array))
513 }
514
515 fn load(&self, a: &BackendArray, elems: &mut [f32]) -> Result<()>
516 {
517 #[allow(unreachable_patterns)]
518 match a {
519 BackendArray::OpenCl(a2) => {
520 if a2.len != elems.len() {
521 return Err(Error::BackendArrayElemCount(a2.len, elems.len()));
522 }
523 let inner_g = mutex_lock(&self.inner)?;
524 let a_buffer_g = mutex_lock(&a2.buffer)?;
525 let event = match unsafe { inner_g.command_queue.enqueue_read_buffer(&*a_buffer_g, CL_TRUE, 0, elems, &[]) } {
526 Ok(tmp_event) => tmp_event,
527 Err(err) => return Err(Error::OpenCl(err)),
528 };
529 match event.wait() {
530 Ok(()) => (),
531 Err(err) => return Err(Error::OpenCl(err)),
532 }
533 },
534 _ => return Err(Error::InvalidBackendArray),
535 }
536 Ok(())
537 }
538
539 fn store(&self, a: &BackendArray, elems: &[f32]) -> Result<()>
540 {
541 #[allow(unreachable_patterns)]
542 match a {
543 BackendArray::OpenCl(a2) => {
544 if a2.len != elems.len() {
545 return Err(Error::BackendArrayElemCount(a2.len, elems.len()));
546 }
547 let inner_g = mutex_lock(&self.inner)?;
548 let mut a_buffer_g = mutex_lock(&a2.buffer)?;
549 let event = match unsafe { inner_g.command_queue.enqueue_write_buffer(&mut *a_buffer_g, CL_TRUE, 0, elems, &[]) } {
550 Ok(tmp_event) => tmp_event,
551 Err(err) => return Err(Error::OpenCl(err)),
552 };
553 match event.wait() {
554 Ok(()) => (),
555 Err(err) => return Err(Error::OpenCl(err)),
556 }
557 },
558 _ => return Err(Error::InvalidBackendArray),
559 }
560 Ok(())
561 }
562
563 fn copy(&self, a: &BackendArray, b: &BackendArray) -> Result<()>
564 {
565 #[allow(unreachable_patterns)]
566 match (a, b) {
567 (BackendArray::OpenCl(a2), BackendArray::OpenCl(b2)) => {
568 if Arc::ptr_eq(&a2.buffer, &b2.buffer) {
569 return Ok(());
570 }
571 if a2.len != b2.len {
572 return Err(Error::TwoBackendArrayElemCounts(a2.len, b2.len));
573 }
574 let inner_g = mutex_lock(&self.inner)?;
575 let a_buffer_g = mutex_lock(&a2.buffer)?;
576 let mut b_buffer_g = mutex_lock(&b2.buffer)?;
577 let event = match unsafe { inner_g.command_queue.enqueue_copy_buffer(&*a_buffer_g, &mut *b_buffer_g, 0, 0, a2.len * size_of::<f32>(), &[]) } {
578 Ok(tmp_event) => tmp_event,
579 Err(err) => return Err(Error::OpenCl(err)),
580 };
581 match event.wait() {
582 Ok(()) => (),
583 Err(err) => return Err(Error::OpenCl(err)),
584 }
585 },
586 _ => return Err(Error::InvalidBackendArray),
587 }
588 Ok(())
589 }
590
    // The remaining trait methods are thin dispatchers: each forwards to the
    // matching OpenCL kernel by name via one of the check_and_enqueue helpers.
    // The `at`/`bt` suffixes select kernel variants that read the
    // corresponding operand as transposed.

    fn transpose_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("transpose_a", a, b, n, m) }

    // Element-wise matrix addition and subtraction: c = a ± b.
    fn add_a_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("add_a_b", a, b, c, n, m) }

    fn add_at_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("add_at_b", a, b, c, n, m) }

    fn add_a_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("add_a_bt", a, b, c, n, m) }

    fn add_at_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("add_at_bt", a, b, c, n, m) }

    fn sub_a_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("sub_a_b", a, b, c, n, m) }

    fn sub_at_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("sub_at_b", a, b, c, n, m) }

    fn sub_a_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("sub_a_bt", a, b, c, n, m) }

    fn sub_at_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("sub_at_bt", a, b, c, n, m) }

    // Matrix products: a is n x l, b is l x m, c is n x m.
    fn mul_a_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize, l: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_mul("mul_a_b", a, b, c, n, m, l) }

    fn mul_at_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize, l: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_mul("mul_at_b", a, b, c, n, m, l) }

    fn mul_a_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize, l: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_mul("mul_a_bt", a, b, c, n, m, l) }

    fn mul_at_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize, l: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_mul("mul_at_bt", a, b, c, n, m, l) }

    // Element-wise (Hadamard) products and quotients.
    fn mul_a_b_for_elems(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("mul_a_b_for_elems", a, b, c, n, m) }

    fn mul_at_b_for_elems(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("mul_at_b_for_elems", a, b, c, n, m) }

    fn mul_a_bt_for_elems(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("mul_a_bt_for_elems", a, b, c, n, m) }

    fn mul_at_bt_for_elems(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("mul_at_bt_for_elems", a, b, c, n, m) }

    fn div_a_b_for_elems(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("div_a_b_for_elems", a, b, c, n, m) }

    fn div_at_b_for_elems(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("div_at_b_for_elems", a, b, c, n, m) }

    fn div_a_bt_for_elems(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("div_a_bt_for_elems", a, b, c, n, m) }

    fn div_at_bt_for_elems(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("div_at_bt_for_elems", a, b, c, n, m) }

    // Scalar broadcasts: b is an f32 applied to every element. The `r…`
    // variants presumably apply the operation with operands swapped
    // (scalar op matrix) — confirm against the kernels in opencl.cl.
    fn add_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("add_a_b_for_scalar", a, b, c, n, m) }

    fn add_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("add_at_b_for_scalar", a, b, c, n, m) }

    fn sub_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("sub_a_b_for_scalar", a, b, c, n, m) }

    fn sub_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("sub_at_b_for_scalar", a, b, c, n, m) }

    fn rsub_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("rsub_a_b_for_scalar", a, b, c, n, m) }

    fn rsub_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("rsub_at_b_for_scalar", a, b, c, n, m) }

    fn mul_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("mul_a_b_for_scalar", a, b, c, n, m) }

    fn mul_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("mul_at_b_for_scalar", a, b, c, n, m) }

    fn div_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("div_a_b_for_scalar", a, b, c, n, m) }

    fn div_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("div_at_b_for_scalar", a, b, c, n, m) }

    fn rdiv_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("rdiv_a_b_for_scalar", a, b, c, n, m) }

    fn rdiv_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("rdiv_at_b_for_scalar", a, b, c, n, m) }

    // Element-wise activation and math functions.
    fn sigmoid_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("sigmoid_a", a, b, n, m) }

    fn sigmoid_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("sigmoid_at", a, b, n, m) }

    fn tanh_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("tanh_a", a, b, n, m) }

    fn tanh_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("tanh_at", a, b, n, m) }

    fn swish_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("swish_a", a, b, n, m) }

    fn swish_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("swish_at", a, b, n, m) }

    // Softmax needs a per-work-group scratch buffer, so it goes through the
    // "fun and tiles" helper instead of the plain one.
    fn softmax_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun_and_tiles("softmax_a", a, b, n, m) }

    fn softmax_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun_and_tiles("softmax_at", a, b, n, m) }

    fn sqrt_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("sqrt_a", a, b, n, m) }

    fn sqrt_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("sqrt_at", a, b, n, m) }

    // Vector broadcasts: repeat an n-element column / m-element row into an
    // n x m matrix (these helpers use vector-sized input checks).
    fn repeat_col_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_repeat_col("repeat_col_a", a, b, n, m) }

    fn repeat_row_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_repeat_row("repeat_row_a", a, b, n, m) }

    fn abs_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("abs_a", a, b, n, m) }

    fn abs_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("abs_at", a, b, n, m) }

    // Element-wise powers: c = a ^ b (matrix exponent).
    fn pow_a_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("pow_a_b", a, b, c, n, m) }

    fn pow_at_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("pow_at_b", a, b, c, n, m) }

    fn pow_a_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("pow_a_bt", a, b, c, n, m) }

    fn pow_at_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("pow_at_bt", a, b, c, n, m) }

    fn pow_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("pow_a_b_for_scalar", a, b, c, n, m) }

    fn pow_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("pow_at_b_for_scalar", a, b, c, n, m) }

    fn rpow_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("rpow_a_b_for_scalar", a, b, c, n, m) }

    fn rpow_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("rpow_at_b_for_scalar", a, b, c, n, m) }

    fn exp_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("exp_a", a, b, n, m) }

    fn exp_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("exp_at", a, b, n, m) }

    fn ln_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("ln_a", a, b, n, m) }

    fn ln_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("ln_at", a, b, n, m) }

    fn log2_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("log2_a", a, b, n, m) }

    fn log2_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("log2_at", a, b, n, m) }

    fn log10_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("log10_a", a, b, n, m) }

    fn log10_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("log10_at", a, b, n, m) }

    // Trigonometric functions and their inverses.
    fn sin_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("sin_a", a, b, n, m) }

    fn sin_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("sin_at", a, b, n, m) }

    fn cos_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("cos_a", a, b, n, m) }

    fn cos_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("cos_at", a, b, n, m) }

    fn tan_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("tan_a", a, b, n, m) }

    fn tan_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("tan_at", a, b, n, m) }

    fn asin_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("asin_a", a, b, n, m) }

    fn asin_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("asin_at", a, b, n, m) }

    fn acos_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("acos_a", a, b, n, m) }

    fn acos_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("acos_at", a, b, n, m) }

    fn atan_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("atan_a", a, b, n, m) }

    fn atan_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("atan_at", a, b, n, m) }

    fn atan2_a_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("atan2_a_b", a, b, c, n, m) }

    fn atan2_at_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("atan2_at_b", a, b, c, n, m) }

    fn atan2_a_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("atan2_a_bt", a, b, c, n, m) }
824
825 fn atan2_at_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
826 { self.check_and_enqueue_nd_range_for_op("atan2_at_bt", a, b, c, n, m) }
827
828 fn atan2_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
829 { self.check_and_enqueue_nd_range_for_scalar("atan2_a_b_for_scalar", a, b, c, n, m) }
830
831 fn atan2_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
832 { self.check_and_enqueue_nd_range_for_scalar("atan2_at_b_for_scalar", a, b, c, n, m) }
833
834 fn ratan2_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
835 { self.check_and_enqueue_nd_range_for_scalar("ratan2_a_b_for_scalar", a, b, c, n, m) }
836
837 fn ratan2_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
838 { self.check_and_enqueue_nd_range_for_scalar("ratan2_at_b_for_scalar", a, b, c, n, m) }
839
840 fn sinh_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
841 { self.check_and_enqueue_nd_range_for_fun("sinh_a", a, b, n, m) }
842
843 fn sinh_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
844 { self.check_and_enqueue_nd_range_for_fun("sinh_at", a, b, n, m) }
845
846 fn cosh_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
847 { self.check_and_enqueue_nd_range_for_fun("cosh_a", a, b, n, m) }
848
849 fn cosh_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
850 { self.check_and_enqueue_nd_range_for_fun("cosh_at", a, b, n, m) }
851
852 fn asinh_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
853 { self.check_and_enqueue_nd_range_for_fun("asinh_a", a, b, n, m) }
854
855 fn asinh_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
856 { self.check_and_enqueue_nd_range_for_fun("asinh_at", a, b, n, m) }
857
858 fn acosh_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
859 { self.check_and_enqueue_nd_range_for_fun("acosh_a", a, b, n, m) }
860
861 fn acosh_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
862 { self.check_and_enqueue_nd_range_for_fun("acosh_at", a, b, n, m) }
863
864 fn atanh_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
865 { self.check_and_enqueue_nd_range_for_fun("atanh_a", a, b, n, m) }
866
867 fn atanh_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
868 { self.check_and_enqueue_nd_range_for_fun("atanh_at", a, b, n, m) }
869
870 fn signum_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
871 { self.check_and_enqueue_nd_range_for_fun("signum_a", a, b, n, m) }
872
873 fn signum_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
874 { self.check_and_enqueue_nd_range_for_fun("signum_at", a, b, n, m) }
875
876 fn ceil_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
877 { self.check_and_enqueue_nd_range_for_fun("ceil_a", a, b, n, m) }
878
879 fn ceil_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
880 { self.check_and_enqueue_nd_range_for_fun("ceil_at", a, b, n, m) }
881
882 fn floor_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
883 { self.check_and_enqueue_nd_range_for_fun("floor_a", a, b, n, m) }
884
885 fn floor_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
886 { self.check_and_enqueue_nd_range_for_fun("floor_at", a, b, n, m) }
887
888 fn round_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
889 { self.check_and_enqueue_nd_range_for_fun("round_a", a, b, n, m) }
890
891 fn round_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
892 { self.check_and_enqueue_nd_range_for_fun("round_at", a, b, n, m) }
893
894 fn trunc_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
895 { self.check_and_enqueue_nd_range_for_fun("trunc_a", a, b, n, m) }
896
897 fn trunc_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
898 { self.check_and_enqueue_nd_range_for_fun("trunc_at", a, b, n, m) }
899
900 fn max_a_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
901 { self.check_and_enqueue_nd_range_for_op("max_a_b", a, b, c, n, m) }
902
903 fn max_at_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
904 { self.check_and_enqueue_nd_range_for_op("max_at_b", a, b, c, n, m) }
905
906 fn max_a_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
907 { self.check_and_enqueue_nd_range_for_op("max_a_bt", a, b, c, n, m) }
908
909 fn max_at_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
910 { self.check_and_enqueue_nd_range_for_op("max_at_bt", a, b, c, n, m) }
911
912 fn max_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
913 { self.check_and_enqueue_nd_range_for_scalar("max_a_b_for_scalar", a, b, c, n, m) }
914
915 fn max_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
916 { self.check_and_enqueue_nd_range_for_scalar("max_at_b_for_scalar", a, b, c, n, m) }
917
918 fn min_a_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
919 { self.check_and_enqueue_nd_range_for_op("min_a_b", a, b, c, n, m) }
920
921 fn min_at_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
922 { self.check_and_enqueue_nd_range_for_op("min_at_b", a, b, c, n, m) }
923
924 fn min_a_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
925 { self.check_and_enqueue_nd_range_for_op("min_a_bt", a, b, c, n, m) }
926
927 fn min_at_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
928 { self.check_and_enqueue_nd_range_for_op("min_at_bt", a, b, c, n, m) }
929
930 fn min_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
931 { self.check_and_enqueue_nd_range_for_scalar("min_a_b_for_scalar", a, b, c, n, m) }
932
933 fn min_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
934 { self.check_and_enqueue_nd_range_for_scalar("min_at_b_for_scalar", a, b, c, n, m) }
935}
936
// Unit tests live in the sibling `tests` submodule; compiled only for test builds.
#[cfg(test)]
mod tests;