// unmtx_gpu/opencl.rs
1//
2// Copyright (c) 2025-2026 Ɓukasz Szpakowski
3//
4// This Source Code Form is subject to the terms of the Mozilla Public
5// License, v. 2.0. If a copy of the MPL was not distributed with this
6// file, You can obtain one at https://mozilla.org/MPL/2.0/.
7//
8//! A module that contains an OpenCL backend.
9use std::mem::size_of;
10use std::ptr::null_mut;
11use std::sync::Arc;
12use std::sync::Mutex;
13use crate::Backend;
14use crate::BackendArray;
15use crate::Error;
16use crate::Result;
17use crate::mutex_lock;
18
19pub use opencl3::context::Context;
20pub use opencl3::device::Device;
21pub use opencl3::device::CL_DEVICE_TYPE_ACCELERATOR;
22pub use opencl3::device::CL_DEVICE_TYPE_ALL;
23pub use opencl3::device::CL_DEVICE_TYPE_CPU;
24pub use opencl3::device::CL_DEVICE_TYPE_CUSTOM;
25pub use opencl3::device::CL_DEVICE_TYPE_DEFAULT;
26pub use opencl3::device::CL_DEVICE_TYPE_GPU;
27pub use opencl3::device::cl_device_id;
28pub use opencl3::error_codes::ClError;
29pub use opencl3::platform::Platform;
30pub use opencl3::platform::get_platforms;
31
32use cl3::info_type::InfoType;
33use opencl3::command_queue::CommandQueue;
34use opencl3::device::CL_DEVICE_MAX_WORK_GROUP_SIZE;
35use opencl3::device::get_device_info;
36use opencl3::event::Event;
37use opencl3::kernel::ExecuteKernel;
38use opencl3::kernel::Kernel;
39use opencl3::memory::Buffer;
40use opencl3::memory::ClMem;
41use opencl3::memory::cl_mem;
42use opencl3::memory::CL_MEM_READ_WRITE;
43use opencl3::program::Program;
44use opencl3::types::CL_TRUE;
45
// OpenCL C source for all kernels; built into a `Program` in `new_with_context`.
const SOURCE: &'static str = include_str!("opencl.cl");
47
/// A structure of OpenCL backend array.
///
/// This structure contains the reference to an OpenCL buffer.
#[derive(Debug)]
pub struct ClBackendArray
{
    // Shared, mutex-protected device buffer; two arrays may refer to the same
    // buffer (the backend checks this with `Arc::ptr_eq` before locking).
    buffer: Arc<Mutex<Buffer<f32>>>,
    // Number of f32 elements stored in the buffer.
    len: usize,
}
57
// Backend state guarded by the `ClBackend::inner` mutex.
struct ClInnerBackend
{
    // OpenCL context in which buffers and the program are created.
    context: Context,
    // Queue used for all kernel launches and buffer transfers.
    command_queue: CommandQueue,
    // Program built from the bundled `opencl.cl` source.
    program: Program,
    // Maximum work-group size of the device, used for 1D ranges.
    group_size_for_1d: usize,
    // Per-axis group size for 2D ranges (floor of sqrt of the 1D size).
    group_size_for_2d: usize,
}
66
/// A structure of OpenCL backend.
pub struct ClBackend
{
    // All OpenCL state is serialized behind a single mutex.
    inner: Mutex<ClInnerBackend>,
}
72
/// Chooses local and global ND-range sizes for an `n` x `m` problem.
///
/// Returns `(local_n, local_m, global_n, global_m)`. Global sizes are always
/// rounded up to a multiple of the corresponding local size, as OpenCL
/// requires. Vectors (`n == 1` or `m == 1`) get a one-dimensional range
/// unless the launch is a matrix multiplication.
fn preferred_work_sizes(n: usize, m: usize, group_size_for_1d: usize, group_size_for_2d: usize, is_mul: bool) -> (usize, usize, usize, usize)
{
    // Round `x` up to the nearest multiple of `step`.
    let round_up = |x: usize, step: usize| ((x + step - 1) / step) * step;
    if is_mul {
        // Multiplication kernels work on 4x4 element tiles per work item, so
        // the problem size shrinks by 4 and the group size is halved per axis.
        let half = (group_size_for_2d + 1) / 2;
        let global_n = round_up((n + 3) / 4, half);
        let global_m = round_up((m + 3) / 4, half);
        (half, half, global_n, global_m)
    } else if m == 1 {
        // Column vector: one-dimensional range over the rows.
        (group_size_for_1d, 1, round_up(n, group_size_for_1d), 1)
    } else if n == 1 {
        // Row vector: one-dimensional range over the columns.
        (1, group_size_for_1d, 1, round_up(m, group_size_for_1d))
    } else {
        // General matrix: square two-dimensional work groups.
        let side = group_size_for_2d;
        (side, side, round_up(n, side), round_up(m, side))
    }
}
91
92impl ClBackend
93{
94    /// Creates an OpenCL backend for a first OpenCL platform and a first OpenCL device.
95    pub fn new() -> Result<ClBackend>
96    {
97        let platforms = match get_platforms() {
98            Ok(tmp_platforms) => tmp_platforms,
99            Err(err) => return Err(Error::OpenCl(err)),
100        };
101        if platforms.is_empty() {
102            return Err(Error::NoPlatform);
103        }
104        let device_ids = match platforms[0].get_devices(CL_DEVICE_TYPE_DEFAULT) {
105            Ok(tmp_device_ids) => tmp_device_ids,
106            Err(err) => return Err(Error::OpenCl(err)),
107        };
108        if device_ids.is_empty() {
109            return Err(Error::NoDevice);
110        }
111        let device = Device::new(device_ids[0]);
112        let context = match Context::from_device(&device) {
113            Ok(tmp_context) => tmp_context,
114            Err(err) => return Err(Error::OpenCl(err)),
115        };
116        Self::new_with_context(context)
117    }
118    
119    /// Creates an OpenCL backend with the context.
120    pub fn new_with_context(context: Context) -> Result<ClBackend>
121    {
122        let command_queue = match CommandQueue::create_default_with_properties(&context, 0, 0) {
123            Ok(tmp_command_queue) => tmp_command_queue,
124            Err(err) => return Err(Error::OpenCl(err)),
125        };
126        let program = match Program::create_and_build_from_source(&context, SOURCE, "") {
127            Ok(tmp_program) => tmp_program,
128            Err(msg) => return Err(Error::Compilation(msg)),
129        };
130        let group_size_for_1d = match get_device_info(context.default_device(), CL_DEVICE_MAX_WORK_GROUP_SIZE) {
131            Ok(InfoType::Size(tmp_group_size_for_1d)) => tmp_group_size_for_1d,
132            _ => return Err(Error::InvalidDeviceInfoType),
133        };
134        let group_size_for_2d = (group_size_for_1d as f64).sqrt().floor() as usize;
135        let inner = ClInnerBackend {
136            context,
137            command_queue,
138            program,
139            group_size_for_1d,
140            group_size_for_2d,
141        };
142        Ok(ClBackend { inner: Mutex::new(inner), })
143    }
144    
    /// Runs the named kernel over two backend arrays (input `a`, output `b`).
    ///
    /// `f` validates the arrays (e.g. element counts) before anything is
    /// enqueued; `g` receives the locked backend state, the created kernel and
    /// the raw `cl_mem` handles, enqueues the kernel and returns its event.
    /// The method waits for that event before returning.
    fn check_and_enqueue_nd_range2<F, G>(&self, kernel_name: &str, a: &BackendArray, b: &BackendArray, f: F, g: G) -> Result<()>
        where F: FnOnce(&ClBackendArray, &ClBackendArray) -> Result<()>,
            G: FnOnce(&ClInnerBackend, &Kernel, cl_mem, cl_mem) -> Result<Event>
    {
        #[allow(unreachable_patterns)]
        match (a, b) {
            (BackendArray::OpenCl(a2), BackendArray::OpenCl(b2)) => {
                f(a2, b2)?;
                let inner_g = mutex_lock(&self.inner)?;
                let kernel = match Kernel::create(&inner_g.program, kernel_name) {
                    Ok(tmp_kernel) => tmp_kernel,
                    Err(err) => return Err(Error::OpenCl(err)),
                };
                // If `a` and `b` share the same buffer, lock its mutex only
                // once and pass the same handle for input and output.
                let event = if !Arc::ptr_eq(&a2.buffer, &b2.buffer) {
                    let a_buffer_g = mutex_lock(&a2.buffer)?;
                    let mut b_buffer_g = mutex_lock(&b2.buffer)?;
                    g(&*inner_g, &kernel, a_buffer_g.get(), b_buffer_g.get_mut())?
                } else {
                    let mut a_buffer_g = mutex_lock(&a2.buffer)?;
                    g(&*inner_g, &kernel, a_buffer_g.get(), a_buffer_g.get_mut())?
                };
                match event.wait() {
                    Ok(()) => (),
                    Err(err) => return Err(Error::OpenCl(err)),
                }
                Ok(())
            },
            _ => Err(Error::InvalidBackendArray),
        }
    }
175
    /// Runs the named kernel over three backend arrays (inputs `a` and `b`,
    /// output `c`).
    ///
    /// `f` validates the arrays before anything is enqueued; `g` enqueues the
    /// kernel with the locked backend state, the kernel object and the raw
    /// buffer handles, returning the launch event which is awaited here.
    fn check_and_enqueue_nd_range3<F, G>(&self, kernel_name: &str, a: &BackendArray, b: &BackendArray, c: &BackendArray, f: F, g: G) -> Result<()>
        where F: FnOnce(&ClBackendArray, &ClBackendArray, &ClBackendArray) -> Result<()>,
            G: FnOnce(&ClInnerBackend, &Kernel, cl_mem, cl_mem, cl_mem) -> Result<Event>
    {
        #[allow(unreachable_patterns)]
        match (a, b, c) {
            (BackendArray::OpenCl(a2), BackendArray::OpenCl(b2), BackendArray::OpenCl(c2)) => {
                f(a2, b2, c2)?;
                let inner_g = mutex_lock(&self.inner)?;
                let kernel = match Kernel::create(&inner_g.program, kernel_name) {
                    Ok(tmp_kernel) => tmp_kernel,
                    Err(err) => return Err(Error::OpenCl(err)),
                };
                // The three arrays may alias each other's buffers; lock each
                // distinct mutex exactly once and reuse the same handle for
                // aliased arguments.
                let event = match (Arc::ptr_eq(&a2.buffer, &b2.buffer), Arc::ptr_eq(&a2.buffer, &c2.buffer), Arc::ptr_eq(&b2.buffer, &c2.buffer)) {
                    // All three buffers are distinct.
                    (false, false, false) => {
                        let a_buffer_g = mutex_lock(&a2.buffer)?;
                        let b_buffer_g = mutex_lock(&b2.buffer)?;
                        let mut c_buffer_g = mutex_lock(&c2.buffer)?;
                        g(&*inner_g, &kernel, a_buffer_g.get(), b_buffer_g.get(), c_buffer_g.get_mut())?
                    },
                    // a and b share a buffer; c is distinct.
                    (true, false, false) => {
                        let a_buffer_g = mutex_lock(&a2.buffer)?;
                        let mut c_buffer_g = mutex_lock(&c2.buffer)?;
                        g(&*inner_g, &kernel, a_buffer_g.get(), a_buffer_g.get(), c_buffer_g.get_mut())?
                    },
                    // a and c share a buffer; b is distinct.
                    (false, true, false) => {
                        let mut a_buffer_g = mutex_lock(&a2.buffer)?;
                        let b_buffer_g = mutex_lock(&b2.buffer)?;
                        g(&*inner_g, &kernel, a_buffer_g.get(), b_buffer_g.get(), a_buffer_g.get_mut())?
                    },
                    // b and c share a buffer; a is distinct.
                    (false, false, true) => {
                        let a_buffer_g = mutex_lock(&a2.buffer)?;
                        let mut b_buffer_g = mutex_lock(&b2.buffer)?;
                        g(&*inner_g, &kernel, a_buffer_g.get(), b_buffer_g.get(), b_buffer_g.get_mut())?
                    },
                    // All three share one buffer.
                    _ => {
                        let mut a_buffer_g = mutex_lock(&a2.buffer)?;
                        g(&*inner_g, &kernel, a_buffer_g.get(), a_buffer_g.get(), a_buffer_g.get_mut())?
                    },
                };
                match event.wait() {
                    Ok(()) => (),
                    Err(err) => return Err(Error::OpenCl(err)),
                }
                Ok(())
            },
            _ => Err(Error::InvalidBackendArray),
        }
    }
225    
    /// Enqueues a one-input/one-output elementwise kernel over an `n` x `m`
    /// matrix; both arrays must contain exactly `n * m` elements.
    fn check_and_enqueue_nd_range_for_fun(&self, kernel_name: &str, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    {
        self.check_and_enqueue_nd_range2(kernel_name, a, b, |a2, b2| {
                if a2.len != n * m {
                    return Err(Error::BackendArrayElemCount(a2.len, n * m));
                }
                if b2.len != n * m {
                    return Err(Error::BackendArrayElemCount(b2.len, n * m));
                }
                Ok(())
        }, |inner, kernel, a_mem, b_mem| {
                // Dimensions are passed to the kernel as 64-bit integers.
                let n2 = n as u64;
                let m2 = m as u64;
                let (n3, m3, n4, m4) = preferred_work_sizes(n, m, inner.group_size_for_1d, inner.group_size_for_2d, false);
                unsafe {
                    // Argument order must match the kernel signature in opencl.cl:
                    // input, output, n, m.
                    let res = ExecuteKernel::new(kernel)
                    .set_arg(&a_mem)
                    .set_arg(&b_mem)
                    .set_arg(&n2)
                    .set_arg(&m2)
                    .set_local_work_sizes(&[n3, m3])
                    .set_global_work_sizes(&[n4, m4])
                    .enqueue_nd_range(&inner.command_queue);
                    match res {
                        Ok(event) => Ok(event),
                        Err(err) => Err(Error::OpenCl(err)),
                    }
                }
        })
    }
256
    /// Enqueues a two-input/one-output elementwise kernel over `n` x `m`
    /// matrices; all three arrays must contain exactly `n * m` elements.
    fn check_and_enqueue_nd_range_for_op(&self, kernel_name: &str, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    {
        self.check_and_enqueue_nd_range3(kernel_name, a, b, c, |a2, b2, c2| {
                if a2.len != n * m {
                    return Err(Error::BackendArrayElemCount(a2.len, n * m));
                }
                if b2.len != n * m {
                    return Err(Error::BackendArrayElemCount(b2.len, n * m));
                }
                if c2.len != n * m {
                    return Err(Error::BackendArrayElemCount(c2.len, n * m));
                }
                Ok(())
        }, |inner, kernel, a_mem, b_mem, c_mem| {
                // Dimensions are passed to the kernel as 64-bit integers.
                let n2 = n as u64;
                let m2 = m as u64;
                let (n3, m3, n4, m4) = preferred_work_sizes(n, m, inner.group_size_for_1d, inner.group_size_for_2d, false);
                unsafe {
                    // Argument order must match the kernel signature in opencl.cl:
                    // input a, input b, output c, n, m.
                    let res = ExecuteKernel::new(kernel)
                    .set_arg(&a_mem)
                    .set_arg(&b_mem)
                    .set_arg(&c_mem)
                    .set_arg(&n2)
                    .set_arg(&m2)
                    .set_local_work_sizes(&[n3, m3])
                    .set_global_work_sizes(&[n4, m4])
                    .enqueue_nd_range(&inner.command_queue);
                    match res {
                        Ok(event) => Ok(event),
                        Err(err) => Err(Error::OpenCl(err)),
                    }
                }
        })
    }
291
    /// Enqueues a matrix-multiplication kernel: `a` is `n` x `l`, `b` is
    /// `l` x `m` and the output `c` is `n` x `m` (sizes before any transposition
    /// performed by the kernel itself).
    fn check_and_enqueue_nd_range_for_mul(&self, kernel_name: &str, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize, l: usize) -> Result<()>
    {
        self.check_and_enqueue_nd_range3(kernel_name, a, b, c, |a2, b2, c2| {
                if a2.len != n * l {
                    return Err(Error::BackendArrayElemCount(a2.len, n * l));
                }
                if b2.len != l * m {
                    return Err(Error::BackendArrayElemCount(b2.len, l * m));
                }
                if c2.len != n * m {
                    return Err(Error::BackendArrayElemCount(c2.len, n * m));
                }
                Ok(())
        }, |inner, kernel, a_mem, b_mem, c_mem| {
                // Dimensions are passed to the kernel as 64-bit integers.
                let n2 = n as u64;
                let m2 = m as u64;
                let l2 = l as u64;
                // `true`: multiplication kernels use 4x4 tiles per work item.
                let (n3, m3, n4, m4) = preferred_work_sizes(n, m, inner.group_size_for_1d, inner.group_size_for_2d, true);
                unsafe {
                    // Two local scratch buffers of n3 * m3 * 4 floats each —
                    // presumably tile storage for the a and b sub-blocks;
                    // confirm against the kernels in opencl.cl.
                    let res = ExecuteKernel::new(kernel)
                    .set_arg(&a_mem)
                    .set_arg(&b_mem)
                    .set_arg(&c_mem)
                    .set_arg_local_buffer(n3 * m3 * 4 * size_of::<f32>())
                    .set_arg_local_buffer(n3 * m3 * 4 * size_of::<f32>())
                    .set_arg(&n2)
                    .set_arg(&m2)
                    .set_arg(&l2)
                    .set_local_work_sizes(&[n3, m3])
                    .set_global_work_sizes(&[n4, m4])
                    .enqueue_nd_range(&inner.command_queue);
                    match res {
                        Ok(event) => Ok(event),
                        Err(err) => Err(Error::OpenCl(err)),
                    }
                }
        })
    }
330
    /// Enqueues a kernel combining the `n` x `m` matrix `a` with the scalar `b`,
    /// writing the result to the `n` x `m` matrix `c`.
    fn check_and_enqueue_nd_range_for_scalar(&self, kernel_name: &str, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    {
        self.check_and_enqueue_nd_range2(kernel_name, a, c, |a2, c2| {
                if a2.len != n * m  {
                    return Err(Error::BackendArrayElemCount(a2.len, n * m));
                }
                if c2.len != n * m {
                    return Err(Error::BackendArrayElemCount(c2.len, n * m));
                }
                Ok(())
        }, |inner, kernel, a_mem, c_mem| {
                // Dimensions are passed to the kernel as 64-bit integers.
                let n2 = n as u64;
                let m2 = m as u64;
                let (n3, m3, n4, m4) = preferred_work_sizes(n, m, inner.group_size_for_1d, inner.group_size_for_2d, false);
                unsafe {
                    // Argument order must match the kernel signature in opencl.cl:
                    // input a, scalar b, output c, n, m.
                    let res = ExecuteKernel::new(kernel)
                    .set_arg(&a_mem)
                    .set_arg(&b)
                    .set_arg(&c_mem)
                    .set_arg(&n2)
                    .set_arg(&m2)
                    .set_local_work_sizes(&[n3, m3])
                    .set_global_work_sizes(&[n4, m4])
                    .enqueue_nd_range(&inner.command_queue);
                    match res {
                        Ok(event) => Ok(event),
                        Err(err) => Err(Error::OpenCl(err)),
                    }
                }
        })
    }
362
    /// Like [`Self::check_and_enqueue_nd_range_for_fun`], but the kernel
    /// additionally receives one local scratch buffer of `n3 * m3` floats
    /// (one element per work item in the group).
    fn check_and_enqueue_nd_range_for_fun_and_tiles(&self, kernel_name: &str, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    {
        self.check_and_enqueue_nd_range2(kernel_name, a, b, |a2, b2| {
                if a2.len != n * m {
                    return Err(Error::BackendArrayElemCount(a2.len, n * m));
                }
                if b2.len != n * m {
                    return Err(Error::BackendArrayElemCount(b2.len, n * m));
                }
                Ok(())
        }, |inner, kernel, a_mem, b_mem| {
                // Dimensions are passed to the kernel as 64-bit integers.
                let n2 = n as u64;
                let m2 = m as u64;
                let (n3, m3, n4, m4) = preferred_work_sizes(n, m, inner.group_size_for_1d, inner.group_size_for_2d, false);
                unsafe {
                    // Local scratch buffer sized to the work group
                    // (n3 * m3 floats).
                    let res = ExecuteKernel::new(kernel)
                    .set_arg(&a_mem)
                    .set_arg(&b_mem)
                    .set_arg_local_buffer(n3 * m3 *size_of::<f32>())
                    .set_arg(&n2)
                    .set_arg(&m2)
                    .set_local_work_sizes(&[n3, m3])
                    .set_global_work_sizes(&[n4, m4])
                    .enqueue_nd_range(&inner.command_queue);
                    match res {
                        Ok(event) => Ok(event),
                        Err(err) => Err(Error::OpenCl(err)),
                    }
                }
        })
    }
394
    /// Enqueues a kernel that takes an `n`-element input `a` and produces an
    /// `n` x `m` output `b` — presumably repeating `a` as a column across `m`
    /// columns (see the kernel of the same name in opencl.cl).
    fn check_and_enqueue_nd_range_for_repeat_col(&self, kernel_name: &str, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    {
        self.check_and_enqueue_nd_range2(kernel_name, a, b, |a2, b2| {
                // The input is a vector of n elements, not an n x m matrix.
                if a2.len != n {
                    return Err(Error::BackendArrayElemCount(a2.len, n));
                }
                if b2.len != n * m {
                    return Err(Error::BackendArrayElemCount(b2.len, n * m));
                }
                Ok(())
        }, |inner, kernel, a_mem, b_mem| {
                // Dimensions are passed to the kernel as 64-bit integers.
                let n2 = n as u64;
                let m2 = m as u64;
                let (n3, m3, n4, m4) = preferred_work_sizes(n, m, inner.group_size_for_1d, inner.group_size_for_2d, false);
                unsafe {
                    let res = ExecuteKernel::new(kernel)
                    .set_arg(&a_mem)
                    .set_arg(&b_mem)
                    .set_arg(&n2)
                    .set_arg(&m2)
                    .set_local_work_sizes(&[n3, m3])
                    .set_global_work_sizes(&[n4, m4])
                    .enqueue_nd_range(&inner.command_queue);
                    match res {
                        Ok(event) => Ok(event),
                        Err(err) => Err(Error::OpenCl(err)),
                    }
                }
        })
    }
425
    /// Enqueues a kernel that takes an `m`-element input `a` and produces an
    /// `n` x `m` output `b` — presumably repeating `a` as a row across `n`
    /// rows (see the kernel of the same name in opencl.cl).
    fn check_and_enqueue_nd_range_for_repeat_row(&self, kernel_name: &str, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    {
        self.check_and_enqueue_nd_range2(kernel_name, a, b, |a2, b2| {
                // The input is a vector of m elements, not an n x m matrix.
                if a2.len != m {
                    return Err(Error::BackendArrayElemCount(a2.len, m));
                }
                if b2.len != n * m {
                    return Err(Error::BackendArrayElemCount(b2.len, n * m));
                }
                Ok(())
        }, |inner, kernel, a_mem, b_mem| {
                // Dimensions are passed to the kernel as 64-bit integers.
                let n2 = n as u64;
                let m2 = m as u64;
                let (n3, m3, n4, m4) = preferred_work_sizes(n, m, inner.group_size_for_1d, inner.group_size_for_2d, false);
                unsafe {
                    let res = ExecuteKernel::new(kernel)
                    .set_arg(&a_mem)
                    .set_arg(&b_mem)
                    .set_arg(&n2)
                    .set_arg(&m2)
                    .set_local_work_sizes(&[n3, m3])
                    .set_global_work_sizes(&[n4, m4])
                    .enqueue_nd_range(&inner.command_queue);
                    match res {
                        Ok(event) => Ok(event),
                        Err(err) => Err(Error::OpenCl(err)),
                    }
                }
        })
    }
456}
457
458impl Backend for ClBackend
459{
    /// Returns the backend name.
    fn name(&self) -> &'static str
    { "OpenCL" }
    
    /// This backend never uses cuBLAS.
    fn has_cublas(&self) -> bool
    { false }
465    
466    unsafe fn alloc(&self, n: usize) -> Result<BackendArray>
467    {
468        let inner_g = mutex_lock(&self.inner)?;
469        let buffer: Buffer<f32> = match Buffer::create(&inner_g.context, CL_MEM_READ_WRITE, n, null_mut()) {
470            Ok(tmp_buffer) => tmp_buffer,
471            Err(err) => return Err(Error::OpenCl(err)),
472        };
473        let cl_array = ClBackendArray { buffer: Arc::new(Mutex::new(buffer)), len: n, };
474        Ok(BackendArray::OpenCl(cl_array))
475    }
476
477    fn alloc_and_store_zeros(&self, n: usize) -> Result<BackendArray>
478    {
479        let inner_g = mutex_lock(&self.inner)?;
480        let mut buffer: Buffer<f32> = match unsafe { Buffer::create(&inner_g.context, CL_MEM_READ_WRITE, n, null_mut()) } {
481            Ok(tmp_buffer) => tmp_buffer,
482            Err(err) => return Err(Error::OpenCl(err)),
483        };
484        let event = match unsafe { inner_g.command_queue.enqueue_fill_buffer(&mut buffer, &[0.0f32], 0, n * size_of::<f32>(), &[]) } {
485            Ok(tmp_event) => tmp_event,
486            Err(err) => return Err(Error::OpenCl(err)),
487        };
488        match event.wait() {
489            Ok(()) => (),
490            Err(err) => return Err(Error::OpenCl(err)),
491        }
492        let cl_array = ClBackendArray { buffer: Arc::new(Mutex::new(buffer)), len: n, };
493        Ok(BackendArray::OpenCl(cl_array))
494    }
495    
496    fn alloc_and_store(&self, elems: &[f32]) -> Result<BackendArray>
497    {
498        let inner_g = mutex_lock(&self.inner)?;
499        let mut buffer: Buffer<f32> = match unsafe { Buffer::create(&inner_g.context, CL_MEM_READ_WRITE, elems.len(), null_mut()) } {
500            Ok(tmp_buffer) => tmp_buffer,
501            Err(err) => return Err(Error::OpenCl(err)),
502        };
503        let event = match unsafe { inner_g.command_queue.enqueue_write_buffer(&mut buffer, CL_TRUE, 0, elems, &[]) } {
504            Ok(tmp_event) => tmp_event,
505            Err(err) => return Err(Error::OpenCl(err)),
506        };
507        match event.wait() {
508            Ok(()) => (),
509            Err(err) => return Err(Error::OpenCl(err)),
510        }
511        let cl_array = ClBackendArray { buffer: Arc::new(Mutex::new(buffer)), len: elems.len(), };
512        Ok(BackendArray::OpenCl(cl_array))
513    }
514    
515    fn load(&self, a: &BackendArray, elems: &mut [f32]) -> Result<()>
516    {
517        #[allow(unreachable_patterns)]
518        match a {
519            BackendArray::OpenCl(a2) => {
520                if a2.len != elems.len() {
521                    return Err(Error::BackendArrayElemCount(a2.len, elems.len()));
522                }
523                let inner_g = mutex_lock(&self.inner)?;
524                let a_buffer_g = mutex_lock(&a2.buffer)?;
525                let event = match unsafe { inner_g.command_queue.enqueue_read_buffer(&*a_buffer_g, CL_TRUE, 0, elems, &[]) } {
526                    Ok(tmp_event) => tmp_event,
527                    Err(err) => return Err(Error::OpenCl(err)),
528                };
529                match event.wait() {
530                    Ok(()) => (),
531                    Err(err) => return Err(Error::OpenCl(err)),
532                }
533            },
534            _ => return Err(Error::InvalidBackendArray),
535        }
536        Ok(())
537    }
538
539    fn store(&self, a: &BackendArray, elems: &[f32]) -> Result<()>
540    {
541        #[allow(unreachable_patterns)]
542        match a {
543            BackendArray::OpenCl(a2) => {
544                if a2.len != elems.len() {
545                    return Err(Error::BackendArrayElemCount(a2.len, elems.len()));
546                }
547                let inner_g = mutex_lock(&self.inner)?;
548                let mut a_buffer_g = mutex_lock(&a2.buffer)?;
549                let event = match unsafe { inner_g.command_queue.enqueue_write_buffer(&mut *a_buffer_g, CL_TRUE, 0, elems, &[]) } {
550                    Ok(tmp_event) => tmp_event,
551                    Err(err) => return Err(Error::OpenCl(err)),
552                };
553                match event.wait() {
554                    Ok(()) => (),
555                    Err(err) => return Err(Error::OpenCl(err)),
556                }
557            },
558            _ => return Err(Error::InvalidBackendArray),
559        }
560        Ok(())
561    }
562    
563    fn copy(&self, a: &BackendArray, b: &BackendArray) -> Result<()>
564    {
565        #[allow(unreachable_patterns)]
566        match (a, b) {
567            (BackendArray::OpenCl(a2), BackendArray::OpenCl(b2)) => {
568                if Arc::ptr_eq(&a2.buffer, &b2.buffer) {
569                    return Ok(());
570                }
571                if a2.len != b2.len {
572                    return Err(Error::TwoBackendArrayElemCounts(a2.len, b2.len));
573                }
574                let inner_g = mutex_lock(&self.inner)?;
575                let a_buffer_g = mutex_lock(&a2.buffer)?;
576                let mut b_buffer_g = mutex_lock(&b2.buffer)?;
577                let event = match unsafe { inner_g.command_queue.enqueue_copy_buffer(&*a_buffer_g, &mut *b_buffer_g, 0, 0, a2.len * size_of::<f32>(), &[]) } {
578                    Ok(tmp_event) => tmp_event,
579                    Err(err) => return Err(Error::OpenCl(err)),
580                };
581                match event.wait() {
582                    Ok(()) => (),
583                    Err(err) => return Err(Error::OpenCl(err)),
584                }
585            },
586            _ => return Err(Error::InvalidBackendArray),
587        }
588        Ok(())
589    }
590
591    fn transpose_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
592    { self.check_and_enqueue_nd_range_for_fun("transpose_a", a, b, n, m) }
593
594    fn add_a_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
595    { self.check_and_enqueue_nd_range_for_op("add_a_b", a, b, c, n, m) }
596
597    fn add_at_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
598    { self.check_and_enqueue_nd_range_for_op("add_at_b", a, b, c, n, m) }
599    
600    fn add_a_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
601    { self.check_and_enqueue_nd_range_for_op("add_a_bt", a, b, c, n, m) }
602
603    fn add_at_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
604    { self.check_and_enqueue_nd_range_for_op("add_at_bt", a, b, c, n, m) }
605
606    fn sub_a_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
607    { self.check_and_enqueue_nd_range_for_op("sub_a_b", a, b, c, n, m) }
608
609    fn sub_at_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
610    { self.check_and_enqueue_nd_range_for_op("sub_at_b", a, b, c, n, m) }
611    
612    fn sub_a_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
613    { self.check_and_enqueue_nd_range_for_op("sub_a_bt", a, b, c, n, m) }
614
615    fn sub_at_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>    
616    { self.check_and_enqueue_nd_range_for_op("sub_at_bt", a, b, c, n, m) }
617    
618    fn mul_a_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize, l: usize) -> Result<()>
619    { self.check_and_enqueue_nd_range_for_mul("mul_a_b", a, b, c, n, m, l) }
620
621    fn mul_at_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize, l: usize) -> Result<()>
622    { self.check_and_enqueue_nd_range_for_mul("mul_at_b", a, b, c, n, m, l) }
623
624    fn mul_a_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize, l: usize) -> Result<()>
625    { self.check_and_enqueue_nd_range_for_mul("mul_a_bt", a, b, c, n, m, l) }
626
627    fn mul_at_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize, l: usize) -> Result<()>
628    { self.check_and_enqueue_nd_range_for_mul("mul_at_bt", a, b, c, n, m, l) }
629
630    fn mul_a_b_for_elems(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
631    { self.check_and_enqueue_nd_range_for_op("mul_a_b_for_elems", a, b, c, n, m) }
632
633    fn mul_at_b_for_elems(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
634    { self.check_and_enqueue_nd_range_for_op("mul_at_b_for_elems", a, b, c, n, m) }
635    
636    fn mul_a_bt_for_elems(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
637    { self.check_and_enqueue_nd_range_for_op("mul_a_bt_for_elems", a, b, c, n, m) }
638    
639    fn mul_at_bt_for_elems(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
640    { self.check_and_enqueue_nd_range_for_op("mul_at_bt_for_elems", a, b, c, n, m) }
641
642    fn div_a_b_for_elems(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
643    { self.check_and_enqueue_nd_range_for_op("div_a_b_for_elems", a, b, c, n, m) }
644
645    fn div_at_b_for_elems(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
646    { self.check_and_enqueue_nd_range_for_op("div_at_b_for_elems", a, b, c, n, m) }
647    
648    fn div_a_bt_for_elems(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
649    { self.check_and_enqueue_nd_range_for_op("div_a_bt_for_elems", a, b, c, n, m) }
650    
651    fn div_at_bt_for_elems(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
652    { self.check_and_enqueue_nd_range_for_op("div_at_bt_for_elems", a, b, c, n, m) }
653
    // Matrix-scalar arithmetic: b is an f32 broadcast over all elements of a,
    // result written to c. All variants route through the scalar enqueue helper.
    // NOTE(review): the `r` prefix (rsub/rdiv) presumably means the operand
    // order is reversed in the kernel (scalar op element) — confirm in opencl.cl.
    fn add_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("add_a_b_for_scalar", a, b, c, n, m) }

    fn add_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("add_at_b_for_scalar", a, b, c, n, m) }

    fn sub_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("sub_a_b_for_scalar", a, b, c, n, m) }

    fn sub_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("sub_at_b_for_scalar", a, b, c, n, m) }

    fn rsub_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("rsub_a_b_for_scalar", a, b, c, n, m) }

    fn rsub_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("rsub_at_b_for_scalar", a, b, c, n, m) }

    fn mul_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("mul_a_b_for_scalar", a, b, c, n, m) }

    fn mul_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("mul_at_b_for_scalar", a, b, c, n, m) }

    fn div_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("div_a_b_for_scalar", a, b, c, n, m) }

    fn div_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("div_at_b_for_scalar", a, b, c, n, m) }

    fn rdiv_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("rdiv_a_b_for_scalar", a, b, c, n, m) }

    fn rdiv_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("rdiv_at_b_for_scalar", a, b, c, n, m) }
689
    // Unary math functions b = f(a). Most go through the plain per-element
    // enqueue helper; softmax alone uses the tiled variant, since it needs
    // more than an independent per-element pass (it reduces over elements).
    fn sigmoid_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("sigmoid_a", a, b, n, m) }

    fn sigmoid_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("sigmoid_at", a, b, n, m) }

    fn tanh_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("tanh_a", a, b, n, m) }

    fn tanh_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("tanh_at", a, b, n, m) }

    fn swish_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("swish_a", a, b, n, m) }

    fn swish_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("swish_at", a, b, n, m) }

    // Softmax requires a reduction, so it is dispatched with work-group tiles.
    fn softmax_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun_and_tiles("softmax_a", a, b, n, m) }

    fn softmax_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun_and_tiles("softmax_at", a, b, n, m) }

    fn sqrt_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("sqrt_a", a, b, n, m) }

    fn sqrt_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("sqrt_at", a, b, n, m) }
719    
    // Broadcast a column/row of a into b via the dedicated repeat helpers,
    // which use their own NDRange setup distinct from the per-element one.
    fn repeat_col_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_repeat_col("repeat_col_a", a, b, n, m) }

    fn repeat_row_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_repeat_row("repeat_row_a", a, b, n, m) }
725
    // Element-wise absolute value b = |a|.
    fn abs_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("abs_a", a, b, n, m) }

    fn abs_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("abs_at", a, b, n, m) }
731
    // Element-wise power: matrix^matrix variants use the binary-op helper,
    // matrix^scalar variants the scalar helper. `rpow` presumably computes
    // scalar^element (reversed operands) — confirm against opencl.cl.
    fn pow_a_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("pow_a_b", a, b, c, n, m) }

    fn pow_at_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("pow_at_b", a, b, c, n, m) }

    fn pow_a_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("pow_a_bt", a, b, c, n, m) }

    fn pow_at_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("pow_at_bt", a, b, c, n, m) }

    fn pow_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("pow_a_b_for_scalar", a, b, c, n, m) }

    fn pow_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("pow_at_b_for_scalar", a, b, c, n, m) }

    fn rpow_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("rpow_a_b_for_scalar", a, b, c, n, m) }

    fn rpow_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("rpow_at_b_for_scalar", a, b, c, n, m) }
755
    // Element-wise exponential and logarithms (natural, base-2, base-10).
    fn exp_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("exp_a", a, b, n, m) }

    fn exp_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("exp_at", a, b, n, m) }

    fn ln_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("ln_a", a, b, n, m) }

    fn ln_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("ln_at", a, b, n, m) }

    fn log2_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("log2_a", a, b, n, m) }

    fn log2_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("log2_at", a, b, n, m) }

    fn log10_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("log10_a", a, b, n, m) }

    fn log10_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("log10_at", a, b, n, m) }
779
    // Element-wise trigonometric functions and their inverses.
    fn sin_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("sin_a", a, b, n, m) }

    fn sin_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("sin_at", a, b, n, m) }

    fn cos_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("cos_a", a, b, n, m) }

    fn cos_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("cos_at", a, b, n, m) }

    fn tan_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("tan_a", a, b, n, m) }

    fn tan_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("tan_at", a, b, n, m) }

    fn asin_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("asin_a", a, b, n, m) }

    fn asin_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("asin_at", a, b, n, m) }

    fn acos_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("acos_a", a, b, n, m) }

    fn acos_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("acos_at", a, b, n, m) }

    fn atan_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("atan_a", a, b, n, m) }

    fn atan_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("atan_at", a, b, n, m) }
815
    // Two-argument arctangent: matrix-matrix variants via the binary-op helper,
    // matrix-scalar via the scalar helper. `ratan2` presumably swaps the
    // operand roles (scalar as y) — confirm against opencl.cl.
    fn atan2_a_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("atan2_a_b", a, b, c, n, m) }

    fn atan2_at_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("atan2_at_b", a, b, c, n, m) }

    fn atan2_a_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("atan2_a_bt", a, b, c, n, m) }

    fn atan2_at_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("atan2_at_bt", a, b, c, n, m) }

    fn atan2_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("atan2_a_b_for_scalar", a, b, c, n, m) }

    fn atan2_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("atan2_at_b_for_scalar", a, b, c, n, m) }

    fn ratan2_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("ratan2_a_b_for_scalar", a, b, c, n, m) }

    fn ratan2_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("ratan2_at_b_for_scalar", a, b, c, n, m) }
839
    // Element-wise hyperbolic functions and their inverses.
    fn sinh_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("sinh_a", a, b, n, m) }

    fn sinh_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("sinh_at", a, b, n, m) }

    fn cosh_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("cosh_a", a, b, n, m) }

    fn cosh_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("cosh_at", a, b, n, m) }

    fn asinh_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("asinh_a", a, b, n, m) }

    fn asinh_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("asinh_at", a, b, n, m) }

    fn acosh_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("acosh_a", a, b, n, m) }

    fn acosh_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("acosh_at", a, b, n, m) }

    fn atanh_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("atanh_a", a, b, n, m) }

    fn atanh_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("atanh_at", a, b, n, m) }
869
    // Element-wise sign and rounding operations.
    fn signum_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("signum_a", a, b, n, m) }

    fn signum_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("signum_at", a, b, n, m) }

    fn ceil_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("ceil_a", a, b, n, m) }

    fn ceil_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("ceil_at", a, b, n, m) }

    fn floor_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("floor_a", a, b, n, m) }

    fn floor_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("floor_at", a, b, n, m) }

    fn round_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("round_a", a, b, n, m) }

    fn round_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("round_at", a, b, n, m) }

    fn trunc_a(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("trunc_a", a, b, n, m) }

    fn trunc_at(&self, a: &BackendArray, b: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_fun("trunc_at", a, b, n, m) }
899
    // Element-wise maximum and minimum: matrix-matrix variants via the
    // binary-op helper, matrix-scalar via the scalar helper. max/min are
    // symmetric, which is consistent with there being no `rmax`/`rmin`.
    fn max_a_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("max_a_b", a, b, c, n, m) }

    fn max_at_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("max_at_b", a, b, c, n, m) }

    fn max_a_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("max_a_bt", a, b, c, n, m) }

    fn max_at_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("max_at_bt", a, b, c, n, m) }

    fn max_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("max_a_b_for_scalar", a, b, c, n, m) }

    fn max_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("max_at_b_for_scalar", a, b, c, n, m) }

    fn min_a_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("min_a_b", a, b, c, n, m) }

    fn min_at_b(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("min_at_b", a, b, c, n, m) }

    fn min_a_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("min_a_bt", a, b, c, n, m) }

    fn min_at_bt(&self, a: &BackendArray, b: &BackendArray, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_op("min_at_bt", a, b, c, n, m) }

    fn min_a_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("min_a_b_for_scalar", a, b, c, n, m) }

    fn min_at_b_for_scalar(&self, a: &BackendArray, b: f32, c: &BackendArray, n: usize, m: usize) -> Result<()>
    { self.check_and_enqueue_nd_range_for_scalar("min_at_b_for_scalar", a, b, c, n, m) }
935}
936
// Unit tests for this backend live in the sibling `tests` submodule,
// compiled only in test builds.
#[cfg(test)]
mod tests;