// In-place assignment operations (`AssignOps`) for the CPU, OpenCL and CUDA
// devices, plus the CPU helper `ew_op` for binary operations on matrices.
use custos::{cache::Cache, number::Number, Buffer, CPU};
#[cfg(any(feature = "cuda", feature = "opencl"))]
use custos::CDatatype;
#[cfg(feature = "opencl")]
use crate::cl_tew_self;
#[cfg(feature = "opencl")]
use custos::CLDevice;
#[cfg(feature = "cuda")]
use crate::cu_ew_self;
use crate::{assign_to_lhs, element_wise_op_mut, Matrix};
/// In-place arithmetic between two buffers, implemented once per compute device.
///
/// Both methods take `lhs` mutably and `rhs` immutably and return nothing, so
/// the outcome of the operation is observable only through `lhs`.
pub trait AssignOps<T> {
    /// Addition assignment: `lhs` is the mutable operand, `rhs` is only read.
    fn add_assign(&self, lhs: &mut Buffer<T>, rhs: &Buffer<T>);

    /// Subtraction assignment: `lhs` is the mutable operand, `rhs` is only read.
    fn sub_assign(&self, lhs: &mut Buffer<T>, rhs: &Buffer<T>);
}
/// Applies the binary function `f` to `lhs` and `rhs` on the CPU and returns
/// the result as a new [`Matrix`].
///
/// The output buffer is requested from [`Cache::get`] using `device`, the size
/// of `lhs`, and the node indices of both operands. It is then filled by
/// `element_wise_op_mut` and paired with the dimensions of `lhs`.
///
/// NOTE(review): `rhs` is not checked against the size of `lhs` here —
/// presumably `element_wise_op_mut` or the caller guarantees matching shapes;
/// confirm.
pub fn ew_op<'a, T, F>(
    device: &'a CPU,
    lhs: &Matrix<T>,
    rhs: &Matrix<T>,
    f: F,
) -> Matrix<'a, T>
where
    T: Copy + Default,
    F: Fn(T, T) -> T,
{
    // Both operand node indices are handed to the cache with the request.
    let operand_nodes = [lhs.node.idx, rhs.node.idx];
    let mut result = Cache::get(device, lhs.size(), operand_nodes);

    element_wise_op_mut(lhs, rhs, &mut result, f);

    // The result takes its dimensions from the left-hand operand.
    (result, lhs.dims()).into()
}
/// CPU implementation: both operations delegate to `assign_to_lhs` and differ
/// only in the compound-assignment closure they pass along.
impl<T> AssignOps<T> for CPU
where
    T: Number,
{
    /// Calls `assign_to_lhs` with a closure that applies `+=` to its first argument.
    fn add_assign(&self, lhs: &mut Buffer<T>, rhs: &Buffer<T>) {
        assign_to_lhs(lhs, rhs, |target, value| *target += value)
    }

    /// Calls `assign_to_lhs` with a closure that applies `-=` to its first argument.
    fn sub_assign(&self, lhs: &mut Buffer<T>, rhs: &Buffer<T>) {
        assign_to_lhs(lhs, rhs, |target, value| *target -= value)
    }
}
#[cfg(feature = "opencl")]
/// OpenCL implementation (compiled only with the `opencl` feature): both
/// operations delegate to `cl_tew_self`, passing the operator as a string.
impl<T> AssignOps<T> for CLDevice
where
    T: CDatatype,
{
    /// Calls `cl_tew_self` with the operator string `"+"`.
    ///
    /// # Panics
    /// Panics if `cl_tew_self` does not succeed, because its result is unwrapped.
    fn add_assign(&self, lhs: &mut Buffer<T>, rhs: &Buffer<T>) {
        let outcome = cl_tew_self(self, lhs, rhs, "+");
        outcome.unwrap()
    }

    /// Calls `cl_tew_self` with the operator string `"-"`.
    ///
    /// # Panics
    /// Panics if `cl_tew_self` does not succeed, because its result is unwrapped.
    fn sub_assign(&self, lhs: &mut Buffer<T>, rhs: &Buffer<T>) {
        let outcome = cl_tew_self(self, lhs, rhs, "-");
        outcome.unwrap()
    }
}
#[cfg(feature = "cuda")]
/// CUDA implementation (compiled only with the `cuda` feature): both
/// operations delegate to `cu_ew_self`, passing the operator as a string.
impl<T> AssignOps<T> for custos::CudaDevice
where
    T: CDatatype,
{
    /// Calls `cu_ew_self` with the operator string `"+"`.
    ///
    /// # Panics
    /// Panics if `cu_ew_self` does not succeed, because its result is unwrapped.
    fn add_assign(&self, lhs: &mut Buffer<T>, rhs: &Buffer<T>) {
        let outcome = cu_ew_self(self, lhs, rhs, "+");
        outcome.unwrap();
    }

    /// Calls `cu_ew_self` with the operator string `"-"`.
    ///
    /// # Panics
    /// Panics if `cu_ew_self` does not succeed, because its result is unwrapped.
    fn sub_assign(&self, lhs: &mut Buffer<T>, rhs: &Buffer<T>) {
        let outcome = cu_ew_self(self, lhs, rhs, "-");
        outcome.unwrap();
    }
}