1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
use custos::{CLDevice, CPU, opencl::construct_buffer, VecRead};
use crate::Matrix;
/// Runs the CPU operation `f` on the data of an OpenCL `matrix` and returns the
/// result as a matrix belonging to `device`.
///
/// A fresh [`CPU`] device is created for the call and passed to `f`.
///
/// Two paths exist:
/// * If `device.unified_mem()` is true and the crate was built without the
///   `safe` feature, `f` receives a clone of `matrix` directly and the buffer of
///   its result is handed to `construct_buffer`.
///   NOTE(review): presumably this wraps the host allocation for the OpenCL
///   device without copying — confirm against the custos implementation.
/// * Otherwise `f` receives either a clone of `matrix` (unified memory with the
///   `safe` feature) or a CPU matrix built from `device.read(..)`, and the
///   result is converted with `Matrix::from((device, result))`.
///
/// # Errors
/// Propagates the error returned by `construct_buffer` on the first path. The
/// second path always returns `Ok`.
pub fn cpu_exec<T, F>(device: &CLDevice, matrix: &Matrix<T>, f: F) -> custos::Result<Matrix<T>>
where
    F: Fn(&CPU, Matrix<T>) -> Matrix<T>,
    T: Copy + Default,
{
    let cpu = CPU::new();
    let unified = device.unified_mem();

    // Fast path: only taken when the `safe` feature is disabled at compile time.
    if unified && !cfg!(feature = "safe") {
        let result = f(&cpu, matrix.clone());
        let wrapped = construct_buffer(device, &cpu, result.to_buf())?;
        return Ok((wrapped, result.dims()).into());
    }

    // Fallback: obtain a matrix the CPU closure can work on.
    let host_input = if unified {
        matrix.clone()
    } else {
        let dims = matrix.dims();
        let host_data = device.read(matrix.as_buf());
        Matrix::from((&cpu, dims, host_data))
    };

    let output = f(&cpu, host_input);
    Ok(Matrix::from((device, output)))
}
/// Runs the binary CPU operation `f` on the data of two OpenCL matrices and
/// returns the result as a matrix belonging to `device`.
///
/// A fresh [`CPU`] device is created for the call and passed to `f` together
/// with references to both operands.
///
/// Two paths exist:
/// * If `device.unified_mem()` is true and the crate was built without the
///   `safe` feature, `f` is called on `lhs` and `rhs` as given and the buffer of
///   its result is handed to `construct_buffer`.
///   NOTE(review): presumably this wraps the host allocation for the OpenCL
///   device without copying — confirm against the custos implementation.
/// * Otherwise `f` receives either clones of the operands (unified memory with
///   the `safe` feature) or CPU matrices built from `device.read(..)`, and the
///   result is converted with `Matrix::from((device, result))`.
///
/// # Errors
/// Propagates the error returned by `construct_buffer` on the first path. The
/// second path always returns `Ok`.
pub fn cpu_exec_lhs_rhs<T, F>(
    device: &CLDevice,
    lhs: &Matrix<T>,
    rhs: &Matrix<T>,
    f: F,
) -> custos::Result<Matrix<T>>
where
    F: Fn(&CPU, &Matrix<T>, &Matrix<T>) -> Matrix<T>,
    T: Copy + Default,
{
    let cpu = CPU::new();
    let unified = device.unified_mem();

    // Fast path: only taken when the `safe` feature is disabled at compile time.
    if unified && !cfg!(feature = "safe") {
        let result = f(&cpu, lhs, rhs);
        let dims = result.dims();
        let wrapped = construct_buffer(device, &cpu, result.to_buf())?;
        return Ok((wrapped, dims).into());
    }

    // Fallback: obtain operands the CPU closure can work on (left first, then right).
    let (host_lhs, host_rhs) = if unified {
        (lhs.clone(), rhs.clone())
    } else {
        let left = Matrix::from((&cpu, lhs.dims(), device.read(lhs.as_buf())));
        let right = Matrix::from((&cpu, rhs.dims(), device.read(rhs.as_buf())));
        (left, right)
    };

    let output = f(&cpu, &host_lhs, &host_rhs);
    Ok(Matrix::from((device, output)))
}
/// Runs the CPU reduction `f` on the data of an OpenCL `matrix` and returns the
/// scalar it produces.
///
/// A fresh [`CPU`] device is created for the call and passed to `f`. If
/// `device.unified_mem()` is true, `f` receives a clone of `matrix`; otherwise
/// it receives a CPU matrix built from `device.read(..)` with the same
/// dimensions.
pub fn cpu_exec_scalar<T, F>(device: &CLDevice, matrix: &Matrix<T>, f: F) -> T
where
    F: Fn(&CPU, Matrix<T>) -> T,
    T: Copy + Default,
{
    let cpu = CPU::new();

    // With unified memory the matrix is passed on as a plain clone.
    if device.unified_mem() {
        return f(&cpu, matrix.clone());
    }

    // Otherwise build a CPU matrix from the data read back through the device.
    let dims = matrix.dims();
    let host_data = device.read(matrix.as_buf());
    f(&cpu, Matrix::from((&cpu, dims, host_data)))
}