1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
use crate::Matrix;
use custos::{number::Number, CDatatype, Device, MainMemory, CPU};
#[cfg(feature = "cpu")]
use custos::cache::Cache;
#[cfg(feature = "cuda")]
use crate::{cu_to_cpu_s, cu_to_cpu_scalar};
#[cfg(feature = "cuda")]
use custos::CUDA;
#[cfg(feature = "opencl")]
use super::{cl_to_cpu_s, cl_to_cpu_scalar};
#[cfg(feature = "opencl")]
use custos::OpenCL;
impl<'a, T, D: MaxOps<T>> Matrix<'a, T, D> {
    /// Returns the maximum element of this matrix.
    ///
    /// Forwards to [`MaxOps::max`] on the device this matrix belongs to.
    #[inline]
    pub fn max(&self) -> T {
        let device = self.device();
        device.max(self)
    }

    /// Reduces this matrix with [`MaxOps::max_rows`] on its device.
    ///
    /// With the CPU implementation in this file the result is built as a
    /// `1 x cols` matrix whose entry `i` is the largest value found at
    /// position `i` of any row.
    #[inline]
    pub fn max_rows(&self) -> Matrix<'a, T, D> {
        let device = self.device();
        device.max_rows(self)
    }

    /// Reduces this matrix with [`MaxOps::max_cols`] on its device.
    ///
    /// With the CPU implementation in this file the result is built as a
    /// `rows x 1` matrix whose entry `r` is the largest value of row `r`.
    #[inline]
    pub fn max_cols(&self) -> Matrix<'a, T, D> {
        let device = self.device();
        device.max_cols(self)
    }
}
/// Maximum reductions over a [`Matrix`], provided by a device.
///
/// `T` is the element type and `D` is the device the input matrix lives on
/// (defaults to the implementing device itself). The matrices returned by
/// `max_rows` / `max_cols` always belong to the implementing device (`Self`).
pub trait MaxOps<T, D: Device = Self>: Device {
    /// Returns the largest element of `x`.
    fn max(&self, x: &Matrix<T, D>) -> T;
    /// Reduces over the rows of `x`.
    ///
    /// The CPU implementation in this file yields a `1 x cols` matrix holding,
    /// for every column position, the largest value found in any row.
    /// NOTE(review): other implementors are expected to match — confirm.
    fn max_rows(&self, x: &Matrix<T, D>) -> Matrix<T, Self>;
    /// Reduces over the columns of `x`.
    ///
    /// The CPU implementation in this file yields a `rows x 1` matrix holding
    /// the largest value of each row.
    /// NOTE(review): other implementors are expected to match — confirm.
    fn max_cols(&self, x: &Matrix<T, D>) -> Matrix<T, Self>;
}
#[cfg(feature = "cpu")]
impl<T: Copy + PartialOrd, D: MainMemory> MaxOps<T, D> for CPU {
    /// Returns the largest element of `x`.
    ///
    /// The scan is seeded with `x[0]` and an element only replaces the running
    /// maximum when it compares strictly greater (`>`).
    ///
    /// # Panics
    ///
    /// The seed is read by indexing `x[0]`, so an empty `x` is expected to
    /// panic on that access.
    fn max(&self, x: &Matrix<T, D>) -> T {
        x.iter().fold(x[0], |best, candidate| {
            if *candidate > best {
                *candidate
            } else {
                best
            }
        })
    }

    /// Computes, for every column position, the largest value over all rows.
    ///
    /// The output buffer is requested from the cache with `x.cols()` elements
    /// and the result is assembled from `(buffer, 1, x.cols())`, i.e. a single
    /// row with one entry per column.
    ///
    /// # Panics
    ///
    /// Slicing panics if `x` holds fewer elements than the output buffer
    /// (seeding step) or fewer than `rows * cols` elements.
    fn max_rows(&self, x: &Matrix<T, D>) -> Matrix<T> {
        let cols = x.cols();
        let mut out = Cache::get(self, cols, x.node.idx);
        let src = x.as_slice();
        let maxima = out.as_mut_slice();

        // Seed the running maxima with the leading elements of the input
        // (the first row, given the buffer was requested with `cols` elements).
        let seed_len = maxima.len();
        maxima.copy_from_slice(&src[..seed_len]);

        for row_idx in 0..x.rows() {
            let start = row_idx * cols;
            let row = &src[start..start + cols];
            for (col, value) in row.iter().enumerate() {
                let slot = &mut maxima[col];
                if value > &*slot {
                    *slot = *value;
                }
            }
        }

        (out, 1, cols).into()
    }

    /// Computes the largest value of every row.
    ///
    /// The output buffer is requested from the cache with `x.rows()` elements
    /// and the result is assembled from `(buffer, x.rows(), 1)`, i.e. a single
    /// column with one entry per row.
    ///
    /// # Panics
    ///
    /// Each row is seeded with its first element (`row[0]`), so a matrix with
    /// at least one row but zero columns panics. Slicing also panics if `x`
    /// holds fewer than `rows * cols` elements.
    fn max_cols(&self, x: &Matrix<T, D>) -> Matrix<T> {
        let src = x.as_slice();
        let mut y = Cache::get(self, x.rows(), x.node.idx);
        let maxima = y.as_mut_slice();

        for (row_idx, slot) in maxima.iter_mut().enumerate().take(x.rows()) {
            let start = row_idx * x.cols();
            let row = &src[start..start + x.cols()];
            *slot = row.iter().fold(row[0], |best, value| {
                if value > &best {
                    *value
                } else {
                    best
                }
            });
        }

        (y, x.rows(), 1).into()
    }
}
#[cfg(feature = "opencl")]
impl<T: CDatatype> MaxOps<T> for OpenCL {
    /// Returns the largest element of `x`.
    ///
    /// The work is handed to `cl_to_cpu_scalar`, which invokes the closure with
    /// a device and a matrix on which `max` is called.
    /// NOTE(review): presumably the closure runs on a CPU view of the OpenCL
    /// data — confirm against `cl_to_cpu_scalar`.
    fn max(&self, x: &Matrix<T, OpenCL>) -> T {
        cl_to_cpu_scalar(self, x, |dev, mat| dev.max(mat))
    }

    /// Row reduction of `x`, delegated through `cl_to_cpu_s` to the `max_rows`
    /// of the device handed to the closure.
    /// NOTE(review): presumably a CPU fallback — confirm against `cl_to_cpu_s`.
    fn max_rows(&self, x: &Matrix<T, OpenCL>) -> Matrix<T, OpenCL> {
        cl_to_cpu_s(self, x, |dev, mat| dev.max_rows(mat))
    }

    /// Column reduction of `x`, delegated through `cl_to_cpu_s` to the
    /// `max_cols` of the device handed to the closure.
    /// NOTE(review): presumably a CPU fallback — confirm against `cl_to_cpu_s`.
    fn max_cols(&self, x: &Matrix<T, OpenCL>) -> Matrix<T, OpenCL> {
        cl_to_cpu_s(self, x, |dev, mat| dev.max_cols(mat))
    }
}
#[cfg(feature = "cuda")]
impl<T: Number> MaxOps<T> for CUDA {
    /// Returns the largest element of `x`.
    ///
    /// The work is handed to `cu_to_cpu_scalar`, which invokes the closure with
    /// a device and a matrix on which `max` is called.
    /// NOTE(review): presumably the closure runs on a CPU copy of the CUDA
    /// data — confirm against `cu_to_cpu_scalar`.
    fn max(&self, x: &Matrix<T, Self>) -> T {
        cu_to_cpu_scalar(x, |cpu, mat| cpu.max(&mat))
    }

    /// Row reduction of `x`, delegated through `cu_to_cpu_s` to the `max_rows`
    /// of the device handed to the closure.
    /// NOTE(review): presumably a CPU fallback — confirm against `cu_to_cpu_s`.
    fn max_rows(&self, x: &Matrix<T, Self>) -> Matrix<T, Self> {
        cu_to_cpu_s(self, x, |cpu, mat| cpu.max_rows(&mat))
    }

    /// Column reduction of `x`, delegated through `cu_to_cpu_s` to the
    /// `max_cols` of the device handed to the closure.
    /// NOTE(review): presumably a CPU fallback — confirm against `cu_to_cpu_s`.
    fn max_cols(&self, x: &Matrix<T, Self>) -> Matrix<T, Self> {
        cu_to_cpu_s(self, x, |cpu, mat| cpu.max_cols(&mat))
    }
}