custos_math/ops/
sum.rs

1use crate::Matrix;
2use custos::{impl_stack, number::Number, CDatatype, Device, MainMemory, Shape, CPU};
3
4#[cfg(feature = "stack")]
5use custos::Stack;
6
7#[cfg(feature = "cpu")]
8use custos::Cache;
9
10#[cfg(feature = "opencl")]
11use super::{cl_to_cpu_s, cl_to_cpu_scalar};
12#[cfg(feature = "opencl")]
13use custos::OpenCL;
14
15#[cfg(feature = "cuda")]
16use crate::{cu_to_cpu_s, cu_to_cpu_scalar};
17#[cfg(feature = "cuda")]
18use custos::CUDA;
19
20impl<'a, T, IS: Shape, D: SumOps<T, IS>> Matrix<'a, T, D, IS> {
21    pub fn sum(&self) -> T {
22        self.device().sum(self)
23    }
24
25    pub fn mean(&self) -> T {
26        self.device().mean(self)
27    }
28}
29
30impl<'a, T, D: Device, IS: Shape> Matrix<'a, T, D, IS> {
31    pub fn sum_rows<OS: Shape>(&self) -> Matrix<'a, T, D, OS>
32    where
33        D: SumOverOps<T, IS, OS>,
34    {
35        self.device().sum_rows(self)
36    }
37
38    pub fn sum_cols<OS: Shape>(&self) -> Matrix<'a, T, D, OS>
39    where
40        D: SumOverOps<T, IS, OS>,
41    {
42        self.device().sum_cols(self)
43    }
44}
45
46pub trait SumOps<T, IS: Shape = (), D: Device = Self>: Device {
47    fn sum(&self, x: &Matrix<T, D, IS>) -> T;
48    fn mean(&self, x: &Matrix<T, D, IS>) -> T;
49}
50
51pub trait SumOverOps<T, IS: Shape = (), OS: Shape = (), D: Device = Self>: Device {
52    fn sum_rows(&self, x: &Matrix<T, D, IS>) -> Matrix<T, Self, OS>;
53    fn sum_cols(&self, x: &Matrix<T, D, IS>) -> Matrix<T, Self, OS>;
54}
55
// Scalar reductions for matrices whose device type satisfies `MainMemory`.
#[cfg(feature = "cpu")]
#[impl_stack]
impl<T: Number, D: MainMemory, IS: Shape> SumOps<T, IS, D> for CPU {
    // Adds up every element yielded by `x.iter()`.
    fn sum(&self, x: &Matrix<T, D, IS>) -> T {
        x.iter().copied().sum::<T>()
    }

    // Element sum divided by `x.size()` (converted with `T::from_usize`).
    fn mean(&self, x: &Matrix<T, D, IS>) -> T {
        let total = self.sum(x);
        let count = T::from_usize(x.size());
        total / count
    }
}
73
/// Axis reductions computed directly on the input's slice.
///
/// The input is read as `rows` consecutive runs of `cols` values each.
#[cfg(feature = "cpu")]
impl<T, D, IS, OS> SumOverOps<T, IS, OS, D> for CPU
where
    T: Copy + Default + core::ops::AddAssign,
    D: MainMemory,
    IS: Shape,
    OS: Shape,
{
    /// Adds all rows of `x` together element-wise.
    ///
    /// Returns a `1 x cols` matrix whose `i`-th entry is the sum of the
    /// `i`-th value of every row.
    fn sum_rows(&self, x: &Matrix<T, D, IS>) -> Matrix<T, Self, OS> {
        let (rows, cols) = (x.rows(), x.cols());
        let mut out = Cache::get(self, cols, x.node.idx);

        let src = x.as_slice();
        let totals = out.as_mut_slice();

        // Every slot is accumulated into below, so it is reset to the default
        // value before the first addition.
        totals.fill(T::default());

        for row_idx in 0..rows {
            let start = row_idx * cols;
            let row = &src[start..start + cols];

            for (total, value) in totals.iter_mut().zip(row) {
                *total += *value;
            }
        }

        (out, 1, cols).into()
    }

    /// Adds up the values inside each row of `x`.
    ///
    /// Returns a `rows x 1` matrix whose `r`-th entry is the sum of row `r`.
    fn sum_cols(&self, x: &Matrix<T, D, IS>) -> Matrix<T, Self, OS> {
        let (rows, cols) = (x.rows(), x.cols());
        let mut out = Cache::get(self, rows, x.node.idx);

        let src = x.as_slice();
        let totals = out.as_mut_slice();

        for (row_idx, total) in totals.iter_mut().take(rows).enumerate() {
            let start = row_idx * cols;

            // Each output slot is overwritten with a freshly folded sum.
            *total = src[start..start + cols]
                .iter()
                .fold(T::default(), |mut acc, value| {
                    acc += *value;
                    acc
                });
        }

        (out, rows, 1).into()
    }
}
118
/// Scalar reductions for OpenCL matrices, routed through `cl_to_cpu_scalar`.
///
/// NOTE(review): judging by its name the helper evaluates the closure on a
/// CPU copy of the data; its definition is outside this file, so confirm there.
#[cfg(feature = "opencl")]
impl<T: Number> SumOps<T> for OpenCL {
    /// Calls `sum` on the device and matrix that `cl_to_cpu_scalar` passes to
    /// the closure and returns its result.
    #[inline]
    fn sum(&self, x: &Matrix<T, Self>) -> T {
        cl_to_cpu_scalar(self, x, |dev, mat| dev.sum(mat))
    }

    /// Calls `mean` on the device and matrix that `cl_to_cpu_scalar` passes to
    /// the closure and returns its result.
    #[inline]
    fn mean(&self, x: &Matrix<T, Self>) -> T {
        cl_to_cpu_scalar(self, x, |dev, mat| dev.mean(mat))
    }
}
131
/// Axis reductions for OpenCL matrices, routed through `cl_to_cpu_s`.
///
/// NOTE(review): judging by its name the helper evaluates the closure on a
/// CPU copy of the data; its definition is outside this file, so confirm there.
#[cfg(feature = "opencl")]
impl<T: CDatatype> SumOverOps<T> for OpenCL {
    /// Calls `sum_rows` on the device and matrix that `cl_to_cpu_s` passes to
    /// the closure; the returned matrix borrows from `self`.
    #[inline]
    fn sum_rows(&self, x: &Matrix<T, Self>) -> Matrix<T, Self> {
        cl_to_cpu_s(self, x, |dev, mat| dev.sum_rows(mat))
    }

    /// Calls `sum_cols` on the device and matrix that `cl_to_cpu_s` passes to
    /// the closure; the returned matrix borrows from `self`.
    #[inline]
    fn sum_cols(&self, x: &Matrix<T, Self>) -> Matrix<T, Self> {
        cl_to_cpu_s(self, x, |dev, mat| dev.sum_cols(mat))
    }
}
144
/// Scalar reductions for CUDA matrices, routed through `cu_to_cpu_scalar`.
///
/// NOTE(review): judging by its name the helper evaluates the closure on a
/// CPU copy of the data; its definition is outside this file, so confirm there.
#[cfg(feature = "cuda")]
impl<T: CDatatype> SumOps<T> for CUDA {
    /// Calls `sum` on the device the helper passes to the closure, borrowing
    /// the matrix the closure receives, and returns the result.
    #[inline]
    fn sum(&self, x: &Matrix<T, CUDA>) -> T {
        cu_to_cpu_scalar(x, |dev, mat| dev.sum(&mat))
    }

    /// Calls `mean` on the device the helper passes to the closure, borrowing
    /// the matrix the closure receives, and returns the result.
    #[inline]
    fn mean(&self, x: &Matrix<T, CUDA>) -> T {
        cu_to_cpu_scalar(x, |dev, mat| dev.mean(&mat))
    }
}
157
/// Axis reductions for CUDA matrices, routed through `cu_to_cpu_s`.
///
/// NOTE(review): judging by its name the helper evaluates the closure on a
/// CPU copy of the data; its definition is outside this file, so confirm there.
#[cfg(feature = "cuda")]
impl<T: CDatatype> SumOverOps<T> for CUDA {
    /// Calls `sum_rows` on the device the helper passes to the closure,
    /// borrowing the matrix the closure receives.
    #[inline]
    fn sum_rows(&self, x: &Matrix<T, CUDA>) -> Matrix<T, CUDA> {
        cu_to_cpu_s(self, x, |dev, mat| dev.sum_rows(&mat))
    }

    /// Calls `sum_cols` on the device the helper passes to the closure,
    /// borrowing the matrix the closure receives.
    #[inline]
    fn sum_cols(&self, x: &Matrix<T, CUDA>) -> Matrix<T, CUDA> {
        cu_to_cpu_s(self, x, |dev, mat| dev.sum_cols(&mat))
    }
}