1use crate::{cpu::row_op, row_op_slice_lhs, Matrix};
2use custos::{impl_stack, number::Number, CDatatype, Device, MainMemory, Shape};
3
4#[cfg(feature = "stack")]
5use custos::Stack;
6
7#[cfg(feature = "cpu")]
8use custos::CPU;
9
10#[cfg(feature = "opencl")]
11use crate::{cl_to_cpu_lr, opencl};
12#[cfg(feature = "opencl")]
13use custos::OpenCL;
14
15#[cfg(feature = "cuda")]
16use crate::{cu_to_cpu_lr, cu_to_cpu_lr_mut};
17#[cfg(feature = "cuda")]
18use custos::CUDA;
19
20impl<'a, T, LS: Shape, D: Device> Matrix<'a, T, D, LS> {
21 #[inline]
22 pub fn add_row<RS: Shape>(&self, rhs: &Matrix<T, D, RS>) -> Matrix<'a, T, D, LS>
23 where
24 D: RowOp<T, LS, RS>,
25 {
26 self.device().add_row(self, rhs)
27 }
28
29 #[inline]
30 pub fn add_row_mut<RS: Shape>(&mut self, rhs: &Matrix<'a, T, D, RS>)
31 where
32 D: RowOp<T, LS, RS>,
33 {
34 rhs.device().add_row_mut(self, rhs)
35 }
36}
37
38pub trait RowOp<T, LS: Shape = (), RS: Shape = (), D: Device = Self>: Device {
39 fn add_row(&self, lhs: &Matrix<T, D, LS>, rhs: &Matrix<T, D, RS>) -> Matrix<T, Self, LS>;
40 fn add_row_mut(&self, lhs: &mut Matrix<T, D, LS>, rhs: &Matrix<T, D, RS>);
41}
42
43#[impl_stack]
45impl<T: Number, D: MainMemory, LS: Shape, RS: Shape> RowOp<T, LS, RS, D> for CPU {
46 #[inline]
47 fn add_row(&self, lhs: &Matrix<T, D, LS>, rhs: &Matrix<T, D, RS>) -> Matrix<T, Self, LS> {
48 row_op(self, lhs, rhs, |c, a, b| *c = a + b)
49 }
50
51 #[inline]
52 fn add_row_mut(&self, lhs: &mut Matrix<T, D, LS>, rhs: &Matrix<T, D, RS>) {
53 let (lhs_rows, lhs_cols) = lhs.dims();
54 row_op_slice_lhs(lhs, lhs_rows, lhs_cols, rhs, |c, a| *c += a)
55 }
56}
57
#[cfg(feature = "opencl")]
impl<T: CDatatype> RowOp<T> for OpenCL {
    /// Runs the row addition through `cl_to_cpu_lr`; the closure calls
    /// `add_row` on the device and operands that the helper passes to it.
    ///
    /// NOTE(review): judging by the helper's name the computation presumably
    /// happens on the host side — confirm in `cl_to_cpu_lr`.
    #[inline]
    fn add_row(&self, lhs: &Matrix<T, OpenCL>, rhs: &Matrix<T, OpenCL>) -> Matrix<T, OpenCL> {
        cl_to_cpu_lr(self, lhs, rhs, |dev, a, b| dev.add_row(a, b))
    }

    /// Runs the in-place row addition through `opencl::cpu_exec_lhs_rhs_mut`;
    /// the closure calls `add_row_mut` on the device and operands that the
    /// helper passes to it.
    ///
    /// # Panics
    ///
    /// Panics if `opencl::cpu_exec_lhs_rhs_mut` reports a failure, because its
    /// return value is unwrapped.
    #[inline]
    fn add_row_mut(&self, lhs: &mut Matrix<T, OpenCL>, rhs: &Matrix<T, OpenCL>) {
        let outcome =
            opencl::cpu_exec_lhs_rhs_mut(self, lhs, rhs, |dev, a, b| dev.add_row_mut(a, b));
        outcome.unwrap();
    }
}
72
#[cfg(feature = "cuda")]
impl<T: CDatatype> RowOp<T> for CUDA {
    /// Runs the row addition through `cu_to_cpu_lr`; the closure calls
    /// `add_row` on the device and operands that the helper passes to it.
    ///
    /// NOTE(review): judging by the helper's name the computation presumably
    /// happens on the host side — confirm in `cu_to_cpu_lr`.
    #[inline]
    fn add_row(&self, lhs: &Matrix<T, Self>, rhs: &Matrix<T, Self>) -> Matrix<T, Self> {
        cu_to_cpu_lr(self, lhs, rhs, |dev, a, b| dev.add_row(a, b))
    }

    /// Runs the in-place row addition through `cu_to_cpu_lr_mut`; the closure
    /// calls `add_row_mut` on the device and operands that the helper passes
    /// to it.
    #[inline]
    fn add_row_mut(&self, lhs: &mut Matrix<T, Self>, rhs: &Matrix<T, Self>) {
        cu_to_cpu_lr_mut(self, lhs, rhs, |dev, a, b| dev.add_row_mut(a, b))
    }
}