zenu_matrix/operation/
basic_operations.rs

1use crate::{
2    device::{cpu::Cpu, DeviceBase},
3    dim::{DimDyn, DimTrait},
4    index::Index0D,
5    matrix::{Matrix, Owned, Ref, Repr},
6    num::Num,
7    with_clousers::{
8        array_array, array_array_array, array_array_scalar, scalar_array_with_closure,
9    },
10};
11
12#[cfg(feature = "nvidia")]
13use crate::device::nvidia::Nvidia;
14
15#[cfg(feature = "nvidia")]
16use zenu_cuda::kernel::{
17    array_abs, array_abs_assign, array_acos, array_acos_assign, array_add, array_array_add_assign,
18    array_array_div_assign, array_array_mul_assign, array_array_sub_assign, array_asin,
19    array_asin_assign, array_atan, array_atan_assign, array_cos, array_cos_assign, array_cosh,
20    array_cosh_assign, array_div, array_exp, array_exp_assign, array_log, array_log_assign,
21    array_mul, array_pow, array_pow_assign, array_scalar_add, array_scalar_add_assign,
22    array_scalar_add_assign_ptr, array_scalar_add_ptr, array_scalar_div, array_scalar_div_assign,
23    array_scalar_div_assign_ptr, array_scalar_div_ptr, array_scalar_mul, array_scalar_mul_assign,
24    array_scalar_mul_assign_ptr, array_scalar_mul_ptr, array_scalar_sub, array_scalar_sub_assign,
25    array_scalar_sub_assign_ptr, array_scalar_sub_ptr, array_sin, array_sin_assign, array_sinh,
26    array_sinh_assign, array_sqrt, array_sqrt_assign, array_sub, array_tan, array_tan_assign,
27    array_tanh, array_tanh_assign,
28};
29
30use super::copy_from::CopyBlas;
31
/// Generates one elementwise binary-operation kernel trait together with its
/// CPU implementation and, behind the `nvidia` feature, its CUDA
/// implementation.
///
/// Macro arguments:
/// * `$name` – name of the generated trait.
/// * `$cpu_method` / `$cpu_assign_method` – associated functions invoked on `T`
///   by the CPU implementation for the out-of-place and in-place variants.
/// * `$gpu_array`, `$gpu_array_assign`, `$gpu_scalar`, `$gpu_scalar_assign`,
///   `$gpu_scalar_ptr`, `$gpu_scalar_assign_ptr` – kernel functions the
///   `Nvidia` implementation forwards to.
macro_rules! impl_basic_op_trait {
    (
        $name:ident,
        $cpu_method:ident,
        $cpu_assign_method:ident,
        $gpu_array:ident,
        $gpu_array_assign:ident,
        $gpu_scalar:ident,
        $gpu_scalar_assign:ident,
        $gpu_scalar_ptr:ident,
        $gpu_scalar_assign_ptr:ident
    ) => {
        /// Raw-pointer kernels for one elementwise binary operation.
        ///
        /// Strides are counted in elements: element `i` of an operand is
        /// accessed at `ptr.add(i * stride)`. The functions are not marked
        /// `unsafe`, yet they dereference their pointer arguments, so callers
        /// must pass pointers that are valid for `num_elm` strided elements
        /// (and a readable `scalar` pointer where one is taken).
        pub trait $name: DeviceBase {
            /// Stores `op(lhs[i], rhs[i])` in `to[i]` for every `i < num_elm`.
            fn array_array<T: Num>(
                to: *mut T,
                lhs: *const T,
                rhs: *const T,
                num_elm: usize,
                to_stride: usize,
                lhs_stride: usize,
                rhs_stride: usize,
            );

            /// Combines `rhs[i]` into `to[i]` in place for every `i < num_elm`.
            fn array_assign<T: Num>(
                to: *mut T,
                rhs: *const T,
                num_elm: usize,
                to_stride: usize,
                rhs_stride: usize,
            );

            /// Stores `op(lhs[i], rhs)` in `to[i]`; `rhs` is passed by value.
            fn scalar<T: Num>(
                to: *mut T,
                lhs: *const T,
                rhs: T,
                num_elm: usize,
                to_stride: usize,
                lhs_stride: usize,
            );

            /// Combines the by-value scalar `rhs` into `to[i]` in place.
            fn scalar_assign<T: Num>(to: *mut T, rhs: T, num_elm: usize, to_stride: usize);

            /// Same operation as `scalar`, but the scalar operand is read
            /// through the pointer `scalar`.
            ///
            /// Note the argument order: unlike the other functions of this
            /// trait, `num_elm` comes last, after the two strides.
            fn scalar_ptr<T: Num>(
                to: *mut T,
                lhs: *const T,
                scalar: *const T,
                to_stride: usize,
                lhs_stride: usize,
                num_elm: usize,
            );

            /// Same operation as `scalar_assign`, but the scalar operand is
            /// read through the pointer `scalar`.
            fn scalar_assign_ptr<T: Num>(
                to: *mut T,
                scalar: *const T,
                num_elm: usize,
                to_stride: usize,
            );
        }

        impl $name for Cpu {
            #[expect(clippy::not_unsafe_ptr_arg_deref)]
            fn array_array<T: Num>(
                to: *mut T,
                lhs: *const T,
                rhs: *const T,
                num_elm: usize,
                to_stride: usize,
                lhs_stride: usize,
                rhs_stride: usize,
            ) {
                for i in 0..num_elm {
                    // SAFETY: relies on the caller passing pointers that are
                    // valid for `num_elm` elements at the given strides.
                    unsafe {
                        *to.add(i * to_stride) =
                            T::$cpu_method(*lhs.add(i * lhs_stride), *rhs.add(i * rhs_stride));
                    }
                }
            }

            #[expect(clippy::not_unsafe_ptr_arg_deref)]
            fn array_assign<T: Num>(
                to: *mut T,
                rhs: *const T,
                num_elm: usize,
                to_stride: usize,
                rhs_stride: usize,
            ) {
                for i in 0..num_elm {
                    // SAFETY: relies on the caller passing pointers that are
                    // valid for `num_elm` elements at the given strides.
                    unsafe {
                        T::$cpu_assign_method(
                            &mut *to.add(i * to_stride),
                            *rhs.add(i * rhs_stride),
                        );
                    }
                }
            }

            #[expect(clippy::not_unsafe_ptr_arg_deref)]
            fn scalar<T: Num>(
                to: *mut T,
                lhs: *const T,
                rhs: T,
                num_elm: usize,
                to_stride: usize,
                lhs_stride: usize,
            ) {
                for i in 0..num_elm {
                    // SAFETY: relies on the caller passing pointers that are
                    // valid for `num_elm` elements at the given strides.
                    unsafe {
                        *to.add(i * to_stride) = T::$cpu_method(*lhs.add(i * lhs_stride), rhs);
                    }
                }
            }

            #[expect(clippy::not_unsafe_ptr_arg_deref)]
            fn scalar_assign<T: Num>(to: *mut T, rhs: T, num_elm: usize, to_stride: usize) {
                for i in 0..num_elm {
                    // SAFETY: relies on the caller passing a `to` pointer that
                    // is valid for `num_elm` elements at `to_stride`.
                    unsafe {
                        T::$cpu_assign_method(&mut *to.add(i * to_stride), rhs);
                    }
                }
            }

            #[expect(clippy::not_unsafe_ptr_arg_deref)]
            fn scalar_ptr<T: Num>(
                to: *mut T,
                lhs: *const T,
                scalar: *const T,
                to_stride: usize,
                lhs_stride: usize,
                num_elm: usize,
            ) {
                for i in 0..num_elm {
                    // SAFETY: relies on the caller passing array pointers that
                    // are valid for `num_elm` strided elements and a readable
                    // `scalar` pointer. `scalar` is re-read on every iteration
                    // and is never touched when `num_elm` is 0.
                    unsafe {
                        *to.add(i * to_stride) = T::$cpu_method(*lhs.add(i * lhs_stride), *scalar);
                    }
                }
            }

            #[expect(clippy::not_unsafe_ptr_arg_deref)]
            fn scalar_assign_ptr<T: Num>(
                to: *mut T,
                scalar: *const T,
                num_elm: usize,
                to_stride: usize,
            ) {
                for i in 0..num_elm {
                    // SAFETY: relies on the caller passing a `to` pointer that
                    // is valid for `num_elm` strided elements and a readable
                    // `scalar` pointer.
                    unsafe {
                        T::$cpu_assign_method(&mut *to.add(i * to_stride), *scalar);
                    }
                }
            }
        }

        // The CUDA implementation only forwards to the kernel functions named
        // in the macro arguments.
        #[cfg(feature = "nvidia")]
        impl $name for Nvidia {
            fn array_array<T: Num>(
                to: *mut T,
                lhs: *const T,
                rhs: *const T,
                num_elm: usize,
                to_stride: usize,
                lhs_stride: usize,
                rhs_stride: usize,
            ) {
                $gpu_array(to, lhs, rhs, num_elm, to_stride, lhs_stride, rhs_stride);
            }

            fn array_assign<T: Num>(
                to: *mut T,
                rhs: *const T,
                num_elm: usize,
                to_stride: usize,
                rhs_stride: usize,
            ) {
                $gpu_array_assign(to, rhs, num_elm, to_stride, rhs_stride);
            }

            fn scalar<T: Num>(
                to: *mut T,
                lhs: *const T,
                rhs: T,
                num_elm: usize,
                to_stride: usize,
                lhs_stride: usize,
            ) {
                $gpu_scalar(to, lhs, rhs, num_elm, to_stride, lhs_stride);
            }

            fn scalar_assign<T: Num>(to: *mut T, rhs: T, num_elm: usize, to_stride: usize) {
                $gpu_scalar_assign(to, rhs, num_elm, to_stride);
            }

            fn scalar_ptr<T: Num>(
                to: *mut T,
                lhs: *const T,
                scalar: *const T,
                to_stride: usize,
                lhs_stride: usize,
                num_elm: usize,
            ) {
                // The kernel takes `num_elm` before the strides, the trait
                // method after them; reorder accordingly.
                $gpu_scalar_ptr(to, lhs, scalar, num_elm, to_stride, lhs_stride);
            }

            fn scalar_assign_ptr<T: Num>(
                to: *mut T,
                scalar: *const T,
                num_elm: usize,
                to_stride: usize,
            ) {
                $gpu_scalar_assign_ptr(to, scalar, num_elm, to_stride);
            }
        }
    };
}
233impl_basic_op_trait!(
234    AddOps,
235    add,
236    add_assign,
237    array_add,
238    array_array_add_assign,
239    array_scalar_add,
240    array_scalar_add_assign,
241    array_scalar_add_ptr,
242    array_scalar_add_assign_ptr
243);
244impl_basic_op_trait!(
245    SubOps,
246    sub,
247    sub_assign,
248    array_sub,
249    array_array_sub_assign,
250    array_scalar_sub,
251    array_scalar_sub_assign,
252    array_scalar_sub_ptr,
253    array_scalar_sub_assign_ptr
254);
255impl_basic_op_trait!(
256    MulOps,
257    mul,
258    mul_assign,
259    array_mul,
260    array_array_mul_assign,
261    array_scalar_mul,
262    array_scalar_mul_assign,
263    array_scalar_mul_ptr,
264    array_scalar_mul_assign_ptr
265);
266impl_basic_op_trait!(
267    DivOps,
268    div,
269    div_assign,
270    array_div,
271    array_array_div_assign,
272    array_scalar_div,
273    array_scalar_div_assign,
274    array_scalar_div_ptr,
275    array_scalar_div_assign_ptr
276);
277
/// Generates the public elementwise binary-operation methods on mutable
/// `DimDyn` matrix views for one kernel trait.
///
/// Macro arguments:
/// * `$method` – `self = lhs (op) rhs`.
/// * `$assign_method` – `self (op)= other`.
/// * `$scalar_method` – `self = other (op) scalar`.
/// * `$scalar_assign_method` – `self (op)= scalar`.
/// * `$device_trait` – kernel trait whose functions perform the operation.
///
/// Every method passes its operands to a helper from `with_clousers` along
/// with closures that launch the kernel. Inside a closure the kernel receives
/// the destination's total element count and each operand's last-axis stride
/// (1 when the stride list is empty). How the helper prepares the operands
/// before calling the closures is not visible here — see `with_clousers`.
macro_rules! impl_basic_ops {
    (
        $method:ident,
        $assign_method:ident,
        $scalar_method:ident,
        $scalar_assign_method:ident,
        $device_trait:ident
    ) => {
        impl<T, D> Matrix<Ref<&mut T>, DimDyn, D>
        where
            T: Num,
            D: DeviceBase + $device_trait,
        {
            /// Combines every element of `other` with `scalar` and stores the
            /// result in `self`.
            pub fn $scalar_method<OR: Repr<Item = T>>(
                &mut self,
                other: &Matrix<OR, DimDyn, D>,
                scalar: T,
            ) {
                let source = other.to_ref();
                array_array_scalar(self, &source, scalar, |dst, src, value| {
                    let dst_stride = dst.stride().into_iter().last().unwrap_or(1);
                    let src_stride = src.stride().into_iter().last().unwrap_or(1);
                    let len = dst.shape().num_elm();
                    D::scalar(
                        dst.as_mut_ptr(),
                        src.as_ptr(),
                        value,
                        len,
                        dst_stride,
                        src_stride,
                    );
                });
            }

            /// Combines `scalar` into every element of `self` in place.
            pub fn $scalar_assign_method(&mut self, scalar: T) {
                scalar_array_with_closure(self, scalar, |dst, value| {
                    let dst_stride = dst.stride().into_iter().last().unwrap_or(1);
                    let len = dst.shape().num_elm();
                    D::scalar_assign(dst.as_mut_ptr(), value, len, dst_stride);
                });
            }

            /// Combines `other` into `self` in place.
            ///
            /// Two kernels are supplied: the first takes `other` as a strided
            /// array, the second takes it as a pointer to a single scalar.
            /// Which one runs is decided by `array_array` (presumably the
            /// second when `other` holds one element — confirm in
            /// `with_clousers`).
            pub fn $assign_method(&mut self, other: &Matrix<Ref<&T>, DimDyn, D>) {
                array_array(
                    self,
                    other,
                    |dst, src| {
                        let dst_stride = dst.stride().into_iter().last().unwrap_or(1);
                        let src_stride = src.stride().into_iter().last().unwrap_or(1);
                        let len = dst.shape().num_elm();
                        D::array_assign(dst.as_mut_ptr(), src.as_ptr(), len, dst_stride, src_stride);
                    },
                    |dst, scalar_ptr| {
                        let dst_stride = dst.stride().into_iter().last().unwrap_or(1);
                        let len = dst.shape().num_elm();
                        D::scalar_assign_ptr(dst.as_mut_ptr(), scalar_ptr, len, dst_stride);
                    },
                );
            }

            /// Combines `lhs` with `rhs` and stores the result in `self`.
            ///
            /// As with the in-place variant, one kernel treats `rhs` as a
            /// strided array and the other as a pointer to a single scalar;
            /// `array_array_array` chooses between them.
            pub fn $method<LR: Repr<Item = T>, RR: Repr<Item = T>>(
                &mut self,
                lhs: &Matrix<LR, DimDyn, D>,
                rhs: &Matrix<RR, DimDyn, D>,
            ) {
                let left = lhs.to_ref();
                let right = rhs.to_ref();
                array_array_array(
                    self,
                    &left,
                    &right,
                    |dst, l, r| {
                        let dst_stride = dst.stride().into_iter().last().unwrap_or(1);
                        let l_stride = l.stride().into_iter().last().unwrap_or(1);
                        let r_stride = r.stride().into_iter().last().unwrap_or(1);
                        let len = dst.shape().num_elm();
                        D::array_array(
                            dst.as_mut_ptr(),
                            l.as_ptr(),
                            r.as_ptr(),
                            len,
                            dst_stride,
                            l_stride,
                            r_stride,
                        );
                    },
                    |dst, l, scalar_ptr| {
                        let dst_stride = dst.stride().into_iter().last().unwrap_or(1);
                        let l_stride = l.stride().into_iter().last().unwrap_or(1);
                        let len = dst.shape().num_elm();
                        // `scalar_ptr` takes the strides before the length.
                        D::scalar_ptr(
                            dst.as_mut_ptr(),
                            l.as_ptr(),
                            scalar_ptr,
                            dst_stride,
                            l_stride,
                            len,
                        );
                    },
                );
            }
        }
    };
}
378impl_basic_ops!(add_array, add_assign, add_scalar, add_scalar_assign, AddOps);
379impl_basic_ops!(sub_array, sub_assign, sub_scalar, sub_scalar_assign, SubOps);
380impl_basic_ops!(mul_array, mul_assign, mul_scalar, mul_scalar_assign, MulOps);
381impl_basic_ops!(div_array, div_assign, div_scalar, div_scalar_assign, DivOps);
382
/// Generates one elementwise unary-operation kernel trait together with its
/// CPU implementation and, behind the `nvidia` feature, its CUDA
/// implementation.
///
/// Macro arguments:
/// * `$name` – name of the generated trait.
/// * `$cpu_method` – associated function invoked on `T` by the CPU code.
/// * `$gpu_method` / `$gpu_assign_method` – kernel functions the `Nvidia`
///   implementation forwards to.
macro_rules! impl_basic_ops_no_inputs {
    ($name:ident, $cpu_method:ident, $gpu_method:ident, $gpu_assign_method:ident) => {
        /// Raw-pointer kernels for one elementwise unary operation.
        ///
        /// Strides are counted in elements. The functions dereference their
        /// pointer arguments, so callers must pass pointers that are valid for
        /// `num_elm` elements at the given strides.
        pub trait $name: DeviceBase {
            /// Stores `op(other[i])` in `to[i]` for every `i < num_elm`.
            fn array<T: Num>(
                to: *mut T,
                other: *const T,
                num_elm: usize,
                to_stride: usize,
                other_stride: usize,
            );

            /// Replaces `to[i]` with `op(to[i])` for every `i < num_elm`.
            fn array_assign<T: Num>(to: *mut T, num_elm: usize, to_stride: usize);
        }

        impl $name for Cpu {
            #[expect(clippy::not_unsafe_ptr_arg_deref)]
            fn array<T: Num>(
                to: *mut T,
                other: *const T,
                num_elm: usize,
                to_stride: usize,
                other_stride: usize,
            ) {
                // SAFETY: relies on the caller passing pointers that are valid
                // for `num_elm` elements at the given strides.
                unsafe {
                    for idx in 0..num_elm {
                        let input = *other.add(idx * other_stride);
                        *to.add(idx * to_stride) = T::$cpu_method(input);
                    }
                }
            }

            #[expect(clippy::not_unsafe_ptr_arg_deref)]
            fn array_assign<T: Num>(to: *mut T, num_elm: usize, to_stride: usize) {
                // SAFETY: relies on the caller passing a `to` pointer that is
                // valid for `num_elm` elements at `to_stride`.
                unsafe {
                    for idx in 0..num_elm {
                        let current = *to.add(idx * to_stride);
                        *to.add(idx * to_stride) = T::$cpu_method(current);
                    }
                }
            }
        }

        // The CUDA implementation only forwards to the named kernel functions.
        #[cfg(feature = "nvidia")]
        impl $name for Nvidia {
            fn array<T: Num>(
                to: *mut T,
                other: *const T,
                num_elm: usize,
                to_stride: usize,
                other_stride: usize,
            ) {
                $gpu_method(to, other, num_elm, to_stride, other_stride);
            }

            fn array_assign<T: Num>(to: *mut T, num_elm: usize, to_stride: usize) {
                $gpu_assign_method(to, num_elm, to_stride);
            }
        }
    };
}
441impl_basic_ops_no_inputs!(SinOps, sin, array_sin, array_sin_assign);
442impl_basic_ops_no_inputs!(CosOps, cos, array_cos, array_cos_assign);
443impl_basic_ops_no_inputs!(TanOps, tan, array_tan, array_tan_assign);
444impl_basic_ops_no_inputs!(AsinOps, asin, array_asin, array_asin_assign);
445impl_basic_ops_no_inputs!(AcosOps, acos, array_acos, array_acos_assign);
446impl_basic_ops_no_inputs!(AtanOps, atan, array_atan, array_atan_assign);
447impl_basic_ops_no_inputs!(SinhOps, sinh, array_sinh, array_sinh_assign);
448impl_basic_ops_no_inputs!(CoshOps, cosh, array_cosh, array_cosh_assign);
449impl_basic_ops_no_inputs!(TanhOps, tanh, array_tanh, array_tanh_assign);
450impl_basic_ops_no_inputs!(AbsOps, abs, array_abs, array_abs_assign);
451impl_basic_ops_no_inputs!(SqrtOps, sqrt, array_sqrt, array_sqrt_assign);
452impl_basic_ops_no_inputs!(ExpOps, exp, array_exp, array_exp_assign);
453impl_basic_ops_no_inputs!(LogOps, ln, array_log, array_log_assign);
454
455pub trait PowOws: DeviceBase {
456    fn array<T: Num>(
457        to: *mut T,
458        other: *const T,
459        scalar: T,
460        num_elm: usize,
461        to_stride: usize,
462        other_stride: usize,
463    );
464
465    fn array_assign<T: Num>(to: *mut T, scalar: T, num_elm: usize, to_stride: usize);
466}
467impl PowOws for Cpu {
468    #[expect(clippy::not_unsafe_ptr_arg_deref)]
469    fn array<T: Num>(
470        to: *mut T,
471        other: *const T,
472        scalar: T,
473        num_elm: usize,
474        to_stride: usize,
475        other_stride: usize,
476    ) {
477        for i in 0..num_elm {
478            unsafe {
479                *to.add(i * to_stride) = T::powf(*other.add(i * other_stride), scalar);
480            }
481        }
482    }
483
484    #[expect(clippy::not_unsafe_ptr_arg_deref)]
485    fn array_assign<T: Num>(to: *mut T, scalar: T, num_elm: usize, to_stride: usize) {
486        for i in 0..num_elm {
487            unsafe {
488                *to.add(i * to_stride) = T::powf(*to.add(i * to_stride), scalar);
489            }
490        }
491    }
492}
493
#[cfg(feature = "nvidia")]
impl PowOws for Nvidia {
    /// Forwards to the `array_pow` kernel.
    ///
    /// The kernel takes the input operand first and the output last, so the
    /// arguments are passed in a different order than this method receives
    /// them.
    fn array<T>(
        to: *mut T,
        other: *const T,
        scalar: T,
        num_elm: usize,
        to_stride: usize,
        other_stride: usize,
    ) where
        T: Num,
    {
        array_pow(other, num_elm, other_stride, scalar, to, to_stride);
    }

    /// Forwards to the in-place `array_pow_assign` kernel.
    fn array_assign<T>(to: *mut T, scalar: T, num_elm: usize, to_stride: usize)
    where
        T: Num,
    {
        array_pow_assign(to, num_elm, to_stride, scalar);
    }
}
511
/// Generates the matrix-level API for one elementwise unary operation.
///
/// NOTE: this definition reuses the name of the kernel-trait macro defined
/// earlier in the file and shadows it from here on.
///
/// Macro arguments:
/// * `$trait_name` – kernel trait providing `array` / `array_assign`.
/// * `$output` – method returning a freshly allocated result matrix.
/// * `$method` – method writing the result for `other` into `self`.
/// * `$assign` – method applying the operation to `self` in place.
macro_rules! impl_basic_ops_no_inputs {
    ($trait_name:ident, $output:ident, $method:ident, $assign:ident) => {
        impl<T: Num, S: DimTrait, D: DeviceBase + $trait_name> Matrix<Ref<&mut T>, S, D> {
            /// Applies the operation to every element of `other` and writes
            /// the results into `self`.
            ///
            /// An empty shape is handled as a single element, a 1-D shape is
            /// handed to the kernel directly, and higher ranks recurse over
            /// the first axis.
            ///
            /// # Panics
            ///
            /// Panics with `"Matrix shape mismatch"` when the two shapes
            /// differ.
            pub fn $method<R: Repr<Item = T>, SO: DimTrait>(&self, other: &Matrix<R, SO, D>) {
                assert!(
                    self.shape().slice() == other.shape().slice(),
                    "Matrix shape mismatch"
                );

                if self.shape().is_empty() {
                    D::array(self.as_mut_ptr(), other.as_ptr(), 1, 1, 1);
                    return;
                }

                if self.shape().len() == 1 {
                    let dst = self.as_mut_ptr();
                    let src = other.as_ptr();
                    let len = self.shape().num_elm();
                    let dst_stride = self.stride()[0];
                    let src_stride = other.stride()[0];
                    D::array(dst, src, len, dst_stride, src_stride);
                    return;
                }

                for axis_idx in 0..self.shape()[0] {
                    let dst_view = self.index_axis_mut_dyn(Index0D::new(axis_idx));
                    let src_view = other.index_axis_dyn(Index0D::new(axis_idx));
                    dst_view.$method(&src_view);
                }
            }

            /// Applies the operation to every element of `self` in place,
            /// using the same rank dispatch as the out-of-place method.
            pub fn $assign(&mut self) {
                if self.shape().is_empty() {
                    D::array_assign(self.as_mut_ptr(), 1, 1);
                    return;
                }

                if self.shape().len() == 1 {
                    let len = self.shape().num_elm();
                    let stride = self.stride()[0];
                    D::array_assign(self.as_mut_ptr(), len, stride);
                    return;
                }

                for axis_idx in 0..self.shape()[0] {
                    let mut view = self.index_axis_mut_dyn(Index0D::new(axis_idx));
                    view.$assign();
                }
            }
        }

        impl<T: Num, R: Repr<Item = T>, S: DimTrait, D: DeviceBase + $trait_name> Matrix<R, S, D> {
            /// Returns a newly allocated matrix of the same shape holding the
            /// operation applied to every element of `self`.
            pub fn $output(&self) -> Matrix<Owned<T>, S, D> {
                let mut result = Matrix::alloc(self.shape().clone());
                result.to_ref_mut().$method(self);
                result
            }
        }
    };
}
563impl_basic_ops_no_inputs!(SinOps, sin, sin_array, sin_assign);
564impl_basic_ops_no_inputs!(CosOps, cos, cos_array, cos_assign);
565impl_basic_ops_no_inputs!(TanOps, tan, tan_array, tan_assign);
566impl_basic_ops_no_inputs!(AsinOps, asin, asin_array, asin_assign);
567impl_basic_ops_no_inputs!(AcosOps, acos, acos_array, acos_assign);
568impl_basic_ops_no_inputs!(AtanOps, atan, atan_array, atan_assign);
569impl_basic_ops_no_inputs!(SinhOps, sinh, sinh_array, sinh_assign);
570impl_basic_ops_no_inputs!(CoshOps, cosh, cosh_array, cosh_assign);
571impl_basic_ops_no_inputs!(TanhOps, tanh, tanh_array, tanh_assign);
572impl_basic_ops_no_inputs!(AbsOps, abs, abs_array, abs_assign);
573impl_basic_ops_no_inputs!(SqrtOps, sqrt, sqrt_array, sqrt_assign);
574impl_basic_ops_no_inputs!(ExpOps, exp, exp_array, exp_assign);
575impl_basic_ops_no_inputs!(LogOps, log, log_array, log_assign);
576
577impl<R: Repr, S: DimTrait, D: DeviceBase + PowOws + CopyBlas> Matrix<R, S, D> {
578    pub fn powf_array(&self, scalar: R::Item) -> Matrix<Owned<R::Item>, S, D> {
579        let mut powf = Matrix::alloc(self.shape());
580        powf.to_ref_mut().powf(self, scalar);
581        powf
582    }
583}
584
585impl<T: Num, S: DimTrait, D: DeviceBase + PowOws + CopyBlas> Matrix<Ref<&mut T>, S, D> {
586    #[expect(clippy::missing_panics_doc)]
587    pub fn powf<R: Repr<Item = T>, SO: DimTrait>(&self, other: &Matrix<R, SO, D>, scalar: T) {
588        assert!(
589            self.shape().slice() == other.shape().slice(),
590            "Matrix shape mismatch"
591        );
592
593        if self.shape().is_empty() {
594            D::array(self.as_mut_ptr(), other.as_ptr(), scalar, 1, 1, 1);
595        } else if self.shape().len() == 1 {
596            D::array(
597                self.as_mut_ptr(),
598                other.as_ptr(),
599                scalar,
600                self.shape().num_elm(),
601                self.stride()[0],
602                other.stride()[0],
603            );
604        } else {
605            let num_iter = self.shape()[0];
606            for idx in 0..num_iter {
607                let s = self.index_axis_mut_dyn(Index0D::new(idx));
608                let o = other.index_axis_dyn(Index0D::new(idx));
609                s.powf(&o, scalar);
610            }
611        }
612    }
613
614    pub fn powf_assign(&self, scalar: T) {
615        if self.shape().is_empty() {
616            D::array_assign(self.as_mut_ptr(), scalar, 1, 1);
617        } else if self.shape().len() == 1 {
618            D::array_assign(
619                self.as_mut_ptr(),
620                scalar,
621                self.shape().num_elm(),
622                self.stride()[0],
623            );
624        } else {
625            let num_iter = self.shape()[0];
626            for idx in 0..num_iter {
627                let s = self.index_axis_mut_dyn(Index0D::new(idx));
628                s.powf_assign(scalar);
629            }
630        }
631    }
632}
633
634#[cfg(test)]
635mod basic_ops {
636    #![expect(clippy::float_cmp, clippy::cast_precision_loss)]
637    use crate::{
638        device::Device,
639        dim::DimDyn,
640        matrix::{Matrix, Owned},
641        slice_dynamic,
642    };
643
    // Test cases that need to be covered:
645    // default stride
646    // sliced
647    // transposed
648
649    fn scalar_add_1d<D: Device>() {
650        let a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(vec![1., 2., 3.], [3]);
651        let mut ans: Matrix<Owned<f32>, DimDyn, D> = Matrix::zeros([3]);
652        ans.to_ref_mut().add_scalar(&a, 1.);
653        assert_eq!(ans.index_item([0]), 2.);
654        assert_eq!(ans.index_item([1]), 3.);
655        assert_eq!(ans.index_item([2]), 4.);
656    }
657    #[test]
658    fn scalar_add_1d_cpu() {
659        scalar_add_1d::<crate::device::cpu::Cpu>();
660    }
661    #[cfg(feature = "nvidia")]
662    #[test]
663    fn scalar_add_1d_gpu() {
664        scalar_add_1d::<crate::device::nvidia::Nvidia>();
665    }
666
667    fn scalar_add_2d<D: Device>() {
668        let a: Matrix<Owned<f32>, DimDyn, D> =
669            Matrix::from_vec(vec![1., 2., 3., 4., 5., 6.], [2, 3]);
670        let mut ans: Matrix<Owned<f32>, DimDyn, D> = Matrix::zeros([2, 3]);
671        ans.to_ref_mut().add_scalar(&a, 1.);
672        assert_eq!(ans.index_item([0, 0]), 2.);
673        assert_eq!(ans.index_item([0, 1]), 3.);
674        assert_eq!(ans.index_item([0, 2]), 4.);
675        assert_eq!(ans.index_item([1, 0]), 5.);
676        assert_eq!(ans.index_item([1, 1]), 6.);
677        assert_eq!(ans.index_item([1, 2]), 7.);
678    }
679    #[test]
680    fn scalar_add_2d_cpu() {
681        scalar_add_2d::<crate::device::cpu::Cpu>();
682    }
683    #[cfg(feature = "nvidia")]
684    #[test]
685    fn scalar_add_2d_gpu() {
686        scalar_add_2d::<crate::device::nvidia::Nvidia>();
687    }
688
689    fn sliced_3d<D: Device>() {
690        let a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(
691            vec![
692                1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
693            ],
694            [2, 2, 4],
695        );
696        let mut ans: Matrix<Owned<f32>, DimDyn, D> = Matrix::zeros([1, 2, 2]);
697        let sliced = a.slice(slice_dynamic!(1.., .., ..;2));
698        ans.to_ref_mut().add_scalar(&sliced, 1.);
699        assert_eq!(ans.index_item([0, 0, 0]), 10.);
700        assert_eq!(ans.index_item([0, 0, 1]), 12.);
701        assert_eq!(ans.index_item([0, 1, 0]), 14.);
702        assert_eq!(ans.index_item([0, 1, 1]), 16.);
703    }
704    #[test]
705    fn sliced_3d_cpu() {
706        sliced_3d::<crate::device::cpu::Cpu>();
707    }
708    #[cfg(feature = "nvidia")]
709    #[test]
710    fn sliced_3d_gpu() {
711        sliced_3d::<crate::device::nvidia::Nvidia>();
712    }
713
714    fn scalar_assign_4d<D: Device>() {
715        let mut a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(
716            vec![
717                1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
718            ],
719            [2, 2, 2, 2],
720        );
721        a.to_ref_mut().add_scalar_assign(1.);
722        assert_eq!(a.index_item([0, 0, 0, 0]), 2.);
723        assert_eq!(a.index_item([0, 0, 0, 1]), 3.);
724        assert_eq!(a.index_item([0, 0, 1, 0]), 4.);
725        assert_eq!(a.index_item([0, 0, 1, 1]), 5.);
726        assert_eq!(a.index_item([0, 1, 0, 0]), 6.);
727        assert_eq!(a.index_item([0, 1, 0, 1]), 7.);
728        assert_eq!(a.index_item([0, 1, 1, 0]), 8.);
729        assert_eq!(a.index_item([0, 1, 1, 1]), 9.);
730        assert_eq!(a.index_item([1, 0, 0, 0]), 10.);
731        assert_eq!(a.index_item([1, 0, 0, 1]), 11.);
732        assert_eq!(a.index_item([1, 0, 1, 0]), 12.);
733        assert_eq!(a.index_item([1, 0, 1, 1]), 13.);
734        assert_eq!(a.index_item([1, 1, 0, 0]), 14.);
735        assert_eq!(a.index_item([1, 1, 0, 1]), 15.);
736        assert_eq!(a.index_item([1, 1, 1, 0]), 16.);
737        assert_eq!(a.index_item([1, 1, 1, 1]), 17.);
738    }
739    #[test]
740    fn scalar_assign_4d_cpu() {
741        scalar_assign_4d::<crate::device::cpu::Cpu>();
742    }
743    #[cfg(feature = "nvidia")]
744    #[test]
745    fn scalar_assign_4d_gpu() {
746        scalar_assign_4d::<crate::device::nvidia::Nvidia>();
747    }
748
749    fn sliced_3d_assign<D: Device>() {
750        let mut a = Vec::new();
751        for i in 0..3 {
752            for j in 0..4 {
753                for k in 0..5 {
754                    a.push((i * 100 + j * 10 + k) as f32);
755                }
756            }
757        }
758        let a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [3, 4, 5]);
759        // shape [1, 2, 3]
760        let a = a.slice(slice_dynamic!(2, 1..3, ..;2));
761        let mut ans: Matrix<Owned<f32>, DimDyn, D> = Matrix::zeros([2, 3]);
762        ans.to_ref_mut().add_scalar(&a, 1.);
763        assert_eq!(ans.index_item([0, 0]), 211.);
764        assert_eq!(ans.index_item([0, 1]), 213.);
765        assert_eq!(ans.index_item([0, 2]), 215.);
766        assert_eq!(ans.index_item([1, 0]), 221.);
767        assert_eq!(ans.index_item([1, 1]), 223.);
768        assert_eq!(ans.index_item([1, 2]), 225.);
769    }
770    #[test]
771    fn sliced_3d_assign_cpu() {
772        sliced_3d_assign::<crate::device::cpu::Cpu>();
773    }
774    #[cfg(feature = "nvidia")]
775    #[test]
776    fn sliced_3d_assign_gpu() {
777        sliced_3d_assign::<crate::device::nvidia::Nvidia>();
778    }
779
780    fn matrix_add_4d<D: Device>() {
781        let a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(
782            vec![
783                1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
784            ],
785            [2, 2, 2, 2],
786        );
787        let b: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(
788            vec![
789                16., 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1.,
790            ],
791            [2, 2, 2, 2],
792        );
793
794        let mut ans: Matrix<Owned<f32>, DimDyn, D> = Matrix::zeros([2, 2, 2, 2]);
795        ans.to_ref_mut().add_array(&a, &b);
796        assert_eq!(ans.index_item([0, 0, 0, 0]), 17.);
797        assert_eq!(ans.index_item([0, 0, 0, 1]), 17.);
798        assert_eq!(ans.index_item([0, 0, 1, 0]), 17.);
799        assert_eq!(ans.index_item([0, 0, 1, 1]), 17.);
800        assert_eq!(ans.index_item([0, 1, 0, 0]), 17.);
801        assert_eq!(ans.index_item([0, 1, 0, 1]), 17.);
802        assert_eq!(ans.index_item([0, 1, 1, 0]), 17.);
803        assert_eq!(ans.index_item([0, 1, 1, 1]), 17.);
804        assert_eq!(ans.index_item([1, 0, 0, 0]), 17.);
805        assert_eq!(ans.index_item([1, 0, 0, 1]), 17.);
806        assert_eq!(ans.index_item([1, 0, 1, 0]), 17.);
807        assert_eq!(ans.index_item([1, 0, 1, 1]), 17.);
808        assert_eq!(ans.index_item([1, 1, 0, 0]), 17.);
809        assert_eq!(ans.index_item([1, 1, 0, 1]), 17.);
810        assert_eq!(ans.index_item([1, 1, 1, 0]), 17.);
811        assert_eq!(ans.index_item([1, 1, 1, 1]), 17.);
812    }
813    #[test]
814    fn matrix_add_4d_cpu() {
815        matrix_add_4d::<crate::device::cpu::Cpu>();
816    }
817    #[cfg(feature = "nvidia")]
818    #[test]
819    fn matrix_add_4d_gpu() {
820        matrix_add_4d::<crate::device::nvidia::Nvidia>();
821    }
822
823    fn matrix_add_sliced<D: Device>() {
824        let a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(
825            vec![
826                1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
827            ],
828            [4, 4],
829        );
830        let b: Matrix<_, DimDyn, _> = Matrix::from_vec(
831            vec![
832                1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
833            ],
834            [4, 4],
835        );
836
837        let a = a.slice(slice_dynamic!(1..;2, ..;2));
838        let b = b.slice(slice_dynamic!(..;2, 1..;2));
839        let mut ans: Matrix<Owned<f32>, DimDyn, D> = Matrix::zeros([2, 2]);
840        ans.to_ref_mut().add_array(&a, &b);
841        assert_eq!(ans.index_item([0, 0]), 7.);
842        assert_eq!(ans.index_item([0, 1]), 11.);
843        assert_eq!(ans.index_item([1, 0]), 23.);
844        assert_eq!(ans.index_item([1, 1]), 27.);
845    }
846    #[test]
847    fn matrix_add_sliced_cpu() {
848        matrix_add_sliced::<crate::device::cpu::Cpu>();
849    }
850    #[cfg(feature = "nvidia")]
851    #[test]
852    fn matrix_add_sliced_gpu() {
853        matrix_add_sliced::<crate::device::nvidia::Nvidia>();
854    }
855
856    fn transposed<D: Device>() {
857        let mut a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(
858            vec![
859                1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
860            ],
861            [4, 4],
862        );
863        let b: Matrix<_, DimDyn, _> = Matrix::from_vec(
864            vec![
865                1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
866            ],
867            [4, 4],
868        );
869
870        a.transpose();
871        let mut ans: Matrix<Owned<f32>, DimDyn, D> = Matrix::zeros([4, 4]);
872        ans.to_ref_mut().add_array(&a, &b);
873        assert_eq!(ans.index_item([0, 0]), 2.);
874        assert_eq!(ans.index_item([0, 1]), 7.);
875        assert_eq!(ans.index_item([0, 2]), 12.);
876        assert_eq!(ans.index_item([0, 3]), 17.);
877        assert_eq!(ans.index_item([1, 0]), 7.);
878        assert_eq!(ans.index_item([1, 1]), 12.);
879        assert_eq!(ans.index_item([1, 2]), 17.);
880        assert_eq!(ans.index_item([1, 3]), 22.);
881        assert_eq!(ans.index_item([2, 0]), 12.);
882        assert_eq!(ans.index_item([2, 1]), 17.);
883        assert_eq!(ans.index_item([2, 2]), 22.);
884        assert_eq!(ans.index_item([2, 3]), 27.);
885        assert_eq!(ans.index_item([3, 0]), 17.);
886        assert_eq!(ans.index_item([3, 1]), 22.);
887        assert_eq!(ans.index_item([3, 2]), 27.);
888        assert_eq!(ans.index_item([3, 3]), 32.);
889    }
890    #[test]
891    fn transposed_cpu() {
892        transposed::<crate::device::cpu::Cpu>();
893    }
894    #[cfg(feature = "nvidia")]
895    #[test]
896    fn transposed_gpu() {
897        transposed::<crate::device::nvidia::Nvidia>();
898    }
899
900    fn broadcast_add<D: Device>() {
901        let a = vec![1., 2., 3., 4., 5., 6., 7., 8.];
902        let a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [2, 2, 2]);
903        let b: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(vec![1., 1.], [1, 1, 2]);
904        let mut ans: Matrix<Owned<f32>, DimDyn, D> = Matrix::zeros([2, 2, 2]);
905        ans.to_ref_mut().add_array(&a, &b);
906        assert_eq!(ans.index_item([0, 0, 0]), 2.);
907        assert_eq!(ans.index_item([0, 0, 1]), 3.);
908        assert_eq!(ans.index_item([0, 1, 0]), 4.);
909        assert_eq!(ans.index_item([0, 1, 1]), 5.);
910        assert_eq!(ans.index_item([1, 0, 0]), 6.);
911        assert_eq!(ans.index_item([1, 0, 1]), 7.);
912        assert_eq!(ans.index_item([1, 1, 0]), 8.);
913        assert_eq!(ans.index_item([1, 1, 1]), 9.);
914    }
915    #[test]
916    fn broadcast_add_cpu() {
917        broadcast_add::<crate::device::cpu::Cpu>();
918    }
919    #[cfg(feature = "nvidia")]
920    #[test]
921    fn broadcast_add_gpu() {
922        broadcast_add::<crate::device::nvidia::Nvidia>();
923    }
924
925    fn add_2d_1d<D: Device>() {
926        let a = vec![1., 2., 3., 4., 5., 6., 7., 8.];
927        let a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [2, 2, 2]);
928        let b: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(vec![1., 1.], [2]);
929        let mut ans: Matrix<Owned<f32>, DimDyn, D> = Matrix::zeros([2, 2, 2]);
930        ans.to_ref_mut().add_array(&a, &b);
931        assert_eq!(ans.index_item([0, 0, 0]), 2.);
932        assert_eq!(ans.index_item([0, 0, 1]), 3.);
933        assert_eq!(ans.index_item([0, 1, 0]), 4.);
934        assert_eq!(ans.index_item([0, 1, 1]), 5.);
935        assert_eq!(ans.index_item([1, 0, 0]), 6.);
936        assert_eq!(ans.index_item([1, 0, 1]), 7.);
937        assert_eq!(ans.index_item([1, 1, 0]), 8.);
938        assert_eq!(ans.index_item([1, 1, 1]), 9.);
939    }
940    #[test]
941    fn add_2d_1d_cpu() {
942        add_2d_1d::<crate::device::cpu::Cpu>();
943    }
944    #[cfg(feature = "nvidia")]
945    #[test]
946    fn add_2d_1d_gpu() {
947        add_2d_1d::<crate::device::nvidia::Nvidia>();
948    }
949
950    fn add_2d_0d<D: Device>() {
951        let a = vec![1., 2., 3., 4., 5., 6., 7., 8.];
952        let a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [2, 2, 2]);
953        let b: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(vec![1.], []);
954        let mut ans: Matrix<Owned<f32>, DimDyn, D> = Matrix::zeros([2, 2, 2]);
955        ans.to_ref_mut().add_array(&a, &b);
956        assert_eq!(ans.index_item([0, 0, 0]), 2.);
957        assert_eq!(ans.index_item([0, 0, 1]), 3.);
958        assert_eq!(ans.index_item([0, 1, 0]), 4.);
959        assert_eq!(ans.index_item([0, 1, 1]), 5.);
960        assert_eq!(ans.index_item([1, 0, 0]), 6.);
961        assert_eq!(ans.index_item([1, 0, 1]), 7.);
962        assert_eq!(ans.index_item([1, 1, 0]), 8.);
963        assert_eq!(ans.index_item([1, 1, 1]), 9.);
964    }
965    #[test]
966    fn add_2d_0d_cpu() {
967        add_2d_0d::<crate::device::cpu::Cpu>();
968    }
969    #[cfg(feature = "nvidia")]
970    #[test]
971    fn add_2d_0d_gpu() {
972        add_2d_0d::<crate::device::nvidia::Nvidia>();
973    }
974
975    fn broad_cast_4x1x1x1_4x3x3x3<D: Device>() {
976        let a = Matrix::<Owned<f32>, DimDyn, D>::from_vec(vec![1., 2., 3., 4.], [4, 1, 1, 1]);
977        let b = Matrix::<Owned<f32>, DimDyn, D>::zeros([4, 2, 3, 3]);
978        let mut ans = Matrix::<Owned<f32>, DimDyn, D>::zeros([4, 2, 3, 3]);
979        ans.to_ref_mut().add_array(&a, &b);
980        let one = vec![1; 2 * 3 * 3];
981        let two = vec![2; 2 * 3 * 3];
982        let three = vec![3; 2 * 3 * 3];
983        let four = vec![4; 2 * 3 * 3];
984        let mut result = Vec::new();
985        result.extend_from_slice(&one);
986        result.extend_from_slice(&two);
987        result.extend_from_slice(&three);
988        result.extend_from_slice(&four);
989        let result = result.into_iter().map(|x| x as f32).collect::<Vec<f32>>();
990        let result = Matrix::<Owned<f32>, DimDyn, D>::from_vec(result, [4, 2, 3, 3]);
991        let diff = ans - result;
992        let diff = diff.asum();
993        assert!(diff == 0.0);
994    }
995    #[test]
996    fn broad_cast_4x1x1x1_4x3x3x3_cpu() {
997        broad_cast_4x1x1x1_4x3x3x3::<crate::device::cpu::Cpu>();
998    }
999    #[cfg(feature = "nvidia")]
1000    #[test]
1001    fn broad_cast_4x1x1x1_4x3x3x3_gpu() {
1002        broad_cast_4x1x1x1_4x3x3x3::<crate::device::nvidia::Nvidia>();
1003    }
1004
1005    fn broadcast_add_1x4x1x1_3x4x5x5<D: Device>() {
1006        let a = Matrix::<Owned<f32>, DimDyn, D>::from_vec(vec![1., 2., 3., 4.], [1, 4, 1, 1]);
1007        let b = Matrix::<Owned<f32>, DimDyn, D>::zeros([3, 4, 5, 5]);
1008        let mut ans = Matrix::<Owned<f32>, DimDyn, D>::zeros([3, 4, 5, 5]);
1009        ans.to_ref_mut().add_array(&a, &b);
1010        let one = vec![1; 3 * 5 * 5];
1011        let two = vec![2; 3 * 5 * 5];
1012        let three = vec![3; 3 * 5 * 5];
1013        let four = vec![4; 3 * 5 * 5];
1014        let mut result = Vec::new();
1015        result.extend_from_slice(&one);
1016        result.extend_from_slice(&two);
1017        result.extend_from_slice(&three);
1018        result.extend_from_slice(&four);
1019        let result = result.into_iter().map(|x| x as f32).collect::<Vec<f32>>();
1020        let mut result = Matrix::<Owned<f32>, DimDyn, D>::from_vec(result, [4, 3, 5, 5]);
1021        result.transpose_swap_index(0, 1);
1022        let diff = (ans - result).asum();
1023        assert!(diff == 0.0);
1024    }
1025    #[test]
1026    fn broadcast_add_1x4x1x1_3x4x5x5_cpu() {
1027        broadcast_add_1x4x1x1_3x4x5x5::<crate::device::cpu::Cpu>();
1028    }
1029    #[cfg(feature = "nvidia")]
1030    #[test]
1031    fn broadcast_add_1x4x1x1_3x4x5x5_gpu() {
1032        broadcast_add_1x4x1x1_3x4x5x5::<crate::device::nvidia::Nvidia>();
1033    }
1034
1035    fn sub_3d_scalar<D: Device>() {
1036        let a = vec![1., 2., 3., 4., 5., 6., 7., 8.];
1037        let a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [2, 2, 2]);
1038        let mut ans: Matrix<Owned<f32>, DimDyn, D> = Matrix::zeros([2, 2, 2]);
1039        ans.to_ref_mut().sub_scalar(&a, 1.);
1040
1041        assert_eq!(ans.index_item([0, 0, 0]), 0.);
1042        assert_eq!(ans.index_item([0, 0, 1]), 1.);
1043        assert_eq!(ans.index_item([0, 1, 0]), 2.);
1044        assert_eq!(ans.index_item([0, 1, 1]), 3.);
1045        assert_eq!(ans.index_item([1, 0, 0]), 4.);
1046        assert_eq!(ans.index_item([1, 0, 1]), 5.);
1047        assert_eq!(ans.index_item([1, 1, 0]), 6.);
1048        assert_eq!(ans.index_item([1, 1, 1]), 7.);
1049    }
1050    #[test]
1051    fn sub_3d_scalar_cpu() {
1052        sub_3d_scalar::<crate::device::cpu::Cpu>();
1053    }
1054    #[cfg(feature = "nvidia")]
1055    #[test]
1056    fn sub_3d_scalar_gpu() {
1057        sub_3d_scalar::<crate::device::nvidia::Nvidia>();
1058    }
1059
1060    fn sub_3d_scalar_assign<D: Device>() {
1061        let a = vec![1., 2., 3., 4., 5., 6., 7., 8.];
1062        let mut a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [2, 2, 2]);
1063        a.to_ref_mut().sub_scalar_assign(1.);
1064
1065        assert_eq!(a.index_item([0, 0, 0]), 0.);
1066        assert_eq!(a.index_item([0, 0, 1]), 1.);
1067        assert_eq!(a.index_item([0, 1, 0]), 2.);
1068        assert_eq!(a.index_item([0, 1, 1]), 3.);
1069        assert_eq!(a.index_item([1, 0, 0]), 4.);
1070        assert_eq!(a.index_item([1, 0, 1]), 5.);
1071        assert_eq!(a.index_item([1, 1, 0]), 6.);
1072        assert_eq!(a.index_item([1, 1, 1]), 7.);
1073    }
1074    #[test]
1075    fn sub_3d_scalar_assign_cpu() {
1076        sub_3d_scalar_assign::<crate::device::cpu::Cpu>();
1077    }
1078    #[cfg(feature = "nvidia")]
1079    #[test]
1080    fn sub_3d_scalar_assign_gpu() {
1081        sub_3d_scalar_assign::<crate::device::nvidia::Nvidia>();
1082    }
1083
1084    fn sub_3d_array<D: Device>() {
1085        let a = vec![1., 2., 3., 4., 5., 6., 7., 8.];
1086        let a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [2, 2, 2]);
1087        let b = vec![1., 1., 1., 1., 1., 1., 1., 1.];
1088        let b: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(b, [2, 2, 2]);
1089        let mut ans: Matrix<Owned<f32>, DimDyn, D> = Matrix::zeros([2, 2, 2]);
1090        ans.to_ref_mut().sub_array(&a, &b);
1091
1092        assert_eq!(ans.index_item([0, 0, 0]), 0.);
1093        assert_eq!(ans.index_item([0, 0, 1]), 1.);
1094        assert_eq!(ans.index_item([0, 1, 0]), 2.);
1095        assert_eq!(ans.index_item([0, 1, 1]), 3.);
1096        assert_eq!(ans.index_item([1, 0, 0]), 4.);
1097        assert_eq!(ans.index_item([1, 0, 1]), 5.);
1098        assert_eq!(ans.index_item([1, 1, 0]), 6.);
1099        assert_eq!(ans.index_item([1, 1, 1]), 7.);
1100    }
1101    #[test]
1102    fn sub_3d_array_cpu() {
1103        sub_3d_array::<crate::device::cpu::Cpu>();
1104    }
1105    #[cfg(feature = "nvidia")]
1106    #[test]
1107    fn sub_3d_array_gpu() {
1108        sub_3d_array::<crate::device::nvidia::Nvidia>();
1109    }
1110
1111    fn sub_assign_array_3d<D: Device>() {
1112        let a = vec![1., 2., 3., 4., 5., 6., 7., 8.];
1113        let mut a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [2, 2, 2]);
1114        let b = vec![1., 1., 1., 1., 1., 1., 1., 1.];
1115        let b: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(b, [2, 2, 2]);
1116        a.to_ref_mut().sub_assign(&b.to_ref());
1117
1118        assert_eq!(a.index_item([0, 0, 0]), 0.);
1119        assert_eq!(a.index_item([0, 0, 1]), 1.);
1120        assert_eq!(a.index_item([0, 1, 0]), 2.);
1121        assert_eq!(a.index_item([0, 1, 1]), 3.);
1122        assert_eq!(a.index_item([1, 0, 0]), 4.);
1123        assert_eq!(a.index_item([1, 0, 1]), 5.);
1124        assert_eq!(a.index_item([1, 1, 0]), 6.);
1125        assert_eq!(a.index_item([1, 1, 1]), 7.);
1126    }
1127    #[test]
1128    fn sub_assign_array_3d_cpu() {
1129        sub_assign_array_3d::<crate::device::cpu::Cpu>();
1130    }
1131    #[cfg(feature = "nvidia")]
1132    #[test]
1133    fn sub_assign_array_3d_gpu() {
1134        sub_assign_array_3d::<crate::device::nvidia::Nvidia>();
1135    }
1136
1137    fn mul_scalar<D: Device>() {
1138        let a = vec![1., 2., 3., 4., 5., 6., 7., 8.];
1139        let a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [2, 2, 2]);
1140        let mut ans: Matrix<Owned<f32>, DimDyn, D> = Matrix::zeros([2, 2, 2]);
1141        ans.to_ref_mut().mul_scalar(&a, 2.);
1142
1143        assert_eq!(ans.index_item([0, 0, 0]), 2.);
1144        assert_eq!(ans.index_item([0, 0, 1]), 4.);
1145        assert_eq!(ans.index_item([0, 1, 0]), 6.);
1146        assert_eq!(ans.index_item([0, 1, 1]), 8.);
1147        assert_eq!(ans.index_item([1, 0, 0]), 10.);
1148        assert_eq!(ans.index_item([1, 0, 1]), 12.);
1149        assert_eq!(ans.index_item([1, 1, 0]), 14.);
1150        assert_eq!(ans.index_item([1, 1, 1]), 16.);
1151    }
1152    #[test]
1153    fn mul_scalar_cpu() {
1154        mul_scalar::<crate::device::cpu::Cpu>();
1155    }
1156    #[cfg(feature = "nvidia")]
1157    #[test]
1158    fn mul_scalar_gpu() {
1159        mul_scalar::<crate::device::nvidia::Nvidia>();
1160    }
1161
1162    fn mul_scalar_assign<D: Device>() {
1163        let a = vec![1., 2., 3., 4., 5., 6., 7., 8.];
1164        let mut a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [2, 2, 2]);
1165        a.to_ref_mut().mul_scalar_assign(2.);
1166
1167        assert_eq!(a.index_item([0, 0, 0]), 2.);
1168        assert_eq!(a.index_item([0, 0, 1]), 4.);
1169        assert_eq!(a.index_item([0, 1, 0]), 6.);
1170        assert_eq!(a.index_item([0, 1, 1]), 8.);
1171        assert_eq!(a.index_item([1, 0, 0]), 10.);
1172        assert_eq!(a.index_item([1, 0, 1]), 12.);
1173        assert_eq!(a.index_item([1, 1, 0]), 14.);
1174        assert_eq!(a.index_item([1, 1, 1]), 16.);
1175    }
1176    #[test]
1177    fn mul_scalar_assign_cpu() {
1178        mul_scalar_assign::<crate::device::cpu::Cpu>();
1179    }
1180    #[cfg(feature = "nvidia")]
1181    #[test]
1182    fn mul_scalar_assign_gpu() {
1183        mul_scalar_assign::<crate::device::nvidia::Nvidia>();
1184    }
1185
1186    fn mul_array<D: Device>() {
1187        let a = vec![1., 2., 3., 4., 5., 6., 7., 8.];
1188        let a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [2, 2, 2]);
1189        let b = vec![8., 7., 6., 5., 4., 3., 2., 1.];
1190        let b: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(b, [2, 2, 2]);
1191        let mut ans: Matrix<Owned<f32>, DimDyn, D> = Matrix::zeros([2, 2, 2]);
1192        ans.to_ref_mut().mul_array(&a, &b);
1193
1194        assert_eq!(ans.index_item([0, 0, 0]), 8.);
1195        assert_eq!(ans.index_item([0, 0, 1]), 14.);
1196        assert_eq!(ans.index_item([0, 1, 0]), 18.);
1197        assert_eq!(ans.index_item([0, 1, 1]), 20.);
1198        assert_eq!(ans.index_item([1, 0, 0]), 20.);
1199        assert_eq!(ans.index_item([1, 0, 1]), 18.);
1200        assert_eq!(ans.index_item([1, 1, 0]), 14.);
1201        assert_eq!(ans.index_item([1, 1, 1]), 8.);
1202    }
1203    #[test]
1204    fn mul_array_cpu() {
1205        mul_array::<crate::device::cpu::Cpu>();
1206    }
1207    #[cfg(feature = "nvidia")]
1208    #[test]
1209    fn mul_array_gpu() {
1210        mul_array::<crate::device::nvidia::Nvidia>();
1211    }
1212
1213    fn mul_assign_array<D: Device>() {
1214        let a = vec![1., 2., 3., 4., 5., 6., 7., 8.];
1215        let mut a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [2, 2, 2]);
1216        let b = vec![8., 7., 6., 5., 4., 3., 2., 1.];
1217        let b: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(b, [2, 2, 2]);
1218        a.to_ref_mut().mul_assign(&b.to_ref());
1219
1220        assert_eq!(a.index_item([0, 0, 0]), 8.);
1221        assert_eq!(a.index_item([0, 0, 1]), 14.);
1222        assert_eq!(a.index_item([0, 1, 0]), 18.);
1223        assert_eq!(a.index_item([0, 1, 1]), 20.);
1224        assert_eq!(a.index_item([1, 0, 0]), 20.);
1225        assert_eq!(a.index_item([1, 0, 1]), 18.);
1226        assert_eq!(a.index_item([1, 1, 0]), 14.);
1227        assert_eq!(a.index_item([1, 1, 1]), 8.);
1228    }
1229    #[test]
1230    fn mul_assign_array_cpu() {
1231        mul_assign_array::<crate::device::cpu::Cpu>();
1232    }
1233    #[cfg(feature = "nvidia")]
1234    #[test]
1235    fn mul_assign_array_gpu() {
1236        mul_assign_array::<crate::device::nvidia::Nvidia>();
1237    }
1238
1239    fn div_scalar<D: Device>() {
1240        let a = vec![1., 2., 3., 4., 5., 6., 7., 8.];
1241        let a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [2, 2, 2]);
1242        let mut ans: Matrix<Owned<f32>, DimDyn, D> = Matrix::zeros([2, 2, 2]);
1243        ans.to_ref_mut().div_scalar(&a, 2.);
1244
1245        assert_eq!(ans.index_item([0, 0, 0]), 0.5);
1246        assert_eq!(ans.index_item([0, 0, 1]), 1.);
1247        assert_eq!(ans.index_item([0, 1, 0]), 1.5);
1248        assert_eq!(ans.index_item([0, 1, 1]), 2.);
1249        assert_eq!(ans.index_item([1, 0, 0]), 2.5);
1250        assert_eq!(ans.index_item([1, 0, 1]), 3.);
1251        assert_eq!(ans.index_item([1, 1, 0]), 3.5);
1252        assert_eq!(ans.index_item([1, 1, 1]), 4.);
1253    }
1254    #[test]
1255    fn div_scalar_cpu() {
1256        div_scalar::<crate::device::cpu::Cpu>();
1257    }
1258    #[cfg(feature = "nvidia")]
1259    #[test]
1260    fn div_scalar_gpu() {
1261        div_scalar::<crate::device::nvidia::Nvidia>();
1262    }
1263
1264    fn div_scalar_assign<D: Device>() {
1265        let a = vec![1., 2., 3., 4., 5., 6., 7., 8.];
1266        let mut a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [2, 2, 2]);
1267        a.to_ref_mut().div_scalar_assign(2.);
1268
1269        assert_eq!(a.index_item([0, 0, 0]), 0.5);
1270        assert_eq!(a.index_item([0, 0, 1]), 1.);
1271        assert_eq!(a.index_item([0, 1, 0]), 1.5);
1272        assert_eq!(a.index_item([0, 1, 1]), 2.);
1273        assert_eq!(a.index_item([1, 0, 0]), 2.5);
1274        assert_eq!(a.index_item([1, 0, 1]), 3.);
1275        assert_eq!(a.index_item([1, 1, 0]), 3.5);
1276        assert_eq!(a.index_item([1, 1, 1]), 4.);
1277    }
1278    #[test]
1279    fn div_scalar_assign_cpu() {
1280        div_scalar_assign::<crate::device::cpu::Cpu>();
1281    }
1282    #[cfg(feature = "nvidia")]
1283    #[test]
1284    fn div_scalar_assign_gpu() {
1285        div_scalar_assign::<crate::device::nvidia::Nvidia>();
1286    }
1287
1288    fn div_array<D: Device>() {
1289        let a = vec![1., 2., 3., 4., 5., 6., 7., 8.];
1290        let a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [2, 2, 2]);
1291        let b = vec![8., 7., 6., 5., 4., 3., 2., 1.];
1292        let b: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(b, [2, 2, 2]);
1293        let mut ans: Matrix<Owned<f32>, DimDyn, D> = Matrix::zeros([2, 2, 2]);
1294        ans.to_ref_mut().div_array(&a, &b);
1295
1296        assert_eq!(ans.index_item([0, 0, 0]), 1. / 8.);
1297        assert_eq!(ans.index_item([0, 0, 1]), 2. / 7.);
1298        assert_eq!(ans.index_item([0, 1, 0]), 3. / 6.);
1299        assert_eq!(ans.index_item([0, 1, 1]), 4. / 5.);
1300        assert_eq!(ans.index_item([1, 0, 0]), 5. / 4.);
1301        assert_eq!(ans.index_item([1, 0, 1]), 6. / 3.);
1302        assert_eq!(ans.index_item([1, 1, 0]), 7. / 2.);
1303        assert_eq!(ans.index_item([1, 1, 1]), 8. / 1.);
1304    }
1305    #[test]
1306    fn div_array_cpu() {
1307        div_array::<crate::device::cpu::Cpu>();
1308    }
1309    #[cfg(feature = "nvidia")]
1310    #[test]
1311    fn div_array_gpu() {
1312        div_array::<crate::device::nvidia::Nvidia>();
1313    }
1314
1315    fn div_assign_array<D: Device>() {
1316        let a = vec![1., 2., 3., 4., 5., 6., 7., 8.];
1317        let mut a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [2, 2, 2]);
1318        let b = vec![8., 7., 6., 5., 4., 3., 2., 1.];
1319        let b: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(b, [2, 2, 2]);
1320        a.to_ref_mut().div_assign(&b.to_ref());
1321
1322        assert_eq!(a.index_item([0, 0, 0]), 1. / 8.);
1323        assert_eq!(a.index_item([0, 0, 1]), 2. / 7.);
1324        assert_eq!(a.index_item([0, 1, 0]), 3. / 6.);
1325        assert_eq!(a.index_item([0, 1, 1]), 4. / 5.);
1326        assert_eq!(a.index_item([1, 0, 0]), 5. / 4.);
1327        assert_eq!(a.index_item([1, 0, 1]), 6. / 3.);
1328        assert_eq!(a.index_item([1, 1, 0]), 7. / 2.);
1329        assert_eq!(a.index_item([1, 1, 1]), 8. / 1.);
1330    }
1331    #[test]
1332    fn div_assign_array_cpu() {
1333        div_assign_array::<crate::device::cpu::Cpu>();
1334    }
1335    #[cfg(feature = "nvidia")]
1336    #[test]
1337    fn div_assign_array_gpu() {
1338        div_assign_array::<crate::device::nvidia::Nvidia>();
1339    }
1340
1341    fn sin_3d<D: Device>() {
1342        let a = vec![0., 1., 2., 3., 4., 5., 6., 7.];
1343        let a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [2, 2, 2]);
1344        let mut ans: Matrix<Owned<f32>, DimDyn, D> = Matrix::zeros([2, 2, 2]);
1345        ans.to_ref_mut().sin_array(&a);
1346
1347        // convert this test code to epsilon comparison
1348        assert!(ans.index_item([0, 0, 0]) - 0. < 1e-6);
1349        assert!((ans.index_item([0, 0, 1]) - f32::sin(1.)).abs() < 1e-6);
1350        assert!((ans.index_item([0, 1, 0]) - f32::sin(2.)).abs() < 1e-6);
1351        assert!((ans.index_item([0, 1, 1]) - f32::sin(3.)).abs() < 1e-6);
1352        assert!((ans.index_item([1, 0, 0]) - f32::sin(4.)).abs() < 1e-6);
1353        assert!((ans.index_item([1, 0, 1]) - f32::sin(5.)).abs() < 1e-6);
1354        assert!((ans.index_item([1, 1, 0]) - f32::sin(6.)).abs() < 1e-6);
1355        assert!((ans.index_item([1, 1, 1]) - f32::sin(7.)).abs() < 1e-6);
1356    }
1357    #[test]
1358    fn sin_3d_cpu() {
1359        sin_3d::<crate::device::cpu::Cpu>();
1360    }
1361    #[cfg(feature = "nvidia")]
1362    #[test]
1363    fn sin_3d_gpu() {
1364        sin_3d::<crate::device::nvidia::Nvidia>();
1365    }
1366
1367    fn sin_1d_sliced<D: Device>() {
1368        let a = vec![0., 1., 2., 3., 4., 5., 6., 7.];
1369        let a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [8]);
1370        let a = a.slice(slice_dynamic![1..;2]);
1371        let mut ans: Matrix<Owned<f32>, DimDyn, D> = Matrix::zeros([4]);
1372        ans.to_ref_mut().sin_array(&a);
1373
1374        assert!((ans.index_item([0]) - f32::sin(1.)).abs() < 1e-6);
1375        assert!((ans.index_item([1]) - f32::sin(3.)).abs() < 1e-6);
1376        assert!((ans.index_item([2]) - f32::sin(5.)).abs() < 1e-6);
1377        assert!((ans.index_item([3]) - f32::sin(7.)).abs() < 1e-6);
1378    }
1379    #[test]
1380    fn sin_1d_sliced_cpu() {
1381        sin_1d_sliced::<crate::device::cpu::Cpu>();
1382    }
1383    #[cfg(feature = "nvidia")]
1384    #[test]
1385    fn sin_1d_sliced_gpu() {
1386        sin_1d_sliced::<crate::device::nvidia::Nvidia>();
1387    }
1388
1389    fn pow_1d<D: Device>() {
1390        let a = vec![0., 1., 2., 3., 4., 5., 6., 7.];
1391        let a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [8]);
1392        let ans = a.powf_array(2.);
1393        assert_eq!(ans.index_item([0]), 0.);
1394        assert_eq!(ans.index_item([1]), 1.);
1395        assert_eq!(ans.index_item([2]), 4.);
1396        assert_eq!(ans.index_item([3]), 9.);
1397        assert_eq!(ans.index_item([4]), 16.);
1398        assert_eq!(ans.index_item([5]), 25.);
1399        assert_eq!(ans.index_item([6]), 36.);
1400        assert_eq!(ans.index_item([7]), 49.);
1401    }
1402    #[test]
1403    fn pow_1d_cpu() {
1404        pow_1d::<crate::device::cpu::Cpu>();
1405    }
1406    #[cfg(feature = "nvidia")]
1407    #[test]
1408    fn pow_1d_gpu() {
1409        pow_1d::<crate::device::nvidia::Nvidia>();
1410    }
1411
1412    fn pow_0d<D: Device>() {
1413        let a = vec![4.];
1414        let a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, []);
1415        let ans = a.powf_array(2.);
1416        assert_eq!(ans.index_item([]), 16.);
1417    }
1418    #[test]
1419    fn pow_0d_cpu() {
1420        pow_0d::<crate::device::cpu::Cpu>();
1421    }
1422    #[cfg(feature = "nvidia")]
1423    #[test]
1424    fn pow_0d_gpu() {
1425        pow_0d::<crate::device::nvidia::Nvidia>();
1426    }
1427
1428    fn pow_2d_transposed<D: Device>() {
1429        let a = vec![1., 2., 3., 4., 5., 6., 7., 8.];
1430        let mut a: Matrix<Owned<f32>, DimDyn, D> = Matrix::from_vec(a, [2, 4]);
1431        a.transpose();
1432        let ans = a.powf_array(2.);
1433        assert_eq!(ans.index_item([0, 0]), 1.);
1434        assert_eq!(ans.index_item([0, 1]), 25.);
1435        assert_eq!(ans.index_item([1, 0]), 4.);
1436        assert_eq!(ans.index_item([1, 1]), 36.);
1437        assert_eq!(ans.index_item([2, 0]), 9.);
1438        assert_eq!(ans.index_item([2, 1]), 49.);
1439        assert_eq!(ans.index_item([3, 0]), 16.);
1440        assert_eq!(ans.index_item([3, 1]), 64.);
1441    }
1442    #[test]
1443    fn pow_2d_transposed_cpu() {
1444        pow_2d_transposed::<crate::device::cpu::Cpu>();
1445    }
1446    #[cfg(feature = "nvidia")]
1447    #[test]
1448    fn pow_2d_transposed_gpu() {
1449        pow_2d_transposed::<crate::device::nvidia::Nvidia>();
1450    }
1451}