rstsr_core/device_cpu_serial/
reduction.rs

1use crate::prelude_dev::*;
2use core::ops::{Add, Mul};
3use num::complex::ComplexFloat;
4use num::{Bounded, FromPrimitive, One, Zero};
5use rstsr_dtype_traits::MinMaxAPI;
6
7impl<T, D> OpSumAPI<T, D> for DeviceCpuSerial
8where
9    T: Zero + Add<Output = T> + Clone,
10    D: DimAPI,
11{
12    type TOut = T;
13
14    fn sum_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<T> {
15        let f_init = T::zero;
16        let f = |acc, x| acc + x;
17        let f_sum = |acc1, acc2| acc1 + acc2;
18        let f_out = |acc| acc;
19
20        reduce_all_cpu_serial(a, la, f_init, f, f_sum, f_out)
21    }
22
23    fn sum_axes(
24        &self,
25        a: &Vec<T>,
26        la: &Layout<D>,
27        axes: &[isize],
28    ) -> Result<(Storage<DataOwned<Vec<T>>, T, Self>, Layout<IxD>)> {
29        let f_init = T::zero;
30        let f = |acc, x| acc + x;
31        let f_sum = |acc1, acc2| acc1 + acc2;
32        let f_out = |acc| acc;
33
34        let (out, layout_out) = reduce_axes_cpu_serial(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out)?;
35        Ok((Storage::new(out.into(), self.clone()), layout_out))
36    }
37}
38
39impl<T, D> OpMinAPI<T, D> for DeviceCpuSerial
40where
41    T: Clone + MinMaxAPI + Bounded,
42    D: DimAPI,
43{
44    type TOut = T;
45
46    fn min_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<T> {
47        if la.size() == 0 {
48            rstsr_raise!(InvalidValue, "zero-size array is not supported for min")?;
49        }
50
51        let f_init = T::max_value;
52        let f = |acc: T, x: T| acc.min(x);
53        let f_sum = |acc1: T, acc2: T| acc1.min(acc2);
54        let f_out = |acc| acc;
55        reduce_all_cpu_serial(a, la, f_init, f, f_sum, f_out)
56    }
57
58    fn min_axes(
59        &self,
60        a: &Vec<T>,
61        la: &Layout<D>,
62        axes: &[isize],
63    ) -> Result<(Storage<DataOwned<Vec<T>>, T, Self>, Layout<IxD>)> {
64        if la.size() == 0 {
65            rstsr_raise!(InvalidValue, "zero-size array is not supported for min")?;
66        }
67
68        let f_init = T::max_value;
69        let f = |acc: T, x: T| acc.min(x);
70        let f_sum = |acc1: T, acc2: T| acc1.min(acc2);
71        let f_out = |acc| acc;
72
73        let (out, layout_out) = reduce_axes_cpu_serial(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out)?;
74        Ok((Storage::new(out.into(), self.clone()), layout_out))
75    }
76}
77
78impl<T, D> OpMaxAPI<T, D> for DeviceCpuSerial
79where
80    T: Clone + MinMaxAPI + Bounded,
81    D: DimAPI,
82{
83    type TOut = T;
84
85    fn max_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<T> {
86        if la.size() == 0 {
87            rstsr_raise!(InvalidValue, "zero-size array is not supported for max")?;
88        }
89
90        let f_init = T::min_value;
91        let f = |acc: T, x: T| acc.max(x);
92        let f_sum = |acc1: T, acc2: T| acc1.max(acc2);
93        let f_out = |acc| acc;
94
95        reduce_all_cpu_serial(a, la, f_init, f, f_sum, f_out)
96    }
97
98    fn max_axes(
99        &self,
100        a: &Vec<T>,
101        la: &Layout<D>,
102        axes: &[isize],
103    ) -> Result<(Storage<DataOwned<Vec<T>>, T, Self>, Layout<IxD>)> {
104        if la.size() == 0 {
105            rstsr_raise!(InvalidValue, "zero-size array is not supported for max")?;
106        }
107
108        let f_init = T::min_value;
109        let f = |acc: T, x: T| acc.max(x);
110        let f_sum = |acc1: T, acc2: T| acc1.max(acc2);
111        let f_out = |acc| acc;
112
113        let (out, layout_out) = reduce_axes_cpu_serial(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out)?;
114        Ok((Storage::new(out.into(), self.clone()), layout_out))
115    }
116}
117
118impl<T, D> OpProdAPI<T, D> for DeviceCpuSerial
119where
120    T: Clone + One + Mul<Output = T>,
121    D: DimAPI,
122{
123    type TOut = T;
124
125    fn prod_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<T> {
126        let f_init = T::one;
127        let f = |acc, x| acc * x;
128        let f_sum = |acc1, acc2| acc1 * acc2;
129        let f_out = |acc| acc;
130
131        reduce_all_cpu_serial(a, la, f_init, f, f_sum, f_out)
132    }
133
134    fn prod_axes(
135        &self,
136        a: &Vec<T>,
137        la: &Layout<D>,
138        axes: &[isize],
139    ) -> Result<(Storage<DataOwned<Vec<T>>, T, Self>, Layout<IxD>)> {
140        let f_init = T::one;
141        let f = |acc, x| acc * x;
142        let f_sum = |acc1, acc2| acc1 * acc2;
143        let f_out = |acc| acc;
144
145        let (out, layout_out) = reduce_axes_cpu_serial(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out)?;
146        Ok((Storage::new(out.into(), self.clone()), layout_out))
147    }
148}
149
150impl<T, D> OpMeanAPI<T, D> for DeviceCpuSerial
151where
152    T: Clone + ComplexFloat + FromPrimitive,
153    D: DimAPI,
154{
155    type TOut = T;
156
157    fn mean_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<T> {
158        let size = la.size();
159        let f_init = T::zero;
160        let f = |acc, x| acc + x;
161        let f_sum = |acc, x| acc + x;
162        let f_out = |acc| acc / T::from_usize(size).unwrap();
163
164        let sum = reduce_all_cpu_serial(a, la, f_init, f, f_sum, f_out)?;
165        Ok(sum)
166    }
167
168    fn mean_axes(
169        &self,
170        a: &Vec<T>,
171        la: &Layout<D>,
172        axes: &[isize],
173    ) -> Result<(Storage<DataOwned<Vec<T>>, T, Self>, Layout<IxD>)> {
174        let (layout_axes, _) = la.dim_split_axes(axes)?;
175        let size = layout_axes.size();
176        let f_init = T::zero;
177        let f = |acc, x| acc + x;
178        let f_sum = |acc, x| acc + x;
179        let f_out = |acc| acc / T::from_usize(size).unwrap();
180
181        let (out, layout_out) = reduce_axes_cpu_serial(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out)?;
182        Ok((Storage::new(out.into(), self.clone()), layout_out))
183    }
184}
185
186impl<T, D> OpVarAPI<T, D> for DeviceCpuSerial
187where
188    T: Clone + ComplexFloat + FromPrimitive,
189    T::Real: Clone + ComplexFloat + FromPrimitive,
190    D: DimAPI,
191{
192    type TOut = T::Real;
193
194    fn var_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<T::Real> {
195        let size = la.size();
196
197        let f_init = || (T::zero(), T::Real::zero());
198        let f = |(acc_1, acc_2): (T, T::Real), x: T| (acc_1 + x, acc_2 + (x * x.conj()).re());
199        let f_sum = |(acc_1, acc_2): (T, T::Real), (x_1, x_2)| (acc_1 + x_1, acc_2 + x_2);
200        let f_out = |(acc_1, acc_2): (T, T::Real)| {
201            let size_1 = T::from_usize(size).unwrap();
202            let size_2 = T::Real::from_usize(size).unwrap();
203            let mean = acc_1 / size_1;
204            acc_2 / size_2 - (mean * mean.conj()).re()
205        };
206
207        let result = reduce_all_cpu_serial(a, la, f_init, f, f_sum, f_out)?;
208        Ok(result)
209    }
210
211    fn var_axes(
212        &self,
213        a: &Vec<T>,
214        la: &Layout<D>,
215        axes: &[isize],
216    ) -> Result<(Storage<DataOwned<Vec<T::Real>>, T::Real, Self>, Layout<IxD>)> {
217        let (layout_axes, _) = la.dim_split_axes(axes)?;
218        let size = layout_axes.size();
219
220        let f_init = || (T::zero(), T::Real::zero());
221        let f = |(acc_1, acc_2): (T, T::Real), x: T| (acc_1 + x, acc_2 + (x * x.conj()).re());
222        let f_sum = |(acc_1, acc_2): (T, T::Real), (x_1, x_2)| (acc_1 + x_1, acc_2 + x_2);
223        let f_out = |(acc_1, acc_2): (T, T::Real)| {
224            let size_1 = T::from_usize(size).unwrap();
225            let size_2 = T::Real::from_usize(size).unwrap();
226            let mean = acc_1 / size_1;
227            acc_2 / size_2 - (mean * mean.conj()).re()
228        };
229
230        let (out, layout_out) = reduce_axes_difftype_cpu_serial(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out)?;
231
232        Ok((Storage::new(out.into(), self.clone()), layout_out))
233    }
234}
235
236impl<T, D> OpStdAPI<T, D> for DeviceCpuSerial
237where
238    T: Clone + ComplexFloat + FromPrimitive,
239    T::Real: Clone + ComplexFloat + FromPrimitive,
240    D: DimAPI,
241{
242    type TOut = T::Real;
243
244    fn std_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<T::Real> {
245        let size = la.size();
246
247        let f_init = || (T::zero(), T::Real::zero());
248        let f = |(acc_1, acc_2): (T, T::Real), x: T| (acc_1 + x, acc_2 + (x * x.conj()).re());
249        let f_sum = |(acc_1, acc_2): (T, T::Real), (x_1, x_2)| (acc_1 + x_1, acc_2 + x_2);
250        let f_out = |(acc_1, acc_2): (T, T::Real)| {
251            let size_1 = T::from_usize(size).unwrap();
252            let size_2 = T::Real::from_usize(size).unwrap();
253            let mean = acc_1 / size_1;
254            let var = acc_2 / size_2 - (mean * mean.conj()).re();
255            var.sqrt()
256        };
257
258        let result = reduce_all_cpu_serial(a, la, f_init, f, f_sum, f_out)?;
259        Ok(result)
260    }
261
262    fn std_axes(
263        &self,
264        a: &Vec<T>,
265        la: &Layout<D>,
266        axes: &[isize],
267    ) -> Result<(Storage<DataOwned<Vec<T::Real>>, T::Real, Self>, Layout<IxD>)> {
268        let (layout_axes, _) = la.dim_split_axes(axes)?;
269        let size = layout_axes.size();
270
271        let f_init = || (T::zero(), T::Real::zero());
272        let f = |(acc_1, acc_2): (T, T::Real), x: T| (acc_1 + x, acc_2 + (x * x.conj()).re());
273        let f_sum = |(acc_1, acc_2): (T, T::Real), (x_1, x_2)| (acc_1 + x_1, acc_2 + x_2);
274        let f_out = |(acc_1, acc_2): (T, T::Real)| {
275            let size_1 = T::from_usize(size).unwrap();
276            let size_2 = T::Real::from_usize(size).unwrap();
277            let mean = acc_1 / size_1;
278            let var = acc_2 / size_2 - (mean * mean.conj()).re();
279            var.sqrt()
280        };
281
282        let (out, layout_out) = reduce_axes_difftype_cpu_serial(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out)?;
283
284        Ok((Storage::new(out.into(), self.clone()), layout_out))
285    }
286}
287
288impl<T, D> OpL2NormAPI<T, D> for DeviceCpuSerial
289where
290    T: Clone + ComplexFloat + FromPrimitive,
291    T::Real: Clone + ComplexFloat + FromPrimitive,
292    D: DimAPI,
293{
294    type TOut = T::Real;
295
296    fn l2_norm_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<T::Real> {
297        let f_init = || T::Real::zero();
298        let f = |acc: T::Real, x: T| acc + (x * x.conj()).re();
299        let f_sum = |acc: T::Real, x: T::Real| acc + x;
300        let f_out = |acc: T::Real| acc.sqrt();
301
302        let result = reduce_all_cpu_serial(a, la, f_init, f, f_sum, f_out)?;
303        Ok(result)
304    }
305
306    fn l2_norm_axes(
307        &self,
308        a: &Vec<T>,
309        la: &Layout<D>,
310        axes: &[isize],
311    ) -> Result<(Storage<DataOwned<Vec<T::Real>>, T::Real, Self>, Layout<IxD>)> {
312        let f_init = || T::Real::zero();
313        let f = |acc: T::Real, x: T| acc + (x * x.conj()).re();
314        let f_sum = |acc: T::Real, x: T::Real| acc + x;
315        let f_out = |acc: T::Real| acc.sqrt();
316
317        let (out, layout_out) = reduce_axes_cpu_serial(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out)?;
318
319        Ok((Storage::new(out.into(), self.clone()), layout_out))
320    }
321}
322
323impl<T, D> OpArgMinAPI<T, D> for DeviceCpuSerial
324where
325    T: Clone + PartialOrd,
326    D: DimAPI,
327{
328    type TOut = usize;
329
330    fn argmin_axes(
331        &self,
332        a: &Vec<T>,
333        la: &Layout<D>,
334        axes: &[isize],
335    ) -> Result<(Storage<DataOwned<Vec<usize>>, Self::TOut, Self>, Layout<IxD>)> {
336        let f_comp = |x: Option<T>, y: T| -> Option<bool> {
337            if let Some(x) = x {
338                Some(y < x)
339            } else {
340                Some(true)
341            }
342        };
343        let f_eq = |x: Option<T>, y: T| -> Option<bool> {
344            if let Some(x) = x {
345                Some(y == x)
346            } else {
347                Some(false)
348            }
349        };
350        let (out, layout_out) = reduce_axes_arg_cpu_serial(a, la, axes, f_comp, f_eq, RowMajor)?;
351        Ok((Storage::new(out.into(), self.clone()), layout_out))
352    }
353
354    fn argmin_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<Self::TOut> {
355        let f_comp = |x: Option<T>, y: T| -> Option<bool> {
356            if let Some(x) = x {
357                Some(y < x)
358            } else {
359                Some(true)
360            }
361        };
362        let f_eq = |x: Option<T>, y: T| -> Option<bool> {
363            if let Some(x) = x {
364                Some(y == x)
365            } else {
366                Some(false)
367            }
368        };
369        let result = reduce_all_arg_cpu_serial(a, la, f_comp, f_eq, RowMajor)?;
370        Ok(result)
371    }
372}
373
374impl<T, D> OpArgMaxAPI<T, D> for DeviceCpuSerial
375where
376    T: Clone + PartialOrd,
377    D: DimAPI,
378{
379    type TOut = usize;
380
381    fn argmax_axes(
382        &self,
383        a: &Vec<T>,
384        la: &Layout<D>,
385        axes: &[isize],
386    ) -> Result<(Storage<DataOwned<Vec<usize>>, Self::TOut, Self>, Layout<IxD>)> {
387        let f_comp = |x: Option<T>, y: T| -> Option<bool> {
388            if let Some(x) = x {
389                Some(y > x)
390            } else {
391                Some(true)
392            }
393        };
394        let f_eq = |x: Option<T>, y: T| -> Option<bool> {
395            if let Some(x) = x {
396                Some(y == x)
397            } else {
398                Some(false)
399            }
400        };
401        let (out, layout_out) = reduce_axes_arg_cpu_serial(a, la, axes, f_comp, f_eq, RowMajor)?;
402        Ok((Storage::new(out.into(), self.clone()), layout_out))
403    }
404
405    fn argmax_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<Self::TOut> {
406        let f_comp = |x: Option<T>, y: T| -> Option<bool> {
407            if let Some(x) = x {
408                Some(y > x)
409            } else {
410                Some(true)
411            }
412        };
413        let f_eq = |x: Option<T>, y: T| -> Option<bool> {
414            if let Some(x) = x {
415                Some(y == x)
416            } else {
417                Some(false)
418            }
419        };
420        let result = reduce_all_arg_cpu_serial(a, la, f_comp, f_eq, RowMajor)?;
421        Ok(result)
422    }
423}
424
425impl<T, D> OpUnraveledArgMinAPI<T, D> for DeviceCpuSerial
426where
427    T: Clone + PartialOrd,
428    D: DimAPI,
429{
430    fn unraveled_argmin_axes(
431        &self,
432        a: &Vec<T>,
433        la: &Layout<D>,
434        axes: &[isize],
435    ) -> Result<(Storage<DataOwned<Vec<IxD>>, IxD, Self>, Layout<IxD>)> {
436        let f_comp = |x: Option<T>, y: T| -> Option<bool> {
437            if let Some(x) = x {
438                Some(y < x)
439            } else {
440                Some(true)
441            }
442        };
443        let f_eq = |x: Option<T>, y: T| -> Option<bool> {
444            if let Some(x) = x {
445                Some(y == x)
446            } else {
447                Some(false)
448            }
449        };
450        let (out, layout_out) = reduce_axes_unraveled_arg_cpu_serial(a, la, axes, f_comp, f_eq)?;
451        Ok((Storage::new(out.into(), self.clone()), layout_out))
452    }
453
454    fn unraveled_argmin_all(&self, a: &<Self as DeviceRawAPI<T>>::Raw, la: &Layout<D>) -> Result<D> {
455        let f_comp = |x: Option<T>, y: T| -> Option<bool> {
456            if let Some(x) = x {
457                Some(y < x)
458            } else {
459                Some(true)
460            }
461        };
462        let f_eq = |x: Option<T>, y: T| -> Option<bool> {
463            if let Some(x) = x {
464                Some(y == x)
465            } else {
466                Some(false)
467            }
468        };
469        let result = reduce_all_unraveled_arg_cpu_serial(a, la, f_comp, f_eq)?;
470        Ok(result)
471    }
472}
473
474impl<T, D> OpUnraveledArgMaxAPI<T, D> for DeviceCpuSerial
475where
476    T: Clone + PartialOrd,
477    D: DimAPI,
478{
479    fn unraveled_argmax_axes(
480        &self,
481        a: &Vec<T>,
482        la: &Layout<D>,
483        axes: &[isize],
484    ) -> Result<(Storage<DataOwned<Vec<IxD>>, IxD, Self>, Layout<IxD>)> {
485        let f_comp = |x: Option<T>, y: T| -> Option<bool> {
486            if let Some(x) = x {
487                Some(y > x)
488            } else {
489                Some(true)
490            }
491        };
492        let f_eq = |x: Option<T>, y: T| -> Option<bool> {
493            if let Some(x) = x {
494                Some(y == x)
495            } else {
496                Some(false)
497            }
498        };
499        let (out, layout_out) = reduce_axes_unraveled_arg_cpu_serial(a, la, axes, f_comp, f_eq)?;
500        Ok((Storage::new(out.into(), self.clone()), layout_out))
501    }
502
503    fn unraveled_argmax_all(&self, a: &<Self as DeviceRawAPI<T>>::Raw, la: &Layout<D>) -> Result<D> {
504        let f_comp = |x: Option<T>, y: T| -> Option<bool> {
505            if let Some(x) = x {
506                Some(y > x)
507            } else {
508                Some(true)
509            }
510        };
511        let f_eq = |x: Option<T>, y: T| -> Option<bool> {
512            if let Some(x) = x {
513                Some(y == x)
514            } else {
515                Some(false)
516            }
517        };
518        let result = reduce_all_unraveled_arg_cpu_serial(a, la, f_comp, f_eq)?;
519        Ok(result)
520    }
521}
522
523impl<D> OpSumBoolAPI<D> for DeviceCpuSerial
524where
525    D: DimAPI,
526{
527    fn sum_all(&self, a: &Vec<bool>, la: &Layout<D>) -> Result<usize> {
528        let f_init = || 0;
529        let f = |acc, x| match x {
530            true => acc + 1,
531            false => acc,
532        };
533        let f_sum = |acc1, acc2| acc1 + acc2;
534        let f_out = |acc| acc;
535
536        reduce_all_cpu_serial(a, la, f_init, f, f_sum, f_out)
537    }
538
539    fn sum_axes(
540        &self,
541        a: &Vec<bool>,
542        la: &Layout<D>,
543        axes: &[isize],
544    ) -> Result<(Storage<DataOwned<Vec<usize>>, usize, Self>, Layout<IxD>)> {
545        let f_init = || 0;
546        let f = |acc, x| match x {
547            true => acc + 1,
548            false => acc,
549        };
550        let f_sum = |acc1, acc2| acc1 + acc2;
551        let f_out = |acc| acc;
552
553        let (out, layout_out) = reduce_axes_cpu_serial(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out)?;
554        Ok((Storage::new(out.into(), self.clone()), layout_out))
555    }
556}