rstsr_openblas/rayon_auto_impl/
reduction.rs

1use crate::prelude_dev::*;
2use core::ops::{Add, Mul};
3use num::complex::ComplexFloat;
4use num::{FromPrimitive, One, Zero};
5use rstsr_dtype_traits::ExtReal;
6
7impl<T, D> OpSumAPI<T, D> for DeviceRayonAutoImpl
8where
9    T: Clone + Send + Sync + Zero + Add<Output = T>,
10    D: DimAPI,
11{
12    type TOut = T;
13
14    fn sum_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<T> {
15        let pool = self.get_current_pool();
16
17        let f_init = T::zero;
18        let f = |acc, x| acc + x;
19        let f_sum = |acc1, acc2| acc1 + acc2;
20        let f_out = |acc| acc;
21
22        reduce_all_cpu_rayon(a, la, f_init, f, f_sum, f_out, pool)
23    }
24
25    fn sum_axes(
26        &self,
27        a: &Vec<T>,
28        la: &Layout<D>,
29        axes: &[isize],
30    ) -> Result<(Storage<DataOwned<Vec<T>>, T, Self>, Layout<IxD>)> {
31        let pool = self.get_current_pool();
32
33        let f_init = T::zero;
34        let f = |acc, x| acc + x;
35        let f_sum = |acc1, acc2| acc1 + acc2;
36        let f_out = |acc| acc;
37
38        let (out, layout_out) = reduce_axes_cpu_rayon(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out, pool)?;
39        Ok((Storage::new(out.into(), self.clone()), layout_out))
40    }
41}
42
43impl<T, D> OpMinAPI<T, D> for DeviceRayonAutoImpl
44where
45    T: ExtReal + Send + Sync,
46    D: DimAPI,
47{
48    type TOut = T;
49
50    fn min_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<T> {
51        if la.size() == 0 {
52            rstsr_raise!(InvalidValue, "zero-size array is not supported for min")?;
53        }
54
55        let pool = self.get_current_pool();
56
57        let f_init = T::ext_max_value;
58        let f = |acc: T, x: T| acc.ext_min(x);
59        let f_sum = |acc1: T, acc2: T| acc1.ext_min(acc2);
60        let f_out = |acc| acc;
61
62        reduce_all_cpu_rayon(a, la, f_init, f, f_sum, f_out, pool)
63    }
64
65    fn min_axes(
66        &self,
67        a: &Vec<T>,
68        la: &Layout<D>,
69        axes: &[isize],
70    ) -> Result<(Storage<DataOwned<Vec<T>>, T, Self>, Layout<IxD>)> {
71        if la.size() == 0 {
72            rstsr_raise!(InvalidValue, "zero-size array is not supported for min")?;
73        }
74
75        let pool = self.get_current_pool();
76
77        let f_init = T::ext_max_value;
78        let f = |acc: T, x: T| acc.ext_min(x);
79        let f_sum = |acc1: T, acc2: T| acc1.ext_min(acc2);
80        let f_out = |acc| acc;
81
82        let (out, layout_out) = reduce_axes_cpu_rayon(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out, pool)?;
83        Ok((Storage::new(out.into(), self.clone()), layout_out))
84    }
85}
86
87impl<T, D> OpMaxAPI<T, D> for DeviceRayonAutoImpl
88where
89    T: ExtReal + Send + Sync,
90    D: DimAPI,
91{
92    type TOut = T;
93
94    fn max_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<T> {
95        if la.size() == 0 {
96            rstsr_raise!(InvalidValue, "zero-size array is not supported for max")?;
97        }
98
99        let pool = self.get_current_pool();
100
101        let f_init = T::ext_min_value;
102        let f = |acc: T, x: T| acc.ext_max(x);
103        let f_sum = |acc1: T, acc2: T| acc1.ext_max(acc2);
104        let f_out = |acc| acc;
105
106        reduce_all_cpu_rayon(a, la, f_init, f, f_sum, f_out, pool)
107    }
108
109    fn max_axes(
110        &self,
111        a: &Vec<T>,
112        la: &Layout<D>,
113        axes: &[isize],
114    ) -> Result<(Storage<DataOwned<Vec<T>>, T, Self>, Layout<IxD>)> {
115        if la.size() == 0 {
116            rstsr_raise!(InvalidValue, "zero-size array is not supported for max")?;
117        }
118
119        let pool = self.get_current_pool();
120
121        let f_init = T::ext_min_value;
122        let f = |acc: T, x: T| acc.ext_max(x);
123        let f_sum = |acc1: T, acc2: T| acc1.ext_max(acc2);
124        let f_out = |acc| acc;
125
126        let (out, layout_out) = reduce_axes_cpu_rayon(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out, pool)?;
127        Ok((Storage::new(out.into(), self.clone()), layout_out))
128    }
129}
130
131impl<T, D> OpProdAPI<T, D> for DeviceRayonAutoImpl
132where
133    T: Clone + Send + Sync + One + Mul<Output = T>,
134    D: DimAPI,
135{
136    type TOut = T;
137
138    fn prod_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<T> {
139        let pool = self.get_current_pool();
140
141        let f_init = T::one;
142        let f = |acc, x| acc * x;
143        let f_sum = |acc1, acc2| acc1 * acc2;
144        let f_out = |acc| acc;
145
146        reduce_all_cpu_rayon(a, la, f_init, f, f_sum, f_out, pool)
147    }
148
149    fn prod_axes(
150        &self,
151        a: &Vec<T>,
152        la: &Layout<D>,
153        axes: &[isize],
154    ) -> Result<(Storage<DataOwned<Vec<T>>, T, Self>, Layout<IxD>)> {
155        let pool = self.get_current_pool();
156
157        let f_init = T::one;
158        let f = |acc, x| acc * x;
159        let f_sum = |acc1, acc2| acc1 * acc2;
160        let f_out = |acc| acc;
161
162        let (out, layout_out) = reduce_axes_cpu_rayon(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out, pool)?;
163        Ok((Storage::new(out.into(), self.clone()), layout_out))
164    }
165}
166
167impl<T, D> OpMeanAPI<T, D> for DeviceRayonAutoImpl
168where
169    T: Clone + Send + Sync + ComplexFloat + FromPrimitive,
170    D: DimAPI,
171{
172    type TOut = T;
173
174    fn mean_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<T> {
175        let pool = self.get_current_pool();
176
177        let size = la.size();
178        let f_init = T::zero;
179        let f = |acc, x| acc + x;
180        let f_sum = |acc, x| acc + x;
181        let f_out = |acc| acc / T::from_usize(size).unwrap();
182
183        let sum = reduce_all_cpu_rayon(a, la, f_init, f, f_sum, f_out, pool)?;
184        Ok(sum)
185    }
186
187    fn mean_axes(
188        &self,
189        a: &Vec<T>,
190        la: &Layout<D>,
191        axes: &[isize],
192    ) -> Result<(Storage<DataOwned<Vec<T>>, T, Self>, Layout<IxD>)> {
193        let pool = self.get_current_pool();
194
195        let (layout_axes, _) = la.dim_split_axes(axes)?;
196        let size = layout_axes.size();
197        let f_init = T::zero;
198        let f = |acc, x| acc + x;
199        let f_sum = |acc, x| acc + x;
200        let f_out = |acc| acc / T::from_usize(size).unwrap();
201
202        let (out, layout_out) = reduce_axes_cpu_rayon(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out, pool)?;
203        Ok((Storage::new(out.into(), self.clone()), layout_out))
204    }
205}
206
/// Variance reductions for [`DeviceRayonAutoImpl`].
///
/// Uses the one-pass identity `Var(x) = E[|x|^2] - |E[x]|^2`, carrying a pair
/// accumulator of (running sum, running sum of squared magnitudes).
/// NOTE(review): the one-pass formula can suffer catastrophic cancellation
/// when the mean is large relative to the spread — confirm this trade-off is
/// intended before changing.
impl<T, D> OpVarAPI<T, D> for DeviceRayonAutoImpl
where
    T: Clone + Send + Sync + ComplexFloat + FromPrimitive,
    T::Real: Clone + Send + Sync + ComplexFloat + FromPrimitive,
    D: DimAPI,
{
    // Variance of complex data is real-valued.
    type TOut = T::Real;

    /// Population variance (denominator `n`, the total element count) over all elements.
    fn var_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<T::Real> {
        let pool = self.get_current_pool();

        let size = la.size();

        // Pair accumulator: (sum of x, sum of |x|^2). `x * conj(x)` is real for
        // complex inputs, so only its real part is kept.
        let f_init = || (T::zero(), T::Real::zero());
        let f = |(acc_1, acc_2): (T, T::Real), x: T| (acc_1 + x, acc_2 + (x * x.conj()).re());
        let f_sum = |(acc_1, acc_2): (T, T::Real), (x_1, x_2)| (acc_1 + x_1, acc_2 + x_2);
        // Finalize: E[|x|^2] - |mean|^2. NOTE(review): no zero-size guard here
        // (unlike min/max), so `size == 0` divides by zero — confirm intended.
        let f_out = |(acc_1, acc_2): (T, T::Real)| {
            let size_1 = T::from_usize(size).unwrap();
            let size_2 = T::Real::from_usize(size).unwrap();
            let mean = acc_1 / size_1;
            acc_2 / size_2 - (mean * mean.conj()).re()
        };

        let result = reduce_all_cpu_rayon(a, la, f_init, f, f_sum, f_out, pool)?;
        Ok(result)
    }

    /// Population variance along `axes`; the divisor is the number of elements
    /// in the reduced axes only.
    fn var_axes(
        &self,
        a: &Vec<T>,
        la: &Layout<D>,
        axes: &[isize],
    ) -> Result<(Storage<DataOwned<Vec<T::Real>>, T::Real, Self>, Layout<IxD>)> {
        let pool = self.get_current_pool();

        // `size` counts only the elements being reduced away (the split-off axes).
        let (layout_axes, _) = la.dim_split_axes(axes)?;
        let size = layout_axes.size();

        let f_init = || (T::zero(), T::Real::zero());
        let f = |(acc_1, acc_2): (T, T::Real), x: T| (acc_1 + x, acc_2 + (x * x.conj()).re());
        let f_sum = |(acc_1, acc_2): (T, T::Real), (x_1, x_2)| (acc_1 + x_1, acc_2 + x_2);
        let f_out = |(acc_1, acc_2): (T, T::Real)| {
            let size_1 = T::from_usize(size).unwrap();
            let size_2 = T::Real::from_usize(size).unwrap();
            let mean = acc_1 / size_1;
            acc_2 / size_2 - (mean * mean.conj()).re()
        };

        // Output element type (T::Real) differs from input (T), hence the
        // `difftype` reduction variant.
        let (out, layout_out) = reduce_axes_difftype_cpu_rayon(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out, pool)?;

        Ok((Storage::new(out.into(), self.clone()), layout_out))
    }
}
260
/// Standard-deviation reductions for [`DeviceRayonAutoImpl`].
///
/// Identical accumulation to the variance impl above (one-pass
/// `E[|x|^2] - |E[x]|^2` with a pair accumulator), followed by a final `sqrt`.
/// NOTE(review): shares the one-pass formula's precision caveat, and a NaN
/// results if rounding makes the variance slightly negative — confirm intended.
impl<T, D> OpStdAPI<T, D> for DeviceRayonAutoImpl
where
    T: Clone + Send + Sync + ComplexFloat + FromPrimitive,
    T::Real: Clone + Send + Sync + ComplexFloat + FromPrimitive,
    D: DimAPI,
{
    // Standard deviation of complex data is real-valued.
    type TOut = T::Real;

    /// Population standard deviation (denominator `n`) over all elements.
    fn std_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<T::Real> {
        let pool = self.get_current_pool();

        let size = la.size();

        // Pair accumulator: (sum of x, sum of |x|^2).
        let f_init = || (T::zero(), T::Real::zero());
        let f = |(acc_1, acc_2): (T, T::Real), x: T| (acc_1 + x, acc_2 + (x * x.conj()).re());
        let f_sum = |(acc_1, acc_2): (T, T::Real), (x_1, x_2)| (acc_1 + x_1, acc_2 + x_2);
        // Finalize: sqrt(E[|x|^2] - |mean|^2).
        let f_out = |(acc_1, acc_2): (T, T::Real)| {
            let size_1 = T::from_usize(size).unwrap();
            let size_2 = T::Real::from_usize(size).unwrap();
            let mean = acc_1 / size_1;
            let var = acc_2 / size_2 - (mean * mean.conj()).re();
            var.sqrt()
        };

        let result = reduce_all_cpu_rayon(a, la, f_init, f, f_sum, f_out, pool)?;
        Ok(result)
    }

    /// Population standard deviation along `axes`; the divisor counts only
    /// the reduced elements.
    fn std_axes(
        &self,
        a: &Vec<T>,
        la: &Layout<D>,
        axes: &[isize],
    ) -> Result<(Storage<DataOwned<Vec<T::Real>>, T::Real, Self>, Layout<IxD>)> {
        let pool = self.get_current_pool();

        let (layout_axes, _) = la.dim_split_axes(axes)?;
        let size = layout_axes.size();

        let f_init = || (T::zero(), T::Real::zero());
        let f = |(acc_1, acc_2): (T, T::Real), x: T| (acc_1 + x, acc_2 + (x * x.conj()).re());
        let f_sum = |(acc_1, acc_2): (T, T::Real), (x_1, x_2)| (acc_1 + x_1, acc_2 + x_2);
        let f_out = |(acc_1, acc_2): (T, T::Real)| {
            let size_1 = T::from_usize(size).unwrap();
            let size_2 = T::Real::from_usize(size).unwrap();
            let mean = acc_1 / size_1;
            let var = acc_2 / size_2 - (mean * mean.conj()).re();
            var.sqrt()
        };

        // Input is T, output is T::Real — use the different-type reduction helper.
        let (out, layout_out) = reduce_axes_difftype_cpu_rayon(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out, pool)?;

        Ok((Storage::new(out.into(), self.clone()), layout_out))
    }
}
316
317impl<T, D> OpL2NormAPI<T, D> for DeviceRayonAutoImpl
318where
319    T: Clone + Send + Sync + ComplexFloat + FromPrimitive,
320    T::Real: Clone + Send + Sync + ComplexFloat + FromPrimitive,
321    D: DimAPI,
322{
323    type TOut = T::Real;
324
325    fn l2_norm_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<T::Real> {
326        let pool = self.get_current_pool();
327
328        let f_init = || T::Real::zero();
329        let f = |acc: T::Real, x: T| acc + (x * x.conj()).re();
330        let f_sum = |acc: T::Real, x: T::Real| acc + x;
331        let f_out = |acc: T::Real| acc.sqrt();
332
333        let result = reduce_all_cpu_rayon(a, la, f_init, f, f_sum, f_out, pool)?;
334        Ok(result)
335    }
336
337    fn l2_norm_axes(
338        &self,
339        a: &Vec<T>,
340        la: &Layout<D>,
341        axes: &[isize],
342    ) -> Result<(Storage<DataOwned<Vec<T::Real>>, T::Real, Self>, Layout<IxD>)> {
343        let pool = self.get_current_pool();
344
345        let f_init = || T::Real::zero();
346        let f = |acc: T::Real, x: T| acc + (x * x.conj()).re();
347        let f_sum = |acc: T::Real, x: T::Real| acc + x;
348        let f_out = |acc: T::Real| acc.sqrt();
349
350        let (out, layout_out) = reduce_axes_cpu_rayon(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out, pool)?;
351
352        Ok((Storage::new(out.into(), self.clone()), layout_out))
353    }
354}
355
356impl<T, D> OpArgMinAPI<T, D> for DeviceRayonAutoImpl
357where
358    T: Clone + PartialOrd + Send + Sync,
359    D: DimAPI,
360{
361    type TOut = usize;
362
363    fn argmin_axes(
364        &self,
365        a: &Vec<T>,
366        la: &Layout<D>,
367        axes: &[isize],
368    ) -> Result<(Storage<DataOwned<Vec<usize>>, Self::TOut, Self>, Layout<IxD>)> {
369        let pool = self.get_current_pool();
370
371        let f_comp = |x: Option<T>, y: T| -> Option<bool> {
372            if let Some(x) = x {
373                Some(y < x)
374            } else {
375                Some(true)
376            }
377        };
378        let f_eq = |x: Option<T>, y: T| -> Option<bool> {
379            if let Some(x) = x {
380                Some(y == x)
381            } else {
382                Some(false)
383            }
384        };
385        let (out, layout_out) = reduce_axes_arg_cpu_rayon(a, la, axes, f_comp, f_eq, RowMajor, pool)?;
386        Ok((Storage::new(out.into(), self.clone()), layout_out))
387    }
388
389    fn argmin_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<Self::TOut> {
390        let pool = self.get_current_pool();
391
392        let f_comp = |x: Option<T>, y: T| -> Option<bool> {
393            if let Some(x) = x {
394                Some(y < x)
395            } else {
396                Some(true)
397            }
398        };
399        let f_eq = |x: Option<T>, y: T| -> Option<bool> {
400            if let Some(x) = x {
401                Some(y == x)
402            } else {
403                Some(false)
404            }
405        };
406        let result = reduce_all_arg_cpu_rayon(a, la, f_comp, f_eq, RowMajor, pool)?;
407        Ok(result)
408    }
409}
410
411impl<T, D> OpArgMaxAPI<T, D> for DeviceRayonAutoImpl
412where
413    T: Clone + PartialOrd + Send + Sync,
414    D: DimAPI,
415{
416    type TOut = usize;
417
418    fn argmax_axes(
419        &self,
420        a: &Vec<T>,
421        la: &Layout<D>,
422        axes: &[isize],
423    ) -> Result<(Storage<DataOwned<Vec<usize>>, Self::TOut, Self>, Layout<IxD>)> {
424        let pool = self.get_current_pool();
425
426        let f_comp = |x: Option<T>, y: T| -> Option<bool> {
427            if let Some(x) = x {
428                Some(y > x)
429            } else {
430                Some(true)
431            }
432        };
433        let f_eq = |x: Option<T>, y: T| -> Option<bool> {
434            if let Some(x) = x {
435                Some(y == x)
436            } else {
437                Some(false)
438            }
439        };
440        let (out, layout_out) = reduce_axes_arg_cpu_rayon(a, la, axes, f_comp, f_eq, RowMajor, pool)?;
441        Ok((Storage::new(out.into(), self.clone()), layout_out))
442    }
443
444    fn argmax_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<Self::TOut> {
445        let pool = self.get_current_pool();
446
447        let f_comp = |x: Option<T>, y: T| -> Option<bool> {
448            if let Some(x) = x {
449                Some(y > x)
450            } else {
451                Some(true)
452            }
453        };
454        let f_eq = |x: Option<T>, y: T| -> Option<bool> {
455            if let Some(x) = x {
456                Some(y == x)
457            } else {
458                Some(false)
459            }
460        };
461        let result = reduce_all_arg_cpu_rayon(a, la, f_comp, f_eq, RowMajor, pool)?;
462        Ok(result)
463    }
464}
465
466impl<D> OpAllAPI<bool, D> for DeviceRayonAutoImpl
467where
468    D: DimAPI,
469{
470    type TOut = bool;
471
472    fn all_all(&self, a: &Vec<bool>, la: &Layout<D>) -> Result<bool> {
473        let pool = self.get_current_pool();
474
475        let f_init = || true;
476        let f = |acc, x| acc && x;
477        let f_sum = |acc1, acc2| acc1 && acc2;
478        let f_out = |acc| acc;
479
480        reduce_all_cpu_rayon(a, la, f_init, f, f_sum, f_out, pool)
481    }
482
483    fn all_axes(
484        &self,
485        a: &Vec<bool>,
486        la: &Layout<D>,
487        axes: &[isize],
488    ) -> Result<(Storage<DataOwned<Vec<bool>>, bool, Self>, Layout<IxD>)> {
489        let pool = self.get_current_pool();
490
491        let f_init = || true;
492        let f = |acc, x| acc && x;
493        let f_sum = |acc1, acc2| acc1 && acc2;
494        let f_out = |acc| acc;
495
496        let (out, layout_out) = reduce_axes_cpu_rayon(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out, pool)?;
497        Ok((Storage::new(out.into(), self.clone()), layout_out))
498    }
499}
500
501impl<D> OpAnyAPI<bool, D> for DeviceRayonAutoImpl
502where
503    D: DimAPI,
504{
505    type TOut = bool;
506
507    fn any_all(&self, a: &Vec<bool>, la: &Layout<D>) -> Result<bool> {
508        let pool = self.get_current_pool();
509
510        let f_init = || false;
511        let f = |acc, x| acc || x;
512        let f_sum = |acc1, acc2| acc1 || acc2;
513        let f_out = |acc| acc;
514
515        reduce_all_cpu_rayon(a, la, f_init, f, f_sum, f_out, pool)
516    }
517
518    fn any_axes(
519        &self,
520        a: &Vec<bool>,
521        la: &Layout<D>,
522        axes: &[isize],
523    ) -> Result<(Storage<DataOwned<Vec<bool>>, bool, Self>, Layout<IxD>)> {
524        let pool = self.get_current_pool();
525
526        let f_init = || false;
527        let f = |acc, x| acc || x;
528        let f_sum = |acc1, acc2| acc1 || acc2;
529        let f_out = |acc| acc;
530
531        let (out, layout_out) = reduce_axes_cpu_rayon(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out, pool)?;
532        Ok((Storage::new(out.into(), self.clone()), layout_out))
533    }
534}
535
536impl<T, D> OpCountNonZeroAPI<T, D> for DeviceRayonAutoImpl
537where
538    T: Clone + PartialEq + Zero + Send + Sync,
539    D: DimAPI,
540{
541    type TOut = usize;
542
543    fn count_nonzero_all(&self, a: &Vec<T>, la: &Layout<D>) -> Result<usize> {
544        let pool = self.get_current_pool();
545
546        let f_init = || 0;
547        let f = |acc, x| if x != T::zero() { acc + 1 } else { acc };
548        let f_sum = |acc1, acc2| acc1 + acc2;
549        let f_out = |acc| acc;
550
551        reduce_all_cpu_rayon(a, la, f_init, f, f_sum, f_out, pool)
552    }
553
554    fn count_nonzero_axes(
555        &self,
556        a: &Vec<T>,
557        la: &Layout<D>,
558        axes: &[isize],
559    ) -> Result<(Storage<DataOwned<Vec<usize>>, usize, Self>, Layout<IxD>)> {
560        let pool = self.get_current_pool();
561
562        let f_init = || 0;
563        let f = |acc, x| if x != T::zero() { acc + 1 } else { acc };
564        let f_sum = |acc1, acc2| acc1 + acc2;
565        let f_out = |acc| acc;
566
567        let (out, layout_out) = reduce_axes_cpu_rayon(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out, pool)?;
568        Ok((Storage::new(out.into(), self.clone()), layout_out))
569    }
570}
571
572impl<T, D> OpUnraveledArgMinAPI<T, D> for DeviceRayonAutoImpl
573where
574    T: Clone + PartialOrd + Send + Sync,
575    D: DimAPI,
576{
577    fn unraveled_argmin_axes(
578        &self,
579        a: &Vec<T>,
580        la: &Layout<D>,
581        axes: &[isize],
582    ) -> Result<(Storage<DataOwned<Vec<IxD>>, IxD, Self>, Layout<IxD>)> {
583        let pool = self.get_current_pool();
584
585        let f_comp = |x: Option<T>, y: T| -> Option<bool> {
586            if let Some(x) = x {
587                Some(y < x)
588            } else {
589                Some(true)
590            }
591        };
592        let f_eq = |x: Option<T>, y: T| -> Option<bool> {
593            if let Some(x) = x {
594                Some(y == x)
595            } else {
596                Some(false)
597            }
598        };
599        let (out, layout_out) = reduce_axes_unraveled_arg_cpu_rayon(a, la, axes, f_comp, f_eq, pool)?;
600        Ok((Storage::new(out.into(), self.clone()), layout_out))
601    }
602
603    fn unraveled_argmin_all(&self, a: &<Self as DeviceRawAPI<T>>::Raw, la: &Layout<D>) -> Result<D> {
604        let pool = self.get_current_pool();
605
606        let f_comp = |x: Option<T>, y: T| -> Option<bool> {
607            if let Some(x) = x {
608                Some(y < x)
609            } else {
610                Some(true)
611            }
612        };
613        let f_eq = |x: Option<T>, y: T| -> Option<bool> {
614            if let Some(x) = x {
615                Some(y == x)
616            } else {
617                Some(false)
618            }
619        };
620        let result = reduce_all_unraveled_arg_cpu_rayon(a, la, f_comp, f_eq, pool)?;
621        Ok(result)
622    }
623}
624
625impl<T, D> OpUnraveledArgMaxAPI<T, D> for DeviceRayonAutoImpl
626where
627    T: Clone + PartialOrd + Send + Sync,
628    D: DimAPI,
629{
630    fn unraveled_argmax_axes(
631        &self,
632        a: &Vec<T>,
633        la: &Layout<D>,
634        axes: &[isize],
635    ) -> Result<(Storage<DataOwned<Vec<IxD>>, IxD, Self>, Layout<IxD>)> {
636        let pool = self.get_current_pool();
637
638        let f_comp = |x: Option<T>, y: T| -> Option<bool> {
639            if let Some(x) = x {
640                Some(y > x)
641            } else {
642                Some(true)
643            }
644        };
645        let f_eq = |x: Option<T>, y: T| -> Option<bool> {
646            if let Some(x) = x {
647                Some(y == x)
648            } else {
649                Some(false)
650            }
651        };
652        let (out, layout_out) = reduce_axes_unraveled_arg_cpu_rayon(a, la, axes, f_comp, f_eq, pool)?;
653        Ok((Storage::new(out.into(), self.clone()), layout_out))
654    }
655
656    fn unraveled_argmax_all(&self, a: &<Self as DeviceRawAPI<T>>::Raw, la: &Layout<D>) -> Result<D> {
657        let pool = self.get_current_pool();
658
659        let f_comp = |x: Option<T>, y: T| -> Option<bool> {
660            if let Some(x) = x {
661                Some(y > x)
662            } else {
663                Some(true)
664            }
665        };
666        let f_eq = |x: Option<T>, y: T| -> Option<bool> {
667            if let Some(x) = x {
668                Some(y == x)
669            } else {
670                Some(false)
671            }
672        };
673        let result = reduce_all_unraveled_arg_cpu_rayon(a, la, f_comp, f_eq, pool)?;
674        Ok(result)
675    }
676}
677
678impl<D> OpSumBoolAPI<D> for DeviceRayonAutoImpl
679where
680    D: DimAPI,
681{
682    fn sum_all(&self, a: &Vec<bool>, la: &Layout<D>) -> Result<usize> {
683        let pool = self.get_current_pool();
684
685        let f_init = || 0;
686        let f = |acc, x| match x {
687            true => acc + 1,
688            false => acc,
689        };
690        let f_sum = |acc1, acc2| acc1 + acc2;
691        let f_out = |acc| acc;
692
693        reduce_all_cpu_rayon(a, la, f_init, f, f_sum, f_out, pool)
694    }
695
696    fn sum_axes(
697        &self,
698        a: &Vec<bool>,
699        la: &Layout<D>,
700        axes: &[isize],
701    ) -> Result<(Storage<DataOwned<Vec<usize>>, usize, Self>, Layout<IxD>)> {
702        let pool = self.get_current_pool();
703
704        let f_init = || 0;
705        let f = |acc, x| match x {
706            true => acc + 1,
707            false => acc,
708        };
709        let f_sum = |acc1, acc2| acc1 + acc2;
710        let f_out = |acc| acc;
711
712        let (out, layout_out) = reduce_axes_cpu_rayon(a, &la.to_dim()?, axes, f_init, f, f_sum, f_out, pool)?;
713        Ok((Storage::new(out.into(), self.clone()), layout_out))
714    }
715}
716
/// Approximate elementwise equality (`allclose`) for [`DeviceRayonAutoImpl`].
impl<TA, TB, TE, D> OpAllCloseAPI<TA, TB, TE, D> for DeviceRayonAutoImpl
where
    TA: Clone + Send + Sync + DTypePromoteAPI<TB>,
    TB: Clone + Send + Sync,
    <TA as DTypePromoteAPI<TB>>::Res: ExtNum<AbsOut: DTypeCastAPI<TE>>,
    TE: ExtFloat + Add<TE, Output = TE> + Mul<TE, Output = TE> + PartialOrd + Clone + Send + Sync,
    D: DimAPI,
{
    /// True iff every corresponding element pair of `a` and `b` satisfies
    /// `isclose` under the tolerances in `isclose_args`.
    /// Zero-size input on either side is rejected as an error.
    fn allclose_all(
        &self,
        a: &<Self as DeviceRawAPI<TA>>::Raw,
        la: &Layout<D>,
        b: &<Self as DeviceRawAPI<TB>>::Raw,
        lb: &Layout<D>,
        isclose_args: &IsCloseArgs<TE>,
    ) -> Result<bool> {
        use rstsr_dtype_traits::isclose;

        let pool = self.get_current_pool();

        // An empty comparison is ambiguous; treat it as an error rather than
        // a vacuous `true`.
        if la.size() == 0 || lb.size() == 0 {
            rstsr_raise!(InvalidValue, "zero-size array is not supported for allclose")?;
        }

        // AND-fold of per-pair `isclose` results. Note: the closure computes
        // `isclose` unconditionally for every visited pair — there is no
        // short-circuit on the first mismatch.
        let f_init = || true;
        let f = |acc: bool, (a_elem, b_elem): (TA, TB)| {
            let result = isclose(&a_elem, &b_elem, isclose_args);
            acc && result
        };
        let f_sum = |acc1: bool, acc2: bool| acc1 && acc2;
        let f_out = |acc: bool| acc;

        reduce_all_binary_cpu_rayon(a, la, b, lb, f_init, f, f_sum, f_out, pool)
    }

    /// Deliberately unimplemented: per-axes `allclose` is not planned.
    fn allclose_axes(
        &self,
        _a: &<Self as DeviceRawAPI<TA>>::Raw,
        _la: &Layout<D>,
        _b: &<Self as DeviceRawAPI<TB>>::Raw,
        _lb: &Layout<D>,
        _axes: &[isize],
        _isclose_args: &IsCloseArgs<TE>,
    ) -> Result<(Storage<DataOwned<<Self as DeviceRawAPI<bool>>::Raw>, bool, Self>, Layout<IxD>)> {
        unimplemented!("This function (`allclose_axes`) is not planned to be implemented yet.");
    }
}
763}