polars_compute/
sum.rs

1use std::ops::Add;
2#[cfg(feature = "simd")]
3use std::simd::prelude::*;
4
5use arrow::array::{Array, PrimitiveArray};
6use arrow::bitmap::bitmask::BitMask;
7use arrow::types::NativeType;
8use num_traits::Zero;
9
10macro_rules! wrapping_impl {
11    ($trait_name:ident, $method:ident, $t:ty) => {
12        impl $trait_name for $t {
13            #[inline(always)]
14            fn wrapping_add(&self, v: &Self) -> Self {
15                <$t>::$method(*self, *v)
16            }
17        }
18    };
19}
20
21/// Performs addition that wraps around on overflow.
22///
23/// Differs from num::WrappingAdd in that this is also implemented for floats.
24pub trait WrappingAdd: Sized {
25    /// Wrapping (modular) addition. Computes `self + other`, wrapping around at
26    /// the boundary of the type.
27    fn wrapping_add(&self, v: &Self) -> Self;
28}
29
30wrapping_impl!(WrappingAdd, wrapping_add, u8);
31wrapping_impl!(WrappingAdd, wrapping_add, u16);
32wrapping_impl!(WrappingAdd, wrapping_add, u32);
33wrapping_impl!(WrappingAdd, wrapping_add, u64);
34wrapping_impl!(WrappingAdd, wrapping_add, usize);
35wrapping_impl!(WrappingAdd, wrapping_add, u128);
36
37wrapping_impl!(WrappingAdd, wrapping_add, i8);
38wrapping_impl!(WrappingAdd, wrapping_add, i16);
39wrapping_impl!(WrappingAdd, wrapping_add, i32);
40wrapping_impl!(WrappingAdd, wrapping_add, i64);
41wrapping_impl!(WrappingAdd, wrapping_add, isize);
42wrapping_impl!(WrappingAdd, wrapping_add, i128);
43
44wrapping_impl!(WrappingAdd, add, f32);
45wrapping_impl!(WrappingAdd, add, f64);
46
47#[cfg(feature = "simd")]
48const STRIPE: usize = 16;
49
50fn wrapping_sum_with_mask_scalar<T: Zero + WrappingAdd + Copy>(vals: &[T], mask: &BitMask) -> T {
51    assert!(vals.len() == mask.len());
52    vals.iter()
53        .enumerate()
54        .map(|(i, x)| {
55            // No filter but rather select of 0 for cmov opt.
56            if mask.get(i) { *x } else { T::zero() }
57        })
58        .fold(T::zero(), |a, b| a.wrapping_add(&b))
59}
60
61#[cfg(not(feature = "simd"))]
62impl<T> WrappingSum for T
63where
64    T: NativeType + WrappingAdd + Zero,
65{
66    fn wrapping_sum(vals: &[Self]) -> Self {
67        vals.iter()
68            .copied()
69            .fold(T::zero(), |a, b| a.wrapping_add(&b))
70    }
71
72    fn wrapping_sum_with_validity(vals: &[Self], mask: &BitMask) -> Self {
73        wrapping_sum_with_mask_scalar(vals, mask)
74    }
75}
76
77#[cfg(feature = "simd")]
78impl<T> WrappingSum for T
79where
80    T: NativeType + WrappingAdd + Zero + crate::SimdPrimitive,
81{
82    fn wrapping_sum(vals: &[Self]) -> Self {
83        vals.iter()
84            .copied()
85            .fold(T::zero(), |a, b| a.wrapping_add(&b))
86    }
87
88    fn wrapping_sum_with_validity(vals: &[Self], mask: &BitMask) -> Self {
89        assert!(vals.len() == mask.len());
90        let remainder = vals.len() % STRIPE;
91        let (rest, main) = vals.split_at(remainder);
92        let (rest_mask, main_mask) = mask.split_at(remainder);
93        let zero: Simd<T, STRIPE> = Simd::default();
94
95        let vsum = main
96            .chunks_exact(STRIPE)
97            .enumerate()
98            .map(|(i, a)| {
99                let m: Mask<_, STRIPE> = main_mask.get_simd(i * STRIPE);
100                m.select(Simd::from_slice(a), zero)
101            })
102            .fold(zero, |a, b| {
103                let a = a.to_array();
104                let b = b.to_array();
105                Simd::from_array(std::array::from_fn(|i| a[i].wrapping_add(&b[i])))
106            });
107
108        let mainsum = vsum
109            .to_array()
110            .into_iter()
111            .fold(T::zero(), |a, b| a.wrapping_add(&b));
112
113        // TODO: faster remainder.
114        let restsum = wrapping_sum_with_mask_scalar(rest, &rest_mask);
115        mainsum.wrapping_add(&restsum)
116    }
117}
118
119#[cfg(feature = "simd")]
120impl WrappingSum for u128 {
121    fn wrapping_sum(vals: &[Self]) -> Self {
122        vals.iter().copied().fold(0, |a, b| a.wrapping_add(b))
123    }
124
125    fn wrapping_sum_with_validity(vals: &[Self], mask: &BitMask) -> Self {
126        wrapping_sum_with_mask_scalar(vals, mask)
127    }
128}
129
130#[cfg(feature = "simd")]
131impl WrappingSum for i128 {
132    fn wrapping_sum(vals: &[Self]) -> Self {
133        vals.iter().copied().fold(0, |a, b| a.wrapping_add(b))
134    }
135
136    fn wrapping_sum_with_validity(vals: &[Self], mask: &BitMask) -> Self {
137        wrapping_sum_with_mask_scalar(vals, mask)
138    }
139}
140
141pub trait WrappingSum: Sized {
142    fn wrapping_sum(vals: &[Self]) -> Self;
143    fn wrapping_sum_with_validity(vals: &[Self], mask: &BitMask) -> Self;
144}
145
146pub fn wrapping_sum_arr<T>(arr: &PrimitiveArray<T>) -> T
147where
148    T: NativeType + WrappingSum,
149{
150    let validity = arr.validity().filter(|_| arr.null_count() > 0);
151    if let Some(mask) = validity {
152        WrappingSum::wrapping_sum_with_validity(arr.values(), &BitMask::from_bitmap(mask))
153    } else {
154        WrappingSum::wrapping_sum(arr.values())
155    }
156}