Skip to main content

simd_kernels/kernels/
aggregate.rs

1// Copyright (c) 2025 Peter Bower
2// SPDX-License-Identifier: AGPL-3.0-or-later
3// Commercial licensing available. See LICENSE and LICENSING.md.
4
5//! # **Aggregation Kernels Module** - *High-Performance Statistical Reduction Operations*
6//!
7//! Null-aware aggregation kernels implementing statistical reductions with SIMD acceleration.
8//! Core building blocks for analytical workloads requiring high-throughput data summarisation.
9//!
10//! ## Core Operations
11//! - **Sum aggregation**: Optimised summation with overflow handling and numerical stability
12//! - **Min/Max operations**: Efficient extrema detection with proper null handling
13//! - **Count operations**: Fast counting with distinct value detection capabilities
14//! - **Statistical moments**: Mean, variance, and standard deviation calculations
15//! - **Percentile calculations**: Quantile computation with interpolation support
16//! - **String aggregation**: Concatenation and string-based reduction operations
17include!(concat!(env!("OUT_DIR"), "/simd_lanes.rs"));
18
19#[cfg(not(feature = "fast_hash"))]
20use std::collections::HashMap;
21use std::collections::HashSet;
22use std::hash::Hash;
23use std::ops::{Mul, Sub};
24#[cfg(feature = "simd")]
25use std::simd::{
26    Mask, Select, Simd, SimdElement,
27    cmp::{SimdOrd, SimdPartialOrd},
28    num::{SimdFloat, SimdInt, SimdUint},
29};
30
31#[cfg(feature = "fast_hash")]
32use ahash::AHashMap;
33use minarrow::{Bitmask, Vec64};
34use num_traits::{Float, NumCast, One, ToPrimitive, Zero};
35
36use crate::kernels::sort::{sort_float, total_cmp_f};
37use crate::traits::dense_iter::collect_valid;
38use crate::traits::to_bits::ToBits;
39use crate::utils::has_nulls;
40use minarrow::utils::is_simd_aligned;
41
42#[cfg(feature = "simd")]
43use crate::utils::bitmask_to_simd_mask;
44
45// --- Neumaier compensated summation helper ------------------------------------
46
47/// Applies one step of Neumaier compensated summation.
48///
49/// Neumaier is a refinement of Kahan summation that also handles the case
50/// where the addend is larger in magnitude than the running sum, reducing
51/// floating-point accumulation error from O(n) to O(1) for well-conditioned inputs.
52///
53/// When infinities are involved, compensation is skipped to avoid NaN propagation
54/// from `inf - inf` in the error term.
55#[inline(always)]
56pub(crate) fn neumaier_add(sum: &mut f64, comp: &mut f64, value: f64) {
57    let t = *sum + value;
58    // Skip compensation when infinities are involved to avoid NaN from inf - inf
59    if t.is_finite() {
60        if sum.abs() >= value.abs() {
61            *comp += (*sum - t) + value;
62        } else {
63            *comp += (value - t) + *sum;
64        }
65    }
66    *sum = t;
67}
68
69/// SIMD-width Neumaier compensated summation step.
70///
71/// Same algorithm as the scalar `neumaier_add` but operating on all SIMD lanes
72/// in parallel. Each lane independently tracks its own sum and compensation term.
73#[cfg(feature = "simd")]
74#[inline(always)]
75pub(crate) fn neumaier_simd_add<const N: usize>(
76    sum: &mut Simd<f64, N>,
77    comp: &mut Simd<f64, N>,
78    value: Simd<f64, N>,
79) where
80{
81    let t = *sum + value;
82    let sum_dominant = sum.abs().simd_ge(value.abs());
83    let comp_a = (*sum - t) + value;
84    let comp_b = (value - t) + *sum;
85    let delta = sum_dominant.select(comp_a, comp_b);
86    // Skip compensation for non-finite lanes to avoid NaN from inf - inf
87    let finite = t.is_finite();
88    *comp += finite.select(delta, Simd::splat(0.0));
89    *sum = t;
90}
91
92// --- SIMD/stat-moments helpers -----------------------------------------------
93
94/// Generates stat‐moment aggregation (sum, sum², count)
95/// for float types using SIMD or scalar fallback.
96/// Uses Neumaier compensated summation for improved numerical accuracy.
97macro_rules! impl_stat_moments_float {
98    ($name:ident, $ty:ty, $LANES:expr) => {
99        #[doc = concat!("Computes sum, sum-of-squares, and count for a `", stringify!($ty), "` slice.")]
100        #[doc = "Skips nulls if present. Uses SIMD if available."]
101        #[doc = "Uses Neumaier compensated summation for numerical accuracy."]
102        #[inline(always)]
103        pub fn $name(
104            data: &[$ty],
105            mask: Option<&Bitmask>,
106            null_count: Option<usize>,
107        ) -> (f64, f64, usize) {
108            let has_nulls = has_nulls(null_count, mask);
109
110            #[cfg(feature = "simd")]
111            {
112                // Check if both arrays are 64-byte aligned for SIMD
113                if is_simd_aligned(data) {
114                    type V<const L: usize> = Simd<$ty, L>;
115                    // The mask‐type for `Simd<$ty, N>` is `<$ty as SimdElement>::Mask`.
116                    type M<const L: usize> = <$ty as SimdElement>::Mask;
117
118                    const N: usize = $LANES;
119                    let len = data.len();
120                    let mut i = 0;
121                    let (mut sum, mut sum2) = (0.0_f64, 0.0_f64);
122                    let (mut comp, mut comp2) = (0.0_f64, 0.0_f64);
123                    let mut cnt = 0_usize;
124
125                    if !has_nulls {
126                        // dense path
127                        while i + N <= len {
128                            let v = V::<N>::from_slice(&data[i..i + N]);
129                            let dv = v.cast::<f64>();
130                            cnt += N;
131                            let chunk_sum = dv.reduce_sum();
132                            neumaier_add(&mut sum, &mut comp, chunk_sum);
133                            let chunk_sum2 = (dv * dv).reduce_sum();
134                            neumaier_add(&mut sum2, &mut comp2, chunk_sum2);
135                            i += N;
136                        }
137                        // tail
138                        for &v in &data[i..] {
139                            let x = v as f64;
140                            neumaier_add(&mut sum, &mut comp, x);
141                            neumaier_add(&mut sum2, &mut comp2, x * x);
142                            cnt += 1;
143                        }
144                    } else {
145                        // null‐aware SIMD path
146                        let mb = mask.expect("Mask must be Some if nulls are present");
147                        let mask_bytes = mb.as_bytes();
148
149                        while i + N <= len {
150                            let v = V::<N>::from_slice(&data[i..i + N]);
151                            // extract a SIMD mask of valid lanes
152                            let lane_mask: Mask<M<N>, N> =
153                                bitmask_to_simd_mask::<N, M<N>>(mask_bytes, i, len);
154                            // zero‐out invalid lanes
155                            let zeros = V::<N>::splat(0.0 as $ty);
156                            let vv = lane_mask.select(v, zeros);
157
158                            // count valid lanes
159                            cnt += lane_mask.to_bitmask().count_ones() as usize;
160
161                            let dv = vv.cast::<f64>();
162                            let chunk_sum = dv.reduce_sum();
163                            neumaier_add(&mut sum, &mut comp, chunk_sum);
164                            let chunk_sum2 = (dv * dv).reduce_sum();
165                            neumaier_add(&mut sum2, &mut comp2, chunk_sum2);
166
167                            i += N;
168                        }
169                        // scalar tail
170                        for j in i..len {
171                            if unsafe { mb.get_unchecked(j) } {
172                                let x = data[j] as f64;
173                                neumaier_add(&mut sum, &mut comp, x);
174                                neumaier_add(&mut sum2, &mut comp2, x * x);
175                                cnt += 1;
176                            }
177                        }
178                    }
179                    return (sum + comp, sum2 + comp2, cnt);
180                }
181                // Fall through to scalar path if alignment check failed
182            }
183
184            // Scalar fallback - alignment check failed
185            let (mut sum, mut sum2, mut cnt) = (0.0_f64, 0.0_f64, 0_usize);
186            let (mut comp, mut comp2) = (0.0_f64, 0.0_f64);
187            if !has_nulls {
188                for &v in data {
189                    let x = v as f64;
190                    neumaier_add(&mut sum, &mut comp, x);
191                    neumaier_add(&mut sum2, &mut comp2, x * x);
192                    cnt += 1;
193                }
194            } else {
195                let mb = mask.expect("Mask must be Some if nulls are present");
196                for (i, &v) in data.iter().enumerate() {
197                    if unsafe { mb.get_unchecked(i) } {
198                        let x = v as f64;
199                        neumaier_add(&mut sum, &mut comp, x);
200                        neumaier_add(&mut sum2, &mut comp2, x * x);
201                        cnt += 1;
202                    }
203                }
204            }
205            (sum + comp, sum2 + comp2, cnt)
206        }
207    };
208}
209
210/// Generates stat-moment aggregation (sum, sum², count)
211/// for integer types using SIMD or scalar fallback.
212macro_rules! impl_stat_moments_int {
213    ($name:ident, $ty:ty, $acc:ty, $LANES:expr, $mask_ty:ty) => {
214        #[doc = concat!("Computes sum, sum-of-squares, and count for `", stringify!($ty), "` integers.")]
215        #[doc = concat!(" Uses `", stringify!($acc), "` for accumulation. Skips nulls if present.")]
216        #[inline(always)]
217        pub fn $name(
218            data: &[$ty],
219            mask: Option<&Bitmask>,
220            null_count: Option<usize>
221        ) -> (f64, f64, usize) {
222            let has_nulls = has_nulls(null_count, mask);
223
224            #[cfg(feature = "simd")]
225            {
226                // Check if both arrays are 64-byte aligned for SIMD
227                if is_simd_aligned(data) {
228                const N: usize = $LANES;
229                let mut i = 0;
230                let mut sum: $acc = 0;
231                let mut sum2: $acc = 0;
232                let mut cnt = 0usize;
233
234                if !has_nulls {
235                    while i + N <= data.len() {
236                        let v = Simd::<$ty, N>::from_slice(&data[i..i + N]);
237                        sum += v.reduce_sum() as $acc;
238                        sum2 += (v * v).reduce_sum() as $acc;
239                        cnt += N;
240                        i += N;
241                    }
242                    for &x in &data[i..] {
243                        let x = x as $acc;
244                        sum += x;
245                        sum2 += x * x;
246                        cnt += 1;
247                    }
248                } else {
249                    let mb = mask.expect("Mask must be Some if nulls are present or mask was supplied");
250                    let mask_bytes = &mb.bits;
251                    while i + N <= data.len() {
252                        let v = Simd::<$ty, N>::from_slice(&data[i..i + N]);
253                        // use signed type for mask to satisfy trait bound
254                        let lane_mask = bitmask_to_simd_mask::<N, $mask_ty>(mask_bytes, i, data.len());
255                        let vv = lane_mask.select(v, Simd::<$ty, N>::splat(0 as $ty));
256                        sum += vv.reduce_sum() as $acc;
257                        sum2 += (vv * vv).reduce_sum() as $acc;
258                        cnt += lane_mask.to_bitmask().count_ones() as usize;
259                        i += N;
260                    }
261                    for j in i..data.len() {
262                        if unsafe { mb.get_unchecked(j) } {
263                            let x = data[j] as $acc;
264                            sum += x;
265                            sum2 += x * x;
266                            cnt += 1;
267                        }
268                    }
269                }
270                    return (sum as f64, sum2 as f64, cnt);
271                }
272                // Fall through to scalar path if alignment check failed
273            }
274
275            // Scalar fallback - alignment check failed
276            let mut sum: $acc = 0;
277            let mut sum2: $acc = 0;
278            let mut cnt = 0usize;
279            if !has_nulls {
280                for &x in data.iter() {
281                    let xi = x as $acc;
282                    sum += xi;
283                    sum2 += xi * xi;
284                    cnt += 1;
285                }
286            } else {
287                let mb = mask.expect("Mask must be Some if nulls are present or mask was supplied");
288                for (i, &x) in data.iter().enumerate() {
289                    if unsafe { mb.get_unchecked(i) } {
290                        let xi = x as $acc;
291                        sum += xi;
292                        sum2 += xi * xi;
293                        cnt += 1;
294                    }
295                }
296            }
297            (sum as f64, sum2 as f64, cnt)
298        }
299    };
300}
301
302/// Implements SIMD-enabled min/max reducers
303/// for numeric slices with or without nulls.
304macro_rules! impl_reduce_min_max {
305    ($name:ident, $ty:ty, $LANES:expr, $minval:expr, $maxval:expr) => {
306        /// Finds the minimum and maximum values in a numeric slice.
307        ///
308        /// Uses SIMD operations when available for optimal performance.
309        /// Handles null values through optional bitmask.
310        ///
311        /// # Arguments
312        ///
313        /// * `data` - The slice of numeric values to process
314        /// * `mask` - Optional bitmask indicating valid/invalid elements
315        /// * `null_count` - Optional count of null values for optimisation
316        ///
317        /// # Returns
318        ///
319        /// `Some((min, max))` if any valid values exist, `None` if all values are null
320        #[inline(always)]
321        pub fn $name(
322            data: &[$ty],
323            mask: Option<&Bitmask>,
324            null_count: Option<usize>,
325        ) -> Option<($ty, $ty)> {
326            let has_nulls = has_nulls(null_count, mask);
327            let len = data.len();
328            if len == 0 {
329                return None;
330            }
331
332            #[cfg(feature = "simd")]
333            {
334                // Check if both arrays are 64-byte aligned for SIMD
335                if is_simd_aligned(data) {
336                    type V<const N: usize> = Simd<$ty, N>;
337                    type M<const N: usize> = <$ty as SimdElement>::Mask;
338                    const N: usize = $LANES;
339
340                    let mut first_valid = None;
341                    if !has_nulls {
342                        // Find first value
343                        if len == 0 {
344                            return None;
345                        }
346                        first_valid = Some((0, data[0]));
347                    } else {
348                        let mb = mask.expect("Mask must be Some if nulls are present");
349                        for (idx, &x) in data.iter().enumerate() {
350                            if unsafe { mb.get_unchecked(idx) } {
351                                first_valid = Some((idx, x));
352                                break;
353                            }
354                        }
355                    }
356                    let (mut i, x0) = match first_valid {
357                        Some(v) => v,
358                        None => return None,
359                    };
360                    let mut min_s = x0;
361                    let mut max_s = x0;
362                    i += 1;
363
364                    // SIMD alignment: process until next SIMD-aligned index
365                    while i < len && (i % N != 0) {
366                        let x = data[i];
367                        if !has_nulls
368                            || (has_nulls && {
369                                let mb = mask.unwrap();
370                                unsafe { mb.get_unchecked(i) }
371                            })
372                        {
373                            min_s = min_s.min(x);
374                            max_s = max_s.max(x);
375                        }
376                        i += 1;
377                    }
378
379                    let mut min_v = V::<N>::splat(min_s);
380                    let mut max_v = V::<N>::splat(max_s);
381
382                    if !has_nulls {
383                        while i + N <= len {
384                            let v = V::<N>::from_slice(&data[i..i + N]);
385                            min_v = min_v.simd_min(v);
386                            max_v = max_v.simd_max(v);
387                            i += N;
388                        }
389                    } else {
390                        let mb = mask.unwrap();
391                        let mask_bytes = mb.as_bytes();
392                        while i + N <= len {
393                            let v = V::<N>::from_slice(&data[i..i + N]);
394                            let lane_m: Mask<M<N>, N> =
395                                bitmask_to_simd_mask::<N, M<N>>(mask_bytes, i, len);
396                            // Null entries are replaced by previous running min/max
397                            let v_min = lane_m.select(v, V::<N>::splat(min_s));
398                            let v_max = lane_m.select(v, V::<N>::splat(max_s));
399                            min_v = min_v.simd_min(v_min);
400                            max_v = max_v.simd_max(v_max);
401                            i += N;
402                        }
403                    }
404
405                    for idx in i..len {
406                        let x = data[idx];
407                        if !has_nulls
408                            || (has_nulls && {
409                                let mb = mask.unwrap();
410                                unsafe { mb.get_unchecked(idx) }
411                            })
412                        {
413                            min_s = min_s.min(x);
414                            max_s = max_s.max(x);
415                        }
416                    }
417
418                    min_s = min_s.min(min_v.reduce_min());
419                    max_s = max_s.max(max_v.reduce_max());
420                    return Some((min_s, max_s));
421                }
422                // Fall through to scalar path if alignment check failed
423            }
424
425            // Scalar fallback - alignment check failed
426            let mut min = None;
427            let mut max = None;
428            if !has_nulls {
429                for &x in data {
430                    match min {
431                        None => {
432                            min = Some(x);
433                            max = Some(x);
434                        }
435                        Some(m) => {
436                            min = Some(m.min(x));
437                            max = Some(max.unwrap().max(x));
438                        }
439                    }
440                }
441            } else {
442                let mb = mask.expect("Mask must be Some if nulls are present");
443                for (i, &x) in data.iter().enumerate() {
444                    if unsafe { mb.get_unchecked(i) } {
445                        match min {
446                            None => {
447                                min = Some(x);
448                                max = Some(x);
449                            }
450                            Some(m) => {
451                                min = Some(m.min(x));
452                                max = Some(max.unwrap().max(x));
453                            }
454                        }
455                    }
456                }
457            }
458            min.zip(max)
459        }
460    };
461}
462
463/// Implements SIMD-enabled floating-point min/max reducers
464/// for numeric slices with or without nulls, handling NaN values.
465macro_rules! impl_reduce_min_max_float {
466    ($fn_name:ident, $ty:ty, $LANES:ident, $SIMD:ident, $MASK:ident) => {
467        /// Finds the minimum and maximum values in a floating-point slice.
468        ///
469        /// Handles NaN values correctly by treating them as missing data.
470        /// Uses SIMD operations when available for optimal performance.
471        ///
472        /// # Arguments
473        ///
474        /// * `data` - The slice of floating-point values to process
475        /// * `mask` - Optional bitmask indicating valid/invalid elements
476        /// * `null_count` - Optional count of null values for optimisation
477        ///
478        /// # Returns
479        ///
480        /// `Some((min, max))` if any valid values exist, `None` if all values are null/NaN
481        #[inline(always)]
482        pub fn $fn_name(
483            data: &[$ty],
484            mask: Option<&Bitmask>,
485            null_count: Option<usize>,
486        ) -> Option<($ty, $ty)> {
487            let has_nulls = has_nulls(null_count, mask);
488            let len = data.len();
489            if len == 0 {
490                return None;
491            }
492
493            #[cfg(feature = "simd")]
494            {
495                // Check if both arrays are 64-byte aligned for SIMD
496                if is_simd_aligned(data) {
497                    const LANES: usize = $LANES;
498                    type V = $SIMD<$ty, LANES>;
499                    type M = <$ty as SimdElement>::Mask;
500
501                    // Find the first valid value (not null and not NaN)
502                    let mut first_valid = None;
503                    if !has_nulls {
504                        for (idx, &x) in data.iter().enumerate() {
505                            if !x.is_nan() {
506                                first_valid = Some((idx, x));
507                                break;
508                            }
509                        }
510                    } else {
511                        let mb = mask.expect("Bitmask must be Some if nulls are present");
512                        for (idx, &x) in data.iter().enumerate() {
513                            if unsafe { mb.get_unchecked(idx) } && !x.is_nan() {
514                                first_valid = Some((idx, x));
515                                break;
516                            }
517                        }
518                    }
519                    let (mut i, x0) = match first_valid {
520                        Some(v) => v,
521                        None => return None,
522                    };
523                    let mut min_s = x0;
524                    let mut max_s = x0;
525                    i += 1;
526
527                    // SIMD alignment for tail
528                    while i < len && (i % LANES != 0) {
529                        let x = data[i];
530                        if (!has_nulls && !x.is_nan())
531                            || (has_nulls && {
532                                let mb = mask.unwrap();
533                                unsafe { mb.get_unchecked(i) && !x.is_nan() }
534                            })
535                        {
536                            min_s = min_s.min(x);
537                            max_s = max_s.max(x);
538                        }
539                        i += 1;
540                    }
541
542                    let mut min_v = V::splat(min_s);
543                    let mut max_v = V::splat(max_s);
544
545                    if !has_nulls {
546                        while i + LANES <= len {
547                            let v = V::from_slice(&data[i..i + LANES]);
548                            let valid = !v.is_nan();
549                            let v_min = valid.select(v, V::splat(<$ty>::INFINITY));
550                            let v_max = valid.select(v, V::splat(<$ty>::NEG_INFINITY));
551                            min_v = min_v.simd_min(v_min);
552                            max_v = max_v.simd_max(v_max);
553                            i += LANES;
554                        }
555                    } else {
556                        let mb = mask.unwrap();
557                        let mask_bytes = mb.as_bytes();
558                        while i + LANES <= len {
559                            let v = V::from_slice(&data[i..i + LANES]);
560                            let valid_mask =
561                                bitmask_to_simd_mask::<LANES, M>(mask_bytes, i, len) & !v.is_nan();
562                            let v_min = valid_mask.select(v, V::splat(<$ty>::INFINITY));
563                            let v_max = valid_mask.select(v, V::splat(<$ty>::NEG_INFINITY));
564                            min_v = min_v.simd_min(v_min);
565                            max_v = max_v.simd_max(v_max);
566                            i += LANES;
567                        }
568                    }
569
570                    for idx in i..len {
571                        let x = data[idx];
572                        if (!has_nulls && !x.is_nan())
573                            || (has_nulls && {
574                                let mb = mask.unwrap();
575                                unsafe { mb.get_unchecked(idx) && !x.is_nan() }
576                            })
577                        {
578                            min_s = min_s.min(x);
579                            max_s = max_s.max(x);
580                        }
581                    }
582
583                    min_s = min_s.min(min_v.reduce_min());
584                    max_s = max_s.max(max_v.reduce_max());
585                    return Some((min_s, max_s));
586                }
587                // Fall through to scalar path if alignment check failed
588            }
589
590            // Scalar fallback - alignment check failed
591            let mut min = None;
592            let mut max = None;
593            if !has_nulls {
594                for &x in data {
595                    if !x.is_nan() {
596                        match min {
597                            None => {
598                                min = Some(x);
599                                max = Some(x);
600                            }
601                            Some(m) => {
602                                min = Some(m.min(x));
603                                max = Some(max.unwrap().max(x));
604                            }
605                        }
606                    }
607                }
608            } else {
609                let mb = mask.expect("Bitmask must be Some if nulls are present");
610                for (i, &x) in data.iter().enumerate() {
611                    if unsafe { mb.get_unchecked(i) } && !x.is_nan() {
612                        match min {
613                            None => {
614                                min = Some(x);
615                                max = Some(x);
616                            }
617                            Some(m) => {
618                                min = Some(m.min(x));
619                                max = Some(max.unwrap().max(x));
620                            }
621                        }
622                    }
623                }
624            }
625            min.zip(max)
626        }
627    };
628}
629
630/// Dispatches to the right stat-moment function by type.
631macro_rules! stat_moments {
632    (f64 => $d:expr, $m:expr, $hn:expr) => {
633        stat_moments_f64($d, $m, $hn)
634    };
635    (f32 => $d:expr, $m:expr, $hn:expr) => {
636        stat_moments_f32($d, $m, $hn)
637    };
638    (i64 => $d:expr, $m:expr, $hn:expr) => {
639        stat_moments_i64($d, $m, $hn)
640    };
641    (u64 => $d:expr, $m:expr, $hn:expr) => {
642        stat_moments_u64($d, $m, $hn)
643    };
644    (i32 => $d:expr, $m:expr, $hn:expr) => {
645        stat_moments_i32($d, $m, $hn)
646    };
647    (u32 => $d:expr, $m:expr, $hn:expr) => {
648        stat_moments_u32($d, $m, $hn)
649    };
650}
651
652/// Generates a variance function using stat moments.
653/// Generates both population and sample variance for a numeric type.
654macro_rules! impl_variance {
655    ($ty:ident, $fn_var:ident) => {
656        #[doc = concat!("Returns the variance for `", stringify!($ty), "`; if `sample` is true, uses Bessel's correction (n-1).")]
657        #[inline(always)]
658        pub fn $fn_var(
659            data: &[$ty],
660            mask: Option<&Bitmask>,
661            null_count: Option<usize>,
662            sample: bool,
663        ) -> Option<f64> {
664            let (s, s2, n) = stat_moments!($ty => data, mask, null_count);
665            if n == 0 || (sample && n < 2) {
666                None
667            } else if sample {
668                Some((s2 - s * s / n as f64) / (n as f64 - 1.0))
669            } else {
670                Some((s2 - s * s / n as f64) / n as f64)
671            }
672        }
673    };
674}
675
676/// Generates min, max, and (min, max) range extractors from a reduce.
677macro_rules! impl_min_max_range {
678    ($ty:ty, $reduce_fn:ident, $min_fn:ident, $max_fn:ident, $range_fn:ident) => {
679        #[doc = concat!("Returns the minimum of a `", stringify!($ty), "` slice.")]
680        #[inline(always)]
681        pub fn $min_fn(
682            data: &[$ty],
683            mask: Option<&Bitmask>,
684            null_count: Option<usize>,
685        ) -> Option<$ty> {
686            $reduce_fn(data, mask, null_count).map(|(min, _)| min)
687        }
688
689        #[doc = concat!("Returns the maximum of a `", stringify!($ty), "` slice.")]
690        #[inline(always)]
691        pub fn $max_fn(
692            data: &[$ty],
693            mask: Option<&Bitmask>,
694            null_count: Option<usize>,
695        ) -> Option<$ty> {
696            $reduce_fn(data, mask, null_count).map(|(_, max)| max)
697        }
698
699        #[doc = concat!("Returns the (min, max) of a `", stringify!($ty), "` slice.")]
700        #[inline(always)]
701        pub fn $range_fn(
702            data: &[$ty],
703            mask: Option<&Bitmask>,
704            null_count: Option<usize>,
705        ) -> Option<($ty, $ty)> {
706            $reduce_fn(data, mask, null_count)
707        }
708    };
709}
710
711// ---------- 1. SIMD helpers: SUM / COUNT / SUM² ------------------------
712
713/// Implements raw (sum, count) aggregation for float types.
714macro_rules! impl_sum_float {
715    ($name:ident, $ty:ty, $LANES:expr) => {
716        #[doc = concat!("Returns (sum, count) for non-null `", stringify!($ty), "` values.")]
717        #[doc = " Uses SIMD where available. Uses Neumaier compensated summation."]
718        #[inline(always)]
719        pub fn $name(
720            data: &[$ty],
721            mask: Option<&Bitmask>,
722            null_count: Option<usize>,
723        ) -> (f64, usize) {
724            let has_nulls = has_nulls(null_count, mask);
725
726            #[cfg(feature = "simd")]
727            {
728                // Check if both arrays are 64-byte aligned for SIMD
729                if is_simd_aligned(data) {
730                    const N: usize = $LANES;
731                    type V<const L: usize> = Simd<$ty, L>;
732                    type M<const L: usize> = <$ty as SimdElement>::Mask;
733
734                    let len = data.len();
735                    let mut i = 0;
736                    let mut sum = 0.0f64;
737                    let mut comp = 0.0f64;
738                    let mut cnt = 0usize;
739
740                    if !has_nulls {
741                        // Dense path
742                        while i + N <= len {
743                            let v = V::<N>::from_slice(&data[i..i + N]);
744                            let chunk_sum = v.cast::<f64>().reduce_sum();
745                            neumaier_add(&mut sum, &mut comp, chunk_sum);
746                            cnt += N;
747                            i += N;
748                        }
749                        for &x in &data[i..] {
750                            neumaier_add(&mut sum, &mut comp, x as f64);
751                            cnt += 1;
752                        }
753                    } else {
754                        // Null‐aware path
755                        let mb = mask.expect(
756                            "Bitmask must be Some if nulls are present or mask is supplied",
757                        );
758                        let bytes = mb.as_bytes();
759
760                        while i + N <= len {
761                            let v = V::<N>::from_slice(&data[i..i + N]);
762                            // extract a SIMD‐mask for these N lanes
763                            let lane_m: Mask<M<N>, N> =
764                                bitmask_to_simd_mask::<N, M<N>>(bytes, i, len);
765                            // zero out the null lanes
766                            let vv = lane_m.select(v, V::<N>::splat(0.0 as $ty));
767                            let chunk_sum = vv.cast::<f64>().reduce_sum();
768                            neumaier_add(&mut sum, &mut comp, chunk_sum);
769                            // count the 1‐bits in lane_m
770                            cnt += lane_m.to_bitmask().count_ones() as usize;
771                            i += N;
772                        }
773                        for idx in i..len {
774                            if unsafe { mb.get_unchecked(idx) } {
775                                neumaier_add(&mut sum, &mut comp, data[idx] as f64);
776                                cnt += 1;
777                            }
778                        }
779                    }
780
781                    return (sum + comp, cnt);
782                }
783                // Fall through to scalar path if alignment check failed
784            }
785
786            // Scalar fallback - alignment check failed
787            let mut sum = 0.0f64;
788            let mut comp = 0.0f64;
789            let mut cnt = 0usize;
790            let len = data.len();
791
792            if !has_nulls {
793                for &x in data {
794                    neumaier_add(&mut sum, &mut comp, x as f64);
795                    cnt += 1;
796                }
797            } else {
798                let mb =
799                    mask.expect("Bitmask must be Some if nulls are present or mask is supplied");
800                for i in 0..len {
801                    if unsafe { mb.get_unchecked(i) } {
802                        neumaier_add(&mut sum, &mut comp, data[i] as f64);
803                        cnt += 1;
804                    }
805                }
806            }
807
808            (sum + comp, cnt)
809        }
810    };
811}
812
813/// Macro to implement raw sum/count aggregation for integer types.
814macro_rules! impl_sum_int {
815    ($name:ident, $ty:ty, $acc:ty, $LANES:expr) => {
816        #[doc = concat!("Returns (sum, count) for non-null `", stringify!($ty), "` values.")]
817        #[doc = concat!(" Accumulates using `", stringify!($acc), "`. Uses SIMD if available.")]
818        #[inline(always)]
819        pub fn $name(
820            data: &[$ty],
821            mask: Option<&Bitmask>,
822            null_count: Option<usize>,
823        ) -> ($acc, usize) {
824            let has_nulls = has_nulls(null_count, mask);
825
826            #[cfg(feature = "simd")]
827            {
828                // Check if both arrays are 64-byte aligned for SIMD
829                if is_simd_aligned(data) {
830                    const N: usize = $LANES;
831                    type V<const L: usize> = Simd<$ty, L>;
832                    type M<const L: usize> = <$ty as SimdElement>::Mask;
833
834                    let len = data.len();
835                    let mut i = 0;
836                    let mut sum: $acc = 0;
837                    let mut cnt = 0_usize;
838
839                    if !has_nulls {
840                        // dense path
841                        while i + N <= len {
842                            let v = V::<N>::from_slice(&data[i..i + N]);
843                            sum += v.reduce_sum() as $acc;
844                            cnt += N;
845                            i += N;
846                        }
847                        // tail
848                        for &x in &data[i..] {
849                            sum += x as $acc;
850                            cnt += 1;
851                        }
852                    } else {
853                        // null‐aware path
854                        let mb = mask.expect(
855                            "Bitmask must be Some if nulls are present or mask is supplied",
856                        );
857                        let bytes = mb.as_bytes();
858
859                        while i + N <= len {
860                            let v = V::<N>::from_slice(&data[i..i + N]);
861                            // extract the SIMD validity mask
862                            let lane_m: Mask<M<N>, N> =
863                                bitmask_to_simd_mask::<N, M<N>>(bytes, i, len);
864                            // zero‐out invalid lanes
865                            let vv = lane_m.select(v, V::<N>::splat(0 as $ty));
866                            sum += vv.reduce_sum() as $acc;
867                            // count ones in the mask
868                            cnt += lane_m.to_bitmask().count_ones() as usize;
869                            i += N;
870                        }
871                        // scalar tail
872                        for idx in i..len {
873                            if unsafe { mb.get_unchecked(idx) } {
874                                sum += data[idx] as $acc;
875                                cnt += 1;
876                            }
877                        }
878                    }
879
880                    return (sum, cnt);
881                }
882                // Fall through to scalar path if alignment check failed
883            }
884
885            // Scalar fallback - alignment check failed
886            let mut sum: $acc = 0;
887            let mut cnt = 0_usize;
888            let len = data.len();
889
890            if !has_nulls {
891                for &x in data {
892                    sum += x as $acc;
893                    cnt += 1;
894                }
895            } else {
896                let mb =
897                    mask.expect("Bitmask must be Some if nulls are present or mask is supplied");
898                for i in 0..len {
899                    if unsafe { mb.get_unchecked(i) } {
900                        sum += data[i] as $acc;
901                        cnt += 1;
902                    }
903                }
904            }
905
906            (sum, cnt)
907        }
908    };
909}
910
911/// Computes Σx² for `$ty` values, skipping nulls.
912/// SIMD‐accelerated when available. Uses Neumaier compensated summation.
913macro_rules! impl_sum2_float {
914    ($fn_name:ident, $ty:ty, $LANES:expr) => {
915        #[doc = concat!("Computes sum-of-squares for `", stringify!($ty), "` (Σx²).")]
916        #[doc = " Skips nulls if present. SIMD‐enabled. Neumaier compensated."]
917        #[inline(always)]
918        pub fn $fn_name(data: &[$ty], mask: Option<&Bitmask>, null_count: Option<usize>) -> f64 {
919            let has_nulls = has_nulls(null_count, mask);
920
921            #[cfg(feature = "simd")]
922            {
923                // Check if both arrays are 64-byte aligned for SIMD
924                if is_simd_aligned(data) {
925                    // Vector type for the original element type
926                    type V<const N: usize> = Simd<$ty, N>;
927                    // Mask type comes from the element's SimdElement impl
928                    type M<const N: usize> = <$ty as SimdElement>::Mask;
929
930                    const N: usize = $LANES;
931                    let len = data.len();
932                    let mut i = 0;
933                    let mut s2 = 0.0f64;
934                    let mut comp = 0.0f64;
935
936                    if !has_nulls {
937                        // Dense path: no nulls
938                        while i + N <= len {
939                            let dv = V::<N>::from_slice(&data[i..i + N]).cast::<f64>();
940                            let chunk = (dv * dv).reduce_sum();
941                            neumaier_add(&mut s2, &mut comp, chunk);
942                            i += N;
943                        }
944                        // tail
945                        for &x in &data[i..] {
946                            let y = x as f64;
947                            neumaier_add(&mut s2, &mut comp, y * y);
948                        }
949                    } else {
950                        // Null‐aware path
951                        let mb = mask.expect("mask must be Some if nulls present");
952                        let bytes = mb.as_bytes();
953                        while i + N <= len {
954                            let v = V::<N>::from_slice(&data[i..i + N]);
955                            let lane_m: Mask<M<N>, N> =
956                                bitmask_to_simd_mask::<N, M<N>>(bytes, i, len);
957                            let vv = lane_m.select(v, V::<N>::splat(0 as $ty));
958                            let dv = vv.cast::<f64>();
959                            let chunk = (dv * dv).reduce_sum();
960                            neumaier_add(&mut s2, &mut comp, chunk);
961                            i += N;
962                        }
963                        // tail
964                        for idx in i..len {
965                            if unsafe { mb.get_unchecked(idx) } {
966                                let y = data[idx] as f64;
967                                neumaier_add(&mut s2, &mut comp, y * y);
968                            }
969                        }
970                    }
971                    return s2 + comp;
972                }
973                // Fall through to scalar path if alignment check failed
974            }
975
976            // Scalar fallback - alignment check failed
977            let mut s2 = 0.0f64;
978            let mut comp = 0.0f64;
979            if !has_nulls {
980                for &x in data {
981                    let y = x as f64;
982                    neumaier_add(&mut s2, &mut comp, y * y);
983                }
984            } else {
985                let mb = mask.expect("mask must be Some if nulls present");
986                for (i, &x) in data.iter().enumerate() {
987                    if unsafe { mb.get_unchecked(i) } {
988                        let y = x as f64;
989                        neumaier_add(&mut s2, &mut comp, y * y);
990                    }
991                }
992            }
993            s2 + comp
994        }
995    };
996}
997
998/// Macro to implement null-aware element counting.
999macro_rules! impl_count {
1000    ($fn_name:ident) => {
1001        #[doc = "Returns the number of valid (non-null) elements."]
1002        #[inline(always)]
1003        pub fn $fn_name(len: usize, mask: Option<&Bitmask>, null_count: Option<usize>) -> usize {
1004            match null_count {
1005                Some(n) => len - n,
1006                None => match mask {
1007                    Some(m) => m.count_ones(),
1008                    None => len,
1009                },
1010            }
1011        }
1012    };
1013}
1014
1015/// Macro to implement sum and average aggregations for float/int types.
1016/// Also handles raw (sum, count) cases internally.
1017macro_rules! impl_sum_avg {
1018    // floats ---------------------------------------------------
1019    (float $ty:ident, $sum_fn:ident, $avg_fn:ident, $raw:ident) => {
1020        #[doc = concat!("Returns the sum of `", stringify!($ty), "` values, or None if all-null.")]
1021        #[inline(always)]
1022        pub fn $sum_fn(d: &[$ty], m: Option<&Bitmask>, null_count: Option<usize>) -> Option<f64> {
1023            let (s, n) = $raw(d, m, null_count);
1024            if n == 0 { None } else { Some(s) }
1025        }
1026
1027        #[doc = concat!("Returns the average of `", stringify!($ty), "` values, or None if all-null.")]
1028        #[inline(always)]
1029        pub fn $avg_fn(d: &[$ty], m: Option<&Bitmask>, null_count: Option<usize>) -> Option<f64> {
1030            let (s, n) = $raw(d, m, null_count);
1031            if n == 0 { None } else { Some(s / n as f64) }
1032        }
1033    };
1034
1035    // ints -----------------------------------------------------
1036    (int $ty:ident, $sum_fn:ident, $avg_fn:ident, $acc:ty, $raw:ident) => {
1037        #[doc = concat!("Returns the sum of `", stringify!($ty), "` values, or None if all-null.")]
1038        #[inline(always)]
1039        pub fn $sum_fn(d: &[$ty], m: Option<&Bitmask>, null_count: Option<usize>) -> Option<$acc> {
1040            let (s, n) = $raw(d, m, null_count);
1041            if n == 0 { None } else { Some(s) }
1042        }
1043
1044        #[doc = concat!("Returns the average of `", stringify!($ty), "` values, or None if all-null.")]
1045        #[inline(always)]
1046        pub fn $avg_fn(d: &[$ty], m: Option<&Bitmask>, null_count: Option<usize>) -> Option<f64> {
1047            let (s, n) = $raw(d, m, null_count);
1048            if n == 0 { None } else { Some(s as f64 / n as f64) }
1049        }
1050    };
1051}
1052
1053macro_rules! impl_stat_moments4_float {
1054    ($fn_name:ident, $ty:ty, $LANES:expr) => {
1055        #[doc = concat!(
1056                                    "SIMD-accelerated (∑x … ∑x⁴) for `", stringify!($ty), "`.\n",
1057                                    "Returns `(s1,s2,s3,s4,n)` as `(f64,f64,f64,f64,usize)`.\n",
1058                                    "Uses Neumaier compensated summation for all 4 accumulators."
1059                                )]
1060        #[inline(always)]
1061        pub fn $fn_name(
1062            data: &[$ty],
1063            mask: Option<&Bitmask>,
1064            null_count: Option<usize>,
1065        ) -> (f64, f64, f64, f64, usize) {
1066            let has_nulls = has_nulls(null_count, mask);
1067
1068            #[cfg(feature = "simd")]
1069            {
1070                // Check if both arrays are 64-byte aligned for SIMD
1071                if is_simd_aligned(data) {
1072                    type V<const L: usize> = Simd<$ty, L>;
1073                    type M<const L: usize> = <$ty as SimdElement>::Mask;
1074
1075                    const N: usize = $LANES;
1076                    let len = data.len();
1077                    let mut i = 0usize;
1078
1079                    let mut s1 = 0.0;
1080                    let mut s2 = 0.0;
1081                    let mut s3 = 0.0;
1082                    let mut s4 = 0.0;
1083                    let mut c1 = 0.0;
1084                    let mut c2 = 0.0;
1085                    let mut c3 = 0.0;
1086                    let mut c4 = 0.0;
1087                    let mut n = 0usize;
1088
1089                    if !has_nulls {
1090                        // Dense, no nulls
1091                        while i + N <= len {
1092                            let v = V::<N>::from_slice(&data[i..i + N]).cast::<f64>();
1093                            let v2 = v * v;
1094                            neumaier_add(&mut s1, &mut c1, v.reduce_sum());
1095                            neumaier_add(&mut s2, &mut c2, v2.reduce_sum());
1096                            neumaier_add(&mut s3, &mut c3, (v2 * v).reduce_sum());
1097                            neumaier_add(&mut s4, &mut c4, (v2 * v2).reduce_sum());
1098                            n += N;
1099                            i += N;
1100                        }
1101                        for &x in &data[i..] {
1102                            let xf = x as f64;
1103                            let x2 = xf * xf;
1104                            neumaier_add(&mut s1, &mut c1, xf);
1105                            neumaier_add(&mut s2, &mut c2, x2);
1106                            neumaier_add(&mut s3, &mut c3, x2 * xf);
1107                            neumaier_add(&mut s4, &mut c4, x2 * x2);
1108                            n += 1;
1109                        }
1110                    } else {
1111                        // Null-aware SIMD
1112                        let mb = mask.expect("mask must be Some when nulls present");
1113                        let mask_bytes = mb.as_bytes();
1114                        while i + N <= len {
1115                            let v_raw = V::<N>::from_slice(&data[i..i + N]);
1116                            let lane_m: Mask<M<N>, N> =
1117                                bitmask_to_simd_mask::<N, M<N>>(mask_bytes, i, len);
1118                            if !lane_m.any() {
1119                                i += N;
1120                                continue;
1121                            }
1122                            // Zero out nulls
1123                            let v = lane_m.select(v_raw, V::<N>::splat(0 as $ty)).cast::<f64>();
1124                            let v2 = v * v;
1125                            neumaier_add(&mut s1, &mut c1, v.reduce_sum());
1126                            neumaier_add(&mut s2, &mut c2, v2.reduce_sum());
1127                            neumaier_add(&mut s3, &mut c3, (v2 * v).reduce_sum());
1128                            neumaier_add(&mut s4, &mut c4, (v2 * v2).reduce_sum());
1129                            n += lane_m.to_bitmask().count_ones() as usize;
1130                            i += N;
1131                        }
1132                        for j in i..len {
1133                            if unsafe { mb.get_unchecked(j) } {
1134                                let xf = data[j] as f64;
1135                                let x2 = xf * xf;
1136                                neumaier_add(&mut s1, &mut c1, xf);
1137                                neumaier_add(&mut s2, &mut c2, x2);
1138                                neumaier_add(&mut s3, &mut c3, x2 * xf);
1139                                neumaier_add(&mut s4, &mut c4, x2 * x2);
1140                                n += 1;
1141                            }
1142                        }
1143                    }
1144                    return (s1 + c1, s2 + c2, s3 + c3, s4 + c4, n);
1145                }
1146                // Fall through to scalar path if alignment check failed
1147            }
1148
1149            // Scalar fallback - alignment check failed
1150            let mut s1 = 0.0;
1151            let mut s2 = 0.0;
1152            let mut s3 = 0.0;
1153            let mut s4 = 0.0;
1154            let mut c1 = 0.0;
1155            let mut c2 = 0.0;
1156            let mut c3 = 0.0;
1157            let mut c4 = 0.0;
1158            let mut n = 0usize;
1159
1160            if !has_nulls {
1161                for &x in data {
1162                    let xf = x as f64;
1163                    let x2 = xf * xf;
1164                    neumaier_add(&mut s1, &mut c1, xf);
1165                    neumaier_add(&mut s2, &mut c2, x2);
1166                    neumaier_add(&mut s3, &mut c3, x2 * xf);
1167                    neumaier_add(&mut s4, &mut c4, x2 * x2);
1168                    n += 1;
1169                }
1170            } else {
1171                let mb = mask.expect("mask must be Some when nulls present");
1172                for (idx, &x) in data.iter().enumerate() {
1173                    if unsafe { mb.get_unchecked(idx) } {
1174                        let xf = x as f64;
1175                        let x2 = xf * xf;
1176                        neumaier_add(&mut s1, &mut c1, xf);
1177                        neumaier_add(&mut s2, &mut c2, x2);
1178                        neumaier_add(&mut s3, &mut c3, x2 * xf);
1179                        neumaier_add(&mut s4, &mut c4, x2 * x2);
1180                        n += 1;
1181                    }
1182                }
1183            }
1184            (s1 + c1, s2 + c2, s3 + c3, s4 + c4, n)
1185        }
1186    };
1187}
1188
1189macro_rules! impl_stat_moments4_int {
1190    ($fn_name:ident, $ty:ty, $acc:ty, $LANES:expr) => {
1191        #[doc = concat!(
1192                    "SIMD-accelerated (∑x … ∑x⁴) for `", stringify!($ty), "` (promoted to f64).\n",
1193                    "Accumulator type for scalar paths: `", stringify!($acc), "`."
1194                )]
1195        #[inline(always)]
1196        pub fn $fn_name(
1197            data: &[$ty],
1198            mask: Option<&Bitmask>,
1199            null_count: Option<usize>
1200        ) -> (f64, f64, f64, f64, usize) {
1201            let has_nulls = has_nulls(null_count, mask);
1202
1203            #[cfg(feature = "simd")]
1204            {
1205                // Check if both arrays are 64-byte aligned for SIMD
1206                if is_simd_aligned(data) {
1207                type V<const L: usize> = Simd<$ty, L>;
1208                type M<const L: usize> = <$ty as SimdElement>::Mask;
1209
1210                const N: usize = $LANES;
1211                let len = data.len();
1212                let mut i = 0usize;
1213
1214                let mut s1 = 0.0;
1215                let mut s2 = 0.0;
1216                let mut s3 = 0.0;
1217                let mut s4 = 0.0;
1218                let mut n = 0usize;
1219
1220                if !has_nulls {
1221                    while i + N <= len {
1222                        let v_i = V::<N>::from_slice(&data[i..i + N]);
1223                        let v_f = v_i.cast::<f64>();
1224                        let v2 = v_f * v_f;
1225                        s1 += v_f.reduce_sum();
1226                        s2 += v2.reduce_sum();
1227                        s3 += (v2 * v_f).reduce_sum();
1228                        s4 += (v2 * v2).reduce_sum();
1229                        n += N;
1230                        i += N;
1231                    }
1232                    for &x in &data[i..] {
1233                        let xf = x as f64;
1234                        s1 += xf;
1235                        let x2 = xf * xf;
1236                        s2 += x2;
1237                        s3 += x2 * xf;
1238                        s4 += x2 * x2;
1239                        n += 1;
1240                    }
1241                } else {
1242                    let mb = mask.expect("mask must be Some when nulls present");
1243                    let mask_bytes = mb.as_bytes();
1244                    while i + N <= len {
1245                        let v_i = V::<N>::from_slice(&data[i..i + N]);
1246                        let lane_m: Mask<M<N>, N> =
1247                            bitmask_to_simd_mask::<N, M<N>>(mask_bytes, i, len);
1248                        if !lane_m.any() {
1249                            i += N;
1250                            continue;
1251                        }
1252                        let v_masked = lane_m.select(v_i, V::<N>::splat(0));
1253                        let v_f = v_masked.cast::<f64>();
1254                        let v2 = v_f * v_f;
1255                        s1 += v_f.reduce_sum();
1256                        s2 += v2.reduce_sum();
1257                        s3 += (v2 * v_f).reduce_sum();
1258                        s4 += (v2 * v2).reduce_sum();
1259                        n += lane_m.to_bitmask().count_ones() as usize;
1260                        i += N;
1261                    }
1262                    for j in i..len {
1263                        if unsafe { mb.get_unchecked(j) } {
1264                            let xf = data[j] as f64;
1265                            let x2 = xf * xf;
1266                            s1 += xf;
1267                            s2 += x2;
1268                            s3 += x2 * xf;
1269                            s4 += x2 * x2;
1270                            n += 1;
1271                        }
1272                    }
1273                }
1274                return (s1, s2, s3, s4, n);
1275                }
1276                // Fall through to scalar path if alignment check failed
1277            }
1278
1279            // Scalar fallback - alignment check failed
1280            // For int types, accumulate in a wide accumulator, then cast
1281            let mut s1: $acc = 0;
1282            let mut s2: $acc = 0;
1283            let mut s3: $acc = 0;
1284            let mut s4: $acc = 0;
1285            let mut n = 0usize;
1286
1287            if !has_nulls {
1288                for &x in data {
1289                    let xi: $acc = x as $acc;
1290                    let x2 = xi * xi;
1291                    s1 += xi;
1292                    s2 += x2;
1293                    s3 += x2 * xi;
1294                    s4 += x2 * x2;
1295                    n += 1;
1296                }
1297            } else {
1298                let mb = mask.expect("mask must be Some when nulls present");
1299                for (idx, &x) in data.iter().enumerate() {
1300                    if unsafe { mb.get_unchecked(idx) } {
1301                        let xi: $acc = x as $acc;
1302                        let x2 = xi * xi;
1303                        s1 += xi;
1304                        s2 += x2;
1305                        s3 += x2 * xi;
1306                        s4 += x2 * x2;
1307                        n += 1;
1308                    }
1309                }
1310            }
1311            (s1 as f64, s2 as f64, s3 as f64, s4 as f64, n)
1312        }
1313    };
1314}
1315
1316// Weighted-Sum-2  &  Pair-Stats kernels
1317macro_rules! impl_weighted_sum2_float {
1318    ($fn:ident, $ty:ty, $LANES:expr) => {
1319        #[doc = concat!(
1320                                    "Returns `(∑w·x, ∑w, ∑w·x², rows)` for `", stringify!($ty),
1321                                    "` slices `vals`, `wts` with optional null mask.\n",
1322                                    "Uses Neumaier compensated summation for all 3 accumulators."
1323                                )]
1324        #[inline(always)]
1325        pub fn $fn(
1326            vals: &[$ty],
1327            wts: &[$ty],
1328            mask: Option<&Bitmask>,
1329            null_count: Option<usize>,
1330        ) -> (f64, f64, f64, usize) {
1331            debug_assert_eq!(vals.len(), wts.len(), "weighted_sum2x: len mismatch");
1332            let has_nulls = has_nulls(null_count, mask);
1333
1334            #[cfg(feature = "simd")]
1335            {
1336                // Check if both arrays are 64-byte aligned for SIMD
1337                if is_simd_aligned(vals) && is_simd_aligned(wts) {
1338                    type V<const L: usize> = Simd<$ty, L>;
1339                    type M<const L: usize> = <$ty as SimdElement>::Mask;
1340                    const N: usize = $LANES;
1341
1342                    let len = vals.len();
1343                    let mut i = 0usize;
1344                    let mut swx = 0.0;
1345                    let mut sw = 0.0;
1346                    let mut swx2 = 0.0;
1347                    let mut cwx = 0.0;
1348                    let mut cw = 0.0;
1349                    let mut cwx2 = 0.0;
1350                    let mut n = 0usize;
1351
1352                    if !has_nulls {
1353                        while i + N <= len {
1354                            let v = V::<N>::from_slice(&vals[i..i + N]).cast::<f64>();
1355                            let w = V::<N>::from_slice(&wts[i..i + N]).cast::<f64>();
1356                            neumaier_add(&mut swx, &mut cwx, (v * w).reduce_sum());
1357                            neumaier_add(&mut sw, &mut cw, w.reduce_sum());
1358                            neumaier_add(&mut swx2, &mut cwx2, (w * v * v).reduce_sum());
1359                            n += N;
1360                            i += N;
1361                        }
1362                        for j in i..len {
1363                            let vf = vals[j] as f64;
1364                            let wf = wts[j] as f64;
1365                            neumaier_add(&mut swx, &mut cwx, wf * vf);
1366                            neumaier_add(&mut sw, &mut cw, wf);
1367                            neumaier_add(&mut swx2, &mut cwx2, wf * vf * vf);
1368                            n += 1;
1369                        }
1370                    } else {
1371                        let mb = mask.expect("mask = Some when nulls present");
1372                        let mask_bytes = mb.as_bytes();
1373                        while i + N <= len {
1374                            let v_raw = V::<N>::from_slice(&vals[i..i + N]);
1375                            let w_raw = V::<N>::from_slice(&wts[i..i + N]);
1376                            let lane_m: Mask<M<N>, N> =
1377                                bitmask_to_simd_mask::<N, M<N>>(mask_bytes, i, len);
1378                            if !lane_m.any() {
1379                                i += N;
1380                                continue;
1381                            }
1382                            let v = lane_m.select(v_raw, V::<N>::splat(0.0)).cast::<f64>();
1383                            let w = lane_m.select(w_raw, V::<N>::splat(0.0)).cast::<f64>();
1384                            neumaier_add(&mut swx, &mut cwx, (v * w).reduce_sum());
1385                            neumaier_add(&mut sw, &mut cw, w.reduce_sum());
1386                            neumaier_add(&mut swx2, &mut cwx2, (w * v * v).reduce_sum());
1387                            n += lane_m.to_bitmask().count_ones() as usize;
1388                            i += N;
1389                        }
1390                        for j in i..len {
1391                            if unsafe { mb.get_unchecked(j) } {
1392                                let vf = vals[j] as f64;
1393                                let wf = wts[j] as f64;
1394                                neumaier_add(&mut swx, &mut cwx, wf * vf);
1395                                neumaier_add(&mut sw, &mut cw, wf);
1396                                neumaier_add(&mut swx2, &mut cwx2, wf * vf * vf);
1397                                n += 1;
1398                            }
1399                        }
1400                    }
1401                    return (swx + cwx, sw + cw, swx2 + cwx2, n);
1402                }
1403                // Fall through to scalar path if alignment check failed
1404            }
1405
1406            // Scalar fallback - alignment check failed
1407            let mut swx = 0.0;
1408            let mut sw = 0.0;
1409            let mut swx2 = 0.0;
1410            let mut cwx = 0.0;
1411            let mut cw = 0.0;
1412            let mut cwx2 = 0.0;
1413            let mut n = 0usize;
1414
1415            if !has_nulls {
1416                for i in 0..vals.len() {
1417                    let vf = vals[i] as f64;
1418                    let wf = wts[i] as f64;
1419                    neumaier_add(&mut swx, &mut cwx, wf * vf);
1420                    neumaier_add(&mut sw, &mut cw, wf);
1421                    neumaier_add(&mut swx2, &mut cwx2, wf * vf * vf);
1422                    n += 1;
1423                }
1424            } else {
1425                let mb = mask.expect("mask = Some when nulls present");
1426                for i in 0..vals.len() {
1427                    if unsafe { mb.get_unchecked(i) } {
1428                        let vf = vals[i] as f64;
1429                        let wf = wts[i] as f64;
1430                        neumaier_add(&mut swx, &mut cwx, wf * vf);
1431                        neumaier_add(&mut sw, &mut cw, wf);
1432                        neumaier_add(&mut swx2, &mut cwx2, wf * vf * vf);
1433                        n += 1;
1434                    }
1435                }
1436            }
1437            (swx + cwx, sw + cw, swx2 + cwx2, n)
1438        }
1439    };
1440}
1441
1442/// Statistical summary for paired data analysis.
1443///
1444/// Accumulates fundamental statistical quantities for bivariate data pairs,
1445/// providing the building blocks for correlation analysis, regression coefficients,
1446/// and covariance calculations with SIMD-accelerated computation.
1447///
1448/// # Fields
1449/// - **`n`**: Count of valid (non-null) data pairs
1450/// - **`sx`**: Sum of x-values (Σx)
1451/// - **`sy`**: Sum of y-values (Σy)
1452/// - **`sxy`**: Sum of products (Σxy) for covariance calculation
1453/// - **`sx2`**: Sum of x-squared values (Σx²) for variance calculation
1454/// - **`sy2`**: Sum of y-squared values (Σy²) for variance calculation
1455///
1456/// # Applications
1457/// These statistics enable efficient calculation of:
1458/// - **Pearson correlation coefficient**: r = (n⋅Σxy - Σx⋅Σy) / √[(n⋅Σx² - (Σx)²)(n⋅Σy² - (Σy)²)]
1459/// - **Linear regression slope**: β₁ = (n⋅Σxy - Σx⋅Σy) / (n⋅Σx² - (Σx)²)
1460/// - **Sample covariance**: cov(x,y) = (Σxy - n⋅x̄⋅ȳ) / (n-1)
1461/// - **Coefficient of determination**: R² for regression analysis
1462#[derive(Default)]
1463pub struct PairStats {
1464    /// Count of valid (non-null) paired observations
1465    pub n: usize,
1466    /// Sum of x-values (Σx)
1467    pub sx: f64,
1468    /// Sum of y-values (Σy)
1469    pub sy: f64,
1470    /// Sum of cross-products (Σxy) for covariance computation
1471    pub sxy: f64,
1472    /// Sum of squared x-values (Σx²) for variance computation
1473    pub sx2: f64,
1474    /// Sum of squared y-values (Σy²) for variance computation
1475    pub sy2: f64,
1476}
1477
1478// PAIR - Σ stats (x,y)  (float)
1479macro_rules! impl_pair_stats_float {
1480    ($fn:ident, $ty:ty, $LANES:expr) => {
1481        /// Returns PairStats (n, sum_x, sum_y, sum_xy, sum_x2, sum_y2) for floating point types.
1482        /// Uses Neumaier compensated summation for all 5 accumulators.
1483        #[inline(always)]
1484        pub fn $fn(
1485            xs: &[$ty],
1486            ys: &[$ty],
1487            mask: Option<&Bitmask>,
1488            null_count: Option<usize>,
1489        ) -> PairStats {
1490            debug_assert_eq!(xs.len(), ys.len(), "pair_stats: len mismatch ");
1491            let has_nulls = has_nulls(null_count, mask);
1492
1493            #[cfg(feature = "simd")]
1494            {
1495                // Check if both arrays are 64-byte aligned for SIMD
1496                if is_simd_aligned(xs) && is_simd_aligned(ys) {
1497                    type V<const L: usize> = Simd<$ty, L>;
1498                    type M<const L: usize> = <$ty as SimdElement>::Mask;
1499                    const N: usize = $LANES;
1500                    let len = xs.len();
1501
1502                    let mut i = 0usize;
1503                    let mut sx = 0.0;
1504                    let mut sy = 0.0;
1505                    let mut sxy = 0.0;
1506                    let mut sx2 = 0.0;
1507                    let mut sy2 = 0.0;
1508                    let mut cx = 0.0;
1509                    let mut cy = 0.0;
1510                    let mut cxy = 0.0;
1511                    let mut cx2 = 0.0;
1512                    let mut cy2 = 0.0;
1513                    let mut n = 0usize;
1514
1515                    if !has_nulls {
1516                        while i + N <= len {
1517                            let vx = V::<N>::from_slice(&xs[i..i + N]).cast::<f64>();
1518                            let vy = V::<N>::from_slice(&ys[i..i + N]).cast::<f64>();
1519
1520                            neumaier_add(&mut sx, &mut cx, vx.reduce_sum());
1521                            neumaier_add(&mut sy, &mut cy, vy.reduce_sum());
1522                            neumaier_add(&mut sxy, &mut cxy, (vx * vy).reduce_sum());
1523                            neumaier_add(&mut sx2, &mut cx2, (vx * vx).reduce_sum());
1524                            neumaier_add(&mut sy2, &mut cy2, (vy * vy).reduce_sum());
1525                            n += N;
1526                            i += N;
1527                        }
1528                        for j in i..len {
1529                            let xf = xs[j] as f64;
1530                            let yf = ys[j] as f64;
1531                            neumaier_add(&mut sx, &mut cx, xf);
1532                            neumaier_add(&mut sy, &mut cy, yf);
1533                            neumaier_add(&mut sxy, &mut cxy, xf * yf);
1534                            neumaier_add(&mut sx2, &mut cx2, xf * xf);
1535                            neumaier_add(&mut sy2, &mut cy2, yf * yf);
1536                            n += 1;
1537                        }
1538                    } else {
1539                        let mb = mask.expect("mask Some when nulls present");
1540                        let mask_bytes = mb.as_bytes();
1541
1542                        while i + N <= len {
1543                            let vx_raw = V::<N>::from_slice(&xs[i..i + N]);
1544                            let vy_raw = V::<N>::from_slice(&ys[i..i + N]);
1545                            let lane_m: Mask<M<N>, N> =
1546                                bitmask_to_simd_mask::<N, M<N>>(mask_bytes, i, len);
1547                            if !lane_m.any() {
1548                                i += N;
1549                                continue;
1550                            }
1551
1552                            let vx = lane_m.select(vx_raw, V::<N>::splat(0.0)).cast::<f64>();
1553                            let vy = lane_m.select(vy_raw, V::<N>::splat(0.0)).cast::<f64>();
1554
1555                            neumaier_add(&mut sx, &mut cx, vx.reduce_sum());
1556                            neumaier_add(&mut sy, &mut cy, vy.reduce_sum());
1557                            neumaier_add(&mut sxy, &mut cxy, (vx * vy).reduce_sum());
1558                            neumaier_add(&mut sx2, &mut cx2, (vx * vx).reduce_sum());
1559                            neumaier_add(&mut sy2, &mut cy2, (vy * vy).reduce_sum());
1560                            n += lane_m.to_bitmask().count_ones() as usize;
1561                            i += N;
1562                        }
1563                        for j in i..len {
1564                            if unsafe { mb.get_unchecked(j) } {
1565                                let xf = xs[j] as f64;
1566                                let yf = ys[j] as f64;
1567                                neumaier_add(&mut sx, &mut cx, xf);
1568                                neumaier_add(&mut sy, &mut cy, yf);
1569                                neumaier_add(&mut sxy, &mut cxy, xf * yf);
1570                                neumaier_add(&mut sx2, &mut cx2, xf * xf);
1571                                neumaier_add(&mut sy2, &mut cy2, yf * yf);
1572                                n += 1;
1573                            }
1574                        }
1575                    }
1576                    return PairStats {
1577                        n,
1578                        sx: sx + cx,
1579                        sy: sy + cy,
1580                        sxy: sxy + cxy,
1581                        sx2: sx2 + cx2,
1582                        sy2: sy2 + cy2,
1583                    };
1584                }
1585                // Fall through to scalar path if alignment check failed
1586            }
1587
1588            // Scalar fallback - alignment check failed
1589            let mut sx = 0.0;
1590            let mut sy = 0.0;
1591            let mut sxy = 0.0;
1592            let mut sx2 = 0.0;
1593            let mut sy2 = 0.0;
1594            let mut cx = 0.0;
1595            let mut cy = 0.0;
1596            let mut cxy = 0.0;
1597            let mut cx2 = 0.0;
1598            let mut cy2 = 0.0;
1599            let mut n = 0usize;
1600
1601            if !has_nulls {
1602                for i in 0..xs.len() {
1603                    let xf = xs[i] as f64;
1604                    let yf = ys[i] as f64;
1605                    neumaier_add(&mut sx, &mut cx, xf);
1606                    neumaier_add(&mut sy, &mut cy, yf);
1607                    neumaier_add(&mut sxy, &mut cxy, xf * yf);
1608                    neumaier_add(&mut sx2, &mut cx2, xf * xf);
1609                    neumaier_add(&mut sy2, &mut cy2, yf * yf);
1610                    n += 1;
1611                }
1612            } else {
1613                let mb = mask.expect("mask Some when nulls present");
1614                for i in 0..xs.len() {
1615                    if unsafe { mb.get_unchecked(i) } {
1616                        let xf = xs[i] as f64;
1617                        let yf = ys[i] as f64;
1618                        neumaier_add(&mut sx, &mut cx, xf);
1619                        neumaier_add(&mut sy, &mut cy, yf);
1620                        neumaier_add(&mut sxy, &mut cxy, xf * yf);
1621                        neumaier_add(&mut sx2, &mut cx2, xf * xf);
1622                        neumaier_add(&mut sy2, &mut cy2, yf * yf);
1623                        n += 1;
1624                    }
1625                }
1626            }
1627            PairStats {
1628                n,
1629                sx: sx + cx,
1630                sy: sy + cy,
1631                sxy: sxy + cxy,
1632                sx2: sx2 + cx2,
1633                sy2: sy2 + cy2,
1634            }
1635        }
1636    };
1637}
1638
1639// 4-moment running-update helper (scalar). Works for any Into<f64> value.
1640#[inline(always)]
1641fn moments4_scalar<I>(iter: I) -> (usize, f64, f64, f64, f64)
1642where
1643    I: IntoIterator<Item = f64>,
1644{
1645    let mut n = 0usize;
1646    let mut m1 = 0.0; // mean
1647    let mut m2 = 0.0; // Σ(x-μ)²
1648    let mut m3 = 0.0; // Σ(x-μ)³
1649    let mut m4 = 0.0; // Σ(x-μ)⁴
1650
1651    for x in iter {
1652        n += 1;
1653        let n_f = n as f64;
1654        let delta = x - m1;
1655        let delta_n = delta / n_f;
1656        let delta_n2 = delta_n * delta_n;
1657        let term1 = delta * delta_n * (n_f - 1.0);
1658
1659        m4 += term1 * delta_n2 * (n_f * n_f - 3.0 * n_f + 3.0) + 6.0 * delta_n2 * m2
1660            - 4.0 * delta_n * m3;
1661
1662        m3 += term1 * delta_n * (n_f - 2.0) - 3.0 * delta_n * m2;
1663        m2 += term1;
1664        m1 += delta_n;
1665    }
1666    (n, m1, m2, m3, m4)
1667}
1668
1669//  Skewness & Kurtosis — float
1670/// Implements skewness and kurtosis calculation functions for floating-point types.
1671macro_rules! impl_skew_kurt_float {
1672    ($ty:ident, $skew_fn:ident, $kurt_fn:ident) => {
1673        /// Calculates the skewness of a floating-point dataset.
1674        ///
1675        /// Skewness measures the asymmetry of the probability distribution.
1676        /// Positive skewness indicates a longer tail on the right side,
1677        /// negative skewness indicates a longer tail on the left side.
1678        ///
1679        /// # Arguments
1680        ///
1681        /// * `d` - The slice of floating-point values
1682        /// * `m` - Optional bitmask for null value handling
1683        /// * `null_count` - Optional count of null values
1684        /// * `adjust_sample_bias` - Whether to apply sample bias correction
1685        ///
1686        /// # Returns
1687        ///
1688        /// `Some(skewness)` if calculation is possible, `None` if insufficient data
1689        #[inline(always)]
1690        pub fn $skew_fn(
1691            d: &[$ty],
1692            m: Option<&Bitmask>,
1693            null_count: Option<usize>,
1694            adjust_sample_bias: bool,
1695        ) -> Option<f64> {
1696            let vals: Vec<f64> = if has_nulls(null_count, m) {
1697                collect_valid(d, m).into_iter().map(|x| x as f64).collect()
1698            } else {
1699                d.iter().map(|&x| x as f64).collect()
1700            };
1701
1702            let (n, _m1, m2, m3, _) = moments4_scalar(vals);
1703            if n < 3 || m2 == 0.0 {
1704                return None;
1705            }
1706            let n_f = n as f64;
1707
1708            // population skewness (fisher-style)
1709            let pop = n_f.sqrt() * m3 / m2.powf(1.5);
1710            if !adjust_sample_bias {
1711                return Some(pop);
1712            }
1713
1714            // unbiased sample skewness (Joanes & Gill)
1715            Some(n_f * (n_f - 1.0).sqrt() * m3 / ((n_f - 2.0) * m2.powf(1.5)))
1716        }
1717
1718        /// Calculates the kurtosis of a floating-point dataset.
1719        ///
1720        /// Kurtosis measures the "tailedness" of the probability distribution.
1721        /// Higher kurtosis indicates more extreme outliers, lower kurtosis
1722        /// indicates a distribution with lighter tails.
1723        ///
1724        /// # Arguments
1725        ///
1726        /// * `d` - The slice of floating-point values
1727        /// * `m` - Optional bitmask for null value handling
1728        /// * `null_count` - Optional count of null values
1729        /// * `adjust_sample_bias` - Whether to apply sample bias correction
1730        ///
1731        /// # Returns
1732        ///
1733        /// `Some(kurtosis)` if calculation is possible, `None` if insufficient data
1734        #[inline(always)]
1735        pub fn $kurt_fn(
1736            d: &[$ty],
1737            m: Option<&Bitmask>,
1738            null_count: Option<usize>,
1739            adjust_sample_bias: bool,
1740        ) -> Option<f64> {
1741            let vals: Vec<f64> = if has_nulls(null_count, m) {
1742                collect_valid(d, m).into_iter().map(|x| x as f64).collect()
1743            } else {
1744                d.iter().map(|&x| x as f64).collect()
1745            };
1746
1747            let (n, _m1, m2, _m3, m4) = moments4_scalar(vals);
1748            if n < 2 || m2 == 0.0 {
1749                return None;
1750            }
1751            let n_f = n as f64;
1752            let pop_excess = n_f * m4 / (m2 * m2) - 3.0;
1753            if !adjust_sample_bias {
1754                return Some(pop_excess);
1755            }
1756
1757            // n < 4 -> unbiased formula undefined; fall back to population value
1758            if n < 4 {
1759                return Some(pop_excess);
1760            }
1761
1762            // unbiased sample excess kurtosis (Joanes & Gill)
1763            let numerator = (n_f + 1.0) * n_f * (m4 / (m2 * m2)) - 3.0 * (n_f - 1.0);
1764            let denominator = (n_f - 2.0) * (n_f - 3.0);
1765            Some(numerator * (n_f - 1.0) / denominator)
1766        }
1767    };
1768}
1769
1770//  Skewness & Kurtosis — Integer
1771/// Implements skewness and kurtosis calculation functions for integer types.
1772macro_rules! impl_skew_kurt_int {
1773    ($ty:ident, $skew_fn:ident, $kurt_fn:ident) => {
1774        /// Calculates the skewness of an integer dataset.
1775        ///
1776        /// Skewness measures the asymmetry of the probability distribution.
1777        /// Values are converted to f64 for calculation to maintain precision.
1778        ///
1779        /// # Arguments
1780        ///
1781        /// * `d` - The slice of integer values
1782        /// * `m` - Optional bitmask for null value handling
1783        /// * `null_count` - Optional count of null values
1784        /// * `adjust_sample_bias` - Whether to apply sample bias correction
1785        ///
1786        /// # Returns
1787        ///
1788        /// `Some(skewness)` if calculation is possible, `None` if insufficient data
1789        #[inline(always)]
1790        pub fn $skew_fn(
1791            d: &[$ty],
1792            m: Option<&Bitmask>,
1793            null_count: Option<usize>,
1794            adjust_sample_bias: bool,
1795        ) -> Option<f64> {
1796            let vals: Vec<f64> = if has_nulls(null_count, m) {
1797                collect_valid(d, m).into_iter().map(|x| x as f64).collect()
1798            } else {
1799                d.iter().map(|&x| x as f64).collect()
1800            };
1801
1802            let (n, _m1, m2, m3, _) = moments4_scalar(vals);
1803            if n < 3 || m2 == 0.0 {
1804                return None;
1805            }
1806            let n_f = n as f64;
1807
1808            let pop = n_f.sqrt() * m3 / m2.powf(1.5);
1809            if !adjust_sample_bias {
1810                return Some(pop);
1811            }
1812
1813            Some(n_f * (n_f - 1.0).sqrt() * m3 / ((n_f - 2.0) * m2.powf(1.5)))
1814        }
1815
1816        /// Calculates the kurtosis of an integer dataset.
1817        ///
1818        /// Kurtosis measures the "tailedness" of the probability distribution.
1819        /// Values are converted to f64 for calculation to maintain precision.
1820        ///
1821        /// # Arguments
1822        ///
1823        /// * `d` - The slice of integer values
1824        /// * `m` - Optional bitmask for null value handling
1825        /// * `null_count` - Optional count of null values
1826        /// * `adjust_sample_bias` - Whether to apply sample bias correction
1827        ///
1828        /// # Returns
1829        ///
1830        /// `Some(kurtosis)` if calculation is possible, `None` if insufficient data
1831        #[inline(always)]
1832        pub fn $kurt_fn(
1833            d: &[$ty],
1834            m: Option<&Bitmask>,
1835            null_count: Option<usize>,
1836            adjust_sample_bias: bool,
1837        ) -> Option<f64> {
1838            let vals: Vec<f64> = if has_nulls(null_count, m) {
1839                collect_valid(d, m).into_iter().map(|x| x as f64).collect()
1840            } else {
1841                d.iter().map(|&x| x as f64).collect()
1842            };
1843
1844            let (n, _m1, m2, _m3, m4) = moments4_scalar(vals);
1845            if n < 2 || m2 == 0.0 {
1846                return None;
1847            }
1848            let n_f = n as f64;
1849
1850            let pop_excess = n_f * m4 / (m2 * m2) - 3.0;
1851            if !adjust_sample_bias {
1852                return Some(pop_excess);
1853            }
1854            if n < 4 {
1855                return Some(pop_excess);
1856            }
1857
1858            let numerator = (n_f + 1.0) * n_f * (m4 / (m2 * m2)) - 3.0 * (n_f - 1.0);
1859            let denominator = (n_f - 2.0) * (n_f - 3.0);
1860            Some(numerator * (n_f - 1.0) / denominator)
1861        }
1862    };
1863}
1864
1865/// Computes the sum of squares (Σx²).
1866/// Kernel: SumP2 (L1-2)
1867/// Uses Neumaier compensated summation for numerical accuracy.
1868#[inline(always)]
1869pub fn sum_squares(v: &[f64], mask: Option<&Bitmask>, null_count: Option<usize>) -> f64 {
1870    let has_nulls = has_nulls(null_count, mask);
1871
1872    #[cfg(feature = "simd")]
1873    if is_simd_aligned(v) {
1874        type V<const L: usize> = Simd<f64, L>;
1875        type M<const L: usize> = <f64 as SimdElement>::Mask;
1876        const N: usize = W64;
1877        let len = v.len();
1878        let mut i = 0;
1879        let mut acc = 0.0f64;
1880        let mut comp = 0.0f64;
1881        if !has_nulls {
1882            while i + N <= len {
1883                let simd_v = V::<N>::from_slice(&v[i..i + N]);
1884                let chunk = (simd_v * simd_v).reduce_sum();
1885                neumaier_add(&mut acc, &mut comp, chunk);
1886                i += N;
1887            }
1888            for &x in &v[i..] {
1889                neumaier_add(&mut acc, &mut comp, x * x);
1890            }
1891        } else {
1892            let mb = mask.expect("sum_squares: mask must be Some when nulls are present");
1893            let mask_bytes = mb.as_bytes();
1894            while i + N <= len {
1895                let simd_v = V::<N>::from_slice(&v[i..i + N]);
1896                let lane_m: Mask<M<N>, N> = bitmask_to_simd_mask::<N, M<N>>(mask_bytes, i, len);
1897                let simd_v_masked = lane_m.select(simd_v, V::<N>::splat(0.0));
1898                let chunk = (simd_v_masked * simd_v_masked).reduce_sum();
1899                neumaier_add(&mut acc, &mut comp, chunk);
1900                i += N;
1901            }
1902            for j in i..len {
1903                if unsafe { mb.get_unchecked(j) } {
1904                    neumaier_add(&mut acc, &mut comp, v[j] * v[j]);
1905                }
1906            }
1907        }
1908        return acc + comp;
1909    }
1910
1911    // Scalar fallback - alignment check failed or no simd flag
1912    let mut acc = 0.0f64;
1913    let mut comp = 0.0f64;
1914    if !has_nulls {
1915        for &x in v {
1916            neumaier_add(&mut acc, &mut comp, x * x);
1917        }
1918    } else {
1919        let mb = mask.expect("sum_squares: mask must be Some when nulls are present");
1920        for (j, &x) in v.iter().enumerate() {
1921            if unsafe { mb.get_unchecked(j) } {
1922                neumaier_add(&mut acc, &mut comp, x * x);
1923            }
1924        }
1925    }
1926    acc + comp
1927}
1928
1929/// Computes the sum of cubes (Σx³).
1930/// Kernel: SumP3 (L1-3)
1931/// Uses Neumaier compensated summation for numerical accuracy.
1932#[inline(always)]
1933pub fn sum_cubes(v: &[f64], mask: Option<&Bitmask>, null_count: Option<usize>) -> f64 {
1934    let has_nulls = has_nulls(null_count, mask);
1935
1936    #[cfg(feature = "simd")]
1937    if is_simd_aligned(v) {
1938        type V<const L: usize> = Simd<f64, L>;
1939        type M<const L: usize> = <f64 as SimdElement>::Mask;
1940        const N: usize = W64;
1941        let len = v.len();
1942        let mut i = 0;
1943        let mut acc = 0.0f64;
1944        let mut comp = 0.0f64;
1945        let mut found_infinite = false;
1946
1947        if !has_nulls {
1948            while i + N <= len {
1949                let simd_v = V::<N>::from_slice(&v[i..i + N]);
1950                let simd_v2 = simd_v * simd_v;
1951                let simd_cubes = simd_v2 * simd_v;
1952                // SIMD horizontal check for infinities
1953                let infinite_mask = simd_cubes.is_infinite();
1954                if infinite_mask.any() {
1955                    found_infinite = true;
1956                }
1957                neumaier_add(&mut acc, &mut comp, simd_cubes.reduce_sum());
1958                i += N;
1959            }
1960            for &x in &v[i..] {
1961                let cube = x * x * x;
1962                if cube.is_infinite() {
1963                    found_infinite = true;
1964                }
1965                neumaier_add(&mut acc, &mut comp, cube);
1966            }
1967        } else {
1968            let mb = mask.expect("sum_cubes: mask must be Some when nulls are present");
1969            let mask_bytes = mb.as_bytes();
1970            while i + N <= len {
1971                let simd_v = V::<N>::from_slice(&v[i..i + N]);
1972                let lane_m: Mask<M<N>, N> = bitmask_to_simd_mask::<N, M<N>>(mask_bytes, i, len);
1973                let simd_v_masked = lane_m.select(simd_v, V::<N>::splat(0.0));
1974                let simd_v2 = simd_v_masked * simd_v_masked;
1975                let simd_cubes = simd_v2 * simd_v_masked;
1976                let infinite_mask = simd_cubes.is_infinite();
1977                if infinite_mask.any() {
1978                    found_infinite = true;
1979                }
1980                neumaier_add(&mut acc, &mut comp, simd_cubes.reduce_sum());
1981                i += N;
1982            }
1983            for j in i..len {
1984                if unsafe { mb.get_unchecked(j) } {
1985                    let cube = v[j] * v[j] * v[j];
1986                    if cube.is_infinite() {
1987                        found_infinite = true;
1988                    }
1989                    neumaier_add(&mut acc, &mut comp, cube);
1990                }
1991            }
1992        }
1993        let result = acc + comp;
1994        return if result.is_nan() && found_infinite {
1995            f64::INFINITY
1996        } else {
1997            result
1998        };
1999    }
2000
2001    // Scalar fallback - alignment check failed or no simd flag
2002    let mut acc = 0.0f64;
2003    let mut comp = 0.0f64;
2004    let mut found_infinite = false;
2005    if !has_nulls {
2006        for &x in v {
2007            let cube = x * x * x;
2008            if cube.is_infinite() {
2009                found_infinite = true;
2010            }
2011            neumaier_add(&mut acc, &mut comp, cube);
2012        }
2013    } else {
2014        let mb = mask.expect("sum_cubes: mask must be Some when nulls are present");
2015        for (j, &x) in v.iter().enumerate() {
2016            if unsafe { mb.get_unchecked(j) } {
2017                let cube = x * x * x;
2018                if cube.is_infinite() {
2019                    found_infinite = true;
2020                }
2021                neumaier_add(&mut acc, &mut comp, cube);
2022            }
2023        }
2024    }
2025    let result = acc + comp;
2026    if result.is_nan() && found_infinite {
2027        f64::INFINITY
2028    } else {
2029        result
2030    }
2031}
2032
2033/// Computes the sum of fourth powers (Σx⁴).
2034/// Kernel: SumP4 (L1-4)
2035/// Uses Neumaier compensated summation for numerical accuracy.
2036#[inline(always)]
2037pub fn sum_quartics(v: &[f64], mask: Option<&Bitmask>, null_count: Option<usize>) -> f64 {
2038    let has_nulls = has_nulls(null_count, mask);
2039
2040    #[cfg(feature = "simd")]
2041    if is_simd_aligned(v) {
2042        type V<const L: usize> = Simd<f64, L>;
2043        type M<const L: usize> = <f64 as SimdElement>::Mask;
2044        const N: usize = W64;
2045        let len = v.len();
2046        let mut i = 0;
2047        let mut acc = 0.0f64;
2048        let mut comp = 0.0f64;
2049        if !has_nulls {
2050            while i + N <= len {
2051                let simd_v = V::<N>::from_slice(&v[i..i + N]);
2052                let simd_v2 = simd_v * simd_v;
2053                let chunk = (simd_v2 * simd_v2).reduce_sum();
2054                neumaier_add(&mut acc, &mut comp, chunk);
2055                i += N;
2056            }
2057            for &x in &v[i..] {
2058                let x2 = x * x;
2059                neumaier_add(&mut acc, &mut comp, x2 * x2);
2060            }
2061        } else {
2062            let mb = mask.expect("sum_quartics: mask must be Some when nulls are present");
2063            let mask_bytes = mb.as_bytes();
2064            while i + N <= len {
2065                let simd_v = V::<N>::from_slice(&v[i..i + N]);
2066                let lane_m: Mask<M<N>, N> = bitmask_to_simd_mask::<N, M<N>>(mask_bytes, i, len);
2067                let simd_v_masked = lane_m.select(simd_v, V::<N>::splat(0.0));
2068                let simd_v2 = simd_v_masked * simd_v_masked;
2069                let chunk = (simd_v2 * simd_v2).reduce_sum();
2070                neumaier_add(&mut acc, &mut comp, chunk);
2071                i += N;
2072            }
2073            for j in i..len {
2074                if unsafe { mb.get_unchecked(j) } {
2075                    let x2 = v[j] * v[j];
2076                    neumaier_add(&mut acc, &mut comp, x2 * x2);
2077                }
2078            }
2079        }
2080        return acc + comp;
2081    }
2082
2083    // Scalar fallback - alignment check failed or no simd flag
2084    let mut acc = 0.0f64;
2085    let mut comp = 0.0f64;
2086    if !has_nulls {
2087        for &x in v {
2088            let x2 = x * x;
2089            neumaier_add(&mut acc, &mut comp, x2 * x2);
2090        }
2091    } else {
2092        let mb = mask.expect("sum_quartics: mask must be Some when nulls are present");
2093        for (j, &x) in v.iter().enumerate() {
2094            if unsafe { mb.get_unchecked(j) } {
2095                let x2 = x * x;
2096                neumaier_add(&mut acc, &mut comp, x2 * x2);
2097            }
2098        }
2099    }
2100    acc + comp
2101}
2102
2103// Macro impls per type
2104
2105impl_weighted_sum2_float!(weighted_sum2_f64, f64, W64);
2106impl_weighted_sum2_float!(weighted_sum2_f32, f32, W32);
2107
2108impl_pair_stats_float!(pair_stats_f64, f64, W64);
2109impl_pair_stats_float!(pair_stats_f32, f32, W32);
2110
2111impl_stat_moments4_float!(stat_moments4_f64, f64, W64);
2112impl_stat_moments4_float!(stat_moments4_f32, f32, W32);
2113
2114impl_stat_moments4_int!(stat_moments4_i64, i64, i128, W64);
2115impl_stat_moments4_int!(stat_moments4_u64, u64, u128, W64);
2116impl_stat_moments4_int!(stat_moments4_i32, i32, i128, W32);
2117impl_stat_moments4_int!(stat_moments4_u32, u32, u128, W32);
2118
2119impl_stat_moments_float!(stat_moments_f64, f64, W64);
2120impl_stat_moments_float!(stat_moments_f32, f32, W32);
2121impl_stat_moments_int!(stat_moments_i64, i64, i64, W64, i64);
2122impl_stat_moments_int!(stat_moments_u64, u64, u64, W64, i64);
2123impl_stat_moments_int!(stat_moments_i32, i32, i64, W32, i32);
2124impl_stat_moments_int!(stat_moments_u32, u32, u64, W32, i32);
2125impl_reduce_min_max!(reduce_min_max_i64, i64, W64, i64::MAX, i64::MIN);
2126impl_reduce_min_max!(reduce_min_max_u64, u64, W64, u64::MAX, u64::MIN);
2127impl_reduce_min_max!(reduce_min_max_i32, i32, W32, i32::MAX, i32::MIN);
2128impl_reduce_min_max!(reduce_min_max_u32, u32, W32, u32::MAX, u32::MIN);
2129impl_reduce_min_max_float!(reduce_min_max_f64, f64, W64, Simd, Mask);
2130impl_reduce_min_max_float!(reduce_min_max_f32, f32, W32, Simd, Mask);
2131
2132impl_min_max_range!(f64, reduce_min_max_f64, min_f64, max_f64, range_f64);
2133impl_min_max_range!(f32, reduce_min_max_f32, min_f32, max_f32, range_f32);
2134impl_min_max_range!(i64, reduce_min_max_i64, min_i64, max_i64, range_i64);
2135impl_min_max_range!(u64, reduce_min_max_u64, min_u64, max_u64, range_u64);
2136impl_min_max_range!(i32, reduce_min_max_i32, min_i32, max_i32, range_i32);
2137impl_min_max_range!(u32, reduce_min_max_u32, min_u32, max_u32, range_u32);
2138
2139impl_sum_avg!(float f64, sum_f64, average_f64, sum_f64_raw);
2140impl_sum_avg!(float f32, sum_f32, average_f32, sum_f32_raw);
2141
2142impl_sum_avg!(int  i64, sum_i64, average_i64, i64, sum_i64_raw);
2143impl_sum_avg!(int  u64, sum_u64, average_u64, u64, sum_u64_raw);
2144impl_sum_avg!(int  i32, sum_i32, average_i32, i64 , sum_i32_raw);
2145impl_sum_avg!(int  u32, sum_u32, average_u32, u64 , sum_u32_raw);
2146
2147impl_variance!(f64, variance_f64);
2148impl_variance!(f32, variance_f32);
2149impl_variance!(i64, variance_i64);
2150impl_variance!(u64, variance_u64);
2151impl_variance!(i32, variance_i32);
2152impl_variance!(u32, variance_u32);
2153
2154impl_sum_float!(sum_f64_raw, f64, W64);
2155impl_sum_float!(sum_f32_raw, f32, W32);
2156impl_sum_int!(sum_i64_raw, i64, i64, W64);
2157impl_sum_int!(sum_u64_raw, u64, u64, W64);
2158impl_sum_int!(sum_i32_raw, i32, i64, W32);
2159impl_sum_int!(sum_u32_raw, u32, u64, W32);
2160
2161impl_sum2_float!(sum2_f64_raw, f64, W64);
2162impl_sum2_float!(sum2_f32_raw, f32, W32);
2163
2164impl_count!(count_generic_raw);
2165
2166impl_skew_kurt_float!(f64, skewness_f64, kurtosis_f64);
2167impl_skew_kurt_float!(f32, skewness_f32, kurtosis_f32);
2168
2169impl_skew_kurt_int!(i64, skewness_i64, kurtosis_i64);
2170impl_skew_kurt_int!(u64, skewness_u64, kurtosis_u64);
2171impl_skew_kurt_int!(i32, skewness_i32, kurtosis_i32);
2172impl_skew_kurt_int!(u32, skewness_u32, kurtosis_u32);
2173
2174/// Computes median of a sorted/unsorted slice with optional nulls.
2175#[inline(always)]
2176pub fn median<T: Ord + Copy>(
2177    d: &[T],
2178    m: Option<&Bitmask>,
2179    null_count: Option<usize>,
2180    sort: bool,
2181) -> Option<T> {
2182    let has_nulls = match null_count {
2183        Some(n) => n > 0,
2184        None => m.is_some(),
2185    };
2186
2187    // collect valid values
2188    let mut v = if !has_nulls {
2189        Vec64::from_slice(d)
2190    } else {
2191        collect_valid(d, m)
2192    };
2193
2194    if v.is_empty() {
2195        return None;
2196    }
2197
2198    let len = v.len();
2199    let mid = len / 2;
2200
2201    if len % 2 == 1 {
2202        // odd: middle element
2203        if sort {
2204            let (_, nth, _) = v.select_nth_unstable(mid);
2205            Some(*nth)
2206        } else {
2207            Some(v[mid])
2208        }
2209    } else {
2210        // even: keep the *lower* median when sorted,
2211        // but for the unsorted case pick the element at index mid
2212        if sort {
2213            let (_, nth, _) = v.select_nth_unstable(mid - 1);
2214            Some(*nth)
2215        } else {
2216            Some(v[mid])
2217        }
2218    }
2219}
2220
2221/// Computes median for float types, with optional nulls and sorting.
2222#[inline(always)]
2223pub fn median_f<T: Float + Copy>(
2224    d: &[T],
2225    m: Option<&Bitmask>,
2226    null_count: Option<usize>,
2227    sort: bool,
2228) -> Option<T> {
2229    let has_nulls = match null_count {
2230        Some(n) => n > 0,
2231        None => m.is_some(),
2232    };
2233
2234    let mut v = if !has_nulls {
2235        Vec64::from_slice(d)
2236    } else {
2237        collect_valid(d, m)
2238    };
2239
2240    if v.is_empty() {
2241        return None;
2242    }
2243
2244    let len = v.len();
2245    let mid = len / 2;
2246
2247    if !sort {
2248        return Some(if len & 1 == 0 {
2249            (v[mid - 1] + v[mid]) / T::from(2.0).unwrap()
2250        } else {
2251            v[mid]
2252        });
2253    }
2254
2255    if len & 1 == 1 {
2256        let (_, nth, _) = v.select_nth_unstable_by(mid, total_cmp_f);
2257        Some(*nth)
2258    } else {
2259        let (_, nth_hi, _) = v.select_nth_unstable_by(mid, total_cmp_f);
2260        let hi = *nth_hi;
2261        let (_, nth_lo, _) = v.select_nth_unstable_by(mid - 1, total_cmp_f);
2262        let lo = *nth_lo;
2263        Some((lo + hi) / T::from(2.0).unwrap())
2264    }
2265}
2266/// Computes percentile for ordinal types, optionally sorted.
2267#[inline(always)]
2268pub fn percentile_ord<T: Ord + Copy>(
2269    d: &[T],
2270    m: Option<&Bitmask>,
2271    null_count: Option<usize>,
2272    p: f64,
2273    sort: bool,
2274) -> Option<T> {
2275    let has_nulls = match null_count {
2276        Some(n) => n > 0,
2277        None => m.is_some(),
2278    };
2279
2280    let mut v = if !has_nulls {
2281        Vec64::from_slice(d)
2282    } else {
2283        collect_valid(d, m)
2284    };
2285
2286    if v.is_empty() {
2287        return None;
2288    }
2289
2290    let idx = ((p / 100.0) * (v.len() as f64 - 1.0)).round() as usize;
2291
2292    if !sort {
2293        return v.get(idx).copied();
2294    }
2295
2296    let (_, nth, _) = v.select_nth_unstable(idx);
2297    Some(*nth)
2298}
2299
2300/// Computes percentile for float types, optionally sorted.
2301#[inline(always)]
2302pub fn percentile_f<T: Float + Copy>(
2303    d: &[T],
2304    m: Option<&Bitmask>,
2305    null_count: Option<usize>,
2306    p: f64,
2307    sort: bool,
2308) -> Option<T> {
2309    let has_nulls = match null_count {
2310        Some(n) => n > 0,
2311        None => m.is_some(),
2312    };
2313
2314    let mut v = if !has_nulls {
2315        Vec64::from_slice(d)
2316    } else {
2317        collect_valid(d, m)
2318    };
2319
2320    if v.is_empty() {
2321        return None;
2322    }
2323
2324    let idx = (p.clamp(0.0, 1.0) * (v.len() - 1) as f64).round() as usize;
2325
2326    if !sort {
2327        return v.get(idx).copied();
2328    }
2329
2330    let (_, nth, _) = v.select_nth_unstable_by(idx, total_cmp_f);
2331    Some(*nth)
2332}
2333
2334/// Computes `q` quantiles for ordinal types, optionally sorted.
2335#[inline(always)]
2336pub fn quantile<T: Ord + Copy>(
2337    d: &[T],
2338    m: Option<&Bitmask>,
2339    null_count: Option<usize>,
2340    q: usize,
2341    sort: bool,
2342) -> Option<Vec64<T>> {
2343    if q < 2 {
2344        return None;
2345    }
2346
2347    let has_nulls = match null_count {
2348        Some(n) => n > 0,
2349        None => m.is_some(),
2350    };
2351
2352    let mut v = if !has_nulls {
2353        Vec64::from_slice(d)
2354    } else {
2355        collect_valid(d, m)
2356    };
2357
2358    let n = v.len();
2359    if n == 0 {
2360        return None;
2361    }
2362
2363    if sort {
2364        v.sort_unstable();
2365    }
2366
2367    let mut out = Vec64::with_capacity(q - 1);
2368    unsafe {
2369        out.set_len(q - 1);
2370    }
2371
2372    for k in 1..q {
2373        let p = k as f64 / q as f64;
2374        let h = 1.0 + (n as f64 - 1.0) * p;
2375        let idx = h.floor() as usize;
2376        let idx = idx.saturating_sub(1).min(n - 1);
2377        unsafe {
2378            *out.get_unchecked_mut(k - 1) = v[idx];
2379        }
2380    }
2381
2382    Some(out)
2383}
2384
2385/// Computes `q` quantiles for float types, optionally sorted.
2386#[inline(always)]
2387pub fn quantile_f<T: Float + Copy>(
2388    d: &[T],
2389    m: Option<&Bitmask>,
2390    null_count: Option<usize>,
2391    q: usize,
2392    sort: bool,
2393) -> Option<Vec64<T>> {
2394    if q < 2 {
2395        return None;
2396    }
2397
2398    let has_nulls = match null_count {
2399        Some(n) => n > 0,
2400        None => m.is_some(),
2401    };
2402
2403    let mut v = if !has_nulls {
2404        Vec64::from_slice(d)
2405    } else {
2406        collect_valid(d, m)
2407    };
2408
2409    let n = v.len();
2410    if n == 0 {
2411        return None;
2412    }
2413
2414    if sort {
2415        sort_float(&mut v);
2416    }
2417
2418    let mut out = Vec64::with_capacity(q - 1);
2419    unsafe {
2420        out.set_len(q - 1);
2421    }
2422
2423    for k in 1..q {
2424        let p = k as f64 / q as f64;
2425        let h = 1.0 + (n as f64 - 1.0) * p;
2426        let hf = h.floor();
2427        let hc = h.ceil();
2428
2429        let idx_lo = (hf as usize).saturating_sub(1).min(n - 1);
2430        let idx_hi = (hc as usize).saturating_sub(1).min(n - 1);
2431
2432        let value = if idx_lo == idx_hi {
2433            v[idx_lo]
2434        } else {
2435            let weight = T::from(h - hf).unwrap();
2436            let v_lo = v[idx_lo];
2437            let v_hi = v[idx_hi];
2438            v_lo + (v_hi - v_lo) * weight
2439        };
2440
2441        unsafe {
2442            *out.get_unchecked_mut(k - 1) = value;
2443        }
2444    }
2445
2446    Some(out)
2447}
2448
2449/// Computes interquartile range for ordinal types.
2450#[inline(always)]
2451pub fn iqr<T>(d: &[T], m: Option<&Bitmask>, null_count: Option<usize>, s: bool) -> Option<T>
2452where
2453    T: Ord + Copy + Sub<Output = T>,
2454{
2455    Some(percentile_ord(d, m, null_count, 75.0, s)? - percentile_ord(d, m, null_count, 25.0, s)?)
2456}
2457
2458/// Computes interquartile range for float types.
2459#[inline(always)]
2460pub fn iqr_f<T: Float + Copy>(
2461    d: &[T],
2462    m: Option<&Bitmask>,
2463    null_count: Option<usize>,
2464    s: bool,
2465) -> Option<T> {
2466    Some(percentile_f(d, m, null_count, 0.75, s)? - percentile_f(d, m, null_count, 0.25, s)?)
2467}
2468
2469/// Computes mode (most frequent value) for ordinal types.
2470#[inline(always)]
2471pub fn mode<T: Eq + Hash + Copy>(
2472    d: &[T],
2473    m: Option<&Bitmask>,
2474    null_count: Option<usize>,
2475) -> Option<T> {
2476    let has_nulls = match null_count {
2477        Some(n) => n > 0,
2478        None => m.is_some(),
2479    };
2480
2481    #[cfg(feature = "fast_hash")]
2482    let mut f = AHashMap::with_capacity(d.len());
2483    #[cfg(not(feature = "fast_hash"))]
2484    let mut f = HashMap::with_capacity(d.len());
2485
2486    if !has_nulls {
2487        for &v in d {
2488            *f.entry(v).or_insert(0usize) += 1;
2489        }
2490    } else {
2491        let mb = m.expect("mode: nulls present but mask is None");
2492        for (i, &v) in d.iter().enumerate() {
2493            if unsafe { mb.get_unchecked(i) } {
2494                *f.entry(v).or_insert(0usize) += 1;
2495            }
2496        }
2497    }
2498
2499    f.into_iter().max_by_key(|e| e.1).map(|e| e.0)
2500}
2501
2502/// Computes mode for float types via bit-level equivalence.
2503#[inline(always)]
2504pub fn mode_f<T: ToBits + Copy>(
2505    d: &[T],
2506    m: Option<&Bitmask>,
2507    null_count: Option<usize>,
2508) -> Option<T> {
2509    let has_nulls = match null_count {
2510        Some(n) => n > 0,
2511        None => m.is_some(),
2512    };
2513
2514    #[cfg(feature = "fast_hash")]
2515    let mut f: AHashMap<T::Bits, (usize, T)> = AHashMap::with_capacity(d.len());
2516    #[cfg(not(feature = "fast_hash"))]
2517    let mut f: HashMap<T::Bits, (usize, T)> = HashMap::with_capacity(d.len());
2518
2519    if !has_nulls {
2520        for &v in d {
2521            let e = f.entry(v.to_bits()).or_insert((0, v));
2522            e.0 += 1;
2523        }
2524    } else {
2525        let mb = m.expect("mode_f: nulls present but mask is None");
2526        for (i, &v) in d.iter().enumerate() {
2527            if unsafe { mb.get_unchecked(i) } {
2528                let e = f.entry(v.to_bits()).or_insert((0, v));
2529                e.0 += 1;
2530            }
2531        }
2532    }
2533
2534    f.into_values().max_by_key(|x| x.0).map(|x| x.1)
2535}
2536
2537/// Counts distinct values in an ordinal slice, with optional nulls.
2538#[inline(always)]
2539pub fn count_distinct<T: Eq + Hash + Copy>(
2540    d: &[T],
2541    m: Option<&Bitmask>,
2542    null_count: Option<usize>,
2543) -> usize {
2544    let has_nulls = match null_count {
2545        Some(n) => n > 0,
2546        None => m.is_some(),
2547    };
2548
2549    if !has_nulls {
2550        d.iter().collect::<HashSet<_>>().len()
2551    } else {
2552        let mb = m.expect("count_distinct: nulls present but mask is None");
2553        d.iter()
2554            .enumerate()
2555            .filter(|(i, _)| unsafe { mb.get_unchecked(*i) })
2556            .map(|(_, v)| v)
2557            .collect::<HashSet<_>>()
2558            .len()
2559    }
2560}
2561
2562/// Counts distinct values in a float slice using bitwise equality.
2563#[inline(always)]
2564pub fn count_distinct_f<T: Float + ToBits + Copy>(
2565    d: &[T],
2566    m: Option<&Bitmask>,
2567    null_count: Option<usize>,
2568) -> usize {
2569    let has_nulls = match null_count {
2570        Some(n) => n > 0,
2571        None => m.is_some(),
2572    };
2573
2574    if !has_nulls {
2575        d.iter().map(|v| v.to_bits()).collect::<HashSet<_>>().len()
2576    } else {
2577        let mb = m.expect("count_distinct_f: nulls present but mask is None");
2578        d.iter()
2579            .enumerate()
2580            .filter(|(i, _)| unsafe { mb.get_unchecked(*i) })
2581            .map(|(_, v)| v.to_bits())
2582            .collect::<HashSet<_>>()
2583            .len()
2584    }
2585}
2586
2587/// Computes harmonic mean for integer types.
2588#[inline(always)]
2589pub fn harmonic_mean_int<T: NumCast + Copy + PartialOrd>(
2590    d: &[T],
2591    m: Option<&Bitmask>,
2592    null_count: Option<usize>,
2593) -> f64 {
2594    let has_nulls = match null_count {
2595        Some(n) => n > 0,
2596        None => m.is_some(),
2597    };
2598
2599    let v: Vec<f64> = if !has_nulls {
2600        d.iter().map(|&x| NumCast::from(x).unwrap()).collect()
2601    } else {
2602        collect_valid(d, m)
2603            .into_iter()
2604            .map(|x| NumCast::from(x).unwrap())
2605            .collect()
2606    };
2607
2608    if v.is_empty() {
2609        panic!("harmonic_mean_int: input data is empty");
2610    }
2611    if v.iter().any(|&x| x <= 0.0) {
2612        panic!("harmonic_mean_int: non-positive values are invalid");
2613    }
2614
2615    let n = v.len() as f64;
2616    let denom: f64 = v.iter().map(|&x| 1.0 / x).sum();
2617
2618    if denom == 0.0 {
2619        panic!("harmonic_mean_int: denominator is zero");
2620    }
2621
2622    n / denom
2623}
2624
2625/// Computes harmonic mean for unsigned integers.
2626#[inline(always)]
2627pub fn harmonic_mean_uint<T: NumCast + Copy + PartialOrd>(
2628    d: &[T],
2629    m: Option<&Bitmask>,
2630    null_count: Option<usize>,
2631) -> f64 {
2632    harmonic_mean_int(d, m, null_count)
2633}
2634
2635/// Computes harmonic mean for float types.
2636#[inline(always)]
2637pub fn harmonic_mean_f<T: Float + Into<f64> + Copy>(
2638    d: &[T],
2639    m: Option<&Bitmask>,
2640    null_count: Option<usize>,
2641) -> f64 {
2642    let has_nulls = match null_count {
2643        Some(n) => n > 0,
2644        None => m.is_some(),
2645    };
2646
2647    let (mut s, mut n) = (0.0, 0usize);
2648
2649    if !has_nulls {
2650        for &v in d {
2651            let x = v.into();
2652            if x <= 0.0 {
2653                panic!("harmonic_mean_f: non-positive values are invalid");
2654            }
2655            s += 1.0 / x;
2656            n += 1;
2657        }
2658    } else {
2659        let mb = m.expect("harmonic_mean_f: nulls present but mask is None");
2660        for (i, &v) in d.iter().enumerate() {
2661            if unsafe { mb.get_unchecked(i) } {
2662                let x = v.into();
2663                if x <= 0.0 {
2664                    panic!("harmonic_mean_f: non-positive values are invalid");
2665                }
2666                s += 1.0 / x;
2667                n += 1;
2668            }
2669        }
2670    }
2671
2672    if n == 0 {
2673        panic!("harmonic_mean_f: input data is empty");
2674    }
2675    if s == 0.0 {
2676        panic!("harmonic_mean_f: denominator is zero");
2677    }
2678
2679    n as f64 / s
2680}
2681
2682/// Computes geometric mean for integer types.
2683#[inline(always)]
2684pub fn geometric_mean_int<T: ToPrimitive + Copy>(
2685    d: &[T],
2686    m: Option<&Bitmask>,
2687    null_count: Option<usize>,
2688) -> f64 {
2689    let has_nulls = match null_count {
2690        Some(n) => n > 0,
2691        None => m.is_some(),
2692    };
2693
2694    let (mut s, mut n) = (0.0, 0usize);
2695    if !has_nulls {
2696        for &v in d.iter() {
2697            let val = v.to_f64().unwrap();
2698            if val <= 0.0 {
2699                panic!("geometric_mean_int: non-positive values are invalid");
2700            }
2701            s += val.ln();
2702            n += 1;
2703        }
2704    } else {
2705        let mb = m.expect("geometric_mean_int: nulls present but mask is None");
2706        for (i, &v) in d.iter().enumerate() {
2707            if unsafe { mb.get_unchecked(i) } {
2708                let val = v.to_f64().unwrap();
2709                if val <= 0.0 {
2710                    panic!("geometric_mean_int: non-positive values are invalid");
2711                }
2712                s += val.ln();
2713                n += 1;
2714            }
2715        }
2716    }
2717
2718    if n == 0 {
2719        panic!("geometric_mean_int: input data is empty");
2720    }
2721
2722    (s / n as f64).exp()
2723}
2724
2725/// Computes geometric mean for unsigned integer types.
2726#[inline(always)]
2727pub fn geometric_mean_uint<T: ToPrimitive + Copy>(
2728    d: &[T],
2729    m: Option<&Bitmask>,
2730    null_count: Option<usize>,
2731) -> f64 {
2732    geometric_mean_int(d, m, null_count)
2733}
2734
2735/// Computes geometric mean for float types.
2736#[inline(always)]
2737pub fn geometric_mean_f<T: Float + Into<f64> + Copy>(
2738    d: &[T],
2739    m: Option<&Bitmask>,
2740    null_count: Option<usize>,
2741) -> f64 {
2742    let has_nulls = match null_count {
2743        Some(n) => n > 0,
2744        None => m.is_some(),
2745    };
2746
2747    let (mut s, mut n) = (0.0, 0usize);
2748    if !has_nulls {
2749        for &v in d.iter() {
2750            let val = v.into();
2751            if val <= 0.0 {
2752                panic!("geometric_mean_f: non-positive values are invalid");
2753            }
2754            s += val.ln();
2755            n += 1;
2756        }
2757    } else {
2758        let mb = m.expect("geometric_mean_f: nulls present but mask is None");
2759        for (i, &v) in d.iter().enumerate() {
2760            if unsafe { mb.get_unchecked(i) } {
2761                let val = v.into();
2762                if val <= 0.0 {
2763                    panic!("geometric_mean_f: non-positive values are invalid");
2764                }
2765                s += val.ln();
2766                n += 1;
2767            }
2768        }
2769    }
2770
2771    if n == 0 {
2772        panic!("geometric_mean_f: input data is empty");
2773    }
2774
2775    (s / n as f64).exp()
2776}
2777
2778/// Returns the accumulating product of all values
2779#[inline(always)]
2780pub fn agg_product<T: Copy + One + Mul<Output = T> + Zero + PartialEq + 'static>(
2781    data: &[T],
2782    mask: Option<&Bitmask>,
2783    offset: usize,
2784    len: usize,
2785) -> T {
2786    let mut prod = T::one();
2787    match mask {
2788        Some(mask) => {
2789            let mask = mask.slice_clone(offset, len);
2790            for (i, &x) in data[offset..offset + len].iter().enumerate() {
2791                if unsafe { mask.get_unchecked(i) } {
2792                    prod = prod * x;
2793                    if prod == T::zero() {
2794                        break;
2795                    }
2796                }
2797            }
2798        }
2799        None => {
2800            for &x in &data[offset..offset + len] {
2801                prod = prod * x;
2802                if prod == T::zero() {
2803                    break;
2804                }
2805            }
2806        }
2807    }
2808    prod
2809}
2810
2811#[cfg(test)]
2812mod tests {
2813    use minarrow::{Vec64, vec64};
2814    use num_traits::Float;
2815
2816    use super::*;
2817
2818    // Build Arrow-style validity mask from bools (least significant bit is index 0)
2819    fn mask_from_bools(bits: &[bool]) -> Bitmask {
2820        Bitmask::from_bools(bits)
2821    }
2822
2823    // Compare floats robustly (for variance etc)
2824    fn approx_eq(a: f64, b: f64, eps: f64) -> bool {
2825        (a - b).abs() <= eps
2826    }
2827
2828    #[test]
2829    fn test_stat_moments_f64_dense() {
2830        let data = vec64![1.0, 2.0, 3.0, 4.0];
2831        let (sum, sum2, count) = stat_moments_f64(&data, None, None);
2832        assert_eq!(sum, 10.0);
2833        assert_eq!(sum2, 30.0);
2834        assert_eq!(count, 4);
2835    }
2836
2837    #[test]
2838    fn test_stat_moments_f64_masked() {
2839        let data = vec64![1.0, 2.0, 3.0, 4.0];
2840        let mask = mask_from_bools(&[true, false, true, false]);
2841        let (sum, sum2, count) = stat_moments_f64(&data, Some(&mask), Some(2));
2842        assert_eq!(sum, 4.0);
2843        assert_eq!(sum2, 10.0);
2844        assert_eq!(count, 2);
2845    }
2846
2847    #[test]
2848    fn test_stat_moments_i32_dense() {
2849        let data = vec64![1i32, 2, 3, 4];
2850        let (sum, sum2, count) = stat_moments_i32(&data, None, None);
2851        assert_eq!(sum, 10.0);
2852        assert_eq!(sum2, 30.0);
2853        assert_eq!(count, 4);
2854    }
2855
2856    #[test]
2857    fn test_stat_moments_i32_masked() {
2858        let data = vec64![1i32, 2, 3, 4];
2859        let mask = mask_from_bools(&[true, false, false, true]);
2860        let (sum, sum2, count) = stat_moments_i32(&data, Some(&mask), Some(2));
2861        assert_eq!(sum, 5.0);
2862        assert_eq!(sum2, 17.0);
2863        assert_eq!(count, 2);
2864    }
2865
2866    #[test]
2867    fn test_min_max_range_f64_dense() {
2868        let data = vec64![5.0, 3.0, 1.0, 7.0];
2869        assert_eq!(min_f64(&data, None, None), Some(1.0));
2870        assert_eq!(max_f64(&data, None, None), Some(7.0));
2871        assert_eq!(range_f64(&data, None, None), Some((1.0, 7.0)));
2872    }
2873
2874    #[test]
2875    fn test_min_max_range_f64_masked() {
2876        let data = vec64![5.0, 3.0, 1.0, 7.0];
2877        let mask = mask_from_bools(&[false, true, true, false]);
2878        assert_eq!(min_f64(&data, Some(&mask), Some(2)), Some(1.0));
2879        assert_eq!(max_f64(&data, Some(&mask), Some(2)), Some(3.0));
2880        assert_eq!(range_f64(&data, Some(&mask), Some(2)), Some((1.0, 3.0)));
2881    }
2882
2883    #[test]
2884    fn test_sum_and_avg_i32() {
2885        let data = vec64![1i32, 2, 3, 4];
2886        assert_eq!(sum_i32(&data, None, None), Some(10));
2887        assert_eq!(average_i32(&data, None, None), Some(2.5));
2888        let mask = mask_from_bools(&[true, false, true, true]);
2889        assert_eq!(sum_i32(&data, Some(&mask), Some(1)), Some(8));
2890        assert_eq!(average_i32(&data, Some(&mask), Some(1)), Some(8.0 / 3.0));
2891    }
2892
2893    #[test]
2894    fn test_sum_and_avg_f32() {
2895        let data = vec64![1.0f32, 2.0, 3.0, 4.0];
2896        assert_eq!(sum_f32(&data, None, None), Some(10.0));
2897        assert_eq!(average_f32(&data, None, None), Some(2.5));
2898        let mask = mask_from_bools(&[false, true, true, false]);
2899        assert_eq!(sum_f32(&data, Some(&mask), Some(2)), Some(5.0));
2900        assert_eq!(average_f32(&data, Some(&mask), Some(2)), Some(2.5));
2901    }
2902
2903    #[test]
2904    fn test_variance_f64() {
2905        let data = vec64![1.0, 2.0, 3.0, 4.0];
2906        let v = variance_f64(&data, None, None, false);
2907        assert!(approx_eq(v.unwrap(), 1.25, 1e-9));
2908        let mask = mask_from_bools(&[true, true, false, false]);
2909        let v2 = variance_f64(&data, Some(&mask), Some(2), false);
2910        assert!(approx_eq(v2.unwrap(), 0.25, 1e-9));
2911    }
2912
2913    #[test]
2914    fn test_count_generic_raw() {
2915        let n = 100;
2916        let mask = mask_from_bools(&[true; 100]);
2917        assert_eq!(count_generic_raw(n, None, None), 100);
2918        assert_eq!(count_generic_raw(n, Some(&mask), Some(0)), 100);
2919
2920        let mask = mask_from_bools(&(0..n).map(|i| i % 2 == 0).collect::<Vec<_>>());
2921        assert_eq!(count_generic_raw(n, Some(&mask), Some(50)), 50);
2922    }
2923
2924    #[test]
2925    fn test_median_ord_sorted() {
2926        let data = vec64![1i32, 4, 2, 3];
2927        assert_eq!(median(&data, None, None, true), Some(2));
2928    }
2929
2930    #[test]
2931    fn test_median_ord_unsorted() {
2932        let data = vec64![1i32, 4, 2, 3];
2933        // unsorted, even-length ⇒ index mid = 2 ⇒ value 2
2934        assert_eq!(median(&data, None, None, false), Some(2));
2935    }
2936
2937    #[test]
2938    fn test_median_ord_masked() {
2939        let data = vec64![1i32, 4, 2, 3];
2940        let mask = mask_from_bools(&[true, true, false, false]);
2941        assert_eq!(median(&data, Some(&mask), Some(2), true), Some(1));
2942    }
2943
2944    #[test]
2945    fn test_median_f_sorted() {
2946        let dataf = vec64![1.0f64, 2.0, 3.0, 4.0];
2947        assert_eq!(median_f(&dataf, None, None, true), Some(2.5));
2948    }
2949
2950    #[test]
2951    fn test_median_f_masked() {
2952        let dataf = vec64![1.0f64, 2.0, 3.0, 4.0];
2953        let mask = mask_from_bools(&[true, false, true, false]);
2954        assert_eq!(median_f(&dataf, Some(&mask), Some(2), true), Some(2.0));
2955    }
2956
2957    #[test]
2958    fn test_percentile_ord_and_f() {
2959        let data = vec64![10i32, 20, 30, 40, 50];
2960        assert_eq!(percentile_ord(&data, None, None, 0.0, true), Some(10));
2961        assert_eq!(percentile_ord(&data, None, None, 100.0, true), Some(50));
2962        assert_eq!(percentile_ord(&data, None, None, 50.0, true), Some(30));
2963        let mask = mask_from_bools(&[false, true, true, false, true]);
2964        assert_eq!(
2965            percentile_ord(&data, Some(&mask), Some(2), 50.0, true),
2966            Some(30)
2967        );
2968
2969        let dataf = vec64![1.0f64, 2.0, 3.0, 4.0, 5.0];
2970        assert_eq!(percentile_f(&dataf, None, None, 0.0, true), Some(1.0));
2971        assert_eq!(percentile_f(&dataf, None, None, 1.0, true), Some(5.0));
2972        assert_eq!(percentile_f(&dataf, None, None, 0.5, true), Some(3.0));
2973    }
2974
2975    #[test]
2976    fn test_quantile_ord_and_f() {
2977        let data = vec64![1i32, 2, 3, 4, 5];
2978        let out = quantile(&data, None, None, 4, true).unwrap();
2979        assert_eq!(out.len(), 3);
2980        assert_eq!(out, Vec64::from_slice(&[2, 3, 4]));
2981
2982        let mask = mask_from_bools(&[true, false, true, true, false]);
2983        let out2 = quantile(&data, Some(&mask), Some(2), 3, true).unwrap();
2984        assert_eq!(out2, Vec64::from_slice(&[1, 3]));
2985
2986        let dataf = vec64![10.0f64, 20.0, 30.0, 40.0, 50.0];
2987        let out = quantile_f(&dataf, None, None, 4, true).unwrap();
2988        assert_eq!(out.len(), 3);
2989        assert!(approx_eq(out[1], 30.0, 1e-9));
2990    }
2991
2992    #[test]
2993    fn test_iqr_ord() {
2994        let data = vec64![1i32, 2, 3, 4, 5];
2995        assert_eq!(iqr(&data, None, None, true), Some(2));
2996    }
2997
2998    #[test]
2999    fn test_iqr_f64() {
3000        let dataf = vec64![10.0f64, 20.0, 30.0, 40.0, 50.0];
3001        // p*(n−1) rounding definition, Q3=40, Q1=20 -> IQR=20
3002        assert_eq!(iqr_f(&dataf, None, None, true), Some(20.0));
3003    }
3004    #[test]
3005    fn test_mode_ord_and_f() {
3006        let data = vec64![1i32, 2, 3, 2, 2, 4, 5, 1];
3007        assert_eq!(mode(&data, None, None), Some(2));
3008        let dataf = vec64![1.0f32, 2.0, 2.0, 3.0, 3.0, 3.0];
3009        assert_eq!(mode_f(&dataf, None, None), Some(3.0));
3010        let data = vec64![1i32, 2, 3, 3, 2, 1];
3011        let m = mode(&data, None, None);
3012        assert!(m == Some(1) || m == Some(2) || m == Some(3));
3013    }
3014
3015    #[test]
3016    fn test_count_distinct_and_f() {
3017        let data = vec64![1i32, 2, 2, 3, 3, 4];
3018        assert_eq!(count_distinct(&data, None, None), 4);
3019        let dataf = vec64![1.0f32, 2.0, 2.0, 3.0, 3.0, 4.0];
3020        assert_eq!(count_distinct_f(&dataf, None, None), 4);
3021    }
3022
3023    #[test]
3024    fn test_harmonic_and_geometric_mean() {
3025        let data = vec64![1.0f64, 2.0, 4.0];
3026        let hm = harmonic_mean_f(&data, None, None);
3027        assert!(approx_eq(hm, 1.714285714, 1e-6));
3028
3029        let gm = geometric_mean_f(&data, None, None);
3030        assert!(approx_eq(gm, 2.0, 1e-6));
3031
3032        let data = vec64![1i32, 2, 4];
3033        let hm = harmonic_mean_int(&data, None, None);
3034        assert!(approx_eq(hm, 1.714285714, 1e-6));
3035
3036        let gm = geometric_mean_int(&data, None, None);
3037        assert!(approx_eq(gm, 2.0, 1e-6));
3038    }
3039
3040    #[test]
3041    fn test_empty_arrays() {
3042        assert_eq!(stat_moments_f64(&[], None, None), (0.0, 0.0, 0));
3043        assert_eq!(stat_moments_i32(&[], None, None), (0.0, 0.0, 0));
3044        assert_eq!(sum_f64(&[], None, None), None);
3045        assert_eq!(average_i32(&[], None, None), None);
3046        assert_eq!(variance_f64(&[], None, None, false), None);
3047        assert_eq!(median::<i32>(&[], None, None, true), None);
3048        assert_eq!(median_f::<f64>(&[], None, None, true), None);
3049        assert_eq!(mode::<i32>(&[], None, None), None);
3050        assert_eq!(mode_f::<f32>(&[], None, None), None);
3051        assert_eq!(quantile::<i32>(&[], None, None, 3, true), None);
3052        assert_eq!(quantile_f::<f64>(&[], None, None, 3, true), None);
3053        assert_eq!(iqr::<i32>(&[], None, None, true), None);
3054        assert_eq!(iqr_f::<f64>(&[], None, None, true), None);
3055        assert_eq!(count_distinct::<i32>(&[], None, None), 0);
3056        assert_eq!(count_distinct_f::<f32>(&[], None, None), 0);
3057    }
3058
3059    // --- i64, u64, f32, u32: moments and basic stats (dense/masked) ---
3060    #[test]
3061    fn test_stat_moments_all_types() {
3062        let i64_data = vec64![-8i64, 2, 8, -2, 3];
3063        let mask = mask_from_bools(&[true, false, true, true, true]);
3064        let (sum, sum2, count) = stat_moments_i64(&i64_data, Some(&mask), Some(1));
3065        assert_eq!(sum, (-8.0) + 8.0 + (-2.0) + 3.0);
3066        assert_eq!(sum2, 64.0 + 64.0 + 4.0 + 9.0);
3067        assert_eq!(count, 4);
3068
3069        let u64_data = vec64![1u64, 2, 3, 4, 5];
3070        let mask = mask_from_bools(&[false, true, true, true, false]);
3071        let (sum, sum2, count) = stat_moments_u64(&u64_data, Some(&mask), Some(2));
3072        assert_eq!(sum, 2.0 + 3.0 + 4.0);
3073        assert_eq!(sum2, 4.0 + 9.0 + 16.0);
3074        assert_eq!(count, 3);
3075
3076        let f32_data = vec64![0.5f32, 2.0, 4.5, -2.0, 3.0];
3077        let mask = mask_from_bools(&[true, true, false, false, true]);
3078        let (sum, sum2, count) = stat_moments_f32(&f32_data, Some(&mask), Some(2));
3079        assert!(approx_eq(sum, 0.5 + 2.0 + 3.0, 1e-6));
3080        assert!(approx_eq(sum2, 0.25 + 4.0 + 9.0, 1e-6));
3081        assert_eq!(count, 3);
3082
3083        let u32_data = vec64![100u32, 200, 300, 400, 500];
3084        let mask = mask_from_bools(&[true, false, false, true, true]);
3085        let (sum, sum2, count) = stat_moments_u32(&u32_data, Some(&mask), Some(2));
3086        assert_eq!(sum, 100.0 + 400.0 + 500.0);
3087        assert_eq!(sum2, 10000.0 + 160000.0 + 250000.0);
3088        assert_eq!(count, 3);
3089    }
3090
3091    #[test]
3092    fn test_reduce_min_max_for_all_types() {
3093        let i64_data = vec64![-8i64, 2, 8, -2, 3];
3094        assert_eq!(reduce_min_max_i64(&i64_data, None, None), Some((-8, 8)));
3095        let mask = mask_from_bools(&[true, false, false, false, true]);
3096        assert_eq!(
3097            reduce_min_max_i64(&i64_data, Some(&mask), Some(3)),
3098            Some((-8, 3))
3099        );
3100
3101        let u64_data = vec64![1u64, 2, 3, 4, 5];
3102        assert_eq!(reduce_min_max_u64(&u64_data, None, None), Some((1, 5)));
3103        let mask = mask_from_bools(&[false, false, true, true, false]);
3104        assert_eq!(
3105            reduce_min_max_u64(&u64_data, Some(&mask), Some(3)),
3106            Some((3, 4))
3107        );
3108
3109        let i32_data = vec64![9i32, -2, 5, -2];
3110        assert_eq!(reduce_min_max_i32(&i32_data, None, None), Some((-2, 9)));
3111        let mask = mask_from_bools(&[false, false, true, false]);
3112        assert_eq!(
3113            reduce_min_max_i32(&i32_data, Some(&mask), Some(3)),
3114            Some((5, 5))
3115        );
3116
3117        let u32_data = vec64![7u32, 2, 5, 11];
3118        assert_eq!(reduce_min_max_u32(&u32_data, None, None), Some((2, 11)));
3119        let mask = mask_from_bools(&[true, false, false, true]);
3120        assert_eq!(
3121            reduce_min_max_u32(&u32_data, Some(&mask), Some(2)),
3122            Some((7, 11))
3123        );
3124
3125        let f64_data = vec64![f64::NAN, 3.0, 1.0, 7.0];
3126        assert_eq!(reduce_min_max_f64(&f64_data, None, None), Some((1.0, 7.0))); // NaN ignored
3127    }
3128
3129    #[test]
3130    fn test_empty_and_all_null_min_max() {
3131        let empty: [i32; 0] = [];
3132        assert_eq!(reduce_min_max_i32(&empty, None, None), None);
3133
3134        let all_nulls = vec64![5i32, 10, 15];
3135        let mask = mask_from_bools(&[false, false, false]);
3136        assert_eq!(reduce_min_max_i32(&all_nulls, Some(&mask), Some(3)), None);
3137    }
3138
3139    // --- sum/avg: all types, empty and all-null ---
3140    #[test]
3141    fn test_sum_avg_and_variance_all_types() {
3142        // u32
3143        let data = vec64![100u32, 200, 0, 400];
3144        assert_eq!(sum_u32(&data, None, None), Some(700));
3145        assert_eq!(average_u32(&data, None, None), Some(175.0));
3146        let mask = mask_from_bools(&[false, true, false, true]);
3147        assert_eq!(sum_u32(&data, Some(&mask), Some(2)), Some(600));
3148        assert_eq!(average_u32(&data, Some(&mask), Some(2)), Some(300.0));
3149        assert_eq!(
3150            variance_u32(&data, None, None, false).unwrap(),
3151            ((100f64.powi(2) + 200f64.powi(2) + 400f64.powi(2)) / 4.0 - (700.0 / 4.0).powi(2))
3152        );
3153
3154        // i64
3155        let data = vec64![5i64, -10, 25, 5];
3156        assert_eq!(sum_i64(&data, None, None), Some(25));
3157        assert_eq!(average_i64(&data, None, None), Some(6.25));
3158        let mask = mask_from_bools(&[true, false, false, false]);
3159        assert_eq!(sum_i64(&data, Some(&mask), Some(3)), Some(5));
3160        assert_eq!(average_i64(&data, Some(&mask), Some(3)), Some(5.0));
3161        assert!(variance_i64(&data, None, None, false).unwrap() > 0.0);
3162
3163        // u64
3164        let data = vec64![10u64, 0, 10, 10];
3165        assert_eq!(sum_u64(&data, None, None), Some(30));
3166        assert_eq!(average_u64(&data, None, None), Some(7.5));
3167        let mask = mask_from_bools(&[false, false, false, false]);
3168        assert_eq!(sum_u64(&data, Some(&mask), Some(4)), None);
3169        assert_eq!(average_u64(&data, Some(&mask), Some(4)), None);
3170
3171        // f32
3172        let data = vec64![2.0f32, 4.0, 6.0];
3173        assert_eq!(sum_f32(&data, None, None), Some(12.0));
3174        assert_eq!(average_f32(&data, None, None), Some(4.0));
3175        let mask = mask_from_bools(&[true, false, true]);
3176        assert_eq!(sum_f32(&data, Some(&mask), Some(1)), Some(8.0));
3177        assert_eq!(average_f32(&data, Some(&mask), Some(1)), Some(4.0));
3178        assert!(variance_f32(&data, None, None, false).unwrap() > 0.0);
3179
3180        // f64
3181        let data = vec64![1.0f64, f64::INFINITY, f64::NAN];
3182        assert!(
3183            sum_f64(&data, None, None).unwrap().is_nan()
3184                || sum_f64(&data, None, None).unwrap().is_infinite()
3185        );
3186    }
3187
3188    // --- min/max/range: singleton, duplicates, negative, NaN, INF ---
3189    #[test]
3190    fn test_min_max_range_i32_singleton() {
3191        let d = vec64![1i32];
3192        assert_eq!(min_i32(&d, None, None), Some(1));
3193        assert_eq!(max_i32(&d, None, None), Some(1));
3194        assert_eq!(range_i32(&d, None, None), Some((1, 1)));
3195    }
3196
3197    #[test]
3198    fn test_min_f64_nan() {
3199        let d = vec64![5.0f64, f64::NAN];
3200        // we ignore NaNs, so the min of [5.0, NaN] is 5.0
3201        assert_eq!(min_f64(&d, None, None), Some(5.0));
3202    }
3203
3204    #[test]
3205    fn test_max_f64_nan() {
3206        let d = vec64![5.0f64, f64::NAN];
3207        // we ignore NaNs, so the max of [5.0, NaN] is 5.0
3208        assert_eq!(max_f64(&d, None, None), Some(5.0));
3209    }
3210
3211    #[test]
3212    fn test_min_f32_neg_inf() {
3213        let d = vec64![f32::NEG_INFINITY, 0.0f32, 5.0f32];
3214        assert_eq!(min_f32(&d, None, None), Some(f32::NEG_INFINITY));
3215    }
3216
3217    #[test]
3218    fn test_max_f32_regular() {
3219        let d = vec64![f32::NEG_INFINITY, 0.0f32, 5.0f32];
3220        assert_eq!(max_f32(&d, None, None), Some(5.0));
3221    }
3222
3223    #[test]
3224    fn test_median_ord_even() {
3225        let d = vec64![2i64, 4, 6, 8];
3226        // sorted ⇒ [2,4,6,8], lower median is 4
3227        // Keep integers as integers - with users able
3228        // to cast to floats as needed
3229        assert_eq!(median(&d, None, None, true), Some(4));
3230    }
3231
3232    #[test]
3233    fn test_median_ord_odd() {
3234        let d = vec64![7i32, 3, 5];
3235        assert_eq!(median(&d, None, None, true), Some(5));
3236    }
3237
3238    #[test]
3239    fn test_median_ord_with_mask() {
3240        let d = vec64![7i32, 3, 5];
3241        let mask = mask_from_bools(&[false, true, false]);
3242        assert_eq!(median(&d, Some(&mask), Some(2), true), Some(3));
3243    }
3244
3245    #[test]
3246    fn test_quantile_ord() {
3247        let d = vec64![10u32, 20, 30, 40, 50, 60];
3248        let q = quantile(&d, None, None, 3, true).unwrap();
3249        assert_eq!(q.len(), 2);
3250        assert!(q[0] >= 20 && q[1] >= 40);
3251    }
3252
3253    #[test]
3254    fn test_median_f64() {
3255        let df = vec64![1.0f64, 5.0, 3.0, 2.0];
3256        assert!(approx_eq(
3257            median_f(&df, None, None, true).unwrap(),
3258            2.5,
3259            1e-9
3260        ));
3261    }
3262
3263    #[test]
3264    fn test_quantile_f64() {
3265        let df = vec64![1.0f64, 5.0, 3.0, 2.0];
3266        let qf = quantile_f(&df, None, None, 3, true).unwrap();
3267        // we produce exactly q−1 = 2 interior tertile values
3268        assert_eq!(qf.len(), 2);
3269        assert!(qf.iter().all(|&v| v >= 1.0 && v <= 5.0));
3270    }
3271
3272    #[test]
3273    fn test_iqr_i32() {
3274        let d = vec64![1i32, 3, 2, 8, 4, 5];
3275        assert!(iqr(&d, None, None, true).unwrap() >= 2);
3276    }
3277
3278    #[test]
3279    fn test_iqr_f32() {
3280        let d = vec64![10.0f32, 100.0, 30.0, 50.0];
3281        assert!(iqr_f(&d, None, None, true).unwrap() > 0.0);
3282    }
3283
3284    // --- mode & count_distinct: ties, all unique, all equal, NaN/Inf ---
3285    #[test]
3286    fn test_mode_and_count_distinct_exhaustive() {
3287        // Mode: all unique
3288        let d = vec64![7u64, 5, 3, 1];
3289        assert!(d.contains(&mode(&d, None, None).unwrap()));
3290
3291        // Mode: all equal
3292        let d = vec64![2i64; 6];
3293        assert_eq!(mode(&d, None, None), Some(2));
3294
3295        // Mode: tie
3296        let d = vec64![3i32, 5, 3, 5];
3297        let m = mode(&d, None, None);
3298        assert!(m == Some(3) || m == Some(5));
3299
3300        // Mode: NaN in floats, inf, repeated
3301        let d = vec64![f32::NAN, f32::INFINITY, 5.0, 5.0, f32::NAN];
3302        let m = mode_f(&d, None, None);
3303        assert!(m == Some(5.0) || m.unwrap().is_nan() || m.unwrap().is_infinite());
3304
3305        // Count distinct: all equal, all unique, mask
3306        let d = vec64![1u64, 1, 1, 1];
3307        assert_eq!(count_distinct(&d, None, None), 1);
3308
3309        let d = vec64![1i32, 2, 3, 4, 5];
3310        assert_eq!(count_distinct(&d, None, None), 5);
3311
3312        let mask = mask_from_bools(&[true, false, false, true, true]);
3313        assert_eq!(count_distinct(&d, Some(&mask), Some(2)), 3);
3314
3315        let d = vec64![f64::NAN, f64::INFINITY, 0.0];
3316        assert_eq!(count_distinct_f(&d, None, None), 3);
3317    }
3318
3319    // --- harmonic & geometric means: all types, zero/negative/NaN ---
3320    #[test]
3321    fn test_harmonic_mean_f64() {
3322        let data = vec64![0.5f64, 2.0, 1.0];
3323        // the true harmonic mean is 3/(2 + 0.5 + 1) ≈ 0.8571
3324        let hm = harmonic_mean_f(&data, None, None);
3325        assert!(approx_eq(hm, 0.857142857, 1e-2));
3326    }
3327
3328    #[test]
3329    fn test_geometric_mean_f64() {
3330        let data = vec64![0.5f64, 2.0, 1.0];
3331        let gm = geometric_mean_f(&data, None, None);
3332        assert!(approx_eq(gm, 1.0, 1e-9));
3333    }
3334
3335    #[test]
3336    #[should_panic(expected = "harmonic_mean_int: non-positive values are invalid")]
3337    fn test_harmonic_mean_int_all_zeros() {
3338        let d = vec64![0i32, 0, 0];
3339        // Panics due to non-positive values.
3340        let _ = harmonic_mean_int(&d, None, None);
3341    }
3342
3343    #[test]
3344    fn test_geometric_mean_uint() {
3345        let data = vec64![2u64, 8, 4];
3346        let gm = geometric_mean_uint(&data, None, None);
3347        assert!(approx_eq(gm, 4.0, 1e-9));
3348    }
3349
3350    #[test]
3351    #[should_panic(expected = "geometric_mean_int: non-positive values are invalid")]
3352    fn test_geometric_mean_int_negative() {
3353        // Should panic on negative input, even if result is complex
3354        let d = vec64![-2i64, -4, -8];
3355        let _ = geometric_mean_int(&d, None, None);
3356    }
3357
3358    // --- percentile: boundary and empty, floats with mask, p out of range ---
3359    #[test]
3360    fn test_percentile_ord_and_f_all_cases() {
3361        let d = vec64![1i32, 4, 6, 8, 10];
3362        assert_eq!(percentile_ord(&d, None, None, 0.0, true), Some(1));
3363        assert_eq!(percentile_ord(&d, None, None, 100.0, true), Some(10));
3364        let mask = mask_from_bools(&[false, false, false, false, false]);
3365        assert_eq!(percentile_ord(&d, Some(&mask), Some(5), 50.0, true), None);
3366
3367        let df = vec64![1.0f64, 2.0, 3.0, 4.0, 5.0];
3368        assert_eq!(percentile_f(&df, None, None, 0.0, true), Some(1.0));
3369        assert_eq!(percentile_f(&df, None, None, 1.0, true), Some(5.0));
3370        let mask = mask_from_bools(&[false, false, false, false, false]);
3371        assert_eq!(percentile_f(&df, Some(&mask), Some(5), 0.5, true), None);
3372
3373        // Out of range p (should clamp)
3374        assert_eq!(percentile_f(&df, None, None, 10.0, true), Some(5.0));
3375        assert_eq!(percentile_f(&df, None, None, -5.0, true), Some(1.0));
3376    }
3377
3378    // --- quantile: small q, empty, all-null, mask with only one valid ---
3379    #[test]
3380    fn test_quantile_f_and_ord_special_cases() {
3381        let d = vec64![2.0f32, 4.0, 6.0, 8.0];
3382        // q < 2 (should return None)
3383        assert_eq!(quantile_f(&d, None, None, 1, true), None);
3384        // all nulls
3385        let mask = mask_from_bools(&[false, false, false, false]);
3386        assert_eq!(quantile_f(&d, Some(&mask), Some(4), 2, true), None);
3387
3388        // mask: only one valid
3389        let mask = mask_from_bools(&[false, false, true, false]);
3390        let out = quantile_f(&d, Some(&mask), Some(3), 2, true).unwrap();
3391        assert_eq!(out.len(), 1);
3392        assert!(out.iter().all(|&x| x == 6.0));
3393    }
3394
3395    // --- Test for proper handling of singletons, all-nulls, degenerate input for all aggregation APIs ---
3396    #[test]
3397    fn test_all_apis_singleton_and_nulls() {
3398        let d = vec64![7u32];
3399        assert_eq!(sum_u32(&d, None, None), Some(7));
3400        assert_eq!(average_u32(&d, None, None), Some(7.0));
3401        assert_eq!(variance_u32(&d, None, None, false).unwrap(), 0.0);
3402        assert_eq!(mode(&d, None, None), Some(7));
3403        assert_eq!(median(&d, None, None, true), Some(7));
3404        assert_eq!(
3405            quantile(&d, None, None, 3, true).unwrap(),
3406            Vec64::from_slice(&[7, 7])
3407        );
3408        assert_eq!(iqr(&d, None, None, true), Some(0));
3409
3410        let mask = mask_from_bools(&[false]);
3411        assert_eq!(sum_u32(&d, Some(&mask), Some(1)), None);
3412        assert_eq!(median(&d, Some(&mask), Some(1), true), None);
3413        assert_eq!(quantile(&d, Some(&mask), Some(1), 3, true), None);
3414        assert_eq!(mode(&d, Some(&mask), Some(1)), None);
3415        assert_eq!(iqr(&d, Some(&mask), Some(1), true), None);
3416    }
3417
3418    #[test]
3419    fn test_skewness_kurtosis_dense() {
3420        // Data 1..=5 -> mean 3, symmetric
3421        // population skewness = 0, population excess kurtosis = -1.3
3422        // sample skewness = 0, sample excess kurtosis = -1.2
3423        let d_f64 = vec64![1.0f64, 2.0, 3.0, 4.0, 5.0];
3424        // population
3425        assert!(approx_eq(
3426            skewness_f64(&d_f64, None, None, false).unwrap(),
3427            0.0,
3428            1e-12
3429        ));
3430        assert!(approx_eq(
3431            kurtosis_f64(&d_f64, None, None, false).unwrap(),
3432            -1.3,
3433            1e-12
3434        ));
3435        // sample
3436        assert!(approx_eq(
3437            skewness_f64(&d_f64, None, None, true).unwrap(),
3438            0.0,
3439            1e-12
3440        ));
3441        assert!(approx_eq(
3442            kurtosis_f64(&d_f64, None, None, true).unwrap(),
3443            -1.2,
3444            1e-12
3445        ));
3446
3447        let d_f32 = vec64![1.0f32, 2.0, 3.0, 4.0, 5.0];
3448        // population
3449        assert!(approx_eq(
3450            skewness_f32(&d_f32, None, None, false).unwrap(),
3451            0.0,
3452            1e-6
3453        ));
3454        assert!(approx_eq(
3455            kurtosis_f32(&d_f32, None, None, false).unwrap(),
3456            -1.3,
3457            1e-6
3458        ));
3459        // sample
3460        assert!(approx_eq(
3461            skewness_f32(&d_f32, None, None, true).unwrap(),
3462            0.0,
3463            1e-6
3464        ));
3465        assert!(approx_eq(
3466            kurtosis_f32(&d_f32, None, None, true).unwrap(),
3467            -1.2,
3468            1e-6
3469        ));
3470
3471        let d_i32 = vec64![1i32, 2, 3, 4, 5];
3472        assert!(approx_eq(
3473            skewness_i32(&d_i32, None, None, false).unwrap(),
3474            0.0,
3475            1e-12
3476        ));
3477        assert!(approx_eq(
3478            kurtosis_i32(&d_i32, None, None, false).unwrap(),
3479            -1.3,
3480            1e-12
3481        ));
3482        assert!(approx_eq(
3483            skewness_i32(&d_i32, None, None, true).unwrap(),
3484            0.0,
3485            1e-12
3486        ));
3487        assert!(approx_eq(
3488            kurtosis_i32(&d_i32, None, None, true).unwrap(),
3489            -1.2,
3490            1e-12
3491        ));
3492    }
3493
3494    #[test]
3495    fn test_skewness_kurtosis_masked() {
3496        // Mask out 1 & 5 -> [2,3,4]
3497        // population skewness = 0, population & sample excess kurtosis = -1.5
3498        let data = vec64![1i32, 2, 3, 4, 5];
3499        let mask = mask_from_bools(&[false, true, true, true, false]);
3500        // population
3501        assert!(approx_eq(
3502            skewness_i32(&data, Some(&mask), Some(2), false).unwrap(),
3503            0.0,
3504            1e-12
3505        ));
3506        assert!(approx_eq(
3507            kurtosis_i32(&data, Some(&mask), Some(2), false).unwrap(),
3508            -1.5,
3509            1e-12
3510        ));
3511        // sample
3512        assert!(approx_eq(
3513            skewness_i32(&data, Some(&mask), Some(2), true).unwrap(),
3514            0.0,
3515            1e-12
3516        ));
3517        assert!(approx_eq(
3518            kurtosis_i32(&data, Some(&mask), Some(2), true).unwrap(),
3519            -1.5,
3520            1e-12
3521        ));
3522    }
3523
3524    #[test]
3525    fn test_skewness_kurtosis_constant_none() {
3526        // Variance zero -> expect None
3527        let d = vec64![7u32; 5];
3528        assert_eq!(skewness_u32(&d, None, None, false), None);
3529        assert_eq!(kurtosis_u32(&d, None, None, false), None);
3530        assert_eq!(skewness_u32(&d, None, None, true), None);
3531        assert_eq!(kurtosis_u32(&d, None, None, true), None);
3532    }
3533
3534    #[test]
3535    fn test_kurtosis_skewness_large_f32() {
3536        // Mixed negative/positive of length 11:
3537        // population: skew ≈ -0.20081265, kurt ≈ -1.28414694
3538        // sample:     skew ≈ -0.23401565, kurt ≈ -1.30691156
3539        let data = vec64![-2.0, -1.0, -1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0];
3540        // population
3541        assert!(approx_eq(
3542            skewness_f32(&data, None, None, false).unwrap(),
3543            -0.20081265422848577_f64,
3544            1e-12
3545        ));
3546        assert!(approx_eq(
3547            kurtosis_f32(&data, None, None, false).unwrap(),
3548            -1.2841469381610144_f64,
3549            1e-12
3550        ));
3551        // sample
3552        assert!(approx_eq(
3553            skewness_f32(&data, None, None, true).unwrap(),
3554            -0.23401565397707677_f64,
3555            1e-12
3556        ));
3557        assert!(approx_eq(
3558            kurtosis_f32(&data, None, None, true).unwrap(),
3559            -1.3069115636016906_f64,
3560            1e-12
3561        ));
3562    }
3563
3564    fn bitmask_from_bools(bs: &[bool]) -> Bitmask {
3565        let mut mask = Bitmask::with_capacity(bs.len());
3566        for (i, &b) in bs.iter().enumerate() {
3567            mask.set(i, b);
3568        }
3569        mask
3570    }
3571
3572    #[test]
3573    fn test_kurtosis_skewness_large_masked_i32() {
3574        // Drop 4th element (4), leaving 9 values:
3575        // population: skew ≈ 2.43067998, kurt ≈ 3.99120344
3576        // sample:     skew ≈ 2.94642909, kurt ≈ 8.74514940
3577        let data = vec64![1, 2, 3, 4, 5, 6, 7, 100, 1000, 10000];
3578        let mask = mask_from_bools(&[true, true, true, false, true, true, true, true, true, true]);
3579        // population
3580        assert!(approx_eq(
3581            skewness_i32(&data, Some(&mask), None, false).unwrap(),
3582            2.4306799843134046_f64,
3583            1e-12
3584        ));
3585        assert!(approx_eq(
3586            kurtosis_i32(&data, Some(&mask), None, false).unwrap(),
3587            3.991203437112363_f64,
3588            1e-12
3589        ));
3590        // sample
3591        assert!(approx_eq(
3592            skewness_i32(&data, Some(&mask), None, true).unwrap(),
3593            2.946429085375575_f64,
3594            1e-12
3595        ));
3596        assert!(approx_eq(
3597            kurtosis_i32(&data, Some(&mask), None, true).unwrap(),
3598            8.745149404023547_f64,
3599            1e-12
3600        ));
3601    }
3602
3603    #[test]
3604    fn test_kurtosis_skewness_no_valids() {
3605        let data: [f64; 3] = [0.0, 0.0, 0.0];
3606        let mask = bitmask_from_bools(&[false, false, false]);
3607        assert_eq!(kurtosis_f64(&data, Some(&mask), None, false), None);
3608        assert_eq!(skewness_f64(&data, Some(&mask), None, false), None);
3609        assert_eq!(kurtosis_f64(&data, Some(&mask), None, true), None);
3610        assert_eq!(skewness_f64(&data, Some(&mask), None, true), None);
3611    }
3612
3613    #[test]
3614    fn test_kurtosis_skewness_single_value() {
3615        let data = vec64![1u32];
3616        assert_eq!(kurtosis_u32(&data, None, None, false), None);
3617        assert_eq!(skewness_u32(&data, None, None, false), None);
3618        assert_eq!(kurtosis_u32(&data, None, None, true), None);
3619        assert_eq!(skewness_u32(&data, None, None, true), None);
3620    }
3621
3622    #[test]
3623    fn test_kurtosis_skewness_all_equal() {
3624        let data = vec64![7.0, 7.0, 7.0, 7.0];
3625        assert_eq!(kurtosis_f64(&data, None, None, false), None);
3626        assert_eq!(skewness_f64(&data, None, None, false), None);
3627        assert_eq!(kurtosis_f64(&data, None, None, true), None);
3628        assert_eq!(skewness_f64(&data, None, None, true), None);
3629    }
3630
3631    #[test]
3632    fn test_kurtosis_skewness_mixed_nulls() {
3633        // Valid [2.0,7.0,10.0] (length 3):
3634        // population: skew ≈ -0.29479962, kurt = -1.5
3635        // sample:     skew ≈ -0.72210865, kurt = -1.5
3636        let data = vec64![2.0, 5.0, 7.0, 3.0, 10.0];
3637        let mask = mask_from_bools(&[true, false, true, false, true]);
3638        // population
3639        assert!(approx_eq(
3640            skewness_f64(&data, Some(&mask), None, false).unwrap(),
3641            -0.29479962014482897_f64,
3642            1e-12
3643        ));
3644        assert!(approx_eq(
3645            kurtosis_f64(&data, Some(&mask), None, false).unwrap(),
3646            -1.5_f64,
3647            1e-12
3648        ));
3649        // sample
3650        assert!(approx_eq(
3651            skewness_f64(&data, Some(&mask), None, true).unwrap(),
3652            -0.7221086457211346_f64,
3653            1e-12
3654        ));
3655        assert!(approx_eq(
3656            kurtosis_f64(&data, Some(&mask), None, true).unwrap(),
3657            -1.5_f64,
3658            1e-12
3659        ));
3660    }
3661
3662    #[test]
3663    fn test_kurtosis_skewness_integers_large() {
3664        let data = vec64![1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9];
3665        assert!(kurtosis_i32(&data, None, None, false).unwrap().abs() < 1.5);
3666        assert!(skewness_i32(&data, None, None, false).unwrap().abs() < 1e-6);
3667        // Sample
3668        assert!(kurtosis_i32(&data, None, None, true).unwrap().abs() < 2.0);
3669        assert!(skewness_i32(&data, None, None, true).unwrap().abs() < 1e-6);
3670    }
3671
3672    #[test]
3673    fn test_agg_product_dense() {
3674        // No mask, all values included
3675        let data = vec64![2, 3, 4, 5];
3676        let prod = agg_product(&data, None, 0, data.len());
3677        assert_eq!(prod, 120);
3678
3679        // Offset/len window
3680        let prod = agg_product(&data, None, 1, 2);
3681        assert_eq!(prod, 12); // 3*4
3682    }
3683
3684    #[test]
3685    fn test_agg_product_with_mask_all_valid() {
3686        let data = vec64![2, 3, 4, 5];
3687        let mask = Bitmask::new_set_all(4, true);
3688        let prod = agg_product(&data, Some(&mask), 0, 4);
3689        assert_eq!(prod, 120);
3690    }
3691
3692    #[test]
3693    fn test_agg_product_with_mask_some_nulls() {
3694        let data = vec64![2, 3, 4, 5];
3695        let mut mask = Bitmask::new_set_all(4, false);
3696        mask.set(0, true);
3697        mask.set(2, true);
3698        // Only data[0] and data[2] included: 2*4=8
3699        let prod = agg_product(&data, Some(&mask), 0, 4);
3700        assert_eq!(prod, 8);
3701    }
3702
3703    #[test]
3704    fn test_agg_product_zero_short_circuit() {
3705        let data = vec64![2, 0, 4, 5];
3706        let prod = agg_product(&data, None, 0, 4);
3707        assert_eq!(prod, 0);
3708    }
3709
3710    #[test]
3711    fn test_agg_product_empty() {
3712        let data: [i32; 0] = [];
3713        let prod = agg_product(&data, None, 0, 0);
3714        assert_eq!(prod, 1); // One by convention
3715    }
3716
3717    #[test]
3718    fn test_agg_product_mask_all_null() {
3719        let data = vec64![2, 3, 4, 5];
3720        let mask = Bitmask::new_set_all(4, false);
3721        let prod = agg_product(&data, Some(&mask), 0, 4);
3722        assert_eq!(prod, 1); // No values included: returns one
3723    }
3724
3725    #[test]
3726    fn test_agg_product_offset_and_mask() {
3727        let data = vec64![2, 3, 4, 5, 6];
3728        let mut mask = Bitmask::new_set_all(5, false);
3729        mask.set(1, true);
3730        mask.set(3, true);
3731        let prod = agg_product(&data, Some(&mask), 1, 3);
3732        assert_eq!(prod, 15);
3733    }
3734
3735    #[test]
3736    fn test_sum_squares_empty() {
3737        let v: [f64; 0] = [];
3738        assert_eq!(sum_squares(&v, None, None), 0.0);
3739    }
3740
3741    #[test]
3742    fn test_sum_squares_basic() {
3743        let v = vec64![1.0, 2.0, 3.0];
3744        assert_eq!(sum_squares(&v, None, None), 1.0 + 4.0 + 9.0);
3745    }
3746
3747    #[test]
3748    fn test_sum_squares_neg() {
3749        let v = vec64![-1.0, -2.0, -3.0];
3750        assert_eq!(sum_squares(&v, None, None), 1.0 + 4.0 + 9.0);
3751    }
3752
3753    #[test]
3754    fn test_sum_squares_large() {
3755        let v: Vec<f64> = (0..1000).map(|x| x as f64).collect();
3756        let expect: f64 = v.iter().map(|x| x * x).sum();
3757        assert!((sum_squares(&v, None, None) - expect).abs() < 1e-9);
3758    }
3759
3760    #[test]
3761    fn test_sum_squares_masked() {
3762        let v = vec64![1.0, 2.0, 3.0, 4.0];
3763        let mask = Bitmask::from_bools(&[true, false, true, false]);
3764        assert_eq!(sum_squares(&v, Some(&mask), Some(2)), 1.0 + 9.0);
3765    }
3766
3767    #[test]
3768    fn test_sum_cubes_empty() {
3769        let v: [f64; 0] = [];
3770        assert_eq!(sum_cubes(&v, None, None), 0.0);
3771    }
3772
3773    #[test]
3774    fn test_sum_cubes_basic() {
3775        let v = vec64![1.0, 2.0, 3.0];
3776        assert_eq!(sum_cubes(&v, None, None), 1.0 + 8.0 + 27.0);
3777    }
3778
3779    #[test]
3780    fn test_sum_cubes_neg() {
3781        let v = vec64![-1.0, -2.0, -3.0];
3782        assert_eq!(sum_cubes(&v, None, None), -1.0 - 8.0 - 27.0);
3783    }
3784
3785    #[test]
3786    fn test_sum_cubes_large() {
3787        let v: Vec<f64> = (0..1000).map(|x| x as f64).collect();
3788        let expect: f64 = v.iter().map(|x| x * x * x).sum();
3789        assert!((sum_cubes(&v, None, None) - expect).abs() < 1e-6);
3790    }
3791
3792    #[test]
3793    fn test_sum_cubes_masked() {
3794        let v = vec64![1.0, 2.0, 3.0, 4.0];
3795        let mask = Bitmask::from_bools(&[true, true, false, true]);
3796        assert_eq!(sum_cubes(&v, Some(&mask), Some(1)), 1.0 + 8.0 + 64.0);
3797    }
3798
3799    #[test]
3800    fn test_sum_quartics_empty() {
3801        let v: [f64; 0] = [];
3802        assert_eq!(sum_quartics(&v, None, None), 0.0);
3803    }
3804
3805    #[test]
3806    fn test_sum_quartics_basic() {
3807        let v = vec64![1.0, 2.0, 3.0];
3808        assert_eq!(sum_quartics(&v, None, None), 1.0 + 16.0 + 81.0);
3809    }
3810
3811    #[test]
3812    fn test_sum_quartics_neg() {
3813        let v = vec64![-1.0, -2.0, -3.0];
3814        assert_eq!(sum_quartics(&v, None, None), 1.0 + 16.0 + 81.0);
3815    }
3816
3817    #[test]
3818    fn test_sum_quartics_large() {
3819        let v: Vec<f64> = (0..1000).map(|x| x as f64).collect();
3820        let expect: f64 = v
3821            .iter()
3822            .map(|x| {
3823                let x2 = x * x;
3824                x2 * x2
3825            })
3826            .sum();
3827        assert!((sum_quartics(&v, None, None) - expect).abs() < 1e-3);
3828    }
3829
3830    #[test]
3831    fn test_sum_quartics_masked() {
3832        let v = vec64![1.0, 2.0, 3.0, 4.0];
3833        let mask = Bitmask::from_bools(&[false, true, false, true]);
3834        assert_eq!(sum_quartics(&v, Some(&mask), Some(2)), 16.0 + 256.0);
3835    }
3836
3837    #[test]
3838    fn test_sum_powers_singleton() {
3839        let v = vec64![4.2];
3840        assert!((sum_squares(&v, None, None) - 4.2 * 4.2).abs() < 1e-12);
3841        assert!((sum_cubes(&v, None, None) - 4.2 * 4.2 * 4.2).abs() < 1e-12);
3842        assert!((sum_quartics(&v, None, None) - (4.2 * 4.2 * 4.2 * 4.2)).abs() < 1e-12);
3843    }
3844
3845    #[test]
3846    fn test_sum_powers_f64_extremes() {
3847        let v = vec64![f64::MAX, -f64::MAX, 0.0, f64::MIN_POSITIVE];
3848        let ss = sum_squares(&v, None, None).is_infinite();
3849        let sc = sum_cubes(&v, None, None).is_infinite();
3850        let sq = sum_quartics(&v, None, None).is_infinite();
3851        println!("{:?}", ss);
3852        println!("{:?}", sc);
3853        println!("{:?}", sq);
3854        // f64::MAX * f64::MAX is inf, so expect inf for all powers >1
3855        assert!(sum_squares(&v, None, None).is_infinite());
3856        assert!(sum_cubes(&v, None, None).is_infinite());
3857        assert!(sum_quartics(&v, None, None).is_infinite());
3858    }
3859}