vortex_array/stats/
stats_set.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5
6use enum_iterator::{Sequence, all};
7use num_traits::CheckedAdd;
8use vortex_dtype::DType;
9use vortex_error::{VortexExpect, VortexResult, vortex_err};
10use vortex_scalar::{Scalar, ScalarValue};
11
12use super::traits::StatsProvider;
13use super::{IsSorted, IsStrictSorted, NaNCount, NullCount, StatType, UncompressedSizeInBytes};
14use crate::stats::{IsConstant, Max, Min, Precision, Stat, StatBound, StatsProviderExt, Sum};
15
16#[derive(Default, Debug, Clone)]
17pub struct StatsSet {
18    values: Vec<(Stat, Precision<ScalarValue>)>,
19}
20
21impl StatsSet {
22    /// Create new StatSet without validating uniqueness of all the entries
23    ///
24    /// # Safety
25    ///
26    /// This method will not panic or trigger UB, but may lead to duplicate stats being stored.
27    pub fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
28        Self { values }
29    }
30
31    /// Specialized constructor for the case where the StatsSet represents
32    /// an array consisting entirely of [null](vortex_dtype::DType::Null) values.
33    pub fn nulls(len: usize) -> Self {
34        let mut stats = Self::new_unchecked(vec![(Stat::NullCount, Precision::exact(len))]);
35
36        if len > 0 {
37            stats.set(Stat::IsConstant, Precision::exact(true));
38            stats.set(Stat::IsSorted, Precision::exact(true));
39            stats.set(Stat::IsStrictSorted, Precision::exact(len < 2));
40        }
41
42        stats
43    }
44
45    /// A convenience method for creating a stats set which will represent an empty array.
46    pub fn empty_array() -> StatsSet {
47        StatsSet::new_unchecked(vec![(Stat::NullCount, Precision::exact(0))])
48    }
49
50    pub fn constant(scalar: Scalar, length: usize) -> Self {
51        let (dtype, sv) = scalar.into_parts();
52        let mut stats = Self::default();
53        if length > 0 {
54            stats.extend([
55                (Stat::IsConstant, Precision::exact(true)),
56                (Stat::IsSorted, Precision::exact(true)),
57                (Stat::IsStrictSorted, Precision::exact(length <= 1)),
58            ]);
59        }
60
61        let null_count = if sv.is_null() { length as u64 } else { 0 };
62        stats.set(Stat::NullCount, Precision::exact(null_count));
63
64        if !sv.is_null() {
65            stats.extend([
66                (Stat::Min, Precision::exact(sv.clone())),
67                (Stat::Max, Precision::exact(sv.clone())),
68            ]);
69        }
70
71        if matches!(dtype, DType::Bool(_)) {
72            let bool_val = <Option<bool>>::try_from(&sv).vortex_expect("Checked dtype");
73            let true_count = bool_val
74                .map(|b| if b { length as u64 } else { 0 })
75                .unwrap_or(0);
76            stats.set(Stat::Sum, Precision::exact(true_count));
77        }
78
79        stats
80    }
81
82    pub fn bools_with_sum_and_null_count(true_count: usize, null_count: usize, len: usize) -> Self {
83        StatsSet::new_unchecked(vec![
84            (Stat::Sum, Precision::exact(true_count)),
85            (Stat::NullCount, Precision::exact(null_count)),
86            (Stat::Min, Precision::exact(true_count == len)),
87            (Stat::Max, Precision::exact(true_count > 0)),
88            (
89                Stat::IsConstant,
90                Precision::exact((true_count == 0 && null_count == 0) || true_count == len),
91            ),
92        ])
93    }
94
95    pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
96        Self::new_unchecked(vec![(stat, value)])
97    }
98
99    fn reserve_full_capacity(&mut self) {
100        if self.values.capacity() < Stat::CARDINALITY {
101            self.values
102                .reserve_exact(Stat::CARDINALITY - self.values.capacity());
103        }
104    }
105}
106
107// Getters and setters for individual stats.
108impl StatsSet {
109    /// Set the stat `stat` to `value`.
110    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
111        self.reserve_full_capacity();
112
113        if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
114            *existing = (stat, value);
115        } else {
116            self.values.push((stat, value));
117        }
118    }
119
120    /// Clear the stat `stat` from the set.
121    pub fn clear(&mut self, stat: Stat) {
122        self.values.retain(|(s, _)| *s != stat);
123    }
124
125    pub fn retain_only(&mut self, stats: &[Stat]) {
126        self.values.retain(|(s, _)| stats.contains(s));
127    }
128
129    pub fn keep_inexact_stats(self, inexact_keep: &[Stat]) -> Self {
130        self.values
131            .into_iter()
132            .filter_map(|(s, v)| inexact_keep.contains(&s).then(|| (s, v.into_inexact())))
133            .collect()
134    }
135
136    /// Iterate over the statistic names and values in-place.
137    ///
138    /// See [Iterator].
139    pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
140        self.values.iter()
141    }
142}
143
144// StatSetIntoIter just exists to protect current implementation from exposure on the public API.
145
146/// Owned iterator over the stats.
147///
148/// See [IntoIterator].
149pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
150
151impl Iterator for StatsSetIntoIter {
152    type Item = (Stat, Precision<ScalarValue>);
153
154    fn next(&mut self) -> Option<Self::Item> {
155        self.0.next()
156    }
157}
158
159impl IntoIterator for StatsSet {
160    type Item = (Stat, Precision<ScalarValue>);
161    type IntoIter = StatsSetIntoIter;
162
163    fn into_iter(self) -> Self::IntoIter {
164        StatsSetIntoIter(self.values.into_iter())
165    }
166}
167
168impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
169    fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
170        let iter = iter.into_iter();
171        let mut values = Vec::default();
172        values.reserve_exact(Stat::CARDINALITY);
173
174        let mut this = Self { values };
175        this.extend(iter);
176        this
177    }
178}
179
180impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
181    #[inline]
182    fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
183        let iter = iter.into_iter();
184        self.reserve_full_capacity();
185
186        iter.for_each(|(stat, value)| self.set(stat, value));
187    }
188}
189
190// Merge helpers
191impl StatsSet {
192    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
193    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
194    pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
195        for s in all::<Stat>() {
196            match s {
197                Stat::IsConstant => self.merge_is_constant(other, dtype),
198                Stat::IsSorted => self.merge_is_sorted(other, dtype),
199                Stat::IsStrictSorted => self.merge_is_strict_sorted(other, dtype),
200                Stat::Max => self.merge_max(other, dtype),
201                Stat::Min => self.merge_min(other, dtype),
202                Stat::Sum => self.merge_sum(other, dtype),
203                Stat::NullCount => self.merge_null_count(other),
204                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
205                Stat::NaNCount => self.merge_nan_count(other),
206            }
207        }
208
209        self
210    }
211
212    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
213    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
214    pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
215        for s in all::<Stat>() {
216            if !s.is_commutative() {
217                self.clear(s);
218                continue;
219            }
220
221            match s {
222                Stat::IsConstant => self.merge_is_constant(other, dtype),
223                Stat::Max => self.merge_max(other, dtype),
224                Stat::Min => self.merge_min(other, dtype),
225                Stat::Sum => self.merge_sum(other, dtype),
226                Stat::NullCount => self.merge_null_count(other),
227                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
228                Stat::IsSorted | Stat::IsStrictSorted => {
229                    unreachable!("not commutative")
230                }
231                Stat::NaNCount => self.merge_nan_count(other),
232            }
233        }
234
235        self
236    }
237
238    /// Given two sets of stats (of differing precision) for the same array, combine them
239    pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
240        let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
241        for s in other_stats {
242            match s {
243                Stat::Max => self.combine_bound::<Max>(other, dtype)?,
244                Stat::Min => self.combine_bound::<Min>(other, dtype)?,
245                Stat::UncompressedSizeInBytes => {
246                    self.combine_bound::<UncompressedSizeInBytes>(other, dtype)?
247                }
248                Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
249                Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
250                Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
251                Stat::NullCount => self.combine_bound::<NullCount>(other, dtype)?,
252                Stat::Sum => self.combine_bound::<Sum>(other, dtype)?,
253                Stat::NaNCount => self.combine_bound::<NaNCount>(other, dtype)?,
254            }
255        }
256        Ok(())
257    }
258
259    fn combine_bound<S: StatType<Scalar>>(
260        &mut self,
261        other: &Self,
262        dtype: &DType,
263    ) -> VortexResult<()>
264    where
265        S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
266    {
267        match (
268            self.get_scalar_bound::<S>(dtype),
269            other.get_scalar_bound::<S>(dtype),
270        ) {
271            (Some(m1), Some(m2)) => {
272                let meet = m1
273                    .intersection(&m2)
274                    .vortex_expect("can always compare scalar")
275                    .ok_or_else(|| {
276                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
277                    })?;
278                if meet != m1 {
279                    self.set(S::STAT, meet.into_value().map(Scalar::into_value));
280                }
281            }
282            (None, Some(m)) => self.set(S::STAT, m.into_value().map(Scalar::into_value)),
283            (Some(_), _) => (),
284            (None, None) => self.clear(S::STAT),
285        }
286        Ok(())
287    }
288
289    fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &Self) -> VortexResult<()>
290    where
291        S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
292    {
293        match (
294            self.get_as_bound::<S, bool>(),
295            other.get_as_bound::<S, bool>(),
296        ) {
297            (Some(m1), Some(m2)) => {
298                let intersection = m1
299                    .intersection(&m2)
300                    .vortex_expect("can always compare boolean")
301                    .ok_or_else(|| {
302                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
303                    })?;
304                if intersection != m1 {
305                    self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
306                }
307            }
308            (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
309            (Some(_), None) => (),
310            (None, None) => self.clear(S::STAT),
311        }
312        Ok(())
313    }
314
315    fn merge_min(&mut self, other: &Self, dtype: &DType) {
316        match (
317            self.get_scalar_bound::<Min>(dtype),
318            other.get_scalar_bound::<Min>(dtype),
319        ) {
320            (Some(m1), Some(m2)) => {
321                let meet = m1.union(&m2).vortex_expect("can compare scalar");
322                if meet != m1 {
323                    self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
324                }
325            }
326            _ => self.clear(Stat::Min),
327        }
328    }
329
330    fn merge_max(&mut self, other: &Self, dtype: &DType) {
331        match (
332            self.get_scalar_bound::<Max>(dtype),
333            other.get_scalar_bound::<Max>(dtype),
334        ) {
335            (Some(m1), Some(m2)) => {
336                let meet = m1.union(&m2).vortex_expect("can compare scalar");
337                if meet != m1 {
338                    self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
339                }
340            }
341            _ => self.clear(Stat::Max),
342        }
343    }
344
345    fn merge_sum(&mut self, other: &Self, dtype: &DType) {
346        match (
347            self.get_scalar_bound::<Sum>(dtype),
348            other.get_scalar_bound::<Sum>(dtype),
349        ) {
350            (Some(m1), Some(m2)) => {
351                // If the combine sum is exact, then we can sum them.
352                if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
353                    s1.as_primitive()
354                        .checked_add(&s2.as_primitive())
355                        .map(|pscalar| {
356                            pscalar
357                                .pvalue()
358                                .map(|pvalue| {
359                                    Scalar::primitive_value(
360                                        pvalue,
361                                        pscalar.ptype(),
362                                        pscalar.dtype().nullability(),
363                                    )
364                                    .into_value()
365                                })
366                                .unwrap_or_else(ScalarValue::null)
367                        })
368                }) {
369                    self.set(Stat::Sum, Precision::Exact(scalar_value));
370                }
371            }
372            _ => self.clear(Stat::Sum),
373        }
374    }
375
376    fn merge_is_constant(&mut self, other: &Self, dtype: &DType) {
377        let self_const = self.get_as(Stat::IsConstant);
378        let other_const = other.get_as(Stat::IsConstant);
379        let self_min = self.get_scalar(Stat::Min, dtype);
380        let other_min = other.get_scalar(Stat::Min, dtype);
381
382        if let (
383            Some(Precision::Exact(self_const)),
384            Some(Precision::Exact(other_const)),
385            Some(Precision::Exact(self_min)),
386            Some(Precision::Exact(other_min)),
387        ) = (self_const, other_const, self_min, other_min)
388        {
389            if self_const && other_const && self_min == other_min {
390                self.set(Stat::IsConstant, Precision::exact(true));
391            } else {
392                self.set(Stat::IsConstant, Precision::inexact(false));
393            }
394        }
395        self.set(Stat::IsConstant, Precision::exact(false));
396    }
397
398    fn merge_is_sorted(&mut self, other: &Self, dtype: &DType) {
399        self.merge_sortedness_stat(other, Stat::IsSorted, dtype, PartialOrd::le)
400    }
401
402    fn merge_is_strict_sorted(&mut self, other: &Self, dtype: &DType) {
403        self.merge_sortedness_stat(other, Stat::IsStrictSorted, dtype, PartialOrd::lt)
404    }
405
406    fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
407        &mut self,
408        other: &Self,
409        stat: Stat,
410        dtype: &DType,
411        cmp: F,
412    ) {
413        if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
414            == (self.get_as(stat), other.get_as(stat))
415        {
416            // There might be no stat because it was dropped, or it doesn't exist
417            // (e.g. an all null array).
418            // We assume that it was the dropped case since the doesn't exist might imply sorted,
419            // but this in-precision is correct.
420            if let (Some(self_max), Some(other_min)) = (
421                self.get_scalar_bound::<Max>(dtype),
422                other.get_scalar_bound::<Min>(dtype),
423            ) {
424                return if cmp(&self_max.max_value(), &other_min.min_value()) {
425                    // keep value
426                } else {
427                    self.set(stat, Precision::inexact(false));
428                };
429            }
430        }
431        self.clear(stat);
432    }
433
434    fn merge_null_count(&mut self, other: &Self) {
435        self.merge_sum_stat(Stat::NullCount, other)
436    }
437
438    fn merge_nan_count(&mut self, other: &Self) {
439        self.merge_sum_stat(Stat::NaNCount, other)
440    }
441
442    fn merge_uncompressed_size_in_bytes(&mut self, other: &Self) {
443        self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
444    }
445
446    fn merge_sum_stat(&mut self, stat: Stat, other: &Self) {
447        match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
448            (Some(nc1), Some(nc2)) => {
449                self.set(
450                    stat,
451                    nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
452                );
453            }
454            _ => self.clear(stat),
455        }
456    }
457}
458
459impl StatsProvider for StatsSet {
460    fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
461        self.values
462            .iter()
463            .find(|(s, _)| *s == stat)
464            .map(|(_, v)| v.clone())
465    }
466
467    fn len(&self) -> usize {
468        self.values.len()
469    }
470}
471
472#[cfg(test)]
473mod test {
474    use enum_iterator::all;
475    use itertools::Itertools;
476    use vortex_dtype::{DType, Nullability, PType};
477
478    use crate::arrays::PrimitiveArray;
479    use crate::stats::{IsConstant, Precision, Stat, StatsProvider, StatsProviderExt, StatsSet};
480
481    #[test]
482    fn test_iter() {
483        let set = StatsSet::new_unchecked(vec![
484            (Stat::Max, Precision::exact(100)),
485            (Stat::Min, Precision::exact(42)),
486        ]);
487        let mut iter = set.iter();
488        let first = iter.next().unwrap().clone();
489        assert_eq!(first.0, Stat::Max);
490        assert_eq!(
491            first.1.map(|f| i32::try_from(&f).unwrap()),
492            Precision::exact(100)
493        );
494        let snd = iter.next().unwrap().clone();
495        assert_eq!(snd.0, Stat::Min);
496        assert_eq!(snd.1.map(|s| i32::try_from(&s).unwrap()), 42);
497    }
498
499    #[test]
500    fn into_iter() {
501        let mut set = StatsSet::new_unchecked(vec![
502            (Stat::Max, Precision::exact(100)),
503            (Stat::Min, Precision::exact(42)),
504        ])
505        .into_iter();
506        let (stat, first) = set.next().unwrap();
507        assert_eq!(stat, Stat::Max);
508        assert_eq!(
509            first.map(|f| i32::try_from(&f).unwrap()),
510            Precision::exact(100)
511        );
512        let snd = set.next().unwrap();
513        assert_eq!(snd.0, Stat::Min);
514        assert_eq!(
515            snd.1.map(|s| i32::try_from(&s).unwrap()),
516            Precision::exact(42)
517        );
518    }
519
520    #[test]
521    fn merge_constant() {
522        let first = StatsSet::from_iter([
523            (Stat::Min, Precision::exact(42)),
524            (Stat::IsConstant, Precision::exact(true)),
525        ])
526        .merge_ordered(
527            &StatsSet::from_iter([
528                (Stat::Min, Precision::inexact(42)),
529                (Stat::IsConstant, Precision::exact(true)),
530            ]),
531            &DType::Primitive(PType::I32, Nullability::NonNullable),
532        );
533        assert_eq!(
534            first.get_as::<bool>(Stat::IsConstant),
535            Some(Precision::exact(false))
536        );
537        assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
538    }
539
540    #[test]
541    fn merge_into_min() {
542        let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
543            &StatsSet::default(),
544            &DType::Primitive(PType::I32, Nullability::NonNullable),
545        );
546        assert!(first.get(Stat::Min).is_none());
547    }
548
549    #[test]
550    fn merge_from_min() {
551        let first = StatsSet::default().merge_ordered(
552            &StatsSet::of(Stat::Min, Precision::exact(42)),
553            &DType::Primitive(PType::I32, Nullability::NonNullable),
554        );
555        assert!(first.get(Stat::Min).is_none());
556    }
557
558    #[test]
559    fn merge_mins() {
560        let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
561            &StatsSet::of(Stat::Min, Precision::exact(42)),
562            &DType::Primitive(PType::I32, Nullability::NonNullable),
563        );
564        assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(37)));
565    }
566
567    #[test]
568    fn merge_into_bound_max() {
569        let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
570            &StatsSet::default(),
571            &DType::Primitive(PType::I32, Nullability::NonNullable),
572        );
573        assert!(first.get(Stat::Max).is_none());
574    }
575
576    #[test]
577    fn merge_from_max() {
578        let first = StatsSet::default().merge_ordered(
579            &StatsSet::of(Stat::Max, Precision::exact(42)),
580            &DType::Primitive(PType::I32, Nullability::NonNullable),
581        );
582        assert!(first.get(Stat::Max).is_none());
583    }
584
585    #[test]
586    fn merge_maxes() {
587        let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
588            &StatsSet::of(Stat::Max, Precision::exact(42)),
589            &DType::Primitive(PType::I32, Nullability::NonNullable),
590        );
591        assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::exact(42)));
592    }
593
594    #[test]
595    fn merge_maxes_bound() {
596        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
597        let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
598            .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
599        assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::inexact(43)));
600    }
601
602    #[test]
603    fn merge_into_scalar() {
604        let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
605            &StatsSet::default(),
606            &DType::Primitive(PType::I32, Nullability::NonNullable),
607        );
608        assert!(first.get(Stat::Sum).is_none());
609    }
610
611    #[test]
612    fn merge_from_scalar() {
613        let first = StatsSet::default().merge_ordered(
614            &StatsSet::of(Stat::Sum, Precision::exact(42)),
615            &DType::Primitive(PType::I32, Nullability::NonNullable),
616        );
617        assert!(first.get(Stat::Sum).is_none());
618    }
619
620    #[test]
621    fn merge_scalars() {
622        let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
623            &StatsSet::of(Stat::Sum, Precision::exact(42)),
624            &DType::Primitive(PType::I32, Nullability::NonNullable),
625        );
626        assert_eq!(
627            first.get_as::<usize>(Stat::Sum),
628            Some(Precision::exact(79usize))
629        );
630    }
631
632    #[test]
633    fn merge_into_sortedness() {
634        let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
635            &StatsSet::default(),
636            &DType::Primitive(PType::I32, Nullability::NonNullable),
637        );
638        assert!(first.get(Stat::IsStrictSorted).is_none());
639    }
640
641    #[test]
642    fn merge_from_sortedness() {
643        let first = StatsSet::default().merge_ordered(
644            &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
645            &DType::Primitive(PType::I32, Nullability::NonNullable),
646        );
647        assert!(first.get(Stat::IsStrictSorted).is_none());
648    }
649
650    #[test]
651    fn merge_sortedness() {
652        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
653        first.set(Stat::Max, Precision::exact(1));
654        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
655        second.set(Stat::Min, Precision::exact(2));
656        first = first.merge_ordered(
657            &second,
658            &DType::Primitive(PType::I32, Nullability::NonNullable),
659        );
660        assert_eq!(
661            first.get_as::<bool>(Stat::IsStrictSorted),
662            Some(Precision::exact(true))
663        );
664    }
665
666    #[test]
667    fn merge_sortedness_out_of_order() {
668        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
669        first.set(Stat::Min, Precision::exact(1));
670        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
671        second.set(Stat::Max, Precision::exact(2));
672        second = second.merge_ordered(
673            &first,
674            &DType::Primitive(PType::I32, Nullability::NonNullable),
675        );
676        assert_eq!(
677            second.get_as::<bool>(Stat::IsStrictSorted),
678            Some(Precision::inexact(false))
679        );
680    }
681
682    #[test]
683    fn merge_sortedness_only_one_sorted() {
684        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
685        first.set(Stat::Max, Precision::exact(1));
686        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
687        second.set(Stat::Min, Precision::exact(2));
688        first.merge_ordered(
689            &second,
690            &DType::Primitive(PType::I32, Nullability::NonNullable),
691        );
692        assert_eq!(
693            second.get_as::<bool>(Stat::IsStrictSorted),
694            Some(Precision::exact(false))
695        );
696    }
697
698    #[test]
699    fn merge_sortedness_missing_min() {
700        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
701        first.set(Stat::Max, Precision::exact(1));
702        let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
703        first = first.merge_ordered(
704            &second,
705            &DType::Primitive(PType::I32, Nullability::NonNullable),
706        );
707        assert!(first.get(Stat::IsStrictSorted).is_none());
708    }
709
710    #[test]
711    fn merge_sortedness_bound_min() {
712        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
713        first.set(Stat::Max, Precision::exact(1));
714        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
715        second.set(Stat::Min, Precision::inexact(2));
716        first = first.merge_ordered(
717            &second,
718            &DType::Primitive(PType::I32, Nullability::NonNullable),
719        );
720        assert_eq!(
721            first.get_as::<bool>(Stat::IsStrictSorted),
722            Some(Precision::exact(true))
723        );
724    }
725
726    #[test]
727    fn merge_unordered() {
728        let array =
729            PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
730        let all_stats = all::<Stat>()
731            .filter(|s| !matches!(s, Stat::Sum))
732            .filter(|s| !matches!(s, Stat::NaNCount))
733            .collect_vec();
734        array.statistics().compute_all(&all_stats).unwrap();
735
736        let stats = array.statistics().to_owned();
737        for stat in &all_stats {
738            assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
739        }
740
741        let merged = stats.clone().merge_unordered(
742            &stats,
743            &DType::Primitive(PType::I32, Nullability::NonNullable),
744        );
745        for stat in &all_stats {
746            assert_eq!(
747                merged.get(*stat).is_some(),
748                stat.is_commutative(),
749                "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
750            )
751        }
752
753        assert_eq!(
754            merged.get_as::<i32>(Stat::Min),
755            stats.get_as::<i32>(Stat::Min)
756        );
757        assert_eq!(
758            merged.get_as::<i32>(Stat::Max),
759            stats.get_as::<i32>(Stat::Max)
760        );
761        assert_eq!(
762            merged.get_as::<u64>(Stat::NullCount).unwrap(),
763            stats.get_as::<u64>(Stat::NullCount).unwrap().map(|s| s * 2)
764        );
765    }
766
767    #[test]
768    fn merge_min_bound_same() {
769        // Merging a stat with a bound and another with an exact results in exact stat.
770        // since bound for min is a lower bound, it can in fact contain any value >= bound.
771        let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
772            &StatsSet::of(Stat::Min, Precision::exact(5)),
773            &DType::Primitive(PType::I32, Nullability::NonNullable),
774        );
775        assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::exact(5)));
776    }
777
778    #[test]
779    fn merge_min_bound_bound_lower() {
780        let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
781            &StatsSet::of(Stat::Min, Precision::exact(5)),
782            &DType::Primitive(PType::I32, Nullability::NonNullable),
783        );
784        assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::inexact(4)));
785    }
786
787    #[test]
788    fn retain_approx() {
789        let set = StatsSet::from_iter([
790            (Stat::Max, Precision::exact(100)),
791            (Stat::Min, Precision::exact(50)),
792            (Stat::Sum, Precision::inexact(10)),
793        ]);
794
795        let set = set.keep_inexact_stats(&[Stat::Min, Stat::Max]);
796
797        assert_eq!(set.len(), 2);
798        assert_eq!(set.get_as::<i32>(Stat::Max), Some(Precision::inexact(100)));
799        assert_eq!(set.get_as::<i32>(Stat::Min), Some(Precision::inexact(50)));
800        assert_eq!(set.get_as::<i32>(Stat::Sum), None);
801    }
802
803    #[test]
804    fn test_combine_is_constant() {
805        {
806            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
807            let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
808            stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
809            assert_eq!(
810                stats.get_as::<bool>(Stat::IsConstant),
811                Some(Precision::exact(true))
812            );
813        }
814
815        {
816            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
817            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
818            stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
819            assert_eq!(
820                stats.get_as::<bool>(Stat::IsConstant),
821                Some(Precision::exact(true))
822            );
823        }
824
825        {
826            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
827            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
828            stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
829            assert_eq!(
830                stats.get_as::<bool>(Stat::IsConstant),
831                Some(Precision::exact(false))
832            );
833        }
834    }
835
836    #[test]
837    fn test_combine_sets_boolean_conflict() {
838        let mut stats1 = StatsSet::from_iter([
839            (Stat::IsConstant, Precision::exact(true)),
840            (Stat::IsSorted, Precision::exact(true)),
841        ]);
842
843        let stats2 = StatsSet::from_iter([
844            (Stat::IsConstant, Precision::exact(false)),
845            (Stat::IsSorted, Precision::exact(true)),
846        ]);
847
848        let result = stats1.combine_sets(
849            &stats2,
850            &DType::Primitive(PType::I32, Nullability::NonNullable),
851        );
852        assert!(result.is_err());
853    }
854
855    #[test]
856    fn test_combine_sets_with_missing_stats() {
857        let mut stats1 = StatsSet::from_iter([
858            (Stat::Min, Precision::exact(42)),
859            (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
860        ]);
861
862        let stats2 = StatsSet::from_iter([
863            (Stat::Max, Precision::exact(100)),
864            (Stat::IsStrictSorted, Precision::exact(true)),
865        ]);
866
867        stats1
868            .combine_sets(
869                &stats2,
870                &DType::Primitive(PType::I32, Nullability::NonNullable),
871            )
872            .unwrap();
873
874        // Min should remain unchanged
875        assert_eq!(stats1.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
876        // Max should be added
877        assert_eq!(stats1.get_as::<i32>(Stat::Max), Some(Precision::exact(100)));
878        // IsStrictSorted should be added
879        assert_eq!(
880            stats1.get_as::<bool>(Stat::IsStrictSorted),
881            Some(Precision::exact(true))
882        );
883    }
884
885    #[test]
886    fn test_combine_sets_with_inexact() {
887        let mut stats1 = StatsSet::from_iter([
888            (Stat::Min, Precision::exact(42)),
889            (Stat::Max, Precision::inexact(100)),
890            (Stat::IsConstant, Precision::exact(false)),
891        ]);
892
893        let stats2 = StatsSet::from_iter([
894            // Must ensure Min from stats2 is <= Min from stats1
895            (Stat::Min, Precision::inexact(40)),
896            (Stat::Max, Precision::exact(90)),
897            (Stat::IsSorted, Precision::exact(true)),
898        ]);
899
900        stats1
901            .combine_sets(
902                &stats2,
903                &DType::Primitive(PType::I32, Nullability::NonNullable),
904            )
905            .unwrap();
906
907        // Min should remain unchanged since it's more restrictive than the inexact value
908        assert_eq!(stats1.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
909        // Check that max was updated with the exact value
910        assert_eq!(stats1.get_as::<i32>(Stat::Max), Some(Precision::exact(90)));
911        // Check that IsSorted was added
912        assert_eq!(
913            stats1.get_as::<bool>(Stat::IsSorted),
914            Some(Precision::exact(true))
915        );
916    }
917}