vortex_array/stats/
stats_set.rs

1use std::fmt::Debug;
2
3use enum_iterator::{Sequence, all};
4use num_traits::CheckedAdd;
5use vortex_dtype::DType;
6use vortex_error::{VortexExpect, VortexResult, vortex_err};
7use vortex_scalar::{Scalar, ScalarValue};
8
9use super::traits::StatsProvider;
10use super::{IsSorted, IsStrictSorted, NullCount, StatType, UncompressedSizeInBytes};
11use crate::stats::{IsConstant, Max, Min, Precision, Stat, StatBound, StatsProviderExt, Sum};
12
13#[derive(Default, Debug, Clone)]
14pub struct StatsSet {
15    values: Vec<(Stat, Precision<ScalarValue>)>,
16}
17
18impl StatsSet {
19    /// Create new StatSet without validating uniqueness of all the entries
20    ///
21    /// # Safety
22    ///
23    /// This method will not panic or trigger UB, but may lead to duplicate stats being stored.
24    pub fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
25        Self { values }
26    }
27
28    /// Specialized constructor for the case where the StatsSet represents
29    /// an array consisting entirely of [null](vortex_dtype::DType::Null) values.
30    pub fn nulls(len: usize) -> Self {
31        let mut stats = Self::new_unchecked(vec![(Stat::NullCount, Precision::exact(len))]);
32
33        if len > 0 {
34            stats.set(Stat::IsConstant, Precision::exact(true));
35            stats.set(Stat::IsSorted, Precision::exact(true));
36            stats.set(Stat::IsStrictSorted, Precision::exact(len < 2));
37        }
38
39        stats
40    }
41
42    // A convenience method for creating a stats set which will represent an empty array.
43    pub fn empty_array() -> StatsSet {
44        StatsSet::new_unchecked(vec![(Stat::NullCount, Precision::exact(0))])
45    }
46
47    pub fn constant(scalar: Scalar, length: usize) -> Self {
48        let (dtype, sv) = scalar.into_parts();
49        let mut stats = Self::default();
50        if length > 0 {
51            stats.extend([
52                (Stat::IsConstant, Precision::exact(true)),
53                (Stat::IsSorted, Precision::exact(true)),
54                (Stat::IsStrictSorted, Precision::exact(length <= 1)),
55            ]);
56        }
57
58        let null_count = if sv.is_null() { length as u64 } else { 0 };
59        stats.set(Stat::NullCount, Precision::exact(null_count));
60
61        if !sv.is_null() {
62            stats.extend([
63                (Stat::Min, Precision::exact(sv.clone())),
64                (Stat::Max, Precision::exact(sv.clone())),
65            ]);
66        }
67
68        if matches!(dtype, DType::Bool(_)) {
69            let bool_val = <Option<bool>>::try_from(&sv).vortex_expect("Checked dtype");
70            let true_count = bool_val
71                .map(|b| if b { length as u64 } else { 0 })
72                .unwrap_or(0);
73            stats.set(Stat::Sum, Precision::exact(true_count));
74        }
75
76        stats
77    }
78
79    pub fn bools_with_sum_and_null_count(true_count: usize, null_count: usize, len: usize) -> Self {
80        StatsSet::new_unchecked(vec![
81            (Stat::Sum, Precision::exact(true_count)),
82            (Stat::NullCount, Precision::exact(null_count)),
83            (Stat::Min, Precision::exact(true_count == len)),
84            (Stat::Max, Precision::exact(true_count > 0)),
85            (
86                Stat::IsConstant,
87                Precision::exact((true_count == 0 && null_count == 0) || true_count == len),
88            ),
89        ])
90    }
91
92    pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
93        Self::new_unchecked(vec![(stat, value)])
94    }
95
96    fn reserve_full_capacity(&mut self) {
97        if self.values.capacity() < Stat::CARDINALITY {
98            self.values
99                .reserve_exact(Stat::CARDINALITY - self.values.capacity());
100        }
101    }
102}
103
104// Getters and setters for individual stats.
105impl StatsSet {
106    /// Set the stat `stat` to `value`.
107    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
108        self.reserve_full_capacity();
109
110        if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
111            *existing = (stat, value);
112        } else {
113            self.values.push((stat, value));
114        }
115    }
116
117    /// Clear the stat `stat` from the set.
118    pub fn clear(&mut self, stat: Stat) {
119        self.values.retain(|(s, _)| *s != stat);
120    }
121
122    pub fn retain_only(&mut self, stats: &[Stat]) {
123        self.values.retain(|(s, _)| stats.contains(s));
124    }
125
126    pub fn keep_inexact_stats(self, inexact_keep: &[Stat]) -> Self {
127        self.values
128            .into_iter()
129            .filter_map(|(s, v)| inexact_keep.contains(&s).then(|| (s, v.into_inexact())))
130            .collect()
131    }
132
133    /// Iterate over the statistic names and values in-place.
134    ///
135    /// See [Iterator].
136    pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
137        self.values.iter()
138    }
139}
140
141// StatSetIntoIter just exists to protect current implementation from exposure on the public API.
142
143/// Owned iterator over the stats.
144///
145/// See [IntoIterator].
146pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
147
148impl Iterator for StatsSetIntoIter {
149    type Item = (Stat, Precision<ScalarValue>);
150
151    fn next(&mut self) -> Option<Self::Item> {
152        self.0.next()
153    }
154}
155
156impl IntoIterator for StatsSet {
157    type Item = (Stat, Precision<ScalarValue>);
158    type IntoIter = StatsSetIntoIter;
159
160    fn into_iter(self) -> Self::IntoIter {
161        StatsSetIntoIter(self.values.into_iter())
162    }
163}
164
165impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
166    fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
167        let iter = iter.into_iter();
168        let mut values = Vec::default();
169        values.reserve_exact(Stat::CARDINALITY);
170
171        let mut this = Self { values };
172        this.extend(iter);
173        this
174    }
175}
176
177impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
178    #[inline]
179    fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
180        let iter = iter.into_iter();
181        self.reserve_full_capacity();
182
183        iter.for_each(|(stat, value)| self.set(stat, value));
184    }
185}
186
187// Merge helpers
188impl StatsSet {
189    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
190    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
191    pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
192        for s in all::<Stat>() {
193            match s {
194                Stat::IsConstant => self.merge_is_constant(other, dtype),
195                Stat::IsSorted => self.merge_is_sorted(other, dtype),
196                Stat::IsStrictSorted => self.merge_is_strict_sorted(other, dtype),
197                Stat::Max => self.merge_max(other, dtype),
198                Stat::Min => self.merge_min(other, dtype),
199                Stat::Sum => self.merge_sum(other, dtype),
200                Stat::NullCount => self.merge_null_count(other),
201                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
202            }
203        }
204
205        self
206    }
207
208    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
209    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
210    pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
211        for s in all::<Stat>() {
212            if !s.is_commutative() {
213                self.clear(s);
214                continue;
215            }
216
217            match s {
218                Stat::IsConstant => self.merge_is_constant(other, dtype),
219                Stat::Max => self.merge_max(other, dtype),
220                Stat::Min => self.merge_min(other, dtype),
221                Stat::Sum => self.merge_sum(other, dtype),
222                Stat::NullCount => self.merge_null_count(other),
223                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
224                Stat::IsSorted | Stat::IsStrictSorted => {
225                    unreachable!("not commutative")
226                }
227            }
228        }
229
230        self
231    }
232
233    // given two sets of stats (of differing precision) for the same array, combine them
234    pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
235        let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
236        for s in other_stats {
237            match s {
238                Stat::Max => self.combine_bound::<Max>(other, dtype)?,
239                Stat::Min => self.combine_bound::<Min>(other, dtype)?,
240                Stat::UncompressedSizeInBytes => {
241                    self.combine_bound::<UncompressedSizeInBytes>(other, dtype)?
242                }
243                Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
244                Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
245                Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
246                Stat::NullCount => self.combine_bound::<NullCount>(other, dtype)?,
247                Stat::Sum => self.combine_bound::<Sum>(other, dtype)?,
248            }
249        }
250        Ok(())
251    }
252
253    fn combine_bound<S: StatType<Scalar>>(
254        &mut self,
255        other: &Self,
256        dtype: &DType,
257    ) -> VortexResult<()>
258    where
259        S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
260    {
261        match (
262            self.get_scalar_bound::<S>(dtype),
263            other.get_scalar_bound::<S>(dtype),
264        ) {
265            (Some(m1), Some(m2)) => {
266                let meet = m1
267                    .intersection(&m2)
268                    .vortex_expect("can always compare scalar")
269                    .ok_or_else(|| {
270                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
271                    })?;
272                if meet != m1 {
273                    self.set(S::STAT, meet.into_value().map(Scalar::into_value));
274                }
275            }
276            (None, Some(m)) => self.set(S::STAT, m.into_value().map(Scalar::into_value)),
277            (Some(_), _) => (),
278            (None, None) => self.clear(S::STAT),
279        }
280        Ok(())
281    }
282
283    fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &Self) -> VortexResult<()>
284    where
285        S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
286    {
287        match (
288            self.get_as_bound::<S, bool>(),
289            other.get_as_bound::<S, bool>(),
290        ) {
291            (Some(m1), Some(m2)) => {
292                let intersection = m1
293                    .intersection(&m2)
294                    .vortex_expect("can always compare boolean")
295                    .ok_or_else(|| {
296                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
297                    })?;
298                if intersection != m1 {
299                    self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
300                }
301            }
302            (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
303            (Some(_), None) => (),
304            (None, None) => self.clear(S::STAT),
305        }
306        Ok(())
307    }
308
309    fn merge_min(&mut self, other: &Self, dtype: &DType) {
310        match (
311            self.get_scalar_bound::<Min>(dtype),
312            other.get_scalar_bound::<Min>(dtype),
313        ) {
314            (Some(m1), Some(m2)) => {
315                let meet = m1.union(&m2).vortex_expect("can compare scalar");
316                if meet != m1 {
317                    self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
318                }
319            }
320            _ => self.clear(Stat::Min),
321        }
322    }
323
324    fn merge_max(&mut self, other: &Self, dtype: &DType) {
325        match (
326            self.get_scalar_bound::<Max>(dtype),
327            other.get_scalar_bound::<Max>(dtype),
328        ) {
329            (Some(m1), Some(m2)) => {
330                let meet = m1.union(&m2).vortex_expect("can compare scalar");
331                if meet != m1 {
332                    self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
333                }
334            }
335            _ => self.clear(Stat::Max),
336        }
337    }
338
339    fn merge_sum(&mut self, other: &Self, dtype: &DType) {
340        match (
341            self.get_scalar_bound::<Sum>(dtype),
342            other.get_scalar_bound::<Sum>(dtype),
343        ) {
344            (Some(m1), Some(m2)) => {
345                // If the combine sum is exact, then we can sum them.
346                if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
347                    s1.as_primitive()
348                        .checked_add(&s2.as_primitive())
349                        .map(|pscalar| {
350                            pscalar
351                                .pvalue()
352                                .map(|pvalue| {
353                                    Scalar::primitive_value(
354                                        pvalue,
355                                        pscalar.ptype(),
356                                        pscalar.dtype().nullability(),
357                                    )
358                                    .into_value()
359                                })
360                                .unwrap_or_else(ScalarValue::null)
361                        })
362                }) {
363                    self.set(Stat::Sum, Precision::Exact(scalar_value));
364                }
365            }
366            _ => self.clear(Stat::Sum),
367        }
368    }
369
370    fn merge_is_constant(&mut self, other: &Self, dtype: &DType) {
371        let self_const = self.get_as(Stat::IsConstant);
372        let other_const = other.get_as(Stat::IsConstant);
373        let self_min = self.get_scalar(Stat::Min, dtype);
374        let other_min = other.get_scalar(Stat::Min, dtype);
375
376        if let (
377            Some(Precision::Exact(self_const)),
378            Some(Precision::Exact(other_const)),
379            Some(Precision::Exact(self_min)),
380            Some(Precision::Exact(other_min)),
381        ) = (self_const, other_const, self_min, other_min)
382        {
383            if self_const && other_const && self_min == other_min {
384                self.set(Stat::IsConstant, Precision::exact(true));
385            } else {
386                self.set(Stat::IsConstant, Precision::inexact(false));
387            }
388        }
389        self.set(Stat::IsConstant, Precision::exact(false));
390    }
391
392    fn merge_is_sorted(&mut self, other: &Self, dtype: &DType) {
393        self.merge_sortedness_stat(other, Stat::IsSorted, dtype, PartialOrd::le)
394    }
395
396    fn merge_is_strict_sorted(&mut self, other: &Self, dtype: &DType) {
397        self.merge_sortedness_stat(other, Stat::IsStrictSorted, dtype, PartialOrd::lt)
398    }
399
400    fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
401        &mut self,
402        other: &Self,
403        stat: Stat,
404        dtype: &DType,
405        cmp: F,
406    ) {
407        if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
408            == (self.get_as(stat), other.get_as(stat))
409        {
410            // There might be no stat because it was dropped, or it doesn't exist
411            // (e.g. an all null array).
412            // We assume that it was the dropped case since the doesn't exist might imply sorted,
413            // but this in-precision is correct.
414            if let (Some(self_max), Some(other_min)) = (
415                self.get_scalar_bound::<Max>(dtype),
416                other.get_scalar_bound::<Min>(dtype),
417            ) {
418                return if cmp(&self_max.max_value(), &other_min.min_value()) {
419                    // keep value
420                } else {
421                    self.set(stat, Precision::inexact(false));
422                };
423            }
424        }
425        self.clear(stat);
426    }
427
428    fn merge_null_count(&mut self, other: &Self) {
429        self.merge_sum_stat(Stat::NullCount, other)
430    }
431
432    fn merge_uncompressed_size_in_bytes(&mut self, other: &Self) {
433        self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
434    }
435
436    fn merge_sum_stat(&mut self, stat: Stat, other: &Self) {
437        match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
438            (Some(nc1), Some(nc2)) => {
439                self.set(
440                    stat,
441                    nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
442                );
443            }
444            _ => self.clear(stat),
445        }
446    }
447}
448
449impl StatsProvider for StatsSet {
450    fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
451        self.values
452            .iter()
453            .find(|(s, _)| *s == stat)
454            .map(|(_, v)| v.clone())
455    }
456
457    fn len(&self) -> usize {
458        self.values.len()
459    }
460}
461
462#[cfg(test)]
463mod test {
464    use enum_iterator::all;
465    use itertools::Itertools;
466    use vortex_dtype::{DType, Nullability, PType};
467
468    use crate::Array;
469    use crate::arrays::PrimitiveArray;
470    use crate::stats::{IsConstant, Precision, Stat, StatsProvider, StatsProviderExt, StatsSet};
471
472    #[test]
473    fn test_iter() {
474        let set = StatsSet::new_unchecked(vec![
475            (Stat::Max, Precision::exact(100)),
476            (Stat::Min, Precision::exact(42)),
477        ]);
478        let mut iter = set.iter();
479        let first = iter.next().unwrap().clone();
480        assert_eq!(first.0, Stat::Max);
481        assert_eq!(
482            first.1.map(|f| i32::try_from(&f).unwrap()),
483            Precision::exact(100)
484        );
485        let snd = iter.next().unwrap().clone();
486        assert_eq!(snd.0, Stat::Min);
487        assert_eq!(snd.1.map(|s| i32::try_from(&s).unwrap()), 42);
488    }
489
490    #[test]
491    fn into_iter() {
492        let mut set = StatsSet::new_unchecked(vec![
493            (Stat::Max, Precision::exact(100)),
494            (Stat::Min, Precision::exact(42)),
495        ])
496        .into_iter();
497        let (stat, first) = set.next().unwrap();
498        assert_eq!(stat, Stat::Max);
499        assert_eq!(
500            first.map(|f| i32::try_from(&f).unwrap()),
501            Precision::exact(100)
502        );
503        let snd = set.next().unwrap();
504        assert_eq!(snd.0, Stat::Min);
505        assert_eq!(
506            snd.1.map(|s| i32::try_from(&s).unwrap()),
507            Precision::exact(42)
508        );
509    }
510
511    #[test]
512    fn merge_constant() {
513        let first = StatsSet::from_iter([
514            (Stat::Min, Precision::exact(42)),
515            (Stat::IsConstant, Precision::exact(true)),
516        ])
517        .merge_ordered(
518            &StatsSet::from_iter([
519                (Stat::Min, Precision::inexact(42)),
520                (Stat::IsConstant, Precision::exact(true)),
521            ]),
522            &DType::Primitive(PType::I32, Nullability::NonNullable),
523        );
524        assert_eq!(
525            first.get_as::<bool>(Stat::IsConstant),
526            Some(Precision::exact(false))
527        );
528        assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
529    }
530
531    #[test]
532    fn merge_into_min() {
533        let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
534            &StatsSet::default(),
535            &DType::Primitive(PType::I32, Nullability::NonNullable),
536        );
537        assert!(first.get(Stat::Min).is_none());
538    }
539
540    #[test]
541    fn merge_from_min() {
542        let first = StatsSet::default().merge_ordered(
543            &StatsSet::of(Stat::Min, Precision::exact(42)),
544            &DType::Primitive(PType::I32, Nullability::NonNullable),
545        );
546        assert!(first.get(Stat::Min).is_none());
547    }
548
549    #[test]
550    fn merge_mins() {
551        let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
552            &StatsSet::of(Stat::Min, Precision::exact(42)),
553            &DType::Primitive(PType::I32, Nullability::NonNullable),
554        );
555        assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(37)));
556    }
557
558    #[test]
559    fn merge_into_bound_max() {
560        let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
561            &StatsSet::default(),
562            &DType::Primitive(PType::I32, Nullability::NonNullable),
563        );
564        assert!(first.get(Stat::Max).is_none());
565    }
566
567    #[test]
568    fn merge_from_max() {
569        let first = StatsSet::default().merge_ordered(
570            &StatsSet::of(Stat::Max, Precision::exact(42)),
571            &DType::Primitive(PType::I32, Nullability::NonNullable),
572        );
573        assert!(first.get(Stat::Max).is_none());
574    }
575
576    #[test]
577    fn merge_maxes() {
578        let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
579            &StatsSet::of(Stat::Max, Precision::exact(42)),
580            &DType::Primitive(PType::I32, Nullability::NonNullable),
581        );
582        assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::exact(42)));
583    }
584
585    #[test]
586    fn merge_maxes_bound() {
587        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
588        let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
589            .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
590        assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::inexact(43)));
591    }
592
593    #[test]
594    fn merge_into_scalar() {
595        let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
596            &StatsSet::default(),
597            &DType::Primitive(PType::I32, Nullability::NonNullable),
598        );
599        assert!(first.get(Stat::Sum).is_none());
600    }
601
602    #[test]
603    fn merge_from_scalar() {
604        let first = StatsSet::default().merge_ordered(
605            &StatsSet::of(Stat::Sum, Precision::exact(42)),
606            &DType::Primitive(PType::I32, Nullability::NonNullable),
607        );
608        assert!(first.get(Stat::Sum).is_none());
609    }
610
611    #[test]
612    fn merge_scalars() {
613        let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
614            &StatsSet::of(Stat::Sum, Precision::exact(42)),
615            &DType::Primitive(PType::I32, Nullability::NonNullable),
616        );
617        assert_eq!(
618            first.get_as::<usize>(Stat::Sum),
619            Some(Precision::exact(79usize))
620        );
621    }
622
623    #[test]
624    fn merge_into_sortedness() {
625        let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
626            &StatsSet::default(),
627            &DType::Primitive(PType::I32, Nullability::NonNullable),
628        );
629        assert!(first.get(Stat::IsStrictSorted).is_none());
630    }
631
632    #[test]
633    fn merge_from_sortedness() {
634        let first = StatsSet::default().merge_ordered(
635            &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
636            &DType::Primitive(PType::I32, Nullability::NonNullable),
637        );
638        assert!(first.get(Stat::IsStrictSorted).is_none());
639    }
640
641    #[test]
642    fn merge_sortedness() {
643        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
644        first.set(Stat::Max, Precision::exact(1));
645        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
646        second.set(Stat::Min, Precision::exact(2));
647        first = first.merge_ordered(
648            &second,
649            &DType::Primitive(PType::I32, Nullability::NonNullable),
650        );
651        assert_eq!(
652            first.get_as::<bool>(Stat::IsStrictSorted),
653            Some(Precision::exact(true))
654        );
655    }
656
657    #[test]
658    fn merge_sortedness_out_of_order() {
659        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
660        first.set(Stat::Min, Precision::exact(1));
661        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
662        second.set(Stat::Max, Precision::exact(2));
663        second = second.merge_ordered(
664            &first,
665            &DType::Primitive(PType::I32, Nullability::NonNullable),
666        );
667        assert_eq!(
668            second.get_as::<bool>(Stat::IsStrictSorted),
669            Some(Precision::inexact(false))
670        );
671    }
672
673    #[test]
674    fn merge_sortedness_only_one_sorted() {
675        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
676        first.set(Stat::Max, Precision::exact(1));
677        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
678        second.set(Stat::Min, Precision::exact(2));
679        first.merge_ordered(
680            &second,
681            &DType::Primitive(PType::I32, Nullability::NonNullable),
682        );
683        assert_eq!(
684            second.get_as::<bool>(Stat::IsStrictSorted),
685            Some(Precision::exact(false))
686        );
687    }
688
689    #[test]
690    fn merge_sortedness_missing_min() {
691        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
692        first.set(Stat::Max, Precision::exact(1));
693        let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
694        first = first.merge_ordered(
695            &second,
696            &DType::Primitive(PType::I32, Nullability::NonNullable),
697        );
698        assert!(first.get(Stat::IsStrictSorted).is_none());
699    }
700
701    #[test]
702    fn merge_sortedness_bound_min() {
703        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
704        first.set(Stat::Max, Precision::exact(1));
705        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
706        second.set(Stat::Min, Precision::inexact(2));
707        first = first.merge_ordered(
708            &second,
709            &DType::Primitive(PType::I32, Nullability::NonNullable),
710        );
711        assert_eq!(
712            first.get_as::<bool>(Stat::IsStrictSorted),
713            Some(Precision::exact(true))
714        );
715    }
716
717    #[test]
718    fn merge_unordered() {
719        let array =
720            PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
721        let all_stats = all::<Stat>()
722            .filter(|s| !matches!(s, Stat::Sum))
723            .collect_vec();
724        array.statistics().compute_all(&all_stats).unwrap();
725
726        let stats = array.statistics().to_owned();
727        for stat in &all_stats {
728            assert!(stats.get(*stat).is_some(), "Stat {} is missing", stat);
729        }
730
731        let merged = stats.clone().merge_unordered(
732            &stats,
733            &DType::Primitive(PType::I32, Nullability::NonNullable),
734        );
735        for stat in &all_stats {
736            assert_eq!(
737                merged.get(*stat).is_some(),
738                stat.is_commutative(),
739                "Stat {} remains after merge_unordered despite not being commutative, or was removed despite being commutative",
740                stat
741            )
742        }
743
744        assert_eq!(
745            merged.get_as::<i32>(Stat::Min),
746            stats.get_as::<i32>(Stat::Min)
747        );
748        assert_eq!(
749            merged.get_as::<i32>(Stat::Max),
750            stats.get_as::<i32>(Stat::Max)
751        );
752        assert_eq!(
753            merged.get_as::<u64>(Stat::NullCount).unwrap(),
754            stats.get_as::<u64>(Stat::NullCount).unwrap().map(|s| s * 2)
755        );
756    }
757
758    #[test]
759    fn merge_min_bound_same() {
760        // Merging a stat with a bound and another with an exact results in exact stat.
761        // since bound for min is a lower bound, it can in fact contain any value >= bound.
762        let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
763            &StatsSet::of(Stat::Min, Precision::exact(5)),
764            &DType::Primitive(PType::I32, Nullability::NonNullable),
765        );
766        assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::exact(5)));
767    }
768
769    #[test]
770    fn merge_min_bound_bound_lower() {
771        let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
772            &StatsSet::of(Stat::Min, Precision::exact(5)),
773            &DType::Primitive(PType::I32, Nullability::NonNullable),
774        );
775        assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::inexact(4)));
776    }
777
778    #[test]
779    fn retain_approx() {
780        let set = StatsSet::from_iter([
781            (Stat::Max, Precision::exact(100)),
782            (Stat::Min, Precision::exact(50)),
783            (Stat::Sum, Precision::inexact(10)),
784        ]);
785
786        let set = set.keep_inexact_stats(&[Stat::Min, Stat::Max]);
787
788        assert_eq!(set.len(), 2);
789        assert_eq!(set.get_as::<i32>(Stat::Max), Some(Precision::inexact(100)));
790        assert_eq!(set.get_as::<i32>(Stat::Min), Some(Precision::inexact(50)));
791        assert_eq!(set.get_as::<i32>(Stat::Sum), None);
792    }
793
794    #[test]
795    fn test_combine_is_constant() {
796        {
797            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
798            let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
799            stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
800            assert_eq!(
801                stats.get_as::<bool>(Stat::IsConstant),
802                Some(Precision::exact(true))
803            );
804        }
805
806        {
807            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
808            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
809            stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
810            assert_eq!(
811                stats.get_as::<bool>(Stat::IsConstant),
812                Some(Precision::exact(true))
813            );
814        }
815
816        {
817            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
818            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
819            stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
820            assert_eq!(
821                stats.get_as::<bool>(Stat::IsConstant),
822                Some(Precision::exact(false))
823            );
824        }
825    }
826
827    #[test]
828    fn test_combine_sets_boolean_conflict() {
829        let mut stats1 = StatsSet::from_iter([
830            (Stat::IsConstant, Precision::exact(true)),
831            (Stat::IsSorted, Precision::exact(true)),
832        ]);
833
834        let stats2 = StatsSet::from_iter([
835            (Stat::IsConstant, Precision::exact(false)),
836            (Stat::IsSorted, Precision::exact(true)),
837        ]);
838
839        let result = stats1.combine_sets(
840            &stats2,
841            &DType::Primitive(PType::I32, Nullability::NonNullable),
842        );
843        assert!(result.is_err());
844    }
845
846    #[test]
847    fn test_combine_sets_with_missing_stats() {
848        let mut stats1 = StatsSet::from_iter([
849            (Stat::Min, Precision::exact(42)),
850            (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
851        ]);
852
853        let stats2 = StatsSet::from_iter([
854            (Stat::Max, Precision::exact(100)),
855            (Stat::IsStrictSorted, Precision::exact(true)),
856        ]);
857
858        stats1
859            .combine_sets(
860                &stats2,
861                &DType::Primitive(PType::I32, Nullability::NonNullable),
862            )
863            .unwrap();
864
865        // Min should remain unchanged
866        assert_eq!(stats1.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
867        // Max should be added
868        assert_eq!(stats1.get_as::<i32>(Stat::Max), Some(Precision::exact(100)));
869        // IsStrictSorted should be added
870        assert_eq!(
871            stats1.get_as::<bool>(Stat::IsStrictSorted),
872            Some(Precision::exact(true))
873        );
874    }
875
876    #[test]
877    fn test_combine_sets_with_inexact() {
878        let mut stats1 = StatsSet::from_iter([
879            (Stat::Min, Precision::exact(42)),
880            (Stat::Max, Precision::inexact(100)),
881            (Stat::IsConstant, Precision::exact(false)),
882        ]);
883
884        let stats2 = StatsSet::from_iter([
885            // Must ensure Min from stats2 is <= Min from stats1
886            (Stat::Min, Precision::inexact(40)),
887            (Stat::Max, Precision::exact(90)),
888            (Stat::IsSorted, Precision::exact(true)),
889        ]);
890
891        stats1
892            .combine_sets(
893                &stats2,
894                &DType::Primitive(PType::I32, Nullability::NonNullable),
895            )
896            .unwrap();
897
898        // Min should remain unchanged since it's more restrictive than the inexact value
899        assert_eq!(stats1.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
900        // Check that max was updated with the exact value
901        assert_eq!(stats1.get_as::<i32>(Stat::Max), Some(Precision::exact(90)));
902        // Check that IsSorted was added
903        assert_eq!(
904            stats1.get_as::<bool>(Stat::IsSorted),
905            Some(Precision::exact(true))
906        );
907    }
908}