vortex_array/stats/
stats_set.rs

1use enum_iterator::{Sequence, all};
2use num_traits::CheckedAdd;
3use vortex_dtype::DType;
4use vortex_error::{VortexExpect, VortexResult, vortex_err};
5use vortex_scalar::{Scalar, ScalarValue};
6
7use super::traits::StatsProvider;
8use crate::stats::{IsConstant, Max, Min, Precision, Stat, StatBound, StatsProviderExt, Sum};
9
10#[derive(Default, Debug, Clone)]
11pub struct StatsSet {
12    values: Vec<(Stat, Precision<ScalarValue>)>,
13}
14
15impl StatsSet {
16    /// Create new StatSet without validating uniqueness of all the entries
17    ///
18    /// # Safety
19    ///
20    /// This method will not panic or trigger UB, but may lead to duplicate stats being stored.
21    pub fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
22        Self { values }
23    }
24
25    /// Specialized constructor for the case where the StatsSet represents
26    /// an array consisting entirely of [null](vortex_dtype::DType::Null) values.
27    pub fn nulls(len: usize) -> Self {
28        let mut stats = Self::new_unchecked(vec![(Stat::NullCount, Precision::exact(len))]);
29
30        if len > 0 {
31            stats.set(Stat::IsConstant, Precision::exact(true));
32            stats.set(Stat::IsSorted, Precision::exact(true));
33            stats.set(Stat::IsStrictSorted, Precision::exact(len < 2));
34        }
35
36        stats
37    }
38
39    // A convenience method for creating a stats set which will represent an empty array.
40    pub fn empty_array() -> StatsSet {
41        StatsSet::new_unchecked(vec![(Stat::NullCount, Precision::exact(0))])
42    }
43
44    pub fn constant(scalar: Scalar, length: usize) -> Self {
45        let (dtype, sv) = scalar.into_parts();
46        let mut stats = Self::default();
47        if length > 0 {
48            stats.extend([
49                (Stat::IsConstant, Precision::exact(true)),
50                (Stat::IsSorted, Precision::exact(true)),
51                (Stat::IsStrictSorted, Precision::exact(length <= 1)),
52            ]);
53        }
54
55        let null_count = if sv.is_null() { length as u64 } else { 0 };
56        stats.set(Stat::NullCount, Precision::exact(null_count));
57
58        if !sv.is_null() {
59            stats.extend([
60                (Stat::Min, Precision::exact(sv.clone())),
61                (Stat::Max, Precision::exact(sv.clone())),
62            ]);
63        }
64
65        if matches!(dtype, DType::Bool(_)) {
66            let bool_val = <Option<bool>>::try_from(&sv).vortex_expect("Checked dtype");
67            let true_count = bool_val
68                .map(|b| if b { length as u64 } else { 0 })
69                .unwrap_or(0);
70            stats.set(Stat::Sum, Precision::exact(true_count));
71        }
72
73        stats
74    }
75
76    pub fn bools_with_sum_and_null_count(true_count: usize, null_count: usize, len: usize) -> Self {
77        StatsSet::new_unchecked(vec![
78            (Stat::Sum, Precision::exact(true_count)),
79            (Stat::NullCount, Precision::exact(null_count)),
80            (Stat::Min, Precision::exact(true_count == len)),
81            (Stat::Max, Precision::exact(true_count > 0)),
82            (
83                Stat::IsConstant,
84                Precision::exact((true_count == 0 && null_count == 0) || true_count == len),
85            ),
86        ])
87    }
88
89    pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
90        Self::new_unchecked(vec![(stat, value)])
91    }
92
93    fn reserve_full_capacity(&mut self) {
94        if self.values.capacity() < Stat::CARDINALITY {
95            self.values
96                .reserve_exact(Stat::CARDINALITY - self.values.capacity());
97        }
98    }
99}
100
101// Getters and setters for individual stats.
102impl StatsSet {
103    /// Set the stat `stat` to `value`.
104    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
105        self.reserve_full_capacity();
106
107        if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
108            *existing = (stat, value);
109        } else {
110            self.values.push((stat, value));
111        }
112    }
113
114    /// Clear the stat `stat` from the set.
115    pub fn clear(&mut self, stat: Stat) {
116        self.values.retain(|(s, _)| *s != stat);
117    }
118
119    pub fn retain_only(&mut self, stats: &[Stat]) {
120        self.values.retain(|(s, _)| stats.contains(s));
121    }
122
123    pub fn keep_inexact_stats(self, inexact_keep: &[Stat]) -> Self {
124        self.values
125            .into_iter()
126            .filter_map(|(s, v)| inexact_keep.contains(&s).then(|| (s, v.into_inexact())))
127            .collect()
128    }
129
130    /// Iterate over the statistic names and values in-place.
131    ///
132    /// See [Iterator].
133    pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
134        self.values.iter()
135    }
136}
137
// StatsSetIntoIter exists only to keep the current implementation out of the public API.
139
/// Owned iterator over the `(Stat, Precision<ScalarValue>)` entries of a stats set.
///
/// Wraps a `std::vec::IntoIter` over the entries. See [IntoIterator].
pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
144
145impl Iterator for StatsSetIntoIter {
146    type Item = (Stat, Precision<ScalarValue>);
147
148    fn next(&mut self) -> Option<Self::Item> {
149        self.0.next()
150    }
151}
152
153impl IntoIterator for StatsSet {
154    type Item = (Stat, Precision<ScalarValue>);
155    type IntoIter = StatsSetIntoIter;
156
157    fn into_iter(self) -> Self::IntoIter {
158        StatsSetIntoIter(self.values.into_iter())
159    }
160}
161
162impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
163    fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
164        let iter = iter.into_iter();
165        let mut values = Vec::default();
166        values.reserve_exact(Stat::CARDINALITY);
167
168        let mut this = Self { values };
169        this.extend(iter);
170        this
171    }
172}
173
174impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
175    #[inline]
176    fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
177        let iter = iter.into_iter();
178        self.reserve_full_capacity();
179
180        iter.for_each(|(stat, value)| self.set(stat, value));
181    }
182}
183
184// Merge helpers
185impl StatsSet {
186    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
187    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
188    pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
189        for s in all::<Stat>() {
190            match s {
191                Stat::IsConstant => self.merge_is_constant(other, dtype),
192                Stat::IsSorted => self.merge_is_sorted(other, dtype),
193                Stat::IsStrictSorted => self.merge_is_strict_sorted(other, dtype),
194                Stat::Max => self.merge_max(other, dtype),
195                Stat::Min => self.merge_min(other, dtype),
196                Stat::Sum => self.merge_sum(other, dtype),
197                Stat::NullCount => self.merge_null_count(other),
198                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
199            }
200        }
201
202        self
203    }
204
205    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
206    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
207    pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
208        for s in all::<Stat>() {
209            if !s.is_commutative() {
210                self.clear(s);
211                continue;
212            }
213
214            match s {
215                Stat::IsConstant => self.merge_is_constant(other, dtype),
216                Stat::Max => self.merge_max(other, dtype),
217                Stat::Min => self.merge_min(other, dtype),
218                Stat::Sum => self.merge_sum(other, dtype),
219                Stat::NullCount => self.merge_null_count(other),
220                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
221                Stat::IsSorted | Stat::IsStrictSorted => {
222                    unreachable!("not commutative")
223                }
224            }
225        }
226
227        self
228    }
229
230    // given two sets of stats (of differing precision) for the same array, combine them
231    pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
232        self.combine_max(other, dtype)?;
233        self.combine_min(other, dtype)?;
234        self.combine_is_constant(other)
235    }
236
237    fn combine_min(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
238        match (
239            self.get_scalar_bound::<Min>(dtype),
240            other.get_scalar_bound::<Min>(dtype),
241        ) {
242            (Some(m1), Some(m2)) => {
243                let meet = m1
244                    .intersection(&m2)
245                    .vortex_expect("can always compare scalar")
246                    .ok_or_else(|| vortex_err!("Min bounds ({m1:?}, {m2:?}) do not overlap"))?;
247                if meet != m1 {
248                    self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
249                }
250            }
251            (None, Some(m)) => self.set(Stat::Min, m.into_value().map(Scalar::into_value)),
252            (Some(_), _) => (),
253            (None, None) => self.clear(Stat::Min),
254        }
255        Ok(())
256    }
257
258    fn combine_max(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
259        match (
260            self.get_scalar_bound::<Max>(dtype),
261            other.get_scalar_bound::<Max>(dtype),
262        ) {
263            (Some(m1), Some(m2)) => {
264                let meet = m1
265                    .intersection(&m2)
266                    .vortex_expect("can always compare scalar")
267                    .ok_or_else(|| vortex_err!("Max bounds ({m1:?}, {m2:?}) do not overlap"))?;
268                if meet != m1 {
269                    self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
270                }
271            }
272            (None, Some(m)) => self.set(Stat::Max, m.into_value().map(Scalar::into_value)),
273            (Some(_), None) => (),
274            (None, None) => self.clear(Stat::Max),
275        }
276        Ok(())
277    }
278
279    fn combine_is_constant(&mut self, other: &Self) -> VortexResult<()> {
280        match (
281            self.get_as_bound::<IsConstant, bool>(),
282            other.get_as_bound::<IsConstant, bool>(),
283        ) {
284            (Some(m1), Some(m2)) => {
285                let intersection = m1
286                    .intersection(&m2)
287                    .vortex_expect("can always compare scalar")
288                    .ok_or_else(|| {
289                        vortex_err!("IsConstant bounds ({m1:?}, {m2:?}) do not overlap")
290                    })?;
291                if intersection != m1 {
292                    self.set(Stat::IsConstant, intersection.map(ScalarValue::from));
293                }
294            }
295            (None, Some(m)) => self.set(Stat::IsConstant, m.map(ScalarValue::from)),
296            (Some(_), None) => (),
297            (None, None) => self.clear(Stat::IsConstant),
298        }
299        Ok(())
300    }
301
302    fn merge_min(&mut self, other: &Self, dtype: &DType) {
303        match (
304            self.get_scalar_bound::<Min>(dtype),
305            other.get_scalar_bound::<Min>(dtype),
306        ) {
307            (Some(m1), Some(m2)) => {
308                let meet = m1.union(&m2).vortex_expect("can compare scalar");
309                if meet != m1 {
310                    self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
311                }
312            }
313            _ => self.clear(Stat::Min),
314        }
315    }
316
317    fn merge_max(&mut self, other: &Self, dtype: &DType) {
318        match (
319            self.get_scalar_bound::<Max>(dtype),
320            other.get_scalar_bound::<Max>(dtype),
321        ) {
322            (Some(m1), Some(m2)) => {
323                let meet = m1.union(&m2).vortex_expect("can compare scalar");
324                if meet != m1 {
325                    self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
326                }
327            }
328            _ => self.clear(Stat::Max),
329        }
330    }
331
332    fn merge_sum(&mut self, other: &Self, dtype: &DType) {
333        match (
334            self.get_scalar_bound::<Sum>(dtype),
335            other.get_scalar_bound::<Sum>(dtype),
336        ) {
337            (Some(m1), Some(m2)) => {
338                // If the combine sum is exact, then we can sum them.
339                if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
340                    s1.as_primitive()
341                        .checked_add(&s2.as_primitive())
342                        .map(|pscalar| {
343                            pscalar
344                                .pvalue()
345                                .map(|pvalue| {
346                                    Scalar::primitive_value(
347                                        pvalue,
348                                        pscalar.ptype(),
349                                        pscalar.dtype().nullability(),
350                                    )
351                                    .into_value()
352                                })
353                                .unwrap_or_else(ScalarValue::null)
354                        })
355                }) {
356                    self.set(Stat::Sum, Precision::Exact(scalar_value));
357                }
358            }
359            _ => self.clear(Stat::Sum),
360        }
361    }
362
363    fn merge_is_constant(&mut self, other: &Self, dtype: &DType) {
364        let self_const = self.get_as(Stat::IsConstant);
365        let other_const = other.get_as(Stat::IsConstant);
366        let self_min = self.get_scalar(Stat::Min, dtype);
367        let other_min = other.get_scalar(Stat::Min, dtype);
368
369        if let (
370            Some(Precision::Exact(self_const)),
371            Some(Precision::Exact(other_const)),
372            Some(Precision::Exact(self_min)),
373            Some(Precision::Exact(other_min)),
374        ) = (self_const, other_const, self_min, other_min)
375        {
376            if self_const && other_const && self_min == other_min {
377                self.set(Stat::IsConstant, Precision::exact(true));
378            } else {
379                self.set(Stat::IsConstant, Precision::inexact(false));
380            }
381        }
382        self.set(Stat::IsConstant, Precision::exact(false));
383    }
384
385    fn merge_is_sorted(&mut self, other: &Self, dtype: &DType) {
386        self.merge_sortedness_stat(other, Stat::IsSorted, dtype, PartialOrd::le)
387    }
388
389    fn merge_is_strict_sorted(&mut self, other: &Self, dtype: &DType) {
390        self.merge_sortedness_stat(other, Stat::IsStrictSorted, dtype, PartialOrd::lt)
391    }
392
393    fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
394        &mut self,
395        other: &Self,
396        stat: Stat,
397        dtype: &DType,
398        cmp: F,
399    ) {
400        if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
401            == (self.get_as(stat), other.get_as(stat))
402        {
403            // There might be no stat because it was dropped, or it doesn't exist
404            // (e.g. an all null array).
405            // We assume that it was the dropped case since the doesn't exist might imply sorted,
406            // but this in-precision is correct.
407            if let (Some(self_max), Some(other_min)) = (
408                self.get_scalar_bound::<Max>(dtype),
409                other.get_scalar_bound::<Min>(dtype),
410            ) {
411                return if cmp(&self_max.max_value(), &other_min.min_value()) {
412                    // keep value
413                } else {
414                    self.set(stat, Precision::inexact(false));
415                };
416            }
417        }
418        self.clear(stat);
419    }
420
421    fn merge_null_count(&mut self, other: &Self) {
422        self.merge_sum_stat(Stat::NullCount, other)
423    }
424
425    fn merge_uncompressed_size_in_bytes(&mut self, other: &Self) {
426        self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
427    }
428
429    fn merge_sum_stat(&mut self, stat: Stat, other: &Self) {
430        match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
431            (Some(nc1), Some(nc2)) => {
432                self.set(
433                    stat,
434                    nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
435                );
436            }
437            _ => self.clear(stat),
438        }
439    }
440}
441
442impl StatsProvider for StatsSet {
443    fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
444        self.values
445            .iter()
446            .find(|(s, _)| *s == stat)
447            .map(|(_, v)| v.clone())
448    }
449
450    fn len(&self) -> usize {
451        self.values.len()
452    }
453}
454
455#[cfg(test)]
456mod test {
457    use enum_iterator::all;
458    use itertools::Itertools;
459    use vortex_dtype::{DType, Nullability, PType};
460
461    use crate::Array;
462    use crate::arrays::PrimitiveArray;
463    use crate::stats::{Precision, Stat, StatsProvider, StatsProviderExt, StatsSet};
464
465    #[test]
466    fn test_iter() {
467        let set = StatsSet::new_unchecked(vec![
468            (Stat::Max, Precision::exact(100)),
469            (Stat::Min, Precision::exact(42)),
470        ]);
471        let mut iter = set.iter();
472        let first = iter.next().unwrap().clone();
473        assert_eq!(first.0, Stat::Max);
474        assert_eq!(
475            first.1.map(|f| i32::try_from(&f).unwrap()),
476            Precision::exact(100)
477        );
478        let snd = iter.next().unwrap().clone();
479        assert_eq!(snd.0, Stat::Min);
480        assert_eq!(snd.1.map(|s| i32::try_from(&s).unwrap()), 42);
481    }
482
483    #[test]
484    fn into_iter() {
485        let mut set = StatsSet::new_unchecked(vec![
486            (Stat::Max, Precision::exact(100)),
487            (Stat::Min, Precision::exact(42)),
488        ])
489        .into_iter();
490        let (stat, first) = set.next().unwrap();
491        assert_eq!(stat, Stat::Max);
492        assert_eq!(
493            first.map(|f| i32::try_from(&f).unwrap()),
494            Precision::exact(100)
495        );
496        let snd = set.next().unwrap();
497        assert_eq!(snd.0, Stat::Min);
498        assert_eq!(
499            snd.1.map(|s| i32::try_from(&s).unwrap()),
500            Precision::exact(42)
501        );
502    }
503
504    #[test]
505    fn merge_constant() {
506        let first = StatsSet::from_iter([
507            (Stat::Min, Precision::exact(42)),
508            (Stat::IsConstant, Precision::exact(true)),
509        ])
510        .merge_ordered(
511            &StatsSet::from_iter([
512                (Stat::Min, Precision::inexact(42)),
513                (Stat::IsConstant, Precision::exact(true)),
514            ]),
515            &DType::Primitive(PType::I32, Nullability::NonNullable),
516        );
517        assert_eq!(
518            first.get_as::<bool>(Stat::IsConstant),
519            Some(Precision::exact(false))
520        );
521        assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
522    }
523
524    #[test]
525    fn merge_into_min() {
526        let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
527            &StatsSet::default(),
528            &DType::Primitive(PType::I32, Nullability::NonNullable),
529        );
530        assert!(first.get(Stat::Min).is_none());
531    }
532
533    #[test]
534    fn merge_from_min() {
535        let first = StatsSet::default().merge_ordered(
536            &StatsSet::of(Stat::Min, Precision::exact(42)),
537            &DType::Primitive(PType::I32, Nullability::NonNullable),
538        );
539        assert!(first.get(Stat::Min).is_none());
540    }
541
542    #[test]
543    fn merge_mins() {
544        let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
545            &StatsSet::of(Stat::Min, Precision::exact(42)),
546            &DType::Primitive(PType::I32, Nullability::NonNullable),
547        );
548        assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(37)));
549    }
550
551    #[test]
552    fn merge_into_bound_max() {
553        let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
554            &StatsSet::default(),
555            &DType::Primitive(PType::I32, Nullability::NonNullable),
556        );
557        assert!(first.get(Stat::Max).is_none());
558    }
559
560    #[test]
561    fn merge_from_max() {
562        let first = StatsSet::default().merge_ordered(
563            &StatsSet::of(Stat::Max, Precision::exact(42)),
564            &DType::Primitive(PType::I32, Nullability::NonNullable),
565        );
566        assert!(first.get(Stat::Max).is_none());
567    }
568
569    #[test]
570    fn merge_maxes() {
571        let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
572            &StatsSet::of(Stat::Max, Precision::exact(42)),
573            &DType::Primitive(PType::I32, Nullability::NonNullable),
574        );
575        assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::exact(42)));
576    }
577
578    #[test]
579    fn merge_maxes_bound() {
580        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
581        let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
582            .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
583        assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::inexact(43)));
584    }
585
586    #[test]
587    fn merge_into_scalar() {
588        let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
589            &StatsSet::default(),
590            &DType::Primitive(PType::I32, Nullability::NonNullable),
591        );
592        assert!(first.get(Stat::Sum).is_none());
593    }
594
595    #[test]
596    fn merge_from_scalar() {
597        let first = StatsSet::default().merge_ordered(
598            &StatsSet::of(Stat::Sum, Precision::exact(42)),
599            &DType::Primitive(PType::I32, Nullability::NonNullable),
600        );
601        assert!(first.get(Stat::Sum).is_none());
602    }
603
604    #[test]
605    fn merge_scalars() {
606        let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
607            &StatsSet::of(Stat::Sum, Precision::exact(42)),
608            &DType::Primitive(PType::I32, Nullability::NonNullable),
609        );
610        assert_eq!(
611            first.get_as::<usize>(Stat::Sum),
612            Some(Precision::exact(79usize))
613        );
614    }
615
616    #[test]
617    fn merge_into_sortedness() {
618        let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
619            &StatsSet::default(),
620            &DType::Primitive(PType::I32, Nullability::NonNullable),
621        );
622        assert!(first.get(Stat::IsStrictSorted).is_none());
623    }
624
625    #[test]
626    fn merge_from_sortedness() {
627        let first = StatsSet::default().merge_ordered(
628            &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
629            &DType::Primitive(PType::I32, Nullability::NonNullable),
630        );
631        assert!(first.get(Stat::IsStrictSorted).is_none());
632    }
633
634    #[test]
635    fn merge_sortedness() {
636        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
637        first.set(Stat::Max, Precision::exact(1));
638        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
639        second.set(Stat::Min, Precision::exact(2));
640        first = first.merge_ordered(
641            &second,
642            &DType::Primitive(PType::I32, Nullability::NonNullable),
643        );
644        assert_eq!(
645            first.get_as::<bool>(Stat::IsStrictSorted),
646            Some(Precision::exact(true))
647        );
648    }
649
650    #[test]
651    fn merge_sortedness_out_of_order() {
652        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
653        first.set(Stat::Min, Precision::exact(1));
654        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
655        second.set(Stat::Max, Precision::exact(2));
656        second = second.merge_ordered(
657            &first,
658            &DType::Primitive(PType::I32, Nullability::NonNullable),
659        );
660        assert_eq!(
661            second.get_as::<bool>(Stat::IsStrictSorted),
662            Some(Precision::inexact(false))
663        );
664    }
665
666    #[test]
667    fn merge_sortedness_only_one_sorted() {
668        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
669        first.set(Stat::Max, Precision::exact(1));
670        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
671        second.set(Stat::Min, Precision::exact(2));
672        first.merge_ordered(
673            &second,
674            &DType::Primitive(PType::I32, Nullability::NonNullable),
675        );
676        assert_eq!(
677            second.get_as::<bool>(Stat::IsStrictSorted),
678            Some(Precision::exact(false))
679        );
680    }
681
682    #[test]
683    fn merge_sortedness_missing_min() {
684        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
685        first.set(Stat::Max, Precision::exact(1));
686        let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
687        first = first.merge_ordered(
688            &second,
689            &DType::Primitive(PType::I32, Nullability::NonNullable),
690        );
691        assert!(first.get(Stat::IsStrictSorted).is_none());
692    }
693
694    #[test]
695    fn merge_sortedness_bound_min() {
696        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
697        first.set(Stat::Max, Precision::exact(1));
698        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
699        second.set(Stat::Min, Precision::inexact(2));
700        first = first.merge_ordered(
701            &second,
702            &DType::Primitive(PType::I32, Nullability::NonNullable),
703        );
704        assert_eq!(
705            first.get_as::<bool>(Stat::IsStrictSorted),
706            Some(Precision::exact(true))
707        );
708    }
709
710    #[test]
711    fn merge_unordered() {
712        let array =
713            PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
714        let all_stats = all::<Stat>()
715            .filter(|s| !matches!(s, Stat::Sum))
716            .collect_vec();
717        array.statistics().compute_all(&all_stats).unwrap();
718
719        let stats = array.statistics().to_owned();
720        for stat in &all_stats {
721            assert!(stats.get(*stat).is_some(), "Stat {} is missing", stat);
722        }
723
724        let merged = stats.clone().merge_unordered(
725            &stats,
726            &DType::Primitive(PType::I32, Nullability::NonNullable),
727        );
728        for stat in &all_stats {
729            assert_eq!(
730                merged.get(*stat).is_some(),
731                stat.is_commutative(),
732                "Stat {} remains after merge_unordered despite not being commutative, or was removed despite being commutative",
733                stat
734            )
735        }
736
737        assert_eq!(
738            merged.get_as::<i32>(Stat::Min),
739            stats.get_as::<i32>(Stat::Min)
740        );
741        assert_eq!(
742            merged.get_as::<i32>(Stat::Max),
743            stats.get_as::<i32>(Stat::Max)
744        );
745        assert_eq!(
746            merged.get_as::<u64>(Stat::NullCount).unwrap(),
747            stats.get_as::<u64>(Stat::NullCount).unwrap().map(|s| s * 2)
748        );
749    }
750
751    #[test]
752    fn merge_min_bound_same() {
753        // Merging a stat with a bound and another with an exact results in exact stat.
754        // since bound for min is a lower bound, it can in fact contain any value >= bound.
755        let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
756            &StatsSet::of(Stat::Min, Precision::exact(5)),
757            &DType::Primitive(PType::I32, Nullability::NonNullable),
758        );
759        assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::exact(5)));
760    }
761
762    #[test]
763    fn merge_min_bound_bound_lower() {
764        let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
765            &StatsSet::of(Stat::Min, Precision::exact(5)),
766            &DType::Primitive(PType::I32, Nullability::NonNullable),
767        );
768        assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::inexact(4)));
769    }
770
771    #[test]
772    fn retain_approx() {
773        let set = StatsSet::from_iter([
774            (Stat::Max, Precision::exact(100)),
775            (Stat::Min, Precision::exact(50)),
776            (Stat::Sum, Precision::inexact(10)),
777        ]);
778
779        let set = set.keep_inexact_stats(&[Stat::Min, Stat::Max]);
780
781        assert_eq!(set.len(), 2);
782        assert_eq!(set.get_as::<i32>(Stat::Max), Some(Precision::inexact(100)));
783        assert_eq!(set.get_as::<i32>(Stat::Min), Some(Precision::inexact(50)));
784        assert_eq!(set.get_as::<i32>(Stat::Sum), None);
785    }
786
787    #[test]
788    fn test_combine_is_constant() {
789        {
790            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
791            let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
792            stats.combine_is_constant(&stats2).unwrap();
793            assert_eq!(
794                stats.get_as::<bool>(Stat::IsConstant),
795                Some(Precision::exact(true))
796            );
797        }
798
799        {
800            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
801            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
802            stats.combine_is_constant(&stats2).unwrap();
803            assert_eq!(
804                stats.get_as::<bool>(Stat::IsConstant),
805                Some(Precision::exact(true))
806            );
807        }
808
809        {
810            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
811            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
812            stats.combine_is_constant(&stats2).unwrap();
813            assert_eq!(
814                stats.get_as::<bool>(Stat::IsConstant),
815                Some(Precision::exact(false))
816            );
817        }
818    }
819}