vortex_array/stats/
stats_set.rs

1use std::fmt::Debug;
2
3use enum_iterator::{Sequence, all};
4use num_traits::CheckedAdd;
5use vortex_dtype::DType;
6use vortex_error::{VortexExpect, VortexResult, vortex_err};
7use vortex_scalar::{Scalar, ScalarValue};
8
9use super::traits::StatsProvider;
10use super::{IsSorted, IsStrictSorted, NaNCount, NullCount, StatType, UncompressedSizeInBytes};
11use crate::stats::{IsConstant, Max, Min, Precision, Stat, StatBound, StatsProviderExt, Sum};
12
13#[derive(Default, Debug, Clone)]
14pub struct StatsSet {
15    values: Vec<(Stat, Precision<ScalarValue>)>,
16}
17
18impl StatsSet {
19    /// Create new StatSet without validating uniqueness of all the entries
20    ///
21    /// # Safety
22    ///
23    /// This method will not panic or trigger UB, but may lead to duplicate stats being stored.
24    pub fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
25        Self { values }
26    }
27
28    /// Specialized constructor for the case where the StatsSet represents
29    /// an array consisting entirely of [null](vortex_dtype::DType::Null) values.
30    pub fn nulls(len: usize) -> Self {
31        let mut stats = Self::new_unchecked(vec![(Stat::NullCount, Precision::exact(len))]);
32
33        if len > 0 {
34            stats.set(Stat::IsConstant, Precision::exact(true));
35            stats.set(Stat::IsSorted, Precision::exact(true));
36            stats.set(Stat::IsStrictSorted, Precision::exact(len < 2));
37        }
38
39        stats
40    }
41
42    /// A convenience method for creating a stats set which will represent an empty array.
43    pub fn empty_array() -> StatsSet {
44        StatsSet::new_unchecked(vec![(Stat::NullCount, Precision::exact(0))])
45    }
46
47    pub fn constant(scalar: Scalar, length: usize) -> Self {
48        let (dtype, sv) = scalar.into_parts();
49        let mut stats = Self::default();
50        if length > 0 {
51            stats.extend([
52                (Stat::IsConstant, Precision::exact(true)),
53                (Stat::IsSorted, Precision::exact(true)),
54                (Stat::IsStrictSorted, Precision::exact(length <= 1)),
55            ]);
56        }
57
58        let null_count = if sv.is_null() { length as u64 } else { 0 };
59        stats.set(Stat::NullCount, Precision::exact(null_count));
60
61        if !sv.is_null() {
62            stats.extend([
63                (Stat::Min, Precision::exact(sv.clone())),
64                (Stat::Max, Precision::exact(sv.clone())),
65            ]);
66        }
67
68        if matches!(dtype, DType::Bool(_)) {
69            let bool_val = <Option<bool>>::try_from(&sv).vortex_expect("Checked dtype");
70            let true_count = bool_val
71                .map(|b| if b { length as u64 } else { 0 })
72                .unwrap_or(0);
73            stats.set(Stat::Sum, Precision::exact(true_count));
74        }
75
76        stats
77    }
78
79    pub fn bools_with_sum_and_null_count(true_count: usize, null_count: usize, len: usize) -> Self {
80        StatsSet::new_unchecked(vec![
81            (Stat::Sum, Precision::exact(true_count)),
82            (Stat::NullCount, Precision::exact(null_count)),
83            (Stat::Min, Precision::exact(true_count == len)),
84            (Stat::Max, Precision::exact(true_count > 0)),
85            (
86                Stat::IsConstant,
87                Precision::exact((true_count == 0 && null_count == 0) || true_count == len),
88            ),
89        ])
90    }
91
92    pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
93        Self::new_unchecked(vec![(stat, value)])
94    }
95
96    fn reserve_full_capacity(&mut self) {
97        if self.values.capacity() < Stat::CARDINALITY {
98            self.values
99                .reserve_exact(Stat::CARDINALITY - self.values.capacity());
100        }
101    }
102}
103
104// Getters and setters for individual stats.
105impl StatsSet {
106    /// Set the stat `stat` to `value`.
107    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
108        self.reserve_full_capacity();
109
110        if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
111            *existing = (stat, value);
112        } else {
113            self.values.push((stat, value));
114        }
115    }
116
117    /// Clear the stat `stat` from the set.
118    pub fn clear(&mut self, stat: Stat) {
119        self.values.retain(|(s, _)| *s != stat);
120    }
121
122    pub fn retain_only(&mut self, stats: &[Stat]) {
123        self.values.retain(|(s, _)| stats.contains(s));
124    }
125
126    pub fn keep_inexact_stats(self, inexact_keep: &[Stat]) -> Self {
127        self.values
128            .into_iter()
129            .filter_map(|(s, v)| inexact_keep.contains(&s).then(|| (s, v.into_inexact())))
130            .collect()
131    }
132
133    /// Iterate over the statistic names and values in-place.
134    ///
135    /// See [Iterator].
136    pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
137        self.values.iter()
138    }
139}
140
141// StatSetIntoIter just exists to protect current implementation from exposure on the public API.
142
143/// Owned iterator over the stats.
144///
145/// See [IntoIterator].
146pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
147
148impl Iterator for StatsSetIntoIter {
149    type Item = (Stat, Precision<ScalarValue>);
150
151    fn next(&mut self) -> Option<Self::Item> {
152        self.0.next()
153    }
154}
155
156impl IntoIterator for StatsSet {
157    type Item = (Stat, Precision<ScalarValue>);
158    type IntoIter = StatsSetIntoIter;
159
160    fn into_iter(self) -> Self::IntoIter {
161        StatsSetIntoIter(self.values.into_iter())
162    }
163}
164
165impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
166    fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
167        let iter = iter.into_iter();
168        let mut values = Vec::default();
169        values.reserve_exact(Stat::CARDINALITY);
170
171        let mut this = Self { values };
172        this.extend(iter);
173        this
174    }
175}
176
177impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
178    #[inline]
179    fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
180        let iter = iter.into_iter();
181        self.reserve_full_capacity();
182
183        iter.for_each(|(stat, value)| self.set(stat, value));
184    }
185}
186
187// Merge helpers
188impl StatsSet {
189    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
190    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
191    pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
192        for s in all::<Stat>() {
193            match s {
194                Stat::IsConstant => self.merge_is_constant(other, dtype),
195                Stat::IsSorted => self.merge_is_sorted(other, dtype),
196                Stat::IsStrictSorted => self.merge_is_strict_sorted(other, dtype),
197                Stat::Max => self.merge_max(other, dtype),
198                Stat::Min => self.merge_min(other, dtype),
199                Stat::Sum => self.merge_sum(other, dtype),
200                Stat::NullCount => self.merge_null_count(other),
201                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
202                Stat::NaNCount => self.merge_nan_count(other),
203            }
204        }
205
206        self
207    }
208
209    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
210    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
211    pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
212        for s in all::<Stat>() {
213            if !s.is_commutative() {
214                self.clear(s);
215                continue;
216            }
217
218            match s {
219                Stat::IsConstant => self.merge_is_constant(other, dtype),
220                Stat::Max => self.merge_max(other, dtype),
221                Stat::Min => self.merge_min(other, dtype),
222                Stat::Sum => self.merge_sum(other, dtype),
223                Stat::NullCount => self.merge_null_count(other),
224                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
225                Stat::IsSorted | Stat::IsStrictSorted => {
226                    unreachable!("not commutative")
227                }
228                Stat::NaNCount => self.merge_nan_count(other),
229            }
230        }
231
232        self
233    }
234
235    /// Given two sets of stats (of differing precision) for the same array, combine them
236    pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
237        let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
238        for s in other_stats {
239            match s {
240                Stat::Max => self.combine_bound::<Max>(other, dtype)?,
241                Stat::Min => self.combine_bound::<Min>(other, dtype)?,
242                Stat::UncompressedSizeInBytes => {
243                    self.combine_bound::<UncompressedSizeInBytes>(other, dtype)?
244                }
245                Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
246                Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
247                Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
248                Stat::NullCount => self.combine_bound::<NullCount>(other, dtype)?,
249                Stat::Sum => self.combine_bound::<Sum>(other, dtype)?,
250                Stat::NaNCount => self.combine_bound::<NaNCount>(other, dtype)?,
251            }
252        }
253        Ok(())
254    }
255
256    fn combine_bound<S: StatType<Scalar>>(
257        &mut self,
258        other: &Self,
259        dtype: &DType,
260    ) -> VortexResult<()>
261    where
262        S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
263    {
264        match (
265            self.get_scalar_bound::<S>(dtype),
266            other.get_scalar_bound::<S>(dtype),
267        ) {
268            (Some(m1), Some(m2)) => {
269                let meet = m1
270                    .intersection(&m2)
271                    .vortex_expect("can always compare scalar")
272                    .ok_or_else(|| {
273                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
274                    })?;
275                if meet != m1 {
276                    self.set(S::STAT, meet.into_value().map(Scalar::into_value));
277                }
278            }
279            (None, Some(m)) => self.set(S::STAT, m.into_value().map(Scalar::into_value)),
280            (Some(_), _) => (),
281            (None, None) => self.clear(S::STAT),
282        }
283        Ok(())
284    }
285
286    fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &Self) -> VortexResult<()>
287    where
288        S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
289    {
290        match (
291            self.get_as_bound::<S, bool>(),
292            other.get_as_bound::<S, bool>(),
293        ) {
294            (Some(m1), Some(m2)) => {
295                let intersection = m1
296                    .intersection(&m2)
297                    .vortex_expect("can always compare boolean")
298                    .ok_or_else(|| {
299                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
300                    })?;
301                if intersection != m1 {
302                    self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
303                }
304            }
305            (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
306            (Some(_), None) => (),
307            (None, None) => self.clear(S::STAT),
308        }
309        Ok(())
310    }
311
312    fn merge_min(&mut self, other: &Self, dtype: &DType) {
313        match (
314            self.get_scalar_bound::<Min>(dtype),
315            other.get_scalar_bound::<Min>(dtype),
316        ) {
317            (Some(m1), Some(m2)) => {
318                let meet = m1.union(&m2).vortex_expect("can compare scalar");
319                if meet != m1 {
320                    self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
321                }
322            }
323            _ => self.clear(Stat::Min),
324        }
325    }
326
327    fn merge_max(&mut self, other: &Self, dtype: &DType) {
328        match (
329            self.get_scalar_bound::<Max>(dtype),
330            other.get_scalar_bound::<Max>(dtype),
331        ) {
332            (Some(m1), Some(m2)) => {
333                let meet = m1.union(&m2).vortex_expect("can compare scalar");
334                if meet != m1 {
335                    self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
336                }
337            }
338            _ => self.clear(Stat::Max),
339        }
340    }
341
342    fn merge_sum(&mut self, other: &Self, dtype: &DType) {
343        match (
344            self.get_scalar_bound::<Sum>(dtype),
345            other.get_scalar_bound::<Sum>(dtype),
346        ) {
347            (Some(m1), Some(m2)) => {
348                // If the combine sum is exact, then we can sum them.
349                if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
350                    s1.as_primitive()
351                        .checked_add(&s2.as_primitive())
352                        .map(|pscalar| {
353                            pscalar
354                                .pvalue()
355                                .map(|pvalue| {
356                                    Scalar::primitive_value(
357                                        pvalue,
358                                        pscalar.ptype(),
359                                        pscalar.dtype().nullability(),
360                                    )
361                                    .into_value()
362                                })
363                                .unwrap_or_else(ScalarValue::null)
364                        })
365                }) {
366                    self.set(Stat::Sum, Precision::Exact(scalar_value));
367                }
368            }
369            _ => self.clear(Stat::Sum),
370        }
371    }
372
373    fn merge_is_constant(&mut self, other: &Self, dtype: &DType) {
374        let self_const = self.get_as(Stat::IsConstant);
375        let other_const = other.get_as(Stat::IsConstant);
376        let self_min = self.get_scalar(Stat::Min, dtype);
377        let other_min = other.get_scalar(Stat::Min, dtype);
378
379        if let (
380            Some(Precision::Exact(self_const)),
381            Some(Precision::Exact(other_const)),
382            Some(Precision::Exact(self_min)),
383            Some(Precision::Exact(other_min)),
384        ) = (self_const, other_const, self_min, other_min)
385        {
386            if self_const && other_const && self_min == other_min {
387                self.set(Stat::IsConstant, Precision::exact(true));
388            } else {
389                self.set(Stat::IsConstant, Precision::inexact(false));
390            }
391        }
392        self.set(Stat::IsConstant, Precision::exact(false));
393    }
394
395    fn merge_is_sorted(&mut self, other: &Self, dtype: &DType) {
396        self.merge_sortedness_stat(other, Stat::IsSorted, dtype, PartialOrd::le)
397    }
398
399    fn merge_is_strict_sorted(&mut self, other: &Self, dtype: &DType) {
400        self.merge_sortedness_stat(other, Stat::IsStrictSorted, dtype, PartialOrd::lt)
401    }
402
403    fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
404        &mut self,
405        other: &Self,
406        stat: Stat,
407        dtype: &DType,
408        cmp: F,
409    ) {
410        if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
411            == (self.get_as(stat), other.get_as(stat))
412        {
413            // There might be no stat because it was dropped, or it doesn't exist
414            // (e.g. an all null array).
415            // We assume that it was the dropped case since the doesn't exist might imply sorted,
416            // but this in-precision is correct.
417            if let (Some(self_max), Some(other_min)) = (
418                self.get_scalar_bound::<Max>(dtype),
419                other.get_scalar_bound::<Min>(dtype),
420            ) {
421                return if cmp(&self_max.max_value(), &other_min.min_value()) {
422                    // keep value
423                } else {
424                    self.set(stat, Precision::inexact(false));
425                };
426            }
427        }
428        self.clear(stat);
429    }
430
431    fn merge_null_count(&mut self, other: &Self) {
432        self.merge_sum_stat(Stat::NullCount, other)
433    }
434
435    fn merge_nan_count(&mut self, other: &Self) {
436        self.merge_sum_stat(Stat::NaNCount, other)
437    }
438
439    fn merge_uncompressed_size_in_bytes(&mut self, other: &Self) {
440        self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
441    }
442
443    fn merge_sum_stat(&mut self, stat: Stat, other: &Self) {
444        match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
445            (Some(nc1), Some(nc2)) => {
446                self.set(
447                    stat,
448                    nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
449                );
450            }
451            _ => self.clear(stat),
452        }
453    }
454}
455
456impl StatsProvider for StatsSet {
457    fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
458        self.values
459            .iter()
460            .find(|(s, _)| *s == stat)
461            .map(|(_, v)| v.clone())
462    }
463
464    fn len(&self) -> usize {
465        self.values.len()
466    }
467}
468
469#[cfg(test)]
470mod test {
471    use enum_iterator::all;
472    use itertools::Itertools;
473    use vortex_dtype::{DType, Nullability, PType};
474
475    use crate::arrays::PrimitiveArray;
476    use crate::stats::{IsConstant, Precision, Stat, StatsProvider, StatsProviderExt, StatsSet};
477
478    #[test]
479    fn test_iter() {
480        let set = StatsSet::new_unchecked(vec![
481            (Stat::Max, Precision::exact(100)),
482            (Stat::Min, Precision::exact(42)),
483        ]);
484        let mut iter = set.iter();
485        let first = iter.next().unwrap().clone();
486        assert_eq!(first.0, Stat::Max);
487        assert_eq!(
488            first.1.map(|f| i32::try_from(&f).unwrap()),
489            Precision::exact(100)
490        );
491        let snd = iter.next().unwrap().clone();
492        assert_eq!(snd.0, Stat::Min);
493        assert_eq!(snd.1.map(|s| i32::try_from(&s).unwrap()), 42);
494    }
495
496    #[test]
497    fn into_iter() {
498        let mut set = StatsSet::new_unchecked(vec![
499            (Stat::Max, Precision::exact(100)),
500            (Stat::Min, Precision::exact(42)),
501        ])
502        .into_iter();
503        let (stat, first) = set.next().unwrap();
504        assert_eq!(stat, Stat::Max);
505        assert_eq!(
506            first.map(|f| i32::try_from(&f).unwrap()),
507            Precision::exact(100)
508        );
509        let snd = set.next().unwrap();
510        assert_eq!(snd.0, Stat::Min);
511        assert_eq!(
512            snd.1.map(|s| i32::try_from(&s).unwrap()),
513            Precision::exact(42)
514        );
515    }
516
517    #[test]
518    fn merge_constant() {
519        let first = StatsSet::from_iter([
520            (Stat::Min, Precision::exact(42)),
521            (Stat::IsConstant, Precision::exact(true)),
522        ])
523        .merge_ordered(
524            &StatsSet::from_iter([
525                (Stat::Min, Precision::inexact(42)),
526                (Stat::IsConstant, Precision::exact(true)),
527            ]),
528            &DType::Primitive(PType::I32, Nullability::NonNullable),
529        );
530        assert_eq!(
531            first.get_as::<bool>(Stat::IsConstant),
532            Some(Precision::exact(false))
533        );
534        assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
535    }
536
537    #[test]
538    fn merge_into_min() {
539        let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
540            &StatsSet::default(),
541            &DType::Primitive(PType::I32, Nullability::NonNullable),
542        );
543        assert!(first.get(Stat::Min).is_none());
544    }
545
546    #[test]
547    fn merge_from_min() {
548        let first = StatsSet::default().merge_ordered(
549            &StatsSet::of(Stat::Min, Precision::exact(42)),
550            &DType::Primitive(PType::I32, Nullability::NonNullable),
551        );
552        assert!(first.get(Stat::Min).is_none());
553    }
554
555    #[test]
556    fn merge_mins() {
557        let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
558            &StatsSet::of(Stat::Min, Precision::exact(42)),
559            &DType::Primitive(PType::I32, Nullability::NonNullable),
560        );
561        assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(37)));
562    }
563
564    #[test]
565    fn merge_into_bound_max() {
566        let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
567            &StatsSet::default(),
568            &DType::Primitive(PType::I32, Nullability::NonNullable),
569        );
570        assert!(first.get(Stat::Max).is_none());
571    }
572
573    #[test]
574    fn merge_from_max() {
575        let first = StatsSet::default().merge_ordered(
576            &StatsSet::of(Stat::Max, Precision::exact(42)),
577            &DType::Primitive(PType::I32, Nullability::NonNullable),
578        );
579        assert!(first.get(Stat::Max).is_none());
580    }
581
582    #[test]
583    fn merge_maxes() {
584        let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
585            &StatsSet::of(Stat::Max, Precision::exact(42)),
586            &DType::Primitive(PType::I32, Nullability::NonNullable),
587        );
588        assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::exact(42)));
589    }
590
591    #[test]
592    fn merge_maxes_bound() {
593        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
594        let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
595            .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
596        assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::inexact(43)));
597    }
598
599    #[test]
600    fn merge_into_scalar() {
601        let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
602            &StatsSet::default(),
603            &DType::Primitive(PType::I32, Nullability::NonNullable),
604        );
605        assert!(first.get(Stat::Sum).is_none());
606    }
607
608    #[test]
609    fn merge_from_scalar() {
610        let first = StatsSet::default().merge_ordered(
611            &StatsSet::of(Stat::Sum, Precision::exact(42)),
612            &DType::Primitive(PType::I32, Nullability::NonNullable),
613        );
614        assert!(first.get(Stat::Sum).is_none());
615    }
616
617    #[test]
618    fn merge_scalars() {
619        let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
620            &StatsSet::of(Stat::Sum, Precision::exact(42)),
621            &DType::Primitive(PType::I32, Nullability::NonNullable),
622        );
623        assert_eq!(
624            first.get_as::<usize>(Stat::Sum),
625            Some(Precision::exact(79usize))
626        );
627    }
628
629    #[test]
630    fn merge_into_sortedness() {
631        let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
632            &StatsSet::default(),
633            &DType::Primitive(PType::I32, Nullability::NonNullable),
634        );
635        assert!(first.get(Stat::IsStrictSorted).is_none());
636    }
637
638    #[test]
639    fn merge_from_sortedness() {
640        let first = StatsSet::default().merge_ordered(
641            &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
642            &DType::Primitive(PType::I32, Nullability::NonNullable),
643        );
644        assert!(first.get(Stat::IsStrictSorted).is_none());
645    }
646
647    #[test]
648    fn merge_sortedness() {
649        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
650        first.set(Stat::Max, Precision::exact(1));
651        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
652        second.set(Stat::Min, Precision::exact(2));
653        first = first.merge_ordered(
654            &second,
655            &DType::Primitive(PType::I32, Nullability::NonNullable),
656        );
657        assert_eq!(
658            first.get_as::<bool>(Stat::IsStrictSorted),
659            Some(Precision::exact(true))
660        );
661    }
662
663    #[test]
664    fn merge_sortedness_out_of_order() {
665        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
666        first.set(Stat::Min, Precision::exact(1));
667        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
668        second.set(Stat::Max, Precision::exact(2));
669        second = second.merge_ordered(
670            &first,
671            &DType::Primitive(PType::I32, Nullability::NonNullable),
672        );
673        assert_eq!(
674            second.get_as::<bool>(Stat::IsStrictSorted),
675            Some(Precision::inexact(false))
676        );
677    }
678
679    #[test]
680    fn merge_sortedness_only_one_sorted() {
681        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
682        first.set(Stat::Max, Precision::exact(1));
683        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
684        second.set(Stat::Min, Precision::exact(2));
685        first.merge_ordered(
686            &second,
687            &DType::Primitive(PType::I32, Nullability::NonNullable),
688        );
689        assert_eq!(
690            second.get_as::<bool>(Stat::IsStrictSorted),
691            Some(Precision::exact(false))
692        );
693    }
694
695    #[test]
696    fn merge_sortedness_missing_min() {
697        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
698        first.set(Stat::Max, Precision::exact(1));
699        let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
700        first = first.merge_ordered(
701            &second,
702            &DType::Primitive(PType::I32, Nullability::NonNullable),
703        );
704        assert!(first.get(Stat::IsStrictSorted).is_none());
705    }
706
707    #[test]
708    fn merge_sortedness_bound_min() {
709        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
710        first.set(Stat::Max, Precision::exact(1));
711        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
712        second.set(Stat::Min, Precision::inexact(2));
713        first = first.merge_ordered(
714            &second,
715            &DType::Primitive(PType::I32, Nullability::NonNullable),
716        );
717        assert_eq!(
718            first.get_as::<bool>(Stat::IsStrictSorted),
719            Some(Precision::exact(true))
720        );
721    }
722
723    #[test]
724    fn merge_unordered() {
725        let array =
726            PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
727        let all_stats = all::<Stat>()
728            .filter(|s| !matches!(s, Stat::Sum))
729            .filter(|s| !matches!(s, Stat::NaNCount))
730            .collect_vec();
731        array.statistics().compute_all(&all_stats).unwrap();
732
733        let stats = array.statistics().to_owned();
734        for stat in &all_stats {
735            assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
736        }
737
738        let merged = stats.clone().merge_unordered(
739            &stats,
740            &DType::Primitive(PType::I32, Nullability::NonNullable),
741        );
742        for stat in &all_stats {
743            assert_eq!(
744                merged.get(*stat).is_some(),
745                stat.is_commutative(),
746                "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
747            )
748        }
749
750        assert_eq!(
751            merged.get_as::<i32>(Stat::Min),
752            stats.get_as::<i32>(Stat::Min)
753        );
754        assert_eq!(
755            merged.get_as::<i32>(Stat::Max),
756            stats.get_as::<i32>(Stat::Max)
757        );
758        assert_eq!(
759            merged.get_as::<u64>(Stat::NullCount).unwrap(),
760            stats.get_as::<u64>(Stat::NullCount).unwrap().map(|s| s * 2)
761        );
762    }
763
764    #[test]
765    fn merge_min_bound_same() {
766        // Merging a stat with a bound and another with an exact results in exact stat.
767        // since bound for min is a lower bound, it can in fact contain any value >= bound.
768        let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
769            &StatsSet::of(Stat::Min, Precision::exact(5)),
770            &DType::Primitive(PType::I32, Nullability::NonNullable),
771        );
772        assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::exact(5)));
773    }
774
775    #[test]
776    fn merge_min_bound_bound_lower() {
777        let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
778            &StatsSet::of(Stat::Min, Precision::exact(5)),
779            &DType::Primitive(PType::I32, Nullability::NonNullable),
780        );
781        assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::inexact(4)));
782    }
783
784    #[test]
785    fn retain_approx() {
786        let set = StatsSet::from_iter([
787            (Stat::Max, Precision::exact(100)),
788            (Stat::Min, Precision::exact(50)),
789            (Stat::Sum, Precision::inexact(10)),
790        ]);
791
792        let set = set.keep_inexact_stats(&[Stat::Min, Stat::Max]);
793
794        assert_eq!(set.len(), 2);
795        assert_eq!(set.get_as::<i32>(Stat::Max), Some(Precision::inexact(100)));
796        assert_eq!(set.get_as::<i32>(Stat::Min), Some(Precision::inexact(50)));
797        assert_eq!(set.get_as::<i32>(Stat::Sum), None);
798    }
799
800    #[test]
801    fn test_combine_is_constant() {
802        {
803            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
804            let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
805            stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
806            assert_eq!(
807                stats.get_as::<bool>(Stat::IsConstant),
808                Some(Precision::exact(true))
809            );
810        }
811
812        {
813            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
814            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
815            stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
816            assert_eq!(
817                stats.get_as::<bool>(Stat::IsConstant),
818                Some(Precision::exact(true))
819            );
820        }
821
822        {
823            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
824            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
825            stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
826            assert_eq!(
827                stats.get_as::<bool>(Stat::IsConstant),
828                Some(Precision::exact(false))
829            );
830        }
831    }
832
833    #[test]
834    fn test_combine_sets_boolean_conflict() {
835        let mut stats1 = StatsSet::from_iter([
836            (Stat::IsConstant, Precision::exact(true)),
837            (Stat::IsSorted, Precision::exact(true)),
838        ]);
839
840        let stats2 = StatsSet::from_iter([
841            (Stat::IsConstant, Precision::exact(false)),
842            (Stat::IsSorted, Precision::exact(true)),
843        ]);
844
845        let result = stats1.combine_sets(
846            &stats2,
847            &DType::Primitive(PType::I32, Nullability::NonNullable),
848        );
849        assert!(result.is_err());
850    }
851
852    #[test]
853    fn test_combine_sets_with_missing_stats() {
854        let mut stats1 = StatsSet::from_iter([
855            (Stat::Min, Precision::exact(42)),
856            (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
857        ]);
858
859        let stats2 = StatsSet::from_iter([
860            (Stat::Max, Precision::exact(100)),
861            (Stat::IsStrictSorted, Precision::exact(true)),
862        ]);
863
864        stats1
865            .combine_sets(
866                &stats2,
867                &DType::Primitive(PType::I32, Nullability::NonNullable),
868            )
869            .unwrap();
870
871        // Min should remain unchanged
872        assert_eq!(stats1.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
873        // Max should be added
874        assert_eq!(stats1.get_as::<i32>(Stat::Max), Some(Precision::exact(100)));
875        // IsStrictSorted should be added
876        assert_eq!(
877            stats1.get_as::<bool>(Stat::IsStrictSorted),
878            Some(Precision::exact(true))
879        );
880    }
881
882    #[test]
883    fn test_combine_sets_with_inexact() {
884        let mut stats1 = StatsSet::from_iter([
885            (Stat::Min, Precision::exact(42)),
886            (Stat::Max, Precision::inexact(100)),
887            (Stat::IsConstant, Precision::exact(false)),
888        ]);
889
890        let stats2 = StatsSet::from_iter([
891            // Must ensure Min from stats2 is <= Min from stats1
892            (Stat::Min, Precision::inexact(40)),
893            (Stat::Max, Precision::exact(90)),
894            (Stat::IsSorted, Precision::exact(true)),
895        ]);
896
897        stats1
898            .combine_sets(
899                &stats2,
900                &DType::Primitive(PType::I32, Nullability::NonNullable),
901            )
902            .unwrap();
903
904        // Min should remain unchanged since it's more restrictive than the inexact value
905        assert_eq!(stats1.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
906        // Check that max was updated with the exact value
907        assert_eq!(stats1.get_as::<i32>(Stat::Max), Some(Precision::exact(90)));
908        // Check that IsSorted was added
909        assert_eq!(
910            stats1.get_as::<bool>(Stat::IsSorted),
911            Some(Precision::exact(true))
912        );
913    }
914}