vortex_array/stats/
stats_set.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5
6use enum_iterator::{Sequence, all};
7use num_traits::CheckedAdd;
8use vortex_dtype::DType;
9use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_err, vortex_panic};
10use vortex_scalar::{Scalar, ScalarValue};
11
12use super::{
13    IsSorted, IsStrictSorted, NaNCount, NullCount, StatType, StatsProvider, UncompressedSizeInBytes,
14};
15use crate::stats::{IsConstant, Max, Min, Precision, Stat, StatBound, StatsProviderExt, Sum};
16
17#[derive(Default, Debug, Clone)]
18pub struct StatsSet {
19    values: Vec<(Stat, Precision<ScalarValue>)>,
20}
21
22impl StatsSet {
23    /// Create new StatSet without validating uniqueness of all the entries
24    ///
25    /// # Safety
26    ///
27    /// This method will not panic or trigger UB, but may lead to duplicate stats being stored.
28    pub unsafe fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
29        Self { values }
30    }
31
32    /// Create StatsSet from single stat and value
33    pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
34        // SAFETY: No duplicate stats will be set here.
35        unsafe { Self::new_unchecked(vec![(stat, value)]) }
36    }
37
38    fn reserve_full_capacity(&mut self) {
39        if self.values.capacity() < Stat::CARDINALITY {
40            self.values
41                .reserve_exact(Stat::CARDINALITY - self.values.capacity());
42        }
43    }
44
45    /// Wrap stats set with a dtype for mutable typed scalar access
46    pub fn as_mut_typed_ref<'a, 'b>(&'a mut self, dtype: &'b DType) -> MutTypedStatsSetRef<'a, 'b> {
47        MutTypedStatsSetRef {
48            values: self,
49            dtype,
50        }
51    }
52
53    /// Wrap stats set with a dtype for typed scalar access
54    pub fn as_typed_ref<'a, 'b>(&'a self, dtype: &'b DType) -> TypedStatsSetRef<'a, 'b> {
55        TypedStatsSetRef {
56            values: self,
57            dtype,
58        }
59    }
60}
61
62// Getters and setters for individual stats.
63impl StatsSet {
64    /// Set the stat `stat` to `value`.
65    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
66        self.reserve_full_capacity();
67
68        if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
69            *existing = (stat, value);
70        } else {
71            self.values.push((stat, value));
72        }
73    }
74
75    /// Clear the stat `stat` from the set.
76    pub fn clear(&mut self, stat: Stat) {
77        self.values.retain(|(s, _)| *s != stat);
78    }
79
80    /// Only keep given stats
81    pub fn retain_only(&mut self, stats: &[Stat]) {
82        self.values.retain(|(s, _)| stats.contains(s));
83    }
84
85    /// Keep given stats as inexact values
86    pub fn keep_inexact_stats(self, inexact_keep: &[Stat]) -> Self {
87        self.values
88            .into_iter()
89            .filter_map(|(s, v)| inexact_keep.contains(&s).then(|| (s, v.into_inexact())))
90            .collect()
91    }
92
93    /// Iterate over the statistic names and values in-place.
94    ///
95    /// See [Iterator].
96    pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
97        self.values.iter()
98    }
99
100    /// Get value for a given stat
101    pub fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
102        self.values
103            .iter()
104            .find(|(s, _)| *s == stat)
105            .map(|(_, v)| v.clone())
106    }
107
108    /// Length of the stats set
109    pub fn len(&self) -> usize {
110        self.values.len()
111    }
112
113    /// Check whether the statset is empty
114    pub fn is_empty(&self) -> bool {
115        self.values.is_empty()
116    }
117
118    /// Get scalar value of a given dtype
119    pub fn get_as<T: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
120        &self,
121        stat: Stat,
122        dtype: &DType,
123    ) -> Option<Precision<T>> {
124        self.get(stat).map(|v| {
125            v.map(|v| {
126                T::try_from(&Scalar::new(dtype.clone(), v)).unwrap_or_else(|err| {
127                    vortex_panic!(
128                        err,
129                        "Failed to get stat {} as {}",
130                        stat,
131                        std::any::type_name::<T>()
132                    )
133                })
134            })
135        })
136    }
137}
138
139// StatSetIntoIter just exists to protect current implementation from exposure on the public API.
140
141/// Owned iterator over the stats.
142///
143/// See [IntoIterator].
144pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
145
146impl Iterator for StatsSetIntoIter {
147    type Item = (Stat, Precision<ScalarValue>);
148
149    fn next(&mut self) -> Option<Self::Item> {
150        self.0.next()
151    }
152}
153
154impl IntoIterator for StatsSet {
155    type Item = (Stat, Precision<ScalarValue>);
156    type IntoIter = StatsSetIntoIter;
157
158    fn into_iter(self) -> Self::IntoIter {
159        StatsSetIntoIter(self.values.into_iter())
160    }
161}
162
163impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
164    fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
165        let iter = iter.into_iter();
166        let mut values = Vec::default();
167        values.reserve_exact(Stat::CARDINALITY);
168
169        let mut this = Self { values };
170        this.extend(iter);
171        this
172    }
173}
174
175impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
176    #[inline]
177    fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
178        let iter = iter.into_iter();
179        self.reserve_full_capacity();
180
181        iter.for_each(|(stat, value)| self.set(stat, value));
182    }
183}
184
185/// Merge helpers
186impl StatsSet {
187    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
188    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
189    pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
190        self.as_mut_typed_ref(dtype)
191            .merge_ordered(&other.as_typed_ref(dtype));
192        self
193    }
194
195    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
196    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
197    pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
198        self.as_mut_typed_ref(dtype)
199            .merge_unordered(&other.as_typed_ref(dtype));
200        self
201    }
202
203    /// Given two sets of stats (of differing precision) for the same array, combine them
204    pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
205        self.as_mut_typed_ref(dtype)
206            .combine_sets(&other.as_typed_ref(dtype))
207    }
208}
209
210pub struct TypedStatsSetRef<'a, 'b> {
211    pub values: &'a StatsSet,
212    pub dtype: &'b DType,
213}
214
215impl StatsProvider for TypedStatsSetRef<'_, '_> {
216    fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
217        self.values.get(stat).map(|p| {
218            p.map(|sv| {
219                Scalar::new(
220                    stat.dtype(self.dtype)
221                        .vortex_expect("Must have valid dtype if value is present"),
222                    sv,
223                )
224            })
225        })
226    }
227
228    fn len(&self) -> usize {
229        self.values.len()
230    }
231}
232
233pub struct MutTypedStatsSetRef<'a, 'b> {
234    pub values: &'a mut StatsSet,
235    pub dtype: &'b DType,
236}
237
238impl MutTypedStatsSetRef<'_, '_> {
239    /// Set the stat `stat` to `value`.
240    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
241        self.values.set(stat, value);
242    }
243
244    /// Clear the stat `stat` from the set.
245    pub fn clear(&mut self, stat: Stat) {
246        self.values.clear(stat);
247    }
248}
249
250impl StatsProvider for MutTypedStatsSetRef<'_, '_> {
251    fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
252        self.values.get(stat).map(|p| {
253            p.map(|sv| {
254                Scalar::new(
255                    stat.dtype(self.dtype)
256                        .vortex_expect("Must have valid dtype if value is present"),
257                    sv,
258                )
259            })
260        })
261    }
262
263    fn len(&self) -> usize {
264        self.values.len()
265    }
266}
267
268// Merge helpers
269impl MutTypedStatsSetRef<'_, '_> {
270    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
271    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
272    pub fn merge_ordered(mut self, other: &TypedStatsSetRef) -> Self {
273        for s in all::<Stat>() {
274            match s {
275                Stat::IsConstant => self.merge_is_constant(other),
276                Stat::IsSorted => self.merge_is_sorted(other),
277                Stat::IsStrictSorted => self.merge_is_strict_sorted(other),
278                Stat::Max => self.merge_max(other),
279                Stat::Min => self.merge_min(other),
280                Stat::Sum => self.merge_sum(other),
281                Stat::NullCount => self.merge_null_count(other),
282                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
283                Stat::NaNCount => self.merge_nan_count(other),
284            }
285        }
286
287        self
288    }
289
290    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
291    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
292    pub fn merge_unordered(mut self, other: &TypedStatsSetRef) -> Self {
293        for s in all::<Stat>() {
294            if !s.is_commutative() {
295                self.clear(s);
296                continue;
297            }
298
299            match s {
300                Stat::IsConstant => self.merge_is_constant(other),
301                Stat::Max => self.merge_max(other),
302                Stat::Min => self.merge_min(other),
303                Stat::Sum => self.merge_sum(other),
304                Stat::NullCount => self.merge_null_count(other),
305                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
306                Stat::IsSorted | Stat::IsStrictSorted => {
307                    unreachable!("not commutative")
308                }
309                Stat::NaNCount => self.merge_nan_count(other),
310            }
311        }
312
313        self
314    }
315
316    /// Given two sets of stats (of differing precision) for the same array, combine them
317    pub fn combine_sets(&mut self, other: &TypedStatsSetRef) -> VortexResult<()> {
318        let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
319        for s in other_stats {
320            match s {
321                Stat::Max => self.combine_bound::<Max>(other)?,
322                Stat::Min => self.combine_bound::<Min>(other)?,
323                Stat::UncompressedSizeInBytes => {
324                    self.combine_bound::<UncompressedSizeInBytes>(other)?
325                }
326                Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
327                Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
328                Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
329                Stat::NullCount => self.combine_bound::<NullCount>(other)?,
330                Stat::Sum => self.combine_bound::<Sum>(other)?,
331                Stat::NaNCount => self.combine_bound::<NaNCount>(other)?,
332            }
333        }
334        Ok(())
335    }
336
337    fn combine_bound<S: StatType<Scalar>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
338    where
339        S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
340    {
341        match (self.get_scalar_bound::<S>(), other.get_scalar_bound::<S>()) {
342            (Some(m1), Some(m2)) => {
343                let meet = m1
344                    .intersection(&m2)
345                    .vortex_expect("can always compare scalar")
346                    .ok_or_else(|| {
347                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
348                    })?;
349                if meet != m1 {
350                    self.set(S::STAT, meet.into_value().map(Scalar::into_value));
351                }
352            }
353            (None, Some(m)) => self.set(S::STAT, m.into_value().map(Scalar::into_value)),
354            (Some(_), _) => (),
355            (None, None) => self.clear(S::STAT),
356        }
357        Ok(())
358    }
359
360    fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
361    where
362        S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
363    {
364        match (
365            self.get_as_bound::<S, bool>(),
366            other.get_as_bound::<S, bool>(),
367        ) {
368            (Some(m1), Some(m2)) => {
369                let intersection = m1
370                    .intersection(&m2)
371                    .vortex_expect("can always compare boolean")
372                    .ok_or_else(|| {
373                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
374                    })?;
375                if intersection != m1 {
376                    self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
377                }
378            }
379            (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
380            (Some(_), None) => (),
381            (None, None) => self.clear(S::STAT),
382        }
383        Ok(())
384    }
385
386    fn merge_min(&mut self, other: &TypedStatsSetRef) {
387        match (
388            self.get_scalar_bound::<Min>(),
389            other.get_scalar_bound::<Min>(),
390        ) {
391            (Some(m1), Some(m2)) => {
392                let meet = m1.union(&m2).vortex_expect("can compare scalar");
393                if meet != m1 {
394                    self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
395                }
396            }
397            _ => self.clear(Stat::Min),
398        }
399    }
400
401    fn merge_max(&mut self, other: &TypedStatsSetRef) {
402        match (
403            self.get_scalar_bound::<Max>(),
404            other.get_scalar_bound::<Max>(),
405        ) {
406            (Some(m1), Some(m2)) => {
407                let meet = m1.union(&m2).vortex_expect("can compare scalar");
408                if meet != m1 {
409                    self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
410                }
411            }
412            _ => self.clear(Stat::Max),
413        }
414    }
415
416    fn merge_sum(&mut self, other: &TypedStatsSetRef) {
417        match (
418            self.get_scalar_bound::<Sum>(),
419            other.get_scalar_bound::<Sum>(),
420        ) {
421            (Some(m1), Some(m2)) => {
422                // If the combine sum is exact, then we can sum them.
423                if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
424                    s1.as_primitive()
425                        .checked_add(&s2.as_primitive())
426                        .map(|pscalar| {
427                            pscalar
428                                .pvalue()
429                                .map(|pvalue| {
430                                    Scalar::primitive_value(
431                                        pvalue,
432                                        pscalar.ptype(),
433                                        pscalar.dtype().nullability(),
434                                    )
435                                    .into_value()
436                                })
437                                .unwrap_or_else(ScalarValue::null)
438                        })
439                }) {
440                    self.set(Stat::Sum, Precision::Exact(scalar_value));
441                }
442            }
443            _ => self.clear(Stat::Sum),
444        }
445    }
446
447    fn merge_is_constant(&mut self, other: &TypedStatsSetRef) {
448        let self_const = self.get_as(Stat::IsConstant);
449        let other_const = other.get_as(Stat::IsConstant);
450        let self_min = self.get(Stat::Min);
451        let other_min = other.get(Stat::Min);
452
453        if let (
454            Some(Precision::Exact(self_const)),
455            Some(Precision::Exact(other_const)),
456            Some(Precision::Exact(self_min)),
457            Some(Precision::Exact(other_min)),
458        ) = (self_const, other_const, self_min, other_min)
459        {
460            if self_const && other_const && self_min == other_min {
461                self.set(Stat::IsConstant, Precision::exact(true));
462            } else {
463                self.set(Stat::IsConstant, Precision::inexact(false));
464            }
465        }
466        self.set(Stat::IsConstant, Precision::exact(false));
467    }
468
469    fn merge_is_sorted(&mut self, other: &TypedStatsSetRef) {
470        self.merge_sortedness_stat(other, Stat::IsSorted, PartialOrd::le)
471    }
472
473    fn merge_is_strict_sorted(&mut self, other: &TypedStatsSetRef) {
474        self.merge_sortedness_stat(other, Stat::IsStrictSorted, PartialOrd::lt)
475    }
476
477    fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
478        &mut self,
479        other: &TypedStatsSetRef,
480        stat: Stat,
481        cmp: F,
482    ) {
483        if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
484            == (self.get_as(stat), other.get_as(stat))
485        {
486            // There might be no stat because it was dropped, or it doesn't exist
487            // (e.g. an all null array).
488            // We assume that it was the dropped case since the doesn't exist might imply sorted,
489            // but this in-precision is correct.
490            if let (Some(self_max), Some(other_min)) = (
491                self.get_scalar_bound::<Max>(),
492                other.get_scalar_bound::<Min>(),
493            ) {
494                return if cmp(&self_max.max_value(), &other_min.min_value()) {
495                    // keep value
496                } else {
497                    self.set(stat, Precision::inexact(false));
498                };
499            }
500        }
501        self.clear(stat);
502    }
503
504    fn merge_null_count(&mut self, other: &TypedStatsSetRef) {
505        self.merge_sum_stat(Stat::NullCount, other)
506    }
507
508    fn merge_nan_count(&mut self, other: &TypedStatsSetRef) {
509        self.merge_sum_stat(Stat::NaNCount, other)
510    }
511
512    fn merge_uncompressed_size_in_bytes(&mut self, other: &TypedStatsSetRef) {
513        self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
514    }
515
516    fn merge_sum_stat(&mut self, stat: Stat, other: &TypedStatsSetRef) {
517        match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
518            (Some(nc1), Some(nc2)) => {
519                self.set(
520                    stat,
521                    nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
522                );
523            }
524            _ => self.clear(stat),
525        }
526    }
527}
528
529#[cfg(test)]
530mod test {
531    use enum_iterator::all;
532    use itertools::Itertools;
533    use vortex_dtype::{DType, Nullability, PType};
534
535    use crate::arrays::PrimitiveArray;
536    use crate::stats::stats_set::Scalar;
537    use crate::stats::{IsConstant, Precision, Stat, StatsProvider, StatsProviderExt, StatsSet};
538
539    #[test]
540    fn test_iter() {
541        // SAFETY: No duplicate stats.
542        let set = unsafe {
543            StatsSet::new_unchecked(vec![
544                (Stat::Max, Precision::exact(100)),
545                (Stat::Min, Precision::exact(42)),
546            ])
547        };
548        let mut iter = set.iter();
549        let first = iter.next().unwrap().clone();
550        assert_eq!(first.0, Stat::Max);
551        assert_eq!(
552            first
553                .1
554                .map(|f| i32::try_from(&Scalar::new(PType::I32.into(), f)).unwrap()),
555            Precision::exact(100)
556        );
557        let snd = iter.next().unwrap().clone();
558        assert_eq!(snd.0, Stat::Min);
559        assert_eq!(
560            snd.1
561                .map(|s| i32::try_from(&Scalar::new(PType::I32.into(), s)).unwrap()),
562            42
563        );
564    }
565
566    #[test]
567    fn into_iter() {
568        // SAFETY: No duplicate stats.
569        let mut set = unsafe {
570            StatsSet::new_unchecked(vec![
571                (Stat::Max, Precision::exact(100)),
572                (Stat::Min, Precision::exact(42)),
573            ])
574        }
575        .into_iter();
576        let (stat, first) = set.next().unwrap();
577        assert_eq!(stat, Stat::Max);
578        assert_eq!(
579            first.map(|f| i32::try_from(&Scalar::new(PType::I32.into(), f)).unwrap()),
580            Precision::exact(100)
581        );
582        let snd = set.next().unwrap();
583        assert_eq!(snd.0, Stat::Min);
584        assert_eq!(
585            snd.1
586                .map(|s| i32::try_from(&Scalar::new(PType::I32.into(), s)).unwrap()),
587            Precision::exact(42)
588        );
589    }
590
591    #[test]
592    fn merge_constant() {
593        let first = StatsSet::from_iter([
594            (Stat::Min, Precision::exact(42)),
595            (Stat::IsConstant, Precision::exact(true)),
596        ])
597        .merge_ordered(
598            &StatsSet::from_iter([
599                (Stat::Min, Precision::inexact(42)),
600                (Stat::IsConstant, Precision::exact(true)),
601            ]),
602            &DType::Primitive(PType::I32, Nullability::NonNullable),
603        );
604
605        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
606        assert_eq!(
607            first_ref.get_as::<bool>(Stat::IsConstant),
608            Some(Precision::exact(false))
609        );
610        assert_eq!(
611            first_ref.get_as::<i32>(Stat::Min),
612            Some(Precision::exact(42))
613        );
614    }
615
616    #[test]
617    fn merge_into_min() {
618        let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
619            &StatsSet::default(),
620            &DType::Primitive(PType::I32, Nullability::NonNullable),
621        );
622
623        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
624        assert!(first_ref.get(Stat::Min).is_none());
625    }
626
627    #[test]
628    fn merge_from_min() {
629        let first = StatsSet::default().merge_ordered(
630            &StatsSet::of(Stat::Min, Precision::exact(42)),
631            &DType::Primitive(PType::I32, Nullability::NonNullable),
632        );
633
634        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
635        assert!(first_ref.get(Stat::Min).is_none());
636    }
637
638    #[test]
639    fn merge_mins() {
640        let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
641            &StatsSet::of(Stat::Min, Precision::exact(42)),
642            &DType::Primitive(PType::I32, Nullability::NonNullable),
643        );
644
645        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
646        assert_eq!(
647            first_ref.get_as::<i32>(Stat::Min),
648            Some(Precision::exact(37))
649        );
650    }
651
652    #[test]
653    fn merge_into_bound_max() {
654        let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
655            &StatsSet::default(),
656            &DType::Primitive(PType::I32, Nullability::NonNullable),
657        );
658        assert!(first.get(Stat::Max).is_none());
659    }
660
661    #[test]
662    fn merge_from_max() {
663        let first = StatsSet::default().merge_ordered(
664            &StatsSet::of(Stat::Max, Precision::exact(42)),
665            &DType::Primitive(PType::I32, Nullability::NonNullable),
666        );
667        assert!(first.get(Stat::Max).is_none());
668    }
669
670    #[test]
671    fn merge_maxes() {
672        let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
673            &StatsSet::of(Stat::Max, Precision::exact(42)),
674            &DType::Primitive(PType::I32, Nullability::NonNullable),
675        );
676        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
677        assert_eq!(
678            first_ref.get_as::<i32>(Stat::Max),
679            Some(Precision::exact(42))
680        );
681    }
682
683    #[test]
684    fn merge_maxes_bound() {
685        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
686        let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
687            .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
688        let first_ref = first.as_typed_ref(&dtype);
689        assert_eq!(
690            first_ref.get_as::<i32>(Stat::Max),
691            Some(Precision::inexact(43))
692        );
693    }
694
695    #[test]
696    fn merge_into_scalar() {
697        let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
698            &StatsSet::default(),
699            &DType::Primitive(PType::I32, Nullability::NonNullable),
700        );
701        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
702        assert!(first_ref.get(Stat::Sum).is_none());
703    }
704
705    #[test]
706    fn merge_from_scalar() {
707        let first = StatsSet::default().merge_ordered(
708            &StatsSet::of(Stat::Sum, Precision::exact(42)),
709            &DType::Primitive(PType::I32, Nullability::NonNullable),
710        );
711        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
712        assert!(first_ref.get(Stat::Sum).is_none());
713    }
714
715    #[test]
716    fn merge_scalars() {
717        let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
718            &StatsSet::of(Stat::Sum, Precision::exact(42)),
719            &DType::Primitive(PType::I32, Nullability::NonNullable),
720        );
721        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
722        assert_eq!(
723            first_ref.get_as::<usize>(Stat::Sum),
724            Some(Precision::exact(79usize))
725        );
726    }
727
728    #[test]
729    fn merge_into_sortedness() {
730        let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
731            &StatsSet::default(),
732            &DType::Primitive(PType::I32, Nullability::NonNullable),
733        );
734        assert!(first.get(Stat::IsStrictSorted).is_none());
735    }
736
737    #[test]
738    fn merge_from_sortedness() {
739        let first = StatsSet::default().merge_ordered(
740            &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
741            &DType::Primitive(PType::I32, Nullability::NonNullable),
742        );
743        assert!(first.get(Stat::IsStrictSorted).is_none());
744    }
745
746    #[test]
747    fn merge_sortedness() {
748        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
749        first.set(Stat::Max, Precision::exact(1));
750        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
751        second.set(Stat::Min, Precision::exact(2));
752        first = first.merge_ordered(
753            &second,
754            &DType::Primitive(PType::I32, Nullability::NonNullable),
755        );
756
757        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
758        assert_eq!(
759            first_ref.get_as::<bool>(Stat::IsStrictSorted),
760            Some(Precision::exact(true))
761        );
762    }
763
764    #[test]
765    fn merge_sortedness_out_of_order() {
766        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
767        first.set(Stat::Min, Precision::exact(1));
768        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
769        second.set(Stat::Max, Precision::exact(2));
770        second = second.merge_ordered(
771            &first,
772            &DType::Primitive(PType::I32, Nullability::NonNullable),
773        );
774
775        let second_ref =
776            second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
777        assert_eq!(
778            second_ref.get_as::<bool>(Stat::IsStrictSorted),
779            Some(Precision::inexact(false))
780        );
781    }
782
783    #[test]
784    fn merge_sortedness_only_one_sorted() {
785        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
786        first.set(Stat::Max, Precision::exact(1));
787        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
788        second.set(Stat::Min, Precision::exact(2));
789        first.merge_ordered(
790            &second,
791            &DType::Primitive(PType::I32, Nullability::NonNullable),
792        );
793
794        let second_ref =
795            second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
796        assert_eq!(
797            second_ref.get_as::<bool>(Stat::IsStrictSorted),
798            Some(Precision::exact(false))
799        );
800    }
801
802    #[test]
803    fn merge_sortedness_missing_min() {
804        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
805        first.set(Stat::Max, Precision::exact(1));
806        let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
807        first = first.merge_ordered(
808            &second,
809            &DType::Primitive(PType::I32, Nullability::NonNullable),
810        );
811        assert!(first.get(Stat::IsStrictSorted).is_none());
812    }
813
814    #[test]
815    fn merge_sortedness_bound_min() {
816        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
817        first.set(Stat::Max, Precision::exact(1));
818        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
819        second.set(Stat::Min, Precision::inexact(2));
820        first = first.merge_ordered(
821            &second,
822            &DType::Primitive(PType::I32, Nullability::NonNullable),
823        );
824
825        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
826        assert_eq!(
827            first_ref.get_as::<bool>(Stat::IsStrictSorted),
828            Some(Precision::exact(true))
829        );
830    }
831
832    #[test]
833    fn merge_unordered() {
834        let array =
835            PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
836        let all_stats = all::<Stat>()
837            .filter(|s| !matches!(s, Stat::Sum))
838            .filter(|s| !matches!(s, Stat::NaNCount))
839            .collect_vec();
840        array.statistics().compute_all(&all_stats).unwrap();
841
842        let stats = array.statistics().to_owned();
843        for stat in &all_stats {
844            assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
845        }
846
847        let merged = stats.clone().merge_unordered(
848            &stats,
849            &DType::Primitive(PType::I32, Nullability::NonNullable),
850        );
851        for stat in &all_stats {
852            assert_eq!(
853                merged.get(*stat).is_some(),
854                stat.is_commutative(),
855                "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
856            )
857        }
858
859        let merged_ref = merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
860        let stats_ref = stats.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
861
862        assert_eq!(
863            merged_ref.get_as::<i32>(Stat::Min),
864            stats_ref.get_as::<i32>(Stat::Min)
865        );
866        assert_eq!(
867            merged_ref.get_as::<i32>(Stat::Max),
868            stats_ref.get_as::<i32>(Stat::Max)
869        );
870        assert_eq!(
871            merged_ref.get_as::<u64>(Stat::NullCount).unwrap(),
872            stats_ref
873                .get_as::<u64>(Stat::NullCount)
874                .unwrap()
875                .map(|s| s * 2)
876        );
877    }
878
879    #[test]
880    fn merge_min_bound_same() {
881        // Merging a stat with a bound and another with an exact results in exact stat.
882        // since bound for min is a lower bound, it can in fact contain any value >= bound.
883        let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
884            &StatsSet::of(Stat::Min, Precision::exact(5)),
885            &DType::Primitive(PType::I32, Nullability::NonNullable),
886        );
887        let merged_ref =
888            merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
889        assert_eq!(
890            merged_ref.get_as::<i32>(Stat::Min),
891            Some(Precision::exact(5))
892        );
893    }
894
895    #[test]
896    fn merge_min_bound_bound_lower() {
897        let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
898            &StatsSet::of(Stat::Min, Precision::exact(5)),
899            &DType::Primitive(PType::I32, Nullability::NonNullable),
900        );
901        let merged_ref =
902            merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
903        assert_eq!(
904            merged_ref.get_as::<i32>(Stat::Min),
905            Some(Precision::inexact(4))
906        );
907    }
908
909    #[test]
910    fn retain_approx() {
911        let set = StatsSet::from_iter([
912            (Stat::Max, Precision::exact(100)),
913            (Stat::Min, Precision::exact(50)),
914            (Stat::Sum, Precision::inexact(10)),
915        ]);
916
917        let set = set.keep_inexact_stats(&[Stat::Min, Stat::Max]);
918
919        let set_ref = set.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
920
921        assert_eq!(set.len(), 2);
922        assert_eq!(
923            set_ref.get_as::<i32>(Stat::Max),
924            Some(Precision::inexact(100))
925        );
926        assert_eq!(
927            set_ref.get_as::<i32>(Stat::Min),
928            Some(Precision::inexact(50))
929        );
930        assert_eq!(set_ref.get_as::<i32>(Stat::Sum), None);
931    }
932
933    #[test]
934    fn test_combine_is_constant() {
935        {
936            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
937            let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
938            let mut stats_ref =
939                stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
940            stats_ref
941                .combine_bool_stat::<IsConstant>(
942                    &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
943                )
944                .unwrap();
945            assert_eq!(
946                stats_ref.get_as::<bool>(Stat::IsConstant),
947                Some(Precision::exact(true))
948            );
949        }
950
951        {
952            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
953            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
954            let mut stats_ref =
955                stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
956            stats_ref
957                .combine_bool_stat::<IsConstant>(
958                    &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
959                )
960                .unwrap();
961            assert_eq!(
962                stats_ref.get_as::<bool>(Stat::IsConstant),
963                Some(Precision::exact(true))
964            );
965        }
966
967        {
968            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
969            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
970            let mut stats_ref =
971                stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
972            stats_ref
973                .combine_bool_stat::<IsConstant>(
974                    &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
975                )
976                .unwrap();
977            assert_eq!(
978                stats_ref.get_as::<bool>(Stat::IsConstant),
979                Some(Precision::exact(false))
980            );
981        }
982    }
983
984    #[test]
985    fn test_combine_sets_boolean_conflict() {
986        let mut stats1 = StatsSet::from_iter([
987            (Stat::IsConstant, Precision::exact(true)),
988            (Stat::IsSorted, Precision::exact(true)),
989        ]);
990
991        let stats2 = StatsSet::from_iter([
992            (Stat::IsConstant, Precision::exact(false)),
993            (Stat::IsSorted, Precision::exact(true)),
994        ]);
995
996        let result = stats1.combine_sets(
997            &stats2,
998            &DType::Primitive(PType::I32, Nullability::NonNullable),
999        );
1000        assert!(result.is_err());
1001    }
1002
1003    #[test]
1004    fn test_combine_sets_with_missing_stats() {
1005        let mut stats1 = StatsSet::from_iter([
1006            (Stat::Min, Precision::exact(42)),
1007            (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
1008        ]);
1009
1010        let stats2 = StatsSet::from_iter([
1011            (Stat::Max, Precision::exact(100)),
1012            (Stat::IsStrictSorted, Precision::exact(true)),
1013        ]);
1014
1015        stats1
1016            .combine_sets(
1017                &stats2,
1018                &DType::Primitive(PType::I32, Nullability::NonNullable),
1019            )
1020            .unwrap();
1021
1022        let stats_ref =
1023            stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1024
1025        // Min should remain unchanged
1026        assert_eq!(
1027            stats_ref.get_as::<i32>(Stat::Min),
1028            Some(Precision::exact(42))
1029        );
1030        // Max should be added
1031        assert_eq!(
1032            stats_ref.get_as::<i32>(Stat::Max),
1033            Some(Precision::exact(100))
1034        );
1035        // IsStrictSorted should be added
1036        assert_eq!(
1037            stats_ref.get_as::<bool>(Stat::IsStrictSorted),
1038            Some(Precision::exact(true))
1039        );
1040    }
1041
1042    #[test]
1043    fn test_combine_sets_with_inexact() {
1044        let mut stats1 = StatsSet::from_iter([
1045            (Stat::Min, Precision::exact(42)),
1046            (Stat::Max, Precision::inexact(100)),
1047            (Stat::IsConstant, Precision::exact(false)),
1048        ]);
1049
1050        let stats2 = StatsSet::from_iter([
1051            // Must ensure Min from stats2 is <= Min from stats1
1052            (Stat::Min, Precision::inexact(40)),
1053            (Stat::Max, Precision::exact(90)),
1054            (Stat::IsSorted, Precision::exact(true)),
1055        ]);
1056
1057        stats1
1058            .combine_sets(
1059                &stats2,
1060                &DType::Primitive(PType::I32, Nullability::NonNullable),
1061            )
1062            .unwrap();
1063
1064        let stats_ref =
1065            stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1066
1067        // Min should remain unchanged since it's more restrictive than the inexact value
1068        assert_eq!(
1069            stats_ref.get_as::<i32>(Stat::Min),
1070            Some(Precision::exact(42))
1071        );
1072        // Check that max was updated with the exact value
1073        assert_eq!(
1074            stats_ref.get_as::<i32>(Stat::Max),
1075            Some(Precision::exact(90))
1076        );
1077        // Check that IsSorted was added
1078        assert_eq!(
1079            stats_ref.get_as::<bool>(Stat::IsSorted),
1080            Some(Precision::exact(true))
1081        );
1082    }
1083}