vortex_array/stats/
stats_set.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5
6use enum_iterator::{Sequence, all};
7use num_traits::CheckedAdd;
8use vortex_dtype::DType;
9use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_err, vortex_panic};
10use vortex_scalar::{Scalar, ScalarValue};
11
12use super::{
13    IsSorted, IsStrictSorted, NaNCount, NullCount, StatType, StatsProvider, UncompressedSizeInBytes,
14};
15use crate::stats::{IsConstant, Max, Min, Precision, Stat, StatBound, StatsProviderExt, Sum};
16
17#[derive(Default, Debug, Clone)]
18pub struct StatsSet {
19    values: Vec<(Stat, Precision<ScalarValue>)>,
20}
21
22impl StatsSet {
23    /// Create new StatSet without validating uniqueness of all the entries
24    ///
25    /// # Safety
26    ///
27    /// This method will not panic or trigger UB, but may lead to duplicate stats being stored.
28    pub unsafe fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
29        Self { values }
30    }
31
32    /// Create StatsSet from single stat and value
33    pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
34        // SAFETY: No duplicate stats will be set here.
35        unsafe { Self::new_unchecked(vec![(stat, value)]) }
36    }
37
38    fn reserve_full_capacity(&mut self) {
39        if self.values.capacity() < Stat::CARDINALITY {
40            self.values
41                .reserve_exact(Stat::CARDINALITY - self.values.capacity());
42        }
43    }
44
45    /// Wrap stats set with a dtype for mutable typed scalar access
46    pub fn as_mut_typed_ref<'a, 'b>(&'a mut self, dtype: &'b DType) -> MutTypedStatsSetRef<'a, 'b> {
47        MutTypedStatsSetRef {
48            values: self,
49            dtype,
50        }
51    }
52
53    /// Wrap stats set with a dtype for typed scalar access
54    pub fn as_typed_ref<'a, 'b>(&'a self, dtype: &'b DType) -> TypedStatsSetRef<'a, 'b> {
55        TypedStatsSetRef {
56            values: self,
57            dtype,
58        }
59    }
60}
61
62// Getters and setters for individual stats.
63impl StatsSet {
64    /// Set the stat `stat` to `value`.
65    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
66        self.reserve_full_capacity();
67
68        if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
69            *existing = (stat, value);
70        } else {
71            self.values.push((stat, value));
72        }
73    }
74
75    /// Clear the stat `stat` from the set.
76    pub fn clear(&mut self, stat: Stat) {
77        self.values.retain(|(s, _)| *s != stat);
78    }
79
80    /// Only keep given stats
81    pub fn retain_only(&mut self, stats: &[Stat]) {
82        self.values.retain(|(s, _)| stats.contains(s));
83    }
84
85    /// Iterate over the statistic names and values in-place.
86    ///
87    /// See [Iterator].
88    pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
89        self.values.iter()
90    }
91
92    /// Get value for a given stat
93    pub fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
94        self.values
95            .iter()
96            .find(|(s, _)| *s == stat)
97            .map(|(_, v)| v.clone())
98    }
99
100    /// Length of the stats set
101    pub fn len(&self) -> usize {
102        self.values.len()
103    }
104
105    /// Check whether the statset is empty
106    pub fn is_empty(&self) -> bool {
107        self.values.is_empty()
108    }
109
110    /// Get scalar value of a given dtype
111    pub fn get_as<T: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
112        &self,
113        stat: Stat,
114        dtype: &DType,
115    ) -> Option<Precision<T>> {
116        self.get(stat).map(|v| {
117            v.map(|v| {
118                T::try_from(&Scalar::new(dtype.clone(), v)).unwrap_or_else(|err| {
119                    vortex_panic!(
120                        err,
121                        "Failed to get stat {} as {}",
122                        stat,
123                        std::any::type_name::<T>()
124                    )
125                })
126            })
127        })
128    }
129}
130
131// StatSetIntoIter just exists to protect current implementation from exposure on the public API.
132
133/// Owned iterator over the stats.
134///
135/// See [IntoIterator].
136pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
137
138impl Iterator for StatsSetIntoIter {
139    type Item = (Stat, Precision<ScalarValue>);
140
141    fn next(&mut self) -> Option<Self::Item> {
142        self.0.next()
143    }
144}
145
146impl IntoIterator for StatsSet {
147    type Item = (Stat, Precision<ScalarValue>);
148    type IntoIter = StatsSetIntoIter;
149
150    fn into_iter(self) -> Self::IntoIter {
151        StatsSetIntoIter(self.values.into_iter())
152    }
153}
154
155impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
156    fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
157        let iter = iter.into_iter();
158        let mut values = Vec::default();
159        values.reserve_exact(Stat::CARDINALITY);
160
161        let mut this = Self { values };
162        this.extend(iter);
163        this
164    }
165}
166
167impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
168    #[inline]
169    fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
170        let iter = iter.into_iter();
171        self.reserve_full_capacity();
172
173        iter.for_each(|(stat, value)| self.set(stat, value));
174    }
175}
176
177/// Merge helpers
178impl StatsSet {
179    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
180    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
181    pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
182        self.as_mut_typed_ref(dtype)
183            .merge_ordered(&other.as_typed_ref(dtype));
184        self
185    }
186
187    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
188    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
189    pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
190        self.as_mut_typed_ref(dtype)
191            .merge_unordered(&other.as_typed_ref(dtype));
192        self
193    }
194
195    /// Given two sets of stats (of differing precision) for the same array, combine them
196    pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
197        self.as_mut_typed_ref(dtype)
198            .combine_sets(&other.as_typed_ref(dtype))
199    }
200}
201
202pub struct TypedStatsSetRef<'a, 'b> {
203    pub values: &'a StatsSet,
204    pub dtype: &'b DType,
205}
206
207impl StatsProvider for TypedStatsSetRef<'_, '_> {
208    fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
209        self.values.get(stat).map(|p| {
210            p.map(|sv| {
211                Scalar::new(
212                    stat.dtype(self.dtype)
213                        .vortex_expect("Must have valid dtype if value is present"),
214                    sv,
215                )
216            })
217        })
218    }
219
220    fn len(&self) -> usize {
221        self.values.len()
222    }
223}
224
225pub struct MutTypedStatsSetRef<'a, 'b> {
226    pub values: &'a mut StatsSet,
227    pub dtype: &'b DType,
228}
229
230impl MutTypedStatsSetRef<'_, '_> {
231    /// Set the stat `stat` to `value`.
232    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
233        self.values.set(stat, value);
234    }
235
236    /// Clear the stat `stat` from the set.
237    pub fn clear(&mut self, stat: Stat) {
238        self.values.clear(stat);
239    }
240}
241
242impl StatsProvider for MutTypedStatsSetRef<'_, '_> {
243    fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
244        self.values.get(stat).map(|p| {
245            p.map(|sv| {
246                Scalar::new(
247                    stat.dtype(self.dtype)
248                        .vortex_expect("Must have valid dtype if value is present"),
249                    sv,
250                )
251            })
252        })
253    }
254
255    fn len(&self) -> usize {
256        self.values.len()
257    }
258}
259
260// Merge helpers
261impl MutTypedStatsSetRef<'_, '_> {
262    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
263    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
264    pub fn merge_ordered(mut self, other: &TypedStatsSetRef) -> Self {
265        for s in all::<Stat>() {
266            match s {
267                Stat::IsConstant => self.merge_is_constant(other),
268                Stat::IsSorted => self.merge_is_sorted(other),
269                Stat::IsStrictSorted => self.merge_is_strict_sorted(other),
270                Stat::Max => self.merge_max(other),
271                Stat::Min => self.merge_min(other),
272                Stat::Sum => self.merge_sum(other),
273                Stat::NullCount => self.merge_null_count(other),
274                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
275                Stat::NaNCount => self.merge_nan_count(other),
276            }
277        }
278
279        self
280    }
281
282    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
283    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
284    pub fn merge_unordered(mut self, other: &TypedStatsSetRef) -> Self {
285        for s in all::<Stat>() {
286            if !s.is_commutative() {
287                self.clear(s);
288                continue;
289            }
290
291            match s {
292                Stat::IsConstant => self.merge_is_constant(other),
293                Stat::Max => self.merge_max(other),
294                Stat::Min => self.merge_min(other),
295                Stat::Sum => self.merge_sum(other),
296                Stat::NullCount => self.merge_null_count(other),
297                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
298                Stat::IsSorted | Stat::IsStrictSorted => {
299                    unreachable!("not commutative")
300                }
301                Stat::NaNCount => self.merge_nan_count(other),
302            }
303        }
304
305        self
306    }
307
308    /// Given two sets of stats (of differing precision) for the same array, combine them
309    pub fn combine_sets(&mut self, other: &TypedStatsSetRef) -> VortexResult<()> {
310        let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
311        for s in other_stats {
312            match s {
313                Stat::Max => self.combine_bound::<Max>(other)?,
314                Stat::Min => self.combine_bound::<Min>(other)?,
315                Stat::UncompressedSizeInBytes => {
316                    self.combine_bound::<UncompressedSizeInBytes>(other)?
317                }
318                Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
319                Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
320                Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
321                Stat::NullCount => self.combine_bound::<NullCount>(other)?,
322                Stat::Sum => self.combine_bound::<Sum>(other)?,
323                Stat::NaNCount => self.combine_bound::<NaNCount>(other)?,
324            }
325        }
326        Ok(())
327    }
328
329    fn combine_bound<S: StatType<Scalar>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
330    where
331        S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
332    {
333        match (self.get_scalar_bound::<S>(), other.get_scalar_bound::<S>()) {
334            (Some(m1), Some(m2)) => {
335                let meet = m1
336                    .intersection(&m2)
337                    .vortex_expect("can always compare scalar")
338                    .ok_or_else(|| {
339                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
340                    })?;
341                if meet != m1 {
342                    self.set(S::STAT, meet.into_value().map(Scalar::into_value));
343                }
344            }
345            (None, Some(m)) => self.set(S::STAT, m.into_value().map(Scalar::into_value)),
346            (Some(_), _) => (),
347            (None, None) => self.clear(S::STAT),
348        }
349        Ok(())
350    }
351
352    fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
353    where
354        S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
355    {
356        match (
357            self.get_as_bound::<S, bool>(),
358            other.get_as_bound::<S, bool>(),
359        ) {
360            (Some(m1), Some(m2)) => {
361                let intersection = m1
362                    .intersection(&m2)
363                    .vortex_expect("can always compare boolean")
364                    .ok_or_else(|| {
365                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
366                    })?;
367                if intersection != m1 {
368                    self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
369                }
370            }
371            (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
372            (Some(_), None) => (),
373            (None, None) => self.clear(S::STAT),
374        }
375        Ok(())
376    }
377
378    fn merge_min(&mut self, other: &TypedStatsSetRef) {
379        match (
380            self.get_scalar_bound::<Min>(),
381            other.get_scalar_bound::<Min>(),
382        ) {
383            (Some(m1), Some(m2)) => {
384                let meet = m1.union(&m2).vortex_expect("can compare scalar");
385                if meet != m1 {
386                    self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
387                }
388            }
389            _ => self.clear(Stat::Min),
390        }
391    }
392
393    fn merge_max(&mut self, other: &TypedStatsSetRef) {
394        match (
395            self.get_scalar_bound::<Max>(),
396            other.get_scalar_bound::<Max>(),
397        ) {
398            (Some(m1), Some(m2)) => {
399                let meet = m1.union(&m2).vortex_expect("can compare scalar");
400                if meet != m1 {
401                    self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
402                }
403            }
404            _ => self.clear(Stat::Max),
405        }
406    }
407
408    fn merge_sum(&mut self, other: &TypedStatsSetRef) {
409        match (
410            self.get_scalar_bound::<Sum>(),
411            other.get_scalar_bound::<Sum>(),
412        ) {
413            (Some(m1), Some(m2)) => {
414                // If the combine sum is exact, then we can sum them.
415                if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
416                    s1.as_primitive()
417                        .checked_add(&s2.as_primitive())
418                        .map(|pscalar| {
419                            pscalar
420                                .pvalue()
421                                .map(|pvalue| {
422                                    Scalar::primitive_value(
423                                        pvalue,
424                                        pscalar.ptype(),
425                                        pscalar.dtype().nullability(),
426                                    )
427                                    .into_value()
428                                })
429                                .unwrap_or_else(ScalarValue::null)
430                        })
431                }) {
432                    self.set(Stat::Sum, Precision::Exact(scalar_value));
433                }
434            }
435            _ => self.clear(Stat::Sum),
436        }
437    }
438
439    fn merge_is_constant(&mut self, other: &TypedStatsSetRef) {
440        let self_const = self.get_as(Stat::IsConstant);
441        let other_const = other.get_as(Stat::IsConstant);
442        let self_min = self.get(Stat::Min);
443        let other_min = other.get(Stat::Min);
444
445        if let (
446            Some(Precision::Exact(self_const)),
447            Some(Precision::Exact(other_const)),
448            Some(Precision::Exact(self_min)),
449            Some(Precision::Exact(other_min)),
450        ) = (self_const, other_const, self_min, other_min)
451        {
452            if self_const && other_const && self_min == other_min {
453                self.set(Stat::IsConstant, Precision::exact(true));
454            } else {
455                self.set(Stat::IsConstant, Precision::inexact(false));
456            }
457        }
458        self.set(Stat::IsConstant, Precision::exact(false));
459    }
460
461    fn merge_is_sorted(&mut self, other: &TypedStatsSetRef) {
462        self.merge_sortedness_stat(other, Stat::IsSorted, PartialOrd::le)
463    }
464
465    fn merge_is_strict_sorted(&mut self, other: &TypedStatsSetRef) {
466        self.merge_sortedness_stat(other, Stat::IsStrictSorted, PartialOrd::lt)
467    }
468
469    fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
470        &mut self,
471        other: &TypedStatsSetRef,
472        stat: Stat,
473        cmp: F,
474    ) {
475        if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
476            == (self.get_as(stat), other.get_as(stat))
477        {
478            // There might be no stat because it was dropped, or it doesn't exist
479            // (e.g. an all null array).
480            // We assume that it was the dropped case since the doesn't exist might imply sorted,
481            // but this in-precision is correct.
482            if let (Some(self_max), Some(other_min)) = (
483                self.get_scalar_bound::<Max>(),
484                other.get_scalar_bound::<Min>(),
485            ) {
486                return if cmp(&self_max.max_value(), &other_min.min_value()) {
487                    // keep value
488                } else {
489                    self.set(stat, Precision::inexact(false));
490                };
491            }
492        }
493        self.clear(stat);
494    }
495
496    fn merge_null_count(&mut self, other: &TypedStatsSetRef) {
497        self.merge_sum_stat(Stat::NullCount, other)
498    }
499
500    fn merge_nan_count(&mut self, other: &TypedStatsSetRef) {
501        self.merge_sum_stat(Stat::NaNCount, other)
502    }
503
504    fn merge_uncompressed_size_in_bytes(&mut self, other: &TypedStatsSetRef) {
505        self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
506    }
507
508    fn merge_sum_stat(&mut self, stat: Stat, other: &TypedStatsSetRef) {
509        match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
510            (Some(nc1), Some(nc2)) => {
511                self.set(
512                    stat,
513                    nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
514                );
515            }
516            _ => self.clear(stat),
517        }
518    }
519}
520
521#[cfg(test)]
522mod test {
523    use enum_iterator::all;
524    use itertools::Itertools;
525    use vortex_dtype::{DType, Nullability, PType};
526
527    use crate::arrays::PrimitiveArray;
528    use crate::stats::stats_set::Scalar;
529    use crate::stats::{IsConstant, Precision, Stat, StatsProvider, StatsProviderExt, StatsSet};
530
531    #[test]
532    fn test_iter() {
533        // SAFETY: No duplicate stats.
534        let set = unsafe {
535            StatsSet::new_unchecked(vec![
536                (Stat::Max, Precision::exact(100)),
537                (Stat::Min, Precision::exact(42)),
538            ])
539        };
540        let mut iter = set.iter();
541        let first = iter.next().unwrap().clone();
542        assert_eq!(first.0, Stat::Max);
543        assert_eq!(
544            first
545                .1
546                .map(|f| i32::try_from(&Scalar::new(PType::I32.into(), f)).unwrap()),
547            Precision::exact(100)
548        );
549        let snd = iter.next().unwrap().clone();
550        assert_eq!(snd.0, Stat::Min);
551        assert_eq!(
552            snd.1
553                .map(|s| i32::try_from(&Scalar::new(PType::I32.into(), s)).unwrap()),
554            42
555        );
556    }
557
558    #[test]
559    fn into_iter() {
560        // SAFETY: No duplicate stats.
561        let mut set = unsafe {
562            StatsSet::new_unchecked(vec![
563                (Stat::Max, Precision::exact(100)),
564                (Stat::Min, Precision::exact(42)),
565            ])
566        }
567        .into_iter();
568        let (stat, first) = set.next().unwrap();
569        assert_eq!(stat, Stat::Max);
570        assert_eq!(
571            first.map(|f| i32::try_from(&Scalar::new(PType::I32.into(), f)).unwrap()),
572            Precision::exact(100)
573        );
574        let snd = set.next().unwrap();
575        assert_eq!(snd.0, Stat::Min);
576        assert_eq!(
577            snd.1
578                .map(|s| i32::try_from(&Scalar::new(PType::I32.into(), s)).unwrap()),
579            Precision::exact(42)
580        );
581    }
582
583    #[test]
584    fn merge_constant() {
585        let first = StatsSet::from_iter([
586            (Stat::Min, Precision::exact(42)),
587            (Stat::IsConstant, Precision::exact(true)),
588        ])
589        .merge_ordered(
590            &StatsSet::from_iter([
591                (Stat::Min, Precision::inexact(42)),
592                (Stat::IsConstant, Precision::exact(true)),
593            ]),
594            &DType::Primitive(PType::I32, Nullability::NonNullable),
595        );
596
597        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
598        assert_eq!(
599            first_ref.get_as::<bool>(Stat::IsConstant),
600            Some(Precision::exact(false))
601        );
602        assert_eq!(
603            first_ref.get_as::<i32>(Stat::Min),
604            Some(Precision::exact(42))
605        );
606    }
607
608    #[test]
609    fn merge_into_min() {
610        let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
611            &StatsSet::default(),
612            &DType::Primitive(PType::I32, Nullability::NonNullable),
613        );
614
615        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
616        assert!(first_ref.get(Stat::Min).is_none());
617    }
618
619    #[test]
620    fn merge_from_min() {
621        let first = StatsSet::default().merge_ordered(
622            &StatsSet::of(Stat::Min, Precision::exact(42)),
623            &DType::Primitive(PType::I32, Nullability::NonNullable),
624        );
625
626        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
627        assert!(first_ref.get(Stat::Min).is_none());
628    }
629
630    #[test]
631    fn merge_mins() {
632        let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
633            &StatsSet::of(Stat::Min, Precision::exact(42)),
634            &DType::Primitive(PType::I32, Nullability::NonNullable),
635        );
636
637        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
638        assert_eq!(
639            first_ref.get_as::<i32>(Stat::Min),
640            Some(Precision::exact(37))
641        );
642    }
643
644    #[test]
645    fn merge_into_bound_max() {
646        let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
647            &StatsSet::default(),
648            &DType::Primitive(PType::I32, Nullability::NonNullable),
649        );
650        assert!(first.get(Stat::Max).is_none());
651    }
652
653    #[test]
654    fn merge_from_max() {
655        let first = StatsSet::default().merge_ordered(
656            &StatsSet::of(Stat::Max, Precision::exact(42)),
657            &DType::Primitive(PType::I32, Nullability::NonNullable),
658        );
659        assert!(first.get(Stat::Max).is_none());
660    }
661
662    #[test]
663    fn merge_maxes() {
664        let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
665            &StatsSet::of(Stat::Max, Precision::exact(42)),
666            &DType::Primitive(PType::I32, Nullability::NonNullable),
667        );
668        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
669        assert_eq!(
670            first_ref.get_as::<i32>(Stat::Max),
671            Some(Precision::exact(42))
672        );
673    }
674
675    #[test]
676    fn merge_maxes_bound() {
677        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
678        let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
679            .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
680        let first_ref = first.as_typed_ref(&dtype);
681        assert_eq!(
682            first_ref.get_as::<i32>(Stat::Max),
683            Some(Precision::inexact(43))
684        );
685    }
686
687    #[test]
688    fn merge_into_scalar() {
689        let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
690            &StatsSet::default(),
691            &DType::Primitive(PType::I32, Nullability::NonNullable),
692        );
693        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
694        assert!(first_ref.get(Stat::Sum).is_none());
695    }
696
697    #[test]
698    fn merge_from_scalar() {
699        let first = StatsSet::default().merge_ordered(
700            &StatsSet::of(Stat::Sum, Precision::exact(42)),
701            &DType::Primitive(PType::I32, Nullability::NonNullable),
702        );
703        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
704        assert!(first_ref.get(Stat::Sum).is_none());
705    }
706
707    #[test]
708    fn merge_scalars() {
709        let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
710            &StatsSet::of(Stat::Sum, Precision::exact(42)),
711            &DType::Primitive(PType::I32, Nullability::NonNullable),
712        );
713        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
714        assert_eq!(
715            first_ref.get_as::<usize>(Stat::Sum),
716            Some(Precision::exact(79usize))
717        );
718    }
719
720    #[test]
721    fn merge_into_sortedness() {
722        let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
723            &StatsSet::default(),
724            &DType::Primitive(PType::I32, Nullability::NonNullable),
725        );
726        assert!(first.get(Stat::IsStrictSorted).is_none());
727    }
728
729    #[test]
730    fn merge_from_sortedness() {
731        let first = StatsSet::default().merge_ordered(
732            &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
733            &DType::Primitive(PType::I32, Nullability::NonNullable),
734        );
735        assert!(first.get(Stat::IsStrictSorted).is_none());
736    }
737
738    #[test]
739    fn merge_sortedness() {
740        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
741        first.set(Stat::Max, Precision::exact(1));
742        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
743        second.set(Stat::Min, Precision::exact(2));
744        first = first.merge_ordered(
745            &second,
746            &DType::Primitive(PType::I32, Nullability::NonNullable),
747        );
748
749        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
750        assert_eq!(
751            first_ref.get_as::<bool>(Stat::IsStrictSorted),
752            Some(Precision::exact(true))
753        );
754    }
755
756    #[test]
757    fn merge_sortedness_out_of_order() {
758        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
759        first.set(Stat::Min, Precision::exact(1));
760        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
761        second.set(Stat::Max, Precision::exact(2));
762        second = second.merge_ordered(
763            &first,
764            &DType::Primitive(PType::I32, Nullability::NonNullable),
765        );
766
767        let second_ref =
768            second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
769        assert_eq!(
770            second_ref.get_as::<bool>(Stat::IsStrictSorted),
771            Some(Precision::inexact(false))
772        );
773    }
774
775    #[test]
776    fn merge_sortedness_only_one_sorted() {
777        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
778        first.set(Stat::Max, Precision::exact(1));
779        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
780        second.set(Stat::Min, Precision::exact(2));
781        first.merge_ordered(
782            &second,
783            &DType::Primitive(PType::I32, Nullability::NonNullable),
784        );
785
786        let second_ref =
787            second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
788        assert_eq!(
789            second_ref.get_as::<bool>(Stat::IsStrictSorted),
790            Some(Precision::exact(false))
791        );
792    }
793
794    #[test]
795    fn merge_sortedness_missing_min() {
796        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
797        first.set(Stat::Max, Precision::exact(1));
798        let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
799        first = first.merge_ordered(
800            &second,
801            &DType::Primitive(PType::I32, Nullability::NonNullable),
802        );
803        assert!(first.get(Stat::IsStrictSorted).is_none());
804    }
805
806    #[test]
807    fn merge_sortedness_bound_min() {
808        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
809        first.set(Stat::Max, Precision::exact(1));
810        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
811        second.set(Stat::Min, Precision::inexact(2));
812        first = first.merge_ordered(
813            &second,
814            &DType::Primitive(PType::I32, Nullability::NonNullable),
815        );
816
817        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
818        assert_eq!(
819            first_ref.get_as::<bool>(Stat::IsStrictSorted),
820            Some(Precision::exact(true))
821        );
822    }
823
824    #[test]
825    fn merge_unordered() {
826        let array =
827            PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
828        let all_stats = all::<Stat>()
829            .filter(|s| !matches!(s, Stat::Sum))
830            .filter(|s| !matches!(s, Stat::NaNCount))
831            .collect_vec();
832        array.statistics().compute_all(&all_stats).unwrap();
833
834        let stats = array.statistics().to_owned();
835        for stat in &all_stats {
836            assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
837        }
838
839        let merged = stats.clone().merge_unordered(
840            &stats,
841            &DType::Primitive(PType::I32, Nullability::NonNullable),
842        );
843        for stat in &all_stats {
844            assert_eq!(
845                merged.get(*stat).is_some(),
846                stat.is_commutative(),
847                "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
848            )
849        }
850
851        let merged_ref = merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
852        let stats_ref = stats.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
853
854        assert_eq!(
855            merged_ref.get_as::<i32>(Stat::Min),
856            stats_ref.get_as::<i32>(Stat::Min)
857        );
858        assert_eq!(
859            merged_ref.get_as::<i32>(Stat::Max),
860            stats_ref.get_as::<i32>(Stat::Max)
861        );
862        assert_eq!(
863            merged_ref.get_as::<u64>(Stat::NullCount).unwrap(),
864            stats_ref
865                .get_as::<u64>(Stat::NullCount)
866                .unwrap()
867                .map(|s| s * 2)
868        );
869    }
870
871    #[test]
872    fn merge_min_bound_same() {
873        // Merging a stat with a bound and another with an exact results in exact stat.
874        // since bound for min is a lower bound, it can in fact contain any value >= bound.
875        let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
876            &StatsSet::of(Stat::Min, Precision::exact(5)),
877            &DType::Primitive(PType::I32, Nullability::NonNullable),
878        );
879        let merged_ref =
880            merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
881        assert_eq!(
882            merged_ref.get_as::<i32>(Stat::Min),
883            Some(Precision::exact(5))
884        );
885    }
886
887    #[test]
888    fn merge_min_bound_bound_lower() {
889        let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
890            &StatsSet::of(Stat::Min, Precision::exact(5)),
891            &DType::Primitive(PType::I32, Nullability::NonNullable),
892        );
893        let merged_ref =
894            merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
895        assert_eq!(
896            merged_ref.get_as::<i32>(Stat::Min),
897            Some(Precision::inexact(4))
898        );
899    }
900
901    #[test]
902    fn test_combine_is_constant() {
903        {
904            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
905            let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
906            let mut stats_ref =
907                stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
908            stats_ref
909                .combine_bool_stat::<IsConstant>(
910                    &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
911                )
912                .unwrap();
913            assert_eq!(
914                stats_ref.get_as::<bool>(Stat::IsConstant),
915                Some(Precision::exact(true))
916            );
917        }
918
919        {
920            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
921            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
922            let mut stats_ref =
923                stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
924            stats_ref
925                .combine_bool_stat::<IsConstant>(
926                    &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
927                )
928                .unwrap();
929            assert_eq!(
930                stats_ref.get_as::<bool>(Stat::IsConstant),
931                Some(Precision::exact(true))
932            );
933        }
934
935        {
936            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
937            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
938            let mut stats_ref =
939                stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
940            stats_ref
941                .combine_bool_stat::<IsConstant>(
942                    &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
943                )
944                .unwrap();
945            assert_eq!(
946                stats_ref.get_as::<bool>(Stat::IsConstant),
947                Some(Precision::exact(false))
948            );
949        }
950    }
951
952    #[test]
953    fn test_combine_sets_boolean_conflict() {
954        let mut stats1 = StatsSet::from_iter([
955            (Stat::IsConstant, Precision::exact(true)),
956            (Stat::IsSorted, Precision::exact(true)),
957        ]);
958
959        let stats2 = StatsSet::from_iter([
960            (Stat::IsConstant, Precision::exact(false)),
961            (Stat::IsSorted, Precision::exact(true)),
962        ]);
963
964        let result = stats1.combine_sets(
965            &stats2,
966            &DType::Primitive(PType::I32, Nullability::NonNullable),
967        );
968        assert!(result.is_err());
969    }
970
971    #[test]
972    fn test_combine_sets_with_missing_stats() {
973        let mut stats1 = StatsSet::from_iter([
974            (Stat::Min, Precision::exact(42)),
975            (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
976        ]);
977
978        let stats2 = StatsSet::from_iter([
979            (Stat::Max, Precision::exact(100)),
980            (Stat::IsStrictSorted, Precision::exact(true)),
981        ]);
982
983        stats1
984            .combine_sets(
985                &stats2,
986                &DType::Primitive(PType::I32, Nullability::NonNullable),
987            )
988            .unwrap();
989
990        let stats_ref =
991            stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
992
993        // Min should remain unchanged
994        assert_eq!(
995            stats_ref.get_as::<i32>(Stat::Min),
996            Some(Precision::exact(42))
997        );
998        // Max should be added
999        assert_eq!(
1000            stats_ref.get_as::<i32>(Stat::Max),
1001            Some(Precision::exact(100))
1002        );
1003        // IsStrictSorted should be added
1004        assert_eq!(
1005            stats_ref.get_as::<bool>(Stat::IsStrictSorted),
1006            Some(Precision::exact(true))
1007        );
1008    }
1009
1010    #[test]
1011    fn test_combine_sets_with_inexact() {
1012        let mut stats1 = StatsSet::from_iter([
1013            (Stat::Min, Precision::exact(42)),
1014            (Stat::Max, Precision::inexact(100)),
1015            (Stat::IsConstant, Precision::exact(false)),
1016        ]);
1017
1018        let stats2 = StatsSet::from_iter([
1019            // Must ensure Min from stats2 is <= Min from stats1
1020            (Stat::Min, Precision::inexact(40)),
1021            (Stat::Max, Precision::exact(90)),
1022            (Stat::IsSorted, Precision::exact(true)),
1023        ]);
1024
1025        stats1
1026            .combine_sets(
1027                &stats2,
1028                &DType::Primitive(PType::I32, Nullability::NonNullable),
1029            )
1030            .unwrap();
1031
1032        let stats_ref =
1033            stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1034
1035        // Min should remain unchanged since it's more restrictive than the inexact value
1036        assert_eq!(
1037            stats_ref.get_as::<i32>(Stat::Min),
1038            Some(Precision::exact(42))
1039        );
1040        // Check that max was updated with the exact value
1041        assert_eq!(
1042            stats_ref.get_as::<i32>(Stat::Max),
1043            Some(Precision::exact(90))
1044        );
1045        // Check that IsSorted was added
1046        assert_eq!(
1047            stats_ref.get_as::<bool>(Stat::IsSorted),
1048            Some(Precision::exact(true))
1049        );
1050    }
1051}