vortex_array/stats/
stats_set.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5
6use enum_iterator::{Sequence, all};
7use num_traits::CheckedAdd;
8use vortex_dtype::DType;
9use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_err, vortex_panic};
10use vortex_scalar::{Scalar, ScalarValue};
11
12use super::{
13    IsSorted, IsStrictSorted, NaNCount, NullCount, StatType, StatsProvider, UncompressedSizeInBytes,
14};
15use crate::stats::{IsConstant, Max, Min, Precision, Stat, StatBound, StatsProviderExt, Sum};
16
17#[derive(Default, Debug, Clone)]
18pub struct StatsSet {
19    values: Vec<(Stat, Precision<ScalarValue>)>,
20}
21
22impl StatsSet {
23    /// Create new StatSet without validating uniqueness of all the entries
24    ///
25    /// # Safety
26    ///
27    /// This method will not panic or trigger UB, but may lead to duplicate stats being stored.
28    pub fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
29        Self { values }
30    }
31
32    /// Create StatsSet from single stat and value
33    pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
34        Self::new_unchecked(vec![(stat, value)])
35    }
36
37    fn reserve_full_capacity(&mut self) {
38        if self.values.capacity() < Stat::CARDINALITY {
39            self.values
40                .reserve_exact(Stat::CARDINALITY - self.values.capacity());
41        }
42    }
43
44    /// Wrap stats set with a dtype for mutable typed scalar access
45    pub fn as_mut_typed_ref<'a, 'b>(&'a mut self, dtype: &'b DType) -> MutTypedStatsSetRef<'a, 'b> {
46        MutTypedStatsSetRef {
47            values: self,
48            dtype,
49        }
50    }
51
52    /// Wrap stats set with a dtype for typed scalar access
53    pub fn as_typed_ref<'a, 'b>(&'a self, dtype: &'b DType) -> TypedStatsSetRef<'a, 'b> {
54        TypedStatsSetRef {
55            values: self,
56            dtype,
57        }
58    }
59}
60
61// Getters and setters for individual stats.
62impl StatsSet {
63    /// Set the stat `stat` to `value`.
64    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
65        self.reserve_full_capacity();
66
67        if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
68            *existing = (stat, value);
69        } else {
70            self.values.push((stat, value));
71        }
72    }
73
74    /// Clear the stat `stat` from the set.
75    pub fn clear(&mut self, stat: Stat) {
76        self.values.retain(|(s, _)| *s != stat);
77    }
78
79    /// Only keep given stats
80    pub fn retain_only(&mut self, stats: &[Stat]) {
81        self.values.retain(|(s, _)| stats.contains(s));
82    }
83
84    /// Keep given stats as inexact values
85    pub fn keep_inexact_stats(self, inexact_keep: &[Stat]) -> Self {
86        self.values
87            .into_iter()
88            .filter_map(|(s, v)| inexact_keep.contains(&s).then(|| (s, v.into_inexact())))
89            .collect()
90    }
91
92    /// Iterate over the statistic names and values in-place.
93    ///
94    /// See [Iterator].
95    pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
96        self.values.iter()
97    }
98
99    /// Get value for a given stat
100    pub fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
101        self.values
102            .iter()
103            .find(|(s, _)| *s == stat)
104            .map(|(_, v)| v.clone())
105    }
106
107    /// Length of the stats set
108    pub fn len(&self) -> usize {
109        self.values.len()
110    }
111
112    /// Check whether the statset is empty
113    pub fn is_empty(&self) -> bool {
114        self.values.is_empty()
115    }
116
117    /// Get scalar value of a given dtype
118    pub fn get_as<T: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
119        &self,
120        stat: Stat,
121        dtype: &DType,
122    ) -> Option<Precision<T>> {
123        self.get(stat).map(|v| {
124            v.map(|v| {
125                T::try_from(&Scalar::new(dtype.clone(), v)).unwrap_or_else(|err| {
126                    vortex_panic!(
127                        err,
128                        "Failed to get stat {} as {}",
129                        stat,
130                        std::any::type_name::<T>()
131                    )
132                })
133            })
134        })
135    }
136}
137
138// StatSetIntoIter just exists to protect current implementation from exposure on the public API.
139
140/// Owned iterator over the stats.
141///
142/// See [IntoIterator].
143pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
144
145impl Iterator for StatsSetIntoIter {
146    type Item = (Stat, Precision<ScalarValue>);
147
148    fn next(&mut self) -> Option<Self::Item> {
149        self.0.next()
150    }
151}
152
153impl IntoIterator for StatsSet {
154    type Item = (Stat, Precision<ScalarValue>);
155    type IntoIter = StatsSetIntoIter;
156
157    fn into_iter(self) -> Self::IntoIter {
158        StatsSetIntoIter(self.values.into_iter())
159    }
160}
161
162impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
163    fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
164        let iter = iter.into_iter();
165        let mut values = Vec::default();
166        values.reserve_exact(Stat::CARDINALITY);
167
168        let mut this = Self { values };
169        this.extend(iter);
170        this
171    }
172}
173
174impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
175    #[inline]
176    fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
177        let iter = iter.into_iter();
178        self.reserve_full_capacity();
179
180        iter.for_each(|(stat, value)| self.set(stat, value));
181    }
182}
183
184/// Merge helpers
185impl StatsSet {
186    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
187    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
188    pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
189        self.as_mut_typed_ref(dtype)
190            .merge_ordered(&other.as_typed_ref(dtype));
191        self
192    }
193
194    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
195    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
196    pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
197        self.as_mut_typed_ref(dtype)
198            .merge_unordered(&other.as_typed_ref(dtype));
199        self
200    }
201
202    /// Given two sets of stats (of differing precision) for the same array, combine them
203    pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
204        self.as_mut_typed_ref(dtype)
205            .combine_sets(&other.as_typed_ref(dtype))
206    }
207}
208
209pub struct TypedStatsSetRef<'a, 'b> {
210    pub values: &'a StatsSet,
211    pub dtype: &'b DType,
212}
213
214impl StatsProvider for TypedStatsSetRef<'_, '_> {
215    fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
216        self.values.get(stat).map(|p| {
217            p.map(|sv| {
218                Scalar::new(
219                    stat.dtype(self.dtype)
220                        .vortex_expect("Must have valid dtype if value is present"),
221                    sv,
222                )
223            })
224        })
225    }
226
227    fn len(&self) -> usize {
228        self.values.len()
229    }
230}
231
232pub struct MutTypedStatsSetRef<'a, 'b> {
233    pub values: &'a mut StatsSet,
234    pub dtype: &'b DType,
235}
236
237impl MutTypedStatsSetRef<'_, '_> {
238    /// Set the stat `stat` to `value`.
239    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
240        self.values.set(stat, value);
241    }
242
243    /// Clear the stat `stat` from the set.
244    pub fn clear(&mut self, stat: Stat) {
245        self.values.clear(stat);
246    }
247}
248
249impl StatsProvider for MutTypedStatsSetRef<'_, '_> {
250    fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
251        self.values.get(stat).map(|p| {
252            p.map(|sv| {
253                Scalar::new(
254                    stat.dtype(self.dtype)
255                        .vortex_expect("Must have valid dtype if value is present"),
256                    sv,
257                )
258            })
259        })
260    }
261
262    fn len(&self) -> usize {
263        self.values.len()
264    }
265}
266
267// Merge helpers
268impl MutTypedStatsSetRef<'_, '_> {
269    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
270    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
271    pub fn merge_ordered(mut self, other: &TypedStatsSetRef) -> Self {
272        for s in all::<Stat>() {
273            match s {
274                Stat::IsConstant => self.merge_is_constant(other),
275                Stat::IsSorted => self.merge_is_sorted(other),
276                Stat::IsStrictSorted => self.merge_is_strict_sorted(other),
277                Stat::Max => self.merge_max(other),
278                Stat::Min => self.merge_min(other),
279                Stat::Sum => self.merge_sum(other),
280                Stat::NullCount => self.merge_null_count(other),
281                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
282                Stat::NaNCount => self.merge_nan_count(other),
283            }
284        }
285
286        self
287    }
288
289    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
290    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
291    pub fn merge_unordered(mut self, other: &TypedStatsSetRef) -> Self {
292        for s in all::<Stat>() {
293            if !s.is_commutative() {
294                self.clear(s);
295                continue;
296            }
297
298            match s {
299                Stat::IsConstant => self.merge_is_constant(other),
300                Stat::Max => self.merge_max(other),
301                Stat::Min => self.merge_min(other),
302                Stat::Sum => self.merge_sum(other),
303                Stat::NullCount => self.merge_null_count(other),
304                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
305                Stat::IsSorted | Stat::IsStrictSorted => {
306                    unreachable!("not commutative")
307                }
308                Stat::NaNCount => self.merge_nan_count(other),
309            }
310        }
311
312        self
313    }
314
315    /// Given two sets of stats (of differing precision) for the same array, combine them
316    pub fn combine_sets(&mut self, other: &TypedStatsSetRef) -> VortexResult<()> {
317        let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
318        for s in other_stats {
319            match s {
320                Stat::Max => self.combine_bound::<Max>(other)?,
321                Stat::Min => self.combine_bound::<Min>(other)?,
322                Stat::UncompressedSizeInBytes => {
323                    self.combine_bound::<UncompressedSizeInBytes>(other)?
324                }
325                Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
326                Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
327                Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
328                Stat::NullCount => self.combine_bound::<NullCount>(other)?,
329                Stat::Sum => self.combine_bound::<Sum>(other)?,
330                Stat::NaNCount => self.combine_bound::<NaNCount>(other)?,
331            }
332        }
333        Ok(())
334    }
335
336    fn combine_bound<S: StatType<Scalar>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
337    where
338        S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
339    {
340        match (self.get_scalar_bound::<S>(), other.get_scalar_bound::<S>()) {
341            (Some(m1), Some(m2)) => {
342                let meet = m1
343                    .intersection(&m2)
344                    .vortex_expect("can always compare scalar")
345                    .ok_or_else(|| {
346                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
347                    })?;
348                if meet != m1 {
349                    self.set(S::STAT, meet.into_value().map(Scalar::into_value));
350                }
351            }
352            (None, Some(m)) => self.set(S::STAT, m.into_value().map(Scalar::into_value)),
353            (Some(_), _) => (),
354            (None, None) => self.clear(S::STAT),
355        }
356        Ok(())
357    }
358
359    fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
360    where
361        S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
362    {
363        match (
364            self.get_as_bound::<S, bool>(),
365            other.get_as_bound::<S, bool>(),
366        ) {
367            (Some(m1), Some(m2)) => {
368                let intersection = m1
369                    .intersection(&m2)
370                    .vortex_expect("can always compare boolean")
371                    .ok_or_else(|| {
372                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
373                    })?;
374                if intersection != m1 {
375                    self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
376                }
377            }
378            (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
379            (Some(_), None) => (),
380            (None, None) => self.clear(S::STAT),
381        }
382        Ok(())
383    }
384
385    fn merge_min(&mut self, other: &TypedStatsSetRef) {
386        match (
387            self.get_scalar_bound::<Min>(),
388            other.get_scalar_bound::<Min>(),
389        ) {
390            (Some(m1), Some(m2)) => {
391                let meet = m1.union(&m2).vortex_expect("can compare scalar");
392                if meet != m1 {
393                    self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
394                }
395            }
396            _ => self.clear(Stat::Min),
397        }
398    }
399
400    fn merge_max(&mut self, other: &TypedStatsSetRef) {
401        match (
402            self.get_scalar_bound::<Max>(),
403            other.get_scalar_bound::<Max>(),
404        ) {
405            (Some(m1), Some(m2)) => {
406                let meet = m1.union(&m2).vortex_expect("can compare scalar");
407                if meet != m1 {
408                    self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
409                }
410            }
411            _ => self.clear(Stat::Max),
412        }
413    }
414
415    fn merge_sum(&mut self, other: &TypedStatsSetRef) {
416        match (
417            self.get_scalar_bound::<Sum>(),
418            other.get_scalar_bound::<Sum>(),
419        ) {
420            (Some(m1), Some(m2)) => {
421                // If the combine sum is exact, then we can sum them.
422                if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
423                    s1.as_primitive()
424                        .checked_add(&s2.as_primitive())
425                        .map(|pscalar| {
426                            pscalar
427                                .pvalue()
428                                .map(|pvalue| {
429                                    Scalar::primitive_value(
430                                        pvalue,
431                                        pscalar.ptype(),
432                                        pscalar.dtype().nullability(),
433                                    )
434                                    .into_value()
435                                })
436                                .unwrap_or_else(ScalarValue::null)
437                        })
438                }) {
439                    self.set(Stat::Sum, Precision::Exact(scalar_value));
440                }
441            }
442            _ => self.clear(Stat::Sum),
443        }
444    }
445
446    fn merge_is_constant(&mut self, other: &TypedStatsSetRef) {
447        let self_const = self.get_as(Stat::IsConstant);
448        let other_const = other.get_as(Stat::IsConstant);
449        let self_min = self.get(Stat::Min);
450        let other_min = other.get(Stat::Min);
451
452        if let (
453            Some(Precision::Exact(self_const)),
454            Some(Precision::Exact(other_const)),
455            Some(Precision::Exact(self_min)),
456            Some(Precision::Exact(other_min)),
457        ) = (self_const, other_const, self_min, other_min)
458        {
459            if self_const && other_const && self_min == other_min {
460                self.set(Stat::IsConstant, Precision::exact(true));
461            } else {
462                self.set(Stat::IsConstant, Precision::inexact(false));
463            }
464        }
465        self.set(Stat::IsConstant, Precision::exact(false));
466    }
467
468    fn merge_is_sorted(&mut self, other: &TypedStatsSetRef) {
469        self.merge_sortedness_stat(other, Stat::IsSorted, PartialOrd::le)
470    }
471
472    fn merge_is_strict_sorted(&mut self, other: &TypedStatsSetRef) {
473        self.merge_sortedness_stat(other, Stat::IsStrictSorted, PartialOrd::lt)
474    }
475
476    fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
477        &mut self,
478        other: &TypedStatsSetRef,
479        stat: Stat,
480        cmp: F,
481    ) {
482        if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
483            == (self.get_as(stat), other.get_as(stat))
484        {
485            // There might be no stat because it was dropped, or it doesn't exist
486            // (e.g. an all null array).
487            // We assume that it was the dropped case since the doesn't exist might imply sorted,
488            // but this in-precision is correct.
489            if let (Some(self_max), Some(other_min)) = (
490                self.get_scalar_bound::<Max>(),
491                other.get_scalar_bound::<Min>(),
492            ) {
493                return if cmp(&self_max.max_value(), &other_min.min_value()) {
494                    // keep value
495                } else {
496                    self.set(stat, Precision::inexact(false));
497                };
498            }
499        }
500        self.clear(stat);
501    }
502
503    fn merge_null_count(&mut self, other: &TypedStatsSetRef) {
504        self.merge_sum_stat(Stat::NullCount, other)
505    }
506
507    fn merge_nan_count(&mut self, other: &TypedStatsSetRef) {
508        self.merge_sum_stat(Stat::NaNCount, other)
509    }
510
511    fn merge_uncompressed_size_in_bytes(&mut self, other: &TypedStatsSetRef) {
512        self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
513    }
514
515    fn merge_sum_stat(&mut self, stat: Stat, other: &TypedStatsSetRef) {
516        match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
517            (Some(nc1), Some(nc2)) => {
518                self.set(
519                    stat,
520                    nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
521                );
522            }
523            _ => self.clear(stat),
524        }
525    }
526}
527
528#[cfg(test)]
529mod test {
530    use enum_iterator::all;
531    use itertools::Itertools;
532    use vortex_dtype::{DType, Nullability, PType};
533
534    use crate::arrays::PrimitiveArray;
535    use crate::stats::stats_set::Scalar;
536    use crate::stats::{IsConstant, Precision, Stat, StatsProvider, StatsProviderExt, StatsSet};
537
538    #[test]
539    fn test_iter() {
540        let set = StatsSet::new_unchecked(vec![
541            (Stat::Max, Precision::exact(100)),
542            (Stat::Min, Precision::exact(42)),
543        ]);
544        let mut iter = set.iter();
545        let first = iter.next().unwrap().clone();
546        assert_eq!(first.0, Stat::Max);
547        assert_eq!(
548            first
549                .1
550                .map(|f| i32::try_from(&Scalar::new(PType::I32.into(), f)).unwrap()),
551            Precision::exact(100)
552        );
553        let snd = iter.next().unwrap().clone();
554        assert_eq!(snd.0, Stat::Min);
555        assert_eq!(
556            snd.1
557                .map(|s| i32::try_from(&Scalar::new(PType::I32.into(), s)).unwrap()),
558            42
559        );
560    }
561
562    #[test]
563    fn into_iter() {
564        let mut set = StatsSet::new_unchecked(vec![
565            (Stat::Max, Precision::exact(100)),
566            (Stat::Min, Precision::exact(42)),
567        ])
568        .into_iter();
569        let (stat, first) = set.next().unwrap();
570        assert_eq!(stat, Stat::Max);
571        assert_eq!(
572            first.map(|f| i32::try_from(&Scalar::new(PType::I32.into(), f)).unwrap()),
573            Precision::exact(100)
574        );
575        let snd = set.next().unwrap();
576        assert_eq!(snd.0, Stat::Min);
577        assert_eq!(
578            snd.1
579                .map(|s| i32::try_from(&Scalar::new(PType::I32.into(), s)).unwrap()),
580            Precision::exact(42)
581        );
582    }
583
584    #[test]
585    fn merge_constant() {
586        let first = StatsSet::from_iter([
587            (Stat::Min, Precision::exact(42)),
588            (Stat::IsConstant, Precision::exact(true)),
589        ])
590        .merge_ordered(
591            &StatsSet::from_iter([
592                (Stat::Min, Precision::inexact(42)),
593                (Stat::IsConstant, Precision::exact(true)),
594            ]),
595            &DType::Primitive(PType::I32, Nullability::NonNullable),
596        );
597
598        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
599        assert_eq!(
600            first_ref.get_as::<bool>(Stat::IsConstant),
601            Some(Precision::exact(false))
602        );
603        assert_eq!(
604            first_ref.get_as::<i32>(Stat::Min),
605            Some(Precision::exact(42))
606        );
607    }
608
609    #[test]
610    fn merge_into_min() {
611        let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
612            &StatsSet::default(),
613            &DType::Primitive(PType::I32, Nullability::NonNullable),
614        );
615
616        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
617        assert!(first_ref.get(Stat::Min).is_none());
618    }
619
620    #[test]
621    fn merge_from_min() {
622        let first = StatsSet::default().merge_ordered(
623            &StatsSet::of(Stat::Min, Precision::exact(42)),
624            &DType::Primitive(PType::I32, Nullability::NonNullable),
625        );
626
627        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
628        assert!(first_ref.get(Stat::Min).is_none());
629    }
630
631    #[test]
632    fn merge_mins() {
633        let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
634            &StatsSet::of(Stat::Min, Precision::exact(42)),
635            &DType::Primitive(PType::I32, Nullability::NonNullable),
636        );
637
638        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
639        assert_eq!(
640            first_ref.get_as::<i32>(Stat::Min),
641            Some(Precision::exact(37))
642        );
643    }
644
645    #[test]
646    fn merge_into_bound_max() {
647        let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
648            &StatsSet::default(),
649            &DType::Primitive(PType::I32, Nullability::NonNullable),
650        );
651        assert!(first.get(Stat::Max).is_none());
652    }
653
654    #[test]
655    fn merge_from_max() {
656        let first = StatsSet::default().merge_ordered(
657            &StatsSet::of(Stat::Max, Precision::exact(42)),
658            &DType::Primitive(PType::I32, Nullability::NonNullable),
659        );
660        assert!(first.get(Stat::Max).is_none());
661    }
662
663    #[test]
664    fn merge_maxes() {
665        let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
666            &StatsSet::of(Stat::Max, Precision::exact(42)),
667            &DType::Primitive(PType::I32, Nullability::NonNullable),
668        );
669        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
670        assert_eq!(
671            first_ref.get_as::<i32>(Stat::Max),
672            Some(Precision::exact(42))
673        );
674    }
675
676    #[test]
677    fn merge_maxes_bound() {
678        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
679        let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
680            .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
681        let first_ref = first.as_typed_ref(&dtype);
682        assert_eq!(
683            first_ref.get_as::<i32>(Stat::Max),
684            Some(Precision::inexact(43))
685        );
686    }
687
688    #[test]
689    fn merge_into_scalar() {
690        let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
691            &StatsSet::default(),
692            &DType::Primitive(PType::I32, Nullability::NonNullable),
693        );
694        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
695        assert!(first_ref.get(Stat::Sum).is_none());
696    }
697
698    #[test]
699    fn merge_from_scalar() {
700        let first = StatsSet::default().merge_ordered(
701            &StatsSet::of(Stat::Sum, Precision::exact(42)),
702            &DType::Primitive(PType::I32, Nullability::NonNullable),
703        );
704        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
705        assert!(first_ref.get(Stat::Sum).is_none());
706    }
707
708    #[test]
709    fn merge_scalars() {
710        let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
711            &StatsSet::of(Stat::Sum, Precision::exact(42)),
712            &DType::Primitive(PType::I32, Nullability::NonNullable),
713        );
714        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
715        assert_eq!(
716            first_ref.get_as::<usize>(Stat::Sum),
717            Some(Precision::exact(79usize))
718        );
719    }
720
721    #[test]
722    fn merge_into_sortedness() {
723        let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
724            &StatsSet::default(),
725            &DType::Primitive(PType::I32, Nullability::NonNullable),
726        );
727        assert!(first.get(Stat::IsStrictSorted).is_none());
728    }
729
730    #[test]
731    fn merge_from_sortedness() {
732        let first = StatsSet::default().merge_ordered(
733            &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
734            &DType::Primitive(PType::I32, Nullability::NonNullable),
735        );
736        assert!(first.get(Stat::IsStrictSorted).is_none());
737    }
738
739    #[test]
740    fn merge_sortedness() {
741        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
742        first.set(Stat::Max, Precision::exact(1));
743        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
744        second.set(Stat::Min, Precision::exact(2));
745        first = first.merge_ordered(
746            &second,
747            &DType::Primitive(PType::I32, Nullability::NonNullable),
748        );
749
750        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
751        assert_eq!(
752            first_ref.get_as::<bool>(Stat::IsStrictSorted),
753            Some(Precision::exact(true))
754        );
755    }
756
757    #[test]
758    fn merge_sortedness_out_of_order() {
759        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
760        first.set(Stat::Min, Precision::exact(1));
761        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
762        second.set(Stat::Max, Precision::exact(2));
763        second = second.merge_ordered(
764            &first,
765            &DType::Primitive(PType::I32, Nullability::NonNullable),
766        );
767
768        let second_ref =
769            second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
770        assert_eq!(
771            second_ref.get_as::<bool>(Stat::IsStrictSorted),
772            Some(Precision::inexact(false))
773        );
774    }
775
776    #[test]
777    fn merge_sortedness_only_one_sorted() {
778        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
779        first.set(Stat::Max, Precision::exact(1));
780        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
781        second.set(Stat::Min, Precision::exact(2));
782        first.merge_ordered(
783            &second,
784            &DType::Primitive(PType::I32, Nullability::NonNullable),
785        );
786
787        let second_ref =
788            second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
789        assert_eq!(
790            second_ref.get_as::<bool>(Stat::IsStrictSorted),
791            Some(Precision::exact(false))
792        );
793    }
794
795    #[test]
796    fn merge_sortedness_missing_min() {
797        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
798        first.set(Stat::Max, Precision::exact(1));
799        let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
800        first = first.merge_ordered(
801            &second,
802            &DType::Primitive(PType::I32, Nullability::NonNullable),
803        );
804        assert!(first.get(Stat::IsStrictSorted).is_none());
805    }
806
807    #[test]
808    fn merge_sortedness_bound_min() {
809        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
810        first.set(Stat::Max, Precision::exact(1));
811        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
812        second.set(Stat::Min, Precision::inexact(2));
813        first = first.merge_ordered(
814            &second,
815            &DType::Primitive(PType::I32, Nullability::NonNullable),
816        );
817
818        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
819        assert_eq!(
820            first_ref.get_as::<bool>(Stat::IsStrictSorted),
821            Some(Precision::exact(true))
822        );
823    }
824
825    #[test]
826    fn merge_unordered() {
827        let array =
828            PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
829        let all_stats = all::<Stat>()
830            .filter(|s| !matches!(s, Stat::Sum))
831            .filter(|s| !matches!(s, Stat::NaNCount))
832            .collect_vec();
833        array.statistics().compute_all(&all_stats).unwrap();
834
835        let stats = array.statistics().to_owned();
836        for stat in &all_stats {
837            assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
838        }
839
840        let merged = stats.clone().merge_unordered(
841            &stats,
842            &DType::Primitive(PType::I32, Nullability::NonNullable),
843        );
844        for stat in &all_stats {
845            assert_eq!(
846                merged.get(*stat).is_some(),
847                stat.is_commutative(),
848                "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
849            )
850        }
851
852        let merged_ref = merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
853        let stats_ref = stats.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
854
855        assert_eq!(
856            merged_ref.get_as::<i32>(Stat::Min),
857            stats_ref.get_as::<i32>(Stat::Min)
858        );
859        assert_eq!(
860            merged_ref.get_as::<i32>(Stat::Max),
861            stats_ref.get_as::<i32>(Stat::Max)
862        );
863        assert_eq!(
864            merged_ref.get_as::<u64>(Stat::NullCount).unwrap(),
865            stats_ref
866                .get_as::<u64>(Stat::NullCount)
867                .unwrap()
868                .map(|s| s * 2)
869        );
870    }
871
872    #[test]
873    fn merge_min_bound_same() {
874        // Merging a stat with a bound and another with an exact results in exact stat.
875        // since bound for min is a lower bound, it can in fact contain any value >= bound.
876        let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
877            &StatsSet::of(Stat::Min, Precision::exact(5)),
878            &DType::Primitive(PType::I32, Nullability::NonNullable),
879        );
880        let merged_ref =
881            merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
882        assert_eq!(
883            merged_ref.get_as::<i32>(Stat::Min),
884            Some(Precision::exact(5))
885        );
886    }
887
888    #[test]
889    fn merge_min_bound_bound_lower() {
890        let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
891            &StatsSet::of(Stat::Min, Precision::exact(5)),
892            &DType::Primitive(PType::I32, Nullability::NonNullable),
893        );
894        let merged_ref =
895            merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
896        assert_eq!(
897            merged_ref.get_as::<i32>(Stat::Min),
898            Some(Precision::inexact(4))
899        );
900    }
901
902    #[test]
903    fn retain_approx() {
904        let set = StatsSet::from_iter([
905            (Stat::Max, Precision::exact(100)),
906            (Stat::Min, Precision::exact(50)),
907            (Stat::Sum, Precision::inexact(10)),
908        ]);
909
910        let set = set.keep_inexact_stats(&[Stat::Min, Stat::Max]);
911
912        let set_ref = set.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
913
914        assert_eq!(set.len(), 2);
915        assert_eq!(
916            set_ref.get_as::<i32>(Stat::Max),
917            Some(Precision::inexact(100))
918        );
919        assert_eq!(
920            set_ref.get_as::<i32>(Stat::Min),
921            Some(Precision::inexact(50))
922        );
923        assert_eq!(set_ref.get_as::<i32>(Stat::Sum), None);
924    }
925
926    #[test]
927    fn test_combine_is_constant() {
928        {
929            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
930            let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
931            let mut stats_ref =
932                stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
933            stats_ref
934                .combine_bool_stat::<IsConstant>(
935                    &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
936                )
937                .unwrap();
938            assert_eq!(
939                stats_ref.get_as::<bool>(Stat::IsConstant),
940                Some(Precision::exact(true))
941            );
942        }
943
944        {
945            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
946            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
947            let mut stats_ref =
948                stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
949            stats_ref
950                .combine_bool_stat::<IsConstant>(
951                    &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
952                )
953                .unwrap();
954            assert_eq!(
955                stats_ref.get_as::<bool>(Stat::IsConstant),
956                Some(Precision::exact(true))
957            );
958        }
959
960        {
961            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
962            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
963            let mut stats_ref =
964                stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
965            stats_ref
966                .combine_bool_stat::<IsConstant>(
967                    &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
968                )
969                .unwrap();
970            assert_eq!(
971                stats_ref.get_as::<bool>(Stat::IsConstant),
972                Some(Precision::exact(false))
973            );
974        }
975    }
976
977    #[test]
978    fn test_combine_sets_boolean_conflict() {
979        let mut stats1 = StatsSet::from_iter([
980            (Stat::IsConstant, Precision::exact(true)),
981            (Stat::IsSorted, Precision::exact(true)),
982        ]);
983
984        let stats2 = StatsSet::from_iter([
985            (Stat::IsConstant, Precision::exact(false)),
986            (Stat::IsSorted, Precision::exact(true)),
987        ]);
988
989        let result = stats1.combine_sets(
990            &stats2,
991            &DType::Primitive(PType::I32, Nullability::NonNullable),
992        );
993        assert!(result.is_err());
994    }
995
996    #[test]
997    fn test_combine_sets_with_missing_stats() {
998        let mut stats1 = StatsSet::from_iter([
999            (Stat::Min, Precision::exact(42)),
1000            (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
1001        ]);
1002
1003        let stats2 = StatsSet::from_iter([
1004            (Stat::Max, Precision::exact(100)),
1005            (Stat::IsStrictSorted, Precision::exact(true)),
1006        ]);
1007
1008        stats1
1009            .combine_sets(
1010                &stats2,
1011                &DType::Primitive(PType::I32, Nullability::NonNullable),
1012            )
1013            .unwrap();
1014
1015        let stats_ref =
1016            stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1017
1018        // Min should remain unchanged
1019        assert_eq!(
1020            stats_ref.get_as::<i32>(Stat::Min),
1021            Some(Precision::exact(42))
1022        );
1023        // Max should be added
1024        assert_eq!(
1025            stats_ref.get_as::<i32>(Stat::Max),
1026            Some(Precision::exact(100))
1027        );
1028        // IsStrictSorted should be added
1029        assert_eq!(
1030            stats_ref.get_as::<bool>(Stat::IsStrictSorted),
1031            Some(Precision::exact(true))
1032        );
1033    }
1034
1035    #[test]
1036    fn test_combine_sets_with_inexact() {
1037        let mut stats1 = StatsSet::from_iter([
1038            (Stat::Min, Precision::exact(42)),
1039            (Stat::Max, Precision::inexact(100)),
1040            (Stat::IsConstant, Precision::exact(false)),
1041        ]);
1042
1043        let stats2 = StatsSet::from_iter([
1044            // Must ensure Min from stats2 is <= Min from stats1
1045            (Stat::Min, Precision::inexact(40)),
1046            (Stat::Max, Precision::exact(90)),
1047            (Stat::IsSorted, Precision::exact(true)),
1048        ]);
1049
1050        stats1
1051            .combine_sets(
1052                &stats2,
1053                &DType::Primitive(PType::I32, Nullability::NonNullable),
1054            )
1055            .unwrap();
1056
1057        let stats_ref =
1058            stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1059
1060        // Min should remain unchanged since it's more restrictive than the inexact value
1061        assert_eq!(
1062            stats_ref.get_as::<i32>(Stat::Min),
1063            Some(Precision::exact(42))
1064        );
1065        // Check that max was updated with the exact value
1066        assert_eq!(
1067            stats_ref.get_as::<i32>(Stat::Max),
1068            Some(Precision::exact(90))
1069        );
1070        // Check that IsSorted was added
1071        assert_eq!(
1072            stats_ref.get_as::<bool>(Stat::IsSorted),
1073            Some(Precision::exact(true))
1074        );
1075    }
1076}