vortex_array/stats/
stats_set.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5
6use enum_iterator::Sequence;
7use enum_iterator::all;
8use num_traits::CheckedAdd;
9use vortex_dtype::DType;
10use vortex_error::VortexError;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_err;
14use vortex_error::vortex_panic;
15use vortex_scalar::Scalar;
16use vortex_scalar::ScalarValue;
17
18use crate::expr::stats::IsConstant;
19use crate::expr::stats::IsSorted;
20use crate::expr::stats::IsStrictSorted;
21use crate::expr::stats::Max;
22use crate::expr::stats::Min;
23use crate::expr::stats::NaNCount;
24use crate::expr::stats::NullCount;
25use crate::expr::stats::Precision;
26use crate::expr::stats::Stat;
27use crate::expr::stats::StatBound;
28use crate::expr::stats::StatType;
29use crate::expr::stats::StatsProvider;
30use crate::expr::stats::StatsProviderExt;
31use crate::expr::stats::Sum;
32use crate::expr::stats::UncompressedSizeInBytes;
33
34#[derive(Default, Debug, Clone)]
35pub struct StatsSet {
36    values: Vec<(Stat, Precision<ScalarValue>)>,
37}
38
39impl StatsSet {
40    /// Create new StatSet without validating uniqueness of all the entries
41    ///
42    /// # Safety
43    ///
44    /// This method will not panic or trigger UB, but may lead to duplicate stats being stored.
45    pub unsafe fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
46        Self { values }
47    }
48
49    /// Create StatsSet from single stat and value
50    pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
51        // SAFETY: No duplicate stats will be set here.
52        unsafe { Self::new_unchecked(vec![(stat, value)]) }
53    }
54
55    fn reserve_full_capacity(&mut self) {
56        if self.values.capacity() < Stat::CARDINALITY {
57            self.values
58                .reserve_exact(Stat::CARDINALITY - self.values.capacity());
59        }
60    }
61
62    /// Wrap stats set with a dtype for mutable typed scalar access
63    pub fn as_mut_typed_ref<'a, 'b>(&'a mut self, dtype: &'b DType) -> MutTypedStatsSetRef<'a, 'b> {
64        MutTypedStatsSetRef {
65            values: self,
66            dtype,
67        }
68    }
69
70    /// Wrap stats set with a dtype for typed scalar access
71    pub fn as_typed_ref<'a, 'b>(&'a self, dtype: &'b DType) -> TypedStatsSetRef<'a, 'b> {
72        TypedStatsSetRef {
73            values: self,
74            dtype,
75        }
76    }
77}
78
79// Getters and setters for individual stats.
80impl StatsSet {
81    /// Set the stat `stat` to `value`.
82    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
83        self.reserve_full_capacity();
84
85        if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
86            *existing = (stat, value);
87        } else {
88            self.values.push((stat, value));
89        }
90    }
91
92    /// Clear the stat `stat` from the set.
93    pub fn clear(&mut self, stat: Stat) {
94        self.values.retain(|(s, _)| *s != stat);
95    }
96
97    /// Only keep given stats
98    pub fn retain_only(&mut self, stats: &[Stat]) {
99        self.values.retain(|(s, _)| stats.contains(s));
100    }
101
102    /// Iterate over the statistic names and values in-place.
103    ///
104    /// See [Iterator].
105    pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
106        self.values.iter()
107    }
108
109    /// Get value for a given stat
110    pub fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
111        self.values
112            .iter()
113            .find(|(s, _)| *s == stat)
114            .map(|(_, v)| v.clone())
115    }
116
117    /// Length of the stats set
118    pub fn len(&self) -> usize {
119        self.values.len()
120    }
121
122    /// Check whether the statset is empty
123    pub fn is_empty(&self) -> bool {
124        self.values.is_empty()
125    }
126
127    /// Get scalar value of a given dtype
128    pub fn get_as<T: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
129        &self,
130        stat: Stat,
131        dtype: &DType,
132    ) -> Option<Precision<T>> {
133        self.get(stat).map(|v| {
134            v.map(|v| {
135                T::try_from(&Scalar::new(dtype.clone(), v)).unwrap_or_else(|err| {
136                    vortex_panic!(
137                        err,
138                        "Failed to get stat {} as {}",
139                        stat,
140                        std::any::type_name::<T>()
141                    )
142                })
143            })
144        })
145    }
146}
147
148// StatSetIntoIter just exists to protect current implementation from exposure on the public API.
149
150/// Owned iterator over the stats.
151///
152/// See [IntoIterator].
153pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
154
155impl Iterator for StatsSetIntoIter {
156    type Item = (Stat, Precision<ScalarValue>);
157
158    fn next(&mut self) -> Option<Self::Item> {
159        self.0.next()
160    }
161}
162
163impl IntoIterator for StatsSet {
164    type Item = (Stat, Precision<ScalarValue>);
165    type IntoIter = StatsSetIntoIter;
166
167    fn into_iter(self) -> Self::IntoIter {
168        StatsSetIntoIter(self.values.into_iter())
169    }
170}
171
172impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
173    fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
174        let iter = iter.into_iter();
175        let mut values = Vec::default();
176        values.reserve_exact(Stat::CARDINALITY);
177
178        let mut this = Self { values };
179        this.extend(iter);
180        this
181    }
182}
183
184impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
185    #[inline]
186    fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
187        let iter = iter.into_iter();
188        self.reserve_full_capacity();
189
190        iter.for_each(|(stat, value)| self.set(stat, value));
191    }
192}
193
194/// Merge helpers
195impl StatsSet {
196    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
197    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
198    pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
199        self.as_mut_typed_ref(dtype)
200            .merge_ordered(&other.as_typed_ref(dtype));
201        self
202    }
203
204    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
205    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
206    pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
207        self.as_mut_typed_ref(dtype)
208            .merge_unordered(&other.as_typed_ref(dtype));
209        self
210    }
211
212    /// Given two sets of stats (of differing precision) for the same array, combine them
213    pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
214        self.as_mut_typed_ref(dtype)
215            .combine_sets(&other.as_typed_ref(dtype))
216    }
217}
218
219pub struct TypedStatsSetRef<'a, 'b> {
220    pub values: &'a StatsSet,
221    pub dtype: &'b DType,
222}
223
224impl StatsProvider for TypedStatsSetRef<'_, '_> {
225    fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
226        self.values.get(stat).map(|p| {
227            p.map(|sv| {
228                Scalar::new(
229                    stat.dtype(self.dtype)
230                        .vortex_expect("Must have valid dtype if value is present"),
231                    sv,
232                )
233            })
234        })
235    }
236
237    fn len(&self) -> usize {
238        self.values.len()
239    }
240}
241
242pub struct MutTypedStatsSetRef<'a, 'b> {
243    pub values: &'a mut StatsSet,
244    pub dtype: &'b DType,
245}
246
247impl MutTypedStatsSetRef<'_, '_> {
248    /// Set the stat `stat` to `value`.
249    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
250        self.values.set(stat, value);
251    }
252
253    /// Clear the stat `stat` from the set.
254    pub fn clear(&mut self, stat: Stat) {
255        self.values.clear(stat);
256    }
257}
258
259impl StatsProvider for MutTypedStatsSetRef<'_, '_> {
260    fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
261        self.values.get(stat).map(|p| {
262            p.map(|sv| {
263                Scalar::new(
264                    stat.dtype(self.dtype)
265                        .vortex_expect("Must have valid dtype if value is present"),
266                    sv,
267                )
268            })
269        })
270    }
271
272    fn len(&self) -> usize {
273        self.values.len()
274    }
275}
276
277// Merge helpers
278impl MutTypedStatsSetRef<'_, '_> {
279    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
280    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
281    pub fn merge_ordered(mut self, other: &TypedStatsSetRef) -> Self {
282        for s in all::<Stat>() {
283            match s {
284                Stat::IsConstant => self.merge_is_constant(other),
285                Stat::IsSorted => self.merge_is_sorted(other),
286                Stat::IsStrictSorted => self.merge_is_strict_sorted(other),
287                Stat::Max => self.merge_max(other),
288                Stat::Min => self.merge_min(other),
289                Stat::Sum => self.merge_sum(other),
290                Stat::NullCount => self.merge_null_count(other),
291                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
292                Stat::NaNCount => self.merge_nan_count(other),
293            }
294        }
295
296        self
297    }
298
299    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
300    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
301    pub fn merge_unordered(mut self, other: &TypedStatsSetRef) -> Self {
302        for s in all::<Stat>() {
303            if !s.is_commutative() {
304                self.clear(s);
305                continue;
306            }
307
308            match s {
309                Stat::IsConstant => self.merge_is_constant(other),
310                Stat::Max => self.merge_max(other),
311                Stat::Min => self.merge_min(other),
312                Stat::Sum => self.merge_sum(other),
313                Stat::NullCount => self.merge_null_count(other),
314                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
315                Stat::IsSorted | Stat::IsStrictSorted => {
316                    unreachable!("not commutative")
317                }
318                Stat::NaNCount => self.merge_nan_count(other),
319            }
320        }
321
322        self
323    }
324
325    /// Given two sets of stats (of differing precision) for the same array, combine them
326    pub fn combine_sets(&mut self, other: &TypedStatsSetRef) -> VortexResult<()> {
327        let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
328        for s in other_stats {
329            match s {
330                Stat::Max => self.combine_bound::<Max>(other)?,
331                Stat::Min => self.combine_bound::<Min>(other)?,
332                Stat::UncompressedSizeInBytes => {
333                    self.combine_bound::<UncompressedSizeInBytes>(other)?
334                }
335                Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
336                Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
337                Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
338                Stat::NullCount => self.combine_bound::<NullCount>(other)?,
339                Stat::Sum => self.combine_bound::<Sum>(other)?,
340                Stat::NaNCount => self.combine_bound::<NaNCount>(other)?,
341            }
342        }
343        Ok(())
344    }
345
346    fn combine_bound<S: StatType<Scalar>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
347    where
348        S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
349    {
350        match (self.get_scalar_bound::<S>(), other.get_scalar_bound::<S>()) {
351            (Some(m1), Some(m2)) => {
352                let meet = m1
353                    .intersection(&m2)
354                    .vortex_expect("can always compare scalar")
355                    .ok_or_else(|| {
356                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
357                    })?;
358                if meet != m1 {
359                    self.set(S::STAT, meet.into_value().map(Scalar::into_value));
360                }
361            }
362            (None, Some(m)) => self.set(S::STAT, m.into_value().map(Scalar::into_value)),
363            (Some(_), _) => (),
364            (None, None) => self.clear(S::STAT),
365        }
366        Ok(())
367    }
368
369    fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
370    where
371        S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
372    {
373        match (
374            self.get_as_bound::<S, bool>(),
375            other.get_as_bound::<S, bool>(),
376        ) {
377            (Some(m1), Some(m2)) => {
378                let intersection = m1
379                    .intersection(&m2)
380                    .vortex_expect("can always compare boolean")
381                    .ok_or_else(|| {
382                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
383                    })?;
384                if intersection != m1 {
385                    self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
386                }
387            }
388            (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
389            (Some(_), None) => (),
390            (None, None) => self.clear(S::STAT),
391        }
392        Ok(())
393    }
394
395    fn merge_min(&mut self, other: &TypedStatsSetRef) {
396        match (
397            self.get_scalar_bound::<Min>(),
398            other.get_scalar_bound::<Min>(),
399        ) {
400            (Some(m1), Some(m2)) => {
401                let meet = m1.union(&m2).vortex_expect("can compare scalar");
402                if meet != m1 {
403                    self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
404                }
405            }
406            _ => self.clear(Stat::Min),
407        }
408    }
409
410    fn merge_max(&mut self, other: &TypedStatsSetRef) {
411        match (
412            self.get_scalar_bound::<Max>(),
413            other.get_scalar_bound::<Max>(),
414        ) {
415            (Some(m1), Some(m2)) => {
416                let meet = m1.union(&m2).vortex_expect("can compare scalar");
417                if meet != m1 {
418                    self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
419                }
420            }
421            _ => self.clear(Stat::Max),
422        }
423    }
424
425    fn merge_sum(&mut self, other: &TypedStatsSetRef) {
426        match (
427            self.get_scalar_bound::<Sum>(),
428            other.get_scalar_bound::<Sum>(),
429        ) {
430            (Some(m1), Some(m2)) => {
431                // If the combine sum is exact, then we can sum them.
432                if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
433                    s1.as_primitive()
434                        .checked_add(&s2.as_primitive())
435                        .map(|pscalar| {
436                            pscalar
437                                .pvalue()
438                                .map(|pvalue| {
439                                    Scalar::primitive_value(
440                                        pvalue,
441                                        pscalar.ptype(),
442                                        pscalar.dtype().nullability(),
443                                    )
444                                    .into_value()
445                                })
446                                .unwrap_or_else(ScalarValue::null)
447                        })
448                }) {
449                    self.set(Stat::Sum, Precision::Exact(scalar_value));
450                }
451            }
452            _ => self.clear(Stat::Sum),
453        }
454    }
455
456    fn merge_is_constant(&mut self, other: &TypedStatsSetRef) {
457        let self_const = self.get_as(Stat::IsConstant);
458        let other_const = other.get_as(Stat::IsConstant);
459        let self_min = self.get(Stat::Min);
460        let other_min = other.get(Stat::Min);
461
462        if let (
463            Some(Precision::Exact(self_const)),
464            Some(Precision::Exact(other_const)),
465            Some(Precision::Exact(self_min)),
466            Some(Precision::Exact(other_min)),
467        ) = (self_const, other_const, self_min, other_min)
468        {
469            if self_const && other_const && self_min == other_min {
470                self.set(Stat::IsConstant, Precision::exact(true));
471            } else {
472                self.set(Stat::IsConstant, Precision::inexact(false));
473            }
474        }
475        self.set(Stat::IsConstant, Precision::exact(false));
476    }
477
478    fn merge_is_sorted(&mut self, other: &TypedStatsSetRef) {
479        self.merge_sortedness_stat(other, Stat::IsSorted, PartialOrd::le)
480    }
481
482    fn merge_is_strict_sorted(&mut self, other: &TypedStatsSetRef) {
483        self.merge_sortedness_stat(other, Stat::IsStrictSorted, PartialOrd::lt)
484    }
485
486    fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
487        &mut self,
488        other: &TypedStatsSetRef,
489        stat: Stat,
490        cmp: F,
491    ) {
492        if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
493            == (self.get_as(stat), other.get_as(stat))
494        {
495            // There might be no stat because it was dropped, or it doesn't exist
496            // (e.g. an all null array).
497            // We assume that it was the dropped case since the doesn't exist might imply sorted,
498            // but this in-precision is correct.
499            if let (Some(self_max), Some(other_min)) = (
500                self.get_scalar_bound::<Max>(),
501                other.get_scalar_bound::<Min>(),
502            ) {
503                return if cmp(&self_max.max_value(), &other_min.min_value()) {
504                    // keep value
505                } else {
506                    self.set(stat, Precision::inexact(false));
507                };
508            }
509        }
510        self.clear(stat);
511    }
512
513    fn merge_null_count(&mut self, other: &TypedStatsSetRef) {
514        self.merge_sum_stat(Stat::NullCount, other)
515    }
516
517    fn merge_nan_count(&mut self, other: &TypedStatsSetRef) {
518        self.merge_sum_stat(Stat::NaNCount, other)
519    }
520
521    fn merge_uncompressed_size_in_bytes(&mut self, other: &TypedStatsSetRef) {
522        self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
523    }
524
525    fn merge_sum_stat(&mut self, stat: Stat, other: &TypedStatsSetRef) {
526        match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
527            (Some(nc1), Some(nc2)) => {
528                self.set(
529                    stat,
530                    nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
531                );
532            }
533            _ => self.clear(stat),
534        }
535    }
536}
537
538#[cfg(test)]
539mod test {
540    use enum_iterator::all;
541    use itertools::Itertools;
542    use vortex_dtype::DType;
543    use vortex_dtype::Nullability;
544    use vortex_dtype::PType;
545
546    use crate::arrays::PrimitiveArray;
547    use crate::expr::stats::IsConstant;
548    use crate::expr::stats::Precision;
549    use crate::expr::stats::Stat;
550    use crate::expr::stats::StatsProvider;
551    use crate::expr::stats::StatsProviderExt;
552    use crate::stats::StatsSet;
553    use crate::stats::stats_set::Scalar;
554
555    #[test]
556    fn test_iter() {
557        // SAFETY: No duplicate stats.
558        let set = unsafe {
559            StatsSet::new_unchecked(vec![
560                (Stat::Max, Precision::exact(100)),
561                (Stat::Min, Precision::exact(42)),
562            ])
563        };
564        let mut iter = set.iter();
565        let first = iter.next().unwrap().clone();
566        assert_eq!(first.0, Stat::Max);
567        assert_eq!(
568            first
569                .1
570                .map(|f| i32::try_from(&Scalar::new(PType::I32.into(), f)).unwrap()),
571            Precision::exact(100)
572        );
573        let snd = iter.next().unwrap().clone();
574        assert_eq!(snd.0, Stat::Min);
575        assert_eq!(
576            snd.1
577                .map(|s| i32::try_from(&Scalar::new(PType::I32.into(), s)).unwrap()),
578            42
579        );
580    }
581
582    #[test]
583    fn into_iter() {
584        // SAFETY: No duplicate stats.
585        let mut set = unsafe {
586            StatsSet::new_unchecked(vec![
587                (Stat::Max, Precision::exact(100)),
588                (Stat::Min, Precision::exact(42)),
589            ])
590        }
591        .into_iter();
592        let (stat, first) = set.next().unwrap();
593        assert_eq!(stat, Stat::Max);
594        assert_eq!(
595            first.map(|f| i32::try_from(&Scalar::new(PType::I32.into(), f)).unwrap()),
596            Precision::exact(100)
597        );
598        let snd = set.next().unwrap();
599        assert_eq!(snd.0, Stat::Min);
600        assert_eq!(
601            snd.1
602                .map(|s| i32::try_from(&Scalar::new(PType::I32.into(), s)).unwrap()),
603            Precision::exact(42)
604        );
605    }
606
607    #[test]
608    fn merge_constant() {
609        let first = StatsSet::from_iter([
610            (Stat::Min, Precision::exact(42)),
611            (Stat::IsConstant, Precision::exact(true)),
612        ])
613        .merge_ordered(
614            &StatsSet::from_iter([
615                (Stat::Min, Precision::inexact(42)),
616                (Stat::IsConstant, Precision::exact(true)),
617            ]),
618            &DType::Primitive(PType::I32, Nullability::NonNullable),
619        );
620
621        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
622        assert_eq!(
623            first_ref.get_as::<bool>(Stat::IsConstant),
624            Some(Precision::exact(false))
625        );
626        assert_eq!(
627            first_ref.get_as::<i32>(Stat::Min),
628            Some(Precision::exact(42))
629        );
630    }
631
632    #[test]
633    fn merge_into_min() {
634        let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
635            &StatsSet::default(),
636            &DType::Primitive(PType::I32, Nullability::NonNullable),
637        );
638
639        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
640        assert!(first_ref.get(Stat::Min).is_none());
641    }
642
643    #[test]
644    fn merge_from_min() {
645        let first = StatsSet::default().merge_ordered(
646            &StatsSet::of(Stat::Min, Precision::exact(42)),
647            &DType::Primitive(PType::I32, Nullability::NonNullable),
648        );
649
650        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
651        assert!(first_ref.get(Stat::Min).is_none());
652    }
653
654    #[test]
655    fn merge_mins() {
656        let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
657            &StatsSet::of(Stat::Min, Precision::exact(42)),
658            &DType::Primitive(PType::I32, Nullability::NonNullable),
659        );
660
661        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
662        assert_eq!(
663            first_ref.get_as::<i32>(Stat::Min),
664            Some(Precision::exact(37))
665        );
666    }
667
668    #[test]
669    fn merge_into_bound_max() {
670        let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
671            &StatsSet::default(),
672            &DType::Primitive(PType::I32, Nullability::NonNullable),
673        );
674        assert!(first.get(Stat::Max).is_none());
675    }
676
677    #[test]
678    fn merge_from_max() {
679        let first = StatsSet::default().merge_ordered(
680            &StatsSet::of(Stat::Max, Precision::exact(42)),
681            &DType::Primitive(PType::I32, Nullability::NonNullable),
682        );
683        assert!(first.get(Stat::Max).is_none());
684    }
685
686    #[test]
687    fn merge_maxes() {
688        let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
689            &StatsSet::of(Stat::Max, Precision::exact(42)),
690            &DType::Primitive(PType::I32, Nullability::NonNullable),
691        );
692        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
693        assert_eq!(
694            first_ref.get_as::<i32>(Stat::Max),
695            Some(Precision::exact(42))
696        );
697    }
698
699    #[test]
700    fn merge_maxes_bound() {
701        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
702        let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
703            .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
704        let first_ref = first.as_typed_ref(&dtype);
705        assert_eq!(
706            first_ref.get_as::<i32>(Stat::Max),
707            Some(Precision::inexact(43))
708        );
709    }
710
711    #[test]
712    fn merge_into_scalar() {
713        let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
714            &StatsSet::default(),
715            &DType::Primitive(PType::I32, Nullability::NonNullable),
716        );
717        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
718        assert!(first_ref.get(Stat::Sum).is_none());
719    }
720
721    #[test]
722    fn merge_from_scalar() {
723        let first = StatsSet::default().merge_ordered(
724            &StatsSet::of(Stat::Sum, Precision::exact(42)),
725            &DType::Primitive(PType::I32, Nullability::NonNullable),
726        );
727        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
728        assert!(first_ref.get(Stat::Sum).is_none());
729    }
730
731    #[test]
732    fn merge_scalars() {
733        let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
734            &StatsSet::of(Stat::Sum, Precision::exact(42)),
735            &DType::Primitive(PType::I32, Nullability::NonNullable),
736        );
737        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
738        assert_eq!(
739            first_ref.get_as::<usize>(Stat::Sum),
740            Some(Precision::exact(79usize))
741        );
742    }
743
744    #[test]
745    fn merge_into_sortedness() {
746        let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
747            &StatsSet::default(),
748            &DType::Primitive(PType::I32, Nullability::NonNullable),
749        );
750        assert!(first.get(Stat::IsStrictSorted).is_none());
751    }
752
753    #[test]
754    fn merge_from_sortedness() {
755        let first = StatsSet::default().merge_ordered(
756            &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
757            &DType::Primitive(PType::I32, Nullability::NonNullable),
758        );
759        assert!(first.get(Stat::IsStrictSorted).is_none());
760    }
761
762    #[test]
763    fn merge_sortedness() {
764        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
765        first.set(Stat::Max, Precision::exact(1));
766        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
767        second.set(Stat::Min, Precision::exact(2));
768        first = first.merge_ordered(
769            &second,
770            &DType::Primitive(PType::I32, Nullability::NonNullable),
771        );
772
773        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
774        assert_eq!(
775            first_ref.get_as::<bool>(Stat::IsStrictSorted),
776            Some(Precision::exact(true))
777        );
778    }
779
780    #[test]
781    fn merge_sortedness_out_of_order() {
782        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
783        first.set(Stat::Min, Precision::exact(1));
784        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
785        second.set(Stat::Max, Precision::exact(2));
786        second = second.merge_ordered(
787            &first,
788            &DType::Primitive(PType::I32, Nullability::NonNullable),
789        );
790
791        let second_ref =
792            second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
793        assert_eq!(
794            second_ref.get_as::<bool>(Stat::IsStrictSorted),
795            Some(Precision::inexact(false))
796        );
797    }
798
799    #[test]
800    fn merge_sortedness_only_one_sorted() {
801        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
802        first.set(Stat::Max, Precision::exact(1));
803        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
804        second.set(Stat::Min, Precision::exact(2));
805        first.merge_ordered(
806            &second,
807            &DType::Primitive(PType::I32, Nullability::NonNullable),
808        );
809
810        let second_ref =
811            second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
812        assert_eq!(
813            second_ref.get_as::<bool>(Stat::IsStrictSorted),
814            Some(Precision::exact(false))
815        );
816    }
817
818    #[test]
819    fn merge_sortedness_missing_min() {
820        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
821        first.set(Stat::Max, Precision::exact(1));
822        let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
823        first = first.merge_ordered(
824            &second,
825            &DType::Primitive(PType::I32, Nullability::NonNullable),
826        );
827        assert!(first.get(Stat::IsStrictSorted).is_none());
828    }
829
830    #[test]
831    fn merge_sortedness_bound_min() {
832        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
833        first.set(Stat::Max, Precision::exact(1));
834        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
835        second.set(Stat::Min, Precision::inexact(2));
836        first = first.merge_ordered(
837            &second,
838            &DType::Primitive(PType::I32, Nullability::NonNullable),
839        );
840
841        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
842        assert_eq!(
843            first_ref.get_as::<bool>(Stat::IsStrictSorted),
844            Some(Precision::exact(true))
845        );
846    }
847
848    #[test]
849    fn merge_unordered() {
850        let array =
851            PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
852        let all_stats = all::<Stat>()
853            .filter(|s| !matches!(s, Stat::Sum))
854            .filter(|s| !matches!(s, Stat::NaNCount))
855            .collect_vec();
856        array.statistics().compute_all(&all_stats).unwrap();
857
858        let stats = array.statistics().to_owned();
859        for stat in &all_stats {
860            assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
861        }
862
863        let merged = stats.clone().merge_unordered(
864            &stats,
865            &DType::Primitive(PType::I32, Nullability::NonNullable),
866        );
867        for stat in &all_stats {
868            assert_eq!(
869                merged.get(*stat).is_some(),
870                stat.is_commutative(),
871                "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
872            )
873        }
874
875        let merged_ref = merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
876        let stats_ref = stats.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
877
878        assert_eq!(
879            merged_ref.get_as::<i32>(Stat::Min),
880            stats_ref.get_as::<i32>(Stat::Min)
881        );
882        assert_eq!(
883            merged_ref.get_as::<i32>(Stat::Max),
884            stats_ref.get_as::<i32>(Stat::Max)
885        );
886        assert_eq!(
887            merged_ref.get_as::<u64>(Stat::NullCount).unwrap(),
888            stats_ref
889                .get_as::<u64>(Stat::NullCount)
890                .unwrap()
891                .map(|s| s * 2)
892        );
893    }
894
895    #[test]
896    fn merge_min_bound_same() {
897        // Merging a stat with a bound and another with an exact results in exact stat.
898        // since bound for min is a lower bound, it can in fact contain any value >= bound.
899        let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
900            &StatsSet::of(Stat::Min, Precision::exact(5)),
901            &DType::Primitive(PType::I32, Nullability::NonNullable),
902        );
903        let merged_ref =
904            merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
905        assert_eq!(
906            merged_ref.get_as::<i32>(Stat::Min),
907            Some(Precision::exact(5))
908        );
909    }
910
911    #[test]
912    fn merge_min_bound_bound_lower() {
913        let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
914            &StatsSet::of(Stat::Min, Precision::exact(5)),
915            &DType::Primitive(PType::I32, Nullability::NonNullable),
916        );
917        let merged_ref =
918            merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
919        assert_eq!(
920            merged_ref.get_as::<i32>(Stat::Min),
921            Some(Precision::inexact(4))
922        );
923    }
924
925    #[test]
926    fn test_combine_is_constant() {
927        {
928            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
929            let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
930            let mut stats_ref =
931                stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
932            stats_ref
933                .combine_bool_stat::<IsConstant>(
934                    &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
935                )
936                .unwrap();
937            assert_eq!(
938                stats_ref.get_as::<bool>(Stat::IsConstant),
939                Some(Precision::exact(true))
940            );
941        }
942
943        {
944            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
945            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
946            let mut stats_ref =
947                stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
948            stats_ref
949                .combine_bool_stat::<IsConstant>(
950                    &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
951                )
952                .unwrap();
953            assert_eq!(
954                stats_ref.get_as::<bool>(Stat::IsConstant),
955                Some(Precision::exact(true))
956            );
957        }
958
959        {
960            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
961            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
962            let mut stats_ref =
963                stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
964            stats_ref
965                .combine_bool_stat::<IsConstant>(
966                    &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
967                )
968                .unwrap();
969            assert_eq!(
970                stats_ref.get_as::<bool>(Stat::IsConstant),
971                Some(Precision::exact(false))
972            );
973        }
974    }
975
976    #[test]
977    fn test_combine_sets_boolean_conflict() {
978        let mut stats1 = StatsSet::from_iter([
979            (Stat::IsConstant, Precision::exact(true)),
980            (Stat::IsSorted, Precision::exact(true)),
981        ]);
982
983        let stats2 = StatsSet::from_iter([
984            (Stat::IsConstant, Precision::exact(false)),
985            (Stat::IsSorted, Precision::exact(true)),
986        ]);
987
988        let result = stats1.combine_sets(
989            &stats2,
990            &DType::Primitive(PType::I32, Nullability::NonNullable),
991        );
992        assert!(result.is_err());
993    }
994
995    #[test]
996    fn test_combine_sets_with_missing_stats() {
997        let mut stats1 = StatsSet::from_iter([
998            (Stat::Min, Precision::exact(42)),
999            (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
1000        ]);
1001
1002        let stats2 = StatsSet::from_iter([
1003            (Stat::Max, Precision::exact(100)),
1004            (Stat::IsStrictSorted, Precision::exact(true)),
1005        ]);
1006
1007        stats1
1008            .combine_sets(
1009                &stats2,
1010                &DType::Primitive(PType::I32, Nullability::NonNullable),
1011            )
1012            .unwrap();
1013
1014        let stats_ref =
1015            stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1016
1017        // Min should remain unchanged
1018        assert_eq!(
1019            stats_ref.get_as::<i32>(Stat::Min),
1020            Some(Precision::exact(42))
1021        );
1022        // Max should be added
1023        assert_eq!(
1024            stats_ref.get_as::<i32>(Stat::Max),
1025            Some(Precision::exact(100))
1026        );
1027        // IsStrictSorted should be added
1028        assert_eq!(
1029            stats_ref.get_as::<bool>(Stat::IsStrictSorted),
1030            Some(Precision::exact(true))
1031        );
1032    }
1033
1034    #[test]
1035    fn test_combine_sets_with_inexact() {
1036        let mut stats1 = StatsSet::from_iter([
1037            (Stat::Min, Precision::exact(42)),
1038            (Stat::Max, Precision::inexact(100)),
1039            (Stat::IsConstant, Precision::exact(false)),
1040        ]);
1041
1042        let stats2 = StatsSet::from_iter([
1043            // Must ensure Min from stats2 is <= Min from stats1
1044            (Stat::Min, Precision::inexact(40)),
1045            (Stat::Max, Precision::exact(90)),
1046            (Stat::IsSorted, Precision::exact(true)),
1047        ]);
1048
1049        stats1
1050            .combine_sets(
1051                &stats2,
1052                &DType::Primitive(PType::I32, Nullability::NonNullable),
1053            )
1054            .unwrap();
1055
1056        let stats_ref =
1057            stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1058
1059        // Min should remain unchanged since it's more restrictive than the inexact value
1060        assert_eq!(
1061            stats_ref.get_as::<i32>(Stat::Min),
1062            Some(Precision::exact(42))
1063        );
1064        // Check that max was updated with the exact value
1065        assert_eq!(
1066            stats_ref.get_as::<i32>(Stat::Max),
1067            Some(Precision::exact(90))
1068        );
1069        // Check that IsSorted was added
1070        assert_eq!(
1071            stats_ref.get_as::<bool>(Stat::IsSorted),
1072            Some(Precision::exact(true))
1073        );
1074    }
1075}