Skip to main content

vortex_array/stats/
stats_set.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5
6use enum_iterator::all;
7use num_traits::CheckedAdd;
8use smallvec::SmallVec;
9use smallvec::smallvec;
10use vortex_error::VortexError;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_err;
14use vortex_error::vortex_panic;
15
16use crate::dtype::DType;
17use crate::expr::stats::IsConstant;
18use crate::expr::stats::IsSorted;
19use crate::expr::stats::IsStrictSorted;
20use crate::expr::stats::Max;
21use crate::expr::stats::Min;
22use crate::expr::stats::NaNCount;
23use crate::expr::stats::NullCount;
24use crate::expr::stats::Precision;
25use crate::expr::stats::Stat;
26use crate::expr::stats::StatBound;
27use crate::expr::stats::StatType;
28use crate::expr::stats::StatsProvider;
29use crate::expr::stats::StatsProviderExt;
30use crate::expr::stats::Sum;
31use crate::expr::stats::UncompressedSizeInBytes;
32use crate::scalar::Scalar;
33use crate::scalar::ScalarValue;
34
35/// Type of the SmallVec stored inside StatsSet
36pub type StatsArray = [(Stat, Precision<ScalarValue>); 4];
37
38#[derive(Default, Debug, Clone)]
39pub struct StatsSet {
40    values: SmallVec<StatsArray>,
41}
42
43impl StatsSet {
44    /// Create new StatSet without validating uniqueness of all the entries
45    ///
46    /// # Safety
47    ///
48    /// This method will not panic or trigger UB, but may lead to duplicate stats being stored.
49    pub unsafe fn new_unchecked(values: SmallVec<StatsArray>) -> Self {
50        Self { values }
51    }
52
53    /// Create StatsSet from single stat and value
54    pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
55        Self {
56            values: smallvec![(stat, value)],
57        }
58    }
59
60    /// Wrap stats set with a dtype for mutable typed scalar access
61    pub fn as_mut_typed_ref<'a, 'b>(&'a mut self, dtype: &'b DType) -> MutTypedStatsSetRef<'a, 'b> {
62        MutTypedStatsSetRef {
63            values: self,
64            dtype,
65        }
66    }
67
68    /// Wrap stats set with a dtype for typed scalar access
69    pub fn as_typed_ref<'a, 'b>(&'a self, dtype: &'b DType) -> TypedStatsSetRef<'a, 'b> {
70        TypedStatsSetRef {
71            values: self,
72            dtype,
73        }
74    }
75}
76
77// Getters and setters for individual stats.
78impl StatsSet {
79    /// Set the stat `stat` to `value`.
80    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
81        if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
82            *existing = (stat, value);
83        } else {
84            self.values.push((stat, value));
85        }
86    }
87
88    /// Clear the stat `stat` from the set.
89    pub fn clear(&mut self, stat: Stat) {
90        self.values.retain(|(s, _)| *s != stat);
91    }
92
93    /// Only keep given stats
94    pub fn retain_only(&mut self, stats: &[Stat]) {
95        self.values.retain(|(s, _)| stats.contains(s));
96    }
97
98    /// Iterate over the statistic names and values in-place.
99    ///
100    /// See [Iterator].
101    pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
102        self.values.iter()
103    }
104
105    /// Get value for a given stat
106    pub fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
107        self.values
108            .iter()
109            .find(|(s, _)| *s == stat)
110            .map(|(_, v)| v.clone())
111    }
112
113    /// Length of the stats set
114    pub fn len(&self) -> usize {
115        self.values.len()
116    }
117
118    /// Check whether the statset is empty
119    pub fn is_empty(&self) -> bool {
120        self.values.is_empty()
121    }
122
123    /// Get scalar value of a given dtype
124    pub fn get_as<T: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
125        &self,
126        stat: Stat,
127        dtype: &DType,
128    ) -> Option<Precision<T>> {
129        self.get(stat).map(|v| {
130            v.map(|v| {
131                T::try_from(
132                    &Scalar::try_new(dtype.clone(), Some(v))
133                        .vortex_expect("failed to construct a scalar statistic"),
134                )
135                .unwrap_or_else(|err| {
136                    vortex_panic!(
137                        err,
138                        "Failed to get stat {} as {}",
139                        stat,
140                        std::any::type_name::<T>()
141                    )
142                })
143            })
144        })
145    }
146}
147
148// StatSetIntoIter just exists to protect current implementation from exposure on the public API.
149
150/// Owned iterator over the stats.
151///
152/// See [IntoIterator].
153pub struct StatsSetIntoIter(smallvec::IntoIter<StatsArray>);
154
155impl Iterator for StatsSetIntoIter {
156    type Item = (Stat, Precision<ScalarValue>);
157
158    fn next(&mut self) -> Option<Self::Item> {
159        self.0.next()
160    }
161}
162
163impl IntoIterator for StatsSet {
164    type Item = (Stat, Precision<ScalarValue>);
165    type IntoIter = StatsSetIntoIter;
166
167    fn into_iter(self) -> Self::IntoIter {
168        StatsSetIntoIter(self.values.into_iter())
169    }
170}
171
172impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
173    fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
174        let iter = iter.into_iter();
175
176        let mut this = Self {
177            values: SmallVec::new(),
178        };
179        this.extend(iter);
180        this
181    }
182}
183
184impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
185    #[inline]
186    fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
187        iter.into_iter()
188            .for_each(|(stat, value)| self.set(stat, value));
189    }
190}
191
192/// Merge helpers
193impl StatsSet {
194    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
195    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
196    pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
197        self.as_mut_typed_ref(dtype)
198            .merge_ordered(&other.as_typed_ref(dtype));
199        self
200    }
201
202    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
203    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
204    pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
205        self.as_mut_typed_ref(dtype)
206            .merge_unordered(&other.as_typed_ref(dtype));
207        self
208    }
209
210    /// Given two sets of stats (of differing precision) for the same array, combine them
211    pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
212        self.as_mut_typed_ref(dtype)
213            .combine_sets(&other.as_typed_ref(dtype))
214    }
215}
216
217pub struct TypedStatsSetRef<'a, 'b> {
218    pub values: &'a StatsSet,
219    pub dtype: &'b DType,
220}
221
222impl StatsProvider for TypedStatsSetRef<'_, '_> {
223    fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
224        self.values.get(stat).map(|p| {
225            p.map(|sv| {
226                Scalar::try_new(
227                    stat.dtype(self.dtype)
228                        .vortex_expect("Must have valid dtype if value is present"),
229                    Some(sv),
230                )
231                .vortex_expect("failed to construct a scalar statistic")
232            })
233        })
234    }
235
236    fn len(&self) -> usize {
237        self.values.len()
238    }
239}
240
241pub struct MutTypedStatsSetRef<'a, 'b> {
242    pub values: &'a mut StatsSet,
243    pub dtype: &'b DType,
244}
245
246impl MutTypedStatsSetRef<'_, '_> {
247    /// Set the stat `stat` to `value`.
248    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
249        self.values.set(stat, value);
250    }
251
252    /// Clear the stat `stat` from the set.
253    pub fn clear(&mut self, stat: Stat) {
254        self.values.clear(stat);
255    }
256}
257
258impl StatsProvider for MutTypedStatsSetRef<'_, '_> {
259    fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
260        self.values.get(stat).map(|p| {
261            p.map(|sv| {
262                Scalar::try_new(
263                    stat.dtype(self.dtype)
264                        .vortex_expect("Must have valid dtype if value is present"),
265                    Some(sv),
266                )
267                .vortex_expect("failed to construct a scalar statistic")
268            })
269        })
270    }
271
272    fn len(&self) -> usize {
273        self.values.len()
274    }
275}
276
277// Merge helpers
278impl MutTypedStatsSetRef<'_, '_> {
279    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
280    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
281    pub fn merge_ordered(mut self, other: &TypedStatsSetRef) -> Self {
282        for s in all::<Stat>() {
283            match s {
284                Stat::IsConstant => self.merge_is_constant(other),
285                Stat::IsSorted => self.merge_is_sorted(other),
286                Stat::IsStrictSorted => self.merge_is_strict_sorted(other),
287                Stat::Max => self.merge_max(other),
288                Stat::Min => self.merge_min(other),
289                Stat::Sum => self.merge_sum(other),
290                Stat::NullCount => self.merge_null_count(other),
291                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
292                Stat::NaNCount => self.merge_nan_count(other),
293            }
294        }
295
296        self
297    }
298
299    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
300    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
301    pub fn merge_unordered(mut self, other: &TypedStatsSetRef) -> Self {
302        for s in all::<Stat>() {
303            if !s.is_commutative() {
304                self.clear(s);
305                continue;
306            }
307
308            match s {
309                Stat::IsConstant => self.merge_is_constant(other),
310                Stat::Max => self.merge_max(other),
311                Stat::Min => self.merge_min(other),
312                Stat::Sum => self.merge_sum(other),
313                Stat::NullCount => self.merge_null_count(other),
314                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
315                Stat::IsSorted | Stat::IsStrictSorted => {
316                    unreachable!("not commutative")
317                }
318                Stat::NaNCount => self.merge_nan_count(other),
319            }
320        }
321
322        self
323    }
324
325    /// Given two sets of stats (of differing precision) for the same array, combine them
326    pub fn combine_sets(&mut self, other: &TypedStatsSetRef) -> VortexResult<()> {
327        let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
328        for s in other_stats {
329            match s {
330                Stat::Max => self.combine_bound::<Max>(other)?,
331                Stat::Min => self.combine_bound::<Min>(other)?,
332                Stat::UncompressedSizeInBytes => {
333                    self.combine_bound::<UncompressedSizeInBytes>(other)?
334                }
335                Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
336                Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
337                Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
338                Stat::NullCount => self.combine_bound::<NullCount>(other)?,
339                Stat::Sum => self.combine_bound::<Sum>(other)?,
340                Stat::NaNCount => self.combine_bound::<NaNCount>(other)?,
341            }
342        }
343        Ok(())
344    }
345
346    fn combine_bound<S: StatType<Scalar>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
347    where
348        S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
349    {
350        match (self.get_scalar_bound::<S>(), other.get_scalar_bound::<S>()) {
351            (Some(m1), Some(m2)) => {
352                let meet = m1
353                    .intersection(&m2)
354                    .vortex_expect("can always compare scalar")
355                    .ok_or_else(|| {
356                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
357                    })?;
358                if meet != m1 {
359                    self.set(
360                        S::STAT,
361                        meet.into_value().map(|s| {
362                            s.into_value()
363                                .vortex_expect("stat scalar value cannot be null")
364                        }),
365                    );
366                }
367            }
368            (None, Some(m)) => self.set(
369                S::STAT,
370                m.into_value().map(|s| {
371                    s.into_value()
372                        .vortex_expect("stat scalar value cannot be null")
373                }),
374            ),
375            (Some(_), _) => (),
376            (None, None) => self.clear(S::STAT),
377        }
378        Ok(())
379    }
380
381    fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
382    where
383        S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
384    {
385        match (
386            self.get_as_bound::<S, bool>(),
387            other.get_as_bound::<S, bool>(),
388        ) {
389            (Some(m1), Some(m2)) => {
390                let intersection = m1
391                    .intersection(&m2)
392                    .vortex_expect("can always compare boolean")
393                    .ok_or_else(|| {
394                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
395                    })?;
396                if intersection != m1 {
397                    self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
398                }
399            }
400            (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
401            (Some(_), None) => (),
402            (None, None) => self.clear(S::STAT),
403        }
404        Ok(())
405    }
406
407    fn merge_min(&mut self, other: &TypedStatsSetRef) {
408        match (
409            self.get_scalar_bound::<Min>(),
410            other.get_scalar_bound::<Min>(),
411        ) {
412            (Some(m1), Some(m2)) => {
413                let meet = m1.union(&m2).vortex_expect("can compare scalar");
414                if meet != m1 {
415                    self.set(
416                        Stat::Min,
417                        meet.into_value().map(|s| {
418                            s.into_value()
419                                .vortex_expect("stat scalar value cannot be null")
420                        }),
421                    );
422                }
423            }
424            _ => self.clear(Stat::Min),
425        }
426    }
427
428    fn merge_max(&mut self, other: &TypedStatsSetRef) {
429        match (
430            self.get_scalar_bound::<Max>(),
431            other.get_scalar_bound::<Max>(),
432        ) {
433            (Some(m1), Some(m2)) => {
434                let meet = m1.union(&m2).vortex_expect("can compare scalar");
435                if meet != m1 {
436                    self.set(
437                        Stat::Max,
438                        meet.into_value().map(|s| {
439                            s.into_value()
440                                .vortex_expect("stat scalar value cannot be null")
441                        }),
442                    );
443                }
444            }
445            _ => self.clear(Stat::Max),
446        }
447    }
448
449    fn merge_sum(&mut self, other: &TypedStatsSetRef) {
450        match (
451            self.get_scalar_bound::<Sum>(),
452            other.get_scalar_bound::<Sum>(),
453        ) {
454            (Some(m1), Some(m2)) => {
455                // If the combine sum is exact, then we can sum them.
456                if let Some(scalar_value) =
457                    m1.zip(m2).as_exact().and_then(|(s1, s2)| match s1.dtype() {
458                        DType::Primitive(..) => s1
459                            .as_primitive()
460                            .checked_add(&s2.as_primitive())
461                            .and_then(|pscalar| pscalar.pvalue().map(ScalarValue::Primitive)),
462                        DType::Decimal(..) => s1
463                            .as_decimal()
464                            .checked_binary_numeric(
465                                &s2.as_decimal(),
466                                crate::scalar::NumericOperator::Add,
467                            )
468                            .map(|scalar| {
469                                ScalarValue::Decimal(
470                                    scalar
471                                        .decimal_value()
472                                        .vortex_expect("no decimal value in scalar"),
473                                )
474                            }),
475                        _ => None,
476                    })
477                {
478                    self.set(Stat::Sum, Precision::Exact(scalar_value));
479                }
480            }
481            _ => self.clear(Stat::Sum),
482        }
483    }
484
485    fn merge_is_constant(&mut self, other: &TypedStatsSetRef) {
486        let self_const = self.get_as(Stat::IsConstant);
487        let other_const = other.get_as(Stat::IsConstant);
488        let self_min = self.get(Stat::Min);
489        let other_min = other.get(Stat::Min);
490
491        if let (
492            Some(Precision::Exact(self_const)),
493            Some(Precision::Exact(other_const)),
494            Some(Precision::Exact(self_min)),
495            Some(Precision::Exact(other_min)),
496        ) = (self_const, other_const, self_min, other_min)
497        {
498            if self_const && other_const && self_min == other_min {
499                self.set(Stat::IsConstant, Precision::exact(true));
500            } else {
501                self.set(Stat::IsConstant, Precision::inexact(false));
502            }
503        }
504        self.set(Stat::IsConstant, Precision::exact(false));
505    }
506
507    fn merge_is_sorted(&mut self, other: &TypedStatsSetRef) {
508        self.merge_sortedness_stat(other, Stat::IsSorted, PartialOrd::le)
509    }
510
511    fn merge_is_strict_sorted(&mut self, other: &TypedStatsSetRef) {
512        self.merge_sortedness_stat(other, Stat::IsStrictSorted, PartialOrd::lt)
513    }
514
515    fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
516        &mut self,
517        other: &TypedStatsSetRef,
518        stat: Stat,
519        cmp: F,
520    ) {
521        if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
522            == (self.get_as(stat), other.get_as(stat))
523        {
524            // There might be no stat because it was dropped, or it doesn't exist
525            // (e.g. an all null array).
526            // We assume that it was the dropped case since the doesn't exist might imply sorted,
527            // but this in-precision is correct.
528            if let (Some(self_max), Some(other_min)) = (
529                self.get_scalar_bound::<Max>(),
530                other.get_scalar_bound::<Min>(),
531            ) {
532                return if cmp(&self_max.max_value(), &other_min.min_value()) {
533                    // keep value
534                } else {
535                    self.set(stat, Precision::inexact(false));
536                };
537            }
538        }
539        self.clear(stat);
540    }
541
542    fn merge_null_count(&mut self, other: &TypedStatsSetRef) {
543        self.merge_sum_stat(Stat::NullCount, other)
544    }
545
546    fn merge_nan_count(&mut self, other: &TypedStatsSetRef) {
547        self.merge_sum_stat(Stat::NaNCount, other)
548    }
549
550    fn merge_uncompressed_size_in_bytes(&mut self, other: &TypedStatsSetRef) {
551        self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
552    }
553
554    fn merge_sum_stat(&mut self, stat: Stat, other: &TypedStatsSetRef) {
555        match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
556            (Some(nc1), Some(nc2)) => {
557                self.set(
558                    stat,
559                    nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
560                );
561            }
562            _ => self.clear(stat),
563        }
564    }
565}
566
567#[cfg(test)]
568mod test {
569    use enum_iterator::all;
570    use itertools::Itertools;
571    use smallvec::smallvec;
572
573    use crate::LEGACY_SESSION;
574    use crate::VortexSessionExecute;
575    use crate::arrays::PrimitiveArray;
576    use crate::dtype::DType;
577    use crate::dtype::Nullability;
578    use crate::dtype::PType;
579    use crate::expr::stats::IsConstant;
580    use crate::expr::stats::Precision;
581    use crate::expr::stats::Stat;
582    use crate::expr::stats::StatsProvider;
583    use crate::expr::stats::StatsProviderExt;
584    use crate::stats::StatsSet;
585    use crate::stats::stats_set::Scalar;
586
587    #[test]
588    fn test_iter() {
589        // SAFETY: No duplicate stats.
590        let set = unsafe {
591            StatsSet::new_unchecked(smallvec![
592                (Stat::Max, Precision::exact(100)),
593                (Stat::Min, Precision::exact(42)),
594            ])
595        };
596        let mut iter = set.iter();
597        let first = iter.next().unwrap().clone();
598        assert_eq!(first.0, Stat::Max);
599        assert_eq!(
600            first.1.map(
601                |f| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(f)).unwrap()).unwrap()
602            ),
603            Precision::exact(100)
604        );
605        let snd = iter.next().unwrap().clone();
606        assert_eq!(snd.0, Stat::Min);
607        assert_eq!(
608            snd.1.map(
609                |s| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(s)).unwrap()).unwrap()
610            ),
611            Precision::exact(42)
612        );
613    }
614
615    #[test]
616    fn into_iter() {
617        // SAFETY: No duplicate stats.
618        let mut set = unsafe {
619            StatsSet::new_unchecked(smallvec![
620                (Stat::Max, Precision::exact(100)),
621                (Stat::Min, Precision::exact(42)),
622            ])
623        }
624        .into_iter();
625        let (stat, first) = set.next().unwrap();
626        assert_eq!(stat, Stat::Max);
627        assert_eq!(
628            first.map(
629                |f| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(f)).unwrap()).unwrap()
630            ),
631            Precision::exact(100)
632        );
633        let snd = set.next().unwrap();
634        assert_eq!(snd.0, Stat::Min);
635        assert_eq!(
636            snd.1.map(
637                |s| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(s)).unwrap()).unwrap()
638            ),
639            Precision::exact(42)
640        );
641    }
642
643    #[test]
644    fn merge_constant() {
645        let first = StatsSet::from_iter([
646            (Stat::Min, Precision::exact(42)),
647            (Stat::IsConstant, Precision::exact(true)),
648        ])
649        .merge_ordered(
650            &StatsSet::from_iter([
651                (Stat::Min, Precision::inexact(42)),
652                (Stat::IsConstant, Precision::exact(true)),
653            ]),
654            &DType::Primitive(PType::I32, Nullability::NonNullable),
655        );
656
657        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
658        assert_eq!(
659            first_ref.get_as::<bool>(Stat::IsConstant),
660            Some(Precision::exact(false))
661        );
662        assert_eq!(
663            first_ref.get_as::<i32>(Stat::Min),
664            Some(Precision::exact(42))
665        );
666    }
667
668    #[test]
669    fn merge_into_min() {
670        let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
671            &StatsSet::default(),
672            &DType::Primitive(PType::I32, Nullability::NonNullable),
673        );
674
675        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
676        assert!(first_ref.get(Stat::Min).is_none());
677    }
678
679    #[test]
680    fn merge_from_min() {
681        let first = StatsSet::default().merge_ordered(
682            &StatsSet::of(Stat::Min, Precision::exact(42)),
683            &DType::Primitive(PType::I32, Nullability::NonNullable),
684        );
685
686        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
687        assert!(first_ref.get(Stat::Min).is_none());
688    }
689
690    #[test]
691    fn merge_mins() {
692        let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
693            &StatsSet::of(Stat::Min, Precision::exact(42)),
694            &DType::Primitive(PType::I32, Nullability::NonNullable),
695        );
696
697        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
698        assert_eq!(
699            first_ref.get_as::<i32>(Stat::Min),
700            Some(Precision::exact(37))
701        );
702    }
703
704    #[test]
705    fn merge_into_bound_max() {
706        let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
707            &StatsSet::default(),
708            &DType::Primitive(PType::I32, Nullability::NonNullable),
709        );
710        assert!(first.get(Stat::Max).is_none());
711    }
712
713    #[test]
714    fn merge_from_max() {
715        let first = StatsSet::default().merge_ordered(
716            &StatsSet::of(Stat::Max, Precision::exact(42)),
717            &DType::Primitive(PType::I32, Nullability::NonNullable),
718        );
719        assert!(first.get(Stat::Max).is_none());
720    }
721
722    #[test]
723    fn merge_maxes() {
724        let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
725            &StatsSet::of(Stat::Max, Precision::exact(42)),
726            &DType::Primitive(PType::I32, Nullability::NonNullable),
727        );
728        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
729        assert_eq!(
730            first_ref.get_as::<i32>(Stat::Max),
731            Some(Precision::exact(42))
732        );
733    }
734
735    #[test]
736    fn merge_maxes_bound() {
737        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
738        let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
739            .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
740        let first_ref = first.as_typed_ref(&dtype);
741        assert_eq!(
742            first_ref.get_as::<i32>(Stat::Max),
743            Some(Precision::inexact(43))
744        );
745    }
746
747    #[test]
748    fn merge_into_scalar() {
749        // Sum stats for primitive types are always the 64-bit version (i64 for signed, u64
750        // for unsigned, f64 for floats).
751        let first = StatsSet::of(Stat::Sum, Precision::exact(42i64)).merge_ordered(
752            &StatsSet::default(),
753            &DType::Primitive(PType::I32, Nullability::NonNullable),
754        );
755        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
756        assert!(first_ref.get(Stat::Sum).is_none());
757    }
758
759    #[test]
760    fn merge_from_scalar() {
761        // Sum stats for primitive types are always the 64-bit version (i64 for signed, u64
762        // for unsigned, f64 for floats).
763        let first = StatsSet::default().merge_ordered(
764            &StatsSet::of(Stat::Sum, Precision::exact(42i64)),
765            &DType::Primitive(PType::I32, Nullability::NonNullable),
766        );
767        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
768        assert!(first_ref.get(Stat::Sum).is_none());
769    }
770
771    #[test]
772    fn merge_scalars() {
773        // Sum stats for primitive types are always the 64-bit version (i64 for signed, u64
774        // for unsigned, f64 for floats).
775        let first = StatsSet::of(Stat::Sum, Precision::exact(37i64)).merge_ordered(
776            &StatsSet::of(Stat::Sum, Precision::exact(42i64)),
777            &DType::Primitive(PType::I32, Nullability::NonNullable),
778        );
779        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
780        assert_eq!(
781            first_ref.get_as::<i64>(Stat::Sum),
782            Some(Precision::exact(79i64))
783        );
784    }
785
786    #[test]
787    fn merge_into_sortedness() {
788        let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
789            &StatsSet::default(),
790            &DType::Primitive(PType::I32, Nullability::NonNullable),
791        );
792        assert!(first.get(Stat::IsStrictSorted).is_none());
793    }
794
795    #[test]
796    fn merge_from_sortedness() {
797        let first = StatsSet::default().merge_ordered(
798            &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
799            &DType::Primitive(PType::I32, Nullability::NonNullable),
800        );
801        assert!(first.get(Stat::IsStrictSorted).is_none());
802    }
803
804    #[test]
805    fn merge_sortedness() {
806        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
807        first.set(Stat::Max, Precision::exact(1));
808        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
809        second.set(Stat::Min, Precision::exact(2));
810        first = first.merge_ordered(
811            &second,
812            &DType::Primitive(PType::I32, Nullability::NonNullable),
813        );
814
815        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
816        assert_eq!(
817            first_ref.get_as::<bool>(Stat::IsStrictSorted),
818            Some(Precision::exact(true))
819        );
820    }
821
822    #[test]
823    fn merge_sortedness_out_of_order() {
824        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
825        first.set(Stat::Min, Precision::exact(1));
826        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
827        second.set(Stat::Max, Precision::exact(2));
828        second = second.merge_ordered(
829            &first,
830            &DType::Primitive(PType::I32, Nullability::NonNullable),
831        );
832
833        let second_ref =
834            second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
835        assert_eq!(
836            second_ref.get_as::<bool>(Stat::IsStrictSorted),
837            Some(Precision::inexact(false))
838        );
839    }
840
841    #[test]
842    fn merge_sortedness_only_one_sorted() {
843        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
844        first.set(Stat::Max, Precision::exact(1));
845        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
846        second.set(Stat::Min, Precision::exact(2));
847        first.merge_ordered(
848            &second,
849            &DType::Primitive(PType::I32, Nullability::NonNullable),
850        );
851
852        let second_ref =
853            second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
854        assert_eq!(
855            second_ref.get_as::<bool>(Stat::IsStrictSorted),
856            Some(Precision::exact(false))
857        );
858    }
859
860    #[test]
861    fn merge_sortedness_missing_min() {
862        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
863        first.set(Stat::Max, Precision::exact(1));
864        let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
865        first = first.merge_ordered(
866            &second,
867            &DType::Primitive(PType::I32, Nullability::NonNullable),
868        );
869        assert!(first.get(Stat::IsStrictSorted).is_none());
870    }
871
872    #[test]
873    fn merge_sortedness_bound_min() {
874        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
875        first.set(Stat::Max, Precision::exact(1));
876        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
877        second.set(Stat::Min, Precision::inexact(2));
878        first = first.merge_ordered(
879            &second,
880            &DType::Primitive(PType::I32, Nullability::NonNullable),
881        );
882
883        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
884        assert_eq!(
885            first_ref.get_as::<bool>(Stat::IsStrictSorted),
886            Some(Precision::exact(true))
887        );
888    }
889
890    #[test]
891    fn merge_unordered() {
892        let array =
893            PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
894        let all_stats = all::<Stat>()
895            .filter(|s| !matches!(s, Stat::Sum))
896            .filter(|s| !matches!(s, Stat::NaNCount))
897            .collect_vec();
898        array
899            .statistics()
900            .compute_all(&all_stats, &mut LEGACY_SESSION.create_execution_ctx())
901            .unwrap();
902
903        let stats = array.statistics().to_owned();
904        for stat in &all_stats {
905            assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
906        }
907
908        let merged = stats.clone().merge_unordered(
909            &stats,
910            &DType::Primitive(PType::I32, Nullability::NonNullable),
911        );
912        for stat in &all_stats {
913            assert_eq!(
914                merged.get(*stat).is_some(),
915                stat.is_commutative(),
916                "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
917            )
918        }
919
920        let merged_ref = merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
921        let stats_ref = stats.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
922
923        assert_eq!(
924            merged_ref.get_as::<i32>(Stat::Min),
925            stats_ref.get_as::<i32>(Stat::Min)
926        );
927        assert_eq!(
928            merged_ref.get_as::<i32>(Stat::Max),
929            stats_ref.get_as::<i32>(Stat::Max)
930        );
931        assert_eq!(
932            merged_ref.get_as::<u64>(Stat::NullCount).unwrap(),
933            stats_ref
934                .get_as::<u64>(Stat::NullCount)
935                .unwrap()
936                .map(|s| s * 2)
937        );
938    }
939
940    #[test]
941    fn merge_min_bound_same() {
942        // Merging a stat with a bound and another with an exact results in exact stat.
943        // since bound for min is a lower bound, it can in fact contain any value >= bound.
944        let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
945            &StatsSet::of(Stat::Min, Precision::exact(5)),
946            &DType::Primitive(PType::I32, Nullability::NonNullable),
947        );
948        let merged_ref =
949            merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
950        assert_eq!(
951            merged_ref.get_as::<i32>(Stat::Min),
952            Some(Precision::exact(5))
953        );
954    }
955
956    #[test]
957    fn merge_min_bound_bound_lower() {
958        let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
959            &StatsSet::of(Stat::Min, Precision::exact(5)),
960            &DType::Primitive(PType::I32, Nullability::NonNullable),
961        );
962        let merged_ref =
963            merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
964        assert_eq!(
965            merged_ref.get_as::<i32>(Stat::Min),
966            Some(Precision::inexact(4))
967        );
968    }
969
970    #[test]
971    fn test_combine_is_constant() {
972        {
973            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
974            let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
975            let mut stats_ref =
976                stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
977            stats_ref
978                .combine_bool_stat::<IsConstant>(
979                    &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
980                )
981                .unwrap();
982            assert_eq!(
983                stats_ref.get_as::<bool>(Stat::IsConstant),
984                Some(Precision::exact(true))
985            );
986        }
987
988        {
989            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
990            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
991            let mut stats_ref =
992                stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
993            stats_ref
994                .combine_bool_stat::<IsConstant>(
995                    &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
996                )
997                .unwrap();
998            assert_eq!(
999                stats_ref.get_as::<bool>(Stat::IsConstant),
1000                Some(Precision::exact(true))
1001            );
1002        }
1003
1004        {
1005            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
1006            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
1007            let mut stats_ref =
1008                stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1009            stats_ref
1010                .combine_bool_stat::<IsConstant>(
1011                    &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
1012                )
1013                .unwrap();
1014            assert_eq!(
1015                stats_ref.get_as::<bool>(Stat::IsConstant),
1016                Some(Precision::exact(false))
1017            );
1018        }
1019    }
1020
1021    #[test]
1022    fn test_combine_sets_boolean_conflict() {
1023        let mut stats1 = StatsSet::from_iter([
1024            (Stat::IsConstant, Precision::exact(true)),
1025            (Stat::IsSorted, Precision::exact(true)),
1026        ]);
1027
1028        let stats2 = StatsSet::from_iter([
1029            (Stat::IsConstant, Precision::exact(false)),
1030            (Stat::IsSorted, Precision::exact(true)),
1031        ]);
1032
1033        let result = stats1.combine_sets(
1034            &stats2,
1035            &DType::Primitive(PType::I32, Nullability::NonNullable),
1036        );
1037        assert!(result.is_err());
1038    }
1039
1040    #[test]
1041    fn test_combine_sets_with_missing_stats() {
1042        let mut stats1 = StatsSet::from_iter([
1043            (Stat::Min, Precision::exact(42)),
1044            (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
1045        ]);
1046
1047        let stats2 = StatsSet::from_iter([
1048            (Stat::Max, Precision::exact(100)),
1049            (Stat::IsStrictSorted, Precision::exact(true)),
1050        ]);
1051
1052        stats1
1053            .combine_sets(
1054                &stats2,
1055                &DType::Primitive(PType::I32, Nullability::NonNullable),
1056            )
1057            .unwrap();
1058
1059        let stats_ref =
1060            stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1061
1062        // Min should remain unchanged
1063        assert_eq!(
1064            stats_ref.get_as::<i32>(Stat::Min),
1065            Some(Precision::exact(42))
1066        );
1067        // Max should be added
1068        assert_eq!(
1069            stats_ref.get_as::<i32>(Stat::Max),
1070            Some(Precision::exact(100))
1071        );
1072        // IsStrictSorted should be added
1073        assert_eq!(
1074            stats_ref.get_as::<bool>(Stat::IsStrictSorted),
1075            Some(Precision::exact(true))
1076        );
1077    }
1078
1079    #[test]
1080    fn test_combine_sets_with_inexact() {
1081        let mut stats1 = StatsSet::from_iter([
1082            (Stat::Min, Precision::exact(42)),
1083            (Stat::Max, Precision::inexact(100)),
1084            (Stat::IsConstant, Precision::exact(false)),
1085        ]);
1086
1087        let stats2 = StatsSet::from_iter([
1088            // Must ensure Min from stats2 is <= Min from stats1
1089            (Stat::Min, Precision::inexact(40)),
1090            (Stat::Max, Precision::exact(90)),
1091            (Stat::IsSorted, Precision::exact(true)),
1092        ]);
1093
1094        stats1
1095            .combine_sets(
1096                &stats2,
1097                &DType::Primitive(PType::I32, Nullability::NonNullable),
1098            )
1099            .unwrap();
1100
1101        let stats_ref =
1102            stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1103
1104        // Min should remain unchanged since it's more restrictive than the inexact value
1105        assert_eq!(
1106            stats_ref.get_as::<i32>(Stat::Min),
1107            Some(Precision::exact(42))
1108        );
1109        // Check that max was updated with the exact value
1110        assert_eq!(
1111            stats_ref.get_as::<i32>(Stat::Max),
1112            Some(Precision::exact(90))
1113        );
1114        // Check that IsSorted was added
1115        assert_eq!(
1116            stats_ref.get_as::<bool>(Stat::IsSorted),
1117            Some(Precision::exact(true))
1118        );
1119    }
1120}