Skip to main content

vortex_array/stats/
stats_set.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5
6use enum_iterator::Sequence;
7use enum_iterator::all;
8use num_traits::CheckedAdd;
9use vortex_error::VortexError;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_err;
13use vortex_error::vortex_panic;
14
15use crate::dtype::DType;
16use crate::expr::stats::IsConstant;
17use crate::expr::stats::IsSorted;
18use crate::expr::stats::IsStrictSorted;
19use crate::expr::stats::Max;
20use crate::expr::stats::Min;
21use crate::expr::stats::NaNCount;
22use crate::expr::stats::NullCount;
23use crate::expr::stats::Precision;
24use crate::expr::stats::Stat;
25use crate::expr::stats::StatBound;
26use crate::expr::stats::StatType;
27use crate::expr::stats::StatsProvider;
28use crate::expr::stats::StatsProviderExt;
29use crate::expr::stats::Sum;
30use crate::expr::stats::UncompressedSizeInBytes;
31use crate::scalar::Scalar;
32use crate::scalar::ScalarValue;
33
34#[derive(Default, Debug, Clone)]
35pub struct StatsSet {
36    values: Vec<(Stat, Precision<ScalarValue>)>,
37}
38
39impl StatsSet {
40    /// Create new StatSet without validating uniqueness of all the entries
41    ///
42    /// # Safety
43    ///
44    /// This method will not panic or trigger UB, but may lead to duplicate stats being stored.
45    pub unsafe fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
46        Self { values }
47    }
48
49    /// Create StatsSet from single stat and value
50    pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
51        // SAFETY: No duplicate stats will be set here.
52        unsafe { Self::new_unchecked(vec![(stat, value)]) }
53    }
54
55    fn reserve_full_capacity(&mut self) {
56        if self.values.capacity() < Stat::CARDINALITY {
57            self.values
58                .reserve_exact(Stat::CARDINALITY - self.values.capacity());
59        }
60    }
61
62    /// Wrap stats set with a dtype for mutable typed scalar access
63    pub fn as_mut_typed_ref<'a, 'b>(&'a mut self, dtype: &'b DType) -> MutTypedStatsSetRef<'a, 'b> {
64        MutTypedStatsSetRef {
65            values: self,
66            dtype,
67        }
68    }
69
70    /// Wrap stats set with a dtype for typed scalar access
71    pub fn as_typed_ref<'a, 'b>(&'a self, dtype: &'b DType) -> TypedStatsSetRef<'a, 'b> {
72        TypedStatsSetRef {
73            values: self,
74            dtype,
75        }
76    }
77}
78
79// Getters and setters for individual stats.
80impl StatsSet {
81    /// Set the stat `stat` to `value`.
82    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
83        self.reserve_full_capacity();
84
85        if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
86            *existing = (stat, value);
87        } else {
88            self.values.push((stat, value));
89        }
90    }
91
92    /// Clear the stat `stat` from the set.
93    pub fn clear(&mut self, stat: Stat) {
94        self.values.retain(|(s, _)| *s != stat);
95    }
96
97    /// Only keep given stats
98    pub fn retain_only(&mut self, stats: &[Stat]) {
99        self.values.retain(|(s, _)| stats.contains(s));
100    }
101
102    /// Iterate over the statistic names and values in-place.
103    ///
104    /// See [Iterator].
105    pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
106        self.values.iter()
107    }
108
109    /// Get value for a given stat
110    pub fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
111        self.values
112            .iter()
113            .find(|(s, _)| *s == stat)
114            .map(|(_, v)| v.clone())
115    }
116
117    /// Length of the stats set
118    pub fn len(&self) -> usize {
119        self.values.len()
120    }
121
122    /// Check whether the statset is empty
123    pub fn is_empty(&self) -> bool {
124        self.values.is_empty()
125    }
126
127    /// Get scalar value of a given dtype
128    pub fn get_as<T: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
129        &self,
130        stat: Stat,
131        dtype: &DType,
132    ) -> Option<Precision<T>> {
133        self.get(stat).map(|v| {
134            v.map(|v| {
135                T::try_from(
136                    &Scalar::try_new(dtype.clone(), Some(v))
137                        .vortex_expect("failed to construct a scalar statistic"),
138                )
139                .unwrap_or_else(|err| {
140                    vortex_panic!(
141                        err,
142                        "Failed to get stat {} as {}",
143                        stat,
144                        std::any::type_name::<T>()
145                    )
146                })
147            })
148        })
149    }
150}
151
152// StatSetIntoIter just exists to protect current implementation from exposure on the public API.
153
154/// Owned iterator over the stats.
155///
156/// See [IntoIterator].
157pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
158
159impl Iterator for StatsSetIntoIter {
160    type Item = (Stat, Precision<ScalarValue>);
161
162    fn next(&mut self) -> Option<Self::Item> {
163        self.0.next()
164    }
165}
166
167impl IntoIterator for StatsSet {
168    type Item = (Stat, Precision<ScalarValue>);
169    type IntoIter = StatsSetIntoIter;
170
171    fn into_iter(self) -> Self::IntoIter {
172        StatsSetIntoIter(self.values.into_iter())
173    }
174}
175
176impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
177    fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
178        let iter = iter.into_iter();
179        let mut values = Vec::default();
180        values.reserve_exact(Stat::CARDINALITY);
181
182        let mut this = Self { values };
183        this.extend(iter);
184        this
185    }
186}
187
188impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
189    #[inline]
190    fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
191        let iter = iter.into_iter();
192        self.reserve_full_capacity();
193
194        iter.for_each(|(stat, value)| self.set(stat, value));
195    }
196}
197
198/// Merge helpers
199impl StatsSet {
200    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
201    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
202    pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
203        self.as_mut_typed_ref(dtype)
204            .merge_ordered(&other.as_typed_ref(dtype));
205        self
206    }
207
208    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
209    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
210    pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
211        self.as_mut_typed_ref(dtype)
212            .merge_unordered(&other.as_typed_ref(dtype));
213        self
214    }
215
216    /// Given two sets of stats (of differing precision) for the same array, combine them
217    pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
218        self.as_mut_typed_ref(dtype)
219            .combine_sets(&other.as_typed_ref(dtype))
220    }
221}
222
223pub struct TypedStatsSetRef<'a, 'b> {
224    pub values: &'a StatsSet,
225    pub dtype: &'b DType,
226}
227
228impl StatsProvider for TypedStatsSetRef<'_, '_> {
229    fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
230        self.values.get(stat).map(|p| {
231            p.map(|sv| {
232                Scalar::try_new(
233                    stat.dtype(self.dtype)
234                        .vortex_expect("Must have valid dtype if value is present"),
235                    Some(sv),
236                )
237                .vortex_expect("failed to construct a scalar statistic")
238            })
239        })
240    }
241
242    fn len(&self) -> usize {
243        self.values.len()
244    }
245}
246
247pub struct MutTypedStatsSetRef<'a, 'b> {
248    pub values: &'a mut StatsSet,
249    pub dtype: &'b DType,
250}
251
252impl MutTypedStatsSetRef<'_, '_> {
253    /// Set the stat `stat` to `value`.
254    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
255        self.values.set(stat, value);
256    }
257
258    /// Clear the stat `stat` from the set.
259    pub fn clear(&mut self, stat: Stat) {
260        self.values.clear(stat);
261    }
262}
263
264impl StatsProvider for MutTypedStatsSetRef<'_, '_> {
265    fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
266        self.values.get(stat).map(|p| {
267            p.map(|sv| {
268                Scalar::try_new(
269                    stat.dtype(self.dtype)
270                        .vortex_expect("Must have valid dtype if value is present"),
271                    Some(sv),
272                )
273                .vortex_expect("failed to construct a scalar statistic")
274            })
275        })
276    }
277
278    fn len(&self) -> usize {
279        self.values.len()
280    }
281}
282
283// Merge helpers
284impl MutTypedStatsSetRef<'_, '_> {
285    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
286    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
287    pub fn merge_ordered(mut self, other: &TypedStatsSetRef) -> Self {
288        for s in all::<Stat>() {
289            match s {
290                Stat::IsConstant => self.merge_is_constant(other),
291                Stat::IsSorted => self.merge_is_sorted(other),
292                Stat::IsStrictSorted => self.merge_is_strict_sorted(other),
293                Stat::Max => self.merge_max(other),
294                Stat::Min => self.merge_min(other),
295                Stat::Sum => self.merge_sum(other),
296                Stat::NullCount => self.merge_null_count(other),
297                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
298                Stat::NaNCount => self.merge_nan_count(other),
299            }
300        }
301
302        self
303    }
304
305    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
306    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
307    pub fn merge_unordered(mut self, other: &TypedStatsSetRef) -> Self {
308        for s in all::<Stat>() {
309            if !s.is_commutative() {
310                self.clear(s);
311                continue;
312            }
313
314            match s {
315                Stat::IsConstant => self.merge_is_constant(other),
316                Stat::Max => self.merge_max(other),
317                Stat::Min => self.merge_min(other),
318                Stat::Sum => self.merge_sum(other),
319                Stat::NullCount => self.merge_null_count(other),
320                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
321                Stat::IsSorted | Stat::IsStrictSorted => {
322                    unreachable!("not commutative")
323                }
324                Stat::NaNCount => self.merge_nan_count(other),
325            }
326        }
327
328        self
329    }
330
331    /// Given two sets of stats (of differing precision) for the same array, combine them
332    pub fn combine_sets(&mut self, other: &TypedStatsSetRef) -> VortexResult<()> {
333        let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
334        for s in other_stats {
335            match s {
336                Stat::Max => self.combine_bound::<Max>(other)?,
337                Stat::Min => self.combine_bound::<Min>(other)?,
338                Stat::UncompressedSizeInBytes => {
339                    self.combine_bound::<UncompressedSizeInBytes>(other)?
340                }
341                Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
342                Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
343                Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
344                Stat::NullCount => self.combine_bound::<NullCount>(other)?,
345                Stat::Sum => self.combine_bound::<Sum>(other)?,
346                Stat::NaNCount => self.combine_bound::<NaNCount>(other)?,
347            }
348        }
349        Ok(())
350    }
351
352    fn combine_bound<S: StatType<Scalar>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
353    where
354        S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
355    {
356        match (self.get_scalar_bound::<S>(), other.get_scalar_bound::<S>()) {
357            (Some(m1), Some(m2)) => {
358                let meet = m1
359                    .intersection(&m2)
360                    .vortex_expect("can always compare scalar")
361                    .ok_or_else(|| {
362                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
363                    })?;
364                if meet != m1 {
365                    self.set(
366                        S::STAT,
367                        meet.into_value().map(|s| {
368                            s.into_value()
369                                .vortex_expect("stat scalar value cannot be null")
370                        }),
371                    );
372                }
373            }
374            (None, Some(m)) => self.set(
375                S::STAT,
376                m.into_value().map(|s| {
377                    s.into_value()
378                        .vortex_expect("stat scalar value cannot be null")
379                }),
380            ),
381            (Some(_), _) => (),
382            (None, None) => self.clear(S::STAT),
383        }
384        Ok(())
385    }
386
387    fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
388    where
389        S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
390    {
391        match (
392            self.get_as_bound::<S, bool>(),
393            other.get_as_bound::<S, bool>(),
394        ) {
395            (Some(m1), Some(m2)) => {
396                let intersection = m1
397                    .intersection(&m2)
398                    .vortex_expect("can always compare boolean")
399                    .ok_or_else(|| {
400                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
401                    })?;
402                if intersection != m1 {
403                    self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
404                }
405            }
406            (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
407            (Some(_), None) => (),
408            (None, None) => self.clear(S::STAT),
409        }
410        Ok(())
411    }
412
413    fn merge_min(&mut self, other: &TypedStatsSetRef) {
414        match (
415            self.get_scalar_bound::<Min>(),
416            other.get_scalar_bound::<Min>(),
417        ) {
418            (Some(m1), Some(m2)) => {
419                let meet = m1.union(&m2).vortex_expect("can compare scalar");
420                if meet != m1 {
421                    self.set(
422                        Stat::Min,
423                        meet.into_value().map(|s| {
424                            s.into_value()
425                                .vortex_expect("stat scalar value cannot be null")
426                        }),
427                    );
428                }
429            }
430            _ => self.clear(Stat::Min),
431        }
432    }
433
434    fn merge_max(&mut self, other: &TypedStatsSetRef) {
435        match (
436            self.get_scalar_bound::<Max>(),
437            other.get_scalar_bound::<Max>(),
438        ) {
439            (Some(m1), Some(m2)) => {
440                let meet = m1.union(&m2).vortex_expect("can compare scalar");
441                if meet != m1 {
442                    self.set(
443                        Stat::Max,
444                        meet.into_value().map(|s| {
445                            s.into_value()
446                                .vortex_expect("stat scalar value cannot be null")
447                        }),
448                    );
449                }
450            }
451            _ => self.clear(Stat::Max),
452        }
453    }
454
455    fn merge_sum(&mut self, other: &TypedStatsSetRef) {
456        match (
457            self.get_scalar_bound::<Sum>(),
458            other.get_scalar_bound::<Sum>(),
459        ) {
460            (Some(m1), Some(m2)) => {
461                // If the combine sum is exact, then we can sum them.
462                if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
463                    s1.as_primitive()
464                        .checked_add(&s2.as_primitive())
465                        .and_then(|pscalar| pscalar.pvalue().map(ScalarValue::Primitive))
466                }) {
467                    self.set(Stat::Sum, Precision::Exact(scalar_value));
468                }
469            }
470            _ => self.clear(Stat::Sum),
471        }
472    }
473
474    fn merge_is_constant(&mut self, other: &TypedStatsSetRef) {
475        let self_const = self.get_as(Stat::IsConstant);
476        let other_const = other.get_as(Stat::IsConstant);
477        let self_min = self.get(Stat::Min);
478        let other_min = other.get(Stat::Min);
479
480        if let (
481            Some(Precision::Exact(self_const)),
482            Some(Precision::Exact(other_const)),
483            Some(Precision::Exact(self_min)),
484            Some(Precision::Exact(other_min)),
485        ) = (self_const, other_const, self_min, other_min)
486        {
487            if self_const && other_const && self_min == other_min {
488                self.set(Stat::IsConstant, Precision::exact(true));
489            } else {
490                self.set(Stat::IsConstant, Precision::inexact(false));
491            }
492        }
493        self.set(Stat::IsConstant, Precision::exact(false));
494    }
495
496    fn merge_is_sorted(&mut self, other: &TypedStatsSetRef) {
497        self.merge_sortedness_stat(other, Stat::IsSorted, PartialOrd::le)
498    }
499
500    fn merge_is_strict_sorted(&mut self, other: &TypedStatsSetRef) {
501        self.merge_sortedness_stat(other, Stat::IsStrictSorted, PartialOrd::lt)
502    }
503
504    fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
505        &mut self,
506        other: &TypedStatsSetRef,
507        stat: Stat,
508        cmp: F,
509    ) {
510        if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
511            == (self.get_as(stat), other.get_as(stat))
512        {
513            // There might be no stat because it was dropped, or it doesn't exist
514            // (e.g. an all null array).
515            // We assume that it was the dropped case since the doesn't exist might imply sorted,
516            // but this in-precision is correct.
517            if let (Some(self_max), Some(other_min)) = (
518                self.get_scalar_bound::<Max>(),
519                other.get_scalar_bound::<Min>(),
520            ) {
521                return if cmp(&self_max.max_value(), &other_min.min_value()) {
522                    // keep value
523                } else {
524                    self.set(stat, Precision::inexact(false));
525                };
526            }
527        }
528        self.clear(stat);
529    }
530
531    fn merge_null_count(&mut self, other: &TypedStatsSetRef) {
532        self.merge_sum_stat(Stat::NullCount, other)
533    }
534
535    fn merge_nan_count(&mut self, other: &TypedStatsSetRef) {
536        self.merge_sum_stat(Stat::NaNCount, other)
537    }
538
539    fn merge_uncompressed_size_in_bytes(&mut self, other: &TypedStatsSetRef) {
540        self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
541    }
542
543    fn merge_sum_stat(&mut self, stat: Stat, other: &TypedStatsSetRef) {
544        match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
545            (Some(nc1), Some(nc2)) => {
546                self.set(
547                    stat,
548                    nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
549                );
550            }
551            _ => self.clear(stat),
552        }
553    }
554}
555
556#[cfg(test)]
557mod test {
558    use enum_iterator::all;
559    use itertools::Itertools;
560
561    use crate::LEGACY_SESSION;
562    use crate::VortexSessionExecute;
563    use crate::arrays::PrimitiveArray;
564    use crate::dtype::DType;
565    use crate::dtype::Nullability;
566    use crate::dtype::PType;
567    use crate::expr::stats::IsConstant;
568    use crate::expr::stats::Precision;
569    use crate::expr::stats::Stat;
570    use crate::expr::stats::StatsProvider;
571    use crate::expr::stats::StatsProviderExt;
572    use crate::stats::StatsSet;
573    use crate::stats::stats_set::Scalar;
574
575    #[test]
576    fn test_iter() {
577        // SAFETY: No duplicate stats.
578        let set = unsafe {
579            StatsSet::new_unchecked(vec![
580                (Stat::Max, Precision::exact(100)),
581                (Stat::Min, Precision::exact(42)),
582            ])
583        };
584        let mut iter = set.iter();
585        let first = iter.next().unwrap().clone();
586        assert_eq!(first.0, Stat::Max);
587        assert_eq!(
588            first.1.map(
589                |f| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(f)).unwrap()).unwrap()
590            ),
591            Precision::exact(100)
592        );
593        let snd = iter.next().unwrap().clone();
594        assert_eq!(snd.0, Stat::Min);
595        assert_eq!(
596            snd.1.map(
597                |s| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(s)).unwrap()).unwrap()
598            ),
599            Precision::exact(42)
600        );
601    }
602
603    #[test]
604    fn into_iter() {
605        // SAFETY: No duplicate stats.
606        let mut set = unsafe {
607            StatsSet::new_unchecked(vec![
608                (Stat::Max, Precision::exact(100)),
609                (Stat::Min, Precision::exact(42)),
610            ])
611        }
612        .into_iter();
613        let (stat, first) = set.next().unwrap();
614        assert_eq!(stat, Stat::Max);
615        assert_eq!(
616            first.map(
617                |f| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(f)).unwrap()).unwrap()
618            ),
619            Precision::exact(100)
620        );
621        let snd = set.next().unwrap();
622        assert_eq!(snd.0, Stat::Min);
623        assert_eq!(
624            snd.1.map(
625                |s| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(s)).unwrap()).unwrap()
626            ),
627            Precision::exact(42)
628        );
629    }
630
631    #[test]
632    fn merge_constant() {
633        let first = StatsSet::from_iter([
634            (Stat::Min, Precision::exact(42)),
635            (Stat::IsConstant, Precision::exact(true)),
636        ])
637        .merge_ordered(
638            &StatsSet::from_iter([
639                (Stat::Min, Precision::inexact(42)),
640                (Stat::IsConstant, Precision::exact(true)),
641            ]),
642            &DType::Primitive(PType::I32, Nullability::NonNullable),
643        );
644
645        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
646        assert_eq!(
647            first_ref.get_as::<bool>(Stat::IsConstant),
648            Some(Precision::exact(false))
649        );
650        assert_eq!(
651            first_ref.get_as::<i32>(Stat::Min),
652            Some(Precision::exact(42))
653        );
654    }
655
656    #[test]
657    fn merge_into_min() {
658        let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
659            &StatsSet::default(),
660            &DType::Primitive(PType::I32, Nullability::NonNullable),
661        );
662
663        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
664        assert!(first_ref.get(Stat::Min).is_none());
665    }
666
667    #[test]
668    fn merge_from_min() {
669        let first = StatsSet::default().merge_ordered(
670            &StatsSet::of(Stat::Min, Precision::exact(42)),
671            &DType::Primitive(PType::I32, Nullability::NonNullable),
672        );
673
674        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
675        assert!(first_ref.get(Stat::Min).is_none());
676    }
677
678    #[test]
679    fn merge_mins() {
680        let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
681            &StatsSet::of(Stat::Min, Precision::exact(42)),
682            &DType::Primitive(PType::I32, Nullability::NonNullable),
683        );
684
685        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
686        assert_eq!(
687            first_ref.get_as::<i32>(Stat::Min),
688            Some(Precision::exact(37))
689        );
690    }
691
692    #[test]
693    fn merge_into_bound_max() {
694        let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
695            &StatsSet::default(),
696            &DType::Primitive(PType::I32, Nullability::NonNullable),
697        );
698        assert!(first.get(Stat::Max).is_none());
699    }
700
701    #[test]
702    fn merge_from_max() {
703        let first = StatsSet::default().merge_ordered(
704            &StatsSet::of(Stat::Max, Precision::exact(42)),
705            &DType::Primitive(PType::I32, Nullability::NonNullable),
706        );
707        assert!(first.get(Stat::Max).is_none());
708    }
709
710    #[test]
711    fn merge_maxes() {
712        let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
713            &StatsSet::of(Stat::Max, Precision::exact(42)),
714            &DType::Primitive(PType::I32, Nullability::NonNullable),
715        );
716        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
717        assert_eq!(
718            first_ref.get_as::<i32>(Stat::Max),
719            Some(Precision::exact(42))
720        );
721    }
722
723    #[test]
724    fn merge_maxes_bound() {
725        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
726        let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
727            .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
728        let first_ref = first.as_typed_ref(&dtype);
729        assert_eq!(
730            first_ref.get_as::<i32>(Stat::Max),
731            Some(Precision::inexact(43))
732        );
733    }
734
735    #[test]
736    fn merge_into_scalar() {
737        // Sum stats for primitive types are always the 64-bit version (i64 for signed, u64
738        // for unsigned, f64 for floats).
739        let first = StatsSet::of(Stat::Sum, Precision::exact(42i64)).merge_ordered(
740            &StatsSet::default(),
741            &DType::Primitive(PType::I32, Nullability::NonNullable),
742        );
743        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
744        assert!(first_ref.get(Stat::Sum).is_none());
745    }
746
747    #[test]
748    fn merge_from_scalar() {
749        // Sum stats for primitive types are always the 64-bit version (i64 for signed, u64
750        // for unsigned, f64 for floats).
751        let first = StatsSet::default().merge_ordered(
752            &StatsSet::of(Stat::Sum, Precision::exact(42i64)),
753            &DType::Primitive(PType::I32, Nullability::NonNullable),
754        );
755        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
756        assert!(first_ref.get(Stat::Sum).is_none());
757    }
758
759    #[test]
760    fn merge_scalars() {
761        // Sum stats for primitive types are always the 64-bit version (i64 for signed, u64
762        // for unsigned, f64 for floats).
763        let first = StatsSet::of(Stat::Sum, Precision::exact(37i64)).merge_ordered(
764            &StatsSet::of(Stat::Sum, Precision::exact(42i64)),
765            &DType::Primitive(PType::I32, Nullability::NonNullable),
766        );
767        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
768        assert_eq!(
769            first_ref.get_as::<i64>(Stat::Sum),
770            Some(Precision::exact(79i64))
771        );
772    }
773
774    #[test]
775    fn merge_into_sortedness() {
776        let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
777            &StatsSet::default(),
778            &DType::Primitive(PType::I32, Nullability::NonNullable),
779        );
780        assert!(first.get(Stat::IsStrictSorted).is_none());
781    }
782
783    #[test]
784    fn merge_from_sortedness() {
785        let first = StatsSet::default().merge_ordered(
786            &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
787            &DType::Primitive(PType::I32, Nullability::NonNullable),
788        );
789        assert!(first.get(Stat::IsStrictSorted).is_none());
790    }
791
792    #[test]
793    fn merge_sortedness() {
794        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
795        first.set(Stat::Max, Precision::exact(1));
796        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
797        second.set(Stat::Min, Precision::exact(2));
798        first = first.merge_ordered(
799            &second,
800            &DType::Primitive(PType::I32, Nullability::NonNullable),
801        );
802
803        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
804        assert_eq!(
805            first_ref.get_as::<bool>(Stat::IsStrictSorted),
806            Some(Precision::exact(true))
807        );
808    }
809
810    #[test]
811    fn merge_sortedness_out_of_order() {
812        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
813        first.set(Stat::Min, Precision::exact(1));
814        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
815        second.set(Stat::Max, Precision::exact(2));
816        second = second.merge_ordered(
817            &first,
818            &DType::Primitive(PType::I32, Nullability::NonNullable),
819        );
820
821        let second_ref =
822            second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
823        assert_eq!(
824            second_ref.get_as::<bool>(Stat::IsStrictSorted),
825            Some(Precision::inexact(false))
826        );
827    }
828
829    #[test]
830    fn merge_sortedness_only_one_sorted() {
831        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
832        first.set(Stat::Max, Precision::exact(1));
833        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
834        second.set(Stat::Min, Precision::exact(2));
835        first.merge_ordered(
836            &second,
837            &DType::Primitive(PType::I32, Nullability::NonNullable),
838        );
839
840        let second_ref =
841            second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
842        assert_eq!(
843            second_ref.get_as::<bool>(Stat::IsStrictSorted),
844            Some(Precision::exact(false))
845        );
846    }
847
848    #[test]
849    fn merge_sortedness_missing_min() {
850        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
851        first.set(Stat::Max, Precision::exact(1));
852        let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
853        first = first.merge_ordered(
854            &second,
855            &DType::Primitive(PType::I32, Nullability::NonNullable),
856        );
857        assert!(first.get(Stat::IsStrictSorted).is_none());
858    }
859
860    #[test]
861    fn merge_sortedness_bound_min() {
862        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
863        first.set(Stat::Max, Precision::exact(1));
864        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
865        second.set(Stat::Min, Precision::inexact(2));
866        first = first.merge_ordered(
867            &second,
868            &DType::Primitive(PType::I32, Nullability::NonNullable),
869        );
870
871        let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
872        assert_eq!(
873            first_ref.get_as::<bool>(Stat::IsStrictSorted),
874            Some(Precision::exact(true))
875        );
876    }
877
878    #[test]
879    fn merge_unordered() {
880        let array =
881            PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
882        let all_stats = all::<Stat>()
883            .filter(|s| !matches!(s, Stat::Sum))
884            .filter(|s| !matches!(s, Stat::NaNCount))
885            .collect_vec();
886        array
887            .statistics()
888            .compute_all(&all_stats, &mut LEGACY_SESSION.create_execution_ctx())
889            .unwrap();
890
891        let stats = array.statistics().to_owned();
892        for stat in &all_stats {
893            assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
894        }
895
896        let merged = stats.clone().merge_unordered(
897            &stats,
898            &DType::Primitive(PType::I32, Nullability::NonNullable),
899        );
900        for stat in &all_stats {
901            assert_eq!(
902                merged.get(*stat).is_some(),
903                stat.is_commutative(),
904                "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
905            )
906        }
907
908        let merged_ref = merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
909        let stats_ref = stats.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
910
911        assert_eq!(
912            merged_ref.get_as::<i32>(Stat::Min),
913            stats_ref.get_as::<i32>(Stat::Min)
914        );
915        assert_eq!(
916            merged_ref.get_as::<i32>(Stat::Max),
917            stats_ref.get_as::<i32>(Stat::Max)
918        );
919        assert_eq!(
920            merged_ref.get_as::<u64>(Stat::NullCount).unwrap(),
921            stats_ref
922                .get_as::<u64>(Stat::NullCount)
923                .unwrap()
924                .map(|s| s * 2)
925        );
926    }
927
928    #[test]
929    fn merge_min_bound_same() {
930        // Merging a stat with a bound and another with an exact results in exact stat.
931        // since bound for min is a lower bound, it can in fact contain any value >= bound.
932        let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
933            &StatsSet::of(Stat::Min, Precision::exact(5)),
934            &DType::Primitive(PType::I32, Nullability::NonNullable),
935        );
936        let merged_ref =
937            merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
938        assert_eq!(
939            merged_ref.get_as::<i32>(Stat::Min),
940            Some(Precision::exact(5))
941        );
942    }
943
944    #[test]
945    fn merge_min_bound_bound_lower() {
946        let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
947            &StatsSet::of(Stat::Min, Precision::exact(5)),
948            &DType::Primitive(PType::I32, Nullability::NonNullable),
949        );
950        let merged_ref =
951            merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
952        assert_eq!(
953            merged_ref.get_as::<i32>(Stat::Min),
954            Some(Precision::inexact(4))
955        );
956    }
957
958    #[test]
959    fn test_combine_is_constant() {
960        {
961            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
962            let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
963            let mut stats_ref =
964                stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
965            stats_ref
966                .combine_bool_stat::<IsConstant>(
967                    &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
968                )
969                .unwrap();
970            assert_eq!(
971                stats_ref.get_as::<bool>(Stat::IsConstant),
972                Some(Precision::exact(true))
973            );
974        }
975
976        {
977            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
978            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
979            let mut stats_ref =
980                stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
981            stats_ref
982                .combine_bool_stat::<IsConstant>(
983                    &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
984                )
985                .unwrap();
986            assert_eq!(
987                stats_ref.get_as::<bool>(Stat::IsConstant),
988                Some(Precision::exact(true))
989            );
990        }
991
992        {
993            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
994            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
995            let mut stats_ref =
996                stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
997            stats_ref
998                .combine_bool_stat::<IsConstant>(
999                    &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
1000                )
1001                .unwrap();
1002            assert_eq!(
1003                stats_ref.get_as::<bool>(Stat::IsConstant),
1004                Some(Precision::exact(false))
1005            );
1006        }
1007    }
1008
1009    #[test]
1010    fn test_combine_sets_boolean_conflict() {
1011        let mut stats1 = StatsSet::from_iter([
1012            (Stat::IsConstant, Precision::exact(true)),
1013            (Stat::IsSorted, Precision::exact(true)),
1014        ]);
1015
1016        let stats2 = StatsSet::from_iter([
1017            (Stat::IsConstant, Precision::exact(false)),
1018            (Stat::IsSorted, Precision::exact(true)),
1019        ]);
1020
1021        let result = stats1.combine_sets(
1022            &stats2,
1023            &DType::Primitive(PType::I32, Nullability::NonNullable),
1024        );
1025        assert!(result.is_err());
1026    }
1027
1028    #[test]
1029    fn test_combine_sets_with_missing_stats() {
1030        let mut stats1 = StatsSet::from_iter([
1031            (Stat::Min, Precision::exact(42)),
1032            (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
1033        ]);
1034
1035        let stats2 = StatsSet::from_iter([
1036            (Stat::Max, Precision::exact(100)),
1037            (Stat::IsStrictSorted, Precision::exact(true)),
1038        ]);
1039
1040        stats1
1041            .combine_sets(
1042                &stats2,
1043                &DType::Primitive(PType::I32, Nullability::NonNullable),
1044            )
1045            .unwrap();
1046
1047        let stats_ref =
1048            stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1049
1050        // Min should remain unchanged
1051        assert_eq!(
1052            stats_ref.get_as::<i32>(Stat::Min),
1053            Some(Precision::exact(42))
1054        );
1055        // Max should be added
1056        assert_eq!(
1057            stats_ref.get_as::<i32>(Stat::Max),
1058            Some(Precision::exact(100))
1059        );
1060        // IsStrictSorted should be added
1061        assert_eq!(
1062            stats_ref.get_as::<bool>(Stat::IsStrictSorted),
1063            Some(Precision::exact(true))
1064        );
1065    }
1066
1067    #[test]
1068    fn test_combine_sets_with_inexact() {
1069        let mut stats1 = StatsSet::from_iter([
1070            (Stat::Min, Precision::exact(42)),
1071            (Stat::Max, Precision::inexact(100)),
1072            (Stat::IsConstant, Precision::exact(false)),
1073        ]);
1074
1075        let stats2 = StatsSet::from_iter([
1076            // Must ensure Min from stats2 is <= Min from stats1
1077            (Stat::Min, Precision::inexact(40)),
1078            (Stat::Max, Precision::exact(90)),
1079            (Stat::IsSorted, Precision::exact(true)),
1080        ]);
1081
1082        stats1
1083            .combine_sets(
1084                &stats2,
1085                &DType::Primitive(PType::I32, Nullability::NonNullable),
1086            )
1087            .unwrap();
1088
1089        let stats_ref =
1090            stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1091
1092        // Min should remain unchanged since it's more restrictive than the inexact value
1093        assert_eq!(
1094            stats_ref.get_as::<i32>(Stat::Min),
1095            Some(Precision::exact(42))
1096        );
1097        // Check that max was updated with the exact value
1098        assert_eq!(
1099            stats_ref.get_as::<i32>(Stat::Max),
1100            Some(Precision::exact(90))
1101        );
1102        // Check that IsSorted was added
1103        assert_eq!(
1104            stats_ref.get_as::<bool>(Stat::IsSorted),
1105            Some(Precision::exact(true))
1106        );
1107    }
1108}