vortex_array/stats/
stats_set.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5
6use enum_iterator::{Sequence, all};
7use num_traits::CheckedAdd;
8use vortex_dtype::DType;
9use vortex_error::{VortexExpect, VortexResult, vortex_err};
10use vortex_scalar::{Scalar, ScalarValue};
11
12use super::traits::StatsProvider;
13use super::{IsSorted, IsStrictSorted, NaNCount, NullCount, StatType, UncompressedSizeInBytes};
14use crate::stats::{IsConstant, Max, Min, Precision, Stat, StatBound, StatsProviderExt, Sum};
15
16#[derive(Default, Debug, Clone)]
17pub struct StatsSet {
18    values: Vec<(Stat, Precision<ScalarValue>)>,
19}
20
21impl StatsSet {
22    /// Create new StatSet without validating uniqueness of all the entries
23    ///
24    /// # Safety
25    ///
26    /// This method will not panic or trigger UB, but may lead to duplicate stats being stored.
27    pub fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
28        Self { values }
29    }
30
31    /// Specialized constructor for the case where the StatsSet represents
32    /// an array consisting entirely of [null](vortex_dtype::DType::Null) values.
33    pub fn nulls(len: usize) -> Self {
34        let mut stats = Self::new_unchecked(vec![(Stat::NullCount, Precision::exact(len))]);
35
36        if len > 0 {
37            stats.set(Stat::IsConstant, Precision::exact(true));
38            stats.set(Stat::IsSorted, Precision::exact(true));
39            stats.set(Stat::IsStrictSorted, Precision::exact(len < 2));
40        }
41
42        stats
43    }
44
45    /// A convenience method for creating a stats set which will represent an empty array.
46    pub fn empty_array() -> StatsSet {
47        StatsSet::new_unchecked(vec![(Stat::NullCount, Precision::exact(0))])
48    }
49
50    pub fn bools_with_sum_and_null_count(true_count: usize, null_count: usize, len: usize) -> Self {
51        StatsSet::new_unchecked(vec![
52            (Stat::Sum, Precision::exact(true_count)),
53            (Stat::NullCount, Precision::exact(null_count)),
54            (Stat::Min, Precision::exact(true_count == len)),
55            (Stat::Max, Precision::exact(true_count > 0)),
56            (
57                Stat::IsConstant,
58                Precision::exact((true_count == 0 && null_count == 0) || true_count == len),
59            ),
60        ])
61    }
62
63    pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
64        Self::new_unchecked(vec![(stat, value)])
65    }
66
67    fn reserve_full_capacity(&mut self) {
68        if self.values.capacity() < Stat::CARDINALITY {
69            self.values
70                .reserve_exact(Stat::CARDINALITY - self.values.capacity());
71        }
72    }
73}
74
75// Getters and setters for individual stats.
76impl StatsSet {
77    /// Set the stat `stat` to `value`.
78    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
79        self.reserve_full_capacity();
80
81        if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
82            *existing = (stat, value);
83        } else {
84            self.values.push((stat, value));
85        }
86    }
87
88    /// Clear the stat `stat` from the set.
89    pub fn clear(&mut self, stat: Stat) {
90        self.values.retain(|(s, _)| *s != stat);
91    }
92
93    pub fn retain_only(&mut self, stats: &[Stat]) {
94        self.values.retain(|(s, _)| stats.contains(s));
95    }
96
97    pub fn keep_inexact_stats(self, inexact_keep: &[Stat]) -> Self {
98        self.values
99            .into_iter()
100            .filter_map(|(s, v)| inexact_keep.contains(&s).then(|| (s, v.into_inexact())))
101            .collect()
102    }
103
104    /// Iterate over the statistic names and values in-place.
105    ///
106    /// See [Iterator].
107    pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
108        self.values.iter()
109    }
110}
111
112// StatSetIntoIter just exists to protect current implementation from exposure on the public API.
113
114/// Owned iterator over the stats.
115///
116/// See [IntoIterator].
117pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
118
119impl Iterator for StatsSetIntoIter {
120    type Item = (Stat, Precision<ScalarValue>);
121
122    fn next(&mut self) -> Option<Self::Item> {
123        self.0.next()
124    }
125}
126
127impl IntoIterator for StatsSet {
128    type Item = (Stat, Precision<ScalarValue>);
129    type IntoIter = StatsSetIntoIter;
130
131    fn into_iter(self) -> Self::IntoIter {
132        StatsSetIntoIter(self.values.into_iter())
133    }
134}
135
136impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
137    fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
138        let iter = iter.into_iter();
139        let mut values = Vec::default();
140        values.reserve_exact(Stat::CARDINALITY);
141
142        let mut this = Self { values };
143        this.extend(iter);
144        this
145    }
146}
147
148impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
149    #[inline]
150    fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
151        let iter = iter.into_iter();
152        self.reserve_full_capacity();
153
154        iter.for_each(|(stat, value)| self.set(stat, value));
155    }
156}
157
158// Merge helpers
159impl StatsSet {
160    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
161    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
162    pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
163        for s in all::<Stat>() {
164            match s {
165                Stat::IsConstant => self.merge_is_constant(other, dtype),
166                Stat::IsSorted => self.merge_is_sorted(other, dtype),
167                Stat::IsStrictSorted => self.merge_is_strict_sorted(other, dtype),
168                Stat::Max => self.merge_max(other, dtype),
169                Stat::Min => self.merge_min(other, dtype),
170                Stat::Sum => self.merge_sum(other, dtype),
171                Stat::NullCount => self.merge_null_count(other),
172                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
173                Stat::NaNCount => self.merge_nan_count(other),
174            }
175        }
176
177        self
178    }
179
180    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
181    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
182    pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
183        for s in all::<Stat>() {
184            if !s.is_commutative() {
185                self.clear(s);
186                continue;
187            }
188
189            match s {
190                Stat::IsConstant => self.merge_is_constant(other, dtype),
191                Stat::Max => self.merge_max(other, dtype),
192                Stat::Min => self.merge_min(other, dtype),
193                Stat::Sum => self.merge_sum(other, dtype),
194                Stat::NullCount => self.merge_null_count(other),
195                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
196                Stat::IsSorted | Stat::IsStrictSorted => {
197                    unreachable!("not commutative")
198                }
199                Stat::NaNCount => self.merge_nan_count(other),
200            }
201        }
202
203        self
204    }
205
206    /// Given two sets of stats (of differing precision) for the same array, combine them
207    pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
208        let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
209        for s in other_stats {
210            match s {
211                Stat::Max => self.combine_bound::<Max>(other, dtype)?,
212                Stat::Min => self.combine_bound::<Min>(other, dtype)?,
213                Stat::UncompressedSizeInBytes => {
214                    self.combine_bound::<UncompressedSizeInBytes>(other, dtype)?
215                }
216                Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
217                Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
218                Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
219                Stat::NullCount => self.combine_bound::<NullCount>(other, dtype)?,
220                Stat::Sum => self.combine_bound::<Sum>(other, dtype)?,
221                Stat::NaNCount => self.combine_bound::<NaNCount>(other, dtype)?,
222            }
223        }
224        Ok(())
225    }
226
227    fn combine_bound<S: StatType<Scalar>>(
228        &mut self,
229        other: &Self,
230        dtype: &DType,
231    ) -> VortexResult<()>
232    where
233        S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
234    {
235        match (
236            self.get_scalar_bound::<S>(dtype),
237            other.get_scalar_bound::<S>(dtype),
238        ) {
239            (Some(m1), Some(m2)) => {
240                let meet = m1
241                    .intersection(&m2)
242                    .vortex_expect("can always compare scalar")
243                    .ok_or_else(|| {
244                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
245                    })?;
246                if meet != m1 {
247                    self.set(S::STAT, meet.into_value().map(Scalar::into_value));
248                }
249            }
250            (None, Some(m)) => self.set(S::STAT, m.into_value().map(Scalar::into_value)),
251            (Some(_), _) => (),
252            (None, None) => self.clear(S::STAT),
253        }
254        Ok(())
255    }
256
257    fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &Self) -> VortexResult<()>
258    where
259        S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
260    {
261        match (
262            self.get_as_bound::<S, bool>(),
263            other.get_as_bound::<S, bool>(),
264        ) {
265            (Some(m1), Some(m2)) => {
266                let intersection = m1
267                    .intersection(&m2)
268                    .vortex_expect("can always compare boolean")
269                    .ok_or_else(|| {
270                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
271                    })?;
272                if intersection != m1 {
273                    self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
274                }
275            }
276            (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
277            (Some(_), None) => (),
278            (None, None) => self.clear(S::STAT),
279        }
280        Ok(())
281    }
282
283    fn merge_min(&mut self, other: &Self, dtype: &DType) {
284        match (
285            self.get_scalar_bound::<Min>(dtype),
286            other.get_scalar_bound::<Min>(dtype),
287        ) {
288            (Some(m1), Some(m2)) => {
289                let meet = m1.union(&m2).vortex_expect("can compare scalar");
290                if meet != m1 {
291                    self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
292                }
293            }
294            _ => self.clear(Stat::Min),
295        }
296    }
297
298    fn merge_max(&mut self, other: &Self, dtype: &DType) {
299        match (
300            self.get_scalar_bound::<Max>(dtype),
301            other.get_scalar_bound::<Max>(dtype),
302        ) {
303            (Some(m1), Some(m2)) => {
304                let meet = m1.union(&m2).vortex_expect("can compare scalar");
305                if meet != m1 {
306                    self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
307                }
308            }
309            _ => self.clear(Stat::Max),
310        }
311    }
312
313    fn merge_sum(&mut self, other: &Self, dtype: &DType) {
314        match (
315            self.get_scalar_bound::<Sum>(dtype),
316            other.get_scalar_bound::<Sum>(dtype),
317        ) {
318            (Some(m1), Some(m2)) => {
319                // If the combine sum is exact, then we can sum them.
320                if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
321                    s1.as_primitive()
322                        .checked_add(&s2.as_primitive())
323                        .map(|pscalar| {
324                            pscalar
325                                .pvalue()
326                                .map(|pvalue| {
327                                    Scalar::primitive_value(
328                                        pvalue,
329                                        pscalar.ptype(),
330                                        pscalar.dtype().nullability(),
331                                    )
332                                    .into_value()
333                                })
334                                .unwrap_or_else(ScalarValue::null)
335                        })
336                }) {
337                    self.set(Stat::Sum, Precision::Exact(scalar_value));
338                }
339            }
340            _ => self.clear(Stat::Sum),
341        }
342    }
343
344    fn merge_is_constant(&mut self, other: &Self, dtype: &DType) {
345        let self_const = self.get_as(Stat::IsConstant);
346        let other_const = other.get_as(Stat::IsConstant);
347        let self_min = self.get_scalar(Stat::Min, dtype);
348        let other_min = other.get_scalar(Stat::Min, dtype);
349
350        if let (
351            Some(Precision::Exact(self_const)),
352            Some(Precision::Exact(other_const)),
353            Some(Precision::Exact(self_min)),
354            Some(Precision::Exact(other_min)),
355        ) = (self_const, other_const, self_min, other_min)
356        {
357            if self_const && other_const && self_min == other_min {
358                self.set(Stat::IsConstant, Precision::exact(true));
359            } else {
360                self.set(Stat::IsConstant, Precision::inexact(false));
361            }
362        }
363        self.set(Stat::IsConstant, Precision::exact(false));
364    }
365
366    fn merge_is_sorted(&mut self, other: &Self, dtype: &DType) {
367        self.merge_sortedness_stat(other, Stat::IsSorted, dtype, PartialOrd::le)
368    }
369
370    fn merge_is_strict_sorted(&mut self, other: &Self, dtype: &DType) {
371        self.merge_sortedness_stat(other, Stat::IsStrictSorted, dtype, PartialOrd::lt)
372    }
373
374    fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
375        &mut self,
376        other: &Self,
377        stat: Stat,
378        dtype: &DType,
379        cmp: F,
380    ) {
381        if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
382            == (self.get_as(stat), other.get_as(stat))
383        {
384            // There might be no stat because it was dropped, or it doesn't exist
385            // (e.g. an all null array).
386            // We assume that it was the dropped case since the doesn't exist might imply sorted,
387            // but this in-precision is correct.
388            if let (Some(self_max), Some(other_min)) = (
389                self.get_scalar_bound::<Max>(dtype),
390                other.get_scalar_bound::<Min>(dtype),
391            ) {
392                return if cmp(&self_max.max_value(), &other_min.min_value()) {
393                    // keep value
394                } else {
395                    self.set(stat, Precision::inexact(false));
396                };
397            }
398        }
399        self.clear(stat);
400    }
401
402    fn merge_null_count(&mut self, other: &Self) {
403        self.merge_sum_stat(Stat::NullCount, other)
404    }
405
406    fn merge_nan_count(&mut self, other: &Self) {
407        self.merge_sum_stat(Stat::NaNCount, other)
408    }
409
410    fn merge_uncompressed_size_in_bytes(&mut self, other: &Self) {
411        self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
412    }
413
414    fn merge_sum_stat(&mut self, stat: Stat, other: &Self) {
415        match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
416            (Some(nc1), Some(nc2)) => {
417                self.set(
418                    stat,
419                    nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
420                );
421            }
422            _ => self.clear(stat),
423        }
424    }
425}
426
427impl StatsProvider for StatsSet {
428    fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
429        self.values
430            .iter()
431            .find(|(s, _)| *s == stat)
432            .map(|(_, v)| v.clone())
433    }
434
435    fn len(&self) -> usize {
436        self.values.len()
437    }
438}
439
440#[cfg(test)]
441mod test {
442    use enum_iterator::all;
443    use itertools::Itertools;
444    use vortex_dtype::{DType, Nullability, PType};
445
446    use crate::arrays::PrimitiveArray;
447    use crate::stats::{IsConstant, Precision, Stat, StatsProvider, StatsProviderExt, StatsSet};
448
449    #[test]
450    fn test_iter() {
451        let set = StatsSet::new_unchecked(vec![
452            (Stat::Max, Precision::exact(100)),
453            (Stat::Min, Precision::exact(42)),
454        ]);
455        let mut iter = set.iter();
456        let first = iter.next().unwrap().clone();
457        assert_eq!(first.0, Stat::Max);
458        assert_eq!(
459            first.1.map(|f| i32::try_from(&f).unwrap()),
460            Precision::exact(100)
461        );
462        let snd = iter.next().unwrap().clone();
463        assert_eq!(snd.0, Stat::Min);
464        assert_eq!(snd.1.map(|s| i32::try_from(&s).unwrap()), 42);
465    }
466
467    #[test]
468    fn into_iter() {
469        let mut set = StatsSet::new_unchecked(vec![
470            (Stat::Max, Precision::exact(100)),
471            (Stat::Min, Precision::exact(42)),
472        ])
473        .into_iter();
474        let (stat, first) = set.next().unwrap();
475        assert_eq!(stat, Stat::Max);
476        assert_eq!(
477            first.map(|f| i32::try_from(&f).unwrap()),
478            Precision::exact(100)
479        );
480        let snd = set.next().unwrap();
481        assert_eq!(snd.0, Stat::Min);
482        assert_eq!(
483            snd.1.map(|s| i32::try_from(&s).unwrap()),
484            Precision::exact(42)
485        );
486    }
487
488    #[test]
489    fn merge_constant() {
490        let first = StatsSet::from_iter([
491            (Stat::Min, Precision::exact(42)),
492            (Stat::IsConstant, Precision::exact(true)),
493        ])
494        .merge_ordered(
495            &StatsSet::from_iter([
496                (Stat::Min, Precision::inexact(42)),
497                (Stat::IsConstant, Precision::exact(true)),
498            ]),
499            &DType::Primitive(PType::I32, Nullability::NonNullable),
500        );
501        assert_eq!(
502            first.get_as::<bool>(Stat::IsConstant),
503            Some(Precision::exact(false))
504        );
505        assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
506    }
507
508    #[test]
509    fn merge_into_min() {
510        let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
511            &StatsSet::default(),
512            &DType::Primitive(PType::I32, Nullability::NonNullable),
513        );
514        assert!(first.get(Stat::Min).is_none());
515    }
516
517    #[test]
518    fn merge_from_min() {
519        let first = StatsSet::default().merge_ordered(
520            &StatsSet::of(Stat::Min, Precision::exact(42)),
521            &DType::Primitive(PType::I32, Nullability::NonNullable),
522        );
523        assert!(first.get(Stat::Min).is_none());
524    }
525
526    #[test]
527    fn merge_mins() {
528        let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
529            &StatsSet::of(Stat::Min, Precision::exact(42)),
530            &DType::Primitive(PType::I32, Nullability::NonNullable),
531        );
532        assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(37)));
533    }
534
535    #[test]
536    fn merge_into_bound_max() {
537        let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
538            &StatsSet::default(),
539            &DType::Primitive(PType::I32, Nullability::NonNullable),
540        );
541        assert!(first.get(Stat::Max).is_none());
542    }
543
544    #[test]
545    fn merge_from_max() {
546        let first = StatsSet::default().merge_ordered(
547            &StatsSet::of(Stat::Max, Precision::exact(42)),
548            &DType::Primitive(PType::I32, Nullability::NonNullable),
549        );
550        assert!(first.get(Stat::Max).is_none());
551    }
552
553    #[test]
554    fn merge_maxes() {
555        let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
556            &StatsSet::of(Stat::Max, Precision::exact(42)),
557            &DType::Primitive(PType::I32, Nullability::NonNullable),
558        );
559        assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::exact(42)));
560    }
561
562    #[test]
563    fn merge_maxes_bound() {
564        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
565        let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
566            .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
567        assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::inexact(43)));
568    }
569
570    #[test]
571    fn merge_into_scalar() {
572        let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
573            &StatsSet::default(),
574            &DType::Primitive(PType::I32, Nullability::NonNullable),
575        );
576        assert!(first.get(Stat::Sum).is_none());
577    }
578
579    #[test]
580    fn merge_from_scalar() {
581        let first = StatsSet::default().merge_ordered(
582            &StatsSet::of(Stat::Sum, Precision::exact(42)),
583            &DType::Primitive(PType::I32, Nullability::NonNullable),
584        );
585        assert!(first.get(Stat::Sum).is_none());
586    }
587
588    #[test]
589    fn merge_scalars() {
590        let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
591            &StatsSet::of(Stat::Sum, Precision::exact(42)),
592            &DType::Primitive(PType::I32, Nullability::NonNullable),
593        );
594        assert_eq!(
595            first.get_as::<usize>(Stat::Sum),
596            Some(Precision::exact(79usize))
597        );
598    }
599
600    #[test]
601    fn merge_into_sortedness() {
602        let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
603            &StatsSet::default(),
604            &DType::Primitive(PType::I32, Nullability::NonNullable),
605        );
606        assert!(first.get(Stat::IsStrictSorted).is_none());
607    }
608
609    #[test]
610    fn merge_from_sortedness() {
611        let first = StatsSet::default().merge_ordered(
612            &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
613            &DType::Primitive(PType::I32, Nullability::NonNullable),
614        );
615        assert!(first.get(Stat::IsStrictSorted).is_none());
616    }
617
618    #[test]
619    fn merge_sortedness() {
620        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
621        first.set(Stat::Max, Precision::exact(1));
622        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
623        second.set(Stat::Min, Precision::exact(2));
624        first = first.merge_ordered(
625            &second,
626            &DType::Primitive(PType::I32, Nullability::NonNullable),
627        );
628        assert_eq!(
629            first.get_as::<bool>(Stat::IsStrictSorted),
630            Some(Precision::exact(true))
631        );
632    }
633
634    #[test]
635    fn merge_sortedness_out_of_order() {
636        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
637        first.set(Stat::Min, Precision::exact(1));
638        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
639        second.set(Stat::Max, Precision::exact(2));
640        second = second.merge_ordered(
641            &first,
642            &DType::Primitive(PType::I32, Nullability::NonNullable),
643        );
644        assert_eq!(
645            second.get_as::<bool>(Stat::IsStrictSorted),
646            Some(Precision::inexact(false))
647        );
648    }
649
650    #[test]
651    fn merge_sortedness_only_one_sorted() {
652        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
653        first.set(Stat::Max, Precision::exact(1));
654        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
655        second.set(Stat::Min, Precision::exact(2));
656        first.merge_ordered(
657            &second,
658            &DType::Primitive(PType::I32, Nullability::NonNullable),
659        );
660        assert_eq!(
661            second.get_as::<bool>(Stat::IsStrictSorted),
662            Some(Precision::exact(false))
663        );
664    }
665
666    #[test]
667    fn merge_sortedness_missing_min() {
668        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
669        first.set(Stat::Max, Precision::exact(1));
670        let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
671        first = first.merge_ordered(
672            &second,
673            &DType::Primitive(PType::I32, Nullability::NonNullable),
674        );
675        assert!(first.get(Stat::IsStrictSorted).is_none());
676    }
677
678    #[test]
679    fn merge_sortedness_bound_min() {
680        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
681        first.set(Stat::Max, Precision::exact(1));
682        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
683        second.set(Stat::Min, Precision::inexact(2));
684        first = first.merge_ordered(
685            &second,
686            &DType::Primitive(PType::I32, Nullability::NonNullable),
687        );
688        assert_eq!(
689            first.get_as::<bool>(Stat::IsStrictSorted),
690            Some(Precision::exact(true))
691        );
692    }
693
694    #[test]
695    fn merge_unordered() {
696        let array =
697            PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
698        let all_stats = all::<Stat>()
699            .filter(|s| !matches!(s, Stat::Sum))
700            .filter(|s| !matches!(s, Stat::NaNCount))
701            .collect_vec();
702        array.statistics().compute_all(&all_stats).unwrap();
703
704        let stats = array.statistics().to_owned();
705        for stat in &all_stats {
706            assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
707        }
708
709        let merged = stats.clone().merge_unordered(
710            &stats,
711            &DType::Primitive(PType::I32, Nullability::NonNullable),
712        );
713        for stat in &all_stats {
714            assert_eq!(
715                merged.get(*stat).is_some(),
716                stat.is_commutative(),
717                "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
718            )
719        }
720
721        assert_eq!(
722            merged.get_as::<i32>(Stat::Min),
723            stats.get_as::<i32>(Stat::Min)
724        );
725        assert_eq!(
726            merged.get_as::<i32>(Stat::Max),
727            stats.get_as::<i32>(Stat::Max)
728        );
729        assert_eq!(
730            merged.get_as::<u64>(Stat::NullCount).unwrap(),
731            stats.get_as::<u64>(Stat::NullCount).unwrap().map(|s| s * 2)
732        );
733    }
734
735    #[test]
736    fn merge_min_bound_same() {
737        // Merging a stat with a bound and another with an exact results in exact stat.
738        // since bound for min is a lower bound, it can in fact contain any value >= bound.
739        let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
740            &StatsSet::of(Stat::Min, Precision::exact(5)),
741            &DType::Primitive(PType::I32, Nullability::NonNullable),
742        );
743        assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::exact(5)));
744    }
745
746    #[test]
747    fn merge_min_bound_bound_lower() {
748        let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
749            &StatsSet::of(Stat::Min, Precision::exact(5)),
750            &DType::Primitive(PType::I32, Nullability::NonNullable),
751        );
752        assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::inexact(4)));
753    }
754
755    #[test]
756    fn retain_approx() {
757        let set = StatsSet::from_iter([
758            (Stat::Max, Precision::exact(100)),
759            (Stat::Min, Precision::exact(50)),
760            (Stat::Sum, Precision::inexact(10)),
761        ]);
762
763        let set = set.keep_inexact_stats(&[Stat::Min, Stat::Max]);
764
765        assert_eq!(set.len(), 2);
766        assert_eq!(set.get_as::<i32>(Stat::Max), Some(Precision::inexact(100)));
767        assert_eq!(set.get_as::<i32>(Stat::Min), Some(Precision::inexact(50)));
768        assert_eq!(set.get_as::<i32>(Stat::Sum), None);
769    }
770
771    #[test]
772    fn test_combine_is_constant() {
773        {
774            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
775            let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
776            stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
777            assert_eq!(
778                stats.get_as::<bool>(Stat::IsConstant),
779                Some(Precision::exact(true))
780            );
781        }
782
783        {
784            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
785            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
786            stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
787            assert_eq!(
788                stats.get_as::<bool>(Stat::IsConstant),
789                Some(Precision::exact(true))
790            );
791        }
792
793        {
794            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
795            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
796            stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
797            assert_eq!(
798                stats.get_as::<bool>(Stat::IsConstant),
799                Some(Precision::exact(false))
800            );
801        }
802    }
803
804    #[test]
805    fn test_combine_sets_boolean_conflict() {
806        let mut stats1 = StatsSet::from_iter([
807            (Stat::IsConstant, Precision::exact(true)),
808            (Stat::IsSorted, Precision::exact(true)),
809        ]);
810
811        let stats2 = StatsSet::from_iter([
812            (Stat::IsConstant, Precision::exact(false)),
813            (Stat::IsSorted, Precision::exact(true)),
814        ]);
815
816        let result = stats1.combine_sets(
817            &stats2,
818            &DType::Primitive(PType::I32, Nullability::NonNullable),
819        );
820        assert!(result.is_err());
821    }
822
823    #[test]
824    fn test_combine_sets_with_missing_stats() {
825        let mut stats1 = StatsSet::from_iter([
826            (Stat::Min, Precision::exact(42)),
827            (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
828        ]);
829
830        let stats2 = StatsSet::from_iter([
831            (Stat::Max, Precision::exact(100)),
832            (Stat::IsStrictSorted, Precision::exact(true)),
833        ]);
834
835        stats1
836            .combine_sets(
837                &stats2,
838                &DType::Primitive(PType::I32, Nullability::NonNullable),
839            )
840            .unwrap();
841
842        // Min should remain unchanged
843        assert_eq!(stats1.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
844        // Max should be added
845        assert_eq!(stats1.get_as::<i32>(Stat::Max), Some(Precision::exact(100)));
846        // IsStrictSorted should be added
847        assert_eq!(
848            stats1.get_as::<bool>(Stat::IsStrictSorted),
849            Some(Precision::exact(true))
850        );
851    }
852
853    #[test]
854    fn test_combine_sets_with_inexact() {
855        let mut stats1 = StatsSet::from_iter([
856            (Stat::Min, Precision::exact(42)),
857            (Stat::Max, Precision::inexact(100)),
858            (Stat::IsConstant, Precision::exact(false)),
859        ]);
860
861        let stats2 = StatsSet::from_iter([
862            // Must ensure Min from stats2 is <= Min from stats1
863            (Stat::Min, Precision::inexact(40)),
864            (Stat::Max, Precision::exact(90)),
865            (Stat::IsSorted, Precision::exact(true)),
866        ]);
867
868        stats1
869            .combine_sets(
870                &stats2,
871                &DType::Primitive(PType::I32, Nullability::NonNullable),
872            )
873            .unwrap();
874
875        // Min should remain unchanged since it's more restrictive than the inexact value
876        assert_eq!(stats1.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
877        // Check that max was updated with the exact value
878        assert_eq!(stats1.get_as::<i32>(Stat::Max), Some(Precision::exact(90)));
879        // Check that IsSorted was added
880        assert_eq!(
881            stats1.get_as::<bool>(Stat::IsSorted),
882            Some(Precision::exact(true))
883        );
884    }
885}