1use std::fmt::Debug;
5
6use enum_iterator::Sequence;
7use enum_iterator::all;
8use num_traits::CheckedAdd;
9use vortex_error::VortexError;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_err;
13use vortex_error::vortex_panic;
14
15use crate::dtype::DType;
16use crate::expr::stats::IsConstant;
17use crate::expr::stats::IsSorted;
18use crate::expr::stats::IsStrictSorted;
19use crate::expr::stats::Max;
20use crate::expr::stats::Min;
21use crate::expr::stats::NaNCount;
22use crate::expr::stats::NullCount;
23use crate::expr::stats::Precision;
24use crate::expr::stats::Stat;
25use crate::expr::stats::StatBound;
26use crate::expr::stats::StatType;
27use crate::expr::stats::StatsProvider;
28use crate::expr::stats::StatsProviderExt;
29use crate::expr::stats::Sum;
30use crate::expr::stats::UncompressedSizeInBytes;
31use crate::scalar::Scalar;
32use crate::scalar::ScalarValue;
33
34#[derive(Default, Debug, Clone)]
35pub struct StatsSet {
36 values: Vec<(Stat, Precision<ScalarValue>)>,
37}
38
39impl StatsSet {
40 pub unsafe fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
46 Self { values }
47 }
48
49 pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
51 unsafe { Self::new_unchecked(vec![(stat, value)]) }
53 }
54
55 fn reserve_full_capacity(&mut self) {
56 if self.values.capacity() < Stat::CARDINALITY {
57 self.values
58 .reserve_exact(Stat::CARDINALITY - self.values.capacity());
59 }
60 }
61
62 pub fn as_mut_typed_ref<'a, 'b>(&'a mut self, dtype: &'b DType) -> MutTypedStatsSetRef<'a, 'b> {
64 MutTypedStatsSetRef {
65 values: self,
66 dtype,
67 }
68 }
69
70 pub fn as_typed_ref<'a, 'b>(&'a self, dtype: &'b DType) -> TypedStatsSetRef<'a, 'b> {
72 TypedStatsSetRef {
73 values: self,
74 dtype,
75 }
76 }
77}
78
79impl StatsSet {
81 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
83 self.reserve_full_capacity();
84
85 if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
86 *existing = (stat, value);
87 } else {
88 self.values.push((stat, value));
89 }
90 }
91
92 pub fn clear(&mut self, stat: Stat) {
94 self.values.retain(|(s, _)| *s != stat);
95 }
96
97 pub fn retain_only(&mut self, stats: &[Stat]) {
99 self.values.retain(|(s, _)| stats.contains(s));
100 }
101
102 pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
106 self.values.iter()
107 }
108
109 pub fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
111 self.values
112 .iter()
113 .find(|(s, _)| *s == stat)
114 .map(|(_, v)| v.clone())
115 }
116
117 pub fn len(&self) -> usize {
119 self.values.len()
120 }
121
122 pub fn is_empty(&self) -> bool {
124 self.values.is_empty()
125 }
126
127 pub fn get_as<T: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
129 &self,
130 stat: Stat,
131 dtype: &DType,
132 ) -> Option<Precision<T>> {
133 self.get(stat).map(|v| {
134 v.map(|v| {
135 T::try_from(
136 &Scalar::try_new(dtype.clone(), Some(v))
137 .vortex_expect("failed to construct a scalar statistic"),
138 )
139 .unwrap_or_else(|err| {
140 vortex_panic!(
141 err,
142 "Failed to get stat {} as {}",
143 stat,
144 std::any::type_name::<T>()
145 )
146 })
147 })
148 })
149 }
150}
151
152pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
158
159impl Iterator for StatsSetIntoIter {
160 type Item = (Stat, Precision<ScalarValue>);
161
162 fn next(&mut self) -> Option<Self::Item> {
163 self.0.next()
164 }
165}
166
167impl IntoIterator for StatsSet {
168 type Item = (Stat, Precision<ScalarValue>);
169 type IntoIter = StatsSetIntoIter;
170
171 fn into_iter(self) -> Self::IntoIter {
172 StatsSetIntoIter(self.values.into_iter())
173 }
174}
175
176impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
177 fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
178 let iter = iter.into_iter();
179 let mut values = Vec::default();
180 values.reserve_exact(Stat::CARDINALITY);
181
182 let mut this = Self { values };
183 this.extend(iter);
184 this
185 }
186}
187
188impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
189 #[inline]
190 fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
191 let iter = iter.into_iter();
192 self.reserve_full_capacity();
193
194 iter.for_each(|(stat, value)| self.set(stat, value));
195 }
196}
197
198impl StatsSet {
200 pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
203 self.as_mut_typed_ref(dtype)
204 .merge_ordered(&other.as_typed_ref(dtype));
205 self
206 }
207
208 pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
211 self.as_mut_typed_ref(dtype)
212 .merge_unordered(&other.as_typed_ref(dtype));
213 self
214 }
215
216 pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
218 self.as_mut_typed_ref(dtype)
219 .combine_sets(&other.as_typed_ref(dtype))
220 }
221}
222
223pub struct TypedStatsSetRef<'a, 'b> {
224 pub values: &'a StatsSet,
225 pub dtype: &'b DType,
226}
227
228impl StatsProvider for TypedStatsSetRef<'_, '_> {
229 fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
230 self.values.get(stat).map(|p| {
231 p.map(|sv| {
232 Scalar::try_new(
233 stat.dtype(self.dtype)
234 .vortex_expect("Must have valid dtype if value is present"),
235 Some(sv),
236 )
237 .vortex_expect("failed to construct a scalar statistic")
238 })
239 })
240 }
241
242 fn len(&self) -> usize {
243 self.values.len()
244 }
245}
246
247pub struct MutTypedStatsSetRef<'a, 'b> {
248 pub values: &'a mut StatsSet,
249 pub dtype: &'b DType,
250}
251
252impl MutTypedStatsSetRef<'_, '_> {
253 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
255 self.values.set(stat, value);
256 }
257
258 pub fn clear(&mut self, stat: Stat) {
260 self.values.clear(stat);
261 }
262}
263
264impl StatsProvider for MutTypedStatsSetRef<'_, '_> {
265 fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
266 self.values.get(stat).map(|p| {
267 p.map(|sv| {
268 Scalar::try_new(
269 stat.dtype(self.dtype)
270 .vortex_expect("Must have valid dtype if value is present"),
271 Some(sv),
272 )
273 .vortex_expect("failed to construct a scalar statistic")
274 })
275 })
276 }
277
278 fn len(&self) -> usize {
279 self.values.len()
280 }
281}
282
283impl MutTypedStatsSetRef<'_, '_> {
285 pub fn merge_ordered(mut self, other: &TypedStatsSetRef) -> Self {
288 for s in all::<Stat>() {
289 match s {
290 Stat::IsConstant => self.merge_is_constant(other),
291 Stat::IsSorted => self.merge_is_sorted(other),
292 Stat::IsStrictSorted => self.merge_is_strict_sorted(other),
293 Stat::Max => self.merge_max(other),
294 Stat::Min => self.merge_min(other),
295 Stat::Sum => self.merge_sum(other),
296 Stat::NullCount => self.merge_null_count(other),
297 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
298 Stat::NaNCount => self.merge_nan_count(other),
299 }
300 }
301
302 self
303 }
304
305 pub fn merge_unordered(mut self, other: &TypedStatsSetRef) -> Self {
308 for s in all::<Stat>() {
309 if !s.is_commutative() {
310 self.clear(s);
311 continue;
312 }
313
314 match s {
315 Stat::IsConstant => self.merge_is_constant(other),
316 Stat::Max => self.merge_max(other),
317 Stat::Min => self.merge_min(other),
318 Stat::Sum => self.merge_sum(other),
319 Stat::NullCount => self.merge_null_count(other),
320 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
321 Stat::IsSorted | Stat::IsStrictSorted => {
322 unreachable!("not commutative")
323 }
324 Stat::NaNCount => self.merge_nan_count(other),
325 }
326 }
327
328 self
329 }
330
331 pub fn combine_sets(&mut self, other: &TypedStatsSetRef) -> VortexResult<()> {
333 let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
334 for s in other_stats {
335 match s {
336 Stat::Max => self.combine_bound::<Max>(other)?,
337 Stat::Min => self.combine_bound::<Min>(other)?,
338 Stat::UncompressedSizeInBytes => {
339 self.combine_bound::<UncompressedSizeInBytes>(other)?
340 }
341 Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
342 Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
343 Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
344 Stat::NullCount => self.combine_bound::<NullCount>(other)?,
345 Stat::Sum => self.combine_bound::<Sum>(other)?,
346 Stat::NaNCount => self.combine_bound::<NaNCount>(other)?,
347 }
348 }
349 Ok(())
350 }
351
352 fn combine_bound<S: StatType<Scalar>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
353 where
354 S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
355 {
356 match (self.get_scalar_bound::<S>(), other.get_scalar_bound::<S>()) {
357 (Some(m1), Some(m2)) => {
358 let meet = m1
359 .intersection(&m2)
360 .vortex_expect("can always compare scalar")
361 .ok_or_else(|| {
362 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
363 })?;
364 if meet != m1 {
365 self.set(
366 S::STAT,
367 meet.into_value().map(|s| {
368 s.into_value()
369 .vortex_expect("stat scalar value cannot be null")
370 }),
371 );
372 }
373 }
374 (None, Some(m)) => self.set(
375 S::STAT,
376 m.into_value().map(|s| {
377 s.into_value()
378 .vortex_expect("stat scalar value cannot be null")
379 }),
380 ),
381 (Some(_), _) => (),
382 (None, None) => self.clear(S::STAT),
383 }
384 Ok(())
385 }
386
387 fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
388 where
389 S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
390 {
391 match (
392 self.get_as_bound::<S, bool>(),
393 other.get_as_bound::<S, bool>(),
394 ) {
395 (Some(m1), Some(m2)) => {
396 let intersection = m1
397 .intersection(&m2)
398 .vortex_expect("can always compare boolean")
399 .ok_or_else(|| {
400 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
401 })?;
402 if intersection != m1 {
403 self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
404 }
405 }
406 (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
407 (Some(_), None) => (),
408 (None, None) => self.clear(S::STAT),
409 }
410 Ok(())
411 }
412
413 fn merge_min(&mut self, other: &TypedStatsSetRef) {
414 match (
415 self.get_scalar_bound::<Min>(),
416 other.get_scalar_bound::<Min>(),
417 ) {
418 (Some(m1), Some(m2)) => {
419 let meet = m1.union(&m2).vortex_expect("can compare scalar");
420 if meet != m1 {
421 self.set(
422 Stat::Min,
423 meet.into_value().map(|s| {
424 s.into_value()
425 .vortex_expect("stat scalar value cannot be null")
426 }),
427 );
428 }
429 }
430 _ => self.clear(Stat::Min),
431 }
432 }
433
434 fn merge_max(&mut self, other: &TypedStatsSetRef) {
435 match (
436 self.get_scalar_bound::<Max>(),
437 other.get_scalar_bound::<Max>(),
438 ) {
439 (Some(m1), Some(m2)) => {
440 let meet = m1.union(&m2).vortex_expect("can compare scalar");
441 if meet != m1 {
442 self.set(
443 Stat::Max,
444 meet.into_value().map(|s| {
445 s.into_value()
446 .vortex_expect("stat scalar value cannot be null")
447 }),
448 );
449 }
450 }
451 _ => self.clear(Stat::Max),
452 }
453 }
454
455 fn merge_sum(&mut self, other: &TypedStatsSetRef) {
456 match (
457 self.get_scalar_bound::<Sum>(),
458 other.get_scalar_bound::<Sum>(),
459 ) {
460 (Some(m1), Some(m2)) => {
461 if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
463 s1.as_primitive()
464 .checked_add(&s2.as_primitive())
465 .and_then(|pscalar| pscalar.pvalue().map(ScalarValue::Primitive))
466 }) {
467 self.set(Stat::Sum, Precision::Exact(scalar_value));
468 }
469 }
470 _ => self.clear(Stat::Sum),
471 }
472 }
473
474 fn merge_is_constant(&mut self, other: &TypedStatsSetRef) {
475 let self_const = self.get_as(Stat::IsConstant);
476 let other_const = other.get_as(Stat::IsConstant);
477 let self_min = self.get(Stat::Min);
478 let other_min = other.get(Stat::Min);
479
480 if let (
481 Some(Precision::Exact(self_const)),
482 Some(Precision::Exact(other_const)),
483 Some(Precision::Exact(self_min)),
484 Some(Precision::Exact(other_min)),
485 ) = (self_const, other_const, self_min, other_min)
486 {
487 if self_const && other_const && self_min == other_min {
488 self.set(Stat::IsConstant, Precision::exact(true));
489 } else {
490 self.set(Stat::IsConstant, Precision::inexact(false));
491 }
492 }
493 self.set(Stat::IsConstant, Precision::exact(false));
494 }
495
496 fn merge_is_sorted(&mut self, other: &TypedStatsSetRef) {
497 self.merge_sortedness_stat(other, Stat::IsSorted, PartialOrd::le)
498 }
499
500 fn merge_is_strict_sorted(&mut self, other: &TypedStatsSetRef) {
501 self.merge_sortedness_stat(other, Stat::IsStrictSorted, PartialOrd::lt)
502 }
503
504 fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
505 &mut self,
506 other: &TypedStatsSetRef,
507 stat: Stat,
508 cmp: F,
509 ) {
510 if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
511 == (self.get_as(stat), other.get_as(stat))
512 {
513 if let (Some(self_max), Some(other_min)) = (
518 self.get_scalar_bound::<Max>(),
519 other.get_scalar_bound::<Min>(),
520 ) {
521 return if cmp(&self_max.max_value(), &other_min.min_value()) {
522 } else {
524 self.set(stat, Precision::inexact(false));
525 };
526 }
527 }
528 self.clear(stat);
529 }
530
531 fn merge_null_count(&mut self, other: &TypedStatsSetRef) {
532 self.merge_sum_stat(Stat::NullCount, other)
533 }
534
535 fn merge_nan_count(&mut self, other: &TypedStatsSetRef) {
536 self.merge_sum_stat(Stat::NaNCount, other)
537 }
538
539 fn merge_uncompressed_size_in_bytes(&mut self, other: &TypedStatsSetRef) {
540 self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
541 }
542
543 fn merge_sum_stat(&mut self, stat: Stat, other: &TypedStatsSetRef) {
544 match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
545 (Some(nc1), Some(nc2)) => {
546 self.set(
547 stat,
548 nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
549 );
550 }
551 _ => self.clear(stat),
552 }
553 }
554}
555
556#[cfg(test)]
557mod test {
558 use enum_iterator::all;
559 use itertools::Itertools;
560
561 use crate::LEGACY_SESSION;
562 use crate::VortexSessionExecute;
563 use crate::arrays::PrimitiveArray;
564 use crate::dtype::DType;
565 use crate::dtype::Nullability;
566 use crate::dtype::PType;
567 use crate::expr::stats::IsConstant;
568 use crate::expr::stats::Precision;
569 use crate::expr::stats::Stat;
570 use crate::expr::stats::StatsProvider;
571 use crate::expr::stats::StatsProviderExt;
572 use crate::stats::StatsSet;
573 use crate::stats::stats_set::Scalar;
574
575 #[test]
576 fn test_iter() {
577 let set = unsafe {
579 StatsSet::new_unchecked(vec![
580 (Stat::Max, Precision::exact(100)),
581 (Stat::Min, Precision::exact(42)),
582 ])
583 };
584 let mut iter = set.iter();
585 let first = iter.next().unwrap().clone();
586 assert_eq!(first.0, Stat::Max);
587 assert_eq!(
588 first.1.map(
589 |f| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(f)).unwrap()).unwrap()
590 ),
591 Precision::exact(100)
592 );
593 let snd = iter.next().unwrap().clone();
594 assert_eq!(snd.0, Stat::Min);
595 assert_eq!(
596 snd.1.map(
597 |s| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(s)).unwrap()).unwrap()
598 ),
599 Precision::exact(42)
600 );
601 }
602
603 #[test]
604 fn into_iter() {
605 let mut set = unsafe {
607 StatsSet::new_unchecked(vec![
608 (Stat::Max, Precision::exact(100)),
609 (Stat::Min, Precision::exact(42)),
610 ])
611 }
612 .into_iter();
613 let (stat, first) = set.next().unwrap();
614 assert_eq!(stat, Stat::Max);
615 assert_eq!(
616 first.map(
617 |f| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(f)).unwrap()).unwrap()
618 ),
619 Precision::exact(100)
620 );
621 let snd = set.next().unwrap();
622 assert_eq!(snd.0, Stat::Min);
623 assert_eq!(
624 snd.1.map(
625 |s| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(s)).unwrap()).unwrap()
626 ),
627 Precision::exact(42)
628 );
629 }
630
631 #[test]
632 fn merge_constant() {
633 let first = StatsSet::from_iter([
634 (Stat::Min, Precision::exact(42)),
635 (Stat::IsConstant, Precision::exact(true)),
636 ])
637 .merge_ordered(
638 &StatsSet::from_iter([
639 (Stat::Min, Precision::inexact(42)),
640 (Stat::IsConstant, Precision::exact(true)),
641 ]),
642 &DType::Primitive(PType::I32, Nullability::NonNullable),
643 );
644
645 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
646 assert_eq!(
647 first_ref.get_as::<bool>(Stat::IsConstant),
648 Some(Precision::exact(false))
649 );
650 assert_eq!(
651 first_ref.get_as::<i32>(Stat::Min),
652 Some(Precision::exact(42))
653 );
654 }
655
656 #[test]
657 fn merge_into_min() {
658 let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
659 &StatsSet::default(),
660 &DType::Primitive(PType::I32, Nullability::NonNullable),
661 );
662
663 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
664 assert!(first_ref.get(Stat::Min).is_none());
665 }
666
667 #[test]
668 fn merge_from_min() {
669 let first = StatsSet::default().merge_ordered(
670 &StatsSet::of(Stat::Min, Precision::exact(42)),
671 &DType::Primitive(PType::I32, Nullability::NonNullable),
672 );
673
674 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
675 assert!(first_ref.get(Stat::Min).is_none());
676 }
677
678 #[test]
679 fn merge_mins() {
680 let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
681 &StatsSet::of(Stat::Min, Precision::exact(42)),
682 &DType::Primitive(PType::I32, Nullability::NonNullable),
683 );
684
685 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
686 assert_eq!(
687 first_ref.get_as::<i32>(Stat::Min),
688 Some(Precision::exact(37))
689 );
690 }
691
692 #[test]
693 fn merge_into_bound_max() {
694 let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
695 &StatsSet::default(),
696 &DType::Primitive(PType::I32, Nullability::NonNullable),
697 );
698 assert!(first.get(Stat::Max).is_none());
699 }
700
701 #[test]
702 fn merge_from_max() {
703 let first = StatsSet::default().merge_ordered(
704 &StatsSet::of(Stat::Max, Precision::exact(42)),
705 &DType::Primitive(PType::I32, Nullability::NonNullable),
706 );
707 assert!(first.get(Stat::Max).is_none());
708 }
709
710 #[test]
711 fn merge_maxes() {
712 let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
713 &StatsSet::of(Stat::Max, Precision::exact(42)),
714 &DType::Primitive(PType::I32, Nullability::NonNullable),
715 );
716 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
717 assert_eq!(
718 first_ref.get_as::<i32>(Stat::Max),
719 Some(Precision::exact(42))
720 );
721 }
722
723 #[test]
724 fn merge_maxes_bound() {
725 let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
726 let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
727 .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
728 let first_ref = first.as_typed_ref(&dtype);
729 assert_eq!(
730 first_ref.get_as::<i32>(Stat::Max),
731 Some(Precision::inexact(43))
732 );
733 }
734
735 #[test]
736 fn merge_into_scalar() {
737 let first = StatsSet::of(Stat::Sum, Precision::exact(42i64)).merge_ordered(
740 &StatsSet::default(),
741 &DType::Primitive(PType::I32, Nullability::NonNullable),
742 );
743 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
744 assert!(first_ref.get(Stat::Sum).is_none());
745 }
746
747 #[test]
748 fn merge_from_scalar() {
749 let first = StatsSet::default().merge_ordered(
752 &StatsSet::of(Stat::Sum, Precision::exact(42i64)),
753 &DType::Primitive(PType::I32, Nullability::NonNullable),
754 );
755 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
756 assert!(first_ref.get(Stat::Sum).is_none());
757 }
758
759 #[test]
760 fn merge_scalars() {
761 let first = StatsSet::of(Stat::Sum, Precision::exact(37i64)).merge_ordered(
764 &StatsSet::of(Stat::Sum, Precision::exact(42i64)),
765 &DType::Primitive(PType::I32, Nullability::NonNullable),
766 );
767 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
768 assert_eq!(
769 first_ref.get_as::<i64>(Stat::Sum),
770 Some(Precision::exact(79i64))
771 );
772 }
773
774 #[test]
775 fn merge_into_sortedness() {
776 let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
777 &StatsSet::default(),
778 &DType::Primitive(PType::I32, Nullability::NonNullable),
779 );
780 assert!(first.get(Stat::IsStrictSorted).is_none());
781 }
782
783 #[test]
784 fn merge_from_sortedness() {
785 let first = StatsSet::default().merge_ordered(
786 &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
787 &DType::Primitive(PType::I32, Nullability::NonNullable),
788 );
789 assert!(first.get(Stat::IsStrictSorted).is_none());
790 }
791
792 #[test]
793 fn merge_sortedness() {
794 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
795 first.set(Stat::Max, Precision::exact(1));
796 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
797 second.set(Stat::Min, Precision::exact(2));
798 first = first.merge_ordered(
799 &second,
800 &DType::Primitive(PType::I32, Nullability::NonNullable),
801 );
802
803 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
804 assert_eq!(
805 first_ref.get_as::<bool>(Stat::IsStrictSorted),
806 Some(Precision::exact(true))
807 );
808 }
809
810 #[test]
811 fn merge_sortedness_out_of_order() {
812 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
813 first.set(Stat::Min, Precision::exact(1));
814 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
815 second.set(Stat::Max, Precision::exact(2));
816 second = second.merge_ordered(
817 &first,
818 &DType::Primitive(PType::I32, Nullability::NonNullable),
819 );
820
821 let second_ref =
822 second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
823 assert_eq!(
824 second_ref.get_as::<bool>(Stat::IsStrictSorted),
825 Some(Precision::inexact(false))
826 );
827 }
828
829 #[test]
830 fn merge_sortedness_only_one_sorted() {
831 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
832 first.set(Stat::Max, Precision::exact(1));
833 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
834 second.set(Stat::Min, Precision::exact(2));
835 first.merge_ordered(
836 &second,
837 &DType::Primitive(PType::I32, Nullability::NonNullable),
838 );
839
840 let second_ref =
841 second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
842 assert_eq!(
843 second_ref.get_as::<bool>(Stat::IsStrictSorted),
844 Some(Precision::exact(false))
845 );
846 }
847
848 #[test]
849 fn merge_sortedness_missing_min() {
850 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
851 first.set(Stat::Max, Precision::exact(1));
852 let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
853 first = first.merge_ordered(
854 &second,
855 &DType::Primitive(PType::I32, Nullability::NonNullable),
856 );
857 assert!(first.get(Stat::IsStrictSorted).is_none());
858 }
859
860 #[test]
861 fn merge_sortedness_bound_min() {
862 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
863 first.set(Stat::Max, Precision::exact(1));
864 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
865 second.set(Stat::Min, Precision::inexact(2));
866 first = first.merge_ordered(
867 &second,
868 &DType::Primitive(PType::I32, Nullability::NonNullable),
869 );
870
871 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
872 assert_eq!(
873 first_ref.get_as::<bool>(Stat::IsStrictSorted),
874 Some(Precision::exact(true))
875 );
876 }
877
878 #[test]
879 fn merge_unordered() {
880 let array =
881 PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
882 let all_stats = all::<Stat>()
883 .filter(|s| !matches!(s, Stat::Sum))
884 .filter(|s| !matches!(s, Stat::NaNCount))
885 .collect_vec();
886 array
887 .statistics()
888 .compute_all(&all_stats, &mut LEGACY_SESSION.create_execution_ctx())
889 .unwrap();
890
891 let stats = array.statistics().to_owned();
892 for stat in &all_stats {
893 assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
894 }
895
896 let merged = stats.clone().merge_unordered(
897 &stats,
898 &DType::Primitive(PType::I32, Nullability::NonNullable),
899 );
900 for stat in &all_stats {
901 assert_eq!(
902 merged.get(*stat).is_some(),
903 stat.is_commutative(),
904 "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
905 )
906 }
907
908 let merged_ref = merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
909 let stats_ref = stats.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
910
911 assert_eq!(
912 merged_ref.get_as::<i32>(Stat::Min),
913 stats_ref.get_as::<i32>(Stat::Min)
914 );
915 assert_eq!(
916 merged_ref.get_as::<i32>(Stat::Max),
917 stats_ref.get_as::<i32>(Stat::Max)
918 );
919 assert_eq!(
920 merged_ref.get_as::<u64>(Stat::NullCount).unwrap(),
921 stats_ref
922 .get_as::<u64>(Stat::NullCount)
923 .unwrap()
924 .map(|s| s * 2)
925 );
926 }
927
928 #[test]
929 fn merge_min_bound_same() {
930 let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
933 &StatsSet::of(Stat::Min, Precision::exact(5)),
934 &DType::Primitive(PType::I32, Nullability::NonNullable),
935 );
936 let merged_ref =
937 merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
938 assert_eq!(
939 merged_ref.get_as::<i32>(Stat::Min),
940 Some(Precision::exact(5))
941 );
942 }
943
944 #[test]
945 fn merge_min_bound_bound_lower() {
946 let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
947 &StatsSet::of(Stat::Min, Precision::exact(5)),
948 &DType::Primitive(PType::I32, Nullability::NonNullable),
949 );
950 let merged_ref =
951 merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
952 assert_eq!(
953 merged_ref.get_as::<i32>(Stat::Min),
954 Some(Precision::inexact(4))
955 );
956 }
957
958 #[test]
959 fn test_combine_is_constant() {
960 {
961 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
962 let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
963 let mut stats_ref =
964 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
965 stats_ref
966 .combine_bool_stat::<IsConstant>(
967 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
968 )
969 .unwrap();
970 assert_eq!(
971 stats_ref.get_as::<bool>(Stat::IsConstant),
972 Some(Precision::exact(true))
973 );
974 }
975
976 {
977 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
978 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
979 let mut stats_ref =
980 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
981 stats_ref
982 .combine_bool_stat::<IsConstant>(
983 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
984 )
985 .unwrap();
986 assert_eq!(
987 stats_ref.get_as::<bool>(Stat::IsConstant),
988 Some(Precision::exact(true))
989 );
990 }
991
992 {
993 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
994 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
995 let mut stats_ref =
996 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
997 stats_ref
998 .combine_bool_stat::<IsConstant>(
999 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
1000 )
1001 .unwrap();
1002 assert_eq!(
1003 stats_ref.get_as::<bool>(Stat::IsConstant),
1004 Some(Precision::exact(false))
1005 );
1006 }
1007 }
1008
1009 #[test]
1010 fn test_combine_sets_boolean_conflict() {
1011 let mut stats1 = StatsSet::from_iter([
1012 (Stat::IsConstant, Precision::exact(true)),
1013 (Stat::IsSorted, Precision::exact(true)),
1014 ]);
1015
1016 let stats2 = StatsSet::from_iter([
1017 (Stat::IsConstant, Precision::exact(false)),
1018 (Stat::IsSorted, Precision::exact(true)),
1019 ]);
1020
1021 let result = stats1.combine_sets(
1022 &stats2,
1023 &DType::Primitive(PType::I32, Nullability::NonNullable),
1024 );
1025 assert!(result.is_err());
1026 }
1027
1028 #[test]
1029 fn test_combine_sets_with_missing_stats() {
1030 let mut stats1 = StatsSet::from_iter([
1031 (Stat::Min, Precision::exact(42)),
1032 (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
1033 ]);
1034
1035 let stats2 = StatsSet::from_iter([
1036 (Stat::Max, Precision::exact(100)),
1037 (Stat::IsStrictSorted, Precision::exact(true)),
1038 ]);
1039
1040 stats1
1041 .combine_sets(
1042 &stats2,
1043 &DType::Primitive(PType::I32, Nullability::NonNullable),
1044 )
1045 .unwrap();
1046
1047 let stats_ref =
1048 stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1049
1050 assert_eq!(
1052 stats_ref.get_as::<i32>(Stat::Min),
1053 Some(Precision::exact(42))
1054 );
1055 assert_eq!(
1057 stats_ref.get_as::<i32>(Stat::Max),
1058 Some(Precision::exact(100))
1059 );
1060 assert_eq!(
1062 stats_ref.get_as::<bool>(Stat::IsStrictSorted),
1063 Some(Precision::exact(true))
1064 );
1065 }
1066
1067 #[test]
1068 fn test_combine_sets_with_inexact() {
1069 let mut stats1 = StatsSet::from_iter([
1070 (Stat::Min, Precision::exact(42)),
1071 (Stat::Max, Precision::inexact(100)),
1072 (Stat::IsConstant, Precision::exact(false)),
1073 ]);
1074
1075 let stats2 = StatsSet::from_iter([
1076 (Stat::Min, Precision::inexact(40)),
1078 (Stat::Max, Precision::exact(90)),
1079 (Stat::IsSorted, Precision::exact(true)),
1080 ]);
1081
1082 stats1
1083 .combine_sets(
1084 &stats2,
1085 &DType::Primitive(PType::I32, Nullability::NonNullable),
1086 )
1087 .unwrap();
1088
1089 let stats_ref =
1090 stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1091
1092 assert_eq!(
1094 stats_ref.get_as::<i32>(Stat::Min),
1095 Some(Precision::exact(42))
1096 );
1097 assert_eq!(
1099 stats_ref.get_as::<i32>(Stat::Max),
1100 Some(Precision::exact(90))
1101 );
1102 assert_eq!(
1104 stats_ref.get_as::<bool>(Stat::IsSorted),
1105 Some(Precision::exact(true))
1106 );
1107 }
1108}