1use std::fmt::Debug;
5
6use enum_iterator::Sequence;
7use enum_iterator::all;
8use num_traits::CheckedAdd;
9use vortex_error::VortexError;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_err;
13use vortex_error::vortex_panic;
14
15use crate::dtype::DType;
16use crate::expr::stats::IsConstant;
17use crate::expr::stats::IsSorted;
18use crate::expr::stats::IsStrictSorted;
19use crate::expr::stats::Max;
20use crate::expr::stats::Min;
21use crate::expr::stats::NaNCount;
22use crate::expr::stats::NullCount;
23use crate::expr::stats::Precision;
24use crate::expr::stats::Stat;
25use crate::expr::stats::StatBound;
26use crate::expr::stats::StatType;
27use crate::expr::stats::StatsProvider;
28use crate::expr::stats::StatsProviderExt;
29use crate::expr::stats::Sum;
30use crate::expr::stats::UncompressedSizeInBytes;
31use crate::scalar::Scalar;
32use crate::scalar::ScalarValue;
33
34#[derive(Default, Debug, Clone)]
35pub struct StatsSet {
36 values: Vec<(Stat, Precision<ScalarValue>)>,
37}
38
39impl StatsSet {
40 pub unsafe fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
46 Self { values }
47 }
48
49 pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
51 unsafe { Self::new_unchecked(vec![(stat, value)]) }
53 }
54
55 fn reserve_full_capacity(&mut self) {
56 if self.values.capacity() < Stat::CARDINALITY {
57 self.values
58 .reserve_exact(Stat::CARDINALITY - self.values.capacity());
59 }
60 }
61
62 pub fn as_mut_typed_ref<'a, 'b>(&'a mut self, dtype: &'b DType) -> MutTypedStatsSetRef<'a, 'b> {
64 MutTypedStatsSetRef {
65 values: self,
66 dtype,
67 }
68 }
69
70 pub fn as_typed_ref<'a, 'b>(&'a self, dtype: &'b DType) -> TypedStatsSetRef<'a, 'b> {
72 TypedStatsSetRef {
73 values: self,
74 dtype,
75 }
76 }
77}
78
79impl StatsSet {
81 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
83 self.reserve_full_capacity();
84
85 if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
86 *existing = (stat, value);
87 } else {
88 self.values.push((stat, value));
89 }
90 }
91
92 pub fn clear(&mut self, stat: Stat) {
94 self.values.retain(|(s, _)| *s != stat);
95 }
96
97 pub fn retain_only(&mut self, stats: &[Stat]) {
99 self.values.retain(|(s, _)| stats.contains(s));
100 }
101
102 pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
106 self.values.iter()
107 }
108
109 pub fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
111 self.values
112 .iter()
113 .find(|(s, _)| *s == stat)
114 .map(|(_, v)| v.clone())
115 }
116
117 pub fn len(&self) -> usize {
119 self.values.len()
120 }
121
122 pub fn is_empty(&self) -> bool {
124 self.values.is_empty()
125 }
126
127 pub fn get_as<T: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
129 &self,
130 stat: Stat,
131 dtype: &DType,
132 ) -> Option<Precision<T>> {
133 self.get(stat).map(|v| {
134 v.map(|v| {
135 T::try_from(
136 &Scalar::try_new(dtype.clone(), Some(v))
137 .vortex_expect("failed to construct a scalar statistic"),
138 )
139 .unwrap_or_else(|err| {
140 vortex_panic!(
141 err,
142 "Failed to get stat {} as {}",
143 stat,
144 std::any::type_name::<T>()
145 )
146 })
147 })
148 })
149 }
150}
151
152pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
158
159impl Iterator for StatsSetIntoIter {
160 type Item = (Stat, Precision<ScalarValue>);
161
162 fn next(&mut self) -> Option<Self::Item> {
163 self.0.next()
164 }
165}
166
167impl IntoIterator for StatsSet {
168 type Item = (Stat, Precision<ScalarValue>);
169 type IntoIter = StatsSetIntoIter;
170
171 fn into_iter(self) -> Self::IntoIter {
172 StatsSetIntoIter(self.values.into_iter())
173 }
174}
175
176impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
177 fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
178 let iter = iter.into_iter();
179 let mut values = Vec::default();
180 values.reserve_exact(Stat::CARDINALITY);
181
182 let mut this = Self { values };
183 this.extend(iter);
184 this
185 }
186}
187
188impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
189 #[inline]
190 fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
191 let iter = iter.into_iter();
192 self.reserve_full_capacity();
193
194 iter.for_each(|(stat, value)| self.set(stat, value));
195 }
196}
197
198impl StatsSet {
200 pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
203 self.as_mut_typed_ref(dtype)
204 .merge_ordered(&other.as_typed_ref(dtype));
205 self
206 }
207
208 pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
211 self.as_mut_typed_ref(dtype)
212 .merge_unordered(&other.as_typed_ref(dtype));
213 self
214 }
215
216 pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
218 self.as_mut_typed_ref(dtype)
219 .combine_sets(&other.as_typed_ref(dtype))
220 }
221}
222
223pub struct TypedStatsSetRef<'a, 'b> {
224 pub values: &'a StatsSet,
225 pub dtype: &'b DType,
226}
227
228impl StatsProvider for TypedStatsSetRef<'_, '_> {
229 fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
230 self.values.get(stat).map(|p| {
231 p.map(|sv| {
232 Scalar::try_new(
233 stat.dtype(self.dtype)
234 .vortex_expect("Must have valid dtype if value is present"),
235 Some(sv),
236 )
237 .vortex_expect("failed to construct a scalar statistic")
238 })
239 })
240 }
241
242 fn len(&self) -> usize {
243 self.values.len()
244 }
245}
246
247pub struct MutTypedStatsSetRef<'a, 'b> {
248 pub values: &'a mut StatsSet,
249 pub dtype: &'b DType,
250}
251
252impl MutTypedStatsSetRef<'_, '_> {
253 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
255 self.values.set(stat, value);
256 }
257
258 pub fn clear(&mut self, stat: Stat) {
260 self.values.clear(stat);
261 }
262}
263
264impl StatsProvider for MutTypedStatsSetRef<'_, '_> {
265 fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
266 self.values.get(stat).map(|p| {
267 p.map(|sv| {
268 Scalar::try_new(
269 stat.dtype(self.dtype)
270 .vortex_expect("Must have valid dtype if value is present"),
271 Some(sv),
272 )
273 .vortex_expect("failed to construct a scalar statistic")
274 })
275 })
276 }
277
278 fn len(&self) -> usize {
279 self.values.len()
280 }
281}
282
283impl MutTypedStatsSetRef<'_, '_> {
285 pub fn merge_ordered(mut self, other: &TypedStatsSetRef) -> Self {
288 for s in all::<Stat>() {
289 match s {
290 Stat::IsConstant => self.merge_is_constant(other),
291 Stat::IsSorted => self.merge_is_sorted(other),
292 Stat::IsStrictSorted => self.merge_is_strict_sorted(other),
293 Stat::Max => self.merge_max(other),
294 Stat::Min => self.merge_min(other),
295 Stat::Sum => self.merge_sum(other),
296 Stat::NullCount => self.merge_null_count(other),
297 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
298 Stat::NaNCount => self.merge_nan_count(other),
299 }
300 }
301
302 self
303 }
304
305 pub fn merge_unordered(mut self, other: &TypedStatsSetRef) -> Self {
308 for s in all::<Stat>() {
309 if !s.is_commutative() {
310 self.clear(s);
311 continue;
312 }
313
314 match s {
315 Stat::IsConstant => self.merge_is_constant(other),
316 Stat::Max => self.merge_max(other),
317 Stat::Min => self.merge_min(other),
318 Stat::Sum => self.merge_sum(other),
319 Stat::NullCount => self.merge_null_count(other),
320 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
321 Stat::IsSorted | Stat::IsStrictSorted => {
322 unreachable!("not commutative")
323 }
324 Stat::NaNCount => self.merge_nan_count(other),
325 }
326 }
327
328 self
329 }
330
331 pub fn combine_sets(&mut self, other: &TypedStatsSetRef) -> VortexResult<()> {
333 let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
334 for s in other_stats {
335 match s {
336 Stat::Max => self.combine_bound::<Max>(other)?,
337 Stat::Min => self.combine_bound::<Min>(other)?,
338 Stat::UncompressedSizeInBytes => {
339 self.combine_bound::<UncompressedSizeInBytes>(other)?
340 }
341 Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
342 Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
343 Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
344 Stat::NullCount => self.combine_bound::<NullCount>(other)?,
345 Stat::Sum => self.combine_bound::<Sum>(other)?,
346 Stat::NaNCount => self.combine_bound::<NaNCount>(other)?,
347 }
348 }
349 Ok(())
350 }
351
352 fn combine_bound<S: StatType<Scalar>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
353 where
354 S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
355 {
356 match (self.get_scalar_bound::<S>(), other.get_scalar_bound::<S>()) {
357 (Some(m1), Some(m2)) => {
358 let meet = m1
359 .intersection(&m2)
360 .vortex_expect("can always compare scalar")
361 .ok_or_else(|| {
362 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
363 })?;
364 if meet != m1 {
365 self.set(
366 S::STAT,
367 meet.into_value().map(|s| {
368 s.into_value()
369 .vortex_expect("stat scalar value cannot be null")
370 }),
371 );
372 }
373 }
374 (None, Some(m)) => self.set(
375 S::STAT,
376 m.into_value().map(|s| {
377 s.into_value()
378 .vortex_expect("stat scalar value cannot be null")
379 }),
380 ),
381 (Some(_), _) => (),
382 (None, None) => self.clear(S::STAT),
383 }
384 Ok(())
385 }
386
387 fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
388 where
389 S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
390 {
391 match (
392 self.get_as_bound::<S, bool>(),
393 other.get_as_bound::<S, bool>(),
394 ) {
395 (Some(m1), Some(m2)) => {
396 let intersection = m1
397 .intersection(&m2)
398 .vortex_expect("can always compare boolean")
399 .ok_or_else(|| {
400 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
401 })?;
402 if intersection != m1 {
403 self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
404 }
405 }
406 (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
407 (Some(_), None) => (),
408 (None, None) => self.clear(S::STAT),
409 }
410 Ok(())
411 }
412
413 fn merge_min(&mut self, other: &TypedStatsSetRef) {
414 match (
415 self.get_scalar_bound::<Min>(),
416 other.get_scalar_bound::<Min>(),
417 ) {
418 (Some(m1), Some(m2)) => {
419 let meet = m1.union(&m2).vortex_expect("can compare scalar");
420 if meet != m1 {
421 self.set(
422 Stat::Min,
423 meet.into_value().map(|s| {
424 s.into_value()
425 .vortex_expect("stat scalar value cannot be null")
426 }),
427 );
428 }
429 }
430 _ => self.clear(Stat::Min),
431 }
432 }
433
434 fn merge_max(&mut self, other: &TypedStatsSetRef) {
435 match (
436 self.get_scalar_bound::<Max>(),
437 other.get_scalar_bound::<Max>(),
438 ) {
439 (Some(m1), Some(m2)) => {
440 let meet = m1.union(&m2).vortex_expect("can compare scalar");
441 if meet != m1 {
442 self.set(
443 Stat::Max,
444 meet.into_value().map(|s| {
445 s.into_value()
446 .vortex_expect("stat scalar value cannot be null")
447 }),
448 );
449 }
450 }
451 _ => self.clear(Stat::Max),
452 }
453 }
454
455 fn merge_sum(&mut self, other: &TypedStatsSetRef) {
456 match (
457 self.get_scalar_bound::<Sum>(),
458 other.get_scalar_bound::<Sum>(),
459 ) {
460 (Some(m1), Some(m2)) => {
461 if let Some(scalar_value) =
463 m1.zip(m2).as_exact().and_then(|(s1, s2)| match s1.dtype() {
464 DType::Primitive(..) => s1
465 .as_primitive()
466 .checked_add(&s2.as_primitive())
467 .and_then(|pscalar| pscalar.pvalue().map(ScalarValue::Primitive)),
468 DType::Decimal(..) => s1
469 .as_decimal()
470 .checked_binary_numeric(
471 &s2.as_decimal(),
472 crate::scalar::NumericOperator::Add,
473 )
474 .map(|scalar| {
475 ScalarValue::Decimal(
476 scalar
477 .decimal_value()
478 .vortex_expect("no decimal value in scalar"),
479 )
480 }),
481 _ => None,
482 })
483 {
484 self.set(Stat::Sum, Precision::Exact(scalar_value));
485 }
486 }
487 _ => self.clear(Stat::Sum),
488 }
489 }
490
491 fn merge_is_constant(&mut self, other: &TypedStatsSetRef) {
492 let self_const = self.get_as(Stat::IsConstant);
493 let other_const = other.get_as(Stat::IsConstant);
494 let self_min = self.get(Stat::Min);
495 let other_min = other.get(Stat::Min);
496
497 if let (
498 Some(Precision::Exact(self_const)),
499 Some(Precision::Exact(other_const)),
500 Some(Precision::Exact(self_min)),
501 Some(Precision::Exact(other_min)),
502 ) = (self_const, other_const, self_min, other_min)
503 {
504 if self_const && other_const && self_min == other_min {
505 self.set(Stat::IsConstant, Precision::exact(true));
506 } else {
507 self.set(Stat::IsConstant, Precision::inexact(false));
508 }
509 }
510 self.set(Stat::IsConstant, Precision::exact(false));
511 }
512
513 fn merge_is_sorted(&mut self, other: &TypedStatsSetRef) {
514 self.merge_sortedness_stat(other, Stat::IsSorted, PartialOrd::le)
515 }
516
517 fn merge_is_strict_sorted(&mut self, other: &TypedStatsSetRef) {
518 self.merge_sortedness_stat(other, Stat::IsStrictSorted, PartialOrd::lt)
519 }
520
521 fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
522 &mut self,
523 other: &TypedStatsSetRef,
524 stat: Stat,
525 cmp: F,
526 ) {
527 if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
528 == (self.get_as(stat), other.get_as(stat))
529 {
530 if let (Some(self_max), Some(other_min)) = (
535 self.get_scalar_bound::<Max>(),
536 other.get_scalar_bound::<Min>(),
537 ) {
538 return if cmp(&self_max.max_value(), &other_min.min_value()) {
539 } else {
541 self.set(stat, Precision::inexact(false));
542 };
543 }
544 }
545 self.clear(stat);
546 }
547
548 fn merge_null_count(&mut self, other: &TypedStatsSetRef) {
549 self.merge_sum_stat(Stat::NullCount, other)
550 }
551
552 fn merge_nan_count(&mut self, other: &TypedStatsSetRef) {
553 self.merge_sum_stat(Stat::NaNCount, other)
554 }
555
556 fn merge_uncompressed_size_in_bytes(&mut self, other: &TypedStatsSetRef) {
557 self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
558 }
559
560 fn merge_sum_stat(&mut self, stat: Stat, other: &TypedStatsSetRef) {
561 match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
562 (Some(nc1), Some(nc2)) => {
563 self.set(
564 stat,
565 nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
566 );
567 }
568 _ => self.clear(stat),
569 }
570 }
571}
572
573#[cfg(test)]
574mod test {
575 use enum_iterator::all;
576 use itertools::Itertools;
577
578 use crate::LEGACY_SESSION;
579 use crate::VortexSessionExecute;
580 use crate::arrays::PrimitiveArray;
581 use crate::dtype::DType;
582 use crate::dtype::Nullability;
583 use crate::dtype::PType;
584 use crate::expr::stats::IsConstant;
585 use crate::expr::stats::Precision;
586 use crate::expr::stats::Stat;
587 use crate::expr::stats::StatsProvider;
588 use crate::expr::stats::StatsProviderExt;
589 use crate::stats::StatsSet;
590 use crate::stats::stats_set::Scalar;
591
592 #[test]
593 fn test_iter() {
594 let set = unsafe {
596 StatsSet::new_unchecked(vec![
597 (Stat::Max, Precision::exact(100)),
598 (Stat::Min, Precision::exact(42)),
599 ])
600 };
601 let mut iter = set.iter();
602 let first = iter.next().unwrap().clone();
603 assert_eq!(first.0, Stat::Max);
604 assert_eq!(
605 first.1.map(
606 |f| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(f)).unwrap()).unwrap()
607 ),
608 Precision::exact(100)
609 );
610 let snd = iter.next().unwrap().clone();
611 assert_eq!(snd.0, Stat::Min);
612 assert_eq!(
613 snd.1.map(
614 |s| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(s)).unwrap()).unwrap()
615 ),
616 Precision::exact(42)
617 );
618 }
619
620 #[test]
621 fn into_iter() {
622 let mut set = unsafe {
624 StatsSet::new_unchecked(vec![
625 (Stat::Max, Precision::exact(100)),
626 (Stat::Min, Precision::exact(42)),
627 ])
628 }
629 .into_iter();
630 let (stat, first) = set.next().unwrap();
631 assert_eq!(stat, Stat::Max);
632 assert_eq!(
633 first.map(
634 |f| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(f)).unwrap()).unwrap()
635 ),
636 Precision::exact(100)
637 );
638 let snd = set.next().unwrap();
639 assert_eq!(snd.0, Stat::Min);
640 assert_eq!(
641 snd.1.map(
642 |s| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(s)).unwrap()).unwrap()
643 ),
644 Precision::exact(42)
645 );
646 }
647
648 #[test]
649 fn merge_constant() {
650 let first = StatsSet::from_iter([
651 (Stat::Min, Precision::exact(42)),
652 (Stat::IsConstant, Precision::exact(true)),
653 ])
654 .merge_ordered(
655 &StatsSet::from_iter([
656 (Stat::Min, Precision::inexact(42)),
657 (Stat::IsConstant, Precision::exact(true)),
658 ]),
659 &DType::Primitive(PType::I32, Nullability::NonNullable),
660 );
661
662 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
663 assert_eq!(
664 first_ref.get_as::<bool>(Stat::IsConstant),
665 Some(Precision::exact(false))
666 );
667 assert_eq!(
668 first_ref.get_as::<i32>(Stat::Min),
669 Some(Precision::exact(42))
670 );
671 }
672
673 #[test]
674 fn merge_into_min() {
675 let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
676 &StatsSet::default(),
677 &DType::Primitive(PType::I32, Nullability::NonNullable),
678 );
679
680 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
681 assert!(first_ref.get(Stat::Min).is_none());
682 }
683
684 #[test]
685 fn merge_from_min() {
686 let first = StatsSet::default().merge_ordered(
687 &StatsSet::of(Stat::Min, Precision::exact(42)),
688 &DType::Primitive(PType::I32, Nullability::NonNullable),
689 );
690
691 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
692 assert!(first_ref.get(Stat::Min).is_none());
693 }
694
695 #[test]
696 fn merge_mins() {
697 let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
698 &StatsSet::of(Stat::Min, Precision::exact(42)),
699 &DType::Primitive(PType::I32, Nullability::NonNullable),
700 );
701
702 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
703 assert_eq!(
704 first_ref.get_as::<i32>(Stat::Min),
705 Some(Precision::exact(37))
706 );
707 }
708
709 #[test]
710 fn merge_into_bound_max() {
711 let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
712 &StatsSet::default(),
713 &DType::Primitive(PType::I32, Nullability::NonNullable),
714 );
715 assert!(first.get(Stat::Max).is_none());
716 }
717
718 #[test]
719 fn merge_from_max() {
720 let first = StatsSet::default().merge_ordered(
721 &StatsSet::of(Stat::Max, Precision::exact(42)),
722 &DType::Primitive(PType::I32, Nullability::NonNullable),
723 );
724 assert!(first.get(Stat::Max).is_none());
725 }
726
727 #[test]
728 fn merge_maxes() {
729 let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
730 &StatsSet::of(Stat::Max, Precision::exact(42)),
731 &DType::Primitive(PType::I32, Nullability::NonNullable),
732 );
733 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
734 assert_eq!(
735 first_ref.get_as::<i32>(Stat::Max),
736 Some(Precision::exact(42))
737 );
738 }
739
740 #[test]
741 fn merge_maxes_bound() {
742 let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
743 let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
744 .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
745 let first_ref = first.as_typed_ref(&dtype);
746 assert_eq!(
747 first_ref.get_as::<i32>(Stat::Max),
748 Some(Precision::inexact(43))
749 );
750 }
751
752 #[test]
753 fn merge_into_scalar() {
754 let first = StatsSet::of(Stat::Sum, Precision::exact(42i64)).merge_ordered(
757 &StatsSet::default(),
758 &DType::Primitive(PType::I32, Nullability::NonNullable),
759 );
760 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
761 assert!(first_ref.get(Stat::Sum).is_none());
762 }
763
764 #[test]
765 fn merge_from_scalar() {
766 let first = StatsSet::default().merge_ordered(
769 &StatsSet::of(Stat::Sum, Precision::exact(42i64)),
770 &DType::Primitive(PType::I32, Nullability::NonNullable),
771 );
772 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
773 assert!(first_ref.get(Stat::Sum).is_none());
774 }
775
776 #[test]
777 fn merge_scalars() {
778 let first = StatsSet::of(Stat::Sum, Precision::exact(37i64)).merge_ordered(
781 &StatsSet::of(Stat::Sum, Precision::exact(42i64)),
782 &DType::Primitive(PType::I32, Nullability::NonNullable),
783 );
784 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
785 assert_eq!(
786 first_ref.get_as::<i64>(Stat::Sum),
787 Some(Precision::exact(79i64))
788 );
789 }
790
791 #[test]
792 fn merge_into_sortedness() {
793 let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
794 &StatsSet::default(),
795 &DType::Primitive(PType::I32, Nullability::NonNullable),
796 );
797 assert!(first.get(Stat::IsStrictSorted).is_none());
798 }
799
800 #[test]
801 fn merge_from_sortedness() {
802 let first = StatsSet::default().merge_ordered(
803 &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
804 &DType::Primitive(PType::I32, Nullability::NonNullable),
805 );
806 assert!(first.get(Stat::IsStrictSorted).is_none());
807 }
808
809 #[test]
810 fn merge_sortedness() {
811 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
812 first.set(Stat::Max, Precision::exact(1));
813 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
814 second.set(Stat::Min, Precision::exact(2));
815 first = first.merge_ordered(
816 &second,
817 &DType::Primitive(PType::I32, Nullability::NonNullable),
818 );
819
820 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
821 assert_eq!(
822 first_ref.get_as::<bool>(Stat::IsStrictSorted),
823 Some(Precision::exact(true))
824 );
825 }
826
827 #[test]
828 fn merge_sortedness_out_of_order() {
829 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
830 first.set(Stat::Min, Precision::exact(1));
831 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
832 second.set(Stat::Max, Precision::exact(2));
833 second = second.merge_ordered(
834 &first,
835 &DType::Primitive(PType::I32, Nullability::NonNullable),
836 );
837
838 let second_ref =
839 second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
840 assert_eq!(
841 second_ref.get_as::<bool>(Stat::IsStrictSorted),
842 Some(Precision::inexact(false))
843 );
844 }
845
846 #[test]
847 fn merge_sortedness_only_one_sorted() {
848 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
849 first.set(Stat::Max, Precision::exact(1));
850 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
851 second.set(Stat::Min, Precision::exact(2));
852 first.merge_ordered(
853 &second,
854 &DType::Primitive(PType::I32, Nullability::NonNullable),
855 );
856
857 let second_ref =
858 second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
859 assert_eq!(
860 second_ref.get_as::<bool>(Stat::IsStrictSorted),
861 Some(Precision::exact(false))
862 );
863 }
864
865 #[test]
866 fn merge_sortedness_missing_min() {
867 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
868 first.set(Stat::Max, Precision::exact(1));
869 let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
870 first = first.merge_ordered(
871 &second,
872 &DType::Primitive(PType::I32, Nullability::NonNullable),
873 );
874 assert!(first.get(Stat::IsStrictSorted).is_none());
875 }
876
877 #[test]
878 fn merge_sortedness_bound_min() {
879 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
880 first.set(Stat::Max, Precision::exact(1));
881 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
882 second.set(Stat::Min, Precision::inexact(2));
883 first = first.merge_ordered(
884 &second,
885 &DType::Primitive(PType::I32, Nullability::NonNullable),
886 );
887
888 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
889 assert_eq!(
890 first_ref.get_as::<bool>(Stat::IsStrictSorted),
891 Some(Precision::exact(true))
892 );
893 }
894
895 #[test]
896 fn merge_unordered() {
897 let array =
898 PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
899 let all_stats = all::<Stat>()
900 .filter(|s| !matches!(s, Stat::Sum))
901 .filter(|s| !matches!(s, Stat::NaNCount))
902 .collect_vec();
903 array
904 .statistics()
905 .compute_all(&all_stats, &mut LEGACY_SESSION.create_execution_ctx())
906 .unwrap();
907
908 let stats = array.statistics().to_owned();
909 for stat in &all_stats {
910 assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
911 }
912
913 let merged = stats.clone().merge_unordered(
914 &stats,
915 &DType::Primitive(PType::I32, Nullability::NonNullable),
916 );
917 for stat in &all_stats {
918 assert_eq!(
919 merged.get(*stat).is_some(),
920 stat.is_commutative(),
921 "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
922 )
923 }
924
925 let merged_ref = merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
926 let stats_ref = stats.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
927
928 assert_eq!(
929 merged_ref.get_as::<i32>(Stat::Min),
930 stats_ref.get_as::<i32>(Stat::Min)
931 );
932 assert_eq!(
933 merged_ref.get_as::<i32>(Stat::Max),
934 stats_ref.get_as::<i32>(Stat::Max)
935 );
936 assert_eq!(
937 merged_ref.get_as::<u64>(Stat::NullCount).unwrap(),
938 stats_ref
939 .get_as::<u64>(Stat::NullCount)
940 .unwrap()
941 .map(|s| s * 2)
942 );
943 }
944
945 #[test]
946 fn merge_min_bound_same() {
947 let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
950 &StatsSet::of(Stat::Min, Precision::exact(5)),
951 &DType::Primitive(PType::I32, Nullability::NonNullable),
952 );
953 let merged_ref =
954 merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
955 assert_eq!(
956 merged_ref.get_as::<i32>(Stat::Min),
957 Some(Precision::exact(5))
958 );
959 }
960
961 #[test]
962 fn merge_min_bound_bound_lower() {
963 let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
964 &StatsSet::of(Stat::Min, Precision::exact(5)),
965 &DType::Primitive(PType::I32, Nullability::NonNullable),
966 );
967 let merged_ref =
968 merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
969 assert_eq!(
970 merged_ref.get_as::<i32>(Stat::Min),
971 Some(Precision::inexact(4))
972 );
973 }
974
975 #[test]
976 fn test_combine_is_constant() {
977 {
978 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
979 let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
980 let mut stats_ref =
981 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
982 stats_ref
983 .combine_bool_stat::<IsConstant>(
984 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
985 )
986 .unwrap();
987 assert_eq!(
988 stats_ref.get_as::<bool>(Stat::IsConstant),
989 Some(Precision::exact(true))
990 );
991 }
992
993 {
994 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
995 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
996 let mut stats_ref =
997 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
998 stats_ref
999 .combine_bool_stat::<IsConstant>(
1000 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
1001 )
1002 .unwrap();
1003 assert_eq!(
1004 stats_ref.get_as::<bool>(Stat::IsConstant),
1005 Some(Precision::exact(true))
1006 );
1007 }
1008
1009 {
1010 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
1011 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
1012 let mut stats_ref =
1013 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1014 stats_ref
1015 .combine_bool_stat::<IsConstant>(
1016 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
1017 )
1018 .unwrap();
1019 assert_eq!(
1020 stats_ref.get_as::<bool>(Stat::IsConstant),
1021 Some(Precision::exact(false))
1022 );
1023 }
1024 }
1025
1026 #[test]
1027 fn test_combine_sets_boolean_conflict() {
1028 let mut stats1 = StatsSet::from_iter([
1029 (Stat::IsConstant, Precision::exact(true)),
1030 (Stat::IsSorted, Precision::exact(true)),
1031 ]);
1032
1033 let stats2 = StatsSet::from_iter([
1034 (Stat::IsConstant, Precision::exact(false)),
1035 (Stat::IsSorted, Precision::exact(true)),
1036 ]);
1037
1038 let result = stats1.combine_sets(
1039 &stats2,
1040 &DType::Primitive(PType::I32, Nullability::NonNullable),
1041 );
1042 assert!(result.is_err());
1043 }
1044
1045 #[test]
1046 fn test_combine_sets_with_missing_stats() {
1047 let mut stats1 = StatsSet::from_iter([
1048 (Stat::Min, Precision::exact(42)),
1049 (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
1050 ]);
1051
1052 let stats2 = StatsSet::from_iter([
1053 (Stat::Max, Precision::exact(100)),
1054 (Stat::IsStrictSorted, Precision::exact(true)),
1055 ]);
1056
1057 stats1
1058 .combine_sets(
1059 &stats2,
1060 &DType::Primitive(PType::I32, Nullability::NonNullable),
1061 )
1062 .unwrap();
1063
1064 let stats_ref =
1065 stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1066
1067 assert_eq!(
1069 stats_ref.get_as::<i32>(Stat::Min),
1070 Some(Precision::exact(42))
1071 );
1072 assert_eq!(
1074 stats_ref.get_as::<i32>(Stat::Max),
1075 Some(Precision::exact(100))
1076 );
1077 assert_eq!(
1079 stats_ref.get_as::<bool>(Stat::IsStrictSorted),
1080 Some(Precision::exact(true))
1081 );
1082 }
1083
1084 #[test]
1085 fn test_combine_sets_with_inexact() {
1086 let mut stats1 = StatsSet::from_iter([
1087 (Stat::Min, Precision::exact(42)),
1088 (Stat::Max, Precision::inexact(100)),
1089 (Stat::IsConstant, Precision::exact(false)),
1090 ]);
1091
1092 let stats2 = StatsSet::from_iter([
1093 (Stat::Min, Precision::inexact(40)),
1095 (Stat::Max, Precision::exact(90)),
1096 (Stat::IsSorted, Precision::exact(true)),
1097 ]);
1098
1099 stats1
1100 .combine_sets(
1101 &stats2,
1102 &DType::Primitive(PType::I32, Nullability::NonNullable),
1103 )
1104 .unwrap();
1105
1106 let stats_ref =
1107 stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1108
1109 assert_eq!(
1111 stats_ref.get_as::<i32>(Stat::Min),
1112 Some(Precision::exact(42))
1113 );
1114 assert_eq!(
1116 stats_ref.get_as::<i32>(Stat::Max),
1117 Some(Precision::exact(90))
1118 );
1119 assert_eq!(
1121 stats_ref.get_as::<bool>(Stat::IsSorted),
1122 Some(Precision::exact(true))
1123 );
1124 }
1125}