1use std::fmt::Debug;
5
6use enum_iterator::all;
7use num_traits::CheckedAdd;
8use smallvec::SmallVec;
9use smallvec::smallvec;
10use vortex_error::VortexError;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_err;
14use vortex_error::vortex_panic;
15
16use crate::dtype::DType;
17use crate::expr::stats::IsConstant;
18use crate::expr::stats::IsSorted;
19use crate::expr::stats::IsStrictSorted;
20use crate::expr::stats::Max;
21use crate::expr::stats::Min;
22use crate::expr::stats::NaNCount;
23use crate::expr::stats::NullCount;
24use crate::expr::stats::Precision;
25use crate::expr::stats::Stat;
26use crate::expr::stats::StatBound;
27use crate::expr::stats::StatType;
28use crate::expr::stats::StatsProvider;
29use crate::expr::stats::StatsProviderExt;
30use crate::expr::stats::Sum;
31use crate::expr::stats::UncompressedSizeInBytes;
32use crate::scalar::Scalar;
33use crate::scalar::ScalarValue;
34
35pub type StatsArray = [(Stat, Precision<ScalarValue>); 4];
37
38#[derive(Default, Debug, Clone)]
39pub struct StatsSet {
40 values: SmallVec<StatsArray>,
41}
42
43impl StatsSet {
44 pub unsafe fn new_unchecked(values: SmallVec<StatsArray>) -> Self {
50 Self { values }
51 }
52
53 pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
55 Self {
56 values: smallvec![(stat, value)],
57 }
58 }
59
60 pub fn as_mut_typed_ref<'a, 'b>(&'a mut self, dtype: &'b DType) -> MutTypedStatsSetRef<'a, 'b> {
62 MutTypedStatsSetRef {
63 values: self,
64 dtype,
65 }
66 }
67
68 pub fn as_typed_ref<'a, 'b>(&'a self, dtype: &'b DType) -> TypedStatsSetRef<'a, 'b> {
70 TypedStatsSetRef {
71 values: self,
72 dtype,
73 }
74 }
75}
76
77impl StatsSet {
79 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
81 if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
82 *existing = (stat, value);
83 } else {
84 self.values.push((stat, value));
85 }
86 }
87
88 pub fn clear(&mut self, stat: Stat) {
90 self.values.retain(|(s, _)| *s != stat);
91 }
92
93 pub fn retain_only(&mut self, stats: &[Stat]) {
95 self.values.retain(|(s, _)| stats.contains(s));
96 }
97
98 pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
102 self.values.iter()
103 }
104
105 pub fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
107 self.values
108 .iter()
109 .find(|(s, _)| *s == stat)
110 .map(|(_, v)| v.clone())
111 }
112
113 pub fn len(&self) -> usize {
115 self.values.len()
116 }
117
118 pub fn is_empty(&self) -> bool {
120 self.values.is_empty()
121 }
122
123 pub fn get_as<T: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
125 &self,
126 stat: Stat,
127 dtype: &DType,
128 ) -> Option<Precision<T>> {
129 self.get(stat).map(|v| {
130 v.map(|v| {
131 T::try_from(
132 &Scalar::try_new(dtype.clone(), Some(v))
133 .vortex_expect("failed to construct a scalar statistic"),
134 )
135 .unwrap_or_else(|err| {
136 vortex_panic!(
137 err,
138 "Failed to get stat {} as {}",
139 stat,
140 std::any::type_name::<T>()
141 )
142 })
143 })
144 })
145 }
146}
147
148pub struct StatsSetIntoIter(smallvec::IntoIter<StatsArray>);
154
155impl Iterator for StatsSetIntoIter {
156 type Item = (Stat, Precision<ScalarValue>);
157
158 fn next(&mut self) -> Option<Self::Item> {
159 self.0.next()
160 }
161}
162
163impl IntoIterator for StatsSet {
164 type Item = (Stat, Precision<ScalarValue>);
165 type IntoIter = StatsSetIntoIter;
166
167 fn into_iter(self) -> Self::IntoIter {
168 StatsSetIntoIter(self.values.into_iter())
169 }
170}
171
172impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
173 fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
174 let iter = iter.into_iter();
175
176 let mut this = Self {
177 values: SmallVec::new(),
178 };
179 this.extend(iter);
180 this
181 }
182}
183
184impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
185 #[inline]
186 fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
187 iter.into_iter()
188 .for_each(|(stat, value)| self.set(stat, value));
189 }
190}
191
192impl StatsSet {
194 pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
197 self.as_mut_typed_ref(dtype)
198 .merge_ordered(&other.as_typed_ref(dtype));
199 self
200 }
201
202 pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
205 self.as_mut_typed_ref(dtype)
206 .merge_unordered(&other.as_typed_ref(dtype));
207 self
208 }
209
210 pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
212 self.as_mut_typed_ref(dtype)
213 .combine_sets(&other.as_typed_ref(dtype))
214 }
215}
216
217pub struct TypedStatsSetRef<'a, 'b> {
218 pub values: &'a StatsSet,
219 pub dtype: &'b DType,
220}
221
222impl StatsProvider for TypedStatsSetRef<'_, '_> {
223 fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
224 self.values.get(stat).map(|p| {
225 p.map(|sv| {
226 Scalar::try_new(
227 stat.dtype(self.dtype)
228 .vortex_expect("Must have valid dtype if value is present"),
229 Some(sv),
230 )
231 .vortex_expect("failed to construct a scalar statistic")
232 })
233 })
234 }
235
236 fn len(&self) -> usize {
237 self.values.len()
238 }
239}
240
241pub struct MutTypedStatsSetRef<'a, 'b> {
242 pub values: &'a mut StatsSet,
243 pub dtype: &'b DType,
244}
245
246impl MutTypedStatsSetRef<'_, '_> {
247 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
249 self.values.set(stat, value);
250 }
251
252 pub fn clear(&mut self, stat: Stat) {
254 self.values.clear(stat);
255 }
256}
257
258impl StatsProvider for MutTypedStatsSetRef<'_, '_> {
259 fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
260 self.values.get(stat).map(|p| {
261 p.map(|sv| {
262 Scalar::try_new(
263 stat.dtype(self.dtype)
264 .vortex_expect("Must have valid dtype if value is present"),
265 Some(sv),
266 )
267 .vortex_expect("failed to construct a scalar statistic")
268 })
269 })
270 }
271
272 fn len(&self) -> usize {
273 self.values.len()
274 }
275}
276
277impl MutTypedStatsSetRef<'_, '_> {
279 pub fn merge_ordered(mut self, other: &TypedStatsSetRef) -> Self {
282 for s in all::<Stat>() {
283 match s {
284 Stat::IsConstant => self.merge_is_constant(other),
285 Stat::IsSorted => self.merge_is_sorted(other),
286 Stat::IsStrictSorted => self.merge_is_strict_sorted(other),
287 Stat::Max => self.merge_max(other),
288 Stat::Min => self.merge_min(other),
289 Stat::Sum => self.merge_sum(other),
290 Stat::NullCount => self.merge_null_count(other),
291 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
292 Stat::NaNCount => self.merge_nan_count(other),
293 }
294 }
295
296 self
297 }
298
299 pub fn merge_unordered(mut self, other: &TypedStatsSetRef) -> Self {
302 for s in all::<Stat>() {
303 if !s.is_commutative() {
304 self.clear(s);
305 continue;
306 }
307
308 match s {
309 Stat::IsConstant => self.merge_is_constant(other),
310 Stat::Max => self.merge_max(other),
311 Stat::Min => self.merge_min(other),
312 Stat::Sum => self.merge_sum(other),
313 Stat::NullCount => self.merge_null_count(other),
314 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
315 Stat::IsSorted | Stat::IsStrictSorted => {
316 unreachable!("not commutative")
317 }
318 Stat::NaNCount => self.merge_nan_count(other),
319 }
320 }
321
322 self
323 }
324
325 pub fn combine_sets(&mut self, other: &TypedStatsSetRef) -> VortexResult<()> {
327 let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
328 for s in other_stats {
329 match s {
330 Stat::Max => self.combine_bound::<Max>(other)?,
331 Stat::Min => self.combine_bound::<Min>(other)?,
332 Stat::UncompressedSizeInBytes => {
333 self.combine_bound::<UncompressedSizeInBytes>(other)?
334 }
335 Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
336 Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
337 Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
338 Stat::NullCount => self.combine_bound::<NullCount>(other)?,
339 Stat::Sum => self.combine_bound::<Sum>(other)?,
340 Stat::NaNCount => self.combine_bound::<NaNCount>(other)?,
341 }
342 }
343 Ok(())
344 }
345
346 fn combine_bound<S: StatType<Scalar>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
347 where
348 S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
349 {
350 match (self.get_scalar_bound::<S>(), other.get_scalar_bound::<S>()) {
351 (Some(m1), Some(m2)) => {
352 let meet = m1
353 .intersection(&m2)
354 .vortex_expect("can always compare scalar")
355 .ok_or_else(|| {
356 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
357 })?;
358 if meet != m1 {
359 self.set(
360 S::STAT,
361 meet.into_value().map(|s| {
362 s.into_value()
363 .vortex_expect("stat scalar value cannot be null")
364 }),
365 );
366 }
367 }
368 (None, Some(m)) => self.set(
369 S::STAT,
370 m.into_value().map(|s| {
371 s.into_value()
372 .vortex_expect("stat scalar value cannot be null")
373 }),
374 ),
375 (Some(_), _) => (),
376 (None, None) => self.clear(S::STAT),
377 }
378 Ok(())
379 }
380
381 fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
382 where
383 S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
384 {
385 match (
386 self.get_as_bound::<S, bool>(),
387 other.get_as_bound::<S, bool>(),
388 ) {
389 (Some(m1), Some(m2)) => {
390 let intersection = m1
391 .intersection(&m2)
392 .vortex_expect("can always compare boolean")
393 .ok_or_else(|| {
394 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
395 })?;
396 if intersection != m1 {
397 self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
398 }
399 }
400 (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
401 (Some(_), None) => (),
402 (None, None) => self.clear(S::STAT),
403 }
404 Ok(())
405 }
406
407 fn merge_min(&mut self, other: &TypedStatsSetRef) {
408 match (
409 self.get_scalar_bound::<Min>(),
410 other.get_scalar_bound::<Min>(),
411 ) {
412 (Some(m1), Some(m2)) => {
413 let meet = m1.union(&m2).vortex_expect("can compare scalar");
414 if meet != m1 {
415 self.set(
416 Stat::Min,
417 meet.into_value().map(|s| {
418 s.into_value()
419 .vortex_expect("stat scalar value cannot be null")
420 }),
421 );
422 }
423 }
424 _ => self.clear(Stat::Min),
425 }
426 }
427
428 fn merge_max(&mut self, other: &TypedStatsSetRef) {
429 match (
430 self.get_scalar_bound::<Max>(),
431 other.get_scalar_bound::<Max>(),
432 ) {
433 (Some(m1), Some(m2)) => {
434 let meet = m1.union(&m2).vortex_expect("can compare scalar");
435 if meet != m1 {
436 self.set(
437 Stat::Max,
438 meet.into_value().map(|s| {
439 s.into_value()
440 .vortex_expect("stat scalar value cannot be null")
441 }),
442 );
443 }
444 }
445 _ => self.clear(Stat::Max),
446 }
447 }
448
449 fn merge_sum(&mut self, other: &TypedStatsSetRef) {
450 match (
451 self.get_scalar_bound::<Sum>(),
452 other.get_scalar_bound::<Sum>(),
453 ) {
454 (Some(m1), Some(m2)) => {
455 if let Some(scalar_value) =
457 m1.zip(m2).as_exact().and_then(|(s1, s2)| match s1.dtype() {
458 DType::Primitive(..) => s1
459 .as_primitive()
460 .checked_add(&s2.as_primitive())
461 .and_then(|pscalar| pscalar.pvalue().map(ScalarValue::Primitive)),
462 DType::Decimal(..) => s1
463 .as_decimal()
464 .checked_binary_numeric(
465 &s2.as_decimal(),
466 crate::scalar::NumericOperator::Add,
467 )
468 .map(|scalar| {
469 ScalarValue::Decimal(
470 scalar
471 .decimal_value()
472 .vortex_expect("no decimal value in scalar"),
473 )
474 }),
475 _ => None,
476 })
477 {
478 self.set(Stat::Sum, Precision::Exact(scalar_value));
479 }
480 }
481 _ => self.clear(Stat::Sum),
482 }
483 }
484
485 fn merge_is_constant(&mut self, other: &TypedStatsSetRef) {
486 let self_const = self.get_as(Stat::IsConstant);
487 let other_const = other.get_as(Stat::IsConstant);
488 let self_min = self.get(Stat::Min);
489 let other_min = other.get(Stat::Min);
490
491 if let (
492 Some(Precision::Exact(self_const)),
493 Some(Precision::Exact(other_const)),
494 Some(Precision::Exact(self_min)),
495 Some(Precision::Exact(other_min)),
496 ) = (self_const, other_const, self_min, other_min)
497 {
498 if self_const && other_const && self_min == other_min {
499 self.set(Stat::IsConstant, Precision::exact(true));
500 } else {
501 self.set(Stat::IsConstant, Precision::inexact(false));
502 }
503 }
504 self.set(Stat::IsConstant, Precision::exact(false));
505 }
506
507 fn merge_is_sorted(&mut self, other: &TypedStatsSetRef) {
508 self.merge_sortedness_stat(other, Stat::IsSorted, PartialOrd::le)
509 }
510
511 fn merge_is_strict_sorted(&mut self, other: &TypedStatsSetRef) {
512 self.merge_sortedness_stat(other, Stat::IsStrictSorted, PartialOrd::lt)
513 }
514
515 fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
516 &mut self,
517 other: &TypedStatsSetRef,
518 stat: Stat,
519 cmp: F,
520 ) {
521 if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
522 == (self.get_as(stat), other.get_as(stat))
523 {
524 if let (Some(self_max), Some(other_min)) = (
529 self.get_scalar_bound::<Max>(),
530 other.get_scalar_bound::<Min>(),
531 ) {
532 return if cmp(&self_max.max_value(), &other_min.min_value()) {
533 } else {
535 self.set(stat, Precision::inexact(false));
536 };
537 }
538 }
539 self.clear(stat);
540 }
541
542 fn merge_null_count(&mut self, other: &TypedStatsSetRef) {
543 self.merge_sum_stat(Stat::NullCount, other)
544 }
545
546 fn merge_nan_count(&mut self, other: &TypedStatsSetRef) {
547 self.merge_sum_stat(Stat::NaNCount, other)
548 }
549
550 fn merge_uncompressed_size_in_bytes(&mut self, other: &TypedStatsSetRef) {
551 self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
552 }
553
554 fn merge_sum_stat(&mut self, stat: Stat, other: &TypedStatsSetRef) {
555 match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
556 (Some(nc1), Some(nc2)) => {
557 self.set(
558 stat,
559 nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
560 );
561 }
562 _ => self.clear(stat),
563 }
564 }
565}
566
567#[cfg(test)]
568mod test {
569 use enum_iterator::all;
570 use itertools::Itertools;
571 use smallvec::smallvec;
572
573 use crate::LEGACY_SESSION;
574 use crate::VortexSessionExecute;
575 use crate::arrays::PrimitiveArray;
576 use crate::dtype::DType;
577 use crate::dtype::Nullability;
578 use crate::dtype::PType;
579 use crate::expr::stats::IsConstant;
580 use crate::expr::stats::Precision;
581 use crate::expr::stats::Stat;
582 use crate::expr::stats::StatsProvider;
583 use crate::expr::stats::StatsProviderExt;
584 use crate::stats::StatsSet;
585 use crate::stats::stats_set::Scalar;
586
587 #[test]
588 fn test_iter() {
589 let set = unsafe {
591 StatsSet::new_unchecked(smallvec![
592 (Stat::Max, Precision::exact(100)),
593 (Stat::Min, Precision::exact(42)),
594 ])
595 };
596 let mut iter = set.iter();
597 let first = iter.next().unwrap().clone();
598 assert_eq!(first.0, Stat::Max);
599 assert_eq!(
600 first.1.map(
601 |f| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(f)).unwrap()).unwrap()
602 ),
603 Precision::exact(100)
604 );
605 let snd = iter.next().unwrap().clone();
606 assert_eq!(snd.0, Stat::Min);
607 assert_eq!(
608 snd.1.map(
609 |s| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(s)).unwrap()).unwrap()
610 ),
611 Precision::exact(42)
612 );
613 }
614
615 #[test]
616 fn into_iter() {
617 let mut set = unsafe {
619 StatsSet::new_unchecked(smallvec![
620 (Stat::Max, Precision::exact(100)),
621 (Stat::Min, Precision::exact(42)),
622 ])
623 }
624 .into_iter();
625 let (stat, first) = set.next().unwrap();
626 assert_eq!(stat, Stat::Max);
627 assert_eq!(
628 first.map(
629 |f| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(f)).unwrap()).unwrap()
630 ),
631 Precision::exact(100)
632 );
633 let snd = set.next().unwrap();
634 assert_eq!(snd.0, Stat::Min);
635 assert_eq!(
636 snd.1.map(
637 |s| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(s)).unwrap()).unwrap()
638 ),
639 Precision::exact(42)
640 );
641 }
642
643 #[test]
644 fn merge_constant() {
645 let first = StatsSet::from_iter([
646 (Stat::Min, Precision::exact(42)),
647 (Stat::IsConstant, Precision::exact(true)),
648 ])
649 .merge_ordered(
650 &StatsSet::from_iter([
651 (Stat::Min, Precision::inexact(42)),
652 (Stat::IsConstant, Precision::exact(true)),
653 ]),
654 &DType::Primitive(PType::I32, Nullability::NonNullable),
655 );
656
657 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
658 assert_eq!(
659 first_ref.get_as::<bool>(Stat::IsConstant),
660 Some(Precision::exact(false))
661 );
662 assert_eq!(
663 first_ref.get_as::<i32>(Stat::Min),
664 Some(Precision::exact(42))
665 );
666 }
667
668 #[test]
669 fn merge_into_min() {
670 let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
671 &StatsSet::default(),
672 &DType::Primitive(PType::I32, Nullability::NonNullable),
673 );
674
675 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
676 assert!(first_ref.get(Stat::Min).is_none());
677 }
678
679 #[test]
680 fn merge_from_min() {
681 let first = StatsSet::default().merge_ordered(
682 &StatsSet::of(Stat::Min, Precision::exact(42)),
683 &DType::Primitive(PType::I32, Nullability::NonNullable),
684 );
685
686 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
687 assert!(first_ref.get(Stat::Min).is_none());
688 }
689
690 #[test]
691 fn merge_mins() {
692 let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
693 &StatsSet::of(Stat::Min, Precision::exact(42)),
694 &DType::Primitive(PType::I32, Nullability::NonNullable),
695 );
696
697 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
698 assert_eq!(
699 first_ref.get_as::<i32>(Stat::Min),
700 Some(Precision::exact(37))
701 );
702 }
703
704 #[test]
705 fn merge_into_bound_max() {
706 let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
707 &StatsSet::default(),
708 &DType::Primitive(PType::I32, Nullability::NonNullable),
709 );
710 assert!(first.get(Stat::Max).is_none());
711 }
712
713 #[test]
714 fn merge_from_max() {
715 let first = StatsSet::default().merge_ordered(
716 &StatsSet::of(Stat::Max, Precision::exact(42)),
717 &DType::Primitive(PType::I32, Nullability::NonNullable),
718 );
719 assert!(first.get(Stat::Max).is_none());
720 }
721
722 #[test]
723 fn merge_maxes() {
724 let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
725 &StatsSet::of(Stat::Max, Precision::exact(42)),
726 &DType::Primitive(PType::I32, Nullability::NonNullable),
727 );
728 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
729 assert_eq!(
730 first_ref.get_as::<i32>(Stat::Max),
731 Some(Precision::exact(42))
732 );
733 }
734
735 #[test]
736 fn merge_maxes_bound() {
737 let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
738 let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
739 .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
740 let first_ref = first.as_typed_ref(&dtype);
741 assert_eq!(
742 first_ref.get_as::<i32>(Stat::Max),
743 Some(Precision::inexact(43))
744 );
745 }
746
747 #[test]
748 fn merge_into_scalar() {
749 let first = StatsSet::of(Stat::Sum, Precision::exact(42i64)).merge_ordered(
752 &StatsSet::default(),
753 &DType::Primitive(PType::I32, Nullability::NonNullable),
754 );
755 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
756 assert!(first_ref.get(Stat::Sum).is_none());
757 }
758
759 #[test]
760 fn merge_from_scalar() {
761 let first = StatsSet::default().merge_ordered(
764 &StatsSet::of(Stat::Sum, Precision::exact(42i64)),
765 &DType::Primitive(PType::I32, Nullability::NonNullable),
766 );
767 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
768 assert!(first_ref.get(Stat::Sum).is_none());
769 }
770
771 #[test]
772 fn merge_scalars() {
773 let first = StatsSet::of(Stat::Sum, Precision::exact(37i64)).merge_ordered(
776 &StatsSet::of(Stat::Sum, Precision::exact(42i64)),
777 &DType::Primitive(PType::I32, Nullability::NonNullable),
778 );
779 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
780 assert_eq!(
781 first_ref.get_as::<i64>(Stat::Sum),
782 Some(Precision::exact(79i64))
783 );
784 }
785
786 #[test]
787 fn merge_into_sortedness() {
788 let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
789 &StatsSet::default(),
790 &DType::Primitive(PType::I32, Nullability::NonNullable),
791 );
792 assert!(first.get(Stat::IsStrictSorted).is_none());
793 }
794
795 #[test]
796 fn merge_from_sortedness() {
797 let first = StatsSet::default().merge_ordered(
798 &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
799 &DType::Primitive(PType::I32, Nullability::NonNullable),
800 );
801 assert!(first.get(Stat::IsStrictSorted).is_none());
802 }
803
804 #[test]
805 fn merge_sortedness() {
806 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
807 first.set(Stat::Max, Precision::exact(1));
808 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
809 second.set(Stat::Min, Precision::exact(2));
810 first = first.merge_ordered(
811 &second,
812 &DType::Primitive(PType::I32, Nullability::NonNullable),
813 );
814
815 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
816 assert_eq!(
817 first_ref.get_as::<bool>(Stat::IsStrictSorted),
818 Some(Precision::exact(true))
819 );
820 }
821
822 #[test]
823 fn merge_sortedness_out_of_order() {
824 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
825 first.set(Stat::Min, Precision::exact(1));
826 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
827 second.set(Stat::Max, Precision::exact(2));
828 second = second.merge_ordered(
829 &first,
830 &DType::Primitive(PType::I32, Nullability::NonNullable),
831 );
832
833 let second_ref =
834 second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
835 assert_eq!(
836 second_ref.get_as::<bool>(Stat::IsStrictSorted),
837 Some(Precision::inexact(false))
838 );
839 }
840
841 #[test]
842 fn merge_sortedness_only_one_sorted() {
843 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
844 first.set(Stat::Max, Precision::exact(1));
845 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
846 second.set(Stat::Min, Precision::exact(2));
847 first.merge_ordered(
848 &second,
849 &DType::Primitive(PType::I32, Nullability::NonNullable),
850 );
851
852 let second_ref =
853 second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
854 assert_eq!(
855 second_ref.get_as::<bool>(Stat::IsStrictSorted),
856 Some(Precision::exact(false))
857 );
858 }
859
860 #[test]
861 fn merge_sortedness_missing_min() {
862 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
863 first.set(Stat::Max, Precision::exact(1));
864 let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
865 first = first.merge_ordered(
866 &second,
867 &DType::Primitive(PType::I32, Nullability::NonNullable),
868 );
869 assert!(first.get(Stat::IsStrictSorted).is_none());
870 }
871
872 #[test]
873 fn merge_sortedness_bound_min() {
874 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
875 first.set(Stat::Max, Precision::exact(1));
876 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
877 second.set(Stat::Min, Precision::inexact(2));
878 first = first.merge_ordered(
879 &second,
880 &DType::Primitive(PType::I32, Nullability::NonNullable),
881 );
882
883 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
884 assert_eq!(
885 first_ref.get_as::<bool>(Stat::IsStrictSorted),
886 Some(Precision::exact(true))
887 );
888 }
889
890 #[test]
891 fn merge_unordered() {
892 let array =
893 PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
894 let all_stats = all::<Stat>()
895 .filter(|s| !matches!(s, Stat::Sum))
896 .filter(|s| !matches!(s, Stat::NaNCount))
897 .collect_vec();
898 array
899 .statistics()
900 .compute_all(&all_stats, &mut LEGACY_SESSION.create_execution_ctx())
901 .unwrap();
902
903 let stats = array.statistics().to_owned();
904 for stat in &all_stats {
905 assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
906 }
907
908 let merged = stats.clone().merge_unordered(
909 &stats,
910 &DType::Primitive(PType::I32, Nullability::NonNullable),
911 );
912 for stat in &all_stats {
913 assert_eq!(
914 merged.get(*stat).is_some(),
915 stat.is_commutative(),
916 "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
917 )
918 }
919
920 let merged_ref = merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
921 let stats_ref = stats.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
922
923 assert_eq!(
924 merged_ref.get_as::<i32>(Stat::Min),
925 stats_ref.get_as::<i32>(Stat::Min)
926 );
927 assert_eq!(
928 merged_ref.get_as::<i32>(Stat::Max),
929 stats_ref.get_as::<i32>(Stat::Max)
930 );
931 assert_eq!(
932 merged_ref.get_as::<u64>(Stat::NullCount).unwrap(),
933 stats_ref
934 .get_as::<u64>(Stat::NullCount)
935 .unwrap()
936 .map(|s| s * 2)
937 );
938 }
939
940 #[test]
941 fn merge_min_bound_same() {
942 let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
945 &StatsSet::of(Stat::Min, Precision::exact(5)),
946 &DType::Primitive(PType::I32, Nullability::NonNullable),
947 );
948 let merged_ref =
949 merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
950 assert_eq!(
951 merged_ref.get_as::<i32>(Stat::Min),
952 Some(Precision::exact(5))
953 );
954 }
955
956 #[test]
957 fn merge_min_bound_bound_lower() {
958 let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
959 &StatsSet::of(Stat::Min, Precision::exact(5)),
960 &DType::Primitive(PType::I32, Nullability::NonNullable),
961 );
962 let merged_ref =
963 merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
964 assert_eq!(
965 merged_ref.get_as::<i32>(Stat::Min),
966 Some(Precision::inexact(4))
967 );
968 }
969
970 #[test]
971 fn test_combine_is_constant() {
972 {
973 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
974 let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
975 let mut stats_ref =
976 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
977 stats_ref
978 .combine_bool_stat::<IsConstant>(
979 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
980 )
981 .unwrap();
982 assert_eq!(
983 stats_ref.get_as::<bool>(Stat::IsConstant),
984 Some(Precision::exact(true))
985 );
986 }
987
988 {
989 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
990 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
991 let mut stats_ref =
992 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
993 stats_ref
994 .combine_bool_stat::<IsConstant>(
995 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
996 )
997 .unwrap();
998 assert_eq!(
999 stats_ref.get_as::<bool>(Stat::IsConstant),
1000 Some(Precision::exact(true))
1001 );
1002 }
1003
1004 {
1005 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
1006 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
1007 let mut stats_ref =
1008 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1009 stats_ref
1010 .combine_bool_stat::<IsConstant>(
1011 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
1012 )
1013 .unwrap();
1014 assert_eq!(
1015 stats_ref.get_as::<bool>(Stat::IsConstant),
1016 Some(Precision::exact(false))
1017 );
1018 }
1019 }
1020
1021 #[test]
1022 fn test_combine_sets_boolean_conflict() {
1023 let mut stats1 = StatsSet::from_iter([
1024 (Stat::IsConstant, Precision::exact(true)),
1025 (Stat::IsSorted, Precision::exact(true)),
1026 ]);
1027
1028 let stats2 = StatsSet::from_iter([
1029 (Stat::IsConstant, Precision::exact(false)),
1030 (Stat::IsSorted, Precision::exact(true)),
1031 ]);
1032
1033 let result = stats1.combine_sets(
1034 &stats2,
1035 &DType::Primitive(PType::I32, Nullability::NonNullable),
1036 );
1037 assert!(result.is_err());
1038 }
1039
1040 #[test]
1041 fn test_combine_sets_with_missing_stats() {
1042 let mut stats1 = StatsSet::from_iter([
1043 (Stat::Min, Precision::exact(42)),
1044 (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
1045 ]);
1046
1047 let stats2 = StatsSet::from_iter([
1048 (Stat::Max, Precision::exact(100)),
1049 (Stat::IsStrictSorted, Precision::exact(true)),
1050 ]);
1051
1052 stats1
1053 .combine_sets(
1054 &stats2,
1055 &DType::Primitive(PType::I32, Nullability::NonNullable),
1056 )
1057 .unwrap();
1058
1059 let stats_ref =
1060 stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1061
1062 assert_eq!(
1064 stats_ref.get_as::<i32>(Stat::Min),
1065 Some(Precision::exact(42))
1066 );
1067 assert_eq!(
1069 stats_ref.get_as::<i32>(Stat::Max),
1070 Some(Precision::exact(100))
1071 );
1072 assert_eq!(
1074 stats_ref.get_as::<bool>(Stat::IsStrictSorted),
1075 Some(Precision::exact(true))
1076 );
1077 }
1078
1079 #[test]
1080 fn test_combine_sets_with_inexact() {
1081 let mut stats1 = StatsSet::from_iter([
1082 (Stat::Min, Precision::exact(42)),
1083 (Stat::Max, Precision::inexact(100)),
1084 (Stat::IsConstant, Precision::exact(false)),
1085 ]);
1086
1087 let stats2 = StatsSet::from_iter([
1088 (Stat::Min, Precision::inexact(40)),
1090 (Stat::Max, Precision::exact(90)),
1091 (Stat::IsSorted, Precision::exact(true)),
1092 ]);
1093
1094 stats1
1095 .combine_sets(
1096 &stats2,
1097 &DType::Primitive(PType::I32, Nullability::NonNullable),
1098 )
1099 .unwrap();
1100
1101 let stats_ref =
1102 stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1103
1104 assert_eq!(
1106 stats_ref.get_as::<i32>(Stat::Min),
1107 Some(Precision::exact(42))
1108 );
1109 assert_eq!(
1111 stats_ref.get_as::<i32>(Stat::Max),
1112 Some(Precision::exact(90))
1113 );
1114 assert_eq!(
1116 stats_ref.get_as::<bool>(Stat::IsSorted),
1117 Some(Precision::exact(true))
1118 );
1119 }
1120}