1use std::fmt::Debug;
5
6use enum_iterator::Sequence;
7use enum_iterator::all;
8use num_traits::CheckedAdd;
9use vortex_dtype::DType;
10use vortex_error::VortexError;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_err;
14use vortex_error::vortex_panic;
15
16use crate::expr::stats::IsConstant;
17use crate::expr::stats::IsSorted;
18use crate::expr::stats::IsStrictSorted;
19use crate::expr::stats::Max;
20use crate::expr::stats::Min;
21use crate::expr::stats::NaNCount;
22use crate::expr::stats::NullCount;
23use crate::expr::stats::Precision;
24use crate::expr::stats::Stat;
25use crate::expr::stats::StatBound;
26use crate::expr::stats::StatType;
27use crate::expr::stats::StatsProvider;
28use crate::expr::stats::StatsProviderExt;
29use crate::expr::stats::Sum;
30use crate::expr::stats::UncompressedSizeInBytes;
31use crate::scalar::Scalar;
32use crate::scalar::ScalarValue;
33
34#[derive(Default, Debug, Clone)]
35pub struct StatsSet {
36 values: Vec<(Stat, Precision<ScalarValue>)>,
37}
38
39impl StatsSet {
40 pub unsafe fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
46 Self { values }
47 }
48
49 pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
51 unsafe { Self::new_unchecked(vec![(stat, value)]) }
53 }
54
55 fn reserve_full_capacity(&mut self) {
56 if self.values.capacity() < Stat::CARDINALITY {
57 self.values
58 .reserve_exact(Stat::CARDINALITY - self.values.capacity());
59 }
60 }
61
62 pub fn as_mut_typed_ref<'a, 'b>(&'a mut self, dtype: &'b DType) -> MutTypedStatsSetRef<'a, 'b> {
64 MutTypedStatsSetRef {
65 values: self,
66 dtype,
67 }
68 }
69
70 pub fn as_typed_ref<'a, 'b>(&'a self, dtype: &'b DType) -> TypedStatsSetRef<'a, 'b> {
72 TypedStatsSetRef {
73 values: self,
74 dtype,
75 }
76 }
77}
78
79impl StatsSet {
81 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
83 self.reserve_full_capacity();
84
85 if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
86 *existing = (stat, value);
87 } else {
88 self.values.push((stat, value));
89 }
90 }
91
92 pub fn clear(&mut self, stat: Stat) {
94 self.values.retain(|(s, _)| *s != stat);
95 }
96
97 pub fn retain_only(&mut self, stats: &[Stat]) {
99 self.values.retain(|(s, _)| stats.contains(s));
100 }
101
102 pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
106 self.values.iter()
107 }
108
109 pub fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
111 self.values
112 .iter()
113 .find(|(s, _)| *s == stat)
114 .map(|(_, v)| v.clone())
115 }
116
117 pub fn len(&self) -> usize {
119 self.values.len()
120 }
121
122 pub fn is_empty(&self) -> bool {
124 self.values.is_empty()
125 }
126
127 pub fn get_as<T: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
129 &self,
130 stat: Stat,
131 dtype: &DType,
132 ) -> Option<Precision<T>> {
133 self.get(stat).map(|v| {
134 v.map(|v| {
135 T::try_from(
136 &Scalar::try_new(dtype.clone(), Some(v))
137 .vortex_expect("failed to construct a scalar statistic"),
138 )
139 .unwrap_or_else(|err| {
140 vortex_panic!(
141 err,
142 "Failed to get stat {} as {}",
143 stat,
144 std::any::type_name::<T>()
145 )
146 })
147 })
148 })
149 }
150}
151
152pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
158
159impl Iterator for StatsSetIntoIter {
160 type Item = (Stat, Precision<ScalarValue>);
161
162 fn next(&mut self) -> Option<Self::Item> {
163 self.0.next()
164 }
165}
166
167impl IntoIterator for StatsSet {
168 type Item = (Stat, Precision<ScalarValue>);
169 type IntoIter = StatsSetIntoIter;
170
171 fn into_iter(self) -> Self::IntoIter {
172 StatsSetIntoIter(self.values.into_iter())
173 }
174}
175
176impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
177 fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
178 let iter = iter.into_iter();
179 let mut values = Vec::default();
180 values.reserve_exact(Stat::CARDINALITY);
181
182 let mut this = Self { values };
183 this.extend(iter);
184 this
185 }
186}
187
188impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
189 #[inline]
190 fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
191 let iter = iter.into_iter();
192 self.reserve_full_capacity();
193
194 iter.for_each(|(stat, value)| self.set(stat, value));
195 }
196}
197
198impl StatsSet {
200 pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
203 self.as_mut_typed_ref(dtype)
204 .merge_ordered(&other.as_typed_ref(dtype));
205 self
206 }
207
208 pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
211 self.as_mut_typed_ref(dtype)
212 .merge_unordered(&other.as_typed_ref(dtype));
213 self
214 }
215
216 pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
218 self.as_mut_typed_ref(dtype)
219 .combine_sets(&other.as_typed_ref(dtype))
220 }
221}
222
223pub struct TypedStatsSetRef<'a, 'b> {
224 pub values: &'a StatsSet,
225 pub dtype: &'b DType,
226}
227
228impl StatsProvider for TypedStatsSetRef<'_, '_> {
229 fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
230 self.values.get(stat).map(|p| {
231 p.map(|sv| {
232 Scalar::try_new(
233 stat.dtype(self.dtype)
234 .vortex_expect("Must have valid dtype if value is present"),
235 Some(sv),
236 )
237 .vortex_expect("failed to construct a scalar statistic")
238 })
239 })
240 }
241
242 fn len(&self) -> usize {
243 self.values.len()
244 }
245}
246
247pub struct MutTypedStatsSetRef<'a, 'b> {
248 pub values: &'a mut StatsSet,
249 pub dtype: &'b DType,
250}
251
252impl MutTypedStatsSetRef<'_, '_> {
253 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
255 self.values.set(stat, value);
256 }
257
258 pub fn clear(&mut self, stat: Stat) {
260 self.values.clear(stat);
261 }
262}
263
264impl StatsProvider for MutTypedStatsSetRef<'_, '_> {
265 fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
266 self.values.get(stat).map(|p| {
267 p.map(|sv| {
268 Scalar::try_new(
269 stat.dtype(self.dtype)
270 .vortex_expect("Must have valid dtype if value is present"),
271 Some(sv),
272 )
273 .vortex_expect("failed to construct a scalar statistic")
274 })
275 })
276 }
277
278 fn len(&self) -> usize {
279 self.values.len()
280 }
281}
282
283impl MutTypedStatsSetRef<'_, '_> {
285 pub fn merge_ordered(mut self, other: &TypedStatsSetRef) -> Self {
288 for s in all::<Stat>() {
289 match s {
290 Stat::IsConstant => self.merge_is_constant(other),
291 Stat::IsSorted => self.merge_is_sorted(other),
292 Stat::IsStrictSorted => self.merge_is_strict_sorted(other),
293 Stat::Max => self.merge_max(other),
294 Stat::Min => self.merge_min(other),
295 Stat::Sum => self.merge_sum(other),
296 Stat::NullCount => self.merge_null_count(other),
297 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
298 Stat::NaNCount => self.merge_nan_count(other),
299 }
300 }
301
302 self
303 }
304
305 pub fn merge_unordered(mut self, other: &TypedStatsSetRef) -> Self {
308 for s in all::<Stat>() {
309 if !s.is_commutative() {
310 self.clear(s);
311 continue;
312 }
313
314 match s {
315 Stat::IsConstant => self.merge_is_constant(other),
316 Stat::Max => self.merge_max(other),
317 Stat::Min => self.merge_min(other),
318 Stat::Sum => self.merge_sum(other),
319 Stat::NullCount => self.merge_null_count(other),
320 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
321 Stat::IsSorted | Stat::IsStrictSorted => {
322 unreachable!("not commutative")
323 }
324 Stat::NaNCount => self.merge_nan_count(other),
325 }
326 }
327
328 self
329 }
330
331 pub fn combine_sets(&mut self, other: &TypedStatsSetRef) -> VortexResult<()> {
333 let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
334 for s in other_stats {
335 match s {
336 Stat::Max => self.combine_bound::<Max>(other)?,
337 Stat::Min => self.combine_bound::<Min>(other)?,
338 Stat::UncompressedSizeInBytes => {
339 self.combine_bound::<UncompressedSizeInBytes>(other)?
340 }
341 Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
342 Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
343 Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
344 Stat::NullCount => self.combine_bound::<NullCount>(other)?,
345 Stat::Sum => self.combine_bound::<Sum>(other)?,
346 Stat::NaNCount => self.combine_bound::<NaNCount>(other)?,
347 }
348 }
349 Ok(())
350 }
351
352 fn combine_bound<S: StatType<Scalar>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
353 where
354 S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
355 {
356 match (self.get_scalar_bound::<S>(), other.get_scalar_bound::<S>()) {
357 (Some(m1), Some(m2)) => {
358 let meet = m1
359 .intersection(&m2)
360 .vortex_expect("can always compare scalar")
361 .ok_or_else(|| {
362 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
363 })?;
364 if meet != m1 {
365 self.set(
366 S::STAT,
367 meet.into_value().map(|s| {
368 s.into_value()
369 .vortex_expect("stat scalar value cannot be null")
370 }),
371 );
372 }
373 }
374 (None, Some(m)) => self.set(
375 S::STAT,
376 m.into_value().map(|s| {
377 s.into_value()
378 .vortex_expect("stat scalar value cannot be null")
379 }),
380 ),
381 (Some(_), _) => (),
382 (None, None) => self.clear(S::STAT),
383 }
384 Ok(())
385 }
386
387 fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
388 where
389 S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
390 {
391 match (
392 self.get_as_bound::<S, bool>(),
393 other.get_as_bound::<S, bool>(),
394 ) {
395 (Some(m1), Some(m2)) => {
396 let intersection = m1
397 .intersection(&m2)
398 .vortex_expect("can always compare boolean")
399 .ok_or_else(|| {
400 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
401 })?;
402 if intersection != m1 {
403 self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
404 }
405 }
406 (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
407 (Some(_), None) => (),
408 (None, None) => self.clear(S::STAT),
409 }
410 Ok(())
411 }
412
413 fn merge_min(&mut self, other: &TypedStatsSetRef) {
414 match (
415 self.get_scalar_bound::<Min>(),
416 other.get_scalar_bound::<Min>(),
417 ) {
418 (Some(m1), Some(m2)) => {
419 let meet = m1.union(&m2).vortex_expect("can compare scalar");
420 if meet != m1 {
421 self.set(
422 Stat::Min,
423 meet.into_value().map(|s| {
424 s.into_value()
425 .vortex_expect("stat scalar value cannot be null")
426 }),
427 );
428 }
429 }
430 _ => self.clear(Stat::Min),
431 }
432 }
433
434 fn merge_max(&mut self, other: &TypedStatsSetRef) {
435 match (
436 self.get_scalar_bound::<Max>(),
437 other.get_scalar_bound::<Max>(),
438 ) {
439 (Some(m1), Some(m2)) => {
440 let meet = m1.union(&m2).vortex_expect("can compare scalar");
441 if meet != m1 {
442 self.set(
443 Stat::Max,
444 meet.into_value().map(|s| {
445 s.into_value()
446 .vortex_expect("stat scalar value cannot be null")
447 }),
448 );
449 }
450 }
451 _ => self.clear(Stat::Max),
452 }
453 }
454
455 fn merge_sum(&mut self, other: &TypedStatsSetRef) {
456 match (
457 self.get_scalar_bound::<Sum>(),
458 other.get_scalar_bound::<Sum>(),
459 ) {
460 (Some(m1), Some(m2)) => {
461 if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
463 s1.as_primitive()
464 .checked_add(&s2.as_primitive())
465 .and_then(|pscalar| pscalar.pvalue().map(ScalarValue::Primitive))
466 }) {
467 self.set(Stat::Sum, Precision::Exact(scalar_value));
468 }
469 }
470 _ => self.clear(Stat::Sum),
471 }
472 }
473
474 fn merge_is_constant(&mut self, other: &TypedStatsSetRef) {
475 let self_const = self.get_as(Stat::IsConstant);
476 let other_const = other.get_as(Stat::IsConstant);
477 let self_min = self.get(Stat::Min);
478 let other_min = other.get(Stat::Min);
479
480 if let (
481 Some(Precision::Exact(self_const)),
482 Some(Precision::Exact(other_const)),
483 Some(Precision::Exact(self_min)),
484 Some(Precision::Exact(other_min)),
485 ) = (self_const, other_const, self_min, other_min)
486 {
487 if self_const && other_const && self_min == other_min {
488 self.set(Stat::IsConstant, Precision::exact(true));
489 } else {
490 self.set(Stat::IsConstant, Precision::inexact(false));
491 }
492 }
493 self.set(Stat::IsConstant, Precision::exact(false));
494 }
495
496 fn merge_is_sorted(&mut self, other: &TypedStatsSetRef) {
497 self.merge_sortedness_stat(other, Stat::IsSorted, PartialOrd::le)
498 }
499
500 fn merge_is_strict_sorted(&mut self, other: &TypedStatsSetRef) {
501 self.merge_sortedness_stat(other, Stat::IsStrictSorted, PartialOrd::lt)
502 }
503
504 fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
505 &mut self,
506 other: &TypedStatsSetRef,
507 stat: Stat,
508 cmp: F,
509 ) {
510 if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
511 == (self.get_as(stat), other.get_as(stat))
512 {
513 if let (Some(self_max), Some(other_min)) = (
518 self.get_scalar_bound::<Max>(),
519 other.get_scalar_bound::<Min>(),
520 ) {
521 return if cmp(&self_max.max_value(), &other_min.min_value()) {
522 } else {
524 self.set(stat, Precision::inexact(false));
525 };
526 }
527 }
528 self.clear(stat);
529 }
530
531 fn merge_null_count(&mut self, other: &TypedStatsSetRef) {
532 self.merge_sum_stat(Stat::NullCount, other)
533 }
534
535 fn merge_nan_count(&mut self, other: &TypedStatsSetRef) {
536 self.merge_sum_stat(Stat::NaNCount, other)
537 }
538
539 fn merge_uncompressed_size_in_bytes(&mut self, other: &TypedStatsSetRef) {
540 self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
541 }
542
543 fn merge_sum_stat(&mut self, stat: Stat, other: &TypedStatsSetRef) {
544 match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
545 (Some(nc1), Some(nc2)) => {
546 self.set(
547 stat,
548 nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
549 );
550 }
551 _ => self.clear(stat),
552 }
553 }
554}
555
556#[cfg(test)]
557mod test {
558 use enum_iterator::all;
559 use itertools::Itertools;
560 use vortex_dtype::DType;
561 use vortex_dtype::Nullability;
562 use vortex_dtype::PType;
563
564 use crate::arrays::PrimitiveArray;
565 use crate::expr::stats::IsConstant;
566 use crate::expr::stats::Precision;
567 use crate::expr::stats::Stat;
568 use crate::expr::stats::StatsProvider;
569 use crate::expr::stats::StatsProviderExt;
570 use crate::stats::StatsSet;
571 use crate::stats::stats_set::Scalar;
572
573 #[test]
574 fn test_iter() {
575 let set = unsafe {
577 StatsSet::new_unchecked(vec![
578 (Stat::Max, Precision::exact(100)),
579 (Stat::Min, Precision::exact(42)),
580 ])
581 };
582 let mut iter = set.iter();
583 let first = iter.next().unwrap().clone();
584 assert_eq!(first.0, Stat::Max);
585 assert_eq!(
586 first.1.map(
587 |f| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(f)).unwrap()).unwrap()
588 ),
589 Precision::exact(100)
590 );
591 let snd = iter.next().unwrap().clone();
592 assert_eq!(snd.0, Stat::Min);
593 assert_eq!(
594 snd.1.map(
595 |s| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(s)).unwrap()).unwrap()
596 ),
597 Precision::exact(42)
598 );
599 }
600
601 #[test]
602 fn into_iter() {
603 let mut set = unsafe {
605 StatsSet::new_unchecked(vec![
606 (Stat::Max, Precision::exact(100)),
607 (Stat::Min, Precision::exact(42)),
608 ])
609 }
610 .into_iter();
611 let (stat, first) = set.next().unwrap();
612 assert_eq!(stat, Stat::Max);
613 assert_eq!(
614 first.map(
615 |f| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(f)).unwrap()).unwrap()
616 ),
617 Precision::exact(100)
618 );
619 let snd = set.next().unwrap();
620 assert_eq!(snd.0, Stat::Min);
621 assert_eq!(
622 snd.1.map(
623 |s| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(s)).unwrap()).unwrap()
624 ),
625 Precision::exact(42)
626 );
627 }
628
629 #[test]
630 fn merge_constant() {
631 let first = StatsSet::from_iter([
632 (Stat::Min, Precision::exact(42)),
633 (Stat::IsConstant, Precision::exact(true)),
634 ])
635 .merge_ordered(
636 &StatsSet::from_iter([
637 (Stat::Min, Precision::inexact(42)),
638 (Stat::IsConstant, Precision::exact(true)),
639 ]),
640 &DType::Primitive(PType::I32, Nullability::NonNullable),
641 );
642
643 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
644 assert_eq!(
645 first_ref.get_as::<bool>(Stat::IsConstant),
646 Some(Precision::exact(false))
647 );
648 assert_eq!(
649 first_ref.get_as::<i32>(Stat::Min),
650 Some(Precision::exact(42))
651 );
652 }
653
654 #[test]
655 fn merge_into_min() {
656 let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
657 &StatsSet::default(),
658 &DType::Primitive(PType::I32, Nullability::NonNullable),
659 );
660
661 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
662 assert!(first_ref.get(Stat::Min).is_none());
663 }
664
665 #[test]
666 fn merge_from_min() {
667 let first = StatsSet::default().merge_ordered(
668 &StatsSet::of(Stat::Min, Precision::exact(42)),
669 &DType::Primitive(PType::I32, Nullability::NonNullable),
670 );
671
672 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
673 assert!(first_ref.get(Stat::Min).is_none());
674 }
675
676 #[test]
677 fn merge_mins() {
678 let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
679 &StatsSet::of(Stat::Min, Precision::exact(42)),
680 &DType::Primitive(PType::I32, Nullability::NonNullable),
681 );
682
683 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
684 assert_eq!(
685 first_ref.get_as::<i32>(Stat::Min),
686 Some(Precision::exact(37))
687 );
688 }
689
690 #[test]
691 fn merge_into_bound_max() {
692 let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
693 &StatsSet::default(),
694 &DType::Primitive(PType::I32, Nullability::NonNullable),
695 );
696 assert!(first.get(Stat::Max).is_none());
697 }
698
699 #[test]
700 fn merge_from_max() {
701 let first = StatsSet::default().merge_ordered(
702 &StatsSet::of(Stat::Max, Precision::exact(42)),
703 &DType::Primitive(PType::I32, Nullability::NonNullable),
704 );
705 assert!(first.get(Stat::Max).is_none());
706 }
707
708 #[test]
709 fn merge_maxes() {
710 let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
711 &StatsSet::of(Stat::Max, Precision::exact(42)),
712 &DType::Primitive(PType::I32, Nullability::NonNullable),
713 );
714 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
715 assert_eq!(
716 first_ref.get_as::<i32>(Stat::Max),
717 Some(Precision::exact(42))
718 );
719 }
720
721 #[test]
722 fn merge_maxes_bound() {
723 let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
724 let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
725 .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
726 let first_ref = first.as_typed_ref(&dtype);
727 assert_eq!(
728 first_ref.get_as::<i32>(Stat::Max),
729 Some(Precision::inexact(43))
730 );
731 }
732
733 #[test]
734 fn merge_into_scalar() {
735 let first = StatsSet::of(Stat::Sum, Precision::exact(42i64)).merge_ordered(
738 &StatsSet::default(),
739 &DType::Primitive(PType::I32, Nullability::NonNullable),
740 );
741 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
742 assert!(first_ref.get(Stat::Sum).is_none());
743 }
744
745 #[test]
746 fn merge_from_scalar() {
747 let first = StatsSet::default().merge_ordered(
750 &StatsSet::of(Stat::Sum, Precision::exact(42i64)),
751 &DType::Primitive(PType::I32, Nullability::NonNullable),
752 );
753 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
754 assert!(first_ref.get(Stat::Sum).is_none());
755 }
756
757 #[test]
758 fn merge_scalars() {
759 let first = StatsSet::of(Stat::Sum, Precision::exact(37i64)).merge_ordered(
762 &StatsSet::of(Stat::Sum, Precision::exact(42i64)),
763 &DType::Primitive(PType::I32, Nullability::NonNullable),
764 );
765 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
766 assert_eq!(
767 first_ref.get_as::<i64>(Stat::Sum),
768 Some(Precision::exact(79i64))
769 );
770 }
771
772 #[test]
773 fn merge_into_sortedness() {
774 let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
775 &StatsSet::default(),
776 &DType::Primitive(PType::I32, Nullability::NonNullable),
777 );
778 assert!(first.get(Stat::IsStrictSorted).is_none());
779 }
780
781 #[test]
782 fn merge_from_sortedness() {
783 let first = StatsSet::default().merge_ordered(
784 &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
785 &DType::Primitive(PType::I32, Nullability::NonNullable),
786 );
787 assert!(first.get(Stat::IsStrictSorted).is_none());
788 }
789
790 #[test]
791 fn merge_sortedness() {
792 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
793 first.set(Stat::Max, Precision::exact(1));
794 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
795 second.set(Stat::Min, Precision::exact(2));
796 first = first.merge_ordered(
797 &second,
798 &DType::Primitive(PType::I32, Nullability::NonNullable),
799 );
800
801 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
802 assert_eq!(
803 first_ref.get_as::<bool>(Stat::IsStrictSorted),
804 Some(Precision::exact(true))
805 );
806 }
807
808 #[test]
809 fn merge_sortedness_out_of_order() {
810 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
811 first.set(Stat::Min, Precision::exact(1));
812 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
813 second.set(Stat::Max, Precision::exact(2));
814 second = second.merge_ordered(
815 &first,
816 &DType::Primitive(PType::I32, Nullability::NonNullable),
817 );
818
819 let second_ref =
820 second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
821 assert_eq!(
822 second_ref.get_as::<bool>(Stat::IsStrictSorted),
823 Some(Precision::inexact(false))
824 );
825 }
826
827 #[test]
828 fn merge_sortedness_only_one_sorted() {
829 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
830 first.set(Stat::Max, Precision::exact(1));
831 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
832 second.set(Stat::Min, Precision::exact(2));
833 first.merge_ordered(
834 &second,
835 &DType::Primitive(PType::I32, Nullability::NonNullable),
836 );
837
838 let second_ref =
839 second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
840 assert_eq!(
841 second_ref.get_as::<bool>(Stat::IsStrictSorted),
842 Some(Precision::exact(false))
843 );
844 }
845
846 #[test]
847 fn merge_sortedness_missing_min() {
848 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
849 first.set(Stat::Max, Precision::exact(1));
850 let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
851 first = first.merge_ordered(
852 &second,
853 &DType::Primitive(PType::I32, Nullability::NonNullable),
854 );
855 assert!(first.get(Stat::IsStrictSorted).is_none());
856 }
857
858 #[test]
859 fn merge_sortedness_bound_min() {
860 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
861 first.set(Stat::Max, Precision::exact(1));
862 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
863 second.set(Stat::Min, Precision::inexact(2));
864 first = first.merge_ordered(
865 &second,
866 &DType::Primitive(PType::I32, Nullability::NonNullable),
867 );
868
869 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
870 assert_eq!(
871 first_ref.get_as::<bool>(Stat::IsStrictSorted),
872 Some(Precision::exact(true))
873 );
874 }
875
876 #[test]
877 fn merge_unordered() {
878 let array =
879 PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
880 let all_stats = all::<Stat>()
881 .filter(|s| !matches!(s, Stat::Sum))
882 .filter(|s| !matches!(s, Stat::NaNCount))
883 .collect_vec();
884 array.statistics().compute_all(&all_stats).unwrap();
885
886 let stats = array.statistics().to_owned();
887 for stat in &all_stats {
888 assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
889 }
890
891 let merged = stats.clone().merge_unordered(
892 &stats,
893 &DType::Primitive(PType::I32, Nullability::NonNullable),
894 );
895 for stat in &all_stats {
896 assert_eq!(
897 merged.get(*stat).is_some(),
898 stat.is_commutative(),
899 "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
900 )
901 }
902
903 let merged_ref = merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
904 let stats_ref = stats.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
905
906 assert_eq!(
907 merged_ref.get_as::<i32>(Stat::Min),
908 stats_ref.get_as::<i32>(Stat::Min)
909 );
910 assert_eq!(
911 merged_ref.get_as::<i32>(Stat::Max),
912 stats_ref.get_as::<i32>(Stat::Max)
913 );
914 assert_eq!(
915 merged_ref.get_as::<u64>(Stat::NullCount).unwrap(),
916 stats_ref
917 .get_as::<u64>(Stat::NullCount)
918 .unwrap()
919 .map(|s| s * 2)
920 );
921 }
922
923 #[test]
924 fn merge_min_bound_same() {
925 let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
928 &StatsSet::of(Stat::Min, Precision::exact(5)),
929 &DType::Primitive(PType::I32, Nullability::NonNullable),
930 );
931 let merged_ref =
932 merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
933 assert_eq!(
934 merged_ref.get_as::<i32>(Stat::Min),
935 Some(Precision::exact(5))
936 );
937 }
938
939 #[test]
940 fn merge_min_bound_bound_lower() {
941 let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
942 &StatsSet::of(Stat::Min, Precision::exact(5)),
943 &DType::Primitive(PType::I32, Nullability::NonNullable),
944 );
945 let merged_ref =
946 merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
947 assert_eq!(
948 merged_ref.get_as::<i32>(Stat::Min),
949 Some(Precision::inexact(4))
950 );
951 }
952
953 #[test]
954 fn test_combine_is_constant() {
955 {
956 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
957 let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
958 let mut stats_ref =
959 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
960 stats_ref
961 .combine_bool_stat::<IsConstant>(
962 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
963 )
964 .unwrap();
965 assert_eq!(
966 stats_ref.get_as::<bool>(Stat::IsConstant),
967 Some(Precision::exact(true))
968 );
969 }
970
971 {
972 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
973 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
974 let mut stats_ref =
975 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
976 stats_ref
977 .combine_bool_stat::<IsConstant>(
978 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
979 )
980 .unwrap();
981 assert_eq!(
982 stats_ref.get_as::<bool>(Stat::IsConstant),
983 Some(Precision::exact(true))
984 );
985 }
986
987 {
988 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
989 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
990 let mut stats_ref =
991 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
992 stats_ref
993 .combine_bool_stat::<IsConstant>(
994 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
995 )
996 .unwrap();
997 assert_eq!(
998 stats_ref.get_as::<bool>(Stat::IsConstant),
999 Some(Precision::exact(false))
1000 );
1001 }
1002 }
1003
1004 #[test]
1005 fn test_combine_sets_boolean_conflict() {
1006 let mut stats1 = StatsSet::from_iter([
1007 (Stat::IsConstant, Precision::exact(true)),
1008 (Stat::IsSorted, Precision::exact(true)),
1009 ]);
1010
1011 let stats2 = StatsSet::from_iter([
1012 (Stat::IsConstant, Precision::exact(false)),
1013 (Stat::IsSorted, Precision::exact(true)),
1014 ]);
1015
1016 let result = stats1.combine_sets(
1017 &stats2,
1018 &DType::Primitive(PType::I32, Nullability::NonNullable),
1019 );
1020 assert!(result.is_err());
1021 }
1022
1023 #[test]
1024 fn test_combine_sets_with_missing_stats() {
1025 let mut stats1 = StatsSet::from_iter([
1026 (Stat::Min, Precision::exact(42)),
1027 (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
1028 ]);
1029
1030 let stats2 = StatsSet::from_iter([
1031 (Stat::Max, Precision::exact(100)),
1032 (Stat::IsStrictSorted, Precision::exact(true)),
1033 ]);
1034
1035 stats1
1036 .combine_sets(
1037 &stats2,
1038 &DType::Primitive(PType::I32, Nullability::NonNullable),
1039 )
1040 .unwrap();
1041
1042 let stats_ref =
1043 stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1044
1045 assert_eq!(
1047 stats_ref.get_as::<i32>(Stat::Min),
1048 Some(Precision::exact(42))
1049 );
1050 assert_eq!(
1052 stats_ref.get_as::<i32>(Stat::Max),
1053 Some(Precision::exact(100))
1054 );
1055 assert_eq!(
1057 stats_ref.get_as::<bool>(Stat::IsStrictSorted),
1058 Some(Precision::exact(true))
1059 );
1060 }
1061
1062 #[test]
1063 fn test_combine_sets_with_inexact() {
1064 let mut stats1 = StatsSet::from_iter([
1065 (Stat::Min, Precision::exact(42)),
1066 (Stat::Max, Precision::inexact(100)),
1067 (Stat::IsConstant, Precision::exact(false)),
1068 ]);
1069
1070 let stats2 = StatsSet::from_iter([
1071 (Stat::Min, Precision::inexact(40)),
1073 (Stat::Max, Precision::exact(90)),
1074 (Stat::IsSorted, Precision::exact(true)),
1075 ]);
1076
1077 stats1
1078 .combine_sets(
1079 &stats2,
1080 &DType::Primitive(PType::I32, Nullability::NonNullable),
1081 )
1082 .unwrap();
1083
1084 let stats_ref =
1085 stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1086
1087 assert_eq!(
1089 stats_ref.get_as::<i32>(Stat::Min),
1090 Some(Precision::exact(42))
1091 );
1092 assert_eq!(
1094 stats_ref.get_as::<i32>(Stat::Max),
1095 Some(Precision::exact(90))
1096 );
1097 assert_eq!(
1099 stats_ref.get_as::<bool>(Stat::IsSorted),
1100 Some(Precision::exact(true))
1101 );
1102 }
1103}