1use std::fmt::Debug;
5
6use enum_iterator::Sequence;
7use enum_iterator::all;
8use num_traits::CheckedAdd;
9use vortex_dtype::DType;
10use vortex_error::VortexError;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_err;
14use vortex_error::vortex_panic;
15use vortex_scalar::Scalar;
16use vortex_scalar::ScalarValue;
17
18use crate::expr::stats::IsConstant;
19use crate::expr::stats::IsSorted;
20use crate::expr::stats::IsStrictSorted;
21use crate::expr::stats::Max;
22use crate::expr::stats::Min;
23use crate::expr::stats::NaNCount;
24use crate::expr::stats::NullCount;
25use crate::expr::stats::Precision;
26use crate::expr::stats::Stat;
27use crate::expr::stats::StatBound;
28use crate::expr::stats::StatType;
29use crate::expr::stats::StatsProvider;
30use crate::expr::stats::StatsProviderExt;
31use crate::expr::stats::Sum;
32use crate::expr::stats::UncompressedSizeInBytes;
33
34#[derive(Default, Debug, Clone)]
35pub struct StatsSet {
36 values: Vec<(Stat, Precision<ScalarValue>)>,
37}
38
39impl StatsSet {
40 pub unsafe fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
46 Self { values }
47 }
48
49 pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
51 unsafe { Self::new_unchecked(vec![(stat, value)]) }
53 }
54
55 fn reserve_full_capacity(&mut self) {
56 if self.values.capacity() < Stat::CARDINALITY {
57 self.values
58 .reserve_exact(Stat::CARDINALITY - self.values.capacity());
59 }
60 }
61
62 pub fn as_mut_typed_ref<'a, 'b>(&'a mut self, dtype: &'b DType) -> MutTypedStatsSetRef<'a, 'b> {
64 MutTypedStatsSetRef {
65 values: self,
66 dtype,
67 }
68 }
69
70 pub fn as_typed_ref<'a, 'b>(&'a self, dtype: &'b DType) -> TypedStatsSetRef<'a, 'b> {
72 TypedStatsSetRef {
73 values: self,
74 dtype,
75 }
76 }
77}
78
79impl StatsSet {
81 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
83 self.reserve_full_capacity();
84
85 if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
86 *existing = (stat, value);
87 } else {
88 self.values.push((stat, value));
89 }
90 }
91
92 pub fn clear(&mut self, stat: Stat) {
94 self.values.retain(|(s, _)| *s != stat);
95 }
96
97 pub fn retain_only(&mut self, stats: &[Stat]) {
99 self.values.retain(|(s, _)| stats.contains(s));
100 }
101
102 pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
106 self.values.iter()
107 }
108
109 pub fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
111 self.values
112 .iter()
113 .find(|(s, _)| *s == stat)
114 .map(|(_, v)| v.clone())
115 }
116
117 pub fn len(&self) -> usize {
119 self.values.len()
120 }
121
122 pub fn is_empty(&self) -> bool {
124 self.values.is_empty()
125 }
126
127 pub fn get_as<T: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
129 &self,
130 stat: Stat,
131 dtype: &DType,
132 ) -> Option<Precision<T>> {
133 self.get(stat).map(|v| {
134 v.map(|v| {
135 T::try_from(&Scalar::new(dtype.clone(), v)).unwrap_or_else(|err| {
136 vortex_panic!(
137 err,
138 "Failed to get stat {} as {}",
139 stat,
140 std::any::type_name::<T>()
141 )
142 })
143 })
144 })
145 }
146}
147
148pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
154
155impl Iterator for StatsSetIntoIter {
156 type Item = (Stat, Precision<ScalarValue>);
157
158 fn next(&mut self) -> Option<Self::Item> {
159 self.0.next()
160 }
161}
162
163impl IntoIterator for StatsSet {
164 type Item = (Stat, Precision<ScalarValue>);
165 type IntoIter = StatsSetIntoIter;
166
167 fn into_iter(self) -> Self::IntoIter {
168 StatsSetIntoIter(self.values.into_iter())
169 }
170}
171
172impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
173 fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
174 let iter = iter.into_iter();
175 let mut values = Vec::default();
176 values.reserve_exact(Stat::CARDINALITY);
177
178 let mut this = Self { values };
179 this.extend(iter);
180 this
181 }
182}
183
184impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
185 #[inline]
186 fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
187 let iter = iter.into_iter();
188 self.reserve_full_capacity();
189
190 iter.for_each(|(stat, value)| self.set(stat, value));
191 }
192}
193
194impl StatsSet {
196 pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
199 self.as_mut_typed_ref(dtype)
200 .merge_ordered(&other.as_typed_ref(dtype));
201 self
202 }
203
204 pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
207 self.as_mut_typed_ref(dtype)
208 .merge_unordered(&other.as_typed_ref(dtype));
209 self
210 }
211
212 pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
214 self.as_mut_typed_ref(dtype)
215 .combine_sets(&other.as_typed_ref(dtype))
216 }
217}
218
219pub struct TypedStatsSetRef<'a, 'b> {
220 pub values: &'a StatsSet,
221 pub dtype: &'b DType,
222}
223
224impl StatsProvider for TypedStatsSetRef<'_, '_> {
225 fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
226 self.values.get(stat).map(|p| {
227 p.map(|sv| {
228 Scalar::new(
229 stat.dtype(self.dtype)
230 .vortex_expect("Must have valid dtype if value is present"),
231 sv,
232 )
233 })
234 })
235 }
236
237 fn len(&self) -> usize {
238 self.values.len()
239 }
240}
241
242pub struct MutTypedStatsSetRef<'a, 'b> {
243 pub values: &'a mut StatsSet,
244 pub dtype: &'b DType,
245}
246
247impl MutTypedStatsSetRef<'_, '_> {
248 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
250 self.values.set(stat, value);
251 }
252
253 pub fn clear(&mut self, stat: Stat) {
255 self.values.clear(stat);
256 }
257}
258
259impl StatsProvider for MutTypedStatsSetRef<'_, '_> {
260 fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
261 self.values.get(stat).map(|p| {
262 p.map(|sv| {
263 Scalar::new(
264 stat.dtype(self.dtype)
265 .vortex_expect("Must have valid dtype if value is present"),
266 sv,
267 )
268 })
269 })
270 }
271
272 fn len(&self) -> usize {
273 self.values.len()
274 }
275}
276
277impl MutTypedStatsSetRef<'_, '_> {
279 pub fn merge_ordered(mut self, other: &TypedStatsSetRef) -> Self {
282 for s in all::<Stat>() {
283 match s {
284 Stat::IsConstant => self.merge_is_constant(other),
285 Stat::IsSorted => self.merge_is_sorted(other),
286 Stat::IsStrictSorted => self.merge_is_strict_sorted(other),
287 Stat::Max => self.merge_max(other),
288 Stat::Min => self.merge_min(other),
289 Stat::Sum => self.merge_sum(other),
290 Stat::NullCount => self.merge_null_count(other),
291 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
292 Stat::NaNCount => self.merge_nan_count(other),
293 }
294 }
295
296 self
297 }
298
299 pub fn merge_unordered(mut self, other: &TypedStatsSetRef) -> Self {
302 for s in all::<Stat>() {
303 if !s.is_commutative() {
304 self.clear(s);
305 continue;
306 }
307
308 match s {
309 Stat::IsConstant => self.merge_is_constant(other),
310 Stat::Max => self.merge_max(other),
311 Stat::Min => self.merge_min(other),
312 Stat::Sum => self.merge_sum(other),
313 Stat::NullCount => self.merge_null_count(other),
314 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
315 Stat::IsSorted | Stat::IsStrictSorted => {
316 unreachable!("not commutative")
317 }
318 Stat::NaNCount => self.merge_nan_count(other),
319 }
320 }
321
322 self
323 }
324
325 pub fn combine_sets(&mut self, other: &TypedStatsSetRef) -> VortexResult<()> {
327 let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
328 for s in other_stats {
329 match s {
330 Stat::Max => self.combine_bound::<Max>(other)?,
331 Stat::Min => self.combine_bound::<Min>(other)?,
332 Stat::UncompressedSizeInBytes => {
333 self.combine_bound::<UncompressedSizeInBytes>(other)?
334 }
335 Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
336 Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
337 Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
338 Stat::NullCount => self.combine_bound::<NullCount>(other)?,
339 Stat::Sum => self.combine_bound::<Sum>(other)?,
340 Stat::NaNCount => self.combine_bound::<NaNCount>(other)?,
341 }
342 }
343 Ok(())
344 }
345
346 fn combine_bound<S: StatType<Scalar>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
347 where
348 S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
349 {
350 match (self.get_scalar_bound::<S>(), other.get_scalar_bound::<S>()) {
351 (Some(m1), Some(m2)) => {
352 let meet = m1
353 .intersection(&m2)
354 .vortex_expect("can always compare scalar")
355 .ok_or_else(|| {
356 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
357 })?;
358 if meet != m1 {
359 self.set(S::STAT, meet.into_value().map(Scalar::into_value));
360 }
361 }
362 (None, Some(m)) => self.set(S::STAT, m.into_value().map(Scalar::into_value)),
363 (Some(_), _) => (),
364 (None, None) => self.clear(S::STAT),
365 }
366 Ok(())
367 }
368
369 fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
370 where
371 S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
372 {
373 match (
374 self.get_as_bound::<S, bool>(),
375 other.get_as_bound::<S, bool>(),
376 ) {
377 (Some(m1), Some(m2)) => {
378 let intersection = m1
379 .intersection(&m2)
380 .vortex_expect("can always compare boolean")
381 .ok_or_else(|| {
382 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
383 })?;
384 if intersection != m1 {
385 self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
386 }
387 }
388 (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
389 (Some(_), None) => (),
390 (None, None) => self.clear(S::STAT),
391 }
392 Ok(())
393 }
394
395 fn merge_min(&mut self, other: &TypedStatsSetRef) {
396 match (
397 self.get_scalar_bound::<Min>(),
398 other.get_scalar_bound::<Min>(),
399 ) {
400 (Some(m1), Some(m2)) => {
401 let meet = m1.union(&m2).vortex_expect("can compare scalar");
402 if meet != m1 {
403 self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
404 }
405 }
406 _ => self.clear(Stat::Min),
407 }
408 }
409
410 fn merge_max(&mut self, other: &TypedStatsSetRef) {
411 match (
412 self.get_scalar_bound::<Max>(),
413 other.get_scalar_bound::<Max>(),
414 ) {
415 (Some(m1), Some(m2)) => {
416 let meet = m1.union(&m2).vortex_expect("can compare scalar");
417 if meet != m1 {
418 self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
419 }
420 }
421 _ => self.clear(Stat::Max),
422 }
423 }
424
425 fn merge_sum(&mut self, other: &TypedStatsSetRef) {
426 match (
427 self.get_scalar_bound::<Sum>(),
428 other.get_scalar_bound::<Sum>(),
429 ) {
430 (Some(m1), Some(m2)) => {
431 if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
433 s1.as_primitive()
434 .checked_add(&s2.as_primitive())
435 .map(|pscalar| {
436 pscalar
437 .pvalue()
438 .map(|pvalue| {
439 Scalar::primitive_value(
440 pvalue,
441 pscalar.ptype(),
442 pscalar.dtype().nullability(),
443 )
444 .into_value()
445 })
446 .unwrap_or_else(ScalarValue::null)
447 })
448 }) {
449 self.set(Stat::Sum, Precision::Exact(scalar_value));
450 }
451 }
452 _ => self.clear(Stat::Sum),
453 }
454 }
455
456 fn merge_is_constant(&mut self, other: &TypedStatsSetRef) {
457 let self_const = self.get_as(Stat::IsConstant);
458 let other_const = other.get_as(Stat::IsConstant);
459 let self_min = self.get(Stat::Min);
460 let other_min = other.get(Stat::Min);
461
462 if let (
463 Some(Precision::Exact(self_const)),
464 Some(Precision::Exact(other_const)),
465 Some(Precision::Exact(self_min)),
466 Some(Precision::Exact(other_min)),
467 ) = (self_const, other_const, self_min, other_min)
468 {
469 if self_const && other_const && self_min == other_min {
470 self.set(Stat::IsConstant, Precision::exact(true));
471 } else {
472 self.set(Stat::IsConstant, Precision::inexact(false));
473 }
474 }
475 self.set(Stat::IsConstant, Precision::exact(false));
476 }
477
478 fn merge_is_sorted(&mut self, other: &TypedStatsSetRef) {
479 self.merge_sortedness_stat(other, Stat::IsSorted, PartialOrd::le)
480 }
481
482 fn merge_is_strict_sorted(&mut self, other: &TypedStatsSetRef) {
483 self.merge_sortedness_stat(other, Stat::IsStrictSorted, PartialOrd::lt)
484 }
485
486 fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
487 &mut self,
488 other: &TypedStatsSetRef,
489 stat: Stat,
490 cmp: F,
491 ) {
492 if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
493 == (self.get_as(stat), other.get_as(stat))
494 {
495 if let (Some(self_max), Some(other_min)) = (
500 self.get_scalar_bound::<Max>(),
501 other.get_scalar_bound::<Min>(),
502 ) {
503 return if cmp(&self_max.max_value(), &other_min.min_value()) {
504 } else {
506 self.set(stat, Precision::inexact(false));
507 };
508 }
509 }
510 self.clear(stat);
511 }
512
513 fn merge_null_count(&mut self, other: &TypedStatsSetRef) {
514 self.merge_sum_stat(Stat::NullCount, other)
515 }
516
517 fn merge_nan_count(&mut self, other: &TypedStatsSetRef) {
518 self.merge_sum_stat(Stat::NaNCount, other)
519 }
520
521 fn merge_uncompressed_size_in_bytes(&mut self, other: &TypedStatsSetRef) {
522 self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
523 }
524
525 fn merge_sum_stat(&mut self, stat: Stat, other: &TypedStatsSetRef) {
526 match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
527 (Some(nc1), Some(nc2)) => {
528 self.set(
529 stat,
530 nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
531 );
532 }
533 _ => self.clear(stat),
534 }
535 }
536}
537
538#[cfg(test)]
539mod test {
540 use enum_iterator::all;
541 use itertools::Itertools;
542 use vortex_dtype::DType;
543 use vortex_dtype::Nullability;
544 use vortex_dtype::PType;
545
546 use crate::arrays::PrimitiveArray;
547 use crate::expr::stats::IsConstant;
548 use crate::expr::stats::Precision;
549 use crate::expr::stats::Stat;
550 use crate::expr::stats::StatsProvider;
551 use crate::expr::stats::StatsProviderExt;
552 use crate::stats::StatsSet;
553 use crate::stats::stats_set::Scalar;
554
555 #[test]
556 fn test_iter() {
557 let set = unsafe {
559 StatsSet::new_unchecked(vec![
560 (Stat::Max, Precision::exact(100)),
561 (Stat::Min, Precision::exact(42)),
562 ])
563 };
564 let mut iter = set.iter();
565 let first = iter.next().unwrap().clone();
566 assert_eq!(first.0, Stat::Max);
567 assert_eq!(
568 first
569 .1
570 .map(|f| i32::try_from(&Scalar::new(PType::I32.into(), f)).unwrap()),
571 Precision::exact(100)
572 );
573 let snd = iter.next().unwrap().clone();
574 assert_eq!(snd.0, Stat::Min);
575 assert_eq!(
576 snd.1
577 .map(|s| i32::try_from(&Scalar::new(PType::I32.into(), s)).unwrap()),
578 42
579 );
580 }
581
582 #[test]
583 fn into_iter() {
584 let mut set = unsafe {
586 StatsSet::new_unchecked(vec![
587 (Stat::Max, Precision::exact(100)),
588 (Stat::Min, Precision::exact(42)),
589 ])
590 }
591 .into_iter();
592 let (stat, first) = set.next().unwrap();
593 assert_eq!(stat, Stat::Max);
594 assert_eq!(
595 first.map(|f| i32::try_from(&Scalar::new(PType::I32.into(), f)).unwrap()),
596 Precision::exact(100)
597 );
598 let snd = set.next().unwrap();
599 assert_eq!(snd.0, Stat::Min);
600 assert_eq!(
601 snd.1
602 .map(|s| i32::try_from(&Scalar::new(PType::I32.into(), s)).unwrap()),
603 Precision::exact(42)
604 );
605 }
606
607 #[test]
608 fn merge_constant() {
609 let first = StatsSet::from_iter([
610 (Stat::Min, Precision::exact(42)),
611 (Stat::IsConstant, Precision::exact(true)),
612 ])
613 .merge_ordered(
614 &StatsSet::from_iter([
615 (Stat::Min, Precision::inexact(42)),
616 (Stat::IsConstant, Precision::exact(true)),
617 ]),
618 &DType::Primitive(PType::I32, Nullability::NonNullable),
619 );
620
621 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
622 assert_eq!(
623 first_ref.get_as::<bool>(Stat::IsConstant),
624 Some(Precision::exact(false))
625 );
626 assert_eq!(
627 first_ref.get_as::<i32>(Stat::Min),
628 Some(Precision::exact(42))
629 );
630 }
631
632 #[test]
633 fn merge_into_min() {
634 let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
635 &StatsSet::default(),
636 &DType::Primitive(PType::I32, Nullability::NonNullable),
637 );
638
639 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
640 assert!(first_ref.get(Stat::Min).is_none());
641 }
642
643 #[test]
644 fn merge_from_min() {
645 let first = StatsSet::default().merge_ordered(
646 &StatsSet::of(Stat::Min, Precision::exact(42)),
647 &DType::Primitive(PType::I32, Nullability::NonNullable),
648 );
649
650 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
651 assert!(first_ref.get(Stat::Min).is_none());
652 }
653
654 #[test]
655 fn merge_mins() {
656 let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
657 &StatsSet::of(Stat::Min, Precision::exact(42)),
658 &DType::Primitive(PType::I32, Nullability::NonNullable),
659 );
660
661 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
662 assert_eq!(
663 first_ref.get_as::<i32>(Stat::Min),
664 Some(Precision::exact(37))
665 );
666 }
667
668 #[test]
669 fn merge_into_bound_max() {
670 let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
671 &StatsSet::default(),
672 &DType::Primitive(PType::I32, Nullability::NonNullable),
673 );
674 assert!(first.get(Stat::Max).is_none());
675 }
676
677 #[test]
678 fn merge_from_max() {
679 let first = StatsSet::default().merge_ordered(
680 &StatsSet::of(Stat::Max, Precision::exact(42)),
681 &DType::Primitive(PType::I32, Nullability::NonNullable),
682 );
683 assert!(first.get(Stat::Max).is_none());
684 }
685
686 #[test]
687 fn merge_maxes() {
688 let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
689 &StatsSet::of(Stat::Max, Precision::exact(42)),
690 &DType::Primitive(PType::I32, Nullability::NonNullable),
691 );
692 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
693 assert_eq!(
694 first_ref.get_as::<i32>(Stat::Max),
695 Some(Precision::exact(42))
696 );
697 }
698
699 #[test]
700 fn merge_maxes_bound() {
701 let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
702 let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
703 .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
704 let first_ref = first.as_typed_ref(&dtype);
705 assert_eq!(
706 first_ref.get_as::<i32>(Stat::Max),
707 Some(Precision::inexact(43))
708 );
709 }
710
711 #[test]
712 fn merge_into_scalar() {
713 let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
714 &StatsSet::default(),
715 &DType::Primitive(PType::I32, Nullability::NonNullable),
716 );
717 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
718 assert!(first_ref.get(Stat::Sum).is_none());
719 }
720
721 #[test]
722 fn merge_from_scalar() {
723 let first = StatsSet::default().merge_ordered(
724 &StatsSet::of(Stat::Sum, Precision::exact(42)),
725 &DType::Primitive(PType::I32, Nullability::NonNullable),
726 );
727 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
728 assert!(first_ref.get(Stat::Sum).is_none());
729 }
730
731 #[test]
732 fn merge_scalars() {
733 let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
734 &StatsSet::of(Stat::Sum, Precision::exact(42)),
735 &DType::Primitive(PType::I32, Nullability::NonNullable),
736 );
737 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
738 assert_eq!(
739 first_ref.get_as::<usize>(Stat::Sum),
740 Some(Precision::exact(79usize))
741 );
742 }
743
744 #[test]
745 fn merge_into_sortedness() {
746 let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
747 &StatsSet::default(),
748 &DType::Primitive(PType::I32, Nullability::NonNullable),
749 );
750 assert!(first.get(Stat::IsStrictSorted).is_none());
751 }
752
753 #[test]
754 fn merge_from_sortedness() {
755 let first = StatsSet::default().merge_ordered(
756 &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
757 &DType::Primitive(PType::I32, Nullability::NonNullable),
758 );
759 assert!(first.get(Stat::IsStrictSorted).is_none());
760 }
761
762 #[test]
763 fn merge_sortedness() {
764 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
765 first.set(Stat::Max, Precision::exact(1));
766 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
767 second.set(Stat::Min, Precision::exact(2));
768 first = first.merge_ordered(
769 &second,
770 &DType::Primitive(PType::I32, Nullability::NonNullable),
771 );
772
773 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
774 assert_eq!(
775 first_ref.get_as::<bool>(Stat::IsStrictSorted),
776 Some(Precision::exact(true))
777 );
778 }
779
780 #[test]
781 fn merge_sortedness_out_of_order() {
782 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
783 first.set(Stat::Min, Precision::exact(1));
784 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
785 second.set(Stat::Max, Precision::exact(2));
786 second = second.merge_ordered(
787 &first,
788 &DType::Primitive(PType::I32, Nullability::NonNullable),
789 );
790
791 let second_ref =
792 second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
793 assert_eq!(
794 second_ref.get_as::<bool>(Stat::IsStrictSorted),
795 Some(Precision::inexact(false))
796 );
797 }
798
799 #[test]
800 fn merge_sortedness_only_one_sorted() {
801 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
802 first.set(Stat::Max, Precision::exact(1));
803 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
804 second.set(Stat::Min, Precision::exact(2));
805 first.merge_ordered(
806 &second,
807 &DType::Primitive(PType::I32, Nullability::NonNullable),
808 );
809
810 let second_ref =
811 second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
812 assert_eq!(
813 second_ref.get_as::<bool>(Stat::IsStrictSorted),
814 Some(Precision::exact(false))
815 );
816 }
817
818 #[test]
819 fn merge_sortedness_missing_min() {
820 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
821 first.set(Stat::Max, Precision::exact(1));
822 let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
823 first = first.merge_ordered(
824 &second,
825 &DType::Primitive(PType::I32, Nullability::NonNullable),
826 );
827 assert!(first.get(Stat::IsStrictSorted).is_none());
828 }
829
830 #[test]
831 fn merge_sortedness_bound_min() {
832 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
833 first.set(Stat::Max, Precision::exact(1));
834 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
835 second.set(Stat::Min, Precision::inexact(2));
836 first = first.merge_ordered(
837 &second,
838 &DType::Primitive(PType::I32, Nullability::NonNullable),
839 );
840
841 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
842 assert_eq!(
843 first_ref.get_as::<bool>(Stat::IsStrictSorted),
844 Some(Precision::exact(true))
845 );
846 }
847
848 #[test]
849 fn merge_unordered() {
850 let array =
851 PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
852 let all_stats = all::<Stat>()
853 .filter(|s| !matches!(s, Stat::Sum))
854 .filter(|s| !matches!(s, Stat::NaNCount))
855 .collect_vec();
856 array.statistics().compute_all(&all_stats).unwrap();
857
858 let stats = array.statistics().to_owned();
859 for stat in &all_stats {
860 assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
861 }
862
863 let merged = stats.clone().merge_unordered(
864 &stats,
865 &DType::Primitive(PType::I32, Nullability::NonNullable),
866 );
867 for stat in &all_stats {
868 assert_eq!(
869 merged.get(*stat).is_some(),
870 stat.is_commutative(),
871 "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
872 )
873 }
874
875 let merged_ref = merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
876 let stats_ref = stats.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
877
878 assert_eq!(
879 merged_ref.get_as::<i32>(Stat::Min),
880 stats_ref.get_as::<i32>(Stat::Min)
881 );
882 assert_eq!(
883 merged_ref.get_as::<i32>(Stat::Max),
884 stats_ref.get_as::<i32>(Stat::Max)
885 );
886 assert_eq!(
887 merged_ref.get_as::<u64>(Stat::NullCount).unwrap(),
888 stats_ref
889 .get_as::<u64>(Stat::NullCount)
890 .unwrap()
891 .map(|s| s * 2)
892 );
893 }
894
895 #[test]
896 fn merge_min_bound_same() {
897 let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
900 &StatsSet::of(Stat::Min, Precision::exact(5)),
901 &DType::Primitive(PType::I32, Nullability::NonNullable),
902 );
903 let merged_ref =
904 merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
905 assert_eq!(
906 merged_ref.get_as::<i32>(Stat::Min),
907 Some(Precision::exact(5))
908 );
909 }
910
911 #[test]
912 fn merge_min_bound_bound_lower() {
913 let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
914 &StatsSet::of(Stat::Min, Precision::exact(5)),
915 &DType::Primitive(PType::I32, Nullability::NonNullable),
916 );
917 let merged_ref =
918 merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
919 assert_eq!(
920 merged_ref.get_as::<i32>(Stat::Min),
921 Some(Precision::inexact(4))
922 );
923 }
924
925 #[test]
926 fn test_combine_is_constant() {
927 {
928 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
929 let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
930 let mut stats_ref =
931 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
932 stats_ref
933 .combine_bool_stat::<IsConstant>(
934 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
935 )
936 .unwrap();
937 assert_eq!(
938 stats_ref.get_as::<bool>(Stat::IsConstant),
939 Some(Precision::exact(true))
940 );
941 }
942
943 {
944 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
945 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
946 let mut stats_ref =
947 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
948 stats_ref
949 .combine_bool_stat::<IsConstant>(
950 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
951 )
952 .unwrap();
953 assert_eq!(
954 stats_ref.get_as::<bool>(Stat::IsConstant),
955 Some(Precision::exact(true))
956 );
957 }
958
959 {
960 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
961 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
962 let mut stats_ref =
963 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
964 stats_ref
965 .combine_bool_stat::<IsConstant>(
966 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
967 )
968 .unwrap();
969 assert_eq!(
970 stats_ref.get_as::<bool>(Stat::IsConstant),
971 Some(Precision::exact(false))
972 );
973 }
974 }
975
976 #[test]
977 fn test_combine_sets_boolean_conflict() {
978 let mut stats1 = StatsSet::from_iter([
979 (Stat::IsConstant, Precision::exact(true)),
980 (Stat::IsSorted, Precision::exact(true)),
981 ]);
982
983 let stats2 = StatsSet::from_iter([
984 (Stat::IsConstant, Precision::exact(false)),
985 (Stat::IsSorted, Precision::exact(true)),
986 ]);
987
988 let result = stats1.combine_sets(
989 &stats2,
990 &DType::Primitive(PType::I32, Nullability::NonNullable),
991 );
992 assert!(result.is_err());
993 }
994
995 #[test]
996 fn test_combine_sets_with_missing_stats() {
997 let mut stats1 = StatsSet::from_iter([
998 (Stat::Min, Precision::exact(42)),
999 (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
1000 ]);
1001
1002 let stats2 = StatsSet::from_iter([
1003 (Stat::Max, Precision::exact(100)),
1004 (Stat::IsStrictSorted, Precision::exact(true)),
1005 ]);
1006
1007 stats1
1008 .combine_sets(
1009 &stats2,
1010 &DType::Primitive(PType::I32, Nullability::NonNullable),
1011 )
1012 .unwrap();
1013
1014 let stats_ref =
1015 stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1016
1017 assert_eq!(
1019 stats_ref.get_as::<i32>(Stat::Min),
1020 Some(Precision::exact(42))
1021 );
1022 assert_eq!(
1024 stats_ref.get_as::<i32>(Stat::Max),
1025 Some(Precision::exact(100))
1026 );
1027 assert_eq!(
1029 stats_ref.get_as::<bool>(Stat::IsStrictSorted),
1030 Some(Precision::exact(true))
1031 );
1032 }
1033
1034 #[test]
1035 fn test_combine_sets_with_inexact() {
1036 let mut stats1 = StatsSet::from_iter([
1037 (Stat::Min, Precision::exact(42)),
1038 (Stat::Max, Precision::inexact(100)),
1039 (Stat::IsConstant, Precision::exact(false)),
1040 ]);
1041
1042 let stats2 = StatsSet::from_iter([
1043 (Stat::Min, Precision::inexact(40)),
1045 (Stat::Max, Precision::exact(90)),
1046 (Stat::IsSorted, Precision::exact(true)),
1047 ]);
1048
1049 stats1
1050 .combine_sets(
1051 &stats2,
1052 &DType::Primitive(PType::I32, Nullability::NonNullable),
1053 )
1054 .unwrap();
1055
1056 let stats_ref =
1057 stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1058
1059 assert_eq!(
1061 stats_ref.get_as::<i32>(Stat::Min),
1062 Some(Precision::exact(42))
1063 );
1064 assert_eq!(
1066 stats_ref.get_as::<i32>(Stat::Max),
1067 Some(Precision::exact(90))
1068 );
1069 assert_eq!(
1071 stats_ref.get_as::<bool>(Stat::IsSorted),
1072 Some(Precision::exact(true))
1073 );
1074 }
1075}