1use std::fmt::Debug;
5
6use enum_iterator::all;
7use num_traits::CheckedAdd;
8use smallvec::SmallVec;
9use smallvec::smallvec;
10use vortex_error::VortexError;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_err;
14use vortex_error::vortex_panic;
15
16use crate::dtype::DType;
17use crate::expr::stats::IsConstant;
18use crate::expr::stats::IsSorted;
19use crate::expr::stats::IsStrictSorted;
20use crate::expr::stats::Max;
21use crate::expr::stats::Min;
22use crate::expr::stats::NaNCount;
23use crate::expr::stats::NullCount;
24use crate::expr::stats::Precision;
25use crate::expr::stats::Stat;
26use crate::expr::stats::StatBound;
27use crate::expr::stats::StatType;
28use crate::expr::stats::StatsProvider;
29use crate::expr::stats::StatsProviderExt;
30use crate::expr::stats::Sum;
31use crate::expr::stats::UncompressedSizeInBytes;
32use crate::scalar::Scalar;
33use crate::scalar::ScalarValue;
34
35pub type StatsArray = [(Stat, Precision<ScalarValue>); 4];
37
38#[derive(Default, Debug, Clone)]
39pub struct StatsSet {
40 values: SmallVec<StatsArray>,
41}
42
43impl StatsSet {
44 pub unsafe fn new_unchecked(values: SmallVec<StatsArray>) -> Self {
50 Self { values }
51 }
52
53 pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
55 Self {
56 values: smallvec![(stat, value)],
57 }
58 }
59
60 pub fn as_mut_typed_ref<'a, 'b>(&'a mut self, dtype: &'b DType) -> MutTypedStatsSetRef<'a, 'b> {
62 MutTypedStatsSetRef {
63 values: self,
64 dtype,
65 }
66 }
67
68 pub fn as_typed_ref<'a, 'b>(&'a self, dtype: &'b DType) -> TypedStatsSetRef<'a, 'b> {
70 TypedStatsSetRef {
71 values: self,
72 dtype,
73 }
74 }
75}
76
77impl StatsSet {
79 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
81 if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
82 *existing = (stat, value);
83 } else {
84 self.values.push((stat, value));
85 }
86 }
87
88 pub fn clear(&mut self, stat: Stat) {
90 self.values.retain(|(s, _)| *s != stat);
91 }
92
93 pub fn retain_only(&mut self, stats: &[Stat]) {
95 self.values.retain(|(s, _)| stats.contains(s));
96 }
97
98 pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
102 self.values.iter()
103 }
104
105 pub fn get(&self, stat: Stat) -> Precision<ScalarValue> {
107 self.values
108 .iter()
109 .find(|(s, _)| *s == stat)
110 .map(|(_, v)| v.clone())
111 .unwrap_or(Precision::Absent)
112 }
113
114 pub fn len(&self) -> usize {
116 self.values.len()
117 }
118
119 pub fn is_empty(&self) -> bool {
121 self.values.is_empty()
122 }
123
124 pub fn get_as<T: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
126 &self,
127 stat: Stat,
128 dtype: &DType,
129 ) -> Precision<T> {
130 self.get(stat).map(|v| {
131 T::try_from(
132 &Scalar::try_new(dtype.clone(), Some(v))
133 .vortex_expect("failed to construct a scalar statistic"),
134 )
135 .unwrap_or_else(|err| {
136 vortex_panic!(
137 err,
138 "Failed to get stat {} as {}",
139 stat,
140 std::any::type_name::<T>()
141 )
142 })
143 })
144 }
145}
146
147pub struct StatsSetIntoIter(smallvec::IntoIter<StatsArray>);
153
154impl Iterator for StatsSetIntoIter {
155 type Item = (Stat, Precision<ScalarValue>);
156
157 fn next(&mut self) -> Option<Self::Item> {
158 self.0.next()
159 }
160}
161
162impl IntoIterator for StatsSet {
163 type Item = (Stat, Precision<ScalarValue>);
164 type IntoIter = StatsSetIntoIter;
165
166 fn into_iter(self) -> Self::IntoIter {
167 StatsSetIntoIter(self.values.into_iter())
168 }
169}
170
171impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
172 fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
173 let iter = iter.into_iter();
174
175 let mut this = Self {
176 values: SmallVec::new(),
177 };
178 this.extend(iter);
179 this
180 }
181}
182
183impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
184 #[inline]
185 fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
186 iter.into_iter()
187 .for_each(|(stat, value)| self.set(stat, value));
188 }
189}
190
191impl StatsSet {
193 pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
196 self.as_mut_typed_ref(dtype)
197 .merge_ordered(&other.as_typed_ref(dtype));
198 self
199 }
200
201 pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
204 self.as_mut_typed_ref(dtype)
205 .merge_unordered(&other.as_typed_ref(dtype));
206 self
207 }
208
209 pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
211 self.as_mut_typed_ref(dtype)
212 .combine_sets(&other.as_typed_ref(dtype))
213 }
214}
215
216pub struct TypedStatsSetRef<'a, 'b> {
217 pub values: &'a StatsSet,
218 pub dtype: &'b DType,
219}
220
221impl StatsProvider for TypedStatsSetRef<'_, '_> {
222 fn get(&self, stat: Stat) -> Precision<Scalar> {
223 self.values.get(stat).map(|sv| {
224 Scalar::try_new(
225 stat.dtype(self.dtype)
226 .vortex_expect("Must have valid dtype if value is present"),
227 Some(sv),
228 )
229 .vortex_expect("failed to construct a scalar statistic")
230 })
231 }
232
233 fn len(&self) -> usize {
234 self.values.len()
235 }
236}
237
238pub struct MutTypedStatsSetRef<'a, 'b> {
239 pub values: &'a mut StatsSet,
240 pub dtype: &'b DType,
241}
242
243impl MutTypedStatsSetRef<'_, '_> {
244 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
246 self.values.set(stat, value);
247 }
248
249 pub fn clear(&mut self, stat: Stat) {
251 self.values.clear(stat);
252 }
253}
254
255impl StatsProvider for MutTypedStatsSetRef<'_, '_> {
256 fn get(&self, stat: Stat) -> Precision<Scalar> {
257 self.values.get(stat).map(|sv| {
258 Scalar::try_new(
259 stat.dtype(self.dtype)
260 .vortex_expect("Must have valid dtype if value is present"),
261 Some(sv),
262 )
263 .vortex_expect("failed to construct a scalar statistic")
264 })
265 }
266
267 fn len(&self) -> usize {
268 self.values.len()
269 }
270}
271
272impl MutTypedStatsSetRef<'_, '_> {
274 pub fn merge_ordered(mut self, other: &TypedStatsSetRef) -> Self {
277 for s in all::<Stat>() {
278 match s {
279 Stat::IsConstant => self.merge_is_constant(other),
280 Stat::IsSorted => self.merge_is_sorted(other),
281 Stat::IsStrictSorted => self.merge_is_strict_sorted(other),
282 Stat::Max => self.merge_max(other),
283 Stat::Min => self.merge_min(other),
284 Stat::Sum => self.merge_sum(other),
285 Stat::NullCount => self.merge_null_count(other),
286 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
287 Stat::NaNCount => self.merge_nan_count(other),
288 }
289 }
290
291 self
292 }
293
294 pub fn merge_unordered(mut self, other: &TypedStatsSetRef) -> Self {
297 for s in all::<Stat>() {
298 if !s.is_commutative() {
299 self.clear(s);
300 continue;
301 }
302
303 match s {
304 Stat::IsConstant => self.merge_is_constant(other),
305 Stat::Max => self.merge_max(other),
306 Stat::Min => self.merge_min(other),
307 Stat::Sum => self.merge_sum(other),
308 Stat::NullCount => self.merge_null_count(other),
309 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
310 Stat::IsSorted | Stat::IsStrictSorted => {
311 unreachable!("not commutative")
312 }
313 Stat::NaNCount => self.merge_nan_count(other),
314 }
315 }
316
317 self
318 }
319
320 pub fn combine_sets(&mut self, other: &TypedStatsSetRef) -> VortexResult<()> {
322 let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
323 for s in other_stats {
324 match s {
325 Stat::Max => self.combine_bound::<Max>(other)?,
326 Stat::Min => self.combine_bound::<Min>(other)?,
327 Stat::UncompressedSizeInBytes => {
328 self.combine_bound::<UncompressedSizeInBytes>(other)?
329 }
330 Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
331 Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
332 Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
333 Stat::NullCount => self.combine_bound::<NullCount>(other)?,
334 Stat::Sum => self.combine_bound::<Sum>(other)?,
335 Stat::NaNCount => self.combine_bound::<NaNCount>(other)?,
336 }
337 }
338 Ok(())
339 }
340
341 fn combine_bound<S: StatType<Scalar>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
342 where
343 S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
344 {
345 match (self.get_scalar_bound::<S>(), other.get_scalar_bound::<S>()) {
346 (Some(m1), Some(m2)) => {
347 let meet = m1
348 .intersection(&m2)
349 .vortex_expect("can always compare scalar")
350 .ok_or_else(|| {
351 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
352 })?;
353 if meet != m1 {
354 self.set(
355 S::STAT,
356 meet.into_value().map(|s| {
357 s.into_value()
358 .vortex_expect("stat scalar value cannot be null")
359 }),
360 );
361 }
362 }
363 (None, Some(m)) => self.set(
364 S::STAT,
365 m.into_value().map(|s| {
366 s.into_value()
367 .vortex_expect("stat scalar value cannot be null")
368 }),
369 ),
370 (Some(_), _) => (),
371 (None, None) => self.clear(S::STAT),
372 }
373 Ok(())
374 }
375
376 fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
377 where
378 S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
379 {
380 match (
381 self.get_as_bound::<S, bool>(),
382 other.get_as_bound::<S, bool>(),
383 ) {
384 (Some(m1), Some(m2)) => {
385 let intersection = m1
386 .intersection(&m2)
387 .vortex_expect("can always compare boolean")
388 .ok_or_else(|| {
389 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
390 })?;
391 if intersection != m1 {
392 self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
393 }
394 }
395 (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
396 (Some(_), None) => (),
397 (None, None) => self.clear(S::STAT),
398 }
399 Ok(())
400 }
401
402 fn merge_min(&mut self, other: &TypedStatsSetRef) {
403 match (
404 self.get_scalar_bound::<Min>(),
405 other.get_scalar_bound::<Min>(),
406 ) {
407 (Some(m1), Some(m2)) => {
408 let meet = m1.union(&m2).vortex_expect("can compare scalar");
409 if meet != m1 {
410 self.set(
411 Stat::Min,
412 meet.into_value().map(|s| {
413 s.into_value()
414 .vortex_expect("stat scalar value cannot be null")
415 }),
416 );
417 }
418 }
419 _ => self.clear(Stat::Min),
420 }
421 }
422
423 fn merge_max(&mut self, other: &TypedStatsSetRef) {
424 match (
425 self.get_scalar_bound::<Max>(),
426 other.get_scalar_bound::<Max>(),
427 ) {
428 (Some(m1), Some(m2)) => {
429 let meet = m1.union(&m2).vortex_expect("can compare scalar");
430 if meet != m1 {
431 self.set(
432 Stat::Max,
433 meet.into_value().map(|s| {
434 s.into_value()
435 .vortex_expect("stat scalar value cannot be null")
436 }),
437 );
438 }
439 }
440 _ => self.clear(Stat::Max),
441 }
442 }
443
444 fn merge_sum(&mut self, other: &TypedStatsSetRef) {
445 match (
446 self.get_scalar_bound::<Sum>(),
447 other.get_scalar_bound::<Sum>(),
448 ) {
449 (Some(m1), Some(m2)) => {
450 if let Some(scalar_value) =
452 m1.zip(m2).as_exact().and_then(|(s1, s2)| match s1.dtype() {
453 DType::Primitive(..) => s1
454 .as_primitive()
455 .checked_add(&s2.as_primitive())
456 .and_then(|pscalar| pscalar.pvalue().map(ScalarValue::Primitive)),
457 DType::Decimal(..) => s1
458 .as_decimal()
459 .checked_binary_numeric(
460 &s2.as_decimal(),
461 crate::scalar::NumericOperator::Add,
462 )
463 .map(|scalar| {
464 ScalarValue::Decimal(
465 scalar
466 .decimal_value()
467 .vortex_expect("no decimal value in scalar"),
468 )
469 }),
470 _ => None,
471 })
472 {
473 self.set(Stat::Sum, Precision::Exact(scalar_value));
474 }
475 }
476 _ => self.clear(Stat::Sum),
477 }
478 }
479
480 fn merge_is_constant(&mut self, other: &TypedStatsSetRef) {
481 let self_const = self.get_as(Stat::IsConstant);
482 let other_const = other.get_as(Stat::IsConstant);
483 let self_min = self.get(Stat::Min);
484 let other_min = other.get(Stat::Min);
485
486 if let (Some(self_const), Some(other_const), Some(self_min), Some(other_min)) = (
487 self_const.as_exact(),
488 other_const.as_exact(),
489 self_min.as_exact(),
490 other_min.as_exact(),
491 ) {
492 if self_const && other_const && self_min == other_min {
493 self.set(Stat::IsConstant, Precision::exact(true));
494 } else {
495 self.set(Stat::IsConstant, Precision::inexact(false));
496 }
497 }
498 self.set(Stat::IsConstant, Precision::exact(false));
499 }
500
501 fn merge_is_sorted(&mut self, other: &TypedStatsSetRef) {
502 self.merge_sortedness_stat(other, Stat::IsSorted, PartialOrd::le)
503 }
504
505 fn merge_is_strict_sorted(&mut self, other: &TypedStatsSetRef) {
506 self.merge_sortedness_stat(other, Stat::IsStrictSorted, PartialOrd::lt)
507 }
508
509 fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
510 &mut self,
511 other: &TypedStatsSetRef,
512 stat: Stat,
513 cmp: F,
514 ) {
515 if (Precision::Exact(true), Precision::Exact(true))
516 == (self.get_as(stat), other.get_as(stat))
517 {
518 if let (Some(self_max), Some(other_min)) = (
523 self.get_scalar_bound::<Max>().and_then(|v| v.max_value()),
524 other.get_scalar_bound::<Min>().and_then(|v| v.min_value()),
525 ) {
526 return if cmp(&self_max, &other_min) {
527 } else {
529 self.set(stat, Precision::inexact(false));
530 };
531 }
532 }
533 self.clear(stat);
534 }
535
536 fn merge_null_count(&mut self, other: &TypedStatsSetRef) {
537 self.merge_sum_stat(Stat::NullCount, other)
538 }
539
540 fn merge_nan_count(&mut self, other: &TypedStatsSetRef) {
541 self.merge_sum_stat(Stat::NaNCount, other)
542 }
543
544 fn merge_uncompressed_size_in_bytes(&mut self, other: &TypedStatsSetRef) {
545 self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
546 }
547
548 fn merge_sum_stat(&mut self, stat: Stat, other: &TypedStatsSetRef) {
549 let merged = self
550 .get_as::<usize>(stat)
551 .zip(other.get_as::<usize>(stat))
552 .map(|(l, r)| ScalarValue::from(l + r));
553
554 if merged.is_absent() {
555 self.clear(stat);
556 } else {
557 self.set(stat, merged);
558 }
559 }
560}
561
562#[cfg(test)]
563mod test {
564 use enum_iterator::all;
565 use itertools::Itertools;
566 use smallvec::smallvec;
567
568 use crate::LEGACY_SESSION;
569 use crate::VortexSessionExecute;
570 use crate::arrays::PrimitiveArray;
571 use crate::dtype::DType;
572 use crate::dtype::Nullability;
573 use crate::dtype::PType;
574 use crate::expr::stats::IsConstant;
575 use crate::expr::stats::Precision;
576 use crate::expr::stats::Stat;
577 use crate::expr::stats::StatsProvider;
578 use crate::expr::stats::StatsProviderExt;
579 use crate::stats::StatsSet;
580 use crate::stats::stats_set::Scalar;
581
582 #[test]
583 fn test_iter() {
584 let set = unsafe {
586 StatsSet::new_unchecked(smallvec![
587 (Stat::Max, Precision::exact(100)),
588 (Stat::Min, Precision::exact(42)),
589 ])
590 };
591 let mut iter = set.iter();
592 let first = iter.next().unwrap().clone();
593 assert_eq!(first.0, Stat::Max);
594 assert_eq!(
595 first.1.map(
596 |f| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(f)).unwrap()).unwrap()
597 ),
598 Precision::exact(100)
599 );
600 let snd = iter.next().unwrap().clone();
601 assert_eq!(snd.0, Stat::Min);
602 assert_eq!(
603 snd.1.map(
604 |s| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(s)).unwrap()).unwrap()
605 ),
606 Precision::exact(42)
607 );
608 }
609
610 #[test]
611 fn into_iter() {
612 let mut set = unsafe {
614 StatsSet::new_unchecked(smallvec![
615 (Stat::Max, Precision::exact(100)),
616 (Stat::Min, Precision::exact(42)),
617 ])
618 }
619 .into_iter();
620 let (stat, first) = set.next().unwrap();
621 assert_eq!(stat, Stat::Max);
622 assert_eq!(
623 first.map(
624 |f| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(f)).unwrap()).unwrap()
625 ),
626 Precision::exact(100)
627 );
628 let snd = set.next().unwrap();
629 assert_eq!(snd.0, Stat::Min);
630 assert_eq!(
631 snd.1.map(
632 |s| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(s)).unwrap()).unwrap()
633 ),
634 Precision::exact(42)
635 );
636 }
637
638 #[test]
639 fn merge_constant() {
640 let first = StatsSet::from_iter([
641 (Stat::Min, Precision::exact(42)),
642 (Stat::IsConstant, Precision::exact(true)),
643 ])
644 .merge_ordered(
645 &StatsSet::from_iter([
646 (Stat::Min, Precision::inexact(42)),
647 (Stat::IsConstant, Precision::exact(true)),
648 ]),
649 &DType::Primitive(PType::I32, Nullability::NonNullable),
650 );
651
652 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
653 assert_eq!(
654 first_ref.get_as::<bool>(Stat::IsConstant),
655 Precision::exact(false)
656 );
657 assert_eq!(first_ref.get_as::<i32>(Stat::Min), Precision::exact(42));
658 }
659
660 #[test]
661 fn merge_into_min() {
662 let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
663 &StatsSet::default(),
664 &DType::Primitive(PType::I32, Nullability::NonNullable),
665 );
666
667 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
668 assert!(first_ref.get(Stat::Min).is_absent());
669 }
670
671 #[test]
672 fn merge_from_min() {
673 let first = StatsSet::default().merge_ordered(
674 &StatsSet::of(Stat::Min, Precision::exact(42)),
675 &DType::Primitive(PType::I32, Nullability::NonNullable),
676 );
677
678 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
679 assert!(first_ref.get(Stat::Min).is_absent());
680 }
681
682 #[test]
683 fn merge_mins() {
684 let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
685 &StatsSet::of(Stat::Min, Precision::exact(42)),
686 &DType::Primitive(PType::I32, Nullability::NonNullable),
687 );
688
689 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
690 assert_eq!(first_ref.get_as::<i32>(Stat::Min), Precision::exact(37));
691 }
692
693 #[test]
694 fn merge_into_bound_max() {
695 let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
696 &StatsSet::default(),
697 &DType::Primitive(PType::I32, Nullability::NonNullable),
698 );
699 assert!(first.get(Stat::Max).is_absent());
700 }
701
702 #[test]
703 fn merge_from_max() {
704 let first = StatsSet::default().merge_ordered(
705 &StatsSet::of(Stat::Max, Precision::exact(42)),
706 &DType::Primitive(PType::I32, Nullability::NonNullable),
707 );
708 assert!(first.get(Stat::Max).is_absent());
709 }
710
711 #[test]
712 fn merge_maxes() {
713 let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
714 &StatsSet::of(Stat::Max, Precision::exact(42)),
715 &DType::Primitive(PType::I32, Nullability::NonNullable),
716 );
717 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
718 assert_eq!(first_ref.get_as::<i32>(Stat::Max), Precision::exact(42));
719 }
720
721 #[test]
722 fn merge_maxes_bound() {
723 let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
724 let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
725 .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
726 let first_ref = first.as_typed_ref(&dtype);
727 assert_eq!(first_ref.get_as::<i32>(Stat::Max), Precision::inexact(43));
728 }
729
730 #[test]
731 fn merge_into_scalar() {
732 let first = StatsSet::of(Stat::Sum, Precision::exact(42i64)).merge_ordered(
735 &StatsSet::default(),
736 &DType::Primitive(PType::I32, Nullability::NonNullable),
737 );
738 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
739 assert!(first_ref.get(Stat::Sum).is_absent());
740 }
741
742 #[test]
743 fn merge_from_scalar() {
744 let first = StatsSet::default().merge_ordered(
747 &StatsSet::of(Stat::Sum, Precision::exact(42i64)),
748 &DType::Primitive(PType::I32, Nullability::NonNullable),
749 );
750 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
751 assert!(first_ref.get(Stat::Sum).is_absent());
752 }
753
754 #[test]
755 fn merge_scalars() {
756 let first = StatsSet::of(Stat::Sum, Precision::exact(37i64)).merge_ordered(
759 &StatsSet::of(Stat::Sum, Precision::exact(42i64)),
760 &DType::Primitive(PType::I32, Nullability::NonNullable),
761 );
762 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
763 assert_eq!(first_ref.get_as::<i64>(Stat::Sum), Precision::exact(79i64));
764 }
765
766 #[test]
767 fn merge_into_sortedness() {
768 let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
769 &StatsSet::default(),
770 &DType::Primitive(PType::I32, Nullability::NonNullable),
771 );
772 assert!(first.get(Stat::IsStrictSorted).is_absent());
773 }
774
775 #[test]
776 fn merge_from_sortedness() {
777 let first = StatsSet::default().merge_ordered(
778 &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
779 &DType::Primitive(PType::I32, Nullability::NonNullable),
780 );
781 assert!(first.get(Stat::IsStrictSorted).is_absent());
782 }
783
784 #[test]
785 fn merge_sortedness() {
786 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
787 first.set(Stat::Max, Precision::exact(1));
788 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
789 second.set(Stat::Min, Precision::exact(2));
790 first = first.merge_ordered(
791 &second,
792 &DType::Primitive(PType::I32, Nullability::NonNullable),
793 );
794
795 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
796 assert_eq!(
797 first_ref.get_as::<bool>(Stat::IsStrictSorted),
798 Precision::exact(true)
799 );
800 }
801
802 #[test]
803 fn merge_sortedness_out_of_order() {
804 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
805 first.set(Stat::Min, Precision::exact(1));
806 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
807 second.set(Stat::Max, Precision::exact(2));
808 second = second.merge_ordered(
809 &first,
810 &DType::Primitive(PType::I32, Nullability::NonNullable),
811 );
812
813 let second_ref =
814 second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
815 assert_eq!(
816 second_ref.get_as::<bool>(Stat::IsStrictSorted),
817 Precision::inexact(false)
818 );
819 }
820
821 #[test]
822 fn merge_sortedness_only_one_sorted() {
823 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
824 first.set(Stat::Max, Precision::exact(1));
825 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
826 second.set(Stat::Min, Precision::exact(2));
827 first.merge_ordered(
828 &second,
829 &DType::Primitive(PType::I32, Nullability::NonNullable),
830 );
831
832 let second_ref =
833 second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
834 assert_eq!(
835 second_ref.get_as::<bool>(Stat::IsStrictSorted),
836 Precision::exact(false)
837 );
838 }
839
840 #[test]
841 fn merge_sortedness_missing_min() {
842 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
843 first.set(Stat::Max, Precision::exact(1));
844 let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
845 first = first.merge_ordered(
846 &second,
847 &DType::Primitive(PType::I32, Nullability::NonNullable),
848 );
849 assert!(first.get(Stat::IsStrictSorted).is_absent());
850 }
851
852 #[test]
853 fn merge_sortedness_bound_min() {
854 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
855 first.set(Stat::Max, Precision::exact(1));
856 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
857 second.set(Stat::Min, Precision::inexact(2));
858 first = first.merge_ordered(
859 &second,
860 &DType::Primitive(PType::I32, Nullability::NonNullable),
861 );
862
863 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
864 assert_eq!(
865 first_ref.get_as::<bool>(Stat::IsStrictSorted),
866 Precision::exact(true)
867 );
868 }
869
870 #[test]
871 fn merge_unordered() {
872 let array =
873 PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
874 let all_stats = all::<Stat>()
875 .filter(|s| !matches!(s, Stat::Sum))
876 .filter(|s| !matches!(s, Stat::NaNCount))
877 .collect_vec();
878 array
879 .statistics()
880 .compute_all(&all_stats, &mut LEGACY_SESSION.create_execution_ctx())
881 .unwrap();
882
883 let stats = array.statistics().to_owned();
884 for stat in &all_stats {
885 assert!(!stats.get(*stat).is_absent(), "Stat {stat} is missing");
886 }
887
888 let merged = stats.clone().merge_unordered(
889 &stats,
890 &DType::Primitive(PType::I32, Nullability::NonNullable),
891 );
892 for stat in &all_stats {
893 assert_eq!(
894 !merged.get(*stat).is_absent(),
895 stat.is_commutative(),
896 "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
897 )
898 }
899
900 let merged_ref = merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
901 let stats_ref = stats.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
902
903 assert_eq!(
904 merged_ref.get_as::<i32>(Stat::Min),
905 stats_ref.get_as::<i32>(Stat::Min)
906 );
907 assert_eq!(
908 merged_ref.get_as::<i32>(Stat::Max),
909 stats_ref.get_as::<i32>(Stat::Max)
910 );
911 assert_eq!(
912 merged_ref.get_as::<u64>(Stat::NullCount),
913 stats_ref.get_as::<u64>(Stat::NullCount).map(|s| s * 2)
914 );
915 }
916
917 #[test]
918 fn merge_min_bound_same() {
919 let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
922 &StatsSet::of(Stat::Min, Precision::exact(5)),
923 &DType::Primitive(PType::I32, Nullability::NonNullable),
924 );
925 let merged_ref =
926 merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
927 assert_eq!(merged_ref.get_as::<i32>(Stat::Min), Precision::exact(5));
928 }
929
930 #[test]
931 fn merge_min_bound_bound_lower() {
932 let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
933 &StatsSet::of(Stat::Min, Precision::exact(5)),
934 &DType::Primitive(PType::I32, Nullability::NonNullable),
935 );
936 let merged_ref =
937 merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
938 assert_eq!(merged_ref.get_as::<i32>(Stat::Min), Precision::inexact(4));
939 }
940
941 #[test]
942 fn test_combine_is_constant() {
943 {
944 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
945 let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
946 let mut stats_ref =
947 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
948 stats_ref
949 .combine_bool_stat::<IsConstant>(
950 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
951 )
952 .unwrap();
953 assert_eq!(
954 stats_ref.get_as::<bool>(Stat::IsConstant),
955 Precision::exact(true)
956 );
957 }
958
959 {
960 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
961 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
962 let mut stats_ref =
963 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
964 stats_ref
965 .combine_bool_stat::<IsConstant>(
966 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
967 )
968 .unwrap();
969 assert_eq!(
970 stats_ref.get_as::<bool>(Stat::IsConstant),
971 Precision::exact(true)
972 );
973 }
974
975 {
976 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
977 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
978 let mut stats_ref =
979 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
980 stats_ref
981 .combine_bool_stat::<IsConstant>(
982 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
983 )
984 .unwrap();
985 assert_eq!(
986 stats_ref.get_as::<bool>(Stat::IsConstant),
987 Precision::exact(false)
988 );
989 }
990 }
991
992 #[test]
993 fn test_combine_sets_boolean_conflict() {
994 let mut stats1 = StatsSet::from_iter([
995 (Stat::IsConstant, Precision::exact(true)),
996 (Stat::IsSorted, Precision::exact(true)),
997 ]);
998
999 let stats2 = StatsSet::from_iter([
1000 (Stat::IsConstant, Precision::exact(false)),
1001 (Stat::IsSorted, Precision::exact(true)),
1002 ]);
1003
1004 let result = stats1.combine_sets(
1005 &stats2,
1006 &DType::Primitive(PType::I32, Nullability::NonNullable),
1007 );
1008 assert!(result.is_err());
1009 }
1010
1011 #[test]
1012 fn test_combine_sets_with_missing_stats() {
1013 let mut stats1 = StatsSet::from_iter([
1014 (Stat::Min, Precision::exact(42)),
1015 (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
1016 ]);
1017
1018 let stats2 = StatsSet::from_iter([
1019 (Stat::Max, Precision::exact(100)),
1020 (Stat::IsStrictSorted, Precision::exact(true)),
1021 ]);
1022
1023 stats1
1024 .combine_sets(
1025 &stats2,
1026 &DType::Primitive(PType::I32, Nullability::NonNullable),
1027 )
1028 .unwrap();
1029
1030 let stats_ref =
1031 stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1032
1033 assert_eq!(stats_ref.get_as::<i32>(Stat::Min), Precision::exact(42));
1035 assert_eq!(stats_ref.get_as::<i32>(Stat::Max), Precision::exact(100));
1037 assert_eq!(
1039 stats_ref.get_as::<bool>(Stat::IsStrictSorted),
1040 Precision::exact(true)
1041 );
1042 }
1043
1044 #[test]
1045 fn test_combine_sets_with_inexact() {
1046 let mut stats1 = StatsSet::from_iter([
1047 (Stat::Min, Precision::exact(42)),
1048 (Stat::Max, Precision::inexact(100)),
1049 (Stat::IsConstant, Precision::exact(false)),
1050 ]);
1051
1052 let stats2 = StatsSet::from_iter([
1053 (Stat::Min, Precision::inexact(40)),
1055 (Stat::Max, Precision::exact(90)),
1056 (Stat::IsSorted, Precision::exact(true)),
1057 ]);
1058
1059 stats1
1060 .combine_sets(
1061 &stats2,
1062 &DType::Primitive(PType::I32, Nullability::NonNullable),
1063 )
1064 .unwrap();
1065
1066 let stats_ref =
1067 stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1068
1069 assert_eq!(stats_ref.get_as::<i32>(Stat::Min), Precision::exact(42));
1071 assert_eq!(stats_ref.get_as::<i32>(Stat::Max), Precision::exact(90));
1073 assert_eq!(
1075 stats_ref.get_as::<bool>(Stat::IsSorted),
1076 Precision::exact(true)
1077 );
1078 }
1079}