1use std::fmt::Debug;
5
6use enum_iterator::{Sequence, all};
7use num_traits::CheckedAdd;
8use vortex_dtype::DType;
9use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_err, vortex_panic};
10use vortex_scalar::{Scalar, ScalarValue};
11
12use super::{
13 IsSorted, IsStrictSorted, NaNCount, NullCount, StatType, StatsProvider, UncompressedSizeInBytes,
14};
15use crate::stats::{IsConstant, Max, Min, Precision, Stat, StatBound, StatsProviderExt, Sum};
16
17#[derive(Default, Debug, Clone)]
18pub struct StatsSet {
19 values: Vec<(Stat, Precision<ScalarValue>)>,
20}
21
22impl StatsSet {
23 pub unsafe fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
29 Self { values }
30 }
31
32 pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
34 unsafe { Self::new_unchecked(vec![(stat, value)]) }
36 }
37
38 fn reserve_full_capacity(&mut self) {
39 if self.values.capacity() < Stat::CARDINALITY {
40 self.values
41 .reserve_exact(Stat::CARDINALITY - self.values.capacity());
42 }
43 }
44
45 pub fn as_mut_typed_ref<'a, 'b>(&'a mut self, dtype: &'b DType) -> MutTypedStatsSetRef<'a, 'b> {
47 MutTypedStatsSetRef {
48 values: self,
49 dtype,
50 }
51 }
52
53 pub fn as_typed_ref<'a, 'b>(&'a self, dtype: &'b DType) -> TypedStatsSetRef<'a, 'b> {
55 TypedStatsSetRef {
56 values: self,
57 dtype,
58 }
59 }
60}
61
62impl StatsSet {
64 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
66 self.reserve_full_capacity();
67
68 if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
69 *existing = (stat, value);
70 } else {
71 self.values.push((stat, value));
72 }
73 }
74
75 pub fn clear(&mut self, stat: Stat) {
77 self.values.retain(|(s, _)| *s != stat);
78 }
79
80 pub fn retain_only(&mut self, stats: &[Stat]) {
82 self.values.retain(|(s, _)| stats.contains(s));
83 }
84
85 pub fn keep_inexact_stats(self, inexact_keep: &[Stat]) -> Self {
87 self.values
88 .into_iter()
89 .filter_map(|(s, v)| inexact_keep.contains(&s).then(|| (s, v.into_inexact())))
90 .collect()
91 }
92
93 pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
97 self.values.iter()
98 }
99
100 pub fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
102 self.values
103 .iter()
104 .find(|(s, _)| *s == stat)
105 .map(|(_, v)| v.clone())
106 }
107
108 pub fn len(&self) -> usize {
110 self.values.len()
111 }
112
113 pub fn is_empty(&self) -> bool {
115 self.values.is_empty()
116 }
117
118 pub fn get_as<T: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
120 &self,
121 stat: Stat,
122 dtype: &DType,
123 ) -> Option<Precision<T>> {
124 self.get(stat).map(|v| {
125 v.map(|v| {
126 T::try_from(&Scalar::new(dtype.clone(), v)).unwrap_or_else(|err| {
127 vortex_panic!(
128 err,
129 "Failed to get stat {} as {}",
130 stat,
131 std::any::type_name::<T>()
132 )
133 })
134 })
135 })
136 }
137}
138
139pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
145
146impl Iterator for StatsSetIntoIter {
147 type Item = (Stat, Precision<ScalarValue>);
148
149 fn next(&mut self) -> Option<Self::Item> {
150 self.0.next()
151 }
152}
153
154impl IntoIterator for StatsSet {
155 type Item = (Stat, Precision<ScalarValue>);
156 type IntoIter = StatsSetIntoIter;
157
158 fn into_iter(self) -> Self::IntoIter {
159 StatsSetIntoIter(self.values.into_iter())
160 }
161}
162
163impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
164 fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
165 let iter = iter.into_iter();
166 let mut values = Vec::default();
167 values.reserve_exact(Stat::CARDINALITY);
168
169 let mut this = Self { values };
170 this.extend(iter);
171 this
172 }
173}
174
175impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
176 #[inline]
177 fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
178 let iter = iter.into_iter();
179 self.reserve_full_capacity();
180
181 iter.for_each(|(stat, value)| self.set(stat, value));
182 }
183}
184
185impl StatsSet {
187 pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
190 self.as_mut_typed_ref(dtype)
191 .merge_ordered(&other.as_typed_ref(dtype));
192 self
193 }
194
195 pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
198 self.as_mut_typed_ref(dtype)
199 .merge_unordered(&other.as_typed_ref(dtype));
200 self
201 }
202
203 pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
205 self.as_mut_typed_ref(dtype)
206 .combine_sets(&other.as_typed_ref(dtype))
207 }
208}
209
210pub struct TypedStatsSetRef<'a, 'b> {
211 pub values: &'a StatsSet,
212 pub dtype: &'b DType,
213}
214
215impl StatsProvider for TypedStatsSetRef<'_, '_> {
216 fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
217 self.values.get(stat).map(|p| {
218 p.map(|sv| {
219 Scalar::new(
220 stat.dtype(self.dtype)
221 .vortex_expect("Must have valid dtype if value is present"),
222 sv,
223 )
224 })
225 })
226 }
227
228 fn len(&self) -> usize {
229 self.values.len()
230 }
231}
232
233pub struct MutTypedStatsSetRef<'a, 'b> {
234 pub values: &'a mut StatsSet,
235 pub dtype: &'b DType,
236}
237
238impl MutTypedStatsSetRef<'_, '_> {
239 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
241 self.values.set(stat, value);
242 }
243
244 pub fn clear(&mut self, stat: Stat) {
246 self.values.clear(stat);
247 }
248}
249
250impl StatsProvider for MutTypedStatsSetRef<'_, '_> {
251 fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
252 self.values.get(stat).map(|p| {
253 p.map(|sv| {
254 Scalar::new(
255 stat.dtype(self.dtype)
256 .vortex_expect("Must have valid dtype if value is present"),
257 sv,
258 )
259 })
260 })
261 }
262
263 fn len(&self) -> usize {
264 self.values.len()
265 }
266}
267
268impl MutTypedStatsSetRef<'_, '_> {
270 pub fn merge_ordered(mut self, other: &TypedStatsSetRef) -> Self {
273 for s in all::<Stat>() {
274 match s {
275 Stat::IsConstant => self.merge_is_constant(other),
276 Stat::IsSorted => self.merge_is_sorted(other),
277 Stat::IsStrictSorted => self.merge_is_strict_sorted(other),
278 Stat::Max => self.merge_max(other),
279 Stat::Min => self.merge_min(other),
280 Stat::Sum => self.merge_sum(other),
281 Stat::NullCount => self.merge_null_count(other),
282 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
283 Stat::NaNCount => self.merge_nan_count(other),
284 }
285 }
286
287 self
288 }
289
290 pub fn merge_unordered(mut self, other: &TypedStatsSetRef) -> Self {
293 for s in all::<Stat>() {
294 if !s.is_commutative() {
295 self.clear(s);
296 continue;
297 }
298
299 match s {
300 Stat::IsConstant => self.merge_is_constant(other),
301 Stat::Max => self.merge_max(other),
302 Stat::Min => self.merge_min(other),
303 Stat::Sum => self.merge_sum(other),
304 Stat::NullCount => self.merge_null_count(other),
305 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
306 Stat::IsSorted | Stat::IsStrictSorted => {
307 unreachable!("not commutative")
308 }
309 Stat::NaNCount => self.merge_nan_count(other),
310 }
311 }
312
313 self
314 }
315
316 pub fn combine_sets(&mut self, other: &TypedStatsSetRef) -> VortexResult<()> {
318 let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
319 for s in other_stats {
320 match s {
321 Stat::Max => self.combine_bound::<Max>(other)?,
322 Stat::Min => self.combine_bound::<Min>(other)?,
323 Stat::UncompressedSizeInBytes => {
324 self.combine_bound::<UncompressedSizeInBytes>(other)?
325 }
326 Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
327 Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
328 Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
329 Stat::NullCount => self.combine_bound::<NullCount>(other)?,
330 Stat::Sum => self.combine_bound::<Sum>(other)?,
331 Stat::NaNCount => self.combine_bound::<NaNCount>(other)?,
332 }
333 }
334 Ok(())
335 }
336
337 fn combine_bound<S: StatType<Scalar>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
338 where
339 S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
340 {
341 match (self.get_scalar_bound::<S>(), other.get_scalar_bound::<S>()) {
342 (Some(m1), Some(m2)) => {
343 let meet = m1
344 .intersection(&m2)
345 .vortex_expect("can always compare scalar")
346 .ok_or_else(|| {
347 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
348 })?;
349 if meet != m1 {
350 self.set(S::STAT, meet.into_value().map(Scalar::into_value));
351 }
352 }
353 (None, Some(m)) => self.set(S::STAT, m.into_value().map(Scalar::into_value)),
354 (Some(_), _) => (),
355 (None, None) => self.clear(S::STAT),
356 }
357 Ok(())
358 }
359
360 fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
361 where
362 S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
363 {
364 match (
365 self.get_as_bound::<S, bool>(),
366 other.get_as_bound::<S, bool>(),
367 ) {
368 (Some(m1), Some(m2)) => {
369 let intersection = m1
370 .intersection(&m2)
371 .vortex_expect("can always compare boolean")
372 .ok_or_else(|| {
373 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
374 })?;
375 if intersection != m1 {
376 self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
377 }
378 }
379 (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
380 (Some(_), None) => (),
381 (None, None) => self.clear(S::STAT),
382 }
383 Ok(())
384 }
385
386 fn merge_min(&mut self, other: &TypedStatsSetRef) {
387 match (
388 self.get_scalar_bound::<Min>(),
389 other.get_scalar_bound::<Min>(),
390 ) {
391 (Some(m1), Some(m2)) => {
392 let meet = m1.union(&m2).vortex_expect("can compare scalar");
393 if meet != m1 {
394 self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
395 }
396 }
397 _ => self.clear(Stat::Min),
398 }
399 }
400
401 fn merge_max(&mut self, other: &TypedStatsSetRef) {
402 match (
403 self.get_scalar_bound::<Max>(),
404 other.get_scalar_bound::<Max>(),
405 ) {
406 (Some(m1), Some(m2)) => {
407 let meet = m1.union(&m2).vortex_expect("can compare scalar");
408 if meet != m1 {
409 self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
410 }
411 }
412 _ => self.clear(Stat::Max),
413 }
414 }
415
416 fn merge_sum(&mut self, other: &TypedStatsSetRef) {
417 match (
418 self.get_scalar_bound::<Sum>(),
419 other.get_scalar_bound::<Sum>(),
420 ) {
421 (Some(m1), Some(m2)) => {
422 if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
424 s1.as_primitive()
425 .checked_add(&s2.as_primitive())
426 .map(|pscalar| {
427 pscalar
428 .pvalue()
429 .map(|pvalue| {
430 Scalar::primitive_value(
431 pvalue,
432 pscalar.ptype(),
433 pscalar.dtype().nullability(),
434 )
435 .into_value()
436 })
437 .unwrap_or_else(ScalarValue::null)
438 })
439 }) {
440 self.set(Stat::Sum, Precision::Exact(scalar_value));
441 }
442 }
443 _ => self.clear(Stat::Sum),
444 }
445 }
446
447 fn merge_is_constant(&mut self, other: &TypedStatsSetRef) {
448 let self_const = self.get_as(Stat::IsConstant);
449 let other_const = other.get_as(Stat::IsConstant);
450 let self_min = self.get(Stat::Min);
451 let other_min = other.get(Stat::Min);
452
453 if let (
454 Some(Precision::Exact(self_const)),
455 Some(Precision::Exact(other_const)),
456 Some(Precision::Exact(self_min)),
457 Some(Precision::Exact(other_min)),
458 ) = (self_const, other_const, self_min, other_min)
459 {
460 if self_const && other_const && self_min == other_min {
461 self.set(Stat::IsConstant, Precision::exact(true));
462 } else {
463 self.set(Stat::IsConstant, Precision::inexact(false));
464 }
465 }
466 self.set(Stat::IsConstant, Precision::exact(false));
467 }
468
469 fn merge_is_sorted(&mut self, other: &TypedStatsSetRef) {
470 self.merge_sortedness_stat(other, Stat::IsSorted, PartialOrd::le)
471 }
472
473 fn merge_is_strict_sorted(&mut self, other: &TypedStatsSetRef) {
474 self.merge_sortedness_stat(other, Stat::IsStrictSorted, PartialOrd::lt)
475 }
476
477 fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
478 &mut self,
479 other: &TypedStatsSetRef,
480 stat: Stat,
481 cmp: F,
482 ) {
483 if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
484 == (self.get_as(stat), other.get_as(stat))
485 {
486 if let (Some(self_max), Some(other_min)) = (
491 self.get_scalar_bound::<Max>(),
492 other.get_scalar_bound::<Min>(),
493 ) {
494 return if cmp(&self_max.max_value(), &other_min.min_value()) {
495 } else {
497 self.set(stat, Precision::inexact(false));
498 };
499 }
500 }
501 self.clear(stat);
502 }
503
504 fn merge_null_count(&mut self, other: &TypedStatsSetRef) {
505 self.merge_sum_stat(Stat::NullCount, other)
506 }
507
508 fn merge_nan_count(&mut self, other: &TypedStatsSetRef) {
509 self.merge_sum_stat(Stat::NaNCount, other)
510 }
511
512 fn merge_uncompressed_size_in_bytes(&mut self, other: &TypedStatsSetRef) {
513 self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
514 }
515
516 fn merge_sum_stat(&mut self, stat: Stat, other: &TypedStatsSetRef) {
517 match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
518 (Some(nc1), Some(nc2)) => {
519 self.set(
520 stat,
521 nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
522 );
523 }
524 _ => self.clear(stat),
525 }
526 }
527}
528
529#[cfg(test)]
530mod test {
531 use enum_iterator::all;
532 use itertools::Itertools;
533 use vortex_dtype::{DType, Nullability, PType};
534
535 use crate::arrays::PrimitiveArray;
536 use crate::stats::stats_set::Scalar;
537 use crate::stats::{IsConstant, Precision, Stat, StatsProvider, StatsProviderExt, StatsSet};
538
539 #[test]
540 fn test_iter() {
541 let set = unsafe {
543 StatsSet::new_unchecked(vec![
544 (Stat::Max, Precision::exact(100)),
545 (Stat::Min, Precision::exact(42)),
546 ])
547 };
548 let mut iter = set.iter();
549 let first = iter.next().unwrap().clone();
550 assert_eq!(first.0, Stat::Max);
551 assert_eq!(
552 first
553 .1
554 .map(|f| i32::try_from(&Scalar::new(PType::I32.into(), f)).unwrap()),
555 Precision::exact(100)
556 );
557 let snd = iter.next().unwrap().clone();
558 assert_eq!(snd.0, Stat::Min);
559 assert_eq!(
560 snd.1
561 .map(|s| i32::try_from(&Scalar::new(PType::I32.into(), s)).unwrap()),
562 42
563 );
564 }
565
566 #[test]
567 fn into_iter() {
568 let mut set = unsafe {
570 StatsSet::new_unchecked(vec![
571 (Stat::Max, Precision::exact(100)),
572 (Stat::Min, Precision::exact(42)),
573 ])
574 }
575 .into_iter();
576 let (stat, first) = set.next().unwrap();
577 assert_eq!(stat, Stat::Max);
578 assert_eq!(
579 first.map(|f| i32::try_from(&Scalar::new(PType::I32.into(), f)).unwrap()),
580 Precision::exact(100)
581 );
582 let snd = set.next().unwrap();
583 assert_eq!(snd.0, Stat::Min);
584 assert_eq!(
585 snd.1
586 .map(|s| i32::try_from(&Scalar::new(PType::I32.into(), s)).unwrap()),
587 Precision::exact(42)
588 );
589 }
590
591 #[test]
592 fn merge_constant() {
593 let first = StatsSet::from_iter([
594 (Stat::Min, Precision::exact(42)),
595 (Stat::IsConstant, Precision::exact(true)),
596 ])
597 .merge_ordered(
598 &StatsSet::from_iter([
599 (Stat::Min, Precision::inexact(42)),
600 (Stat::IsConstant, Precision::exact(true)),
601 ]),
602 &DType::Primitive(PType::I32, Nullability::NonNullable),
603 );
604
605 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
606 assert_eq!(
607 first_ref.get_as::<bool>(Stat::IsConstant),
608 Some(Precision::exact(false))
609 );
610 assert_eq!(
611 first_ref.get_as::<i32>(Stat::Min),
612 Some(Precision::exact(42))
613 );
614 }
615
616 #[test]
617 fn merge_into_min() {
618 let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
619 &StatsSet::default(),
620 &DType::Primitive(PType::I32, Nullability::NonNullable),
621 );
622
623 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
624 assert!(first_ref.get(Stat::Min).is_none());
625 }
626
627 #[test]
628 fn merge_from_min() {
629 let first = StatsSet::default().merge_ordered(
630 &StatsSet::of(Stat::Min, Precision::exact(42)),
631 &DType::Primitive(PType::I32, Nullability::NonNullable),
632 );
633
634 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
635 assert!(first_ref.get(Stat::Min).is_none());
636 }
637
638 #[test]
639 fn merge_mins() {
640 let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
641 &StatsSet::of(Stat::Min, Precision::exact(42)),
642 &DType::Primitive(PType::I32, Nullability::NonNullable),
643 );
644
645 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
646 assert_eq!(
647 first_ref.get_as::<i32>(Stat::Min),
648 Some(Precision::exact(37))
649 );
650 }
651
652 #[test]
653 fn merge_into_bound_max() {
654 let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
655 &StatsSet::default(),
656 &DType::Primitive(PType::I32, Nullability::NonNullable),
657 );
658 assert!(first.get(Stat::Max).is_none());
659 }
660
661 #[test]
662 fn merge_from_max() {
663 let first = StatsSet::default().merge_ordered(
664 &StatsSet::of(Stat::Max, Precision::exact(42)),
665 &DType::Primitive(PType::I32, Nullability::NonNullable),
666 );
667 assert!(first.get(Stat::Max).is_none());
668 }
669
670 #[test]
671 fn merge_maxes() {
672 let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
673 &StatsSet::of(Stat::Max, Precision::exact(42)),
674 &DType::Primitive(PType::I32, Nullability::NonNullable),
675 );
676 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
677 assert_eq!(
678 first_ref.get_as::<i32>(Stat::Max),
679 Some(Precision::exact(42))
680 );
681 }
682
683 #[test]
684 fn merge_maxes_bound() {
685 let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
686 let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
687 .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
688 let first_ref = first.as_typed_ref(&dtype);
689 assert_eq!(
690 first_ref.get_as::<i32>(Stat::Max),
691 Some(Precision::inexact(43))
692 );
693 }
694
695 #[test]
696 fn merge_into_scalar() {
697 let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
698 &StatsSet::default(),
699 &DType::Primitive(PType::I32, Nullability::NonNullable),
700 );
701 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
702 assert!(first_ref.get(Stat::Sum).is_none());
703 }
704
705 #[test]
706 fn merge_from_scalar() {
707 let first = StatsSet::default().merge_ordered(
708 &StatsSet::of(Stat::Sum, Precision::exact(42)),
709 &DType::Primitive(PType::I32, Nullability::NonNullable),
710 );
711 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
712 assert!(first_ref.get(Stat::Sum).is_none());
713 }
714
715 #[test]
716 fn merge_scalars() {
717 let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
718 &StatsSet::of(Stat::Sum, Precision::exact(42)),
719 &DType::Primitive(PType::I32, Nullability::NonNullable),
720 );
721 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
722 assert_eq!(
723 first_ref.get_as::<usize>(Stat::Sum),
724 Some(Precision::exact(79usize))
725 );
726 }
727
728 #[test]
729 fn merge_into_sortedness() {
730 let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
731 &StatsSet::default(),
732 &DType::Primitive(PType::I32, Nullability::NonNullable),
733 );
734 assert!(first.get(Stat::IsStrictSorted).is_none());
735 }
736
737 #[test]
738 fn merge_from_sortedness() {
739 let first = StatsSet::default().merge_ordered(
740 &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
741 &DType::Primitive(PType::I32, Nullability::NonNullable),
742 );
743 assert!(first.get(Stat::IsStrictSorted).is_none());
744 }
745
746 #[test]
747 fn merge_sortedness() {
748 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
749 first.set(Stat::Max, Precision::exact(1));
750 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
751 second.set(Stat::Min, Precision::exact(2));
752 first = first.merge_ordered(
753 &second,
754 &DType::Primitive(PType::I32, Nullability::NonNullable),
755 );
756
757 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
758 assert_eq!(
759 first_ref.get_as::<bool>(Stat::IsStrictSorted),
760 Some(Precision::exact(true))
761 );
762 }
763
764 #[test]
765 fn merge_sortedness_out_of_order() {
766 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
767 first.set(Stat::Min, Precision::exact(1));
768 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
769 second.set(Stat::Max, Precision::exact(2));
770 second = second.merge_ordered(
771 &first,
772 &DType::Primitive(PType::I32, Nullability::NonNullable),
773 );
774
775 let second_ref =
776 second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
777 assert_eq!(
778 second_ref.get_as::<bool>(Stat::IsStrictSorted),
779 Some(Precision::inexact(false))
780 );
781 }
782
783 #[test]
784 fn merge_sortedness_only_one_sorted() {
785 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
786 first.set(Stat::Max, Precision::exact(1));
787 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
788 second.set(Stat::Min, Precision::exact(2));
789 first.merge_ordered(
790 &second,
791 &DType::Primitive(PType::I32, Nullability::NonNullable),
792 );
793
794 let second_ref =
795 second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
796 assert_eq!(
797 second_ref.get_as::<bool>(Stat::IsStrictSorted),
798 Some(Precision::exact(false))
799 );
800 }
801
802 #[test]
803 fn merge_sortedness_missing_min() {
804 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
805 first.set(Stat::Max, Precision::exact(1));
806 let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
807 first = first.merge_ordered(
808 &second,
809 &DType::Primitive(PType::I32, Nullability::NonNullable),
810 );
811 assert!(first.get(Stat::IsStrictSorted).is_none());
812 }
813
814 #[test]
815 fn merge_sortedness_bound_min() {
816 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
817 first.set(Stat::Max, Precision::exact(1));
818 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
819 second.set(Stat::Min, Precision::inexact(2));
820 first = first.merge_ordered(
821 &second,
822 &DType::Primitive(PType::I32, Nullability::NonNullable),
823 );
824
825 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
826 assert_eq!(
827 first_ref.get_as::<bool>(Stat::IsStrictSorted),
828 Some(Precision::exact(true))
829 );
830 }
831
832 #[test]
833 fn merge_unordered() {
834 let array =
835 PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
836 let all_stats = all::<Stat>()
837 .filter(|s| !matches!(s, Stat::Sum))
838 .filter(|s| !matches!(s, Stat::NaNCount))
839 .collect_vec();
840 array.statistics().compute_all(&all_stats).unwrap();
841
842 let stats = array.statistics().to_owned();
843 for stat in &all_stats {
844 assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
845 }
846
847 let merged = stats.clone().merge_unordered(
848 &stats,
849 &DType::Primitive(PType::I32, Nullability::NonNullable),
850 );
851 for stat in &all_stats {
852 assert_eq!(
853 merged.get(*stat).is_some(),
854 stat.is_commutative(),
855 "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
856 )
857 }
858
859 let merged_ref = merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
860 let stats_ref = stats.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
861
862 assert_eq!(
863 merged_ref.get_as::<i32>(Stat::Min),
864 stats_ref.get_as::<i32>(Stat::Min)
865 );
866 assert_eq!(
867 merged_ref.get_as::<i32>(Stat::Max),
868 stats_ref.get_as::<i32>(Stat::Max)
869 );
870 assert_eq!(
871 merged_ref.get_as::<u64>(Stat::NullCount).unwrap(),
872 stats_ref
873 .get_as::<u64>(Stat::NullCount)
874 .unwrap()
875 .map(|s| s * 2)
876 );
877 }
878
879 #[test]
880 fn merge_min_bound_same() {
881 let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
884 &StatsSet::of(Stat::Min, Precision::exact(5)),
885 &DType::Primitive(PType::I32, Nullability::NonNullable),
886 );
887 let merged_ref =
888 merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
889 assert_eq!(
890 merged_ref.get_as::<i32>(Stat::Min),
891 Some(Precision::exact(5))
892 );
893 }
894
895 #[test]
896 fn merge_min_bound_bound_lower() {
897 let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
898 &StatsSet::of(Stat::Min, Precision::exact(5)),
899 &DType::Primitive(PType::I32, Nullability::NonNullable),
900 );
901 let merged_ref =
902 merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
903 assert_eq!(
904 merged_ref.get_as::<i32>(Stat::Min),
905 Some(Precision::inexact(4))
906 );
907 }
908
909 #[test]
910 fn retain_approx() {
911 let set = StatsSet::from_iter([
912 (Stat::Max, Precision::exact(100)),
913 (Stat::Min, Precision::exact(50)),
914 (Stat::Sum, Precision::inexact(10)),
915 ]);
916
917 let set = set.keep_inexact_stats(&[Stat::Min, Stat::Max]);
918
919 let set_ref = set.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
920
921 assert_eq!(set.len(), 2);
922 assert_eq!(
923 set_ref.get_as::<i32>(Stat::Max),
924 Some(Precision::inexact(100))
925 );
926 assert_eq!(
927 set_ref.get_as::<i32>(Stat::Min),
928 Some(Precision::inexact(50))
929 );
930 assert_eq!(set_ref.get_as::<i32>(Stat::Sum), None);
931 }
932
933 #[test]
934 fn test_combine_is_constant() {
935 {
936 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
937 let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
938 let mut stats_ref =
939 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
940 stats_ref
941 .combine_bool_stat::<IsConstant>(
942 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
943 )
944 .unwrap();
945 assert_eq!(
946 stats_ref.get_as::<bool>(Stat::IsConstant),
947 Some(Precision::exact(true))
948 );
949 }
950
951 {
952 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
953 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
954 let mut stats_ref =
955 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
956 stats_ref
957 .combine_bool_stat::<IsConstant>(
958 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
959 )
960 .unwrap();
961 assert_eq!(
962 stats_ref.get_as::<bool>(Stat::IsConstant),
963 Some(Precision::exact(true))
964 );
965 }
966
967 {
968 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
969 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
970 let mut stats_ref =
971 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
972 stats_ref
973 .combine_bool_stat::<IsConstant>(
974 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
975 )
976 .unwrap();
977 assert_eq!(
978 stats_ref.get_as::<bool>(Stat::IsConstant),
979 Some(Precision::exact(false))
980 );
981 }
982 }
983
984 #[test]
985 fn test_combine_sets_boolean_conflict() {
986 let mut stats1 = StatsSet::from_iter([
987 (Stat::IsConstant, Precision::exact(true)),
988 (Stat::IsSorted, Precision::exact(true)),
989 ]);
990
991 let stats2 = StatsSet::from_iter([
992 (Stat::IsConstant, Precision::exact(false)),
993 (Stat::IsSorted, Precision::exact(true)),
994 ]);
995
996 let result = stats1.combine_sets(
997 &stats2,
998 &DType::Primitive(PType::I32, Nullability::NonNullable),
999 );
1000 assert!(result.is_err());
1001 }
1002
1003 #[test]
1004 fn test_combine_sets_with_missing_stats() {
1005 let mut stats1 = StatsSet::from_iter([
1006 (Stat::Min, Precision::exact(42)),
1007 (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
1008 ]);
1009
1010 let stats2 = StatsSet::from_iter([
1011 (Stat::Max, Precision::exact(100)),
1012 (Stat::IsStrictSorted, Precision::exact(true)),
1013 ]);
1014
1015 stats1
1016 .combine_sets(
1017 &stats2,
1018 &DType::Primitive(PType::I32, Nullability::NonNullable),
1019 )
1020 .unwrap();
1021
1022 let stats_ref =
1023 stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1024
1025 assert_eq!(
1027 stats_ref.get_as::<i32>(Stat::Min),
1028 Some(Precision::exact(42))
1029 );
1030 assert_eq!(
1032 stats_ref.get_as::<i32>(Stat::Max),
1033 Some(Precision::exact(100))
1034 );
1035 assert_eq!(
1037 stats_ref.get_as::<bool>(Stat::IsStrictSorted),
1038 Some(Precision::exact(true))
1039 );
1040 }
1041
1042 #[test]
1043 fn test_combine_sets_with_inexact() {
1044 let mut stats1 = StatsSet::from_iter([
1045 (Stat::Min, Precision::exact(42)),
1046 (Stat::Max, Precision::inexact(100)),
1047 (Stat::IsConstant, Precision::exact(false)),
1048 ]);
1049
1050 let stats2 = StatsSet::from_iter([
1051 (Stat::Min, Precision::inexact(40)),
1053 (Stat::Max, Precision::exact(90)),
1054 (Stat::IsSorted, Precision::exact(true)),
1055 ]);
1056
1057 stats1
1058 .combine_sets(
1059 &stats2,
1060 &DType::Primitive(PType::I32, Nullability::NonNullable),
1061 )
1062 .unwrap();
1063
1064 let stats_ref =
1065 stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1066
1067 assert_eq!(
1069 stats_ref.get_as::<i32>(Stat::Min),
1070 Some(Precision::exact(42))
1071 );
1072 assert_eq!(
1074 stats_ref.get_as::<i32>(Stat::Max),
1075 Some(Precision::exact(90))
1076 );
1077 assert_eq!(
1079 stats_ref.get_as::<bool>(Stat::IsSorted),
1080 Some(Precision::exact(true))
1081 );
1082 }
1083}