1use std::fmt::Debug;
5
6use enum_iterator::{Sequence, all};
7use num_traits::CheckedAdd;
8use vortex_dtype::DType;
9use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_err, vortex_panic};
10use vortex_scalar::{Scalar, ScalarValue};
11
12use super::{
13 IsSorted, IsStrictSorted, NaNCount, NullCount, StatType, StatsProvider, UncompressedSizeInBytes,
14};
15use crate::stats::{IsConstant, Max, Min, Precision, Stat, StatBound, StatsProviderExt, Sum};
16
17#[derive(Default, Debug, Clone)]
18pub struct StatsSet {
19 values: Vec<(Stat, Precision<ScalarValue>)>,
20}
21
22impl StatsSet {
23 pub fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
29 Self { values }
30 }
31
32 pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
34 Self::new_unchecked(vec![(stat, value)])
35 }
36
37 fn reserve_full_capacity(&mut self) {
38 if self.values.capacity() < Stat::CARDINALITY {
39 self.values
40 .reserve_exact(Stat::CARDINALITY - self.values.capacity());
41 }
42 }
43
44 pub fn as_mut_typed_ref<'a, 'b>(&'a mut self, dtype: &'b DType) -> MutTypedStatsSetRef<'a, 'b> {
46 MutTypedStatsSetRef {
47 values: self,
48 dtype,
49 }
50 }
51
52 pub fn as_typed_ref<'a, 'b>(&'a self, dtype: &'b DType) -> TypedStatsSetRef<'a, 'b> {
54 TypedStatsSetRef {
55 values: self,
56 dtype,
57 }
58 }
59}
60
61impl StatsSet {
63 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
65 self.reserve_full_capacity();
66
67 if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
68 *existing = (stat, value);
69 } else {
70 self.values.push((stat, value));
71 }
72 }
73
74 pub fn clear(&mut self, stat: Stat) {
76 self.values.retain(|(s, _)| *s != stat);
77 }
78
79 pub fn retain_only(&mut self, stats: &[Stat]) {
81 self.values.retain(|(s, _)| stats.contains(s));
82 }
83
84 pub fn keep_inexact_stats(self, inexact_keep: &[Stat]) -> Self {
86 self.values
87 .into_iter()
88 .filter_map(|(s, v)| inexact_keep.contains(&s).then(|| (s, v.into_inexact())))
89 .collect()
90 }
91
92 pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
96 self.values.iter()
97 }
98
99 pub fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
101 self.values
102 .iter()
103 .find(|(s, _)| *s == stat)
104 .map(|(_, v)| v.clone())
105 }
106
107 pub fn len(&self) -> usize {
109 self.values.len()
110 }
111
112 pub fn is_empty(&self) -> bool {
114 self.values.is_empty()
115 }
116
117 pub fn get_as<T: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
119 &self,
120 stat: Stat,
121 dtype: &DType,
122 ) -> Option<Precision<T>> {
123 self.get(stat).map(|v| {
124 v.map(|v| {
125 T::try_from(&Scalar::new(dtype.clone(), v)).unwrap_or_else(|err| {
126 vortex_panic!(
127 err,
128 "Failed to get stat {} as {}",
129 stat,
130 std::any::type_name::<T>()
131 )
132 })
133 })
134 })
135 }
136}
137
138pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
144
145impl Iterator for StatsSetIntoIter {
146 type Item = (Stat, Precision<ScalarValue>);
147
148 fn next(&mut self) -> Option<Self::Item> {
149 self.0.next()
150 }
151}
152
153impl IntoIterator for StatsSet {
154 type Item = (Stat, Precision<ScalarValue>);
155 type IntoIter = StatsSetIntoIter;
156
157 fn into_iter(self) -> Self::IntoIter {
158 StatsSetIntoIter(self.values.into_iter())
159 }
160}
161
162impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
163 fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
164 let iter = iter.into_iter();
165 let mut values = Vec::default();
166 values.reserve_exact(Stat::CARDINALITY);
167
168 let mut this = Self { values };
169 this.extend(iter);
170 this
171 }
172}
173
174impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
175 #[inline]
176 fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
177 let iter = iter.into_iter();
178 self.reserve_full_capacity();
179
180 iter.for_each(|(stat, value)| self.set(stat, value));
181 }
182}
183
184impl StatsSet {
186 pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
189 self.as_mut_typed_ref(dtype)
190 .merge_ordered(&other.as_typed_ref(dtype));
191 self
192 }
193
194 pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
197 self.as_mut_typed_ref(dtype)
198 .merge_unordered(&other.as_typed_ref(dtype));
199 self
200 }
201
202 pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
204 self.as_mut_typed_ref(dtype)
205 .combine_sets(&other.as_typed_ref(dtype))
206 }
207}
208
209pub struct TypedStatsSetRef<'a, 'b> {
210 pub values: &'a StatsSet,
211 pub dtype: &'b DType,
212}
213
214impl StatsProvider for TypedStatsSetRef<'_, '_> {
215 fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
216 self.values.get(stat).map(|p| {
217 p.map(|sv| {
218 Scalar::new(
219 stat.dtype(self.dtype)
220 .vortex_expect("Must have valid dtype if value is present"),
221 sv,
222 )
223 })
224 })
225 }
226
227 fn len(&self) -> usize {
228 self.values.len()
229 }
230}
231
232pub struct MutTypedStatsSetRef<'a, 'b> {
233 pub values: &'a mut StatsSet,
234 pub dtype: &'b DType,
235}
236
237impl MutTypedStatsSetRef<'_, '_> {
238 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
240 self.values.set(stat, value);
241 }
242
243 pub fn clear(&mut self, stat: Stat) {
245 self.values.clear(stat);
246 }
247}
248
249impl StatsProvider for MutTypedStatsSetRef<'_, '_> {
250 fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
251 self.values.get(stat).map(|p| {
252 p.map(|sv| {
253 Scalar::new(
254 stat.dtype(self.dtype)
255 .vortex_expect("Must have valid dtype if value is present"),
256 sv,
257 )
258 })
259 })
260 }
261
262 fn len(&self) -> usize {
263 self.values.len()
264 }
265}
266
267impl MutTypedStatsSetRef<'_, '_> {
269 pub fn merge_ordered(mut self, other: &TypedStatsSetRef) -> Self {
272 for s in all::<Stat>() {
273 match s {
274 Stat::IsConstant => self.merge_is_constant(other),
275 Stat::IsSorted => self.merge_is_sorted(other),
276 Stat::IsStrictSorted => self.merge_is_strict_sorted(other),
277 Stat::Max => self.merge_max(other),
278 Stat::Min => self.merge_min(other),
279 Stat::Sum => self.merge_sum(other),
280 Stat::NullCount => self.merge_null_count(other),
281 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
282 Stat::NaNCount => self.merge_nan_count(other),
283 }
284 }
285
286 self
287 }
288
289 pub fn merge_unordered(mut self, other: &TypedStatsSetRef) -> Self {
292 for s in all::<Stat>() {
293 if !s.is_commutative() {
294 self.clear(s);
295 continue;
296 }
297
298 match s {
299 Stat::IsConstant => self.merge_is_constant(other),
300 Stat::Max => self.merge_max(other),
301 Stat::Min => self.merge_min(other),
302 Stat::Sum => self.merge_sum(other),
303 Stat::NullCount => self.merge_null_count(other),
304 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
305 Stat::IsSorted | Stat::IsStrictSorted => {
306 unreachable!("not commutative")
307 }
308 Stat::NaNCount => self.merge_nan_count(other),
309 }
310 }
311
312 self
313 }
314
315 pub fn combine_sets(&mut self, other: &TypedStatsSetRef) -> VortexResult<()> {
317 let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
318 for s in other_stats {
319 match s {
320 Stat::Max => self.combine_bound::<Max>(other)?,
321 Stat::Min => self.combine_bound::<Min>(other)?,
322 Stat::UncompressedSizeInBytes => {
323 self.combine_bound::<UncompressedSizeInBytes>(other)?
324 }
325 Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
326 Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
327 Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
328 Stat::NullCount => self.combine_bound::<NullCount>(other)?,
329 Stat::Sum => self.combine_bound::<Sum>(other)?,
330 Stat::NaNCount => self.combine_bound::<NaNCount>(other)?,
331 }
332 }
333 Ok(())
334 }
335
336 fn combine_bound<S: StatType<Scalar>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
337 where
338 S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
339 {
340 match (self.get_scalar_bound::<S>(), other.get_scalar_bound::<S>()) {
341 (Some(m1), Some(m2)) => {
342 let meet = m1
343 .intersection(&m2)
344 .vortex_expect("can always compare scalar")
345 .ok_or_else(|| {
346 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
347 })?;
348 if meet != m1 {
349 self.set(S::STAT, meet.into_value().map(Scalar::into_value));
350 }
351 }
352 (None, Some(m)) => self.set(S::STAT, m.into_value().map(Scalar::into_value)),
353 (Some(_), _) => (),
354 (None, None) => self.clear(S::STAT),
355 }
356 Ok(())
357 }
358
359 fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
360 where
361 S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
362 {
363 match (
364 self.get_as_bound::<S, bool>(),
365 other.get_as_bound::<S, bool>(),
366 ) {
367 (Some(m1), Some(m2)) => {
368 let intersection = m1
369 .intersection(&m2)
370 .vortex_expect("can always compare boolean")
371 .ok_or_else(|| {
372 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
373 })?;
374 if intersection != m1 {
375 self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
376 }
377 }
378 (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
379 (Some(_), None) => (),
380 (None, None) => self.clear(S::STAT),
381 }
382 Ok(())
383 }
384
385 fn merge_min(&mut self, other: &TypedStatsSetRef) {
386 match (
387 self.get_scalar_bound::<Min>(),
388 other.get_scalar_bound::<Min>(),
389 ) {
390 (Some(m1), Some(m2)) => {
391 let meet = m1.union(&m2).vortex_expect("can compare scalar");
392 if meet != m1 {
393 self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
394 }
395 }
396 _ => self.clear(Stat::Min),
397 }
398 }
399
400 fn merge_max(&mut self, other: &TypedStatsSetRef) {
401 match (
402 self.get_scalar_bound::<Max>(),
403 other.get_scalar_bound::<Max>(),
404 ) {
405 (Some(m1), Some(m2)) => {
406 let meet = m1.union(&m2).vortex_expect("can compare scalar");
407 if meet != m1 {
408 self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
409 }
410 }
411 _ => self.clear(Stat::Max),
412 }
413 }
414
415 fn merge_sum(&mut self, other: &TypedStatsSetRef) {
416 match (
417 self.get_scalar_bound::<Sum>(),
418 other.get_scalar_bound::<Sum>(),
419 ) {
420 (Some(m1), Some(m2)) => {
421 if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
423 s1.as_primitive()
424 .checked_add(&s2.as_primitive())
425 .map(|pscalar| {
426 pscalar
427 .pvalue()
428 .map(|pvalue| {
429 Scalar::primitive_value(
430 pvalue,
431 pscalar.ptype(),
432 pscalar.dtype().nullability(),
433 )
434 .into_value()
435 })
436 .unwrap_or_else(ScalarValue::null)
437 })
438 }) {
439 self.set(Stat::Sum, Precision::Exact(scalar_value));
440 }
441 }
442 _ => self.clear(Stat::Sum),
443 }
444 }
445
446 fn merge_is_constant(&mut self, other: &TypedStatsSetRef) {
447 let self_const = self.get_as(Stat::IsConstant);
448 let other_const = other.get_as(Stat::IsConstant);
449 let self_min = self.get(Stat::Min);
450 let other_min = other.get(Stat::Min);
451
452 if let (
453 Some(Precision::Exact(self_const)),
454 Some(Precision::Exact(other_const)),
455 Some(Precision::Exact(self_min)),
456 Some(Precision::Exact(other_min)),
457 ) = (self_const, other_const, self_min, other_min)
458 {
459 if self_const && other_const && self_min == other_min {
460 self.set(Stat::IsConstant, Precision::exact(true));
461 } else {
462 self.set(Stat::IsConstant, Precision::inexact(false));
463 }
464 }
465 self.set(Stat::IsConstant, Precision::exact(false));
466 }
467
468 fn merge_is_sorted(&mut self, other: &TypedStatsSetRef) {
469 self.merge_sortedness_stat(other, Stat::IsSorted, PartialOrd::le)
470 }
471
472 fn merge_is_strict_sorted(&mut self, other: &TypedStatsSetRef) {
473 self.merge_sortedness_stat(other, Stat::IsStrictSorted, PartialOrd::lt)
474 }
475
476 fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
477 &mut self,
478 other: &TypedStatsSetRef,
479 stat: Stat,
480 cmp: F,
481 ) {
482 if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
483 == (self.get_as(stat), other.get_as(stat))
484 {
485 if let (Some(self_max), Some(other_min)) = (
490 self.get_scalar_bound::<Max>(),
491 other.get_scalar_bound::<Min>(),
492 ) {
493 return if cmp(&self_max.max_value(), &other_min.min_value()) {
494 } else {
496 self.set(stat, Precision::inexact(false));
497 };
498 }
499 }
500 self.clear(stat);
501 }
502
503 fn merge_null_count(&mut self, other: &TypedStatsSetRef) {
504 self.merge_sum_stat(Stat::NullCount, other)
505 }
506
507 fn merge_nan_count(&mut self, other: &TypedStatsSetRef) {
508 self.merge_sum_stat(Stat::NaNCount, other)
509 }
510
511 fn merge_uncompressed_size_in_bytes(&mut self, other: &TypedStatsSetRef) {
512 self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
513 }
514
515 fn merge_sum_stat(&mut self, stat: Stat, other: &TypedStatsSetRef) {
516 match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
517 (Some(nc1), Some(nc2)) => {
518 self.set(
519 stat,
520 nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
521 );
522 }
523 _ => self.clear(stat),
524 }
525 }
526}
527
528#[cfg(test)]
529mod test {
530 use enum_iterator::all;
531 use itertools::Itertools;
532 use vortex_dtype::{DType, Nullability, PType};
533
534 use crate::arrays::PrimitiveArray;
535 use crate::stats::stats_set::Scalar;
536 use crate::stats::{IsConstant, Precision, Stat, StatsProvider, StatsProviderExt, StatsSet};
537
538 #[test]
539 fn test_iter() {
540 let set = StatsSet::new_unchecked(vec![
541 (Stat::Max, Precision::exact(100)),
542 (Stat::Min, Precision::exact(42)),
543 ]);
544 let mut iter = set.iter();
545 let first = iter.next().unwrap().clone();
546 assert_eq!(first.0, Stat::Max);
547 assert_eq!(
548 first
549 .1
550 .map(|f| i32::try_from(&Scalar::new(PType::I32.into(), f)).unwrap()),
551 Precision::exact(100)
552 );
553 let snd = iter.next().unwrap().clone();
554 assert_eq!(snd.0, Stat::Min);
555 assert_eq!(
556 snd.1
557 .map(|s| i32::try_from(&Scalar::new(PType::I32.into(), s)).unwrap()),
558 42
559 );
560 }
561
562 #[test]
563 fn into_iter() {
564 let mut set = StatsSet::new_unchecked(vec![
565 (Stat::Max, Precision::exact(100)),
566 (Stat::Min, Precision::exact(42)),
567 ])
568 .into_iter();
569 let (stat, first) = set.next().unwrap();
570 assert_eq!(stat, Stat::Max);
571 assert_eq!(
572 first.map(|f| i32::try_from(&Scalar::new(PType::I32.into(), f)).unwrap()),
573 Precision::exact(100)
574 );
575 let snd = set.next().unwrap();
576 assert_eq!(snd.0, Stat::Min);
577 assert_eq!(
578 snd.1
579 .map(|s| i32::try_from(&Scalar::new(PType::I32.into(), s)).unwrap()),
580 Precision::exact(42)
581 );
582 }
583
584 #[test]
585 fn merge_constant() {
586 let first = StatsSet::from_iter([
587 (Stat::Min, Precision::exact(42)),
588 (Stat::IsConstant, Precision::exact(true)),
589 ])
590 .merge_ordered(
591 &StatsSet::from_iter([
592 (Stat::Min, Precision::inexact(42)),
593 (Stat::IsConstant, Precision::exact(true)),
594 ]),
595 &DType::Primitive(PType::I32, Nullability::NonNullable),
596 );
597
598 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
599 assert_eq!(
600 first_ref.get_as::<bool>(Stat::IsConstant),
601 Some(Precision::exact(false))
602 );
603 assert_eq!(
604 first_ref.get_as::<i32>(Stat::Min),
605 Some(Precision::exact(42))
606 );
607 }
608
609 #[test]
610 fn merge_into_min() {
611 let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
612 &StatsSet::default(),
613 &DType::Primitive(PType::I32, Nullability::NonNullable),
614 );
615
616 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
617 assert!(first_ref.get(Stat::Min).is_none());
618 }
619
620 #[test]
621 fn merge_from_min() {
622 let first = StatsSet::default().merge_ordered(
623 &StatsSet::of(Stat::Min, Precision::exact(42)),
624 &DType::Primitive(PType::I32, Nullability::NonNullable),
625 );
626
627 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
628 assert!(first_ref.get(Stat::Min).is_none());
629 }
630
631 #[test]
632 fn merge_mins() {
633 let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
634 &StatsSet::of(Stat::Min, Precision::exact(42)),
635 &DType::Primitive(PType::I32, Nullability::NonNullable),
636 );
637
638 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
639 assert_eq!(
640 first_ref.get_as::<i32>(Stat::Min),
641 Some(Precision::exact(37))
642 );
643 }
644
645 #[test]
646 fn merge_into_bound_max() {
647 let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
648 &StatsSet::default(),
649 &DType::Primitive(PType::I32, Nullability::NonNullable),
650 );
651 assert!(first.get(Stat::Max).is_none());
652 }
653
654 #[test]
655 fn merge_from_max() {
656 let first = StatsSet::default().merge_ordered(
657 &StatsSet::of(Stat::Max, Precision::exact(42)),
658 &DType::Primitive(PType::I32, Nullability::NonNullable),
659 );
660 assert!(first.get(Stat::Max).is_none());
661 }
662
663 #[test]
664 fn merge_maxes() {
665 let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
666 &StatsSet::of(Stat::Max, Precision::exact(42)),
667 &DType::Primitive(PType::I32, Nullability::NonNullable),
668 );
669 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
670 assert_eq!(
671 first_ref.get_as::<i32>(Stat::Max),
672 Some(Precision::exact(42))
673 );
674 }
675
676 #[test]
677 fn merge_maxes_bound() {
678 let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
679 let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
680 .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
681 let first_ref = first.as_typed_ref(&dtype);
682 assert_eq!(
683 first_ref.get_as::<i32>(Stat::Max),
684 Some(Precision::inexact(43))
685 );
686 }
687
688 #[test]
689 fn merge_into_scalar() {
690 let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
691 &StatsSet::default(),
692 &DType::Primitive(PType::I32, Nullability::NonNullable),
693 );
694 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
695 assert!(first_ref.get(Stat::Sum).is_none());
696 }
697
698 #[test]
699 fn merge_from_scalar() {
700 let first = StatsSet::default().merge_ordered(
701 &StatsSet::of(Stat::Sum, Precision::exact(42)),
702 &DType::Primitive(PType::I32, Nullability::NonNullable),
703 );
704 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
705 assert!(first_ref.get(Stat::Sum).is_none());
706 }
707
708 #[test]
709 fn merge_scalars() {
710 let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
711 &StatsSet::of(Stat::Sum, Precision::exact(42)),
712 &DType::Primitive(PType::I32, Nullability::NonNullable),
713 );
714 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
715 assert_eq!(
716 first_ref.get_as::<usize>(Stat::Sum),
717 Some(Precision::exact(79usize))
718 );
719 }
720
721 #[test]
722 fn merge_into_sortedness() {
723 let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
724 &StatsSet::default(),
725 &DType::Primitive(PType::I32, Nullability::NonNullable),
726 );
727 assert!(first.get(Stat::IsStrictSorted).is_none());
728 }
729
730 #[test]
731 fn merge_from_sortedness() {
732 let first = StatsSet::default().merge_ordered(
733 &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
734 &DType::Primitive(PType::I32, Nullability::NonNullable),
735 );
736 assert!(first.get(Stat::IsStrictSorted).is_none());
737 }
738
739 #[test]
740 fn merge_sortedness() {
741 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
742 first.set(Stat::Max, Precision::exact(1));
743 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
744 second.set(Stat::Min, Precision::exact(2));
745 first = first.merge_ordered(
746 &second,
747 &DType::Primitive(PType::I32, Nullability::NonNullable),
748 );
749
750 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
751 assert_eq!(
752 first_ref.get_as::<bool>(Stat::IsStrictSorted),
753 Some(Precision::exact(true))
754 );
755 }
756
757 #[test]
758 fn merge_sortedness_out_of_order() {
759 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
760 first.set(Stat::Min, Precision::exact(1));
761 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
762 second.set(Stat::Max, Precision::exact(2));
763 second = second.merge_ordered(
764 &first,
765 &DType::Primitive(PType::I32, Nullability::NonNullable),
766 );
767
768 let second_ref =
769 second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
770 assert_eq!(
771 second_ref.get_as::<bool>(Stat::IsStrictSorted),
772 Some(Precision::inexact(false))
773 );
774 }
775
776 #[test]
777 fn merge_sortedness_only_one_sorted() {
778 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
779 first.set(Stat::Max, Precision::exact(1));
780 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
781 second.set(Stat::Min, Precision::exact(2));
782 first.merge_ordered(
783 &second,
784 &DType::Primitive(PType::I32, Nullability::NonNullable),
785 );
786
787 let second_ref =
788 second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
789 assert_eq!(
790 second_ref.get_as::<bool>(Stat::IsStrictSorted),
791 Some(Precision::exact(false))
792 );
793 }
794
795 #[test]
796 fn merge_sortedness_missing_min() {
797 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
798 first.set(Stat::Max, Precision::exact(1));
799 let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
800 first = first.merge_ordered(
801 &second,
802 &DType::Primitive(PType::I32, Nullability::NonNullable),
803 );
804 assert!(first.get(Stat::IsStrictSorted).is_none());
805 }
806
807 #[test]
808 fn merge_sortedness_bound_min() {
809 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
810 first.set(Stat::Max, Precision::exact(1));
811 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
812 second.set(Stat::Min, Precision::inexact(2));
813 first = first.merge_ordered(
814 &second,
815 &DType::Primitive(PType::I32, Nullability::NonNullable),
816 );
817
818 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
819 assert_eq!(
820 first_ref.get_as::<bool>(Stat::IsStrictSorted),
821 Some(Precision::exact(true))
822 );
823 }
824
825 #[test]
826 fn merge_unordered() {
827 let array =
828 PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
829 let all_stats = all::<Stat>()
830 .filter(|s| !matches!(s, Stat::Sum))
831 .filter(|s| !matches!(s, Stat::NaNCount))
832 .collect_vec();
833 array.statistics().compute_all(&all_stats).unwrap();
834
835 let stats = array.statistics().to_owned();
836 for stat in &all_stats {
837 assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
838 }
839
840 let merged = stats.clone().merge_unordered(
841 &stats,
842 &DType::Primitive(PType::I32, Nullability::NonNullable),
843 );
844 for stat in &all_stats {
845 assert_eq!(
846 merged.get(*stat).is_some(),
847 stat.is_commutative(),
848 "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
849 )
850 }
851
852 let merged_ref = merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
853 let stats_ref = stats.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
854
855 assert_eq!(
856 merged_ref.get_as::<i32>(Stat::Min),
857 stats_ref.get_as::<i32>(Stat::Min)
858 );
859 assert_eq!(
860 merged_ref.get_as::<i32>(Stat::Max),
861 stats_ref.get_as::<i32>(Stat::Max)
862 );
863 assert_eq!(
864 merged_ref.get_as::<u64>(Stat::NullCount).unwrap(),
865 stats_ref
866 .get_as::<u64>(Stat::NullCount)
867 .unwrap()
868 .map(|s| s * 2)
869 );
870 }
871
872 #[test]
873 fn merge_min_bound_same() {
874 let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
877 &StatsSet::of(Stat::Min, Precision::exact(5)),
878 &DType::Primitive(PType::I32, Nullability::NonNullable),
879 );
880 let merged_ref =
881 merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
882 assert_eq!(
883 merged_ref.get_as::<i32>(Stat::Min),
884 Some(Precision::exact(5))
885 );
886 }
887
888 #[test]
889 fn merge_min_bound_bound_lower() {
890 let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
891 &StatsSet::of(Stat::Min, Precision::exact(5)),
892 &DType::Primitive(PType::I32, Nullability::NonNullable),
893 );
894 let merged_ref =
895 merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
896 assert_eq!(
897 merged_ref.get_as::<i32>(Stat::Min),
898 Some(Precision::inexact(4))
899 );
900 }
901
902 #[test]
903 fn retain_approx() {
904 let set = StatsSet::from_iter([
905 (Stat::Max, Precision::exact(100)),
906 (Stat::Min, Precision::exact(50)),
907 (Stat::Sum, Precision::inexact(10)),
908 ]);
909
910 let set = set.keep_inexact_stats(&[Stat::Min, Stat::Max]);
911
912 let set_ref = set.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
913
914 assert_eq!(set.len(), 2);
915 assert_eq!(
916 set_ref.get_as::<i32>(Stat::Max),
917 Some(Precision::inexact(100))
918 );
919 assert_eq!(
920 set_ref.get_as::<i32>(Stat::Min),
921 Some(Precision::inexact(50))
922 );
923 assert_eq!(set_ref.get_as::<i32>(Stat::Sum), None);
924 }
925
926 #[test]
927 fn test_combine_is_constant() {
928 {
929 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
930 let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
931 let mut stats_ref =
932 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
933 stats_ref
934 .combine_bool_stat::<IsConstant>(
935 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
936 )
937 .unwrap();
938 assert_eq!(
939 stats_ref.get_as::<bool>(Stat::IsConstant),
940 Some(Precision::exact(true))
941 );
942 }
943
944 {
945 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
946 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
947 let mut stats_ref =
948 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
949 stats_ref
950 .combine_bool_stat::<IsConstant>(
951 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
952 )
953 .unwrap();
954 assert_eq!(
955 stats_ref.get_as::<bool>(Stat::IsConstant),
956 Some(Precision::exact(true))
957 );
958 }
959
960 {
961 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
962 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
963 let mut stats_ref =
964 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
965 stats_ref
966 .combine_bool_stat::<IsConstant>(
967 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
968 )
969 .unwrap();
970 assert_eq!(
971 stats_ref.get_as::<bool>(Stat::IsConstant),
972 Some(Precision::exact(false))
973 );
974 }
975 }
976
977 #[test]
978 fn test_combine_sets_boolean_conflict() {
979 let mut stats1 = StatsSet::from_iter([
980 (Stat::IsConstant, Precision::exact(true)),
981 (Stat::IsSorted, Precision::exact(true)),
982 ]);
983
984 let stats2 = StatsSet::from_iter([
985 (Stat::IsConstant, Precision::exact(false)),
986 (Stat::IsSorted, Precision::exact(true)),
987 ]);
988
989 let result = stats1.combine_sets(
990 &stats2,
991 &DType::Primitive(PType::I32, Nullability::NonNullable),
992 );
993 assert!(result.is_err());
994 }
995
996 #[test]
997 fn test_combine_sets_with_missing_stats() {
998 let mut stats1 = StatsSet::from_iter([
999 (Stat::Min, Precision::exact(42)),
1000 (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
1001 ]);
1002
1003 let stats2 = StatsSet::from_iter([
1004 (Stat::Max, Precision::exact(100)),
1005 (Stat::IsStrictSorted, Precision::exact(true)),
1006 ]);
1007
1008 stats1
1009 .combine_sets(
1010 &stats2,
1011 &DType::Primitive(PType::I32, Nullability::NonNullable),
1012 )
1013 .unwrap();
1014
1015 let stats_ref =
1016 stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1017
1018 assert_eq!(
1020 stats_ref.get_as::<i32>(Stat::Min),
1021 Some(Precision::exact(42))
1022 );
1023 assert_eq!(
1025 stats_ref.get_as::<i32>(Stat::Max),
1026 Some(Precision::exact(100))
1027 );
1028 assert_eq!(
1030 stats_ref.get_as::<bool>(Stat::IsStrictSorted),
1031 Some(Precision::exact(true))
1032 );
1033 }
1034
1035 #[test]
1036 fn test_combine_sets_with_inexact() {
1037 let mut stats1 = StatsSet::from_iter([
1038 (Stat::Min, Precision::exact(42)),
1039 (Stat::Max, Precision::inexact(100)),
1040 (Stat::IsConstant, Precision::exact(false)),
1041 ]);
1042
1043 let stats2 = StatsSet::from_iter([
1044 (Stat::Min, Precision::inexact(40)),
1046 (Stat::Max, Precision::exact(90)),
1047 (Stat::IsSorted, Precision::exact(true)),
1048 ]);
1049
1050 stats1
1051 .combine_sets(
1052 &stats2,
1053 &DType::Primitive(PType::I32, Nullability::NonNullable),
1054 )
1055 .unwrap();
1056
1057 let stats_ref =
1058 stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1059
1060 assert_eq!(
1062 stats_ref.get_as::<i32>(Stat::Min),
1063 Some(Precision::exact(42))
1064 );
1065 assert_eq!(
1067 stats_ref.get_as::<i32>(Stat::Max),
1068 Some(Precision::exact(90))
1069 );
1070 assert_eq!(
1072 stats_ref.get_as::<bool>(Stat::IsSorted),
1073 Some(Precision::exact(true))
1074 );
1075 }
1076}