1use std::fmt::Debug;
5
6use enum_iterator::{Sequence, all};
7use num_traits::CheckedAdd;
8use vortex_dtype::DType;
9use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_err, vortex_panic};
10use vortex_scalar::{Scalar, ScalarValue};
11
12use super::{
13 IsSorted, IsStrictSorted, NaNCount, NullCount, StatType, StatsProvider, UncompressedSizeInBytes,
14};
15use crate::stats::{IsConstant, Max, Min, Precision, Stat, StatBound, StatsProviderExt, Sum};
16
17#[derive(Default, Debug, Clone)]
18pub struct StatsSet {
19 values: Vec<(Stat, Precision<ScalarValue>)>,
20}
21
22impl StatsSet {
23 pub unsafe fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
29 Self { values }
30 }
31
32 pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
34 unsafe { Self::new_unchecked(vec![(stat, value)]) }
36 }
37
38 fn reserve_full_capacity(&mut self) {
39 if self.values.capacity() < Stat::CARDINALITY {
40 self.values
41 .reserve_exact(Stat::CARDINALITY - self.values.capacity());
42 }
43 }
44
45 pub fn as_mut_typed_ref<'a, 'b>(&'a mut self, dtype: &'b DType) -> MutTypedStatsSetRef<'a, 'b> {
47 MutTypedStatsSetRef {
48 values: self,
49 dtype,
50 }
51 }
52
53 pub fn as_typed_ref<'a, 'b>(&'a self, dtype: &'b DType) -> TypedStatsSetRef<'a, 'b> {
55 TypedStatsSetRef {
56 values: self,
57 dtype,
58 }
59 }
60}
61
62impl StatsSet {
64 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
66 self.reserve_full_capacity();
67
68 if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
69 *existing = (stat, value);
70 } else {
71 self.values.push((stat, value));
72 }
73 }
74
75 pub fn clear(&mut self, stat: Stat) {
77 self.values.retain(|(s, _)| *s != stat);
78 }
79
80 pub fn retain_only(&mut self, stats: &[Stat]) {
82 self.values.retain(|(s, _)| stats.contains(s));
83 }
84
85 pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
89 self.values.iter()
90 }
91
92 pub fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
94 self.values
95 .iter()
96 .find(|(s, _)| *s == stat)
97 .map(|(_, v)| v.clone())
98 }
99
100 pub fn len(&self) -> usize {
102 self.values.len()
103 }
104
105 pub fn is_empty(&self) -> bool {
107 self.values.is_empty()
108 }
109
110 pub fn get_as<T: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
112 &self,
113 stat: Stat,
114 dtype: &DType,
115 ) -> Option<Precision<T>> {
116 self.get(stat).map(|v| {
117 v.map(|v| {
118 T::try_from(&Scalar::new(dtype.clone(), v)).unwrap_or_else(|err| {
119 vortex_panic!(
120 err,
121 "Failed to get stat {} as {}",
122 stat,
123 std::any::type_name::<T>()
124 )
125 })
126 })
127 })
128 }
129}
130
131pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
137
138impl Iterator for StatsSetIntoIter {
139 type Item = (Stat, Precision<ScalarValue>);
140
141 fn next(&mut self) -> Option<Self::Item> {
142 self.0.next()
143 }
144}
145
146impl IntoIterator for StatsSet {
147 type Item = (Stat, Precision<ScalarValue>);
148 type IntoIter = StatsSetIntoIter;
149
150 fn into_iter(self) -> Self::IntoIter {
151 StatsSetIntoIter(self.values.into_iter())
152 }
153}
154
155impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
156 fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
157 let iter = iter.into_iter();
158 let mut values = Vec::default();
159 values.reserve_exact(Stat::CARDINALITY);
160
161 let mut this = Self { values };
162 this.extend(iter);
163 this
164 }
165}
166
167impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
168 #[inline]
169 fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
170 let iter = iter.into_iter();
171 self.reserve_full_capacity();
172
173 iter.for_each(|(stat, value)| self.set(stat, value));
174 }
175}
176
177impl StatsSet {
179 pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
182 self.as_mut_typed_ref(dtype)
183 .merge_ordered(&other.as_typed_ref(dtype));
184 self
185 }
186
187 pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
190 self.as_mut_typed_ref(dtype)
191 .merge_unordered(&other.as_typed_ref(dtype));
192 self
193 }
194
195 pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
197 self.as_mut_typed_ref(dtype)
198 .combine_sets(&other.as_typed_ref(dtype))
199 }
200}
201
202pub struct TypedStatsSetRef<'a, 'b> {
203 pub values: &'a StatsSet,
204 pub dtype: &'b DType,
205}
206
207impl StatsProvider for TypedStatsSetRef<'_, '_> {
208 fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
209 self.values.get(stat).map(|p| {
210 p.map(|sv| {
211 Scalar::new(
212 stat.dtype(self.dtype)
213 .vortex_expect("Must have valid dtype if value is present"),
214 sv,
215 )
216 })
217 })
218 }
219
220 fn len(&self) -> usize {
221 self.values.len()
222 }
223}
224
225pub struct MutTypedStatsSetRef<'a, 'b> {
226 pub values: &'a mut StatsSet,
227 pub dtype: &'b DType,
228}
229
230impl MutTypedStatsSetRef<'_, '_> {
231 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
233 self.values.set(stat, value);
234 }
235
236 pub fn clear(&mut self, stat: Stat) {
238 self.values.clear(stat);
239 }
240}
241
242impl StatsProvider for MutTypedStatsSetRef<'_, '_> {
243 fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
244 self.values.get(stat).map(|p| {
245 p.map(|sv| {
246 Scalar::new(
247 stat.dtype(self.dtype)
248 .vortex_expect("Must have valid dtype if value is present"),
249 sv,
250 )
251 })
252 })
253 }
254
255 fn len(&self) -> usize {
256 self.values.len()
257 }
258}
259
260impl MutTypedStatsSetRef<'_, '_> {
262 pub fn merge_ordered(mut self, other: &TypedStatsSetRef) -> Self {
265 for s in all::<Stat>() {
266 match s {
267 Stat::IsConstant => self.merge_is_constant(other),
268 Stat::IsSorted => self.merge_is_sorted(other),
269 Stat::IsStrictSorted => self.merge_is_strict_sorted(other),
270 Stat::Max => self.merge_max(other),
271 Stat::Min => self.merge_min(other),
272 Stat::Sum => self.merge_sum(other),
273 Stat::NullCount => self.merge_null_count(other),
274 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
275 Stat::NaNCount => self.merge_nan_count(other),
276 }
277 }
278
279 self
280 }
281
282 pub fn merge_unordered(mut self, other: &TypedStatsSetRef) -> Self {
285 for s in all::<Stat>() {
286 if !s.is_commutative() {
287 self.clear(s);
288 continue;
289 }
290
291 match s {
292 Stat::IsConstant => self.merge_is_constant(other),
293 Stat::Max => self.merge_max(other),
294 Stat::Min => self.merge_min(other),
295 Stat::Sum => self.merge_sum(other),
296 Stat::NullCount => self.merge_null_count(other),
297 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
298 Stat::IsSorted | Stat::IsStrictSorted => {
299 unreachable!("not commutative")
300 }
301 Stat::NaNCount => self.merge_nan_count(other),
302 }
303 }
304
305 self
306 }
307
308 pub fn combine_sets(&mut self, other: &TypedStatsSetRef) -> VortexResult<()> {
310 let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
311 for s in other_stats {
312 match s {
313 Stat::Max => self.combine_bound::<Max>(other)?,
314 Stat::Min => self.combine_bound::<Min>(other)?,
315 Stat::UncompressedSizeInBytes => {
316 self.combine_bound::<UncompressedSizeInBytes>(other)?
317 }
318 Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
319 Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
320 Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
321 Stat::NullCount => self.combine_bound::<NullCount>(other)?,
322 Stat::Sum => self.combine_bound::<Sum>(other)?,
323 Stat::NaNCount => self.combine_bound::<NaNCount>(other)?,
324 }
325 }
326 Ok(())
327 }
328
329 fn combine_bound<S: StatType<Scalar>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
330 where
331 S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
332 {
333 match (self.get_scalar_bound::<S>(), other.get_scalar_bound::<S>()) {
334 (Some(m1), Some(m2)) => {
335 let meet = m1
336 .intersection(&m2)
337 .vortex_expect("can always compare scalar")
338 .ok_or_else(|| {
339 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
340 })?;
341 if meet != m1 {
342 self.set(S::STAT, meet.into_value().map(Scalar::into_value));
343 }
344 }
345 (None, Some(m)) => self.set(S::STAT, m.into_value().map(Scalar::into_value)),
346 (Some(_), _) => (),
347 (None, None) => self.clear(S::STAT),
348 }
349 Ok(())
350 }
351
352 fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
353 where
354 S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
355 {
356 match (
357 self.get_as_bound::<S, bool>(),
358 other.get_as_bound::<S, bool>(),
359 ) {
360 (Some(m1), Some(m2)) => {
361 let intersection = m1
362 .intersection(&m2)
363 .vortex_expect("can always compare boolean")
364 .ok_or_else(|| {
365 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
366 })?;
367 if intersection != m1 {
368 self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
369 }
370 }
371 (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
372 (Some(_), None) => (),
373 (None, None) => self.clear(S::STAT),
374 }
375 Ok(())
376 }
377
378 fn merge_min(&mut self, other: &TypedStatsSetRef) {
379 match (
380 self.get_scalar_bound::<Min>(),
381 other.get_scalar_bound::<Min>(),
382 ) {
383 (Some(m1), Some(m2)) => {
384 let meet = m1.union(&m2).vortex_expect("can compare scalar");
385 if meet != m1 {
386 self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
387 }
388 }
389 _ => self.clear(Stat::Min),
390 }
391 }
392
393 fn merge_max(&mut self, other: &TypedStatsSetRef) {
394 match (
395 self.get_scalar_bound::<Max>(),
396 other.get_scalar_bound::<Max>(),
397 ) {
398 (Some(m1), Some(m2)) => {
399 let meet = m1.union(&m2).vortex_expect("can compare scalar");
400 if meet != m1 {
401 self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
402 }
403 }
404 _ => self.clear(Stat::Max),
405 }
406 }
407
408 fn merge_sum(&mut self, other: &TypedStatsSetRef) {
409 match (
410 self.get_scalar_bound::<Sum>(),
411 other.get_scalar_bound::<Sum>(),
412 ) {
413 (Some(m1), Some(m2)) => {
414 if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
416 s1.as_primitive()
417 .checked_add(&s2.as_primitive())
418 .map(|pscalar| {
419 pscalar
420 .pvalue()
421 .map(|pvalue| {
422 Scalar::primitive_value(
423 pvalue,
424 pscalar.ptype(),
425 pscalar.dtype().nullability(),
426 )
427 .into_value()
428 })
429 .unwrap_or_else(ScalarValue::null)
430 })
431 }) {
432 self.set(Stat::Sum, Precision::Exact(scalar_value));
433 }
434 }
435 _ => self.clear(Stat::Sum),
436 }
437 }
438
439 fn merge_is_constant(&mut self, other: &TypedStatsSetRef) {
440 let self_const = self.get_as(Stat::IsConstant);
441 let other_const = other.get_as(Stat::IsConstant);
442 let self_min = self.get(Stat::Min);
443 let other_min = other.get(Stat::Min);
444
445 if let (
446 Some(Precision::Exact(self_const)),
447 Some(Precision::Exact(other_const)),
448 Some(Precision::Exact(self_min)),
449 Some(Precision::Exact(other_min)),
450 ) = (self_const, other_const, self_min, other_min)
451 {
452 if self_const && other_const && self_min == other_min {
453 self.set(Stat::IsConstant, Precision::exact(true));
454 } else {
455 self.set(Stat::IsConstant, Precision::inexact(false));
456 }
457 }
458 self.set(Stat::IsConstant, Precision::exact(false));
459 }
460
461 fn merge_is_sorted(&mut self, other: &TypedStatsSetRef) {
462 self.merge_sortedness_stat(other, Stat::IsSorted, PartialOrd::le)
463 }
464
465 fn merge_is_strict_sorted(&mut self, other: &TypedStatsSetRef) {
466 self.merge_sortedness_stat(other, Stat::IsStrictSorted, PartialOrd::lt)
467 }
468
469 fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
470 &mut self,
471 other: &TypedStatsSetRef,
472 stat: Stat,
473 cmp: F,
474 ) {
475 if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
476 == (self.get_as(stat), other.get_as(stat))
477 {
478 if let (Some(self_max), Some(other_min)) = (
483 self.get_scalar_bound::<Max>(),
484 other.get_scalar_bound::<Min>(),
485 ) {
486 return if cmp(&self_max.max_value(), &other_min.min_value()) {
487 } else {
489 self.set(stat, Precision::inexact(false));
490 };
491 }
492 }
493 self.clear(stat);
494 }
495
496 fn merge_null_count(&mut self, other: &TypedStatsSetRef) {
497 self.merge_sum_stat(Stat::NullCount, other)
498 }
499
500 fn merge_nan_count(&mut self, other: &TypedStatsSetRef) {
501 self.merge_sum_stat(Stat::NaNCount, other)
502 }
503
504 fn merge_uncompressed_size_in_bytes(&mut self, other: &TypedStatsSetRef) {
505 self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
506 }
507
508 fn merge_sum_stat(&mut self, stat: Stat, other: &TypedStatsSetRef) {
509 match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
510 (Some(nc1), Some(nc2)) => {
511 self.set(
512 stat,
513 nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
514 );
515 }
516 _ => self.clear(stat),
517 }
518 }
519}
520
521#[cfg(test)]
522mod test {
523 use enum_iterator::all;
524 use itertools::Itertools;
525 use vortex_dtype::{DType, Nullability, PType};
526
527 use crate::arrays::PrimitiveArray;
528 use crate::stats::stats_set::Scalar;
529 use crate::stats::{IsConstant, Precision, Stat, StatsProvider, StatsProviderExt, StatsSet};
530
531 #[test]
532 fn test_iter() {
533 let set = unsafe {
535 StatsSet::new_unchecked(vec![
536 (Stat::Max, Precision::exact(100)),
537 (Stat::Min, Precision::exact(42)),
538 ])
539 };
540 let mut iter = set.iter();
541 let first = iter.next().unwrap().clone();
542 assert_eq!(first.0, Stat::Max);
543 assert_eq!(
544 first
545 .1
546 .map(|f| i32::try_from(&Scalar::new(PType::I32.into(), f)).unwrap()),
547 Precision::exact(100)
548 );
549 let snd = iter.next().unwrap().clone();
550 assert_eq!(snd.0, Stat::Min);
551 assert_eq!(
552 snd.1
553 .map(|s| i32::try_from(&Scalar::new(PType::I32.into(), s)).unwrap()),
554 42
555 );
556 }
557
558 #[test]
559 fn into_iter() {
560 let mut set = unsafe {
562 StatsSet::new_unchecked(vec![
563 (Stat::Max, Precision::exact(100)),
564 (Stat::Min, Precision::exact(42)),
565 ])
566 }
567 .into_iter();
568 let (stat, first) = set.next().unwrap();
569 assert_eq!(stat, Stat::Max);
570 assert_eq!(
571 first.map(|f| i32::try_from(&Scalar::new(PType::I32.into(), f)).unwrap()),
572 Precision::exact(100)
573 );
574 let snd = set.next().unwrap();
575 assert_eq!(snd.0, Stat::Min);
576 assert_eq!(
577 snd.1
578 .map(|s| i32::try_from(&Scalar::new(PType::I32.into(), s)).unwrap()),
579 Precision::exact(42)
580 );
581 }
582
583 #[test]
584 fn merge_constant() {
585 let first = StatsSet::from_iter([
586 (Stat::Min, Precision::exact(42)),
587 (Stat::IsConstant, Precision::exact(true)),
588 ])
589 .merge_ordered(
590 &StatsSet::from_iter([
591 (Stat::Min, Precision::inexact(42)),
592 (Stat::IsConstant, Precision::exact(true)),
593 ]),
594 &DType::Primitive(PType::I32, Nullability::NonNullable),
595 );
596
597 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
598 assert_eq!(
599 first_ref.get_as::<bool>(Stat::IsConstant),
600 Some(Precision::exact(false))
601 );
602 assert_eq!(
603 first_ref.get_as::<i32>(Stat::Min),
604 Some(Precision::exact(42))
605 );
606 }
607
608 #[test]
609 fn merge_into_min() {
610 let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
611 &StatsSet::default(),
612 &DType::Primitive(PType::I32, Nullability::NonNullable),
613 );
614
615 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
616 assert!(first_ref.get(Stat::Min).is_none());
617 }
618
619 #[test]
620 fn merge_from_min() {
621 let first = StatsSet::default().merge_ordered(
622 &StatsSet::of(Stat::Min, Precision::exact(42)),
623 &DType::Primitive(PType::I32, Nullability::NonNullable),
624 );
625
626 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
627 assert!(first_ref.get(Stat::Min).is_none());
628 }
629
630 #[test]
631 fn merge_mins() {
632 let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
633 &StatsSet::of(Stat::Min, Precision::exact(42)),
634 &DType::Primitive(PType::I32, Nullability::NonNullable),
635 );
636
637 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
638 assert_eq!(
639 first_ref.get_as::<i32>(Stat::Min),
640 Some(Precision::exact(37))
641 );
642 }
643
644 #[test]
645 fn merge_into_bound_max() {
646 let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
647 &StatsSet::default(),
648 &DType::Primitive(PType::I32, Nullability::NonNullable),
649 );
650 assert!(first.get(Stat::Max).is_none());
651 }
652
653 #[test]
654 fn merge_from_max() {
655 let first = StatsSet::default().merge_ordered(
656 &StatsSet::of(Stat::Max, Precision::exact(42)),
657 &DType::Primitive(PType::I32, Nullability::NonNullable),
658 );
659 assert!(first.get(Stat::Max).is_none());
660 }
661
662 #[test]
663 fn merge_maxes() {
664 let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
665 &StatsSet::of(Stat::Max, Precision::exact(42)),
666 &DType::Primitive(PType::I32, Nullability::NonNullable),
667 );
668 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
669 assert_eq!(
670 first_ref.get_as::<i32>(Stat::Max),
671 Some(Precision::exact(42))
672 );
673 }
674
675 #[test]
676 fn merge_maxes_bound() {
677 let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
678 let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
679 .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
680 let first_ref = first.as_typed_ref(&dtype);
681 assert_eq!(
682 first_ref.get_as::<i32>(Stat::Max),
683 Some(Precision::inexact(43))
684 );
685 }
686
687 #[test]
688 fn merge_into_scalar() {
689 let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
690 &StatsSet::default(),
691 &DType::Primitive(PType::I32, Nullability::NonNullable),
692 );
693 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
694 assert!(first_ref.get(Stat::Sum).is_none());
695 }
696
697 #[test]
698 fn merge_from_scalar() {
699 let first = StatsSet::default().merge_ordered(
700 &StatsSet::of(Stat::Sum, Precision::exact(42)),
701 &DType::Primitive(PType::I32, Nullability::NonNullable),
702 );
703 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
704 assert!(first_ref.get(Stat::Sum).is_none());
705 }
706
707 #[test]
708 fn merge_scalars() {
709 let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
710 &StatsSet::of(Stat::Sum, Precision::exact(42)),
711 &DType::Primitive(PType::I32, Nullability::NonNullable),
712 );
713 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
714 assert_eq!(
715 first_ref.get_as::<usize>(Stat::Sum),
716 Some(Precision::exact(79usize))
717 );
718 }
719
720 #[test]
721 fn merge_into_sortedness() {
722 let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
723 &StatsSet::default(),
724 &DType::Primitive(PType::I32, Nullability::NonNullable),
725 );
726 assert!(first.get(Stat::IsStrictSorted).is_none());
727 }
728
729 #[test]
730 fn merge_from_sortedness() {
731 let first = StatsSet::default().merge_ordered(
732 &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
733 &DType::Primitive(PType::I32, Nullability::NonNullable),
734 );
735 assert!(first.get(Stat::IsStrictSorted).is_none());
736 }
737
738 #[test]
739 fn merge_sortedness() {
740 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
741 first.set(Stat::Max, Precision::exact(1));
742 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
743 second.set(Stat::Min, Precision::exact(2));
744 first = first.merge_ordered(
745 &second,
746 &DType::Primitive(PType::I32, Nullability::NonNullable),
747 );
748
749 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
750 assert_eq!(
751 first_ref.get_as::<bool>(Stat::IsStrictSorted),
752 Some(Precision::exact(true))
753 );
754 }
755
756 #[test]
757 fn merge_sortedness_out_of_order() {
758 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
759 first.set(Stat::Min, Precision::exact(1));
760 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
761 second.set(Stat::Max, Precision::exact(2));
762 second = second.merge_ordered(
763 &first,
764 &DType::Primitive(PType::I32, Nullability::NonNullable),
765 );
766
767 let second_ref =
768 second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
769 assert_eq!(
770 second_ref.get_as::<bool>(Stat::IsStrictSorted),
771 Some(Precision::inexact(false))
772 );
773 }
774
775 #[test]
776 fn merge_sortedness_only_one_sorted() {
777 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
778 first.set(Stat::Max, Precision::exact(1));
779 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
780 second.set(Stat::Min, Precision::exact(2));
781 first.merge_ordered(
782 &second,
783 &DType::Primitive(PType::I32, Nullability::NonNullable),
784 );
785
786 let second_ref =
787 second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
788 assert_eq!(
789 second_ref.get_as::<bool>(Stat::IsStrictSorted),
790 Some(Precision::exact(false))
791 );
792 }
793
794 #[test]
795 fn merge_sortedness_missing_min() {
796 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
797 first.set(Stat::Max, Precision::exact(1));
798 let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
799 first = first.merge_ordered(
800 &second,
801 &DType::Primitive(PType::I32, Nullability::NonNullable),
802 );
803 assert!(first.get(Stat::IsStrictSorted).is_none());
804 }
805
806 #[test]
807 fn merge_sortedness_bound_min() {
808 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
809 first.set(Stat::Max, Precision::exact(1));
810 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
811 second.set(Stat::Min, Precision::inexact(2));
812 first = first.merge_ordered(
813 &second,
814 &DType::Primitive(PType::I32, Nullability::NonNullable),
815 );
816
817 let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
818 assert_eq!(
819 first_ref.get_as::<bool>(Stat::IsStrictSorted),
820 Some(Precision::exact(true))
821 );
822 }
823
824 #[test]
825 fn merge_unordered() {
826 let array =
827 PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
828 let all_stats = all::<Stat>()
829 .filter(|s| !matches!(s, Stat::Sum))
830 .filter(|s| !matches!(s, Stat::NaNCount))
831 .collect_vec();
832 array.statistics().compute_all(&all_stats).unwrap();
833
834 let stats = array.statistics().to_owned();
835 for stat in &all_stats {
836 assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
837 }
838
839 let merged = stats.clone().merge_unordered(
840 &stats,
841 &DType::Primitive(PType::I32, Nullability::NonNullable),
842 );
843 for stat in &all_stats {
844 assert_eq!(
845 merged.get(*stat).is_some(),
846 stat.is_commutative(),
847 "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
848 )
849 }
850
851 let merged_ref = merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
852 let stats_ref = stats.as_typed_ref(&DType::Primitive(PType::I32, Nullability::Nullable));
853
854 assert_eq!(
855 merged_ref.get_as::<i32>(Stat::Min),
856 stats_ref.get_as::<i32>(Stat::Min)
857 );
858 assert_eq!(
859 merged_ref.get_as::<i32>(Stat::Max),
860 stats_ref.get_as::<i32>(Stat::Max)
861 );
862 assert_eq!(
863 merged_ref.get_as::<u64>(Stat::NullCount).unwrap(),
864 stats_ref
865 .get_as::<u64>(Stat::NullCount)
866 .unwrap()
867 .map(|s| s * 2)
868 );
869 }
870
871 #[test]
872 fn merge_min_bound_same() {
873 let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
876 &StatsSet::of(Stat::Min, Precision::exact(5)),
877 &DType::Primitive(PType::I32, Nullability::NonNullable),
878 );
879 let merged_ref =
880 merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
881 assert_eq!(
882 merged_ref.get_as::<i32>(Stat::Min),
883 Some(Precision::exact(5))
884 );
885 }
886
887 #[test]
888 fn merge_min_bound_bound_lower() {
889 let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
890 &StatsSet::of(Stat::Min, Precision::exact(5)),
891 &DType::Primitive(PType::I32, Nullability::NonNullable),
892 );
893 let merged_ref =
894 merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
895 assert_eq!(
896 merged_ref.get_as::<i32>(Stat::Min),
897 Some(Precision::inexact(4))
898 );
899 }
900
901 #[test]
902 fn test_combine_is_constant() {
903 {
904 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
905 let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
906 let mut stats_ref =
907 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
908 stats_ref
909 .combine_bool_stat::<IsConstant>(
910 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
911 )
912 .unwrap();
913 assert_eq!(
914 stats_ref.get_as::<bool>(Stat::IsConstant),
915 Some(Precision::exact(true))
916 );
917 }
918
919 {
920 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
921 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
922 let mut stats_ref =
923 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
924 stats_ref
925 .combine_bool_stat::<IsConstant>(
926 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
927 )
928 .unwrap();
929 assert_eq!(
930 stats_ref.get_as::<bool>(Stat::IsConstant),
931 Some(Precision::exact(true))
932 );
933 }
934
935 {
936 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
937 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
938 let mut stats_ref =
939 stats.as_mut_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
940 stats_ref
941 .combine_bool_stat::<IsConstant>(
942 &stats2.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)),
943 )
944 .unwrap();
945 assert_eq!(
946 stats_ref.get_as::<bool>(Stat::IsConstant),
947 Some(Precision::exact(false))
948 );
949 }
950 }
951
952 #[test]
953 fn test_combine_sets_boolean_conflict() {
954 let mut stats1 = StatsSet::from_iter([
955 (Stat::IsConstant, Precision::exact(true)),
956 (Stat::IsSorted, Precision::exact(true)),
957 ]);
958
959 let stats2 = StatsSet::from_iter([
960 (Stat::IsConstant, Precision::exact(false)),
961 (Stat::IsSorted, Precision::exact(true)),
962 ]);
963
964 let result = stats1.combine_sets(
965 &stats2,
966 &DType::Primitive(PType::I32, Nullability::NonNullable),
967 );
968 assert!(result.is_err());
969 }
970
971 #[test]
972 fn test_combine_sets_with_missing_stats() {
973 let mut stats1 = StatsSet::from_iter([
974 (Stat::Min, Precision::exact(42)),
975 (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
976 ]);
977
978 let stats2 = StatsSet::from_iter([
979 (Stat::Max, Precision::exact(100)),
980 (Stat::IsStrictSorted, Precision::exact(true)),
981 ]);
982
983 stats1
984 .combine_sets(
985 &stats2,
986 &DType::Primitive(PType::I32, Nullability::NonNullable),
987 )
988 .unwrap();
989
990 let stats_ref =
991 stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
992
993 assert_eq!(
995 stats_ref.get_as::<i32>(Stat::Min),
996 Some(Precision::exact(42))
997 );
998 assert_eq!(
1000 stats_ref.get_as::<i32>(Stat::Max),
1001 Some(Precision::exact(100))
1002 );
1003 assert_eq!(
1005 stats_ref.get_as::<bool>(Stat::IsStrictSorted),
1006 Some(Precision::exact(true))
1007 );
1008 }
1009
1010 #[test]
1011 fn test_combine_sets_with_inexact() {
1012 let mut stats1 = StatsSet::from_iter([
1013 (Stat::Min, Precision::exact(42)),
1014 (Stat::Max, Precision::inexact(100)),
1015 (Stat::IsConstant, Precision::exact(false)),
1016 ]);
1017
1018 let stats2 = StatsSet::from_iter([
1019 (Stat::Min, Precision::inexact(40)),
1021 (Stat::Max, Precision::exact(90)),
1022 (Stat::IsSorted, Precision::exact(true)),
1023 ]);
1024
1025 stats1
1026 .combine_sets(
1027 &stats2,
1028 &DType::Primitive(PType::I32, Nullability::NonNullable),
1029 )
1030 .unwrap();
1031
1032 let stats_ref =
1033 stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable));
1034
1035 assert_eq!(
1037 stats_ref.get_as::<i32>(Stat::Min),
1038 Some(Precision::exact(42))
1039 );
1040 assert_eq!(
1042 stats_ref.get_as::<i32>(Stat::Max),
1043 Some(Precision::exact(90))
1044 );
1045 assert_eq!(
1047 stats_ref.get_as::<bool>(Stat::IsSorted),
1048 Some(Precision::exact(true))
1049 );
1050 }
1051}