1use std::fmt::Debug;
5
6use enum_iterator::{Sequence, all};
7use num_traits::CheckedAdd;
8use vortex_dtype::DType;
9use vortex_error::{VortexExpect, VortexResult, vortex_err};
10use vortex_scalar::{Scalar, ScalarValue};
11
12use super::traits::StatsProvider;
13use super::{IsSorted, IsStrictSorted, NaNCount, NullCount, StatType, UncompressedSizeInBytes};
14use crate::stats::{IsConstant, Max, Min, Precision, Stat, StatBound, StatsProviderExt, Sum};
15
16#[derive(Default, Debug, Clone)]
17pub struct StatsSet {
18 values: Vec<(Stat, Precision<ScalarValue>)>,
19}
20
21impl StatsSet {
22 pub fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
28 Self { values }
29 }
30
31 pub fn nulls(len: usize) -> Self {
34 let mut stats = Self::new_unchecked(vec![(Stat::NullCount, Precision::exact(len))]);
35
36 if len > 0 {
37 stats.set(Stat::IsConstant, Precision::exact(true));
38 stats.set(Stat::IsSorted, Precision::exact(true));
39 stats.set(Stat::IsStrictSorted, Precision::exact(len < 2));
40 }
41
42 stats
43 }
44
45 pub fn empty_array() -> StatsSet {
47 StatsSet::new_unchecked(vec![(Stat::NullCount, Precision::exact(0))])
48 }
49
50 pub fn constant(scalar: Scalar, length: usize) -> Self {
51 let (dtype, sv) = scalar.into_parts();
52 let mut stats = Self::default();
53 if length > 0 {
54 stats.extend([
55 (Stat::IsConstant, Precision::exact(true)),
56 (Stat::IsSorted, Precision::exact(true)),
57 (Stat::IsStrictSorted, Precision::exact(length <= 1)),
58 ]);
59 }
60
61 let null_count = if sv.is_null() { length as u64 } else { 0 };
62 stats.set(Stat::NullCount, Precision::exact(null_count));
63
64 if !sv.is_null() {
65 stats.extend([
66 (Stat::Min, Precision::exact(sv.clone())),
67 (Stat::Max, Precision::exact(sv.clone())),
68 ]);
69 }
70
71 if matches!(dtype, DType::Bool(_)) {
72 let bool_val = <Option<bool>>::try_from(&sv).vortex_expect("Checked dtype");
73 let true_count = bool_val
74 .map(|b| if b { length as u64 } else { 0 })
75 .unwrap_or(0);
76 stats.set(Stat::Sum, Precision::exact(true_count));
77 }
78
79 stats
80 }
81
82 pub fn bools_with_sum_and_null_count(true_count: usize, null_count: usize, len: usize) -> Self {
83 StatsSet::new_unchecked(vec![
84 (Stat::Sum, Precision::exact(true_count)),
85 (Stat::NullCount, Precision::exact(null_count)),
86 (Stat::Min, Precision::exact(true_count == len)),
87 (Stat::Max, Precision::exact(true_count > 0)),
88 (
89 Stat::IsConstant,
90 Precision::exact((true_count == 0 && null_count == 0) || true_count == len),
91 ),
92 ])
93 }
94
95 pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
96 Self::new_unchecked(vec![(stat, value)])
97 }
98
99 fn reserve_full_capacity(&mut self) {
100 if self.values.capacity() < Stat::CARDINALITY {
101 self.values
102 .reserve_exact(Stat::CARDINALITY - self.values.capacity());
103 }
104 }
105}
106
107impl StatsSet {
109 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
111 self.reserve_full_capacity();
112
113 if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
114 *existing = (stat, value);
115 } else {
116 self.values.push((stat, value));
117 }
118 }
119
120 pub fn clear(&mut self, stat: Stat) {
122 self.values.retain(|(s, _)| *s != stat);
123 }
124
125 pub fn retain_only(&mut self, stats: &[Stat]) {
126 self.values.retain(|(s, _)| stats.contains(s));
127 }
128
129 pub fn keep_inexact_stats(self, inexact_keep: &[Stat]) -> Self {
130 self.values
131 .into_iter()
132 .filter_map(|(s, v)| inexact_keep.contains(&s).then(|| (s, v.into_inexact())))
133 .collect()
134 }
135
136 pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
140 self.values.iter()
141 }
142}
143
144pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
150
151impl Iterator for StatsSetIntoIter {
152 type Item = (Stat, Precision<ScalarValue>);
153
154 fn next(&mut self) -> Option<Self::Item> {
155 self.0.next()
156 }
157}
158
159impl IntoIterator for StatsSet {
160 type Item = (Stat, Precision<ScalarValue>);
161 type IntoIter = StatsSetIntoIter;
162
163 fn into_iter(self) -> Self::IntoIter {
164 StatsSetIntoIter(self.values.into_iter())
165 }
166}
167
168impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
169 fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
170 let iter = iter.into_iter();
171 let mut values = Vec::default();
172 values.reserve_exact(Stat::CARDINALITY);
173
174 let mut this = Self { values };
175 this.extend(iter);
176 this
177 }
178}
179
180impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
181 #[inline]
182 fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
183 let iter = iter.into_iter();
184 self.reserve_full_capacity();
185
186 iter.for_each(|(stat, value)| self.set(stat, value));
187 }
188}
189
190impl StatsSet {
192 pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
195 for s in all::<Stat>() {
196 match s {
197 Stat::IsConstant => self.merge_is_constant(other, dtype),
198 Stat::IsSorted => self.merge_is_sorted(other, dtype),
199 Stat::IsStrictSorted => self.merge_is_strict_sorted(other, dtype),
200 Stat::Max => self.merge_max(other, dtype),
201 Stat::Min => self.merge_min(other, dtype),
202 Stat::Sum => self.merge_sum(other, dtype),
203 Stat::NullCount => self.merge_null_count(other),
204 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
205 Stat::NaNCount => self.merge_nan_count(other),
206 }
207 }
208
209 self
210 }
211
212 pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
215 for s in all::<Stat>() {
216 if !s.is_commutative() {
217 self.clear(s);
218 continue;
219 }
220
221 match s {
222 Stat::IsConstant => self.merge_is_constant(other, dtype),
223 Stat::Max => self.merge_max(other, dtype),
224 Stat::Min => self.merge_min(other, dtype),
225 Stat::Sum => self.merge_sum(other, dtype),
226 Stat::NullCount => self.merge_null_count(other),
227 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
228 Stat::IsSorted | Stat::IsStrictSorted => {
229 unreachable!("not commutative")
230 }
231 Stat::NaNCount => self.merge_nan_count(other),
232 }
233 }
234
235 self
236 }
237
238 pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
240 let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
241 for s in other_stats {
242 match s {
243 Stat::Max => self.combine_bound::<Max>(other, dtype)?,
244 Stat::Min => self.combine_bound::<Min>(other, dtype)?,
245 Stat::UncompressedSizeInBytes => {
246 self.combine_bound::<UncompressedSizeInBytes>(other, dtype)?
247 }
248 Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
249 Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
250 Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
251 Stat::NullCount => self.combine_bound::<NullCount>(other, dtype)?,
252 Stat::Sum => self.combine_bound::<Sum>(other, dtype)?,
253 Stat::NaNCount => self.combine_bound::<NaNCount>(other, dtype)?,
254 }
255 }
256 Ok(())
257 }
258
259 fn combine_bound<S: StatType<Scalar>>(
260 &mut self,
261 other: &Self,
262 dtype: &DType,
263 ) -> VortexResult<()>
264 where
265 S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
266 {
267 match (
268 self.get_scalar_bound::<S>(dtype),
269 other.get_scalar_bound::<S>(dtype),
270 ) {
271 (Some(m1), Some(m2)) => {
272 let meet = m1
273 .intersection(&m2)
274 .vortex_expect("can always compare scalar")
275 .ok_or_else(|| {
276 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
277 })?;
278 if meet != m1 {
279 self.set(S::STAT, meet.into_value().map(Scalar::into_value));
280 }
281 }
282 (None, Some(m)) => self.set(S::STAT, m.into_value().map(Scalar::into_value)),
283 (Some(_), _) => (),
284 (None, None) => self.clear(S::STAT),
285 }
286 Ok(())
287 }
288
289 fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &Self) -> VortexResult<()>
290 where
291 S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
292 {
293 match (
294 self.get_as_bound::<S, bool>(),
295 other.get_as_bound::<S, bool>(),
296 ) {
297 (Some(m1), Some(m2)) => {
298 let intersection = m1
299 .intersection(&m2)
300 .vortex_expect("can always compare boolean")
301 .ok_or_else(|| {
302 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
303 })?;
304 if intersection != m1 {
305 self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
306 }
307 }
308 (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
309 (Some(_), None) => (),
310 (None, None) => self.clear(S::STAT),
311 }
312 Ok(())
313 }
314
315 fn merge_min(&mut self, other: &Self, dtype: &DType) {
316 match (
317 self.get_scalar_bound::<Min>(dtype),
318 other.get_scalar_bound::<Min>(dtype),
319 ) {
320 (Some(m1), Some(m2)) => {
321 let meet = m1.union(&m2).vortex_expect("can compare scalar");
322 if meet != m1 {
323 self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
324 }
325 }
326 _ => self.clear(Stat::Min),
327 }
328 }
329
330 fn merge_max(&mut self, other: &Self, dtype: &DType) {
331 match (
332 self.get_scalar_bound::<Max>(dtype),
333 other.get_scalar_bound::<Max>(dtype),
334 ) {
335 (Some(m1), Some(m2)) => {
336 let meet = m1.union(&m2).vortex_expect("can compare scalar");
337 if meet != m1 {
338 self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
339 }
340 }
341 _ => self.clear(Stat::Max),
342 }
343 }
344
345 fn merge_sum(&mut self, other: &Self, dtype: &DType) {
346 match (
347 self.get_scalar_bound::<Sum>(dtype),
348 other.get_scalar_bound::<Sum>(dtype),
349 ) {
350 (Some(m1), Some(m2)) => {
351 if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
353 s1.as_primitive()
354 .checked_add(&s2.as_primitive())
355 .map(|pscalar| {
356 pscalar
357 .pvalue()
358 .map(|pvalue| {
359 Scalar::primitive_value(
360 pvalue,
361 pscalar.ptype(),
362 pscalar.dtype().nullability(),
363 )
364 .into_value()
365 })
366 .unwrap_or_else(ScalarValue::null)
367 })
368 }) {
369 self.set(Stat::Sum, Precision::Exact(scalar_value));
370 }
371 }
372 _ => self.clear(Stat::Sum),
373 }
374 }
375
376 fn merge_is_constant(&mut self, other: &Self, dtype: &DType) {
377 let self_const = self.get_as(Stat::IsConstant);
378 let other_const = other.get_as(Stat::IsConstant);
379 let self_min = self.get_scalar(Stat::Min, dtype);
380 let other_min = other.get_scalar(Stat::Min, dtype);
381
382 if let (
383 Some(Precision::Exact(self_const)),
384 Some(Precision::Exact(other_const)),
385 Some(Precision::Exact(self_min)),
386 Some(Precision::Exact(other_min)),
387 ) = (self_const, other_const, self_min, other_min)
388 {
389 if self_const && other_const && self_min == other_min {
390 self.set(Stat::IsConstant, Precision::exact(true));
391 } else {
392 self.set(Stat::IsConstant, Precision::inexact(false));
393 }
394 }
395 self.set(Stat::IsConstant, Precision::exact(false));
396 }
397
398 fn merge_is_sorted(&mut self, other: &Self, dtype: &DType) {
399 self.merge_sortedness_stat(other, Stat::IsSorted, dtype, PartialOrd::le)
400 }
401
402 fn merge_is_strict_sorted(&mut self, other: &Self, dtype: &DType) {
403 self.merge_sortedness_stat(other, Stat::IsStrictSorted, dtype, PartialOrd::lt)
404 }
405
406 fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
407 &mut self,
408 other: &Self,
409 stat: Stat,
410 dtype: &DType,
411 cmp: F,
412 ) {
413 if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
414 == (self.get_as(stat), other.get_as(stat))
415 {
416 if let (Some(self_max), Some(other_min)) = (
421 self.get_scalar_bound::<Max>(dtype),
422 other.get_scalar_bound::<Min>(dtype),
423 ) {
424 return if cmp(&self_max.max_value(), &other_min.min_value()) {
425 } else {
427 self.set(stat, Precision::inexact(false));
428 };
429 }
430 }
431 self.clear(stat);
432 }
433
434 fn merge_null_count(&mut self, other: &Self) {
435 self.merge_sum_stat(Stat::NullCount, other)
436 }
437
438 fn merge_nan_count(&mut self, other: &Self) {
439 self.merge_sum_stat(Stat::NaNCount, other)
440 }
441
442 fn merge_uncompressed_size_in_bytes(&mut self, other: &Self) {
443 self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
444 }
445
446 fn merge_sum_stat(&mut self, stat: Stat, other: &Self) {
447 match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
448 (Some(nc1), Some(nc2)) => {
449 self.set(
450 stat,
451 nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
452 );
453 }
454 _ => self.clear(stat),
455 }
456 }
457}
458
459impl StatsProvider for StatsSet {
460 fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
461 self.values
462 .iter()
463 .find(|(s, _)| *s == stat)
464 .map(|(_, v)| v.clone())
465 }
466
467 fn len(&self) -> usize {
468 self.values.len()
469 }
470}
471
472#[cfg(test)]
473mod test {
474 use enum_iterator::all;
475 use itertools::Itertools;
476 use vortex_dtype::{DType, Nullability, PType};
477
478 use crate::arrays::PrimitiveArray;
479 use crate::stats::{IsConstant, Precision, Stat, StatsProvider, StatsProviderExt, StatsSet};
480
481 #[test]
482 fn test_iter() {
483 let set = StatsSet::new_unchecked(vec![
484 (Stat::Max, Precision::exact(100)),
485 (Stat::Min, Precision::exact(42)),
486 ]);
487 let mut iter = set.iter();
488 let first = iter.next().unwrap().clone();
489 assert_eq!(first.0, Stat::Max);
490 assert_eq!(
491 first.1.map(|f| i32::try_from(&f).unwrap()),
492 Precision::exact(100)
493 );
494 let snd = iter.next().unwrap().clone();
495 assert_eq!(snd.0, Stat::Min);
496 assert_eq!(snd.1.map(|s| i32::try_from(&s).unwrap()), 42);
497 }
498
499 #[test]
500 fn into_iter() {
501 let mut set = StatsSet::new_unchecked(vec![
502 (Stat::Max, Precision::exact(100)),
503 (Stat::Min, Precision::exact(42)),
504 ])
505 .into_iter();
506 let (stat, first) = set.next().unwrap();
507 assert_eq!(stat, Stat::Max);
508 assert_eq!(
509 first.map(|f| i32::try_from(&f).unwrap()),
510 Precision::exact(100)
511 );
512 let snd = set.next().unwrap();
513 assert_eq!(snd.0, Stat::Min);
514 assert_eq!(
515 snd.1.map(|s| i32::try_from(&s).unwrap()),
516 Precision::exact(42)
517 );
518 }
519
520 #[test]
521 fn merge_constant() {
522 let first = StatsSet::from_iter([
523 (Stat::Min, Precision::exact(42)),
524 (Stat::IsConstant, Precision::exact(true)),
525 ])
526 .merge_ordered(
527 &StatsSet::from_iter([
528 (Stat::Min, Precision::inexact(42)),
529 (Stat::IsConstant, Precision::exact(true)),
530 ]),
531 &DType::Primitive(PType::I32, Nullability::NonNullable),
532 );
533 assert_eq!(
534 first.get_as::<bool>(Stat::IsConstant),
535 Some(Precision::exact(false))
536 );
537 assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
538 }
539
540 #[test]
541 fn merge_into_min() {
542 let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
543 &StatsSet::default(),
544 &DType::Primitive(PType::I32, Nullability::NonNullable),
545 );
546 assert!(first.get(Stat::Min).is_none());
547 }
548
549 #[test]
550 fn merge_from_min() {
551 let first = StatsSet::default().merge_ordered(
552 &StatsSet::of(Stat::Min, Precision::exact(42)),
553 &DType::Primitive(PType::I32, Nullability::NonNullable),
554 );
555 assert!(first.get(Stat::Min).is_none());
556 }
557
558 #[test]
559 fn merge_mins() {
560 let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
561 &StatsSet::of(Stat::Min, Precision::exact(42)),
562 &DType::Primitive(PType::I32, Nullability::NonNullable),
563 );
564 assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(37)));
565 }
566
567 #[test]
568 fn merge_into_bound_max() {
569 let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
570 &StatsSet::default(),
571 &DType::Primitive(PType::I32, Nullability::NonNullable),
572 );
573 assert!(first.get(Stat::Max).is_none());
574 }
575
576 #[test]
577 fn merge_from_max() {
578 let first = StatsSet::default().merge_ordered(
579 &StatsSet::of(Stat::Max, Precision::exact(42)),
580 &DType::Primitive(PType::I32, Nullability::NonNullable),
581 );
582 assert!(first.get(Stat::Max).is_none());
583 }
584
585 #[test]
586 fn merge_maxes() {
587 let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
588 &StatsSet::of(Stat::Max, Precision::exact(42)),
589 &DType::Primitive(PType::I32, Nullability::NonNullable),
590 );
591 assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::exact(42)));
592 }
593
594 #[test]
595 fn merge_maxes_bound() {
596 let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
597 let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
598 .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
599 assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::inexact(43)));
600 }
601
602 #[test]
603 fn merge_into_scalar() {
604 let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
605 &StatsSet::default(),
606 &DType::Primitive(PType::I32, Nullability::NonNullable),
607 );
608 assert!(first.get(Stat::Sum).is_none());
609 }
610
611 #[test]
612 fn merge_from_scalar() {
613 let first = StatsSet::default().merge_ordered(
614 &StatsSet::of(Stat::Sum, Precision::exact(42)),
615 &DType::Primitive(PType::I32, Nullability::NonNullable),
616 );
617 assert!(first.get(Stat::Sum).is_none());
618 }
619
620 #[test]
621 fn merge_scalars() {
622 let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
623 &StatsSet::of(Stat::Sum, Precision::exact(42)),
624 &DType::Primitive(PType::I32, Nullability::NonNullable),
625 );
626 assert_eq!(
627 first.get_as::<usize>(Stat::Sum),
628 Some(Precision::exact(79usize))
629 );
630 }
631
632 #[test]
633 fn merge_into_sortedness() {
634 let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
635 &StatsSet::default(),
636 &DType::Primitive(PType::I32, Nullability::NonNullable),
637 );
638 assert!(first.get(Stat::IsStrictSorted).is_none());
639 }
640
641 #[test]
642 fn merge_from_sortedness() {
643 let first = StatsSet::default().merge_ordered(
644 &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
645 &DType::Primitive(PType::I32, Nullability::NonNullable),
646 );
647 assert!(first.get(Stat::IsStrictSorted).is_none());
648 }
649
650 #[test]
651 fn merge_sortedness() {
652 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
653 first.set(Stat::Max, Precision::exact(1));
654 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
655 second.set(Stat::Min, Precision::exact(2));
656 first = first.merge_ordered(
657 &second,
658 &DType::Primitive(PType::I32, Nullability::NonNullable),
659 );
660 assert_eq!(
661 first.get_as::<bool>(Stat::IsStrictSorted),
662 Some(Precision::exact(true))
663 );
664 }
665
666 #[test]
667 fn merge_sortedness_out_of_order() {
668 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
669 first.set(Stat::Min, Precision::exact(1));
670 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
671 second.set(Stat::Max, Precision::exact(2));
672 second = second.merge_ordered(
673 &first,
674 &DType::Primitive(PType::I32, Nullability::NonNullable),
675 );
676 assert_eq!(
677 second.get_as::<bool>(Stat::IsStrictSorted),
678 Some(Precision::inexact(false))
679 );
680 }
681
682 #[test]
683 fn merge_sortedness_only_one_sorted() {
684 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
685 first.set(Stat::Max, Precision::exact(1));
686 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
687 second.set(Stat::Min, Precision::exact(2));
688 first.merge_ordered(
689 &second,
690 &DType::Primitive(PType::I32, Nullability::NonNullable),
691 );
692 assert_eq!(
693 second.get_as::<bool>(Stat::IsStrictSorted),
694 Some(Precision::exact(false))
695 );
696 }
697
698 #[test]
699 fn merge_sortedness_missing_min() {
700 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
701 first.set(Stat::Max, Precision::exact(1));
702 let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
703 first = first.merge_ordered(
704 &second,
705 &DType::Primitive(PType::I32, Nullability::NonNullable),
706 );
707 assert!(first.get(Stat::IsStrictSorted).is_none());
708 }
709
710 #[test]
711 fn merge_sortedness_bound_min() {
712 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
713 first.set(Stat::Max, Precision::exact(1));
714 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
715 second.set(Stat::Min, Precision::inexact(2));
716 first = first.merge_ordered(
717 &second,
718 &DType::Primitive(PType::I32, Nullability::NonNullable),
719 );
720 assert_eq!(
721 first.get_as::<bool>(Stat::IsStrictSorted),
722 Some(Precision::exact(true))
723 );
724 }
725
726 #[test]
727 fn merge_unordered() {
728 let array =
729 PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
730 let all_stats = all::<Stat>()
731 .filter(|s| !matches!(s, Stat::Sum))
732 .filter(|s| !matches!(s, Stat::NaNCount))
733 .collect_vec();
734 array.statistics().compute_all(&all_stats).unwrap();
735
736 let stats = array.statistics().to_owned();
737 for stat in &all_stats {
738 assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
739 }
740
741 let merged = stats.clone().merge_unordered(
742 &stats,
743 &DType::Primitive(PType::I32, Nullability::NonNullable),
744 );
745 for stat in &all_stats {
746 assert_eq!(
747 merged.get(*stat).is_some(),
748 stat.is_commutative(),
749 "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
750 )
751 }
752
753 assert_eq!(
754 merged.get_as::<i32>(Stat::Min),
755 stats.get_as::<i32>(Stat::Min)
756 );
757 assert_eq!(
758 merged.get_as::<i32>(Stat::Max),
759 stats.get_as::<i32>(Stat::Max)
760 );
761 assert_eq!(
762 merged.get_as::<u64>(Stat::NullCount).unwrap(),
763 stats.get_as::<u64>(Stat::NullCount).unwrap().map(|s| s * 2)
764 );
765 }
766
767 #[test]
768 fn merge_min_bound_same() {
769 let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
772 &StatsSet::of(Stat::Min, Precision::exact(5)),
773 &DType::Primitive(PType::I32, Nullability::NonNullable),
774 );
775 assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::exact(5)));
776 }
777
778 #[test]
779 fn merge_min_bound_bound_lower() {
780 let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
781 &StatsSet::of(Stat::Min, Precision::exact(5)),
782 &DType::Primitive(PType::I32, Nullability::NonNullable),
783 );
784 assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::inexact(4)));
785 }
786
787 #[test]
788 fn retain_approx() {
789 let set = StatsSet::from_iter([
790 (Stat::Max, Precision::exact(100)),
791 (Stat::Min, Precision::exact(50)),
792 (Stat::Sum, Precision::inexact(10)),
793 ]);
794
795 let set = set.keep_inexact_stats(&[Stat::Min, Stat::Max]);
796
797 assert_eq!(set.len(), 2);
798 assert_eq!(set.get_as::<i32>(Stat::Max), Some(Precision::inexact(100)));
799 assert_eq!(set.get_as::<i32>(Stat::Min), Some(Precision::inexact(50)));
800 assert_eq!(set.get_as::<i32>(Stat::Sum), None);
801 }
802
803 #[test]
804 fn test_combine_is_constant() {
805 {
806 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
807 let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
808 stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
809 assert_eq!(
810 stats.get_as::<bool>(Stat::IsConstant),
811 Some(Precision::exact(true))
812 );
813 }
814
815 {
816 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
817 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
818 stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
819 assert_eq!(
820 stats.get_as::<bool>(Stat::IsConstant),
821 Some(Precision::exact(true))
822 );
823 }
824
825 {
826 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
827 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
828 stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
829 assert_eq!(
830 stats.get_as::<bool>(Stat::IsConstant),
831 Some(Precision::exact(false))
832 );
833 }
834 }
835
836 #[test]
837 fn test_combine_sets_boolean_conflict() {
838 let mut stats1 = StatsSet::from_iter([
839 (Stat::IsConstant, Precision::exact(true)),
840 (Stat::IsSorted, Precision::exact(true)),
841 ]);
842
843 let stats2 = StatsSet::from_iter([
844 (Stat::IsConstant, Precision::exact(false)),
845 (Stat::IsSorted, Precision::exact(true)),
846 ]);
847
848 let result = stats1.combine_sets(
849 &stats2,
850 &DType::Primitive(PType::I32, Nullability::NonNullable),
851 );
852 assert!(result.is_err());
853 }
854
855 #[test]
856 fn test_combine_sets_with_missing_stats() {
857 let mut stats1 = StatsSet::from_iter([
858 (Stat::Min, Precision::exact(42)),
859 (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
860 ]);
861
862 let stats2 = StatsSet::from_iter([
863 (Stat::Max, Precision::exact(100)),
864 (Stat::IsStrictSorted, Precision::exact(true)),
865 ]);
866
867 stats1
868 .combine_sets(
869 &stats2,
870 &DType::Primitive(PType::I32, Nullability::NonNullable),
871 )
872 .unwrap();
873
874 assert_eq!(stats1.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
876 assert_eq!(stats1.get_as::<i32>(Stat::Max), Some(Precision::exact(100)));
878 assert_eq!(
880 stats1.get_as::<bool>(Stat::IsStrictSorted),
881 Some(Precision::exact(true))
882 );
883 }
884
885 #[test]
886 fn test_combine_sets_with_inexact() {
887 let mut stats1 = StatsSet::from_iter([
888 (Stat::Min, Precision::exact(42)),
889 (Stat::Max, Precision::inexact(100)),
890 (Stat::IsConstant, Precision::exact(false)),
891 ]);
892
893 let stats2 = StatsSet::from_iter([
894 (Stat::Min, Precision::inexact(40)),
896 (Stat::Max, Precision::exact(90)),
897 (Stat::IsSorted, Precision::exact(true)),
898 ]);
899
900 stats1
901 .combine_sets(
902 &stats2,
903 &DType::Primitive(PType::I32, Nullability::NonNullable),
904 )
905 .unwrap();
906
907 assert_eq!(stats1.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
909 assert_eq!(stats1.get_as::<i32>(Stat::Max), Some(Precision::exact(90)));
911 assert_eq!(
913 stats1.get_as::<bool>(Stat::IsSorted),
914 Some(Precision::exact(true))
915 );
916 }
917}