1use std::fmt::Debug;
5
6use enum_iterator::{Sequence, all};
7use num_traits::CheckedAdd;
8use vortex_dtype::DType;
9use vortex_error::{VortexExpect, VortexResult, vortex_err};
10use vortex_scalar::{Scalar, ScalarValue};
11
12use super::traits::StatsProvider;
13use super::{IsSorted, IsStrictSorted, NaNCount, NullCount, StatType, UncompressedSizeInBytes};
14use crate::stats::{IsConstant, Max, Min, Precision, Stat, StatBound, StatsProviderExt, Sum};
15
16#[derive(Default, Debug, Clone)]
17pub struct StatsSet {
18 values: Vec<(Stat, Precision<ScalarValue>)>,
19}
20
21impl StatsSet {
22 pub fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
28 Self { values }
29 }
30
31 pub fn nulls(len: usize) -> Self {
34 let mut stats = Self::new_unchecked(vec![(Stat::NullCount, Precision::exact(len))]);
35
36 if len > 0 {
37 stats.set(Stat::IsConstant, Precision::exact(true));
38 stats.set(Stat::IsSorted, Precision::exact(true));
39 stats.set(Stat::IsStrictSorted, Precision::exact(len < 2));
40 }
41
42 stats
43 }
44
45 pub fn empty_array() -> StatsSet {
47 StatsSet::new_unchecked(vec![(Stat::NullCount, Precision::exact(0))])
48 }
49
50 pub fn bools_with_sum_and_null_count(true_count: usize, null_count: usize, len: usize) -> Self {
51 StatsSet::new_unchecked(vec![
52 (Stat::Sum, Precision::exact(true_count)),
53 (Stat::NullCount, Precision::exact(null_count)),
54 (Stat::Min, Precision::exact(true_count == len)),
55 (Stat::Max, Precision::exact(true_count > 0)),
56 (
57 Stat::IsConstant,
58 Precision::exact((true_count == 0 && null_count == 0) || true_count == len),
59 ),
60 ])
61 }
62
63 pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
64 Self::new_unchecked(vec![(stat, value)])
65 }
66
67 fn reserve_full_capacity(&mut self) {
68 if self.values.capacity() < Stat::CARDINALITY {
69 self.values
70 .reserve_exact(Stat::CARDINALITY - self.values.capacity());
71 }
72 }
73}
74
75impl StatsSet {
77 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
79 self.reserve_full_capacity();
80
81 if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
82 *existing = (stat, value);
83 } else {
84 self.values.push((stat, value));
85 }
86 }
87
88 pub fn clear(&mut self, stat: Stat) {
90 self.values.retain(|(s, _)| *s != stat);
91 }
92
93 pub fn retain_only(&mut self, stats: &[Stat]) {
94 self.values.retain(|(s, _)| stats.contains(s));
95 }
96
97 pub fn keep_inexact_stats(self, inexact_keep: &[Stat]) -> Self {
98 self.values
99 .into_iter()
100 .filter_map(|(s, v)| inexact_keep.contains(&s).then(|| (s, v.into_inexact())))
101 .collect()
102 }
103
104 pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
108 self.values.iter()
109 }
110}
111
112pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
118
119impl Iterator for StatsSetIntoIter {
120 type Item = (Stat, Precision<ScalarValue>);
121
122 fn next(&mut self) -> Option<Self::Item> {
123 self.0.next()
124 }
125}
126
127impl IntoIterator for StatsSet {
128 type Item = (Stat, Precision<ScalarValue>);
129 type IntoIter = StatsSetIntoIter;
130
131 fn into_iter(self) -> Self::IntoIter {
132 StatsSetIntoIter(self.values.into_iter())
133 }
134}
135
136impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
137 fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
138 let iter = iter.into_iter();
139 let mut values = Vec::default();
140 values.reserve_exact(Stat::CARDINALITY);
141
142 let mut this = Self { values };
143 this.extend(iter);
144 this
145 }
146}
147
148impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
149 #[inline]
150 fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
151 let iter = iter.into_iter();
152 self.reserve_full_capacity();
153
154 iter.for_each(|(stat, value)| self.set(stat, value));
155 }
156}
157
158impl StatsSet {
160 pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
163 for s in all::<Stat>() {
164 match s {
165 Stat::IsConstant => self.merge_is_constant(other, dtype),
166 Stat::IsSorted => self.merge_is_sorted(other, dtype),
167 Stat::IsStrictSorted => self.merge_is_strict_sorted(other, dtype),
168 Stat::Max => self.merge_max(other, dtype),
169 Stat::Min => self.merge_min(other, dtype),
170 Stat::Sum => self.merge_sum(other, dtype),
171 Stat::NullCount => self.merge_null_count(other),
172 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
173 Stat::NaNCount => self.merge_nan_count(other),
174 }
175 }
176
177 self
178 }
179
180 pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
183 for s in all::<Stat>() {
184 if !s.is_commutative() {
185 self.clear(s);
186 continue;
187 }
188
189 match s {
190 Stat::IsConstant => self.merge_is_constant(other, dtype),
191 Stat::Max => self.merge_max(other, dtype),
192 Stat::Min => self.merge_min(other, dtype),
193 Stat::Sum => self.merge_sum(other, dtype),
194 Stat::NullCount => self.merge_null_count(other),
195 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
196 Stat::IsSorted | Stat::IsStrictSorted => {
197 unreachable!("not commutative")
198 }
199 Stat::NaNCount => self.merge_nan_count(other),
200 }
201 }
202
203 self
204 }
205
206 pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
208 let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
209 for s in other_stats {
210 match s {
211 Stat::Max => self.combine_bound::<Max>(other, dtype)?,
212 Stat::Min => self.combine_bound::<Min>(other, dtype)?,
213 Stat::UncompressedSizeInBytes => {
214 self.combine_bound::<UncompressedSizeInBytes>(other, dtype)?
215 }
216 Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
217 Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
218 Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
219 Stat::NullCount => self.combine_bound::<NullCount>(other, dtype)?,
220 Stat::Sum => self.combine_bound::<Sum>(other, dtype)?,
221 Stat::NaNCount => self.combine_bound::<NaNCount>(other, dtype)?,
222 }
223 }
224 Ok(())
225 }
226
227 fn combine_bound<S: StatType<Scalar>>(
228 &mut self,
229 other: &Self,
230 dtype: &DType,
231 ) -> VortexResult<()>
232 where
233 S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
234 {
235 match (
236 self.get_scalar_bound::<S>(dtype),
237 other.get_scalar_bound::<S>(dtype),
238 ) {
239 (Some(m1), Some(m2)) => {
240 let meet = m1
241 .intersection(&m2)
242 .vortex_expect("can always compare scalar")
243 .ok_or_else(|| {
244 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
245 })?;
246 if meet != m1 {
247 self.set(S::STAT, meet.into_value().map(Scalar::into_value));
248 }
249 }
250 (None, Some(m)) => self.set(S::STAT, m.into_value().map(Scalar::into_value)),
251 (Some(_), _) => (),
252 (None, None) => self.clear(S::STAT),
253 }
254 Ok(())
255 }
256
257 fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &Self) -> VortexResult<()>
258 where
259 S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
260 {
261 match (
262 self.get_as_bound::<S, bool>(),
263 other.get_as_bound::<S, bool>(),
264 ) {
265 (Some(m1), Some(m2)) => {
266 let intersection = m1
267 .intersection(&m2)
268 .vortex_expect("can always compare boolean")
269 .ok_or_else(|| {
270 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
271 })?;
272 if intersection != m1 {
273 self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
274 }
275 }
276 (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
277 (Some(_), None) => (),
278 (None, None) => self.clear(S::STAT),
279 }
280 Ok(())
281 }
282
283 fn merge_min(&mut self, other: &Self, dtype: &DType) {
284 match (
285 self.get_scalar_bound::<Min>(dtype),
286 other.get_scalar_bound::<Min>(dtype),
287 ) {
288 (Some(m1), Some(m2)) => {
289 let meet = m1.union(&m2).vortex_expect("can compare scalar");
290 if meet != m1 {
291 self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
292 }
293 }
294 _ => self.clear(Stat::Min),
295 }
296 }
297
298 fn merge_max(&mut self, other: &Self, dtype: &DType) {
299 match (
300 self.get_scalar_bound::<Max>(dtype),
301 other.get_scalar_bound::<Max>(dtype),
302 ) {
303 (Some(m1), Some(m2)) => {
304 let meet = m1.union(&m2).vortex_expect("can compare scalar");
305 if meet != m1 {
306 self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
307 }
308 }
309 _ => self.clear(Stat::Max),
310 }
311 }
312
313 fn merge_sum(&mut self, other: &Self, dtype: &DType) {
314 match (
315 self.get_scalar_bound::<Sum>(dtype),
316 other.get_scalar_bound::<Sum>(dtype),
317 ) {
318 (Some(m1), Some(m2)) => {
319 if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
321 s1.as_primitive()
322 .checked_add(&s2.as_primitive())
323 .map(|pscalar| {
324 pscalar
325 .pvalue()
326 .map(|pvalue| {
327 Scalar::primitive_value(
328 pvalue,
329 pscalar.ptype(),
330 pscalar.dtype().nullability(),
331 )
332 .into_value()
333 })
334 .unwrap_or_else(ScalarValue::null)
335 })
336 }) {
337 self.set(Stat::Sum, Precision::Exact(scalar_value));
338 }
339 }
340 _ => self.clear(Stat::Sum),
341 }
342 }
343
344 fn merge_is_constant(&mut self, other: &Self, dtype: &DType) {
345 let self_const = self.get_as(Stat::IsConstant);
346 let other_const = other.get_as(Stat::IsConstant);
347 let self_min = self.get_scalar(Stat::Min, dtype);
348 let other_min = other.get_scalar(Stat::Min, dtype);
349
350 if let (
351 Some(Precision::Exact(self_const)),
352 Some(Precision::Exact(other_const)),
353 Some(Precision::Exact(self_min)),
354 Some(Precision::Exact(other_min)),
355 ) = (self_const, other_const, self_min, other_min)
356 {
357 if self_const && other_const && self_min == other_min {
358 self.set(Stat::IsConstant, Precision::exact(true));
359 } else {
360 self.set(Stat::IsConstant, Precision::inexact(false));
361 }
362 }
363 self.set(Stat::IsConstant, Precision::exact(false));
364 }
365
366 fn merge_is_sorted(&mut self, other: &Self, dtype: &DType) {
367 self.merge_sortedness_stat(other, Stat::IsSorted, dtype, PartialOrd::le)
368 }
369
370 fn merge_is_strict_sorted(&mut self, other: &Self, dtype: &DType) {
371 self.merge_sortedness_stat(other, Stat::IsStrictSorted, dtype, PartialOrd::lt)
372 }
373
374 fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
375 &mut self,
376 other: &Self,
377 stat: Stat,
378 dtype: &DType,
379 cmp: F,
380 ) {
381 if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
382 == (self.get_as(stat), other.get_as(stat))
383 {
384 if let (Some(self_max), Some(other_min)) = (
389 self.get_scalar_bound::<Max>(dtype),
390 other.get_scalar_bound::<Min>(dtype),
391 ) {
392 return if cmp(&self_max.max_value(), &other_min.min_value()) {
393 } else {
395 self.set(stat, Precision::inexact(false));
396 };
397 }
398 }
399 self.clear(stat);
400 }
401
402 fn merge_null_count(&mut self, other: &Self) {
403 self.merge_sum_stat(Stat::NullCount, other)
404 }
405
406 fn merge_nan_count(&mut self, other: &Self) {
407 self.merge_sum_stat(Stat::NaNCount, other)
408 }
409
410 fn merge_uncompressed_size_in_bytes(&mut self, other: &Self) {
411 self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
412 }
413
414 fn merge_sum_stat(&mut self, stat: Stat, other: &Self) {
415 match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
416 (Some(nc1), Some(nc2)) => {
417 self.set(
418 stat,
419 nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
420 );
421 }
422 _ => self.clear(stat),
423 }
424 }
425}
426
427impl StatsProvider for StatsSet {
428 fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
429 self.values
430 .iter()
431 .find(|(s, _)| *s == stat)
432 .map(|(_, v)| v.clone())
433 }
434
435 fn len(&self) -> usize {
436 self.values.len()
437 }
438}
439
440#[cfg(test)]
441mod test {
442 use enum_iterator::all;
443 use itertools::Itertools;
444 use vortex_dtype::{DType, Nullability, PType};
445
446 use crate::arrays::PrimitiveArray;
447 use crate::stats::{IsConstant, Precision, Stat, StatsProvider, StatsProviderExt, StatsSet};
448
449 #[test]
450 fn test_iter() {
451 let set = StatsSet::new_unchecked(vec![
452 (Stat::Max, Precision::exact(100)),
453 (Stat::Min, Precision::exact(42)),
454 ]);
455 let mut iter = set.iter();
456 let first = iter.next().unwrap().clone();
457 assert_eq!(first.0, Stat::Max);
458 assert_eq!(
459 first.1.map(|f| i32::try_from(&f).unwrap()),
460 Precision::exact(100)
461 );
462 let snd = iter.next().unwrap().clone();
463 assert_eq!(snd.0, Stat::Min);
464 assert_eq!(snd.1.map(|s| i32::try_from(&s).unwrap()), 42);
465 }
466
467 #[test]
468 fn into_iter() {
469 let mut set = StatsSet::new_unchecked(vec![
470 (Stat::Max, Precision::exact(100)),
471 (Stat::Min, Precision::exact(42)),
472 ])
473 .into_iter();
474 let (stat, first) = set.next().unwrap();
475 assert_eq!(stat, Stat::Max);
476 assert_eq!(
477 first.map(|f| i32::try_from(&f).unwrap()),
478 Precision::exact(100)
479 );
480 let snd = set.next().unwrap();
481 assert_eq!(snd.0, Stat::Min);
482 assert_eq!(
483 snd.1.map(|s| i32::try_from(&s).unwrap()),
484 Precision::exact(42)
485 );
486 }
487
488 #[test]
489 fn merge_constant() {
490 let first = StatsSet::from_iter([
491 (Stat::Min, Precision::exact(42)),
492 (Stat::IsConstant, Precision::exact(true)),
493 ])
494 .merge_ordered(
495 &StatsSet::from_iter([
496 (Stat::Min, Precision::inexact(42)),
497 (Stat::IsConstant, Precision::exact(true)),
498 ]),
499 &DType::Primitive(PType::I32, Nullability::NonNullable),
500 );
501 assert_eq!(
502 first.get_as::<bool>(Stat::IsConstant),
503 Some(Precision::exact(false))
504 );
505 assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
506 }
507
508 #[test]
509 fn merge_into_min() {
510 let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
511 &StatsSet::default(),
512 &DType::Primitive(PType::I32, Nullability::NonNullable),
513 );
514 assert!(first.get(Stat::Min).is_none());
515 }
516
517 #[test]
518 fn merge_from_min() {
519 let first = StatsSet::default().merge_ordered(
520 &StatsSet::of(Stat::Min, Precision::exact(42)),
521 &DType::Primitive(PType::I32, Nullability::NonNullable),
522 );
523 assert!(first.get(Stat::Min).is_none());
524 }
525
526 #[test]
527 fn merge_mins() {
528 let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
529 &StatsSet::of(Stat::Min, Precision::exact(42)),
530 &DType::Primitive(PType::I32, Nullability::NonNullable),
531 );
532 assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(37)));
533 }
534
535 #[test]
536 fn merge_into_bound_max() {
537 let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
538 &StatsSet::default(),
539 &DType::Primitive(PType::I32, Nullability::NonNullable),
540 );
541 assert!(first.get(Stat::Max).is_none());
542 }
543
544 #[test]
545 fn merge_from_max() {
546 let first = StatsSet::default().merge_ordered(
547 &StatsSet::of(Stat::Max, Precision::exact(42)),
548 &DType::Primitive(PType::I32, Nullability::NonNullable),
549 );
550 assert!(first.get(Stat::Max).is_none());
551 }
552
553 #[test]
554 fn merge_maxes() {
555 let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
556 &StatsSet::of(Stat::Max, Precision::exact(42)),
557 &DType::Primitive(PType::I32, Nullability::NonNullable),
558 );
559 assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::exact(42)));
560 }
561
562 #[test]
563 fn merge_maxes_bound() {
564 let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
565 let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
566 .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
567 assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::inexact(43)));
568 }
569
570 #[test]
571 fn merge_into_scalar() {
572 let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
573 &StatsSet::default(),
574 &DType::Primitive(PType::I32, Nullability::NonNullable),
575 );
576 assert!(first.get(Stat::Sum).is_none());
577 }
578
579 #[test]
580 fn merge_from_scalar() {
581 let first = StatsSet::default().merge_ordered(
582 &StatsSet::of(Stat::Sum, Precision::exact(42)),
583 &DType::Primitive(PType::I32, Nullability::NonNullable),
584 );
585 assert!(first.get(Stat::Sum).is_none());
586 }
587
588 #[test]
589 fn merge_scalars() {
590 let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
591 &StatsSet::of(Stat::Sum, Precision::exact(42)),
592 &DType::Primitive(PType::I32, Nullability::NonNullable),
593 );
594 assert_eq!(
595 first.get_as::<usize>(Stat::Sum),
596 Some(Precision::exact(79usize))
597 );
598 }
599
600 #[test]
601 fn merge_into_sortedness() {
602 let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
603 &StatsSet::default(),
604 &DType::Primitive(PType::I32, Nullability::NonNullable),
605 );
606 assert!(first.get(Stat::IsStrictSorted).is_none());
607 }
608
609 #[test]
610 fn merge_from_sortedness() {
611 let first = StatsSet::default().merge_ordered(
612 &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
613 &DType::Primitive(PType::I32, Nullability::NonNullable),
614 );
615 assert!(first.get(Stat::IsStrictSorted).is_none());
616 }
617
618 #[test]
619 fn merge_sortedness() {
620 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
621 first.set(Stat::Max, Precision::exact(1));
622 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
623 second.set(Stat::Min, Precision::exact(2));
624 first = first.merge_ordered(
625 &second,
626 &DType::Primitive(PType::I32, Nullability::NonNullable),
627 );
628 assert_eq!(
629 first.get_as::<bool>(Stat::IsStrictSorted),
630 Some(Precision::exact(true))
631 );
632 }
633
634 #[test]
635 fn merge_sortedness_out_of_order() {
636 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
637 first.set(Stat::Min, Precision::exact(1));
638 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
639 second.set(Stat::Max, Precision::exact(2));
640 second = second.merge_ordered(
641 &first,
642 &DType::Primitive(PType::I32, Nullability::NonNullable),
643 );
644 assert_eq!(
645 second.get_as::<bool>(Stat::IsStrictSorted),
646 Some(Precision::inexact(false))
647 );
648 }
649
650 #[test]
651 fn merge_sortedness_only_one_sorted() {
652 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
653 first.set(Stat::Max, Precision::exact(1));
654 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
655 second.set(Stat::Min, Precision::exact(2));
656 first.merge_ordered(
657 &second,
658 &DType::Primitive(PType::I32, Nullability::NonNullable),
659 );
660 assert_eq!(
661 second.get_as::<bool>(Stat::IsStrictSorted),
662 Some(Precision::exact(false))
663 );
664 }
665
666 #[test]
667 fn merge_sortedness_missing_min() {
668 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
669 first.set(Stat::Max, Precision::exact(1));
670 let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
671 first = first.merge_ordered(
672 &second,
673 &DType::Primitive(PType::I32, Nullability::NonNullable),
674 );
675 assert!(first.get(Stat::IsStrictSorted).is_none());
676 }
677
678 #[test]
679 fn merge_sortedness_bound_min() {
680 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
681 first.set(Stat::Max, Precision::exact(1));
682 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
683 second.set(Stat::Min, Precision::inexact(2));
684 first = first.merge_ordered(
685 &second,
686 &DType::Primitive(PType::I32, Nullability::NonNullable),
687 );
688 assert_eq!(
689 first.get_as::<bool>(Stat::IsStrictSorted),
690 Some(Precision::exact(true))
691 );
692 }
693
694 #[test]
695 fn merge_unordered() {
696 let array =
697 PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
698 let all_stats = all::<Stat>()
699 .filter(|s| !matches!(s, Stat::Sum))
700 .filter(|s| !matches!(s, Stat::NaNCount))
701 .collect_vec();
702 array.statistics().compute_all(&all_stats).unwrap();
703
704 let stats = array.statistics().to_owned();
705 for stat in &all_stats {
706 assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
707 }
708
709 let merged = stats.clone().merge_unordered(
710 &stats,
711 &DType::Primitive(PType::I32, Nullability::NonNullable),
712 );
713 for stat in &all_stats {
714 assert_eq!(
715 merged.get(*stat).is_some(),
716 stat.is_commutative(),
717 "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
718 )
719 }
720
721 assert_eq!(
722 merged.get_as::<i32>(Stat::Min),
723 stats.get_as::<i32>(Stat::Min)
724 );
725 assert_eq!(
726 merged.get_as::<i32>(Stat::Max),
727 stats.get_as::<i32>(Stat::Max)
728 );
729 assert_eq!(
730 merged.get_as::<u64>(Stat::NullCount).unwrap(),
731 stats.get_as::<u64>(Stat::NullCount).unwrap().map(|s| s * 2)
732 );
733 }
734
735 #[test]
736 fn merge_min_bound_same() {
737 let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
740 &StatsSet::of(Stat::Min, Precision::exact(5)),
741 &DType::Primitive(PType::I32, Nullability::NonNullable),
742 );
743 assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::exact(5)));
744 }
745
746 #[test]
747 fn merge_min_bound_bound_lower() {
748 let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
749 &StatsSet::of(Stat::Min, Precision::exact(5)),
750 &DType::Primitive(PType::I32, Nullability::NonNullable),
751 );
752 assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::inexact(4)));
753 }
754
755 #[test]
756 fn retain_approx() {
757 let set = StatsSet::from_iter([
758 (Stat::Max, Precision::exact(100)),
759 (Stat::Min, Precision::exact(50)),
760 (Stat::Sum, Precision::inexact(10)),
761 ]);
762
763 let set = set.keep_inexact_stats(&[Stat::Min, Stat::Max]);
764
765 assert_eq!(set.len(), 2);
766 assert_eq!(set.get_as::<i32>(Stat::Max), Some(Precision::inexact(100)));
767 assert_eq!(set.get_as::<i32>(Stat::Min), Some(Precision::inexact(50)));
768 assert_eq!(set.get_as::<i32>(Stat::Sum), None);
769 }
770
771 #[test]
772 fn test_combine_is_constant() {
773 {
774 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
775 let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
776 stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
777 assert_eq!(
778 stats.get_as::<bool>(Stat::IsConstant),
779 Some(Precision::exact(true))
780 );
781 }
782
783 {
784 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
785 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
786 stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
787 assert_eq!(
788 stats.get_as::<bool>(Stat::IsConstant),
789 Some(Precision::exact(true))
790 );
791 }
792
793 {
794 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
795 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
796 stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
797 assert_eq!(
798 stats.get_as::<bool>(Stat::IsConstant),
799 Some(Precision::exact(false))
800 );
801 }
802 }
803
804 #[test]
805 fn test_combine_sets_boolean_conflict() {
806 let mut stats1 = StatsSet::from_iter([
807 (Stat::IsConstant, Precision::exact(true)),
808 (Stat::IsSorted, Precision::exact(true)),
809 ]);
810
811 let stats2 = StatsSet::from_iter([
812 (Stat::IsConstant, Precision::exact(false)),
813 (Stat::IsSorted, Precision::exact(true)),
814 ]);
815
816 let result = stats1.combine_sets(
817 &stats2,
818 &DType::Primitive(PType::I32, Nullability::NonNullable),
819 );
820 assert!(result.is_err());
821 }
822
823 #[test]
824 fn test_combine_sets_with_missing_stats() {
825 let mut stats1 = StatsSet::from_iter([
826 (Stat::Min, Precision::exact(42)),
827 (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
828 ]);
829
830 let stats2 = StatsSet::from_iter([
831 (Stat::Max, Precision::exact(100)),
832 (Stat::IsStrictSorted, Precision::exact(true)),
833 ]);
834
835 stats1
836 .combine_sets(
837 &stats2,
838 &DType::Primitive(PType::I32, Nullability::NonNullable),
839 )
840 .unwrap();
841
842 assert_eq!(stats1.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
844 assert_eq!(stats1.get_as::<i32>(Stat::Max), Some(Precision::exact(100)));
846 assert_eq!(
848 stats1.get_as::<bool>(Stat::IsStrictSorted),
849 Some(Precision::exact(true))
850 );
851 }
852
853 #[test]
854 fn test_combine_sets_with_inexact() {
855 let mut stats1 = StatsSet::from_iter([
856 (Stat::Min, Precision::exact(42)),
857 (Stat::Max, Precision::inexact(100)),
858 (Stat::IsConstant, Precision::exact(false)),
859 ]);
860
861 let stats2 = StatsSet::from_iter([
862 (Stat::Min, Precision::inexact(40)),
864 (Stat::Max, Precision::exact(90)),
865 (Stat::IsSorted, Precision::exact(true)),
866 ]);
867
868 stats1
869 .combine_sets(
870 &stats2,
871 &DType::Primitive(PType::I32, Nullability::NonNullable),
872 )
873 .unwrap();
874
875 assert_eq!(stats1.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
877 assert_eq!(stats1.get_as::<i32>(Stat::Max), Some(Precision::exact(90)));
879 assert_eq!(
881 stats1.get_as::<bool>(Stat::IsSorted),
882 Some(Precision::exact(true))
883 );
884 }
885}