1use std::fmt::Debug;
2
3use enum_iterator::{Sequence, all};
4use num_traits::CheckedAdd;
5use vortex_dtype::DType;
6use vortex_error::{VortexExpect, VortexResult, vortex_err};
7use vortex_scalar::{Scalar, ScalarValue};
8
9use super::traits::StatsProvider;
10use super::{IsSorted, IsStrictSorted, NaNCount, NullCount, StatType, UncompressedSizeInBytes};
11use crate::stats::{IsConstant, Max, Min, Precision, Stat, StatBound, StatsProviderExt, Sum};
12
13#[derive(Default, Debug, Clone)]
14pub struct StatsSet {
15 values: Vec<(Stat, Precision<ScalarValue>)>,
16}
17
18impl StatsSet {
19 pub fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
25 Self { values }
26 }
27
28 pub fn nulls(len: usize) -> Self {
31 let mut stats = Self::new_unchecked(vec![(Stat::NullCount, Precision::exact(len))]);
32
33 if len > 0 {
34 stats.set(Stat::IsConstant, Precision::exact(true));
35 stats.set(Stat::IsSorted, Precision::exact(true));
36 stats.set(Stat::IsStrictSorted, Precision::exact(len < 2));
37 }
38
39 stats
40 }
41
42 pub fn empty_array() -> StatsSet {
44 StatsSet::new_unchecked(vec![(Stat::NullCount, Precision::exact(0))])
45 }
46
47 pub fn constant(scalar: Scalar, length: usize) -> Self {
48 let (dtype, sv) = scalar.into_parts();
49 let mut stats = Self::default();
50 if length > 0 {
51 stats.extend([
52 (Stat::IsConstant, Precision::exact(true)),
53 (Stat::IsSorted, Precision::exact(true)),
54 (Stat::IsStrictSorted, Precision::exact(length <= 1)),
55 ]);
56 }
57
58 let null_count = if sv.is_null() { length as u64 } else { 0 };
59 stats.set(Stat::NullCount, Precision::exact(null_count));
60
61 if !sv.is_null() {
62 stats.extend([
63 (Stat::Min, Precision::exact(sv.clone())),
64 (Stat::Max, Precision::exact(sv.clone())),
65 ]);
66 }
67
68 if matches!(dtype, DType::Bool(_)) {
69 let bool_val = <Option<bool>>::try_from(&sv).vortex_expect("Checked dtype");
70 let true_count = bool_val
71 .map(|b| if b { length as u64 } else { 0 })
72 .unwrap_or(0);
73 stats.set(Stat::Sum, Precision::exact(true_count));
74 }
75
76 stats
77 }
78
79 pub fn bools_with_sum_and_null_count(true_count: usize, null_count: usize, len: usize) -> Self {
80 StatsSet::new_unchecked(vec![
81 (Stat::Sum, Precision::exact(true_count)),
82 (Stat::NullCount, Precision::exact(null_count)),
83 (Stat::Min, Precision::exact(true_count == len)),
84 (Stat::Max, Precision::exact(true_count > 0)),
85 (
86 Stat::IsConstant,
87 Precision::exact((true_count == 0 && null_count == 0) || true_count == len),
88 ),
89 ])
90 }
91
92 pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
93 Self::new_unchecked(vec![(stat, value)])
94 }
95
96 fn reserve_full_capacity(&mut self) {
97 if self.values.capacity() < Stat::CARDINALITY {
98 self.values
99 .reserve_exact(Stat::CARDINALITY - self.values.capacity());
100 }
101 }
102}
103
104impl StatsSet {
106 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
108 self.reserve_full_capacity();
109
110 if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
111 *existing = (stat, value);
112 } else {
113 self.values.push((stat, value));
114 }
115 }
116
117 pub fn clear(&mut self, stat: Stat) {
119 self.values.retain(|(s, _)| *s != stat);
120 }
121
122 pub fn retain_only(&mut self, stats: &[Stat]) {
123 self.values.retain(|(s, _)| stats.contains(s));
124 }
125
126 pub fn keep_inexact_stats(self, inexact_keep: &[Stat]) -> Self {
127 self.values
128 .into_iter()
129 .filter_map(|(s, v)| inexact_keep.contains(&s).then(|| (s, v.into_inexact())))
130 .collect()
131 }
132
133 pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
137 self.values.iter()
138 }
139}
140
141pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
147
148impl Iterator for StatsSetIntoIter {
149 type Item = (Stat, Precision<ScalarValue>);
150
151 fn next(&mut self) -> Option<Self::Item> {
152 self.0.next()
153 }
154}
155
156impl IntoIterator for StatsSet {
157 type Item = (Stat, Precision<ScalarValue>);
158 type IntoIter = StatsSetIntoIter;
159
160 fn into_iter(self) -> Self::IntoIter {
161 StatsSetIntoIter(self.values.into_iter())
162 }
163}
164
165impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
166 fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
167 let iter = iter.into_iter();
168 let mut values = Vec::default();
169 values.reserve_exact(Stat::CARDINALITY);
170
171 let mut this = Self { values };
172 this.extend(iter);
173 this
174 }
175}
176
177impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
178 #[inline]
179 fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
180 let iter = iter.into_iter();
181 self.reserve_full_capacity();
182
183 iter.for_each(|(stat, value)| self.set(stat, value));
184 }
185}
186
187impl StatsSet {
189 pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
192 for s in all::<Stat>() {
193 match s {
194 Stat::IsConstant => self.merge_is_constant(other, dtype),
195 Stat::IsSorted => self.merge_is_sorted(other, dtype),
196 Stat::IsStrictSorted => self.merge_is_strict_sorted(other, dtype),
197 Stat::Max => self.merge_max(other, dtype),
198 Stat::Min => self.merge_min(other, dtype),
199 Stat::Sum => self.merge_sum(other, dtype),
200 Stat::NullCount => self.merge_null_count(other),
201 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
202 Stat::NaNCount => self.merge_nan_count(other),
203 }
204 }
205
206 self
207 }
208
209 pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
212 for s in all::<Stat>() {
213 if !s.is_commutative() {
214 self.clear(s);
215 continue;
216 }
217
218 match s {
219 Stat::IsConstant => self.merge_is_constant(other, dtype),
220 Stat::Max => self.merge_max(other, dtype),
221 Stat::Min => self.merge_min(other, dtype),
222 Stat::Sum => self.merge_sum(other, dtype),
223 Stat::NullCount => self.merge_null_count(other),
224 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
225 Stat::IsSorted | Stat::IsStrictSorted => {
226 unreachable!("not commutative")
227 }
228 Stat::NaNCount => self.merge_nan_count(other),
229 }
230 }
231
232 self
233 }
234
235 pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
237 let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
238 for s in other_stats {
239 match s {
240 Stat::Max => self.combine_bound::<Max>(other, dtype)?,
241 Stat::Min => self.combine_bound::<Min>(other, dtype)?,
242 Stat::UncompressedSizeInBytes => {
243 self.combine_bound::<UncompressedSizeInBytes>(other, dtype)?
244 }
245 Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
246 Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
247 Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
248 Stat::NullCount => self.combine_bound::<NullCount>(other, dtype)?,
249 Stat::Sum => self.combine_bound::<Sum>(other, dtype)?,
250 Stat::NaNCount => self.combine_bound::<NaNCount>(other, dtype)?,
251 }
252 }
253 Ok(())
254 }
255
256 fn combine_bound<S: StatType<Scalar>>(
257 &mut self,
258 other: &Self,
259 dtype: &DType,
260 ) -> VortexResult<()>
261 where
262 S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
263 {
264 match (
265 self.get_scalar_bound::<S>(dtype),
266 other.get_scalar_bound::<S>(dtype),
267 ) {
268 (Some(m1), Some(m2)) => {
269 let meet = m1
270 .intersection(&m2)
271 .vortex_expect("can always compare scalar")
272 .ok_or_else(|| {
273 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
274 })?;
275 if meet != m1 {
276 self.set(S::STAT, meet.into_value().map(Scalar::into_value));
277 }
278 }
279 (None, Some(m)) => self.set(S::STAT, m.into_value().map(Scalar::into_value)),
280 (Some(_), _) => (),
281 (None, None) => self.clear(S::STAT),
282 }
283 Ok(())
284 }
285
286 fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &Self) -> VortexResult<()>
287 where
288 S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
289 {
290 match (
291 self.get_as_bound::<S, bool>(),
292 other.get_as_bound::<S, bool>(),
293 ) {
294 (Some(m1), Some(m2)) => {
295 let intersection = m1
296 .intersection(&m2)
297 .vortex_expect("can always compare boolean")
298 .ok_or_else(|| {
299 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
300 })?;
301 if intersection != m1 {
302 self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
303 }
304 }
305 (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
306 (Some(_), None) => (),
307 (None, None) => self.clear(S::STAT),
308 }
309 Ok(())
310 }
311
312 fn merge_min(&mut self, other: &Self, dtype: &DType) {
313 match (
314 self.get_scalar_bound::<Min>(dtype),
315 other.get_scalar_bound::<Min>(dtype),
316 ) {
317 (Some(m1), Some(m2)) => {
318 let meet = m1.union(&m2).vortex_expect("can compare scalar");
319 if meet != m1 {
320 self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
321 }
322 }
323 _ => self.clear(Stat::Min),
324 }
325 }
326
327 fn merge_max(&mut self, other: &Self, dtype: &DType) {
328 match (
329 self.get_scalar_bound::<Max>(dtype),
330 other.get_scalar_bound::<Max>(dtype),
331 ) {
332 (Some(m1), Some(m2)) => {
333 let meet = m1.union(&m2).vortex_expect("can compare scalar");
334 if meet != m1 {
335 self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
336 }
337 }
338 _ => self.clear(Stat::Max),
339 }
340 }
341
342 fn merge_sum(&mut self, other: &Self, dtype: &DType) {
343 match (
344 self.get_scalar_bound::<Sum>(dtype),
345 other.get_scalar_bound::<Sum>(dtype),
346 ) {
347 (Some(m1), Some(m2)) => {
348 if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
350 s1.as_primitive()
351 .checked_add(&s2.as_primitive())
352 .map(|pscalar| {
353 pscalar
354 .pvalue()
355 .map(|pvalue| {
356 Scalar::primitive_value(
357 pvalue,
358 pscalar.ptype(),
359 pscalar.dtype().nullability(),
360 )
361 .into_value()
362 })
363 .unwrap_or_else(ScalarValue::null)
364 })
365 }) {
366 self.set(Stat::Sum, Precision::Exact(scalar_value));
367 }
368 }
369 _ => self.clear(Stat::Sum),
370 }
371 }
372
373 fn merge_is_constant(&mut self, other: &Self, dtype: &DType) {
374 let self_const = self.get_as(Stat::IsConstant);
375 let other_const = other.get_as(Stat::IsConstant);
376 let self_min = self.get_scalar(Stat::Min, dtype);
377 let other_min = other.get_scalar(Stat::Min, dtype);
378
379 if let (
380 Some(Precision::Exact(self_const)),
381 Some(Precision::Exact(other_const)),
382 Some(Precision::Exact(self_min)),
383 Some(Precision::Exact(other_min)),
384 ) = (self_const, other_const, self_min, other_min)
385 {
386 if self_const && other_const && self_min == other_min {
387 self.set(Stat::IsConstant, Precision::exact(true));
388 } else {
389 self.set(Stat::IsConstant, Precision::inexact(false));
390 }
391 }
392 self.set(Stat::IsConstant, Precision::exact(false));
393 }
394
395 fn merge_is_sorted(&mut self, other: &Self, dtype: &DType) {
396 self.merge_sortedness_stat(other, Stat::IsSorted, dtype, PartialOrd::le)
397 }
398
399 fn merge_is_strict_sorted(&mut self, other: &Self, dtype: &DType) {
400 self.merge_sortedness_stat(other, Stat::IsStrictSorted, dtype, PartialOrd::lt)
401 }
402
403 fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
404 &mut self,
405 other: &Self,
406 stat: Stat,
407 dtype: &DType,
408 cmp: F,
409 ) {
410 if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
411 == (self.get_as(stat), other.get_as(stat))
412 {
413 if let (Some(self_max), Some(other_min)) = (
418 self.get_scalar_bound::<Max>(dtype),
419 other.get_scalar_bound::<Min>(dtype),
420 ) {
421 return if cmp(&self_max.max_value(), &other_min.min_value()) {
422 } else {
424 self.set(stat, Precision::inexact(false));
425 };
426 }
427 }
428 self.clear(stat);
429 }
430
431 fn merge_null_count(&mut self, other: &Self) {
432 self.merge_sum_stat(Stat::NullCount, other)
433 }
434
435 fn merge_nan_count(&mut self, other: &Self) {
436 self.merge_sum_stat(Stat::NaNCount, other)
437 }
438
439 fn merge_uncompressed_size_in_bytes(&mut self, other: &Self) {
440 self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
441 }
442
443 fn merge_sum_stat(&mut self, stat: Stat, other: &Self) {
444 match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
445 (Some(nc1), Some(nc2)) => {
446 self.set(
447 stat,
448 nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
449 );
450 }
451 _ => self.clear(stat),
452 }
453 }
454}
455
456impl StatsProvider for StatsSet {
457 fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
458 self.values
459 .iter()
460 .find(|(s, _)| *s == stat)
461 .map(|(_, v)| v.clone())
462 }
463
464 fn len(&self) -> usize {
465 self.values.len()
466 }
467}
468
469#[cfg(test)]
470mod test {
471 use enum_iterator::all;
472 use itertools::Itertools;
473 use vortex_dtype::{DType, Nullability, PType};
474
475 use crate::arrays::PrimitiveArray;
476 use crate::stats::{IsConstant, Precision, Stat, StatsProvider, StatsProviderExt, StatsSet};
477
478 #[test]
479 fn test_iter() {
480 let set = StatsSet::new_unchecked(vec![
481 (Stat::Max, Precision::exact(100)),
482 (Stat::Min, Precision::exact(42)),
483 ]);
484 let mut iter = set.iter();
485 let first = iter.next().unwrap().clone();
486 assert_eq!(first.0, Stat::Max);
487 assert_eq!(
488 first.1.map(|f| i32::try_from(&f).unwrap()),
489 Precision::exact(100)
490 );
491 let snd = iter.next().unwrap().clone();
492 assert_eq!(snd.0, Stat::Min);
493 assert_eq!(snd.1.map(|s| i32::try_from(&s).unwrap()), 42);
494 }
495
496 #[test]
497 fn into_iter() {
498 let mut set = StatsSet::new_unchecked(vec![
499 (Stat::Max, Precision::exact(100)),
500 (Stat::Min, Precision::exact(42)),
501 ])
502 .into_iter();
503 let (stat, first) = set.next().unwrap();
504 assert_eq!(stat, Stat::Max);
505 assert_eq!(
506 first.map(|f| i32::try_from(&f).unwrap()),
507 Precision::exact(100)
508 );
509 let snd = set.next().unwrap();
510 assert_eq!(snd.0, Stat::Min);
511 assert_eq!(
512 snd.1.map(|s| i32::try_from(&s).unwrap()),
513 Precision::exact(42)
514 );
515 }
516
517 #[test]
518 fn merge_constant() {
519 let first = StatsSet::from_iter([
520 (Stat::Min, Precision::exact(42)),
521 (Stat::IsConstant, Precision::exact(true)),
522 ])
523 .merge_ordered(
524 &StatsSet::from_iter([
525 (Stat::Min, Precision::inexact(42)),
526 (Stat::IsConstant, Precision::exact(true)),
527 ]),
528 &DType::Primitive(PType::I32, Nullability::NonNullable),
529 );
530 assert_eq!(
531 first.get_as::<bool>(Stat::IsConstant),
532 Some(Precision::exact(false))
533 );
534 assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
535 }
536
537 #[test]
538 fn merge_into_min() {
539 let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
540 &StatsSet::default(),
541 &DType::Primitive(PType::I32, Nullability::NonNullable),
542 );
543 assert!(first.get(Stat::Min).is_none());
544 }
545
546 #[test]
547 fn merge_from_min() {
548 let first = StatsSet::default().merge_ordered(
549 &StatsSet::of(Stat::Min, Precision::exact(42)),
550 &DType::Primitive(PType::I32, Nullability::NonNullable),
551 );
552 assert!(first.get(Stat::Min).is_none());
553 }
554
555 #[test]
556 fn merge_mins() {
557 let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
558 &StatsSet::of(Stat::Min, Precision::exact(42)),
559 &DType::Primitive(PType::I32, Nullability::NonNullable),
560 );
561 assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(37)));
562 }
563
564 #[test]
565 fn merge_into_bound_max() {
566 let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
567 &StatsSet::default(),
568 &DType::Primitive(PType::I32, Nullability::NonNullable),
569 );
570 assert!(first.get(Stat::Max).is_none());
571 }
572
573 #[test]
574 fn merge_from_max() {
575 let first = StatsSet::default().merge_ordered(
576 &StatsSet::of(Stat::Max, Precision::exact(42)),
577 &DType::Primitive(PType::I32, Nullability::NonNullable),
578 );
579 assert!(first.get(Stat::Max).is_none());
580 }
581
582 #[test]
583 fn merge_maxes() {
584 let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
585 &StatsSet::of(Stat::Max, Precision::exact(42)),
586 &DType::Primitive(PType::I32, Nullability::NonNullable),
587 );
588 assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::exact(42)));
589 }
590
591 #[test]
592 fn merge_maxes_bound() {
593 let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
594 let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
595 .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
596 assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::inexact(43)));
597 }
598
599 #[test]
600 fn merge_into_scalar() {
601 let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
602 &StatsSet::default(),
603 &DType::Primitive(PType::I32, Nullability::NonNullable),
604 );
605 assert!(first.get(Stat::Sum).is_none());
606 }
607
608 #[test]
609 fn merge_from_scalar() {
610 let first = StatsSet::default().merge_ordered(
611 &StatsSet::of(Stat::Sum, Precision::exact(42)),
612 &DType::Primitive(PType::I32, Nullability::NonNullable),
613 );
614 assert!(first.get(Stat::Sum).is_none());
615 }
616
617 #[test]
618 fn merge_scalars() {
619 let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
620 &StatsSet::of(Stat::Sum, Precision::exact(42)),
621 &DType::Primitive(PType::I32, Nullability::NonNullable),
622 );
623 assert_eq!(
624 first.get_as::<usize>(Stat::Sum),
625 Some(Precision::exact(79usize))
626 );
627 }
628
629 #[test]
630 fn merge_into_sortedness() {
631 let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
632 &StatsSet::default(),
633 &DType::Primitive(PType::I32, Nullability::NonNullable),
634 );
635 assert!(first.get(Stat::IsStrictSorted).is_none());
636 }
637
638 #[test]
639 fn merge_from_sortedness() {
640 let first = StatsSet::default().merge_ordered(
641 &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
642 &DType::Primitive(PType::I32, Nullability::NonNullable),
643 );
644 assert!(first.get(Stat::IsStrictSorted).is_none());
645 }
646
647 #[test]
648 fn merge_sortedness() {
649 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
650 first.set(Stat::Max, Precision::exact(1));
651 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
652 second.set(Stat::Min, Precision::exact(2));
653 first = first.merge_ordered(
654 &second,
655 &DType::Primitive(PType::I32, Nullability::NonNullable),
656 );
657 assert_eq!(
658 first.get_as::<bool>(Stat::IsStrictSorted),
659 Some(Precision::exact(true))
660 );
661 }
662
663 #[test]
664 fn merge_sortedness_out_of_order() {
665 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
666 first.set(Stat::Min, Precision::exact(1));
667 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
668 second.set(Stat::Max, Precision::exact(2));
669 second = second.merge_ordered(
670 &first,
671 &DType::Primitive(PType::I32, Nullability::NonNullable),
672 );
673 assert_eq!(
674 second.get_as::<bool>(Stat::IsStrictSorted),
675 Some(Precision::inexact(false))
676 );
677 }
678
679 #[test]
680 fn merge_sortedness_only_one_sorted() {
681 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
682 first.set(Stat::Max, Precision::exact(1));
683 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
684 second.set(Stat::Min, Precision::exact(2));
685 first.merge_ordered(
686 &second,
687 &DType::Primitive(PType::I32, Nullability::NonNullable),
688 );
689 assert_eq!(
690 second.get_as::<bool>(Stat::IsStrictSorted),
691 Some(Precision::exact(false))
692 );
693 }
694
695 #[test]
696 fn merge_sortedness_missing_min() {
697 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
698 first.set(Stat::Max, Precision::exact(1));
699 let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
700 first = first.merge_ordered(
701 &second,
702 &DType::Primitive(PType::I32, Nullability::NonNullable),
703 );
704 assert!(first.get(Stat::IsStrictSorted).is_none());
705 }
706
707 #[test]
708 fn merge_sortedness_bound_min() {
709 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
710 first.set(Stat::Max, Precision::exact(1));
711 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
712 second.set(Stat::Min, Precision::inexact(2));
713 first = first.merge_ordered(
714 &second,
715 &DType::Primitive(PType::I32, Nullability::NonNullable),
716 );
717 assert_eq!(
718 first.get_as::<bool>(Stat::IsStrictSorted),
719 Some(Precision::exact(true))
720 );
721 }
722
723 #[test]
724 fn merge_unordered() {
725 let array =
726 PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
727 let all_stats = all::<Stat>()
728 .filter(|s| !matches!(s, Stat::Sum))
729 .filter(|s| !matches!(s, Stat::NaNCount))
730 .collect_vec();
731 array.statistics().compute_all(&all_stats).unwrap();
732
733 let stats = array.statistics().to_owned();
734 for stat in &all_stats {
735 assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
736 }
737
738 let merged = stats.clone().merge_unordered(
739 &stats,
740 &DType::Primitive(PType::I32, Nullability::NonNullable),
741 );
742 for stat in &all_stats {
743 assert_eq!(
744 merged.get(*stat).is_some(),
745 stat.is_commutative(),
746 "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
747 )
748 }
749
750 assert_eq!(
751 merged.get_as::<i32>(Stat::Min),
752 stats.get_as::<i32>(Stat::Min)
753 );
754 assert_eq!(
755 merged.get_as::<i32>(Stat::Max),
756 stats.get_as::<i32>(Stat::Max)
757 );
758 assert_eq!(
759 merged.get_as::<u64>(Stat::NullCount).unwrap(),
760 stats.get_as::<u64>(Stat::NullCount).unwrap().map(|s| s * 2)
761 );
762 }
763
764 #[test]
765 fn merge_min_bound_same() {
766 let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
769 &StatsSet::of(Stat::Min, Precision::exact(5)),
770 &DType::Primitive(PType::I32, Nullability::NonNullable),
771 );
772 assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::exact(5)));
773 }
774
775 #[test]
776 fn merge_min_bound_bound_lower() {
777 let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
778 &StatsSet::of(Stat::Min, Precision::exact(5)),
779 &DType::Primitive(PType::I32, Nullability::NonNullable),
780 );
781 assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::inexact(4)));
782 }
783
784 #[test]
785 fn retain_approx() {
786 let set = StatsSet::from_iter([
787 (Stat::Max, Precision::exact(100)),
788 (Stat::Min, Precision::exact(50)),
789 (Stat::Sum, Precision::inexact(10)),
790 ]);
791
792 let set = set.keep_inexact_stats(&[Stat::Min, Stat::Max]);
793
794 assert_eq!(set.len(), 2);
795 assert_eq!(set.get_as::<i32>(Stat::Max), Some(Precision::inexact(100)));
796 assert_eq!(set.get_as::<i32>(Stat::Min), Some(Precision::inexact(50)));
797 assert_eq!(set.get_as::<i32>(Stat::Sum), None);
798 }
799
800 #[test]
801 fn test_combine_is_constant() {
802 {
803 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
804 let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
805 stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
806 assert_eq!(
807 stats.get_as::<bool>(Stat::IsConstant),
808 Some(Precision::exact(true))
809 );
810 }
811
812 {
813 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
814 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
815 stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
816 assert_eq!(
817 stats.get_as::<bool>(Stat::IsConstant),
818 Some(Precision::exact(true))
819 );
820 }
821
822 {
823 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
824 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
825 stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
826 assert_eq!(
827 stats.get_as::<bool>(Stat::IsConstant),
828 Some(Precision::exact(false))
829 );
830 }
831 }
832
833 #[test]
834 fn test_combine_sets_boolean_conflict() {
835 let mut stats1 = StatsSet::from_iter([
836 (Stat::IsConstant, Precision::exact(true)),
837 (Stat::IsSorted, Precision::exact(true)),
838 ]);
839
840 let stats2 = StatsSet::from_iter([
841 (Stat::IsConstant, Precision::exact(false)),
842 (Stat::IsSorted, Precision::exact(true)),
843 ]);
844
845 let result = stats1.combine_sets(
846 &stats2,
847 &DType::Primitive(PType::I32, Nullability::NonNullable),
848 );
849 assert!(result.is_err());
850 }
851
852 #[test]
853 fn test_combine_sets_with_missing_stats() {
854 let mut stats1 = StatsSet::from_iter([
855 (Stat::Min, Precision::exact(42)),
856 (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
857 ]);
858
859 let stats2 = StatsSet::from_iter([
860 (Stat::Max, Precision::exact(100)),
861 (Stat::IsStrictSorted, Precision::exact(true)),
862 ]);
863
864 stats1
865 .combine_sets(
866 &stats2,
867 &DType::Primitive(PType::I32, Nullability::NonNullable),
868 )
869 .unwrap();
870
871 assert_eq!(stats1.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
873 assert_eq!(stats1.get_as::<i32>(Stat::Max), Some(Precision::exact(100)));
875 assert_eq!(
877 stats1.get_as::<bool>(Stat::IsStrictSorted),
878 Some(Precision::exact(true))
879 );
880 }
881
882 #[test]
883 fn test_combine_sets_with_inexact() {
884 let mut stats1 = StatsSet::from_iter([
885 (Stat::Min, Precision::exact(42)),
886 (Stat::Max, Precision::inexact(100)),
887 (Stat::IsConstant, Precision::exact(false)),
888 ]);
889
890 let stats2 = StatsSet::from_iter([
891 (Stat::Min, Precision::inexact(40)),
893 (Stat::Max, Precision::exact(90)),
894 (Stat::IsSorted, Precision::exact(true)),
895 ]);
896
897 stats1
898 .combine_sets(
899 &stats2,
900 &DType::Primitive(PType::I32, Nullability::NonNullable),
901 )
902 .unwrap();
903
904 assert_eq!(stats1.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
906 assert_eq!(stats1.get_as::<i32>(Stat::Max), Some(Precision::exact(90)));
908 assert_eq!(
910 stats1.get_as::<bool>(Stat::IsSorted),
911 Some(Precision::exact(true))
912 );
913 }
914}