1use std::fmt::Debug;
2
3use enum_iterator::{Sequence, all};
4use num_traits::CheckedAdd;
5use vortex_dtype::DType;
6use vortex_error::{VortexExpect, VortexResult, vortex_err};
7use vortex_scalar::{Scalar, ScalarValue};
8
9use super::traits::StatsProvider;
10use super::{IsSorted, IsStrictSorted, NullCount, StatType, UncompressedSizeInBytes};
11use crate::stats::{IsConstant, Max, Min, Precision, Stat, StatBound, StatsProviderExt, Sum};
12
13#[derive(Default, Debug, Clone)]
14pub struct StatsSet {
15 values: Vec<(Stat, Precision<ScalarValue>)>,
16}
17
18impl StatsSet {
19 pub fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
25 Self { values }
26 }
27
28 pub fn nulls(len: usize) -> Self {
31 let mut stats = Self::new_unchecked(vec![(Stat::NullCount, Precision::exact(len))]);
32
33 if len > 0 {
34 stats.set(Stat::IsConstant, Precision::exact(true));
35 stats.set(Stat::IsSorted, Precision::exact(true));
36 stats.set(Stat::IsStrictSorted, Precision::exact(len < 2));
37 }
38
39 stats
40 }
41
42 pub fn empty_array() -> StatsSet {
44 StatsSet::new_unchecked(vec![(Stat::NullCount, Precision::exact(0))])
45 }
46
47 pub fn constant(scalar: Scalar, length: usize) -> Self {
48 let (dtype, sv) = scalar.into_parts();
49 let mut stats = Self::default();
50 if length > 0 {
51 stats.extend([
52 (Stat::IsConstant, Precision::exact(true)),
53 (Stat::IsSorted, Precision::exact(true)),
54 (Stat::IsStrictSorted, Precision::exact(length <= 1)),
55 ]);
56 }
57
58 let null_count = if sv.is_null() { length as u64 } else { 0 };
59 stats.set(Stat::NullCount, Precision::exact(null_count));
60
61 if !sv.is_null() {
62 stats.extend([
63 (Stat::Min, Precision::exact(sv.clone())),
64 (Stat::Max, Precision::exact(sv.clone())),
65 ]);
66 }
67
68 if matches!(dtype, DType::Bool(_)) {
69 let bool_val = <Option<bool>>::try_from(&sv).vortex_expect("Checked dtype");
70 let true_count = bool_val
71 .map(|b| if b { length as u64 } else { 0 })
72 .unwrap_or(0);
73 stats.set(Stat::Sum, Precision::exact(true_count));
74 }
75
76 stats
77 }
78
79 pub fn bools_with_sum_and_null_count(true_count: usize, null_count: usize, len: usize) -> Self {
80 StatsSet::new_unchecked(vec![
81 (Stat::Sum, Precision::exact(true_count)),
82 (Stat::NullCount, Precision::exact(null_count)),
83 (Stat::Min, Precision::exact(true_count == len)),
84 (Stat::Max, Precision::exact(true_count > 0)),
85 (
86 Stat::IsConstant,
87 Precision::exact((true_count == 0 && null_count == 0) || true_count == len),
88 ),
89 ])
90 }
91
92 pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
93 Self::new_unchecked(vec![(stat, value)])
94 }
95
96 fn reserve_full_capacity(&mut self) {
97 if self.values.capacity() < Stat::CARDINALITY {
98 self.values
99 .reserve_exact(Stat::CARDINALITY - self.values.capacity());
100 }
101 }
102}
103
104impl StatsSet {
106 pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
108 self.reserve_full_capacity();
109
110 if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
111 *existing = (stat, value);
112 } else {
113 self.values.push((stat, value));
114 }
115 }
116
117 pub fn clear(&mut self, stat: Stat) {
119 self.values.retain(|(s, _)| *s != stat);
120 }
121
122 pub fn retain_only(&mut self, stats: &[Stat]) {
123 self.values.retain(|(s, _)| stats.contains(s));
124 }
125
126 pub fn keep_inexact_stats(self, inexact_keep: &[Stat]) -> Self {
127 self.values
128 .into_iter()
129 .filter_map(|(s, v)| inexact_keep.contains(&s).then(|| (s, v.into_inexact())))
130 .collect()
131 }
132
133 pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
137 self.values.iter()
138 }
139}
140
141pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
147
148impl Iterator for StatsSetIntoIter {
149 type Item = (Stat, Precision<ScalarValue>);
150
151 fn next(&mut self) -> Option<Self::Item> {
152 self.0.next()
153 }
154}
155
156impl IntoIterator for StatsSet {
157 type Item = (Stat, Precision<ScalarValue>);
158 type IntoIter = StatsSetIntoIter;
159
160 fn into_iter(self) -> Self::IntoIter {
161 StatsSetIntoIter(self.values.into_iter())
162 }
163}
164
165impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
166 fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
167 let iter = iter.into_iter();
168 let mut values = Vec::default();
169 values.reserve_exact(Stat::CARDINALITY);
170
171 let mut this = Self { values };
172 this.extend(iter);
173 this
174 }
175}
176
177impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
178 #[inline]
179 fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
180 let iter = iter.into_iter();
181 self.reserve_full_capacity();
182
183 iter.for_each(|(stat, value)| self.set(stat, value));
184 }
185}
186
187impl StatsSet {
189 pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
192 for s in all::<Stat>() {
193 match s {
194 Stat::IsConstant => self.merge_is_constant(other, dtype),
195 Stat::IsSorted => self.merge_is_sorted(other, dtype),
196 Stat::IsStrictSorted => self.merge_is_strict_sorted(other, dtype),
197 Stat::Max => self.merge_max(other, dtype),
198 Stat::Min => self.merge_min(other, dtype),
199 Stat::Sum => self.merge_sum(other, dtype),
200 Stat::NullCount => self.merge_null_count(other),
201 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
202 }
203 }
204
205 self
206 }
207
208 pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
211 for s in all::<Stat>() {
212 if !s.is_commutative() {
213 self.clear(s);
214 continue;
215 }
216
217 match s {
218 Stat::IsConstant => self.merge_is_constant(other, dtype),
219 Stat::Max => self.merge_max(other, dtype),
220 Stat::Min => self.merge_min(other, dtype),
221 Stat::Sum => self.merge_sum(other, dtype),
222 Stat::NullCount => self.merge_null_count(other),
223 Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
224 Stat::IsSorted | Stat::IsStrictSorted => {
225 unreachable!("not commutative")
226 }
227 }
228 }
229
230 self
231 }
232
233 pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
235 let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
236 for s in other_stats {
237 match s {
238 Stat::Max => self.combine_bound::<Max>(other, dtype)?,
239 Stat::Min => self.combine_bound::<Min>(other, dtype)?,
240 Stat::UncompressedSizeInBytes => {
241 self.combine_bound::<UncompressedSizeInBytes>(other, dtype)?
242 }
243 Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
244 Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
245 Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
246 Stat::NullCount => self.combine_bound::<NullCount>(other, dtype)?,
247 Stat::Sum => self.combine_bound::<Sum>(other, dtype)?,
248 }
249 }
250 Ok(())
251 }
252
253 fn combine_bound<S: StatType<Scalar>>(
254 &mut self,
255 other: &Self,
256 dtype: &DType,
257 ) -> VortexResult<()>
258 where
259 S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
260 {
261 match (
262 self.get_scalar_bound::<S>(dtype),
263 other.get_scalar_bound::<S>(dtype),
264 ) {
265 (Some(m1), Some(m2)) => {
266 let meet = m1
267 .intersection(&m2)
268 .vortex_expect("can always compare scalar")
269 .ok_or_else(|| {
270 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
271 })?;
272 if meet != m1 {
273 self.set(S::STAT, meet.into_value().map(Scalar::into_value));
274 }
275 }
276 (None, Some(m)) => self.set(S::STAT, m.into_value().map(Scalar::into_value)),
277 (Some(_), _) => (),
278 (None, None) => self.clear(S::STAT),
279 }
280 Ok(())
281 }
282
283 fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &Self) -> VortexResult<()>
284 where
285 S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
286 {
287 match (
288 self.get_as_bound::<S, bool>(),
289 other.get_as_bound::<S, bool>(),
290 ) {
291 (Some(m1), Some(m2)) => {
292 let intersection = m1
293 .intersection(&m2)
294 .vortex_expect("can always compare boolean")
295 .ok_or_else(|| {
296 vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
297 })?;
298 if intersection != m1 {
299 self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
300 }
301 }
302 (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
303 (Some(_), None) => (),
304 (None, None) => self.clear(S::STAT),
305 }
306 Ok(())
307 }
308
309 fn merge_min(&mut self, other: &Self, dtype: &DType) {
310 match (
311 self.get_scalar_bound::<Min>(dtype),
312 other.get_scalar_bound::<Min>(dtype),
313 ) {
314 (Some(m1), Some(m2)) => {
315 let meet = m1.union(&m2).vortex_expect("can compare scalar");
316 if meet != m1 {
317 self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
318 }
319 }
320 _ => self.clear(Stat::Min),
321 }
322 }
323
324 fn merge_max(&mut self, other: &Self, dtype: &DType) {
325 match (
326 self.get_scalar_bound::<Max>(dtype),
327 other.get_scalar_bound::<Max>(dtype),
328 ) {
329 (Some(m1), Some(m2)) => {
330 let meet = m1.union(&m2).vortex_expect("can compare scalar");
331 if meet != m1 {
332 self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
333 }
334 }
335 _ => self.clear(Stat::Max),
336 }
337 }
338
339 fn merge_sum(&mut self, other: &Self, dtype: &DType) {
340 match (
341 self.get_scalar_bound::<Sum>(dtype),
342 other.get_scalar_bound::<Sum>(dtype),
343 ) {
344 (Some(m1), Some(m2)) => {
345 if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
347 s1.as_primitive()
348 .checked_add(&s2.as_primitive())
349 .map(|pscalar| {
350 pscalar
351 .pvalue()
352 .map(|pvalue| {
353 Scalar::primitive_value(
354 pvalue,
355 pscalar.ptype(),
356 pscalar.dtype().nullability(),
357 )
358 .into_value()
359 })
360 .unwrap_or_else(ScalarValue::null)
361 })
362 }) {
363 self.set(Stat::Sum, Precision::Exact(scalar_value));
364 }
365 }
366 _ => self.clear(Stat::Sum),
367 }
368 }
369
370 fn merge_is_constant(&mut self, other: &Self, dtype: &DType) {
371 let self_const = self.get_as(Stat::IsConstant);
372 let other_const = other.get_as(Stat::IsConstant);
373 let self_min = self.get_scalar(Stat::Min, dtype);
374 let other_min = other.get_scalar(Stat::Min, dtype);
375
376 if let (
377 Some(Precision::Exact(self_const)),
378 Some(Precision::Exact(other_const)),
379 Some(Precision::Exact(self_min)),
380 Some(Precision::Exact(other_min)),
381 ) = (self_const, other_const, self_min, other_min)
382 {
383 if self_const && other_const && self_min == other_min {
384 self.set(Stat::IsConstant, Precision::exact(true));
385 } else {
386 self.set(Stat::IsConstant, Precision::inexact(false));
387 }
388 }
389 self.set(Stat::IsConstant, Precision::exact(false));
390 }
391
392 fn merge_is_sorted(&mut self, other: &Self, dtype: &DType) {
393 self.merge_sortedness_stat(other, Stat::IsSorted, dtype, PartialOrd::le)
394 }
395
396 fn merge_is_strict_sorted(&mut self, other: &Self, dtype: &DType) {
397 self.merge_sortedness_stat(other, Stat::IsStrictSorted, dtype, PartialOrd::lt)
398 }
399
400 fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
401 &mut self,
402 other: &Self,
403 stat: Stat,
404 dtype: &DType,
405 cmp: F,
406 ) {
407 if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
408 == (self.get_as(stat), other.get_as(stat))
409 {
410 if let (Some(self_max), Some(other_min)) = (
415 self.get_scalar_bound::<Max>(dtype),
416 other.get_scalar_bound::<Min>(dtype),
417 ) {
418 return if cmp(&self_max.max_value(), &other_min.min_value()) {
419 } else {
421 self.set(stat, Precision::inexact(false));
422 };
423 }
424 }
425 self.clear(stat);
426 }
427
428 fn merge_null_count(&mut self, other: &Self) {
429 self.merge_sum_stat(Stat::NullCount, other)
430 }
431
432 fn merge_uncompressed_size_in_bytes(&mut self, other: &Self) {
433 self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
434 }
435
436 fn merge_sum_stat(&mut self, stat: Stat, other: &Self) {
437 match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
438 (Some(nc1), Some(nc2)) => {
439 self.set(
440 stat,
441 nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
442 );
443 }
444 _ => self.clear(stat),
445 }
446 }
447}
448
449impl StatsProvider for StatsSet {
450 fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
451 self.values
452 .iter()
453 .find(|(s, _)| *s == stat)
454 .map(|(_, v)| v.clone())
455 }
456
457 fn len(&self) -> usize {
458 self.values.len()
459 }
460}
461
462#[cfg(test)]
463mod test {
464 use enum_iterator::all;
465 use itertools::Itertools;
466 use vortex_dtype::{DType, Nullability, PType};
467
468 use crate::Array;
469 use crate::arrays::PrimitiveArray;
470 use crate::stats::{IsConstant, Precision, Stat, StatsProvider, StatsProviderExt, StatsSet};
471
472 #[test]
473 fn test_iter() {
474 let set = StatsSet::new_unchecked(vec![
475 (Stat::Max, Precision::exact(100)),
476 (Stat::Min, Precision::exact(42)),
477 ]);
478 let mut iter = set.iter();
479 let first = iter.next().unwrap().clone();
480 assert_eq!(first.0, Stat::Max);
481 assert_eq!(
482 first.1.map(|f| i32::try_from(&f).unwrap()),
483 Precision::exact(100)
484 );
485 let snd = iter.next().unwrap().clone();
486 assert_eq!(snd.0, Stat::Min);
487 assert_eq!(snd.1.map(|s| i32::try_from(&s).unwrap()), 42);
488 }
489
490 #[test]
491 fn into_iter() {
492 let mut set = StatsSet::new_unchecked(vec![
493 (Stat::Max, Precision::exact(100)),
494 (Stat::Min, Precision::exact(42)),
495 ])
496 .into_iter();
497 let (stat, first) = set.next().unwrap();
498 assert_eq!(stat, Stat::Max);
499 assert_eq!(
500 first.map(|f| i32::try_from(&f).unwrap()),
501 Precision::exact(100)
502 );
503 let snd = set.next().unwrap();
504 assert_eq!(snd.0, Stat::Min);
505 assert_eq!(
506 snd.1.map(|s| i32::try_from(&s).unwrap()),
507 Precision::exact(42)
508 );
509 }
510
511 #[test]
512 fn merge_constant() {
513 let first = StatsSet::from_iter([
514 (Stat::Min, Precision::exact(42)),
515 (Stat::IsConstant, Precision::exact(true)),
516 ])
517 .merge_ordered(
518 &StatsSet::from_iter([
519 (Stat::Min, Precision::inexact(42)),
520 (Stat::IsConstant, Precision::exact(true)),
521 ]),
522 &DType::Primitive(PType::I32, Nullability::NonNullable),
523 );
524 assert_eq!(
525 first.get_as::<bool>(Stat::IsConstant),
526 Some(Precision::exact(false))
527 );
528 assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
529 }
530
531 #[test]
532 fn merge_into_min() {
533 let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
534 &StatsSet::default(),
535 &DType::Primitive(PType::I32, Nullability::NonNullable),
536 );
537 assert!(first.get(Stat::Min).is_none());
538 }
539
540 #[test]
541 fn merge_from_min() {
542 let first = StatsSet::default().merge_ordered(
543 &StatsSet::of(Stat::Min, Precision::exact(42)),
544 &DType::Primitive(PType::I32, Nullability::NonNullable),
545 );
546 assert!(first.get(Stat::Min).is_none());
547 }
548
549 #[test]
550 fn merge_mins() {
551 let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
552 &StatsSet::of(Stat::Min, Precision::exact(42)),
553 &DType::Primitive(PType::I32, Nullability::NonNullable),
554 );
555 assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(37)));
556 }
557
558 #[test]
559 fn merge_into_bound_max() {
560 let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
561 &StatsSet::default(),
562 &DType::Primitive(PType::I32, Nullability::NonNullable),
563 );
564 assert!(first.get(Stat::Max).is_none());
565 }
566
567 #[test]
568 fn merge_from_max() {
569 let first = StatsSet::default().merge_ordered(
570 &StatsSet::of(Stat::Max, Precision::exact(42)),
571 &DType::Primitive(PType::I32, Nullability::NonNullable),
572 );
573 assert!(first.get(Stat::Max).is_none());
574 }
575
576 #[test]
577 fn merge_maxes() {
578 let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
579 &StatsSet::of(Stat::Max, Precision::exact(42)),
580 &DType::Primitive(PType::I32, Nullability::NonNullable),
581 );
582 assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::exact(42)));
583 }
584
585 #[test]
586 fn merge_maxes_bound() {
587 let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
588 let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
589 .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
590 assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::inexact(43)));
591 }
592
593 #[test]
594 fn merge_into_scalar() {
595 let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
596 &StatsSet::default(),
597 &DType::Primitive(PType::I32, Nullability::NonNullable),
598 );
599 assert!(first.get(Stat::Sum).is_none());
600 }
601
602 #[test]
603 fn merge_from_scalar() {
604 let first = StatsSet::default().merge_ordered(
605 &StatsSet::of(Stat::Sum, Precision::exact(42)),
606 &DType::Primitive(PType::I32, Nullability::NonNullable),
607 );
608 assert!(first.get(Stat::Sum).is_none());
609 }
610
611 #[test]
612 fn merge_scalars() {
613 let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
614 &StatsSet::of(Stat::Sum, Precision::exact(42)),
615 &DType::Primitive(PType::I32, Nullability::NonNullable),
616 );
617 assert_eq!(
618 first.get_as::<usize>(Stat::Sum),
619 Some(Precision::exact(79usize))
620 );
621 }
622
623 #[test]
624 fn merge_into_sortedness() {
625 let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
626 &StatsSet::default(),
627 &DType::Primitive(PType::I32, Nullability::NonNullable),
628 );
629 assert!(first.get(Stat::IsStrictSorted).is_none());
630 }
631
632 #[test]
633 fn merge_from_sortedness() {
634 let first = StatsSet::default().merge_ordered(
635 &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
636 &DType::Primitive(PType::I32, Nullability::NonNullable),
637 );
638 assert!(first.get(Stat::IsStrictSorted).is_none());
639 }
640
641 #[test]
642 fn merge_sortedness() {
643 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
644 first.set(Stat::Max, Precision::exact(1));
645 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
646 second.set(Stat::Min, Precision::exact(2));
647 first = first.merge_ordered(
648 &second,
649 &DType::Primitive(PType::I32, Nullability::NonNullable),
650 );
651 assert_eq!(
652 first.get_as::<bool>(Stat::IsStrictSorted),
653 Some(Precision::exact(true))
654 );
655 }
656
657 #[test]
658 fn merge_sortedness_out_of_order() {
659 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
660 first.set(Stat::Min, Precision::exact(1));
661 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
662 second.set(Stat::Max, Precision::exact(2));
663 second = second.merge_ordered(
664 &first,
665 &DType::Primitive(PType::I32, Nullability::NonNullable),
666 );
667 assert_eq!(
668 second.get_as::<bool>(Stat::IsStrictSorted),
669 Some(Precision::inexact(false))
670 );
671 }
672
673 #[test]
674 fn merge_sortedness_only_one_sorted() {
675 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
676 first.set(Stat::Max, Precision::exact(1));
677 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
678 second.set(Stat::Min, Precision::exact(2));
679 first.merge_ordered(
680 &second,
681 &DType::Primitive(PType::I32, Nullability::NonNullable),
682 );
683 assert_eq!(
684 second.get_as::<bool>(Stat::IsStrictSorted),
685 Some(Precision::exact(false))
686 );
687 }
688
689 #[test]
690 fn merge_sortedness_missing_min() {
691 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
692 first.set(Stat::Max, Precision::exact(1));
693 let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
694 first = first.merge_ordered(
695 &second,
696 &DType::Primitive(PType::I32, Nullability::NonNullable),
697 );
698 assert!(first.get(Stat::IsStrictSorted).is_none());
699 }
700
701 #[test]
702 fn merge_sortedness_bound_min() {
703 let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
704 first.set(Stat::Max, Precision::exact(1));
705 let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
706 second.set(Stat::Min, Precision::inexact(2));
707 first = first.merge_ordered(
708 &second,
709 &DType::Primitive(PType::I32, Nullability::NonNullable),
710 );
711 assert_eq!(
712 first.get_as::<bool>(Stat::IsStrictSorted),
713 Some(Precision::exact(true))
714 );
715 }
716
717 #[test]
718 fn merge_unordered() {
719 let array =
720 PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
721 let all_stats = all::<Stat>()
722 .filter(|s| !matches!(s, Stat::Sum))
723 .collect_vec();
724 array.statistics().compute_all(&all_stats).unwrap();
725
726 let stats = array.statistics().to_owned();
727 for stat in &all_stats {
728 assert!(stats.get(*stat).is_some(), "Stat {} is missing", stat);
729 }
730
731 let merged = stats.clone().merge_unordered(
732 &stats,
733 &DType::Primitive(PType::I32, Nullability::NonNullable),
734 );
735 for stat in &all_stats {
736 assert_eq!(
737 merged.get(*stat).is_some(),
738 stat.is_commutative(),
739 "Stat {} remains after merge_unordered despite not being commutative, or was removed despite being commutative",
740 stat
741 )
742 }
743
744 assert_eq!(
745 merged.get_as::<i32>(Stat::Min),
746 stats.get_as::<i32>(Stat::Min)
747 );
748 assert_eq!(
749 merged.get_as::<i32>(Stat::Max),
750 stats.get_as::<i32>(Stat::Max)
751 );
752 assert_eq!(
753 merged.get_as::<u64>(Stat::NullCount).unwrap(),
754 stats.get_as::<u64>(Stat::NullCount).unwrap().map(|s| s * 2)
755 );
756 }
757
758 #[test]
759 fn merge_min_bound_same() {
760 let merged = StatsSet::of(Stat::Min, Precision::inexact(5)).merge_ordered(
763 &StatsSet::of(Stat::Min, Precision::exact(5)),
764 &DType::Primitive(PType::I32, Nullability::NonNullable),
765 );
766 assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::exact(5)));
767 }
768
769 #[test]
770 fn merge_min_bound_bound_lower() {
771 let merged = StatsSet::of(Stat::Min, Precision::inexact(4)).merge_ordered(
772 &StatsSet::of(Stat::Min, Precision::exact(5)),
773 &DType::Primitive(PType::I32, Nullability::NonNullable),
774 );
775 assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::inexact(4)));
776 }
777
778 #[test]
779 fn retain_approx() {
780 let set = StatsSet::from_iter([
781 (Stat::Max, Precision::exact(100)),
782 (Stat::Min, Precision::exact(50)),
783 (Stat::Sum, Precision::inexact(10)),
784 ]);
785
786 let set = set.keep_inexact_stats(&[Stat::Min, Stat::Max]);
787
788 assert_eq!(set.len(), 2);
789 assert_eq!(set.get_as::<i32>(Stat::Max), Some(Precision::inexact(100)));
790 assert_eq!(set.get_as::<i32>(Stat::Min), Some(Precision::inexact(50)));
791 assert_eq!(set.get_as::<i32>(Stat::Sum), None);
792 }
793
794 #[test]
795 fn test_combine_is_constant() {
796 {
797 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
798 let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
799 stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
800 assert_eq!(
801 stats.get_as::<bool>(Stat::IsConstant),
802 Some(Precision::exact(true))
803 );
804 }
805
806 {
807 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
808 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
809 stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
810 assert_eq!(
811 stats.get_as::<bool>(Stat::IsConstant),
812 Some(Precision::exact(true))
813 );
814 }
815
816 {
817 let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
818 let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
819 stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
820 assert_eq!(
821 stats.get_as::<bool>(Stat::IsConstant),
822 Some(Precision::exact(false))
823 );
824 }
825 }
826
827 #[test]
828 fn test_combine_sets_boolean_conflict() {
829 let mut stats1 = StatsSet::from_iter([
830 (Stat::IsConstant, Precision::exact(true)),
831 (Stat::IsSorted, Precision::exact(true)),
832 ]);
833
834 let stats2 = StatsSet::from_iter([
835 (Stat::IsConstant, Precision::exact(false)),
836 (Stat::IsSorted, Precision::exact(true)),
837 ]);
838
839 let result = stats1.combine_sets(
840 &stats2,
841 &DType::Primitive(PType::I32, Nullability::NonNullable),
842 );
843 assert!(result.is_err());
844 }
845
846 #[test]
847 fn test_combine_sets_with_missing_stats() {
848 let mut stats1 = StatsSet::from_iter([
849 (Stat::Min, Precision::exact(42)),
850 (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
851 ]);
852
853 let stats2 = StatsSet::from_iter([
854 (Stat::Max, Precision::exact(100)),
855 (Stat::IsStrictSorted, Precision::exact(true)),
856 ]);
857
858 stats1
859 .combine_sets(
860 &stats2,
861 &DType::Primitive(PType::I32, Nullability::NonNullable),
862 )
863 .unwrap();
864
865 assert_eq!(stats1.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
867 assert_eq!(stats1.get_as::<i32>(Stat::Max), Some(Precision::exact(100)));
869 assert_eq!(
871 stats1.get_as::<bool>(Stat::IsStrictSorted),
872 Some(Precision::exact(true))
873 );
874 }
875
876 #[test]
877 fn test_combine_sets_with_inexact() {
878 let mut stats1 = StatsSet::from_iter([
879 (Stat::Min, Precision::exact(42)),
880 (Stat::Max, Precision::inexact(100)),
881 (Stat::IsConstant, Precision::exact(false)),
882 ]);
883
884 let stats2 = StatsSet::from_iter([
885 (Stat::Min, Precision::inexact(40)),
887 (Stat::Max, Precision::exact(90)),
888 (Stat::IsSorted, Precision::exact(true)),
889 ]);
890
891 stats1
892 .combine_sets(
893 &stats2,
894 &DType::Primitive(PType::I32, Nullability::NonNullable),
895 )
896 .unwrap();
897
898 assert_eq!(stats1.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
900 assert_eq!(stats1.get_as::<i32>(Stat::Max), Some(Precision::exact(90)));
902 assert_eq!(
904 stats1.get_as::<bool>(Stat::IsSorted),
905 Some(Precision::exact(true))
906 );
907 }
908}