1use std::fmt::{self, Debug, Display};
21
22use crate::{Result, ScalarValue};
23
24use crate::error::_plan_err;
25use crate::utils::aggregate::precision_add;
26use arrow::datatypes::{DataType, Schema};
27
/// A value tagged with how much it can be trusted.
///
/// In this module it wraps row counts, byte sizes and per-column statistics.
#[derive(Clone, PartialEq, Eq, Default, Copy)]
pub enum Precision<T: Debug + Clone + PartialEq + Eq + PartialOrd> {
    /// The value is known exactly.
    Exact(T),
    /// The value is an estimate; arithmetic that mixes `Exact` and `Inexact`
    /// operands always produces `Inexact`.
    Inexact(T),
    /// No value is available. This is the `Default` variant.
    #[default]
    Absent,
}

impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Precision<T> {
    /// Returns a reference to the wrapped value for `Exact` and `Inexact`,
    /// or `None` for `Absent`.
    pub fn get_value(&self) -> Option<&T> {
        match self {
            Precision::Exact(value) | Precision::Inexact(value) => Some(value),
            Precision::Absent => None,
        }
    }

    /// Applies `f` to the wrapped value, keeping the `Exact` / `Inexact`
    /// tag. `Absent` stays `Absent` and `f` is not called.
    ///
    /// `f` runs at most once, so any `FnOnce` is accepted (every `Fn` and
    /// `FnMut` closure still qualifies).
    pub fn map<U, F>(self, f: F) -> Precision<U>
    where
        F: FnOnce(T) -> U,
        U: Debug + Clone + PartialEq + Eq + PartialOrd,
    {
        match self {
            Precision::Exact(val) => Precision::Exact(f(val)),
            Precision::Inexact(val) => Precision::Inexact(f(val)),
            Precision::Absent => Precision::Absent,
        }
    }

    /// Returns `Some(true)` for `Exact`, `Some(false)` for `Inexact` and
    /// `None` for `Absent`.
    pub fn is_exact(&self) -> Option<bool> {
        match self {
            Precision::Exact(_) => Some(true),
            Precision::Inexact(_) => Some(false),
            Precision::Absent => None,
        }
    }

    /// Returns the larger of the two values. The result is `Exact` only when
    /// both inputs are `Exact`, and `Absent` when either input is `Absent`.
    pub fn max(&self, other: &Precision<T>) -> Precision<T> {
        // `>=` keeps `self`'s value on ties; if the values are not comparable
        // under `PartialOrd`, `other`'s value is returned.
        let pick = |a: &T, b: &T| if a >= b { a.clone() } else { b.clone() };
        match (self, other) {
            (Precision::Exact(a), Precision::Exact(b)) => Precision::Exact(pick(a, b)),
            (Precision::Inexact(a), Precision::Exact(b))
            | (Precision::Exact(a), Precision::Inexact(b))
            | (Precision::Inexact(a), Precision::Inexact(b)) => {
                Precision::Inexact(pick(a, b))
            }
            (Precision::Absent, _) | (_, Precision::Absent) => Precision::Absent,
        }
    }

    /// Returns the smaller of the two values. The result is `Exact` only when
    /// both inputs are `Exact`, and `Absent` when either input is `Absent`.
    pub fn min(&self, other: &Precision<T>) -> Precision<T> {
        // `>=` returns `other`'s value on ties; if the values are not
        // comparable under `PartialOrd`, `self`'s value is returned.
        let pick = |a: &T, b: &T| if a >= b { b.clone() } else { a.clone() };
        match (self, other) {
            (Precision::Exact(a), Precision::Exact(b)) => Precision::Exact(pick(a, b)),
            (Precision::Inexact(a), Precision::Exact(b))
            | (Precision::Exact(a), Precision::Inexact(b))
            | (Precision::Inexact(a), Precision::Inexact(b)) => {
                Precision::Inexact(pick(a, b))
            }
            (Precision::Absent, _) | (_, Precision::Absent) => Precision::Absent,
        }
    }

    /// Downgrades `Exact` to `Inexact`; `Inexact` and `Absent` are returned
    /// unchanged.
    pub fn to_inexact(self) -> Self {
        match self {
            Precision::Exact(value) => Precision::Inexact(value),
            unchanged @ (Precision::Inexact(_) | Precision::Absent) => unchanged,
        }
    }
}
139
140impl Precision<usize> {
141 pub fn add(&self, other: &Precision<usize>) -> Precision<usize> {
145 match (self, other) {
146 (Precision::Exact(a), Precision::Exact(b)) => a.checked_add(*b).map_or_else(
147 || Precision::Inexact(a.saturating_add(*b)),
148 Precision::Exact,
149 ),
150 (Precision::Inexact(a), Precision::Exact(b))
151 | (Precision::Exact(a), Precision::Inexact(b))
152 | (Precision::Inexact(a), Precision::Inexact(b)) => {
153 Precision::Inexact(a.saturating_add(*b))
154 }
155 (_, _) => Precision::Absent,
156 }
157 }
158
159 pub fn sub(&self, other: &Precision<usize>) -> Precision<usize> {
163 match (self, other) {
164 (Precision::Exact(a), Precision::Exact(b)) => a.checked_sub(*b).map_or_else(
165 || Precision::Inexact(a.saturating_sub(*b)),
166 Precision::Exact,
167 ),
168 (Precision::Inexact(a), Precision::Exact(b))
169 | (Precision::Exact(a), Precision::Inexact(b))
170 | (Precision::Inexact(a), Precision::Inexact(b)) => {
171 Precision::Inexact(a.saturating_sub(*b))
172 }
173 (_, _) => Precision::Absent,
174 }
175 }
176
177 pub fn multiply(&self, other: &Precision<usize>) -> Precision<usize> {
181 match (self, other) {
182 (Precision::Exact(a), Precision::Exact(b)) => a.checked_mul(*b).map_or_else(
183 || Precision::Inexact(a.saturating_mul(*b)),
184 Precision::Exact,
185 ),
186 (Precision::Inexact(a), Precision::Exact(b))
187 | (Precision::Exact(a), Precision::Inexact(b))
188 | (Precision::Inexact(a), Precision::Inexact(b)) => {
189 Precision::Inexact(a.saturating_mul(*b))
190 }
191 (_, _) => Precision::Absent,
192 }
193 }
194
195 pub fn with_estimated_selectivity(self, selectivity: f64) -> Self {
200 self.map(|v| ((v as f64 * selectivity).ceil()) as usize)
201 .to_inexact()
202 }
203}
204
205impl Precision<ScalarValue> {
206 fn sum_data_type(data_type: &DataType) -> DataType {
207 match data_type {
208 DataType::Int8 | DataType::Int16 | DataType::Int32 => DataType::Int64,
209 DataType::UInt8 | DataType::UInt16 | DataType::UInt32 => DataType::UInt64,
210 _ => data_type.clone(),
211 }
212 }
213
214 fn cast_scalar_to_sum_type(value: &ScalarValue) -> Result<ScalarValue> {
215 let source_type = value.data_type();
216 let target_type = Self::sum_data_type(&source_type);
217 if source_type == target_type {
218 Ok(value.clone())
219 } else {
220 value.cast_to(&target_type)
221 }
222 }
223
224 pub fn add(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
234 match (self, other) {
235 (Precision::Exact(a), Precision::Exact(b)) => a
236 .add_checked(b)
237 .map(Precision::Exact)
238 .unwrap_or(Precision::Absent),
239 (Precision::Inexact(a), Precision::Exact(b))
240 | (Precision::Exact(a), Precision::Inexact(b))
241 | (Precision::Inexact(a), Precision::Inexact(b)) => a
242 .add_checked(b)
243 .map(Precision::Inexact)
244 .unwrap_or(Precision::Absent),
245 (_, _) => Precision::Absent,
246 }
247 }
248
249 pub fn cast_to_sum_type(&self) -> Precision<ScalarValue> {
254 match (self.is_exact(), self.get_value()) {
255 (Some(true), Some(value)) => Self::cast_scalar_to_sum_type(value)
256 .map(Precision::Exact)
257 .unwrap_or(Precision::Absent),
258 (Some(false), Some(value)) => Self::cast_scalar_to_sum_type(value)
259 .map(Precision::Inexact)
260 .unwrap_or(Precision::Absent),
261 (_, _) => Precision::Absent,
262 }
263 }
264
265 pub fn add_for_sum(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
268 let mut lhs = self.cast_to_sum_type();
269 let rhs = other.cast_to_sum_type();
270 precision_add(&mut lhs, &rhs);
271 lhs
272 }
273
274 pub fn sub(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
278 match (self, other) {
279 (Precision::Exact(a), Precision::Exact(b)) => {
280 a.sub(b).map(Precision::Exact).unwrap_or(Precision::Absent)
281 }
282 (Precision::Inexact(a), Precision::Exact(b))
283 | (Precision::Exact(a), Precision::Inexact(b))
284 | (Precision::Inexact(a), Precision::Inexact(b)) => a
285 .sub(b)
286 .map(Precision::Inexact)
287 .unwrap_or(Precision::Absent),
288 (_, _) => Precision::Absent,
289 }
290 }
291
292 pub fn multiply(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
296 match (self, other) {
297 (Precision::Exact(a), Precision::Exact(b)) => a
298 .mul_checked(b)
299 .map(Precision::Exact)
300 .unwrap_or(Precision::Absent),
301 (Precision::Inexact(a), Precision::Exact(b))
302 | (Precision::Exact(a), Precision::Inexact(b))
303 | (Precision::Inexact(a), Precision::Inexact(b)) => a
304 .mul_checked(b)
305 .map(Precision::Inexact)
306 .unwrap_or(Precision::Absent),
307 (_, _) => Precision::Absent,
308 }
309 }
310
311 pub fn cast_to(&self, data_type: &DataType) -> Result<Precision<ScalarValue>> {
313 match self {
314 Precision::Exact(value) => value.cast_to(data_type).map(Precision::Exact),
315 Precision::Inexact(value) => value.cast_to(data_type).map(Precision::Inexact),
316 Precision::Absent => Ok(Precision::Absent),
317 }
318 }
319}
320
321impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Debug for Precision<T> {
322 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
323 match self {
324 Precision::Exact(inner) => write!(f, "Exact({inner:?})"),
325 Precision::Inexact(inner) => write!(f, "Inexact({inner:?})"),
326 Precision::Absent => write!(f, "Absent"),
327 }
328 }
329}
330
331impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Display for Precision<T> {
332 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
333 match self {
334 Precision::Exact(inner) => write!(f, "Exact({inner:?})"),
335 Precision::Inexact(inner) => write!(f, "Inexact({inner:?})"),
336 Precision::Absent => write!(f, "Absent"),
337 }
338 }
339}
340
341impl From<Precision<usize>> for Precision<ScalarValue> {
342 fn from(value: Precision<usize>) -> Self {
343 match value {
344 Precision::Exact(v) => Precision::Exact(ScalarValue::UInt64(Some(v as u64))),
345 Precision::Inexact(v) => {
346 Precision::Inexact(ScalarValue::UInt64(Some(v as u64)))
347 }
348 Precision::Absent => Precision::Absent,
349 }
350 }
351}
352
/// Summary statistics: a row count, a total byte size and one
/// [`ColumnStatistics`] entry per column.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Statistics {
    /// Number of rows.
    pub num_rows: Precision<usize>,
    /// Total size in bytes.
    pub total_byte_size: Precision<usize>,
    /// Per-column statistics. `Statistics::new_unknown` creates one entry per
    /// schema field; `Statistics::default` leaves this empty.
    pub column_statistics: Vec<ColumnStatistics>,
}
374
/// How to combine two distinct-value counts when merging statistics and
/// `estimate_ndv_with_overlap` cannot produce an estimate.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum NdvFallback {
    /// Take the larger of the two counts. This is the default.
    #[default]
    Max,
    /// Add the two counts, saturating at `usize::MAX`.
    Sum,
}

impl NdvFallback {
    /// Combines two distinct-value counts according to this strategy.
    fn merge(self, left: usize, right: usize) -> usize {
        match self {
            Self::Sum => left.checked_add(right).unwrap_or(usize::MAX),
            Self::Max => left.max(right),
        }
    }
}
395
396impl Default for Statistics {
397 fn default() -> Self {
400 Self {
401 num_rows: Precision::Absent,
402 total_byte_size: Precision::Absent,
403 column_statistics: vec![],
404 }
405 }
406}
407
408impl Statistics {
409 pub fn new_unknown(schema: &Schema) -> Self {
412 Self {
413 num_rows: Precision::Absent,
414 total_byte_size: Precision::Absent,
415 column_statistics: Statistics::unknown_column(schema),
416 }
417 }
418
419 pub fn calculate_total_byte_size(&mut self, schema: &Schema) {
422 let mut row_size = Some(0);
423 for field in schema.fields() {
424 match field.data_type().primitive_width() {
425 Some(width) => {
426 row_size = row_size.map(|s| s + width);
427 }
428 None => {
429 row_size = None;
430 break;
431 }
432 }
433 }
434 match row_size {
435 None => {
436 self.total_byte_size = self.total_byte_size.to_inexact();
437 }
438 Some(size) => {
439 self.total_byte_size = self.num_rows.multiply(&Precision::Exact(size));
440 }
441 }
442 }
443
444 pub fn unknown_column(schema: &Schema) -> Vec<ColumnStatistics> {
446 schema
447 .fields()
448 .iter()
449 .map(|_| ColumnStatistics::new_unknown())
450 .collect()
451 }
452
453 pub fn with_num_rows(mut self, num_rows: Precision<usize>) -> Self {
455 self.num_rows = num_rows;
456 self
457 }
458
459 pub fn with_total_byte_size(mut self, total_byte_size: Precision<usize>) -> Self {
461 self.total_byte_size = total_byte_size;
462 self
463 }
464
465 pub fn add_column_statistics(mut self, column_stats: ColumnStatistics) -> Self {
467 self.column_statistics.push(column_stats);
468 self
469 }
470
471 pub fn to_inexact(mut self) -> Self {
474 self.num_rows = self.num_rows.to_inexact();
475 self.total_byte_size = self.total_byte_size.to_inexact();
476 self.column_statistics = self
477 .column_statistics
478 .into_iter()
479 .map(|s| s.to_inexact())
480 .collect();
481 self
482 }
483
484 pub fn project(self, projection: Option<&impl AsRef<[usize]>>) -> Self {
490 let projection = projection.map(AsRef::as_ref);
491 self.project_impl(projection)
492 }
493
494 fn project_impl(mut self, projection: Option<&[usize]>) -> Self {
495 let Some(projection) = projection.map(AsRef::as_ref) else {
496 return self;
497 };
498
499 #[expect(clippy::large_enum_variant)]
500 enum Slot {
501 Taken(usize),
503 Present(ColumnStatistics),
505 }
506
507 let mut columns: Vec<_> = std::mem::take(&mut self.column_statistics)
509 .into_iter()
510 .map(Slot::Present)
511 .collect();
512
513 for idx in projection.iter() {
514 let next_idx = self.column_statistics.len();
515 let slot = std::mem::replace(
516 columns.get_mut(*idx).expect("projection out of bounds"),
517 Slot::Taken(next_idx),
518 );
519 match slot {
520 Slot::Present(col) => self.column_statistics.push(col),
522 Slot::Taken(prev_idx) => self
524 .column_statistics
525 .push(self.column_statistics[prev_idx].clone()),
526 }
527 }
528
529 self
530 }
531
532 pub fn with_fetch(
537 mut self,
538 fetch: Option<usize>,
539 skip: usize,
540 n_partitions: usize,
541 ) -> Result<Self> {
542 let fetch_val = fetch.unwrap_or(usize::MAX);
543
544 let num_rows_before = self.num_rows;
546
547 self.num_rows = match self {
548 Statistics {
549 num_rows: Precision::Exact(nr),
550 ..
551 }
552 | Statistics {
553 num_rows: Precision::Inexact(nr),
554 ..
555 } => {
556 if nr <= skip {
558 check_num_rows(Some(0), self.num_rows.is_exact().unwrap())
562 } else if nr <= fetch_val && skip == 0 {
563 return Ok(self);
569 } else if nr - skip <= fetch_val {
570 check_num_rows(
574 (nr - skip).checked_mul(n_partitions),
575 self.num_rows.is_exact().unwrap(),
577 )
578 } else {
579 check_num_rows(
584 fetch_val.checked_mul(n_partitions),
585 self.num_rows.is_exact().unwrap(),
587 )
588 }
589 }
590 Statistics {
591 num_rows: Precision::Absent,
592 ..
593 } => check_num_rows(fetch.and_then(|v| v.checked_mul(n_partitions)), false),
594 };
595 let ratio: f64 = match (num_rows_before, self.num_rows) {
596 (
597 Precision::Exact(nr_before) | Precision::Inexact(nr_before),
598 Precision::Exact(nr_after) | Precision::Inexact(nr_after),
599 ) => {
600 if nr_before == 0 {
601 0.0
602 } else {
603 nr_after as f64 / nr_before as f64
604 }
605 }
606 _ => 0.0,
607 };
608 self.column_statistics = self
609 .column_statistics
610 .into_iter()
611 .map(|cs| {
612 let mut cs = cs.to_inexact();
613 cs.byte_size = match cs.byte_size {
615 Precision::Exact(n) | Precision::Inexact(n) => {
616 Precision::Inexact((n as f64 * ratio) as usize)
617 }
618 Precision::Absent => Precision::Absent,
619 };
620 if let Some(&rows) = self.num_rows.get_value() {
622 cs.distinct_count = cs.distinct_count.min(&Precision::Inexact(rows));
623 }
624 cs
625 })
626 .collect();
627
628 let sum_scan_bytes: Option<usize> = self
631 .column_statistics
632 .iter()
633 .map(|cs| cs.byte_size.get_value().copied())
634 .try_fold(0usize, |acc, val| val.map(|v| acc + v));
635
636 self.total_byte_size = match sum_scan_bytes {
637 Some(sum) => Precision::Inexact(sum),
638 None => {
639 match &self.total_byte_size {
641 Precision::Exact(n) | Precision::Inexact(n) => {
642 Precision::Inexact((*n as f64 * ratio) as usize)
643 }
644 Precision::Absent => Precision::Absent,
645 }
646 }
647 };
648 Ok(self)
649 }
650
    /// Merges several [`Statistics`] into one, using [`NdvFallback::Max`]
    /// when a distinct-count estimate cannot be computed.
    ///
    /// This is a convenience wrapper around
    /// [`Self::try_merge_iter_with_ndv_fallback`]; see that method for the
    /// merge rules and error conditions.
    pub fn try_merge_iter<'a, I>(items: I, schema: &Schema) -> Result<Statistics>
    where
        I: IntoIterator<Item = &'a Statistics>,
    {
        Self::try_merge_iter_with_ndv_fallback(items, schema, NdvFallback::Max)
    }
704
    /// Merges several [`Statistics`] into one.
    ///
    /// * No items: returns `Statistics::new_unknown(schema)`.
    /// * One item: returns a clone of it, unchanged (its sum values are NOT
    ///   widened to the sum type in this case).
    /// * Otherwise: row counts, byte sizes and null counts are added,
    ///   min/max values are combined, sum values are added after widening to
    ///   the sum type, and distinct counts are estimated (always `Inexact`)
    ///   with `estimate_ndv_with_overlap`, falling back to `ndv_fallback`
    ///   when no estimate is available.
    ///
    /// `schema` is only consulted for the empty-input case.
    ///
    /// # Errors
    /// Returns a plan error if an item has a different number of columns
    /// than the first item.
    pub fn try_merge_iter_with_ndv_fallback<'a, I>(
        items: I,
        schema: &Schema,
        ndv_fallback: NdvFallback,
    ) -> Result<Statistics>
    where
        I: IntoIterator<Item = &'a Statistics>,
    {
        let mut items = items.into_iter();
        let Some(first) = items.next() else {
            return Ok(Statistics::new_unknown(schema));
        };
        // With a single item there is nothing to merge; avoid the extra work.
        let Some(second) = items.next() else {
            return Ok(first.clone());
        };

        // The first item seeds the accumulators and fixes the column count.
        let num_cols = first.column_statistics.len();
        let mut num_rows = first.num_rows;
        let mut total_byte_size = first.total_byte_size;
        let mut column_statistics = first.column_statistics.clone();
        // Widen the seed sums so later additions happen in the sum type.
        for col_stats in &mut column_statistics {
            cast_sum_value_to_sum_type_in_place(&mut col_stats.sum_value);
        }

        // `second` was already pulled from the iterator, so chain it back in.
        for (i, stat) in std::iter::once(second).chain(items).enumerate() {
            if stat.column_statistics.len() != num_cols {
                return _plan_err!(
                    "Cannot merge statistics with different number of columns: {} vs {} (item {})",
                    num_cols,
                    stat.column_statistics.len(),
                    // `i` counts from the second item, so `i + 1` is the
                    // item's zero-based position in the original input.
                    i + 1
                );
            }
            num_rows = num_rows.add(&stat.num_rows);
            total_byte_size = total_byte_size.add(&stat.total_byte_size);

            for (col_stats, item_cs) in
                column_statistics.iter_mut().zip(&stat.column_statistics)
            {
                col_stats.null_count = col_stats.null_count.add(&item_cs.null_count);

                // Must run before min/max are merged below: the overlap
                // estimate compares the accumulated range so far against the
                // incoming item's range.
                col_stats.distinct_count = match (
                    col_stats.distinct_count.get_value(),
                    item_cs.distinct_count.get_value(),
                ) {
                    (Some(&l), Some(&r)) => Precision::Inexact(
                        estimate_ndv_with_overlap(col_stats, item_cs, l, r)
                            .unwrap_or_else(|| ndv_fallback.merge(l, r)),
                    ),
                    _ => Precision::Absent,
                };
                precision_min(&mut col_stats.min_value, &item_cs.min_value);
                precision_max(&mut col_stats.max_value, &item_cs.max_value);
                precision_add_for_sum_in_place(
                    &mut col_stats.sum_value,
                    &item_cs.sum_value,
                );
                col_stats.byte_size = col_stats.byte_size.add(&item_cs.byte_size);
            }
        }

        Ok(Statistics {
            num_rows,
            total_byte_size,
            column_statistics,
        })
    }
779}
780
781pub fn estimate_ndv_with_overlap(
816 left: &ColumnStatistics,
817 right: &ColumnStatistics,
818 ndv_left: usize,
819 ndv_right: usize,
820) -> Option<usize> {
821 let left_min = left.min_value.get_value()?;
822 let left_max = left.max_value.get_value()?;
823 let right_min = right.min_value.get_value()?;
824 let right_max = right.max_value.get_value()?;
825
826 let range_left = left_max.distance(left_min)?;
827 let range_right = right_max.distance(right_min)?;
828
829 if range_left == 0 || range_right == 0 {
832 let overlaps = left_min <= right_max && right_min <= left_max;
833 return Some(if overlaps {
834 usize::max(ndv_left, ndv_right)
835 } else {
836 ndv_left + ndv_right
837 });
838 }
839
840 let overlap_min = if left_min >= right_min {
841 left_min
842 } else {
843 right_min
844 };
845 let overlap_max = if left_max <= right_max {
846 left_max
847 } else {
848 right_max
849 };
850
851 if overlap_min > overlap_max {
853 return Some(ndv_left + ndv_right);
854 }
855
856 let overlap_range = overlap_max.distance(overlap_min)? as f64;
857
858 let overlap_left = overlap_range / range_left as f64;
859 let overlap_right = overlap_range / range_right as f64;
860
861 let intersection = f64::max(
862 overlap_left * ndv_left as f64,
863 overlap_right * ndv_right as f64,
864 );
865 let only_left = (1.0 - overlap_left) * ndv_left as f64;
866 let only_right = (1.0 - overlap_right) * ndv_right as f64;
867
868 Some((intersection + only_left + only_right).round() as usize)
869}
870
871#[inline]
874fn precision_min<T>(lhs: &mut Precision<T>, rhs: &Precision<T>)
875where
876 T: Debug + Clone + PartialEq + Eq + PartialOrd,
877{
878 *lhs = match (std::mem::take(lhs), rhs) {
879 (Precision::Exact(left), Precision::Exact(right)) => {
880 if left <= *right {
881 Precision::Exact(left)
882 } else {
883 Precision::Exact(right.clone())
884 }
885 }
886 (Precision::Exact(left), Precision::Inexact(right))
887 | (Precision::Inexact(left), Precision::Exact(right))
888 | (Precision::Inexact(left), Precision::Inexact(right)) => {
889 if left <= *right {
890 Precision::Inexact(left)
891 } else {
892 Precision::Inexact(right.clone())
893 }
894 }
895 (_, _) => Precision::Absent,
896 };
897}
898
899#[inline]
902fn precision_max<T>(lhs: &mut Precision<T>, rhs: &Precision<T>)
903where
904 T: Debug + Clone + PartialEq + Eq + PartialOrd,
905{
906 *lhs = match (std::mem::take(lhs), rhs) {
907 (Precision::Exact(left), Precision::Exact(right)) => {
908 if left >= *right {
909 Precision::Exact(left)
910 } else {
911 Precision::Exact(right.clone())
912 }
913 }
914 (Precision::Exact(left), Precision::Inexact(right))
915 | (Precision::Inexact(left), Precision::Exact(right))
916 | (Precision::Inexact(left), Precision::Inexact(right)) => {
917 if left >= *right {
918 Precision::Inexact(left)
919 } else {
920 Precision::Inexact(right.clone())
921 }
922 }
923 (_, _) => Precision::Absent,
924 };
925}
926
927#[inline]
928fn cast_sum_value_to_sum_type_in_place(value: &mut Precision<ScalarValue>) {
929 let (is_exact, inner) = match std::mem::take(value) {
930 Precision::Exact(v) => (true, v),
931 Precision::Inexact(v) => (false, v),
932 Precision::Absent => return,
933 };
934 let source_type = inner.data_type();
935 let target_type = Precision::<ScalarValue>::sum_data_type(&source_type);
936
937 let wrap_precision_fn: fn(ScalarValue) -> Precision<ScalarValue> = if is_exact {
938 Precision::Exact
939 } else {
940 Precision::Inexact
941 };
942
943 *value = if source_type == target_type {
944 wrap_precision_fn(inner)
945 } else {
946 inner
947 .cast_to(&target_type)
948 .map(wrap_precision_fn)
949 .unwrap_or(Precision::Absent)
950 };
951}
952
953#[inline]
954fn precision_add_for_sum_in_place(
955 lhs: &mut Precision<ScalarValue>,
956 rhs: &Precision<ScalarValue>,
957) {
958 let (value, wrap_fn): (&ScalarValue, fn(ScalarValue) -> Precision<ScalarValue>) =
959 match rhs {
960 Precision::Exact(v) => (v, Precision::Exact),
961 Precision::Inexact(v) => (v, Precision::Inexact),
962 Precision::Absent => {
963 *lhs = Precision::Absent;
964 return;
965 }
966 };
967 let source_type = value.data_type();
968 let target_type = Precision::<ScalarValue>::sum_data_type(&source_type);
969 if source_type == target_type {
970 precision_add(lhs, rhs);
971 } else {
972 let rhs = value
973 .cast_to(&target_type)
974 .map(wrap_fn)
975 .unwrap_or(Precision::Absent);
976 precision_add(lhs, &rhs);
977 }
978}
979
980fn check_num_rows(value: Option<usize>, is_exact: bool) -> Precision<usize> {
983 if let Some(value) = value {
984 if is_exact {
985 Precision::Exact(value)
986 } else {
987 Precision::Inexact(value)
989 }
990 } else {
991 Precision::Absent
994 }
995}
996
997impl Display for Statistics {
998 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
999 let column_stats = self
1001 .column_statistics
1002 .iter()
1003 .enumerate()
1004 .map(|(i, cs)| {
1005 let s = format!("(Col[{i}]:");
1006 let s = if cs.min_value != Precision::Absent {
1007 format!("{} Min={}", s, cs.min_value)
1008 } else {
1009 s
1010 };
1011 let s = if cs.max_value != Precision::Absent {
1012 format!("{} Max={}", s, cs.max_value)
1013 } else {
1014 s
1015 };
1016 let s = if cs.sum_value != Precision::Absent {
1017 format!("{} Sum={}", s, cs.sum_value)
1018 } else {
1019 s
1020 };
1021 let s = if cs.null_count != Precision::Absent {
1022 format!("{} Null={}", s, cs.null_count)
1023 } else {
1024 s
1025 };
1026 let s = if cs.distinct_count != Precision::Absent {
1027 format!("{} Distinct={}", s, cs.distinct_count)
1028 } else {
1029 s
1030 };
1031 let s = if cs.byte_size != Precision::Absent {
1032 format!("{} ScanBytes={}", s, cs.byte_size)
1033 } else {
1034 s
1035 };
1036
1037 s + ")"
1038 })
1039 .collect::<Vec<_>>()
1040 .join(",");
1041
1042 write!(
1043 f,
1044 "Rows={}, Bytes={}, [{}]",
1045 self.num_rows, self.total_byte_size, column_stats
1046 )?;
1047
1048 Ok(())
1049 }
1050}
1051
/// Statistics for a single column. Every field defaults to
/// `Precision::Absent`.
#[derive(Clone, Debug, PartialEq, Eq, Default)]
pub struct ColumnStatistics {
    /// Number of null values in the column.
    pub null_count: Precision<usize>,
    /// Maximum value of the column.
    pub max_value: Precision<ScalarValue>,
    /// Minimum value of the column.
    pub min_value: Precision<ScalarValue>,
    /// Sum of the column's values. `with_sum_value` stores it widened to the
    /// sum type (64-bit for narrower integer types).
    pub sum_value: Precision<ScalarValue>,
    /// Number of distinct values in the column.
    pub distinct_count: Precision<usize>,
    /// Size of the column in bytes; printed as `ScanBytes` by the `Display`
    /// impl of `Statistics`.
    // NOTE(review): presumably an estimate of the bytes scanned for this column — confirm.
    pub byte_size: Precision<usize>,
}
1089
1090impl ColumnStatistics {
1091 pub fn is_singleton(&self) -> bool {
1093 match (&self.min_value, &self.max_value) {
1094 (Precision::Exact(min), Precision::Exact(max)) => {
1096 !min.is_null() && !max.is_null() && (min == max)
1097 }
1098 (_, _) => false,
1099 }
1100 }
1101
1102 pub fn new_unknown() -> Self {
1104 Self {
1105 null_count: Precision::Absent,
1106 max_value: Precision::Absent,
1107 min_value: Precision::Absent,
1108 sum_value: Precision::Absent,
1109 distinct_count: Precision::Absent,
1110 byte_size: Precision::Absent,
1111 }
1112 }
1113
1114 pub fn with_null_count(mut self, null_count: Precision<usize>) -> Self {
1116 self.null_count = null_count;
1117 self
1118 }
1119
1120 pub fn with_max_value(mut self, max_value: Precision<ScalarValue>) -> Self {
1122 self.max_value = max_value;
1123 self
1124 }
1125
1126 pub fn with_min_value(mut self, min_value: Precision<ScalarValue>) -> Self {
1128 self.min_value = min_value;
1129 self
1130 }
1131
1132 pub fn with_sum_value(mut self, sum_value: Precision<ScalarValue>) -> Self {
1134 self.sum_value = match sum_value {
1135 Precision::Exact(value) => {
1136 Precision::<ScalarValue>::cast_scalar_to_sum_type(&value)
1137 .map(Precision::Exact)
1138 .unwrap_or(Precision::Absent)
1139 }
1140 Precision::Inexact(value) => {
1141 Precision::<ScalarValue>::cast_scalar_to_sum_type(&value)
1142 .map(Precision::Inexact)
1143 .unwrap_or(Precision::Absent)
1144 }
1145 Precision::Absent => Precision::Absent,
1146 };
1147 self
1148 }
1149
1150 pub fn with_distinct_count(mut self, distinct_count: Precision<usize>) -> Self {
1152 self.distinct_count = distinct_count;
1153 self
1154 }
1155
1156 pub fn with_byte_size(mut self, byte_size: Precision<usize>) -> Self {
1159 self.byte_size = byte_size;
1160 self
1161 }
1162
1163 pub fn to_inexact(mut self) -> Self {
1167 self.null_count = self.null_count.to_inexact();
1168 self.max_value = self.max_value.to_inexact();
1169 self.min_value = self.min_value.to_inexact();
1170 self.sum_value = self.sum_value.to_inexact();
1171 self.distinct_count = self.distinct_count.to_inexact();
1172 self.byte_size = self.byte_size.to_inexact();
1173 self
1174 }
1175}
1176
1177#[cfg(test)]
1178mod tests {
1179 use super::*;
1180 use crate::assert_contains;
1181 use arrow::datatypes::Field;
1182 use std::sync::Arc;
1183
1184 #[test]
1185 fn test_get_value() {
1186 let exact_precision = Precision::Exact(42);
1187 let inexact_precision = Precision::Inexact(23);
1188 let absent_precision = Precision::<i32>::Absent;
1189
1190 assert_eq!(*exact_precision.get_value().unwrap(), 42);
1191 assert_eq!(*inexact_precision.get_value().unwrap(), 23);
1192 assert_eq!(absent_precision.get_value(), None);
1193 }
1194
1195 #[test]
1196 fn test_map() {
1197 let exact_precision = Precision::Exact(42);
1198 let inexact_precision = Precision::Inexact(23);
1199 let absent_precision = Precision::Absent;
1200
1201 let squared = |x| x * x;
1202
1203 assert_eq!(exact_precision.map(squared), Precision::Exact(1764));
1204 assert_eq!(inexact_precision.map(squared), Precision::Inexact(529));
1205 assert_eq!(absent_precision.map(squared), Precision::Absent);
1206 }
1207
1208 #[test]
1209 fn test_is_exact() {
1210 let exact_precision = Precision::Exact(42);
1211 let inexact_precision = Precision::Inexact(23);
1212 let absent_precision = Precision::<i32>::Absent;
1213
1214 assert_eq!(exact_precision.is_exact(), Some(true));
1215 assert_eq!(inexact_precision.is_exact(), Some(false));
1216 assert_eq!(absent_precision.is_exact(), None);
1217 }
1218
1219 #[test]
1220 fn test_max() {
1221 let precision1 = Precision::Exact(42);
1222 let precision2 = Precision::Inexact(23);
1223 let precision3 = Precision::Exact(30);
1224 let absent_precision = Precision::Absent;
1225
1226 assert_eq!(precision1.max(&precision2), Precision::Inexact(42));
1227 assert_eq!(precision1.max(&precision3), Precision::Exact(42));
1228 assert_eq!(precision2.max(&precision3), Precision::Inexact(30));
1229 assert_eq!(precision1.max(&absent_precision), Precision::Absent);
1230 }
1231
1232 #[test]
1233 fn test_min() {
1234 let precision1 = Precision::Exact(42);
1235 let precision2 = Precision::Inexact(23);
1236 let precision3 = Precision::Exact(30);
1237 let absent_precision = Precision::Absent;
1238
1239 assert_eq!(precision1.min(&precision2), Precision::Inexact(23));
1240 assert_eq!(precision1.min(&precision3), Precision::Exact(30));
1241 assert_eq!(precision2.min(&precision3), Precision::Inexact(23));
1242 assert_eq!(precision1.min(&absent_precision), Precision::Absent);
1243 }
1244
1245 #[test]
1246 fn test_to_inexact() {
1247 let exact_precision = Precision::Exact(42);
1248 let inexact_precision = Precision::Inexact(42);
1249 let absent_precision = Precision::<i32>::Absent;
1250
1251 assert_eq!(exact_precision.to_inexact(), inexact_precision);
1252 assert_eq!(inexact_precision.to_inexact(), inexact_precision);
1253 assert_eq!(absent_precision.to_inexact(), absent_precision);
1254 }
1255
1256 #[test]
1257 fn test_add() {
1258 let precision1 = Precision::Exact(42);
1259 let precision2 = Precision::Inexact(23);
1260 let precision3 = Precision::Exact(30);
1261 let absent_precision = Precision::Absent;
1262 let precision_max_exact = Precision::Exact(usize::MAX);
1263 let precision_max_inexact = Precision::Exact(usize::MAX);
1264
1265 assert_eq!(precision1.add(&precision2), Precision::Inexact(65));
1266 assert_eq!(precision1.add(&precision3), Precision::Exact(72));
1267 assert_eq!(precision2.add(&precision3), Precision::Inexact(53));
1268 assert_eq!(precision1.add(&absent_precision), Precision::Absent);
1269 assert_eq!(
1270 precision_max_exact.add(&precision1),
1271 Precision::Inexact(usize::MAX)
1272 );
1273 assert_eq!(
1274 precision_max_inexact.add(&precision1),
1275 Precision::Inexact(usize::MAX)
1276 );
1277 }
1278
1279 #[test]
1280 fn test_add_scalar() {
1281 let precision = Precision::Exact(ScalarValue::Int32(Some(42)));
1282
1283 assert_eq!(
1284 precision.add(&Precision::Exact(ScalarValue::Int32(Some(23)))),
1285 Precision::Exact(ScalarValue::Int32(Some(65))),
1286 );
1287 assert_eq!(
1288 precision.add(&Precision::Inexact(ScalarValue::Int32(Some(23)))),
1289 Precision::Inexact(ScalarValue::Int32(Some(65))),
1290 );
1291 assert_eq!(
1292 precision.add(&Precision::Exact(ScalarValue::Int32(None))),
1293 Precision::Exact(ScalarValue::Int32(None)),
1295 );
1296 assert_eq!(precision.add(&Precision::Absent), Precision::Absent);
1297 }
1298
1299 #[test]
1300 fn test_add_for_sum_scalar_integer_widening() {
1301 let precision = Precision::Exact(ScalarValue::Int32(Some(42)));
1302
1303 assert_eq!(
1304 precision.add_for_sum(&Precision::Exact(ScalarValue::Int32(Some(23)))),
1305 Precision::Exact(ScalarValue::Int64(Some(65))),
1306 );
1307 assert_eq!(
1308 precision.add_for_sum(&Precision::Inexact(ScalarValue::Int32(Some(23)))),
1309 Precision::Inexact(ScalarValue::Int64(Some(65))),
1310 );
1311 }
1312
1313 #[test]
1314 fn test_add_for_sum_prevents_int32_overflow() {
1315 let lhs = Precision::Exact(ScalarValue::Int32(Some(i32::MAX)));
1316 let rhs = Precision::Exact(ScalarValue::Int32(Some(1)));
1317
1318 assert_eq!(
1319 lhs.add_for_sum(&rhs),
1320 Precision::Exact(ScalarValue::Int64(Some(i64::from(i32::MAX) + 1))),
1321 );
1322 }
1323
1324 #[test]
1325 fn test_add_for_sum_scalar_unsigned_integer_widening() {
1326 let precision = Precision::Exact(ScalarValue::UInt32(Some(42)));
1327
1328 assert_eq!(
1329 precision.add_for_sum(&Precision::Exact(ScalarValue::UInt32(Some(23)))),
1330 Precision::Exact(ScalarValue::UInt64(Some(65))),
1331 );
1332 assert_eq!(
1333 precision.add_for_sum(&Precision::Inexact(ScalarValue::UInt32(Some(23)))),
1334 Precision::Inexact(ScalarValue::UInt64(Some(65))),
1335 );
1336 }
1337
1338 #[test]
1339 fn test_sub() {
1340 let precision1 = Precision::Exact(42);
1341 let precision2 = Precision::Inexact(23);
1342 let precision3 = Precision::Exact(30);
1343 let absent_precision = Precision::Absent;
1344
1345 assert_eq!(precision1.sub(&precision2), Precision::Inexact(19));
1346 assert_eq!(precision1.sub(&precision3), Precision::Exact(12));
1347 assert_eq!(precision2.sub(&precision1), Precision::Inexact(0));
1348 assert_eq!(precision3.sub(&precision1), Precision::Inexact(0));
1349 assert_eq!(precision1.sub(&absent_precision), Precision::Absent);
1350 }
1351
1352 #[test]
1353 fn test_sub_scalar() {
1354 let precision = Precision::Exact(ScalarValue::Int32(Some(42)));
1355
1356 assert_eq!(
1357 precision.sub(&Precision::Exact(ScalarValue::Int32(Some(23)))),
1358 Precision::Exact(ScalarValue::Int32(Some(19))),
1359 );
1360 assert_eq!(
1361 precision.sub(&Precision::Inexact(ScalarValue::Int32(Some(23)))),
1362 Precision::Inexact(ScalarValue::Int32(Some(19))),
1363 );
1364 assert_eq!(
1365 precision.sub(&Precision::Exact(ScalarValue::Int32(None))),
1366 Precision::Exact(ScalarValue::Int32(None)),
1368 );
1369 assert_eq!(precision.sub(&Precision::Absent), Precision::Absent);
1370 }
1371
1372 #[test]
1373 fn test_multiply() {
1374 let precision1 = Precision::Exact(6);
1375 let precision2 = Precision::Inexact(3);
1376 let precision3 = Precision::Exact(5);
1377 let precision_max_exact = Precision::Exact(usize::MAX);
1378 let precision_max_inexact = Precision::Exact(usize::MAX);
1379 let absent_precision = Precision::Absent;
1380
1381 assert_eq!(precision1.multiply(&precision2), Precision::Inexact(18));
1382 assert_eq!(precision1.multiply(&precision3), Precision::Exact(30));
1383 assert_eq!(precision2.multiply(&precision3), Precision::Inexact(15));
1384 assert_eq!(precision1.multiply(&absent_precision), Precision::Absent);
1385 assert_eq!(
1386 precision_max_exact.multiply(&precision1),
1387 Precision::Inexact(usize::MAX)
1388 );
1389 assert_eq!(
1390 precision_max_inexact.multiply(&precision1),
1391 Precision::Inexact(usize::MAX)
1392 );
1393 }
1394
1395 #[test]
1396 fn test_multiply_scalar() {
1397 let precision = Precision::Exact(ScalarValue::Int32(Some(6)));
1398
1399 assert_eq!(
1400 precision.multiply(&Precision::Exact(ScalarValue::Int32(Some(5)))),
1401 Precision::Exact(ScalarValue::Int32(Some(30))),
1402 );
1403 assert_eq!(
1404 precision.multiply(&Precision::Inexact(ScalarValue::Int32(Some(5)))),
1405 Precision::Inexact(ScalarValue::Int32(Some(30))),
1406 );
1407 assert_eq!(
1408 precision.multiply(&Precision::Exact(ScalarValue::Int32(None))),
1409 Precision::Exact(ScalarValue::Int32(None)),
1411 );
1412 assert_eq!(precision.multiply(&Precision::Absent), Precision::Absent);
1413 }
1414
1415 #[test]
1416 fn test_cast_to() {
1417 assert_eq!(
1419 Precision::Exact(ScalarValue::Int32(Some(42)))
1420 .cast_to(&DataType::Int64)
1421 .unwrap(),
1422 Precision::Exact(ScalarValue::Int64(Some(42))),
1423 );
1424 assert_eq!(
1425 Precision::Inexact(ScalarValue::Int32(Some(42)))
1426 .cast_to(&DataType::Int64)
1427 .unwrap(),
1428 Precision::Inexact(ScalarValue::Int64(Some(42))),
1429 );
1430 assert_eq!(
1432 Precision::Exact(ScalarValue::Int32(None))
1433 .cast_to(&DataType::Int64)
1434 .unwrap(),
1435 Precision::Exact(ScalarValue::Int64(None)),
1436 );
1437 assert!(
1439 Precision::Exact(ScalarValue::Int32(Some(256)))
1440 .cast_to(&DataType::Int8)
1441 .is_err()
1442 );
1443 }
1444
1445 #[test]
1446 fn test_precision_cloning() {
1447 let precision: Precision<usize> = Precision::Exact(42);
1449 let p2 = precision;
1450 assert_eq!(precision, p2);
1451
1452 let precision: Precision<ScalarValue> =
1454 Precision::Exact(ScalarValue::Int64(Some(42)));
1455 let p2 = precision.clone();
1456 assert_eq!(precision, p2);
1457 }
1458
1459 #[test]
1460 fn test_project_none() {
1461 let projection: Option<Vec<usize>> = None;
1462 let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
1463 assert_eq!(stats, make_stats(vec![10, 20, 30]));
1464 }
1465
1466 #[test]
1467 fn test_project_empty() {
1468 let projection = Some(vec![]);
1469 let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
1470 assert_eq!(stats, make_stats(vec![]));
1471 }
1472
1473 #[test]
1474 fn test_project_swap() {
1475 let projection = Some(vec![2, 1]);
1476 let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
1477 assert_eq!(stats, make_stats(vec![30, 20]));
1478 }
1479
1480 #[test]
1481 fn test_project_repeated() {
1482 let projection = Some(vec![1, 2, 1, 1, 0, 2]);
1483 let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
1484 assert_eq!(stats, make_stats(vec![20, 30, 20, 20, 10, 30]));
1485 }
1486
1487 fn make_stats(counts: impl IntoIterator<Item = usize>) -> Statistics {
1489 Statistics {
1490 num_rows: Precision::Exact(42),
1491 total_byte_size: Precision::Exact(500),
1492 column_statistics: counts.into_iter().map(col_stats_i64).collect(),
1493 }
1494 }
1495
1496 fn col_stats_i64(null_count: usize) -> ColumnStatistics {
1497 ColumnStatistics {
1498 null_count: Precision::Exact(null_count),
1499 max_value: Precision::Exact(ScalarValue::Int64(Some(42))),
1500 min_value: Precision::Exact(ScalarValue::Int64(Some(64))),
1501 sum_value: Precision::Exact(ScalarValue::Int64(Some(4600))),
1502 distinct_count: Precision::Exact(100),
1503 byte_size: Precision::Exact(800),
1504 }
1505 }
1506
1507 fn make_single_i64_ndv_stats(
1508 distinct_count: Precision<usize>,
1509 min_value: Option<i64>,
1510 max_value: Option<i64>,
1511 ) -> Statistics {
1512 let to_precision = |value| Precision::Exact(ScalarValue::Int64(Some(value)));
1513
1514 Statistics::default()
1515 .with_num_rows(Precision::Exact(10))
1516 .add_column_statistics(
1517 ColumnStatistics::new_unknown()
1518 .with_distinct_count(distinct_count)
1519 .with_min_value(
1520 min_value.map(to_precision).unwrap_or(Precision::Absent),
1521 )
1522 .with_max_value(
1523 max_value.map(to_precision).unwrap_or(Precision::Absent),
1524 ),
1525 )
1526 }
1527
1528 fn merge_single_i64_ndv_distinct_count(
1529 left: Statistics,
1530 right: Statistics,
1531 ndv_fallback: NdvFallback,
1532 ) -> Precision<usize> {
1533 let schema = Schema::new(vec![Field::new("a", DataType::Int64, true)]);
1534
1535 Statistics::try_merge_iter_with_ndv_fallback(
1536 [&left, &right],
1537 &schema,
1538 ndv_fallback,
1539 )
1540 .unwrap()
1541 .column_statistics[0]
1542 .distinct_count
1543 }
1544
1545 #[test]
1546 fn test_try_merge() {
1547 let schema = Arc::new(Schema::new(vec![
1549 Field::new("col1", DataType::Int32, false),
1550 Field::new("col2", DataType::Int32, false),
1551 ]));
1552
1553 let stats1 = Statistics {
1555 num_rows: Precision::Exact(10),
1556 total_byte_size: Precision::Exact(100),
1557 column_statistics: vec![
1558 ColumnStatistics {
1559 null_count: Precision::Exact(1),
1560 max_value: Precision::Exact(ScalarValue::Int32(Some(100))),
1561 min_value: Precision::Exact(ScalarValue::Int32(Some(1))),
1562 sum_value: Precision::Exact(ScalarValue::Int32(Some(500))),
1563 distinct_count: Precision::Absent,
1564 byte_size: Precision::Exact(40),
1565 },
1566 ColumnStatistics {
1567 null_count: Precision::Exact(2),
1568 max_value: Precision::Exact(ScalarValue::Int32(Some(200))),
1569 min_value: Precision::Exact(ScalarValue::Int32(Some(10))),
1570 sum_value: Precision::Exact(ScalarValue::Int32(Some(1000))),
1571 distinct_count: Precision::Absent,
1572 byte_size: Precision::Exact(40),
1573 },
1574 ],
1575 };
1576
1577 let stats2 = Statistics {
1578 num_rows: Precision::Exact(15),
1579 total_byte_size: Precision::Exact(150),
1580 column_statistics: vec![
1581 ColumnStatistics {
1582 null_count: Precision::Exact(2),
1583 max_value: Precision::Exact(ScalarValue::Int32(Some(120))),
1584 min_value: Precision::Exact(ScalarValue::Int32(Some(-10))),
1585 sum_value: Precision::Exact(ScalarValue::Int32(Some(600))),
1586 distinct_count: Precision::Absent,
1587 byte_size: Precision::Exact(60),
1588 },
1589 ColumnStatistics {
1590 null_count: Precision::Exact(3),
1591 max_value: Precision::Exact(ScalarValue::Int32(Some(180))),
1592 min_value: Precision::Exact(ScalarValue::Int32(Some(5))),
1593 sum_value: Precision::Exact(ScalarValue::Int32(Some(1200))),
1594 distinct_count: Precision::Absent,
1595 byte_size: Precision::Exact(60),
1596 },
1597 ],
1598 };
1599
1600 let items = vec![stats1, stats2];
1601
1602 let summary_stats = Statistics::try_merge_iter(&items, &schema).unwrap();
1603
1604 assert_eq!(summary_stats.num_rows, Precision::Exact(25)); assert_eq!(summary_stats.total_byte_size, Precision::Exact(250)); let col1_stats = &summary_stats.column_statistics[0];
1610 assert_eq!(col1_stats.null_count, Precision::Exact(3)); assert_eq!(
1612 col1_stats.max_value,
1613 Precision::Exact(ScalarValue::Int32(Some(120)))
1614 );
1615 assert_eq!(
1616 col1_stats.min_value,
1617 Precision::Exact(ScalarValue::Int32(Some(-10)))
1618 );
1619 assert_eq!(
1620 col1_stats.sum_value,
1621 Precision::Exact(ScalarValue::Int64(Some(1100)))
1622 ); let col2_stats = &summary_stats.column_statistics[1];
1625 assert_eq!(col2_stats.null_count, Precision::Exact(5)); assert_eq!(
1627 col2_stats.max_value,
1628 Precision::Exact(ScalarValue::Int32(Some(200)))
1629 );
1630 assert_eq!(
1631 col2_stats.min_value,
1632 Precision::Exact(ScalarValue::Int32(Some(5)))
1633 );
1634 assert_eq!(
1635 col2_stats.sum_value,
1636 Precision::Exact(ScalarValue::Int64(Some(2200)))
1637 ); }
1639
1640 #[test]
1641 fn test_try_merge_mixed_precision() {
1642 let schema = Arc::new(Schema::new(vec![Field::new(
1644 "col1",
1645 DataType::Int32,
1646 false,
1647 )]));
1648
1649 let stats1 = Statistics {
1651 num_rows: Precision::Exact(10),
1652 total_byte_size: Precision::Inexact(100),
1653 column_statistics: vec![ColumnStatistics {
1654 null_count: Precision::Exact(1),
1655 max_value: Precision::Exact(ScalarValue::Int32(Some(100))),
1656 min_value: Precision::Inexact(ScalarValue::Int32(Some(1))),
1657 sum_value: Precision::Exact(ScalarValue::Int32(Some(500))),
1658 distinct_count: Precision::Absent,
1659 byte_size: Precision::Exact(40),
1660 }],
1661 };
1662
1663 let stats2 = Statistics {
1664 num_rows: Precision::Inexact(15),
1665 total_byte_size: Precision::Exact(150),
1666 column_statistics: vec![ColumnStatistics {
1667 null_count: Precision::Inexact(2),
1668 max_value: Precision::Inexact(ScalarValue::Int32(Some(120))),
1669 min_value: Precision::Exact(ScalarValue::Int32(Some(-10))),
1670 sum_value: Precision::Absent,
1671 distinct_count: Precision::Absent,
1672 byte_size: Precision::Inexact(60),
1673 }],
1674 };
1675
1676 let items = vec![stats1, stats2];
1677
1678 let summary_stats = Statistics::try_merge_iter(&items, &schema).unwrap();
1679
1680 assert_eq!(summary_stats.num_rows, Precision::Inexact(25));
1681 assert_eq!(summary_stats.total_byte_size, Precision::Inexact(250));
1682
1683 let col_stats = &summary_stats.column_statistics[0];
1684 assert_eq!(col_stats.null_count, Precision::Inexact(3));
1685 assert_eq!(
1686 col_stats.max_value,
1687 Precision::Inexact(ScalarValue::Int32(Some(120)))
1688 );
1689 assert_eq!(
1690 col_stats.min_value,
1691 Precision::Inexact(ScalarValue::Int32(Some(-10)))
1692 );
1693 assert_eq!(col_stats.sum_value, Precision::Absent);
1694 }
1695
1696 #[test]
1697 fn test_try_merge_empty() {
1698 let schema = Arc::new(Schema::new(vec![Field::new(
1699 "col1",
1700 DataType::Int32,
1701 false,
1702 )]));
1703
1704 let items: Vec<Statistics> = vec![];
1706
1707 let summary_stats = Statistics::try_merge_iter(&items, &schema).unwrap();
1708
1709 assert_eq!(summary_stats.num_rows, Precision::Absent);
1711 assert_eq!(summary_stats.total_byte_size, Precision::Absent);
1712 assert_eq!(summary_stats.column_statistics.len(), 1);
1713 assert_eq!(
1714 summary_stats.column_statistics[0].null_count,
1715 Precision::Absent
1716 );
1717 }
1718
1719 #[test]
1720 fn test_try_merge_mismatched_size() {
1721 let schema = Arc::new(Schema::new(vec![Field::new(
1723 "col1",
1724 DataType::Int32,
1725 false,
1726 )]));
1727
1728 let stats1 = Statistics::default();
1730
1731 let stats2 =
1732 Statistics::default().add_column_statistics(ColumnStatistics::new_unknown());
1733
1734 let items = vec![stats1, stats2];
1735
1736 let e = Statistics::try_merge_iter(&items, &schema).unwrap_err();
1737 assert_contains!(
1738 e.to_string(),
1739 "Error during planning: Cannot merge statistics with different number of columns: 0 vs 1"
1740 );
1741 }
1742
1743 #[test]
1744 fn test_try_merge_distinct_count_absent() {
1745 let stats1 = Statistics::default()
1747 .with_num_rows(Precision::Exact(10))
1748 .with_total_byte_size(Precision::Exact(100))
1749 .add_column_statistics(
1750 ColumnStatistics::new_unknown()
1751 .with_null_count(Precision::Exact(0))
1752 .with_min_value(Precision::Exact(ScalarValue::Int32(Some(1))))
1753 .with_max_value(Precision::Exact(ScalarValue::Int32(Some(10))))
1754 .with_distinct_count(Precision::Exact(5)),
1755 );
1756
1757 let stats2 = Statistics::default()
1758 .with_num_rows(Precision::Exact(15))
1759 .with_total_byte_size(Precision::Exact(150))
1760 .add_column_statistics(
1761 ColumnStatistics::new_unknown()
1762 .with_null_count(Precision::Exact(0))
1763 .with_min_value(Precision::Exact(ScalarValue::Int32(Some(5))))
1764 .with_max_value(Precision::Exact(ScalarValue::Int32(Some(20))))
1765 .with_distinct_count(Precision::Exact(7)),
1766 );
1767
1768 let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
1770 let merged_stats =
1771 Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
1772
1773 assert_eq!(merged_stats.num_rows, Precision::Exact(25));
1775 assert_eq!(merged_stats.total_byte_size, Precision::Exact(250));
1776
1777 let col_stats = &merged_stats.column_statistics[0];
1778 assert_eq!(col_stats.null_count, Precision::Exact(0));
1779 assert_eq!(
1780 col_stats.min_value,
1781 Precision::Exact(ScalarValue::Int32(Some(1)))
1782 );
1783 assert_eq!(
1784 col_stats.max_value,
1785 Precision::Exact(ScalarValue::Int32(Some(20)))
1786 );
1787 assert_eq!(col_stats.distinct_count, Precision::Inexact(10));
1792 }
1793
1794 #[test]
1795 fn test_try_merge_ndv_disjoint_ranges() {
1796 let stats1 = Statistics::default()
1797 .with_num_rows(Precision::Exact(10))
1798 .add_column_statistics(
1799 ColumnStatistics::new_unknown()
1800 .with_min_value(Precision::Exact(ScalarValue::Int32(Some(0))))
1801 .with_max_value(Precision::Exact(ScalarValue::Int32(Some(10))))
1802 .with_distinct_count(Precision::Exact(5)),
1803 );
1804 let stats2 = Statistics::default()
1805 .with_num_rows(Precision::Exact(10))
1806 .add_column_statistics(
1807 ColumnStatistics::new_unknown()
1808 .with_min_value(Precision::Exact(ScalarValue::Int32(Some(20))))
1809 .with_max_value(Precision::Exact(ScalarValue::Int32(Some(30))))
1810 .with_distinct_count(Precision::Exact(8)),
1811 );
1812
1813 let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
1814 let merged = Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
1815 assert_eq!(
1817 merged.column_statistics[0].distinct_count,
1818 Precision::Inexact(13)
1819 );
1820 }
1821
1822 #[test]
1823 fn test_try_merge_ndv_identical_ranges() {
1824 let stats1 = Statistics::default()
1825 .with_num_rows(Precision::Exact(100))
1826 .add_column_statistics(
1827 ColumnStatistics::new_unknown()
1828 .with_min_value(Precision::Exact(ScalarValue::Int32(Some(0))))
1829 .with_max_value(Precision::Exact(ScalarValue::Int32(Some(100))))
1830 .with_distinct_count(Precision::Exact(50)),
1831 );
1832 let stats2 = Statistics::default()
1833 .with_num_rows(Precision::Exact(100))
1834 .add_column_statistics(
1835 ColumnStatistics::new_unknown()
1836 .with_min_value(Precision::Exact(ScalarValue::Int32(Some(0))))
1837 .with_max_value(Precision::Exact(ScalarValue::Int32(Some(100))))
1838 .with_distinct_count(Precision::Exact(30)),
1839 );
1840
1841 let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
1842 let merged = Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
1843 assert_eq!(
1845 merged.column_statistics[0].distinct_count,
1846 Precision::Inexact(50)
1847 );
1848 }
1849
1850 #[test]
1851 fn test_try_merge_ndv_partial_overlap() {
1852 let stats1 = Statistics::default()
1853 .with_num_rows(Precision::Exact(100))
1854 .add_column_statistics(
1855 ColumnStatistics::new_unknown()
1856 .with_min_value(Precision::Exact(ScalarValue::Int32(Some(0))))
1857 .with_max_value(Precision::Exact(ScalarValue::Int32(Some(100))))
1858 .with_distinct_count(Precision::Exact(80)),
1859 );
1860 let stats2 = Statistics::default()
1861 .with_num_rows(Precision::Exact(100))
1862 .add_column_statistics(
1863 ColumnStatistics::new_unknown()
1864 .with_min_value(Precision::Exact(ScalarValue::Int32(Some(50))))
1865 .with_max_value(Precision::Exact(ScalarValue::Int32(Some(150))))
1866 .with_distinct_count(Precision::Exact(60)),
1867 );
1868
1869 let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
1870 let merged = Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
1871 assert_eq!(
1875 merged.column_statistics[0].distinct_count,
1876 Precision::Inexact(110)
1877 );
1878 }
1879
1880 #[test]
1881 fn test_try_merge_ndv_missing_min_max() {
1882 let stats1 = Statistics::default()
1883 .with_num_rows(Precision::Exact(10))
1884 .add_column_statistics(
1885 ColumnStatistics::new_unknown().with_distinct_count(Precision::Exact(5)),
1886 );
1887 let stats2 = Statistics::default()
1888 .with_num_rows(Precision::Exact(10))
1889 .add_column_statistics(
1890 ColumnStatistics::new_unknown().with_distinct_count(Precision::Exact(8)),
1891 );
1892
1893 let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
1894 let merged = Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
1895 assert_eq!(
1897 merged.column_statistics[0].distinct_count,
1898 Precision::Inexact(8)
1899 );
1900 }
1901
1902 #[test]
1903 fn test_try_merge_ndv_non_numeric_types() {
1904 let stats1 = Statistics::default()
1905 .with_num_rows(Precision::Exact(10))
1906 .add_column_statistics(
1907 ColumnStatistics::new_unknown()
1908 .with_min_value(Precision::Exact(ScalarValue::Utf8(Some(
1909 "aaa".to_string(),
1910 ))))
1911 .with_max_value(Precision::Exact(ScalarValue::Utf8(Some(
1912 "zzz".to_string(),
1913 ))))
1914 .with_distinct_count(Precision::Exact(5)),
1915 );
1916 let stats2 = Statistics::default()
1917 .with_num_rows(Precision::Exact(10))
1918 .add_column_statistics(
1919 ColumnStatistics::new_unknown()
1920 .with_min_value(Precision::Exact(ScalarValue::Utf8(Some(
1921 "bbb".to_string(),
1922 ))))
1923 .with_max_value(Precision::Exact(ScalarValue::Utf8(Some(
1924 "yyy".to_string(),
1925 ))))
1926 .with_distinct_count(Precision::Exact(8)),
1927 );
1928
1929 let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
1930 let merged = Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
1931 assert_eq!(
1933 merged.column_statistics[0].distinct_count,
1934 Precision::Inexact(8)
1935 );
1936 }
1937
1938 #[test]
1939 fn test_try_merge_ndv_non_numeric_types_sum_fallback() {
1940 let stats1 = Statistics::default()
1941 .with_num_rows(Precision::Exact(10))
1942 .add_column_statistics(
1943 ColumnStatistics::new_unknown()
1944 .with_min_value(Precision::Exact(ScalarValue::Utf8(Some(
1945 "aaa".to_string(),
1946 ))))
1947 .with_max_value(Precision::Exact(ScalarValue::Utf8(Some(
1948 "zzz".to_string(),
1949 ))))
1950 .with_distinct_count(Precision::Exact(5)),
1951 );
1952 let stats2 = Statistics::default()
1953 .with_num_rows(Precision::Exact(10))
1954 .add_column_statistics(
1955 ColumnStatistics::new_unknown()
1956 .with_min_value(Precision::Exact(ScalarValue::Utf8(Some(
1957 "bbb".to_string(),
1958 ))))
1959 .with_max_value(Precision::Exact(ScalarValue::Utf8(Some(
1960 "yyy".to_string(),
1961 ))))
1962 .with_distinct_count(Precision::Exact(8)),
1963 );
1964
1965 let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
1966 let merged = Statistics::try_merge_iter_with_ndv_fallback(
1967 [&stats1, &stats2],
1968 &schema,
1969 NdvFallback::Sum,
1970 )
1971 .unwrap();
1972
1973 assert_eq!(
1975 merged.column_statistics[0].distinct_count,
1976 Precision::Inexact(13)
1977 );
1978 }
1979
1980 #[test]
1981 fn test_try_merge_ndv_constant_columns() {
1982 let stats1 = Statistics::default()
1984 .with_num_rows(Precision::Exact(10))
1985 .add_column_statistics(
1986 ColumnStatistics::new_unknown()
1987 .with_min_value(Precision::Exact(ScalarValue::Int32(Some(5))))
1988 .with_max_value(Precision::Exact(ScalarValue::Int32(Some(5))))
1989 .with_distinct_count(Precision::Exact(1)),
1990 );
1991 let stats2 = Statistics::default()
1992 .with_num_rows(Precision::Exact(10))
1993 .add_column_statistics(
1994 ColumnStatistics::new_unknown()
1995 .with_min_value(Precision::Exact(ScalarValue::Int32(Some(5))))
1996 .with_max_value(Precision::Exact(ScalarValue::Int32(Some(5))))
1997 .with_distinct_count(Precision::Exact(1)),
1998 );
1999
2000 let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
2001 let merged = Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
2002 assert_eq!(
2003 merged.column_statistics[0].distinct_count,
2004 Precision::Inexact(1)
2005 );
2006
2007 let stats3 = Statistics::default()
2009 .with_num_rows(Precision::Exact(10))
2010 .add_column_statistics(
2011 ColumnStatistics::new_unknown()
2012 .with_min_value(Precision::Exact(ScalarValue::Int32(Some(5))))
2013 .with_max_value(Precision::Exact(ScalarValue::Int32(Some(5))))
2014 .with_distinct_count(Precision::Exact(1)),
2015 );
2016 let stats4 = Statistics::default()
2017 .with_num_rows(Precision::Exact(10))
2018 .add_column_statistics(
2019 ColumnStatistics::new_unknown()
2020 .with_min_value(Precision::Exact(ScalarValue::Int32(Some(10))))
2021 .with_max_value(Precision::Exact(ScalarValue::Int32(Some(10))))
2022 .with_distinct_count(Precision::Exact(1)),
2023 );
2024
2025 let merged = Statistics::try_merge_iter([&stats3, &stats4], &schema).unwrap();
2026 assert_eq!(
2027 merged.column_statistics[0].distinct_count,
2028 Precision::Inexact(2)
2029 );
2030 }
2031
2032 #[test]
2033 fn test_try_merge_ndv_original_union_edge_cases() {
2034 struct NdvTestCase {
2035 name: &'static str,
2036 left_ndv: Precision<usize>,
2037 left_min: Option<i64>,
2038 left_max: Option<i64>,
2039 right_ndv: Precision<usize>,
2040 right_min: Option<i64>,
2041 right_max: Option<i64>,
2042 expected: Precision<usize>,
2043 }
2044
2045 let cases = vec![
2046 NdvTestCase {
2047 name: "disjoint ranges",
2048 left_ndv: Precision::Exact(5),
2049 left_min: Some(0),
2050 left_max: Some(10),
2051 right_ndv: Precision::Exact(3),
2052 right_min: Some(20),
2053 right_max: Some(30),
2054 expected: Precision::Inexact(8),
2055 },
2056 NdvTestCase {
2057 name: "identical ranges",
2058 left_ndv: Precision::Exact(10),
2059 left_min: Some(0),
2060 left_max: Some(100),
2061 right_ndv: Precision::Exact(8),
2062 right_min: Some(0),
2063 right_max: Some(100),
2064 expected: Precision::Inexact(10),
2065 },
2066 NdvTestCase {
2067 name: "partial overlap",
2068 left_ndv: Precision::Exact(100),
2069 left_min: Some(0),
2070 left_max: Some(100),
2071 right_ndv: Precision::Exact(50),
2072 right_min: Some(50),
2073 right_max: Some(150),
2074 expected: Precision::Inexact(125),
2075 },
2076 NdvTestCase {
2077 name: "right contained in left",
2078 left_ndv: Precision::Exact(100),
2079 left_min: Some(0),
2080 left_max: Some(100),
2081 right_ndv: Precision::Exact(50),
2082 right_min: Some(25),
2083 right_max: Some(75),
2084 expected: Precision::Inexact(100),
2085 },
2086 NdvTestCase {
2087 name: "same constant value",
2088 left_ndv: Precision::Exact(1),
2089 left_min: Some(5),
2090 left_max: Some(5),
2091 right_ndv: Precision::Exact(1),
2092 right_min: Some(5),
2093 right_max: Some(5),
2094 expected: Precision::Inexact(1),
2095 },
2096 NdvTestCase {
2097 name: "different constant values",
2098 left_ndv: Precision::Exact(1),
2099 left_min: Some(5),
2100 left_max: Some(5),
2101 right_ndv: Precision::Exact(1),
2102 right_min: Some(10),
2103 right_max: Some(10),
2104 expected: Precision::Inexact(2),
2105 },
2106 NdvTestCase {
2107 name: "left constant within right range",
2108 left_ndv: Precision::Exact(1),
2109 left_min: Some(5),
2110 left_max: Some(5),
2111 right_ndv: Precision::Exact(10),
2112 right_min: Some(0),
2113 right_max: Some(10),
2114 expected: Precision::Inexact(10),
2115 },
2116 NdvTestCase {
2117 name: "left constant outside right range",
2118 left_ndv: Precision::Exact(1),
2119 left_min: Some(20),
2120 left_max: Some(20),
2121 right_ndv: Precision::Exact(10),
2122 right_min: Some(0),
2123 right_max: Some(10),
2124 expected: Precision::Inexact(11),
2125 },
2126 NdvTestCase {
2127 name: "right constant within left range",
2128 left_ndv: Precision::Exact(10),
2129 left_min: Some(0),
2130 left_max: Some(10),
2131 right_ndv: Precision::Exact(1),
2132 right_min: Some(5),
2133 right_max: Some(5),
2134 expected: Precision::Inexact(10),
2135 },
2136 NdvTestCase {
2137 name: "right constant outside left range",
2138 left_ndv: Precision::Exact(10),
2139 left_min: Some(0),
2140 left_max: Some(10),
2141 right_ndv: Precision::Exact(1),
2142 right_min: Some(20),
2143 right_max: Some(20),
2144 expected: Precision::Inexact(11),
2145 },
2146 NdvTestCase {
2147 name: "missing bounds exact plus exact",
2148 left_ndv: Precision::Exact(10),
2149 left_min: None,
2150 left_max: None,
2151 right_ndv: Precision::Exact(5),
2152 right_min: None,
2153 right_max: None,
2154 expected: Precision::Inexact(15),
2155 },
2156 NdvTestCase {
2157 name: "missing bounds exact plus inexact",
2158 left_ndv: Precision::Exact(10),
2159 left_min: None,
2160 left_max: None,
2161 right_ndv: Precision::Inexact(5),
2162 right_min: None,
2163 right_max: None,
2164 expected: Precision::Inexact(15),
2165 },
2166 NdvTestCase {
2167 name: "missing bounds inexact plus inexact",
2168 left_ndv: Precision::Inexact(7),
2169 left_min: None,
2170 left_max: None,
2171 right_ndv: Precision::Inexact(3),
2172 right_min: None,
2173 right_max: None,
2174 expected: Precision::Inexact(10),
2175 },
2176 NdvTestCase {
2177 name: "exact plus absent",
2178 left_ndv: Precision::Exact(10),
2179 left_min: None,
2180 left_max: None,
2181 right_ndv: Precision::Absent,
2182 right_min: None,
2183 right_max: None,
2184 expected: Precision::Absent,
2185 },
2186 NdvTestCase {
2187 name: "inexact plus absent",
2188 left_ndv: Precision::Inexact(4),
2189 left_min: None,
2190 left_max: None,
2191 right_ndv: Precision::Absent,
2192 right_min: None,
2193 right_max: None,
2194 expected: Precision::Absent,
2195 },
2196 ];
2197
2198 for case in cases {
2199 let actual = merge_single_i64_ndv_distinct_count(
2200 make_single_i64_ndv_stats(case.left_ndv, case.left_min, case.left_max),
2201 make_single_i64_ndv_stats(case.right_ndv, case.right_min, case.right_max),
2202 NdvFallback::Sum,
2203 );
2204
2205 assert_eq!(actual, case.expected, "case {} failed", case.name);
2206 }
2207 }
2208
2209 #[test]
2210 fn test_with_fetch_basic_preservation() {
2211 let original_stats = Statistics {
2213 num_rows: Precision::Exact(1000),
2214 total_byte_size: Precision::Exact(8000),
2215 column_statistics: vec![
2216 ColumnStatistics {
2217 null_count: Precision::Exact(10),
2218 max_value: Precision::Exact(ScalarValue::Int32(Some(100))),
2219 min_value: Precision::Exact(ScalarValue::Int32(Some(0))),
2220 sum_value: Precision::Exact(ScalarValue::Int32(Some(5050))),
2221 distinct_count: Precision::Exact(50),
2222 byte_size: Precision::Exact(4000),
2223 },
2224 ColumnStatistics {
2225 null_count: Precision::Exact(20),
2226 max_value: Precision::Exact(ScalarValue::Int64(Some(200))),
2227 min_value: Precision::Exact(ScalarValue::Int64(Some(10))),
2228 sum_value: Precision::Exact(ScalarValue::Int64(Some(10100))),
2229 distinct_count: Precision::Exact(75),
2230 byte_size: Precision::Exact(8000),
2231 },
2232 ],
2233 };
2234
2235 let result = original_stats.clone().with_fetch(Some(100), 0, 1).unwrap();
2237
2238 assert_eq!(result.num_rows, Precision::Exact(100));
2240
2241 assert_eq!(result.total_byte_size, Precision::Inexact(1200));
2244
2245 assert_eq!(result.column_statistics.len(), 2);
2247
2248 assert_eq!(
2250 result.column_statistics[0].null_count,
2251 Precision::Inexact(10)
2252 );
2253 assert_eq!(
2254 result.column_statistics[0].max_value,
2255 Precision::Inexact(ScalarValue::Int32(Some(100)))
2256 );
2257 assert_eq!(
2258 result.column_statistics[0].min_value,
2259 Precision::Inexact(ScalarValue::Int32(Some(0)))
2260 );
2261 assert_eq!(
2262 result.column_statistics[0].sum_value,
2263 Precision::Inexact(ScalarValue::Int32(Some(5050)))
2264 );
2265 assert_eq!(
2266 result.column_statistics[0].distinct_count,
2267 Precision::Inexact(50)
2268 );
2269
2270 assert_eq!(
2272 result.column_statistics[1].null_count,
2273 Precision::Inexact(20)
2274 );
2275 assert_eq!(
2276 result.column_statistics[1].max_value,
2277 Precision::Inexact(ScalarValue::Int64(Some(200)))
2278 );
2279 assert_eq!(
2280 result.column_statistics[1].min_value,
2281 Precision::Inexact(ScalarValue::Int64(Some(10)))
2282 );
2283 assert_eq!(
2284 result.column_statistics[1].sum_value,
2285 Precision::Inexact(ScalarValue::Int64(Some(10100)))
2286 );
2287 assert_eq!(
2288 result.column_statistics[1].distinct_count,
2289 Precision::Inexact(75)
2290 );
2291 }
2292
2293 #[test]
2294 fn test_with_fetch_inexact_input() {
2295 let original_stats = Statistics {
2297 num_rows: Precision::Inexact(1000),
2298 total_byte_size: Precision::Inexact(8000),
2299 column_statistics: vec![ColumnStatistics {
2300 null_count: Precision::Inexact(10),
2301 max_value: Precision::Inexact(ScalarValue::Int32(Some(100))),
2302 min_value: Precision::Inexact(ScalarValue::Int32(Some(0))),
2303 sum_value: Precision::Inexact(ScalarValue::Int32(Some(5050))),
2304 distinct_count: Precision::Inexact(50),
2305 byte_size: Precision::Inexact(4000),
2306 }],
2307 };
2308
2309 let result = original_stats.clone().with_fetch(Some(500), 0, 1).unwrap();
2310
2311 assert_eq!(result.num_rows, Precision::Inexact(500));
2313
2314 assert_eq!(result.total_byte_size, Precision::Inexact(2000));
2317
2318 assert_eq!(
2320 result.column_statistics[0].null_count,
2321 Precision::Inexact(10)
2322 );
2323 }
2324
2325 #[test]
2326 fn test_with_fetch_skip_all_rows() {
2327 let original_stats = Statistics {
2329 num_rows: Precision::Exact(100),
2330 total_byte_size: Precision::Exact(800),
2331 column_statistics: vec![col_stats_i64(10)],
2332 };
2333
2334 let result = original_stats.clone().with_fetch(Some(50), 100, 1).unwrap();
2335
2336 assert_eq!(result.num_rows, Precision::Exact(0));
2337 assert_eq!(result.total_byte_size, Precision::Inexact(0));
2339 }
2340
2341 #[test]
2342 fn test_with_fetch_skip_all_rows_inexact() {
2343 let original_stats = Statistics {
2347 num_rows: Precision::Inexact(0),
2348 total_byte_size: Precision::Inexact(0),
2349 column_statistics: vec![col_stats_i64(10)],
2350 };
2351
2352 let result = original_stats.clone().with_fetch(None, 0, 1).unwrap();
2353
2354 assert_eq!(result.num_rows, Precision::Inexact(0));
2355 }
2356
2357 #[test]
2358 fn test_with_fetch_no_limit() {
2359 let original_stats = Statistics {
2361 num_rows: Precision::Exact(100),
2362 total_byte_size: Precision::Exact(800),
2363 column_statistics: vec![col_stats_i64(10)],
2364 };
2365
2366 let result = original_stats.clone().with_fetch(None, 0, 1).unwrap();
2367
2368 assert_eq!(result.num_rows, Precision::Exact(100));
2370 assert_eq!(result.total_byte_size, Precision::Exact(800));
2371 }
2372
2373 #[test]
2374 fn test_with_fetch_with_skip() {
2375 let original_stats = Statistics {
2377 num_rows: Precision::Exact(1000),
2378 total_byte_size: Precision::Exact(8000),
2379 column_statistics: vec![col_stats_i64(10)],
2380 };
2381
2382 let result = original_stats
2384 .clone()
2385 .with_fetch(Some(300), 200, 1)
2386 .unwrap();
2387
2388 assert_eq!(result.num_rows, Precision::Exact(300));
2389 assert_eq!(result.total_byte_size, Precision::Inexact(240));
2391 }
2392
2393 #[test]
2394 fn test_with_fetch_multi_partition() {
2395 let original_stats = Statistics {
2397 num_rows: Precision::Exact(1000), total_byte_size: Precision::Exact(8000),
2399 column_statistics: vec![col_stats_i64(10)],
2400 };
2401
2402 let result = original_stats.clone().with_fetch(Some(100), 0, 4).unwrap();
2404
2405 assert_eq!(result.num_rows, Precision::Exact(400));
2406 assert_eq!(result.total_byte_size, Precision::Inexact(320));
2408 }
2409
2410 #[test]
2411 fn test_with_fetch_absent_stats() {
2412 let original_stats = Statistics {
2414 num_rows: Precision::Absent,
2415 total_byte_size: Precision::Absent,
2416 column_statistics: vec![ColumnStatistics {
2417 null_count: Precision::Absent,
2418 max_value: Precision::Absent,
2419 min_value: Precision::Absent,
2420 sum_value: Precision::Absent,
2421 distinct_count: Precision::Absent,
2422 byte_size: Precision::Absent,
2423 }],
2424 };
2425
2426 let result = original_stats.clone().with_fetch(Some(100), 0, 1).unwrap();
2427
2428 assert_eq!(result.num_rows, Precision::Inexact(100));
2430 assert_eq!(result.total_byte_size, Precision::Absent);
2431 assert_eq!(result.column_statistics[0].null_count, Precision::Absent);
2433 }
2434
2435 #[test]
2436 fn test_with_fetch_fetch_exceeds_rows() {
2437 let original_stats = Statistics {
2439 num_rows: Precision::Exact(100),
2440 total_byte_size: Precision::Exact(800),
2441 column_statistics: vec![col_stats_i64(10)],
2442 };
2443
2444 let result = original_stats.clone().with_fetch(Some(100), 50, 1).unwrap();
2446
2447 assert_eq!(result.num_rows, Precision::Exact(50));
2448 assert_eq!(result.total_byte_size, Precision::Inexact(400));
2450 }
2451
/// After `with_fetch(Some(250), 0, 1)` the column's null count, min, max and
/// sum are expected to keep their values but become inexact, and the
/// distinct count is expected to be `Inexact(250)`.
#[test]
fn test_with_fetch_preserves_all_column_stats() {
    let input = Statistics {
        num_rows: Precision::Exact(1000),
        total_byte_size: Precision::Exact(8000),
        column_statistics: vec![ColumnStatistics {
            null_count: Precision::Exact(42),
            max_value: Precision::Exact(ScalarValue::Int32(Some(999))),
            min_value: Precision::Exact(ScalarValue::Int32(Some(-100))),
            sum_value: Precision::Exact(ScalarValue::Int32(Some(123456))),
            distinct_count: Precision::Exact(789),
            byte_size: Precision::Exact(4000),
        }],
    };

    let limited = input.with_fetch(Some(250), 0, 1).unwrap();
    let column = &limited.column_statistics[0];

    assert_eq!(column.null_count, Precision::Inexact(42));

    let expected_max = Precision::Inexact(ScalarValue::Int32(Some(999)));
    assert_eq!(column.max_value, expected_max);

    let expected_min = Precision::Inexact(ScalarValue::Int32(Some(-100)));
    assert_eq!(column.min_value, expected_min);

    let expected_sum = Precision::Inexact(ScalarValue::Int32(Some(123456)));
    assert_eq!(column.sum_value, expected_sum);

    assert_eq!(column.distinct_count, Precision::Inexact(250));
}
2491
/// `to_inexact` on a column with `byte_size: Exact(5000)` is expected to
/// report `byte_size: Inexact(5000)`.
#[test]
fn test_byte_size_to_inexact() {
    let exact_column = ColumnStatistics {
        byte_size: Precision::Exact(5000),
        null_count: Precision::Exact(10),
        distinct_count: Precision::Absent,
        sum_value: Precision::Absent,
        min_value: Precision::Absent,
        max_value: Precision::Absent,
    };

    let relaxed = exact_column.to_inexact();

    assert_eq!(relaxed.byte_size, Precision::Inexact(5000));
}
2506
/// The `with_byte_size` builder is expected to store the given precision in
/// the `byte_size` field.
#[test]
fn test_with_byte_size_builder() {
    let unknown = ColumnStatistics::new_unknown();
    let sized = unknown.with_byte_size(Precision::Exact(8192));

    assert_eq!(sized.byte_size, Precision::Exact(8192));
}
2513
/// Passing a `UInt32` sum to `with_sum_value` is expected to leave a
/// `UInt64` sum with the same value on the column.
#[test]
fn test_with_sum_value_builder_widens_small_integers() {
    let narrow_sum = Precision::Exact(ScalarValue::UInt32(Some(123)));
    let built = ColumnStatistics::new_unknown().with_sum_value(narrow_sum);

    let widened_sum = Precision::Exact(ScalarValue::UInt64(Some(123)));
    assert_eq!(built.sum_value, widened_sum);
}
2523
/// With 1000 exact rows and `with_fetch(Some(100), 0, 1)`, per-column byte
/// sizes of 4000 and 8000 are expected to become `Inexact(400)` and
/// `Inexact(800)`, and the total byte size `Inexact(1200)`.
#[test]
fn test_with_fetch_scales_byte_size() {
    // Column with only a null count and a byte size known.
    let sized_column = |nulls, bytes| ColumnStatistics {
        null_count: Precision::Exact(nulls),
        max_value: Precision::Absent,
        min_value: Precision::Absent,
        sum_value: Precision::Absent,
        distinct_count: Precision::Absent,
        byte_size: Precision::Exact(bytes),
    };

    let input = Statistics {
        num_rows: Precision::Exact(1000),
        total_byte_size: Precision::Exact(8000),
        column_statistics: vec![sized_column(10, 4000), sized_column(20, 8000)],
    };

    let limited = input.with_fetch(Some(100), 0, 1).unwrap();
    let columns = &limited.column_statistics;

    assert_eq!(columns[0].byte_size, Precision::Inexact(400));
    assert_eq!(columns[1].byte_size, Precision::Inexact(800));

    assert_eq!(limited.total_byte_size, Precision::Inexact(1200));
}
2566
/// When one of the two columns has an absent byte size, the total byte size
/// after `with_fetch(Some(100), 0, 1)` is expected to be `Inexact(800)`.
#[test]
fn test_with_fetch_total_byte_size_fallback() {
    // Column with an exact null count and a caller-chosen byte size precision.
    let column_with = |nulls, byte_size| ColumnStatistics {
        null_count: Precision::Exact(nulls),
        max_value: Precision::Absent,
        min_value: Precision::Absent,
        sum_value: Precision::Absent,
        distinct_count: Precision::Absent,
        byte_size,
    };

    let input = Statistics {
        num_rows: Precision::Exact(1000),
        total_byte_size: Precision::Exact(8000),
        column_statistics: vec![
            column_with(10, Precision::Exact(4000)),
            column_with(20, Precision::Absent),
        ],
    };

    let limited = input.with_fetch(Some(100), 0, 1).unwrap();

    assert_eq!(limited.total_byte_size, Precision::Inexact(800));
}
2599
/// A distinct count of `Inexact(500)` is expected to be reported as
/// `Inexact(10)` after `with_fetch(Some(10), 0, 1)` yields 10 exact rows.
#[test]
fn test_with_fetch_caps_ndv_at_row_count() {
    let ndv_column = ColumnStatistics {
        distinct_count: Precision::Inexact(500),
        ..Default::default()
    };
    let input = Statistics {
        num_rows: Precision::Exact(1000),
        total_byte_size: Precision::Exact(8000),
        column_statistics: vec![ndv_column],
    };

    let limited = input.with_fetch(Some(10), 0, 1).unwrap();

    assert_eq!(limited.num_rows, Precision::Exact(10));
    let ndv = &limited.column_statistics[0].distinct_count;
    assert_eq!(*ndv, Precision::Inexact(10));
}
2619
/// Same as the no-skip case but with `with_fetch(Some(10), 5, 1)`: the
/// result is expected to have 10 exact rows and a distinct count of
/// `Inexact(10)`.
#[test]
fn test_with_fetch_caps_ndv_with_skip() {
    let ndv_column = ColumnStatistics {
        distinct_count: Precision::Inexact(500),
        ..Default::default()
    };
    let input = Statistics {
        num_rows: Precision::Exact(1000),
        total_byte_size: Precision::Exact(8000),
        column_statistics: vec![ndv_column],
    };

    let limited = input.with_fetch(Some(10), 5, 1).unwrap();

    assert_eq!(limited.num_rows, Precision::Exact(10));
    let ndv = &limited.column_statistics[0].distinct_count;
    assert_eq!(*ndv, Precision::Inexact(10));
}
2641
/// `with_fetch(Some(100), 995, 1)` on 1000 exact rows is expected to leave
/// 5 exact rows and a distinct count of `Inexact(5)`.
#[test]
fn test_with_fetch_caps_ndv_with_large_skip() {
    let ndv_column = ColumnStatistics {
        distinct_count: Precision::Inexact(500),
        ..Default::default()
    };
    let input = Statistics {
        num_rows: Precision::Exact(1000),
        total_byte_size: Precision::Exact(8000),
        column_statistics: vec![ndv_column],
    };

    let limited = input.with_fetch(Some(100), 995, 1).unwrap();

    assert_eq!(limited.num_rows, Precision::Exact(5));
    let ndv = &limited.column_statistics[0].distinct_count;
    assert_eq!(*ndv, Precision::Inexact(5));
}
2663
/// A distinct count of `Inexact(5)` is expected to stay `Inexact(5)` when
/// `with_fetch(Some(10), 0, 1)` leaves 10 exact rows.
#[test]
fn test_with_fetch_ndv_below_row_count_unchanged() {
    let ndv_column = ColumnStatistics {
        distinct_count: Precision::Inexact(5),
        ..Default::default()
    };
    let input = Statistics {
        num_rows: Precision::Exact(1000),
        total_byte_size: Precision::Exact(8000),
        column_statistics: vec![ndv_column],
    };

    let limited = input.with_fetch(Some(10), 0, 1).unwrap();

    assert_eq!(limited.num_rows, Precision::Exact(10));
    let ndv = &limited.column_statistics[0].distinct_count;
    assert_eq!(*ndv, Precision::Inexact(5));
}
2683
/// Merges two fully exact, two-column `Int32` statistics with
/// `try_merge_iter` and checks the merged row count, byte size, null counts,
/// min/max values and sums (the sums are expected as `Int64`).
#[test]
fn test_try_merge_iter_basic() {
    let fields = vec![
        Field::new("col1", DataType::Int32, false),
        Field::new("col2", DataType::Int32, false),
    ];
    let schema = Arc::new(Schema::new(fields));

    // Exact Int32 column statistics with no distinct count.
    let int32_column = |nulls, min, max, sum, bytes| ColumnStatistics {
        null_count: Precision::Exact(nulls),
        max_value: Precision::Exact(ScalarValue::Int32(Some(max))),
        min_value: Precision::Exact(ScalarValue::Int32(Some(min))),
        sum_value: Precision::Exact(ScalarValue::Int32(Some(sum))),
        distinct_count: Precision::Absent,
        byte_size: Precision::Exact(bytes),
    };

    let first = Statistics {
        num_rows: Precision::Exact(10),
        total_byte_size: Precision::Exact(100),
        column_statistics: vec![
            int32_column(1, 1, 100, 500, 40),
            int32_column(2, 10, 200, 1000, 40),
        ],
    };
    let second = Statistics {
        num_rows: Precision::Exact(15),
        total_byte_size: Precision::Exact(150),
        column_statistics: vec![
            int32_column(2, -10, 120, 600, 60),
            int32_column(3, 5, 180, 1200, 60),
        ],
    };

    let merged = Statistics::try_merge_iter(vec![&first, &second], &schema).unwrap();

    assert_eq!(merged.num_rows, Precision::Exact(25));
    assert_eq!(merged.total_byte_size, Precision::Exact(250));

    let merged_col1 = &merged.column_statistics[0];
    assert_eq!(merged_col1.null_count, Precision::Exact(3));
    assert_eq!(
        merged_col1.max_value,
        Precision::Exact(ScalarValue::Int32(Some(120)))
    );
    assert_eq!(
        merged_col1.min_value,
        Precision::Exact(ScalarValue::Int32(Some(-10)))
    );
    assert_eq!(
        merged_col1.sum_value,
        Precision::Exact(ScalarValue::Int64(Some(1100)))
    );

    let merged_col2 = &merged.column_statistics[1];
    assert_eq!(merged_col2.null_count, Precision::Exact(5));
    assert_eq!(
        merged_col2.max_value,
        Precision::Exact(ScalarValue::Int32(Some(200)))
    );
    assert_eq!(
        merged_col2.min_value,
        Precision::Exact(ScalarValue::Int32(Some(5)))
    );
    assert_eq!(
        merged_col2.sum_value,
        Precision::Exact(ScalarValue::Int64(Some(2200)))
    );
}
2773
/// Merges two statistics whose fields mix exact, inexact and absent values
/// and checks that the merged row count, byte size, null count, min and max
/// are inexact while the sum is absent.
#[test]
fn test_try_merge_iter_mixed_precision() {
    let fields = vec![Field::new("col1", DataType::Int32, false)];
    let schema = Arc::new(Schema::new(fields));

    let first_column = ColumnStatistics {
        null_count: Precision::Exact(1),
        min_value: Precision::Inexact(ScalarValue::Int32(Some(1))),
        max_value: Precision::Exact(ScalarValue::Int32(Some(100))),
        sum_value: Precision::Exact(ScalarValue::Int32(Some(500))),
        distinct_count: Precision::Absent,
        byte_size: Precision::Exact(40),
    };
    let first = Statistics {
        num_rows: Precision::Exact(10),
        total_byte_size: Precision::Inexact(100),
        column_statistics: vec![first_column],
    };

    let second_column = ColumnStatistics {
        null_count: Precision::Inexact(2),
        min_value: Precision::Exact(ScalarValue::Int32(Some(-10))),
        max_value: Precision::Inexact(ScalarValue::Int32(Some(120))),
        sum_value: Precision::Absent,
        distinct_count: Precision::Absent,
        byte_size: Precision::Inexact(60),
    };
    let second = Statistics {
        num_rows: Precision::Inexact(15),
        total_byte_size: Precision::Exact(150),
        column_statistics: vec![second_column],
    };

    let merged = Statistics::try_merge_iter(vec![&first, &second], &schema).unwrap();

    assert_eq!(merged.num_rows, Precision::Inexact(25));
    assert_eq!(merged.total_byte_size, Precision::Inexact(250));

    let merged_column = &merged.column_statistics[0];
    assert_eq!(merged_column.null_count, Precision::Inexact(3));

    let expected_max = Precision::Inexact(ScalarValue::Int32(Some(120)));
    assert_eq!(merged_column.max_value, expected_max);

    let expected_min = Precision::Inexact(ScalarValue::Int32(Some(-10)));
    assert_eq!(merged_column.min_value, expected_min);

    assert_eq!(merged_column.sum_value, Precision::Absent);
}
2827
/// Merging an empty list against a one-column schema is expected to give
/// absent row count and byte size, and exactly one column statistic whose
/// null count is absent.
#[test]
fn test_try_merge_iter_empty() {
    let fields = vec![Field::new("col1", DataType::Int32, false)];
    let schema = Arc::new(Schema::new(fields));

    let nothing = Vec::<&Statistics>::new();
    let merged = Statistics::try_merge_iter(nothing, &schema).unwrap();

    assert_eq!(merged.num_rows, Precision::Absent);
    assert_eq!(merged.total_byte_size, Precision::Absent);
    assert_eq!(merged.column_statistics.len(), 1);

    let only_column = &merged.column_statistics[0];
    assert_eq!(only_column.null_count, Precision::Absent);
}
2847
/// Merging a single statistics value is expected to return a value equal to
/// the input.
#[test]
fn test_try_merge_iter_single_item() {
    let fields = vec![Field::new("col1", DataType::Int32, false)];
    let schema = Arc::new(Schema::new(fields));

    let only_column = ColumnStatistics {
        null_count: Precision::Exact(1),
        min_value: Precision::Exact(ScalarValue::Int32(Some(1))),
        max_value: Precision::Exact(ScalarValue::Int32(Some(100))),
        sum_value: Precision::Exact(ScalarValue::Int32(Some(500))),
        distinct_count: Precision::Exact(10),
        byte_size: Precision::Exact(40),
    };
    let single = Statistics {
        num_rows: Precision::Exact(10),
        total_byte_size: Precision::Exact(100),
        column_statistics: vec![only_column],
    };

    let merged = Statistics::try_merge_iter(vec![&single], &schema).unwrap();

    assert_eq!(merged, single);
}
2874
/// Merging statistics with zero columns and with one column is expected to
/// fail with a message naming the differing column counts.
#[test]
fn test_try_merge_iter_mismatched_columns() {
    let fields = vec![Field::new("col1", DataType::Int32, false)];
    let schema = Arc::new(Schema::new(fields));

    let no_columns = Statistics::default();
    let one_column =
        Statistics::default().add_column_statistics(ColumnStatistics::new_unknown());

    let merge_error =
        Statistics::try_merge_iter(vec![&no_columns, &one_column], &schema).unwrap_err();

    assert_contains!(
        merge_error.to_string(),
        "Cannot merge statistics with different number of columns: 0 vs 1"
    );
}
2894
/// Merges three fully exact single-column `Int64` statistics and checks the
/// merged totals, min/max, sum and byte size; the merged distinct count is
/// expected to be `Inexact(29)`.
#[test]
fn test_try_merge_iter_three_items() {
    let fields = vec![Field::new("col1", DataType::Int64, false)];
    let schema = Arc::new(Schema::new(fields));

    // Fully exact Int64 column statistics.
    let int64_column = |nulls, min, max, sum, ndv, bytes| ColumnStatistics {
        null_count: Precision::Exact(nulls),
        max_value: Precision::Exact(ScalarValue::Int64(Some(max))),
        min_value: Precision::Exact(ScalarValue::Int64(Some(min))),
        sum_value: Precision::Exact(ScalarValue::Int64(Some(sum))),
        distinct_count: Precision::Exact(ndv),
        byte_size: Precision::Exact(bytes),
    };
    // Single-column statistics with exact row count and total byte size.
    let partition = |rows, total_bytes, column| Statistics {
        num_rows: Precision::Exact(rows),
        total_byte_size: Precision::Exact(total_bytes),
        column_statistics: vec![column],
    };

    let first = partition(10, 100, int64_column(1, 10, 100, 500, 8, 80));
    let second = partition(20, 200, int64_column(2, 5, 200, 1000, 15, 160));
    let third = partition(30, 300, int64_column(3, 1, 150, 2000, 25, 240));

    let merged =
        Statistics::try_merge_iter(vec![&first, &second, &third], &schema).unwrap();

    assert_eq!(merged.num_rows, Precision::Exact(60));
    assert_eq!(merged.total_byte_size, Precision::Exact(600));

    let merged_column = &merged.column_statistics[0];
    assert_eq!(merged_column.null_count, Precision::Exact(6));
    assert_eq!(
        merged_column.max_value,
        Precision::Exact(ScalarValue::Int64(Some(200)))
    );
    assert_eq!(
        merged_column.min_value,
        Precision::Exact(ScalarValue::Int64(Some(1)))
    );
    assert_eq!(
        merged_column.sum_value,
        Precision::Exact(ScalarValue::Int64(Some(3500)))
    );
    assert_eq!(merged_column.byte_size, Precision::Exact(480));
    assert_eq!(merged_column.distinct_count, Precision::Inexact(29));
}
2968
/// Merges two exact `Float64` column statistics and checks the merged max,
/// min and sum.
#[test]
fn test_try_merge_iter_float_types() {
    let fields = vec![Field::new("col1", DataType::Float64, false)];
    let schema = Arc::new(Schema::new(fields));

    // Ten-row, 80-byte statistics differing only in min, max and sum.
    let float_stats = |min, max, sum| Statistics {
        num_rows: Precision::Exact(10),
        total_byte_size: Precision::Exact(80),
        column_statistics: vec![ColumnStatistics {
            null_count: Precision::Exact(0),
            max_value: Precision::Exact(ScalarValue::Float64(Some(max))),
            min_value: Precision::Exact(ScalarValue::Float64(Some(min))),
            sum_value: Precision::Exact(ScalarValue::Float64(Some(sum))),
            distinct_count: Precision::Absent,
            byte_size: Precision::Exact(80),
        }],
    };

    let first = float_stats(1.1, 99.9, 500.5);
    let second = float_stats(0.5, 200.0, 1000.0);

    let merged = Statistics::try_merge_iter(vec![&first, &second], &schema).unwrap();
    let merged_column = &merged.column_statistics[0];

    let expected_max = Precision::Exact(ScalarValue::Float64(Some(200.0)));
    assert_eq!(merged_column.max_value, expected_max);

    let expected_min = Precision::Exact(ScalarValue::Float64(Some(0.5)));
    assert_eq!(merged_column.min_value, expected_min);

    let expected_sum = Precision::Exact(ScalarValue::Float64(Some(1500.5)));
    assert_eq!(merged_column.sum_value, expected_sum);
}
3020
/// Merges two exact `Utf8` column statistics and checks that the merged max
/// is "zebra", the merged min is "ant", and the sum stays absent.
#[test]
fn test_try_merge_iter_string_types() {
    let fields = vec![Field::new("col1", DataType::Utf8, false)];
    let schema = Arc::new(Schema::new(fields));

    let utf8 = |text: &str| Precision::Exact(ScalarValue::Utf8(Some(text.to_string())));
    // Ten-row, 100-byte statistics differing only in min and max.
    let string_stats = |min: &str, max: &str| Statistics {
        num_rows: Precision::Exact(10),
        total_byte_size: Precision::Exact(100),
        column_statistics: vec![ColumnStatistics {
            null_count: Precision::Exact(0),
            max_value: utf8(max),
            min_value: utf8(min),
            sum_value: Precision::Absent,
            distinct_count: Precision::Absent,
            byte_size: Precision::Exact(100),
        }],
    };

    let first = string_stats("ant", "dog");
    let second = string_stats("bat", "zebra");

    let merged = Statistics::try_merge_iter(vec![&first, &second], &schema).unwrap();
    let merged_column = &merged.column_statistics[0];

    assert_eq!(merged_column.max_value, utf8("zebra"));
    assert_eq!(merged_column.min_value, utf8("ant"));
    assert_eq!(merged_column.sum_value, Precision::Absent);
}
3066
/// Merges two statistics whose values are all inexact and checks that the
/// merged row count, byte size, null count, min, max and sum are inexact
/// (the sum is expected as `Int64`).
#[test]
fn test_try_merge_iter_all_inexact() {
    let fields = vec![Field::new("col1", DataType::Int32, false)];
    let schema = Arc::new(Schema::new(fields));

    // Inexact Int32 column statistics with no distinct count.
    let inexact_column = |nulls, min, max, sum, bytes| ColumnStatistics {
        null_count: Precision::Inexact(nulls),
        max_value: Precision::Inexact(ScalarValue::Int32(Some(max))),
        min_value: Precision::Inexact(ScalarValue::Int32(Some(min))),
        sum_value: Precision::Inexact(ScalarValue::Int32(Some(sum))),
        distinct_count: Precision::Absent,
        byte_size: Precision::Inexact(bytes),
    };

    let first = Statistics {
        num_rows: Precision::Inexact(10),
        total_byte_size: Precision::Inexact(100),
        column_statistics: vec![inexact_column(1, 1, 100, 500, 40)],
    };
    let second = Statistics {
        num_rows: Precision::Inexact(20),
        total_byte_size: Precision::Inexact(200),
        column_statistics: vec![inexact_column(2, -5, 200, 1000, 60)],
    };

    let merged = Statistics::try_merge_iter(vec![&first, &second], &schema).unwrap();

    assert_eq!(merged.num_rows, Precision::Inexact(30));
    assert_eq!(merged.total_byte_size, Precision::Inexact(300));

    let merged_column = &merged.column_statistics[0];
    assert_eq!(merged_column.null_count, Precision::Inexact(3));
    assert_eq!(
        merged_column.max_value,
        Precision::Inexact(ScalarValue::Int32(Some(200)))
    );
    assert_eq!(
        merged_column.min_value,
        Precision::Inexact(ScalarValue::Int32(Some(-5)))
    );
    assert_eq!(
        merged_column.sum_value,
        Precision::Inexact(ScalarValue::Int64(Some(1500)))
    );
}
3122
/// Table-driven check of `precision_min`: each row is
/// `(initial lhs, rhs, lhs expected after the call)`.
#[test]
fn test_precision_min_in_place() {
    let cases: Vec<(Precision<i32>, Precision<i32>, Precision<i32>)> = vec![
        // Both exact.
        (Precision::Exact(10), Precision::Exact(20), Precision::Exact(10)),
        (Precision::Exact(20), Precision::Exact(10), Precision::Exact(10)),
        (Precision::Exact(5), Precision::Exact(5), Precision::Exact(5)),
        // At least one side inexact.
        (Precision::Exact(10), Precision::Inexact(20), Precision::Inexact(10)),
        (Precision::Inexact(10), Precision::Exact(5), Precision::Inexact(5)),
        (Precision::Inexact(30), Precision::Inexact(20), Precision::Inexact(20)),
        // Either side absent.
        (Precision::Exact(10), Precision::Absent, Precision::Absent),
        (Precision::Absent, Precision::Exact(10), Precision::Absent),
    ];

    for (mut lhs, rhs, expected) in cases {
        precision_min(&mut lhs, &rhs);
        assert_eq!(lhs, expected);
    }
}
3162
/// Table-driven check of `precision_max`: each row is
/// `(initial lhs, rhs, lhs expected after the call)`.
#[test]
fn test_precision_max_in_place() {
    let cases: Vec<(Precision<i32>, Precision<i32>, Precision<i32>)> = vec![
        // Both exact.
        (Precision::Exact(10), Precision::Exact(20), Precision::Exact(20)),
        (Precision::Exact(20), Precision::Exact(10), Precision::Exact(20)),
        (Precision::Exact(5), Precision::Exact(5), Precision::Exact(5)),
        // At least one side inexact.
        (Precision::Exact(10), Precision::Inexact(20), Precision::Inexact(20)),
        (Precision::Inexact(10), Precision::Exact(5), Precision::Inexact(10)),
        (Precision::Inexact(20), Precision::Inexact(30), Precision::Inexact(30)),
        // Either side absent.
        (Precision::Exact(10), Precision::Absent, Precision::Absent),
        (Precision::Absent, Precision::Exact(10), Precision::Absent),
    ];

    for (mut lhs, rhs, expected) in cases {
        precision_max(&mut lhs, &rhs);
        assert_eq!(lhs, expected);
    }
}
3202
/// An exact `Int32` sum is expected to become an exact `Int64` sum with the
/// same value.
#[test]
fn test_cast_sum_value_to_sum_type_in_place_widens_int32() {
    let mut sum = Precision::Exact(ScalarValue::Int32(Some(42)));
    let widened = Precision::Exact(ScalarValue::Int64(Some(42)));

    cast_sum_value_to_sum_type_in_place(&mut sum);

    assert_eq!(sum, widened);
}
3209
/// An exact `Int64` sum is expected to be left unchanged by the cast.
#[test]
fn test_cast_sum_value_to_sum_type_in_place_preserves_int64() {
    let mut sum = Precision::Exact(ScalarValue::Int64(Some(100)));
    let unchanged = Precision::Exact(ScalarValue::Int64(Some(100)));

    cast_sum_value_to_sum_type_in_place(&mut sum);

    assert_eq!(sum, unchanged);
}
3217
/// An inexact `Int32` sum is expected to become an inexact `Int64` sum with
/// the same value.
#[test]
fn test_cast_sum_value_to_sum_type_in_place_inexact() {
    let mut sum = Precision::Inexact(ScalarValue::Int32(Some(42)));
    let widened = Precision::Inexact(ScalarValue::Int64(Some(42)));

    cast_sum_value_to_sum_type_in_place(&mut sum);

    assert_eq!(sum, widened);
}
3224
/// An absent sum is expected to stay absent after the cast.
#[test]
fn test_cast_sum_value_to_sum_type_in_place_absent() {
    let mut sum: Precision<ScalarValue> = Precision::Absent;

    cast_sum_value_to_sum_type_in_place(&mut sum);

    assert_eq!(sum, Precision::Absent);
}
3231
/// Adding exact `Int64` 20 to exact `Int64` 10 in place is expected to leave
/// exact `Int64` 30.
#[test]
fn test_precision_add_for_sum_in_place_same_type() {
    let addend = Precision::Exact(ScalarValue::Int64(Some(20)));
    let mut total = Precision::Exact(ScalarValue::Int64(Some(10)));

    precision_add_for_sum_in_place(&mut total, &addend);

    let expected = Precision::Exact(ScalarValue::Int64(Some(30)));
    assert_eq!(total, expected);
}
3240
/// Adding exact `Int32` 5 to exact `Int64` 10 in place is expected to leave
/// exact `Int64` 15.
#[test]
fn test_precision_add_for_sum_in_place_widens_rhs() {
    let addend = Precision::Exact(ScalarValue::Int32(Some(5)));
    let mut total = Precision::Exact(ScalarValue::Int64(Some(10)));

    precision_add_for_sum_in_place(&mut total, &addend);

    let expected = Precision::Exact(ScalarValue::Int64(Some(15)));
    assert_eq!(total, expected);
}
3249
/// Adding inexact `Int32` 5 to inexact `Int64` 10 in place is expected to
/// leave inexact `Int64` 15.
#[test]
fn test_precision_add_for_sum_in_place_inexact() {
    let addend = Precision::Inexact(ScalarValue::Int32(Some(5)));
    let mut total = Precision::Inexact(ScalarValue::Int64(Some(10)));

    precision_add_for_sum_in_place(&mut total, &addend);

    let expected = Precision::Inexact(ScalarValue::Int64(Some(15)));
    assert_eq!(total, expected);
}
3257
/// Adding an absent right-hand side to an exact sum in place is expected to
/// leave the sum absent.
#[test]
fn test_precision_add_for_sum_in_place_absent_rhs() {
    let missing: Precision<ScalarValue> = Precision::Absent;
    let mut total = Precision::Exact(ScalarValue::Int64(Some(10)));

    precision_add_for_sum_in_place(&mut total, &missing);

    assert_eq!(total, Precision::Absent);
}
3264}