1use std::fmt::{self, Debug, Display};
21
22use crate::{Result, ScalarValue};
23
24use crate::error::_plan_err;
25use arrow::datatypes::{DataType, Schema, SchemaRef};
26
/// A value tagged with how much it can be trusted.
///
/// The wrapped value is either known exactly, only estimated, or not
/// available at all. [`Precision::Absent`] is the [`Default`] variant.
#[derive(Clone, Copy, Default, PartialEq, Eq)]
pub enum Precision<T>
where
    T: Debug + Clone + PartialEq + Eq + PartialOrd,
{
    /// The value is known precisely.
    Exact(T),
    /// The value is an estimate and may differ from the true value.
    Inexact(T),
    /// No value is known.
    #[default]
    Absent,
}
39
40impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Precision<T> {
41 pub fn get_value(&self) -> Option<&T> {
44 match self {
45 Precision::Exact(value) | Precision::Inexact(value) => Some(value),
46 Precision::Absent => None,
47 }
48 }
49
50 pub fn map<U, F>(self, f: F) -> Precision<U>
53 where
54 F: Fn(T) -> U,
55 U: Debug + Clone + PartialEq + Eq + PartialOrd,
56 {
57 match self {
58 Precision::Exact(val) => Precision::Exact(f(val)),
59 Precision::Inexact(val) => Precision::Inexact(f(val)),
60 _ => Precision::<U>::Absent,
61 }
62 }
63
64 pub fn is_exact(&self) -> Option<bool> {
67 match self {
68 Precision::Exact(_) => Some(true),
69 Precision::Inexact(_) => Some(false),
70 _ => None,
71 }
72 }
73
74 pub fn max(&self, other: &Precision<T>) -> Precision<T> {
78 match (self, other) {
79 (Precision::Exact(a), Precision::Exact(b)) => {
80 Precision::Exact(if a >= b { a.clone() } else { b.clone() })
81 }
82 (Precision::Inexact(a), Precision::Exact(b))
83 | (Precision::Exact(a), Precision::Inexact(b))
84 | (Precision::Inexact(a), Precision::Inexact(b)) => {
85 Precision::Inexact(if a >= b { a.clone() } else { b.clone() })
86 }
87 (_, _) => Precision::Absent,
88 }
89 }
90
91 pub fn min(&self, other: &Precision<T>) -> Precision<T> {
95 match (self, other) {
96 (Precision::Exact(a), Precision::Exact(b)) => {
97 Precision::Exact(if a >= b { b.clone() } else { a.clone() })
98 }
99 (Precision::Inexact(a), Precision::Exact(b))
100 | (Precision::Exact(a), Precision::Inexact(b))
101 | (Precision::Inexact(a), Precision::Inexact(b)) => {
102 Precision::Inexact(if a >= b { b.clone() } else { a.clone() })
103 }
104 (_, _) => Precision::Absent,
105 }
106 }
107
108 pub fn to_inexact(self) -> Self {
110 match self {
111 Precision::Exact(value) => Precision::Inexact(value),
112 _ => self,
113 }
114 }
115}
116
117impl Precision<usize> {
118 pub fn add(&self, other: &Precision<usize>) -> Precision<usize> {
122 match (self, other) {
123 (Precision::Exact(a), Precision::Exact(b)) => Precision::Exact(a + b),
124 (Precision::Inexact(a), Precision::Exact(b))
125 | (Precision::Exact(a), Precision::Inexact(b))
126 | (Precision::Inexact(a), Precision::Inexact(b)) => Precision::Inexact(a + b),
127 (_, _) => Precision::Absent,
128 }
129 }
130
131 pub fn sub(&self, other: &Precision<usize>) -> Precision<usize> {
135 match (self, other) {
136 (Precision::Exact(a), Precision::Exact(b)) => Precision::Exact(a - b),
137 (Precision::Inexact(a), Precision::Exact(b))
138 | (Precision::Exact(a), Precision::Inexact(b))
139 | (Precision::Inexact(a), Precision::Inexact(b)) => Precision::Inexact(a - b),
140 (_, _) => Precision::Absent,
141 }
142 }
143
144 pub fn multiply(&self, other: &Precision<usize>) -> Precision<usize> {
148 match (self, other) {
149 (Precision::Exact(a), Precision::Exact(b)) => Precision::Exact(a * b),
150 (Precision::Inexact(a), Precision::Exact(b))
151 | (Precision::Exact(a), Precision::Inexact(b))
152 | (Precision::Inexact(a), Precision::Inexact(b)) => Precision::Inexact(a * b),
153 (_, _) => Precision::Absent,
154 }
155 }
156
157 pub fn with_estimated_selectivity(self, selectivity: f64) -> Self {
162 self.map(|v| ((v as f64 * selectivity).ceil()) as usize)
163 .to_inexact()
164 }
165}
166
167impl Precision<ScalarValue> {
168 pub fn add(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
172 match (self, other) {
173 (Precision::Exact(a), Precision::Exact(b)) => {
174 a.add(b).map(Precision::Exact).unwrap_or(Precision::Absent)
175 }
176 (Precision::Inexact(a), Precision::Exact(b))
177 | (Precision::Exact(a), Precision::Inexact(b))
178 | (Precision::Inexact(a), Precision::Inexact(b)) => a
179 .add(b)
180 .map(Precision::Inexact)
181 .unwrap_or(Precision::Absent),
182 (_, _) => Precision::Absent,
183 }
184 }
185
186 pub fn sub(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
190 match (self, other) {
191 (Precision::Exact(a), Precision::Exact(b)) => {
192 a.sub(b).map(Precision::Exact).unwrap_or(Precision::Absent)
193 }
194 (Precision::Inexact(a), Precision::Exact(b))
195 | (Precision::Exact(a), Precision::Inexact(b))
196 | (Precision::Inexact(a), Precision::Inexact(b)) => a
197 .sub(b)
198 .map(Precision::Inexact)
199 .unwrap_or(Precision::Absent),
200 (_, _) => Precision::Absent,
201 }
202 }
203
204 pub fn multiply(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
208 match (self, other) {
209 (Precision::Exact(a), Precision::Exact(b)) => a
210 .mul_checked(b)
211 .map(Precision::Exact)
212 .unwrap_or(Precision::Absent),
213 (Precision::Inexact(a), Precision::Exact(b))
214 | (Precision::Exact(a), Precision::Inexact(b))
215 | (Precision::Inexact(a), Precision::Inexact(b)) => a
216 .mul_checked(b)
217 .map(Precision::Inexact)
218 .unwrap_or(Precision::Absent),
219 (_, _) => Precision::Absent,
220 }
221 }
222
223 pub fn cast_to(&self, data_type: &DataType) -> Result<Precision<ScalarValue>> {
225 match self {
226 Precision::Exact(value) => value.cast_to(data_type).map(Precision::Exact),
227 Precision::Inexact(value) => value.cast_to(data_type).map(Precision::Inexact),
228 Precision::Absent => Ok(Precision::Absent),
229 }
230 }
231}
232
233impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Debug for Precision<T> {
234 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
235 match self {
236 Precision::Exact(inner) => write!(f, "Exact({inner:?})"),
237 Precision::Inexact(inner) => write!(f, "Inexact({inner:?})"),
238 Precision::Absent => write!(f, "Absent"),
239 }
240 }
241}
242
243impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Display for Precision<T> {
244 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
245 match self {
246 Precision::Exact(inner) => write!(f, "Exact({inner:?})"),
247 Precision::Inexact(inner) => write!(f, "Inexact({inner:?})"),
248 Precision::Absent => write!(f, "Absent"),
249 }
250 }
251}
252
253impl From<Precision<usize>> for Precision<ScalarValue> {
254 fn from(value: Precision<usize>) -> Self {
255 match value {
256 Precision::Exact(v) => Precision::Exact(ScalarValue::UInt64(Some(v as u64))),
257 Precision::Inexact(v) => {
258 Precision::Inexact(ScalarValue::UInt64(Some(v as u64)))
259 }
260 Precision::Absent => Precision::Absent,
261 }
262 }
263}
264
265#[derive(Debug, Clone, PartialEq, Eq)]
270pub struct Statistics {
271 pub num_rows: Precision<usize>,
273 pub total_byte_size: Precision<usize>,
275 pub column_statistics: Vec<ColumnStatistics>,
280}
281
282impl Default for Statistics {
283 fn default() -> Self {
286 Self {
287 num_rows: Precision::Absent,
288 total_byte_size: Precision::Absent,
289 column_statistics: vec![],
290 }
291 }
292}
293
294impl Statistics {
295 pub fn new_unknown(schema: &Schema) -> Self {
298 Self {
299 num_rows: Precision::Absent,
300 total_byte_size: Precision::Absent,
301 column_statistics: Statistics::unknown_column(schema),
302 }
303 }
304
305 pub fn unknown_column(schema: &Schema) -> Vec<ColumnStatistics> {
307 schema
308 .fields()
309 .iter()
310 .map(|_| ColumnStatistics::new_unknown())
311 .collect()
312 }
313
314 pub fn with_num_rows(mut self, num_rows: Precision<usize>) -> Self {
316 self.num_rows = num_rows;
317 self
318 }
319
320 pub fn with_total_byte_size(mut self, total_byte_size: Precision<usize>) -> Self {
322 self.total_byte_size = total_byte_size;
323 self
324 }
325
326 pub fn add_column_statistics(mut self, column_stats: ColumnStatistics) -> Self {
328 self.column_statistics.push(column_stats);
329 self
330 }
331
332 pub fn to_inexact(mut self) -> Self {
335 self.num_rows = self.num_rows.to_inexact();
336 self.total_byte_size = self.total_byte_size.to_inexact();
337 self.column_statistics = self
338 .column_statistics
339 .into_iter()
340 .map(|s| s.to_inexact())
341 .collect();
342 self
343 }
344
345 pub fn project(mut self, projection: Option<&Vec<usize>>) -> Self {
351 let Some(projection) = projection else {
352 return self;
353 };
354
355 #[allow(clippy::large_enum_variant)]
356 enum Slot {
357 Taken(usize),
359 Present(ColumnStatistics),
361 }
362
363 let mut columns: Vec<_> = std::mem::take(&mut self.column_statistics)
365 .into_iter()
366 .map(Slot::Present)
367 .collect();
368
369 for idx in projection {
370 let next_idx = self.column_statistics.len();
371 let slot = std::mem::replace(
372 columns.get_mut(*idx).expect("projection out of bounds"),
373 Slot::Taken(next_idx),
374 );
375 match slot {
376 Slot::Present(col) => self.column_statistics.push(col),
378 Slot::Taken(prev_idx) => self
380 .column_statistics
381 .push(self.column_statistics[prev_idx].clone()),
382 }
383 }
384
385 self
386 }
387
388 pub fn with_fetch(
393 mut self,
394 schema: SchemaRef,
395 fetch: Option<usize>,
396 skip: usize,
397 n_partitions: usize,
398 ) -> Result<Self> {
399 let fetch_val = fetch.unwrap_or(usize::MAX);
400
401 self.num_rows = match self {
402 Statistics {
403 num_rows: Precision::Exact(nr),
404 ..
405 }
406 | Statistics {
407 num_rows: Precision::Inexact(nr),
408 ..
409 } => {
410 if nr <= skip {
412 Precision::Exact(0)
414 } else if nr <= fetch_val && skip == 0 {
415 return Ok(self);
421 } else if nr - skip <= fetch_val {
422 check_num_rows(
426 (nr - skip).checked_mul(n_partitions),
427 self.num_rows.is_exact().unwrap(),
429 )
430 } else {
431 check_num_rows(
437 fetch_val.checked_mul(n_partitions),
438 self.num_rows.is_exact().unwrap(),
440 )
441 }
442 }
443 Statistics {
444 num_rows: Precision::Absent,
445 ..
446 } => check_num_rows(fetch.and_then(|v| v.checked_mul(n_partitions)), false),
447 };
448 self.column_statistics = Statistics::unknown_column(&schema);
449 self.total_byte_size = Precision::Absent;
450 Ok(self)
451 }
452
453 pub fn try_merge_iter<'a, I>(items: I, schema: &Schema) -> Result<Statistics>
460 where
461 I: IntoIterator<Item = &'a Statistics>,
462 {
463 let mut items = items.into_iter();
464
465 let Some(init) = items.next() else {
466 return Ok(Statistics::new_unknown(schema));
467 };
468 items.try_fold(init.clone(), |acc: Statistics, item_stats: &Statistics| {
469 acc.try_merge(item_stats)
470 })
471 }
472
473 pub fn try_merge(self, other: &Statistics) -> Result<Self> {
514 let Self {
515 mut num_rows,
516 mut total_byte_size,
517 mut column_statistics,
518 } = self;
519
520 num_rows = num_rows.add(&other.num_rows);
522 total_byte_size = total_byte_size.add(&other.total_byte_size);
523
524 if column_statistics.len() != other.column_statistics.len() {
525 return _plan_err!(
526 "Cannot merge statistics with different number of columns: {} vs {}",
527 column_statistics.len(),
528 other.column_statistics.len()
529 );
530 }
531
532 for (item_col_stats, col_stats) in other
533 .column_statistics
534 .iter()
535 .zip(column_statistics.iter_mut())
536 {
537 col_stats.null_count = col_stats.null_count.add(&item_col_stats.null_count);
538 col_stats.max_value = col_stats.max_value.max(&item_col_stats.max_value);
539 col_stats.min_value = col_stats.min_value.min(&item_col_stats.min_value);
540 col_stats.sum_value = col_stats.sum_value.add(&item_col_stats.sum_value);
541 col_stats.distinct_count = Precision::Absent;
542 }
543
544 Ok(Statistics {
545 num_rows,
546 total_byte_size,
547 column_statistics,
548 })
549 }
550}
551
552fn check_num_rows(value: Option<usize>, is_exact: bool) -> Precision<usize> {
555 if let Some(value) = value {
556 if is_exact {
557 Precision::Exact(value)
558 } else {
559 Precision::Inexact(value)
561 }
562 } else {
563 Precision::Absent
566 }
567}
568
569impl Display for Statistics {
570 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
571 let column_stats = self
573 .column_statistics
574 .iter()
575 .enumerate()
576 .map(|(i, cs)| {
577 let s = format!("(Col[{i}]:");
578 let s = if cs.min_value != Precision::Absent {
579 format!("{} Min={}", s, cs.min_value)
580 } else {
581 s
582 };
583 let s = if cs.max_value != Precision::Absent {
584 format!("{} Max={}", s, cs.max_value)
585 } else {
586 s
587 };
588 let s = if cs.sum_value != Precision::Absent {
589 format!("{} Sum={}", s, cs.sum_value)
590 } else {
591 s
592 };
593 let s = if cs.null_count != Precision::Absent {
594 format!("{} Null={}", s, cs.null_count)
595 } else {
596 s
597 };
598 let s = if cs.distinct_count != Precision::Absent {
599 format!("{} Distinct={}", s, cs.distinct_count)
600 } else {
601 s
602 };
603
604 s + ")"
605 })
606 .collect::<Vec<_>>()
607 .join(",");
608
609 write!(
610 f,
611 "Rows={}, Bytes={}, [{}]",
612 self.num_rows, self.total_byte_size, column_stats
613 )?;
614
615 Ok(())
616 }
617}
618
619#[derive(Clone, Debug, PartialEq, Eq, Default)]
621pub struct ColumnStatistics {
622 pub null_count: Precision<usize>,
624 pub max_value: Precision<ScalarValue>,
626 pub min_value: Precision<ScalarValue>,
628 pub sum_value: Precision<ScalarValue>,
630 pub distinct_count: Precision<usize>,
632}
633
634impl ColumnStatistics {
635 pub fn is_singleton(&self) -> bool {
637 match (&self.min_value, &self.max_value) {
638 (Precision::Exact(min), Precision::Exact(max)) => {
640 !min.is_null() && !max.is_null() && (min == max)
641 }
642 (_, _) => false,
643 }
644 }
645
646 pub fn new_unknown() -> Self {
648 Self {
649 null_count: Precision::Absent,
650 max_value: Precision::Absent,
651 min_value: Precision::Absent,
652 sum_value: Precision::Absent,
653 distinct_count: Precision::Absent,
654 }
655 }
656
657 pub fn with_null_count(mut self, null_count: Precision<usize>) -> Self {
659 self.null_count = null_count;
660 self
661 }
662
663 pub fn with_max_value(mut self, max_value: Precision<ScalarValue>) -> Self {
665 self.max_value = max_value;
666 self
667 }
668
669 pub fn with_min_value(mut self, min_value: Precision<ScalarValue>) -> Self {
671 self.min_value = min_value;
672 self
673 }
674
675 pub fn with_sum_value(mut self, sum_value: Precision<ScalarValue>) -> Self {
677 self.sum_value = sum_value;
678 self
679 }
680
681 pub fn with_distinct_count(mut self, distinct_count: Precision<usize>) -> Self {
683 self.distinct_count = distinct_count;
684 self
685 }
686
687 pub fn to_inexact(mut self) -> Self {
691 self.null_count = self.null_count.to_inexact();
692 self.max_value = self.max_value.to_inexact();
693 self.min_value = self.min_value.to_inexact();
694 self.sum_value = self.sum_value.to_inexact();
695 self.distinct_count = self.distinct_count.to_inexact();
696 self
697 }
698}
699
#[cfg(test)]
mod tests {
    use super::*;
    use crate::assert_contains;
    use arrow::datatypes::Field;
    use std::sync::Arc;

    /// Exact `Int32` scalar statistic.
    fn exact_i32(value: i32) -> Precision<ScalarValue> {
        Precision::Exact(ScalarValue::Int32(Some(value)))
    }

    /// Inexact `Int32` scalar statistic.
    fn inexact_i32(value: i32) -> Precision<ScalarValue> {
        Precision::Inexact(ScalarValue::Int32(Some(value)))
    }

    /// Exact statistic holding a null `Int32` scalar.
    fn exact_null_i32() -> Precision<ScalarValue> {
        Precision::Exact(ScalarValue::Int32(None))
    }

    /// Schema with a single non-nullable `Int32` column named `col1`.
    fn single_int32_schema() -> Arc<Schema> {
        Arc::new(Schema::new(vec![Field::new(
            "col1",
            DataType::Int32,
            false,
        )]))
    }

    /// Column statistics with exact `Int32` values and no distinct count.
    fn exact_int32_column(null_count: usize, max: i32, min: i32, sum: i32) -> ColumnStatistics {
        ColumnStatistics {
            null_count: Precision::Exact(null_count),
            max_value: exact_i32(max),
            min_value: exact_i32(min),
            sum_value: exact_i32(sum),
            distinct_count: Precision::Absent,
        }
    }

    #[test]
    fn test_get_value() {
        let exact: Precision<i32> = Precision::Exact(42);
        let inexact: Precision<i32> = Precision::Inexact(23);
        let absent: Precision<i32> = Precision::Absent;

        assert_eq!(*exact.get_value().unwrap(), 42);
        assert_eq!(*inexact.get_value().unwrap(), 23);
        assert_eq!(absent.get_value(), None);
    }

    #[test]
    fn test_map() {
        let squared = |x: i32| x * x;

        let cases: [(Precision<i32>, Precision<i32>); 3] = [
            (Precision::Exact(42), Precision::Exact(1764)),
            (Precision::Inexact(23), Precision::Inexact(529)),
            (Precision::Absent, Precision::Absent),
        ];
        for (input, expected) in cases {
            assert_eq!(input.map(squared), expected);
        }
    }

    #[test]
    fn test_is_exact() {
        let cases: [(Precision<i32>, Option<bool>); 3] = [
            (Precision::Exact(42), Some(true)),
            (Precision::Inexact(23), Some(false)),
            (Precision::Absent, None),
        ];
        for (input, expected) in cases {
            assert_eq!(input.is_exact(), expected);
        }
    }

    #[test]
    fn test_max() {
        let exact_42: Precision<i32> = Precision::Exact(42);
        let inexact_23: Precision<i32> = Precision::Inexact(23);
        let exact_30: Precision<i32> = Precision::Exact(30);

        let cases = [
            (exact_42, inexact_23, Precision::Inexact(42)),
            (exact_42, exact_30, Precision::Exact(42)),
            (inexact_23, exact_30, Precision::Inexact(30)),
            (exact_42, Precision::Absent, Precision::Absent),
        ];
        for (lhs, rhs, expected) in cases {
            assert_eq!(lhs.max(&rhs), expected);
        }
    }

    #[test]
    fn test_min() {
        let exact_42: Precision<i32> = Precision::Exact(42);
        let inexact_23: Precision<i32> = Precision::Inexact(23);
        let exact_30: Precision<i32> = Precision::Exact(30);

        let cases = [
            (exact_42, inexact_23, Precision::Inexact(23)),
            (exact_42, exact_30, Precision::Exact(30)),
            (inexact_23, exact_30, Precision::Inexact(23)),
            (exact_42, Precision::Absent, Precision::Absent),
        ];
        for (lhs, rhs, expected) in cases {
            assert_eq!(lhs.min(&rhs), expected);
        }
    }

    #[test]
    fn test_to_inexact() {
        let exact: Precision<i32> = Precision::Exact(42);
        let inexact: Precision<i32> = Precision::Inexact(42);
        let absent: Precision<i32> = Precision::Absent;

        assert_eq!(exact.to_inexact(), inexact);
        assert_eq!(inexact.to_inexact(), inexact);
        assert_eq!(absent.to_inexact(), absent);
    }

    #[test]
    fn test_add() {
        let exact_42: Precision<usize> = Precision::Exact(42);
        let inexact_23: Precision<usize> = Precision::Inexact(23);
        let exact_30: Precision<usize> = Precision::Exact(30);

        let cases = [
            (exact_42, inexact_23, Precision::Inexact(65)),
            (exact_42, exact_30, Precision::Exact(72)),
            (inexact_23, exact_30, Precision::Inexact(53)),
            (exact_42, Precision::Absent, Precision::Absent),
        ];
        for (lhs, rhs, expected) in cases {
            assert_eq!(lhs.add(&rhs), expected);
        }
    }

    #[test]
    fn test_add_scalar() {
        let lhs = exact_i32(42);

        assert_eq!(lhs.add(&exact_i32(23)), exact_i32(65));
        assert_eq!(lhs.add(&inexact_i32(23)), inexact_i32(65));
        assert_eq!(lhs.add(&exact_null_i32()), exact_null_i32());
        assert_eq!(lhs.add(&Precision::Absent), Precision::Absent);
    }

    #[test]
    fn test_sub() {
        let exact_42: Precision<usize> = Precision::Exact(42);
        let inexact_23: Precision<usize> = Precision::Inexact(23);
        let exact_30: Precision<usize> = Precision::Exact(30);

        let cases = [
            (exact_42, inexact_23, Precision::Inexact(19)),
            (exact_42, exact_30, Precision::Exact(12)),
            (exact_42, Precision::Absent, Precision::Absent),
        ];
        for (lhs, rhs, expected) in cases {
            assert_eq!(lhs.sub(&rhs), expected);
        }
    }

    #[test]
    fn test_sub_scalar() {
        let lhs = exact_i32(42);

        assert_eq!(lhs.sub(&exact_i32(23)), exact_i32(19));
        assert_eq!(lhs.sub(&inexact_i32(23)), inexact_i32(19));
        assert_eq!(lhs.sub(&exact_null_i32()), exact_null_i32());
        assert_eq!(lhs.sub(&Precision::Absent), Precision::Absent);
    }

    #[test]
    fn test_multiply() {
        let exact_6: Precision<usize> = Precision::Exact(6);
        let inexact_3: Precision<usize> = Precision::Inexact(3);
        let exact_5: Precision<usize> = Precision::Exact(5);

        let cases = [
            (exact_6, inexact_3, Precision::Inexact(18)),
            (exact_6, exact_5, Precision::Exact(30)),
            (inexact_3, exact_5, Precision::Inexact(15)),
            (exact_6, Precision::Absent, Precision::Absent),
        ];
        for (lhs, rhs, expected) in cases {
            assert_eq!(lhs.multiply(&rhs), expected);
        }
    }

    #[test]
    fn test_multiply_scalar() {
        let lhs = exact_i32(6);

        assert_eq!(lhs.multiply(&exact_i32(5)), exact_i32(30));
        assert_eq!(lhs.multiply(&inexact_i32(5)), inexact_i32(30));
        assert_eq!(lhs.multiply(&exact_null_i32()), exact_null_i32());
        assert_eq!(lhs.multiply(&Precision::Absent), Precision::Absent);
    }

    #[test]
    fn test_cast_to() {
        // Valid widening casts keep the exactness tag.
        assert_eq!(
            exact_i32(42).cast_to(&DataType::Int64).unwrap(),
            Precision::Exact(ScalarValue::Int64(Some(42))),
        );
        assert_eq!(
            inexact_i32(42).cast_to(&DataType::Int64).unwrap(),
            Precision::Inexact(ScalarValue::Int64(Some(42))),
        );
        // Null scalars are cast as well.
        assert_eq!(
            exact_null_i32().cast_to(&DataType::Int64).unwrap(),
            Precision::Exact(ScalarValue::Int64(None)),
        );
        // A value that does not fit the target type is an error.
        assert!(exact_i32(256).cast_to(&DataType::Int8).is_err());
    }

    #[test]
    fn test_precision_cloning() {
        // `Precision<usize>` is `Copy`: the original stays usable.
        let original: Precision<usize> = Precision::Exact(42);
        let copied = original;
        assert_eq!(original, copied);

        // `Precision<ScalarValue>` is only `Clone`.
        let original: Precision<ScalarValue> =
            Precision::Exact(ScalarValue::Int64(Some(42)));
        #[allow(clippy::redundant_clone)]
        let cloned = original.clone();
        assert_eq!(original, cloned);
    }

    #[test]
    fn test_project_none() {
        let projection: Option<Vec<usize>> = None;
        let projected = make_stats(vec![10, 20, 30]).project(projection.as_ref());
        assert_eq!(projected, make_stats(vec![10, 20, 30]));
    }

    #[test]
    fn test_project_empty() {
        let projection: Option<Vec<usize>> = Some(Vec::new());
        let projected = make_stats(vec![10, 20, 30]).project(projection.as_ref());
        assert_eq!(projected, make_stats(vec![]));
    }

    #[test]
    fn test_project_swap() {
        let projection: Option<Vec<usize>> = Some(vec![2, 1]);
        let projected = make_stats(vec![10, 20, 30]).project(projection.as_ref());
        assert_eq!(projected, make_stats(vec![30, 20]));
    }

    #[test]
    fn test_project_repeated() {
        let projection: Option<Vec<usize>> = Some(vec![1, 2, 1, 1, 0, 2]);
        let projected = make_stats(vec![10, 20, 30]).project(projection.as_ref());
        assert_eq!(projected, make_stats(vec![20, 30, 20, 20, 10, 30]));
    }

    /// Statistics with one column per entry of `counts`; each entry is used
    /// as that column's null count so columns can be told apart.
    fn make_stats(counts: impl IntoIterator<Item = usize>) -> Statistics {
        let column_statistics = counts.into_iter().map(col_stats_i64).collect();
        Statistics {
            num_rows: Precision::Exact(42),
            total_byte_size: Precision::Exact(500),
            column_statistics,
        }
    }

    /// Fixed `Int64` column statistics that differ only in `null_count`.
    fn col_stats_i64(null_count: usize) -> ColumnStatistics {
        ColumnStatistics::new_unknown()
            .with_null_count(Precision::Exact(null_count))
            .with_max_value(Precision::Exact(ScalarValue::Int64(Some(42))))
            .with_min_value(Precision::Exact(ScalarValue::Int64(Some(64))))
            .with_sum_value(Precision::Exact(ScalarValue::Int64(Some(4600))))
            .with_distinct_count(Precision::Exact(100))
    }

    #[test]
    fn test_try_merge_basic() {
        let schema = Arc::new(Schema::new(vec![
            Field::new("col1", DataType::Int32, false),
            Field::new("col2", DataType::Int32, false),
        ]));

        let first = Statistics {
            num_rows: Precision::Exact(10),
            total_byte_size: Precision::Exact(100),
            column_statistics: vec![
                exact_int32_column(1, 100, 1, 500),
                exact_int32_column(2, 200, 10, 1000),
            ],
        };
        let second = Statistics {
            num_rows: Precision::Exact(15),
            total_byte_size: Precision::Exact(150),
            column_statistics: vec![
                exact_int32_column(2, 120, -10, 600),
                exact_int32_column(3, 180, 5, 1200),
            ],
        };

        let items = vec![first, second];
        let merged = Statistics::try_merge_iter(&items, &schema).unwrap();

        assert_eq!(merged.num_rows, Precision::Exact(25));
        assert_eq!(merged.total_byte_size, Precision::Exact(250));

        // (null_count, max, min, sum) expected for col1 and col2.
        let expected = [(3, 120, -10, 1100), (5, 200, 5, 2200)];
        for (idx, (nulls, max, min, sum)) in expected.into_iter().enumerate() {
            let col = &merged.column_statistics[idx];
            assert_eq!(col.null_count, Precision::Exact(nulls));
            assert_eq!(col.max_value, exact_i32(max));
            assert_eq!(col.min_value, exact_i32(min));
            assert_eq!(col.sum_value, exact_i32(sum));
        }
    }

    #[test]
    fn test_try_merge_mixed_precision() {
        let schema = single_int32_schema();

        let first = Statistics {
            num_rows: Precision::Exact(10),
            total_byte_size: Precision::Inexact(100),
            column_statistics: vec![ColumnStatistics {
                null_count: Precision::Exact(1),
                max_value: exact_i32(100),
                min_value: inexact_i32(1),
                sum_value: exact_i32(500),
                distinct_count: Precision::Absent,
            }],
        };
        let second = Statistics {
            num_rows: Precision::Inexact(15),
            total_byte_size: Precision::Exact(150),
            column_statistics: vec![ColumnStatistics {
                null_count: Precision::Inexact(2),
                max_value: inexact_i32(120),
                min_value: exact_i32(-10),
                sum_value: Precision::Absent,
                distinct_count: Precision::Absent,
            }],
        };

        let items = vec![first, second];
        let merged = Statistics::try_merge_iter(&items, &schema).unwrap();

        assert_eq!(merged.num_rows, Precision::Inexact(25));
        assert_eq!(merged.total_byte_size, Precision::Inexact(250));

        let col = &merged.column_statistics[0];
        assert_eq!(col.null_count, Precision::Inexact(3));
        assert_eq!(col.max_value, inexact_i32(120));
        assert_eq!(col.min_value, inexact_i32(-10));
        assert!(matches!(col.sum_value, Precision::Absent));
    }

    #[test]
    fn test_try_merge_empty() {
        let schema = single_int32_schema();
        let items: Vec<Statistics> = Vec::new();

        let merged = Statistics::try_merge_iter(&items, &schema).unwrap();

        assert_eq!(merged.num_rows, Precision::Absent);
        assert_eq!(merged.total_byte_size, Precision::Absent);
        assert_eq!(merged.column_statistics.len(), 1);
        assert_eq!(merged.column_statistics[0].null_count, Precision::Absent);
    }

    #[test]
    fn test_try_merge_mismatched_size() {
        let schema = single_int32_schema();

        let no_columns = Statistics::default();
        let one_column =
            Statistics::default().add_column_statistics(ColumnStatistics::new_unknown());

        let items = vec![no_columns, one_column];

        let e = Statistics::try_merge_iter(&items, &schema).unwrap_err();
        assert_contains!(e.to_string(), "Error during planning: Cannot merge statistics with different number of columns: 0 vs 1");
    }

    #[test]
    fn test_try_merge_distinct_count_absent() {
        // Builds single-column statistics with an exact distinct count.
        let build = |rows: usize, bytes: usize, min: i32, max: i32, distinct: usize| {
            Statistics::default()
                .with_num_rows(Precision::Exact(rows))
                .with_total_byte_size(Precision::Exact(bytes))
                .add_column_statistics(
                    ColumnStatistics::new_unknown()
                        .with_null_count(Precision::Exact(0))
                        .with_min_value(exact_i32(min))
                        .with_max_value(exact_i32(max))
                        .with_distinct_count(Precision::Exact(distinct)),
                )
        };

        let first = build(10, 100, 1, 10, 5);
        let second = build(15, 150, 5, 20, 7);

        let merged = first.try_merge(&second).unwrap();

        assert_eq!(merged.num_rows, Precision::Exact(25));
        assert_eq!(merged.total_byte_size, Precision::Exact(250));

        let col = &merged.column_statistics[0];
        assert_eq!(col.null_count, Precision::Exact(0));
        assert_eq!(col.min_value, exact_i32(1));
        assert_eq!(col.max_value, exact_i32(20));
        // Distinct counts cannot be combined, so merging discards them.
        assert_eq!(col.distinct_count, Precision::Absent);
    }
}