1use std::fmt::{self, Debug, Display};
21
22use crate::{Result, ScalarValue};
23
24use crate::error::_plan_err;
25use arrow::datatypes::{DataType, Schema, SchemaRef};
26
/// A statistic value tagged with how much it can be trusted.
///
/// The default value is [`Precision::Absent`].
#[derive(Clone, PartialEq, Eq, Default, Copy)]
pub enum Precision<T: Debug + Clone + PartialEq + Eq + PartialOrd> {
    /// The value is known exactly.
    Exact(T),
    /// The value is an estimate and is not guaranteed to be exact.
    Inexact(T),
    /// No value is available.
    #[default]
    Absent,
}

impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Precision<T> {
    /// Returns a reference to the wrapped value (exact or inexact), or `None`
    /// when the precision is [`Precision::Absent`].
    pub fn get_value(&self) -> Option<&T> {
        match self {
            Precision::Exact(value) | Precision::Inexact(value) => Some(value),
            Precision::Absent => None,
        }
    }

    /// Applies `f` to the wrapped value while keeping the variant
    /// (`Exact` stays `Exact`, `Inexact` stays `Inexact`).
    /// [`Precision::Absent`] is passed through without calling `f`.
    ///
    /// `f` is invoked at most once, so any `FnOnce` closure is accepted
    /// (including closures that move captured state out).
    pub fn map<U, F>(self, f: F) -> Precision<U>
    where
        F: FnOnce(T) -> U,
        U: Debug + Clone + PartialEq + Eq + PartialOrd,
    {
        match self {
            Precision::Exact(val) => Precision::Exact(f(val)),
            Precision::Inexact(val) => Precision::Inexact(f(val)),
            Precision::Absent => Precision::Absent,
        }
    }

    /// Returns `Some(true)` for an exact value, `Some(false)` for an inexact
    /// value and `None` when no value is present.
    pub fn is_exact(&self) -> Option<bool> {
        match self {
            Precision::Exact(_) => Some(true),
            Precision::Inexact(_) => Some(false),
            Precision::Absent => None,
        }
    }

    /// Returns the larger of the two wrapped values (cloned).
    ///
    /// The result is `Exact` only when both inputs are `Exact`, `Inexact`
    /// when both carry a value but at least one is `Inexact`, and
    /// [`Precision::Absent`] when either input is absent.
    ///
    /// The comparison is `self >= other` under `PartialOrd`; when that is
    /// false (including for incomparable values) `other`'s value is chosen.
    pub fn max(&self, other: &Precision<T>) -> Precision<T> {
        match (self, other) {
            (Precision::Exact(a), Precision::Exact(b)) => {
                Precision::Exact(if a >= b { a.clone() } else { b.clone() })
            }
            (Precision::Inexact(a), Precision::Exact(b))
            | (Precision::Exact(a), Precision::Inexact(b))
            | (Precision::Inexact(a), Precision::Inexact(b)) => {
                Precision::Inexact(if a >= b { a.clone() } else { b.clone() })
            }
            (Precision::Absent, _) | (_, Precision::Absent) => Precision::Absent,
        }
    }

    /// Returns the smaller of the two wrapped values (cloned).
    ///
    /// Exactness is combined the same way as in [`Precision::max`].
    ///
    /// The comparison is `self >= other` under `PartialOrd`; when that is
    /// false (including for incomparable values) `self`'s value is chosen.
    pub fn min(&self, other: &Precision<T>) -> Precision<T> {
        match (self, other) {
            (Precision::Exact(a), Precision::Exact(b)) => {
                Precision::Exact(if a >= b { b.clone() } else { a.clone() })
            }
            (Precision::Inexact(a), Precision::Exact(b))
            | (Precision::Exact(a), Precision::Inexact(b))
            | (Precision::Inexact(a), Precision::Inexact(b)) => {
                Precision::Inexact(if a >= b { b.clone() } else { a.clone() })
            }
            (Precision::Absent, _) | (_, Precision::Absent) => Precision::Absent,
        }
    }

    /// Demotes an `Exact` value to `Inexact`; `Inexact` and `Absent` are
    /// returned unchanged.
    pub fn to_inexact(self) -> Self {
        match self {
            Precision::Exact(value) => Precision::Inexact(value),
            Precision::Inexact(_) | Precision::Absent => self,
        }
    }
}
116
117impl Precision<usize> {
118 pub fn add(&self, other: &Precision<usize>) -> Precision<usize> {
122 match (self, other) {
123 (Precision::Exact(a), Precision::Exact(b)) => Precision::Exact(a + b),
124 (Precision::Inexact(a), Precision::Exact(b))
125 | (Precision::Exact(a), Precision::Inexact(b))
126 | (Precision::Inexact(a), Precision::Inexact(b)) => Precision::Inexact(a + b),
127 (_, _) => Precision::Absent,
128 }
129 }
130
131 pub fn sub(&self, other: &Precision<usize>) -> Precision<usize> {
135 match (self, other) {
136 (Precision::Exact(a), Precision::Exact(b)) => Precision::Exact(a - b),
137 (Precision::Inexact(a), Precision::Exact(b))
138 | (Precision::Exact(a), Precision::Inexact(b))
139 | (Precision::Inexact(a), Precision::Inexact(b)) => Precision::Inexact(a - b),
140 (_, _) => Precision::Absent,
141 }
142 }
143
144 pub fn multiply(&self, other: &Precision<usize>) -> Precision<usize> {
148 match (self, other) {
149 (Precision::Exact(a), Precision::Exact(b)) => Precision::Exact(a * b),
150 (Precision::Inexact(a), Precision::Exact(b))
151 | (Precision::Exact(a), Precision::Inexact(b))
152 | (Precision::Inexact(a), Precision::Inexact(b)) => Precision::Inexact(a * b),
153 (_, _) => Precision::Absent,
154 }
155 }
156
157 pub fn with_estimated_selectivity(self, selectivity: f64) -> Self {
162 self.map(|v| ((v as f64 * selectivity).ceil()) as usize)
163 .to_inexact()
164 }
165}
166
167impl Precision<ScalarValue> {
168 pub fn add(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
172 match (self, other) {
173 (Precision::Exact(a), Precision::Exact(b)) => {
174 a.add(b).map(Precision::Exact).unwrap_or(Precision::Absent)
175 }
176 (Precision::Inexact(a), Precision::Exact(b))
177 | (Precision::Exact(a), Precision::Inexact(b))
178 | (Precision::Inexact(a), Precision::Inexact(b)) => a
179 .add(b)
180 .map(Precision::Inexact)
181 .unwrap_or(Precision::Absent),
182 (_, _) => Precision::Absent,
183 }
184 }
185
186 pub fn sub(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
190 match (self, other) {
191 (Precision::Exact(a), Precision::Exact(b)) => {
192 a.sub(b).map(Precision::Exact).unwrap_or(Precision::Absent)
193 }
194 (Precision::Inexact(a), Precision::Exact(b))
195 | (Precision::Exact(a), Precision::Inexact(b))
196 | (Precision::Inexact(a), Precision::Inexact(b)) => a
197 .sub(b)
198 .map(Precision::Inexact)
199 .unwrap_or(Precision::Absent),
200 (_, _) => Precision::Absent,
201 }
202 }
203
204 pub fn multiply(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
208 match (self, other) {
209 (Precision::Exact(a), Precision::Exact(b)) => a
210 .mul_checked(b)
211 .map(Precision::Exact)
212 .unwrap_or(Precision::Absent),
213 (Precision::Inexact(a), Precision::Exact(b))
214 | (Precision::Exact(a), Precision::Inexact(b))
215 | (Precision::Inexact(a), Precision::Inexact(b)) => a
216 .mul_checked(b)
217 .map(Precision::Inexact)
218 .unwrap_or(Precision::Absent),
219 (_, _) => Precision::Absent,
220 }
221 }
222
223 pub fn cast_to(&self, data_type: &DataType) -> Result<Precision<ScalarValue>> {
225 match self {
226 Precision::Exact(value) => value.cast_to(data_type).map(Precision::Exact),
227 Precision::Inexact(value) => value.cast_to(data_type).map(Precision::Inexact),
228 Precision::Absent => Ok(Precision::Absent),
229 }
230 }
231}
232
233impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Debug for Precision<T> {
234 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
235 match self {
236 Precision::Exact(inner) => write!(f, "Exact({inner:?})"),
237 Precision::Inexact(inner) => write!(f, "Inexact({inner:?})"),
238 Precision::Absent => write!(f, "Absent"),
239 }
240 }
241}
242
243impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Display for Precision<T> {
244 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
245 match self {
246 Precision::Exact(inner) => write!(f, "Exact({inner:?})"),
247 Precision::Inexact(inner) => write!(f, "Inexact({inner:?})"),
248 Precision::Absent => write!(f, "Absent"),
249 }
250 }
251}
252
253impl From<Precision<usize>> for Precision<ScalarValue> {
254 fn from(value: Precision<usize>) -> Self {
255 match value {
256 Precision::Exact(v) => Precision::Exact(ScalarValue::UInt64(Some(v as u64))),
257 Precision::Inexact(v) => {
258 Precision::Inexact(ScalarValue::UInt64(Some(v as u64)))
259 }
260 Precision::Absent => Precision::Absent,
261 }
262 }
263}
264
265#[derive(Debug, Clone, PartialEq, Eq)]
270pub struct Statistics {
271 pub num_rows: Precision<usize>,
273 pub total_byte_size: Precision<usize>,
275 pub column_statistics: Vec<ColumnStatistics>,
280}
281
282impl Default for Statistics {
283 fn default() -> Self {
286 Self {
287 num_rows: Precision::Absent,
288 total_byte_size: Precision::Absent,
289 column_statistics: vec![],
290 }
291 }
292}
293
294impl Statistics {
295 pub fn new_unknown(schema: &Schema) -> Self {
298 Self {
299 num_rows: Precision::Absent,
300 total_byte_size: Precision::Absent,
301 column_statistics: Statistics::unknown_column(schema),
302 }
303 }
304
305 pub fn unknown_column(schema: &Schema) -> Vec<ColumnStatistics> {
307 schema
308 .fields()
309 .iter()
310 .map(|_| ColumnStatistics::new_unknown())
311 .collect()
312 }
313
314 pub fn with_num_rows(mut self, num_rows: Precision<usize>) -> Self {
316 self.num_rows = num_rows;
317 self
318 }
319
320 pub fn with_total_byte_size(mut self, total_byte_size: Precision<usize>) -> Self {
322 self.total_byte_size = total_byte_size;
323 self
324 }
325
326 pub fn add_column_statistics(mut self, column_stats: ColumnStatistics) -> Self {
328 self.column_statistics.push(column_stats);
329 self
330 }
331
332 pub fn to_inexact(mut self) -> Self {
335 self.num_rows = self.num_rows.to_inexact();
336 self.total_byte_size = self.total_byte_size.to_inexact();
337 self.column_statistics = self
338 .column_statistics
339 .into_iter()
340 .map(|s| s.to_inexact())
341 .collect();
342 self
343 }
344
345 pub fn project(mut self, projection: Option<&Vec<usize>>) -> Self {
351 let Some(projection) = projection else {
352 return self;
353 };
354
355 #[allow(clippy::large_enum_variant)]
356 enum Slot {
357 Taken(usize),
359 Present(ColumnStatistics),
361 }
362
363 let mut columns: Vec<_> = std::mem::take(&mut self.column_statistics)
365 .into_iter()
366 .map(Slot::Present)
367 .collect();
368
369 for idx in projection {
370 let next_idx = self.column_statistics.len();
371 let slot = std::mem::replace(
372 columns.get_mut(*idx).expect("projection out of bounds"),
373 Slot::Taken(next_idx),
374 );
375 match slot {
376 Slot::Present(col) => self.column_statistics.push(col),
378 Slot::Taken(prev_idx) => self
380 .column_statistics
381 .push(self.column_statistics[prev_idx].clone()),
382 }
383 }
384
385 self
386 }
387
388 pub fn with_fetch(
393 mut self,
394 schema: SchemaRef,
395 fetch: Option<usize>,
396 skip: usize,
397 n_partitions: usize,
398 ) -> Result<Self> {
399 let fetch_val = fetch.unwrap_or(usize::MAX);
400
401 self.num_rows = match self {
402 Statistics {
403 num_rows: Precision::Exact(nr),
404 ..
405 }
406 | Statistics {
407 num_rows: Precision::Inexact(nr),
408 ..
409 } => {
410 if nr <= skip {
412 Precision::Exact(0)
414 } else if nr <= fetch_val && skip == 0 {
415 return Ok(self);
421 } else if nr - skip <= fetch_val {
422 check_num_rows(
426 (nr - skip).checked_mul(n_partitions),
427 self.num_rows.is_exact().unwrap(),
429 )
430 } else {
431 check_num_rows(
437 fetch_val.checked_mul(n_partitions),
438 self.num_rows.is_exact().unwrap(),
440 )
441 }
442 }
443 Statistics {
444 num_rows: Precision::Absent,
445 ..
446 } => check_num_rows(fetch.and_then(|v| v.checked_mul(n_partitions)), false),
447 };
448 self.column_statistics = Statistics::unknown_column(&schema);
449 self.total_byte_size = Precision::Absent;
450 Ok(self)
451 }
452
453 pub fn try_merge_iter<'a, I>(items: I, schema: &Schema) -> Result<Statistics>
460 where
461 I: IntoIterator<Item = &'a Statistics>,
462 {
463 let mut items = items.into_iter();
464
465 let Some(init) = items.next() else {
466 return Ok(Statistics::new_unknown(schema));
467 };
468 items.try_fold(init.clone(), |acc: Statistics, item_stats: &Statistics| {
469 acc.try_merge(item_stats)
470 })
471 }
472
473 pub fn try_merge(self, other: &Statistics) -> Result<Self> {
514 let Self {
515 mut num_rows,
516 mut total_byte_size,
517 mut column_statistics,
518 } = self;
519
520 num_rows = num_rows.add(&other.num_rows);
522 total_byte_size = total_byte_size.add(&other.total_byte_size);
523
524 if column_statistics.len() != other.column_statistics.len() {
525 return _plan_err!(
526 "Cannot merge statistics with different number of columns: {} vs {}",
527 column_statistics.len(),
528 other.column_statistics.len()
529 );
530 }
531
532 for (item_col_stats, col_stats) in other
533 .column_statistics
534 .iter()
535 .zip(column_statistics.iter_mut())
536 {
537 col_stats.null_count = col_stats.null_count.add(&item_col_stats.null_count);
538 col_stats.max_value = col_stats.max_value.max(&item_col_stats.max_value);
539 col_stats.min_value = col_stats.min_value.min(&item_col_stats.min_value);
540 col_stats.sum_value = col_stats.sum_value.add(&item_col_stats.sum_value);
541 }
542
543 Ok(Statistics {
544 num_rows,
545 total_byte_size,
546 column_statistics,
547 })
548 }
549}
550
551fn check_num_rows(value: Option<usize>, is_exact: bool) -> Precision<usize> {
554 if let Some(value) = value {
555 if is_exact {
556 Precision::Exact(value)
557 } else {
558 Precision::Inexact(value)
560 }
561 } else {
562 Precision::Absent
565 }
566}
567
568impl Display for Statistics {
569 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
570 let column_stats = self
572 .column_statistics
573 .iter()
574 .enumerate()
575 .map(|(i, cs)| {
576 let s = format!("(Col[{i}]:");
577 let s = if cs.min_value != Precision::Absent {
578 format!("{} Min={}", s, cs.min_value)
579 } else {
580 s
581 };
582 let s = if cs.max_value != Precision::Absent {
583 format!("{} Max={}", s, cs.max_value)
584 } else {
585 s
586 };
587 let s = if cs.sum_value != Precision::Absent {
588 format!("{} Sum={}", s, cs.sum_value)
589 } else {
590 s
591 };
592 let s = if cs.null_count != Precision::Absent {
593 format!("{} Null={}", s, cs.null_count)
594 } else {
595 s
596 };
597 let s = if cs.distinct_count != Precision::Absent {
598 format!("{} Distinct={}", s, cs.distinct_count)
599 } else {
600 s
601 };
602
603 s + ")"
604 })
605 .collect::<Vec<_>>()
606 .join(",");
607
608 write!(
609 f,
610 "Rows={}, Bytes={}, [{}]",
611 self.num_rows, self.total_byte_size, column_stats
612 )?;
613
614 Ok(())
615 }
616}
617
618#[derive(Clone, Debug, PartialEq, Eq, Default)]
620pub struct ColumnStatistics {
621 pub null_count: Precision<usize>,
623 pub max_value: Precision<ScalarValue>,
625 pub min_value: Precision<ScalarValue>,
627 pub sum_value: Precision<ScalarValue>,
629 pub distinct_count: Precision<usize>,
631}
632
633impl ColumnStatistics {
634 pub fn is_singleton(&self) -> bool {
636 match (&self.min_value, &self.max_value) {
637 (Precision::Exact(min), Precision::Exact(max)) => {
639 !min.is_null() && !max.is_null() && (min == max)
640 }
641 (_, _) => false,
642 }
643 }
644
645 pub fn new_unknown() -> Self {
647 Self {
648 null_count: Precision::Absent,
649 max_value: Precision::Absent,
650 min_value: Precision::Absent,
651 sum_value: Precision::Absent,
652 distinct_count: Precision::Absent,
653 }
654 }
655
656 pub fn with_null_count(mut self, null_count: Precision<usize>) -> Self {
658 self.null_count = null_count;
659 self
660 }
661
662 pub fn with_max_value(mut self, max_value: Precision<ScalarValue>) -> Self {
664 self.max_value = max_value;
665 self
666 }
667
668 pub fn with_min_value(mut self, min_value: Precision<ScalarValue>) -> Self {
670 self.min_value = min_value;
671 self
672 }
673
674 pub fn with_sum_value(mut self, sum_value: Precision<ScalarValue>) -> Self {
676 self.sum_value = sum_value;
677 self
678 }
679
680 pub fn with_distinct_count(mut self, distinct_count: Precision<usize>) -> Self {
682 self.distinct_count = distinct_count;
683 self
684 }
685
686 pub fn to_inexact(mut self) -> Self {
690 self.null_count = self.null_count.to_inexact();
691 self.max_value = self.max_value.to_inexact();
692 self.min_value = self.min_value.to_inexact();
693 self.sum_value = self.sum_value.to_inexact();
694 self.distinct_count = self.distinct_count.to_inexact();
695 self
696 }
697}
698
/// Unit tests for `Precision`, `Statistics::project` and
/// `Statistics::try_merge_iter`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::assert_contains;
    use arrow::datatypes::Field;
    use std::sync::Arc;

    /// `get_value` exposes the payload of `Exact`/`Inexact` and `None` for `Absent`.
    #[test]
    fn test_get_value() {
        let exact_precision = Precision::Exact(42);
        let inexact_precision = Precision::Inexact(23);
        let absent_precision = Precision::<i32>::Absent;

        assert_eq!(*exact_precision.get_value().unwrap(), 42);
        assert_eq!(*inexact_precision.get_value().unwrap(), 23);
        assert_eq!(absent_precision.get_value(), None);
    }

    /// `map` transforms the payload and keeps the variant.
    #[test]
    fn test_map() {
        let exact_precision = Precision::Exact(42);
        let inexact_precision = Precision::Inexact(23);
        let absent_precision = Precision::Absent;

        let squared = |x| x * x;

        assert_eq!(exact_precision.map(squared), Precision::Exact(1764));
        assert_eq!(inexact_precision.map(squared), Precision::Inexact(529));
        assert_eq!(absent_precision.map(squared), Precision::Absent);
    }

    /// `is_exact` distinguishes the three variants.
    #[test]
    fn test_is_exact() {
        let exact_precision = Precision::Exact(42);
        let inexact_precision = Precision::Inexact(23);
        let absent_precision = Precision::<i32>::Absent;

        assert_eq!(exact_precision.is_exact(), Some(true));
        assert_eq!(inexact_precision.is_exact(), Some(false));
        assert_eq!(absent_precision.is_exact(), None);
    }

    /// `max` picks the larger payload; the result is exact only if both inputs are.
    #[test]
    fn test_max() {
        let precision1 = Precision::Exact(42);
        let precision2 = Precision::Inexact(23);
        let precision3 = Precision::Exact(30);
        let absent_precision = Precision::Absent;

        assert_eq!(precision1.max(&precision2), Precision::Inexact(42));
        assert_eq!(precision1.max(&precision3), Precision::Exact(42));
        assert_eq!(precision2.max(&precision3), Precision::Inexact(30));
        assert_eq!(precision1.max(&absent_precision), Precision::Absent);
    }

    /// `min` picks the smaller payload; the result is exact only if both inputs are.
    #[test]
    fn test_min() {
        let precision1 = Precision::Exact(42);
        let precision2 = Precision::Inexact(23);
        let precision3 = Precision::Exact(30);
        let absent_precision = Precision::Absent;

        assert_eq!(precision1.min(&precision2), Precision::Inexact(23));
        assert_eq!(precision1.min(&precision3), Precision::Exact(30));
        assert_eq!(precision2.min(&precision3), Precision::Inexact(23));
        assert_eq!(precision1.min(&absent_precision), Precision::Absent);
    }

    /// `to_inexact` demotes `Exact` and leaves the other variants alone.
    #[test]
    fn test_to_inexact() {
        let exact_precision = Precision::Exact(42);
        let inexact_precision = Precision::Inexact(42);
        let absent_precision = Precision::<i32>::Absent;

        assert_eq!(exact_precision.to_inexact(), inexact_precision);
        assert_eq!(inexact_precision.to_inexact(), inexact_precision);
        assert_eq!(absent_precision.to_inexact(), absent_precision);
    }

    /// `Precision<usize>::add` sums payloads and combines exactness.
    #[test]
    fn test_add() {
        let precision1 = Precision::Exact(42);
        let precision2 = Precision::Inexact(23);
        let precision3 = Precision::Exact(30);
        let absent_precision = Precision::Absent;

        assert_eq!(precision1.add(&precision2), Precision::Inexact(65));
        assert_eq!(precision1.add(&precision3), Precision::Exact(72));
        assert_eq!(precision2.add(&precision3), Precision::Inexact(53));
        assert_eq!(precision1.add(&absent_precision), Precision::Absent);
    }

    /// `Precision<ScalarValue>::add` sums payloads and combines exactness.
    #[test]
    fn test_add_scalar() {
        let precision = Precision::Exact(ScalarValue::Int32(Some(42)));

        assert_eq!(
            precision.add(&Precision::Exact(ScalarValue::Int32(Some(23)))),
            Precision::Exact(ScalarValue::Int32(Some(65))),
        );
        assert_eq!(
            precision.add(&Precision::Inexact(ScalarValue::Int32(Some(23)))),
            Precision::Inexact(ScalarValue::Int32(Some(65))),
        );
        // Adding a null scalar is expected to produce an exact null scalar.
        assert_eq!(
            precision.add(&Precision::Exact(ScalarValue::Int32(None))),
            Precision::Exact(ScalarValue::Int32(None)),
        );
        assert_eq!(precision.add(&Precision::Absent), Precision::Absent);
    }

    /// `Precision<usize>::sub` subtracts payloads and combines exactness.
    // NOTE(review): no case subtracts a larger value from a smaller one
    // (e.g. `precision2.sub(&precision3)`), so underflow is not covered here.
    #[test]
    fn test_sub() {
        let precision1 = Precision::Exact(42);
        let precision2 = Precision::Inexact(23);
        let precision3 = Precision::Exact(30);
        let absent_precision = Precision::Absent;

        assert_eq!(precision1.sub(&precision2), Precision::Inexact(19));
        assert_eq!(precision1.sub(&precision3), Precision::Exact(12));
        assert_eq!(precision1.sub(&absent_precision), Precision::Absent);
    }

    /// `Precision<ScalarValue>::sub` subtracts payloads and combines exactness.
    #[test]
    fn test_sub_scalar() {
        let precision = Precision::Exact(ScalarValue::Int32(Some(42)));

        assert_eq!(
            precision.sub(&Precision::Exact(ScalarValue::Int32(Some(23)))),
            Precision::Exact(ScalarValue::Int32(Some(19))),
        );
        assert_eq!(
            precision.sub(&Precision::Inexact(ScalarValue::Int32(Some(23)))),
            Precision::Inexact(ScalarValue::Int32(Some(19))),
        );
        // Subtracting a null scalar is expected to produce an exact null scalar.
        assert_eq!(
            precision.sub(&Precision::Exact(ScalarValue::Int32(None))),
            Precision::Exact(ScalarValue::Int32(None)),
        );
        assert_eq!(precision.sub(&Precision::Absent), Precision::Absent);
    }

    /// `Precision<usize>::multiply` multiplies payloads and combines exactness.
    #[test]
    fn test_multiply() {
        let precision1 = Precision::Exact(6);
        let precision2 = Precision::Inexact(3);
        let precision3 = Precision::Exact(5);
        let absent_precision = Precision::Absent;

        assert_eq!(precision1.multiply(&precision2), Precision::Inexact(18));
        assert_eq!(precision1.multiply(&precision3), Precision::Exact(30));
        assert_eq!(precision2.multiply(&precision3), Precision::Inexact(15));
        assert_eq!(precision1.multiply(&absent_precision), Precision::Absent);
    }

    /// `Precision<ScalarValue>::multiply` multiplies payloads and combines exactness.
    #[test]
    fn test_multiply_scalar() {
        let precision = Precision::Exact(ScalarValue::Int32(Some(6)));

        assert_eq!(
            precision.multiply(&Precision::Exact(ScalarValue::Int32(Some(5)))),
            Precision::Exact(ScalarValue::Int32(Some(30))),
        );
        assert_eq!(
            precision.multiply(&Precision::Inexact(ScalarValue::Int32(Some(5)))),
            Precision::Inexact(ScalarValue::Int32(Some(30))),
        );
        // Multiplying by a null scalar is expected to produce an exact null scalar.
        assert_eq!(
            precision.multiply(&Precision::Exact(ScalarValue::Int32(None))),
            Precision::Exact(ScalarValue::Int32(None)),
        );
        assert_eq!(precision.multiply(&Precision::Absent), Precision::Absent);
    }

    /// `cast_to` converts the payload, keeps the variant and surfaces cast errors.
    #[test]
    fn test_cast_to() {
        // Valid widening casts keep the value and the variant.
        assert_eq!(
            Precision::Exact(ScalarValue::Int32(Some(42)))
                .cast_to(&DataType::Int64)
                .unwrap(),
            Precision::Exact(ScalarValue::Int64(Some(42))),
        );
        assert_eq!(
            Precision::Inexact(ScalarValue::Int32(Some(42)))
                .cast_to(&DataType::Int64)
                .unwrap(),
            Precision::Inexact(ScalarValue::Int64(Some(42))),
        );
        // A null payload stays null in the target type.
        assert_eq!(
            Precision::Exact(ScalarValue::Int32(None))
                .cast_to(&DataType::Int64)
                .unwrap(),
            Precision::Exact(ScalarValue::Int64(None)),
        );
        // 256 is expected not to fit into Int8, so the cast must fail.
        assert!(Precision::Exact(ScalarValue::Int32(Some(256)))
            .cast_to(&DataType::Int8)
            .is_err());
    }

    /// `Precision<usize>` can be copied implicitly, `Precision<ScalarValue>`
    /// needs an explicit clone.
    #[test]
    fn test_precision_cloning() {
        let precision: Precision<usize> = Precision::Exact(42);
        // `precision` stays usable after this assignment, i.e. it was copied.
        let p2 = precision;
        assert_eq!(precision, p2);

        let precision: Precision<ScalarValue> =
            Precision::Exact(ScalarValue::Int64(Some(42)));
        // The clone is intentional: it is what this test exercises.
        #[allow(clippy::redundant_clone)]
        let p2 = precision.clone();
        assert_eq!(precision, p2);
    }

    /// Projecting with `None` leaves the statistics unchanged.
    #[test]
    fn test_project_none() {
        let projection = None;
        let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
        assert_eq!(stats, make_stats(vec![10, 20, 30]));
    }

    /// Projecting with an empty list drops all column statistics.
    #[test]
    fn test_project_empty() {
        let projection = Some(vec![]);
        let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
        assert_eq!(stats, make_stats(vec![]));
    }

    /// Projection may reorder and drop columns.
    #[test]
    fn test_project_swap() {
        let projection = Some(vec![2, 1]);
        let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
        assert_eq!(stats, make_stats(vec![30, 20]));
    }

    /// Projection may reference the same input column several times.
    #[test]
    fn test_project_repeated() {
        let projection = Some(vec![1, 2, 1, 1, 0, 2]);
        let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
        assert_eq!(stats, make_stats(vec![20, 30, 20, 20, 10, 30]));
    }

    /// Builds `Statistics` with fixed row/byte counts and one column per
    /// entry of `counts`, using the entry as that column's null count so
    /// columns can be told apart.
    fn make_stats(counts: impl IntoIterator<Item = usize>) -> Statistics {
        Statistics {
            num_rows: Precision::Exact(42),
            total_byte_size: Precision::Exact(500),
            column_statistics: counts.into_iter().map(col_stats_i64).collect(),
        }
    }

    /// Builds column statistics whose only varying field is `null_count`.
    // NOTE(review): `max_value` (42) is smaller than `min_value` (64). The
    // projection tests only compare whole structs, so this looks harmless,
    // but the fixture is not a realistic min/max pair.
    fn col_stats_i64(null_count: usize) -> ColumnStatistics {
        ColumnStatistics {
            null_count: Precision::Exact(null_count),
            max_value: Precision::Exact(ScalarValue::Int64(Some(42))),
            min_value: Precision::Exact(ScalarValue::Int64(Some(64))),
            sum_value: Precision::Exact(ScalarValue::Int64(Some(4600))),
            distinct_count: Precision::Exact(100),
        }
    }

    /// Merging two fully exact statistics yields exact, combined values.
    #[test]
    fn test_try_merge_basic() {
        // Two-column schema shared by both inputs.
        let schema = Arc::new(Schema::new(vec![
            Field::new("col1", DataType::Int32, false),
            Field::new("col2", DataType::Int32, false),
        ]));

        // First input.
        let stats1 = Statistics {
            num_rows: Precision::Exact(10),
            total_byte_size: Precision::Exact(100),
            column_statistics: vec![
                ColumnStatistics {
                    null_count: Precision::Exact(1),
                    max_value: Precision::Exact(ScalarValue::Int32(Some(100))),
                    min_value: Precision::Exact(ScalarValue::Int32(Some(1))),
                    sum_value: Precision::Exact(ScalarValue::Int32(Some(500))),
                    distinct_count: Precision::Absent,
                },
                ColumnStatistics {
                    null_count: Precision::Exact(2),
                    max_value: Precision::Exact(ScalarValue::Int32(Some(200))),
                    min_value: Precision::Exact(ScalarValue::Int32(Some(10))),
                    sum_value: Precision::Exact(ScalarValue::Int32(Some(1000))),
                    distinct_count: Precision::Absent,
                },
            ],
        };

        // Second input.
        let stats2 = Statistics {
            num_rows: Precision::Exact(15),
            total_byte_size: Precision::Exact(150),
            column_statistics: vec![
                ColumnStatistics {
                    null_count: Precision::Exact(2),
                    max_value: Precision::Exact(ScalarValue::Int32(Some(120))),
                    min_value: Precision::Exact(ScalarValue::Int32(Some(-10))),
                    sum_value: Precision::Exact(ScalarValue::Int32(Some(600))),
                    distinct_count: Precision::Absent,
                },
                ColumnStatistics {
                    null_count: Precision::Exact(3),
                    max_value: Precision::Exact(ScalarValue::Int32(Some(180))),
                    min_value: Precision::Exact(ScalarValue::Int32(Some(5))),
                    sum_value: Precision::Exact(ScalarValue::Int32(Some(1200))),
                    distinct_count: Precision::Absent,
                },
            ],
        };

        let items = vec![stats1, stats2];

        let summary_stats = Statistics::try_merge_iter(&items, &schema).unwrap();

        // 10 + 15 rows
        assert_eq!(summary_stats.num_rows, Precision::Exact(25));
        // 100 + 150 bytes
        assert_eq!(summary_stats.total_byte_size, Precision::Exact(250));

        // First column.
        let col1_stats = &summary_stats.column_statistics[0];
        // 1 + 2 nulls
        assert_eq!(col1_stats.null_count, Precision::Exact(3));
        assert_eq!(
            col1_stats.max_value,
            Precision::Exact(ScalarValue::Int32(Some(120)))
        );
        assert_eq!(
            col1_stats.min_value,
            Precision::Exact(ScalarValue::Int32(Some(-10)))
        );
        assert_eq!(
            col1_stats.sum_value,
            Precision::Exact(ScalarValue::Int32(Some(1100)))
        );

        // Second column.
        let col2_stats = &summary_stats.column_statistics[1];
        // 2 + 3 nulls
        assert_eq!(col2_stats.null_count, Precision::Exact(5));
        assert_eq!(
            col2_stats.max_value,
            Precision::Exact(ScalarValue::Int32(Some(200)))
        );
        assert_eq!(
            col2_stats.min_value,
            Precision::Exact(ScalarValue::Int32(Some(5)))
        );
        assert_eq!(
            col2_stats.sum_value,
            Precision::Exact(ScalarValue::Int32(Some(2200)))
        );
    }

    /// Merging exact with inexact values yields inexact results; merging
    /// with an absent value yields absent.
    #[test]
    fn test_try_merge_mixed_precision() {
        // Single-column schema.
        let schema = Arc::new(Schema::new(vec![Field::new(
            "col1",
            DataType::Int32,
            false,
        )]));

        // The two inputs deliberately mix Exact, Inexact and Absent values.
        let stats1 = Statistics {
            num_rows: Precision::Exact(10),
            total_byte_size: Precision::Inexact(100),
            column_statistics: vec![ColumnStatistics {
                null_count: Precision::Exact(1),
                max_value: Precision::Exact(ScalarValue::Int32(Some(100))),
                min_value: Precision::Inexact(ScalarValue::Int32(Some(1))),
                sum_value: Precision::Exact(ScalarValue::Int32(Some(500))),
                distinct_count: Precision::Absent,
            }],
        };

        let stats2 = Statistics {
            num_rows: Precision::Inexact(15),
            total_byte_size: Precision::Exact(150),
            column_statistics: vec![ColumnStatistics {
                null_count: Precision::Inexact(2),
                max_value: Precision::Inexact(ScalarValue::Int32(Some(120))),
                min_value: Precision::Exact(ScalarValue::Int32(Some(-10))),
                sum_value: Precision::Absent,
                distinct_count: Precision::Absent,
            }],
        };

        let items = vec![stats1, stats2];

        let summary_stats = Statistics::try_merge_iter(&items, &schema).unwrap();

        assert_eq!(summary_stats.num_rows, Precision::Inexact(25));
        assert_eq!(summary_stats.total_byte_size, Precision::Inexact(250));

        let col_stats = &summary_stats.column_statistics[0];
        assert_eq!(col_stats.null_count, Precision::Inexact(3));
        assert_eq!(
            col_stats.max_value,
            Precision::Inexact(ScalarValue::Int32(Some(120)))
        );
        assert_eq!(
            col_stats.min_value,
            Precision::Inexact(ScalarValue::Int32(Some(-10)))
        );
        // One side had no sum, so the merged sum is absent.
        assert!(matches!(col_stats.sum_value, Precision::Absent));
    }

    /// Merging an empty list yields unknown statistics shaped by the schema.
    #[test]
    fn test_try_merge_empty() {
        let schema = Arc::new(Schema::new(vec![Field::new(
            "col1",
            DataType::Int32,
            false,
        )]));

        // No input statistics at all.
        let items: Vec<Statistics> = vec![];

        let summary_stats = Statistics::try_merge_iter(&items, &schema).unwrap();

        // Everything is absent, with one column entry per schema field.
        assert_eq!(summary_stats.num_rows, Precision::Absent);
        assert_eq!(summary_stats.total_byte_size, Precision::Absent);
        assert_eq!(summary_stats.column_statistics.len(), 1);
        assert_eq!(
            summary_stats.column_statistics[0].null_count,
            Precision::Absent
        );
    }

    /// Merging statistics with different column counts is a planning error.
    #[test]
    fn test_try_merge_mismatched_size() {
        let schema = Arc::new(Schema::new(vec![Field::new(
            "col1",
            DataType::Int32,
            false,
        )]));

        // Zero columns.
        let stats1 = Statistics::default();

        // One column.
        let stats2 =
            Statistics::default().add_column_statistics(ColumnStatistics::new_unknown());

        let items = vec![stats1, stats2];

        let e = Statistics::try_merge_iter(&items, &schema).unwrap_err();
        assert_contains!(e.to_string(), "Error during planning: Cannot merge statistics with different number of columns: 0 vs 1");
    }
}