1use std::fmt::{self, Debug, Display};
21
22use crate::{Result, ScalarValue};
23
24use crate::error::_plan_err;
25use arrow::datatypes::{DataType, Schema};
26
/// A statistic value tagged with how much is known about it.
///
/// A value is either known exactly, available but not exact, or not
/// available at all. `Absent` is the [`Default`] variant.
#[derive(Clone, PartialEq, Eq, Default, Copy)]
pub enum Precision<T: Debug + Clone + PartialEq + Eq + PartialOrd> {
    /// The value is known exactly.
    Exact(T),
    /// A value is available but is not guaranteed to be exact.
    Inexact(T),
    /// No value is available.
    #[default]
    Absent,
}

impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Precision<T> {
    /// Returns a reference to the wrapped value for `Exact` and `Inexact`,
    /// or `None` for `Absent`.
    pub fn get_value(&self) -> Option<&T> {
        match self {
            Precision::Exact(value) | Precision::Inexact(value) => Some(value),
            Precision::Absent => None,
        }
    }

    /// Applies `f` to the wrapped value, keeping the variant (`Exact` stays
    /// `Exact`, `Inexact` stays `Inexact`). `Absent` maps to `Absent` and `f`
    /// is not called.
    ///
    /// `f` is invoked at most once, so any `FnOnce` closure is accepted
    /// (every `Fn`/`FnMut` closure is also `FnOnce`).
    pub fn map<U, F>(self, f: F) -> Precision<U>
    where
        F: FnOnce(T) -> U,
        U: Debug + Clone + PartialEq + Eq + PartialOrd,
    {
        match self {
            Precision::Exact(val) => Precision::Exact(f(val)),
            Precision::Inexact(val) => Precision::Inexact(f(val)),
            Precision::Absent => Precision::Absent,
        }
    }

    /// Returns `Some(true)` for `Exact`, `Some(false)` for `Inexact` and
    /// `None` for `Absent`.
    pub fn is_exact(&self) -> Option<bool> {
        match self {
            Precision::Exact(_) => Some(true),
            Precision::Inexact(_) => Some(false),
            Precision::Absent => None,
        }
    }

    /// Returns the larger of the two wrapped values.
    ///
    /// The result is `Exact` only when both inputs are `Exact`, `Inexact`
    /// when both carry a value but at least one is `Inexact`, and `Absent`
    /// when either input is `Absent`. When `self >= other` does not hold for
    /// the wrapped values, the value of `other` is returned.
    pub fn max(&self, other: &Precision<T>) -> Precision<T> {
        match (self, other) {
            (Precision::Exact(a), Precision::Exact(b)) => {
                Precision::Exact(if a >= b { a.clone() } else { b.clone() })
            }
            (Precision::Inexact(a), Precision::Exact(b))
            | (Precision::Exact(a), Precision::Inexact(b))
            | (Precision::Inexact(a), Precision::Inexact(b)) => {
                Precision::Inexact(if a >= b { a.clone() } else { b.clone() })
            }
            (Precision::Absent, _) | (_, Precision::Absent) => Precision::Absent,
        }
    }

    /// Returns the smaller of the two wrapped values.
    ///
    /// The result is `Exact` only when both inputs are `Exact`, `Inexact`
    /// when both carry a value but at least one is `Inexact`, and `Absent`
    /// when either input is `Absent`. When `self >= other` does not hold for
    /// the wrapped values, the value of `self` is returned.
    pub fn min(&self, other: &Precision<T>) -> Precision<T> {
        match (self, other) {
            (Precision::Exact(a), Precision::Exact(b)) => {
                Precision::Exact(if a >= b { b.clone() } else { a.clone() })
            }
            (Precision::Inexact(a), Precision::Exact(b))
            | (Precision::Exact(a), Precision::Inexact(b))
            | (Precision::Inexact(a), Precision::Inexact(b)) => {
                Precision::Inexact(if a >= b { b.clone() } else { a.clone() })
            }
            (Precision::Absent, _) | (_, Precision::Absent) => Precision::Absent,
        }
    }

    /// Converts `Exact(v)` into `Inexact(v)`; `Inexact` and `Absent` are
    /// returned unchanged.
    pub fn to_inexact(self) -> Self {
        match self {
            Precision::Exact(value) => Precision::Inexact(value),
            unchanged => unchanged,
        }
    }
}
116
117impl Precision<usize> {
118 pub fn add(&self, other: &Precision<usize>) -> Precision<usize> {
122 match (self, other) {
123 (Precision::Exact(a), Precision::Exact(b)) => a.checked_add(*b).map_or_else(
124 || Precision::Inexact(a.saturating_add(*b)),
125 Precision::Exact,
126 ),
127 (Precision::Inexact(a), Precision::Exact(b))
128 | (Precision::Exact(a), Precision::Inexact(b))
129 | (Precision::Inexact(a), Precision::Inexact(b)) => {
130 Precision::Inexact(a.saturating_add(*b))
131 }
132 (_, _) => Precision::Absent,
133 }
134 }
135
136 pub fn sub(&self, other: &Precision<usize>) -> Precision<usize> {
140 match (self, other) {
141 (Precision::Exact(a), Precision::Exact(b)) => a.checked_sub(*b).map_or_else(
142 || Precision::Inexact(a.saturating_sub(*b)),
143 Precision::Exact,
144 ),
145 (Precision::Inexact(a), Precision::Exact(b))
146 | (Precision::Exact(a), Precision::Inexact(b))
147 | (Precision::Inexact(a), Precision::Inexact(b)) => {
148 Precision::Inexact(a.saturating_sub(*b))
149 }
150 (_, _) => Precision::Absent,
151 }
152 }
153
154 pub fn multiply(&self, other: &Precision<usize>) -> Precision<usize> {
158 match (self, other) {
159 (Precision::Exact(a), Precision::Exact(b)) => a.checked_mul(*b).map_or_else(
160 || Precision::Inexact(a.saturating_mul(*b)),
161 Precision::Exact,
162 ),
163 (Precision::Inexact(a), Precision::Exact(b))
164 | (Precision::Exact(a), Precision::Inexact(b))
165 | (Precision::Inexact(a), Precision::Inexact(b)) => {
166 Precision::Inexact(a.saturating_mul(*b))
167 }
168 (_, _) => Precision::Absent,
169 }
170 }
171
172 pub fn with_estimated_selectivity(self, selectivity: f64) -> Self {
177 self.map(|v| ((v as f64 * selectivity).ceil()) as usize)
178 .to_inexact()
179 }
180}
181
182impl Precision<ScalarValue> {
183 pub fn add(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
187 match (self, other) {
188 (Precision::Exact(a), Precision::Exact(b)) => {
189 a.add(b).map(Precision::Exact).unwrap_or(Precision::Absent)
190 }
191 (Precision::Inexact(a), Precision::Exact(b))
192 | (Precision::Exact(a), Precision::Inexact(b))
193 | (Precision::Inexact(a), Precision::Inexact(b)) => a
194 .add(b)
195 .map(Precision::Inexact)
196 .unwrap_or(Precision::Absent),
197 (_, _) => Precision::Absent,
198 }
199 }
200
201 pub fn sub(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
205 match (self, other) {
206 (Precision::Exact(a), Precision::Exact(b)) => {
207 a.sub(b).map(Precision::Exact).unwrap_or(Precision::Absent)
208 }
209 (Precision::Inexact(a), Precision::Exact(b))
210 | (Precision::Exact(a), Precision::Inexact(b))
211 | (Precision::Inexact(a), Precision::Inexact(b)) => a
212 .sub(b)
213 .map(Precision::Inexact)
214 .unwrap_or(Precision::Absent),
215 (_, _) => Precision::Absent,
216 }
217 }
218
219 pub fn multiply(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
223 match (self, other) {
224 (Precision::Exact(a), Precision::Exact(b)) => a
225 .mul_checked(b)
226 .map(Precision::Exact)
227 .unwrap_or(Precision::Absent),
228 (Precision::Inexact(a), Precision::Exact(b))
229 | (Precision::Exact(a), Precision::Inexact(b))
230 | (Precision::Inexact(a), Precision::Inexact(b)) => a
231 .mul_checked(b)
232 .map(Precision::Inexact)
233 .unwrap_or(Precision::Absent),
234 (_, _) => Precision::Absent,
235 }
236 }
237
238 pub fn cast_to(&self, data_type: &DataType) -> Result<Precision<ScalarValue>> {
240 match self {
241 Precision::Exact(value) => value.cast_to(data_type).map(Precision::Exact),
242 Precision::Inexact(value) => value.cast_to(data_type).map(Precision::Inexact),
243 Precision::Absent => Ok(Precision::Absent),
244 }
245 }
246}
247
248impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Debug for Precision<T> {
249 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
250 match self {
251 Precision::Exact(inner) => write!(f, "Exact({inner:?})"),
252 Precision::Inexact(inner) => write!(f, "Inexact({inner:?})"),
253 Precision::Absent => write!(f, "Absent"),
254 }
255 }
256}
257
258impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Display for Precision<T> {
259 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
260 match self {
261 Precision::Exact(inner) => write!(f, "Exact({inner:?})"),
262 Precision::Inexact(inner) => write!(f, "Inexact({inner:?})"),
263 Precision::Absent => write!(f, "Absent"),
264 }
265 }
266}
267
268impl From<Precision<usize>> for Precision<ScalarValue> {
269 fn from(value: Precision<usize>) -> Self {
270 match value {
271 Precision::Exact(v) => Precision::Exact(ScalarValue::UInt64(Some(v as u64))),
272 Precision::Inexact(v) => {
273 Precision::Inexact(ScalarValue::UInt64(Some(v as u64)))
274 }
275 Precision::Absent => Precision::Absent,
276 }
277 }
278}
279
280#[derive(Debug, Clone, PartialEq, Eq)]
285pub struct Statistics {
286 pub num_rows: Precision<usize>,
288 pub total_byte_size: Precision<usize>,
290 pub column_statistics: Vec<ColumnStatistics>,
295}
296
297impl Default for Statistics {
298 fn default() -> Self {
301 Self {
302 num_rows: Precision::Absent,
303 total_byte_size: Precision::Absent,
304 column_statistics: vec![],
305 }
306 }
307}
308
309impl Statistics {
310 pub fn new_unknown(schema: &Schema) -> Self {
313 Self {
314 num_rows: Precision::Absent,
315 total_byte_size: Precision::Absent,
316 column_statistics: Statistics::unknown_column(schema),
317 }
318 }
319
320 pub fn unknown_column(schema: &Schema) -> Vec<ColumnStatistics> {
322 schema
323 .fields()
324 .iter()
325 .map(|_| ColumnStatistics::new_unknown())
326 .collect()
327 }
328
329 pub fn with_num_rows(mut self, num_rows: Precision<usize>) -> Self {
331 self.num_rows = num_rows;
332 self
333 }
334
335 pub fn with_total_byte_size(mut self, total_byte_size: Precision<usize>) -> Self {
337 self.total_byte_size = total_byte_size;
338 self
339 }
340
341 pub fn add_column_statistics(mut self, column_stats: ColumnStatistics) -> Self {
343 self.column_statistics.push(column_stats);
344 self
345 }
346
347 pub fn to_inexact(mut self) -> Self {
350 self.num_rows = self.num_rows.to_inexact();
351 self.total_byte_size = self.total_byte_size.to_inexact();
352 self.column_statistics = self
353 .column_statistics
354 .into_iter()
355 .map(|s| s.to_inexact())
356 .collect();
357 self
358 }
359
360 pub fn project(mut self, projection: Option<&Vec<usize>>) -> Self {
366 let Some(projection) = projection else {
367 return self;
368 };
369
370 #[allow(clippy::large_enum_variant)]
371 enum Slot {
372 Taken(usize),
374 Present(ColumnStatistics),
376 }
377
378 let mut columns: Vec<_> = std::mem::take(&mut self.column_statistics)
380 .into_iter()
381 .map(Slot::Present)
382 .collect();
383
384 for idx in projection {
385 let next_idx = self.column_statistics.len();
386 let slot = std::mem::replace(
387 columns.get_mut(*idx).expect("projection out of bounds"),
388 Slot::Taken(next_idx),
389 );
390 match slot {
391 Slot::Present(col) => self.column_statistics.push(col),
393 Slot::Taken(prev_idx) => self
395 .column_statistics
396 .push(self.column_statistics[prev_idx].clone()),
397 }
398 }
399
400 self
401 }
402
403 pub fn with_fetch(
408 mut self,
409 fetch: Option<usize>,
410 skip: usize,
411 n_partitions: usize,
412 ) -> Result<Self> {
413 let fetch_val = fetch.unwrap_or(usize::MAX);
414
415 let num_rows_before = self.num_rows;
417
418 self.num_rows = match self {
419 Statistics {
420 num_rows: Precision::Exact(nr),
421 ..
422 }
423 | Statistics {
424 num_rows: Precision::Inexact(nr),
425 ..
426 } => {
427 if nr <= skip {
429 Precision::Exact(0)
431 } else if nr <= fetch_val && skip == 0 {
432 return Ok(self);
438 } else if nr - skip <= fetch_val {
439 check_num_rows(
443 (nr - skip).checked_mul(n_partitions),
444 self.num_rows.is_exact().unwrap(),
446 )
447 } else {
448 check_num_rows(
453 fetch_val.checked_mul(n_partitions),
454 self.num_rows.is_exact().unwrap(),
456 )
457 }
458 }
459 Statistics {
460 num_rows: Precision::Absent,
461 ..
462 } => check_num_rows(fetch.and_then(|v| v.checked_mul(n_partitions)), false),
463 };
464 let ratio: f64 = match (num_rows_before, self.num_rows) {
465 (
466 Precision::Exact(nr_before) | Precision::Inexact(nr_before),
467 Precision::Exact(nr_after) | Precision::Inexact(nr_after),
468 ) => {
469 if nr_before == 0 {
470 0.0
471 } else {
472 nr_after as f64 / nr_before as f64
473 }
474 }
475 _ => 0.0,
476 };
477 self.column_statistics = self
478 .column_statistics
479 .into_iter()
480 .map(ColumnStatistics::to_inexact)
481 .collect();
482 self.total_byte_size = match &self.total_byte_size {
484 Precision::Exact(n) | Precision::Inexact(n) => {
485 let adjusted = (*n as f64 * ratio) as usize;
486 Precision::Inexact(adjusted)
487 }
488 Precision::Absent => Precision::Absent,
489 };
490 Ok(self)
491 }
492
493 pub fn try_merge_iter<'a, I>(items: I, schema: &Schema) -> Result<Statistics>
500 where
501 I: IntoIterator<Item = &'a Statistics>,
502 {
503 let mut items = items.into_iter();
504
505 let Some(init) = items.next() else {
506 return Ok(Statistics::new_unknown(schema));
507 };
508 items.try_fold(init.clone(), |acc: Statistics, item_stats: &Statistics| {
509 acc.try_merge(item_stats)
510 })
511 }
512
513 pub fn try_merge(self, other: &Statistics) -> Result<Self> {
556 let Self {
557 mut num_rows,
558 mut total_byte_size,
559 mut column_statistics,
560 } = self;
561
562 num_rows = num_rows.add(&other.num_rows);
564 total_byte_size = total_byte_size.add(&other.total_byte_size);
565
566 if column_statistics.len() != other.column_statistics.len() {
567 return _plan_err!(
568 "Cannot merge statistics with different number of columns: {} vs {}",
569 column_statistics.len(),
570 other.column_statistics.len()
571 );
572 }
573
574 for (item_col_stats, col_stats) in other
575 .column_statistics
576 .iter()
577 .zip(column_statistics.iter_mut())
578 {
579 col_stats.null_count = col_stats.null_count.add(&item_col_stats.null_count);
580 col_stats.max_value = col_stats.max_value.max(&item_col_stats.max_value);
581 col_stats.min_value = col_stats.min_value.min(&item_col_stats.min_value);
582 col_stats.sum_value = col_stats.sum_value.add(&item_col_stats.sum_value);
583 col_stats.distinct_count = Precision::Absent;
584 }
585
586 Ok(Statistics {
587 num_rows,
588 total_byte_size,
589 column_statistics,
590 })
591 }
592}
593
594fn check_num_rows(value: Option<usize>, is_exact: bool) -> Precision<usize> {
597 if let Some(value) = value {
598 if is_exact {
599 Precision::Exact(value)
600 } else {
601 Precision::Inexact(value)
603 }
604 } else {
605 Precision::Absent
608 }
609}
610
611impl Display for Statistics {
612 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
613 let column_stats = self
615 .column_statistics
616 .iter()
617 .enumerate()
618 .map(|(i, cs)| {
619 let s = format!("(Col[{i}]:");
620 let s = if cs.min_value != Precision::Absent {
621 format!("{} Min={}", s, cs.min_value)
622 } else {
623 s
624 };
625 let s = if cs.max_value != Precision::Absent {
626 format!("{} Max={}", s, cs.max_value)
627 } else {
628 s
629 };
630 let s = if cs.sum_value != Precision::Absent {
631 format!("{} Sum={}", s, cs.sum_value)
632 } else {
633 s
634 };
635 let s = if cs.null_count != Precision::Absent {
636 format!("{} Null={}", s, cs.null_count)
637 } else {
638 s
639 };
640 let s = if cs.distinct_count != Precision::Absent {
641 format!("{} Distinct={}", s, cs.distinct_count)
642 } else {
643 s
644 };
645
646 s + ")"
647 })
648 .collect::<Vec<_>>()
649 .join(",");
650
651 write!(
652 f,
653 "Rows={}, Bytes={}, [{}]",
654 self.num_rows, self.total_byte_size, column_stats
655 )?;
656
657 Ok(())
658 }
659}
660
661#[derive(Clone, Debug, PartialEq, Eq, Default)]
663pub struct ColumnStatistics {
664 pub null_count: Precision<usize>,
666 pub max_value: Precision<ScalarValue>,
668 pub min_value: Precision<ScalarValue>,
670 pub sum_value: Precision<ScalarValue>,
672 pub distinct_count: Precision<usize>,
674}
675
676impl ColumnStatistics {
677 pub fn is_singleton(&self) -> bool {
679 match (&self.min_value, &self.max_value) {
680 (Precision::Exact(min), Precision::Exact(max)) => {
682 !min.is_null() && !max.is_null() && (min == max)
683 }
684 (_, _) => false,
685 }
686 }
687
688 pub fn new_unknown() -> Self {
690 Self {
691 null_count: Precision::Absent,
692 max_value: Precision::Absent,
693 min_value: Precision::Absent,
694 sum_value: Precision::Absent,
695 distinct_count: Precision::Absent,
696 }
697 }
698
699 pub fn with_null_count(mut self, null_count: Precision<usize>) -> Self {
701 self.null_count = null_count;
702 self
703 }
704
705 pub fn with_max_value(mut self, max_value: Precision<ScalarValue>) -> Self {
707 self.max_value = max_value;
708 self
709 }
710
711 pub fn with_min_value(mut self, min_value: Precision<ScalarValue>) -> Self {
713 self.min_value = min_value;
714 self
715 }
716
717 pub fn with_sum_value(mut self, sum_value: Precision<ScalarValue>) -> Self {
719 self.sum_value = sum_value;
720 self
721 }
722
723 pub fn with_distinct_count(mut self, distinct_count: Precision<usize>) -> Self {
725 self.distinct_count = distinct_count;
726 self
727 }
728
729 pub fn to_inexact(mut self) -> Self {
733 self.null_count = self.null_count.to_inexact();
734 self.max_value = self.max_value.to_inexact();
735 self.min_value = self.min_value.to_inexact();
736 self.sum_value = self.sum_value.to_inexact();
737 self.distinct_count = self.distinct_count.to_inexact();
738 self
739 }
740}
741
742#[cfg(test)]
743mod tests {
744 use super::*;
745 use crate::assert_contains;
746 use arrow::datatypes::Field;
747 use std::sync::Arc;
748
/// `get_value` exposes the payload of `Exact`/`Inexact` and nothing for `Absent`.
#[test]
fn test_get_value() {
    let exact = Precision::Exact(42);
    let inexact = Precision::Inexact(23);
    let absent: Precision<i32> = Precision::Absent;

    assert_eq!(exact.get_value(), Some(&42));
    assert_eq!(inexact.get_value(), Some(&23));
    assert_eq!(absent.get_value(), None);
}
759
/// `map` transforms the payload and keeps the variant.
#[test]
fn test_map() {
    let square = |x: i32| x * x;

    let exact = Precision::Exact(42);
    let inexact = Precision::Inexact(23);
    let absent = Precision::<i32>::Absent;

    assert_eq!(exact.map(square), Precision::Exact(1764));
    assert_eq!(inexact.map(square), Precision::Inexact(529));
    assert_eq!(absent.map(square), Precision::Absent);
}
772
/// `is_exact` distinguishes the three variants.
#[test]
fn test_is_exact() {
    let cases = [
        (Precision::Exact(42), Some(true)),
        (Precision::Inexact(23), Some(false)),
        (Precision::<i32>::Absent, None),
    ];

    for (precision, expected) in cases.iter() {
        assert_eq!(precision.is_exact(), *expected);
    }
}
783
/// `max` picks the larger payload and is exact only for two exact inputs.
#[test]
fn test_max() {
    let exact_42 = Precision::Exact(42);
    let inexact_23 = Precision::Inexact(23);
    let exact_30 = Precision::Exact(30);
    let absent = Precision::Absent;

    assert_eq!(exact_42.max(&inexact_23), Precision::Inexact(42));
    assert_eq!(exact_42.max(&exact_30), Precision::Exact(42));
    assert_eq!(inexact_23.max(&exact_30), Precision::Inexact(30));
    assert_eq!(exact_42.max(&absent), Precision::Absent);
}
796
/// `min` picks the smaller payload and is exact only for two exact inputs.
#[test]
fn test_min() {
    let exact_42 = Precision::Exact(42);
    let inexact_23 = Precision::Inexact(23);
    let exact_30 = Precision::Exact(30);
    let absent = Precision::Absent;

    assert_eq!(exact_42.min(&inexact_23), Precision::Inexact(23));
    assert_eq!(exact_42.min(&exact_30), Precision::Exact(30));
    assert_eq!(inexact_23.min(&exact_30), Precision::Inexact(23));
    assert_eq!(exact_42.min(&absent), Precision::Absent);
}
809
/// `to_inexact` downgrades `Exact` and leaves the other variants alone.
#[test]
fn test_to_inexact() {
    let exact = Precision::Exact(42);
    let inexact = Precision::Inexact(42);
    let absent = Precision::<i32>::Absent;

    // `Precision<i32>` is `Copy`, so the values stay usable after the calls.
    assert_eq!(exact.to_inexact(), inexact);
    assert_eq!(inexact.to_inexact(), inexact);
    assert_eq!(absent.to_inexact(), absent);
}
820
/// `add` on `usize` precisions: exactness propagation and saturation on
/// overflow for both exact and inexact operands.
#[test]
fn test_add() {
    let precision1: Precision<usize> = Precision::Exact(42);
    let precision2 = Precision::Inexact(23);
    let precision3 = Precision::Exact(30);
    let absent_precision = Precision::Absent;
    let precision_max_exact = Precision::Exact(usize::MAX);
    // Must really be `Inexact` so the inexact overflow path is exercised.
    let precision_max_inexact = Precision::Inexact(usize::MAX);

    assert_eq!(precision1.add(&precision2), Precision::Inexact(65));
    assert_eq!(precision1.add(&precision3), Precision::Exact(72));
    assert_eq!(precision2.add(&precision3), Precision::Inexact(53));
    assert_eq!(precision1.add(&absent_precision), Precision::Absent);
    assert_eq!(
        precision_max_exact.add(&precision1),
        Precision::Inexact(usize::MAX)
    );
    assert_eq!(
        precision_max_inexact.add(&precision1),
        Precision::Inexact(usize::MAX)
    );
}
843
/// `add` on `ScalarValue` precisions, including a null operand.
#[test]
fn test_add_scalar() {
    let int32 = ScalarValue::Int32;
    let lhs = Precision::Exact(int32(Some(42)));

    assert_eq!(
        lhs.add(&Precision::Exact(int32(Some(23)))),
        Precision::Exact(int32(Some(65))),
    );
    assert_eq!(
        lhs.add(&Precision::Inexact(int32(Some(23)))),
        Precision::Inexact(int32(Some(65))),
    );
    assert_eq!(
        lhs.add(&Precision::Exact(int32(None))),
        Precision::Exact(int32(None)),
    );
    assert_eq!(lhs.add(&Precision::Absent), Precision::Absent);
}
863
/// `sub` on `usize` precisions: an underflow saturates to an inexact zero.
#[test]
fn test_sub() {
    let exact_42: Precision<usize> = Precision::Exact(42);
    let inexact_23 = Precision::Inexact(23);
    let exact_30 = Precision::Exact(30);
    let absent = Precision::Absent;

    assert_eq!(exact_42.sub(&inexact_23), Precision::Inexact(19));
    assert_eq!(exact_42.sub(&exact_30), Precision::Exact(12));
    assert_eq!(inexact_23.sub(&exact_42), Precision::Inexact(0));
    assert_eq!(exact_30.sub(&exact_42), Precision::Inexact(0));
    assert_eq!(exact_42.sub(&absent), Precision::Absent);
}
877
/// `sub` on `ScalarValue` precisions, including a null operand.
#[test]
fn test_sub_scalar() {
    let int32 = ScalarValue::Int32;
    let lhs = Precision::Exact(int32(Some(42)));

    assert_eq!(
        lhs.sub(&Precision::Exact(int32(Some(23)))),
        Precision::Exact(int32(Some(19))),
    );
    assert_eq!(
        lhs.sub(&Precision::Inexact(int32(Some(23)))),
        Precision::Inexact(int32(Some(19))),
    );
    assert_eq!(
        lhs.sub(&Precision::Exact(int32(None))),
        Precision::Exact(int32(None)),
    );
    assert_eq!(lhs.sub(&Precision::Absent), Precision::Absent);
}
897
/// `multiply` on `usize` precisions: exactness propagation and saturation on
/// overflow for both exact and inexact operands.
#[test]
fn test_multiply() {
    let precision1: Precision<usize> = Precision::Exact(6);
    let precision2 = Precision::Inexact(3);
    let precision3 = Precision::Exact(5);
    let precision_max_exact = Precision::Exact(usize::MAX);
    // Must really be `Inexact` so the inexact overflow path is exercised.
    let precision_max_inexact = Precision::Inexact(usize::MAX);
    let absent_precision = Precision::Absent;

    assert_eq!(precision1.multiply(&precision2), Precision::Inexact(18));
    assert_eq!(precision1.multiply(&precision3), Precision::Exact(30));
    assert_eq!(precision2.multiply(&precision3), Precision::Inexact(15));
    assert_eq!(precision1.multiply(&absent_precision), Precision::Absent);
    assert_eq!(
        precision_max_exact.multiply(&precision1),
        Precision::Inexact(usize::MAX)
    );
    assert_eq!(
        precision_max_inexact.multiply(&precision1),
        Precision::Inexact(usize::MAX)
    );
}
920
/// `multiply` on `ScalarValue` precisions, including a null operand.
#[test]
fn test_multiply_scalar() {
    let int32 = ScalarValue::Int32;
    let lhs = Precision::Exact(int32(Some(6)));

    assert_eq!(
        lhs.multiply(&Precision::Exact(int32(Some(5)))),
        Precision::Exact(int32(Some(30))),
    );
    assert_eq!(
        lhs.multiply(&Precision::Inexact(int32(Some(5)))),
        Precision::Inexact(int32(Some(30))),
    );
    assert_eq!(
        lhs.multiply(&Precision::Exact(int32(None))),
        Precision::Exact(int32(None)),
    );
    assert_eq!(lhs.multiply(&Precision::Absent), Precision::Absent);
}
940
/// `cast_to` keeps the variant, handles nulls and reports failing casts.
#[test]
fn test_cast_to() {
    let to_int64 = |precision: Precision<ScalarValue>| {
        precision.cast_to(&DataType::Int64).unwrap()
    };

    assert_eq!(
        to_int64(Precision::Exact(ScalarValue::Int32(Some(42)))),
        Precision::Exact(ScalarValue::Int64(Some(42))),
    );
    assert_eq!(
        to_int64(Precision::Inexact(ScalarValue::Int32(Some(42)))),
        Precision::Inexact(ScalarValue::Int64(Some(42))),
    );
    assert_eq!(
        to_int64(Precision::Exact(ScalarValue::Int32(None))),
        Precision::Exact(ScalarValue::Int64(None)),
    );

    let out_of_range = Precision::Exact(ScalarValue::Int32(Some(256)));
    assert!(out_of_range.cast_to(&DataType::Int8).is_err());
}
968
/// `Precision<usize>` can be copied implicitly, `Precision<ScalarValue>` is
/// duplicated with an explicit clone.
#[test]
fn test_precision_cloning() {
    let copyable: Precision<usize> = Precision::Exact(42);
    let copied = copyable;
    assert_eq!(copyable, copied);

    let clonable: Precision<ScalarValue> =
        Precision::Exact(ScalarValue::Int64(Some(42)));
    // The clone is the point of this test, so silence the lint.
    #[allow(clippy::redundant_clone)]
    let cloned = clonable.clone();
    assert_eq!(clonable, cloned);
}
984
/// Projecting with `None` leaves the statistics untouched.
#[test]
fn test_project_none() {
    let projection: Option<Vec<usize>> = None;
    let projected = make_stats(vec![10, 20, 30]).project(projection.as_ref());
    let expected = make_stats(vec![10, 20, 30]);
    assert_eq!(projected, expected);
}
991
/// An empty projection drops every column.
#[test]
fn test_project_empty() {
    let projection: Option<Vec<usize>> = Some(Vec::new());
    let projected = make_stats(vec![10, 20, 30]).project(projection.as_ref());
    let expected = make_stats(vec![]);
    assert_eq!(projected, expected);
}
998
/// A projection can select a subset of the columns in a different order.
#[test]
fn test_project_swap() {
    let projection: Option<Vec<usize>> = Some(vec![2, 1]);
    let projected = make_stats(vec![10, 20, 30]).project(projection.as_ref());
    let expected = make_stats(vec![30, 20]);
    assert_eq!(projected, expected);
}
1005
/// A projection may reference the same column several times.
#[test]
fn test_project_repeated() {
    let projection: Option<Vec<usize>> = Some(vec![1, 2, 1, 1, 0, 2]);
    let projected = make_stats(vec![10, 20, 30]).project(projection.as_ref());
    let expected = make_stats(vec![20, 30, 20, 20, 10, 30]);
    assert_eq!(projected, expected);
}
1012
1013 fn make_stats(counts: impl IntoIterator<Item = usize>) -> Statistics {
1015 Statistics {
1016 num_rows: Precision::Exact(42),
1017 total_byte_size: Precision::Exact(500),
1018 column_statistics: counts.into_iter().map(col_stats_i64).collect(),
1019 }
1020 }
1021
1022 fn col_stats_i64(null_count: usize) -> ColumnStatistics {
1023 ColumnStatistics {
1024 null_count: Precision::Exact(null_count),
1025 max_value: Precision::Exact(ScalarValue::Int64(Some(42))),
1026 min_value: Precision::Exact(ScalarValue::Int64(Some(64))),
1027 sum_value: Precision::Exact(ScalarValue::Int64(Some(4600))),
1028 distinct_count: Precision::Exact(100),
1029 }
1030 }
1031
/// Merging two fully exact statistics sums counts, byte sizes, null counts
/// and sums, and combines the per-column minima and maxima.
#[test]
fn test_try_merge_basic() {
    let schema = Arc::new(Schema::new(vec![
        Field::new("col1", DataType::Int32, false),
        Field::new("col2", DataType::Int32, false),
    ]));

    let exact_i32 = |v: i32| Precision::Exact(ScalarValue::Int32(Some(v)));
    let column = |nulls: usize, max: i32, min: i32, sum: i32| ColumnStatistics {
        null_count: Precision::Exact(nulls),
        max_value: exact_i32(max),
        min_value: exact_i32(min),
        sum_value: exact_i32(sum),
        distinct_count: Precision::Absent,
    };

    let items = vec![
        Statistics {
            num_rows: Precision::Exact(10),
            total_byte_size: Precision::Exact(100),
            column_statistics: vec![column(1, 100, 1, 500), column(2, 200, 10, 1000)],
        },
        Statistics {
            num_rows: Precision::Exact(15),
            total_byte_size: Precision::Exact(150),
            column_statistics: vec![column(2, 120, -10, 600), column(3, 180, 5, 1200)],
        },
    ];

    let merged = Statistics::try_merge_iter(&items, &schema).unwrap();

    assert_eq!(merged.num_rows, Precision::Exact(25));
    assert_eq!(merged.total_byte_size, Precision::Exact(250));

    let first = &merged.column_statistics[0];
    assert_eq!(first.null_count, Precision::Exact(3));
    assert_eq!(first.max_value, exact_i32(120));
    assert_eq!(first.min_value, exact_i32(-10));
    assert_eq!(first.sum_value, exact_i32(1100));

    let second = &merged.column_statistics[1];
    assert_eq!(second.null_count, Precision::Exact(5));
    assert_eq!(second.max_value, exact_i32(200));
    assert_eq!(second.min_value, exact_i32(5));
    assert_eq!(second.sum_value, exact_i32(2200));
}
1122
/// Merging exact with inexact values yields inexact results, and an absent
/// sum on either side makes the merged sum absent.
#[test]
fn test_try_merge_mixed_precision() {
    let schema = Arc::new(Schema::new(vec![Field::new(
        "col1",
        DataType::Int32,
        false,
    )]));

    let int32 = |v: i32| ScalarValue::Int32(Some(v));

    let first = Statistics::default()
        .with_num_rows(Precision::Exact(10))
        .with_total_byte_size(Precision::Inexact(100))
        .add_column_statistics(
            ColumnStatistics::new_unknown()
                .with_null_count(Precision::Exact(1))
                .with_max_value(Precision::Exact(int32(100)))
                .with_min_value(Precision::Inexact(int32(1)))
                .with_sum_value(Precision::Exact(int32(500))),
        );

    let second = Statistics::default()
        .with_num_rows(Precision::Inexact(15))
        .with_total_byte_size(Precision::Exact(150))
        .add_column_statistics(
            ColumnStatistics::new_unknown()
                .with_null_count(Precision::Inexact(2))
                .with_max_value(Precision::Inexact(int32(120)))
                .with_min_value(Precision::Exact(int32(-10))),
        );

    let items = vec![first, second];
    let merged = Statistics::try_merge_iter(&items, &schema).unwrap();

    assert_eq!(merged.num_rows, Precision::Inexact(25));
    assert_eq!(merged.total_byte_size, Precision::Inexact(250));

    let column = &merged.column_statistics[0];
    assert_eq!(column.null_count, Precision::Inexact(3));
    assert_eq!(column.max_value, Precision::Inexact(int32(120)));
    assert_eq!(column.min_value, Precision::Inexact(int32(-10)));
    assert!(matches!(column.sum_value, Precision::Absent));
}
1176
/// Merging nothing yields unknown statistics shaped after the schema.
#[test]
fn test_try_merge_empty() {
    let schema = Arc::new(Schema::new(vec![Field::new(
        "col1",
        DataType::Int32,
        false,
    )]));
    let no_items: Vec<Statistics> = Vec::new();

    let Statistics {
        num_rows,
        total_byte_size,
        column_statistics,
    } = Statistics::try_merge_iter(&no_items, &schema).unwrap();

    assert_eq!(num_rows, Precision::Absent);
    assert_eq!(total_byte_size, Precision::Absent);
    assert_eq!(column_statistics.len(), 1);
    assert_eq!(column_statistics[0].null_count, Precision::Absent);
}
1199
/// Merging statistics with different column counts is a planning error.
#[test]
fn test_try_merge_mismatched_size() {
    let schema = Arc::new(Schema::new(vec![Field::new(
        "col1",
        DataType::Int32,
        false,
    )]));

    let without_columns = Statistics::default();
    let with_one_column =
        Statistics::default().add_column_statistics(ColumnStatistics::new_unknown());
    let items = vec![without_columns, with_one_column];

    let error = Statistics::try_merge_iter(&items, &schema).unwrap_err();
    assert_contains!(error.to_string(), "Error during planning: Cannot merge statistics with different number of columns: 0 vs 1");
}
1220
/// Distinct counts cannot be merged, so the merged value is always absent.
#[test]
fn test_try_merge_distinct_count_absent() {
    let exact_i32 = |v: i32| Precision::Exact(ScalarValue::Int32(Some(v)));
    let stats = |rows: usize, bytes: usize, min: i32, max: i32, distinct: usize| {
        Statistics {
            num_rows: Precision::Exact(rows),
            total_byte_size: Precision::Exact(bytes),
            column_statistics: vec![ColumnStatistics {
                null_count: Precision::Exact(0),
                max_value: exact_i32(max),
                min_value: exact_i32(min),
                sum_value: Precision::Absent,
                distinct_count: Precision::Exact(distinct),
            }],
        }
    };

    let left = stats(10, 100, 1, 10, 5);
    let right = stats(15, 150, 5, 20, 7);

    let merged = left.try_merge(&right).unwrap();

    assert_eq!(merged.num_rows, Precision::Exact(25));
    assert_eq!(merged.total_byte_size, Precision::Exact(250));

    let column = &merged.column_statistics[0];
    assert_eq!(column.null_count, Precision::Exact(0));
    assert_eq!(column.min_value, exact_i32(1));
    assert_eq!(column.max_value, exact_i32(20));
    assert_eq!(column.distinct_count, Precision::Absent);
}
1266
/// Applying a fetch of 100 to exact two-column statistics: the values of every
/// column statistic are expected to be kept, but reported as `Inexact`.
#[test]
fn test_with_fetch_basic_preservation() {
    let int32_column = ColumnStatistics {
        null_count: Precision::Exact(10),
        max_value: Precision::Exact(ScalarValue::Int32(Some(100))),
        min_value: Precision::Exact(ScalarValue::Int32(Some(0))),
        sum_value: Precision::Exact(ScalarValue::Int32(Some(5050))),
        distinct_count: Precision::Exact(50),
    };
    let int64_column = ColumnStatistics {
        null_count: Precision::Exact(20),
        max_value: Precision::Exact(ScalarValue::Int64(Some(200))),
        min_value: Precision::Exact(ScalarValue::Int64(Some(10))),
        sum_value: Precision::Exact(ScalarValue::Int64(Some(10100))),
        distinct_count: Precision::Exact(75),
    };
    let input = Statistics {
        num_rows: Precision::Exact(1000),
        total_byte_size: Precision::Exact(8000),
        column_statistics: vec![int32_column, int64_column],
    };

    // fetch = 100, skip = 0, single partition.
    let limited = input.with_fetch(Some(100), 0, 1).unwrap();

    // Row count is expected to stay exact; byte size is expected to shrink to
    // 800 and become inexact.
    assert_eq!(limited.num_rows, Precision::Exact(100));
    assert_eq!(limited.total_byte_size, Precision::Inexact(800));

    // Checked before the pairwise comparison below so a missing column cannot
    // go unnoticed.
    assert_eq!(limited.column_statistics.len(), 2);

    // Same values as the inputs, with every precision turned into `Inexact`.
    let expected_columns = [
        ColumnStatistics {
            null_count: Precision::Inexact(10),
            max_value: Precision::Inexact(ScalarValue::Int32(Some(100))),
            min_value: Precision::Inexact(ScalarValue::Int32(Some(0))),
            sum_value: Precision::Inexact(ScalarValue::Int32(Some(5050))),
            distinct_count: Precision::Inexact(50),
        },
        ColumnStatistics {
            null_count: Precision::Inexact(20),
            max_value: Precision::Inexact(ScalarValue::Int64(Some(200))),
            min_value: Precision::Inexact(ScalarValue::Int64(Some(10))),
            sum_value: Precision::Inexact(ScalarValue::Int64(Some(10100))),
            distinct_count: Precision::Inexact(75),
        },
    ];
    for (actual, expected) in limited.column_statistics.iter().zip(&expected_columns) {
        assert_eq!(actual.null_count, expected.null_count);
        assert_eq!(actual.max_value, expected.max_value);
        assert_eq!(actual.min_value, expected.min_value);
        assert_eq!(actual.sum_value, expected.sum_value);
        assert_eq!(actual.distinct_count, expected.distinct_count);
    }
}
1348
/// Applying a fetch to statistics that are already inexact: the results are
/// expected to remain inexact.
#[test]
fn test_with_fetch_inexact_input() {
    let inexact_column = ColumnStatistics {
        null_count: Precision::Inexact(10),
        max_value: Precision::Inexact(ScalarValue::Int32(Some(100))),
        min_value: Precision::Inexact(ScalarValue::Int32(Some(0))),
        sum_value: Precision::Inexact(ScalarValue::Int32(Some(5050))),
        distinct_count: Precision::Inexact(50),
    };
    let input = Statistics {
        num_rows: Precision::Inexact(1000),
        total_byte_size: Precision::Inexact(8000),
        column_statistics: vec![inexact_column],
    };

    // fetch = 500, skip = 0, single partition.
    let limited = input.with_fetch(Some(500), 0, 1).unwrap();

    // Half of the rows are fetched; the expected byte size is halved as well.
    assert_eq!(limited.num_rows, Precision::Inexact(500));
    assert_eq!(limited.total_byte_size, Precision::Inexact(4000));

    // The null count is expected to be carried over unchanged.
    let first_column = &limited.column_statistics[0];
    assert_eq!(first_column.null_count, Precision::Inexact(10));
}
1379
/// A skip equal to the exact row count: the test expects an exact row count of
/// zero and an inexact byte size of zero.
#[test]
fn test_with_fetch_skip_all_rows() {
    let total_rows = 100;
    let input = Statistics {
        num_rows: Precision::Exact(total_rows),
        total_byte_size: Precision::Exact(800),
        column_statistics: vec![col_stats_i64(10)],
    };

    // Skip every available row before fetching up to 50.
    let limited = input.with_fetch(Some(50), total_rows, 1).unwrap();

    assert_eq!(limited.num_rows, Precision::Exact(0));
    assert_eq!(limited.total_byte_size, Precision::Inexact(0));
}
1395
/// With neither a fetch nor a skip, row count and byte size are expected to
/// come back exactly as they went in.
#[test]
fn test_with_fetch_no_limit() {
    let input = Statistics {
        num_rows: Precision::Exact(100),
        total_byte_size: Precision::Exact(800),
        column_statistics: vec![col_stats_i64(10)],
    };

    // fetch = None, skip = 0, single partition.
    let Statistics {
        num_rows,
        total_byte_size,
        ..
    } = input.with_fetch(None, 0, 1).unwrap();

    assert_eq!(num_rows, Precision::Exact(100));
    assert_eq!(total_byte_size, Precision::Exact(800));
}
1411
/// Fetch combined with a non-zero skip: with 1000 exact rows, skipping 200 and
/// fetching 300 is expected to yield exactly 300 rows and an inexact byte size
/// of 2400.
#[test]
fn test_with_fetch_with_skip() {
    let input = Statistics::default()
        .with_num_rows(Precision::Exact(1000))
        .with_total_byte_size(Precision::Exact(8000))
        .add_column_statistics(col_stats_i64(10));

    let (fetch, skip, partitions) = (Some(300), 200, 1);
    let limited = input.with_fetch(fetch, skip, partitions).unwrap();

    assert_eq!(limited.num_rows, Precision::Exact(300));
    assert_eq!(limited.total_byte_size, Precision::Inexact(2400));
}
1431
/// Fetch applied with several partitions: a fetch of 100 across 4 partitions
/// is expected to report exactly 400 rows and an inexact byte size of 3200.
#[test]
fn test_with_fetch_multi_partition() {
    let partitions = 4;
    let input = Statistics {
        num_rows: Precision::Exact(1000),
        total_byte_size: Precision::Exact(8000),
        column_statistics: vec![col_stats_i64(10)],
    };

    let limited = input.with_fetch(Some(100), 0, partitions).unwrap();

    assert_eq!(limited.num_rows, Precision::Exact(400));
    assert_eq!(limited.total_byte_size, Precision::Inexact(3200));
}
1448
/// Fetch applied to statistics where everything is `Absent`: only the row
/// count is expected to become known (inexactly, from the fetch value); byte
/// size and the column null count are expected to stay absent.
#[test]
fn test_with_fetch_absent_stats() {
    let unknown_column = ColumnStatistics {
        null_count: Precision::Absent,
        max_value: Precision::Absent,
        min_value: Precision::Absent,
        sum_value: Precision::Absent,
        distinct_count: Precision::Absent,
    };
    let input = Statistics {
        num_rows: Precision::Absent,
        total_byte_size: Precision::Absent,
        column_statistics: vec![unknown_column],
    };

    // fetch = 100, skip = 0, single partition.
    let limited = input.with_fetch(Some(100), 0, 1).unwrap();

    assert_eq!(limited.num_rows, Precision::Inexact(100));
    assert_eq!(limited.total_byte_size, Precision::Absent);

    let first_column = &limited.column_statistics[0];
    assert_eq!(first_column.null_count, Precision::Absent);
}
1472
/// Fetch larger than what is left after the skip: with 100 exact rows, a skip
/// of 50 and a fetch of 100, the test expects exactly 50 rows and an inexact
/// byte size of 400.
#[test]
fn test_with_fetch_fetch_exceeds_rows() {
    let input = Statistics::default()
        .with_num_rows(Precision::Exact(100))
        .with_total_byte_size(Precision::Exact(800))
        .add_column_statistics(col_stats_i64(10));

    let (fetch, skip, partitions) = (Some(100), 50, 1);
    let limited = input.with_fetch(fetch, skip, partitions).unwrap();

    assert_eq!(limited.num_rows, Precision::Exact(50));
    assert_eq!(limited.total_byte_size, Precision::Inexact(400));
}
1489
/// Every field of a column's statistics is expected to keep its value across a
/// fetch, with the precision turned from `Exact` into `Inexact`.
#[test]
fn test_with_fetch_preserves_all_column_stats() {
    let input = Statistics {
        num_rows: Precision::Exact(1000),
        total_byte_size: Precision::Exact(8000),
        column_statistics: vec![ColumnStatistics {
            null_count: Precision::Exact(42),
            max_value: Precision::Exact(ScalarValue::Int32(Some(999))),
            min_value: Precision::Exact(ScalarValue::Int32(Some(-100))),
            sum_value: Precision::Exact(ScalarValue::Int32(Some(123456))),
            distinct_count: Precision::Exact(789),
        }],
    };

    // fetch = 250, skip = 0, single partition.
    let limited = input.with_fetch(Some(250), 0, 1).unwrap();

    // Pull every field out of the only column so that each one is checked.
    let ColumnStatistics {
        null_count,
        max_value,
        min_value,
        sum_value,
        distinct_count,
    } = &limited.column_statistics[0];

    assert_eq!(null_count, &Precision::Inexact(42));
    assert_eq!(
        max_value,
        &Precision::Inexact(ScalarValue::Int32(Some(999)))
    );
    assert_eq!(
        min_value,
        &Precision::Inexact(ScalarValue::Int32(Some(-100)))
    );
    assert_eq!(
        sum_value,
        &Precision::Inexact(ScalarValue::Int32(Some(123456)))
    );
    assert_eq!(distinct_count, &Precision::Inexact(789));
}
1527}