1use crate::{error, proto};
19
/// Statistics recorded for a single column, converted from
/// `proto::ColumnStatistics`.
///
/// All fields are private; read them through the accessor methods on this
/// type.
#[derive(Debug, Clone)]
pub struct ColumnStatistics {
    /// Value count, copied from the protobuf message's `number_of_values()`.
    number_of_values: u64,
    /// Null flag, copied from the protobuf message's `has_null()`.
    has_null: bool,
    /// Type-specific statistics. `None` when the value count is zero or when
    /// the protobuf message carries none of the recognised per-type
    /// statistics messages.
    type_statistics: Option<TypeStatistics>,
}
29
30impl ColumnStatistics {
31 pub fn number_of_values(&self) -> u64 {
32 self.number_of_values
33 }
34
35 pub fn has_null(&self) -> bool {
36 self.has_null
37 }
38
39 pub fn type_statistics(&self) -> Option<&TypeStatistics> {
40 self.type_statistics.as_ref()
41 }
42}
43
/// Type-specific statistics attached to a [`ColumnStatistics`].
///
/// Each variant mirrors one of the per-type statistics messages that can be
/// present on `proto::ColumnStatistics`.
#[derive(Debug, Clone)]
pub enum TypeStatistics {
    /// Built from the protobuf `int_statistics` message.
    Integer {
        /// Minimum value reported by the message.
        min: i64,
        /// Maximum value reported by the message.
        max: i64,
        /// Sum of the values; `None` when the message does not carry one.
        sum: Option<i64>,
    },
    /// Built from the protobuf `double_statistics` message.
    Double {
        /// Minimum value reported by the message.
        min: f64,
        /// Maximum value reported by the message.
        max: f64,
        /// Sum of the values; `None` when the message does not carry one.
        sum: Option<f64>,
    },
    /// Built from the protobuf `string_statistics` message.
    String {
        /// The message's `minimum` when present, otherwise its `lower_bound`.
        lower_bound: String,
        /// The message's `maximum` when present, otherwise its `upper_bound`.
        upper_bound: String,
        /// The message's `sum` value.
        // NOTE(review): presumably the total length of all strings — confirm
        // against the ORC specification.
        sum: i64,
        /// `true` when `lower_bound` came from the message's `minimum` field
        /// rather than from its `lower_bound` fallback.
        is_exact_min: bool,
        /// `true` when `upper_bound` came from the message's `maximum` field
        /// rather than from its `upper_bound` fallback.
        is_exact_max: bool,
    },
    /// Built from the protobuf `bucket_statistics` message; `true_count` is
    /// taken from the first entry of its `count` list.
    // NOTE(review): presumably the number of `true` values in a boolean
    // column — confirm against the ORC specification.
    Bucket { true_count: u64 },
    /// Built from the protobuf `decimal_statistics` message. Values are kept
    /// in the string form supplied by the message.
    Decimal {
        /// Minimum value, as a string.
        min: String,
        /// Maximum value, as a string.
        max: String,
        /// Sum of the values, as a string.
        sum: String,
    },
    /// Built from the protobuf `date_statistics` message.
    // NOTE(review): values are presumably days since the Unix epoch — confirm.
    Date {
        /// Minimum value reported by the message.
        min: i32,
        /// Maximum value reported by the message.
        max: i32,
    },
    /// Built from the protobuf `binary_statistics` message.
    Binary {
        /// The message's `sum` value.
        // NOTE(review): presumably the total number of bytes — confirm.
        sum: i64,
    },
    /// Built from the protobuf `timestamp_statistics` message.
    // NOTE(review): units and time-zone handling of these fields are not
    // visible here — confirm against the ORC specification.
    Timestamp {
        /// The message's `minimum()` value.
        min: i64,
        /// The message's `maximum()` value.
        max: i64,
        /// The message's `minimum_utc()` value.
        min_utc: i64,
        /// The message's `maximum_utc()` value.
        max_utc: i64,
    },
    /// Built from the protobuf `collection_statistics` message.
    Collection {
        /// The message's `min_children()` value.
        min_children: u64,
        /// The message's `max_children()` value.
        max_children: u64,
        /// The message's `total_children()` value.
        total_children: u64,
    },
}
103
104impl TryFrom<&proto::ColumnStatistics> for ColumnStatistics {
105 type Error = error::OrcError;
106
107 fn try_from(value: &proto::ColumnStatistics) -> Result<Self, Self::Error> {
108 let type_statistics = if value.number_of_values() == 0 {
109 None
110 } else if let Some(stats) = &value.int_statistics {
111 Some(TypeStatistics::Integer {
112 min: stats.minimum(),
113 max: stats.maximum(),
114 sum: stats.sum,
115 })
116 } else if let Some(stats) = &value.double_statistics {
117 Some(TypeStatistics::Double {
118 min: stats.minimum(),
119 max: stats.maximum(),
120 sum: stats.sum,
121 })
122 } else if let Some(stats) = &value.string_statistics {
123 let (lower_bound, is_exact_min) = stats
124 .minimum
125 .as_deref()
126 .map(|s| (s, true))
127 .unwrap_or_else(|| (stats.lower_bound(), false));
128 let (upper_bound, is_exact_max) = stats
129 .maximum
130 .as_deref()
131 .map(|s| (s, true))
132 .unwrap_or_else(|| (stats.upper_bound(), false));
133 Some(TypeStatistics::String {
134 lower_bound: lower_bound.to_owned(),
135 upper_bound: upper_bound.to_owned(),
136 sum: stats.sum(),
137 is_exact_min,
138 is_exact_max,
139 })
140 } else if let Some(stats) = &value.bucket_statistics {
141 Some(TypeStatistics::Bucket {
143 true_count: stats.count[0], })
145 } else if let Some(stats) = &value.decimal_statistics {
146 Some(TypeStatistics::Decimal {
147 min: stats.minimum().to_owned(),
148 max: stats.maximum().to_owned(),
149 sum: stats.sum().to_owned(),
150 })
151 } else if let Some(stats) = &value.date_statistics {
152 Some(TypeStatistics::Date {
153 min: stats.minimum(),
154 max: stats.maximum(),
155 })
156 } else if let Some(stats) = &value.binary_statistics {
157 Some(TypeStatistics::Binary { sum: stats.sum() })
158 } else if let Some(stats) = &value.timestamp_statistics {
159 Some(TypeStatistics::Timestamp {
160 min: stats.minimum(),
161 max: stats.maximum(),
162 min_utc: stats.minimum_utc(),
163 max_utc: stats.maximum_utc(),
164 })
165 } else {
166 value
167 .collection_statistics
168 .as_ref()
169 .map(|stats| TypeStatistics::Collection {
170 min_children: stats.min_children(),
171 max_children: stats.max_children(),
172 total_children: stats.total_children(),
173 })
174 };
175 Ok(Self {
176 number_of_values: value.number_of_values(),
177 has_null: value.has_null(),
178 type_statistics,
179 })
180 }
181}