vortex_array/expr/stats/
mod.rs1use std::fmt::Debug;
5use std::fmt::Display;
6use std::fmt::Formatter;
7
8use enum_iterator::Sequence;
9use enum_iterator::all;
10use num_enum::IntoPrimitive;
11use num_enum::TryFromPrimitive;
12use vortex_dtype::DType;
13use vortex_dtype::DecimalDType;
14use vortex_dtype::MAX_PRECISION;
15use vortex_dtype::Nullability::NonNullable;
16use vortex_dtype::Nullability::Nullable;
17use vortex_dtype::PType;
18
19mod bound;
20mod precision;
21mod provider;
22mod stat_bound;
23
24pub use bound::*;
25pub use precision::*;
26pub use provider::*;
27pub use stat_bound::*;
28
29#[derive(
30 Debug,
31 Clone,
32 Copy,
33 PartialEq,
34 Eq,
35 PartialOrd,
36 Ord,
37 Hash,
38 Sequence,
39 IntoPrimitive,
40 TryFromPrimitive,
41)]
42#[repr(u8)]
43pub enum Stat {
44 IsConstant = 0,
47 IsSorted = 1,
49 IsStrictSorted = 2,
51 Max = 3,
53 Min = 4,
55 Sum = 5,
57 NullCount = 6,
59 UncompressedSizeInBytes = 7,
61 NaNCount = 8,
63}
64
/// Marker type for the `Max` statistic; its `StatType` impl ties it to `Stat::Max`.
#[derive(Debug, Clone, Copy)]
pub struct Max;

/// Marker type for the `Min` statistic; its `StatType` impl ties it to `Stat::Min`.
#[derive(Debug, Clone, Copy)]
pub struct Min;

/// Marker type for the `Sum` statistic; its `StatType` impl ties it to `Stat::Sum`.
#[derive(Debug, Clone, Copy)]
pub struct Sum;

/// Marker type for the `IsConstant` statistic; its `StatType` impl ties it to
/// `Stat::IsConstant`.
#[derive(Debug, Clone, Copy)]
pub struct IsConstant;

/// Marker type for the `IsSorted` statistic; its `StatType` impl ties it to
/// `Stat::IsSorted`.
#[derive(Debug, Clone, Copy)]
pub struct IsSorted;

/// Marker type for the `IsStrictSorted` statistic; its `StatType` impl ties it to
/// `Stat::IsStrictSorted`.
#[derive(Debug, Clone, Copy)]
pub struct IsStrictSorted;

/// Marker type for the `NullCount` statistic; its `StatType` impl ties it to
/// `Stat::NullCount`.
#[derive(Debug, Clone, Copy)]
pub struct NullCount;

/// Marker type for the `UncompressedSizeInBytes` statistic; its `StatType` impl ties
/// it to `Stat::UncompressedSizeInBytes`.
#[derive(Debug, Clone, Copy)]
pub struct UncompressedSizeInBytes;

/// Marker type for the `NaNCount` statistic; its `StatType` impl ties it to
/// `Stat::NaNCount`.
#[derive(Debug, Clone, Copy)]
pub struct NaNCount;
84
85impl StatType<bool> for IsConstant {
86 type Bound = Precision<bool>;
87
88 const STAT: Stat = Stat::IsConstant;
89}
90
91impl StatType<bool> for IsSorted {
92 type Bound = Precision<bool>;
93
94 const STAT: Stat = Stat::IsSorted;
95}
96
97impl StatType<bool> for IsStrictSorted {
98 type Bound = Precision<bool>;
99
100 const STAT: Stat = Stat::IsStrictSorted;
101}
102
103impl<T: PartialOrd + Clone> StatType<T> for NullCount {
104 type Bound = UpperBound<T>;
105
106 const STAT: Stat = Stat::NullCount;
107}
108
109impl<T: PartialOrd + Clone> StatType<T> for UncompressedSizeInBytes {
110 type Bound = UpperBound<T>;
111
112 const STAT: Stat = Stat::UncompressedSizeInBytes;
113}
114
115impl<T: PartialOrd + Clone + Debug> StatType<T> for Max {
116 type Bound = UpperBound<T>;
117
118 const STAT: Stat = Stat::Max;
119}
120
121impl<T: PartialOrd + Clone + Debug> StatType<T> for Min {
122 type Bound = LowerBound<T>;
123
124 const STAT: Stat = Stat::Min;
125}
126
127impl<T: PartialOrd + Clone + Debug> StatType<T> for Sum {
128 type Bound = Precision<T>;
129
130 const STAT: Stat = Stat::Sum;
131}
132
133impl<T: PartialOrd + Clone> StatType<T> for NaNCount {
134 type Bound = UpperBound<T>;
135
136 const STAT: Stat = Stat::NaNCount;
137}
138
139impl Stat {
140 pub fn is_commutative(&self) -> bool {
143 match self {
145 Self::IsConstant
146 | Self::Max
147 | Self::Min
148 | Self::NullCount
149 | Self::Sum
150 | Self::NaNCount
151 | Self::UncompressedSizeInBytes => true,
152 Self::IsSorted | Self::IsStrictSorted => false,
153 }
154 }
155
156 pub fn has_same_dtype_as_array(&self) -> bool {
158 matches!(self, Stat::Min | Stat::Max)
159 }
160
161 pub fn dtype(&self, data_type: &DType) -> Option<DType> {
163 Some(match self {
164 Self::IsConstant => DType::Bool(NonNullable),
165 Self::IsSorted => DType::Bool(NonNullable),
166 Self::IsStrictSorted => DType::Bool(NonNullable),
167 Self::Max if matches!(data_type, DType::Null) => return None,
168 Self::Max => data_type.clone(),
169 Self::Min if matches!(data_type, DType::Null) => return None,
170 Self::Min => data_type.clone(),
171 Self::NullCount => DType::Primitive(PType::U64, NonNullable),
172 Self::UncompressedSizeInBytes => DType::Primitive(PType::U64, NonNullable),
173 Self::NaNCount => {
174 if let DType::Primitive(ptype, ..) = data_type
176 && ptype.is_float()
177 {
178 DType::Primitive(PType::U64, NonNullable)
179 } else {
180 return None;
181 }
182 }
183 Self::Sum => {
184 match data_type {
188 DType::Bool(_) => DType::Primitive(PType::U64, Nullable),
189 DType::Primitive(ptype, _) => match ptype {
190 PType::U8 | PType::U16 | PType::U32 | PType::U64 => {
191 DType::Primitive(PType::U64, Nullable)
192 }
193 PType::I8 | PType::I16 | PType::I32 | PType::I64 => {
194 DType::Primitive(PType::I64, Nullable)
195 }
196 PType::F16 | PType::F32 | PType::F64 => {
197 DType::Primitive(PType::F64, Nullable)
199 }
200 },
201 DType::Extension(ext_dtype) => self.dtype(ext_dtype.storage_dtype())?,
202 DType::Decimal(decimal_dtype, _) => {
203 let precision = u8::min(MAX_PRECISION, decimal_dtype.precision() + 10);
207 DType::Decimal(
208 DecimalDType::new(precision, decimal_dtype.scale()),
209 Nullable,
210 )
211 }
212 _ => return None,
214 }
215 }
216 })
217 }
218
219 pub fn name(&self) -> &str {
220 match self {
221 Self::IsConstant => "is_constant",
222 Self::IsSorted => "is_sorted",
223 Self::IsStrictSorted => "is_strict_sorted",
224 Self::Max => "max",
225 Self::Min => "min",
226 Self::NullCount => "null_count",
227 Self::UncompressedSizeInBytes => "uncompressed_size_in_bytes",
228 Self::Sum => "sum",
229 Self::NaNCount => "nan_count",
230 }
231 }
232
233 pub fn all() -> impl Iterator<Item = Stat> {
234 all::<Self>()
235 }
236}
237
238impl Display for Stat {
239 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
240 write!(f, "{}", self.name())
241 }
242}
243
#[cfg(test)]
mod test {
    use enum_iterator::all;

    use crate::arrays::PrimitiveArray;
    use crate::expr::stats::Stat;

    /// Computing `Min` over an all-null array must yield `None` rather than panic.
    #[test]
    fn min_of_nulls_is_not_panic() {
        let all_null = PrimitiveArray::from_option_iter::<i32, _>([None, None, None, None]);
        let min = all_null.statistics().compute_as::<i64>(Stat::Min);

        assert_eq!(min, None);
    }

    /// Only `Min` and `Max` report the same dtype as the array.
    #[test]
    fn has_same_dtype_as_array() {
        for stat in [Stat::Min, Stat::Max] {
            assert!(stat.has_same_dtype_as_array());
        }

        let others = all::<Stat>().filter(|stat| !matches!(stat, Stat::Min | Stat::Max));
        for stat in others {
            assert!(!stat.has_same_dtype_as_array());
        }
    }
}