1use std::fmt::{Debug, Display, Formatter};
4use std::hash::Hash;
5
6use arrow_buffer::bit_iterator::BitIterator;
7use arrow_buffer::{BooleanBufferBuilder, MutableBuffer};
8use enum_iterator::{Sequence, last};
9use log::debug;
10use num_enum::{IntoPrimitive, TryFromPrimitive};
11pub use stats_set::*;
12use vortex_dtype::Nullability::{NonNullable, Nullable};
13use vortex_dtype::{DType, PType};
14
15mod array;
16mod bound;
17pub mod flatbuffers;
18mod precision;
19mod stat_bound;
20mod stats_set;
21mod traits;
22
23pub use array::*;
24pub use bound::{LowerBound, UpperBound};
25pub use precision::Precision;
26pub use stat_bound::*;
27pub use traits::*;
28use vortex_error::VortexExpect;
29
/// The [`Stat`]s grouped under the "pruning" label: `Min`, `Max`, `Sum`, `NullCount`
/// and `NaNCount`.
///
/// NOTE(review): judging by the name these are the statistics consulted when pruning
/// data; the consumers live outside this module, so confirm against them.
pub const PRUNING_STATS: &[Stat] = &[
    Stat::Min,
    Stat::Max,
    Stat::Sum,
    Stat::NullCount,
    Stat::NaNCount,
];
39
/// The [`Stat`]s selected for writing. At present every variant of [`Stat`] is listed.
///
/// NOTE(review): presumably the statistics that are persisted when arrays are
/// serialized; the writer is not part of this module, so confirm against the callers.
pub const STATS_TO_WRITE: &[Stat] = &[
    Stat::Min,
    Stat::Max,
    Stat::NullCount,
    Stat::NaNCount,
    Stat::Sum,
    Stat::IsConstant,
    Stat::IsSorted,
    Stat::IsStrictSorted,
    Stat::UncompressedSizeInBytes,
];
52
/// The kinds of statistic that can be recorded for an array.
///
/// The enum is `#[repr(u8)]` with explicit discriminants, and converts to and from `u8`
/// through the derived `IntoPrimitive` / `TryFromPrimitive` impls.
///
/// [`as_stat_bitset_bytes`] and [`stats_from_bitset_bytes`] use the discriminant as a bit
/// position, so existing values must not be renumbered. `as_stat_bitset_bytes` also sizes
/// its bitset from the variant declared last, so that variant must keep the largest
/// discriminant.
#[derive(
    Debug,
    Clone,
    Copy,
    PartialEq,
    Eq,
    PartialOrd,
    Ord,
    Hash,
    Sequence,
    IntoPrimitive,
    TryFromPrimitive,
)]
#[repr(u8)]
pub enum Stat {
    /// Flag statistic: the array is constant. Its value is a non-nullable bool
    /// (see [`Stat::dtype`]).
    IsConstant = 0,
    /// Flag statistic: the array is sorted. Its value is a non-nullable bool.
    IsSorted = 1,
    /// Flag statistic: the array is strictly sorted. Its value is a non-nullable bool.
    IsStrictSorted = 2,
    /// Maximum value; it has the same dtype as the array.
    Max = 3,
    /// Minimum value; it has the same dtype as the array.
    Min = 4,
    /// Sum of the values. [`Stat::dtype`] widens it to `u64`, `i64` or `f64` and returns
    /// `None` for array types that have no sum.
    Sum = 5,
    /// Number of null values, reported as a non-nullable `u64`.
    NullCount = 6,
    /// Uncompressed size of the array in bytes, reported as a non-nullable `u64`.
    UncompressedSizeInBytes = 7,
    /// Number of NaN values; [`Stat::dtype`] only defines it for floating-point
    /// primitive arrays.
    NaNCount = 8,
}
88
// Unit marker types, one per `Stat` variant. Each one implements `StatType` in this
// file, which associates the marker with its `Stat` value and with the bound type
// (`Precision`, `LowerBound` or `UpperBound`) used for that statistic.

/// Marker type for [`Stat::Max`].
pub struct Max;
/// Marker type for [`Stat::Min`].
pub struct Min;
/// Marker type for [`Stat::Sum`].
pub struct Sum;
/// Marker type for [`Stat::IsConstant`].
pub struct IsConstant;
/// Marker type for [`Stat::IsSorted`].
pub struct IsSorted;
/// Marker type for [`Stat::IsStrictSorted`].
pub struct IsStrictSorted;
/// Marker type for [`Stat::NullCount`].
pub struct NullCount;
/// Marker type for [`Stat::UncompressedSizeInBytes`].
pub struct UncompressedSizeInBytes;
/// Marker type for [`Stat::NaNCount`].
pub struct NaNCount;
100
101impl StatType<bool> for IsConstant {
102 type Bound = Precision<bool>;
103
104 const STAT: Stat = Stat::IsConstant;
105}
106
107impl StatType<bool> for IsSorted {
108 type Bound = Precision<bool>;
109
110 const STAT: Stat = Stat::IsSorted;
111}
112
113impl StatType<bool> for IsStrictSorted {
114 type Bound = Precision<bool>;
115
116 const STAT: Stat = Stat::IsStrictSorted;
117}
118
119impl<T: PartialOrd + Clone> StatType<T> for NullCount {
120 type Bound = UpperBound<T>;
121
122 const STAT: Stat = Stat::NullCount;
123}
124
125impl<T: PartialOrd + Clone> StatType<T> for UncompressedSizeInBytes {
126 type Bound = UpperBound<T>;
127
128 const STAT: Stat = Stat::UncompressedSizeInBytes;
129}
130
131impl<T: PartialOrd + Clone + Debug> StatType<T> for Max {
132 type Bound = UpperBound<T>;
133
134 const STAT: Stat = Stat::Max;
135}
136
137impl<T: PartialOrd + Clone + Debug> StatType<T> for Min {
138 type Bound = LowerBound<T>;
139
140 const STAT: Stat = Stat::Min;
141}
142
143impl<T: PartialOrd + Clone + Debug> StatType<T> for Sum {
144 type Bound = Precision<T>;
145
146 const STAT: Stat = Stat::Sum;
147}
148
149impl<T: PartialOrd + Clone> StatType<T> for NaNCount {
150 type Bound = UpperBound<T>;
151
152 const STAT: Stat = Stat::NaNCount;
153}
154
155impl Stat {
156 pub fn is_commutative(&self) -> bool {
159 match self {
161 Self::IsConstant
162 | Self::Max
163 | Self::Min
164 | Self::NullCount
165 | Self::Sum
166 | Self::NaNCount
167 | Self::UncompressedSizeInBytes => true,
168 Self::IsSorted | Self::IsStrictSorted => false,
169 }
170 }
171
172 pub fn has_same_dtype_as_array(&self) -> bool {
174 matches!(self, Stat::Min | Stat::Max)
175 }
176
177 pub fn dtype(&self, data_type: &DType) -> Option<DType> {
179 Some(match self {
180 Self::IsConstant => DType::Bool(NonNullable),
181 Self::IsSorted => DType::Bool(NonNullable),
182 Self::IsStrictSorted => DType::Bool(NonNullable),
183 Self::Max => data_type.clone(),
184 Self::Min => data_type.clone(),
185 Self::NullCount => DType::Primitive(PType::U64, NonNullable),
186 Self::UncompressedSizeInBytes => DType::Primitive(PType::U64, NonNullable),
187 Self::NaNCount => match data_type {
188 DType::Primitive(ptype, ..) if ptype.is_float() => {
189 DType::Primitive(PType::U64, NonNullable)
190 }
191 _ => return None,
193 },
194 Self::Sum => {
195 match data_type {
199 DType::Bool(_) => DType::Primitive(PType::U64, Nullable),
200 DType::Primitive(ptype, _) => match ptype {
201 PType::U8 | PType::U16 | PType::U32 | PType::U64 => {
202 DType::Primitive(PType::U64, Nullable)
203 }
204 PType::I8 | PType::I16 | PType::I32 | PType::I64 => {
205 DType::Primitive(PType::I64, Nullable)
206 }
207 PType::F16 | PType::F32 | PType::F64 => {
208 DType::Primitive(PType::F64, NonNullable)
210 }
211 },
212 DType::Extension(ext_dtype) => self.dtype(ext_dtype.storage_dtype())?,
213 DType::Null
215 | DType::Decimal(..)
217 | DType::Utf8(_)
218 | DType::Binary(_)
219 | DType::Struct(..)
220 | DType::List(..) => return None,
221 }
222 }
223 })
224 }
225
226 pub fn name(&self) -> &str {
227 match self {
228 Self::IsConstant => "is_constant",
229 Self::IsSorted => "is_sorted",
230 Self::IsStrictSorted => "is_strict_sorted",
231 Self::Max => "max",
232 Self::Min => "min",
233 Self::NullCount => "null_count",
234 Self::UncompressedSizeInBytes => "uncompressed_size_in_bytes",
235 Self::Sum => "sum",
236 Self::NaNCount => "nan_count",
237 }
238 }
239}
240
241pub fn as_stat_bitset_bytes(stats: &[Stat]) -> Vec<u8> {
242 let max_stat = u8::from(last::<Stat>().vortex_expect("last stat")) as usize + 1;
243 let mut stat_bitset = BooleanBufferBuilder::new_from_buffer(
245 MutableBuffer::from_len_zeroed(max_stat.div_ceil(8)),
246 max_stat,
247 );
248 for stat in stats {
249 stat_bitset.set_bit(u8::from(*stat) as usize, true);
250 }
251
252 stat_bitset
253 .finish()
254 .into_inner()
255 .into_vec()
256 .unwrap_or_else(|b| b.to_vec())
257}
258
259pub fn stats_from_bitset_bytes(bytes: &[u8]) -> Vec<Stat> {
260 BitIterator::new(bytes, 0, bytes.len() * 8)
261 .enumerate()
262 .filter_map(|(i, b)| b.then_some(i))
263 .filter_map(|i| {
265 let Ok(stat) = u8::try_from(i) else {
266 debug!("invalid stat encountered: {i}");
267 return None;
268 };
269 Stat::try_from(stat).ok()
270 })
271 .collect::<Vec<_>>()
272}
273
274impl Display for Stat {
275 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
276 write!(f, "{}", self.name())
277 }
278}
279
#[cfg(test)]
mod test {
    use enum_iterator::all;

    use crate::arrays::PrimitiveArray;
    use crate::stats::Stat;

    /// Computing `Min` over an all-null array must yield `None` instead of panicking.
    #[test]
    fn min_of_nulls_is_not_panic() {
        let all_null = PrimitiveArray::from_option_iter::<i32, _>([None, None, None, None]);
        let min = all_null.statistics().compute_as::<i64>(Stat::Min);

        assert_eq!(min, None);
    }

    /// Only `Min` and `Max` report the same dtype as the array; every other stat must not.
    #[test]
    fn has_same_dtype_as_array() {
        for stat in all::<Stat>() {
            let expected = matches!(stat, Stat::Min | Stat::Max);
            assert_eq!(stat.has_same_dtype_as_array(), expected);
        }
    }
}