vortex_array/arrays/bool/
stats.rs1use std::ops::BitAnd;
2
3use arrow_buffer::BooleanBuffer;
4use itertools::Itertools;
5use vortex_error::VortexResult;
6use vortex_mask::Mask;
7
8use crate::Array;
9use crate::arrays::{BoolArray, BoolEncoding};
10use crate::stats::{Precision, Stat, StatsSet};
11use crate::vtable::StatisticsVTable;
12
13impl StatisticsVTable<&BoolArray> for BoolEncoding {
14 fn compute_statistics(&self, array: &BoolArray, stat: Stat) -> VortexResult<StatsSet> {
15 if array.is_empty() {
16 return Ok(StatsSet::new_unchecked(vec![
17 (Stat::Sum, Precision::exact(0)),
18 (Stat::NullCount, Precision::exact(0)),
19 ]));
20 }
21
22 match array.validity_mask()? {
23 Mask::AllTrue(_) => self.compute_statistics(array.boolean_buffer(), stat),
24 Mask::AllFalse(v) => Ok(StatsSet::nulls(v)),
25 Mask::Values(values) => self.compute_statistics(
26 &NullableBools(array.boolean_buffer(), values.boolean_buffer()),
27 stat,
28 ),
29 }
30 }
31}
32
33struct NullableBools<'a>(&'a BooleanBuffer, &'a BooleanBuffer);
34
35impl StatisticsVTable<&NullableBools<'_>> for BoolEncoding {
36 fn compute_statistics(&self, array: &NullableBools<'_>, stat: Stat) -> VortexResult<StatsSet> {
37 if matches!(
39 stat,
40 Stat::Sum | Stat::Min | Stat::Max | Stat::IsConstant | Stat::NullCount
41 ) {
42 return Ok(StatsSet::bools_with_sum_and_null_count(
43 array.0.bitand(array.1).count_set_bits(),
44 array.1.len() - array.1.count_set_bits(),
45 array.0.len(),
46 ));
47 }
48
49 let first_non_null_idx = array
50 .1
51 .iter()
52 .enumerate()
53 .skip_while(|(_, valid)| !*valid)
54 .map(|(idx, _)| idx)
55 .next();
56
57 if let Some(first_non_null) = first_non_null_idx {
58 let mut acc = BoolStatsAccumulator::new(array.0.value(first_non_null));
59 acc.n_nulls(first_non_null);
60 array
61 .0
62 .iter()
63 .zip_eq(array.1.iter())
64 .skip(first_non_null + 1)
65 .map(|(next, valid)| valid.then_some(next))
66 .for_each(|next| acc.nullable_next(next));
67 Ok(acc.finish())
68 } else {
69 Ok(StatsSet::nulls(array.0.len()))
70 }
71 }
72}
73
74impl StatisticsVTable<&BooleanBuffer> for BoolEncoding {
75 fn compute_statistics(&self, buffer: &BooleanBuffer, stat: Stat) -> VortexResult<StatsSet> {
76 if matches!(
78 stat,
79 Stat::Sum | Stat::Min | Stat::Max | Stat::IsConstant | Stat::NullCount
80 ) {
81 return Ok(StatsSet::bools_with_sum_and_null_count(
82 buffer.count_set_bits(),
83 0,
84 buffer.len(),
85 ));
86 }
87
88 let mut stats = BoolStatsAccumulator::new(buffer.value(0));
89 buffer.iter().skip(1).for_each(|next| stats.next(next));
90 Ok(stats.finish())
91 }
92}
93
94struct BoolStatsAccumulator {
95 prev: bool,
96 is_sorted: bool,
97 run_count: usize,
98 null_count: usize,
99 true_count: usize,
100 len: usize,
101}
102
103impl BoolStatsAccumulator {
104 pub fn new(first_value: bool) -> Self {
105 Self {
106 prev: first_value,
107 is_sorted: true,
108 run_count: 1,
109 null_count: 0,
110 true_count: if first_value { 1 } else { 0 },
111 len: 1,
112 }
113 }
114
115 pub fn n_nulls(&mut self, n_nulls: usize) {
116 self.null_count += n_nulls;
117 self.len += n_nulls;
118 }
119
120 pub fn nullable_next(&mut self, next: Option<bool>) {
121 match next {
122 Some(n) => self.next(n),
123 None => {
124 self.null_count += 1;
125 self.len += 1;
126 }
127 }
128 }
129
130 pub fn next(&mut self, next: bool) {
131 self.len += 1;
132
133 if next {
134 self.true_count += 1
135 }
136 if !next & self.prev {
137 self.is_sorted = false;
138 }
139 if next != self.prev {
140 self.run_count += 1;
141 self.prev = next;
142 }
143 }
144
145 pub fn finish(self) -> StatsSet {
146 StatsSet::new_unchecked(vec![
147 (Stat::NullCount, Precision::exact(self.null_count)),
148 (Stat::IsSorted, Precision::exact(self.is_sorted)),
149 (
150 Stat::IsStrictSorted,
151 Precision::exact(
152 self.is_sorted && (self.len < 2 || (self.len == 2 && self.true_count == 1)),
153 ),
154 ),
155 ])
156 }
157}
158
159#[cfg(test)]
160mod test {
161 use arrow_buffer::BooleanBuffer;
162
163 use crate::ArrayVariants;
164 use crate::array::Array;
165 use crate::arrays::BoolArray;
166 use crate::stats::Stat;
167 use crate::validity::Validity;
168
169 #[test]
170 fn bool_stats() {
171 let bool_arr = BoolArray::from_iter([false, false, true, true, false, true, true, false]);
172 assert!(!bool_arr.statistics().compute_is_strict_sorted().unwrap());
173 assert!(!bool_arr.statistics().compute_is_sorted().unwrap());
174 assert!(!bool_arr.statistics().compute_is_constant().unwrap());
175 assert!(!bool_arr.statistics().compute_min::<bool>().unwrap());
176 assert!(bool_arr.statistics().compute_max::<bool>().unwrap());
177 assert_eq!(bool_arr.statistics().compute_null_count().unwrap(), 0);
178 assert_eq!(bool_arr.as_bool_typed().unwrap().true_count().unwrap(), 4);
179 }
180
181 #[test]
182 fn strict_sorted() {
183 let bool_arr_1 = BoolArray::from_iter([false, true]);
184 assert!(bool_arr_1.statistics().compute_is_strict_sorted().unwrap());
185 assert!(bool_arr_1.statistics().compute_is_sorted().unwrap());
186
187 let bool_arr_2 = BoolArray::from_iter([true]);
188 assert!(bool_arr_2.statistics().compute_is_strict_sorted().unwrap());
189 assert!(bool_arr_2.statistics().compute_is_sorted().unwrap());
190
191 let bool_arr_3 = BoolArray::from_iter([false]);
192 assert!(bool_arr_3.statistics().compute_is_strict_sorted().unwrap());
193 assert!(bool_arr_3.statistics().compute_is_sorted().unwrap());
194
195 let bool_arr_4 = BoolArray::from_iter([true, false]);
196 assert!(!bool_arr_4.statistics().compute_is_strict_sorted().unwrap());
197 assert!(!bool_arr_4.statistics().compute_is_sorted().unwrap());
198
199 let bool_arr_5 = BoolArray::from_iter([false, true, true]);
200 assert!(!bool_arr_5.statistics().compute_is_strict_sorted().unwrap());
201 assert!(bool_arr_5.statistics().compute_is_sorted().unwrap());
202 }
203
204 #[test]
205 fn nullable_stats() {
206 let bool_arr = BoolArray::from_iter(vec![
207 Some(false),
208 Some(true),
209 None,
210 Some(true),
211 Some(false),
212 None,
213 None,
214 ]);
215 assert!(!bool_arr.statistics().compute_is_strict_sorted().unwrap());
216 assert!(!bool_arr.statistics().compute_is_sorted().unwrap());
217 assert!(!bool_arr.statistics().compute_is_constant().unwrap());
218 assert!(!bool_arr.statistics().compute_min::<bool>().unwrap());
219 assert!(bool_arr.statistics().compute_max::<bool>().unwrap());
220 assert_eq!(bool_arr.as_bool_typed().unwrap().true_count().unwrap(), 2);
221 assert_eq!(bool_arr.statistics().compute_null_count().unwrap(), 3);
222 }
223
224 #[test]
225 fn one_non_null_value() {
226 let bool_arr = BoolArray::from_iter(vec![Some(false), None]);
227 assert!(!bool_arr.statistics().compute_is_strict_sorted().unwrap());
228 assert!(bool_arr.statistics().compute_is_sorted().unwrap());
229 assert!(!bool_arr.statistics().compute_is_constant().unwrap());
230 assert!(!bool_arr.statistics().compute_min::<bool>().unwrap());
231 assert!(!bool_arr.statistics().compute_max::<bool>().unwrap());
232 assert_eq!(bool_arr.as_bool_typed().unwrap().true_count().unwrap(), 0);
233 assert_eq!(bool_arr.statistics().compute_null_count().unwrap(), 1);
234 }
235
236 #[test]
237 fn empty_array() {
238 let bool_arr = BoolArray::new(BooleanBuffer::new_set(0), Validity::NonNullable);
239 assert!(bool_arr.statistics().compute_is_strict_sorted().is_none());
240 assert!(bool_arr.statistics().compute_is_sorted().is_none());
241 assert!(bool_arr.statistics().compute_is_constant().is_none());
242 assert!(bool_arr.statistics().compute_min::<bool>().is_none());
243 assert!(bool_arr.statistics().compute_max::<bool>().is_none());
244 assert_eq!(bool_arr.as_bool_typed().unwrap().true_count().unwrap(), 0);
245 assert_eq!(bool_arr.statistics().compute_null_count().unwrap(), 0);
246 }
247
248 #[test]
249 fn all_false() {
250 let bool_arr = BoolArray::from_iter(vec![false, false, false]);
251 assert!(!bool_arr.statistics().compute_is_strict_sorted().unwrap());
252 assert!(bool_arr.statistics().compute_is_sorted().unwrap());
253 assert!(bool_arr.statistics().compute_is_constant().unwrap());
254 assert!(!bool_arr.statistics().compute_min::<bool>().unwrap());
255 assert!(!bool_arr.statistics().compute_max::<bool>().unwrap());
256 assert_eq!(bool_arr.as_bool_typed().unwrap().true_count().unwrap(), 0);
257 assert_eq!(bool_arr.statistics().compute_null_count().unwrap(), 0);
258 }
259
260 #[test]
261 fn all_nullable_stats() {
262 let bool_arr = BoolArray::from_iter(vec![None, None, None, None, None]);
263 assert!(!bool_arr.statistics().compute_is_strict_sorted().unwrap());
264 assert!(bool_arr.statistics().compute_is_sorted().unwrap());
265 assert!(bool_arr.statistics().compute_is_constant().unwrap());
266 assert!(
267 bool_arr
268 .statistics()
269 .compute_stat(Stat::Min)
270 .unwrap()
271 .is_none()
272 );
273 assert!(
274 bool_arr
275 .statistics()
276 .compute_stat(Stat::Max)
277 .unwrap()
278 .is_none()
279 );
280 assert_eq!(bool_arr.as_bool_typed().unwrap().true_count().unwrap(), 0);
281 assert_eq!(bool_arr.statistics().compute_null_count().unwrap(), 5);
282 }
283}