vortex_runend/
statistics.rs1use std::cmp;
2
3use arrow_buffer::BooleanBuffer;
4use itertools::Itertools;
5use vortex_array::stats::{Precision, Stat, StatsSet};
6use vortex_array::variants::PrimitiveArrayTrait;
7use vortex_array::vtable::StatisticsVTable;
8use vortex_array::{Array, ToCanonical as _};
9use vortex_dtype::{NativePType, match_each_unsigned_integer_ptype};
10use vortex_error::VortexResult;
11use vortex_mask::Mask;
12use vortex_scalar::ScalarValue;
13
14use crate::{RunEndArray, RunEndEncoding};
15
16impl StatisticsVTable<&RunEndArray> for RunEndEncoding {
17 fn compute_statistics(&self, array: &RunEndArray, stat: Stat) -> VortexResult<StatsSet> {
18 let maybe_stat = match stat {
19 Stat::Min | Stat::Max => array.values().statistics().compute_stat(stat)?,
20 Stat::IsSorted => Some(ScalarValue::from(
21 array
22 .values()
23 .statistics()
24 .compute_is_sorted()
25 .unwrap_or(false)
26 && array.validity_mask()?.all_true(),
27 )),
28 Stat::NullCount => Some(ScalarValue::from(array.null_count()?)),
29 _ => None,
30 };
31
32 let mut stats = StatsSet::default();
33 if let Some(stat_value) = maybe_stat {
34 stats.set(stat, Precision::exact(stat_value));
35 }
36 Ok(stats)
37 }
38}
39
40impl RunEndArray {
41 #[allow(dead_code)]
43 fn true_count(&self) -> VortexResult<u64> {
44 let ends = self.ends().to_primitive()?;
45 let values = self.values().to_bool()?.boolean_buffer().clone();
46
47 match_each_unsigned_integer_ptype!(ends.ptype(), |$P| self.typed_true_count(ends.as_slice::<$P>(), values))
48 }
49
50 #[allow(dead_code)]
52 fn typed_true_count<P: NativePType + Into<u64>>(
53 &self,
54 decompressed_ends: &[P],
55 decompressed_values: BooleanBuffer,
56 ) -> VortexResult<u64> {
57 Ok(match self.values().validity_mask()? {
58 Mask::AllTrue(_) => {
59 let mut begin = self.offset() as u64;
60 decompressed_ends
61 .iter()
62 .copied()
63 .zip_eq(&decompressed_values)
64 .map(|(end, bool_value)| {
65 let end: u64 = end.into();
66 let len = end - begin;
67 begin = end;
68 len * u64::from(bool_value)
69 })
70 .sum()
71 }
72 Mask::AllFalse(_) => 0,
73 Mask::Values(values) => {
74 let mut is_valid = values.indices().iter();
75 match is_valid.next() {
76 None => self.len() as u64,
77 Some(&valid_index) => {
78 let mut true_count: u64 = 0;
79 let offsetted_begin = self.offset() as u64;
80 let offsetted_len = (self.len() + self.offset()) as u64;
81 let begin = if valid_index == 0 {
82 offsetted_begin
83 } else {
84 decompressed_ends[valid_index - 1].into()
85 };
86
87 let end = cmp::min(decompressed_ends[valid_index].into(), offsetted_len);
88 true_count += decompressed_values.value(valid_index) as u64 * (end - begin);
89
90 for &valid_index in is_valid {
91 let valid_end: u64 = decompressed_ends[valid_index].into();
92 let end = cmp::min(valid_end, offsetted_len);
93 true_count +=
94 decompressed_values.value(valid_index) as u64 * (end - valid_end);
95 }
96
97 true_count
98 }
99 }
100 }
101 })
102 }
103
104 fn null_count(&self) -> VortexResult<u64> {
105 let ends = self.ends().to_primitive()?;
106 let null_count = match self.values().validity_mask()? {
107 Mask::AllTrue(_) => 0u64,
108 Mask::AllFalse(_) => self.len() as u64,
109 Mask::Values(mask) => {
110 match_each_unsigned_integer_ptype!(ends.ptype(), |$P| self.null_count_with_array_validity(ends.as_slice::<$P>(), mask.boolean_buffer()))
111 }
112 };
113 Ok(null_count)
114 }
115
116 fn null_count_with_array_validity<P: NativePType + Into<u64>>(
117 &self,
118 decompressed_ends: &[P],
119 is_valid: &BooleanBuffer,
120 ) -> u64 {
121 let mut is_valid = is_valid.set_indices();
122 match is_valid.next() {
123 None => self.len() as u64,
124 Some(valid_index) => {
125 let offsetted_len = (self.len() + self.offset()) as u64;
126 let mut null_count: u64 = self.len() as u64;
127 let begin = if valid_index == 0 {
128 0
129 } else {
130 decompressed_ends[valid_index - 1].into()
131 };
132
133 let end = cmp::min(decompressed_ends[valid_index].into(), offsetted_len);
134 null_count -= end - begin;
135
136 for valid_index in is_valid {
137 let end = cmp::min(decompressed_ends[valid_index].into(), offsetted_len);
138 null_count -= end - decompressed_ends[valid_index - 1].into();
139 }
140
141 null_count
142 }
143 }
144 }
145}
146
147#[cfg(test)]
148mod tests {
149 use arrow_buffer::BooleanBuffer;
150 use vortex_array::arrays::BoolArray;
151 use vortex_array::compute::slice;
152 use vortex_array::stats::Stat;
153 use vortex_array::validity::Validity;
154 use vortex_array::{Array, IntoArray};
155 use vortex_buffer::buffer;
156
157 use crate::RunEndArray;
158
159 #[test]
160 fn test_runend_int_stats() {
161 let arr = RunEndArray::try_new(
162 buffer![2u32, 5, 10].into_array(),
163 buffer![1i32, 2, 3].into_array(),
164 )
165 .unwrap();
166
167 assert_eq!(arr.statistics().compute_as::<i32>(Stat::Min).unwrap(), 1);
168 assert_eq!(arr.statistics().compute_as::<i32>(Stat::Max).unwrap(), 3);
169 assert_eq!(
170 arr.statistics().compute_as::<u64>(Stat::NullCount).unwrap(),
171 0
172 );
173 assert!(arr.statistics().compute_as::<bool>(Stat::IsSorted).unwrap());
174 }
175
176 #[test]
177 fn test_runend_bool_stats() {
178 let arr = RunEndArray::try_new(
179 buffer![2u32, 5, 10].into_array(),
180 BoolArray::new(
181 BooleanBuffer::from_iter([true, true, false]),
182 Validity::Array(BoolArray::from_iter([true, false, true]).into_array()),
183 )
184 .into_array(),
185 )
186 .unwrap();
187
188 assert!(!arr.statistics().compute_as::<bool>(Stat::Min).unwrap());
189 assert!(arr.statistics().compute_as::<bool>(Stat::Max).unwrap());
190 assert_eq!(
191 arr.statistics().compute_as::<u64>(Stat::NullCount).unwrap(),
192 3
193 );
194 assert!(!arr.statistics().compute_as::<bool>(Stat::IsSorted).unwrap());
195
196 let sliced = slice(&arr, 4, 7).unwrap();
197
198 assert!(!sliced.statistics().compute_as::<bool>(Stat::Min).unwrap());
199 assert!(!sliced.statistics().compute_as::<bool>(Stat::Max).unwrap());
200 assert_eq!(
201 sliced
202 .statistics()
203 .compute_as::<u64>(Stat::NullCount)
204 .unwrap(),
205 1
206 );
207 assert!(
209 !sliced
210 .statistics()
211 .compute_as::<bool>(Stat::IsSorted)
212 .unwrap()
213 );
214 }
215
216 #[test]
217 fn test_all_invalid_true_count() {
218 let arr = RunEndArray::try_new(
219 buffer![2u32, 5, 10].into_array(),
220 BoolArray::from_iter([None, None, None]).into_array(),
221 )
222 .unwrap()
223 .into_array();
224 assert_eq!(
225 arr.statistics().compute_as::<u64>(Stat::NullCount).unwrap(),
226 10
227 );
228 }
229}