1use std::fmt::{Debug, Display};
2
3use serde::{Deserialize, Serialize};
4use vortex_array::array::{BoolArray, PrimitiveArray};
5use vortex_array::compute::{scalar_at, search_sorted_usize, SearchSortedSide};
6use vortex_array::encoding::ids;
7use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet};
8use vortex_array::validity::{LogicalValidity, Validity, ValidityMetadata, ValidityVTable};
9use vortex_array::variants::{BoolArrayTrait, PrimitiveArrayTrait, VariantsVTable};
10use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
11use vortex_array::{
12 impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData,
13 IntoArrayVariant, IntoCanonical,
14};
15use vortex_dtype::{match_each_integer_ptype, match_each_unsigned_integer_ptype, DType, PType};
16use vortex_error::{vortex_bail, VortexExpect as _, VortexResult};
17use vortex_scalar::Scalar;
18
19use crate::compress::{runend_bool_decode_slice, runend_bool_encode_slice, trimmed_ends_iter};
20
21impl_encoding!("vortex.runendbool", ids::RUN_END_BOOL, RunEndBool);
22
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub struct RunEndBoolMetadata {
25 start: bool,
26 validity: ValidityMetadata,
27 ends_ptype: PType,
28 num_runs: usize,
29 offset: usize,
30}
31
32impl Display for RunEndBoolMetadata {
33 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
34 Debug::fmt(self, f)
35 }
36}
37
38impl RunEndBoolArray {
39 pub fn try_new(ends: ArrayData, start: bool, validity: Validity) -> VortexResult<Self> {
40 let length: usize = scalar_at(&ends, ends.len() - 1)?.as_ref().try_into()?;
41 Self::with_offset_and_size(ends, start, validity, length, 0)
42 }
43
44 pub(crate) fn with_offset_and_size(
45 ends: ArrayData,
46 start: bool,
47 validity: Validity,
48 length: usize,
49 offset: usize,
50 ) -> VortexResult<Self> {
51 if !ends.statistics().compute_is_strict_sorted().unwrap_or(true) {
52 vortex_bail!("Ends array must be strictly sorted",);
53 }
54 if !ends.dtype().is_unsigned_int() || ends.dtype().is_nullable() {
55 vortex_bail!(
56 "Ends array must be an unsigned integer type, got {}",
57 ends.dtype()
58 );
59 }
60 if ends.is_empty() && length != 0 {
61 vortex_bail!(
62 "Ends array cannot be empty when length ({}) is not zero",
63 length
64 );
65 }
66
67 if offset != 0 {
68 let first_run_end: usize = scalar_at(&ends, 0)?.as_ref().try_into()?;
69 if first_run_end <= offset {
70 vortex_bail!("First run end {first_run_end} must be bigger than offset {offset}");
71 }
72 }
73
74 let dtype = DType::Bool(validity.nullability());
75 let ends_ptype = ends.dtype().try_into()?;
76 let metadata = RunEndBoolMetadata {
77 start,
78 validity: validity.to_metadata(length)?,
79 ends_ptype,
80 num_runs: ends.len(),
81 offset,
82 };
83
84 let stats = if matches!(validity, Validity::AllValid | Validity::NonNullable) {
85 let ends_len = ends.len();
86 let is_constant = ends_len <= 1;
87 let is_sorted = is_constant || (!start && ends_len == 2);
88 let is_strict_sorted =
89 (is_constant && length <= 1) || (!is_constant && is_sorted && length == 2);
90 let run_count = ends_len;
91 let min = start && is_constant; let max = start || ends_len > 1; StatsSet::new_unchecked(vec![
94 (Stat::IsConstant, is_constant.into()),
95 (Stat::IsSorted, is_sorted.into()),
96 (Stat::IsStrictSorted, is_strict_sorted.into()),
97 (Stat::RunCount, run_count.into()),
98 (Stat::Min, min.into()),
99 (Stat::Max, max.into()),
100 ])
101 } else {
102 StatsSet::default()
103 };
104
105 let mut children = Vec::with_capacity(2);
106 children.push(ends);
107 if let Some(a) = validity.into_array() {
108 children.push(a)
109 }
110
111 Self::try_from_parts(dtype, length, metadata, children.into(), stats)
112 }
113
114 pub(crate) fn find_physical_index(&self, index: usize) -> VortexResult<usize> {
115 search_sorted_usize(&self.ends(), index + self.offset(), SearchSortedSide::Right)
116 .map(|s| s.to_ends_index(self.ends().len()))
117 }
118
119 #[inline]
120 pub(crate) fn offset(&self) -> usize {
121 self.metadata().offset
122 }
123
124 #[inline]
125 pub(crate) fn start(&self) -> bool {
126 self.metadata().start
127 }
128
129 #[inline]
130 pub(crate) fn ends(&self) -> ArrayData {
131 self.as_ref()
132 .child(
133 0,
134 &self.metadata().ends_ptype.into(),
135 self.metadata().num_runs,
136 )
137 .vortex_expect("RunEndBoolArray is missing its run ends")
138 }
139
140 pub fn validity(&self) -> Validity {
141 self.metadata().validity.to_validity(|| {
142 self.as_ref()
143 .child(1, &Validity::DTYPE, self.len())
144 .vortex_expect("RunEndBoolArray: validity child")
145 })
146 }
147}
148
149pub fn encode_runend_bool(array: &BoolArray) -> VortexResult<RunEndBoolArray> {
150 let (ends, start) = runend_bool_encode_slice(&array.boolean_buffer());
151 RunEndBoolArray::try_new(
152 PrimitiveArray::from(ends).into_array(),
153 start,
154 array.validity(),
155 )
156}
157
158pub(crate) fn decode_runend_bool(
159 run_ends: &PrimitiveArray,
160 start: bool,
161 validity: Validity,
162 offset: usize,
163 length: usize,
164) -> VortexResult<BoolArray> {
165 match_each_integer_ptype!(run_ends.ptype(), |$E| {
166 let bools = runend_bool_decode_slice(trimmed_ends_iter(run_ends.maybe_null_slice::<$E>(), offset, length), start, length);
167 Ok(BoolArray::try_new(bools, validity)?)
168 })
169}
170
171pub(crate) fn value_at_index(idx: usize, start: bool) -> bool {
172 if idx % 2 == 0 {
173 start
174 } else {
175 !start
176 }
177}
178
179impl BoolArrayTrait for RunEndBoolArray {}
180
181impl VariantsVTable<RunEndBoolArray> for RunEndBoolEncoding {
182 fn as_bool_array<'a>(&self, array: &'a RunEndBoolArray) -> Option<&'a dyn BoolArrayTrait> {
183 Some(array)
184 }
185}
186
187impl ArrayTrait for RunEndBoolArray {}
188
189impl ValidityVTable<RunEndBoolArray> for RunEndBoolEncoding {
190 fn is_valid(&self, array: &RunEndBoolArray, index: usize) -> bool {
191 array.validity().is_valid(index)
192 }
193
194 fn logical_validity(&self, array: &RunEndBoolArray) -> LogicalValidity {
195 array.validity().to_logical(array.len())
196 }
197}
198
199impl IntoCanonical for RunEndBoolArray {
200 fn into_canonical(self) -> VortexResult<Canonical> {
201 let pends = self.ends().into_primitive()?;
202 decode_runend_bool(
203 &pends,
204 self.start(),
205 self.validity(),
206 self.offset(),
207 self.len(),
208 )
209 .map(Canonical::Bool)
210 }
211}
212
213impl VisitorVTable<RunEndBoolArray> for RunEndBoolEncoding {
214 fn accept(&self, array: &RunEndBoolArray, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> {
215 visitor.visit_child("ends", &array.ends())?;
216 visitor.visit_validity(&array.validity())
217 }
218}
219
220impl StatisticsVTable<RunEndBoolArray> for RunEndBoolEncoding {
221 fn compute_statistics(&self, array: &RunEndBoolArray, stat: Stat) -> VortexResult<StatsSet> {
222 let maybe_scalar: Option<Scalar> = match stat {
223 Stat::NullCount => Some(array.validity().null_count(array.len())?.into()),
224 Stat::TrueCount => {
225 let pends = array.ends().into_primitive()?;
226 let mut true_count: usize = 0;
227 let mut prev_end: usize = 0;
228 let mut include = array.start();
229 match_each_unsigned_integer_ptype!(pends.ptype(), |$P| {
230 for end in trimmed_ends_iter(pends.maybe_null_slice::<$P>(), array.offset(), array.len()) {
231 if include {
232 true_count += end - prev_end;
233 }
234 include = !include;
235 prev_end = end;
236 }
237 });
238 Some((true_count as u64).into())
239 }
240 _ => None,
241 };
242 if let Some(scalar) = maybe_scalar {
243 Ok(StatsSet::of(stat, scalar))
244 } else {
245 Ok(StatsSet::default())
246 }
247 }
248}
249
250#[cfg(test)]
251#[allow(clippy::cast_possible_truncation)]
252mod test {
253 use core::iter;
254
255 use itertools::Itertools as _;
256 use rstest::rstest;
257 use vortex_array::array::{BoolArray, PrimitiveArray};
258 use vortex_array::compute::{scalar_at, slice, take};
259 use vortex_array::stats::ArrayStatistics;
260 use vortex_array::validity::Validity;
261 use vortex_array::{
262 ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoCanonical, ToArrayData,
263 };
264 use vortex_dtype::{DType, Nullability};
265
266 use crate::RunEndBoolArray;
267
268 #[test]
269 fn new() {
270 let arr =
272 RunEndBoolArray::try_new(vec![2u32, 4, 5].into_array(), false, Validity::NonNullable)
273 .unwrap();
274 assert_eq!(arr.len(), 5);
275 assert_eq!(arr.dtype(), &DType::Bool(Nullability::NonNullable));
276
277 assert_eq!(scalar_at(arr.as_ref(), 0).unwrap(), false.into());
278 assert_eq!(scalar_at(arr.as_ref(), 2).unwrap(), true.into());
279 assert_eq!(scalar_at(arr.as_ref(), 4).unwrap(), false.into());
280 }
281
282 #[test]
283 fn slice_array() {
284 let arr = slice(
285 RunEndBoolArray::try_new(
287 vec![2u32, 5, 6, 7, 10].into_array(),
288 true,
289 Validity::NonNullable,
290 )
291 .unwrap()
292 .as_ref(),
293 2,
294 8,
295 )
296 .unwrap();
297 assert_eq!(arr.dtype(), &DType::Bool(Nullability::NonNullable));
298
299 assert_eq!(
300 to_bool_vec(&arr),
301 vec![false, false, false, true, false, true],
302 );
303 }
304
305 #[test]
306 fn slice_slice_array() {
307 let raw = BoolArray::from_iter([
308 true, true, false, false, false, true, false, true, true, true,
309 ])
310 .to_array();
311 let arr = slice(&raw, 2, 8).unwrap();
312 assert_eq!(arr.dtype(), &DType::Bool(Nullability::NonNullable));
313
314 assert_eq!(
315 to_bool_vec(&arr),
316 vec![false, false, false, true, false, true],
317 );
318
319 let arr2 = slice(&arr, 3, 6).unwrap();
320 assert_eq!(to_bool_vec(&arr2), vec![true, false, true],);
321
322 let arr3 = slice(&arr2, 1, 3).unwrap();
323 assert_eq!(to_bool_vec(&arr3), vec![false, true],);
324 }
325
326 #[test]
327 fn flatten() {
328 let arr =
329 RunEndBoolArray::try_new(vec![2u32, 4, 5].into_array(), true, Validity::NonNullable)
330 .unwrap();
331
332 assert_eq!(
333 to_bool_vec(&arr.to_array()),
334 vec![true, true, false, false, true]
335 );
336 }
337
338 #[test]
339 fn take_bool() {
340 let arr = take(
341 RunEndBoolArray::try_new(
342 vec![2u32, 4, 5, 10].into_array(),
343 true,
344 Validity::NonNullable,
345 )
346 .unwrap(),
347 vec![0, 0, 6, 4].into_array(),
348 )
349 .unwrap();
350
351 assert_eq!(to_bool_vec(&arr), vec![true, true, false, true]);
352 }
353
354 fn to_bool_vec(arr: &ArrayData) -> Vec<bool> {
355 arr.clone()
356 .into_canonical()
357 .unwrap()
358 .into_bool()
359 .unwrap()
360 .boolean_buffer()
361 .iter()
362 .collect::<Vec<_>>()
363 }
364
365 #[rstest]
366 #[case(true, 1, 1)]
367 #[case(true, 1, 2)]
368 #[case(true, 2, 2)]
369 #[case(false, 1, 1)]
370 #[case(false, 1, 2)]
371 #[case(false, 2, 2)]
372 #[case(false, 3, 32)]
373 #[case(true, 3, 32)]
374 fn stats(#[case] start: bool, #[case] ends_len: usize, #[case] len: usize) {
375 use vortex_array::stats::Stat;
376
377 let ends = (1u32..(ends_len as u32))
378 .chain(iter::once(len as u32))
379 .collect_vec();
380 assert_eq!(ends.len(), ends_len);
381 assert_eq!(*ends.last().unwrap(), len as u32);
382
383 let arr =
384 RunEndBoolArray::try_new(ends.into_array(), start, Validity::NonNullable).unwrap();
385 let bools = arr.clone().into_canonical().unwrap().into_bool().unwrap();
386 for stat in [
387 Stat::IsConstant,
388 Stat::NullCount,
389 Stat::TrueCount,
390 Stat::Min,
391 Stat::Max,
392 Stat::IsSorted,
393 Stat::IsStrictSorted,
394 ] {
395 let expected = bools.statistics().compute(stat).unwrap();
397 let actual = arr.statistics().compute(stat).unwrap();
398 assert_eq!(expected, actual);
399 }
400
401 assert_eq!(arr.statistics().compute_run_count(), Some(ends_len));
402 }
403
404 #[test]
405 fn sliced_true_count() {
406 let arr = RunEndBoolArray::try_new(
407 PrimitiveArray::from(vec![5u32, 7, 10]).into_array(),
408 true,
409 Validity::NonNullable,
410 )
411 .unwrap();
412 let sliced = slice(&arr, 4, 8).unwrap();
413 assert_eq!(sliced.statistics().compute_true_count().unwrap(), 2);
414 }
415}