vortex_runend_bool/
array.rs

1use std::fmt::{Debug, Display};
2
3use serde::{Deserialize, Serialize};
4use vortex_array::array::{BoolArray, PrimitiveArray};
5use vortex_array::compute::{scalar_at, search_sorted_usize, SearchSortedSide};
6use vortex_array::encoding::ids;
7use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet};
8use vortex_array::validity::{LogicalValidity, Validity, ValidityMetadata, ValidityVTable};
9use vortex_array::variants::{BoolArrayTrait, PrimitiveArrayTrait, VariantsVTable};
10use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
11use vortex_array::{
12    impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData,
13    IntoArrayVariant, IntoCanonical,
14};
15use vortex_dtype::{match_each_integer_ptype, match_each_unsigned_integer_ptype, DType, PType};
16use vortex_error::{vortex_bail, VortexExpect as _, VortexResult};
17use vortex_scalar::Scalar;
18
19use crate::compress::{runend_bool_decode_slice, runend_bool_encode_slice, trimmed_ends_iter};
20
21impl_encoding!("vortex.runendbool", ids::RUN_END_BOOL, RunEndBool);
22
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub struct RunEndBoolMetadata {
25    start: bool,
26    validity: ValidityMetadata,
27    ends_ptype: PType,
28    num_runs: usize,
29    offset: usize,
30}
31
32impl Display for RunEndBoolMetadata {
33    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
34        Debug::fmt(self, f)
35    }
36}
37
38impl RunEndBoolArray {
39    pub fn try_new(ends: ArrayData, start: bool, validity: Validity) -> VortexResult<Self> {
40        let length: usize = scalar_at(&ends, ends.len() - 1)?.as_ref().try_into()?;
41        Self::with_offset_and_size(ends, start, validity, length, 0)
42    }
43
44    pub(crate) fn with_offset_and_size(
45        ends: ArrayData,
46        start: bool,
47        validity: Validity,
48        length: usize,
49        offset: usize,
50    ) -> VortexResult<Self> {
51        if !ends.statistics().compute_is_strict_sorted().unwrap_or(true) {
52            vortex_bail!("Ends array must be strictly sorted",);
53        }
54        if !ends.dtype().is_unsigned_int() || ends.dtype().is_nullable() {
55            vortex_bail!(
56                "Ends array must be an unsigned integer type, got {}",
57                ends.dtype()
58            );
59        }
60        if ends.is_empty() && length != 0 {
61            vortex_bail!(
62                "Ends array cannot be empty when length ({}) is not zero",
63                length
64            );
65        }
66
67        if offset != 0 {
68            let first_run_end: usize = scalar_at(&ends, 0)?.as_ref().try_into()?;
69            if first_run_end <= offset {
70                vortex_bail!("First run end {first_run_end} must be bigger than offset {offset}");
71            }
72        }
73
74        let dtype = DType::Bool(validity.nullability());
75        let ends_ptype = ends.dtype().try_into()?;
76        let metadata = RunEndBoolMetadata {
77            start,
78            validity: validity.to_metadata(length)?,
79            ends_ptype,
80            num_runs: ends.len(),
81            offset,
82        };
83
84        let stats = if matches!(validity, Validity::AllValid | Validity::NonNullable) {
85            let ends_len = ends.len();
86            let is_constant = ends_len <= 1;
87            let is_sorted = is_constant || (!start && ends_len == 2);
88            let is_strict_sorted =
89                (is_constant && length <= 1) || (!is_constant && is_sorted && length == 2);
90            let run_count = ends_len;
91            let min = start && is_constant; // i.e., true iff all are true
92            let max = start || ends_len > 1; // i.e., true iff any are true
93            StatsSet::new_unchecked(vec![
94                (Stat::IsConstant, is_constant.into()),
95                (Stat::IsSorted, is_sorted.into()),
96                (Stat::IsStrictSorted, is_strict_sorted.into()),
97                (Stat::RunCount, run_count.into()),
98                (Stat::Min, min.into()),
99                (Stat::Max, max.into()),
100            ])
101        } else {
102            StatsSet::default()
103        };
104
105        let mut children = Vec::with_capacity(2);
106        children.push(ends);
107        if let Some(a) = validity.into_array() {
108            children.push(a)
109        }
110
111        Self::try_from_parts(dtype, length, metadata, children.into(), stats)
112    }
113
114    pub(crate) fn find_physical_index(&self, index: usize) -> VortexResult<usize> {
115        search_sorted_usize(&self.ends(), index + self.offset(), SearchSortedSide::Right)
116            .map(|s| s.to_ends_index(self.ends().len()))
117    }
118
119    #[inline]
120    pub(crate) fn offset(&self) -> usize {
121        self.metadata().offset
122    }
123
124    #[inline]
125    pub(crate) fn start(&self) -> bool {
126        self.metadata().start
127    }
128
129    #[inline]
130    pub(crate) fn ends(&self) -> ArrayData {
131        self.as_ref()
132            .child(
133                0,
134                &self.metadata().ends_ptype.into(),
135                self.metadata().num_runs,
136            )
137            .vortex_expect("RunEndBoolArray is missing its run ends")
138    }
139
140    pub fn validity(&self) -> Validity {
141        self.metadata().validity.to_validity(|| {
142            self.as_ref()
143                .child(1, &Validity::DTYPE, self.len())
144                .vortex_expect("RunEndBoolArray: validity child")
145        })
146    }
147}
148
149pub fn encode_runend_bool(array: &BoolArray) -> VortexResult<RunEndBoolArray> {
150    let (ends, start) = runend_bool_encode_slice(&array.boolean_buffer());
151    RunEndBoolArray::try_new(
152        PrimitiveArray::from(ends).into_array(),
153        start,
154        array.validity(),
155    )
156}
157
158pub(crate) fn decode_runend_bool(
159    run_ends: &PrimitiveArray,
160    start: bool,
161    validity: Validity,
162    offset: usize,
163    length: usize,
164) -> VortexResult<BoolArray> {
165    match_each_integer_ptype!(run_ends.ptype(), |$E| {
166        let bools = runend_bool_decode_slice(trimmed_ends_iter(run_ends.maybe_null_slice::<$E>(), offset, length), start, length);
167        Ok(BoolArray::try_new(bools, validity)?)
168    })
169}
170
171pub(crate) fn value_at_index(idx: usize, start: bool) -> bool {
172    if idx % 2 == 0 {
173        start
174    } else {
175        !start
176    }
177}
178
179impl BoolArrayTrait for RunEndBoolArray {}
180
181impl VariantsVTable<RunEndBoolArray> for RunEndBoolEncoding {
182    fn as_bool_array<'a>(&self, array: &'a RunEndBoolArray) -> Option<&'a dyn BoolArrayTrait> {
183        Some(array)
184    }
185}
186
187impl ArrayTrait for RunEndBoolArray {}
188
189impl ValidityVTable<RunEndBoolArray> for RunEndBoolEncoding {
190    fn is_valid(&self, array: &RunEndBoolArray, index: usize) -> bool {
191        array.validity().is_valid(index)
192    }
193
194    fn logical_validity(&self, array: &RunEndBoolArray) -> LogicalValidity {
195        array.validity().to_logical(array.len())
196    }
197}
198
199impl IntoCanonical for RunEndBoolArray {
200    fn into_canonical(self) -> VortexResult<Canonical> {
201        let pends = self.ends().into_primitive()?;
202        decode_runend_bool(
203            &pends,
204            self.start(),
205            self.validity(),
206            self.offset(),
207            self.len(),
208        )
209        .map(Canonical::Bool)
210    }
211}
212
213impl VisitorVTable<RunEndBoolArray> for RunEndBoolEncoding {
214    fn accept(&self, array: &RunEndBoolArray, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> {
215        visitor.visit_child("ends", &array.ends())?;
216        visitor.visit_validity(&array.validity())
217    }
218}
219
220impl StatisticsVTable<RunEndBoolArray> for RunEndBoolEncoding {
221    fn compute_statistics(&self, array: &RunEndBoolArray, stat: Stat) -> VortexResult<StatsSet> {
222        let maybe_scalar: Option<Scalar> = match stat {
223            Stat::NullCount => Some(array.validity().null_count(array.len())?.into()),
224            Stat::TrueCount => {
225                let pends = array.ends().into_primitive()?;
226                let mut true_count: usize = 0;
227                let mut prev_end: usize = 0;
228                let mut include = array.start();
229                match_each_unsigned_integer_ptype!(pends.ptype(), |$P| {
230                    for end in trimmed_ends_iter(pends.maybe_null_slice::<$P>(), array.offset(), array.len()) {
231                        if include {
232                            true_count += end - prev_end;
233                        }
234                        include = !include;
235                        prev_end = end;
236                    }
237                });
238                Some((true_count as u64).into())
239            }
240            _ => None,
241        };
242        if let Some(scalar) = maybe_scalar {
243            Ok(StatsSet::of(stat, scalar))
244        } else {
245            Ok(StatsSet::default())
246        }
247    }
248}
249
250#[cfg(test)]
251#[allow(clippy::cast_possible_truncation)]
252mod test {
253    use core::iter;
254
255    use itertools::Itertools as _;
256    use rstest::rstest;
257    use vortex_array::array::{BoolArray, PrimitiveArray};
258    use vortex_array::compute::{scalar_at, slice, take};
259    use vortex_array::stats::ArrayStatistics;
260    use vortex_array::validity::Validity;
261    use vortex_array::{
262        ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoCanonical, ToArrayData,
263    };
264    use vortex_dtype::{DType, Nullability};
265
266    use crate::RunEndBoolArray;
267
268    #[test]
269    fn new() {
270        // [false, false, true, true, false]
271        let arr =
272            RunEndBoolArray::try_new(vec![2u32, 4, 5].into_array(), false, Validity::NonNullable)
273                .unwrap();
274        assert_eq!(arr.len(), 5);
275        assert_eq!(arr.dtype(), &DType::Bool(Nullability::NonNullable));
276
277        assert_eq!(scalar_at(arr.as_ref(), 0).unwrap(), false.into());
278        assert_eq!(scalar_at(arr.as_ref(), 2).unwrap(), true.into());
279        assert_eq!(scalar_at(arr.as_ref(), 4).unwrap(), false.into());
280    }
281
282    #[test]
283    fn slice_array() {
284        let arr = slice(
285            // [t, t, f, f, f, t, f, t, t, t]
286            RunEndBoolArray::try_new(
287                vec![2u32, 5, 6, 7, 10].into_array(),
288                true,
289                Validity::NonNullable,
290            )
291            .unwrap()
292            .as_ref(),
293            2,
294            8,
295        )
296        .unwrap();
297        assert_eq!(arr.dtype(), &DType::Bool(Nullability::NonNullable));
298
299        assert_eq!(
300            to_bool_vec(&arr),
301            vec![false, false, false, true, false, true],
302        );
303    }
304
305    #[test]
306    fn slice_slice_array() {
307        let raw = BoolArray::from_iter([
308            true, true, false, false, false, true, false, true, true, true,
309        ])
310        .to_array();
311        let arr = slice(&raw, 2, 8).unwrap();
312        assert_eq!(arr.dtype(), &DType::Bool(Nullability::NonNullable));
313
314        assert_eq!(
315            to_bool_vec(&arr),
316            vec![false, false, false, true, false, true],
317        );
318
319        let arr2 = slice(&arr, 3, 6).unwrap();
320        assert_eq!(to_bool_vec(&arr2), vec![true, false, true],);
321
322        let arr3 = slice(&arr2, 1, 3).unwrap();
323        assert_eq!(to_bool_vec(&arr3), vec![false, true],);
324    }
325
326    #[test]
327    fn flatten() {
328        let arr =
329            RunEndBoolArray::try_new(vec![2u32, 4, 5].into_array(), true, Validity::NonNullable)
330                .unwrap();
331
332        assert_eq!(
333            to_bool_vec(&arr.to_array()),
334            vec![true, true, false, false, true]
335        );
336    }
337
338    #[test]
339    fn take_bool() {
340        let arr = take(
341            RunEndBoolArray::try_new(
342                vec![2u32, 4, 5, 10].into_array(),
343                true,
344                Validity::NonNullable,
345            )
346            .unwrap(),
347            vec![0, 0, 6, 4].into_array(),
348        )
349        .unwrap();
350
351        assert_eq!(to_bool_vec(&arr), vec![true, true, false, true]);
352    }
353
354    fn to_bool_vec(arr: &ArrayData) -> Vec<bool> {
355        arr.clone()
356            .into_canonical()
357            .unwrap()
358            .into_bool()
359            .unwrap()
360            .boolean_buffer()
361            .iter()
362            .collect::<Vec<_>>()
363    }
364
365    #[rstest]
366    #[case(true, 1, 1)]
367    #[case(true, 1, 2)]
368    #[case(true, 2, 2)]
369    #[case(false, 1, 1)]
370    #[case(false, 1, 2)]
371    #[case(false, 2, 2)]
372    #[case(false, 3, 32)]
373    #[case(true, 3, 32)]
374    fn stats(#[case] start: bool, #[case] ends_len: usize, #[case] len: usize) {
375        use vortex_array::stats::Stat;
376
377        let ends = (1u32..(ends_len as u32))
378            .chain(iter::once(len as u32))
379            .collect_vec();
380        assert_eq!(ends.len(), ends_len);
381        assert_eq!(*ends.last().unwrap(), len as u32);
382
383        let arr =
384            RunEndBoolArray::try_new(ends.into_array(), start, Validity::NonNullable).unwrap();
385        let bools = arr.clone().into_canonical().unwrap().into_bool().unwrap();
386        for stat in [
387            Stat::IsConstant,
388            Stat::NullCount,
389            Stat::TrueCount,
390            Stat::Min,
391            Stat::Max,
392            Stat::IsSorted,
393            Stat::IsStrictSorted,
394        ] {
395            // call compute_statistics directly to avoid caching
396            let expected = bools.statistics().compute(stat).unwrap();
397            let actual = arr.statistics().compute(stat).unwrap();
398            assert_eq!(expected, actual);
399        }
400
401        assert_eq!(arr.statistics().compute_run_count(), Some(ends_len));
402    }
403
404    #[test]
405    fn sliced_true_count() {
406        let arr = RunEndBoolArray::try_new(
407            PrimitiveArray::from(vec![5u32, 7, 10]).into_array(),
408            true,
409            Validity::NonNullable,
410        )
411        .unwrap();
412        let sliced = slice(&arr, 4, 8).unwrap();
413        assert_eq!(sliced.statistics().compute_true_count().unwrap(), 2);
414    }
415}