vortex_runend_bool/
compress.rs

1use std::cmp::min;
2
3use arrow_buffer::buffer::BooleanBuffer;
4use arrow_buffer::BooleanBufferBuilder;
5use num_traits::AsPrimitive;
6use vortex_dtype::NativePType;
7use vortex_error::{vortex_panic, VortexExpect as _};
8
9use crate::value_at_index;
10
11pub fn runend_bool_encode_slice(elements: &BooleanBuffer) -> (Vec<u64>, bool) {
12    let mut iter = elements.set_slices();
13    let Some((start, end)) = iter.next() else {
14        return (vec![elements.len() as u64], false);
15    };
16
17    let mut ends = Vec::new();
18    let first_bool = start == 0;
19    if !first_bool {
20        ends.push(start as u64)
21    }
22    ends.push(end as u64);
23    for (s, e) in iter {
24        ends.push(s as u64);
25        ends.push(e as u64);
26    }
27
28    let last_end = ends.last().vortex_expect(
29        "RunEndBoolArray cannot have empty run ends (by construction); this should be impossible",
30    );
31    if *last_end != elements.len() as u64 {
32        ends.push(elements.len() as u64)
33    }
34
35    (ends, first_bool)
36}
37
38#[inline]
39pub fn trimmed_ends_iter<E: NativePType + AsPrimitive<usize> + Ord>(
40    run_ends: &[E],
41    offset: usize,
42    length: usize,
43) -> impl Iterator<Item = usize> + use<'_, E> {
44    let offset_e = E::from_usize(offset).unwrap_or_else(|| {
45        vortex_panic!(
46            "offset {} cannot be converted to {}",
47            offset,
48            std::any::type_name::<E>()
49        )
50    });
51    let length_e = E::from_usize(length).unwrap_or_else(|| {
52        vortex_panic!(
53            "length {} cannot be converted to {}",
54            length,
55            std::any::type_name::<E>()
56        )
57    });
58    run_ends
59        .iter()
60        .copied()
61        .map(move |v| v - offset_e)
62        .map(move |v| min(v, length_e))
63        .map(|v| v.as_())
64}
65
66pub fn runend_bool_decode_slice(
67    run_ends_iter: impl Iterator<Item = usize>,
68    start: bool,
69    length: usize,
70) -> BooleanBuffer {
71    let mut decoded = BooleanBufferBuilder::new(length);
72    for (idx, end) in run_ends_iter.enumerate() {
73        decoded.append_n(end - decoded.len(), value_at_index(idx, start));
74    }
75    BooleanBuffer::from(decoded)
76}
77
78#[cfg(test)]
79mod test {
80    use arrow_buffer::BooleanBuffer;
81    use itertools::Itertools;
82    use rand::prelude::StdRng;
83    use rand::{Rng, SeedableRng};
84    use vortex_array::array::{BoolArray, PrimitiveArray};
85    use vortex_array::compute::slice;
86    use vortex_array::validity::Validity;
87    use vortex_array::IntoArrayVariant;
88
89    use crate::compress::{runend_bool_decode_slice, runend_bool_encode_slice, trimmed_ends_iter};
90    use crate::decode_runend_bool;
91
92    #[test]
93    fn encode_bool() {
94        let encoded =
95            runend_bool_encode_slice(&BooleanBuffer::from([true, true, false, true].as_slice()));
96        assert_eq!(encoded, (vec![2, 3, 4], true))
97    }
98
99    #[test]
100    fn encode_bool_false_true_end() {
101        let mut input = vec![false; 66];
102        input.extend([true, true]);
103        let encoded = runend_bool_encode_slice(&BooleanBuffer::from(input));
104        assert_eq!(encoded, (vec![66, 68], false))
105    }
106
107    #[test]
108    fn encode_bool_false() {
109        let encoded =
110            runend_bool_encode_slice(&BooleanBuffer::from([false, false, true, false].as_slice()));
111        assert_eq!(encoded, (vec![2, 3, 4], false))
112    }
113
114    #[test]
115    fn encode_decode_bool() {
116        let input = [true, true, false, true, true, false];
117        let (ends, start) = runend_bool_encode_slice(&BooleanBuffer::from(input.as_slice()));
118        let ends_iter = trimmed_ends_iter(ends.as_slice(), 0, input.len());
119
120        let decoded = runend_bool_decode_slice(ends_iter, start, input.len());
121        assert_eq!(decoded, BooleanBuffer::from(input.as_slice()))
122    }
123
124    #[test]
125    fn encode_decode_bool_false_start() {
126        let input = [false, false, true, true, false, true, true, false];
127        let (ends, start) = runend_bool_encode_slice(&BooleanBuffer::from(input.as_slice()));
128        let ends_iter = trimmed_ends_iter(ends.as_slice(), 0, input.len());
129
130        let decoded = runend_bool_decode_slice(ends_iter, start, input.len());
131        assert_eq!(decoded, BooleanBuffer::from(input.as_slice()))
132    }
133
134    #[test]
135    fn encode_decode_bool_false_start_long() {
136        let mut input = vec![true; 1024];
137        input.extend([false, true, false, true].as_slice());
138        let (ends, start) = runend_bool_encode_slice(&BooleanBuffer::from(input.as_slice()));
139        let ends_iter = trimmed_ends_iter(ends.as_slice(), 0, input.len());
140
141        let decoded = runend_bool_decode_slice(ends_iter, start, input.len());
142        assert_eq!(decoded, BooleanBuffer::from(input.as_slice()))
143    }
144
145    #[test]
146    fn encode_decode_random() {
147        let mut rng = StdRng::seed_from_u64(4352);
148        let input = (0..1024 * 4).map(|_x| rng.gen::<bool>()).collect_vec();
149        let (ends, start) = runend_bool_encode_slice(&BooleanBuffer::from(input.as_slice()));
150        let ends_iter = trimmed_ends_iter(ends.as_slice(), 0, input.len());
151
152        let decoded = runend_bool_decode_slice(ends_iter, start, input.len());
153        assert_eq!(decoded, BooleanBuffer::from(input.as_slice()))
154    }
155
156    #[test]
157    fn encode_decode_offset_array() {
158        let mut rng = StdRng::seed_from_u64(39451);
159        let input = (0..1024 * 8 - 61).map(|_x| rng.gen::<bool>()).collect_vec();
160        let b = BoolArray::from_iter(input.clone());
161        let b = slice(b, 3, 1024 * 8 - 66).unwrap().into_bool().unwrap();
162        let (ends, start) = runend_bool_encode_slice(&b.boolean_buffer());
163        let ends = PrimitiveArray::from(ends);
164
165        let decoded = decode_runend_bool(&ends, start, Validity::NonNullable, 0, 1024 * 8 - 69)
166            .unwrap()
167            .into_bool()
168            .unwrap()
169            .boolean_buffer()
170            .iter()
171            .collect_vec();
172        assert_eq!(input[3..1024 * 8 - 66], decoded)
173    }
174}