1use std::cmp::min;
2
3use arrow_buffer::buffer::BooleanBuffer;
4use arrow_buffer::BooleanBufferBuilder;
5use num_traits::AsPrimitive;
6use vortex_dtype::NativePType;
7use vortex_error::{vortex_panic, VortexExpect as _};
8
9use crate::value_at_index;
10
11pub fn runend_bool_encode_slice(elements: &BooleanBuffer) -> (Vec<u64>, bool) {
12 let mut iter = elements.set_slices();
13 let Some((start, end)) = iter.next() else {
14 return (vec![elements.len() as u64], false);
15 };
16
17 let mut ends = Vec::new();
18 let first_bool = start == 0;
19 if !first_bool {
20 ends.push(start as u64)
21 }
22 ends.push(end as u64);
23 for (s, e) in iter {
24 ends.push(s as u64);
25 ends.push(e as u64);
26 }
27
28 let last_end = ends.last().vortex_expect(
29 "RunEndBoolArray cannot have empty run ends (by construction); this should be impossible",
30 );
31 if *last_end != elements.len() as u64 {
32 ends.push(elements.len() as u64)
33 }
34
35 (ends, first_bool)
36}
37
38#[inline]
39pub fn trimmed_ends_iter<E: NativePType + AsPrimitive<usize> + Ord>(
40 run_ends: &[E],
41 offset: usize,
42 length: usize,
43) -> impl Iterator<Item = usize> + use<'_, E> {
44 let offset_e = E::from_usize(offset).unwrap_or_else(|| {
45 vortex_panic!(
46 "offset {} cannot be converted to {}",
47 offset,
48 std::any::type_name::<E>()
49 )
50 });
51 let length_e = E::from_usize(length).unwrap_or_else(|| {
52 vortex_panic!(
53 "length {} cannot be converted to {}",
54 length,
55 std::any::type_name::<E>()
56 )
57 });
58 run_ends
59 .iter()
60 .copied()
61 .map(move |v| v - offset_e)
62 .map(move |v| min(v, length_e))
63 .map(|v| v.as_())
64}
65
66pub fn runend_bool_decode_slice(
67 run_ends_iter: impl Iterator<Item = usize>,
68 start: bool,
69 length: usize,
70) -> BooleanBuffer {
71 let mut decoded = BooleanBufferBuilder::new(length);
72 for (idx, end) in run_ends_iter.enumerate() {
73 decoded.append_n(end - decoded.len(), value_at_index(idx, start));
74 }
75 BooleanBuffer::from(decoded)
76}
77
78#[cfg(test)]
79mod test {
80 use arrow_buffer::BooleanBuffer;
81 use itertools::Itertools;
82 use rand::prelude::StdRng;
83 use rand::{Rng, SeedableRng};
84 use vortex_array::array::{BoolArray, PrimitiveArray};
85 use vortex_array::compute::slice;
86 use vortex_array::validity::Validity;
87 use vortex_array::IntoArrayVariant;
88
89 use crate::compress::{runend_bool_decode_slice, runend_bool_encode_slice, trimmed_ends_iter};
90 use crate::decode_runend_bool;
91
92 #[test]
93 fn encode_bool() {
94 let encoded =
95 runend_bool_encode_slice(&BooleanBuffer::from([true, true, false, true].as_slice()));
96 assert_eq!(encoded, (vec![2, 3, 4], true))
97 }
98
99 #[test]
100 fn encode_bool_false_true_end() {
101 let mut input = vec![false; 66];
102 input.extend([true, true]);
103 let encoded = runend_bool_encode_slice(&BooleanBuffer::from(input));
104 assert_eq!(encoded, (vec![66, 68], false))
105 }
106
107 #[test]
108 fn encode_bool_false() {
109 let encoded =
110 runend_bool_encode_slice(&BooleanBuffer::from([false, false, true, false].as_slice()));
111 assert_eq!(encoded, (vec![2, 3, 4], false))
112 }
113
114 #[test]
115 fn encode_decode_bool() {
116 let input = [true, true, false, true, true, false];
117 let (ends, start) = runend_bool_encode_slice(&BooleanBuffer::from(input.as_slice()));
118 let ends_iter = trimmed_ends_iter(ends.as_slice(), 0, input.len());
119
120 let decoded = runend_bool_decode_slice(ends_iter, start, input.len());
121 assert_eq!(decoded, BooleanBuffer::from(input.as_slice()))
122 }
123
124 #[test]
125 fn encode_decode_bool_false_start() {
126 let input = [false, false, true, true, false, true, true, false];
127 let (ends, start) = runend_bool_encode_slice(&BooleanBuffer::from(input.as_slice()));
128 let ends_iter = trimmed_ends_iter(ends.as_slice(), 0, input.len());
129
130 let decoded = runend_bool_decode_slice(ends_iter, start, input.len());
131 assert_eq!(decoded, BooleanBuffer::from(input.as_slice()))
132 }
133
134 #[test]
135 fn encode_decode_bool_false_start_long() {
136 let mut input = vec![true; 1024];
137 input.extend([false, true, false, true].as_slice());
138 let (ends, start) = runend_bool_encode_slice(&BooleanBuffer::from(input.as_slice()));
139 let ends_iter = trimmed_ends_iter(ends.as_slice(), 0, input.len());
140
141 let decoded = runend_bool_decode_slice(ends_iter, start, input.len());
142 assert_eq!(decoded, BooleanBuffer::from(input.as_slice()))
143 }
144
145 #[test]
146 fn encode_decode_random() {
147 let mut rng = StdRng::seed_from_u64(4352);
148 let input = (0..1024 * 4).map(|_x| rng.gen::<bool>()).collect_vec();
149 let (ends, start) = runend_bool_encode_slice(&BooleanBuffer::from(input.as_slice()));
150 let ends_iter = trimmed_ends_iter(ends.as_slice(), 0, input.len());
151
152 let decoded = runend_bool_decode_slice(ends_iter, start, input.len());
153 assert_eq!(decoded, BooleanBuffer::from(input.as_slice()))
154 }
155
156 #[test]
157 fn encode_decode_offset_array() {
158 let mut rng = StdRng::seed_from_u64(39451);
159 let input = (0..1024 * 8 - 61).map(|_x| rng.gen::<bool>()).collect_vec();
160 let b = BoolArray::from_iter(input.clone());
161 let b = slice(b, 3, 1024 * 8 - 66).unwrap().into_bool().unwrap();
162 let (ends, start) = runend_bool_encode_slice(&b.boolean_buffer());
163 let ends = PrimitiveArray::from(ends);
164
165 let decoded = decode_runend_bool(&ends, start, Validity::NonNullable, 0, 1024 * 8 - 69)
166 .unwrap()
167 .into_bool()
168 .unwrap()
169 .boolean_buffer()
170 .iter()
171 .collect_vec();
172 assert_eq!(input[3..1024 * 8 - 66], decoded)
173 }
174}