1use itertools::Itertools;
10use vortex_array::ArrayRef;
11use vortex_array::ExecutionCtx;
12use vortex_array::IntoArray;
13use vortex_array::arrays::BoolArray;
14use vortex_array::arrays::ConstantArray;
15use vortex_array::arrays::PrimitiveArray;
16use vortex_array::arrays::bool::BoolArrayExt;
17use vortex_array::dtype::DType;
18use vortex_array::dtype::Nullability;
19use vortex_array::match_each_unsigned_integer_ptype;
20use vortex_array::scalar::Scalar;
21use vortex_array::validity::Validity;
22use vortex_buffer::BitBuffer;
23use vortex_buffer::BitBufferMut;
24use vortex_error::VortexResult;
25use vortex_mask::Mask;
26
27use crate::iter::trimmed_ends_iter;
28
29const PREFILL_RUN_THRESHOLD: usize = 32;
32
33pub fn runend_decode_bools(
35 ends: PrimitiveArray,
36 values: BoolArray,
37 offset: usize,
38 length: usize,
39 ctx: &mut ExecutionCtx,
40) -> VortexResult<ArrayRef> {
41 let validity = values
42 .as_ref()
43 .validity()?
44 .execute_mask(values.as_ref().len(), ctx)?;
45 let values_buf = values.to_bit_buffer();
46 let nullability = values.dtype().nullability();
47
48 let num_runs = values_buf.len();
50 if offset == 0 && num_runs < PREFILL_RUN_THRESHOLD {
51 return Ok(match_each_unsigned_integer_ptype!(ends.ptype(), |E| {
52 decode_few_runs_no_offset(
53 ends.as_slice::<E>(),
54 &values_buf,
55 validity,
56 nullability,
57 length,
58 )
59 }));
60 }
61
62 Ok(match_each_unsigned_integer_ptype!(ends.ptype(), |E| {
63 runend_decode_typed_bool(
64 trimmed_ends_iter(ends.as_slice::<E>(), offset, length),
65 &values_buf,
66 validity,
67 nullability,
68 length,
69 )
70 }))
71}
72
73pub fn runend_decode_typed_bool(
81 run_ends: impl Iterator<Item = usize>,
82 values: &BitBuffer,
83 values_validity: Mask,
84 values_nullability: Nullability,
85 length: usize,
86) -> ArrayRef {
87 match values_validity {
88 Mask::AllTrue(_) => {
89 decode_bool_non_nullable(run_ends, values, values_nullability, length).into_array()
90 }
91 Mask::AllFalse(_) => {
92 ConstantArray::new(Scalar::null(DType::Bool(Nullability::Nullable)), length)
93 .into_array()
94 }
95 Mask::Values(mask) => {
96 decode_bool_nullable(run_ends, values, mask.bit_buffer(), length).into_array()
97 }
98 }
99}
100
101#[inline(always)]
105fn decode_few_runs_no_offset<E: vortex_array::dtype::IntegerPType>(
106 ends: &[E],
107 values: &BitBuffer,
108 validity: Mask,
109 nullability: Nullability,
110 length: usize,
111) -> ArrayRef {
112 match validity {
113 Mask::AllTrue(_) => {
114 let mut decoded = BitBufferMut::with_capacity(length);
115 let mut prev_end = 0usize;
116 for (i, &end) in ends.iter().enumerate() {
117 let end = end.as_().min(length);
118 decoded.append_n(values.value(i), end - prev_end);
119 prev_end = end;
120 }
121 BoolArray::new(decoded.freeze(), nullability.into()).into_array()
122 }
123 Mask::AllFalse(_) => {
124 ConstantArray::new(Scalar::null(DType::Bool(Nullability::Nullable)), length)
125 .into_array()
126 }
127 Mask::Values(mask) => {
128 let validity_buf = mask.bit_buffer();
129 let mut decoded = BitBufferMut::with_capacity(length);
130 let mut decoded_validity = BitBufferMut::with_capacity(length);
131 let mut prev_end = 0usize;
132 for (i, &end) in ends.iter().enumerate() {
133 let end = end.as_().min(length);
134 let run_len = end - prev_end;
135 let is_valid = validity_buf.value(i);
136 if is_valid {
137 decoded_validity.append_n(true, run_len);
138 decoded.append_n(values.value(i), run_len);
139 } else {
140 decoded_validity.append_n(false, run_len);
141 decoded.append_n(false, run_len);
142 }
143 prev_end = end;
144 }
145 BoolArray::new(decoded.freeze(), Validity::from(decoded_validity.freeze())).into_array()
146 }
147 }
148}
149
150fn decode_bool_non_nullable(
152 run_ends: impl Iterator<Item = usize>,
153 values: &BitBuffer,
154 nullability: Nullability,
155 length: usize,
156) -> BoolArray {
157 let num_runs = values.len();
158
159 if num_runs < PREFILL_RUN_THRESHOLD {
161 let mut decoded = BitBufferMut::with_capacity(length);
162 for (end, value) in run_ends.zip(values.iter()) {
163 decoded.append_n(value, end - decoded.len());
164 }
165 return BoolArray::new(decoded.freeze(), nullability.into());
166 }
167
168 let prefill = values.true_count() > num_runs - values.true_count();
170 let mut decoded = BitBufferMut::full(prefill, length);
171 let mut current_pos = 0usize;
172
173 for (end, value) in run_ends.zip_eq(values.iter()) {
174 if end > current_pos && value != prefill {
175 unsafe { decoded.fill_range_unchecked(current_pos, end, value) };
177 }
178 current_pos = end;
179 }
180 BoolArray::new(decoded.freeze(), nullability.into())
181}
182
183fn decode_bool_nullable(
185 run_ends: impl Iterator<Item = usize>,
186 values: &BitBuffer,
187 validity_mask: &BitBuffer,
188 length: usize,
189) -> BoolArray {
190 let num_runs = values.len();
191
192 if num_runs < PREFILL_RUN_THRESHOLD {
194 return decode_nullable_sequential(run_ends, values, validity_mask, length);
195 }
196
197 let prefill_decoded = values.true_count() > num_runs - values.true_count();
199 let prefill_valid = validity_mask.true_count() > num_runs - validity_mask.true_count();
200
201 let mut decoded = BitBufferMut::full(prefill_decoded, length);
202 let mut decoded_validity = BitBufferMut::full(prefill_valid, length);
203 let mut current_pos = 0usize;
204
205 for (end, (value, is_valid)) in run_ends.zip_eq(values.iter().zip(validity_mask.iter())) {
206 if end > current_pos {
207 if is_valid != prefill_valid {
209 unsafe { decoded_validity.fill_range_unchecked(current_pos, end, is_valid) };
210 }
211 let want_decoded = is_valid && value;
213 if want_decoded != prefill_decoded {
214 unsafe { decoded.fill_range_unchecked(current_pos, end, want_decoded) };
215 }
216 current_pos = end;
217 }
218 }
219 BoolArray::new(decoded.freeze(), Validity::from(decoded_validity.freeze()))
220}
221
222#[inline(always)]
224fn decode_nullable_sequential(
225 run_ends: impl Iterator<Item = usize>,
226 values: &BitBuffer,
227 validity_mask: &BitBuffer,
228 length: usize,
229) -> BoolArray {
230 let mut decoded = BitBufferMut::with_capacity(length);
231 let mut decoded_validity = BitBufferMut::with_capacity(length);
232
233 for (end, (value, is_valid)) in run_ends.zip(values.iter().zip(validity_mask.iter())) {
234 let run_len = end - decoded.len();
235 if is_valid {
236 decoded_validity.append_n(true, run_len);
237 decoded.append_n(value, run_len);
238 } else {
239 decoded_validity.append_n(false, run_len);
240 decoded.append_n(false, run_len);
241 }
242 }
243
244 BoolArray::new(decoded.freeze(), Validity::from(decoded_validity.freeze()))
245}
246
247#[cfg(test)]
248mod tests {
249 use vortex_array::LEGACY_SESSION;
250 use vortex_array::VortexSessionExecute;
251 use vortex_array::arrays::BoolArray;
252 use vortex_array::arrays::PrimitiveArray;
253 use vortex_array::arrays::bool::BoolArrayExt;
254 use vortex_array::assert_arrays_eq;
255 use vortex_array::validity::Validity;
256 use vortex_buffer::BitBuffer;
257 use vortex_error::VortexResult;
258
259 use super::runend_decode_bools;
260
261 #[test]
262 fn decode_bools_alternating() -> VortexResult<()> {
263 let mut ctx = LEGACY_SESSION.create_execution_ctx();
264 let ends = PrimitiveArray::from_iter([2u32, 5, 10]);
266 let values = BoolArray::from(BitBuffer::from(vec![true, false, true]));
267 let decoded = runend_decode_bools(ends, values, 0, 10, &mut ctx)?;
268
269 let expected = BoolArray::from(BitBuffer::from(vec![
270 true, true, false, false, false, true, true, true, true, true,
271 ]));
272 assert_arrays_eq!(decoded, expected);
273 Ok(())
274 }
275
276 #[test]
277 fn decode_bools_mostly_true() -> VortexResult<()> {
278 let mut ctx = LEGACY_SESSION.create_execution_ctx();
279 let ends = PrimitiveArray::from_iter([5u32, 6, 10]);
281 let values = BoolArray::from(BitBuffer::from(vec![true, false, true]));
282 let decoded = runend_decode_bools(ends, values, 0, 10, &mut ctx)?;
283
284 let expected = BoolArray::from(BitBuffer::from(vec![
285 true, true, true, true, true, false, true, true, true, true,
286 ]));
287 assert_arrays_eq!(decoded, expected);
288 Ok(())
289 }
290
291 #[test]
292 fn decode_bools_mostly_false() -> VortexResult<()> {
293 let mut ctx = LEGACY_SESSION.create_execution_ctx();
294 let ends = PrimitiveArray::from_iter([5u32, 6, 10]);
296 let values = BoolArray::from(BitBuffer::from(vec![false, true, false]));
297 let decoded = runend_decode_bools(ends, values, 0, 10, &mut ctx)?;
298
299 let expected = BoolArray::from(BitBuffer::from(vec![
300 false, false, false, false, false, true, false, false, false, false,
301 ]));
302 assert_arrays_eq!(decoded, expected);
303 Ok(())
304 }
305
306 #[test]
307 fn decode_bools_all_true_single_run() -> VortexResult<()> {
308 let mut ctx = LEGACY_SESSION.create_execution_ctx();
309 let ends = PrimitiveArray::from_iter([10u32]);
310 let values = BoolArray::from(BitBuffer::from(vec![true]));
311 let decoded = runend_decode_bools(ends, values, 0, 10, &mut ctx)?;
312
313 let expected = BoolArray::from(BitBuffer::from(vec![
314 true, true, true, true, true, true, true, true, true, true,
315 ]));
316 assert_arrays_eq!(decoded, expected);
317 Ok(())
318 }
319
320 #[test]
321 fn decode_bools_all_false_single_run() -> VortexResult<()> {
322 let mut ctx = LEGACY_SESSION.create_execution_ctx();
323 let ends = PrimitiveArray::from_iter([10u32]);
324 let values = BoolArray::from(BitBuffer::from(vec![false]));
325 let decoded = runend_decode_bools(ends, values, 0, 10, &mut ctx)?;
326
327 let expected = BoolArray::from(BitBuffer::from(vec![
328 false, false, false, false, false, false, false, false, false, false,
329 ]));
330 assert_arrays_eq!(decoded, expected);
331 Ok(())
332 }
333
334 #[test]
335 fn decode_bools_with_offset() -> VortexResult<()> {
336 let mut ctx = LEGACY_SESSION.create_execution_ctx();
337 let ends = PrimitiveArray::from_iter([2u32, 5, 10]);
339 let values = BoolArray::from(BitBuffer::from(vec![true, false, true]));
340 let decoded = runend_decode_bools(ends, values, 2, 6, &mut ctx)?;
341
342 let expected =
343 BoolArray::from(BitBuffer::from(vec![false, false, false, true, true, true]));
344 assert_arrays_eq!(decoded, expected);
345 Ok(())
346 }
347
348 #[test]
349 fn decode_bools_nullable() -> VortexResult<()> {
350 use vortex_array::validity::Validity;
351
352 let mut ctx = LEGACY_SESSION.create_execution_ctx();
353 let ends = PrimitiveArray::from_iter([2u32, 5, 10]);
355 let values = BoolArray::new(
356 BitBuffer::from(vec![true, false, true]),
357 Validity::from(BitBuffer::from(vec![true, false, true])),
358 );
359 let decoded = runend_decode_bools(ends, values, 0, 10, &mut ctx)?;
360
361 let expected = BoolArray::new(
363 BitBuffer::from(vec![
364 true, true, false, false, false, true, true, true, true, true,
365 ]),
366 Validity::from(BitBuffer::from(vec![
367 true, true, false, false, false, true, true, true, true, true,
368 ])),
369 );
370 assert_arrays_eq!(decoded, expected);
371 Ok(())
372 }
373
374 #[test]
375 fn decode_bools_nullable_few_runs() -> VortexResult<()> {
376 let mut ctx = LEGACY_SESSION.create_execution_ctx();
377 let ends = PrimitiveArray::from_iter([2000u32, 4000, 6000, 8000, 10000]);
379 let values = BoolArray::new(
380 BitBuffer::from(vec![true, false, true, false, true]),
381 Validity::from(BitBuffer::from(vec![true, false, true, false, true])),
382 );
383 let decoded = runend_decode_bools(ends, values, 0, 10000, &mut ctx)?
384 .execute::<BoolArray>(&mut ctx)?;
385
386 assert_eq!(decoded.len(), 10000);
388 assert!(
390 decoded
391 .as_ref()
392 .validity()?
393 .execute_mask(decoded.as_ref().len(), &mut ctx)?
394 .value(0)
395 );
396 assert!(decoded.to_bit_buffer().value(0));
397 assert!(
399 !decoded
400 .as_ref()
401 .validity()?
402 .execute_mask(decoded.as_ref().len(), &mut ctx)?
403 .value(2000)
404 );
405 assert!(
407 decoded
408 .as_ref()
409 .validity()?
410 .execute_mask(decoded.as_ref().len(), &mut ctx)?
411 .value(4000)
412 );
413 assert!(decoded.to_bit_buffer().value(4000));
414 Ok(())
415 }
416}