vortex_array/arrays/bool/
array.rs1use std::ops::BitAnd;
5
6use arrow_array::BooleanArray;
7use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
8use itertools::Itertools;
9use vortex_buffer::ByteBuffer;
10use vortex_dtype::{DType, match_each_integer_ptype};
11use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
12use vortex_mask::Mask;
13
14use crate::ToCanonical;
15use crate::arrays::bool;
16use crate::patches::Patches;
17use crate::stats::ArrayStats;
18use crate::validity::Validity;
19use crate::vtable::ValidityHelper;
20
21pub trait BooleanBufferExt {
22 fn shrink_offset(self) -> Self;
24}
25
26impl BooleanBufferExt for BooleanBuffer {
27 fn shrink_offset(self) -> Self {
28 let byte_offset = self.offset() / 8;
29 let bit_offset = self.offset() % 8;
30 let len = self.len();
31 let buffer = self
32 .into_inner()
33 .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
34 BooleanBuffer::new(buffer, bit_offset, len)
35 }
36}
37
38#[derive(Clone, Debug)]
68pub struct BoolArray {
69 pub(super) dtype: DType,
70 pub(super) buffer: BooleanBuffer,
71 pub(super) validity: Validity,
72 pub(super) stats_set: ArrayStats,
73}
74
75impl BoolArray {
76 pub fn try_new(
85 buffer: ByteBuffer,
86 offset: usize,
87 len: usize,
88 validity: Validity,
89 ) -> VortexResult<Self> {
90 Self::validate(&buffer, offset, len, &validity)?;
91
92 Ok(unsafe { Self::new_unchecked(buffer, offset, len, validity) })
94 }
95
96 pub unsafe fn new_unchecked(
112 buffer: ByteBuffer,
113 offset: usize,
114 len: usize,
115 validity: Validity,
116 ) -> Self {
117 #[cfg(debug_assertions)]
118 Self::validate(&buffer, offset, len, &validity)
119 .vortex_expect("[Debug Assertion]: Invalid `BoolArray` parameters");
120
121 let buffer = BooleanBuffer::new(buffer.into_arrow_buffer(), offset, len);
122 let buffer = buffer.shrink_offset();
123 Self {
124 dtype: DType::Bool(validity.nullability()),
125 buffer,
126 validity,
127 stats_set: ArrayStats::default(),
128 }
129 }
130
131 pub fn validate(
135 buffer: &ByteBuffer,
136 offset: usize,
137 len: usize,
138 validity: &Validity,
139 ) -> VortexResult<()> {
140 vortex_ensure!(
141 offset < 8,
142 "offset must be less than whole byte, was {offset} bits"
143 );
144
145 let required_bytes = offset.saturating_add(len).div_ceil(8);
147 vortex_ensure!(
148 buffer.len() >= required_bytes,
149 "BoolArray with offset={offset} len={len} cannot be built from buffer of size {}",
150 buffer.len()
151 );
152
153 if let Some(validity_len) = validity.maybe_len() {
155 vortex_ensure!(
156 validity_len == len,
157 "BoolArray of size {len} cannot be built with validity of size {validity_len}"
158 );
159 }
160
161 Ok(())
162 }
163
164 pub fn from_bool_buffer(buffer: BooleanBuffer, validity: Validity) -> Self {
170 if let Some(validity_len) = validity.maybe_len() {
171 assert_eq!(buffer.len(), validity_len);
172 }
173
174 let buffer = buffer.shrink_offset();
176 Self {
177 dtype: DType::Bool(validity.nullability()),
178 buffer,
179 validity,
180 stats_set: ArrayStats::default(),
181 }
182 }
183
184 pub fn from_indices<I: IntoIterator<Item = usize>>(
188 length: usize,
189 indices: I,
190 validity: Validity,
191 ) -> Self {
192 let mut buffer = MutableBuffer::new_null(length);
193 let buffer_slice = buffer.as_slice_mut();
194 indices
195 .into_iter()
196 .for_each(|idx| arrow_buffer::bit_util::set_bit(buffer_slice, idx));
197 Self::from_bool_buffer(
198 BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
199 validity,
200 )
201 }
202
203 pub fn boolean_buffer(&self) -> &BooleanBuffer {
205 assert!(
206 self.buffer.offset() < 8,
207 "Offset must be <8, did we forget to call shrink_offset? Found {}",
208 self.buffer.offset()
209 );
210 &self.buffer
211 }
212
213 pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
220 let offset = self.buffer.offset();
221 let len = self.buffer.len();
222 let arrow_buffer = self.buffer.into_inner();
223 let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
224 arrow_buffer.into_mutable().unwrap_or_else(|b| {
225 let mut buf = MutableBuffer::with_capacity(b.len());
226 buf.extend_from_slice(b.as_slice());
227 buf
228 })
229 } else {
230 let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
231 buf.extend_from_slice(arrow_buffer.as_slice());
232 buf
233 };
234
235 (
236 BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
237 offset,
238 )
239 }
240
241 pub fn to_mask(&self) -> Mask {
242 self.maybe_to_mask()
243 .vortex_expect("cannot convert nullable boolean array to mask")
244 }
245
246 pub fn maybe_to_mask(&self) -> Option<Mask> {
247 self.all_valid()
248 .then(|| Mask::from_buffer(self.boolean_buffer().clone()))
249 }
250
251 pub fn to_mask_fill_null_false(&self) -> Mask {
252 if let Some(constant) = self.as_constant() {
253 let bool_constant = constant.as_bool();
254 if bool_constant.value().unwrap_or(false) {
255 return Mask::new_true(self.len());
256 } else {
257 return Mask::new_false(self.len());
258 }
259 }
260 let buffer = match self.validity_mask() {
262 Mask::AllTrue(_) => self.boolean_buffer().clone(),
263 Mask::AllFalse(_) => return Mask::new_false(self.len()),
264 Mask::Values(validity) => validity.boolean_buffer().bitand(self.boolean_buffer()),
265 };
266 Mask::from_buffer(buffer)
267 }
268
269 pub fn patch(self, patches: &Patches) -> Self {
270 let len = self.len();
271 let offset = patches.offset();
272 let indices = patches.indices().to_primitive();
273 let values = patches.values().to_bool();
274
275 let patched_validity =
276 self.validity()
277 .clone()
278 .patch(len, offset, indices.as_ref(), values.validity());
279
280 let (mut own_values, bit_offset) = self.into_boolean_builder();
281 match_each_integer_ptype!(indices.ptype(), |I| {
282 for (idx, value) in indices
283 .as_slice::<I>()
284 .iter()
285 .zip_eq(values.boolean_buffer().iter())
286 {
287 #[allow(clippy::cast_possible_truncation)]
288 own_values.set_bit(*idx as usize - offset + bit_offset, value);
289 }
290 });
291
292 Self::from_bool_buffer(own_values.finish().slice(bit_offset, len), patched_validity)
293 }
294}
295
296impl From<BooleanBuffer> for BoolArray {
297 fn from(value: BooleanBuffer) -> Self {
298 Self::from_bool_buffer(value, Validity::NonNullable)
299 }
300}
301
302impl FromIterator<bool> for BoolArray {
303 fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
304 Self::from_bool_buffer(BooleanBuffer::from_iter(iter), Validity::NonNullable)
305 }
306}
307
308impl FromIterator<Option<bool>> for BoolArray {
309 fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
310 let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
311
312 Self::from_bool_buffer(
313 buffer,
314 nulls.map(Validity::from).unwrap_or(Validity::AllValid),
315 )
316 }
317}
318
319#[cfg(test)]
320mod tests {
321 use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
322 use vortex_buffer::buffer;
323
324 use crate::arrays::{BoolArray, PrimitiveArray};
325 use crate::patches::Patches;
326 use crate::validity::Validity;
327 use crate::vtable::ValidityHelper;
328 use crate::{Array, IntoArray, ToCanonical};
329
330 #[test]
331 fn bool_array() {
332 let arr = BoolArray::from_iter([true, false, true]);
333 let scalar = bool::try_from(&arr.scalar_at(0)).unwrap();
334 assert!(scalar);
335 }
336
337 #[test]
338 fn test_all_some_iter() {
339 let arr = BoolArray::from_iter([Some(true), Some(false)]);
340
341 assert!(matches!(arr.validity(), Validity::AllValid));
342
343 let scalar = bool::try_from(&arr.scalar_at(0)).unwrap();
344 assert!(scalar);
345 let scalar = bool::try_from(&arr.scalar_at(1)).unwrap();
346 assert!(!scalar);
347 }
348
349 #[test]
350 fn test_bool_from_iter() {
351 let arr = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]);
352
353 let scalar = bool::try_from(&arr.scalar_at(0)).unwrap();
354 assert!(scalar);
355
356 let scalar = bool::try_from(&arr.scalar_at(1)).unwrap();
357 assert!(scalar);
358
359 let scalar = arr.scalar_at(2);
360 assert!(scalar.is_null());
361
362 let scalar = bool::try_from(&arr.scalar_at(3)).unwrap();
363 assert!(!scalar);
364
365 let scalar = arr.scalar_at(4);
366 assert!(scalar.is_null());
367 }
368
369 #[test]
370 fn patch_sliced_bools() {
371 let arr = BoolArray::from(BooleanBuffer::new_set(12));
372 let sliced = arr.slice(4..12);
373 let (values, offset) = sliced.to_bool().into_boolean_builder();
374 assert_eq!(offset, 4);
375 assert_eq!(values.len(), 12);
376 assert_eq!(values.as_slice(), &[255, 15]);
377
378 let arr = {
379 let mut builder = BooleanBufferBuilder::new(12);
380 builder.append(false);
381 builder.append_n(11, true);
382 BoolArray::from(builder.finish())
383 };
384 let sliced = arr.slice(4..12);
385 let sliced_len = sliced.len();
386 let (values, offset) = sliced.to_bool().into_boolean_builder();
387 assert_eq!(offset, 4);
388 assert_eq!(values.as_slice(), &[254, 15]);
389
390 let patches = Patches::new(
392 arr.len(),
393 0,
394 buffer![4u32].into_array(), BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
396 None,
397 );
398 let arr = arr.patch(&patches);
399 let arr_len = arr.len();
400 let (values, offset) = arr.to_bool().into_boolean_builder();
401 assert_eq!(offset, 0);
402 assert_eq!(values.len(), arr_len + offset);
403 assert_eq!(values.as_slice(), &[238, 15]);
404
405 let (values, offset) = sliced.to_bool().into_boolean_builder();
407 assert_eq!(offset, 4);
408 assert_eq!(values.len(), sliced_len + offset);
409 assert_eq!(values.as_slice(), &[254, 15]); }
411
412 #[test]
413 fn slice_array_in_middle() {
414 let arr = BoolArray::from(BooleanBuffer::new_set(16));
415 let sliced = arr.slice(4..12);
416 let sliced_len = sliced.len();
417 let (values, offset) = sliced.to_bool().into_boolean_builder();
418 assert_eq!(offset, 4);
419 assert_eq!(values.len(), sliced_len + offset);
420 assert_eq!(values.as_slice(), &[255, 15]);
421 }
422
423 #[test]
424 #[should_panic]
425 fn patch_bools_owned() {
426 let buffer = buffer![255u8; 2];
427 let buf = BooleanBuffer::new(buffer.into_arrow_buffer(), 0, 15);
428 let arr = BoolArray::from_bool_buffer(buf, Validity::NonNullable);
429 let buf_ptr = arr.boolean_buffer().sliced().as_ptr();
430
431 let patches = Patches::new(
432 arr.len(),
433 0,
434 PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array(),
435 BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
436 None,
437 );
438 let arr = arr.patch(&patches);
439 assert_eq!(arr.boolean_buffer().sliced().as_ptr(), buf_ptr);
440
441 let (values, _byte_bit_offset) = arr.to_bool().into_boolean_builder();
442 assert_eq!(values.as_slice(), &[254, 127]);
443 }
444
445 #[test]
446 fn patch_sliced_bools_offset() {
447 let arr = BoolArray::from(BooleanBuffer::new_set(15));
448 let sliced = arr.slice(4..15);
449 let (values, offset) = sliced.to_bool().into_boolean_builder();
450 assert_eq!(offset, 4);
451 assert_eq!(values.as_slice(), &[255, 127]);
452 }
453}