Skip to main content

vortex_array/arrays/bool/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Display;
5use std::fmt::Formatter;
6
7use arrow_array::BooleanArray;
8use smallvec::smallvec;
9use vortex_buffer::BitBuffer;
10use vortex_buffer::BitBufferMut;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_ensure;
14use vortex_mask::Mask;
15
16use crate::ArrayRef;
17use crate::ArraySlots;
18use crate::ExecutionCtx;
19use crate::IntoArray;
20use crate::array::Array;
21use crate::array::ArrayParts;
22use crate::array::TypedArrayRef;
23use crate::array::child_to_validity;
24use crate::array::validity_to_child;
25use crate::arrays::Bool;
26use crate::arrays::BoolArray;
27use crate::buffer::BufferHandle;
28use crate::dtype::DType;
29use crate::validity::Validity;
30
/// Slot index of the validity child, the bitmap indicating which elements are non-null.
pub(super) const VALIDITY_SLOT: usize = 0;
/// Number of slots a bool array declares; also the length of [`SLOT_NAMES`].
pub(super) const NUM_SLOTS: usize = 1;
/// Names of the slots, indexed by slot position.
pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["validity"];
35
/// Inner data for a boolean array that stores true/false values in a compact bit-packed format.
///
/// This mirrors the Apache Arrow Boolean array encoding, where each boolean value
/// is stored as a single bit rather than a full byte.
///
/// The data layout uses:
/// - A bit-packed buffer where each bit represents one boolean value (0 = false, 1 = true)
/// - An optional validity child array, which must be of type `Bool(NonNullable)`, where true values
///   indicate valid and false indicates null. If the i-th value is null in the validity child,
///   the i-th packed bit in the buffer may be 0 or 1, i.e. it is undefined.
/// - Bit-level slicing is supported with minimal overhead
///
/// # Examples
///
/// ```
/// # fn main() -> vortex_error::VortexResult<()> {
/// use vortex_array::arrays::BoolArray;
/// use vortex_array::{IntoArray, LEGACY_SESSION, VortexSessionExecute};
///
/// // Create from iterator using FromIterator impl
/// let array: BoolArray = [true, false, true, false].into_iter().collect();
///
/// // Slice the array
/// let sliced = array.slice(1..3)?;
/// assert_eq!(sliced.len(), 2);
///
/// // Access individual values
/// let mut ctx = LEGACY_SESSION.create_execution_ctx();
/// let value = array.execute_scalar(0, &mut ctx).unwrap();
/// assert_eq!(value, true.into());
/// # Ok(())
/// # }
/// ```
#[derive(Clone, Debug)]
pub struct BoolData {
    /// Handle to the buffer holding the bit-packed values.
    pub(super) bits: BufferHandle,
    /// Bit offset of the first value within `bits`. The validating constructors require it to
    /// be less than 8.
    pub(super) offset: usize,
}
74
75impl Display for BoolData {
76    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
77        write!(f, "offset: {}", self.offset)
78    }
79}
80
/// Owned components of a bool array's data, as produced by [`BoolData::into_parts`].
pub struct BoolDataParts {
    /// Handle to the buffer holding the bit-packed values.
    pub bits: BufferHandle,
    /// Bit offset of the first value within `bits`.
    pub offset: usize,
    /// Number of boolean values; supplied by the caller of `into_parts`, not stored in `BoolData`.
    pub len: usize,
}
86
87pub trait BoolArrayExt: TypedArrayRef<Bool> {
88    fn nullability(&self) -> crate::dtype::Nullability {
89        match self.as_ref().dtype() {
90            DType::Bool(nullability) => *nullability,
91            _ => unreachable!("BoolArrayExt requires a bool dtype"),
92        }
93    }
94
95    fn validity(&self) -> Validity {
96        child_to_validity(
97            self.as_ref().slots()[VALIDITY_SLOT].as_ref(),
98            self.nullability(),
99        )
100    }
101
102    fn to_bit_buffer(&self) -> BitBuffer {
103        let buffer = self.bits.as_host().clone();
104        BitBuffer::new_with_offset(buffer, self.as_ref().len(), self.offset)
105    }
106
107    fn maybe_execute_mask(&self, ctx: &mut ExecutionCtx) -> VortexResult<Option<Mask>> {
108        let all_valid = match &self.validity() {
109            Validity::NonNullable | Validity::AllValid => true,
110            Validity::AllInvalid => false,
111            Validity::Array(a) => a.statistics().compute_min::<bool>(ctx).unwrap_or(false),
112        };
113        Ok(all_valid.then(|| Mask::from_buffer(self.to_bit_buffer())))
114    }
115
116    fn execute_mask(&self, ctx: &mut ExecutionCtx) -> Mask {
117        self.maybe_execute_mask(ctx)
118            .vortex_expect("failed to check validity")
119            .vortex_expect("cannot convert nullable boolean array to mask")
120    }
121
122    fn to_mask_fill_null_false(&self, ctx: &mut ExecutionCtx) -> Mask {
123        let validity_mask = self
124            .validity()
125            .execute_mask(self.as_ref().len(), ctx)
126            .vortex_expect("Failed to compute validity mask");
127        let buffer = match validity_mask {
128            Mask::AllTrue(_) => self.to_bit_buffer(),
129            Mask::AllFalse(_) => return Mask::new_false(self.as_ref().len()),
130            Mask::Values(validity) => validity.bit_buffer() & self.to_bit_buffer(),
131        };
132        Mask::from_buffer(buffer)
133    }
134}
135impl<T: TypedArrayRef<Bool>> BoolArrayExt for T {}
136
137/// Field accessors and non-consuming methods on the inner bool data.
138impl BoolData {
139    /// Splits into owned parts
140    #[inline]
141    pub fn into_parts(self, len: usize) -> BoolDataParts {
142        BoolDataParts {
143            bits: self.bits,
144            offset: self.offset,
145            len,
146        }
147    }
148
149    pub(crate) fn make_slots(validity: &Validity, len: usize) -> ArraySlots {
150        smallvec![validity_to_child(validity, len)]
151    }
152}
153
154/// Constructors and consuming methods for `BoolArray` (`Array<Bool>`).
155impl Array<Bool> {
156    /// Constructs a new `BoolArray`.
157    ///
158    /// # Panics
159    ///
160    /// Panics if the validity length is not equal to the bit buffer length.
161    pub fn new(bits: BitBuffer, validity: Validity) -> Self {
162        Self::try_new(bits, validity).vortex_expect("Failed to create BoolArray")
163    }
164
165    /// Constructs a new `BoolArray` from a `BufferHandle`.
166    ///
167    /// # Panics
168    ///
169    /// Panics if the validity length is not equal to the bit buffer length.
170    pub fn new_handle(handle: BufferHandle, offset: usize, len: usize, validity: Validity) -> Self {
171        Self::try_new_from_handle(handle, offset, len, validity)
172            .vortex_expect("Failed to create BoolArray from BufferHandle")
173    }
174
175    /// Constructs a new `BoolArray`.
176    ///
177    /// # Errors
178    ///
179    /// Returns an error if the provided components do not satisfy the invariants.
180    pub fn try_new(bits: BitBuffer, validity: Validity) -> VortexResult<Self> {
181        let dtype = DType::Bool(validity.nullability());
182        let len = bits.len();
183        let slots = BoolData::make_slots(&validity, len);
184        let data = BoolData::try_new(bits, validity)?;
185        Ok(unsafe {
186            Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
187        })
188    }
189
190    /// Build a new bool array from a `BufferHandle`, returning an error if the offset is
191    /// too large or the buffer is not large enough to hold the values.
192    pub fn try_new_from_handle(
193        bits: BufferHandle,
194        offset: usize,
195        len: usize,
196        validity: Validity,
197    ) -> VortexResult<Self> {
198        let dtype = DType::Bool(validity.nullability());
199        let slots = BoolData::make_slots(&validity, len);
200        let data = BoolData::try_new_from_handle(bits, offset, len, validity)?;
201        Ok(unsafe {
202            Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
203        })
204    }
205
206    /// Creates a new [`BoolArray`] without validation.
207    ///
208    /// # Safety
209    ///
210    /// The caller must ensure that the validity length is equal to the bit buffer length.
211    pub unsafe fn new_unchecked(bits: BitBuffer, validity: Validity) -> Self {
212        let dtype = DType::Bool(validity.nullability());
213        let len = bits.len();
214        let slots = BoolData::make_slots(&validity, len);
215        // SAFETY: caller guarantees validity length equals bit buffer length.
216        let data = unsafe { BoolData::new_unchecked(bits, validity) };
217        unsafe {
218            Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
219        }
220    }
221
222    /// Validates the components that would be used to create a [`BoolArray`].
223    pub fn validate(bits: &BitBuffer, validity: &Validity) -> VortexResult<()> {
224        BoolData::validate(bits, validity)
225    }
226
227    /// Create a new BoolArray from a set of indices and a length.
228    ///
229    /// All indices must be less than the length.
230    pub fn from_indices<I: IntoIterator<Item = usize>>(
231        length: usize,
232        indices: I,
233        validity: Validity,
234    ) -> Self {
235        let mut buffer = BitBufferMut::new_unset(length);
236        indices.into_iter().for_each(|idx| buffer.set(idx));
237        Self::new(buffer.freeze(), validity)
238    }
239
240    /// Returns the underlying [`BitBuffer`] of the array, consuming self.
241    pub fn into_bit_buffer(self) -> BitBuffer {
242        let len = self.len();
243        let data = self.into_data();
244        let buffer = data.bits.unwrap_host();
245        BitBuffer::new_with_offset(buffer, len, data.offset)
246    }
247}
248
249/// Internal constructors on BoolData (used by Array<Bool> constructors and VTable::build).
250impl BoolData {
251    pub(super) fn try_new(bits: BitBuffer, validity: Validity) -> VortexResult<Self> {
252        let bits = bits.shrink_offset();
253        Self::validate(&bits, &validity)?;
254
255        let (offset, _len, buffer) = bits.into_inner();
256
257        Ok(Self {
258            bits: BufferHandle::new_host(buffer),
259            offset,
260        })
261    }
262
263    pub(super) fn try_new_from_handle(
264        bits: BufferHandle,
265        offset: usize,
266        len: usize,
267        validity: Validity,
268    ) -> VortexResult<Self> {
269        vortex_ensure!(offset < 8, "BitBuffer offset must be <8, got {}", offset);
270        if let Some(validity_len) = validity.maybe_len() {
271            vortex_ensure!(
272                validity_len == len,
273                "BoolArray of size {} cannot be built with validity of size {validity_len}",
274                len,
275            );
276        }
277
278        vortex_ensure!(
279            bits.len() * 8 >= (len + offset),
280            "provided BufferHandle with offset {offset} len {len} had size {} bits",
281            bits.len() * 8,
282        );
283
284        Ok(Self { bits, offset })
285    }
286
287    pub(super) unsafe fn new_unchecked(bits: BitBuffer, validity: Validity) -> Self {
288        if cfg!(debug_assertions) {
289            Self::try_new(bits, validity).vortex_expect("Failed to create BoolData")
290        } else {
291            let (offset, _len, buffer) = bits.into_inner();
292
293            Self {
294                bits: BufferHandle::new_host(buffer),
295                offset,
296            }
297        }
298    }
299
300    pub(super) fn validate(bits: &BitBuffer, validity: &Validity) -> VortexResult<()> {
301        vortex_ensure!(
302            bits.offset() < 8,
303            "BitBuffer offset must be <8, got {}",
304            bits.offset()
305        );
306
307        if let Some(validity_len) = validity.maybe_len() {
308            vortex_ensure!(
309                validity_len == bits.len(),
310                "BoolArray of size {} cannot be built with validity of size {validity_len}",
311                bits.len()
312            );
313        }
314
315        Ok(())
316    }
317}
318
319impl From<BitBuffer> for BoolArray {
320    fn from(value: BitBuffer) -> Self {
321        BoolArray::new(value, Validity::NonNullable)
322    }
323}
324
325impl FromIterator<bool> for BoolArray {
326    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
327        BoolArray::from(BitBuffer::from_iter(iter))
328    }
329}
330
331impl FromIterator<Option<bool>> for BoolArray {
332    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
333        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
334
335        BoolArray::new(
336            BitBuffer::from(buffer),
337            nulls
338                .map(|n| Validity::from(BitBuffer::from(n.into_inner())))
339                .unwrap_or(Validity::AllValid),
340        )
341    }
342}
343
344impl IntoArray for BitBuffer {
345    fn into_array(self) -> ArrayRef {
346        BoolArray::new(self, Validity::NonNullable).into_array()
347    }
348}
349
350impl IntoArray for BitBufferMut {
351    fn into_array(self) -> ArrayRef {
352        self.freeze().into_array()
353    }
354}
355
#[cfg(test)]
mod tests {
    use std::iter::once;
    use std::iter::repeat_n;

    use vortex_buffer::BitBuffer;
    use vortex_buffer::BitBufferMut;
    use vortex_buffer::buffer;

    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::BoolArray;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::bool::BoolArrayExt;
    use crate::assert_arrays_eq;
    use crate::patches::Patches;
    use crate::validity::Validity;

    /// Reads element `idx` of `arr` as a scalar and converts it to `bool`, panicking on failure.
    fn bool_at(arr: &BoolArray, idx: usize) -> bool {
        let scalar = arr
            .execute_scalar(idx, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();
        bool::try_from(&scalar).unwrap()
    }

    /// Reports whether element `idx` of `arr` reads back as a null scalar.
    fn is_null_at(arr: &BoolArray, idx: usize) -> bool {
        let scalar = arr
            .execute_scalar(idx, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();
        scalar.is_null()
    }

    #[test]
    fn bool_array() {
        let arr = BoolArray::from_iter([true, false, true]);
        assert!(bool_at(&arr, 0));
    }

    #[test]
    fn test_all_some_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(arr.validity(), Ok(Validity::AllValid)));

        assert!(bool_at(&arr, 0));
        assert!(!bool_at(&arr, 1));
    }

    #[test]
    fn test_bool_from_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]);

        assert!(bool_at(&arr, 0));
        assert!(bool_at(&arr, 1));
        assert!(is_null_at(&arr, 2));
        assert!(!bool_at(&arr, 3));
        assert!(is_null_at(&arr, 4));
    }

    #[test]
    fn patch_sliced_bools() {
        let all_set = BoolArray::from(BitBuffer::new_set(12));
        let sliced = all_set.slice(4..12).unwrap();
        assert_arrays_eq!(sliced, BoolArray::from_iter([true; 8]));

        // Twelve values with every bit except index 0 set.
        let arr = {
            let mut builder = BitBufferMut::new_unset(12);
            for i in 1..12 {
                builder.set(i);
            }
            BoolArray::from(builder.freeze())
        };
        let sliced = arr.slice(4..12).unwrap();
        let expected_slice: Vec<bool> = (4..12).map(|i| (1..12).contains(&i)).collect();
        assert_arrays_eq!(sliced, BoolArray::from_iter(expected_slice.clone()));

        // patch the underlying array at index 4 to false
        let patch_indices = buffer![4u32].into_array();
        let patch_values = BoolArray::from(BitBuffer::new_unset(1)).into_array();
        let patches = Patches::new(arr.len(), 0, patch_indices, patch_values, None).unwrap();
        let arr = arr
            .patch(&patches, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();
        // After patching index 4 to false: indices 1-3 and 5-11 are true, index 0 and 4 are false
        let expected_patched: Vec<bool> = (0..12).map(|i| (1..12).contains(&i) && i != 4).collect();
        assert_arrays_eq!(arr, BoolArray::from_iter(expected_patched));

        // the slice should be unchanged (still has original values before patch)
        assert_arrays_eq!(sliced, BoolArray::from_iter(expected_slice));
    }

    #[test]
    fn slice_array_in_middle() {
        let arr = BoolArray::from(BitBuffer::new_set(16));
        let middle = arr.slice(4..12).unwrap();
        assert_arrays_eq!(middle, BoolArray::from_iter([true; 8]));
    }

    #[test]
    fn patch_bools_owned() {
        let arr = BoolArray::from(BitBuffer::new_set(16));
        // Remember where the bits live so buffer reuse can be checked after patching.
        let buf_ptr = arr.to_bit_buffer().inner().as_ptr();

        let patch_indices = PrimitiveArray::new(buffer![0u32], Validity::NonNullable).into_array();
        let patch_values = BoolArray::from(BitBuffer::new_unset(1)).into_array();
        let patches = Patches::new(arr.len(), 0, patch_indices, patch_values, None).unwrap();
        let arr = arr
            .patch(&patches, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();
        // Verify buffer was reused in place
        assert_eq!(arr.to_bit_buffer().inner().as_ptr(), buf_ptr);

        // After patching index 0 to false: [false, true, true, ..., true] (16 values)
        let expected: BoolArray = once(false).chain(repeat_n(true, 15)).collect();
        assert_arrays_eq!(arr, expected);
    }

    #[test]
    fn patch_sliced_bools_offset() {
        let arr = BoolArray::from(BitBuffer::new_set(15));
        let tail = arr.slice(4..15).unwrap();
        assert_arrays_eq!(tail, BoolArray::from_iter([true; 11]));
    }
}