Skip to main content

vortex_array/arrays/bool/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Display;
5use std::fmt::Formatter;
6
7use arrow_array::BooleanArray;
8use vortex_buffer::BitBuffer;
9use vortex_buffer::BitBufferMut;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_ensure;
13use vortex_mask::Mask;
14
15use crate::ArrayRef;
16use crate::ExecutionCtx;
17use crate::IntoArray;
18use crate::array::Array;
19use crate::array::ArrayParts;
20use crate::array::TypedArrayRef;
21use crate::array::child_to_validity;
22use crate::array::validity_to_child;
23use crate::arrays::Bool;
24use crate::arrays::BoolArray;
25use crate::buffer::BufferHandle;
26use crate::dtype::DType;
27use crate::validity::Validity;
28
29/// The validity bitmap indicating which elements are non-null.
30pub(super) const VALIDITY_SLOT: usize = 0;
31pub(super) const NUM_SLOTS: usize = 1;
32pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["validity"];
33
34/// Inner data for a boolean array that stores true/false values in a compact bit-packed format.
35///
36/// This mirrors the Apache Arrow Boolean array encoding, where each boolean value
37/// is stored as a single bit rather than a full byte.
38///
39/// The data layout uses:
40/// - A bit-packed buffer where each bit represents one boolean value (0 = false, 1 = true)
41/// - An optional validity child array, which must be of type `Bool(NonNullable)`, where true values
42///   indicate valid and false indicates null. if the i-th value is null in the validity child,
43///   the i-th packed bit in the buffer may be 0 or 1, i.e. it is undefined.
44/// - Bit-level slicing is supported with minimal overhead
45///
46/// # Examples
47///
48/// ```
49/// # fn main() -> vortex_error::VortexResult<()> {
50/// use vortex_array::arrays::BoolArray;
51/// use vortex_array::{IntoArray, LEGACY_SESSION, VortexSessionExecute};
52///
53/// // Create from iterator using FromIterator impl
54/// let array: BoolArray = [true, false, true, false].into_iter().collect();
55///
56/// // Slice the array
57/// let sliced = array.slice(1..3)?;
58/// assert_eq!(sliced.len(), 2);
59///
60/// // Access individual values
61/// let mut ctx = LEGACY_SESSION.create_execution_ctx();
62/// let value = array.execute_scalar(0, &mut ctx).unwrap();
63/// assert_eq!(value, true.into());
64/// # Ok(())
65/// # }
66/// ```
67#[derive(Clone, Debug)]
68pub struct BoolData {
69    pub(super) bits: BufferHandle,
70    pub(super) offset: usize,
71}
72
73impl Display for BoolData {
74    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
75        write!(f, "offset: {}", self.offset)
76    }
77}
78
79pub struct BoolDataParts {
80    pub bits: BufferHandle,
81    pub offset: usize,
82    pub len: usize,
83}
84
85pub trait BoolArrayExt: TypedArrayRef<Bool> {
86    fn nullability(&self) -> crate::dtype::Nullability {
87        match self.as_ref().dtype() {
88            DType::Bool(nullability) => *nullability,
89            _ => unreachable!("BoolArrayExt requires a bool dtype"),
90        }
91    }
92
93    fn validity(&self) -> Validity {
94        child_to_validity(
95            self.as_ref().slots()[VALIDITY_SLOT].as_ref(),
96            self.nullability(),
97        )
98    }
99
100    fn to_bit_buffer(&self) -> BitBuffer {
101        let buffer = self.bits.as_host().clone();
102        BitBuffer::new_with_offset(buffer, self.as_ref().len(), self.offset)
103    }
104
105    fn maybe_execute_mask(&self, ctx: &mut ExecutionCtx) -> VortexResult<Option<Mask>> {
106        let all_valid = match &self.validity() {
107            Validity::NonNullable | Validity::AllValid => true,
108            Validity::AllInvalid => false,
109            Validity::Array(a) => a.statistics().compute_min::<bool>(ctx).unwrap_or(false),
110        };
111        Ok(all_valid.then(|| Mask::from_buffer(self.to_bit_buffer())))
112    }
113
114    fn execute_mask(&self, ctx: &mut ExecutionCtx) -> Mask {
115        self.maybe_execute_mask(ctx)
116            .vortex_expect("failed to check validity")
117            .vortex_expect("cannot convert nullable boolean array to mask")
118    }
119
120    fn to_mask_fill_null_false(&self, ctx: &mut ExecutionCtx) -> Mask {
121        let validity_mask = self
122            .validity()
123            .execute_mask(self.as_ref().len(), ctx)
124            .vortex_expect("Failed to compute validity mask");
125        let buffer = match validity_mask {
126            Mask::AllTrue(_) => self.to_bit_buffer(),
127            Mask::AllFalse(_) => return Mask::new_false(self.as_ref().len()),
128            Mask::Values(validity) => validity.bit_buffer() & self.to_bit_buffer(),
129        };
130        Mask::from_buffer(buffer)
131    }
132}
133impl<T: TypedArrayRef<Bool>> BoolArrayExt for T {}
134
135/// Field accessors and non-consuming methods on the inner bool data.
136impl BoolData {
137    /// Splits into owned parts
138    #[inline]
139    pub fn into_parts(self, len: usize) -> BoolDataParts {
140        BoolDataParts {
141            bits: self.bits,
142            offset: self.offset,
143            len,
144        }
145    }
146
147    pub(crate) fn make_slots(validity: &Validity, len: usize) -> Vec<Option<ArrayRef>> {
148        vec![validity_to_child(validity, len)]
149    }
150}
151
152/// Constructors and consuming methods for `BoolArray` (`Array<Bool>`).
153impl Array<Bool> {
154    /// Constructs a new `BoolArray`.
155    ///
156    /// # Panics
157    ///
158    /// Panics if the validity length is not equal to the bit buffer length.
159    pub fn new(bits: BitBuffer, validity: Validity) -> Self {
160        Self::try_new(bits, validity).vortex_expect("Failed to create BoolArray")
161    }
162
163    /// Constructs a new `BoolArray` from a `BufferHandle`.
164    ///
165    /// # Panics
166    ///
167    /// Panics if the validity length is not equal to the bit buffer length.
168    pub fn new_handle(handle: BufferHandle, offset: usize, len: usize, validity: Validity) -> Self {
169        Self::try_new_from_handle(handle, offset, len, validity)
170            .vortex_expect("Failed to create BoolArray from BufferHandle")
171    }
172
173    /// Constructs a new `BoolArray`.
174    ///
175    /// # Errors
176    ///
177    /// Returns an error if the provided components do not satisfy the invariants.
178    pub fn try_new(bits: BitBuffer, validity: Validity) -> VortexResult<Self> {
179        let dtype = DType::Bool(validity.nullability());
180        let len = bits.len();
181        let slots = BoolData::make_slots(&validity, len);
182        let data = BoolData::try_new(bits, validity)?;
183        Ok(unsafe {
184            Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
185        })
186    }
187
188    /// Build a new bool array from a `BufferHandle`, returning an error if the offset is
189    /// too large or the buffer is not large enough to hold the values.
190    pub fn try_new_from_handle(
191        bits: BufferHandle,
192        offset: usize,
193        len: usize,
194        validity: Validity,
195    ) -> VortexResult<Self> {
196        let dtype = DType::Bool(validity.nullability());
197        let slots = BoolData::make_slots(&validity, len);
198        let data = BoolData::try_new_from_handle(bits, offset, len, validity)?;
199        Ok(unsafe {
200            Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
201        })
202    }
203
204    /// Creates a new [`BoolArray`] without validation.
205    ///
206    /// # Safety
207    ///
208    /// The caller must ensure that the validity length is equal to the bit buffer length.
209    pub unsafe fn new_unchecked(bits: BitBuffer, validity: Validity) -> Self {
210        let dtype = DType::Bool(validity.nullability());
211        let len = bits.len();
212        let slots = BoolData::make_slots(&validity, len);
213        // SAFETY: caller guarantees validity length equals bit buffer length.
214        let data = unsafe { BoolData::new_unchecked(bits, validity) };
215        unsafe {
216            Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
217        }
218    }
219
220    /// Validates the components that would be used to create a [`BoolArray`].
221    pub fn validate(bits: &BitBuffer, validity: &Validity) -> VortexResult<()> {
222        BoolData::validate(bits, validity)
223    }
224
225    /// Create a new BoolArray from a set of indices and a length.
226    ///
227    /// All indices must be less than the length.
228    pub fn from_indices<I: IntoIterator<Item = usize>>(
229        length: usize,
230        indices: I,
231        validity: Validity,
232    ) -> Self {
233        let mut buffer = BitBufferMut::new_unset(length);
234        indices.into_iter().for_each(|idx| buffer.set(idx));
235        Self::new(buffer.freeze(), validity)
236    }
237
238    /// Returns the underlying [`BitBuffer`] of the array, consuming self.
239    pub fn into_bit_buffer(self) -> BitBuffer {
240        let len = self.len();
241        let data = self.into_data();
242        let buffer = data.bits.unwrap_host();
243        BitBuffer::new_with_offset(buffer, len, data.offset)
244    }
245}
246
247/// Internal constructors on BoolData (used by Array<Bool> constructors and VTable::build).
248impl BoolData {
249    pub(super) fn try_new(bits: BitBuffer, validity: Validity) -> VortexResult<Self> {
250        let bits = bits.shrink_offset();
251        Self::validate(&bits, &validity)?;
252
253        let (offset, _len, buffer) = bits.into_inner();
254
255        Ok(Self {
256            bits: BufferHandle::new_host(buffer),
257            offset,
258        })
259    }
260
261    pub(super) fn try_new_from_handle(
262        bits: BufferHandle,
263        offset: usize,
264        len: usize,
265        validity: Validity,
266    ) -> VortexResult<Self> {
267        vortex_ensure!(offset < 8, "BitBuffer offset must be <8, got {}", offset);
268        if let Some(validity_len) = validity.maybe_len() {
269            vortex_ensure!(
270                validity_len == len,
271                "BoolArray of size {} cannot be built with validity of size {validity_len}",
272                len,
273            );
274        }
275
276        vortex_ensure!(
277            bits.len() * 8 >= (len + offset),
278            "provided BufferHandle with offset {offset} len {len} had size {} bits",
279            bits.len() * 8,
280        );
281
282        Ok(Self { bits, offset })
283    }
284
285    pub(super) unsafe fn new_unchecked(bits: BitBuffer, validity: Validity) -> Self {
286        if cfg!(debug_assertions) {
287            Self::try_new(bits, validity).vortex_expect("Failed to create BoolData")
288        } else {
289            let (offset, _len, buffer) = bits.into_inner();
290
291            Self {
292                bits: BufferHandle::new_host(buffer),
293                offset,
294            }
295        }
296    }
297
298    pub(super) fn validate(bits: &BitBuffer, validity: &Validity) -> VortexResult<()> {
299        vortex_ensure!(
300            bits.offset() < 8,
301            "BitBuffer offset must be <8, got {}",
302            bits.offset()
303        );
304
305        if let Some(validity_len) = validity.maybe_len() {
306            vortex_ensure!(
307                validity_len == bits.len(),
308                "BoolArray of size {} cannot be built with validity of size {validity_len}",
309                bits.len()
310            );
311        }
312
313        Ok(())
314    }
315}
316
317impl From<BitBuffer> for BoolArray {
318    fn from(value: BitBuffer) -> Self {
319        BoolArray::new(value, Validity::NonNullable)
320    }
321}
322
323impl FromIterator<bool> for BoolArray {
324    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
325        BoolArray::from(BitBuffer::from_iter(iter))
326    }
327}
328
329impl FromIterator<Option<bool>> for BoolArray {
330    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
331        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
332
333        BoolArray::new(
334            BitBuffer::from(buffer),
335            nulls
336                .map(|n| Validity::from(BitBuffer::from(n.into_inner())))
337                .unwrap_or(Validity::AllValid),
338        )
339    }
340}
341
342impl IntoArray for BitBuffer {
343    fn into_array(self) -> ArrayRef {
344        BoolArray::new(self, Validity::NonNullable).into_array()
345    }
346}
347
348impl IntoArray for BitBufferMut {
349    fn into_array(self) -> ArrayRef {
350        self.freeze().into_array()
351    }
352}
353
#[cfg(test)]
mod tests {
    use std::iter::once;
    use std::iter::repeat_n;

    use vortex_buffer::BitBuffer;
    use vortex_buffer::BitBufferMut;
    use vortex_buffer::buffer;

    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::BoolArray;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::bool::BoolArrayExt;
    use crate::assert_arrays_eq;
    use crate::patches::Patches;
    use crate::validity::Validity;

    /// Reads element `idx` of `arr` as a scalar and converts it to `bool`, unwrapping both
    /// steps.
    fn bool_at(arr: &BoolArray, idx: usize) -> bool {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let scalar = arr.execute_scalar(idx, &mut ctx).unwrap();
        bool::try_from(&scalar).unwrap()
    }

    /// Reads element `idx` of `arr` as a scalar and reports whether it is null.
    fn is_null_at(arr: &BoolArray, idx: usize) -> bool {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        arr.execute_scalar(idx, &mut ctx).unwrap().is_null()
    }

    #[test]
    fn bool_array() {
        let arr = BoolArray::from_iter([true, false, true]);
        assert!(bool_at(&arr, 0));
    }

    #[test]
    fn test_all_some_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(arr.validity(), Ok(Validity::AllValid)));

        assert!(bool_at(&arr, 0));
        assert!(!bool_at(&arr, 1));
    }

    #[test]
    fn test_bool_from_iter() {
        let expected = [Some(true), Some(true), None, Some(false), None];
        let arr = BoolArray::from_iter(expected);

        // Present entries must read back as their value; absent entries must read back as null.
        for (idx, want) in expected.into_iter().enumerate() {
            match want {
                Some(true) => assert!(bool_at(&arr, idx)),
                Some(false) => assert!(!bool_at(&arr, idx)),
                None => assert!(is_null_at(&arr, idx)),
            }
        }
    }

    #[test]
    fn patch_sliced_bools() {
        let arr = BoolArray::from(BitBuffer::new_set(12));
        let sliced = arr.slice(4..12).unwrap();
        assert_arrays_eq!(sliced, BoolArray::from_iter([true; 8]));

        let arr = {
            let mut builder = BitBufferMut::new_unset(12);
            for i in 1..12 {
                builder.set(i);
            }
            BoolArray::from(builder.freeze())
        };
        let sliced = arr.slice(4..12).unwrap();
        let expected_slice: Vec<bool> = (4..12).map(|i| (1..12).contains(&i)).collect();
        assert_arrays_eq!(sliced, BoolArray::from_iter(expected_slice.clone()));

        // patch the underlying array at index 4 to false
        let patches = Patches::new(
            arr.len(),
            0,
            buffer![4u32].into_array(),
            BoolArray::from(BitBuffer::new_unset(1)).into_array(),
            None,
        )
        .unwrap();
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let arr = arr.patch(&patches, &mut ctx).unwrap();
        // After patching index 4 to false: indices 1-3 and 5-11 are true, index 0 and 4 are false
        let expected_patched: Vec<bool> = (0..12).map(|i| (1..12).contains(&i) && i != 4).collect();
        assert_arrays_eq!(arr, BoolArray::from_iter(expected_patched));

        // the slice should be unchanged (still has original values before patch)
        assert_arrays_eq!(sliced, BoolArray::from_iter(expected_slice));
    }

    #[test]
    fn slice_array_in_middle() {
        let arr = BoolArray::from(BitBuffer::new_set(16));
        let sliced = arr.slice(4..12).unwrap();
        assert_arrays_eq!(sliced, BoolArray::from_iter([true; 8]));
    }

    #[test]
    fn patch_bools_owned() {
        let arr = BoolArray::from(BitBuffer::new_set(16));
        // Keep this as a single expression: the temporary bit buffer must be dropped right
        // away so that no extra reference to the underlying buffer outlives this statement.
        let buf_ptr = arr.to_bit_buffer().inner().as_ptr();

        let patches = Patches::new(
            arr.len(),
            0,
            PrimitiveArray::new(buffer![0u32], Validity::NonNullable).into_array(),
            BoolArray::from(BitBuffer::new_unset(1)).into_array(),
            None,
        )
        .unwrap();
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let arr = arr.patch(&patches, &mut ctx).unwrap();
        // Verify buffer was reused in place
        assert_eq!(arr.to_bit_buffer().inner().as_ptr(), buf_ptr);

        // After patching index 0 to false: [false, true, true, ..., true] (16 values)
        let expected: BoolArray = once(false).chain(repeat_n(true, 15)).collect();
        assert_arrays_eq!(arr, expected);
    }

    #[test]
    fn patch_sliced_bools_offset() {
        let arr = BoolArray::from(BitBuffer::new_set(15));
        let sliced = arr.slice(4..15).unwrap();
        assert_arrays_eq!(sliced, BoolArray::from_iter([true; 11]));
    }
}