Skip to main content

vortex_array/arrays/bool/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Display;
5use std::fmt::Formatter;
6
7use arrow_array::BooleanArray;
8use vortex_buffer::BitBuffer;
9use vortex_buffer::BitBufferMut;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_ensure;
13use vortex_mask::Mask;
14
15use crate::ArrayRef;
16use crate::ExecutionCtx;
17use crate::IntoArray;
18use crate::array::Array;
19use crate::array::ArrayParts;
20use crate::array::TypedArrayRef;
21use crate::array::child_to_validity;
22use crate::array::validity_to_child;
23use crate::arrays::Bool;
24use crate::arrays::BoolArray;
25use crate::buffer::BufferHandle;
26use crate::dtype::DType;
27use crate::validity::Validity;
28
/// Index of the slot that holds the validity child, i.e. the bitmap indicating which
/// elements are non-null.
pub(super) const VALIDITY_SLOT: usize = 0;
/// Number of slots declared for a bool array; it sizes [`SLOT_NAMES`].
pub(super) const NUM_SLOTS: usize = 1;
/// Name of each slot; the entry at [`VALIDITY_SLOT`] is `"validity"`.
pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["validity"];
33
/// Inner data for a boolean array that stores true/false values in a compact bit-packed format.
///
/// This mirrors the Apache Arrow Boolean array encoding, where each boolean value
/// is stored as a single bit rather than a full byte.
///
/// The data layout uses:
/// - A bit-packed buffer where each bit represents one boolean value (0 = false, 1 = true)
/// - An optional validity child array, which must be of type `Bool(NonNullable)`, where true
///   values indicate valid and false values indicate null. If the i-th value is null in the
///   validity child, the i-th packed bit in the buffer may be 0 or 1, i.e. it is undefined.
/// - Bit-level slicing is supported with minimal overhead
///
/// # Examples
///
/// ```
/// # fn main() -> vortex_error::VortexResult<()> {
/// use vortex_array::arrays::BoolArray;
/// use vortex_array::{IntoArray, LEGACY_SESSION, VortexSessionExecute};
///
/// // Create from iterator using FromIterator impl
/// let array: BoolArray = [true, false, true, false].into_iter().collect();
///
/// // Slice the array
/// let sliced = array.slice(1..3)?;
/// assert_eq!(sliced.len(), 2);
///
/// // Access individual values
/// let mut ctx = LEGACY_SESSION.create_execution_ctx();
/// let value = array.execute_scalar(0, &mut ctx).unwrap();
/// assert_eq!(value, true.into());
/// # Ok(())
/// # }
/// ```
#[derive(Clone, Debug)]
pub struct BoolData {
    /// Handle to the buffer holding the bit-packed boolean values.
    pub(super) bits: BufferHandle,
    /// Bit offset of the first logical value inside `bits`. The checked constructors in this
    /// module require it to be less than 8.
    pub(super) offset: usize,
}
72
73impl Display for BoolData {
74    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
75        write!(f, "offset: {}", self.offset)
76    }
77}
78
79pub struct BoolDataParts {
80    pub bits: BufferHandle,
81    pub offset: usize,
82    pub len: usize,
83}
84
85pub trait BoolArrayExt: TypedArrayRef<Bool> {
86    fn nullability(&self) -> crate::dtype::Nullability {
87        match self.as_ref().dtype() {
88            DType::Bool(nullability) => *nullability,
89            _ => unreachable!("BoolArrayExt requires a bool dtype"),
90        }
91    }
92
93    fn validity(&self) -> Validity {
94        child_to_validity(&self.as_ref().slots()[VALIDITY_SLOT], self.nullability())
95    }
96
97    fn to_bit_buffer(&self) -> BitBuffer {
98        let buffer = self.bits.as_host().clone();
99        BitBuffer::new_with_offset(buffer, self.as_ref().len(), self.offset)
100    }
101
102    fn maybe_to_mask(&self, ctx: &mut ExecutionCtx) -> VortexResult<Option<Mask>> {
103        let all_valid = match &self.validity() {
104            Validity::NonNullable | Validity::AllValid => true,
105            Validity::AllInvalid => false,
106            Validity::Array(a) => a.statistics().compute_min::<bool>(ctx).unwrap_or(false),
107        };
108        Ok(all_valid.then(|| Mask::from_buffer(self.to_bit_buffer())))
109    }
110
111    fn to_mask(&self, ctx: &mut ExecutionCtx) -> Mask {
112        self.maybe_to_mask(ctx)
113            .vortex_expect("failed to check validity")
114            .vortex_expect("cannot convert nullable boolean array to mask")
115    }
116
117    fn to_mask_fill_null_false(&self, ctx: &mut ExecutionCtx) -> Mask {
118        let validity_mask = self
119            .validity()
120            .to_mask(self.as_ref().len(), ctx)
121            .vortex_expect("Failed to compute validity mask");
122        let buffer = match validity_mask {
123            Mask::AllTrue(_) => self.to_bit_buffer(),
124            Mask::AllFalse(_) => return Mask::new_false(self.as_ref().len()),
125            Mask::Values(validity) => validity.bit_buffer() & self.to_bit_buffer(),
126        };
127        Mask::from_buffer(buffer)
128    }
129}
130impl<T: TypedArrayRef<Bool>> BoolArrayExt for T {}
131
132/// Field accessors and non-consuming methods on the inner bool data.
133impl BoolData {
134    /// Splits into owned parts
135    #[inline]
136    pub fn into_parts(self, len: usize) -> BoolDataParts {
137        BoolDataParts {
138            bits: self.bits,
139            offset: self.offset,
140            len,
141        }
142    }
143
144    pub(crate) fn make_slots(validity: &Validity, len: usize) -> Vec<Option<ArrayRef>> {
145        vec![validity_to_child(validity, len)]
146    }
147}
148
149/// Constructors and consuming methods for `BoolArray` (`Array<Bool>`).
150impl Array<Bool> {
151    /// Constructs a new `BoolArray`.
152    ///
153    /// # Panics
154    ///
155    /// Panics if the validity length is not equal to the bit buffer length.
156    pub fn new(bits: BitBuffer, validity: Validity) -> Self {
157        Self::try_new(bits, validity).vortex_expect("Failed to create BoolArray")
158    }
159
160    /// Constructs a new `BoolArray` from a `BufferHandle`.
161    ///
162    /// # Panics
163    ///
164    /// Panics if the validity length is not equal to the bit buffer length.
165    pub fn new_handle(handle: BufferHandle, offset: usize, len: usize, validity: Validity) -> Self {
166        Self::try_new_from_handle(handle, offset, len, validity)
167            .vortex_expect("Failed to create BoolArray from BufferHandle")
168    }
169
170    /// Constructs a new `BoolArray`.
171    ///
172    /// # Errors
173    ///
174    /// Returns an error if the provided components do not satisfy the invariants.
175    pub fn try_new(bits: BitBuffer, validity: Validity) -> VortexResult<Self> {
176        let dtype = DType::Bool(validity.nullability());
177        let len = bits.len();
178        let slots = BoolData::make_slots(&validity, len);
179        let data = BoolData::try_new(bits, validity)?;
180        Ok(unsafe {
181            Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
182        })
183    }
184
185    /// Build a new bool array from a `BufferHandle`, returning an error if the offset is
186    /// too large or the buffer is not large enough to hold the values.
187    pub fn try_new_from_handle(
188        bits: BufferHandle,
189        offset: usize,
190        len: usize,
191        validity: Validity,
192    ) -> VortexResult<Self> {
193        let dtype = DType::Bool(validity.nullability());
194        let slots = BoolData::make_slots(&validity, len);
195        let data = BoolData::try_new_from_handle(bits, offset, len, validity)?;
196        Ok(unsafe {
197            Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
198        })
199    }
200
201    /// Creates a new [`BoolArray`] without validation.
202    ///
203    /// # Safety
204    ///
205    /// The caller must ensure that the validity length is equal to the bit buffer length.
206    pub unsafe fn new_unchecked(bits: BitBuffer, validity: Validity) -> Self {
207        let dtype = DType::Bool(validity.nullability());
208        let len = bits.len();
209        let slots = BoolData::make_slots(&validity, len);
210        // SAFETY: caller guarantees validity length equals bit buffer length.
211        let data = unsafe { BoolData::new_unchecked(bits, validity) };
212        unsafe {
213            Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
214        }
215    }
216
217    /// Validates the components that would be used to create a [`BoolArray`].
218    pub fn validate(bits: &BitBuffer, validity: &Validity) -> VortexResult<()> {
219        BoolData::validate(bits, validity)
220    }
221
222    /// Create a new BoolArray from a set of indices and a length.
223    ///
224    /// All indices must be less than the length.
225    pub fn from_indices<I: IntoIterator<Item = usize>>(
226        length: usize,
227        indices: I,
228        validity: Validity,
229    ) -> Self {
230        let mut buffer = BitBufferMut::new_unset(length);
231        indices.into_iter().for_each(|idx| buffer.set(idx));
232        Self::new(buffer.freeze(), validity)
233    }
234
235    /// Returns the underlying [`BitBuffer`] of the array, consuming self.
236    pub fn into_bit_buffer(self) -> BitBuffer {
237        let len = self.len();
238        let data = self.into_data();
239        let buffer = data.bits.unwrap_host();
240        BitBuffer::new_with_offset(buffer, len, data.offset)
241    }
242}
243
244/// Internal constructors on BoolData (used by Array<Bool> constructors and VTable::build).
245impl BoolData {
246    pub(super) fn try_new(bits: BitBuffer, validity: Validity) -> VortexResult<Self> {
247        let bits = bits.shrink_offset();
248        Self::validate(&bits, &validity)?;
249
250        let (offset, _len, buffer) = bits.into_inner();
251
252        Ok(Self {
253            bits: BufferHandle::new_host(buffer),
254            offset,
255        })
256    }
257
258    pub(super) fn try_new_from_handle(
259        bits: BufferHandle,
260        offset: usize,
261        len: usize,
262        validity: Validity,
263    ) -> VortexResult<Self> {
264        vortex_ensure!(offset < 8, "BitBuffer offset must be <8, got {}", offset);
265        if let Some(validity_len) = validity.maybe_len() {
266            vortex_ensure!(
267                validity_len == len,
268                "BoolArray of size {} cannot be built with validity of size {validity_len}",
269                len,
270            );
271        }
272
273        vortex_ensure!(
274            bits.len() * 8 >= (len + offset),
275            "provided BufferHandle with offset {offset} len {len} had size {} bits",
276            bits.len() * 8,
277        );
278
279        Ok(Self { bits, offset })
280    }
281
282    pub(super) unsafe fn new_unchecked(bits: BitBuffer, validity: Validity) -> Self {
283        if cfg!(debug_assertions) {
284            Self::try_new(bits, validity).vortex_expect("Failed to create BoolData")
285        } else {
286            let (offset, _len, buffer) = bits.into_inner();
287
288            Self {
289                bits: BufferHandle::new_host(buffer),
290                offset,
291            }
292        }
293    }
294
295    pub(super) fn validate(bits: &BitBuffer, validity: &Validity) -> VortexResult<()> {
296        vortex_ensure!(
297            bits.offset() < 8,
298            "BitBuffer offset must be <8, got {}",
299            bits.offset()
300        );
301
302        if let Some(validity_len) = validity.maybe_len() {
303            vortex_ensure!(
304                validity_len == bits.len(),
305                "BoolArray of size {} cannot be built with validity of size {validity_len}",
306                bits.len()
307            );
308        }
309
310        Ok(())
311    }
312}
313
314impl From<BitBuffer> for BoolArray {
315    fn from(value: BitBuffer) -> Self {
316        BoolArray::new(value, Validity::NonNullable)
317    }
318}
319
320impl FromIterator<bool> for BoolArray {
321    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
322        BoolArray::from(BitBuffer::from_iter(iter))
323    }
324}
325
326impl FromIterator<Option<bool>> for BoolArray {
327    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
328        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
329
330        BoolArray::new(
331            BitBuffer::from(buffer),
332            nulls
333                .map(|n| Validity::from(BitBuffer::from(n.into_inner())))
334                .unwrap_or(Validity::AllValid),
335        )
336    }
337}
338
339impl IntoArray for BitBuffer {
340    fn into_array(self) -> ArrayRef {
341        BoolArray::new(self, Validity::NonNullable).into_array()
342    }
343}
344
345impl IntoArray for BitBufferMut {
346    fn into_array(self) -> ArrayRef {
347        self.freeze().into_array()
348    }
349}
350
#[cfg(test)]
mod tests {
    use std::iter::once;
    use std::iter::repeat_n;

    use vortex_buffer::BitBuffer;
    use vortex_buffer::BitBufferMut;
    use vortex_buffer::buffer;

    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::BoolArray;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::bool::BoolArrayExt;
    use crate::assert_arrays_eq;
    use crate::patches::Patches;
    use crate::validity::Validity;

    /// Reads element `idx` of `arr` as a `bool`, panicking if it cannot be converted.
    fn bool_at(arr: &BoolArray, idx: usize) -> bool {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let scalar = arr.execute_scalar(idx, &mut ctx).unwrap();
        bool::try_from(&scalar).unwrap()
    }

    /// Reports whether element `idx` of `arr` is null.
    fn is_null_at(arr: &BoolArray, idx: usize) -> bool {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        arr.execute_scalar(idx, &mut ctx).unwrap().is_null()
    }

    #[test]
    fn bool_array() {
        let arr = BoolArray::from_iter([true, false, true]);
        assert!(bool_at(&arr, 0));
    }

    #[test]
    fn test_all_some_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(arr.validity(), Ok(Validity::AllValid)));

        assert!(bool_at(&arr, 0));
        assert!(!bool_at(&arr, 1));
    }

    #[test]
    fn test_bool_from_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]);

        assert!(bool_at(&arr, 0));
        assert!(bool_at(&arr, 1));
        assert!(is_null_at(&arr, 2));
        assert!(!bool_at(&arr, 3));
        assert!(is_null_at(&arr, 4));
    }

    #[test]
    fn patch_sliced_bools() {
        let all_set = BoolArray::from(BitBuffer::new_set(12));
        let all_set_slice = all_set.slice(4..12).unwrap();
        assert_arrays_eq!(all_set_slice, BoolArray::from_iter([true; 8]));

        // Every bit except index 0 is set.
        let arr = {
            let mut bits = BitBufferMut::new_unset(12);
            for i in 1..12 {
                bits.set(i);
            }
            BoolArray::from(bits.freeze())
        };
        let sliced = arr.slice(4..12).unwrap();
        let expected_slice: Vec<bool> = (4..12).map(|i| (1..12).contains(&i)).collect();
        assert_arrays_eq!(sliced, BoolArray::from_iter(expected_slice.clone()));

        // patch the underlying array at index 4 to false
        let patch_indices = buffer![4u32].into_array();
        let patch_values = BoolArray::from(BitBuffer::new_unset(1)).into_array();
        let patches = Patches::new(arr.len(), 0, patch_indices, patch_values, None).unwrap();
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let arr = arr.patch(&patches, &mut ctx).unwrap();

        // After patching index 4 to false: indices 1-3 and 5-11 are true, index 0 and 4 are false
        let expected_patched: Vec<bool> = (0..12).map(|i| (1..12).contains(&i) && i != 4).collect();
        assert_arrays_eq!(arr, BoolArray::from_iter(expected_patched));

        // the slice should be unchanged (still has original values before patch)
        assert_arrays_eq!(sliced, BoolArray::from_iter(expected_slice));
    }

    #[test]
    fn slice_array_in_middle() {
        let source = BoolArray::from(BitBuffer::new_set(16));
        let middle = source.slice(4..12).unwrap();
        assert_arrays_eq!(middle, BoolArray::from_iter([true; 8]));
    }

    #[test]
    fn patch_bools_owned() {
        let arr = BoolArray::from(BitBuffer::new_set(16));
        let original_ptr = arr.to_bit_buffer().inner().as_ptr();

        let patch_indices = PrimitiveArray::new(buffer![0u32], Validity::NonNullable).into_array();
        let patch_values = BoolArray::from(BitBuffer::new_unset(1)).into_array();
        let patches = Patches::new(arr.len(), 0, patch_indices, patch_values, None).unwrap();
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let arr = arr.patch(&patches, &mut ctx).unwrap();

        // Verify buffer was reused in place
        assert_eq!(arr.to_bit_buffer().inner().as_ptr(), original_ptr);

        // After patching index 0 to false: [false, true, true, ..., true] (16 values)
        let expected: BoolArray = once(false).chain(repeat_n(true, 15)).collect();
        assert_arrays_eq!(arr, expected);
    }

    #[test]
    fn patch_sliced_bools_offset() {
        let source = BoolArray::from(BitBuffer::new_set(15));
        let tail = source.slice(4..15).unwrap();
        assert_arrays_eq!(tail, BoolArray::from_iter([true; 11]));
    }
}