Skip to main content

vortex_array/arrays/bool/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Display;
5use std::fmt::Formatter;
6
7use arrow_array::BooleanArray;
8use smallvec::smallvec;
9use vortex_buffer::BitBuffer;
10use vortex_buffer::BitBufferMeta;
11use vortex_buffer::BitBufferMut;
12use vortex_buffer::BitBufferView;
13use vortex_error::VortexExpect;
14use vortex_error::VortexResult;
15use vortex_error::vortex_ensure;
16use vortex_mask::Mask;
17
18use crate::ArrayRef;
19use crate::ArraySlots;
20use crate::ExecutionCtx;
21use crate::IntoArray;
22use crate::array::Array;
23use crate::array::ArrayParts;
24use crate::array::TypedArrayRef;
25use crate::array::child_to_validity;
26use crate::array::validity_to_child;
27use crate::arrays::Bool;
28use crate::arrays::BoolArray;
29use crate::buffer::BufferHandle;
30use crate::dtype::DType;
31use crate::validity::Validity;
32
/// Slot index of the validity child, i.e. the bitmap indicating which elements are non-null.
pub(super) const VALIDITY_SLOT: usize = 0;
/// Number of slots declared for a bool array; it sizes [`SLOT_NAMES`].
pub(super) const NUM_SLOTS: usize = 1;
/// Slot names in slot-index order, so entry [`VALIDITY_SLOT`] is `"validity"`.
pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["validity"];
37
/// Inner data for a boolean array that stores true/false values in a compact bit-packed format.
///
/// This mirrors the Apache Arrow Boolean array encoding, where each boolean value
/// is stored as a single bit rather than a full byte.
///
/// The data layout uses:
/// - A bit-packed buffer where each bit represents one boolean value (0 = false, 1 = true)
/// - An optional validity child array, which must be of type `Bool(NonNullable)`, where true values
///   indicate valid and false indicates null. If the i-th value is null in the validity child,
///   the i-th packed bit in the buffer may be 0 or 1, i.e. it is undefined.
/// - Bit-level slicing is supported with minimal overhead
///
/// # Examples
///
/// ```
/// # fn main() -> vortex_error::VortexResult<()> {
/// use vortex_array::arrays::BoolArray;
/// use vortex_array::{IntoArray, LEGACY_SESSION, VortexSessionExecute};
///
/// // Create from iterator using FromIterator impl
/// let array: BoolArray = [true, false, true, false].into_iter().collect();
///
/// // Slice the array
/// let sliced = array.slice(1..3)?;
/// assert_eq!(sliced.len(), 2);
///
/// // Access individual values
/// let mut ctx = LEGACY_SESSION.create_execution_ctx();
/// let value = array.execute_scalar(0, &mut ctx).unwrap();
/// assert_eq!(value, true.into());
/// # Ok(())
/// # }
/// ```
#[derive(Clone, Debug)]
pub struct BoolData {
    /// Handle to the buffer that holds the bit-packed boolean values.
    pub(super) bits: BufferHandle,
    /// Bit offset and length describing the logical window of values inside `bits`.
    pub(super) meta: BitBufferMeta,
}
76
77impl Display for BoolData {
78    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
79        write!(f, "offset: {}", self.meta.offset())
80    }
81}
82
83pub struct BoolDataParts {
84    pub bits: BufferHandle,
85    pub meta: BitBufferMeta,
86}
87
88pub trait BoolArrayExt: TypedArrayRef<Bool> {
89    fn nullability(&self) -> crate::dtype::Nullability {
90        match self.as_ref().dtype() {
91            DType::Bool(nullability) => *nullability,
92            _ => unreachable!("BoolArrayExt requires a bool dtype"),
93        }
94    }
95
96    fn validity(&self) -> Validity {
97        child_to_validity(
98            self.as_ref().slots()[VALIDITY_SLOT].as_ref(),
99            self.nullability(),
100        )
101    }
102
103    fn to_bit_buffer(&self) -> BitBuffer {
104        let buffer = self.bits.as_host().clone();
105        BitBuffer::new_with_offset(buffer, self.meta.len(), self.meta.offset())
106    }
107
108    /// Borrow the array's packed bits as a [`BitBufferView`] without cloning the backing buffer.
109    fn bit_buffer_view(&self) -> BitBufferView<'_> {
110        BitBufferView::from_meta(self.bits.as_host().as_slice(), self.meta)
111    }
112
113    fn maybe_execute_mask(&self, ctx: &mut ExecutionCtx) -> VortexResult<Option<Mask>> {
114        let all_valid = match &self.validity() {
115            Validity::NonNullable | Validity::AllValid => true,
116            Validity::AllInvalid => false,
117            Validity::Array(a) => a.statistics().compute_min::<bool>(ctx).unwrap_or(false),
118        };
119        Ok(all_valid.then(|| Mask::from_buffer(self.to_bit_buffer())))
120    }
121
122    fn execute_mask(&self, ctx: &mut ExecutionCtx) -> Mask {
123        self.maybe_execute_mask(ctx)
124            .vortex_expect("failed to check validity")
125            .vortex_expect("cannot convert nullable boolean array to mask")
126    }
127
128    fn to_mask_fill_null_false(&self, ctx: &mut ExecutionCtx) -> Mask {
129        let validity_mask = self
130            .validity()
131            .execute_mask(self.as_ref().len(), ctx)
132            .vortex_expect("Failed to compute validity mask");
133        let buffer = match validity_mask {
134            Mask::AllTrue(_) => self.to_bit_buffer(),
135            Mask::AllFalse(_) => return Mask::new_false(self.as_ref().len()),
136            Mask::Values(validity) => validity.bit_buffer() & self.to_bit_buffer(),
137        };
138        Mask::from_buffer(buffer)
139    }
140}
// Blanket impl: every `TypedArrayRef<Bool>` gets the provided `BoolArrayExt` methods.
impl<T: TypedArrayRef<Bool>> BoolArrayExt for T {}
142
143/// Field accessors and non-consuming methods on the inner bool data.
144impl BoolData {
145    /// Splits into owned parts
146    #[inline]
147    pub fn into_parts(self, len: usize) -> BoolDataParts {
148        BoolDataParts {
149            bits: self.bits,
150            meta: BitBufferMeta::new(self.meta.offset(), len),
151        }
152    }
153
154    pub(crate) fn make_slots(validity: &Validity, len: usize) -> ArraySlots {
155        smallvec![validity_to_child(validity, len)]
156    }
157}
158
159/// Constructors and consuming methods for [`BoolArray`].
160impl Array<Bool> {
161    /// Constructs a new [`BoolArray`].
162    ///
163    /// # Panics
164    ///
165    /// Panics if the validity length is not equal to the bit buffer length.
166    pub fn new(bits: BitBuffer, validity: Validity) -> Self {
167        Self::try_new(bits, validity).vortex_expect("Failed to create BoolArray")
168    }
169
170    /// Constructs a new [`BoolArray`] from a [`BufferHandle`].
171    ///
172    /// # Panics
173    ///
174    /// Panics if the validity length is not equal to the bit buffer length.
175    pub fn new_handle(handle: BufferHandle, offset: usize, len: usize, validity: Validity) -> Self {
176        Self::try_new_from_handle(handle, offset, len, validity)
177            .vortex_expect("Failed to create BoolArray from BufferHandle")
178    }
179
180    /// Constructs a new `BoolArray`.
181    ///
182    /// # Errors
183    ///
184    /// Returns an error if the provided components do not satisfy the invariants.
185    pub fn try_new(bits: BitBuffer, validity: Validity) -> VortexResult<Self> {
186        let dtype = DType::Bool(validity.nullability());
187        let len = bits.len();
188        let slots = BoolData::make_slots(&validity, len);
189        let data = BoolData::try_new(bits, validity)?;
190        Ok(unsafe {
191            Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
192        })
193    }
194
195    /// Build a new bool array from a `BufferHandle`, returning an error if the offset is
196    /// too large or the buffer is not large enough to hold the values.
197    pub fn try_new_from_handle(
198        bits: BufferHandle,
199        offset: usize,
200        len: usize,
201        validity: Validity,
202    ) -> VortexResult<Self> {
203        let dtype = DType::Bool(validity.nullability());
204        let slots = BoolData::make_slots(&validity, len);
205        let data = BoolData::try_new_from_handle(bits, offset, len, validity)?;
206        Ok(unsafe {
207            Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
208        })
209    }
210
211    /// Creates a new [`BoolArray`] without validation.
212    ///
213    /// # Safety
214    ///
215    /// The caller must ensure that the validity length is equal to the bit buffer length.
216    pub unsafe fn new_unchecked(bits: BitBuffer, validity: Validity) -> Self {
217        let dtype = DType::Bool(validity.nullability());
218        let len = bits.len();
219        let slots = BoolData::make_slots(&validity, len);
220        // SAFETY: caller guarantees validity length equals bit buffer length.
221        let data = unsafe { BoolData::new_unchecked(bits, validity) };
222        unsafe {
223            Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
224        }
225    }
226
227    /// Validates the components that would be used to create a [`BoolArray`].
228    pub fn validate(bits: &BitBuffer, validity: &Validity) -> VortexResult<()> {
229        BoolData::validate(bits, validity)
230    }
231
232    /// Create a new BoolArray from a set of indices and a length.
233    ///
234    /// All indices must be less than the length.
235    pub fn from_indices<I: IntoIterator<Item = usize>>(
236        length: usize,
237        indices: I,
238        validity: Validity,
239    ) -> Self {
240        let mut buffer = BitBufferMut::new_unset(length);
241        indices.into_iter().for_each(|idx| buffer.set(idx));
242        Self::new(buffer.freeze(), validity)
243    }
244
245    /// Returns the underlying [`BitBuffer`] of the array, consuming self.
246    pub fn into_bit_buffer(self) -> BitBuffer {
247        let len = self.len();
248        let data = self.into_data();
249        let buffer = data.bits.unwrap_host();
250        BitBuffer::new_with_offset(buffer, len, data.meta.offset())
251    }
252}
253
254// Internal constructors on BoolData (used by [`BoolArray`] constructors and [`VTable::build`]).
255impl BoolData {
256    pub(super) fn try_new(bits: BitBuffer, validity: Validity) -> VortexResult<Self> {
257        let bits = bits.shrink_offset();
258        Self::validate(&bits, &validity)?;
259
260        let (offset, len, buffer) = bits.into_inner();
261
262        Ok(Self {
263            bits: BufferHandle::new_host(buffer),
264            meta: BitBufferMeta::new(offset, len),
265        })
266    }
267
268    pub(super) fn try_new_from_handle(
269        bits: BufferHandle,
270        offset: usize,
271        len: usize,
272        validity: Validity,
273    ) -> VortexResult<Self> {
274        vortex_ensure!(offset < 8, "BitBuffer offset must be <8, got {}", offset);
275        if let Some(validity_len) = validity.maybe_len() {
276            vortex_ensure!(
277                validity_len == len,
278                "BoolArray of size {} cannot be built with validity of size {validity_len}",
279                len,
280            );
281        }
282
283        vortex_ensure!(
284            bits.len() * 8 >= (len + offset),
285            "provided BufferHandle with offset {offset} len {len} had size {} bits",
286            bits.len() * 8,
287        );
288
289        Ok(Self {
290            bits,
291            meta: BitBufferMeta::new(offset, len),
292        })
293    }
294
295    pub(super) unsafe fn new_unchecked(bits: BitBuffer, validity: Validity) -> Self {
296        if cfg!(debug_assertions) {
297            Self::try_new(bits, validity).vortex_expect("Failed to create BoolData")
298        } else {
299            let (offset, len, buffer) = bits.into_inner();
300
301            Self {
302                bits: BufferHandle::new_host(buffer),
303                meta: BitBufferMeta::new(offset, len),
304            }
305        }
306    }
307
308    pub(super) fn validate(bits: &BitBuffer, validity: &Validity) -> VortexResult<()> {
309        vortex_ensure!(
310            bits.offset() < 8,
311            "BitBuffer offset must be <8, got {}",
312            bits.offset()
313        );
314
315        if let Some(validity_len) = validity.maybe_len() {
316            vortex_ensure!(
317                validity_len == bits.len(),
318                "BoolArray of size {} cannot be built with validity of size {validity_len}",
319                bits.len()
320            );
321        }
322
323        Ok(())
324    }
325}
326
327impl From<BitBuffer> for BoolArray {
328    fn from(value: BitBuffer) -> Self {
329        BoolArray::new(value, Validity::NonNullable)
330    }
331}
332
333impl FromIterator<bool> for BoolArray {
334    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
335        BoolArray::from(BitBuffer::from_iter(iter))
336    }
337}
338
339impl FromIterator<Option<bool>> for BoolArray {
340    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
341        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
342
343        BoolArray::new(
344            BitBuffer::from(buffer),
345            nulls
346                .map(|n| Validity::from(BitBuffer::from(n.into_inner())))
347                .unwrap_or(Validity::AllValid),
348        )
349    }
350}
351
352impl IntoArray for BitBuffer {
353    fn into_array(self) -> ArrayRef {
354        BoolArray::new(self, Validity::NonNullable).into_array()
355    }
356}
357
358impl IntoArray for BitBufferMut {
359    fn into_array(self) -> ArrayRef {
360        self.freeze().into_array()
361    }
362}
363
#[cfg(test)]
mod tests {
    use std::iter::once;
    use std::iter::repeat_n;

    use vortex_buffer::BitBuffer;
    use vortex_buffer::BitBufferMut;
    use vortex_buffer::buffer;

    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::BoolArray;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::bool::BoolArrayExt;
    use crate::assert_arrays_eq;
    use crate::patches::Patches;
    use crate::validity::Validity;

    /// Executes the scalar at `index` with a fresh execution context and unwraps it as a `bool`.
    fn bool_at(arr: &BoolArray, index: usize) -> bool {
        let scalar = arr
            .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();
        bool::try_from(&scalar).unwrap()
    }

    /// Executes the scalar at `index` with a fresh execution context and reports whether it is
    /// null.
    fn is_null_at(arr: &BoolArray, index: usize) -> bool {
        arr.execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap()
            .is_null()
    }

    /// A non-nullable array yields its first value as a scalar.
    #[test]
    fn bool_array() {
        let arr = BoolArray::from_iter([true, false, true]);
        assert!(bool_at(&arr, 0));
    }

    /// Collecting only `Some` values yields all-valid validity and the expected scalars.
    #[test]
    fn test_all_some_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(arr.validity(), Ok(Validity::AllValid)));

        assert!(bool_at(&arr, 0));
        assert!(!bool_at(&arr, 1));
    }

    /// Collecting a mix of `Some` and `None` preserves both the values and the null positions.
    #[test]
    fn test_bool_from_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]);

        assert!(bool_at(&arr, 0));
        assert!(bool_at(&arr, 1));
        assert!(is_null_at(&arr, 2));
        assert!(!bool_at(&arr, 3));
        assert!(is_null_at(&arr, 4));
    }

    /// Patching an array leaves a slice taken from it beforehand unchanged.
    #[test]
    fn patch_sliced_bools() {
        let arr = BoolArray::from(BitBuffer::new_set(12));
        let sliced = arr.slice(4..12).unwrap();
        assert_arrays_eq!(sliced, BoolArray::from_iter([true; 8]));

        // Bit 0 unset, bits 1..12 set.
        let arr = {
            let mut bits = BitBufferMut::new_unset(12);
            for i in 1..12 {
                bits.set(i);
            }
            BoolArray::from(bits.freeze())
        };
        let sliced = arr.slice(4..12).unwrap();
        let expected_slice: Vec<bool> = (4..12).map(|i| (1..12).contains(&i)).collect();
        assert_arrays_eq!(sliced, BoolArray::from_iter(expected_slice.clone()));

        // patch the underlying array at index 4 to false
        let patch_indices = buffer![4u32].into_array();
        let patch_values = BoolArray::from(BitBuffer::new_unset(1)).into_array();
        let patches = Patches::new(arr.len(), 0, patch_indices, patch_values, None).unwrap();
        let arr = arr
            .patch(&patches, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();
        // After patching index 4 to false: indices 1-3 and 5-11 are true, index 0 and 4 are false
        let expected_patched: Vec<bool> =
            (0..12).map(|i| i != 4 && (1..12).contains(&i)).collect();
        assert_arrays_eq!(arr, BoolArray::from_iter(expected_patched));

        // the slice should be unchanged (still has original values before patch)
        assert_arrays_eq!(sliced, BoolArray::from_iter(expected_slice));
    }

    /// Slicing a byte-aligned window out of the middle of an all-set array.
    #[test]
    fn slice_array_in_middle() {
        let all_set = BoolArray::from(BitBuffer::new_set(16));
        let middle = all_set.slice(4..12).unwrap();
        assert_arrays_eq!(middle, BoolArray::from_iter([true; 8]));
    }

    /// Patching a uniquely-owned array reuses its buffer instead of copying.
    #[test]
    fn patch_bools_owned() {
        let arr = BoolArray::from(BitBuffer::new_set(16));
        // The temporary bit buffer is dropped at the end of this statement, so `arr` is the
        // only owner of the backing buffer again when `patch` consumes it below.
        let buf_ptr = arr.to_bit_buffer().inner().as_ptr();

        let patch_indices = PrimitiveArray::new(buffer![0u32], Validity::NonNullable).into_array();
        let patch_values = BoolArray::from(BitBuffer::new_unset(1)).into_array();
        let patches = Patches::new(arr.len(), 0, patch_indices, patch_values, None).unwrap();
        let arr = arr
            .patch(&patches, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();
        // Verify buffer was reused in place
        assert_eq!(arr.to_bit_buffer().inner().as_ptr(), buf_ptr);

        // After patching index 0 to false: [false, true, true, ..., true] (16 values)
        let expected: BoolArray = once(false).chain(repeat_n(true, 15)).collect();
        assert_arrays_eq!(arr, expected);
    }

    /// Slicing an array whose length is not a multiple of eight, starting at a non-zero bit.
    #[test]
    fn patch_sliced_bools_offset() {
        let all_set = BoolArray::from(BitBuffer::new_set(15));
        let tail = all_set.slice(4..15).unwrap();
        assert_arrays_eq!(tail, BoolArray::from_iter([true; 11]));
    }
}
522}