Skip to main content

vortex_array/arrays/bool/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Display;
5use std::fmt::Formatter;
6
7use arrow_array::BooleanArray;
8use vortex_buffer::BitBuffer;
9use vortex_buffer::BitBufferMut;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_ensure;
13use vortex_mask::Mask;
14
15use crate::ArrayRef;
16use crate::IntoArray;
17use crate::array::Array;
18use crate::array::ArrayParts;
19use crate::array::TypedArrayRef;
20use crate::array::child_to_validity;
21use crate::array::validity_to_child;
22use crate::arrays::Bool;
23use crate::arrays::BoolArray;
24use crate::buffer::BufferHandle;
25use crate::dtype::DType;
26use crate::validity::Validity;
27
/// Index of the slot holding the validity child, i.e. the bitmap indicating which
/// elements are non-null.
pub(super) const VALIDITY_SLOT: usize = 0;
/// Total number of slots a bool array carries (only the validity slot).
pub(super) const NUM_SLOTS: usize = 1;
/// Names of the slots; the entry at `VALIDITY_SLOT` names the validity child.
pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["validity"];
32
/// Inner data for a boolean array that stores true/false values in a compact bit-packed format.
///
/// This mirrors the Apache Arrow Boolean array encoding, where each boolean value
/// is stored as a single bit rather than a full byte.
///
/// The data layout uses:
/// - A bit-packed buffer where each bit represents one boolean value (0 = false, 1 = true)
/// - An optional validity child array, which must be of type `Bool(NonNullable)`, where true values
///   indicate valid and false indicates null. If the i-th value is null in the validity child,
///   the i-th packed bit in the buffer may be 0 or 1, i.e. it is undefined.
/// - Bit-level slicing is supported with minimal overhead
///
/// # Examples
///
/// ```
/// # fn main() -> vortex_error::VortexResult<()> {
/// use vortex_array::arrays::BoolArray;
/// use vortex_array::IntoArray;
///
/// // Create from iterator using FromIterator impl
/// let array: BoolArray = [true, false, true, false].into_iter().collect();
///
/// // Slice the array
/// let sliced = array.slice(1..3)?;
/// assert_eq!(sliced.len(), 2);
///
/// // Access individual values
/// let value = array.scalar_at(0).unwrap();
/// assert_eq!(value, true.into());
/// # Ok(())
/// # }
/// ```
#[derive(Clone, Debug)]
pub struct BoolData {
    /// Handle to the buffer holding the bit-packed values.
    pub(super) bits: BufferHandle,
    /// Bit offset of the first value within `bits`; the checked constructors require it
    /// to be less than 8.
    pub(super) offset: usize,
}
70
71impl Display for BoolData {
72    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
73        write!(f, "offset: {}", self.offset)
74    }
75}
76
77pub struct BoolDataParts {
78    pub bits: BufferHandle,
79    pub offset: usize,
80    pub len: usize,
81}
82
83pub trait BoolArrayExt: TypedArrayRef<Bool> {
84    fn nullability(&self) -> crate::dtype::Nullability {
85        match self.as_ref().dtype() {
86            DType::Bool(nullability) => *nullability,
87            _ => unreachable!("BoolArrayExt requires a bool dtype"),
88        }
89    }
90
91    fn validity(&self) -> Validity {
92        child_to_validity(&self.as_ref().slots()[VALIDITY_SLOT], self.nullability())
93    }
94
95    fn bool_validity_mask(&self) -> Mask {
96        self.validity().to_mask(self.as_ref().len())
97    }
98
99    fn to_bit_buffer(&self) -> BitBuffer {
100        let buffer = self.bits.as_host().clone();
101        BitBuffer::new_with_offset(buffer, self.as_ref().len(), self.offset)
102    }
103
104    fn maybe_to_mask(&self) -> VortexResult<Option<Mask>> {
105        let all_valid = match &self.validity() {
106            Validity::NonNullable | Validity::AllValid => true,
107            Validity::AllInvalid => false,
108            Validity::Array(a) => a.statistics().compute_min::<bool>().unwrap_or(false),
109        };
110        Ok(all_valid.then(|| Mask::from_buffer(self.to_bit_buffer())))
111    }
112
113    fn to_mask(&self) -> Mask {
114        self.maybe_to_mask()
115            .vortex_expect("failed to check validity")
116            .vortex_expect("cannot convert nullable boolean array to mask")
117    }
118
119    fn to_mask_fill_null_false(&self) -> Mask {
120        let validity_mask = self.bool_validity_mask();
121        let buffer = match validity_mask {
122            Mask::AllTrue(_) => self.to_bit_buffer(),
123            Mask::AllFalse(_) => return Mask::new_false(self.as_ref().len()),
124            Mask::Values(validity) => validity.bit_buffer() & self.to_bit_buffer(),
125        };
126        Mask::from_buffer(buffer)
127    }
128}
129impl<T: TypedArrayRef<Bool>> BoolArrayExt for T {}
130
131/// Field accessors and non-consuming methods on the inner bool data.
132impl BoolData {
133    /// Splits into owned parts
134    #[inline]
135    pub fn into_parts(self, len: usize) -> BoolDataParts {
136        BoolDataParts {
137            bits: self.bits,
138            offset: self.offset,
139            len,
140        }
141    }
142
143    pub(crate) fn make_slots(validity: &Validity, len: usize) -> Vec<Option<ArrayRef>> {
144        vec![validity_to_child(validity, len)]
145    }
146}
147
148/// Constructors and consuming methods for `BoolArray` (`Array<Bool>`).
149impl Array<Bool> {
150    /// Constructs a new `BoolArray`.
151    ///
152    /// # Panics
153    ///
154    /// Panics if the validity length is not equal to the bit buffer length.
155    pub fn new(bits: BitBuffer, validity: Validity) -> Self {
156        Self::try_new(bits, validity).vortex_expect("Failed to create BoolArray")
157    }
158
159    /// Constructs a new `BoolArray` from a `BufferHandle`.
160    ///
161    /// # Panics
162    ///
163    /// Panics if the validity length is not equal to the bit buffer length.
164    pub fn new_handle(handle: BufferHandle, offset: usize, len: usize, validity: Validity) -> Self {
165        Self::try_new_from_handle(handle, offset, len, validity)
166            .vortex_expect("Failed to create BoolArray from BufferHandle")
167    }
168
169    /// Constructs a new `BoolArray`.
170    ///
171    /// # Errors
172    ///
173    /// Returns an error if the provided components do not satisfy the invariants.
174    pub fn try_new(bits: BitBuffer, validity: Validity) -> VortexResult<Self> {
175        let dtype = DType::Bool(validity.nullability());
176        let len = bits.len();
177        let slots = BoolData::make_slots(&validity, len);
178        let data = BoolData::try_new(bits, validity)?;
179        Ok(unsafe {
180            Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
181        })
182    }
183
184    /// Build a new bool array from a `BufferHandle`, returning an error if the offset is
185    /// too large or the buffer is not large enough to hold the values.
186    pub fn try_new_from_handle(
187        bits: BufferHandle,
188        offset: usize,
189        len: usize,
190        validity: Validity,
191    ) -> VortexResult<Self> {
192        let dtype = DType::Bool(validity.nullability());
193        let slots = BoolData::make_slots(&validity, len);
194        let data = BoolData::try_new_from_handle(bits, offset, len, validity)?;
195        Ok(unsafe {
196            Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
197        })
198    }
199
200    /// Creates a new [`BoolArray`] without validation.
201    ///
202    /// # Safety
203    ///
204    /// The caller must ensure that the validity length is equal to the bit buffer length.
205    pub unsafe fn new_unchecked(bits: BitBuffer, validity: Validity) -> Self {
206        let dtype = DType::Bool(validity.nullability());
207        let len = bits.len();
208        let slots = BoolData::make_slots(&validity, len);
209        // SAFETY: caller guarantees validity length equals bit buffer length.
210        let data = unsafe { BoolData::new_unchecked(bits, validity) };
211        unsafe {
212            Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
213        }
214    }
215
216    /// Validates the components that would be used to create a [`BoolArray`].
217    pub fn validate(bits: &BitBuffer, validity: &Validity) -> VortexResult<()> {
218        BoolData::validate(bits, validity)
219    }
220
221    /// Create a new BoolArray from a set of indices and a length.
222    ///
223    /// All indices must be less than the length.
224    pub fn from_indices<I: IntoIterator<Item = usize>>(
225        length: usize,
226        indices: I,
227        validity: Validity,
228    ) -> Self {
229        let mut buffer = BitBufferMut::new_unset(length);
230        indices.into_iter().for_each(|idx| buffer.set(idx));
231        Self::new(buffer.freeze(), validity)
232    }
233
234    /// Returns the underlying [`BitBuffer`] of the array, consuming self.
235    pub fn into_bit_buffer(self) -> BitBuffer {
236        let len = self.len();
237        let data = self.into_data();
238        let buffer = data.bits.unwrap_host();
239        BitBuffer::new_with_offset(buffer, len, data.offset)
240    }
241}
242
243/// Internal constructors on BoolData (used by Array<Bool> constructors and VTable::build).
244impl BoolData {
245    pub(super) fn try_new(bits: BitBuffer, validity: Validity) -> VortexResult<Self> {
246        let bits = bits.shrink_offset();
247        Self::validate(&bits, &validity)?;
248
249        let (offset, _len, buffer) = bits.into_inner();
250
251        Ok(Self {
252            bits: BufferHandle::new_host(buffer),
253            offset,
254        })
255    }
256
257    pub(super) fn try_new_from_handle(
258        bits: BufferHandle,
259        offset: usize,
260        len: usize,
261        validity: Validity,
262    ) -> VortexResult<Self> {
263        vortex_ensure!(offset < 8, "BitBuffer offset must be <8, got {}", offset);
264        if let Some(validity_len) = validity.maybe_len() {
265            vortex_ensure!(
266                validity_len == len,
267                "BoolArray of size {} cannot be built with validity of size {validity_len}",
268                len,
269            );
270        }
271
272        vortex_ensure!(
273            bits.len() * 8 >= (len + offset),
274            "provided BufferHandle with offset {offset} len {len} had size {} bits",
275            bits.len() * 8,
276        );
277
278        Ok(Self { bits, offset })
279    }
280
281    pub(super) unsafe fn new_unchecked(bits: BitBuffer, validity: Validity) -> Self {
282        if cfg!(debug_assertions) {
283            Self::try_new(bits, validity).vortex_expect("Failed to create BoolData")
284        } else {
285            let (offset, _len, buffer) = bits.into_inner();
286
287            Self {
288                bits: BufferHandle::new_host(buffer),
289                offset,
290            }
291        }
292    }
293
294    pub(super) fn validate(bits: &BitBuffer, validity: &Validity) -> VortexResult<()> {
295        vortex_ensure!(
296            bits.offset() < 8,
297            "BitBuffer offset must be <8, got {}",
298            bits.offset()
299        );
300
301        if let Some(validity_len) = validity.maybe_len() {
302            vortex_ensure!(
303                validity_len == bits.len(),
304                "BoolArray of size {} cannot be built with validity of size {validity_len}",
305                bits.len()
306            );
307        }
308
309        Ok(())
310    }
311}
312
313impl From<BitBuffer> for BoolArray {
314    fn from(value: BitBuffer) -> Self {
315        BoolArray::new(value, Validity::NonNullable)
316    }
317}
318
319impl FromIterator<bool> for BoolArray {
320    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
321        BoolArray::from(BitBuffer::from_iter(iter))
322    }
323}
324
325impl FromIterator<Option<bool>> for BoolArray {
326    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
327        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
328
329        BoolArray::new(
330            BitBuffer::from(buffer),
331            nulls
332                .map(|n| Validity::from(BitBuffer::from(n.into_inner())))
333                .unwrap_or(Validity::AllValid),
334        )
335    }
336}
337
338impl IntoArray for BitBuffer {
339    fn into_array(self) -> ArrayRef {
340        BoolArray::new(self, Validity::NonNullable).into_array()
341    }
342}
343
344impl IntoArray for BitBufferMut {
345    fn into_array(self) -> ArrayRef {
346        self.freeze().into_array()
347    }
348}
349
#[cfg(test)]
mod tests {
    use std::iter::once;
    use std::iter::repeat_n;

    use vortex_buffer::BitBuffer;
    use vortex_buffer::BitBufferMut;
    use vortex_buffer::buffer;

    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::BoolArray;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::bool::BoolArrayExt;
    use crate::assert_arrays_eq;
    use crate::patches::Patches;
    use crate::validity::Validity;

    /// The first element of a non-nullable array reads back as `true`.
    #[test]
    fn bool_array() {
        let arr = BoolArray::from_iter([true, false, true]);
        let first = arr.scalar_at(0).unwrap();
        assert!(bool::try_from(&first).unwrap());
    }

    /// Collecting only `Some` values yields an all-valid array with the right values.
    #[test]
    fn test_all_some_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(arr.validity(), Ok(Validity::AllValid)));

        let first = arr.scalar_at(0).unwrap();
        assert!(bool::try_from(&first).unwrap());
        let second = arr.scalar_at(1).unwrap();
        assert!(!bool::try_from(&second).unwrap());
    }

    /// Collecting a mix of `Some` and `None` preserves both values and nulls.
    #[test]
    fn test_bool_from_iter() {
        let input = [Some(true), Some(true), None, Some(false), None];
        let arr = BoolArray::from_iter(input);

        for (idx, expected) in input.into_iter().enumerate() {
            let scalar = arr.scalar_at(idx).unwrap();
            match expected {
                Some(value) => assert_eq!(bool::try_from(&scalar).unwrap(), value),
                None => assert!(scalar.is_null()),
            }
        }
    }

    /// Patching an array must not disturb a slice taken from it beforehand.
    #[test]
    fn patch_sliced_bools() {
        let all_set = BoolArray::from(BitBuffer::new_set(12));
        let sliced = all_set.slice(4..12).unwrap();
        assert_arrays_eq!(sliced, BoolArray::from_iter([true; 8]));

        let mut builder = BitBufferMut::new_unset(12);
        for i in 1..12 {
            builder.set(i);
        }
        let arr = BoolArray::from(builder.freeze());

        let sliced = arr.slice(4..12).unwrap();
        let expected_slice: Vec<bool> = (4..12).map(|i| (1..12).contains(&i)).collect();
        assert_arrays_eq!(sliced, BoolArray::from_iter(expected_slice.clone()));

        // patch the underlying array at index 4 to false
        let patch_indices = buffer![4u32].into_array();
        let patch_values = BoolArray::from(BitBuffer::new_unset(1)).into_array();
        let patches = Patches::new(arr.len(), 0, patch_indices, patch_values, None).unwrap();
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let arr = arr.patch(&patches, &mut ctx).unwrap();

        // After patching index 4 to false: indices 1-3 and 5-11 are true, index 0 and 4 are false
        let expected_patched: Vec<bool> = (0..12).map(|i| (1..12).contains(&i) && i != 4).collect();
        assert_arrays_eq!(arr, BoolArray::from_iter(expected_patched));

        // the slice should be unchanged (still has original values before patch)
        assert_arrays_eq!(sliced, BoolArray::from_iter(expected_slice));
    }

    /// Slicing on non-byte boundaries in the middle of the buffer keeps the values.
    #[test]
    fn slice_array_in_middle() {
        let arr = BoolArray::from(BitBuffer::new_set(16));
        let middle = arr.slice(4..12).unwrap();
        assert_arrays_eq!(middle, BoolArray::from_iter([true; 8]));
    }

    /// Patching a uniquely-owned array reuses its buffer in place.
    #[test]
    fn patch_bools_owned() {
        let arr = BoolArray::from(BitBuffer::new_set(16));
        // Keep this as one statement: the temporary bit buffer must be dropped before
        // patching so the array remains the only owner of the buffer.
        let buf_ptr = arr.to_bit_buffer().inner().as_ptr();

        let patch_indices = PrimitiveArray::new(buffer![0u32], Validity::NonNullable).into_array();
        let patch_values = BoolArray::from(BitBuffer::new_unset(1)).into_array();
        let patches = Patches::new(arr.len(), 0, patch_indices, patch_values, None).unwrap();
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let arr = arr.patch(&patches, &mut ctx).unwrap();

        // Verify buffer was reused in place
        assert_eq!(arr.to_bit_buffer().inner().as_ptr(), buf_ptr);

        // After patching index 0 to false: [false, true, true, ..., true] (16 values)
        let expected: BoolArray = once(false).chain(repeat_n(true, 15)).collect();
        assert_arrays_eq!(arr, expected);
    }

    /// Slicing a buffer whose length is not a multiple of 8 keeps the values.
    #[test]
    fn patch_sliced_bools_offset() {
        let arr = BoolArray::from(BitBuffer::new_set(15));
        let tail = arr.slice(4..15).unwrap();
        assert_arrays_eq!(tail, BoolArray::from_iter([true; 11]));
    }
}