Skip to main content

vortex_array/arrays/bool/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use arrow_array::BooleanArray;
5use vortex_buffer::BitBuffer;
6use vortex_buffer::BitBufferMut;
7use vortex_error::VortexExpect;
8use vortex_error::VortexResult;
9use vortex_error::vortex_ensure;
10use vortex_mask::Mask;
11
12use crate::ArrayRef;
13use crate::IntoArray;
14use crate::arrays::bool;
15use crate::buffer::BufferHandle;
16use crate::dtype::DType;
17use crate::stats::ArrayStats;
18use crate::validity::Validity;
19
20/// A boolean array that stores true/false values in a compact bit-packed format.
21///
22/// This mirrors the Apache Arrow Boolean array encoding, where each boolean value
23/// is stored as a single bit rather than a full byte.
24///
25/// The data layout uses:
26/// - A bit-packed buffer where each bit represents one boolean value (0 = false, 1 = true)
27/// - An optional validity child array, which must be of type `Bool(NonNullable)`, where true values
28///   indicate valid and false indicates null. if the i-th value is null in the validity child,
29///   the i-th packed bit in the buffer may be 0 or 1, i.e. it is undefined.
30/// - Bit-level slicing is supported with minimal overhead
31///
32/// # Examples
33///
34/// ```
35/// # fn main() -> vortex_error::VortexResult<()> {
36/// use vortex_array::arrays::BoolArray;
37/// use vortex_array::IntoArray;
38///
39/// // Create from iterator using FromIterator impl
40/// let array: BoolArray = [true, false, true, false].into_iter().collect();
41///
42/// // Slice the array
43/// let sliced = array.slice(1..3)?;
44/// assert_eq!(sliced.len(), 2);
45///
46/// // Access individual values
47/// let value = array.scalar_at(0).unwrap();
48/// assert_eq!(value, true.into());
49/// # Ok(())
50/// # }
51/// ```
52#[derive(Clone, Debug)]
53pub struct BoolArray {
54    pub(super) dtype: DType,
55    pub(super) bits: BufferHandle,
56    pub(super) offset: usize,
57    pub(super) len: usize,
58    pub(super) validity: Validity,
59    pub(super) stats_set: ArrayStats,
60}
61
62pub struct BoolArrayParts {
63    pub bits: BufferHandle,
64    pub offset: usize,
65    pub len: usize,
66    pub validity: Validity,
67}
68
69impl BoolArray {
70    /// Constructs a new `BoolArray`.
71    ///
72    /// # Panics
73    ///
74    /// Panics if the validity length is not equal to the bit buffer length.
75    pub fn new(bits: BitBuffer, validity: Validity) -> Self {
76        Self::try_new(bits, validity).vortex_expect("Failed to create BoolArray")
77    }
78
79    /// Constructs a new `BoolArray` from a `BufferHandle`.
80    ///
81    /// # Panics
82    ///
83    /// Panics if the validity length is not equal to the bit buffer length.
84    pub fn new_handle(handle: BufferHandle, offset: usize, len: usize, validity: Validity) -> Self {
85        Self::try_new_from_handle(handle, offset, len, validity)
86            .vortex_expect("Failed to create BoolArray from BufferHandle")
87    }
88
89    /// Constructs a new `BoolArray`.
90    ///
91    /// See [`BoolArray::new_unchecked`] for more information.
92    ///
93    /// # Errors
94    ///
95    /// Returns an error if the provided components do not satisfy the invariants documented in
96    /// [`BoolArray::new_unchecked`].
97    pub fn try_new(bits: BitBuffer, validity: Validity) -> VortexResult<Self> {
98        let bits = bits.shrink_offset();
99        Self::validate(&bits, &validity)?;
100
101        let (offset, len, buffer) = bits.into_inner();
102
103        Ok(Self {
104            dtype: DType::Bool(validity.nullability()),
105            bits: BufferHandle::new_host(buffer),
106            offset,
107            len,
108            validity,
109            stats_set: ArrayStats::default(),
110        })
111    }
112
113    /// Build a new bool array from a `BufferHandle`, returning an error if the offset is
114    /// too large or the buffer is not large enough to hold the values.
115    ///
116    /// # Error
117    ///
118    /// Error if the inputs fail validation. See also `try_new`.
119    pub fn try_new_from_handle(
120        bits: BufferHandle,
121        offset: usize,
122        len: usize,
123        validity: Validity,
124    ) -> VortexResult<Self> {
125        vortex_ensure!(offset < 8, "BitBuffer offset must be <8, got {}", offset);
126        if let Some(validity_len) = validity.maybe_len() {
127            vortex_ensure!(
128                validity_len == len,
129                "BoolArray of size {} cannot be built with validity of size {validity_len}",
130                len,
131            );
132        }
133
134        vortex_ensure!(
135            bits.len() * 8 >= (len + offset),
136            "provided BufferHandle with offset {offset} len {len} had size {} bits",
137            bits.len() * 8,
138        );
139
140        Ok(Self {
141            dtype: DType::Bool(validity.nullability()),
142            bits,
143            offset,
144            len,
145            validity,
146            stats_set: ArrayStats::default(),
147        })
148    }
149
150    /// Creates a new [`BoolArray`] without validation from these components:
151    ///
152    /// # Safety
153    ///
154    /// The caller must ensure that the validity length is equal to the bit buffer length.
155    pub unsafe fn new_unchecked(bits: BitBuffer, validity: Validity) -> Self {
156        if cfg!(debug_assertions) {
157            Self::new(bits, validity)
158        } else {
159            let (offset, len, buffer) = bits.into_inner();
160
161            Self {
162                dtype: DType::Bool(validity.nullability()),
163                bits: BufferHandle::new_host(buffer),
164                offset,
165                len,
166                validity,
167                stats_set: ArrayStats::default(),
168            }
169        }
170    }
171
172    /// Validates the components that would be used to create a [`BoolArray`].
173    ///
174    /// This function checks all the invariants required by [`BoolArray::new_unchecked`].
175    pub fn validate(bits: &BitBuffer, validity: &Validity) -> VortexResult<()> {
176        vortex_ensure!(
177            bits.offset() < 8,
178            "BitBuffer offset must be <8, got {}",
179            bits.offset()
180        );
181
182        // Validate validity
183        if let Some(validity_len) = validity.maybe_len() {
184            vortex_ensure!(
185                validity_len == bits.len(),
186                "BoolArray of size {} cannot be built with validity of size {validity_len}",
187                bits.len()
188            );
189        }
190
191        Ok(())
192    }
193
194    /// Splits into owned parts
195    #[inline]
196    pub fn into_parts(self) -> BoolArrayParts {
197        BoolArrayParts {
198            bits: self.bits,
199            offset: self.offset,
200            len: self.len,
201            validity: self.validity,
202        }
203    }
204
205    /// Create a new BoolArray from a set of indices and a length.
206    ///
207    /// All indices must be less than the length.
208    pub fn from_indices<I: IntoIterator<Item = usize>>(
209        length: usize,
210        indices: I,
211        validity: Validity,
212    ) -> Self {
213        let mut buffer = BitBufferMut::new_unset(length);
214        indices.into_iter().for_each(|idx| buffer.set(idx));
215        Self::new(buffer.freeze(), validity)
216    }
217
218    /// Returns the underlying [`BitBuffer`] of the array.
219    pub fn to_bit_buffer(&self) -> BitBuffer {
220        let buffer = self.bits.as_host().clone();
221
222        BitBuffer::new_with_offset(buffer, self.len, self.offset)
223    }
224
225    /// Returns the underlying [`BitBuffer`] of the array
226    pub fn into_bit_buffer(self) -> BitBuffer {
227        let buffer = self.bits.unwrap_host();
228
229        BitBuffer::new_with_offset(buffer, self.len, self.offset)
230    }
231
232    pub fn to_mask(&self) -> Mask {
233        self.maybe_to_mask()
234            .vortex_expect("failed to check validity")
235            .vortex_expect("cannot convert nullable boolean array to mask")
236    }
237
238    pub fn maybe_to_mask(&self) -> VortexResult<Option<Mask>> {
239        Ok(self
240            .all_valid()?
241            .then(|| Mask::from_buffer(self.to_bit_buffer())))
242    }
243
244    pub fn to_mask_fill_null_false(&self) -> Mask {
245        if let Some(constant) = self.as_constant() {
246            let bool_constant = constant.as_bool();
247            if bool_constant.value().unwrap_or(false) {
248                return Mask::new_true(self.len());
249            } else {
250                return Mask::new_false(self.len());
251            }
252        }
253        // Extract a boolean buffer, treating null values to false
254        let buffer = match self
255            .validity_mask()
256            .unwrap_or_else(|_| Mask::new_true(self.len()))
257        {
258            Mask::AllTrue(_) => self.to_bit_buffer(),
259            Mask::AllFalse(_) => return Mask::new_false(self.len()),
260            Mask::Values(validity) => validity.bit_buffer() & self.to_bit_buffer(),
261        };
262        Mask::from_buffer(buffer)
263    }
264}
265
266impl From<BitBuffer> for BoolArray {
267    fn from(value: BitBuffer) -> Self {
268        Self::new(value, Validity::NonNullable)
269    }
270}
271
272impl FromIterator<bool> for BoolArray {
273    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
274        Self::from(BitBuffer::from_iter(iter))
275    }
276}
277
278impl FromIterator<Option<bool>> for BoolArray {
279    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
280        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
281
282        Self::new(
283            BitBuffer::from(buffer),
284            nulls
285                .map(|n| Validity::from(BitBuffer::from(n.into_inner())))
286                .unwrap_or(Validity::AllValid),
287        )
288    }
289}
290
291impl IntoArray for BitBuffer {
292    fn into_array(self) -> ArrayRef {
293        BoolArray::new(self, Validity::NonNullable).into_array()
294    }
295}
296
297impl IntoArray for BitBufferMut {
298    fn into_array(self) -> ArrayRef {
299        self.freeze().into_array()
300    }
301}
302
#[cfg(test)]
mod tests {
    use std::iter::once;
    use std::iter::repeat_n;

    use vortex_buffer::BitBuffer;
    use vortex_buffer::BitBufferMut;
    use vortex_buffer::buffer;

    use crate::DynArray;
    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::BoolArray;
    use crate::arrays::PrimitiveArray;
    use crate::assert_arrays_eq;
    use crate::patches::Patches;
    use crate::validity::Validity;
    use crate::vtable::ValidityHelper;

    /// The first value of an array collected from plain booleans reads back as `true`.
    #[test]
    fn bool_array() {
        let array: BoolArray = [true, false, true].into_iter().collect();
        let first = array.scalar_at(0).unwrap();
        assert!(bool::try_from(&first).unwrap());
    }

    /// Collecting only `Some` values yields `Validity::AllValid` and keeps the values.
    #[test]
    fn test_all_some_iter() {
        let array = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(array.validity(), Validity::AllValid));

        for (index, expected) in [true, false].into_iter().enumerate() {
            let scalar = array.scalar_at(index).unwrap();
            assert_eq!(bool::try_from(&scalar).unwrap(), expected);
        }
    }

    /// Collecting a mix of `Some` and `None` keeps values and nulls at their positions.
    #[test]
    fn test_bool_from_iter() {
        let input = [Some(true), Some(true), None, Some(false), None];
        let array = BoolArray::from_iter(input);

        for (index, expected) in input.into_iter().enumerate() {
            let scalar = array.scalar_at(index).unwrap();
            match expected {
                Some(value) => assert_eq!(bool::try_from(&scalar).unwrap(), value),
                None => assert!(scalar.is_null()),
            }
        }
    }

    /// Patching the parent array must not affect a slice taken before the patch.
    #[test]
    fn patch_sliced_bools() {
        let all_set = BoolArray::from(BitBuffer::new_set(12));
        let sliced = all_set.slice(4..12).unwrap();
        assert_arrays_eq!(sliced, BoolArray::from_iter([true; 8]));

        // Twelve bits where only index 0 is unset.
        let mut bits = BitBufferMut::new_unset(12);
        for i in 1..12 {
            bits.set(i);
        }
        let arr = BoolArray::from(bits.freeze());

        let sliced = arr.slice(4..12).unwrap();
        let expected_slice: Vec<bool> = (4..12).map(|i| (1..12).contains(&i)).collect();
        assert_arrays_eq!(sliced, BoolArray::from_iter(expected_slice.clone()));

        // patch the underlying array at index 4 to false
        let patches = Patches::new(
            arr.len(),
            0,
            buffer![4u32].into_array(),
            BoolArray::from(BitBuffer::new_unset(1)).into_array(),
            None,
        )
        .unwrap();
        let arr = arr
            .patch(&patches, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();

        // After patching index 4 to false: indices 1-3 and 5-11 are true, index 0 and 4 are false
        let expected_patched: Vec<bool> = (0..12).map(|i| (1..12).contains(&i) && i != 4).collect();
        assert_arrays_eq!(arr, BoolArray::from_iter(expected_patched));

        // the slice should be unchanged (still has original values before patch)
        assert_arrays_eq!(sliced, BoolArray::from_iter(expected_slice));
    }

    /// Slicing a byte-spanning range out of the middle of an all-true array.
    #[test]
    fn slice_array_in_middle() {
        let all_set = BoolArray::from(BitBuffer::new_set(16));
        let middle = all_set.slice(4..12).unwrap();
        assert_arrays_eq!(middle, BoolArray::from_iter([true; 8]));
    }

    /// Patching a uniquely owned array reuses its buffer instead of copying it.
    #[test]
    fn patch_bools_owned() {
        let arr = BoolArray::from(BitBuffer::new_set(16));
        let buf_ptr = arr.to_bit_buffer().inner().as_ptr();

        let patch_indices = PrimitiveArray::new(buffer![0u32], Validity::NonNullable).into_array();
        let patch_values = BoolArray::from(BitBuffer::new_unset(1)).into_array();
        let patches = Patches::new(arr.len(), 0, patch_indices, patch_values, None).unwrap();

        let arr = arr
            .patch(&patches, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();
        // Verify buffer was reused in place
        assert_eq!(arr.to_bit_buffer().inner().as_ptr(), buf_ptr);

        // After patching index 0 to false: [false, true, true, ..., true] (16 values)
        let expected: BoolArray = once(false).chain(repeat_n(true, 15)).collect();
        assert_arrays_eq!(arr, expected);
    }

    /// Slicing from a non-byte-aligned start up to a non-byte-aligned end.
    #[test]
    fn patch_sliced_bools_offset() {
        let all_set = BoolArray::from(BitBuffer::new_set(15));
        let tail = all_set.slice(4..15).unwrap();
        assert_arrays_eq!(tail, BoolArray::from_iter([true; 11]));
    }
}