Skip to main content

vortex_array/arrays/bool/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use arrow_array::BooleanArray;
5use vortex_buffer::BitBuffer;
6use vortex_buffer::BitBufferMut;
7use vortex_dtype::DType;
8use vortex_error::VortexExpect;
9use vortex_error::VortexResult;
10use vortex_error::vortex_ensure;
11use vortex_mask::Mask;
12
13use crate::ArrayRef;
14use crate::IntoArray;
15use crate::arrays::bool;
16use crate::buffer::BufferHandle;
17use crate::stats::ArrayStats;
18use crate::validity::Validity;
19
/// A boolean array that stores true/false values in a compact bit-packed format.
///
/// This mirrors the Apache Arrow Boolean array encoding, where each boolean value
/// is stored as a single bit rather than a full byte.
///
/// The data layout uses:
/// - A bit-packed buffer where each bit represents one boolean value (0 = false, 1 = true)
/// - An optional validity child array, which must be of type `Bool(NonNullable)`, where true values
///   indicate valid and false indicates null. If the i-th value is null in the validity child,
///   the i-th packed bit in the buffer may be 0 or 1, i.e. it is undefined.
/// - Bit-level slicing is supported with minimal overhead
///
/// # Examples
///
/// ```
/// # fn main() -> vortex_error::VortexResult<()> {
/// use vortex_array::arrays::BoolArray;
/// use vortex_array::IntoArray;
///
/// // Create from iterator using FromIterator impl
/// let array: BoolArray = [true, false, true, false].into_iter().collect();
///
/// // Slice the array
/// let sliced = array.slice(1..3)?;
/// assert_eq!(sliced.len(), 2);
///
/// // Access individual values
/// let value = array.scalar_at(0).unwrap();
/// assert_eq!(value, true.into());
/// # Ok(())
/// # }
/// ```
#[derive(Clone, Debug)]
pub struct BoolArray {
    /// Logical type of the array; the constructors in this file always set it to
    /// `DType::Bool` with the nullability reported by `validity`.
    pub(super) dtype: DType,
    /// Handle to the buffer holding the bit-packed values.
    pub(super) bits: BufferHandle,
    /// Bit position of the first value inside `bits`; the checked constructors reject
    /// offsets of 8 or more.
    pub(super) offset: usize,
    /// Number of boolean values in the array.
    pub(super) len: usize,
    /// Validity (null-ness) of the values.
    pub(super) validity: Validity,
    /// Statistics holder; every constructor in this file starts it at `ArrayStats::default()`.
    pub(super) stats_set: ArrayStats,
}
61
62pub struct BoolArrayParts {
63    pub bits: BufferHandle,
64    pub offset: usize,
65    pub len: usize,
66    pub validity: Validity,
67}
68
69impl BoolArray {
70    /// Constructs a new `BoolArray`.
71    ///
72    /// # Panics
73    ///
74    /// Panics if the validity length is not equal to the bit buffer length.
75    pub fn new(bits: BitBuffer, validity: Validity) -> Self {
76        Self::try_new(bits, validity).vortex_expect("Failed to create BoolArray")
77    }
78
79    /// Constructs a new `BoolArray` from a `BufferHandle`.
80    ///
81    /// # Panics
82    ///
83    /// Panics if the validity length is not equal to the bit buffer length.
84    pub fn new_handle(handle: BufferHandle, offset: usize, len: usize, validity: Validity) -> Self {
85        Self::try_new_from_handle(handle, offset, len, validity)
86            .vortex_expect("Failed to create BoolArray from BufferHandle")
87    }
88
89    /// Constructs a new `BoolArray`.
90    ///
91    /// See [`BoolArray::new_unchecked`] for more information.
92    ///
93    /// # Errors
94    ///
95    /// Returns an error if the provided components do not satisfy the invariants documented in
96    /// [`BoolArray::new_unchecked`].
97    pub fn try_new(bits: BitBuffer, validity: Validity) -> VortexResult<Self> {
98        let bits = bits.shrink_offset();
99        Self::validate(&bits, &validity)?;
100
101        let (offset, len, buffer) = bits.into_inner();
102
103        Ok(Self {
104            dtype: DType::Bool(validity.nullability()),
105            bits: BufferHandle::new_host(buffer),
106            offset,
107            len,
108            validity,
109            stats_set: ArrayStats::default(),
110        })
111    }
112
113    /// Build a new bool array from a `BufferHandle`, returning an error if the offset is
114    /// too large or the buffer is not large enough to hold the values.
115    ///
116    /// # Error
117    ///
118    /// Error if the inputs fail validation. See also `try_new`.
119    pub fn try_new_from_handle(
120        bits: BufferHandle,
121        offset: usize,
122        len: usize,
123        validity: Validity,
124    ) -> VortexResult<Self> {
125        vortex_ensure!(offset < 8, "BitBuffer offset must be <8, got {}", offset);
126        if let Some(validity_len) = validity.maybe_len() {
127            vortex_ensure!(
128                validity_len == len,
129                "BoolArray of size {} cannot be built with validity of size {validity_len}",
130                len,
131            );
132        }
133
134        vortex_ensure!(
135            bits.len() * 8 >= (len + offset),
136            "provided BufferHandle with offset {offset} len {len} had size {} bits",
137            bits.len() * 8,
138        );
139
140        Ok(Self {
141            dtype: DType::Bool(validity.nullability()),
142            bits,
143            offset,
144            len,
145            validity,
146            stats_set: ArrayStats::default(),
147        })
148    }
149
150    /// Creates a new [`BoolArray`] without validation from these components:
151    ///
152    /// # Safety
153    ///
154    /// The caller must ensure that the validity length is equal to the bit buffer length.
155    pub unsafe fn new_unchecked(bits: BitBuffer, validity: Validity) -> Self {
156        if cfg!(debug_assertions) {
157            Self::new(bits, validity)
158        } else {
159            let (offset, len, buffer) = bits.into_inner();
160
161            Self {
162                dtype: DType::Bool(validity.nullability()),
163                bits: BufferHandle::new_host(buffer),
164                offset,
165                len,
166                validity,
167                stats_set: ArrayStats::default(),
168            }
169        }
170    }
171
172    /// Validates the components that would be used to create a [`BoolArray`].
173    ///
174    /// This function checks all the invariants required by [`BoolArray::new_unchecked`].
175    pub fn validate(bits: &BitBuffer, validity: &Validity) -> VortexResult<()> {
176        vortex_ensure!(
177            bits.offset() < 8,
178            "BitBuffer offset must be <8, got {}",
179            bits.offset()
180        );
181
182        // Validate validity
183        if let Some(validity_len) = validity.maybe_len() {
184            vortex_ensure!(
185                validity_len == bits.len(),
186                "BoolArray of size {} cannot be built with validity of size {validity_len}",
187                bits.len()
188            );
189        }
190
191        Ok(())
192    }
193
194    /// Splits into owned parts
195    #[inline]
196    pub fn into_parts(self) -> BoolArrayParts {
197        BoolArrayParts {
198            bits: self.bits,
199            offset: self.offset,
200            len: self.len,
201            validity: self.validity,
202        }
203    }
204
205    /// Create a new BoolArray from a set of indices and a length.
206    ///
207    /// All indices must be less than the length.
208    pub fn from_indices<I: IntoIterator<Item = usize>>(
209        length: usize,
210        indices: I,
211        validity: Validity,
212    ) -> Self {
213        let mut buffer = BitBufferMut::new_unset(length);
214        indices.into_iter().for_each(|idx| buffer.set(idx));
215        Self::new(buffer.freeze(), validity)
216    }
217
218    /// Returns the underlying [`BitBuffer`] of the array.
219    pub fn to_bit_buffer(&self) -> BitBuffer {
220        let buffer = self.bits.as_host().clone();
221
222        BitBuffer::new_with_offset(buffer, self.len, self.offset)
223    }
224
225    /// Returns the underlying [`BitBuffer`] of the array
226    pub fn into_bit_buffer(self) -> BitBuffer {
227        self.to_bit_buffer()
228    }
229
230    pub fn to_mask(&self) -> Mask {
231        self.maybe_to_mask()
232            .vortex_expect("failed to check validity")
233            .vortex_expect("cannot convert nullable boolean array to mask")
234    }
235
236    pub fn maybe_to_mask(&self) -> VortexResult<Option<Mask>> {
237        Ok(self
238            .all_valid()?
239            .then(|| Mask::from_buffer(self.to_bit_buffer())))
240    }
241
242    pub fn to_mask_fill_null_false(&self) -> Mask {
243        if let Some(constant) = self.as_constant() {
244            let bool_constant = constant.as_bool();
245            if bool_constant.value().unwrap_or(false) {
246                return Mask::new_true(self.len());
247            } else {
248                return Mask::new_false(self.len());
249            }
250        }
251        // Extract a boolean buffer, treating null values to false
252        let buffer = match self
253            .validity_mask()
254            .unwrap_or_else(|_| Mask::new_true(self.len()))
255        {
256            Mask::AllTrue(_) => self.to_bit_buffer(),
257            Mask::AllFalse(_) => return Mask::new_false(self.len()),
258            Mask::Values(validity) => validity.bit_buffer() & self.to_bit_buffer(),
259        };
260        Mask::from_buffer(buffer)
261    }
262}
263
264impl From<BitBuffer> for BoolArray {
265    fn from(value: BitBuffer) -> Self {
266        Self::new(value, Validity::NonNullable)
267    }
268}
269
270impl FromIterator<bool> for BoolArray {
271    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
272        Self::from(BitBuffer::from_iter(iter))
273    }
274}
275
276impl FromIterator<Option<bool>> for BoolArray {
277    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
278        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
279
280        Self::new(
281            BitBuffer::from(buffer),
282            nulls
283                .map(|n| Validity::from(BitBuffer::from(n.into_inner())))
284                .unwrap_or(Validity::AllValid),
285        )
286    }
287}
288
289impl IntoArray for BitBuffer {
290    fn into_array(self) -> ArrayRef {
291        BoolArray::new(self, Validity::NonNullable).into_array()
292    }
293}
294
295impl IntoArray for BitBufferMut {
296    fn into_array(self) -> ArrayRef {
297        self.freeze().into_array()
298    }
299}
300
301#[cfg(test)]
302mod tests {
303    use std::iter::once;
304    use std::iter::repeat_n;
305
306    use vortex_buffer::BitBuffer;
307    use vortex_buffer::BitBufferMut;
308    use vortex_buffer::buffer;
309
310    use crate::Array;
311    use crate::IntoArray;
312    use crate::arrays::BoolArray;
313    use crate::arrays::PrimitiveArray;
314    use crate::assert_arrays_eq;
315    use crate::patches::Patches;
316    use crate::validity::Validity;
317    use crate::vtable::ValidityHelper;
318
319    #[test]
320    fn bool_array() {
321        let arr = BoolArray::from_iter([true, false, true]);
322        let scalar = bool::try_from(&arr.scalar_at(0).unwrap()).unwrap();
323        assert!(scalar);
324    }
325
326    #[test]
327    fn test_all_some_iter() {
328        let arr = BoolArray::from_iter([Some(true), Some(false)]);
329
330        assert!(matches!(arr.validity(), Validity::AllValid));
331
332        let scalar = bool::try_from(&arr.scalar_at(0).unwrap()).unwrap();
333        assert!(scalar);
334        let scalar = bool::try_from(&arr.scalar_at(1).unwrap()).unwrap();
335        assert!(!scalar);
336    }
337
338    #[test]
339    fn test_bool_from_iter() {
340        let arr = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]);
341
342        let scalar = bool::try_from(&arr.scalar_at(0).unwrap()).unwrap();
343        assert!(scalar);
344
345        let scalar = bool::try_from(&arr.scalar_at(1).unwrap()).unwrap();
346        assert!(scalar);
347
348        let scalar = arr.scalar_at(2).unwrap();
349        assert!(scalar.is_null());
350
351        let scalar = bool::try_from(&arr.scalar_at(3).unwrap()).unwrap();
352        assert!(!scalar);
353
354        let scalar = arr.scalar_at(4).unwrap();
355        assert!(scalar.is_null());
356    }
357
    /// Slices a 12-element array, patches the original array, and checks that the slice taken
    /// before the patch still compares equal to the pre-patch values.
    #[test]
    fn patch_sliced_bools() {
        // Warm-up: slicing an all-true array yields all-true values.
        let arr = BoolArray::from(BitBuffer::new_set(12));
        let sliced = arr.slice(4..12).unwrap();
        assert_arrays_eq!(sliced, BoolArray::from_iter([true; 8]));

        // Index 0 is false, indices 1..12 are true.
        let arr = {
            let mut builder = BitBufferMut::new_unset(12);
            (1..12).for_each(|i| builder.set(i));
            BoolArray::from(builder.freeze())
        };
        // The slice is taken BEFORE the patch is applied below.
        let sliced = arr.slice(4..12).unwrap();
        let expected_slice: Vec<bool> = (4..12).map(|i| (1..12).contains(&i)).collect();
        assert_arrays_eq!(sliced, BoolArray::from_iter(expected_slice.clone()));

        // patch the underlying array at index 4 to false
        let patches = Patches::new(
            arr.len(),
            0,
            buffer![4u32].into_array(),
            BoolArray::from(BitBuffer::new_unset(1)).into_array(),
            None,
        )
        .unwrap();
        let arr = arr.patch(&patches).unwrap();
        // After patching index 4 to false: indices 1-3 and 5-11 are true, index 0 and 4 are false
        let expected_patched: Vec<bool> = (0..12).map(|i| (1..12).contains(&i) && i != 4).collect();
        assert_arrays_eq!(arr, BoolArray::from_iter(expected_patched));

        // the slice should be unchanged (still has original values before patch)
        assert_arrays_eq!(sliced, BoolArray::from_iter(expected_slice));
    }
390
391    #[test]
392    fn slice_array_in_middle() {
393        let arr = BoolArray::from(BitBuffer::new_set(16));
394        let sliced = arr.slice(4..12).unwrap();
395        assert_arrays_eq!(sliced, BoolArray::from_iter([true; 8]));
396    }
397
    /// Patches index 0 of an all-true array to false and checks both the resulting values and
    /// that the patched array still points at the same underlying buffer.
    #[test]
    fn patch_bools_owned() {
        let arr = BoolArray::from(BitBuffer::new_set(16));
        // Only the raw pointer is kept; the temporary `BitBuffer` is dropped at the end of this
        // statement.
        // NOTE(review): the pointer check below presumably relies on `arr` being the sole owner
        // of its buffer when `patch` runs — confirm before reordering these statements.
        let buf_ptr = arr.to_bit_buffer().inner().as_ptr();

        // A single patch: set index 0 to false.
        let patches = Patches::new(
            arr.len(),
            0,
            PrimitiveArray::new(buffer![0u32], Validity::NonNullable).into_array(),
            BoolArray::from(BitBuffer::new_unset(1)).into_array(),
            None,
        )
        .unwrap();
        let arr = arr.patch(&patches).unwrap();
        // Verify buffer was reused in place
        assert_eq!(arr.to_bit_buffer().inner().as_ptr(), buf_ptr);

        // After patching index 0 to false: [false, true, true, ..., true] (16 values)
        let expected: BoolArray = once(false).chain(repeat_n(true, 15)).collect();
        assert_arrays_eq!(arr, expected);
    }
419
420    #[test]
421    fn patch_sliced_bools_offset() {
422        let arr = BoolArray::from(BitBuffer::new_set(15));
423        let sliced = arr.slice(4..15).unwrap();
424        assert_arrays_eq!(sliced, BoolArray::from_iter([true; 11]));
425    }
426}