vortex_array/arrays/bool/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use arrow_array::BooleanArray;
5use vortex_buffer::{BitBuffer, BitBufferMut, ByteBuffer};
6use vortex_dtype::DType;
7use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
8use vortex_mask::Mask;
9
10use crate::arrays::bool;
11use crate::stats::ArrayStats;
12use crate::validity::Validity;
13use crate::{ArrayRef, IntoArray};
14
15/// A boolean array that stores true/false values in a compact bit-packed format.
16///
17/// This mirrors the Apache Arrow Boolean array encoding, where each boolean value
18/// is stored as a single bit rather than a full byte.
19///
20/// The data layout uses:
21/// - A bit-packed buffer where each bit represents one boolean value (0 = false, 1 = true)
22/// - An optional validity child array, which must be of type `Bool(NonNullable)`, where true values
23///   indicate valid and false indicates null. if the i-th value is null in the validity child,
24///   the i-th packed bit in the buffer may be 0 or 1, i.e. it is undefined.
25/// - Bit-level slicing is supported with minimal overhead
26///
27/// # Examples
28///
29/// ```
30/// use vortex_array::arrays::BoolArray;
31/// use vortex_array::IntoArray;
32///
33/// // Create from iterator using FromIterator impl
34/// let array: BoolArray = [true, false, true, false].into_iter().collect();
35///
36/// // Slice the array
37/// let sliced = array.slice(1..3);
38/// assert_eq!(sliced.len(), 2);
39///
40/// // Access individual values
41/// let value = array.scalar_at(0);
42/// assert_eq!(value, true.into());
43/// ```
44#[derive(Clone, Debug)]
45pub struct BoolArray {
46    pub(super) dtype: DType,
47    pub(super) buffer: BitBuffer,
48    pub(super) validity: Validity,
49    pub(super) stats_set: ArrayStats,
50}
51
52impl BoolArray {
53    /// Constructs a new `BoolArray`.
54    ///
55    /// See [`BoolArray::new_unchecked`] for more information.
56    ///
57    /// # Errors
58    ///
59    /// Returns an error if the provided components do not satisfy the invariants documented in
60    /// [`BoolArray::new_unchecked`].
61    pub fn try_new(
62        buffer: ByteBuffer,
63        offset: usize,
64        len: usize,
65        validity: Validity,
66    ) -> VortexResult<Self> {
67        Self::validate(&buffer, offset, len, &validity)?;
68
69        // SAFETY: validate ensures all invariants are met.
70        Ok(unsafe { Self::new_unchecked(buffer, offset, len, validity) })
71    }
72
73    /// Creates a new [`BoolArray`] without validation from these components:
74    ///
75    /// * `buffer` is a raw [`ByteBuffer`] holding the packed bits.
76    /// * `offset` is the number of bits in the start of the buffer that should be skipped when
77    ///   looking up the i-th value.
78    /// * `len` is the length of the array, which should correspond to the number of bits.
79    /// * `validity` holds the null values.
80    ///
81    /// # Safety
82    ///
83    /// The caller must ensure all of the following invariants are satisfied:
84    ///
85    /// - `buffer` must contain at least `(offset + len).div_ceil(8)` bytes.
86    /// - `offset` must be less than 8 (it represents the bit offset within the first byte).
87    /// - If `validity` is `Validity::Array`, its length must exactly equal `len`.
88    pub unsafe fn new_unchecked(
89        buffer: ByteBuffer,
90        offset: usize,
91        len: usize,
92        validity: Validity,
93    ) -> Self {
94        #[cfg(debug_assertions)]
95        Self::validate(&buffer, offset, len, &validity)
96            .vortex_expect("[Debug Assertion]: Invalid `BoolArray` parameters");
97
98        let buffer = BitBuffer::new_with_offset(buffer, len, offset);
99        let buffer = buffer.shrink_offset();
100        Self {
101            dtype: DType::Bool(validity.nullability()),
102            buffer,
103            validity,
104            stats_set: ArrayStats::default(),
105        }
106    }
107
108    /// Validates the components that would be used to create a [`BoolArray`].
109    ///
110    /// This function checks all the invariants required by [`BoolArray::new_unchecked`].
111    pub fn validate(
112        buffer: &ByteBuffer,
113        offset: usize,
114        len: usize,
115        validity: &Validity,
116    ) -> VortexResult<()> {
117        vortex_ensure!(
118            offset < 8,
119            "offset must be less than whole byte, was {offset} bits"
120        );
121
122        // Validate the buffer is large enough to hold all the bits
123        let required_bytes = offset.saturating_add(len).div_ceil(8);
124        vortex_ensure!(
125            buffer.len() >= required_bytes,
126            "BoolArray with offset={offset} len={len} cannot be built from buffer of size {}",
127            buffer.len()
128        );
129
130        // Validate validity
131        if let Some(validity_len) = validity.maybe_len() {
132            vortex_ensure!(
133                validity_len == len,
134                "BoolArray of size {len} cannot be built with validity of size {validity_len}"
135            );
136        }
137
138        Ok(())
139    }
140
141    /// Creates a new [`BoolArray`] from a [`BitBuffer`] and [`Validity`] directly.
142    ///
143    /// # Panics
144    ///
145    /// Panics if the validity is [`Validity::Array`] and the length is not the same as the buffer.
146    pub fn from_bit_buffer(buffer: BitBuffer, validity: Validity) -> Self {
147        if let Some(validity_len) = validity.maybe_len() {
148            assert_eq!(buffer.len(), validity_len);
149        }
150
151        // Shrink the buffer to remove any whole bytes.
152        let buffer = buffer.shrink_offset();
153        Self {
154            dtype: DType::Bool(validity.nullability()),
155            buffer,
156            validity,
157            stats_set: ArrayStats::default(),
158        }
159    }
160
161    /// Create a new BoolArray from a set of indices and a length.
162    ///
163    /// All indices must be less than the length.
164    pub fn from_indices<I: IntoIterator<Item = usize>>(
165        length: usize,
166        indices: I,
167        validity: Validity,
168    ) -> Self {
169        let mut buffer = BitBufferMut::new_unset(length);
170        indices.into_iter().for_each(|idx| buffer.set(idx));
171        Self::from_bit_buffer(buffer.freeze(), validity)
172    }
173
174    /// Returns the underlying [`BitBuffer`] of the array.
175    pub fn bit_buffer(&self) -> &BitBuffer {
176        assert!(
177            self.buffer.offset() < 8,
178            "Offset must be <8, did we forget to call shrink_offset? Found {}",
179            self.buffer.offset()
180        );
181        &self.buffer
182    }
183
184    /// Returns the underlying [`BitBuffer`] ofthe array
185    pub fn into_bit_buffer(self) -> BitBuffer {
186        self.buffer
187    }
188
189    pub fn to_mask(&self) -> Mask {
190        self.maybe_to_mask()
191            .vortex_expect("cannot convert nullable boolean array to mask")
192    }
193
194    pub fn maybe_to_mask(&self) -> Option<Mask> {
195        self.all_valid()
196            .then(|| Mask::from_buffer(self.bit_buffer().clone()))
197    }
198
199    pub fn to_mask_fill_null_false(&self) -> Mask {
200        if let Some(constant) = self.as_constant() {
201            let bool_constant = constant.as_bool();
202            if bool_constant.value().unwrap_or(false) {
203                return Mask::new_true(self.len());
204            } else {
205                return Mask::new_false(self.len());
206            }
207        }
208        // Extract a boolean buffer, treating null values to false
209        let buffer = match self.validity_mask() {
210            Mask::AllTrue(_) => self.bit_buffer().clone(),
211            Mask::AllFalse(_) => return Mask::new_false(self.len()),
212            Mask::Values(validity) => validity.bit_buffer() & self.bit_buffer(),
213        };
214        Mask::from_buffer(buffer)
215    }
216}
217
218impl From<BitBuffer> for BoolArray {
219    fn from(value: BitBuffer) -> Self {
220        Self::from_bit_buffer(value, Validity::NonNullable)
221    }
222}
223
224impl FromIterator<bool> for BoolArray {
225    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
226        Self::from(BitBuffer::from_iter(iter))
227    }
228}
229
230impl FromIterator<Option<bool>> for BoolArray {
231    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
232        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
233
234        Self::from_bit_buffer(
235            BitBuffer::from(buffer),
236            nulls
237                .map(|n| Validity::from(BitBuffer::from(n.into_inner())))
238                .unwrap_or(Validity::AllValid),
239        )
240    }
241}
242
243impl IntoArray for BitBuffer {
244    fn into_array(self) -> ArrayRef {
245        let len = self.len();
246        BoolArray::try_new(self.into_inner(), 0, len, Validity::NonNullable)
247            .vortex_expect("known correct")
248            .into_array()
249    }
250}
251
252impl IntoArray for BitBufferMut {
253    fn into_array(self) -> ArrayRef {
254        self.freeze().into_array()
255    }
256}
257
#[cfg(test)]
mod tests {
    use vortex_buffer::{BitBuffer, BitBufferMut, buffer};

    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::patches::Patches;
    use crate::validity::Validity;
    use crate::vtable::ValidityHelper;
    use crate::{Array, IntoArray, ToCanonical};

    /// A non-nullable array built from plain bools exposes its first value as a scalar.
    #[test]
    fn bool_array() {
        let array = BoolArray::from_iter([true, false, true]);
        let first = bool::try_from(&array.scalar_at(0)).unwrap();
        assert!(first);
    }

    /// Collecting only `Some(_)` items yields `Validity::AllValid` and the expected values.
    #[test]
    fn test_all_some_iter() {
        let array = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(array.validity(), Validity::AllValid));

        let first = bool::try_from(&array.scalar_at(0)).unwrap();
        assert!(first);
        let second = bool::try_from(&array.scalar_at(1)).unwrap();
        assert!(!second);
    }

    /// Collecting a mix of `Some(_)` and `None` keeps values and nulls at the right positions.
    #[test]
    fn test_bool_from_iter() {
        let array = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]);

        let at_0 = bool::try_from(&array.scalar_at(0)).unwrap();
        assert!(at_0);

        let at_1 = bool::try_from(&array.scalar_at(1)).unwrap();
        assert!(at_1);

        let at_2 = array.scalar_at(2);
        assert!(at_2.is_null());

        let at_3 = bool::try_from(&array.scalar_at(3)).unwrap();
        assert!(!at_3);

        let at_4 = array.scalar_at(4);
        assert!(at_4.is_null());
    }

    /// Patching an array must not change the bits seen through a slice taken before the patch.
    #[test]
    fn patch_sliced_bools() {
        // Part 1: slicing an all-set array.
        let all_set = BoolArray::from(BitBuffer::new_set(12));
        let sliced = all_set.slice(4..12);
        assert_eq!(sliced.len(), 8);
        let bits = sliced.to_bool().into_bit_buffer().into_mut();
        assert_eq!(bits.len(), 8);
        assert_eq!(bits.as_slice(), &[255, 255]);

        // Part 2: an array whose bit 0 is unset and bits 1..12 are set.
        let arr = {
            let mut builder = BitBufferMut::new_unset(12);
            for i in 1..12 {
                builder.set(i);
            }
            BoolArray::from(builder.freeze())
        };
        let sliced = arr.slice(4..12);
        let sliced_len = sliced.len();
        let bits = sliced.to_bool().into_bit_buffer().into_mut();
        assert_eq!(bits.as_slice(), &[254, 15]);

        // Patch the underlying array: clear the bit at index 4.
        let patches = Patches::new(
            arr.len(),
            0,
            buffer![4u32].into_array(), // This creates a non-nullable array
            BoolArray::from(BitBuffer::new_unset(1)).into_array(),
            None,
        );
        let patched = arr.patch(&patches);
        let patched_len = patched.len();
        let bits = patched.to_bool().into_bit_buffer().into_mut();
        assert_eq!(bits.len(), patched_len);
        assert_eq!(bits.as_slice(), &[238, 15]);

        // The slice taken before patching should be unchanged.
        let bits = sliced.to_bool().into_bit_buffer().into_mut();
        assert_eq!(bits.len(), sliced_len);
        assert_eq!(bits.as_slice(), &[254, 15]); // unchanged
    }

    /// Slicing out of the middle of an all-set array keeps the slice length and backing bytes.
    #[test]
    fn slice_array_in_middle() {
        let all_set = BoolArray::from(BitBuffer::new_set(16));
        let sliced = all_set.slice(4..12);
        let expected_len = sliced.len();
        let bits = sliced.to_bool().into_bit_buffer().into_mut();
        assert_eq!(bits.len(), expected_len);
        assert_eq!(bits.as_slice(), &[255, 255]);
    }

    /// Patching an array that is consumed by `patch` keeps the same backing allocation.
    #[test]
    fn patch_bools_owned() {
        let arr = BoolArray::from(BitBuffer::new_set(16));
        let original_ptr = arr.bit_buffer().inner().as_ptr();

        // Clear the bit at index 0.
        let patches = Patches::new(
            arr.len(),
            0,
            PrimitiveArray::new(buffer![0u32], Validity::NonNullable).into_array(),
            BoolArray::from(BitBuffer::new_unset(1)).into_array(),
            None,
        );
        let patched = arr.patch(&patches);
        assert_eq!(patched.bit_buffer().inner().as_ptr(), original_ptr);

        let bits = patched.into_bit_buffer();
        assert_eq!(bits.inner().as_slice(), &[254, 255]);
    }

    /// Slicing an all-set array of odd length at a non-byte boundary exposes all-set bytes.
    #[test]
    fn patch_sliced_bools_offset() {
        let all_set = BoolArray::from(BitBuffer::new_set(15));
        let sliced = all_set.slice(4..15);
        let bits = sliced.to_bool().into_bit_buffer().into_mut();
        assert_eq!(bits.as_slice(), &[255, 255]);
    }
}
380        assert_eq!(values.as_slice(), &[255, 255]);
381    }
382}