vortex_array/arrays/bool/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use arrow_array::BooleanArray;
5use vortex_buffer::BitBuffer;
6use vortex_buffer::BitBufferMut;
7use vortex_dtype::DType;
8use vortex_error::VortexExpect;
9use vortex_error::VortexResult;
10use vortex_error::vortex_ensure;
11use vortex_mask::Mask;
12
13use crate::ArrayRef;
14use crate::IntoArray;
15use crate::arrays::bool;
16use crate::stats::ArrayStats;
17use crate::validity::Validity;
18
/// A boolean array that stores true/false values in a compact bit-packed format.
///
/// This mirrors the Apache Arrow Boolean array encoding, where each boolean value
/// is stored as a single bit rather than a full byte.
///
/// The data layout uses:
/// - A bit-packed buffer where each bit represents one boolean value (0 = false, 1 = true)
/// - An optional validity child array, which must be of type `Bool(NonNullable)`, where true
///   indicates valid and false indicates null. If the i-th value is null in the validity child,
///   the i-th packed bit in the buffer may be 0 or 1, i.e. it is undefined.
/// - Bit-level slicing is supported with minimal overhead
///
/// # Examples
///
/// ```
/// use vortex_array::arrays::BoolArray;
/// use vortex_array::IntoArray;
///
/// // Create from iterator using FromIterator impl
/// let array: BoolArray = [true, false, true, false].into_iter().collect();
///
/// // Slice the array
/// let sliced = array.slice(1..3);
/// assert_eq!(sliced.len(), 2);
///
/// // Access individual values
/// let value = array.scalar_at(0);
/// assert_eq!(value, true.into());
/// ```
#[derive(Clone, Debug)]
pub struct BoolArray {
    /// Logical type of the array. The constructors in this file always set it to
    /// `DType::Bool(validity.nullability())`.
    pub(super) dtype: DType,
    /// Bit-packed boolean values, one bit per element.
    pub(super) bits: BitBuffer,
    /// Validity of the elements; its nullability determines the nullability of `dtype`.
    pub(super) validity: Validity,
    /// Statistics attached to this array. The constructors in this file initialise it with
    /// `ArrayStats::default()`.
    pub(super) stats_set: ArrayStats,
}
55
56impl BoolArray {
57    /// Constructs a new `BoolArray`.
58    ///
59    /// # Panics
60    ///
61    /// Panics if the validity length is not equal to the bit buffer length.
62    pub fn new(bits: BitBuffer, validity: Validity) -> Self {
63        Self::try_new(bits, validity).vortex_expect("Failed to create BoolArray")
64    }
65
66    /// Constructs a new `BoolArray`.
67    ///
68    /// See [`BoolArray::new_unchecked`] for more information.
69    ///
70    /// # Errors
71    ///
72    /// Returns an error if the provided components do not satisfy the invariants documented in
73    /// [`BoolArray::new_unchecked`].
74    pub fn try_new(bits: BitBuffer, validity: Validity) -> VortexResult<Self> {
75        let bits = bits.shrink_offset();
76        Self::validate(&bits, &validity)?;
77        Ok(Self {
78            dtype: DType::Bool(validity.nullability()),
79            bits,
80            validity,
81            stats_set: ArrayStats::default(),
82        })
83    }
84
85    /// Creates a new [`BoolArray`] without validation from these components:
86    ///
87    /// # Safety
88    ///
89    /// The caller must ensure that the validity length is equal to the bit buffer length.
90    pub unsafe fn new_unchecked(bits: BitBuffer, validity: Validity) -> Self {
91        if cfg!(debug_assertions) {
92            Self::new(bits, validity)
93        } else {
94            Self {
95                dtype: DType::Bool(validity.nullability()),
96                bits,
97                validity,
98                stats_set: ArrayStats::default(),
99            }
100        }
101    }
102
103    /// Validates the components that would be used to create a [`BoolArray`].
104    ///
105    /// This function checks all the invariants required by [`BoolArray::new_unchecked`].
106    pub fn validate(bits: &BitBuffer, validity: &Validity) -> VortexResult<()> {
107        vortex_ensure!(
108            bits.offset() < 8,
109            "BitBuffer offset must be <8, got {}",
110            bits.offset()
111        );
112
113        // Validate validity
114        if let Some(validity_len) = validity.maybe_len() {
115            vortex_ensure!(
116                validity_len == bits.len(),
117                "BoolArray of size {} cannot be built with validity of size {validity_len}",
118                bits.len()
119            );
120        }
121
122        Ok(())
123    }
124
125    /// Creates a new [`BoolArray`] from a [`BitBuffer`] and [`Validity`] directly.
126    ///
127    /// # Panics
128    ///
129    /// Panics if the validity is [`Validity::Array`] and the length is not the same as the buffer.
130    pub fn from_bit_buffer(buffer: BitBuffer, validity: Validity) -> Self {
131        if let Some(validity_len) = validity.maybe_len() {
132            assert_eq!(buffer.len(), validity_len);
133        }
134
135        // Shrink the buffer to remove any whole bytes.
136        let buffer = buffer.shrink_offset();
137        Self {
138            dtype: DType::Bool(validity.nullability()),
139            bits: buffer,
140            validity,
141            stats_set: ArrayStats::default(),
142        }
143    }
144
145    /// Create a new BoolArray from a set of indices and a length.
146    ///
147    /// All indices must be less than the length.
148    pub fn from_indices<I: IntoIterator<Item = usize>>(
149        length: usize,
150        indices: I,
151        validity: Validity,
152    ) -> Self {
153        let mut buffer = BitBufferMut::new_unset(length);
154        indices.into_iter().for_each(|idx| buffer.set(idx));
155        Self::from_bit_buffer(buffer.freeze(), validity)
156    }
157
158    /// Returns the underlying [`BitBuffer`] of the array.
159    pub fn bit_buffer(&self) -> &BitBuffer {
160        assert!(
161            self.bits.offset() < 8,
162            "Offset must be <8, did we forget to call shrink_offset? Found {}",
163            self.bits.offset()
164        );
165        &self.bits
166    }
167
168    /// Returns the underlying [`BitBuffer`] ofthe array
169    pub fn into_bit_buffer(self) -> BitBuffer {
170        self.bits
171    }
172
173    pub fn to_mask(&self) -> Mask {
174        self.maybe_to_mask()
175            .vortex_expect("cannot convert nullable boolean array to mask")
176    }
177
178    pub fn maybe_to_mask(&self) -> Option<Mask> {
179        self.all_valid()
180            .then(|| Mask::from_buffer(self.bit_buffer().clone()))
181    }
182
183    pub fn to_mask_fill_null_false(&self) -> Mask {
184        if let Some(constant) = self.as_constant() {
185            let bool_constant = constant.as_bool();
186            if bool_constant.value().unwrap_or(false) {
187                return Mask::new_true(self.len());
188            } else {
189                return Mask::new_false(self.len());
190            }
191        }
192        // Extract a boolean buffer, treating null values to false
193        let buffer = match self.validity_mask() {
194            Mask::AllTrue(_) => self.bit_buffer().clone(),
195            Mask::AllFalse(_) => return Mask::new_false(self.len()),
196            Mask::Values(validity) => validity.bit_buffer() & self.bit_buffer(),
197        };
198        Mask::from_buffer(buffer)
199    }
200}
201
202impl From<BitBuffer> for BoolArray {
203    fn from(value: BitBuffer) -> Self {
204        Self::from_bit_buffer(value, Validity::NonNullable)
205    }
206}
207
208impl FromIterator<bool> for BoolArray {
209    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
210        Self::from(BitBuffer::from_iter(iter))
211    }
212}
213
214impl FromIterator<Option<bool>> for BoolArray {
215    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
216        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
217
218        Self::from_bit_buffer(
219            BitBuffer::from(buffer),
220            nulls
221                .map(|n| Validity::from(BitBuffer::from(n.into_inner())))
222                .unwrap_or(Validity::AllValid),
223        )
224    }
225}
226
227impl IntoArray for BitBuffer {
228    fn into_array(self) -> ArrayRef {
229        BoolArray::new(self, Validity::NonNullable).into_array()
230    }
231}
232
233impl IntoArray for BitBufferMut {
234    fn into_array(self) -> ArrayRef {
235        self.freeze().into_array()
236    }
237}
238
239#[cfg(test)]
240mod tests {
241    use vortex_buffer::BitBuffer;
242    use vortex_buffer::BitBufferMut;
243    use vortex_buffer::buffer;
244
245    use crate::Array;
246    use crate::IntoArray;
247    use crate::ToCanonical;
248    use crate::arrays::BoolArray;
249    use crate::arrays::PrimitiveArray;
250    use crate::patches::Patches;
251    use crate::validity::Validity;
252    use crate::vtable::ValidityHelper;
253
254    #[test]
255    fn bool_array() {
256        let arr = BoolArray::from_iter([true, false, true]);
257        let scalar = bool::try_from(&arr.scalar_at(0)).unwrap();
258        assert!(scalar);
259    }
260
261    #[test]
262    fn test_all_some_iter() {
263        let arr = BoolArray::from_iter([Some(true), Some(false)]);
264
265        assert!(matches!(arr.validity(), Validity::AllValid));
266
267        let scalar = bool::try_from(&arr.scalar_at(0)).unwrap();
268        assert!(scalar);
269        let scalar = bool::try_from(&arr.scalar_at(1)).unwrap();
270        assert!(!scalar);
271    }
272
273    #[test]
274    fn test_bool_from_iter() {
275        let arr = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]);
276
277        let scalar = bool::try_from(&arr.scalar_at(0)).unwrap();
278        assert!(scalar);
279
280        let scalar = bool::try_from(&arr.scalar_at(1)).unwrap();
281        assert!(scalar);
282
283        let scalar = arr.scalar_at(2);
284        assert!(scalar.is_null());
285
286        let scalar = bool::try_from(&arr.scalar_at(3)).unwrap();
287        assert!(!scalar);
288
289        let scalar = arr.scalar_at(4);
290        assert!(scalar.is_null());
291    }
292
293    #[test]
294    fn patch_sliced_bools() {
295        let arr = BoolArray::from(BitBuffer::new_set(12));
296        let sliced = arr.slice(4..12);
297        assert_eq!(sliced.len(), 8);
298        let values = sliced.to_bool().into_bit_buffer().into_mut();
299        assert_eq!(values.len(), 8);
300        assert_eq!(values.as_slice(), &[255, 255]);
301
302        let arr = {
303            let mut builder = BitBufferMut::new_unset(12);
304            (1..12).for_each(|i| builder.set(i));
305            BoolArray::from(builder.freeze())
306        };
307        let sliced = arr.slice(4..12);
308        let sliced_len = sliced.len();
309        let values = sliced.to_bool().into_bit_buffer().into_mut();
310        assert_eq!(values.as_slice(), &[254, 15]);
311
312        // patch the underlying array
313        let patches = Patches::new(
314            arr.len(),
315            0,
316            buffer![4u32].into_array(), // This creates a non-nullable array
317            BoolArray::from(BitBuffer::new_unset(1)).into_array(),
318            None,
319        );
320        let arr = arr.patch(&patches);
321        let arr_len = arr.len();
322        let values = arr.to_bool().into_bit_buffer().into_mut();
323        assert_eq!(values.len(), arr_len);
324        assert_eq!(values.as_slice(), &[238, 15]);
325
326        // the slice should be unchanged
327        let values = sliced.to_bool().into_bit_buffer().into_mut();
328        assert_eq!(values.len(), sliced_len);
329        assert_eq!(values.as_slice(), &[254, 15]); // unchanged
330    }
331
332    #[test]
333    fn slice_array_in_middle() {
334        let arr = BoolArray::from(BitBuffer::new_set(16));
335        let sliced = arr.slice(4..12);
336        let sliced_len = sliced.len();
337        let values = sliced.to_bool().into_bit_buffer().into_mut();
338        assert_eq!(values.len(), sliced_len);
339        assert_eq!(values.as_slice(), &[255, 255]);
340    }
341
342    #[test]
343    fn patch_bools_owned() {
344        let arr = BoolArray::from(BitBuffer::new_set(16));
345        let buf_ptr = arr.bit_buffer().inner().as_ptr();
346
347        let patches = Patches::new(
348            arr.len(),
349            0,
350            PrimitiveArray::new(buffer![0u32], Validity::NonNullable).into_array(),
351            BoolArray::from(BitBuffer::new_unset(1)).into_array(),
352            None,
353        );
354        let arr = arr.patch(&patches);
355        assert_eq!(arr.bit_buffer().inner().as_ptr(), buf_ptr);
356
357        let values = arr.into_bit_buffer();
358        assert_eq!(values.inner().as_slice(), &[254, 255]);
359    }
360
361    #[test]
362    fn patch_sliced_bools_offset() {
363        let arr = BoolArray::from(BitBuffer::new_set(15));
364        let sliced = arr.slice(4..15);
365        let values = sliced.to_bool().into_bit_buffer().into_mut();
366        assert_eq!(values.as_slice(), &[255, 255]);
367    }
368}