vortex_array/arrays/bool/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use arrow_array::BooleanArray;
5use vortex_buffer::BitBuffer;
6use vortex_buffer::BitBufferMut;
7use vortex_buffer::ByteBuffer;
8use vortex_dtype::DType;
9use vortex_error::VortexExpect;
10use vortex_error::VortexResult;
11use vortex_error::vortex_ensure;
12use vortex_mask::Mask;
13
14use crate::ArrayRef;
15use crate::IntoArray;
16use crate::arrays::bool;
17use crate::stats::ArrayStats;
18use crate::validity::Validity;
19
20/// A boolean array that stores true/false values in a compact bit-packed format.
21///
22/// This mirrors the Apache Arrow Boolean array encoding, where each boolean value
23/// is stored as a single bit rather than a full byte.
24///
25/// The data layout uses:
26/// - A bit-packed buffer where each bit represents one boolean value (0 = false, 1 = true)
27/// - An optional validity child array, which must be of type `Bool(NonNullable)`, where true values
28///   indicate valid and false indicates null. if the i-th value is null in the validity child,
29///   the i-th packed bit in the buffer may be 0 or 1, i.e. it is undefined.
30/// - Bit-level slicing is supported with minimal overhead
31///
32/// # Examples
33///
34/// ```
35/// use vortex_array::arrays::BoolArray;
36/// use vortex_array::IntoArray;
37///
38/// // Create from iterator using FromIterator impl
39/// let array: BoolArray = [true, false, true, false].into_iter().collect();
40///
41/// // Slice the array
42/// let sliced = array.slice(1..3);
43/// assert_eq!(sliced.len(), 2);
44///
45/// // Access individual values
46/// let value = array.scalar_at(0);
47/// assert_eq!(value, true.into());
48/// ```
49#[derive(Clone, Debug)]
50pub struct BoolArray {
51    pub(super) dtype: DType,
52    pub(super) buffer: BitBuffer,
53    pub(super) validity: Validity,
54    pub(super) stats_set: ArrayStats,
55}
56
57impl BoolArray {
58    /// Constructs a new `BoolArray`.
59    ///
60    /// See [`BoolArray::new_unchecked`] for more information.
61    ///
62    /// # Errors
63    ///
64    /// Returns an error if the provided components do not satisfy the invariants documented in
65    /// [`BoolArray::new_unchecked`].
66    pub fn try_new(
67        buffer: ByteBuffer,
68        offset: usize,
69        len: usize,
70        validity: Validity,
71    ) -> VortexResult<Self> {
72        Self::validate(&buffer, offset, len, &validity)?;
73
74        // SAFETY: validate ensures all invariants are met.
75        Ok(unsafe { Self::new_unchecked(buffer, offset, len, validity) })
76    }
77
78    /// Creates a new [`BoolArray`] without validation from these components:
79    ///
80    /// * `buffer` is a raw [`ByteBuffer`] holding the packed bits.
81    /// * `offset` is the number of bits in the start of the buffer that should be skipped when
82    ///   looking up the i-th value.
83    /// * `len` is the length of the array, which should correspond to the number of bits.
84    /// * `validity` holds the null values.
85    ///
86    /// # Safety
87    ///
88    /// The caller must ensure all of the following invariants are satisfied:
89    ///
90    /// - `buffer` must contain at least `(offset + len).div_ceil(8)` bytes.
91    /// - `offset` must be less than 8 (it represents the bit offset within the first byte).
92    /// - If `validity` is `Validity::Array`, its length must exactly equal `len`.
93    pub unsafe fn new_unchecked(
94        buffer: ByteBuffer,
95        offset: usize,
96        len: usize,
97        validity: Validity,
98    ) -> Self {
99        #[cfg(debug_assertions)]
100        Self::validate(&buffer, offset, len, &validity)
101            .vortex_expect("[Debug Assertion]: Invalid `BoolArray` parameters");
102
103        let buffer = BitBuffer::new_with_offset(buffer, len, offset);
104        let buffer = buffer.shrink_offset();
105        Self {
106            dtype: DType::Bool(validity.nullability()),
107            buffer,
108            validity,
109            stats_set: ArrayStats::default(),
110        }
111    }
112
113    /// Validates the components that would be used to create a [`BoolArray`].
114    ///
115    /// This function checks all the invariants required by [`BoolArray::new_unchecked`].
116    pub fn validate(
117        buffer: &ByteBuffer,
118        offset: usize,
119        len: usize,
120        validity: &Validity,
121    ) -> VortexResult<()> {
122        vortex_ensure!(
123            offset < 8,
124            "offset must be less than whole byte, was {offset} bits"
125        );
126
127        // Validate the buffer is large enough to hold all the bits
128        let required_bytes = offset.saturating_add(len).div_ceil(8);
129        vortex_ensure!(
130            buffer.len() >= required_bytes,
131            "BoolArray with offset={offset} len={len} cannot be built from buffer of size {}",
132            buffer.len()
133        );
134
135        // Validate validity
136        if let Some(validity_len) = validity.maybe_len() {
137            vortex_ensure!(
138                validity_len == len,
139                "BoolArray of size {len} cannot be built with validity of size {validity_len}"
140            );
141        }
142
143        Ok(())
144    }
145
146    /// Creates a new [`BoolArray`] from a [`BitBuffer`] and [`Validity`] directly.
147    ///
148    /// # Panics
149    ///
150    /// Panics if the validity is [`Validity::Array`] and the length is not the same as the buffer.
151    pub fn from_bit_buffer(buffer: BitBuffer, validity: Validity) -> Self {
152        if let Some(validity_len) = validity.maybe_len() {
153            assert_eq!(buffer.len(), validity_len);
154        }
155
156        // Shrink the buffer to remove any whole bytes.
157        let buffer = buffer.shrink_offset();
158        Self {
159            dtype: DType::Bool(validity.nullability()),
160            buffer,
161            validity,
162            stats_set: ArrayStats::default(),
163        }
164    }
165
166    /// Create a new BoolArray from a set of indices and a length.
167    ///
168    /// All indices must be less than the length.
169    pub fn from_indices<I: IntoIterator<Item = usize>>(
170        length: usize,
171        indices: I,
172        validity: Validity,
173    ) -> Self {
174        let mut buffer = BitBufferMut::new_unset(length);
175        indices.into_iter().for_each(|idx| buffer.set(idx));
176        Self::from_bit_buffer(buffer.freeze(), validity)
177    }
178
179    /// Returns the underlying [`BitBuffer`] of the array.
180    pub fn bit_buffer(&self) -> &BitBuffer {
181        assert!(
182            self.buffer.offset() < 8,
183            "Offset must be <8, did we forget to call shrink_offset? Found {}",
184            self.buffer.offset()
185        );
186        &self.buffer
187    }
188
189    /// Returns the underlying [`BitBuffer`] ofthe array
190    pub fn into_bit_buffer(self) -> BitBuffer {
191        self.buffer
192    }
193
194    pub fn to_mask(&self) -> Mask {
195        self.maybe_to_mask()
196            .vortex_expect("cannot convert nullable boolean array to mask")
197    }
198
199    pub fn maybe_to_mask(&self) -> Option<Mask> {
200        self.all_valid()
201            .then(|| Mask::from_buffer(self.bit_buffer().clone()))
202    }
203
204    pub fn to_mask_fill_null_false(&self) -> Mask {
205        if let Some(constant) = self.as_constant() {
206            let bool_constant = constant.as_bool();
207            if bool_constant.value().unwrap_or(false) {
208                return Mask::new_true(self.len());
209            } else {
210                return Mask::new_false(self.len());
211            }
212        }
213        // Extract a boolean buffer, treating null values to false
214        let buffer = match self.validity_mask() {
215            Mask::AllTrue(_) => self.bit_buffer().clone(),
216            Mask::AllFalse(_) => return Mask::new_false(self.len()),
217            Mask::Values(validity) => validity.bit_buffer() & self.bit_buffer(),
218        };
219        Mask::from_buffer(buffer)
220    }
221}
222
223impl From<BitBuffer> for BoolArray {
224    fn from(value: BitBuffer) -> Self {
225        Self::from_bit_buffer(value, Validity::NonNullable)
226    }
227}
228
229impl FromIterator<bool> for BoolArray {
230    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
231        Self::from(BitBuffer::from_iter(iter))
232    }
233}
234
235impl FromIterator<Option<bool>> for BoolArray {
236    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
237        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
238
239        Self::from_bit_buffer(
240            BitBuffer::from(buffer),
241            nulls
242                .map(|n| Validity::from(BitBuffer::from(n.into_inner())))
243                .unwrap_or(Validity::AllValid),
244        )
245    }
246}
247
248impl IntoArray for BitBuffer {
249    fn into_array(self) -> ArrayRef {
250        let (offset, len, buffer) = self.into_inner();
251        BoolArray::try_new(buffer, offset, len, Validity::NonNullable)
252            .vortex_expect("known correct")
253            .into_array()
254    }
255}
256
257impl IntoArray for BitBufferMut {
258    fn into_array(self) -> ArrayRef {
259        self.freeze().into_array()
260    }
261}
262
#[cfg(test)]
mod tests {
    use vortex_buffer::BitBuffer;
    use vortex_buffer::BitBufferMut;
    use vortex_buffer::buffer;

    use crate::Array;
    use crate::IntoArray;
    use crate::ToCanonical;
    use crate::arrays::BoolArray;
    use crate::arrays::PrimitiveArray;
    use crate::patches::Patches;
    use crate::validity::Validity;
    use crate::vtable::ValidityHelper;

    /// Reads the scalar at `index` and converts it to a `bool`, panicking if that fails.
    fn bool_at(array: &BoolArray, index: usize) -> bool {
        bool::try_from(&array.scalar_at(index)).unwrap()
    }

    #[test]
    fn bool_array() {
        let array: BoolArray = [true, false, true].into_iter().collect();
        assert!(bool_at(&array, 0));
    }

    #[test]
    fn test_all_some_iter() {
        let array: BoolArray = [Some(true), Some(false)].into_iter().collect();

        assert!(matches!(array.validity(), Validity::AllValid));

        assert!(bool_at(&array, 0));
        assert!(!bool_at(&array, 1));
    }

    #[test]
    fn test_bool_from_iter() {
        let array: BoolArray = [Some(true), Some(true), None, Some(false), None]
            .into_iter()
            .collect();

        assert!(bool_at(&array, 0));
        assert!(bool_at(&array, 1));
        assert!(array.scalar_at(2).is_null());
        assert!(!bool_at(&array, 3));
        assert!(array.scalar_at(4).is_null());
    }

    #[test]
    fn patch_sliced_bools() {
        // Slice of an all-set array.
        let all_set = BoolArray::from(BitBuffer::new_set(12));
        let window = all_set.slice(4..12);
        assert_eq!(window.len(), 8);
        let bits = window.to_bool().into_bit_buffer().into_mut();
        assert_eq!(bits.len(), 8);
        assert_eq!(bits.as_slice(), &[255, 255]);

        // Same slice over an array whose first bit is unset.
        let mut builder = BitBufferMut::new_unset(12);
        for i in 1..12 {
            builder.set(i);
        }
        let arr = BoolArray::from(builder.freeze());
        let window = arr.slice(4..12);
        let window_len = window.len();
        let bits = window.to_bool().into_bit_buffer().into_mut();
        assert_eq!(bits.as_slice(), &[254, 15]);

        // Patch the underlying array at index 4 with an unset bit.
        let patches = Patches::new(
            arr.len(),
            0,
            buffer![4u32].into_array(), // This creates a non-nullable array
            BoolArray::from(BitBuffer::new_unset(1)).into_array(),
            None,
        );
        let patched = arr.patch(&patches);
        let patched_len = patched.len();
        let bits = patched.to_bool().into_bit_buffer().into_mut();
        assert_eq!(bits.len(), patched_len);
        assert_eq!(bits.as_slice(), &[238, 15]);

        // The slice taken before patching must be unchanged.
        let bits = window.to_bool().into_bit_buffer().into_mut();
        assert_eq!(bits.len(), window_len);
        assert_eq!(bits.as_slice(), &[254, 15]); // unchanged
    }

    #[test]
    fn slice_array_in_middle() {
        let array = BoolArray::from(BitBuffer::new_set(16));
        let window = array.slice(4..12);
        let window_len = window.len();
        let bits = window.to_bool().into_bit_buffer().into_mut();
        assert_eq!(bits.len(), window_len);
        assert_eq!(bits.as_slice(), &[255, 255]);
    }

    #[test]
    fn patch_bools_owned() {
        let arr = BoolArray::from(BitBuffer::new_set(16));
        // Remember where the bits live so we can check the patch happened in place.
        let original_ptr = arr.bit_buffer().inner().as_ptr();

        let patches = Patches::new(
            arr.len(),
            0,
            PrimitiveArray::new(buffer![0u32], Validity::NonNullable).into_array(),
            BoolArray::from(BitBuffer::new_unset(1)).into_array(),
            None,
        );
        let patched = arr.patch(&patches);
        assert_eq!(patched.bit_buffer().inner().as_ptr(), original_ptr);

        let bits = patched.into_bit_buffer();
        assert_eq!(bits.inner().as_slice(), &[254, 255]);
    }

    #[test]
    fn patch_sliced_bools_offset() {
        let array = BoolArray::from(BitBuffer::new_set(15));
        let window = array.slice(4..15);
        let bits = window.to_bool().into_bit_buffer().into_mut();
        assert_eq!(bits.as_slice(), &[255, 255]);
    }
}