vortex_array/arrays/bool/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::ops::BitAnd;
5
6use arrow_array::BooleanArray;
7use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
8use vortex_buffer::ByteBuffer;
9use vortex_dtype::DType;
10use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
11use vortex_mask::Mask;
12
13use crate::Canonical;
14use crate::arrays::{BoolVTable, bool};
15use crate::builders::ArrayBuilder;
16use crate::stats::{ArrayStats, StatsSetRef};
17use crate::validity::Validity;
18use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
19
/// A boolean array that stores true/false values in a compact bit-packed format.
///
/// This mirrors the Apache Arrow Boolean array encoding, where each boolean value
/// is stored as a single bit rather than a full byte.
///
/// The data layout uses:
/// - A bit-packed buffer where each bit represents one boolean value (0 = false, 1 = true)
/// - An optional validity child array, which must be of type `Bool(NonNullable)`, where true values
///   indicate valid and false values indicate null. If the i-th value is null in the validity
///   child, the i-th packed bit in the buffer may be 0 or 1, i.e. it is undefined.
/// - Bit-level slicing is supported with minimal overhead
///
/// # Examples
///
/// ```
/// use vortex_array::arrays::BoolArray;
/// use vortex_array::IntoArray;
///
/// // Create from iterator using FromIterator impl
/// let array: BoolArray = [true, false, true, false].into_iter().collect();
///
/// // Slice the array
/// let sliced = array.slice(1..3);
/// assert_eq!(sliced.len(), 2);
///
/// // Access individual values
/// let value = array.scalar_at(0);
/// assert_eq!(value, true.into());
/// ```
#[derive(Clone, Debug)]
pub struct BoolArray {
    /// `DType::Bool` carrying the nullability of `validity`, as set by the constructors in this
    /// file.
    dtype: DType,
    /// Bit-packed values. The constructors in this file run `shrink_offset` on it, and
    /// `boolean_buffer` asserts that its bit offset is below 8.
    buffer: BooleanBuffer,
    /// Validity of the values; when it has a length of its own, the constructors require it to
    /// equal the number of values.
    pub(crate) validity: Validity,
    /// Statistics storage handed out through `ArrayVTable::stats`; the constructors in this file
    /// start it as `ArrayStats::default()`.
    pub(crate) stats_set: ArrayStats,
}
56
57impl BoolArray {
58    /// Constructs a new `BoolArray`.
59    ///
60    /// See [`BoolArray::new_unchecked`] for more information.
61    ///
62    /// # Errors
63    ///
64    /// Returns an error if the provided components do not satisfy the invariants documented in
65    /// [`BoolArray::new_unchecked`].
66    pub fn try_new(
67        buffer: ByteBuffer,
68        offset: usize,
69        len: usize,
70        validity: Validity,
71    ) -> VortexResult<Self> {
72        Self::validate(&buffer, offset, len, &validity)?;
73
74        // SAFETY: validate ensures all invariants are met.
75        Ok(unsafe { Self::new_unchecked(buffer, offset, len, validity) })
76    }
77
78    /// Creates a new [`BoolArray`] without validation from these components:
79    ///
80    /// * `buffer` is a raw [`ByteBuffer`] holding the packed bits.
81    /// * `offset` is the number of bits in the start of the buffer that should be skipped when
82    ///   looking up the i-th value.
83    /// * `len` is the length of the array, which should correspond to the number of bits.
84    /// * `validity` holds the null values.
85    ///
86    /// # Safety
87    ///
88    /// The caller must ensure all of the following invariants are satisfied:
89    ///
90    /// - `buffer` must contain at least `(offset + len).div_ceil(8)` bytes.
91    /// - `offset` must be less than 8 (it represents the bit offset within the first byte).
92    /// - If `validity` is `Validity::Array`, its length must exactly equal `len`.
93    pub unsafe fn new_unchecked(
94        buffer: ByteBuffer,
95        offset: usize,
96        len: usize,
97        validity: Validity,
98    ) -> Self {
99        let buffer = BooleanBuffer::new(buffer.into_arrow_buffer(), offset, len);
100        let buffer = buffer.shrink_offset();
101        Self {
102            dtype: DType::Bool(validity.nullability()),
103            buffer,
104            validity,
105            stats_set: ArrayStats::default(),
106        }
107    }
108
109    /// Validates the components that would be used to create a [`BoolArray`].
110    ///
111    /// This function checks all the invariants required by [`BoolArray::new_unchecked`].
112    pub(crate) fn validate(
113        buffer: &ByteBuffer,
114        offset: usize,
115        len: usize,
116        validity: &Validity,
117    ) -> VortexResult<()> {
118        vortex_ensure!(
119            offset < 8,
120            "offset must be less than whole byte, was {offset} bits"
121        );
122
123        // Validate the buffer is large enough to hold all the bits
124        let required_bytes = offset.saturating_add(len).div_ceil(8);
125        vortex_ensure!(
126            buffer.len() >= required_bytes,
127            "BoolArray with offset={offset} len={len} cannot be built from buffer of size {}",
128            buffer.len()
129        );
130
131        // Validate validity
132        if let Some(validity_len) = validity.maybe_len() {
133            vortex_ensure!(
134                validity_len == len,
135                "BoolArray of size {len} cannot be built with validity of size {validity_len}"
136            );
137        }
138
139        Ok(())
140    }
141
142    /// Creates a new [`BoolArray`] from a [`BooleanBuffer`] and [`Validity`] directly.
143    ///
144    /// # Panics
145    ///
146    /// Panics if the validity is [`Validity::Array`] and the length is not the same as the buffer.
147    pub fn from_bool_buffer(buffer: BooleanBuffer, validity: Validity) -> Self {
148        if let Some(validity_len) = validity.maybe_len() {
149            assert_eq!(buffer.len(), validity_len);
150        }
151
152        // Shrink the buffer to remove any whole bytes.
153        let buffer = buffer.shrink_offset();
154        Self {
155            dtype: DType::Bool(validity.nullability()),
156            buffer,
157            validity,
158            stats_set: ArrayStats::default(),
159        }
160    }
161
162    /// Create a new BoolArray from a set of indices and a length.
163    ///
164    /// All indices must be less than the length.
165    pub fn from_indices<I: IntoIterator<Item = usize>>(
166        length: usize,
167        indices: I,
168        validity: Validity,
169    ) -> Self {
170        let mut buffer = MutableBuffer::new_null(length);
171        let buffer_slice = buffer.as_slice_mut();
172        indices
173            .into_iter()
174            .for_each(|idx| arrow_buffer::bit_util::set_bit(buffer_slice, idx));
175        Self::from_bool_buffer(
176            BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
177            validity,
178        )
179    }
180
    /// Returns the underlying [`BooleanBuffer`] of the array.
    ///
    /// # Panics
    ///
    /// Panics if the stored buffer has a bit offset of 8 or more. The constructors in this file
    /// call `shrink_offset`, so hitting this indicates a buffer that bypassed that step.
    pub fn boolean_buffer(&self) -> &BooleanBuffer {
        // Sanity check of the "offset < 8" invariant before handing the buffer out.
        assert!(
            self.buffer.offset() < 8,
            "Offset must be <8, did we forget to call shrink_offset? Found {}",
            self.buffer.offset()
        );
        &self.buffer
    }
190
191    /// Get a mutable version of this array.
192    ///
193    /// If the caller holds the only reference to the underlying buffer the underlying buffer is returned
194    /// otherwise a copy is created.
195    ///
196    /// The second value of the tuple is a bit_offset of first value in first byte of the returned builder
197    pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
198        let offset = self.buffer.offset();
199        let len = self.buffer.len();
200        let arrow_buffer = self.buffer.into_inner();
201        let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
202            arrow_buffer.into_mutable().unwrap_or_else(|b| {
203                let mut buf = MutableBuffer::with_capacity(b.len());
204                buf.extend_from_slice(b.as_slice());
205                buf
206            })
207        } else {
208            let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
209            buf.extend_from_slice(arrow_buffer.as_slice());
210            buf
211        };
212
213        (
214            BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
215            offset,
216        )
217    }
218
    /// Converts this array into a [`Mask`] built from its boolean buffer.
    ///
    /// # Panics
    ///
    /// Invokes `vortex_expect` with "cannot convert nullable boolean array to mask" when
    /// [`BoolArray::maybe_to_mask`] returns `None`, i.e. when `all_valid()` is false.
    pub fn to_mask(&self) -> Mask {
        self.maybe_to_mask()
            .vortex_expect("cannot convert nullable boolean array to mask")
    }
223
224    pub fn maybe_to_mask(&self) -> Option<Mask> {
225        self.all_valid()
226            .then(|| Mask::from_buffer(self.boolean_buffer().clone()))
227    }
228
229    pub fn to_mask_fill_null_false(&self) -> Mask {
230        if let Some(constant) = self.as_constant() {
231            let bool_constant = constant.as_bool();
232            if bool_constant.value().unwrap_or(false) {
233                return Mask::new_true(self.len());
234            } else {
235                return Mask::new_false(self.len());
236            }
237        }
238        // Extract a boolean buffer, treating null values to false
239        let buffer = match self.validity_mask() {
240            Mask::AllTrue(_) => self.boolean_buffer().clone(),
241            Mask::AllFalse(_) => return Mask::new_false(self.len()),
242            Mask::Values(validity) => validity.boolean_buffer().bitand(self.boolean_buffer()),
243        };
244        Mask::from_buffer(buffer)
245    }
246}
247
/// Builds a non-nullable [`BoolArray`] from a [`BooleanBuffer`].
impl From<BooleanBuffer> for BoolArray {
    /// Delegates to [`BoolArray::from_bool_buffer`] with [`Validity::NonNullable`].
    fn from(value: BooleanBuffer) -> Self {
        Self::from_bool_buffer(value, Validity::NonNullable)
    }
}
253
254impl FromIterator<bool> for BoolArray {
255    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
256        Self::from_bool_buffer(BooleanBuffer::from_iter(iter), Validity::NonNullable)
257    }
258}
259
260impl FromIterator<Option<bool>> for BoolArray {
261    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
262        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
263
264        Self::from_bool_buffer(
265            buffer,
266            nulls.map(Validity::from).unwrap_or(Validity::AllValid),
267        )
268    }
269}
270
impl ValidityHelper for BoolArray {
    /// Returns a reference to the validity stored on this array.
    fn validity(&self) -> &Validity {
        &self.validity
    }
}
276
impl ArrayVTable<BoolVTable> for BoolVTable {
    /// The array length is the length reported by the underlying `BooleanBuffer`.
    fn len(array: &BoolArray) -> usize {
        array.buffer.len()
    }

    /// Returns the dtype stored on the array.
    fn dtype(array: &BoolArray) -> &DType {
        &array.dtype
    }

    /// Returns a reference view over the array's `stats_set`, bound to this array.
    fn stats(array: &BoolArray) -> StatsSetRef<'_> {
        array.stats_set.to_ref(array.as_ref())
    }
}
290
impl CanonicalVTable<BoolVTable> for BoolVTable {
    /// Returns `Canonical::Bool` wrapping a clone of the array itself.
    fn canonicalize(array: &BoolArray) -> Canonical {
        Canonical::Bool(array.clone())
    }

    /// Appends the array by delegating to `builder.extend_from_array`.
    fn append_to_builder(array: &BoolArray, builder: &mut dyn ArrayBuilder) {
        builder.extend_from_array(array.as_ref())
    }
}
300
/// Extension methods for bit buffers; implemented in this file for [`BooleanBuffer`].
pub trait BooleanBufferExt {
    /// Slice any full bytes from the buffer, leaving the offset < 8.
    ///
    /// Consumes `self` and returns the adjusted buffer.
    fn shrink_offset(self) -> Self;
}
305
306impl BooleanBufferExt for BooleanBuffer {
307    fn shrink_offset(self) -> Self {
308        let byte_offset = self.offset() / 8;
309        let bit_offset = self.offset() % 8;
310        let len = self.len();
311        let buffer = self
312            .into_inner()
313            .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
314        BooleanBuffer::new(buffer, bit_offset, len)
315    }
316}
317
318#[cfg(test)]
319mod tests {
320    use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
321    use vortex_buffer::buffer;
322
323    use crate::arrays::{BoolArray, PrimitiveArray};
324    use crate::patches::Patches;
325    use crate::validity::Validity;
326    use crate::vtable::ValidityHelper;
327    use crate::{Array, IntoArray, ToCanonical};
328
329    #[test]
330    fn bool_array() {
331        let arr = BoolArray::from_iter([true, false, true]);
332        let scalar = bool::try_from(&arr.scalar_at(0)).unwrap();
333        assert!(scalar);
334    }
335
336    #[test]
337    fn test_all_some_iter() {
338        let arr = BoolArray::from_iter([Some(true), Some(false)]);
339
340        assert!(matches!(arr.validity(), Validity::AllValid));
341
342        let scalar = bool::try_from(&arr.scalar_at(0)).unwrap();
343        assert!(scalar);
344        let scalar = bool::try_from(&arr.scalar_at(1)).unwrap();
345        assert!(!scalar);
346    }
347
348    #[test]
349    fn test_bool_from_iter() {
350        let arr = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]);
351
352        let scalar = bool::try_from(&arr.scalar_at(0)).unwrap();
353        assert!(scalar);
354
355        let scalar = bool::try_from(&arr.scalar_at(1)).unwrap();
356        assert!(scalar);
357
358        let scalar = arr.scalar_at(2);
359        assert!(scalar.is_null());
360
361        let scalar = bool::try_from(&arr.scalar_at(3)).unwrap();
362        assert!(!scalar);
363
364        let scalar = arr.scalar_at(4);
365        assert!(scalar.is_null());
366    }
367
    /// Patching an array must not change the bytes seen through a slice taken before the patch.
    #[test]
    fn patch_sliced_bools() {
        // 12 bits: bit 0 is false, bits 1..12 are true => bytes [0b1111_1110, 0b0000_1111].
        let arr = {
            let mut builder = BooleanBufferBuilder::new(12);
            builder.append(false);
            builder.append_n(11, true);
            BoolArray::from(builder.finish())
        };
        let sliced = arr.slice(4..12);
        let sliced_len = sliced.len();
        let (values, offset) = sliced.to_bool().into_boolean_builder();
        // The slice starts 4 bits into the first byte and still exposes both bytes.
        assert_eq!(offset, 4);
        assert_eq!(values.as_slice(), &[254, 15]);

        // patch the underlying array
        let patches = Patches::new(
            arr.len(),
            0,
            buffer![4u32].into_array(), // This creates a non-nullable array
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let arr = arr.patch(&patches);
        let arr_len = arr.len();
        let (values, offset) = arr.to_bool().into_boolean_builder();
        assert_eq!(offset, 0);
        assert_eq!(values.len(), arr_len + offset);
        // Bit 4 is now cleared: 254 - 16 = 238.
        assert_eq!(values.as_slice(), &[238, 15]);

        // the slice should be unchanged
        let (values, offset) = sliced.to_bool().into_boolean_builder();
        assert_eq!(offset, 4);
        assert_eq!(values.len(), sliced_len + offset);
        assert_eq!(values.as_slice(), &[254, 15]); // unchanged
    }
402
403    #[test]
404    fn slice_array_in_middle() {
405        let arr = BoolArray::from(BooleanBuffer::new_set(16));
406        let sliced = arr.slice(4..12);
407        let sliced_len = sliced.len();
408        let (values, offset) = sliced.to_bool().into_boolean_builder();
409        assert_eq!(offset, 4);
410        assert_eq!(values.len(), sliced_len + offset);
411        assert_eq!(values.as_slice(), &[255, 15]);
412    }
413
    /// Patches a uniquely owned 15-bit array and compares the buffer pointer before and after.
    ///
    /// NOTE(review): `#[should_panic]` has no `expected` message, so any panic makes this test
    /// pass. Presumably the pointer-equality assertion is the one meant to fail (the patch not
    /// happening in place) - confirm and consider pinning the expected message.
    #[test]
    #[should_panic]
    fn patch_bools_owned() {
        // Two bytes of all ones, of which the first 15 bits belong to the array.
        let buffer = buffer![255u8; 2];
        let buf = BooleanBuffer::new(buffer.into_arrow_buffer(), 0, 15);
        let arr = BoolArray::from_bool_buffer(buf, Validity::NonNullable);
        // Remember where the bits live so the pointer can be compared after patching.
        let buf_ptr = arr.boolean_buffer().sliced().as_ptr();

        // Patch index 0 with a single unset (false) bit.
        let patches = Patches::new(
            arr.len(),
            0,
            PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let arr = arr.patch(&patches);
        assert_eq!(arr.boolean_buffer().sliced().as_ptr(), buf_ptr);

        let (values, _byte_bit_offset) = arr.to_bool().into_boolean_builder();
        // Bit 0 cleared in the first byte (254); only 7 bits used in the second byte (127).
        assert_eq!(values.as_slice(), &[254, 127]);
    }
434}