vortex_array/arrays/bool/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use arrow_array::BooleanArray;
5use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
6use vortex_buffer::ByteBuffer;
7use vortex_dtype::DType;
8use vortex_error::{VortexResult, vortex_ensure};
9
10use crate::Canonical;
11use crate::arrays::{BoolVTable, bool};
12use crate::builders::ArrayBuilder;
13use crate::stats::{ArrayStats, StatsSetRef};
14use crate::validity::Validity;
15use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
16
17/// A boolean array that stores true/false values in a compact bit-packed format.
18///
19/// This mirrors the Apache Arrow Boolean array encoding, where each boolean value
20/// is stored as a single bit rather than a full byte.
21///
22/// The data layout uses:
23/// - A bit-packed buffer where each bit represents one boolean value (0 = false, 1 = true)
24/// - An optional validity child array, which must be of type `Bool(NonNullable)`, where true values
25///   indicate valid and false indicates null. if the i-th value is null in the validity child,
26///   the i-th packed bit in the buffer may be 0 or 1, i.e. it is undefined.
27/// - Bit-level slicing is supported with minimal overhead
28///
29/// # Examples
30///
31/// ```
32/// use vortex_array::arrays::BoolArray;
33/// use vortex_array::IntoArray;
34///
35/// // Create from iterator using FromIterator impl
36/// let array: BoolArray = [true, false, true, false].into_iter().collect();
37///
38/// // Slice the array
39/// let sliced = array.slice(1, 3);
40/// assert_eq!(sliced.len(), 2);
41///
42/// // Access individual values
43/// let value = array.scalar_at(0);
44/// assert_eq!(value, true.into());
45/// ```
46#[derive(Clone, Debug)]
47pub struct BoolArray {
48    dtype: DType,
49    buffer: BooleanBuffer,
50    pub(crate) validity: Validity,
51    pub(crate) stats_set: ArrayStats,
52}
53
54impl BoolArray {
55    fn validate(
56        buffer: &ByteBuffer,
57        offset: usize,
58        len: usize,
59        validity: &Validity,
60    ) -> VortexResult<()> {
61        vortex_ensure!(
62            offset < 8,
63            "offset must be less than whole byte, was {offset} bits"
64        );
65
66        // Validate the buffer is large enough to hold all the bits
67        let required_bytes = offset.saturating_add(len).div_ceil(8);
68        vortex_ensure!(
69            buffer.len() >= required_bytes,
70            "BoolArray with offset={offset} len={len} cannot be built from buffer of size {}",
71            buffer.len()
72        );
73
74        // Validate validity
75        if let Some(validity_len) = validity.maybe_len() {
76            vortex_ensure!(
77                validity_len == len,
78                "BoolArray of size {len} cannot be built with validity of size {validity_len}"
79            );
80        }
81
82        Ok(())
83    }
84}
85
86impl BoolArray {
87    /// Construct a new `BoolArray` from its components:
88    ///
89    /// * `buffer` is a raw ByteBuffer holding the packed bits
90    /// * `offset` is the number of bits in the start of the buffer that should be skipped when
91    ///   looking up the i-th value.
92    /// * `len` is the length of the array, which should correspond to the number of bits
93    /// * `validity` holds the null values.
94    ///
95    /// # Validation
96    ///
97    /// Buffer must be at least large enough to hold `len` bits starting at `offset`.
98    ///
99    /// A provided validity array must be of size `len`.
100    ///
101    /// The offset must be less than a whole byte.
102    pub fn try_new(
103        buffer: ByteBuffer,
104        offset: usize,
105        len: usize,
106        validity: Validity,
107    ) -> VortexResult<Self> {
108        Self::validate(&buffer, offset, len, &validity)?;
109
110        Ok(Self::new(
111            BooleanBuffer::new(buffer.into_arrow_buffer(), offset, len),
112            validity,
113        ))
114    }
115
116    /// Creates a new [`BoolArray`] from a [`BooleanBuffer`] and [`Validity`] directly.
117    ///
118    /// Panics if the validity length differs from the buffer length.
119    pub fn new(buffer: BooleanBuffer, validity: Validity) -> Self {
120        if let Some(validity_len) = validity.maybe_len() {
121            assert_eq!(buffer.len(), validity_len);
122        }
123
124        // Shrink the buffer to remove any whole bytes.
125        let buffer = buffer.shrink_offset();
126        Self {
127            dtype: DType::Bool(validity.nullability()),
128            buffer,
129            validity,
130            stats_set: ArrayStats::default(),
131        }
132    }
133
134    /// Create a new BoolArray from a set of indices and a length.
135    ///
136    /// All indices must be less than the length.
137    pub fn from_indices<I: IntoIterator<Item = usize>>(
138        length: usize,
139        indices: I,
140        validity: Validity,
141    ) -> Self {
142        let mut buffer = MutableBuffer::new_null(length);
143        let buffer_slice = buffer.as_slice_mut();
144        indices
145            .into_iter()
146            .for_each(|idx| arrow_buffer::bit_util::set_bit(buffer_slice, idx));
147        Self::new(
148            BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
149            validity,
150        )
151    }
152
153    /// Returns the underlying [`BooleanBuffer`] of the array.
154    pub fn boolean_buffer(&self) -> &BooleanBuffer {
155        assert!(
156            self.buffer.offset() < 8,
157            "Offset must be <8, did we forget to call shrink_offset? Found {}",
158            self.buffer.offset()
159        );
160        &self.buffer
161    }
162
163    /// Get a mutable version of this array.
164    ///
165    /// If the caller holds the only reference to the underlying buffer the underlying buffer is returned
166    /// otherwise a copy is created.
167    ///
168    /// The second value of the tuple is a bit_offset of first value in first byte of the returned builder
169    pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
170        let offset = self.buffer.offset();
171        let len = self.buffer.len();
172        let arrow_buffer = self.buffer.into_inner();
173        let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
174            arrow_buffer.into_mutable().unwrap_or_else(|b| {
175                let mut buf = MutableBuffer::with_capacity(b.len());
176                buf.extend_from_slice(b.as_slice());
177                buf
178            })
179        } else {
180            let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
181            buf.extend_from_slice(arrow_buffer.as_slice());
182            buf
183        };
184
185        (
186            BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
187            offset,
188        )
189    }
190}
191
192impl From<BooleanBuffer> for BoolArray {
193    fn from(value: BooleanBuffer) -> Self {
194        Self::new(value, Validity::NonNullable)
195    }
196}
197
198impl FromIterator<bool> for BoolArray {
199    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
200        Self::new(BooleanBuffer::from_iter(iter), Validity::NonNullable)
201    }
202}
203
204impl FromIterator<Option<bool>> for BoolArray {
205    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
206        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
207
208        Self::new(
209            buffer,
210            nulls.map(Validity::from).unwrap_or(Validity::AllValid),
211        )
212    }
213}
214
215impl ValidityHelper for BoolArray {
216    fn validity(&self) -> &Validity {
217        &self.validity
218    }
219}
220
221impl ArrayVTable<BoolVTable> for BoolVTable {
222    fn len(array: &BoolArray) -> usize {
223        array.buffer.len()
224    }
225
226    fn dtype(array: &BoolArray) -> &DType {
227        &array.dtype
228    }
229
230    fn stats(array: &BoolArray) -> StatsSetRef<'_> {
231        array.stats_set.to_ref(array.as_ref())
232    }
233}
234
235impl CanonicalVTable<BoolVTable> for BoolVTable {
236    fn canonicalize(array: &BoolArray) -> VortexResult<Canonical> {
237        Ok(Canonical::Bool(array.clone()))
238    }
239
240    fn append_to_builder(array: &BoolArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
241        builder.extend_from_array(array.as_ref())
242    }
243}
244
/// Extension methods for [`BooleanBuffer`].
pub trait BooleanBufferExt {
    /// Drops every whole byte covered by the bit offset, so that the remaining offset is
    /// less than 8.
    fn shrink_offset(self) -> Self;
}
249
250impl BooleanBufferExt for BooleanBuffer {
251    fn shrink_offset(self) -> Self {
252        let byte_offset = self.offset() / 8;
253        let bit_offset = self.offset() % 8;
254        let len = self.len();
255        let buffer = self
256            .into_inner()
257            .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
258        BooleanBuffer::new(buffer, bit_offset, len)
259    }
260}
261
#[cfg(test)]
mod tests {
    use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
    use vortex_buffer::buffer;

    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::patches::Patches;
    use crate::validity::Validity;
    use crate::vtable::ValidityHelper;
    use crate::{Array, IntoArray, ToCanonical};

    /// The first value of a non-nullable array reads back as `true`.
    #[test]
    fn bool_array() {
        let array = BoolArray::from_iter([true, false, true]);
        let first = bool::try_from(&array.scalar_at(0)).unwrap();
        assert!(first);
    }

    /// Collecting only `Some` values yields `Validity::AllValid` and the right values.
    #[test]
    fn test_all_some_iter() {
        let array = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(array.validity(), Validity::AllValid));

        assert!(bool::try_from(&array.scalar_at(0)).unwrap());
        assert!(!bool::try_from(&array.scalar_at(1)).unwrap());
    }

    /// Collecting a mix of `Some` and `None` preserves both values and nulls.
    #[test]
    fn test_bool_from_iter() {
        let expected = [Some(true), Some(true), None, Some(false), None];
        let array = BoolArray::from_iter(expected);

        for (idx, want) in expected.into_iter().enumerate() {
            let scalar = array.scalar_at(idx);
            match want {
                Some(value) => assert_eq!(bool::try_from(&scalar).unwrap(), value),
                None => assert!(scalar.is_null()),
            }
        }
    }

    /// Patching the parent array must not alter a slice taken from it beforehand.
    #[test]
    fn patch_sliced_bools() {
        // One `false` followed by eleven `true` values.
        let array = {
            let mut bits = BooleanBufferBuilder::new(12);
            bits.append(false);
            bits.append_n(11, true);
            BoolArray::from(bits.finish())
        };

        let sliced = array.slice(4, 12);
        let sliced_len = sliced.len();
        let (builder, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(builder.as_slice(), &[254, 15]);

        // patch the underlying array: set index 4 to false
        let patches = Patches::new(
            array.len(),
            0,
            buffer![4u32].into_array(), // This creates a non-nullable array
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let patched = array.patch(&patches).unwrap();
        let patched_len = patched.len();
        let (builder, bit_offset) = patched.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 0);
        assert_eq!(builder.len(), patched_len + bit_offset);
        assert_eq!(builder.as_slice(), &[238, 15]);

        // the slice should be unchanged
        let (builder, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(builder.len(), sliced_len + bit_offset);
        assert_eq!(builder.as_slice(), &[254, 15]); // unchanged
    }

    /// Slicing in the middle of a byte keeps the sub-byte offset in the builder.
    #[test]
    fn slice_array_in_middle() {
        let array = BoolArray::from(BooleanBuffer::new_set(16));
        let sliced = array.slice(4, 12);
        let sliced_len = sliced.len();

        let (builder, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(builder.len(), sliced_len + bit_offset);
        assert_eq!(builder.as_slice(), &[255, 15]);
    }

    /// Patches an array built over an owned buffer and compares buffer pointers.
    ///
    /// NOTE(review): the test is marked `should_panic`; presumably the pointer-equality
    /// assertion is the step that fails. Confirm which step is expected to panic.
    #[test]
    #[should_panic]
    fn patch_bools_owned() {
        let bytes = buffer![255u8; 2];
        let bits = BooleanBuffer::new(bytes.into_arrow_buffer(), 0, 15);
        let array = BoolArray::new(bits, Validity::NonNullable);
        let original_ptr = array.boolean_buffer().sliced().as_ptr();

        let patches = Patches::new(
            array.len(),
            0,
            PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let patched = array.patch(&patches).unwrap();
        assert_eq!(patched.boolean_buffer().sliced().as_ptr(), original_ptr);

        let (builder, _byte_bit_offset) = patched.to_bool().unwrap().into_boolean_builder();
        assert_eq!(builder.as_slice(), &[254, 127]);
    }
}