vortex_array/arrays/bool/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use arrow_array::BooleanArray;
5use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
6use vortex_dtype::DType;
7use vortex_error::{VortexResult, vortex_panic};
8
9use crate::Canonical;
10use crate::arrays::{BoolVTable, bool};
11use crate::builders::ArrayBuilder;
12use crate::stats::{ArrayStats, StatsSetRef};
13use crate::validity::Validity;
14use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
15
16/// A boolean array that stores true/false values in a compact bit-packed format.
17///
18/// This mirrors the Apache Arrow Boolean array encoding, where each boolean value
19/// is stored as a single bit rather than a full byte.
20///
21/// The data layout uses:
22/// - A bit-packed buffer where each bit represents one boolean value (0 = false, 1 = true)
23/// - An optional validity child array, which must be of type `Bool(NonNullable)`, where true values
24///   indicate valid and false indicates null. if the i-th value is null in the validity child,
25///   the i-th packed bit in the buffer may be 0 or 1, i.e. it is undefined.
26/// - Bit-level slicing is supported with minimal overhead
27///
28/// # Examples
29///
30/// ```
31/// use vortex_array::arrays::BoolArray;
32/// use vortex_array::IntoArray;
33///
34/// // Create from iterator using FromIterator impl
35/// let array: BoolArray = [true, false, true, false].into_iter().collect();
36///
37/// // Slice the array
38/// let sliced = array.slice(1, 3).unwrap();
39/// assert_eq!(sliced.len(), 2);
40///
41/// // Access individual values
42/// let value = array.scalar_at(0).unwrap();
43/// assert_eq!(value, true.into());
44/// ```
45#[derive(Clone, Debug)]
46pub struct BoolArray {
47    dtype: DType,
48    buffer: BooleanBuffer,
49    pub(crate) validity: Validity,
50    pub(crate) stats_set: ArrayStats,
51}
52
53impl BoolArray {
54    /// Create a new BoolArray from a set of indices and a length.
55    /// All indices must be less than the length.
56    pub fn from_indices<I: IntoIterator<Item = usize>>(
57        length: usize,
58        indices: I,
59        validity: Validity,
60    ) -> Self {
61        let mut buffer = MutableBuffer::new_null(length);
62        indices
63            .into_iter()
64            .for_each(|idx| arrow_buffer::bit_util::set_bit(&mut buffer, idx));
65        Self::new(
66            BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
67            validity,
68        )
69    }
70
71    /// Creates a new [`BoolArray`] from a [`BooleanBuffer`] and [`Validity`], without checking
72    /// any invariants.
73    pub fn new(buffer: BooleanBuffer, validity: Validity) -> Self {
74        if let Some(len) = validity.maybe_len() {
75            if buffer.len() != len {
76                vortex_panic!(
77                    "Buffer and validity length mismatch: buffer={}, validity={}",
78                    buffer.len(),
79                    len
80                );
81            }
82        }
83
84        // Shrink the buffer to remove any whole bytes.
85        let buffer = buffer.shrink_offset();
86        Self {
87            dtype: DType::Bool(validity.nullability()),
88            buffer,
89            validity,
90            stats_set: ArrayStats::default(),
91        }
92    }
93
94    /// Returns the underlying [`BooleanBuffer`] of the array.
95    pub fn boolean_buffer(&self) -> &BooleanBuffer {
96        assert!(
97            self.buffer.offset() < 8,
98            "Offset must be <8, did we forget to call shrink_offset? Found {}",
99            self.buffer.offset()
100        );
101        &self.buffer
102    }
103
104    /// Get a mutable version of this array.
105    ///
106    /// If the caller holds the only reference to the underlying buffer the underlying buffer is returned
107    /// otherwise a copy is created.
108    ///
109    /// The second value of the tuple is a bit_offset of first value in first byte of the returned builder
110    pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
111        let offset = self.buffer.offset();
112        let len = self.buffer.len();
113        let arrow_buffer = self.buffer.into_inner();
114        let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
115            arrow_buffer.into_mutable().unwrap_or_else(|b| {
116                let mut buf = MutableBuffer::with_capacity(b.len());
117                buf.extend_from_slice(b.as_slice());
118                buf
119            })
120        } else {
121            let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
122            buf.extend_from_slice(arrow_buffer.as_slice());
123            buf
124        };
125
126        (
127            BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
128            offset,
129        )
130    }
131}
132
133impl From<BooleanBuffer> for BoolArray {
134    fn from(value: BooleanBuffer) -> Self {
135        Self::new(value, Validity::NonNullable)
136    }
137}
138
139impl FromIterator<bool> for BoolArray {
140    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
141        Self::new(BooleanBuffer::from_iter(iter), Validity::NonNullable)
142    }
143}
144
145impl FromIterator<Option<bool>> for BoolArray {
146    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
147        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
148
149        Self::new(
150            buffer,
151            nulls.map(Validity::from).unwrap_or(Validity::AllValid),
152        )
153    }
154}
155
156impl ValidityHelper for BoolArray {
157    fn validity(&self) -> &Validity {
158        &self.validity
159    }
160}
161
162impl ArrayVTable<BoolVTable> for BoolVTable {
163    fn len(array: &BoolArray) -> usize {
164        array.buffer.len()
165    }
166
167    fn dtype(array: &BoolArray) -> &DType {
168        &array.dtype
169    }
170
171    fn stats(array: &BoolArray) -> StatsSetRef<'_> {
172        array.stats_set.to_ref(array.as_ref())
173    }
174}
175
176impl CanonicalVTable<BoolVTable> for BoolVTable {
177    fn canonicalize(array: &BoolArray) -> VortexResult<Canonical> {
178        Ok(Canonical::Bool(array.clone()))
179    }
180
181    fn append_to_builder(array: &BoolArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
182        builder.extend_from_array(array.as_ref())
183    }
184}
185
/// Extension methods for normalising the bit offset of a buffer.
pub trait BooleanBufferExt {
    /// Drops every whole byte covered by the buffer's bit offset, so that the remaining
    /// offset is < 8. The logical length is left unchanged.
    fn shrink_offset(self) -> Self;
}
190
191impl BooleanBufferExt for BooleanBuffer {
192    fn shrink_offset(self) -> Self {
193        let byte_offset = self.offset() / 8;
194        let bit_offset = self.offset() % 8;
195        let len = self.len();
196        let buffer = self
197            .into_inner()
198            .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
199        BooleanBuffer::new(buffer, bit_offset, len)
200    }
201}
202
#[cfg(test)]
mod tests {
    use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
    use vortex_buffer::buffer;

    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::patches::Patches;
    use crate::validity::Validity;
    use crate::vtable::ValidityHelper;
    use crate::{Array, IntoArray, ToCanonical};

    /// Reads position `idx` as a plain `bool`, unwrapping both the lookup and the conversion.
    fn bool_at(arr: &BoolArray, idx: usize) -> bool {
        bool::try_from(&arr.scalar_at(idx).unwrap()).unwrap()
    }

    #[test]
    fn bool_array() {
        let arr: BoolArray = [true, false, true].into_iter().collect();
        assert!(bool_at(&arr, 0));
    }

    #[test]
    fn test_all_some_iter() {
        let arr: BoolArray = [Some(true), Some(false)].into_iter().collect();

        assert!(matches!(arr.validity(), Validity::AllValid));

        assert!(bool_at(&arr, 0));
        assert!(!bool_at(&arr, 1));
    }

    #[test]
    fn test_bool_from_iter() {
        let arr: BoolArray = [Some(true), Some(true), None, Some(false), None]
            .into_iter()
            .collect();

        assert!(bool_at(&arr, 0));
        assert!(bool_at(&arr, 1));
        assert!(arr.scalar_at(2).unwrap().is_null());
        assert!(!bool_at(&arr, 3));
        assert!(arr.scalar_at(4).unwrap().is_null());
    }

    #[test]
    fn patch_sliced_bools() {
        // One `false` followed by eleven `true`s, i.e. bytes 0b1111_1110 and 0b0000_1111.
        let arr = {
            let mut bits = BooleanBufferBuilder::new(12);
            bits.append(false);
            bits.append_n(11, true);
            BoolArray::from(bits.finish())
        };
        let sliced = arr.slice(4, 12).unwrap();
        let sliced_len = sliced.len();
        let (builder, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(builder.as_slice(), &[254, 15]);

        // patch the underlying array: clear bit 4, turning 254 into 238
        let patches = Patches::new(
            arr.len(),
            0,
            buffer![4u32].into_array(), // This creates a non-nullable array
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let arr = arr.patch(&patches).unwrap();
        let arr_len = arr.len();
        let (builder, bit_offset) = arr.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 0);
        assert_eq!(builder.len(), arr_len + bit_offset);
        assert_eq!(builder.as_slice(), &[238, 15]);

        // the slice should be unchanged
        let (builder, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(builder.len(), sliced_len + bit_offset);
        assert_eq!(builder.as_slice(), &[254, 15]); // unchanged
    }

    #[test]
    fn slice_array_in_middle() {
        let all_set = BoolArray::from(BooleanBuffer::new_set(16));
        let sliced = all_set.slice(4, 12).unwrap();
        let sliced_len = sliced.len();
        let (builder, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(builder.len(), sliced_len + bit_offset);
        assert_eq!(builder.as_slice(), &[255, 15]);
    }

    // NOTE(review): this test is expected to panic; which statement panics is not visible
    // from this file — confirm against the `patch` implementation.
    #[test]
    #[should_panic]
    fn patch_bools_owned() {
        let bytes = buffer![255u8; 2];
        let bits = BooleanBuffer::new(bytes.into_arrow_buffer(), 0, 15);
        let arr = BoolArray::new(bits, Validity::NonNullable);
        let original_ptr = arr.boolean_buffer().sliced().as_ptr();

        let patches = Patches::new(
            arr.len(),
            0,
            PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let arr = arr.patch(&patches).unwrap();
        assert_eq!(arr.boolean_buffer().sliced().as_ptr(), original_ptr);

        let (builder, _byte_bit_offset) = arr.to_bool().unwrap().into_boolean_builder();
        assert_eq!(builder.as_slice(), &[254, 127]);
    }
}
319}