vortex_array/arrays/bool/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use arrow_array::BooleanArray;
5use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
6use vortex_dtype::DType;
7use vortex_error::{VortexResult, vortex_panic};
8
9use crate::Canonical;
10use crate::arrays::{BoolVTable, bool};
11use crate::builders::ArrayBuilder;
12use crate::stats::{ArrayStats, StatsSetRef};
13use crate::validity::Validity;
14use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
15
16/// A boolean array that stores true/false values in a compact bit-packed format.
17///
18/// This mirrors the Apache Arrow Boolean array encoding, where each boolean value
19/// is stored as a single bit rather than a full byte.
20///
21/// The data layout uses:
22/// - A bit-packed buffer where each bit represents one boolean value (0 = false, 1 = true)
23/// - An optional validity child array, which must be of type `Bool(NonNullable)`, where true values
24///   indicate valid and false indicates null. if the i-th value is null in the validity child,
25///   the i-th packed bit in the buffer may be 0 or 1, i.e. it is undefined.
26/// - Bit-level slicing is supported with minimal overhead
27///
28/// # Examples
29///
30/// ```
31/// use vortex_array::arrays::BoolArray;
32/// use vortex_array::IntoArray;
33///
34/// // Create from iterator using FromIterator impl
35/// let array: BoolArray = [true, false, true, false].into_iter().collect();
36///
37/// // Slice the array
38/// let sliced = array.slice(1, 3).unwrap();
39/// assert_eq!(sliced.len(), 2);
40///
41/// // Access individual values
42/// let value = array.scalar_at(0).unwrap();
43/// assert_eq!(value, true.into());
44/// ```
45#[derive(Clone, Debug)]
46pub struct BoolArray {
47    dtype: DType,
48    buffer: BooleanBuffer,
49    pub(crate) validity: Validity,
50    pub(crate) stats_set: ArrayStats,
51}
52
53impl BoolArray {
54    /// Create a new [`BoolArray`] from a set of indices, a length and a [`Validity`].
55    /// All indices must be less than the length.
56    pub fn from_indices<I: IntoIterator<Item = usize>>(
57        length: usize,
58        indices: I,
59        validity: Validity,
60    ) -> Self {
61        let mut buffer = MutableBuffer::new_null(length);
62        let buffer_slice = buffer.as_slice_mut();
63        indices
64            .into_iter()
65            .for_each(|idx| arrow_buffer::bit_util::set_bit(buffer_slice, idx));
66        Self::new(
67            BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
68            validity,
69        )
70    }
71
72    /// Creates a new [`BoolArray`] from a [`BooleanBuffer`] and [`Validity`], without checking
73    /// any invariants.
74    pub fn new(buffer: BooleanBuffer, validity: Validity) -> Self {
75        if let Some(len) = validity.maybe_len()
76            && buffer.len() != len
77        {
78            vortex_panic!(
79                "Buffer and validity length mismatch: buffer={}, validity={}",
80                buffer.len(),
81                len
82            );
83        }
84
85        // Shrink the buffer to remove any whole bytes.
86        let buffer = buffer.shrink_offset();
87        Self {
88            dtype: DType::Bool(validity.nullability()),
89            buffer,
90            validity,
91            stats_set: ArrayStats::default(),
92        }
93    }
94
95    /// Returns the underlying [`BooleanBuffer`] of the array.
96    pub fn boolean_buffer(&self) -> &BooleanBuffer {
97        assert!(
98            self.buffer.offset() < 8,
99            "Offset must be <8, did we forget to call shrink_offset? Found {}",
100            self.buffer.offset()
101        );
102        &self.buffer
103    }
104
105    /// Get a mutable version of this array.
106    ///
107    /// If the caller holds the only reference to the underlying buffer the underlying buffer is returned
108    /// otherwise a copy is created.
109    ///
110    /// The second value of the tuple is a bit_offset of first value in first byte of the returned builder
111    pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
112        let offset = self.buffer.offset();
113        let len = self.buffer.len();
114        let arrow_buffer = self.buffer.into_inner();
115        let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
116            arrow_buffer.into_mutable().unwrap_or_else(|b| {
117                let mut buf = MutableBuffer::with_capacity(b.len());
118                buf.extend_from_slice(b.as_slice());
119                buf
120            })
121        } else {
122            let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
123            buf.extend_from_slice(arrow_buffer.as_slice());
124            buf
125        };
126
127        (
128            BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
129            offset,
130        )
131    }
132}
133
134impl From<BooleanBuffer> for BoolArray {
135    fn from(value: BooleanBuffer) -> Self {
136        Self::new(value, Validity::NonNullable)
137    }
138}
139
140impl FromIterator<bool> for BoolArray {
141    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
142        Self::new(BooleanBuffer::from_iter(iter), Validity::NonNullable)
143    }
144}
145
146impl FromIterator<Option<bool>> for BoolArray {
147    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
148        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
149
150        Self::new(
151            buffer,
152            nulls.map(Validity::from).unwrap_or(Validity::AllValid),
153        )
154    }
155}
156
157impl ValidityHelper for BoolArray {
158    fn validity(&self) -> &Validity {
159        &self.validity
160    }
161}
162
163impl ArrayVTable<BoolVTable> for BoolVTable {
164    fn len(array: &BoolArray) -> usize {
165        array.buffer.len()
166    }
167
168    fn dtype(array: &BoolArray) -> &DType {
169        &array.dtype
170    }
171
172    fn stats(array: &BoolArray) -> StatsSetRef<'_> {
173        array.stats_set.to_ref(array.as_ref())
174    }
175}
176
177impl CanonicalVTable<BoolVTable> for BoolVTable {
178    fn canonicalize(array: &BoolArray) -> VortexResult<Canonical> {
179        Ok(Canonical::Bool(array.clone()))
180    }
181
182    fn append_to_builder(array: &BoolArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
183        builder.extend_from_array(array.as_ref())
184    }
185}
186
187pub trait BooleanBufferExt {
188    /// Slice any full bytes from the buffer, leaving the offset < 8.
189    fn shrink_offset(self) -> Self;
190}
191
192impl BooleanBufferExt for BooleanBuffer {
193    fn shrink_offset(self) -> Self {
194        let byte_offset = self.offset() / 8;
195        let bit_offset = self.offset() % 8;
196        let len = self.len();
197        let buffer = self
198            .into_inner()
199            .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
200        BooleanBuffer::new(buffer, bit_offset, len)
201    }
202}
203
204#[cfg(test)]
205mod tests {
206    use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
207    use vortex_buffer::buffer;
208
209    use crate::arrays::{BoolArray, PrimitiveArray};
210    use crate::patches::Patches;
211    use crate::validity::Validity;
212    use crate::vtable::ValidityHelper;
213    use crate::{Array, IntoArray, ToCanonical};
214
215    #[test]
216    fn bool_array() {
217        let arr = BoolArray::from_iter([true, false, true]);
218        let scalar = bool::try_from(&arr.scalar_at(0).unwrap()).unwrap();
219        assert!(scalar);
220    }
221
222    #[test]
223    fn test_all_some_iter() {
224        let arr = BoolArray::from_iter([Some(true), Some(false)]);
225
226        assert!(matches!(arr.validity(), Validity::AllValid));
227
228        let scalar = bool::try_from(&arr.scalar_at(0).unwrap()).unwrap();
229        assert!(scalar);
230        let scalar = bool::try_from(&arr.scalar_at(1).unwrap()).unwrap();
231        assert!(!scalar);
232    }
233
234    #[test]
235    fn test_bool_from_iter() {
236        let arr = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]);
237
238        let scalar = bool::try_from(&arr.scalar_at(0).unwrap()).unwrap();
239        assert!(scalar);
240
241        let scalar = bool::try_from(&arr.scalar_at(1).unwrap()).unwrap();
242        assert!(scalar);
243
244        let scalar = arr.scalar_at(2).unwrap();
245        assert!(scalar.is_null());
246
247        let scalar = bool::try_from(&arr.scalar_at(3).unwrap()).unwrap();
248        assert!(!scalar);
249
250        let scalar = arr.scalar_at(4).unwrap();
251        assert!(scalar.is_null());
252    }
253
254    #[test]
255    fn patch_sliced_bools() {
256        let arr = {
257            let mut builder = BooleanBufferBuilder::new(12);
258            builder.append(false);
259            builder.append_n(11, true);
260            BoolArray::from(builder.finish())
261        };
262        let sliced = arr.slice(4, 12).unwrap();
263        let sliced_len = sliced.len();
264        let (values, offset) = sliced.to_bool().unwrap().into_boolean_builder();
265        assert_eq!(offset, 4);
266        assert_eq!(values.as_slice(), &[254, 15]);
267
268        // patch the underlying array
269        let patches = Patches::new(
270            arr.len(),
271            0,
272            buffer![4u32].into_array(), // This creates a non-nullable array
273            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
274        );
275        let arr = arr.patch(&patches).unwrap();
276        let arr_len = arr.len();
277        let (values, offset) = arr.to_bool().unwrap().into_boolean_builder();
278        assert_eq!(offset, 0);
279        assert_eq!(values.len(), arr_len + offset);
280        assert_eq!(values.as_slice(), &[238, 15]);
281
282        // the slice should be unchanged
283        let (values, offset) = sliced.to_bool().unwrap().into_boolean_builder();
284        assert_eq!(offset, 4);
285        assert_eq!(values.len(), sliced_len + offset);
286        assert_eq!(values.as_slice(), &[254, 15]); // unchanged
287    }
288
289    #[test]
290    fn slice_array_in_middle() {
291        let arr = BoolArray::from(BooleanBuffer::new_set(16));
292        let sliced = arr.slice(4, 12).unwrap();
293        let sliced_len = sliced.len();
294        let (values, offset) = sliced.to_bool().unwrap().into_boolean_builder();
295        assert_eq!(offset, 4);
296        assert_eq!(values.len(), sliced_len + offset);
297        assert_eq!(values.as_slice(), &[255, 15]);
298    }
299
300    #[test]
301    #[should_panic]
302    fn patch_bools_owned() {
303        let buffer = buffer![255u8; 2];
304        let buf = BooleanBuffer::new(buffer.into_arrow_buffer(), 0, 15);
305        let arr = BoolArray::new(buf, Validity::NonNullable);
306        let buf_ptr = arr.boolean_buffer().sliced().as_ptr();
307
308        let patches = Patches::new(
309            arr.len(),
310            0,
311            PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array(),
312            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
313        );
314        let arr = arr.patch(&patches).unwrap();
315        assert_eq!(arr.boolean_buffer().sliced().as_ptr(), buf_ptr);
316
317        let (values, _byte_bit_offset) = arr.to_bool().unwrap().into_boolean_builder();
318        assert_eq!(values.as_slice(), &[254, 127]);
319    }
320}