vortex_array/arrays/bool/
array.rs

1use arrow_array::BooleanArray;
2use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
3use vortex_dtype::DType;
4use vortex_error::{VortexResult, vortex_panic};
5
6use crate::Canonical;
7use crate::arrays::{BoolVTable, bool};
8use crate::builders::ArrayBuilder;
9use crate::stats::{ArrayStats, StatsSetRef};
10use crate::validity::Validity;
11use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
12
13#[derive(Clone, Debug)]
14pub struct BoolArray {
15    dtype: DType,
16    buffer: BooleanBuffer,
17    pub(crate) validity: Validity,
18    pub(crate) stats_set: ArrayStats,
19}
20
21impl BoolArray {
22    /// Create a new BoolArray from a set of indices and a length.
23    /// All indices must be less than the length.
24    pub fn from_indices<I: IntoIterator<Item = usize>>(
25        length: usize,
26        indices: I,
27        validity: Validity,
28    ) -> Self {
29        let mut buffer = MutableBuffer::new_null(length);
30        indices
31            .into_iter()
32            .for_each(|idx| arrow_buffer::bit_util::set_bit(&mut buffer, idx));
33        Self::new(
34            BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
35            validity,
36        )
37    }
38
39    /// Creates a new [`BoolArray`] from a [`BooleanBuffer`] and [`Validity`], without checking
40    /// any invariants.
41    pub fn new(buffer: BooleanBuffer, validity: Validity) -> Self {
42        if let Some(len) = validity.maybe_len() {
43            if buffer.len() != len {
44                vortex_panic!(
45                    "Buffer and validity length mismatch: buffer={}, validity={}",
46                    buffer.len(),
47                    len
48                );
49            }
50        }
51
52        // Shrink the buffer to remove any whole bytes.
53        let buffer = buffer.shrink_offset();
54        Self {
55            dtype: DType::Bool(validity.nullability()),
56            buffer,
57            validity,
58            stats_set: ArrayStats::default(),
59        }
60    }
61
62    /// Returns the underlying [`BooleanBuffer`] of the array.
63    pub fn boolean_buffer(&self) -> &BooleanBuffer {
64        assert!(
65            self.buffer.offset() < 8,
66            "Offset must be <8, did we forget to call shrink_offset? Found {}",
67            self.buffer.offset()
68        );
69        &self.buffer
70    }
71
72    /// Get a mutable version of this array.
73    ///
74    /// If the caller holds the only reference to the underlying buffer the underlying buffer is returned
75    /// otherwise a copy is created.
76    ///
77    /// The second value of the tuple is a bit_offset of first value in first byte of the returned builder
78    pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
79        let offset = self.buffer.offset();
80        let len = self.buffer.len();
81        let arrow_buffer = self.buffer.into_inner();
82        let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
83            arrow_buffer.into_mutable().unwrap_or_else(|b| {
84                let mut buf = MutableBuffer::with_capacity(b.len());
85                buf.extend_from_slice(b.as_slice());
86                buf
87            })
88        } else {
89            let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
90            buf.extend_from_slice(arrow_buffer.as_slice());
91            buf
92        };
93
94        (
95            BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
96            offset,
97        )
98    }
99}
100
101impl From<BooleanBuffer> for BoolArray {
102    fn from(value: BooleanBuffer) -> Self {
103        Self::new(value, Validity::NonNullable)
104    }
105}
106
107impl FromIterator<bool> for BoolArray {
108    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
109        Self::new(BooleanBuffer::from_iter(iter), Validity::NonNullable)
110    }
111}
112
113impl FromIterator<Option<bool>> for BoolArray {
114    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
115        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
116
117        Self::new(
118            buffer,
119            nulls.map(Validity::from).unwrap_or(Validity::AllValid),
120        )
121    }
122}
123
124impl ValidityHelper for BoolArray {
125    fn validity(&self) -> &Validity {
126        &self.validity
127    }
128}
129
130impl ArrayVTable<BoolVTable> for BoolVTable {
131    fn len(array: &BoolArray) -> usize {
132        array.buffer.len()
133    }
134
135    fn dtype(array: &BoolArray) -> &DType {
136        &array.dtype
137    }
138
139    fn stats(array: &BoolArray) -> StatsSetRef<'_> {
140        array.stats_set.to_ref(array.as_ref())
141    }
142}
143
144impl CanonicalVTable<BoolVTable> for BoolVTable {
145    fn canonicalize(array: &BoolArray) -> VortexResult<Canonical> {
146        Ok(Canonical::Bool(array.clone()))
147    }
148
149    fn append_to_builder(array: &BoolArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
150        builder.extend_from_array(array.as_ref())
151    }
152}
153
154pub trait BooleanBufferExt {
155    /// Slice any full bytes from the buffer, leaving the offset < 8.
156    fn shrink_offset(self) -> Self;
157}
158
159impl BooleanBufferExt for BooleanBuffer {
160    fn shrink_offset(self) -> Self {
161        let byte_offset = self.offset() / 8;
162        let bit_offset = self.offset() % 8;
163        let len = self.len();
164        let buffer = self
165            .into_inner()
166            .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
167        BooleanBuffer::new(buffer, bit_offset, len)
168    }
169}
170
#[cfg(test)]
mod tests {
    use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
    use vortex_buffer::buffer;

    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::conformance::mask::test_mask;
    use crate::patches::Patches;
    use crate::validity::Validity;
    use crate::vtable::ValidityHelper;
    use crate::{Array, IntoArray, ToCanonical};

    /// Reads position `index` of `array` as a `bool`, panicking if the lookup or the
    /// conversion fails.
    fn bool_at(array: &BoolArray, index: usize) -> bool {
        let scalar = array.scalar_at(index).unwrap();
        bool::try_from(&scalar).unwrap()
    }

    #[test]
    fn bool_array() {
        let array = BoolArray::from_iter([true, false, true]);
        assert!(bool_at(&array, 0));
    }

    #[test]
    fn test_all_some_iter() {
        let array = BoolArray::from_iter([Some(true), Some(false)]);

        // No `None` was supplied, so no null buffer is produced.
        assert!(matches!(array.validity(), Validity::AllValid));

        assert!(bool_at(&array, 0));
        assert!(!bool_at(&array, 1));
    }

    #[test]
    fn test_bool_from_iter() {
        let array = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]);

        assert!(bool_at(&array, 0));
        assert!(bool_at(&array, 1));
        assert!(array.scalar_at(2).unwrap().is_null());
        assert!(!bool_at(&array, 3));
        assert!(array.scalar_at(4).unwrap().is_null());
    }

    #[test]
    fn patch_sliced_bools() {
        // One `false` followed by eleven `true`s.
        let original = {
            let mut bits = BooleanBufferBuilder::new(12);
            bits.append(false);
            bits.append_n(11, true);
            BoolArray::from(bits.finish())
        };
        let window = original.slice(4, 12).unwrap();
        let window_len = window.len();
        let (window_bits, window_offset) = window.to_bool().unwrap().into_boolean_builder();
        assert_eq!(window_offset, 4);
        assert_eq!(window_bits.as_slice(), &[254, 15]);

        // Patch the underlying array: write `false` at position 4.
        let patches = Patches::new(
            original.len(),
            0,
            PrimitiveArray::new(buffer![4u32], Validity::AllValid).into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let patched = original.patch(&patches).unwrap();
        let patched_len = patched.len();
        let (patched_bits, patched_offset) = patched.to_bool().unwrap().into_boolean_builder();
        assert_eq!(patched_offset, 0);
        assert_eq!(patched_bits.len(), patched_len + patched_offset);
        assert_eq!(patched_bits.as_slice(), &[238, 15]);

        // The slice taken before patching must be unchanged.
        let (reread_bits, reread_offset) = window.to_bool().unwrap().into_boolean_builder();
        assert_eq!(reread_offset, 4);
        assert_eq!(reread_bits.len(), window_len + reread_offset);
        assert_eq!(reread_bits.as_slice(), &[254, 15]); // unchanged
    }

    #[test]
    fn slice_array_in_middle() {
        let all_set = BoolArray::from(BooleanBuffer::new_set(16));
        let window = all_set.slice(4, 12).unwrap();
        let window_len = window.len();
        let (window_bits, window_offset) = window.to_bool().unwrap().into_boolean_builder();
        assert_eq!(window_offset, 4);
        assert_eq!(window_bits.len(), window_len + window_offset);
        assert_eq!(window_bits.as_slice(), &[255, 15]);
    }

    // NOTE(review): this test is marked `should_panic`; which step is expected to panic is not
    // evident from this file alone. Confirm the intent.
    #[test]
    #[should_panic]
    fn patch_bools_owned() {
        let bytes = buffer![255u8; 2];
        let bits = BooleanBuffer::new(bytes.into_arrow_buffer(), 0, 15);
        let owned = BoolArray::new(bits, Validity::NonNullable);
        let original_ptr = owned.boolean_buffer().sliced().as_ptr();

        let patches = Patches::new(
            owned.len(),
            0,
            PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let patched = owned.patch(&patches).unwrap();
        assert_eq!(patched.boolean_buffer().sliced().as_ptr(), original_ptr);

        let (patched_bits, _byte_bit_offset) = patched.to_bool().unwrap().into_boolean_builder();
        assert_eq!(patched_bits.as_slice(), &[254, 127]);
    }

    #[test]
    fn test_mask_primitive_array() {
        let array = BoolArray::from_iter([true, false, true, true, false]);
        test_mask(array.as_ref());
    }
}