vortex_array/arrays/bool/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use arrow_array::BooleanArray;
5use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
6use vortex_dtype::DType;
7use vortex_error::{VortexResult, vortex_panic};
8
9use crate::Canonical;
10use crate::arrays::{BoolVTable, bool};
11use crate::builders::ArrayBuilder;
12use crate::stats::{ArrayStats, StatsSetRef};
13use crate::validity::Validity;
14use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
15
16#[derive(Clone, Debug)]
17pub struct BoolArray {
18    dtype: DType,
19    buffer: BooleanBuffer,
20    pub(crate) validity: Validity,
21    pub(crate) stats_set: ArrayStats,
22}
23
24impl BoolArray {
25    /// Create a new BoolArray from a set of indices and a length.
26    /// All indices must be less than the length.
27    pub fn from_indices<I: IntoIterator<Item = usize>>(
28        length: usize,
29        indices: I,
30        validity: Validity,
31    ) -> Self {
32        let mut buffer = MutableBuffer::new_null(length);
33        indices
34            .into_iter()
35            .for_each(|idx| arrow_buffer::bit_util::set_bit(&mut buffer, idx));
36        Self::new(
37            BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
38            validity,
39        )
40    }
41
42    /// Creates a new [`BoolArray`] from a [`BooleanBuffer`] and [`Validity`], without checking
43    /// any invariants.
44    pub fn new(buffer: BooleanBuffer, validity: Validity) -> Self {
45        if let Some(len) = validity.maybe_len() {
46            if buffer.len() != len {
47                vortex_panic!(
48                    "Buffer and validity length mismatch: buffer={}, validity={}",
49                    buffer.len(),
50                    len
51                );
52            }
53        }
54
55        // Shrink the buffer to remove any whole bytes.
56        let buffer = buffer.shrink_offset();
57        Self {
58            dtype: DType::Bool(validity.nullability()),
59            buffer,
60            validity,
61            stats_set: ArrayStats::default(),
62        }
63    }
64
65    /// Returns the underlying [`BooleanBuffer`] of the array.
66    pub fn boolean_buffer(&self) -> &BooleanBuffer {
67        assert!(
68            self.buffer.offset() < 8,
69            "Offset must be <8, did we forget to call shrink_offset? Found {}",
70            self.buffer.offset()
71        );
72        &self.buffer
73    }
74
75    /// Get a mutable version of this array.
76    ///
77    /// If the caller holds the only reference to the underlying buffer the underlying buffer is returned
78    /// otherwise a copy is created.
79    ///
80    /// The second value of the tuple is a bit_offset of first value in first byte of the returned builder
81    pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
82        let offset = self.buffer.offset();
83        let len = self.buffer.len();
84        let arrow_buffer = self.buffer.into_inner();
85        let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
86            arrow_buffer.into_mutable().unwrap_or_else(|b| {
87                let mut buf = MutableBuffer::with_capacity(b.len());
88                buf.extend_from_slice(b.as_slice());
89                buf
90            })
91        } else {
92            let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
93            buf.extend_from_slice(arrow_buffer.as_slice());
94            buf
95        };
96
97        (
98            BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
99            offset,
100        )
101    }
102}
103
104impl From<BooleanBuffer> for BoolArray {
105    fn from(value: BooleanBuffer) -> Self {
106        Self::new(value, Validity::NonNullable)
107    }
108}
109
110impl FromIterator<bool> for BoolArray {
111    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
112        Self::new(BooleanBuffer::from_iter(iter), Validity::NonNullable)
113    }
114}
115
116impl FromIterator<Option<bool>> for BoolArray {
117    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
118        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
119
120        Self::new(
121            buffer,
122            nulls.map(Validity::from).unwrap_or(Validity::AllValid),
123        )
124    }
125}
126
127impl ValidityHelper for BoolArray {
128    fn validity(&self) -> &Validity {
129        &self.validity
130    }
131}
132
133impl ArrayVTable<BoolVTable> for BoolVTable {
134    fn len(array: &BoolArray) -> usize {
135        array.buffer.len()
136    }
137
138    fn dtype(array: &BoolArray) -> &DType {
139        &array.dtype
140    }
141
142    fn stats(array: &BoolArray) -> StatsSetRef<'_> {
143        array.stats_set.to_ref(array.as_ref())
144    }
145}
146
147impl CanonicalVTable<BoolVTable> for BoolVTable {
148    fn canonicalize(array: &BoolArray) -> VortexResult<Canonical> {
149        Ok(Canonical::Bool(array.clone()))
150    }
151
152    fn append_to_builder(array: &BoolArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
153        builder.extend_from_array(array.as_ref())
154    }
155}
156
/// Extension methods for [`BooleanBuffer`].
pub trait BooleanBufferExt {
    /// Consumes the buffer and returns one holding the same bits with all whole leading bytes
    /// sliced away, so that the remaining bit offset is less than 8.
    fn shrink_offset(self) -> Self;
}
161
162impl BooleanBufferExt for BooleanBuffer {
163    fn shrink_offset(self) -> Self {
164        let byte_offset = self.offset() / 8;
165        let bit_offset = self.offset() % 8;
166        let len = self.len();
167        let buffer = self
168            .into_inner()
169            .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
170        BooleanBuffer::new(buffer, bit_offset, len)
171    }
172}
173
#[cfg(test)]
mod tests {
    use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
    use vortex_buffer::buffer;

    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::conformance::mask::test_mask;
    use crate::patches::Patches;
    use crate::validity::Validity;
    use crate::vtable::ValidityHelper;
    use crate::{Array, IntoArray, ToCanonical};

    /// The first element of an array built from plain `bool`s reads back as `true`.
    #[test]
    fn bool_array() {
        let array = BoolArray::from_iter([true, false, true]);
        let first = array.scalar_at(0).unwrap();
        assert!(bool::try_from(&first).unwrap());
    }

    /// An iterator of only `Some` values produces `Validity::AllValid` and the given values.
    #[test]
    fn test_all_some_iter() {
        let array = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(array.validity(), Validity::AllValid));

        let first = array.scalar_at(0).unwrap();
        assert!(bool::try_from(&first).unwrap());
        let second = array.scalar_at(1).unwrap();
        assert!(!bool::try_from(&second).unwrap());
    }

    /// `Some` entries read back as their value and `None` entries read back as null scalars.
    #[test]
    fn test_bool_from_iter() {
        let expected = [Some(true), Some(true), None, Some(false), None];
        let array = BoolArray::from_iter(expected);

        for (position, want) in expected.into_iter().enumerate() {
            let scalar = array.scalar_at(position).unwrap();
            match want {
                Some(value) => assert_eq!(bool::try_from(&scalar).unwrap(), value),
                None => assert!(scalar.is_null()),
            }
        }
    }

    /// Patching the parent array must not be visible through a slice taken beforehand.
    #[test]
    fn patch_sliced_bools() {
        // Twelve bits: one `false` followed by eleven `true`.
        let mut bits = BooleanBufferBuilder::new(12);
        bits.append(false);
        bits.append_n(11, true);
        let arr = BoolArray::from(bits.finish());

        let sliced = arr.slice(4, 12).unwrap();
        let sliced_len = sliced.len();
        let (builder, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(builder.as_slice(), &[254, 15]);

        // patch the underlying array: clear the bit at index 4
        let patch_indices = PrimitiveArray::new(buffer![4u32], Validity::AllValid).into_array();
        let patch_values = BoolArray::from(BooleanBuffer::new_unset(1)).into_array();
        let patches = Patches::new(arr.len(), 0, patch_indices, patch_values);
        let arr = arr.patch(&patches).unwrap();
        let arr_len = arr.len();
        let (builder, bit_offset) = arr.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 0);
        assert_eq!(builder.len(), arr_len + bit_offset);
        assert_eq!(builder.as_slice(), &[238, 15]);

        // the slice should be unchanged
        let (builder, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(builder.len(), sliced_len + bit_offset);
        assert_eq!(builder.as_slice(), &[254, 15]); // unchanged
    }

    /// Slicing inside the first byte keeps a sub-byte offset, and the builder spans
    /// `offset + len` bits.
    #[test]
    fn slice_array_in_middle() {
        let all_set = BoolArray::from(BooleanBuffer::new_set(16));
        let sliced = all_set.slice(4, 12).unwrap();
        let sliced_len = sliced.len();

        let (builder, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(builder.len(), sliced_len + bit_offset);
        assert_eq!(builder.as_slice(), &[255, 15]);
    }

    /// Patches an array built directly from an owned byte buffer and compares buffer pointers
    /// before and after.
    ///
    /// NOTE(review): this test is expected to panic; which statement panics is not visible
    /// from this file alone - confirm before relying on it.
    #[test]
    #[should_panic]
    fn patch_bools_owned() {
        let bytes = buffer![255u8; 2];
        let bits = BooleanBuffer::new(bytes.into_arrow_buffer(), 0, 15);
        let arr = BoolArray::new(bits, Validity::NonNullable);
        let original_ptr = arr.boolean_buffer().sliced().as_ptr();

        // Clear the bit at index 0.
        let patch_indices = PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array();
        let patch_values = BoolArray::from(BooleanBuffer::new_unset(1)).into_array();
        let patches = Patches::new(arr.len(), 0, patch_indices, patch_values);
        let arr = arr.patch(&patches).unwrap();
        assert_eq!(arr.boolean_buffer().sliced().as_ptr(), original_ptr);

        let (builder, _byte_bit_offset) = arr.to_bool().unwrap().into_boolean_builder();
        assert_eq!(builder.as_slice(), &[254, 127]);
    }

    /// Runs the shared mask conformance checks against a small bool array.
    #[test]
    fn test_mask_primitive_array() {
        let array = BoolArray::from_iter([true, false, true, true, false]);
        test_mask(array.as_ref());
    }
}