vortex_array/arrays/bool/
array.rs

1use arrow_array::BooleanArray;
2use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
3use vortex_dtype::DType;
4use vortex_error::{VortexResult, vortex_panic};
5
6use crate::Canonical;
7use crate::arrays::{BoolVTable, bool};
8use crate::builders::ArrayBuilder;
9use crate::stats::{ArrayStats, StatsSetRef};
10use crate::validity::Validity;
11use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
12
13#[derive(Clone, Debug)]
14pub struct BoolArray {
15    dtype: DType,
16    buffer: BooleanBuffer,
17    pub(crate) validity: Validity,
18    pub(crate) stats_set: ArrayStats,
19}
20
21impl BoolArray {
22    /// Create a new BoolArray from a set of indices and a length.
23    /// All indices must be less than the length.
24    pub fn from_indices<I: IntoIterator<Item = usize>>(length: usize, indices: I) -> Self {
25        let mut buffer = MutableBuffer::new_null(length);
26        indices
27            .into_iter()
28            .for_each(|idx| arrow_buffer::bit_util::set_bit(&mut buffer, idx));
29        Self::new(
30            BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
31            Validity::NonNullable,
32        )
33    }
34
35    /// Creates a new [`BoolArray`] from a [`BooleanBuffer`] and [`Validity`], without checking
36    /// any invariants.
37    pub fn new(buffer: BooleanBuffer, validity: Validity) -> Self {
38        if let Some(len) = validity.maybe_len() {
39            if buffer.len() != len {
40                vortex_panic!(
41                    "Buffer and validity length mismatch: buffer={}, validity={}",
42                    buffer.len(),
43                    len
44                );
45            }
46        }
47
48        // Shrink the buffer to remove any whole bytes.
49        let buffer = buffer.shrink_offset();
50        Self {
51            dtype: DType::Bool(validity.nullability()),
52            buffer,
53            validity,
54            stats_set: ArrayStats::default(),
55        }
56    }
57
58    /// Returns the underlying [`BooleanBuffer`] of the array.
59    pub fn boolean_buffer(&self) -> &BooleanBuffer {
60        assert!(
61            self.buffer.offset() < 8,
62            "Offset must be <8, did we forget to call shrink_offset? Found {}",
63            self.buffer.offset()
64        );
65        &self.buffer
66    }
67
68    /// Get a mutable version of this array.
69    ///
70    /// If the caller holds the only reference to the underlying buffer the underlying buffer is returned
71    /// otherwise a copy is created.
72    ///
73    /// The second value of the tuple is a bit_offset of first value in first byte of the returned builder
74    pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
75        let offset = self.buffer.offset();
76        let len = self.buffer.len();
77        let arrow_buffer = self.buffer.into_inner();
78        let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
79            arrow_buffer.into_mutable().unwrap_or_else(|b| {
80                let mut buf = MutableBuffer::with_capacity(b.len());
81                buf.extend_from_slice(b.as_slice());
82                buf
83            })
84        } else {
85            let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
86            buf.extend_from_slice(arrow_buffer.as_slice());
87            buf
88        };
89
90        (
91            BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
92            offset,
93        )
94    }
95}
96
97impl From<BooleanBuffer> for BoolArray {
98    fn from(value: BooleanBuffer) -> Self {
99        Self::new(value, Validity::NonNullable)
100    }
101}
102
103impl FromIterator<bool> for BoolArray {
104    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
105        Self::new(BooleanBuffer::from_iter(iter), Validity::NonNullable)
106    }
107}
108
109impl FromIterator<Option<bool>> for BoolArray {
110    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
111        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
112
113        Self::new(
114            buffer,
115            nulls.map(Validity::from).unwrap_or(Validity::AllValid),
116        )
117    }
118}
119
120impl ValidityHelper for BoolArray {
121    fn validity(&self) -> &Validity {
122        &self.validity
123    }
124}
125
126impl ArrayVTable<BoolVTable> for BoolVTable {
127    fn len(array: &BoolArray) -> usize {
128        array.buffer.len()
129    }
130
131    fn dtype(array: &BoolArray) -> &DType {
132        &array.dtype
133    }
134
135    fn stats(array: &BoolArray) -> StatsSetRef<'_> {
136        array.stats_set.to_ref(array.as_ref())
137    }
138}
139
140impl CanonicalVTable<BoolVTable> for BoolVTable {
141    fn canonicalize(array: &BoolArray) -> VortexResult<Canonical> {
142        Ok(Canonical::Bool(array.clone()))
143    }
144
145    fn append_to_builder(array: &BoolArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
146        builder.extend_from_array(array.as_ref())
147    }
148}
149
/// Extension methods for boolean bit buffers.
pub trait BooleanBufferExt {
    /// Consumes the buffer and returns one with all whole leading bytes sliced away, so that the
    /// remaining bit offset is strictly less than 8.
    fn shrink_offset(self) -> Self;
}
154
155impl BooleanBufferExt for BooleanBuffer {
156    fn shrink_offset(self) -> Self {
157        let byte_offset = self.offset() / 8;
158        let bit_offset = self.offset() % 8;
159        let len = self.len();
160        let buffer = self
161            .into_inner()
162            .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
163        BooleanBuffer::new(buffer, bit_offset, len)
164    }
165}
166
#[cfg(test)]
mod tests {
    use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
    use vortex_buffer::buffer;

    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::conformance::mask::test_mask;
    use crate::patches::Patches;
    use crate::validity::Validity;
    use crate::vtable::ValidityHelper;
    use crate::{Array, IntoArray, ToCanonical};

    /// An array collected from plain `bool`s exposes its first element as a `true` scalar.
    #[test]
    fn bool_array() {
        let array = BoolArray::from_iter([true, false, true]);
        let first = array.scalar_at(0).unwrap();
        assert!(bool::try_from(&first).unwrap());
    }

    /// Collecting only `Some(_)` values yields `Validity::AllValid` and the expected scalars.
    #[test]
    fn test_all_some_iter() {
        let array = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(array.validity(), Validity::AllValid));

        let first = array.scalar_at(0).unwrap();
        assert!(bool::try_from(&first).unwrap());
        let second = array.scalar_at(1).unwrap();
        assert!(!bool::try_from(&second).unwrap());
    }

    /// Collecting a mix of `Some` and `None` round-trips every element, nulls included.
    #[test]
    fn test_bool_from_iter() {
        let expected = [Some(true), Some(true), None, Some(false), None];
        let array = BoolArray::from_iter(expected);

        for (idx, want) in expected.into_iter().enumerate() {
            let scalar = array.scalar_at(idx).unwrap();
            match want {
                Some(value) => assert_eq!(bool::try_from(&scalar).unwrap(), value),
                None => assert!(scalar.is_null()),
            }
        }
    }

    /// Patching the parent array must not alter the bits seen through an earlier slice of it.
    #[test]
    fn patch_sliced_bools() {
        // Twelve values: one `false` followed by eleven `true`.
        let arr = {
            let mut bits = BooleanBufferBuilder::new(12);
            bits.append(false);
            bits.append_n(11, true);
            BoolArray::from(bits.finish())
        };
        let sliced = arr.slice(4, 12).unwrap();
        let sliced_len = sliced.len();
        let (slice_bits, slice_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(slice_offset, 4);
        assert_eq!(slice_bits.as_slice(), &[0b1111_1110, 0b0000_1111]);

        // Patch the parent array: set index 4 to `false`.
        let patches = Patches::new(
            arr.len(),
            0,
            PrimitiveArray::new(buffer![4u32], Validity::AllValid).into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let arr = arr.patch(&patches).unwrap();
        let arr_len = arr.len();
        let (patched_bits, patched_offset) = arr.to_bool().unwrap().into_boolean_builder();
        assert_eq!(patched_offset, 0);
        assert_eq!(patched_bits.len(), arr_len + patched_offset);
        assert_eq!(patched_bits.as_slice(), &[0b1110_1110, 0b0000_1111]);

        // The slice taken before patching must still show the original bits.
        let (slice_bits, slice_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(slice_offset, 4);
        assert_eq!(slice_bits.len(), sliced_len + slice_offset);
        assert_eq!(slice_bits.as_slice(), &[0b1111_1110, 0b0000_1111]); // unchanged
    }

    /// Slicing from the middle of a byte keeps the sub-byte offset in the resulting builder.
    #[test]
    fn slice_array_in_middle() {
        let arr = BoolArray::from(BooleanBuffer::new_set(16));
        let sliced = arr.slice(4, 12).unwrap();
        let sliced_len = sliced.len();
        let (bits, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(bits.len(), sliced_len + bit_offset);
        assert_eq!(bits.as_slice(), &[0b1111_1111, 0b0000_1111]);
    }

    // NOTE(review): this test is marked `should_panic`, but nothing here shows which statement
    // is expected to panic -- confirm the intent before relying on it.
    #[test]
    #[should_panic]
    fn patch_bools_owned() {
        let bytes = buffer![255u8; 2];
        let bool_buf = BooleanBuffer::new(bytes.into_arrow_buffer(), 0, 15);
        let arr = BoolArray::new(bool_buf, Validity::NonNullable);
        let original_ptr = arr.boolean_buffer().sliced().as_ptr();

        // Set index 0 to `false`.
        let patches = Patches::new(
            arr.len(),
            0,
            PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let arr = arr.patch(&patches).unwrap();
        // The patched array is expected to point at the same bytes as before patching.
        assert_eq!(arr.boolean_buffer().sliced().as_ptr(), original_ptr);

        let (bits, _byte_bit_offset) = arr.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bits.as_slice(), &[0b1111_1110, 0b0111_1111]);
    }

    /// Runs the shared mask conformance checks against a small `BoolArray`.
    #[test]
    fn test_mask_primitive_array() {
        let array = BoolArray::from_iter([true, false, true, true, false]);
        test_mask(array.as_ref());
    }
}