vortex_array/arrays/bool/
array.rs1use arrow_array::BooleanArray;
5use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
6use vortex_buffer::ByteBuffer;
7use vortex_dtype::DType;
8use vortex_error::{VortexResult, vortex_ensure};
9
10use crate::Canonical;
11use crate::arrays::{BoolVTable, bool};
12use crate::builders::ArrayBuilder;
13use crate::stats::{ArrayStats, StatsSetRef};
14use crate::validity::Validity;
15use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
16
/// A boolean array backed by a bit-packed arrow [`BooleanBuffer`].
///
/// Values are stored in `buffer`; nullability and per-element validity are
/// tracked by `validity`, and the logical type is always `DType::Bool`.
#[derive(Clone, Debug)]
pub struct BoolArray {
    /// Logical type; constructed as `DType::Bool` with the nullability of `validity`.
    dtype: DType,
    /// Bit-packed boolean values.
    buffer: BooleanBuffer,
    /// Validity of the elements of this array.
    pub(crate) validity: Validity,
    /// Statistics holder for this array; starts out as `ArrayStats::default()`.
    pub(crate) stats_set: ArrayStats,
}
53
54impl BoolArray {
55 fn validate(
56 buffer: &ByteBuffer,
57 offset: usize,
58 len: usize,
59 validity: &Validity,
60 ) -> VortexResult<()> {
61 vortex_ensure!(
62 offset < 8,
63 "offset must be less than whole byte, was {offset} bits"
64 );
65
66 let required_bytes = offset.saturating_add(len).div_ceil(8);
68 vortex_ensure!(
69 buffer.len() >= required_bytes,
70 "BoolArray with offset={offset} len={len} cannot be built from buffer of size {}",
71 buffer.len()
72 );
73
74 if let Some(validity_len) = validity.maybe_len() {
76 vortex_ensure!(
77 validity_len == len,
78 "BoolArray of size {len} cannot be built with validity of size {validity_len}"
79 );
80 }
81
82 Ok(())
83 }
84}
85
86impl BoolArray {
87 pub fn try_new(
103 buffer: ByteBuffer,
104 offset: usize,
105 len: usize,
106 validity: Validity,
107 ) -> VortexResult<Self> {
108 Self::validate(&buffer, offset, len, &validity)?;
109
110 Ok(Self::new(
111 BooleanBuffer::new(buffer.into_arrow_buffer(), offset, len),
112 validity,
113 ))
114 }
115
116 pub fn new(buffer: BooleanBuffer, validity: Validity) -> Self {
120 if let Some(validity_len) = validity.maybe_len() {
121 assert_eq!(buffer.len(), validity_len);
122 }
123
124 let buffer = buffer.shrink_offset();
126 Self {
127 dtype: DType::Bool(validity.nullability()),
128 buffer,
129 validity,
130 stats_set: ArrayStats::default(),
131 }
132 }
133
134 pub fn from_indices<I: IntoIterator<Item = usize>>(
138 length: usize,
139 indices: I,
140 validity: Validity,
141 ) -> Self {
142 let mut buffer = MutableBuffer::new_null(length);
143 let buffer_slice = buffer.as_slice_mut();
144 indices
145 .into_iter()
146 .for_each(|idx| arrow_buffer::bit_util::set_bit(buffer_slice, idx));
147 Self::new(
148 BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
149 validity,
150 )
151 }
152
153 pub fn boolean_buffer(&self) -> &BooleanBuffer {
155 assert!(
156 self.buffer.offset() < 8,
157 "Offset must be <8, did we forget to call shrink_offset? Found {}",
158 self.buffer.offset()
159 );
160 &self.buffer
161 }
162
163 pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
170 let offset = self.buffer.offset();
171 let len = self.buffer.len();
172 let arrow_buffer = self.buffer.into_inner();
173 let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
174 arrow_buffer.into_mutable().unwrap_or_else(|b| {
175 let mut buf = MutableBuffer::with_capacity(b.len());
176 buf.extend_from_slice(b.as_slice());
177 buf
178 })
179 } else {
180 let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
181 buf.extend_from_slice(arrow_buffer.as_slice());
182 buf
183 };
184
185 (
186 BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
187 offset,
188 )
189 }
190}
191
192impl From<BooleanBuffer> for BoolArray {
193 fn from(value: BooleanBuffer) -> Self {
194 Self::new(value, Validity::NonNullable)
195 }
196}
197
198impl FromIterator<bool> for BoolArray {
199 fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
200 Self::new(BooleanBuffer::from_iter(iter), Validity::NonNullable)
201 }
202}
203
204impl FromIterator<Option<bool>> for BoolArray {
205 fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
206 let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
207
208 Self::new(
209 buffer,
210 nulls.map(Validity::from).unwrap_or(Validity::AllValid),
211 )
212 }
213}
214
215impl ValidityHelper for BoolArray {
216 fn validity(&self) -> &Validity {
217 &self.validity
218 }
219}
220
221impl ArrayVTable<BoolVTable> for BoolVTable {
222 fn len(array: &BoolArray) -> usize {
223 array.buffer.len()
224 }
225
226 fn dtype(array: &BoolArray) -> &DType {
227 &array.dtype
228 }
229
230 fn stats(array: &BoolArray) -> StatsSetRef<'_> {
231 array.stats_set.to_ref(array.as_ref())
232 }
233}
234
235impl CanonicalVTable<BoolVTable> for BoolVTable {
236 fn canonicalize(array: &BoolArray) -> VortexResult<Canonical> {
237 Ok(Canonical::Bool(array.clone()))
238 }
239
240 fn append_to_builder(array: &BoolArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
241 builder.extend_from_array(array.as_ref())
242 }
243}
244
/// Extension methods for arrow's [`BooleanBuffer`].
pub trait BooleanBufferExt {
    /// Consumes the buffer and returns an equivalent one whose bit offset has
    /// been reduced; the implementation in this file leaves an offset below 8.
    fn shrink_offset(self) -> Self;
}
249
250impl BooleanBufferExt for BooleanBuffer {
251 fn shrink_offset(self) -> Self {
252 let byte_offset = self.offset() / 8;
253 let bit_offset = self.offset() % 8;
254 let len = self.len();
255 let buffer = self
256 .into_inner()
257 .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
258 BooleanBuffer::new(buffer, bit_offset, len)
259 }
260}
261
#[cfg(test)]
mod tests {
    use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
    use vortex_buffer::buffer;

    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::patches::Patches;
    use crate::validity::Validity;
    use crate::vtable::ValidityHelper;
    use crate::{Array, IntoArray, ToCanonical};

    /// A non-nullable array built from plain bools yields its first value.
    #[test]
    fn bool_array() {
        let array = BoolArray::from_iter([true, false, true]);
        let first = bool::try_from(&array.scalar_at(0)).unwrap();
        assert!(first);
    }

    /// Collecting only `Some` values produces `Validity::AllValid`.
    #[test]
    fn test_all_some_iter() {
        let array = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(array.validity(), Validity::AllValid));

        let first = bool::try_from(&array.scalar_at(0)).unwrap();
        assert!(first);
        let second = bool::try_from(&array.scalar_at(1)).unwrap();
        assert!(!second);
    }

    /// Collecting a mix of `Some` and `None` keeps values and nulls in place.
    #[test]
    fn test_bool_from_iter() {
        let array = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]);

        let value = bool::try_from(&array.scalar_at(0)).unwrap();
        assert!(value);

        let value = bool::try_from(&array.scalar_at(1)).unwrap();
        assert!(value);

        let null = array.scalar_at(2);
        assert!(null.is_null());

        let value = bool::try_from(&array.scalar_at(3)).unwrap();
        assert!(!value);

        let null = array.scalar_at(4);
        assert!(null.is_null());
    }

    /// Patching the parent array must leave a previously taken slice untouched.
    #[test]
    fn patch_sliced_bools() {
        // One `false` followed by eleven `true` values.
        let mut builder = BooleanBufferBuilder::new(12);
        builder.append(false);
        builder.append_n(11, true);
        let arr = BoolArray::from(builder.finish());

        let sliced = arr.slice(4, 12);
        let sliced_len = sliced.len();
        let (bits, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(bits.as_slice(), &[254, 15]);

        // Patch position 4 of the parent array to `false`.
        let patches = Patches::new(
            arr.len(),
            0,
            buffer![4u32].into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let arr = arr.patch(&patches).unwrap();
        let arr_len = arr.len();
        let (bits, bit_offset) = arr.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 0);
        assert_eq!(bits.len(), arr_len + bit_offset);
        assert_eq!(bits.as_slice(), &[238, 15]);

        // The slice still shows the bytes from before the patch.
        let (bits, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(bits.len(), sliced_len + bit_offset);
        assert_eq!(bits.as_slice(), &[254, 15]);
    }

    /// Slicing at a non-byte boundary keeps the sub-byte offset in the builder.
    #[test]
    fn slice_array_in_middle() {
        let arr = BoolArray::from(BooleanBuffer::new_set(16));
        let sliced = arr.slice(4, 12);
        let sliced_len = sliced.len();
        let (bits, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(bits.len(), sliced_len + bit_offset);
        assert_eq!(bits.as_slice(), &[255, 15]);
    }

    /// Marked `should_panic`: the test is expected to panic somewhere in its body.
    // NOTE(review): which statement panics is not evident from this file — confirm.
    #[test]
    #[should_panic]
    fn patch_bools_owned() {
        let bytes = buffer![255u8; 2];
        let bool_buf = BooleanBuffer::new(bytes.into_arrow_buffer(), 0, 15);
        let arr = BoolArray::new(bool_buf, Validity::NonNullable);
        let original_ptr = arr.boolean_buffer().sliced().as_ptr();

        let patches = Patches::new(
            arr.len(),
            0,
            PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let arr = arr.patch(&patches).unwrap();
        // Compares the data pointer after patching with the one taken before.
        assert_eq!(arr.boolean_buffer().sliced().as_ptr(), original_ptr);

        let (bits, _byte_bit_offset) = arr.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bits.as_slice(), &[254, 127]);
    }
}