vortex_array/arrays/bool/
array.rs1use std::ops::BitAnd;
5
6use arrow_array::BooleanArray;
7use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
8use vortex_buffer::ByteBuffer;
9use vortex_dtype::DType;
10use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
11use vortex_mask::Mask;
12
13use crate::Canonical;
14use crate::arrays::{BoolVTable, bool};
15use crate::builders::ArrayBuilder;
16use crate::stats::{ArrayStats, StatsSetRef};
17use crate::validity::Validity;
18use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
19
20#[derive(Clone, Debug)]
50pub struct BoolArray {
51 dtype: DType,
52 buffer: BooleanBuffer,
53 pub(crate) validity: Validity,
54 pub(crate) stats_set: ArrayStats,
55}
56
57impl BoolArray {
58 pub fn try_new(
67 buffer: ByteBuffer,
68 offset: usize,
69 len: usize,
70 validity: Validity,
71 ) -> VortexResult<Self> {
72 Self::validate(&buffer, offset, len, &validity)?;
73
74 Ok(unsafe { Self::new_unchecked(buffer, offset, len, validity) })
76 }
77
78 pub unsafe fn new_unchecked(
94 buffer: ByteBuffer,
95 offset: usize,
96 len: usize,
97 validity: Validity,
98 ) -> Self {
99 let buffer = BooleanBuffer::new(buffer.into_arrow_buffer(), offset, len);
100 let buffer = buffer.shrink_offset();
101 Self {
102 dtype: DType::Bool(validity.nullability()),
103 buffer,
104 validity,
105 stats_set: ArrayStats::default(),
106 }
107 }
108
109 pub(crate) fn validate(
113 buffer: &ByteBuffer,
114 offset: usize,
115 len: usize,
116 validity: &Validity,
117 ) -> VortexResult<()> {
118 vortex_ensure!(
119 offset < 8,
120 "offset must be less than whole byte, was {offset} bits"
121 );
122
123 let required_bytes = offset.saturating_add(len).div_ceil(8);
125 vortex_ensure!(
126 buffer.len() >= required_bytes,
127 "BoolArray with offset={offset} len={len} cannot be built from buffer of size {}",
128 buffer.len()
129 );
130
131 if let Some(validity_len) = validity.maybe_len() {
133 vortex_ensure!(
134 validity_len == len,
135 "BoolArray of size {len} cannot be built with validity of size {validity_len}"
136 );
137 }
138
139 Ok(())
140 }
141
142 pub fn from_bool_buffer(buffer: BooleanBuffer, validity: Validity) -> Self {
148 if let Some(validity_len) = validity.maybe_len() {
149 assert_eq!(buffer.len(), validity_len);
150 }
151
152 let buffer = buffer.shrink_offset();
154 Self {
155 dtype: DType::Bool(validity.nullability()),
156 buffer,
157 validity,
158 stats_set: ArrayStats::default(),
159 }
160 }
161
162 pub fn from_indices<I: IntoIterator<Item = usize>>(
166 length: usize,
167 indices: I,
168 validity: Validity,
169 ) -> Self {
170 let mut buffer = MutableBuffer::new_null(length);
171 let buffer_slice = buffer.as_slice_mut();
172 indices
173 .into_iter()
174 .for_each(|idx| arrow_buffer::bit_util::set_bit(buffer_slice, idx));
175 Self::from_bool_buffer(
176 BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
177 validity,
178 )
179 }
180
181 pub fn boolean_buffer(&self) -> &BooleanBuffer {
183 assert!(
184 self.buffer.offset() < 8,
185 "Offset must be <8, did we forget to call shrink_offset? Found {}",
186 self.buffer.offset()
187 );
188 &self.buffer
189 }
190
191 pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
198 let offset = self.buffer.offset();
199 let len = self.buffer.len();
200 let arrow_buffer = self.buffer.into_inner();
201 let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
202 arrow_buffer.into_mutable().unwrap_or_else(|b| {
203 let mut buf = MutableBuffer::with_capacity(b.len());
204 buf.extend_from_slice(b.as_slice());
205 buf
206 })
207 } else {
208 let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
209 buf.extend_from_slice(arrow_buffer.as_slice());
210 buf
211 };
212
213 (
214 BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
215 offset,
216 )
217 }
218
219 pub fn to_mask(&self) -> Mask {
220 self.maybe_to_mask()
221 .vortex_expect("cannot convert nullable boolean array to mask")
222 }
223
224 pub fn maybe_to_mask(&self) -> Option<Mask> {
225 self.all_valid()
226 .then(|| Mask::from_buffer(self.boolean_buffer().clone()))
227 }
228
229 pub fn to_mask_fill_null_false(&self) -> Mask {
230 if let Some(constant) = self.as_constant() {
231 let bool_constant = constant.as_bool();
232 if bool_constant.value().unwrap_or(false) {
233 return Mask::new_true(self.len());
234 } else {
235 return Mask::new_false(self.len());
236 }
237 }
238 let buffer = match self.validity_mask() {
240 Mask::AllTrue(_) => self.boolean_buffer().clone(),
241 Mask::AllFalse(_) => return Mask::new_false(self.len()),
242 Mask::Values(validity) => validity.boolean_buffer().bitand(self.boolean_buffer()),
243 };
244 Mask::from_buffer(buffer)
245 }
246}
247
248impl From<BooleanBuffer> for BoolArray {
249 fn from(value: BooleanBuffer) -> Self {
250 Self::from_bool_buffer(value, Validity::NonNullable)
251 }
252}
253
254impl FromIterator<bool> for BoolArray {
255 fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
256 Self::from_bool_buffer(BooleanBuffer::from_iter(iter), Validity::NonNullable)
257 }
258}
259
260impl FromIterator<Option<bool>> for BoolArray {
261 fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
262 let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
263
264 Self::from_bool_buffer(
265 buffer,
266 nulls.map(Validity::from).unwrap_or(Validity::AllValid),
267 )
268 }
269}
270
271impl ValidityHelper for BoolArray {
272 fn validity(&self) -> &Validity {
273 &self.validity
274 }
275}
276
277impl ArrayVTable<BoolVTable> for BoolVTable {
278 fn len(array: &BoolArray) -> usize {
279 array.buffer.len()
280 }
281
282 fn dtype(array: &BoolArray) -> &DType {
283 &array.dtype
284 }
285
286 fn stats(array: &BoolArray) -> StatsSetRef<'_> {
287 array.stats_set.to_ref(array.as_ref())
288 }
289}
290
291impl CanonicalVTable<BoolVTable> for BoolVTable {
292 fn canonicalize(array: &BoolArray) -> Canonical {
293 Canonical::Bool(array.clone())
294 }
295
296 fn append_to_builder(array: &BoolArray, builder: &mut dyn ArrayBuilder) {
297 builder.extend_from_array(array.as_ref())
298 }
299}
300
/// Extension methods for bit-packed boolean buffers.
pub trait BooleanBufferExt {
    /// Consumes the buffer and returns an equivalent one whose bit offset has been
    /// reduced; the implementation for [`BooleanBuffer`] in this file leaves an
    /// offset below 8.
    fn shrink_offset(self) -> Self;
}
305
306impl BooleanBufferExt for BooleanBuffer {
307 fn shrink_offset(self) -> Self {
308 let byte_offset = self.offset() / 8;
309 let bit_offset = self.offset() % 8;
310 let len = self.len();
311 let buffer = self
312 .into_inner()
313 .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
314 BooleanBuffer::new(buffer, bit_offset, len)
315 }
316}
317
#[cfg(test)]
mod tests {
    use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
    use vortex_buffer::buffer;

    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::patches::Patches;
    use crate::validity::Validity;
    use crate::vtable::ValidityHelper;
    use crate::{Array, IntoArray, ToCanonical};

    /// A non-nullable array built from plain bools exposes its values as scalars.
    #[test]
    fn bool_array() {
        let array = BoolArray::from_iter([true, false, true]);
        assert!(bool::try_from(&array.scalar_at(0)).unwrap());
    }

    /// Collecting only `Some` values yields `Validity::AllValid`.
    #[test]
    fn test_all_some_iter() {
        let array = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(array.validity(), Validity::AllValid));

        assert!(bool::try_from(&array.scalar_at(0)).unwrap());
        assert!(!bool::try_from(&array.scalar_at(1)).unwrap());
    }

    /// Collecting a mix of `Some` and `None` preserves both values and nulls.
    #[test]
    fn test_bool_from_iter() {
        let expected = [Some(true), Some(true), None, Some(false), None];
        let array = BoolArray::from_iter(expected);

        for (idx, want) in expected.into_iter().enumerate() {
            let scalar = array.scalar_at(idx);
            match want {
                Some(value) => assert_eq!(bool::try_from(&scalar).unwrap(), value),
                None => assert!(scalar.is_null()),
            }
        }
    }

    /// Patching the parent array must not alter a slice taken before the patch.
    #[test]
    fn patch_sliced_bools() {
        let arr = {
            let mut bits = BooleanBufferBuilder::new(12);
            bits.append(false);
            bits.append_n(11, true);
            BoolArray::from(bits.finish())
        };
        let sliced = arr.slice(4..12);
        let sliced_len = sliced.len();
        let (slice_bits, slice_offset) = sliced.to_bool().into_boolean_builder();
        assert_eq!(slice_offset, 4);
        assert_eq!(slice_bits.as_slice(), &[254, 15]);

        // Patch position 4 of the parent array with `false`.
        let patches = Patches::new(
            arr.len(),
            0,
            buffer![4u32].into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let arr = arr.patch(&patches);
        let arr_len = arr.len();
        let (patched_bits, patched_offset) = arr.to_bool().into_boolean_builder();
        assert_eq!(patched_offset, 0);
        assert_eq!(patched_bits.len(), arr_len + patched_offset);
        assert_eq!(patched_bits.as_slice(), &[238, 15]);

        // The earlier slice still reports its original bits.
        let (slice_bits, slice_offset) = sliced.to_bool().into_boolean_builder();
        assert_eq!(slice_offset, 4);
        assert_eq!(slice_bits.len(), sliced_len + slice_offset);
        assert_eq!(slice_bits.as_slice(), &[254, 15]);
    }

    /// Slicing in the middle of a byte keeps the sub-byte offset in the builder.
    #[test]
    fn slice_array_in_middle() {
        let arr = BoolArray::from(BooleanBuffer::new_set(16));
        let sliced = arr.slice(4..12);
        let sliced_len = sliced.len();
        let (bits, bit_offset) = sliced.to_bool().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(bits.len(), sliced_len + bit_offset);
        assert_eq!(bits.as_slice(), &[255, 15]);
    }

    /// Marked `should_panic`: at least one of the steps below is expected to panic.
    #[test]
    #[should_panic]
    fn patch_bools_owned() {
        let bytes = buffer![255u8; 2];
        let bool_buffer = BooleanBuffer::new(bytes.into_arrow_buffer(), 0, 15);
        let arr = BoolArray::from_bool_buffer(bool_buffer, Validity::NonNullable);
        let original_ptr = arr.boolean_buffer().sliced().as_ptr();

        let patches = Patches::new(
            arr.len(),
            0,
            PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let arr = arr.patch(&patches);
        assert_eq!(arr.boolean_buffer().sliced().as_ptr(), original_ptr);

        let (bits, _byte_bit_offset) = arr.to_bool().into_boolean_builder();
        assert_eq!(bits.as_slice(), &[254, 127]);
    }
}