// vortex_array/arrays/bool/array.rs
use std::fmt::Display;
5use std::fmt::Formatter;
6
7use arrow_array::BooleanArray;
8use smallvec::smallvec;
9use vortex_buffer::BitBuffer;
10use vortex_buffer::BitBufferMeta;
11use vortex_buffer::BitBufferMut;
12use vortex_buffer::BitBufferView;
13use vortex_error::VortexExpect;
14use vortex_error::VortexResult;
15use vortex_error::vortex_ensure;
16use vortex_mask::Mask;
17
18use crate::ArrayRef;
19use crate::ArraySlots;
20use crate::ExecutionCtx;
21use crate::IntoArray;
22use crate::array::Array;
23use crate::array::ArrayParts;
24use crate::array::TypedArrayRef;
25use crate::array::child_to_validity;
26use crate::array::validity_to_child;
27use crate::arrays::Bool;
28use crate::arrays::BoolArray;
29use crate::buffer::BufferHandle;
30use crate::dtype::DType;
31use crate::validity::Validity;
32
/// Index of the validity child within the array's slots.
pub(super) const VALIDITY_SLOT: usize = 0;
/// Length of the slot-name table below.
pub(super) const NUM_SLOTS: usize = 1;
/// Names of the slots, indexed by slot position.
pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["validity"];
37
/// Data backing a boolean array: a handle to the packed bits plus the bit offset and
/// bit length describing the logical region inside that buffer.
#[derive(Clone, Debug)]
pub struct BoolData {
    /// Handle to the buffer holding the packed bits.
    pub(super) bits: BufferHandle,
    /// Bit offset and bit length of the logical values within `bits`.
    pub(super) meta: BitBufferMeta,
}
76
77impl Display for BoolData {
78 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
79 write!(f, "offset: {}", self.meta.offset())
80 }
81}
82
/// The owned components of a [`BoolData`], as produced by [`BoolData::into_parts`].
pub struct BoolDataParts {
    /// Handle to the buffer holding the packed bits.
    pub bits: BufferHandle,
    /// Bit offset and bit length of the logical values within `bits`.
    pub meta: BitBufferMeta,
}
87
88pub trait BoolArrayExt: TypedArrayRef<Bool> {
89 fn nullability(&self) -> crate::dtype::Nullability {
90 match self.as_ref().dtype() {
91 DType::Bool(nullability) => *nullability,
92 _ => unreachable!("BoolArrayExt requires a bool dtype"),
93 }
94 }
95
96 fn validity(&self) -> Validity {
97 child_to_validity(
98 self.as_ref().slots()[VALIDITY_SLOT].as_ref(),
99 self.nullability(),
100 )
101 }
102
103 fn to_bit_buffer(&self) -> BitBuffer {
104 let buffer = self.bits.as_host().clone();
105 BitBuffer::new_with_offset(buffer, self.meta.len(), self.meta.offset())
106 }
107
108 fn bit_buffer_view(&self) -> BitBufferView<'_> {
110 BitBufferView::from_meta(self.bits.as_host().as_slice(), self.meta)
111 }
112
113 fn maybe_execute_mask(&self, ctx: &mut ExecutionCtx) -> VortexResult<Option<Mask>> {
114 let all_valid = match &self.validity() {
115 Validity::NonNullable | Validity::AllValid => true,
116 Validity::AllInvalid => false,
117 Validity::Array(a) => a.statistics().compute_min::<bool>(ctx).unwrap_or(false),
118 };
119 Ok(all_valid.then(|| Mask::from_buffer(self.to_bit_buffer())))
120 }
121
122 fn execute_mask(&self, ctx: &mut ExecutionCtx) -> Mask {
123 self.maybe_execute_mask(ctx)
124 .vortex_expect("failed to check validity")
125 .vortex_expect("cannot convert nullable boolean array to mask")
126 }
127
128 fn to_mask_fill_null_false(&self, ctx: &mut ExecutionCtx) -> Mask {
129 let validity_mask = self
130 .validity()
131 .execute_mask(self.as_ref().len(), ctx)
132 .vortex_expect("Failed to compute validity mask");
133 let buffer = match validity_mask {
134 Mask::AllTrue(_) => self.to_bit_buffer(),
135 Mask::AllFalse(_) => return Mask::new_false(self.as_ref().len()),
136 Mask::Values(validity) => validity.bit_buffer() & self.to_bit_buffer(),
137 };
138 Mask::from_buffer(buffer)
139 }
140}
141impl<T: TypedArrayRef<Bool>> BoolArrayExt for T {}
142
143impl BoolData {
145 #[inline]
147 pub fn into_parts(self, len: usize) -> BoolDataParts {
148 BoolDataParts {
149 bits: self.bits,
150 meta: BitBufferMeta::new(self.meta.offset(), len),
151 }
152 }
153
154 pub(crate) fn make_slots(validity: &Validity, len: usize) -> ArraySlots {
155 smallvec![validity_to_child(validity, len)]
156 }
157}
158
159impl Array<Bool> {
161 pub fn new(bits: BitBuffer, validity: Validity) -> Self {
167 Self::try_new(bits, validity).vortex_expect("Failed to create BoolArray")
168 }
169
170 pub fn new_handle(handle: BufferHandle, offset: usize, len: usize, validity: Validity) -> Self {
176 Self::try_new_from_handle(handle, offset, len, validity)
177 .vortex_expect("Failed to create BoolArray from BufferHandle")
178 }
179
180 pub fn try_new(bits: BitBuffer, validity: Validity) -> VortexResult<Self> {
186 let dtype = DType::Bool(validity.nullability());
187 let len = bits.len();
188 let slots = BoolData::make_slots(&validity, len);
189 let data = BoolData::try_new(bits, validity)?;
190 Ok(unsafe {
191 Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
192 })
193 }
194
195 pub fn try_new_from_handle(
198 bits: BufferHandle,
199 offset: usize,
200 len: usize,
201 validity: Validity,
202 ) -> VortexResult<Self> {
203 let dtype = DType::Bool(validity.nullability());
204 let slots = BoolData::make_slots(&validity, len);
205 let data = BoolData::try_new_from_handle(bits, offset, len, validity)?;
206 Ok(unsafe {
207 Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
208 })
209 }
210
211 pub unsafe fn new_unchecked(bits: BitBuffer, validity: Validity) -> Self {
217 let dtype = DType::Bool(validity.nullability());
218 let len = bits.len();
219 let slots = BoolData::make_slots(&validity, len);
220 let data = unsafe { BoolData::new_unchecked(bits, validity) };
222 unsafe {
223 Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
224 }
225 }
226
227 pub fn validate(bits: &BitBuffer, validity: &Validity) -> VortexResult<()> {
229 BoolData::validate(bits, validity)
230 }
231
232 pub fn from_indices<I: IntoIterator<Item = usize>>(
236 length: usize,
237 indices: I,
238 validity: Validity,
239 ) -> Self {
240 let mut buffer = BitBufferMut::new_unset(length);
241 indices.into_iter().for_each(|idx| buffer.set(idx));
242 Self::new(buffer.freeze(), validity)
243 }
244
245 pub fn into_bit_buffer(self) -> BitBuffer {
247 let len = self.len();
248 let data = self.into_data();
249 let buffer = data.bits.unwrap_host();
250 BitBuffer::new_with_offset(buffer, len, data.meta.offset())
251 }
252}
253
254impl BoolData {
256 pub(super) fn try_new(bits: BitBuffer, validity: Validity) -> VortexResult<Self> {
257 let bits = bits.shrink_offset();
258 Self::validate(&bits, &validity)?;
259
260 let (offset, len, buffer) = bits.into_inner();
261
262 Ok(Self {
263 bits: BufferHandle::new_host(buffer),
264 meta: BitBufferMeta::new(offset, len),
265 })
266 }
267
268 pub(super) fn try_new_from_handle(
269 bits: BufferHandle,
270 offset: usize,
271 len: usize,
272 validity: Validity,
273 ) -> VortexResult<Self> {
274 vortex_ensure!(offset < 8, "BitBuffer offset must be <8, got {}", offset);
275 if let Some(validity_len) = validity.maybe_len() {
276 vortex_ensure!(
277 validity_len == len,
278 "BoolArray of size {} cannot be built with validity of size {validity_len}",
279 len,
280 );
281 }
282
283 vortex_ensure!(
284 bits.len() * 8 >= (len + offset),
285 "provided BufferHandle with offset {offset} len {len} had size {} bits",
286 bits.len() * 8,
287 );
288
289 Ok(Self {
290 bits,
291 meta: BitBufferMeta::new(offset, len),
292 })
293 }
294
295 pub(super) unsafe fn new_unchecked(bits: BitBuffer, validity: Validity) -> Self {
296 if cfg!(debug_assertions) {
297 Self::try_new(bits, validity).vortex_expect("Failed to create BoolData")
298 } else {
299 let (offset, len, buffer) = bits.into_inner();
300
301 Self {
302 bits: BufferHandle::new_host(buffer),
303 meta: BitBufferMeta::new(offset, len),
304 }
305 }
306 }
307
308 pub(super) fn validate(bits: &BitBuffer, validity: &Validity) -> VortexResult<()> {
309 vortex_ensure!(
310 bits.offset() < 8,
311 "BitBuffer offset must be <8, got {}",
312 bits.offset()
313 );
314
315 if let Some(validity_len) = validity.maybe_len() {
316 vortex_ensure!(
317 validity_len == bits.len(),
318 "BoolArray of size {} cannot be built with validity of size {validity_len}",
319 bits.len()
320 );
321 }
322
323 Ok(())
324 }
325}
326
327impl From<BitBuffer> for BoolArray {
328 fn from(value: BitBuffer) -> Self {
329 BoolArray::new(value, Validity::NonNullable)
330 }
331}
332
333impl FromIterator<bool> for BoolArray {
334 fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
335 BoolArray::from(BitBuffer::from_iter(iter))
336 }
337}
338
339impl FromIterator<Option<bool>> for BoolArray {
340 fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
341 let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
342
343 BoolArray::new(
344 BitBuffer::from(buffer),
345 nulls
346 .map(|n| Validity::from(BitBuffer::from(n.into_inner())))
347 .unwrap_or(Validity::AllValid),
348 )
349 }
350}
351
352impl IntoArray for BitBuffer {
353 fn into_array(self) -> ArrayRef {
354 BoolArray::new(self, Validity::NonNullable).into_array()
355 }
356}
357
358impl IntoArray for BitBufferMut {
359 fn into_array(self) -> ArrayRef {
360 self.freeze().into_array()
361 }
362}
363
#[cfg(test)]
mod tests {
    use std::iter::once;
    use std::iter::repeat_n;

    use vortex_buffer::BitBuffer;
    use vortex_buffer::BitBufferMut;
    use vortex_buffer::buffer;

    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::BoolArray;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::bool::BoolArrayExt;
    use crate::assert_arrays_eq;
    use crate::patches::Patches;
    use crate::validity::Validity;

    /// Executes the scalar at `index` and converts it to `bool`, panicking on failure.
    fn bool_at(arr: &BoolArray, index: usize) -> bool {
        let scalar = arr
            .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();
        bool::try_from(&scalar).unwrap()
    }

    /// Executes the scalar at `index` and reports whether it is null.
    fn is_null_at(arr: &BoolArray, index: usize) -> bool {
        arr.execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap()
            .is_null()
    }

    /// A non-nullable array built from plain booleans yields its first value.
    #[test]
    fn bool_array() {
        let arr = BoolArray::from_iter([true, false, true]);
        assert!(bool_at(&arr, 0));
    }

    /// An iterator of only `Some` values produces an all-valid array.
    #[test]
    fn test_all_some_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(arr.validity(), Ok(Validity::AllValid)));

        assert!(bool_at(&arr, 0));
        assert!(!bool_at(&arr, 1));
    }

    /// Values and nulls round-trip through the `Option<bool>` iterator constructor.
    #[test]
    fn test_bool_from_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]);

        assert!(bool_at(&arr, 0));
        assert!(bool_at(&arr, 1));
        assert!(is_null_at(&arr, 2));
        assert!(!bool_at(&arr, 3));
        assert!(is_null_at(&arr, 4));
    }

    /// Patching an array leaves a slice taken before the patch unchanged.
    #[test]
    fn patch_sliced_bools() {
        let all_set = BoolArray::from(BitBuffer::new_set(12));
        let sliced = all_set.slice(4..12).unwrap();
        assert_arrays_eq!(sliced, BoolArray::from_iter([true; 8]));

        let mut bits = BitBufferMut::new_unset(12);
        for i in 1..12 {
            bits.set(i);
        }
        let arr = BoolArray::from(bits.freeze());

        let sliced = arr.slice(4..12).unwrap();
        let expected_slice: Vec<bool> = (4..12).map(|i| (1..12).contains(&i)).collect();
        assert_arrays_eq!(sliced, BoolArray::from_iter(expected_slice.clone()));

        // Clear the bit at index 4 of the unsliced array.
        let patch_indices = buffer![4u32].into_array();
        let patch_values = BoolArray::from(BitBuffer::new_unset(1)).into_array();
        let patches = Patches::new(arr.len(), 0, patch_indices, patch_values, None).unwrap();
        let arr = arr
            .patch(&patches, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();

        let expected_patched: Vec<bool> = (0..12).map(|i| i >= 1 && i != 4).collect();
        assert_arrays_eq!(arr, BoolArray::from_iter(expected_patched));

        // The earlier slice still holds the pre-patch values.
        assert_arrays_eq!(sliced, BoolArray::from_iter(expected_slice));
    }

    /// Slicing away both ends of a buffer keeps the interior values.
    #[test]
    fn slice_array_in_middle() {
        let arr = BoolArray::from(BitBuffer::new_set(16));
        let middle = arr.slice(4..12).unwrap();
        assert_arrays_eq!(middle, BoolArray::from_iter([true; 8]));
    }

    /// Patching an array keeps the same backing buffer pointer.
    #[test]
    fn patch_bools_owned() {
        let arr = BoolArray::from(BitBuffer::new_set(16));
        let original_ptr = arr.to_bit_buffer().inner().as_ptr();

        let patch_indices = PrimitiveArray::new(buffer![0u32], Validity::NonNullable).into_array();
        let patch_values = BoolArray::from(BitBuffer::new_unset(1)).into_array();
        let patches = Patches::new(arr.len(), 0, patch_indices, patch_values, None).unwrap();
        let arr = arr
            .patch(&patches, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();
        assert_eq!(arr.to_bit_buffer().inner().as_ptr(), original_ptr);

        let expected: BoolArray = once(false).chain(repeat_n(true, 15)).collect();
        assert_arrays_eq!(arr, expected);
    }

    /// Slicing a buffer whose length is not a multiple of 8 keeps all remaining values.
    #[test]
    fn patch_sliced_bools_offset() {
        let arr = BoolArray::from(BitBuffer::new_set(15));
        let tail = arr.slice(4..15).unwrap();
        assert_arrays_eq!(tail, BoolArray::from_iter([true; 11]));
    }
}