vortex_array/arrays/bool/
array.rs1use std::fmt::Display;
5use std::fmt::Formatter;
6
7use arrow_array::BooleanArray;
8use vortex_buffer::BitBuffer;
9use vortex_buffer::BitBufferMut;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_ensure;
13use vortex_mask::Mask;
14
15use crate::ArrayRef;
16use crate::IntoArray;
17use crate::array::Array;
18use crate::array::ArrayParts;
19use crate::array::TypedArrayRef;
20use crate::array::child_to_validity;
21use crate::array::validity_to_child;
22use crate::arrays::Bool;
23use crate::arrays::BoolArray;
24use crate::buffer::BufferHandle;
25use crate::dtype::DType;
26use crate::validity::Validity;
27
28pub(super) const VALIDITY_SLOT: usize = 0;
30pub(super) const NUM_SLOTS: usize = 1;
31pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["validity"];
32
33#[derive(Clone, Debug)]
66pub struct BoolData {
67 pub(super) bits: BufferHandle,
68 pub(super) offset: usize,
69}
70
71impl Display for BoolData {
72 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
73 write!(f, "offset: {}", self.offset)
74 }
75}
76
77pub struct BoolDataParts {
78 pub bits: BufferHandle,
79 pub offset: usize,
80 pub len: usize,
81}
82
83pub trait BoolArrayExt: TypedArrayRef<Bool> {
84 fn nullability(&self) -> crate::dtype::Nullability {
85 match self.as_ref().dtype() {
86 DType::Bool(nullability) => *nullability,
87 _ => unreachable!("BoolArrayExt requires a bool dtype"),
88 }
89 }
90
91 fn validity(&self) -> Validity {
92 child_to_validity(&self.as_ref().slots()[VALIDITY_SLOT], self.nullability())
93 }
94
95 fn bool_validity_mask(&self) -> Mask {
96 self.validity().to_mask(self.as_ref().len())
97 }
98
99 fn to_bit_buffer(&self) -> BitBuffer {
100 let buffer = self.bits.as_host().clone();
101 BitBuffer::new_with_offset(buffer, self.as_ref().len(), self.offset)
102 }
103
104 fn maybe_to_mask(&self) -> VortexResult<Option<Mask>> {
105 let all_valid = match &self.validity() {
106 Validity::NonNullable | Validity::AllValid => true,
107 Validity::AllInvalid => false,
108 Validity::Array(a) => a.statistics().compute_min::<bool>().unwrap_or(false),
109 };
110 Ok(all_valid.then(|| Mask::from_buffer(self.to_bit_buffer())))
111 }
112
113 fn to_mask(&self) -> Mask {
114 self.maybe_to_mask()
115 .vortex_expect("failed to check validity")
116 .vortex_expect("cannot convert nullable boolean array to mask")
117 }
118
119 fn to_mask_fill_null_false(&self) -> Mask {
120 let validity_mask = self.bool_validity_mask();
121 let buffer = match validity_mask {
122 Mask::AllTrue(_) => self.to_bit_buffer(),
123 Mask::AllFalse(_) => return Mask::new_false(self.as_ref().len()),
124 Mask::Values(validity) => validity.bit_buffer() & self.to_bit_buffer(),
125 };
126 Mask::from_buffer(buffer)
127 }
128}
129impl<T: TypedArrayRef<Bool>> BoolArrayExt for T {}
130
131impl BoolData {
133 #[inline]
135 pub fn into_parts(self, len: usize) -> BoolDataParts {
136 BoolDataParts {
137 bits: self.bits,
138 offset: self.offset,
139 len,
140 }
141 }
142
143 pub(crate) fn make_slots(validity: &Validity, len: usize) -> Vec<Option<ArrayRef>> {
144 vec![validity_to_child(validity, len)]
145 }
146}
147
148impl Array<Bool> {
150 pub fn new(bits: BitBuffer, validity: Validity) -> Self {
156 Self::try_new(bits, validity).vortex_expect("Failed to create BoolArray")
157 }
158
159 pub fn new_handle(handle: BufferHandle, offset: usize, len: usize, validity: Validity) -> Self {
165 Self::try_new_from_handle(handle, offset, len, validity)
166 .vortex_expect("Failed to create BoolArray from BufferHandle")
167 }
168
169 pub fn try_new(bits: BitBuffer, validity: Validity) -> VortexResult<Self> {
175 let dtype = DType::Bool(validity.nullability());
176 let len = bits.len();
177 let slots = BoolData::make_slots(&validity, len);
178 let data = BoolData::try_new(bits, validity)?;
179 Ok(unsafe {
180 Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
181 })
182 }
183
184 pub fn try_new_from_handle(
187 bits: BufferHandle,
188 offset: usize,
189 len: usize,
190 validity: Validity,
191 ) -> VortexResult<Self> {
192 let dtype = DType::Bool(validity.nullability());
193 let slots = BoolData::make_slots(&validity, len);
194 let data = BoolData::try_new_from_handle(bits, offset, len, validity)?;
195 Ok(unsafe {
196 Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
197 })
198 }
199
200 pub unsafe fn new_unchecked(bits: BitBuffer, validity: Validity) -> Self {
206 let dtype = DType::Bool(validity.nullability());
207 let len = bits.len();
208 let slots = BoolData::make_slots(&validity, len);
209 let data = unsafe { BoolData::new_unchecked(bits, validity) };
211 unsafe {
212 Array::from_parts_unchecked(ArrayParts::new(Bool, dtype, len, data).with_slots(slots))
213 }
214 }
215
216 pub fn validate(bits: &BitBuffer, validity: &Validity) -> VortexResult<()> {
218 BoolData::validate(bits, validity)
219 }
220
221 pub fn from_indices<I: IntoIterator<Item = usize>>(
225 length: usize,
226 indices: I,
227 validity: Validity,
228 ) -> Self {
229 let mut buffer = BitBufferMut::new_unset(length);
230 indices.into_iter().for_each(|idx| buffer.set(idx));
231 Self::new(buffer.freeze(), validity)
232 }
233
234 pub fn into_bit_buffer(self) -> BitBuffer {
236 let len = self.len();
237 let data = self.into_data();
238 let buffer = data.bits.unwrap_host();
239 BitBuffer::new_with_offset(buffer, len, data.offset)
240 }
241}
242
243impl BoolData {
245 pub(super) fn try_new(bits: BitBuffer, validity: Validity) -> VortexResult<Self> {
246 let bits = bits.shrink_offset();
247 Self::validate(&bits, &validity)?;
248
249 let (offset, _len, buffer) = bits.into_inner();
250
251 Ok(Self {
252 bits: BufferHandle::new_host(buffer),
253 offset,
254 })
255 }
256
257 pub(super) fn try_new_from_handle(
258 bits: BufferHandle,
259 offset: usize,
260 len: usize,
261 validity: Validity,
262 ) -> VortexResult<Self> {
263 vortex_ensure!(offset < 8, "BitBuffer offset must be <8, got {}", offset);
264 if let Some(validity_len) = validity.maybe_len() {
265 vortex_ensure!(
266 validity_len == len,
267 "BoolArray of size {} cannot be built with validity of size {validity_len}",
268 len,
269 );
270 }
271
272 vortex_ensure!(
273 bits.len() * 8 >= (len + offset),
274 "provided BufferHandle with offset {offset} len {len} had size {} bits",
275 bits.len() * 8,
276 );
277
278 Ok(Self { bits, offset })
279 }
280
281 pub(super) unsafe fn new_unchecked(bits: BitBuffer, validity: Validity) -> Self {
282 if cfg!(debug_assertions) {
283 Self::try_new(bits, validity).vortex_expect("Failed to create BoolData")
284 } else {
285 let (offset, _len, buffer) = bits.into_inner();
286
287 Self {
288 bits: BufferHandle::new_host(buffer),
289 offset,
290 }
291 }
292 }
293
294 pub(super) fn validate(bits: &BitBuffer, validity: &Validity) -> VortexResult<()> {
295 vortex_ensure!(
296 bits.offset() < 8,
297 "BitBuffer offset must be <8, got {}",
298 bits.offset()
299 );
300
301 if let Some(validity_len) = validity.maybe_len() {
302 vortex_ensure!(
303 validity_len == bits.len(),
304 "BoolArray of size {} cannot be built with validity of size {validity_len}",
305 bits.len()
306 );
307 }
308
309 Ok(())
310 }
311}
312
313impl From<BitBuffer> for BoolArray {
314 fn from(value: BitBuffer) -> Self {
315 BoolArray::new(value, Validity::NonNullable)
316 }
317}
318
319impl FromIterator<bool> for BoolArray {
320 fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
321 BoolArray::from(BitBuffer::from_iter(iter))
322 }
323}
324
325impl FromIterator<Option<bool>> for BoolArray {
326 fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
327 let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
328
329 BoolArray::new(
330 BitBuffer::from(buffer),
331 nulls
332 .map(|n| Validity::from(BitBuffer::from(n.into_inner())))
333 .unwrap_or(Validity::AllValid),
334 )
335 }
336}
337
338impl IntoArray for BitBuffer {
339 fn into_array(self) -> ArrayRef {
340 BoolArray::new(self, Validity::NonNullable).into_array()
341 }
342}
343
344impl IntoArray for BitBufferMut {
345 fn into_array(self) -> ArrayRef {
346 self.freeze().into_array()
347 }
348}
349
#[cfg(test)]
mod tests {
    use std::iter::once;
    use std::iter::repeat_n;

    use vortex_buffer::BitBuffer;
    use vortex_buffer::BitBufferMut;
    use vortex_buffer::buffer;

    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::BoolArray;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::bool::BoolArrayExt;
    use crate::assert_arrays_eq;
    use crate::patches::Patches;
    use crate::validity::Validity;

    /// The first element of a non-nullable array reads back as `true`.
    #[test]
    fn bool_array() {
        let array = BoolArray::from_iter([true, false, true]);
        let first = array.scalar_at(0).unwrap();
        assert!(bool::try_from(&first).unwrap());
    }

    /// Collecting only `Some` values yields an all-valid array with the
    /// expected contents.
    #[test]
    fn test_all_some_iter() {
        let array = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(array.validity(), Ok(Validity::AllValid)));

        let first = array.scalar_at(0).unwrap();
        assert!(bool::try_from(&first).unwrap());
        let second = array.scalar_at(1).unwrap();
        assert!(!bool::try_from(&second).unwrap());
    }

    /// Collecting a mix of `Some` and `None` preserves both values and nulls.
    #[test]
    fn test_bool_from_iter() {
        let items = [Some(true), Some(true), None, Some(false), None];
        let array = BoolArray::from_iter(items);

        for (idx, item) in items.into_iter().enumerate() {
            let scalar = array.scalar_at(idx).unwrap();
            match item {
                Some(expected) => assert_eq!(bool::try_from(&scalar).unwrap(), expected),
                None => assert!(scalar.is_null()),
            }
        }
    }

    /// Patching the parent array must not affect a slice taken beforehand.
    #[test]
    fn patch_sliced_bools() {
        let arr = BoolArray::from(BitBuffer::new_set(12));
        let sliced = arr.slice(4..12).unwrap();
        assert_arrays_eq!(sliced, BoolArray::from_iter([true; 8]));

        // All bits set except bit 0.
        let arr = {
            let mut builder = BitBufferMut::new_unset(12);
            for i in 1..12 {
                builder.set(i);
            }
            BoolArray::from(builder.freeze())
        };
        let sliced = arr.slice(4..12).unwrap();
        let expected_slice: Vec<bool> = (4..12).map(|i| (1..12).contains(&i)).collect();
        assert_arrays_eq!(sliced, BoolArray::from_iter(expected_slice.iter().copied()));

        // Clear position 4 of the parent through a single-element patch.
        let patches = Patches::new(
            arr.len(),
            0,
            buffer![4u32].into_array(),
            BoolArray::from(BitBuffer::new_unset(1)).into_array(),
            None,
        )
        .unwrap();
        let arr = arr
            .patch(&patches, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();
        let expected_patched: Vec<bool> = (0..12).map(|i| (1..12).contains(&i) && i != 4).collect();
        assert_arrays_eq!(arr, BoolArray::from_iter(expected_patched));

        // The earlier slice still shows the unpatched values.
        assert_arrays_eq!(sliced, BoolArray::from_iter(expected_slice));
    }

    /// Slicing in the middle of a buffer yields the expected values.
    #[test]
    fn slice_array_in_middle() {
        let full = BoolArray::from(BitBuffer::new_set(16));
        let sliced = full.slice(4..12).unwrap();
        assert_arrays_eq!(sliced, BoolArray::from_iter([true; 8]));
    }

    /// Patching an array leaves its values at the same buffer address.
    #[test]
    fn patch_bools_owned() {
        let arr = BoolArray::from(BitBuffer::new_set(16));
        // Keep this a single statement so the temporary bit buffer is dropped
        // before the patch runs.
        let buf_ptr = arr.to_bit_buffer().inner().as_ptr();

        let patch_indices = PrimitiveArray::new(buffer![0u32], Validity::NonNullable).into_array();
        let patch_values = BoolArray::from(BitBuffer::new_unset(1)).into_array();
        let patches = Patches::new(arr.len(), 0, patch_indices, patch_values, None).unwrap();
        let arr = arr
            .patch(&patches, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();
        assert_eq!(arr.to_bit_buffer().inner().as_ptr(), buf_ptr);

        let expected: BoolArray = once(false).chain(repeat_n(true, 15)).collect();
        assert_arrays_eq!(arr, expected);
    }

    /// Slicing a buffer whose length is not a multiple of 8 keeps all values.
    #[test]
    fn patch_sliced_bools_offset() {
        let full = BoolArray::from(BitBuffer::new_set(15));
        let sliced = full.slice(4..15).unwrap();
        assert_arrays_eq!(sliced, BoolArray::from_iter([true; 11]));
    }
}