1use crate::array::print_long_array;
19use crate::builder::BooleanBuilder;
20use crate::iterator::BooleanIter;
21use crate::{Array, ArrayAccessor, ArrayRef, Scalar};
22use arrow_buffer::{bit_util, BooleanBuffer, Buffer, MutableBuffer, NullBuffer};
23use arrow_data::{ArrayData, ArrayDataBuilder};
24use arrow_schema::DataType;
25use std::any::Any;
26use std::sync::Arc;
27
28#[derive(Clone)]
68pub struct BooleanArray {
69 values: BooleanBuffer,
70 nulls: Option<NullBuffer>,
71}
72
73impl std::fmt::Debug for BooleanArray {
74 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
75 write!(f, "BooleanArray\n[\n")?;
76 print_long_array(self, f, |array, index, f| {
77 std::fmt::Debug::fmt(&array.value(index), f)
78 })?;
79 write!(f, "]")
80 }
81}
82
83impl BooleanArray {
84 pub fn new(values: BooleanBuffer, nulls: Option<NullBuffer>) -> Self {
90 if let Some(n) = nulls.as_ref() {
91 assert_eq!(values.len(), n.len());
92 }
93 Self { values, nulls }
94 }
95
96 pub fn new_null(len: usize) -> Self {
98 Self {
99 values: BooleanBuffer::new_unset(len),
100 nulls: Some(NullBuffer::new_null(len)),
101 }
102 }
103
104 pub fn new_scalar(value: bool) -> Scalar<Self> {
106 let values = match value {
107 true => BooleanBuffer::new_set(1),
108 false => BooleanBuffer::new_unset(1),
109 };
110 Scalar::new(Self::new(values, None))
111 }
112
113 pub fn new_from_packed(buffer: impl Into<Buffer>, offset: usize, len: usize) -> Self {
119 BooleanBuffer::new(buffer.into(), offset, len).into()
120 }
121
122 pub fn new_from_u8(value: &[u8]) -> Self {
128 BooleanBuffer::new(Buffer::from(value), 0, value.len() * 8).into()
129 }
130
131 pub fn len(&self) -> usize {
133 self.values.len()
134 }
135
136 pub fn is_empty(&self) -> bool {
138 self.values.is_empty()
139 }
140
141 pub fn slice(&self, offset: usize, length: usize) -> Self {
143 Self {
144 values: self.values.slice(offset, length),
145 nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
146 }
147 }
148
149 pub fn builder(capacity: usize) -> BooleanBuilder {
151 BooleanBuilder::with_capacity(capacity)
152 }
153
154 pub fn values(&self) -> &BooleanBuffer {
156 &self.values
157 }
158
159 pub fn true_count(&self) -> usize {
161 match self.nulls() {
162 Some(nulls) => {
163 let null_chunks = nulls.inner().bit_chunks().iter_padded();
164 let value_chunks = self.values().bit_chunks().iter_padded();
165 null_chunks
166 .zip(value_chunks)
167 .map(|(a, b)| (a & b).count_ones() as usize)
168 .sum()
169 }
170 None => self.values().count_set_bits(),
171 }
172 }
173
174 pub fn false_count(&self) -> usize {
176 self.len() - self.null_count() - self.true_count()
177 }
178
179 pub unsafe fn value_unchecked(&self, i: usize) -> bool {
187 self.values.value_unchecked(i)
188 }
189
190 pub fn value(&self, i: usize) -> bool {
198 assert!(
199 i < self.len(),
200 "Trying to access an element at index {} from a BooleanArray of length {}",
201 i,
202 self.len()
203 );
204 unsafe { self.value_unchecked(i) }
207 }
208
209 pub fn take_iter<'a>(
211 &'a self,
212 indexes: impl Iterator<Item = Option<usize>> + 'a,
213 ) -> impl Iterator<Item = Option<bool>> + 'a {
214 indexes.map(|opt_index| opt_index.map(|index| self.value(index)))
215 }
216
217 pub unsafe fn take_iter_unchecked<'a>(
222 &'a self,
223 indexes: impl Iterator<Item = Option<usize>> + 'a,
224 ) -> impl Iterator<Item = Option<bool>> + 'a {
225 indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
226 }
227
228 pub fn from_unary<T: ArrayAccessor, F>(left: T, mut op: F) -> Self
239 where
240 F: FnMut(T::Item) -> bool,
241 {
242 let nulls = left.logical_nulls();
243 let values = BooleanBuffer::collect_bool(left.len(), |i| unsafe {
244 op(left.value_unchecked(i))
246 });
247 Self::new(values, nulls)
248 }
249
250 pub fn from_binary<T: ArrayAccessor, S: ArrayAccessor, F>(left: T, right: S, mut op: F) -> Self
267 where
268 F: FnMut(T::Item, S::Item) -> bool,
269 {
270 assert_eq!(left.len(), right.len());
271
272 let nulls = NullBuffer::union(
273 left.logical_nulls().as_ref(),
274 right.logical_nulls().as_ref(),
275 );
276 let values = BooleanBuffer::collect_bool(left.len(), |i| unsafe {
277 op(left.value_unchecked(i), right.value_unchecked(i))
279 });
280 Self::new(values, nulls)
281 }
282
283 pub fn into_parts(self) -> (BooleanBuffer, Option<NullBuffer>) {
285 (self.values, self.nulls)
286 }
287}
288
289impl Array for BooleanArray {
290 fn as_any(&self) -> &dyn Any {
291 self
292 }
293
294 fn to_data(&self) -> ArrayData {
295 self.clone().into()
296 }
297
298 fn into_data(self) -> ArrayData {
299 self.into()
300 }
301
302 fn data_type(&self) -> &DataType {
303 &DataType::Boolean
304 }
305
306 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
307 Arc::new(self.slice(offset, length))
308 }
309
310 fn len(&self) -> usize {
311 self.values.len()
312 }
313
314 fn is_empty(&self) -> bool {
315 self.values.is_empty()
316 }
317
318 fn shrink_to_fit(&mut self) {
319 self.values.shrink_to_fit();
320 if let Some(nulls) = &mut self.nulls {
321 nulls.shrink_to_fit();
322 }
323 }
324
325 fn offset(&self) -> usize {
326 self.values.offset()
327 }
328
329 fn nulls(&self) -> Option<&NullBuffer> {
330 self.nulls.as_ref()
331 }
332
333 fn logical_null_count(&self) -> usize {
334 self.null_count()
335 }
336
337 fn get_buffer_memory_size(&self) -> usize {
338 let mut sum = self.values.inner().capacity();
339 if let Some(x) = &self.nulls {
340 sum += x.buffer().capacity()
341 }
342 sum
343 }
344
345 fn get_array_memory_size(&self) -> usize {
346 std::mem::size_of::<Self>() + self.get_buffer_memory_size()
347 }
348}
349
350impl ArrayAccessor for &BooleanArray {
351 type Item = bool;
352
353 fn value(&self, index: usize) -> Self::Item {
354 BooleanArray::value(self, index)
355 }
356
357 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
358 BooleanArray::value_unchecked(self, index)
359 }
360}
361
362impl From<Vec<bool>> for BooleanArray {
363 fn from(data: Vec<bool>) -> Self {
364 let mut mut_buf = MutableBuffer::new_null(data.len());
365 {
366 let mut_slice = mut_buf.as_slice_mut();
367 for (i, b) in data.iter().enumerate() {
368 if *b {
369 bit_util::set_bit(mut_slice, i);
370 }
371 }
372 }
373 let array_data = ArrayData::builder(DataType::Boolean)
374 .len(data.len())
375 .add_buffer(mut_buf.into());
376
377 let array_data = unsafe { array_data.build_unchecked() };
378 BooleanArray::from(array_data)
379 }
380}
381
382impl From<Vec<Option<bool>>> for BooleanArray {
383 fn from(data: Vec<Option<bool>>) -> Self {
384 data.iter().collect()
385 }
386}
387
388impl From<ArrayData> for BooleanArray {
389 fn from(data: ArrayData) -> Self {
390 assert_eq!(
391 data.data_type(),
392 &DataType::Boolean,
393 "BooleanArray expected ArrayData with type {} got {}",
394 DataType::Boolean,
395 data.data_type()
396 );
397 assert_eq!(
398 data.buffers().len(),
399 1,
400 "BooleanArray data should contain a single buffer only (values buffer)"
401 );
402 let values = BooleanBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
403
404 Self {
405 values,
406 nulls: data.nulls().cloned(),
407 }
408 }
409}
410
411impl From<BooleanArray> for ArrayData {
412 fn from(array: BooleanArray) -> Self {
413 let builder = ArrayDataBuilder::new(DataType::Boolean)
414 .len(array.values.len())
415 .offset(array.values.offset())
416 .nulls(array.nulls)
417 .buffers(vec![array.values.into_inner()]);
418
419 unsafe { builder.build_unchecked() }
420 }
421}
422
423impl<'a> IntoIterator for &'a BooleanArray {
424 type Item = Option<bool>;
425 type IntoIter = BooleanIter<'a>;
426
427 fn into_iter(self) -> Self::IntoIter {
428 BooleanIter::<'a>::new(self)
429 }
430}
431
432impl<'a> BooleanArray {
433 pub fn iter(&'a self) -> BooleanIter<'a> {
435 BooleanIter::<'a>::new(self)
436 }
437}
438
439impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
440 fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
441 let iter = iter.into_iter();
442 let (_, data_len) = iter.size_hint();
443 let data_len = data_len.expect("Iterator must be sized"); let num_bytes = bit_util::ceil(data_len, 8);
446 let mut null_builder = MutableBuffer::from_len_zeroed(num_bytes);
447 let mut val_builder = MutableBuffer::from_len_zeroed(num_bytes);
448
449 let data = val_builder.as_slice_mut();
450
451 let null_slice = null_builder.as_slice_mut();
452 iter.enumerate().for_each(|(i, item)| {
453 if let Some(a) = item.borrow() {
454 bit_util::set_bit(null_slice, i);
455 if *a {
456 bit_util::set_bit(data, i);
457 }
458 }
459 });
460
461 let data = unsafe {
462 ArrayData::new_unchecked(
463 DataType::Boolean,
464 data_len,
465 None,
466 Some(null_builder.into()),
467 0,
468 vec![val_builder.into()],
469 vec![],
470 )
471 };
472 BooleanArray::from(data)
473 }
474}
475
476impl From<BooleanBuffer> for BooleanArray {
477 fn from(values: BooleanBuffer) -> Self {
478 Self {
479 values,
480 nulls: None,
481 }
482 }
483}
484
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_buffer::Buffer;
    use rand::{rng, Rng};

    /// Debug output of an array without nulls.
    #[test]
    fn test_boolean_fmt_debug() {
        let arr = BooleanArray::from(vec![true, false, false]);
        assert_eq!(
            "BooleanArray\n[\n  true,\n  false,\n  false,\n]",
            format!("{arr:?}")
        );
    }

    /// Debug output renders null slots as `null`.
    #[test]
    fn test_boolean_with_null_fmt_debug() {
        let mut builder = BooleanArray::builder(3);
        builder.append_value(true);
        builder.append_null();
        builder.append_value(false);
        let arr = builder.finish();
        assert_eq!(
            "BooleanArray\n[\n  true,\n  null,\n  false,\n]",
            format!("{arr:?}")
        );
    }

    #[test]
    fn test_boolean_array_from_vec() {
        // 10 == 0b1010: elements 1 and 3 are set.
        let buf = Buffer::from([10_u8]);
        let arr = BooleanArray::from(vec![false, true, false, true]);
        assert_eq!(&buf, arr.values().inner());
        assert_eq!(4, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for i in 0..4 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {i}")
        }
    }

    #[test]
    fn test_boolean_array_from_vec_option() {
        // The null slot (index 2) leaves its value bit unset, so the packed
        // values are again 0b1010.
        let buf = Buffer::from([10_u8]);
        let arr = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]);
        assert_eq!(&buf, arr.values().inner());
        assert_eq!(4, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(1, arr.null_count());
        for i in 0..4 {
            if i == 2 {
                assert!(arr.is_null(i));
                assert!(!arr.is_valid(i));
            } else {
                assert!(!arr.is_null(i));
                assert!(arr.is_valid(i));
                assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {i}")
            }
        }
    }

    #[test]
    fn test_boolean_array_from_packed() {
        let v = [1_u8, 2_u8, 3_u8];
        let arr = BooleanArray::new_from_packed(v, 0, 24);
        assert_eq!(24, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        assert!(arr.nulls.is_none());
        for i in 0..24 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            // Bytes 1, 2, 3 set bit 0, bit 9 and bits 16/17 respectively.
            assert_eq!(
                i == 0 || i == 9 || i == 16 || i == 17,
                arr.value(i),
                "failed at {i}"
            )
        }
    }

    #[test]
    fn test_boolean_array_from_slice_u8() {
        let v: Vec<u8> = vec![1, 2, 3];
        let slice = &v[..];
        let arr = BooleanArray::new_from_u8(slice);
        assert_eq!(24, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        assert!(arr.nulls().is_none());
        for i in 0..24 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            // Bytes 1, 2, 3 set bit 0, bit 9 and bits 16/17 respectively.
            assert_eq!(
                i == 0 || i == 9 || i == 16 || i == 17,
                arr.value(i),
                "failed at {i}"
            )
        }
    }

    #[test]
    fn test_boolean_array_from_iter() {
        let v = vec![Some(false), Some(true), Some(false), Some(true)];
        let arr = v.into_iter().collect::<BooleanArray>();
        assert_eq!(4, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        assert!(arr.nulls().is_none());
        // Check every element, including the last one.
        for i in 0..4 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {i}")
        }
    }

    #[test]
    fn test_boolean_array_from_nullable_iter() {
        let v = vec![Some(true), None, Some(false), None];
        let arr = v.into_iter().collect::<BooleanArray>();
        assert_eq!(4, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(2, arr.null_count());
        assert!(arr.nulls().is_some());

        assert!(arr.is_valid(0));
        assert!(arr.is_null(1));
        assert!(arr.is_valid(2));
        assert!(arr.is_null(3));

        assert!(arr.value(0));
        assert!(!arr.value(2));
    }

    #[test]
    fn test_boolean_array_builder() {
        // 27 == 0b0001_1011; with an offset of 2 the five visible elements
        // are bits 2..=6, i.e. [false, true, true, false, false].
        let buf = Buffer::from([27_u8]);
        let buf2 = buf.clone();
        let data = ArrayData::builder(DataType::Boolean)
            .len(5)
            .offset(2)
            .add_buffer(buf)
            .build()
            .unwrap();
        let arr = BooleanArray::from(data);
        assert_eq!(&buf2, arr.values().inner());
        assert_eq!(5, arr.len());
        assert_eq!(2, arr.offset());
        assert_eq!(0, arr.null_count());
        let expected = [false, true, true, false, false];
        for (i, want) in expected.iter().enumerate() {
            assert_eq!(*want, arr.value(i), "failed at {i}");
        }
    }

    #[test]
    #[should_panic(
        expected = "Trying to access an element at index 4 from a BooleanArray of length 3"
    )]
    fn test_boolean_array_get_value_index_out_of_bound() {
        let v = vec![Some(true), None, Some(false)];
        let array = v.into_iter().collect::<BooleanArray>();

        array.value(4);
    }

    #[test]
    #[should_panic(
        expected = "BooleanArray data should contain a single buffer only (values buffer)"
    )]
    // NOTE(review): presumably skipped under `force_validate` because the
    // panic message differs there -- confirm.
    #[cfg(not(feature = "force_validate"))]
    fn test_boolean_array_invalid_buffer_len() {
        // No buffer is added, so the conversion must reject the data.
        let data = unsafe {
            ArrayData::builder(DataType::Boolean)
                .len(5)
                .build_unchecked()
        };
        drop(BooleanArray::from(data));
    }

    #[test]
    #[should_panic(expected = "BooleanArray expected ArrayData with type Boolean got Int32")]
    fn test_from_array_data_validation() {
        let _ = BooleanArray::from(ArrayData::new_empty(&DataType::Int32));
    }

    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_true_false_count() {
        let mut rng = rng();

        for _ in 0..10 {
            // Array without nulls.
            let d: Vec<_> = (0..2000).map(|_| rng.random_bool(0.5)).collect();
            let b = BooleanArray::from(d.clone());

            let expected_true = d.iter().filter(|x| **x).count();
            assert_eq!(b.true_count(), expected_true);
            assert_eq!(b.false_count(), d.len() - expected_true);

            // Array with nulls.
            let d: Vec<_> = (0..2000)
                .map(|_| rng.random_bool(0.5).then(|| rng.random_bool(0.5)))
                .collect();
            let b = BooleanArray::from(d.clone());

            let expected_true = d.iter().filter(|x| matches!(x, Some(true))).count();
            assert_eq!(b.true_count(), expected_true);

            let expected_false = d.iter().filter(|x| matches!(x, Some(false))).count();
            assert_eq!(b.false_count(), expected_false);
        }
    }

    #[test]
    fn test_into_parts() {
        let boolean_array = [Some(true), None, Some(false)]
            .into_iter()
            .collect::<BooleanArray>();
        let (values, nulls) = boolean_array.into_parts();
        assert_eq!(values.values(), &[0b0000_0001]);
        assert!(nulls.is_some());
        assert_eq!(nulls.unwrap().buffer().as_slice(), &[0b0000_0101]);

        let boolean_array =
            BooleanArray::from(vec![false, false, false, false, false, false, false, true]);
        let (values, nulls) = boolean_array.into_parts();
        assert_eq!(values.values(), &[0b1000_0000]);
        assert!(nulls.is_none());
    }
}