polars_arrow/array/boolean/
mod.rs

1use either::Either;
2use polars_error::{PolarsResult, polars_bail};
3
4use super::{Array, Splitable};
5use crate::array::iterator::NonNullValuesIter;
6use crate::bitmap::utils::{BitmapIter, ZipValidity};
7use crate::bitmap::{Bitmap, MutableBitmap};
8use crate::compute::utils::{combine_validities_and, combine_validities_or};
9use crate::datatypes::{ArrowDataType, PhysicalType};
10use crate::trusted_len::TrustedLen;
11
12mod ffi;
13pub(super) mod fmt;
14mod from;
15mod iterator;
16mod mutable;
17pub use mutable::*;
18mod builder;
19pub use builder::*;
20#[cfg(feature = "proptest")]
21pub mod proptest;
22
/// A [`BooleanArray`] is Arrow's semantic equivalent of an immutable `Vec<Option<bool>>`.
/// It implements [`Array`].
///
/// One way to think about a [`BooleanArray`] is `(DataType, Arc<Vec<u8>>, Option<Arc<Vec<u8>>>)`
/// where:
/// * the first item is the array's logical type
/// * the second is the immutable values
/// * the third is the immutable validity (whether a value is null or not as a bitmap).
///
/// The size of this struct is `O(1)`, as all data is stored behind an [`std::sync::Arc`].
/// # Example
/// ```
/// use polars_arrow::array::BooleanArray;
/// use polars_arrow::bitmap::Bitmap;
/// use polars_arrow::buffer::Buffer;
///
/// let array = BooleanArray::from([Some(true), None, Some(false)]);
/// assert_eq!(array.value(0), true);
/// assert_eq!(array.iter().collect::<Vec<_>>(), vec![Some(true), None, Some(false)]);
/// assert_eq!(array.values_iter().collect::<Vec<_>>(), vec![true, false, false]);
/// // the underlying representation
/// assert_eq!(array.values(), &Bitmap::from([true, false, false]));
/// assert_eq!(array.validity(), Some(&Bitmap::from([true, false, true])));
///
/// ```
#[derive(Clone)]
pub struct BooleanArray {
    // Logical type; `try_new` only accepts types whose physical type is `PhysicalType::Boolean`.
    dtype: ArrowDataType,
    // One bit per slot. Bits that sit on null slots are unspecified.
    values: Bitmap,
    // Optional null mask (a set bit marks a non-null slot, as in the example above).
    // `try_new` requires it to have the same length as `values`.
    validity: Option<Bitmap>,
}
54
55impl BooleanArray {
56    /// The canonical method to create a [`BooleanArray`] out of low-end APIs.
57    /// # Errors
58    /// This function errors iff:
59    /// * The validity is not `None` and its length is different from `values`'s length
60    /// * The `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Boolean`].
61    pub fn try_new(
62        dtype: ArrowDataType,
63        values: Bitmap,
64        validity: Option<Bitmap>,
65    ) -> PolarsResult<Self> {
66        if validity
67            .as_ref()
68            .is_some_and(|validity| validity.len() != values.len())
69        {
70            polars_bail!(ComputeError: "validity mask length must match the number of values")
71        }
72
73        if dtype.to_physical_type() != PhysicalType::Boolean {
74            polars_bail!(ComputeError: "BooleanArray can only be initialized with a DataType whose physical type is Boolean")
75        }
76
77        Ok(Self {
78            dtype,
79            values,
80            validity,
81        })
82    }
83
84    /// Alias to `Self::try_new().unwrap()`
85    pub fn new(dtype: ArrowDataType, values: Bitmap, validity: Option<Bitmap>) -> Self {
86        Self::try_new(dtype, values, validity).unwrap()
87    }
88
89    /// Returns an iterator over the optional values of this [`BooleanArray`].
90    #[inline]
91    pub fn iter(&self) -> ZipValidity<bool, BitmapIter, BitmapIter> {
92        ZipValidity::new_with_validity(self.values().iter(), self.validity())
93    }
94
95    /// Returns an iterator over the values of this [`BooleanArray`].
96    #[inline]
97    pub fn values_iter(&self) -> BitmapIter {
98        self.values().iter()
99    }
100
101    /// Returns an iterator of the non-null values.
102    #[inline]
103    pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, BooleanArray> {
104        NonNullValuesIter::new(self, self.validity())
105    }
106
107    /// Returns the length of this array
108    #[inline]
109    pub fn len(&self) -> usize {
110        self.values.len()
111    }
112
113    /// The values [`Bitmap`].
114    /// Values on null slots are undetermined (they can be anything).
115    #[inline]
116    pub fn values(&self) -> &Bitmap {
117        &self.values
118    }
119
120    /// Returns the optional validity.
121    #[inline]
122    pub fn validity(&self) -> Option<&Bitmap> {
123        self.validity.as_ref()
124    }
125
126    /// Returns the arrays' [`ArrowDataType`].
127    #[inline]
128    pub fn dtype(&self) -> &ArrowDataType {
129        &self.dtype
130    }
131
132    /// Returns the value at index `i`
133    /// # Panic
134    /// This function panics iff `i >= self.len()`.
135    #[inline]
136    pub fn value(&self, i: usize) -> bool {
137        self.values.get_bit(i)
138    }
139
140    /// Returns the element at index `i` as bool
141    ///
142    /// # Safety
143    /// Caller must be sure that `i < self.len()`
144    #[inline]
145    pub unsafe fn value_unchecked(&self, i: usize) -> bool {
146        self.values.get_bit_unchecked(i)
147    }
148
149    /// Returns the element at index `i` or `None` if it is null
150    /// # Panics
151    /// iff `i >= self.len()`
152    #[inline]
153    pub fn get(&self, i: usize) -> Option<bool> {
154        if !self.is_null(i) {
155            // soundness: Array::is_null panics if i >= self.len
156            unsafe { Some(self.value_unchecked(i)) }
157        } else {
158            None
159        }
160    }
161
162    /// Slices this [`BooleanArray`].
163    /// # Implementation
164    /// This operation is `O(1)` as it amounts to increase up to two ref counts.
165    /// # Panic
166    /// This function panics iff `offset + length > self.len()`.
167    #[inline]
168    pub fn slice(&mut self, offset: usize, length: usize) {
169        assert!(
170            offset + length <= self.len(),
171            "the offset of the new Buffer cannot exceed the existing length"
172        );
173        unsafe { self.slice_unchecked(offset, length) }
174    }
175
176    /// Slices this [`BooleanArray`].
177    /// # Implementation
178    /// This operation is `O(1)` as it amounts to increase two ref counts.
179    ///
180    /// # Safety
181    /// The caller must ensure that `offset + length <= self.len()`.
182    #[inline]
183    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
184        self.validity = self
185            .validity
186            .take()
187            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
188            .filter(|bitmap| bitmap.unset_bits() > 0);
189        self.values.slice_unchecked(offset, length);
190    }
191
192    impl_sliced!();
193    impl_mut_validity!();
194    impl_into_array!();
195
196    /// Returns a clone of this [`BooleanArray`] with new values.
197    /// # Panics
198    /// This function panics iff `values.len() != self.len()`.
199    #[must_use]
200    pub fn with_values(&self, values: Bitmap) -> Self {
201        let mut out = self.clone();
202        out.set_values(values);
203        out
204    }
205
206    /// Sets the values of this [`BooleanArray`].
207    /// # Panics
208    /// This function panics iff `values.len() != self.len()`.
209    pub fn set_values(&mut self, values: Bitmap) {
210        assert_eq!(
211            values.len(),
212            self.len(),
213            "values length must be equal to this arrays length"
214        );
215        self.values = values;
216    }
217
218    /// Applies a function `f` to the values of this array, cloning the values
219    /// iff they are being shared with others
220    ///
221    /// This is an API to use clone-on-write
222    /// # Implementation
223    /// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
224    /// if it is being shared (since it results in a `O(N)` memcopy).
225    /// # Panics
226    /// This function panics if the function modifies the length of the [`MutableBitmap`].
227    pub fn apply_values_mut<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {
228        let values = std::mem::take(&mut self.values);
229        let mut values = values.make_mut();
230        f(&mut values);
231        if let Some(validity) = &self.validity {
232            assert_eq!(validity.len(), values.len());
233        }
234        self.values = values.into();
235    }
236
237    /// Try to convert this [`BooleanArray`] to a [`MutableBooleanArray`]
238    pub fn into_mut(self) -> Either<Self, MutableBooleanArray> {
239        use Either::*;
240
241        if let Some(bitmap) = self.validity {
242            match bitmap.into_mut() {
243                Left(bitmap) => Left(BooleanArray::new(self.dtype, self.values, Some(bitmap))),
244                Right(mutable_bitmap) => match self.values.into_mut() {
245                    Left(immutable) => Left(BooleanArray::new(
246                        self.dtype,
247                        immutable,
248                        Some(mutable_bitmap.into()),
249                    )),
250                    Right(mutable) => Right(
251                        MutableBooleanArray::try_new(self.dtype, mutable, Some(mutable_bitmap))
252                            .unwrap(),
253                    ),
254                },
255            }
256        } else {
257            match self.values.into_mut() {
258                Left(immutable) => Left(BooleanArray::new(self.dtype, immutable, None)),
259                Right(mutable) => {
260                    Right(MutableBooleanArray::try_new(self.dtype, mutable, None).unwrap())
261                },
262            }
263        }
264    }
265
266    /// Returns a new empty [`BooleanArray`].
267    pub fn new_empty(dtype: ArrowDataType) -> Self {
268        Self::new(dtype, Bitmap::new(), None)
269    }
270
271    /// Returns a new [`BooleanArray`] whose all slots are null / `None`.
272    pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
273        let bitmap = Bitmap::new_zeroed(length);
274        Self::new(dtype, bitmap.clone(), Some(bitmap))
275    }
276
277    /// Creates a new [`BooleanArray`] from an [`TrustedLen`] of `bool`.
278    #[inline]
279    pub fn from_trusted_len_values_iter<I: TrustedLen<Item = bool>>(iterator: I) -> Self {
280        MutableBooleanArray::from_trusted_len_values_iter(iterator).into()
281    }
282
283    /// Creates a new [`BooleanArray`] from an [`TrustedLen`] of `bool`.
284    /// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len
285    /// but this crate does not mark it as such.
286    ///
287    /// # Safety
288    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
289    /// I.e. that `size_hint().1` correctly reports its length.
290    #[inline]
291    pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = bool>>(
292        iterator: I,
293    ) -> Self {
294        MutableBooleanArray::from_trusted_len_values_iter_unchecked(iterator).into()
295    }
296
297    /// Creates a new [`BooleanArray`] from a slice of `bool`.
298    #[inline]
299    pub fn from_slice<P: AsRef<[bool]>>(slice: P) -> Self {
300        MutableBooleanArray::from_slice(slice).into()
301    }
302
303    /// Creates a [`BooleanArray`] from an iterator of trusted length.
304    /// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len
305    /// but this crate does not mark it as such.
306    ///
307    /// # Safety
308    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
309    /// I.e. that `size_hint().1` correctly reports its length.
310    #[inline]
311    pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
312    where
313        P: std::borrow::Borrow<bool>,
314        I: Iterator<Item = Option<P>>,
315    {
316        MutableBooleanArray::from_trusted_len_iter_unchecked(iterator).into()
317    }
318
319    /// Creates a [`BooleanArray`] from a [`TrustedLen`].
320    #[inline]
321    pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
322    where
323        P: std::borrow::Borrow<bool>,
324        I: TrustedLen<Item = Option<P>>,
325    {
326        MutableBooleanArray::from_trusted_len_iter(iterator).into()
327    }
328
329    /// Creates a [`BooleanArray`] from an falible iterator of trusted length.
330    ///
331    /// # Safety
332    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
333    /// I.e. that `size_hint().1` correctly reports its length.
334    #[inline]
335    pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(iterator: I) -> Result<Self, E>
336    where
337        P: std::borrow::Borrow<bool>,
338        I: Iterator<Item = Result<Option<P>, E>>,
339    {
340        Ok(MutableBooleanArray::try_from_trusted_len_iter_unchecked(iterator)?.into())
341    }
342
343    /// Creates a [`BooleanArray`] from a [`TrustedLen`].
344    #[inline]
345    pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> Result<Self, E>
346    where
347        P: std::borrow::Borrow<bool>,
348        I: TrustedLen<Item = Result<Option<P>, E>>,
349    {
350        Ok(MutableBooleanArray::try_from_trusted_len_iter(iterator)?.into())
351    }
352
353    pub fn true_and_valid(&self) -> Bitmap {
354        match &self.validity {
355            None => self.values.clone(),
356            Some(validity) => combine_validities_and(Some(&self.values), Some(validity)).unwrap(),
357        }
358    }
359
360    pub fn true_or_valid(&self) -> Bitmap {
361        match &self.validity {
362            None => self.values.clone(),
363            Some(validity) => combine_validities_or(Some(&self.values), Some(validity)).unwrap(),
364        }
365    }
366
367    /// Returns its internal representation
368    #[must_use]
369    pub fn into_inner(self) -> (ArrowDataType, Bitmap, Option<Bitmap>) {
370        let Self {
371            dtype,
372            values,
373            validity,
374        } = self;
375        (dtype, values, validity)
376    }
377
    /// Creates a [`BooleanArray`] from its internal representation, without any validation.
    /// This is the inverse of [`BooleanArray::into_inner`].
    ///
    /// # Safety
    /// Callers must ensure all invariants of this struct are upheld. The checks that
    /// [`BooleanArray::try_new`] performs, and that are skipped here, are:
    /// * a `Some` validity has the same length as `values`
    /// * the physical type of `dtype` is [`PhysicalType::Boolean`].
    pub unsafe fn from_inner_unchecked(
        dtype: ArrowDataType,
        values: Bitmap,
        validity: Option<Bitmap>,
    ) -> Self {
        Self {
            dtype,
            values,
            validity,
        }
    }
394}
395
396impl Array for BooleanArray {
397    impl_common_array!();
398
399    fn validity(&self) -> Option<&Bitmap> {
400        self.validity.as_ref()
401    }
402
403    #[inline]
404    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
405        Box::new(self.clone().with_validity(validity))
406    }
407}
408
409impl Splitable for BooleanArray {
410    fn check_bound(&self, offset: usize) -> bool {
411        offset <= self.len()
412    }
413
414    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
415        let (lhs_values, rhs_values) = unsafe { self.values.split_at_unchecked(offset) };
416        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
417
418        (
419            Self {
420                dtype: self.dtype.clone(),
421                values: lhs_values,
422                validity: lhs_validity,
423            },
424            Self {
425                dtype: self.dtype.clone(),
426                values: rhs_values,
427                validity: rhs_validity,
428            },
429        )
430    }
431}
432
/// Wraps a [`Bitmap`] of values as a [`BooleanArray`] with data type
/// [`ArrowDataType::Boolean`] and no validity.
impl From<Bitmap> for BooleanArray {
    fn from(values: Bitmap) -> Self {
        // Built directly instead of through `try_new`: there is no validity whose length
        // would have to be compared with `values`.
        Self {
            dtype: ArrowDataType::Boolean,
            values,
            validity: None,
        }
    }
}
441}