polars_arrow/array/boolean/
mod.rs

1use either::Either;
2use polars_error::{PolarsResult, polars_bail};
3
4use super::{Array, Splitable};
5use crate::array::iterator::NonNullValuesIter;
6use crate::bitmap::utils::{BitmapIter, ZipValidity};
7use crate::bitmap::{Bitmap, MutableBitmap};
8use crate::compute::utils::{combine_validities_and, combine_validities_or};
9use crate::datatypes::{ArrowDataType, PhysicalType};
10use crate::trusted_len::TrustedLen;
11
12mod ffi;
13pub(super) mod fmt;
14mod from;
15mod iterator;
16mod mutable;
17pub use mutable::*;
18mod builder;
19pub use builder::*;
20
21/// A [`BooleanArray`] is Arrow's semantically equivalent of an immutable `Vec<Option<bool>>`.
22/// It implements [`Array`].
23///
24/// One way to think about a [`BooleanArray`] is `(DataType, Arc<Vec<u8>>, Option<Arc<Vec<u8>>>)`
25/// where:
26/// * the first item is the array's logical type
27/// * the second is the immutable values
28/// * the third is the immutable validity (whether a value is null or not as a bitmap).
29///
30/// The size of this struct is `O(1)`, as all data is stored behind an [`std::sync::Arc`].
31/// # Example
32/// ```
33/// use polars_arrow::array::BooleanArray;
34/// use polars_arrow::bitmap::Bitmap;
35/// use polars_arrow::buffer::Buffer;
36///
37/// let array = BooleanArray::from([Some(true), None, Some(false)]);
38/// assert_eq!(array.value(0), true);
39/// assert_eq!(array.iter().collect::<Vec<_>>(), vec![Some(true), None, Some(false)]);
40/// assert_eq!(array.values_iter().collect::<Vec<_>>(), vec![true, false, false]);
41/// // the underlying representation
42/// assert_eq!(array.values(), &Bitmap::from([true, false, false]));
43/// assert_eq!(array.validity(), Some(&Bitmap::from([true, false, true])));
44///
45/// ```
46#[derive(Clone)]
47pub struct BooleanArray {
48    dtype: ArrowDataType,
49    values: Bitmap,
50    validity: Option<Bitmap>,
51}
52
53impl BooleanArray {
54    /// The canonical method to create a [`BooleanArray`] out of low-end APIs.
55    /// # Errors
56    /// This function errors iff:
57    /// * The validity is not `None` and its length is different from `values`'s length
58    /// * The `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Boolean`].
59    pub fn try_new(
60        dtype: ArrowDataType,
61        values: Bitmap,
62        validity: Option<Bitmap>,
63    ) -> PolarsResult<Self> {
64        if validity
65            .as_ref()
66            .is_some_and(|validity| validity.len() != values.len())
67        {
68            polars_bail!(ComputeError: "validity mask length must match the number of values")
69        }
70
71        if dtype.to_physical_type() != PhysicalType::Boolean {
72            polars_bail!(ComputeError: "BooleanArray can only be initialized with a DataType whose physical type is Boolean")
73        }
74
75        Ok(Self {
76            dtype,
77            values,
78            validity,
79        })
80    }
81
82    /// Alias to `Self::try_new().unwrap()`
83    pub fn new(dtype: ArrowDataType, values: Bitmap, validity: Option<Bitmap>) -> Self {
84        Self::try_new(dtype, values, validity).unwrap()
85    }
86
87    /// Returns an iterator over the optional values of this [`BooleanArray`].
88    #[inline]
89    pub fn iter(&self) -> ZipValidity<bool, BitmapIter, BitmapIter> {
90        ZipValidity::new_with_validity(self.values().iter(), self.validity())
91    }
92
93    /// Returns an iterator over the values of this [`BooleanArray`].
94    #[inline]
95    pub fn values_iter(&self) -> BitmapIter {
96        self.values().iter()
97    }
98
99    /// Returns an iterator of the non-null values.
100    #[inline]
101    pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, BooleanArray> {
102        NonNullValuesIter::new(self, self.validity())
103    }
104
105    /// Returns the length of this array
106    #[inline]
107    pub fn len(&self) -> usize {
108        self.values.len()
109    }
110
111    /// The values [`Bitmap`].
112    /// Values on null slots are undetermined (they can be anything).
113    #[inline]
114    pub fn values(&self) -> &Bitmap {
115        &self.values
116    }
117
118    /// Returns the optional validity.
119    #[inline]
120    pub fn validity(&self) -> Option<&Bitmap> {
121        self.validity.as_ref()
122    }
123
124    /// Returns the arrays' [`ArrowDataType`].
125    #[inline]
126    pub fn dtype(&self) -> &ArrowDataType {
127        &self.dtype
128    }
129
130    /// Returns the value at index `i`
131    /// # Panic
132    /// This function panics iff `i >= self.len()`.
133    #[inline]
134    pub fn value(&self, i: usize) -> bool {
135        self.values.get_bit(i)
136    }
137
138    /// Returns the element at index `i` as bool
139    ///
140    /// # Safety
141    /// Caller must be sure that `i < self.len()`
142    #[inline]
143    pub unsafe fn value_unchecked(&self, i: usize) -> bool {
144        self.values.get_bit_unchecked(i)
145    }
146
147    /// Returns the element at index `i` or `None` if it is null
148    /// # Panics
149    /// iff `i >= self.len()`
150    #[inline]
151    pub fn get(&self, i: usize) -> Option<bool> {
152        if !self.is_null(i) {
153            // soundness: Array::is_null panics if i >= self.len
154            unsafe { Some(self.value_unchecked(i)) }
155        } else {
156            None
157        }
158    }
159
160    /// Slices this [`BooleanArray`].
161    /// # Implementation
162    /// This operation is `O(1)` as it amounts to increase up to two ref counts.
163    /// # Panic
164    /// This function panics iff `offset + length > self.len()`.
165    #[inline]
166    pub fn slice(&mut self, offset: usize, length: usize) {
167        assert!(
168            offset + length <= self.len(),
169            "the offset of the new Buffer cannot exceed the existing length"
170        );
171        unsafe { self.slice_unchecked(offset, length) }
172    }
173
174    /// Slices this [`BooleanArray`].
175    /// # Implementation
176    /// This operation is `O(1)` as it amounts to increase two ref counts.
177    ///
178    /// # Safety
179    /// The caller must ensure that `offset + length <= self.len()`.
180    #[inline]
181    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
182        self.validity = self
183            .validity
184            .take()
185            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
186            .filter(|bitmap| bitmap.unset_bits() > 0);
187        self.values.slice_unchecked(offset, length);
188    }
189
190    impl_sliced!();
191    impl_mut_validity!();
192    impl_into_array!();
193
194    /// Returns a clone of this [`BooleanArray`] with new values.
195    /// # Panics
196    /// This function panics iff `values.len() != self.len()`.
197    #[must_use]
198    pub fn with_values(&self, values: Bitmap) -> Self {
199        let mut out = self.clone();
200        out.set_values(values);
201        out
202    }
203
204    /// Sets the values of this [`BooleanArray`].
205    /// # Panics
206    /// This function panics iff `values.len() != self.len()`.
207    pub fn set_values(&mut self, values: Bitmap) {
208        assert_eq!(
209            values.len(),
210            self.len(),
211            "values length must be equal to this arrays length"
212        );
213        self.values = values;
214    }
215
216    /// Applies a function `f` to the values of this array, cloning the values
217    /// iff they are being shared with others
218    ///
219    /// This is an API to use clone-on-write
220    /// # Implementation
221    /// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
222    /// if it is being shared (since it results in a `O(N)` memcopy).
223    /// # Panics
224    /// This function panics if the function modifies the length of the [`MutableBitmap`].
225    pub fn apply_values_mut<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {
226        let values = std::mem::take(&mut self.values);
227        let mut values = values.make_mut();
228        f(&mut values);
229        if let Some(validity) = &self.validity {
230            assert_eq!(validity.len(), values.len());
231        }
232        self.values = values.into();
233    }
234
235    /// Try to convert this [`BooleanArray`] to a [`MutableBooleanArray`]
236    pub fn into_mut(self) -> Either<Self, MutableBooleanArray> {
237        use Either::*;
238
239        if let Some(bitmap) = self.validity {
240            match bitmap.into_mut() {
241                Left(bitmap) => Left(BooleanArray::new(self.dtype, self.values, Some(bitmap))),
242                Right(mutable_bitmap) => match self.values.into_mut() {
243                    Left(immutable) => Left(BooleanArray::new(
244                        self.dtype,
245                        immutable,
246                        Some(mutable_bitmap.into()),
247                    )),
248                    Right(mutable) => Right(
249                        MutableBooleanArray::try_new(self.dtype, mutable, Some(mutable_bitmap))
250                            .unwrap(),
251                    ),
252                },
253            }
254        } else {
255            match self.values.into_mut() {
256                Left(immutable) => Left(BooleanArray::new(self.dtype, immutable, None)),
257                Right(mutable) => {
258                    Right(MutableBooleanArray::try_new(self.dtype, mutable, None).unwrap())
259                },
260            }
261        }
262    }
263
264    /// Returns a new empty [`BooleanArray`].
265    pub fn new_empty(dtype: ArrowDataType) -> Self {
266        Self::new(dtype, Bitmap::new(), None)
267    }
268
269    /// Returns a new [`BooleanArray`] whose all slots are null / `None`.
270    pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
271        let bitmap = Bitmap::new_zeroed(length);
272        Self::new(dtype, bitmap.clone(), Some(bitmap))
273    }
274
275    /// Creates a new [`BooleanArray`] from an [`TrustedLen`] of `bool`.
276    #[inline]
277    pub fn from_trusted_len_values_iter<I: TrustedLen<Item = bool>>(iterator: I) -> Self {
278        MutableBooleanArray::from_trusted_len_values_iter(iterator).into()
279    }
280
281    /// Creates a new [`BooleanArray`] from an [`TrustedLen`] of `bool`.
282    /// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len
283    /// but this crate does not mark it as such.
284    ///
285    /// # Safety
286    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
287    /// I.e. that `size_hint().1` correctly reports its length.
288    #[inline]
289    pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = bool>>(
290        iterator: I,
291    ) -> Self {
292        MutableBooleanArray::from_trusted_len_values_iter_unchecked(iterator).into()
293    }
294
295    /// Creates a new [`BooleanArray`] from a slice of `bool`.
296    #[inline]
297    pub fn from_slice<P: AsRef<[bool]>>(slice: P) -> Self {
298        MutableBooleanArray::from_slice(slice).into()
299    }
300
301    /// Creates a [`BooleanArray`] from an iterator of trusted length.
302    /// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len
303    /// but this crate does not mark it as such.
304    ///
305    /// # Safety
306    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
307    /// I.e. that `size_hint().1` correctly reports its length.
308    #[inline]
309    pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
310    where
311        P: std::borrow::Borrow<bool>,
312        I: Iterator<Item = Option<P>>,
313    {
314        MutableBooleanArray::from_trusted_len_iter_unchecked(iterator).into()
315    }
316
317    /// Creates a [`BooleanArray`] from a [`TrustedLen`].
318    #[inline]
319    pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
320    where
321        P: std::borrow::Borrow<bool>,
322        I: TrustedLen<Item = Option<P>>,
323    {
324        MutableBooleanArray::from_trusted_len_iter(iterator).into()
325    }
326
327    /// Creates a [`BooleanArray`] from an falible iterator of trusted length.
328    ///
329    /// # Safety
330    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
331    /// I.e. that `size_hint().1` correctly reports its length.
332    #[inline]
333    pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(iterator: I) -> Result<Self, E>
334    where
335        P: std::borrow::Borrow<bool>,
336        I: Iterator<Item = Result<Option<P>, E>>,
337    {
338        Ok(MutableBooleanArray::try_from_trusted_len_iter_unchecked(iterator)?.into())
339    }
340
341    /// Creates a [`BooleanArray`] from a [`TrustedLen`].
342    #[inline]
343    pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> Result<Self, E>
344    where
345        P: std::borrow::Borrow<bool>,
346        I: TrustedLen<Item = Result<Option<P>, E>>,
347    {
348        Ok(MutableBooleanArray::try_from_trusted_len_iter(iterator)?.into())
349    }
350
351    pub fn true_and_valid(&self) -> Bitmap {
352        match &self.validity {
353            None => self.values.clone(),
354            Some(validity) => combine_validities_and(Some(&self.values), Some(validity)).unwrap(),
355        }
356    }
357
358    pub fn true_or_valid(&self) -> Bitmap {
359        match &self.validity {
360            None => self.values.clone(),
361            Some(validity) => combine_validities_or(Some(&self.values), Some(validity)).unwrap(),
362        }
363    }
364
365    /// Returns its internal representation
366    #[must_use]
367    pub fn into_inner(self) -> (ArrowDataType, Bitmap, Option<Bitmap>) {
368        let Self {
369            dtype,
370            values,
371            validity,
372        } = self;
373        (dtype, values, validity)
374    }
375
376    /// Creates a [`BooleanArray`] from its internal representation.
377    /// This is the inverted from [`BooleanArray::into_inner`]
378    ///
379    /// # Safety
380    /// Callers must ensure all invariants of this struct are upheld.
381    pub unsafe fn from_inner_unchecked(
382        dtype: ArrowDataType,
383        values: Bitmap,
384        validity: Option<Bitmap>,
385    ) -> Self {
386        Self {
387            dtype,
388            values,
389            validity,
390        }
391    }
392}
393
394impl Array for BooleanArray {
395    impl_common_array!();
396
397    fn validity(&self) -> Option<&Bitmap> {
398        self.validity.as_ref()
399    }
400
401    #[inline]
402    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
403        Box::new(self.clone().with_validity(validity))
404    }
405}
406
407impl Splitable for BooleanArray {
408    fn check_bound(&self, offset: usize) -> bool {
409        offset <= self.len()
410    }
411
412    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
413        let (lhs_values, rhs_values) = unsafe { self.values.split_at_unchecked(offset) };
414        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
415
416        (
417            Self {
418                dtype: self.dtype.clone(),
419                values: lhs_values,
420                validity: lhs_validity,
421            },
422            Self {
423                dtype: self.dtype.clone(),
424                values: rhs_values,
425                validity: rhs_validity,
426            },
427        )
428    }
429}
430
431impl From<Bitmap> for BooleanArray {
432    fn from(values: Bitmap) -> Self {
433        Self {
434            dtype: ArrowDataType::Boolean,
435            values,
436            validity: None,
437        }
438    }
439}