Skip to main content

polars_arrow/array/boolean/
mutable.rs

1use std::sync::Arc;
2
3use polars_error::{PolarsResult, polars_bail};
4
5use super::BooleanArray;
6use crate::array::physical_binary::extend_validity;
7use crate::array::{Array, MutableArray, TryExtend, TryExtendFromSelf, TryPush};
8use crate::bitmap::MutableBitmap;
9use crate::datatypes::{ArrowDataType, PhysicalType};
10use crate::trusted_len::TrustedLen;
11
/// The Arrow's equivalent to `Vec<Option<bool>>`, but with `1/16` of its size.
///
/// Converting a [`MutableBooleanArray`] into a [`BooleanArray`] is `O(1)`.
///
/// # Implementation
/// This struct does not allocate a validity until one is required (i.e. push a null to it).
#[derive(Debug, Clone)]
pub struct MutableBooleanArray {
    // Logical data type; `try_new` only accepts types whose physical type is Boolean.
    dtype: ArrowDataType,
    // One bit per slot holding the boolean value; the pushing methods of this type
    // write `false` for null slots.
    values: MutableBitmap,
    // One bit per slot where `true` means "valid"; stays `None` until a null is recorded.
    validity: Option<MutableBitmap>,
}
22
23impl From<MutableBooleanArray> for BooleanArray {
24    fn from(other: MutableBooleanArray) -> Self {
25        BooleanArray::new(
26            other.dtype,
27            other.values.into(),
28            other.validity.map(|x| x.into()),
29        )
30    }
31}
32
33impl<P: AsRef<[Option<bool>]>> From<P> for MutableBooleanArray {
34    /// Creates a new [`MutableBooleanArray`] out of a slice of Optional `bool`.
35    fn from(slice: P) -> Self {
36        Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref()))
37    }
38}
39
40impl Default for MutableBooleanArray {
41    fn default() -> Self {
42        Self::new()
43    }
44}
45
46impl MutableBooleanArray {
47    /// Creates an new empty [`MutableBooleanArray`].
48    pub fn new() -> Self {
49        Self::with_capacity(0)
50    }
51
52    /// The canonical method to create a [`MutableBooleanArray`] out of low-end APIs.
53    /// # Errors
54    /// This function errors iff:
55    /// * The validity is not `None` and its length is different from `values`'s length
56    /// * The `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Boolean`].
57    pub fn try_new(
58        dtype: ArrowDataType,
59        values: MutableBitmap,
60        validity: Option<MutableBitmap>,
61    ) -> PolarsResult<Self> {
62        if validity
63            .as_ref()
64            .is_some_and(|validity| validity.len() != values.len())
65        {
66            polars_bail!(ComputeError:
67                "validity mask length must match the number of values",
68            )
69        }
70
71        if dtype.to_physical_type() != PhysicalType::Boolean {
72            polars_bail!(
73                oos = "MutableBooleanArray can only be initialized with a DataType whose physical type is Boolean",
74            )
75        }
76
77        Ok(Self {
78            dtype,
79            values,
80            validity,
81        })
82    }
83
84    /// Creates an new [`MutableBooleanArray`] with a capacity of values.
85    pub fn with_capacity(capacity: usize) -> Self {
86        Self {
87            dtype: ArrowDataType::Boolean,
88            values: MutableBitmap::with_capacity(capacity),
89            validity: None,
90        }
91    }
92
93    /// Reserves `additional` slots.
94    pub fn reserve(&mut self, additional: usize) {
95        self.values.reserve(additional);
96        if let Some(x) = self.validity.as_mut() {
97            x.reserve(additional)
98        }
99    }
100
101    #[inline]
102    pub fn push_value(&mut self, value: bool) {
103        self.values.push(value);
104        if let Some(validity) = &mut self.validity {
105            validity.push(true)
106        }
107    }
108
109    #[inline]
110    pub fn push_null(&mut self) {
111        self.values.push(false);
112        match &mut self.validity {
113            Some(validity) => validity.push(false),
114            None => self.init_validity(),
115        }
116    }
117
118    /// Pushes a new entry to [`MutableBooleanArray`].
119    #[inline]
120    pub fn push(&mut self, value: Option<bool>) {
121        match value {
122            Some(value) => self.push_value(value),
123            None => self.push_null(),
124        }
125    }
126
127    /// Pop an entry from [`MutableBooleanArray`].
128    /// Note If the values is empty, this method will return None.
129    pub fn pop(&mut self) -> Option<bool> {
130        let value = self.values.pop()?;
131        self.validity
132            .as_mut()
133            .map(|x| x.pop()?.then(|| value))
134            .unwrap_or_else(|| Some(value))
135    }
136
137    /// Extends the [`MutableBooleanArray`] from an iterator of values of trusted len.
138    /// This differs from `extend_trusted_len` which accepts in iterator of optional values.
139    #[inline]
140    pub fn extend_trusted_len_values<I>(&mut self, iterator: I)
141    where
142        I: TrustedLen<Item = bool>,
143    {
144        // SAFETY: `I` is `TrustedLen`
145        unsafe { self.extend_trusted_len_values_unchecked(iterator) }
146    }
147
148    /// Extends the [`MutableBooleanArray`] from an iterator of values of trusted len.
149    /// This differs from `extend_trusted_len_unchecked`, which accepts in iterator of optional values.
150    ///
151    /// # Safety
152    /// The iterator must be trusted len.
153    #[inline]
154    pub unsafe fn extend_trusted_len_values_unchecked<I>(&mut self, iterator: I)
155    where
156        I: Iterator<Item = bool>,
157    {
158        let (_, upper) = iterator.size_hint();
159        let additional =
160            upper.expect("extend_trusted_len_values_unchecked requires an upper limit");
161
162        if let Some(validity) = self.validity.as_mut() {
163            validity.extend_constant(additional, true);
164        }
165
166        self.values.extend_from_trusted_len_iter_unchecked(iterator)
167    }
168
169    /// Extends the [`MutableBooleanArray`] from an iterator of trusted len.
170    #[inline]
171    pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
172    where
173        P: std::borrow::Borrow<bool>,
174        I: TrustedLen<Item = Option<P>>,
175    {
176        // SAFETY: `I` is `TrustedLen`
177        unsafe { self.extend_trusted_len_unchecked(iterator) }
178    }
179
180    /// Extends the [`MutableBooleanArray`] from an iterator of trusted len.
181    ///
182    /// # Safety
183    /// The iterator must be trusted len.
184    #[inline]
185    pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
186    where
187        P: std::borrow::Borrow<bool>,
188        I: Iterator<Item = Option<P>>,
189    {
190        if let Some(validity) = self.validity.as_mut() {
191            extend_trusted_len_unzip(iterator, validity, &mut self.values);
192        } else {
193            let mut validity = MutableBitmap::new();
194            validity.extend_constant(self.len(), true);
195
196            extend_trusted_len_unzip(iterator, &mut validity, &mut self.values);
197
198            if validity.unset_bits() > 0 {
199                self.validity = Some(validity);
200            }
201        }
202    }
203
204    /// Extends `MutableBooleanArray` by additional values of constant value.
205    #[inline]
206    pub fn extend_constant(&mut self, additional: usize, value: Option<bool>) {
207        match value {
208            Some(value) => {
209                self.values.extend_constant(additional, value);
210                if let Some(validity) = self.validity.as_mut() {
211                    validity.extend_constant(additional, true);
212                }
213            },
214            None => self.extend_null(additional),
215        };
216    }
217
218    pub fn extend_null(&mut self, additional: usize) {
219        if let Some(validity) = self.validity.as_mut() {
220            validity.extend_constant(additional, false)
221        } else {
222            let mut validity = MutableBitmap::with_capacity(self.values.capacity());
223            validity.extend_constant(self.len(), true);
224            validity.extend_constant(additional, false);
225            self.validity = Some(validity);
226        };
227        self.values.extend_constant(additional, false);
228    }
229
230    fn init_validity(&mut self) {
231        let mut validity = MutableBitmap::with_capacity(self.values.capacity());
232        validity.extend_constant(self.len(), true);
233        validity.set(self.len() - 1, false);
234        self.validity = Some(validity)
235    }
236
237    /// Converts itself into an [`Array`].
238    pub fn into_arc(self) -> Arc<dyn Array> {
239        let a: BooleanArray = self.into();
240        Arc::new(a)
241    }
242
243    pub fn freeze(self) -> BooleanArray {
244        self.into()
245    }
246}
247
248/// Getters
249impl MutableBooleanArray {
250    /// Returns its values.
251    pub fn values(&self) -> &MutableBitmap {
252        &self.values
253    }
254}
255
256/// Setters
257impl MutableBooleanArray {
258    /// Sets position `index` to `value`.
259    /// Note that if it is the first time a null appears in this array,
260    /// this initializes the validity bitmap (`O(N)`).
261    /// # Panic
262    /// Panics iff index is larger than `self.len()`.
263    pub fn set(&mut self, index: usize, value: Option<bool>) {
264        self.values.set(index, value.unwrap_or_default());
265
266        if value.is_none() && self.validity.is_none() {
267            // When the validity is None, all elements so far are valid. When one of the elements is set of null,
268            // the validity must be initialized.
269            self.validity = Some(MutableBitmap::from_trusted_len_iter(std::iter::repeat_n(
270                true,
271                self.len(),
272            )));
273        }
274        if let Some(x) = self.validity.as_mut() {
275            x.set(index, value.is_some())
276        }
277    }
278}
279
280/// From implementations
281impl MutableBooleanArray {
282    /// Creates a new [`MutableBooleanArray`] from an [`TrustedLen`] of `bool`.
283    #[inline]
284    pub fn from_trusted_len_values_iter<I: TrustedLen<Item = bool>>(iterator: I) -> Self {
285        Self::try_new(
286            ArrowDataType::Boolean,
287            MutableBitmap::from_trusted_len_iter(iterator),
288            None,
289        )
290        .unwrap()
291    }
292
293    /// Creates a new [`MutableBooleanArray`] from an [`TrustedLen`] of `bool`.
294    /// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len
295    /// but this crate does not mark it as such.
296    ///
297    /// # Safety
298    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
299    /// I.e. that `size_hint().1` correctly reports its length.
300    #[inline]
301    pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = bool>>(
302        iterator: I,
303    ) -> Self {
304        let mut mutable = MutableBitmap::new();
305        mutable.extend_from_trusted_len_iter_unchecked(iterator);
306        MutableBooleanArray::try_new(ArrowDataType::Boolean, mutable, None).unwrap()
307    }
308
309    /// Creates a new [`MutableBooleanArray`] from a slice of `bool`.
310    #[inline]
311    pub fn from_slice<P: AsRef<[bool]>>(slice: P) -> Self {
312        Self::from_trusted_len_values_iter(slice.as_ref().iter().copied())
313    }
314
315    /// Creates a [`BooleanArray`] from an iterator of trusted length.
316    /// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len
317    /// but this crate does not mark it as such.
318    ///
319    /// # Safety
320    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
321    /// I.e. that `size_hint().1` correctly reports its length.
322    #[inline]
323    pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
324    where
325        P: std::borrow::Borrow<bool>,
326        I: Iterator<Item = Option<P>>,
327    {
328        let (validity, values) = trusted_len_unzip(iterator);
329
330        Self::try_new(ArrowDataType::Boolean, values, validity).unwrap()
331    }
332
333    /// Creates a [`BooleanArray`] from a [`TrustedLen`].
334    #[inline]
335    pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
336    where
337        P: std::borrow::Borrow<bool>,
338        I: TrustedLen<Item = Option<P>>,
339    {
340        // SAFETY: `I` is `TrustedLen`
341        unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
342    }
343
344    /// Creates a [`BooleanArray`] from an falible iterator of trusted length.
345    ///
346    /// # Safety
347    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
348    /// I.e. that `size_hint().1` correctly reports its length.
349    #[inline]
350    pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(
351        iterator: I,
352    ) -> std::result::Result<Self, E>
353    where
354        P: std::borrow::Borrow<bool>,
355        I: Iterator<Item = std::result::Result<Option<P>, E>>,
356    {
357        let (validity, values) = try_trusted_len_unzip(iterator)?;
358
359        let validity = if validity.unset_bits() > 0 {
360            Some(validity)
361        } else {
362            None
363        };
364
365        Ok(Self::try_new(ArrowDataType::Boolean, values, validity).unwrap())
366    }
367
368    /// Creates a [`BooleanArray`] from a [`TrustedLen`].
369    #[inline]
370    pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> std::result::Result<Self, E>
371    where
372        P: std::borrow::Borrow<bool>,
373        I: TrustedLen<Item = std::result::Result<Option<P>, E>>,
374    {
375        // SAFETY: `I` is `TrustedLen`
376        unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
377    }
378
379    /// Shrinks the capacity of the [`MutableBooleanArray`] to fit its current length.
380    pub fn shrink_to_fit(&mut self) {
381        self.values.shrink_to_fit();
382        if let Some(validity) = &mut self.validity {
383            validity.shrink_to_fit()
384        }
385    }
386}
387
388/// Creates a Bitmap and an optional [`MutableBitmap`] from an iterator of `Option<bool>`.
389/// The first buffer corresponds to a bitmap buffer, the second one
390/// corresponds to a values buffer.
391/// # Safety
392/// The caller must ensure that `iterator` is `TrustedLen`.
393#[inline]
394pub(crate) unsafe fn trusted_len_unzip<I, P>(iterator: I) -> (Option<MutableBitmap>, MutableBitmap)
395where
396    P: std::borrow::Borrow<bool>,
397    I: Iterator<Item = Option<P>>,
398{
399    let mut validity = MutableBitmap::new();
400    let mut values = MutableBitmap::new();
401
402    extend_trusted_len_unzip(iterator, &mut validity, &mut values);
403
404    let validity = if validity.unset_bits() > 0 {
405        Some(validity)
406    } else {
407        None
408    };
409
410    (validity, values)
411}
412
413/// Extends validity [`MutableBitmap`] and values [`MutableBitmap`] from an iterator of `Option`.
414/// # Safety
415/// The caller must ensure that `iterator` is `TrustedLen`.
416#[inline]
417pub(crate) unsafe fn extend_trusted_len_unzip<I, P>(
418    iterator: I,
419    validity: &mut MutableBitmap,
420    values: &mut MutableBitmap,
421) where
422    P: std::borrow::Borrow<bool>,
423    I: Iterator<Item = Option<P>>,
424{
425    let (_, upper) = iterator.size_hint();
426    let additional = upper.expect("extend_trusted_len_unzip requires an upper limit");
427
428    // Length of the array before new values are pushed,
429    // variable created for assertion post operation
430    let pre_length = values.len();
431
432    validity.reserve(additional);
433    values.reserve(additional);
434
435    for item in iterator {
436        let item = if let Some(item) = item {
437            validity.push_unchecked(true);
438            *item.borrow()
439        } else {
440            validity.push_unchecked(false);
441            bool::default()
442        };
443        values.push_unchecked(item);
444    }
445
446    debug_assert_eq!(
447        values.len(),
448        pre_length + additional,
449        "Trusted iterator length was not accurately reported"
450    );
451}
452
453/// # Safety
454/// The caller must ensure that `iterator` is `TrustedLen`.
455#[inline]
456pub(crate) unsafe fn try_trusted_len_unzip<E, I, P>(
457    iterator: I,
458) -> std::result::Result<(MutableBitmap, MutableBitmap), E>
459where
460    P: std::borrow::Borrow<bool>,
461    I: Iterator<Item = std::result::Result<Option<P>, E>>,
462{
463    let (_, upper) = iterator.size_hint();
464    let len = upper.expect("trusted_len_unzip requires an upper limit");
465
466    let mut null = MutableBitmap::with_capacity(len);
467    let mut values = MutableBitmap::with_capacity(len);
468
469    for item in iterator {
470        let item = if let Some(item) = item? {
471            null.push(true);
472            *item.borrow()
473        } else {
474            null.push(false);
475            false
476        };
477        values.push(item);
478    }
479    assert_eq!(
480        values.len(),
481        len,
482        "Trusted iterator length was not accurately reported"
483    );
484    values.set_len(len);
485    null.set_len(len);
486
487    Ok((null, values))
488}
489
490impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for MutableBooleanArray {
491    fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
492        let iter = iter.into_iter();
493        let (lower, _) = iter.size_hint();
494
495        let mut validity = MutableBitmap::with_capacity(lower);
496
497        let values: MutableBitmap = iter
498            .map(|item| {
499                if let Some(a) = item.borrow() {
500                    validity.push(true);
501                    *a
502                } else {
503                    validity.push(false);
504                    false
505                }
506            })
507            .collect();
508
509        let validity = if validity.unset_bits() > 0 {
510            Some(validity)
511        } else {
512            None
513        };
514
515        MutableBooleanArray::try_new(ArrowDataType::Boolean, values, validity).unwrap()
516    }
517}
518
519impl MutableArray for MutableBooleanArray {
520    fn len(&self) -> usize {
521        self.values.len()
522    }
523
524    fn validity(&self) -> Option<&MutableBitmap> {
525        self.validity.as_ref()
526    }
527
528    fn as_box(&mut self) -> Box<dyn Array> {
529        let array: BooleanArray = std::mem::take(self).into();
530        array.boxed()
531    }
532
533    fn as_arc(&mut self) -> Arc<dyn Array> {
534        let array: BooleanArray = std::mem::take(self).into();
535        array.arced()
536    }
537
538    fn dtype(&self) -> &ArrowDataType {
539        &self.dtype
540    }
541
542    fn as_any(&self) -> &dyn std::any::Any {
543        self
544    }
545
546    fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
547        self
548    }
549
550    #[inline]
551    fn push_null(&mut self) {
552        self.push(None)
553    }
554
555    fn reserve(&mut self, additional: usize) {
556        self.reserve(additional)
557    }
558
559    fn shrink_to_fit(&mut self) {
560        self.shrink_to_fit()
561    }
562}
563
564impl Extend<Option<bool>> for MutableBooleanArray {
565    fn extend<I: IntoIterator<Item = Option<bool>>>(&mut self, iter: I) {
566        let iter = iter.into_iter();
567        self.reserve(iter.size_hint().0);
568        iter.for_each(|x| self.push(x))
569    }
570}
571
572impl TryExtend<Option<bool>> for MutableBooleanArray {
573    /// This is infalible and is implemented for consistency with all other types
574    fn try_extend<I: IntoIterator<Item = Option<bool>>>(&mut self, iter: I) -> PolarsResult<()> {
575        self.extend(iter);
576        Ok(())
577    }
578}
579
580impl TryPush<Option<bool>> for MutableBooleanArray {
581    /// This is infalible and is implemented for consistency with all other types
582    fn try_push(&mut self, item: Option<bool>) -> PolarsResult<()> {
583        self.push(item);
584        Ok(())
585    }
586}
587
588impl PartialEq for MutableBooleanArray {
589    fn eq(&self, other: &Self) -> bool {
590        self.iter().eq(other.iter())
591    }
592}
593
594impl TryExtendFromSelf for MutableBooleanArray {
595    fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
596        extend_validity(self.len(), &mut self.validity, &other.validity);
597
598        let slice = other.values.as_slice();
599        // SAFETY: invariant offset + length <= slice.len()
600        unsafe {
601            self.values
602                .extend_from_slice_unchecked(slice, 0, other.values.len());
603        }
604        Ok(())
605    }
606}