Skip to main content

polars_arrow/array/primitive/
mutable.rs

1use std::sync::Arc;
2
3use polars_error::PolarsResult;
4
5use super::{PrimitiveArray, check};
6use crate::array::physical_binary::extend_validity;
7use crate::array::{Array, MutableArray, TryExtend, TryExtendFromSelf, TryPush};
8use crate::bitmap::{Bitmap, MutableBitmap};
9use crate::datatypes::ArrowDataType;
10use crate::trusted_len::TrustedLen;
11use crate::types::NativeType;
12
13/// The Arrow's equivalent to `Vec<Option<T>>` where `T` is byte-size (e.g. `i32`).
14/// Converting a [`MutablePrimitiveArray`] into a [`PrimitiveArray`] is `O(1)`.
15#[derive(Debug, Clone)]
16pub struct MutablePrimitiveArray<T: NativeType> {
17    dtype: ArrowDataType,
18    values: Vec<T>,
19    validity: Option<MutableBitmap>,
20}
21
22impl<T: NativeType> From<MutablePrimitiveArray<T>> for PrimitiveArray<T> {
23    fn from(other: MutablePrimitiveArray<T>) -> Self {
24        let validity = other.validity.and_then(|x| {
25            let bitmap: Bitmap = x.into();
26            if bitmap.unset_bits() == 0 {
27                None
28            } else {
29                Some(bitmap)
30            }
31        });
32
33        PrimitiveArray::<T>::new(other.dtype, other.values.into(), validity)
34    }
35}
36
37impl<T: NativeType, P: AsRef<[Option<T>]>> From<P> for MutablePrimitiveArray<T> {
38    fn from(slice: P) -> Self {
39        Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref()))
40    }
41}
42
43impl<T: NativeType> MutablePrimitiveArray<T> {
44    /// Creates a new empty [`MutablePrimitiveArray`].
45    pub fn new() -> Self {
46        Self::with_capacity(0)
47    }
48
49    /// Creates a new [`MutablePrimitiveArray`] with a capacity.
50    pub fn with_capacity(capacity: usize) -> Self {
51        Self::with_capacity_from(capacity, T::PRIMITIVE.into())
52    }
53
54    /// The canonical method to create a [`MutablePrimitiveArray`] out of its internal components.
55    /// # Implementation
56    /// This function is `O(1)`.
57    ///
58    /// # Errors
59    /// This function errors iff:
60    /// * The validity is not `None` and its length is different from `values`'s length
61    /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to [`crate::datatypes::PhysicalType::Primitive(T::PRIMITIVE)`]
62    pub fn try_new(
63        dtype: ArrowDataType,
64        values: Vec<T>,
65        validity: Option<MutableBitmap>,
66    ) -> PolarsResult<Self> {
67        check(&dtype, &values, validity.as_ref().map(|x| x.len()))?;
68        Ok(Self {
69            dtype,
70            values,
71            validity,
72        })
73    }
74
75    /// Extract the low-end APIs from the [`MutablePrimitiveArray`].
76    pub fn into_inner(self) -> (ArrowDataType, Vec<T>, Option<MutableBitmap>) {
77        (self.dtype, self.values, self.validity)
78    }
79
80    /// Applies a function `f` to the values of this array, cloning the values
81    /// iff they are being shared with others
82    ///
83    /// This is an API to use clone-on-write
84    /// # Implementation
85    /// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
86    /// if it is being shared (since it results in a `O(N)` memcopy).
87    /// # Panics
88    /// This function panics iff `f` panics
89    pub fn apply_values<F: Fn(&mut [T])>(&mut self, f: F) {
90        f(&mut self.values);
91    }
92}
93
94impl<T: NativeType> Default for MutablePrimitiveArray<T> {
95    fn default() -> Self {
96        Self::new()
97    }
98}
99
100impl<T: NativeType> From<ArrowDataType> for MutablePrimitiveArray<T> {
101    fn from(dtype: ArrowDataType) -> Self {
102        assert!(dtype.to_physical_type().eq_primitive(T::PRIMITIVE));
103        Self {
104            dtype,
105            values: Vec::<T>::new(),
106            validity: None,
107        }
108    }
109}
110
111impl<T: NativeType> MutablePrimitiveArray<T> {
112    /// Creates a new [`MutablePrimitiveArray`] from a capacity and [`ArrowDataType`].
113    pub fn with_capacity_from(capacity: usize, dtype: ArrowDataType) -> Self {
114        assert!(dtype.to_physical_type().eq_primitive(T::PRIMITIVE));
115        Self {
116            dtype,
117            values: Vec::<T>::with_capacity(capacity),
118            validity: None,
119        }
120    }
121
122    /// Reserves `additional` entries.
123    pub fn reserve(&mut self, additional: usize) {
124        self.values.reserve(additional);
125        if let Some(x) = self.validity.as_mut() {
126            x.reserve(additional)
127        }
128    }
129
130    #[inline]
131    pub fn push_value(&mut self, value: T) {
132        self.values.push(value);
133        if let Some(validity) = &mut self.validity {
134            validity.push(true)
135        }
136    }
137
138    /// Adds a new value to the array.
139    #[inline]
140    pub fn push(&mut self, value: Option<T>) {
141        match value {
142            Some(value) => self.push_value(value),
143            None => {
144                self.values.push(T::default());
145                match &mut self.validity {
146                    Some(validity) => validity.push(false),
147                    None => {
148                        self.init_validity();
149                    },
150                }
151            },
152        }
153    }
154
155    /// Pop a value from the array.
156    /// Note if the values is empty, this method will return None.
157    pub fn pop(&mut self) -> Option<T> {
158        let value = self.values.pop()?;
159        self.validity
160            .as_mut()
161            .map(|x| x.pop()?.then(|| value))
162            .unwrap_or_else(|| Some(value))
163    }
164
165    /// Extends the [`MutablePrimitiveArray`] with a constant
166    #[inline]
167    pub fn extend_constant(&mut self, additional: usize, value: Option<T>) {
168        if let Some(value) = value {
169            self.values.resize(self.values.len() + additional, value);
170            if let Some(validity) = &mut self.validity {
171                validity.extend_constant(additional, true)
172            }
173        } else {
174            self.extend_null(additional);
175        }
176    }
177
178    pub fn extend_null(&mut self, additional: usize) {
179        if let Some(validity) = &mut self.validity {
180            validity.extend_constant(additional, false)
181        } else {
182            let mut validity = MutableBitmap::with_capacity(self.values.capacity());
183            validity.extend_constant(self.len(), true);
184            validity.extend_constant(additional, false);
185            self.validity = Some(validity)
186        }
187        self.values
188            .resize(self.values.len() + additional, T::default());
189    }
190
191    /// Extends the [`MutablePrimitiveArray`] from an iterator of trusted len.
192    #[inline]
193    pub fn extend_trusted_len<P, I>(&mut self, iterator: I)
194    where
195        P: std::borrow::Borrow<T>,
196        I: TrustedLen<Item = Option<P>>,
197    {
198        unsafe { self.extend_trusted_len_unchecked(iterator) }
199    }
200
201    /// Extends the [`MutablePrimitiveArray`] from an iterator of trusted len.
202    ///
203    /// # Safety
204    /// The iterator must be trusted len.
205    #[inline]
206    pub unsafe fn extend_trusted_len_unchecked<P, I>(&mut self, iterator: I)
207    where
208        P: std::borrow::Borrow<T>,
209        I: Iterator<Item = Option<P>>,
210    {
211        if let Some(validity) = self.validity.as_mut() {
212            extend_trusted_len_unzip(iterator, validity, &mut self.values)
213        } else {
214            let mut validity = MutableBitmap::new();
215            validity.extend_constant(self.len(), true);
216            extend_trusted_len_unzip(iterator, &mut validity, &mut self.values);
217            self.validity = Some(validity);
218        }
219    }
220    /// Extends the [`MutablePrimitiveArray`] from an iterator of values of trusted len.
221    /// This differs from `extend_trusted_len` which accepts in iterator of optional values.
222    #[inline]
223    pub fn extend_trusted_len_values<I>(&mut self, iterator: I)
224    where
225        I: TrustedLen<Item = T>,
226    {
227        unsafe { self.extend_values(iterator) }
228    }
229
230    /// Extends the [`MutablePrimitiveArray`] from an iterator of values of trusted len.
231    /// This differs from `extend_trusted_len_unchecked` which accepts in iterator of optional values.
232    ///
233    /// # Safety
234    /// The iterator must be trusted len.
235    #[inline]
236    pub fn extend_values<I>(&mut self, iterator: I)
237    where
238        I: Iterator<Item = T>,
239    {
240        self.values.extend(iterator);
241        self.update_all_valid();
242    }
243
244    #[inline]
245    /// Extends the [`MutablePrimitiveArray`] from a slice
246    pub fn extend_from_slice(&mut self, items: &[T]) {
247        self.values.extend_from_slice(items);
248        self.update_all_valid();
249    }
250
251    fn update_all_valid(&mut self) {
252        // get len before mutable borrow
253        let len = self.len();
254        if let Some(validity) = self.validity.as_mut() {
255            validity.extend_constant(len - validity.len(), true);
256        }
257    }
258
259    fn init_validity(&mut self) {
260        let mut validity = MutableBitmap::with_capacity(self.values.capacity());
261        validity.extend_constant(self.len(), true);
262        validity.set(self.len() - 1, false);
263        self.validity = Some(validity)
264    }
265
266    /// Changes the arrays' [`ArrowDataType`], returning a new [`MutablePrimitiveArray`].
267    /// Use to change the logical type without changing the corresponding physical Type.
268    /// # Implementation
269    /// This operation is `O(1)`.
270    #[inline]
271    pub fn to(self, dtype: ArrowDataType) -> Self {
272        Self::try_new(dtype, self.values, self.validity).unwrap()
273    }
274
275    /// Converts itself into an [`Array`].
276    pub fn into_arc(self) -> Arc<dyn Array> {
277        let a: PrimitiveArray<T> = self.into();
278        Arc::new(a)
279    }
280
281    /// Shrinks the capacity of the [`MutablePrimitiveArray`] to fit its current length.
282    pub fn shrink_to_fit(&mut self) {
283        self.values.shrink_to_fit();
284        if let Some(validity) = &mut self.validity {
285            validity.shrink_to_fit()
286        }
287    }
288
289    /// Returns the capacity of this [`MutablePrimitiveArray`].
290    pub fn capacity(&self) -> usize {
291        self.values.capacity()
292    }
293
294    pub fn freeze(self) -> PrimitiveArray<T> {
295        self.into()
296    }
297
298    /// Clears the array, removing all values.
299    ///
300    /// Note that this method has no effect on the allocated capacity
301    /// of the array.
302    pub fn clear(&mut self) {
303        self.values.clear();
304        self.validity = None;
305    }
306
307    /// Apply a function that temporarily freezes this `MutableArray` into a `PrimitiveArray`.
308    pub fn with_freeze<K, F: FnOnce(&PrimitiveArray<T>) -> K>(&mut self, f: F) -> K {
309        let mutable = std::mem::take(self);
310        let arr = mutable.freeze();
311        let out = f(&arr);
312        *self = arr.into_mut().right().unwrap();
313        out
314    }
315}
316
317/// Accessors
318impl<T: NativeType> MutablePrimitiveArray<T> {
319    /// Returns its values.
320    pub fn values(&self) -> &Vec<T> {
321        &self.values
322    }
323
324    /// Returns a mutable slice of values.
325    pub fn values_mut_slice(&mut self) -> &mut [T] {
326        self.values.as_mut_slice()
327    }
328}
329
330/// Setters
331impl<T: NativeType> MutablePrimitiveArray<T> {
332    /// Sets position `index` to `value`.
333    /// Note that if it is the first time a null appears in this array,
334    /// this initializes the validity bitmap (`O(N)`).
335    /// # Panic
336    /// Panics iff `index >= self.len()`.
337    pub fn set(&mut self, index: usize, value: Option<T>) {
338        assert!(index < self.len());
339        // SAFETY:
340        // we just checked bounds
341        unsafe { self.set_unchecked(index, value) }
342    }
343
344    /// Sets position `index` to `value`.
345    /// Note that if it is the first time a null appears in this array,
346    /// this initializes the validity bitmap (`O(N)`).
347    ///
348    /// # Safety
349    /// Caller must ensure `index < self.len()`
350    pub unsafe fn set_unchecked(&mut self, index: usize, value: Option<T>) {
351        *self.values.get_unchecked_mut(index) = value.unwrap_or_default();
352
353        if value.is_none() && self.validity.is_none() {
354            // When the validity is None, all elements so far are valid. When one of the elements is set of null,
355            // the validity must be initialized.
356            let mut validity = MutableBitmap::new();
357            validity.extend_constant(self.len(), true);
358            self.validity = Some(validity);
359        }
360        if let Some(x) = self.validity.as_mut() {
361            x.set_unchecked(index, value.is_some())
362        }
363    }
364
365    /// Sets the validity.
366    /// # Panic
367    /// Panics iff the validity's len is not equal to the existing values' length.
368    pub fn set_validity(&mut self, validity: Option<MutableBitmap>) {
369        if let Some(validity) = &validity {
370            assert_eq!(self.values.len(), validity.len())
371        }
372        self.validity = validity;
373    }
374
375    /// Sets values.
376    /// # Panic
377    /// Panics iff the values' length is not equal to the existing values' len.
378    pub fn set_values(&mut self, values: Vec<T>) {
379        assert_eq!(values.len(), self.values.len());
380        self.values = values;
381    }
382}
383
384impl<T: NativeType> Extend<Option<T>> for MutablePrimitiveArray<T> {
385    fn extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) {
386        let iter = iter.into_iter();
387        self.reserve(iter.size_hint().0);
388        iter.for_each(|x| self.push(x))
389    }
390}
391
392impl<T: NativeType> TryExtend<Option<T>> for MutablePrimitiveArray<T> {
393    /// This is infallible and is implemented for consistency with all other types
394    fn try_extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) -> PolarsResult<()> {
395        self.extend(iter);
396        Ok(())
397    }
398}
399
400impl<T: NativeType> TryPush<Option<T>> for MutablePrimitiveArray<T> {
401    /// This is infalible and is implemented for consistency with all other types
402    #[inline]
403    fn try_push(&mut self, item: Option<T>) -> PolarsResult<()> {
404        self.push(item);
405        Ok(())
406    }
407}
408
409impl<T: NativeType> MutableArray for MutablePrimitiveArray<T> {
410    fn len(&self) -> usize {
411        self.values.len()
412    }
413
414    fn validity(&self) -> Option<&MutableBitmap> {
415        self.validity.as_ref()
416    }
417
418    fn as_box(&mut self) -> Box<dyn Array> {
419        PrimitiveArray::new(
420            self.dtype.clone(),
421            std::mem::take(&mut self.values).into(),
422            std::mem::take(&mut self.validity).map(|x| x.into()),
423        )
424        .boxed()
425    }
426
427    fn as_arc(&mut self) -> Arc<dyn Array> {
428        PrimitiveArray::new(
429            self.dtype.clone(),
430            std::mem::take(&mut self.values).into(),
431            std::mem::take(&mut self.validity).map(|x| x.into()),
432        )
433        .arced()
434    }
435
436    fn dtype(&self) -> &ArrowDataType {
437        &self.dtype
438    }
439
440    fn as_any(&self) -> &dyn std::any::Any {
441        self
442    }
443
444    fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
445        self
446    }
447
448    fn push_null(&mut self) {
449        self.push(None)
450    }
451
452    fn reserve(&mut self, additional: usize) {
453        self.reserve(additional)
454    }
455
456    fn shrink_to_fit(&mut self) {
457        self.shrink_to_fit()
458    }
459}
460
461impl<T: NativeType> MutablePrimitiveArray<T> {
462    /// Creates a [`MutablePrimitiveArray`] from a slice of values.
463    pub fn from_slice<P: AsRef<[T]>>(slice: P) -> Self {
464        Self::from_trusted_len_values_iter(slice.as_ref().iter().copied())
465    }
466
467    /// Creates a [`MutablePrimitiveArray`] from an iterator of trusted length.
468    ///
469    /// # Safety
470    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
471    /// I.e. `size_hint().1` correctly reports its length.
472    #[inline]
473    pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
474    where
475        P: std::borrow::Borrow<T>,
476        I: Iterator<Item = Option<P>>,
477    {
478        let (validity, values) = trusted_len_unzip(iterator);
479
480        Self {
481            dtype: T::PRIMITIVE.into(),
482            values,
483            validity,
484        }
485    }
486
487    /// Creates a [`MutablePrimitiveArray`] from a [`TrustedLen`].
488    #[inline]
489    pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
490    where
491        P: std::borrow::Borrow<T>,
492        I: TrustedLen<Item = Option<P>>,
493    {
494        unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
495    }
496
497    /// Creates a [`MutablePrimitiveArray`] from an fallible iterator of trusted length.
498    ///
499    /// # Safety
500    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
501    /// I.e. that `size_hint().1` correctly reports its length.
502    #[inline]
503    pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(
504        iter: I,
505    ) -> std::result::Result<Self, E>
506    where
507        P: std::borrow::Borrow<T>,
508        I: IntoIterator<Item = std::result::Result<Option<P>, E>>,
509    {
510        let iterator = iter.into_iter();
511
512        let (validity, values) = try_trusted_len_unzip(iterator)?;
513
514        Ok(Self {
515            dtype: T::PRIMITIVE.into(),
516            values,
517            validity,
518        })
519    }
520
521    /// Creates a [`MutablePrimitiveArray`] from an fallible iterator of trusted length.
522    #[inline]
523    pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> std::result::Result<Self, E>
524    where
525        P: std::borrow::Borrow<T>,
526        I: TrustedLen<Item = std::result::Result<Option<P>, E>>,
527    {
528        unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
529    }
530
531    /// Creates a new [`MutablePrimitiveArray`] out an iterator over values
532    pub fn from_trusted_len_values_iter<I: TrustedLen<Item = T>>(iter: I) -> Self {
533        Self {
534            dtype: T::PRIMITIVE.into(),
535            values: iter.collect(),
536            validity: None,
537        }
538    }
539
540    /// Creates a (non-null) [`MutablePrimitiveArray`] from a vector of values.
541    /// This does not have memcopy and is the fastest way to create a [`PrimitiveArray`].
542    pub fn from_vec(values: Vec<T>) -> Self {
543        Self::try_new(T::PRIMITIVE.into(), values, None).unwrap()
544    }
545
546    /// Creates a new [`MutablePrimitiveArray`] from an iterator over values
547    ///
548    /// # Safety
549    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
550    /// I.e. that `size_hint().1` correctly reports its length.
551    pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = T>>(iter: I) -> Self {
552        Self {
553            dtype: T::PRIMITIVE.into(),
554            values: iter.collect(),
555            validity: None,
556        }
557    }
558}
559
560impl<T: NativeType, Ptr: std::borrow::Borrow<Option<T>>> FromIterator<Ptr>
561    for MutablePrimitiveArray<T>
562{
563    fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
564        let iter = iter.into_iter();
565        let (lower, _) = iter.size_hint();
566
567        let mut validity = MutableBitmap::with_capacity(lower);
568
569        let values: Vec<T> = iter
570            .map(|item| {
571                if let Some(a) = item.borrow() {
572                    validity.push(true);
573                    *a
574                } else {
575                    validity.push(false);
576                    T::default()
577                }
578            })
579            .collect();
580
581        let validity = Some(validity);
582
583        Self {
584            dtype: T::PRIMITIVE.into(),
585            values,
586            validity,
587        }
588    }
589}
590
591/// Extends a [`MutableBitmap`] and a [`Vec`] from an iterator of `Option`.
592/// The first buffer corresponds to a bitmap buffer, the second one
593/// corresponds to a values buffer.
594/// # Safety
595/// The caller must ensure that `iterator` is `TrustedLen`.
596#[inline]
597pub(crate) unsafe fn extend_trusted_len_unzip<I, P, T>(
598    iterator: I,
599    validity: &mut MutableBitmap,
600    buffer: &mut Vec<T>,
601) where
602    T: NativeType,
603    P: std::borrow::Borrow<T>,
604    I: Iterator<Item = Option<P>>,
605{
606    let (_, upper) = iterator.size_hint();
607    let additional = upper.expect("trusted_len_unzip requires an upper limit");
608
609    validity.reserve(additional);
610    let values = iterator.map(|item| {
611        if let Some(item) = item {
612            validity.push_unchecked(true);
613            *item.borrow()
614        } else {
615            validity.push_unchecked(false);
616            T::default()
617        }
618    });
619    buffer.extend(values);
620}
621
622/// Creates a [`MutableBitmap`] and a [`Vec`] from an iterator of `Option`.
623/// The first buffer corresponds to a bitmap buffer, the second one
624/// corresponds to a values buffer.
625/// # Safety
626/// The caller must ensure that `iterator` is `TrustedLen`.
627#[inline]
628pub(crate) unsafe fn trusted_len_unzip<I, P, T>(iterator: I) -> (Option<MutableBitmap>, Vec<T>)
629where
630    T: NativeType,
631    P: std::borrow::Borrow<T>,
632    I: Iterator<Item = Option<P>>,
633{
634    let mut validity = MutableBitmap::new();
635    let mut buffer = Vec::<T>::new();
636
637    extend_trusted_len_unzip(iterator, &mut validity, &mut buffer);
638
639    let validity = Some(validity);
640
641    (validity, buffer)
642}
643
644/// # Safety
645/// The caller must ensure that `iterator` is `TrustedLen`.
646#[inline]
647pub(crate) unsafe fn try_trusted_len_unzip<E, I, P, T>(
648    iterator: I,
649) -> std::result::Result<(Option<MutableBitmap>, Vec<T>), E>
650where
651    T: NativeType,
652    P: std::borrow::Borrow<T>,
653    I: Iterator<Item = std::result::Result<Option<P>, E>>,
654{
655    let (_, upper) = iterator.size_hint();
656    let len = upper.expect("trusted_len_unzip requires an upper limit");
657
658    let mut null = MutableBitmap::with_capacity(len);
659    let mut buffer = Vec::<T>::with_capacity(len);
660
661    let mut dst = buffer.as_mut_ptr();
662    for item in iterator {
663        let item = if let Some(item) = item? {
664            null.push(true);
665            *item.borrow()
666        } else {
667            null.push(false);
668            T::default()
669        };
670        std::ptr::write(dst, item);
671        dst = dst.add(1);
672    }
673    assert_eq!(
674        dst.offset_from(buffer.as_ptr()) as usize,
675        len,
676        "Trusted iterator length was not accurately reported"
677    );
678    buffer.set_len(len);
679    null.set_len(len);
680
681    let validity = Some(null);
682
683    Ok((validity, buffer))
684}
685
686impl<T: NativeType> PartialEq for MutablePrimitiveArray<T> {
687    fn eq(&self, other: &Self) -> bool {
688        self.iter().eq(other.iter())
689    }
690}
691
692impl<T: NativeType> TryExtendFromSelf for MutablePrimitiveArray<T> {
693    fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
694        extend_validity(self.len(), &mut self.validity, &other.validity);
695
696        let slice = other.values.as_slice();
697        self.values.extend_from_slice(slice);
698        Ok(())
699    }
700}