polars_core/chunked_array/ops/
mod.rs

1//! Traits for miscellaneous operations on ChunkedArray
2use arrow::offset::OffsetsBuffer;
3use polars_compute::rolling::QuantileMethod;
4
5use crate::prelude::*;
6
7pub(crate) mod aggregate;
8pub(crate) mod any_value;
9pub(crate) mod append;
10mod apply;
11#[cfg(feature = "approx_unique")]
12mod approx_n_unique;
13pub mod arity;
14mod bit_repr;
15mod bits;
16#[cfg(feature = "bitwise")]
17mod bitwise_reduce;
18pub(crate) mod chunkops;
19pub(crate) mod compare_inner;
20#[cfg(feature = "dtype-decimal")]
21mod decimal;
22pub(crate) mod downcast;
23pub(crate) mod explode;
24mod explode_and_offsets;
25mod extend;
26pub mod fill_null;
27mod filter;
28pub mod float_sorted_arg_max;
29mod for_each;
30pub mod full;
31pub mod gather;
32mod nesting_utils;
33pub(crate) mod nulls;
34mod reverse;
35#[cfg(feature = "rolling_window")]
36pub(crate) mod rolling_window;
37pub mod row_encode;
38pub mod search_sorted;
39mod set;
40mod shift;
41pub mod sort;
42#[cfg(feature = "algorithm_group_by")]
43pub(crate) mod unique;
44#[cfg(feature = "zip_with")]
45pub mod zip;
46
47pub use chunkops::_set_check_length;
48pub use nesting_utils::ChunkNestingUtils;
49#[cfg(feature = "serde-lazy")]
50use serde::{Deserialize, Serialize};
51pub use sort::options::*;
52
53use crate::chunked_array::cast::CastOptions;
54use crate::series::{BitRepr, IsSorted};
#[cfg(feature = "reinterpret")]
/// Conversions that hand back a [`Series`] under a signed or unsigned reinterpretation.
///
/// Only compiled when the `reinterpret` feature is enabled. Both methods have default
/// bodies that panic, so an implementor must override the ones it supports.
// NOTE(review): presumably a bit-level reinterpretation of same-width integers — confirm
// against the implementors.
pub trait Reinterpret {
    /// Signed reinterpretation.
    ///
    /// # Panics
    /// The default implementation always panics via `unimplemented!()`.
    fn reinterpret_signed(&self) -> Series {
        unimplemented!()
    }

    /// Unsigned reinterpretation.
    ///
    /// # Panics
    /// The default implementation always panics via `unimplemented!()`.
    fn reinterpret_unsigned(&self) -> Series {
        unimplemented!()
    }
}
65
/// Transmute [`ChunkedArray`] to bit representation.
///
/// This is useful in hashing contexts and reduces the number of compiled code paths.
pub(crate) trait ToBitRepr {
    /// Return the [`BitRepr`] for this array.
    fn to_bit_repr(&self) -> BitRepr;
}
72
/// Access individual elements as [`AnyValue`].
pub trait ChunkAnyValue {
    /// Get a single value. Beware this is slow.
    /// If you need this to be somewhat faster, cast Categorical to UInt32 first.
    ///
    /// # Safety
    /// Does not do any bounds checking.
    unsafe fn get_any_value_unchecked(&self, index: usize) -> AnyValue;

    /// Get a single value. Beware this is slow.
    ///
    /// Unlike [`ChunkAnyValue::get_any_value_unchecked`] this is a safe function and
    /// returns a [`PolarsResult`].
    // NOTE(review): presumably the error case is an out-of-bounds `index` — confirm
    // against the implementations.
    fn get_any_value(&self, index: usize) -> PolarsResult<AnyValue>;
}
84
85/// Explode/flatten a List or String Series
86pub trait ChunkExplode {
87    fn explode(&self, skip_empty: bool) -> PolarsResult<Series> {
88        self.explode_and_offsets(skip_empty).map(|t| t.0)
89    }
90    fn offsets(&self) -> PolarsResult<OffsetsBuffer<i64>>;
91    fn explode_and_offsets(&self, skip_empty: bool) -> PolarsResult<(Series, OffsetsBuffer<i64>)>;
92}
93
/// Expose the data as raw byte slices.
pub trait ChunkBytes {
    /// Return a vector of byte slices borrowed from `self`.
    // NOTE(review): presumably one slice per chunk — confirm against the implementations.
    fn to_byte_slices(&self) -> Vec<&[u8]>;
}
97
/// This differs from ChunkWindowCustom and ChunkWindow
/// by not using a fold aggregator, but reusing a `Series` wrapper and calling `Series` aggregators.
/// This likely is a bit slower than ChunkWindow
#[cfg(feature = "rolling_window")]
pub trait ChunkRollApply: AsRefDataType {
    /// Apply `_f` over rolling windows configured by `_options`.
    ///
    /// # Errors
    /// The default implementation ignores both arguments and always bails through
    /// `polars_bail!(opq = rolling_map, ..)`, passing the dtype obtained from
    /// `self.as_ref_dtype()`. Implementors that support the operation override it.
    fn rolling_map(
        &self,
        _f: &dyn Fn(&Series) -> Series,
        _options: RollingOptionsFixedWindow,
    ) -> PolarsResult<Series>
    where
        Self: Sized,
    {
        polars_bail!(opq = rolling_map, self.as_ref_dtype());
    }
}
114
/// Fallible gather by index.
///
/// Every implementor must also implement [`ChunkTakeUnchecked`] for the same index type.
pub trait ChunkTake<Idx: ?Sized>: ChunkTakeUnchecked<Idx> {
    /// Gather values from ChunkedArray by index.
    ///
    /// Returns a [`PolarsResult`], in contrast to the unchecked variant.
    // NOTE(review): presumably the error case is an out-of-bounds index — confirm.
    fn take(&self, indices: &Idx) -> PolarsResult<Self>
    where
        Self: Sized;
}
121
/// Gather by index without validating the indices.
pub trait ChunkTakeUnchecked<Idx: ?Sized> {
    /// Gather values from ChunkedArray by index.
    ///
    /// # Safety
    /// The non-null indices must be valid.
    unsafe fn take_unchecked(&self, indices: &Idx) -> Self;
}
129
/// Create a `ChunkedArray` with new values by index or by boolean mask.
///
/// Note that these operations clone data. This is however the only way we can modify at mask or
/// index level as the underlying Arrow arrays are immutable.
///
/// `A` is the value type handed to the setters and closures; `B` is the value type a
/// closure passed to [`ChunkSet::scatter_with`] returns.
pub trait ChunkSet<'a, A, B> {
    /// Set the values at indexes `idx` to some optional value `Option<A>`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let ca = UInt32Chunked::new("a".into(), &[1, 2, 3]);
    /// let new = ca.scatter_single(vec![0, 1], Some(10)).unwrap();
    ///
    /// assert_eq!(Vec::from(&new), &[Some(10), Some(10), Some(3)]);
    /// ```
    fn scatter_single<I: IntoIterator<Item = IdxSize>>(
        &'a self,
        idx: I,
        opt_value: Option<A>,
    ) -> PolarsResult<Self>
    where
        Self: Sized;

    /// Set the values at indexes `idx` by applying a closure to these values.
    ///
    /// The closure receives the current value as `Option<A>` and returns the replacement
    /// as `Option<B>`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let ca = Int32Chunked::new("a".into(), &[1, 2, 3]);
    /// let new = ca.scatter_with(vec![0, 1], |opt_v| opt_v.map(|v| v - 5)).unwrap();
    ///
    /// assert_eq!(Vec::from(&new), &[Some(-4), Some(-3), Some(3)]);
    /// ```
    fn scatter_with<I: IntoIterator<Item = IdxSize>, F>(
        &'a self,
        idx: I,
        f: F,
    ) -> PolarsResult<Self>
    where
        Self: Sized,
        F: Fn(Option<A>) -> Option<B>;
    /// Set the values where the mask evaluates to `true` to some optional value `Option<A>`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let ca = Int32Chunked::new("a".into(), &[1, 2, 3]);
    /// let mask = BooleanChunked::new("mask".into(), &[false, true, false]);
    /// let new = ca.set(&mask, Some(5)).unwrap();
    /// assert_eq!(Vec::from(&new), &[Some(1), Some(5), Some(3)]);
    /// ```
    fn set(&'a self, mask: &BooleanChunked, opt_value: Option<A>) -> PolarsResult<Self>
    where
        Self: Sized;
}
188
/// Cast `ChunkedArray<T>` to `ChunkedArray<N>`
pub trait ChunkCast {
    /// Cast a [`ChunkedArray`] to [`DataType`]
    ///
    /// Shorthand for [`ChunkCast::cast_with_options`] with `CastOptions::NonStrict`.
    fn cast(&self, dtype: &DataType) -> PolarsResult<Series> {
        self.cast_with_options(dtype, CastOptions::NonStrict)
    }

    /// Cast a [`ChunkedArray`] to [`DataType`], with the cast behavior selected by `options`.
    fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series>;

    /// Does not check if the cast is a valid one and may over/underflow
    ///
    /// # Safety
    /// - This doesn't do utf8 validation checking when casting from binary
    /// - This doesn't do categorical bound checking when casting from UInt32
    unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Series>;
}
206
/// Fastest way to do elementwise operations on a [`ChunkedArray<T>`] when the operation is cheaper than
/// branching due to null checking.
pub trait ChunkApply<'a, T> {
    /// The value type produced by closures passed to [`ChunkApply::apply_values`] and
    /// [`ChunkApply::apply`].
    type FuncRet;

    /// Apply a closure elementwise. This is fastest when the null check branching is more expensive
    /// than the closure application. Often it is.
    ///
    /// Null values remain null.
    ///
    /// # Example
    ///
    /// ```
    /// use polars_core::prelude::*;
    /// fn double(ca: &UInt32Chunked) -> UInt32Chunked {
    ///     ca.apply_values(|v| v * 2)
    /// }
    /// ```
    #[must_use]
    fn apply_values<F>(&'a self, f: F) -> Self
    where
        F: Fn(T) -> Self::FuncRet + Copy;

    /// Apply a closure elementwise including null values.
    ///
    /// The closure sees each element as `Option<T>` and decides the output, which may
    /// itself be `None`.
    #[must_use]
    fn apply<F>(&'a self, f: F) -> Self
    where
        F: Fn(Option<T>) -> Option<Self::FuncRet> + Copy;

    /// Apply a closure elementwise and write results to a mutable slice.
    ///
    /// The closure receives the array element and a reference to the slice element, and
    /// returns the new slice element.
    fn apply_to_slice<F, S>(&'a self, f: F, slice: &mut [S])
    // (value of chunkedarray, value of slice) -> value of slice
    where
        F: Fn(Option<T>, &S) -> S;
}
242
/// Aggregation operations.
///
/// Apart from [`ChunkAgg::_sum_as_f64`], every method has a default body so implementors
/// only override the aggregations they support.
pub trait ChunkAgg<T> {
    /// Aggregate the sum of the ChunkedArray.
    /// Returns `None` if not implemented for `T`.
    /// If the array is empty, `0` is returned
    fn sum(&self) -> Option<T> {
        None
    }

    /// The sum as an `f64`. This is the only method without a default.
    fn _sum_as_f64(&self) -> f64;

    /// Returns the minimum value in the array.
    /// The default implementation returns `None`.
    fn min(&self) -> Option<T> {
        None
    }

    /// Returns the maximum value in the array, according to the natural order.
    /// Returns `None` if the array is empty or only contains null values.
    fn max(&self) -> Option<T> {
        None
    }

    /// Returns `(min, max)` in one call.
    ///
    /// The default asks for the minimum first and only then for the maximum; if either
    /// is `None` the result is `None`.
    fn min_max(&self) -> Option<(T, T)> {
        let lowest = self.min()?;
        let highest = self.max()?;
        Some((lowest, highest))
    }

    /// Returns the mean value in the array.
    /// Returns `None` if the array is empty or only contains null values.
    fn mean(&self) -> Option<f64> {
        None
    }
}
274
/// Quantile and median aggregation.
pub trait ChunkQuantile<T> {
    /// Returns the median value in the array.
    /// Returns `None` if the array is empty or only contains null values.
    ///
    /// The default implementation always returns `None`.
    fn median(&self) -> Option<T> {
        None
    }
    /// Aggregate a given quantile of the ChunkedArray.
    /// Returns `None` if the array is empty or only contains null values.
    ///
    /// The default implementation ignores its arguments and always returns `Ok(None)`.
    fn quantile(&self, _quantile: f64, _method: QuantileMethod) -> PolarsResult<Option<T>> {
        Ok(None)
    }
}
288
/// Variance and standard deviation aggregation.
///
/// Both methods default to `None`; implementors override what they support.
// NOTE(review): `_ddof` is presumably the "delta degrees of freedom" subtracted from the
// count in the divisor — confirm against the implementations.
pub trait ChunkVar {
    /// Compute the variance of this ChunkedArray/Series.
    ///
    /// The default implementation ignores `_ddof` and returns `None`.
    fn var(&self, _ddof: u8) -> Option<f64> {
        None
    }

    /// Compute the standard deviation of this ChunkedArray/Series.
    ///
    /// The default implementation ignores `_ddof` and returns `None`.
    fn std(&self, _ddof: u8) -> Option<f64> {
        None
    }
}
301
/// Bitwise Reduction Operations.
///
/// Only compiled when the `bitwise` feature is enabled.
// NOTE(review): presumably each reduction yields `None` when there is nothing to reduce
// (empty or all-null input) — confirm against the implementations.
#[cfg(feature = "bitwise")]
pub trait ChunkBitwiseReduce {
    /// The value type each reduction produces.
    type Physical;

    /// Reduce all values with bitwise AND.
    fn and_reduce(&self) -> Option<Self::Physical>;
    /// Reduce all values with bitwise OR.
    fn or_reduce(&self) -> Option<Self::Physical>;
    /// Reduce all values with bitwise XOR.
    fn xor_reduce(&self) -> Option<Self::Physical>;
}
311
/// Compare [`Series`] and [`ChunkedArray`]'s and get a `boolean` mask that
/// can be used to filter rows.
///
/// # Example
///
/// ```
/// use polars_core::prelude::*;
/// fn filter_all_ones(df: &DataFrame) -> PolarsResult<DataFrame> {
///     let mask = df
///     .column("column_a")?
///     .as_materialized_series()
///     .equal(1)?;
///
///     df.filter(&mask)
/// }
/// ```
pub trait ChunkCompareEq<Rhs> {
    /// The result type of every comparison in this trait.
    type Item;

    /// Check for equality.
    fn equal(&self, rhs: Rhs) -> Self::Item;

    /// Check for equality where `None == None`.
    fn equal_missing(&self, rhs: Rhs) -> Self::Item;

    /// Check for inequality.
    fn not_equal(&self, rhs: Rhs) -> Self::Item;

    /// Check for inequality where `None == None`.
    fn not_equal_missing(&self, rhs: Rhs) -> Self::Item;
}
343
/// Compare [`Series`] and [`ChunkedArray`]'s using inequality operators (`<`, `>=`, etc.) and get
/// a `boolean` mask that can be used to filter rows.
pub trait ChunkCompareIneq<Rhs> {
    /// The result type of every comparison in this trait.
    type Item;

    /// Greater than comparison.
    fn gt(&self, rhs: Rhs) -> Self::Item;

    /// Greater than or equal comparison.
    fn gt_eq(&self, rhs: Rhs) -> Self::Item;

    /// Less than comparison.
    fn lt(&self, rhs: Rhs) -> Self::Item;

    /// Less than or equal comparison.
    fn lt_eq(&self, rhs: Rhs) -> Self::Item;
}
361
362/// Get unique values in a `ChunkedArray`
363pub trait ChunkUnique {
364    // We don't return Self to be able to use AutoRef specialization
365    /// Get unique values of a ChunkedArray
366    fn unique(&self) -> PolarsResult<Self>
367    where
368        Self: Sized;
369
370    /// Get first index of the unique values in a `ChunkedArray`.
371    /// This Vec is sorted.
372    fn arg_unique(&self) -> PolarsResult<IdxCa>;
373
374    /// Number of unique values in the `ChunkedArray`
375    fn n_unique(&self) -> PolarsResult<usize> {
376        self.arg_unique().map(|v| v.len())
377    }
378}
379
/// Approximate distinct count.
///
/// Only compiled when the `approx_unique` feature is enabled.
#[cfg(feature = "approx_unique")]
pub trait ChunkApproxNUnique {
    /// Return an approximate number of unique values as an [`IdxSize`].
    fn approx_n_unique(&self) -> IdxSize;
}
384
/// Sort operations on `ChunkedArray`.
pub trait ChunkSort<T: PolarsDataType> {
    /// Return a sorted `ChunkedArray`, with the sort configured by `options`.
    #[allow(unused_variables)]
    fn sort_with(&self, options: SortOptions) -> ChunkedArray<T>;

    /// Return a sorted `ChunkedArray`.
    fn sort(&self, descending: bool) -> ChunkedArray<T>;

    /// Retrieve the indexes needed to sort this array.
    fn arg_sort(&self, options: SortOptions) -> IdxCa;

    /// Retrieve the indexes needed to sort this and the other arrays.
    ///
    /// # Errors
    /// The default implementation ignores its arguments and always bails through
    /// `polars_bail!(opq = arg_sort_multiple, ..)` with `T::get_dtype()`.
    // `by` is unused in the default body, hence the lint allowance.
    #[allow(unused_variables)]
    fn arg_sort_multiple(
        &self,
        by: &[Column],
        _options: &SortMultipleOptions,
    ) -> PolarsResult<IdxCa> {
        polars_bail!(opq = arg_sort_multiple, T::get_dtype());
    }
}
406
407pub type FillNullLimit = Option<IdxSize>;
408
409#[derive(Copy, Clone, Debug, PartialEq, Hash)]
410#[cfg_attr(feature = "serde-lazy", derive(Serialize, Deserialize))]
411pub enum FillNullStrategy {
412    /// previous value in array
413    Backward(FillNullLimit),
414    /// next value in array
415    Forward(FillNullLimit),
416    /// mean value of array
417    Mean,
418    /// minimal value in array
419    Min,
420    /// maximum value in array
421    Max,
422    /// replace with the value zero
423    Zero,
424    /// replace with the value one
425    One,
426}
427
428impl FillNullStrategy {
429    pub fn is_elementwise(&self) -> bool {
430        matches!(self, Self::One | Self::Zero)
431    }
432}
433
/// Replace None values with a value
pub trait ChunkFillNullValue<T> {
    /// Replace None values with a given value `T`.
    fn fill_null_with_values(&self, value: T) -> PolarsResult<Self>
    where
        Self: Sized;
}
441
/// Fill a ChunkedArray with one value.
pub trait ChunkFull<T> {
    /// Create a ChunkedArray named `name` holding `length` copies of `value`.
    fn full(name: PlSmallStr, value: T, length: usize) -> Self
    where
        Self: Sized;
}
449
/// Create a ChunkedArray that contains only null values.
pub trait ChunkFullNull {
    /// Create a ChunkedArray with the given name and length, filled with nulls.
    fn full_null(_name: PlSmallStr, _length: usize) -> Self
    where
        Self: Sized;
}
455
/// Reverse a [`ChunkedArray<T>`]
pub trait ChunkReverse {
    /// Return a reversed version of this array; `self` is only borrowed.
    fn reverse(&self) -> Self;
}
461
/// Filter values by a boolean mask.
pub trait ChunkFilter<T: PolarsDataType> {
    /// Filter values in the ChunkedArray with a boolean mask.
    ///
    /// As the example shows, elements whose mask entry is `true` are kept.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let array = Int32Chunked::new("array".into(), &[1, 2, 3]);
    /// let mask = BooleanChunked::new("mask".into(), &[true, false, true]);
    ///
    /// let filtered = array.filter(&mask).unwrap();
    /// assert_eq!(Vec::from(&filtered), [Some(1), Some(3)])
    /// ```
    fn filter(&self, filter: &BooleanChunked) -> PolarsResult<ChunkedArray<T>>
    where
        Self: Sized;
}
478
/// Create a new ChunkedArray by repeating the value found at one index.
pub trait ChunkExpandAtIndex<T: PolarsDataType> {
    /// Create a new ChunkedArray of `length` elements, each equal to the value at `index`.
    fn new_from_index(&self, index: usize, length: usize) -> ChunkedArray<T>;
}
484
// Shared body for the `ChunkExpandAtIndex::new_from_index` implementations.
//
// NOTE: this expands to code containing a `return`, so it must be invoked directly inside
// a function whose return type matches `$self.clone()`. An empty `$self` is returned as a
// clone. Otherwise the value at `$index` is broadcast to `$length` elements, or an all-null
// array is built when that value is `None`.
macro_rules! impl_chunk_expand {
    ($self:ident, $length:ident, $index:ident) => {{
        if $self.is_empty() {
            return $self.clone();
        }
        let picked = $self.get($index);
        if let Some(value) = picked {
            ChunkedArray::full($self.name().clone(), value, $length)
        } else {
            ChunkedArray::full_null($self.name().clone(), $length)
        }
    }};
}
497
498impl<T: PolarsNumericType> ChunkExpandAtIndex<T> for ChunkedArray<T>
499where
500    ChunkedArray<T>: ChunkFull<T::Native>,
501{
502    fn new_from_index(&self, index: usize, length: usize) -> ChunkedArray<T> {
503        let mut out = impl_chunk_expand!(self, length, index);
504        out.set_sorted_flag(IsSorted::Ascending);
505        out
506    }
507}
508
509impl ChunkExpandAtIndex<BooleanType> for BooleanChunked {
510    fn new_from_index(&self, index: usize, length: usize) -> BooleanChunked {
511        let mut out = impl_chunk_expand!(self, length, index);
512        out.set_sorted_flag(IsSorted::Ascending);
513        out
514    }
515}
516
517impl ChunkExpandAtIndex<StringType> for StringChunked {
518    fn new_from_index(&self, index: usize, length: usize) -> StringChunked {
519        let mut out = impl_chunk_expand!(self, length, index);
520        out.set_sorted_flag(IsSorted::Ascending);
521        out
522    }
523}
524
525impl ChunkExpandAtIndex<BinaryType> for BinaryChunked {
526    fn new_from_index(&self, index: usize, length: usize) -> BinaryChunked {
527        let mut out = impl_chunk_expand!(self, length, index);
528        out.set_sorted_flag(IsSorted::Ascending);
529        out
530    }
531}
532
533impl ChunkExpandAtIndex<BinaryOffsetType> for BinaryOffsetChunked {
534    fn new_from_index(&self, index: usize, length: usize) -> BinaryOffsetChunked {
535        let mut out = impl_chunk_expand!(self, length, index);
536        out.set_sorted_flag(IsSorted::Ascending);
537        out
538    }
539}
540
541impl ChunkExpandAtIndex<ListType> for ListChunked {
542    fn new_from_index(&self, index: usize, length: usize) -> ListChunked {
543        let opt_val = self.get_as_series(index);
544        match opt_val {
545            Some(val) => {
546                let mut ca = ListChunked::full(self.name().clone(), &val, length);
547                unsafe { ca.to_logical(self.inner_dtype().clone()) };
548                ca
549            },
550            None => {
551                ListChunked::full_null_with_dtype(self.name().clone(), length, self.inner_dtype())
552            },
553        }
554    }
555}
556
#[cfg(feature = "dtype-struct")]
impl ChunkExpandAtIndex<StructType> for StructChunked {
    /// Broadcast the struct row at `index` to `length` rows.
    ///
    /// A null row becomes an all-null array of the chunk's dtype. Otherwise each field
    /// array is expanded separately and the results are reassembled into one struct array.
    ///
    /// # Panics
    /// Panics when the chunk lookup for `index` fails, or when a field array cannot be
    /// converted into a `Series` (both are `unwrap`ped).
    fn new_from_index(&self, index: usize, length: usize) -> ChunkedArray<StructType> {
        let (chunk_idx, idx) = self.index_to_chunked_index(index);
        let source = self.downcast_chunks().get(chunk_idx).unwrap();

        let expanded = if !source.is_null(idx) {
            // Expand every field on its own, keeping the first chunk of each result.
            let fields: Vec<_> = source
                .values()
                .iter()
                .map(|field_arr| {
                    let field = Series::try_from((PlSmallStr::EMPTY, field_arr.clone())).unwrap();
                    let field = field.new_from_index(idx, length);
                    field.chunks()[0].clone()
                })
                .collect();

            StructArray::new(source.dtype().clone(), length, fields, None).boxed()
        } else {
            new_null_array(source.dtype().clone(), length)
        };

        // SAFETY: chunks are from self.
        unsafe { self.copy_with_chunks(vec![expanded]) }
    }
}
582
#[cfg(feature = "dtype-array")]
impl ChunkExpandAtIndex<FixedSizeListType> for ArrayChunked {
    /// Broadcast the fixed-size list at `index` to `length` rows.
    ///
    /// When `get_as_series` yields nothing for `index`, the result is an all-null array
    /// column that keeps the inner dtype and width of `self`.
    fn new_from_index(&self, index: usize, length: usize) -> ArrayChunked {
        if let Some(row) = self.get_as_series(index) {
            let mut repeated = ArrayChunked::full(self.name().clone(), &row, length);
            // SAFETY (assumed): the inner dtype is taken from `self`, the same array `row`
            // was read from — TODO confirm against `to_logical`'s contract.
            unsafe { repeated.to_logical(self.inner_dtype().clone()) };
            repeated
        } else {
            ArrayChunked::full_null_with_dtype(
                self.name().clone(),
                length,
                self.inner_dtype(),
                self.width(),
            )
        }
    }
}
602
#[cfg(feature = "object")]
impl<T: PolarsObject> ChunkExpandAtIndex<ObjectType<T>> for ObjectChunked<T> {
    /// Broadcast the object at `index` to `length` elements.
    ///
    /// The object is cloned into the new array. When `get` yields `None` the result is
    /// an all-null array instead.
    fn new_from_index(&self, index: usize, length: usize) -> ObjectChunked<T> {
        match self.get(index) {
            None => ObjectChunked::<T>::full_null(self.name().clone(), length),
            Some(object) => ObjectChunked::<T>::full(self.name().clone(), object.clone(), length),
        }
    }
}
613
/// Shift the values of a [`ChunkedArray`] by a number of periods.
///
/// `V` is the type of the fill value supplied by the caller.
pub trait ChunkShiftFill<T: PolarsDataType, V> {
    /// Shift the values by a given period and fill the parts that will be empty due to this operation
    /// with `fill_value`.
    fn shift_and_fill(&self, periods: i64, fill_value: V) -> ChunkedArray<T>;
}
620
/// Shift the values of a [`ChunkedArray`] by a number of periods, without a caller-supplied
/// fill value.
// NOTE(review): presumably the vacated slots become null — confirm against the
// implementations.
pub trait ChunkShift<T: PolarsDataType> {
    /// Shift the values by `periods`; the signed type allows either direction.
    fn shift(&self, periods: i64) -> ChunkedArray<T>;
}
624
/// Combine two [`ChunkedArray`] based on some predicate.
pub trait ChunkZip<T: PolarsDataType> {
    /// Create a new ChunkedArray with values from self where the mask evaluates `true` and values
    /// from `other` where the mask evaluates `false`.
    fn zip_with(
        &self,
        mask: &BooleanChunked,
        other: &ChunkedArray<T>,
    ) -> PolarsResult<ChunkedArray<T>>;
}
635
/// Apply kernels on the arrow array chunks in a ChunkedArray.
///
/// `A` is the concrete arrow array type the kernel receives.
pub trait ChunkApplyKernel<A: Array> {
    /// Apply kernel and return result as a new ChunkedArray.
    #[must_use]
    fn apply_kernel(&self, f: &dyn Fn(&A) -> ArrayRef) -> Self;

    /// Apply a kernel that outputs an array of different type.
    ///
    /// The output type `S` is chosen by the caller.
    fn apply_kernel_cast<S>(&self, f: &dyn Fn(&A) -> ArrayRef) -> ChunkedArray<S>
    where
        S: PolarsDataType;
}
647
#[cfg(feature = "is_first_distinct")]
/// Mask the first unique values as `true`
pub trait IsFirstDistinct<T: PolarsDataType> {
    /// Return a boolean mask marking the first occurrence of each distinct value.
    ///
    /// # Errors
    /// The default implementation always bails through
    /// `polars_bail!(opq = is_first_distinct, ..)` with `T::get_dtype()`.
    fn is_first_distinct(&self) -> PolarsResult<BooleanChunked> {
        polars_bail!(opq = is_first_distinct, T::get_dtype());
    }
}
655
#[cfg(feature = "is_last_distinct")]
/// Mask the last unique values as `true`
pub trait IsLastDistinct<T: PolarsDataType> {
    /// Return a boolean mask marking the last occurrence of each distinct value.
    ///
    /// # Errors
    /// The default implementation always bails through
    /// `polars_bail!(opq = is_last_distinct, ..)` with `T::get_dtype()`.
    fn is_last_distinct(&self) -> PolarsResult<BooleanChunked> {
        polars_bail!(opq = is_last_distinct, T::get_dtype());
    }
}