polars_core/utils/
mod.rs

1mod any_value;
2use arrow::compute::concatenate::concatenate_validities;
3use arrow::compute::utils::combine_validities_and;
4pub mod flatten;
5pub(crate) mod series;
6mod supertype;
7use std::borrow::Cow;
8use std::ops::{Deref, DerefMut};
9mod schema;
10
11pub use any_value::*;
12use arrow::bitmap::Bitmap;
13use arrow::bitmap::bitmask::BitMask;
14pub use arrow::legacy::utils::*;
15pub use arrow::trusted_len::TrustMyLength;
16use flatten::*;
17use num_traits::{One, Zero};
18use rayon::prelude::*;
19pub use schema::*;
20pub use series::*;
21pub use supertype::*;
22pub use {arrow, rayon};
23
24use crate::POOL;
25use crate::prelude::*;
26
/// Transparent newtype around a `T` that dereferences to the wrapped value.
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T> Deref for Wrap<T> {
    type Target = T;

    /// Borrows the wrapped value.
    fn deref(&self) -> &T {
        let Wrap(inner) = self;
        inner
    }
}
36
/// Returns the number of threads reported by the crate-level [`POOL`].
///
/// NOTE(review): presumably used as the default number of partitions for
/// parallel work — confirm against callers.
#[inline(always)]
pub fn _set_partition_size() -> usize {
    POOL.current_num_threads()
}
41
/// Thin wrapper type that is useful for certain impl specializations.
///
/// For instance, it is used to implement
/// `impl<T> FromIterator<T::Native> for NoNull<ChunkedArray<T>>`,
/// because the nullable counterpart
/// `impl<T> FromIterator<Option<T::Native>> for ChunkedArray<T>`
/// was already implemented.
pub struct NoNull<T> {
    inner: T,
}

impl<T> NoNull<T> {
    /// Wraps `inner`.
    pub fn new(inner: T) -> Self {
        Self { inner }
    }

    /// Consumes the wrapper and hands back the wrapped value.
    pub fn into_inner(self) -> T {
        let Self { inner } = self;
        inner
    }
}

impl<T> Deref for NoNull<T> {
    type Target = T;

    /// Shared access to the wrapped value.
    fn deref(&self) -> &T {
        &self.inner
    }
}

impl<T> DerefMut for NoNull<T> {
    /// Exclusive access to the wrapped value.
    fn deref_mut(&mut self) -> &mut T {
        &mut self.inner
    }
}
75
/// Picks an allocation capacity for collecting `iter`, based on its `size_hint`.
///
/// * A known upper bound is used as-is.
/// * Without an upper bound, the lower bound is used, unless it is `0`, in
///   which case a default of `1024` is returned.
pub(crate) fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    let (lower, upper) = iter.size_hint();
    match upper {
        Some(bound) => bound,
        None if lower == 0 => 1024,
        None => lower,
    }
}
83
// Prefer this one over `split_ca`, as this can push the null_count into the thread pool.
/// Divides `len` elements into `n` consecutive partitions and returns one
/// `(offset, length)` tuple per partition.
///
/// Every partition has `len / n` elements, except the last one, which also
/// receives the remainder. `n == 0` is treated like `n == 1` (a single
/// partition covering everything) instead of dividing by zero.
#[doc(hidden)]
pub fn _split_offsets(len: usize, n: usize) -> Vec<(usize, usize)> {
    if n <= 1 {
        return vec![(0, len)];
    }

    let chunk_size = len / n;
    let last = n - 1;

    // `Map<Range<usize>, _>` reports an exact length, so a plain `collect`
    // allocates exactly once.
    (0..n)
        .map(|partition| {
            let offset = partition * chunk_size;
            let partition_len = if partition == last {
                // The final partition takes whatever is left.
                len - offset
            } else {
                chunk_size
            };
            (offset, partition_len)
        })
        .collect()
}
106
/// Abstraction over chunked containers that can be sliced and split.
///
/// Implemented in this module for [`DataFrame`], [`ChunkedArray`] and [`Series`];
/// it is what the generic `split*` helpers below operate on.
#[allow(clippy::len_without_is_empty)]
pub trait Container: Clone {
    /// Returns the part of the container described by `offset` and `len`.
    fn slice(&self, offset: i64, len: usize) -> Self;

    /// Splits the container in two at `offset`, returning `(left, right)`.
    fn split_at(&self, offset: i64) -> (Self, Self);

    /// Length of the container (the height for a [`DataFrame`]).
    fn len(&self) -> usize;

    /// Iterates over the chunks, yielding each one as its own container.
    fn iter_chunks(&self) -> impl Iterator<Item = Self>;

    /// Whether the container reports that it should be rechunked; the split
    /// helpers do not take the per-chunk fast path when this is `true`.
    fn should_rechunk(&self) -> bool;

    /// Number of chunks in the container.
    fn n_chunks(&self) -> usize;

    /// Lengths of the individual chunks.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize>;
}
123
/// [`Container`] implementation for [`DataFrame`]; each method forwards to the
/// corresponding `DataFrame` functionality.
impl Container for DataFrame {
    fn slice(&self, offset: i64, len: usize) -> Self {
        DataFrame::slice(self, offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        DataFrame::split_at(self, offset)
    }

    // The length of a frame is its height.
    fn len(&self) -> usize {
        self.height()
    }

    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        flatten_df_iter(self)
    }

    // NOTE(review): this relies on method resolution picking the inherent
    // `DataFrame::should_rechunk` over this trait method — confirm it exists.
    fn should_rechunk(&self) -> bool {
        self.should_rechunk()
    }

    // The chunk count is taken from the first column.
    fn n_chunks(&self) -> usize {
        DataFrame::first_col_n_chunks(self)
    }

    // Chunk lengths are read from the first column only; indexing `columns[0]`
    // panics when the frame has no columns.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        // @scalar-correctness?
        self.columns[0].as_materialized_series().chunk_lengths()
    }
}
154
/// [`Container`] implementation for [`ChunkedArray`]; most methods forward to
/// the inherent `ChunkedArray` methods of the same name.
impl<T: PolarsDataType> Container for ChunkedArray<T> {
    fn slice(&self, offset: i64, len: usize) -> Self {
        ChunkedArray::slice(self, offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        ChunkedArray::split_at(self, offset)
    }

    fn len(&self) -> usize {
        ChunkedArray::len(self)
    }

    // Builds a new array per chunk via `with_chunk`, reusing this array's name.
    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        self.downcast_iter()
            .map(|arr| Self::with_chunk(self.name().clone(), arr.clone()))
    }

    // Always `false` for a single array.
    fn should_rechunk(&self) -> bool {
        false
    }

    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        ChunkedArray::chunk_lengths(self)
    }
}
185
/// [`Container`] implementation for [`Series`]; most methods forward to the
/// wrapped inner value (`self.0`).
impl Container for Series {
    fn slice(&self, offset: i64, len: usize) -> Self {
        self.0.slice(offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        self.0.split_at(offset)
    }

    fn len(&self) -> usize {
        self.0.len()
    }

    // Yields one series per chunk index, obtained through `select_chunk`.
    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        (0..self.0.n_chunks()).map(|i| self.select_chunk(i))
    }

    // Always `false` for a single series.
    fn should_rechunk(&self) -> bool {
        false
    }

    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        self.0.chunk_lengths()
    }
}
215
216fn split_impl<C: Container>(container: &C, target: usize, chunk_size: usize) -> Vec<C> {
217    if target == 1 {
218        return vec![container.clone()];
219    }
220    let mut out = Vec::with_capacity(target);
221    let chunk_size = chunk_size as i64;
222
223    // First split
224    let (chunk, mut remainder) = container.split_at(chunk_size);
225    out.push(chunk);
226
227    // Take the rest of the splits of exactly chunk size, but skip the last remainder as we won't split that.
228    for _ in 1..target - 1 {
229        let (a, b) = remainder.split_at(chunk_size);
230        out.push(a);
231        remainder = b
232    }
233    // This can be slightly larger than `chunk_size`, but is smaller than `2 * chunk_size`.
234    out.push(remainder);
235    out
236}
237
238/// Splits, but doesn't flatten chunks. E.g. a container can still have multiple chunks.
239pub fn split<C: Container>(container: &C, target: usize) -> Vec<C> {
240    let total_len = container.len();
241    if total_len == 0 {
242        return vec![container.clone()];
243    }
244
245    let chunk_size = std::cmp::max(total_len / target, 1);
246
247    if container.n_chunks() == target
248        && container
249            .chunk_lengths()
250            .all(|len| len.abs_diff(chunk_size) < 100)
251        // We cannot get chunks if they are misaligned
252        && !container.should_rechunk()
253    {
254        return container.iter_chunks().collect();
255    }
256    split_impl(container, target, chunk_size)
257}
258
259/// Split a [`Container`] in `target` elements. The target doesn't have to be respected if not
260/// Deviation of the target might be done to create more equal size chunks.
261pub fn split_and_flatten<C: Container>(container: &C, target: usize) -> Vec<C> {
262    let total_len = container.len();
263    if total_len == 0 {
264        return vec![container.clone()];
265    }
266
267    let chunk_size = std::cmp::max(total_len / target, 1);
268
269    if container.n_chunks() == target
270        && container
271            .chunk_lengths()
272            .all(|len| len.abs_diff(chunk_size) < 100)
273        // We cannot get chunks if they are misaligned
274        && !container.should_rechunk()
275    {
276        return container.iter_chunks().collect();
277    }
278
279    if container.n_chunks() == 1 {
280        split_impl(container, target, chunk_size)
281    } else {
282        let mut out = Vec::with_capacity(target);
283        let chunks = container.iter_chunks();
284
285        'new_chunk: for mut chunk in chunks {
286            loop {
287                let h = chunk.len();
288                if h < chunk_size {
289                    // TODO if the chunk is much smaller than chunk size, we should try to merge it with the next one.
290                    out.push(chunk);
291                    continue 'new_chunk;
292                }
293
294                // If a split leads to the next chunk being smaller than 30% take the whole chunk
295                if ((h - chunk_size) as f64 / chunk_size as f64) < 0.3 {
296                    out.push(chunk);
297                    continue 'new_chunk;
298                }
299
300                let (a, b) = chunk.split_at(chunk_size as i64);
301                out.push(a);
302                chunk = b;
303            }
304        }
305        out
306    }
307}
308
309/// Split a [`DataFrame`] in `target` elements. The target doesn't have to be respected if not
310/// strict. Deviation of the target might be done to create more equal size chunks.
311///
312/// # Panics
313/// if chunks are not aligned
314pub fn split_df_as_ref(df: &DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
315    if strict {
316        split(df, target)
317    } else {
318        split_and_flatten(df, target)
319    }
320}
321
322#[doc(hidden)]
323/// Split a [`DataFrame`] into `n` parts. We take a `&mut` to be able to repartition/align chunks.
324/// `strict` in that it respects `n` even if the chunks are suboptimal.
325pub fn split_df(df: &mut DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
326    if target == 0 || df.is_empty() {
327        return vec![df.clone()];
328    }
329    // make sure that chunks are aligned.
330    df.align_chunks_par();
331    split_df_as_ref(df, target, strict)
332}
333
334pub fn slice_slice<T>(vals: &[T], offset: i64, len: usize) -> &[T] {
335    let (raw_offset, slice_len) = slice_offsets(offset, len, vals.len());
336    &vals[raw_offset..raw_offset + slice_len]
337}
338
/// Resolves a possibly negative `offset` and a `length` against an array of
/// `array_len` elements, returning the in-bounds `(start_index, slice_length)`.
///
/// A negative `offset` counts from the end of the array. Both the start and the
/// stop position are clamped to `0..=array_len`, so the result never exceeds
/// the array bounds.
///
/// # Panics
/// Panics if `array_len` does not fit in an `i64`.
#[inline]
#[doc(hidden)]
pub fn slice_offsets(offset: i64, length: usize, array_len: usize) -> (usize, usize) {
    let upper_bound =
        i64::try_from(array_len).expect("array length larger than i64::MAX");

    // Translate "from the end" offsets into absolute (possibly still negative) positions.
    let raw_start = if offset >= 0 {
        offset
    } else {
        offset.saturating_add_unsigned(array_len as u64)
    };
    let raw_stop = raw_start.saturating_add_unsigned(length as u64);

    let start = raw_start.clamp(0, upper_bound);
    let stop = raw_stop.clamp(0, upper_bound);

    (start as usize, (stop - start) as usize)
}
359
/// Dispatches on a physical `DataType` and invokes one of the supplied macros.
///
/// * `String` invokes `$macro_string!` and `Boolean` invokes `$macro_bool!`,
///   each with only the optional arguments.
/// * Every numeric dtype invokes `$macro!` with the matching native Rust type
///   (`u8`, `i32`, `f64`, ...) as first argument, followed by the optional
///   arguments.
///
/// Optional arguments are forwarded comma-separated in every arm.
///
/// # Panics
/// Panics for any dtype without an arm (including dtypes whose feature flag is
/// disabled).
#[macro_export]
macro_rules! match_dtype_to_physical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            DataType::String => $macro_string!($($opt_args),*),
            DataType::Boolean => $macro_bool!($($opt_args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(u8 $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(u16 $(, $opt_args)*),
            DataType::UInt32 => $macro!(u32 $(, $opt_args)*),
            DataType::UInt64 => $macro!(u64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(i8 $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(i16 $(, $opt_args)*),
            DataType::Int32 => $macro!(i32 $(, $opt_args)*),
            DataType::Int64 => $macro!(i64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(i128 $(, $opt_args)*),
            DataType::Float32 => $macro!(f32 $(, $opt_args)*),
            DataType::Float64 => $macro!(f64 $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
387
/// Dispatches on a `DataType` and invokes one of the supplied macros.
///
/// * `String`, `Binary` and `Boolean` invoke `$macro_string!`, `$macro_binary!`
///   and `$macro_bool!` respectively, each with only the optional arguments.
/// * Every numeric dtype invokes `$macro!` with the matching type marker
///   (`UInt8Type`, `Int32Type`, `Float64Type`, ...) as first argument, followed
///   by the optional arguments.
///
/// Optional arguments are forwarded comma-separated in every arm.
///
/// # Panics
/// Panics for any dtype without an arm (including dtypes whose feature flag is
/// disabled).
#[macro_export]
macro_rules! match_dtype_to_logical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_binary:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            DataType::String => $macro_string!($($opt_args),*),
            DataType::Binary => $macro_binary!($($opt_args),*),
            DataType::Boolean => $macro_bool!($($opt_args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(UInt8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(UInt16Type $(, $opt_args)*),
            DataType::UInt32 => $macro!(UInt32Type $(, $opt_args)*),
            DataType::UInt64 => $macro!(UInt64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(Int8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(Int16Type $(, $opt_args)*),
            DataType::Int32 => $macro!(Int32Type $(, $opt_args)*),
            DataType::Int64 => $macro!(Int64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(Int128Type $(, $opt_args)*),
            DataType::Float32 => $macro!(Float32Type $(, $opt_args)*),
            DataType::Float64 => $macro!(Float64Type $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
416
/// Apply a macro on the downcasted ChunkedArray of `$self`.
///
/// Matches on `$self.dtype()` and invokes `$macro_string!` for `String`,
/// `$macro_bool!` for `Boolean` and `$macro!` for every numeric dtype. In each
/// case the first argument is the downcasted array (e.g. `$self.i32().unwrap()`),
/// followed by the optional arguments.
///
/// # Panics
/// Panics for any dtype without an arm (including dtypes whose feature flag is
/// disabled).
#[macro_export]
macro_rules! match_arrow_dtype_apply_macro_ca {
    ($self:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            DataType::String => $macro_string!($self.str().unwrap() $(, $opt_args)*),
            DataType::Boolean => $macro_bool!($self.bool().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($self.i128().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
444
/// Runs `$body` with `$T` bound to the native Rust type (`i8`..`u64`, `f32`,
/// `f64`) that corresponds to the numeric `$dtype`.
///
/// The call site passes the binder as `|$T|`; a local helper macro
/// (`__with_ty__`) substitutes the concrete type into the body for the arm
/// that matches.
///
/// # Panics
/// Panics for any dtype without an arm (including dtypes whose feature flag is
/// disabled).
#[macro_export]
macro_rules! with_match_physical_numeric_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
471
/// Runs `$body` with `$T` bound to the native Rust integer type (`i8`..`i128`,
/// `u8`..`u64`) that corresponds to the integer `$dtype`.
///
/// The call site passes the binder as `|$T|`; a local helper macro
/// (`__with_ty__`) substitutes the concrete type into the body for the arm
/// that matches.
///
/// # Panics
/// Panics for any non-integer dtype (including dtypes whose feature flag is
/// disabled).
#[macro_export]
macro_rules! with_match_physical_integer_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
496
/// Runs `$body` with `$T` bound to `f32` or `f64`, depending on whether
/// `$dtype` is `Float32` or `Float64`.
///
/// The call site passes the binder as `|$T|`; a local helper macro
/// (`__with_ty__`) substitutes the concrete type into the body.
///
/// # Panics
/// Panics for any other dtype.
#[macro_export]
macro_rules! with_match_physical_float_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
509
/// Runs `$body` with `$T` bound to `Float32Type` or `Float64Type`, depending on
/// whether `$key_type` is `Float32` or `Float64`.
///
/// NOTE(review): unlike `with_match_physical_integer_polars_type`, this macro
/// does not import `$crate::datatypes::*`, so the `*Type` names presumably have
/// to be in scope at the call site — confirm.
///
/// # Panics
/// Panics for any other dtype.
#[macro_export]
macro_rules! with_match_physical_float_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
522
/// Runs `$body` with `$T` bound to the type marker (`Int8Type`..`UInt64Type`,
/// `Float32Type`, `Float64Type`) that corresponds to the numeric `$key_type`.
///
/// NOTE(review): unlike `with_match_physical_integer_polars_type`, this macro
/// does not import `$crate::datatypes::*`, so the `*Type` names presumably have
/// to be in scope at the call site — confirm.
///
/// # Panics
/// Panics for any dtype without an arm (including dtypes whose feature flag is
/// disabled).
#[macro_export]
macro_rules! with_match_physical_numeric_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
            #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
            #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
            #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
            #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
            #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
549
/// Runs `$body` with `$T` bound to the integer type marker
/// (`Int8Type`..`Int128Type`, `UInt8Type`..`UInt64Type`) that corresponds to
/// the integer `$key_type`.
///
/// The expansion imports both the `DataType` variants and `$crate::datatypes::*`,
/// so the variant names and the `*Type` markers resolve inside the expansion.
///
/// # Panics
/// Panics for any non-integer dtype (including dtypes whose feature flag is
/// disabled).
#[macro_export]
macro_rules! with_match_physical_integer_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    use $crate::datatypes::*;
    match $key_type {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
575
/// Apply a macro on the downcasted ChunkedArray of a physical numeric dtype
/// (integers and floats); logical types are not handled.
///
/// Matches on `$self.dtype()` and invokes `$macro!` with the downcasted array
/// (e.g. `$self.i32().unwrap()`) as first argument, followed by the optional
/// arguments.
///
/// # Panics
/// Panics for any dtype without an arm (including dtypes whose feature flag is
/// disabled).
#[macro_export]
macro_rules! downcast_as_macro_arg_physical {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($self.i128().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
602
/// Mutable counterpart of `downcast_as_macro_arg_physical`: apply a macro on
/// the mutably downcasted ChunkedArray of a physical numeric dtype (integers
/// and floats); logical types are not handled.
///
/// For the matching dtype, `$self.as_mut()` is bound to a `&mut` of the typed
/// chunked array and `$macro!` is invoked with the type marker (e.g.
/// `Int32Type`) as first argument, the mutable array as second, followed by
/// the optional arguments.
///
/// # Panics
/// Panics for any dtype without an arm (including dtypes whose feature flag is
/// disabled).
#[macro_export]
macro_rules! downcast_as_macro_arg_physical_mut {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        // clone so that we do not borrow
        match $self.dtype().clone() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => {
                let ca: &mut UInt8Chunked = $self.as_mut();
                $macro!(UInt8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => {
                let ca: &mut UInt16Chunked = $self.as_mut();
                $macro!(UInt16Type, ca $(, $opt_args)*)
            },
            DataType::UInt32 => {
                let ca: &mut UInt32Chunked = $self.as_mut();
                $macro!(UInt32Type, ca $(, $opt_args)*)
            },
            DataType::UInt64 => {
                let ca: &mut UInt64Chunked = $self.as_mut();
                $macro!(UInt64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => {
                let ca: &mut Int8Chunked = $self.as_mut();
                $macro!(Int8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => {
                let ca: &mut Int16Chunked = $self.as_mut();
                $macro!(Int16Type, ca $(, $opt_args)*)
            },
            DataType::Int32 => {
                let ca: &mut Int32Chunked = $self.as_mut();
                $macro!(Int32Type, ca $(, $opt_args)*)
            },
            DataType::Int64 => {
                let ca: &mut Int64Chunked = $self.as_mut();
                $macro!(Int64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => {
                let ca: &mut Int128Chunked = $self.as_mut();
                $macro!(Int128Type, ca $(, $opt_args)*)
            },
            DataType::Float32 => {
                let ca: &mut Float32Chunked = $self.as_mut();
                $macro!(Float32Type, ca $(, $opt_args)*)
            },
            DataType::Float64 => {
                let ca: &mut Float64Chunked = $self.as_mut();
                $macro!(Float64Type, ca $(, $opt_args)*)
            },
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
663
/// Calls `$method($args...)` on the downcasted ChunkedArray of `$self`.
///
/// Matches on `$self.dtype()` and downcasts with the accessor for that dtype
/// (`bool()`, `str()`, the numeric accessors, `time()`, `date()`, `datetime()`,
/// `list()`, `struct_()`) before calling the method.
///
/// # Panics
/// Panics for any dtype without an arm (including dtypes whose feature flag is
/// disabled).
#[macro_export]
macro_rules! apply_method_all_arrow_series {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Boolean => $self.bool().unwrap().$method($($args),*),
            DataType::String => $self.str().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            DataType::Time => $self.time().unwrap().$method($($args),*),
            DataType::Date => $self.date().unwrap().$method($($args),*),
            DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
            DataType::List(_) => $self.list().unwrap().$method($($args),*),
            DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
            dt => panic!("dtype {:?} not supported", dt)
        }
    }
}
695
/// Calls `$method($args...)` on the downcasted ChunkedArray of `$self` for
/// physical integer dtypes.
///
/// Matches on `$self.dtype()` and downcasts with the matching integer accessor
/// (e.g. `$self.i32().unwrap()`) before calling the method.
///
/// # Panics
/// Panics for any non-integer dtype (including dtypes whose feature flag is
/// disabled).
#[macro_export]
macro_rules! apply_method_physical_integer {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }
}
718
/// Calls `$method($args...)` on the downcasted ChunkedArray of `$self` for
/// physical numeric dtypes. Doesn't include Bool and String.
///
/// `Float32` and `Float64` are handled here; every other dtype is delegated to
/// `apply_method_physical_integer!`. The delegation goes through `$crate::`, so
/// callers only need this macro in scope.
///
/// # Panics
/// Panics (inside `apply_method_physical_integer!`) for any dtype that is
/// neither a float nor a supported integer.
#[macro_export]
macro_rules! apply_method_physical_numeric {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            _ => $crate::apply_method_physical_integer!($self, $method, $($args),*),
        }
    }
}
730
/// Builds a `DataFrame` from `name => values` pairs.
///
/// Each pair is turned into a `Series` through `NamedFrom::new(name.into(), values)`
/// and converted into a `Column`; the columns are then passed as a `Vec` to
/// `DataFrame::new`, whose return value is the value of the macro invocation.
/// At least one pair is required; a trailing comma is allowed.
#[macro_export]
macro_rules! df {
    ($($col_name:expr => $slice:expr), + $(,)?) => {
        $crate::prelude::DataFrame::new(vec![
            $($crate::prelude::Column::from(<$crate::prelude::Series as $crate::prelude::NamedFrom::<_, _>>::new($col_name.into(), $slice)),)+
        ])
    }
}
739
740pub fn get_time_units(tu_l: &TimeUnit, tu_r: &TimeUnit) -> TimeUnit {
741    use crate::datatypes::time_unit::TimeUnit::*;
742    match (tu_l, tu_r) {
743        (Nanoseconds, Microseconds) => Microseconds,
744        (_, Milliseconds) => Milliseconds,
745        _ => *tu_l,
746    }
747}
748
/// Builds the `SchemaMismatch` error used when two dataframes of different
/// width are stacked vertically.
///
/// The message contains both widths and, per dataframe, the comma-separated
/// names that `field_compare` collected into the respective "extra" list.
/// Marked `#[cold]` and `#[inline(never)]` because it is only reached on the
/// error path.
#[cold]
#[inline(never)]
fn width_mismatch(df1: &DataFrame, df2: &DataFrame) -> PolarsError {
    let mut df1_extra = Vec::new();
    let mut df2_extra = Vec::new();

    // Keep the schemas in locals: the names collected below borrow from them.
    let s1 = df1.schema();
    let s2 = df2.schema();

    s1.field_compare(s2, &mut df1_extra, &mut df2_extra);

    // Reduce each list of extra fields to a comma-separated string of names.
    let df1_extra = df1_extra
        .into_iter()
        .map(|(_, (n, _))| n.as_str())
        .collect::<Vec<_>>()
        .join(", ");
    let df2_extra = df2_extra
        .into_iter()
        .map(|(_, (n, _))| n.as_str())
        .collect::<Vec<_>>()
        .join(", ");

    polars_err!(
        SchemaMismatch: r#"unable to vstack, dataframes have different widths ({} != {}).
One dataframe has additional columns: [{df1_extra}].
Other dataframe has additional columns: [{df2_extra}]."#,
        df1.width(),
        df2.width(),
    )
}
779
780pub fn accumulate_dataframes_vertical_unchecked_optional<I>(dfs: I) -> Option<DataFrame>
781where
782    I: IntoIterator<Item = DataFrame>,
783{
784    let mut iter = dfs.into_iter();
785    let additional = iter.size_hint().0;
786    let mut acc_df = iter.next()?;
787    acc_df.reserve_chunks(additional);
788
789    for df in iter {
790        if acc_df.width() != df.width() {
791            panic!("{}", width_mismatch(&acc_df, &df));
792        }
793
794        acc_df.vstack_mut_owned_unchecked(df);
795    }
796    Some(acc_df)
797}
798
799/// This takes ownership of the DataFrame so that drop is called earlier.
800/// Does not check if schema is correct
801pub fn accumulate_dataframes_vertical_unchecked<I>(dfs: I) -> DataFrame
802where
803    I: IntoIterator<Item = DataFrame>,
804{
805    let mut iter = dfs.into_iter();
806    let additional = iter.size_hint().0;
807    let mut acc_df = iter.next().unwrap();
808    acc_df.reserve_chunks(additional);
809
810    for df in iter {
811        if acc_df.width() != df.width() {
812            panic!("{}", width_mismatch(&acc_df, &df));
813        }
814
815        acc_df.vstack_mut_owned_unchecked(df);
816    }
817    acc_df
818}
819
820/// This takes ownership of the DataFrame so that drop is called earlier.
821/// # Panics
822/// Panics if `dfs` is empty.
823pub fn accumulate_dataframes_vertical<I>(dfs: I) -> PolarsResult<DataFrame>
824where
825    I: IntoIterator<Item = DataFrame>,
826{
827    let mut iter = dfs.into_iter();
828    let additional = iter.size_hint().0;
829    let mut acc_df = iter.next().unwrap();
830    acc_df.reserve_chunks(additional);
831    for df in iter {
832        if acc_df.width() != df.width() {
833            return Err(width_mismatch(&acc_df, &df));
834        }
835
836        acc_df.vstack_mut_owned(df)?;
837    }
838
839    Ok(acc_df)
840}
841
842/// Concat the DataFrames to a single DataFrame.
843pub fn concat_df<'a, I>(dfs: I) -> PolarsResult<DataFrame>
844where
845    I: IntoIterator<Item = &'a DataFrame>,
846{
847    let mut iter = dfs.into_iter();
848    let additional = iter.size_hint().0;
849    let mut acc_df = iter.next().unwrap().clone();
850    acc_df.reserve_chunks(additional);
851    for df in iter {
852        acc_df.vstack_mut(df)?;
853    }
854    Ok(acc_df)
855}
856
857/// Concat the DataFrames to a single DataFrame.
858pub fn concat_df_unchecked<'a, I>(dfs: I) -> DataFrame
859where
860    I: IntoIterator<Item = &'a DataFrame>,
861{
862    let mut iter = dfs.into_iter();
863    let additional = iter.size_hint().0;
864    let mut acc_df = iter.next().unwrap().clone();
865    acc_df.reserve_chunks(additional);
866    for df in iter {
867        acc_df.vstack_mut_unchecked(df);
868    }
869    acc_df
870}
871
872pub fn accumulate_dataframes_horizontal(dfs: Vec<DataFrame>) -> PolarsResult<DataFrame> {
873    let mut iter = dfs.into_iter();
874    let mut acc_df = iter.next().unwrap();
875    for df in iter {
876        acc_df.hstack_mut(df.get_columns())?;
877    }
878    Ok(acc_df)
879}
880
881/// Ensure the chunks in both ChunkedArrays have the same length.
882/// # Panics
883/// This will panic if `left.len() != right.len()` and array is chunked.
884pub fn align_chunks_binary<'a, T, B>(
885    left: &'a ChunkedArray<T>,
886    right: &'a ChunkedArray<B>,
887) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<B>>)
888where
889    B: PolarsDataType,
890    T: PolarsDataType,
891{
892    let assert = || {
893        assert_eq!(
894            left.len(),
895            right.len(),
896            "expected arrays of the same length"
897        )
898    };
899    match (left.chunks.len(), right.chunks.len()) {
900        // All chunks are equal length
901        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
902        // All chunks are equal length
903        (a, b)
904            if a == b
905                && left
906                    .chunk_lengths()
907                    .zip(right.chunk_lengths())
908                    .all(|(l, r)| l == r) =>
909        {
910            (Cow::Borrowed(left), Cow::Borrowed(right))
911        },
912        (_, 1) => {
913            assert();
914            (
915                Cow::Borrowed(left),
916                Cow::Owned(right.match_chunks(left.chunk_lengths())),
917            )
918        },
919        (1, _) => {
920            assert();
921            (
922                Cow::Owned(left.match_chunks(right.chunk_lengths())),
923                Cow::Borrowed(right),
924            )
925        },
926        (_, _) => {
927            assert();
928            // could optimize to choose to rechunk a primitive and not a string or list type
929            let left = left.rechunk();
930            (
931                Cow::Owned(left.match_chunks(right.chunk_lengths())),
932                Cow::Borrowed(right),
933            )
934        },
935    }
936}
937
938/// Ensure the chunks in ChunkedArray and Series have the same length.
939/// # Panics
940/// This will panic if `left.len() != right.len()` and array is chunked.
941pub fn align_chunks_binary_ca_series<'a, T>(
942    left: &'a ChunkedArray<T>,
943    right: &'a Series,
944) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, Series>)
945where
946    T: PolarsDataType,
947{
948    let assert = || {
949        assert_eq!(
950            left.len(),
951            right.len(),
952            "expected arrays of the same length"
953        )
954    };
955    match (left.chunks.len(), right.chunks().len()) {
956        // All chunks are equal length
957        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
958        // All chunks are equal length
959        (a, b)
960            if a == b
961                && left
962                    .chunk_lengths()
963                    .zip(right.chunk_lengths())
964                    .all(|(l, r)| l == r) =>
965        {
966            assert();
967            (Cow::Borrowed(left), Cow::Borrowed(right))
968        },
969        (_, 1) => (left.rechunk(), Cow::Borrowed(right)),
970        (1, _) => (Cow::Borrowed(left), Cow::Owned(right.rechunk())),
971        (_, _) => {
972            assert();
973            (left.rechunk(), Cow::Owned(right.rechunk()))
974        },
975    }
976}
977
/// Owned variant of chunk alignment for two `Series`.
///
/// Inputs that are both single-chunk, or that share the exact same chunk lengths, are
/// returned untouched. Otherwise every side that does not hold exactly one chunk is
/// re-chunked. Lengths are never compared here.
#[cfg(feature = "performant")]
pub(crate) fn align_chunks_binary_owned_series(left: Series, right: Series) -> (Series, Series) {
    let n_left = left.chunks().len();
    let n_right = right.chunks().len();

    // Both single-chunk, or chunk-for-chunk identical lengths.
    let already_aligned = (n_left == 1 && n_right == 1)
        || (n_left == n_right
            && left
                .chunk_lengths()
                .zip(right.chunk_lengths())
                .all(|(l, r)| l == r));
    if already_aligned {
        return (left, right);
    }

    if n_right == 1 {
        (left.rechunk(), right)
    } else if n_left == 1 {
        (left, right.rechunk())
    } else {
        (left.rechunk(), right.rechunk())
    }
}
997
998pub(crate) fn align_chunks_binary_owned<T, B>(
999    left: ChunkedArray<T>,
1000    right: ChunkedArray<B>,
1001) -> (ChunkedArray<T>, ChunkedArray<B>)
1002where
1003    B: PolarsDataType,
1004    T: PolarsDataType,
1005{
1006    match (left.chunks.len(), right.chunks.len()) {
1007        (1, 1) => (left, right),
1008        // All chunks are equal length
1009        (a, b)
1010            if a == b
1011                && left
1012                    .chunk_lengths()
1013                    .zip(right.chunk_lengths())
1014                    .all(|(l, r)| l == r) =>
1015        {
1016            (left, right)
1017        },
1018        (_, 1) => (left.rechunk().into_owned(), right),
1019        (1, _) => (left, right.rechunk().into_owned()),
1020        (_, _) => (left.rechunk().into_owned(), right.rechunk().into_owned()),
1021    }
1022}
1023
1024/// # Panics
1025/// This will panic if `a.len() != b.len() || b.len() != c.len()` and array is chunked.
1026#[allow(clippy::type_complexity)]
1027pub fn align_chunks_ternary<'a, A, B, C>(
1028    a: &'a ChunkedArray<A>,
1029    b: &'a ChunkedArray<B>,
1030    c: &'a ChunkedArray<C>,
1031) -> (
1032    Cow<'a, ChunkedArray<A>>,
1033    Cow<'a, ChunkedArray<B>>,
1034    Cow<'a, ChunkedArray<C>>,
1035)
1036where
1037    A: PolarsDataType,
1038    B: PolarsDataType,
1039    C: PolarsDataType,
1040{
1041    if a.chunks.len() == 1 && b.chunks.len() == 1 && c.chunks.len() == 1 {
1042        return (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c));
1043    }
1044
1045    assert!(
1046        a.len() == b.len() && b.len() == c.len(),
1047        "expected arrays of the same length"
1048    );
1049
1050    match (a.chunks.len(), b.chunks.len(), c.chunks.len()) {
1051        (_, 1, 1) => (
1052            Cow::Borrowed(a),
1053            Cow::Owned(b.match_chunks(a.chunk_lengths())),
1054            Cow::Owned(c.match_chunks(a.chunk_lengths())),
1055        ),
1056        (1, 1, _) => (
1057            Cow::Owned(a.match_chunks(c.chunk_lengths())),
1058            Cow::Owned(b.match_chunks(c.chunk_lengths())),
1059            Cow::Borrowed(c),
1060        ),
1061        (1, _, 1) => (
1062            Cow::Owned(a.match_chunks(b.chunk_lengths())),
1063            Cow::Borrowed(b),
1064            Cow::Owned(c.match_chunks(b.chunk_lengths())),
1065        ),
1066        (1, _, _) => {
1067            let b = b.rechunk();
1068            (
1069                Cow::Owned(a.match_chunks(c.chunk_lengths())),
1070                Cow::Owned(b.match_chunks(c.chunk_lengths())),
1071                Cow::Borrowed(c),
1072            )
1073        },
1074        (_, 1, _) => {
1075            let a = a.rechunk();
1076            (
1077                Cow::Owned(a.match_chunks(c.chunk_lengths())),
1078                Cow::Owned(b.match_chunks(c.chunk_lengths())),
1079                Cow::Borrowed(c),
1080            )
1081        },
1082        (_, _, 1) => {
1083            let b = b.rechunk();
1084            (
1085                Cow::Borrowed(a),
1086                Cow::Owned(b.match_chunks(a.chunk_lengths())),
1087                Cow::Owned(c.match_chunks(a.chunk_lengths())),
1088            )
1089        },
1090        (len_a, len_b, len_c)
1091            if len_a == len_b
1092                && len_b == len_c
1093                && a.chunk_lengths()
1094                    .zip(b.chunk_lengths())
1095                    .zip(c.chunk_lengths())
1096                    .all(|((a, b), c)| a == b && b == c) =>
1097        {
1098            (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c))
1099        },
1100        _ => {
1101            // could optimize to choose to rechunk a primitive and not a string or list type
1102            let a = a.rechunk();
1103            let b = b.rechunk();
1104            (
1105                Cow::Owned(a.match_chunks(c.chunk_lengths())),
1106                Cow::Owned(b.match_chunks(c.chunk_lengths())),
1107                Cow::Borrowed(c),
1108            )
1109        },
1110    }
1111}
1112
1113pub fn binary_concatenate_validities<'a, T, B>(
1114    left: &'a ChunkedArray<T>,
1115    right: &'a ChunkedArray<B>,
1116) -> Option<Bitmap>
1117where
1118    B: PolarsDataType,
1119    T: PolarsDataType,
1120{
1121    let (left, right) = align_chunks_binary(left, right);
1122    let left_validity = concatenate_validities(left.chunks());
1123    let right_validity = concatenate_validities(right.chunks());
1124    combine_validities_and(left_validity.as_ref(), right_validity.as_ref())
1125}
1126
1127/// Convenience for `x.into_iter().map(Into::into).collect()` using an `into_vec()` function.
1128pub trait IntoVec<T> {
1129    fn into_vec(self) -> Vec<T>;
1130}
1131
1132impl<I, S> IntoVec<PlSmallStr> for I
1133where
1134    I: IntoIterator<Item = S>,
1135    S: Into<PlSmallStr>,
1136{
1137    fn into_vec(self) -> Vec<PlSmallStr> {
1138        self.into_iter().map(|s| s.into()).collect()
1139    }
1140}
1141
1142/// This logic is same as the impl on ChunkedArray
1143/// The difference is that there is less indirection because the caller should preallocate
1144/// `chunk_lens` once. On the `ChunkedArray` we indirect through an `ArrayRef` which is an indirection
1145/// and a vtable.
1146#[inline]
1147pub(crate) fn index_to_chunked_index<
1148    I: Iterator<Item = Idx>,
1149    Idx: PartialOrd + std::ops::AddAssign + std::ops::SubAssign + Zero + One,
1150>(
1151    chunk_lens: I,
1152    index: Idx,
1153) -> (Idx, Idx) {
1154    let mut index_remainder = index;
1155    let mut current_chunk_idx = Zero::zero();
1156
1157    for chunk_len in chunk_lens {
1158        if chunk_len > index_remainder {
1159            break;
1160        } else {
1161            index_remainder -= chunk_len;
1162            current_chunk_idx += One::one();
1163        }
1164    }
1165    (current_chunk_idx, index_remainder)
1166}
1167
1168pub(crate) fn index_to_chunked_index_rev<
1169    I: Iterator<Item = Idx>,
1170    Idx: PartialOrd
1171        + std::ops::AddAssign
1172        + std::ops::SubAssign
1173        + std::ops::Sub<Output = Idx>
1174        + Zero
1175        + One
1176        + Copy
1177        + std::fmt::Debug,
1178>(
1179    chunk_lens_rev: I,
1180    index_from_back: Idx,
1181    total_chunks: Idx,
1182) -> (Idx, Idx) {
1183    debug_assert!(index_from_back > Zero::zero(), "at least -1");
1184    let mut index_remainder = index_from_back;
1185    let mut current_chunk_idx = One::one();
1186    let mut current_chunk_len = Zero::zero();
1187
1188    for chunk_len in chunk_lens_rev {
1189        current_chunk_len = chunk_len;
1190        if chunk_len >= index_remainder {
1191            break;
1192        } else {
1193            index_remainder -= chunk_len;
1194            current_chunk_idx += One::one();
1195        }
1196    }
1197    (
1198        total_chunks - current_chunk_idx,
1199        current_chunk_len - index_remainder,
1200    )
1201}
1202
1203pub(crate) fn first_non_null<'a, I>(iter: I) -> Option<usize>
1204where
1205    I: Iterator<Item = Option<&'a Bitmap>>,
1206{
1207    let mut offset = 0;
1208    for validity in iter {
1209        if let Some(validity) = validity {
1210            let mask = BitMask::from_bitmap(validity);
1211            if let Some(n) = mask.nth_set_bit_idx(0, 0) {
1212                return Some(offset + n);
1213            }
1214            offset += validity.len()
1215        } else {
1216            return Some(offset);
1217        }
1218    }
1219    None
1220}
1221
1222pub(crate) fn last_non_null<'a, I>(iter: I, len: usize) -> Option<usize>
1223where
1224    I: DoubleEndedIterator<Item = Option<&'a Bitmap>>,
1225{
1226    if len == 0 {
1227        return None;
1228    }
1229    let mut offset = 0;
1230    for validity in iter.rev() {
1231        if let Some(validity) = validity {
1232            let mask = BitMask::from_bitmap(validity);
1233            if let Some(n) = mask.nth_set_bit_idx_rev(0, mask.len()) {
1234                let mask_start = len - offset - mask.len();
1235                return Some(mask_start + n);
1236            }
1237            offset += validity.len()
1238        } else {
1239            return Some(len - 1 - offset);
1240        }
1241    }
1242    None
1243}
1244
1245/// ensure that nulls are propagated to both arrays
1246pub fn coalesce_nulls<'a, T: PolarsDataType>(
1247    a: &'a ChunkedArray<T>,
1248    b: &'a ChunkedArray<T>,
1249) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<T>>) {
1250    if a.null_count() > 0 || b.null_count() > 0 {
1251        let (a, b) = align_chunks_binary(a, b);
1252        let mut b = b.into_owned();
1253        let a = a.coalesce_nulls(b.chunks());
1254
1255        for arr in a.chunks().iter() {
1256            for arr_b in unsafe { b.chunks_mut() } {
1257                *arr_b = arr_b.with_validity(arr.validity().cloned())
1258            }
1259        }
1260        b.compute_len();
1261        (Cow::Owned(a), Cow::Owned(b))
1262    } else {
1263        (Cow::Borrowed(a), Cow::Borrowed(b))
1264    }
1265}
1266
1267pub fn coalesce_nulls_columns(a: &Column, b: &Column) -> (Column, Column) {
1268    if a.null_count() > 0 || b.null_count() > 0 {
1269        let mut a = a.as_materialized_series().rechunk();
1270        let mut b = b.as_materialized_series().rechunk();
1271        for (arr_a, arr_b) in unsafe { a.chunks_mut().iter_mut().zip(b.chunks_mut()) } {
1272            let validity = match (arr_a.validity(), arr_b.validity()) {
1273                (None, Some(b)) => Some(b.clone()),
1274                (Some(a), Some(b)) => Some(a & b),
1275                (Some(a), None) => Some(a.clone()),
1276                (None, None) => None,
1277            };
1278            *arr_a = arr_a.with_validity(validity.clone());
1279            *arr_b = arr_b.with_validity(validity);
1280        }
1281        a.compute_len();
1282        b.compute_len();
1283        (a.into(), b.into())
1284    } else {
1285        (a.clone(), b.clone())
1286    }
1287}
1288
1289pub fn operation_exceeded_idxsize_msg(operation: &str) -> String {
1290    if size_of::<IdxSize>() == size_of::<u32>() {
1291        format!(
1292            "{} exceeded the maximum supported limit of {} rows. Consider installing 'polars-u64-idx'.",
1293            operation,
1294            IdxSize::MAX,
1295        )
1296    } else {
1297        format!(
1298            "{} exceeded the maximum supported limit of {} rows.",
1299            operation,
1300            IdxSize::MAX,
1301        )
1302    }
1303}
1304
#[cfg(test)]
mod test {
    use super::*;

    /// Aligns `a` and `b` and asserts that both end up with the same chunk lengths.
    fn assert_aligned_layout(a: &Int32Chunked, b: &Int32Chunked) {
        let (a, b) = align_chunks_binary(a, b);
        let layout_a: Vec<_> = a.chunk_lengths().collect();
        let layout_b: Vec<_> = b.chunk_lengths().collect();
        assert_eq!(layout_a, layout_b);
    }

    // Splitting ten values three ways yields parts of length 3, 3 and 4.
    #[test]
    fn test_split() {
        let ca: Int32Chunked = (0..10).collect_ca("a".into());

        let out = split(&ca, 3);
        for (idx, expected_len) in [3, 3, 4].iter().enumerate() {
            assert_eq!(out[idx].len(), *expected_len);
        }
    }

    #[test]
    fn test_align_chunks() -> PolarsResult<()> {
        // One chunk against two chunks of lengths 1 and 3.
        let a = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut b = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let b_tail = Int32Chunked::new(PlSmallStr::EMPTY, &[2, 3, 4]);
        b.append(&b_tail)?;
        assert_aligned_layout(&a, &b);

        // One chunk against four chunks of length 1.
        let a = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut b = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let unit = b.clone();
        for _ in 0..3 {
            b.append(&unit)?;
        }
        assert_aligned_layout(&a, &b);

        Ok(())
    }
}