polars_core/utils/
mod.rs

1mod any_value;
2use arrow::compute::concatenate::concatenate_validities;
3use arrow::compute::utils::combine_validities_and;
4pub mod flatten;
5pub(crate) mod series;
6mod supertype;
7use std::borrow::Cow;
8use std::ops::{Deref, DerefMut};
9mod schema;
10
11pub use any_value::*;
12use arrow::bitmap::Bitmap;
13pub use arrow::legacy::utils::*;
14pub use arrow::trusted_len::TrustMyLength;
15use flatten::*;
16use num_traits::{One, Zero};
17use rayon::prelude::*;
18pub use schema::*;
19pub use series::*;
20pub use supertype::*;
21pub use {arrow, rayon};
22
23use crate::POOL;
24use crate::prelude::*;
25
/// Transparent newtype around `T`.
///
/// The wrapped value is public (`.0`) and is also reachable through [`Deref`].
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T> Deref for Wrap<T> {
    type Target = T;

    /// Borrows the wrapped value.
    fn deref(&self) -> &T {
        let Wrap(inner) = self;
        inner
    }
}
35
/// Returns the current number of threads of the global `POOL`.
///
/// NOTE(review): judging by the name, callers presumably use this value as the
/// number of partitions to split work into — confirm against call sites.
#[inline(always)]
pub fn _set_partition_size() -> usize {
    POOL.current_num_threads()
}
40
/// Thin wrapper type that exists to allow certain impl specializations.
///
/// It is, for instance, used to implement
/// `impl<T> FromIterator<T::Native> for NoNull<ChunkedArray<T>>`,
/// because the `Option<T::Native>` flavour was already taken by
/// `impl<T> FromIterator<Option<T::Native>> for ChunkedArray<T>`.
pub struct NoNull<T> {
    inner: T,
}

impl<T> NoNull<T> {
    /// Wraps `inner`.
    pub fn new(inner: T) -> Self {
        Self { inner }
    }

    /// Consumes the wrapper and hands back the wrapped value.
    pub fn into_inner(self) -> T {
        let Self { inner } = self;
        inner
    }
}

impl<T> Deref for NoNull<T> {
    type Target = T;

    /// Shared access to the wrapped value.
    fn deref(&self) -> &T {
        &self.inner
    }
}

impl<T> DerefMut for NoNull<T> {
    /// Exclusive access to the wrapped value.
    fn deref_mut(&mut self) -> &mut T {
        &mut self.inner
    }
}
74
/// Picks an allocation capacity for collecting `iter`, based on its `size_hint`.
///
/// * If an upper bound is known, that upper bound is returned.
/// * Otherwise the lower bound is returned, unless it is `0`, in which case a
///   default of `1024` is used.
pub(crate) fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    let (lower, upper) = iter.size_hint();
    match upper {
        Some(bound) => bound,
        // Nothing is known about the length: fall back to a fixed default.
        None if lower == 0 => 1024,
        None => lower,
    }
}
82
83// prefer this one over split_ca, as this can push the null_count into the thread pool
84// returns an `(offset, length)` tuple
85#[doc(hidden)]
86pub fn _split_offsets(len: usize, n: usize) -> Vec<(usize, usize)> {
87    if n == 1 {
88        vec![(0, len)]
89    } else {
90        let chunk_size = len / n;
91
92        (0..n)
93            .map(|partition| {
94                let offset = partition * chunk_size;
95                let len = if partition == (n - 1) {
96                    len - offset
97                } else {
98                    chunk_size
99                };
100                (partition * chunk_size, len)
101            })
102            .collect_trusted()
103    }
104}
105
/// Common interface of the chunked containers that the `split*` helpers in this
/// module operate on.
///
/// Implemented in this module for `DataFrame`, `ChunkedArray<T>` and `Series`.
#[allow(clippy::len_without_is_empty)]
pub trait Container: Clone {
    /// Returns the part of the container described by `offset` and `len`.
    fn slice(&self, offset: i64, len: usize) -> Self;

    /// Splits the container at `offset` into a `(left, right)` pair.
    fn split_at(&self, offset: i64) -> (Self, Self);

    /// Number of elements (rows for a `DataFrame`) in the container.
    fn len(&self) -> usize;

    /// Iterates over the container chunk by chunk, each chunk being yielded as its own container.
    fn iter_chunks(&self) -> impl Iterator<Item = Self>;

    /// Whether the chunks need to be rechunked; `split` refuses to hand out the existing chunks
    /// as-is when this returns `true`.
    fn should_rechunk(&self) -> bool;

    /// Number of chunks in the container.
    fn n_chunks(&self) -> usize;

    /// Lengths of the individual chunks.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize>;
}
122
/// [`Container`] implementation for `DataFrame`; the length is the frame's height.
impl Container for DataFrame {
    // Forwards to the inherent `DataFrame::slice`.
    fn slice(&self, offset: i64, len: usize) -> Self {
        DataFrame::slice(self, offset, len)
    }

    // Forwards to the inherent `DataFrame::split_at`.
    fn split_at(&self, offset: i64) -> (Self, Self) {
        DataFrame::split_at(self, offset)
    }

    // The length of a frame is its number of rows.
    fn len(&self) -> usize {
        self.height()
    }

    // Yields one `DataFrame` per chunk via `flatten_df_iter`.
    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        flatten_df_iter(self)
    }

    // NOTE(review): this relies on the inherent `DataFrame::should_rechunk` taking precedence
    // over this trait method; without an inherent method this would recurse — confirm.
    fn should_rechunk(&self) -> bool {
        self.should_rechunk()
    }

    // Uses the chunk count of the first column.
    fn n_chunks(&self) -> usize {
        DataFrame::first_col_n_chunks(self)
    }

    // Reports the chunk lengths of the first column only.
    // NOTE(review): `self.columns[0]` presumably panics for a frame without columns — confirm.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        // @scalar-correctness?
        self.columns[0].as_materialized_series().chunk_lengths()
    }
}
153
/// [`Container`] implementation for `ChunkedArray<T>`; most methods forward to the inherent
/// methods of the same name.
impl<T: PolarsDataType> Container for ChunkedArray<T> {
    // Forwards to the inherent `ChunkedArray::slice`.
    fn slice(&self, offset: i64, len: usize) -> Self {
        ChunkedArray::slice(self, offset, len)
    }

    // Forwards to the inherent `ChunkedArray::split_at`.
    fn split_at(&self, offset: i64) -> (Self, Self) {
        ChunkedArray::split_at(self, offset)
    }

    // Forwards to the inherent `ChunkedArray::len`.
    fn len(&self) -> usize {
        ChunkedArray::len(self)
    }

    // Wraps every downcast chunk in a new single-chunk array that keeps this array's name.
    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        self.downcast_iter()
            .map(|arr| Self::with_chunk(self.name().clone(), arr.clone()))
    }

    // Always `false` for a `ChunkedArray`.
    fn should_rechunk(&self) -> bool {
        false
    }

    // Number of entries in the chunk list.
    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    // Forwards to the inherent `ChunkedArray::chunk_lengths`.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        ChunkedArray::chunk_lengths(self)
    }
}
184
/// [`Container`] implementation for `Series`; most methods forward to the wrapped value `self.0`.
impl Container for Series {
    // Forwards to `slice` on the wrapped value.
    fn slice(&self, offset: i64, len: usize) -> Self {
        self.0.slice(offset, len)
    }

    // Forwards to `split_at` on the wrapped value.
    fn split_at(&self, offset: i64) -> (Self, Self) {
        self.0.split_at(offset)
    }

    // Forwards to `len` on the wrapped value.
    fn len(&self) -> usize {
        self.0.len()
    }

    // Yields one `Series` per chunk index by calling `select_chunk(i)`.
    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        (0..self.0.n_chunks()).map(|i| self.select_chunk(i))
    }

    // Always `false` for a `Series`.
    fn should_rechunk(&self) -> bool {
        false
    }

    // Number of entries in the chunk list.
    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    // Forwards to `chunk_lengths` on the wrapped value.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        self.0.chunk_lengths()
    }
}
214
215fn split_impl<C: Container>(container: &C, target: usize, chunk_size: usize) -> Vec<C> {
216    if target == 1 {
217        return vec![container.clone()];
218    }
219    let mut out = Vec::with_capacity(target);
220    let chunk_size = chunk_size as i64;
221
222    // First split
223    let (chunk, mut remainder) = container.split_at(chunk_size);
224    out.push(chunk);
225
226    // Take the rest of the splits of exactly chunk size, but skip the last remainder as we won't split that.
227    for _ in 1..target - 1 {
228        let (a, b) = remainder.split_at(chunk_size);
229        out.push(a);
230        remainder = b
231    }
232    // This can be slightly larger than `chunk_size`, but is smaller than `2 * chunk_size`.
233    out.push(remainder);
234    out
235}
236
237/// Splits, but doesn't flatten chunks. E.g. a container can still have multiple chunks.
238pub fn split<C: Container>(container: &C, target: usize) -> Vec<C> {
239    let total_len = container.len();
240    if total_len == 0 {
241        return vec![container.clone()];
242    }
243
244    let chunk_size = std::cmp::max(total_len / target, 1);
245
246    if container.n_chunks() == target
247        && container
248            .chunk_lengths()
249            .all(|len| len.abs_diff(chunk_size) < 100)
250        // We cannot get chunks if they are misaligned
251        && !container.should_rechunk()
252    {
253        return container.iter_chunks().collect();
254    }
255    split_impl(container, target, chunk_size)
256}
257
258/// Split a [`Container`] in `target` elements. The target doesn't have to be respected if not
259/// Deviation of the target might be done to create more equal size chunks.
260pub fn split_and_flatten<C: Container>(container: &C, target: usize) -> Vec<C> {
261    let total_len = container.len();
262    if total_len == 0 {
263        return vec![container.clone()];
264    }
265
266    let chunk_size = std::cmp::max(total_len / target, 1);
267
268    if container.n_chunks() == target
269        && container
270            .chunk_lengths()
271            .all(|len| len.abs_diff(chunk_size) < 100)
272        // We cannot get chunks if they are misaligned
273        && !container.should_rechunk()
274    {
275        return container.iter_chunks().collect();
276    }
277
278    if container.n_chunks() == 1 {
279        split_impl(container, target, chunk_size)
280    } else {
281        let mut out = Vec::with_capacity(target);
282        let chunks = container.iter_chunks();
283
284        'new_chunk: for mut chunk in chunks {
285            loop {
286                let h = chunk.len();
287                if h < chunk_size {
288                    // TODO if the chunk is much smaller than chunk size, we should try to merge it with the next one.
289                    out.push(chunk);
290                    continue 'new_chunk;
291                }
292
293                // If a split leads to the next chunk being smaller than 30% take the whole chunk
294                if ((h - chunk_size) as f64 / chunk_size as f64) < 0.3 {
295                    out.push(chunk);
296                    continue 'new_chunk;
297                }
298
299                let (a, b) = chunk.split_at(chunk_size as i64);
300                out.push(a);
301                chunk = b;
302            }
303        }
304        out
305    }
306}
307
308/// Split a [`DataFrame`] in `target` elements. The target doesn't have to be respected if not
309/// strict. Deviation of the target might be done to create more equal size chunks.
310///
311/// # Panics
312/// if chunks are not aligned
313pub fn split_df_as_ref(df: &DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
314    if strict {
315        split(df, target)
316    } else {
317        split_and_flatten(df, target)
318    }
319}
320
321#[doc(hidden)]
322/// Split a [`DataFrame`] into `n` parts. We take a `&mut` to be able to repartition/align chunks.
323/// `strict` in that it respects `n` even if the chunks are suboptimal.
324pub fn split_df(df: &mut DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
325    if target == 0 || df.is_empty() {
326        return vec![df.clone()];
327    }
328    // make sure that chunks are aligned.
329    df.align_chunks_par();
330    split_df_as_ref(df, target, strict)
331}
332
333pub fn slice_slice<T>(vals: &[T], offset: i64, len: usize) -> &[T] {
334    let (raw_offset, slice_len) = slice_offsets(offset, len, vals.len());
335    &vals[raw_offset..raw_offset + slice_len]
336}
337
/// Normalises a possibly negative `offset` and a `length` against an array of `array_len`
/// elements.
///
/// A negative `offset` is counted from the end of the array. Both the start and the end of
/// the requested window are clamped to `0..=array_len`. Returns `(start_index, slice_len)`.
///
/// # Panics
/// Panics if `array_len` does not fit in an `i64`.
#[inline]
#[doc(hidden)]
pub fn slice_offsets(offset: i64, length: usize, array_len: usize) -> (usize, usize) {
    let upper = i64::try_from(array_len).expect("array length larger than i64::MAX");

    // Resolve the start relative to the end for negative offsets; saturate instead of overflowing.
    let start = match offset {
        negative if negative < 0 => negative.saturating_add_unsigned(array_len as u64),
        non_negative => non_negative,
    };
    // The end is derived from the *unclamped* start, so a window that begins before the array
    // only keeps the part that actually overlaps it.
    let stop = start.saturating_add_unsigned(length as u64);

    let start = start.clamp(0, upper);
    let stop = stop.clamp(0, upper);

    (start as usize, (stop - start) as usize)
}
358
/// Dispatch on a `DataType` value and invoke the macro that belongs to it.
///
/// * `DataType::String` invokes `$macro_string!(opt_args...)`.
/// * `DataType::Boolean` invokes `$macro_bool!(opt_args...)`.
/// * Every physical numeric dtype invokes `$macro!(<native type>, opt_args...)`, e.g.
///   `$macro!(u32, ...)` for `DataType::UInt32`.
///
/// The small integer and `i128` arms only exist when the matching `dtype-*` feature is enabled.
/// `DataType` must be in scope at the call site.
///
/// # Panics
/// Panics with "not implemented for dtype ..." for any other dtype.
#[macro_export]
macro_rules! match_dtype_to_physical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            // The optional arguments are forwarded comma-separated so that more than one of them
            // still forms a valid argument list.
            DataType::String => $macro_string!($($opt_args),*),
            DataType::Boolean => $macro_bool!($($opt_args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(u8 $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(u16 $(, $opt_args)*),
            DataType::UInt32 => $macro!(u32 $(, $opt_args)*),
            DataType::UInt64 => $macro!(u64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(i8 $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(i16 $(, $opt_args)*),
            DataType::Int32 => $macro!(i32 $(, $opt_args)*),
            DataType::Int64 => $macro!(i64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(i128 $(, $opt_args)*),
            DataType::Float32 => $macro!(f32 $(, $opt_args)*),
            DataType::Float64 => $macro!(f64 $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
386
/// Dispatch on a `DataType` value and invoke the macro that belongs to it, passing the polars
/// type marker instead of the native type.
///
/// * `DataType::String` invokes `$macro_string!(opt_args...)`.
/// * `DataType::Binary` invokes `$macro_binary!(opt_args...)`.
/// * `DataType::Boolean` invokes `$macro_bool!(opt_args...)`.
/// * Every numeric dtype invokes `$macro!(<XxxType>, opt_args...)`, e.g.
///   `$macro!(UInt32Type, ...)` for `DataType::UInt32`.
///
/// The small integer and `i128` arms only exist when the matching `dtype-*` feature is enabled.
/// `DataType` and the `XxxType` markers must be in scope at the call site.
///
/// # Panics
/// Panics with "not implemented for dtype ..." for any other dtype.
#[macro_export]
macro_rules! match_dtype_to_logical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_binary:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            // The optional arguments are forwarded comma-separated so that more than one of them
            // still forms a valid argument list.
            DataType::String => $macro_string!($($opt_args),*),
            DataType::Binary => $macro_binary!($($opt_args),*),
            DataType::Boolean => $macro_bool!($($opt_args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(UInt8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(UInt16Type $(, $opt_args)*),
            DataType::UInt32 => $macro!(UInt32Type $(, $opt_args)*),
            DataType::UInt64 => $macro!(UInt64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(Int8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(Int16Type $(, $opt_args)*),
            DataType::Int32 => $macro!(Int32Type $(, $opt_args)*),
            DataType::Int64 => $macro!(Int64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(Int128Type $(, $opt_args)*),
            DataType::Float32 => $macro!(Float32Type $(, $opt_args)*),
            DataType::Float64 => $macro!(Float64Type $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
415
/// Apply a macro on the downcasted `ChunkedArray` behind `$self`.
///
/// Matches on `$self.dtype()` and downcasts `$self` with the accessor that belongs to the dtype
/// (`str()`, `bool()`, `u32()`, ...). The unwrapped downcast is passed as the first macro
/// argument, followed by the optional arguments:
///
/// * `DataType::String` invokes `$macro_string!`.
/// * `DataType::Boolean` invokes `$macro_bool!`.
/// * Every numeric dtype invokes `$macro!`.
///
/// The small integer and `i128` arms only exist when the matching `dtype-*` feature is enabled.
/// `DataType` must be in scope at the call site.
///
/// # Panics
/// Panics with "not implemented for dtype ..." for any other dtype.
#[macro_export]
macro_rules! match_arrow_dtype_apply_macro_ca {
    ($self:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            DataType::String => $macro_string!($self.str().unwrap() $(, $opt_args)*),
            DataType::Boolean => $macro_bool!($self.bool().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($self.i128().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
443
/// Runs `body` with `$T` bound to the native Rust type of a physical numeric dtype.
///
/// Invocation shape: `with_match_physical_numeric_type!(dtype, |$T| { body })`. The body is
/// wrapped in a local helper macro (`__with_ty__`) which is then instantiated with the native
/// type (`i32`, `u64`, `f64`, ...) that matches `dtype`.
///
/// The small integer and `i128` arms only exist when the matching `dtype-*` feature is enabled.
///
/// # Panics
/// Panics with "not implemented for dtype ..." for any non-numeric dtype.
#[macro_export]
macro_rules! with_match_physical_numeric_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` captures the literal `$` token so that the helper macro can declare `$T` itself.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
470
/// Runs `body` with `$T` bound to the native Rust type of a physical integer dtype.
///
/// Invocation shape: `with_match_physical_integer_type!(dtype, |$T| { body })`. The body is
/// wrapped in a local helper macro (`__with_ty__`) which is then instantiated with the native
/// integer type (`i32`, `u64`, ...) that matches `dtype`. Float dtypes are not handled.
///
/// The small integer and `i128` arms only exist when the matching `dtype-*` feature is enabled.
///
/// # Panics
/// Panics with "not implemented for dtype ..." for any non-integer dtype.
#[macro_export]
macro_rules! with_match_physical_integer_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` captures the literal `$` token so that the helper macro can declare `$T` itself.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
495
/// Runs `body` with `$T` bound to the native Rust type (`f32` or `f64`) of a float dtype.
///
/// Invocation shape: `with_match_physical_float_type!(dtype, |$T| { body })`. The body is
/// wrapped in a local helper macro (`__with_ty__`) which is then instantiated with the native
/// float type that matches `dtype`.
///
/// # Panics
/// Panics with "not implemented for dtype ..." for any dtype other than `Float32`/`Float64`.
#[macro_export]
macro_rules! with_match_physical_float_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` captures the literal `$` token so that the helper macro can declare `$T` itself.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
508
/// Runs `body` with `$T` bound to the polars type marker (`Float32Type` or `Float64Type`) of a
/// float dtype.
///
/// Invocation shape: `with_match_physical_float_polars_type!(dtype, |$T| { body })`. The body
/// is wrapped in a local helper macro (`__with_ty__`) which is then instantiated with the marker
/// type that matches `dtype`. The marker types are not imported by this macro, so they must be
/// in scope at the call site.
///
/// # Panics
/// Panics with "not implemented for dtype ..." for any dtype other than `Float32`/`Float64`.
#[macro_export]
macro_rules! with_match_physical_float_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` captures the literal `$` token so that the helper macro can declare `$T` itself.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
521
/// Runs `body` with `$T` bound to the polars type marker (`Int32Type`, `Float64Type`, ...) of a
/// physical numeric dtype.
///
/// Invocation shape: `with_match_physical_numeric_polars_type!(dtype, |$T| { body })`. The body
/// is wrapped in a local helper macro (`__with_ty__`) which is then instantiated with the marker
/// type that matches `dtype`. The marker types are not imported by this macro, so they must be
/// in scope at the call site.
///
/// The small integer and `i128` arms only exist when the matching `dtype-*` feature is enabled.
///
/// # Panics
/// Panics with "not implemented for dtype ..." for any non-numeric dtype.
#[macro_export]
macro_rules! with_match_physical_numeric_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` captures the literal `$` token so that the helper macro can declare `$T` itself.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
            #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
            #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
            #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
            #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
            #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
548
/// Runs `body` with `$T` bound to the polars type marker (`Int32Type`, `UInt64Type`, ...) of a
/// physical integer dtype.
///
/// Invocation shape: `with_match_physical_integer_polars_type!(dtype, |$T| { body })`. The body
/// is wrapped in a local helper macro (`__with_ty__`) which is then instantiated with the marker
/// type that matches `dtype`. Float dtypes are not handled.
///
/// The small integer and `i128` arms only exist when the matching `dtype-*` feature is enabled.
///
/// # Panics
/// Panics with "not implemented for dtype ..." for any non-integer dtype.
#[macro_export]
macro_rules! with_match_physical_integer_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` captures the literal `$` token so that the helper macro can declare `$T` itself.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    // Brings the `XxxType` markers used below into scope.
    use $crate::datatypes::*;
    match $key_type {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
574
/// Apply a macro on the downcasted `ChunkedArray` of `$self`, for physical numeric dtypes only
/// (no String, Boolean or logical dtypes).
///
/// Matches on `$self.dtype()`, downcasts with the matching accessor (`u32()`, `f64()`, ...) and
/// invokes `$macro!(<unwrapped downcast>, opt_args...)`.
///
/// The small integer and `i128` arms only exist when the matching `dtype-*` feature is enabled.
/// `DataType` must be in scope at the call site.
///
/// # Panics
/// Panics with "not implemented for ..." for any other dtype.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($self.i128().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
601
/// Apply a macro on the mutably downcasted `ChunkedArray` of `$self`, for physical numeric
/// dtypes only (no String, Boolean or logical dtypes).
///
/// Matches on a clone of `$self.dtype()`, obtains `&mut XxxChunked` through `$self.as_mut()` and
/// invokes `$macro!(XxxType, ca, opt_args...)`, where `ca` is that mutable reference.
///
/// The small integer and `i128` arms only exist when the matching `dtype-*` feature is enabled.
/// `DataType`, the `XxxChunked` aliases and the `XxxType` markers must be in scope at the call
/// site.
///
/// # Panics
/// Panics with "not implemented for ..." for any other dtype.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical_mut {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        // clone so that we do not borrow
        match $self.dtype().clone() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => {
                let ca: &mut UInt8Chunked = $self.as_mut();
                $macro!(UInt8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => {
                let ca: &mut UInt16Chunked = $self.as_mut();
                $macro!(UInt16Type, ca $(, $opt_args)*)
            },
            DataType::UInt32 => {
                let ca: &mut UInt32Chunked = $self.as_mut();
                $macro!(UInt32Type, ca $(, $opt_args)*)
            },
            DataType::UInt64 => {
                let ca: &mut UInt64Chunked = $self.as_mut();
                $macro!(UInt64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => {
                let ca: &mut Int8Chunked = $self.as_mut();
                $macro!(Int8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => {
                let ca: &mut Int16Chunked = $self.as_mut();
                $macro!(Int16Type, ca $(, $opt_args)*)
            },
            DataType::Int32 => {
                let ca: &mut Int32Chunked = $self.as_mut();
                $macro!(Int32Type, ca $(, $opt_args)*)
            },
            DataType::Int64 => {
                let ca: &mut Int64Chunked = $self.as_mut();
                $macro!(Int64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => {
                let ca: &mut Int128Chunked = $self.as_mut();
                $macro!(Int128Type, ca $(, $opt_args)*)
            },
            DataType::Float32 => {
                let ca: &mut Float32Chunked = $self.as_mut();
                $macro!(Float32Type, ca $(, $opt_args)*)
            },
            DataType::Float64 => {
                let ca: &mut Float64Chunked = $self.as_mut();
                $macro!(Float64Type, ca $(, $opt_args)*)
            },
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
662
/// Calls `$method($args...)` on the downcasted `ChunkedArray` behind `$self`.
///
/// Matches on `$self.dtype()` and downcasts with the matching accessor. Besides Boolean, String
/// and the physical numeric dtypes, this also covers `Time`, `Date`, `Datetime`, `List` and
/// `Struct`.
///
/// The matcher requires the comma after `$method` even when no arguments are passed.
/// The small integer and `i128` arms only exist when the matching `dtype-*` feature is enabled.
/// `DataType` must be in scope at the call site.
///
/// # Panics
/// Panics with "dtype ... not supported" for any other dtype.
#[macro_export]
macro_rules! apply_method_all_arrow_series {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Boolean => $self.bool().unwrap().$method($($args),*),
            DataType::String => $self.str().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            DataType::Time => $self.time().unwrap().$method($($args),*),
            DataType::Date => $self.date().unwrap().$method($($args),*),
            DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
            DataType::List(_) => $self.list().unwrap().$method($($args),*),
            DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
            dt => panic!("dtype {:?} not supported", dt)
        }
    }
}
694
/// Calls `$method($args...)` on the downcasted integer `ChunkedArray` behind `$self`.
///
/// Matches on `$self.dtype()` and downcasts with the matching integer accessor (`u32()`,
/// `i64()`, ...). Float, Boolean and String dtypes are not handled.
///
/// The matcher requires the comma after `$method` even when no arguments are passed.
/// The small integer and `i128` arms only exist when the matching `dtype-*` feature is enabled.
/// `DataType` must be in scope at the call site.
///
/// # Panics
/// Panics with "not implemented for dtype ..." for any non-integer dtype.
#[macro_export]
macro_rules! apply_method_physical_integer {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }
}
717
/// Calls `$method($args...)` on the downcasted numeric `ChunkedArray` behind `$self`.
///
/// `Float32` and `Float64` are handled here; every other dtype is delegated to
/// `apply_method_physical_integer!`, which panics for non-integer dtypes. Bool and String are
/// therefore not included.
///
/// The matcher requires the comma after `$method` even when no arguments are passed.
/// `DataType` must be in scope at the call site.
#[macro_export]
macro_rules! apply_method_physical_numeric {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            // `$crate::` keeps the helper macro resolvable from downstream crates that only
            // brought `apply_method_physical_numeric` into scope.
            _ => $crate::apply_method_physical_integer!($self, $method, $($args),*),
        }
    }
}
729
/// Builds a `DataFrame` from `name => values` pairs.
///
/// Every pair is turned into a `Series` through `NamedFrom::new(name.into(), values)`, wrapped
/// in a `Column`, and the resulting columns are passed to `DataFrame::new`. The macro evaluates
/// to whatever `DataFrame::new` returns. At least one pair is required; a trailing comma is
/// allowed.
#[macro_export]
macro_rules! df {
    ($($col_name:expr => $slice:expr), + $(,)?) => {
        $crate::prelude::DataFrame::new(vec![
            $($crate::prelude::Column::from(<$crate::prelude::Series as $crate::prelude::NamedFrom::<_, _>>::new($col_name.into(), $slice)),)+
        ])
    }
}
738
739pub fn get_time_units(tu_l: &TimeUnit, tu_r: &TimeUnit) -> TimeUnit {
740    use crate::datatypes::time_unit::TimeUnit::*;
741    match (tu_l, tu_r) {
742        (Nanoseconds, Microseconds) => Microseconds,
743        (_, Milliseconds) => Milliseconds,
744        _ => *tu_l,
745    }
746}
747
748#[cold]
749#[inline(never)]
750fn width_mismatch(df1: &DataFrame, df2: &DataFrame) -> PolarsError {
751    let mut df1_extra = Vec::new();
752    let mut df2_extra = Vec::new();
753
754    let s1 = df1.schema();
755    let s2 = df2.schema();
756
757    s1.field_compare(s2, &mut df1_extra, &mut df2_extra);
758
759    let df1_extra = df1_extra
760        .into_iter()
761        .map(|(_, (n, _))| n.as_str())
762        .collect::<Vec<_>>()
763        .join(", ");
764    let df2_extra = df2_extra
765        .into_iter()
766        .map(|(_, (n, _))| n.as_str())
767        .collect::<Vec<_>>()
768        .join(", ");
769
770    polars_err!(
771        SchemaMismatch: r#"unable to vstack, dataframes have different widths ({} != {}).
772One dataframe has additional columns: [{df1_extra}].
773Other dataframe has additional columns: [{df2_extra}]."#,
774        df1.width(),
775        df2.width(),
776    )
777}
778
779pub fn accumulate_dataframes_vertical_unchecked_optional<I>(dfs: I) -> Option<DataFrame>
780where
781    I: IntoIterator<Item = DataFrame>,
782{
783    let mut iter = dfs.into_iter();
784    let additional = iter.size_hint().0;
785    let mut acc_df = iter.next()?;
786    acc_df.reserve_chunks(additional);
787
788    for df in iter {
789        if acc_df.width() != df.width() {
790            panic!("{}", width_mismatch(&acc_df, &df));
791        }
792
793        acc_df.vstack_mut_owned_unchecked(df);
794    }
795    Some(acc_df)
796}
797
798/// This takes ownership of the DataFrame so that drop is called earlier.
799/// Does not check if schema is correct
800pub fn accumulate_dataframes_vertical_unchecked<I>(dfs: I) -> DataFrame
801where
802    I: IntoIterator<Item = DataFrame>,
803{
804    let mut iter = dfs.into_iter();
805    let additional = iter.size_hint().0;
806    let mut acc_df = iter.next().unwrap();
807    acc_df.reserve_chunks(additional);
808
809    for df in iter {
810        if acc_df.width() != df.width() {
811            panic!("{}", width_mismatch(&acc_df, &df));
812        }
813
814        acc_df.vstack_mut_owned_unchecked(df);
815    }
816    acc_df
817}
818
819/// This takes ownership of the DataFrame so that drop is called earlier.
820/// # Panics
821/// Panics if `dfs` is empty.
822pub fn accumulate_dataframes_vertical<I>(dfs: I) -> PolarsResult<DataFrame>
823where
824    I: IntoIterator<Item = DataFrame>,
825{
826    let mut iter = dfs.into_iter();
827    let additional = iter.size_hint().0;
828    let mut acc_df = iter.next().unwrap();
829    acc_df.reserve_chunks(additional);
830    for df in iter {
831        if acc_df.width() != df.width() {
832            return Err(width_mismatch(&acc_df, &df));
833        }
834
835        acc_df.vstack_mut_owned(df)?;
836    }
837
838    Ok(acc_df)
839}
840
841/// Concat the DataFrames to a single DataFrame.
842pub fn concat_df<'a, I>(dfs: I) -> PolarsResult<DataFrame>
843where
844    I: IntoIterator<Item = &'a DataFrame>,
845{
846    let mut iter = dfs.into_iter();
847    let additional = iter.size_hint().0;
848    let mut acc_df = iter.next().unwrap().clone();
849    acc_df.reserve_chunks(additional);
850    for df in iter {
851        acc_df.vstack_mut(df)?;
852    }
853    Ok(acc_df)
854}
855
856/// Concat the DataFrames to a single DataFrame.
857pub fn concat_df_unchecked<'a, I>(dfs: I) -> DataFrame
858where
859    I: IntoIterator<Item = &'a DataFrame>,
860{
861    let mut iter = dfs.into_iter();
862    let additional = iter.size_hint().0;
863    let mut acc_df = iter.next().unwrap().clone();
864    acc_df.reserve_chunks(additional);
865    for df in iter {
866        acc_df.vstack_mut_unchecked(df);
867    }
868    acc_df
869}
870
871pub fn accumulate_dataframes_horizontal(dfs: Vec<DataFrame>) -> PolarsResult<DataFrame> {
872    let mut iter = dfs.into_iter();
873    let mut acc_df = iter.next().unwrap();
874    for df in iter {
875        acc_df.hstack_mut(df.get_columns())?;
876    }
877    Ok(acc_df)
878}
879
880/// Ensure the chunks in both ChunkedArrays have the same length.
881/// # Panics
882/// This will panic if `left.len() != right.len()` and array is chunked.
883pub fn align_chunks_binary<'a, T, B>(
884    left: &'a ChunkedArray<T>,
885    right: &'a ChunkedArray<B>,
886) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<B>>)
887where
888    B: PolarsDataType,
889    T: PolarsDataType,
890{
891    let assert = || {
892        assert_eq!(
893            left.len(),
894            right.len(),
895            "expected arrays of the same length"
896        )
897    };
898    match (left.chunks.len(), right.chunks.len()) {
899        // All chunks are equal length
900        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
901        // All chunks are equal length
902        (a, b)
903            if a == b
904                && left
905                    .chunk_lengths()
906                    .zip(right.chunk_lengths())
907                    .all(|(l, r)| l == r) =>
908        {
909            (Cow::Borrowed(left), Cow::Borrowed(right))
910        },
911        (_, 1) => {
912            assert();
913            (
914                Cow::Borrowed(left),
915                Cow::Owned(right.match_chunks(left.chunk_lengths())),
916            )
917        },
918        (1, _) => {
919            assert();
920            (
921                Cow::Owned(left.match_chunks(right.chunk_lengths())),
922                Cow::Borrowed(right),
923            )
924        },
925        (_, _) => {
926            assert();
927            // could optimize to choose to rechunk a primitive and not a string or list type
928            let left = left.rechunk();
929            (
930                Cow::Owned(left.match_chunks(right.chunk_lengths())),
931                Cow::Borrowed(right),
932            )
933        },
934    }
935}
936
937/// Ensure the chunks in ChunkedArray and Series have the same length.
938/// # Panics
939/// This will panic if `left.len() != right.len()` and array is chunked.
940pub fn align_chunks_binary_ca_series<'a, T>(
941    left: &'a ChunkedArray<T>,
942    right: &'a Series,
943) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, Series>)
944where
945    T: PolarsDataType,
946{
947    let assert = || {
948        assert_eq!(
949            left.len(),
950            right.len(),
951            "expected arrays of the same length"
952        )
953    };
954    match (left.chunks.len(), right.chunks().len()) {
955        // All chunks are equal length
956        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
957        // All chunks are equal length
958        (a, b)
959            if a == b
960                && left
961                    .chunk_lengths()
962                    .zip(right.chunk_lengths())
963                    .all(|(l, r)| l == r) =>
964        {
965            assert();
966            (Cow::Borrowed(left), Cow::Borrowed(right))
967        },
968        (_, 1) => (left.rechunk(), Cow::Borrowed(right)),
969        (1, _) => (Cow::Borrowed(left), Cow::Owned(right.rechunk())),
970        (_, _) => {
971            assert();
972            (left.rechunk(), Cow::Owned(right.rechunk()))
973        },
974    }
975}
976
/// Owned variant of chunk alignment for two `Series`.
///
/// Returns the inputs untouched when both are single-chunk or when their chunk
/// lengths already match pairwise; otherwise every side that is not a single
/// chunk is rechunked. No length check is performed.
#[cfg(feature = "performant")]
pub(crate) fn align_chunks_binary_owned_series(left: Series, right: Series) -> (Series, Series) {
    let n_left = left.chunks().len();
    let n_right = right.chunks().len();

    let already_aligned = (n_left == 1 && n_right == 1)
        || (n_left == n_right
            && left
                .chunk_lengths()
                .zip(right.chunk_lengths())
                .all(|(l, r)| l == r));
    if already_aligned {
        return (left, right);
    }

    // At this point at most one side is single-chunk; that side is kept as is.
    let left = if n_left == 1 { left } else { left.rechunk() };
    let right = if n_right == 1 { right } else { right.rechunk() };
    (left, right)
}
996
997pub(crate) fn align_chunks_binary_owned<T, B>(
998    left: ChunkedArray<T>,
999    right: ChunkedArray<B>,
1000) -> (ChunkedArray<T>, ChunkedArray<B>)
1001where
1002    B: PolarsDataType,
1003    T: PolarsDataType,
1004{
1005    match (left.chunks.len(), right.chunks.len()) {
1006        (1, 1) => (left, right),
1007        // All chunks are equal length
1008        (a, b)
1009            if a == b
1010                && left
1011                    .chunk_lengths()
1012                    .zip(right.chunk_lengths())
1013                    .all(|(l, r)| l == r) =>
1014        {
1015            (left, right)
1016        },
1017        (_, 1) => (left.rechunk().into_owned(), right),
1018        (1, _) => (left, right.rechunk().into_owned()),
1019        (_, _) => (left.rechunk().into_owned(), right.rechunk().into_owned()),
1020    }
1021}
1022
1023/// # Panics
1024/// This will panic if `a.len() != b.len() || b.len() != c.len()` and array is chunked.
1025#[allow(clippy::type_complexity)]
1026pub fn align_chunks_ternary<'a, A, B, C>(
1027    a: &'a ChunkedArray<A>,
1028    b: &'a ChunkedArray<B>,
1029    c: &'a ChunkedArray<C>,
1030) -> (
1031    Cow<'a, ChunkedArray<A>>,
1032    Cow<'a, ChunkedArray<B>>,
1033    Cow<'a, ChunkedArray<C>>,
1034)
1035where
1036    A: PolarsDataType,
1037    B: PolarsDataType,
1038    C: PolarsDataType,
1039{
1040    if a.chunks.len() == 1 && b.chunks.len() == 1 && c.chunks.len() == 1 {
1041        return (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c));
1042    }
1043
1044    assert!(
1045        a.len() == b.len() && b.len() == c.len(),
1046        "expected arrays of the same length"
1047    );
1048
1049    match (a.chunks.len(), b.chunks.len(), c.chunks.len()) {
1050        (_, 1, 1) => (
1051            Cow::Borrowed(a),
1052            Cow::Owned(b.match_chunks(a.chunk_lengths())),
1053            Cow::Owned(c.match_chunks(a.chunk_lengths())),
1054        ),
1055        (1, 1, _) => (
1056            Cow::Owned(a.match_chunks(c.chunk_lengths())),
1057            Cow::Owned(b.match_chunks(c.chunk_lengths())),
1058            Cow::Borrowed(c),
1059        ),
1060        (1, _, 1) => (
1061            Cow::Owned(a.match_chunks(b.chunk_lengths())),
1062            Cow::Borrowed(b),
1063            Cow::Owned(c.match_chunks(b.chunk_lengths())),
1064        ),
1065        (1, _, _) => {
1066            let b = b.rechunk();
1067            (
1068                Cow::Owned(a.match_chunks(c.chunk_lengths())),
1069                Cow::Owned(b.match_chunks(c.chunk_lengths())),
1070                Cow::Borrowed(c),
1071            )
1072        },
1073        (_, 1, _) => {
1074            let a = a.rechunk();
1075            (
1076                Cow::Owned(a.match_chunks(c.chunk_lengths())),
1077                Cow::Owned(b.match_chunks(c.chunk_lengths())),
1078                Cow::Borrowed(c),
1079            )
1080        },
1081        (_, _, 1) => {
1082            let b = b.rechunk();
1083            (
1084                Cow::Borrowed(a),
1085                Cow::Owned(b.match_chunks(a.chunk_lengths())),
1086                Cow::Owned(c.match_chunks(a.chunk_lengths())),
1087            )
1088        },
1089        (len_a, len_b, len_c)
1090            if len_a == len_b
1091                && len_b == len_c
1092                && a.chunk_lengths()
1093                    .zip(b.chunk_lengths())
1094                    .zip(c.chunk_lengths())
1095                    .all(|((a, b), c)| a == b && b == c) =>
1096        {
1097            (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c))
1098        },
1099        _ => {
1100            // could optimize to choose to rechunk a primitive and not a string or list type
1101            let a = a.rechunk();
1102            let b = b.rechunk();
1103            (
1104                Cow::Owned(a.match_chunks(c.chunk_lengths())),
1105                Cow::Owned(b.match_chunks(c.chunk_lengths())),
1106                Cow::Borrowed(c),
1107            )
1108        },
1109    }
1110}
1111
1112pub fn binary_concatenate_validities<'a, T, B>(
1113    left: &'a ChunkedArray<T>,
1114    right: &'a ChunkedArray<B>,
1115) -> Option<Bitmap>
1116where
1117    B: PolarsDataType,
1118    T: PolarsDataType,
1119{
1120    let (left, right) = align_chunks_binary(left, right);
1121    let left_validity = concatenate_validities(left.chunks());
1122    let right_validity = concatenate_validities(right.chunks());
1123    combine_validities_and(left_validity.as_ref(), right_validity.as_ref())
1124}
1125
1126/// Convenience for `x.into_iter().map(Into::into).collect()` using an `into_vec()` function.
1127pub trait IntoVec<T> {
1128    fn into_vec(self) -> Vec<T>;
1129}
1130
1131impl<I, S> IntoVec<PlSmallStr> for I
1132where
1133    I: IntoIterator<Item = S>,
1134    S: Into<PlSmallStr>,
1135{
1136    fn into_vec(self) -> Vec<PlSmallStr> {
1137        self.into_iter().map(|s| s.into()).collect()
1138    }
1139}
1140
1141/// This logic is same as the impl on ChunkedArray
1142/// The difference is that there is less indirection because the caller should preallocate
1143/// `chunk_lens` once. On the `ChunkedArray` we indirect through an `ArrayRef` which is an indirection
1144/// and a vtable.
1145#[inline]
1146pub(crate) fn index_to_chunked_index<
1147    I: Iterator<Item = Idx>,
1148    Idx: PartialOrd + std::ops::AddAssign + std::ops::SubAssign + Zero + One,
1149>(
1150    chunk_lens: I,
1151    index: Idx,
1152) -> (Idx, Idx) {
1153    let mut index_remainder = index;
1154    let mut current_chunk_idx = Zero::zero();
1155
1156    for chunk_len in chunk_lens {
1157        if chunk_len > index_remainder {
1158            break;
1159        } else {
1160            index_remainder -= chunk_len;
1161            current_chunk_idx += One::one();
1162        }
1163    }
1164    (current_chunk_idx, index_remainder)
1165}
1166
1167pub(crate) fn index_to_chunked_index_rev<
1168    I: Iterator<Item = Idx>,
1169    Idx: PartialOrd
1170        + std::ops::AddAssign
1171        + std::ops::SubAssign
1172        + std::ops::Sub<Output = Idx>
1173        + Zero
1174        + One
1175        + Copy
1176        + std::fmt::Debug,
1177>(
1178    chunk_lens_rev: I,
1179    index_from_back: Idx,
1180    total_chunks: Idx,
1181) -> (Idx, Idx) {
1182    debug_assert!(index_from_back > Zero::zero(), "at least -1");
1183    let mut index_remainder = index_from_back;
1184    let mut current_chunk_idx = One::one();
1185    let mut current_chunk_len = Zero::zero();
1186
1187    for chunk_len in chunk_lens_rev {
1188        current_chunk_len = chunk_len;
1189        if chunk_len >= index_remainder {
1190            break;
1191        } else {
1192            index_remainder -= chunk_len;
1193            current_chunk_idx += One::one();
1194        }
1195    }
1196    (
1197        total_chunks - current_chunk_idx,
1198        current_chunk_len - index_remainder,
1199    )
1200}
1201
1202pub fn first_non_null<'a, I>(iter: I) -> Option<usize>
1203where
1204    I: Iterator<Item = Option<&'a Bitmap>>,
1205{
1206    let mut offset = 0;
1207    for validity in iter {
1208        if let Some(mask) = validity {
1209            let len_mask = mask.len();
1210            let n = mask.leading_zeros();
1211            if n < len_mask {
1212                return Some(offset + n);
1213            }
1214            offset += len_mask
1215        } else {
1216            return Some(offset);
1217        }
1218    }
1219    None
1220}
1221
1222pub fn last_non_null<'a, I>(iter: I, len: usize) -> Option<usize>
1223where
1224    I: DoubleEndedIterator<Item = Option<&'a Bitmap>>,
1225{
1226    if len == 0 {
1227        return None;
1228    }
1229    let mut offset = 0;
1230    for validity in iter.rev() {
1231        if let Some(mask) = validity {
1232            let len_mask = mask.len();
1233            let n = mask.trailing_zeros();
1234            if n < len_mask {
1235                return Some(len - offset - n - 1);
1236            }
1237            offset += len_mask;
1238        } else {
1239            return Some(len - offset - 1);
1240        }
1241    }
1242    None
1243}
1244
1245/// ensure that nulls are propagated to both arrays
1246pub fn coalesce_nulls<'a, T: PolarsDataType>(
1247    a: &'a ChunkedArray<T>,
1248    b: &'a ChunkedArray<T>,
1249) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<T>>) {
1250    if a.null_count() > 0 || b.null_count() > 0 {
1251        let (a, b) = align_chunks_binary(a, b);
1252        let mut b = b.into_owned();
1253        let a = a.coalesce_nulls(b.chunks());
1254
1255        for arr in a.chunks().iter() {
1256            for arr_b in unsafe { b.chunks_mut() } {
1257                *arr_b = arr_b.with_validity(arr.validity().cloned())
1258            }
1259        }
1260        b.compute_len();
1261        (Cow::Owned(a), Cow::Owned(b))
1262    } else {
1263        (Cow::Borrowed(a), Cow::Borrowed(b))
1264    }
1265}
1266
1267pub fn coalesce_nulls_columns(a: &Column, b: &Column) -> (Column, Column) {
1268    if a.null_count() > 0 || b.null_count() > 0 {
1269        let mut a = a.as_materialized_series().rechunk();
1270        let mut b = b.as_materialized_series().rechunk();
1271        for (arr_a, arr_b) in unsafe { a.chunks_mut().iter_mut().zip(b.chunks_mut()) } {
1272            let validity = match (arr_a.validity(), arr_b.validity()) {
1273                (None, Some(b)) => Some(b.clone()),
1274                (Some(a), Some(b)) => Some(a & b),
1275                (Some(a), None) => Some(a.clone()),
1276                (None, None) => None,
1277            };
1278            *arr_a = arr_a.with_validity(validity.clone());
1279            *arr_b = arr_b.with_validity(validity);
1280        }
1281        a.compute_len();
1282        b.compute_len();
1283        (a.into(), b.into())
1284    } else {
1285        (a.clone(), b.clone())
1286    }
1287}
1288
1289pub fn operation_exceeded_idxsize_msg(operation: &str) -> String {
1290    if size_of::<IdxSize>() == size_of::<u32>() {
1291        format!(
1292            "{} exceeded the maximum supported limit of {} rows. Consider installing 'polars-u64-idx'.",
1293            operation,
1294            IdxSize::MAX,
1295        )
1296    } else {
1297        format!(
1298            "{} exceeded the maximum supported limit of {} rows.",
1299            operation,
1300            IdxSize::MAX,
1301        )
1302    }
1303}
1304
#[cfg(test)]
mod test {
    use super::*;

    /// Aligns the two arrays and asserts that the results share one chunk layout.
    fn assert_same_chunk_layout(lhs: &Int32Chunked, rhs: &Int32Chunked) {
        let (lhs, rhs) = align_chunks_binary(lhs, rhs);
        assert_eq!(
            lhs.chunk_lengths().collect::<Vec<_>>(),
            rhs.chunk_lengths().collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_split() {
        let ca: Int32Chunked = (0..10).collect_ca("a".into());

        // Ten values in three parts: the last part takes the remainder.
        let parts = split(&ca, 3);
        for (idx, expected_len) in [3, 3, 4].into_iter().enumerate() {
            assert_eq!(parts[idx].len(), expected_len);
        }
    }

    #[test]
    fn test_align_chunks() -> PolarsResult<()> {
        // Four values in one array vs. a one-value array with three values appended.
        let whole = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut two_parts = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let rest = Int32Chunked::new(PlSmallStr::EMPTY, &[2, 3, 4]);
        two_parts.append(&rest)?;
        assert_same_chunk_layout(&whole, &two_parts);

        // Four values in one array vs. a one-value array appended to itself three times.
        let whole = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let unit = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let mut four_parts = unit.clone();
        for _ in 0..3 {
            four_parts.append(&unit)?;
        }
        assert_same_chunk_layout(&whole, &four_parts);

        Ok(())
    }
}
polars_core/utils/mod.rs

polars_core/utils/
mod.rs