1mod any_value;
2use arrow::compute::concatenate::concatenate_validities;
3use arrow::compute::utils::combine_validities_and;
4pub mod flatten;
5pub(crate) mod series;
6mod supertype;
7use std::borrow::Cow;
8use std::ops::{Deref, DerefMut};
9mod schema;
10
11pub use any_value::*;
12use arrow::bitmap::Bitmap;
13pub use arrow::legacy::utils::*;
14pub use arrow::trusted_len::TrustMyLength;
15use flatten::*;
16use num_traits::{One, Zero};
17use rayon::prelude::*;
18pub use schema::*;
19pub use series::*;
20pub use supertype::*;
21pub use {arrow, rayon};
22
23use crate::POOL;
24use crate::prelude::*;
25
/// Transparent newtype around a value of type `T`.
///
/// The wrapped value is public (`.0`) and is also reachable through `Deref`.
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T> Deref for Wrap<T> {
    type Target = T;

    /// Borrows the wrapped value.
    fn deref(&self) -> &T {
        let Wrap(inner) = self;
        inner
    }
}
35
/// Returns the partition count to use, which is the number of threads
/// currently reported by the crate-level `POOL`.
#[inline(always)]
pub fn _set_partition_size() -> usize {
    POOL.current_num_threads()
}
40
/// Thin owning wrapper around an `inner` value of type `T`.
///
/// NOTE(review): the name suggests the wrapped value is known to contain no
/// nulls; nothing in this block enforces that — confirm against callers.
pub struct NoNull<T> {
    inner: T,
}

impl<T> NoNull<T> {
    /// Wraps `inner`.
    pub fn new(inner: T) -> Self {
        Self { inner }
    }

    /// Consumes the wrapper and hands back the wrapped value.
    pub fn into_inner(self) -> T {
        let Self { inner } = self;
        inner
    }
}

impl<T> Deref for NoNull<T> {
    type Target = T;

    /// Shared access to the wrapped value.
    fn deref(&self) -> &T {
        &self.inner
    }
}

impl<T> DerefMut for NoNull<T> {
    /// Exclusive access to the wrapped value.
    fn deref_mut(&mut self) -> &mut T {
        &mut self.inner
    }
}
74
/// Picks an initial capacity for collecting `iter`, based on its `size_hint`.
///
/// * If an upper bound is known it is used.
/// * Otherwise the lower bound is used, unless it is `0`, in which case a
///   default of `1024` is returned.
pub(crate) fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    let (lower, upper) = iter.size_hint();
    upper.unwrap_or(if lower == 0 { 1024 } else { lower })
}
82
/// Splits the range `0..len` into `n` consecutive `(offset, length)` pairs.
///
/// Every partition except the last has length `len / n`; the last partition
/// absorbs the remainder so that the pairs always cover `0..len` exactly.
///
/// `n == 0` is treated like `n == 1` (a single partition spanning everything)
/// instead of dividing by zero, mirroring how `split_df` handles a zero target.
#[doc(hidden)]
pub fn _split_offsets(len: usize, n: usize) -> Vec<(usize, usize)> {
    if n <= 1 {
        return vec![(0, len)];
    }

    let chunk_size = len / n;
    let last = n - 1;

    (0..n)
        .map(|partition| {
            let offset = partition * chunk_size;
            // The final partition takes whatever is left after integer division.
            let part_len = if partition == last {
                len - offset
            } else {
                chunk_size
            };
            (offset, part_len)
        })
        .collect()
}
105
/// Common interface over chunked, sliceable containers.
///
/// It is the abstraction the `split*` helpers in this module are generic over.
#[allow(clippy::len_without_is_empty)]
pub trait Container: Clone {
    /// Returns the sub-range described by `offset` and `len`.
    fn slice(&self, offset: i64, len: usize) -> Self;

    /// Splits the container into two parts at `offset`.
    fn split_at(&self, offset: i64) -> (Self, Self);

    /// Total length of the container.
    fn len(&self) -> usize;

    /// Iterates over the chunks, each yielded as a container of its own.
    fn iter_chunks(&self) -> impl Iterator<Item = Self>;

    /// Whether the chunk layout should be rebuilt; the split helpers only
    /// reuse the existing chunks when this returns `false`.
    fn should_rechunk(&self) -> bool;

    /// Number of chunks the container currently has.
    fn n_chunks(&self) -> usize;

    /// Length of every chunk, in order.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize>;
}
122
/// `Container` implementation for `DataFrame`; each method forwards to the
/// corresponding `DataFrame` operation.
impl Container for DataFrame {
    fn slice(&self, offset: i64, len: usize) -> Self {
        DataFrame::slice(self, offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        DataFrame::split_at(self, offset)
    }

    /// The length of a frame is its height.
    fn len(&self) -> usize {
        self.height()
    }

    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        flatten_df_iter(self)
    }

    fn should_rechunk(&self) -> bool {
        // NOTE(review): presumably resolves to the inherent
        // `DataFrame::should_rechunk` rather than this trait method — confirm.
        self.should_rechunk()
    }

    /// Chunk count as reported for the first column.
    fn n_chunks(&self) -> usize {
        DataFrame::first_col_n_chunks(self)
    }

    /// Chunk lengths of the first column.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        // Indexing `columns[0]` panics when the frame has no columns.
        self.columns[0].as_materialized_series().chunk_lengths()
    }
}
153
/// `Container` implementation for `ChunkedArray<T>`; most methods forward to
/// the inherent `ChunkedArray` method of the same name.
impl<T: PolarsDataType> Container for ChunkedArray<T> {
    fn slice(&self, offset: i64, len: usize) -> Self {
        ChunkedArray::slice(self, offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        ChunkedArray::split_at(self, offset)
    }

    fn len(&self) -> usize {
        ChunkedArray::len(self)
    }

    /// Yields one single-chunk array per underlying chunk, keeping the
    /// original name.
    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        self.downcast_iter()
            .map(|arr| Self::with_chunk(self.name().clone(), arr.clone()))
    }

    /// Always `false` for this implementation.
    fn should_rechunk(&self) -> bool {
        false
    }

    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        ChunkedArray::chunk_lengths(self)
    }
}
184
/// `Container` implementation for `Series`; most methods forward to the inner
/// value stored in `self.0`.
impl Container for Series {
    fn slice(&self, offset: i64, len: usize) -> Self {
        self.0.slice(offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        self.0.split_at(offset)
    }

    fn len(&self) -> usize {
        self.0.len()
    }

    /// Yields one series per chunk index via `select_chunk`.
    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        (0..self.0.n_chunks()).map(|i| self.select_chunk(i))
    }

    /// Always `false` for this implementation.
    fn should_rechunk(&self) -> bool {
        false
    }

    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        self.0.chunk_lengths()
    }
}
214
215fn split_impl<C: Container>(container: &C, target: usize, chunk_size: usize) -> Vec<C> {
216 if target == 1 {
217 return vec![container.clone()];
218 }
219 let mut out = Vec::with_capacity(target);
220 let chunk_size = chunk_size as i64;
221
222 let (chunk, mut remainder) = container.split_at(chunk_size);
224 out.push(chunk);
225
226 for _ in 1..target - 1 {
228 let (a, b) = remainder.split_at(chunk_size);
229 out.push(a);
230 remainder = b
231 }
232 out.push(remainder);
234 out
235}
236
237pub fn split<C: Container>(container: &C, target: usize) -> Vec<C> {
239 let total_len = container.len();
240 if total_len == 0 {
241 return vec![container.clone()];
242 }
243
244 let chunk_size = std::cmp::max(total_len / target, 1);
245
246 if container.n_chunks() == target
247 && container
248 .chunk_lengths()
249 .all(|len| len.abs_diff(chunk_size) < 100)
250 && !container.should_rechunk()
252 {
253 return container.iter_chunks().collect();
254 }
255 split_impl(container, target, chunk_size)
256}
257
258pub fn split_and_flatten<C: Container>(container: &C, target: usize) -> Vec<C> {
261 let total_len = container.len();
262 if total_len == 0 {
263 return vec![container.clone()];
264 }
265
266 let chunk_size = std::cmp::max(total_len / target, 1);
267
268 if container.n_chunks() == target
269 && container
270 .chunk_lengths()
271 .all(|len| len.abs_diff(chunk_size) < 100)
272 && !container.should_rechunk()
274 {
275 return container.iter_chunks().collect();
276 }
277
278 if container.n_chunks() == 1 {
279 split_impl(container, target, chunk_size)
280 } else {
281 let mut out = Vec::with_capacity(target);
282 let chunks = container.iter_chunks();
283
284 'new_chunk: for mut chunk in chunks {
285 loop {
286 let h = chunk.len();
287 if h < chunk_size {
288 out.push(chunk);
290 continue 'new_chunk;
291 }
292
293 if ((h - chunk_size) as f64 / chunk_size as f64) < 0.3 {
295 out.push(chunk);
296 continue 'new_chunk;
297 }
298
299 let (a, b) = chunk.split_at(chunk_size as i64);
300 out.push(a);
301 chunk = b;
302 }
303 }
304 out
305 }
306}
307
308pub fn split_df_as_ref(df: &DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
314 if strict {
315 split(df, target)
316 } else {
317 split_and_flatten(df, target)
318 }
319}
320
/// Splits `df` into parts, aligning its chunks in place first.
///
/// A `target` of `0` or an empty frame returns a single clone of `df` without
/// touching it. Otherwise `df.align_chunks_par()` is called (which is why `df`
/// is taken mutably) and the work is delegated to `split_df_as_ref`.
#[doc(hidden)]
pub fn split_df(df: &mut DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
    if target == 0 || df.is_empty() {
        return vec![df.clone()];
    }
    // Align chunks before splitting, since the split helpers inspect the chunk layout.
    df.align_chunks_par();
    split_df_as_ref(df, target, strict)
}
332
333pub fn slice_slice<T>(vals: &[T], offset: i64, len: usize) -> &[T] {
334 let (raw_offset, slice_len) = slice_offsets(offset, len, vals.len());
335 &vals[raw_offset..raw_offset + slice_len]
336}
337
/// Converts a possibly negative `offset` and a `length` into a clamped
/// `(start_index, slice_length)` pair for an array of `array_len` elements.
///
/// A negative `offset` is interpreted relative to the end of the array. Both
/// the start and the stop position are clamped to `0..=array_len`, so the
/// returned range is always in bounds.
///
/// # Panics
///
/// Panics if `array_len` does not fit in an `i64`.
#[inline]
#[doc(hidden)]
pub fn slice_offsets(offset: i64, length: usize, array_len: usize) -> (usize, usize) {
    // Resolve a negative offset against the end of the array.
    let raw_start = match offset {
        from_end if from_end < 0 => from_end.saturating_add_unsigned(array_len as u64),
        from_start => from_start,
    };
    let raw_stop = raw_start.saturating_add_unsigned(length as u64);

    let upper = i64::try_from(array_len).expect("array length larger than i64::MAX");
    let start = raw_start.clamp(0, upper);
    let stop = raw_stop.clamp(0, upper);

    (start as usize, (stop - start) as usize)
}
358
/// Dispatches on a `DataType` value (`$obj`) and invokes one of three macros:
///
/// * `$macro_string!` for `DataType::String`,
/// * `$macro_bool!` for `DataType::Boolean`,
/// * `$macro!` for the numeric dtypes, with the matching Rust primitive type
///   (`u8`, `i32`, `f64`, ...) as first argument.
///
/// Any trailing `$opt_args` are forwarded. Panics for every other dtype.
///
/// NOTE(review): the string/bool arms forward `$($opt_args)*` without a
/// separator, while the numeric arms forward them comma-separated — confirm
/// this is intended when more than one optional argument is passed.
#[macro_export]
macro_rules! match_dtype_to_physical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            DataType::String => $macro_string!($($opt_args)*),
            DataType::Boolean => $macro_bool!($($opt_args)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(u8 $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(u16 $(, $opt_args)*),
            DataType::UInt32 => $macro!(u32 $(, $opt_args)*),
            DataType::UInt64 => $macro!(u64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(i8 $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(i16 $(, $opt_args)*),
            DataType::Int32 => $macro!(i32 $(, $opt_args)*),
            DataType::Int64 => $macro!(i64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(i128 $(, $opt_args)*),
            DataType::Float32 => $macro!(f32 $(, $opt_args)*),
            DataType::Float64 => $macro!(f64 $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
386
/// Dispatches on a `DataType` value (`$obj`) and invokes one of four macros:
///
/// * `$macro_string!` for `DataType::String`,
/// * `$macro_binary!` for `DataType::Binary`,
/// * `$macro_bool!` for `DataType::Boolean`,
/// * `$macro!` for the numeric dtypes, with the matching polars type marker
///   (`UInt8Type`, `Int32Type`, `Float64Type`, ...) as first argument.
///
/// Any trailing `$opt_args` are forwarded. Panics for every other dtype.
///
/// NOTE(review): the string/binary/bool arms forward `$($opt_args)*` without a
/// separator, while the numeric arms forward them comma-separated — confirm
/// this is intended when more than one optional argument is passed.
#[macro_export]
macro_rules! match_dtype_to_logical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_binary:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            DataType::String => $macro_string!($($opt_args)*),
            DataType::Binary => $macro_binary!($($opt_args)*),
            DataType::Boolean => $macro_bool!($($opt_args)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(UInt8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(UInt16Type $(, $opt_args)*),
            DataType::UInt32 => $macro!(UInt32Type $(, $opt_args)*),
            DataType::UInt64 => $macro!(UInt64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(Int8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(Int16Type $(, $opt_args)*),
            DataType::Int32 => $macro!(Int32Type $(, $opt_args)*),
            DataType::Int64 => $macro!(Int64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(Int128Type $(, $opt_args)*),
            DataType::Float32 => $macro!(Float32Type $(, $opt_args)*),
            DataType::Float64 => $macro!(Float64Type $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
415
/// Dispatches on `$self.dtype()`, downcasts `$self` with the matching accessor
/// (`str()`, `bool()`, `u8()`, ..., `f64()`) and passes the unwrapped result,
/// followed by any `$opt_args`, to a macro:
///
/// * `$macro_string!` for `DataType::String`,
/// * `$macro_bool!` for `DataType::Boolean`,
/// * `$macro!` for the numeric dtypes.
///
/// Panics for every other dtype.
#[macro_export]
macro_rules! match_arrow_dtype_apply_macro_ca {
    ($self:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            DataType::String => $macro_string!($self.str().unwrap() $(, $opt_args)*),
            DataType::Boolean => $macro_bool!($self.bool().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($self.i128().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
443
/// Expands `$body` once per numeric dtype, with the caller-chosen type
/// identifier bound to the matching Rust primitive (`i8` ... `u64`, `f32`,
/// `f64`), and selects the expansion that matches `$dtype` at runtime.
///
/// The token captured as `$_` (callers presumably pass a literal `$`) lets the
/// inner helper macro `__with_ty__` declare its own `$T` metavariable.
///
/// Panics for any dtype that is not listed.
#[macro_export]
macro_rules! with_match_physical_numeric_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
470
/// Expands `$body` once per integer dtype, with the caller-chosen type
/// identifier bound to the matching Rust primitive (`i8` ... `i128`,
/// `u8` ... `u64`), and selects the expansion that matches `$dtype` at runtime.
///
/// The token captured as `$_` (callers presumably pass a literal `$`) lets the
/// inner helper macro `__with_ty__` declare its own `$T` metavariable.
///
/// Panics for any dtype that is not listed, including the float dtypes.
#[macro_export]
macro_rules! with_match_physical_integer_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
495
/// Expands `$body` for the two float dtypes, with the caller-chosen type
/// identifier bound to `f32` or `f64`, and selects the expansion that matches
/// `$dtype` at runtime.
///
/// The token captured as `$_` (callers presumably pass a literal `$`) lets the
/// inner helper macro `__with_ty__` declare its own `$T` metavariable.
///
/// Panics for any non-float dtype.
#[macro_export]
macro_rules! with_match_physical_float_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
508
/// Expands `$body` for the two float dtypes, with the caller-chosen type
/// identifier bound to the polars type marker `Float32Type` or `Float64Type`,
/// and selects the expansion that matches `$key_type` at runtime.
///
/// The token captured as `$_` (callers presumably pass a literal `$`) lets the
/// inner helper macro `__with_ty__` declare its own `$T` metavariable.
///
/// Panics for any non-float dtype.
#[macro_export]
macro_rules! with_match_physical_float_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
521
/// Expands `$body` once per numeric dtype, with the caller-chosen type
/// identifier bound to the matching polars type marker (`Int8Type` ...
/// `UInt64Type`, `Float32Type`, `Float64Type`), and selects the expansion that
/// matches `$key_type` at runtime.
///
/// The token captured as `$_` (callers presumably pass a literal `$`) lets the
/// inner helper macro `__with_ty__` declare its own `$T` metavariable.
///
/// Panics for any dtype that is not listed.
#[macro_export]
macro_rules! with_match_physical_numeric_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
548
/// Expands `$body` once per integer dtype, with the caller-chosen type
/// identifier bound to the matching polars type marker (`Int8Type` ...
/// `Int128Type`, `UInt8Type` ... `UInt64Type`), and selects the expansion that
/// matches `$key_type` at runtime.
///
/// The token captured as `$_` (callers presumably pass a literal `$`) lets the
/// inner helper macro `__with_ty__` declare its own `$T` metavariable.
/// This macro also glob-imports `$crate::datatypes::*` into the expansion.
///
/// Panics for any dtype that is not listed, including the float dtypes.
#[macro_export]
macro_rules! with_match_physical_integer_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    use $crate::datatypes::*;
    match $key_type {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
574
/// Dispatches on `$self.dtype()`, downcasts `$self` with the matching numeric
/// accessor (`u8()`, ..., `i128()`, `f32()`, `f64()`) and passes the unwrapped
/// result, followed by any `$opt_args`, to `$macro!`.
///
/// Panics for every non-numeric dtype.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($self.i128().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
601
/// Mutable counterpart of `downcast_as_macro_arg_physical`.
///
/// Dispatches on a clone of `$self.dtype()`, obtains a `&mut` typed chunked
/// array through `$self.as_mut()` and invokes `$macro!` with the polars type
/// marker, that mutable reference (`ca`) and any `$opt_args`.
///
/// Panics for every non-numeric dtype.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical_mut {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        // The dtype is cloned so that no borrow of `$self` is held while the
        // arms borrow it mutably.
        match $self.dtype().clone() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => {
                let ca: &mut UInt8Chunked = $self.as_mut();
                $macro!(UInt8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => {
                let ca: &mut UInt16Chunked = $self.as_mut();
                $macro!(UInt16Type, ca $(, $opt_args)*)
            },
            DataType::UInt32 => {
                let ca: &mut UInt32Chunked = $self.as_mut();
                $macro!(UInt32Type, ca $(, $opt_args)*)
            },
            DataType::UInt64 => {
                let ca: &mut UInt64Chunked = $self.as_mut();
                $macro!(UInt64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => {
                let ca: &mut Int8Chunked = $self.as_mut();
                $macro!(Int8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => {
                let ca: &mut Int16Chunked = $self.as_mut();
                $macro!(Int16Type, ca $(, $opt_args)*)
            },
            DataType::Int32 => {
                let ca: &mut Int32Chunked = $self.as_mut();
                $macro!(Int32Type, ca $(, $opt_args)*)
            },
            DataType::Int64 => {
                let ca: &mut Int64Chunked = $self.as_mut();
                $macro!(Int64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => {
                let ca: &mut Int128Chunked = $self.as_mut();
                $macro!(Int128Type, ca $(, $opt_args)*)
            },
            DataType::Float32 => {
                let ca: &mut Float32Chunked = $self.as_mut();
                $macro!(Float32Type, ca $(, $opt_args)*)
            },
            DataType::Float64 => {
                let ca: &mut Float64Chunked = $self.as_mut();
                $macro!(Float64Type, ca $(, $opt_args)*)
            },
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
662
/// Dispatches on `$self.dtype()`, downcasts `$self` with the matching accessor
/// and calls `$method` with `$args` on the unwrapped result.
///
/// Covers boolean, string, the numeric dtypes, `Time`, `Date`, `Datetime`,
/// `List` and `Struct`. Panics for every other dtype.
#[macro_export]
macro_rules! apply_method_all_arrow_series {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Boolean => $self.bool().unwrap().$method($($args),*),
            DataType::String => $self.str().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            DataType::Time => $self.time().unwrap().$method($($args),*),
            DataType::Date => $self.date().unwrap().$method($($args),*),
            DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
            DataType::List(_) => $self.list().unwrap().$method($($args),*),
            DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
            dt => panic!("dtype {:?} not supported", dt)
        }
    }
}
694
/// Dispatches on `$self.dtype()`, downcasts `$self` with the matching integer
/// accessor (`u8()` ... `u64()`, `i8()` ... `i128()`) and calls `$method` with
/// `$args` on the unwrapped result.
///
/// Panics for every non-integer dtype.
#[macro_export]
macro_rules! apply_method_physical_integer {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }
}
717
/// Dispatches on `$self.dtype()` and calls `$method` with `$args` on the
/// downcast array.
///
/// The float dtypes are handled here; every other dtype is delegated to
/// `apply_method_physical_integer!`, which panics for non-integer dtypes.
///
/// The delegation goes through `$crate::`, so this exported macro also works
/// when the call site has not brought `apply_method_physical_integer` into
/// scope itself.
#[macro_export]
macro_rules! apply_method_physical_numeric {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            _ => $crate::apply_method_physical_integer!($self, $method, $($args),*),
        }
    }
}
729
/// Builds a `DataFrame` from `name => values` pairs.
///
/// Each pair becomes a `Series` via `NamedFrom::new(name.into(), values)` and
/// is converted into a `Column`; the columns are then passed to
/// `DataFrame::new`, whose return value is the value of the macro. At least
/// one pair is required and a trailing comma is accepted.
#[macro_export]
macro_rules! df {
    ($($col_name:expr => $slice:expr), + $(,)?) => {
        $crate::prelude::DataFrame::new(vec![
            $($crate::prelude::Column::from(<$crate::prelude::Series as $crate::prelude::NamedFrom::<_, _>>::new($col_name.into(), $slice)),)+
        ])
    }
}
738
739pub fn get_time_units(tu_l: &TimeUnit, tu_r: &TimeUnit) -> TimeUnit {
740 use crate::datatypes::time_unit::TimeUnit::*;
741 match (tu_l, tu_r) {
742 (Nanoseconds, Microseconds) => Microseconds,
743 (_, Milliseconds) => Milliseconds,
744 _ => *tu_l,
745 }
746}
747
/// Builds the `SchemaMismatch` error reported when two frames that should be
/// stacked vertically have different widths.
///
/// The message lists both widths and, for each frame, the names of the
/// columns reported as extra by `field_compare`.
/// Marked `#[cold]` / `#[inline(never)]` as it is only used on error paths.
#[cold]
#[inline(never)]
fn width_mismatch(df1: &DataFrame, df2: &DataFrame) -> PolarsError {
    let mut df1_extra = Vec::new();
    let mut df2_extra = Vec::new();

    let s1 = df1.schema();
    let s2 = df2.schema();

    // Fills the two vectors with the fields found on one side only.
    s1.field_compare(s2, &mut df1_extra, &mut df2_extra);

    // Keep only the column names and render them as a comma-separated list.
    let df1_extra = df1_extra
        .into_iter()
        .map(|(_, (n, _))| n.as_str())
        .collect::<Vec<_>>()
        .join(", ");
    let df2_extra = df2_extra
        .into_iter()
        .map(|(_, (n, _))| n.as_str())
        .collect::<Vec<_>>()
        .join(", ");

    polars_err!(
        SchemaMismatch: r#"unable to vstack, dataframes have different widths ({} != {}).
One dataframe has additional columns: [{df1_extra}].
Other dataframe has additional columns: [{df2_extra}]."#,
        df1.width(),
        df2.width(),
    )
}
778
779pub fn accumulate_dataframes_vertical_unchecked_optional<I>(dfs: I) -> Option<DataFrame>
780where
781 I: IntoIterator<Item = DataFrame>,
782{
783 let mut iter = dfs.into_iter();
784 let additional = iter.size_hint().0;
785 let mut acc_df = iter.next()?;
786 acc_df.reserve_chunks(additional);
787
788 for df in iter {
789 if acc_df.width() != df.width() {
790 panic!("{}", width_mismatch(&acc_df, &df));
791 }
792
793 acc_df.vstack_mut_owned_unchecked(df);
794 }
795 Some(acc_df)
796}
797
798pub fn accumulate_dataframes_vertical_unchecked<I>(dfs: I) -> DataFrame
801where
802 I: IntoIterator<Item = DataFrame>,
803{
804 let mut iter = dfs.into_iter();
805 let additional = iter.size_hint().0;
806 let mut acc_df = iter.next().unwrap();
807 acc_df.reserve_chunks(additional);
808
809 for df in iter {
810 if acc_df.width() != df.width() {
811 panic!("{}", width_mismatch(&acc_df, &df));
812 }
813
814 acc_df.vstack_mut_owned_unchecked(df);
815 }
816 acc_df
817}
818
819pub fn accumulate_dataframes_vertical<I>(dfs: I) -> PolarsResult<DataFrame>
823where
824 I: IntoIterator<Item = DataFrame>,
825{
826 let mut iter = dfs.into_iter();
827 let additional = iter.size_hint().0;
828 let mut acc_df = iter.next().unwrap();
829 acc_df.reserve_chunks(additional);
830 for df in iter {
831 if acc_df.width() != df.width() {
832 return Err(width_mismatch(&acc_df, &df));
833 }
834
835 acc_df.vstack_mut_owned(df)?;
836 }
837
838 Ok(acc_df)
839}
840
841pub fn concat_df<'a, I>(dfs: I) -> PolarsResult<DataFrame>
843where
844 I: IntoIterator<Item = &'a DataFrame>,
845{
846 let mut iter = dfs.into_iter();
847 let additional = iter.size_hint().0;
848 let mut acc_df = iter.next().unwrap().clone();
849 acc_df.reserve_chunks(additional);
850 for df in iter {
851 acc_df.vstack_mut(df)?;
852 }
853 Ok(acc_df)
854}
855
856pub fn concat_df_unchecked<'a, I>(dfs: I) -> DataFrame
858where
859 I: IntoIterator<Item = &'a DataFrame>,
860{
861 let mut iter = dfs.into_iter();
862 let additional = iter.size_hint().0;
863 let mut acc_df = iter.next().unwrap().clone();
864 acc_df.reserve_chunks(additional);
865 for df in iter {
866 acc_df.vstack_mut_unchecked(df);
867 }
868 acc_df
869}
870
871pub fn accumulate_dataframes_horizontal(dfs: Vec<DataFrame>) -> PolarsResult<DataFrame> {
872 let mut iter = dfs.into_iter();
873 let mut acc_df = iter.next().unwrap();
874 for df in iter {
875 acc_df.hstack_mut(df.get_columns())?;
876 }
877 Ok(acc_df)
878}
879
/// Makes sure `left` and `right` have the same chunk layout, so that their
/// chunks can be processed pairwise.
///
/// The inputs are returned borrowed when they are already aligned; otherwise
/// one side is re-chunked with `match_chunks` to follow the other side's
/// chunk lengths.
///
/// # Panics
///
/// In the arms that re-chunk, panics if `left` and `right` differ in length.
pub fn align_chunks_binary<'a, T, B>(
    left: &'a ChunkedArray<T>,
    right: &'a ChunkedArray<B>,
) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<B>>)
where
    B: PolarsDataType,
    T: PolarsDataType,
{
    // Length check, only evaluated in the arms that actually re-chunk.
    let assert = || {
        assert_eq!(
            left.len(),
            right.len(),
            "expected arrays of the same length"
        )
    };
    match (left.chunks.len(), right.chunks.len()) {
        // Both are single-chunk: nothing to do.
        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
        // Same number of chunks with identical lengths: already aligned.
        (a, b)
            if a == b
                && left
                    .chunk_lengths()
                    .zip(right.chunk_lengths())
                    .all(|(l, r)| l == r) =>
        {
            (Cow::Borrowed(left), Cow::Borrowed(right))
        },
        // Right is a single chunk: cut it to follow left's layout.
        (_, 1) => {
            assert();
            (
                Cow::Borrowed(left),
                Cow::Owned(right.match_chunks(left.chunk_lengths())),
            )
        },
        // Left is a single chunk: cut it to follow right's layout.
        (1, _) => {
            assert();
            (
                Cow::Owned(left.match_chunks(right.chunk_lengths())),
                Cow::Borrowed(right),
            )
        },
        // Both multi-chunk with different layouts: merge left into one chunk,
        // then cut it to follow right's layout.
        (_, _) => {
            assert();
            let left = left.rechunk();
            (
                Cow::Owned(left.match_chunks(right.chunk_lengths())),
                Cow::Borrowed(right),
            )
        },
    }
}
936
/// Aligns the chunk layout of a `ChunkedArray` and a `Series`.
///
/// The inputs are returned borrowed when they are already aligned; otherwise
/// the multi-chunk side(s) are rechunked.
///
/// # Panics
///
/// Panics if the lengths differ, but only in the arms that call `assert()`;
/// the `(_, 1)` and `(1, _)` arms perform no length check.
pub fn align_chunks_binary_ca_series<'a, T>(
    left: &'a ChunkedArray<T>,
    right: &'a Series,
) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, Series>)
where
    T: PolarsDataType,
{
    let assert = || {
        assert_eq!(
            left.len(),
            right.len(),
            "expected arrays of the same length"
        )
    };
    match (left.chunks.len(), right.chunks().len()) {
        // Both are single-chunk: nothing to do.
        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
        // Same number of chunks with identical lengths: already aligned.
        (a, b)
            if a == b
                && left
                    .chunk_lengths()
                    .zip(right.chunk_lengths())
                    .all(|(l, r)| l == r) =>
        {
            assert();
            (Cow::Borrowed(left), Cow::Borrowed(right))
        },
        // Only one side has several chunks: rechunk that side.
        (_, 1) => (left.rechunk(), Cow::Borrowed(right)),
        (1, _) => (Cow::Borrowed(left), Cow::Owned(right.rechunk())),
        // Both multi-chunk with different layouts: rechunk both.
        (_, _) => {
            assert();
            (left.rechunk(), Cow::Owned(right.rechunk()))
        },
    }
}
976
/// Owned variant of chunk alignment for two `Series`.
///
/// Returns the inputs untouched when they are already aligned; otherwise the
/// multi-chunk side(s) are rechunked. No length check is performed here.
#[cfg(feature = "performant")]
pub(crate) fn align_chunks_binary_owned_series(left: Series, right: Series) -> (Series, Series) {
    match (left.chunks().len(), right.chunks().len()) {
        // Both are single-chunk: nothing to do.
        (1, 1) => (left, right),
        // Same number of chunks with identical lengths: already aligned.
        (a, b)
            if a == b
                && left
                    .chunk_lengths()
                    .zip(right.chunk_lengths())
                    .all(|(l, r)| l == r) =>
        {
            (left, right)
        },
        // Only one side has several chunks: rechunk that side.
        (_, 1) => (left.rechunk(), right),
        (1, _) => (left, right.rechunk()),
        // Both multi-chunk with different layouts: rechunk both.
        (_, _) => (left.rechunk(), right.rechunk()),
    }
}
996
/// Owned variant of chunk alignment for two `ChunkedArray`s.
///
/// Returns the inputs untouched when they are already aligned; otherwise the
/// multi-chunk side(s) are rechunked. No length check is performed here.
pub(crate) fn align_chunks_binary_owned<T, B>(
    left: ChunkedArray<T>,
    right: ChunkedArray<B>,
) -> (ChunkedArray<T>, ChunkedArray<B>)
where
    B: PolarsDataType,
    T: PolarsDataType,
{
    match (left.chunks.len(), right.chunks.len()) {
        // Both are single-chunk: nothing to do.
        (1, 1) => (left, right),
        // Same number of chunks with identical lengths: already aligned.
        (a, b)
            if a == b
                && left
                    .chunk_lengths()
                    .zip(right.chunk_lengths())
                    .all(|(l, r)| l == r) =>
        {
            (left, right)
        },
        // Only one side has several chunks: rechunk that side.
        (_, 1) => (left.rechunk().into_owned(), right),
        (1, _) => (left, right.rechunk().into_owned()),
        // Both multi-chunk with different layouts: rechunk both.
        (_, _) => (left.rechunk().into_owned(), right.rechunk().into_owned()),
    }
}
1022
/// Makes sure `a`, `b` and `c` share the same chunk layout, so that their
/// chunks can be processed in lockstep.
///
/// Arrays that already have the chosen layout are returned borrowed; the
/// others are re-chunked with `match_chunks` to follow it.
///
/// # Panics
///
/// Panics if the three arrays do not all have the same length, unless all
/// three are single-chunk (that case returns before the check).
#[allow(clippy::type_complexity)]
pub fn align_chunks_ternary<'a, A, B, C>(
    a: &'a ChunkedArray<A>,
    b: &'a ChunkedArray<B>,
    c: &'a ChunkedArray<C>,
) -> (
    Cow<'a, ChunkedArray<A>>,
    Cow<'a, ChunkedArray<B>>,
    Cow<'a, ChunkedArray<C>>,
)
where
    A: PolarsDataType,
    B: PolarsDataType,
    C: PolarsDataType,
{
    // Fast path: everything is single-chunk, nothing to align or check.
    if a.chunks.len() == 1 && b.chunks.len() == 1 && c.chunks.len() == 1 {
        return (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c));
    }

    assert!(
        a.len() == b.len() && b.len() == c.len(),
        "expected arrays of the same length"
    );

    match (a.chunks.len(), b.chunks.len(), c.chunks.len()) {
        // Exactly one array is multi-chunk: the other two follow its layout.
        (_, 1, 1) => (
            Cow::Borrowed(a),
            Cow::Owned(b.match_chunks(a.chunk_lengths())),
            Cow::Owned(c.match_chunks(a.chunk_lengths())),
        ),
        (1, 1, _) => (
            Cow::Owned(a.match_chunks(c.chunk_lengths())),
            Cow::Owned(b.match_chunks(c.chunk_lengths())),
            Cow::Borrowed(c),
        ),
        (1, _, 1) => (
            Cow::Owned(a.match_chunks(b.chunk_lengths())),
            Cow::Borrowed(b),
            Cow::Owned(c.match_chunks(b.chunk_lengths())),
        ),
        // Two arrays are multi-chunk: one of them is merged into a single
        // chunk first, then everything follows the remaining layout.
        (1, _, _) => {
            let b = b.rechunk();
            (
                Cow::Owned(a.match_chunks(c.chunk_lengths())),
                Cow::Owned(b.match_chunks(c.chunk_lengths())),
                Cow::Borrowed(c),
            )
        },
        (_, 1, _) => {
            let a = a.rechunk();
            (
                Cow::Owned(a.match_chunks(c.chunk_lengths())),
                Cow::Owned(b.match_chunks(c.chunk_lengths())),
                Cow::Borrowed(c),
            )
        },
        (_, _, 1) => {
            let b = b.rechunk();
            (
                Cow::Borrowed(a),
                Cow::Owned(b.match_chunks(a.chunk_lengths())),
                Cow::Owned(c.match_chunks(a.chunk_lengths())),
            )
        },
        // All three are multi-chunk and already have identical layouts.
        (len_a, len_b, len_c)
            if len_a == len_b
                && len_b == len_c
                && a.chunk_lengths()
                    .zip(b.chunk_lengths())
                    .zip(c.chunk_lengths())
                    .all(|((a, b), c)| a == b && b == c) =>
        {
            (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c))
        },
        // All three are multi-chunk with differing layouts: merge `a` and `b`
        // into single chunks and cut them to follow `c`.
        _ => {
            let a = a.rechunk();
            let b = b.rechunk();
            (
                Cow::Owned(a.match_chunks(c.chunk_lengths())),
                Cow::Owned(b.match_chunks(c.chunk_lengths())),
                Cow::Borrowed(c),
            )
        },
    }
}
1111
1112pub fn binary_concatenate_validities<'a, T, B>(
1113 left: &'a ChunkedArray<T>,
1114 right: &'a ChunkedArray<B>,
1115) -> Option<Bitmap>
1116where
1117 B: PolarsDataType,
1118 T: PolarsDataType,
1119{
1120 let (left, right) = align_chunks_binary(left, right);
1121 let left_validity = concatenate_validities(left.chunks());
1122 let right_validity = concatenate_validities(right.chunks());
1123 combine_validities_and(left_validity.as_ref(), right_validity.as_ref())
1124}
1125
1126pub trait IntoVec<T> {
1128 fn into_vec(self) -> Vec<T>;
1129}
1130
1131impl<I, S> IntoVec<PlSmallStr> for I
1132where
1133 I: IntoIterator<Item = S>,
1134 S: Into<PlSmallStr>,
1135{
1136 fn into_vec(self) -> Vec<PlSmallStr> {
1137 self.into_iter().map(|s| s.into()).collect()
1138 }
1139}
1140
1141#[inline]
1146pub(crate) fn index_to_chunked_index<
1147 I: Iterator<Item = Idx>,
1148 Idx: PartialOrd + std::ops::AddAssign + std::ops::SubAssign + Zero + One,
1149>(
1150 chunk_lens: I,
1151 index: Idx,
1152) -> (Idx, Idx) {
1153 let mut index_remainder = index;
1154 let mut current_chunk_idx = Zero::zero();
1155
1156 for chunk_len in chunk_lens {
1157 if chunk_len > index_remainder {
1158 break;
1159 } else {
1160 index_remainder -= chunk_len;
1161 current_chunk_idx += One::one();
1162 }
1163 }
1164 (current_chunk_idx, index_remainder)
1165}
1166
1167pub(crate) fn index_to_chunked_index_rev<
1168 I: Iterator<Item = Idx>,
1169 Idx: PartialOrd
1170 + std::ops::AddAssign
1171 + std::ops::SubAssign
1172 + std::ops::Sub<Output = Idx>
1173 + Zero
1174 + One
1175 + Copy
1176 + std::fmt::Debug,
1177>(
1178 chunk_lens_rev: I,
1179 index_from_back: Idx,
1180 total_chunks: Idx,
1181) -> (Idx, Idx) {
1182 debug_assert!(index_from_back > Zero::zero(), "at least -1");
1183 let mut index_remainder = index_from_back;
1184 let mut current_chunk_idx = One::one();
1185 let mut current_chunk_len = Zero::zero();
1186
1187 for chunk_len in chunk_lens_rev {
1188 current_chunk_len = chunk_len;
1189 if chunk_len >= index_remainder {
1190 break;
1191 } else {
1192 index_remainder -= chunk_len;
1193 current_chunk_idx += One::one();
1194 }
1195 }
1196 (
1197 total_chunks - current_chunk_idx,
1198 current_chunk_len - index_remainder,
1199 )
1200}
1201
1202pub fn first_non_null<'a, I>(iter: I) -> Option<usize>
1203where
1204 I: Iterator<Item = Option<&'a Bitmap>>,
1205{
1206 let mut offset = 0;
1207 for validity in iter {
1208 if let Some(mask) = validity {
1209 let len_mask = mask.len();
1210 let n = mask.leading_zeros();
1211 if n < len_mask {
1212 return Some(offset + n);
1213 }
1214 offset += len_mask
1215 } else {
1216 return Some(offset);
1217 }
1218 }
1219 None
1220}
1221
1222pub fn last_non_null<'a, I>(iter: I, len: usize) -> Option<usize>
1223where
1224 I: DoubleEndedIterator<Item = Option<&'a Bitmap>>,
1225{
1226 if len == 0 {
1227 return None;
1228 }
1229 let mut offset = 0;
1230 for validity in iter.rev() {
1231 if let Some(mask) = validity {
1232 let len_mask = mask.len();
1233 let n = mask.trailing_zeros();
1234 if n < len_mask {
1235 return Some(len - offset - n - 1);
1236 }
1237 offset += len_mask;
1238 } else {
1239 return Some(len - offset - 1);
1240 }
1241 }
1242 None
1243}
1244
1245pub fn coalesce_nulls<'a, T: PolarsDataType>(
1247 a: &'a ChunkedArray<T>,
1248 b: &'a ChunkedArray<T>,
1249) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<T>>) {
1250 if a.null_count() > 0 || b.null_count() > 0 {
1251 let (a, b) = align_chunks_binary(a, b);
1252 let mut b = b.into_owned();
1253 let a = a.coalesce_nulls(b.chunks());
1254
1255 for arr in a.chunks().iter() {
1256 for arr_b in unsafe { b.chunks_mut() } {
1257 *arr_b = arr_b.with_validity(arr.validity().cloned())
1258 }
1259 }
1260 b.compute_len();
1261 (Cow::Owned(a), Cow::Owned(b))
1262 } else {
1263 (Cow::Borrowed(a), Cow::Borrowed(b))
1264 }
1265}
1266
1267pub fn coalesce_nulls_columns(a: &Column, b: &Column) -> (Column, Column) {
1268 if a.null_count() > 0 || b.null_count() > 0 {
1269 let mut a = a.as_materialized_series().rechunk();
1270 let mut b = b.as_materialized_series().rechunk();
1271 for (arr_a, arr_b) in unsafe { a.chunks_mut().iter_mut().zip(b.chunks_mut()) } {
1272 let validity = match (arr_a.validity(), arr_b.validity()) {
1273 (None, Some(b)) => Some(b.clone()),
1274 (Some(a), Some(b)) => Some(a & b),
1275 (Some(a), None) => Some(a.clone()),
1276 (None, None) => None,
1277 };
1278 *arr_a = arr_a.with_validity(validity.clone());
1279 *arr_b = arr_b.with_validity(validity);
1280 }
1281 a.compute_len();
1282 b.compute_len();
1283 (a.into(), b.into())
1284 } else {
1285 (a.clone(), b.clone())
1286 }
1287}
1288
1289pub fn operation_exceeded_idxsize_msg(operation: &str) -> String {
1290 if size_of::<IdxSize>() == size_of::<u32>() {
1291 format!(
1292 "{} exceeded the maximum supported limit of {} rows. Consider installing 'polars-u64-idx'.",
1293 operation,
1294 IdxSize::MAX,
1295 )
1296 } else {
1297 format!(
1298 "{} exceeded the maximum supported limit of {} rows.",
1299 operation,
1300 IdxSize::MAX,
1301 )
1302 }
1303}
1304
#[cfg(test)]
mod test {
    use super::*;

    /// Ten values split three ways: the first two parts hold three values, the last one four.
    #[test]
    fn test_split() {
        let ca: Int32Chunked = (0..10).collect_ca("a".into());
        let parts = split(&ca, 3);

        for (i, expected_len) in [3usize, 3, 4].iter().enumerate() {
            assert_eq!(parts[i].len(), *expected_len);
        }
    }

    /// After `align_chunks_binary` both sides must report the same chunk lengths.
    #[test]
    fn test_align_chunks() -> PolarsResult<()> {
        // A single chunk of four against two appended pieces of one and three values.
        {
            let lhs = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
            let mut rhs = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
            let tail = Int32Chunked::new(PlSmallStr::EMPTY, &[2, 3, 4]);
            rhs.append(&tail)?;

            let (lhs, rhs) = align_chunks_binary(&lhs, &rhs);
            let lhs_lens: Vec<_> = lhs.chunk_lengths().collect();
            let rhs_lens: Vec<_> = rhs.chunk_lengths().collect();
            assert_eq!(lhs_lens, rhs_lens);
        }

        // A single chunk of four against four appended pieces of one value each.
        {
            let lhs = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
            let mut rhs = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
            let single = rhs.clone();
            for _ in 0..3 {
                rhs.append(&single)?;
            }

            let (lhs, rhs) = align_chunks_binary(&lhs, &rhs);
            let lhs_lens: Vec<_> = lhs.chunk_lengths().collect();
            let rhs_lens: Vec<_> = rhs.chunk_lengths().collect();
            assert_eq!(lhs_lens, rhs_lens);
        }

        Ok(())
    }
}