1mod any_value;
2use arrow::compute::concatenate::concatenate_validities;
3use arrow::compute::utils::combine_validities_and;
4pub mod flatten;
5pub(crate) mod series;
6mod supertype;
7use std::borrow::Cow;
8use std::ops::{Deref, DerefMut};
9mod schema;
10
11pub use any_value::*;
12use arrow::bitmap::Bitmap;
13use arrow::bitmap::bitmask::BitMask;
14pub use arrow::legacy::utils::*;
15pub use arrow::trusted_len::TrustMyLength;
16use flatten::*;
17use num_traits::{One, Zero};
18use rayon::prelude::*;
19pub use schema::*;
20pub use series::*;
21pub use supertype::*;
22pub use {arrow, rayon};
23
24use crate::POOL;
25use crate::prelude::*;
26
/// Transparent single-field newtype around `T`.
///
/// `#[repr(transparent)]` gives it the same layout as `T`, and the `Deref`
/// impl below lets a `Wrap<T>` be read wherever a `&T` is expected.
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T> Deref for Wrap<T> {
    type Target = T;

    /// Borrows the wrapped value.
    fn deref(&self) -> &T {
        let Wrap(inner) = self;
        inner
    }
}
36
/// Returns the partition count to use, which is simply the number of threads
/// reported by the crate-wide `POOL` (`POOL.current_num_threads()`).
#[inline(always)]
pub fn _set_partition_size() -> usize {
    POOL.current_num_threads()
}
41
/// Thin owning wrapper around a single value of type `T`.
///
/// The wrapper adds no behaviour of its own: it can be built with [`NoNull::new`],
/// unwrapped with [`NoNull::into_inner`], and is transparently usable as a `T`
/// through `Deref`/`DerefMut`.
/// NOTE(review): the name suggests it tags data known to contain no nulls; nothing
/// in this block enforces that — confirm against the impls that use it.
pub struct NoNull<T> {
    inner: T,
}

impl<T> NoNull<T> {
    /// Wraps `inner`.
    pub fn new(inner: T) -> Self {
        Self { inner }
    }

    /// Consumes the wrapper and returns the wrapped value.
    pub fn into_inner(self) -> T {
        let Self { inner } = self;
        inner
    }
}

impl<T> Deref for NoNull<T> {
    type Target = T;

    /// Shared access to the wrapped value.
    fn deref(&self) -> &T {
        &self.inner
    }
}

impl<T> DerefMut for NoNull<T> {
    /// Exclusive access to the wrapped value.
    fn deref_mut(&mut self) -> &mut T {
        &mut self.inner
    }
}
75
/// Picks an initial capacity for collecting `iter`, based on its `size_hint`.
///
/// * If an upper bound is known, that upper bound is returned.
/// * If there is no upper bound and the lower bound is `0`, falls back to `1024`.
/// * Otherwise the (non-zero) lower bound is returned.
pub(crate) fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    let (lower, upper) = iter.size_hint();
    if let Some(upper) = upper {
        return upper;
    }
    // No upper bound: trust a non-zero lower bound, otherwise use a default guess.
    if lower == 0 { 1024 } else { lower }
}
83
84#[doc(hidden)]
87pub fn _split_offsets(len: usize, n: usize) -> Vec<(usize, usize)> {
88 if n == 1 {
89 vec![(0, len)]
90 } else {
91 let chunk_size = len / n;
92
93 (0..n)
94 .map(|partition| {
95 let offset = partition * chunk_size;
96 let len = if partition == (n - 1) {
97 len - offset
98 } else {
99 chunk_size
100 };
101 (partition * chunk_size, len)
102 })
103 .collect_trusted()
104 }
105}
106
/// Common interface over chunked containers that can be sliced and split.
///
/// This module implements it for `DataFrame`, `ChunkedArray<T>` and `Series`, and
/// the generic `split*` helpers below are written against it.
#[allow(clippy::len_without_is_empty)]
pub trait Container: Clone {
    /// Returns the sub-range described by `offset` and `len`.
    /// NOTE(review): negative offsets presumably count from the end — confirm per implementor.
    fn slice(&self, offset: i64, len: usize) -> Self;

    /// Splits the container in two at `offset` and returns both parts.
    fn split_at(&self, offset: i64) -> (Self, Self);

    /// Number of elements in the container (the height, for a `DataFrame`).
    fn len(&self) -> usize;

    /// Iterates over the container chunk by chunk, yielding each chunk as its own container.
    fn iter_chunks(&self) -> impl Iterator<Item = Self>;

    /// Whether the container needs rechunking; `split` and `split_and_flatten` only
    /// reuse the existing chunks when this returns `false`.
    fn should_rechunk(&self) -> bool;

    /// Number of chunks backing the container.
    fn n_chunks(&self) -> usize;

    /// Length of each chunk, in chunk order.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize>;
}
123
/// `Container` for `DataFrame`: every method forwards to the corresponding
/// `DataFrame` operation.
impl Container for DataFrame {
    fn slice(&self, offset: i64, len: usize) -> Self {
        DataFrame::slice(self, offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        DataFrame::split_at(self, offset)
    }

    /// The length of a frame is its height.
    fn len(&self) -> usize {
        self.height()
    }

    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        flatten_df_iter(self)
    }

    fn should_rechunk(&self) -> bool {
        // NOTE(review): relies on an inherent `DataFrame::should_rechunk` taking
        // precedence over this trait method; otherwise this would recurse — confirm.
        self.should_rechunk()
    }

    fn n_chunks(&self) -> usize {
        DataFrame::first_col_n_chunks(self)
    }

    /// Chunk lengths are taken from the first column only.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        // Indexing `columns[0]` panics for a frame without columns.
        self.columns[0].as_materialized_series().chunk_lengths()
    }
}
154
/// `Container` for `ChunkedArray<T>`: forwards to the array's own operations.
impl<T: PolarsDataType> Container for ChunkedArray<T> {
    fn slice(&self, offset: i64, len: usize) -> Self {
        ChunkedArray::slice(self, offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        ChunkedArray::split_at(self, offset)
    }

    fn len(&self) -> usize {
        ChunkedArray::len(self)
    }

    /// Wraps each downcast chunk in a new single-chunk array that keeps this array's name.
    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        self.downcast_iter()
            .map(|arr| Self::with_chunk(self.name().clone(), arr.clone()))
    }

    /// Always `false` for a `ChunkedArray`.
    fn should_rechunk(&self) -> bool {
        false
    }

    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        ChunkedArray::chunk_lengths(self)
    }
}
185
/// `Container` for `Series`: forwards to the inner value held in field `0`.
impl Container for Series {
    fn slice(&self, offset: i64, len: usize) -> Self {
        self.0.slice(offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        self.0.split_at(offset)
    }

    fn len(&self) -> usize {
        self.0.len()
    }

    /// Yields one series per chunk index via `select_chunk`.
    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        (0..self.0.n_chunks()).map(|i| self.select_chunk(i))
    }

    /// Always `false` for a `Series`.
    fn should_rechunk(&self) -> bool {
        false
    }

    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        self.0.chunk_lengths()
    }
}
215
216fn split_impl<C: Container>(container: &C, target: usize, chunk_size: usize) -> Vec<C> {
217 if target == 1 {
218 return vec![container.clone()];
219 }
220 let mut out = Vec::with_capacity(target);
221 let chunk_size = chunk_size as i64;
222
223 let (chunk, mut remainder) = container.split_at(chunk_size);
225 out.push(chunk);
226
227 for _ in 1..target - 1 {
229 let (a, b) = remainder.split_at(chunk_size);
230 out.push(a);
231 remainder = b
232 }
233 out.push(remainder);
235 out
236}
237
238pub fn split<C: Container>(container: &C, target: usize) -> Vec<C> {
240 let total_len = container.len();
241 if total_len == 0 {
242 return vec![container.clone()];
243 }
244
245 let chunk_size = std::cmp::max(total_len / target, 1);
246
247 if container.n_chunks() == target
248 && container
249 .chunk_lengths()
250 .all(|len| len.abs_diff(chunk_size) < 100)
251 && !container.should_rechunk()
253 {
254 return container.iter_chunks().collect();
255 }
256 split_impl(container, target, chunk_size)
257}
258
259pub fn split_and_flatten<C: Container>(container: &C, target: usize) -> Vec<C> {
262 let total_len = container.len();
263 if total_len == 0 {
264 return vec![container.clone()];
265 }
266
267 let chunk_size = std::cmp::max(total_len / target, 1);
268
269 if container.n_chunks() == target
270 && container
271 .chunk_lengths()
272 .all(|len| len.abs_diff(chunk_size) < 100)
273 && !container.should_rechunk()
275 {
276 return container.iter_chunks().collect();
277 }
278
279 if container.n_chunks() == 1 {
280 split_impl(container, target, chunk_size)
281 } else {
282 let mut out = Vec::with_capacity(target);
283 let chunks = container.iter_chunks();
284
285 'new_chunk: for mut chunk in chunks {
286 loop {
287 let h = chunk.len();
288 if h < chunk_size {
289 out.push(chunk);
291 continue 'new_chunk;
292 }
293
294 if ((h - chunk_size) as f64 / chunk_size as f64) < 0.3 {
296 out.push(chunk);
297 continue 'new_chunk;
298 }
299
300 let (a, b) = chunk.split_at(chunk_size as i64);
301 out.push(a);
302 chunk = b;
303 }
304 }
305 out
306 }
307}
308
309pub fn split_df_as_ref(df: &DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
315 if strict {
316 split(df, target)
317 } else {
318 split_and_flatten(df, target)
319 }
320}
321
322#[doc(hidden)]
323pub fn split_df(df: &mut DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
326 if target == 0 || df.is_empty() {
327 return vec![df.clone()];
328 }
329 df.align_chunks_par();
331 split_df_as_ref(df, target, strict)
332}
333
334pub fn slice_slice<T>(vals: &[T], offset: i64, len: usize) -> &[T] {
335 let (raw_offset, slice_len) = slice_offsets(offset, len, vals.len());
336 &vals[raw_offset..raw_offset + slice_len]
337}
338
/// Converts a possibly-negative `offset` plus a `length` into an in-bounds
/// `(start_index, slice_len)` pair for an array of `array_len` elements.
///
/// A negative `offset` is counted from the end of the array. Start and stop are
/// both clamped into `0..=array_len`, so the result never reaches outside the array.
///
/// # Panics
/// Panics if `array_len` does not fit in an `i64`.
#[inline]
#[doc(hidden)]
pub fn slice_offsets(offset: i64, length: usize, array_len: usize) -> (usize, usize) {
    // Saturating arithmetic keeps extreme offsets/lengths from overflowing.
    let raw_start = match offset {
        from_end if from_end < 0 => from_end.saturating_add_unsigned(array_len as u64),
        from_start => from_start,
    };
    let raw_stop = raw_start.saturating_add_unsigned(length as u64);

    let upper: i64 = array_len
        .try_into()
        .expect("array length larger than i64::MAX");

    let start = raw_start.clamp(0, upper);
    let stop = raw_stop.clamp(0, upper);

    (start as usize, (stop - start) as usize)
}
359
/// Dispatches on a `DataType` value and invokes one of the supplied macros.
///
/// * `$dtype` – expression to match on.
/// * `$numeric` – macro invoked for numeric dtypes as `$numeric!(<native type>, extra...)`,
///   where the native type is e.g. `u32` or `f64`.
/// * `$string` / `$boolean` – macros invoked for `DataType::String` / `DataType::Boolean`;
///   the extra arguments are pasted into these calls with no separating commas.
///
/// Panics for any dtype not listed.
#[macro_export]
macro_rules! match_dtype_to_physical_apply_macro {
    ($dtype:expr, $numeric:ident, $string:ident, $boolean:ident $(, $extra:expr)*) => {{
        match $dtype {
            DataType::Float32 => $numeric!(f32 $(, $extra)*),
            DataType::Float64 => $numeric!(f64 $(, $extra)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $numeric!(i8 $(, $extra)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $numeric!(i16 $(, $extra)*),
            DataType::Int32 => $numeric!(i32 $(, $extra)*),
            DataType::Int64 => $numeric!(i64 $(, $extra)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $numeric!(i128 $(, $extra)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $numeric!(u8 $(, $extra)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $numeric!(u16 $(, $extra)*),
            DataType::UInt32 => $numeric!(u32 $(, $extra)*),
            DataType::UInt64 => $numeric!(u64 $(, $extra)*),
            DataType::String => $string!($($extra)*),
            DataType::Boolean => $boolean!($($extra)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
387
/// Dispatches on a `DataType` value and invokes one of the supplied macros.
///
/// * `$dtype` – expression to match on.
/// * `$numeric` – macro invoked for numeric dtypes as `$numeric!(<polars type>, extra...)`,
///   where the polars type is e.g. `UInt32Type` or `Float64Type`.
/// * `$string` / `$binary` / `$boolean` – macros invoked for `DataType::String`,
///   `DataType::Binary` and `DataType::Boolean`; the extra arguments are pasted
///   into these calls with no separating commas.
///
/// Panics for any dtype not listed.
#[macro_export]
macro_rules! match_dtype_to_logical_apply_macro {
    ($dtype:expr, $numeric:ident, $string:ident, $binary:ident, $boolean:ident $(, $extra:expr)*) => {{
        match $dtype {
            DataType::Float32 => $numeric!(Float32Type $(, $extra)*),
            DataType::Float64 => $numeric!(Float64Type $(, $extra)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $numeric!(Int8Type $(, $extra)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $numeric!(Int16Type $(, $extra)*),
            DataType::Int32 => $numeric!(Int32Type $(, $extra)*),
            DataType::Int64 => $numeric!(Int64Type $(, $extra)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $numeric!(Int128Type $(, $extra)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $numeric!(UInt8Type $(, $extra)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $numeric!(UInt16Type $(, $extra)*),
            DataType::UInt32 => $numeric!(UInt32Type $(, $extra)*),
            DataType::UInt64 => $numeric!(UInt64Type $(, $extra)*),
            DataType::String => $string!($($extra)*),
            DataType::Binary => $binary!($($extra)*),
            DataType::Boolean => $boolean!($($extra)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
416
/// Downcasts `$series` according to its `dtype()` and passes the typed array to a macro.
///
/// * `$numeric` – macro invoked for numeric dtypes with the downcast array
///   (e.g. `$series.u32().unwrap()`) followed by the extra arguments.
/// * `$string` / `$boolean` – macros invoked with `$series.str().unwrap()` /
///   `$series.bool().unwrap()` followed by the extra arguments.
///
/// Panics for any dtype not listed.
#[macro_export]
macro_rules! match_arrow_dtype_apply_macro_ca {
    ($series:expr, $numeric:ident, $string:ident, $boolean:ident $(, $extra:expr)*) => {{
        match $series.dtype() {
            DataType::Float32 => $numeric!($series.f32().unwrap() $(, $extra)*),
            DataType::Float64 => $numeric!($series.f64().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $numeric!($series.i8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $numeric!($series.i16().unwrap() $(, $extra)*),
            DataType::Int32 => $numeric!($series.i32().unwrap() $(, $extra)*),
            DataType::Int64 => $numeric!($series.i64().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $numeric!($series.i128().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $numeric!($series.u8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $numeric!($series.u16().unwrap() $(, $extra)*),
            DataType::UInt32 => $numeric!($series.u32().unwrap() $(, $extra)*),
            DataType::UInt64 => $numeric!($series.u64().unwrap() $(, $extra)*),
            DataType::String => $string!($series.str().unwrap() $(, $extra)*),
            DataType::Boolean => $boolean!($series.bool().unwrap() $(, $extra)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
444
/// Runs `$body` with `$T` bound to the native Rust type (`i32`, `f64`, ...) that
/// corresponds to the numeric `DataType` in `$dtype`.
///
/// Invoked as `with_match_physical_numeric_type!(dtype, |$T| { ... })`: the body is
/// wrapped in a local helper macro that is instantiated once per match arm.
/// Panics for any non-numeric dtype.
#[macro_export]
macro_rules! with_match_physical_numeric_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
471
/// Runs `$body` with `$T` bound to the native Rust integer type (`i32`, `u64`, ...)
/// that corresponds to the integer `DataType` in `$dtype`.
///
/// Invoked as `with_match_physical_integer_type!(dtype, |$T| { ... })`: the body is
/// wrapped in a local helper macro that is instantiated once per match arm.
/// Panics for any non-integer dtype.
#[macro_export]
macro_rules! with_match_physical_integer_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
496
/// Runs `$body` with `$T` bound to `f32` or `f64`, depending on whether `$dtype`
/// is `Float32` or `Float64`.
///
/// Invoked as `with_match_physical_float_type!(dtype, |$T| { ... })`.
/// Panics for any other dtype.
#[macro_export]
macro_rules! with_match_physical_float_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        Float64 => __with_ty__! { f64 },
        Float32 => __with_ty__! { f32 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
509
/// Runs `$body` with `$T` bound to `Float32Type` or `Float64Type`, depending on
/// whether `$key_type` is `Float32` or `Float64`.
///
/// Invoked as `with_match_physical_float_polars_type!(dtype, |$T| { ... })`.
/// Panics for any other dtype.
#[macro_export]
macro_rules! with_match_physical_float_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        Float64 => __with_ty__! { Float64Type },
        Float32 => __with_ty__! { Float32Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
522
/// Runs `$body` with `$T` bound to the polars type (`Int32Type`, `Float64Type`, ...)
/// that corresponds to the numeric `DataType` in `$key_type`.
///
/// Invoked as `with_match_physical_numeric_polars_type!(dtype, |$T| { ... })`: the
/// body is wrapped in a local helper macro that is instantiated once per match arm.
/// Panics for any non-numeric dtype.
#[macro_export]
macro_rules! with_match_physical_numeric_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
549
/// Runs `$body` with `$T` bound to the polars integer type (`Int32Type`,
/// `UInt64Type`, ...) that corresponds to the integer `DataType` in `$key_type`.
///
/// Invoked as `with_match_physical_integer_polars_type!(dtype, |$T| { ... })`.
/// This variant also glob-imports `$crate::datatypes::*`, so the type names
/// resolve at the call site. Panics for any non-integer dtype.
#[macro_export]
macro_rules! with_match_physical_integer_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    use $crate::datatypes::*;
    match $key_type {
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
575
/// Downcasts `$series` to its numeric chunked array and passes it to `$apply`.
///
/// The array is obtained via the accessor matching `$series.dtype()` (e.g.
/// `.u32().unwrap()`), and is followed by any extra arguments in the call.
/// Panics for non-numeric dtypes.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical {
    ($series:expr, $apply:ident $(, $extra:expr)*) => {{
        match $series.dtype() {
            DataType::Float32 => $apply!($series.f32().unwrap() $(, $extra)*),
            DataType::Float64 => $apply!($series.f64().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $apply!($series.i8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $apply!($series.i16().unwrap() $(, $extra)*),
            DataType::Int32 => $apply!($series.i32().unwrap() $(, $extra)*),
            DataType::Int64 => $apply!($series.i64().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $apply!($series.i128().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $apply!($series.u8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $apply!($series.u16().unwrap() $(, $extra)*),
            DataType::UInt32 => $apply!($series.u32().unwrap() $(, $extra)*),
            DataType::UInt64 => $apply!($series.u64().unwrap() $(, $extra)*),
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
602
/// Mutable counterpart of `downcast_as_macro_arg_physical`.
///
/// Obtains a `&mut` typed chunked array from `$series` via `as_mut()` and calls
/// `$apply!(<polars type>, ca, extra...)`. The dtype is cloned before matching so
/// that `$series` is not borrowed by the scrutinee while the arm borrows it mutably.
/// Panics for non-numeric dtypes.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical_mut {
    ($series:expr, $apply:ident $(, $extra:expr)*) => {{
        match $series.dtype().clone() {
            DataType::Float32 => {
                let ca: &mut Float32Chunked = $series.as_mut();
                $apply!(Float32Type, ca $(, $extra)*)
            },
            DataType::Float64 => {
                let ca: &mut Float64Chunked = $series.as_mut();
                $apply!(Float64Type, ca $(, $extra)*)
            },
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => {
                let ca: &mut Int8Chunked = $series.as_mut();
                $apply!(Int8Type, ca $(, $extra)*)
            },
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => {
                let ca: &mut Int16Chunked = $series.as_mut();
                $apply!(Int16Type, ca $(, $extra)*)
            },
            DataType::Int32 => {
                let ca: &mut Int32Chunked = $series.as_mut();
                $apply!(Int32Type, ca $(, $extra)*)
            },
            DataType::Int64 => {
                let ca: &mut Int64Chunked = $series.as_mut();
                $apply!(Int64Type, ca $(, $extra)*)
            },
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => {
                let ca: &mut Int128Chunked = $series.as_mut();
                $apply!(Int128Type, ca $(, $extra)*)
            },
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => {
                let ca: &mut UInt8Chunked = $series.as_mut();
                $apply!(UInt8Type, ca $(, $extra)*)
            },
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => {
                let ca: &mut UInt16Chunked = $series.as_mut();
                $apply!(UInt16Type, ca $(, $extra)*)
            },
            DataType::UInt32 => {
                let ca: &mut UInt32Chunked = $series.as_mut();
                $apply!(UInt32Type, ca $(, $extra)*)
            },
            DataType::UInt64 => {
                let ca: &mut UInt64Chunked = $series.as_mut();
                $apply!(UInt64Type, ca $(, $extra)*)
            },
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
663
/// Calls `$method(args...)` on the typed chunked array behind `$series`.
///
/// The array is obtained via the accessor matching `$series.dtype()`. Covers
/// booleans, strings, the numeric types, `Time`, `Date`, `Datetime`, `List` and
/// `Struct`; panics for any other dtype.
#[macro_export]
macro_rules! apply_method_all_arrow_series {
    ($series:expr, $method:ident, $($args:expr),*) => {
        match $series.dtype() {
            DataType::Float32 => $series.f32().unwrap().$method($($args),*),
            DataType::Float64 => $series.f64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $series.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $series.i16().unwrap().$method($($args),*),
            DataType::Int32 => $series.i32().unwrap().$method($($args),*),
            DataType::Int64 => $series.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $series.i128().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $series.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $series.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $series.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $series.u64().unwrap().$method($($args),*),
            DataType::Boolean => $series.bool().unwrap().$method($($args),*),
            DataType::String => $series.str().unwrap().$method($($args),*),
            DataType::Date => $series.date().unwrap().$method($($args),*),
            DataType::Time => $series.time().unwrap().$method($($args),*),
            DataType::Datetime(_, _) => $series.datetime().unwrap().$method($($args),*),
            DataType::List(_) => $series.list().unwrap().$method($($args),*),
            DataType::Struct(_) => $series.struct_().unwrap().$method($($args),*),
            dt => panic!("dtype {:?} not supported", dt)
        }
    }
}
695
/// Calls `$method(args...)` on the integer chunked array behind `$series`.
///
/// The array is obtained via the accessor matching `$series.dtype()` (e.g.
/// `.i64().unwrap()`). Panics for any non-integer dtype.
#[macro_export]
macro_rules! apply_method_physical_integer {
    ($series:expr, $method:ident, $($args:expr),*) => {
        match $series.dtype() {
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $series.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $series.i16().unwrap().$method($($args),*),
            DataType::Int32 => $series.i32().unwrap().$method($($args),*),
            DataType::Int64 => $series.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $series.i128().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $series.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $series.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $series.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $series.u64().unwrap().$method($($args),*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }
}
718
/// Calls `$method(args...)` on the numeric chunked array behind `$self`.
///
/// `Float32` and `Float64` are handled here; every other dtype is forwarded to
/// `apply_method_physical_integer!`, which panics for non-integer dtypes.
///
/// The nested macro is addressed through `$crate::` so that this exported macro
/// also expands correctly in downstream crates that have not imported
/// `apply_method_physical_integer!` themselves.
#[macro_export]
macro_rules! apply_method_physical_numeric {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            _ => $crate::apply_method_physical_integer!($self, $method, $($args),*),
        }
    }
}
730
/// Builds a `DataFrame` from `name => values` pairs.
///
/// Each pair becomes a `Series` via `NamedFrom::new(name.into(), values)`, is
/// converted into a `Column`, and the columns are handed to `DataFrame::new`.
/// The macro evaluates to whatever `DataFrame::new` returns. A trailing comma
/// after the last pair is accepted.
#[macro_export]
macro_rules! df {
    ($($name:expr => $values:expr), + $(,)?) => {
        $crate::prelude::DataFrame::new(vec![
            $(
                $crate::prelude::Column::from(
                    <$crate::prelude::Series as $crate::prelude::NamedFrom::<_, _>>::new(
                        $name.into(),
                        $values,
                    ),
                ),
            )+
        ])
    }
}
739
740pub fn get_time_units(tu_l: &TimeUnit, tu_r: &TimeUnit) -> TimeUnit {
741 use crate::datatypes::time_unit::TimeUnit::*;
742 match (tu_l, tu_r) {
743 (Nanoseconds, Microseconds) => Microseconds,
744 (_, Milliseconds) => Milliseconds,
745 _ => *tu_l,
746 }
747}
748
749#[cold]
750#[inline(never)]
751fn width_mismatch(df1: &DataFrame, df2: &DataFrame) -> PolarsError {
752 let mut df1_extra = Vec::new();
753 let mut df2_extra = Vec::new();
754
755 let s1 = df1.schema();
756 let s2 = df2.schema();
757
758 s1.field_compare(s2, &mut df1_extra, &mut df2_extra);
759
760 let df1_extra = df1_extra
761 .into_iter()
762 .map(|(_, (n, _))| n.as_str())
763 .collect::<Vec<_>>()
764 .join(", ");
765 let df2_extra = df2_extra
766 .into_iter()
767 .map(|(_, (n, _))| n.as_str())
768 .collect::<Vec<_>>()
769 .join(", ");
770
771 polars_err!(
772 SchemaMismatch: r#"unable to vstack, dataframes have different widths ({} != {}).
773One dataframe has additional columns: [{df1_extra}].
774Other dataframe has additional columns: [{df2_extra}]."#,
775 df1.width(),
776 df2.width(),
777 )
778}
779
780pub fn accumulate_dataframes_vertical_unchecked_optional<I>(dfs: I) -> Option<DataFrame>
781where
782 I: IntoIterator<Item = DataFrame>,
783{
784 let mut iter = dfs.into_iter();
785 let additional = iter.size_hint().0;
786 let mut acc_df = iter.next()?;
787 acc_df.reserve_chunks(additional);
788
789 for df in iter {
790 if acc_df.width() != df.width() {
791 panic!("{}", width_mismatch(&acc_df, &df));
792 }
793
794 acc_df.vstack_mut_owned_unchecked(df);
795 }
796 Some(acc_df)
797}
798
799pub fn accumulate_dataframes_vertical_unchecked<I>(dfs: I) -> DataFrame
802where
803 I: IntoIterator<Item = DataFrame>,
804{
805 let mut iter = dfs.into_iter();
806 let additional = iter.size_hint().0;
807 let mut acc_df = iter.next().unwrap();
808 acc_df.reserve_chunks(additional);
809
810 for df in iter {
811 if acc_df.width() != df.width() {
812 panic!("{}", width_mismatch(&acc_df, &df));
813 }
814
815 acc_df.vstack_mut_owned_unchecked(df);
816 }
817 acc_df
818}
819
820pub fn accumulate_dataframes_vertical<I>(dfs: I) -> PolarsResult<DataFrame>
824where
825 I: IntoIterator<Item = DataFrame>,
826{
827 let mut iter = dfs.into_iter();
828 let additional = iter.size_hint().0;
829 let mut acc_df = iter.next().unwrap();
830 acc_df.reserve_chunks(additional);
831 for df in iter {
832 if acc_df.width() != df.width() {
833 return Err(width_mismatch(&acc_df, &df));
834 }
835
836 acc_df.vstack_mut_owned(df)?;
837 }
838
839 Ok(acc_df)
840}
841
842pub fn concat_df<'a, I>(dfs: I) -> PolarsResult<DataFrame>
844where
845 I: IntoIterator<Item = &'a DataFrame>,
846{
847 let mut iter = dfs.into_iter();
848 let additional = iter.size_hint().0;
849 let mut acc_df = iter.next().unwrap().clone();
850 acc_df.reserve_chunks(additional);
851 for df in iter {
852 acc_df.vstack_mut(df)?;
853 }
854 Ok(acc_df)
855}
856
857pub fn concat_df_unchecked<'a, I>(dfs: I) -> DataFrame
859where
860 I: IntoIterator<Item = &'a DataFrame>,
861{
862 let mut iter = dfs.into_iter();
863 let additional = iter.size_hint().0;
864 let mut acc_df = iter.next().unwrap().clone();
865 acc_df.reserve_chunks(additional);
866 for df in iter {
867 acc_df.vstack_mut_unchecked(df);
868 }
869 acc_df
870}
871
872pub fn accumulate_dataframes_horizontal(dfs: Vec<DataFrame>) -> PolarsResult<DataFrame> {
873 let mut iter = dfs.into_iter();
874 let mut acc_df = iter.next().unwrap();
875 for df in iter {
876 acc_df.hstack_mut(df.get_columns())?;
877 }
878 Ok(acc_df)
879}
880
881pub fn align_chunks_binary<'a, T, B>(
885 left: &'a ChunkedArray<T>,
886 right: &'a ChunkedArray<B>,
887) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<B>>)
888where
889 B: PolarsDataType,
890 T: PolarsDataType,
891{
892 let assert = || {
893 assert_eq!(
894 left.len(),
895 right.len(),
896 "expected arrays of the same length"
897 )
898 };
899 match (left.chunks.len(), right.chunks.len()) {
900 (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
902 (a, b)
904 if a == b
905 && left
906 .chunk_lengths()
907 .zip(right.chunk_lengths())
908 .all(|(l, r)| l == r) =>
909 {
910 (Cow::Borrowed(left), Cow::Borrowed(right))
911 },
912 (_, 1) => {
913 assert();
914 (
915 Cow::Borrowed(left),
916 Cow::Owned(right.match_chunks(left.chunk_lengths())),
917 )
918 },
919 (1, _) => {
920 assert();
921 (
922 Cow::Owned(left.match_chunks(right.chunk_lengths())),
923 Cow::Borrowed(right),
924 )
925 },
926 (_, _) => {
927 assert();
928 let left = left.rechunk();
930 (
931 Cow::Owned(left.match_chunks(right.chunk_lengths())),
932 Cow::Borrowed(right),
933 )
934 },
935 }
936}
937
938pub fn align_chunks_binary_ca_series<'a, T>(
942 left: &'a ChunkedArray<T>,
943 right: &'a Series,
944) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, Series>)
945where
946 T: PolarsDataType,
947{
948 let assert = || {
949 assert_eq!(
950 left.len(),
951 right.len(),
952 "expected arrays of the same length"
953 )
954 };
955 match (left.chunks.len(), right.chunks().len()) {
956 (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
958 (a, b)
960 if a == b
961 && left
962 .chunk_lengths()
963 .zip(right.chunk_lengths())
964 .all(|(l, r)| l == r) =>
965 {
966 assert();
967 (Cow::Borrowed(left), Cow::Borrowed(right))
968 },
969 (_, 1) => (left.rechunk(), Cow::Borrowed(right)),
970 (1, _) => (Cow::Borrowed(left), Cow::Owned(right.rechunk())),
971 (_, _) => {
972 assert();
973 (left.rechunk(), Cow::Owned(right.rechunk()))
974 },
975 }
976}
977
/// Owned variant of chunk alignment for two `Series`.
///
/// Returns both inputs untouched when they are both single-chunk or already share
/// the same chunk lengths. Otherwise the multi-chunk side is rechunked (both
/// sides, if both are multi-chunk).
#[cfg(feature = "performant")]
pub(crate) fn align_chunks_binary_owned_series(left: Series, right: Series) -> (Series, Series) {
    let n_left = left.chunks().len();
    let n_right = right.chunks().len();

    let already_aligned = (n_left == 1 && n_right == 1)
        || (n_left == n_right
            && left
                .chunk_lengths()
                .zip(right.chunk_lengths())
                .all(|(l, r)| l == r));
    if already_aligned {
        return (left, right);
    }

    match (n_left, n_right) {
        (_, 1) => (left.rechunk(), right),
        (1, _) => (left, right.rechunk()),
        _ => (left.rechunk(), right.rechunk()),
    }
}
997
998pub(crate) fn align_chunks_binary_owned<T, B>(
999 left: ChunkedArray<T>,
1000 right: ChunkedArray<B>,
1001) -> (ChunkedArray<T>, ChunkedArray<B>)
1002where
1003 B: PolarsDataType,
1004 T: PolarsDataType,
1005{
1006 match (left.chunks.len(), right.chunks.len()) {
1007 (1, 1) => (left, right),
1008 (a, b)
1010 if a == b
1011 && left
1012 .chunk_lengths()
1013 .zip(right.chunk_lengths())
1014 .all(|(l, r)| l == r) =>
1015 {
1016 (left, right)
1017 },
1018 (_, 1) => (left.rechunk().into_owned(), right),
1019 (1, _) => (left, right.rechunk().into_owned()),
1020 (_, _) => (left.rechunk().into_owned(), right.rechunk().into_owned()),
1021 }
1022}
1023
/// Three-way counterpart of `align_chunks_binary`: returns `a`, `b` and `c` with
/// identical chunk layouts, borrowing an input whenever its layout is kept.
///
/// # Panics
/// Panics with "expected arrays of the same length" unless all three arrays have
/// the same length. The check is skipped when all three are single-chunk.
#[allow(clippy::type_complexity)]
pub fn align_chunks_ternary<'a, A, B, C>(
    a: &'a ChunkedArray<A>,
    b: &'a ChunkedArray<B>,
    c: &'a ChunkedArray<C>,
) -> (
    Cow<'a, ChunkedArray<A>>,
    Cow<'a, ChunkedArray<B>>,
    Cow<'a, ChunkedArray<C>>,
)
where
    A: PolarsDataType,
    B: PolarsDataType,
    C: PolarsDataType,
{
    // Fast path: three single-chunk arrays need no work (and no length check).
    if a.chunks.len() == 1 && b.chunks.len() == 1 && c.chunks.len() == 1 {
        return (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c));
    }

    assert!(
        a.len() == b.len() && b.len() == c.len(),
        "expected arrays of the same length"
    );

    // Arm order matters: the patterns overlap and the first match wins.
    // NOTE(review): `match_chunks` is defined elsewhere; it presumably re-slices its
    // receiver into the given chunk lengths, which would explain why a multi-chunk
    // array is rechunked first — confirm.
    match (a.chunks.len(), b.chunks.len(), c.chunks.len()) {
        // Only `a` is multi-chunk: adopt `a`'s layout.
        (_, 1, 1) => (
            Cow::Borrowed(a),
            Cow::Owned(b.match_chunks(a.chunk_lengths())),
            Cow::Owned(c.match_chunks(a.chunk_lengths())),
        ),
        // Only `c` is multi-chunk: adopt `c`'s layout.
        (1, 1, _) => (
            Cow::Owned(a.match_chunks(c.chunk_lengths())),
            Cow::Owned(b.match_chunks(c.chunk_lengths())),
            Cow::Borrowed(c),
        ),
        // Only `b` is multi-chunk: adopt `b`'s layout.
        (1, _, 1) => (
            Cow::Owned(a.match_chunks(b.chunk_lengths())),
            Cow::Borrowed(b),
            Cow::Owned(c.match_chunks(b.chunk_lengths())),
        ),
        // `b` and `c` are multi-chunk: rechunk `b`, adopt `c`'s layout.
        (1, _, _) => {
            let b = b.rechunk();
            (
                Cow::Owned(a.match_chunks(c.chunk_lengths())),
                Cow::Owned(b.match_chunks(c.chunk_lengths())),
                Cow::Borrowed(c),
            )
        },
        // `a` and `c` are multi-chunk: rechunk `a`, adopt `c`'s layout.
        (_, 1, _) => {
            let a = a.rechunk();
            (
                Cow::Owned(a.match_chunks(c.chunk_lengths())),
                Cow::Owned(b.match_chunks(c.chunk_lengths())),
                Cow::Borrowed(c),
            )
        },
        // `a` and `b` are multi-chunk: rechunk `b`, adopt `a`'s layout.
        (_, _, 1) => {
            let b = b.rechunk();
            (
                Cow::Borrowed(a),
                Cow::Owned(b.match_chunks(a.chunk_lengths())),
                Cow::Owned(c.match_chunks(a.chunk_lengths())),
            )
        },
        // All multi-chunk but already laid out identically: nothing to do.
        (len_a, len_b, len_c)
            if len_a == len_b
                && len_b == len_c
                && a.chunk_lengths()
                    .zip(b.chunk_lengths())
                    .zip(c.chunk_lengths())
                    .all(|((a, b), c)| a == b && b == c) =>
        {
            (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c))
        },
        // All multi-chunk with differing layouts: rechunk `a` and `b`, adopt `c`'s layout.
        _ => {
            let a = a.rechunk();
            let b = b.rechunk();
            (
                Cow::Owned(a.match_chunks(c.chunk_lengths())),
                Cow::Owned(b.match_chunks(c.chunk_lengths())),
                Cow::Borrowed(c),
            )
        },
    }
}
1112
1113pub fn binary_concatenate_validities<'a, T, B>(
1114 left: &'a ChunkedArray<T>,
1115 right: &'a ChunkedArray<B>,
1116) -> Option<Bitmap>
1117where
1118 B: PolarsDataType,
1119 T: PolarsDataType,
1120{
1121 let (left, right) = align_chunks_binary(left, right);
1122 let left_validity = concatenate_validities(left.chunks());
1123 let right_validity = concatenate_validities(right.chunks());
1124 combine_validities_and(left_validity.as_ref(), right_validity.as_ref())
1125}
1126
1127pub trait IntoVec<T> {
1129 fn into_vec(self) -> Vec<T>;
1130}
1131
1132impl<I, S> IntoVec<PlSmallStr> for I
1133where
1134 I: IntoIterator<Item = S>,
1135 S: Into<PlSmallStr>,
1136{
1137 fn into_vec(self) -> Vec<PlSmallStr> {
1138 self.into_iter().map(|s| s.into()).collect()
1139 }
1140}
1141
1142#[inline]
1147pub(crate) fn index_to_chunked_index<
1148 I: Iterator<Item = Idx>,
1149 Idx: PartialOrd + std::ops::AddAssign + std::ops::SubAssign + Zero + One,
1150>(
1151 chunk_lens: I,
1152 index: Idx,
1153) -> (Idx, Idx) {
1154 let mut index_remainder = index;
1155 let mut current_chunk_idx = Zero::zero();
1156
1157 for chunk_len in chunk_lens {
1158 if chunk_len > index_remainder {
1159 break;
1160 } else {
1161 index_remainder -= chunk_len;
1162 current_chunk_idx += One::one();
1163 }
1164 }
1165 (current_chunk_idx, index_remainder)
1166}
1167
1168pub(crate) fn index_to_chunked_index_rev<
1169 I: Iterator<Item = Idx>,
1170 Idx: PartialOrd
1171 + std::ops::AddAssign
1172 + std::ops::SubAssign
1173 + std::ops::Sub<Output = Idx>
1174 + Zero
1175 + One
1176 + Copy
1177 + std::fmt::Debug,
1178>(
1179 chunk_lens_rev: I,
1180 index_from_back: Idx,
1181 total_chunks: Idx,
1182) -> (Idx, Idx) {
1183 debug_assert!(index_from_back > Zero::zero(), "at least -1");
1184 let mut index_remainder = index_from_back;
1185 let mut current_chunk_idx = One::one();
1186 let mut current_chunk_len = Zero::zero();
1187
1188 for chunk_len in chunk_lens_rev {
1189 current_chunk_len = chunk_len;
1190 if chunk_len >= index_remainder {
1191 break;
1192 } else {
1193 index_remainder -= chunk_len;
1194 current_chunk_idx += One::one();
1195 }
1196 }
1197 (
1198 total_chunks - current_chunk_idx,
1199 current_chunk_len - index_remainder,
1200 )
1201}
1202
1203pub(crate) fn first_non_null<'a, I>(iter: I) -> Option<usize>
1204where
1205 I: Iterator<Item = Option<&'a Bitmap>>,
1206{
1207 let mut offset = 0;
1208 for validity in iter {
1209 if let Some(validity) = validity {
1210 let mask = BitMask::from_bitmap(validity);
1211 if let Some(n) = mask.nth_set_bit_idx(0, 0) {
1212 return Some(offset + n);
1213 }
1214 offset += validity.len()
1215 } else {
1216 return Some(offset);
1217 }
1218 }
1219 None
1220}
1221
1222pub(crate) fn last_non_null<'a, I>(iter: I, len: usize) -> Option<usize>
1223where
1224 I: DoubleEndedIterator<Item = Option<&'a Bitmap>>,
1225{
1226 if len == 0 {
1227 return None;
1228 }
1229 let mut offset = 0;
1230 for validity in iter.rev() {
1231 if let Some(validity) = validity {
1232 let mask = BitMask::from_bitmap(validity);
1233 if let Some(n) = mask.nth_set_bit_idx_rev(0, mask.len()) {
1234 let mask_start = len - offset - mask.len();
1235 return Some(mask_start + n);
1236 }
1237 offset += validity.len()
1238 } else {
1239 return Some(len - 1 - offset);
1240 }
1241 }
1242 None
1243}
1244
1245pub fn coalesce_nulls<'a, T: PolarsDataType>(
1247 a: &'a ChunkedArray<T>,
1248 b: &'a ChunkedArray<T>,
1249) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<T>>) {
1250 if a.null_count() > 0 || b.null_count() > 0 {
1251 let (a, b) = align_chunks_binary(a, b);
1252 let mut b = b.into_owned();
1253 let a = a.coalesce_nulls(b.chunks());
1254
1255 for arr in a.chunks().iter() {
1256 for arr_b in unsafe { b.chunks_mut() } {
1257 *arr_b = arr_b.with_validity(arr.validity().cloned())
1258 }
1259 }
1260 b.compute_len();
1261 (Cow::Owned(a), Cow::Owned(b))
1262 } else {
1263 (Cow::Borrowed(a), Cow::Borrowed(b))
1264 }
1265}
1266
1267pub fn coalesce_nulls_columns(a: &Column, b: &Column) -> (Column, Column) {
1268 if a.null_count() > 0 || b.null_count() > 0 {
1269 let mut a = a.as_materialized_series().rechunk();
1270 let mut b = b.as_materialized_series().rechunk();
1271 for (arr_a, arr_b) in unsafe { a.chunks_mut().iter_mut().zip(b.chunks_mut()) } {
1272 let validity = match (arr_a.validity(), arr_b.validity()) {
1273 (None, Some(b)) => Some(b.clone()),
1274 (Some(a), Some(b)) => Some(a & b),
1275 (Some(a), None) => Some(a.clone()),
1276 (None, None) => None,
1277 };
1278 *arr_a = arr_a.with_validity(validity.clone());
1279 *arr_b = arr_b.with_validity(validity);
1280 }
1281 a.compute_len();
1282 b.compute_len();
1283 (a.into(), b.into())
1284 } else {
1285 (a.clone(), b.clone())
1286 }
1287}
1288
1289pub fn operation_exceeded_idxsize_msg(operation: &str) -> String {
1290 if size_of::<IdxSize>() == size_of::<u32>() {
1291 format!(
1292 "{} exceeded the maximum supported limit of {} rows. Consider installing 'polars-u64-idx'.",
1293 operation,
1294 IdxSize::MAX,
1295 )
1296 } else {
1297 format!(
1298 "{} exceeded the maximum supported limit of {} rows.",
1299 operation,
1300 IdxSize::MAX,
1301 )
1302 }
1303}
1304
#[cfg(test)]
mod test {
    use super::*;

    /// Splitting 10 rows into 3 parts is expected to give parts of 3, 3 and 4
    /// rows.
    #[test]
    fn test_split() {
        let ca: Int32Chunked = (0..10).collect_ca("a".into());

        let parts = split(&ca, 3);
        for (idx, expected_len) in vec![3, 3, 4].into_iter().enumerate() {
            assert_eq!(parts[idx].len(), expected_len);
        }
    }

    /// After `align_chunks_binary`, both sides must report identical chunk
    /// lengths.
    #[test]
    fn test_align_chunks() -> PolarsResult<()> {
        // Scenario 1: a freshly built 4-row array against an array built by
        // appending 3 rows to a 1-row array.
        {
            let whole = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
            let mut appended = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
            let tail = Int32Chunked::new(PlSmallStr::EMPTY, &[2, 3, 4]);
            appended.append(&tail)?;

            let (lhs, rhs) = align_chunks_binary(&whole, &appended);
            let lhs_lens = lhs.chunk_lengths().collect::<Vec<_>>();
            let rhs_lens = rhs.chunk_lengths().collect::<Vec<_>>();
            assert_eq!(lhs_lens, rhs_lens);
        }

        // Scenario 2: a freshly built 4-row array against a 1-row array that
        // had a 1-row array appended three times.
        {
            let whole = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
            let unit = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
            let mut appended = unit.clone();
            for _ in 0..3 {
                appended.append(&unit)?;
            }

            let (lhs, rhs) = align_chunks_binary(&whole, &appended);
            let lhs_lens = lhs.chunk_lengths().collect::<Vec<_>>();
            let rhs_lens = rhs.chunk_lengths().collect::<Vec<_>>();
            assert_eq!(lhs_lens, rhs_lens);
        }

        Ok(())
    }
}