1mod any_value;
2use arrow::compute::concatenate::concatenate_validities;
3use arrow::compute::utils::combine_validities_and;
4pub mod flatten;
5pub(crate) mod series;
6mod supertype;
7use std::borrow::Cow;
8use std::ops::{Deref, DerefMut};
9mod schema;
10
11pub use any_value::*;
12use arrow::bitmap::Bitmap;
13use arrow::bitmap::bitmask::BitMask;
14pub use arrow::legacy::utils::*;
15pub use arrow::trusted_len::TrustMyLength;
16use flatten::*;
17use num_traits::{One, Zero};
18use rayon::prelude::*;
19pub use schema::*;
20pub use series::*;
21pub use supertype::*;
22pub use {arrow, rayon};
23
24use crate::POOL;
25use crate::prelude::*;
26
/// Transparent single-field newtype around `T`.
///
/// The wrapped value is public and is also reachable through [`Deref`].
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T> Deref for Wrap<T> {
    type Target = T;

    /// Borrows the wrapped value.
    fn deref(&self) -> &T {
        let Wrap(inner) = self;
        inner
    }
}
36
37#[inline(always)]
38pub fn _set_partition_size() -> usize {
39 POOL.current_num_threads()
40}
41
/// Wrapper that owns a single private value of type `T`.
///
/// NOTE(review): the name suggests it tags a container as holding no null
/// values; nothing in this block enforces that — confirm against callers.
pub struct NoNull<T> {
    inner: T,
}

impl<T> NoNull<T> {
    /// Wraps `inner`.
    pub fn new(inner: T) -> Self {
        Self { inner }
    }

    /// Consumes the wrapper and hands back the wrapped value.
    pub fn into_inner(self) -> T {
        let Self { inner } = self;
        inner
    }
}

impl<T> Deref for NoNull<T> {
    type Target = T;

    /// Shared access to the wrapped value.
    fn deref(&self) -> &T {
        let Self { inner } = self;
        inner
    }
}

impl<T> DerefMut for NoNull<T> {
    /// Exclusive access to the wrapped value.
    fn deref_mut(&mut self) -> &mut T {
        let Self { inner } = self;
        inner
    }
}
75
/// Picks an allocation capacity from an iterator's `size_hint`.
///
/// * If an upper bound is known it is returned.
/// * Otherwise the lower bound is returned, unless it is `0`, in which case
///   `1024` is used.
pub(crate) fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    let (lower, upper) = iter.size_hint();
    if let Some(upper) = upper {
        return upper;
    }
    if lower == 0 { 1024 } else { lower }
}
83
84#[doc(hidden)]
87pub fn _split_offsets(len: usize, n: usize) -> Vec<(usize, usize)> {
88 if n == 1 {
89 vec![(0, len)]
90 } else {
91 let chunk_size = len / n;
92
93 (0..n)
94 .map(|partition| {
95 let offset = partition * chunk_size;
96 let len = if partition == (n - 1) {
97 len - offset
98 } else {
99 chunk_size
100 };
101 (partition * chunk_size, len)
102 })
103 .collect_trusted()
104 }
105}
106
/// Abstraction over cloneable, chunked containers that can be measured,
/// sliced and split. Used by the generic `split*` helpers in this module.
#[allow(clippy::len_without_is_empty)]
pub trait Container: Clone {
    /// Total length of the container.
    fn len(&self) -> usize;

    /// Number of chunks the container currently consists of.
    fn n_chunks(&self) -> usize;

    /// Length of every chunk.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize>;

    /// Iterates over the container, yielding one container per chunk.
    fn iter_chunks(&self) -> impl Iterator<Item = Self>;

    /// Returns the part of the container described by `offset` and `len`.
    /// The exact offset semantics are defined by the implementor.
    fn slice(&self, offset: i64, len: usize) -> Self;

    /// Splits the container in two at `offset`.
    fn split_at(&self, offset: i64) -> (Self, Self);
}
121
122impl Container for DataFrame {
123 fn slice(&self, offset: i64, len: usize) -> Self {
124 DataFrame::slice(self, offset, len)
125 }
126
127 fn split_at(&self, offset: i64) -> (Self, Self) {
128 DataFrame::split_at(self, offset)
129 }
130
131 fn len(&self) -> usize {
132 self.height()
133 }
134
135 fn iter_chunks(&self) -> impl Iterator<Item = Self> {
136 flatten_df_iter(self)
137 }
138
139 fn n_chunks(&self) -> usize {
140 DataFrame::first_col_n_chunks(self)
141 }
142
143 fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
144 self.columns[0].as_materialized_series().chunk_lengths()
146 }
147}
148
149impl<T: PolarsDataType> Container for ChunkedArray<T> {
150 fn slice(&self, offset: i64, len: usize) -> Self {
151 ChunkedArray::slice(self, offset, len)
152 }
153
154 fn split_at(&self, offset: i64) -> (Self, Self) {
155 ChunkedArray::split_at(self, offset)
156 }
157
158 fn len(&self) -> usize {
159 ChunkedArray::len(self)
160 }
161
162 fn iter_chunks(&self) -> impl Iterator<Item = Self> {
163 self.downcast_iter()
164 .map(|arr| Self::with_chunk(self.name().clone(), arr.clone()))
165 }
166
167 fn n_chunks(&self) -> usize {
168 self.chunks().len()
169 }
170
171 fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
172 ChunkedArray::chunk_lengths(self)
173 }
174}
175
176impl Container for Series {
177 fn slice(&self, offset: i64, len: usize) -> Self {
178 self.0.slice(offset, len)
179 }
180
181 fn split_at(&self, offset: i64) -> (Self, Self) {
182 self.0.split_at(offset)
183 }
184
185 fn len(&self) -> usize {
186 self.0.len()
187 }
188
189 fn iter_chunks(&self) -> impl Iterator<Item = Self> {
190 (0..self.0.n_chunks()).map(|i| self.select_chunk(i))
191 }
192
193 fn n_chunks(&self) -> usize {
194 self.chunks().len()
195 }
196
197 fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
198 self.0.chunk_lengths()
199 }
200}
201
202fn split_impl<C: Container>(container: &C, target: usize, chunk_size: usize) -> Vec<C> {
203 if target == 1 {
204 return vec![container.clone()];
205 }
206 let mut out = Vec::with_capacity(target);
207 let chunk_size = chunk_size as i64;
208
209 let (chunk, mut remainder) = container.split_at(chunk_size);
211 out.push(chunk);
212
213 for _ in 1..target - 1 {
215 let (a, b) = remainder.split_at(chunk_size);
216 out.push(a);
217 remainder = b
218 }
219 out.push(remainder);
221 out
222}
223
224pub fn split<C: Container>(container: &C, target: usize) -> Vec<C> {
226 let total_len = container.len();
227 if total_len == 0 {
228 return vec![container.clone()];
229 }
230
231 let chunk_size = std::cmp::max(total_len / target, 1);
232
233 if container.n_chunks() == target
234 && container
235 .chunk_lengths()
236 .all(|len| len.abs_diff(chunk_size) < 100)
237 {
238 return container.iter_chunks().collect();
239 }
240 split_impl(container, target, chunk_size)
241}
242
243pub fn split_and_flatten<C: Container>(container: &C, target: usize) -> Vec<C> {
246 let total_len = container.len();
247 if total_len == 0 {
248 return vec![container.clone()];
249 }
250
251 let chunk_size = std::cmp::max(total_len / target, 1);
252
253 if container.n_chunks() == target
254 && container
255 .chunk_lengths()
256 .all(|len| len.abs_diff(chunk_size) < 100)
257 {
258 return container.iter_chunks().collect();
259 }
260
261 if container.n_chunks() == 1 {
262 split_impl(container, target, chunk_size)
263 } else {
264 let mut out = Vec::with_capacity(target);
265 let chunks = container.iter_chunks();
266
267 'new_chunk: for mut chunk in chunks {
268 loop {
269 let h = chunk.len();
270 if h < chunk_size {
271 out.push(chunk);
273 continue 'new_chunk;
274 }
275
276 if ((h - chunk_size) as f64 / chunk_size as f64) < 0.3 {
278 out.push(chunk);
279 continue 'new_chunk;
280 }
281
282 let (a, b) = chunk.split_at(chunk_size as i64);
283 out.push(a);
284 chunk = b;
285 }
286 }
287 out
288 }
289}
290
291pub fn split_df_as_ref(df: &DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
297 if strict {
298 split(df, target)
299 } else {
300 split_and_flatten(df, target)
301 }
302}
303
304#[doc(hidden)]
305pub fn split_df(df: &mut DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
308 if target == 0 || df.is_empty() {
309 return vec![df.clone()];
310 }
311 df.align_chunks_par();
313 split_df_as_ref(df, target, strict)
314}
315
316pub fn slice_slice<T>(vals: &[T], offset: i64, len: usize) -> &[T] {
317 let (raw_offset, slice_len) = slice_offsets(offset, len, vals.len());
318 &vals[raw_offset..raw_offset + slice_len]
319}
320
/// Converts a possibly negative `offset` and a `length` into an in-bounds
/// `(start, len)` pair for an array of `array_len` elements.
///
/// A negative `offset` is interpreted relative to the end of the array.
/// Start and stop are both clamped to `0..=array_len`, so the returned
/// range never exceeds the array.
///
/// # Panics
/// Panics if `array_len` does not fit in an `i64`.
#[inline]
#[doc(hidden)]
pub fn slice_offsets(offset: i64, length: usize, array_len: usize) -> (usize, usize) {
    let upper: i64 = array_len
        .try_into()
        .expect("array length larger than i64::MAX");

    // Resolve a from-the-end offset; saturating keeps extreme inputs defined.
    let start = match offset {
        from_end if from_end < 0 => from_end.saturating_add_unsigned(array_len as u64),
        from_start => from_start,
    };
    let stop = start.saturating_add_unsigned(length as u64);

    let start = start.clamp(0, upper);
    let stop = stop.clamp(0, upper);

    (start as usize, (stop - start) as usize)
}
341
/// Dispatches on a `DataType` value and invokes one of three macros.
///
/// * `String` invokes `$macro_string!(args...)`.
/// * `Boolean` invokes `$macro_bool!(args...)`.
/// * Numeric dtypes invoke `$macro!(<primitive type>, args...)`, e.g.
///   `$macro!(u32, args...)`.
///
/// Some integer arms are only compiled with the matching `dtype-*` feature.
/// Any other dtype panics.
///
/// The optional arguments are forwarded comma-separated to every macro;
/// the string and boolean arms previously emitted them without separators.
#[macro_export]
macro_rules! match_dtype_to_physical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            DataType::String => $macro_string!($($opt_args),*),
            DataType::Boolean => $macro_bool!($($opt_args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(u8 $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(u16 $(, $opt_args)*),
            DataType::UInt32 => $macro!(u32 $(, $opt_args)*),
            DataType::UInt64 => $macro!(u64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(i8 $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(i16 $(, $opt_args)*),
            DataType::Int32 => $macro!(i32 $(, $opt_args)*),
            DataType::Int64 => $macro!(i64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(i128 $(, $opt_args)*),
            DataType::Float32 => $macro!(f32 $(, $opt_args)*),
            DataType::Float64 => $macro!(f64 $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
369
/// Dispatches on a `DataType` value and invokes one of four macros.
///
/// * `String` invokes `$macro_string!(args...)`.
/// * `Binary` invokes `$macro_binary!(args...)`.
/// * `Boolean` invokes `$macro_bool!(args...)`.
/// * Numeric dtypes invoke `$macro!(<XxxType>, args...)`, e.g.
///   `$macro!(UInt32Type, args...)`.
///
/// Some integer arms are only compiled with the matching `dtype-*` feature.
/// Any other dtype panics.
///
/// The optional arguments are forwarded comma-separated to every macro;
/// the string, binary and boolean arms previously emitted them without
/// separators.
#[macro_export]
macro_rules! match_dtype_to_logical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_binary:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            DataType::String => $macro_string!($($opt_args),*),
            DataType::Binary => $macro_binary!($($opt_args),*),
            DataType::Boolean => $macro_bool!($($opt_args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(UInt8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(UInt16Type $(, $opt_args)*),
            DataType::UInt32 => $macro!(UInt32Type $(, $opt_args)*),
            DataType::UInt64 => $macro!(UInt64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(Int8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(Int16Type $(, $opt_args)*),
            DataType::Int32 => $macro!(Int32Type $(, $opt_args)*),
            DataType::Int64 => $macro!(Int64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(Int128Type $(, $opt_args)*),
            DataType::Float32 => $macro!(Float32Type $(, $opt_args)*),
            DataType::Float64 => $macro!(Float64Type $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
398
/// Dispatches on `$ca.dtype()` and invokes a macro on the downcast array.
///
/// * `String` invokes `$macro_string!($ca.str().unwrap(), args...)`.
/// * `Boolean` invokes `$macro_bool!($ca.bool().unwrap(), args...)`.
/// * Numeric dtypes invoke `$macro!` with the matching typed accessor,
///   e.g. `$ca.i32().unwrap()`.
///
/// Some integer arms are only compiled with the matching `dtype-*` feature.
/// Any other dtype panics.
#[macro_export]
macro_rules! match_arrow_dtype_apply_macro_ca {
    ($ca:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $extra:expr)*) => {{
        match $ca.dtype() {
            DataType::Boolean => $macro_bool!($ca.bool().unwrap() $(, $extra)*),
            DataType::String => $macro_string!($ca.str().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($ca.i8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($ca.i16().unwrap() $(, $extra)*),
            DataType::Int32 => $macro!($ca.i32().unwrap() $(, $extra)*),
            DataType::Int64 => $macro!($ca.i64().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($ca.i128().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($ca.u8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($ca.u16().unwrap() $(, $extra)*),
            DataType::UInt32 => $macro!($ca.u32().unwrap() $(, $extra)*),
            DataType::UInt64 => $macro!($ca.u64().unwrap() $(, $extra)*),
            DataType::Float32 => $macro!($ca.f32().unwrap() $(, $extra)*),
            DataType::Float64 => $macro!($ca.f64().unwrap() $(, $extra)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
426
/// Evaluates a body with a type parameter bound to the primitive Rust type
/// that corresponds to a numeric `DataType`.
///
/// Invoked as `with_match_physical_numeric_type!(dtype, |$T| body)`; inside
/// `body`, `$T` is replaced by `i8`..`i128`, `u8`..`u64`, `f32` or `f64`.
/// Some integer arms are only compiled with the matching `dtype-*` feature.
/// Any other dtype panics.
#[macro_export]
macro_rules! with_match_physical_numeric_type {(
    $dtype:expr, | $d:tt $Ty:ident | $($body:tt)*
) => ({
    // Local helper macro that substitutes the chosen type into the body.
    macro_rules! __with_ty__ {( $d $Ty:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
453
/// Evaluates a body with a type parameter bound to the primitive Rust type
/// that corresponds to an integer `DataType`.
///
/// Invoked as `with_match_physical_integer_type!(dtype, |$T| body)`; inside
/// `body`, `$T` is replaced by `i8`..`i128` or `u8`..`u64`. Some arms are
/// only compiled with the matching `dtype-*` feature. Any other dtype
/// (including floats) panics.
#[macro_export]
macro_rules! with_match_physical_integer_type {(
    $dtype:expr, | $d:tt $Ty:ident | $($body:tt)*
) => ({
    // Local helper macro that substitutes the chosen type into the body.
    macro_rules! __with_ty__ {( $d $Ty:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
478
/// Evaluates a body with a type parameter bound to `f32` or `f64`,
/// depending on a float `DataType`.
///
/// Invoked as `with_match_physical_float_type!(dtype, |$T| body)`. Any
/// dtype other than `Float32`/`Float64` panics.
#[macro_export]
macro_rules! with_match_physical_float_type {(
    $dtype:expr, | $d:tt $Ty:ident | $($body:tt)*
) => ({
    // Local helper macro that substitutes the chosen type into the body.
    macro_rules! __with_ty__ {( $d $Ty:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        Float64 => __with_ty__! { f64 },
        Float32 => __with_ty__! { f32 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
491
/// Evaluates a body with a type parameter bound to `Float32Type` or
/// `Float64Type`, depending on a float `DataType`.
///
/// Invoked as `with_match_physical_float_polars_type!(dtype, |$T| body)`.
/// The `*Type` names are resolved at the call site, so they must be in
/// scope there. Any dtype other than `Float32`/`Float64` panics.
#[macro_export]
macro_rules! with_match_physical_float_polars_type {(
    $key_type:expr, | $d:tt $Ty:ident | $($body:tt)*
) => ({
    // Local helper macro that substitutes the chosen type into the body.
    macro_rules! __with_ty__ {( $d $Ty:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        Float64 => __with_ty__! { Float64Type },
        Float32 => __with_ty__! { Float32Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
504
/// Evaluates a body with a type parameter bound to the `*Type` marker
/// (e.g. `Int32Type`, `Float64Type`) that corresponds to a numeric
/// `DataType`.
///
/// Invoked as `with_match_physical_numeric_polars_type!(dtype, |$T| body)`.
/// The `*Type` names are resolved at the call site, so they must be in
/// scope there. Some integer arms are only compiled with the matching
/// `dtype-*` feature. Any other dtype panics.
#[macro_export]
macro_rules! with_match_physical_numeric_polars_type {(
    $key_type:expr, | $d:tt $Ty:ident | $($body:tt)*
) => ({
    // Local helper macro that substitutes the chosen type into the body.
    macro_rules! __with_ty__ {( $d $Ty:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
531
/// Evaluates a body with a type parameter bound to the `*Type` marker
/// (e.g. `Int32Type`, `UInt64Type`) that corresponds to an integer
/// `DataType`.
///
/// Invoked as `with_match_physical_integer_polars_type!(dtype, |$T| body)`.
/// The expansion glob-imports `$crate::datatypes::*`, so the marker types
/// need not be imported by the caller. Some arms are only compiled with the
/// matching `dtype-*` feature. Any other dtype (including floats) panics.
#[macro_export]
macro_rules! with_match_physical_integer_polars_type {(
    $key_type:expr, | $d:tt $Ty:ident | $($body:tt)*
) => ({
    // Local helper macro that substitutes the chosen type into the body.
    macro_rules! __with_ty__ {( $d $Ty:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    use $crate::datatypes::*;
    match $key_type {
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
557
/// Dispatches on `$series.dtype()` and invokes `$macro!` with the series
/// downcast through the matching typed accessor (e.g.
/// `$series.i32().unwrap()`), followed by any extra arguments.
///
/// Only numeric dtypes are handled; some integer arms are only compiled
/// with the matching `dtype-*` feature. Any other dtype panics.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical {
    ($series:expr, $macro:ident $(, $extra:expr)*) => {{
        match $series.dtype() {
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($series.i8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($series.i16().unwrap() $(, $extra)*),
            DataType::Int32 => $macro!($series.i32().unwrap() $(, $extra)*),
            DataType::Int64 => $macro!($series.i64().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($series.i128().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($series.u8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($series.u16().unwrap() $(, $extra)*),
            DataType::UInt32 => $macro!($series.u32().unwrap() $(, $extra)*),
            DataType::UInt64 => $macro!($series.u64().unwrap() $(, $extra)*),
            DataType::Float32 => $macro!($series.f32().unwrap() $(, $extra)*),
            DataType::Float64 => $macro!($series.f64().unwrap() $(, $extra)*),
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
584
/// Mutable counterpart of `downcast_as_macro_arg_physical!`.
///
/// Dispatches on the dtype of `$series`, obtains a `&mut XxxChunked` via
/// `as_mut()` and invokes `$macro!(XxxType, <that reference>, args...)`.
///
/// The dtype is cloned before matching so that the scrutinee is an owned
/// value rather than a borrow of `$series`, which is then borrowed mutably
/// inside each arm. Only numeric dtypes are handled; some integer arms are
/// only compiled with the matching `dtype-*` feature. Any other dtype
/// panics.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical_mut {
    ($series:expr, $macro:ident $(, $extra:expr)*) => {{
        match $series.dtype().clone() {
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => {
                let chunked: &mut Int8Chunked = $series.as_mut();
                $macro!(Int8Type, chunked $(, $extra)*)
            },
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => {
                let chunked: &mut Int16Chunked = $series.as_mut();
                $macro!(Int16Type, chunked $(, $extra)*)
            },
            DataType::Int32 => {
                let chunked: &mut Int32Chunked = $series.as_mut();
                $macro!(Int32Type, chunked $(, $extra)*)
            },
            DataType::Int64 => {
                let chunked: &mut Int64Chunked = $series.as_mut();
                $macro!(Int64Type, chunked $(, $extra)*)
            },
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => {
                let chunked: &mut Int128Chunked = $series.as_mut();
                $macro!(Int128Type, chunked $(, $extra)*)
            },
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => {
                let chunked: &mut UInt8Chunked = $series.as_mut();
                $macro!(UInt8Type, chunked $(, $extra)*)
            },
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => {
                let chunked: &mut UInt16Chunked = $series.as_mut();
                $macro!(UInt16Type, chunked $(, $extra)*)
            },
            DataType::UInt32 => {
                let chunked: &mut UInt32Chunked = $series.as_mut();
                $macro!(UInt32Type, chunked $(, $extra)*)
            },
            DataType::UInt64 => {
                let chunked: &mut UInt64Chunked = $series.as_mut();
                $macro!(UInt64Type, chunked $(, $extra)*)
            },
            DataType::Float32 => {
                let chunked: &mut Float32Chunked = $series.as_mut();
                $macro!(Float32Type, chunked $(, $extra)*)
            },
            DataType::Float64 => {
                let chunked: &mut Float64Chunked = $series.as_mut();
                $macro!(Float64Type, chunked $(, $extra)*)
            },
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
645
/// Calls `$method(args...)` on `$series` after downcasting it through the
/// accessor that matches its dtype.
///
/// Handles boolean, string, numeric, `Time`, `Date`, `Datetime`, `List`
/// and `Struct` dtypes; some integer arms are only compiled with the
/// matching `dtype-*` feature. Any other dtype panics.
#[macro_export]
macro_rules! apply_method_all_arrow_series {
    ($series:expr, $method:ident, $($args:expr),*) => {
        match $series.dtype() {
            DataType::Boolean => $series.bool().unwrap().$method($($args),*),
            DataType::String => $series.str().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $series.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $series.i16().unwrap().$method($($args),*),
            DataType::Int32 => $series.i32().unwrap().$method($($args),*),
            DataType::Int64 => $series.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $series.i128().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $series.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $series.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $series.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $series.u64().unwrap().$method($($args),*),
            DataType::Float32 => $series.f32().unwrap().$method($($args),*),
            DataType::Float64 => $series.f64().unwrap().$method($($args),*),
            DataType::Date => $series.date().unwrap().$method($($args),*),
            DataType::Time => $series.time().unwrap().$method($($args),*),
            DataType::Datetime(_, _) => $series.datetime().unwrap().$method($($args),*),
            DataType::List(_) => $series.list().unwrap().$method($($args),*),
            DataType::Struct(_) => $series.struct_().unwrap().$method($($args),*),
            dt => panic!("dtype {:?} not supported", dt)
        }
    }
}
677
/// Calls `$method(args...)` on `$series` after downcasting it through the
/// integer accessor that matches its dtype.
///
/// Some arms are only compiled with the matching `dtype-*` feature. Any
/// non-integer dtype panics.
#[macro_export]
macro_rules! apply_method_physical_integer {
    ($series:expr, $method:ident, $($args:expr),*) => {
        match $series.dtype() {
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $series.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $series.i16().unwrap().$method($($args),*),
            DataType::Int32 => $series.i32().unwrap().$method($($args),*),
            DataType::Int64 => $series.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $series.i128().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $series.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $series.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $series.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $series.u64().unwrap().$method($($args),*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }
}
700
/// Calls `$method(args...)` on `$self` after downcasting it through the
/// numeric accessor that matches its dtype.
///
/// Floats are handled here; every other dtype is forwarded to
/// `apply_method_physical_integer!`, which panics for non-integer dtypes.
///
/// The integer macro is referenced through `$crate::` so that callers of
/// this exported macro do not have to bring it into scope themselves.
#[macro_export]
macro_rules! apply_method_physical_numeric {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            _ => $crate::apply_method_physical_integer!($self, $method, $($args),*),
        }
    }
}
712
/// Builds a `DataFrame` from `name => values` pairs.
///
/// Each pair is turned into a `Series` through `NamedFrom::new` (with the
/// name converted via `.into()`), wrapped in a `Column`, and the resulting
/// vector is passed to `DataFrame::new`. The macro evaluates to whatever
/// `DataFrame::new` returns. At least one pair is required; a trailing
/// comma is accepted.
#[macro_export]
macro_rules! df {
    ($($name:expr => $values:expr), + $(,)?) => {
        $crate::prelude::DataFrame::new(vec![
            $(
                $crate::prelude::Column::from(
                    <$crate::prelude::Series as $crate::prelude::NamedFrom::<_, _>>::new(
                        $name.into(),
                        $values,
                    )
                ),
            )+
        ])
    }
}
721
722pub fn get_time_units(tu_l: &TimeUnit, tu_r: &TimeUnit) -> TimeUnit {
723 use TimeUnit::*;
724 match (tu_l, tu_r) {
725 (Nanoseconds, Microseconds) => Microseconds,
726 (_, Milliseconds) => Milliseconds,
727 _ => *tu_l,
728 }
729}
730
731#[cold]
732#[inline(never)]
733fn width_mismatch(df1: &DataFrame, df2: &DataFrame) -> PolarsError {
734 let mut df1_extra = Vec::new();
735 let mut df2_extra = Vec::new();
736
737 let s1 = df1.schema();
738 let s2 = df2.schema();
739
740 s1.field_compare(s2, &mut df1_extra, &mut df2_extra);
741
742 let df1_extra = df1_extra
743 .into_iter()
744 .map(|(_, (n, _))| n.as_str())
745 .collect::<Vec<_>>()
746 .join(", ");
747 let df2_extra = df2_extra
748 .into_iter()
749 .map(|(_, (n, _))| n.as_str())
750 .collect::<Vec<_>>()
751 .join(", ");
752
753 polars_err!(
754 SchemaMismatch: r#"unable to vstack, dataframes have different widths ({} != {}).
755One dataframe has additional columns: [{df1_extra}].
756Other dataframe has additional columns: [{df2_extra}]."#,
757 df1.width(),
758 df2.width(),
759 )
760}
761
762pub fn accumulate_dataframes_vertical_unchecked_optional<I>(dfs: I) -> Option<DataFrame>
763where
764 I: IntoIterator<Item = DataFrame>,
765{
766 let mut iter = dfs.into_iter();
767 let additional = iter.size_hint().0;
768 let mut acc_df = iter.next()?;
769 acc_df.reserve_chunks(additional);
770
771 for df in iter {
772 if acc_df.width() != df.width() {
773 panic!("{}", width_mismatch(&acc_df, &df));
774 }
775
776 acc_df.vstack_mut_owned_unchecked(df);
777 }
778 Some(acc_df)
779}
780
781pub fn accumulate_dataframes_vertical_unchecked<I>(dfs: I) -> DataFrame
784where
785 I: IntoIterator<Item = DataFrame>,
786{
787 let mut iter = dfs.into_iter();
788 let additional = iter.size_hint().0;
789 let mut acc_df = iter.next().unwrap();
790 acc_df.reserve_chunks(additional);
791
792 for df in iter {
793 if acc_df.width() != df.width() {
794 panic!("{}", width_mismatch(&acc_df, &df));
795 }
796
797 acc_df.vstack_mut_owned_unchecked(df);
798 }
799 acc_df
800}
801
802pub fn accumulate_dataframes_vertical<I>(dfs: I) -> PolarsResult<DataFrame>
806where
807 I: IntoIterator<Item = DataFrame>,
808{
809 let mut iter = dfs.into_iter();
810 let additional = iter.size_hint().0;
811 let mut acc_df = iter.next().unwrap();
812 acc_df.reserve_chunks(additional);
813 for df in iter {
814 if acc_df.width() != df.width() {
815 return Err(width_mismatch(&acc_df, &df));
816 }
817
818 acc_df.vstack_mut_owned(df)?;
819 }
820
821 Ok(acc_df)
822}
823
824pub fn concat_df<'a, I>(dfs: I) -> PolarsResult<DataFrame>
826where
827 I: IntoIterator<Item = &'a DataFrame>,
828{
829 let mut iter = dfs.into_iter();
830 let additional = iter.size_hint().0;
831 let mut acc_df = iter.next().unwrap().clone();
832 acc_df.reserve_chunks(additional);
833 for df in iter {
834 acc_df.vstack_mut(df)?;
835 }
836 Ok(acc_df)
837}
838
839pub fn concat_df_unchecked<'a, I>(dfs: I) -> DataFrame
841where
842 I: IntoIterator<Item = &'a DataFrame>,
843{
844 let mut iter = dfs.into_iter();
845 let additional = iter.size_hint().0;
846 let mut acc_df = iter.next().unwrap().clone();
847 acc_df.reserve_chunks(additional);
848 for df in iter {
849 acc_df.vstack_mut_unchecked(df);
850 }
851 acc_df
852}
853
854pub fn accumulate_dataframes_horizontal(dfs: Vec<DataFrame>) -> PolarsResult<DataFrame> {
855 let mut iter = dfs.into_iter();
856 let mut acc_df = iter.next().unwrap();
857 for df in iter {
858 acc_df.hstack_mut(df.get_columns())?;
859 }
860 Ok(acc_df)
861}
862
863pub fn align_chunks_binary<'a, T, B>(
867 left: &'a ChunkedArray<T>,
868 right: &'a ChunkedArray<B>,
869) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<B>>)
870where
871 B: PolarsDataType,
872 T: PolarsDataType,
873{
874 let assert = || {
875 assert_eq!(
876 left.len(),
877 right.len(),
878 "expected arrays of the same length"
879 )
880 };
881 match (left.chunks.len(), right.chunks.len()) {
882 (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
884 (a, b)
886 if a == b
887 && left
888 .chunk_lengths()
889 .zip(right.chunk_lengths())
890 .all(|(l, r)| l == r) =>
891 {
892 (Cow::Borrowed(left), Cow::Borrowed(right))
893 },
894 (_, 1) => {
895 assert();
896 (
897 Cow::Borrowed(left),
898 Cow::Owned(right.match_chunks(left.chunk_lengths())),
899 )
900 },
901 (1, _) => {
902 assert();
903 (
904 Cow::Owned(left.match_chunks(right.chunk_lengths())),
905 Cow::Borrowed(right),
906 )
907 },
908 (_, _) => {
909 assert();
910 let left = left.rechunk();
912 (
913 Cow::Owned(left.match_chunks(right.chunk_lengths())),
914 Cow::Borrowed(right),
915 )
916 },
917 }
918}
919
920pub fn align_chunks_binary_ca_series<'a, T>(
924 left: &'a ChunkedArray<T>,
925 right: &'a Series,
926) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, Series>)
927where
928 T: PolarsDataType,
929{
930 let assert = || {
931 assert_eq!(
932 left.len(),
933 right.len(),
934 "expected arrays of the same length"
935 )
936 };
937 match (left.chunks.len(), right.chunks().len()) {
938 (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
940 (a, b)
942 if a == b
943 && left
944 .chunk_lengths()
945 .zip(right.chunk_lengths())
946 .all(|(l, r)| l == r) =>
947 {
948 assert();
949 (Cow::Borrowed(left), Cow::Borrowed(right))
950 },
951 (_, 1) => (left.rechunk(), Cow::Borrowed(right)),
952 (1, _) => (Cow::Borrowed(left), Cow::Owned(right.rechunk())),
953 (_, _) => {
954 assert();
955 (left.rechunk(), Cow::Owned(right.rechunk()))
956 },
957 }
958}
959
/// Owned variant of chunk alignment for two `Series`.
///
/// * Both single-chunk, or same number of chunks with pairwise equal
///   lengths: both are returned unchanged.
/// * Exactly one side single-chunk: the other side is rechunked.
/// * Otherwise: both sides are rechunked.
///
/// Only compiled with the `performant` feature.
#[cfg(feature = "performant")]
pub(crate) fn align_chunks_binary_owned_series(left: Series, right: Series) -> (Series, Series) {
    let n_left = left.chunks().len();
    let n_right = right.chunks().len();

    // Fast path: nothing to align.
    if n_left == 1 && n_right == 1 {
        return (left, right);
    }

    let same_layout = n_left == n_right
        && left
            .chunk_lengths()
            .zip(right.chunk_lengths())
            .all(|(l, r)| l == r);
    if same_layout {
        return (left, right);
    }

    if n_right == 1 {
        (left.rechunk(), right)
    } else if n_left == 1 {
        (left, right.rechunk())
    } else {
        (left.rechunk(), right.rechunk())
    }
}
979
980pub(crate) fn align_chunks_binary_owned<T, B>(
981 left: ChunkedArray<T>,
982 right: ChunkedArray<B>,
983) -> (ChunkedArray<T>, ChunkedArray<B>)
984where
985 B: PolarsDataType,
986 T: PolarsDataType,
987{
988 match (left.chunks.len(), right.chunks.len()) {
989 (1, 1) => (left, right),
990 (a, b)
992 if a == b
993 && left
994 .chunk_lengths()
995 .zip(right.chunk_lengths())
996 .all(|(l, r)| l == r) =>
997 {
998 (left, right)
999 },
1000 (_, 1) => (left.rechunk().into_owned(), right),
1001 (1, _) => (left, right.rechunk().into_owned()),
1002 (_, _) => (left.rechunk().into_owned(), right.rechunk().into_owned()),
1003 }
1004}
1005
1006#[allow(clippy::type_complexity)]
1009pub fn align_chunks_ternary<'a, A, B, C>(
1010 a: &'a ChunkedArray<A>,
1011 b: &'a ChunkedArray<B>,
1012 c: &'a ChunkedArray<C>,
1013) -> (
1014 Cow<'a, ChunkedArray<A>>,
1015 Cow<'a, ChunkedArray<B>>,
1016 Cow<'a, ChunkedArray<C>>,
1017)
1018where
1019 A: PolarsDataType,
1020 B: PolarsDataType,
1021 C: PolarsDataType,
1022{
1023 if a.chunks.len() == 1 && b.chunks.len() == 1 && c.chunks.len() == 1 {
1024 return (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c));
1025 }
1026
1027 assert!(
1028 a.len() == b.len() && b.len() == c.len(),
1029 "expected arrays of the same length"
1030 );
1031
1032 match (a.chunks.len(), b.chunks.len(), c.chunks.len()) {
1033 (_, 1, 1) => (
1034 Cow::Borrowed(a),
1035 Cow::Owned(b.match_chunks(a.chunk_lengths())),
1036 Cow::Owned(c.match_chunks(a.chunk_lengths())),
1037 ),
1038 (1, 1, _) => (
1039 Cow::Owned(a.match_chunks(c.chunk_lengths())),
1040 Cow::Owned(b.match_chunks(c.chunk_lengths())),
1041 Cow::Borrowed(c),
1042 ),
1043 (1, _, 1) => (
1044 Cow::Owned(a.match_chunks(b.chunk_lengths())),
1045 Cow::Borrowed(b),
1046 Cow::Owned(c.match_chunks(b.chunk_lengths())),
1047 ),
1048 (1, _, _) => {
1049 let b = b.rechunk();
1050 (
1051 Cow::Owned(a.match_chunks(c.chunk_lengths())),
1052 Cow::Owned(b.match_chunks(c.chunk_lengths())),
1053 Cow::Borrowed(c),
1054 )
1055 },
1056 (_, 1, _) => {
1057 let a = a.rechunk();
1058 (
1059 Cow::Owned(a.match_chunks(c.chunk_lengths())),
1060 Cow::Owned(b.match_chunks(c.chunk_lengths())),
1061 Cow::Borrowed(c),
1062 )
1063 },
1064 (_, _, 1) => {
1065 let b = b.rechunk();
1066 (
1067 Cow::Borrowed(a),
1068 Cow::Owned(b.match_chunks(a.chunk_lengths())),
1069 Cow::Owned(c.match_chunks(a.chunk_lengths())),
1070 )
1071 },
1072 (len_a, len_b, len_c)
1073 if len_a == len_b
1074 && len_b == len_c
1075 && a.chunk_lengths()
1076 .zip(b.chunk_lengths())
1077 .zip(c.chunk_lengths())
1078 .all(|((a, b), c)| a == b && b == c) =>
1079 {
1080 (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c))
1081 },
1082 _ => {
1083 let a = a.rechunk();
1085 let b = b.rechunk();
1086 (
1087 Cow::Owned(a.match_chunks(c.chunk_lengths())),
1088 Cow::Owned(b.match_chunks(c.chunk_lengths())),
1089 Cow::Borrowed(c),
1090 )
1091 },
1092 }
1093}
1094
1095pub fn binary_concatenate_validities<'a, T, B>(
1096 left: &'a ChunkedArray<T>,
1097 right: &'a ChunkedArray<B>,
1098) -> Option<Bitmap>
1099where
1100 B: PolarsDataType,
1101 T: PolarsDataType,
1102{
1103 let (left, right) = align_chunks_binary(left, right);
1104 let left_validity = concatenate_validities(left.chunks());
1105 let right_validity = concatenate_validities(right.chunks());
1106 combine_validities_and(left_validity.as_ref(), right_validity.as_ref())
1107}
1108
1109pub trait IntoVec<T> {
1111 fn into_vec(self) -> Vec<T>;
1112}
1113
1114impl<I, S> IntoVec<PlSmallStr> for I
1115where
1116 I: IntoIterator<Item = S>,
1117 S: Into<PlSmallStr>,
1118{
1119 fn into_vec(self) -> Vec<PlSmallStr> {
1120 self.into_iter().map(|s| s.into()).collect()
1121 }
1122}
1123
1124#[inline]
1129pub(crate) fn index_to_chunked_index<
1130 I: Iterator<Item = Idx>,
1131 Idx: PartialOrd + std::ops::AddAssign + std::ops::SubAssign + Zero + One,
1132>(
1133 chunk_lens: I,
1134 index: Idx,
1135) -> (Idx, Idx) {
1136 let mut index_remainder = index;
1137 let mut current_chunk_idx = Zero::zero();
1138
1139 for chunk_len in chunk_lens {
1140 if chunk_len > index_remainder {
1141 break;
1142 } else {
1143 index_remainder -= chunk_len;
1144 current_chunk_idx += One::one();
1145 }
1146 }
1147 (current_chunk_idx, index_remainder)
1148}
1149
1150pub(crate) fn index_to_chunked_index_rev<
1151 I: Iterator<Item = Idx>,
1152 Idx: PartialOrd
1153 + std::ops::AddAssign
1154 + std::ops::SubAssign
1155 + std::ops::Sub<Output = Idx>
1156 + Zero
1157 + One
1158 + Copy
1159 + std::fmt::Debug,
1160>(
1161 chunk_lens_rev: I,
1162 index_from_back: Idx,
1163 total_chunks: Idx,
1164) -> (Idx, Idx) {
1165 debug_assert!(index_from_back > Zero::zero(), "at least -1");
1166 let mut index_remainder = index_from_back;
1167 let mut current_chunk_idx = One::one();
1168 let mut current_chunk_len = Zero::zero();
1169
1170 for chunk_len in chunk_lens_rev {
1171 current_chunk_len = chunk_len;
1172 if chunk_len >= index_remainder {
1173 break;
1174 } else {
1175 index_remainder -= chunk_len;
1176 current_chunk_idx += One::one();
1177 }
1178 }
1179 (
1180 total_chunks - current_chunk_idx,
1181 current_chunk_len - index_remainder,
1182 )
1183}
1184
1185pub(crate) fn first_non_null<'a, I>(iter: I) -> Option<usize>
1186where
1187 I: Iterator<Item = Option<&'a Bitmap>>,
1188{
1189 let mut offset = 0;
1190 for validity in iter {
1191 if let Some(validity) = validity {
1192 let mask = BitMask::from_bitmap(validity);
1193 if let Some(n) = mask.nth_set_bit_idx(0, 0) {
1194 return Some(offset + n);
1195 }
1196 offset += validity.len()
1197 } else {
1198 return Some(offset);
1199 }
1200 }
1201 None
1202}
1203
1204pub(crate) fn last_non_null<'a, I>(iter: I, len: usize) -> Option<usize>
1205where
1206 I: DoubleEndedIterator<Item = Option<&'a Bitmap>>,
1207{
1208 if len == 0 {
1209 return None;
1210 }
1211 let mut offset = 0;
1212 for validity in iter.rev() {
1213 if let Some(validity) = validity {
1214 let mask = BitMask::from_bitmap(validity);
1215 if let Some(n) = mask.nth_set_bit_idx_rev(0, mask.len()) {
1216 let mask_start = len - offset - mask.len();
1217 return Some(mask_start + n);
1218 }
1219 offset += validity.len()
1220 } else {
1221 return Some(len - 1 - offset);
1222 }
1223 }
1224 None
1225}
1226
1227pub fn coalesce_nulls<'a, T: PolarsDataType>(
1229 a: &'a ChunkedArray<T>,
1230 b: &'a ChunkedArray<T>,
1231) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<T>>) {
1232 if a.null_count() > 0 || b.null_count() > 0 {
1233 let (a, b) = align_chunks_binary(a, b);
1234 let mut b = b.into_owned();
1235 let a = a.coalesce_nulls(b.chunks());
1236
1237 for arr in a.chunks().iter() {
1238 for arr_b in unsafe { b.chunks_mut() } {
1239 *arr_b = arr_b.with_validity(arr.validity().cloned())
1240 }
1241 }
1242 b.compute_len();
1243 (Cow::Owned(a), Cow::Owned(b))
1244 } else {
1245 (Cow::Borrowed(a), Cow::Borrowed(b))
1246 }
1247}
1248
1249pub fn coalesce_nulls_columns(a: &Column, b: &Column) -> (Column, Column) {
1250 if a.null_count() > 0 || b.null_count() > 0 {
1251 let mut a = a.as_materialized_series().rechunk();
1252 let mut b = b.as_materialized_series().rechunk();
1253 for (arr_a, arr_b) in unsafe { a.chunks_mut().iter_mut().zip(b.chunks_mut()) } {
1254 let validity = match (arr_a.validity(), arr_b.validity()) {
1255 (None, Some(b)) => Some(b.clone()),
1256 (Some(a), Some(b)) => Some(a & b),
1257 (Some(a), None) => Some(a.clone()),
1258 (None, None) => None,
1259 };
1260 *arr_a = arr_a.with_validity(validity.clone());
1261 *arr_b = arr_b.with_validity(validity);
1262 }
1263 a.compute_len();
1264 b.compute_len();
1265 (a.into(), b.into())
1266 } else {
1267 (a.clone(), b.clone())
1268 }
1269}
1270
1271pub fn operation_exceeded_idxsize_msg(operation: &str) -> String {
1272 if size_of::<IdxSize>() == size_of::<u32>() {
1273 format!(
1274 "{} exceeded the maximum supported limit of {} rows. Consider installing 'polars-u64-idx'.",
1275 operation,
1276 IdxSize::MAX,
1277 )
1278 } else {
1279 format!(
1280 "{} exceeded the maximum supported limit of {} rows.",
1281 operation,
1282 IdxSize::MAX,
1283 )
1284 }
1285}
1286
#[cfg(test)]
mod test {
    use super::*;

    /// Assert that both arrays are split into chunks of identical lengths.
    fn assert_same_chunk_lengths(a: &Int32Chunked, b: &Int32Chunked) {
        assert_eq!(
            a.chunk_lengths().collect::<Vec<_>>(),
            b.chunk_lengths().collect::<Vec<_>>()
        );
    }

    /// Splitting ten values three ways puts the remainder in the last part.
    #[test]
    fn test_split() {
        let ca: Int32Chunked = (0..10).collect_ca("a".into());

        let parts = split(&ca, 3);
        let first_three: Vec<usize> = parts.iter().take(3).map(|part| part.len()).collect();
        assert_eq!(first_three, [3, 3, 4]);
    }

    /// Aligning a single-chunk array with a multi-chunk one yields matching
    /// chunk lengths on both sides.
    #[test]
    fn test_align_chunks() -> PolarsResult<()> {
        // One chunk of four values against chunks of one and three values.
        let single = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut two_chunks = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let tail = Int32Chunked::new(PlSmallStr::EMPTY, &[2, 3, 4]);
        two_chunks.append(&tail)?;

        let (a, b) = align_chunks_binary(&single, &two_chunks);
        assert_same_chunk_lengths(&a, &b);

        // One chunk of four values against four chunks of one value each.
        let single = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut four_chunks = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let unit = four_chunks.clone();
        for _ in 0..3 {
            four_chunks.append(&unit)?;
        }

        let (a, b) = align_chunks_binary(&single, &four_chunks);
        assert_same_chunk_lengths(&a, &b);

        Ok(())
    }
}