#![allow(unsafe_op_in_unsafe_fn)]

use std::iter::Map;
use std::sync::Arc;

use arrow::array::*;
use arrow::bitmap::Bitmap;
use arrow::compute::concatenate::concatenate_unchecked;
use polars_compute::filter::filter_with_bitmap;

use crate::prelude::{ChunkTakeUnchecked, *};

pub mod ops;
#[macro_use]
pub mod arithmetic;
pub mod builder;
pub mod cast;
pub mod collect;
pub mod comparison;
pub mod flags;
pub mod float;
pub mod iterator;
#[cfg(feature = "ndarray")]
pub(crate) mod ndarray;

#[cfg(feature = "dtype-array")]
pub(crate) mod array;
mod binary;
mod binary_offset;
mod bitwise;
#[cfg(feature = "object")]
mod drop;
mod from;
mod from_iterator;
pub mod from_iterator_par;
pub(crate) mod list;
pub(crate) mod logical;
#[cfg(feature = "object")]
pub mod object;
#[cfg(feature = "random")]
mod random;
#[cfg(feature = "dtype-struct")]
mod struct_;
#[cfg(any(
    feature = "temporal",
    feature = "dtype-datetime",
    feature = "dtype-date"
))]
pub mod temporal;
mod to_vec;
mod trusted_len;

use std::slice::Iter;

use arrow::legacy::prelude::*;
#[cfg(feature = "dtype-struct")]
pub use struct_::StructChunked;

use self::flags::{StatisticsFlags, StatisticsFlagsIM};
use crate::series::IsSorted;
use crate::utils::{first_non_null, last_non_null};

#[cfg(not(feature = "dtype-categorical"))]
pub struct RevMapping {}

pub type ChunkLenIter<'a> = std::iter::Map<std::slice::Iter<'a, ArrayRef>, fn(&ArrayRef) -> usize>;

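/// A typed, chunked column backed by one or more Arrow arrays ("chunks") that all share the same
/// data type. The cached `length`, `null_count` and statistics `flags` are kept in sync with the
/// chunks by the constructors in this module.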
pub struct ChunkedArray<T: PolarsDataType> {
    pub(crate) field: Arc<Field>,
    pub(crate) chunks: Vec<ArrayRef>,

    pub(crate) flags: StatisticsFlagsIM,

    length: usize,
    null_count: usize,
    _pd: std::marker::PhantomData<T>,
}

impl<T: PolarsDataType> ChunkedArray<T> {
    fn should_rechunk(&self) -> bool {
        self.chunks.len() > 1 && self.chunks.len() > self.len() / 3
    }

    fn optional_rechunk(mut self) -> Self {
        if self.should_rechunk() {
            self.rechunk_mut()
        }
        self
    }

    pub(crate) fn as_any(&self) -> &dyn std::any::Any {
        self
    }

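    /// Unpack a [`Series`] to a [`ChunkedArray`] of the same logical type, or return an error if
    /// the data types do not match.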
    pub fn unpack_series_matching_type<'a>(
        &self,
        series: &'a Series,
    ) -> PolarsResult<&'a ChunkedArray<T>> {
        polars_ensure!(
            self.dtype() == series.dtype(),
            SchemaMismatch: "cannot unpack series of type `{}` into `{}`",
            series.dtype(),
            self.dtype(),
        );

        Ok(unsafe { self.unpack_series_matching_physical_type(series) })
    }

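    /// Create a new [`ChunkedArray`] and compute its `length` and `null_count` from the chunks.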
    fn new_with_compute_len(field: Arc<Field>, chunks: Vec<ArrayRef>) -> Self {
        unsafe {
            let mut chunked_arr = Self::new_with_dims(field, chunks, 0, 0);
            chunked_arr.compute_len();
            chunked_arr
        }
    }

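    /// Create a new [`ChunkedArray`] from chunks with precomputed dimensions.
    ///
    /// # Safety
    /// The caller must ensure that `length` and `null_count` match the given `chunks`.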
    pub unsafe fn new_with_dims(
        field: Arc<Field>,
        chunks: Vec<ArrayRef>,
        length: usize,
        null_count: usize,
    ) -> Self {
        Self {
            field,
            chunks,
            flags: StatisticsFlagsIM::empty(),

            _pd: Default::default(),
            length,
            null_count,
        }
    }

    pub(crate) fn is_sorted_ascending_flag(&self) -> bool {
        self.get_flags().is_sorted_ascending()
    }

    pub(crate) fn is_sorted_descending_flag(&self) -> bool {
        self.get_flags().is_sorted_descending()
    }

    pub(crate) fn is_sorted_any(&self) -> bool {
        self.get_flags().is_sorted_any()
    }

    pub fn unset_fast_explode_list(&mut self) {
        self.set_fast_explode_list(false)
    }

    pub fn set_fast_explode_list(&mut self, value: bool) {
        let mut flags = self.flags.get_mut();
        flags.set(StatisticsFlags::CAN_FAST_EXPLODE_LIST, value);
        self.flags.set_mut(flags);
    }

    pub fn get_fast_explode_list(&self) -> bool {
        self.get_flags().can_fast_explode_list()
    }

    pub fn get_flags(&self) -> StatisticsFlags {
        self.flags.get()
    }

    pub fn set_flags(&mut self, flags: StatisticsFlags) {
        self.flags = StatisticsFlagsIM::new(flags);
    }

    pub fn is_sorted_flag(&self) -> IsSorted {
        self.get_flags().is_sorted()
    }

    pub fn retain_flags_from<U: PolarsDataType>(
        &mut self,
        from: &ChunkedArray<U>,
        retain_flags: StatisticsFlags,
    ) {
        let flags = from.flags.get();
        if !flags.is_empty() {
            self.set_flags(flags & retain_flags)
        }
    }

    pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
        let mut flags = self.flags.get_mut();
        flags.set_sorted(sorted);
        self.flags.set_mut(flags);
    }

    pub fn with_sorted_flag(&self, sorted: IsSorted) -> Self {
        let mut out = self.clone();
        out.set_sorted_flag(sorted);
        out
    }

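    /// Get the index of the first non-null value in this [`ChunkedArray`], or `None` if all
    /// values are null.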
    pub fn first_non_null(&self) -> Option<usize> {
        if self.null_count() == self.len() {
            None
        } else if self.null_count() == 0 {
            Some(0)
        } else if self.is_sorted_any() {
            let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
                // Nulls are sorted to the start.
                self.null_count()
            } else {
                // Nulls are sorted to the end.
                0
            };

            debug_assert!(
                unsafe { self.get_unchecked(out) }.is_some(),
                "incorrect sorted flag"
            );

            Some(out)
        } else {
            first_non_null(self.iter_validities())
        }
    }

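    /// Get the index of the last non-null value in this [`ChunkedArray`], or `None` if all
    /// values are null.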
    pub fn last_non_null(&self) -> Option<usize> {
        if self.null_count() == self.len() {
            None
        } else if self.null_count() == 0 {
            Some(self.len() - 1)
        } else if self.is_sorted_any() {
            let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
                // Nulls are sorted to the start.
                self.len() - 1
            } else {
                // Nulls are sorted to the end.
                self.len() - self.null_count() - 1
            };

            debug_assert!(
                unsafe { self.get_unchecked(out) }.is_some(),
                "incorrect sorted flag"
            );

            Some(out)
        } else {
            last_non_null(self.iter_validities(), self.len())
        }
    }

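    /// Remove all null values, returning a new [`ChunkedArray`] that contains only the valid
    /// values.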
    pub fn drop_nulls(&self) -> Self {
        if self.null_count() == 0 {
            self.clone()
        } else {
            let chunks = self
                .downcast_iter()
                .map(|arr| {
                    if arr.null_count() == 0 {
                        arr.to_boxed()
                    } else {
                        filter_with_bitmap(arr, arr.validity().unwrap())
                    }
                })
                .collect();
            unsafe {
                Self::new_with_dims(
                    self.field.clone(),
                    chunks,
                    self.len() - self.null_count(),
                    0,
                )
            }
        }
    }

    #[inline]
    #[allow(clippy::type_complexity)]
    pub fn iter_validities(&self) -> Map<Iter<'_, ArrayRef>, fn(&ArrayRef) -> Option<&Bitmap>> {
        fn to_validity(arr: &ArrayRef) -> Option<&Bitmap> {
            arr.validity()
        }
        self.chunks.iter().map(to_validity)
    }

    #[inline]
    pub fn has_nulls(&self) -> bool {
        self.null_count > 0
    }

    pub fn shrink_to_fit(&mut self) {
        self.chunks = vec![concatenate_unchecked(self.chunks.as_slice()).unwrap()];
    }

    pub fn clear(&self) -> Self {
        let mut ca = unsafe {
            self.copy_with_chunks(vec![new_empty_array(
                self.chunks.first().unwrap().dtype().clone(),
            )])
        };

        use StatisticsFlags as F;
        ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
        ca
    }

    /// Unpack a [`Series`] to a [`ChunkedArray`] that has the same physical representation.
    ///
    /// # Safety
    /// The caller must ensure the physical type of `series` matches `T`.
    pub(crate) unsafe fn unpack_series_matching_physical_type<'a>(
        &self,
        series: &'a Series,
    ) -> &'a ChunkedArray<T> {
        let series_trait = &**series;
        if self.dtype() == series.dtype() {
            &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
        } else {
            use DataType::*;
            match (self.dtype(), series.dtype()) {
                (Int64, Datetime(_, _)) | (Int64, Duration(_)) | (Int32, Date) => {
                    &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
                },
                _ => panic!(
                    "cannot unpack series {:?} into matching type {:?}",
                    series,
                    self.dtype()
                ),
            }
        }
    }

    pub fn chunk_lengths(&self) -> ChunkLenIter<'_> {
        self.chunks.iter().map(|chunk| chunk.len())
    }

    #[inline]
    pub fn chunks(&self) -> &Vec<ArrayRef> {
        &self.chunks
    }

    /// # Safety
    /// The caller must uphold the invariants of this [`ChunkedArray`] (matching data type and
    /// cached length/null count) when mutating the chunks.
    #[inline]
    pub unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
        &mut self.chunks
    }

    pub fn is_optimal_aligned(&self) -> bool {
        self.chunks.len() == 1 && self.null_count() == 0
    }

    unsafe fn copy_with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
        Self::new_with_compute_len(self.field.clone(), chunks)
    }

    pub fn dtype(&self) -> &DataType {
        self.field.dtype()
    }

    pub(crate) unsafe fn set_dtype(&mut self, dtype: DataType) {
        self.field = Arc::new(Field::new(self.name().clone(), dtype))
    }

    pub fn name(&self) -> &PlSmallStr {
        self.field.name()
    }

    pub fn ref_field(&self) -> &Field {
        &self.field
    }

    pub fn rename(&mut self, name: PlSmallStr) {
        self.field = Arc::new(Field::new(name, self.field.dtype().clone()));
    }

    pub fn with_name(mut self, name: PlSmallStr) -> Self {
        self.rename(name);
        self
    }
}

impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
{
    /// Get a single value from this [`ChunkedArray`]. Panics if `idx` is out of bounds.
    #[inline]
    pub fn get(&self, idx: usize) -> Option<T::Physical<'_>> {
        let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
        assert!(
            chunk_idx < self.chunks().len(),
            "index: {} out of bounds for len: {}",
            idx,
            self.len()
        );
        unsafe {
            let arr = self.downcast_get_unchecked(chunk_idx);
            assert!(
                arr_idx < arr.len(),
                "index: {} out of bounds for len: {}",
                idx,
                self.len()
            );
            arr.get_unchecked(arr_idx)
        }
    }

    /// Get a single value from this [`ChunkedArray`] without bounds checking.
    ///
    /// # Safety
    /// `idx` must be in bounds.
    #[inline]
    pub unsafe fn get_unchecked(&self, idx: usize) -> Option<T::Physical<'_>> {
        let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);

        unsafe {
            self.downcast_get_unchecked(chunk_idx)
                .get_unchecked(arr_idx)
        }
    }

    /// Get the physical value at `idx` without bounds or validity checking.
    ///
    /// # Safety
    /// `idx` must be in bounds.
    #[inline]
    pub unsafe fn value_unchecked(&self, idx: usize) -> T::Physical<'_> {
        let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);

        unsafe {
            self.downcast_get_unchecked(chunk_idx)
                .value_unchecked(arr_idx)
        }
    }

    #[inline]
    pub fn first(&self) -> Option<T::Physical<'_>> {
        unsafe {
            let arr = self.downcast_get_unchecked(0);
            arr.get_unchecked(0)
        }
    }

    #[inline]
    pub fn last(&self) -> Option<T::Physical<'_>> {
        unsafe {
            let arr = self.downcast_get_unchecked(self.chunks.len().checked_sub(1)?);
            arr.get_unchecked(arr.len().checked_sub(1)?)
        }
    }

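    /// Set the validity mask of this [`ChunkedArray`].
    ///
    /// # Panics
    /// Panics if `validity.len() != self.len()`.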
    pub fn set_validity(&mut self, validity: &Bitmap) {
        assert_eq!(self.len(), validity.len());
        let mut i = 0;
        for chunk in unsafe { self.chunks_mut() } {
            *chunk = chunk.with_validity(Some(validity.clone().sliced(i, chunk.len())));
            i += chunk.len();
        }
        self.null_count = validity.unset_bits();
        self.set_fast_explode_list(false);
    }
}

impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
    ChunkedArray<T>: ChunkTakeUnchecked<[IdxSize]>,
{
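    /// Spread the values of this fully-valid [`ChunkedArray`] over the set bits of `validity`,
    /// inserting nulls at the unset positions. `self.len()` must equal `validity.set_bits()`.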
    pub fn deposit(&self, validity: &Bitmap) -> Self {
        let set_bits = validity.set_bits();

        assert_eq!(self.null_count(), 0);
        assert_eq!(self.len(), set_bits);

        if set_bits == validity.len() {
            return self.clone();
        }

        if set_bits == 0 {
            return Self::full_null_like(self, validity.len());
        }

        let mut null_mask = validity.clone();

        let mut gather_idxs = Vec::with_capacity(validity.len());
        let leading_nulls = null_mask.take_leading_zeros();
        gather_idxs.extend(std::iter::repeat_n(0, leading_nulls + 1));

        let mut i = 0 as IdxSize;
        gather_idxs.extend(null_mask.iter().skip(1).map(|v| {
            i += IdxSize::from(v);
            i
        }));

        let mut ca = unsafe { ChunkTakeUnchecked::take_unchecked(self, &gather_idxs) };
        ca.set_validity(validity);
        ca
    }
}

impl ListChunked {
    #[inline]
    pub fn get_as_series(&self, idx: usize) -> Option<Series> {
        unsafe {
            Some(Series::from_chunks_and_dtype_unchecked(
                self.name().clone(),
                vec![self.get(idx)?],
                &self.inner_dtype().to_physical(),
            ))
        }
    }

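    /// Returns whether the values buffer contains elements that are not referenced by any list,
    /// i.e. values hidden by the offsets or spanned by a null entry with a non-zero length.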
    pub fn has_masked_out_values(&self) -> bool {
        for arr in self.downcast_iter() {
            if arr.is_empty() {
                continue;
            }

            if *arr.offsets().first() != 0 || *arr.offsets().last() != arr.values().len() as i64 {
                return true;
            }

            let Some(validity) = arr.validity() else {
                continue;
            };
            // No nulls, so nothing can be masked out by the validity mask.
            if validity.unset_bits() == 0 {
                continue;
            }

            // A null entry that still spans a non-zero length masks out values.
            for i in (!validity).true_idx_iter() {
                if arr.offsets().length_at(i) > 0 {
                    return true;
                }
            }
        }

        false
    }
}

#[cfg(feature = "dtype-array")]
impl ArrayChunked {
    #[inline]
    pub fn get_as_series(&self, idx: usize) -> Option<Series> {
        unsafe {
            Some(Series::from_chunks_and_dtype_unchecked(
                self.name().clone(),
                vec![self.get(idx)?],
                &self.inner_dtype().to_physical(),
            ))
        }
    }

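    /// Construct an [`ArrayChunked`] of the given `width` from chunks of flat values, where every
    /// chunk holds a multiple of `width` values.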
    pub fn from_aligned_values(
        name: PlSmallStr,
        inner_dtype: &DataType,
        width: usize,
        chunks: Vec<ArrayRef>,
        length: usize,
    ) -> Self {
        let dtype = DataType::Array(Box::new(inner_dtype.clone()), width);
        let arrow_dtype = dtype.to_arrow(CompatLevel::newest());
        let field = Arc::new(Field::new(name, dtype));
        if width == 0 {
            use arrow::array::builder::{ArrayBuilder, make_builder};
            let values = make_builder(&inner_dtype.to_arrow(CompatLevel::newest())).freeze();
            return ArrayChunked::new_with_compute_len(
                field,
                vec![FixedSizeListArray::new(arrow_dtype, length, values, None).into_boxed()],
            );
        }

        let chunks = chunks
            .into_iter()
            .map(|chunk| {
                debug_assert_eq!(chunk.len() % width, 0);
                // Each chunk encodes `chunk.len() / width` fixed-size lists.
                FixedSizeListArray::new(arrow_dtype.clone(), chunk.len() / width, chunk, None)
                    .into_boxed()
            })
            .collect();

        unsafe { Self::new_with_dims(field, chunks, length, 0) }
    }

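    /// Convert the fixed-size lists to a [`ListChunked`] with the same inner type, reusing the
    /// values buffers and generating equally spaced offsets.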
    pub fn to_list(&self) -> ListChunked {
        let inner_dtype = self.inner_dtype();
        let chunks = self
            .downcast_iter()
            .map(|chunk| {
                use arrow::offset::OffsetsBuffer;

                let inner_dtype = chunk.dtype().inner_dtype().unwrap();
                let dtype = inner_dtype.clone().to_large_list(true);

                let offsets = (0..=chunk.len())
                    .map(|i| (i * self.width()) as i64)
                    .collect::<Vec<i64>>();

                // SAFETY: the offsets are monotonically increasing by construction.
                let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets.into()) };

                ListArray::<i64>::new(
                    dtype,
                    offsets,
                    chunk.values().clone(),
                    chunk.validity().cloned(),
                )
                .into_boxed()
            })
            .collect();

        let mut ca = unsafe {
            ListChunked::new_with_dims(
                Arc::new(Field::new(
                    self.name().clone(),
                    DataType::List(Box::new(inner_dtype.clone())),
                )),
                chunks,
                self.len(),
                self.null_count(),
            )
        };
        ca.set_fast_explode_list(!self.has_nulls());
        ca
    }
}

impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
{
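    /// Slice this array into chunks whose lengths match the given chunk lengths, e.g. to line it
    /// up chunk-wise with another array.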
    pub fn match_chunks<I>(&self, chunk_id: I) -> Self
    where
        I: Iterator<Item = usize>,
    {
        debug_assert!(self.chunks.len() == 1);
        // Takes a ChunkedArray that contains a single chunk.
        let slice = |ca: &Self| {
            let array = &ca.chunks[0];

            let mut offset = 0;
            let chunks = chunk_id
                .map(|len| {
                    debug_assert!((offset + len) <= array.len());
                    let out = unsafe { array.sliced_unchecked(offset, len) };
                    offset += len;
                    out
                })
                .collect();

            debug_assert_eq!(offset, array.len());

            unsafe {
                Self::from_chunks_and_dtype(self.name().clone(), chunks, self.dtype().clone())
            }
        };

        if self.chunks.len() != 1 {
            let out = self.rechunk();
            slice(&out)
        } else {
            slice(self)
        }
    }
}

impl<T: PolarsDataType> AsRefDataType for ChunkedArray<T> {
    fn as_ref_dtype(&self) -> &DataType {
        self.dtype()
    }
}

pub(crate) trait AsSinglePtr: AsRefDataType {
    fn as_single_ptr(&mut self) -> PolarsResult<usize> {
        polars_bail!(opq = as_single_ptr, self.as_ref_dtype());
    }
}

impl<T> AsSinglePtr for ChunkedArray<T>
where
    T: PolarsNumericType,
{
    fn as_single_ptr(&mut self) -> PolarsResult<usize> {
        self.rechunk_mut();
        let a = self.data_views().next().unwrap();
        let ptr = a.as_ptr();
        Ok(ptr as usize)
    }
}

impl AsSinglePtr for BooleanChunked {}
impl AsSinglePtr for ListChunked {}
#[cfg(feature = "dtype-array")]
impl AsSinglePtr for ArrayChunked {}
impl AsSinglePtr for StringChunked {}
impl AsSinglePtr for BinaryChunked {}
#[cfg(feature = "object")]
impl<T: PolarsObject> AsSinglePtr for ObjectChunked<T> {}

pub enum ChunkedArrayLayout<'a, T: PolarsDataType> {
    SingleNoNull(&'a T::Array),
    Single(&'a T::Array),
    MultiNoNull(&'a ChunkedArray<T>),
    Multi(&'a ChunkedArray<T>),
}

impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
{
    pub fn layout(&self) -> ChunkedArrayLayout<'_, T> {
        if self.chunks.len() == 1 {
            let arr = self.downcast_iter().next().unwrap();
            return if arr.null_count() == 0 {
                ChunkedArrayLayout::SingleNoNull(arr)
            } else {
                ChunkedArrayLayout::Single(arr)
            };
        }

        if self.downcast_iter().all(|a| a.null_count() == 0) {
            ChunkedArrayLayout::MultiNoNull(self)
        } else {
            ChunkedArrayLayout::Multi(self)
        }
    }
}

impl<T> ChunkedArray<T>
where
    T: PolarsNumericType,
{
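    /// Returns the values of this [`ChunkedArray`] as a contiguous slice. Errors unless the array
    /// consists of a single chunk without null values.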
    pub fn cont_slice(&self) -> PolarsResult<&[T::Native]> {
        polars_ensure!(
            self.chunks.len() == 1 && self.chunks[0].null_count() == 0,
            ComputeError: "chunked array is not contiguous"
        );
        Ok(self.downcast_iter().next().map(|arr| arr.values()).unwrap())
    }

    pub(crate) fn cont_slice_mut(&mut self) -> Option<&mut [T::Native]> {
        if self.chunks.len() == 1 && self.chunks[0].null_count() == 0 {
            let arr = unsafe { self.downcast_iter_mut().next().unwrap() };
            arr.get_mut_values()
        } else {
            None
        }
    }

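    /// Get an iterator over the value buffers of the chunks as slices, ignoring the validity
    /// masks.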
    pub fn data_views(&self) -> impl DoubleEndedIterator<Item = &[T::Native]> {
        self.downcast_iter().map(|arr| arr.values().as_slice())
    }

    #[allow(clippy::wrong_self_convention)]
    pub fn into_no_null_iter(
        &self,
    ) -> impl '_ + Send + Sync + ExactSizeIterator<Item = T::Native> + DoubleEndedIterator + TrustedLen
    {
        #[allow(clippy::map_clone)]
        unsafe {
            self.data_views()
                .flatten()
                .map(|v| *v)
                .trust_my_length(self.len())
        }
    }
}

impl<T: PolarsDataType> Clone for ChunkedArray<T> {
    fn clone(&self) -> Self {
        ChunkedArray {
            field: self.field.clone(),
            chunks: self.chunks.clone(),
            flags: self.flags.clone(),

            _pd: Default::default(),
            length: self.length,
            null_count: self.null_count,
        }
    }
}

impl<T: PolarsDataType> AsRef<ChunkedArray<T>> for ChunkedArray<T> {
    fn as_ref(&self) -> &ChunkedArray<T> {
        self
    }
}

impl ValueSize for ListChunked {
    fn get_values_size(&self) -> usize {
        self.chunks
            .iter()
            .fold(0usize, |acc, arr| acc + arr.get_values_size())
    }
}

#[cfg(feature = "dtype-array")]
impl ValueSize for ArrayChunked {
    fn get_values_size(&self) -> usize {
        self.chunks
            .iter()
            .fold(0usize, |acc, arr| acc + arr.get_values_size())
    }
}

impl ValueSize for StringChunked {
    fn get_values_size(&self) -> usize {
        self.chunks
            .iter()
            .fold(0usize, |acc, arr| acc + arr.get_values_size())
    }
}

impl ValueSize for BinaryOffsetChunked {
    fn get_values_size(&self) -> usize {
        self.chunks
            .iter()
            .fold(0usize, |acc, arr| acc + arr.get_values_size())
    }
}

pub(crate) fn to_primitive<T: PolarsNumericType>(
    values: Vec<T::Native>,
    validity: Option<Bitmap>,
) -> PrimitiveArray<T::Native> {
    PrimitiveArray::new(
        T::get_static_dtype().to_arrow(CompatLevel::newest()),
        values.into(),
        validity,
    )
}

pub(crate) fn to_array<T: PolarsNumericType>(
    values: Vec<T::Native>,
    validity: Option<Bitmap>,
) -> ArrayRef {
    Box::new(to_primitive::<T>(values, validity))
}

impl<T: PolarsDataType> Default for ChunkedArray<T> {
    fn default() -> Self {
        let dtype = T::get_static_dtype();
        let arrow_dtype = dtype.to_physical().to_arrow(CompatLevel::newest());
        ChunkedArray {
            field: Arc::new(Field::new(PlSmallStr::EMPTY, dtype)),
            // Invariant: a ChunkedArray always has at least one chunk.
            chunks: vec![new_empty_array(arrow_dtype)],
            flags: StatisticsFlagsIM::empty(),

            _pd: Default::default(),
            length: 0,
            null_count: 0,
        }
    }
}

#[cfg(test)]
pub(crate) mod test {
    use crate::prelude::*;

    pub(crate) fn get_chunked_array() -> Int32Chunked {
        ChunkedArray::new(PlSmallStr::from_static("a"), &[1, 2, 3])
    }

    #[test]
    fn test_sort() {
        let a = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 9, 3, 2]);
        let b = a
            .sort(false)
            .into_iter()
            .map(|opt| opt.unwrap())
            .collect::<Vec<_>>();
        assert_eq!(b, [1, 2, 3, 9]);
        let a = StringChunked::new(PlSmallStr::from_static("a"), &["b", "a", "c"]);
        let a = a.sort(false);
        let b = a.into_iter().collect::<Vec<_>>();
        assert_eq!(b, [Some("a"), Some("b"), Some("c")]);
        assert!(a.is_sorted_ascending_flag());
    }

    #[test]
    fn arithmetic() {
        let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 6, 40]);
        let b = &Int32Chunked::new(PlSmallStr::from_static("b"), &[-1, 2, 3, 4]);

        println!("{:?}", a + b);
        println!("{:?}", a - b);
        println!("{:?}", a * b);
        println!("{:?}", a / b);
    }

    #[test]
    fn iter() {
        let s1 = get_chunked_array();
        assert_eq!(s1.into_iter().fold(0, |acc, val| { acc + val.unwrap() }), 6)
    }

    #[test]
    fn limit() {
        let a = get_chunked_array();
        let b = a.limit(2);
        println!("{b:?}");
        assert_eq!(b.len(), 2)
    }

    #[test]
    fn filter() {
        let a = get_chunked_array();
        let b = a
            .filter(&BooleanChunked::new(
                PlSmallStr::from_static("filter"),
                &[true, false, false],
            ))
            .unwrap();
        assert_eq!(b.len(), 1);
        assert_eq!(b.into_iter().next(), Some(Some(1)));
    }

    #[test]
    fn aggregates() {
        let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 10, 9]);
        assert_eq!(a.max(), Some(100));
        assert_eq!(a.min(), Some(1));
        assert_eq!(a.sum(), Some(120))
    }

    #[test]
    fn take() {
        let a = get_chunked_array();
        let new = a.take(&[0 as IdxSize, 1]).unwrap();
        assert_eq!(new.len(), 2)
    }

    #[test]
    fn cast() {
        let a = get_chunked_array();
        let b = a.cast(&DataType::Int64).unwrap();
        assert_eq!(b.dtype(), &DataType::Int64)
    }

    fn assert_slice_equal<T>(ca: &ChunkedArray<T>, eq: &[T::Native])
    where
        T: PolarsNumericType,
    {
        assert_eq!(ca.iter().map(|opt| opt.unwrap()).collect::<Vec<_>>(), eq)
    }

    #[test]
    fn slice() {
        let mut first = UInt32Chunked::new(PlSmallStr::from_static("first"), &[0, 1, 2]);
        let second = UInt32Chunked::new(PlSmallStr::from_static("second"), &[3, 4, 5]);
        first.append(&second).unwrap();
        assert_slice_equal(&first.slice(0, 3), &[0, 1, 2]);
        assert_slice_equal(&first.slice(0, 4), &[0, 1, 2, 3]);
        assert_slice_equal(&first.slice(1, 4), &[1, 2, 3, 4]);
        assert_slice_equal(&first.slice(3, 2), &[3, 4]);
        assert_slice_equal(&first.slice(3, 3), &[3, 4, 5]);
        assert_slice_equal(&first.slice(-3, 3), &[3, 4, 5]);
        assert_slice_equal(&first.slice(-6, 6), &[0, 1, 2, 3, 4, 5]);

        assert_eq!(first.slice(-7, 2).len(), 1);
        assert_eq!(first.slice(-3, 4).len(), 3);
        assert_eq!(first.slice(3, 4).len(), 3);
        assert_eq!(first.slice(10, 4).len(), 0);
    }

    #[test]
    fn sorting() {
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[9, 2, 4]);
        let sorted = s.sort(false);
        assert_slice_equal(&sorted, &[2, 4, 9]);
        let sorted = s.sort(true);
        assert_slice_equal(&sorted, &[9, 4, 2]);

        let s: StringChunked = ["b", "a", "z"].iter().collect();
        let sorted = s.sort(false);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[Some("a"), Some("b"), Some("z")]
        );
        let sorted = s.sort(true);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[Some("z"), Some("b"), Some("a")]
        );
        let s: StringChunked = [Some("b"), None, Some("z")].iter().copied().collect();
        let sorted = s.sort(false);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[None, Some("b"), Some("z")]
        );
    }

    #[test]
    fn reverse() {
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3]);
        assert_slice_equal(&s.reverse(), &[3, 2, 1]);

        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[Some(1), None, Some(3)]);
        assert_eq!(Vec::from(&s.reverse()), &[Some(3), None, Some(1)]);
        let s = BooleanChunked::new(PlSmallStr::EMPTY, &[true, false]);
        assert_eq!(Vec::from(&s.reverse()), &[Some(false), Some(true)]);

        let s = StringChunked::new(PlSmallStr::EMPTY, &["a", "b", "c"]);
        assert_eq!(Vec::from(&s.reverse()), &[Some("c"), Some("b"), Some("a")]);

        let s = StringChunked::new(PlSmallStr::EMPTY, &[Some("a"), None, Some("c")]);
        assert_eq!(Vec::from(&s.reverse()), &[Some("c"), None, Some("a")]);
    }

    #[test]
    #[cfg(feature = "dtype-categorical")]
    fn test_iter_categorical() {
        let ca = StringChunked::new(
            PlSmallStr::EMPTY,
            &[Some("foo"), None, Some("bar"), Some("ham")],
        );
        let cats = Categories::new(
            PlSmallStr::EMPTY,
            PlSmallStr::EMPTY,
            CategoricalPhysical::U32,
        );
        let ca = ca.cast(&DataType::from_categories(cats)).unwrap();
        let ca = ca.cat32().unwrap();
        let v: Vec<_> = ca.physical().into_iter().collect();
        assert_eq!(v, &[Some(0), None, Some(1), Some(2)]);
    }

    #[test]
    #[ignore]
    fn test_shrink_to_fit() {
        let mut builder = StringChunkedBuilder::new(PlSmallStr::from_static("foo"), 2048);
        builder.append_value("foo");
        let mut arr = builder.finish();
        let before = arr
            .chunks()
            .iter()
            .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
            .sum::<usize>();
        arr.shrink_to_fit();
        let after = arr
            .chunks()
            .iter()
            .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
            .sum::<usize>();
        assert!(before > after);
    }
}