#![allow(unsafe_op_in_unsafe_fn)]
use std::sync::Arc;
use arrow::array::*;
use arrow::bitmap::Bitmap;
use arrow::compute::concatenate::concatenate_unchecked;
use polars_compute::filter::filter_with_bitmap;
use crate::prelude::{ChunkTakeUnchecked, *};
// Submodules that make up the chunked-array implementation. `pub` modules are
// reachable from outside the crate, `pub(crate)` ones only from within it, and
// the remaining ones only from this module.
pub mod ops;
// `#[macro_use]` keeps any `macro_rules!` macros defined in `arithmetic` in
// scope for the code that follows this declaration.
#[macro_use]
pub mod arithmetic;
pub mod builder;
pub mod cast;
pub mod collect;
pub mod comparison;
pub mod flags;
pub mod float;
pub mod iterator;
// Only compiled when the `ndarray` feature is enabled.
#[cfg(feature = "ndarray")]
pub(crate) mod ndarray;
pub mod arg_min_max;
// Only compiled when the `dtype-array` feature is enabled.
#[cfg(feature = "dtype-array")]
pub(crate) mod array;
mod binary;
mod binary_offset;
mod bitwise;
// Only compiled when the `object` feature is enabled.
#[cfg(feature = "object")]
mod drop;
mod from;
mod from_iterator;
pub mod from_iterator_par;
pub(crate) mod list;
pub(crate) mod logical;
// Only compiled when the `object` feature is enabled.
#[cfg(feature = "object")]
pub mod object;
// Only compiled when the `random` feature is enabled.
#[cfg(feature = "random")]
mod random;
// Only compiled when the `dtype-struct` feature is enabled.
#[cfg(feature = "dtype-struct")]
mod struct_;
// Compiled when any of the temporal-related features is enabled.
#[cfg(any(
feature = "temporal",
feature = "dtype-datetime",
feature = "dtype-date"
))]
pub mod temporal;
mod to_vec;
mod trusted_len;
pub(crate) use arg_min_max::*;
use arrow::legacy::prelude::*;
#[cfg(feature = "dtype-struct")]
pub use struct_::StructChunked;
use self::flags::{StatisticsFlags, StatisticsFlagsIM};
use crate::series::IsSorted;
use crate::utils::{first_non_null, first_null, last_non_null};
/// Empty placeholder type that only exists when the `dtype-categorical`
/// feature is disabled.
#[cfg(not(feature = "dtype-categorical"))]
pub struct RevMapping {}
/// Iterator over the length of every chunk: a slice iterator over the chunks
/// mapped through a plain function pointer, so the type can be named.
pub type ChunkLenIter<'a> = std::iter::Map<std::slice::Iter<'a, ArrayRef>, fn(&ArrayRef) -> usize>;
/// A typed column stored as one or more Arrow array chunks.
///
/// `T` only tags the Polars data type at compile time; no value of `T` is
/// stored (see the `PhantomData` marker).
pub struct ChunkedArray<T: PolarsDataType> {
// Name and dtype of the column, shared behind an `Arc`.
pub(crate) field: Arc<Field>,
// The Arrow arrays holding the data, in row order.
pub(crate) chunks: Vec<ArrayRef>,
// Statistics flags (sortedness, fast-explode). They are read through `&self`
// via `get()`; presumably the wrapper provides interior mutability — confirm in `flags`.
pub(crate) flags: StatisticsFlagsIM,
// Cached total number of rows over all chunks.
length: usize,
// Cached total number of null values over all chunks.
null_count: usize,
// Zero-sized marker tying the struct to `T`.
_pd: std::marker::PhantomData<T>,
}
impl<T: PolarsDataType> ChunkedArray<T> {
/// Heuristic used by `optional_rechunk`: `true` when there is more than one
/// chunk and the chunk count exceeds a third of the length (integer division).
fn should_rechunk(&self) -> bool {
    let n_chunks = self.chunks.len();
    n_chunks > 1 && n_chunks > self.len() / 3
}
/// Consumes the array and returns it, rechunked in place first when
/// `should_rechunk` says the chunks are too fragmented.
fn optional_rechunk(mut self) -> Self {
    if !self.should_rechunk() {
        return self;
    }
    self.rechunk_mut();
    self
}
/// Returns `self` as a `&dyn Any` trait object (e.g. so callers can downcast).
pub(crate) fn as_any(&self) -> &dyn std::any::Any {
self
}
/// Views `series` as a `ChunkedArray<T>` when its dtype equals this array's dtype.
///
/// # Errors
/// Returns a `SchemaMismatch` error when the two dtypes differ.
pub fn unpack_series_matching_type<'a>(
    &self,
    series: &'a Series,
) -> PolarsResult<&'a ChunkedArray<T>> {
    let (expected, found) = (self.dtype(), series.dtype());
    polars_ensure!(
        expected == found,
        SchemaMismatch: "cannot unpack series of type `{}` into `{}`",
        found,
        expected,
    );
    // SAFETY: the dtypes were just verified to be equal, which is the first
    // case accepted by `unpack_series_matching_physical_type`.
    let unpacked = unsafe { self.unpack_series_matching_physical_type(series) };
    Ok(unpacked)
}
/// Builds an array from `field` and `chunks` with zeroed placeholder
/// dimensions, then calls `compute_len` (presumably filling in the cached
/// length and null count from the chunks — defined outside this view).
fn new_with_compute_len(field: Arc<Field>, chunks: Vec<ArrayRef>) -> Self {
    unsafe {
        let mut ca = Self::new_with_dims(field, chunks, 0, 0);
        ca.compute_len();
        ca
    }
}
/// Assembles an array from its parts with empty statistics flags.
///
/// # Safety
/// `length` and `null_count` are stored as given, without any validation
/// against `chunks`; the caller must pass values that match the chunks.
pub unsafe fn new_with_dims(
    field: Arc<Field>,
    chunks: Vec<ArrayRef>,
    length: usize,
    null_count: usize,
) -> Self {
    let flags = StatisticsFlagsIM::empty();
    Self {
        field,
        chunks,
        flags,
        length,
        null_count,
        _pd: std::marker::PhantomData,
    }
}
/// Whether the statistics flags mark this array as sorted ascending.
pub(crate) fn is_sorted_ascending_flag(&self) -> bool {
    let flags = self.get_flags();
    flags.is_sorted_ascending()
}
/// Whether the statistics flags mark this array as sorted descending.
pub(crate) fn is_sorted_descending_flag(&self) -> bool {
    let flags = self.get_flags();
    flags.is_sorted_descending()
}
/// Whether the statistics flags mark this array as sorted in either direction.
pub(crate) fn is_sorted_any(&self) -> bool {
    let flags = self.get_flags();
    flags.is_sorted_any()
}
/// Clears the `CAN_FAST_EXPLODE_LIST` flag.
pub fn unset_fast_explode_list(&mut self) {
    self.set_fast_explode_list(false);
}
/// Sets or clears the `CAN_FAST_EXPLODE_LIST` flag, leaving other flags as they are.
pub fn set_fast_explode_list(&mut self, value: bool) {
    // Read-modify-write of the flag set through the `&mut` accessors.
    let mut updated = self.flags.get_mut();
    updated.set(StatisticsFlags::CAN_FAST_EXPLODE_LIST, value);
    self.flags.set_mut(updated);
}
/// Whether the `CAN_FAST_EXPLODE_LIST` flag is currently set.
pub fn get_fast_explode_list(&self) -> bool {
    let flags = self.get_flags();
    flags.can_fast_explode_list()
}
/// Returns the current statistics flags by value.
pub fn get_flags(&self) -> StatisticsFlags {
self.flags.get()
}
/// Replaces all statistics flags with `flags`.
pub fn set_flags(&mut self, flags: StatisticsFlags) {
    let wrapped = StatisticsFlagsIM::new(flags);
    self.flags = wrapped;
}
/// Returns the sortedness recorded in the statistics flags as an `IsSorted` value.
pub fn is_sorted_flag(&self) -> IsSorted {
    let flags = self.get_flags();
    flags.is_sorted()
}
/// Copies the flags of `from` that are also present in `retain_flags` onto `self`.
///
/// When `from` has no flags set at all, `self` is left untouched.
pub fn retain_flags_from<U: PolarsDataType>(
    &mut self,
    from: &ChunkedArray<U>,
    retain_flags: StatisticsFlags,
) {
    let source_flags = from.flags.get();
    if source_flags.is_empty() {
        // Nothing to carry over; skip the write entirely.
        return;
    }
    self.set_flags(source_flags & retain_flags);
}
/// Records `sorted` in the statistics flags, leaving the other flags as they are.
pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
    let mut updated = self.flags.get_mut();
    updated.set_sorted(sorted);
    self.flags.set_mut(updated);
}
/// Returns a clone of this array whose sorted flag is set to `sorted`.
pub fn with_sorted_flag(&self, sorted: IsSorted) -> Self {
    let mut flagged = self.clone();
    flagged.set_sorted_flag(sorted);
    flagged
}
/// Returns the index of the first null value, or `None` when there are no nulls.
///
/// When the array is flagged as sorted, the nulls form one contiguous run at
/// either the start or the end, so the answer follows from the first slot and
/// the null count. Otherwise the chunks are scanned.
pub fn first_null(&self) -> Option<usize> {
    if self.null_count() == 0 {
        None
    }
    // From here on there is at least one null, hence `self.len() > 0`.
    else if self.null_count() == self.len() {
        Some(0)
    } else if self.is_sorted_any() {
        let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
            // Nulls are all at the start.
            0
        } else {
            // Nulls are all at the end: the first one follows the last valid value.
            self.len() - self.null_count()
        };
        debug_assert!(
            // The slot we report must actually be null.
            unsafe { self.get_unchecked(out) }.is_none(),
            "incorrect sorted flag"
        );
        Some(out)
    } else {
        first_null(self.chunks().iter().map(|arr| arr.as_ref()))
    }
}
/// Returns the index of the first non-null value, or `None` when every value is null.
///
/// Uses the sorted flag as a shortcut: in a sorted array the nulls sit either
/// all at the start or all at the end.
pub fn first_non_null(&self) -> Option<usize> {
    let nulls = self.null_count();
    if nulls == self.len() {
        return None;
    }
    // At least one valid value exists below, hence `self.len() > 0`.
    if nulls == 0 {
        return Some(0);
    }
    if !self.is_sorted_any() {
        return first_non_null(self.chunks().iter().map(|arr| arr.as_ref()));
    }

    let starts_with_null = unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) };
    // Nulls first: skip the whole null run. Nulls last: the first slot is valid.
    let out = if starts_with_null { nulls } else { 0 };
    debug_assert!(
        unsafe { self.get_unchecked(out) }.is_some(),
        "incorrect sorted flag"
    );
    Some(out)
}
/// Returns the index of the last non-null value, or `None` when every value is null.
///
/// Uses the sorted flag as a shortcut: in a sorted array the nulls sit either
/// all at the start or all at the end.
pub fn last_non_null(&self) -> Option<usize> {
    let len = self.len();
    let nulls = self.null_count();
    if nulls == len {
        return None;
    }
    // At least one valid value exists below, hence `len > 0`.
    if nulls == 0 {
        return Some(len - 1);
    }
    if !self.is_sorted_any() {
        return last_non_null(self.chunks().iter().map(|arr| arr.as_ref()), len);
    }

    let starts_with_null = unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) };
    let out = if starts_with_null {
        // Nulls first: the very last slot is valid.
        len - 1
    } else {
        // Nulls last: step back over the trailing null run.
        len - nulls - 1
    };
    debug_assert!(
        unsafe { self.get_unchecked(out) }.is_some(),
        "incorrect sorted flag"
    );
    Some(out)
}
/// Returns a copy of the array with all null values removed.
///
/// Without nulls this is a plain clone. Otherwise each chunk that contains
/// nulls is filtered by its own validity bitmap; the other chunks are kept.
pub fn drop_nulls(&self) -> Self {
    let n_nulls = self.null_count();
    if n_nulls == 0 {
        return self.clone();
    }

    let filtered = self
        .downcast_iter()
        .map(|arr| match arr.null_count() {
            0 => arr.to_boxed(),
            // A non-zero null count implies a validity bitmap is present.
            _ => filter_with_bitmap(arr, arr.validity().unwrap()),
        })
        .collect();

    // SAFETY: exactly the null rows were removed, so the new length is the
    // old length minus the null count and no nulls remain.
    unsafe { Self::new_with_dims(self.field.clone(), filtered, self.len() - n_nulls, 0) }
}
/// Iterates over the validity bitmap of every chunk, in chunk order
/// (`None` for a chunk that has no bitmap).
#[inline]
#[allow(clippy::type_complexity)]
pub fn iter_validities(
    &self,
) -> impl ExactSizeIterator<Item = Option<&Bitmap>> + DoubleEndedIterator {
    self.chunks.iter().map(|arr| arr.validity())
}
/// Whether the cached null count is non-zero.
#[inline]
pub fn has_nulls(&self) -> bool {
    self.null_count != 0
}
/// Replaces the chunks with a single array obtained by concatenating them.
///
/// # Panics
/// Panics if the concatenation returns an error.
pub fn shrink_to_fit(&mut self) {
    let merged = concatenate_unchecked(self.chunks.as_slice()).unwrap();
    self.chunks = vec![merged];
}
pub fn clear(&self) -> Self {
let mut ca = unsafe {
self.copy_with_chunks(vec![new_empty_array(
self.chunks.first().unwrap().dtype().clone(),
)])
};
use StatisticsFlags as F;
ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
ca
}
/// Reinterprets `series` as a `&ChunkedArray<T>` without copying.
///
/// Accepted combinations are: identical dtypes, or `self` being `Int64` with a
/// `Datetime`/`Duration` series, or `self` being `Int32` with a `Date` series.
///
/// # Safety
/// The trait object's data pointer is cast straight to `*const ChunkedArray<T>`.
/// NOTE(review): this presumably relies on the series wrapper storing a
/// `ChunkedArray<T>` at that address — confirm against the series wrappers.
///
/// # Panics
/// Panics for any other dtype combination.
pub(crate) unsafe fn unpack_series_matching_physical_type<'a>(
&self,
series: &'a Series,
) -> &'a ChunkedArray<T> {
// Deref the `Series` down to its inner `dyn SeriesTrait` object.
let series_trait = &**series;
if self.dtype() == series.dtype() {
&*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
} else {
use DataType::*;
match (self.dtype(), series.dtype()) {
// Logical temporal types paired with the integer type they are unpacked into.
(Int64, Datetime(_, _)) | (Int64, Duration(_)) | (Int32, Date) => {
&*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
},
_ => panic!(
"cannot unpack series {:?} into matching type {:?}",
series,
self.dtype()
),
}
}
}
/// Iterates over the length of each chunk, in chunk order.
pub fn chunk_lengths(&self) -> ChunkLenIter<'_> {
// The non-capturing closure coerces to the `fn` pointer named in `ChunkLenIter`.
self.chunks.iter().map(|chunk| chunk.len())
}
/// Borrows the underlying Arrow chunks.
#[inline]
pub fn chunks(&self) -> &Vec<ArrayRef> {
&self.chunks
}
/// Mutably borrows the underlying Arrow chunks.
///
/// # Safety
/// Nothing here updates the cached `length`/`null_count` or checks the dtype
/// of the chunks; the caller presumably has to keep those consistent — confirm.
#[inline]
pub unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
&mut self.chunks
}
/// `true` when the array consists of exactly one chunk and has no nulls.
pub fn is_optimal_aligned(&self) -> bool {
    let single_chunk = self.chunks.len() == 1;
    single_chunk && self.null_count() == 0
}
unsafe fn copy_with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
Self::new_with_compute_len(self.field.clone(), chunks)
}
/// Returns the data type stored in this array's field.
pub fn dtype(&self) -> &DataType {
self.field.dtype()
}
/// Replaces the field with a new one that keeps the name but carries `dtype`.
///
/// # Safety
/// The chunks are left untouched, so the caller presumably must ensure that
/// `dtype` is valid for the existing chunk data — confirm.
pub(crate) unsafe fn set_dtype(&mut self, dtype: DataType) {
    let name = self.name().clone();
    self.field = Arc::new(Field::new(name, dtype));
}
/// Returns the name stored in this array's field.
pub fn name(&self) -> &PlSmallStr {
self.field.name()
}
/// Borrows the field (name and dtype) of this array.
pub fn ref_field(&self) -> &Field {
&self.field
}
/// Renames the array in place by installing a fresh field with the same dtype.
pub fn rename(&mut self, name: PlSmallStr) {
    let dtype = self.field.dtype().clone();
    self.field = Arc::new(Field::new(name, dtype));
}
/// Consuming variant of `rename`: renames the array and returns it.
pub fn with_name(mut self, name: PlSmallStr) -> Self {
self.rename(name);
self
}
}
impl<T> ChunkedArray<T>
where
T: PolarsDataType,
{
/// Returns the value at `idx`, or `None` if that slot is null.
///
/// # Panics
/// Panics if `idx` is out of bounds.
#[inline]
pub fn get(&self, idx: usize) -> Option<T::Physical<'_>> {
    let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
    assert!(
        chunk_idx < self.chunks().len(),
        "index: {} out of bounds for len: {}",
        idx,
        self.len()
    );
    // SAFETY: `chunk_idx` was bounds-checked just above.
    let arr = unsafe { self.downcast_get_unchecked(chunk_idx) };
    assert!(
        arr_idx < arr.len(),
        "index: {} out of bounds for len: {}",
        idx,
        self.len()
    );
    // SAFETY: `arr_idx` was bounds-checked just above.
    unsafe { arr.get_unchecked(arr_idx) }
}
/// Returns the value at `idx` (`None` if null) without any bounds checks.
///
/// # Safety
/// No bounds checks are performed; `idx` must be a valid index into this array.
#[inline]
pub unsafe fn get_unchecked(&self, idx: usize) -> Option<T::Physical<'_>> {
    let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
    unsafe {
        let arr = self.downcast_get_unchecked(chunk_idx);
        arr.get_unchecked(arr_idx)
    }
}
/// Returns the raw value at `idx` without bounds checks and without wrapping
/// it in an `Option`.
///
/// # Safety
/// No bounds checks are performed; `idx` must be a valid index into this array.
/// NOTE(review): validity is not consulted here, so the value read for a null
/// slot is presumably unspecified — confirm.
#[inline]
pub unsafe fn value_unchecked(&self, idx: usize) -> T::Physical<'_> {
    let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
    unsafe {
        let arr = self.downcast_get_unchecked(chunk_idx);
        arr.value_unchecked(arr_idx)
    }
}
/// Returns the first element (`None` if it is null).
///
/// # Panics
/// Panics if the array is empty.
#[inline]
pub fn first(&self) -> Option<T::Physical<'_>> {
    let mut values = self.iter();
    values.next().unwrap()
}
/// Returns the last element (`None` if it is null).
///
/// # Panics
/// Panics if there is no non-empty chunk, i.e. the array is empty.
#[inline]
pub fn last(&self) -> Option<T::Physical<'_>> {
    // Walk the chunks from the back and take the first one that holds data.
    let last_chunk = self
        .downcast_iter()
        .rev()
        .find(|arr| !arr.is_empty())
        .unwrap();
    let last_idx = last_chunk.len() - 1;
    // SAFETY: the chunk is non-empty, so `len - 1` is a valid index.
    unsafe { last_chunk.get_unchecked(last_idx) }
}
/// Replaces the validity of the whole array with `validity`, slicing the
/// bitmap per chunk, and updates the cached null count.
///
/// The fast-explode flag is cleared afterwards.
///
/// # Panics
/// Panics if `validity.len()` differs from the array length.
pub fn set_validity(&mut self, validity: &Bitmap) {
    assert_eq!(self.len(), validity.len());

    let mut offset = 0;
    for chunk in unsafe { self.chunks_mut() } {
        let chunk_len = chunk.len();
        let chunk_validity = validity.clone().sliced(offset, chunk_len);
        *chunk = chunk.with_validity(Some(chunk_validity));
        offset += chunk_len;
    }

    self.null_count = validity.unset_bits();
    self.set_fast_explode_list(false);
}
}
impl<T> ChunkedArray<T>
where
T: PolarsDataType,
ChunkedArray<T>: ChunkTakeUnchecked<[IdxSize]>,
{
/// Spreads the values of `self` over the set positions of `validity`.
///
/// The result has `validity.len()` rows: the i-th set bit receives the i-th
/// value of `self`, and every unset position becomes null.
///
/// # Panics
/// Panics if `self` contains nulls or if its length differs from the number
/// of set bits in `validity`.
pub fn deposit(&self, validity: &Bitmap) -> Self {
let set_bits = validity.set_bits();
assert_eq!(self.null_count(), 0);
assert_eq!(self.len(), set_bits);
// Every position is valid: the result is the input itself.
if set_bits == validity.len() {
return self.clone();
}
// No position is valid: the result is entirely null.
if set_bits == 0 {
return Self::full_null_like(self, validity.len());
}
let mut null_mask = validity.clone();
let mut gather_idxs = Vec::with_capacity(validity.len());
// Strip the leading unset bits; `null_mask` now starts at the first set bit.
let leading_nulls = null_mask.take_leading_zeros();
// Index 0 for each leading null (placeholder, masked out later) plus one
// more 0 for the first set bit, which maps to the first value.
gather_idxs.extend(std::iter::repeat_n(0, leading_nulls + 1));
// Running count of set bits seen after the first one: a set bit advances to
// the next source value, an unset bit repeats the current index as a placeholder.
let mut i = 0 as IdxSize;
gather_idxs.extend(null_mask.iter().skip(1).map(|v| {
i += IdxSize::from(v);
i
}));
let mut ca = unsafe { ChunkTakeUnchecked::take_unchecked(self, &gather_idxs) };
// Mask out the placeholder rows.
ca.set_validity(validity);
ca
}
}
impl ListChunked {
/// Returns the list element at `idx` as a `Series` named like this array and
/// typed with the physical inner dtype, or `None` if that element is null.
///
/// # Panics
/// Panics if `idx` is out of bounds (see `get`).
#[inline]
pub fn get_as_series(&self, idx: usize) -> Option<Series> {
    let values = self.get(idx)?;
    let physical_inner = self.inner_dtype().to_physical();
    unsafe {
        Some(Series::from_chunks_and_dtype_unchecked(
            self.name().clone(),
            vec![values],
            &physical_inner,
        ))
    }
}
/// Returns `true` if any non-null row is a list of length zero.
pub fn has_empty_lists(&self) -> bool {
    self.downcast_iter()
        .filter(|arr| !arr.is_empty())
        .any(|arr| {
            let mut lengths = arr.offsets().lengths();
            match arr.validity() {
                // No bitmap: every row is valid, so any zero length counts.
                None => lengths.any(|len| len == 0),
                // With a bitmap, a zero length only counts on a valid row.
                Some(validity) => lengths
                    .enumerate()
                    .any(|(row, len)| len == 0 && unsafe { validity.get_bit_unchecked(row) }),
            }
        })
}
/// Returns `true` if any chunk carries child values that no valid list row
/// refers to.
///
/// A chunk counts as having masked-out values when its offsets do not span
/// the whole child array (first offset is not 0, or last offset differs from
/// the child length), or when a null row has a non-zero length.
pub fn has_masked_out_values(&self) -> bool {
    for arr in self.downcast_iter() {
        if arr.is_empty() {
            continue;
        }

        if *arr.offsets().first() != 0 || *arr.offsets().last() != arr.values().len() as i64 {
            return true;
        }

        let Some(validity) = arr.validity() else {
            continue;
        };
        // Fast path: without null rows there is nothing left to inspect.
        // A chunk whose rows are all null must still be scanned, because
        // each of those rows may hide values.
        if validity.unset_bits() == 0 {
            continue;
        }

        // Visit every null row and check whether it still spans values.
        for i in (!validity).true_idx_iter() {
            if arr.offsets().length_at(i) > 0 {
                return true;
            }
        }
    }
    false
}
}
#[cfg(feature = "dtype-array")]
impl ArrayChunked {
/// Returns the fixed-size element at `idx` as a `Series` named like this
/// array and typed with the physical inner dtype, or `None` if it is null.
///
/// # Panics
/// Panics if `idx` is out of bounds (see `get`).
#[inline]
pub fn get_as_series(&self, idx: usize) -> Option<Series> {
    let values = self.get(idx)?;
    let physical_inner = self.inner_dtype().to_physical();
    unsafe {
        Some(Series::from_chunks_and_dtype_unchecked(
            self.name().clone(),
            vec![values],
            &physical_inner,
        ))
    }
}
/// Builds a null-free `ArrayChunked` of `length` rows by wrapping flat value
/// chunks into fixed-size lists of `width` items each.
///
/// For `width == 0` the given `chunks` are ignored and a single chunk of
/// `length` rows over an empty values array is produced.
///
/// In debug builds, asserts that every chunk length is a multiple of `width`
/// and that the resulting row counts add up to `length`.
pub fn from_aligned_values(
name: PlSmallStr,
inner_dtype: &DataType,
width: usize,
chunks: Vec<ArrayRef>,
length: usize,
) -> Self {
let dtype = DataType::Array(Box::new(inner_dtype.clone()), width);
let arrow_dtype = dtype.to_arrow(CompatLevel::newest());
let field = Arc::new(Field::new(name, dtype));
// Zero-width rows hold no values, so the row count cannot be derived from
// the value chunks (and dividing by `width` below would be a division by zero).
if width == 0 {
use arrow::array::builder::{ArrayBuilder, make_builder};
let values = make_builder(&inner_dtype.to_arrow(CompatLevel::newest())).freeze();
return ArrayChunked::new_with_compute_len(
field,
vec![FixedSizeListArray::new(arrow_dtype, length, values, None).into_boxed()],
);
}
// Accumulated by the closure below; only verified in debug builds.
let mut total_len = 0;
let chunks = chunks
.into_iter()
.map(|chunk| {
debug_assert_eq!(chunk.len() % width, 0);
let chunk_len = chunk.len() / width;
total_len += chunk_len;
FixedSizeListArray::new(arrow_dtype.clone(), chunk_len, chunk, None).into_boxed()
})
.collect();
debug_assert_eq!(total_len, length);
// The chunks were created without validity, hence a null count of 0.
unsafe { Self::new_with_dims(field, chunks, length, 0) }
}
/// Converts the fixed-size array column into a list column, reusing the
/// value buffers and validity of every chunk.
///
/// The fast-explode flag of the result is set exactly when `self` has no nulls.
pub fn to_list(&self) -> ListChunked {
    use arrow::offset::OffsetsBuffer;

    let width = self.width();
    let list_chunks = self
        .downcast_iter()
        .map(|fixed| {
            let arrow_inner = fixed.dtype().inner_dtype().unwrap();
            let list_dtype = arrow_inner.clone().to_large_list(true);

            // Row `i` starts at `i * width`; one extra entry closes the last row.
            let raw_offsets: Vec<i64> = (0..=fixed.len())
                .map(|row| (row * width) as i64)
                .collect();
            // SAFETY: the offsets were generated in ascending order.
            let offsets = unsafe { OffsetsBuffer::new_unchecked(raw_offsets.into()) };

            ListArray::<i64>::new(
                list_dtype,
                offsets,
                fixed.values().clone(),
                fixed.validity().cloned(),
            )
            .into_boxed()
        })
        .collect();

    let list_field = Arc::new(Field::new(
        self.name().clone(),
        DataType::List(Box::new(self.inner_dtype().clone())),
    ));
    // SAFETY: rows map one-to-one and validity is copied unchanged, so the
    // length and null count carry over.
    let mut out =
        unsafe { ListChunked::new_with_dims(list_field, list_chunks, self.len(), self.null_count()) };
    out.set_fast_explode_list(!self.has_nulls());
    out
}
}
impl<T> ChunkedArray<T>
where
T: PolarsDataType,
{
/// Re-slices the data into chunks whose lengths are given by `chunk_id`.
///
/// The lengths are expected to add up to the array length; this is only
/// verified with debug assertions.
///
/// NOTE(review): the leading `debug_assert!` requires a single chunk, yet
/// the tail of the function rechunks when there are several — so the
/// multi-chunk fallback is only reachable in builds without debug assertions.
pub fn match_chunks<I>(&self, chunk_id: I) -> Self
where
I: Iterator<Item = usize>,
{
debug_assert!(self.chunks.len() == 1);
// Expects `ca` to hold a single chunk. The closure consumes `chunk_id`,
// so it can be called only once.
let slice = |ca: &Self| {
let array = &ca.chunks[0];
let mut offset = 0;
let chunks = chunk_id
.map(|len| {
// Bounds are only checked in debug builds; the slice itself is unchecked.
debug_assert!((offset + len) <= array.len());
let out = unsafe { array.sliced_unchecked(offset, len) };
offset += len;
out
})
.collect();
debug_assert_eq!(offset, array.len());
// The new chunks are slices of the original array, so the dtype is unchanged.
unsafe {
Self::from_chunks_and_dtype(self.name().clone(), chunks, self.dtype().clone())
}
};
if self.chunks.len() != 1 {
let out = self.rechunk();
slice(&out)
} else {
slice(self)
}
}
}
// Exposes the array's dtype through the `AsRefDataType` trait.
impl<T: PolarsDataType> AsRefDataType for ChunkedArray<T> {
fn as_ref_dtype(&self) -> &DataType {
self.dtype()
}
}
/// Access to the data of an array as a single raw address.
pub(crate) trait AsSinglePtr: AsRefDataType {
/// Returns an address as `usize`. The default implementation bails with an
/// error naming the operation (`as_single_ptr`) and the dtype.
fn as_single_ptr(&mut self) -> PolarsResult<usize> {
polars_bail!(opq = as_single_ptr, self.as_ref_dtype());
}
}
impl<T> AsSinglePtr for ChunkedArray<T>
where
    T: PolarsNumericType,
{
    /// Rechunks the array in place and returns the address of the first
    /// chunk's value buffer.
    ///
    /// # Panics
    /// Panics if no chunk is left after rechunking.
    fn as_single_ptr(&mut self) -> PolarsResult<usize> {
        self.rechunk_mut();
        let values = self.data_views().next().unwrap();
        Ok(values.as_ptr() as usize)
    }
}
// These types rely on the trait's default `as_single_ptr`, which returns an error.
impl AsSinglePtr for BooleanChunked {}
impl AsSinglePtr for ListChunked {}
#[cfg(feature = "dtype-array")]
impl AsSinglePtr for ArrayChunked {}
impl AsSinglePtr for StringChunked {}
impl AsSinglePtr for BinaryChunked {}
#[cfg(feature = "object")]
impl<T: PolarsObject> AsSinglePtr for ObjectChunked<T> {}
/// Classification of an array by chunk count and presence of nulls, as
/// produced by `ChunkedArray::layout`.
pub enum ChunkedArrayLayout<'a, T: PolarsDataType> {
// Exactly one chunk, without nulls.
SingleNoNull(&'a T::Array),
// Exactly one chunk, with nulls.
Single(&'a T::Array),
// Not exactly one chunk, and no chunk has nulls.
MultiNoNull(&'a ChunkedArray<T>),
// Not exactly one chunk, and at least one chunk has nulls.
Multi(&'a ChunkedArray<T>),
}
impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
{
    /// Classifies the array by chunk count (exactly one vs. otherwise) and by
    /// whether any chunk contains nulls.
    pub fn layout(&self) -> ChunkedArrayLayout<'_, T> {
        if self.chunks.len() != 1 {
            let no_nulls = self.downcast_iter().all(|a| a.null_count() == 0);
            return if no_nulls {
                ChunkedArrayLayout::MultiNoNull(self)
            } else {
                ChunkedArrayLayout::Multi(self)
            };
        }

        let only_chunk = self.downcast_iter().next().unwrap();
        match only_chunk.null_count() {
            0 => ChunkedArrayLayout::SingleNoNull(only_chunk),
            _ => ChunkedArrayLayout::Single(only_chunk),
        }
    }
}
impl<T> ChunkedArray<T>
where
T: PolarsNumericType,
{
pub fn cont_slice(&self) -> PolarsResult<&[T::Native]> {
polars_ensure!(
self.chunks.len() == 1 && self.chunks[0].null_count() == 0,
ComputeError: "chunked array is not contiguous"
);
Ok(self.downcast_iter().next().map(|arr| arr.values()).unwrap())
}
/// Returns the values as one contiguous mutable slice.
///
/// Yields `None` unless the array has exactly one null-free chunk, and also
/// when the chunk itself returns `None` from `get_mut_values`.
pub(crate) fn cont_slice_mut(&mut self) -> Option<&mut [T::Native]> {
    if self.chunks.len() != 1 || self.chunks[0].null_count() != 0 {
        return None;
    }
    let only_chunk = unsafe { self.downcast_iter_mut().next().unwrap() };
    only_chunk.get_mut_values()
}
/// Iterates over the value slice of every chunk, in chunk order. Validity is
/// not consulted, so slots of null values are part of the slices.
pub fn data_views(&self) -> impl DoubleEndedIterator<Item = &[T::Native]> {
self.downcast_iter().map(|arr| arr.values().as_slice())
}
/// Iterates over all values by copy, chunk after chunk, without looking at
/// validity. The name suggests it is meant for arrays without nulls.
#[allow(clippy::wrong_self_convention)]
pub fn into_no_null_iter(
&self,
) -> impl '_ + Send + Sync + ExactSizeIterator<Item = T::Native> + DoubleEndedIterator + TrustedLen
{
// The flattened iterator does not know its exact size, so the length is
// asserted via `trust_my_length`. `data_views` covers every value of every
// chunk, which is expected to total `self.len()`.
#[allow(clippy::map_clone)]
unsafe {
self.data_views()
.flatten()
.map(|v| *v)
.trust_my_length(self.len())
}
}
}
impl<T: PolarsDataType> Clone for ChunkedArray<T> {
fn clone(&self) -> Self {
ChunkedArray {
field: self.field.clone(),
chunks: self.chunks.clone(),
flags: self.flags.clone(),
_pd: Default::default(),
length: self.length,
null_count: self.null_count,
}
}
}
// Identity `AsRef`, so APIs generic over `AsRef<ChunkedArray<T>>` accept the array itself.
impl<T: PolarsDataType> AsRef<ChunkedArray<T>> for ChunkedArray<T> {
fn as_ref(&self) -> &ChunkedArray<T> {
self
}
}
impl ValueSize for ListChunked {
    /// Sums `get_values_size` over all chunks.
    fn get_values_size(&self) -> usize {
        self.chunks
            .iter()
            .map(|arr| arr.get_values_size())
            .sum::<usize>()
    }
}
#[cfg(feature = "dtype-array")]
impl ValueSize for ArrayChunked {
    /// Sums `get_values_size` over all chunks.
    fn get_values_size(&self) -> usize {
        self.chunks
            .iter()
            .map(|arr| arr.get_values_size())
            .sum::<usize>()
    }
}
impl ValueSize for StringChunked {
    /// Sums `get_values_size` over all chunks.
    fn get_values_size(&self) -> usize {
        self.chunks
            .iter()
            .map(|arr| arr.get_values_size())
            .sum::<usize>()
    }
}
impl ValueSize for BinaryOffsetChunked {
    /// Sums `get_values_size` over all chunks.
    fn get_values_size(&self) -> usize {
        self.chunks
            .iter()
            .map(|arr| arr.get_values_size())
            .sum::<usize>()
    }
}
/// Wraps `values` and an optional validity bitmap into a `PrimitiveArray`
/// whose Arrow dtype is derived from `T`'s static dtype.
pub(crate) fn to_primitive<T: PolarsNumericType>(
    values: Vec<T::Native>,
    validity: Option<Bitmap>,
) -> PrimitiveArray<T::Native> {
    let arrow_dtype = T::get_static_dtype().to_arrow(CompatLevel::newest());
    PrimitiveArray::new(arrow_dtype, values.into(), validity)
}
/// Same as `to_primitive`, but returns the array boxed as an `ArrayRef`.
pub(crate) fn to_array<T: PolarsNumericType>(
    values: Vec<T::Native>,
    validity: Option<Bitmap>,
) -> ArrayRef {
    let primitive = to_primitive::<T>(values, validity);
    Box::new(primitive)
}
impl<T: PolarsDataType> Default for ChunkedArray<T> {
    /// Creates an empty, unnamed array of `T`'s static dtype holding a single
    /// empty chunk of the corresponding physical Arrow type.
    fn default() -> Self {
        let dtype = T::get_static_dtype();
        let empty_chunk = new_empty_array(dtype.to_physical().to_arrow(CompatLevel::newest()));
        Self {
            field: Arc::new(Field::new(PlSmallStr::EMPTY, dtype)),
            chunks: vec![empty_chunk],
            flags: StatisticsFlagsIM::empty(),
            length: 0,
            null_count: 0,
            _pd: std::marker::PhantomData,
        }
    }
}
// Unit tests for basic `ChunkedArray` operations.
#[cfg(test)]
pub(crate) mod test {
use crate::prelude::*;
// Shared fixture: an `Int32Chunked` named "a" holding `[1, 2, 3]`.
pub(crate) fn get_chunked_array() -> Int32Chunked {
ChunkedArray::new(PlSmallStr::from_static("a"), &[1, 2, 3])
}
// Ascending sort of integers and strings; the string result must carry the ascending flag.
#[test]
fn test_sort() {
let a = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 9, 3, 2]);
let b = a
.sort(false)
.into_iter()
.map(|opt| opt.unwrap())
.collect::<Vec<_>>();
assert_eq!(b, [1, 2, 3, 9]);
let a = StringChunked::new(PlSmallStr::from_static("a"), &["b", "a", "c"]);
let a = a.sort(false);
let b = a.into_iter().collect::<Vec<_>>();
assert_eq!(b, [Some("a"), Some("b"), Some("c")]);
assert!(a.is_sorted_ascending_flag());
}
// Smoke test: the arithmetic operators run; results are printed, not asserted.
#[test]
fn arithmetic() {
let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 6, 40]);
let b = &Int32Chunked::new(PlSmallStr::from_static("b"), &[-1, 2, 3, 4]);
println!("{:?}", a + b);
println!("{:?}", a - b);
println!("{:?}", a * b);
println!("{:?}", a / b);
}
// Iterating the fixture and summing the values gives 1 + 2 + 3.
#[test]
fn iter() {
let s1 = get_chunked_array();
assert_eq!(s1.into_iter().fold(0, |acc, val| { acc + val.unwrap() }), 6)
}
// `limit(2)` keeps two rows.
#[test]
fn limit() {
let a = get_chunked_array();
let b = a.limit(2);
println!("{b:?}");
assert_eq!(b.len(), 2)
}
// Filtering with a mask that selects only the first row.
#[test]
fn filter() {
let a = get_chunked_array();
let b = a
.filter(&BooleanChunked::new(
PlSmallStr::from_static("filter"),
&[true, false, false],
))
.unwrap();
assert_eq!(b.len(), 1);
assert_eq!(b.into_iter().next(), Some(Some(1)));
}
// `max`, `min` and `sum` on a small integer array.
#[test]
fn aggregates() {
let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 10, 9]);
assert_eq!(a.max(), Some(100));
assert_eq!(a.min(), Some(1));
assert_eq!(a.sum(), Some(120))
}
// Taking two indices yields two rows.
#[test]
fn take() {
let a = get_chunked_array();
let new = a.take(&[0 as IdxSize, 1]).unwrap();
assert_eq!(new.len(), 2)
}
// Casting to `Int64` changes the reported dtype.
#[test]
fn cast() {
let a = get_chunked_array();
let b = a.cast(&DataType::Int64).unwrap();
assert_eq!(b.dtype(), &DataType::Int64)
}
// Helper: asserts that a null-free numeric array equals the expected slice.
fn assert_slice_equal<T>(ca: &ChunkedArray<T>, eq: &[T::Native])
where
T: PolarsNumericType,
{
assert_eq!(ca.iter().map(|opt| opt.unwrap()).collect::<Vec<_>>(), eq)
}
// Slicing an array built by appending two 3-element arrays, including
// negative offsets and requests that run past either end.
#[test]
fn slice() {
let mut first = UInt32Chunked::new(PlSmallStr::from_static("first"), &[0, 1, 2]);
let second = UInt32Chunked::new(PlSmallStr::from_static("second"), &[3, 4, 5]);
first.append(&second).unwrap();
assert_slice_equal(&first.slice(0, 3), &[0, 1, 2]);
assert_slice_equal(&first.slice(0, 4), &[0, 1, 2, 3]);
assert_slice_equal(&first.slice(1, 4), &[1, 2, 3, 4]);
assert_slice_equal(&first.slice(3, 2), &[3, 4]);
assert_slice_equal(&first.slice(3, 3), &[3, 4, 5]);
assert_slice_equal(&first.slice(-3, 3), &[3, 4, 5]);
assert_slice_equal(&first.slice(-6, 6), &[0, 1, 2, 3, 4, 5]);
assert_eq!(first.slice(-7, 2).len(), 1);
assert_eq!(first.slice(-3, 4).len(), 3);
assert_eq!(first.slice(3, 4).len(), 3);
assert_eq!(first.slice(10, 4).len(), 0);
}
// Ascending and descending sorts; for strings with a null, the null sorts first.
#[test]
fn sorting() {
let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[9, 2, 4]);
let sorted = s.sort(false);
assert_slice_equal(&sorted, &[2, 4, 9]);
let sorted = s.sort(true);
assert_slice_equal(&sorted, &[9, 4, 2]);
let s: StringChunked = ["b", "a", "z"].iter().collect();
let sorted = s.sort(false);
assert_eq!(
sorted.into_iter().collect::<Vec<_>>(),
&[Some("a"), Some("b"), Some("z")]
);
let sorted = s.sort(true);
assert_eq!(
sorted.into_iter().collect::<Vec<_>>(),
&[Some("z"), Some("b"), Some("a")]
);
let s: StringChunked = [Some("b"), None, Some("z")].iter().copied().collect();
let sorted = s.sort(false);
assert_eq!(
sorted.into_iter().collect::<Vec<_>>(),
&[None, Some("b"), Some("z")]
);
}
// `reverse` on numeric, boolean and string arrays, with and without nulls.
#[test]
fn reverse() {
let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3]);
assert_slice_equal(&s.reverse(), &[3, 2, 1]);
let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[Some(1), None, Some(3)]);
assert_eq!(Vec::from(&s.reverse()), &[Some(3), None, Some(1)]);
let s = BooleanChunked::new(PlSmallStr::EMPTY, &[true, false]);
assert_eq!(Vec::from(&s.reverse()), &[Some(false), Some(true)]);
let s = StringChunked::new(PlSmallStr::EMPTY, &["a", "b", "c"]);
assert_eq!(Vec::from(&s.reverse()), &[Some("c"), Some("b"), Some("a")]);
let s = StringChunked::new(PlSmallStr::EMPTY, &[Some("a"), None, Some("c")]);
assert_eq!(Vec::from(&s.reverse()), &[Some("c"), None, Some("a")]);
}
// Casting strings to a categorical assigns physical codes in order of first
// appearance and keeps nulls.
#[test]
#[cfg(feature = "dtype-categorical")]
fn test_iter_categorical() {
let ca = StringChunked::new(
PlSmallStr::EMPTY,
&[Some("foo"), None, Some("bar"), Some("ham")],
);
let cats = Categories::new(
PlSmallStr::EMPTY,
PlSmallStr::EMPTY,
CategoricalPhysical::U32,
);
let ca = ca.cast(&DataType::from_categories(cats)).unwrap();
let ca = ca.cat32().unwrap();
let v: Vec<_> = ca.physical().into_iter().collect();
assert_eq!(v, &[Some(0), None, Some(1), Some(2)]);
}
// Ignored by default: checks that `shrink_to_fit` reduces the estimated byte
// size of an array built with a large reserved capacity.
#[test]
#[ignore]
fn test_shrink_to_fit() {
let mut builder = StringChunkedBuilder::new(PlSmallStr::from_static("foo"), 2048);
builder.append_value("foo");
let mut arr = builder.finish();
let before = arr
.chunks()
.iter()
.map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
.sum::<usize>();
arr.shrink_to_fit();
let after = arr
.chunks()
.iter()
.map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
.sum::<usize>();
assert!(before > after);
}
}