use arrow::offset::OffsetsBuffer;
use polars_compute::gather::take_unchecked;
use super::*;
impl ListChunked {
    /// Explode using a type-specialized `ExplodeByOffsets` kernel instead of a
    /// generic gather. Only reached for inner physical dtypes that are boolean,
    /// null, or primitive-numeric (see the dispatch in `explode_and_offsets`).
    ///
    /// * `values` - the flat child array of the (single-chunk) list array.
    /// * `offsets` - the list offsets as a raw slice.
    /// * `offsets_buf` - the owned offsets buffer; returned unchanged.
    /// * `options` - explode options forwarded to the kernel.
    ///
    /// Returns the exploded values (with the logical inner dtype restored)
    /// paired with the original offsets buffer.
    fn explode_specialized(
        &self,
        values: ArrayRef,
        offsets: &[i64],
        offsets_buf: OffsetsBuffer<i64>,
        options: ExplodeOptions,
    ) -> (Series, OffsetsBuffer<i64>) {
        // SAFETY: `values` is this list array's child, so its layout matches
        // the physical representation of `inner_dtype`.
        let values = unsafe {
            Series::from_chunks_and_dtype_unchecked(
                self.name().clone(),
                vec![values],
                &self.inner_dtype().to_physical(),
            )
        };
        use crate::chunked_array::ops::explode::ExplodeByOffsets;
        let mut values = match values.dtype() {
            DataType::Boolean => {
                let t = values.bool().unwrap();
                ExplodeByOffsets::explode_by_offsets(t, offsets, options).into_series()
            },
            DataType::Null => {
                let t = values.null().unwrap();
                ExplodeByOffsets::explode_by_offsets(t, offsets, options).into_series()
            },
            // The caller guarantees any remaining dtype is primitive-numeric;
            // dispatch to the concrete ChunkedArray type via the macro.
            dtype => {
                with_match_physical_numeric_polars_type!(dtype, |$T| {
                    let t: &ChunkedArray<$T> = values.as_ref().as_ref();
                    ExplodeByOffsets::explode_by_offsets(t, offsets, options).into_series()
                })
            },
        };
        // Convert the physical result back to the logical inner dtype.
        // SAFETY: `values` holds the physical representation of `inner_dtype`.
        values = unsafe { values.from_physical_unchecked(self.inner_dtype()) }.unwrap();
        (values, offsets_buf)
    }
}
impl ChunkExplode for ListChunked {
    /// Return the offsets buffer of this list column (rechunked to one chunk).
    fn offsets(&self) -> PolarsResult<OffsetsBuffer<i64>> {
        // Rechunk so a single LargeListArray holds all rows.
        let ca = self.rechunk();
        let listarr: &LargeListArray = ca.downcast_iter().next().unwrap();
        let offsets = listarr.offsets().clone();
        Ok(offsets)
    }

    /// Explode each list element into its own row.
    ///
    /// Returns the flattened values as a `Series` together with an offsets
    /// buffer mapping each input row to its exploded range. `options` controls
    /// whether null rows (`keep_nulls`) and empty lists (`empty_as_null`) are
    /// materialized as null output rows.
    fn explode_and_offsets(
        &self,
        options: ExplodeOptions,
    ) -> PolarsResult<(Series, OffsetsBuffer<i64>)> {
        let ca = self.rechunk();
        let listarr: &LargeListArray = ca.downcast_iter().next().unwrap();
        let offsets_buf = listarr.offsets().clone();
        let offsets = listarr.offsets().as_slice();
        let mut values = listarr.values().clone();

        // Fast path: the fast-explode flag is set and the options require no
        // extra null rows, so the child array itself is the exploded result
        // (after trimming to the window the offsets actually address).
        let (mut s, offsets) = if ca._can_fast_explode()
            && (!options.keep_nulls || !ca.has_nulls())
            && (!options.empty_as_null || !ca.has_empty_lists())
        {
            if !offsets.is_empty() {
                // A sliced list array may address only part of the child
                // array; cut `values` down to exactly that range.
                let start = offsets[0] as usize;
                let len = offsets[offsets.len() - 1] as usize - start;
                // SAFETY: `start..start + len` lies within `values` because
                // the offsets are valid indices into the child array.
                values = unsafe { values.sliced_unchecked(start, len) };
            }
            (
                // SAFETY: `values` is this array's child, so it matches the
                // physical representation of `inner_dtype`.
                unsafe {
                    Series::from_chunks_and_dtype_unchecked(
                        self.name().clone(),
                        vec![values],
                        &self.inner_dtype().to_physical(),
                    )
                },
                offsets_buf,
            )
        } else {
            // Test-only sanity check: reaching the slow path with offsets that
            // start at 0 and contain no empty list means the fast-explode flag
            // should have been set upstream.
            #[cfg(test)]
            {
                let mut last = offsets[0];
                let mut has_empty = false;
                for &o in &offsets[1..] {
                    if o == last {
                        has_empty = true;
                    }
                    last = o;
                }
                if !has_empty && offsets[0] == 0 {
                    panic!("could have fast exploded")
                }
            }
            // Slow path: build a gather-index array (null entries become null
            // output rows) plus recomputed offsets.
            let (indices, new_offsets) = if listarr.null_count() == 0 {
                let inner_phys = self.inner_dtype().to_physical();
                // Boolean / null / primitive-numeric inner types have a
                // dedicated kernel that avoids the generic gather below.
                if inner_phys.is_primitive_numeric() || inner_phys.is_null() || inner_phys.is_bool()
                {
                    return Ok(self.explode_specialized(values, offsets, offsets_buf, options));
                }
                let mut indices =
                    MutablePrimitiveArray::<IdxSize>::with_capacity(*offsets_buf.last() as usize);
                let mut new_offsets = Vec::with_capacity(listarr.len() + 1);
                // `current_offset` accumulates list lengths; note that a null
                // emitted for an empty list does not advance it.
                let mut current_offset = 0i64;
                let mut iter = offsets.iter();
                if let Some(mut previous) = iter.next().copied() {
                    new_offsets.push(current_offset);
                    iter.for_each(|&offset| {
                        let len = offset - previous;
                        let start = previous as IdxSize;
                        let end = offset as IdxSize;
                        if options.empty_as_null && len == 0 {
                            // Empty list -> a single null output row.
                            indices.push_null();
                        } else {
                            indices.extend_trusted_len_values(start..end);
                        }
                        current_offset += len;
                        previous = offset;
                        new_offsets.push(current_offset);
                    })
                }
                (indices, new_offsets)
            } else {
                // Same as above, but consult the validity bitmap so null rows
                // are skipped or (with `keep_nulls`) emitted as a null row.
                let validity = listarr.validity().unwrap();
                let mut indices =
                    MutablePrimitiveArray::<IdxSize>::with_capacity(*offsets_buf.last() as usize);
                let mut new_offsets = Vec::with_capacity(listarr.len() + 1);
                let mut current_offset = 0i64;
                let mut iter = offsets.iter();
                if let Some(mut previous) = iter.next().copied() {
                    new_offsets.push(current_offset);
                    iter.enumerate().for_each(|(i, &offset)| {
                        let len = offset - previous;
                        let start = previous as IdxSize;
                        let end = offset as IdxSize;
                        // SAFETY: `i` indexes the rows of `listarr`, which the
                        // validity bitmap covers.
                        if unsafe { validity.get_bit_unchecked(i) } {
                            if options.empty_as_null && len == 0 {
                                indices.push_null();
                            } else {
                                indices.extend_trusted_len_values(start..end);
                            }
                            current_offset += len;
                        } else if options.keep_nulls {
                            indices.push_null();
                        }
                        previous = offset;
                        new_offsets.push(current_offset);
                    })
                }
                (indices, new_offsets)
            };
            // SAFETY: every non-null index was derived from the offsets and is
            // therefore in bounds of `values`.
            let chunk = unsafe { take_unchecked(values.as_ref(), &indices.into()) };
            // SAFETY: the gathered chunk has the physical inner dtype's layout.
            let s = unsafe {
                Series::from_chunks_and_dtype_unchecked(
                    self.name().clone(),
                    vec![chunk],
                    &self.inner_dtype().to_physical(),
                )
            };
            // SAFETY: `new_offsets` starts at 0 and is monotonically
            // non-decreasing by construction.
            let new_offsets = unsafe { OffsetsBuffer::new_unchecked(new_offsets.into()) };
            (s, new_offsets)
        };
        debug_assert_eq!(s.name(), self.name());
        // Convert back from the physical to the logical inner dtype.
        // SAFETY: `s` holds the physical representation of `inner_dtype`.
        s = unsafe { s.from_physical_unchecked(self.inner_dtype()) }.unwrap();
        Ok((s, offsets))
    }
}
#[cfg(feature = "dtype-array")]
impl ChunkExplode for ArrayChunked {
    /// Compute explode offsets for a fixed-size-list column: non-null rows
    /// each span `width` values, null rows span zero.
    fn offsets(&self) -> PolarsResult<OffsetsBuffer<i64>> {
        if self.null_count() == 0 {
            // No nulls: offsets are simply multiples of the array width.
            let width = self.width() as i64;
            let offsets = (0..self.len() + 1)
                .map(|i| {
                    let i = i as i64;
                    i * width
                })
                .collect::<Vec<_>>();
            // SAFETY: multiples of a non-negative width increase from 0.
            let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets.into()) };
            return Ok(offsets);
        }
        let ca = self.rechunk();
        let arr = ca.downcast_iter().next().unwrap();
        // null_count > 0, so a validity bitmap must exist.
        let validity = arr.validity().unwrap();
        let width = arr.size();
        let mut current_offset = 0i64;
        let offsets = (0..=arr.len())
            .map(|i| {
                if i == 0 {
                    return current_offset;
                }
                // SAFETY: `i - 1` < arr.len(), covered by the validity bitmap.
                if unsafe { validity.get_bit_unchecked(i - 1) } {
                    current_offset += width as i64
                }
                current_offset
            })
            .collect::<Vec<_>>();
        // SAFETY: starts at 0 and never decreases.
        let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets.into()) };
        Ok(offsets)
    }

    /// Explode a fixed-size-list column into a flat `Series` plus offsets.
    ///
    /// `options` controls whether null rows (`keep_nulls`) and zero-width rows
    /// (`empty_as_null`) are materialized as null output rows.
    fn explode_and_offsets(
        &self,
        options: ExplodeOptions,
    ) -> PolarsResult<(Series, OffsetsBuffer<i64>)> {
        // Zero-width arrays contain no values at all; the output consists only
        // of the null rows the options ask to materialize.
        if self.width() == 0 {
            let mut num_nulls = 0;
            if options.empty_as_null {
                // Every non-null (but empty) row becomes one null output row.
                num_nulls += self.len() - self.null_count();
            }
            if options.keep_nulls {
                num_nulls += self.null_count();
            }
            let offsets = (0..num_nulls as i64 + 1).collect::<Vec<i64>>();
            // SAFETY: the sequence 0, 1, 2, ... is strictly increasing.
            let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets.into()) };
            let s = Column::new_scalar(
                self.name().clone(),
                Scalar::null(self.inner_dtype().clone()),
                num_nulls,
            )
            .take_materialized_series();
            return Ok((s, offsets));
        }
        let ca = self.rechunk();
        let arr = ca.downcast_iter().next().unwrap();
        if arr.null_count() == 0 {
            // Fast path: with no nulls the child values already are the
            // exploded result; only the offsets need computing.
            // SAFETY: the child array matches the inner dtype.
            let s = unsafe {
                Series::from_chunks_and_dtype_unchecked(
                    self.name().clone(),
                    vec![arr.values().clone()],
                    ca.inner_dtype(),
                )
            };
            let width = self.width() as i64;
            let offsets = (0..self.len() + 1)
                .map(|i| {
                    let i = i as i64;
                    i * width
                })
                .collect::<Vec<_>>();
            // SAFETY: multiples of width increase monotonically from 0.
            let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets.into()) };
            return Ok((s, offsets));
        }
        // Slow path: gather only the values of valid rows; null rows are
        // dropped or (with `keep_nulls`) emitted as a single null row.
        let validity = arr.validity().unwrap();
        let values = arr.values();
        let width = arr.size();
        // Capacity: `width` slots per valid row plus one per null row — exact
        // when `keep_nulls`, otherwise a slight over-allocation.
        let mut indices = MutablePrimitiveArray::<IdxSize>::with_capacity(
            values.len() - arr.null_count() * (width - 1),
        );
        let mut offsets = Vec::with_capacity(arr.len() + 1);
        let mut current_offset = 0i64;
        offsets.push(current_offset);
        (0..arr.len()).for_each(|i| {
            // SAFETY: `i` < arr.len(), covered by the validity bitmap.
            if unsafe { validity.get_bit_unchecked(i) } {
                let start = (i * width) as IdxSize;
                let end = start + width as IdxSize;
                indices.extend_trusted_len_values(start..end);
                current_offset += width as i64;
            } else if options.keep_nulls {
                // Null row -> one null output row (offset not advanced).
                indices.push_null();
            }
            offsets.push(current_offset);
        });
        // SAFETY: non-null indices address `i * width .. (i + 1) * width` for
        // i < arr.len(), all in bounds of the child `values`.
        let chunk = unsafe { take_unchecked(&**values, &indices.into()) };
        // SAFETY: starts at 0 and never decreases.
        let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets.into()) };
        Ok((
            // SAFETY: the gathered chunk matches the inner dtype.
            unsafe {
                Series::from_chunks_and_dtype_unchecked(
                    ca.name().clone(),
                    vec![chunk],
                    ca.inner_dtype(),
                )
            },
            offsets,
        ))
    }
}