use std::sync::Arc;
use num_traits::AsPrimitive;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;
use vortex_error::vortex_ensure;
use vortex_error::vortex_panic;
use crate::ArrayRef;
use crate::DynArray;
use crate::IntoArray;
use crate::arrays::ConstantArray;
use crate::arrays::ListVTable;
use crate::arrays::PrimitiveVTable;
use crate::builtins::ArrayBuiltins;
use crate::compute::min_max;
use crate::dtype::DType;
use crate::dtype::NativePType;
use crate::match_each_integer_ptype;
use crate::match_each_native_ptype;
use crate::scalar_fn::fns::operators::Operator;
use crate::stats::ArrayStats;
use crate::validity::Validity;
#[derive(Clone, Debug)]
pub struct ListArray {
pub(super) dtype: DType,
pub(super) elements: ArrayRef,
pub(super) offsets: ArrayRef,
pub(super) validity: Validity,
pub(super) stats_set: ArrayStats,
}
pub struct ListArrayParts {
pub dtype: DType,
pub elements: ArrayRef,
pub offsets: ArrayRef,
pub validity: Validity,
}
impl ListArray {
pub fn new(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
Self::try_new(elements, offsets, validity).vortex_expect("ListArray new")
}
pub fn try_new(
elements: ArrayRef,
offsets: ArrayRef,
validity: Validity,
) -> VortexResult<Self> {
Self::validate(&elements, &offsets, &validity)?;
Ok(unsafe { Self::new_unchecked(elements, offsets, validity) })
}
pub unsafe fn new_unchecked(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
#[cfg(debug_assertions)]
Self::validate(&elements, &offsets, &validity)
.vortex_expect("[Debug Assertion]: Invalid `ListViewArray` parameters");
Self {
dtype: DType::List(Arc::new(elements.dtype().clone()), validity.nullability()),
elements,
offsets,
validity,
stats_set: Default::default(),
}
}
pub fn validate(
elements: &ArrayRef,
offsets: &ArrayRef,
validity: &Validity,
) -> VortexResult<()> {
vortex_ensure!(
!offsets.is_empty(),
InvalidArgument: "Offsets must have at least one element, [0] for an empty list"
);
vortex_ensure!(
offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
InvalidArgument: "offsets have invalid type {}",
offsets.dtype()
);
let offsets_ptype = offsets.dtype().as_ptype();
if let Some(is_sorted) = offsets.statistics().compute_is_sorted() {
vortex_ensure!(is_sorted, InvalidArgument: "offsets must be sorted");
} else {
vortex_bail!(InvalidArgument: "offsets must report is_sorted statistic");
}
if let Some(min_max) = min_max(offsets)? {
match_each_integer_ptype!(offsets_ptype, |P| {
#[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
{
let max = min_max
.max
.as_primitive()
.as_::<P>()
.vortex_expect("offsets type must fit offsets values");
let min = min_max
.min
.as_primitive()
.as_::<P>()
.vortex_expect("offsets type must fit offsets values");
vortex_ensure!(
min >= 0,
InvalidArgument: "offsets minimum {min} outside valid range [0, {max}]"
);
vortex_ensure!(
max <= P::try_from(elements.len()).unwrap_or_else(|_| vortex_panic!(
"Offsets type {} must be able to fit elements length {}",
<P as NativePType>::PTYPE,
elements.len()
)),
InvalidArgument: "Max offset {max} is beyond the length of the elements array {}",
elements.len()
);
}
})
} else {
vortex_bail!(
InvalidArgument: "offsets array with encoding {} must support min_max compute function",
offsets.encoding_id()
);
};
if let Some(validity_len) = validity.maybe_len() {
vortex_ensure!(
validity_len == offsets.len() - 1,
InvalidArgument: "validity with size {validity_len} does not match array size {}",
offsets.len() - 1
);
}
Ok(())
}
pub fn into_parts(self) -> ListArrayParts {
ListArrayParts {
dtype: self.dtype,
elements: self.elements,
offsets: self.offsets,
validity: self.validity,
}
}
pub fn offset_at(&self, index: usize) -> VortexResult<usize> {
vortex_ensure!(
index <= self.len(),
"Index {index} out of bounds 0..={}",
self.len()
);
if let Some(p) = self.offsets().as_opt::<PrimitiveVTable>() {
Ok(match_each_native_ptype!(p.ptype(), |P| {
p.as_slice::<P>()[index].as_()
}))
} else {
self.offsets()
.scalar_at(index)?
.as_primitive()
.as_::<usize>()
.ok_or_else(|| vortex_error::vortex_err!("offset value does not fit in usize"))
}
}
pub fn list_elements_at(&self, index: usize) -> VortexResult<ArrayRef> {
let start = self.offset_at(index)?;
let end = self.offset_at(index + 1)?;
self.elements().slice(start..end)
}
pub fn sliced_elements(&self) -> VortexResult<ArrayRef> {
let start = self.offset_at(0)?;
let end = self.offset_at(self.len())?;
self.elements().slice(start..end)
}
pub fn offsets(&self) -> &ArrayRef {
&self.offsets
}
pub fn element_dtype(&self) -> &Arc<DType> {
match &self.dtype {
DType::List(element_dtype, _) => element_dtype,
_ => vortex_panic!("ListArray has invalid dtype {}", self.dtype),
}
}
pub fn elements(&self) -> &ArrayRef {
&self.elements
}
pub fn reset_offsets(&self, recurse: bool) -> VortexResult<Self> {
let mut elements = self.sliced_elements()?;
if recurse && elements.is_canonical() {
elements = elements.to_canonical()?.compact()?.into_array();
} else if recurse && let Some(child_list_array) = elements.as_opt::<ListVTable>() {
elements = child_list_array.reset_offsets(recurse)?.into_array();
}
let offsets = self.offsets();
let first_offset = offsets.scalar_at(0)?;
let adjusted_offsets = offsets.to_array().binary(
ConstantArray::new(first_offset, offsets.len()).into_array(),
Operator::Sub,
)?;
Self::try_new(elements, adjusted_offsets, self.validity.clone())
}
}