use std::sync::Arc;
use num_traits::AsPrimitive;
use vortex_dtype::DType;
use vortex_dtype::IntegerPType;
use vortex_dtype::match_each_integer_ptype;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;
use vortex_error::vortex_ensure;
use vortex_error::vortex_err;
use crate::Array;
use crate::ArrayRef;
use crate::ToCanonical;
use crate::arrays::PrimitiveArray;
use crate::arrays::PrimitiveVTable;
use crate::arrays::bool;
use crate::stats::ArrayStats;
use crate::validity::Validity;
/// A list array encoded as "views": logical list `i` is the slice
/// `elements[offsets[i]..offsets[i] + sizes[i]]` of the flat elements buffer.
///
/// Unlike a canonical list array, views may be out of order, overlapping, or
/// leave gaps in `elements`; `validate` only requires each view to stay in
/// bounds of the elements buffer.
#[derive(Clone, Debug)]
pub struct ListViewArray {
    // Always `DType::List(element_dtype, nullability)`, derived at construction.
    pub(super) dtype: DType,
    // Flat buffer holding the element values that the views reference.
    elements: ArrayRef,
    // Per-list start position into `elements`; non-nullable integer array.
    offsets: ArrayRef,
    // Per-list length; non-nullable integer array, same length as `offsets`.
    sizes: ArrayRef,
    // True iff views are sorted, non-overlapping, and gap-free between the
    // first and last referenced element (see `validate_zctl`), so conversion
    // to a canonical list needs no element copying.
    is_zero_copy_to_list: bool,
    pub(super) validity: Validity,
    pub(super) stats_set: ArrayStats,
}
/// The deconstructed components of a [`ListViewArray`], as returned by
/// [`ListViewArray::into_parts`].
pub struct ListViewArrayParts {
    // Element dtype extracted from the array's `DType::List`.
    pub elements_dtype: Arc<DType>,
    pub elements: ArrayRef,
    pub offsets: ArrayRef,
    pub sizes: ArrayRef,
    pub validity: Validity,
}
impl ListViewArray {
    /// Create a new [`ListViewArray`], panicking if validation of the parts fails.
    ///
    /// See [`Self::try_new`] for the fallible variant.
    pub fn new(elements: ArrayRef, offsets: ArrayRef, sizes: ArrayRef, validity: Validity) -> Self {
        Self::try_new(elements, offsets, sizes, validity)
            .vortex_expect("`ListViewArray` construction failed")
    }

    /// Create a new [`ListViewArray`] from its parts, validating them first.
    ///
    /// # Errors
    ///
    /// Returns an error if [`Self::validate`] rejects the parts.
    pub fn try_new(
        elements: ArrayRef,
        offsets: ArrayRef,
        sizes: ArrayRef,
        validity: Validity,
    ) -> VortexResult<Self> {
        Self::validate(&elements, &offsets, &sizes, &validity)?;
        Ok(Self {
            dtype: DType::List(Arc::new(elements.dtype().clone()), validity.nullability()),
            elements,
            offsets,
            sizes,
            validity,
            // Conservative default: views are not assumed ordered/non-overlapping.
            // Callers can opt in via `with_zero_copy_to_list` after proving it.
            is_zero_copy_to_list: false,
            stats_set: Default::default(),
        })
    }

    /// Create a new [`ListViewArray`] without validating the parts.
    ///
    /// # Safety
    ///
    /// The caller must guarantee the invariants checked by [`Self::validate`].
    /// Debug builds still run the validation to catch misuse early.
    pub unsafe fn new_unchecked(
        elements: ArrayRef,
        offsets: ArrayRef,
        sizes: ArrayRef,
        validity: Validity,
    ) -> Self {
        if cfg!(debug_assertions) {
            Self::validate(&elements, &offsets, &sizes, &validity)
                .vortex_expect("Failed to create `ListViewArray`");
        }
        Self {
            dtype: DType::List(Arc::new(elements.dtype().clone()), validity.nullability()),
            elements,
            offsets,
            sizes,
            validity,
            is_zero_copy_to_list: false,
            stats_set: Default::default(),
        }
    }

    /// Validate the parts of a [`ListViewArray`]:
    ///
    /// - `offsets` and `sizes` are non-nullable integer arrays of equal length;
    /// - `validity`, when it has a length, matches that length;
    /// - when `offsets`/`sizes` live on the host, every `(offset, size)` pair
    ///   describes an in-bounds view into `elements`.
    pub fn validate(
        elements: &dyn Array,
        offsets: &dyn Array,
        sizes: &dyn Array,
        validity: &Validity,
    ) -> VortexResult<()> {
        vortex_ensure!(
            offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
            "offsets must be non-nullable integer array, got {}",
            offsets.dtype()
        );
        vortex_ensure!(
            sizes.dtype().is_int() && !sizes.dtype().is_nullable(),
            "sizes must be non-nullable integer array, got {}",
            sizes.dtype()
        );
        vortex_ensure!(
            offsets.len() == sizes.len(),
            "offsets and sizes must have the same length, got {} and {}",
            offsets.len(),
            sizes.len()
        );
        let size_ptype = sizes.dtype().as_ptype();
        let offset_ptype = offsets.dtype().as_ptype();
        if let Some(validity_len) = validity.maybe_len() {
            vortex_ensure!(
                validity_len == offsets.len(),
                "validity with size {validity_len} does not match array size {}",
                offsets.len()
            );
        }
        // Per-view bounds checks require reading the buffers, which is only
        // possible when both arrays are resident on the host.
        if offsets.is_host() && sizes.is_host() {
            let offsets_primitive = offsets.to_primitive();
            let sizes_primitive = sizes.to_primitive();
            match_each_integer_ptype!(offset_ptype, |O| {
                match_each_integer_ptype!(size_ptype, |S| {
                    let offsets_slice = offsets_primitive.as_slice::<O>();
                    let sizes_slice = sizes_primitive.as_slice::<S>();
                    validate_offsets_and_sizes::<O, S>(
                        offsets_slice,
                        sizes_slice,
                        elements.len() as u64,
                    )?;
                })
            });
        }
        Ok(())
    }

    /// Set the zero-copy-to-list flag on this array.
    ///
    /// # Safety
    ///
    /// When `is_zctl` is `true`, the caller must guarantee the views are sorted,
    /// non-overlapping, and gap-free (the conditions checked by `validate_zctl`).
    /// Debug builds re-validate the flag to catch misuse early.
    pub unsafe fn with_zero_copy_to_list(mut self, is_zctl: bool) -> Self {
        if cfg!(debug_assertions) && is_zctl {
            validate_zctl(
                &self.elements,
                self.offsets.to_primitive(),
                self.sizes.to_primitive(),
            )
            .vortex_expect("Failed to validate zero-copy to list flag");
        }
        self.is_zero_copy_to_list = is_zctl;
        self
    }

    /// Check (by scanning offsets and sizes) whether this array actually
    /// satisfies the zero-copy-to-list conditions, regardless of the flag.
    pub fn verify_is_zero_copy_to_list(&self) -> bool {
        validate_zctl(
            &self.elements,
            self.offsets.to_primitive(),
            self.sizes.to_primitive(),
        )
        .is_ok()
    }

    /// Decompose this array into its parts.
    pub fn into_parts(self) -> ListViewArrayParts {
        let dtype = self.dtype.into_list_element_opt().vortex_expect("is list");
        ListViewArrayParts {
            elements_dtype: dtype,
            elements: self.elements,
            offsets: self.offsets,
            sizes: self.sizes,
            validity: self.validity,
        }
    }

    /// Return the start position of list `index` within the elements buffer.
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of bounds.
    pub fn offset_at(&self, index: usize) -> usize {
        assert!(
            index < self.len(),
            "Index {index} out of bounds 0..{}",
            self.len()
        );
        // Fast path: read straight out of the primitive buffer; otherwise fall
        // back to a per-element `scalar_at`.
        self.offsets
            .as_opt::<PrimitiveVTable>()
            .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
            .unwrap_or_else(|| {
                self.offsets
                    .scalar_at(index)
                    .vortex_expect("offsets must support scalar_at")
                    .as_primitive()
                    .as_::<usize>()
                    .vortex_expect("offset must fit in usize")
            })
    }

    /// Return the length of list `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of bounds.
    pub fn size_at(&self, index: usize) -> usize {
        assert!(
            index < self.len(),
            "Index {index} out of bounds 0..{}",
            self.len()
        );
        // Fast path mirrors `offset_at`: direct slice access when primitive.
        self.sizes
            .as_opt::<PrimitiveVTable>()
            .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
            .unwrap_or_else(|| {
                self.sizes
                    .scalar_at(index)
                    .vortex_expect("sizes must support scalar_at")
                    .as_primitive()
                    .as_::<usize>()
                    .vortex_expect("size must fit in usize")
            })
    }

    /// Return the elements of list `index` as a slice of the elements array.
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of bounds.
    pub fn list_elements_at(&self, index: usize) -> VortexResult<ArrayRef> {
        let offset = self.offset_at(index);
        let size = self.size_at(index);
        self.elements().slice(offset..offset + size)
    }

    /// The per-list start positions into the elements buffer.
    pub fn offsets(&self) -> &ArrayRef {
        &self.offsets
    }

    /// The per-list lengths.
    pub fn sizes(&self) -> &ArrayRef {
        &self.sizes
    }

    /// The flat buffer of element values referenced by the views.
    pub fn elements(&self) -> &ArrayRef {
        &self.elements
    }

    /// Whether this array has been flagged as convertible to a canonical list
    /// without copying elements.
    pub fn is_zero_copy_to_list(&self) -> bool {
        self.is_zero_copy_to_list
    }
}
fn validate_offsets_and_sizes<O, S>(
offsets_slice: &[O],
sizes_slice: &[S],
elements_len: u64,
) -> VortexResult<()>
where
O: IntegerPType,
S: IntegerPType,
{
debug_assert_eq!(offsets_slice.len(), sizes_slice.len());
#[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
for i in 0..offsets_slice.len() {
let offset = offsets_slice[i];
let size = sizes_slice[i];
vortex_ensure!(offset >= O::zero(), "cannot have negative offsets");
vortex_ensure!(size >= S::zero(), "cannot have negative size");
let offset_u64 = offset
.to_u64()
.ok_or_else(|| vortex_err!("offset[{i}] = {offset:?} cannot be converted to u64"))?;
let size_u64 = size
.to_u64()
.ok_or_else(|| vortex_err!("size[{i}] = {size:?} cannot be converted to u64"))?;
let end = offset_u64.checked_add(size_u64).ok_or_else(|| {
vortex_err!("offset[{i}] ({offset_u64}) + size[{i}] ({size_u64}) would overflow u64")
})?;
if offset_u64 == elements_len {
vortex_ensure!(
size_u64 == 0,
"views to the end of the elements array (length {elements_len}) must have size 0 \
(had size {size_u64})"
);
}
vortex_ensure!(
end <= elements_len,
"offset[{i}] + size[{i}] = {offset_u64} + {size_u64} = {end} \
exceeds elements length {elements_len}",
);
}
Ok(())
}
/// Check whether the views form a valid zero-copy-to-list layout: offsets are
/// sorted, views are non-overlapping and in order, and the referenced portion
/// of the elements buffer is contiguous (no gaps between the first and last
/// referenced element, and no element referenced more than once).
///
/// # Errors
///
/// Returns an error describing the first violated condition.
fn validate_zctl(
    elements: &dyn Array,
    offsets_primitive: PrimitiveArray,
    sizes_primitive: PrimitiveArray,
) -> VortexResult<()> {
    if let Some(is_sorted) = offsets_primitive.statistics().compute_is_sorted() {
        vortex_ensure!(is_sorted, "offsets must be sorted");
    } else {
        vortex_bail!("offsets must report is_sorted statistic");
    }
    // Views must be ordered and non-overlapping: each view starts at or after
    // the furthest end seen so far.
    fn validate_monotonic_ends<O: IntegerPType, S: IntegerPType>(
        offsets_slice: &[O],
        sizes_slice: &[S],
        len: usize,
    ) -> VortexResult<()> {
        let mut max_end = 0usize;
        for i in 0..len {
            let offset = offsets_slice[i].to_usize().unwrap_or(usize::MAX);
            let size = sizes_slice[i].to_usize().unwrap_or(usize::MAX);
            vortex_ensure!(
                offset >= max_end,
                "Zero-copy-to-list requires views to be non-overlapping and ordered: \
                view[{}] starts at {} but previous views extend to {}",
                i,
                offset,
                max_end
            );
            let end = offset.saturating_add(size);
            max_end = max_end.max(end);
        }
        Ok(())
    }
    let offsets_dtype = offsets_primitive.dtype();
    let sizes_dtype = sizes_primitive.dtype();
    let len = offsets_primitive.len();
    match_each_integer_ptype!(offsets_dtype.as_ptype(), |O| {
        match_each_integer_ptype!(sizes_dtype.as_ptype(), |S| {
            let offsets_slice = offsets_primitive.as_slice::<O>();
            let sizes_slice = sizes_primitive.as_slice::<S>();
            validate_monotonic_ends(offsets_slice, sizes_slice, len)?;
        })
    });
    // Count how many views reference each element so we can detect both gaps
    // and multiply-referenced elements below.
    let mut element_references = vec![0u8; elements.len()];
    fn count_references<O: IntegerPType, S: IntegerPType>(
        element_references: &mut [u8],
        offsets_primitive: PrimitiveArray,
        sizes_primitive: PrimitiveArray,
    ) {
        let offsets_slice = offsets_primitive.as_slice::<O>();
        let sizes_slice = sizes_primitive.as_slice::<S>();
        for i in 0..offsets_slice.len() {
            let offset: usize = offsets_slice[i].as_();
            let size: usize = sizes_slice[i].as_();
            for j in offset..offset + size {
                // Saturate so pathological inputs cannot overflow the counter.
                element_references[j] = element_references[j].saturating_add(1);
            }
        }
    }
    match_each_integer_ptype!(offsets_primitive.ptype(), |O| {
        match_each_integer_ptype!(sizes_primitive.ptype(), |S| {
            count_references::<O, S>(&mut element_references, offsets_primitive, sizes_primitive);
        })
    });
    let leftmost_used = element_references
        .iter()
        .position(|&references| references != 0);
    let rightmost_used = element_references
        .iter()
        .rposition(|&references| references != 0);
    // Leading/trailing unreferenced elements are allowed (they can be sliced
    // away), but the referenced span itself must be contiguous.
    if let (Some(first_ref), Some(last_ref)) = (leftmost_used, rightmost_used) {
        vortex_ensure!(
            element_references[first_ref..=last_ref]
                .iter()
                .all(|&references| references != 0),
            "found gap in elements array between first and last referenced elements"
        );
    }
    vortex_ensure!(
        element_references.iter().all(|&references| references <= 1),
        "each element must be referenced by at most one view"
    );
    Ok(())
}