use std::fmt::Display;
use std::fmt::Formatter;
use std::sync::Arc;
use num_traits::AsPrimitive;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;
use vortex_error::vortex_ensure;
use vortex_error::vortex_err;
use crate::ArrayRef;
use crate::ToCanonical;
use crate::array::Array;
use crate::array::ArrayParts;
use crate::array::TypedArrayRef;
use crate::array::child_to_validity;
use crate::array::validity_to_child;
use crate::arrays::ListView;
use crate::arrays::Primitive;
use crate::arrays::PrimitiveArray;
use crate::arrays::bool;
use crate::dtype::DType;
use crate::dtype::IntegerPType;
use crate::match_each_integer_ptype;
use crate::validity::Validity;
pub(super) const ELEMENTS_SLOT: usize = 0;
pub(super) const OFFSETS_SLOT: usize = 1;
pub(super) const SIZES_SLOT: usize = 2;
pub(super) const VALIDITY_SLOT: usize = 3;
pub(super) const NUM_SLOTS: usize = 4;
pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["elements", "offsets", "sizes", "validity"];
#[derive(Clone, Debug)]
pub struct ListViewData {
is_zero_copy_to_list: bool,
}
impl Display for ListViewData {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "is_zero_copy_to_list: {}", self.is_zero_copy_to_list)
}
}
pub struct ListViewDataParts {
pub elements_dtype: Arc<DType>,
pub elements: ArrayRef,
pub offsets: ArrayRef,
pub sizes: ArrayRef,
pub validity: Validity,
}
impl ListViewData {
pub(crate) fn make_slots(
elements: &ArrayRef,
offsets: &ArrayRef,
sizes: &ArrayRef,
validity: &Validity,
len: usize,
) -> Vec<Option<ArrayRef>> {
vec![
Some(elements.clone()),
Some(offsets.clone()),
Some(sizes.clone()),
validity_to_child(validity, len),
]
}
pub fn new() -> Self {
Self {
is_zero_copy_to_list: false,
}
}
pub fn try_new() -> VortexResult<Self> {
Ok(Self::new())
}
pub unsafe fn new_unchecked() -> Self {
Self::new()
}
pub fn validate(
elements: &ArrayRef,
offsets: &ArrayRef,
sizes: &ArrayRef,
validity: &Validity,
) -> VortexResult<()> {
vortex_ensure!(
offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
"offsets must be non-nullable integer array, got {}",
offsets.dtype()
);
vortex_ensure!(
sizes.dtype().is_int() && !sizes.dtype().is_nullable(),
"sizes must be non-nullable integer array, got {}",
sizes.dtype()
);
vortex_ensure!(
offsets.len() == sizes.len(),
"offsets and sizes must have the same length, got {} and {}",
offsets.len(),
sizes.len()
);
let size_ptype = sizes.dtype().as_ptype();
let offset_ptype = offsets.dtype().as_ptype();
if let Some(validity_len) = validity.maybe_len() {
vortex_ensure!(
validity_len == offsets.len(),
"validity with size {validity_len} does not match array size {}",
offsets.len()
);
}
if offsets.is_host() && sizes.is_host() {
let offsets_primitive = offsets.to_primitive();
let sizes_primitive = sizes.to_primitive();
match_each_integer_ptype!(offset_ptype, |O| {
match_each_integer_ptype!(size_ptype, |S| {
let offsets_slice = offsets_primitive.as_slice::<O>();
let sizes_slice = sizes_primitive.as_slice::<S>();
validate_offsets_and_sizes::<O, S>(
offsets_slice,
sizes_slice,
elements.len() as u64,
)?;
})
});
}
Ok(())
}
pub unsafe fn with_zero_copy_to_list(mut self, is_zctl: bool) -> Self {
self.is_zero_copy_to_list = is_zctl;
self
}
pub fn is_zero_copy_to_list(&self) -> bool {
self.is_zero_copy_to_list
}
}
impl Default for ListViewData {
fn default() -> Self {
Self::new()
}
}
pub trait ListViewArrayExt: TypedArrayRef<ListView> {
fn nullability(&self) -> crate::dtype::Nullability {
match self.as_ref().dtype() {
DType::List(_, nullability) => *nullability,
_ => unreachable!("ListViewArrayExt requires a list dtype"),
}
}
fn elements(&self) -> &ArrayRef {
self.as_ref().slots()[ELEMENTS_SLOT]
.as_ref()
.vortex_expect("ListViewArray elements slot")
}
fn offsets(&self) -> &ArrayRef {
self.as_ref().slots()[OFFSETS_SLOT]
.as_ref()
.vortex_expect("ListViewArray offsets slot")
}
fn sizes(&self) -> &ArrayRef {
self.as_ref().slots()[SIZES_SLOT]
.as_ref()
.vortex_expect("ListViewArray sizes slot")
}
fn listview_validity(&self) -> Validity {
child_to_validity(&self.as_ref().slots()[VALIDITY_SLOT], self.nullability())
}
fn listview_validity_mask(&self) -> vortex_mask::Mask {
self.listview_validity().to_mask(self.as_ref().len())
}
fn offset_at(&self, index: usize) -> usize {
assert!(
index < self.as_ref().len(),
"Index {index} out of bounds 0..{}",
self.as_ref().len()
);
self.offsets()
.as_opt::<Primitive>()
.map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
.unwrap_or_else(|| {
self.offsets()
.scalar_at(index)
.vortex_expect("offsets must support scalar_at")
.as_primitive()
.as_::<usize>()
.vortex_expect("offset must fit in usize")
})
}
fn size_at(&self, index: usize) -> usize {
assert!(
index < self.as_ref().len(),
"Index {} out of bounds 0..{}",
index,
self.as_ref().len()
);
self.sizes()
.as_opt::<Primitive>()
.map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
.unwrap_or_else(|| {
self.sizes()
.scalar_at(index)
.vortex_expect("sizes must support scalar_at")
.as_primitive()
.as_::<usize>()
.vortex_expect("size must fit in usize")
})
}
fn list_elements_at(&self, index: usize) -> VortexResult<ArrayRef> {
let offset = self.offset_at(index);
let size = self.size_at(index);
self.elements().slice(offset..offset + size)
}
fn verify_is_zero_copy_to_list(&self) -> bool {
validate_zctl(
self.elements(),
self.offsets().to_primitive(),
self.sizes().to_primitive(),
)
.is_ok()
}
}
impl<T: TypedArrayRef<ListView>> ListViewArrayExt for T {}
impl Array<ListView> {
pub fn new(elements: ArrayRef, offsets: ArrayRef, sizes: ArrayRef, validity: Validity) -> Self {
let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
let len = offsets.len();
let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
ListViewData::validate(&elements, &offsets, &sizes, &validity)
.vortex_expect("`ListViewArray` construction failed");
let data = ListViewData::new();
unsafe {
Array::from_parts_unchecked(
ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
)
}
}
pub fn try_new(
elements: ArrayRef,
offsets: ArrayRef,
sizes: ArrayRef,
validity: Validity,
) -> VortexResult<Self> {
let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
let len = offsets.len();
let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
ListViewData::validate(&elements, &offsets, &sizes, &validity)?;
let data = ListViewData::try_new()?;
Ok(unsafe {
Array::from_parts_unchecked(
ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
)
})
}
pub unsafe fn new_unchecked(
elements: ArrayRef,
offsets: ArrayRef,
sizes: ArrayRef,
validity: Validity,
) -> Self {
let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
let len = offsets.len();
let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
let data = unsafe { ListViewData::new_unchecked() };
unsafe {
Array::from_parts_unchecked(
ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
)
}
}
pub unsafe fn with_zero_copy_to_list(self, is_zctl: bool) -> Self {
if cfg!(debug_assertions) && is_zctl {
validate_zctl(
self.elements(),
self.offsets().to_primitive(),
self.sizes().to_primitive(),
)
.vortex_expect("Failed to validate zero-copy to list flag");
}
let dtype = self.dtype().clone();
let len = self.len();
let slots = self.slots().to_vec();
let data = unsafe { self.into_data().with_zero_copy_to_list(is_zctl) };
unsafe {
Array::from_parts_unchecked(
ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
)
}
}
pub fn into_data_parts(self) -> ListViewDataParts {
let elements = self.slots()[ELEMENTS_SLOT]
.clone()
.vortex_expect("ListViewArray elements slot");
let offsets = self.slots()[OFFSETS_SLOT]
.clone()
.vortex_expect("ListViewArray offsets slot");
let sizes = self.slots()[SIZES_SLOT]
.clone()
.vortex_expect("ListViewArray sizes slot");
let validity = self.listview_validity();
ListViewDataParts {
elements_dtype: Arc::new(elements.dtype().clone()),
elements,
offsets,
sizes,
validity,
}
}
}
fn validate_offsets_and_sizes<O, S>(
offsets_slice: &[O],
sizes_slice: &[S],
elements_len: u64,
) -> VortexResult<()>
where
O: IntegerPType,
S: IntegerPType,
{
debug_assert_eq!(offsets_slice.len(), sizes_slice.len());
#[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
for i in 0..offsets_slice.len() {
let offset = offsets_slice[i];
let size = sizes_slice[i];
vortex_ensure!(offset >= O::zero(), "cannot have negative offsets");
vortex_ensure!(size >= S::zero(), "cannot have negative size");
let offset_u64 = offset
.to_u64()
.ok_or_else(|| vortex_err!("offset[{i}] = {offset:?} cannot be converted to u64"))?;
let size_u64 = size
.to_u64()
.ok_or_else(|| vortex_err!("size[{i}] = {size:?} cannot be converted to u64"))?;
let end = offset_u64.checked_add(size_u64).ok_or_else(|| {
vortex_err!("offset[{i}] ({offset_u64}) + size[{i}] ({size_u64}) would overflow u64")
})?;
if offset_u64 == elements_len {
vortex_ensure!(
size_u64 == 0,
"views to the end of the elements array (length {elements_len}) must have size 0 \
(had size {size_u64})"
);
}
vortex_ensure!(
end <= elements_len,
"offset[{i}] + size[{i}] = {offset_u64} + {size_u64} = {end} \
exceeds elements length {elements_len}",
);
}
Ok(())
}
fn validate_zctl(
elements: &ArrayRef,
offsets_primitive: PrimitiveArray,
sizes_primitive: PrimitiveArray,
) -> VortexResult<()> {
if let Some(is_sorted) = offsets_primitive.statistics().compute_is_sorted() {
vortex_ensure!(is_sorted, "offsets must be sorted");
} else {
vortex_bail!("offsets must report is_sorted statistic");
}
fn validate_monotonic_ends<O: IntegerPType, S: IntegerPType>(
offsets_slice: &[O],
sizes_slice: &[S],
len: usize,
) -> VortexResult<()> {
let mut max_end = 0usize;
for i in 0..len {
let offset = offsets_slice[i].to_usize().unwrap_or(usize::MAX);
let size = sizes_slice[i].to_usize().unwrap_or(usize::MAX);
vortex_ensure!(
offset >= max_end,
"Zero-copy-to-list requires views to be non-overlapping and ordered: \
view[{}] starts at {} but previous views extend to {}",
i,
offset,
max_end
);
let end = offset.saturating_add(size);
max_end = max_end.max(end);
}
Ok(())
}
let offsets_dtype = offsets_primitive.dtype();
let sizes_dtype = sizes_primitive.dtype();
let len = offsets_primitive.len();
match_each_integer_ptype!(offsets_dtype.as_ptype(), |O| {
match_each_integer_ptype!(sizes_dtype.as_ptype(), |S| {
let offsets_slice = offsets_primitive.as_slice::<O>();
let sizes_slice = sizes_primitive.as_slice::<S>();
validate_monotonic_ends(offsets_slice, sizes_slice, len)?;
})
});
let mut element_references = vec![0u8; elements.len()];
fn count_references<O: IntegerPType, S: IntegerPType>(
element_references: &mut [u8],
offsets_primitive: PrimitiveArray,
sizes_primitive: PrimitiveArray,
) {
let offsets_slice = offsets_primitive.as_slice::<O>();
let sizes_slice = sizes_primitive.as_slice::<S>();
for i in 0..offsets_slice.len() {
let offset: usize = offsets_slice[i].as_();
let size: usize = sizes_slice[i].as_();
for j in offset..offset + size {
element_references[j] = element_references[j].saturating_add(1);
}
}
}
match_each_integer_ptype!(offsets_primitive.ptype(), |O| {
match_each_integer_ptype!(sizes_primitive.ptype(), |S| {
count_references::<O, S>(&mut element_references, offsets_primitive, sizes_primitive);
})
});
let leftmost_used = element_references
.iter()
.position(|&references| references != 0);
let rightmost_used = element_references
.iter()
.rposition(|&references| references != 0);
if let (Some(first_ref), Some(last_ref)) = (leftmost_used, rightmost_used) {
vortex_ensure!(
element_references[first_ref..=last_ref]
.iter()
.all(|&references| references != 0),
"found gap in elements array between first and last referenced elements"
);
}
vortex_ensure!(element_references.iter().all(|&references| references <= 1));
Ok(())
}