use num_traits::FromPrimitive;
use vortex_buffer::BufferMut;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use crate::IntoArray;
use crate::LEGACY_SESSION;
#[expect(deprecated)]
use crate::ToCanonical as _;
use crate::VortexSessionExecute;
use crate::arrays::ConstantArray;
use crate::arrays::ListViewArray;
use crate::arrays::PrimitiveArray;
use crate::arrays::listview::ListViewArrayExt;
use crate::arrays::primitive::PrimitiveArrayExt;
use crate::builders::builder_with_capacity;
use crate::builtins::ArrayBuiltins;
use crate::dtype::IntegerPType;
use crate::dtype::Nullability;
use crate::dtype::PType;
use crate::match_each_integer_ptype;
use crate::match_each_unsigned_integer_ptype;
use crate::scalar::Scalar;
use crate::scalar_fn::fns::operators::Operator;
use crate::validity::Validity;
fn rebuilt_offset_ptype(offsets_ptype: PType) -> PType {
match offsets_ptype {
PType::U8 | PType::U16 | PType::U32 => PType::U32,
PType::U64 => PType::U64,
PType::I8 | PType::I16 | PType::I32 => PType::I32,
PType::I64 => PType::I64,
_ => unreachable!("invalid offsets PType"),
}
}
pub const DEFAULT_REBUILD_DENSITY_THRESHOLD: f32 = 0.1;
pub const DEFAULT_TRIM_ELEMENTS_THRESHOLD: f32 = 0.05;
pub enum ListViewRebuildMode {
MakeZeroCopyToList,
TrimElements,
MakeExact,
OverlapCompression,
}
impl ListViewArray {
pub fn rebuild(&self, mode: ListViewRebuildMode) -> VortexResult<ListViewArray> {
if self.is_empty() {
return Ok(unsafe { self.clone().with_zero_copy_to_list(true) });
}
match mode {
ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(),
ListViewRebuildMode::MakeExact => self.rebuild_make_exact(),
ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
}
}
fn rebuild_zero_copy_to_list(&self) -> VortexResult<ListViewArray> {
if self.is_zero_copy_to_list() {
return Ok(self.clone());
}
let offsets_ptype = self.offsets().dtype().as_ptype();
let sizes_ptype = self.sizes().dtype().as_ptype();
match_each_unsigned_integer_ptype!(sizes_ptype.to_unsigned(), |S| {
match offsets_ptype.to_unsigned() {
PType::U8 => self.naive_rebuild::<u8, u32, S>(),
PType::U16 => self.naive_rebuild::<u16, u32, S>(),
PType::U32 => self.naive_rebuild::<u32, u32, S>(),
PType::U64 => self.naive_rebuild::<u64, u64, S>(),
_ => unreachable!("invalid offsets PType"),
}
})
}
fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
&self,
) -> VortexResult<ListViewArray> {
#[expect(deprecated)]
let sizes_canonical = self.sizes().to_primitive();
let sizes_canonical =
sizes_canonical.reinterpret_cast(sizes_canonical.ptype().to_unsigned());
let total: u64 = sizes_canonical
.as_slice::<S>()
.iter()
.map(|s| (*s).as_() as u64)
.sum();
if Self::should_use_take(total, self.len()) {
self.rebuild_with_take::<O, NewOffset, S>()
} else {
self.rebuild_list_by_list::<O, NewOffset, S>()
}
}
fn should_use_take(total_output_elements: u64, num_lists: usize) -> bool {
if num_lists == 0 {
return true;
}
let avg = total_output_elements / num_lists as u64;
avg < 128
}
fn rebuild_with_take<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
&self,
) -> VortexResult<ListViewArray> {
let new_offset_ptype = rebuilt_offset_ptype(self.offsets().dtype().as_ptype());
let size_ptype = self.sizes().dtype().as_ptype();
#[expect(deprecated)]
let offsets_canonical = self.offsets().to_primitive();
let offsets_canonical =
offsets_canonical.reinterpret_cast(offsets_canonical.ptype().to_unsigned());
let offsets_slice = offsets_canonical.as_slice::<O>();
#[expect(deprecated)]
let sizes_canonical = self.sizes().to_primitive();
let sizes_canonical =
sizes_canonical.reinterpret_cast(sizes_canonical.ptype().to_unsigned());
let sizes_slice = sizes_canonical.as_slice::<S>();
let len = offsets_slice.len();
let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
let mut new_sizes = BufferMut::<S>::with_capacity(len);
let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());
let mut n_elements = NewOffset::zero();
for index in 0..len {
if !self.validity()?.is_valid(index)? {
new_offsets.push(n_elements);
new_sizes.push(S::zero());
continue;
}
let offset = offsets_slice[index];
let size = sizes_slice[index];
let start = offset.as_();
let stop = start + size.as_();
new_offsets.push(n_elements);
new_sizes.push(size);
take_indices.extend(start as u64..stop as u64);
n_elements += num_traits::cast(size).vortex_expect("Cast failed");
}
let elements = self.elements().take(take_indices.into_array())?;
let offsets = PrimitiveArray::new(new_offsets.freeze(), Validity::NonNullable)
.reinterpret_cast(new_offset_ptype)
.into_array();
let sizes = PrimitiveArray::new(new_sizes.freeze(), Validity::NonNullable)
.reinterpret_cast(size_ptype)
.into_array();
Ok(unsafe {
ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
.with_zero_copy_to_list(true)
})
}
fn rebuild_list_by_list<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
&self,
) -> VortexResult<ListViewArray> {
let element_dtype = self
.dtype()
.as_list_element_opt()
.vortex_expect("somehow had a canonical list that was not a list");
let new_offset_ptype = rebuilt_offset_ptype(self.offsets().dtype().as_ptype());
let size_ptype = self.sizes().dtype().as_ptype();
#[expect(deprecated)]
let offsets_canonical = self.offsets().to_primitive();
let offsets_canonical =
offsets_canonical.reinterpret_cast(offsets_canonical.ptype().to_unsigned());
let offsets_slice = offsets_canonical.as_slice::<O>();
#[expect(deprecated)]
let sizes_canonical = self.sizes().to_primitive();
let sizes_canonical =
sizes_canonical.reinterpret_cast(sizes_canonical.ptype().to_unsigned());
let sizes_slice = sizes_canonical.as_slice::<S>();
let len = offsets_slice.len();
let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
let mut new_sizes = BufferMut::<S>::with_capacity(len);
#[expect(deprecated)]
let elements_canonical = self
.elements()
.to_canonical()
.vortex_expect("canonicalize elements for rebuild")
.into_array();
let mut new_elements_builder =
builder_with_capacity(element_dtype.as_ref(), self.elements().len());
let mut n_elements = NewOffset::zero();
for index in 0..len {
if !self.validity()?.is_valid(index)? {
new_offsets.push(n_elements);
new_sizes.push(S::zero());
continue;
}
let offset = offsets_slice[index];
let size = sizes_slice[index];
let start = offset.as_();
let stop = start + size.as_();
new_offsets.push(n_elements);
new_sizes.push(size);
new_elements_builder.extend_from_array(&elements_canonical.slice(start..stop)?);
n_elements += num_traits::cast(size).vortex_expect("Cast failed");
}
let offsets = PrimitiveArray::new(new_offsets.freeze(), Validity::NonNullable)
.reinterpret_cast(new_offset_ptype)
.into_array();
let sizes = PrimitiveArray::new(new_sizes.freeze(), Validity::NonNullable)
.reinterpret_cast(size_ptype)
.into_array();
let elements = new_elements_builder.finish();
debug_assert_eq!(
n_elements.as_(),
elements.len(),
"The accumulated elements somehow had the wrong length"
);
Ok(unsafe {
ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
.with_zero_copy_to_list(true)
})
}
fn rebuild_trim_elements(&self) -> VortexResult<ListViewArray> {
let mut ctx = LEGACY_SESSION.create_execution_ctx();
let (start, end) = self.referenced_element_bounds(&mut ctx)?;
unsafe { self.trim_elements(start, end) }
}
pub unsafe fn trim_elements(&self, start: usize, end: usize) -> VortexResult<ListViewArray> {
let adjusted_offsets = match_each_integer_ptype!(self.offsets().dtype().as_ptype(), |O| {
let offset = <O as FromPrimitive>::from_usize(start)
.vortex_expect("unable to convert the min offset `start` into a `usize`");
let scalar = Scalar::primitive(offset, Nullability::NonNullable);
self.offsets()
.clone()
.binary(
ConstantArray::new(scalar, self.offsets().len()).into_array(),
Operator::Sub,
)
.vortex_expect("was somehow unable to adjust offsets down by their minimum")
});
let sliced_elements = self.elements().slice(start..end)?;
Ok(unsafe {
ListViewArray::new_unchecked(
sliced_elements,
adjusted_offsets,
self.sizes().clone(),
self.validity()?,
)
.with_zero_copy_to_list(self.is_zero_copy_to_list())
})
}
fn rebuild_make_exact(&self) -> VortexResult<ListViewArray> {
if self.is_zero_copy_to_list() {
self.rebuild_trim_elements()
} else {
self.rebuild_zero_copy_to_list()
}
}
}
#[cfg(test)]
mod tests {
use vortex_buffer::BitBuffer;
use vortex_error::VortexResult;
use super::ListViewRebuildMode;
use crate::IntoArray;
use crate::LEGACY_SESSION;
#[expect(deprecated)]
use crate::ToCanonical as _;
use crate::VortexSessionExecute;
use crate::arrays::ListViewArray;
use crate::arrays::PrimitiveArray;
use crate::arrays::listview::ListViewArrayExt;
use crate::assert_arrays_eq;
use crate::dtype::Nullability;
use crate::validity::Validity;
#[test]
fn test_rebuild_flatten_removes_overlaps() -> VortexResult<()> {
let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
assert_eq!(flattened.elements().len(), 5);
assert_eq!(flattened.offset_at(0), 0);
assert_eq!(flattened.size_at(0), 3);
assert_eq!(flattened.offset_at(1), 3);
assert_eq!(flattened.size_at(1), 2);
assert_arrays_eq!(
flattened.list_elements_at(0)?,
PrimitiveArray::from_iter([1i32, 2, 3])
);
assert_arrays_eq!(
flattened.list_elements_at(1)?,
PrimitiveArray::from_iter([2i32, 3])
);
Ok(())
}
#[test]
fn test_rebuild_flatten_with_nullable() -> VortexResult<()> {
use crate::arrays::BoolArray;
let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
let validity = Validity::Array(
BoolArray::new(
BitBuffer::from(vec![true, false, true]),
Validity::NonNullable,
)
.into_array(),
);
let listview = ListViewArray::new(elements, offsets, sizes, validity);
let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
assert!(flattened.validity()?.is_valid(0)?);
assert!(!flattened.validity()?.is_valid(1)?);
assert!(flattened.validity()?.is_valid(2)?);
assert_arrays_eq!(
flattened.list_elements_at(0)?,
PrimitiveArray::from_iter([1i32, 2])
);
assert_arrays_eq!(
flattened.list_elements_at(2)?,
PrimitiveArray::from_iter([3i32])
);
Ok(())
}
#[test]
fn test_rebuild_trim_elements_basic() -> VortexResult<()> {
let elements =
PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 97, 3, 4, 96, 95]).into_array();
let offsets = PrimitiveArray::from_iter(vec![2u32, 5]).into_array();
let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
assert_eq!(trimmed.elements().len(), 5);
assert_eq!(trimmed.offset_at(0), 0);
assert_eq!(trimmed.size_at(0), 2);
assert_eq!(trimmed.offset_at(1), 3);
assert_eq!(trimmed.size_at(1), 2);
assert_arrays_eq!(
trimmed.list_elements_at(0)?,
PrimitiveArray::from_iter([1i32, 2])
);
assert_arrays_eq!(
trimmed.list_elements_at(1)?,
PrimitiveArray::from_iter([3i32, 4])
);
#[expect(deprecated)]
let all_elements = trimmed.elements().to_primitive();
assert_eq!(
all_elements.execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())?,
97i32.into()
);
Ok(())
}
#[test]
fn test_rebuild_with_trailing_nulls_regression() -> VortexResult<()> {
let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4]).into_array();
let offsets = PrimitiveArray::from_iter(vec![0u32, 2, 0, 0]).into_array();
let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 0, 0]).into_array();
let validity = Validity::from_iter(vec![true, true, false, false]);
let listview = ListViewArray::new(elements, offsets, sizes, validity);
let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
assert!(rebuilt.is_zero_copy_to_list());
assert_eq!(rebuilt.offset_at(0), 0);
assert_eq!(rebuilt.offset_at(1), 2);
assert_eq!(rebuilt.offset_at(2), 4); assert_eq!(rebuilt.offset_at(3), 4);
assert_eq!(rebuilt.size_at(0), 2);
assert_eq!(rebuilt.size_at(1), 2);
assert_eq!(rebuilt.size_at(2), 0); assert_eq!(rebuilt.size_at(3), 0);
let exact = rebuilt.rebuild(ListViewRebuildMode::MakeExact)?;
assert!(exact.is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())?);
assert!(exact.is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())?);
assert!(!exact.is_valid(2, &mut LEGACY_SESSION.create_execution_ctx())?);
assert!(!exact.is_valid(3, &mut LEGACY_SESSION.create_execution_ctx())?);
assert_arrays_eq!(
exact.list_elements_at(0)?,
PrimitiveArray::from_iter([1i32, 2])
);
assert_arrays_eq!(
exact.list_elements_at(1)?,
PrimitiveArray::from_iter([3i32, 4])
);
Ok(())
}
#[test]
fn test_rebuild_trim_elements_offsets_wider_than_sizes() -> VortexResult<()> {
let mut elems = vec![0i32; 70_005];
elems[70_000] = 10;
elems[70_001] = 20;
elems[70_002] = 30;
elems[70_003] = 40;
let elements = PrimitiveArray::from_iter(elems).into_array();
let offsets = PrimitiveArray::from_iter(vec![70_000u32, 70_002]).into_array();
let sizes = PrimitiveArray::from_iter(vec![2u16, 2]).into_array();
let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
assert_arrays_eq!(
trimmed.list_elements_at(1)?,
PrimitiveArray::from_iter([30i32, 40])
);
Ok(())
}
#[test]
fn test_rebuild_trim_elements_sizes_wider_than_offsets() -> VortexResult<()> {
let mut elems = vec![0i32; 70_001];
elems[3] = 30;
elems[4] = 40;
let elements = PrimitiveArray::from_iter(elems).into_array();
let offsets = PrimitiveArray::from_iter(vec![1u16, 3]).into_array();
let sizes = PrimitiveArray::from_iter(vec![70_000u32, 2]).into_array();
let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
assert_arrays_eq!(
trimmed.list_elements_at(1)?,
PrimitiveArray::from_iter([30i32, 40])
);
Ok(())
}
#[test]
fn test_rebuild_preserves_signed_offset_and_size_types() -> VortexResult<()> {
use crate::dtype::PType;
let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
let offsets = PrimitiveArray::from_iter(vec![0i32, 1]).into_array();
let sizes = PrimitiveArray::from_iter(vec![3i16, 2]).into_array();
let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
assert_arrays_eq!(
rebuilt.list_elements_at(0)?,
PrimitiveArray::from_iter([1i32, 2, 3])
);
assert_arrays_eq!(
rebuilt.list_elements_at(1)?,
PrimitiveArray::from_iter([2i32, 3])
);
assert_eq!(rebuilt.offsets().dtype().as_ptype(), PType::I32);
assert_eq!(rebuilt.sizes().dtype().as_ptype(), PType::I16);
Ok(())
}
#[test]
fn heuristic_zero_lists_uses_take() {
assert!(ListViewArray::should_use_take(0, 0));
}
#[test]
fn heuristic_small_lists_use_take() {
assert!(ListViewArray::should_use_take(127_000, 1_000));
assert!(!ListViewArray::should_use_take(128_000, 1_000));
}
#[test]
fn test_rebuild_trim_elements_sum_overflows_type() -> VortexResult<()> {
let elements = PrimitiveArray::from_iter(vec![0i32; 261]).into_array();
let offsets = PrimitiveArray::from_iter(vec![215u8, 0]).into_array();
let sizes = PrimitiveArray::from_iter(vec![46u8, 10]).into_array();
let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
assert_arrays_eq!(trimmed, listview);
Ok(())
}
}