use vortex_dtype::{IntegerPType, Nullability, match_each_integer_ptype};
use vortex_error::VortexExpect;
use crate::arrays::ListViewArray;
use crate::builders::{ArrayBuilder, ListViewBuilder, PrimitiveBuilder, builder_with_capacity};
use crate::vtable::ValidityHelper;
use crate::{Array, IntoArray, ToCanonical};
pub enum ListViewRebuildMode {
MakeZeroCopyToList,
RemoveGaps,
OverlapCompression,
}
impl ListViewArray {
pub fn rebuild(&self, mode: ListViewRebuildMode) -> ListViewArray {
match mode {
ListViewRebuildMode::RemoveGaps => self.rebuild_remove_gaps(),
ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
}
}
fn rebuild_zero_copy_to_list(&self) -> ListViewArray {
let element_dtype = self
.dtype()
.as_list_element_opt()
.vortex_expect("somehow had a canonical list that was not a list");
let offsets_ptype = self.offsets().dtype().as_ptype();
let sizes_ptype = self.sizes().dtype().as_ptype();
match_each_integer_ptype!(offsets_ptype, |O| {
match_each_integer_ptype!(sizes_ptype, |S| {
let mut builder = ListViewBuilder::<O, S>::with_capacity(
element_dtype.clone(),
self.dtype().nullability(),
self.elements().len(),
self.len(),
);
builder.extend_from_array(self.as_ref());
builder.finish_into_listview()
})
})
}
fn rebuild_remove_gaps(&self) -> ListViewArray {
if self.is_empty() {
return self.clone();
}
let offset_ptype = self.offsets().dtype().as_ptype();
let size_ptype = self.sizes().dtype().as_ptype();
match_each_integer_ptype!(offset_ptype, |O| {
match_each_integer_ptype!(size_ptype, |S| { self.rebuild_remove_gaps_inner::<O, S>() })
})
}
fn rebuild_remove_gaps_inner<O, S>(&self) -> ListViewArray
where
O: IntegerPType,
S: IntegerPType,
{
let offsets_primitive = self.offsets().to_primitive();
let sizes_primitive = self.sizes().to_primitive();
let offsets_slice = offsets_primitive.as_slice::<O>();
let sizes_slice = sizes_primitive.as_slice::<S>();
let mut referenced_elements = vec![false; self.elements().len()];
for i in 0..self.len() {
let offset: usize = offsets_slice[i].as_();
let size: usize = sizes_slice[i].as_();
for j in offset..offset + size {
referenced_elements[j] = true;
}
}
if referenced_elements
.iter()
.all(|&is_referenced| is_referenced)
{
return self.clone();
}
let mut prefix_sum = vec![0; self.elements().len()];
let mut cumulative_sum = 0;
for i in 0..referenced_elements.len() {
prefix_sum[i] = cumulative_sum;
if referenced_elements[i] {
cumulative_sum += 1;
}
}
let mut elements_builder = builder_with_capacity(self.elements().dtype(), cumulative_sum);
for (i, &is_referenced) in referenced_elements.iter().enumerate() {
if is_referenced {
elements_builder
.append_scalar(&self.elements().scalar_at(i))
.vortex_expect("append scalar");
}
}
let mut new_offsets_builder =
PrimitiveBuilder::<O>::with_capacity(Nullability::NonNullable, self.len());
for &old_offset in offsets_slice {
let new_offset = prefix_sum[old_offset.as_()];
let offset_value =
O::from_usize(new_offset).vortex_expect("offset must fit in offset type");
new_offsets_builder.append_value(offset_value);
}
unsafe {
ListViewArray::new_unchecked(
elements_builder.finish(),
new_offsets_builder.finish().into_array(),
self.sizes().clone(),
self.validity().clone(),
)
}
}
}
#[cfg(test)]
mod tests {
use vortex_dtype::Nullability;
use crate::arrays::{ListViewArray, PrimitiveArray};
use crate::validity::Validity;
use crate::vtable::ValidityHelper;
use crate::{IntoArray, ToCanonical};
#[test]
fn test_rebuild_remove_gaps_with_leading_garbage() {
let elements = PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 3]).into_array();
let offsets = PrimitiveArray::from_iter(vec![2u32, 3]).into_array();
let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
let listview =
ListViewArray::try_new(elements, offsets, sizes, Validity::NonNullable).unwrap();
let compacted = listview.rebuild_remove_gaps();
assert_eq!(compacted.elements().len(), 3);
assert_eq!(compacted.offset_at(0), 0);
assert_eq!(compacted.size_at(0), 2);
assert_eq!(compacted.offset_at(1), 1);
assert_eq!(compacted.size_at(1), 2);
let list0 = compacted.list_elements_at(0).to_primitive();
assert_eq!(list0.as_slice::<i32>(), &[1, 2]);
let list1 = compacted.list_elements_at(1).to_primitive();
assert_eq!(list1.as_slice::<i32>(), &[2, 3]);
}
#[test]
fn test_rebuild_remove_gaps_with_trailing_garbage() {
let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 99, 98]).into_array();
let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
let listview =
ListViewArray::try_new(elements, offsets, sizes, Validity::NonNullable).unwrap();
let compacted = listview.rebuild_remove_gaps();
assert_eq!(compacted.elements().len(), 3);
assert_eq!(compacted.offset_at(0), 0);
assert_eq!(compacted.offset_at(1), 1);
}
#[test]
fn test_rebuild_remove_gaps_no_garbage() {
let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
let offsets = PrimitiveArray::from_iter(vec![0u32, 2]).into_array();
let sizes = PrimitiveArray::from_iter(vec![2u32, 1]).into_array();
let listview =
ListViewArray::try_new(elements, offsets, sizes, Validity::NonNullable).unwrap();
let compacted = listview.rebuild_remove_gaps();
assert_eq!(compacted.elements().len(), 3);
assert_eq!(compacted.offset_at(0), 0);
assert_eq!(compacted.offset_at(1), 2);
}
#[test]
fn test_rebuild_remove_gaps_preserves_overlaps() {
let elements = PrimitiveArray::from_iter(vec![99i32, 1, 2, 3, 98]).into_array();
let offsets = PrimitiveArray::from_iter(vec![1u32, 2]).into_array();
let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
let listview =
ListViewArray::try_new(elements, offsets, sizes, Validity::NonNullable).unwrap();
let compacted = listview.rebuild_remove_gaps();
assert_eq!(compacted.elements().len(), 3);
assert_eq!(compacted.offset_at(0), 0);
assert_eq!(compacted.size_at(0), 3);
assert_eq!(compacted.offset_at(1), 1);
assert_eq!(compacted.size_at(1), 2);
let list0 = compacted.list_elements_at(0).to_primitive();
assert_eq!(list0.as_slice::<i32>(), &[1, 2, 3]);
let list1 = compacted.list_elements_at(1).to_primitive();
assert_eq!(list1.as_slice::<i32>(), &[2, 3]);
}
#[test]
fn test_rebuild_remove_gaps_multiple_gaps() {
let elements =
PrimitiveArray::from_iter(vec![99i32, 1, 2, 98, 3, 4, 97, 5, 6, 96]).into_array();
let offsets = PrimitiveArray::from_iter(vec![1u32, 4, 7]).into_array();
let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 2]).into_array();
let listview =
ListViewArray::try_new(elements, offsets, sizes, Validity::NonNullable).unwrap();
let compacted = listview.rebuild_remove_gaps();
assert_eq!(compacted.elements().len(), 6);
assert_eq!(compacted.offset_at(0), 0);
assert_eq!(compacted.size_at(0), 2);
assert_eq!(compacted.offset_at(1), 2);
assert_eq!(compacted.size_at(1), 2);
assert_eq!(compacted.offset_at(2), 4);
assert_eq!(compacted.size_at(2), 2);
let list0 = compacted.list_elements_at(0).to_primitive();
assert_eq!(list0.as_slice::<i32>(), &[1, 2]);
let list1 = compacted.list_elements_at(1).to_primitive();
assert_eq!(list1.as_slice::<i32>(), &[3, 4]);
let list2 = compacted.list_elements_at(2).to_primitive();
assert_eq!(list2.as_slice::<i32>(), &[5, 6]);
}
#[test]
fn test_rebuild_flatten_removes_overlaps() {
let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
let listview =
ListViewArray::try_new(elements, offsets, sizes, Validity::NonNullable).unwrap();
let flattened = listview.rebuild_zero_copy_to_list();
assert_eq!(flattened.elements().len(), 5);
assert_eq!(flattened.offset_at(0), 0);
assert_eq!(flattened.size_at(0), 3);
assert_eq!(flattened.offset_at(1), 3);
assert_eq!(flattened.size_at(1), 2);
let list0 = flattened.list_elements_at(0).to_primitive();
assert_eq!(list0.as_slice::<i32>(), &[1, 2, 3]);
let list1 = flattened.list_elements_at(1).to_primitive();
assert_eq!(list1.as_slice::<i32>(), &[2, 3]);
}
#[test]
fn test_rebuild_flatten_with_nullable() {
use arrow_buffer::BooleanBuffer;
use crate::arrays::BoolArray;
let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
let validity = Validity::Array(
BoolArray::from(BooleanBuffer::from(vec![true, false, true])).into_array(),
);
let listview = ListViewArray::try_new(elements, offsets, sizes, validity).unwrap();
let flattened = listview.rebuild_zero_copy_to_list();
assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
assert!(flattened.validity().is_valid(0));
assert!(!flattened.validity().is_valid(1));
assert!(flattened.validity().is_valid(2));
let list0 = flattened.list_elements_at(0).to_primitive();
assert_eq!(list0.as_slice::<i32>(), &[1, 2]);
let list2 = flattened.list_elements_at(2).to_primitive();
assert_eq!(list2.as_slice::<i32>(), &[3]);
}
}