use std::sync::Arc;
use arrow_array::ArrayRef as ArrowArrayRef;
use arrow_array::GenericListArray;
use arrow_array::OffsetSizeTrait;
use arrow_buffer::OffsetBuffer;
use arrow_schema::FieldRef;
use vortex_buffer::BufferMut;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use vortex_error::vortex_ensure;
use crate::ArrayRef;
use crate::Canonical;
use crate::ExecutionCtx;
use crate::arrays::Chunked;
use crate::arrays::List;
use crate::arrays::ListArray;
use crate::arrays::ListView;
use crate::arrays::ListViewArray;
use crate::arrays::chunked::ChunkedArrayExt;
use crate::arrays::list::ListArrayExt;
use crate::arrays::listview::ListViewArrayExt;
use crate::arrays::listview::ListViewDataParts;
use crate::arrays::listview::ListViewRebuildMode;
use crate::arrow::ArrowArrayExecutor;
use crate::arrow::executor::validity::to_arrow_null_buffer;
use crate::builtins::ArrayBuiltins;
use crate::dtype::DType;
use crate::dtype::NativePType;
use crate::dtype::Nullability;
/// Converts a Vortex list-typed array into an Arrow [`GenericListArray`] whose offsets have
/// type `O`.
///
/// Dispatch order:
/// 1. A `List` array is handed to [`list_to_list`].
/// 2. A `Chunked` array with at least one chunk is converted chunk by chunk and the results
///    are concatenated.
/// 3. Anything else is turned into a [`ListViewArray`] (by downcast when it already is one,
///    otherwise by executing it), rebuilt with `ListViewRebuildMode::MakeZeroCopyToList`
///    when it is not already flagged zero-copy-to-list, and handed to [`list_view_zctl`].
///
/// # Errors
///
/// Propagates any error from casting, executing, rebuilding or concatenating the array.
pub(super) fn to_arrow_list<O: OffsetSizeTrait + NativePType>(
    array: ArrayRef,
    elements_field: &FieldRef,
    ctx: &mut ExecutionCtx,
) -> VortexResult<ArrowArrayRef> {
    if let Some(list) = array.as_opt::<List>() {
        return list_to_list::<O>(&list.into_owned(), elements_field, ctx);
    }

    if let Some(chunked) = array.as_opt::<Chunked>() {
        // `arrow_select::concat::concat` rejects an empty input slice, so a chunked array
        // without any chunks must not take this path; it falls through to the generic
        // conversion below instead of surfacing a spurious error.
        if chunked.nchunks() > 0 {
            let mut arrow_chunks: Vec<ArrowArrayRef> = Vec::with_capacity(chunked.nchunks());
            for chunk in chunked.chunks() {
                arrow_chunks.push(to_arrow_list::<O>(chunk.clone(), elements_field, ctx)?);
            }
            let refs = arrow_chunks.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
            return Ok(arrow_select::concat::concat(&refs)?);
        }
    }

    // Reuse the array as-is when it already is a list view; otherwise execute it into one.
    let list_view = match array.try_downcast::<ListView>() {
        Ok(list_view) => list_view,
        Err(other) => other.execute::<ListViewArray>(ctx)?,
    };

    // `list_view_zctl` asserts the zero-copy-to-list flag, so rebuild first when it is unset.
    let zctl = if list_view.is_zero_copy_to_list() {
        list_view
    } else {
        list_view.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?
    };
    list_view_zctl::<O>(zctl, elements_field, ctx)
}
/// Converts a Vortex [`ListArray`] into an Arrow [`GenericListArray`] with offset type `O`.
///
/// The offsets are cast to a non-nullable primitive of type `O` and executed to a canonical
/// primitive buffer; the elements are converted to Arrow using the data type of
/// `elements_field`; the list validity becomes the Arrow null buffer.
///
/// # Errors
///
/// Returns an error if any cast/execution step fails, if `elements_field` is non-nullable
/// but the converted elements contain nulls, or if Arrow rejects the assembled parts
/// (reported as an error rather than a panic).
fn list_to_list<O: OffsetSizeTrait + NativePType>(
    array: &ListArray,
    elements_field: &FieldRef,
    ctx: &mut ExecutionCtx,
) -> VortexResult<ArrowArrayRef> {
    let offsets = array
        .offsets()
        .cast(DType::Primitive(O::PTYPE, Nullability::NonNullable))?
        .execute::<Canonical>(ctx)?
        .into_primitive()
        .to_buffer::<O>()
        .into_arrow_offset_buffer();

    let elements = array
        .elements()
        .clone()
        .execute_arrow(Some(elements_field.data_type()), ctx)?;
    vortex_ensure!(
        elements_field.is_nullable() || elements.null_count() == 0,
        "Cannot convert to non-nullable Arrow array with null elements"
    );

    let null_buffer = to_arrow_null_buffer(array.validity()?, array.len(), ctx)?;

    // `try_new` performs the same validation as `new` but returns the failure instead of
    // panicking, which matches this function's fallible signature.
    let list = GenericListArray::<O>::try_new(
        Arc::clone(elements_field),
        offsets,
        elements,
        null_buffer,
    )?;
    Ok(Arc::new(list))
}
/// Converts a [`ListViewArray`] that is flagged zero-copy-to-list into an Arrow
/// [`GenericListArray`] with offset type `O`.
///
/// Arrow list offsets hold one more entry than there are lists. The list view's own offsets
/// are reused and a single trailing entry, `last_offset + last_size`, is appended, so only
/// the final size is ever read from `sizes`.
///
/// # Panics
///
/// Panics if `array` is not flagged zero-copy-to-list, or if a non-empty array yields an
/// empty `sizes` child.
///
/// # Errors
///
/// Returns an error if any cast/execution step fails, if `elements_field` is non-nullable
/// but the converted elements contain nulls, or if Arrow rejects the assembled parts
/// (reported as an error rather than a panic).
fn list_view_zctl<O: OffsetSizeTrait + NativePType>(
    array: ListViewArray,
    elements_field: &FieldRef,
    ctx: &mut ExecutionCtx,
) -> VortexResult<ArrowArrayRef> {
    assert!(array.is_zero_copy_to_list());

    // No lists at all: emit an empty offsets buffer and no null buffer.
    if array.is_empty() {
        let elements = array
            .elements()
            .clone()
            .execute_arrow(Some(elements_field.data_type()), ctx)?;
        let empty = GenericListArray::<O>::try_new(
            Arc::clone(elements_field),
            OffsetBuffer::new_empty(),
            elements,
            None,
        )?;
        return Ok(Arc::new(empty));
    }

    let ListViewDataParts {
        elements,
        offsets,
        sizes,
        validity,
        ..
    } = array.into_data_parts();

    let offset_dtype = DType::Primitive(O::PTYPE, Nullability::NonNullable);

    // Only the last size is needed to compute the closing offset.
    assert!(!sizes.is_empty());
    let final_size = sizes.scalar_at(sizes.len() - 1)?.cast(&offset_dtype)?;
    let final_size = final_size
        .as_primitive()
        .typed_value::<O>()
        .vortex_expect("non null");

    let offsets = offsets
        .cast(offset_dtype)?
        .execute::<Canonical>(ctx)?
        .into_primitive()
        .to_buffer::<O>();

    // Prefer mutating the existing buffer; when that is not possible, copy into a fresh
    // buffer sized for the one extra trailing offset.
    let mut offsets = offsets.try_into_mut().unwrap_or_else(|o| {
        let mut new_offsets = BufferMut::<O>::with_capacity(o.len() + 1);
        new_offsets.extend_from_slice(&o);
        new_offsets
    });

    // Append the end of the last list.
    let end_offset = if offsets.is_empty() {
        final_size
    } else {
        offsets[offsets.len() - 1] + final_size
    };
    offsets.push(end_offset);

    let elements = elements.execute_arrow(Some(elements_field.data_type()), ctx)?;
    vortex_ensure!(
        elements_field.is_nullable() || elements.null_count() == 0,
        "Cannot convert to non-nullable Arrow array with null elements"
    );

    let null_buffer = to_arrow_null_buffer(validity, sizes.len(), ctx)?;

    // `try_new` performs the same validation as `new` but returns the failure instead of
    // panicking, which matches this function's fallible signature.
    let list = GenericListArray::<O>::try_new(
        Arc::clone(elements_field),
        offsets.freeze().into_arrow_offset_buffer(),
        elements,
        null_buffer,
    )?;
    Ok(Arc::new(list))
}
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow_array::Array;
    use arrow_array::GenericListArray;
    use arrow_array::Int32Array;
    use arrow_schema::DataType;
    use arrow_schema::Field;
    use vortex_buffer::buffer;
    use vortex_error::VortexResult;

    use crate::Canonical;
    use crate::IntoArray;
    use crate::arrays::PrimitiveArray;
    use crate::arrow::IntoArrowArray;
    use crate::arrow::executor::list::ListViewArray;
    use crate::dtype::DType;
    use crate::dtype::Nullability::NonNullable;
    use crate::validity::Validity;

    /// A list view flagged zero-copy-to-list with `i32` offsets converts to an Arrow `List`
    /// holding `[1, 2, 3]` and `[4, 5]`.
    #[test]
    fn test_to_arrow_list_i32() -> VortexResult<()> {
        let values = PrimitiveArray::new(buffer![1i32, 2, 3, 4, 5], Validity::NonNullable);
        let starts = PrimitiveArray::new(buffer![0i32, 3], Validity::NonNullable);
        let lengths = PrimitiveArray::new(buffer![3i32, 2], Validity::NonNullable);

        // SAFETY: the views [0, 3) and [3, 5) lie within the five elements, are in order and
        // do not overlap, which is what this test relies on when setting the flag.
        let list_view = unsafe {
            ListViewArray::new_unchecked(
                values.into_array(),
                starts.into_array(),
                lengths.into_array(),
                Validity::AllValid,
            )
            .with_zero_copy_to_list(true)
        };

        let expected_dt = DataType::List(Field::new("item", DataType::Int32, false).into());
        let converted = list_view.into_array().into_arrow(&expected_dt)?;
        assert_eq!(converted.data_type(), &expected_dt);

        let lists = converted
            .as_any()
            .downcast_ref::<GenericListArray<i32>>()
            .unwrap();
        assert_eq!(lists.len(), 2);
        assert!(!lists.is_null(0));
        assert!(!lists.is_null(1));

        let head = lists.value(0);
        assert_eq!(head.len(), 3);
        let head_values = head.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(head_values.values(), &[1, 2, 3]);

        let tail = lists.value(1);
        assert_eq!(tail.len(), 2);
        let tail_values = tail.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(tail_values.values(), &[4, 5]);

        Ok(())
    }

    /// A list view flagged zero-copy-to-list with `i64` offsets converts to an Arrow
    /// `LargeList` of two non-null lists.
    #[test]
    fn test_to_arrow_list_i64() -> VortexResult<()> {
        let values = PrimitiveArray::new(buffer![10i64, 20, 30], Validity::NonNullable);
        let starts = PrimitiveArray::new(buffer![0i64, 2], Validity::NonNullable);
        let lengths = PrimitiveArray::new(buffer![2i64, 1], Validity::NonNullable);

        // SAFETY: the views [0, 2) and [2, 3) lie within the three elements, are in order
        // and do not overlap, which is what this test relies on when setting the flag.
        let list_view = unsafe {
            ListViewArray::new_unchecked(
                values.into_array(),
                starts.into_array(),
                lengths.into_array(),
                Validity::AllValid,
            )
            .with_zero_copy_to_list(true)
        };

        let expected_dt = DataType::LargeList(Field::new("item", DataType::Int64, false).into());
        let converted = list_view.into_array().into_arrow(&expected_dt)?;
        assert_eq!(converted.data_type(), &expected_dt);

        let lists = converted
            .as_any()
            .downcast_ref::<GenericListArray<i64>>()
            .unwrap();
        assert_eq!(lists.len(), 2);
        assert!(!lists.is_null(0));
        assert!(!lists.is_null(1));

        Ok(())
    }

    /// Overlapping views are not zero-copy-to-list; conversion must still yield the two
    /// logical lists `[1, 2, 3]` and `[2, 3, 4]`.
    #[test]
    fn test_to_arrow_list_non_zctl() -> VortexResult<()> {
        let values = PrimitiveArray::new(buffer![1i32, 2, 3, 4], Validity::NonNullable);
        let starts = PrimitiveArray::new(buffer![0i32, 1], Validity::NonNullable);
        let lengths = PrimitiveArray::new(buffer![3i32, 3], Validity::NonNullable);
        let list_view = ListViewArray::new(
            values.into_array(),
            starts.into_array(),
            lengths.into_array(),
            Validity::NonNullable,
        );
        assert!(!list_view.is_zero_copy_to_list());

        let target_dt = DataType::List(Field::new("item", DataType::Int32, false).into());
        let converted = list_view.into_array().into_arrow(&target_dt)?;
        let lists = converted
            .as_any()
            .downcast_ref::<GenericListArray<i32>>()
            .unwrap();
        assert_eq!(lists.len(), 2);

        let head = lists.value(0);
        assert_eq!(head.len(), 3);
        let head_values = head.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(head_values.values(), &[1, 2, 3]);

        let tail = lists.value(1);
        assert_eq!(tail.len(), 3);
        let tail_values = tail.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(tail_values.values(), &[2, 3, 4]);

        Ok(())
    }

    /// An empty list view flagged zero-copy-to-list converts to an Arrow list of length zero.
    #[test]
    fn test_to_arrow_list_empty_zctl() -> VortexResult<()> {
        let element_dtype = DType::Primitive(crate::dtype::PType::I32, NonNullable);
        let dtype = DType::List(Arc::new(element_dtype), NonNullable);

        // SAFETY: the array comes from `Canonical::empty`, so there are no views that could
        // break the ordering the flag stands for.
        let list_view = unsafe {
            Canonical::empty(&dtype)
                .into_listview()
                .with_zero_copy_to_list(true)
        };

        let target_dt = DataType::List(Field::new("item", DataType::Int32, false).into());
        let converted = list_view.into_array().into_arrow(&target_dt)?;
        assert_eq!(converted.len(), 0);

        Ok(())
    }
}