use std::iter::once;
use std::sync::Arc;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use vortex_error::vortex_err;
use crate::ArrayRef;
use crate::IntoArray;
use crate::array::Array;
use crate::array::ArrayParts;
use crate::array::EmptyArrayData;
use crate::array::TypedArrayRef;
use crate::array::child_to_validity;
use crate::array::validity_to_child;
use crate::arrays::Struct;
use crate::dtype::DType;
use crate::dtype::FieldName;
use crate::dtype::FieldNames;
use crate::dtype::StructFields;
use crate::validity::Validity;
/// Index, within a struct array's slots, of the entry holding the validity child.
pub(super) const VALIDITY_SLOT: usize = 0;
/// Index, within a struct array's slots, of the first field; field `i` lives at `FIELDS_OFFSET + i`.
pub(super) const FIELDS_OFFSET: usize = 1;
/// The components of a struct array, as produced by `Array::<Struct>::into_data_parts`.
pub struct StructDataParts {
/// The struct's field description (a clone of the array's `StructFields`).
pub struct_fields: StructFields,
/// The field arrays, cloned out of the array's field slots in slot order.
pub fields: Arc<[ArrayRef]>,
/// The struct-level validity of the array.
pub validity: Validity,
}
/// Lays out the slots of a struct array.
///
/// The first slot is whatever `validity_to_child(validity, length)` returns; it is
/// followed by one `Some(field)` slot per entry of `fields`, in order.
pub(super) fn make_struct_slots(
    fields: &[ArrayRef],
    validity: &Validity,
    length: usize,
) -> Vec<Option<ArrayRef>> {
    // One leading validity slot plus one slot per field.
    let mut slots = Vec::with_capacity(fields.len() + 1);
    slots.push(validity_to_child(validity, length));
    slots.extend(fields.iter().map(|field| Some(field.clone())));
    slots
}
/// Read-only accessors for anything that can be viewed as a typed struct array.
///
/// Every method has a default body that reads the dtype and slots obtained through
/// `self.as_ref()`.
pub trait StructArrayExt: TypedArrayRef<Struct> {
    /// Returns the nullability stored in the array's struct dtype.
    ///
    /// # Panics
    ///
    /// Panics if the array's dtype is not `DType::Struct`.
    fn nullability(&self) -> crate::dtype::Nullability {
        let DType::Struct(_, nullability) = self.as_ref().dtype() else {
            unreachable!("StructArrayExt requires a struct dtype")
        };
        *nullability
    }

    /// Returns the field names recorded in the array's dtype.
    fn names(&self) -> &FieldNames {
        let struct_fields = self.as_ref().dtype().as_struct_fields();
        struct_fields.names()
    }

    /// Rebuilds the struct-level validity from the validity slot and the dtype's nullability.
    fn struct_validity(&self) -> Validity {
        let validity_slot = &self.as_ref().slots()[VALIDITY_SLOT];
        child_to_validity(validity_slot, self.nullability())
    }

    /// Iterates over the field arrays exactly as stored in the field slots.
    ///
    /// The validity slot is not consulted. The iterator panics when it reaches a field
    /// slot that is `None`.
    fn iter_unmasked_fields(&self) -> impl Iterator<Item = &ArrayRef> + '_ {
        let field_slots = &self.as_ref().slots()[FIELDS_OFFSET..];
        field_slots
            .iter()
            .map(|slot| slot.as_ref().vortex_expect("StructArray field slot"))
    }

    /// Collects clones of all stored field arrays into a shared slice.
    fn unmasked_fields(&self) -> Arc<[ArrayRef]> {
        let stored = self.iter_unmasked_fields().cloned();
        stored.collect()
    }

    /// Returns the stored field array at position `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `FIELDS_OFFSET + idx` is outside the slots, or if that slot is `None`.
    fn unmasked_field(&self, idx: usize) -> &ArrayRef {
        let slot = &self.as_ref().slots()[FIELDS_OFFSET + idx];
        slot.as_ref().vortex_expect("StructArray field slot")
    }

    /// Looks a stored field up by name, returning `None` when the dtype has no such field.
    fn unmasked_field_by_name_opt(&self, name: impl AsRef<str>) -> Option<&ArrayRef> {
        let idx = self.struct_fields().find(name.as_ref())?;
        Some(self.unmasked_field(idx))
    }

    /// Looks a stored field up by name.
    ///
    /// # Errors
    ///
    /// Returns an error naming the missing field and listing the available names when
    /// the dtype has no field called `name`.
    fn unmasked_field_by_name(&self, name: impl AsRef<str>) -> VortexResult<&ArrayRef> {
        let name = name.as_ref();
        match self.unmasked_field_by_name_opt(name) {
            Some(field) => Ok(field),
            None => Err(vortex_err!(
                "Field {name} not found in struct array with names {:?}",
                self.names()
            )),
        }
    }

    /// Returns the `StructFields` held by the array's dtype.
    fn struct_fields(&self) -> &StructFields {
        self.as_ref().dtype().as_struct_fields()
    }
}
// Blanket implementation: every `TypedArrayRef<Struct>` gets the `StructArrayExt` methods.
impl<T: TypedArrayRef<Struct>> StructArrayExt for T {}
impl Array<Struct> {
/// Creates a struct array from field names, field arrays, a length and a validity.
///
/// This is the panicking counterpart of [`Self::try_new`].
///
/// # Panics
///
/// Panics (through `vortex_expect`) if [`Self::try_new`] returns an error.
pub fn new(
    names: FieldNames,
    fields: impl Into<Arc<[ArrayRef]>>,
    length: usize,
    validity: Validity,
) -> Self {
    let constructed = Self::try_new(names, fields, length, validity);
    constructed.vortex_expect("StructArray construction failed")
}
/// Creates a struct array, deriving the struct dtype from `names` and the dtypes of `fields`.
///
/// The outer dtype's nullability is taken from `validity`.
///
/// # Errors
///
/// Returns whatever error `Array::try_from_parts` reports for the assembled parts.
pub fn try_new(
    names: FieldNames,
    fields: impl Into<Arc<[ArrayRef]>>,
    length: usize,
    validity: Validity,
) -> VortexResult<Self> {
    let fields = fields.into();
    // The struct dtype pairs the supplied names with each field's own dtype.
    let dtypes: Vec<_> = fields.iter().map(|field| field.dtype().clone()).collect();
    let struct_dtype = DType::Struct(StructFields::new(names, dtypes), validity.nullability());
    let parts = ArrayParts::new(Struct, struct_dtype, length, EmptyArrayData)
        .with_slots(make_struct_slots(&fields, &validity, length));
    Array::try_from_parts(parts)
}
/// Creates a struct array from an explicit `StructFields` without validating the inputs.
///
/// The outer dtype's nullability is taken from `validity`.
///
/// # Safety
///
/// The parts are handed to `Array::from_parts_unchecked` as-is. The caller must ensure
/// that `fields`, `dtype`, `length` and `validity` together satisfy whatever invariants
/// that function requires.
pub unsafe fn new_unchecked(
    fields: impl Into<Arc<[ArrayRef]>>,
    dtype: StructFields,
    length: usize,
    validity: Validity,
) -> Self {
    let fields = fields.into();
    let struct_dtype = DType::Struct(dtype, validity.nullability());
    let parts = ArrayParts::new(Struct, struct_dtype, length, EmptyArrayData)
        .with_slots(make_struct_slots(&fields, &validity, length));
    // SAFETY: upholding the invariants is delegated to the caller by this function's contract.
    unsafe { Array::from_parts_unchecked(parts) }
}
/// Creates a struct array from field arrays and an explicit `StructFields`.
///
/// Unlike [`Self::try_new`], the struct dtype is supplied by the caller instead of being
/// derived from the fields. The outer dtype's nullability is taken from `validity`.
///
/// # Errors
///
/// Returns whatever error `Array::try_from_parts` reports for the assembled parts.
pub fn try_new_with_dtype(
    fields: impl Into<Arc<[ArrayRef]>>,
    dtype: StructFields,
    length: usize,
    validity: Validity,
) -> VortexResult<Self> {
    let fields = fields.into();
    let struct_dtype = DType::Struct(dtype, validity.nullability());
    let parts = ArrayParts::new(Struct, struct_dtype, length, EmptyArrayData)
        .with_slots(make_struct_slots(&fields, &validity, length));
    Array::try_from_parts(parts)
}
/// Creates a struct array from a slice of `(name, array)` pairs.
///
/// Each array is cloned and the pairs are forwarded to [`Self::try_from_iter`], whose
/// errors are returned unchanged.
pub fn from_fields<N: AsRef<str>>(items: &[(N, ArrayRef)]) -> VortexResult<Self> {
    let named_arrays = items.iter().map(|(name, array)| (name, array.clone()));
    Self::try_from_iter(named_arrays)
}
/// Creates a struct array from `(name, array)` pairs and an explicit validity.
///
/// The array length is taken from the first field.
///
/// # Errors
///
/// Returns an error if `iter` yields no fields, because the length cannot be determined,
/// or if [`Self::try_new`] fails.
pub fn try_from_iter_with_validity<
    N: AsRef<str>,
    A: IntoArray,
    T: IntoIterator<Item = (N, A)>,
>(
    iter: T,
    validity: Validity,
) -> VortexResult<Self> {
    let mut names: Vec<FieldName> = Vec::new();
    let mut arrays: Vec<ArrayRef> = Vec::new();
    for (name, array) in iter {
        names.push(FieldName::from(name.as_ref()));
        arrays.push(array.into_array());
    }

    // Without at least one field there is nothing to read the length from.
    let Some(first) = arrays.first() else {
        return Err(vortex_err!("StructArray cannot be constructed from an empty slice of arrays because the length is unspecified"));
    };
    let len = first.len();

    Self::try_new(FieldNames::from_iter(names), arrays, len, validity)
}
/// Creates a non-nullable struct array from `(name, array)` pairs.
///
/// This is [`Self::try_from_iter_with_validity`] with `Validity::NonNullable`; the array
/// length is taken from the first field.
///
/// # Errors
///
/// Returns an error if `iter` yields no fields, because the length cannot be determined,
/// or if construction of the array fails.
pub fn try_from_iter<N: AsRef<str>, A: IntoArray, T: IntoIterator<Item = (N, A)>>(
    iter: T,
) -> VortexResult<Self> {
    // Delegate rather than duplicate the name/field splitting and the empty-input check,
    // so the two constructors cannot drift apart.
    Self::try_from_iter_with_validity(iter, Validity::NonNullable)
}
/// Builds a new struct array containing only the fields named in `projection`, in that order.
///
/// The selected field arrays are cloned; the length and validity of `self` are reused.
///
/// # Errors
///
/// Returns an error if a projected name is not a field of this array, if reading the
/// validity fails, or if [`Self::try_new`] fails.
pub fn project(&self, projection: &[FieldName]) -> VortexResult<Self> {
    let struct_fields = self.struct_fields();
    let mut names = Vec::with_capacity(projection.len());
    let mut children = Vec::with_capacity(projection.len());

    for f_name in projection {
        let Some(idx) = struct_fields.find(f_name.as_ref()) else {
            return Err(vortex_err!("Unknown field {f_name}"));
        };
        names.push(self.names()[idx].clone());
        children.push(self.unmasked_field(idx).clone());
    }

    let names = FieldNames::from(names.as_slice());
    let validity = self.validity()?;
    Self::try_new(names, children, self.len(), validity)
}
/// Creates a non-nullable struct array of length `len` that has no fields.
pub fn new_fieldless_with_len(len: usize) -> Self {
    let no_fields = StructFields::new(FieldNames::default(), Vec::new());
    let dtype = DType::Struct(no_fields, crate::dtype::Nullability::NonNullable);
    let parts = ArrayParts::new(Struct, dtype, len, EmptyArrayData)
        .with_slots(make_struct_slots(&[], &Validity::NonNullable, len));
    // SAFETY: NOTE(review): presumably sound because an empty field list cannot disagree
    // with the empty dtype or with `len`; confirm against `from_parts_unchecked`'s contract.
    unsafe { Array::from_parts_unchecked(parts) }
}
/// Consumes the array and returns its field description, field arrays and validity.
///
/// # Panics
///
/// Panics if a field slot is `None` or if the validity cannot be obtained.
pub fn into_data_parts(self) -> StructDataParts {
    let field_slots = &self.slots()[FIELDS_OFFSET..];
    let fields: Arc<[ArrayRef]> = field_slots
        .iter()
        .map(|slot| slot.as_ref().vortex_expect("StructArray field slot"))
        .cloned()
        .collect();
    let validity = self.validity().vortex_expect("StructArray validity");
    let struct_fields = self.struct_fields().clone();
    StructDataParts {
        struct_fields,
        fields,
        validity,
    }
}
/// Returns a copy of this array without the field called `name`, together with the
/// removed field array.
///
/// `self` is left untouched. Returns `None` when no field has that name or when
/// `without_field` fails for the field's position.
///
/// # Panics
///
/// Panics if the slot of the named field is `None`.
pub fn remove_column(&self, name: impl Into<FieldName>) -> Option<(Self, ArrayRef)> {
    let name = name.into();
    let struct_fields = self.struct_fields();
    let position = struct_fields.find(name.as_ref())?;
    // Fields are stored after the validity slot, so shift by the offset.
    let slot_position = FIELDS_OFFSET + position;

    let removed = self.slots()[slot_position]
        .as_ref()
        .vortex_expect("StructArray field slot")
        .clone();

    // Keep the validity slot and every field slot except the removed one.
    let remaining_slots: Vec<Option<ArrayRef>> = self
        .slots()
        .iter()
        .enumerate()
        .filter_map(|(i, slot)| (i != slot_position).then(|| slot.clone()))
        .collect();

    let remaining_fields = struct_fields.without_field(position).ok()?;
    let remaining_dtype = DType::Struct(remaining_fields, self.dtype().nullability());
    let parts = ArrayParts::new(Struct, remaining_dtype, self.len(), EmptyArrayData)
        .with_slots(remaining_slots);

    // SAFETY: NOTE(review): presumably sound because the slots and dtype drop the same
    // field while length and nullability are unchanged; confirm against
    // `from_parts_unchecked`'s contract.
    let remaining = unsafe { Array::from_parts_unchecked(parts) };
    Some((remaining, removed))
}
}
impl Array<Struct> {
    /// Returns a new struct array equal to `self` with `array` appended as a field called `name`.
    ///
    /// The existing field arrays are cloned; the length and validity of `self` are reused.
    ///
    /// # Errors
    ///
    /// Returns an error if reading the validity fails or if
    /// [`Self::try_new_with_dtype`] rejects the extended array.
    ///
    /// # Panics
    ///
    /// Panics if an existing field slot is `None`.
    pub fn with_column(&self, name: impl Into<FieldName>, array: ArrayRef) -> VortexResult<Self> {
        let name = name.into();
        let current = self.struct_fields();

        // Extend the dtype first: it only needs the new array's dtype, not the array itself.
        let extended_names = current.names().iter().cloned().chain(once(name));
        let extended_types = current.fields().chain(once(array.dtype().clone()));
        let extended_fields = StructFields::new(extended_names.collect(), extended_types.collect());

        let existing = &self.slots()[FIELDS_OFFSET..];
        let mut children: Vec<ArrayRef> = Vec::new();
        for slot in existing.iter() {
            children.push(slot.as_ref().vortex_expect("StructArray field slot").clone());
        }
        children.push(array);
        let children: Arc<[ArrayRef]> = children.into();

        let validity = self.validity()?;
        Self::try_new_with_dtype(children, extended_fields, self.len(), validity)
    }

    /// Forwards to [`Self::remove_column`] and returns its result unchanged.
    ///
    /// NOTE(review): despite the name, this takes `&self` and does not consume the array.
    pub fn remove_column_owned(&self, name: impl Into<FieldName>) -> Option<(Self, ArrayRef)> {
        self.remove_column(name)
    }
}