use std::fmt::Debug;
use std::iter::once;
use std::sync::Arc;
use vortex_dtype::DType;
use vortex_dtype::FieldName;
use vortex_dtype::FieldNames;
use vortex_dtype::StructFields;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;
use vortex_error::vortex_err;
use crate::Array;
use crate::ArrayRef;
use crate::IntoArray;
use crate::stats::ArrayStats;
use crate::validity::Validity;
use crate::vtable::ValidityHelper;
/// An array of struct values, stored as one child array per field.
///
/// The constructors in this file build `dtype` as a `DType::Struct` and check (see
/// `StructArray::validate`) that every child array has `len` elements and that its dtype
/// matches the corresponding field of the struct dtype.
#[derive(Clone, Debug)]
pub struct StructArray {
/// Number of struct rows; every child in `fields` is validated to have this length.
pub(super) len: usize,
/// The array's dtype. Constructed as `DType::Struct(..)` with the nullability taken from
/// `validity`.
pub(super) dtype: DType,
/// Child arrays, one per field, in the same order as the names in the struct dtype.
pub(super) fields: Arc<[ArrayRef]>,
/// Struct-level validity. It is stored separately and is not applied to `fields`.
pub(super) validity: Validity,
/// Statistics holder; initialised to its default value on construction.
pub(super) stats_set: ArrayStats,
}
/// The owned components of a `StructArray`, as produced by `StructArray::into_parts`.
pub struct StructArrayParts {
/// Field names and dtypes, extracted from the array's struct dtype.
pub struct_fields: StructFields,
/// The child arrays, one per field, exactly as they were stored in the array.
pub fields: Arc<[ArrayRef]>,
/// The struct-level validity of the array.
pub validity: Validity,
}
impl StructArray {
pub fn unmasked_fields(&self) -> &Arc<[ArrayRef]> {
&self.fields
}
/// Returns the stored child array for the field called `name`.
///
/// The struct-level validity is not applied to the returned array.
///
/// # Errors
///
/// Returns an error listing the available field names when no field is called `name`.
pub fn unmasked_field_by_name(&self, name: impl AsRef<str>) -> VortexResult<&ArrayRef> {
    let name: &str = name.as_ref();
    match self.unmasked_field_by_name_opt(name) {
        Some(field) => Ok(field),
        None => Err(vortex_err!(
            "Field {name} not found in struct array with names {:?}",
            self.names()
        )),
    }
}
/// Returns the stored child array for the field called `name`, or `None` when the struct
/// dtype has no such field.
///
/// The struct-level validity is not applied to the returned array.
pub fn unmasked_field_by_name_opt(&self, name: impl AsRef<str>) -> Option<&ArrayRef> {
    let key: &str = name.as_ref();
    let idx = self.struct_fields().find(key)?;
    Some(&self.fields[idx])
}
/// Returns the field names recorded in this array's struct dtype.
pub fn names(&self) -> &FieldNames {
    let struct_fields = self.struct_fields();
    struct_fields.names()
}
/// Returns the `StructFields` (field names and dtypes) of this array's dtype.
///
/// # Panics
///
/// Panics if `dtype` is not a struct dtype. The constructors in this file always build a
/// `DType::Struct`, so reaching that branch indicates an internal bug.
pub fn struct_fields(&self) -> &StructFields {
    // Match on the returned `Option` directly; borrowing the temporary is unnecessary.
    let Some(struct_dtype) = self.dtype.as_struct_fields_opt() else {
        unreachable!(
            "struct arrays must be a DType::Struct, this is likely an internal bug."
        )
    };
    struct_dtype
}
/// Creates a non-nullable struct array with no fields and `len` rows.
///
/// # Panics
///
/// Panics (via `vortex_expect`) if `try_new` rejects the arguments; the message names
/// this constructor so the failure can be traced back to it.
pub fn new_fieldless_with_len(len: usize) -> Self {
    Self::try_new(
        FieldNames::default(),
        Vec::new(),
        len,
        Validity::NonNullable,
    )
    .vortex_expect("StructArray::new_fieldless_with_len should not fail")
}
/// Creates a struct array from field names, child arrays, a row count and a validity.
///
/// This is the panicking counterpart of `try_new`.
///
/// # Panics
///
/// Panics (via `vortex_expect`) if `try_new` returns an error for these arguments.
pub fn new(
    names: FieldNames,
    fields: impl Into<Arc<[ArrayRef]>>,
    length: usize,
    validity: Validity,
) -> Self {
    let built = Self::try_new(names, fields, length, validity);
    built.vortex_expect("StructArray construction failed")
}
/// Creates a struct array, deriving the struct dtype from `names` and the dtypes of the
/// supplied child arrays.
///
/// # Errors
///
/// Returns an error if the arguments do not pass `validate`: for example when the number
/// of names differs from the number of fields, a field's length differs from `length`, or
/// the validity reports a length other than `length`.
pub fn try_new(
    names: FieldNames,
    fields: impl Into<Arc<[ArrayRef]>>,
    length: usize,
    validity: Validity,
) -> VortexResult<Self> {
    let children: Arc<[ArrayRef]> = fields.into();
    let child_dtypes = children
        .iter()
        .map(|child| child.dtype().clone())
        .collect::<Vec<_>>();
    let struct_fields = StructFields::new(names, child_dtypes);
    // Validation and construction are shared with the explicit-dtype constructor.
    Self::try_new_with_dtype(children, struct_fields, length, validity)
}
pub unsafe fn new_unchecked(
fields: impl Into<Arc<[ArrayRef]>>,
dtype: StructFields,
length: usize,
validity: Validity,
) -> Self {
let fields = fields.into();
#[cfg(debug_assertions)]
Self::validate(&fields, &dtype, length, &validity)
.vortex_expect("[Debug Assertion]: Invalid `StructArray` parameters");
Self {
len: length,
dtype: DType::Struct(dtype, validity.nullability()),
fields,
validity,
stats_set: Default::default(),
}
}
pub fn validate(
fields: &[ArrayRef],
dtype: &StructFields,
length: usize,
validity: &Validity,
) -> VortexResult<()> {
if fields.len() != dtype.names().len() {
vortex_bail!(
InvalidArgument: "Got {} fields but dtype has {} names",
fields.len(),
dtype.names().len()
);
}
for (i, (field, struct_dt)) in fields.iter().zip(dtype.fields()).enumerate() {
if field.len() != length {
vortex_bail!(
InvalidArgument: "Field {} has length {} but expected {}",
i,
field.len(),
length
);
}
if field.dtype() != &struct_dt {
vortex_bail!(
InvalidArgument: "Field {} has dtype {} but expected {}",
i,
field.dtype(),
struct_dt
);
}
}
if let Some(validity_len) = validity.maybe_len()
&& validity_len != length
{
vortex_bail!(
InvalidArgument: "Validity has length {} but expected {}",
validity_len,
length
);
}
Ok(())
}
/// Creates a struct array from child arrays and an explicitly supplied struct dtype.
///
/// # Errors
///
/// Returns an error if the arguments do not pass `validate`.
pub fn try_new_with_dtype(
    fields: impl Into<Arc<[ArrayRef]>>,
    dtype: StructFields,
    length: usize,
    validity: Validity,
) -> VortexResult<Self> {
    let children: Arc<[ArrayRef]> = fields.into();
    Self::validate(&children, &dtype, length, &validity)?;

    // SAFETY: the arguments were accepted by `validate` on the line above.
    let array = unsafe { Self::new_unchecked(children, dtype, length, validity) };
    Ok(array)
}
pub fn into_parts(self) -> StructArrayParts {
let struct_fields = self.dtype.into_struct_fields();
StructArrayParts {
struct_fields,
fields: self.fields,
validity: self.validity,
}
}
/// Consumes the array and returns its child arrays as a `Vec`.
///
/// Each `ArrayRef` is cloned out of the shared slice into the new vector.
pub fn into_fields(self) -> Vec<ArrayRef> {
    let parts = self.into_parts();
    parts.fields.iter().cloned().collect()
}
/// Creates a non-nullable struct array from a slice of `(name, array)` pairs.
///
/// # Errors
///
/// Propagates any error from `try_from_iter`, including the one it returns when `items`
/// is empty.
pub fn from_fields<N: AsRef<str>>(items: &[(N, ArrayRef)]) -> VortexResult<Self> {
    let named_arrays = items.iter().map(|(name, array)| (name, array.to_array()));
    Self::try_from_iter(named_arrays)
}
/// Creates a struct array from `(name, array)` pairs and an explicit validity.
///
/// The row count is taken from the first array.
///
/// # Errors
///
/// Returns an error when `iter` yields no items, because the length cannot be
/// determined, and propagates any error from `try_new`.
pub fn try_from_iter_with_validity<
    N: AsRef<str>,
    A: IntoArray,
    T: IntoIterator<Item = (N, A)>,
>(
    iter: T,
    validity: Validity,
) -> VortexResult<Self> {
    let mut names: Vec<FieldName> = Vec::new();
    let mut arrays: Vec<ArrayRef> = Vec::new();
    for (name, array) in iter {
        names.push(FieldName::from(name.as_ref()));
        arrays.push(array.into_array());
    }

    let len = match arrays.first() {
        Some(first) => first.len(),
        None => {
            return Err(vortex_err!("StructArray cannot be constructed from an empty slice of arrays because the length is unspecified"));
        }
    };

    Self::try_new(FieldNames::from_iter(names), arrays, len, validity)
}
/// Creates a non-nullable struct array from `(name, array)` pairs.
///
/// # Errors
///
/// Propagates any error from `try_from_iter_with_validity`.
pub fn try_from_iter<N: AsRef<str>, A: IntoArray, T: IntoIterator<Item = (N, A)>>(
    iter: T,
) -> VortexResult<Self> {
    let validity = Validity::NonNullable;
    Self::try_from_iter_with_validity(iter, validity)
}
pub fn project(&self, projection: &[FieldName]) -> VortexResult<Self> {
let mut children = Vec::with_capacity(projection.len());
let mut names = Vec::with_capacity(projection.len());
let fields = self.unmasked_fields();
for f_name in projection.iter() {
let idx = self
.names()
.iter()
.position(|name| name == f_name)
.ok_or_else(|| vortex_err!("Unknown field {f_name}"))?;
names.push(self.names()[idx].clone());
children.push(fields[idx].clone());
}
StructArray::try_new(
FieldNames::from(names.as_slice()),
children,
self.len(),
self.validity().clone(),
)
}
/// Removes the field called `name` in place and returns its child array.
///
/// Returns `None`, leaving the array unchanged, when no field has that name or when the
/// struct dtype cannot be rebuilt without it. Only `fields` and `dtype` are updated;
/// `len`, `validity` and `stats_set` are left as they were.
pub fn remove_column(&mut self, name: impl Into<FieldName>) -> Option<ArrayRef> {
    let target = name.into();
    let current_dtype = self.struct_fields().clone();
    let position = current_dtype
        .names()
        .iter()
        .position(|field_name| field_name.as_ref() == target.as_ref())?;

    let removed = self.fields[position].clone();
    let remaining: Arc<[ArrayRef]> = self
        .fields
        .iter()
        .enumerate()
        .filter_map(|(i, child)| (i != position).then(|| child.clone()))
        .collect();

    match current_dtype.without_field(position) {
        Ok(new_dtype) => {
            self.fields = remaining;
            self.dtype = DType::Struct(new_dtype, self.dtype.nullability());
            Some(removed)
        }
        Err(_) => None,
    }
}
pub fn with_column(&self, name: impl Into<FieldName>, array: ArrayRef) -> VortexResult<Self> {
let name = name.into();
let struct_dtype = self.struct_fields().clone();
let names = struct_dtype.names().iter().cloned().chain(once(name));
let types = struct_dtype.fields().chain(once(array.dtype().clone()));
let new_fields = StructFields::new(names.collect(), types.collect());
let children: Arc<[ArrayRef]> = self.fields.iter().cloned().chain(once(array)).collect();
Self::try_new_with_dtype(children, new_fields, self.len, self.validity.clone())
}
}