vortex_array/arrays/struct_/
mod.rs

1use std::fmt::Debug;
2use std::sync::Arc;
3
4use vortex_dtype::{DType, FieldName, FieldNames, StructDType};
5use vortex_error::{VortexExpect as _, VortexResult, vortex_bail, vortex_err};
6use vortex_mask::Mask;
7
8use crate::array::{ArrayCanonicalImpl, ArrayValidityImpl};
9use crate::stats::{ArrayStats, StatsSetRef};
10use crate::validity::Validity;
11use crate::variants::StructArrayTrait;
12use crate::vtable::VTableRef;
13use crate::{
14    Array, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayVariantsImpl, Canonical, EmptyMetadata,
15    Encoding,
16};
17
18mod compute;
19mod serde;
20
21#[derive(Clone, Debug)]
22pub struct StructArray {
23    len: usize,
24    dtype: DType,
25    fields: Vec<ArrayRef>,
26    validity: Validity,
27    stats_set: ArrayStats,
28}
29
30#[derive(Debug)]
31pub struct StructEncoding;
32impl Encoding for StructEncoding {
33    type Array = StructArray;
34    type Metadata = EmptyMetadata;
35}
36
37impl StructArray {
38    pub fn validity(&self) -> &Validity {
39        &self.validity
40    }
41
42    pub fn fields(&self) -> &[ArrayRef] {
43        &self.fields
44    }
45
46    pub fn struct_dtype(&self) -> &Arc<StructDType> {
47        let Some(struct_dtype) = &self.dtype.as_struct() else {
48            unreachable!(
49                "struct arrays must have be a DType::Struct, this is likely an internal bug."
50            )
51        };
52        struct_dtype
53    }
54
55    pub fn try_new(
56        names: FieldNames,
57        fields: Vec<ArrayRef>,
58        length: usize,
59        validity: Validity,
60    ) -> VortexResult<Self> {
61        let nullability = validity.nullability();
62
63        if names.len() != fields.len() {
64            vortex_bail!("Got {} names and {} fields", names.len(), fields.len());
65        }
66
67        for field in fields.iter() {
68            if field.len() != length {
69                vortex_bail!(
70                    "Expected all struct fields to have length {length}, found {}",
71                    field.len()
72                );
73            }
74        }
75
76        let field_dtypes: Vec<_> = fields.iter().map(|d| d.dtype()).cloned().collect();
77        let dtype = DType::Struct(Arc::new(StructDType::new(names, field_dtypes)), nullability);
78
79        Ok(Self {
80            len: length,
81            dtype,
82            fields,
83            validity,
84            stats_set: Default::default(),
85        })
86    }
87
88    pub fn try_new_with_dtype(
89        fields: Vec<ArrayRef>,
90        dtype: Arc<StructDType>,
91        length: usize,
92        validity: Validity,
93    ) -> VortexResult<Self> {
94        for (field, struct_dt) in fields.iter().zip(dtype.fields()) {
95            if field.len() != length {
96                vortex_bail!(
97                    "Expected all struct fields to have length {length}, found {}",
98                    field.len()
99                );
100            }
101
102            if &struct_dt != field.dtype() {
103                vortex_bail!(
104                    "Expected all struct fields to have dtype {}, found {}",
105                    struct_dt,
106                    field.dtype()
107                );
108            }
109        }
110
111        Ok(Self {
112            len: length,
113            dtype: DType::Struct(dtype, validity.nullability()),
114            fields,
115            validity,
116            stats_set: Default::default(),
117        })
118    }
119
120    pub fn from_fields<N: AsRef<str>>(items: &[(N, ArrayRef)]) -> VortexResult<Self> {
121        let names = items.iter().map(|(name, _)| FieldName::from(name.as_ref()));
122        let fields: Vec<ArrayRef> = items.iter().map(|(_, array)| array.to_array()).collect();
123        let len = fields
124            .first()
125            .map(|f| f.len())
126            .ok_or_else(|| vortex_err!("StructArray cannot be constructed from an empty slice of arrays because the length is unspecified"))?;
127
128        Self::try_new(
129            FieldNames::from_iter(names),
130            fields,
131            len,
132            Validity::NonNullable,
133        )
134    }
135
136    // TODO(aduffy): Add equivalent function to support field masks for nested column access.
137    /// Return a new StructArray with the given projection applied.
138    ///
139    /// Projection does not copy data arrays. Projection is defined by an ordinal array slice
140    /// which specifies the new ordering of columns in the struct. The projection can be used to
141    /// perform column re-ordering, deletion, or duplication at a logical level, without any data
142    /// copying.
143    #[allow(clippy::same_name_method)]
144    pub fn project(&self, projection: &[FieldName]) -> VortexResult<Self> {
145        let mut children = Vec::with_capacity(projection.len());
146        let mut names = Vec::with_capacity(projection.len());
147
148        for f_name in projection.iter() {
149            let idx = self
150                .names()
151                .iter()
152                .position(|name| name == f_name)
153                .ok_or_else(|| vortex_err!("Unknown field {f_name}"))?;
154
155            names.push(self.names()[idx].clone());
156            children.push(
157                self.maybe_null_field_by_idx(idx)
158                    .vortex_expect("never out of bounds"),
159            );
160        }
161
162        StructArray::try_new(
163            FieldNames::from(names.as_slice()),
164            children,
165            self.len(),
166            self.validity().clone(),
167        )
168    }
169}
170
171impl ArrayImpl for StructArray {
172    type Encoding = StructEncoding;
173
174    fn _len(&self) -> usize {
175        self.len
176    }
177
178    fn _dtype(&self) -> &DType {
179        &self.dtype
180    }
181
182    fn _vtable(&self) -> VTableRef {
183        VTableRef::new_ref(&StructEncoding)
184    }
185
186    fn _with_children(&self, children: &[ArrayRef]) -> VortexResult<Self> {
187        let validity = if self.validity().is_array() {
188            Validity::Array(children[0].clone())
189        } else {
190            self.validity().clone()
191        };
192
193        let fields_idx = if validity.is_array() { 1_usize } else { 0 };
194        let fields = children[fields_idx..].to_vec();
195
196        Self::try_new_with_dtype(fields, self.struct_dtype().clone(), self.len(), validity)
197    }
198}
199
200impl ArrayStatisticsImpl for StructArray {
201    fn _stats_ref(&self) -> StatsSetRef<'_> {
202        self.stats_set.to_ref(self)
203    }
204}
205
206impl ArrayVariantsImpl for StructArray {
207    fn _as_struct_typed(&self) -> Option<&dyn StructArrayTrait> {
208        Some(self)
209    }
210}
211
212impl StructArrayTrait for StructArray {
213    fn maybe_null_field_by_idx(&self, idx: usize) -> VortexResult<ArrayRef> {
214        Ok(self.fields[idx].clone())
215    }
216
217    fn project(&self, projection: &[FieldName]) -> VortexResult<ArrayRef> {
218        self.project(projection).map(|a| a.into_array())
219    }
220}
221
222impl ArrayCanonicalImpl for StructArray {
223    fn _to_canonical(&self) -> VortexResult<Canonical> {
224        Ok(Canonical::Struct(self.clone()))
225    }
226}
227
228impl ArrayValidityImpl for StructArray {
229    fn _is_valid(&self, index: usize) -> VortexResult<bool> {
230        self.validity.is_valid(index)
231    }
232
233    fn _all_valid(&self) -> VortexResult<bool> {
234        self.validity.all_valid()
235    }
236
237    fn _all_invalid(&self) -> VortexResult<bool> {
238        self.validity.all_invalid()
239    }
240
241    fn _validity_mask(&self) -> VortexResult<Mask> {
242        self.validity.to_mask(self.len())
243    }
244}
245
#[cfg(test)]
mod test {
    use vortex_buffer::buffer;
    use vortex_dtype::{DType, FieldName, FieldNames, Nullability};

    use crate::ArrayExt;
    use crate::array::Array;
    use crate::arrays::BoolArray;
    use crate::arrays::primitive::PrimitiveArray;
    use crate::arrays::struct_::StructArray;
    use crate::arrays::varbin::VarBinArray;
    use crate::validity::Validity;
    use crate::variants::StructArrayTrait;

    /// Projecting `{xs, ys, zs}` onto `[zs, xs]` must drop `ys`, reorder the remaining
    /// columns, and leave the row count and the column contents unchanged.
    #[test]
    fn test_project() {
        let int_column = PrimitiveArray::new(buffer![0i64, 1, 2, 3, 4], Validity::NonNullable);
        let str_column = VarBinArray::from_vec(
            vec!["a", "b", "c", "d", "e"],
            DType::Utf8(Nullability::NonNullable),
        );
        let bool_column = BoolArray::from_iter([true, true, true, false, false]);

        let original = StructArray::try_new(
            FieldNames::from(["xs".into(), "ys".into(), "zs".into()]),
            vec![
                int_column.into_array(),
                str_column.into_array(),
                bool_column.into_array(),
            ],
            5,
            Validity::NonNullable,
        )
        .unwrap();

        let wanted = [FieldName::from("zs"), FieldName::from("xs")];
        let projected = original.project(&wanted).unwrap();

        // Names come back in the requested order and the row count is preserved.
        assert_eq!(projected.names().as_ref(), wanted);
        assert_eq!(projected.len(), 5);

        // Column 0 of the projection is the original `zs` column.
        let projected_bools = projected.maybe_null_field_by_idx(0).unwrap();
        let bool_values = projected_bools
            .as_::<BoolArray>()
            .boolean_buffer()
            .iter()
            .collect::<Vec<_>>();
        assert_eq!(bool_values, vec![true, true, true, false, false]);

        // Column 1 of the projection is the original `xs` column.
        let projected_ints = projected.maybe_null_field_by_idx(1).unwrap();
        assert_eq!(
            projected_ints.as_::<PrimitiveArray>().as_slice::<i64>(),
            [0i64, 1, 2, 3, 4]
        );
    }
}