vortex_array/arrays/struct_/
mod.rs

1use std::fmt::Debug;
2use std::sync::Arc;
3
4use vortex_dtype::{DType, FieldName, FieldNames, StructDType};
5use vortex_error::{VortexExpect as _, VortexResult, vortex_bail, vortex_err};
6use vortex_mask::Mask;
7
8use crate::array::{ArrayCanonicalImpl, ArrayValidityImpl};
9use crate::stats::{ArrayStats, StatsSetRef};
10use crate::validity::Validity;
11use crate::variants::StructArrayTrait;
12use crate::vtable::VTableRef;
13use crate::{
14    Array, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayVariantsImpl, Canonical, EmptyMetadata,
15    Encoding,
16};
17
18mod compute;
19mod serde;
20
21#[derive(Clone, Debug)]
22pub struct StructArray {
23    len: usize,
24    dtype: DType,
25    fields: Vec<ArrayRef>,
26    validity: Validity,
27    stats_set: ArrayStats,
28}
29
30pub struct StructEncoding;
31impl Encoding for StructEncoding {
32    type Array = StructArray;
33    type Metadata = EmptyMetadata;
34}
35
36impl StructArray {
37    pub fn validity(&self) -> &Validity {
38        &self.validity
39    }
40
41    pub fn fields(&self) -> &[ArrayRef] {
42        &self.fields
43    }
44
45    pub fn struct_dtype(&self) -> &Arc<StructDType> {
46        let Some(struct_dtype) = &self.dtype.as_struct() else {
47            unreachable!(
48                "struct arrays must have be a DType::Struct, this is likely an internal bug."
49            )
50        };
51        struct_dtype
52    }
53
54    pub fn try_new(
55        names: FieldNames,
56        fields: Vec<ArrayRef>,
57        length: usize,
58        validity: Validity,
59    ) -> VortexResult<Self> {
60        let nullability = validity.nullability();
61
62        if names.len() != fields.len() {
63            vortex_bail!("Got {} names and {} fields", names.len(), fields.len());
64        }
65
66        for field in fields.iter() {
67            if field.len() != length {
68                vortex_bail!(
69                    "Expected all struct fields to have length {length}, found {}",
70                    field.len()
71                );
72            }
73        }
74
75        let field_dtypes: Vec<_> = fields.iter().map(|d| d.dtype()).cloned().collect();
76        let dtype = DType::Struct(Arc::new(StructDType::new(names, field_dtypes)), nullability);
77
78        Ok(Self {
79            len: length,
80            dtype,
81            fields,
82            validity,
83            stats_set: Default::default(),
84        })
85    }
86
87    pub fn try_new_with_dtype(
88        fields: Vec<ArrayRef>,
89        dtype: Arc<StructDType>,
90        length: usize,
91        validity: Validity,
92    ) -> VortexResult<Self> {
93        for (field, struct_dt) in fields.iter().zip(dtype.fields()) {
94            if field.len() != length {
95                vortex_bail!(
96                    "Expected all struct fields to have length {length}, found {}",
97                    field.len()
98                );
99            }
100
101            if &struct_dt != field.dtype() {
102                vortex_bail!(
103                    "Expected all struct fields to have dtype {}, found {}",
104                    struct_dt,
105                    field.dtype()
106                );
107            }
108        }
109
110        Ok(Self {
111            len: length,
112            dtype: DType::Struct(dtype, validity.nullability()),
113            fields,
114            validity,
115            stats_set: Default::default(),
116        })
117    }
118
119    pub fn from_fields<N: AsRef<str>>(items: &[(N, ArrayRef)]) -> VortexResult<Self> {
120        let names = items.iter().map(|(name, _)| FieldName::from(name.as_ref()));
121        let fields: Vec<ArrayRef> = items.iter().map(|(_, array)| array.to_array()).collect();
122        let len = fields
123            .first()
124            .map(|f| f.len())
125            .ok_or_else(|| vortex_err!("StructArray cannot be constructed from an empty slice of arrays because the length is unspecified"))?;
126
127        Self::try_new(
128            FieldNames::from_iter(names),
129            fields,
130            len,
131            Validity::NonNullable,
132        )
133    }
134
135    // TODO(aduffy): Add equivalent function to support field masks for nested column access.
136    /// Return a new StructArray with the given projection applied.
137    ///
138    /// Projection does not copy data arrays. Projection is defined by an ordinal array slice
139    /// which specifies the new ordering of columns in the struct. The projection can be used to
140    /// perform column re-ordering, deletion, or duplication at a logical level, without any data
141    /// copying.
142    #[allow(clippy::same_name_method)]
143    pub fn project(&self, projection: &[FieldName]) -> VortexResult<Self> {
144        let mut children = Vec::with_capacity(projection.len());
145        let mut names = Vec::with_capacity(projection.len());
146
147        for f_name in projection.iter() {
148            let idx = self
149                .names()
150                .iter()
151                .position(|name| name == f_name)
152                .ok_or_else(|| vortex_err!("Unknown field {f_name}"))?;
153
154            names.push(self.names()[idx].clone());
155            children.push(
156                self.maybe_null_field_by_idx(idx)
157                    .vortex_expect("never out of bounds"),
158            );
159        }
160
161        StructArray::try_new(
162            FieldNames::from(names.as_slice()),
163            children,
164            self.len(),
165            self.validity().clone(),
166        )
167    }
168}
169
170impl ArrayImpl for StructArray {
171    type Encoding = StructEncoding;
172
173    fn _len(&self) -> usize {
174        self.len
175    }
176
177    fn _dtype(&self) -> &DType {
178        &self.dtype
179    }
180
181    fn _vtable(&self) -> VTableRef {
182        VTableRef::new_ref(&StructEncoding)
183    }
184
185    fn _with_children(&self, children: &[ArrayRef]) -> VortexResult<Self> {
186        let validity = if self.validity().is_array() {
187            Validity::Array(children[0].clone())
188        } else {
189            self.validity().clone()
190        };
191
192        let fields_idx = if validity.is_array() { 1_usize } else { 0 };
193        let fields = children[fields_idx..].to_vec();
194
195        Self::try_new_with_dtype(fields, self.struct_dtype().clone(), self.len(), validity)
196    }
197}
198
199impl ArrayStatisticsImpl for StructArray {
200    fn _stats_ref(&self) -> StatsSetRef<'_> {
201        self.stats_set.to_ref(self)
202    }
203}
204
205impl ArrayVariantsImpl for StructArray {
206    fn _as_struct_typed(&self) -> Option<&dyn StructArrayTrait> {
207        Some(self)
208    }
209}
210
211impl StructArrayTrait for StructArray {
212    fn maybe_null_field_by_idx(&self, idx: usize) -> VortexResult<ArrayRef> {
213        Ok(self.fields[idx].clone())
214    }
215
216    fn project(&self, projection: &[FieldName]) -> VortexResult<ArrayRef> {
217        self.project(projection).map(|a| a.into_array())
218    }
219}
220
221impl ArrayCanonicalImpl for StructArray {
222    fn _to_canonical(&self) -> VortexResult<Canonical> {
223        Ok(Canonical::Struct(self.clone()))
224    }
225}
226
227impl ArrayValidityImpl for StructArray {
228    fn _is_valid(&self, index: usize) -> VortexResult<bool> {
229        self.validity.is_valid(index)
230    }
231
232    fn _all_valid(&self) -> VortexResult<bool> {
233        self.validity.all_valid()
234    }
235
236    fn _all_invalid(&self) -> VortexResult<bool> {
237        self.validity.all_invalid()
238    }
239
240    fn _validity_mask(&self) -> VortexResult<Mask> {
241        self.validity.to_mask(self.len())
242    }
243}
244
#[cfg(test)]
mod test {
    use vortex_buffer::buffer;
    use vortex_dtype::{DType, FieldName, FieldNames, Nullability};

    use crate::ArrayExt;
    use crate::array::Array;
    use crate::arrays::BoolArray;
    use crate::arrays::primitive::PrimitiveArray;
    use crate::arrays::struct_::StructArray;
    use crate::arrays::varbin::VarBinArray;
    use crate::validity::Validity;
    use crate::variants::StructArrayTrait;

    /// Projecting `{xs, ys, zs}` onto `[zs, xs]` drops `ys`, reorders the
    /// remaining columns, and keeps their length and contents.
    #[test]
    fn test_project() {
        let ints = PrimitiveArray::new(buffer![0i64, 1, 2, 3, 4], Validity::NonNullable);
        let strings = VarBinArray::from_vec(
            vec!["a", "b", "c", "d", "e"],
            DType::Utf8(Nullability::NonNullable),
        );
        let flags = BoolArray::from_iter([true, true, true, false, false]);

        let source = StructArray::try_new(
            FieldNames::from(["xs".into(), "ys".into(), "zs".into()]),
            vec![ints.into_array(), strings.into_array(), flags.into_array()],
            5,
            Validity::NonNullable,
        )
        .unwrap();

        let projected = source
            .project(&[FieldName::from("zs"), FieldName::from("xs")])
            .unwrap();

        // Names follow the projection order, not the source order.
        assert_eq!(
            projected.names().as_ref(),
            [FieldName::from("zs"), FieldName::from("xs")],
        );
        assert_eq!(projected.len(), 5);

        // Column 0 is now the boolean column.
        let first_column = projected.maybe_null_field_by_idx(0).unwrap();
        let bool_values: Vec<_> = first_column
            .as_::<BoolArray>()
            .boolean_buffer()
            .iter()
            .collect();
        assert_eq!(bool_values, vec![true, true, true, false, false]);

        // Column 1 is now the integer column.
        let second_column = projected.maybe_null_field_by_idx(1).unwrap();
        assert_eq!(
            second_column.as_::<PrimitiveArray>().as_slice::<i64>(),
            [0i64, 1, 2, 3, 4]
        );
    }
}