vortex_array/arrays/struct_/
mod.rs1use std::fmt::Debug;
2use std::sync::Arc;
3
4use vortex_dtype::{DType, FieldName, FieldNames, StructDType};
5use vortex_error::{VortexExpect as _, VortexResult, vortex_bail, vortex_err};
6use vortex_mask::Mask;
7
8use crate::array::{ArrayCanonicalImpl, ArrayValidityImpl};
9use crate::stats::{ArrayStats, StatsSetRef};
10use crate::validity::Validity;
11use crate::variants::StructArrayTrait;
12use crate::vtable::VTableRef;
13use crate::{
14 Array, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayVariantsImpl, Canonical, EmptyMetadata,
15 Encoding,
16};
17
18mod compute;
19mod serde;
20
21#[derive(Clone, Debug)]
22pub struct StructArray {
23 len: usize,
24 dtype: DType,
25 fields: Vec<ArrayRef>,
26 validity: Validity,
27 stats_set: ArrayStats,
28}
29
30#[derive(Debug)]
31pub struct StructEncoding;
32impl Encoding for StructEncoding {
33 type Array = StructArray;
34 type Metadata = EmptyMetadata;
35}
36
37impl StructArray {
38 pub fn validity(&self) -> &Validity {
39 &self.validity
40 }
41
42 pub fn fields(&self) -> &[ArrayRef] {
43 &self.fields
44 }
45
46 pub fn struct_dtype(&self) -> &Arc<StructDType> {
47 let Some(struct_dtype) = &self.dtype.as_struct() else {
48 unreachable!(
49 "struct arrays must have be a DType::Struct, this is likely an internal bug."
50 )
51 };
52 struct_dtype
53 }
54
55 pub fn try_new(
56 names: FieldNames,
57 fields: Vec<ArrayRef>,
58 length: usize,
59 validity: Validity,
60 ) -> VortexResult<Self> {
61 let nullability = validity.nullability();
62
63 if names.len() != fields.len() {
64 vortex_bail!("Got {} names and {} fields", names.len(), fields.len());
65 }
66
67 for field in fields.iter() {
68 if field.len() != length {
69 vortex_bail!(
70 "Expected all struct fields to have length {length}, found {}",
71 field.len()
72 );
73 }
74 }
75
76 let field_dtypes: Vec<_> = fields.iter().map(|d| d.dtype()).cloned().collect();
77 let dtype = DType::Struct(Arc::new(StructDType::new(names, field_dtypes)), nullability);
78
79 Ok(Self {
80 len: length,
81 dtype,
82 fields,
83 validity,
84 stats_set: Default::default(),
85 })
86 }
87
88 pub fn try_new_with_dtype(
89 fields: Vec<ArrayRef>,
90 dtype: Arc<StructDType>,
91 length: usize,
92 validity: Validity,
93 ) -> VortexResult<Self> {
94 for (field, struct_dt) in fields.iter().zip(dtype.fields()) {
95 if field.len() != length {
96 vortex_bail!(
97 "Expected all struct fields to have length {length}, found {}",
98 field.len()
99 );
100 }
101
102 if &struct_dt != field.dtype() {
103 vortex_bail!(
104 "Expected all struct fields to have dtype {}, found {}",
105 struct_dt,
106 field.dtype()
107 );
108 }
109 }
110
111 Ok(Self {
112 len: length,
113 dtype: DType::Struct(dtype, validity.nullability()),
114 fields,
115 validity,
116 stats_set: Default::default(),
117 })
118 }
119
120 pub fn from_fields<N: AsRef<str>>(items: &[(N, ArrayRef)]) -> VortexResult<Self> {
121 let names = items.iter().map(|(name, _)| FieldName::from(name.as_ref()));
122 let fields: Vec<ArrayRef> = items.iter().map(|(_, array)| array.to_array()).collect();
123 let len = fields
124 .first()
125 .map(|f| f.len())
126 .ok_or_else(|| vortex_err!("StructArray cannot be constructed from an empty slice of arrays because the length is unspecified"))?;
127
128 Self::try_new(
129 FieldNames::from_iter(names),
130 fields,
131 len,
132 Validity::NonNullable,
133 )
134 }
135
136 #[allow(clippy::same_name_method)]
144 pub fn project(&self, projection: &[FieldName]) -> VortexResult<Self> {
145 let mut children = Vec::with_capacity(projection.len());
146 let mut names = Vec::with_capacity(projection.len());
147
148 for f_name in projection.iter() {
149 let idx = self
150 .names()
151 .iter()
152 .position(|name| name == f_name)
153 .ok_or_else(|| vortex_err!("Unknown field {f_name}"))?;
154
155 names.push(self.names()[idx].clone());
156 children.push(
157 self.maybe_null_field_by_idx(idx)
158 .vortex_expect("never out of bounds"),
159 );
160 }
161
162 StructArray::try_new(
163 FieldNames::from(names.as_slice()),
164 children,
165 self.len(),
166 self.validity().clone(),
167 )
168 }
169}
170
171impl ArrayImpl for StructArray {
172 type Encoding = StructEncoding;
173
174 fn _len(&self) -> usize {
175 self.len
176 }
177
178 fn _dtype(&self) -> &DType {
179 &self.dtype
180 }
181
182 fn _vtable(&self) -> VTableRef {
183 VTableRef::new_ref(&StructEncoding)
184 }
185
186 fn _with_children(&self, children: &[ArrayRef]) -> VortexResult<Self> {
187 let validity = if self.validity().is_array() {
188 Validity::Array(children[0].clone())
189 } else {
190 self.validity().clone()
191 };
192
193 let fields_idx = if validity.is_array() { 1_usize } else { 0 };
194 let fields = children[fields_idx..].to_vec();
195
196 Self::try_new_with_dtype(fields, self.struct_dtype().clone(), self.len(), validity)
197 }
198}
199
200impl ArrayStatisticsImpl for StructArray {
201 fn _stats_ref(&self) -> StatsSetRef<'_> {
202 self.stats_set.to_ref(self)
203 }
204}
205
206impl ArrayVariantsImpl for StructArray {
207 fn _as_struct_typed(&self) -> Option<&dyn StructArrayTrait> {
208 Some(self)
209 }
210}
211
212impl StructArrayTrait for StructArray {
213 fn maybe_null_field_by_idx(&self, idx: usize) -> VortexResult<ArrayRef> {
214 Ok(self.fields[idx].clone())
215 }
216
217 fn project(&self, projection: &[FieldName]) -> VortexResult<ArrayRef> {
218 self.project(projection).map(|a| a.into_array())
219 }
220}
221
222impl ArrayCanonicalImpl for StructArray {
223 fn _to_canonical(&self) -> VortexResult<Canonical> {
224 Ok(Canonical::Struct(self.clone()))
225 }
226}
227
228impl ArrayValidityImpl for StructArray {
229 fn _is_valid(&self, index: usize) -> VortexResult<bool> {
230 self.validity.is_valid(index)
231 }
232
233 fn _all_valid(&self) -> VortexResult<bool> {
234 self.validity.all_valid()
235 }
236
237 fn _all_invalid(&self) -> VortexResult<bool> {
238 self.validity.all_invalid()
239 }
240
241 fn _validity_mask(&self) -> VortexResult<Mask> {
242 self.validity.to_mask(self.len())
243 }
244}
245
#[cfg(test)]
mod test {
    use vortex_buffer::buffer;
    use vortex_dtype::{DType, FieldName, FieldNames, Nullability};

    use crate::ArrayExt;
    use crate::array::Array;
    use crate::arrays::BoolArray;
    use crate::arrays::primitive::PrimitiveArray;
    use crate::arrays::struct_::StructArray;
    use crate::arrays::varbin::VarBinArray;
    use crate::validity::Validity;
    use crate::variants::StructArrayTrait;

    /// Projecting onto a reordered subset of fields keeps the requested order,
    /// the row count, and each selected field's values.
    #[test]
    fn test_project() {
        let ints = PrimitiveArray::new(buffer![0i64, 1, 2, 3, 4], Validity::NonNullable);
        let strings = VarBinArray::from_vec(
            vec!["a", "b", "c", "d", "e"],
            DType::Utf8(Nullability::NonNullable),
        );
        let flags = BoolArray::from_iter([true, true, true, false, false]);

        let source = StructArray::try_new(
            FieldNames::from(["xs".into(), "ys".into(), "zs".into()]),
            vec![ints.into_array(), strings.into_array(), flags.into_array()],
            5,
            Validity::NonNullable,
        )
        .unwrap();

        let wanted = [FieldName::from("zs"), FieldName::from("xs")];
        let projected = source.project(&wanted).unwrap();

        assert_eq!(projected.names().as_ref(), wanted);
        assert_eq!(projected.len(), 5);

        // Field 0 of the projection is the original "zs" boolean column.
        let zs = projected.maybe_null_field_by_idx(0).unwrap();
        let zs_values = zs
            .as_::<BoolArray>()
            .boolean_buffer()
            .iter()
            .collect::<Vec<_>>();
        assert_eq!(zs_values, vec![true, true, true, false, false]);

        // Field 1 of the projection is the original "xs" integer column.
        let xs = projected.maybe_null_field_by_idx(1).unwrap();
        assert_eq!(
            xs.as_::<PrimitiveArray>().as_slice::<i64>(),
            [0i64, 1, 2, 3, 4]
        );
    }
}