vortex_array/arrays/struct_/
mod.rs1use std::fmt::Debug;
2use std::sync::Arc;
3
4use vortex_dtype::{DType, FieldName, FieldNames, StructDType};
5use vortex_error::{VortexExpect as _, VortexResult, vortex_bail, vortex_err};
6use vortex_mask::Mask;
7
8use crate::array::{ArrayCanonicalImpl, ArrayValidityImpl};
9use crate::stats::{ArrayStats, StatsSetRef};
10use crate::validity::Validity;
11use crate::variants::StructArrayTrait;
12use crate::vtable::VTableRef;
13use crate::{
14 Array, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayVariantsImpl, Canonical, EmptyMetadata,
15 Encoding,
16};
17
18mod compute;
19mod serde;
20
21#[derive(Clone, Debug)]
22pub struct StructArray {
23 len: usize,
24 dtype: DType,
25 fields: Vec<ArrayRef>,
26 validity: Validity,
27 stats_set: ArrayStats,
28}
29
30pub struct StructEncoding;
31impl Encoding for StructEncoding {
32 type Array = StructArray;
33 type Metadata = EmptyMetadata;
34}
35
36impl StructArray {
37 pub fn validity(&self) -> &Validity {
38 &self.validity
39 }
40
41 pub fn fields(&self) -> &[ArrayRef] {
42 &self.fields
43 }
44
45 pub fn struct_dtype(&self) -> &Arc<StructDType> {
46 let Some(struct_dtype) = &self.dtype.as_struct() else {
47 unreachable!(
48 "struct arrays must have be a DType::Struct, this is likely an internal bug."
49 )
50 };
51 struct_dtype
52 }
53
54 pub fn try_new(
55 names: FieldNames,
56 fields: Vec<ArrayRef>,
57 length: usize,
58 validity: Validity,
59 ) -> VortexResult<Self> {
60 let nullability = validity.nullability();
61
62 if names.len() != fields.len() {
63 vortex_bail!("Got {} names and {} fields", names.len(), fields.len());
64 }
65
66 for field in fields.iter() {
67 if field.len() != length {
68 vortex_bail!(
69 "Expected all struct fields to have length {length}, found {}",
70 field.len()
71 );
72 }
73 }
74
75 let field_dtypes: Vec<_> = fields.iter().map(|d| d.dtype()).cloned().collect();
76 let dtype = DType::Struct(Arc::new(StructDType::new(names, field_dtypes)), nullability);
77
78 Ok(Self {
79 len: length,
80 dtype,
81 fields,
82 validity,
83 stats_set: Default::default(),
84 })
85 }
86
87 pub fn try_new_with_dtype(
88 fields: Vec<ArrayRef>,
89 dtype: Arc<StructDType>,
90 length: usize,
91 validity: Validity,
92 ) -> VortexResult<Self> {
93 for (field, struct_dt) in fields.iter().zip(dtype.fields()) {
94 if field.len() != length {
95 vortex_bail!(
96 "Expected all struct fields to have length {length}, found {}",
97 field.len()
98 );
99 }
100
101 if &struct_dt != field.dtype() {
102 vortex_bail!(
103 "Expected all struct fields to have dtype {}, found {}",
104 struct_dt,
105 field.dtype()
106 );
107 }
108 }
109
110 Ok(Self {
111 len: length,
112 dtype: DType::Struct(dtype, validity.nullability()),
113 fields,
114 validity,
115 stats_set: Default::default(),
116 })
117 }
118
119 pub fn from_fields<N: AsRef<str>>(items: &[(N, ArrayRef)]) -> VortexResult<Self> {
120 let names = items.iter().map(|(name, _)| FieldName::from(name.as_ref()));
121 let fields: Vec<ArrayRef> = items.iter().map(|(_, array)| array.to_array()).collect();
122 let len = fields
123 .first()
124 .map(|f| f.len())
125 .ok_or_else(|| vortex_err!("StructArray cannot be constructed from an empty slice of arrays because the length is unspecified"))?;
126
127 Self::try_new(
128 FieldNames::from_iter(names),
129 fields,
130 len,
131 Validity::NonNullable,
132 )
133 }
134
135 #[allow(clippy::same_name_method)]
143 pub fn project(&self, projection: &[FieldName]) -> VortexResult<Self> {
144 let mut children = Vec::with_capacity(projection.len());
145 let mut names = Vec::with_capacity(projection.len());
146
147 for f_name in projection.iter() {
148 let idx = self
149 .names()
150 .iter()
151 .position(|name| name == f_name)
152 .ok_or_else(|| vortex_err!("Unknown field {f_name}"))?;
153
154 names.push(self.names()[idx].clone());
155 children.push(
156 self.maybe_null_field_by_idx(idx)
157 .vortex_expect("never out of bounds"),
158 );
159 }
160
161 StructArray::try_new(
162 FieldNames::from(names.as_slice()),
163 children,
164 self.len(),
165 self.validity().clone(),
166 )
167 }
168}
169
170impl ArrayImpl for StructArray {
171 type Encoding = StructEncoding;
172
173 fn _len(&self) -> usize {
174 self.len
175 }
176
177 fn _dtype(&self) -> &DType {
178 &self.dtype
179 }
180
181 fn _vtable(&self) -> VTableRef {
182 VTableRef::new_ref(&StructEncoding)
183 }
184
185 fn _with_children(&self, children: &[ArrayRef]) -> VortexResult<Self> {
186 let validity = if self.validity().is_array() {
187 Validity::Array(children[0].clone())
188 } else {
189 self.validity().clone()
190 };
191
192 let fields_idx = if validity.is_array() { 1_usize } else { 0 };
193 let fields = children[fields_idx..].to_vec();
194
195 Self::try_new_with_dtype(fields, self.struct_dtype().clone(), self.len(), validity)
196 }
197}
198
199impl ArrayStatisticsImpl for StructArray {
200 fn _stats_ref(&self) -> StatsSetRef<'_> {
201 self.stats_set.to_ref(self)
202 }
203}
204
205impl ArrayVariantsImpl for StructArray {
206 fn _as_struct_typed(&self) -> Option<&dyn StructArrayTrait> {
207 Some(self)
208 }
209}
210
211impl StructArrayTrait for StructArray {
212 fn maybe_null_field_by_idx(&self, idx: usize) -> VortexResult<ArrayRef> {
213 Ok(self.fields[idx].clone())
214 }
215
216 fn project(&self, projection: &[FieldName]) -> VortexResult<ArrayRef> {
217 self.project(projection).map(|a| a.into_array())
218 }
219}
220
221impl ArrayCanonicalImpl for StructArray {
222 fn _to_canonical(&self) -> VortexResult<Canonical> {
223 Ok(Canonical::Struct(self.clone()))
224 }
225}
226
227impl ArrayValidityImpl for StructArray {
228 fn _is_valid(&self, index: usize) -> VortexResult<bool> {
229 self.validity.is_valid(index)
230 }
231
232 fn _all_valid(&self) -> VortexResult<bool> {
233 self.validity.all_valid()
234 }
235
236 fn _all_invalid(&self) -> VortexResult<bool> {
237 self.validity.all_invalid()
238 }
239
240 fn _validity_mask(&self) -> VortexResult<Mask> {
241 self.validity.to_mask(self.len())
242 }
243}
244
#[cfg(test)]
mod test {
    use vortex_buffer::buffer;
    use vortex_dtype::{DType, FieldName, FieldNames, Nullability};

    use crate::ArrayExt;
    use crate::array::Array;
    use crate::arrays::BoolArray;
    use crate::arrays::primitive::PrimitiveArray;
    use crate::arrays::struct_::StructArray;
    use crate::arrays::varbin::VarBinArray;
    use crate::validity::Validity;
    use crate::variants::StructArrayTrait;

    /// Projecting onto `["zs", "xs"]` keeps the row count, reorders the fields to
    /// match the projection, and leaves the field data untouched.
    #[test]
    fn test_project() {
        let ints = PrimitiveArray::new(buffer![0i64, 1, 2, 3, 4], Validity::NonNullable);
        let strings = VarBinArray::from_vec(
            vec!["a", "b", "c", "d", "e"],
            DType::Utf8(Nullability::NonNullable),
        );
        let flags = BoolArray::from_iter([true, true, true, false, false]);

        let field_names = FieldNames::from(["xs".into(), "ys".into(), "zs".into()]);
        let children = vec![ints.into_array(), strings.into_array(), flags.into_array()];
        let source = StructArray::try_new(field_names, children, 5, Validity::NonNullable)
            .unwrap();

        let projected = source
            .project(&[FieldName::from("zs"), FieldName::from("xs")])
            .unwrap();

        // Field names follow the projection order, not the source order.
        assert_eq!(
            projected.names().as_ref(),
            [FieldName::from("zs"), FieldName::from("xs")],
        );
        assert_eq!(projected.len(), 5);

        // First projected field is the original boolean column.
        let first = projected.maybe_null_field_by_idx(0).unwrap();
        let bool_values = first
            .as_::<BoolArray>()
            .boolean_buffer()
            .iter()
            .collect::<Vec<_>>();
        assert_eq!(bool_values, vec![true, true, true, false, false]);

        // Second projected field is the original integer column.
        let second = projected.maybe_null_field_by_idx(1).unwrap();
        assert_eq!(
            second.as_::<PrimitiveArray>().as_slice::<i64>(),
            [0i64, 1, 2, 3, 4]
        );
    }
}