vortex_array/arrays/struct_/
array.rs1use std::iter::once;
5use std::sync::Arc;
6
7use vortex_error::VortexExpect;
8use vortex_error::VortexResult;
9use vortex_error::vortex_err;
10
11use crate::ArrayRef;
12use crate::IntoArray;
13use crate::array::Array;
14use crate::array::ArrayParts;
15use crate::array::EmptyArrayData;
16use crate::array::TypedArrayRef;
17use crate::array::child_to_validity;
18use crate::array::validity_to_child;
19use crate::arrays::Struct;
20use crate::dtype::DType;
21use crate::dtype::FieldName;
22use crate::dtype::FieldNames;
23use crate::dtype::StructFields;
24use crate::validity::Validity;
25
/// Index of the slot that holds the struct-level validity child.
pub(super) const VALIDITY_SLOT: usize = 0;
/// Index of the first field slot; field `i` is stored at slot `FIELDS_OFFSET + i`.
pub(super) const FIELDS_OFFSET: usize = 1;
31
/// Owned components of a struct array, as produced by `Array::<Struct>::into_data_parts`.
pub struct StructDataParts {
    /// The struct's field names and field dtypes.
    pub struct_fields: StructFields,
    /// The field arrays, in field order, taken directly from the field slots.
    pub fields: Arc<[ArrayRef]>,
    /// The struct-level validity.
    pub validity: Validity,
}
159
160pub(super) fn make_struct_slots(
161 fields: &[ArrayRef],
162 validity: &Validity,
163 length: usize,
164) -> Vec<Option<ArrayRef>> {
165 once(validity_to_child(validity, length))
166 .chain(fields.iter().cloned().map(Some))
167 .collect()
168}
169
170pub trait StructArrayExt: TypedArrayRef<Struct> {
171 fn nullability(&self) -> crate::dtype::Nullability {
172 match self.as_ref().dtype() {
173 DType::Struct(_, nullability) => *nullability,
174 _ => unreachable!("StructArrayExt requires a struct dtype"),
175 }
176 }
177
178 fn names(&self) -> &FieldNames {
179 self.as_ref().dtype().as_struct_fields().names()
180 }
181
182 fn struct_validity(&self) -> Validity {
183 child_to_validity(&self.as_ref().slots()[VALIDITY_SLOT], self.nullability())
184 }
185
186 fn iter_unmasked_fields(&self) -> impl Iterator<Item = &ArrayRef> + '_ {
187 self.as_ref().slots()[FIELDS_OFFSET..]
188 .iter()
189 .map(|s| s.as_ref().vortex_expect("StructArray field slot"))
190 }
191
192 fn unmasked_fields(&self) -> Arc<[ArrayRef]> {
193 self.iter_unmasked_fields().cloned().collect()
194 }
195
196 fn unmasked_field(&self, idx: usize) -> &ArrayRef {
197 self.as_ref().slots()[FIELDS_OFFSET + idx]
198 .as_ref()
199 .vortex_expect("StructArray field slot")
200 }
201
202 fn unmasked_field_by_name_opt(&self, name: impl AsRef<str>) -> Option<&ArrayRef> {
203 let name = name.as_ref();
204 self.struct_fields()
205 .find(name)
206 .map(|idx| self.unmasked_field(idx))
207 }
208
209 fn unmasked_field_by_name(&self, name: impl AsRef<str>) -> VortexResult<&ArrayRef> {
210 let name = name.as_ref();
211 self.unmasked_field_by_name_opt(name).ok_or_else(|| {
212 vortex_err!(
213 "Field {name} not found in struct array with names {:?}",
214 self.names()
215 )
216 })
217 }
218
219 fn struct_fields(&self) -> &StructFields {
220 self.as_ref().dtype().as_struct_fields()
221 }
222}
223impl<T: TypedArrayRef<Struct>> StructArrayExt for T {}
224
225impl Array<Struct> {
226 pub fn new(
228 names: FieldNames,
229 fields: impl Into<Arc<[ArrayRef]>>,
230 length: usize,
231 validity: Validity,
232 ) -> Self {
233 Self::try_new(names, fields, length, validity)
234 .vortex_expect("StructArray construction failed")
235 }
236
237 pub fn try_new(
239 names: FieldNames,
240 fields: impl Into<Arc<[ArrayRef]>>,
241 length: usize,
242 validity: Validity,
243 ) -> VortexResult<Self> {
244 let fields = fields.into();
245 let field_dtypes: Vec<_> = fields.iter().map(|d| d.dtype().clone()).collect();
246 let dtype = StructFields::new(names, field_dtypes);
247 let slots = make_struct_slots(&fields, &validity, length);
248 Array::try_from_parts(
249 ArrayParts::new(
250 Struct,
251 DType::Struct(dtype, validity.nullability()),
252 length,
253 EmptyArrayData,
254 )
255 .with_slots(slots),
256 )
257 }
258
259 pub unsafe fn new_unchecked(
265 fields: impl Into<Arc<[ArrayRef]>>,
266 dtype: StructFields,
267 length: usize,
268 validity: Validity,
269 ) -> Self {
270 let fields = fields.into();
271 let outer_dtype = DType::Struct(dtype, validity.nullability());
272 let slots = make_struct_slots(&fields, &validity, length);
273 unsafe {
274 Array::from_parts_unchecked(
275 ArrayParts::new(Struct, outer_dtype, length, EmptyArrayData).with_slots(slots),
276 )
277 }
278 }
279
280 pub fn try_new_with_dtype(
282 fields: impl Into<Arc<[ArrayRef]>>,
283 dtype: StructFields,
284 length: usize,
285 validity: Validity,
286 ) -> VortexResult<Self> {
287 let fields = fields.into();
288 let outer_dtype = DType::Struct(dtype, validity.nullability());
289 let slots = make_struct_slots(&fields, &validity, length);
290 Array::try_from_parts(
291 ArrayParts::new(Struct, outer_dtype, length, EmptyArrayData).with_slots(slots),
292 )
293 }
294
295 pub fn from_fields<N: AsRef<str>>(items: &[(N, ArrayRef)]) -> VortexResult<Self> {
297 Self::try_from_iter(items.iter().map(|(a, b)| (a, b.clone())))
298 }
299
300 pub fn try_from_iter_with_validity<
302 N: AsRef<str>,
303 A: IntoArray,
304 T: IntoIterator<Item = (N, A)>,
305 >(
306 iter: T,
307 validity: Validity,
308 ) -> VortexResult<Self> {
309 let (names, fields): (Vec<FieldName>, Vec<ArrayRef>) = iter
310 .into_iter()
311 .map(|(name, fields)| (FieldName::from(name.as_ref()), fields.into_array()))
312 .unzip();
313 let len = fields
314 .first()
315 .map(|f| f.len())
316 .ok_or_else(|| vortex_err!("StructArray cannot be constructed from an empty slice of arrays because the length is unspecified"))?;
317
318 Self::try_new(FieldNames::from_iter(names), fields, len, validity)
319 }
320
321 pub fn try_from_iter<N: AsRef<str>, A: IntoArray, T: IntoIterator<Item = (N, A)>>(
323 iter: T,
324 ) -> VortexResult<Self> {
325 let (names, fields): (Vec<FieldName>, Vec<ArrayRef>) = iter
326 .into_iter()
327 .map(|(name, field)| (FieldName::from(name.as_ref()), field.into_array()))
328 .unzip();
329 let len = fields
330 .first()
331 .map(ArrayRef::len)
332 .ok_or_else(|| vortex_err!("StructArray cannot be constructed from an empty slice of arrays because the length is unspecified"))?;
333
334 Self::try_new(
335 FieldNames::from_iter(names),
336 fields,
337 len,
338 Validity::NonNullable,
339 )
340 }
341
342 pub fn project(&self, projection: &[FieldName]) -> VortexResult<Self> {
350 let mut children = Vec::with_capacity(projection.len());
351 let mut names = Vec::with_capacity(projection.len());
352
353 for f_name in projection {
354 let idx = self
355 .struct_fields()
356 .find(f_name.as_ref())
357 .ok_or_else(|| vortex_err!("Unknown field {f_name}"))?;
358
359 names.push(self.names()[idx].clone());
360 children.push(self.unmasked_field(idx).clone());
361 }
362
363 Self::try_new(
364 FieldNames::from(names.as_slice()),
365 children,
366 self.len(),
367 self.validity()?,
368 )
369 }
370
371 pub fn new_fieldless_with_len(len: usize) -> Self {
373 let dtype = DType::Struct(
374 StructFields::new(FieldNames::default(), Vec::new()),
375 crate::dtype::Nullability::NonNullable,
376 );
377 let slots = make_struct_slots(&[], &Validity::NonNullable, len);
378 unsafe {
379 Array::from_parts_unchecked(
380 ArrayParts::new(Struct, dtype, len, EmptyArrayData).with_slots(slots),
381 )
382 }
383 }
384
385 pub fn into_data_parts(self) -> StructDataParts {
387 let fields: Arc<[ArrayRef]> = self.slots()[FIELDS_OFFSET..]
388 .iter()
389 .map(|s| s.as_ref().vortex_expect("StructArray field slot").clone())
390 .collect();
391 let validity = self.validity().vortex_expect("StructArray validity");
392 StructDataParts {
393 struct_fields: self.struct_fields().clone(),
394 fields,
395 validity,
396 }
397 }
398
399 pub fn remove_column(&self, name: impl Into<FieldName>) -> Option<(Self, ArrayRef)> {
400 let name = name.into();
401 let struct_dtype = self.struct_fields();
402 let len = self.len();
403
404 let position = struct_dtype.find(name.as_ref())?;
405
406 let slot_position = FIELDS_OFFSET + position;
407 let field = self.slots()[slot_position]
408 .as_ref()
409 .vortex_expect("StructArray field slot")
410 .clone();
411 let new_slots: Vec<Option<ArrayRef>> = self
412 .slots()
413 .iter()
414 .enumerate()
415 .filter(|(i, _)| *i != slot_position)
416 .map(|(_, s)| s.clone())
417 .collect();
418
419 let new_dtype = struct_dtype.without_field(position).ok()?;
420 let new_array = unsafe {
421 Array::from_parts_unchecked(
422 ArrayParts::new(
423 Struct,
424 DType::Struct(new_dtype, self.dtype().nullability()),
425 len,
426 EmptyArrayData,
427 )
428 .with_slots(new_slots),
429 )
430 };
431 Some((new_array, field))
432 }
433}
434
435impl Array<Struct> {
436 pub fn with_column(&self, name: impl Into<FieldName>, array: ArrayRef) -> VortexResult<Self> {
437 let name = name.into();
438 let struct_dtype = self.struct_fields();
439
440 let names = struct_dtype.names().iter().cloned().chain(once(name));
441 let types = struct_dtype.fields().chain(once(array.dtype().clone()));
442 let new_fields = StructFields::new(names.collect(), types.collect());
443
444 let children: Arc<[ArrayRef]> = self.slots()[FIELDS_OFFSET..]
445 .iter()
446 .map(|s| s.as_ref().vortex_expect("StructArray field slot").clone())
447 .chain(once(array))
448 .collect();
449
450 Self::try_new_with_dtype(children, new_fields, self.len(), self.validity()?)
451 }
452
453 pub fn remove_column_owned(&self, name: impl Into<FieldName>) -> Option<(Self, ArrayRef)> {
454 self.remove_column(name)
455 }
456}