1use std::fmt::Debug;
5use std::iter::once;
6use std::ops::Range;
7
8use itertools::Itertools;
9use vortex_dtype::{DType, FieldName, FieldNames, StructFields};
10use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
11use vortex_scalar::Scalar;
12
13use crate::stats::{ArrayStats, StatsSetRef};
14use crate::validity::Validity;
15use crate::vtable::{
16 ArrayVTable, CanonicalVTable, NotSupported, OperationsVTable, VTable, ValidityHelper,
17 ValidityVTableFromValidityHelper,
18};
19use crate::{Array, ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, vtable};
20
21mod compute;
22mod operator;
23mod serde;
24
// Invokes the crate's `vtable!` macro for the `Struct` encoding.
// NOTE(review): `StructVTable` is used below but never declared in this file, so it is
// presumably generated here (along with glue tying it to `StructArray`) — confirm against
// the macro definition.
vtable!(Struct);
26
27impl VTable for StructVTable {
28 type Array = StructArray;
29 type Encoding = StructEncoding;
30
31 type ArrayVTable = Self;
32 type CanonicalVTable = Self;
33 type OperationsVTable = Self;
34 type ValidityVTable = ValidityVTableFromValidityHelper;
35 type VisitorVTable = Self;
36 type ComputeVTable = NotSupported;
37 type EncodeVTable = NotSupported;
38 type PipelineVTable = Self;
39 type SerdeVTable = Self;
40
41 fn id(_encoding: &Self::Encoding) -> EncodingId {
42 EncodingId::new_ref("vortex.struct")
43 }
44
45 fn encoding(_array: &Self::Array) -> EncodingRef {
46 EncodingRef::new_ref(StructEncoding.as_ref())
47 }
48}
49
/// An array of struct values, stored as one child array per field.
///
/// The constructors in this file validate (unless `new_unchecked` is used) that every
/// child has `len` elements, that each child's dtype matches the corresponding dtype in
/// the struct's `StructFields`, and that a validity with a known length has length `len`.
#[derive(Clone, Debug)]
pub struct StructArray {
    /// Number of rows. Stored explicitly because a struct may have zero fields, in which
    /// case no child array could supply the length.
    len: usize,
    /// Always built as `DType::Struct(..)`; its nullability is taken from `validity`
    /// at construction time.
    dtype: DType,
    /// Child arrays, in the same order as the field names in `dtype`.
    fields: Vec<ArrayRef>,
    /// Struct-level validity.
    validity: Validity,
    /// Statistics holder for this array; starts out as `Default::default()`.
    stats_set: ArrayStats,
}
176
/// Marker type for the struct encoding; carries no data and is used as
/// `VTable::Encoding` for `StructVTable`.
#[derive(Clone, Debug)]
pub struct StructEncoding;
179
180impl StructArray {
181 pub fn fields(&self) -> &[ArrayRef] {
182 &self.fields
183 }
184
185 pub fn field_by_name(&self, name: impl AsRef<str>) -> VortexResult<&ArrayRef> {
186 let name = name.as_ref();
187 self.field_by_name_opt(name).ok_or_else(|| {
188 vortex_err!(
189 "Field {name} not found in struct array with names {:?}",
190 self.names()
191 )
192 })
193 }
194
195 pub fn field_by_name_opt(&self, name: impl AsRef<str>) -> Option<&ArrayRef> {
196 let name = name.as_ref();
197 self.names()
198 .iter()
199 .position(|field_name| field_name.as_ref() == name)
200 .map(|idx| &self.fields[idx])
201 }
202
203 pub fn names(&self) -> &FieldNames {
204 self.struct_fields().names()
205 }
206
207 pub fn struct_fields(&self) -> &StructFields {
208 let Some(struct_dtype) = &self.dtype.as_struct_fields_opt() else {
209 unreachable!(
210 "struct arrays must have be a DType::Struct, this is likely an internal bug."
211 )
212 };
213 struct_dtype
214 }
215
216 pub fn new_fieldless_with_len(len: usize) -> Self {
218 Self::try_new(
219 FieldNames::default(),
220 Vec::new(),
221 len,
222 Validity::NonNullable,
223 )
224 .vortex_expect("StructArray::new_with_len should not fail")
225 }
226
227 pub fn new(
234 names: FieldNames,
235 fields: Vec<ArrayRef>,
236 length: usize,
237 validity: Validity,
238 ) -> Self {
239 Self::try_new(names, fields, length, validity)
240 .vortex_expect("StructArray construction failed")
241 }
242
243 pub fn try_new(
252 names: FieldNames,
253 fields: Vec<ArrayRef>,
254 length: usize,
255 validity: Validity,
256 ) -> VortexResult<Self> {
257 let field_dtypes: Vec<_> = fields.iter().map(|d| d.dtype()).cloned().collect();
258 let dtype = StructFields::new(names, field_dtypes);
259
260 Self::validate(&fields, &dtype, length, &validity)?;
261
262 Ok(unsafe { Self::new_unchecked(fields, dtype, length, validity) })
264 }
265
266 pub unsafe fn new_unchecked(
292 fields: Vec<ArrayRef>,
293 dtype: StructFields,
294 length: usize,
295 validity: Validity,
296 ) -> Self {
297 Self {
298 len: length,
299 dtype: DType::Struct(dtype, validity.nullability()),
300 fields,
301 validity,
302 stats_set: Default::default(),
303 }
304 }
305
306 pub(crate) fn validate(
310 fields: &[ArrayRef],
311 dtype: &StructFields,
312 length: usize,
313 validity: &Validity,
314 ) -> VortexResult<()> {
315 if fields.len() != dtype.names().len() {
317 vortex_bail!(
318 "Got {} fields but dtype has {} names",
319 fields.len(),
320 dtype.names().len()
321 );
322 }
323
324 for (i, (field, struct_dt)) in fields.iter().zip(dtype.fields()).enumerate() {
326 if field.len() != length {
327 vortex_bail!(
328 "Field {} has length {} but expected {}",
329 i,
330 field.len(),
331 length
332 );
333 }
334
335 if field.dtype() != &struct_dt {
336 vortex_bail!(
337 "Field {} has dtype {} but expected {}",
338 i,
339 field.dtype(),
340 struct_dt
341 );
342 }
343 }
344
345 if let Some(validity_len) = validity.maybe_len()
347 && validity_len != length
348 {
349 vortex_bail!(
350 "Validity has length {} but expected {}",
351 validity_len,
352 length
353 );
354 }
355
356 Ok(())
357 }
358
359 pub fn try_new_with_dtype(
360 fields: Vec<ArrayRef>,
361 dtype: StructFields,
362 length: usize,
363 validity: Validity,
364 ) -> VortexResult<Self> {
365 Self::validate(&fields, &dtype, length, &validity)?;
366
367 Ok(unsafe { Self::new_unchecked(fields, dtype, length, validity) })
369 }
370
371 pub fn from_fields<N: AsRef<str>>(items: &[(N, ArrayRef)]) -> VortexResult<Self> {
372 Self::try_from_iter(items.iter().map(|(a, b)| (a, b.to_array())))
373 }
374
375 pub fn try_from_iter_with_validity<
376 N: AsRef<str>,
377 A: IntoArray,
378 T: IntoIterator<Item = (N, A)>,
379 >(
380 iter: T,
381 validity: Validity,
382 ) -> VortexResult<Self> {
383 let (names, fields): (Vec<FieldName>, Vec<ArrayRef>) = iter
384 .into_iter()
385 .map(|(name, fields)| (FieldName::from(name.as_ref()), fields.into_array()))
386 .unzip();
387 let len = fields
388 .first()
389 .map(|f| f.len())
390 .ok_or_else(|| vortex_err!("StructArray cannot be constructed from an empty slice of arrays because the length is unspecified"))?;
391
392 Self::try_new(FieldNames::from_iter(names), fields, len, validity)
393 }
394
395 pub fn try_from_iter<N: AsRef<str>, A: IntoArray, T: IntoIterator<Item = (N, A)>>(
396 iter: T,
397 ) -> VortexResult<Self> {
398 Self::try_from_iter_with_validity(iter, Validity::NonNullable)
399 }
400
401 #[allow(clippy::same_name_method)]
409 pub fn project(&self, projection: &[FieldName]) -> VortexResult<Self> {
410 let mut children = Vec::with_capacity(projection.len());
411 let mut names = Vec::with_capacity(projection.len());
412
413 for f_name in projection.iter() {
414 let idx = self
415 .names()
416 .iter()
417 .position(|name| name == f_name)
418 .ok_or_else(|| vortex_err!("Unknown field {f_name}"))?;
419
420 names.push(self.names()[idx].clone());
421 children.push(self.fields()[idx].clone());
422 }
423
424 StructArray::try_new(
425 FieldNames::from(names.as_slice()),
426 children,
427 self.len(),
428 self.validity().clone(),
429 )
430 }
431
432 pub fn remove_column(&mut self, name: impl Into<FieldName>) -> Option<ArrayRef> {
435 let name = name.into();
436
437 let struct_dtype = self.struct_fields().clone();
438
439 let position = struct_dtype
440 .names()
441 .iter()
442 .position(|field_name| field_name.as_ref() == name.as_ref())?;
443
444 let field = self.fields.remove(position);
445
446 if let Ok(new_dtype) = struct_dtype.without_field(position) {
447 self.dtype = DType::Struct(new_dtype, self.dtype.nullability());
448 return Some(field);
449 }
450 None
451 }
452
453 pub fn with_column(&self, name: impl Into<FieldName>, array: ArrayRef) -> VortexResult<Self> {
455 let name = name.into();
456 let struct_dtype = self.struct_fields().clone();
457
458 let names = struct_dtype.names().iter().cloned().chain(once(name));
459 let types = struct_dtype.fields().chain(once(array.dtype().clone()));
460 let new_fields = StructFields::new(names.collect(), types.collect());
461
462 let mut children = self.fields.clone();
463 children.push(array);
464
465 Self::try_new_with_dtype(children, new_fields, self.len, self.validity.clone())
466 }
467}
468
469impl ValidityHelper for StructArray {
470 fn validity(&self) -> &Validity {
471 &self.validity
472 }
473}
474
475impl ArrayVTable<StructVTable> for StructVTable {
476 fn len(array: &StructArray) -> usize {
477 array.len
478 }
479
480 fn dtype(array: &StructArray) -> &DType {
481 &array.dtype
482 }
483
484 fn stats(array: &StructArray) -> StatsSetRef<'_> {
485 array.stats_set.to_ref(array.as_ref())
486 }
487}
488
489impl CanonicalVTable<StructVTable> for StructVTable {
490 fn canonicalize(array: &StructArray) -> Canonical {
491 Canonical::Struct(array.clone())
492 }
493}
494
495impl OperationsVTable<StructVTable> for StructVTable {
496 fn slice(array: &StructArray, range: Range<usize>) -> ArrayRef {
497 let fields = array
498 .fields()
499 .iter()
500 .map(|field| field.slice(range.clone()))
501 .collect_vec();
502 unsafe {
508 StructArray::new_unchecked(
509 fields,
510 array.struct_fields().clone(),
511 range.len(),
512 array.validity().slice(range),
513 )
514 }
515 .into_array()
516 }
517
518 fn scalar_at(array: &StructArray, index: usize) -> Scalar {
519 Scalar::struct_(
520 array.dtype().clone(),
521 array
522 .fields()
523 .iter()
524 .map(|field| field.scalar_at(index))
525 .collect_vec(),
526 )
527 }
528}
529
// Unit tests for `StructArray`: projection, in-place column removal, lookup with
// duplicate field names, and size statistics.
#[cfg(test)]
mod test {
    use vortex_buffer::buffer;
    use vortex_dtype::{DType, FieldName, FieldNames, Nullability, PType};

    use crate::arrays::primitive::PrimitiveArray;
    use crate::arrays::struct_::StructArray;
    use crate::arrays::varbin::VarBinArray;
    use crate::arrays::{BoolArray, ConstantArray};
    use crate::validity::Validity;
    use crate::{Array, IntoArray, ToCanonical};

    /// Projecting onto `["zs", "xs"]` keeps only those fields, in the requested order,
    /// with their data and the row count intact.
    #[test]
    fn test_project() {
        let xs = PrimitiveArray::new(buffer![0i64, 1, 2, 3, 4], Validity::NonNullable);
        let ys = VarBinArray::from_vec(
            vec!["a", "b", "c", "d", "e"],
            DType::Utf8(Nullability::NonNullable),
        );
        let zs = BoolArray::from_iter([true, true, true, false, false]);

        let struct_a = StructArray::try_new(
            FieldNames::from(["xs", "ys", "zs"]),
            vec![xs.into_array(), ys.into_array(), zs.into_array()],
            5,
            Validity::NonNullable,
        )
        .unwrap();

        // Request the fields in a different order than they are stored.
        let struct_b = struct_a
            .project(&[FieldName::from("zs"), FieldName::from("xs")])
            .unwrap();
        assert_eq!(
            struct_b.names().as_ref(),
            [FieldName::from("zs"), FieldName::from("xs")],
        );

        assert_eq!(struct_b.len(), 5);

        // Field 0 of the projection is the former `zs`.
        let bools = &struct_b.fields[0];
        assert_eq!(
            bools.to_bool().boolean_buffer().iter().collect::<Vec<_>>(),
            vec![true, true, true, false, false]
        );

        // Field 1 of the projection is the former `xs`.
        let prims = &struct_b.fields[1];
        assert_eq!(prims.to_primitive().as_slice::<i64>(), [0i64, 1, 2, 3, 4]);
    }

    /// Removing a column returns its child array and shrinks names/fields; removing an
    /// unknown column returns `None` and changes nothing.
    #[test]
    fn test_remove_column() {
        let xs = PrimitiveArray::new(buffer![0i64, 1, 2, 3, 4], Validity::NonNullable);
        let ys = PrimitiveArray::new(buffer![4u64, 5, 6, 7, 8], Validity::NonNullable);

        let mut struct_a = StructArray::try_new(
            FieldNames::from(["xs", "ys"]),
            vec![xs.into_array(), ys.into_array()],
            5,
            Validity::NonNullable,
        )
        .unwrap();

        // The removed child is handed back unchanged.
        let removed = struct_a.remove_column("xs").unwrap();
        assert_eq!(
            removed.dtype(),
            &DType::Primitive(PType::I64, Nullability::NonNullable)
        );
        assert_eq!(removed.to_primitive().as_slice::<i64>(), [0i64, 1, 2, 3, 4]);

        // Only `ys` remains, with the row count preserved.
        assert_eq!(struct_a.names(), &["ys"]);
        assert_eq!(struct_a.fields.len(), 1);
        assert_eq!(struct_a.len(), 5);
        assert_eq!(
            struct_a.fields[0].dtype(),
            &DType::Primitive(PType::U64, Nullability::NonNullable)
        );
        assert_eq!(
            struct_a.fields[0].to_primitive().as_slice::<u64>(),
            [4u64, 5, 6, 7, 8]
        );

        // Unknown names are a no-op.
        let empty = struct_a.remove_column("non_existent");
        assert!(
            empty.is_none(),
            "Expected None when removing non-existent column"
        );
        assert_eq!(struct_a.names(), &["ys"]);
    }

    /// Duplicate field names are accepted; name-based lookup resolves to the first match,
    /// while positional access still reaches the later duplicate.
    #[test]
    fn test_duplicate_field_names() {
        let field1 = buffer![1i32, 2, 3].into_array();
        let field2 = buffer![10i32, 20, 30].into_array();
        let field3 = buffer![100i32, 200, 300].into_array();

        // "value" appears at positions 0 and 2.
        let struct_array = StructArray::try_new(
            FieldNames::from(["value", "other", "value"]),
            vec![field1, field2, field3],
            3,
            Validity::NonNullable,
        )
        .unwrap();

        let first_value_field = struct_array.field_by_name("value").unwrap();
        assert_eq!(
            first_value_field.to_primitive().as_slice::<i32>(),
            [1i32, 2, 3] // data of `field1`, not `field3`
        );

        let opt_field = struct_array.field_by_name_opt("value").unwrap();
        assert_eq!(
            opt_field.to_primitive().as_slice::<i32>(),
            [1i32, 2, 3] // data of `field1`, not `field3`
        );

        // The second "value" is only reachable by index.
        let third_field = &struct_array.fields()[2];
        assert_eq!(
            third_field.to_primitive().as_slice::<i32>(),
            [100i32, 200, 300]
        );
    }

    /// Compares the reported byte size of a struct wrapping a constant child with its
    /// computed uncompressed size.
    #[test]
    fn test_uncompressed_size_in_bytes() {
        let struct_array = StructArray::new(
            FieldNames::from(["integers"]),
            vec![ConstantArray::new(5, 1000).into_array()],
            1000,
            Validity::NonNullable,
        );

        let canonical_size = struct_array.to_canonical().into_array().nbytes();
        let uncompressed_size = struct_array
            .statistics()
            .compute_uncompressed_size_in_bytes();

        // NOTE(review): canonicalizing the struct keeps the constant child as-is, which is
        // presumably why `nbytes` stays tiny; 4000 presumably corresponds to 1000 rows of
        // 4-byte integers — confirm both.
        assert_eq!(canonical_size, 2);
        assert_eq!(uncompressed_size, Some(4000));
    }
}