1use std::fmt::Debug;
5use std::iter::once;
6
7use itertools::Itertools;
8use vortex_dtype::{DType, FieldName, FieldNames, StructFields};
9use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
10use vortex_scalar::Scalar;
11
12use crate::stats::{ArrayStats, StatsSetRef};
13use crate::validity::Validity;
14use crate::vtable::{
15 ArrayVTable, CanonicalVTable, NotSupported, OperationsVTable, VTable, ValidityHelper,
16 ValidityVTableFromValidityHelper,
17};
18use crate::{Array, ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, vtable};
19
// Submodules belonging to the struct array implementation.
mod compute;
mod serde;

// Invokes the crate's `vtable!` macro for `Struct`. Presumably this is what
// declares `StructVTable` (implemented below) — confirm against the macro definition.
vtable!(Struct);
24
25impl VTable for StructVTable {
26 type Array = StructArray;
27 type Encoding = StructEncoding;
28
29 type ArrayVTable = Self;
30 type CanonicalVTable = Self;
31 type OperationsVTable = Self;
32 type ValidityVTable = ValidityVTableFromValidityHelper;
33 type VisitorVTable = Self;
34 type ComputeVTable = NotSupported;
35 type EncodeVTable = NotSupported;
36 type SerdeVTable = Self;
37
38 fn id(_encoding: &Self::Encoding) -> EncodingId {
39 EncodingId::new_ref("vortex.struct")
40 }
41
42 fn encoding(_array: &Self::Array) -> EncodingRef {
43 EncodingRef::new_ref(StructEncoding.as_ref())
44 }
45}
46
/// An array of struct values, stored as one child array per field.
///
/// The constructors in this file check that the child arrays share the
/// struct's length and derive the struct's nullability from `validity`.
#[derive(Clone, Debug)]
pub struct StructArray {
    /// Number of rows in the struct array.
    len: usize,
    /// Logical type of the array; the constructors always build a `DType::Struct`.
    dtype: DType,
    /// Child arrays, positionally aligned with the field names stored in `dtype`.
    fields: Vec<ArrayRef>,
    /// Validity of the struct values themselves.
    validity: Validity,
    /// Statistics holder; default-initialised by the constructors.
    stats_set: ArrayStats,
}
173
/// Stateless marker type for the struct encoding; it is the `Encoding`
/// associated type of `StructVTable`.
#[derive(Clone, Debug)]
pub struct StructEncoding;
176
177impl StructArray {
178 pub fn fields(&self) -> &[ArrayRef] {
179 &self.fields
180 }
181
182 pub fn field_by_name(&self, name: impl AsRef<str>) -> VortexResult<&ArrayRef> {
183 let name = name.as_ref();
184 self.field_by_name_opt(name).ok_or_else(|| {
185 vortex_err!(
186 "Field {name} not found in struct array with names {:?}",
187 self.names()
188 )
189 })
190 }
191
192 pub fn field_by_name_opt(&self, name: impl AsRef<str>) -> Option<&ArrayRef> {
193 let name = name.as_ref();
194 self.names()
195 .iter()
196 .position(|field_name| field_name.as_ref() == name)
197 .map(|idx| &self.fields[idx])
198 }
199
200 pub fn names(&self) -> &FieldNames {
201 self.struct_fields().names()
202 }
203
204 pub fn struct_fields(&self) -> &StructFields {
205 let Some(struct_dtype) = &self.dtype.as_struct() else {
206 unreachable!(
207 "struct arrays must have be a DType::Struct, this is likely an internal bug."
208 )
209 };
210 struct_dtype
211 }
212
213 pub fn new_with_len(len: usize) -> Self {
215 Self::try_new(
216 FieldNames::default(),
217 Vec::new(),
218 len,
219 Validity::NonNullable,
220 )
221 .vortex_expect("StructArray::new_with_len should not fail")
222 }
223
224 pub fn try_new(
225 names: FieldNames,
226 fields: Vec<ArrayRef>,
227 length: usize,
228 validity: Validity,
229 ) -> VortexResult<Self> {
230 let nullability = validity.nullability();
231
232 if names.len() != fields.len() {
233 vortex_bail!("Got {} names and {} fields", names.len(), fields.len());
234 }
235
236 for field in fields.iter() {
237 if field.len() != length {
238 vortex_bail!(
239 "Expected all struct fields to have length {length}, found {}",
240 fields.iter().map(|f| f.len()).format(","),
241 );
242 }
243 }
244
245 let field_dtypes: Vec<_> = fields.iter().map(|d| d.dtype()).cloned().collect();
246 let dtype = DType::Struct(StructFields::new(names, field_dtypes), nullability);
247
248 if length != validity.maybe_len().unwrap_or(length) {
249 vortex_bail!(
250 "array length {} and validity length must match {}",
251 length,
252 validity
253 .maybe_len()
254 .vortex_expect("can only fail if maybe is some")
255 )
256 }
257
258 Ok(Self {
259 len: length,
260 dtype,
261 fields,
262 validity,
263 stats_set: Default::default(),
264 })
265 }
266
267 pub fn try_new_with_dtype(
268 fields: Vec<ArrayRef>,
269 dtype: StructFields,
270 length: usize,
271 validity: Validity,
272 ) -> VortexResult<Self> {
273 for (field, struct_dt) in fields.iter().zip(dtype.fields()) {
274 if field.len() != length {
275 vortex_bail!(
276 "Expected all struct fields to have length {length}, found {}",
277 field.len()
278 );
279 }
280
281 if &struct_dt != field.dtype() {
282 vortex_bail!(
283 "Expected all struct fields to have dtype {}, found {}",
284 struct_dt,
285 field.dtype()
286 );
287 }
288 }
289
290 Ok(Self {
291 len: length,
292 dtype: DType::Struct(dtype, validity.nullability()),
293 fields,
294 validity,
295 stats_set: Default::default(),
296 })
297 }
298
299 pub fn from_fields<N: AsRef<str>>(items: &[(N, ArrayRef)]) -> VortexResult<Self> {
300 Self::try_from_iter(items.iter().map(|(a, b)| (a, b.to_array())))
301 }
302
303 pub fn try_from_iter_with_validity<
304 N: AsRef<str>,
305 A: IntoArray,
306 T: IntoIterator<Item = (N, A)>,
307 >(
308 iter: T,
309 validity: Validity,
310 ) -> VortexResult<Self> {
311 let (names, fields): (Vec<FieldName>, Vec<ArrayRef>) = iter
312 .into_iter()
313 .map(|(name, fields)| (FieldName::from(name.as_ref()), fields.into_array()))
314 .unzip();
315 let len = fields
316 .first()
317 .map(|f| f.len())
318 .ok_or_else(|| vortex_err!("StructArray cannot be constructed from an empty slice of arrays because the length is unspecified"))?;
319
320 Self::try_new(FieldNames::from_iter(names), fields, len, validity)
321 }
322
323 pub fn try_from_iter<N: AsRef<str>, A: IntoArray, T: IntoIterator<Item = (N, A)>>(
324 iter: T,
325 ) -> VortexResult<Self> {
326 Self::try_from_iter_with_validity(iter, Validity::NonNullable)
327 }
328
329 #[allow(clippy::same_name_method)]
337 pub fn project(&self, projection: &[FieldName]) -> VortexResult<Self> {
338 let mut children = Vec::with_capacity(projection.len());
339 let mut names = Vec::with_capacity(projection.len());
340
341 for f_name in projection.iter() {
342 let idx = self
343 .names()
344 .iter()
345 .position(|name| name == f_name)
346 .ok_or_else(|| vortex_err!("Unknown field {f_name}"))?;
347
348 names.push(self.names()[idx].clone());
349 children.push(self.fields()[idx].clone());
350 }
351
352 StructArray::try_new(
353 FieldNames::from(names.as_slice()),
354 children,
355 self.len(),
356 self.validity().clone(),
357 )
358 }
359
360 pub fn remove_column(&mut self, name: impl Into<FieldName>) -> Option<ArrayRef> {
363 let name = name.into();
364
365 let struct_dtype = self.struct_fields().clone();
366
367 let position = struct_dtype
368 .names()
369 .iter()
370 .position(|field_name| field_name.as_ref() == name.as_ref())?;
371
372 let field = self.fields.remove(position);
373
374 let new_dtype = struct_dtype.without_field(position);
375 self.dtype = DType::Struct(new_dtype, self.dtype.nullability());
376
377 Some(field)
378 }
379
380 pub fn with_column(&self, name: impl Into<FieldName>, array: ArrayRef) -> VortexResult<Self> {
382 let name = name.into();
383 let struct_dtype = self.struct_fields().clone();
384
385 let names = struct_dtype.names().iter().cloned().chain(once(name));
386 let types = struct_dtype.fields().chain(once(array.dtype().clone()));
387 let new_fields = StructFields::new(names.collect(), types.collect());
388
389 let mut children = self.fields.clone();
390 children.push(array);
391
392 Self::try_new_with_dtype(children, new_fields, self.len, self.validity.clone())
393 }
394}
395
/// Exposes the struct-level validity; `StructVTable` names
/// `ValidityVTableFromValidityHelper` as its `ValidityVTable`.
impl ValidityHelper for StructArray {
    /// Returns the validity of the struct values themselves.
    fn validity(&self) -> &Validity {
        &self.validity
    }
}
401
/// Basic array metadata accessors for struct arrays.
impl ArrayVTable<StructVTable> for StructVTable {
    /// Returns the number of rows stored in the struct array.
    fn len(array: &StructArray) -> usize {
        array.len
    }

    /// Returns the array's dtype (a `DType::Struct` for arrays built by the constructors).
    fn dtype(array: &StructArray) -> &DType {
        &array.dtype
    }

    /// Returns a reference view over the array's statistics holder.
    fn stats(array: &StructArray) -> StatsSetRef<'_> {
        array.stats_set.to_ref(array.as_ref())
    }
}
415
/// Canonicalization for struct arrays.
impl CanonicalVTable<StructVTable> for StructVTable {
    /// Wraps a clone of `array` in `Canonical::Struct`; no child array is
    /// transformed here.
    fn canonicalize(array: &StructArray) -> VortexResult<Canonical> {
        Ok(Canonical::Struct(array.clone()))
    }
}
421
422impl OperationsVTable<StructVTable> for StructVTable {
423 fn slice(array: &StructArray, start: usize, stop: usize) -> VortexResult<ArrayRef> {
424 let fields = array
425 .fields()
426 .iter()
427 .map(|field| field.slice(start, stop))
428 .try_collect()?;
429 StructArray::try_new_with_dtype(
430 fields,
431 array.struct_fields().clone(),
432 stop - start,
433 array.validity().slice(start, stop)?,
434 )
435 .map(|a| a.into_array())
436 }
437
438 fn scalar_at(array: &StructArray, index: usize) -> VortexResult<Scalar> {
439 if array.is_valid(index)? {
440 Ok(Scalar::struct_(
441 array.dtype().clone(),
442 array
443 .fields()
444 .iter()
445 .map(|field| field.scalar_at(index))
446 .try_collect()?,
447 ))
448 } else {
449 Ok(Scalar::null(array.dtype().clone()))
450 }
451 }
452}
453
#[cfg(test)]
mod test {
    use vortex_buffer::buffer;
    use vortex_dtype::{DType, FieldName, FieldNames, Nullability, PType};

    use crate::IntoArray;
    use crate::arrays::primitive::PrimitiveArray;
    use crate::arrays::struct_::StructArray;
    use crate::arrays::varbin::VarBinArray;
    use crate::arrays::{BoolArray, BoolVTable, PrimitiveVTable};
    use crate::validity::Validity;

    /// Projection selects and reorders columns while keeping the row count.
    #[test]
    fn test_project() {
        let int_col =
            PrimitiveArray::new(buffer![0i64, 1, 2, 3, 4], Validity::NonNullable).into_array();
        let str_col = VarBinArray::from_vec(
            vec!["a", "b", "c", "d", "e"],
            DType::Utf8(Nullability::NonNullable),
        )
        .into_array();
        let bool_col = BoolArray::from_iter([true, true, true, false, false]).into_array();

        let source = StructArray::try_new(
            FieldNames::from(["xs", "ys", "zs"]),
            vec![int_col, str_col, bool_col],
            5,
            Validity::NonNullable,
        )
        .unwrap();

        let projected = source
            .project(&[FieldName::from("zs"), FieldName::from("xs")])
            .unwrap();

        assert_eq!(
            projected.names().as_ref(),
            [FieldName::from("zs"), FieldName::from("xs")],
        );
        assert_eq!(projected.len(), 5);

        // First projected column is the boolean one.
        let projected_bools: Vec<_> = projected.fields()[0]
            .as_::<BoolVTable>()
            .boolean_buffer()
            .iter()
            .collect();
        assert_eq!(projected_bools, vec![true, true, true, false, false]);

        // Second projected column is the i64 one.
        let projected_ints = &projected.fields()[1];
        assert_eq!(
            projected_ints.as_::<PrimitiveVTable>().as_slice::<i64>(),
            [0i64, 1, 2, 3, 4]
        );
    }

    /// Removing a column returns it and shrinks the struct; a missing name is a no-op.
    #[test]
    fn test_remove_column() {
        let signed =
            PrimitiveArray::new(buffer![0i64, 1, 2, 3, 4], Validity::NonNullable).into_array();
        let unsigned =
            PrimitiveArray::new(buffer![4u64, 5, 6, 7, 8], Validity::NonNullable).into_array();

        let mut subject = StructArray::try_new(
            FieldNames::from(["xs", "ys"]),
            vec![signed, unsigned],
            5,
            Validity::NonNullable,
        )
        .unwrap();

        let taken = subject.remove_column("xs").unwrap();
        assert_eq!(
            taken.dtype(),
            &DType::Primitive(PType::I64, Nullability::NonNullable)
        );
        assert_eq!(
            taken.as_::<PrimitiveVTable>().as_slice::<i64>(),
            [0i64, 1, 2, 3, 4]
        );

        assert_eq!(subject.names(), &[FieldName::from("ys")].into());
        assert_eq!(subject.fields().len(), 1);
        assert_eq!(subject.len(), 5);

        let remaining = &subject.fields()[0];
        assert_eq!(
            remaining.dtype(),
            &DType::Primitive(PType::U64, Nullability::NonNullable)
        );
        assert_eq!(
            remaining.as_::<PrimitiveVTable>().as_slice::<u64>(),
            [4u64, 5, 6, 7, 8]
        );

        let missing = subject.remove_column("non_existent");
        assert!(
            missing.is_none(),
            "Expected None when removing non-existent column"
        );
        assert_eq!(subject.names(), &[FieldName::from("ys")].into());
    }

    /// Duplicate names are accepted; name lookups resolve to the first match.
    #[test]
    fn test_duplicate_field_names() {
        let first = buffer![1i32, 2, 3].into_array();
        let middle = buffer![10i32, 20, 30].into_array();
        let last = buffer![100i32, 200, 300].into_array();

        let subject = StructArray::try_new(
            FieldNames::from(["value", "other", "value"]),
            vec![first, middle, last],
            3,
            Validity::NonNullable,
        )
        .unwrap();

        // Fallible lookup returns the first "value" column.
        let by_name = subject.field_by_name("value").unwrap();
        assert_eq!(
            by_name.as_::<PrimitiveVTable>().as_slice::<i32>(),
            [1i32, 2, 3]
        );

        // The optional lookup agrees with it.
        let by_name_opt = subject.field_by_name_opt("value").unwrap();
        assert_eq!(
            by_name_opt.as_::<PrimitiveVTable>().as_slice::<i32>(),
            [1i32, 2, 3]
        );

        // The second "value" column is still reachable by position.
        let by_index = &subject.fields()[2];
        assert_eq!(
            by_index.as_::<PrimitiveVTable>().as_slice::<i32>(),
            [100i32, 200, 300]
        );
    }
}