1use std::sync::Arc;
7
8use vortex_buffer::BitBuffer;
9use vortex_buffer::Buffer;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_ensure;
13use vortex_error::vortex_panic;
14
15use crate::ArrayRef;
16use crate::DynArray;
17use crate::Executable;
18use crate::ExecutionCtx;
19use crate::IntoArray;
20use crate::arrays::Bool;
21use crate::arrays::BoolArray;
22use crate::arrays::Decimal;
23use crate::arrays::DecimalArray;
24use crate::arrays::Extension;
25use crate::arrays::ExtensionArray;
26use crate::arrays::FixedSizeList;
27use crate::arrays::FixedSizeListArray;
28use crate::arrays::ListView;
29use crate::arrays::ListViewArray;
30use crate::arrays::Null;
31use crate::arrays::NullArray;
32use crate::arrays::Primitive;
33use crate::arrays::PrimitiveArray;
34use crate::arrays::Struct;
35use crate::arrays::StructArray;
36use crate::arrays::VarBinView;
37use crate::arrays::VarBinViewArray;
38use crate::arrays::bool::BoolArrayParts;
39use crate::arrays::decimal::DecimalArrayParts;
40use crate::arrays::listview::ListViewArrayParts;
41use crate::arrays::listview::ListViewRebuildMode;
42use crate::arrays::primitive::PrimitiveArrayParts;
43use crate::arrays::struct_::StructArrayParts;
44use crate::arrays::varbinview::VarBinViewArrayParts;
45use crate::dtype::DType;
46use crate::dtype::NativePType;
47use crate::dtype::Nullability;
48use crate::dtype::PType;
49use crate::match_each_decimal_value_type;
50use crate::match_each_native_ptype;
51use crate::matcher::Matcher;
52use crate::validity::Validity;
53
/// The set of canonical array representations, one variant per canonical array type.
///
/// Each variant wraps the concrete array struct that `Canonical::empty` produces for the
/// matching `DType` (for example, both `Utf8` and `Binary` dtypes map to `VarBinView`, and
/// `List` dtypes map to a `ListViewArray`).
#[derive(Debug, Clone)]
pub enum Canonical {
    /// Canonical form for `DType::Null`.
    Null(NullArray),
    /// Canonical form for `DType::Bool`.
    Bool(BoolArray),
    /// Canonical form for `DType::Primitive`.
    Primitive(PrimitiveArray),
    /// Canonical form for `DType::Decimal`.
    Decimal(DecimalArray),
    /// Canonical form for `DType::Utf8` and `DType::Binary`.
    VarBinView(VarBinViewArray),
    /// Canonical form for `DType::List`, stored as a list view.
    List(ListViewArray),
    /// Canonical form for `DType::FixedSizeList`.
    FixedSizeList(FixedSizeListArray),
    /// Canonical form for `DType::Struct`.
    Struct(StructArray),
    /// Canonical form for `DType::Extension`.
    Extension(ExtensionArray),
}
126
// Dispatches over every `Canonical` variant, binding the wrapped array to `$ident` and
// evaluating `$eval` with that binding in scope.
//
// `$eval` is expanded once per arm, so it is type-checked separately against each concrete
// array type and must be valid for all of them.
macro_rules! match_each_canonical {
    ($self:expr, | $ident:ident | $eval:expr) => {{
        match $self {
            Canonical::Null($ident) => $eval,
            Canonical::Bool($ident) => $eval,
            Canonical::Primitive($ident) => $eval,
            Canonical::Decimal($ident) => $eval,
            Canonical::VarBinView($ident) => $eval,
            Canonical::List($ident) => $eval,
            Canonical::FixedSizeList($ident) => $eval,
            Canonical::Struct($ident) => $eval,
            Canonical::Extension($ident) => $eval,
        }
    }};
}
143
144impl Canonical {
145 pub fn empty(dtype: &DType) -> Canonical {
147 match dtype {
148 DType::Null => Canonical::Null(NullArray::new(0)),
149 DType::Bool(n) => Canonical::Bool(unsafe {
150 BoolArray::new_unchecked(BitBuffer::empty(), Validity::from(n))
151 }),
152 DType::Primitive(ptype, n) => {
153 match_each_native_ptype!(ptype, |P| {
154 Canonical::Primitive(unsafe {
155 PrimitiveArray::new_unchecked(Buffer::<P>::empty(), Validity::from(n))
156 })
157 })
158 }
159 DType::Decimal(decimal_type, n) => {
160 match_each_decimal_value_type!(
161 DecimalType::smallest_decimal_value_type(decimal_type),
162 |D| {
163 Canonical::Decimal(unsafe {
164 DecimalArray::new_unchecked::<D>(
165 Buffer::empty(),
166 *decimal_type,
167 Validity::from(n),
168 )
169 })
170 }
171 )
172 }
173 DType::Utf8(n) => Canonical::VarBinView(unsafe {
174 VarBinViewArray::new_unchecked(
175 Buffer::empty(),
176 Arc::new([]),
177 dtype.clone(),
178 Validity::from(n),
179 )
180 }),
181 DType::Binary(n) => Canonical::VarBinView(unsafe {
182 VarBinViewArray::new_unchecked(
183 Buffer::empty(),
184 Arc::new([]),
185 dtype.clone(),
186 Validity::from(n),
187 )
188 }),
189 DType::Struct(struct_dtype, n) => Canonical::Struct(unsafe {
190 StructArray::new_unchecked(
191 struct_dtype
192 .fields()
193 .map(|f| Canonical::empty(&f).into_array())
194 .collect::<Arc<[_]>>(),
195 struct_dtype.clone(),
196 0,
197 Validity::from(n),
198 )
199 }),
200 DType::List(dtype, n) => Canonical::List(unsafe {
201 ListViewArray::new_unchecked(
202 Canonical::empty(dtype).into_array(),
203 Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
204 .into_array(),
205 Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
206 .into_array(),
207 Validity::from(n),
208 )
209 .with_zero_copy_to_list(true)
211 }),
212 DType::FixedSizeList(elem_dtype, list_size, null) => Canonical::FixedSizeList(unsafe {
213 FixedSizeListArray::new_unchecked(
214 Canonical::empty(elem_dtype).into_array(),
215 *list_size,
216 Validity::from(null),
217 0,
218 )
219 }),
220 DType::Extension(ext_dtype) => Canonical::Extension(ExtensionArray::new(
221 ext_dtype.clone(),
222 Canonical::empty(ext_dtype.storage_dtype()).into_array(),
223 )),
224 }
225 }
226
227 pub fn len(&self) -> usize {
228 match_each_canonical!(self, |arr| arr.len())
229 }
230
231 pub fn dtype(&self) -> &DType {
232 match_each_canonical!(self, |arr| arr.dtype())
233 }
234
235 pub fn is_empty(&self) -> bool {
236 match_each_canonical!(self, |arr| arr.is_empty())
237 }
238}
239
240impl Canonical {
241 pub fn compact(&self) -> VortexResult<Canonical> {
249 match self {
250 Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
251 Canonical::List(array) => Ok(Canonical::List(
252 array.rebuild(ListViewRebuildMode::TrimElements)?,
253 )),
254 _ => Ok(self.clone()),
255 }
256 }
257}
258
259impl Canonical {
261 pub fn as_null(&self) -> &NullArray {
262 if let Canonical::Null(a) = self {
263 a
264 } else {
265 vortex_panic!("Cannot get NullArray from {:?}", &self)
266 }
267 }
268
269 pub fn into_null(self) -> NullArray {
270 if let Canonical::Null(a) = self {
271 a
272 } else {
273 vortex_panic!("Cannot unwrap NullArray from {:?}", &self)
274 }
275 }
276
277 pub fn as_bool(&self) -> &BoolArray {
278 if let Canonical::Bool(a) = self {
279 a
280 } else {
281 vortex_panic!("Cannot get BoolArray from {:?}", &self)
282 }
283 }
284
285 pub fn into_bool(self) -> BoolArray {
286 if let Canonical::Bool(a) = self {
287 a
288 } else {
289 vortex_panic!("Cannot unwrap BoolArray from {:?}", &self)
290 }
291 }
292
293 pub fn as_primitive(&self) -> &PrimitiveArray {
294 if let Canonical::Primitive(a) = self {
295 a
296 } else {
297 vortex_panic!("Cannot get PrimitiveArray from {:?}", &self)
298 }
299 }
300
301 pub fn into_primitive(self) -> PrimitiveArray {
302 if let Canonical::Primitive(a) = self {
303 a
304 } else {
305 vortex_panic!("Cannot unwrap PrimitiveArray from {:?}", &self)
306 }
307 }
308
309 pub fn as_decimal(&self) -> &DecimalArray {
310 if let Canonical::Decimal(a) = self {
311 a
312 } else {
313 vortex_panic!("Cannot get DecimalArray from {:?}", &self)
314 }
315 }
316
317 pub fn into_decimal(self) -> DecimalArray {
318 if let Canonical::Decimal(a) = self {
319 a
320 } else {
321 vortex_panic!("Cannot unwrap DecimalArray from {:?}", &self)
322 }
323 }
324
325 pub fn as_varbinview(&self) -> &VarBinViewArray {
326 if let Canonical::VarBinView(a) = self {
327 a
328 } else {
329 vortex_panic!("Cannot get VarBinViewArray from {:?}", &self)
330 }
331 }
332
333 pub fn into_varbinview(self) -> VarBinViewArray {
334 if let Canonical::VarBinView(a) = self {
335 a
336 } else {
337 vortex_panic!("Cannot unwrap VarBinViewArray from {:?}", &self)
338 }
339 }
340
341 pub fn as_listview(&self) -> &ListViewArray {
342 if let Canonical::List(a) = self {
343 a
344 } else {
345 vortex_panic!("Cannot get ListArray from {:?}", &self)
346 }
347 }
348
349 pub fn into_listview(self) -> ListViewArray {
350 if let Canonical::List(a) = self {
351 a
352 } else {
353 vortex_panic!("Cannot unwrap ListArray from {:?}", &self)
354 }
355 }
356
357 pub fn as_fixed_size_list(&self) -> &FixedSizeListArray {
358 if let Canonical::FixedSizeList(a) = self {
359 a
360 } else {
361 vortex_panic!("Cannot get FixedSizeListArray from {:?}", &self)
362 }
363 }
364
365 pub fn into_fixed_size_list(self) -> FixedSizeListArray {
366 if let Canonical::FixedSizeList(a) = self {
367 a
368 } else {
369 vortex_panic!("Cannot unwrap FixedSizeListArray from {:?}", &self)
370 }
371 }
372
373 pub fn as_struct(&self) -> &StructArray {
374 if let Canonical::Struct(a) = self {
375 a
376 } else {
377 vortex_panic!("Cannot get StructArray from {:?}", &self)
378 }
379 }
380
381 pub fn into_struct(self) -> StructArray {
382 if let Canonical::Struct(a) = self {
383 a
384 } else {
385 vortex_panic!("Cannot unwrap StructArray from {:?}", &self)
386 }
387 }
388
389 pub fn as_extension(&self) -> &ExtensionArray {
390 if let Canonical::Extension(a) = self {
391 a
392 } else {
393 vortex_panic!("Cannot get ExtensionArray from {:?}", &self)
394 }
395 }
396
397 pub fn into_extension(self) -> ExtensionArray {
398 if let Canonical::Extension(a) = self {
399 a
400 } else {
401 vortex_panic!("Cannot unwrap ExtensionArray from {:?}", &self)
402 }
403 }
404}
405
406impl AsRef<dyn DynArray> for Canonical {
407 fn as_ref(&self) -> &(dyn DynArray + 'static) {
408 match_each_canonical!(self, |arr| arr.as_ref())
409 }
410}
411
412impl IntoArray for Canonical {
413 fn into_array(self) -> ArrayRef {
414 match_each_canonical!(self, |arr| arr.into_array())
415 }
416}
417
/// Convenience conversions from an array into one specific canonical array type.
///
/// The blanket implementation in this file (for every `DynArray`) canonicalizes with
/// `to_canonical` and then unwraps the requested variant. It panics, rather than returning an
/// error, if canonicalization fails or if the canonical form is a different variant.
pub trait ToCanonical {
    /// Canonicalizes into a [`NullArray`].
    fn to_null(&self) -> NullArray;

    /// Canonicalizes into a [`BoolArray`].
    fn to_bool(&self) -> BoolArray;

    /// Canonicalizes into a [`PrimitiveArray`].
    fn to_primitive(&self) -> PrimitiveArray;

    /// Canonicalizes into a [`DecimalArray`].
    fn to_decimal(&self) -> DecimalArray;

    /// Canonicalizes into a [`StructArray`].
    fn to_struct(&self) -> StructArray;

    /// Canonicalizes into a [`ListViewArray`].
    fn to_listview(&self) -> ListViewArray;

    /// Canonicalizes into a [`FixedSizeListArray`].
    fn to_fixed_size_list(&self) -> FixedSizeListArray;

    /// Canonicalizes into a [`VarBinViewArray`].
    fn to_varbinview(&self) -> VarBinViewArray;

    /// Canonicalizes into an [`ExtensionArray`].
    fn to_extension(&self) -> ExtensionArray;
}
456
457impl<A: DynArray + ?Sized> ToCanonical for A {
459 fn to_null(&self) -> NullArray {
460 self.to_canonical()
461 .vortex_expect("to_canonical failed")
462 .into_null()
463 }
464
465 fn to_bool(&self) -> BoolArray {
466 self.to_canonical()
467 .vortex_expect("to_canonical failed")
468 .into_bool()
469 }
470
471 fn to_primitive(&self) -> PrimitiveArray {
472 self.to_canonical()
473 .vortex_expect("to_canonical failed")
474 .into_primitive()
475 }
476
477 fn to_decimal(&self) -> DecimalArray {
478 self.to_canonical()
479 .vortex_expect("to_canonical failed")
480 .into_decimal()
481 }
482
483 fn to_struct(&self) -> StructArray {
484 self.to_canonical()
485 .vortex_expect("to_canonical failed")
486 .into_struct()
487 }
488
489 fn to_listview(&self) -> ListViewArray {
490 self.to_canonical()
491 .vortex_expect("to_canonical failed")
492 .into_listview()
493 }
494
495 fn to_fixed_size_list(&self) -> FixedSizeListArray {
496 self.to_canonical()
497 .vortex_expect("to_canonical failed")
498 .into_fixed_size_list()
499 }
500
501 fn to_varbinview(&self) -> VarBinViewArray {
502 self.to_canonical()
503 .vortex_expect("to_canonical failed")
504 .into_varbinview()
505 }
506
507 fn to_extension(&self) -> ExtensionArray {
508 self.to_canonical()
509 .vortex_expect("to_canonical failed")
510 .into_extension()
511 }
512}
513
514impl From<Canonical> for ArrayRef {
515 fn from(value: Canonical) -> Self {
516 match_each_canonical!(value, |arr| arr.into_array())
517 }
518}
519
520impl Executable for Canonical {
526 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
527 let result = array.execute_until::<AnyCanonical>(ctx)?;
528 Ok(result
529 .as_opt::<AnyCanonical>()
530 .map(Canonical::from)
531 .vortex_expect("execute_until::<AnyCanonical> must return a canonical array"))
532 }
533}
534
/// Execution target wrapping a [`Canonical`] whose validity has also been executed.
///
/// Its `Executable` implementation first executes to `Canonical`, then rebuilds the array with
/// `validity.execute(ctx)` applied; for extension arrays it recurses into the storage array.
pub struct CanonicalValidity(pub Canonical);
540
541impl Executable for CanonicalValidity {
542 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
543 match array.execute::<Canonical>(ctx)? {
544 n @ Canonical::Null(_) => Ok(CanonicalValidity(n)),
545 Canonical::Bool(b) => {
546 let BoolArrayParts {
547 bits,
548 offset,
549 len,
550 validity,
551 } = b.into_parts();
552 Ok(CanonicalValidity(Canonical::Bool(
553 BoolArray::try_new_from_handle(bits, offset, len, validity.execute(ctx)?)?,
554 )))
555 }
556 Canonical::Primitive(p) => {
557 let PrimitiveArrayParts {
558 ptype,
559 buffer,
560 validity,
561 } = p.into_parts();
562 Ok(CanonicalValidity(Canonical::Primitive(unsafe {
563 PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
564 })))
565 }
566 Canonical::Decimal(d) => {
567 let DecimalArrayParts {
568 decimal_dtype,
569 values,
570 values_type,
571 validity,
572 } = d.into_parts();
573 Ok(CanonicalValidity(Canonical::Decimal(unsafe {
574 DecimalArray::new_unchecked_handle(
575 values,
576 values_type,
577 decimal_dtype,
578 validity.execute(ctx)?,
579 )
580 })))
581 }
582 Canonical::VarBinView(vbv) => {
583 let VarBinViewArrayParts {
584 dtype,
585 buffers,
586 views,
587 validity,
588 } = vbv.into_parts();
589 Ok(CanonicalValidity(Canonical::VarBinView(unsafe {
590 VarBinViewArray::new_handle_unchecked(
591 views,
592 buffers,
593 dtype,
594 validity.execute(ctx)?,
595 )
596 })))
597 }
598 Canonical::List(l) => {
599 let zctl = l.is_zero_copy_to_list();
600 let ListViewArrayParts {
601 elements,
602 offsets,
603 sizes,
604 validity,
605 ..
606 } = l.into_parts();
607 Ok(CanonicalValidity(Canonical::List(unsafe {
608 ListViewArray::new_unchecked(elements, offsets, sizes, validity.execute(ctx)?)
609 .with_zero_copy_to_list(zctl)
610 })))
611 }
612 Canonical::FixedSizeList(fsl) => {
613 let list_size = fsl.list_size();
614 let len = fsl.len();
615 let (elements, validity, _) = fsl.into_parts();
616 Ok(CanonicalValidity(Canonical::FixedSizeList(
617 FixedSizeListArray::new(elements, list_size, validity.execute(ctx)?, len),
618 )))
619 }
620 Canonical::Struct(st) => {
621 let len = st.len();
622 let StructArrayParts {
623 struct_fields,
624 fields,
625 validity,
626 } = st.into_parts();
627 Ok(CanonicalValidity(Canonical::Struct(unsafe {
628 StructArray::new_unchecked(fields, struct_fields, len, validity.execute(ctx)?)
629 })))
630 }
631 Canonical::Extension(ext) => Ok(CanonicalValidity(Canonical::Extension(
632 ExtensionArray::new(
633 ext.ext_dtype().clone(),
634 ext.storage_array()
635 .clone()
636 .execute::<CanonicalValidity>(ctx)?
637 .0
638 .into_array(),
639 ),
640 ))),
641 }
642 }
643}
644
/// Execution target wrapping a [`Canonical`] whose children have been executed recursively.
///
/// Its `Executable` implementation executes to `Canonical`, executes the validity, and also
/// executes list elements/offsets/sizes, fixed-size-list elements, struct fields and extension
/// storage as `RecursiveCanonical`.
pub struct RecursiveCanonical(pub Canonical);
650
651impl Executable for RecursiveCanonical {
652 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
653 match array.execute::<Canonical>(ctx)? {
654 n @ Canonical::Null(_) => Ok(RecursiveCanonical(n)),
655 Canonical::Bool(b) => {
656 let BoolArrayParts {
657 bits,
658 offset,
659 len,
660 validity,
661 } = b.into_parts();
662 Ok(RecursiveCanonical(Canonical::Bool(
663 BoolArray::try_new_from_handle(bits, offset, len, validity.execute(ctx)?)?,
664 )))
665 }
666 Canonical::Primitive(p) => {
667 let PrimitiveArrayParts {
668 ptype,
669 buffer,
670 validity,
671 } = p.into_parts();
672 Ok(RecursiveCanonical(Canonical::Primitive(unsafe {
673 PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
674 })))
675 }
676 Canonical::Decimal(d) => {
677 let DecimalArrayParts {
678 decimal_dtype,
679 values,
680 values_type,
681 validity,
682 } = d.into_parts();
683 Ok(RecursiveCanonical(Canonical::Decimal(unsafe {
684 DecimalArray::new_unchecked_handle(
685 values,
686 values_type,
687 decimal_dtype,
688 validity.execute(ctx)?,
689 )
690 })))
691 }
692 Canonical::VarBinView(vbv) => {
693 let VarBinViewArrayParts {
694 dtype,
695 buffers,
696 views,
697 validity,
698 } = vbv.into_parts();
699 Ok(RecursiveCanonical(Canonical::VarBinView(unsafe {
700 VarBinViewArray::new_handle_unchecked(
701 views,
702 buffers,
703 dtype,
704 validity.execute(ctx)?,
705 )
706 })))
707 }
708 Canonical::List(l) => {
709 let zctl = l.is_zero_copy_to_list();
710 let ListViewArrayParts {
711 elements,
712 offsets,
713 sizes,
714 validity,
715 ..
716 } = l.into_parts();
717 Ok(RecursiveCanonical(Canonical::List(unsafe {
718 ListViewArray::new_unchecked(
719 elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
720 offsets.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
721 sizes.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
722 validity.execute(ctx)?,
723 )
724 .with_zero_copy_to_list(zctl)
725 })))
726 }
727 Canonical::FixedSizeList(fsl) => {
728 let list_size = fsl.list_size();
729 let len = fsl.len();
730 let (elements, validity, _) = fsl.into_parts();
731 Ok(RecursiveCanonical(Canonical::FixedSizeList(
732 FixedSizeListArray::new(
733 elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
734 list_size,
735 validity.execute(ctx)?,
736 len,
737 ),
738 )))
739 }
740 Canonical::Struct(st) => {
741 let len = st.len();
742 let StructArrayParts {
743 struct_fields,
744 fields,
745 validity,
746 } = st.into_parts();
747 let executed_fields = fields
748 .iter()
749 .map(|f| Ok(f.clone().execute::<RecursiveCanonical>(ctx)?.0.into_array()))
750 .collect::<VortexResult<Arc<[_]>>>()?;
751
752 Ok(RecursiveCanonical(Canonical::Struct(unsafe {
753 StructArray::new_unchecked(
754 executed_fields,
755 struct_fields,
756 len,
757 validity.execute(ctx)?,
758 )
759 })))
760 }
761 Canonical::Extension(ext) => Ok(RecursiveCanonical(Canonical::Extension(
762 ExtensionArray::new(
763 ext.ext_dtype().clone(),
764 ext.storage_array()
765 .clone()
766 .execute::<RecursiveCanonical>(ctx)?
767 .0
768 .into_array(),
769 ),
770 ))),
771 }
772 }
773}
774
775impl<T: NativePType> Executable for Buffer<T> {
781 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
782 let array = PrimitiveArray::execute(array, ctx)?;
783 vortex_ensure!(
784 array.all_valid()?,
785 "Cannot execute to native buffer: array is not all-valid."
786 );
787 Ok(array.into_buffer())
788 }
789}
790
791impl Executable for PrimitiveArray {
795 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
796 match array.try_into::<Primitive>() {
797 Ok(primitive) => Ok(primitive),
798 Err(array) => Ok(Canonical::execute(array, ctx)?.into_primitive()),
799 }
800 }
801}
802
803impl Executable for BoolArray {
807 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
808 match array.try_into::<Bool>() {
809 Ok(bool_array) => Ok(bool_array),
810 Err(array) => Ok(Canonical::execute(array, ctx)?.into_bool()),
811 }
812 }
813}
814
815impl Executable for BitBuffer {
819 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
820 let bool = BoolArray::execute(array, ctx)?;
821 assert!(
822 !bool.dtype().is_nullable(),
823 "bit buffer execute only works with non-nullable bool arrays"
824 );
825 Ok(bool.into_bit_buffer())
826 }
827}
828
829impl Executable for NullArray {
833 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
834 match array.try_into::<Null>() {
835 Ok(null_array) => Ok(null_array),
836 Err(array) => Ok(Canonical::execute(array, ctx)?.into_null()),
837 }
838 }
839}
840
841impl Executable for VarBinViewArray {
845 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
846 match array.try_into::<VarBinView>() {
847 Ok(varbinview) => Ok(varbinview),
848 Err(array) => Ok(Canonical::execute(array, ctx)?.into_varbinview()),
849 }
850 }
851}
852
853impl Executable for ExtensionArray {
857 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
858 match array.try_into::<Extension>() {
859 Ok(ext_array) => Ok(ext_array),
860 Err(array) => Ok(Canonical::execute(array, ctx)?.into_extension()),
861 }
862 }
863}
864
865impl Executable for DecimalArray {
869 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
870 match array.try_into::<Decimal>() {
871 Ok(decimal) => Ok(decimal),
872 Err(array) => Ok(Canonical::execute(array, ctx)?.into_decimal()),
873 }
874 }
875}
876
877impl Executable for ListViewArray {
881 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
882 match array.try_into::<ListView>() {
883 Ok(list) => Ok(list),
884 Err(array) => Ok(Canonical::execute(array, ctx)?.into_listview()),
885 }
886 }
887}
888
889impl Executable for FixedSizeListArray {
893 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
894 match array.try_into::<FixedSizeList>() {
895 Ok(fsl) => Ok(fsl),
896 Err(array) => Ok(Canonical::execute(array, ctx)?.into_fixed_size_list()),
897 }
898 }
899}
900
901impl Executable for StructArray {
905 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
906 match array.try_into::<Struct>() {
907 Ok(struct_array) => Ok(struct_array),
908 Err(array) => Ok(Canonical::execute(array, ctx)?.into_struct()),
909 }
910 }
911}
912
/// A borrowed counterpart of [`Canonical`]: one variant per canonical array type, each holding
/// a reference to the array instead of owning it.
///
/// This is the match type produced by the `AnyCanonical` matcher, and it converts into an
/// owned `Canonical` by cloning the referenced array.
#[derive(Debug, Clone)]
pub enum CanonicalView<'a> {
    /// Borrowed `NullArray`.
    Null(&'a NullArray),
    /// Borrowed `BoolArray`.
    Bool(&'a BoolArray),
    /// Borrowed `PrimitiveArray`.
    Primitive(&'a PrimitiveArray),
    /// Borrowed `DecimalArray`.
    Decimal(&'a DecimalArray),
    /// Borrowed `VarBinViewArray`.
    VarBinView(&'a VarBinViewArray),
    /// Borrowed `ListViewArray`.
    List(&'a ListViewArray),
    /// Borrowed `FixedSizeListArray`.
    FixedSizeList(&'a FixedSizeListArray),
    /// Borrowed `StructArray`.
    Struct(&'a StructArray),
    /// Borrowed `ExtensionArray`.
    Extension(&'a ExtensionArray),
}
926
927impl From<CanonicalView<'_>> for Canonical {
928 fn from(value: CanonicalView<'_>) -> Self {
929 match value {
930 CanonicalView::Null(a) => Canonical::Null(a.clone()),
931 CanonicalView::Bool(a) => Canonical::Bool(a.clone()),
932 CanonicalView::Primitive(a) => Canonical::Primitive(a.clone()),
933 CanonicalView::Decimal(a) => Canonical::Decimal(a.clone()),
934 CanonicalView::VarBinView(a) => Canonical::VarBinView(a.clone()),
935 CanonicalView::List(a) => Canonical::List(a.clone()),
936 CanonicalView::FixedSizeList(a) => Canonical::FixedSizeList(a.clone()),
937 CanonicalView::Struct(a) => Canonical::Struct(a.clone()),
938 CanonicalView::Extension(a) => Canonical::Extension(a.clone()),
939 }
940 }
941}
942
943impl AsRef<dyn DynArray> for CanonicalView<'_> {
944 fn as_ref(&self) -> &dyn DynArray {
945 match self {
946 CanonicalView::Null(a) => a.as_ref(),
947 CanonicalView::Bool(a) => a.as_ref(),
948 CanonicalView::Primitive(a) => a.as_ref(),
949 CanonicalView::Decimal(a) => a.as_ref(),
950 CanonicalView::VarBinView(a) => a.as_ref(),
951 CanonicalView::List(a) => a.as_ref(),
952 CanonicalView::FixedSizeList(a) => a.as_ref(),
953 CanonicalView::Struct(a) => a.as_ref(),
954 CanonicalView::Extension(a) => a.as_ref(),
955 }
956 }
957}
958
/// Matcher that accepts any of the canonical array encodings (null, bool, primitive, decimal,
/// struct, list view, fixed-size list, varbinview, extension) and yields a `CanonicalView`.
pub struct AnyCanonical;
961impl Matcher for AnyCanonical {
962 type Match<'a> = CanonicalView<'a>;
963
964 fn matches(array: &dyn DynArray) -> bool {
965 array.is::<Null>()
966 || array.is::<Bool>()
967 || array.is::<Primitive>()
968 || array.is::<Decimal>()
969 || array.is::<Struct>()
970 || array.is::<ListView>()
971 || array.is::<FixedSizeList>()
972 || array.is::<VarBinView>()
973 || array.is::<Extension>()
974 }
975
976 fn try_match<'a>(array: &'a dyn DynArray) -> Option<Self::Match<'a>> {
977 if let Some(a) = array.as_opt::<Null>() {
978 Some(CanonicalView::Null(a))
979 } else if let Some(a) = array.as_opt::<Bool>() {
980 Some(CanonicalView::Bool(a))
981 } else if let Some(a) = array.as_opt::<Primitive>() {
982 Some(CanonicalView::Primitive(a))
983 } else if let Some(a) = array.as_opt::<Decimal>() {
984 Some(CanonicalView::Decimal(a))
985 } else if let Some(a) = array.as_opt::<Struct>() {
986 Some(CanonicalView::Struct(a))
987 } else if let Some(a) = array.as_opt::<ListView>() {
988 Some(CanonicalView::List(a))
989 } else if let Some(a) = array.as_opt::<FixedSizeList>() {
990 Some(CanonicalView::FixedSizeList(a))
991 } else if let Some(a) = array.as_opt::<VarBinView>() {
992 Some(CanonicalView::VarBinView(a))
993 } else {
994 array.as_opt::<Extension>().map(CanonicalView::Extension)
995 }
996 }
997}
998
// Tests that convert Vortex arrays to Arrow (and Arrow arrays to Vortex and back) and check
// the resulting Arrow arrays.
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_array::Array as ArrowArray;
    use arrow_array::ArrayRef as ArrowArrayRef;
    use arrow_array::ListArray as ArrowListArray;
    use arrow_array::PrimitiveArray as ArrowPrimitiveArray;
    use arrow_array::StringArray;
    use arrow_array::StringViewArray;
    use arrow_array::StructArray as ArrowStructArray;
    use arrow_array::cast::AsArray;
    use arrow_array::types::Int32Type;
    use arrow_array::types::Int64Type;
    use arrow_array::types::UInt64Type;
    use arrow_buffer::NullBufferBuilder;
    use arrow_buffer::OffsetBuffer;
    use arrow_schema::DataType;
    use arrow_schema::Field;
    use vortex_buffer::buffer;

    use crate::ArrayRef;
    use crate::IntoArray;
    use crate::arrays::ConstantArray;
    use crate::arrow::FromArrowArray;
    use crate::arrow::IntoArrowArray;
    use crate::canonical::StructArray;

    /// A struct whose nested struct contains a `ConstantArray` child must convert to Arrow
    /// with the nested child materialized as a plain `Int64` primitive array.
    #[test]
    fn test_canonicalize_nested_struct() {
        let nested_struct_array = StructArray::from_fields(&[
            // Field "a": a one-element u64 buffer.
            ("a", buffer![1u64].into_array()),
            (
                "b",
                // Field "b": an inner struct whose only child is a constant array.
                StructArray::from_fields(&[(
                    "inner_a",
                    ConstantArray::new(100i64, 1).into_array(),
                )])
                .unwrap()
                .into_array(),
            ),
        ])
        .unwrap();

        let arrow_struct = nested_struct_array
            .into_array()
            .into_arrow_preferred()
            .unwrap()
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // Column 0 ("a") must be an Arrow UInt64 primitive array.
        assert!(
            arrow_struct
                .column(0)
                .as_any()
                .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>()
                .is_some()
        );

        // Column 1 ("b") must be an Arrow struct array.
        let inner_struct = arrow_struct
            .column(1)
            .clone()
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // Its single child must be an Arrow Int64 primitive array holding the constant value.
        let inner_a = inner_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
        assert!(inner_a.is_some());

        assert_eq!(
            inner_a.cloned().unwrap(),
            ArrowPrimitiveArray::from_iter([100i64])
        );
    }

    /// An Arrow struct with struct-level nulls and nullable children must survive an
    /// Arrow -> Vortex -> Arrow round trip unchanged.
    #[test]
    fn roundtrip_struct() {
        // Struct-level validity: rows 0-3 valid, row 4 null, row 5 valid.
        let mut nulls = NullBufferBuilder::new(6);
        nulls.append_n_non_nulls(4);
        nulls.append_null();
        nulls.append_non_null();
        let names = Arc::new(StringViewArray::from_iter(vec![
            Some("Joseph"),
            None,
            Some("Angela"),
            Some("Mikhail"),
            None,
            None,
        ]));
        let ages = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
            Some(25),
            Some(31),
            None,
            Some(57),
            None,
            None,
        ]));

        let arrow_struct = ArrowStructArray::new(
            vec![
                Arc::new(Field::new("name", DataType::Utf8View, true)),
                Arc::new(Field::new("age", DataType::Int32, true)),
            ]
            .into(),
            vec![names, ages],
            nulls.finish(),
        );

        let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true).unwrap();

        assert_eq!(
            &arrow_struct,
            vortex_struct.into_arrow_preferred().unwrap().as_struct()
        );
    }

    /// An Arrow list of strings must survive an Arrow -> Vortex -> Arrow round trip when
    /// converted back to its original Arrow data type.
    #[test]
    fn roundtrip_list() {
        let names = Arc::new(StringArray::from_iter(vec![
            Some("Joseph"),
            Some("Angela"),
            Some("Mikhail"),
        ]));

        // Three lists with lengths 0, 2 and 1 over the three names.
        let arrow_list = ArrowListArray::new(
            Arc::new(Field::new_list_field(DataType::Utf8, true)),
            OffsetBuffer::from_lengths(vec![0, 2, 1]),
            names,
            None,
        );
        let list_data_type = arrow_list.data_type();

        let vortex_list = ArrayRef::from_arrow(&arrow_list, true).unwrap();

        // Request the original data type explicitly rather than the preferred one.
        let rt_arrow_list = vortex_list.into_arrow(list_data_type).unwrap();

        assert_eq!(
            (Arc::new(arrow_list.clone()) as ArrowArrayRef).as_ref(),
            rt_arrow_list.as_ref()
        );
    }
}