1use std::sync::Arc;
7
8use vortex_buffer::BitBuffer;
9use vortex_buffer::Buffer;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_ensure;
13use vortex_error::vortex_panic;
14
15use crate::ArrayRef;
16use crate::Executable;
17use crate::ExecutionCtx;
18use crate::IntoArray;
19use crate::array::ArrayView;
20use crate::array::child_to_validity;
21use crate::arrays::Bool;
22use crate::arrays::BoolArray;
23use crate::arrays::Decimal;
24use crate::arrays::DecimalArray;
25use crate::arrays::Extension;
26use crate::arrays::ExtensionArray;
27use crate::arrays::FixedSizeList;
28use crate::arrays::FixedSizeListArray;
29use crate::arrays::ListView;
30use crate::arrays::ListViewArray;
31use crate::arrays::Null;
32use crate::arrays::NullArray;
33use crate::arrays::Primitive;
34use crate::arrays::PrimitiveArray;
35use crate::arrays::Struct;
36use crate::arrays::StructArray;
37use crate::arrays::VarBinView;
38use crate::arrays::VarBinViewArray;
39use crate::arrays::Variant;
40use crate::arrays::VariantArray;
41use crate::arrays::bool::BoolDataParts;
42use crate::arrays::decimal::DecimalDataParts;
43use crate::arrays::extension::ExtensionArrayExt;
44use crate::arrays::fixed_size_list::FixedSizeListArrayExt;
45use crate::arrays::listview::ListViewDataParts;
46use crate::arrays::listview::ListViewRebuildMode;
47use crate::arrays::primitive::PrimitiveDataParts;
48use crate::arrays::struct_::StructDataParts;
49use crate::arrays::varbinview::VarBinViewDataParts;
50use crate::arrays::variant::VariantArrayExt;
51use crate::dtype::DType;
52use crate::dtype::NativePType;
53use crate::dtype::Nullability;
54use crate::dtype::PType;
55use crate::match_each_decimal_value_type;
56use crate::match_each_native_ptype;
57use crate::matcher::Matcher;
58use crate::validity::Validity;
59
/// The closed set of canonical array representations.
///
/// Each variant wraps the concrete array type that serves as the canonical form for one
/// family of dtypes. Note that the `List` variant wraps a [`ListViewArray`] and that both
/// UTF-8 and binary data are represented by [`VarBinViewArray`] (see [`Canonical::empty`]).
#[derive(Debug, Clone)]
pub enum Canonical {
    /// Canonical form of null-typed data.
    Null(NullArray),
    /// Canonical form of boolean data.
    Bool(BoolArray),
    /// Canonical form of primitive (native numeric) data.
    Primitive(PrimitiveArray),
    /// Canonical form of decimal data.
    Decimal(DecimalArray),
    /// Canonical form of variable-length UTF-8 / binary data.
    VarBinView(VarBinViewArray),
    /// Canonical form of list data, stored as a list-view array.
    List(ListViewArray),
    /// Canonical form of fixed-size list data.
    FixedSizeList(FixedSizeListArray),
    /// Canonical form of struct data.
    Struct(StructArray),
    /// Canonical form of extension-typed data.
    Extension(ExtensionArray),
    /// Canonical form of variant data.
    Variant(VariantArray),
}
133
/// Dispatches over every [`Canonical`] variant, binding the wrapped array to `$binding`
/// and evaluating `$body` with that binding in scope.
///
/// The body is expanded once per variant, so it must type-check against every canonical
/// array type.
macro_rules! match_each_canonical {
    ($canonical:expr, | $binding:ident | $body:expr) => {{
        match $canonical {
            Canonical::Null($binding) => $body,
            Canonical::Bool($binding) => $body,
            Canonical::Primitive($binding) => $body,
            Canonical::Decimal($binding) => $body,
            Canonical::VarBinView($binding) => $body,
            Canonical::List($binding) => $body,
            Canonical::FixedSizeList($binding) => $body,
            Canonical::Struct($binding) => $body,
            Canonical::Extension($binding) => $body,
            Canonical::Variant($binding) => $body,
        }
    }};
}
151
152impl Canonical {
153 pub fn empty(dtype: &DType) -> Canonical {
155 match dtype {
156 DType::Null => Canonical::Null(NullArray::new(0)),
157 DType::Bool(n) => Canonical::Bool(unsafe {
158 BoolArray::new_unchecked(BitBuffer::empty(), Validity::from(n))
159 }),
160 DType::Primitive(ptype, n) => {
161 match_each_native_ptype!(ptype, |P| {
162 Canonical::Primitive(unsafe {
163 PrimitiveArray::new_unchecked(Buffer::<P>::empty(), Validity::from(n))
164 })
165 })
166 }
167 DType::Decimal(decimal_type, n) => {
168 match_each_decimal_value_type!(
169 DecimalType::smallest_decimal_value_type(decimal_type),
170 |D| {
171 Canonical::Decimal(unsafe {
172 DecimalArray::new_unchecked::<D>(
173 Buffer::empty(),
174 *decimal_type,
175 Validity::from(n),
176 )
177 })
178 }
179 )
180 }
181 DType::Utf8(n) => Canonical::VarBinView(unsafe {
182 VarBinViewArray::new_unchecked(
183 Buffer::empty(),
184 Arc::new([]),
185 dtype.clone(),
186 Validity::from(n),
187 )
188 }),
189 DType::Binary(n) => Canonical::VarBinView(unsafe {
190 VarBinViewArray::new_unchecked(
191 Buffer::empty(),
192 Arc::new([]),
193 dtype.clone(),
194 Validity::from(n),
195 )
196 }),
197 DType::Struct(struct_dtype, n) => Canonical::Struct(unsafe {
198 StructArray::new_unchecked(
199 struct_dtype
200 .fields()
201 .map(|f| Canonical::empty(&f).into_array())
202 .collect::<Arc<[_]>>(),
203 struct_dtype.clone(),
204 0,
205 Validity::from(n),
206 )
207 }),
208 DType::List(dtype, n) => Canonical::List(unsafe {
209 ListViewArray::new_unchecked(
210 Canonical::empty(dtype).into_array(),
211 Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
212 .into_array(),
213 Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
214 .into_array(),
215 Validity::from(n),
216 )
217 .with_zero_copy_to_list(true)
219 }),
220 DType::FixedSizeList(elem_dtype, list_size, null) => Canonical::FixedSizeList(unsafe {
221 FixedSizeListArray::new_unchecked(
222 Canonical::empty(elem_dtype).into_array(),
223 *list_size,
224 Validity::from(null),
225 0,
226 )
227 }),
228 DType::Extension(ext_dtype) => Canonical::Extension(ExtensionArray::new(
229 ext_dtype.clone(),
230 Canonical::empty(ext_dtype.storage_dtype()).into_array(),
231 )),
232 DType::Variant(_) => {
233 vortex_panic!(InvalidArgument: "Canonical empty is not supported for Variant")
234 }
235 }
236 }
237
238 pub fn len(&self) -> usize {
239 match_each_canonical!(self, |arr| arr.len())
240 }
241
242 pub fn dtype(&self) -> &DType {
243 match_each_canonical!(self, |arr| arr.dtype())
244 }
245
246 pub fn is_empty(&self) -> bool {
247 match_each_canonical!(self, |arr| arr.is_empty())
248 }
249}
250
251impl Canonical {
252 pub fn compact(&self) -> VortexResult<Canonical> {
260 match self {
261 Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
262 Canonical::List(array) => Ok(Canonical::List(
263 array.rebuild(ListViewRebuildMode::TrimElements)?,
264 )),
265 _ => Ok(self.clone()),
266 }
267 }
268}
269
270impl Canonical {
272 pub fn as_null(&self) -> &NullArray {
273 if let Canonical::Null(a) = self {
274 a
275 } else {
276 vortex_panic!("Cannot get NullArray from {:?}", &self)
277 }
278 }
279
280 pub fn into_null(self) -> NullArray {
281 if let Canonical::Null(a) = self {
282 a
283 } else {
284 vortex_panic!("Cannot unwrap NullArray from {:?}", &self)
285 }
286 }
287
288 pub fn as_bool(&self) -> &BoolArray {
289 if let Canonical::Bool(a) = self {
290 a
291 } else {
292 vortex_panic!("Cannot get BoolArray from {:?}", &self)
293 }
294 }
295
296 pub fn into_bool(self) -> BoolArray {
297 if let Canonical::Bool(a) = self {
298 a
299 } else {
300 vortex_panic!("Cannot unwrap BoolArray from {:?}", &self)
301 }
302 }
303
304 pub fn as_primitive(&self) -> &PrimitiveArray {
305 if let Canonical::Primitive(a) = self {
306 a
307 } else {
308 vortex_panic!("Cannot get PrimitiveArray from {:?}", &self)
309 }
310 }
311
312 pub fn into_primitive(self) -> PrimitiveArray {
313 if let Canonical::Primitive(a) = self {
314 a
315 } else {
316 vortex_panic!("Cannot unwrap PrimitiveArray from {:?}", &self)
317 }
318 }
319
320 pub fn as_decimal(&self) -> &DecimalArray {
321 if let Canonical::Decimal(a) = self {
322 a
323 } else {
324 vortex_panic!("Cannot get DecimalArray from {:?}", &self)
325 }
326 }
327
328 pub fn into_decimal(self) -> DecimalArray {
329 if let Canonical::Decimal(a) = self {
330 a
331 } else {
332 vortex_panic!("Cannot unwrap DecimalArray from {:?}", &self)
333 }
334 }
335
336 pub fn as_varbinview(&self) -> &VarBinViewArray {
337 if let Canonical::VarBinView(a) = self {
338 a
339 } else {
340 vortex_panic!("Cannot get VarBinViewArray from {:?}", &self)
341 }
342 }
343
344 pub fn into_varbinview(self) -> VarBinViewArray {
345 if let Canonical::VarBinView(a) = self {
346 a
347 } else {
348 vortex_panic!("Cannot unwrap VarBinViewArray from {:?}", &self)
349 }
350 }
351
352 pub fn as_listview(&self) -> &ListViewArray {
353 if let Canonical::List(a) = self {
354 a
355 } else {
356 vortex_panic!("Cannot get ListArray from {:?}", &self)
357 }
358 }
359
360 pub fn into_listview(self) -> ListViewArray {
361 if let Canonical::List(a) = self {
362 a
363 } else {
364 vortex_panic!("Cannot unwrap ListArray from {:?}", &self)
365 }
366 }
367
368 pub fn as_fixed_size_list(&self) -> &FixedSizeListArray {
369 if let Canonical::FixedSizeList(a) = self {
370 a
371 } else {
372 vortex_panic!("Cannot get FixedSizeListArray from {:?}", &self)
373 }
374 }
375
376 pub fn into_fixed_size_list(self) -> FixedSizeListArray {
377 if let Canonical::FixedSizeList(a) = self {
378 a
379 } else {
380 vortex_panic!("Cannot unwrap FixedSizeListArray from {:?}", &self)
381 }
382 }
383
384 pub fn as_struct(&self) -> &StructArray {
385 if let Canonical::Struct(a) = self {
386 a
387 } else {
388 vortex_panic!("Cannot get StructArray from {:?}", &self)
389 }
390 }
391
392 pub fn into_struct(self) -> StructArray {
393 if let Canonical::Struct(a) = self {
394 a
395 } else {
396 vortex_panic!("Cannot unwrap StructArray from {:?}", &self)
397 }
398 }
399
400 pub fn as_extension(&self) -> &ExtensionArray {
401 if let Canonical::Extension(a) = self {
402 a
403 } else {
404 vortex_panic!("Cannot get ExtensionArray from {:?}", &self)
405 }
406 }
407
408 pub fn into_extension(self) -> ExtensionArray {
409 if let Canonical::Extension(a) = self {
410 a
411 } else {
412 vortex_panic!("Cannot unwrap ExtensionArray from {:?}", &self)
413 }
414 }
415}
416
417impl IntoArray for Canonical {
418 fn into_array(self) -> ArrayRef {
419 match_each_canonical!(self, |arr| arr.into_array())
420 }
421}
422
/// Convenience conversions from an array into a specific canonical array type.
///
/// None of these methods return a `Result`. The [`ArrayRef`] implementation in this file
/// panics if canonicalization fails or if the canonical form is a different variant than
/// the one requested, so callers should already know the array's dtype.
pub trait ToCanonical {
    /// Canonicalizes into a [`NullArray`].
    fn to_null(&self) -> NullArray;

    /// Canonicalizes into a [`BoolArray`].
    fn to_bool(&self) -> BoolArray;

    /// Canonicalizes into a [`PrimitiveArray`].
    fn to_primitive(&self) -> PrimitiveArray;

    /// Canonicalizes into a [`DecimalArray`].
    fn to_decimal(&self) -> DecimalArray;

    /// Canonicalizes into a [`StructArray`].
    fn to_struct(&self) -> StructArray;

    /// Canonicalizes into a [`ListViewArray`].
    fn to_listview(&self) -> ListViewArray;

    /// Canonicalizes into a [`FixedSizeListArray`].
    fn to_fixed_size_list(&self) -> FixedSizeListArray;

    /// Canonicalizes into a [`VarBinViewArray`].
    fn to_varbinview(&self) -> VarBinViewArray;

    /// Canonicalizes into an [`ExtensionArray`].
    fn to_extension(&self) -> ExtensionArray;
}
461
462impl ToCanonical for ArrayRef {
464 fn to_null(&self) -> NullArray {
465 self.to_canonical()
466 .vortex_expect("to_canonical failed")
467 .into_null()
468 }
469
470 fn to_bool(&self) -> BoolArray {
471 self.to_canonical()
472 .vortex_expect("to_canonical failed")
473 .into_bool()
474 }
475
476 fn to_primitive(&self) -> PrimitiveArray {
477 self.to_canonical()
478 .vortex_expect("to_canonical failed")
479 .into_primitive()
480 }
481
482 fn to_decimal(&self) -> DecimalArray {
483 self.to_canonical()
484 .vortex_expect("to_canonical failed")
485 .into_decimal()
486 }
487
488 fn to_struct(&self) -> StructArray {
489 self.to_canonical()
490 .vortex_expect("to_canonical failed")
491 .into_struct()
492 }
493
494 fn to_listview(&self) -> ListViewArray {
495 self.to_canonical()
496 .vortex_expect("to_canonical failed")
497 .into_listview()
498 }
499
500 fn to_fixed_size_list(&self) -> FixedSizeListArray {
501 self.to_canonical()
502 .vortex_expect("to_canonical failed")
503 .into_fixed_size_list()
504 }
505
506 fn to_varbinview(&self) -> VarBinViewArray {
507 self.to_canonical()
508 .vortex_expect("to_canonical failed")
509 .into_varbinview()
510 }
511
512 fn to_extension(&self) -> ExtensionArray {
513 self.to_canonical()
514 .vortex_expect("to_canonical failed")
515 .into_extension()
516 }
517}
518
519impl From<Canonical> for ArrayRef {
520 fn from(value: Canonical) -> Self {
521 match_each_canonical!(value, |arr| arr.into_array())
522 }
523}
524
525impl Executable for Canonical {
531 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
532 let result = array.execute_until::<AnyCanonical>(ctx)?;
533 Ok(result
534 .as_opt::<AnyCanonical>()
535 .map(Canonical::from)
536 .vortex_expect("execute_until::<AnyCanonical> must return a canonical array"))
537 }
538}
539
540pub struct CanonicalValidity(pub Canonical);
545
546impl Executable for CanonicalValidity {
547 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
548 match array.execute::<Canonical>(ctx)? {
549 n @ Canonical::Null(_) => Ok(CanonicalValidity(n)),
550 Canonical::Bool(b) => {
551 let validity = child_to_validity(&b.slots()[0], b.dtype().nullability());
552 let len = b.len();
553 let BoolDataParts { bits, offset, len } = b.into_data().into_parts(len);
554 Ok(CanonicalValidity(Canonical::Bool(
555 BoolArray::try_new_from_handle(bits, offset, len, validity.execute(ctx)?)?,
556 )))
557 }
558 Canonical::Primitive(p) => {
559 let PrimitiveDataParts {
560 ptype,
561 buffer,
562 validity,
563 } = p.into_data_parts();
564 Ok(CanonicalValidity(Canonical::Primitive(unsafe {
565 PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
566 })))
567 }
568 Canonical::Decimal(d) => {
569 let DecimalDataParts {
570 decimal_dtype,
571 values,
572 values_type,
573 validity,
574 } = d.into_data_parts();
575 Ok(CanonicalValidity(Canonical::Decimal(unsafe {
576 DecimalArray::new_unchecked_handle(
577 values,
578 values_type,
579 decimal_dtype,
580 validity.execute(ctx)?,
581 )
582 })))
583 }
584 Canonical::VarBinView(vbv) => {
585 let VarBinViewDataParts {
586 dtype,
587 buffers,
588 views,
589 validity,
590 } = vbv.into_data_parts();
591 Ok(CanonicalValidity(Canonical::VarBinView(unsafe {
592 VarBinViewArray::new_handle_unchecked(
593 views,
594 buffers,
595 dtype,
596 validity.execute(ctx)?,
597 )
598 })))
599 }
600 Canonical::List(l) => {
601 let zctl = l.is_zero_copy_to_list();
602 let ListViewDataParts {
603 elements,
604 offsets,
605 sizes,
606 validity,
607 ..
608 } = l.into_data_parts();
609 Ok(CanonicalValidity(Canonical::List(unsafe {
610 ListViewArray::new_unchecked(elements, offsets, sizes, validity.execute(ctx)?)
611 .with_zero_copy_to_list(zctl)
612 })))
613 }
614 Canonical::FixedSizeList(fsl) => {
615 let list_size = fsl.list_size();
616 let len = fsl.len();
617 let parts = fsl.into_data_parts();
618 let elements = parts.elements;
619 let validity = parts.validity;
620 Ok(CanonicalValidity(Canonical::FixedSizeList(
621 FixedSizeListArray::new(elements, list_size, validity.execute(ctx)?, len),
622 )))
623 }
624 Canonical::Struct(st) => {
625 let len = st.len();
626 let StructDataParts {
627 struct_fields,
628 fields,
629 validity,
630 } = st.into_data_parts();
631 Ok(CanonicalValidity(Canonical::Struct(unsafe {
632 StructArray::new_unchecked(fields, struct_fields, len, validity.execute(ctx)?)
633 })))
634 }
635 Canonical::Extension(ext) => Ok(CanonicalValidity(Canonical::Extension(
636 ExtensionArray::new(
637 ext.ext_dtype().clone(),
638 ext.storage_array()
639 .clone()
640 .execute::<CanonicalValidity>(ctx)?
641 .0
642 .into_array(),
643 ),
644 ))),
645 Canonical::Variant(variant) => {
646 Ok(CanonicalValidity(Canonical::Variant(VariantArray::new(
647 variant
648 .child()
649 .clone()
650 .execute::<CanonicalValidity>(ctx)?
651 .0
652 .into_array(),
653 ))))
654 }
655 }
656 }
657}
658
659pub struct RecursiveCanonical(pub Canonical);
664
665impl Executable for RecursiveCanonical {
666 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
667 match array.execute::<Canonical>(ctx)? {
668 n @ Canonical::Null(_) => Ok(RecursiveCanonical(n)),
669 Canonical::Bool(b) => {
670 let validity = child_to_validity(&b.slots()[0], b.dtype().nullability());
671 let len = b.len();
672 let BoolDataParts { bits, offset, len } = b.into_data().into_parts(len);
673 Ok(RecursiveCanonical(Canonical::Bool(
674 BoolArray::try_new_from_handle(bits, offset, len, validity.execute(ctx)?)?,
675 )))
676 }
677 Canonical::Primitive(p) => {
678 let PrimitiveDataParts {
679 ptype,
680 buffer,
681 validity,
682 } = p.into_data_parts();
683 Ok(RecursiveCanonical(Canonical::Primitive(unsafe {
684 PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
685 })))
686 }
687 Canonical::Decimal(d) => {
688 let DecimalDataParts {
689 decimal_dtype,
690 values,
691 values_type,
692 validity,
693 } = d.into_data_parts();
694 Ok(RecursiveCanonical(Canonical::Decimal(unsafe {
695 DecimalArray::new_unchecked_handle(
696 values,
697 values_type,
698 decimal_dtype,
699 validity.execute(ctx)?,
700 )
701 })))
702 }
703 Canonical::VarBinView(vbv) => {
704 let VarBinViewDataParts {
705 dtype,
706 buffers,
707 views,
708 validity,
709 } = vbv.into_data_parts();
710 Ok(RecursiveCanonical(Canonical::VarBinView(unsafe {
711 VarBinViewArray::new_handle_unchecked(
712 views,
713 buffers,
714 dtype,
715 validity.execute(ctx)?,
716 )
717 })))
718 }
719 Canonical::List(l) => {
720 let zctl = l.is_zero_copy_to_list();
721 let ListViewDataParts {
722 elements,
723 offsets,
724 sizes,
725 validity,
726 ..
727 } = l.into_data_parts();
728 Ok(RecursiveCanonical(Canonical::List(unsafe {
729 ListViewArray::new_unchecked(
730 elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
731 offsets.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
732 sizes.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
733 validity.execute(ctx)?,
734 )
735 .with_zero_copy_to_list(zctl)
736 })))
737 }
738 Canonical::FixedSizeList(fsl) => {
739 let list_size = fsl.list_size();
740 let len = fsl.len();
741 let parts = fsl.into_data_parts();
742 let elements = parts.elements;
743 let validity = parts.validity;
744 Ok(RecursiveCanonical(Canonical::FixedSizeList(
745 FixedSizeListArray::new(
746 elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
747 list_size,
748 validity.execute(ctx)?,
749 len,
750 ),
751 )))
752 }
753 Canonical::Struct(st) => {
754 let len = st.len();
755 let StructDataParts {
756 struct_fields,
757 fields,
758 validity,
759 } = st.into_data_parts();
760 let executed_fields = fields
761 .iter()
762 .map(|f| Ok(f.clone().execute::<RecursiveCanonical>(ctx)?.0.into_array()))
763 .collect::<VortexResult<Arc<[_]>>>()?;
764
765 Ok(RecursiveCanonical(Canonical::Struct(unsafe {
766 StructArray::new_unchecked(
767 executed_fields,
768 struct_fields,
769 len,
770 validity.execute(ctx)?,
771 )
772 })))
773 }
774 Canonical::Extension(ext) => Ok(RecursiveCanonical(Canonical::Extension(
775 ExtensionArray::new(
776 ext.ext_dtype().clone(),
777 ext.storage_array()
778 .clone()
779 .execute::<RecursiveCanonical>(ctx)?
780 .0
781 .into_array(),
782 ),
783 ))),
784 Canonical::Variant(variant) => {
785 Ok(RecursiveCanonical(Canonical::Variant(VariantArray::new(
786 variant
787 .child()
788 .clone()
789 .execute::<RecursiveCanonical>(ctx)?
790 .0
791 .into_array(),
792 ))))
793 }
794 }
795 }
796}
797
798impl<T: NativePType> Executable for Buffer<T> {
804 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
805 let array = PrimitiveArray::execute(array, ctx)?;
806 vortex_ensure!(
807 matches!(
808 array.validity()?,
809 Validity::NonNullable | Validity::AllValid
810 ),
811 "Cannot execute to native buffer: array is not all-valid."
812 );
813 Ok(array.into_buffer())
814 }
815}
816
817impl Executable for PrimitiveArray {
821 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
822 match array.try_downcast::<Primitive>() {
823 Ok(primitive) => Ok(primitive),
824 Err(array) => Ok(Canonical::execute(array, ctx)?.into_primitive()),
825 }
826 }
827}
828
829impl Executable for BoolArray {
833 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
834 match array.try_downcast::<Bool>() {
835 Ok(bool_array) => Ok(bool_array),
836 Err(array) => Ok(Canonical::execute(array, ctx)?.into_bool()),
837 }
838 }
839}
840
841impl Executable for BitBuffer {
845 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
846 let bool = BoolArray::execute(array, ctx)?;
847 assert!(
848 !bool.dtype().is_nullable(),
849 "bit buffer execute only works with non-nullable bool arrays"
850 );
851 Ok(bool.into_bit_buffer())
852 }
853}
854
855impl Executable for NullArray {
859 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
860 match array.try_downcast::<Null>() {
861 Ok(null_array) => Ok(null_array),
862 Err(array) => Ok(Canonical::execute(array, ctx)?.into_null()),
863 }
864 }
865}
866
867impl Executable for VarBinViewArray {
871 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
872 match array.try_downcast::<VarBinView>() {
873 Ok(varbinview) => Ok(varbinview),
874 Err(array) => Ok(Canonical::execute(array, ctx)?.into_varbinview()),
875 }
876 }
877}
878
879impl Executable for ExtensionArray {
883 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
884 match array.try_downcast::<Extension>() {
885 Ok(ext_array) => Ok(ext_array),
886 Err(array) => Ok(Canonical::execute(array, ctx)?.into_extension()),
887 }
888 }
889}
890
891impl Executable for DecimalArray {
895 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
896 match array.try_downcast::<Decimal>() {
897 Ok(decimal) => Ok(decimal),
898 Err(array) => Ok(Canonical::execute(array, ctx)?.into_decimal()),
899 }
900 }
901}
902
903impl Executable for ListViewArray {
907 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
908 match array.try_downcast::<ListView>() {
909 Ok(list) => Ok(list),
910 Err(array) => Ok(Canonical::execute(array, ctx)?.into_listview()),
911 }
912 }
913}
914
915impl Executable for FixedSizeListArray {
919 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
920 match array.try_downcast::<FixedSizeList>() {
921 Ok(fsl) => Ok(fsl),
922 Err(array) => Ok(Canonical::execute(array, ctx)?.into_fixed_size_list()),
923 }
924 }
925}
926
927impl Executable for StructArray {
931 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
932 match array.try_downcast::<Struct>() {
933 Ok(struct_array) => Ok(struct_array),
934 Err(array) => Ok(Canonical::execute(array, ctx)?.into_struct()),
935 }
936 }
937}
938
/// A borrowed counterpart of [`Canonical`]: one [`ArrayView`] variant per canonical
/// encoding, tied to the lifetime `'a` of the array it was matched from.
///
/// Produced by the [`AnyCanonical`] matcher and convertible into an owned [`Canonical`]
/// through `From`.
#[derive(Debug, Clone, Copy)]
pub enum CanonicalView<'a> {
    /// View of a null array.
    Null(ArrayView<'a, Null>),
    /// View of a bool array.
    Bool(ArrayView<'a, Bool>),
    /// View of a primitive array.
    Primitive(ArrayView<'a, Primitive>),
    /// View of a decimal array.
    Decimal(ArrayView<'a, Decimal>),
    /// View of a var-bin-view array.
    VarBinView(ArrayView<'a, VarBinView>),
    /// View of a list-view array.
    List(ArrayView<'a, ListView>),
    /// View of a fixed-size-list array.
    FixedSizeList(ArrayView<'a, FixedSizeList>),
    /// View of a struct array.
    Struct(ArrayView<'a, Struct>),
    /// View of an extension array.
    Extension(ArrayView<'a, Extension>),
    /// View of a variant array.
    Variant(ArrayView<'a, Variant>),
}
956
957impl From<CanonicalView<'_>> for Canonical {
958 fn from(value: CanonicalView<'_>) -> Self {
959 match value {
960 CanonicalView::Null(a) => Canonical::Null(a.into_owned()),
961 CanonicalView::Bool(a) => Canonical::Bool(a.into_owned()),
962 CanonicalView::Primitive(a) => Canonical::Primitive(a.into_owned()),
963 CanonicalView::Decimal(a) => Canonical::Decimal(a.into_owned()),
964 CanonicalView::VarBinView(a) => Canonical::VarBinView(a.into_owned()),
965 CanonicalView::List(a) => Canonical::List(a.into_owned()),
966 CanonicalView::FixedSizeList(a) => Canonical::FixedSizeList(a.into_owned()),
967 CanonicalView::Struct(a) => Canonical::Struct(a.into_owned()),
968 CanonicalView::Extension(a) => Canonical::Extension(a.into_owned()),
969 CanonicalView::Variant(a) => Canonical::Variant(a.into_owned()),
970 }
971 }
972}
973
974impl CanonicalView<'_> {
975 pub fn to_array_ref(&self) -> ArrayRef {
977 match self {
978 CanonicalView::Null(a) => a.array().clone(),
979 CanonicalView::Bool(a) => a.array().clone(),
980 CanonicalView::Primitive(a) => a.array().clone(),
981 CanonicalView::Decimal(a) => a.array().clone(),
982 CanonicalView::VarBinView(a) => a.array().clone(),
983 CanonicalView::List(a) => a.array().clone(),
984 CanonicalView::FixedSizeList(a) => a.array().clone(),
985 CanonicalView::Struct(a) => a.array().clone(),
986 CanonicalView::Extension(a) => a.array().clone(),
987 CanonicalView::Variant(a) => a.array().clone(),
988 }
989 }
990}
991
992pub struct AnyCanonical;
994impl Matcher for AnyCanonical {
995 type Match<'a> = CanonicalView<'a>;
996
997 fn matches(array: &ArrayRef) -> bool {
998 array.is::<Null>()
999 || array.is::<Bool>()
1000 || array.is::<Primitive>()
1001 || array.is::<Decimal>()
1002 || array.is::<Struct>()
1003 || array.is::<ListView>()
1004 || array.is::<FixedSizeList>()
1005 || array.is::<VarBinView>()
1006 || array.is::<Variant>()
1007 || array.is::<Extension>()
1008 || array.is::<Variant>()
1009 }
1010
1011 fn try_match<'a>(array: &'a ArrayRef) -> Option<Self::Match<'a>> {
1012 if let Some(a) = array.as_opt::<Null>() {
1013 Some(CanonicalView::Null(a))
1014 } else if let Some(a) = array.as_opt::<Bool>() {
1015 Some(CanonicalView::Bool(a))
1016 } else if let Some(a) = array.as_opt::<Primitive>() {
1017 Some(CanonicalView::Primitive(a))
1018 } else if let Some(a) = array.as_opt::<Decimal>() {
1019 Some(CanonicalView::Decimal(a))
1020 } else if let Some(a) = array.as_opt::<Struct>() {
1021 Some(CanonicalView::Struct(a))
1022 } else if let Some(a) = array.as_opt::<ListView>() {
1023 Some(CanonicalView::List(a))
1024 } else if let Some(a) = array.as_opt::<FixedSizeList>() {
1025 Some(CanonicalView::FixedSizeList(a))
1026 } else if let Some(a) = array.as_opt::<VarBinView>() {
1027 Some(CanonicalView::VarBinView(a))
1028 } else if let Some(a) = array.as_opt::<Variant>() {
1029 Some(CanonicalView::Variant(a))
1030 } else {
1031 array.as_opt::<Extension>().map(CanonicalView::Extension)
1032 }
1033 }
1034}
1035
// Tests that convert between Vortex arrays and Arrow arrays.
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_array::Array as ArrowArray;
    use arrow_array::ArrayRef as ArrowArrayRef;
    use arrow_array::ListArray as ArrowListArray;
    use arrow_array::PrimitiveArray as ArrowPrimitiveArray;
    use arrow_array::StringArray;
    use arrow_array::StringViewArray;
    use arrow_array::StructArray as ArrowStructArray;
    use arrow_array::cast::AsArray;
    use arrow_array::types::Int32Type;
    use arrow_array::types::Int64Type;
    use arrow_array::types::UInt64Type;
    use arrow_buffer::NullBufferBuilder;
    use arrow_buffer::OffsetBuffer;
    use arrow_schema::DataType;
    use arrow_schema::Field;
    use vortex_buffer::buffer;

    use crate::ArrayRef;
    use crate::IntoArray;
    use crate::arrays::ConstantArray;
    use crate::arrow::FromArrowArray;
    use crate::arrow::IntoArrowArray;
    use crate::canonical::StructArray;

    /// A struct containing a nested struct whose only field is a `ConstantArray` must
    /// convert to Arrow with both levels materialized as Arrow primitive columns.
    #[test]
    fn test_canonicalize_nested_struct() {
        let nested_struct_array = StructArray::from_fields(&[
            ("a", buffer![1u64].into_array()),
            (
                "b",
                StructArray::from_fields(&[(
                    "inner_a",
                    // A constant array rather than a primitive array, so the conversion
                    // has to turn it into primitive values.
                    ConstantArray::new(100i64, 1).into_array(),
                )])
                .unwrap()
                .into_array(),
            ),
        ])
        .unwrap();

        let arrow_struct = nested_struct_array
            .into_array()
            .into_arrow_preferred()
            .unwrap()
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // Outer field "a" becomes a u64 primitive column.
        assert!(
            arrow_struct
                .column(0)
                .as_any()
                .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>()
                .is_some()
        );

        // Outer field "b" is itself a struct.
        let inner_struct = Arc::clone(arrow_struct.column(1))
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // Its constant field becomes an i64 primitive column holding the constant.
        let inner_a = inner_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
        assert!(inner_a.is_some());

        assert_eq!(
            inner_a.cloned().unwrap(),
            ArrowPrimitiveArray::from_iter([100i64])
        );
    }

    /// An Arrow struct with struct-level and field-level nulls must survive a round trip
    /// through Vortex unchanged.
    #[test]
    fn roundtrip_struct() {
        // Struct-level validity: rows 0-3 valid, row 4 null, row 5 valid.
        let mut nulls = NullBufferBuilder::new(6);
        nulls.append_n_non_nulls(4);
        nulls.append_null();
        nulls.append_non_null();
        let names = Arc::new(StringViewArray::from_iter(vec![
            Some("Joseph"),
            None,
            Some("Angela"),
            Some("Mikhail"),
            None,
            None,
        ]));
        let ages = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
            Some(25),
            Some(31),
            None,
            Some(57),
            None,
            None,
        ]));

        let arrow_struct = ArrowStructArray::new(
            vec![
                Arc::new(Field::new("name", DataType::Utf8View, true)),
                Arc::new(Field::new("age", DataType::Int32, true)),
            ]
            .into(),
            vec![names, ages],
            nulls.finish(),
        );

        let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true).unwrap();

        assert_eq!(
            &arrow_struct,
            vortex_struct.into_arrow_preferred().unwrap().as_struct()
        );
    }

    /// An Arrow list of strings must survive a round trip through Vortex when converted
    /// back to its original Arrow data type.
    #[test]
    fn roundtrip_list() {
        let names = Arc::new(StringArray::from_iter(vec![
            Some("Joseph"),
            Some("Angela"),
            Some("Mikhail"),
        ]));

        // Three lists with lengths 0, 2 and 1, covering all three names.
        let arrow_list = ArrowListArray::new(
            Arc::new(Field::new_list_field(DataType::Utf8, true)),
            OffsetBuffer::from_lengths(vec![0, 2, 1]),
            names,
            None,
        );
        let list_data_type = arrow_list.data_type();

        let vortex_list = ArrayRef::from_arrow(&arrow_list, true).unwrap();

        let rt_arrow_list = vortex_list.into_arrow(list_data_type).unwrap();

        assert_eq!(
            (Arc::new(arrow_list.clone()) as ArrowArrayRef).as_ref(),
            rt_arrow_list.as_ref()
        );
    }
}