1use std::sync::Arc;
7
8use vortex_buffer::BitBuffer;
9use vortex_buffer::Buffer;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_ensure;
13use vortex_error::vortex_panic;
14
15use crate::ArrayRef;
16use crate::ArraySlots;
17use crate::Executable;
18use crate::ExecutionCtx;
19use crate::IntoArray;
20use crate::array::ArrayView;
21use crate::array::child_to_validity;
22use crate::arrays::Bool;
23use crate::arrays::BoolArray;
24use crate::arrays::Decimal;
25use crate::arrays::DecimalArray;
26use crate::arrays::Extension;
27use crate::arrays::ExtensionArray;
28use crate::arrays::FixedSizeList;
29use crate::arrays::FixedSizeListArray;
30use crate::arrays::ListView;
31use crate::arrays::ListViewArray;
32use crate::arrays::Null;
33use crate::arrays::NullArray;
34use crate::arrays::Primitive;
35use crate::arrays::PrimitiveArray;
36use crate::arrays::Struct;
37use crate::arrays::StructArray;
38use crate::arrays::VarBinView;
39use crate::arrays::VarBinViewArray;
40use crate::arrays::Variant;
41use crate::arrays::VariantArray;
42use crate::arrays::bool::BoolDataParts;
43use crate::arrays::decimal::DecimalDataParts;
44use crate::arrays::extension::ExtensionArrayExt;
45use crate::arrays::fixed_size_list::FixedSizeListArrayExt;
46use crate::arrays::listview::ListViewDataParts;
47use crate::arrays::listview::ListViewRebuildMode;
48use crate::arrays::primitive::PrimitiveDataParts;
49use crate::arrays::struct_::StructDataParts;
50use crate::arrays::varbinview::VarBinViewDataParts;
51use crate::arrays::variant::VariantArrayExt;
52use crate::dtype::DType;
53use crate::dtype::NativePType;
54use crate::dtype::Nullability;
55use crate::dtype::PType;
56use crate::match_each_decimal_value_type;
57use crate::match_each_native_ptype;
58use crate::matcher::Matcher;
59use crate::validity::Validity;
60
/// The closed set of canonical array representations.
///
/// Each variant wraps the concrete array type that stores data in that canonical form.
#[derive(Debug, Clone)]
pub enum Canonical {
    /// Wraps a [`NullArray`].
    Null(NullArray),
    /// Wraps a [`BoolArray`].
    Bool(BoolArray),
    /// Wraps a [`PrimitiveArray`].
    Primitive(PrimitiveArray),
    /// Wraps a [`DecimalArray`].
    Decimal(DecimalArray),
    /// Wraps a [`VarBinViewArray`]; used for both UTF-8 and binary dtypes
    /// (see [`Canonical::empty`]).
    VarBinView(VarBinViewArray),
    /// Wraps a [`ListViewArray`]; this is the canonical form of list dtypes.
    List(ListViewArray),
    /// Wraps a [`FixedSizeListArray`].
    FixedSizeList(FixedSizeListArray),
    /// Wraps a [`StructArray`].
    Struct(StructArray),
    /// Wraps an [`ExtensionArray`].
    Extension(ExtensionArray),
    /// Wraps a [`VariantArray`].
    Variant(VariantArray),
}
136
/// Dispatches over every [`Canonical`] variant, binding the wrapped array to `$binding` and
/// evaluating `$body` with it.
///
/// `$body` is expanded once per variant, so it must type-check against every wrapped array type.
macro_rules! match_each_canonical {
    ($canonical:expr, | $binding:ident | $body:expr) => {{
        match $canonical {
            Canonical::Null($binding) => $body,
            Canonical::Bool($binding) => $body,
            Canonical::Primitive($binding) => $body,
            Canonical::Decimal($binding) => $body,
            Canonical::VarBinView($binding) => $body,
            Canonical::List($binding) => $body,
            Canonical::FixedSizeList($binding) => $body,
            Canonical::Struct($binding) => $body,
            Canonical::Extension($binding) => $body,
            Canonical::Variant($binding) => $body,
        }
    }};
}
154
155impl Canonical {
156 pub fn empty(dtype: &DType) -> Canonical {
158 match dtype {
159 DType::Null => Canonical::Null(NullArray::new(0)),
160 DType::Bool(n) => Canonical::Bool(unsafe {
161 BoolArray::new_unchecked(BitBuffer::empty(), Validity::from(n))
162 }),
163 DType::Primitive(ptype, n) => {
164 match_each_native_ptype!(ptype, |P| {
165 Canonical::Primitive(unsafe {
166 PrimitiveArray::new_unchecked(Buffer::<P>::empty(), Validity::from(n))
167 })
168 })
169 }
170 DType::Decimal(decimal_type, n) => {
171 match_each_decimal_value_type!(
172 DecimalType::smallest_decimal_value_type(decimal_type),
173 |D| {
174 Canonical::Decimal(unsafe {
175 DecimalArray::new_unchecked::<D>(
176 Buffer::empty(),
177 *decimal_type,
178 Validity::from(n),
179 )
180 })
181 }
182 )
183 }
184 DType::Utf8(n) => Canonical::VarBinView(unsafe {
185 VarBinViewArray::new_unchecked(
186 Buffer::empty(),
187 Arc::new([]),
188 dtype.clone(),
189 Validity::from(n),
190 )
191 }),
192 DType::Binary(n) => Canonical::VarBinView(unsafe {
193 VarBinViewArray::new_unchecked(
194 Buffer::empty(),
195 Arc::new([]),
196 dtype.clone(),
197 Validity::from(n),
198 )
199 }),
200 DType::List(dtype, n) => Canonical::List(unsafe {
201 ListViewArray::new_unchecked(
202 Canonical::empty(dtype).into_array(),
203 Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
204 .into_array(),
205 Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
206 .into_array(),
207 Validity::from(n),
208 )
209 .with_zero_copy_to_list(true)
211 }),
212 DType::FixedSizeList(elem_dtype, list_size, null) => Canonical::FixedSizeList(unsafe {
213 FixedSizeListArray::new_unchecked(
214 Canonical::empty(elem_dtype).into_array(),
215 *list_size,
216 Validity::from(null),
217 0,
218 )
219 }),
220 DType::Struct(struct_dtype, n) => Canonical::Struct(unsafe {
221 StructArray::new_unchecked(
222 struct_dtype
223 .fields()
224 .map(|f| Canonical::empty(&f).into_array())
225 .collect::<Arc<[_]>>(),
226 struct_dtype.clone(),
227 0,
228 Validity::from(n),
229 )
230 }),
231 DType::Union(..) => todo!("TODO(connor)[Union]: unimplemented"),
232 DType::Variant(_) => {
233 vortex_panic!(InvalidArgument: "Canonical empty is not supported for Variant")
234 }
235 DType::Extension(ext_dtype) => Canonical::Extension(ExtensionArray::new(
236 ext_dtype.clone(),
237 Canonical::empty(ext_dtype.storage_dtype()).into_array(),
238 )),
239 }
240 }
241
242 pub fn len(&self) -> usize {
243 match_each_canonical!(self, |arr| arr.len())
244 }
245
246 pub fn dtype(&self) -> &DType {
247 match_each_canonical!(self, |arr| arr.dtype())
248 }
249
250 pub fn is_empty(&self) -> bool {
251 match_each_canonical!(self, |arr| arr.is_empty())
252 }
253}
254
255impl Canonical {
256 pub fn compact(&self, ctx: &mut ExecutionCtx) -> VortexResult<Canonical> {
264 match self {
265 Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
266 Canonical::List(array) => Ok(Canonical::List(
267 array.rebuild(ListViewRebuildMode::TrimElements, ctx)?,
268 )),
269 _ => Ok(self.clone()),
270 }
271 }
272}
273
274impl Canonical {
276 pub fn as_null(&self) -> &NullArray {
277 if let Canonical::Null(a) = self {
278 a
279 } else {
280 vortex_panic!("Cannot get NullArray from {:?}", &self)
281 }
282 }
283
284 pub fn into_null(self) -> NullArray {
285 if let Canonical::Null(a) = self {
286 a
287 } else {
288 vortex_panic!("Cannot unwrap NullArray from {:?}", &self)
289 }
290 }
291
292 pub fn as_bool(&self) -> &BoolArray {
293 if let Canonical::Bool(a) = self {
294 a
295 } else {
296 vortex_panic!("Cannot get BoolArray from {:?}", &self)
297 }
298 }
299
300 pub fn into_bool(self) -> BoolArray {
301 if let Canonical::Bool(a) = self {
302 a
303 } else {
304 vortex_panic!("Cannot unwrap BoolArray from {:?}", &self)
305 }
306 }
307
308 pub fn as_primitive(&self) -> &PrimitiveArray {
309 if let Canonical::Primitive(a) = self {
310 a
311 } else {
312 vortex_panic!("Cannot get PrimitiveArray from {:?}", &self)
313 }
314 }
315
316 pub fn into_primitive(self) -> PrimitiveArray {
317 if let Canonical::Primitive(a) = self {
318 a
319 } else {
320 vortex_panic!("Cannot unwrap PrimitiveArray from {:?}", &self)
321 }
322 }
323
324 pub fn as_decimal(&self) -> &DecimalArray {
325 if let Canonical::Decimal(a) = self {
326 a
327 } else {
328 vortex_panic!("Cannot get DecimalArray from {:?}", &self)
329 }
330 }
331
332 pub fn into_decimal(self) -> DecimalArray {
333 if let Canonical::Decimal(a) = self {
334 a
335 } else {
336 vortex_panic!("Cannot unwrap DecimalArray from {:?}", &self)
337 }
338 }
339
340 pub fn as_varbinview(&self) -> &VarBinViewArray {
341 if let Canonical::VarBinView(a) = self {
342 a
343 } else {
344 vortex_panic!("Cannot get VarBinViewArray from {:?}", &self)
345 }
346 }
347
348 pub fn into_varbinview(self) -> VarBinViewArray {
349 if let Canonical::VarBinView(a) = self {
350 a
351 } else {
352 vortex_panic!("Cannot unwrap VarBinViewArray from {:?}", &self)
353 }
354 }
355
356 pub fn as_listview(&self) -> &ListViewArray {
357 if let Canonical::List(a) = self {
358 a
359 } else {
360 vortex_panic!("Cannot get ListArray from {:?}", &self)
361 }
362 }
363
364 pub fn into_listview(self) -> ListViewArray {
365 if let Canonical::List(a) = self {
366 a
367 } else {
368 vortex_panic!("Cannot unwrap ListArray from {:?}", &self)
369 }
370 }
371
372 pub fn as_fixed_size_list(&self) -> &FixedSizeListArray {
373 if let Canonical::FixedSizeList(a) = self {
374 a
375 } else {
376 vortex_panic!("Cannot get FixedSizeListArray from {:?}", &self)
377 }
378 }
379
380 pub fn into_fixed_size_list(self) -> FixedSizeListArray {
381 if let Canonical::FixedSizeList(a) = self {
382 a
383 } else {
384 vortex_panic!("Cannot unwrap FixedSizeListArray from {:?}", &self)
385 }
386 }
387
388 pub fn as_struct(&self) -> &StructArray {
389 if let Canonical::Struct(a) = self {
390 a
391 } else {
392 vortex_panic!("Cannot get StructArray from {:?}", &self)
393 }
394 }
395
396 pub fn into_struct(self) -> StructArray {
397 if let Canonical::Struct(a) = self {
398 a
399 } else {
400 vortex_panic!("Cannot unwrap StructArray from {:?}", &self)
401 }
402 }
403
404 pub fn as_extension(&self) -> &ExtensionArray {
405 if let Canonical::Extension(a) = self {
406 a
407 } else {
408 vortex_panic!("Cannot get ExtensionArray from {:?}", &self)
409 }
410 }
411
412 pub fn into_extension(self) -> ExtensionArray {
413 if let Canonical::Extension(a) = self {
414 a
415 } else {
416 vortex_panic!("Cannot unwrap ExtensionArray from {:?}", &self)
417 }
418 }
419}
420
421impl IntoArray for Canonical {
422 fn into_array(self) -> ArrayRef {
423 match_each_canonical!(self, |arr| arr.into_array())
424 }
425}
426
/// Convenience conversions from an array to one specific canonical array type.
///
/// The whole trait is deprecated in favour of `array.execute::<T>(ctx)`.
///
/// The `ArrayRef` implementation in this file panics when canonicalization fails or when the
/// canonical form is a different variant than the one requested.
#[deprecated(note = "use `array.execute::<T>(ctx)` instead")]
pub trait ToCanonical {
    /// Canonicalizes into a [`NullArray`].
    #[deprecated(note = "use `array.execute::<NullArray>(ctx)` instead")]
    fn to_null(&self) -> NullArray;

    /// Canonicalizes into a [`BoolArray`].
    #[deprecated(note = "use `array.execute::<BoolArray>(ctx)` instead")]
    fn to_bool(&self) -> BoolArray;

    /// Canonicalizes into a [`PrimitiveArray`].
    #[deprecated(note = "use `array.execute::<PrimitiveArray>(ctx)` instead")]
    fn to_primitive(&self) -> PrimitiveArray;

    /// Canonicalizes into a [`DecimalArray`].
    #[deprecated(note = "use `array.execute::<DecimalArray>(ctx)` instead")]
    fn to_decimal(&self) -> DecimalArray;

    /// Canonicalizes into a [`StructArray`].
    #[deprecated(note = "use `array.execute::<StructArray>(ctx)` instead")]
    fn to_struct(&self) -> StructArray;

    /// Canonicalizes into a [`ListViewArray`].
    #[deprecated(note = "use `array.execute::<ListViewArray>(ctx)` instead")]
    fn to_listview(&self) -> ListViewArray;

    /// Canonicalizes into a [`FixedSizeListArray`].
    #[deprecated(note = "use `array.execute::<FixedSizeListArray>(ctx)` instead")]
    fn to_fixed_size_list(&self) -> FixedSizeListArray;

    /// Canonicalizes into a [`VarBinViewArray`].
    #[deprecated(note = "use `array.execute::<VarBinViewArray>(ctx)` instead")]
    fn to_varbinview(&self) -> VarBinViewArray;

    /// Canonicalizes into an [`ExtensionArray`].
    #[deprecated(note = "use `array.execute::<ExtensionArray>(ctx)` instead")]
    fn to_extension(&self) -> ExtensionArray;
}
475
476#[expect(deprecated)]
478impl ToCanonical for ArrayRef {
479 fn to_null(&self) -> NullArray {
480 #[expect(deprecated)]
481 let result = self.to_canonical().vortex_expect("to_canonical failed");
482 result.into_null()
483 }
484
485 fn to_bool(&self) -> BoolArray {
486 #[expect(deprecated)]
487 let result = self.to_canonical().vortex_expect("to_canonical failed");
488 result.into_bool()
489 }
490
491 fn to_primitive(&self) -> PrimitiveArray {
492 #[expect(deprecated)]
493 let result = self.to_canonical().vortex_expect("to_canonical failed");
494 result.into_primitive()
495 }
496
497 fn to_decimal(&self) -> DecimalArray {
498 #[expect(deprecated)]
499 let result = self.to_canonical().vortex_expect("to_canonical failed");
500 result.into_decimal()
501 }
502
503 fn to_struct(&self) -> StructArray {
504 #[expect(deprecated)]
505 let result = self.to_canonical().vortex_expect("to_canonical failed");
506 result.into_struct()
507 }
508
509 fn to_listview(&self) -> ListViewArray {
510 #[expect(deprecated)]
511 let result = self.to_canonical().vortex_expect("to_canonical failed");
512 result.into_listview()
513 }
514
515 fn to_fixed_size_list(&self) -> FixedSizeListArray {
516 #[expect(deprecated)]
517 let result = self.to_canonical().vortex_expect("to_canonical failed");
518 result.into_fixed_size_list()
519 }
520
521 fn to_varbinview(&self) -> VarBinViewArray {
522 #[expect(deprecated)]
523 let result = self.to_canonical().vortex_expect("to_canonical failed");
524 result.into_varbinview()
525 }
526
527 fn to_extension(&self) -> ExtensionArray {
528 #[expect(deprecated)]
529 let result = self.to_canonical().vortex_expect("to_canonical failed");
530 result.into_extension()
531 }
532}
533
534impl From<Canonical> for ArrayRef {
535 fn from(value: Canonical) -> Self {
536 match_each_canonical!(value, |arr| arr.into_array())
537 }
538}
539
540impl Executable for Canonical {
546 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
547 let result = array.execute_until::<AnyCanonical>(ctx)?;
548 Ok(result
549 .as_opt::<AnyCanonical>()
550 .map(Canonical::from)
551 .vortex_expect("execute_until::<AnyCanonical> must return a canonical array"))
552 }
553}
554
/// Execution target that yields a [`Canonical`] array whose validity has also been executed.
///
/// See the [`Executable`] impl for the per-variant handling; the wrapped [`Canonical`] is
/// available as field `0`.
pub struct CanonicalValidity(pub Canonical);
560
561impl Executable for CanonicalValidity {
562 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
563 match array.execute::<Canonical>(ctx)? {
564 n @ Canonical::Null(_) => Ok(CanonicalValidity(n)),
565 Canonical::Bool(b) => {
566 let validity = child_to_validity(b.slots()[0].as_ref(), b.dtype().nullability());
567 let len = b.len();
568 let BoolDataParts { bits, meta } = b.into_data().into_parts(len);
569 Ok(CanonicalValidity(Canonical::Bool(
570 BoolArray::try_new_from_handle(
571 bits,
572 meta.offset(),
573 meta.len(),
574 validity.execute(ctx)?,
575 )?,
576 )))
577 }
578 Canonical::Primitive(p) => {
579 let PrimitiveDataParts {
580 ptype,
581 buffer,
582 validity,
583 } = p.into_data_parts();
584 Ok(CanonicalValidity(Canonical::Primitive(unsafe {
585 PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
586 })))
587 }
588 Canonical::Decimal(d) => {
589 let DecimalDataParts {
590 decimal_dtype,
591 values,
592 values_type,
593 validity,
594 } = d.into_data_parts();
595 Ok(CanonicalValidity(Canonical::Decimal(unsafe {
596 DecimalArray::new_unchecked_handle(
597 values,
598 values_type,
599 decimal_dtype,
600 validity.execute(ctx)?,
601 )
602 })))
603 }
604 Canonical::VarBinView(vbv) => {
605 let VarBinViewDataParts {
606 dtype,
607 buffers,
608 views,
609 validity,
610 } = vbv.into_data_parts();
611 Ok(CanonicalValidity(Canonical::VarBinView(unsafe {
612 VarBinViewArray::new_handle_unchecked(
613 views,
614 buffers,
615 dtype,
616 validity.execute(ctx)?,
617 )
618 })))
619 }
620 Canonical::List(l) => {
621 let zctl = l.is_zero_copy_to_list();
622 let ListViewDataParts {
623 elements,
624 offsets,
625 sizes,
626 validity,
627 ..
628 } = l.into_data_parts();
629 Ok(CanonicalValidity(Canonical::List(unsafe {
630 ListViewArray::new_unchecked(elements, offsets, sizes, validity.execute(ctx)?)
631 .with_zero_copy_to_list(zctl)
632 })))
633 }
634 Canonical::FixedSizeList(fsl) => {
635 let list_size = fsl.list_size();
636 let len = fsl.len();
637 let parts = fsl.into_data_parts();
638 let elements = parts.elements;
639 let validity = parts.validity;
640 Ok(CanonicalValidity(Canonical::FixedSizeList(
641 FixedSizeListArray::new(elements, list_size, validity.execute(ctx)?, len),
642 )))
643 }
644 Canonical::Struct(st) => {
645 let len = st.len();
646 let StructDataParts {
647 struct_fields,
648 fields,
649 validity,
650 } = st.into_data_parts();
651 Ok(CanonicalValidity(Canonical::Struct(unsafe {
652 StructArray::new_unchecked(fields, struct_fields, len, validity.execute(ctx)?)
653 })))
654 }
655 Canonical::Extension(ext) => Ok(CanonicalValidity(Canonical::Extension(
656 ExtensionArray::new(
657 ext.ext_dtype().clone(),
658 ext.storage_array()
659 .clone()
660 .execute::<CanonicalValidity>(ctx)?
661 .0
662 .into_array(),
663 ),
664 ))),
665 Canonical::Variant(variant) => {
666 let core_storage = recursively_canonicalize_slots(variant.core_storage(), ctx)?;
667 let shredded = variant
668 .shredded()
669 .map(|shredded| {
670 if shredded.is::<Variant>() {
671 recursively_canonicalize_slots(shredded, ctx)
672 } else {
673 shredded
674 .clone()
675 .execute::<CanonicalValidity>(ctx)
676 .map(|canonical| canonical.0.into_array())
677 }
678 })
679 .transpose()?;
680 Ok(CanonicalValidity(Canonical::Variant(
681 VariantArray::try_new(core_storage, shredded)?,
682 )))
683 }
684 }
685 }
686}
687
/// Execution target that yields a [`Canonical`] array whose validity and child arrays have been
/// executed recursively.
///
/// See the [`Executable`] impl for the per-variant handling; the wrapped [`Canonical`] is
/// available as field `0`.
pub struct RecursiveCanonical(pub Canonical);
693
694fn recursively_canonicalize_slots(
697 array: &ArrayRef,
698 ctx: &mut ExecutionCtx,
699) -> VortexResult<ArrayRef> {
700 let slots = array
701 .slots()
702 .iter()
703 .map(|slot| {
704 slot.as_ref()
705 .map(|child| {
706 child
707 .clone()
708 .execute::<RecursiveCanonical>(ctx)
709 .map(|canonical| canonical.0.into_array())
710 })
711 .transpose()
712 })
713 .collect::<VortexResult<ArraySlots>>()?;
714 unsafe { array.clone().with_slots(slots) }
717}
718impl Executable for RecursiveCanonical {
719 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
720 match array.execute::<Canonical>(ctx)? {
721 n @ Canonical::Null(_) => Ok(RecursiveCanonical(n)),
722 Canonical::Bool(b) => {
723 let validity = child_to_validity(b.slots()[0].as_ref(), b.dtype().nullability());
724 let len = b.len();
725 let BoolDataParts { bits, meta } = b.into_data().into_parts(len);
726 Ok(RecursiveCanonical(Canonical::Bool(
727 BoolArray::try_new_from_handle(
728 bits,
729 meta.offset(),
730 meta.len(),
731 validity.execute(ctx)?,
732 )?,
733 )))
734 }
735 Canonical::Primitive(p) => {
736 let PrimitiveDataParts {
737 ptype,
738 buffer,
739 validity,
740 } = p.into_data_parts();
741 Ok(RecursiveCanonical(Canonical::Primitive(unsafe {
742 PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
743 })))
744 }
745 Canonical::Decimal(d) => {
746 let DecimalDataParts {
747 decimal_dtype,
748 values,
749 values_type,
750 validity,
751 } = d.into_data_parts();
752 Ok(RecursiveCanonical(Canonical::Decimal(unsafe {
753 DecimalArray::new_unchecked_handle(
754 values,
755 values_type,
756 decimal_dtype,
757 validity.execute(ctx)?,
758 )
759 })))
760 }
761 Canonical::VarBinView(vbv) => {
762 let VarBinViewDataParts {
763 dtype,
764 buffers,
765 views,
766 validity,
767 } = vbv.into_data_parts();
768 Ok(RecursiveCanonical(Canonical::VarBinView(unsafe {
769 VarBinViewArray::new_handle_unchecked(
770 views,
771 buffers,
772 dtype,
773 validity.execute(ctx)?,
774 )
775 })))
776 }
777 Canonical::List(l) => {
778 let zctl = l.is_zero_copy_to_list();
779 let ListViewDataParts {
780 elements,
781 offsets,
782 sizes,
783 validity,
784 ..
785 } = l.into_data_parts();
786 Ok(RecursiveCanonical(Canonical::List(unsafe {
787 ListViewArray::new_unchecked(
788 elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
789 offsets.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
790 sizes.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
791 validity.execute(ctx)?,
792 )
793 .with_zero_copy_to_list(zctl)
794 })))
795 }
796 Canonical::FixedSizeList(fsl) => {
797 let list_size = fsl.list_size();
798 let len = fsl.len();
799 let parts = fsl.into_data_parts();
800 let elements = parts.elements;
801 let validity = parts.validity;
802 Ok(RecursiveCanonical(Canonical::FixedSizeList(
803 FixedSizeListArray::new(
804 elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
805 list_size,
806 validity.execute(ctx)?,
807 len,
808 ),
809 )))
810 }
811 Canonical::Struct(st) => {
812 let len = st.len();
813 let StructDataParts {
814 struct_fields,
815 fields,
816 validity,
817 } = st.into_data_parts();
818 let executed_fields = fields
819 .iter()
820 .map(|f| Ok(f.clone().execute::<RecursiveCanonical>(ctx)?.0.into_array()))
821 .collect::<VortexResult<Arc<[_]>>>()?;
822
823 Ok(RecursiveCanonical(Canonical::Struct(unsafe {
824 StructArray::new_unchecked(
825 executed_fields,
826 struct_fields,
827 len,
828 validity.execute(ctx)?,
829 )
830 })))
831 }
832 Canonical::Extension(ext) => Ok(RecursiveCanonical(Canonical::Extension(
833 ExtensionArray::new(
834 ext.ext_dtype().clone(),
835 ext.storage_array()
836 .clone()
837 .execute::<RecursiveCanonical>(ctx)?
838 .0
839 .into_array(),
840 ),
841 ))),
842 Canonical::Variant(variant) => {
843 let core_storage = recursively_canonicalize_slots(variant.core_storage(), ctx)?;
844 let shredded = variant
845 .shredded()
846 .map(|shredded| {
847 if shredded.is::<Variant>() {
848 recursively_canonicalize_slots(shredded, ctx)
849 } else {
850 shredded
851 .clone()
852 .execute::<RecursiveCanonical>(ctx)
853 .map(|canonical| canonical.0.into_array())
854 }
855 })
856 .transpose()?;
857 Ok(RecursiveCanonical(Canonical::Variant(
858 VariantArray::try_new(core_storage, shredded)?,
859 )))
860 }
861 }
862 }
863}
864
865impl<T: NativePType> Executable for Buffer<T> {
871 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
872 let array = PrimitiveArray::execute(array, ctx)?;
873 vortex_ensure!(
874 matches!(
875 array.validity()?,
876 Validity::NonNullable | Validity::AllValid
877 ),
878 "Cannot execute to native buffer: array is not all-valid."
879 );
880 Ok(array.into_buffer())
881 }
882}
883
884impl Executable for PrimitiveArray {
888 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
889 match array.try_downcast::<Primitive>() {
890 Ok(primitive) => Ok(primitive),
891 Err(array) => Ok(Canonical::execute(array, ctx)?.into_primitive()),
892 }
893 }
894}
895
896impl Executable for BoolArray {
900 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
901 match array.try_downcast::<Bool>() {
902 Ok(bool_array) => Ok(bool_array),
903 Err(array) => Ok(Canonical::execute(array, ctx)?.into_bool()),
904 }
905 }
906}
907
908impl Executable for BitBuffer {
912 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
913 let bool = BoolArray::execute(array, ctx)?;
914 assert!(
915 !bool.dtype().is_nullable(),
916 "bit buffer execute only works with non-nullable bool arrays"
917 );
918 Ok(bool.into_bit_buffer())
919 }
920}
921
922impl Executable for NullArray {
926 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
927 match array.try_downcast::<Null>() {
928 Ok(null_array) => Ok(null_array),
929 Err(array) => Ok(Canonical::execute(array, ctx)?.into_null()),
930 }
931 }
932}
933
934impl Executable for VarBinViewArray {
938 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
939 match array.try_downcast::<VarBinView>() {
940 Ok(varbinview) => Ok(varbinview),
941 Err(array) => Ok(Canonical::execute(array, ctx)?.into_varbinview()),
942 }
943 }
944}
945
946impl Executable for ExtensionArray {
950 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
951 match array.try_downcast::<Extension>() {
952 Ok(ext_array) => Ok(ext_array),
953 Err(array) => Ok(Canonical::execute(array, ctx)?.into_extension()),
954 }
955 }
956}
957
958impl Executable for DecimalArray {
962 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
963 match array.try_downcast::<Decimal>() {
964 Ok(decimal) => Ok(decimal),
965 Err(array) => Ok(Canonical::execute(array, ctx)?.into_decimal()),
966 }
967 }
968}
969
970impl Executable for ListViewArray {
974 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
975 match array.try_downcast::<ListView>() {
976 Ok(list) => Ok(list),
977 Err(array) => Ok(Canonical::execute(array, ctx)?.into_listview()),
978 }
979 }
980}
981
982impl Executable for FixedSizeListArray {
986 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
987 match array.try_downcast::<FixedSizeList>() {
988 Ok(fsl) => Ok(fsl),
989 Err(array) => Ok(Canonical::execute(array, ctx)?.into_fixed_size_list()),
990 }
991 }
992}
993
994impl Executable for StructArray {
998 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
999 match array.try_downcast::<Struct>() {
1000 Ok(struct_array) => Ok(struct_array),
1001 Err(array) => Ok(Canonical::execute(array, ctx)?.into_struct()),
1002 }
1003 }
1004}
1005
1006impl Executable for VariantArray {
1010 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
1011 match array.try_downcast::<Variant>() {
1012 Ok(variant_array) => Ok(variant_array),
1013 Err(array) => match Canonical::execute(array, ctx)? {
1014 Canonical::Variant(variant_array) => Ok(variant_array),
1015 canonical => vortex_panic!("Cannot unwrap VariantArray from {:?}", canonical),
1016 },
1017 }
1018 }
1019}
1020
/// Borrowed counterpart of [`Canonical`]: each variant holds an [`ArrayView`] over the matching
/// canonical array type instead of an owned array.
///
/// Convert to an owned [`Canonical`] with `Canonical::from`.
#[derive(Debug, Clone, Copy)]
pub enum CanonicalView<'a> {
    /// View over a null array.
    Null(ArrayView<'a, Null>),
    /// View over a bool array.
    Bool(ArrayView<'a, Bool>),
    /// View over a primitive array.
    Primitive(ArrayView<'a, Primitive>),
    /// View over a decimal array.
    Decimal(ArrayView<'a, Decimal>),
    /// View over a var-bin-view array.
    VarBinView(ArrayView<'a, VarBinView>),
    /// View over a list-view array.
    List(ArrayView<'a, ListView>),
    /// View over a fixed-size-list array.
    FixedSizeList(ArrayView<'a, FixedSizeList>),
    /// View over a struct array.
    Struct(ArrayView<'a, Struct>),
    /// View over an extension array.
    Extension(ArrayView<'a, Extension>),
    /// View over a variant array.
    Variant(ArrayView<'a, Variant>),
}
1038
1039impl From<CanonicalView<'_>> for Canonical {
1040 fn from(value: CanonicalView<'_>) -> Self {
1041 match value {
1042 CanonicalView::Null(a) => Canonical::Null(a.into_owned()),
1043 CanonicalView::Bool(a) => Canonical::Bool(a.into_owned()),
1044 CanonicalView::Primitive(a) => Canonical::Primitive(a.into_owned()),
1045 CanonicalView::Decimal(a) => Canonical::Decimal(a.into_owned()),
1046 CanonicalView::VarBinView(a) => Canonical::VarBinView(a.into_owned()),
1047 CanonicalView::List(a) => Canonical::List(a.into_owned()),
1048 CanonicalView::FixedSizeList(a) => Canonical::FixedSizeList(a.into_owned()),
1049 CanonicalView::Struct(a) => Canonical::Struct(a.into_owned()),
1050 CanonicalView::Extension(a) => Canonical::Extension(a.into_owned()),
1051 CanonicalView::Variant(a) => Canonical::Variant(a.into_owned()),
1052 }
1053 }
1054}
1055
1056impl CanonicalView<'_> {
1057 pub fn to_array_ref(&self) -> ArrayRef {
1059 match self {
1060 CanonicalView::Null(a) => a.array().clone(),
1061 CanonicalView::Bool(a) => a.array().clone(),
1062 CanonicalView::Primitive(a) => a.array().clone(),
1063 CanonicalView::Decimal(a) => a.array().clone(),
1064 CanonicalView::VarBinView(a) => a.array().clone(),
1065 CanonicalView::List(a) => a.array().clone(),
1066 CanonicalView::FixedSizeList(a) => a.array().clone(),
1067 CanonicalView::Struct(a) => a.array().clone(),
1068 CanonicalView::Extension(a) => a.array().clone(),
1069 CanonicalView::Variant(a) => a.array().clone(),
1070 }
1071 }
1072}
1073
/// [`Matcher`] that accepts any array that is already one of the canonical array types,
/// yielding a [`CanonicalView`] of it.
pub struct AnyCanonical;
1076impl Matcher for AnyCanonical {
1077 type Match<'a> = CanonicalView<'a>;
1078
1079 #[inline]
1080 fn matches(array: &ArrayRef) -> bool {
1081 array.is::<Null>()
1082 || array.is::<Bool>()
1083 || array.is::<Primitive>()
1084 || array.is::<Decimal>()
1085 || array.is::<Struct>()
1086 || array.is::<ListView>()
1087 || array.is::<FixedSizeList>()
1088 || array.is::<VarBinView>()
1089 || array.is::<Variant>()
1090 || array.is::<Extension>()
1091 }
1092
1093 #[inline]
1094 fn try_match(array: &ArrayRef) -> Option<Self::Match<'_>> {
1095 if let Some(a) = array.as_opt::<Null>() {
1096 Some(CanonicalView::Null(a))
1097 } else if let Some(a) = array.as_opt::<Bool>() {
1098 Some(CanonicalView::Bool(a))
1099 } else if let Some(a) = array.as_opt::<Primitive>() {
1100 Some(CanonicalView::Primitive(a))
1101 } else if let Some(a) = array.as_opt::<Decimal>() {
1102 Some(CanonicalView::Decimal(a))
1103 } else if let Some(a) = array.as_opt::<Struct>() {
1104 Some(CanonicalView::Struct(a))
1105 } else if let Some(a) = array.as_opt::<ListView>() {
1106 Some(CanonicalView::List(a))
1107 } else if let Some(a) = array.as_opt::<FixedSizeList>() {
1108 Some(CanonicalView::FixedSizeList(a))
1109 } else if let Some(a) = array.as_opt::<VarBinView>() {
1110 Some(CanonicalView::VarBinView(a))
1111 } else if let Some(a) = array.as_opt::<Variant>() {
1112 Some(CanonicalView::Variant(a))
1113 } else {
1114 array.as_opt::<Extension>().map(CanonicalView::Extension)
1115 }
1116 }
1117}
1118
// Tests for canonicalization: nested variant handling under `CanonicalValidity`, and
// conversion of struct and list arrays to Arrow.
#[cfg(test)]
mod test {
    use std::sync::Arc;
    use std::sync::LazyLock;

    use arrow_array::Array as ArrowArray;
    use arrow_array::ArrayRef as ArrowArrayRef;
    use arrow_array::ListArray as ArrowListArray;
    use arrow_array::PrimitiveArray as ArrowPrimitiveArray;
    use arrow_array::StringArray;
    use arrow_array::StringViewArray;
    use arrow_array::StructArray as ArrowStructArray;
    use arrow_array::cast::AsArray;
    use arrow_array::types::Int32Type;
    use arrow_array::types::Int64Type;
    use arrow_array::types::UInt64Type;
    use arrow_buffer::NullBufferBuilder;
    use arrow_buffer::OffsetBuffer;
    use arrow_schema::DataType;
    use arrow_schema::Field;
    use vortex_buffer::buffer;
    use vortex_error::VortexResult;
    use vortex_error::vortex_err;
    use vortex_session::VortexSession;

    use crate::ArrayRef;
    use crate::Canonical;
    use crate::CanonicalValidity;
    use crate::IntoArray;
    use crate::VortexSessionExecute;
    use crate::arrays::Constant;
    use crate::arrays::ConstantArray;
    use crate::arrays::Primitive;
    use crate::arrays::Struct;
    use crate::arrays::Variant;
    use crate::arrays::VariantArray;
    use crate::arrays::struct_::StructArrayExt;
    use crate::arrays::variant::VariantArrayExt;
    use crate::arrow::ArrowSessionExt;
    use crate::arrow::FromArrowArray;
    use crate::canonical::StructArray;
    use crate::dtype::Nullability;
    use crate::scalar::Scalar;

    // Session shared by all tests in this module, created lazily on first use.
    static SESSION: LazyLock<VortexSession> = LazyLock::new(crate::array_session);

    /// Builds a constant variant array of length `len` whose scalar wraps a non-nullable
    /// `1i32`; used as the core storage of the variant arrays under test.
    fn variant_core_storage(len: usize) -> ArrayRef {
        ConstantArray::new(
            Scalar::variant(Scalar::primitive(1i32, Nullability::NonNullable)),
            len,
        )
        .into_array()
    }

    /// A constant field nested two variants deep (outer variant -> inner variant -> struct)
    /// must come out of `CanonicalValidity` execution as a primitive array.
    #[test]
    fn canonical_validity_canonicalizes_variant_shredded_physical_slots() -> VortexResult<()> {
        let len = 2;
        let nested_shredded =
            StructArray::try_from_iter([("value", ConstantArray::new(10i32, len).into_array())])?;
        let inner_variant = VariantArray::try_new(
            variant_core_storage(len),
            Some(nested_shredded.into_array()),
        )?;
        let outer_variant =
            VariantArray::try_new(variant_core_storage(len), Some(inner_variant.into_array()))?;

        let mut ctx = SESSION.create_execution_ctx();
        let Canonical::Variant(canonical) = outer_variant
            .into_array()
            .execute::<CanonicalValidity>(&mut ctx)?
            .0
        else {
            return Err(vortex_err!("expected canonical variant"));
        };

        // Walk back down: outer shredded child -> nested variant -> struct -> "value" field.
        let nested_variant = canonical
            .shredded()
            .and_then(|shredded| shredded.as_opt::<Variant>())
            .ok_or_else(|| vortex_err!("expected nested variant shredded child"))?;
        let nested_struct = nested_variant
            .shredded()
            .and_then(|shredded| shredded.as_opt::<Struct>())
            .ok_or_else(|| vortex_err!("expected nested struct shredded child"))?;
        let value = nested_struct.unmasked_field_by_name("value")?;

        // The field started as a `ConstantArray` and must now be a primitive array.
        assert!(value.is::<Primitive>());
        assert!(!value.is::<Constant>());

        Ok(())
    }

    /// Converting a struct with a nested struct to Arrow must yield Arrow primitive columns,
    /// including for the inner field that starts out as a `ConstantArray`.
    #[test]
    fn test_canonicalize_nested_struct() {
        let mut ctx = SESSION.create_execution_ctx();
        let nested_struct_array = StructArray::from_fields(&[
            // Top-level primitive field.
            ("a", buffer![1u64].into_array()),
            (
                "b",
                StructArray::from_fields(&[(
                    "inner_a",
                    // Constant-encoded field inside the nested struct.
                    ConstantArray::new(100i64, 1).into_array(),
                )])
                .unwrap()
                .into_array(),
            ),
        ])
        .unwrap();

        let arrow_struct = SESSION
            .arrow()
            .execute_arrow(nested_struct_array.into_array(), None, &mut ctx)
            .unwrap()
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // Column 0 ("a") is an Arrow u64 primitive array.
        assert!(
            arrow_struct
                .column(0)
                .as_any()
                .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>()
                .is_some()
        );

        // Column 1 ("b") is itself an Arrow struct.
        let inner_struct = Arc::clone(arrow_struct.column(1))
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        let inner_a = inner_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
        assert!(inner_a.is_some());

        // The constant was expanded to its single value.
        assert_eq!(
            inner_a.cloned().unwrap(),
            ArrowPrimitiveArray::from_iter([100i64])
        );
    }

    /// An Arrow struct with nullable fields and struct-level nulls must survive an
    /// Arrow -> Vortex -> Arrow round trip unchanged.
    #[test]
    fn roundtrip_struct() {
        let mut ctx = SESSION.create_execution_ctx();
        // Struct-level validity: rows 0-3 valid, row 4 null, row 5 valid.
        let mut nulls = NullBufferBuilder::new(6);
        nulls.append_n_non_nulls(4);
        nulls.append_null();
        nulls.append_non_null();
        let names = Arc::new(StringViewArray::from_iter(vec![
            Some("Joseph"),
            None,
            Some("Angela"),
            Some("Mikhail"),
            None,
            None,
        ]));
        let ages = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
            Some(25),
            Some(31),
            None,
            Some(57),
            None,
            None,
        ]));

        let arrow_struct = ArrowStructArray::new(
            vec![
                Arc::new(Field::new("name", DataType::Utf8View, true)),
                Arc::new(Field::new("age", DataType::Int32, true)),
            ]
            .into(),
            vec![names, ages],
            nulls.finish(),
        );

        let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true).unwrap();
        let vortex_struct = SESSION
            .arrow()
            .execute_arrow(vortex_struct, None, &mut ctx)
            .unwrap();
        assert_eq!(&arrow_struct, vortex_struct.as_struct());
    }

    /// An Arrow list of UTF-8 strings must survive an Arrow -> Vortex -> Arrow round trip when
    /// the original list data type is requested as the target field.
    #[test]
    fn roundtrip_list() {
        let mut ctx = SESSION.create_execution_ctx();
        let names = Arc::new(StringArray::from_iter(vec![
            Some("Joseph"),
            Some("Angela"),
            Some("Mikhail"),
        ]));

        // Three lists with lengths 0, 2 and 1 over the three names.
        let arrow_list = ArrowListArray::new(
            Arc::new(Field::new_list_field(DataType::Utf8, true)),
            OffsetBuffer::from_lengths(vec![0, 2, 1]),
            names,
            None,
        );
        let list_data_type = arrow_list.data_type();
        let list_field = Field::new(String::new(), list_data_type.clone(), true);

        let vortex_list = ArrayRef::from_arrow(&arrow_list, true).unwrap();

        let rt_arrow_list = SESSION
            .arrow()
            .execute_arrow(vortex_list, Some(&list_field), &mut ctx)
            .unwrap();

        assert_eq!(
            (Arc::new(arrow_list.clone()) as ArrowArrayRef).as_ref(),
            rt_arrow_list.as_ref()
        );
    }
}