1use std::sync::Arc;
7
8use vortex_buffer::BitBuffer;
9use vortex_buffer::Buffer;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_ensure;
13use vortex_error::vortex_panic;
14
15use crate::ArrayRef;
16use crate::ArraySlots;
17use crate::Executable;
18use crate::ExecutionCtx;
19use crate::IntoArray;
20use crate::array::ArrayView;
21use crate::array::child_to_validity;
22use crate::arrays::Bool;
23use crate::arrays::BoolArray;
24use crate::arrays::Decimal;
25use crate::arrays::DecimalArray;
26use crate::arrays::Extension;
27use crate::arrays::ExtensionArray;
28use crate::arrays::FixedSizeList;
29use crate::arrays::FixedSizeListArray;
30use crate::arrays::ListView;
31use crate::arrays::ListViewArray;
32use crate::arrays::Null;
33use crate::arrays::NullArray;
34use crate::arrays::Primitive;
35use crate::arrays::PrimitiveArray;
36use crate::arrays::Struct;
37use crate::arrays::StructArray;
38use crate::arrays::VarBinView;
39use crate::arrays::VarBinViewArray;
40use crate::arrays::Variant;
41use crate::arrays::VariantArray;
42use crate::arrays::bool::BoolDataParts;
43use crate::arrays::decimal::DecimalDataParts;
44use crate::arrays::extension::ExtensionArrayExt;
45use crate::arrays::fixed_size_list::FixedSizeListArrayExt;
46use crate::arrays::listview::ListViewDataParts;
47use crate::arrays::listview::ListViewRebuildMode;
48use crate::arrays::primitive::PrimitiveDataParts;
49use crate::arrays::struct_::StructDataParts;
50use crate::arrays::varbinview::VarBinViewDataParts;
51use crate::arrays::variant::VariantArrayExt;
52use crate::dtype::DType;
53use crate::dtype::NativePType;
54use crate::dtype::Nullability;
55use crate::dtype::PType;
56use crate::match_each_decimal_value_type;
57use crate::match_each_native_ptype;
58use crate::matcher::Matcher;
59use crate::validity::Validity;
60
/// The canonical array forms, one variant per canonical array type.
///
/// Each variant wraps the concrete array that represents data of the corresponding kind once it
/// has been executed to canonical form (see the `Executable` impl for `Canonical`).
#[derive(Debug, Clone)]
pub enum Canonical {
    /// A canonical null array.
    Null(NullArray),
    /// A canonical boolean array.
    Bool(BoolArray),
    /// A canonical primitive array.
    Primitive(PrimitiveArray),
    /// A canonical decimal array.
    Decimal(DecimalArray),
    /// A canonical variable-length binary view array; `Canonical::empty` uses this variant for
    /// both `DType::Utf8` and `DType::Binary`.
    VarBinView(VarBinViewArray),
    /// A canonical list, held as a [`ListViewArray`].
    List(ListViewArray),
    /// A canonical fixed-size list array.
    FixedSizeList(FixedSizeListArray),
    /// A canonical struct array.
    Struct(StructArray),
    /// A canonical extension array.
    Extension(ExtensionArray),
    /// A canonical variant array.
    Variant(VariantArray),
}
134
/// Dispatches over every [`Canonical`] variant, binding the wrapped array to `$array` and
/// evaluating `$body` in the matching arm.
///
/// The same expression is expanded once per variant, so `$body` may only use operations that every
/// canonical array type provides.
macro_rules! match_each_canonical {
    ($canonical:expr, | $array:ident | $body:expr) => {{
        // Arms follow the declaration order of `Canonical`.
        match $canonical {
            Canonical::Null($array) => $body,
            Canonical::Bool($array) => $body,
            Canonical::Primitive($array) => $body,
            Canonical::Decimal($array) => $body,
            Canonical::VarBinView($array) => $body,
            Canonical::List($array) => $body,
            Canonical::FixedSizeList($array) => $body,
            Canonical::Struct($array) => $body,
            Canonical::Extension($array) => $body,
            Canonical::Variant($array) => $body,
        }
    }};
}
152
153impl Canonical {
154 pub fn empty(dtype: &DType) -> Canonical {
156 match dtype {
157 DType::Null => Canonical::Null(NullArray::new(0)),
158 DType::Bool(n) => Canonical::Bool(unsafe {
159 BoolArray::new_unchecked(BitBuffer::empty(), Validity::from(n))
160 }),
161 DType::Primitive(ptype, n) => {
162 match_each_native_ptype!(ptype, |P| {
163 Canonical::Primitive(unsafe {
164 PrimitiveArray::new_unchecked(Buffer::<P>::empty(), Validity::from(n))
165 })
166 })
167 }
168 DType::Decimal(decimal_type, n) => {
169 match_each_decimal_value_type!(
170 DecimalType::smallest_decimal_value_type(decimal_type),
171 |D| {
172 Canonical::Decimal(unsafe {
173 DecimalArray::new_unchecked::<D>(
174 Buffer::empty(),
175 *decimal_type,
176 Validity::from(n),
177 )
178 })
179 }
180 )
181 }
182 DType::Utf8(n) => Canonical::VarBinView(unsafe {
183 VarBinViewArray::new_unchecked(
184 Buffer::empty(),
185 Arc::new([]),
186 dtype.clone(),
187 Validity::from(n),
188 )
189 }),
190 DType::Binary(n) => Canonical::VarBinView(unsafe {
191 VarBinViewArray::new_unchecked(
192 Buffer::empty(),
193 Arc::new([]),
194 dtype.clone(),
195 Validity::from(n),
196 )
197 }),
198 DType::List(dtype, n) => Canonical::List(unsafe {
199 ListViewArray::new_unchecked(
200 Canonical::empty(dtype).into_array(),
201 Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
202 .into_array(),
203 Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
204 .into_array(),
205 Validity::from(n),
206 )
207 .with_zero_copy_to_list(true)
209 }),
210 DType::FixedSizeList(elem_dtype, list_size, null) => Canonical::FixedSizeList(unsafe {
211 FixedSizeListArray::new_unchecked(
212 Canonical::empty(elem_dtype).into_array(),
213 *list_size,
214 Validity::from(null),
215 0,
216 )
217 }),
218 DType::Struct(struct_dtype, n) => Canonical::Struct(unsafe {
219 StructArray::new_unchecked(
220 struct_dtype
221 .fields()
222 .map(|f| Canonical::empty(&f).into_array())
223 .collect::<Arc<[_]>>(),
224 struct_dtype.clone(),
225 0,
226 Validity::from(n),
227 )
228 }),
229 DType::Union(..) => todo!("TODO(connor)[Union]: unimplemented"),
230 DType::Variant(_) => {
231 vortex_panic!(InvalidArgument: "Canonical empty is not supported for Variant")
232 }
233 DType::Extension(ext_dtype) => Canonical::Extension(ExtensionArray::new(
234 ext_dtype.clone(),
235 Canonical::empty(ext_dtype.storage_dtype()).into_array(),
236 )),
237 }
238 }
239
240 pub fn len(&self) -> usize {
241 match_each_canonical!(self, |arr| arr.len())
242 }
243
244 pub fn dtype(&self) -> &DType {
245 match_each_canonical!(self, |arr| arr.dtype())
246 }
247
248 pub fn is_empty(&self) -> bool {
249 match_each_canonical!(self, |arr| arr.is_empty())
250 }
251}
252
253impl Canonical {
254 pub fn compact(&self) -> VortexResult<Canonical> {
262 match self {
263 Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
264 Canonical::List(array) => Ok(Canonical::List(
265 array.rebuild(ListViewRebuildMode::TrimElements)?,
266 )),
267 _ => Ok(self.clone()),
268 }
269 }
270}
271
272impl Canonical {
274 pub fn as_null(&self) -> &NullArray {
275 if let Canonical::Null(a) = self {
276 a
277 } else {
278 vortex_panic!("Cannot get NullArray from {:?}", &self)
279 }
280 }
281
282 pub fn into_null(self) -> NullArray {
283 if let Canonical::Null(a) = self {
284 a
285 } else {
286 vortex_panic!("Cannot unwrap NullArray from {:?}", &self)
287 }
288 }
289
290 pub fn as_bool(&self) -> &BoolArray {
291 if let Canonical::Bool(a) = self {
292 a
293 } else {
294 vortex_panic!("Cannot get BoolArray from {:?}", &self)
295 }
296 }
297
298 pub fn into_bool(self) -> BoolArray {
299 if let Canonical::Bool(a) = self {
300 a
301 } else {
302 vortex_panic!("Cannot unwrap BoolArray from {:?}", &self)
303 }
304 }
305
306 pub fn as_primitive(&self) -> &PrimitiveArray {
307 if let Canonical::Primitive(a) = self {
308 a
309 } else {
310 vortex_panic!("Cannot get PrimitiveArray from {:?}", &self)
311 }
312 }
313
314 pub fn into_primitive(self) -> PrimitiveArray {
315 if let Canonical::Primitive(a) = self {
316 a
317 } else {
318 vortex_panic!("Cannot unwrap PrimitiveArray from {:?}", &self)
319 }
320 }
321
322 pub fn as_decimal(&self) -> &DecimalArray {
323 if let Canonical::Decimal(a) = self {
324 a
325 } else {
326 vortex_panic!("Cannot get DecimalArray from {:?}", &self)
327 }
328 }
329
330 pub fn into_decimal(self) -> DecimalArray {
331 if let Canonical::Decimal(a) = self {
332 a
333 } else {
334 vortex_panic!("Cannot unwrap DecimalArray from {:?}", &self)
335 }
336 }
337
338 pub fn as_varbinview(&self) -> &VarBinViewArray {
339 if let Canonical::VarBinView(a) = self {
340 a
341 } else {
342 vortex_panic!("Cannot get VarBinViewArray from {:?}", &self)
343 }
344 }
345
346 pub fn into_varbinview(self) -> VarBinViewArray {
347 if let Canonical::VarBinView(a) = self {
348 a
349 } else {
350 vortex_panic!("Cannot unwrap VarBinViewArray from {:?}", &self)
351 }
352 }
353
354 pub fn as_listview(&self) -> &ListViewArray {
355 if let Canonical::List(a) = self {
356 a
357 } else {
358 vortex_panic!("Cannot get ListArray from {:?}", &self)
359 }
360 }
361
362 pub fn into_listview(self) -> ListViewArray {
363 if let Canonical::List(a) = self {
364 a
365 } else {
366 vortex_panic!("Cannot unwrap ListArray from {:?}", &self)
367 }
368 }
369
370 pub fn as_fixed_size_list(&self) -> &FixedSizeListArray {
371 if let Canonical::FixedSizeList(a) = self {
372 a
373 } else {
374 vortex_panic!("Cannot get FixedSizeListArray from {:?}", &self)
375 }
376 }
377
378 pub fn into_fixed_size_list(self) -> FixedSizeListArray {
379 if let Canonical::FixedSizeList(a) = self {
380 a
381 } else {
382 vortex_panic!("Cannot unwrap FixedSizeListArray from {:?}", &self)
383 }
384 }
385
386 pub fn as_struct(&self) -> &StructArray {
387 if let Canonical::Struct(a) = self {
388 a
389 } else {
390 vortex_panic!("Cannot get StructArray from {:?}", &self)
391 }
392 }
393
394 pub fn into_struct(self) -> StructArray {
395 if let Canonical::Struct(a) = self {
396 a
397 } else {
398 vortex_panic!("Cannot unwrap StructArray from {:?}", &self)
399 }
400 }
401
402 pub fn as_extension(&self) -> &ExtensionArray {
403 if let Canonical::Extension(a) = self {
404 a
405 } else {
406 vortex_panic!("Cannot get ExtensionArray from {:?}", &self)
407 }
408 }
409
410 pub fn into_extension(self) -> ExtensionArray {
411 if let Canonical::Extension(a) = self {
412 a
413 } else {
414 vortex_panic!("Cannot unwrap ExtensionArray from {:?}", &self)
415 }
416 }
417}
418
419impl IntoArray for Canonical {
420 fn into_array(self) -> ArrayRef {
421 match_each_canonical!(self, |arr| arr.into_array())
422 }
423}
424
/// Deprecated convenience conversions from an array to a specific canonical array type.
///
/// Every method has a replacement of the form `array.execute::<T>(ctx)`, as named in its
/// deprecation note. The implementation for `ArrayRef` in this file panics when canonicalization
/// fails or when the canonical form is a different variant than the one requested.
#[deprecated(note = "use `array.execute::<T>(ctx)` instead")]
pub trait ToCanonical {
    /// Canonicalizes into a [`NullArray`].
    #[deprecated(note = "use `array.execute::<NullArray>(ctx)` instead")]
    fn to_null(&self) -> NullArray;

    /// Canonicalizes into a [`BoolArray`].
    #[deprecated(note = "use `array.execute::<BoolArray>(ctx)` instead")]
    fn to_bool(&self) -> BoolArray;

    /// Canonicalizes into a [`PrimitiveArray`].
    #[deprecated(note = "use `array.execute::<PrimitiveArray>(ctx)` instead")]
    fn to_primitive(&self) -> PrimitiveArray;

    /// Canonicalizes into a [`DecimalArray`].
    #[deprecated(note = "use `array.execute::<DecimalArray>(ctx)` instead")]
    fn to_decimal(&self) -> DecimalArray;

    /// Canonicalizes into a [`StructArray`].
    #[deprecated(note = "use `array.execute::<StructArray>(ctx)` instead")]
    fn to_struct(&self) -> StructArray;

    /// Canonicalizes into a [`ListViewArray`].
    #[deprecated(note = "use `array.execute::<ListViewArray>(ctx)` instead")]
    fn to_listview(&self) -> ListViewArray;

    /// Canonicalizes into a [`FixedSizeListArray`].
    #[deprecated(note = "use `array.execute::<FixedSizeListArray>(ctx)` instead")]
    fn to_fixed_size_list(&self) -> FixedSizeListArray;

    /// Canonicalizes into a [`VarBinViewArray`].
    #[deprecated(note = "use `array.execute::<VarBinViewArray>(ctx)` instead")]
    fn to_varbinview(&self) -> VarBinViewArray;

    /// Canonicalizes into an [`ExtensionArray`].
    #[deprecated(note = "use `array.execute::<ExtensionArray>(ctx)` instead")]
    fn to_extension(&self) -> ExtensionArray;
}
473
474#[expect(deprecated)]
476impl ToCanonical for ArrayRef {
477 fn to_null(&self) -> NullArray {
478 #[expect(deprecated)]
479 let result = self.to_canonical().vortex_expect("to_canonical failed");
480 result.into_null()
481 }
482
483 fn to_bool(&self) -> BoolArray {
484 #[expect(deprecated)]
485 let result = self.to_canonical().vortex_expect("to_canonical failed");
486 result.into_bool()
487 }
488
489 fn to_primitive(&self) -> PrimitiveArray {
490 #[expect(deprecated)]
491 let result = self.to_canonical().vortex_expect("to_canonical failed");
492 result.into_primitive()
493 }
494
495 fn to_decimal(&self) -> DecimalArray {
496 #[expect(deprecated)]
497 let result = self.to_canonical().vortex_expect("to_canonical failed");
498 result.into_decimal()
499 }
500
501 fn to_struct(&self) -> StructArray {
502 #[expect(deprecated)]
503 let result = self.to_canonical().vortex_expect("to_canonical failed");
504 result.into_struct()
505 }
506
507 fn to_listview(&self) -> ListViewArray {
508 #[expect(deprecated)]
509 let result = self.to_canonical().vortex_expect("to_canonical failed");
510 result.into_listview()
511 }
512
513 fn to_fixed_size_list(&self) -> FixedSizeListArray {
514 #[expect(deprecated)]
515 let result = self.to_canonical().vortex_expect("to_canonical failed");
516 result.into_fixed_size_list()
517 }
518
519 fn to_varbinview(&self) -> VarBinViewArray {
520 #[expect(deprecated)]
521 let result = self.to_canonical().vortex_expect("to_canonical failed");
522 result.into_varbinview()
523 }
524
525 fn to_extension(&self) -> ExtensionArray {
526 #[expect(deprecated)]
527 let result = self.to_canonical().vortex_expect("to_canonical failed");
528 result.into_extension()
529 }
530}
531
532impl From<Canonical> for ArrayRef {
533 fn from(value: Canonical) -> Self {
534 match_each_canonical!(value, |arr| arr.into_array())
535 }
536}
537
538impl Executable for Canonical {
544 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
545 let result = array.execute_until::<AnyCanonical>(ctx)?;
546 Ok(result
547 .as_opt::<AnyCanonical>()
548 .map(Canonical::from)
549 .vortex_expect("execute_until::<AnyCanonical> must return a canonical array"))
550 }
551}
552
/// Execution target that wraps a [`Canonical`] whose validity has also been executed.
///
/// See the `Executable` impl for which children are additionally executed per variant.
pub struct CanonicalValidity(pub Canonical);
558
559impl Executable for CanonicalValidity {
560 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
561 match array.execute::<Canonical>(ctx)? {
562 n @ Canonical::Null(_) => Ok(CanonicalValidity(n)),
563 Canonical::Bool(b) => {
564 let validity = child_to_validity(b.slots()[0].as_ref(), b.dtype().nullability());
565 let len = b.len();
566 let BoolDataParts { bits, offset, len } = b.into_data().into_parts(len);
567 Ok(CanonicalValidity(Canonical::Bool(
568 BoolArray::try_new_from_handle(bits, offset, len, validity.execute(ctx)?)?,
569 )))
570 }
571 Canonical::Primitive(p) => {
572 let PrimitiveDataParts {
573 ptype,
574 buffer,
575 validity,
576 } = p.into_data_parts();
577 Ok(CanonicalValidity(Canonical::Primitive(unsafe {
578 PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
579 })))
580 }
581 Canonical::Decimal(d) => {
582 let DecimalDataParts {
583 decimal_dtype,
584 values,
585 values_type,
586 validity,
587 } = d.into_data_parts();
588 Ok(CanonicalValidity(Canonical::Decimal(unsafe {
589 DecimalArray::new_unchecked_handle(
590 values,
591 values_type,
592 decimal_dtype,
593 validity.execute(ctx)?,
594 )
595 })))
596 }
597 Canonical::VarBinView(vbv) => {
598 let VarBinViewDataParts {
599 dtype,
600 buffers,
601 views,
602 validity,
603 } = vbv.into_data_parts();
604 Ok(CanonicalValidity(Canonical::VarBinView(unsafe {
605 VarBinViewArray::new_handle_unchecked(
606 views,
607 buffers,
608 dtype,
609 validity.execute(ctx)?,
610 )
611 })))
612 }
613 Canonical::List(l) => {
614 let zctl = l.is_zero_copy_to_list();
615 let ListViewDataParts {
616 elements,
617 offsets,
618 sizes,
619 validity,
620 ..
621 } = l.into_data_parts();
622 Ok(CanonicalValidity(Canonical::List(unsafe {
623 ListViewArray::new_unchecked(elements, offsets, sizes, validity.execute(ctx)?)
624 .with_zero_copy_to_list(zctl)
625 })))
626 }
627 Canonical::FixedSizeList(fsl) => {
628 let list_size = fsl.list_size();
629 let len = fsl.len();
630 let parts = fsl.into_data_parts();
631 let elements = parts.elements;
632 let validity = parts.validity;
633 Ok(CanonicalValidity(Canonical::FixedSizeList(
634 FixedSizeListArray::new(elements, list_size, validity.execute(ctx)?, len),
635 )))
636 }
637 Canonical::Struct(st) => {
638 let len = st.len();
639 let StructDataParts {
640 struct_fields,
641 fields,
642 validity,
643 } = st.into_data_parts();
644 Ok(CanonicalValidity(Canonical::Struct(unsafe {
645 StructArray::new_unchecked(fields, struct_fields, len, validity.execute(ctx)?)
646 })))
647 }
648 Canonical::Extension(ext) => Ok(CanonicalValidity(Canonical::Extension(
649 ExtensionArray::new(
650 ext.ext_dtype().clone(),
651 ext.storage_array()
652 .clone()
653 .execute::<CanonicalValidity>(ctx)?
654 .0
655 .into_array(),
656 ),
657 ))),
658 Canonical::Variant(variant) => {
659 let core_storage = recursively_canonicalize_slots(variant.core_storage(), ctx)?;
660 let shredded = variant
661 .shredded()
662 .map(|shredded| {
663 shredded
664 .clone()
665 .execute::<CanonicalValidity>(ctx)
666 .map(|canonical| canonical.0.into_array())
667 })
668 .transpose()?;
669 Ok(CanonicalValidity(Canonical::Variant(
670 VariantArray::try_new(core_storage, shredded)?,
671 )))
672 }
673 }
674 }
675}
676
/// Execution target that wraps a [`Canonical`] whose validity and child arrays have also been
/// executed, with the children themselves executed to `RecursiveCanonical`.
///
/// See the `Executable` impl for exactly which children are executed per variant.
pub struct RecursiveCanonical(pub Canonical);
682
683fn recursively_canonicalize_slots(
686 array: &ArrayRef,
687 ctx: &mut ExecutionCtx,
688) -> VortexResult<ArrayRef> {
689 let slots = array
690 .slots()
691 .iter()
692 .map(|slot| {
693 slot.as_ref()
694 .map(|child| {
695 child
696 .clone()
697 .execute::<RecursiveCanonical>(ctx)
698 .map(|canonical| canonical.0.into_array())
699 })
700 .transpose()
701 })
702 .collect::<VortexResult<ArraySlots>>()?;
703 array.clone().with_slots(slots)
704}
705
706impl Executable for RecursiveCanonical {
707 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
708 match array.execute::<Canonical>(ctx)? {
709 n @ Canonical::Null(_) => Ok(RecursiveCanonical(n)),
710 Canonical::Bool(b) => {
711 let validity = child_to_validity(b.slots()[0].as_ref(), b.dtype().nullability());
712 let len = b.len();
713 let BoolDataParts { bits, offset, len } = b.into_data().into_parts(len);
714 Ok(RecursiveCanonical(Canonical::Bool(
715 BoolArray::try_new_from_handle(bits, offset, len, validity.execute(ctx)?)?,
716 )))
717 }
718 Canonical::Primitive(p) => {
719 let PrimitiveDataParts {
720 ptype,
721 buffer,
722 validity,
723 } = p.into_data_parts();
724 Ok(RecursiveCanonical(Canonical::Primitive(unsafe {
725 PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
726 })))
727 }
728 Canonical::Decimal(d) => {
729 let DecimalDataParts {
730 decimal_dtype,
731 values,
732 values_type,
733 validity,
734 } = d.into_data_parts();
735 Ok(RecursiveCanonical(Canonical::Decimal(unsafe {
736 DecimalArray::new_unchecked_handle(
737 values,
738 values_type,
739 decimal_dtype,
740 validity.execute(ctx)?,
741 )
742 })))
743 }
744 Canonical::VarBinView(vbv) => {
745 let VarBinViewDataParts {
746 dtype,
747 buffers,
748 views,
749 validity,
750 } = vbv.into_data_parts();
751 Ok(RecursiveCanonical(Canonical::VarBinView(unsafe {
752 VarBinViewArray::new_handle_unchecked(
753 views,
754 buffers,
755 dtype,
756 validity.execute(ctx)?,
757 )
758 })))
759 }
760 Canonical::List(l) => {
761 let zctl = l.is_zero_copy_to_list();
762 let ListViewDataParts {
763 elements,
764 offsets,
765 sizes,
766 validity,
767 ..
768 } = l.into_data_parts();
769 Ok(RecursiveCanonical(Canonical::List(unsafe {
770 ListViewArray::new_unchecked(
771 elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
772 offsets.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
773 sizes.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
774 validity.execute(ctx)?,
775 )
776 .with_zero_copy_to_list(zctl)
777 })))
778 }
779 Canonical::FixedSizeList(fsl) => {
780 let list_size = fsl.list_size();
781 let len = fsl.len();
782 let parts = fsl.into_data_parts();
783 let elements = parts.elements;
784 let validity = parts.validity;
785 Ok(RecursiveCanonical(Canonical::FixedSizeList(
786 FixedSizeListArray::new(
787 elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
788 list_size,
789 validity.execute(ctx)?,
790 len,
791 ),
792 )))
793 }
794 Canonical::Struct(st) => {
795 let len = st.len();
796 let StructDataParts {
797 struct_fields,
798 fields,
799 validity,
800 } = st.into_data_parts();
801 let executed_fields = fields
802 .iter()
803 .map(|f| Ok(f.clone().execute::<RecursiveCanonical>(ctx)?.0.into_array()))
804 .collect::<VortexResult<Arc<[_]>>>()?;
805
806 Ok(RecursiveCanonical(Canonical::Struct(unsafe {
807 StructArray::new_unchecked(
808 executed_fields,
809 struct_fields,
810 len,
811 validity.execute(ctx)?,
812 )
813 })))
814 }
815 Canonical::Extension(ext) => Ok(RecursiveCanonical(Canonical::Extension(
816 ExtensionArray::new(
817 ext.ext_dtype().clone(),
818 ext.storage_array()
819 .clone()
820 .execute::<RecursiveCanonical>(ctx)?
821 .0
822 .into_array(),
823 ),
824 ))),
825 Canonical::Variant(variant) => {
826 let core_storage = recursively_canonicalize_slots(variant.core_storage(), ctx)?;
827 let shredded = variant
828 .shredded()
829 .map(|shredded| {
830 shredded
831 .clone()
832 .execute::<RecursiveCanonical>(ctx)
833 .map(|canonical| canonical.0.into_array())
834 })
835 .transpose()?;
836 Ok(RecursiveCanonical(Canonical::Variant(
837 VariantArray::try_new(core_storage, shredded)?,
838 )))
839 }
840 }
841 }
842}
843
844impl<T: NativePType> Executable for Buffer<T> {
850 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
851 let array = PrimitiveArray::execute(array, ctx)?;
852 vortex_ensure!(
853 matches!(
854 array.validity()?,
855 Validity::NonNullable | Validity::AllValid
856 ),
857 "Cannot execute to native buffer: array is not all-valid."
858 );
859 Ok(array.into_buffer())
860 }
861}
862
863impl Executable for PrimitiveArray {
867 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
868 match array.try_downcast::<Primitive>() {
869 Ok(primitive) => Ok(primitive),
870 Err(array) => Ok(Canonical::execute(array, ctx)?.into_primitive()),
871 }
872 }
873}
874
875impl Executable for BoolArray {
879 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
880 match array.try_downcast::<Bool>() {
881 Ok(bool_array) => Ok(bool_array),
882 Err(array) => Ok(Canonical::execute(array, ctx)?.into_bool()),
883 }
884 }
885}
886
887impl Executable for BitBuffer {
891 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
892 let bool = BoolArray::execute(array, ctx)?;
893 assert!(
894 !bool.dtype().is_nullable(),
895 "bit buffer execute only works with non-nullable bool arrays"
896 );
897 Ok(bool.into_bit_buffer())
898 }
899}
900
901impl Executable for NullArray {
905 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
906 match array.try_downcast::<Null>() {
907 Ok(null_array) => Ok(null_array),
908 Err(array) => Ok(Canonical::execute(array, ctx)?.into_null()),
909 }
910 }
911}
912
913impl Executable for VarBinViewArray {
917 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
918 match array.try_downcast::<VarBinView>() {
919 Ok(varbinview) => Ok(varbinview),
920 Err(array) => Ok(Canonical::execute(array, ctx)?.into_varbinview()),
921 }
922 }
923}
924
925impl Executable for ExtensionArray {
929 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
930 match array.try_downcast::<Extension>() {
931 Ok(ext_array) => Ok(ext_array),
932 Err(array) => Ok(Canonical::execute(array, ctx)?.into_extension()),
933 }
934 }
935}
936
937impl Executable for DecimalArray {
941 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
942 match array.try_downcast::<Decimal>() {
943 Ok(decimal) => Ok(decimal),
944 Err(array) => Ok(Canonical::execute(array, ctx)?.into_decimal()),
945 }
946 }
947}
948
949impl Executable for ListViewArray {
953 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
954 match array.try_downcast::<ListView>() {
955 Ok(list) => Ok(list),
956 Err(array) => Ok(Canonical::execute(array, ctx)?.into_listview()),
957 }
958 }
959}
960
961impl Executable for FixedSizeListArray {
965 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
966 match array.try_downcast::<FixedSizeList>() {
967 Ok(fsl) => Ok(fsl),
968 Err(array) => Ok(Canonical::execute(array, ctx)?.into_fixed_size_list()),
969 }
970 }
971}
972
973impl Executable for StructArray {
977 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
978 match array.try_downcast::<Struct>() {
979 Ok(struct_array) => Ok(struct_array),
980 Err(array) => Ok(Canonical::execute(array, ctx)?.into_struct()),
981 }
982 }
983}
984
985impl Executable for VariantArray {
989 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
990 match array.try_downcast::<Variant>() {
991 Ok(variant_array) => Ok(variant_array),
992 Err(array) => match Canonical::execute(array, ctx)? {
993 Canonical::Variant(variant_array) => Ok(variant_array),
994 canonical => vortex_panic!("Cannot unwrap VariantArray from {:?}", canonical),
995 },
996 }
997 }
998}
999
/// A borrowed counterpart of [`Canonical`]: one variant per canonical array type, each holding an
/// [`ArrayView`] of that type instead of an owned array.
///
/// Convert to an owned [`Canonical`] with `Canonical::from`.
#[derive(Debug, Clone, Copy)]
pub enum CanonicalView<'a> {
    /// View of a `Null` array.
    Null(ArrayView<'a, Null>),
    /// View of a `Bool` array.
    Bool(ArrayView<'a, Bool>),
    /// View of a `Primitive` array.
    Primitive(ArrayView<'a, Primitive>),
    /// View of a `Decimal` array.
    Decimal(ArrayView<'a, Decimal>),
    /// View of a `VarBinView` array.
    VarBinView(ArrayView<'a, VarBinView>),
    /// View of a `ListView` array.
    List(ArrayView<'a, ListView>),
    /// View of a `FixedSizeList` array.
    FixedSizeList(ArrayView<'a, FixedSizeList>),
    /// View of a `Struct` array.
    Struct(ArrayView<'a, Struct>),
    /// View of an `Extension` array.
    Extension(ArrayView<'a, Extension>),
    /// View of a `Variant` array.
    Variant(ArrayView<'a, Variant>),
}
1017
1018impl From<CanonicalView<'_>> for Canonical {
1019 fn from(value: CanonicalView<'_>) -> Self {
1020 match value {
1021 CanonicalView::Null(a) => Canonical::Null(a.into_owned()),
1022 CanonicalView::Bool(a) => Canonical::Bool(a.into_owned()),
1023 CanonicalView::Primitive(a) => Canonical::Primitive(a.into_owned()),
1024 CanonicalView::Decimal(a) => Canonical::Decimal(a.into_owned()),
1025 CanonicalView::VarBinView(a) => Canonical::VarBinView(a.into_owned()),
1026 CanonicalView::List(a) => Canonical::List(a.into_owned()),
1027 CanonicalView::FixedSizeList(a) => Canonical::FixedSizeList(a.into_owned()),
1028 CanonicalView::Struct(a) => Canonical::Struct(a.into_owned()),
1029 CanonicalView::Extension(a) => Canonical::Extension(a.into_owned()),
1030 CanonicalView::Variant(a) => Canonical::Variant(a.into_owned()),
1031 }
1032 }
1033}
1034
1035impl CanonicalView<'_> {
1036 pub fn to_array_ref(&self) -> ArrayRef {
1038 match self {
1039 CanonicalView::Null(a) => a.array().clone(),
1040 CanonicalView::Bool(a) => a.array().clone(),
1041 CanonicalView::Primitive(a) => a.array().clone(),
1042 CanonicalView::Decimal(a) => a.array().clone(),
1043 CanonicalView::VarBinView(a) => a.array().clone(),
1044 CanonicalView::List(a) => a.array().clone(),
1045 CanonicalView::FixedSizeList(a) => a.array().clone(),
1046 CanonicalView::Struct(a) => a.array().clone(),
1047 CanonicalView::Extension(a) => a.array().clone(),
1048 CanonicalView::Variant(a) => a.array().clone(),
1049 }
1050 }
1051}
1052
/// A [`Matcher`] that accepts an array of any canonical array type and yields a
/// [`CanonicalView`] of it.
pub struct AnyCanonical;
1055impl Matcher for AnyCanonical {
1056 type Match<'a> = CanonicalView<'a>;
1057
1058 fn matches(array: &ArrayRef) -> bool {
1059 array.is::<Null>()
1060 || array.is::<Bool>()
1061 || array.is::<Primitive>()
1062 || array.is::<Decimal>()
1063 || array.is::<Struct>()
1064 || array.is::<ListView>()
1065 || array.is::<FixedSizeList>()
1066 || array.is::<VarBinView>()
1067 || array.is::<Variant>()
1068 || array.is::<Extension>()
1069 }
1070
1071 fn try_match(array: &ArrayRef) -> Option<Self::Match<'_>> {
1072 if let Some(a) = array.as_opt::<Null>() {
1073 Some(CanonicalView::Null(a))
1074 } else if let Some(a) = array.as_opt::<Bool>() {
1075 Some(CanonicalView::Bool(a))
1076 } else if let Some(a) = array.as_opt::<Primitive>() {
1077 Some(CanonicalView::Primitive(a))
1078 } else if let Some(a) = array.as_opt::<Decimal>() {
1079 Some(CanonicalView::Decimal(a))
1080 } else if let Some(a) = array.as_opt::<Struct>() {
1081 Some(CanonicalView::Struct(a))
1082 } else if let Some(a) = array.as_opt::<ListView>() {
1083 Some(CanonicalView::List(a))
1084 } else if let Some(a) = array.as_opt::<FixedSizeList>() {
1085 Some(CanonicalView::FixedSizeList(a))
1086 } else if let Some(a) = array.as_opt::<VarBinView>() {
1087 Some(CanonicalView::VarBinView(a))
1088 } else if let Some(a) = array.as_opt::<Variant>() {
1089 Some(CanonicalView::Variant(a))
1090 } else {
1091 array.as_opt::<Extension>().map(CanonicalView::Extension)
1092 }
1093 }
1094}
1095
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_array::Array as ArrowArray;
    use arrow_array::ArrayRef as ArrowArrayRef;
    use arrow_array::ListArray as ArrowListArray;
    use arrow_array::PrimitiveArray as ArrowPrimitiveArray;
    use arrow_array::StringArray;
    use arrow_array::StringViewArray;
    use arrow_array::StructArray as ArrowStructArray;
    use arrow_array::cast::AsArray;
    use arrow_array::types::Int32Type;
    use arrow_array::types::Int64Type;
    use arrow_array::types::UInt64Type;
    use arrow_buffer::NullBufferBuilder;
    use arrow_buffer::OffsetBuffer;
    use arrow_schema::DataType;
    use arrow_schema::Field;
    use vortex_buffer::buffer;

    use crate::ArrayRef;
    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::ConstantArray;
    use crate::arrow::ArrowSessionExt;
    use crate::arrow::FromArrowArray;
    use crate::canonical::StructArray;

    /// A struct with a nested struct whose only field is a constant array must export to Arrow
    /// with plain primitive columns at both levels.
    #[test]
    fn test_canonicalize_nested_struct() {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();

        // Inner struct: a single field backed by a constant (non-canonical) array.
        let inner = StructArray::from_fields(&[(
            "inner_a",
            ConstantArray::new(100i64, 1).into_array(),
        )])
        .unwrap()
        .into_array();
        let nested_struct_array =
            StructArray::from_fields(&[("a", buffer![1u64].into_array()), ("b", inner)]).unwrap();

        let exported = LEGACY_SESSION
            .arrow()
            .execute_arrow(nested_struct_array.into_array(), None, &mut ctx)
            .unwrap();
        let arrow_struct = exported
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // Column "a" is a u64 primitive array.
        assert!(
            arrow_struct
                .column(0)
                .as_any()
                .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>()
                .is_some()
        );

        // Column "b" is itself a struct.
        let inner_struct = arrow_struct
            .column(1)
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        let inner_a = inner_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
        assert!(inner_a.is_some());

        assert_eq!(
            inner_a.unwrap().clone(),
            ArrowPrimitiveArray::from_iter([100i64])
        );
    }

    /// An Arrow struct with nullable fields and struct-level nulls survives a trip through Vortex.
    #[test]
    fn roundtrip_struct() {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();

        // Struct-level validity: rows 0..=3 valid, row 4 null, row 5 valid.
        let mut struct_nulls = NullBufferBuilder::new(6);
        struct_nulls.append_n_non_nulls(4);
        struct_nulls.append_null();
        struct_nulls.append_non_null();

        let name_column = Arc::new(StringViewArray::from_iter(vec![
            Some("Joseph"),
            None,
            Some("Angela"),
            Some("Mikhail"),
            None,
            None,
        ]));
        let age_column = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
            Some(25),
            Some(31),
            None,
            Some(57),
            None,
            None,
        ]));

        let arrow_struct = ArrowStructArray::new(
            vec![
                Arc::new(Field::new("name", DataType::Utf8View, true)),
                Arc::new(Field::new("age", DataType::Int32, true)),
            ]
            .into(),
            vec![name_column, age_column],
            struct_nulls.finish(),
        );

        let imported = ArrayRef::from_arrow(&arrow_struct, true).unwrap();
        let round_tripped = LEGACY_SESSION
            .arrow()
            .execute_arrow(imported, None, &mut ctx)
            .unwrap();
        assert_eq!(&arrow_struct, round_tripped.as_struct());
    }

    /// An Arrow list of strings survives a trip through Vortex when exported with the original
    /// list field as the target.
    #[test]
    fn roundtrip_list() {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();

        let values = Arc::new(StringArray::from_iter(vec![
            Some("Joseph"),
            Some("Angela"),
            Some("Mikhail"),
        ]));

        let arrow_list = ArrowListArray::new(
            Arc::new(Field::new_list_field(DataType::Utf8, true)),
            OffsetBuffer::from_lengths(vec![0, 2, 1]),
            values,
            None,
        );
        let target_type = arrow_list.data_type();
        let target_field = Field::new(String::new(), target_type.clone(), true);

        let imported = ArrayRef::from_arrow(&arrow_list, true).unwrap();

        let round_tripped = LEGACY_SESSION
            .arrow()
            .execute_arrow(imported, Some(&target_field), &mut ctx)
            .unwrap();

        assert_eq!(
            (Arc::new(arrow_list.clone()) as ArrowArrayRef).as_ref(),
            round_tripped.as_ref()
        );
    }
}