1use std::sync::Arc;
7
8use vortex_buffer::BitBuffer;
9use vortex_buffer::Buffer;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_ensure;
13use vortex_error::vortex_panic;
14
15use crate::ArrayRef;
16use crate::ArraySlots;
17use crate::Executable;
18use crate::ExecutionCtx;
19use crate::IntoArray;
20use crate::array::ArrayView;
21use crate::array::child_to_validity;
22use crate::arrays::Bool;
23use crate::arrays::BoolArray;
24use crate::arrays::Decimal;
25use crate::arrays::DecimalArray;
26use crate::arrays::Extension;
27use crate::arrays::ExtensionArray;
28use crate::arrays::FixedSizeList;
29use crate::arrays::FixedSizeListArray;
30use crate::arrays::ListView;
31use crate::arrays::ListViewArray;
32use crate::arrays::Null;
33use crate::arrays::NullArray;
34use crate::arrays::Primitive;
35use crate::arrays::PrimitiveArray;
36use crate::arrays::Struct;
37use crate::arrays::StructArray;
38use crate::arrays::VarBinView;
39use crate::arrays::VarBinViewArray;
40use crate::arrays::Variant;
41use crate::arrays::VariantArray;
42use crate::arrays::bool::BoolDataParts;
43use crate::arrays::decimal::DecimalDataParts;
44use crate::arrays::extension::ExtensionArrayExt;
45use crate::arrays::fixed_size_list::FixedSizeListArrayExt;
46use crate::arrays::listview::ListViewDataParts;
47use crate::arrays::listview::ListViewRebuildMode;
48use crate::arrays::primitive::PrimitiveDataParts;
49use crate::arrays::struct_::StructDataParts;
50use crate::arrays::varbinview::VarBinViewDataParts;
51use crate::arrays::variant::VariantArrayExt;
52use crate::dtype::DType;
53use crate::dtype::NativePType;
54use crate::dtype::Nullability;
55use crate::dtype::PType;
56use crate::match_each_decimal_value_type;
57use crate::match_each_native_ptype;
58use crate::matcher::Matcher;
59use crate::validity::Validity;
60
61#[derive(Debug, Clone)]
122pub enum Canonical {
123 Null(NullArray),
124 Bool(BoolArray),
125 Primitive(PrimitiveArray),
126 Decimal(DecimalArray),
127 VarBinView(VarBinViewArray),
128 List(ListViewArray),
129 FixedSizeList(FixedSizeListArray),
130 Struct(StructArray),
131 Extension(ExtensionArray),
132 Variant(VariantArray),
133}
134
/// Dispatches over every [`Canonical`] variant.
///
/// The wrapped array is bound to `$ident` and `$eval` is expanded once per variant, so the
/// expression must type-check for every canonical array type.
macro_rules! match_each_canonical {
    ($self:expr, | $ident:ident | $eval:expr) => {{
        match $self {
            Canonical::Null($ident) => $eval,
            Canonical::Bool($ident) => $eval,
            Canonical::Primitive($ident) => $eval,
            Canonical::Decimal($ident) => $eval,
            Canonical::VarBinView($ident) => $eval,
            Canonical::List($ident) => $eval,
            Canonical::FixedSizeList($ident) => $eval,
            Canonical::Struct($ident) => $eval,
            Canonical::Extension($ident) => $eval,
            Canonical::Variant($ident) => $eval,
        }
    }};
}
152
153impl Canonical {
154 pub fn empty(dtype: &DType) -> Canonical {
156 match dtype {
157 DType::Null => Canonical::Null(NullArray::new(0)),
158 DType::Bool(n) => Canonical::Bool(unsafe {
159 BoolArray::new_unchecked(BitBuffer::empty(), Validity::from(n))
160 }),
161 DType::Primitive(ptype, n) => {
162 match_each_native_ptype!(ptype, |P| {
163 Canonical::Primitive(unsafe {
164 PrimitiveArray::new_unchecked(Buffer::<P>::empty(), Validity::from(n))
165 })
166 })
167 }
168 DType::Decimal(decimal_type, n) => {
169 match_each_decimal_value_type!(
170 DecimalType::smallest_decimal_value_type(decimal_type),
171 |D| {
172 Canonical::Decimal(unsafe {
173 DecimalArray::new_unchecked::<D>(
174 Buffer::empty(),
175 *decimal_type,
176 Validity::from(n),
177 )
178 })
179 }
180 )
181 }
182 DType::Utf8(n) => Canonical::VarBinView(unsafe {
183 VarBinViewArray::new_unchecked(
184 Buffer::empty(),
185 Arc::new([]),
186 dtype.clone(),
187 Validity::from(n),
188 )
189 }),
190 DType::Binary(n) => Canonical::VarBinView(unsafe {
191 VarBinViewArray::new_unchecked(
192 Buffer::empty(),
193 Arc::new([]),
194 dtype.clone(),
195 Validity::from(n),
196 )
197 }),
198 DType::List(dtype, n) => Canonical::List(unsafe {
199 ListViewArray::new_unchecked(
200 Canonical::empty(dtype).into_array(),
201 Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
202 .into_array(),
203 Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
204 .into_array(),
205 Validity::from(n),
206 )
207 .with_zero_copy_to_list(true)
209 }),
210 DType::FixedSizeList(elem_dtype, list_size, null) => Canonical::FixedSizeList(unsafe {
211 FixedSizeListArray::new_unchecked(
212 Canonical::empty(elem_dtype).into_array(),
213 *list_size,
214 Validity::from(null),
215 0,
216 )
217 }),
218 DType::Struct(struct_dtype, n) => Canonical::Struct(unsafe {
219 StructArray::new_unchecked(
220 struct_dtype
221 .fields()
222 .map(|f| Canonical::empty(&f).into_array())
223 .collect::<Arc<[_]>>(),
224 struct_dtype.clone(),
225 0,
226 Validity::from(n),
227 )
228 }),
229 DType::Union(..) => todo!("TODO(connor)[Union]: unimplemented"),
230 DType::Variant(_) => {
231 vortex_panic!(InvalidArgument: "Canonical empty is not supported for Variant")
232 }
233 DType::Extension(ext_dtype) => Canonical::Extension(ExtensionArray::new(
234 ext_dtype.clone(),
235 Canonical::empty(ext_dtype.storage_dtype()).into_array(),
236 )),
237 }
238 }
239
240 pub fn len(&self) -> usize {
241 match_each_canonical!(self, |arr| arr.len())
242 }
243
244 pub fn dtype(&self) -> &DType {
245 match_each_canonical!(self, |arr| arr.dtype())
246 }
247
248 pub fn is_empty(&self) -> bool {
249 match_each_canonical!(self, |arr| arr.is_empty())
250 }
251}
252
253impl Canonical {
254 pub fn compact(&self) -> VortexResult<Canonical> {
262 match self {
263 Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
264 Canonical::List(array) => Ok(Canonical::List(
265 array.rebuild(ListViewRebuildMode::TrimElements)?,
266 )),
267 _ => Ok(self.clone()),
268 }
269 }
270}
271
272impl Canonical {
274 pub fn as_null(&self) -> &NullArray {
275 if let Canonical::Null(a) = self {
276 a
277 } else {
278 vortex_panic!("Cannot get NullArray from {:?}", &self)
279 }
280 }
281
282 pub fn into_null(self) -> NullArray {
283 if let Canonical::Null(a) = self {
284 a
285 } else {
286 vortex_panic!("Cannot unwrap NullArray from {:?}", &self)
287 }
288 }
289
290 pub fn as_bool(&self) -> &BoolArray {
291 if let Canonical::Bool(a) = self {
292 a
293 } else {
294 vortex_panic!("Cannot get BoolArray from {:?}", &self)
295 }
296 }
297
298 pub fn into_bool(self) -> BoolArray {
299 if let Canonical::Bool(a) = self {
300 a
301 } else {
302 vortex_panic!("Cannot unwrap BoolArray from {:?}", &self)
303 }
304 }
305
306 pub fn as_primitive(&self) -> &PrimitiveArray {
307 if let Canonical::Primitive(a) = self {
308 a
309 } else {
310 vortex_panic!("Cannot get PrimitiveArray from {:?}", &self)
311 }
312 }
313
314 pub fn into_primitive(self) -> PrimitiveArray {
315 if let Canonical::Primitive(a) = self {
316 a
317 } else {
318 vortex_panic!("Cannot unwrap PrimitiveArray from {:?}", &self)
319 }
320 }
321
322 pub fn as_decimal(&self) -> &DecimalArray {
323 if let Canonical::Decimal(a) = self {
324 a
325 } else {
326 vortex_panic!("Cannot get DecimalArray from {:?}", &self)
327 }
328 }
329
330 pub fn into_decimal(self) -> DecimalArray {
331 if let Canonical::Decimal(a) = self {
332 a
333 } else {
334 vortex_panic!("Cannot unwrap DecimalArray from {:?}", &self)
335 }
336 }
337
338 pub fn as_varbinview(&self) -> &VarBinViewArray {
339 if let Canonical::VarBinView(a) = self {
340 a
341 } else {
342 vortex_panic!("Cannot get VarBinViewArray from {:?}", &self)
343 }
344 }
345
346 pub fn into_varbinview(self) -> VarBinViewArray {
347 if let Canonical::VarBinView(a) = self {
348 a
349 } else {
350 vortex_panic!("Cannot unwrap VarBinViewArray from {:?}", &self)
351 }
352 }
353
354 pub fn as_listview(&self) -> &ListViewArray {
355 if let Canonical::List(a) = self {
356 a
357 } else {
358 vortex_panic!("Cannot get ListArray from {:?}", &self)
359 }
360 }
361
362 pub fn into_listview(self) -> ListViewArray {
363 if let Canonical::List(a) = self {
364 a
365 } else {
366 vortex_panic!("Cannot unwrap ListArray from {:?}", &self)
367 }
368 }
369
370 pub fn as_fixed_size_list(&self) -> &FixedSizeListArray {
371 if let Canonical::FixedSizeList(a) = self {
372 a
373 } else {
374 vortex_panic!("Cannot get FixedSizeListArray from {:?}", &self)
375 }
376 }
377
378 pub fn into_fixed_size_list(self) -> FixedSizeListArray {
379 if let Canonical::FixedSizeList(a) = self {
380 a
381 } else {
382 vortex_panic!("Cannot unwrap FixedSizeListArray from {:?}", &self)
383 }
384 }
385
386 pub fn as_struct(&self) -> &StructArray {
387 if let Canonical::Struct(a) = self {
388 a
389 } else {
390 vortex_panic!("Cannot get StructArray from {:?}", &self)
391 }
392 }
393
394 pub fn into_struct(self) -> StructArray {
395 if let Canonical::Struct(a) = self {
396 a
397 } else {
398 vortex_panic!("Cannot unwrap StructArray from {:?}", &self)
399 }
400 }
401
402 pub fn as_extension(&self) -> &ExtensionArray {
403 if let Canonical::Extension(a) = self {
404 a
405 } else {
406 vortex_panic!("Cannot get ExtensionArray from {:?}", &self)
407 }
408 }
409
410 pub fn into_extension(self) -> ExtensionArray {
411 if let Canonical::Extension(a) = self {
412 a
413 } else {
414 vortex_panic!("Cannot unwrap ExtensionArray from {:?}", &self)
415 }
416 }
417}
418
419impl IntoArray for Canonical {
420 fn into_array(self) -> ArrayRef {
421 match_each_canonical!(self, |arr| arr.into_array())
422 }
423}
424
425#[deprecated(note = "use `array.execute::<T>(ctx)` instead")]
431pub trait ToCanonical {
432 #[deprecated(note = "use `array.execute::<NullArray>(ctx)` instead")]
434 fn to_null(&self) -> NullArray;
435
436 #[deprecated(note = "use `array.execute::<BoolArray>(ctx)` instead")]
438 fn to_bool(&self) -> BoolArray;
439
440 #[deprecated(note = "use `array.execute::<PrimitiveArray>(ctx)` instead")]
443 fn to_primitive(&self) -> PrimitiveArray;
444
445 #[deprecated(note = "use `array.execute::<DecimalArray>(ctx)` instead")]
448 fn to_decimal(&self) -> DecimalArray;
449
450 #[deprecated(note = "use `array.execute::<StructArray>(ctx)` instead")]
452 fn to_struct(&self) -> StructArray;
453
454 #[deprecated(note = "use `array.execute::<ListViewArray>(ctx)` instead")]
456 fn to_listview(&self) -> ListViewArray;
457
458 #[deprecated(note = "use `array.execute::<FixedSizeListArray>(ctx)` instead")]
461 fn to_fixed_size_list(&self) -> FixedSizeListArray;
462
463 #[deprecated(note = "use `array.execute::<VarBinViewArray>(ctx)` instead")]
466 fn to_varbinview(&self) -> VarBinViewArray;
467
468 #[deprecated(note = "use `array.execute::<ExtensionArray>(ctx)` instead")]
471 fn to_extension(&self) -> ExtensionArray;
472}
473
474#[expect(deprecated)]
476impl ToCanonical for ArrayRef {
477 fn to_null(&self) -> NullArray {
478 #[expect(deprecated)]
479 let result = self.to_canonical().vortex_expect("to_canonical failed");
480 result.into_null()
481 }
482
483 fn to_bool(&self) -> BoolArray {
484 #[expect(deprecated)]
485 let result = self.to_canonical().vortex_expect("to_canonical failed");
486 result.into_bool()
487 }
488
489 fn to_primitive(&self) -> PrimitiveArray {
490 #[expect(deprecated)]
491 let result = self.to_canonical().vortex_expect("to_canonical failed");
492 result.into_primitive()
493 }
494
495 fn to_decimal(&self) -> DecimalArray {
496 #[expect(deprecated)]
497 let result = self.to_canonical().vortex_expect("to_canonical failed");
498 result.into_decimal()
499 }
500
501 fn to_struct(&self) -> StructArray {
502 #[expect(deprecated)]
503 let result = self.to_canonical().vortex_expect("to_canonical failed");
504 result.into_struct()
505 }
506
507 fn to_listview(&self) -> ListViewArray {
508 #[expect(deprecated)]
509 let result = self.to_canonical().vortex_expect("to_canonical failed");
510 result.into_listview()
511 }
512
513 fn to_fixed_size_list(&self) -> FixedSizeListArray {
514 #[expect(deprecated)]
515 let result = self.to_canonical().vortex_expect("to_canonical failed");
516 result.into_fixed_size_list()
517 }
518
519 fn to_varbinview(&self) -> VarBinViewArray {
520 #[expect(deprecated)]
521 let result = self.to_canonical().vortex_expect("to_canonical failed");
522 result.into_varbinview()
523 }
524
525 fn to_extension(&self) -> ExtensionArray {
526 #[expect(deprecated)]
527 let result = self.to_canonical().vortex_expect("to_canonical failed");
528 result.into_extension()
529 }
530}
531
532impl From<Canonical> for ArrayRef {
533 fn from(value: Canonical) -> Self {
534 match_each_canonical!(value, |arr| arr.into_array())
535 }
536}
537
538impl Executable for Canonical {
544 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
545 let result = array.execute_until::<AnyCanonical>(ctx)?;
546 Ok(result
547 .as_opt::<AnyCanonical>()
548 .map(Canonical::from)
549 .vortex_expect("execute_until::<AnyCanonical> must return a canonical array"))
550 }
551}
552
553pub struct CanonicalValidity(pub Canonical);
558
559impl Executable for CanonicalValidity {
560 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
561 match array.execute::<Canonical>(ctx)? {
562 n @ Canonical::Null(_) => Ok(CanonicalValidity(n)),
563 Canonical::Bool(b) => {
564 let validity = child_to_validity(b.slots()[0].as_ref(), b.dtype().nullability());
565 let len = b.len();
566 let BoolDataParts { bits, offset, len } = b.into_data().into_parts(len);
567 Ok(CanonicalValidity(Canonical::Bool(
568 BoolArray::try_new_from_handle(bits, offset, len, validity.execute(ctx)?)?,
569 )))
570 }
571 Canonical::Primitive(p) => {
572 let PrimitiveDataParts {
573 ptype,
574 buffer,
575 validity,
576 } = p.into_data_parts();
577 Ok(CanonicalValidity(Canonical::Primitive(unsafe {
578 PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
579 })))
580 }
581 Canonical::Decimal(d) => {
582 let DecimalDataParts {
583 decimal_dtype,
584 values,
585 values_type,
586 validity,
587 } = d.into_data_parts();
588 Ok(CanonicalValidity(Canonical::Decimal(unsafe {
589 DecimalArray::new_unchecked_handle(
590 values,
591 values_type,
592 decimal_dtype,
593 validity.execute(ctx)?,
594 )
595 })))
596 }
597 Canonical::VarBinView(vbv) => {
598 let VarBinViewDataParts {
599 dtype,
600 buffers,
601 views,
602 validity,
603 } = vbv.into_data_parts();
604 Ok(CanonicalValidity(Canonical::VarBinView(unsafe {
605 VarBinViewArray::new_handle_unchecked(
606 views,
607 buffers,
608 dtype,
609 validity.execute(ctx)?,
610 )
611 })))
612 }
613 Canonical::List(l) => {
614 let zctl = l.is_zero_copy_to_list();
615 let ListViewDataParts {
616 elements,
617 offsets,
618 sizes,
619 validity,
620 ..
621 } = l.into_data_parts();
622 Ok(CanonicalValidity(Canonical::List(unsafe {
623 ListViewArray::new_unchecked(elements, offsets, sizes, validity.execute(ctx)?)
624 .with_zero_copy_to_list(zctl)
625 })))
626 }
627 Canonical::FixedSizeList(fsl) => {
628 let list_size = fsl.list_size();
629 let len = fsl.len();
630 let parts = fsl.into_data_parts();
631 let elements = parts.elements;
632 let validity = parts.validity;
633 Ok(CanonicalValidity(Canonical::FixedSizeList(
634 FixedSizeListArray::new(elements, list_size, validity.execute(ctx)?, len),
635 )))
636 }
637 Canonical::Struct(st) => {
638 let len = st.len();
639 let StructDataParts {
640 struct_fields,
641 fields,
642 validity,
643 } = st.into_data_parts();
644 Ok(CanonicalValidity(Canonical::Struct(unsafe {
645 StructArray::new_unchecked(fields, struct_fields, len, validity.execute(ctx)?)
646 })))
647 }
648 Canonical::Extension(ext) => Ok(CanonicalValidity(Canonical::Extension(
649 ExtensionArray::new(
650 ext.ext_dtype().clone(),
651 ext.storage_array()
652 .clone()
653 .execute::<CanonicalValidity>(ctx)?
654 .0
655 .into_array(),
656 ),
657 ))),
658 Canonical::Variant(variant) => {
659 let core_storage = recursively_canonicalize_slots(variant.core_storage(), ctx)?;
660 let shredded = variant
661 .shredded()
662 .map(|shredded| {
663 if shredded.is::<Variant>() {
664 recursively_canonicalize_slots(shredded, ctx)
665 } else {
666 shredded
667 .clone()
668 .execute::<CanonicalValidity>(ctx)
669 .map(|canonical| canonical.0.into_array())
670 }
671 })
672 .transpose()?;
673 Ok(CanonicalValidity(Canonical::Variant(
674 VariantArray::try_new(core_storage, shredded)?,
675 )))
676 }
677 }
678 }
679}
680
681pub struct RecursiveCanonical(pub Canonical);
686
687fn recursively_canonicalize_slots(
690 array: &ArrayRef,
691 ctx: &mut ExecutionCtx,
692) -> VortexResult<ArrayRef> {
693 let slots = array
694 .slots()
695 .iter()
696 .map(|slot| {
697 slot.as_ref()
698 .map(|child| {
699 child
700 .clone()
701 .execute::<RecursiveCanonical>(ctx)
702 .map(|canonical| canonical.0.into_array())
703 })
704 .transpose()
705 })
706 .collect::<VortexResult<ArraySlots>>()?;
707 array.clone().with_slots(slots)
708}
709impl Executable for RecursiveCanonical {
710 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
711 match array.execute::<Canonical>(ctx)? {
712 n @ Canonical::Null(_) => Ok(RecursiveCanonical(n)),
713 Canonical::Bool(b) => {
714 let validity = child_to_validity(b.slots()[0].as_ref(), b.dtype().nullability());
715 let len = b.len();
716 let BoolDataParts { bits, offset, len } = b.into_data().into_parts(len);
717 Ok(RecursiveCanonical(Canonical::Bool(
718 BoolArray::try_new_from_handle(bits, offset, len, validity.execute(ctx)?)?,
719 )))
720 }
721 Canonical::Primitive(p) => {
722 let PrimitiveDataParts {
723 ptype,
724 buffer,
725 validity,
726 } = p.into_data_parts();
727 Ok(RecursiveCanonical(Canonical::Primitive(unsafe {
728 PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
729 })))
730 }
731 Canonical::Decimal(d) => {
732 let DecimalDataParts {
733 decimal_dtype,
734 values,
735 values_type,
736 validity,
737 } = d.into_data_parts();
738 Ok(RecursiveCanonical(Canonical::Decimal(unsafe {
739 DecimalArray::new_unchecked_handle(
740 values,
741 values_type,
742 decimal_dtype,
743 validity.execute(ctx)?,
744 )
745 })))
746 }
747 Canonical::VarBinView(vbv) => {
748 let VarBinViewDataParts {
749 dtype,
750 buffers,
751 views,
752 validity,
753 } = vbv.into_data_parts();
754 Ok(RecursiveCanonical(Canonical::VarBinView(unsafe {
755 VarBinViewArray::new_handle_unchecked(
756 views,
757 buffers,
758 dtype,
759 validity.execute(ctx)?,
760 )
761 })))
762 }
763 Canonical::List(l) => {
764 let zctl = l.is_zero_copy_to_list();
765 let ListViewDataParts {
766 elements,
767 offsets,
768 sizes,
769 validity,
770 ..
771 } = l.into_data_parts();
772 Ok(RecursiveCanonical(Canonical::List(unsafe {
773 ListViewArray::new_unchecked(
774 elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
775 offsets.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
776 sizes.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
777 validity.execute(ctx)?,
778 )
779 .with_zero_copy_to_list(zctl)
780 })))
781 }
782 Canonical::FixedSizeList(fsl) => {
783 let list_size = fsl.list_size();
784 let len = fsl.len();
785 let parts = fsl.into_data_parts();
786 let elements = parts.elements;
787 let validity = parts.validity;
788 Ok(RecursiveCanonical(Canonical::FixedSizeList(
789 FixedSizeListArray::new(
790 elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
791 list_size,
792 validity.execute(ctx)?,
793 len,
794 ),
795 )))
796 }
797 Canonical::Struct(st) => {
798 let len = st.len();
799 let StructDataParts {
800 struct_fields,
801 fields,
802 validity,
803 } = st.into_data_parts();
804 let executed_fields = fields
805 .iter()
806 .map(|f| Ok(f.clone().execute::<RecursiveCanonical>(ctx)?.0.into_array()))
807 .collect::<VortexResult<Arc<[_]>>>()?;
808
809 Ok(RecursiveCanonical(Canonical::Struct(unsafe {
810 StructArray::new_unchecked(
811 executed_fields,
812 struct_fields,
813 len,
814 validity.execute(ctx)?,
815 )
816 })))
817 }
818 Canonical::Extension(ext) => Ok(RecursiveCanonical(Canonical::Extension(
819 ExtensionArray::new(
820 ext.ext_dtype().clone(),
821 ext.storage_array()
822 .clone()
823 .execute::<RecursiveCanonical>(ctx)?
824 .0
825 .into_array(),
826 ),
827 ))),
828 Canonical::Variant(variant) => {
829 let core_storage = recursively_canonicalize_slots(variant.core_storage(), ctx)?;
830 let shredded = variant
831 .shredded()
832 .map(|shredded| {
833 if shredded.is::<Variant>() {
834 recursively_canonicalize_slots(shredded, ctx)
835 } else {
836 shredded
837 .clone()
838 .execute::<RecursiveCanonical>(ctx)
839 .map(|canonical| canonical.0.into_array())
840 }
841 })
842 .transpose()?;
843 Ok(RecursiveCanonical(Canonical::Variant(
844 VariantArray::try_new(core_storage, shredded)?,
845 )))
846 }
847 }
848 }
849}
850
851impl<T: NativePType> Executable for Buffer<T> {
857 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
858 let array = PrimitiveArray::execute(array, ctx)?;
859 vortex_ensure!(
860 matches!(
861 array.validity()?,
862 Validity::NonNullable | Validity::AllValid
863 ),
864 "Cannot execute to native buffer: array is not all-valid."
865 );
866 Ok(array.into_buffer())
867 }
868}
869
870impl Executable for PrimitiveArray {
874 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
875 match array.try_downcast::<Primitive>() {
876 Ok(primitive) => Ok(primitive),
877 Err(array) => Ok(Canonical::execute(array, ctx)?.into_primitive()),
878 }
879 }
880}
881
882impl Executable for BoolArray {
886 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
887 match array.try_downcast::<Bool>() {
888 Ok(bool_array) => Ok(bool_array),
889 Err(array) => Ok(Canonical::execute(array, ctx)?.into_bool()),
890 }
891 }
892}
893
894impl Executable for BitBuffer {
898 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
899 let bool = BoolArray::execute(array, ctx)?;
900 assert!(
901 !bool.dtype().is_nullable(),
902 "bit buffer execute only works with non-nullable bool arrays"
903 );
904 Ok(bool.into_bit_buffer())
905 }
906}
907
908impl Executable for NullArray {
912 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
913 match array.try_downcast::<Null>() {
914 Ok(null_array) => Ok(null_array),
915 Err(array) => Ok(Canonical::execute(array, ctx)?.into_null()),
916 }
917 }
918}
919
920impl Executable for VarBinViewArray {
924 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
925 match array.try_downcast::<VarBinView>() {
926 Ok(varbinview) => Ok(varbinview),
927 Err(array) => Ok(Canonical::execute(array, ctx)?.into_varbinview()),
928 }
929 }
930}
931
932impl Executable for ExtensionArray {
936 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
937 match array.try_downcast::<Extension>() {
938 Ok(ext_array) => Ok(ext_array),
939 Err(array) => Ok(Canonical::execute(array, ctx)?.into_extension()),
940 }
941 }
942}
943
944impl Executable for DecimalArray {
948 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
949 match array.try_downcast::<Decimal>() {
950 Ok(decimal) => Ok(decimal),
951 Err(array) => Ok(Canonical::execute(array, ctx)?.into_decimal()),
952 }
953 }
954}
955
956impl Executable for ListViewArray {
960 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
961 match array.try_downcast::<ListView>() {
962 Ok(list) => Ok(list),
963 Err(array) => Ok(Canonical::execute(array, ctx)?.into_listview()),
964 }
965 }
966}
967
968impl Executable for FixedSizeListArray {
972 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
973 match array.try_downcast::<FixedSizeList>() {
974 Ok(fsl) => Ok(fsl),
975 Err(array) => Ok(Canonical::execute(array, ctx)?.into_fixed_size_list()),
976 }
977 }
978}
979
980impl Executable for StructArray {
984 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
985 match array.try_downcast::<Struct>() {
986 Ok(struct_array) => Ok(struct_array),
987 Err(array) => Ok(Canonical::execute(array, ctx)?.into_struct()),
988 }
989 }
990}
991
992impl Executable for VariantArray {
996 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
997 match array.try_downcast::<Variant>() {
998 Ok(variant_array) => Ok(variant_array),
999 Err(array) => match Canonical::execute(array, ctx)? {
1000 Canonical::Variant(variant_array) => Ok(variant_array),
1001 canonical => vortex_panic!("Cannot unwrap VariantArray from {:?}", canonical),
1002 },
1003 }
1004 }
1005}
1006
1007#[derive(Debug, Clone, Copy)]
1012pub enum CanonicalView<'a> {
1013 Null(ArrayView<'a, Null>),
1014 Bool(ArrayView<'a, Bool>),
1015 Primitive(ArrayView<'a, Primitive>),
1016 Decimal(ArrayView<'a, Decimal>),
1017 VarBinView(ArrayView<'a, VarBinView>),
1018 List(ArrayView<'a, ListView>),
1019 FixedSizeList(ArrayView<'a, FixedSizeList>),
1020 Struct(ArrayView<'a, Struct>),
1021 Extension(ArrayView<'a, Extension>),
1022 Variant(ArrayView<'a, Variant>),
1023}
1024
1025impl From<CanonicalView<'_>> for Canonical {
1026 fn from(value: CanonicalView<'_>) -> Self {
1027 match value {
1028 CanonicalView::Null(a) => Canonical::Null(a.into_owned()),
1029 CanonicalView::Bool(a) => Canonical::Bool(a.into_owned()),
1030 CanonicalView::Primitive(a) => Canonical::Primitive(a.into_owned()),
1031 CanonicalView::Decimal(a) => Canonical::Decimal(a.into_owned()),
1032 CanonicalView::VarBinView(a) => Canonical::VarBinView(a.into_owned()),
1033 CanonicalView::List(a) => Canonical::List(a.into_owned()),
1034 CanonicalView::FixedSizeList(a) => Canonical::FixedSizeList(a.into_owned()),
1035 CanonicalView::Struct(a) => Canonical::Struct(a.into_owned()),
1036 CanonicalView::Extension(a) => Canonical::Extension(a.into_owned()),
1037 CanonicalView::Variant(a) => Canonical::Variant(a.into_owned()),
1038 }
1039 }
1040}
1041
1042impl CanonicalView<'_> {
1043 pub fn to_array_ref(&self) -> ArrayRef {
1045 match self {
1046 CanonicalView::Null(a) => a.array().clone(),
1047 CanonicalView::Bool(a) => a.array().clone(),
1048 CanonicalView::Primitive(a) => a.array().clone(),
1049 CanonicalView::Decimal(a) => a.array().clone(),
1050 CanonicalView::VarBinView(a) => a.array().clone(),
1051 CanonicalView::List(a) => a.array().clone(),
1052 CanonicalView::FixedSizeList(a) => a.array().clone(),
1053 CanonicalView::Struct(a) => a.array().clone(),
1054 CanonicalView::Extension(a) => a.array().clone(),
1055 CanonicalView::Variant(a) => a.array().clone(),
1056 }
1057 }
1058}
1059
1060pub struct AnyCanonical;
1062impl Matcher for AnyCanonical {
1063 type Match<'a> = CanonicalView<'a>;
1064
1065 fn matches(array: &ArrayRef) -> bool {
1066 array.is::<Null>()
1067 || array.is::<Bool>()
1068 || array.is::<Primitive>()
1069 || array.is::<Decimal>()
1070 || array.is::<Struct>()
1071 || array.is::<ListView>()
1072 || array.is::<FixedSizeList>()
1073 || array.is::<VarBinView>()
1074 || array.is::<Variant>()
1075 || array.is::<Extension>()
1076 }
1077
1078 fn try_match(array: &ArrayRef) -> Option<Self::Match<'_>> {
1079 if let Some(a) = array.as_opt::<Null>() {
1080 Some(CanonicalView::Null(a))
1081 } else if let Some(a) = array.as_opt::<Bool>() {
1082 Some(CanonicalView::Bool(a))
1083 } else if let Some(a) = array.as_opt::<Primitive>() {
1084 Some(CanonicalView::Primitive(a))
1085 } else if let Some(a) = array.as_opt::<Decimal>() {
1086 Some(CanonicalView::Decimal(a))
1087 } else if let Some(a) = array.as_opt::<Struct>() {
1088 Some(CanonicalView::Struct(a))
1089 } else if let Some(a) = array.as_opt::<ListView>() {
1090 Some(CanonicalView::List(a))
1091 } else if let Some(a) = array.as_opt::<FixedSizeList>() {
1092 Some(CanonicalView::FixedSizeList(a))
1093 } else if let Some(a) = array.as_opt::<VarBinView>() {
1094 Some(CanonicalView::VarBinView(a))
1095 } else if let Some(a) = array.as_opt::<Variant>() {
1096 Some(CanonicalView::Variant(a))
1097 } else {
1098 array.as_opt::<Extension>().map(CanonicalView::Extension)
1099 }
1100 }
1101}
1102
1103#[cfg(test)]
1104mod test {
1105 use std::sync::Arc;
1106
1107 use arrow_array::Array as ArrowArray;
1108 use arrow_array::ArrayRef as ArrowArrayRef;
1109 use arrow_array::ListArray as ArrowListArray;
1110 use arrow_array::PrimitiveArray as ArrowPrimitiveArray;
1111 use arrow_array::StringArray;
1112 use arrow_array::StringViewArray;
1113 use arrow_array::StructArray as ArrowStructArray;
1114 use arrow_array::cast::AsArray;
1115 use arrow_array::types::Int32Type;
1116 use arrow_array::types::Int64Type;
1117 use arrow_array::types::UInt64Type;
1118 use arrow_buffer::NullBufferBuilder;
1119 use arrow_buffer::OffsetBuffer;
1120 use arrow_schema::DataType;
1121 use arrow_schema::Field;
1122 use vortex_buffer::buffer;
1123 use vortex_error::VortexResult;
1124 use vortex_error::vortex_err;
1125
1126 use crate::ArrayRef;
1127 use crate::Canonical;
1128 use crate::CanonicalValidity;
1129 use crate::IntoArray;
1130 use crate::LEGACY_SESSION;
1131 use crate::VortexSessionExecute;
1132 use crate::arrays::Constant;
1133 use crate::arrays::ConstantArray;
1134 use crate::arrays::Primitive;
1135 use crate::arrays::Struct;
1136 use crate::arrays::Variant;
1137 use crate::arrays::VariantArray;
1138 use crate::arrays::struct_::StructArrayExt;
1139 use crate::arrays::variant::VariantArrayExt;
1140 use crate::arrow::ArrowSessionExt;
1141 use crate::arrow::FromArrowArray;
1142 use crate::canonical::StructArray;
1143 use crate::dtype::Nullability;
1144 use crate::scalar::Scalar;
1145
1146 fn variant_core_storage(len: usize) -> ArrayRef {
1147 ConstantArray::new(
1148 Scalar::variant(Scalar::primitive(1i32, Nullability::NonNullable)),
1149 len,
1150 )
1151 .into_array()
1152 }
1153
1154 #[test]
1155 fn canonical_validity_canonicalizes_variant_shredded_physical_slots() -> VortexResult<()> {
1156 let len = 2;
1157 let nested_shredded =
1158 StructArray::try_from_iter([("value", ConstantArray::new(10i32, len).into_array())])?;
1159 let inner_variant = VariantArray::try_new(
1160 variant_core_storage(len),
1161 Some(nested_shredded.into_array()),
1162 )?;
1163 let outer_variant =
1164 VariantArray::try_new(variant_core_storage(len), Some(inner_variant.into_array()))?;
1165
1166 let mut ctx = LEGACY_SESSION.create_execution_ctx();
1167 let Canonical::Variant(canonical) = outer_variant
1168 .into_array()
1169 .execute::<CanonicalValidity>(&mut ctx)?
1170 .0
1171 else {
1172 return Err(vortex_err!("expected canonical variant"));
1173 };
1174
1175 let nested_variant = canonical
1176 .shredded()
1177 .and_then(|shredded| shredded.as_opt::<Variant>())
1178 .ok_or_else(|| vortex_err!("expected nested variant shredded child"))?;
1179 let nested_struct = nested_variant
1180 .shredded()
1181 .and_then(|shredded| shredded.as_opt::<Struct>())
1182 .ok_or_else(|| vortex_err!("expected nested struct shredded child"))?;
1183 let value = nested_struct.unmasked_field_by_name("value")?;
1184
1185 assert!(value.is::<Primitive>());
1186 assert!(!value.is::<Constant>());
1187
1188 Ok(())
1189 }
1190
1191 #[test]
1192 fn test_canonicalize_nested_struct() {
1193 let mut ctx = LEGACY_SESSION.create_execution_ctx();
1194 let nested_struct_array = StructArray::from_fields(&[
1196 ("a", buffer![1u64].into_array()),
1197 (
1198 "b",
1199 StructArray::from_fields(&[(
1200 "inner_a",
1201 ConstantArray::new(100i64, 1).into_array(),
1206 )])
1207 .unwrap()
1208 .into_array(),
1209 ),
1210 ])
1211 .unwrap();
1212
1213 let arrow_struct = LEGACY_SESSION
1214 .arrow()
1215 .execute_arrow(nested_struct_array.into_array(), None, &mut ctx)
1216 .unwrap()
1217 .as_any()
1218 .downcast_ref::<ArrowStructArray>()
1219 .cloned()
1220 .unwrap();
1221
1222 assert!(
1223 arrow_struct
1224 .column(0)
1225 .as_any()
1226 .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>()
1227 .is_some()
1228 );
1229
1230 let inner_struct = Arc::clone(arrow_struct.column(1))
1231 .as_any()
1232 .downcast_ref::<ArrowStructArray>()
1233 .cloned()
1234 .unwrap();
1235
1236 let inner_a = inner_struct
1237 .column(0)
1238 .as_any()
1239 .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
1240 assert!(inner_a.is_some());
1241
1242 assert_eq!(
1243 inner_a.cloned().unwrap(),
1244 ArrowPrimitiveArray::from_iter([100i64])
1245 );
1246 }
1247
1248 #[test]
1249 fn roundtrip_struct() {
1250 let mut ctx = LEGACY_SESSION.create_execution_ctx();
1251 let mut nulls = NullBufferBuilder::new(6);
1252 nulls.append_n_non_nulls(4);
1253 nulls.append_null();
1254 nulls.append_non_null();
1255 let names = Arc::new(StringViewArray::from_iter(vec![
1256 Some("Joseph"),
1257 None,
1258 Some("Angela"),
1259 Some("Mikhail"),
1260 None,
1261 None,
1262 ]));
1263 let ages = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
1264 Some(25),
1265 Some(31),
1266 None,
1267 Some(57),
1268 None,
1269 None,
1270 ]));
1271
1272 let arrow_struct = ArrowStructArray::new(
1273 vec![
1274 Arc::new(Field::new("name", DataType::Utf8View, true)),
1275 Arc::new(Field::new("age", DataType::Int32, true)),
1276 ]
1277 .into(),
1278 vec![names, ages],
1279 nulls.finish(),
1280 );
1281
1282 let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true).unwrap();
1283 let vortex_struct = LEGACY_SESSION
1284 .arrow()
1285 .execute_arrow(vortex_struct, None, &mut ctx)
1286 .unwrap();
1287 assert_eq!(&arrow_struct, vortex_struct.as_struct());
1288 }
1289
1290 #[test]
1291 fn roundtrip_list() {
1292 let mut ctx = LEGACY_SESSION.create_execution_ctx();
1293 let names = Arc::new(StringArray::from_iter(vec![
1294 Some("Joseph"),
1295 Some("Angela"),
1296 Some("Mikhail"),
1297 ]));
1298
1299 let arrow_list = ArrowListArray::new(
1300 Arc::new(Field::new_list_field(DataType::Utf8, true)),
1301 OffsetBuffer::from_lengths(vec![0, 2, 1]),
1302 names,
1303 None,
1304 );
1305 let list_data_type = arrow_list.data_type();
1306 let list_field = Field::new(String::new(), list_data_type.clone(), true);
1307
1308 let vortex_list = ArrayRef::from_arrow(&arrow_list, true).unwrap();
1309
1310 let rt_arrow_list = LEGACY_SESSION
1311 .arrow()
1312 .execute_arrow(vortex_list, Some(&list_field), &mut ctx)
1313 .unwrap();
1314
1315 assert_eq!(
1316 (Arc::new(arrow_list.clone()) as ArrowArrayRef).as_ref(),
1317 rt_arrow_list.as_ref()
1318 );
1319 }
1320}