1use std::sync::Arc;
7
8use vortex_buffer::BitBuffer;
9use vortex_buffer::Buffer;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_ensure;
13use vortex_error::vortex_panic;
14
15use crate::ArrayRef;
16use crate::ArraySlots;
17use crate::Executable;
18use crate::ExecutionCtx;
19use crate::IntoArray;
20use crate::array::ArrayView;
21use crate::array::child_to_validity;
22use crate::arrays::Bool;
23use crate::arrays::BoolArray;
24use crate::arrays::Decimal;
25use crate::arrays::DecimalArray;
26use crate::arrays::Extension;
27use crate::arrays::ExtensionArray;
28use crate::arrays::FixedSizeList;
29use crate::arrays::FixedSizeListArray;
30use crate::arrays::ListView;
31use crate::arrays::ListViewArray;
32use crate::arrays::Null;
33use crate::arrays::NullArray;
34use crate::arrays::Primitive;
35use crate::arrays::PrimitiveArray;
36use crate::arrays::Struct;
37use crate::arrays::StructArray;
38use crate::arrays::VarBinView;
39use crate::arrays::VarBinViewArray;
40use crate::arrays::Variant;
41use crate::arrays::VariantArray;
42use crate::arrays::bool::BoolDataParts;
43use crate::arrays::decimal::DecimalDataParts;
44use crate::arrays::extension::ExtensionArrayExt;
45use crate::arrays::fixed_size_list::FixedSizeListArrayExt;
46use crate::arrays::listview::ListViewDataParts;
47use crate::arrays::listview::ListViewRebuildMode;
48use crate::arrays::primitive::PrimitiveDataParts;
49use crate::arrays::struct_::StructDataParts;
50use crate::arrays::varbinview::VarBinViewDataParts;
51use crate::arrays::variant::VariantArrayExt;
52use crate::dtype::DType;
53use crate::dtype::NativePType;
54use crate::dtype::Nullability;
55use crate::dtype::PType;
56use crate::match_each_decimal_value_type;
57use crate::match_each_native_ptype;
58use crate::matcher::Matcher;
59use crate::validity::Validity;
60
/// The set of array kinds this module treats as canonical.
///
/// Each variant wraps the concrete array type of one canonical encoding. The same ten
/// encodings are the ones recognised by [`AnyCanonical`].
#[derive(Debug, Clone)]
pub enum Canonical {
    /// Wraps a [`NullArray`].
    Null(NullArray),
    /// Wraps a [`BoolArray`].
    Bool(BoolArray),
    /// Wraps a [`PrimitiveArray`].
    Primitive(PrimitiveArray),
    /// Wraps a [`DecimalArray`].
    Decimal(DecimalArray),
    /// Wraps a [`VarBinViewArray`]; `Canonical::empty` uses it for both UTF-8 and binary dtypes.
    VarBinView(VarBinViewArray),
    /// Wraps a [`ListViewArray`]. Note that the variant is called `List` although the payload
    /// is the list-view array type.
    List(ListViewArray),
    /// Wraps a [`FixedSizeListArray`].
    FixedSizeList(FixedSizeListArray),
    /// Wraps a [`StructArray`].
    Struct(StructArray),
    /// Wraps an [`ExtensionArray`].
    Extension(ExtensionArray),
    /// Wraps a [`VariantArray`].
    Variant(VariantArray),
}
134
/// Evaluates one expression against the payload of whichever [`Canonical`] variant is present.
///
/// Usage: `match_each_canonical!(canonical, |arr| arr.len())`. The identifier between the
/// pipes is bound to the inner array of the active variant, and the expression is
/// instantiated once per variant, so it has to type-check for every canonical array type.
macro_rules! match_each_canonical {
    ($canonical:expr, | $inner:ident | $body:expr) => {{
        match $canonical {
            Canonical::Null($inner) => $body,
            Canonical::Bool($inner) => $body,
            Canonical::Primitive($inner) => $body,
            Canonical::Decimal($inner) => $body,
            Canonical::VarBinView($inner) => $body,
            Canonical::List($inner) => $body,
            Canonical::FixedSizeList($inner) => $body,
            Canonical::Struct($inner) => $body,
            Canonical::Extension($inner) => $body,
            Canonical::Variant($inner) => $body,
        }
    }};
}
152
153impl Canonical {
154 pub fn empty(dtype: &DType) -> Canonical {
156 match dtype {
157 DType::Null => Canonical::Null(NullArray::new(0)),
158 DType::Bool(n) => Canonical::Bool(unsafe {
159 BoolArray::new_unchecked(BitBuffer::empty(), Validity::from(n))
160 }),
161 DType::Primitive(ptype, n) => {
162 match_each_native_ptype!(ptype, |P| {
163 Canonical::Primitive(unsafe {
164 PrimitiveArray::new_unchecked(Buffer::<P>::empty(), Validity::from(n))
165 })
166 })
167 }
168 DType::Decimal(decimal_type, n) => {
169 match_each_decimal_value_type!(
170 DecimalType::smallest_decimal_value_type(decimal_type),
171 |D| {
172 Canonical::Decimal(unsafe {
173 DecimalArray::new_unchecked::<D>(
174 Buffer::empty(),
175 *decimal_type,
176 Validity::from(n),
177 )
178 })
179 }
180 )
181 }
182 DType::Utf8(n) => Canonical::VarBinView(unsafe {
183 VarBinViewArray::new_unchecked(
184 Buffer::empty(),
185 Arc::new([]),
186 dtype.clone(),
187 Validity::from(n),
188 )
189 }),
190 DType::Binary(n) => Canonical::VarBinView(unsafe {
191 VarBinViewArray::new_unchecked(
192 Buffer::empty(),
193 Arc::new([]),
194 dtype.clone(),
195 Validity::from(n),
196 )
197 }),
198 DType::List(dtype, n) => Canonical::List(unsafe {
199 ListViewArray::new_unchecked(
200 Canonical::empty(dtype).into_array(),
201 Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
202 .into_array(),
203 Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
204 .into_array(),
205 Validity::from(n),
206 )
207 .with_zero_copy_to_list(true)
209 }),
210 DType::FixedSizeList(elem_dtype, list_size, null) => Canonical::FixedSizeList(unsafe {
211 FixedSizeListArray::new_unchecked(
212 Canonical::empty(elem_dtype).into_array(),
213 *list_size,
214 Validity::from(null),
215 0,
216 )
217 }),
218 DType::Struct(struct_dtype, n) => Canonical::Struct(unsafe {
219 StructArray::new_unchecked(
220 struct_dtype
221 .fields()
222 .map(|f| Canonical::empty(&f).into_array())
223 .collect::<Arc<[_]>>(),
224 struct_dtype.clone(),
225 0,
226 Validity::from(n),
227 )
228 }),
229 DType::Union(..) => todo!("TODO(connor)[Union]: unimplemented"),
230 DType::Variant(_) => {
231 vortex_panic!(InvalidArgument: "Canonical empty is not supported for Variant")
232 }
233 DType::Extension(ext_dtype) => Canonical::Extension(ExtensionArray::new(
234 ext_dtype.clone(),
235 Canonical::empty(ext_dtype.storage_dtype()).into_array(),
236 )),
237 }
238 }
239
240 pub fn len(&self) -> usize {
241 match_each_canonical!(self, |arr| arr.len())
242 }
243
244 pub fn dtype(&self) -> &DType {
245 match_each_canonical!(self, |arr| arr.dtype())
246 }
247
248 pub fn is_empty(&self) -> bool {
249 match_each_canonical!(self, |arr| arr.is_empty())
250 }
251}
252
253impl Canonical {
254 pub fn compact(&self, ctx: &mut ExecutionCtx) -> VortexResult<Canonical> {
262 match self {
263 Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
264 Canonical::List(array) => Ok(Canonical::List(
265 array.rebuild(ListViewRebuildMode::TrimElements, ctx)?,
266 )),
267 _ => Ok(self.clone()),
268 }
269 }
270}
271
272impl Canonical {
274 pub fn as_null(&self) -> &NullArray {
275 if let Canonical::Null(a) = self {
276 a
277 } else {
278 vortex_panic!("Cannot get NullArray from {:?}", &self)
279 }
280 }
281
282 pub fn into_null(self) -> NullArray {
283 if let Canonical::Null(a) = self {
284 a
285 } else {
286 vortex_panic!("Cannot unwrap NullArray from {:?}", &self)
287 }
288 }
289
290 pub fn as_bool(&self) -> &BoolArray {
291 if let Canonical::Bool(a) = self {
292 a
293 } else {
294 vortex_panic!("Cannot get BoolArray from {:?}", &self)
295 }
296 }
297
298 pub fn into_bool(self) -> BoolArray {
299 if let Canonical::Bool(a) = self {
300 a
301 } else {
302 vortex_panic!("Cannot unwrap BoolArray from {:?}", &self)
303 }
304 }
305
306 pub fn as_primitive(&self) -> &PrimitiveArray {
307 if let Canonical::Primitive(a) = self {
308 a
309 } else {
310 vortex_panic!("Cannot get PrimitiveArray from {:?}", &self)
311 }
312 }
313
314 pub fn into_primitive(self) -> PrimitiveArray {
315 if let Canonical::Primitive(a) = self {
316 a
317 } else {
318 vortex_panic!("Cannot unwrap PrimitiveArray from {:?}", &self)
319 }
320 }
321
322 pub fn as_decimal(&self) -> &DecimalArray {
323 if let Canonical::Decimal(a) = self {
324 a
325 } else {
326 vortex_panic!("Cannot get DecimalArray from {:?}", &self)
327 }
328 }
329
330 pub fn into_decimal(self) -> DecimalArray {
331 if let Canonical::Decimal(a) = self {
332 a
333 } else {
334 vortex_panic!("Cannot unwrap DecimalArray from {:?}", &self)
335 }
336 }
337
338 pub fn as_varbinview(&self) -> &VarBinViewArray {
339 if let Canonical::VarBinView(a) = self {
340 a
341 } else {
342 vortex_panic!("Cannot get VarBinViewArray from {:?}", &self)
343 }
344 }
345
346 pub fn into_varbinview(self) -> VarBinViewArray {
347 if let Canonical::VarBinView(a) = self {
348 a
349 } else {
350 vortex_panic!("Cannot unwrap VarBinViewArray from {:?}", &self)
351 }
352 }
353
354 pub fn as_listview(&self) -> &ListViewArray {
355 if let Canonical::List(a) = self {
356 a
357 } else {
358 vortex_panic!("Cannot get ListArray from {:?}", &self)
359 }
360 }
361
362 pub fn into_listview(self) -> ListViewArray {
363 if let Canonical::List(a) = self {
364 a
365 } else {
366 vortex_panic!("Cannot unwrap ListArray from {:?}", &self)
367 }
368 }
369
370 pub fn as_fixed_size_list(&self) -> &FixedSizeListArray {
371 if let Canonical::FixedSizeList(a) = self {
372 a
373 } else {
374 vortex_panic!("Cannot get FixedSizeListArray from {:?}", &self)
375 }
376 }
377
378 pub fn into_fixed_size_list(self) -> FixedSizeListArray {
379 if let Canonical::FixedSizeList(a) = self {
380 a
381 } else {
382 vortex_panic!("Cannot unwrap FixedSizeListArray from {:?}", &self)
383 }
384 }
385
386 pub fn as_struct(&self) -> &StructArray {
387 if let Canonical::Struct(a) = self {
388 a
389 } else {
390 vortex_panic!("Cannot get StructArray from {:?}", &self)
391 }
392 }
393
394 pub fn into_struct(self) -> StructArray {
395 if let Canonical::Struct(a) = self {
396 a
397 } else {
398 vortex_panic!("Cannot unwrap StructArray from {:?}", &self)
399 }
400 }
401
402 pub fn as_extension(&self) -> &ExtensionArray {
403 if let Canonical::Extension(a) = self {
404 a
405 } else {
406 vortex_panic!("Cannot get ExtensionArray from {:?}", &self)
407 }
408 }
409
410 pub fn into_extension(self) -> ExtensionArray {
411 if let Canonical::Extension(a) = self {
412 a
413 } else {
414 vortex_panic!("Cannot unwrap ExtensionArray from {:?}", &self)
415 }
416 }
417}
418
419impl IntoArray for Canonical {
420 fn into_array(self) -> ArrayRef {
421 match_each_canonical!(self, |arr| arr.into_array())
422 }
423}
424
/// Deprecated convenience conversions from an array to a specific canonical array type.
///
/// The `ArrayRef` implementation in this file canonicalizes through `to_canonical()` and then
/// unwraps the requested variant; it panics if canonicalization fails or if the canonical form
/// is a different variant. New code should follow the deprecation notes and call
/// `array.execute::<T>(ctx)` instead.
#[deprecated(note = "use `array.execute::<T>(ctx)` instead")]
pub trait ToCanonical {
    /// Canonicalizes to a [`NullArray`].
    #[deprecated(note = "use `array.execute::<NullArray>(ctx)` instead")]
    fn to_null(&self) -> NullArray;

    /// Canonicalizes to a [`BoolArray`].
    #[deprecated(note = "use `array.execute::<BoolArray>(ctx)` instead")]
    fn to_bool(&self) -> BoolArray;

    /// Canonicalizes to a [`PrimitiveArray`].
    #[deprecated(note = "use `array.execute::<PrimitiveArray>(ctx)` instead")]
    fn to_primitive(&self) -> PrimitiveArray;

    /// Canonicalizes to a [`DecimalArray`].
    #[deprecated(note = "use `array.execute::<DecimalArray>(ctx)` instead")]
    fn to_decimal(&self) -> DecimalArray;

    /// Canonicalizes to a [`StructArray`].
    #[deprecated(note = "use `array.execute::<StructArray>(ctx)` instead")]
    fn to_struct(&self) -> StructArray;

    /// Canonicalizes to a [`ListViewArray`].
    #[deprecated(note = "use `array.execute::<ListViewArray>(ctx)` instead")]
    fn to_listview(&self) -> ListViewArray;

    /// Canonicalizes to a [`FixedSizeListArray`].
    #[deprecated(note = "use `array.execute::<FixedSizeListArray>(ctx)` instead")]
    fn to_fixed_size_list(&self) -> FixedSizeListArray;

    /// Canonicalizes to a [`VarBinViewArray`].
    #[deprecated(note = "use `array.execute::<VarBinViewArray>(ctx)` instead")]
    fn to_varbinview(&self) -> VarBinViewArray;

    /// Canonicalizes to an [`ExtensionArray`].
    #[deprecated(note = "use `array.execute::<ExtensionArray>(ctx)` instead")]
    fn to_extension(&self) -> ExtensionArray;
}
473
474#[expect(deprecated)]
476impl ToCanonical for ArrayRef {
477 fn to_null(&self) -> NullArray {
478 #[expect(deprecated)]
479 let result = self.to_canonical().vortex_expect("to_canonical failed");
480 result.into_null()
481 }
482
483 fn to_bool(&self) -> BoolArray {
484 #[expect(deprecated)]
485 let result = self.to_canonical().vortex_expect("to_canonical failed");
486 result.into_bool()
487 }
488
489 fn to_primitive(&self) -> PrimitiveArray {
490 #[expect(deprecated)]
491 let result = self.to_canonical().vortex_expect("to_canonical failed");
492 result.into_primitive()
493 }
494
495 fn to_decimal(&self) -> DecimalArray {
496 #[expect(deprecated)]
497 let result = self.to_canonical().vortex_expect("to_canonical failed");
498 result.into_decimal()
499 }
500
501 fn to_struct(&self) -> StructArray {
502 #[expect(deprecated)]
503 let result = self.to_canonical().vortex_expect("to_canonical failed");
504 result.into_struct()
505 }
506
507 fn to_listview(&self) -> ListViewArray {
508 #[expect(deprecated)]
509 let result = self.to_canonical().vortex_expect("to_canonical failed");
510 result.into_listview()
511 }
512
513 fn to_fixed_size_list(&self) -> FixedSizeListArray {
514 #[expect(deprecated)]
515 let result = self.to_canonical().vortex_expect("to_canonical failed");
516 result.into_fixed_size_list()
517 }
518
519 fn to_varbinview(&self) -> VarBinViewArray {
520 #[expect(deprecated)]
521 let result = self.to_canonical().vortex_expect("to_canonical failed");
522 result.into_varbinview()
523 }
524
525 fn to_extension(&self) -> ExtensionArray {
526 #[expect(deprecated)]
527 let result = self.to_canonical().vortex_expect("to_canonical failed");
528 result.into_extension()
529 }
530}
531
532impl From<Canonical> for ArrayRef {
533 fn from(value: Canonical) -> Self {
534 match_each_canonical!(value, |arr| arr.into_array())
535 }
536}
537
538impl Executable for Canonical {
544 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
545 let result = array.execute_until::<AnyCanonical>(ctx)?;
546 Ok(result
547 .as_opt::<AnyCanonical>()
548 .map(Canonical::from)
549 .vortex_expect("execute_until::<AnyCanonical> must return a canonical array"))
550 }
551}
552
/// Newtype around [`Canonical`] used as an execution target.
///
/// Its `Executable` implementation canonicalizes the array and additionally runs the
/// top-level validity through `execute`. Extension storage is executed to `CanonicalValidity`
/// as well, and variant children are canonicalized; list, fixed-size-list and struct children
/// are passed through as they are.
pub struct CanonicalValidity(pub Canonical);
558
559impl Executable for CanonicalValidity {
560 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
561 match array.execute::<Canonical>(ctx)? {
562 n @ Canonical::Null(_) => Ok(CanonicalValidity(n)),
563 Canonical::Bool(b) => {
564 let validity = child_to_validity(b.slots()[0].as_ref(), b.dtype().nullability());
565 let len = b.len();
566 let BoolDataParts { bits, meta } = b.into_data().into_parts(len);
567 Ok(CanonicalValidity(Canonical::Bool(
568 BoolArray::try_new_from_handle(
569 bits,
570 meta.offset(),
571 meta.len(),
572 validity.execute(ctx)?,
573 )?,
574 )))
575 }
576 Canonical::Primitive(p) => {
577 let PrimitiveDataParts {
578 ptype,
579 buffer,
580 validity,
581 } = p.into_data_parts();
582 Ok(CanonicalValidity(Canonical::Primitive(unsafe {
583 PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
584 })))
585 }
586 Canonical::Decimal(d) => {
587 let DecimalDataParts {
588 decimal_dtype,
589 values,
590 values_type,
591 validity,
592 } = d.into_data_parts();
593 Ok(CanonicalValidity(Canonical::Decimal(unsafe {
594 DecimalArray::new_unchecked_handle(
595 values,
596 values_type,
597 decimal_dtype,
598 validity.execute(ctx)?,
599 )
600 })))
601 }
602 Canonical::VarBinView(vbv) => {
603 let VarBinViewDataParts {
604 dtype,
605 buffers,
606 views,
607 validity,
608 } = vbv.into_data_parts();
609 Ok(CanonicalValidity(Canonical::VarBinView(unsafe {
610 VarBinViewArray::new_handle_unchecked(
611 views,
612 buffers,
613 dtype,
614 validity.execute(ctx)?,
615 )
616 })))
617 }
618 Canonical::List(l) => {
619 let zctl = l.is_zero_copy_to_list();
620 let ListViewDataParts {
621 elements,
622 offsets,
623 sizes,
624 validity,
625 ..
626 } = l.into_data_parts();
627 Ok(CanonicalValidity(Canonical::List(unsafe {
628 ListViewArray::new_unchecked(elements, offsets, sizes, validity.execute(ctx)?)
629 .with_zero_copy_to_list(zctl)
630 })))
631 }
632 Canonical::FixedSizeList(fsl) => {
633 let list_size = fsl.list_size();
634 let len = fsl.len();
635 let parts = fsl.into_data_parts();
636 let elements = parts.elements;
637 let validity = parts.validity;
638 Ok(CanonicalValidity(Canonical::FixedSizeList(
639 FixedSizeListArray::new(elements, list_size, validity.execute(ctx)?, len),
640 )))
641 }
642 Canonical::Struct(st) => {
643 let len = st.len();
644 let StructDataParts {
645 struct_fields,
646 fields,
647 validity,
648 } = st.into_data_parts();
649 Ok(CanonicalValidity(Canonical::Struct(unsafe {
650 StructArray::new_unchecked(fields, struct_fields, len, validity.execute(ctx)?)
651 })))
652 }
653 Canonical::Extension(ext) => Ok(CanonicalValidity(Canonical::Extension(
654 ExtensionArray::new(
655 ext.ext_dtype().clone(),
656 ext.storage_array()
657 .clone()
658 .execute::<CanonicalValidity>(ctx)?
659 .0
660 .into_array(),
661 ),
662 ))),
663 Canonical::Variant(variant) => {
664 let core_storage = recursively_canonicalize_slots(variant.core_storage(), ctx)?;
665 let shredded = variant
666 .shredded()
667 .map(|shredded| {
668 if shredded.is::<Variant>() {
669 recursively_canonicalize_slots(shredded, ctx)
670 } else {
671 shredded
672 .clone()
673 .execute::<CanonicalValidity>(ctx)
674 .map(|canonical| canonical.0.into_array())
675 }
676 })
677 .transpose()?;
678 Ok(CanonicalValidity(Canonical::Variant(
679 VariantArray::try_new(core_storage, shredded)?,
680 )))
681 }
682 }
683 }
684}
685
/// Newtype around [`Canonical`] used as an execution target.
///
/// Its `Executable` implementation canonicalizes the array, executes its validity, and also
/// executes nested children to `RecursiveCanonical`: list elements/offsets/sizes,
/// fixed-size-list elements, struct fields, extension storage and variant children.
pub struct RecursiveCanonical(pub Canonical);
691
692fn recursively_canonicalize_slots(
695 array: &ArrayRef,
696 ctx: &mut ExecutionCtx,
697) -> VortexResult<ArrayRef> {
698 let slots = array
699 .slots()
700 .iter()
701 .map(|slot| {
702 slot.as_ref()
703 .map(|child| {
704 child
705 .clone()
706 .execute::<RecursiveCanonical>(ctx)
707 .map(|canonical| canonical.0.into_array())
708 })
709 .transpose()
710 })
711 .collect::<VortexResult<ArraySlots>>()?;
712 array.clone().with_slots(slots)
713}
714impl Executable for RecursiveCanonical {
715 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
716 match array.execute::<Canonical>(ctx)? {
717 n @ Canonical::Null(_) => Ok(RecursiveCanonical(n)),
718 Canonical::Bool(b) => {
719 let validity = child_to_validity(b.slots()[0].as_ref(), b.dtype().nullability());
720 let len = b.len();
721 let BoolDataParts { bits, meta } = b.into_data().into_parts(len);
722 Ok(RecursiveCanonical(Canonical::Bool(
723 BoolArray::try_new_from_handle(
724 bits,
725 meta.offset(),
726 meta.len(),
727 validity.execute(ctx)?,
728 )?,
729 )))
730 }
731 Canonical::Primitive(p) => {
732 let PrimitiveDataParts {
733 ptype,
734 buffer,
735 validity,
736 } = p.into_data_parts();
737 Ok(RecursiveCanonical(Canonical::Primitive(unsafe {
738 PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
739 })))
740 }
741 Canonical::Decimal(d) => {
742 let DecimalDataParts {
743 decimal_dtype,
744 values,
745 values_type,
746 validity,
747 } = d.into_data_parts();
748 Ok(RecursiveCanonical(Canonical::Decimal(unsafe {
749 DecimalArray::new_unchecked_handle(
750 values,
751 values_type,
752 decimal_dtype,
753 validity.execute(ctx)?,
754 )
755 })))
756 }
757 Canonical::VarBinView(vbv) => {
758 let VarBinViewDataParts {
759 dtype,
760 buffers,
761 views,
762 validity,
763 } = vbv.into_data_parts();
764 Ok(RecursiveCanonical(Canonical::VarBinView(unsafe {
765 VarBinViewArray::new_handle_unchecked(
766 views,
767 buffers,
768 dtype,
769 validity.execute(ctx)?,
770 )
771 })))
772 }
773 Canonical::List(l) => {
774 let zctl = l.is_zero_copy_to_list();
775 let ListViewDataParts {
776 elements,
777 offsets,
778 sizes,
779 validity,
780 ..
781 } = l.into_data_parts();
782 Ok(RecursiveCanonical(Canonical::List(unsafe {
783 ListViewArray::new_unchecked(
784 elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
785 offsets.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
786 sizes.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
787 validity.execute(ctx)?,
788 )
789 .with_zero_copy_to_list(zctl)
790 })))
791 }
792 Canonical::FixedSizeList(fsl) => {
793 let list_size = fsl.list_size();
794 let len = fsl.len();
795 let parts = fsl.into_data_parts();
796 let elements = parts.elements;
797 let validity = parts.validity;
798 Ok(RecursiveCanonical(Canonical::FixedSizeList(
799 FixedSizeListArray::new(
800 elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
801 list_size,
802 validity.execute(ctx)?,
803 len,
804 ),
805 )))
806 }
807 Canonical::Struct(st) => {
808 let len = st.len();
809 let StructDataParts {
810 struct_fields,
811 fields,
812 validity,
813 } = st.into_data_parts();
814 let executed_fields = fields
815 .iter()
816 .map(|f| Ok(f.clone().execute::<RecursiveCanonical>(ctx)?.0.into_array()))
817 .collect::<VortexResult<Arc<[_]>>>()?;
818
819 Ok(RecursiveCanonical(Canonical::Struct(unsafe {
820 StructArray::new_unchecked(
821 executed_fields,
822 struct_fields,
823 len,
824 validity.execute(ctx)?,
825 )
826 })))
827 }
828 Canonical::Extension(ext) => Ok(RecursiveCanonical(Canonical::Extension(
829 ExtensionArray::new(
830 ext.ext_dtype().clone(),
831 ext.storage_array()
832 .clone()
833 .execute::<RecursiveCanonical>(ctx)?
834 .0
835 .into_array(),
836 ),
837 ))),
838 Canonical::Variant(variant) => {
839 let core_storage = recursively_canonicalize_slots(variant.core_storage(), ctx)?;
840 let shredded = variant
841 .shredded()
842 .map(|shredded| {
843 if shredded.is::<Variant>() {
844 recursively_canonicalize_slots(shredded, ctx)
845 } else {
846 shredded
847 .clone()
848 .execute::<RecursiveCanonical>(ctx)
849 .map(|canonical| canonical.0.into_array())
850 }
851 })
852 .transpose()?;
853 Ok(RecursiveCanonical(Canonical::Variant(
854 VariantArray::try_new(core_storage, shredded)?,
855 )))
856 }
857 }
858 }
859}
860
861impl<T: NativePType> Executable for Buffer<T> {
867 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
868 let array = PrimitiveArray::execute(array, ctx)?;
869 vortex_ensure!(
870 matches!(
871 array.validity()?,
872 Validity::NonNullable | Validity::AllValid
873 ),
874 "Cannot execute to native buffer: array is not all-valid."
875 );
876 Ok(array.into_buffer())
877 }
878}
879
880impl Executable for PrimitiveArray {
884 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
885 match array.try_downcast::<Primitive>() {
886 Ok(primitive) => Ok(primitive),
887 Err(array) => Ok(Canonical::execute(array, ctx)?.into_primitive()),
888 }
889 }
890}
891
892impl Executable for BoolArray {
896 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
897 match array.try_downcast::<Bool>() {
898 Ok(bool_array) => Ok(bool_array),
899 Err(array) => Ok(Canonical::execute(array, ctx)?.into_bool()),
900 }
901 }
902}
903
904impl Executable for BitBuffer {
908 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
909 let bool = BoolArray::execute(array, ctx)?;
910 assert!(
911 !bool.dtype().is_nullable(),
912 "bit buffer execute only works with non-nullable bool arrays"
913 );
914 Ok(bool.into_bit_buffer())
915 }
916}
917
918impl Executable for NullArray {
922 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
923 match array.try_downcast::<Null>() {
924 Ok(null_array) => Ok(null_array),
925 Err(array) => Ok(Canonical::execute(array, ctx)?.into_null()),
926 }
927 }
928}
929
930impl Executable for VarBinViewArray {
934 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
935 match array.try_downcast::<VarBinView>() {
936 Ok(varbinview) => Ok(varbinview),
937 Err(array) => Ok(Canonical::execute(array, ctx)?.into_varbinview()),
938 }
939 }
940}
941
942impl Executable for ExtensionArray {
946 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
947 match array.try_downcast::<Extension>() {
948 Ok(ext_array) => Ok(ext_array),
949 Err(array) => Ok(Canonical::execute(array, ctx)?.into_extension()),
950 }
951 }
952}
953
954impl Executable for DecimalArray {
958 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
959 match array.try_downcast::<Decimal>() {
960 Ok(decimal) => Ok(decimal),
961 Err(array) => Ok(Canonical::execute(array, ctx)?.into_decimal()),
962 }
963 }
964}
965
966impl Executable for ListViewArray {
970 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
971 match array.try_downcast::<ListView>() {
972 Ok(list) => Ok(list),
973 Err(array) => Ok(Canonical::execute(array, ctx)?.into_listview()),
974 }
975 }
976}
977
978impl Executable for FixedSizeListArray {
982 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
983 match array.try_downcast::<FixedSizeList>() {
984 Ok(fsl) => Ok(fsl),
985 Err(array) => Ok(Canonical::execute(array, ctx)?.into_fixed_size_list()),
986 }
987 }
988}
989
990impl Executable for StructArray {
994 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
995 match array.try_downcast::<Struct>() {
996 Ok(struct_array) => Ok(struct_array),
997 Err(array) => Ok(Canonical::execute(array, ctx)?.into_struct()),
998 }
999 }
1000}
1001
1002impl Executable for VariantArray {
1006 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
1007 match array.try_downcast::<Variant>() {
1008 Ok(variant_array) => Ok(variant_array),
1009 Err(array) => match Canonical::execute(array, ctx)? {
1010 Canonical::Variant(variant_array) => Ok(variant_array),
1011 canonical => vortex_panic!("Cannot unwrap VariantArray from {:?}", canonical),
1012 },
1013 }
1014 }
1015}
1016
/// Borrowed counterpart of [`Canonical`]: one variant per canonical encoding, each holding an
/// [`ArrayView`] of that encoding.
///
/// This is the match type produced by [`AnyCanonical`], and it converts into an owned
/// [`Canonical`] through `From`.
#[derive(Debug, Clone, Copy)]
pub enum CanonicalView<'a> {
    /// View of a `Null` array.
    Null(ArrayView<'a, Null>),
    /// View of a `Bool` array.
    Bool(ArrayView<'a, Bool>),
    /// View of a `Primitive` array.
    Primitive(ArrayView<'a, Primitive>),
    /// View of a `Decimal` array.
    Decimal(ArrayView<'a, Decimal>),
    /// View of a `VarBinView` array.
    VarBinView(ArrayView<'a, VarBinView>),
    /// View of a `ListView` array (the variant is named `List`).
    List(ArrayView<'a, ListView>),
    /// View of a `FixedSizeList` array.
    FixedSizeList(ArrayView<'a, FixedSizeList>),
    /// View of a `Struct` array.
    Struct(ArrayView<'a, Struct>),
    /// View of an `Extension` array.
    Extension(ArrayView<'a, Extension>),
    /// View of a `Variant` array.
    Variant(ArrayView<'a, Variant>),
}
1034
1035impl From<CanonicalView<'_>> for Canonical {
1036 fn from(value: CanonicalView<'_>) -> Self {
1037 match value {
1038 CanonicalView::Null(a) => Canonical::Null(a.into_owned()),
1039 CanonicalView::Bool(a) => Canonical::Bool(a.into_owned()),
1040 CanonicalView::Primitive(a) => Canonical::Primitive(a.into_owned()),
1041 CanonicalView::Decimal(a) => Canonical::Decimal(a.into_owned()),
1042 CanonicalView::VarBinView(a) => Canonical::VarBinView(a.into_owned()),
1043 CanonicalView::List(a) => Canonical::List(a.into_owned()),
1044 CanonicalView::FixedSizeList(a) => Canonical::FixedSizeList(a.into_owned()),
1045 CanonicalView::Struct(a) => Canonical::Struct(a.into_owned()),
1046 CanonicalView::Extension(a) => Canonical::Extension(a.into_owned()),
1047 CanonicalView::Variant(a) => Canonical::Variant(a.into_owned()),
1048 }
1049 }
1050}
1051
1052impl CanonicalView<'_> {
1053 pub fn to_array_ref(&self) -> ArrayRef {
1055 match self {
1056 CanonicalView::Null(a) => a.array().clone(),
1057 CanonicalView::Bool(a) => a.array().clone(),
1058 CanonicalView::Primitive(a) => a.array().clone(),
1059 CanonicalView::Decimal(a) => a.array().clone(),
1060 CanonicalView::VarBinView(a) => a.array().clone(),
1061 CanonicalView::List(a) => a.array().clone(),
1062 CanonicalView::FixedSizeList(a) => a.array().clone(),
1063 CanonicalView::Struct(a) => a.array().clone(),
1064 CanonicalView::Extension(a) => a.array().clone(),
1065 CanonicalView::Variant(a) => a.array().clone(),
1066 }
1067 }
1068}
1069
/// Matcher that accepts any array whose encoding is one of the canonical ones (`Null`, `Bool`,
/// `Primitive`, `Decimal`, `Struct`, `ListView`, `FixedSizeList`, `VarBinView`, `Variant`,
/// `Extension`) and yields a [`CanonicalView`] of it.
pub struct AnyCanonical;
1072impl Matcher for AnyCanonical {
1073 type Match<'a> = CanonicalView<'a>;
1074
1075 #[inline]
1076 fn matches(array: &ArrayRef) -> bool {
1077 array.is::<Null>()
1078 || array.is::<Bool>()
1079 || array.is::<Primitive>()
1080 || array.is::<Decimal>()
1081 || array.is::<Struct>()
1082 || array.is::<ListView>()
1083 || array.is::<FixedSizeList>()
1084 || array.is::<VarBinView>()
1085 || array.is::<Variant>()
1086 || array.is::<Extension>()
1087 }
1088
1089 #[inline]
1090 fn try_match(array: &ArrayRef) -> Option<Self::Match<'_>> {
1091 if let Some(a) = array.as_opt::<Null>() {
1092 Some(CanonicalView::Null(a))
1093 } else if let Some(a) = array.as_opt::<Bool>() {
1094 Some(CanonicalView::Bool(a))
1095 } else if let Some(a) = array.as_opt::<Primitive>() {
1096 Some(CanonicalView::Primitive(a))
1097 } else if let Some(a) = array.as_opt::<Decimal>() {
1098 Some(CanonicalView::Decimal(a))
1099 } else if let Some(a) = array.as_opt::<Struct>() {
1100 Some(CanonicalView::Struct(a))
1101 } else if let Some(a) = array.as_opt::<ListView>() {
1102 Some(CanonicalView::List(a))
1103 } else if let Some(a) = array.as_opt::<FixedSizeList>() {
1104 Some(CanonicalView::FixedSizeList(a))
1105 } else if let Some(a) = array.as_opt::<VarBinView>() {
1106 Some(CanonicalView::VarBinView(a))
1107 } else if let Some(a) = array.as_opt::<Variant>() {
1108 Some(CanonicalView::Variant(a))
1109 } else {
1110 array.as_opt::<Extension>().map(CanonicalView::Extension)
1111 }
1112 }
1113}
1114
#[cfg(test)]
mod test {
    use std::sync::Arc;
    use std::sync::LazyLock;

    use arrow_array::Array as ArrowArray;
    use arrow_array::ArrayRef as ArrowArrayRef;
    use arrow_array::ListArray as ArrowListArray;
    use arrow_array::PrimitiveArray as ArrowPrimitiveArray;
    use arrow_array::StringArray;
    use arrow_array::StringViewArray;
    use arrow_array::StructArray as ArrowStructArray;
    use arrow_array::cast::AsArray;
    use arrow_array::types::Int32Type;
    use arrow_array::types::Int64Type;
    use arrow_array::types::UInt64Type;
    use arrow_buffer::NullBufferBuilder;
    use arrow_buffer::OffsetBuffer;
    use arrow_schema::DataType;
    use arrow_schema::Field;
    use vortex_buffer::buffer;
    use vortex_error::VortexResult;
    use vortex_error::vortex_err;
    use vortex_session::VortexSession;

    use crate::ArrayRef;
    use crate::Canonical;
    use crate::CanonicalValidity;
    use crate::IntoArray;
    use crate::VortexSessionExecute;
    use crate::arrays::Constant;
    use crate::arrays::ConstantArray;
    use crate::arrays::Primitive;
    use crate::arrays::Struct;
    use crate::arrays::Variant;
    use crate::arrays::VariantArray;
    use crate::arrays::struct_::StructArrayExt;
    use crate::arrays::variant::VariantArrayExt;
    use crate::arrow::ArrowSessionExt;
    use crate::arrow::FromArrowArray;
    use crate::canonical::StructArray;
    use crate::dtype::Nullability;
    use crate::scalar::Scalar;
    use crate::session::ArraySession;

    /// Session shared by all tests here: an empty session with `ArraySession` registered.
    static SESSION: LazyLock<VortexSession> =
        LazyLock::new(|| VortexSession::empty().with::<ArraySession>());

    /// Constant array of `len` variant scalars, each wrapping the non-nullable `i32` value 1.
    fn variant_core_storage(len: usize) -> ArrayRef {
        let scalar = Scalar::variant(Scalar::primitive(1i32, Nullability::NonNullable));
        ConstantArray::new(scalar, len).into_array()
    }

    /// A constant-encoded leaf two variant levels deep must come out of `CanonicalValidity`
    /// as a primitive array.
    #[test]
    fn canonical_validity_canonicalizes_variant_shredded_physical_slots() -> VortexResult<()> {
        let len = 2;

        // outer variant -> inner variant -> struct { value: constant i32 }
        let leaf = ConstantArray::new(10i32, len).into_array();
        let nested_shredded = StructArray::try_from_iter([("value", leaf)])?;
        let inner_variant = VariantArray::try_new(
            variant_core_storage(len),
            Some(nested_shredded.into_array()),
        )?;
        let outer_variant =
            VariantArray::try_new(variant_core_storage(len), Some(inner_variant.into_array()))?;

        let mut ctx = SESSION.create_execution_ctx();
        let executed = outer_variant
            .into_array()
            .execute::<CanonicalValidity>(&mut ctx)?;
        let Canonical::Variant(canonical) = executed.0 else {
            return Err(vortex_err!("expected canonical variant"));
        };

        // Walk back down to the leaf.
        let nested_variant = canonical
            .shredded()
            .and_then(|child| child.as_opt::<Variant>())
            .ok_or_else(|| vortex_err!("expected nested variant shredded child"))?;
        let nested_struct = nested_variant
            .shredded()
            .and_then(|child| child.as_opt::<Struct>())
            .ok_or_else(|| vortex_err!("expected nested struct shredded child"))?;
        let value = nested_struct.unmasked_field_by_name("value")?;

        assert!(value.is::<Primitive>());
        assert!(!value.is::<Constant>());

        Ok(())
    }

    /// A constant-encoded field inside a nested struct must reach Arrow as a primitive array.
    #[test]
    fn test_canonicalize_nested_struct() {
        let mut ctx = SESSION.create_execution_ctx();

        let inner = StructArray::from_fields(&[(
            "inner_a",
            ConstantArray::new(100i64, 1).into_array(),
        )])
        .unwrap()
        .into_array();
        let nested_struct_array =
            StructArray::from_fields(&[("a", buffer![1u64].into_array()), ("b", inner)]).unwrap();

        let arrow_array = SESSION
            .arrow()
            .execute_arrow(nested_struct_array.into_array(), None, &mut ctx)
            .unwrap();
        let arrow_struct = arrow_array
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // Column "a" is a plain u64 primitive.
        let column_a = arrow_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>();
        assert!(column_a.is_some());

        // Column "b" is the nested struct.
        let inner_struct = arrow_struct
            .column(1)
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        let inner_a = inner_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
        assert!(inner_a.is_some());

        let expected = ArrowPrimitiveArray::from_iter([100i64]);
        assert_eq!(inner_a.cloned().unwrap(), expected);
    }

    /// Arrow struct -> Vortex -> Arrow keeps values and nulls.
    #[test]
    fn roundtrip_struct() {
        let mut ctx = SESSION.create_execution_ctx();

        // Struct-level validity: rows 0..4 valid, row 4 null, row 5 valid.
        let mut struct_nulls = NullBufferBuilder::new(6);
        struct_nulls.append_n_non_nulls(4);
        struct_nulls.append_null();
        struct_nulls.append_non_null();

        let names = Arc::new(StringViewArray::from_iter(vec![
            Some("Joseph"),
            None,
            Some("Angela"),
            Some("Mikhail"),
            None,
            None,
        ]));
        let ages = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
            Some(25),
            Some(31),
            None,
            Some(57),
            None,
            None,
        ]));

        let schema_fields = vec![
            Arc::new(Field::new("name", DataType::Utf8View, true)),
            Arc::new(Field::new("age", DataType::Int32, true)),
        ];
        let arrow_struct = ArrowStructArray::new(
            schema_fields.into(),
            vec![names, ages],
            struct_nulls.finish(),
        );

        let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true).unwrap();
        let roundtripped = SESSION
            .arrow()
            .execute_arrow(vortex_struct, None, &mut ctx)
            .unwrap();
        assert_eq!(&arrow_struct, roundtripped.as_struct());
    }

    /// Arrow list -> Vortex -> Arrow (with the original list field requested) is unchanged.
    #[test]
    fn roundtrip_list() {
        let mut ctx = SESSION.create_execution_ctx();

        let names = Arc::new(StringArray::from_iter(vec![
            Some("Joseph"),
            Some("Angela"),
            Some("Mikhail"),
        ]));
        let element_field = Arc::new(Field::new_list_field(DataType::Utf8, true));
        let offsets = OffsetBuffer::from_lengths(vec![0, 2, 1]);
        let arrow_list = ArrowListArray::new(element_field, offsets, names, None);

        let list_data_type = arrow_list.data_type();
        let list_field = Field::new(String::new(), list_data_type.clone(), true);

        let vortex_list = ArrayRef::from_arrow(&arrow_list, true).unwrap();

        let rt_arrow_list = SESSION
            .arrow()
            .execute_arrow(vortex_list, Some(&list_field), &mut ctx)
            .unwrap();

        let expected: ArrowArrayRef = Arc::new(arrow_list.clone());
        assert_eq!(expected.as_ref(), rt_arrow_list.as_ref());
    }
}