1use std::sync::Arc;
7
8use vortex_buffer::BitBuffer;
9use vortex_buffer::Buffer;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_ensure;
13use vortex_error::vortex_panic;
14
15use crate::ArrayRef;
16use crate::Executable;
17use crate::ExecutionCtx;
18use crate::IntoArray;
19use crate::array::ArrayView;
20use crate::array::child_to_validity;
21use crate::arrays::Bool;
22use crate::arrays::BoolArray;
23use crate::arrays::Decimal;
24use crate::arrays::DecimalArray;
25use crate::arrays::Extension;
26use crate::arrays::ExtensionArray;
27use crate::arrays::FixedSizeList;
28use crate::arrays::FixedSizeListArray;
29use crate::arrays::ListView;
30use crate::arrays::ListViewArray;
31use crate::arrays::Null;
32use crate::arrays::NullArray;
33use crate::arrays::Primitive;
34use crate::arrays::PrimitiveArray;
35use crate::arrays::Struct;
36use crate::arrays::StructArray;
37use crate::arrays::VarBinView;
38use crate::arrays::VarBinViewArray;
39use crate::arrays::Variant;
40use crate::arrays::VariantArray;
41use crate::arrays::bool::BoolDataParts;
42use crate::arrays::decimal::DecimalDataParts;
43use crate::arrays::extension::ExtensionArrayExt;
44use crate::arrays::fixed_size_list::FixedSizeListArrayExt;
45use crate::arrays::listview::ListViewDataParts;
46use crate::arrays::listview::ListViewRebuildMode;
47use crate::arrays::primitive::PrimitiveDataParts;
48use crate::arrays::struct_::StructDataParts;
49use crate::arrays::varbinview::VarBinViewDataParts;
50use crate::arrays::variant::VariantArrayExt;
51use crate::dtype::DType;
52use crate::dtype::NativePType;
53use crate::dtype::Nullability;
54use crate::dtype::PType;
55use crate::match_each_decimal_value_type;
56use crate::match_each_native_ptype;
57use crate::matcher::Matcher;
58use crate::validity::Validity;
59
/// The closed set of canonical array representations handled by this module.
///
/// Each variant wraps exactly one concrete array type; helper methods on this
/// enum dispatch to the wrapped array.
#[derive(Debug, Clone)]
pub enum Canonical {
    /// Wraps a [`NullArray`].
    Null(NullArray),
    /// Wraps a [`BoolArray`].
    Bool(BoolArray),
    /// Wraps a [`PrimitiveArray`].
    Primitive(PrimitiveArray),
    /// Wraps a [`DecimalArray`].
    Decimal(DecimalArray),
    /// Wraps a [`VarBinViewArray`]; `Canonical::empty` uses it for both UTF-8 and binary dtypes.
    VarBinView(VarBinViewArray),
    /// Wraps a [`ListViewArray`] (note: the variant is named `List`, the payload is a list view).
    List(ListViewArray),
    /// Wraps a [`FixedSizeListArray`].
    FixedSizeList(FixedSizeListArray),
    /// Wraps a [`StructArray`].
    Struct(StructArray),
    /// Wraps an [`ExtensionArray`].
    Extension(ExtensionArray),
    /// Wraps a [`VariantArray`].
    Variant(VariantArray),
}
133
/// Dispatches over every [`Canonical`] variant.
///
/// The wrapped array of whichever variant matches is bound to `$ident`, and
/// `$eval` is evaluated with that binding in scope. All ten variants are listed
/// explicitly, so adding a variant to `Canonical` requires adding an arm here.
macro_rules! match_each_canonical {
    ($self:expr, | $ident:ident | $eval:expr) => {{
        match $self {
            Canonical::Null($ident) => $eval,
            Canonical::Bool($ident) => $eval,
            Canonical::Primitive($ident) => $eval,
            Canonical::Decimal($ident) => $eval,
            Canonical::VarBinView($ident) => $eval,
            Canonical::List($ident) => $eval,
            Canonical::FixedSizeList($ident) => $eval,
            Canonical::Struct($ident) => $eval,
            Canonical::Variant($ident) => $eval,
            Canonical::Extension($ident) => $eval,
        }
    }};
}
151
152impl Canonical {
153 pub fn empty(dtype: &DType) -> Canonical {
155 match dtype {
156 DType::Null => Canonical::Null(NullArray::new(0)),
157 DType::Bool(n) => Canonical::Bool(unsafe {
158 BoolArray::new_unchecked(BitBuffer::empty(), Validity::from(n))
159 }),
160 DType::Primitive(ptype, n) => {
161 match_each_native_ptype!(ptype, |P| {
162 Canonical::Primitive(unsafe {
163 PrimitiveArray::new_unchecked(Buffer::<P>::empty(), Validity::from(n))
164 })
165 })
166 }
167 DType::Decimal(decimal_type, n) => {
168 match_each_decimal_value_type!(
169 DecimalType::smallest_decimal_value_type(decimal_type),
170 |D| {
171 Canonical::Decimal(unsafe {
172 DecimalArray::new_unchecked::<D>(
173 Buffer::empty(),
174 *decimal_type,
175 Validity::from(n),
176 )
177 })
178 }
179 )
180 }
181 DType::Utf8(n) => Canonical::VarBinView(unsafe {
182 VarBinViewArray::new_unchecked(
183 Buffer::empty(),
184 Arc::new([]),
185 dtype.clone(),
186 Validity::from(n),
187 )
188 }),
189 DType::Binary(n) => Canonical::VarBinView(unsafe {
190 VarBinViewArray::new_unchecked(
191 Buffer::empty(),
192 Arc::new([]),
193 dtype.clone(),
194 Validity::from(n),
195 )
196 }),
197 DType::Struct(struct_dtype, n) => Canonical::Struct(unsafe {
198 StructArray::new_unchecked(
199 struct_dtype
200 .fields()
201 .map(|f| Canonical::empty(&f).into_array())
202 .collect::<Arc<[_]>>(),
203 struct_dtype.clone(),
204 0,
205 Validity::from(n),
206 )
207 }),
208 DType::List(dtype, n) => Canonical::List(unsafe {
209 ListViewArray::new_unchecked(
210 Canonical::empty(dtype).into_array(),
211 Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
212 .into_array(),
213 Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
214 .into_array(),
215 Validity::from(n),
216 )
217 .with_zero_copy_to_list(true)
219 }),
220 DType::FixedSizeList(elem_dtype, list_size, null) => Canonical::FixedSizeList(unsafe {
221 FixedSizeListArray::new_unchecked(
222 Canonical::empty(elem_dtype).into_array(),
223 *list_size,
224 Validity::from(null),
225 0,
226 )
227 }),
228 DType::Extension(ext_dtype) => Canonical::Extension(ExtensionArray::new(
229 ext_dtype.clone(),
230 Canonical::empty(ext_dtype.storage_dtype()).into_array(),
231 )),
232 DType::Variant(_) => {
233 vortex_panic!(InvalidArgument: "Canonical empty is not supported for Variant")
234 }
235 }
236 }
237
238 pub fn len(&self) -> usize {
239 match_each_canonical!(self, |arr| arr.len())
240 }
241
242 pub fn dtype(&self) -> &DType {
243 match_each_canonical!(self, |arr| arr.dtype())
244 }
245
246 pub fn is_empty(&self) -> bool {
247 match_each_canonical!(self, |arr| arr.is_empty())
248 }
249}
250
251impl Canonical {
252 pub fn compact(&self) -> VortexResult<Canonical> {
260 match self {
261 Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
262 Canonical::List(array) => Ok(Canonical::List(
263 array.rebuild(ListViewRebuildMode::TrimElements)?,
264 )),
265 _ => Ok(self.clone()),
266 }
267 }
268}
269
270impl Canonical {
272 pub fn as_null(&self) -> &NullArray {
273 if let Canonical::Null(a) = self {
274 a
275 } else {
276 vortex_panic!("Cannot get NullArray from {:?}", &self)
277 }
278 }
279
280 pub fn into_null(self) -> NullArray {
281 if let Canonical::Null(a) = self {
282 a
283 } else {
284 vortex_panic!("Cannot unwrap NullArray from {:?}", &self)
285 }
286 }
287
288 pub fn as_bool(&self) -> &BoolArray {
289 if let Canonical::Bool(a) = self {
290 a
291 } else {
292 vortex_panic!("Cannot get BoolArray from {:?}", &self)
293 }
294 }
295
296 pub fn into_bool(self) -> BoolArray {
297 if let Canonical::Bool(a) = self {
298 a
299 } else {
300 vortex_panic!("Cannot unwrap BoolArray from {:?}", &self)
301 }
302 }
303
304 pub fn as_primitive(&self) -> &PrimitiveArray {
305 if let Canonical::Primitive(a) = self {
306 a
307 } else {
308 vortex_panic!("Cannot get PrimitiveArray from {:?}", &self)
309 }
310 }
311
312 pub fn into_primitive(self) -> PrimitiveArray {
313 if let Canonical::Primitive(a) = self {
314 a
315 } else {
316 vortex_panic!("Cannot unwrap PrimitiveArray from {:?}", &self)
317 }
318 }
319
320 pub fn as_decimal(&self) -> &DecimalArray {
321 if let Canonical::Decimal(a) = self {
322 a
323 } else {
324 vortex_panic!("Cannot get DecimalArray from {:?}", &self)
325 }
326 }
327
328 pub fn into_decimal(self) -> DecimalArray {
329 if let Canonical::Decimal(a) = self {
330 a
331 } else {
332 vortex_panic!("Cannot unwrap DecimalArray from {:?}", &self)
333 }
334 }
335
336 pub fn as_varbinview(&self) -> &VarBinViewArray {
337 if let Canonical::VarBinView(a) = self {
338 a
339 } else {
340 vortex_panic!("Cannot get VarBinViewArray from {:?}", &self)
341 }
342 }
343
344 pub fn into_varbinview(self) -> VarBinViewArray {
345 if let Canonical::VarBinView(a) = self {
346 a
347 } else {
348 vortex_panic!("Cannot unwrap VarBinViewArray from {:?}", &self)
349 }
350 }
351
352 pub fn as_listview(&self) -> &ListViewArray {
353 if let Canonical::List(a) = self {
354 a
355 } else {
356 vortex_panic!("Cannot get ListArray from {:?}", &self)
357 }
358 }
359
360 pub fn into_listview(self) -> ListViewArray {
361 if let Canonical::List(a) = self {
362 a
363 } else {
364 vortex_panic!("Cannot unwrap ListArray from {:?}", &self)
365 }
366 }
367
368 pub fn as_fixed_size_list(&self) -> &FixedSizeListArray {
369 if let Canonical::FixedSizeList(a) = self {
370 a
371 } else {
372 vortex_panic!("Cannot get FixedSizeListArray from {:?}", &self)
373 }
374 }
375
376 pub fn into_fixed_size_list(self) -> FixedSizeListArray {
377 if let Canonical::FixedSizeList(a) = self {
378 a
379 } else {
380 vortex_panic!("Cannot unwrap FixedSizeListArray from {:?}", &self)
381 }
382 }
383
384 pub fn as_struct(&self) -> &StructArray {
385 if let Canonical::Struct(a) = self {
386 a
387 } else {
388 vortex_panic!("Cannot get StructArray from {:?}", &self)
389 }
390 }
391
392 pub fn into_struct(self) -> StructArray {
393 if let Canonical::Struct(a) = self {
394 a
395 } else {
396 vortex_panic!("Cannot unwrap StructArray from {:?}", &self)
397 }
398 }
399
400 pub fn as_extension(&self) -> &ExtensionArray {
401 if let Canonical::Extension(a) = self {
402 a
403 } else {
404 vortex_panic!("Cannot get ExtensionArray from {:?}", &self)
405 }
406 }
407
408 pub fn into_extension(self) -> ExtensionArray {
409 if let Canonical::Extension(a) = self {
410 a
411 } else {
412 vortex_panic!("Cannot unwrap ExtensionArray from {:?}", &self)
413 }
414 }
415}
416
417impl IntoArray for Canonical {
418 fn into_array(self) -> ArrayRef {
419 match_each_canonical!(self, |arr| arr.into_array())
420 }
421}
422
/// Deprecated convenience conversions from an array to a concrete canonical
/// array type.
///
/// Every method here is deprecated in favour of `array.execute::<T>(ctx)`, as
/// stated in the deprecation notes. The `ArrayRef` implementation in this file
/// panics if canonicalization fails or produces a different canonical variant.
#[deprecated(note = "use `array.execute::<T>(ctx)` instead")]
pub trait ToCanonical {
    /// Returns this array as a [`NullArray`].
    #[deprecated(note = "use `array.execute::<NullArray>(ctx)` instead")]
    fn to_null(&self) -> NullArray;

    /// Returns this array as a [`BoolArray`].
    #[deprecated(note = "use `array.execute::<BoolArray>(ctx)` instead")]
    fn to_bool(&self) -> BoolArray;

    /// Returns this array as a [`PrimitiveArray`].
    #[deprecated(note = "use `array.execute::<PrimitiveArray>(ctx)` instead")]
    fn to_primitive(&self) -> PrimitiveArray;

    /// Returns this array as a [`DecimalArray`].
    #[deprecated(note = "use `array.execute::<DecimalArray>(ctx)` instead")]
    fn to_decimal(&self) -> DecimalArray;

    /// Returns this array as a [`StructArray`].
    #[deprecated(note = "use `array.execute::<StructArray>(ctx)` instead")]
    fn to_struct(&self) -> StructArray;

    /// Returns this array as a [`ListViewArray`].
    #[deprecated(note = "use `array.execute::<ListViewArray>(ctx)` instead")]
    fn to_listview(&self) -> ListViewArray;

    /// Returns this array as a [`FixedSizeListArray`].
    #[deprecated(note = "use `array.execute::<FixedSizeListArray>(ctx)` instead")]
    fn to_fixed_size_list(&self) -> FixedSizeListArray;

    /// Returns this array as a [`VarBinViewArray`].
    #[deprecated(note = "use `array.execute::<VarBinViewArray>(ctx)` instead")]
    fn to_varbinview(&self) -> VarBinViewArray;

    /// Returns this array as an [`ExtensionArray`].
    #[deprecated(note = "use `array.execute::<ExtensionArray>(ctx)` instead")]
    fn to_extension(&self) -> ExtensionArray;
}
471
472#[expect(deprecated)]
474impl ToCanonical for ArrayRef {
475 fn to_null(&self) -> NullArray {
476 #[expect(deprecated)]
477 let result = self.to_canonical().vortex_expect("to_canonical failed");
478 result.into_null()
479 }
480
481 fn to_bool(&self) -> BoolArray {
482 #[expect(deprecated)]
483 let result = self.to_canonical().vortex_expect("to_canonical failed");
484 result.into_bool()
485 }
486
487 fn to_primitive(&self) -> PrimitiveArray {
488 #[expect(deprecated)]
489 let result = self.to_canonical().vortex_expect("to_canonical failed");
490 result.into_primitive()
491 }
492
493 fn to_decimal(&self) -> DecimalArray {
494 #[expect(deprecated)]
495 let result = self.to_canonical().vortex_expect("to_canonical failed");
496 result.into_decimal()
497 }
498
499 fn to_struct(&self) -> StructArray {
500 #[expect(deprecated)]
501 let result = self.to_canonical().vortex_expect("to_canonical failed");
502 result.into_struct()
503 }
504
505 fn to_listview(&self) -> ListViewArray {
506 #[expect(deprecated)]
507 let result = self.to_canonical().vortex_expect("to_canonical failed");
508 result.into_listview()
509 }
510
511 fn to_fixed_size_list(&self) -> FixedSizeListArray {
512 #[expect(deprecated)]
513 let result = self.to_canonical().vortex_expect("to_canonical failed");
514 result.into_fixed_size_list()
515 }
516
517 fn to_varbinview(&self) -> VarBinViewArray {
518 #[expect(deprecated)]
519 let result = self.to_canonical().vortex_expect("to_canonical failed");
520 result.into_varbinview()
521 }
522
523 fn to_extension(&self) -> ExtensionArray {
524 #[expect(deprecated)]
525 let result = self.to_canonical().vortex_expect("to_canonical failed");
526 result.into_extension()
527 }
528}
529
530impl From<Canonical> for ArrayRef {
531 fn from(value: Canonical) -> Self {
532 match_each_canonical!(value, |arr| arr.into_array())
533 }
534}
535
536impl Executable for Canonical {
542 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
543 let result = array.execute_until::<AnyCanonical>(ctx)?;
544 Ok(result
545 .as_opt::<AnyCanonical>()
546 .map(Canonical::from)
547 .vortex_expect("execute_until::<AnyCanonical> must return a canonical array"))
548 }
549}
550
/// Newtype around [`Canonical`] used as an execution target.
///
/// Its `Executable` implementation in this file canonicalizes the array and
/// rebuilds it with its validity executed as well; for `Extension` and
/// `Variant` it instead recurses into the storage / child array.
pub struct CanonicalValidity(pub Canonical);
556
557impl Executable for CanonicalValidity {
558 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
559 match array.execute::<Canonical>(ctx)? {
560 n @ Canonical::Null(_) => Ok(CanonicalValidity(n)),
561 Canonical::Bool(b) => {
562 let validity = child_to_validity(b.slots()[0].as_ref(), b.dtype().nullability());
563 let len = b.len();
564 let BoolDataParts { bits, offset, len } = b.into_data().into_parts(len);
565 Ok(CanonicalValidity(Canonical::Bool(
566 BoolArray::try_new_from_handle(bits, offset, len, validity.execute(ctx)?)?,
567 )))
568 }
569 Canonical::Primitive(p) => {
570 let PrimitiveDataParts {
571 ptype,
572 buffer,
573 validity,
574 } = p.into_data_parts();
575 Ok(CanonicalValidity(Canonical::Primitive(unsafe {
576 PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
577 })))
578 }
579 Canonical::Decimal(d) => {
580 let DecimalDataParts {
581 decimal_dtype,
582 values,
583 values_type,
584 validity,
585 } = d.into_data_parts();
586 Ok(CanonicalValidity(Canonical::Decimal(unsafe {
587 DecimalArray::new_unchecked_handle(
588 values,
589 values_type,
590 decimal_dtype,
591 validity.execute(ctx)?,
592 )
593 })))
594 }
595 Canonical::VarBinView(vbv) => {
596 let VarBinViewDataParts {
597 dtype,
598 buffers,
599 views,
600 validity,
601 } = vbv.into_data_parts();
602 Ok(CanonicalValidity(Canonical::VarBinView(unsafe {
603 VarBinViewArray::new_handle_unchecked(
604 views,
605 buffers,
606 dtype,
607 validity.execute(ctx)?,
608 )
609 })))
610 }
611 Canonical::List(l) => {
612 let zctl = l.is_zero_copy_to_list();
613 let ListViewDataParts {
614 elements,
615 offsets,
616 sizes,
617 validity,
618 ..
619 } = l.into_data_parts();
620 Ok(CanonicalValidity(Canonical::List(unsafe {
621 ListViewArray::new_unchecked(elements, offsets, sizes, validity.execute(ctx)?)
622 .with_zero_copy_to_list(zctl)
623 })))
624 }
625 Canonical::FixedSizeList(fsl) => {
626 let list_size = fsl.list_size();
627 let len = fsl.len();
628 let parts = fsl.into_data_parts();
629 let elements = parts.elements;
630 let validity = parts.validity;
631 Ok(CanonicalValidity(Canonical::FixedSizeList(
632 FixedSizeListArray::new(elements, list_size, validity.execute(ctx)?, len),
633 )))
634 }
635 Canonical::Struct(st) => {
636 let len = st.len();
637 let StructDataParts {
638 struct_fields,
639 fields,
640 validity,
641 } = st.into_data_parts();
642 Ok(CanonicalValidity(Canonical::Struct(unsafe {
643 StructArray::new_unchecked(fields, struct_fields, len, validity.execute(ctx)?)
644 })))
645 }
646 Canonical::Extension(ext) => Ok(CanonicalValidity(Canonical::Extension(
647 ExtensionArray::new(
648 ext.ext_dtype().clone(),
649 ext.storage_array()
650 .clone()
651 .execute::<CanonicalValidity>(ctx)?
652 .0
653 .into_array(),
654 ),
655 ))),
656 Canonical::Variant(variant) => {
657 Ok(CanonicalValidity(Canonical::Variant(VariantArray::new(
658 variant
659 .child()
660 .clone()
661 .execute::<CanonicalValidity>(ctx)?
662 .0
663 .into_array(),
664 ))))
665 }
666 }
667 }
668}
669
/// Newtype around [`Canonical`] used as an execution target.
///
/// Its `Executable` implementation in this file canonicalizes the array,
/// executes its validity, and additionally executes nested child arrays
/// (list elements/offsets/sizes, fixed-size-list elements, struct fields,
/// extension storage, variant child) as `RecursiveCanonical`.
pub struct RecursiveCanonical(pub Canonical);
675
676impl Executable for RecursiveCanonical {
677 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
678 match array.execute::<Canonical>(ctx)? {
679 n @ Canonical::Null(_) => Ok(RecursiveCanonical(n)),
680 Canonical::Bool(b) => {
681 let validity = child_to_validity(b.slots()[0].as_ref(), b.dtype().nullability());
682 let len = b.len();
683 let BoolDataParts { bits, offset, len } = b.into_data().into_parts(len);
684 Ok(RecursiveCanonical(Canonical::Bool(
685 BoolArray::try_new_from_handle(bits, offset, len, validity.execute(ctx)?)?,
686 )))
687 }
688 Canonical::Primitive(p) => {
689 let PrimitiveDataParts {
690 ptype,
691 buffer,
692 validity,
693 } = p.into_data_parts();
694 Ok(RecursiveCanonical(Canonical::Primitive(unsafe {
695 PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
696 })))
697 }
698 Canonical::Decimal(d) => {
699 let DecimalDataParts {
700 decimal_dtype,
701 values,
702 values_type,
703 validity,
704 } = d.into_data_parts();
705 Ok(RecursiveCanonical(Canonical::Decimal(unsafe {
706 DecimalArray::new_unchecked_handle(
707 values,
708 values_type,
709 decimal_dtype,
710 validity.execute(ctx)?,
711 )
712 })))
713 }
714 Canonical::VarBinView(vbv) => {
715 let VarBinViewDataParts {
716 dtype,
717 buffers,
718 views,
719 validity,
720 } = vbv.into_data_parts();
721 Ok(RecursiveCanonical(Canonical::VarBinView(unsafe {
722 VarBinViewArray::new_handle_unchecked(
723 views,
724 buffers,
725 dtype,
726 validity.execute(ctx)?,
727 )
728 })))
729 }
730 Canonical::List(l) => {
731 let zctl = l.is_zero_copy_to_list();
732 let ListViewDataParts {
733 elements,
734 offsets,
735 sizes,
736 validity,
737 ..
738 } = l.into_data_parts();
739 Ok(RecursiveCanonical(Canonical::List(unsafe {
740 ListViewArray::new_unchecked(
741 elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
742 offsets.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
743 sizes.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
744 validity.execute(ctx)?,
745 )
746 .with_zero_copy_to_list(zctl)
747 })))
748 }
749 Canonical::FixedSizeList(fsl) => {
750 let list_size = fsl.list_size();
751 let len = fsl.len();
752 let parts = fsl.into_data_parts();
753 let elements = parts.elements;
754 let validity = parts.validity;
755 Ok(RecursiveCanonical(Canonical::FixedSizeList(
756 FixedSizeListArray::new(
757 elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
758 list_size,
759 validity.execute(ctx)?,
760 len,
761 ),
762 )))
763 }
764 Canonical::Struct(st) => {
765 let len = st.len();
766 let StructDataParts {
767 struct_fields,
768 fields,
769 validity,
770 } = st.into_data_parts();
771 let executed_fields = fields
772 .iter()
773 .map(|f| Ok(f.clone().execute::<RecursiveCanonical>(ctx)?.0.into_array()))
774 .collect::<VortexResult<Arc<[_]>>>()?;
775
776 Ok(RecursiveCanonical(Canonical::Struct(unsafe {
777 StructArray::new_unchecked(
778 executed_fields,
779 struct_fields,
780 len,
781 validity.execute(ctx)?,
782 )
783 })))
784 }
785 Canonical::Extension(ext) => Ok(RecursiveCanonical(Canonical::Extension(
786 ExtensionArray::new(
787 ext.ext_dtype().clone(),
788 ext.storage_array()
789 .clone()
790 .execute::<RecursiveCanonical>(ctx)?
791 .0
792 .into_array(),
793 ),
794 ))),
795 Canonical::Variant(variant) => {
796 Ok(RecursiveCanonical(Canonical::Variant(VariantArray::new(
797 variant
798 .child()
799 .clone()
800 .execute::<RecursiveCanonical>(ctx)?
801 .0
802 .into_array(),
803 ))))
804 }
805 }
806 }
807}
808
809impl<T: NativePType> Executable for Buffer<T> {
815 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
816 let array = PrimitiveArray::execute(array, ctx)?;
817 vortex_ensure!(
818 matches!(
819 array.validity()?,
820 Validity::NonNullable | Validity::AllValid
821 ),
822 "Cannot execute to native buffer: array is not all-valid."
823 );
824 Ok(array.into_buffer())
825 }
826}
827
828impl Executable for PrimitiveArray {
832 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
833 match array.try_downcast::<Primitive>() {
834 Ok(primitive) => Ok(primitive),
835 Err(array) => Ok(Canonical::execute(array, ctx)?.into_primitive()),
836 }
837 }
838}
839
840impl Executable for BoolArray {
844 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
845 match array.try_downcast::<Bool>() {
846 Ok(bool_array) => Ok(bool_array),
847 Err(array) => Ok(Canonical::execute(array, ctx)?.into_bool()),
848 }
849 }
850}
851
852impl Executable for BitBuffer {
856 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
857 let bool = BoolArray::execute(array, ctx)?;
858 assert!(
859 !bool.dtype().is_nullable(),
860 "bit buffer execute only works with non-nullable bool arrays"
861 );
862 Ok(bool.into_bit_buffer())
863 }
864}
865
866impl Executable for NullArray {
870 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
871 match array.try_downcast::<Null>() {
872 Ok(null_array) => Ok(null_array),
873 Err(array) => Ok(Canonical::execute(array, ctx)?.into_null()),
874 }
875 }
876}
877
878impl Executable for VarBinViewArray {
882 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
883 match array.try_downcast::<VarBinView>() {
884 Ok(varbinview) => Ok(varbinview),
885 Err(array) => Ok(Canonical::execute(array, ctx)?.into_varbinview()),
886 }
887 }
888}
889
890impl Executable for ExtensionArray {
894 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
895 match array.try_downcast::<Extension>() {
896 Ok(ext_array) => Ok(ext_array),
897 Err(array) => Ok(Canonical::execute(array, ctx)?.into_extension()),
898 }
899 }
900}
901
902impl Executable for DecimalArray {
906 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
907 match array.try_downcast::<Decimal>() {
908 Ok(decimal) => Ok(decimal),
909 Err(array) => Ok(Canonical::execute(array, ctx)?.into_decimal()),
910 }
911 }
912}
913
914impl Executable for ListViewArray {
918 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
919 match array.try_downcast::<ListView>() {
920 Ok(list) => Ok(list),
921 Err(array) => Ok(Canonical::execute(array, ctx)?.into_listview()),
922 }
923 }
924}
925
926impl Executable for FixedSizeListArray {
930 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
931 match array.try_downcast::<FixedSizeList>() {
932 Ok(fsl) => Ok(fsl),
933 Err(array) => Ok(Canonical::execute(array, ctx)?.into_fixed_size_list()),
934 }
935 }
936}
937
938impl Executable for StructArray {
942 fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
943 match array.try_downcast::<Struct>() {
944 Ok(struct_array) => Ok(struct_array),
945 Err(array) => Ok(Canonical::execute(array, ctx)?.into_struct()),
946 }
947 }
948}
949
/// Borrowed counterpart of [`Canonical`]: one variant per canonical encoding,
/// each holding an [`ArrayView`] with lifetime `'a` instead of an owned array.
///
/// This is the `Match` type produced by the [`AnyCanonical`] matcher, and it
/// converts to an owned [`Canonical`] via `From`.
#[derive(Debug, Clone, Copy)]
pub enum CanonicalView<'a> {
    /// View of a `Null` array.
    Null(ArrayView<'a, Null>),
    /// View of a `Bool` array.
    Bool(ArrayView<'a, Bool>),
    /// View of a `Primitive` array.
    Primitive(ArrayView<'a, Primitive>),
    /// View of a `Decimal` array.
    Decimal(ArrayView<'a, Decimal>),
    /// View of a `VarBinView` array.
    VarBinView(ArrayView<'a, VarBinView>),
    /// View of a `ListView` array (variant named `List`, like `Canonical::List`).
    List(ArrayView<'a, ListView>),
    /// View of a `FixedSizeList` array.
    FixedSizeList(ArrayView<'a, FixedSizeList>),
    /// View of a `Struct` array.
    Struct(ArrayView<'a, Struct>),
    /// View of an `Extension` array.
    Extension(ArrayView<'a, Extension>),
    /// View of a `Variant` array.
    Variant(ArrayView<'a, Variant>),
}
967
968impl From<CanonicalView<'_>> for Canonical {
969 fn from(value: CanonicalView<'_>) -> Self {
970 match value {
971 CanonicalView::Null(a) => Canonical::Null(a.into_owned()),
972 CanonicalView::Bool(a) => Canonical::Bool(a.into_owned()),
973 CanonicalView::Primitive(a) => Canonical::Primitive(a.into_owned()),
974 CanonicalView::Decimal(a) => Canonical::Decimal(a.into_owned()),
975 CanonicalView::VarBinView(a) => Canonical::VarBinView(a.into_owned()),
976 CanonicalView::List(a) => Canonical::List(a.into_owned()),
977 CanonicalView::FixedSizeList(a) => Canonical::FixedSizeList(a.into_owned()),
978 CanonicalView::Struct(a) => Canonical::Struct(a.into_owned()),
979 CanonicalView::Extension(a) => Canonical::Extension(a.into_owned()),
980 CanonicalView::Variant(a) => Canonical::Variant(a.into_owned()),
981 }
982 }
983}
984
985impl CanonicalView<'_> {
986 pub fn to_array_ref(&self) -> ArrayRef {
988 match self {
989 CanonicalView::Null(a) => a.array().clone(),
990 CanonicalView::Bool(a) => a.array().clone(),
991 CanonicalView::Primitive(a) => a.array().clone(),
992 CanonicalView::Decimal(a) => a.array().clone(),
993 CanonicalView::VarBinView(a) => a.array().clone(),
994 CanonicalView::List(a) => a.array().clone(),
995 CanonicalView::FixedSizeList(a) => a.array().clone(),
996 CanonicalView::Struct(a) => a.array().clone(),
997 CanonicalView::Extension(a) => a.array().clone(),
998 CanonicalView::Variant(a) => a.array().clone(),
999 }
1000 }
1001}
1002
/// Marker type whose [`Matcher`] implementation accepts an array of any
/// canonical encoding and yields a [`CanonicalView`] of it.
pub struct AnyCanonical;
1005impl Matcher for AnyCanonical {
1006 type Match<'a> = CanonicalView<'a>;
1007
1008 fn matches(array: &ArrayRef) -> bool {
1009 array.is::<Null>()
1010 || array.is::<Bool>()
1011 || array.is::<Primitive>()
1012 || array.is::<Decimal>()
1013 || array.is::<Struct>()
1014 || array.is::<ListView>()
1015 || array.is::<FixedSizeList>()
1016 || array.is::<VarBinView>()
1017 || array.is::<Variant>()
1018 || array.is::<Extension>()
1019 || array.is::<Variant>()
1020 }
1021
1022 fn try_match<'a>(array: &'a ArrayRef) -> Option<Self::Match<'a>> {
1023 if let Some(a) = array.as_opt::<Null>() {
1024 Some(CanonicalView::Null(a))
1025 } else if let Some(a) = array.as_opt::<Bool>() {
1026 Some(CanonicalView::Bool(a))
1027 } else if let Some(a) = array.as_opt::<Primitive>() {
1028 Some(CanonicalView::Primitive(a))
1029 } else if let Some(a) = array.as_opt::<Decimal>() {
1030 Some(CanonicalView::Decimal(a))
1031 } else if let Some(a) = array.as_opt::<Struct>() {
1032 Some(CanonicalView::Struct(a))
1033 } else if let Some(a) = array.as_opt::<ListView>() {
1034 Some(CanonicalView::List(a))
1035 } else if let Some(a) = array.as_opt::<FixedSizeList>() {
1036 Some(CanonicalView::FixedSizeList(a))
1037 } else if let Some(a) = array.as_opt::<VarBinView>() {
1038 Some(CanonicalView::VarBinView(a))
1039 } else if let Some(a) = array.as_opt::<Variant>() {
1040 Some(CanonicalView::Variant(a))
1041 } else {
1042 array.as_opt::<Extension>().map(CanonicalView::Extension)
1043 }
1044 }
1045}
1046
// Tests that execute Vortex arrays to Arrow and compare against Arrow arrays.
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_array::Array as ArrowArray;
    use arrow_array::ArrayRef as ArrowArrayRef;
    use arrow_array::ListArray as ArrowListArray;
    use arrow_array::PrimitiveArray as ArrowPrimitiveArray;
    use arrow_array::StringArray;
    use arrow_array::StringViewArray;
    use arrow_array::StructArray as ArrowStructArray;
    use arrow_array::cast::AsArray;
    use arrow_array::types::Int32Type;
    use arrow_array::types::Int64Type;
    use arrow_array::types::UInt64Type;
    use arrow_buffer::NullBufferBuilder;
    use arrow_buffer::OffsetBuffer;
    use arrow_schema::DataType;
    use arrow_schema::Field;
    use vortex_buffer::buffer;

    use crate::ArrayRef;
    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::ConstantArray;
    use crate::arrow::ArrowArrayExecutor;
    use crate::arrow::FromArrowArray;
    use crate::canonical::StructArray;

    /// A struct containing a nested struct whose only field is a constant array
    /// must execute to Arrow with concrete primitive columns at both levels.
    #[test]
    fn test_canonicalize_nested_struct() {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let nested_struct_array = StructArray::from_fields(&[
            ("a", buffer![1u64].into_array()),
            (
                "b",
                StructArray::from_fields(&[(
                    "inner_a",
                    ConstantArray::new(100i64, 1).into_array(),
                )])
                .unwrap()
                .into_array(),
            ),
        ])
        .unwrap();

        let arrow_struct = nested_struct_array
            .into_array()
            .execute_arrow(None, &mut ctx)
            .unwrap()
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // Column 0 ("a") must be a u64 primitive array.
        assert!(
            arrow_struct
                .column(0)
                .as_any()
                .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>()
                .is_some()
        );

        // Column 1 ("b") must itself be a struct.
        let inner_struct = Arc::clone(arrow_struct.column(1))
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // The constant child must come out as an i64 primitive array holding [100].
        let inner_a = inner_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
        assert!(inner_a.is_some());

        assert_eq!(
            inner_a.cloned().unwrap(),
            ArrowPrimitiveArray::from_iter([100i64])
        );
    }

    /// Arrow struct (with struct-level and field-level nulls) -> Vortex -> Arrow
    /// must reproduce the original Arrow struct.
    #[test]
    fn roundtrip_struct() {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        // Struct-level validity: rows 0-3 valid, row 4 null, row 5 valid.
        let mut nulls = NullBufferBuilder::new(6);
        nulls.append_n_non_nulls(4);
        nulls.append_null();
        nulls.append_non_null();
        let names = Arc::new(StringViewArray::from_iter(vec![
            Some("Joseph"),
            None,
            Some("Angela"),
            Some("Mikhail"),
            None,
            None,
        ]));
        let ages = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
            Some(25),
            Some(31),
            None,
            Some(57),
            None,
            None,
        ]));

        let arrow_struct = ArrowStructArray::new(
            vec![
                Arc::new(Field::new("name", DataType::Utf8View, true)),
                Arc::new(Field::new("age", DataType::Int32, true)),
            ]
            .into(),
            vec![names, ages],
            nulls.finish(),
        );

        let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true).unwrap();

        assert_eq!(
            &arrow_struct,
            vortex_struct
                .execute_arrow(None, &mut ctx)
                .unwrap()
                .as_struct()
        );
    }

    /// Arrow list of strings -> Vortex -> Arrow (requesting the original Arrow
    /// data type) must reproduce the original Arrow list.
    #[test]
    fn roundtrip_list() {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let names = Arc::new(StringArray::from_iter(vec![
            Some("Joseph"),
            Some("Angela"),
            Some("Mikhail"),
        ]));

        // Three lists with lengths 0, 2 and 1, covering all three names.
        let arrow_list = ArrowListArray::new(
            Arc::new(Field::new_list_field(DataType::Utf8, true)),
            OffsetBuffer::from_lengths(vec![0, 2, 1]),
            names,
            None,
        );
        let list_data_type = arrow_list.data_type();

        let vortex_list = ArrayRef::from_arrow(&arrow_list, true).unwrap();

        let rt_arrow_list = vortex_list
            .execute_arrow(Some(list_data_type), &mut ctx)
            .unwrap();

        assert_eq!(
            (Arc::new(arrow_list.clone()) as ArrowArrayRef).as_ref(),
            rt_arrow_list.as_ref()
        );
    }
}