1use vortex_dtype::DType;
7use vortex_error::{VortexResult, vortex_panic};
8
9use crate::arrays::{
10 BoolArray, DecimalArray, ExtensionArray, FixedSizeListArray, ListViewArray,
11 ListViewRebuildMode, NullArray, PrimitiveArray, StructArray, VarBinViewArray,
12};
13use crate::builders::builder_with_capacity;
14use crate::{Array, ArrayRef, IntoArray};
15
16#[derive(Debug, Clone)]
79pub enum Canonical {
80 Null(NullArray),
81 Bool(BoolArray),
82 Primitive(PrimitiveArray),
83 Decimal(DecimalArray),
84 VarBinView(VarBinViewArray),
85 List(ListViewArray),
86 FixedSizeList(FixedSizeListArray),
87 Struct(StructArray),
88 Extension(ExtensionArray),
89}
90
91impl Canonical {
92 pub fn empty(dtype: &DType) -> Canonical {
95 builder_with_capacity(dtype, 0).finish_into_canonical()
96 }
97}
98
99impl Canonical {
100 pub fn compact(&self) -> VortexResult<Canonical> {
108 match self {
109 Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
110 Canonical::List(array) => Ok(Canonical::List(
111 array.rebuild(ListViewRebuildMode::MakeZeroCopyToList),
112 )),
113 _ => Ok(self.clone()),
114 }
115 }
116}
117
118impl Canonical {
120 pub fn as_null(&self) -> &NullArray {
121 if let Canonical::Null(a) = self {
122 a
123 } else {
124 vortex_panic!("Cannot get NullArray from {:?}", &self)
125 }
126 }
127
128 pub fn into_null(self) -> NullArray {
129 if let Canonical::Null(a) = self {
130 a
131 } else {
132 vortex_panic!("Cannot unwrap NullArray from {:?}", &self)
133 }
134 }
135
136 pub fn as_bool(&self) -> &BoolArray {
137 if let Canonical::Bool(a) = self {
138 a
139 } else {
140 vortex_panic!("Cannot get BoolArray from {:?}", &self)
141 }
142 }
143
144 pub fn into_bool(self) -> BoolArray {
145 if let Canonical::Bool(a) = self {
146 a
147 } else {
148 vortex_panic!("Cannot unwrap BoolArray from {:?}", &self)
149 }
150 }
151
152 pub fn as_primitive(&self) -> &PrimitiveArray {
153 if let Canonical::Primitive(a) = self {
154 a
155 } else {
156 vortex_panic!("Cannot get PrimitiveArray from {:?}", &self)
157 }
158 }
159
160 pub fn into_primitive(self) -> PrimitiveArray {
161 if let Canonical::Primitive(a) = self {
162 a
163 } else {
164 vortex_panic!("Cannot unwrap PrimitiveArray from {:?}", &self)
165 }
166 }
167
168 pub fn as_decimal(&self) -> &DecimalArray {
169 if let Canonical::Decimal(a) = self {
170 a
171 } else {
172 vortex_panic!("Cannot get DecimalArray from {:?}", &self)
173 }
174 }
175
176 pub fn into_decimal(self) -> DecimalArray {
177 if let Canonical::Decimal(a) = self {
178 a
179 } else {
180 vortex_panic!("Cannot unwrap DecimalArray from {:?}", &self)
181 }
182 }
183
184 pub fn as_varbinview(&self) -> &VarBinViewArray {
185 if let Canonical::VarBinView(a) = self {
186 a
187 } else {
188 vortex_panic!("Cannot get VarBinViewArray from {:?}", &self)
189 }
190 }
191
192 pub fn into_varbinview(self) -> VarBinViewArray {
193 if let Canonical::VarBinView(a) = self {
194 a
195 } else {
196 vortex_panic!("Cannot unwrap VarBinViewArray from {:?}", &self)
197 }
198 }
199
200 pub fn as_listview(&self) -> &ListViewArray {
201 if let Canonical::List(a) = self {
202 a
203 } else {
204 vortex_panic!("Cannot get ListArray from {:?}", &self)
205 }
206 }
207
208 pub fn into_listview(self) -> ListViewArray {
209 if let Canonical::List(a) = self {
210 a
211 } else {
212 vortex_panic!("Cannot unwrap ListArray from {:?}", &self)
213 }
214 }
215
216 pub fn as_fixed_size_list(&self) -> &FixedSizeListArray {
217 if let Canonical::FixedSizeList(a) = self {
218 a
219 } else {
220 vortex_panic!("Cannot get FixedSizeListArray from {:?}", &self)
221 }
222 }
223
224 pub fn into_fixed_size_list(self) -> FixedSizeListArray {
225 if let Canonical::FixedSizeList(a) = self {
226 a
227 } else {
228 vortex_panic!("Cannot unwrap FixedSizeListArray from {:?}", &self)
229 }
230 }
231
232 pub fn as_struct(&self) -> &StructArray {
233 if let Canonical::Struct(a) = self {
234 a
235 } else {
236 vortex_panic!("Cannot get StructArray from {:?}", &self)
237 }
238 }
239
240 pub fn into_struct(self) -> StructArray {
241 if let Canonical::Struct(a) = self {
242 a
243 } else {
244 vortex_panic!("Cannot unwrap StructArray from {:?}", &self)
245 }
246 }
247
248 pub fn as_extension(&self) -> &ExtensionArray {
249 if let Canonical::Extension(a) = self {
250 a
251 } else {
252 vortex_panic!("Cannot get ExtensionArray from {:?}", &self)
253 }
254 }
255
256 pub fn into_extension(self) -> ExtensionArray {
257 if let Canonical::Extension(a) = self {
258 a
259 } else {
260 vortex_panic!("Cannot unwrap ExtensionArray from {:?}", &self)
261 }
262 }
263}
264
265impl AsRef<dyn Array> for Canonical {
266 fn as_ref(&self) -> &(dyn Array + 'static) {
267 match &self {
268 Canonical::Null(a) => a.as_ref(),
269 Canonical::Bool(a) => a.as_ref(),
270 Canonical::Primitive(a) => a.as_ref(),
271 Canonical::Decimal(a) => a.as_ref(),
272 Canonical::Struct(a) => a.as_ref(),
273 Canonical::List(a) => a.as_ref(),
274 Canonical::FixedSizeList(a) => a.as_ref(),
275 Canonical::VarBinView(a) => a.as_ref(),
276 Canonical::Extension(a) => a.as_ref(),
277 }
278 }
279}
280
281impl IntoArray for Canonical {
282 fn into_array(self) -> ArrayRef {
283 match self {
284 Canonical::Null(a) => a.into_array(),
285 Canonical::Bool(a) => a.into_array(),
286 Canonical::Primitive(a) => a.into_array(),
287 Canonical::Decimal(a) => a.into_array(),
288 Canonical::Struct(a) => a.into_array(),
289 Canonical::List(a) => a.into_array(),
290 Canonical::FixedSizeList(a) => a.into_array(),
291 Canonical::VarBinView(a) => a.into_array(),
292 Canonical::Extension(a) => a.into_array(),
293 }
294 }
295}
296
297pub trait ToCanonical {
303 fn to_null(&self) -> NullArray;
305
306 fn to_bool(&self) -> BoolArray;
308
309 fn to_primitive(&self) -> PrimitiveArray;
312
313 fn to_decimal(&self) -> DecimalArray;
316
317 fn to_struct(&self) -> StructArray;
319
320 fn to_listview(&self) -> ListViewArray;
322
323 fn to_fixed_size_list(&self) -> FixedSizeListArray;
326
327 fn to_varbinview(&self) -> VarBinViewArray;
330
331 fn to_extension(&self) -> ExtensionArray;
334}
335
336impl<A: Array + ?Sized> ToCanonical for A {
338 fn to_null(&self) -> NullArray {
339 self.to_canonical().into_null()
340 }
341
342 fn to_bool(&self) -> BoolArray {
343 self.to_canonical().into_bool()
344 }
345
346 fn to_primitive(&self) -> PrimitiveArray {
347 self.to_canonical().into_primitive()
348 }
349
350 fn to_decimal(&self) -> DecimalArray {
351 self.to_canonical().into_decimal()
352 }
353
354 fn to_struct(&self) -> StructArray {
355 self.to_canonical().into_struct()
356 }
357
358 fn to_listview(&self) -> ListViewArray {
359 self.to_canonical().into_listview()
360 }
361
362 fn to_fixed_size_list(&self) -> FixedSizeListArray {
363 self.to_canonical().into_fixed_size_list()
364 }
365
366 fn to_varbinview(&self) -> VarBinViewArray {
367 self.to_canonical().into_varbinview()
368 }
369
370 fn to_extension(&self) -> ExtensionArray {
371 self.to_canonical().into_extension()
372 }
373}
374
375impl From<Canonical> for ArrayRef {
376 fn from(value: Canonical) -> Self {
377 match value {
378 Canonical::Null(a) => a.into_array(),
379 Canonical::Bool(a) => a.into_array(),
380 Canonical::Primitive(a) => a.into_array(),
381 Canonical::Decimal(a) => a.into_array(),
382 Canonical::Struct(a) => a.into_array(),
383 Canonical::List(a) => a.into_array(),
384 Canonical::FixedSizeList(a) => a.into_array(),
385 Canonical::VarBinView(a) => a.into_array(),
386 Canonical::Extension(a) => a.into_array(),
387 }
388 }
389}
390
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_array::cast::AsArray;
    use arrow_array::types::{Int32Type, Int64Type, UInt64Type};
    use arrow_array::{
        Array as ArrowArray, ArrayRef as ArrowArrayRef, ListArray as ArrowListArray,
        PrimitiveArray as ArrowPrimitiveArray, StringArray, StringViewArray,
        StructArray as ArrowStructArray,
    };
    use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
    use arrow_schema::{DataType, Field};
    use vortex_buffer::buffer;

    use crate::arrays::{ConstantArray, StructArray};
    use crate::arrow::{FromArrowArray, IntoArrowArray};
    use crate::{ArrayRef, IntoArray};

    /// Converting a struct that nests another struct (whose only field is a
    /// `ConstantArray`) to Arrow must yield concrete Arrow primitive columns at
    /// both nesting levels.
    #[test]
    fn test_canonicalize_nested_struct() {
        let inner_vortex = StructArray::from_fields(&[(
            "inner_a",
            ConstantArray::new(100i64, 1).into_array(),
        )])
        .unwrap();
        let outer_vortex = StructArray::from_fields(&[
            ("a", buffer![1u64].into_array()),
            ("b", inner_vortex.into_array()),
        ])
        .unwrap();

        let arrow_array = outer_vortex.into_array().into_arrow_preferred().unwrap();
        let arrow_struct = arrow_array
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // Field "a" must come back as a u64 primitive column.
        let first_column = arrow_struct.column(0);
        assert!(
            first_column
                .as_any()
                .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>()
                .is_some()
        );

        // Field "b" must come back as a struct.
        let inner_struct = arrow_struct
            .column(1)
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // Its single field must be an i64 primitive column holding the constant.
        let inner_a = inner_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
        assert!(inner_a.is_some());

        let expected: ArrowPrimitiveArray<Int64Type> = ArrowPrimitiveArray::from_iter([100i64]);
        assert_eq!(inner_a.cloned().unwrap(), expected);
    }

    /// An Arrow struct with nullable fields and struct-level nulls must survive
    /// an Arrow -> Vortex -> Arrow round trip unchanged.
    #[test]
    fn roundtrip_struct() {
        // Struct-level validity: four valid rows, one null row, one valid row.
        let mut validity = NullBufferBuilder::new(6);
        validity.append_n_non_nulls(4);
        validity.append_null();
        validity.append_non_null();

        let names: ArrowArrayRef = Arc::new(StringViewArray::from_iter(vec![
            Some("Joseph"),
            None,
            Some("Angela"),
            Some("Mikhail"),
            None,
            None,
        ]));
        let ages: ArrowArrayRef = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
            Some(25),
            Some(31),
            None,
            Some(57),
            None,
            None,
        ]));

        let fields = vec![
            Arc::new(Field::new("name", DataType::Utf8View, true)),
            Arc::new(Field::new("age", DataType::Int32, true)),
        ];
        let arrow_struct = ArrowStructArray::new(fields.into(), vec![names, ages], validity.finish());

        let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true);
        let roundtripped = vortex_struct.into_arrow_preferred().unwrap();

        assert_eq!(&arrow_struct, roundtripped.as_struct());
    }

    /// An Arrow list of strings must survive an Arrow -> Vortex -> Arrow round
    /// trip when converted back to its original Arrow data type.
    #[test]
    fn roundtrip_list() {
        let names = Arc::new(StringArray::from_iter(vec![
            Some("Joseph"),
            Some("Angela"),
            Some("Mikhail"),
        ]));

        let element_field = Arc::new(Field::new_list_field(DataType::Utf8, true));
        let offsets = OffsetBuffer::from_lengths(vec![0, 2, 1]);
        let arrow_list = ArrowListArray::new(element_field, offsets, names, None);
        let list_data_type = arrow_list.data_type();

        let vortex_list = ArrayRef::from_arrow(&arrow_list, true);
        let rt_arrow_list = vortex_list.into_arrow(list_data_type).unwrap();

        let expected: ArrowArrayRef = Arc::new(arrow_list.clone());
        assert_eq!(expected.as_ref(), rt_arrow_list.as_ref());
    }
}