1use vortex_dtype::DType;
7use vortex_error::VortexResult;
8use vortex_error::vortex_panic;
9
10use crate::Array;
11use crate::ArrayRef;
12use crate::IntoArray;
13use crate::arrays::BoolArray;
14use crate::arrays::DecimalArray;
15use crate::arrays::ExtensionArray;
16use crate::arrays::FixedSizeListArray;
17use crate::arrays::ListViewArray;
18use crate::arrays::ListViewRebuildMode;
19use crate::arrays::NullArray;
20use crate::arrays::PrimitiveArray;
21use crate::arrays::StructArray;
22use crate::arrays::VarBinViewArray;
23use crate::builders::builder_with_capacity;
24
25#[derive(Debug, Clone)]
86pub enum Canonical {
87 Null(NullArray),
88 Bool(BoolArray),
89 Primitive(PrimitiveArray),
90 Decimal(DecimalArray),
91 VarBinView(VarBinViewArray),
92 List(ListViewArray),
93 FixedSizeList(FixedSizeListArray),
94 Struct(StructArray),
95 Extension(ExtensionArray),
96}
97
98impl Canonical {
99 pub fn empty(dtype: &DType) -> Canonical {
102 builder_with_capacity(dtype, 0).finish_into_canonical()
103 }
104}
105
106impl Canonical {
107 pub fn compact(&self) -> VortexResult<Canonical> {
115 match self {
116 Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
117 Canonical::List(array) => Ok(Canonical::List(
118 array.rebuild(ListViewRebuildMode::MakeZeroCopyToList),
119 )),
120 _ => Ok(self.clone()),
121 }
122 }
123}
124
125impl Canonical {
127 pub fn as_null(&self) -> &NullArray {
128 if let Canonical::Null(a) = self {
129 a
130 } else {
131 vortex_panic!("Cannot get NullArray from {:?}", &self)
132 }
133 }
134
135 pub fn into_null(self) -> NullArray {
136 if let Canonical::Null(a) = self {
137 a
138 } else {
139 vortex_panic!("Cannot unwrap NullArray from {:?}", &self)
140 }
141 }
142
143 pub fn as_bool(&self) -> &BoolArray {
144 if let Canonical::Bool(a) = self {
145 a
146 } else {
147 vortex_panic!("Cannot get BoolArray from {:?}", &self)
148 }
149 }
150
151 pub fn into_bool(self) -> BoolArray {
152 if let Canonical::Bool(a) = self {
153 a
154 } else {
155 vortex_panic!("Cannot unwrap BoolArray from {:?}", &self)
156 }
157 }
158
159 pub fn as_primitive(&self) -> &PrimitiveArray {
160 if let Canonical::Primitive(a) = self {
161 a
162 } else {
163 vortex_panic!("Cannot get PrimitiveArray from {:?}", &self)
164 }
165 }
166
167 pub fn into_primitive(self) -> PrimitiveArray {
168 if let Canonical::Primitive(a) = self {
169 a
170 } else {
171 vortex_panic!("Cannot unwrap PrimitiveArray from {:?}", &self)
172 }
173 }
174
175 pub fn as_decimal(&self) -> &DecimalArray {
176 if let Canonical::Decimal(a) = self {
177 a
178 } else {
179 vortex_panic!("Cannot get DecimalArray from {:?}", &self)
180 }
181 }
182
183 pub fn into_decimal(self) -> DecimalArray {
184 if let Canonical::Decimal(a) = self {
185 a
186 } else {
187 vortex_panic!("Cannot unwrap DecimalArray from {:?}", &self)
188 }
189 }
190
191 pub fn as_varbinview(&self) -> &VarBinViewArray {
192 if let Canonical::VarBinView(a) = self {
193 a
194 } else {
195 vortex_panic!("Cannot get VarBinViewArray from {:?}", &self)
196 }
197 }
198
199 pub fn into_varbinview(self) -> VarBinViewArray {
200 if let Canonical::VarBinView(a) = self {
201 a
202 } else {
203 vortex_panic!("Cannot unwrap VarBinViewArray from {:?}", &self)
204 }
205 }
206
207 pub fn as_listview(&self) -> &ListViewArray {
208 if let Canonical::List(a) = self {
209 a
210 } else {
211 vortex_panic!("Cannot get ListArray from {:?}", &self)
212 }
213 }
214
215 pub fn into_listview(self) -> ListViewArray {
216 if let Canonical::List(a) = self {
217 a
218 } else {
219 vortex_panic!("Cannot unwrap ListArray from {:?}", &self)
220 }
221 }
222
223 pub fn as_fixed_size_list(&self) -> &FixedSizeListArray {
224 if let Canonical::FixedSizeList(a) = self {
225 a
226 } else {
227 vortex_panic!("Cannot get FixedSizeListArray from {:?}", &self)
228 }
229 }
230
231 pub fn into_fixed_size_list(self) -> FixedSizeListArray {
232 if let Canonical::FixedSizeList(a) = self {
233 a
234 } else {
235 vortex_panic!("Cannot unwrap FixedSizeListArray from {:?}", &self)
236 }
237 }
238
239 pub fn as_struct(&self) -> &StructArray {
240 if let Canonical::Struct(a) = self {
241 a
242 } else {
243 vortex_panic!("Cannot get StructArray from {:?}", &self)
244 }
245 }
246
247 pub fn into_struct(self) -> StructArray {
248 if let Canonical::Struct(a) = self {
249 a
250 } else {
251 vortex_panic!("Cannot unwrap StructArray from {:?}", &self)
252 }
253 }
254
255 pub fn as_extension(&self) -> &ExtensionArray {
256 if let Canonical::Extension(a) = self {
257 a
258 } else {
259 vortex_panic!("Cannot get ExtensionArray from {:?}", &self)
260 }
261 }
262
263 pub fn into_extension(self) -> ExtensionArray {
264 if let Canonical::Extension(a) = self {
265 a
266 } else {
267 vortex_panic!("Cannot unwrap ExtensionArray from {:?}", &self)
268 }
269 }
270}
271
272impl AsRef<dyn Array> for Canonical {
273 fn as_ref(&self) -> &(dyn Array + 'static) {
274 match &self {
275 Canonical::Null(a) => a.as_ref(),
276 Canonical::Bool(a) => a.as_ref(),
277 Canonical::Primitive(a) => a.as_ref(),
278 Canonical::Decimal(a) => a.as_ref(),
279 Canonical::Struct(a) => a.as_ref(),
280 Canonical::List(a) => a.as_ref(),
281 Canonical::FixedSizeList(a) => a.as_ref(),
282 Canonical::VarBinView(a) => a.as_ref(),
283 Canonical::Extension(a) => a.as_ref(),
284 }
285 }
286}
287
288impl IntoArray for Canonical {
289 fn into_array(self) -> ArrayRef {
290 match self {
291 Canonical::Null(a) => a.into_array(),
292 Canonical::Bool(a) => a.into_array(),
293 Canonical::Primitive(a) => a.into_array(),
294 Canonical::Decimal(a) => a.into_array(),
295 Canonical::Struct(a) => a.into_array(),
296 Canonical::List(a) => a.into_array(),
297 Canonical::FixedSizeList(a) => a.into_array(),
298 Canonical::VarBinView(a) => a.into_array(),
299 Canonical::Extension(a) => a.into_array(),
300 }
301 }
302}
303
304pub trait ToCanonical {
310 fn to_null(&self) -> NullArray;
312
313 fn to_bool(&self) -> BoolArray;
315
316 fn to_primitive(&self) -> PrimitiveArray;
319
320 fn to_decimal(&self) -> DecimalArray;
323
324 fn to_struct(&self) -> StructArray;
326
327 fn to_listview(&self) -> ListViewArray;
329
330 fn to_fixed_size_list(&self) -> FixedSizeListArray;
333
334 fn to_varbinview(&self) -> VarBinViewArray;
337
338 fn to_extension(&self) -> ExtensionArray;
341}
342
343impl<A: Array + ?Sized> ToCanonical for A {
345 fn to_null(&self) -> NullArray {
346 self.to_canonical().into_null()
347 }
348
349 fn to_bool(&self) -> BoolArray {
350 self.to_canonical().into_bool()
351 }
352
353 fn to_primitive(&self) -> PrimitiveArray {
354 self.to_canonical().into_primitive()
355 }
356
357 fn to_decimal(&self) -> DecimalArray {
358 self.to_canonical().into_decimal()
359 }
360
361 fn to_struct(&self) -> StructArray {
362 self.to_canonical().into_struct()
363 }
364
365 fn to_listview(&self) -> ListViewArray {
366 self.to_canonical().into_listview()
367 }
368
369 fn to_fixed_size_list(&self) -> FixedSizeListArray {
370 self.to_canonical().into_fixed_size_list()
371 }
372
373 fn to_varbinview(&self) -> VarBinViewArray {
374 self.to_canonical().into_varbinview()
375 }
376
377 fn to_extension(&self) -> ExtensionArray {
378 self.to_canonical().into_extension()
379 }
380}
381
382impl From<Canonical> for ArrayRef {
383 fn from(value: Canonical) -> Self {
384 match value {
385 Canonical::Null(a) => a.into_array(),
386 Canonical::Bool(a) => a.into_array(),
387 Canonical::Primitive(a) => a.into_array(),
388 Canonical::Decimal(a) => a.into_array(),
389 Canonical::Struct(a) => a.into_array(),
390 Canonical::List(a) => a.into_array(),
391 Canonical::FixedSizeList(a) => a.into_array(),
392 Canonical::VarBinView(a) => a.into_array(),
393 Canonical::Extension(a) => a.into_array(),
394 }
395 }
396}
397
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_array::Array as ArrowArray;
    use arrow_array::ArrayRef as ArrowArrayRef;
    use arrow_array::ListArray as ArrowListArray;
    use arrow_array::PrimitiveArray as ArrowPrimitiveArray;
    use arrow_array::StringArray;
    use arrow_array::StringViewArray;
    use arrow_array::StructArray as ArrowStructArray;
    use arrow_array::cast::AsArray;
    use arrow_array::types::Int32Type;
    use arrow_array::types::Int64Type;
    use arrow_array::types::UInt64Type;
    use arrow_buffer::NullBufferBuilder;
    use arrow_buffer::OffsetBuffer;
    use arrow_schema::DataType;
    use arrow_schema::Field;
    use vortex_buffer::buffer;

    use crate::ArrayRef;
    use crate::IntoArray;
    use crate::arrays::ConstantArray;
    use crate::arrays::StructArray;
    use crate::arrow::FromArrowArray;
    use crate::arrow::IntoArrowArray;

    /// A struct containing another struct (whose only field is a `ConstantArray`) must come
    /// out of the Arrow conversion as nested Arrow structs with primitive leaves.
    #[test]
    fn test_canonicalize_nested_struct() {
        let inner = StructArray::from_fields(&[(
            "inner_a",
            ConstantArray::new(100i64, 1).into_array(),
        )])
        .unwrap()
        .into_array();
        let nested_struct_array =
            StructArray::from_fields(&[("a", buffer![1u64].into_array()), ("b", inner)]).unwrap();

        let arrow_array = nested_struct_array
            .into_array()
            .into_arrow_preferred()
            .unwrap();
        let arrow_struct = arrow_array.as_struct_opt().unwrap();

        // Field "a" is a plain u64 primitive column.
        assert!(
            arrow_struct
                .column(0)
                .as_primitive_opt::<UInt64Type>()
                .is_some()
        );

        // Field "b" is itself a struct whose single column is an i64 primitive.
        let inner_struct = arrow_struct.column(1).as_struct_opt().unwrap();
        let inner_a = inner_struct.column(0).as_primitive_opt::<Int64Type>();
        assert!(inner_a.is_some());

        assert_eq!(
            inner_a.cloned().unwrap(),
            ArrowPrimitiveArray::from_iter([100i64])
        );
    }

    /// An Arrow struct with nullable fields and struct-level nulls survives a round trip
    /// through the vortex representation unchanged.
    #[test]
    fn roundtrip_struct() {
        // Struct-level validity: rows 0..4 valid, row 4 null, row 5 valid.
        let mut nulls = NullBufferBuilder::new(6);
        nulls.append_n_non_nulls(4);
        nulls.append_null();
        nulls.append_non_null();

        let names: ArrowArrayRef = Arc::new(StringViewArray::from_iter(vec![
            Some("Joseph"),
            None,
            Some("Angela"),
            Some("Mikhail"),
            None,
            None,
        ]));
        let ages: ArrowArrayRef = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
            Some(25),
            Some(31),
            None,
            Some(57),
            None,
            None,
        ]));

        let fields = vec![
            Arc::new(Field::new("name", DataType::Utf8View, true)),
            Arc::new(Field::new("age", DataType::Int32, true)),
        ];
        let arrow_struct = ArrowStructArray::new(fields.into(), vec![names, ages], nulls.finish());

        let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true);
        let roundtripped = vortex_struct.into_arrow_preferred().unwrap();

        assert_eq!(&arrow_struct, roundtripped.as_struct());
    }

    /// An Arrow list of strings survives a round trip when converted back to its own
    /// Arrow data type.
    #[test]
    fn roundtrip_list() {
        let names = Arc::new(StringArray::from_iter(vec![
            Some("Joseph"),
            Some("Angela"),
            Some("Mikhail"),
        ]));

        let item_field = Arc::new(Field::new_list_field(DataType::Utf8, true));
        let offsets = OffsetBuffer::from_lengths(vec![0, 2, 1]);
        let arrow_list = ArrowListArray::new(item_field, offsets, names, None);

        let vortex_list = ArrayRef::from_arrow(&arrow_list, true);
        let rt_arrow_list = vortex_list.into_arrow(arrow_list.data_type()).unwrap();

        // Compare as `dyn Array` on both sides, as the round-tripped value is type-erased.
        let expected: ArrowArrayRef = Arc::new(arrow_list);
        assert_eq!(expected.as_ref(), rt_arrow_list.as_ref());
    }
}