1use vortex_dtype::DType;
7use vortex_error::{VortexResult, vortex_panic};
8
9use crate::arrays::{
10 BoolArray, DecimalArray, ExtensionArray, FixedSizeListArray, ListViewArray,
11 ListViewRebuildMode, NullArray, PrimitiveArray, StructArray, VarBinViewArray,
12};
13use crate::builders::builder_with_capacity;
14use crate::{Array, ArrayRef, IntoArray};
15
/// An array held in one of a fixed set of concrete array types.
///
/// Each variant wraps exactly one array type from `crate::arrays`. The accessor methods on
/// this type (`as_*` / `into_*`) give access to the payload of a specific variant.
#[derive(Debug, Clone)]
pub enum Canonical {
    /// Payload is a [`NullArray`].
    Null(NullArray),
    /// Payload is a [`BoolArray`].
    Bool(BoolArray),
    /// Payload is a [`PrimitiveArray`].
    Primitive(PrimitiveArray),
    /// Payload is a [`DecimalArray`].
    Decimal(DecimalArray),
    /// Payload is a [`VarBinViewArray`].
    VarBinView(VarBinViewArray),
    /// Payload is a [`ListViewArray`] (the variant is named `List`, the payload is the
    /// list-view type).
    List(ListViewArray),
    /// Payload is a [`FixedSizeListArray`].
    FixedSizeList(FixedSizeListArray),
    /// Payload is a [`StructArray`].
    Struct(StructArray),
    /// Payload is an [`ExtensionArray`].
    Extension(ExtensionArray),
}
88
89impl Canonical {
90 pub fn empty(dtype: &DType) -> Canonical {
93 builder_with_capacity(dtype, 0).finish_into_canonical()
94 }
95}
96
97impl Canonical {
98 pub fn compact(&self) -> VortexResult<Canonical> {
106 match self {
107 Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
108 Canonical::List(array) => Ok(Canonical::List(
109 array.rebuild(ListViewRebuildMode::MakeZeroCopyToList),
110 )),
111 _ => Ok(self.clone()),
112 }
113 }
114}
115
116impl Canonical {
118 pub fn as_null(&self) -> &NullArray {
119 if let Canonical::Null(a) = self {
120 a
121 } else {
122 vortex_panic!("Cannot get NullArray from {:?}", &self)
123 }
124 }
125
126 pub fn into_null(self) -> NullArray {
127 if let Canonical::Null(a) = self {
128 a
129 } else {
130 vortex_panic!("Cannot unwrap NullArray from {:?}", &self)
131 }
132 }
133
134 pub fn as_bool(&self) -> &BoolArray {
135 if let Canonical::Bool(a) = self {
136 a
137 } else {
138 vortex_panic!("Cannot get BoolArray from {:?}", &self)
139 }
140 }
141
142 pub fn into_bool(self) -> BoolArray {
143 if let Canonical::Bool(a) = self {
144 a
145 } else {
146 vortex_panic!("Cannot unwrap BoolArray from {:?}", &self)
147 }
148 }
149
150 pub fn as_primitive(&self) -> &PrimitiveArray {
151 if let Canonical::Primitive(a) = self {
152 a
153 } else {
154 vortex_panic!("Cannot get PrimitiveArray from {:?}", &self)
155 }
156 }
157
158 pub fn into_primitive(self) -> PrimitiveArray {
159 if let Canonical::Primitive(a) = self {
160 a
161 } else {
162 vortex_panic!("Cannot unwrap PrimitiveArray from {:?}", &self)
163 }
164 }
165
166 pub fn as_decimal(&self) -> &DecimalArray {
167 if let Canonical::Decimal(a) = self {
168 a
169 } else {
170 vortex_panic!("Cannot get DecimalArray from {:?}", &self)
171 }
172 }
173
174 pub fn into_decimal(self) -> DecimalArray {
175 if let Canonical::Decimal(a) = self {
176 a
177 } else {
178 vortex_panic!("Cannot unwrap DecimalArray from {:?}", &self)
179 }
180 }
181
182 pub fn as_varbinview(&self) -> &VarBinViewArray {
183 if let Canonical::VarBinView(a) = self {
184 a
185 } else {
186 vortex_panic!("Cannot get VarBinViewArray from {:?}", &self)
187 }
188 }
189
190 pub fn into_varbinview(self) -> VarBinViewArray {
191 if let Canonical::VarBinView(a) = self {
192 a
193 } else {
194 vortex_panic!("Cannot unwrap VarBinViewArray from {:?}", &self)
195 }
196 }
197
198 pub fn as_listview(&self) -> &ListViewArray {
199 if let Canonical::List(a) = self {
200 a
201 } else {
202 vortex_panic!("Cannot get ListArray from {:?}", &self)
203 }
204 }
205
206 pub fn into_listview(self) -> ListViewArray {
207 if let Canonical::List(a) = self {
208 a
209 } else {
210 vortex_panic!("Cannot unwrap ListArray from {:?}", &self)
211 }
212 }
213
214 pub fn as_fixed_size_list(&self) -> &FixedSizeListArray {
215 if let Canonical::FixedSizeList(a) = self {
216 a
217 } else {
218 vortex_panic!("Cannot get FixedSizeListArray from {:?}", &self)
219 }
220 }
221
222 pub fn into_fixed_size_list(self) -> FixedSizeListArray {
223 if let Canonical::FixedSizeList(a) = self {
224 a
225 } else {
226 vortex_panic!("Cannot unwrap FixedSizeListArray from {:?}", &self)
227 }
228 }
229
230 pub fn as_struct(&self) -> &StructArray {
231 if let Canonical::Struct(a) = self {
232 a
233 } else {
234 vortex_panic!("Cannot get StructArray from {:?}", &self)
235 }
236 }
237
238 pub fn into_struct(self) -> StructArray {
239 if let Canonical::Struct(a) = self {
240 a
241 } else {
242 vortex_panic!("Cannot unwrap StructArray from {:?}", &self)
243 }
244 }
245
246 pub fn as_extension(&self) -> &ExtensionArray {
247 if let Canonical::Extension(a) = self {
248 a
249 } else {
250 vortex_panic!("Cannot get ExtensionArray from {:?}", &self)
251 }
252 }
253
254 pub fn into_extension(self) -> ExtensionArray {
255 if let Canonical::Extension(a) = self {
256 a
257 } else {
258 vortex_panic!("Cannot unwrap ExtensionArray from {:?}", &self)
259 }
260 }
261}
262
263impl AsRef<dyn Array> for Canonical {
264 fn as_ref(&self) -> &(dyn Array + 'static) {
265 match &self {
266 Canonical::Null(a) => a.as_ref(),
267 Canonical::Bool(a) => a.as_ref(),
268 Canonical::Primitive(a) => a.as_ref(),
269 Canonical::Decimal(a) => a.as_ref(),
270 Canonical::Struct(a) => a.as_ref(),
271 Canonical::List(a) => a.as_ref(),
272 Canonical::FixedSizeList(a) => a.as_ref(),
273 Canonical::VarBinView(a) => a.as_ref(),
274 Canonical::Extension(a) => a.as_ref(),
275 }
276 }
277}
278
279impl IntoArray for Canonical {
280 fn into_array(self) -> ArrayRef {
281 match self {
282 Canonical::Null(a) => a.into_array(),
283 Canonical::Bool(a) => a.into_array(),
284 Canonical::Primitive(a) => a.into_array(),
285 Canonical::Decimal(a) => a.into_array(),
286 Canonical::Struct(a) => a.into_array(),
287 Canonical::List(a) => a.into_array(),
288 Canonical::FixedSizeList(a) => a.into_array(),
289 Canonical::VarBinView(a) => a.into_array(),
290 Canonical::Extension(a) => a.into_array(),
291 }
292 }
293}
294
/// Convenience conversions from an array straight to one specific concrete array type.
///
/// The blanket implementation in this file (for every `A: Array + ?Sized`) implements each
/// method by calling `to_canonical()` and then the matching `Canonical::into_*` accessor, so
/// with that implementation each method panics when the canonical form is a different variant
/// than the one requested.
pub trait ToCanonical {
    /// Converts `self` to a [`NullArray`].
    fn to_null(&self) -> NullArray;

    /// Converts `self` to a [`BoolArray`].
    fn to_bool(&self) -> BoolArray;

    /// Converts `self` to a [`PrimitiveArray`].
    fn to_primitive(&self) -> PrimitiveArray;

    /// Converts `self` to a [`DecimalArray`].
    fn to_decimal(&self) -> DecimalArray;

    /// Converts `self` to a [`StructArray`].
    fn to_struct(&self) -> StructArray;

    /// Converts `self` to a [`ListViewArray`].
    fn to_listview(&self) -> ListViewArray;

    /// Converts `self` to a [`FixedSizeListArray`].
    fn to_fixed_size_list(&self) -> FixedSizeListArray;

    /// Converts `self` to a [`VarBinViewArray`].
    fn to_varbinview(&self) -> VarBinViewArray;

    /// Converts `self` to an [`ExtensionArray`].
    fn to_extension(&self) -> ExtensionArray;
}
333
334impl<A: Array + ?Sized> ToCanonical for A {
336 fn to_null(&self) -> NullArray {
337 self.to_canonical().into_null()
338 }
339
340 fn to_bool(&self) -> BoolArray {
341 self.to_canonical().into_bool()
342 }
343
344 fn to_primitive(&self) -> PrimitiveArray {
345 self.to_canonical().into_primitive()
346 }
347
348 fn to_decimal(&self) -> DecimalArray {
349 self.to_canonical().into_decimal()
350 }
351
352 fn to_struct(&self) -> StructArray {
353 self.to_canonical().into_struct()
354 }
355
356 fn to_listview(&self) -> ListViewArray {
357 self.to_canonical().into_listview()
358 }
359
360 fn to_fixed_size_list(&self) -> FixedSizeListArray {
361 self.to_canonical().into_fixed_size_list()
362 }
363
364 fn to_varbinview(&self) -> VarBinViewArray {
365 self.to_canonical().into_varbinview()
366 }
367
368 fn to_extension(&self) -> ExtensionArray {
369 self.to_canonical().into_extension()
370 }
371}
372
373impl From<Canonical> for ArrayRef {
374 fn from(value: Canonical) -> Self {
375 match value {
376 Canonical::Null(a) => a.into_array(),
377 Canonical::Bool(a) => a.into_array(),
378 Canonical::Primitive(a) => a.into_array(),
379 Canonical::Decimal(a) => a.into_array(),
380 Canonical::Struct(a) => a.into_array(),
381 Canonical::List(a) => a.into_array(),
382 Canonical::FixedSizeList(a) => a.into_array(),
383 Canonical::VarBinView(a) => a.into_array(),
384 Canonical::Extension(a) => a.into_array(),
385 }
386 }
387}
388
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_array::cast::AsArray;
    use arrow_array::types::{Int32Type, Int64Type, UInt64Type};
    use arrow_array::{
        Array as ArrowArray, ArrayRef as ArrowArrayRef, ListArray as ArrowListArray,
        PrimitiveArray as ArrowPrimitiveArray, StringArray, StringViewArray,
        StructArray as ArrowStructArray,
    };
    use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
    use arrow_schema::{DataType, Field};
    use vortex_buffer::buffer;

    use crate::arrays::{ConstantArray, StructArray};
    use crate::arrow::{FromArrowArray, IntoArrowArray};
    use crate::{ArrayRef, IntoArray};

    /// A struct nested inside a struct, with a constant-encoded leaf, must come out of the
    /// Arrow conversion as plain Arrow struct / primitive arrays.
    #[test]
    fn test_canonicalize_nested_struct() {
        // Inner struct: a single field backed by a constant array.
        let inner = StructArray::from_fields(&[(
            "inner_a",
            ConstantArray::new(100i64, 1).into_array(),
        )])
        .unwrap()
        .into_array();

        // Outer struct: a primitive column next to the nested struct.
        let outer =
            StructArray::from_fields(&[("a", buffer![1u64].into_array()), ("b", inner)]).unwrap();

        let arrow_array = outer.into_array().into_arrow_preferred().unwrap();
        let arrow_struct = arrow_array
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .unwrap();

        let first_column = arrow_struct.column(0);
        assert!(
            first_column
                .as_any()
                .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>()
                .is_some()
        );

        let inner_struct = arrow_struct
            .column(1)
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .unwrap();

        let inner_a = inner_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
        assert!(inner_a.is_some());

        let expected = ArrowPrimitiveArray::<Int64Type>::from_iter([100i64]);
        assert_eq!(inner_a.cloned().unwrap(), expected);
    }

    /// Arrow struct -> Vortex -> Arrow must reproduce the original struct, including
    /// struct-level and field-level nulls.
    #[test]
    fn roundtrip_struct() {
        // Struct-level validity: rows 0..4 valid, row 4 null, row 5 valid.
        let mut validity = NullBufferBuilder::new(6);
        validity.append_n_non_nulls(4);
        validity.append_null();
        validity.append_non_null();

        let names: ArrowArrayRef = Arc::new(StringViewArray::from_iter(vec![
            Some("Joseph"),
            None,
            Some("Angela"),
            Some("Mikhail"),
            None,
            None,
        ]));
        let ages: ArrowArrayRef = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
            Some(25),
            Some(31),
            None,
            Some(57),
            None,
            None,
        ]));

        let schema_fields = vec![
            Arc::new(Field::new("name", DataType::Utf8View, true)),
            Arc::new(Field::new("age", DataType::Int32, true)),
        ];
        let columns = vec![names, ages];
        let arrow_struct = ArrowStructArray::new(schema_fields.into(), columns, validity.finish());

        let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true);
        let roundtripped = vortex_struct.into_arrow_preferred().unwrap();

        assert_eq!(&arrow_struct, roundtripped.as_struct());
    }

    /// Arrow list -> Vortex -> Arrow (with the original data type requested) must reproduce
    /// the original list.
    #[test]
    fn roundtrip_list() {
        let names = Arc::new(StringArray::from_iter(vec![
            Some("Joseph"),
            Some("Angela"),
            Some("Mikhail"),
        ]));

        let element_field = Arc::new(Field::new_list_field(DataType::Utf8, true));
        let offsets = OffsetBuffer::from_lengths(vec![0, 2, 1]);
        let arrow_list = ArrowListArray::new(element_field, offsets, names, None);
        let list_data_type = arrow_list.data_type();

        let vortex_list = ArrayRef::from_arrow(&arrow_list, true);
        let rt_arrow_list = vortex_list.into_arrow(list_data_type).unwrap();

        let expected: ArrowArrayRef = Arc::new(arrow_list.clone());
        assert_eq!(expected.as_ref(), rt_arrow_list.as_ref());
    }
}