1use vortex_dtype::DType;
7use vortex_error::{VortexResult, vortex_panic};
8
9use crate::arrays::{
10 BoolArray, DecimalArray, ExtensionArray, FixedSizeListArray, ListArray, NullArray,
11 PrimitiveArray, StructArray, VarBinViewArray,
12};
13use crate::builders::builder_with_capacity;
14use crate::{Array, ArrayRef, IntoArray};
15
16#[derive(Debug, Clone)]
68pub enum Canonical {
69 Null(NullArray),
70 Bool(BoolArray),
71 Primitive(PrimitiveArray),
72 Decimal(DecimalArray),
73 VarBinView(VarBinViewArray),
74 List(ListArray),
76 FixedSizeList(FixedSizeListArray),
77 Struct(StructArray),
78 Extension(ExtensionArray),
79}
80
81impl Canonical {
82 pub fn empty(dtype: &DType) -> Canonical {
84 builder_with_capacity(dtype, 0).finish_into_canonical()
85 }
86}
87
88impl Canonical {
89 pub fn compact(&self) -> VortexResult<Canonical> {
97 match self {
98 Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
99 Canonical::List(array) => Ok(Canonical::List(array.reset_offsets()?)),
100 _ => Ok(self.clone()),
101 }
102 }
103}
104
105impl Canonical {
107 pub fn into_null(self) -> NullArray {
108 if let Canonical::Null(a) = self {
109 a
110 } else {
111 vortex_panic!("Cannot unwrap NullArray from {:?}", &self)
112 }
113 }
114
115 pub fn into_bool(self) -> BoolArray {
116 if let Canonical::Bool(a) = self {
117 a
118 } else {
119 vortex_panic!("Cannot unwrap BoolArray from {:?}", &self)
120 }
121 }
122
123 pub fn into_primitive(self) -> PrimitiveArray {
124 if let Canonical::Primitive(a) = self {
125 a
126 } else {
127 vortex_panic!("Cannot unwrap PrimitiveArray from {:?}", &self)
128 }
129 }
130
131 pub fn into_decimal(self) -> DecimalArray {
132 if let Canonical::Decimal(a) = self {
133 a
134 } else {
135 vortex_panic!("Cannot unwrap DecimalArray from {:?}", &self)
136 }
137 }
138
139 pub fn into_varbinview(self) -> VarBinViewArray {
140 if let Canonical::VarBinView(a) = self {
141 a
142 } else {
143 vortex_panic!("Cannot unwrap VarBinViewArray from {:?}", &self)
144 }
145 }
146
147 pub fn into_list(self) -> ListArray {
148 if let Canonical::List(a) = self {
149 a
150 } else {
151 vortex_panic!("Cannot unwrap ListArray from {:?}", &self)
152 }
153 }
154
155 pub fn into_fixed_size_list(self) -> FixedSizeListArray {
156 if let Canonical::FixedSizeList(a) = self {
157 a
158 } else {
159 vortex_panic!("Cannot unwrap FixedSizeListArray from {:?}", &self)
160 }
161 }
162
163 pub fn into_struct(self) -> StructArray {
164 if let Canonical::Struct(a) = self {
165 a
166 } else {
167 vortex_panic!("Cannot unwrap StructArray from {:?}", &self)
168 }
169 }
170
171 pub fn into_extension(self) -> ExtensionArray {
172 if let Canonical::Extension(a) = self {
173 a
174 } else {
175 vortex_panic!("Cannot unwrap ExtensionArray from {:?}", &self)
176 }
177 }
178}
179
180impl AsRef<dyn Array> for Canonical {
181 fn as_ref(&self) -> &(dyn Array + 'static) {
182 match &self {
183 Canonical::Null(a) => a.as_ref(),
184 Canonical::Bool(a) => a.as_ref(),
185 Canonical::Primitive(a) => a.as_ref(),
186 Canonical::Decimal(a) => a.as_ref(),
187 Canonical::Struct(a) => a.as_ref(),
188 Canonical::List(a) => a.as_ref(),
189 Canonical::FixedSizeList(a) => a.as_ref(),
190 Canonical::VarBinView(a) => a.as_ref(),
191 Canonical::Extension(a) => a.as_ref(),
192 }
193 }
194}
195
196impl IntoArray for Canonical {
197 fn into_array(self) -> ArrayRef {
198 match self {
199 Canonical::Null(a) => a.into_array(),
200 Canonical::Bool(a) => a.into_array(),
201 Canonical::Primitive(a) => a.into_array(),
202 Canonical::Decimal(a) => a.into_array(),
203 Canonical::Struct(a) => a.into_array(),
204 Canonical::List(a) => a.into_array(),
205 Canonical::FixedSizeList(a) => a.into_array(),
206 Canonical::VarBinView(a) => a.into_array(),
207 Canonical::Extension(a) => a.into_array(),
208 }
209 }
210}
211
212pub trait ToCanonical {
218 fn to_null(&self) -> NullArray;
220
221 fn to_bool(&self) -> BoolArray;
223
224 fn to_primitive(&self) -> PrimitiveArray;
227
228 fn to_decimal(&self) -> DecimalArray;
231
232 fn to_struct(&self) -> StructArray;
234
235 fn to_list(&self) -> ListArray;
237
238 fn to_fixed_size_list(&self) -> FixedSizeListArray;
240
241 fn to_varbinview(&self) -> VarBinViewArray;
244
245 fn to_extension(&self) -> ExtensionArray;
248}
249
250impl<A: Array + ?Sized> ToCanonical for A {
252 fn to_null(&self) -> NullArray {
253 self.to_canonical().into_null()
254 }
255
256 fn to_bool(&self) -> BoolArray {
257 self.to_canonical().into_bool()
258 }
259
260 fn to_primitive(&self) -> PrimitiveArray {
261 self.to_canonical().into_primitive()
262 }
263
264 fn to_decimal(&self) -> DecimalArray {
265 self.to_canonical().into_decimal()
266 }
267
268 fn to_struct(&self) -> StructArray {
269 self.to_canonical().into_struct()
270 }
271
272 fn to_list(&self) -> ListArray {
273 self.to_canonical().into_list()
274 }
275
276 fn to_fixed_size_list(&self) -> FixedSizeListArray {
277 self.to_canonical().into_fixed_size_list()
278 }
279
280 fn to_varbinview(&self) -> VarBinViewArray {
281 self.to_canonical().into_varbinview()
282 }
283
284 fn to_extension(&self) -> ExtensionArray {
285 self.to_canonical().into_extension()
286 }
287}
288
289impl From<Canonical> for ArrayRef {
290 fn from(value: Canonical) -> Self {
291 match value {
292 Canonical::Null(a) => a.into_array(),
293 Canonical::Bool(a) => a.into_array(),
294 Canonical::Primitive(a) => a.into_array(),
295 Canonical::Decimal(a) => a.into_array(),
296 Canonical::Struct(a) => a.into_array(),
297 Canonical::List(a) => a.into_array(),
298 Canonical::FixedSizeList(a) => a.into_array(),
299 Canonical::VarBinView(a) => a.into_array(),
300 Canonical::Extension(a) => a.into_array(),
301 }
302 }
303}
304
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_array::cast::AsArray;
    use arrow_array::types::{Int32Type, Int64Type, UInt64Type};
    use arrow_array::{
        Array as ArrowArray, ArrayRef as ArrowArrayRef, ListArray as ArrowListArray,
        PrimitiveArray as ArrowPrimitiveArray, StringArray, StringViewArray,
        StructArray as ArrowStructArray,
    };
    use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
    use arrow_schema::{DataType, Field};
    use vortex_buffer::buffer;

    use crate::arrays::{ConstantArray, StructArray};
    use crate::arrow::{FromArrowArray, IntoArrowArray};
    use crate::{ArrayRef, IntoArray};

    /// A struct nested inside a struct converts to Arrow with primitive leaf
    /// columns, including a leaf that starts out as a `ConstantArray`.
    #[test]
    fn test_canonicalize_nested_struct() {
        let nested_struct_array = StructArray::from_fields(&[
            ("a", buffer![1u64].into_array()),
            (
                "b",
                StructArray::from_fields(&[(
                    "inner_a",
                    ConstantArray::new(100i64, 1).into_array(),
                )])
                .unwrap()
                .into_array(),
            ),
        ])
        .unwrap();

        let arrow_array = nested_struct_array
            .into_array()
            .into_arrow_preferred()
            .unwrap();
        let arrow_struct = arrow_array
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // Outer column "a" must come back as a u64 primitive array.
        let outer_a = arrow_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>();
        assert!(outer_a.is_some());

        // Outer column "b" must come back as a struct.
        let inner_struct = arrow_struct
            .column(1)
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // Its only field must come back as an i64 primitive array holding 100.
        let inner_a = inner_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
        assert!(inner_a.is_some());

        let expected = ArrowPrimitiveArray::<Int64Type>::from_iter([100i64]);
        assert_eq!(inner_a.cloned().unwrap(), expected);
    }

    /// An Arrow struct with struct-level and field-level nulls survives a trip
    /// through Vortex and back.
    #[test]
    fn roundtrip_struct() {
        // Struct-level validity: rows 0..=3 valid, row 4 null, row 5 valid.
        let mut validity = NullBufferBuilder::new(6);
        validity.append_n_non_nulls(4);
        validity.append_null();
        validity.append_non_null();

        let names: ArrowArrayRef = Arc::new(StringViewArray::from_iter(vec![
            Some("Joseph"),
            None,
            Some("Angela"),
            Some("Mikhail"),
            None,
            None,
        ]));
        let ages: ArrowArrayRef = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
            Some(25),
            Some(31),
            None,
            Some(57),
            None,
            None,
        ]));

        let fields = vec![
            Arc::new(Field::new("name", DataType::Utf8View, true)),
            Arc::new(Field::new("age", DataType::Int32, true)),
        ];
        let arrow_struct = ArrowStructArray::new(fields.into(), vec![names, ages], validity.finish());

        let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true);
        let roundtripped = vortex_struct.into_arrow_preferred().unwrap();

        assert_eq!(&arrow_struct, roundtripped.as_struct());
    }

    /// An Arrow list of strings survives a trip through Vortex and back when
    /// the original Arrow data type is requested explicitly.
    #[test]
    fn roundtrip_list() {
        let names: ArrowArrayRef = Arc::new(StringArray::from_iter(vec![
            Some("Joseph"),
            Some("Angela"),
            Some("Mikhail"),
        ]));

        // Three lists with lengths 0, 2 and 1 over the three strings above.
        let item_field = Arc::new(Field::new_list_field(DataType::Utf8, true));
        let offsets = OffsetBuffer::from_lengths(vec![0, 2, 1]);
        let arrow_list = ArrowListArray::new(item_field, offsets, names, None);

        let vortex_list = ArrayRef::from_arrow(&arrow_list, true);
        let rt_arrow_list = vortex_list.into_arrow(arrow_list.data_type()).unwrap();

        let expected: ArrowArrayRef = Arc::new(arrow_list);
        assert_eq!(expected.as_ref(), rt_arrow_list.as_ref());
    }
}