1use vortex_dtype::DType;
7use vortex_error::{VortexResult, vortex_panic};
8
9use crate::arrays::{
10 BoolArray, DecimalArray, ExtensionArray, FixedSizeListArray, ListArray, NullArray,
11 PrimitiveArray, StructArray, VarBinViewArray,
12};
13use crate::builders::builder_with_capacity;
14use crate::{Array, ArrayRef, IntoArray};
15
16#[derive(Debug, Clone)]
75pub enum Canonical {
76 Null(NullArray),
77 Bool(BoolArray),
78 Primitive(PrimitiveArray),
79 Decimal(DecimalArray),
80 VarBinView(VarBinViewArray),
81 List(ListArray),
83 FixedSizeList(FixedSizeListArray),
84 Struct(StructArray),
85 Extension(ExtensionArray),
86}
87
88impl Canonical {
89 pub fn empty(dtype: &DType) -> Canonical {
91 builder_with_capacity(dtype, 0).finish_into_canonical()
92 }
93}
94
95impl Canonical {
96 pub fn compact(&self) -> VortexResult<Canonical> {
104 match self {
105 Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
106 Canonical::List(array) => Ok(Canonical::List(array.reset_offsets()?)),
107 _ => Ok(self.clone()),
108 }
109 }
110}
111
112impl Canonical {
114 pub fn into_null(self) -> NullArray {
115 if let Canonical::Null(a) = self {
116 a
117 } else {
118 vortex_panic!("Cannot unwrap NullArray from {:?}", &self)
119 }
120 }
121
122 pub fn into_bool(self) -> BoolArray {
123 if let Canonical::Bool(a) = self {
124 a
125 } else {
126 vortex_panic!("Cannot unwrap BoolArray from {:?}", &self)
127 }
128 }
129
130 pub fn into_primitive(self) -> PrimitiveArray {
131 if let Canonical::Primitive(a) = self {
132 a
133 } else {
134 vortex_panic!("Cannot unwrap PrimitiveArray from {:?}", &self)
135 }
136 }
137
138 pub fn into_decimal(self) -> DecimalArray {
139 if let Canonical::Decimal(a) = self {
140 a
141 } else {
142 vortex_panic!("Cannot unwrap DecimalArray from {:?}", &self)
143 }
144 }
145
146 pub fn into_varbinview(self) -> VarBinViewArray {
147 if let Canonical::VarBinView(a) = self {
148 a
149 } else {
150 vortex_panic!("Cannot unwrap VarBinViewArray from {:?}", &self)
151 }
152 }
153
154 pub fn into_list(self) -> ListArray {
155 if let Canonical::List(a) = self {
156 a
157 } else {
158 vortex_panic!("Cannot unwrap ListArray from {:?}", &self)
159 }
160 }
161
162 pub fn into_fixed_size_list(self) -> FixedSizeListArray {
163 if let Canonical::FixedSizeList(a) = self {
164 a
165 } else {
166 vortex_panic!("Cannot unwrap FixedSizeListArray from {:?}", &self)
167 }
168 }
169
170 pub fn into_struct(self) -> StructArray {
171 if let Canonical::Struct(a) = self {
172 a
173 } else {
174 vortex_panic!("Cannot unwrap StructArray from {:?}", &self)
175 }
176 }
177
178 pub fn into_extension(self) -> ExtensionArray {
179 if let Canonical::Extension(a) = self {
180 a
181 } else {
182 vortex_panic!("Cannot unwrap ExtensionArray from {:?}", &self)
183 }
184 }
185}
186
187impl AsRef<dyn Array> for Canonical {
188 fn as_ref(&self) -> &(dyn Array + 'static) {
189 match &self {
190 Canonical::Null(a) => a.as_ref(),
191 Canonical::Bool(a) => a.as_ref(),
192 Canonical::Primitive(a) => a.as_ref(),
193 Canonical::Decimal(a) => a.as_ref(),
194 Canonical::Struct(a) => a.as_ref(),
195 Canonical::List(a) => a.as_ref(),
196 Canonical::FixedSizeList(a) => a.as_ref(),
197 Canonical::VarBinView(a) => a.as_ref(),
198 Canonical::Extension(a) => a.as_ref(),
199 }
200 }
201}
202
203impl IntoArray for Canonical {
204 fn into_array(self) -> ArrayRef {
205 match self {
206 Canonical::Null(a) => a.into_array(),
207 Canonical::Bool(a) => a.into_array(),
208 Canonical::Primitive(a) => a.into_array(),
209 Canonical::Decimal(a) => a.into_array(),
210 Canonical::Struct(a) => a.into_array(),
211 Canonical::List(a) => a.into_array(),
212 Canonical::FixedSizeList(a) => a.into_array(),
213 Canonical::VarBinView(a) => a.into_array(),
214 Canonical::Extension(a) => a.into_array(),
215 }
216 }
217}
218
219pub trait ToCanonical {
225 fn to_null(&self) -> NullArray;
227
228 fn to_bool(&self) -> BoolArray;
230
231 fn to_primitive(&self) -> PrimitiveArray;
234
235 fn to_decimal(&self) -> DecimalArray;
238
239 fn to_struct(&self) -> StructArray;
241
242 fn to_list(&self) -> ListArray;
244
245 fn to_fixed_size_list(&self) -> FixedSizeListArray;
247
248 fn to_varbinview(&self) -> VarBinViewArray;
251
252 fn to_extension(&self) -> ExtensionArray;
255}
256
257impl<A: Array + ?Sized> ToCanonical for A {
259 fn to_null(&self) -> NullArray {
260 self.to_canonical().into_null()
261 }
262
263 fn to_bool(&self) -> BoolArray {
264 self.to_canonical().into_bool()
265 }
266
267 fn to_primitive(&self) -> PrimitiveArray {
268 self.to_canonical().into_primitive()
269 }
270
271 fn to_decimal(&self) -> DecimalArray {
272 self.to_canonical().into_decimal()
273 }
274
275 fn to_struct(&self) -> StructArray {
276 self.to_canonical().into_struct()
277 }
278
279 fn to_list(&self) -> ListArray {
280 self.to_canonical().into_list()
281 }
282
283 fn to_fixed_size_list(&self) -> FixedSizeListArray {
284 self.to_canonical().into_fixed_size_list()
285 }
286
287 fn to_varbinview(&self) -> VarBinViewArray {
288 self.to_canonical().into_varbinview()
289 }
290
291 fn to_extension(&self) -> ExtensionArray {
292 self.to_canonical().into_extension()
293 }
294}
295
296impl From<Canonical> for ArrayRef {
297 fn from(value: Canonical) -> Self {
298 match value {
299 Canonical::Null(a) => a.into_array(),
300 Canonical::Bool(a) => a.into_array(),
301 Canonical::Primitive(a) => a.into_array(),
302 Canonical::Decimal(a) => a.into_array(),
303 Canonical::Struct(a) => a.into_array(),
304 Canonical::List(a) => a.into_array(),
305 Canonical::FixedSizeList(a) => a.into_array(),
306 Canonical::VarBinView(a) => a.into_array(),
307 Canonical::Extension(a) => a.into_array(),
308 }
309 }
310}
311
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_array::cast::AsArray;
    use arrow_array::types::{Int32Type, Int64Type, UInt64Type};
    use arrow_array::{
        Array as ArrowArray, ArrayRef as ArrowArrayRef, ListArray as ArrowListArray,
        PrimitiveArray as ArrowPrimitiveArray, StringArray, StringViewArray,
        StructArray as ArrowStructArray,
    };
    use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
    use arrow_schema::{DataType, Field};
    use vortex_buffer::buffer;

    use crate::arrays::{ConstantArray, StructArray};
    use crate::arrow::{FromArrowArray, IntoArrowArray};
    use crate::{ArrayRef, IntoArray};

    /// A struct nested inside a struct, whose leaf is a `ConstantArray`, must come out of the
    /// Arrow conversion with plain Arrow primitive columns at both levels.
    #[test]
    fn test_canonicalize_nested_struct() {
        // Inner struct: a single field backed by a `ConstantArray`.
        let inner = StructArray::from_fields(&[(
            "inner_a",
            ConstantArray::new(100i64, 1).into_array(),
        )])
        .unwrap()
        .into_array();

        // Outer struct: a primitive column plus the nested struct.
        let outer = StructArray::from_fields(&[("a", buffer![1u64].into_array()), ("b", inner)])
            .unwrap();

        let converted = outer.into_array().into_arrow_preferred().unwrap();
        let arrow_struct = converted
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        let outer_a = arrow_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>();
        assert!(outer_a.is_some());

        let inner_struct = arrow_struct
            .column(1)
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        let inner_a = inner_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
        assert!(inner_a.is_some());

        let expected = ArrowPrimitiveArray::<Int64Type>::from_iter([100i64]);
        assert_eq!(inner_a.cloned().unwrap(), expected);
    }

    /// An Arrow struct with struct-level and field-level nulls must survive
    /// Arrow -> Vortex -> Arrow unchanged.
    #[test]
    fn roundtrip_struct() {
        // Struct-level validity: rows 0..4 valid, row 4 null, row 5 valid.
        let mut validity = NullBufferBuilder::new(6);
        validity.append_n_non_nulls(4);
        validity.append_null();
        validity.append_non_null();

        let name_column: ArrowArrayRef = Arc::new(StringViewArray::from_iter(vec![
            Some("Joseph"),
            None,
            Some("Angela"),
            Some("Mikhail"),
            None,
            None,
        ]));
        let age_column: ArrowArrayRef = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
            Some(25),
            Some(31),
            None,
            Some(57),
            None,
            None,
        ]));

        let schema_fields = vec![
            Arc::new(Field::new("name", DataType::Utf8View, true)),
            Arc::new(Field::new("age", DataType::Int32, true)),
        ];
        let arrow_struct = ArrowStructArray::new(
            schema_fields.into(),
            vec![name_column, age_column],
            validity.finish(),
        );

        let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true);
        let roundtripped = vortex_struct.into_arrow_preferred().unwrap();

        assert_eq!(&arrow_struct, roundtripped.as_struct());
    }

    /// An Arrow list of strings must survive Arrow -> Vortex -> Arrow when converting back to
    /// the original Arrow data type.
    #[test]
    fn roundtrip_list() {
        let elements: ArrowArrayRef = Arc::new(StringArray::from_iter(vec![
            Some("Joseph"),
            Some("Angela"),
            Some("Mikhail"),
        ]));
        let element_field = Arc::new(Field::new_list_field(DataType::Utf8, true));

        // Three lists with lengths 0, 2 and 1 over the three elements above.
        let arrow_list = ArrowListArray::new(
            element_field,
            OffsetBuffer::from_lengths(vec![0, 2, 1]),
            elements,
            None,
        );
        let target_type = arrow_list.data_type();

        let vortex_list = ArrayRef::from_arrow(&arrow_list, true);
        let roundtripped = vortex_list.into_arrow(target_type).unwrap();

        let expected: ArrowArrayRef = Arc::new(arrow_list.clone());
        assert_eq!(expected.as_ref(), roundtripped.as_ref());
    }
}