1use vortex_dtype::DType;
7use vortex_error::{VortexExpect, VortexResult, vortex_bail};
8
9use crate::arrays::{
10 BoolArray, DecimalArray, ExtensionArray, FixedSizeListArray, ListArray, NullArray,
11 PrimitiveArray, StructArray, VarBinViewArray,
12};
13use crate::builders::builder_with_capacity;
14use crate::{Array, ArrayRef, IntoArray};
15
16#[derive(Debug, Clone)]
68pub enum Canonical {
69 Null(NullArray),
70 Bool(BoolArray),
71 Primitive(PrimitiveArray),
72 Decimal(DecimalArray),
73 VarBinView(VarBinViewArray),
74 List(ListArray),
76 FixedSizeList(FixedSizeListArray),
77 Struct(StructArray),
78 Extension(ExtensionArray),
79}
80
81impl Canonical {
82 pub fn empty(dtype: &DType) -> Canonical {
84 builder_with_capacity(dtype, 0)
85 .finish()
86 .to_canonical()
87 .vortex_expect("cannot fail to convert an empty array to canonical")
88 }
89}
90
91impl Canonical {
92 pub fn compact(&self) -> VortexResult<Canonical> {
100 match self {
101 Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
102 Canonical::List(array) => Ok(Canonical::List(array.reset_offsets()?)),
103 _ => Ok(self.clone()),
104 }
105 }
106}
107
108impl Canonical {
110 pub fn into_null(self) -> VortexResult<NullArray> {
111 if let Canonical::Null(a) = self {
112 Ok(a)
113 } else {
114 vortex_bail!("Cannot unwrap NullArray from {:?}", &self)
115 }
116 }
117
118 pub fn into_bool(self) -> VortexResult<BoolArray> {
119 if let Canonical::Bool(a) = self {
120 Ok(a)
121 } else {
122 vortex_bail!("Cannot unwrap BoolArray from {:?}", &self)
123 }
124 }
125
126 pub fn into_primitive(self) -> VortexResult<PrimitiveArray> {
127 if let Canonical::Primitive(a) = self {
128 Ok(a)
129 } else {
130 vortex_bail!("Cannot unwrap PrimitiveArray from {:?}", &self)
131 }
132 }
133
134 pub fn into_decimal(self) -> VortexResult<DecimalArray> {
135 if let Canonical::Decimal(a) = self {
136 Ok(a)
137 } else {
138 vortex_bail!("Cannot unwrap DecimalArray from {:?}", &self)
139 }
140 }
141
142 pub fn into_varbinview(self) -> VortexResult<VarBinViewArray> {
143 if let Canonical::VarBinView(a) = self {
144 Ok(a)
145 } else {
146 vortex_bail!("Cannot unwrap VarBinViewArray from {:?}", &self)
147 }
148 }
149
150 pub fn into_list(self) -> VortexResult<ListArray> {
151 if let Canonical::List(a) = self {
152 Ok(a)
153 } else {
154 vortex_bail!("Cannot unwrap ListArray from {:?}", &self)
155 }
156 }
157
158 pub fn into_fixed_size_list(self) -> VortexResult<FixedSizeListArray> {
159 if let Canonical::FixedSizeList(a) = self {
160 Ok(a)
161 } else {
162 vortex_bail!("Cannot unwrap FixedSizeListArray from {:?}", &self)
163 }
164 }
165
166 pub fn into_struct(self) -> VortexResult<StructArray> {
167 if let Canonical::Struct(a) = self {
168 Ok(a)
169 } else {
170 vortex_bail!("Cannot unwrap StructArray from {:?}", &self)
171 }
172 }
173
174 pub fn into_extension(self) -> VortexResult<ExtensionArray> {
175 if let Canonical::Extension(a) = self {
176 Ok(a)
177 } else {
178 vortex_bail!("Cannot unwrap ExtensionArray from {:?}", &self)
179 }
180 }
181}
182
183impl AsRef<dyn Array> for Canonical {
184 fn as_ref(&self) -> &(dyn Array + 'static) {
185 match &self {
186 Canonical::Null(a) => a.as_ref(),
187 Canonical::Bool(a) => a.as_ref(),
188 Canonical::Primitive(a) => a.as_ref(),
189 Canonical::Decimal(a) => a.as_ref(),
190 Canonical::Struct(a) => a.as_ref(),
191 Canonical::List(a) => a.as_ref(),
192 Canonical::FixedSizeList(a) => a.as_ref(),
193 Canonical::VarBinView(a) => a.as_ref(),
194 Canonical::Extension(a) => a.as_ref(),
195 }
196 }
197}
198
199impl IntoArray for Canonical {
200 fn into_array(self) -> ArrayRef {
201 match self {
202 Canonical::Null(a) => a.into_array(),
203 Canonical::Bool(a) => a.into_array(),
204 Canonical::Primitive(a) => a.into_array(),
205 Canonical::Decimal(a) => a.into_array(),
206 Canonical::Struct(a) => a.into_array(),
207 Canonical::List(a) => a.into_array(),
208 Canonical::FixedSizeList(a) => a.into_array(),
209 Canonical::VarBinView(a) => a.into_array(),
210 Canonical::Extension(a) => a.into_array(),
211 }
212 }
213}
214
215pub trait ToCanonical {
221 fn to_null(&self) -> VortexResult<NullArray>;
223
224 fn to_bool(&self) -> VortexResult<BoolArray>;
226
227 fn to_primitive(&self) -> VortexResult<PrimitiveArray>;
230
231 fn to_decimal(&self) -> VortexResult<DecimalArray>;
234
235 fn to_struct(&self) -> VortexResult<StructArray>;
237
238 fn to_list(&self) -> VortexResult<ListArray>;
240
241 fn to_fixed_size_list(&self) -> VortexResult<FixedSizeListArray>;
243
244 fn to_varbinview(&self) -> VortexResult<VarBinViewArray>;
247
248 fn to_extension(&self) -> VortexResult<ExtensionArray>;
251}
252
253impl<A: Array + ?Sized> ToCanonical for A {
255 fn to_null(&self) -> VortexResult<NullArray> {
256 self.to_canonical()?.into_null()
257 }
258
259 fn to_bool(&self) -> VortexResult<BoolArray> {
260 self.to_canonical()?.into_bool()
261 }
262
263 fn to_primitive(&self) -> VortexResult<PrimitiveArray> {
264 self.to_canonical()?.into_primitive()
265 }
266
267 fn to_decimal(&self) -> VortexResult<DecimalArray> {
268 self.to_canonical()?.into_decimal()
269 }
270
271 fn to_struct(&self) -> VortexResult<StructArray> {
272 self.to_canonical()?.into_struct()
273 }
274
275 fn to_list(&self) -> VortexResult<ListArray> {
276 self.to_canonical()?.into_list()
277 }
278
279 fn to_fixed_size_list(&self) -> VortexResult<FixedSizeListArray> {
280 self.to_canonical()?.into_fixed_size_list()
281 }
282
283 fn to_varbinview(&self) -> VortexResult<VarBinViewArray> {
284 self.to_canonical()?.into_varbinview()
285 }
286
287 fn to_extension(&self) -> VortexResult<ExtensionArray> {
288 self.to_canonical()?.into_extension()
289 }
290}
291
292impl From<Canonical> for ArrayRef {
293 fn from(value: Canonical) -> Self {
294 match value {
295 Canonical::Null(a) => a.into_array(),
296 Canonical::Bool(a) => a.into_array(),
297 Canonical::Primitive(a) => a.into_array(),
298 Canonical::Decimal(a) => a.into_array(),
299 Canonical::Struct(a) => a.into_array(),
300 Canonical::List(a) => a.into_array(),
301 Canonical::FixedSizeList(a) => a.into_array(),
302 Canonical::VarBinView(a) => a.into_array(),
303 Canonical::Extension(a) => a.into_array(),
304 }
305 }
306}
307
308#[cfg(test)]
309mod test {
310 use std::sync::Arc;
311
312 use arrow_array::cast::AsArray;
313 use arrow_array::types::{Int32Type, Int64Type, UInt64Type};
314 use arrow_array::{
315 Array as ArrowArray, ArrayRef as ArrowArrayRef, ListArray as ArrowListArray,
316 PrimitiveArray as ArrowPrimitiveArray, StringArray, StringViewArray,
317 StructArray as ArrowStructArray,
318 };
319 use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
320 use arrow_schema::{DataType, Field};
321 use vortex_buffer::buffer;
322
323 use crate::arrays::{ConstantArray, StructArray};
324 use crate::arrow::{FromArrowArray, IntoArrowArray};
325 use crate::{ArrayRef, IntoArray};
326
327 #[test]
328 fn test_canonicalize_nested_struct() {
329 let nested_struct_array = StructArray::from_fields(&[
331 ("a", buffer![1u64].into_array()),
332 (
333 "b",
334 StructArray::from_fields(&[(
335 "inner_a",
336 ConstantArray::new(100i64, 1).into_array(),
341 )])
342 .unwrap()
343 .into_array(),
344 ),
345 ])
346 .unwrap();
347
348 let arrow_struct = nested_struct_array
349 .into_array()
350 .into_arrow_preferred()
351 .unwrap()
352 .as_any()
353 .downcast_ref::<ArrowStructArray>()
354 .cloned()
355 .unwrap();
356
357 assert!(
358 arrow_struct
359 .column(0)
360 .as_any()
361 .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>()
362 .is_some()
363 );
364
365 let inner_struct = arrow_struct
366 .column(1)
367 .clone()
368 .as_any()
369 .downcast_ref::<ArrowStructArray>()
370 .cloned()
371 .unwrap();
372
373 let inner_a = inner_struct
374 .column(0)
375 .as_any()
376 .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
377 assert!(inner_a.is_some());
378
379 assert_eq!(
380 inner_a.cloned().unwrap(),
381 ArrowPrimitiveArray::from_iter([100i64]),
382 );
383 }
384
385 #[test]
386 fn roundtrip_struct() {
387 let mut nulls = NullBufferBuilder::new(6);
388 nulls.append_n_non_nulls(4);
389 nulls.append_null();
390 nulls.append_non_null();
391 let names = Arc::new(StringViewArray::from_iter(vec![
392 Some("Joseph"),
393 None,
394 Some("Angela"),
395 Some("Mikhail"),
396 None,
397 None,
398 ]));
399 let ages = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
400 Some(25),
401 Some(31),
402 None,
403 Some(57),
404 None,
405 None,
406 ]));
407
408 let arrow_struct = ArrowStructArray::new(
409 vec![
410 Arc::new(Field::new("name", DataType::Utf8View, true)),
411 Arc::new(Field::new("age", DataType::Int32, true)),
412 ]
413 .into(),
414 vec![names, ages],
415 nulls.finish(),
416 );
417
418 let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true);
419
420 assert_eq!(
421 &arrow_struct,
422 vortex_struct.into_arrow_preferred().unwrap().as_struct()
423 );
424 }
425
426 #[test]
427 fn roundtrip_list() {
428 let names = Arc::new(StringArray::from_iter(vec![
429 Some("Joseph"),
430 Some("Angela"),
431 Some("Mikhail"),
432 ]));
433
434 let arrow_list = ArrowListArray::new(
435 Arc::new(Field::new_list_field(DataType::Utf8, true)),
436 OffsetBuffer::from_lengths(vec![0, 2, 1]),
437 names,
438 None,
439 );
440 let list_data_type = arrow_list.data_type();
441
442 let vortex_list = ArrayRef::from_arrow(&arrow_list, true);
443
444 let rt_arrow_list = vortex_list.into_arrow(list_data_type).unwrap();
445
446 assert_eq!(
447 (Arc::new(arrow_list.clone()) as ArrowArrayRef).as_ref(),
448 rt_arrow_list.as_ref()
449 );
450 }
451}