1use vortex_dtype::DType;
7use vortex_error::{VortexExpect, VortexResult, vortex_bail};
8
9use crate::arrays::{
10 BoolArray, DecimalArray, ExtensionArray, ListArray, NullArray, PrimitiveArray, StructArray,
11 VarBinViewArray,
12};
13use crate::builders::builder_with_capacity;
14use crate::{Array, ArrayRef, IntoArray};
15
16#[derive(Debug, Clone)]
67pub enum Canonical {
68 Null(NullArray),
69 Bool(BoolArray),
70 Primitive(PrimitiveArray),
71 Decimal(DecimalArray),
72 Struct(StructArray),
73 List(ListArray),
75 VarBinView(VarBinViewArray),
77 Extension(ExtensionArray),
78}
79
80impl Canonical {
81 pub fn empty(dtype: &DType) -> Canonical {
83 builder_with_capacity(dtype, 0)
84 .finish()
85 .to_canonical()
86 .vortex_expect("cannot fail to convert an empty array to canonical")
87 }
88}
89
90impl Canonical {
91 pub fn compact(&self) -> VortexResult<Canonical> {
99 match self {
100 Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
101 Canonical::List(array) => Ok(Canonical::List(array.reset_offsets()?)),
102 _ => Ok(self.clone()),
103 }
104 }
105}
106
107impl Canonical {
109 pub fn into_null(self) -> VortexResult<NullArray> {
110 if let Canonical::Null(a) = self {
111 Ok(a)
112 } else {
113 vortex_bail!("Cannot unwrap NullArray from {:?}", &self)
114 }
115 }
116
117 pub fn into_bool(self) -> VortexResult<BoolArray> {
118 if let Canonical::Bool(a) = self {
119 Ok(a)
120 } else {
121 vortex_bail!("Cannot unwrap BoolArray from {:?}", &self)
122 }
123 }
124
125 pub fn into_primitive(self) -> VortexResult<PrimitiveArray> {
126 if let Canonical::Primitive(a) = self {
127 Ok(a)
128 } else {
129 vortex_bail!("Cannot unwrap PrimitiveArray from {:?}", &self)
130 }
131 }
132
133 pub fn into_decimal(self) -> VortexResult<DecimalArray> {
134 if let Canonical::Decimal(a) = self {
135 Ok(a)
136 } else {
137 vortex_bail!("Cannot unwrap DecimalArray from {:?}", &self)
138 }
139 }
140
141 pub fn into_struct(self) -> VortexResult<StructArray> {
142 if let Canonical::Struct(a) = self {
143 Ok(a)
144 } else {
145 vortex_bail!("Cannot unwrap StructArray from {:?}", &self)
146 }
147 }
148
149 pub fn into_list(self) -> VortexResult<ListArray> {
150 if let Canonical::List(a) = self {
151 Ok(a)
152 } else {
153 vortex_bail!("Cannot unwrap ListArray from {:?}", &self)
154 }
155 }
156
157 pub fn into_varbinview(self) -> VortexResult<VarBinViewArray> {
158 if let Canonical::VarBinView(a) = self {
159 Ok(a)
160 } else {
161 vortex_bail!("Cannot unwrap VarBinViewArray from {:?}", &self)
162 }
163 }
164
165 pub fn into_extension(self) -> VortexResult<ExtensionArray> {
166 if let Canonical::Extension(a) = self {
167 Ok(a)
168 } else {
169 vortex_bail!("Cannot unwrap ExtensionArray from {:?}", &self)
170 }
171 }
172}
173
174impl AsRef<dyn Array> for Canonical {
175 fn as_ref(&self) -> &(dyn Array + 'static) {
176 match &self {
177 Canonical::Null(a) => a.as_ref(),
178 Canonical::Bool(a) => a.as_ref(),
179 Canonical::Primitive(a) => a.as_ref(),
180 Canonical::Decimal(a) => a.as_ref(),
181 Canonical::Struct(a) => a.as_ref(),
182 Canonical::List(a) => a.as_ref(),
183 Canonical::VarBinView(a) => a.as_ref(),
184 Canonical::Extension(a) => a.as_ref(),
185 }
186 }
187}
188
189impl IntoArray for Canonical {
190 fn into_array(self) -> ArrayRef {
191 match self {
192 Canonical::Null(a) => a.into_array(),
193 Canonical::Bool(a) => a.into_array(),
194 Canonical::Primitive(a) => a.into_array(),
195 Canonical::Decimal(a) => a.into_array(),
196 Canonical::Struct(a) => a.into_array(),
197 Canonical::List(a) => a.into_array(),
198 Canonical::VarBinView(a) => a.into_array(),
199 Canonical::Extension(a) => a.into_array(),
200 }
201 }
202}
203
204pub trait ToCanonical {
210 fn to_null(&self) -> VortexResult<NullArray>;
212
213 fn to_bool(&self) -> VortexResult<BoolArray>;
215
216 fn to_primitive(&self) -> VortexResult<PrimitiveArray>;
219
220 fn to_decimal(&self) -> VortexResult<DecimalArray>;
223
224 fn to_struct(&self) -> VortexResult<StructArray>;
226
227 fn to_list(&self) -> VortexResult<ListArray>;
229
230 fn to_varbinview(&self) -> VortexResult<VarBinViewArray>;
233
234 fn to_extension(&self) -> VortexResult<ExtensionArray>;
237}
238
239impl<A: Array + ?Sized> ToCanonical for A {
241 fn to_null(&self) -> VortexResult<NullArray> {
242 self.to_canonical()?.into_null()
243 }
244
245 fn to_bool(&self) -> VortexResult<BoolArray> {
246 self.to_canonical()?.into_bool()
247 }
248
249 fn to_primitive(&self) -> VortexResult<PrimitiveArray> {
250 self.to_canonical()?.into_primitive()
251 }
252
253 fn to_decimal(&self) -> VortexResult<DecimalArray> {
254 self.to_canonical()?.into_decimal()
255 }
256
257 fn to_struct(&self) -> VortexResult<StructArray> {
258 self.to_canonical()?.into_struct()
259 }
260
261 fn to_list(&self) -> VortexResult<ListArray> {
262 self.to_canonical()?.into_list()
263 }
264
265 fn to_varbinview(&self) -> VortexResult<VarBinViewArray> {
266 self.to_canonical()?.into_varbinview()
267 }
268
269 fn to_extension(&self) -> VortexResult<ExtensionArray> {
270 self.to_canonical()?.into_extension()
271 }
272}
273
274impl From<Canonical> for ArrayRef {
275 fn from(value: Canonical) -> Self {
276 match value {
277 Canonical::Null(a) => a.into_array(),
278 Canonical::Bool(a) => a.into_array(),
279 Canonical::Primitive(a) => a.into_array(),
280 Canonical::Decimal(a) => a.into_array(),
281 Canonical::Struct(a) => a.into_array(),
282 Canonical::List(a) => a.into_array(),
283 Canonical::VarBinView(a) => a.into_array(),
284 Canonical::Extension(a) => a.into_array(),
285 }
286 }
287}
288
289#[cfg(test)]
290mod test {
291 use std::sync::Arc;
292
293 use arrow_array::cast::AsArray;
294 use arrow_array::types::{Int32Type, Int64Type, UInt64Type};
295 use arrow_array::{
296 Array as ArrowArray, ArrayRef as ArrowArrayRef, ListArray as ArrowListArray,
297 PrimitiveArray as ArrowPrimitiveArray, StringArray, StringViewArray,
298 StructArray as ArrowStructArray,
299 };
300 use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
301 use arrow_schema::{DataType, Field};
302 use vortex_buffer::buffer;
303
304 use crate::arrays::{ConstantArray, StructArray};
305 use crate::arrow::{FromArrowArray, IntoArrowArray};
306 use crate::{ArrayRef, IntoArray};
307
308 #[test]
309 fn test_canonicalize_nested_struct() {
310 let nested_struct_array = StructArray::from_fields(&[
312 ("a", buffer![1u64].into_array()),
313 (
314 "b",
315 StructArray::from_fields(&[(
316 "inner_a",
317 ConstantArray::new(100i64, 1).into_array(),
322 )])
323 .unwrap()
324 .into_array(),
325 ),
326 ])
327 .unwrap();
328
329 let arrow_struct = nested_struct_array
330 .into_array()
331 .into_arrow_preferred()
332 .unwrap()
333 .as_any()
334 .downcast_ref::<ArrowStructArray>()
335 .cloned()
336 .unwrap();
337
338 assert!(
339 arrow_struct
340 .column(0)
341 .as_any()
342 .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>()
343 .is_some()
344 );
345
346 let inner_struct = arrow_struct
347 .column(1)
348 .clone()
349 .as_any()
350 .downcast_ref::<ArrowStructArray>()
351 .cloned()
352 .unwrap();
353
354 let inner_a = inner_struct
355 .column(0)
356 .as_any()
357 .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
358 assert!(inner_a.is_some());
359
360 assert_eq!(
361 inner_a.cloned().unwrap(),
362 ArrowPrimitiveArray::from_iter([100i64]),
363 );
364 }
365
366 #[test]
367 fn roundtrip_struct() {
368 let mut nulls = NullBufferBuilder::new(6);
369 nulls.append_n_non_nulls(4);
370 nulls.append_null();
371 nulls.append_non_null();
372 let names = Arc::new(StringViewArray::from_iter(vec![
373 Some("Joseph"),
374 None,
375 Some("Angela"),
376 Some("Mikhail"),
377 None,
378 None,
379 ]));
380 let ages = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
381 Some(25),
382 Some(31),
383 None,
384 Some(57),
385 None,
386 None,
387 ]));
388
389 let arrow_struct = ArrowStructArray::new(
390 vec![
391 Arc::new(Field::new("name", DataType::Utf8View, true)),
392 Arc::new(Field::new("age", DataType::Int32, true)),
393 ]
394 .into(),
395 vec![names, ages],
396 nulls.finish(),
397 );
398
399 let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true);
400
401 assert_eq!(
402 &arrow_struct,
403 vortex_struct.into_arrow_preferred().unwrap().as_struct()
404 );
405 }
406
407 #[test]
408 fn roundtrip_list() {
409 let names = Arc::new(StringArray::from_iter(vec![
410 Some("Joseph"),
411 Some("Angela"),
412 Some("Mikhail"),
413 ]));
414
415 let arrow_list = ArrowListArray::new(
416 Arc::new(Field::new_list_field(DataType::Utf8, true)),
417 OffsetBuffer::from_lengths(vec![0, 2, 1]),
418 names,
419 None,
420 );
421 let list_data_type = arrow_list.data_type();
422
423 let vortex_list = ArrayRef::from_arrow(&arrow_list, true);
424
425 let rt_arrow_list = vortex_list.into_arrow(list_data_type).unwrap();
426
427 assert_eq!(
428 (Arc::new(arrow_list.clone()) as ArrowArrayRef).as_ref(),
429 rt_arrow_list.as_ref()
430 );
431 }
432}