1use vortex_dtype::DType;
7use vortex_error::{VortexExpect, VortexResult, vortex_bail};
8
9use crate::arrays::{
10 BoolArray, DecimalArray, ExtensionArray, ListArray, NullArray, PrimitiveArray, StructArray,
11 VarBinViewArray,
12};
13use crate::builders::builder_with_capacity;
14use crate::{Array, ArrayRef, IntoArray};
15
/// The closed set of array types that a canonicalized array can take.
///
/// Each variant wraps exactly one concrete array type. The `into_*` methods on
/// this enum unwrap a specific variant, and the [`AsRef`] / [`IntoArray`]
/// implementations expose the wrapped array as a generic array.
#[derive(Debug, Clone)]
pub enum Canonical {
    /// Wraps a [`NullArray`].
    Null(NullArray),
    /// Wraps a [`BoolArray`].
    Bool(BoolArray),
    /// Wraps a [`PrimitiveArray`].
    Primitive(PrimitiveArray),
    /// Wraps a [`DecimalArray`].
    Decimal(DecimalArray),
    /// Wraps a [`StructArray`].
    Struct(StructArray),
    /// Wraps a [`ListArray`].
    List(ListArray),
    /// Wraps a [`VarBinViewArray`].
    VarBinView(VarBinViewArray),
    /// Wraps an [`ExtensionArray`].
    Extension(ExtensionArray),
}
78
79impl Canonical {
80 pub fn empty(dtype: &DType) -> Canonical {
82 builder_with_capacity(dtype, 0)
83 .finish()
84 .to_canonical()
85 .vortex_expect("cannot fail to convert an empty array to canonical")
86 }
87}
88
89impl Canonical {
90 pub fn compact(&self) -> VortexResult<Canonical> {
98 match self {
99 Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
100 Canonical::List(array) => Ok(Canonical::List(array.reset_offsets()?)),
101 _ => Ok(self.clone()),
102 }
103 }
104}
105
106impl Canonical {
108 pub fn into_null(self) -> VortexResult<NullArray> {
109 if let Canonical::Null(a) = self {
110 Ok(a)
111 } else {
112 vortex_bail!("Cannot unwrap NullArray from {:?}", &self)
113 }
114 }
115
116 pub fn into_bool(self) -> VortexResult<BoolArray> {
117 if let Canonical::Bool(a) = self {
118 Ok(a)
119 } else {
120 vortex_bail!("Cannot unwrap BoolArray from {:?}", &self)
121 }
122 }
123
124 pub fn into_primitive(self) -> VortexResult<PrimitiveArray> {
125 if let Canonical::Primitive(a) = self {
126 Ok(a)
127 } else {
128 vortex_bail!("Cannot unwrap PrimitiveArray from {:?}", &self)
129 }
130 }
131
132 pub fn into_decimal(self) -> VortexResult<DecimalArray> {
133 if let Canonical::Decimal(a) = self {
134 Ok(a)
135 } else {
136 vortex_bail!("Cannot unwrap DecimalArray from {:?}", &self)
137 }
138 }
139
140 pub fn into_struct(self) -> VortexResult<StructArray> {
141 if let Canonical::Struct(a) = self {
142 Ok(a)
143 } else {
144 vortex_bail!("Cannot unwrap StructArray from {:?}", &self)
145 }
146 }
147
148 pub fn into_list(self) -> VortexResult<ListArray> {
149 if let Canonical::List(a) = self {
150 Ok(a)
151 } else {
152 vortex_bail!("Cannot unwrap ListArray from {:?}", &self)
153 }
154 }
155
156 pub fn into_varbinview(self) -> VortexResult<VarBinViewArray> {
157 if let Canonical::VarBinView(a) = self {
158 Ok(a)
159 } else {
160 vortex_bail!("Cannot unwrap VarBinViewArray from {:?}", &self)
161 }
162 }
163
164 pub fn into_extension(self) -> VortexResult<ExtensionArray> {
165 if let Canonical::Extension(a) = self {
166 Ok(a)
167 } else {
168 vortex_bail!("Cannot unwrap ExtensionArray from {:?}", &self)
169 }
170 }
171}
172
173impl AsRef<dyn Array> for Canonical {
174 fn as_ref(&self) -> &(dyn Array + 'static) {
175 match &self {
176 Canonical::Null(a) => a.as_ref(),
177 Canonical::Bool(a) => a.as_ref(),
178 Canonical::Primitive(a) => a.as_ref(),
179 Canonical::Decimal(a) => a.as_ref(),
180 Canonical::Struct(a) => a.as_ref(),
181 Canonical::List(a) => a.as_ref(),
182 Canonical::VarBinView(a) => a.as_ref(),
183 Canonical::Extension(a) => a.as_ref(),
184 }
185 }
186}
187
188impl IntoArray for Canonical {
189 fn into_array(self) -> ArrayRef {
190 match self {
191 Canonical::Null(a) => a.into_array(),
192 Canonical::Bool(a) => a.into_array(),
193 Canonical::Primitive(a) => a.into_array(),
194 Canonical::Decimal(a) => a.into_array(),
195 Canonical::Struct(a) => a.into_array(),
196 Canonical::List(a) => a.into_array(),
197 Canonical::VarBinView(a) => a.into_array(),
198 Canonical::Extension(a) => a.into_array(),
199 }
200 }
201}
202
/// Shorthand conversions from an array to one specific canonical array type.
///
/// The blanket implementation in this file canonicalizes the array with
/// `to_canonical` and then unwraps the matching [`Canonical`] variant, so each
/// method fails when canonicalization fails or yields a different variant.
pub trait ToCanonical {
    /// Converts `self` into a [`NullArray`], or returns an error.
    fn to_null(&self) -> VortexResult<NullArray>;

    /// Converts `self` into a [`BoolArray`], or returns an error.
    fn to_bool(&self) -> VortexResult<BoolArray>;

    /// Converts `self` into a [`PrimitiveArray`], or returns an error.
    fn to_primitive(&self) -> VortexResult<PrimitiveArray>;

    /// Converts `self` into a [`DecimalArray`], or returns an error.
    fn to_decimal(&self) -> VortexResult<DecimalArray>;

    /// Converts `self` into a [`StructArray`], or returns an error.
    fn to_struct(&self) -> VortexResult<StructArray>;

    /// Converts `self` into a [`ListArray`], or returns an error.
    fn to_list(&self) -> VortexResult<ListArray>;

    /// Converts `self` into a [`VarBinViewArray`], or returns an error.
    fn to_varbinview(&self) -> VortexResult<VarBinViewArray>;

    /// Converts `self` into an [`ExtensionArray`], or returns an error.
    fn to_extension(&self) -> VortexResult<ExtensionArray>;
}
237
238impl<A: Array + ?Sized> ToCanonical for A {
240 fn to_null(&self) -> VortexResult<NullArray> {
241 self.to_canonical()?.into_null()
242 }
243
244 fn to_bool(&self) -> VortexResult<BoolArray> {
245 self.to_canonical()?.into_bool()
246 }
247
248 fn to_primitive(&self) -> VortexResult<PrimitiveArray> {
249 self.to_canonical()?.into_primitive()
250 }
251
252 fn to_decimal(&self) -> VortexResult<DecimalArray> {
253 self.to_canonical()?.into_decimal()
254 }
255
256 fn to_struct(&self) -> VortexResult<StructArray> {
257 self.to_canonical()?.into_struct()
258 }
259
260 fn to_list(&self) -> VortexResult<ListArray> {
261 self.to_canonical()?.into_list()
262 }
263
264 fn to_varbinview(&self) -> VortexResult<VarBinViewArray> {
265 self.to_canonical()?.into_varbinview()
266 }
267
268 fn to_extension(&self) -> VortexResult<ExtensionArray> {
269 self.to_canonical()?.into_extension()
270 }
271}
272
273impl From<Canonical> for ArrayRef {
274 fn from(value: Canonical) -> Self {
275 match value {
276 Canonical::Null(a) => a.into_array(),
277 Canonical::Bool(a) => a.into_array(),
278 Canonical::Primitive(a) => a.into_array(),
279 Canonical::Decimal(a) => a.into_array(),
280 Canonical::Struct(a) => a.into_array(),
281 Canonical::List(a) => a.into_array(),
282 Canonical::VarBinView(a) => a.into_array(),
283 Canonical::Extension(a) => a.into_array(),
284 }
285 }
286}
287
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_array::cast::AsArray;
    use arrow_array::types::{Int32Type, Int64Type, UInt64Type};
    use arrow_array::{
        Array as ArrowArray, ArrayRef as ArrowArrayRef, ListArray as ArrowListArray,
        PrimitiveArray as ArrowPrimitiveArray, StringArray, StringViewArray,
        StructArray as ArrowStructArray,
    };
    use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
    use arrow_schema::{DataType, Field};
    use vortex_buffer::buffer;

    use crate::arrays::{ConstantArray, StructArray};
    use crate::arrow::{FromArrowArray, IntoArrowArray};
    use crate::{ArrayRef, IntoArray};

    /// A struct whose nested field is a `ConstantArray` must convert to arrow
    /// with plain primitive columns at both nesting levels.
    #[test]
    fn test_canonicalize_nested_struct() {
        // Inner struct: a single field "inner_a" backed by a constant array.
        let inner = StructArray::from_fields(&[(
            "inner_a",
            ConstantArray::new(100i64, 1).into_array(),
        )])
        .unwrap()
        .into_array();

        // Outer struct: a primitive field "a" plus the nested struct "b".
        let outer =
            StructArray::from_fields(&[("a", buffer![1u64].into_array()), ("b", inner)]).unwrap();

        let arrow_array = outer.into_array().into_arrow_preferred().unwrap();
        let arrow_struct = arrow_array
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .unwrap();

        // Column "a" must be a u64 primitive array.
        let column_a = arrow_struct.column(0);
        assert!(column_a.as_any().is::<ArrowPrimitiveArray<UInt64Type>>());

        // Column "b" must itself be a struct array.
        let inner_struct = arrow_struct
            .column(1)
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .unwrap();

        // Its only column must be an i64 primitive array holding the constant.
        let inner_a = inner_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
        assert!(inner_a.is_some());

        assert_eq!(
            inner_a.cloned().unwrap(),
            ArrowPrimitiveArray::from_iter([100i64]),
        );
    }

    /// An arrow struct array with nullable fields and struct-level nulls must
    /// survive a round trip through vortex unchanged.
    #[test]
    fn roundtrip_struct() {
        // Struct-level validity: rows 0..=3 valid, row 4 null, row 5 valid.
        let mut nulls = NullBufferBuilder::new(6);
        nulls.append_n_non_nulls(4);
        nulls.append_null();
        nulls.append_non_null();
        let validity = nulls.finish();

        let names: ArrowArrayRef = Arc::new(StringViewArray::from_iter(vec![
            Some("Joseph"),
            None,
            Some("Angela"),
            Some("Mikhail"),
            None,
            None,
        ]));
        let ages: ArrowArrayRef = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
            Some(25),
            Some(31),
            None,
            Some(57),
            None,
            None,
        ]));

        let fields = vec![
            Arc::new(Field::new("name", DataType::Utf8View, true)),
            Arc::new(Field::new("age", DataType::Int32, true)),
        ];
        let arrow_struct = ArrowStructArray::new(fields.into(), vec![names, ages], validity);

        let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true);
        let roundtripped = vortex_struct.into_arrow_preferred().unwrap();

        assert_eq!(&arrow_struct, roundtripped.as_struct());
    }

    /// An arrow list array must survive a round trip through vortex when
    /// converted back to its original arrow data type.
    #[test]
    fn roundtrip_list() {
        let values: ArrowArrayRef = Arc::new(StringArray::from_iter(vec![
            Some("Joseph"),
            Some("Angela"),
            Some("Mikhail"),
        ]));

        // Three lists with lengths 0, 2 and 1 over the three values above.
        let item_field = Arc::new(Field::new_list_field(DataType::Utf8, true));
        let offsets = OffsetBuffer::from_lengths(vec![0, 2, 1]);
        let arrow_list = ArrowListArray::new(item_field, offsets, values, None);
        let list_data_type = arrow_list.data_type();

        let vortex_list = ArrayRef::from_arrow(&arrow_list, true);
        let rt_arrow_list = vortex_list.into_arrow(list_data_type).unwrap();

        let expected: ArrowArrayRef = Arc::new(arrow_list.clone());
        assert_eq!(expected.as_ref(), rt_arrow_list.as_ref());
    }
}