1use arrow_array::ArrayRef as ArrowArrayRef;
4use arrow_schema::DataType;
5use vortex_dtype::DType;
6use vortex_error::{VortexExpect, VortexResult, vortex_bail};
7
8use crate::arrays::{
9 BoolArray, DecimalArray, ExtensionArray, ListArray, NullArray, PrimitiveArray, StructArray,
10 VarBinViewArray,
11};
12use crate::arrow::IntoArrowArray;
13use crate::builders::builder_with_capacity;
14use crate::compute::{preferred_arrow_data_type, to_arrow};
15use crate::{Array, ArrayRef, IntoArray};
16
17#[derive(Debug, Clone)]
41pub enum Canonical {
42 Null(NullArray),
43 Bool(BoolArray),
44 Primitive(PrimitiveArray),
45 Decimal(DecimalArray),
46 Struct(StructArray),
47 List(ListArray),
49 VarBinView(VarBinViewArray),
50 Extension(ExtensionArray),
51}
52
53impl Canonical {
54 pub fn empty(dtype: &DType) -> Canonical {
56 builder_with_capacity(dtype, 0)
57 .finish()
58 .to_canonical()
59 .vortex_expect("cannot fail to convert an empty array to canonical")
60 }
61}
62
63impl Canonical {
65 pub fn into_null(self) -> VortexResult<NullArray> {
66 match self {
67 Canonical::Null(a) => Ok(a),
68 _ => vortex_bail!("Cannot unwrap NullArray from {:?}", &self),
69 }
70 }
71
72 pub fn into_bool(self) -> VortexResult<BoolArray> {
73 match self {
74 Canonical::Bool(a) => Ok(a),
75 _ => vortex_bail!("Cannot unwrap BoolArray from {:?}", &self),
76 }
77 }
78
79 pub fn into_primitive(self) -> VortexResult<PrimitiveArray> {
80 match self {
81 Canonical::Primitive(a) => Ok(a),
82 _ => vortex_bail!("Cannot unwrap PrimitiveArray from {:?}", &self),
83 }
84 }
85
86 pub fn into_decimal(self) -> VortexResult<DecimalArray> {
87 match self {
88 Canonical::Decimal(a) => Ok(a),
89 _ => vortex_bail!("Cannot unwrap DecimalArray from {:?}", &self),
90 }
91 }
92
93 pub fn into_struct(self) -> VortexResult<StructArray> {
94 match self {
95 Canonical::Struct(a) => Ok(a),
96 _ => vortex_bail!("Cannot unwrap StructArray from {:?}", &self),
97 }
98 }
99
100 pub fn into_list(self) -> VortexResult<ListArray> {
101 match self {
102 Canonical::List(a) => Ok(a),
103 _ => vortex_bail!("Cannot unwrap StructArray from {:?}", &self),
104 }
105 }
106
107 pub fn into_varbinview(self) -> VortexResult<VarBinViewArray> {
108 match self {
109 Canonical::VarBinView(a) => Ok(a),
110 _ => vortex_bail!("Cannot unwrap VarBinViewArray from {:?}", &self),
111 }
112 }
113
114 pub fn into_extension(self) -> VortexResult<ExtensionArray> {
115 match self {
116 Canonical::Extension(a) => Ok(a),
117 _ => vortex_bail!("Cannot unwrap ExtensionArray from {:?}", &self),
118 }
119 }
120}
121
122impl AsRef<dyn Array> for Canonical {
123 fn as_ref(&self) -> &(dyn Array + 'static) {
124 match &self {
125 Canonical::Null(a) => a,
126 Canonical::Bool(a) => a,
127 Canonical::Primitive(a) => a,
128 Canonical::Decimal(a) => a,
129 Canonical::Struct(a) => a,
130 Canonical::List(a) => a,
131 Canonical::VarBinView(a) => a,
132 Canonical::Extension(a) => a,
133 }
134 }
135}
136
137impl IntoArray for Canonical {
138 fn into_array(self) -> ArrayRef {
139 match self {
140 Canonical::Null(a) => a.into_array(),
141 Canonical::Bool(a) => a.into_array(),
142 Canonical::Primitive(a) => a.into_array(),
143 Canonical::Decimal(a) => a.into_array(),
144 Canonical::Struct(a) => a.into_array(),
145 Canonical::List(a) => a.into_array(),
146 Canonical::VarBinView(a) => a.into_array(),
147 Canonical::Extension(a) => a.into_array(),
148 }
149 }
150}
151
152pub trait ToCanonical: Array {
158 fn to_null(&self) -> VortexResult<NullArray> {
159 self.to_canonical()?.into_null()
160 }
161
162 fn to_bool(&self) -> VortexResult<BoolArray> {
163 self.to_canonical()?.into_bool()
164 }
165
166 fn to_primitive(&self) -> VortexResult<PrimitiveArray> {
167 self.to_canonical()?.into_primitive()
168 }
169
170 fn to_decimal(&self) -> VortexResult<DecimalArray> {
171 self.to_canonical()?.into_decimal()
172 }
173
174 fn to_struct(&self) -> VortexResult<StructArray> {
175 self.to_canonical()?.into_struct()
176 }
177
178 fn to_list(&self) -> VortexResult<ListArray> {
179 self.to_canonical()?.into_list()
180 }
181
182 fn to_varbinview(&self) -> VortexResult<VarBinViewArray> {
183 self.to_canonical()?.into_varbinview()
184 }
185
186 fn to_extension(&self) -> VortexResult<ExtensionArray> {
187 self.to_canonical()?.into_extension()
188 }
189}
190
191impl<A: Array + ?Sized> ToCanonical for A {}
192
193impl IntoArrowArray for ArrayRef {
194 fn into_arrow_preferred(self) -> VortexResult<ArrowArrayRef> {
197 let data_type = preferred_arrow_data_type(&self)?;
198 self.into_arrow(&data_type)
199 }
200
201 fn into_arrow(self, data_type: &DataType) -> VortexResult<ArrowArrayRef> {
202 to_arrow(&self, data_type)
203 }
204}
205
206impl From<Canonical> for ArrayRef {
212 fn from(value: Canonical) -> Self {
213 match value {
214 Canonical::Null(a) => a.into_array(),
215 Canonical::Bool(a) => a.into_array(),
216 Canonical::Primitive(a) => a.into_array(),
217 Canonical::Decimal(a) => a.into_array(),
218 Canonical::Struct(a) => a.into_array(),
219 Canonical::List(a) => a.into_array(),
220 Canonical::VarBinView(a) => a.into_array(),
221 Canonical::Extension(a) => a.into_array(),
222 }
223 }
224}
225
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_array::cast::AsArray;
    use arrow_array::types::{Int32Type, Int64Type, UInt64Type};
    use arrow_array::{
        Array as ArrowArray, ArrayRef as ArrowArrayRef, ListArray as ArrowListArray,
        PrimitiveArray as ArrowPrimitiveArray, StringArray, StringViewArray,
        StructArray as ArrowStructArray,
    };
    use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
    use arrow_schema::{DataType, Field};
    use vortex_buffer::buffer;

    use crate::array::Array;
    use crate::arrays::{ConstantArray, StructArray};
    use crate::arrow::{FromArrowArray, IntoArrowArray};
    use crate::{ArrayRef, IntoArray};

    // A struct whose second field is itself a struct holding a constant column must
    // come out of the Arrow conversion as nested Arrow structs of primitive columns.
    #[test]
    fn test_canonicalize_nested_struct() {
        let inner = StructArray::from_fields(&[(
            "inner_a",
            ConstantArray::new(100i64, 1).into_array(),
        )])
        .unwrap();
        let outer = StructArray::from_fields(&[
            ("a", buffer![1u64].into_array()),
            ("b", inner.into_array()),
        ])
        .unwrap();

        let arrow = outer.into_array().into_arrow_preferred().unwrap();
        let arrow_struct = arrow
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .unwrap();

        let column_a = arrow_struct.column(0);
        assert!(
            column_a
                .as_any()
                .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>()
                .is_some()
        );

        let inner_struct = arrow_struct
            .column(1)
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .unwrap();

        let inner_a = inner_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
        assert!(inner_a.is_some());

        assert_eq!(
            inner_a.unwrap(),
            &ArrowPrimitiveArray::<Int64Type>::from_iter([100i64]),
        );
    }

    // Arrow struct (with struct-level and field-level nulls) -> Vortex -> Arrow must
    // reproduce the original Arrow struct.
    #[test]
    fn roundtrip_struct() {
        let mut validity = NullBufferBuilder::new(6);
        validity.append_n_non_nulls(4);
        validity.append_null();
        validity.append_non_null();

        let names: ArrowArrayRef = Arc::new(StringViewArray::from_iter(vec![
            Some("Joseph"),
            None,
            Some("Angela"),
            Some("Mikhail"),
            None,
            None,
        ]));
        let ages: ArrowArrayRef = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
            Some(25),
            Some(31),
            None,
            Some(57),
            None,
            None,
        ]));

        let fields = vec![
            Arc::new(Field::new("name", DataType::Utf8View, true)),
            Arc::new(Field::new("age", DataType::Int32, true)),
        ];
        let arrow_struct = ArrowStructArray::new(fields.into(), vec![names, ages], validity.finish());

        let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true);
        let roundtripped = vortex_struct.into_arrow_preferred().unwrap();

        assert_eq!(&arrow_struct, roundtripped.as_struct());
    }

    // Arrow list of strings -> Vortex -> Arrow (requesting the original data type)
    // must reproduce the original Arrow list.
    #[test]
    fn roundtrip_list() {
        let names = Arc::new(StringArray::from_iter(vec![
            Some("Joseph"),
            Some("Angela"),
            Some("Mikhail"),
        ]));

        let item_field = Arc::new(Field::new_list_field(DataType::Utf8, true));
        let offsets = OffsetBuffer::from_lengths(vec![0, 2, 1]);
        let arrow_list = ArrowListArray::new(item_field, offsets, names, None);
        let list_data_type = arrow_list.data_type();

        let vortex_list = ArrayRef::from_arrow(&arrow_list, true);
        let rt_arrow_list = vortex_list.into_arrow(list_data_type).unwrap();

        let expected: ArrowArrayRef = Arc::new(arrow_list.clone());
        assert_eq!(expected.as_ref(), rt_arrow_list.as_ref());
    }
}