1use arrow_array::ArrayRef as ArrowArrayRef;
4use arrow_schema::DataType;
5use vortex_dtype::DType;
6use vortex_error::{VortexExpect, VortexResult, vortex_bail};
7
8use crate::arrays::{
9    BoolArray, ExtensionArray, ListArray, NullArray, PrimitiveArray, StructArray, VarBinViewArray,
10};
11use crate::arrow::IntoArrowArray;
12use crate::builders::builder_with_capacity;
13use crate::compute::{preferred_arrow_data_type, to_arrow};
14use crate::{Array, ArrayRef, IntoArray};
15
/// The closed set of canonical array representations.
///
/// Each variant wraps exactly one concrete array type. A `Canonical` can be
/// borrowed as a `dyn Array` (via `AsRef`) or turned into an `ArrayRef`
/// (via `IntoArray` / `From`).
#[derive(Debug, Clone)]
pub enum Canonical {
    /// Canonical form backed by a [`NullArray`].
    Null(NullArray),
    /// Canonical form backed by a [`BoolArray`].
    Bool(BoolArray),
    /// Canonical form backed by a [`PrimitiveArray`].
    Primitive(PrimitiveArray),
    /// Canonical form backed by a [`StructArray`].
    Struct(StructArray),
    /// Canonical form backed by a [`ListArray`].
    List(ListArray),
    /// Canonical form backed by a [`VarBinViewArray`].
    VarBinView(VarBinViewArray),
    /// Canonical form backed by an [`ExtensionArray`].
    Extension(ExtensionArray),
}
50
51impl Canonical {
52    pub fn empty(dtype: &DType) -> Canonical {
54        builder_with_capacity(dtype, 0)
55            .finish()
56            .to_canonical()
57            .vortex_expect("cannot fail to convert an empty array to canonical")
58    }
59}
60
61impl Canonical {
63    pub fn into_null(self) -> VortexResult<NullArray> {
64        match self {
65            Canonical::Null(a) => Ok(a),
66            _ => vortex_bail!("Cannot unwrap NullArray from {:?}", &self),
67        }
68    }
69
70    pub fn into_bool(self) -> VortexResult<BoolArray> {
71        match self {
72            Canonical::Bool(a) => Ok(a),
73            _ => vortex_bail!("Cannot unwrap BoolArray from {:?}", &self),
74        }
75    }
76
77    pub fn into_primitive(self) -> VortexResult<PrimitiveArray> {
78        match self {
79            Canonical::Primitive(a) => Ok(a),
80            _ => vortex_bail!("Cannot unwrap PrimitiveArray from {:?}", &self),
81        }
82    }
83
84    pub fn into_struct(self) -> VortexResult<StructArray> {
85        match self {
86            Canonical::Struct(a) => Ok(a),
87            _ => vortex_bail!("Cannot unwrap StructArray from {:?}", &self),
88        }
89    }
90
91    pub fn into_list(self) -> VortexResult<ListArray> {
92        match self {
93            Canonical::List(a) => Ok(a),
94            _ => vortex_bail!("Cannot unwrap StructArray from {:?}", &self),
95        }
96    }
97
98    pub fn into_varbinview(self) -> VortexResult<VarBinViewArray> {
99        match self {
100            Canonical::VarBinView(a) => Ok(a),
101            _ => vortex_bail!("Cannot unwrap VarBinViewArray from {:?}", &self),
102        }
103    }
104
105    pub fn into_extension(self) -> VortexResult<ExtensionArray> {
106        match self {
107            Canonical::Extension(a) => Ok(a),
108            _ => vortex_bail!("Cannot unwrap ExtensionArray from {:?}", &self),
109        }
110    }
111}
112
113impl AsRef<dyn Array> for Canonical {
114    fn as_ref(&self) -> &(dyn Array + 'static) {
115        match &self {
116            Canonical::Null(a) => a,
117            Canonical::Bool(a) => a,
118            Canonical::Primitive(a) => a,
119            Canonical::Struct(a) => a,
120            Canonical::List(a) => a,
121            Canonical::VarBinView(a) => a,
122            Canonical::Extension(a) => a,
123        }
124    }
125}
126
127impl IntoArray for Canonical {
128    fn into_array(self) -> ArrayRef {
129        match self {
130            Canonical::Null(a) => a.into_array(),
131            Canonical::Bool(a) => a.into_array(),
132            Canonical::Primitive(a) => a.into_array(),
133            Canonical::Struct(a) => a.into_array(),
134            Canonical::List(a) => a.into_array(),
135            Canonical::VarBinView(a) => a.into_array(),
136            Canonical::Extension(a) => a.into_array(),
137        }
138    }
139}
140
141pub trait ToCanonical: Array {
147    fn to_null(&self) -> VortexResult<NullArray> {
148        self.to_canonical()?.into_null()
149    }
150
151    fn to_bool(&self) -> VortexResult<BoolArray> {
152        self.to_canonical()?.into_bool()
153    }
154
155    fn to_primitive(&self) -> VortexResult<PrimitiveArray> {
156        self.to_canonical()?.into_primitive()
157    }
158
159    fn to_struct(&self) -> VortexResult<StructArray> {
160        self.to_canonical()?.into_struct()
161    }
162
163    fn to_list(&self) -> VortexResult<ListArray> {
164        self.to_canonical()?.into_list()
165    }
166
167    fn to_varbinview(&self) -> VortexResult<VarBinViewArray> {
168        self.to_canonical()?.into_varbinview()
169    }
170
171    fn to_extension(&self) -> VortexResult<ExtensionArray> {
172        self.to_canonical()?.into_extension()
173    }
174}
175
176impl<A: Array + ?Sized> ToCanonical for A {}
177
178impl IntoArrowArray for ArrayRef {
179    fn into_arrow_preferred(self) -> VortexResult<ArrowArrayRef> {
182        let data_type = preferred_arrow_data_type(&self)?;
183        self.into_arrow(&data_type)
184    }
185
186    fn into_arrow(self, data_type: &DataType) -> VortexResult<ArrowArrayRef> {
187        to_arrow(&self, data_type)
188    }
189}
190
191impl From<Canonical> for ArrayRef {
197    fn from(value: Canonical) -> Self {
198        match value {
199            Canonical::Null(a) => a.into_array(),
200            Canonical::Bool(a) => a.into_array(),
201            Canonical::Primitive(a) => a.into_array(),
202            Canonical::Struct(a) => a.into_array(),
203            Canonical::List(a) => a.into_array(),
204            Canonical::VarBinView(a) => a.into_array(),
205            Canonical::Extension(a) => a.into_array(),
206        }
207    }
208}
209
210#[cfg(test)]
211mod test {
212    use std::sync::Arc;
213
214    use arrow_array::cast::AsArray;
215    use arrow_array::types::{Int32Type, Int64Type, UInt64Type};
216    use arrow_array::{
217        Array as ArrowArray, ArrayRef as ArrowArrayRef, ListArray as ArrowListArray,
218        PrimitiveArray as ArrowPrimitiveArray, StringArray, StringViewArray,
219        StructArray as ArrowStructArray,
220    };
221    use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
222    use arrow_schema::{DataType, Field};
223    use vortex_buffer::buffer;
224
225    use crate::array::Array;
226    use crate::arrays::{ConstantArray, StructArray};
227    use crate::arrow::{FromArrowArray, IntoArrowArray};
228    use crate::{ArrayRef, IntoArray};
229
230    #[test]
231    fn test_canonicalize_nested_struct() {
232        let nested_struct_array = StructArray::from_fields(&[
234            ("a", buffer![1u64].into_array()),
235            (
236                "b",
237                StructArray::from_fields(&[(
238                    "inner_a",
239                    ConstantArray::new(100i64, 1).into_array(),
244                )])
245                .unwrap()
246                .into_array(),
247            ),
248        ])
249        .unwrap();
250
251        let arrow_struct = nested_struct_array
252            .into_array()
253            .into_arrow_preferred()
254            .unwrap()
255            .as_any()
256            .downcast_ref::<ArrowStructArray>()
257            .cloned()
258            .unwrap();
259
260        assert!(
261            arrow_struct
262                .column(0)
263                .as_any()
264                .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>()
265                .is_some()
266        );
267
268        let inner_struct = arrow_struct
269            .column(1)
270            .clone()
271            .as_any()
272            .downcast_ref::<ArrowStructArray>()
273            .cloned()
274            .unwrap();
275
276        let inner_a = inner_struct
277            .column(0)
278            .as_any()
279            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
280        assert!(inner_a.is_some());
281
282        assert_eq!(
283            inner_a.cloned().unwrap(),
284            ArrowPrimitiveArray::from_iter([100i64]),
285        );
286    }
287
288    #[test]
289    fn roundtrip_struct() {
290        let mut nulls = NullBufferBuilder::new(6);
291        nulls.append_n_non_nulls(4);
292        nulls.append_null();
293        nulls.append_non_null();
294        let names = Arc::new(StringViewArray::from_iter(vec![
295            Some("Joseph"),
296            None,
297            Some("Angela"),
298            Some("Mikhail"),
299            None,
300            None,
301        ]));
302        let ages = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
303            Some(25),
304            Some(31),
305            None,
306            Some(57),
307            None,
308            None,
309        ]));
310
311        let arrow_struct = ArrowStructArray::new(
312            vec![
313                Arc::new(Field::new("name", DataType::Utf8View, true)),
314                Arc::new(Field::new("age", DataType::Int32, true)),
315            ]
316            .into(),
317            vec![names, ages],
318            nulls.finish(),
319        );
320
321        let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true);
322
323        assert_eq!(
324            &arrow_struct,
325            vortex_struct.into_arrow_preferred().unwrap().as_struct()
326        );
327    }
328
329    #[test]
330    fn roundtrip_list() {
331        let names = Arc::new(StringArray::from_iter(vec![
332            Some("Joseph"),
333            Some("Angela"),
334            Some("Mikhail"),
335        ]));
336
337        let arrow_list = ArrowListArray::new(
338            Arc::new(Field::new_list_field(DataType::Utf8, true)),
339            OffsetBuffer::from_lengths(vec![0, 2, 1]),
340            names,
341            None,
342        );
343        let list_data_type = arrow_list.data_type();
344
345        let vortex_list = ArrayRef::from_arrow(&arrow_list, true);
346
347        let rt_arrow_list = vortex_list.into_arrow(list_data_type).unwrap();
348
349        assert_eq!(
350            (Arc::new(arrow_list.clone()) as ArrowArrayRef).as_ref(),
351            rt_arrow_list.as_ref()
352        );
353    }
354}