1use arrow_array::ArrayRef as ArrowArrayRef;
4use arrow_schema::DataType;
5use vortex_dtype::DType;
6use vortex_error::{VortexExpect, VortexResult, vortex_bail};
7
8use crate::arrays::{
9 BoolArray, ExtensionArray, ListArray, NullArray, PrimitiveArray, StructArray, VarBinViewArray,
10};
11use crate::arrow::IntoArrowArray;
12use crate::builders::builder_with_capacity;
13use crate::compute::{preferred_arrow_data_type, to_arrow};
14use crate::{Array, ArrayRef, IntoArray};
15
16#[derive(Debug, Clone)]
40pub enum Canonical {
41 Null(NullArray),
42 Bool(BoolArray),
43 Primitive(PrimitiveArray),
44 Struct(StructArray),
45 List(ListArray),
47 VarBinView(VarBinViewArray),
48 Extension(ExtensionArray),
49}
50
51impl Canonical {
52 pub fn empty(dtype: &DType) -> Canonical {
54 builder_with_capacity(dtype, 0)
55 .finish()
56 .to_canonical()
57 .vortex_expect("cannot fail to convert an empty array to canonical")
58 }
59}
60
61impl Canonical {
63 pub fn into_null(self) -> VortexResult<NullArray> {
64 match self {
65 Canonical::Null(a) => Ok(a),
66 _ => vortex_bail!("Cannot unwrap NullArray from {:?}", &self),
67 }
68 }
69
70 pub fn into_bool(self) -> VortexResult<BoolArray> {
71 match self {
72 Canonical::Bool(a) => Ok(a),
73 _ => vortex_bail!("Cannot unwrap BoolArray from {:?}", &self),
74 }
75 }
76
77 pub fn into_primitive(self) -> VortexResult<PrimitiveArray> {
78 match self {
79 Canonical::Primitive(a) => Ok(a),
80 _ => vortex_bail!("Cannot unwrap PrimitiveArray from {:?}", &self),
81 }
82 }
83
84 pub fn into_struct(self) -> VortexResult<StructArray> {
85 match self {
86 Canonical::Struct(a) => Ok(a),
87 _ => vortex_bail!("Cannot unwrap StructArray from {:?}", &self),
88 }
89 }
90
91 pub fn into_list(self) -> VortexResult<ListArray> {
92 match self {
93 Canonical::List(a) => Ok(a),
94 _ => vortex_bail!("Cannot unwrap StructArray from {:?}", &self),
95 }
96 }
97
98 pub fn into_varbinview(self) -> VortexResult<VarBinViewArray> {
99 match self {
100 Canonical::VarBinView(a) => Ok(a),
101 _ => vortex_bail!("Cannot unwrap VarBinViewArray from {:?}", &self),
102 }
103 }
104
105 pub fn into_extension(self) -> VortexResult<ExtensionArray> {
106 match self {
107 Canonical::Extension(a) => Ok(a),
108 _ => vortex_bail!("Cannot unwrap ExtensionArray from {:?}", &self),
109 }
110 }
111}
112
113impl AsRef<dyn Array> for Canonical {
114 fn as_ref(&self) -> &(dyn Array + 'static) {
115 match &self {
116 Canonical::Null(a) => a,
117 Canonical::Bool(a) => a,
118 Canonical::Primitive(a) => a,
119 Canonical::Struct(a) => a,
120 Canonical::List(a) => a,
121 Canonical::VarBinView(a) => a,
122 Canonical::Extension(a) => a,
123 }
124 }
125}
126
127impl IntoArray for Canonical {
128 fn into_array(self) -> ArrayRef {
129 match self {
130 Canonical::Null(a) => a.into_array(),
131 Canonical::Bool(a) => a.into_array(),
132 Canonical::Primitive(a) => a.into_array(),
133 Canonical::Struct(a) => a.into_array(),
134 Canonical::List(a) => a.into_array(),
135 Canonical::VarBinView(a) => a.into_array(),
136 Canonical::Extension(a) => a.into_array(),
137 }
138 }
139}
140
141pub trait ToCanonical: Array {
147 fn to_null(&self) -> VortexResult<NullArray> {
148 self.to_canonical()?.into_null()
149 }
150
151 fn to_bool(&self) -> VortexResult<BoolArray> {
152 self.to_canonical()?.into_bool()
153 }
154
155 fn to_primitive(&self) -> VortexResult<PrimitiveArray> {
156 self.to_canonical()?.into_primitive()
157 }
158
159 fn to_struct(&self) -> VortexResult<StructArray> {
160 self.to_canonical()?.into_struct()
161 }
162
163 fn to_list(&self) -> VortexResult<ListArray> {
164 self.to_canonical()?.into_list()
165 }
166
167 fn to_varbinview(&self) -> VortexResult<VarBinViewArray> {
168 self.to_canonical()?.into_varbinview()
169 }
170
171 fn to_extension(&self) -> VortexResult<ExtensionArray> {
172 self.to_canonical()?.into_extension()
173 }
174}
175
176impl<A: Array + ?Sized> ToCanonical for A {}
177
178impl IntoArrowArray for ArrayRef {
179 fn into_arrow_preferred(self) -> VortexResult<ArrowArrayRef> {
182 let data_type = preferred_arrow_data_type(&self)?;
183 self.into_arrow(&data_type)
184 }
185
186 fn into_arrow(self, data_type: &DataType) -> VortexResult<ArrowArrayRef> {
187 to_arrow(&self, data_type)
188 }
189}
190
191impl From<Canonical> for ArrayRef {
197 fn from(value: Canonical) -> Self {
198 match value {
199 Canonical::Null(a) => a.into_array(),
200 Canonical::Bool(a) => a.into_array(),
201 Canonical::Primitive(a) => a.into_array(),
202 Canonical::Struct(a) => a.into_array(),
203 Canonical::List(a) => a.into_array(),
204 Canonical::VarBinView(a) => a.into_array(),
205 Canonical::Extension(a) => a.into_array(),
206 }
207 }
208}
209
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_array::cast::AsArray;
    use arrow_array::types::{Int32Type, Int64Type, UInt64Type};
    use arrow_array::{
        Array as ArrowArray, ArrayRef as ArrowArrayRef, ListArray as ArrowListArray,
        PrimitiveArray as ArrowPrimitiveArray, StringArray, StringViewArray,
        StructArray as ArrowStructArray,
    };
    use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
    use arrow_schema::{DataType, Field};
    use vortex_buffer::buffer;

    use crate::array::Array;
    use crate::arrays::{ConstantArray, StructArray};
    use crate::arrow::{FromArrowArray, IntoArrowArray};
    use crate::{ArrayRef, IntoArray};

    /// A struct containing a nested struct whose only field is a
    /// `ConstantArray` must convert to Arrow with concrete primitive columns
    /// at both levels.
    #[test]
    fn test_canonicalize_nested_struct() {
        // Inner struct: a single field backed by a constant array.
        let inner = StructArray::from_fields(&[(
            "inner_a",
            ConstantArray::new(100i64, 1).into_array(),
        )])
        .unwrap()
        .into_array();

        // Outer struct: a primitive column next to the nested struct.
        let nested_struct_array =
            StructArray::from_fields(&[("a", buffer![1u64].into_array()), ("b", inner)]).unwrap();

        let arrow_array = nested_struct_array
            .into_array()
            .into_arrow_preferred()
            .unwrap();
        let arrow_struct = arrow_array
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // Column "a" comes back as an Arrow u64 primitive array.
        let outer_a = arrow_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>();
        assert!(outer_a.is_some());

        // Column "b" comes back as an Arrow struct.
        let inner_struct = arrow_struct
            .column(1)
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // Its only column is an Arrow i64 primitive array holding the constant.
        let inner_a = inner_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
        assert!(inner_a.is_some());

        assert_eq!(
            inner_a.cloned().unwrap(),
            ArrowPrimitiveArray::from_iter([100i64]),
        );
    }

    /// Arrow struct -> Vortex -> Arrow must reproduce the original struct,
    /// including struct-level and field-level nulls.
    #[test]
    fn roundtrip_struct() {
        // Struct-level validity: rows 0..=3 valid, row 4 null, row 5 valid.
        let mut nulls = NullBufferBuilder::new(6);
        nulls.append_n_non_nulls(4);
        nulls.append_null();
        nulls.append_non_null();

        let names = Arc::new(StringViewArray::from_iter(vec![
            Some("Joseph"),
            None,
            Some("Angela"),
            Some("Mikhail"),
            None,
            None,
        ]));
        let ages = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
            Some(25),
            Some(31),
            None,
            Some(57),
            None,
            None,
        ]));

        let fields = vec![
            Arc::new(Field::new("name", DataType::Utf8View, true)),
            Arc::new(Field::new("age", DataType::Int32, true)),
        ];
        let arrow_struct = ArrowStructArray::new(fields.into(), vec![names, ages], nulls.finish());

        let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true);
        let roundtripped = vortex_struct.into_arrow_preferred().unwrap();

        assert_eq!(&arrow_struct, roundtripped.as_struct());
    }

    /// Arrow list -> Vortex -> Arrow (with the original data type requested)
    /// must reproduce the original list.
    #[test]
    fn roundtrip_list() {
        let names = Arc::new(StringArray::from_iter(vec![
            Some("Joseph"),
            Some("Angela"),
            Some("Mikhail"),
        ]));

        let item_field = Arc::new(Field::new_list_field(DataType::Utf8, true));
        let offsets = OffsetBuffer::from_lengths(vec![0, 2, 1]);
        let arrow_list = ArrowListArray::new(item_field, offsets, names, None);
        let list_data_type = arrow_list.data_type();

        let vortex_list = ArrayRef::from_arrow(&arrow_list, true);
        let rt_arrow_list = vortex_list.into_arrow(list_data_type).unwrap();

        let expected: ArrowArrayRef = Arc::new(arrow_list.clone());
        assert_eq!(expected.as_ref(), rt_arrow_list.as_ref());
    }
}