1use vortex_dtype::DType;
7use vortex_error::{VortexExpect, VortexResult, vortex_bail};
8
9use crate::arrays::{
10 BoolArray, DecimalArray, ExtensionArray, ListArray, NullArray, PrimitiveArray, StructArray,
11 VarBinViewArray, compact_buffers,
12};
13use crate::builders::builder_with_capacity;
14use crate::{Array, ArrayRef, IntoArray};
15
16#[derive(Debug, Clone)]
67pub enum Canonical {
68 Null(NullArray),
69 Bool(BoolArray),
70 Primitive(PrimitiveArray),
71 Decimal(DecimalArray),
72 Struct(StructArray),
73 List(ListArray),
75 VarBinView(VarBinViewArray),
76 Extension(ExtensionArray),
77}
78
79impl Canonical {
80 pub fn empty(dtype: &DType) -> Canonical {
82 builder_with_capacity(dtype, 0)
83 .finish()
84 .to_canonical()
85 .vortex_expect("cannot fail to convert an empty array to canonical")
86 }
87}
88
89impl Canonical {
90 pub fn compact(&self) -> VortexResult<Canonical> {
98 match self {
99 Canonical::VarBinView(array) => Ok(Canonical::VarBinView(compact_buffers(array)?)),
100 other => Ok(other.clone()),
101 }
102 }
103}
104
105impl Canonical {
107 pub fn into_null(self) -> VortexResult<NullArray> {
108 match self {
109 Canonical::Null(a) => Ok(a),
110 _ => vortex_bail!("Cannot unwrap NullArray from {:?}", &self),
111 }
112 }
113
114 pub fn into_bool(self) -> VortexResult<BoolArray> {
115 match self {
116 Canonical::Bool(a) => Ok(a),
117 _ => vortex_bail!("Cannot unwrap BoolArray from {:?}", &self),
118 }
119 }
120
121 pub fn into_primitive(self) -> VortexResult<PrimitiveArray> {
122 match self {
123 Canonical::Primitive(a) => Ok(a),
124 _ => vortex_bail!("Cannot unwrap PrimitiveArray from {:?}", &self),
125 }
126 }
127
128 pub fn into_decimal(self) -> VortexResult<DecimalArray> {
129 match self {
130 Canonical::Decimal(a) => Ok(a),
131 _ => vortex_bail!("Cannot unwrap DecimalArray from {:?}", &self),
132 }
133 }
134
135 pub fn into_struct(self) -> VortexResult<StructArray> {
136 match self {
137 Canonical::Struct(a) => Ok(a),
138 _ => vortex_bail!("Cannot unwrap StructArray from {:?}", &self),
139 }
140 }
141
142 pub fn into_list(self) -> VortexResult<ListArray> {
143 match self {
144 Canonical::List(a) => Ok(a),
145 _ => vortex_bail!("Cannot unwrap ListArray from {:?}", &self),
146 }
147 }
148
149 pub fn into_varbinview(self) -> VortexResult<VarBinViewArray> {
150 match self {
151 Canonical::VarBinView(a) => Ok(a),
152 _ => vortex_bail!("Cannot unwrap VarBinViewArray from {:?}", &self),
153 }
154 }
155
156 pub fn into_extension(self) -> VortexResult<ExtensionArray> {
157 match self {
158 Canonical::Extension(a) => Ok(a),
159 _ => vortex_bail!("Cannot unwrap ExtensionArray from {:?}", &self),
160 }
161 }
162}
163
164impl AsRef<dyn Array> for Canonical {
165 fn as_ref(&self) -> &(dyn Array + 'static) {
166 match &self {
167 Canonical::Null(a) => a.as_ref(),
168 Canonical::Bool(a) => a.as_ref(),
169 Canonical::Primitive(a) => a.as_ref(),
170 Canonical::Decimal(a) => a.as_ref(),
171 Canonical::Struct(a) => a.as_ref(),
172 Canonical::List(a) => a.as_ref(),
173 Canonical::VarBinView(a) => a.as_ref(),
174 Canonical::Extension(a) => a.as_ref(),
175 }
176 }
177}
178
179impl IntoArray for Canonical {
180 fn into_array(self) -> ArrayRef {
181 match self {
182 Canonical::Null(a) => a.into_array(),
183 Canonical::Bool(a) => a.into_array(),
184 Canonical::Primitive(a) => a.into_array(),
185 Canonical::Decimal(a) => a.into_array(),
186 Canonical::Struct(a) => a.into_array(),
187 Canonical::List(a) => a.into_array(),
188 Canonical::VarBinView(a) => a.into_array(),
189 Canonical::Extension(a) => a.into_array(),
190 }
191 }
192}
193
194pub trait ToCanonical {
200 fn to_null(&self) -> VortexResult<NullArray>;
202
203 fn to_bool(&self) -> VortexResult<BoolArray>;
205
206 fn to_primitive(&self) -> VortexResult<PrimitiveArray>;
209
210 fn to_decimal(&self) -> VortexResult<DecimalArray>;
213
214 fn to_struct(&self) -> VortexResult<StructArray>;
216
217 fn to_list(&self) -> VortexResult<ListArray>;
219
220 fn to_varbinview(&self) -> VortexResult<VarBinViewArray>;
223
224 fn to_extension(&self) -> VortexResult<ExtensionArray>;
227}
228
229impl<A: Array + ?Sized> ToCanonical for A {
231 fn to_null(&self) -> VortexResult<NullArray> {
232 self.to_canonical()?.into_null()
233 }
234
235 fn to_bool(&self) -> VortexResult<BoolArray> {
236 self.to_canonical()?.into_bool()
237 }
238
239 fn to_primitive(&self) -> VortexResult<PrimitiveArray> {
240 self.to_canonical()?.into_primitive()
241 }
242
243 fn to_decimal(&self) -> VortexResult<DecimalArray> {
244 self.to_canonical()?.into_decimal()
245 }
246
247 fn to_struct(&self) -> VortexResult<StructArray> {
248 self.to_canonical()?.into_struct()
249 }
250
251 fn to_list(&self) -> VortexResult<ListArray> {
252 self.to_canonical()?.into_list()
253 }
254
255 fn to_varbinview(&self) -> VortexResult<VarBinViewArray> {
256 self.to_canonical()?.into_varbinview()
257 }
258
259 fn to_extension(&self) -> VortexResult<ExtensionArray> {
260 self.to_canonical()?.into_extension()
261 }
262}
263
264impl From<Canonical> for ArrayRef {
265 fn from(value: Canonical) -> Self {
266 match value {
267 Canonical::Null(a) => a.into_array(),
268 Canonical::Bool(a) => a.into_array(),
269 Canonical::Primitive(a) => a.into_array(),
270 Canonical::Decimal(a) => a.into_array(),
271 Canonical::Struct(a) => a.into_array(),
272 Canonical::List(a) => a.into_array(),
273 Canonical::VarBinView(a) => a.into_array(),
274 Canonical::Extension(a) => a.into_array(),
275 }
276 }
277}
278
279#[cfg(test)]
280mod test {
281 use std::sync::Arc;
282
283 use arrow_array::cast::AsArray;
284 use arrow_array::types::{Int32Type, Int64Type, UInt64Type};
285 use arrow_array::{
286 Array as ArrowArray, ArrayRef as ArrowArrayRef, ListArray as ArrowListArray,
287 PrimitiveArray as ArrowPrimitiveArray, StringArray, StringViewArray,
288 StructArray as ArrowStructArray,
289 };
290 use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
291 use arrow_schema::{DataType, Field};
292 use vortex_buffer::buffer;
293
294 use crate::arrays::{ConstantArray, StructArray};
295 use crate::arrow::{FromArrowArray, IntoArrowArray};
296 use crate::{ArrayRef, IntoArray};
297
298 #[test]
299 fn test_canonicalize_nested_struct() {
300 let nested_struct_array = StructArray::from_fields(&[
302 ("a", buffer![1u64].into_array()),
303 (
304 "b",
305 StructArray::from_fields(&[(
306 "inner_a",
307 ConstantArray::new(100i64, 1).into_array(),
312 )])
313 .unwrap()
314 .into_array(),
315 ),
316 ])
317 .unwrap();
318
319 let arrow_struct = nested_struct_array
320 .into_array()
321 .into_arrow_preferred()
322 .unwrap()
323 .as_any()
324 .downcast_ref::<ArrowStructArray>()
325 .cloned()
326 .unwrap();
327
328 assert!(
329 arrow_struct
330 .column(0)
331 .as_any()
332 .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>()
333 .is_some()
334 );
335
336 let inner_struct = arrow_struct
337 .column(1)
338 .clone()
339 .as_any()
340 .downcast_ref::<ArrowStructArray>()
341 .cloned()
342 .unwrap();
343
344 let inner_a = inner_struct
345 .column(0)
346 .as_any()
347 .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
348 assert!(inner_a.is_some());
349
350 assert_eq!(
351 inner_a.cloned().unwrap(),
352 ArrowPrimitiveArray::from_iter([100i64]),
353 );
354 }
355
356 #[test]
357 fn roundtrip_struct() {
358 let mut nulls = NullBufferBuilder::new(6);
359 nulls.append_n_non_nulls(4);
360 nulls.append_null();
361 nulls.append_non_null();
362 let names = Arc::new(StringViewArray::from_iter(vec![
363 Some("Joseph"),
364 None,
365 Some("Angela"),
366 Some("Mikhail"),
367 None,
368 None,
369 ]));
370 let ages = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
371 Some(25),
372 Some(31),
373 None,
374 Some(57),
375 None,
376 None,
377 ]));
378
379 let arrow_struct = ArrowStructArray::new(
380 vec![
381 Arc::new(Field::new("name", DataType::Utf8View, true)),
382 Arc::new(Field::new("age", DataType::Int32, true)),
383 ]
384 .into(),
385 vec![names, ages],
386 nulls.finish(),
387 );
388
389 let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true);
390
391 assert_eq!(
392 &arrow_struct,
393 vortex_struct.into_arrow_preferred().unwrap().as_struct()
394 );
395 }
396
397 #[test]
398 fn roundtrip_list() {
399 let names = Arc::new(StringArray::from_iter(vec![
400 Some("Joseph"),
401 Some("Angela"),
402 Some("Mikhail"),
403 ]));
404
405 let arrow_list = ArrowListArray::new(
406 Arc::new(Field::new_list_field(DataType::Utf8, true)),
407 OffsetBuffer::from_lengths(vec![0, 2, 1]),
408 names,
409 None,
410 );
411 let list_data_type = arrow_list.data_type();
412
413 let vortex_list = ArrayRef::from_arrow(&arrow_list, true);
414
415 let rt_arrow_list = vortex_list.into_arrow(list_data_type).unwrap();
416
417 assert_eq!(
418 (Arc::new(arrow_list.clone()) as ArrowArrayRef).as_ref(),
419 rt_arrow_list.as_ref()
420 );
421 }
422}