1use vortex_dtype::DType;
7use vortex_error::{VortexExpect, VortexResult, vortex_bail};
8
9use crate::arrays::{
10 BoolArray, DecimalArray, ExtensionArray, ListArray, NullArray, PrimitiveArray, StructArray,
11 VarBinViewArray,
12};
13use crate::builders::builder_with_capacity;
14use crate::{Array, ArrayRef, IntoArray};
15
16#[derive(Debug, Clone)]
67pub enum Canonical {
68 Null(NullArray),
69 Bool(BoolArray),
70 Primitive(PrimitiveArray),
71 Decimal(DecimalArray),
72 Struct(StructArray),
73 List(ListArray),
75 VarBinView(VarBinViewArray),
76 Extension(ExtensionArray),
77}
78
79impl Canonical {
80 pub fn empty(dtype: &DType) -> Canonical {
82 builder_with_capacity(dtype, 0)
83 .finish()
84 .to_canonical()
85 .vortex_expect("cannot fail to convert an empty array to canonical")
86 }
87}
88
89impl Canonical {
90 pub fn compact(&self) -> VortexResult<Canonical> {
98 match self {
99 Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
100 Canonical::List(array) => Ok(Canonical::List(array.reset_offsets()?)),
101 other => Ok(other.clone()),
102 }
103 }
104}
105
106impl Canonical {
108 pub fn into_null(self) -> VortexResult<NullArray> {
109 match self {
110 Canonical::Null(a) => Ok(a),
111 _ => vortex_bail!("Cannot unwrap NullArray from {:?}", &self),
112 }
113 }
114
115 pub fn into_bool(self) -> VortexResult<BoolArray> {
116 match self {
117 Canonical::Bool(a) => Ok(a),
118 _ => vortex_bail!("Cannot unwrap BoolArray from {:?}", &self),
119 }
120 }
121
122 pub fn into_primitive(self) -> VortexResult<PrimitiveArray> {
123 match self {
124 Canonical::Primitive(a) => Ok(a),
125 _ => vortex_bail!("Cannot unwrap PrimitiveArray from {:?}", &self),
126 }
127 }
128
129 pub fn into_decimal(self) -> VortexResult<DecimalArray> {
130 match self {
131 Canonical::Decimal(a) => Ok(a),
132 _ => vortex_bail!("Cannot unwrap DecimalArray from {:?}", &self),
133 }
134 }
135
136 pub fn into_struct(self) -> VortexResult<StructArray> {
137 match self {
138 Canonical::Struct(a) => Ok(a),
139 _ => vortex_bail!("Cannot unwrap StructArray from {:?}", &self),
140 }
141 }
142
143 pub fn into_list(self) -> VortexResult<ListArray> {
144 match self {
145 Canonical::List(a) => Ok(a),
146 _ => vortex_bail!("Cannot unwrap ListArray from {:?}", &self),
147 }
148 }
149
150 pub fn into_varbinview(self) -> VortexResult<VarBinViewArray> {
151 match self {
152 Canonical::VarBinView(a) => Ok(a),
153 _ => vortex_bail!("Cannot unwrap VarBinViewArray from {:?}", &self),
154 }
155 }
156
157 pub fn into_extension(self) -> VortexResult<ExtensionArray> {
158 match self {
159 Canonical::Extension(a) => Ok(a),
160 _ => vortex_bail!("Cannot unwrap ExtensionArray from {:?}", &self),
161 }
162 }
163}
164
165impl AsRef<dyn Array> for Canonical {
166 fn as_ref(&self) -> &(dyn Array + 'static) {
167 match &self {
168 Canonical::Null(a) => a.as_ref(),
169 Canonical::Bool(a) => a.as_ref(),
170 Canonical::Primitive(a) => a.as_ref(),
171 Canonical::Decimal(a) => a.as_ref(),
172 Canonical::Struct(a) => a.as_ref(),
173 Canonical::List(a) => a.as_ref(),
174 Canonical::VarBinView(a) => a.as_ref(),
175 Canonical::Extension(a) => a.as_ref(),
176 }
177 }
178}
179
180impl IntoArray for Canonical {
181 fn into_array(self) -> ArrayRef {
182 match self {
183 Canonical::Null(a) => a.into_array(),
184 Canonical::Bool(a) => a.into_array(),
185 Canonical::Primitive(a) => a.into_array(),
186 Canonical::Decimal(a) => a.into_array(),
187 Canonical::Struct(a) => a.into_array(),
188 Canonical::List(a) => a.into_array(),
189 Canonical::VarBinView(a) => a.into_array(),
190 Canonical::Extension(a) => a.into_array(),
191 }
192 }
193}
194
195pub trait ToCanonical {
201 fn to_null(&self) -> VortexResult<NullArray>;
203
204 fn to_bool(&self) -> VortexResult<BoolArray>;
206
207 fn to_primitive(&self) -> VortexResult<PrimitiveArray>;
210
211 fn to_decimal(&self) -> VortexResult<DecimalArray>;
214
215 fn to_struct(&self) -> VortexResult<StructArray>;
217
218 fn to_list(&self) -> VortexResult<ListArray>;
220
221 fn to_varbinview(&self) -> VortexResult<VarBinViewArray>;
224
225 fn to_extension(&self) -> VortexResult<ExtensionArray>;
228}
229
230impl<A: Array + ?Sized> ToCanonical for A {
232 fn to_null(&self) -> VortexResult<NullArray> {
233 self.to_canonical()?.into_null()
234 }
235
236 fn to_bool(&self) -> VortexResult<BoolArray> {
237 self.to_canonical()?.into_bool()
238 }
239
240 fn to_primitive(&self) -> VortexResult<PrimitiveArray> {
241 self.to_canonical()?.into_primitive()
242 }
243
244 fn to_decimal(&self) -> VortexResult<DecimalArray> {
245 self.to_canonical()?.into_decimal()
246 }
247
248 fn to_struct(&self) -> VortexResult<StructArray> {
249 self.to_canonical()?.into_struct()
250 }
251
252 fn to_list(&self) -> VortexResult<ListArray> {
253 self.to_canonical()?.into_list()
254 }
255
256 fn to_varbinview(&self) -> VortexResult<VarBinViewArray> {
257 self.to_canonical()?.into_varbinview()
258 }
259
260 fn to_extension(&self) -> VortexResult<ExtensionArray> {
261 self.to_canonical()?.into_extension()
262 }
263}
264
265impl From<Canonical> for ArrayRef {
266 fn from(value: Canonical) -> Self {
267 match value {
268 Canonical::Null(a) => a.into_array(),
269 Canonical::Bool(a) => a.into_array(),
270 Canonical::Primitive(a) => a.into_array(),
271 Canonical::Decimal(a) => a.into_array(),
272 Canonical::Struct(a) => a.into_array(),
273 Canonical::List(a) => a.into_array(),
274 Canonical::VarBinView(a) => a.into_array(),
275 Canonical::Extension(a) => a.into_array(),
276 }
277 }
278}
279
280#[cfg(test)]
281mod test {
282 use std::sync::Arc;
283
284 use arrow_array::cast::AsArray;
285 use arrow_array::types::{Int32Type, Int64Type, UInt64Type};
286 use arrow_array::{
287 Array as ArrowArray, ArrayRef as ArrowArrayRef, ListArray as ArrowListArray,
288 PrimitiveArray as ArrowPrimitiveArray, StringArray, StringViewArray,
289 StructArray as ArrowStructArray,
290 };
291 use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
292 use arrow_schema::{DataType, Field};
293 use vortex_buffer::buffer;
294
295 use crate::arrays::{ConstantArray, StructArray};
296 use crate::arrow::{FromArrowArray, IntoArrowArray};
297 use crate::{ArrayRef, IntoArray};
298
299 #[test]
300 fn test_canonicalize_nested_struct() {
301 let nested_struct_array = StructArray::from_fields(&[
303 ("a", buffer![1u64].into_array()),
304 (
305 "b",
306 StructArray::from_fields(&[(
307 "inner_a",
308 ConstantArray::new(100i64, 1).into_array(),
313 )])
314 .unwrap()
315 .into_array(),
316 ),
317 ])
318 .unwrap();
319
320 let arrow_struct = nested_struct_array
321 .into_array()
322 .into_arrow_preferred()
323 .unwrap()
324 .as_any()
325 .downcast_ref::<ArrowStructArray>()
326 .cloned()
327 .unwrap();
328
329 assert!(
330 arrow_struct
331 .column(0)
332 .as_any()
333 .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>()
334 .is_some()
335 );
336
337 let inner_struct = arrow_struct
338 .column(1)
339 .clone()
340 .as_any()
341 .downcast_ref::<ArrowStructArray>()
342 .cloned()
343 .unwrap();
344
345 let inner_a = inner_struct
346 .column(0)
347 .as_any()
348 .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
349 assert!(inner_a.is_some());
350
351 assert_eq!(
352 inner_a.cloned().unwrap(),
353 ArrowPrimitiveArray::from_iter([100i64]),
354 );
355 }
356
357 #[test]
358 fn roundtrip_struct() {
359 let mut nulls = NullBufferBuilder::new(6);
360 nulls.append_n_non_nulls(4);
361 nulls.append_null();
362 nulls.append_non_null();
363 let names = Arc::new(StringViewArray::from_iter(vec![
364 Some("Joseph"),
365 None,
366 Some("Angela"),
367 Some("Mikhail"),
368 None,
369 None,
370 ]));
371 let ages = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
372 Some(25),
373 Some(31),
374 None,
375 Some(57),
376 None,
377 None,
378 ]));
379
380 let arrow_struct = ArrowStructArray::new(
381 vec![
382 Arc::new(Field::new("name", DataType::Utf8View, true)),
383 Arc::new(Field::new("age", DataType::Int32, true)),
384 ]
385 .into(),
386 vec![names, ages],
387 nulls.finish(),
388 );
389
390 let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true);
391
392 assert_eq!(
393 &arrow_struct,
394 vortex_struct.into_arrow_preferred().unwrap().as_struct()
395 );
396 }
397
398 #[test]
399 fn roundtrip_list() {
400 let names = Arc::new(StringArray::from_iter(vec![
401 Some("Joseph"),
402 Some("Angela"),
403 Some("Mikhail"),
404 ]));
405
406 let arrow_list = ArrowListArray::new(
407 Arc::new(Field::new_list_field(DataType::Utf8, true)),
408 OffsetBuffer::from_lengths(vec![0, 2, 1]),
409 names,
410 None,
411 );
412 let list_data_type = arrow_list.data_type();
413
414 let vortex_list = ArrayRef::from_arrow(&arrow_list, true);
415
416 let rt_arrow_list = vortex_list.into_arrow(list_data_type).unwrap();
417
418 assert_eq!(
419 (Arc::new(arrow_list.clone()) as ArrowArrayRef).as_ref(),
420 rt_arrow_list.as_ref()
421 );
422 }
423}