Skip to main content

vortex_array/arrow/
convert.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use arrow_array::AnyDictionaryArray;
7use arrow_array::Array as ArrowArray;
8use arrow_array::ArrowPrimitiveType;
9use arrow_array::BooleanArray as ArrowBooleanArray;
10use arrow_array::DictionaryArray;
11use arrow_array::FixedSizeListArray as ArrowFixedSizeListArray;
12use arrow_array::GenericByteArray;
13use arrow_array::GenericByteViewArray;
14use arrow_array::GenericListArray;
15use arrow_array::GenericListViewArray;
16use arrow_array::NullArray as ArrowNullArray;
17use arrow_array::OffsetSizeTrait;
18use arrow_array::PrimitiveArray as ArrowPrimitiveArray;
19use arrow_array::RecordBatch;
20use arrow_array::StructArray as ArrowStructArray;
21use arrow_array::cast::AsArray;
22use arrow_array::cast::as_null_array;
23use arrow_array::make_array;
24use arrow_array::types::ArrowDictionaryKeyType;
25use arrow_array::types::ByteArrayType;
26use arrow_array::types::ByteViewType;
27use arrow_array::types::Date32Type;
28use arrow_array::types::Date64Type;
29use arrow_array::types::Decimal32Type;
30use arrow_array::types::Decimal64Type;
31use arrow_array::types::Decimal128Type;
32use arrow_array::types::Decimal256Type;
33use arrow_array::types::Float16Type;
34use arrow_array::types::Float32Type;
35use arrow_array::types::Float64Type;
36use arrow_array::types::Int8Type;
37use arrow_array::types::Int16Type;
38use arrow_array::types::Int32Type;
39use arrow_array::types::Int64Type;
40use arrow_array::types::Time32MillisecondType;
41use arrow_array::types::Time32SecondType;
42use arrow_array::types::Time64MicrosecondType;
43use arrow_array::types::Time64NanosecondType;
44use arrow_array::types::TimestampMicrosecondType;
45use arrow_array::types::TimestampMillisecondType;
46use arrow_array::types::TimestampNanosecondType;
47use arrow_array::types::TimestampSecondType;
48use arrow_array::types::UInt8Type;
49use arrow_array::types::UInt16Type;
50use arrow_array::types::UInt32Type;
51use arrow_array::types::UInt64Type;
52use arrow_buffer::ArrowNativeType;
53use arrow_buffer::BooleanBuffer;
54use arrow_buffer::Buffer as ArrowBuffer;
55use arrow_buffer::ScalarBuffer;
56use arrow_buffer::buffer::NullBuffer;
57use arrow_buffer::buffer::OffsetBuffer;
58use arrow_schema::DataType;
59use arrow_schema::TimeUnit as ArrowTimeUnit;
60use itertools::Itertools;
61use vortex_buffer::Alignment;
62use vortex_buffer::BitBuffer;
63use vortex_buffer::Buffer;
64use vortex_buffer::ByteBuffer;
65use vortex_error::VortexExpect as _;
66use vortex_error::VortexResult;
67use vortex_error::vortex_bail;
68use vortex_error::vortex_panic;
69
70use crate::ArrayRef;
71use crate::IntoArray;
72use crate::arrays::BoolArray;
73use crate::arrays::DecimalArray;
74use crate::arrays::DictArray;
75use crate::arrays::FixedSizeListArray;
76use crate::arrays::ListArray;
77use crate::arrays::ListViewArray;
78use crate::arrays::NullArray;
79use crate::arrays::PrimitiveArray;
80use crate::arrays::StructArray;
81use crate::arrays::TemporalArray;
82use crate::arrays::VarBinArray;
83use crate::arrays::VarBinViewArray;
84use crate::arrow::FromArrowArray;
85use crate::dtype::DType;
86use crate::dtype::DecimalDType;
87use crate::dtype::IntegerPType;
88use crate::dtype::NativePType;
89use crate::dtype::PType;
90use crate::dtype::i256;
91use crate::extension::datetime::TimeUnit;
92use crate::validity::Validity;
93
94impl IntoArray for ArrowBuffer {
95    fn into_array(self) -> ArrayRef {
96        PrimitiveArray::from_byte_buffer(
97            ByteBuffer::from_arrow_buffer(self, Alignment::of::<u8>()),
98            PType::U8,
99            Validity::NonNullable,
100        )
101        .into_array()
102    }
103}
104
105impl IntoArray for BooleanBuffer {
106    fn into_array(self) -> ArrayRef {
107        BoolArray::new(self.into(), Validity::NonNullable).into_array()
108    }
109}
110
111impl<T> IntoArray for ScalarBuffer<T>
112where
113    T: ArrowNativeType + NativePType,
114{
115    fn into_array(self) -> ArrayRef {
116        PrimitiveArray::new(
117            Buffer::<T>::from_arrow_scalar_buffer(self),
118            Validity::NonNullable,
119        )
120        .into_array()
121    }
122}
123
124impl<O> IntoArray for OffsetBuffer<O>
125where
126    O: IntegerPType + OffsetSizeTrait,
127{
128    fn into_array(self) -> ArrayRef {
129        let primitive = PrimitiveArray::new(
130            Buffer::from_arrow_scalar_buffer(self.into_inner()),
131            Validity::NonNullable,
132        );
133
134        primitive.into_array()
135    }
136}
137
138macro_rules! impl_from_arrow_primitive {
139    ($T:path) => {
140        impl FromArrowArray<&ArrowPrimitiveArray<$T>> for ArrayRef {
141            fn from_arrow(value: &ArrowPrimitiveArray<$T>, nullable: bool) -> VortexResult<Self> {
142                let buffer = Buffer::from_arrow_scalar_buffer(value.values().clone());
143                let validity = nulls(value.nulls(), nullable);
144                Ok(PrimitiveArray::new(buffer, validity).into_array())
145            }
146        }
147    };
148}
149
150impl_from_arrow_primitive!(Int8Type);
151impl_from_arrow_primitive!(Int16Type);
152impl_from_arrow_primitive!(Int32Type);
153impl_from_arrow_primitive!(Int64Type);
154impl_from_arrow_primitive!(UInt8Type);
155impl_from_arrow_primitive!(UInt16Type);
156impl_from_arrow_primitive!(UInt32Type);
157impl_from_arrow_primitive!(UInt64Type);
158impl_from_arrow_primitive!(Float16Type);
159impl_from_arrow_primitive!(Float32Type);
160impl_from_arrow_primitive!(Float64Type);
161
162impl FromArrowArray<&ArrowPrimitiveArray<Decimal32Type>> for ArrayRef {
163    fn from_arrow(
164        array: &ArrowPrimitiveArray<Decimal32Type>,
165        nullable: bool,
166    ) -> VortexResult<Self> {
167        let decimal_type = DecimalDType::new(array.precision(), array.scale());
168        let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone());
169        let validity = nulls(array.nulls(), nullable);
170        Ok(DecimalArray::new(buffer, decimal_type, validity).into_array())
171    }
172}
173
174impl FromArrowArray<&ArrowPrimitiveArray<Decimal64Type>> for ArrayRef {
175    fn from_arrow(
176        array: &ArrowPrimitiveArray<Decimal64Type>,
177        nullable: bool,
178    ) -> VortexResult<Self> {
179        let decimal_type = DecimalDType::new(array.precision(), array.scale());
180        let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone());
181        let validity = nulls(array.nulls(), nullable);
182        Ok(DecimalArray::new(buffer, decimal_type, validity).into_array())
183    }
184}
185
186impl FromArrowArray<&ArrowPrimitiveArray<Decimal128Type>> for ArrayRef {
187    fn from_arrow(
188        array: &ArrowPrimitiveArray<Decimal128Type>,
189        nullable: bool,
190    ) -> VortexResult<Self> {
191        let decimal_type = DecimalDType::new(array.precision(), array.scale());
192        let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone());
193        let validity = nulls(array.nulls(), nullable);
194        Ok(DecimalArray::new(buffer, decimal_type, validity).into_array())
195    }
196}
197
198impl FromArrowArray<&ArrowPrimitiveArray<Decimal256Type>> for ArrayRef {
199    fn from_arrow(
200        array: &ArrowPrimitiveArray<Decimal256Type>,
201        nullable: bool,
202    ) -> VortexResult<Self> {
203        let decimal_type = DecimalDType::new(array.precision(), array.scale());
204        let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone());
205        // SAFETY: Our i256 implementation has the same bit-pattern representation of the
206        //  arrow_buffer::i256 type. It is safe to treat values held inside the buffer as values
207        //  of either type.
208        let buffer =
209            unsafe { std::mem::transmute::<Buffer<arrow_buffer::i256>, Buffer<i256>>(buffer) };
210        let validity = nulls(array.nulls(), nullable);
211        Ok(DecimalArray::new(buffer, decimal_type, validity).into_array())
212    }
213}
214
215macro_rules! impl_from_arrow_temporal {
216    ($T:path) => {
217        impl FromArrowArray<&ArrowPrimitiveArray<$T>> for ArrayRef {
218            fn from_arrow(
219                value: &ArrowPrimitiveArray<$T>,
220                nullable: bool,
221            ) -> vortex_error::VortexResult<Self> {
222                Ok(temporal_array(value, nullable))
223            }
224        }
225    };
226}
227
228// timestamp
229impl_from_arrow_temporal!(TimestampSecondType);
230impl_from_arrow_temporal!(TimestampMillisecondType);
231impl_from_arrow_temporal!(TimestampMicrosecondType);
232impl_from_arrow_temporal!(TimestampNanosecondType);
233
234// time
235impl_from_arrow_temporal!(Time32SecondType);
236impl_from_arrow_temporal!(Time32MillisecondType);
237impl_from_arrow_temporal!(Time64MicrosecondType);
238impl_from_arrow_temporal!(Time64NanosecondType);
239
240// date
241impl_from_arrow_temporal!(Date32Type);
242impl_from_arrow_temporal!(Date64Type);
243
244fn temporal_array<T: ArrowPrimitiveType>(value: &ArrowPrimitiveArray<T>, nullable: bool) -> ArrayRef
245where
246    T::Native: NativePType,
247{
248    let arr = PrimitiveArray::new(
249        Buffer::from_arrow_scalar_buffer(value.values().clone()),
250        nulls(value.nulls(), nullable),
251    )
252    .into_array();
253
254    match value.data_type() {
255        DataType::Timestamp(time_unit, tz) => {
256            TemporalArray::new_timestamp(arr, time_unit.into(), tz.clone()).into()
257        }
258        DataType::Time32(time_unit) => TemporalArray::new_time(arr, time_unit.into()).into(),
259        DataType::Time64(time_unit) => TemporalArray::new_time(arr, time_unit.into()).into(),
260        DataType::Date32 => TemporalArray::new_date(arr, TimeUnit::Days).into(),
261        DataType::Date64 => TemporalArray::new_date(arr, TimeUnit::Milliseconds).into(),
262        DataType::Duration(_) => unimplemented!(),
263        DataType::Interval(_) => unimplemented!(),
264        _ => vortex_panic!("Invalid temporal type: {}", value.data_type()),
265    }
266}
267
268impl<T: ByteArrayType> FromArrowArray<&GenericByteArray<T>> for ArrayRef
269where
270    <T as ByteArrayType>::Offset: IntegerPType,
271{
272    fn from_arrow(value: &GenericByteArray<T>, nullable: bool) -> VortexResult<Self> {
273        let dtype = match T::DATA_TYPE {
274            DataType::Binary | DataType::LargeBinary => DType::Binary(nullable.into()),
275            DataType::Utf8 | DataType::LargeUtf8 => DType::Utf8(nullable.into()),
276            dt => vortex_panic!("Invalid data type for ByteArray: {dt}"),
277        };
278        // SAFETY: Arrow arrays are already validated (valid UTF-8, valid offsets, correct validity).
279        Ok(unsafe {
280            VarBinArray::new_unchecked(
281                value.offsets().clone().into_array(),
282                ByteBuffer::from_arrow_buffer(value.values().clone(), Alignment::of::<u8>()),
283                dtype,
284                nulls(value.nulls(), nullable),
285            )
286        }
287        .into_array())
288    }
289}
290
291impl<T: ByteViewType> FromArrowArray<&GenericByteViewArray<T>> for ArrayRef {
292    fn from_arrow(value: &GenericByteViewArray<T>, nullable: bool) -> VortexResult<Self> {
293        let dtype = match T::DATA_TYPE {
294            DataType::BinaryView => DType::Binary(nullable.into()),
295            DataType::Utf8View => DType::Utf8(nullable.into()),
296            dt => vortex_panic!("Invalid data type for ByteViewArray: {dt}"),
297        };
298
299        let views_buffer = Buffer::from_byte_buffer(
300            Buffer::from_arrow_scalar_buffer(value.views().clone()).into_byte_buffer(),
301        );
302
303        // SAFETY: arrow-rs ByteViewArray already checks the same invariants, we inherit those
304        //  guarantees by zero-copy constructing from one.
305        Ok(unsafe {
306            VarBinViewArray::new_unchecked(
307                views_buffer,
308                Arc::from(
309                    value
310                        .data_buffers()
311                        .iter()
312                        .map(|b| ByteBuffer::from_arrow_buffer(b.clone(), Alignment::of::<u8>()))
313                        .collect::<Vec<_>>(),
314                ),
315                dtype,
316                nulls(value.nulls(), nullable),
317            )
318            .into_array()
319        })
320    }
321}
322
323impl FromArrowArray<&ArrowBooleanArray> for ArrayRef {
324    fn from_arrow(value: &ArrowBooleanArray, nullable: bool) -> VortexResult<Self> {
325        Ok(BoolArray::new(
326            value.values().clone().into(),
327            nulls(value.nulls(), nullable),
328        )
329        .into_array())
330    }
331}
332
333/// Strip out the nulls from this array and return a new array without nulls.
334pub(crate) fn remove_nulls(data: arrow_data::ArrayData) -> arrow_data::ArrayData {
335    if data.null_count() == 0 {
336        // No nulls to remove, return the array as is
337        return data;
338    }
339
340    let children = match data.data_type() {
341        DataType::Struct(fields) => Some(
342            fields
343                .iter()
344                .zip(data.child_data().iter())
345                .map(|(field, child_data)| {
346                    if field.is_nullable() {
347                        child_data.clone()
348                    } else {
349                        remove_nulls(child_data.clone())
350                    }
351                })
352                .collect_vec(),
353        ),
354        DataType::List(f)
355        | DataType::LargeList(f)
356        | DataType::ListView(f)
357        | DataType::LargeListView(f)
358        | DataType::FixedSizeList(f, _)
359            if !f.is_nullable() =>
360        {
361            // All list types only have one child
362            assert_eq!(
363                data.child_data().len(),
364                1,
365                "List types should have one child"
366            );
367            Some(vec![remove_nulls(data.child_data()[0].clone())])
368        }
369        _ => None,
370    };
371
372    let mut builder = data.into_builder().nulls(None);
373    if let Some(children) = children {
374        builder = builder.child_data(children);
375    }
376    builder
377        .build()
378        .vortex_expect("reconstructing array without nulls")
379}
380
381impl FromArrowArray<&ArrowStructArray> for ArrayRef {
382    fn from_arrow(value: &ArrowStructArray, nullable: bool) -> VortexResult<Self> {
383        Ok(StructArray::try_new(
384            value.column_names().iter().copied().collect(),
385            value
386                .columns()
387                .iter()
388                .zip(value.fields())
389                .map(|(c, field)| {
390                    // Arrow pushes down nulls, even into non-nullable fields. So we strip them
391                    // out here because Vortex is a little more strict.
392                    if c.null_count() > 0 && !field.is_nullable() {
393                        let stripped = make_array(remove_nulls(c.into_data()));
394                        Self::from_arrow(stripped.as_ref(), false)
395                    } else {
396                        Self::from_arrow(c.as_ref(), field.is_nullable())
397                    }
398                })
399                .collect::<VortexResult<Vec<_>>>()?,
400            value.len(),
401            nulls(value.nulls(), nullable),
402        )?
403        .into_array())
404    }
405}
406
407impl<O: IntegerPType + OffsetSizeTrait> FromArrowArray<&GenericListArray<O>> for ArrayRef {
408    fn from_arrow(value: &GenericListArray<O>, nullable: bool) -> VortexResult<Self> {
409        // Extract the validity of the underlying element array.
410        let elements_are_nullable = match value.data_type() {
411            DataType::List(field) => field.is_nullable(),
412            DataType::LargeList(field) => field.is_nullable(),
413            dt => vortex_panic!("Invalid data type for ListArray: {dt}"),
414        };
415
416        let elements = Self::from_arrow(value.values().as_ref(), elements_are_nullable)?;
417
418        // `offsets` are always non-nullable.
419        let offsets = value.offsets().clone().into_array();
420        let nulls = nulls(value.nulls(), nullable);
421
422        Ok(ListArray::try_new(elements, offsets, nulls)?.into_array())
423    }
424}
425
426impl<O: OffsetSizeTrait + NativePType> FromArrowArray<&GenericListViewArray<O>> for ArrayRef {
427    fn from_arrow(array: &GenericListViewArray<O>, nullable: bool) -> VortexResult<Self> {
428        // Extract the validity of the underlying element array.
429        let elements_are_nullable = match array.data_type() {
430            DataType::ListView(field) => field.is_nullable(),
431            DataType::LargeListView(field) => field.is_nullable(),
432            dt => vortex_panic!("Invalid data type for ListViewArray: {dt}"),
433        };
434
435        let elements = Self::from_arrow(array.values().as_ref(), elements_are_nullable)?;
436
437        // `offsets` and `sizes` are always non-nullable.
438        let offsets = array.offsets().clone().into_array();
439        let sizes = array.sizes().clone().into_array();
440        let nulls = nulls(array.nulls(), nullable);
441
442        Ok(ListViewArray::try_new(elements, offsets, sizes, nulls)?.into_array())
443    }
444}
445
446impl FromArrowArray<&ArrowFixedSizeListArray> for ArrayRef {
447    fn from_arrow(array: &ArrowFixedSizeListArray, nullable: bool) -> VortexResult<Self> {
448        let DataType::FixedSizeList(field, list_size) = array.data_type() else {
449            vortex_panic!("Invalid data type for ListArray: {}", array.data_type());
450        };
451
452        Ok(FixedSizeListArray::try_new(
453            Self::from_arrow(array.values().as_ref(), field.is_nullable())?,
454            *list_size as u32,
455            nulls(array.nulls(), nullable),
456            array.len(),
457        )?
458        .into_array())
459    }
460}
461
462impl FromArrowArray<&ArrowNullArray> for ArrayRef {
463    fn from_arrow(value: &ArrowNullArray, nullable: bool) -> VortexResult<Self> {
464        assert!(nullable);
465        Ok(NullArray::new(value.len()).into_array())
466    }
467}
468
469impl<K: ArrowDictionaryKeyType> FromArrowArray<&DictionaryArray<K>> for DictArray {
470    fn from_arrow(array: &DictionaryArray<K>, nullable: bool) -> VortexResult<Self> {
471        let keys = AnyDictionaryArray::keys(array);
472        let keys = ArrayRef::from_arrow(keys, keys.is_nullable())?;
473        let values = ArrayRef::from_arrow(array.values().as_ref(), nullable)?;
474        // SAFETY: we assume that Arrow has checked the invariants on construction.
475        Ok(unsafe { DictArray::new_unchecked(keys, values) })
476    }
477}
478
479pub(crate) fn nulls(nulls: Option<&NullBuffer>, nullable: bool) -> Validity {
480    if nullable {
481        nulls
482            .map(|nulls| {
483                if nulls.null_count() == nulls.len() {
484                    Validity::AllInvalid
485                } else {
486                    Validity::from(BitBuffer::from(nulls.inner().clone()))
487                }
488            })
489            .unwrap_or_else(|| Validity::AllValid)
490    } else {
491        assert!(nulls.map(|x| x.null_count() == 0).unwrap_or(true));
492        Validity::NonNullable
493    }
494}
495
496impl FromArrowArray<&dyn ArrowArray> for ArrayRef {
497    fn from_arrow(array: &dyn ArrowArray, nullable: bool) -> VortexResult<Self> {
498        match array.data_type() {
499            DataType::Boolean => Self::from_arrow(array.as_boolean(), nullable),
500            DataType::UInt8 => Self::from_arrow(array.as_primitive::<UInt8Type>(), nullable),
501            DataType::UInt16 => Self::from_arrow(array.as_primitive::<UInt16Type>(), nullable),
502            DataType::UInt32 => Self::from_arrow(array.as_primitive::<UInt32Type>(), nullable),
503            DataType::UInt64 => Self::from_arrow(array.as_primitive::<UInt64Type>(), nullable),
504            DataType::Int8 => Self::from_arrow(array.as_primitive::<Int8Type>(), nullable),
505            DataType::Int16 => Self::from_arrow(array.as_primitive::<Int16Type>(), nullable),
506            DataType::Int32 => Self::from_arrow(array.as_primitive::<Int32Type>(), nullable),
507            DataType::Int64 => Self::from_arrow(array.as_primitive::<Int64Type>(), nullable),
508            DataType::Float16 => Self::from_arrow(array.as_primitive::<Float16Type>(), nullable),
509            DataType::Float32 => Self::from_arrow(array.as_primitive::<Float32Type>(), nullable),
510            DataType::Float64 => Self::from_arrow(array.as_primitive::<Float64Type>(), nullable),
511            DataType::Utf8 => Self::from_arrow(array.as_string::<i32>(), nullable),
512            DataType::LargeUtf8 => Self::from_arrow(array.as_string::<i64>(), nullable),
513            DataType::Binary => Self::from_arrow(array.as_binary::<i32>(), nullable),
514            DataType::LargeBinary => Self::from_arrow(array.as_binary::<i64>(), nullable),
515            DataType::BinaryView => Self::from_arrow(array.as_binary_view(), nullable),
516            DataType::Utf8View => Self::from_arrow(array.as_string_view(), nullable),
517            DataType::Struct(_) => Self::from_arrow(array.as_struct(), nullable),
518            DataType::List(_) => Self::from_arrow(array.as_list::<i32>(), nullable),
519            DataType::LargeList(_) => Self::from_arrow(array.as_list::<i64>(), nullable),
520            DataType::ListView(_) => Self::from_arrow(array.as_list_view::<i32>(), nullable),
521            DataType::LargeListView(_) => Self::from_arrow(array.as_list_view::<i64>(), nullable),
522            DataType::FixedSizeList(..) => Self::from_arrow(array.as_fixed_size_list(), nullable),
523            DataType::Null => Self::from_arrow(as_null_array(array), nullable),
524            DataType::Timestamp(u, _) => match u {
525                ArrowTimeUnit::Second => {
526                    Self::from_arrow(array.as_primitive::<TimestampSecondType>(), nullable)
527                }
528                ArrowTimeUnit::Millisecond => {
529                    Self::from_arrow(array.as_primitive::<TimestampMillisecondType>(), nullable)
530                }
531                ArrowTimeUnit::Microsecond => {
532                    Self::from_arrow(array.as_primitive::<TimestampMicrosecondType>(), nullable)
533                }
534                ArrowTimeUnit::Nanosecond => {
535                    Self::from_arrow(array.as_primitive::<TimestampNanosecondType>(), nullable)
536                }
537            },
538            DataType::Date32 => Self::from_arrow(array.as_primitive::<Date32Type>(), nullable),
539            DataType::Date64 => Self::from_arrow(array.as_primitive::<Date64Type>(), nullable),
540            DataType::Time32(u) => match u {
541                ArrowTimeUnit::Second => {
542                    Self::from_arrow(array.as_primitive::<Time32SecondType>(), nullable)
543                }
544                ArrowTimeUnit::Millisecond => {
545                    Self::from_arrow(array.as_primitive::<Time32MillisecondType>(), nullable)
546                }
547                ArrowTimeUnit::Microsecond | ArrowTimeUnit::Nanosecond => unreachable!(),
548            },
549            DataType::Time64(u) => match u {
550                ArrowTimeUnit::Microsecond => {
551                    Self::from_arrow(array.as_primitive::<Time64MicrosecondType>(), nullable)
552                }
553                ArrowTimeUnit::Nanosecond => {
554                    Self::from_arrow(array.as_primitive::<Time64NanosecondType>(), nullable)
555                }
556                ArrowTimeUnit::Second | ArrowTimeUnit::Millisecond => unreachable!(),
557            },
558            DataType::Decimal32(..) => {
559                Self::from_arrow(array.as_primitive::<Decimal32Type>(), nullable)
560            }
561            DataType::Decimal64(..) => {
562                Self::from_arrow(array.as_primitive::<Decimal64Type>(), nullable)
563            }
564            DataType::Decimal128(..) => {
565                Self::from_arrow(array.as_primitive::<Decimal128Type>(), nullable)
566            }
567            DataType::Decimal256(..) => {
568                Self::from_arrow(array.as_primitive::<Decimal256Type>(), nullable)
569            }
570            DataType::Dictionary(key_type, _) => match key_type.as_ref() {
571                DataType::Int8 => Ok(DictArray::from_arrow(
572                    array.as_dictionary::<Int8Type>(),
573                    nullable,
574                )?
575                .into_array()),
576                DataType::Int16 => Ok(DictArray::from_arrow(
577                    array.as_dictionary::<Int16Type>(),
578                    nullable,
579                )?
580                .into_array()),
581                DataType::Int32 => Ok(DictArray::from_arrow(
582                    array.as_dictionary::<Int32Type>(),
583                    nullable,
584                )?
585                .into_array()),
586                DataType::Int64 => Ok(DictArray::from_arrow(
587                    array.as_dictionary::<Int64Type>(),
588                    nullable,
589                )?
590                .into_array()),
591                DataType::UInt8 => Ok(DictArray::from_arrow(
592                    array.as_dictionary::<UInt8Type>(),
593                    nullable,
594                )?
595                .into_array()),
596                DataType::UInt16 => Ok(DictArray::from_arrow(
597                    array.as_dictionary::<UInt16Type>(),
598                    nullable,
599                )?
600                .into_array()),
601                DataType::UInt32 => Ok(DictArray::from_arrow(
602                    array.as_dictionary::<UInt32Type>(),
603                    nullable,
604                )?
605                .into_array()),
606                DataType::UInt64 => Ok(DictArray::from_arrow(
607                    array.as_dictionary::<UInt64Type>(),
608                    nullable,
609                )?
610                .into_array()),
611                key_dt => vortex_bail!("Unsupported dictionary key type: {key_dt}"),
612            },
613            dt => vortex_bail!("Array encoding not implemented for Arrow data type {dt}"),
614        }
615    }
616}
617
618impl FromArrowArray<RecordBatch> for ArrayRef {
619    fn from_arrow(array: RecordBatch, nullable: bool) -> VortexResult<Self> {
620        ArrayRef::from_arrow(&arrow_array::StructArray::from(array), nullable)
621    }
622}
623
624impl FromArrowArray<&RecordBatch> for ArrayRef {
625    fn from_arrow(array: &RecordBatch, nullable: bool) -> VortexResult<Self> {
626        Self::from_arrow(array.clone(), nullable)
627    }
628}
629
630#[cfg(test)]
631mod tests {
632    use std::sync::Arc;
633
634    use arrow_array::Array as ArrowArray;
635    use arrow_array::BinaryArray;
636    use arrow_array::BooleanArray;
637    use arrow_array::Date32Array;
638    use arrow_array::Date64Array;
639    use arrow_array::FixedSizeListArray as ArrowFixedSizeListArray;
640    use arrow_array::Float32Array;
641    use arrow_array::Float64Array;
642    use arrow_array::GenericListViewArray;
643    use arrow_array::Int8Array;
644    use arrow_array::Int16Array;
645    use arrow_array::Int32Array;
646    use arrow_array::Int64Array;
647    use arrow_array::LargeBinaryArray;
648    use arrow_array::LargeStringArray;
649    use arrow_array::NullArray;
650    use arrow_array::RecordBatch;
651    use arrow_array::StringArray;
652    use arrow_array::StructArray;
653    use arrow_array::Time32MillisecondArray;
654    use arrow_array::Time32SecondArray;
655    use arrow_array::Time64MicrosecondArray;
656    use arrow_array::Time64NanosecondArray;
657    use arrow_array::TimestampMicrosecondArray;
658    use arrow_array::TimestampMillisecondArray;
659    use arrow_array::TimestampNanosecondArray;
660    use arrow_array::TimestampSecondArray;
661    use arrow_array::UInt8Array;
662    use arrow_array::UInt16Array;
663    use arrow_array::UInt32Array;
664    use arrow_array::UInt64Array;
665    use arrow_array::builder::BinaryViewBuilder;
666    use arrow_array::builder::Decimal128Builder;
667    use arrow_array::builder::Decimal256Builder;
668    use arrow_array::builder::Int32Builder;
669    use arrow_array::builder::LargeListBuilder;
670    use arrow_array::builder::ListBuilder;
671    use arrow_array::builder::StringViewBuilder;
672    use arrow_array::new_null_array;
673    use arrow_array::types::ArrowPrimitiveType;
674    use arrow_array::types::Float16Type;
675    use arrow_buffer::BooleanBuffer;
676    use arrow_buffer::Buffer as ArrowBuffer;
677    use arrow_buffer::OffsetBuffer;
678    use arrow_buffer::ScalarBuffer;
679    use arrow_schema::DataType;
680    use arrow_schema::Field;
681    use arrow_schema::Fields;
682    use arrow_schema::Schema;
683    use rstest::rstest;
684
685    use crate::ArrayRef;
686    use crate::IntoArray;
687    use crate::arrays::Decimal;
688    use crate::arrays::FixedSizeList;
689    use crate::arrays::List;
690    use crate::arrays::ListView;
691    use crate::arrays::Primitive;
692    use crate::arrays::Struct;
693    use crate::arrays::VarBinView;
694    use crate::arrays::fixed_size_list::FixedSizeListArrayExt;
695    use crate::arrays::list::ListArrayExt;
696    use crate::arrays::listview::ListViewArrayExt;
697    use crate::arrays::struct_::StructArrayExt;
698    use crate::arrow::FromArrowArray as _;
699    use crate::dtype::DType;
700    use crate::dtype::Nullability;
701    use crate::dtype::PType;
702    use crate::extension::datetime::TimeUnit;
703    use crate::extension::datetime::Timestamp;
704
705    #[rstest]
706    #[case::i8(
707        Arc::new(Int8Array::from(vec![Some(1), None, Some(3), Some(4)])),
708        Arc::new(Int8Array::from(vec![1, 2, 3, 4])),
709        PType::I8,
710    )]
711    #[case::i16(
712        Arc::new(Int16Array::from(vec![Some(100), None, Some(300), Some(400)])),
713        Arc::new(Int16Array::from(vec![100, 200, 300, 400])),
714        PType::I16,
715    )]
716    #[case::i32(
717        Arc::new(Int32Array::from(vec![Some(1000), None, Some(3000), Some(4000)])),
718        Arc::new(Int32Array::from(vec![1000, 2000, 3000, 4000])),
719        PType::I32,
720    )]
721    #[case::i64(
722        Arc::new(Int64Array::from(vec![Some(10000), None, Some(30000), Some(40000)])),
723        Arc::new(Int64Array::from(vec![10000_i64, 20000, 30000, 40000])),
724        PType::I64,
725    )]
726    #[case::u8(
727        Arc::new(UInt8Array::from(vec![Some(1), None, Some(3), Some(4)])),
728        Arc::new(UInt8Array::from(vec![1_u8, 2, 3, 4])),
729        PType::U8,
730    )]
731    #[case::u16(
732        Arc::new(UInt16Array::from(vec![Some(100), None, Some(300), Some(400)])),
733        Arc::new(UInt16Array::from(vec![100_u16, 200, 300, 400])),
734        PType::U16,
735    )]
736    #[case::u32(
737        Arc::new(UInt32Array::from(vec![Some(1000), None, Some(3000), Some(4000)])),
738        Arc::new(UInt32Array::from(vec![1000_u32, 2000, 3000, 4000])),
739        PType::U32,
740    )]
741    #[case::u64(
742        Arc::new(UInt64Array::from(vec![Some(10000), None, Some(30000), Some(40000)])),
743        Arc::new(UInt64Array::from(vec![10000_u64, 20000, 30000, 40000])),
744        PType::U64,
745    )]
746    #[case::f32(
747        Arc::new(Float32Array::from(vec![Some(1.5), None, Some(3.5), Some(4.5)])),
748        Arc::new(Float32Array::from(vec![1.5_f32, 2.5, 3.5, 4.5])),
749        PType::F32,
750    )]
751    #[case::f64(
752        Arc::new(Float64Array::from(vec![Some(1.5), None, Some(3.5), Some(4.5)])),
753        Arc::new(Float64Array::from(vec![1.5_f64, 2.5, 3.5, 4.5])),
754        PType::F64,
755    )]
756    fn test_primitive_array_conversion(
757        #[case] nullable: Arc<dyn ArrowArray>,
758        #[case] non_nullable: Arc<dyn ArrowArray>,
759        #[case] expected_ptype: PType,
760    ) {
761        let v_null = ArrayRef::from_arrow(nullable.as_ref(), true).unwrap();
762        let v_non_null = ArrayRef::from_arrow(non_nullable.as_ref(), false).unwrap();
763        assert_eq!(v_null.len(), 4);
764        assert_eq!(v_non_null.len(), 4);
765        assert_eq!(v_null.as_::<Primitive>().ptype(), expected_ptype);
766        assert_eq!(v_non_null.as_::<Primitive>().ptype(), expected_ptype);
767    }
768
769    #[test]
770    fn test_float16_array_conversion() {
771        let values = vec![
772            Some(<Float16Type as ArrowPrimitiveType>::Native::from_f32(1.5)),
773            None,
774            Some(<Float16Type as ArrowPrimitiveType>::Native::from_f32(3.5)),
775        ];
776        let arrow_array = arrow_array::PrimitiveArray::<Float16Type>::from(values);
777        let vortex_array = ArrayRef::from_arrow(&arrow_array, true).unwrap();
778
779        let non_null_values = vec![
780            <Float16Type as ArrowPrimitiveType>::Native::from_f32(1.5),
781            <Float16Type as ArrowPrimitiveType>::Native::from_f32(2.5),
782        ];
783        let arrow_array_non_null =
784            arrow_array::PrimitiveArray::<Float16Type>::from(non_null_values);
785        let vortex_array_non_null = ArrayRef::from_arrow(&arrow_array_non_null, false).unwrap();
786
787        assert_eq!(vortex_array.len(), 3);
788        assert_eq!(vortex_array_non_null.len(), 2);
789
790        // Verify metadata - should be PrimitiveArray with F16 ptype
791        let primitive_array = vortex_array.as_::<Primitive>();
792        assert_eq!(primitive_array.ptype(), PType::F16);
793
794        let primitive_array_non_null = vortex_array_non_null.as_::<Primitive>();
795        assert_eq!(primitive_array_non_null.ptype(), PType::F16);
796    }
797
798    // Test decimal array conversions
799    #[test]
800    fn test_decimal128_array_conversion() {
801        let mut builder = Decimal128Builder::with_capacity(4);
802        builder.append_value(12345);
803        builder.append_null();
804        builder.append_value(67890);
805        builder.append_value(11111);
806        let decimal_array = builder.finish().with_precision_and_scale(10, 2).unwrap();
807
808        let vortex_array = ArrayRef::from_arrow(&decimal_array, true).unwrap();
809        assert_eq!(vortex_array.len(), 4);
810
811        let mut builder_non_null = Decimal128Builder::with_capacity(3);
812        builder_non_null.append_value(12345);
813        builder_non_null.append_value(67890);
814        builder_non_null.append_value(11111);
815        let decimal_array_non_null = builder_non_null
816            .finish()
817            .with_precision_and_scale(10, 2)
818            .unwrap();
819
820        let vortex_array_non_null = ArrayRef::from_arrow(&decimal_array_non_null, false).unwrap();
821        assert_eq!(vortex_array_non_null.len(), 3);
822
823        // Verify metadata - should be DecimalArray with correct precision and scale
824        let decimal_vortex_array = vortex_array.as_::<Decimal>();
825        assert_eq!(decimal_vortex_array.decimal_dtype().precision(), 10);
826        assert_eq!(decimal_vortex_array.decimal_dtype().scale(), 2);
827
828        let decimal_vortex_array_non_null = vortex_array_non_null.as_::<Decimal>();
829        assert_eq!(
830            decimal_vortex_array_non_null.decimal_dtype().precision(),
831            10
832        );
833        assert_eq!(decimal_vortex_array_non_null.decimal_dtype().scale(), 2);
834    }
835
836    #[test]
837    fn test_decimal256_array_conversion() {
838        let mut builder = Decimal256Builder::with_capacity(4);
839        builder.append_value(arrow_buffer::i256::from_i128(12345));
840        builder.append_null();
841        builder.append_value(arrow_buffer::i256::from_i128(67890));
842        builder.append_value(arrow_buffer::i256::from_i128(11111));
843        let decimal_array = builder.finish().with_precision_and_scale(38, 10).unwrap();
844
845        let vortex_array = ArrayRef::from_arrow(&decimal_array, true).unwrap();
846        assert_eq!(vortex_array.len(), 4);
847
848        let mut builder_non_null = Decimal256Builder::with_capacity(3);
849        builder_non_null.append_value(arrow_buffer::i256::from_i128(12345));
850        builder_non_null.append_value(arrow_buffer::i256::from_i128(67890));
851        builder_non_null.append_value(arrow_buffer::i256::from_i128(11111));
852        let decimal_array_non_null = builder_non_null
853            .finish()
854            .with_precision_and_scale(38, 10)
855            .unwrap();
856
857        let vortex_array_non_null = ArrayRef::from_arrow(&decimal_array_non_null, false).unwrap();
858        assert_eq!(vortex_array_non_null.len(), 3);
859
860        // Verify metadata - should be DecimalArray with correct precision and scale
861        let decimal_vortex_array = vortex_array.as_::<Decimal>();
862        assert_eq!(decimal_vortex_array.decimal_dtype().precision(), 38);
863        assert_eq!(decimal_vortex_array.decimal_dtype().scale(), 10);
864
865        let decimal_vortex_array_non_null = vortex_array_non_null.as_::<Decimal>();
866        assert_eq!(
867            decimal_vortex_array_non_null.decimal_dtype().precision(),
868            38
869        );
870        assert_eq!(decimal_vortex_array_non_null.decimal_dtype().scale(), 10);
871    }
872
873    // Test temporal array conversions
874    #[rstest]
875    #[case::timestamp_second(
876        Arc::new(TimestampSecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
877        Arc::new(TimestampSecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
878    )]
879    #[case::timestamp_millisecond(
880        Arc::new(TimestampMillisecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
881        Arc::new(TimestampMillisecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
882    )]
883    #[case::timestamp_microsecond(
884        Arc::new(TimestampMicrosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
885        Arc::new(TimestampMicrosecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
886    )]
887    #[case::timestamp_nanosecond(
888        Arc::new(TimestampNanosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
889        Arc::new(TimestampNanosecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
890    )]
891    #[case::time32_second(
892        Arc::new(Time32SecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
893        Arc::new(Time32SecondArray::from(vec![1000_i32, 2000, 3000, 4000])),
894    )]
895    #[case::time32_millisecond(
896        Arc::new(Time32MillisecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
897        Arc::new(Time32MillisecondArray::from(vec![1000_i32, 2000, 3000, 4000])),
898    )]
899    #[case::time64_microsecond(
900        Arc::new(Time64MicrosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
901        Arc::new(Time64MicrosecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
902    )]
903    #[case::time64_nanosecond(
904        Arc::new(Time64NanosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
905        Arc::new(Time64NanosecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
906    )]
907    #[case::date32(
908        Arc::new(Date32Array::from(vec![Some(18000), None, Some(18002), Some(18003)])),
909        Arc::new(Date32Array::from(vec![18000_i32, 18001, 18002, 18003])),
910    )]
911    #[case::date64(
912        Arc::new(Date64Array::from(vec![Some(1555200000000), None, Some(1555286400000), Some(1555372800000)]
913        )),
914        Arc::new(Date64Array::from(vec![1555200000000_i64, 1555213600000, 1555286400000, 1555372800000]
915        )),
916    )]
917    fn test_temporal_array_conversion(
918        #[case] nullable: Arc<dyn ArrowArray>,
919        #[case] non_nullable: Arc<dyn ArrowArray>,
920    ) {
921        let v_null = ArrayRef::from_arrow(nullable.as_ref(), true).unwrap();
922        let v_non_null = ArrayRef::from_arrow(non_nullable.as_ref(), false).unwrap();
923        assert_eq!(v_null.len(), 4);
924        assert_eq!(v_non_null.len(), 4);
925    }
926
927    #[test]
928    fn test_timestamp_timezone_microsecond_array_conversion() {
929        let arrow_array =
930            TimestampMicrosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])
931                .with_timezone("UTC");
932        let vortex_array = ArrayRef::from_arrow(&arrow_array, true).unwrap();
933
934        let arrow_array_non_null =
935            TimestampMicrosecondArray::from(vec![1000_i64, 2000, 3000, 4000]).with_timezone("UTC");
936        let vortex_array_non_null = ArrayRef::from_arrow(&arrow_array_non_null, false).unwrap();
937
938        assert_eq!(vortex_array.len(), 4);
939        assert_eq!(
940            vortex_array.dtype(),
941            &DType::Extension(
942                Timestamp::new_with_tz(
943                    TimeUnit::Microseconds,
944                    Some("UTC".into()),
945                    Nullability::Nullable
946                )
947                .erased()
948            ),
949        );
950        assert_eq!(vortex_array_non_null.len(), 4);
951        assert_eq!(
952            vortex_array_non_null.dtype(),
953            &DType::Extension(
954                Timestamp::new_with_tz(
955                    TimeUnit::Microseconds,
956                    Some("UTC".into()),
957                    Nullability::NonNullable
958                )
959                .erased()
960            )
961        );
962    }
963
964    // Test string/binary array conversions
965    #[rstest]
966    #[case::utf8(
967        Arc::new(StringArray::from(vec![Some("hello"), None, Some("world"), Some("test")])),
968        Arc::new(StringArray::from(vec!["hello", "world", "test", "vortex"])),
969        DType::Utf8(Nullability::NonNullable),
970    )]
971    #[case::large_utf8(
972        Arc::new(LargeStringArray::from(vec![Some("hello"), None, Some("world"), Some("test")])),
973        Arc::new(LargeStringArray::from(vec!["hello", "world", "test", "vortex"])),
974        DType::Utf8(Nullability::NonNullable),
975    )]
976    #[case::binary(
977        Arc::new(BinaryArray::from(vec![
978            Some("hello".as_bytes()), None, Some("world".as_bytes()), Some("test".as_bytes()),
979        ])),
980        Arc::new(BinaryArray::from(vec![
981            "hello".as_bytes(), "world".as_bytes(), "test".as_bytes(), "vortex".as_bytes(),
982        ])),
983        DType::Binary(Nullability::NonNullable),
984    )]
985    #[case::large_binary(
986        Arc::new(LargeBinaryArray::from(vec![
987            Some("hello".as_bytes()), None, Some("world".as_bytes()), Some("test".as_bytes()),
988        ])),
989        Arc::new(LargeBinaryArray::from(vec![
990            "hello".as_bytes(), "world".as_bytes(), "test".as_bytes(), "vortex".as_bytes(),
991        ])),
992        DType::Binary(Nullability::NonNullable),
993    )]
994    fn test_string_binary_array_conversion(
995        #[case] nullable: Arc<dyn ArrowArray>,
996        #[case] non_nullable: Arc<dyn ArrowArray>,
997        #[case] expected_non_nullable_dtype: DType,
998    ) {
999        let v_null = ArrayRef::from_arrow(nullable.as_ref(), true).unwrap();
1000        let v_non_null = ArrayRef::from_arrow(non_nullable.as_ref(), false).unwrap();
1001        assert_eq!(v_null.len(), 4);
1002        assert_eq!(v_non_null.len(), 4);
1003        assert_eq!(v_null.dtype(), &expected_non_nullable_dtype.as_nullable());
1004        assert_eq!(v_non_null.dtype(), &expected_non_nullable_dtype);
1005    }
1006
1007    #[test]
1008    fn test_utf8_view_array_conversion() {
1009        let mut builder = StringViewBuilder::new();
1010        builder.append_value("hello");
1011        builder.append_null();
1012        builder.append_value("world");
1013        builder.append_value("test");
1014        let arrow_array = builder.finish();
1015        let vortex_array = ArrayRef::from_arrow(&arrow_array, true).unwrap();
1016
1017        let mut builder_non_null = StringViewBuilder::new();
1018        builder_non_null.append_value("hello");
1019        builder_non_null.append_value("world");
1020        builder_non_null.append_value("test");
1021        builder_non_null.append_value("vortex");
1022        let arrow_array_non_null = builder_non_null.finish();
1023        let vortex_array_non_null = ArrayRef::from_arrow(&arrow_array_non_null, false).unwrap();
1024
1025        assert_eq!(vortex_array.len(), 4);
1026        assert_eq!(vortex_array_non_null.len(), 4);
1027
1028        // Verify metadata - should be VarBinViewArray with correct buffer count and dtype
1029        let varbin_view_array = vortex_array.as_::<VarBinView>();
1030        assert_eq!(
1031            varbin_view_array.data_buffers().len(),
1032            arrow_array.data_buffers().len()
1033        );
1034        assert_eq!(varbin_view_array.dtype(), &DType::Utf8(true.into()));
1035
1036        let varbin_view_array_non_null = vortex_array_non_null.as_::<VarBinView>();
1037        assert_eq!(
1038            varbin_view_array_non_null.data_buffers().len(),
1039            arrow_array_non_null.data_buffers().len()
1040        );
1041        assert_eq!(
1042            varbin_view_array_non_null.dtype(),
1043            &DType::Utf8(false.into())
1044        );
1045    }
1046
1047    #[test]
1048    fn test_binary_view_array_conversion() {
1049        let mut builder = BinaryViewBuilder::new();
1050        builder.append_value(b"hello");
1051        builder.append_null();
1052        builder.append_value(b"world");
1053        builder.append_value(b"test");
1054        let arrow_array = builder.finish();
1055        let vortex_array = ArrayRef::from_arrow(&arrow_array, true).unwrap();
1056
1057        let mut builder_non_null = BinaryViewBuilder::new();
1058        builder_non_null.append_value(b"hello");
1059        builder_non_null.append_value(b"world");
1060        builder_non_null.append_value(b"test");
1061        builder_non_null.append_value(b"vortex");
1062        let arrow_array_non_null = builder_non_null.finish();
1063        let vortex_array_non_null = ArrayRef::from_arrow(&arrow_array_non_null, false).unwrap();
1064
1065        assert_eq!(vortex_array.len(), 4);
1066        assert_eq!(vortex_array_non_null.len(), 4);
1067
1068        // Verify metadata - should be VarBinViewArray with correct buffer count and dtype
1069        let varbin_view_array = vortex_array.as_::<VarBinView>();
1070        assert_eq!(
1071            varbin_view_array.data_buffers().len(),
1072            arrow_array.data_buffers().len()
1073        );
1074        assert_eq!(varbin_view_array.dtype(), &DType::Binary(true.into()));
1075
1076        let varbin_view_array_non_null = vortex_array_non_null.as_::<VarBinView>();
1077        assert_eq!(
1078            varbin_view_array_non_null.data_buffers().len(),
1079            arrow_array_non_null.data_buffers().len()
1080        );
1081        assert_eq!(
1082            varbin_view_array_non_null.dtype(),
1083            &DType::Binary(false.into())
1084        );
1085    }
1086
1087    // Test boolean array conversions
1088    #[test]
1089    fn test_boolean_array_conversion() {
1090        let arrow_array = BooleanArray::from(vec![Some(true), None, Some(false), Some(true)]);
1091        let vortex_array = ArrayRef::from_arrow(&arrow_array, true).unwrap();
1092
1093        let arrow_array_non_null = BooleanArray::from(vec![true, false, true, false]);
1094        let vortex_array_non_null = ArrayRef::from_arrow(&arrow_array_non_null, false).unwrap();
1095
1096        assert_eq!(vortex_array.len(), 4);
1097        assert_eq!(vortex_array_non_null.len(), 4);
1098    }
1099
1100    // Test struct array conversions
1101    #[test]
1102    fn test_struct_array_conversion() {
1103        let fields = vec![
1104            Field::new("field1", DataType::Int32, true),
1105            Field::new("field2", DataType::Utf8, false),
1106        ];
1107        let schema = Fields::from(fields);
1108
1109        let field1_data = Int32Array::from(vec![Some(1), None, Some(3)]);
1110        let field2_data = StringArray::from(vec!["a", "b", "c"]);
1111
1112        let arrow_array = StructArray::new(
1113            schema.clone(),
1114            vec![Arc::new(field1_data), Arc::new(field2_data)],
1115            None,
1116        );
1117
1118        let vortex_array = ArrayRef::from_arrow(&arrow_array, false).unwrap();
1119        assert_eq!(vortex_array.len(), 3);
1120
1121        // Verify metadata - should be StructArray with correct field names
1122        let struct_vortex_array = vortex_array.as_::<Struct>();
1123        assert_eq!(struct_vortex_array.names().len(), 2);
1124        assert_eq!(struct_vortex_array.names()[0], "field1");
1125        assert_eq!(struct_vortex_array.names()[1], "field2");
1126
1127        // Test nullable struct
1128        let nullable_array = StructArray::new(
1129            schema,
1130            vec![
1131                Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])),
1132                Arc::new(StringArray::from(vec!["a", "b", "c"])),
1133            ],
1134            Some(arrow_buffer::NullBuffer::new(BooleanBuffer::from(vec![
1135                true, false, true,
1136            ]))),
1137        );
1138
1139        let vortex_nullable_array = ArrayRef::from_arrow(&nullable_array, true).unwrap();
1140        assert_eq!(vortex_nullable_array.len(), 3);
1141
1142        // Verify metadata for nullable struct
1143        let struct_vortex_nullable_array = vortex_nullable_array.as_::<Struct>();
1144        assert_eq!(struct_vortex_nullable_array.names().len(), 2);
1145        assert_eq!(struct_vortex_nullable_array.names()[0], "field1");
1146        assert_eq!(struct_vortex_nullable_array.names()[1], "field2");
1147    }
1148
1149    // Test list array conversions
1150    #[test]
1151    fn test_list_array_conversion() {
1152        let mut builder = ListBuilder::new(Int32Builder::new());
1153        builder.append_value([Some(1), None, Some(3)]);
1154        builder.append_null();
1155        builder.append_value([Some(4), Some(5)]);
1156        let arrow_array = builder.finish();
1157
1158        let vortex_array = ArrayRef::from_arrow(&arrow_array, true).unwrap();
1159        assert_eq!(vortex_array.len(), 3);
1160
1161        // Verify metadata - should be ListArray with correct offsets
1162        let list_vortex_array = vortex_array.as_::<List>();
1163        let offsets_array = list_vortex_array.offsets().as_::<Primitive>();
1164        assert_eq!(offsets_array.len(), 4); // n+1 offsets for n lists
1165        assert_eq!(offsets_array.ptype(), PType::I32);
1166
1167        // Test non-nullable list
1168        let mut builder_non_null = ListBuilder::new(Int32Builder::new());
1169        builder_non_null.append_value([Some(1), None, Some(3)]);
1170        builder_non_null.append_value([Some(4), Some(5)]);
1171        let arrow_array_non_null = builder_non_null.finish();
1172
1173        let vortex_array_non_null = ArrayRef::from_arrow(&arrow_array_non_null, false).unwrap();
1174        assert_eq!(vortex_array_non_null.len(), 2);
1175
1176        // Verify metadata for non-nullable list
1177        let list_vortex_array_non_null = vortex_array_non_null.as_::<List>();
1178        let offsets_array_non_null = list_vortex_array_non_null.offsets().as_::<Primitive>();
1179        assert_eq!(offsets_array_non_null.len(), 3); // n+1 offsets for n lists
1180        assert_eq!(offsets_array_non_null.ptype(), PType::I32);
1181    }
1182
1183    #[test]
1184    fn test_large_list_array_conversion() {
1185        let mut builder = LargeListBuilder::new(Int32Builder::new());
1186        builder.append_value([Some(1), None, Some(3)]);
1187        builder.append_null();
1188        builder.append_value([Some(4), Some(5)]);
1189        let arrow_array = builder.finish();
1190
1191        let vortex_array = ArrayRef::from_arrow(&arrow_array, true).unwrap();
1192        assert_eq!(vortex_array.len(), 3);
1193
1194        // Verify metadata - should be ListArray with correct offsets (I64 for large lists)
1195        let list_vortex_array = vortex_array.as_::<List>();
1196        let offsets_array = list_vortex_array.offsets().as_::<Primitive>();
1197        assert_eq!(offsets_array.len(), 4); // n+1 offsets for n lists
1198        assert_eq!(offsets_array.ptype(), PType::I64); // Large lists use I64 offsets
1199
1200        // Test non-nullable large list
1201        let mut builder_non_null = LargeListBuilder::new(Int32Builder::new());
1202        builder_non_null.append_value([Some(1), None, Some(3)]);
1203        builder_non_null.append_value([Some(4), Some(5)]);
1204        let arrow_array_non_null = builder_non_null.finish();
1205
1206        let vortex_array_non_null = ArrayRef::from_arrow(&arrow_array_non_null, false).unwrap();
1207        assert_eq!(vortex_array_non_null.len(), 2);
1208
1209        // Verify metadata for non-nullable large list
1210        let list_vortex_array_non_null = vortex_array_non_null.as_::<List>();
1211        let offsets_array_non_null = list_vortex_array_non_null.offsets().as_::<Primitive>();
1212        assert_eq!(offsets_array_non_null.len(), 3); // n+1 offsets for n lists
1213        assert_eq!(offsets_array_non_null.ptype(), PType::I64); // Large lists use I64 offsets
1214    }
1215
1216    #[test]
1217    fn test_fixed_size_list_array_conversion() {
1218        // Create elements for the fixed-size lists
1219        let values = Int32Array::from(vec![
1220            Some(1),
1221            Some(2),
1222            Some(3), // First list
1223            Some(4),
1224            None,
1225            Some(6), // Second list (with null element)
1226            Some(7),
1227            Some(8),
1228            Some(9), // Third list
1229            Some(10),
1230            Some(11),
1231            Some(12), // Fourth list
1232        ]);
1233
1234        // Create a FixedSizeListArray with list_size=3
1235        let field = Arc::new(Field::new("item", DataType::Int32, true));
1236        let arrow_array =
1237            ArrowFixedSizeListArray::try_new(Arc::clone(&field), 3, Arc::new(values), None)
1238                .unwrap();
1239        let vortex_array = ArrayRef::from_arrow(&arrow_array, false).unwrap();
1240
1241        assert_eq!(vortex_array.len(), 4);
1242
1243        // Verify metadata - should be FixedSizeListArray with correct list size
1244        let fsl_vortex_array = vortex_array.as_::<FixedSizeList>();
1245        assert_eq!(fsl_vortex_array.list_size(), 3);
1246        assert_eq!(fsl_vortex_array.elements().len(), 12); // 4 lists * 3 elements
1247
1248        // Test nullable fixed-size list
1249        let values_nullable = Int32Array::from(vec![
1250            Some(1),
1251            Some(2),
1252            Some(3), // First list
1253            Some(4),
1254            None,
1255            Some(6), // Second list (will be null)
1256            Some(7),
1257            Some(8),
1258            Some(9), // Third list
1259        ]);
1260
1261        // Create nulls buffer - second list is null
1262        let null_buffer =
1263            arrow_buffer::NullBuffer::new(BooleanBuffer::from(vec![true, false, true]));
1264
1265        let arrow_array_nullable = ArrowFixedSizeListArray::try_new(
1266            field,
1267            3,
1268            Arc::new(values_nullable),
1269            Some(null_buffer),
1270        )
1271        .unwrap();
1272        let vortex_array_nullable = ArrayRef::from_arrow(&arrow_array_nullable, true).unwrap();
1273
1274        assert_eq!(vortex_array_nullable.len(), 3);
1275
1276        // Verify metadata for nullable array
1277        let fsl_vortex_array_nullable = vortex_array_nullable.as_::<FixedSizeList>();
1278        assert_eq!(fsl_vortex_array_nullable.list_size(), 3);
1279        assert_eq!(fsl_vortex_array_nullable.elements().len(), 9); // 3 lists * 3 elements
1280    }
1281
1282    #[test]
1283    fn test_list_view_array_conversion() {
1284        // Create values array for the lists
1285        let values = Int32Array::from(vec![
1286            Some(1),
1287            Some(2),
1288            Some(3), // First list [1, 2, 3]
1289            Some(4),
1290            Some(5), // Second list [4, 5]
1291            Some(6), // Third list [6]
1292            Some(7),
1293            Some(8),
1294            Some(9),
1295            Some(10), // Fourth list [7, 8, 9, 10]
1296        ]);
1297
1298        // Create offsets and sizes for ListView
1299        let offsets = ScalarBuffer::from(vec![0i32, 3, 5, 6]);
1300        let sizes = ScalarBuffer::from(vec![3i32, 2, 1, 4]);
1301
1302        let field = Arc::new(Field::new("item", DataType::Int32, true));
1303        let arrow_array = GenericListViewArray::try_new(
1304            Arc::clone(&field),
1305            offsets.clone(),
1306            sizes.clone(),
1307            Arc::new(values.clone()),
1308            None,
1309        )
1310        .unwrap();
1311
1312        let vortex_array = ArrayRef::from_arrow(&arrow_array, false).unwrap();
1313        assert_eq!(vortex_array.len(), 4);
1314
1315        // Verify metadata - should be ListViewArray with correct offsets and sizes
1316        let list_view_vortex_array = vortex_array.as_::<ListView>();
1317        let offsets_array = list_view_vortex_array.offsets().as_::<Primitive>();
1318        let sizes_array = list_view_vortex_array.sizes().as_::<Primitive>();
1319
1320        assert_eq!(offsets_array.len(), 4);
1321        assert_eq!(offsets_array.ptype(), PType::I32);
1322        assert_eq!(sizes_array.len(), 4);
1323        assert_eq!(sizes_array.ptype(), PType::I32);
1324
1325        // Test nullable ListView
1326        let null_buffer =
1327            arrow_buffer::NullBuffer::new(BooleanBuffer::from(vec![true, false, true, true]));
1328
1329        let arrow_array_nullable = GenericListViewArray::try_new(
1330            Arc::clone(&field),
1331            offsets,
1332            sizes,
1333            Arc::new(values.clone()),
1334            Some(null_buffer),
1335        )
1336        .unwrap();
1337
1338        let vortex_array_nullable = ArrayRef::from_arrow(&arrow_array_nullable, true).unwrap();
1339        assert_eq!(vortex_array_nullable.len(), 4);
1340
1341        // Test LargeListView (i64 offsets and sizes)
1342        let large_offsets = ScalarBuffer::from(vec![0i64, 3, 5, 6]);
1343        let large_sizes = ScalarBuffer::from(vec![3i64, 2, 1, 4]);
1344
1345        let large_arrow_array = GenericListViewArray::try_new(
1346            field,
1347            large_offsets,
1348            large_sizes,
1349            Arc::new(values),
1350            None,
1351        )
1352        .unwrap();
1353
1354        let large_vortex_array = ArrayRef::from_arrow(&large_arrow_array, false).unwrap();
1355        assert_eq!(large_vortex_array.len(), 4);
1356
1357        // Verify metadata for large ListView
1358        let large_list_view_vortex_array = large_vortex_array.as_::<ListView>();
1359        let large_offsets_array = large_list_view_vortex_array.offsets().as_::<Primitive>();
1360        let large_sizes_array = large_list_view_vortex_array.sizes().as_::<Primitive>();
1361
1362        assert_eq!(large_offsets_array.len(), 4);
1363        assert_eq!(large_offsets_array.ptype(), PType::I64); // Large ListView uses I64 offsets
1364        assert_eq!(large_sizes_array.len(), 4);
1365        assert_eq!(large_sizes_array.ptype(), PType::I64); // Large ListView uses I64 sizes
1366    }
1367
1368    // Test null array conversions
1369    #[test]
1370    fn test_null_array_conversion() {
1371        let arrow_array = NullArray::new(5);
1372        let vortex_array = ArrayRef::from_arrow(&arrow_array, true).unwrap();
1373        assert_eq!(vortex_array.len(), 5);
1374    }
1375
1376    // Test buffer conversions
1377    #[test]
1378    fn test_arrow_buffer_conversion() {
1379        let data = vec![1u8, 2, 3, 4, 5];
1380        let arrow_buffer = ArrowBuffer::from_vec(data);
1381        let vortex_array = arrow_buffer.into_array();
1382        assert_eq!(vortex_array.len(), 5);
1383    }
1384
1385    #[test]
1386    fn test_boolean_buffer_conversion() {
1387        let data = vec![true, false, true, false, true];
1388        let boolean_buffer = BooleanBuffer::from(data);
1389        let vortex_array = boolean_buffer.into_array();
1390        assert_eq!(vortex_array.len(), 5);
1391    }
1392
1393    #[test]
1394    fn test_scalar_buffer_conversion() {
1395        let data = vec![1i32, 2, 3, 4, 5];
1396        let scalar_buffer = ScalarBuffer::from(data);
1397        let vortex_array = scalar_buffer.into_array();
1398        assert_eq!(vortex_array.len(), 5);
1399    }
1400
1401    #[test]
1402    fn test_offset_buffer_conversion() {
1403        let data = vec![0i32, 2, 5, 8, 10];
1404        let offset_buffer = OffsetBuffer::new(ScalarBuffer::from(data));
1405        let vortex_array = offset_buffer.into_array();
1406        assert_eq!(vortex_array.len(), 5);
1407    }
1408
1409    // Test RecordBatch conversions
1410    #[test]
1411    fn test_record_batch_conversion() {
1412        let schema = Arc::new(Schema::new(vec![
1413            Field::new("field1", DataType::Int32, false),
1414            Field::new("field2", DataType::Utf8, false),
1415        ]));
1416
1417        let field1_data = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
1418        let field2_data = Arc::new(StringArray::from(vec!["a", "b", "c", "d"]));
1419
1420        let record_batch = RecordBatch::try_new(schema, vec![field1_data, field2_data]).unwrap();
1421
1422        let vortex_array = ArrayRef::from_arrow(record_batch, false).unwrap();
1423        assert_eq!(vortex_array.len(), 4);
1424
1425        // Test with reference
1426        let schema = Arc::new(Schema::new(vec![
1427            Field::new("field1", DataType::Int32, false),
1428            Field::new("field2", DataType::Utf8, false),
1429        ]));
1430
1431        let field1_data = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
1432        let field2_data = Arc::new(StringArray::from(vec!["a", "b", "c", "d"]));
1433
1434        let record_batch = RecordBatch::try_new(schema, vec![field1_data, field2_data]).unwrap();
1435
1436        let vortex_array = ArrayRef::from_arrow(&record_batch, false).unwrap();
1437        assert_eq!(vortex_array.len(), 4);
1438    }
1439
1440    // Test dynamic dispatch conversion
1441    #[test]
1442    fn test_dyn_array_conversion() {
1443        let int_array = Int32Array::from(vec![1, 2, 3, 4]);
1444        let dyn_array: &dyn ArrowArray = &int_array;
1445        let vortex_array = ArrayRef::from_arrow(dyn_array, false).unwrap();
1446        assert_eq!(vortex_array.len(), 4);
1447
1448        let string_array = StringArray::from(vec!["a", "b", "c"]);
1449        let dyn_array: &dyn ArrowArray = &string_array;
1450        let vortex_array = ArrayRef::from_arrow(dyn_array, false).unwrap();
1451        assert_eq!(vortex_array.len(), 3);
1452
1453        let bool_array = BooleanArray::from(vec![true, false, true]);
1454        let dyn_array: &dyn ArrowArray = &bool_array;
1455        let vortex_array = ArrayRef::from_arrow(dyn_array, false).unwrap();
1456        assert_eq!(vortex_array.len(), 3);
1457    }
1458
1459    // Existing tests
1460    #[test]
1461    pub fn nullable_may_contain_non_nullable() {
1462        let null_struct_array_with_non_nullable_field = new_null_array(
1463            &DataType::Struct(Fields::from(vec![Field::new(
1464                "non_nullable_inner",
1465                DataType::Int32,
1466                false,
1467            )])),
1468            1,
1469        );
1470        ArrayRef::from_arrow(null_struct_array_with_non_nullable_field.as_ref(), true).unwrap();
1471    }
1472
1473    #[test]
1474    pub fn nullable_may_contain_deeply_nested_non_nullable() {
1475        let null_struct_array_with_non_nullable_field = new_null_array(
1476            &DataType::Struct(Fields::from(vec![Field::new(
1477                "non_nullable_inner",
1478                DataType::Struct(Fields::from(vec![Field::new(
1479                    "non_nullable_deeper_inner",
1480                    DataType::Int32,
1481                    false,
1482                )])),
1483                false,
1484            )])),
1485            1,
1486        );
1487        ArrayRef::from_arrow(null_struct_array_with_non_nullable_field.as_ref(), true).unwrap();
1488    }
1489
1490    #[test]
1491    #[should_panic]
1492    pub fn cannot_handle_nullable_struct_containing_non_nullable_dictionary() {
1493        let null_struct_array_with_non_nullable_field = new_null_array(
1494            &DataType::Struct(Fields::from(vec![Field::new(
1495                "non_nullable_deeper_inner",
1496                DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
1497                false,
1498            )])),
1499            1,
1500        );
1501
1502        ArrayRef::from_arrow(null_struct_array_with_non_nullable_field.as_ref(), true).unwrap();
1503    }
1504}