Skip to main content

vortex_array/arrow/
convert.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use arrow_array::AnyDictionaryArray;
7use arrow_array::Array as ArrowArray;
8use arrow_array::ArrowPrimitiveType;
9use arrow_array::BooleanArray as ArrowBooleanArray;
10use arrow_array::DictionaryArray;
11use arrow_array::FixedSizeListArray as ArrowFixedSizeListArray;
12use arrow_array::GenericByteArray;
13use arrow_array::GenericByteViewArray;
14use arrow_array::GenericListArray;
15use arrow_array::GenericListViewArray;
16use arrow_array::NullArray as ArrowNullArray;
17use arrow_array::OffsetSizeTrait;
18use arrow_array::PrimitiveArray as ArrowPrimitiveArray;
19use arrow_array::RecordBatch;
20use arrow_array::StructArray as ArrowStructArray;
21use arrow_array::cast::AsArray;
22use arrow_array::cast::as_null_array;
23use arrow_array::make_array;
24use arrow_array::types::ArrowDictionaryKeyType;
25use arrow_array::types::ByteArrayType;
26use arrow_array::types::ByteViewType;
27use arrow_array::types::Date32Type;
28use arrow_array::types::Date64Type;
29use arrow_array::types::Decimal32Type;
30use arrow_array::types::Decimal64Type;
31use arrow_array::types::Decimal128Type;
32use arrow_array::types::Decimal256Type;
33use arrow_array::types::Float16Type;
34use arrow_array::types::Float32Type;
35use arrow_array::types::Float64Type;
36use arrow_array::types::Int8Type;
37use arrow_array::types::Int16Type;
38use arrow_array::types::Int32Type;
39use arrow_array::types::Int64Type;
40use arrow_array::types::Time32MillisecondType;
41use arrow_array::types::Time32SecondType;
42use arrow_array::types::Time64MicrosecondType;
43use arrow_array::types::Time64NanosecondType;
44use arrow_array::types::TimestampMicrosecondType;
45use arrow_array::types::TimestampMillisecondType;
46use arrow_array::types::TimestampNanosecondType;
47use arrow_array::types::TimestampSecondType;
48use arrow_array::types::UInt8Type;
49use arrow_array::types::UInt16Type;
50use arrow_array::types::UInt32Type;
51use arrow_array::types::UInt64Type;
52use arrow_buffer::ArrowNativeType;
53use arrow_buffer::BooleanBuffer;
54use arrow_buffer::Buffer as ArrowBuffer;
55use arrow_buffer::ScalarBuffer;
56use arrow_buffer::buffer::NullBuffer;
57use arrow_buffer::buffer::OffsetBuffer;
58use arrow_schema::DataType;
59use arrow_schema::TimeUnit as ArrowTimeUnit;
60use vortex_buffer::Alignment;
61use vortex_buffer::BitBuffer;
62use vortex_buffer::Buffer;
63use vortex_buffer::ByteBuffer;
64use vortex_error::VortexResult;
65use vortex_error::vortex_bail;
66use vortex_error::vortex_ensure;
67use vortex_error::vortex_ensure_eq;
68use vortex_error::vortex_err;
69use vortex_error::vortex_panic;
70
71use crate::ArrayRef;
72use crate::IntoArray;
73use crate::arrays::BoolArray;
74use crate::arrays::DecimalArray;
75use crate::arrays::DictArray;
76use crate::arrays::FixedSizeListArray;
77use crate::arrays::ListArray;
78use crate::arrays::ListViewArray;
79use crate::arrays::NullArray;
80use crate::arrays::PrimitiveArray;
81use crate::arrays::StructArray;
82use crate::arrays::TemporalArray;
83use crate::arrays::VarBinArray;
84use crate::arrays::VarBinViewArray;
85use crate::arrow::FromArrowArray;
86use crate::dtype::DType;
87use crate::dtype::DecimalDType;
88use crate::dtype::IntegerPType;
89use crate::dtype::NativePType;
90use crate::dtype::PType;
91use crate::dtype::i256;
92use crate::extension::datetime::TimeUnit;
93use crate::validity::Validity;
94
95impl IntoArray for ArrowBuffer {
96    fn into_array(self) -> ArrayRef {
97        PrimitiveArray::from_byte_buffer(
98            ByteBuffer::from_arrow_buffer(self, Alignment::of::<u8>()),
99            PType::U8,
100            Validity::NonNullable,
101        )
102        .into_array()
103    }
104}
105
106impl IntoArray for BooleanBuffer {
107    fn into_array(self) -> ArrayRef {
108        BoolArray::new(self.into(), Validity::NonNullable).into_array()
109    }
110}
111
112impl<T> IntoArray for ScalarBuffer<T>
113where
114    T: ArrowNativeType + NativePType,
115{
116    fn into_array(self) -> ArrayRef {
117        PrimitiveArray::new(
118            Buffer::<T>::from_arrow_scalar_buffer(self),
119            Validity::NonNullable,
120        )
121        .into_array()
122    }
123}
124
125impl<O> IntoArray for OffsetBuffer<O>
126where
127    O: IntegerPType + OffsetSizeTrait,
128{
129    fn into_array(self) -> ArrayRef {
130        let primitive = PrimitiveArray::new(
131            Buffer::from_arrow_scalar_buffer(self.into_inner()),
132            Validity::NonNullable,
133        );
134
135        primitive.into_array()
136    }
137}
138
139macro_rules! impl_from_arrow_primitive {
140    ($T:path) => {
141        impl FromArrowArray<&ArrowPrimitiveArray<$T>> for ArrayRef {
142            fn from_arrow(value: &ArrowPrimitiveArray<$T>, nullable: bool) -> VortexResult<Self> {
143                let buffer = Buffer::from_arrow_scalar_buffer(value.values().clone());
144                let validity = nulls(value.nulls(), nullable)?;
145                Ok(PrimitiveArray::new(buffer, validity).into_array())
146            }
147        }
148    };
149}
150
151impl_from_arrow_primitive!(Int8Type);
152impl_from_arrow_primitive!(Int16Type);
153impl_from_arrow_primitive!(Int32Type);
154impl_from_arrow_primitive!(Int64Type);
155impl_from_arrow_primitive!(UInt8Type);
156impl_from_arrow_primitive!(UInt16Type);
157impl_from_arrow_primitive!(UInt32Type);
158impl_from_arrow_primitive!(UInt64Type);
159impl_from_arrow_primitive!(Float16Type);
160impl_from_arrow_primitive!(Float32Type);
161impl_from_arrow_primitive!(Float64Type);
162
163impl FromArrowArray<&ArrowPrimitiveArray<Decimal32Type>> for ArrayRef {
164    fn from_arrow(
165        array: &ArrowPrimitiveArray<Decimal32Type>,
166        nullable: bool,
167    ) -> VortexResult<Self> {
168        let decimal_type = DecimalDType::new(array.precision(), array.scale());
169        let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone());
170        let validity = nulls(array.nulls(), nullable)?;
171        Ok(DecimalArray::new(buffer, decimal_type, validity).into_array())
172    }
173}
174
175impl FromArrowArray<&ArrowPrimitiveArray<Decimal64Type>> for ArrayRef {
176    fn from_arrow(
177        array: &ArrowPrimitiveArray<Decimal64Type>,
178        nullable: bool,
179    ) -> VortexResult<Self> {
180        let decimal_type = DecimalDType::new(array.precision(), array.scale());
181        let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone());
182        let validity = nulls(array.nulls(), nullable)?;
183        Ok(DecimalArray::new(buffer, decimal_type, validity).into_array())
184    }
185}
186
187impl FromArrowArray<&ArrowPrimitiveArray<Decimal128Type>> for ArrayRef {
188    fn from_arrow(
189        array: &ArrowPrimitiveArray<Decimal128Type>,
190        nullable: bool,
191    ) -> VortexResult<Self> {
192        let decimal_type = DecimalDType::new(array.precision(), array.scale());
193        let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone());
194        let validity = nulls(array.nulls(), nullable)?;
195        Ok(DecimalArray::new(buffer, decimal_type, validity).into_array())
196    }
197}
198
199impl FromArrowArray<&ArrowPrimitiveArray<Decimal256Type>> for ArrayRef {
200    fn from_arrow(
201        array: &ArrowPrimitiveArray<Decimal256Type>,
202        nullable: bool,
203    ) -> VortexResult<Self> {
204        let decimal_type = DecimalDType::new(array.precision(), array.scale());
205        let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone());
206        // SAFETY: Our i256 implementation has the same bit-pattern representation of the
207        //  arrow_buffer::i256 type. It is safe to treat values held inside the buffer as values
208        //  of either type.
209        let buffer =
210            unsafe { std::mem::transmute::<Buffer<arrow_buffer::i256>, Buffer<i256>>(buffer) };
211        let validity = nulls(array.nulls(), nullable)?;
212        Ok(DecimalArray::new(buffer, decimal_type, validity).into_array())
213    }
214}
215
216macro_rules! impl_from_arrow_temporal {
217    ($T:path) => {
218        impl FromArrowArray<&ArrowPrimitiveArray<$T>> for ArrayRef {
219            fn from_arrow(
220                value: &ArrowPrimitiveArray<$T>,
221                nullable: bool,
222            ) -> vortex_error::VortexResult<Self> {
223                temporal_array(value, nullable)
224            }
225        }
226    };
227}
228
229// timestamp
230impl_from_arrow_temporal!(TimestampSecondType);
231impl_from_arrow_temporal!(TimestampMillisecondType);
232impl_from_arrow_temporal!(TimestampMicrosecondType);
233impl_from_arrow_temporal!(TimestampNanosecondType);
234
235// time
236impl_from_arrow_temporal!(Time32SecondType);
237impl_from_arrow_temporal!(Time32MillisecondType);
238impl_from_arrow_temporal!(Time64MicrosecondType);
239impl_from_arrow_temporal!(Time64NanosecondType);
240
241// date
242impl_from_arrow_temporal!(Date32Type);
243impl_from_arrow_temporal!(Date64Type);
244
245fn temporal_array<T: ArrowPrimitiveType>(
246    value: &ArrowPrimitiveArray<T>,
247    nullable: bool,
248) -> VortexResult<ArrayRef>
249where
250    T::Native: NativePType,
251{
252    let arr = PrimitiveArray::new(
253        Buffer::from_arrow_scalar_buffer(value.values().clone()),
254        nulls(value.nulls(), nullable)?,
255    )
256    .into_array();
257
258    Ok(match value.data_type() {
259        DataType::Timestamp(time_unit, tz) => {
260            TemporalArray::new_timestamp(arr, time_unit.into(), tz.clone()).into()
261        }
262        DataType::Time32(time_unit) => TemporalArray::new_time(arr, time_unit.into()).into(),
263        DataType::Time64(time_unit) => TemporalArray::new_time(arr, time_unit.into()).into(),
264        DataType::Date32 => TemporalArray::new_date(arr, TimeUnit::Days).into(),
265        DataType::Date64 => TemporalArray::new_date(arr, TimeUnit::Milliseconds).into(),
266        DataType::Duration(_) => unimplemented!(),
267        DataType::Interval(_) => unimplemented!(),
268        _ => vortex_panic!("Invalid temporal type: {}", value.data_type()),
269    })
270}
271
272impl<T: ByteArrayType> FromArrowArray<&GenericByteArray<T>> for ArrayRef
273where
274    <T as ByteArrayType>::Offset: IntegerPType,
275{
276    fn from_arrow(value: &GenericByteArray<T>, nullable: bool) -> VortexResult<Self> {
277        let dtype = match T::DATA_TYPE {
278            DataType::Binary | DataType::LargeBinary => DType::Binary(nullable.into()),
279            DataType::Utf8 | DataType::LargeUtf8 => DType::Utf8(nullable.into()),
280            dt => vortex_panic!("Invalid data type for ByteArray: {dt}"),
281        };
282        // SAFETY: Arrow arrays are already validated (valid UTF-8, valid offsets, correct validity).
283        Ok(unsafe {
284            VarBinArray::new_unchecked(
285                value.offsets().clone().into_array(),
286                ByteBuffer::from_arrow_buffer(value.values().clone(), Alignment::of::<u8>()),
287                dtype,
288                nulls(value.nulls(), nullable)?,
289            )
290        }
291        .into_array())
292    }
293}
294
295impl<T: ByteViewType> FromArrowArray<&GenericByteViewArray<T>> for ArrayRef {
296    fn from_arrow(value: &GenericByteViewArray<T>, nullable: bool) -> VortexResult<Self> {
297        let dtype = match T::DATA_TYPE {
298            DataType::BinaryView => DType::Binary(nullable.into()),
299            DataType::Utf8View => DType::Utf8(nullable.into()),
300            dt => vortex_panic!("Invalid data type for ByteViewArray: {dt}"),
301        };
302
303        let views_buffer = Buffer::from_byte_buffer(
304            Buffer::from_arrow_scalar_buffer(value.views().clone()).into_byte_buffer(),
305        );
306
307        // SAFETY: arrow-rs ByteViewArray already checks the same invariants, we inherit those
308        //  guarantees by zero-copy constructing from one.
309        Ok(unsafe {
310            VarBinViewArray::new_unchecked(
311                views_buffer,
312                Arc::from(
313                    value
314                        .data_buffers()
315                        .iter()
316                        .map(|b| ByteBuffer::from_arrow_buffer(b.clone(), Alignment::of::<u8>()))
317                        .collect::<Vec<_>>(),
318                ),
319                dtype,
320                nulls(value.nulls(), nullable)?,
321            )
322            .into_array()
323        })
324    }
325}
326
327impl FromArrowArray<&ArrowBooleanArray> for ArrayRef {
328    fn from_arrow(value: &ArrowBooleanArray, nullable: bool) -> VortexResult<Self> {
329        Ok(BoolArray::new(
330            value.values().clone().into(),
331            nulls(value.nulls(), nullable)?,
332        )
333        .into_array())
334    }
335}
336
337/// Strip out the nulls from this array and return a new array without nulls.
338pub(crate) fn remove_nulls(data: arrow_data::ArrayData) -> VortexResult<arrow_data::ArrayData> {
339    if data.null_count() == 0 {
340        // No nulls to remove, return the array as is
341        return Ok(data);
342    }
343
344    let children = match data.data_type() {
345        DataType::Struct(fields) => Some(
346            fields
347                .iter()
348                .zip(data.child_data().iter())
349                .map(|(field, child_data)| {
350                    if field.is_nullable() {
351                        Ok(child_data.clone())
352                    } else {
353                        remove_nulls(child_data.clone())
354                    }
355                })
356                .collect::<VortexResult<Vec<_>>>()?,
357        ),
358        DataType::List(f)
359        | DataType::LargeList(f)
360        | DataType::ListView(f)
361        | DataType::LargeListView(f)
362        | DataType::FixedSizeList(f, _)
363            if !f.is_nullable() =>
364        {
365            // All list types only have one child
366            vortex_ensure_eq!(
367                data.child_data().len(),
368                1,
369                "List types should have one child"
370            );
371            Some(vec![remove_nulls(data.child_data()[0].clone())?])
372        }
373        _ => None,
374    };
375
376    let mut builder = data.into_builder().nulls(None);
377    if let Some(children) = children {
378        builder = builder.child_data(children);
379    }
380    builder
381        .build()
382        .map_err(|e| vortex_err!("Failed to reconstruct Arrow array without nulls: {e}"))
383}
384
385impl FromArrowArray<&ArrowStructArray> for ArrayRef {
386    fn from_arrow(value: &ArrowStructArray, nullable: bool) -> VortexResult<Self> {
387        Ok(StructArray::try_new(
388            value.column_names().iter().copied().collect(),
389            value
390                .columns()
391                .iter()
392                .zip(value.fields())
393                .map(|(c, field)| {
394                    // Arrow pushes down nulls, even into non-nullable fields. So we strip them
395                    // out here because Vortex is a little more strict.
396                    if c.null_count() > 0 && !field.is_nullable() {
397                        let stripped = make_array(remove_nulls(c.into_data())?);
398                        Self::from_arrow(stripped.as_ref(), false)
399                    } else {
400                        Self::from_arrow(c.as_ref(), field.is_nullable())
401                    }
402                })
403                .collect::<VortexResult<Vec<_>>>()?,
404            value.len(),
405            nulls(value.nulls(), nullable)?,
406        )?
407        .into_array())
408    }
409}
410
411impl<O: IntegerPType + OffsetSizeTrait> FromArrowArray<&GenericListArray<O>> for ArrayRef {
412    fn from_arrow(value: &GenericListArray<O>, nullable: bool) -> VortexResult<Self> {
413        // Extract the validity of the underlying element array.
414        let elements_are_nullable = match value.data_type() {
415            DataType::List(field) => field.is_nullable(),
416            DataType::LargeList(field) => field.is_nullable(),
417            dt => vortex_panic!("Invalid data type for ListArray: {dt}"),
418        };
419
420        let elements = Self::from_arrow(value.values().as_ref(), elements_are_nullable)?;
421
422        // `offsets` are always non-nullable.
423        let offsets = value.offsets().clone().into_array();
424        let nulls = nulls(value.nulls(), nullable)?;
425
426        Ok(ListArray::try_new(elements, offsets, nulls)?.into_array())
427    }
428}
429
430impl<O: OffsetSizeTrait + NativePType> FromArrowArray<&GenericListViewArray<O>> for ArrayRef {
431    fn from_arrow(array: &GenericListViewArray<O>, nullable: bool) -> VortexResult<Self> {
432        // Extract the validity of the underlying element array.
433        let elements_are_nullable = match array.data_type() {
434            DataType::ListView(field) => field.is_nullable(),
435            DataType::LargeListView(field) => field.is_nullable(),
436            dt => vortex_panic!("Invalid data type for ListViewArray: {dt}"),
437        };
438
439        let elements = Self::from_arrow(array.values().as_ref(), elements_are_nullable)?;
440
441        // `offsets` and `sizes` are always non-nullable.
442        let offsets = array.offsets().clone().into_array();
443        let sizes = array.sizes().clone().into_array();
444        let nulls = nulls(array.nulls(), nullable)?;
445
446        Ok(ListViewArray::try_new(elements, offsets, sizes, nulls)?.into_array())
447    }
448}
449
450impl FromArrowArray<&ArrowFixedSizeListArray> for ArrayRef {
451    fn from_arrow(array: &ArrowFixedSizeListArray, nullable: bool) -> VortexResult<Self> {
452        let DataType::FixedSizeList(field, list_size) = array.data_type() else {
453            vortex_panic!("Invalid data type for ListArray: {}", array.data_type());
454        };
455
456        Ok(FixedSizeListArray::try_new(
457            Self::from_arrow(array.values().as_ref(), field.is_nullable())?,
458            *list_size as u32,
459            nulls(array.nulls(), nullable)?,
460            array.len(),
461        )?
462        .into_array())
463    }
464}
465
466impl FromArrowArray<&ArrowNullArray> for ArrayRef {
467    fn from_arrow(value: &ArrowNullArray, nullable: bool) -> VortexResult<Self> {
468        vortex_ensure!(
469            nullable,
470            "Cannot convert an Arrow NullArray into a non-nullable Vortex array"
471        );
472        Ok(NullArray::new(value.len()).into_array())
473    }
474}
475
476impl<K: ArrowDictionaryKeyType> FromArrowArray<&DictionaryArray<K>> for DictArray {
477    fn from_arrow(array: &DictionaryArray<K>, nullable: bool) -> VortexResult<Self> {
478        let keys = AnyDictionaryArray::keys(array);
479        let keys = ArrayRef::from_arrow(keys, keys.is_nullable())?;
480        let values = ArrayRef::from_arrow(array.values().as_ref(), nullable)?;
481        // SAFETY: we assume that Arrow has checked the invariants on construction.
482        Ok(unsafe { DictArray::new_unchecked(keys, values) })
483    }
484}
485
486pub(crate) fn nulls(nulls: Option<&NullBuffer>, nullable: bool) -> VortexResult<Validity> {
487    if nullable {
488        Ok(nulls
489            .map(|nulls| {
490                if nulls.null_count() == nulls.len() {
491                    Validity::AllInvalid
492                } else {
493                    Validity::from(BitBuffer::from(nulls.inner().clone()))
494                }
495            })
496            .unwrap_or(Validity::AllValid))
497    } else {
498        let null_count = nulls.map(NullBuffer::null_count).unwrap_or(0);
499        vortex_ensure_eq!(
500            null_count,
501            0,
502            "Cannot convert an Arrow array containing {null_count} nulls into a non-nullable Vortex array"
503        );
504        Ok(Validity::NonNullable)
505    }
506}
507
508impl FromArrowArray<&dyn ArrowArray> for ArrayRef {
509    fn from_arrow(array: &dyn ArrowArray, nullable: bool) -> VortexResult<Self> {
510        match array.data_type() {
511            DataType::Boolean => Self::from_arrow(array.as_boolean(), nullable),
512            DataType::UInt8 => Self::from_arrow(array.as_primitive::<UInt8Type>(), nullable),
513            DataType::UInt16 => Self::from_arrow(array.as_primitive::<UInt16Type>(), nullable),
514            DataType::UInt32 => Self::from_arrow(array.as_primitive::<UInt32Type>(), nullable),
515            DataType::UInt64 => Self::from_arrow(array.as_primitive::<UInt64Type>(), nullable),
516            DataType::Int8 => Self::from_arrow(array.as_primitive::<Int8Type>(), nullable),
517            DataType::Int16 => Self::from_arrow(array.as_primitive::<Int16Type>(), nullable),
518            DataType::Int32 => Self::from_arrow(array.as_primitive::<Int32Type>(), nullable),
519            DataType::Int64 => Self::from_arrow(array.as_primitive::<Int64Type>(), nullable),
520            DataType::Float16 => Self::from_arrow(array.as_primitive::<Float16Type>(), nullable),
521            DataType::Float32 => Self::from_arrow(array.as_primitive::<Float32Type>(), nullable),
522            DataType::Float64 => Self::from_arrow(array.as_primitive::<Float64Type>(), nullable),
523            DataType::Utf8 => Self::from_arrow(array.as_string::<i32>(), nullable),
524            DataType::LargeUtf8 => Self::from_arrow(array.as_string::<i64>(), nullable),
525            DataType::Binary => Self::from_arrow(array.as_binary::<i32>(), nullable),
526            DataType::LargeBinary => Self::from_arrow(array.as_binary::<i64>(), nullable),
527            DataType::BinaryView => Self::from_arrow(array.as_binary_view(), nullable),
528            DataType::Utf8View => Self::from_arrow(array.as_string_view(), nullable),
529            DataType::Struct(_) => Self::from_arrow(array.as_struct(), nullable),
530            DataType::List(_) => Self::from_arrow(array.as_list::<i32>(), nullable),
531            DataType::LargeList(_) => Self::from_arrow(array.as_list::<i64>(), nullable),
532            DataType::ListView(_) => Self::from_arrow(array.as_list_view::<i32>(), nullable),
533            DataType::LargeListView(_) => Self::from_arrow(array.as_list_view::<i64>(), nullable),
534            DataType::FixedSizeList(..) => Self::from_arrow(array.as_fixed_size_list(), nullable),
535            DataType::Null => Self::from_arrow(as_null_array(array), nullable),
536            DataType::Timestamp(u, _) => match u {
537                ArrowTimeUnit::Second => {
538                    Self::from_arrow(array.as_primitive::<TimestampSecondType>(), nullable)
539                }
540                ArrowTimeUnit::Millisecond => {
541                    Self::from_arrow(array.as_primitive::<TimestampMillisecondType>(), nullable)
542                }
543                ArrowTimeUnit::Microsecond => {
544                    Self::from_arrow(array.as_primitive::<TimestampMicrosecondType>(), nullable)
545                }
546                ArrowTimeUnit::Nanosecond => {
547                    Self::from_arrow(array.as_primitive::<TimestampNanosecondType>(), nullable)
548                }
549            },
550            DataType::Date32 => Self::from_arrow(array.as_primitive::<Date32Type>(), nullable),
551            DataType::Date64 => Self::from_arrow(array.as_primitive::<Date64Type>(), nullable),
552            DataType::Time32(u) => match u {
553                ArrowTimeUnit::Second => {
554                    Self::from_arrow(array.as_primitive::<Time32SecondType>(), nullable)
555                }
556                ArrowTimeUnit::Millisecond => {
557                    Self::from_arrow(array.as_primitive::<Time32MillisecondType>(), nullable)
558                }
559                ArrowTimeUnit::Microsecond | ArrowTimeUnit::Nanosecond => unreachable!(),
560            },
561            DataType::Time64(u) => match u {
562                ArrowTimeUnit::Microsecond => {
563                    Self::from_arrow(array.as_primitive::<Time64MicrosecondType>(), nullable)
564                }
565                ArrowTimeUnit::Nanosecond => {
566                    Self::from_arrow(array.as_primitive::<Time64NanosecondType>(), nullable)
567                }
568                ArrowTimeUnit::Second | ArrowTimeUnit::Millisecond => unreachable!(),
569            },
570            DataType::Decimal32(..) => {
571                Self::from_arrow(array.as_primitive::<Decimal32Type>(), nullable)
572            }
573            DataType::Decimal64(..) => {
574                Self::from_arrow(array.as_primitive::<Decimal64Type>(), nullable)
575            }
576            DataType::Decimal128(..) => {
577                Self::from_arrow(array.as_primitive::<Decimal128Type>(), nullable)
578            }
579            DataType::Decimal256(..) => {
580                Self::from_arrow(array.as_primitive::<Decimal256Type>(), nullable)
581            }
582            DataType::Dictionary(key_type, _) => match key_type.as_ref() {
583                DataType::Int8 => Ok(DictArray::from_arrow(
584                    array.as_dictionary::<Int8Type>(),
585                    nullable,
586                )?
587                .into_array()),
588                DataType::Int16 => Ok(DictArray::from_arrow(
589                    array.as_dictionary::<Int16Type>(),
590                    nullable,
591                )?
592                .into_array()),
593                DataType::Int32 => Ok(DictArray::from_arrow(
594                    array.as_dictionary::<Int32Type>(),
595                    nullable,
596                )?
597                .into_array()),
598                DataType::Int64 => Ok(DictArray::from_arrow(
599                    array.as_dictionary::<Int64Type>(),
600                    nullable,
601                )?
602                .into_array()),
603                DataType::UInt8 => Ok(DictArray::from_arrow(
604                    array.as_dictionary::<UInt8Type>(),
605                    nullable,
606                )?
607                .into_array()),
608                DataType::UInt16 => Ok(DictArray::from_arrow(
609                    array.as_dictionary::<UInt16Type>(),
610                    nullable,
611                )?
612                .into_array()),
613                DataType::UInt32 => Ok(DictArray::from_arrow(
614                    array.as_dictionary::<UInt32Type>(),
615                    nullable,
616                )?
617                .into_array()),
618                DataType::UInt64 => Ok(DictArray::from_arrow(
619                    array.as_dictionary::<UInt64Type>(),
620                    nullable,
621                )?
622                .into_array()),
623                key_dt => vortex_bail!("Unsupported dictionary key type: {key_dt}"),
624            },
625            dt => vortex_bail!("Array encoding not implemented for Arrow data type {dt}"),
626        }
627    }
628}
629
630impl FromArrowArray<RecordBatch> for ArrayRef {
631    fn from_arrow(array: RecordBatch, nullable: bool) -> VortexResult<Self> {
632        ArrayRef::from_arrow(&arrow_array::StructArray::from(array), nullable)
633    }
634}
635
636impl FromArrowArray<&RecordBatch> for ArrayRef {
637    fn from_arrow(array: &RecordBatch, nullable: bool) -> VortexResult<Self> {
638        Self::from_arrow(array.clone(), nullable)
639    }
640}
641
642#[cfg(test)]
643mod tests {
644    use std::sync::Arc;
645
646    use arrow_array::Array as ArrowArray;
647    use arrow_array::BinaryArray;
648    use arrow_array::BooleanArray;
649    use arrow_array::Date32Array;
650    use arrow_array::Date64Array;
651    use arrow_array::FixedSizeListArray as ArrowFixedSizeListArray;
652    use arrow_array::Float32Array;
653    use arrow_array::Float64Array;
654    use arrow_array::GenericListViewArray;
655    use arrow_array::Int8Array;
656    use arrow_array::Int16Array;
657    use arrow_array::Int32Array;
658    use arrow_array::Int64Array;
659    use arrow_array::LargeBinaryArray;
660    use arrow_array::LargeStringArray;
661    use arrow_array::NullArray;
662    use arrow_array::RecordBatch;
663    use arrow_array::StringArray;
664    use arrow_array::StructArray;
665    use arrow_array::Time32MillisecondArray;
666    use arrow_array::Time32SecondArray;
667    use arrow_array::Time64MicrosecondArray;
668    use arrow_array::Time64NanosecondArray;
669    use arrow_array::TimestampMicrosecondArray;
670    use arrow_array::TimestampMillisecondArray;
671    use arrow_array::TimestampNanosecondArray;
672    use arrow_array::TimestampSecondArray;
673    use arrow_array::UInt8Array;
674    use arrow_array::UInt16Array;
675    use arrow_array::UInt32Array;
676    use arrow_array::UInt64Array;
677    use arrow_array::builder::BinaryViewBuilder;
678    use arrow_array::builder::Decimal128Builder;
679    use arrow_array::builder::Decimal256Builder;
680    use arrow_array::builder::Int32Builder;
681    use arrow_array::builder::LargeListBuilder;
682    use arrow_array::builder::ListBuilder;
683    use arrow_array::builder::StringViewBuilder;
684    use arrow_array::new_null_array;
685    use arrow_array::types::ArrowPrimitiveType;
686    use arrow_array::types::Float16Type;
687    use arrow_buffer::BooleanBuffer;
688    use arrow_buffer::Buffer as ArrowBuffer;
689    use arrow_buffer::OffsetBuffer;
690    use arrow_buffer::ScalarBuffer;
691    use arrow_schema::DataType;
692    use arrow_schema::Field;
693    use arrow_schema::Fields;
694    use arrow_schema::Schema;
695    use rstest::rstest;
696
697    use crate::ArrayRef;
698    use crate::IntoArray;
699    use crate::arrays::Decimal;
700    use crate::arrays::FixedSizeList;
701    use crate::arrays::List;
702    use crate::arrays::ListView;
703    use crate::arrays::Primitive;
704    use crate::arrays::Struct;
705    use crate::arrays::VarBinView;
706    use crate::arrays::fixed_size_list::FixedSizeListArrayExt;
707    use crate::arrays::list::ListArrayExt;
708    use crate::arrays::listview::ListViewArrayExt;
709    use crate::arrays::struct_::StructArrayExt;
710    use crate::arrow::FromArrowArray as _;
711    use crate::dtype::DType;
712    use crate::dtype::Nullability;
713    use crate::dtype::PType;
714    use crate::extension::datetime::TimeUnit;
715    use crate::extension::datetime::Timestamp;
716
717    #[rstest]
718    #[case::i8(
719        Arc::new(Int8Array::from(vec![Some(1), None, Some(3), Some(4)])),
720        Arc::new(Int8Array::from(vec![1, 2, 3, 4])),
721        PType::I8,
722    )]
723    #[case::i16(
724        Arc::new(Int16Array::from(vec![Some(100), None, Some(300), Some(400)])),
725        Arc::new(Int16Array::from(vec![100, 200, 300, 400])),
726        PType::I16,
727    )]
728    #[case::i32(
729        Arc::new(Int32Array::from(vec![Some(1000), None, Some(3000), Some(4000)])),
730        Arc::new(Int32Array::from(vec![1000, 2000, 3000, 4000])),
731        PType::I32,
732    )]
733    #[case::i64(
734        Arc::new(Int64Array::from(vec![Some(10000), None, Some(30000), Some(40000)])),
735        Arc::new(Int64Array::from(vec![10000_i64, 20000, 30000, 40000])),
736        PType::I64,
737    )]
738    #[case::u8(
739        Arc::new(UInt8Array::from(vec![Some(1), None, Some(3), Some(4)])),
740        Arc::new(UInt8Array::from(vec![1_u8, 2, 3, 4])),
741        PType::U8,
742    )]
743    #[case::u16(
744        Arc::new(UInt16Array::from(vec![Some(100), None, Some(300), Some(400)])),
745        Arc::new(UInt16Array::from(vec![100_u16, 200, 300, 400])),
746        PType::U16,
747    )]
748    #[case::u32(
749        Arc::new(UInt32Array::from(vec![Some(1000), None, Some(3000), Some(4000)])),
750        Arc::new(UInt32Array::from(vec![1000_u32, 2000, 3000, 4000])),
751        PType::U32,
752    )]
753    #[case::u64(
754        Arc::new(UInt64Array::from(vec![Some(10000), None, Some(30000), Some(40000)])),
755        Arc::new(UInt64Array::from(vec![10000_u64, 20000, 30000, 40000])),
756        PType::U64,
757    )]
758    #[case::f32(
759        Arc::new(Float32Array::from(vec![Some(1.5), None, Some(3.5), Some(4.5)])),
760        Arc::new(Float32Array::from(vec![1.5_f32, 2.5, 3.5, 4.5])),
761        PType::F32,
762    )]
763    #[case::f64(
764        Arc::new(Float64Array::from(vec![Some(1.5), None, Some(3.5), Some(4.5)])),
765        Arc::new(Float64Array::from(vec![1.5_f64, 2.5, 3.5, 4.5])),
766        PType::F64,
767    )]
768    fn test_primitive_array_conversion(
769        #[case] nullable: Arc<dyn ArrowArray>,
770        #[case] non_nullable: Arc<dyn ArrowArray>,
771        #[case] expected_ptype: PType,
772    ) {
773        let v_null = ArrayRef::from_arrow(nullable.as_ref(), true).unwrap();
774        let v_non_null = ArrayRef::from_arrow(non_nullable.as_ref(), false).unwrap();
775        assert_eq!(v_null.len(), 4);
776        assert_eq!(v_non_null.len(), 4);
777        assert_eq!(v_null.as_::<Primitive>().ptype(), expected_ptype);
778        assert_eq!(v_non_null.as_::<Primitive>().ptype(), expected_ptype);
779    }
780
781    #[test]
782    fn test_float16_array_conversion() {
783        let values = vec![
784            Some(<Float16Type as ArrowPrimitiveType>::Native::from_f32(1.5)),
785            None,
786            Some(<Float16Type as ArrowPrimitiveType>::Native::from_f32(3.5)),
787        ];
788        let arrow_array = arrow_array::PrimitiveArray::<Float16Type>::from(values);
789        let vortex_array = ArrayRef::from_arrow(&arrow_array, true).unwrap();
790
791        let non_null_values = vec![
792            <Float16Type as ArrowPrimitiveType>::Native::from_f32(1.5),
793            <Float16Type as ArrowPrimitiveType>::Native::from_f32(2.5),
794        ];
795        let arrow_array_non_null =
796            arrow_array::PrimitiveArray::<Float16Type>::from(non_null_values);
797        let vortex_array_non_null = ArrayRef::from_arrow(&arrow_array_non_null, false).unwrap();
798
799        assert_eq!(vortex_array.len(), 3);
800        assert_eq!(vortex_array_non_null.len(), 2);
801
802        // Verify metadata - should be PrimitiveArray with F16 ptype
803        let primitive_array = vortex_array.as_::<Primitive>();
804        assert_eq!(primitive_array.ptype(), PType::F16);
805
806        let primitive_array_non_null = vortex_array_non_null.as_::<Primitive>();
807        assert_eq!(primitive_array_non_null.ptype(), PType::F16);
808    }
809
810    // Test decimal array conversions
811    #[test]
812    fn test_decimal128_array_conversion() {
813        let mut builder = Decimal128Builder::with_capacity(4);
814        builder.append_value(12345);
815        builder.append_null();
816        builder.append_value(67890);
817        builder.append_value(11111);
818        let decimal_array = builder.finish().with_precision_and_scale(10, 2).unwrap();
819
820        let vortex_array = ArrayRef::from_arrow(&decimal_array, true).unwrap();
821        assert_eq!(vortex_array.len(), 4);
822
823        let mut builder_non_null = Decimal128Builder::with_capacity(3);
824        builder_non_null.append_value(12345);
825        builder_non_null.append_value(67890);
826        builder_non_null.append_value(11111);
827        let decimal_array_non_null = builder_non_null
828            .finish()
829            .with_precision_and_scale(10, 2)
830            .unwrap();
831
832        let vortex_array_non_null = ArrayRef::from_arrow(&decimal_array_non_null, false).unwrap();
833        assert_eq!(vortex_array_non_null.len(), 3);
834
835        // Verify metadata - should be DecimalArray with correct precision and scale
836        let decimal_vortex_array = vortex_array.as_::<Decimal>();
837        assert_eq!(decimal_vortex_array.decimal_dtype().precision(), 10);
838        assert_eq!(decimal_vortex_array.decimal_dtype().scale(), 2);
839
840        let decimal_vortex_array_non_null = vortex_array_non_null.as_::<Decimal>();
841        assert_eq!(
842            decimal_vortex_array_non_null.decimal_dtype().precision(),
843            10
844        );
845        assert_eq!(decimal_vortex_array_non_null.decimal_dtype().scale(), 2);
846    }
847
848    #[test]
849    fn test_decimal256_array_conversion() {
850        let mut builder = Decimal256Builder::with_capacity(4);
851        builder.append_value(arrow_buffer::i256::from_i128(12345));
852        builder.append_null();
853        builder.append_value(arrow_buffer::i256::from_i128(67890));
854        builder.append_value(arrow_buffer::i256::from_i128(11111));
855        let decimal_array = builder.finish().with_precision_and_scale(38, 10).unwrap();
856
857        let vortex_array = ArrayRef::from_arrow(&decimal_array, true).unwrap();
858        assert_eq!(vortex_array.len(), 4);
859
860        let mut builder_non_null = Decimal256Builder::with_capacity(3);
861        builder_non_null.append_value(arrow_buffer::i256::from_i128(12345));
862        builder_non_null.append_value(arrow_buffer::i256::from_i128(67890));
863        builder_non_null.append_value(arrow_buffer::i256::from_i128(11111));
864        let decimal_array_non_null = builder_non_null
865            .finish()
866            .with_precision_and_scale(38, 10)
867            .unwrap();
868
869        let vortex_array_non_null = ArrayRef::from_arrow(&decimal_array_non_null, false).unwrap();
870        assert_eq!(vortex_array_non_null.len(), 3);
871
872        // Verify metadata - should be DecimalArray with correct precision and scale
873        let decimal_vortex_array = vortex_array.as_::<Decimal>();
874        assert_eq!(decimal_vortex_array.decimal_dtype().precision(), 38);
875        assert_eq!(decimal_vortex_array.decimal_dtype().scale(), 10);
876
877        let decimal_vortex_array_non_null = vortex_array_non_null.as_::<Decimal>();
878        assert_eq!(
879            decimal_vortex_array_non_null.decimal_dtype().precision(),
880            38
881        );
882        assert_eq!(decimal_vortex_array_non_null.decimal_dtype().scale(), 10);
883    }
884
885    // Test temporal array conversions
886    #[rstest]
887    #[case::timestamp_second(
888        Arc::new(TimestampSecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
889        Arc::new(TimestampSecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
890    )]
891    #[case::timestamp_millisecond(
892        Arc::new(TimestampMillisecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
893        Arc::new(TimestampMillisecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
894    )]
895    #[case::timestamp_microsecond(
896        Arc::new(TimestampMicrosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
897        Arc::new(TimestampMicrosecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
898    )]
899    #[case::timestamp_nanosecond(
900        Arc::new(TimestampNanosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
901        Arc::new(TimestampNanosecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
902    )]
903    #[case::time32_second(
904        Arc::new(Time32SecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
905        Arc::new(Time32SecondArray::from(vec![1000_i32, 2000, 3000, 4000])),
906    )]
907    #[case::time32_millisecond(
908        Arc::new(Time32MillisecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
909        Arc::new(Time32MillisecondArray::from(vec![1000_i32, 2000, 3000, 4000])),
910    )]
911    #[case::time64_microsecond(
912        Arc::new(Time64MicrosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
913        Arc::new(Time64MicrosecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
914    )]
915    #[case::time64_nanosecond(
916        Arc::new(Time64NanosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
917        Arc::new(Time64NanosecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
918    )]
919    #[case::date32(
920        Arc::new(Date32Array::from(vec![Some(18000), None, Some(18002), Some(18003)])),
921        Arc::new(Date32Array::from(vec![18000_i32, 18001, 18002, 18003])),
922    )]
923    #[case::date64(
924        Arc::new(Date64Array::from(vec![Some(1555200000000), None, Some(1555286400000), Some(1555372800000)]
925        )),
926        Arc::new(Date64Array::from(vec![1555200000000_i64, 1555213600000, 1555286400000, 1555372800000]
927        )),
928    )]
929    fn test_temporal_array_conversion(
930        #[case] nullable: Arc<dyn ArrowArray>,
931        #[case] non_nullable: Arc<dyn ArrowArray>,
932    ) {
933        let v_null = ArrayRef::from_arrow(nullable.as_ref(), true).unwrap();
934        let v_non_null = ArrayRef::from_arrow(non_nullable.as_ref(), false).unwrap();
935        assert_eq!(v_null.len(), 4);
936        assert_eq!(v_non_null.len(), 4);
937    }
938
939    #[test]
940    fn test_timestamp_timezone_microsecond_array_conversion() {
941        let arrow_array =
942            TimestampMicrosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])
943                .with_timezone("UTC");
944        let vortex_array = ArrayRef::from_arrow(&arrow_array, true).unwrap();
945
946        let arrow_array_non_null =
947            TimestampMicrosecondArray::from(vec![1000_i64, 2000, 3000, 4000]).with_timezone("UTC");
948        let vortex_array_non_null = ArrayRef::from_arrow(&arrow_array_non_null, false).unwrap();
949
950        assert_eq!(vortex_array.len(), 4);
951        assert_eq!(
952            vortex_array.dtype(),
953            &DType::Extension(
954                Timestamp::new_with_tz(
955                    TimeUnit::Microseconds,
956                    Some("UTC".into()),
957                    Nullability::Nullable
958                )
959                .erased()
960            ),
961        );
962        assert_eq!(vortex_array_non_null.len(), 4);
963        assert_eq!(
964            vortex_array_non_null.dtype(),
965            &DType::Extension(
966                Timestamp::new_with_tz(
967                    TimeUnit::Microseconds,
968                    Some("UTC".into()),
969                    Nullability::NonNullable
970                )
971                .erased()
972            )
973        );
974    }
975
976    // Test string/binary array conversions
977    #[rstest]
978    #[case::utf8(
979        Arc::new(StringArray::from(vec![Some("hello"), None, Some("world"), Some("test")])),
980        Arc::new(StringArray::from(vec!["hello", "world", "test", "vortex"])),
981        DType::Utf8(Nullability::NonNullable),
982    )]
983    #[case::large_utf8(
984        Arc::new(LargeStringArray::from(vec![Some("hello"), None, Some("world"), Some("test")])),
985        Arc::new(LargeStringArray::from(vec!["hello", "world", "test", "vortex"])),
986        DType::Utf8(Nullability::NonNullable),
987    )]
988    #[case::binary(
989        Arc::new(BinaryArray::from(vec![
990            Some("hello".as_bytes()), None, Some("world".as_bytes()), Some("test".as_bytes()),
991        ])),
992        Arc::new(BinaryArray::from(vec![
993            "hello".as_bytes(), "world".as_bytes(), "test".as_bytes(), "vortex".as_bytes(),
994        ])),
995        DType::Binary(Nullability::NonNullable),
996    )]
997    #[case::large_binary(
998        Arc::new(LargeBinaryArray::from(vec![
999            Some("hello".as_bytes()), None, Some("world".as_bytes()), Some("test".as_bytes()),
1000        ])),
1001        Arc::new(LargeBinaryArray::from(vec![
1002            "hello".as_bytes(), "world".as_bytes(), "test".as_bytes(), "vortex".as_bytes(),
1003        ])),
1004        DType::Binary(Nullability::NonNullable),
1005    )]
1006    fn test_string_binary_array_conversion(
1007        #[case] nullable: Arc<dyn ArrowArray>,
1008        #[case] non_nullable: Arc<dyn ArrowArray>,
1009        #[case] expected_non_nullable_dtype: DType,
1010    ) {
1011        let v_null = ArrayRef::from_arrow(nullable.as_ref(), true).unwrap();
1012        let v_non_null = ArrayRef::from_arrow(non_nullable.as_ref(), false).unwrap();
1013        assert_eq!(v_null.len(), 4);
1014        assert_eq!(v_non_null.len(), 4);
1015        assert_eq!(v_null.dtype(), &expected_non_nullable_dtype.as_nullable());
1016        assert_eq!(v_non_null.dtype(), &expected_non_nullable_dtype);
1017    }
1018
1019    #[test]
1020    fn test_utf8_view_array_conversion() {
1021        let mut builder = StringViewBuilder::new();
1022        builder.append_value("hello");
1023        builder.append_null();
1024        builder.append_value("world");
1025        builder.append_value("test");
1026        let arrow_array = builder.finish();
1027        let vortex_array = ArrayRef::from_arrow(&arrow_array, true).unwrap();
1028
1029        let mut builder_non_null = StringViewBuilder::new();
1030        builder_non_null.append_value("hello");
1031        builder_non_null.append_value("world");
1032        builder_non_null.append_value("test");
1033        builder_non_null.append_value("vortex");
1034        let arrow_array_non_null = builder_non_null.finish();
1035        let vortex_array_non_null = ArrayRef::from_arrow(&arrow_array_non_null, false).unwrap();
1036
1037        assert_eq!(vortex_array.len(), 4);
1038        assert_eq!(vortex_array_non_null.len(), 4);
1039
1040        // Verify metadata - should be VarBinViewArray with correct buffer count and dtype
1041        let varbin_view_array = vortex_array.as_::<VarBinView>();
1042        assert_eq!(
1043            varbin_view_array.data_buffers().len(),
1044            arrow_array.data_buffers().len()
1045        );
1046        assert_eq!(varbin_view_array.dtype(), &DType::Utf8(true.into()));
1047
1048        let varbin_view_array_non_null = vortex_array_non_null.as_::<VarBinView>();
1049        assert_eq!(
1050            varbin_view_array_non_null.data_buffers().len(),
1051            arrow_array_non_null.data_buffers().len()
1052        );
1053        assert_eq!(
1054            varbin_view_array_non_null.dtype(),
1055            &DType::Utf8(false.into())
1056        );
1057    }
1058
1059    #[test]
1060    fn test_binary_view_array_conversion() {
1061        let mut builder = BinaryViewBuilder::new();
1062        builder.append_value(b"hello");
1063        builder.append_null();
1064        builder.append_value(b"world");
1065        builder.append_value(b"test");
1066        let arrow_array = builder.finish();
1067        let vortex_array = ArrayRef::from_arrow(&arrow_array, true).unwrap();
1068
1069        let mut builder_non_null = BinaryViewBuilder::new();
1070        builder_non_null.append_value(b"hello");
1071        builder_non_null.append_value(b"world");
1072        builder_non_null.append_value(b"test");
1073        builder_non_null.append_value(b"vortex");
1074        let arrow_array_non_null = builder_non_null.finish();
1075        let vortex_array_non_null = ArrayRef::from_arrow(&arrow_array_non_null, false).unwrap();
1076
1077        assert_eq!(vortex_array.len(), 4);
1078        assert_eq!(vortex_array_non_null.len(), 4);
1079
1080        // Verify metadata - should be VarBinViewArray with correct buffer count and dtype
1081        let varbin_view_array = vortex_array.as_::<VarBinView>();
1082        assert_eq!(
1083            varbin_view_array.data_buffers().len(),
1084            arrow_array.data_buffers().len()
1085        );
1086        assert_eq!(varbin_view_array.dtype(), &DType::Binary(true.into()));
1087
1088        let varbin_view_array_non_null = vortex_array_non_null.as_::<VarBinView>();
1089        assert_eq!(
1090            varbin_view_array_non_null.data_buffers().len(),
1091            arrow_array_non_null.data_buffers().len()
1092        );
1093        assert_eq!(
1094            varbin_view_array_non_null.dtype(),
1095            &DType::Binary(false.into())
1096        );
1097    }
1098
1099    // Test boolean array conversions
1100    #[test]
1101    fn test_boolean_array_conversion() {
1102        let arrow_array = BooleanArray::from(vec![Some(true), None, Some(false), Some(true)]);
1103        let vortex_array = ArrayRef::from_arrow(&arrow_array, true).unwrap();
1104
1105        let arrow_array_non_null = BooleanArray::from(vec![true, false, true, false]);
1106        let vortex_array_non_null = ArrayRef::from_arrow(&arrow_array_non_null, false).unwrap();
1107
1108        assert_eq!(vortex_array.len(), 4);
1109        assert_eq!(vortex_array_non_null.len(), 4);
1110    }
1111
1112    // Test struct array conversions
1113    #[test]
1114    fn test_struct_array_conversion() {
1115        let fields = vec![
1116            Field::new("field1", DataType::Int32, true),
1117            Field::new("field2", DataType::Utf8, false),
1118        ];
1119        let schema = Fields::from(fields);
1120
1121        let field1_data = Int32Array::from(vec![Some(1), None, Some(3)]);
1122        let field2_data = StringArray::from(vec!["a", "b", "c"]);
1123
1124        let arrow_array = StructArray::new(
1125            schema.clone(),
1126            vec![Arc::new(field1_data), Arc::new(field2_data)],
1127            None,
1128        );
1129
1130        let vortex_array = ArrayRef::from_arrow(&arrow_array, false).unwrap();
1131        assert_eq!(vortex_array.len(), 3);
1132
1133        // Verify metadata - should be StructArray with correct field names
1134        let struct_vortex_array = vortex_array.as_::<Struct>();
1135        assert_eq!(struct_vortex_array.names().len(), 2);
1136        assert_eq!(struct_vortex_array.names()[0], "field1");
1137        assert_eq!(struct_vortex_array.names()[1], "field2");
1138
1139        // Test nullable struct
1140        let nullable_array = StructArray::new(
1141            schema,
1142            vec![
1143                Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])),
1144                Arc::new(StringArray::from(vec!["a", "b", "c"])),
1145            ],
1146            Some(arrow_buffer::NullBuffer::new(BooleanBuffer::from(vec![
1147                true, false, true,
1148            ]))),
1149        );
1150
1151        let vortex_nullable_array = ArrayRef::from_arrow(&nullable_array, true).unwrap();
1152        assert_eq!(vortex_nullable_array.len(), 3);
1153
1154        // Verify metadata for nullable struct
1155        let struct_vortex_nullable_array = vortex_nullable_array.as_::<Struct>();
1156        assert_eq!(struct_vortex_nullable_array.names().len(), 2);
1157        assert_eq!(struct_vortex_nullable_array.names()[0], "field1");
1158        assert_eq!(struct_vortex_nullable_array.names()[1], "field2");
1159    }
1160
1161    // Test list array conversions
1162    #[test]
1163    fn test_list_array_conversion() {
1164        let mut builder = ListBuilder::new(Int32Builder::new());
1165        builder.append_value([Some(1), None, Some(3)]);
1166        builder.append_null();
1167        builder.append_value([Some(4), Some(5)]);
1168        let arrow_array = builder.finish();
1169
1170        let vortex_array = ArrayRef::from_arrow(&arrow_array, true).unwrap();
1171        assert_eq!(vortex_array.len(), 3);
1172
1173        // Verify metadata - should be ListArray with correct offsets
1174        let list_vortex_array = vortex_array.as_::<List>();
1175        let offsets_array = list_vortex_array.offsets().as_::<Primitive>();
1176        assert_eq!(offsets_array.len(), 4); // n+1 offsets for n lists
1177        assert_eq!(offsets_array.ptype(), PType::I32);
1178
1179        // Test non-nullable list
1180        let mut builder_non_null = ListBuilder::new(Int32Builder::new());
1181        builder_non_null.append_value([Some(1), None, Some(3)]);
1182        builder_non_null.append_value([Some(4), Some(5)]);
1183        let arrow_array_non_null = builder_non_null.finish();
1184
1185        let vortex_array_non_null = ArrayRef::from_arrow(&arrow_array_non_null, false).unwrap();
1186        assert_eq!(vortex_array_non_null.len(), 2);
1187
1188        // Verify metadata for non-nullable list
1189        let list_vortex_array_non_null = vortex_array_non_null.as_::<List>();
1190        let offsets_array_non_null = list_vortex_array_non_null.offsets().as_::<Primitive>();
1191        assert_eq!(offsets_array_non_null.len(), 3); // n+1 offsets for n lists
1192        assert_eq!(offsets_array_non_null.ptype(), PType::I32);
1193    }
1194
1195    #[test]
1196    fn test_large_list_array_conversion() {
1197        let mut builder = LargeListBuilder::new(Int32Builder::new());
1198        builder.append_value([Some(1), None, Some(3)]);
1199        builder.append_null();
1200        builder.append_value([Some(4), Some(5)]);
1201        let arrow_array = builder.finish();
1202
1203        let vortex_array = ArrayRef::from_arrow(&arrow_array, true).unwrap();
1204        assert_eq!(vortex_array.len(), 3);
1205
1206        // Verify metadata - should be ListArray with correct offsets (I64 for large lists)
1207        let list_vortex_array = vortex_array.as_::<List>();
1208        let offsets_array = list_vortex_array.offsets().as_::<Primitive>();
1209        assert_eq!(offsets_array.len(), 4); // n+1 offsets for n lists
1210        assert_eq!(offsets_array.ptype(), PType::I64); // Large lists use I64 offsets
1211
1212        // Test non-nullable large list
1213        let mut builder_non_null = LargeListBuilder::new(Int32Builder::new());
1214        builder_non_null.append_value([Some(1), None, Some(3)]);
1215        builder_non_null.append_value([Some(4), Some(5)]);
1216        let arrow_array_non_null = builder_non_null.finish();
1217
1218        let vortex_array_non_null = ArrayRef::from_arrow(&arrow_array_non_null, false).unwrap();
1219        assert_eq!(vortex_array_non_null.len(), 2);
1220
1221        // Verify metadata for non-nullable large list
1222        let list_vortex_array_non_null = vortex_array_non_null.as_::<List>();
1223        let offsets_array_non_null = list_vortex_array_non_null.offsets().as_::<Primitive>();
1224        assert_eq!(offsets_array_non_null.len(), 3); // n+1 offsets for n lists
1225        assert_eq!(offsets_array_non_null.ptype(), PType::I64); // Large lists use I64 offsets
1226    }
1227
1228    #[test]
1229    fn test_fixed_size_list_array_conversion() {
1230        // Create elements for the fixed-size lists
1231        let values = Int32Array::from(vec![
1232            Some(1),
1233            Some(2),
1234            Some(3), // First list
1235            Some(4),
1236            None,
1237            Some(6), // Second list (with null element)
1238            Some(7),
1239            Some(8),
1240            Some(9), // Third list
1241            Some(10),
1242            Some(11),
1243            Some(12), // Fourth list
1244        ]);
1245
1246        // Create a FixedSizeListArray with list_size=3
1247        let field = Arc::new(Field::new("item", DataType::Int32, true));
1248        let arrow_array =
1249            ArrowFixedSizeListArray::try_new(Arc::clone(&field), 3, Arc::new(values), None)
1250                .unwrap();
1251        let vortex_array = ArrayRef::from_arrow(&arrow_array, false).unwrap();
1252
1253        assert_eq!(vortex_array.len(), 4);
1254
1255        // Verify metadata - should be FixedSizeListArray with correct list size
1256        let fsl_vortex_array = vortex_array.as_::<FixedSizeList>();
1257        assert_eq!(fsl_vortex_array.list_size(), 3);
1258        assert_eq!(fsl_vortex_array.elements().len(), 12); // 4 lists * 3 elements
1259
1260        // Test nullable fixed-size list
1261        let values_nullable = Int32Array::from(vec![
1262            Some(1),
1263            Some(2),
1264            Some(3), // First list
1265            Some(4),
1266            None,
1267            Some(6), // Second list (will be null)
1268            Some(7),
1269            Some(8),
1270            Some(9), // Third list
1271        ]);
1272
1273        // Create nulls buffer - second list is null
1274        let null_buffer =
1275            arrow_buffer::NullBuffer::new(BooleanBuffer::from(vec![true, false, true]));
1276
1277        let arrow_array_nullable = ArrowFixedSizeListArray::try_new(
1278            field,
1279            3,
1280            Arc::new(values_nullable),
1281            Some(null_buffer),
1282        )
1283        .unwrap();
1284        let vortex_array_nullable = ArrayRef::from_arrow(&arrow_array_nullable, true).unwrap();
1285
1286        assert_eq!(vortex_array_nullable.len(), 3);
1287
1288        // Verify metadata for nullable array
1289        let fsl_vortex_array_nullable = vortex_array_nullable.as_::<FixedSizeList>();
1290        assert_eq!(fsl_vortex_array_nullable.list_size(), 3);
1291        assert_eq!(fsl_vortex_array_nullable.elements().len(), 9); // 3 lists * 3 elements
1292    }
1293
1294    #[test]
1295    fn test_list_view_array_conversion() {
1296        // Create values array for the lists
1297        let values = Int32Array::from(vec![
1298            Some(1),
1299            Some(2),
1300            Some(3), // First list [1, 2, 3]
1301            Some(4),
1302            Some(5), // Second list [4, 5]
1303            Some(6), // Third list [6]
1304            Some(7),
1305            Some(8),
1306            Some(9),
1307            Some(10), // Fourth list [7, 8, 9, 10]
1308        ]);
1309
1310        // Create offsets and sizes for ListView
1311        let offsets = ScalarBuffer::from(vec![0i32, 3, 5, 6]);
1312        let sizes = ScalarBuffer::from(vec![3i32, 2, 1, 4]);
1313
1314        let field = Arc::new(Field::new("item", DataType::Int32, true));
1315        let arrow_array = GenericListViewArray::try_new(
1316            Arc::clone(&field),
1317            offsets.clone(),
1318            sizes.clone(),
1319            Arc::new(values.clone()),
1320            None,
1321        )
1322        .unwrap();
1323
1324        let vortex_array = ArrayRef::from_arrow(&arrow_array, false).unwrap();
1325        assert_eq!(vortex_array.len(), 4);
1326
1327        // Verify metadata - should be ListViewArray with correct offsets and sizes
1328        let list_view_vortex_array = vortex_array.as_::<ListView>();
1329        let offsets_array = list_view_vortex_array.offsets().as_::<Primitive>();
1330        let sizes_array = list_view_vortex_array.sizes().as_::<Primitive>();
1331
1332        assert_eq!(offsets_array.len(), 4);
1333        assert_eq!(offsets_array.ptype(), PType::I32);
1334        assert_eq!(sizes_array.len(), 4);
1335        assert_eq!(sizes_array.ptype(), PType::I32);
1336
1337        // Test nullable ListView
1338        let null_buffer =
1339            arrow_buffer::NullBuffer::new(BooleanBuffer::from(vec![true, false, true, true]));
1340
1341        let arrow_array_nullable = GenericListViewArray::try_new(
1342            Arc::clone(&field),
1343            offsets,
1344            sizes,
1345            Arc::new(values.clone()),
1346            Some(null_buffer),
1347        )
1348        .unwrap();
1349
1350        let vortex_array_nullable = ArrayRef::from_arrow(&arrow_array_nullable, true).unwrap();
1351        assert_eq!(vortex_array_nullable.len(), 4);
1352
1353        // Test LargeListView (i64 offsets and sizes)
1354        let large_offsets = ScalarBuffer::from(vec![0i64, 3, 5, 6]);
1355        let large_sizes = ScalarBuffer::from(vec![3i64, 2, 1, 4]);
1356
1357        let large_arrow_array = GenericListViewArray::try_new(
1358            field,
1359            large_offsets,
1360            large_sizes,
1361            Arc::new(values),
1362            None,
1363        )
1364        .unwrap();
1365
1366        let large_vortex_array = ArrayRef::from_arrow(&large_arrow_array, false).unwrap();
1367        assert_eq!(large_vortex_array.len(), 4);
1368
1369        // Verify metadata for large ListView
1370        let large_list_view_vortex_array = large_vortex_array.as_::<ListView>();
1371        let large_offsets_array = large_list_view_vortex_array.offsets().as_::<Primitive>();
1372        let large_sizes_array = large_list_view_vortex_array.sizes().as_::<Primitive>();
1373
1374        assert_eq!(large_offsets_array.len(), 4);
1375        assert_eq!(large_offsets_array.ptype(), PType::I64); // Large ListView uses I64 offsets
1376        assert_eq!(large_sizes_array.len(), 4);
1377        assert_eq!(large_sizes_array.ptype(), PType::I64); // Large ListView uses I64 sizes
1378    }
1379
1380    // Test null array conversions
1381    #[test]
1382    fn test_null_array_conversion() {
1383        let arrow_array = NullArray::new(5);
1384        let vortex_array = ArrayRef::from_arrow(&arrow_array, true).unwrap();
1385        assert_eq!(vortex_array.len(), 5);
1386    }
1387
1388    // Test buffer conversions
1389    #[test]
1390    fn test_arrow_buffer_conversion() {
1391        let data = vec![1u8, 2, 3, 4, 5];
1392        let arrow_buffer = ArrowBuffer::from_vec(data);
1393        let vortex_array = arrow_buffer.into_array();
1394        assert_eq!(vortex_array.len(), 5);
1395    }
1396
1397    #[test]
1398    fn test_boolean_buffer_conversion() {
1399        let data = vec![true, false, true, false, true];
1400        let boolean_buffer = BooleanBuffer::from(data);
1401        let vortex_array = boolean_buffer.into_array();
1402        assert_eq!(vortex_array.len(), 5);
1403    }
1404
1405    #[test]
1406    fn test_scalar_buffer_conversion() {
1407        let data = vec![1i32, 2, 3, 4, 5];
1408        let scalar_buffer = ScalarBuffer::from(data);
1409        let vortex_array = scalar_buffer.into_array();
1410        assert_eq!(vortex_array.len(), 5);
1411    }
1412
1413    #[test]
1414    fn test_offset_buffer_conversion() {
1415        let data = vec![0i32, 2, 5, 8, 10];
1416        let offset_buffer = OffsetBuffer::new(ScalarBuffer::from(data));
1417        let vortex_array = offset_buffer.into_array();
1418        assert_eq!(vortex_array.len(), 5);
1419    }
1420
1421    // Test RecordBatch conversions
1422    #[test]
1423    fn test_record_batch_conversion() {
1424        let schema = Arc::new(Schema::new(vec![
1425            Field::new("field1", DataType::Int32, false),
1426            Field::new("field2", DataType::Utf8, false),
1427        ]));
1428
1429        let field1_data = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
1430        let field2_data = Arc::new(StringArray::from(vec!["a", "b", "c", "d"]));
1431
1432        let record_batch = RecordBatch::try_new(schema, vec![field1_data, field2_data]).unwrap();
1433
1434        let vortex_array = ArrayRef::from_arrow(record_batch, false).unwrap();
1435        assert_eq!(vortex_array.len(), 4);
1436
1437        // Test with reference
1438        let schema = Arc::new(Schema::new(vec![
1439            Field::new("field1", DataType::Int32, false),
1440            Field::new("field2", DataType::Utf8, false),
1441        ]));
1442
1443        let field1_data = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
1444        let field2_data = Arc::new(StringArray::from(vec!["a", "b", "c", "d"]));
1445
1446        let record_batch = RecordBatch::try_new(schema, vec![field1_data, field2_data]).unwrap();
1447
1448        let vortex_array = ArrayRef::from_arrow(&record_batch, false).unwrap();
1449        assert_eq!(vortex_array.len(), 4);
1450    }
1451
1452    // Test dynamic dispatch conversion
1453    #[test]
1454    fn test_dyn_array_conversion() {
1455        let int_array = Int32Array::from(vec![1, 2, 3, 4]);
1456        let dyn_array: &dyn ArrowArray = &int_array;
1457        let vortex_array = ArrayRef::from_arrow(dyn_array, false).unwrap();
1458        assert_eq!(vortex_array.len(), 4);
1459
1460        let string_array = StringArray::from(vec!["a", "b", "c"]);
1461        let dyn_array: &dyn ArrowArray = &string_array;
1462        let vortex_array = ArrayRef::from_arrow(dyn_array, false).unwrap();
1463        assert_eq!(vortex_array.len(), 3);
1464
1465        let bool_array = BooleanArray::from(vec![true, false, true]);
1466        let dyn_array: &dyn ArrowArray = &bool_array;
1467        let vortex_array = ArrayRef::from_arrow(dyn_array, false).unwrap();
1468        assert_eq!(vortex_array.len(), 3);
1469    }
1470
1471    // Existing tests
1472    #[test]
1473    pub fn nullable_may_contain_non_nullable() {
1474        let null_struct_array_with_non_nullable_field = new_null_array(
1475            &DataType::Struct(Fields::from(vec![Field::new(
1476                "non_nullable_inner",
1477                DataType::Int32,
1478                false,
1479            )])),
1480            1,
1481        );
1482        ArrayRef::from_arrow(null_struct_array_with_non_nullable_field.as_ref(), true).unwrap();
1483    }
1484
1485    #[test]
1486    pub fn nullable_may_contain_deeply_nested_non_nullable() {
1487        let null_struct_array_with_non_nullable_field = new_null_array(
1488            &DataType::Struct(Fields::from(vec![Field::new(
1489                "non_nullable_inner",
1490                DataType::Struct(Fields::from(vec![Field::new(
1491                    "non_nullable_deeper_inner",
1492                    DataType::Int32,
1493                    false,
1494                )])),
1495                false,
1496            )])),
1497            1,
1498        );
1499        ArrayRef::from_arrow(null_struct_array_with_non_nullable_field.as_ref(), true).unwrap();
1500    }
1501
1502    #[test]
1503    fn non_nullable_request_rejects_nulls() {
1504        // Requesting `nullable = false` on an Arrow array that physically contains nulls is a
1505        // contradiction and must surface as an error, not a panic.
1506        let arrow_array = Int32Array::from(vec![Some(1), None, Some(3)]);
1507        assert!(ArrayRef::from_arrow(&arrow_array, false).is_err());
1508    }
1509
1510    #[test]
1511    fn non_nullable_request_rejects_null_array() {
1512        // An Arrow NullArray is entirely null, so it cannot be converted to a non-nullable
1513        // Vortex array.
1514        let arrow_array = NullArray::new(5);
1515        assert!(ArrayRef::from_arrow(&arrow_array, false).is_err());
1516    }
1517
1518    #[test]
1519    fn non_nullable_struct_with_nulls_errors() {
1520        // A struct array carrying top-level nulls cannot be converted to a non-nullable Vortex
1521        // struct; the struct-level validity reconciliation must error rather than panic.
1522        let struct_array = new_null_array(
1523            &DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, true)])),
1524            3,
1525        );
1526        assert!(ArrayRef::from_arrow(struct_array.as_ref(), false).is_err());
1527    }
1528
1529    #[test]
1530    fn non_nullable_list_with_nulls_errors() {
1531        // Likewise for a list array with a null entry: requesting a non-nullable list must error
1532        // rather than panic.
1533        let mut builder = ListBuilder::new(Int32Builder::new());
1534        builder.append_value([Some(1), Some(2)]);
1535        builder.append_null();
1536        let list = builder.finish();
1537        assert!(ArrayRef::from_arrow(&list, false).is_err());
1538    }
1539
1540    #[test]
1541    pub fn nullable_struct_containing_non_nullable_dictionary_with_nulls_errors() {
1542        // `remove_nulls` cannot strip pushed-down nulls out of a non-nullable dictionary field,
1543        // so the values end up converted with `nullable = false` while still containing nulls.
1544        // This must surface as an error rather than panicking.
1545        let null_struct_array_with_non_nullable_field = new_null_array(
1546            &DataType::Struct(Fields::from(vec![Field::new(
1547                "non_nullable_deeper_inner",
1548                DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
1549                false,
1550            )])),
1551            1,
1552        );
1553
1554        assert!(
1555            ArrayRef::from_arrow(null_struct_array_with_non_nullable_field.as_ref(), true).is_err()
1556        );
1557    }
1558}