1use std::sync::Arc;
5
6use arrow_array::AnyDictionaryArray;
7use arrow_array::Array as ArrowArray;
8use arrow_array::ArrowPrimitiveType;
9use arrow_array::BooleanArray as ArrowBooleanArray;
10use arrow_array::DictionaryArray;
11use arrow_array::FixedSizeListArray as ArrowFixedSizeListArray;
12use arrow_array::GenericByteArray;
13use arrow_array::GenericByteViewArray;
14use arrow_array::GenericListArray;
15use arrow_array::GenericListViewArray;
16use arrow_array::NullArray as ArrowNullArray;
17use arrow_array::OffsetSizeTrait;
18use arrow_array::PrimitiveArray as ArrowPrimitiveArray;
19use arrow_array::RecordBatch;
20use arrow_array::StructArray as ArrowStructArray;
21use arrow_array::cast::AsArray;
22use arrow_array::cast::as_null_array;
23use arrow_array::make_array;
24use arrow_array::types::ArrowDictionaryKeyType;
25use arrow_array::types::ByteArrayType;
26use arrow_array::types::ByteViewType;
27use arrow_array::types::Date32Type;
28use arrow_array::types::Date64Type;
29use arrow_array::types::Decimal32Type;
30use arrow_array::types::Decimal64Type;
31use arrow_array::types::Decimal128Type;
32use arrow_array::types::Decimal256Type;
33use arrow_array::types::Float16Type;
34use arrow_array::types::Float32Type;
35use arrow_array::types::Float64Type;
36use arrow_array::types::Int8Type;
37use arrow_array::types::Int16Type;
38use arrow_array::types::Int32Type;
39use arrow_array::types::Int64Type;
40use arrow_array::types::Time32MillisecondType;
41use arrow_array::types::Time32SecondType;
42use arrow_array::types::Time64MicrosecondType;
43use arrow_array::types::Time64NanosecondType;
44use arrow_array::types::TimestampMicrosecondType;
45use arrow_array::types::TimestampMillisecondType;
46use arrow_array::types::TimestampNanosecondType;
47use arrow_array::types::TimestampSecondType;
48use arrow_array::types::UInt8Type;
49use arrow_array::types::UInt16Type;
50use arrow_array::types::UInt32Type;
51use arrow_array::types::UInt64Type;
52use arrow_buffer::ArrowNativeType;
53use arrow_buffer::BooleanBuffer;
54use arrow_buffer::Buffer as ArrowBuffer;
55use arrow_buffer::ScalarBuffer;
56use arrow_buffer::buffer::NullBuffer;
57use arrow_buffer::buffer::OffsetBuffer;
58use arrow_schema::DataType;
59use arrow_schema::TimeUnit as ArrowTimeUnit;
60use itertools::Itertools;
61use vortex_buffer::Alignment;
62use vortex_buffer::BitBuffer;
63use vortex_buffer::Buffer;
64use vortex_buffer::ByteBuffer;
65use vortex_error::VortexExpect as _;
66use vortex_error::VortexResult;
67use vortex_error::vortex_bail;
68use vortex_error::vortex_panic;
69
70use crate::ArrayRef;
71use crate::IntoArray;
72use crate::arrays::BoolArray;
73use crate::arrays::DecimalArray;
74use crate::arrays::DictArray;
75use crate::arrays::FixedSizeListArray;
76use crate::arrays::ListArray;
77use crate::arrays::ListViewArray;
78use crate::arrays::NullArray;
79use crate::arrays::PrimitiveArray;
80use crate::arrays::StructArray;
81use crate::arrays::TemporalArray;
82use crate::arrays::VarBinArray;
83use crate::arrays::VarBinViewArray;
84use crate::arrow::FromArrowArray;
85use crate::dtype::DType;
86use crate::dtype::DecimalDType;
87use crate::dtype::IntegerPType;
88use crate::dtype::NativePType;
89use crate::dtype::PType;
90use crate::dtype::i256;
91use crate::extension::datetime::TimeUnit;
92use crate::validity::Validity;
93
94impl IntoArray for ArrowBuffer {
95 fn into_array(self) -> ArrayRef {
96 PrimitiveArray::from_byte_buffer(
97 ByteBuffer::from_arrow_buffer(self, Alignment::of::<u8>()),
98 PType::U8,
99 Validity::NonNullable,
100 )
101 .into_array()
102 }
103}
104
105impl IntoArray for BooleanBuffer {
106 fn into_array(self) -> ArrayRef {
107 BoolArray::new(self.into(), Validity::NonNullable).into_array()
108 }
109}
110
111impl<T> IntoArray for ScalarBuffer<T>
112where
113 T: ArrowNativeType + NativePType,
114{
115 fn into_array(self) -> ArrayRef {
116 PrimitiveArray::new(
117 Buffer::<T>::from_arrow_scalar_buffer(self),
118 Validity::NonNullable,
119 )
120 .into_array()
121 }
122}
123
124impl<O> IntoArray for OffsetBuffer<O>
125where
126 O: IntegerPType + OffsetSizeTrait,
127{
128 fn into_array(self) -> ArrayRef {
129 let primitive = PrimitiveArray::new(
130 Buffer::from_arrow_scalar_buffer(self.into_inner()),
131 Validity::NonNullable,
132 );
133
134 primitive.into_array()
135 }
136}
137
// Generates `FromArrowArray<&ArrowPrimitiveArray<$T>> for ArrayRef`, producing a
// `PrimitiveArray` from the Arrow values buffer and the validity derived by `nulls`.
macro_rules! impl_from_arrow_primitive {
    ($T:path) => {
        impl FromArrowArray<&ArrowPrimitiveArray<$T>> for ArrayRef {
            fn from_arrow(
                array: &ArrowPrimitiveArray<$T>,
                nullable: bool,
            ) -> VortexResult<Self> {
                let values = Buffer::from_arrow_scalar_buffer(array.values().clone());
                let primitive = PrimitiveArray::new(values, nulls(array.nulls(), nullable));
                Ok(primitive.into_array())
            }
        }
    };
}
149
// Instantiate the primitive conversion for every integer and floating-point Arrow type.
impl_from_arrow_primitive!(Int8Type);
impl_from_arrow_primitive!(Int16Type);
impl_from_arrow_primitive!(Int32Type);
impl_from_arrow_primitive!(Int64Type);
impl_from_arrow_primitive!(UInt8Type);
impl_from_arrow_primitive!(UInt16Type);
impl_from_arrow_primitive!(UInt32Type);
impl_from_arrow_primitive!(UInt64Type);
impl_from_arrow_primitive!(Float16Type);
impl_from_arrow_primitive!(Float32Type);
impl_from_arrow_primitive!(Float64Type);
161
162impl FromArrowArray<&ArrowPrimitiveArray<Decimal32Type>> for ArrayRef {
163 fn from_arrow(
164 array: &ArrowPrimitiveArray<Decimal32Type>,
165 nullable: bool,
166 ) -> VortexResult<Self> {
167 let decimal_type = DecimalDType::new(array.precision(), array.scale());
168 let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone());
169 let validity = nulls(array.nulls(), nullable);
170 Ok(DecimalArray::new(buffer, decimal_type, validity).into_array())
171 }
172}
173
174impl FromArrowArray<&ArrowPrimitiveArray<Decimal64Type>> for ArrayRef {
175 fn from_arrow(
176 array: &ArrowPrimitiveArray<Decimal64Type>,
177 nullable: bool,
178 ) -> VortexResult<Self> {
179 let decimal_type = DecimalDType::new(array.precision(), array.scale());
180 let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone());
181 let validity = nulls(array.nulls(), nullable);
182 Ok(DecimalArray::new(buffer, decimal_type, validity).into_array())
183 }
184}
185
186impl FromArrowArray<&ArrowPrimitiveArray<Decimal128Type>> for ArrayRef {
187 fn from_arrow(
188 array: &ArrowPrimitiveArray<Decimal128Type>,
189 nullable: bool,
190 ) -> VortexResult<Self> {
191 let decimal_type = DecimalDType::new(array.precision(), array.scale());
192 let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone());
193 let validity = nulls(array.nulls(), nullable);
194 Ok(DecimalArray::new(buffer, decimal_type, validity).into_array())
195 }
196}
197
198impl FromArrowArray<&ArrowPrimitiveArray<Decimal256Type>> for ArrayRef {
199 fn from_arrow(
200 array: &ArrowPrimitiveArray<Decimal256Type>,
201 nullable: bool,
202 ) -> VortexResult<Self> {
203 let decimal_type = DecimalDType::new(array.precision(), array.scale());
204 let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone());
205 let buffer =
209 unsafe { std::mem::transmute::<Buffer<arrow_buffer::i256>, Buffer<i256>>(buffer) };
210 let validity = nulls(array.nulls(), nullable);
211 Ok(DecimalArray::new(buffer, decimal_type, validity).into_array())
212 }
213}
214
// Generates `FromArrowArray<&ArrowPrimitiveArray<$T>> for ArrayRef` for a temporal Arrow
// type by delegating to `temporal_array`.
macro_rules! impl_from_arrow_temporal {
    ($T:path) => {
        impl FromArrowArray<&ArrowPrimitiveArray<$T>> for ArrayRef {
            fn from_arrow(
                array: &ArrowPrimitiveArray<$T>,
                nullable: bool,
            ) -> vortex_error::VortexResult<Self> {
                let converted = temporal_array(array, nullable);
                Ok(converted)
            }
        }
    };
}
227
// Timestamps, one impl per Arrow time unit.
impl_from_arrow_temporal!(TimestampSecondType);
impl_from_arrow_temporal!(TimestampMillisecondType);
impl_from_arrow_temporal!(TimestampMicrosecondType);
impl_from_arrow_temporal!(TimestampNanosecondType);

// Time-of-day values (32-bit and 64-bit storage).
impl_from_arrow_temporal!(Time32SecondType);
impl_from_arrow_temporal!(Time32MillisecondType);
impl_from_arrow_temporal!(Time64MicrosecondType);
impl_from_arrow_temporal!(Time64NanosecondType);

// Dates (32-bit and 64-bit storage).
impl_from_arrow_temporal!(Date32Type);
impl_from_arrow_temporal!(Date64Type);
243
244fn temporal_array<T: ArrowPrimitiveType>(value: &ArrowPrimitiveArray<T>, nullable: bool) -> ArrayRef
245where
246 T::Native: NativePType,
247{
248 let arr = PrimitiveArray::new(
249 Buffer::from_arrow_scalar_buffer(value.values().clone()),
250 nulls(value.nulls(), nullable),
251 )
252 .into_array();
253
254 match value.data_type() {
255 DataType::Timestamp(time_unit, tz) => {
256 TemporalArray::new_timestamp(arr, time_unit.into(), tz.clone()).into()
257 }
258 DataType::Time32(time_unit) => TemporalArray::new_time(arr, time_unit.into()).into(),
259 DataType::Time64(time_unit) => TemporalArray::new_time(arr, time_unit.into()).into(),
260 DataType::Date32 => TemporalArray::new_date(arr, TimeUnit::Days).into(),
261 DataType::Date64 => TemporalArray::new_date(arr, TimeUnit::Milliseconds).into(),
262 DataType::Duration(_) => unimplemented!(),
263 DataType::Interval(_) => unimplemented!(),
264 _ => vortex_panic!("Invalid temporal type: {}", value.data_type()),
265 }
266}
267
268impl<T: ByteArrayType> FromArrowArray<&GenericByteArray<T>> for ArrayRef
269where
270 <T as ByteArrayType>::Offset: IntegerPType,
271{
272 fn from_arrow(value: &GenericByteArray<T>, nullable: bool) -> VortexResult<Self> {
273 let dtype = match T::DATA_TYPE {
274 DataType::Binary | DataType::LargeBinary => DType::Binary(nullable.into()),
275 DataType::Utf8 | DataType::LargeUtf8 => DType::Utf8(nullable.into()),
276 dt => vortex_panic!("Invalid data type for ByteArray: {dt}"),
277 };
278 Ok(unsafe {
280 VarBinArray::new_unchecked(
281 value.offsets().clone().into_array(),
282 ByteBuffer::from_arrow_buffer(value.values().clone(), Alignment::of::<u8>()),
283 dtype,
284 nulls(value.nulls(), nullable),
285 )
286 }
287 .into_array())
288 }
289}
290
291impl<T: ByteViewType> FromArrowArray<&GenericByteViewArray<T>> for ArrayRef {
292 fn from_arrow(value: &GenericByteViewArray<T>, nullable: bool) -> VortexResult<Self> {
293 let dtype = match T::DATA_TYPE {
294 DataType::BinaryView => DType::Binary(nullable.into()),
295 DataType::Utf8View => DType::Utf8(nullable.into()),
296 dt => vortex_panic!("Invalid data type for ByteViewArray: {dt}"),
297 };
298
299 let views_buffer = Buffer::from_byte_buffer(
300 Buffer::from_arrow_scalar_buffer(value.views().clone()).into_byte_buffer(),
301 );
302
303 Ok(unsafe {
306 VarBinViewArray::new_unchecked(
307 views_buffer,
308 Arc::from(
309 value
310 .data_buffers()
311 .iter()
312 .map(|b| ByteBuffer::from_arrow_buffer(b.clone(), Alignment::of::<u8>()))
313 .collect::<Vec<_>>(),
314 ),
315 dtype,
316 nulls(value.nulls(), nullable),
317 )
318 .into_array()
319 })
320 }
321}
322
323impl FromArrowArray<&ArrowBooleanArray> for ArrayRef {
324 fn from_arrow(value: &ArrowBooleanArray, nullable: bool) -> VortexResult<Self> {
325 Ok(BoolArray::new(
326 value.values().clone().into(),
327 nulls(value.nulls(), nullable),
328 )
329 .into_array())
330 }
331}
332
333pub(crate) fn remove_nulls(data: arrow_data::ArrayData) -> arrow_data::ArrayData {
335 if data.null_count() == 0 {
336 return data;
338 }
339
340 let children = match data.data_type() {
341 DataType::Struct(fields) => Some(
342 fields
343 .iter()
344 .zip(data.child_data().iter())
345 .map(|(field, child_data)| {
346 if field.is_nullable() {
347 child_data.clone()
348 } else {
349 remove_nulls(child_data.clone())
350 }
351 })
352 .collect_vec(),
353 ),
354 DataType::List(f)
355 | DataType::LargeList(f)
356 | DataType::ListView(f)
357 | DataType::LargeListView(f)
358 | DataType::FixedSizeList(f, _)
359 if !f.is_nullable() =>
360 {
361 assert_eq!(
363 data.child_data().len(),
364 1,
365 "List types should have one child"
366 );
367 Some(vec![remove_nulls(data.child_data()[0].clone())])
368 }
369 _ => None,
370 };
371
372 let mut builder = data.into_builder().nulls(None);
373 if let Some(children) = children {
374 builder = builder.child_data(children);
375 }
376 builder
377 .build()
378 .vortex_expect("reconstructing array without nulls")
379}
380
381impl FromArrowArray<&ArrowStructArray> for ArrayRef {
382 fn from_arrow(value: &ArrowStructArray, nullable: bool) -> VortexResult<Self> {
383 Ok(StructArray::try_new(
384 value.column_names().iter().copied().collect(),
385 value
386 .columns()
387 .iter()
388 .zip(value.fields())
389 .map(|(c, field)| {
390 if c.null_count() > 0 && !field.is_nullable() {
393 let stripped = make_array(remove_nulls(c.into_data()));
394 Self::from_arrow(stripped.as_ref(), false)
395 } else {
396 Self::from_arrow(c.as_ref(), field.is_nullable())
397 }
398 })
399 .collect::<VortexResult<Vec<_>>>()?,
400 value.len(),
401 nulls(value.nulls(), nullable),
402 )?
403 .into_array())
404 }
405}
406
407impl<O: IntegerPType + OffsetSizeTrait> FromArrowArray<&GenericListArray<O>> for ArrayRef {
408 fn from_arrow(value: &GenericListArray<O>, nullable: bool) -> VortexResult<Self> {
409 let elements_are_nullable = match value.data_type() {
411 DataType::List(field) => field.is_nullable(),
412 DataType::LargeList(field) => field.is_nullable(),
413 dt => vortex_panic!("Invalid data type for ListArray: {dt}"),
414 };
415
416 let elements = Self::from_arrow(value.values().as_ref(), elements_are_nullable)?;
417
418 let offsets = value.offsets().clone().into_array();
420 let nulls = nulls(value.nulls(), nullable);
421
422 Ok(ListArray::try_new(elements, offsets, nulls)?.into_array())
423 }
424}
425
426impl<O: OffsetSizeTrait + NativePType> FromArrowArray<&GenericListViewArray<O>> for ArrayRef {
427 fn from_arrow(array: &GenericListViewArray<O>, nullable: bool) -> VortexResult<Self> {
428 let elements_are_nullable = match array.data_type() {
430 DataType::ListView(field) => field.is_nullable(),
431 DataType::LargeListView(field) => field.is_nullable(),
432 dt => vortex_panic!("Invalid data type for ListViewArray: {dt}"),
433 };
434
435 let elements = Self::from_arrow(array.values().as_ref(), elements_are_nullable)?;
436
437 let offsets = array.offsets().clone().into_array();
439 let sizes = array.sizes().clone().into_array();
440 let nulls = nulls(array.nulls(), nullable);
441
442 Ok(ListViewArray::try_new(elements, offsets, sizes, nulls)?.into_array())
443 }
444}
445
446impl FromArrowArray<&ArrowFixedSizeListArray> for ArrayRef {
447 fn from_arrow(array: &ArrowFixedSizeListArray, nullable: bool) -> VortexResult<Self> {
448 let DataType::FixedSizeList(field, list_size) = array.data_type() else {
449 vortex_panic!("Invalid data type for ListArray: {}", array.data_type());
450 };
451
452 Ok(FixedSizeListArray::try_new(
453 Self::from_arrow(array.values().as_ref(), field.is_nullable())?,
454 *list_size as u32,
455 nulls(array.nulls(), nullable),
456 array.len(),
457 )?
458 .into_array())
459 }
460}
461
462impl FromArrowArray<&ArrowNullArray> for ArrayRef {
463 fn from_arrow(value: &ArrowNullArray, nullable: bool) -> VortexResult<Self> {
464 assert!(nullable);
465 Ok(NullArray::new(value.len()).into_array())
466 }
467}
468
469impl<K: ArrowDictionaryKeyType> FromArrowArray<&DictionaryArray<K>> for DictArray {
470 fn from_arrow(array: &DictionaryArray<K>, nullable: bool) -> VortexResult<Self> {
471 let keys = AnyDictionaryArray::keys(array);
472 let keys = ArrayRef::from_arrow(keys, keys.is_nullable())?;
473 let values = ArrayRef::from_arrow(array.values().as_ref(), nullable)?;
474 Ok(unsafe { DictArray::new_unchecked(keys, values) })
476 }
477}
478
479pub(crate) fn nulls(nulls: Option<&NullBuffer>, nullable: bool) -> Validity {
480 if nullable {
481 nulls
482 .map(|nulls| {
483 if nulls.null_count() == nulls.len() {
484 Validity::AllInvalid
485 } else {
486 Validity::from(BitBuffer::from(nulls.inner().clone()))
487 }
488 })
489 .unwrap_or_else(|| Validity::AllValid)
490 } else {
491 assert!(nulls.map(|x| x.null_count() == 0).unwrap_or(true));
492 Validity::NonNullable
493 }
494}
495
496impl FromArrowArray<&dyn ArrowArray> for ArrayRef {
497 fn from_arrow(array: &dyn ArrowArray, nullable: bool) -> VortexResult<Self> {
498 match array.data_type() {
499 DataType::Boolean => Self::from_arrow(array.as_boolean(), nullable),
500 DataType::UInt8 => Self::from_arrow(array.as_primitive::<UInt8Type>(), nullable),
501 DataType::UInt16 => Self::from_arrow(array.as_primitive::<UInt16Type>(), nullable),
502 DataType::UInt32 => Self::from_arrow(array.as_primitive::<UInt32Type>(), nullable),
503 DataType::UInt64 => Self::from_arrow(array.as_primitive::<UInt64Type>(), nullable),
504 DataType::Int8 => Self::from_arrow(array.as_primitive::<Int8Type>(), nullable),
505 DataType::Int16 => Self::from_arrow(array.as_primitive::<Int16Type>(), nullable),
506 DataType::Int32 => Self::from_arrow(array.as_primitive::<Int32Type>(), nullable),
507 DataType::Int64 => Self::from_arrow(array.as_primitive::<Int64Type>(), nullable),
508 DataType::Float16 => Self::from_arrow(array.as_primitive::<Float16Type>(), nullable),
509 DataType::Float32 => Self::from_arrow(array.as_primitive::<Float32Type>(), nullable),
510 DataType::Float64 => Self::from_arrow(array.as_primitive::<Float64Type>(), nullable),
511 DataType::Utf8 => Self::from_arrow(array.as_string::<i32>(), nullable),
512 DataType::LargeUtf8 => Self::from_arrow(array.as_string::<i64>(), nullable),
513 DataType::Binary => Self::from_arrow(array.as_binary::<i32>(), nullable),
514 DataType::LargeBinary => Self::from_arrow(array.as_binary::<i64>(), nullable),
515 DataType::BinaryView => Self::from_arrow(array.as_binary_view(), nullable),
516 DataType::Utf8View => Self::from_arrow(array.as_string_view(), nullable),
517 DataType::Struct(_) => Self::from_arrow(array.as_struct(), nullable),
518 DataType::List(_) => Self::from_arrow(array.as_list::<i32>(), nullable),
519 DataType::LargeList(_) => Self::from_arrow(array.as_list::<i64>(), nullable),
520 DataType::ListView(_) => Self::from_arrow(array.as_list_view::<i32>(), nullable),
521 DataType::LargeListView(_) => Self::from_arrow(array.as_list_view::<i64>(), nullable),
522 DataType::FixedSizeList(..) => Self::from_arrow(array.as_fixed_size_list(), nullable),
523 DataType::Null => Self::from_arrow(as_null_array(array), nullable),
524 DataType::Timestamp(u, _) => match u {
525 ArrowTimeUnit::Second => {
526 Self::from_arrow(array.as_primitive::<TimestampSecondType>(), nullable)
527 }
528 ArrowTimeUnit::Millisecond => {
529 Self::from_arrow(array.as_primitive::<TimestampMillisecondType>(), nullable)
530 }
531 ArrowTimeUnit::Microsecond => {
532 Self::from_arrow(array.as_primitive::<TimestampMicrosecondType>(), nullable)
533 }
534 ArrowTimeUnit::Nanosecond => {
535 Self::from_arrow(array.as_primitive::<TimestampNanosecondType>(), nullable)
536 }
537 },
538 DataType::Date32 => Self::from_arrow(array.as_primitive::<Date32Type>(), nullable),
539 DataType::Date64 => Self::from_arrow(array.as_primitive::<Date64Type>(), nullable),
540 DataType::Time32(u) => match u {
541 ArrowTimeUnit::Second => {
542 Self::from_arrow(array.as_primitive::<Time32SecondType>(), nullable)
543 }
544 ArrowTimeUnit::Millisecond => {
545 Self::from_arrow(array.as_primitive::<Time32MillisecondType>(), nullable)
546 }
547 ArrowTimeUnit::Microsecond | ArrowTimeUnit::Nanosecond => unreachable!(),
548 },
549 DataType::Time64(u) => match u {
550 ArrowTimeUnit::Microsecond => {
551 Self::from_arrow(array.as_primitive::<Time64MicrosecondType>(), nullable)
552 }
553 ArrowTimeUnit::Nanosecond => {
554 Self::from_arrow(array.as_primitive::<Time64NanosecondType>(), nullable)
555 }
556 ArrowTimeUnit::Second | ArrowTimeUnit::Millisecond => unreachable!(),
557 },
558 DataType::Decimal32(..) => {
559 Self::from_arrow(array.as_primitive::<Decimal32Type>(), nullable)
560 }
561 DataType::Decimal64(..) => {
562 Self::from_arrow(array.as_primitive::<Decimal64Type>(), nullable)
563 }
564 DataType::Decimal128(..) => {
565 Self::from_arrow(array.as_primitive::<Decimal128Type>(), nullable)
566 }
567 DataType::Decimal256(..) => {
568 Self::from_arrow(array.as_primitive::<Decimal256Type>(), nullable)
569 }
570 DataType::Dictionary(key_type, _) => match key_type.as_ref() {
571 DataType::Int8 => Ok(DictArray::from_arrow(
572 array.as_dictionary::<Int8Type>(),
573 nullable,
574 )?
575 .into_array()),
576 DataType::Int16 => Ok(DictArray::from_arrow(
577 array.as_dictionary::<Int16Type>(),
578 nullable,
579 )?
580 .into_array()),
581 DataType::Int32 => Ok(DictArray::from_arrow(
582 array.as_dictionary::<Int32Type>(),
583 nullable,
584 )?
585 .into_array()),
586 DataType::Int64 => Ok(DictArray::from_arrow(
587 array.as_dictionary::<Int64Type>(),
588 nullable,
589 )?
590 .into_array()),
591 DataType::UInt8 => Ok(DictArray::from_arrow(
592 array.as_dictionary::<UInt8Type>(),
593 nullable,
594 )?
595 .into_array()),
596 DataType::UInt16 => Ok(DictArray::from_arrow(
597 array.as_dictionary::<UInt16Type>(),
598 nullable,
599 )?
600 .into_array()),
601 DataType::UInt32 => Ok(DictArray::from_arrow(
602 array.as_dictionary::<UInt32Type>(),
603 nullable,
604 )?
605 .into_array()),
606 DataType::UInt64 => Ok(DictArray::from_arrow(
607 array.as_dictionary::<UInt64Type>(),
608 nullable,
609 )?
610 .into_array()),
611 key_dt => vortex_bail!("Unsupported dictionary key type: {key_dt}"),
612 },
613 dt => vortex_bail!("Array encoding not implemented for Arrow data type {dt}"),
614 }
615 }
616}
617
618impl FromArrowArray<RecordBatch> for ArrayRef {
619 fn from_arrow(array: RecordBatch, nullable: bool) -> VortexResult<Self> {
620 ArrayRef::from_arrow(&arrow_array::StructArray::from(array), nullable)
621 }
622}
623
624impl FromArrowArray<&RecordBatch> for ArrayRef {
625 fn from_arrow(array: &RecordBatch, nullable: bool) -> VortexResult<Self> {
626 Self::from_arrow(array.clone(), nullable)
627 }
628}
629
630#[cfg(test)]
631mod tests {
632 use std::sync::Arc;
633
634 use arrow_array::Array as ArrowArray;
635 use arrow_array::BinaryArray;
636 use arrow_array::BooleanArray;
637 use arrow_array::Date32Array;
638 use arrow_array::Date64Array;
639 use arrow_array::FixedSizeListArray as ArrowFixedSizeListArray;
640 use arrow_array::Float32Array;
641 use arrow_array::Float64Array;
642 use arrow_array::GenericListViewArray;
643 use arrow_array::Int8Array;
644 use arrow_array::Int16Array;
645 use arrow_array::Int32Array;
646 use arrow_array::Int64Array;
647 use arrow_array::LargeBinaryArray;
648 use arrow_array::LargeStringArray;
649 use arrow_array::NullArray;
650 use arrow_array::RecordBatch;
651 use arrow_array::StringArray;
652 use arrow_array::StructArray;
653 use arrow_array::Time32MillisecondArray;
654 use arrow_array::Time32SecondArray;
655 use arrow_array::Time64MicrosecondArray;
656 use arrow_array::Time64NanosecondArray;
657 use arrow_array::TimestampMicrosecondArray;
658 use arrow_array::TimestampMillisecondArray;
659 use arrow_array::TimestampNanosecondArray;
660 use arrow_array::TimestampSecondArray;
661 use arrow_array::UInt8Array;
662 use arrow_array::UInt16Array;
663 use arrow_array::UInt32Array;
664 use arrow_array::UInt64Array;
665 use arrow_array::builder::BinaryViewBuilder;
666 use arrow_array::builder::Decimal128Builder;
667 use arrow_array::builder::Decimal256Builder;
668 use arrow_array::builder::Int32Builder;
669 use arrow_array::builder::LargeListBuilder;
670 use arrow_array::builder::ListBuilder;
671 use arrow_array::builder::StringViewBuilder;
672 use arrow_array::new_null_array;
673 use arrow_array::types::ArrowPrimitiveType;
674 use arrow_array::types::Float16Type;
675 use arrow_buffer::BooleanBuffer;
676 use arrow_buffer::Buffer as ArrowBuffer;
677 use arrow_buffer::OffsetBuffer;
678 use arrow_buffer::ScalarBuffer;
679 use arrow_schema::DataType;
680 use arrow_schema::Field;
681 use arrow_schema::Fields;
682 use arrow_schema::Schema;
683 use rstest::rstest;
684
685 use crate::ArrayRef;
686 use crate::IntoArray;
687 use crate::arrays::Decimal;
688 use crate::arrays::FixedSizeList;
689 use crate::arrays::List;
690 use crate::arrays::ListView;
691 use crate::arrays::Primitive;
692 use crate::arrays::Struct;
693 use crate::arrays::VarBinView;
694 use crate::arrays::fixed_size_list::FixedSizeListArrayExt;
695 use crate::arrays::list::ListArrayExt;
696 use crate::arrays::listview::ListViewArrayExt;
697 use crate::arrays::struct_::StructArrayExt;
698 use crate::arrow::FromArrowArray as _;
699 use crate::dtype::DType;
700 use crate::dtype::Nullability;
701 use crate::dtype::PType;
702 use crate::extension::datetime::TimeUnit;
703 use crate::extension::datetime::Timestamp;
704
705 #[rstest]
706 #[case::i8(
707 Arc::new(Int8Array::from(vec![Some(1), None, Some(3), Some(4)])),
708 Arc::new(Int8Array::from(vec![1, 2, 3, 4])),
709 PType::I8,
710 )]
711 #[case::i16(
712 Arc::new(Int16Array::from(vec![Some(100), None, Some(300), Some(400)])),
713 Arc::new(Int16Array::from(vec![100, 200, 300, 400])),
714 PType::I16,
715 )]
716 #[case::i32(
717 Arc::new(Int32Array::from(vec![Some(1000), None, Some(3000), Some(4000)])),
718 Arc::new(Int32Array::from(vec![1000, 2000, 3000, 4000])),
719 PType::I32,
720 )]
721 #[case::i64(
722 Arc::new(Int64Array::from(vec![Some(10000), None, Some(30000), Some(40000)])),
723 Arc::new(Int64Array::from(vec![10000_i64, 20000, 30000, 40000])),
724 PType::I64,
725 )]
726 #[case::u8(
727 Arc::new(UInt8Array::from(vec![Some(1), None, Some(3), Some(4)])),
728 Arc::new(UInt8Array::from(vec![1_u8, 2, 3, 4])),
729 PType::U8,
730 )]
731 #[case::u16(
732 Arc::new(UInt16Array::from(vec![Some(100), None, Some(300), Some(400)])),
733 Arc::new(UInt16Array::from(vec![100_u16, 200, 300, 400])),
734 PType::U16,
735 )]
736 #[case::u32(
737 Arc::new(UInt32Array::from(vec![Some(1000), None, Some(3000), Some(4000)])),
738 Arc::new(UInt32Array::from(vec![1000_u32, 2000, 3000, 4000])),
739 PType::U32,
740 )]
741 #[case::u64(
742 Arc::new(UInt64Array::from(vec![Some(10000), None, Some(30000), Some(40000)])),
743 Arc::new(UInt64Array::from(vec![10000_u64, 20000, 30000, 40000])),
744 PType::U64,
745 )]
746 #[case::f32(
747 Arc::new(Float32Array::from(vec![Some(1.5), None, Some(3.5), Some(4.5)])),
748 Arc::new(Float32Array::from(vec![1.5_f32, 2.5, 3.5, 4.5])),
749 PType::F32,
750 )]
751 #[case::f64(
752 Arc::new(Float64Array::from(vec![Some(1.5), None, Some(3.5), Some(4.5)])),
753 Arc::new(Float64Array::from(vec![1.5_f64, 2.5, 3.5, 4.5])),
754 PType::F64,
755 )]
756 fn test_primitive_array_conversion(
757 #[case] nullable: Arc<dyn ArrowArray>,
758 #[case] non_nullable: Arc<dyn ArrowArray>,
759 #[case] expected_ptype: PType,
760 ) {
761 let v_null = ArrayRef::from_arrow(nullable.as_ref(), true).unwrap();
762 let v_non_null = ArrayRef::from_arrow(non_nullable.as_ref(), false).unwrap();
763 assert_eq!(v_null.len(), 4);
764 assert_eq!(v_non_null.len(), 4);
765 assert_eq!(v_null.as_::<Primitive>().ptype(), expected_ptype);
766 assert_eq!(v_non_null.as_::<Primitive>().ptype(), expected_ptype);
767 }
768
769 #[test]
770 fn test_float16_array_conversion() {
771 let values = vec![
772 Some(<Float16Type as ArrowPrimitiveType>::Native::from_f32(1.5)),
773 None,
774 Some(<Float16Type as ArrowPrimitiveType>::Native::from_f32(3.5)),
775 ];
776 let arrow_array = arrow_array::PrimitiveArray::<Float16Type>::from(values);
777 let vortex_array = ArrayRef::from_arrow(&arrow_array, true).unwrap();
778
779 let non_null_values = vec![
780 <Float16Type as ArrowPrimitiveType>::Native::from_f32(1.5),
781 <Float16Type as ArrowPrimitiveType>::Native::from_f32(2.5),
782 ];
783 let arrow_array_non_null =
784 arrow_array::PrimitiveArray::<Float16Type>::from(non_null_values);
785 let vortex_array_non_null = ArrayRef::from_arrow(&arrow_array_non_null, false).unwrap();
786
787 assert_eq!(vortex_array.len(), 3);
788 assert_eq!(vortex_array_non_null.len(), 2);
789
790 let primitive_array = vortex_array.as_::<Primitive>();
792 assert_eq!(primitive_array.ptype(), PType::F16);
793
794 let primitive_array_non_null = vortex_array_non_null.as_::<Primitive>();
795 assert_eq!(primitive_array_non_null.ptype(), PType::F16);
796 }
797
798 #[test]
800 fn test_decimal128_array_conversion() {
801 let mut builder = Decimal128Builder::with_capacity(4);
802 builder.append_value(12345);
803 builder.append_null();
804 builder.append_value(67890);
805 builder.append_value(11111);
806 let decimal_array = builder.finish().with_precision_and_scale(10, 2).unwrap();
807
808 let vortex_array = ArrayRef::from_arrow(&decimal_array, true).unwrap();
809 assert_eq!(vortex_array.len(), 4);
810
811 let mut builder_non_null = Decimal128Builder::with_capacity(3);
812 builder_non_null.append_value(12345);
813 builder_non_null.append_value(67890);
814 builder_non_null.append_value(11111);
815 let decimal_array_non_null = builder_non_null
816 .finish()
817 .with_precision_and_scale(10, 2)
818 .unwrap();
819
820 let vortex_array_non_null = ArrayRef::from_arrow(&decimal_array_non_null, false).unwrap();
821 assert_eq!(vortex_array_non_null.len(), 3);
822
823 let decimal_vortex_array = vortex_array.as_::<Decimal>();
825 assert_eq!(decimal_vortex_array.decimal_dtype().precision(), 10);
826 assert_eq!(decimal_vortex_array.decimal_dtype().scale(), 2);
827
828 let decimal_vortex_array_non_null = vortex_array_non_null.as_::<Decimal>();
829 assert_eq!(
830 decimal_vortex_array_non_null.decimal_dtype().precision(),
831 10
832 );
833 assert_eq!(decimal_vortex_array_non_null.decimal_dtype().scale(), 2);
834 }
835
836 #[test]
837 fn test_decimal256_array_conversion() {
838 let mut builder = Decimal256Builder::with_capacity(4);
839 builder.append_value(arrow_buffer::i256::from_i128(12345));
840 builder.append_null();
841 builder.append_value(arrow_buffer::i256::from_i128(67890));
842 builder.append_value(arrow_buffer::i256::from_i128(11111));
843 let decimal_array = builder.finish().with_precision_and_scale(38, 10).unwrap();
844
845 let vortex_array = ArrayRef::from_arrow(&decimal_array, true).unwrap();
846 assert_eq!(vortex_array.len(), 4);
847
848 let mut builder_non_null = Decimal256Builder::with_capacity(3);
849 builder_non_null.append_value(arrow_buffer::i256::from_i128(12345));
850 builder_non_null.append_value(arrow_buffer::i256::from_i128(67890));
851 builder_non_null.append_value(arrow_buffer::i256::from_i128(11111));
852 let decimal_array_non_null = builder_non_null
853 .finish()
854 .with_precision_and_scale(38, 10)
855 .unwrap();
856
857 let vortex_array_non_null = ArrayRef::from_arrow(&decimal_array_non_null, false).unwrap();
858 assert_eq!(vortex_array_non_null.len(), 3);
859
860 let decimal_vortex_array = vortex_array.as_::<Decimal>();
862 assert_eq!(decimal_vortex_array.decimal_dtype().precision(), 38);
863 assert_eq!(decimal_vortex_array.decimal_dtype().scale(), 10);
864
865 let decimal_vortex_array_non_null = vortex_array_non_null.as_::<Decimal>();
866 assert_eq!(
867 decimal_vortex_array_non_null.decimal_dtype().precision(),
868 38
869 );
870 assert_eq!(decimal_vortex_array_non_null.decimal_dtype().scale(), 10);
871 }
872
873 #[rstest]
875 #[case::timestamp_second(
876 Arc::new(TimestampSecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
877 Arc::new(TimestampSecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
878 )]
879 #[case::timestamp_millisecond(
880 Arc::new(TimestampMillisecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
881 Arc::new(TimestampMillisecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
882 )]
883 #[case::timestamp_microsecond(
884 Arc::new(TimestampMicrosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
885 Arc::new(TimestampMicrosecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
886 )]
887 #[case::timestamp_nanosecond(
888 Arc::new(TimestampNanosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
889 Arc::new(TimestampNanosecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
890 )]
891 #[case::time32_second(
892 Arc::new(Time32SecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
893 Arc::new(Time32SecondArray::from(vec![1000_i32, 2000, 3000, 4000])),
894 )]
895 #[case::time32_millisecond(
896 Arc::new(Time32MillisecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
897 Arc::new(Time32MillisecondArray::from(vec![1000_i32, 2000, 3000, 4000])),
898 )]
899 #[case::time64_microsecond(
900 Arc::new(Time64MicrosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
901 Arc::new(Time64MicrosecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
902 )]
903 #[case::time64_nanosecond(
904 Arc::new(Time64NanosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
905 Arc::new(Time64NanosecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
906 )]
907 #[case::date32(
908 Arc::new(Date32Array::from(vec![Some(18000), None, Some(18002), Some(18003)])),
909 Arc::new(Date32Array::from(vec![18000_i32, 18001, 18002, 18003])),
910 )]
911 #[case::date64(
912 Arc::new(Date64Array::from(vec![Some(1555200000000), None, Some(1555286400000), Some(1555372800000)]
913 )),
914 Arc::new(Date64Array::from(vec![1555200000000_i64, 1555213600000, 1555286400000, 1555372800000]
915 )),
916 )]
917 fn test_temporal_array_conversion(
918 #[case] nullable: Arc<dyn ArrowArray>,
919 #[case] non_nullable: Arc<dyn ArrowArray>,
920 ) {
921 let v_null = ArrayRef::from_arrow(nullable.as_ref(), true).unwrap();
922 let v_non_null = ArrayRef::from_arrow(non_nullable.as_ref(), false).unwrap();
923 assert_eq!(v_null.len(), 4);
924 assert_eq!(v_non_null.len(), 4);
925 }
926
// Microsecond timestamps tagged with the "UTC" timezone must convert to a `Timestamp`
// extension dtype that carries the same unit and timezone, with the nullability that was
// requested from `from_arrow`.
#[test]
fn test_timestamp_timezone_microsecond_array_conversion() {
    // Builds the dtype both assertions compare against; only the nullability differs.
    let utc_micros = |nullability: Nullability| {
        DType::Extension(
            Timestamp::new_with_tz(TimeUnit::Microseconds, Some("UTC".into()), nullability)
                .erased(),
        )
    };

    let with_nulls =
        TimestampMicrosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])
            .with_timezone("UTC");
    let without_nulls =
        TimestampMicrosecondArray::from(vec![1000_i64, 2000, 3000, 4000]).with_timezone("UTC");

    let converted_nullable = ArrayRef::from_arrow(&with_nulls, true).unwrap();
    let converted_non_nullable = ArrayRef::from_arrow(&without_nulls, false).unwrap();

    assert_eq!(converted_nullable.len(), 4);
    assert_eq!(
        converted_nullable.dtype(),
        &utc_micros(Nullability::Nullable),
    );

    assert_eq!(converted_non_nullable.len(), 4);
    assert_eq!(
        converted_non_nullable.dtype(),
        &utc_micros(Nullability::NonNullable)
    );
}
963
964 #[rstest]
966 #[case::utf8(
967 Arc::new(StringArray::from(vec![Some("hello"), None, Some("world"), Some("test")])),
968 Arc::new(StringArray::from(vec!["hello", "world", "test", "vortex"])),
969 DType::Utf8(Nullability::NonNullable),
970 )]
971 #[case::large_utf8(
972 Arc::new(LargeStringArray::from(vec![Some("hello"), None, Some("world"), Some("test")])),
973 Arc::new(LargeStringArray::from(vec!["hello", "world", "test", "vortex"])),
974 DType::Utf8(Nullability::NonNullable),
975 )]
976 #[case::binary(
977 Arc::new(BinaryArray::from(vec![
978 Some("hello".as_bytes()), None, Some("world".as_bytes()), Some("test".as_bytes()),
979 ])),
980 Arc::new(BinaryArray::from(vec![
981 "hello".as_bytes(), "world".as_bytes(), "test".as_bytes(), "vortex".as_bytes(),
982 ])),
983 DType::Binary(Nullability::NonNullable),
984 )]
985 #[case::large_binary(
986 Arc::new(LargeBinaryArray::from(vec![
987 Some("hello".as_bytes()), None, Some("world".as_bytes()), Some("test".as_bytes()),
988 ])),
989 Arc::new(LargeBinaryArray::from(vec![
990 "hello".as_bytes(), "world".as_bytes(), "test".as_bytes(), "vortex".as_bytes(),
991 ])),
992 DType::Binary(Nullability::NonNullable),
993 )]
994 fn test_string_binary_array_conversion(
995 #[case] nullable: Arc<dyn ArrowArray>,
996 #[case] non_nullable: Arc<dyn ArrowArray>,
997 #[case] expected_non_nullable_dtype: DType,
998 ) {
999 let v_null = ArrayRef::from_arrow(nullable.as_ref(), true).unwrap();
1000 let v_non_null = ArrayRef::from_arrow(non_nullable.as_ref(), false).unwrap();
1001 assert_eq!(v_null.len(), 4);
1002 assert_eq!(v_non_null.len(), 4);
1003 assert_eq!(v_null.dtype(), &expected_non_nullable_dtype.as_nullable());
1004 assert_eq!(v_non_null.dtype(), &expected_non_nullable_dtype);
1005 }
1006
// Arrow `StringView` arrays must convert to a Vortex `VarBinView` array with a `Utf8` dtype
// of the requested nullability and the same number of data buffers as the Arrow input.
#[test]
fn test_utf8_view_array_conversion() {
    // Nullable input: three strings with a null in the second slot.
    let mut nullable_builder = StringViewBuilder::new();
    for entry in [Some("hello"), None, Some("world"), Some("test")] {
        if let Some(text) = entry {
            nullable_builder.append_value(text);
        } else {
            nullable_builder.append_null();
        }
    }
    let arrow_nullable = nullable_builder.finish();
    let vortex_nullable = ArrayRef::from_arrow(&arrow_nullable, true).unwrap();

    // Non-nullable input: four strings, no nulls.
    let mut dense_builder = StringViewBuilder::new();
    for text in ["hello", "world", "test", "vortex"] {
        dense_builder.append_value(text);
    }
    let arrow_dense = dense_builder.finish();
    let vortex_dense = ArrayRef::from_arrow(&arrow_dense, false).unwrap();

    assert_eq!(vortex_nullable.len(), 4);
    assert_eq!(vortex_dense.len(), 4);

    let nullable_view = vortex_nullable.as_::<VarBinView>();
    assert_eq!(
        nullable_view.data_buffers().len(),
        arrow_nullable.data_buffers().len()
    );
    assert_eq!(nullable_view.dtype(), &DType::Utf8(true.into()));

    let dense_view = vortex_dense.as_::<VarBinView>();
    assert_eq!(
        dense_view.data_buffers().len(),
        arrow_dense.data_buffers().len()
    );
    assert_eq!(dense_view.dtype(), &DType::Utf8(false.into()));
}
1046
// Arrow `BinaryView` arrays must convert to a Vortex `VarBinView` array with a `Binary` dtype
// of the requested nullability and the same number of data buffers as the Arrow input.
#[test]
fn test_binary_view_array_conversion() {
    // Nullable input: three byte strings with a null in the second slot.
    let mut nullable_builder = BinaryViewBuilder::new();
    for entry in [
        Some(b"hello".as_slice()),
        None,
        Some(b"world".as_slice()),
        Some(b"test".as_slice()),
    ] {
        if let Some(bytes) = entry {
            nullable_builder.append_value(bytes);
        } else {
            nullable_builder.append_null();
        }
    }
    let arrow_nullable = nullable_builder.finish();
    let vortex_nullable = ArrayRef::from_arrow(&arrow_nullable, true).unwrap();

    // Non-nullable input: four byte strings, no nulls.
    let mut dense_builder = BinaryViewBuilder::new();
    for bytes in [
        b"hello".as_slice(),
        b"world".as_slice(),
        b"test".as_slice(),
        b"vortex".as_slice(),
    ] {
        dense_builder.append_value(bytes);
    }
    let arrow_dense = dense_builder.finish();
    let vortex_dense = ArrayRef::from_arrow(&arrow_dense, false).unwrap();

    assert_eq!(vortex_nullable.len(), 4);
    assert_eq!(vortex_dense.len(), 4);

    let nullable_view = vortex_nullable.as_::<VarBinView>();
    assert_eq!(
        nullable_view.data_buffers().len(),
        arrow_nullable.data_buffers().len()
    );
    assert_eq!(nullable_view.dtype(), &DType::Binary(true.into()));

    let dense_view = vortex_dense.as_::<VarBinView>();
    assert_eq!(
        dense_view.data_buffers().len(),
        arrow_dense.data_buffers().len()
    );
    assert_eq!(dense_view.dtype(), &DType::Binary(false.into()));
}
1086
// Boolean arrays, with and without nulls, convert successfully and keep their row count.
#[test]
fn test_boolean_array_conversion() {
    let with_nulls = BooleanArray::from(vec![Some(true), None, Some(false), Some(true)]);
    let without_nulls = BooleanArray::from(vec![true, false, true, false]);

    let converted_nullable = ArrayRef::from_arrow(&with_nulls, true).unwrap();
    let converted_non_nullable = ArrayRef::from_arrow(&without_nulls, false).unwrap();

    assert_eq!(converted_nullable.len(), 4);
    assert_eq!(converted_non_nullable.len(), 4);
}
1099
// Struct arrays, with and without a top-level validity buffer, convert to a Vortex `Struct`
// array that keeps the row count and the field names in order.
#[test]
fn test_struct_array_conversion() {
    let struct_fields = Fields::from(vec![
        Field::new("field1", DataType::Int32, true),
        Field::new("field2", DataType::Utf8, false),
    ]);

    // Both struct arrays below use the same child data, so build it on demand.
    let child_columns = || -> Vec<Arc<dyn ArrowArray>> {
        vec![
            Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])),
            Arc::new(StringArray::from(vec!["a", "b", "c"])),
        ]
    };

    // Case 1: no struct-level validity, converted as non-nullable.
    let plain_struct = StructArray::new(struct_fields.clone(), child_columns(), None);
    let converted_plain = ArrayRef::from_arrow(&plain_struct, false).unwrap();
    assert_eq!(converted_plain.len(), 3);

    let plain_view = converted_plain.as_::<Struct>();
    assert_eq!(plain_view.names().len(), 2);
    assert_eq!(plain_view.names()[0], "field1");
    assert_eq!(plain_view.names()[1], "field2");

    // Case 2: the middle row is null at the struct level, converted as nullable.
    let row_validity =
        arrow_buffer::NullBuffer::new(BooleanBuffer::from(vec![true, false, true]));
    let struct_with_nulls = StructArray::new(struct_fields, child_columns(), Some(row_validity));
    let converted_with_nulls = ArrayRef::from_arrow(&struct_with_nulls, true).unwrap();
    assert_eq!(converted_with_nulls.len(), 3);

    let nullable_view = converted_with_nulls.as_::<Struct>();
    assert_eq!(nullable_view.names().len(), 2);
    assert_eq!(nullable_view.names()[0], "field1");
    assert_eq!(nullable_view.names()[1], "field2");
}
1148
// Arrow `List` arrays (32-bit offsets) convert to a Vortex `List` array whose offsets child
// is an `I32` primitive array holding one more entry than there are rows.
#[test]
fn test_list_array_conversion() {
    // Nullable column: [1, null, 3], null, [4, 5].
    let mut nullable_builder = ListBuilder::new(Int32Builder::new());
    nullable_builder.append_value([Some(1), None, Some(3)]);
    nullable_builder.append_null();
    nullable_builder.append_value([Some(4), Some(5)]);
    let arrow_nullable = nullable_builder.finish();

    // Non-nullable column: [1, null, 3], [4, 5].
    let mut dense_builder = ListBuilder::new(Int32Builder::new());
    dense_builder.append_value([Some(1), None, Some(3)]);
    dense_builder.append_value([Some(4), Some(5)]);
    let arrow_dense = dense_builder.finish();

    let converted_nullable = ArrayRef::from_arrow(&arrow_nullable, true).unwrap();
    let converted_dense = ArrayRef::from_arrow(&arrow_dense, false).unwrap();

    for (converted, rows) in [(&converted_nullable, 3), (&converted_dense, 2)] {
        assert_eq!(converted.len(), rows);

        let list = converted.as_::<List>();
        let offsets = list.offsets().as_::<Primitive>();
        // n lists are delimited by n + 1 offsets.
        assert_eq!(offsets.len(), rows + 1);
        assert_eq!(offsets.ptype(), PType::I32);
    }
}
1182
// Arrow `LargeList` arrays (64-bit offsets) convert to a Vortex `List` array whose offsets
// child is an `I64` primitive array holding one more entry than there are rows.
#[test]
fn test_large_list_array_conversion() {
    // Nullable column: [1, null, 3], null, [4, 5].
    let mut nullable_builder = LargeListBuilder::new(Int32Builder::new());
    nullable_builder.append_value([Some(1), None, Some(3)]);
    nullable_builder.append_null();
    nullable_builder.append_value([Some(4), Some(5)]);
    let arrow_nullable = nullable_builder.finish();

    // Non-nullable column: [1, null, 3], [4, 5].
    let mut dense_builder = LargeListBuilder::new(Int32Builder::new());
    dense_builder.append_value([Some(1), None, Some(3)]);
    dense_builder.append_value([Some(4), Some(5)]);
    let arrow_dense = dense_builder.finish();

    let converted_nullable = ArrayRef::from_arrow(&arrow_nullable, true).unwrap();
    let converted_dense = ArrayRef::from_arrow(&arrow_dense, false).unwrap();

    for (converted, rows) in [(&converted_nullable, 3), (&converted_dense, 2)] {
        assert_eq!(converted.len(), rows);

        let list = converted.as_::<List>();
        let offsets = list.offsets().as_::<Primitive>();
        // n lists are delimited by n + 1 offsets.
        assert_eq!(offsets.len(), rows + 1);
        assert_eq!(offsets.ptype(), PType::I64);
    }
}
1215
// Arrow `FixedSizeList` arrays convert to a Vortex `FixedSizeList` array that keeps the list
// width and the full flattened element count, both with and without list-level nulls.
#[test]
fn test_fixed_size_list_array_conversion() {
    let item_field = Arc::new(Field::new("item", DataType::Int32, true));

    // Case 1: four lists of width 3 (12 elements, one null element), no list-level validity.
    let dense_elements = Int32Array::from(vec![
        // list 0
        Some(1),
        Some(2),
        Some(3),
        // list 1
        Some(4),
        None,
        Some(6),
        // list 2
        Some(7),
        Some(8),
        Some(9),
        // list 3
        Some(10),
        Some(11),
        Some(12),
    ]);
    let arrow_dense = ArrowFixedSizeListArray::try_new(
        Arc::clone(&item_field),
        3,
        Arc::new(dense_elements),
        None,
    )
    .unwrap();

    let converted_dense = ArrayRef::from_arrow(&arrow_dense, false).unwrap();
    assert_eq!(converted_dense.len(), 4);

    let dense_view = converted_dense.as_::<FixedSizeList>();
    assert_eq!(dense_view.list_size(), 3);
    assert_eq!(dense_view.elements().len(), 12);

    // Case 2: three lists of width 3 (9 elements) where the middle list is null.
    let nullable_elements = Int32Array::from(vec![
        // list 0
        Some(1),
        Some(2),
        Some(3),
        // list 1 (masked out by the validity buffer below)
        Some(4),
        None,
        Some(6),
        // list 2
        Some(7),
        Some(8),
        Some(9),
    ]);
    let list_validity =
        arrow_buffer::NullBuffer::new(BooleanBuffer::from(vec![true, false, true]));
    let arrow_nullable = ArrowFixedSizeListArray::try_new(
        item_field,
        3,
        Arc::new(nullable_elements),
        Some(list_validity),
    )
    .unwrap();

    let converted_nullable = ArrayRef::from_arrow(&arrow_nullable, true).unwrap();
    assert_eq!(converted_nullable.len(), 3);

    let nullable_view = converted_nullable.as_::<FixedSizeList>();
    assert_eq!(nullable_view.list_size(), 3);
    assert_eq!(nullable_view.elements().len(), 9);
}
1281
// Arrow `ListView` arrays convert to a Vortex `ListView` array whose offsets and sizes
// children are primitive arrays of the same integer width as the Arrow input (i32 or i64).
#[test]
fn test_list_view_array_conversion() {
    // Ten non-null elements, 1..=10, shared by every list-view built below.
    let elements: Arc<dyn ArrowArray> = Arc::new(Int32Array::from(
        (1_i32..=10).map(Some).collect::<Vec<_>>(),
    ));
    let item_field = Arc::new(Field::new("item", DataType::Int32, true));

    // Four views: [0, 3), [3, 5), [5, 6) and [6, 10).
    let offsets_i32 = ScalarBuffer::from(vec![0i32, 3, 5, 6]);
    let sizes_i32 = ScalarBuffer::from(vec![3i32, 2, 1, 4]);

    // Case 1: 32-bit offsets/sizes, no validity buffer.
    let arrow_dense = GenericListViewArray::try_new(
        Arc::clone(&item_field),
        offsets_i32.clone(),
        sizes_i32.clone(),
        Arc::clone(&elements),
        None,
    )
    .unwrap();
    let converted_dense = ArrayRef::from_arrow(&arrow_dense, false).unwrap();
    assert_eq!(converted_dense.len(), 4);

    let dense_view = converted_dense.as_::<ListView>();
    let dense_offsets = dense_view.offsets().as_::<Primitive>();
    let dense_sizes = dense_view.sizes().as_::<Primitive>();
    assert_eq!(dense_offsets.len(), 4);
    assert_eq!(dense_offsets.ptype(), PType::I32);
    assert_eq!(dense_sizes.len(), 4);
    assert_eq!(dense_sizes.ptype(), PType::I32);

    // Case 2: same layout, but the second list is null.
    let list_validity =
        arrow_buffer::NullBuffer::new(BooleanBuffer::from(vec![true, false, true, true]));
    let arrow_nullable = GenericListViewArray::try_new(
        Arc::clone(&item_field),
        offsets_i32,
        sizes_i32,
        Arc::clone(&elements),
        Some(list_validity),
    )
    .unwrap();
    let converted_nullable = ArrayRef::from_arrow(&arrow_nullable, true).unwrap();
    assert_eq!(converted_nullable.len(), 4);

    // Case 3: same layout with 64-bit offsets/sizes.
    let offsets_i64 = ScalarBuffer::from(vec![0i64, 3, 5, 6]);
    let sizes_i64 = ScalarBuffer::from(vec![3i64, 2, 1, 4]);
    let arrow_large =
        GenericListViewArray::try_new(item_field, offsets_i64, sizes_i64, elements, None)
            .unwrap();
    let converted_large = ArrayRef::from_arrow(&arrow_large, false).unwrap();
    assert_eq!(converted_large.len(), 4);

    let large_view = converted_large.as_::<ListView>();
    let large_offsets = large_view.offsets().as_::<Primitive>();
    let large_sizes = large_view.sizes().as_::<Primitive>();
    assert_eq!(large_offsets.len(), 4);
    assert_eq!(large_offsets.ptype(), PType::I64);
    assert_eq!(large_sizes.len(), 4);
    assert_eq!(large_sizes.ptype(), PType::I64);
}
1367
// A five-row Arrow `NullArray` converts successfully and keeps its length.
#[test]
fn test_null_array_conversion() {
    let all_nulls = NullArray::new(5);
    let converted = ArrayRef::from_arrow(&all_nulls, true).unwrap();

    assert_eq!(converted.len(), 5);
}
1375
// A raw Arrow byte buffer turned into an array via `into_array` has one row per byte.
#[test]
fn test_arrow_buffer_conversion() {
    let bytes = vec![1u8, 2, 3, 4, 5];
    let converted = ArrowBuffer::from_vec(bytes).into_array();

    assert_eq!(converted.len(), 5);
}
1384
// An Arrow `BooleanBuffer` turned into an array via `into_array` has one row per bit.
#[test]
fn test_boolean_buffer_conversion() {
    let bits = vec![true, false, true, false, true];
    let converted = BooleanBuffer::from(bits).into_array();

    assert_eq!(converted.len(), 5);
}
1392
// An Arrow `ScalarBuffer<i32>` turned into an array via `into_array` has one row per value.
#[test]
fn test_scalar_buffer_conversion() {
    let numbers = vec![1i32, 2, 3, 4, 5];
    let converted = ScalarBuffer::from(numbers).into_array();

    assert_eq!(converted.len(), 5);
}
1400
// An Arrow `OffsetBuffer<i32>` turned into an array via `into_array` has one row per offset.
#[test]
fn test_offset_buffer_conversion() {
    let raw_offsets = ScalarBuffer::from(vec![0i32, 2, 5, 8, 10]);
    let converted = OffsetBuffer::new(raw_offsets).into_array();

    assert_eq!(converted.len(), 5);
}
1408
// A `RecordBatch` can be converted both by value and by reference; either way the result
// keeps the batch's four rows.
#[test]
fn test_record_batch_conversion() {
    // Builds a fresh two-column (Int32, Utf8), four-row batch for each conversion.
    let build_batch = || {
        let schema = Arc::new(Schema::new(vec![
            Field::new("field1", DataType::Int32, false),
            Field::new("field2", DataType::Utf8, false),
        ]));
        RecordBatch::try_new(
            schema,
            vec![
                Arc::new(Int32Array::from(vec![1, 2, 3, 4])),
                Arc::new(StringArray::from(vec!["a", "b", "c", "d"])),
            ],
        )
        .unwrap()
    };

    // Owned batch.
    let from_owned = ArrayRef::from_arrow(build_batch(), false).unwrap();
    assert_eq!(from_owned.len(), 4);

    // Borrowed batch.
    let borrowed_batch = build_batch();
    let from_borrowed = ArrayRef::from_arrow(&borrowed_batch, false).unwrap();
    assert_eq!(from_borrowed.len(), 4);
}
1439
// Conversion also works through a type-erased `&dyn Array` reference, for integer, string
// and boolean inputs alike.
#[test]
fn test_dyn_array_conversion() {
    // Converts through the trait object and reports the resulting row count.
    let converted_len =
        |array: &dyn ArrowArray| ArrayRef::from_arrow(array, false).unwrap().len();

    let ints = Int32Array::from(vec![1, 2, 3, 4]);
    assert_eq!(converted_len(&ints), 4);

    let strings = StringArray::from(vec!["a", "b", "c"]);
    assert_eq!(converted_len(&strings), 3);

    let bools = BooleanArray::from(vec![true, false, true]);
    assert_eq!(converted_len(&bools), 3);
}
1458
// An all-null struct array whose only field is declared non-nullable must still convert
// without error when the struct itself is converted as nullable.
#[test]
pub fn nullable_may_contain_non_nullable() {
    let inner_field = Field::new("non_nullable_inner", DataType::Int32, false);
    let struct_type = DataType::Struct(Fields::from(vec![inner_field]));

    // One row, null at the struct level.
    let all_null_struct = new_null_array(&struct_type, 1);

    ArrayRef::from_arrow(all_null_struct.as_ref(), true).unwrap();
}
1472
// Same as the single-level case, but the non-nullable field is itself a struct holding a
// non-nullable Int32 field: the all-null outer struct must still convert as nullable.
#[test]
pub fn nullable_may_contain_deeply_nested_non_nullable() {
    let deepest_field = Field::new("non_nullable_deeper_inner", DataType::Int32, false);
    let middle_field = Field::new(
        "non_nullable_inner",
        DataType::Struct(Fields::from(vec![deepest_field])),
        false,
    );
    let outer_type = DataType::Struct(Fields::from(vec![middle_field]));

    // One row, null at the outermost struct level.
    let all_null_struct = new_null_array(&outer_type, 1);

    ArrayRef::from_arrow(all_null_struct.as_ref(), true).unwrap();
}
1489
// Pins a known limitation: an all-null struct whose only field is a non-nullable dictionary
// (Int32 keys, Utf8 values). `#[should_panic]` means the test passes only if something in the
// body panics; the test name indicates the conversion is the step expected to fail.
#[test]
#[should_panic]
pub fn cannot_handle_nullable_struct_containing_non_nullable_dictionary() {
    let dictionary_type =
        DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
    let dictionary_field = Field::new("non_nullable_deeper_inner", dictionary_type, false);
    let struct_type = DataType::Struct(Fields::from(vec![dictionary_field]));

    // One row, null at the struct level.
    let all_null_struct = new_null_array(&struct_type, 1);

    ArrayRef::from_arrow(all_null_struct.as_ref(), true).unwrap();
}
1504}