1use std::sync::Arc;
5
6use arrow_array::AnyDictionaryArray;
7use arrow_array::Array as ArrowArray;
8use arrow_array::ArrowPrimitiveType;
9use arrow_array::BooleanArray as ArrowBooleanArray;
10use arrow_array::DictionaryArray;
11use arrow_array::FixedSizeListArray as ArrowFixedSizeListArray;
12use arrow_array::GenericByteArray;
13use arrow_array::GenericByteViewArray;
14use arrow_array::GenericListArray;
15use arrow_array::GenericListViewArray;
16use arrow_array::NullArray as ArrowNullArray;
17use arrow_array::OffsetSizeTrait;
18use arrow_array::PrimitiveArray as ArrowPrimitiveArray;
19use arrow_array::RecordBatch;
20use arrow_array::StructArray as ArrowStructArray;
21use arrow_array::cast::AsArray;
22use arrow_array::cast::as_null_array;
23use arrow_array::make_array;
24use arrow_array::types::ArrowDictionaryKeyType;
25use arrow_array::types::ByteArrayType;
26use arrow_array::types::ByteViewType;
27use arrow_array::types::Date32Type;
28use arrow_array::types::Date64Type;
29use arrow_array::types::Decimal32Type;
30use arrow_array::types::Decimal64Type;
31use arrow_array::types::Decimal128Type;
32use arrow_array::types::Decimal256Type;
33use arrow_array::types::Float16Type;
34use arrow_array::types::Float32Type;
35use arrow_array::types::Float64Type;
36use arrow_array::types::Int8Type;
37use arrow_array::types::Int16Type;
38use arrow_array::types::Int32Type;
39use arrow_array::types::Int64Type;
40use arrow_array::types::Time32MillisecondType;
41use arrow_array::types::Time32SecondType;
42use arrow_array::types::Time64MicrosecondType;
43use arrow_array::types::Time64NanosecondType;
44use arrow_array::types::TimestampMicrosecondType;
45use arrow_array::types::TimestampMillisecondType;
46use arrow_array::types::TimestampNanosecondType;
47use arrow_array::types::TimestampSecondType;
48use arrow_array::types::UInt8Type;
49use arrow_array::types::UInt16Type;
50use arrow_array::types::UInt32Type;
51use arrow_array::types::UInt64Type;
52use arrow_buffer::ArrowNativeType;
53use arrow_buffer::BooleanBuffer;
54use arrow_buffer::Buffer as ArrowBuffer;
55use arrow_buffer::ScalarBuffer;
56use arrow_buffer::buffer::NullBuffer;
57use arrow_buffer::buffer::OffsetBuffer;
58use arrow_schema::DataType;
59use arrow_schema::TimeUnit as ArrowTimeUnit;
60use vortex_buffer::Alignment;
61use vortex_buffer::BitBuffer;
62use vortex_buffer::Buffer;
63use vortex_buffer::ByteBuffer;
64use vortex_error::VortexResult;
65use vortex_error::vortex_bail;
66use vortex_error::vortex_ensure;
67use vortex_error::vortex_ensure_eq;
68use vortex_error::vortex_err;
69use vortex_error::vortex_panic;
70
71use crate::ArrayRef;
72use crate::IntoArray;
73use crate::arrays::BoolArray;
74use crate::arrays::DecimalArray;
75use crate::arrays::DictArray;
76use crate::arrays::FixedSizeListArray;
77use crate::arrays::ListArray;
78use crate::arrays::ListViewArray;
79use crate::arrays::NullArray;
80use crate::arrays::PrimitiveArray;
81use crate::arrays::StructArray;
82use crate::arrays::TemporalArray;
83use crate::arrays::VarBinArray;
84use crate::arrays::VarBinViewArray;
85use crate::arrow::FromArrowArray;
86use crate::dtype::DType;
87use crate::dtype::DecimalDType;
88use crate::dtype::IntegerPType;
89use crate::dtype::NativePType;
90use crate::dtype::PType;
91use crate::dtype::i256;
92use crate::extension::datetime::TimeUnit;
93use crate::validity::Validity;
94
95impl IntoArray for ArrowBuffer {
96 fn into_array(self) -> ArrayRef {
97 PrimitiveArray::from_byte_buffer(
98 ByteBuffer::from_arrow_buffer(self, Alignment::of::<u8>()),
99 PType::U8,
100 Validity::NonNullable,
101 )
102 .into_array()
103 }
104}
105
106impl IntoArray for BooleanBuffer {
107 fn into_array(self) -> ArrayRef {
108 BoolArray::new(self.into(), Validity::NonNullable).into_array()
109 }
110}
111
112impl<T> IntoArray for ScalarBuffer<T>
113where
114 T: ArrowNativeType + NativePType,
115{
116 fn into_array(self) -> ArrayRef {
117 PrimitiveArray::new(
118 Buffer::<T>::from_arrow_scalar_buffer(self),
119 Validity::NonNullable,
120 )
121 .into_array()
122 }
123}
124
125impl<O> IntoArray for OffsetBuffer<O>
126where
127 O: IntegerPType + OffsetSizeTrait,
128{
129 fn into_array(self) -> ArrayRef {
130 let primitive = PrimitiveArray::new(
131 Buffer::from_arrow_scalar_buffer(self.into_inner()),
132 Validity::NonNullable,
133 );
134
135 primitive.into_array()
136 }
137}
138
139macro_rules! impl_from_arrow_primitive {
140 ($T:path) => {
141 impl FromArrowArray<&ArrowPrimitiveArray<$T>> for ArrayRef {
142 fn from_arrow(value: &ArrowPrimitiveArray<$T>, nullable: bool) -> VortexResult<Self> {
143 let buffer = Buffer::from_arrow_scalar_buffer(value.values().clone());
144 let validity = nulls(value.nulls(), nullable)?;
145 Ok(PrimitiveArray::new(buffer, validity).into_array())
146 }
147 }
148 };
149}
150
151impl_from_arrow_primitive!(Int8Type);
152impl_from_arrow_primitive!(Int16Type);
153impl_from_arrow_primitive!(Int32Type);
154impl_from_arrow_primitive!(Int64Type);
155impl_from_arrow_primitive!(UInt8Type);
156impl_from_arrow_primitive!(UInt16Type);
157impl_from_arrow_primitive!(UInt32Type);
158impl_from_arrow_primitive!(UInt64Type);
159impl_from_arrow_primitive!(Float16Type);
160impl_from_arrow_primitive!(Float32Type);
161impl_from_arrow_primitive!(Float64Type);
162
163impl FromArrowArray<&ArrowPrimitiveArray<Decimal32Type>> for ArrayRef {
164 fn from_arrow(
165 array: &ArrowPrimitiveArray<Decimal32Type>,
166 nullable: bool,
167 ) -> VortexResult<Self> {
168 let decimal_type = DecimalDType::new(array.precision(), array.scale());
169 let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone());
170 let validity = nulls(array.nulls(), nullable)?;
171 Ok(DecimalArray::new(buffer, decimal_type, validity).into_array())
172 }
173}
174
175impl FromArrowArray<&ArrowPrimitiveArray<Decimal64Type>> for ArrayRef {
176 fn from_arrow(
177 array: &ArrowPrimitiveArray<Decimal64Type>,
178 nullable: bool,
179 ) -> VortexResult<Self> {
180 let decimal_type = DecimalDType::new(array.precision(), array.scale());
181 let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone());
182 let validity = nulls(array.nulls(), nullable)?;
183 Ok(DecimalArray::new(buffer, decimal_type, validity).into_array())
184 }
185}
186
187impl FromArrowArray<&ArrowPrimitiveArray<Decimal128Type>> for ArrayRef {
188 fn from_arrow(
189 array: &ArrowPrimitiveArray<Decimal128Type>,
190 nullable: bool,
191 ) -> VortexResult<Self> {
192 let decimal_type = DecimalDType::new(array.precision(), array.scale());
193 let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone());
194 let validity = nulls(array.nulls(), nullable)?;
195 Ok(DecimalArray::new(buffer, decimal_type, validity).into_array())
196 }
197}
198
199impl FromArrowArray<&ArrowPrimitiveArray<Decimal256Type>> for ArrayRef {
200 fn from_arrow(
201 array: &ArrowPrimitiveArray<Decimal256Type>,
202 nullable: bool,
203 ) -> VortexResult<Self> {
204 let decimal_type = DecimalDType::new(array.precision(), array.scale());
205 let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone());
206 let buffer =
210 unsafe { std::mem::transmute::<Buffer<arrow_buffer::i256>, Buffer<i256>>(buffer) };
211 let validity = nulls(array.nulls(), nullable)?;
212 Ok(DecimalArray::new(buffer, decimal_type, validity).into_array())
213 }
214}
215
216macro_rules! impl_from_arrow_temporal {
217 ($T:path) => {
218 impl FromArrowArray<&ArrowPrimitiveArray<$T>> for ArrayRef {
219 fn from_arrow(
220 value: &ArrowPrimitiveArray<$T>,
221 nullable: bool,
222 ) -> vortex_error::VortexResult<Self> {
223 temporal_array(value, nullable)
224 }
225 }
226 };
227}
228
229impl_from_arrow_temporal!(TimestampSecondType);
231impl_from_arrow_temporal!(TimestampMillisecondType);
232impl_from_arrow_temporal!(TimestampMicrosecondType);
233impl_from_arrow_temporal!(TimestampNanosecondType);
234
235impl_from_arrow_temporal!(Time32SecondType);
237impl_from_arrow_temporal!(Time32MillisecondType);
238impl_from_arrow_temporal!(Time64MicrosecondType);
239impl_from_arrow_temporal!(Time64NanosecondType);
240
241impl_from_arrow_temporal!(Date32Type);
243impl_from_arrow_temporal!(Date64Type);
244
245fn temporal_array<T: ArrowPrimitiveType>(
246 value: &ArrowPrimitiveArray<T>,
247 nullable: bool,
248) -> VortexResult<ArrayRef>
249where
250 T::Native: NativePType,
251{
252 let arr = PrimitiveArray::new(
253 Buffer::from_arrow_scalar_buffer(value.values().clone()),
254 nulls(value.nulls(), nullable)?,
255 )
256 .into_array();
257
258 Ok(match value.data_type() {
259 DataType::Timestamp(time_unit, tz) => {
260 TemporalArray::new_timestamp(arr, time_unit.into(), tz.clone()).into()
261 }
262 DataType::Time32(time_unit) => TemporalArray::new_time(arr, time_unit.into()).into(),
263 DataType::Time64(time_unit) => TemporalArray::new_time(arr, time_unit.into()).into(),
264 DataType::Date32 => TemporalArray::new_date(arr, TimeUnit::Days).into(),
265 DataType::Date64 => TemporalArray::new_date(arr, TimeUnit::Milliseconds).into(),
266 DataType::Duration(_) => unimplemented!(),
267 DataType::Interval(_) => unimplemented!(),
268 _ => vortex_panic!("Invalid temporal type: {}", value.data_type()),
269 })
270}
271
272impl<T: ByteArrayType> FromArrowArray<&GenericByteArray<T>> for ArrayRef
273where
274 <T as ByteArrayType>::Offset: IntegerPType,
275{
276 fn from_arrow(value: &GenericByteArray<T>, nullable: bool) -> VortexResult<Self> {
277 let dtype = match T::DATA_TYPE {
278 DataType::Binary | DataType::LargeBinary => DType::Binary(nullable.into()),
279 DataType::Utf8 | DataType::LargeUtf8 => DType::Utf8(nullable.into()),
280 dt => vortex_panic!("Invalid data type for ByteArray: {dt}"),
281 };
282 Ok(unsafe {
284 VarBinArray::new_unchecked(
285 value.offsets().clone().into_array(),
286 ByteBuffer::from_arrow_buffer(value.values().clone(), Alignment::of::<u8>()),
287 dtype,
288 nulls(value.nulls(), nullable)?,
289 )
290 }
291 .into_array())
292 }
293}
294
295impl<T: ByteViewType> FromArrowArray<&GenericByteViewArray<T>> for ArrayRef {
296 fn from_arrow(value: &GenericByteViewArray<T>, nullable: bool) -> VortexResult<Self> {
297 let dtype = match T::DATA_TYPE {
298 DataType::BinaryView => DType::Binary(nullable.into()),
299 DataType::Utf8View => DType::Utf8(nullable.into()),
300 dt => vortex_panic!("Invalid data type for ByteViewArray: {dt}"),
301 };
302
303 let views_buffer = Buffer::from_byte_buffer(
304 Buffer::from_arrow_scalar_buffer(value.views().clone()).into_byte_buffer(),
305 );
306
307 Ok(unsafe {
310 VarBinViewArray::new_unchecked(
311 views_buffer,
312 Arc::from(
313 value
314 .data_buffers()
315 .iter()
316 .map(|b| ByteBuffer::from_arrow_buffer(b.clone(), Alignment::of::<u8>()))
317 .collect::<Vec<_>>(),
318 ),
319 dtype,
320 nulls(value.nulls(), nullable)?,
321 )
322 .into_array()
323 })
324 }
325}
326
327impl FromArrowArray<&ArrowBooleanArray> for ArrayRef {
328 fn from_arrow(value: &ArrowBooleanArray, nullable: bool) -> VortexResult<Self> {
329 Ok(BoolArray::new(
330 value.values().clone().into(),
331 nulls(value.nulls(), nullable)?,
332 )
333 .into_array())
334 }
335}
336
337pub(crate) fn remove_nulls(data: arrow_data::ArrayData) -> VortexResult<arrow_data::ArrayData> {
339 if data.null_count() == 0 {
340 return Ok(data);
342 }
343
344 let children = match data.data_type() {
345 DataType::Struct(fields) => Some(
346 fields
347 .iter()
348 .zip(data.child_data().iter())
349 .map(|(field, child_data)| {
350 if field.is_nullable() {
351 Ok(child_data.clone())
352 } else {
353 remove_nulls(child_data.clone())
354 }
355 })
356 .collect::<VortexResult<Vec<_>>>()?,
357 ),
358 DataType::List(f)
359 | DataType::LargeList(f)
360 | DataType::ListView(f)
361 | DataType::LargeListView(f)
362 | DataType::FixedSizeList(f, _)
363 if !f.is_nullable() =>
364 {
365 vortex_ensure_eq!(
367 data.child_data().len(),
368 1,
369 "List types should have one child"
370 );
371 Some(vec![remove_nulls(data.child_data()[0].clone())?])
372 }
373 _ => None,
374 };
375
376 let mut builder = data.into_builder().nulls(None);
377 if let Some(children) = children {
378 builder = builder.child_data(children);
379 }
380 builder
381 .build()
382 .map_err(|e| vortex_err!("Failed to reconstruct Arrow array without nulls: {e}"))
383}
384
385impl FromArrowArray<&ArrowStructArray> for ArrayRef {
386 fn from_arrow(value: &ArrowStructArray, nullable: bool) -> VortexResult<Self> {
387 Ok(StructArray::try_new(
388 value.column_names().iter().copied().collect(),
389 value
390 .columns()
391 .iter()
392 .zip(value.fields())
393 .map(|(c, field)| {
394 if c.null_count() > 0 && !field.is_nullable() {
397 let stripped = make_array(remove_nulls(c.into_data())?);
398 Self::from_arrow(stripped.as_ref(), false)
399 } else {
400 Self::from_arrow(c.as_ref(), field.is_nullable())
401 }
402 })
403 .collect::<VortexResult<Vec<_>>>()?,
404 value.len(),
405 nulls(value.nulls(), nullable)?,
406 )?
407 .into_array())
408 }
409}
410
411impl<O: IntegerPType + OffsetSizeTrait> FromArrowArray<&GenericListArray<O>> for ArrayRef {
412 fn from_arrow(value: &GenericListArray<O>, nullable: bool) -> VortexResult<Self> {
413 let elements_are_nullable = match value.data_type() {
415 DataType::List(field) => field.is_nullable(),
416 DataType::LargeList(field) => field.is_nullable(),
417 dt => vortex_panic!("Invalid data type for ListArray: {dt}"),
418 };
419
420 let elements = Self::from_arrow(value.values().as_ref(), elements_are_nullable)?;
421
422 let offsets = value.offsets().clone().into_array();
424 let nulls = nulls(value.nulls(), nullable)?;
425
426 Ok(ListArray::try_new(elements, offsets, nulls)?.into_array())
427 }
428}
429
430impl<O: OffsetSizeTrait + NativePType> FromArrowArray<&GenericListViewArray<O>> for ArrayRef {
431 fn from_arrow(array: &GenericListViewArray<O>, nullable: bool) -> VortexResult<Self> {
432 let elements_are_nullable = match array.data_type() {
434 DataType::ListView(field) => field.is_nullable(),
435 DataType::LargeListView(field) => field.is_nullable(),
436 dt => vortex_panic!("Invalid data type for ListViewArray: {dt}"),
437 };
438
439 let elements = Self::from_arrow(array.values().as_ref(), elements_are_nullable)?;
440
441 let offsets = array.offsets().clone().into_array();
443 let sizes = array.sizes().clone().into_array();
444 let nulls = nulls(array.nulls(), nullable)?;
445
446 Ok(ListViewArray::try_new(elements, offsets, sizes, nulls)?.into_array())
447 }
448}
449
450impl FromArrowArray<&ArrowFixedSizeListArray> for ArrayRef {
451 fn from_arrow(array: &ArrowFixedSizeListArray, nullable: bool) -> VortexResult<Self> {
452 let DataType::FixedSizeList(field, list_size) = array.data_type() else {
453 vortex_panic!("Invalid data type for ListArray: {}", array.data_type());
454 };
455
456 Ok(FixedSizeListArray::try_new(
457 Self::from_arrow(array.values().as_ref(), field.is_nullable())?,
458 *list_size as u32,
459 nulls(array.nulls(), nullable)?,
460 array.len(),
461 )?
462 .into_array())
463 }
464}
465
466impl FromArrowArray<&ArrowNullArray> for ArrayRef {
467 fn from_arrow(value: &ArrowNullArray, nullable: bool) -> VortexResult<Self> {
468 vortex_ensure!(
469 nullable,
470 "Cannot convert an Arrow NullArray into a non-nullable Vortex array"
471 );
472 Ok(NullArray::new(value.len()).into_array())
473 }
474}
475
476impl<K: ArrowDictionaryKeyType> FromArrowArray<&DictionaryArray<K>> for DictArray {
477 fn from_arrow(array: &DictionaryArray<K>, nullable: bool) -> VortexResult<Self> {
478 let keys = AnyDictionaryArray::keys(array);
479 let keys = ArrayRef::from_arrow(keys, keys.is_nullable())?;
480 let values = ArrayRef::from_arrow(array.values().as_ref(), nullable)?;
481 Ok(unsafe { DictArray::new_unchecked(keys, values) })
483 }
484}
485
486pub(crate) fn nulls(nulls: Option<&NullBuffer>, nullable: bool) -> VortexResult<Validity> {
487 if nullable {
488 Ok(nulls
489 .map(|nulls| {
490 if nulls.null_count() == nulls.len() {
491 Validity::AllInvalid
492 } else {
493 Validity::from(BitBuffer::from(nulls.inner().clone()))
494 }
495 })
496 .unwrap_or(Validity::AllValid))
497 } else {
498 let null_count = nulls.map(NullBuffer::null_count).unwrap_or(0);
499 vortex_ensure_eq!(
500 null_count,
501 0,
502 "Cannot convert an Arrow array containing {null_count} nulls into a non-nullable Vortex array"
503 );
504 Ok(Validity::NonNullable)
505 }
506}
507
508impl FromArrowArray<&dyn ArrowArray> for ArrayRef {
509 fn from_arrow(array: &dyn ArrowArray, nullable: bool) -> VortexResult<Self> {
510 match array.data_type() {
511 DataType::Boolean => Self::from_arrow(array.as_boolean(), nullable),
512 DataType::UInt8 => Self::from_arrow(array.as_primitive::<UInt8Type>(), nullable),
513 DataType::UInt16 => Self::from_arrow(array.as_primitive::<UInt16Type>(), nullable),
514 DataType::UInt32 => Self::from_arrow(array.as_primitive::<UInt32Type>(), nullable),
515 DataType::UInt64 => Self::from_arrow(array.as_primitive::<UInt64Type>(), nullable),
516 DataType::Int8 => Self::from_arrow(array.as_primitive::<Int8Type>(), nullable),
517 DataType::Int16 => Self::from_arrow(array.as_primitive::<Int16Type>(), nullable),
518 DataType::Int32 => Self::from_arrow(array.as_primitive::<Int32Type>(), nullable),
519 DataType::Int64 => Self::from_arrow(array.as_primitive::<Int64Type>(), nullable),
520 DataType::Float16 => Self::from_arrow(array.as_primitive::<Float16Type>(), nullable),
521 DataType::Float32 => Self::from_arrow(array.as_primitive::<Float32Type>(), nullable),
522 DataType::Float64 => Self::from_arrow(array.as_primitive::<Float64Type>(), nullable),
523 DataType::Utf8 => Self::from_arrow(array.as_string::<i32>(), nullable),
524 DataType::LargeUtf8 => Self::from_arrow(array.as_string::<i64>(), nullable),
525 DataType::Binary => Self::from_arrow(array.as_binary::<i32>(), nullable),
526 DataType::LargeBinary => Self::from_arrow(array.as_binary::<i64>(), nullable),
527 DataType::BinaryView => Self::from_arrow(array.as_binary_view(), nullable),
528 DataType::Utf8View => Self::from_arrow(array.as_string_view(), nullable),
529 DataType::Struct(_) => Self::from_arrow(array.as_struct(), nullable),
530 DataType::List(_) => Self::from_arrow(array.as_list::<i32>(), nullable),
531 DataType::LargeList(_) => Self::from_arrow(array.as_list::<i64>(), nullable),
532 DataType::ListView(_) => Self::from_arrow(array.as_list_view::<i32>(), nullable),
533 DataType::LargeListView(_) => Self::from_arrow(array.as_list_view::<i64>(), nullable),
534 DataType::FixedSizeList(..) => Self::from_arrow(array.as_fixed_size_list(), nullable),
535 DataType::Null => Self::from_arrow(as_null_array(array), nullable),
536 DataType::Timestamp(u, _) => match u {
537 ArrowTimeUnit::Second => {
538 Self::from_arrow(array.as_primitive::<TimestampSecondType>(), nullable)
539 }
540 ArrowTimeUnit::Millisecond => {
541 Self::from_arrow(array.as_primitive::<TimestampMillisecondType>(), nullable)
542 }
543 ArrowTimeUnit::Microsecond => {
544 Self::from_arrow(array.as_primitive::<TimestampMicrosecondType>(), nullable)
545 }
546 ArrowTimeUnit::Nanosecond => {
547 Self::from_arrow(array.as_primitive::<TimestampNanosecondType>(), nullable)
548 }
549 },
550 DataType::Date32 => Self::from_arrow(array.as_primitive::<Date32Type>(), nullable),
551 DataType::Date64 => Self::from_arrow(array.as_primitive::<Date64Type>(), nullable),
552 DataType::Time32(u) => match u {
553 ArrowTimeUnit::Second => {
554 Self::from_arrow(array.as_primitive::<Time32SecondType>(), nullable)
555 }
556 ArrowTimeUnit::Millisecond => {
557 Self::from_arrow(array.as_primitive::<Time32MillisecondType>(), nullable)
558 }
559 ArrowTimeUnit::Microsecond | ArrowTimeUnit::Nanosecond => unreachable!(),
560 },
561 DataType::Time64(u) => match u {
562 ArrowTimeUnit::Microsecond => {
563 Self::from_arrow(array.as_primitive::<Time64MicrosecondType>(), nullable)
564 }
565 ArrowTimeUnit::Nanosecond => {
566 Self::from_arrow(array.as_primitive::<Time64NanosecondType>(), nullable)
567 }
568 ArrowTimeUnit::Second | ArrowTimeUnit::Millisecond => unreachable!(),
569 },
570 DataType::Decimal32(..) => {
571 Self::from_arrow(array.as_primitive::<Decimal32Type>(), nullable)
572 }
573 DataType::Decimal64(..) => {
574 Self::from_arrow(array.as_primitive::<Decimal64Type>(), nullable)
575 }
576 DataType::Decimal128(..) => {
577 Self::from_arrow(array.as_primitive::<Decimal128Type>(), nullable)
578 }
579 DataType::Decimal256(..) => {
580 Self::from_arrow(array.as_primitive::<Decimal256Type>(), nullable)
581 }
582 DataType::Dictionary(key_type, _) => match key_type.as_ref() {
583 DataType::Int8 => Ok(DictArray::from_arrow(
584 array.as_dictionary::<Int8Type>(),
585 nullable,
586 )?
587 .into_array()),
588 DataType::Int16 => Ok(DictArray::from_arrow(
589 array.as_dictionary::<Int16Type>(),
590 nullable,
591 )?
592 .into_array()),
593 DataType::Int32 => Ok(DictArray::from_arrow(
594 array.as_dictionary::<Int32Type>(),
595 nullable,
596 )?
597 .into_array()),
598 DataType::Int64 => Ok(DictArray::from_arrow(
599 array.as_dictionary::<Int64Type>(),
600 nullable,
601 )?
602 .into_array()),
603 DataType::UInt8 => Ok(DictArray::from_arrow(
604 array.as_dictionary::<UInt8Type>(),
605 nullable,
606 )?
607 .into_array()),
608 DataType::UInt16 => Ok(DictArray::from_arrow(
609 array.as_dictionary::<UInt16Type>(),
610 nullable,
611 )?
612 .into_array()),
613 DataType::UInt32 => Ok(DictArray::from_arrow(
614 array.as_dictionary::<UInt32Type>(),
615 nullable,
616 )?
617 .into_array()),
618 DataType::UInt64 => Ok(DictArray::from_arrow(
619 array.as_dictionary::<UInt64Type>(),
620 nullable,
621 )?
622 .into_array()),
623 key_dt => vortex_bail!("Unsupported dictionary key type: {key_dt}"),
624 },
625 dt => vortex_bail!("Array encoding not implemented for Arrow data type {dt}"),
626 }
627 }
628}
629
630impl FromArrowArray<RecordBatch> for ArrayRef {
631 fn from_arrow(array: RecordBatch, nullable: bool) -> VortexResult<Self> {
632 ArrayRef::from_arrow(&arrow_array::StructArray::from(array), nullable)
633 }
634}
635
636impl FromArrowArray<&RecordBatch> for ArrayRef {
637 fn from_arrow(array: &RecordBatch, nullable: bool) -> VortexResult<Self> {
638 Self::from_arrow(array.clone(), nullable)
639 }
640}
641
642#[cfg(test)]
643mod tests {
644 use std::sync::Arc;
645
646 use arrow_array::Array as ArrowArray;
647 use arrow_array::BinaryArray;
648 use arrow_array::BooleanArray;
649 use arrow_array::Date32Array;
650 use arrow_array::Date64Array;
651 use arrow_array::FixedSizeListArray as ArrowFixedSizeListArray;
652 use arrow_array::Float32Array;
653 use arrow_array::Float64Array;
654 use arrow_array::GenericListViewArray;
655 use arrow_array::Int8Array;
656 use arrow_array::Int16Array;
657 use arrow_array::Int32Array;
658 use arrow_array::Int64Array;
659 use arrow_array::LargeBinaryArray;
660 use arrow_array::LargeStringArray;
661 use arrow_array::NullArray;
662 use arrow_array::RecordBatch;
663 use arrow_array::StringArray;
664 use arrow_array::StructArray;
665 use arrow_array::Time32MillisecondArray;
666 use arrow_array::Time32SecondArray;
667 use arrow_array::Time64MicrosecondArray;
668 use arrow_array::Time64NanosecondArray;
669 use arrow_array::TimestampMicrosecondArray;
670 use arrow_array::TimestampMillisecondArray;
671 use arrow_array::TimestampNanosecondArray;
672 use arrow_array::TimestampSecondArray;
673 use arrow_array::UInt8Array;
674 use arrow_array::UInt16Array;
675 use arrow_array::UInt32Array;
676 use arrow_array::UInt64Array;
677 use arrow_array::builder::BinaryViewBuilder;
678 use arrow_array::builder::Decimal128Builder;
679 use arrow_array::builder::Decimal256Builder;
680 use arrow_array::builder::Int32Builder;
681 use arrow_array::builder::LargeListBuilder;
682 use arrow_array::builder::ListBuilder;
683 use arrow_array::builder::StringViewBuilder;
684 use arrow_array::new_null_array;
685 use arrow_array::types::ArrowPrimitiveType;
686 use arrow_array::types::Float16Type;
687 use arrow_buffer::BooleanBuffer;
688 use arrow_buffer::Buffer as ArrowBuffer;
689 use arrow_buffer::OffsetBuffer;
690 use arrow_buffer::ScalarBuffer;
691 use arrow_schema::DataType;
692 use arrow_schema::Field;
693 use arrow_schema::Fields;
694 use arrow_schema::Schema;
695 use rstest::rstest;
696
697 use crate::ArrayRef;
698 use crate::IntoArray;
699 use crate::arrays::Decimal;
700 use crate::arrays::FixedSizeList;
701 use crate::arrays::List;
702 use crate::arrays::ListView;
703 use crate::arrays::Primitive;
704 use crate::arrays::Struct;
705 use crate::arrays::VarBinView;
706 use crate::arrays::fixed_size_list::FixedSizeListArrayExt;
707 use crate::arrays::list::ListArrayExt;
708 use crate::arrays::listview::ListViewArrayExt;
709 use crate::arrays::struct_::StructArrayExt;
710 use crate::arrow::FromArrowArray as _;
711 use crate::dtype::DType;
712 use crate::dtype::Nullability;
713 use crate::dtype::PType;
714 use crate::extension::datetime::TimeUnit;
715 use crate::extension::datetime::Timestamp;
716
717 #[rstest]
718 #[case::i8(
719 Arc::new(Int8Array::from(vec![Some(1), None, Some(3), Some(4)])),
720 Arc::new(Int8Array::from(vec![1, 2, 3, 4])),
721 PType::I8,
722 )]
723 #[case::i16(
724 Arc::new(Int16Array::from(vec![Some(100), None, Some(300), Some(400)])),
725 Arc::new(Int16Array::from(vec![100, 200, 300, 400])),
726 PType::I16,
727 )]
728 #[case::i32(
729 Arc::new(Int32Array::from(vec![Some(1000), None, Some(3000), Some(4000)])),
730 Arc::new(Int32Array::from(vec![1000, 2000, 3000, 4000])),
731 PType::I32,
732 )]
733 #[case::i64(
734 Arc::new(Int64Array::from(vec![Some(10000), None, Some(30000), Some(40000)])),
735 Arc::new(Int64Array::from(vec![10000_i64, 20000, 30000, 40000])),
736 PType::I64,
737 )]
738 #[case::u8(
739 Arc::new(UInt8Array::from(vec![Some(1), None, Some(3), Some(4)])),
740 Arc::new(UInt8Array::from(vec![1_u8, 2, 3, 4])),
741 PType::U8,
742 )]
743 #[case::u16(
744 Arc::new(UInt16Array::from(vec![Some(100), None, Some(300), Some(400)])),
745 Arc::new(UInt16Array::from(vec![100_u16, 200, 300, 400])),
746 PType::U16,
747 )]
748 #[case::u32(
749 Arc::new(UInt32Array::from(vec![Some(1000), None, Some(3000), Some(4000)])),
750 Arc::new(UInt32Array::from(vec![1000_u32, 2000, 3000, 4000])),
751 PType::U32,
752 )]
753 #[case::u64(
754 Arc::new(UInt64Array::from(vec![Some(10000), None, Some(30000), Some(40000)])),
755 Arc::new(UInt64Array::from(vec![10000_u64, 20000, 30000, 40000])),
756 PType::U64,
757 )]
758 #[case::f32(
759 Arc::new(Float32Array::from(vec![Some(1.5), None, Some(3.5), Some(4.5)])),
760 Arc::new(Float32Array::from(vec![1.5_f32, 2.5, 3.5, 4.5])),
761 PType::F32,
762 )]
763 #[case::f64(
764 Arc::new(Float64Array::from(vec![Some(1.5), None, Some(3.5), Some(4.5)])),
765 Arc::new(Float64Array::from(vec![1.5_f64, 2.5, 3.5, 4.5])),
766 PType::F64,
767 )]
768 fn test_primitive_array_conversion(
769 #[case] nullable: Arc<dyn ArrowArray>,
770 #[case] non_nullable: Arc<dyn ArrowArray>,
771 #[case] expected_ptype: PType,
772 ) {
773 let v_null = ArrayRef::from_arrow(nullable.as_ref(), true).unwrap();
774 let v_non_null = ArrayRef::from_arrow(non_nullable.as_ref(), false).unwrap();
775 assert_eq!(v_null.len(), 4);
776 assert_eq!(v_non_null.len(), 4);
777 assert_eq!(v_null.as_::<Primitive>().ptype(), expected_ptype);
778 assert_eq!(v_non_null.as_::<Primitive>().ptype(), expected_ptype);
779 }
780
781 #[test]
782 fn test_float16_array_conversion() {
783 let values = vec![
784 Some(<Float16Type as ArrowPrimitiveType>::Native::from_f32(1.5)),
785 None,
786 Some(<Float16Type as ArrowPrimitiveType>::Native::from_f32(3.5)),
787 ];
788 let arrow_array = arrow_array::PrimitiveArray::<Float16Type>::from(values);
789 let vortex_array = ArrayRef::from_arrow(&arrow_array, true).unwrap();
790
791 let non_null_values = vec![
792 <Float16Type as ArrowPrimitiveType>::Native::from_f32(1.5),
793 <Float16Type as ArrowPrimitiveType>::Native::from_f32(2.5),
794 ];
795 let arrow_array_non_null =
796 arrow_array::PrimitiveArray::<Float16Type>::from(non_null_values);
797 let vortex_array_non_null = ArrayRef::from_arrow(&arrow_array_non_null, false).unwrap();
798
799 assert_eq!(vortex_array.len(), 3);
800 assert_eq!(vortex_array_non_null.len(), 2);
801
802 let primitive_array = vortex_array.as_::<Primitive>();
804 assert_eq!(primitive_array.ptype(), PType::F16);
805
806 let primitive_array_non_null = vortex_array_non_null.as_::<Primitive>();
807 assert_eq!(primitive_array_non_null.ptype(), PType::F16);
808 }
809
810 #[test]
812 fn test_decimal128_array_conversion() {
813 let mut builder = Decimal128Builder::with_capacity(4);
814 builder.append_value(12345);
815 builder.append_null();
816 builder.append_value(67890);
817 builder.append_value(11111);
818 let decimal_array = builder.finish().with_precision_and_scale(10, 2).unwrap();
819
820 let vortex_array = ArrayRef::from_arrow(&decimal_array, true).unwrap();
821 assert_eq!(vortex_array.len(), 4);
822
823 let mut builder_non_null = Decimal128Builder::with_capacity(3);
824 builder_non_null.append_value(12345);
825 builder_non_null.append_value(67890);
826 builder_non_null.append_value(11111);
827 let decimal_array_non_null = builder_non_null
828 .finish()
829 .with_precision_and_scale(10, 2)
830 .unwrap();
831
832 let vortex_array_non_null = ArrayRef::from_arrow(&decimal_array_non_null, false).unwrap();
833 assert_eq!(vortex_array_non_null.len(), 3);
834
835 let decimal_vortex_array = vortex_array.as_::<Decimal>();
837 assert_eq!(decimal_vortex_array.decimal_dtype().precision(), 10);
838 assert_eq!(decimal_vortex_array.decimal_dtype().scale(), 2);
839
840 let decimal_vortex_array_non_null = vortex_array_non_null.as_::<Decimal>();
841 assert_eq!(
842 decimal_vortex_array_non_null.decimal_dtype().precision(),
843 10
844 );
845 assert_eq!(decimal_vortex_array_non_null.decimal_dtype().scale(), 2);
846 }
847
848 #[test]
849 fn test_decimal256_array_conversion() {
850 let mut builder = Decimal256Builder::with_capacity(4);
851 builder.append_value(arrow_buffer::i256::from_i128(12345));
852 builder.append_null();
853 builder.append_value(arrow_buffer::i256::from_i128(67890));
854 builder.append_value(arrow_buffer::i256::from_i128(11111));
855 let decimal_array = builder.finish().with_precision_and_scale(38, 10).unwrap();
856
857 let vortex_array = ArrayRef::from_arrow(&decimal_array, true).unwrap();
858 assert_eq!(vortex_array.len(), 4);
859
860 let mut builder_non_null = Decimal256Builder::with_capacity(3);
861 builder_non_null.append_value(arrow_buffer::i256::from_i128(12345));
862 builder_non_null.append_value(arrow_buffer::i256::from_i128(67890));
863 builder_non_null.append_value(arrow_buffer::i256::from_i128(11111));
864 let decimal_array_non_null = builder_non_null
865 .finish()
866 .with_precision_and_scale(38, 10)
867 .unwrap();
868
869 let vortex_array_non_null = ArrayRef::from_arrow(&decimal_array_non_null, false).unwrap();
870 assert_eq!(vortex_array_non_null.len(), 3);
871
872 let decimal_vortex_array = vortex_array.as_::<Decimal>();
874 assert_eq!(decimal_vortex_array.decimal_dtype().precision(), 38);
875 assert_eq!(decimal_vortex_array.decimal_dtype().scale(), 10);
876
877 let decimal_vortex_array_non_null = vortex_array_non_null.as_::<Decimal>();
878 assert_eq!(
879 decimal_vortex_array_non_null.decimal_dtype().precision(),
880 38
881 );
882 assert_eq!(decimal_vortex_array_non_null.decimal_dtype().scale(), 10);
883 }
884
885 #[rstest]
887 #[case::timestamp_second(
888 Arc::new(TimestampSecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
889 Arc::new(TimestampSecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
890 )]
891 #[case::timestamp_millisecond(
892 Arc::new(TimestampMillisecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
893 Arc::new(TimestampMillisecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
894 )]
895 #[case::timestamp_microsecond(
896 Arc::new(TimestampMicrosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
897 Arc::new(TimestampMicrosecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
898 )]
899 #[case::timestamp_nanosecond(
900 Arc::new(TimestampNanosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
901 Arc::new(TimestampNanosecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
902 )]
903 #[case::time32_second(
904 Arc::new(Time32SecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
905 Arc::new(Time32SecondArray::from(vec![1000_i32, 2000, 3000, 4000])),
906 )]
907 #[case::time32_millisecond(
908 Arc::new(Time32MillisecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
909 Arc::new(Time32MillisecondArray::from(vec![1000_i32, 2000, 3000, 4000])),
910 )]
911 #[case::time64_microsecond(
912 Arc::new(Time64MicrosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
913 Arc::new(Time64MicrosecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
914 )]
915 #[case::time64_nanosecond(
916 Arc::new(Time64NanosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])),
917 Arc::new(Time64NanosecondArray::from(vec![1000_i64, 2000, 3000, 4000])),
918 )]
919 #[case::date32(
920 Arc::new(Date32Array::from(vec![Some(18000), None, Some(18002), Some(18003)])),
921 Arc::new(Date32Array::from(vec![18000_i32, 18001, 18002, 18003])),
922 )]
923 #[case::date64(
924 Arc::new(Date64Array::from(vec![Some(1555200000000), None, Some(1555286400000), Some(1555372800000)]
925 )),
926 Arc::new(Date64Array::from(vec![1555200000000_i64, 1555213600000, 1555286400000, 1555372800000]
927 )),
928 )]
929 fn test_temporal_array_conversion(
930 #[case] nullable: Arc<dyn ArrowArray>,
931 #[case] non_nullable: Arc<dyn ArrowArray>,
932 ) {
933 let v_null = ArrayRef::from_arrow(nullable.as_ref(), true).unwrap();
934 let v_non_null = ArrayRef::from_arrow(non_nullable.as_ref(), false).unwrap();
935 assert_eq!(v_null.len(), 4);
936 assert_eq!(v_non_null.len(), 4);
937 }
938
/// Checks that a microsecond timestamp array tagged with the "UTC" timezone converts to
/// the `Timestamp` extension dtype with the same unit and zone, for both nullabilities.
#[test]
fn test_timestamp_timezone_microsecond_array_conversion() {
    // Expected extension dtype for a given nullability; unit and timezone are fixed.
    let utc_micros_dtype = |nullability: Nullability| {
        DType::Extension(
            Timestamp::new_with_tz(TimeUnit::Microseconds, Some("UTC".into()), nullability)
                .erased(),
        )
    };

    let with_nulls =
        TimestampMicrosecondArray::from(vec![Some(1000), None, Some(3000), Some(4000)])
            .with_timezone("UTC");
    let converted_nullable = ArrayRef::from_arrow(&with_nulls, true).unwrap();

    let without_nulls =
        TimestampMicrosecondArray::from(vec![1000_i64, 2000, 3000, 4000]).with_timezone("UTC");
    let converted_non_nullable = ArrayRef::from_arrow(&without_nulls, false).unwrap();

    assert_eq!(converted_nullable.len(), 4);
    assert_eq!(
        converted_nullable.dtype(),
        &utc_micros_dtype(Nullability::Nullable),
    );

    assert_eq!(converted_non_nullable.len(), 4);
    assert_eq!(
        converted_non_nullable.dtype(),
        &utc_micros_dtype(Nullability::NonNullable),
    );
}
975
976 #[rstest]
978 #[case::utf8(
979 Arc::new(StringArray::from(vec![Some("hello"), None, Some("world"), Some("test")])),
980 Arc::new(StringArray::from(vec!["hello", "world", "test", "vortex"])),
981 DType::Utf8(Nullability::NonNullable),
982 )]
983 #[case::large_utf8(
984 Arc::new(LargeStringArray::from(vec![Some("hello"), None, Some("world"), Some("test")])),
985 Arc::new(LargeStringArray::from(vec!["hello", "world", "test", "vortex"])),
986 DType::Utf8(Nullability::NonNullable),
987 )]
988 #[case::binary(
989 Arc::new(BinaryArray::from(vec![
990 Some("hello".as_bytes()), None, Some("world".as_bytes()), Some("test".as_bytes()),
991 ])),
992 Arc::new(BinaryArray::from(vec![
993 "hello".as_bytes(), "world".as_bytes(), "test".as_bytes(), "vortex".as_bytes(),
994 ])),
995 DType::Binary(Nullability::NonNullable),
996 )]
997 #[case::large_binary(
998 Arc::new(LargeBinaryArray::from(vec![
999 Some("hello".as_bytes()), None, Some("world".as_bytes()), Some("test".as_bytes()),
1000 ])),
1001 Arc::new(LargeBinaryArray::from(vec![
1002 "hello".as_bytes(), "world".as_bytes(), "test".as_bytes(), "vortex".as_bytes(),
1003 ])),
1004 DType::Binary(Nullability::NonNullable),
1005 )]
1006 fn test_string_binary_array_conversion(
1007 #[case] nullable: Arc<dyn ArrowArray>,
1008 #[case] non_nullable: Arc<dyn ArrowArray>,
1009 #[case] expected_non_nullable_dtype: DType,
1010 ) {
1011 let v_null = ArrayRef::from_arrow(nullable.as_ref(), true).unwrap();
1012 let v_non_null = ArrayRef::from_arrow(non_nullable.as_ref(), false).unwrap();
1013 assert_eq!(v_null.len(), 4);
1014 assert_eq!(v_non_null.len(), 4);
1015 assert_eq!(v_null.dtype(), &expected_non_nullable_dtype.as_nullable());
1016 assert_eq!(v_non_null.dtype(), &expected_non_nullable_dtype);
1017 }
1018
/// Converts Arrow string-view arrays (with and without a null) and checks that the result
/// is a `VarBinView` with as many data buffers as the Arrow source and a `Utf8` dtype of
/// the requested nullability.
#[test]
fn test_utf8_view_array_conversion() {
    // Source containing one null slot.
    let mut sparse_builder = StringViewBuilder::new();
    for entry in [Some("hello"), None, Some("world"), Some("test")] {
        if let Some(text) = entry {
            sparse_builder.append_value(text);
        } else {
            sparse_builder.append_null();
        }
    }
    let arrow_sparse = sparse_builder.finish();
    let converted_sparse = ArrayRef::from_arrow(&arrow_sparse, true).unwrap();

    // Fully populated source.
    let mut dense_builder = StringViewBuilder::new();
    for text in ["hello", "world", "test", "vortex"] {
        dense_builder.append_value(text);
    }
    let arrow_dense = dense_builder.finish();
    let converted_dense = ArrayRef::from_arrow(&arrow_dense, false).unwrap();

    assert_eq!(converted_sparse.len(), 4);
    assert_eq!(converted_dense.len(), 4);

    let sparse_view = converted_sparse.as_::<VarBinView>();
    assert_eq!(
        sparse_view.data_buffers().len(),
        arrow_sparse.data_buffers().len(),
    );
    assert_eq!(sparse_view.dtype(), &DType::Utf8(true.into()));

    let dense_view = converted_dense.as_::<VarBinView>();
    assert_eq!(
        dense_view.data_buffers().len(),
        arrow_dense.data_buffers().len(),
    );
    assert_eq!(dense_view.dtype(), &DType::Utf8(false.into()));
}
1058
/// Converts Arrow binary-view arrays (with and without a null) and checks that the result
/// is a `VarBinView` with as many data buffers as the Arrow source and a `Binary` dtype of
/// the requested nullability.
#[test]
fn test_binary_view_array_conversion() {
    // Source containing one null slot.
    let sparse_rows: [Option<&[u8]>; 4] = [Some(b"hello"), None, Some(b"world"), Some(b"test")];
    let mut sparse_builder = BinaryViewBuilder::new();
    for entry in sparse_rows {
        if let Some(bytes) = entry {
            sparse_builder.append_value(bytes);
        } else {
            sparse_builder.append_null();
        }
    }
    let arrow_sparse = sparse_builder.finish();
    let converted_sparse = ArrayRef::from_arrow(&arrow_sparse, true).unwrap();

    // Fully populated source.
    let dense_rows: [&[u8]; 4] = [b"hello", b"world", b"test", b"vortex"];
    let mut dense_builder = BinaryViewBuilder::new();
    for bytes in dense_rows {
        dense_builder.append_value(bytes);
    }
    let arrow_dense = dense_builder.finish();
    let converted_dense = ArrayRef::from_arrow(&arrow_dense, false).unwrap();

    assert_eq!(converted_sparse.len(), 4);
    assert_eq!(converted_dense.len(), 4);

    let sparse_view = converted_sparse.as_::<VarBinView>();
    assert_eq!(
        sparse_view.data_buffers().len(),
        arrow_sparse.data_buffers().len(),
    );
    assert_eq!(sparse_view.dtype(), &DType::Binary(true.into()));

    let dense_view = converted_dense.as_::<VarBinView>();
    assert_eq!(
        dense_view.data_buffers().len(),
        arrow_dense.data_buffers().len(),
    );
    assert_eq!(dense_view.dtype(), &DType::Binary(false.into()));
}
1098
/// Converts an Arrow boolean array with a null and one without, and checks that each
/// conversion keeps all four rows.
#[test]
fn test_boolean_array_conversion() {
    let with_nulls = BooleanArray::from(vec![Some(true), None, Some(false), Some(true)]);
    let converted_nullable = ArrayRef::from_arrow(&with_nulls, true).unwrap();

    let without_nulls = BooleanArray::from(vec![true, false, true, false]);
    let converted_non_nullable = ArrayRef::from_arrow(&without_nulls, false).unwrap();

    assert_eq!(converted_nullable.len(), 4);
    assert_eq!(converted_non_nullable.len(), 4);
}
1111
/// Converts a two-field Arrow struct array, first without and then with a top-level
/// validity buffer, and checks the row count and the field names of the resulting
/// `Struct` array.
#[test]
fn test_struct_array_conversion() {
    let struct_fields = Fields::from(vec![
        Field::new("field1", DataType::Int32, true),
        Field::new("field2", DataType::Utf8, false),
    ]);

    // No struct-level validity: converted as non-nullable.
    let plain_struct = StructArray::new(
        struct_fields.clone(),
        vec![
            Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])),
            Arc::new(StringArray::from(vec!["a", "b", "c"])),
        ],
        None,
    );

    let converted_plain = ArrayRef::from_arrow(&plain_struct, false).unwrap();
    assert_eq!(converted_plain.len(), 3);

    let plain_view = converted_plain.as_::<Struct>();
    assert_eq!(plain_view.names().len(), 2);
    assert_eq!(plain_view.names()[0], "field1");
    assert_eq!(plain_view.names()[1], "field2");

    // Same children, but the middle row is marked null at the struct level.
    let row_validity =
        arrow_buffer::NullBuffer::new(BooleanBuffer::from(vec![true, false, true]));
    let struct_with_nulls = StructArray::new(
        struct_fields,
        vec![
            Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])),
            Arc::new(StringArray::from(vec!["a", "b", "c"])),
        ],
        Some(row_validity),
    );

    let converted_with_nulls = ArrayRef::from_arrow(&struct_with_nulls, true).unwrap();
    assert_eq!(converted_with_nulls.len(), 3);

    let nullable_view = converted_with_nulls.as_::<Struct>();
    assert_eq!(nullable_view.names().len(), 2);
    assert_eq!(nullable_view.names()[0], "field1");
    assert_eq!(nullable_view.names()[1], "field2");
}
1160
/// Converts Arrow list arrays built with 32-bit offsets (one with a null slot, one
/// without) and checks the row count plus the length and primitive type of the offsets
/// of the resulting `List` array.
#[test]
fn test_list_array_conversion() {
    // Three list slots, the middle one null.
    let mut sparse_builder = ListBuilder::new(Int32Builder::new());
    sparse_builder.append_value([Some(1), None, Some(3)]);
    sparse_builder.append_null();
    sparse_builder.append_value([Some(4), Some(5)]);
    let arrow_sparse = sparse_builder.finish();

    let converted_sparse = ArrayRef::from_arrow(&arrow_sparse, true).unwrap();
    assert_eq!(converted_sparse.len(), 3);

    let sparse_list = converted_sparse.as_::<List>();
    let sparse_offsets = sparse_list.offsets().as_::<Primitive>();
    // One more offset than there are list slots.
    assert_eq!(sparse_offsets.len(), 4);
    assert_eq!(sparse_offsets.ptype(), PType::I32);

    // Two list slots, none of them null.
    let mut dense_builder = ListBuilder::new(Int32Builder::new());
    dense_builder.append_value([Some(1), None, Some(3)]);
    dense_builder.append_value([Some(4), Some(5)]);
    let arrow_dense = dense_builder.finish();

    let converted_dense = ArrayRef::from_arrow(&arrow_dense, false).unwrap();
    assert_eq!(converted_dense.len(), 2);

    let dense_list = converted_dense.as_::<List>();
    let dense_offsets = dense_list.offsets().as_::<Primitive>();
    assert_eq!(dense_offsets.len(), 3);
    assert_eq!(dense_offsets.ptype(), PType::I32);
}
1194
/// Converts Arrow large-list arrays (one with a null slot, one without) and checks the
/// row count plus the length and primitive type of the offsets of the resulting `List`
/// array, which are expected to be 64-bit here.
#[test]
fn test_large_list_array_conversion() {
    // Three list slots, the middle one null.
    let mut sparse_builder = LargeListBuilder::new(Int32Builder::new());
    sparse_builder.append_value([Some(1), None, Some(3)]);
    sparse_builder.append_null();
    sparse_builder.append_value([Some(4), Some(5)]);
    let arrow_sparse = sparse_builder.finish();

    let converted_sparse = ArrayRef::from_arrow(&arrow_sparse, true).unwrap();
    assert_eq!(converted_sparse.len(), 3);

    let sparse_list = converted_sparse.as_::<List>();
    let sparse_offsets = sparse_list.offsets().as_::<Primitive>();
    // One more offset than there are list slots.
    assert_eq!(sparse_offsets.len(), 4);
    assert_eq!(sparse_offsets.ptype(), PType::I64);

    // Two list slots, none of them null.
    let mut dense_builder = LargeListBuilder::new(Int32Builder::new());
    dense_builder.append_value([Some(1), None, Some(3)]);
    dense_builder.append_value([Some(4), Some(5)]);
    let arrow_dense = dense_builder.finish();

    let converted_dense = ArrayRef::from_arrow(&arrow_dense, false).unwrap();
    assert_eq!(converted_dense.len(), 2);

    let dense_list = converted_dense.as_::<List>();
    let dense_offsets = dense_list.offsets().as_::<Primitive>();
    assert_eq!(dense_offsets.len(), 3);
    assert_eq!(dense_offsets.ptype(), PType::I64);
}
1227
/// Converts Arrow fixed-size-list arrays of width 3, first without and then with a
/// list-level validity buffer, and checks the row count, list size and total element
/// count of the resulting `FixedSizeList` array.
#[test]
fn test_fixed_size_list_array_conversion() {
    let item_field = Arc::new(Field::new("item", DataType::Int32, true));

    // Twelve elements, laid out here one fixed-size list per row.
    let dense_elements = Int32Array::from(vec![
        Some(1), Some(2), Some(3),
        Some(4), None, Some(6),
        Some(7), Some(8), Some(9),
        Some(10), Some(11), Some(12),
    ]);
    let arrow_dense = ArrowFixedSizeListArray::try_new(
        Arc::clone(&item_field),
        3,
        Arc::new(dense_elements),
        None,
    )
    .unwrap();

    let converted_dense = ArrayRef::from_arrow(&arrow_dense, false).unwrap();
    assert_eq!(converted_dense.len(), 4);

    let dense_fsl = converted_dense.as_::<FixedSizeList>();
    assert_eq!(dense_fsl.list_size(), 3);
    assert_eq!(dense_fsl.elements().len(), 12);

    // Nine elements; the middle list is marked null by the validity buffer.
    let sparse_elements = Int32Array::from(vec![
        Some(1), Some(2), Some(3),
        Some(4), None, Some(6),
        Some(7), Some(8), Some(9),
    ]);
    let list_validity =
        arrow_buffer::NullBuffer::new(BooleanBuffer::from(vec![true, false, true]));
    let arrow_sparse = ArrowFixedSizeListArray::try_new(
        item_field,
        3,
        Arc::new(sparse_elements),
        Some(list_validity),
    )
    .unwrap();

    let converted_sparse = ArrayRef::from_arrow(&arrow_sparse, true).unwrap();
    assert_eq!(converted_sparse.len(), 3);

    let sparse_fsl = converted_sparse.as_::<FixedSizeList>();
    assert_eq!(sparse_fsl.list_size(), 3);
    assert_eq!(sparse_fsl.elements().len(), 9);
}
1293
/// Converts Arrow list-view arrays over the same ten elements in three flavours:
/// 32-bit offsets/sizes without nulls, the same with a validity buffer, and 64-bit
/// offsets/sizes. Checks row counts and, for the non-null flavours, the length and
/// primitive type of the converted offsets and sizes.
#[test]
fn test_list_view_array_conversion() {
    let item_field = Arc::new(Field::new("item", DataType::Int32, true));

    // Shared element storage; each list-view array below gets its own handle to it.
    let elements: Arc<dyn ArrowArray> = Arc::new(Int32Array::from(vec![
        Some(1), Some(2), Some(3), Some(4), Some(5),
        Some(6), Some(7), Some(8), Some(9), Some(10),
    ]));

    let offsets_i32 = ScalarBuffer::from(vec![0i32, 3, 5, 6]);
    let sizes_i32 = ScalarBuffer::from(vec![3i32, 2, 1, 4]);

    // 32-bit offsets and sizes, no validity buffer.
    let arrow_plain = GenericListViewArray::try_new(
        Arc::clone(&item_field),
        offsets_i32.clone(),
        sizes_i32.clone(),
        Arc::clone(&elements),
        None,
    )
    .unwrap();

    let converted_plain = ArrayRef::from_arrow(&arrow_plain, false).unwrap();
    assert_eq!(converted_plain.len(), 4);

    let plain_view = converted_plain.as_::<ListView>();
    let plain_offsets = plain_view.offsets().as_::<Primitive>();
    let plain_sizes = plain_view.sizes().as_::<Primitive>();

    assert_eq!(plain_offsets.len(), 4);
    assert_eq!(plain_offsets.ptype(), PType::I32);
    assert_eq!(plain_sizes.len(), 4);
    assert_eq!(plain_sizes.ptype(), PType::I32);

    // Same layout, with the second list marked null.
    let list_validity =
        arrow_buffer::NullBuffer::new(BooleanBuffer::from(vec![true, false, true, true]));
    let arrow_with_nulls = GenericListViewArray::try_new(
        Arc::clone(&item_field),
        offsets_i32,
        sizes_i32,
        Arc::clone(&elements),
        Some(list_validity),
    )
    .unwrap();

    let converted_with_nulls = ArrayRef::from_arrow(&arrow_with_nulls, true).unwrap();
    assert_eq!(converted_with_nulls.len(), 4);

    // 64-bit offsets and sizes, no validity buffer.
    let offsets_i64 = ScalarBuffer::from(vec![0i64, 3, 5, 6]);
    let sizes_i64 = ScalarBuffer::from(vec![3i64, 2, 1, 4]);
    let arrow_large =
        GenericListViewArray::try_new(item_field, offsets_i64, sizes_i64, elements, None)
            .unwrap();

    let converted_large = ArrayRef::from_arrow(&arrow_large, false).unwrap();
    assert_eq!(converted_large.len(), 4);

    let large_view = converted_large.as_::<ListView>();
    let large_offsets = large_view.offsets().as_::<Primitive>();
    let large_sizes = large_view.sizes().as_::<Primitive>();

    assert_eq!(large_offsets.len(), 4);
    assert_eq!(large_offsets.ptype(), PType::I64);
    assert_eq!(large_sizes.len(), 4);
    assert_eq!(large_sizes.ptype(), PType::I64);
}
1379
/// Converts a five-row Arrow `NullArray` as nullable and checks the row count.
#[test]
fn test_null_array_conversion() {
    let all_nulls = NullArray::new(5);
    let converted = ArrayRef::from_arrow(&all_nulls, true).unwrap();
    assert_eq!(converted.len(), 5);
}
1387
/// Turns an `ArrowBuffer` built from five `u8` values into an array via `into_array`
/// and checks that it has five elements.
#[test]
fn test_arrow_buffer_conversion() {
    let byte_buffer = ArrowBuffer::from_vec(vec![1u8, 2, 3, 4, 5]);
    let converted = byte_buffer.into_array();
    assert_eq!(converted.len(), 5);
}
1396
/// Turns a `BooleanBuffer` of five flags into an array via `into_array` and checks that
/// it has five elements.
#[test]
fn test_boolean_buffer_conversion() {
    let flags = BooleanBuffer::from(vec![true, false, true, false, true]);
    let converted = flags.into_array();
    assert_eq!(converted.len(), 5);
}
1404
/// Turns a `ScalarBuffer` of five `i32` values into an array via `into_array` and checks
/// that it has five elements.
#[test]
fn test_scalar_buffer_conversion() {
    let numbers = ScalarBuffer::from(vec![1i32, 2, 3, 4, 5]);
    let converted = numbers.into_array();
    assert_eq!(converted.len(), 5);
}
1412
/// Turns an `OffsetBuffer` holding five `i32` offsets into an array via `into_array` and
/// checks that all five offsets are kept.
#[test]
fn test_offset_buffer_conversion() {
    let raw_offsets = ScalarBuffer::from(vec![0i32, 2, 5, 8, 10]);
    let offsets = OffsetBuffer::new(raw_offsets);
    let converted = offsets.into_array();
    assert_eq!(converted.len(), 5);
}
1420
/// Converts a two-column, four-row `RecordBatch` twice: once passed by value and once
/// passed by reference. Both conversions must yield four rows.
#[test]
fn test_record_batch_conversion() {
    // Builds a fresh batch on every call so the by-value conversion can consume one.
    let build_batch = || {
        let schema = Arc::new(Schema::new(vec![
            Field::new("field1", DataType::Int32, false),
            Field::new("field2", DataType::Utf8, false),
        ]));

        let field1_data = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
        let field2_data = Arc::new(StringArray::from(vec!["a", "b", "c", "d"]));

        RecordBatch::try_new(schema, vec![field1_data, field2_data]).unwrap()
    };

    // Owned batch.
    let converted_from_owned = ArrayRef::from_arrow(build_batch(), false).unwrap();
    assert_eq!(converted_from_owned.len(), 4);

    // Borrowed batch.
    let borrowed_batch = build_batch();
    let converted_from_borrowed = ArrayRef::from_arrow(&borrowed_batch, false).unwrap();
    assert_eq!(converted_from_borrowed.len(), 4);
}
1451
/// Converts integer, string and boolean arrays through a type-erased `&dyn ArrowArray`
/// reference and checks the row count of each result.
#[test]
fn test_dyn_array_conversion() {
    let ints = Int32Array::from(vec![1, 2, 3, 4]);
    let converted_ints = ArrayRef::from_arrow(&ints as &dyn ArrowArray, false).unwrap();
    assert_eq!(converted_ints.len(), 4);

    let strings = StringArray::from(vec!["a", "b", "c"]);
    let converted_strings = ArrayRef::from_arrow(&strings as &dyn ArrowArray, false).unwrap();
    assert_eq!(converted_strings.len(), 3);

    let bools = BooleanArray::from(vec![true, false, true]);
    let converted_bools = ArrayRef::from_arrow(&bools as &dyn ArrowArray, false).unwrap();
    assert_eq!(converted_bools.len(), 3);
}
1470
/// An all-null struct array whose only child field is declared non-nullable must still
/// convert successfully when the struct itself is requested as nullable.
#[test]
pub fn nullable_may_contain_non_nullable() {
    let child_fields = Fields::from(vec![Field::new(
        "non_nullable_inner",
        DataType::Int32,
        false,
    )]);
    let all_null_struct = new_null_array(&DataType::Struct(child_fields), 1);

    ArrayRef::from_arrow(all_null_struct.as_ref(), true).unwrap();
}
1484
/// Same as the single-level case, but the non-nullable child is itself a struct with a
/// non-nullable `Int32` field; conversion as nullable must still succeed.
#[test]
pub fn nullable_may_contain_deeply_nested_non_nullable() {
    // Innermost level: a single non-nullable integer field.
    let deeper_fields = Fields::from(vec![Field::new(
        "non_nullable_deeper_inner",
        DataType::Int32,
        false,
    )]);
    // Middle level: a non-nullable struct wrapping the field above.
    let inner_fields = Fields::from(vec![Field::new(
        "non_nullable_inner",
        DataType::Struct(deeper_fields),
        false,
    )]);
    let all_null_struct = new_null_array(&DataType::Struct(inner_fields), 1);

    ArrayRef::from_arrow(all_null_struct.as_ref(), true).unwrap();
}
1501
/// Requesting a non-nullable conversion of a primitive array that contains a null must
/// return an error.
#[test]
fn non_nullable_request_rejects_nulls() {
    let ints_with_null = Int32Array::from(vec![Some(1), None, Some(3)]);
    let outcome = ArrayRef::from_arrow(&ints_with_null, false);
    assert!(outcome.is_err());
}
1509
/// Requesting a non-nullable conversion of an Arrow `NullArray` must return an error.
#[test]
fn non_nullable_request_rejects_null_array() {
    let all_nulls = NullArray::new(5);
    let outcome = ArrayRef::from_arrow(&all_nulls, false);
    assert!(outcome.is_err());
}
1517
/// Requesting a non-nullable conversion of an all-null struct array must return an
/// error, even though its only child field is itself nullable.
#[test]
fn non_nullable_struct_with_nulls_errors() {
    let child_fields = Fields::from(vec![Field::new("a", DataType::Int32, true)]);
    let all_null_struct = new_null_array(&DataType::Struct(child_fields), 3);

    let outcome = ArrayRef::from_arrow(all_null_struct.as_ref(), false);
    assert!(outcome.is_err());
}
1528
/// Requesting a non-nullable conversion of a list array that contains a null list slot
/// must return an error.
#[test]
fn non_nullable_list_with_nulls_errors() {
    let mut list_builder = ListBuilder::new(Int32Builder::new());
    list_builder.append_value([Some(1), Some(2)]);
    list_builder.append_null();
    let list_with_null = list_builder.finish();

    let outcome = ArrayRef::from_arrow(&list_with_null, false);
    assert!(outcome.is_err());
}
1539
/// An all-null struct array whose only child is a non-nullable dictionary field
/// (`Int32` keys, `Utf8` values) is expected to fail conversion, even when the struct
/// itself is requested as nullable.
#[test]
pub fn nullable_struct_containing_non_nullable_dictionary_with_nulls_errors() {
    let dictionary_type =
        DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
    let child_fields = Fields::from(vec![Field::new(
        "non_nullable_deeper_inner",
        dictionary_type,
        false,
    )]);
    let all_null_struct = new_null_array(&DataType::Struct(child_fields), 1);

    let outcome = ArrayRef::from_arrow(all_null_struct.as_ref(), true);
    assert!(outcome.is_err());
}
1558}