1use arrow_array::array::{
2 Array as ArrowArray, ArrayRef as ArrowArrayRef, ArrowPrimitiveType,
3 BooleanArray as ArrowBooleanArray, GenericByteArray, NullArray as ArrowNullArray,
4 OffsetSizeTrait, PrimitiveArray as ArrowPrimitiveArray, StructArray as ArrowStructArray,
5};
6use arrow_array::cast::{AsArray, as_null_array};
7use arrow_array::types::{
8 ByteArrayType, ByteViewType, Date32Type, Date64Type, DurationMicrosecondType,
9 DurationMillisecondType, DurationNanosecondType, DurationSecondType, Float16Type, Float32Type,
10 Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, Time32MillisecondType,
11 Time32SecondType, Time64MicrosecondType, Time64NanosecondType, TimestampMicrosecondType,
12 TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt8Type, UInt16Type,
13 UInt32Type, UInt64Type,
14};
15use arrow_array::{BinaryViewArray, GenericByteViewArray, GenericListArray, StringViewArray};
16use arrow_buffer::buffer::{NullBuffer, OffsetBuffer};
17use arrow_buffer::{ArrowNativeType, BooleanBuffer, Buffer as ArrowBuffer, ScalarBuffer};
18use arrow_schema::{DataType, TimeUnit as ArrowTimeUnit};
19use vortex_buffer::{Alignment, Buffer, ByteBuffer};
20use vortex_dtype::datetime::TimeUnit;
21use vortex_dtype::{DType, NativePType, PType};
22use vortex_error::{VortexExpect as _, vortex_panic};
23
24use crate::arrays::{
25 BoolArray, ListArray, NullArray, PrimitiveArray, StructArray, TemporalArray, VarBinArray,
26 VarBinViewArray,
27};
28use crate::arrow::FromArrowArray;
29use crate::validity::Validity;
30use crate::{Array, ArrayRef, IntoArray};
31
32impl IntoArray for ArrowBuffer {
33 fn into_array(self) -> ArrayRef {
34 PrimitiveArray::from_byte_buffer(
35 ByteBuffer::from_arrow_buffer(self, Alignment::of::<u8>()),
36 PType::U8,
37 Validity::NonNullable,
38 )
39 .into_array()
40 }
41}
42
43impl IntoArray for BooleanBuffer {
44 fn into_array(self) -> ArrayRef {
45 BoolArray::new(self, Validity::NonNullable).into_array()
46 }
47}
48
49impl<T> IntoArray for ScalarBuffer<T>
50where
51 T: ArrowNativeType + NativePType,
52{
53 fn into_array(self) -> ArrayRef {
54 PrimitiveArray::new(
55 Buffer::<T>::from_arrow_scalar_buffer(self),
56 Validity::NonNullable,
57 )
58 .into_array()
59 }
60}
61
62impl<O> IntoArray for OffsetBuffer<O>
63where
64 O: NativePType + OffsetSizeTrait,
65{
66 fn into_array(self) -> ArrayRef {
67 let primitive = PrimitiveArray::new(
68 Buffer::from_arrow_scalar_buffer(self.into_inner()),
69 Validity::NonNullable,
70 );
71
72 primitive.into_array()
73 }
74}
75
76impl<T: ArrowPrimitiveType> FromArrowArray<&ArrowPrimitiveArray<T>> for ArrayRef
77where
78 <T as ArrowPrimitiveType>::Native: NativePType,
79{
80 fn from_arrow(value: &ArrowPrimitiveArray<T>, nullable: bool) -> Self {
81 let arr = PrimitiveArray::new(
82 Buffer::from_arrow_scalar_buffer(value.values().clone()),
83 nulls(value.nulls(), nullable),
84 );
85
86 if T::DATA_TYPE.is_numeric() {
87 return arr.into_array();
88 }
89
90 match T::DATA_TYPE {
91 DataType::Timestamp(time_unit, tz) => {
92 let tz = tz.map(|s| s.to_string());
93 TemporalArray::new_timestamp(arr.into_array(), time_unit.into(), tz).into()
94 }
95 DataType::Time32(time_unit) => {
96 TemporalArray::new_time(arr.into_array(), time_unit.into()).into()
97 }
98 DataType::Time64(time_unit) => {
99 TemporalArray::new_time(arr.into_array(), time_unit.into()).into()
100 }
101 DataType::Date32 => TemporalArray::new_date(arr.into_array(), TimeUnit::D).into(),
102 DataType::Date64 => TemporalArray::new_date(arr.into_array(), TimeUnit::Ms).into(),
103 DataType::Duration(_) => unimplemented!(),
104 DataType::Interval(_) => unimplemented!(),
105 _ => vortex_panic!("Invalid data type for PrimitiveArray: {}", T::DATA_TYPE),
106 }
107 }
108}
109
110impl<T: ByteArrayType> FromArrowArray<&GenericByteArray<T>> for ArrayRef
111where
112 <T as ByteArrayType>::Offset: NativePType,
113{
114 fn from_arrow(value: &GenericByteArray<T>, nullable: bool) -> Self {
115 let dtype = match T::DATA_TYPE {
116 DataType::Binary | DataType::LargeBinary => DType::Binary(nullable.into()),
117 DataType::Utf8 | DataType::LargeUtf8 => DType::Utf8(nullable.into()),
118 _ => vortex_panic!("Invalid data type for ByteArray: {}", T::DATA_TYPE),
119 };
120 VarBinArray::try_new(
121 value.offsets().clone().into_array(),
122 ByteBuffer::from_arrow_buffer(value.values().clone(), Alignment::of::<u8>()),
123 dtype,
124 nulls(value.nulls(), nullable),
125 )
126 .vortex_expect("Failed to convert Arrow GenericByteArray to Vortex VarBinArray")
127 .into_array()
128 }
129}
130
131impl<T: ByteViewType> FromArrowArray<&GenericByteViewArray<T>> for ArrayRef {
132 fn from_arrow(value: &GenericByteViewArray<T>, nullable: bool) -> Self {
133 let dtype = match T::DATA_TYPE {
134 DataType::BinaryView => DType::Binary(nullable.into()),
135 DataType::Utf8View => DType::Utf8(nullable.into()),
136 _ => vortex_panic!("Invalid data type for ByteViewArray: {}", T::DATA_TYPE),
137 };
138
139 let views_buffer = Buffer::from_byte_buffer(
140 Buffer::from_arrow_scalar_buffer(value.views().clone()).into_byte_buffer(),
141 );
142
143 VarBinViewArray::try_new(
144 views_buffer,
145 value
146 .data_buffers()
147 .iter()
148 .map(|b| ByteBuffer::from_arrow_buffer(b.clone(), Alignment::of::<u8>()))
149 .collect::<Vec<_>>(),
150 dtype,
151 nulls(value.nulls(), nullable),
152 )
153 .vortex_expect("Failed to convert Arrow GenericByteViewArray to Vortex VarBinViewArray")
154 .into_array()
155 }
156}
157
158impl FromArrowArray<&ArrowBooleanArray> for ArrayRef {
159 fn from_arrow(value: &ArrowBooleanArray, nullable: bool) -> Self {
160 BoolArray::new(value.values().clone(), nulls(value.nulls(), nullable)).into_array()
161 }
162}
163
164impl FromArrowArray<&ArrowStructArray> for ArrayRef {
165 fn from_arrow(value: &ArrowStructArray, nullable: bool) -> Self {
166 StructArray::try_new(
167 value.column_names().iter().map(|s| (*s).into()).collect(),
168 value
169 .columns()
170 .iter()
171 .zip(value.fields())
172 .map(|(c, field)| Self::from_arrow(c.clone(), field.is_nullable()))
173 .collect(),
174 value.len(),
175 nulls(value.nulls(), nullable),
176 )
177 .vortex_expect("Failed to convert Arrow StructArray to Vortex StructArray")
178 .into_array()
179 }
180}
181
182impl<O: OffsetSizeTrait + NativePType> FromArrowArray<&GenericListArray<O>> for ArrayRef {
183 fn from_arrow(value: &GenericListArray<O>, nullable: bool) -> Self {
184 let elem_nullable = match value.data_type() {
186 DataType::List(field) => field.is_nullable(),
187 DataType::LargeList(field) => field.is_nullable(),
188 dt => vortex_panic!("Invalid data type for ListArray: {dt}"),
189 };
190 ListArray::try_new(
191 Self::from_arrow(value.values().clone(), elem_nullable),
192 value.offsets().clone().into_array(),
194 nulls(value.nulls(), nullable),
195 )
196 .vortex_expect("Failed to convert Arrow StructArray to Vortex StructArray")
197 .into_array()
198 }
199}
200
201impl FromArrowArray<&ArrowNullArray> for ArrayRef {
202 fn from_arrow(value: &ArrowNullArray, nullable: bool) -> Self {
203 assert!(nullable);
204 NullArray::new(value.len()).into_array()
205 }
206}
207
208fn nulls(nulls: Option<&NullBuffer>, nullable: bool) -> Validity {
209 if nullable {
210 nulls
211 .map(|nulls| {
212 if nulls.null_count() == nulls.len() {
213 Validity::AllInvalid
214 } else {
215 Validity::from(nulls.inner().clone())
216 }
217 })
218 .unwrap_or_else(|| Validity::AllValid)
219 } else {
220 assert!(nulls.map(|x| x.null_count() == 0).unwrap_or(true));
221 Validity::NonNullable
222 }
223}
224
225impl FromArrowArray<ArrowArrayRef> for ArrayRef {
226 fn from_arrow(array: ArrowArrayRef, nullable: bool) -> Self {
227 match array.data_type() {
228 DataType::Boolean => Self::from_arrow(array.as_boolean(), nullable),
229 DataType::UInt8 => Self::from_arrow(array.as_primitive::<UInt8Type>(), nullable),
230 DataType::UInt16 => Self::from_arrow(array.as_primitive::<UInt16Type>(), nullable),
231 DataType::UInt32 => Self::from_arrow(array.as_primitive::<UInt32Type>(), nullable),
232 DataType::UInt64 => Self::from_arrow(array.as_primitive::<UInt64Type>(), nullable),
233 DataType::Int8 => Self::from_arrow(array.as_primitive::<Int8Type>(), nullable),
234 DataType::Int16 => Self::from_arrow(array.as_primitive::<Int16Type>(), nullable),
235 DataType::Int32 => Self::from_arrow(array.as_primitive::<Int32Type>(), nullable),
236 DataType::Int64 => Self::from_arrow(array.as_primitive::<Int64Type>(), nullable),
237 DataType::Float16 => Self::from_arrow(array.as_primitive::<Float16Type>(), nullable),
238 DataType::Float32 => Self::from_arrow(array.as_primitive::<Float32Type>(), nullable),
239 DataType::Float64 => Self::from_arrow(array.as_primitive::<Float64Type>(), nullable),
240 DataType::Utf8 => Self::from_arrow(array.as_string::<i32>(), nullable),
241 DataType::LargeUtf8 => Self::from_arrow(array.as_string::<i64>(), nullable),
242 DataType::Binary => Self::from_arrow(array.as_binary::<i32>(), nullable),
243 DataType::LargeBinary => Self::from_arrow(array.as_binary::<i64>(), nullable),
244 DataType::BinaryView => Self::from_arrow(
245 array
246 .as_any()
247 .downcast_ref::<BinaryViewArray>()
248 .vortex_expect("Expected Arrow BinaryViewArray for DataType::BinaryView"),
249 nullable,
250 ),
251 DataType::Utf8View => Self::from_arrow(
252 array
253 .as_any()
254 .downcast_ref::<StringViewArray>()
255 .vortex_expect("Expected Arrow StringViewArray for DataType::Utf8View"),
256 nullable,
257 ),
258 DataType::Struct(_) => Self::from_arrow(array.as_struct(), nullable),
259 DataType::List(_) => Self::from_arrow(array.as_list::<i32>(), nullable),
260 DataType::LargeList(_) => Self::from_arrow(array.as_list::<i64>(), nullable),
261 DataType::Null => Self::from_arrow(as_null_array(&array), nullable),
262 DataType::Timestamp(u, _) => match u {
263 ArrowTimeUnit::Second => {
264 Self::from_arrow(array.as_primitive::<TimestampSecondType>(), nullable)
265 }
266 ArrowTimeUnit::Millisecond => {
267 Self::from_arrow(array.as_primitive::<TimestampMillisecondType>(), nullable)
268 }
269 ArrowTimeUnit::Microsecond => {
270 Self::from_arrow(array.as_primitive::<TimestampMicrosecondType>(), nullable)
271 }
272 ArrowTimeUnit::Nanosecond => {
273 Self::from_arrow(array.as_primitive::<TimestampNanosecondType>(), nullable)
274 }
275 },
276 DataType::Date32 => Self::from_arrow(array.as_primitive::<Date32Type>(), nullable),
277 DataType::Date64 => Self::from_arrow(array.as_primitive::<Date64Type>(), nullable),
278 DataType::Time32(u) => match u {
279 ArrowTimeUnit::Second => {
280 Self::from_arrow(array.as_primitive::<Time32SecondType>(), nullable)
281 }
282 ArrowTimeUnit::Millisecond => {
283 Self::from_arrow(array.as_primitive::<Time32MillisecondType>(), nullable)
284 }
285 _ => unreachable!(),
286 },
287 DataType::Time64(u) => match u {
288 ArrowTimeUnit::Microsecond => {
289 Self::from_arrow(array.as_primitive::<Time64MicrosecondType>(), nullable)
290 }
291 ArrowTimeUnit::Nanosecond => {
292 Self::from_arrow(array.as_primitive::<Time64NanosecondType>(), nullable)
293 }
294 _ => unreachable!(),
295 },
296 DataType::Duration(u) => match u {
297 ArrowTimeUnit::Second => {
298 Self::from_arrow(array.as_primitive::<DurationSecondType>(), nullable)
299 }
300 ArrowTimeUnit::Millisecond => {
301 Self::from_arrow(array.as_primitive::<DurationMillisecondType>(), nullable)
302 }
303 ArrowTimeUnit::Microsecond => {
304 Self::from_arrow(array.as_primitive::<DurationMicrosecondType>(), nullable)
305 }
306 ArrowTimeUnit::Nanosecond => {
307 Self::from_arrow(array.as_primitive::<DurationNanosecondType>(), nullable)
308 }
309 },
310 _ => vortex_panic!(
311 "Array encoding not implemented for Arrow data type {}",
312 array.data_type().clone()
313 ),
314 }
315 }
316}