1use std::sync::Arc;
3
4use crate::bitmap::utils::count_zeros;
5use crate::buffer::BytesAllocator;
6use crate::{
7 array::*,
8 bitmap::{utils::bytes_for, Bitmap},
9 buffer::{Buffer, Bytes},
10 datatypes::{DataType, PhysicalType},
11 error::{Error, Result},
12 ffi::schema::get_child,
13 types::NativeType,
14};
15
16use super::ArrowArray;
17
18pub unsafe fn try_from<A: ArrowArrayRef>(array: A) -> Result<Box<dyn Array>> {
23 use PhysicalType::*;
24 Ok(match array.data_type().to_physical_type() {
25 Null => Box::new(NullArray::try_from_ffi(array)?),
26 Boolean => Box::new(BooleanArray::try_from_ffi(array)?),
27 Primitive(primitive) => with_match_primitive_type!(primitive, |$T| {
28 Box::new(PrimitiveArray::<$T>::try_from_ffi(array)?)
29 }),
30 Utf8 => Box::new(Utf8Array::<i32>::try_from_ffi(array)?),
31 LargeUtf8 => Box::new(Utf8Array::<i64>::try_from_ffi(array)?),
32 Binary => Box::new(BinaryArray::<i32>::try_from_ffi(array)?),
33 LargeBinary => Box::new(BinaryArray::<i64>::try_from_ffi(array)?),
34 FixedSizeBinary => Box::new(FixedSizeBinaryArray::try_from_ffi(array)?),
35 List => Box::new(ListArray::<i32>::try_from_ffi(array)?),
36 LargeList => Box::new(ListArray::<i64>::try_from_ffi(array)?),
37 FixedSizeList => Box::new(FixedSizeListArray::try_from_ffi(array)?),
38 Struct => Box::new(StructArray::try_from_ffi(array)?),
39 Dictionary(key_type) => {
40 match_integer_type!(key_type, |$T| {
41 Box::new(DictionaryArray::<$T>::try_from_ffi(array)?)
42 })
43 }
44 Union => Box::new(UnionArray::try_from_ffi(array)?),
45 Map => Box::new(MapArray::try_from_ffi(array)?),
46 })
47}
48
49unsafe impl Send for ArrowArray {}
54unsafe impl Sync for ArrowArray {}
55
56impl Drop for ArrowArray {
57 fn drop(&mut self) {
58 match self.release {
59 None => (),
60 Some(release) => unsafe { release(self) },
61 };
62 }
63}
64
65unsafe extern "C" fn c_release_array(array: *mut ArrowArray) {
67 if array.is_null() {
68 return;
69 }
70 let array = &mut *array;
71
72 let private = Box::from_raw(array.private_data as *mut PrivateData);
74 for child in private.children_ptr.iter() {
75 let _ = Box::from_raw(*child);
76 }
77
78 if let Some(ptr) = private.dictionary_ptr {
79 let _ = Box::from_raw(ptr);
80 }
81
82 array.release = None;
83}
84
85#[allow(dead_code)]
86struct PrivateData {
87 array: Box<dyn Array>,
88 buffers_ptr: Box<[*const std::os::raw::c_void]>,
89 children_ptr: Box<[*mut ArrowArray]>,
90 dictionary_ptr: Option<*mut ArrowArray>,
91}
92
93impl ArrowArray {
94 pub(crate) fn new(array: Box<dyn Array>) -> Self {
99 let (offset, buffers, children, dictionary) =
100 offset_buffers_children_dictionary(array.as_ref());
101
102 let buffers_ptr = buffers
103 .iter()
104 .map(|maybe_buffer| match maybe_buffer {
105 Some(b) => *b as *const std::os::raw::c_void,
106 None => std::ptr::null(),
107 })
108 .collect::<Box<[_]>>();
109 let n_buffers = buffers.len() as i64;
110
111 let children_ptr = children
112 .into_iter()
113 .map(|child| Box::into_raw(Box::new(ArrowArray::new(child))))
114 .collect::<Box<_>>();
115 let n_children = children_ptr.len() as i64;
116
117 let dictionary_ptr =
118 dictionary.map(|array| Box::into_raw(Box::new(ArrowArray::new(array))));
119
120 let length = array.len() as i64;
121 let null_count = array.null_count() as i64;
122
123 let mut private_data = Box::new(PrivateData {
124 array,
125 buffers_ptr,
126 children_ptr,
127 dictionary_ptr,
128 });
129
130 Self {
131 length,
132 null_count,
133 offset: offset as i64,
134 n_buffers,
135 n_children,
136 buffers: private_data.buffers_ptr.as_mut_ptr(),
137 children: private_data.children_ptr.as_mut_ptr(),
138 dictionary: private_data.dictionary_ptr.unwrap_or(std::ptr::null_mut()),
139 release: Some(c_release_array),
140 private_data: Box::into_raw(private_data) as *mut ::std::os::raw::c_void,
141 }
142 }
143
144 pub fn empty() -> Self {
146 Self {
147 length: 0,
148 null_count: 0,
149 offset: 0,
150 n_buffers: 0,
151 n_children: 0,
152 buffers: std::ptr::null_mut(),
153 children: std::ptr::null_mut(),
154 dictionary: std::ptr::null_mut(),
155 release: None,
156 private_data: std::ptr::null_mut(),
157 }
158 }
159
160 pub(crate) fn len(&self) -> usize {
162 self.length as usize
163 }
164
165 pub(crate) fn offset(&self) -> usize {
167 self.offset as usize
168 }
169
170 pub(crate) fn null_count(&self) -> usize {
172 self.null_count as usize
173 }
174}
175
176unsafe fn get_buffer_ptr<T: NativeType>(
179 array: &ArrowArray,
180 data_type: &DataType,
181 index: usize,
182) -> Result<*mut T> {
183 if array.buffers.is_null() {
184 return Err(Error::oos(format!(
185 "An ArrowArray of type {data_type:?} must have non-null buffers"
186 )));
187 }
188
189 if array
190 .buffers
191 .align_offset(std::mem::align_of::<*mut *const u8>())
192 != 0
193 {
194 return Err(Error::oos(format!(
195 "An ArrowArray of type {data_type:?}
196 must have buffer {index} aligned to type {}",
197 std::any::type_name::<*mut *const u8>()
198 )));
199 }
200 let buffers = array.buffers as *mut *const u8;
201
202 if index >= array.n_buffers as usize {
203 return Err(Error::oos(format!(
204 "An ArrowArray of type {data_type:?}
205 must have buffer {index}."
206 )));
207 }
208
209 let ptr = *buffers.add(index);
210 if ptr.is_null() {
211 return Err(Error::oos(format!(
212 "An array of type {data_type:?}
213 must have a non-null buffer {index}"
214 )));
215 }
216
217 Ok(ptr as *mut T)
219}
220
221unsafe fn create_buffer<T: NativeType>(
227 array: &ArrowArray,
228 data_type: &DataType,
229 owner: InternalArrowArray,
230 index: usize,
231) -> Result<Buffer<T>> {
232 let len = buffer_len(array, data_type, index)?;
233
234 if len == 0 {
235 return Ok(Buffer::new());
236 }
237
238 let offset = buffer_offset(array, data_type, index);
239 let ptr: *mut T = get_buffer_ptr(array, data_type, index)?;
240
241 if ptr.align_offset(std::mem::align_of::<T>()) == 0 {
244 let bytes = Bytes::from_foreign(ptr, len, BytesAllocator::InternalArrowArray(owner));
245 Ok(Buffer::from_bytes(bytes).sliced(offset, len - offset))
246 }
247 else {
250 let buf = std::slice::from_raw_parts(ptr, len - offset).to_vec();
251 Ok(Buffer::from(buf))
252 }
253}
254
255unsafe fn create_bitmap(
261 array: &ArrowArray,
262 data_type: &DataType,
263 owner: InternalArrowArray,
264 index: usize,
265 is_validity: bool,
268) -> Result<Bitmap> {
269 let len: usize = array.length.try_into().expect("length to fit in `usize`");
270 if len == 0 {
271 return Ok(Bitmap::new());
272 }
273 let ptr = get_buffer_ptr(array, data_type, index)?;
274
275 let offset: usize = array.offset.try_into().expect("offset to fit in `usize`");
278 let bytes_len = bytes_for(offset + len);
279 let bytes = Bytes::from_foreign(ptr, bytes_len, BytesAllocator::InternalArrowArray(owner));
280
281 let null_count: usize = if is_validity {
282 array.null_count()
283 } else {
284 count_zeros(bytes.as_ref(), offset, len)
285 };
286 Bitmap::from_inner(Arc::new(bytes), offset, len, null_count)
287}
288
289fn buffer_offset(array: &ArrowArray, data_type: &DataType, i: usize) -> usize {
290 use PhysicalType::*;
291 match (data_type.to_physical_type(), i) {
292 (LargeUtf8, 2) | (LargeBinary, 2) | (Utf8, 2) | (Binary, 2) => 0,
293 (FixedSizeBinary, 1) => {
294 if let DataType::FixedSizeBinary(size) = data_type.to_logical_type() {
295 let offset: usize = array.offset.try_into().expect("Offset to fit in `usize`");
296 offset * *size
297 } else {
298 unreachable!()
299 }
300 }
301 _ => array.offset.try_into().expect("Offset to fit in `usize`"),
302 }
303}
304
305unsafe fn buffer_len(array: &ArrowArray, data_type: &DataType, i: usize) -> Result<usize> {
307 Ok(match (data_type.to_physical_type(), i) {
308 (PhysicalType::FixedSizeBinary, 1) => {
309 if let DataType::FixedSizeBinary(size) = data_type.to_logical_type() {
310 *size * (array.offset as usize + array.length as usize)
311 } else {
312 unreachable!()
313 }
314 }
315 (PhysicalType::FixedSizeList, 1) => {
316 if let DataType::FixedSizeList(_, size) = data_type.to_logical_type() {
317 *size * (array.offset as usize + array.length as usize)
318 } else {
319 unreachable!()
320 }
321 }
322 (PhysicalType::Utf8, 1)
323 | (PhysicalType::LargeUtf8, 1)
324 | (PhysicalType::Binary, 1)
325 | (PhysicalType::LargeBinary, 1)
326 | (PhysicalType::List, 1)
327 | (PhysicalType::LargeList, 1)
328 | (PhysicalType::Map, 1) => {
329 array.offset as usize + array.length as usize + 1
331 }
332 (PhysicalType::Utf8, 2) | (PhysicalType::Binary, 2) => {
333 let len = buffer_len(array, data_type, 1)?;
335 let offset_buffer = unsafe { *(array.buffers as *mut *const u8).add(1) };
337 let offset_buffer = offset_buffer as *const i32;
339 (unsafe { *offset_buffer.add(len - 1) }) as usize
342 }
343 (PhysicalType::LargeUtf8, 2) | (PhysicalType::LargeBinary, 2) => {
344 let len = buffer_len(array, data_type, 1)?;
346 let offset_buffer = unsafe { *(array.buffers as *mut *const u8).add(1) };
348 let offset_buffer = offset_buffer as *const i64;
350 (unsafe { *offset_buffer.add(len - 1) }) as usize
352 }
353 _ => array.offset as usize + array.length as usize,
355 })
356}
357
358unsafe fn create_child(
365 array: &ArrowArray,
366 data_type: &DataType,
367 parent: InternalArrowArray,
368 index: usize,
369) -> Result<ArrowArrayChild<'static>> {
370 let data_type = get_child(data_type, index)?;
371
372 if array.children.is_null() {
374 return Err(Error::oos(format!(
375 "An ArrowArray of type {data_type:?} must have non-null children"
376 )));
377 }
378
379 if index >= array.n_children as usize {
380 return Err(Error::oos(format!(
381 "An ArrowArray of type {data_type:?}
382 must have child {index}."
383 )));
384 }
385
386 let arr_ptr = unsafe { *array.children.add(index) };
388
389 if arr_ptr.is_null() {
391 return Err(Error::oos(format!(
392 "An array of type {data_type:?}
393 must have a non-null child {index}"
394 )));
395 }
396
397 let arr_ptr = unsafe { &*arr_ptr };
399 Ok(ArrowArrayChild::new(arr_ptr, data_type, parent))
400}
401
402unsafe fn create_dictionary(
407 array: &ArrowArray,
408 data_type: &DataType,
409 parent: InternalArrowArray,
410) -> Result<Option<ArrowArrayChild<'static>>> {
411 if let DataType::Dictionary(_, values, _) = data_type {
412 let data_type = values.as_ref().clone();
413 if array.dictionary.is_null() {
415 return Err(Error::oos(format!(
416 "An array of type {data_type:?}
417 must have a non-null dictionary"
418 )));
419 }
420
421 let array = unsafe { &*array.dictionary };
423 Ok(Some(ArrowArrayChild::new(array, data_type, parent)))
424 } else {
425 Ok(None)
426 }
427}
428
429pub trait ArrowArrayRef: std::fmt::Debug {
430 fn owner(&self) -> InternalArrowArray {
431 (*self.parent()).clone()
432 }
433
434 unsafe fn validity(&self) -> Result<Option<Bitmap>> {
441 if self.array().null_count() == 0 {
442 Ok(None)
443 } else {
444 create_bitmap(self.array(), self.data_type(), self.owner(), 0, true).map(Some)
445 }
446 }
447
448 unsafe fn buffer<T: NativeType>(&self, index: usize) -> Result<Buffer<T>> {
452 create_buffer::<T>(self.array(), self.data_type(), self.owner(), index)
453 }
454
455 unsafe fn bitmap(&self, index: usize) -> Result<Bitmap> {
460 create_bitmap(self.array(), self.data_type(), self.owner(), index, false)
461 }
462
463 unsafe fn child(&self, index: usize) -> Result<ArrowArrayChild> {
469 create_child(self.array(), self.data_type(), self.parent().clone(), index)
470 }
471
472 unsafe fn dictionary(&self) -> Result<Option<ArrowArrayChild>> {
473 create_dictionary(self.array(), self.data_type(), self.parent().clone())
474 }
475
476 fn n_buffers(&self) -> usize;
477
478 fn parent(&self) -> &InternalArrowArray;
479 fn array(&self) -> &ArrowArray;
480 fn data_type(&self) -> &DataType;
481}
482
483#[derive(Debug, Clone)]
503pub struct InternalArrowArray {
504 array: Arc<ArrowArray>,
506 data_type: Arc<DataType>,
508}
509
510impl InternalArrowArray {
511 pub fn new(array: ArrowArray, data_type: DataType) -> Self {
512 Self {
513 array: Arc::new(array),
514 data_type: Arc::new(data_type),
515 }
516 }
517}
518
519impl ArrowArrayRef for InternalArrowArray {
520 fn data_type(&self) -> &DataType {
522 &self.data_type
523 }
524
525 fn parent(&self) -> &InternalArrowArray {
526 self
527 }
528
529 fn array(&self) -> &ArrowArray {
530 self.array.as_ref()
531 }
532
533 fn n_buffers(&self) -> usize {
534 self.array.n_buffers as usize
535 }
536}
537
538#[derive(Debug)]
539pub struct ArrowArrayChild<'a> {
540 array: &'a ArrowArray,
541 data_type: DataType,
542 parent: InternalArrowArray,
543}
544
545impl<'a> ArrowArrayRef for ArrowArrayChild<'a> {
546 fn data_type(&self) -> &DataType {
548 &self.data_type
549 }
550
551 fn parent(&self) -> &InternalArrowArray {
552 &self.parent
553 }
554
555 fn array(&self) -> &ArrowArray {
556 self.array
557 }
558
559 fn n_buffers(&self) -> usize {
560 self.array.n_buffers as usize
561 }
562}
563
564impl<'a> ArrowArrayChild<'a> {
565 fn new(array: &'a ArrowArray, data_type: DataType, parent: InternalArrowArray) -> Self {
566 Self {
567 array,
568 data_type,
569 parent,
570 }
571 }
572}