1use std::sync::Arc;
3
4use polars_buffer::{Buffer, SharedStorage};
5use polars_error::{PolarsResult, polars_bail};
6
7use super::ArrowArray;
8use crate::array::*;
9use crate::bitmap::Bitmap;
10use crate::bitmap::utils::bytes_for;
11use crate::datatypes::{ArrowDataType, PhysicalType};
12use crate::ffi::schema::get_child;
13use crate::types::{NativeType, PrimitiveType, months_days_ns};
14use crate::{ffi, match_integer_type, with_match_primitive_type_full};
15
16pub unsafe fn try_from<A: ArrowArrayRef>(array: A) -> PolarsResult<Box<dyn Array>> {
21 use PhysicalType::*;
22 Ok(match array.dtype().to_physical_type() {
23 Null => Box::new(NullArray::try_from_ffi(array)?),
24 Boolean => Box::new(BooleanArray::try_from_ffi(array)?),
25 Primitive(PrimitiveType::MonthDayNano) => {
26 Box::new(PrimitiveArray::<months_days_ns>::try_from_ffi(array)?)
27 },
28 Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
29 Box::new(PrimitiveArray::<$T>::try_from_ffi(array)?)
30 }),
31 Utf8 => Box::new(Utf8Array::<i32>::try_from_ffi(array)?),
32 LargeUtf8 => Box::new(Utf8Array::<i64>::try_from_ffi(array)?),
33 Binary => Box::new(BinaryArray::<i32>::try_from_ffi(array)?),
34 LargeBinary => Box::new(BinaryArray::<i64>::try_from_ffi(array)?),
35 FixedSizeBinary => Box::new(FixedSizeBinaryArray::try_from_ffi(array)?),
36 List => Box::new(ListArray::<i32>::try_from_ffi(array)?),
37 LargeList => Box::new(ListArray::<i64>::try_from_ffi(array)?),
38 FixedSizeList => Box::new(FixedSizeListArray::try_from_ffi(array)?),
39 Struct => Box::new(StructArray::try_from_ffi(array)?),
40 Dictionary(key_type) => {
41 match_integer_type!(key_type, |$T| {
42 Box::new(DictionaryArray::<$T>::try_from_ffi(array)?)
43 })
44 },
45 Union => Box::new(UnionArray::try_from_ffi(array)?),
46 Map => Box::new(MapArray::try_from_ffi(array)?),
47 BinaryView => Box::new(BinaryViewArray::try_from_ffi(array)?),
48 Utf8View => Box::new(Utf8ViewArray::try_from_ffi(array)?),
49 })
50}
51
52unsafe impl Send for ArrowArray {}
57unsafe impl Sync for ArrowArray {}
58
59impl Drop for ArrowArray {
60 fn drop(&mut self) {
61 match self.release {
62 None => (),
63 Some(release) => unsafe { release(self) },
64 };
65 }
66}
67
68unsafe extern "C" fn c_release_array(array: *mut ArrowArray) {
70 if array.is_null() {
71 return;
72 }
73 let array = &mut *array;
74
75 let private = Box::from_raw(array.private_data as *mut PrivateData);
77 for child in private.children_ptr.iter() {
78 let _ = Box::from_raw(*child);
79 }
80
81 if let Some(ptr) = private.dictionary_ptr {
82 let _ = Box::from_raw(ptr);
83 }
84
85 array.release = None;
86}
87
88#[allow(dead_code)]
89struct PrivateData {
90 array: Box<dyn Array>,
91 buffers_ptr: Box<[*const std::os::raw::c_void]>,
92 children_ptr: Box<[*mut ArrowArray]>,
93 dictionary_ptr: Option<*mut ArrowArray>,
94 variadic_buffer_sizes: Box<[i64]>,
95}
96
97impl ArrowArray {
98 pub(crate) fn new(array: Box<dyn Array>) -> Self {
104 #[allow(unused_mut)]
105 let (offset, mut buffers, children, dictionary) =
106 offset_buffers_children_dictionary(array.as_ref());
107
108 let variadic_buffer_sizes = match array.dtype().to_storage() {
109 ArrowDataType::BinaryView => {
110 let arr = array.as_any().downcast_ref::<BinaryViewArray>().unwrap();
111 let boxed = arr.variadic_buffer_lengths().into_boxed_slice();
112 let ptr = boxed.as_ptr().cast::<u8>();
113 buffers.push(Some(ptr));
114 boxed
115 },
116 ArrowDataType::Utf8View => {
117 let arr = array.as_any().downcast_ref::<Utf8ViewArray>().unwrap();
118 let boxed = arr.variadic_buffer_lengths().into_boxed_slice();
119 let ptr = boxed.as_ptr().cast::<u8>();
120 buffers.push(Some(ptr));
121 boxed
122 },
123 _ => Box::new([]),
124 };
125
126 let buffers_ptr = buffers
127 .iter()
128 .map(|maybe_buffer| match maybe_buffer {
129 Some(b) => *b as *const std::os::raw::c_void,
130 None => std::ptr::null(),
131 })
132 .collect::<Box<[_]>>();
133 let n_buffers = buffers.len() as i64;
134
135 let children_ptr = children
136 .into_iter()
137 .map(|child| {
138 Box::into_raw(Box::new(ArrowArray::new(ffi::align_to_c_data_interface(
139 child,
140 ))))
141 })
142 .collect::<Box<_>>();
143 let n_children = children_ptr.len() as i64;
144
145 let dictionary_ptr = dictionary.map(|array| {
146 Box::into_raw(Box::new(ArrowArray::new(ffi::align_to_c_data_interface(
147 array,
148 ))))
149 });
150
151 let length = array.len() as i64;
152 let null_count = array.null_count() as i64;
153
154 let mut private_data = Box::new(PrivateData {
155 array,
156 buffers_ptr,
157 children_ptr,
158 dictionary_ptr,
159 variadic_buffer_sizes,
160 });
161
162 Self {
163 length,
164 null_count,
165 offset: offset as i64,
166 n_buffers,
167 n_children,
168 buffers: private_data.buffers_ptr.as_mut_ptr(),
169 children: private_data.children_ptr.as_mut_ptr(),
170 dictionary: private_data.dictionary_ptr.unwrap_or(std::ptr::null_mut()),
171 release: Some(c_release_array),
172 private_data: Box::into_raw(private_data) as *mut ::std::os::raw::c_void,
173 }
174 }
175
176 pub fn empty() -> Self {
178 Self {
179 length: 0,
180 null_count: 0,
181 offset: 0,
182 n_buffers: 0,
183 n_children: 0,
184 buffers: std::ptr::null_mut(),
185 children: std::ptr::null_mut(),
186 dictionary: std::ptr::null_mut(),
187 release: None,
188 private_data: std::ptr::null_mut(),
189 }
190 }
191
192 pub(crate) fn len(&self) -> usize {
194 self.length as usize
195 }
196
197 pub(crate) fn offset(&self) -> usize {
199 self.offset as usize
200 }
201
202 pub(crate) fn null_count(&self) -> usize {
204 self.null_count as usize
205 }
206}
207
208unsafe fn get_buffer_ptr<T: NativeType>(
211 array: &ArrowArray,
212 dtype: &ArrowDataType,
213 index: usize,
214) -> PolarsResult<*mut T> {
215 if array.buffers.is_null() {
216 polars_bail!( ComputeError:
217 "an ArrowArray of type {dtype:?} must have non-null buffers"
218 );
219 }
220
221 if array.buffers.align_offset(align_of::<*mut *const u8>()) != 0 {
222 polars_bail!( ComputeError:
223 "an ArrowArray of type {dtype:?}
224 must have buffer {index} aligned to type {}",
225 std::any::type_name::<*mut *const u8>()
226 );
227 }
228 let buffers = array.buffers as *mut *const u8;
229
230 if index >= array.n_buffers as usize {
231 polars_bail!(ComputeError:
232 "An ArrowArray of type {dtype:?}
233 must have buffer {index}."
234 )
235 }
236
237 let ptr = *buffers.add(index);
238 if ptr.is_null() {
239 polars_bail!(ComputeError:
240 "An array of type {dtype:?}
241 must have a non-null buffer {index}"
242 )
243 }
244
245 Ok(ptr as *mut T)
247}
248
249unsafe fn create_buffer_known_len<T: NativeType>(
250 array: &ArrowArray,
251 dtype: &ArrowDataType,
252 owner: InternalArrowArray,
253 len: usize,
254 index: usize,
255) -> PolarsResult<Buffer<T>> {
256 if len == 0 {
257 return Ok(Buffer::new());
260 }
261 let ptr: *mut T = get_buffer_ptr(array, dtype, index)?;
262 let slice = core::slice::from_raw_parts(ptr, len);
263 let storage = SharedStorage::from_slice_with_owner(slice, owner);
264 Ok(Buffer::from_storage(storage))
265}
266
267unsafe fn create_buffer<T: NativeType>(
273 array: &ArrowArray,
274 dtype: &ArrowDataType,
275 owner: InternalArrowArray,
276 index: usize,
277) -> PolarsResult<Buffer<T>> {
278 let buf_len = buffer_len(array, dtype, index)?;
279
280 if buf_len == 0 {
281 return Ok(Buffer::new());
284 }
285
286 let offset = buffer_offset(array, dtype, index);
287 let ptr: *mut T = get_buffer_ptr(array, dtype, index)?;
288 let len = buf_len - offset;
289
290 if ptr.is_aligned() {
292 let slice = core::slice::from_raw_parts(ptr.add(offset), len);
293 let storage = SharedStorage::from_slice_with_owner(slice, owner);
294 Ok(Buffer::from_storage(storage))
295 } else {
296 let mut v = Vec::with_capacity(len);
298 core::ptr::copy_nonoverlapping(
299 ptr.add(offset).cast::<u8>(),
300 v.spare_capacity_mut().as_mut_ptr().cast::<u8>(),
301 len * size_of::<T>(),
302 );
303 v.set_len(len);
304 Ok(Buffer::from(v))
305 }
306}
307
308unsafe fn create_bitmap(
314 array: &ArrowArray,
315 dtype: &ArrowDataType,
316 owner: InternalArrowArray,
317 index: usize,
318 is_validity: bool,
321) -> PolarsResult<Bitmap> {
322 let len: usize = array.length.try_into().expect("length to fit in `usize`");
323 if len == 0 {
324 return Ok(Bitmap::new());
327 }
328 let ptr = get_buffer_ptr(array, dtype, index)?;
329
330 let offset: usize = array.offset.try_into().expect("offset to fit in `usize`");
332 let bytes_len = bytes_for(offset + len);
333 let slice = core::slice::from_raw_parts(ptr, bytes_len);
334 let storage = SharedStorage::from_slice_with_owner(slice, owner);
335
336 let null_count = if is_validity {
337 Some(array.null_count())
338 } else {
339 None
340 };
341 Ok(Bitmap::from_inner_unchecked(
342 storage, offset, len, null_count,
343 ))
344}
345
346fn buffer_offset(array: &ArrowArray, dtype: &ArrowDataType, i: usize) -> usize {
347 use PhysicalType::*;
348 match (dtype.to_physical_type(), i) {
349 (LargeUtf8, 2) | (LargeBinary, 2) | (Utf8, 2) | (Binary, 2) => 0,
350 (FixedSizeBinary, 1) => {
351 if let ArrowDataType::FixedSizeBinary(size) = dtype.to_storage() {
352 let offset: usize = array.offset.try_into().expect("Offset to fit in `usize`");
353 offset * *size
354 } else {
355 unreachable!()
356 }
357 },
358 _ => array.offset.try_into().expect("Offset to fit in `usize`"),
359 }
360}
361
362unsafe fn buffer_len(array: &ArrowArray, dtype: &ArrowDataType, i: usize) -> PolarsResult<usize> {
364 Ok(match (dtype.to_physical_type(), i) {
365 (PhysicalType::FixedSizeBinary, 1) => {
366 if let ArrowDataType::FixedSizeBinary(size) = dtype.to_storage() {
367 *size * (array.offset as usize + array.length as usize)
368 } else {
369 unreachable!()
370 }
371 },
372 (PhysicalType::FixedSizeList, 1) => {
373 if let ArrowDataType::FixedSizeList(_, size) = dtype.to_storage() {
374 *size * (array.offset as usize + array.length as usize)
375 } else {
376 unreachable!()
377 }
378 },
379 (PhysicalType::Utf8, 1)
380 | (PhysicalType::LargeUtf8, 1)
381 | (PhysicalType::Binary, 1)
382 | (PhysicalType::LargeBinary, 1)
383 | (PhysicalType::List, 1)
384 | (PhysicalType::LargeList, 1)
385 | (PhysicalType::Map, 1) => {
386 array.offset as usize + array.length as usize + 1
388 },
389 (PhysicalType::BinaryView, 1) | (PhysicalType::Utf8View, 1) => {
390 array.offset as usize + array.length as usize
391 },
392 (PhysicalType::Utf8, 2) | (PhysicalType::Binary, 2) => {
393 let len = buffer_len(array, dtype, 1)?;
395 let offset_buffer = unsafe { *(array.buffers as *mut *const u8).add(1) };
397 let offset_buffer = offset_buffer as *const i32;
399 (unsafe { *offset_buffer.add(len - 1) }) as usize
402 },
403 (PhysicalType::LargeUtf8, 2) | (PhysicalType::LargeBinary, 2) => {
404 let len = buffer_len(array, dtype, 1)?;
406 let offset_buffer = unsafe { *(array.buffers as *mut *const u8).add(1) };
408 let offset_buffer = offset_buffer as *const i64;
410 (unsafe { *offset_buffer.add(len - 1) }) as usize
412 },
413 _ => array.offset as usize + array.length as usize,
415 })
416}
417
418unsafe fn create_child(
426 array: &ArrowArray,
427 dtype: &ArrowDataType,
428 parent: InternalArrowArray,
429 index: usize,
430) -> PolarsResult<ArrowArrayChild<'static>> {
431 let dtype = get_child(dtype, index)?;
432
433 if array.children.is_null() {
435 polars_bail!(ComputeError: "an ArrowArray of type {dtype:?} must have non-null children");
436 }
437
438 if index >= array.n_children as usize {
439 polars_bail!(ComputeError:
440 "an ArrowArray of type {dtype:?}
441 must have child {index}."
442 );
443 }
444
445 let arr_ptr = unsafe { *array.children.add(index) };
447
448 if arr_ptr.is_null() {
450 polars_bail!(ComputeError:
451 "an array of type {dtype:?}
452 must have a non-null child {index}"
453 )
454 }
455
456 let arr_ptr = unsafe { &*arr_ptr };
458 Ok(ArrowArrayChild::new(arr_ptr, dtype, parent))
459}
460
461unsafe fn create_dictionary(
467 array: &ArrowArray,
468 dtype: &ArrowDataType,
469 parent: InternalArrowArray,
470) -> PolarsResult<Option<ArrowArrayChild<'static>>> {
471 if let ArrowDataType::Dictionary(_, values, _) = dtype {
472 let dtype = values.as_ref().clone();
473 if array.dictionary.is_null() {
475 polars_bail!(ComputeError:
476 "an array of type {dtype:?}
477 must have a non-null dictionary"
478 )
479 }
480
481 let array = unsafe { &*array.dictionary };
483 Ok(Some(ArrowArrayChild::new(array, dtype, parent)))
484 } else {
485 Ok(None)
486 }
487}
488
489pub trait ArrowArrayRef: std::fmt::Debug {
490 fn owner(&self) -> InternalArrowArray {
491 (*self.parent()).clone()
492 }
493
494 unsafe fn validity(&self) -> PolarsResult<Option<Bitmap>> {
502 if self.array().null_count() == 0 {
503 Ok(None)
504 } else {
505 create_bitmap(self.array(), self.dtype(), self.owner(), 0, true).map(Some)
506 }
507 }
508
509 unsafe fn buffer<T: NativeType>(&self, index: usize) -> PolarsResult<Buffer<T>> {
513 create_buffer::<T>(self.array(), self.dtype(), self.owner(), index)
514 }
515
516 unsafe fn buffer_known_len<T: NativeType>(
520 &self,
521 index: usize,
522 len: usize,
523 ) -> PolarsResult<Buffer<T>> {
524 create_buffer_known_len::<T>(self.array(), self.dtype(), self.owner(), len, index)
525 }
526
527 unsafe fn bitmap(&self, index: usize) -> PolarsResult<Bitmap> {
532 create_bitmap(self.array(), self.dtype(), self.owner(), index, false)
533 }
534
535 unsafe fn child(&self, index: usize) -> PolarsResult<ArrowArrayChild<'_>> {
541 create_child(self.array(), self.dtype(), self.parent().clone(), index)
542 }
543
544 unsafe fn dictionary(&self) -> PolarsResult<Option<ArrowArrayChild<'_>>> {
545 create_dictionary(self.array(), self.dtype(), self.parent().clone())
546 }
547
548 fn n_buffers(&self) -> usize;
549
550 fn offset(&self) -> usize;
551 fn length(&self) -> usize;
552
553 fn parent(&self) -> &InternalArrowArray;
554 fn array(&self) -> &ArrowArray;
555 fn dtype(&self) -> &ArrowDataType;
556}
557
558#[derive(Debug, Clone)]
578pub struct InternalArrowArray {
579 array: Arc<ArrowArray>,
581 dtype: Arc<ArrowDataType>,
583}
584
585impl InternalArrowArray {
586 pub fn new(array: ArrowArray, dtype: ArrowDataType) -> Self {
587 Self {
588 array: Arc::new(array),
589 dtype: Arc::new(dtype),
590 }
591 }
592}
593
594impl ArrowArrayRef for InternalArrowArray {
595 fn dtype(&self) -> &ArrowDataType {
597 &self.dtype
598 }
599
600 fn parent(&self) -> &InternalArrowArray {
601 self
602 }
603
604 fn array(&self) -> &ArrowArray {
605 self.array.as_ref()
606 }
607
608 fn n_buffers(&self) -> usize {
609 self.array.n_buffers as usize
610 }
611
612 fn offset(&self) -> usize {
613 self.array.offset as usize
614 }
615
616 fn length(&self) -> usize {
617 self.array.length as usize
618 }
619}
620
621#[derive(Debug)]
622pub struct ArrowArrayChild<'a> {
623 array: &'a ArrowArray,
624 dtype: ArrowDataType,
625 parent: InternalArrowArray,
626}
627
628impl ArrowArrayRef for ArrowArrayChild<'_> {
629 fn dtype(&self) -> &ArrowDataType {
631 &self.dtype
632 }
633
634 fn parent(&self) -> &InternalArrowArray {
635 &self.parent
636 }
637
638 fn array(&self) -> &ArrowArray {
639 self.array
640 }
641
642 fn n_buffers(&self) -> usize {
643 self.array.n_buffers as usize
644 }
645
646 fn offset(&self) -> usize {
647 self.array.offset as usize
648 }
649
650 fn length(&self) -> usize {
651 self.array.length as usize
652 }
653}
654
655impl<'a> ArrowArrayChild<'a> {
656 fn new(array: &'a ArrowArray, dtype: ArrowDataType, parent: InternalArrowArray) -> Self {
657 Self {
658 array,
659 dtype,
660 parent,
661 }
662 }
663}