1use std::sync::Arc;
3
4use polars_error::{PolarsResult, polars_bail};
5
6use super::ArrowArray;
7use crate::array::*;
8use crate::bitmap::Bitmap;
9use crate::bitmap::utils::bytes_for;
10use crate::buffer::Buffer;
11use crate::datatypes::{ArrowDataType, PhysicalType};
12use crate::ffi::schema::get_child;
13use crate::storage::SharedStorage;
14use crate::types::NativeType;
15use crate::{ffi, match_integer_type, with_match_primitive_type_full};
16
17pub unsafe fn try_from<A: ArrowArrayRef>(array: A) -> PolarsResult<Box<dyn Array>> {
22 use PhysicalType::*;
23 Ok(match array.dtype().to_physical_type() {
24 Null => Box::new(NullArray::try_from_ffi(array)?),
25 Boolean => Box::new(BooleanArray::try_from_ffi(array)?),
26 Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
27 Box::new(PrimitiveArray::<$T>::try_from_ffi(array)?)
28 }),
29 Utf8 => Box::new(Utf8Array::<i32>::try_from_ffi(array)?),
30 LargeUtf8 => Box::new(Utf8Array::<i64>::try_from_ffi(array)?),
31 Binary => Box::new(BinaryArray::<i32>::try_from_ffi(array)?),
32 LargeBinary => Box::new(BinaryArray::<i64>::try_from_ffi(array)?),
33 FixedSizeBinary => Box::new(FixedSizeBinaryArray::try_from_ffi(array)?),
34 List => Box::new(ListArray::<i32>::try_from_ffi(array)?),
35 LargeList => Box::new(ListArray::<i64>::try_from_ffi(array)?),
36 FixedSizeList => Box::new(FixedSizeListArray::try_from_ffi(array)?),
37 Struct => Box::new(StructArray::try_from_ffi(array)?),
38 Dictionary(key_type) => {
39 match_integer_type!(key_type, |$T| {
40 Box::new(DictionaryArray::<$T>::try_from_ffi(array)?)
41 })
42 },
43 Union => Box::new(UnionArray::try_from_ffi(array)?),
44 Map => Box::new(MapArray::try_from_ffi(array)?),
45 BinaryView => Box::new(BinaryViewArray::try_from_ffi(array)?),
46 Utf8View => Box::new(Utf8ViewArray::try_from_ffi(array)?),
47 })
48}
49
50unsafe impl Send for ArrowArray {}
55unsafe impl Sync for ArrowArray {}
56
57impl Drop for ArrowArray {
58 fn drop(&mut self) {
59 match self.release {
60 None => (),
61 Some(release) => unsafe { release(self) },
62 };
63 }
64}
65
66unsafe extern "C" fn c_release_array(array: *mut ArrowArray) {
68 if array.is_null() {
69 return;
70 }
71 let array = &mut *array;
72
73 let private = Box::from_raw(array.private_data as *mut PrivateData);
75 for child in private.children_ptr.iter() {
76 let _ = Box::from_raw(*child);
77 }
78
79 if let Some(ptr) = private.dictionary_ptr {
80 let _ = Box::from_raw(ptr);
81 }
82
83 array.release = None;
84}
85
86#[allow(dead_code)]
87struct PrivateData {
88 array: Box<dyn Array>,
89 buffers_ptr: Box<[*const std::os::raw::c_void]>,
90 children_ptr: Box<[*mut ArrowArray]>,
91 dictionary_ptr: Option<*mut ArrowArray>,
92 variadic_buffer_sizes: Box<[i64]>,
93}
94
95impl ArrowArray {
96 pub(crate) fn new(array: Box<dyn Array>) -> Self {
102 #[allow(unused_mut)]
103 let (offset, mut buffers, children, dictionary) =
104 offset_buffers_children_dictionary(array.as_ref());
105
106 let variadic_buffer_sizes = match array.dtype() {
107 ArrowDataType::BinaryView => {
108 let arr = array.as_any().downcast_ref::<BinaryViewArray>().unwrap();
109 let boxed = arr.variadic_buffer_lengths().into_boxed_slice();
110 let ptr = boxed.as_ptr().cast::<u8>();
111 buffers.push(Some(ptr));
112 boxed
113 },
114 ArrowDataType::Utf8View => {
115 let arr = array.as_any().downcast_ref::<Utf8ViewArray>().unwrap();
116 let boxed = arr.variadic_buffer_lengths().into_boxed_slice();
117 let ptr = boxed.as_ptr().cast::<u8>();
118 buffers.push(Some(ptr));
119 boxed
120 },
121 _ => Box::new([]),
122 };
123
124 let buffers_ptr = buffers
125 .iter()
126 .map(|maybe_buffer| match maybe_buffer {
127 Some(b) => *b as *const std::os::raw::c_void,
128 None => std::ptr::null(),
129 })
130 .collect::<Box<[_]>>();
131 let n_buffers = buffers.len() as i64;
132
133 let children_ptr = children
134 .into_iter()
135 .map(|child| {
136 Box::into_raw(Box::new(ArrowArray::new(ffi::align_to_c_data_interface(
137 child,
138 ))))
139 })
140 .collect::<Box<_>>();
141 let n_children = children_ptr.len() as i64;
142
143 let dictionary_ptr = dictionary.map(|array| {
144 Box::into_raw(Box::new(ArrowArray::new(ffi::align_to_c_data_interface(
145 array,
146 ))))
147 });
148
149 let length = array.len() as i64;
150 let null_count = array.null_count() as i64;
151
152 let mut private_data = Box::new(PrivateData {
153 array,
154 buffers_ptr,
155 children_ptr,
156 dictionary_ptr,
157 variadic_buffer_sizes,
158 });
159
160 Self {
161 length,
162 null_count,
163 offset: offset as i64,
164 n_buffers,
165 n_children,
166 buffers: private_data.buffers_ptr.as_mut_ptr(),
167 children: private_data.children_ptr.as_mut_ptr(),
168 dictionary: private_data.dictionary_ptr.unwrap_or(std::ptr::null_mut()),
169 release: Some(c_release_array),
170 private_data: Box::into_raw(private_data) as *mut ::std::os::raw::c_void,
171 }
172 }
173
174 pub fn empty() -> Self {
176 Self {
177 length: 0,
178 null_count: 0,
179 offset: 0,
180 n_buffers: 0,
181 n_children: 0,
182 buffers: std::ptr::null_mut(),
183 children: std::ptr::null_mut(),
184 dictionary: std::ptr::null_mut(),
185 release: None,
186 private_data: std::ptr::null_mut(),
187 }
188 }
189
190 pub(crate) fn len(&self) -> usize {
192 self.length as usize
193 }
194
195 pub(crate) fn offset(&self) -> usize {
197 self.offset as usize
198 }
199
200 pub(crate) fn null_count(&self) -> usize {
202 self.null_count as usize
203 }
204}
205
206unsafe fn get_buffer_ptr<T: NativeType>(
209 array: &ArrowArray,
210 dtype: &ArrowDataType,
211 index: usize,
212) -> PolarsResult<*mut T> {
213 if array.buffers.is_null() {
214 polars_bail!( ComputeError:
215 "an ArrowArray of type {dtype:?} must have non-null buffers"
216 );
217 }
218
219 if array.buffers.align_offset(align_of::<*mut *const u8>()) != 0 {
220 polars_bail!( ComputeError:
221 "an ArrowArray of type {dtype:?}
222 must have buffer {index} aligned to type {}",
223 std::any::type_name::<*mut *const u8>()
224 );
225 }
226 let buffers = array.buffers as *mut *const u8;
227
228 if index >= array.n_buffers as usize {
229 polars_bail!(ComputeError:
230 "An ArrowArray of type {dtype:?}
231 must have buffer {index}."
232 )
233 }
234
235 let ptr = *buffers.add(index);
236 if ptr.is_null() {
237 polars_bail!(ComputeError:
238 "An array of type {dtype:?}
239 must have a non-null buffer {index}"
240 )
241 }
242
243 Ok(ptr as *mut T)
245}
246
247unsafe fn create_buffer_known_len<T: NativeType>(
248 array: &ArrowArray,
249 dtype: &ArrowDataType,
250 owner: InternalArrowArray,
251 len: usize,
252 index: usize,
253) -> PolarsResult<Buffer<T>> {
254 if len == 0 {
255 return Ok(Buffer::new());
256 }
257 let ptr: *mut T = get_buffer_ptr(array, dtype, index)?;
258 let storage = SharedStorage::from_internal_arrow_array(ptr, len, owner);
259 Ok(Buffer::from_storage(storage))
260}
261
262unsafe fn create_buffer<T: NativeType>(
268 array: &ArrowArray,
269 dtype: &ArrowDataType,
270 owner: InternalArrowArray,
271 index: usize,
272) -> PolarsResult<Buffer<T>> {
273 let len = buffer_len(array, dtype, index)?;
274
275 if len == 0 {
276 return Ok(Buffer::new());
277 }
278
279 let offset = buffer_offset(array, dtype, index);
280 let ptr: *mut T = get_buffer_ptr(array, dtype, index)?;
281
282 if ptr.align_offset(align_of::<T>()) == 0 {
285 let storage = SharedStorage::from_internal_arrow_array(ptr, len, owner);
286 Ok(Buffer::from_storage(storage).sliced(offset, len - offset))
287 }
288 else {
291 let buf = std::slice::from_raw_parts(ptr, len - offset).to_vec();
292 Ok(Buffer::from(buf))
293 }
294}
295
296unsafe fn create_bitmap(
302 array: &ArrowArray,
303 dtype: &ArrowDataType,
304 owner: InternalArrowArray,
305 index: usize,
306 is_validity: bool,
309) -> PolarsResult<Bitmap> {
310 let len: usize = array.length.try_into().expect("length to fit in `usize`");
311 if len == 0 {
312 return Ok(Bitmap::new());
313 }
314 let ptr = get_buffer_ptr(array, dtype, index)?;
315
316 let offset: usize = array.offset.try_into().expect("offset to fit in `usize`");
319 let bytes_len = bytes_for(offset + len);
320 let storage = SharedStorage::from_internal_arrow_array(ptr, bytes_len, owner);
321
322 let null_count = if is_validity {
323 Some(array.null_count())
324 } else {
325 None
326 };
327 Ok(Bitmap::from_inner_unchecked(
328 storage, offset, len, null_count,
329 ))
330}
331
332fn buffer_offset(array: &ArrowArray, dtype: &ArrowDataType, i: usize) -> usize {
333 use PhysicalType::*;
334 match (dtype.to_physical_type(), i) {
335 (LargeUtf8, 2) | (LargeBinary, 2) | (Utf8, 2) | (Binary, 2) => 0,
336 (FixedSizeBinary, 1) => {
337 if let ArrowDataType::FixedSizeBinary(size) = dtype.to_logical_type() {
338 let offset: usize = array.offset.try_into().expect("Offset to fit in `usize`");
339 offset * *size
340 } else {
341 unreachable!()
342 }
343 },
344 _ => array.offset.try_into().expect("Offset to fit in `usize`"),
345 }
346}
347
348unsafe fn buffer_len(array: &ArrowArray, dtype: &ArrowDataType, i: usize) -> PolarsResult<usize> {
350 Ok(match (dtype.to_physical_type(), i) {
351 (PhysicalType::FixedSizeBinary, 1) => {
352 if let ArrowDataType::FixedSizeBinary(size) = dtype.to_logical_type() {
353 *size * (array.offset as usize + array.length as usize)
354 } else {
355 unreachable!()
356 }
357 },
358 (PhysicalType::FixedSizeList, 1) => {
359 if let ArrowDataType::FixedSizeList(_, size) = dtype.to_logical_type() {
360 *size * (array.offset as usize + array.length as usize)
361 } else {
362 unreachable!()
363 }
364 },
365 (PhysicalType::Utf8, 1)
366 | (PhysicalType::LargeUtf8, 1)
367 | (PhysicalType::Binary, 1)
368 | (PhysicalType::LargeBinary, 1)
369 | (PhysicalType::List, 1)
370 | (PhysicalType::LargeList, 1)
371 | (PhysicalType::Map, 1) => {
372 array.offset as usize + array.length as usize + 1
374 },
375 (PhysicalType::BinaryView, 1) | (PhysicalType::Utf8View, 1) => {
376 array.offset as usize + array.length as usize
377 },
378 (PhysicalType::Utf8, 2) | (PhysicalType::Binary, 2) => {
379 let len = buffer_len(array, dtype, 1)?;
381 let offset_buffer = unsafe { *(array.buffers as *mut *const u8).add(1) };
383 let offset_buffer = offset_buffer as *const i32;
385 (unsafe { *offset_buffer.add(len - 1) }) as usize
388 },
389 (PhysicalType::LargeUtf8, 2) | (PhysicalType::LargeBinary, 2) => {
390 let len = buffer_len(array, dtype, 1)?;
392 let offset_buffer = unsafe { *(array.buffers as *mut *const u8).add(1) };
394 let offset_buffer = offset_buffer as *const i64;
396 (unsafe { *offset_buffer.add(len - 1) }) as usize
398 },
399 _ => array.offset as usize + array.length as usize,
401 })
402}
403
404unsafe fn create_child(
412 array: &ArrowArray,
413 dtype: &ArrowDataType,
414 parent: InternalArrowArray,
415 index: usize,
416) -> PolarsResult<ArrowArrayChild<'static>> {
417 let dtype = get_child(dtype, index)?;
418
419 if array.children.is_null() {
421 polars_bail!(ComputeError: "an ArrowArray of type {dtype:?} must have non-null children");
422 }
423
424 if index >= array.n_children as usize {
425 polars_bail!(ComputeError:
426 "an ArrowArray of type {dtype:?}
427 must have child {index}."
428 );
429 }
430
431 let arr_ptr = unsafe { *array.children.add(index) };
433
434 if arr_ptr.is_null() {
436 polars_bail!(ComputeError:
437 "an array of type {dtype:?}
438 must have a non-null child {index}"
439 )
440 }
441
442 let arr_ptr = unsafe { &*arr_ptr };
444 Ok(ArrowArrayChild::new(arr_ptr, dtype, parent))
445}
446
447unsafe fn create_dictionary(
453 array: &ArrowArray,
454 dtype: &ArrowDataType,
455 parent: InternalArrowArray,
456) -> PolarsResult<Option<ArrowArrayChild<'static>>> {
457 if let ArrowDataType::Dictionary(_, values, _) = dtype {
458 let dtype = values.as_ref().clone();
459 if array.dictionary.is_null() {
461 polars_bail!(ComputeError:
462 "an array of type {dtype:?}
463 must have a non-null dictionary"
464 )
465 }
466
467 let array = unsafe { &*array.dictionary };
469 Ok(Some(ArrowArrayChild::new(array, dtype, parent)))
470 } else {
471 Ok(None)
472 }
473}
474
475pub trait ArrowArrayRef: std::fmt::Debug {
476 fn owner(&self) -> InternalArrowArray {
477 (*self.parent()).clone()
478 }
479
480 unsafe fn validity(&self) -> PolarsResult<Option<Bitmap>> {
488 if self.array().null_count() == 0 {
489 Ok(None)
490 } else {
491 create_bitmap(self.array(), self.dtype(), self.owner(), 0, true).map(Some)
492 }
493 }
494
495 unsafe fn buffer<T: NativeType>(&self, index: usize) -> PolarsResult<Buffer<T>> {
499 create_buffer::<T>(self.array(), self.dtype(), self.owner(), index)
500 }
501
502 unsafe fn buffer_known_len<T: NativeType>(
506 &self,
507 index: usize,
508 len: usize,
509 ) -> PolarsResult<Buffer<T>> {
510 create_buffer_known_len::<T>(self.array(), self.dtype(), self.owner(), len, index)
511 }
512
513 unsafe fn bitmap(&self, index: usize) -> PolarsResult<Bitmap> {
518 create_bitmap(self.array(), self.dtype(), self.owner(), index, false)
519 }
520
521 unsafe fn child(&self, index: usize) -> PolarsResult<ArrowArrayChild> {
527 create_child(self.array(), self.dtype(), self.parent().clone(), index)
528 }
529
530 unsafe fn dictionary(&self) -> PolarsResult<Option<ArrowArrayChild>> {
531 create_dictionary(self.array(), self.dtype(), self.parent().clone())
532 }
533
534 fn n_buffers(&self) -> usize;
535
536 fn offset(&self) -> usize;
537 fn length(&self) -> usize;
538
539 fn parent(&self) -> &InternalArrowArray;
540 fn array(&self) -> &ArrowArray;
541 fn dtype(&self) -> &ArrowDataType;
542}
543
544#[derive(Debug, Clone)]
564pub struct InternalArrowArray {
565 array: Arc<ArrowArray>,
567 dtype: Arc<ArrowDataType>,
569}
570
571impl InternalArrowArray {
572 pub fn new(array: ArrowArray, dtype: ArrowDataType) -> Self {
573 Self {
574 array: Arc::new(array),
575 dtype: Arc::new(dtype),
576 }
577 }
578}
579
580impl ArrowArrayRef for InternalArrowArray {
581 fn dtype(&self) -> &ArrowDataType {
583 &self.dtype
584 }
585
586 fn parent(&self) -> &InternalArrowArray {
587 self
588 }
589
590 fn array(&self) -> &ArrowArray {
591 self.array.as_ref()
592 }
593
594 fn n_buffers(&self) -> usize {
595 self.array.n_buffers as usize
596 }
597
598 fn offset(&self) -> usize {
599 self.array.offset as usize
600 }
601
602 fn length(&self) -> usize {
603 self.array.length as usize
604 }
605}
606
607#[derive(Debug)]
608pub struct ArrowArrayChild<'a> {
609 array: &'a ArrowArray,
610 dtype: ArrowDataType,
611 parent: InternalArrowArray,
612}
613
614impl ArrowArrayRef for ArrowArrayChild<'_> {
615 fn dtype(&self) -> &ArrowDataType {
617 &self.dtype
618 }
619
620 fn parent(&self) -> &InternalArrowArray {
621 &self.parent
622 }
623
624 fn array(&self) -> &ArrowArray {
625 self.array
626 }
627
628 fn n_buffers(&self) -> usize {
629 self.array.n_buffers as usize
630 }
631
632 fn offset(&self) -> usize {
633 self.array.offset as usize
634 }
635
636 fn length(&self) -> usize {
637 self.array.length as usize
638 }
639}
640
641impl<'a> ArrowArrayChild<'a> {
642 fn new(array: &'a ArrowArray, dtype: ArrowDataType, parent: InternalArrowArray) -> Self {
643 Self {
644 array,
645 dtype,
646 parent,
647 }
648 }
649}