polars_arrow/ffi/
mmap.rs

1//! Functionality to mmap in-memory data regions.
2use std::sync::Arc;
3
4use polars_error::{PolarsResult, polars_bail};
5
6use super::{ArrowArray, InternalArrowArray};
7use crate::array::{BooleanArray, FromFfi, PrimitiveArray};
8use crate::bitmap::Bitmap;
9use crate::buffer::Buffer;
10use crate::datatypes::ArrowDataType;
11use crate::storage::SharedStorage;
12use crate::types::NativeType;
13
14#[allow(dead_code)]
15struct PrivateData<T> {
16    // the owner of the pointers' regions
17    data: T,
18    buffers_ptr: Box<[*const std::os::raw::c_void]>,
19    children_ptr: Box<[*mut ArrowArray]>,
20    dictionary_ptr: Option<*mut ArrowArray>,
21}
22
23pub(crate) unsafe fn create_array<
24    T,
25    I: Iterator<Item = Option<*const u8>>,
26    II: Iterator<Item = ArrowArray>,
27>(
28    data: Arc<T>,
29    num_rows: usize,
30    null_count: usize,
31    buffers: I,
32    children: II,
33    dictionary: Option<ArrowArray>,
34    offset: Option<usize>,
35) -> ArrowArray {
36    let buffers_ptr = buffers
37        .map(|maybe_buffer| match maybe_buffer {
38            Some(b) => b as *const std::os::raw::c_void,
39            None => std::ptr::null(),
40        })
41        .collect::<Box<[_]>>();
42    let n_buffers = buffers_ptr.len() as i64;
43
44    let children_ptr = children
45        .map(|child| Box::into_raw(Box::new(child)))
46        .collect::<Box<_>>();
47    let n_children = children_ptr.len() as i64;
48
49    let dictionary_ptr = dictionary.map(|array| Box::into_raw(Box::new(array)));
50
51    let mut private_data = Box::new(PrivateData::<Arc<T>> {
52        data,
53        buffers_ptr,
54        children_ptr,
55        dictionary_ptr,
56    });
57
58    ArrowArray {
59        length: num_rows as i64,
60        null_count: null_count as i64,
61        offset: offset.unwrap_or(0) as i64, // Unwrap: IPC files are by definition not offset
62        n_buffers,
63        n_children,
64        buffers: private_data.buffers_ptr.as_mut_ptr(),
65        children: private_data.children_ptr.as_mut_ptr(),
66        dictionary: private_data.dictionary_ptr.unwrap_or(std::ptr::null_mut()),
67        release: Some(release::<Arc<T>>),
68        private_data: Box::into_raw(private_data) as *mut ::std::os::raw::c_void,
69    }
70}
71
72/// callback used to drop [`ArrowArray`] when it is exported specified for [`PrivateData`].
73unsafe extern "C" fn release<T>(array: *mut ArrowArray) {
74    if array.is_null() {
75        return;
76    }
77    let array = &mut *array;
78
79    // take ownership of `private_data`, therefore dropping it
80    let private = Box::from_raw(array.private_data as *mut PrivateData<T>);
81    for child in private.children_ptr.iter() {
82        let _ = Box::from_raw(*child);
83    }
84
85    if let Some(ptr) = private.dictionary_ptr {
86        let _ = Box::from_raw(ptr);
87    }
88
89    array.release = None;
90}
91
92/// Creates a (non-null) [`PrimitiveArray`] from a slice of values.
93/// This does not have memcopy and is the fastest way to create a [`PrimitiveArray`].
94///
95/// This can be useful if you want to apply arrow kernels on slices without incurring
96/// a memcopy cost.
97///
98/// # Safety
99///
100/// Using this function is not unsafe, but the returned PrimitiveArray's lifetime is bound to the lifetime
101/// of the slice. The returned [`PrimitiveArray`] _must not_ outlive the passed slice.
102pub unsafe fn slice<T: NativeType>(values: &[T]) -> PrimitiveArray<T> {
103    let static_values = std::mem::transmute::<&[T], &'static [T]>(values);
104    let storage = SharedStorage::from_static(static_values);
105    let buffer = Buffer::from_storage(storage);
106    PrimitiveArray::new_unchecked(T::PRIMITIVE.into(), buffer, None)
107}
108
109/// Creates a (non-null) [`PrimitiveArray`] from a slice of values.
110/// This does not have memcopy and is the fastest way to create a [`PrimitiveArray`].
111///
112/// This can be useful if you want to apply arrow kernels on slices without incurring
113/// a memcopy cost.
114///
115/// # Safety
116///
117/// The caller must ensure the passed `owner` ensures the data remains alive.
118pub unsafe fn slice_and_owner<T: NativeType, O>(slice: &[T], owner: O) -> PrimitiveArray<T> {
119    let num_rows = slice.len();
120    let null_count = 0;
121    let validity = None;
122
123    let data: &[u8] = bytemuck::cast_slice(slice);
124    let ptr = data.as_ptr();
125    let data = Arc::new(owner);
126
127    // SAFETY: the underlying assumption of this function: the array will not be used
128    // beyond the
129    let array = create_array(
130        data,
131        num_rows,
132        null_count,
133        [validity, Some(ptr)].into_iter(),
134        [].into_iter(),
135        None,
136        None,
137    );
138    let array = InternalArrowArray::new(array, T::PRIMITIVE.into());
139
140    // SAFETY: we just created a valid array
141    unsafe { PrimitiveArray::<T>::try_from_ffi(array) }.unwrap()
142}
143
144/// Creates a (non-null) [`BooleanArray`] from a slice of bits.
145/// This does not have memcopy and is the fastest way to create a [`BooleanArray`].
146///
147/// This can be useful if you want to apply arrow kernels on slices without
148/// incurring a memcopy cost.
149///
150/// The `offset` indicates where the first bit starts in the first byte.
151///
152/// # Safety
153///
154/// Using this function is not unsafe, but the returned BooleanArrays's lifetime
155/// is bound to the lifetime of the slice. The returned [`BooleanArray`] _must
156/// not_ outlive the passed slice.
157pub unsafe fn bitmap(data: &[u8], offset: usize, length: usize) -> PolarsResult<BooleanArray> {
158    if offset >= 8 {
159        polars_bail!(InvalidOperation: "offset should be < 8")
160    };
161    if length > data.len() * 8 - offset {
162        polars_bail!(InvalidOperation: "given length is oob")
163    }
164    let static_data = std::mem::transmute::<&[u8], &'static [u8]>(data);
165    let storage = SharedStorage::from_static(static_data);
166    let bitmap = Bitmap::from_inner_unchecked(storage, offset, length, None);
167    Ok(BooleanArray::new(ArrowDataType::Boolean, bitmap, None))
168}
169
170/// Creates a (non-null) [`BooleanArray`] from a slice of bits.
171/// This does not have memcopy and is the fastest way to create a [`BooleanArray`].
172///
173/// This can be useful if you want to apply arrow kernels on slices without
174/// incurring a memcopy cost.
175///
176/// The `offset` indicates where the first bit starts in the first byte.
177///
178/// # Safety
179///
180/// The caller must ensure the passed `owner` ensures the data remains alive.
181pub unsafe fn bitmap_and_owner<O>(
182    data: &[u8],
183    offset: usize,
184    length: usize,
185    owner: O,
186) -> PolarsResult<BooleanArray> {
187    if offset >= 8 {
188        polars_bail!(InvalidOperation: "offset should be < 8")
189    };
190    if length > data.len() * 8 - offset {
191        polars_bail!(InvalidOperation: "given length is oob")
192    }
193    let null_count = 0;
194    let validity = None;
195
196    let ptr = data.as_ptr();
197    let data = Arc::new(owner);
198
199    // SAFETY: the underlying assumption of this function: the array will not be used
200    // beyond the
201    let array = create_array(
202        data,
203        length,
204        null_count,
205        [validity, Some(ptr)].into_iter(),
206        [].into_iter(),
207        None,
208        Some(offset),
209    );
210    let array = InternalArrowArray::new(array, ArrowDataType::Boolean);
211
212    // SAFETY: we just created a valid array
213    Ok(unsafe { BooleanArray::try_from_ffi(array) }.unwrap())
214}