Skip to main content

arrow_data/
ffi.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Contains declarations to bind to the [C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html).
19
20use crate::bit_mask::set_bits;
21use crate::{ArrayData, layout};
22use arrow_buffer::buffer::NullBuffer;
23use arrow_buffer::{Buffer, MutableBuffer, ScalarBuffer};
24use arrow_schema::DataType;
25use std::ffi::c_void;
26
/// ABI-compatible struct for ArrowArray from C Data Interface
/// See <https://arrow.apache.org/docs/format/CDataInterface.html#the-arrowarray-structure>
///
/// The struct is `#[repr(C)]`, so its fields are laid out in declaration
/// order; do not reorder them. Dropping a value invokes its `release`
/// callback when one is set (see the `Drop` impl in this file).
///
/// ```
/// # use arrow_data::ArrayData;
/// # use arrow_data::ffi::FFI_ArrowArray;
/// fn export_array(array: &ArrayData) -> FFI_ArrowArray {
///     FFI_ArrowArray::new(array)
/// }
/// ```
#[repr(C)]
#[derive(Debug)]
pub struct FFI_ArrowArray {
    /// Logical length of the array
    pub length: i64,
    /// Number of null items in the array.
    ///
    /// This is a signed field and may be negative; per the C Data Interface
    /// `-1` means "not yet computed". Use [`Self::null_count_opt`] to read it
    /// with a validity check.
    pub null_count: i64,
    /// Logical offset inside the array
    pub offset: i64,
    /// Number of physical buffers backing this array
    pub n_buffers: i64,
    /// Number of children this array has
    pub n_children: i64,
    /// C array of pointers to the start of each physical buffer backing this array.
    ///
    /// Individual entries may be null (an exported array without a validity
    /// buffer stores a null pointer in the validity slot).
    pub buffers: *mut *const c_void,
    /// C array of pointers to each child array of this array
    pub children: *mut *mut FFI_ArrowArray,
    /// Pointer to the underlying array of dictionary values; null when the
    /// array has no dictionary
    pub dictionary: *mut FFI_ArrowArray,
    /// Pointer to a producer-provided release callback.
    ///
    /// `None` marks the array as released (see [`Self::is_released`]).
    pub release: Option<unsafe extern "C" fn(arg1: *mut FFI_ArrowArray)>,
    /// Opaque pointer to producer-provided private data
    /// When exported, this MUST contain everything that is owned by this array.
    /// For example, any buffer pointed to in `buffers` must be here, as well
    /// as the `buffers` pointer itself.
    /// In other words, everything in [FFI_ArrowArray] must be owned by
    /// `private_data` and can assume that they do not outlive `private_data`.
    pub private_data: *mut c_void,
}
66
67impl Drop for FFI_ArrowArray {
68    fn drop(&mut self) {
69        match self.release {
70            None => (),
71            Some(release) => unsafe { release(self) },
72        };
73    }
74}
75
// `FFI_ArrowArray` contains raw pointers, so the compiler does not derive
// `Send`/`Sync` for it automatically; these impls opt in by hand.
// NOTE(review): the invariant that makes this sound (the pointed-to data being
// safe to move to, and share between, threads) is not stated in this file —
// confirm it and record it here.
unsafe impl Send for FFI_ArrowArray {}
unsafe impl Sync for FFI_ArrowArray {}
78
79// callback used to drop [FFI_ArrowArray] when it is exported
80unsafe extern "C" fn release_array(array: *mut FFI_ArrowArray) {
81    if array.is_null() {
82        return;
83    }
84    let array = unsafe { &mut *array };
85
86    // take ownership of `private_data`, therefore dropping it`
87    let private = unsafe { Box::from_raw(array.private_data as *mut ArrayPrivateData) };
88    for child in private.children.iter() {
89        let _ = unsafe { Box::from_raw(*child) };
90    }
91    if !private.dictionary.is_null() {
92        let _ = unsafe { Box::from_raw(private.dictionary) };
93    }
94
95    array.release = None;
96}
97
98/// Aligns the provided `nulls` to the provided `data_offset`
99///
100/// This is a temporary measure until offset is removed from ArrayData (#1799)
101fn align_nulls(data_offset: usize, nulls: Option<&NullBuffer>) -> Option<Buffer> {
102    let nulls = nulls?;
103    if data_offset == nulls.offset() {
104        // Underlying buffer is already aligned
105        return Some(nulls.buffer().clone());
106    }
107    if data_offset == 0 {
108        return Some(nulls.inner().sliced());
109    }
110    let mut builder = MutableBuffer::new_null(data_offset + nulls.len());
111    set_bits(
112        builder.as_slice_mut(),
113        nulls.validity(),
114        data_offset,
115        nulls.offset(),
116        nulls.len(),
117    );
118    Some(builder.into())
119}
120
/// Everything an exported [`FFI_ArrowArray`] owns.
///
/// `FFI_ArrowArray::new` boxes one of these and stores it behind
/// `private_data`; `release_array` re-boxes and drops it.
struct ArrayPrivateData {
    /// The buffers whose addresses are recorded in `buffers_ptr`.
    // Never read after construction in this file (hence the lint allowance);
    // presumably held only to keep those allocations alive.
    #[allow(dead_code)]
    buffers: Vec<Option<Buffer>>,
    /// Pointer array exposed through `FFI_ArrowArray::buffers`.
    buffers_ptr: Box<[*const c_void]>,
    /// Pointer array exposed through `FFI_ArrowArray::children`; every entry
    /// is produced by `Box::into_raw` in `FFI_ArrowArray::new`.
    children: Box<[*mut FFI_ArrowArray]>,
    /// Boxed dictionary array (from `Box::into_raw`), or null if there is none.
    dictionary: *mut FFI_ArrowArray,
}
128
129impl FFI_ArrowArray {
130    /// creates a new `FFI_ArrowArray` from existing data.
131    pub fn new(data: &ArrayData) -> Self {
132        let data_layout = layout(data.data_type());
133
134        let mut buffers = if data_layout.can_contain_null_mask {
135            // * insert the null buffer at the start
136            // * make all others `Option<Buffer>`.
137            std::iter::once(align_nulls(data.offset(), data.nulls()))
138                .chain(data.buffers().iter().map(|b| Some(b.clone())))
139                .collect::<Vec<_>>()
140        } else {
141            data.buffers().iter().map(|b| Some(b.clone())).collect()
142        };
143
144        // `n_buffers` is the number of buffers by the spec.
145        let mut n_buffers = {
146            data_layout.buffers.len() + {
147                // If the layout has a null buffer by Arrow spec.
148                // Note that even the array doesn't have a null buffer because it has
149                // no null value, we still need to count 1 here to follow the spec.
150                usize::from(data_layout.can_contain_null_mask)
151            }
152        } as i64;
153
154        if data_layout.variadic {
155            // Save the lengths of all variadic buffers into a new buffer.
156            // The first buffer is `views`, and the rest are variadic.
157            let mut data_buffers_lengths = Vec::new();
158            for buffer in data.buffers().iter().skip(1) {
159                data_buffers_lengths.push(buffer.len() as i64);
160                n_buffers += 1;
161            }
162
163            buffers.push(Some(ScalarBuffer::from(data_buffers_lengths).into_inner()));
164            n_buffers += 1;
165        }
166
167        let buffers_ptr = buffers
168            .iter()
169            .flat_map(|maybe_buffer| match maybe_buffer {
170                Some(b) => Some(b.as_ptr() as *const c_void),
171                // This is for null buffer. We only put a null pointer for
172                // null buffer if by spec it can contain null mask.
173                None if data_layout.can_contain_null_mask => Some(std::ptr::null()),
174                None => None,
175            })
176            .collect::<Box<[_]>>();
177
178        let empty = vec![];
179        let (child_data, dictionary) = match data.data_type() {
180            DataType::Dictionary(_, _) => (
181                empty.as_slice(),
182                Box::into_raw(Box::new(FFI_ArrowArray::new(&data.child_data()[0]))),
183            ),
184            _ => (data.child_data(), std::ptr::null_mut()),
185        };
186
187        let children = child_data
188            .iter()
189            .map(|child| Box::into_raw(Box::new(FFI_ArrowArray::new(child))))
190            .collect::<Box<_>>();
191        let n_children = children.len() as i64;
192
193        // As in the IPC format, emit null_count = length for Null type
194        let null_count = match data.data_type() {
195            DataType::Null => data.len(),
196            _ => data.null_count(),
197        };
198
199        // create the private data owning everything.
200        // any other data must be added here, e.g. via a struct, to track lifetime.
201        let mut private_data = Box::new(ArrayPrivateData {
202            buffers,
203            buffers_ptr,
204            children,
205            dictionary,
206        });
207
208        Self {
209            length: data.len() as i64,
210            null_count: null_count as i64,
211            offset: data.offset() as i64,
212            n_buffers,
213            n_children,
214            buffers: private_data.buffers_ptr.as_mut_ptr(),
215            children: private_data.children.as_mut_ptr(),
216            dictionary,
217            release: Some(release_array),
218            private_data: Box::into_raw(private_data) as *mut c_void,
219        }
220    }
221
222    /// Takes ownership of the pointed to [`FFI_ArrowArray`]
223    ///
224    /// This acts to [move] the data out of `array`, setting the release callback to NULL
225    ///
226    /// # Safety
227    ///
228    /// * `array` must be [valid] for reads and writes
229    /// * `array` must be properly aligned
230    /// * `array` must point to a properly initialized value of [`FFI_ArrowArray`]
231    ///
232    /// [move]: https://arrow.apache.org/docs/format/CDataInterface.html#moving-an-array
233    /// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety
234    pub unsafe fn from_raw(array: *mut FFI_ArrowArray) -> Self {
235        unsafe { std::ptr::replace(array, Self::empty()) }
236    }
237
238    /// create an empty `FFI_ArrowArray`, which can be used to import data into
239    pub fn empty() -> Self {
240        Self {
241            length: 0,
242            null_count: 0,
243            offset: 0,
244            n_buffers: 0,
245            n_children: 0,
246            buffers: std::ptr::null_mut(),
247            children: std::ptr::null_mut(),
248            dictionary: std::ptr::null_mut(),
249            release: None,
250            private_data: std::ptr::null_mut(),
251        }
252    }
253
254    /// the length of the array
255    #[inline]
256    pub fn len(&self) -> usize {
257        self.length as usize
258    }
259
260    /// whether the array is empty
261    #[inline]
262    pub fn is_empty(&self) -> bool {
263        self.length == 0
264    }
265
266    /// Whether the array has been released
267    #[inline]
268    pub fn is_released(&self) -> bool {
269        self.release.is_none()
270    }
271
272    /// the offset of the array
273    #[inline]
274    pub fn offset(&self) -> usize {
275        self.offset as usize
276    }
277
278    /// the null count of the array
279    #[inline]
280    pub fn null_count(&self) -> usize {
281        self.null_count as usize
282    }
283
284    /// Returns the null count, checking for validity
285    #[inline]
286    pub fn null_count_opt(&self) -> Option<usize> {
287        usize::try_from(self.null_count).ok()
288    }
289
290    /// Set the null count of the array
291    ///
292    /// # Safety
293    /// Null count must match that of null buffer
294    #[inline]
295    pub unsafe fn set_null_count(&mut self, null_count: i64) {
296        self.null_count = null_count;
297    }
298
299    /// Returns the buffer at the provided index
300    ///
301    /// # Panic
302    /// Panics if index >= self.num_buffers() or the buffer is not correctly aligned
303    #[inline]
304    pub fn buffer(&self, index: usize) -> *const u8 {
305        assert!(!self.buffers.is_null());
306        assert!(index < self.num_buffers());
307        // SAFETY:
308        // If buffers is not null must be valid for reads up to num_buffers
309        unsafe { std::ptr::read_unaligned((self.buffers as *mut *const u8).add(index)) }
310    }
311
312    /// Returns the number of buffers
313    #[inline]
314    pub fn num_buffers(&self) -> usize {
315        self.n_buffers as _
316    }
317
318    /// Returns the child at the provided index
319    #[inline]
320    pub fn child(&self, index: usize) -> &FFI_ArrowArray {
321        assert!(!self.children.is_null());
322        assert!(index < self.num_children());
323        // Safety:
324        // If children is not null must be valid for reads up to num_children
325        unsafe {
326            let child = std::ptr::read_unaligned(self.children.add(index));
327            child.as_ref().unwrap()
328        }
329    }
330
331    /// Returns the number of children
332    #[inline]
333    pub fn num_children(&self) -> usize {
334        self.n_children as _
335    }
336
337    /// Returns the dictionary if any
338    #[inline]
339    pub fn dictionary(&self) -> Option<&Self> {
340        // Safety:
341        // If dictionary is not null should be valid for reads of `Self`
342        unsafe { self.dictionary.as_ref() }
343    }
344}
345
#[cfg(test)]
mod tests {
    use super::*;

    // More tests located in top-level arrow crate

    /// A `Null` array is exported with zero buffers, both in the advertised
    /// `n_buffers` and in the pointer array kept in the private data.
    #[test]
    fn null_array_n_buffers() {
        let data = ArrayData::new_null(&DataType::Null, 10);

        let ffi_array = FFI_ArrowArray::new(&data);
        assert_eq!(0, ffi_array.n_buffers);

        // Only borrow the private data. Re-boxing it would create a second
        // owner, and a failing assertion would then free it twice: once when
        // the box is dropped during unwinding and again when `ffi_array` is
        // released.
        // SAFETY: `new` stores a pointer obtained from `Box::into_raw` of an
        // `ArrayPrivateData` in `private_data`, and `ffi_array`, which owns
        // it, outlives this borrow.
        let private_data = unsafe { &*(ffi_array.private_data as *const ArrayPrivateData) };

        assert_eq!(0, private_data.buffers_ptr.len());
    }
}
366}