arrow_data/ffi.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Contains declarations to bind to the [C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html).
19
20use crate::bit_mask::set_bits;
21use crate::{ArrayData, layout};
22use arrow_buffer::buffer::NullBuffer;
23use arrow_buffer::{Buffer, MutableBuffer, ScalarBuffer};
24use arrow_schema::DataType;
25use std::ffi::c_void;
26
/// Rust mirror of the `ArrowArray` struct from the Arrow C Data Interface.
///
/// The struct is `#[repr(C)]`, so the order and types of its fields must not be
/// changed. See
/// <https://arrow.apache.org/docs/format/CDataInterface.html#the-arrowarray-structure>
///
/// ```
/// # use arrow_data::ArrayData;
/// # use arrow_data::ffi::FFI_ArrowArray;
/// fn export_array(array: &ArrayData) -> FFI_ArrowArray {
///     FFI_ArrowArray::new(array)
/// }
/// ```
#[repr(C)]
#[derive(Debug)]
pub struct FFI_ArrowArray {
    /// Logical number of items in the array.
    pub length: i64,
    /// How many of the items are null.
    pub null_count: i64,
    /// Logical offset into the array at which the data starts.
    pub offset: i64,
    /// Number of entries in `buffers`, i.e. physical buffers backing the array.
    pub n_buffers: i64,
    /// Number of entries in `children`.
    pub n_children: i64,
    /// C array holding a pointer to the start of every physical buffer.
    pub buffers: *mut *const c_void,
    /// C array holding a pointer to every child array.
    pub children: *mut *mut FFI_ArrowArray,
    /// Pointer to the array of dictionary values.
    pub dictionary: *mut FFI_ArrowArray,
    /// Release callback supplied by the producer; `None` corresponds to a NULL
    /// callback.
    pub release: Option<unsafe extern "C" fn(*mut FFI_ArrowArray)>,
    /// Opaque, producer-owned state.
    ///
    /// On export this MUST own everything the array refers to: each buffer
    /// reachable through `buffers` as well as the `buffers` pointer array
    /// itself. Put differently, every pointer stored in an [FFI_ArrowArray]
    /// is owned by `private_data` and may rely on not outliving it.
    pub private_data: *mut c_void,
}
66
67impl Drop for FFI_ArrowArray {
68 fn drop(&mut self) {
69 match self.release {
70 None => (),
71 Some(release) => unsafe { release(self) },
72 };
73 }
74}
75
// The raw pointer fields of `FFI_ArrowArray` prevent the compiler from deriving
// `Send` and `Sync` automatically, so both are asserted by hand here.
// NOTE(review): the justification is not stated in this file; presumably the
// exported data is never mutated through these pointers — confirm.
unsafe impl Send for FFI_ArrowArray {}
unsafe impl Sync for FFI_ArrowArray {}
78
79// callback used to drop [FFI_ArrowArray] when it is exported
80unsafe extern "C" fn release_array(array: *mut FFI_ArrowArray) {
81 if array.is_null() {
82 return;
83 }
84 let array = unsafe { &mut *array };
85
86 // take ownership of `private_data`, therefore dropping it`
87 let private = unsafe { Box::from_raw(array.private_data as *mut ArrayPrivateData) };
88 for child in private.children.iter() {
89 let _ = unsafe { Box::from_raw(*child) };
90 }
91 if !private.dictionary.is_null() {
92 let _ = unsafe { Box::from_raw(private.dictionary) };
93 }
94
95 array.release = None;
96}
97
98/// Aligns the provided `nulls` to the provided `data_offset`
99///
100/// This is a temporary measure until offset is removed from ArrayData (#1799)
101fn align_nulls(data_offset: usize, nulls: Option<&NullBuffer>) -> Option<Buffer> {
102 let nulls = nulls?;
103 if data_offset == nulls.offset() {
104 // Underlying buffer is already aligned
105 return Some(nulls.buffer().clone());
106 }
107 if data_offset == 0 {
108 return Some(nulls.inner().sliced());
109 }
110 let mut builder = MutableBuffer::new_null(data_offset + nulls.len());
111 set_bits(
112 builder.as_slice_mut(),
113 nulls.validity(),
114 data_offset,
115 nulls.offset(),
116 nulls.len(),
117 );
118 Some(builder.into())
119}
120
/// State stored behind `FFI_ArrowArray::private_data` for arrays exported by
/// `FFI_ArrowArray::new`; it is reclaimed and dropped by `release_array`.
struct ArrayPrivateData {
    /// Clones of the exported buffers. Nothing in this file reads the field
    /// back (hence `dead_code`); it is stored so that the pointers collected
    /// in `buffers_ptr` have an owner.
    #[allow(dead_code)]
    buffers: Vec<Option<Buffer>>,
    /// Pointer array handed out as `FFI_ArrowArray::buffers`.
    buffers_ptr: Box<[*const c_void]>,
    /// Pointer array handed out as `FFI_ArrowArray::children`. Every entry
    /// comes from `Box::into_raw` and is re-boxed and dropped in `release_array`.
    children: Box<[*mut FFI_ArrowArray]>,
    /// Dictionary values leaked with `Box::into_raw`, or null when the array
    /// is not dictionary-typed; re-boxed and dropped in `release_array`.
    dictionary: *mut FFI_ArrowArray,
}
128
129impl FFI_ArrowArray {
130 /// creates a new `FFI_ArrowArray` from existing data.
131 pub fn new(data: &ArrayData) -> Self {
132 let data_layout = layout(data.data_type());
133
134 let mut buffers = if data_layout.can_contain_null_mask {
135 // * insert the null buffer at the start
136 // * make all others `Option<Buffer>`.
137 std::iter::once(align_nulls(data.offset(), data.nulls()))
138 .chain(data.buffers().iter().map(|b| Some(b.clone())))
139 .collect::<Vec<_>>()
140 } else {
141 data.buffers().iter().map(|b| Some(b.clone())).collect()
142 };
143
144 // `n_buffers` is the number of buffers by the spec.
145 let mut n_buffers = {
146 data_layout.buffers.len() + {
147 // If the layout has a null buffer by Arrow spec.
148 // Note that even the array doesn't have a null buffer because it has
149 // no null value, we still need to count 1 here to follow the spec.
150 usize::from(data_layout.can_contain_null_mask)
151 }
152 } as i64;
153
154 if data_layout.variadic {
155 // Save the lengths of all variadic buffers into a new buffer.
156 // The first buffer is `views`, and the rest are variadic.
157 let mut data_buffers_lengths = Vec::new();
158 for buffer in data.buffers().iter().skip(1) {
159 data_buffers_lengths.push(buffer.len() as i64);
160 n_buffers += 1;
161 }
162
163 buffers.push(Some(ScalarBuffer::from(data_buffers_lengths).into_inner()));
164 n_buffers += 1;
165 }
166
167 let buffers_ptr = buffers
168 .iter()
169 .flat_map(|maybe_buffer| match maybe_buffer {
170 Some(b) => Some(b.as_ptr() as *const c_void),
171 // This is for null buffer. We only put a null pointer for
172 // null buffer if by spec it can contain null mask.
173 None if data_layout.can_contain_null_mask => Some(std::ptr::null()),
174 None => None,
175 })
176 .collect::<Box<[_]>>();
177
178 let empty = vec![];
179 let (child_data, dictionary) = match data.data_type() {
180 DataType::Dictionary(_, _) => (
181 empty.as_slice(),
182 Box::into_raw(Box::new(FFI_ArrowArray::new(&data.child_data()[0]))),
183 ),
184 _ => (data.child_data(), std::ptr::null_mut()),
185 };
186
187 let children = child_data
188 .iter()
189 .map(|child| Box::into_raw(Box::new(FFI_ArrowArray::new(child))))
190 .collect::<Box<_>>();
191 let n_children = children.len() as i64;
192
193 // As in the IPC format, emit null_count = length for Null type
194 let null_count = match data.data_type() {
195 DataType::Null => data.len(),
196 _ => data.null_count(),
197 };
198
199 // create the private data owning everything.
200 // any other data must be added here, e.g. via a struct, to track lifetime.
201 let mut private_data = Box::new(ArrayPrivateData {
202 buffers,
203 buffers_ptr,
204 children,
205 dictionary,
206 });
207
208 Self {
209 length: data.len() as i64,
210 null_count: null_count as i64,
211 offset: data.offset() as i64,
212 n_buffers,
213 n_children,
214 buffers: private_data.buffers_ptr.as_mut_ptr(),
215 children: private_data.children.as_mut_ptr(),
216 dictionary,
217 release: Some(release_array),
218 private_data: Box::into_raw(private_data) as *mut c_void,
219 }
220 }
221
222 /// Takes ownership of the pointed to [`FFI_ArrowArray`]
223 ///
224 /// This acts to [move] the data out of `array`, setting the release callback to NULL
225 ///
226 /// # Safety
227 ///
228 /// * `array` must be [valid] for reads and writes
229 /// * `array` must be properly aligned
230 /// * `array` must point to a properly initialized value of [`FFI_ArrowArray`]
231 ///
232 /// [move]: https://arrow.apache.org/docs/format/CDataInterface.html#moving-an-array
233 /// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety
234 pub unsafe fn from_raw(array: *mut FFI_ArrowArray) -> Self {
235 unsafe { std::ptr::replace(array, Self::empty()) }
236 }
237
238 /// create an empty `FFI_ArrowArray`, which can be used to import data into
239 pub fn empty() -> Self {
240 Self {
241 length: 0,
242 null_count: 0,
243 offset: 0,
244 n_buffers: 0,
245 n_children: 0,
246 buffers: std::ptr::null_mut(),
247 children: std::ptr::null_mut(),
248 dictionary: std::ptr::null_mut(),
249 release: None,
250 private_data: std::ptr::null_mut(),
251 }
252 }
253
254 /// the length of the array
255 #[inline]
256 pub fn len(&self) -> usize {
257 self.length as usize
258 }
259
260 /// whether the array is empty
261 #[inline]
262 pub fn is_empty(&self) -> bool {
263 self.length == 0
264 }
265
266 /// Whether the array has been released
267 #[inline]
268 pub fn is_released(&self) -> bool {
269 self.release.is_none()
270 }
271
272 /// the offset of the array
273 #[inline]
274 pub fn offset(&self) -> usize {
275 self.offset as usize
276 }
277
278 /// the null count of the array
279 #[inline]
280 pub fn null_count(&self) -> usize {
281 self.null_count as usize
282 }
283
284 /// Returns the null count, checking for validity
285 #[inline]
286 pub fn null_count_opt(&self) -> Option<usize> {
287 usize::try_from(self.null_count).ok()
288 }
289
290 /// Set the null count of the array
291 ///
292 /// # Safety
293 /// Null count must match that of null buffer
294 #[inline]
295 pub unsafe fn set_null_count(&mut self, null_count: i64) {
296 self.null_count = null_count;
297 }
298
299 /// Returns the buffer at the provided index
300 ///
301 /// # Panic
302 /// Panics if index >= self.num_buffers() or the buffer is not correctly aligned
303 #[inline]
304 pub fn buffer(&self, index: usize) -> *const u8 {
305 assert!(!self.buffers.is_null());
306 assert!(index < self.num_buffers());
307 // SAFETY:
308 // If buffers is not null must be valid for reads up to num_buffers
309 unsafe { std::ptr::read_unaligned((self.buffers as *mut *const u8).add(index)) }
310 }
311
312 /// Returns the number of buffers
313 #[inline]
314 pub fn num_buffers(&self) -> usize {
315 self.n_buffers as _
316 }
317
318 /// Returns the child at the provided index
319 #[inline]
320 pub fn child(&self, index: usize) -> &FFI_ArrowArray {
321 assert!(!self.children.is_null());
322 assert!(index < self.num_children());
323 // Safety:
324 // If children is not null must be valid for reads up to num_children
325 unsafe {
326 let child = std::ptr::read_unaligned(self.children.add(index));
327 child.as_ref().unwrap()
328 }
329 }
330
331 /// Returns the number of children
332 #[inline]
333 pub fn num_children(&self) -> usize {
334 self.n_children as _
335 }
336
337 /// Returns the dictionary if any
338 #[inline]
339 pub fn dictionary(&self) -> Option<&Self> {
340 // Safety:
341 // If dictionary is not null should be valid for reads of `Self`
342 unsafe { self.dictionary.as_ref() }
343 }
344}
345
#[cfg(test)]
mod tests {
    use super::*;

    // More tests located in top-level arrow crate

    /// Exporting a `Null` array must report zero buffers and export no buffer pointers.
    #[test]
    fn null_array_n_buffers() {
        let data = ArrayData::new_null(&DataType::Null, 10);

        let ffi_array = FFI_ArrowArray::new(&data);
        assert_eq!(0, ffi_array.n_buffers);

        // Borrow the private data instead of re-boxing it: `ffi_array` still
        // owns it, so taking ownership here would free it twice if an
        // assertion below failed and unwound.
        // SAFETY: `FFI_ArrowArray::new` stores a pointer to a boxed
        // `ArrayPrivateData` that stays alive until `ffi_array` is dropped.
        let private_data = unsafe { &*(ffi_array.private_data as *const ArrayPrivateData) };

        assert_eq!(0, private_data.buffers_ptr.len());
    }
}
366}