use crate::bit_mask::set_bits;
use crate::{layout, ArrayData};
use arrow_buffer::buffer::NullBuffer;
use arrow_buffer::{Buffer, MutableBuffer, ScalarBuffer};
use arrow_schema::DataType;
use std::ffi::c_void;
/// C-layout description of a single Arrow array as it crosses an FFI boundary.
///
/// The struct is `#[repr(C)]`, so the field order below is part of its binary
/// layout and must not be rearranged.
/// NOTE(review): presumably mirrors `struct ArrowArray` from the Arrow C Data
/// Interface field for field — confirm against the specification before
/// changing anything here.
///
/// Dropping a value invokes `release` if it is still set (see the `Drop` impl).
#[repr(C)]
#[derive(Debug)]
pub struct FFI_ArrowArray {
/// Number of elements; `new` fills it from `ArrayData::len`.
length: i64,
/// Number of null elements. `null_count_opt` treats values that do not fit
/// in `usize` (e.g. negative ones) as "not available".
null_count: i64,
/// Element offset; `new` fills it from `ArrayData::offset`.
offset: i64,
/// Number of entries reachable through `buffers`; `buffer` bounds-checks
/// against it.
n_buffers: i64,
/// Number of entries reachable through `children`; `child` bounds-checks
/// against it.
n_children: i64,
/// Pointer to a table of buffer pointers, or null (as produced by `empty`).
buffers: *mut *const c_void,
/// Pointer to a table of child-array pointers, or null (as produced by `empty`).
children: *mut *mut FFI_ArrowArray,
/// Pointer to the dictionary array, or null when there is none.
dictionary: *mut FFI_ArrowArray,
/// Callback that frees the resources behind this struct. `None` marks the
/// array as released (see `is_released`).
release: Option<unsafe extern "C" fn(arg1: *mut FFI_ArrowArray)>,
/// Opaque pointer reserved for the producer. For arrays built by `new` it
/// points at a boxed `ArrayPrivateData`, which `release_array` reclaims.
private_data: *mut c_void,
}
impl Drop for FFI_ArrowArray {
    /// Runs the release callback when one is still installed; an array whose
    /// `release` is `None` is left untouched.
    fn drop(&mut self) {
        if let Some(release_fn) = self.release {
            // SAFETY: the callback was stored alongside this very struct and
            // receives a pointer to it. NOTE(review): this assumes whoever set
            // `release` supplied a callback that is valid for this array.
            unsafe { release_fn(self) };
        }
    }
}
// The raw-pointer fields of `FFI_ArrowArray` prevent the compiler from deriving
// `Send` and `Sync` automatically, so both are asserted by hand here.
// NOTE(review): soundness depends on the pointed-to data and the `release`
// callback being usable from any thread; nothing in this file enforces that —
// confirm against the producers this type is used with.
unsafe impl Send for FFI_ArrowArray {}
unsafe impl Sync for FFI_ArrowArray {}
/// Release callback installed by `FFI_ArrowArray::new`.
///
/// Reclaims the boxed `ArrayPrivateData` stored in `private_data`, frees every
/// child array and the dictionary (when present) that were leaked with
/// `Box::into_raw`, and finally clears `release` to mark the array as released.
/// A null `array` pointer is ignored.
///
/// # Safety
///
/// A non-null `array` must point to an `FFI_ArrowArray` whose `private_data`
/// was produced by `FFI_ArrowArray::new` and has not been released yet.
unsafe extern "C" fn release_array(array: *mut FFI_ArrowArray) {
    let exported = match array.as_mut() {
        Some(exported) => exported,
        None => return,
    };

    // Re-box the private data so it is dropped when this function returns.
    let owned = Box::from_raw(exported.private_data.cast::<ArrayPrivateData>());

    // Children and dictionary are stored as raw pointers, so dropping `owned`
    // alone would not free them.
    for &child in owned.children.iter() {
        drop(Box::from_raw(child));
    }
    if !owned.dictionary.is_null() {
        drop(Box::from_raw(owned.dictionary));
    }

    exported.release = None;
}
/// Produces a validity buffer whose bit offset matches `data_offset`.
///
/// Returns `None` when `nulls` is `None`. Otherwise:
/// * if the null buffer already has offset `data_offset`, its underlying
///   buffer is cloned as-is;
/// * if `data_offset` is zero, the result of `nulls.inner().sliced()` is used;
/// * in every other case a new buffer of `data_offset + nulls.len()` bits is
///   created with `MutableBuffer::new_null` and the validity bits are copied
///   into it with `set_bits`, written starting at bit `data_offset`.
fn align_nulls(data_offset: usize, nulls: Option<&NullBuffer>) -> Option<Buffer> {
    let mask = nulls?;
    let mask_offset = mask.offset();

    let aligned: Buffer = if data_offset == mask_offset {
        mask.buffer().clone()
    } else if data_offset == 0 {
        mask.inner().sliced()
    } else {
        let bit_len = mask.len();
        let mut shifted = MutableBuffer::new_null(data_offset + bit_len);
        set_bits(
            shifted.as_slice_mut(),
            mask.validity(),
            data_offset,
            mask_offset,
            bit_len,
        );
        shifted.into()
    };

    Some(aligned)
}
/// Owner of everything an array exported by `FFI_ArrowArray::new` points at.
///
/// `new` boxes one of these and stores the raw pointer in
/// `FFI_ArrowArray::private_data`; `release_array` turns that pointer back into
/// a `Box` so the contents are dropped.
struct ArrayPrivateData {
/// Never read after construction (hence the lint allowance); presumably kept
/// only so the memory behind the pointers in `buffers_ptr` stays alive.
#[allow(dead_code)]
buffers: Vec<Option<Buffer>>,
/// Table of buffer pointers exposed through `FFI_ArrowArray::buffers`.
buffers_ptr: Box<[*const c_void]>,
/// Child arrays leaked with `Box::into_raw` in `new`; `release_array` frees
/// each of them.
children: Box<[*mut FFI_ArrowArray]>,
/// Dictionary array leaked with `Box::into_raw` in `new`, or null; freed by
/// `release_array` when non-null.
dictionary: *mut FFI_ArrowArray,
}
impl FFI_ArrowArray {
pub fn new(data: &ArrayData) -> Self {
let data_layout = layout(data.data_type());
let mut buffers = if data_layout.can_contain_null_mask {
std::iter::once(align_nulls(data.offset(), data.nulls()))
.chain(data.buffers().iter().map(|b| Some(b.clone())))
.collect::<Vec<_>>()
} else {
data.buffers().iter().map(|b| Some(b.clone())).collect()
};
let mut n_buffers = {
data_layout.buffers.len() + {
usize::from(data_layout.can_contain_null_mask)
}
} as i64;
if data_layout.variadic {
let mut data_buffers_lengths = Vec::new();
for buffer in data.buffers().iter().skip(1) {
data_buffers_lengths.push(buffer.len() as i64);
n_buffers += 1;
}
buffers.push(Some(ScalarBuffer::from(data_buffers_lengths).into_inner()));
n_buffers += 1;
}
let buffers_ptr = buffers
.iter()
.flat_map(|maybe_buffer| match maybe_buffer {
Some(b) => Some(b.as_ptr() as *const c_void),
None if data_layout.can_contain_null_mask => Some(std::ptr::null()),
None => None,
})
.collect::<Box<[_]>>();
let empty = vec![];
let (child_data, dictionary) = match data.data_type() {
DataType::Dictionary(_, _) => (
empty.as_slice(),
Box::into_raw(Box::new(FFI_ArrowArray::new(&data.child_data()[0]))),
),
_ => (data.child_data(), std::ptr::null_mut()),
};
let children = child_data
.iter()
.map(|child| Box::into_raw(Box::new(FFI_ArrowArray::new(child))))
.collect::<Box<_>>();
let n_children = children.len() as i64;
let null_count = match data.data_type() {
DataType::Null => data.len(),
_ => data.null_count(),
};
let mut private_data = Box::new(ArrayPrivateData {
buffers,
buffers_ptr,
children,
dictionary,
});
Self {
length: data.len() as i64,
null_count: null_count as i64,
offset: data.offset() as i64,
n_buffers,
n_children,
buffers: private_data.buffers_ptr.as_mut_ptr(),
children: private_data.children.as_mut_ptr(),
dictionary,
release: Some(release_array),
private_data: Box::into_raw(private_data) as *mut c_void,
}
}
pub unsafe fn from_raw(array: *mut FFI_ArrowArray) -> Self {
std::ptr::replace(array, Self::empty())
}
pub fn empty() -> Self {
Self {
length: 0,
null_count: 0,
offset: 0,
n_buffers: 0,
n_children: 0,
buffers: std::ptr::null_mut(),
children: std::ptr::null_mut(),
dictionary: std::ptr::null_mut(),
release: None,
private_data: std::ptr::null_mut(),
}
}
#[inline]
pub fn len(&self) -> usize {
self.length as usize
}
#[inline]
pub fn is_empty(&self) -> bool {
self.length == 0
}
#[inline]
pub fn is_released(&self) -> bool {
self.release.is_none()
}
#[inline]
pub fn offset(&self) -> usize {
self.offset as usize
}
#[inline]
pub fn null_count(&self) -> usize {
self.null_count as usize
}
#[inline]
pub fn null_count_opt(&self) -> Option<usize> {
usize::try_from(self.null_count).ok()
}
#[inline]
pub unsafe fn set_null_count(&mut self, null_count: i64) {
self.null_count = null_count;
}
#[inline]
pub fn buffer(&self, index: usize) -> *const u8 {
assert!(!self.buffers.is_null());
assert!(index < self.num_buffers());
unsafe { std::ptr::read_unaligned((self.buffers as *mut *const u8).add(index)) }
}
#[inline]
pub fn num_buffers(&self) -> usize {
self.n_buffers as _
}
#[inline]
pub fn child(&self, index: usize) -> &FFI_ArrowArray {
assert!(!self.children.is_null());
assert!(index < self.num_children());
unsafe {
let child = std::ptr::read_unaligned(self.children.add(index));
child.as_ref().unwrap()
}
}
#[inline]
pub fn num_children(&self) -> usize {
self.n_children as _
}
#[inline]
pub fn dictionary(&self) -> Option<&Self> {
unsafe { self.dictionary.as_ref() }
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A `Null`-typed array must be exported without any buffers: both the
    /// advertised `n_buffers` and the pointer table held in the private data
    /// are expected to be empty.
    #[test]
    fn null_array_n_buffers() {
        let data = ArrayData::new_null(&DataType::Null, 10);
        let ffi_array = FFI_ArrowArray::new(&data);
        assert_eq!(0, ffi_array.n_buffers);

        // Borrow the private data instead of re-boxing it. Taking ownership
        // with `Box::from_raw` would free it a second time (once here, once in
        // `ffi_array`'s release callback) if the assertion below failed and
        // the stack unwound.
        //
        // SAFETY: `FFI_ArrowArray::new` stored a pointer obtained from
        // `Box::into_raw(Box<ArrayPrivateData>)` in `private_data`, and
        // `ffi_array` (which owns it) outlives this borrow.
        let private_data = unsafe { &*(ffi_array.private_data as *const ArrayPrivateData) };
        assert_eq!(0, private_data.buffers_ptr.len());
    }
}