use std::mem;
use std::sync::Arc;
use crate::buffer::Buffer;
use crate::datatypes::DataType;
use crate::{bitmap::Bitmap, datatypes::ArrowNativeType};
use super::equal::equal;
/// Counts the null slots in the window of `len` slots starting at `offset`.
///
/// With a validity buffer, the result is `len` minus the number of set bits that
/// the buffer reports for that window. Without one, there are no nulls and the
/// result is `0`.
///
/// # Panics
///
/// Panics if the buffer reports more set bits than `len`.
#[inline]
pub(crate) fn count_nulls(
    null_bit_buffer: Option<&Buffer>,
    offset: usize,
    len: usize,
) -> usize {
    match null_bit_buffer {
        None => 0,
        Some(validity) => {
            let set_bits = validity.count_set_bits_offset(offset, len);
            len.checked_sub(set_bits).unwrap()
        }
    }
}
/// Generic in-memory description of an array: its type, length, offset, value
/// buffers, child arrays and optional validity bitmap.
#[derive(Debug, Clone)]
pub struct ArrayData {
/// The data type of this array.
data_type: DataType,
/// The number of elements, as returned by `len()`.
len: usize,
/// The number of null elements; either supplied by the caller of `new` or
/// computed from the validity buffer.
null_count: usize,
/// Element offset applied when probing the validity bitmap and when exposing
/// typed value slices.
offset: usize,
/// The buffers held by this array.
buffers: Vec<Buffer>,
/// The child array data held by this array.
child_data: Vec<ArrayDataRef>,
/// Optional validity bitmap: a set bit marks a valid element, a cleared bit a
/// null one. `None` means every element is treated as valid.
null_bitmap: Option<Bitmap>,
}
/// Shared, reference-counted handle to an [`ArrayData`].
pub type ArrayDataRef = Arc<ArrayData>;
impl ArrayData {
    /// Creates a new `ArrayData` from its constituent parts.
    ///
    /// When `null_count` is `None` it is computed from `null_bit_buffer` over the
    /// `len` slots starting at `offset`. When it is `Some`, the given value is
    /// stored as-is and is not checked against the buffer.
    pub fn new(
        data_type: DataType,
        len: usize,
        null_count: Option<usize>,
        null_bit_buffer: Option<Buffer>,
        offset: usize,
        buffers: Vec<Buffer>,
        child_data: Vec<ArrayDataRef>,
    ) -> Self {
        let null_count = null_count
            .unwrap_or_else(|| count_nulls(null_bit_buffer.as_ref(), offset, len));
        let null_bitmap = null_bit_buffer.map(Bitmap::from);
        Self {
            data_type,
            len,
            null_count,
            offset,
            buffers,
            child_data,
            null_bitmap,
        }
    }

    /// Returns an [`ArrayDataBuilder`] for an array of the given `data_type`.
    #[inline]
    pub const fn builder(data_type: DataType) -> ArrayDataBuilder {
        ArrayDataBuilder::new(data_type)
    }

    /// Returns a reference to the data type of this array data.
    #[inline]
    pub const fn data_type(&self) -> &DataType {
        &self.data_type
    }

    /// Returns the buffers held by this array data.
    pub fn buffers(&self) -> &[Buffer] {
        self.buffers.as_slice()
    }

    /// Returns the child array data held by this array data.
    pub fn child_data(&self) -> &[ArrayDataRef] {
        self.child_data.as_slice()
    }

    /// Returns `true` when the element at index `i` is null.
    ///
    /// An element is null when a validity bitmap is present and the bit at
    /// `offset + i` is not set. Without a bitmap no element is null.
    pub fn is_null(&self, i: usize) -> bool {
        match &self.null_bitmap {
            Some(bitmap) => !bitmap.is_set(self.offset + i),
            None => false,
        }
    }

    /// Returns a reference to the optional validity bitmap.
    #[inline]
    pub const fn null_bitmap(&self) -> &Option<Bitmap> {
        &self.null_bitmap
    }

    /// Returns the buffer backing the validity bitmap, if a bitmap is present.
    pub fn null_buffer(&self) -> Option<&Buffer> {
        self.null_bitmap().as_ref().map(|b| b.buffer_ref())
    }

    /// Returns `true` when the element at index `i` is valid (not null).
    ///
    /// An element is valid when the bit at `offset + i` is set in the validity
    /// bitmap. Without a bitmap every element is valid.
    pub fn is_valid(&self, i: usize) -> bool {
        match &self.null_bitmap {
            Some(bitmap) => bitmap.is_set(self.offset + i),
            None => true,
        }
    }

    /// Returns the number of elements.
    #[inline]
    pub const fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` when the length is zero.
    #[inline]
    pub const fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Returns the element offset of this array data.
    #[inline]
    pub const fn offset(&self) -> usize {
        self.offset
    }

    /// Returns the stored null count.
    #[inline]
    pub const fn null_count(&self) -> usize {
        self.null_count
    }

    /// Returns the summed capacity of all buffers, plus the size reported by the
    /// validity bitmap (if any), plus the same quantity for every child.
    pub fn get_buffer_memory_size(&self) -> usize {
        let mut size = 0;
        for buffer in &self.buffers {
            size += buffer.capacity();
        }
        if let Some(bitmap) = &self.null_bitmap {
            size += bitmap.get_buffer_memory_size();
        }
        for child in &self.child_data {
            size += child.get_buffer_memory_size();
        }
        size
    }

    /// Returns an estimate of the total memory used by this array data: the
    /// inline struct, every buffer (struct plus capacity), the validity bitmap
    /// and, recursively, all children.
    pub fn get_array_memory_size(&self) -> usize {
        // Inline size of the struct, excluding the three container fields whose
        // contents are accounted for individually below.
        let mut size = mem::size_of_val(self)
            - mem::size_of_val(&self.buffers)
            - mem::size_of_val(&self.null_bitmap)
            - mem::size_of_val(&self.child_data);

        for buffer in &self.buffers {
            // `buffer` is already a `&Buffer`; `size_of_val(&buffer)` would
            // measure the reference (pointer size) rather than the `Buffer`
            // struct itself, so name the type explicitly.
            size += mem::size_of::<Buffer>();
            size += buffer.capacity();
        }
        if let Some(bitmap) = &self.null_bitmap {
            size += bitmap.get_array_memory_size();
        }
        for child in &self.child_data {
            size += child.get_array_memory_size();
        }
        size
    }

    /// Returns a copy of this array data restricted to `length` elements starting
    /// at `offset`, where `offset` is relative to the current view.
    ///
    /// The copy is produced with `clone()`; only `len`, `offset` and `null_count`
    /// are changed, and the null count is recomputed for the new window.
    ///
    /// # Panics
    ///
    /// Panics if `offset + length` overflows `usize` or exceeds `self.len()`.
    pub fn slice(&self, offset: usize, length: usize) -> ArrayData {
        // In builds without overflow checks a wrapping sum could slip past the
        // bounds assertion below, so reject overflow explicitly first.
        assert!(
            offset.checked_add(length).is_some(),
            "offset + length overflows usize"
        );
        assert!((offset + length) <= self.len());

        let mut new_data = self.clone();
        new_data.len = length;
        new_data.offset = offset + self.offset;
        new_data.null_count =
            count_nulls(new_data.null_buffer(), new_data.offset, new_data.len);
        new_data
    }

    /// Returns the buffer at index `buffer` reinterpreted as a slice of `T`,
    /// starting at this array data's element offset.
    ///
    /// # Panics
    ///
    /// Panics if the buffer's bytes cannot be viewed as a whole number of
    /// properly aligned `T` values, if the data type is `DataType::Boolean`,
    /// if `buffer` is out of range, or if the offset exceeds the number of
    /// `T` values in the buffer.
    #[inline]
    pub(super) fn buffer<T: ArrowNativeType>(&self, buffer: usize) -> &[T] {
        // SAFETY: `align_to` only reinterprets the bytes it can align; this
        // relies on every bit pattern being a valid `T`, which is presumably
        // guaranteed by the `ArrowNativeType` bound (NOTE(review): confirm).
        let (prefix, values, suffix) =
            unsafe { self.buffers[buffer].as_slice().align_to::<T>() };
        if !prefix.is_empty() || !suffix.is_empty() {
            panic!("The buffer is not byte-aligned with its interpretation");
        }
        assert_ne!(self.data_type, DataType::Boolean);
        &values[self.offset..]
    }
}
/// Equality between two `ArrayData` values is decided entirely by the `equal`
/// function imported from the sibling `equal` module.
impl PartialEq for ArrayData {
    /// Returns the result of `equal(self, rhs)`.
    fn eq(&self, rhs: &Self) -> bool {
        equal(self, rhs)
    }
}
/// Builder that collects the parts of an [`ArrayData`] and assembles them with
/// `build()`.
#[derive(Debug)]
pub struct ArrayDataBuilder {
/// Data type of the array being built.
data_type: DataType,
/// Number of elements; starts at 0.
len: usize,
/// Explicit null count forwarded to `ArrayData::new`. It is initialised to
/// `None` and no setter for it is visible in this file.
null_count: Option<usize>,
/// Optional validity buffer.
null_bit_buffer: Option<Buffer>,
/// Element offset; starts at 0.
offset: usize,
/// Buffers collected so far.
buffers: Vec<Buffer>,
/// Child array data collected so far.
child_data: Vec<ArrayDataRef>,
}
impl ArrayDataBuilder {
    /// Starts a builder for `data_type` with zero length, zero offset, no
    /// validity buffer, no explicit null count, and no buffers or children.
    #[inline]
    pub const fn new(data_type: DataType) -> Self {
        Self {
            data_type,
            len: 0,
            null_count: None,
            null_bit_buffer: None,
            offset: 0,
            buffers: Vec::new(),
            child_data: Vec::new(),
        }
    }

    /// Sets the number of elements.
    #[inline]
    pub const fn len(mut self, n: usize) -> Self {
        self.len = n;
        self
    }

    /// Sets the validity buffer, replacing any previously set one.
    pub fn null_bit_buffer(self, buf: Buffer) -> Self {
        Self {
            null_bit_buffer: Some(buf),
            ..self
        }
    }

    /// Sets the element offset.
    #[inline]
    pub const fn offset(mut self, n: usize) -> Self {
        self.offset = n;
        self
    }

    /// Replaces the whole list of buffers with `v`.
    pub fn buffers(self, v: Vec<Buffer>) -> Self {
        Self { buffers: v, ..self }
    }

    /// Appends one buffer to the list of buffers.
    pub fn add_buffer(mut self, b: Buffer) -> Self {
        self.buffers.push(b);
        self
    }

    /// Replaces the whole list of child array data with `v`.
    pub fn child_data(self, v: Vec<ArrayDataRef>) -> Self {
        Self {
            child_data: v,
            ..self
        }
    }

    /// Appends one child array data to the list of children.
    pub fn add_child_data(mut self, r: ArrayDataRef) -> Self {
        self.child_data.push(r);
        self
    }

    /// Consumes the builder, passes every collected part to `ArrayData::new`
    /// and returns the result wrapped in an `Arc`.
    pub fn build(self) -> ArrayDataRef {
        let Self {
            data_type,
            len,
            null_count,
            null_bit_buffer,
            offset,
            buffers,
            child_data,
        } = self;
        Arc::new(ArrayData::new(
            data_type,
            len,
            null_count,
            null_bit_buffer,
            offset,
            buffers,
            child_data,
        ))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    use std::sync::Arc;

    use crate::datatypes::ToByteSlice;
    use crate::util::bit_util;

    /// Two validity bytes in which only bits 0, 3 and 10 are set.
    fn sparse_validity() -> [u8; 2] {
        let mut bits: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bits, 0);
        bit_util::set_bit(&mut bits, 3);
        bit_util::set_bit(&mut bits, 10);
        bits
    }

    /// `ArrayData::new` stores the values it is given, including an explicit
    /// null count.
    #[test]
    fn test_new() {
        let data =
            ArrayData::new(DataType::Boolean, 10, Some(1), None, 2, vec![], vec![]);
        assert_eq!(10, data.len());
        assert_eq!(1, data.null_count());
        assert_eq!(2, data.offset());
        assert_eq!(0, data.buffers().len());
        assert_eq!(0, data.child_data().len());
    }

    /// The builder forwards every part and derives the null count from the
    /// validity buffer.
    #[test]
    fn test_builder() {
        let child = Arc::new(ArrayData::new(
            DataType::Int32,
            5,
            Some(0),
            None,
            0,
            vec![Buffer::from([1i32, 2, 3, 4, 5].to_byte_slice())],
            vec![],
        ));
        let v = vec![0, 1, 2, 3];
        let values = Buffer::from(&v[..]);
        let validity = Buffer::from(vec![
            0b01011111, 0b10110101, 0b01100011, 0b00011110,
        ]);

        let data = ArrayData::builder(DataType::Int32)
            .len(20)
            .offset(5)
            .add_buffer(values)
            .null_bit_buffer(validity)
            .add_child_data(child.clone())
            .build();

        assert_eq!(20, data.len());
        assert_eq!(10, data.null_count());
        assert_eq!(5, data.offset());
        assert_eq!(1, data.buffers().len());
        assert_eq!(&[0, 1, 2, 3], data.buffers()[0].as_slice());
        assert_eq!(1, data.child_data().len());
        assert_eq!(child, data.child_data()[0]);
    }

    /// The null count is derived from the validity buffer, with and without an
    /// offset.
    #[test]
    fn test_null_count() {
        let without_offset = ArrayData::builder(DataType::Int32)
            .len(16)
            .null_bit_buffer(Buffer::from(sparse_validity()))
            .build();
        assert_eq!(13, without_offset.null_count());

        let with_offset = ArrayData::builder(DataType::Int32)
            .len(12)
            .offset(2)
            .null_bit_buffer(Buffer::from(sparse_validity()))
            .build();
        assert_eq!(10, with_offset.null_count());
    }

    /// `null_buffer` exposes the bytes that were supplied as validity buffer.
    #[test]
    fn test_null_buffer_ref() {
        let bits = sparse_validity();
        let data = ArrayData::builder(DataType::Int32)
            .len(16)
            .null_bit_buffer(Buffer::from(bits))
            .build();
        assert!(data.null_buffer().is_some());
        assert_eq!(&bits, data.null_buffer().unwrap().as_slice());
    }

    /// Slicing accumulates offsets and recomputes the null count for the new
    /// window.
    #[test]
    fn test_slice() {
        let full = ArrayData::builder(DataType::Int32)
            .len(16)
            .null_bit_buffer(Buffer::from(sparse_validity()))
            .build();
        let full = full.as_ref();

        let first = full.slice(1, 15);
        assert_eq!(full.len() - 1, first.len());
        assert_eq!(1, first.offset());
        assert_eq!(full.null_count(), first.null_count());

        // Slicing a slice: the offset is relative to the current view.
        let second = first.slice(1, 14);
        assert_eq!(full.len() - 2, second.len());
        assert_eq!(2, second.offset());
        assert_eq!(full.null_count() - 1, second.null_count());
    }

    /// Array data of different data types compare unequal.
    #[test]
    fn test_equality() {
        let int_data = ArrayData::builder(DataType::Int32).build();
        let float_data = ArrayData::builder(DataType::Float32).build();
        assert_ne!(int_data, float_data);
    }

    /// `count_nulls` counts the cleared bits inside the requested window.
    #[test]
    fn test_count_nulls() {
        let validity = Some(Buffer::from(vec![0b00010110, 0b10011111]));

        let whole = count_nulls(validity.as_ref(), 0, 16);
        assert_eq!(whole, 7);

        let window = count_nulls(validity.as_ref(), 4, 8);
        assert_eq!(window, 3);
    }
}