use crate::array::{empty_offsets, print_long_array};
use crate::iterator::ArrayIter;
use crate::raw_pointer::RawPtrBox;
use crate::types::bytes::ByteArrayNativeType;
use crate::types::ByteArrayType;
use crate::{Array, ArrayAccessor, OffsetSizeTrait};
use arrow_buffer::ArrowNativeType;
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
/// An array of variable-length byte values whose element and offset types are
/// described by `T`.
///
/// The array is backed by two buffers held in `data`: one of offsets and one
/// of value bytes. Raw pointers to the start of both buffers are captured when
/// the array is built from [`ArrayData`].
pub struct GenericByteArray<T>
where
    T: ByteArrayType,
{
    /// Underlying array data; holds the buffers the pointers below refer to.
    data: ArrayData,
    /// Start of the offsets buffer (not adjusted for the array's offset), or
    /// the slice returned by `empty_offsets` when the array and its offsets
    /// buffer are both empty.
    value_offsets: RawPtrBox<T::Offset>,
    /// Start of the values buffer.
    value_data: RawPtrBox<u8>,
}
/// Hand-written so that cloning does not require `T: Clone`.
impl<T> Clone for GenericByteArray<T>
where
    T: ByteArrayType,
{
    /// Clones the underlying [`ArrayData`] and copies both cached pointers
    /// unchanged.
    fn clone(&self) -> Self {
        let Self {
            data,
            value_offsets,
            value_data,
        } = self;

        // NOTE(review): the copied pointers are only valid for the clone if
        // `ArrayData::clone` keeps the buffers at the same addresses
        // (presumably they are reference-counted) — confirm.
        Self {
            data: data.clone(),
            value_offsets: *value_offsets,
            value_data: *value_data,
        }
    }
}
impl<T> GenericByteArray<T>
where
    T: ByteArrayType,
{
    /// The [`DataType`] of this array, as declared by `T`.
    pub const DATA_TYPE: DataType = T::DATA_TYPE;

    /// Returns the length in bytes of the value at index `i`, computed as the
    /// difference between two consecutive offsets.
    ///
    /// # Panics
    ///
    /// Panics if `i + 1` is not a valid index into [`Self::value_offsets`].
    #[inline]
    pub fn value_length(&self, i: usize) -> T::Offset {
        let offsets = self.value_offsets();
        // The upper bound is read first, so an out-of-range `i` is reported
        // against index `i + 1`.
        let end = offsets[i + 1];
        let start = offsets[i];
        end - start
    }

    /// Returns the entire values buffer (the second buffer of the underlying
    /// data) as a byte slice.
    ///
    /// The slice is not narrowed to this array's elements; use
    /// [`Self::value_offsets`] to locate individual values inside it.
    pub fn value_data(&self) -> &[u8] {
        let values = &self.data.buffers()[1];
        values.as_slice()
    }

    /// Returns `true` if every byte between the first and the last offset of
    /// this array is ASCII.
    pub fn is_ascii(&self) -> bool {
        let offsets = self.value_offsets();
        // `value_offsets` always yields `len() + 1` entries, so both lookups
        // succeed.
        let (first, last) = (offsets.first().unwrap(), offsets.last().unwrap());
        let bytes = self.value_data();
        let span = first.as_usize()..last.as_usize();
        bytes[span].is_ascii()
    }

    /// Returns the `len() + 1` offsets belonging to this array, starting at
    /// the array's own offset within the offsets buffer.
    #[inline]
    pub fn value_offsets(&self) -> &[T::Offset] {
        // SAFETY (NOTE(review)): sound only if the offsets buffer holds at
        // least `offset + len + 1` entries and is aligned for `T::Offset`;
        // presumably guaranteed by the `ArrayData` this array was built from
        // and by `RawPtrBox` — confirm.
        unsafe {
            let first = self.value_offsets.as_ptr().add(self.data.offset());
            let count = self.len() + 1;
            std::slice::from_raw_parts(first, count)
        }
    }

    /// Returns the value at index `i` without any bounds checking.
    ///
    /// # Safety
    ///
    /// The caller must ensure that `i` is less than the length of the array;
    /// otherwise the offset lookups read out of bounds.
    pub unsafe fn value_unchecked(&self, i: usize) -> &T::Native {
        let offsets = self.value_offsets();
        let end = *offsets.get_unchecked(i + 1);
        let start = *offsets.get_unchecked(i);

        // The value occupies the bytes `[start, end)` of the values buffer.
        let first_byte = self.value_data.as_ptr().offset(start.to_isize().unwrap());
        let byte_len = (end - start).to_usize().unwrap();
        let bytes = std::slice::from_raw_parts(first_byte, byte_len);

        // NOTE(review): `from_bytes_unchecked` presumably requires `bytes` to
        // already be a valid encoding of `T::Native` — confirm.
        T::Native::from_bytes_unchecked(bytes)
    }

    /// Returns the value at index `i`.
    ///
    /// # Panics
    ///
    /// Panics if `i` is not less than the length of the array.
    pub fn value(&self, i: usize) -> &T::Native {
        let in_bounds = i < self.data.len();
        assert!(
            in_bounds,
            "Trying to access an element at index {} from a {}{}Array of length {}",
            i,
            T::Offset::PREFIX,
            T::PREFIX,
            self.len()
        );
        // SAFETY: the assertion above established that `i` is below the
        // length of the underlying data.
        unsafe { self.value_unchecked(i) }
    }

    /// Returns an [`ArrayIter`] over this array.
    pub fn iter(&self) -> ArrayIter<&Self> {
        ArrayIter::new(self)
    }
}
impl<T> std::fmt::Debug for GenericByteArray<T>
where
    T: ByteArrayType,
{
    /// Writes a header of the form `<offset prefix><type prefix>Array`, an
    /// opening `[`, the elements as rendered by `print_long_array` (each one
    /// through its own `Debug` impl), and a closing `]`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}{}Array\n[\n", T::Offset::PREFIX, T::PREFIX)?;
        print_long_array(self, f, |arr, idx, out| {
            let element = arr.value(idx);
            std::fmt::Debug::fmt(&element, out)
        })?;
        f.write_str("]")
    }
}
impl<T: ByteArrayType> Array for GenericByteArray<T> {
fn as_any(&self) -> &dyn Any {
self
}
fn data(&self) -> &ArrayData {
&self.data
}
fn into_data(self) -> ArrayData {
self.into()
}
}
impl<'a, T> ArrayAccessor for &'a GenericByteArray<T>
where
    T: ByteArrayType,
{
    type Item = &'a T::Native;

    /// Delegates to the inherent, bounds-checked [`GenericByteArray::value`].
    fn value(&self, index: usize) -> Self::Item {
        // Copy the inner reference out so the result borrows for `'a`.
        let array: &'a GenericByteArray<T> = *self;
        GenericByteArray::value(array, index)
    }

    /// Delegates to the inherent [`GenericByteArray::value_unchecked`].
    ///
    /// # Safety
    ///
    /// Same contract as the inherent method: `index` must be within the
    /// bounds of the array.
    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
        let array: &'a GenericByteArray<T> = *self;
        GenericByteArray::value_unchecked(array, index)
    }
}
impl<T> From<ArrayData> for GenericByteArray<T>
where
    T: ByteArrayType,
{
    /// Wraps `data` as a byte array, capturing pointers to its offsets and
    /// values buffers.
    ///
    /// # Panics
    ///
    /// Panics if the data type of `data` differs from [`Self::DATA_TYPE`], or
    /// if `data` does not contain exactly two buffers.
    fn from(data: ArrayData) -> Self {
        assert_eq!(
            data.data_type(),
            &Self::DATA_TYPE,
            "{}{}Array expects DataType::{}",
            T::Offset::PREFIX,
            T::PREFIX,
            Self::DATA_TYPE
        );
        assert_eq!(
            data.buffers().len(),
            2,
            "{}{}Array data should contain 2 buffers only (offsets and values)",
            T::Offset::PREFIX,
            T::PREFIX,
        );

        let buffers = data.buffers();

        // An empty array with an empty offsets buffer has no offset to read,
        // so point at the slice provided by `empty_offsets` instead.
        let offsets = if data.is_empty() && buffers[0].is_empty() {
            empty_offsets::<T::Offset>().as_ptr() as *const _
        } else {
            buffers[0].as_ptr()
        };
        let values = buffers[1].as_ptr();

        // SAFETY (NOTE(review)): both pointers come from buffers held by
        // `data`, which is stored next to them in the returned array; the
        // `empty_offsets` slice is presumably `'static` — confirm. Any further
        // requirements of `RawPtrBox::new` are not visible from here.
        let value_offsets = unsafe { RawPtrBox::new(offsets) };
        let value_data = unsafe { RawPtrBox::new(values) };

        Self {
            data,
            value_offsets,
            value_data,
        }
    }
}
impl<T: ByteArrayType> From<GenericByteArray<T>> for ArrayData {
fn from(array: GenericByteArray<T>) -> Self {
array.data
}
}
impl<'a, T> IntoIterator for &'a GenericByteArray<T>
where
    T: ByteArrayType,
{
    type Item = Option<&'a T::Native>;
    type IntoIter = ArrayIter<Self>;

    /// Iterates over the array by reference; equivalent to calling
    /// [`GenericByteArray::iter`].
    fn into_iter(self) -> Self::IntoIter {
        self.iter()
    }
}